|
|
|
/*
|
|
|
|
* 64-bit pSeries and RS/6000 setup code.
|
|
|
|
*
|
|
|
|
* Copyright (C) 1995 Linus Torvalds
|
|
|
|
* Adapted from 'alpha' version by Gary Thomas
|
|
|
|
* Modified by Cort Dougan (cort@cs.nmt.edu)
|
|
|
|
* Modified by PPC64 Team, IBM Corp
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bootup setup stuff..
|
|
|
|
*/
|
|
|
|
|
|
|
|
#undef DEBUG
|
|
|
|
|
|
|
|
#include <linux/config.h>
|
|
|
|
#include <linux/cpu.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/stddef.h>
|
|
|
|
#include <linux/unistd.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/user.h>
|
|
|
|
#include <linux/a.out.h>
|
|
|
|
#include <linux/tty.h>
|
|
|
|
#include <linux/major.h>
|
|
|
|
#include <linux/interrupt.h>
|
|
|
|
#include <linux/reboot.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/ioport.h>
|
|
|
|
#include <linux/console.h>
|
|
|
|
#include <linux/pci.h>
|
|
|
|
#include <linux/utsname.h>
|
|
|
|
#include <linux/adb.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/irq.h>
|
|
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <linux/root_dev.h>
|
|
|
|
|
|
|
|
#include <asm/mmu.h>
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/io.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/prom.h>
|
|
|
|
#include <asm/rtas.h>
|
|
|
|
#include <asm/pci-bridge.h>
|
|
|
|
#include <asm/iommu.h>
|
|
|
|
#include <asm/dma.h>
|
|
|
|
#include <asm/machdep.h>
|
|
|
|
#include <asm/irq.h>
|
[PATCH] powerpc: Merge kexec
This patch merges, to some extent, the PPC32 and PPC64 kexec implementations.
We adopt the PPC32 approach of having ppc_md callbacks for the kexec functions.
The current PPC64 implementation becomes the "default" implementation for PPC64
which platforms can select if they need no special treatment.
I've added these default callbacks to pseries/maple/cell/powermac, this means
iSeries no longer supports kexec - but it never worked anyway.
I've renamed PPC32's machine_kexec_simple to default_machine_kexec, inline with
PPC64. Judging by the comments it might be better named machine_kexec_non_of,
or something, but at the moment it's the only implementation for PPC32 so it's
the "default".
Kexec requires machine_shutdown(), which is in machine_kexec.c on PPC32, but we
already have in setup-common.c on powerpc. All this does is call
ppc_md.nvram_sync, which only powermac implements, so instead make
machine_shutdown a ppc_md member and have it call core99_nvram_sync directly
on powermac.
I've also stuck relocate_kernel.S into misc_32.S for powerpc.
Built for ARCH=ppc, and 32 & 64 bit ARCH=powerpc, with KEXEC=y/n. Booted on
P5 LPAR and successfully kexec'ed.
Should apply on top of 493f25ef4087395891c99fcfe2c72e62e293e89f.
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
19 years ago
|
|
|
#include <asm/kexec.h>
|
|
|
|
#include <asm/time.h>
|
|
|
|
#include <asm/nvram.h>
|
|
|
|
#include "xics.h"
|
|
|
|
#include <asm/firmware.h>
|
|
|
|
#include <asm/pmc.h>
|
|
|
|
#include <asm/mpic.h>
|
|
|
|
#include <asm/ppc-pci.h>
|
|
|
|
#include <asm/i8259.h>
|
|
|
|
#include <asm/udbg.h>
|
|
|
|
#include <asm/smp.h>
|
|
|
|
|
|
|
|
#include "plpar_wrappers.h"
|
|
|
|
|
|
|
|
/*
 * Debug tracing helper: DBG() forwards to udbg_printf() when DEBUG is
 * defined and expands to nothing otherwise.
 */
#if defined(DEBUG)
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif
|
|
|
|
|
|
|
|
/* Routines implemented outside this file. */
extern void find_udbg_vterm(void);
extern void pSeries_system_reset_exception(struct pt_regs *regs);
extern int pSeries_machine_check_exception(struct pt_regs *regs);

/* FWNMI entry points, from head.S */
extern void system_reset_fwnmi(void);
extern void machine_check_fwnmi(void);

/* Idle loops defined later in this file. */
static void pseries_shared_idle(void);
static void pseries_dedicated_idle(void);

/* TRUE if an FWNMI handler is present (set by fwnmi_init()) */
int fwnmi_active;

/* MPIC instance, allocated by pSeries_setup_mpic() */
struct mpic *pSeries_mpic;
|
|
|
|
|
|
|
|
void pSeries_show_cpuinfo(struct seq_file *m)
|
|
|
|
{
|
|
|
|
struct device_node *root;
|
|
|
|
const char *model = "";
|
|
|
|
|
|
|
|
root = of_find_node_by_path("/");
|
|
|
|
if (root)
|
|
|
|
model = get_property(root, "model", NULL);
|
|
|
|
seq_printf(m, "machine\t\t: CHRP %s\n", model);
|
|
|
|
of_node_put(root);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialize firmware assisted non-maskable interrupts if
|
|
|
|
* the firmware supports this feature.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static void __init fwnmi_init(void)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
int ibm_nmi_register = rtas_token("ibm,nmi-register");
|
|
|
|
if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
|
|
|
|
return;
|
|
|
|
ret = rtas_call(ibm_nmi_register, 2, 1, NULL,
|
|
|
|
__pa((unsigned long)system_reset_fwnmi),
|
|
|
|
__pa((unsigned long)machine_check_fwnmi));
|
|
|
|
if (ret == 0)
|
|
|
|
fwnmi_active = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * init_IRQ hook used when the interrupt controller is an Open PIC:
 * finish MPIC initialization, set up the legacy 8259 and cascade it
 * into the MPIC.
 */
static void __init pSeries_init_mpic(void)
{
	struct device_node *pci_node;
	unsigned int *ack_prop = NULL;
	unsigned long intack = 0;

	/* All ISUs are setup, complete initialization */
	mpic_init(pSeries_mpic);

	/*
	 * Check what kind of cascade ACK we have: the address is the last
	 * address cell of the "8259-interrupt-acknowledge" property of the
	 * first node named "pci".
	 */
	pci_node = of_find_node_by_name(NULL, "pci");
	if (pci_node)
		ack_prop = (unsigned int *)
			get_property(pci_node, "8259-interrupt-acknowledge", NULL);

	if (ack_prop)
		intack = ack_prop[prom_n_addr_cells(pci_node)-1];
	else
		printk(KERN_ERR "Cannot find pci to get ack address\n");
	of_node_put(pci_node);

	/* Setup the legacy interrupts & controller */
	i8259_init(intack, 0);

	/* Hook cascade to mpic */
	mpic_setup_cascade(NUM_ISA_INTERRUPTS, i8259_irq_cascade, NULL);
}
|
|
|
|
|
|
|
|
/*
 * Locate the Open PIC through the root node's "platform-open-pic"
 * property and allocate the MPIC instance stored in pSeries_mpic.
 *
 * BUGs if no non-zero Open PIC address can be obtained.
 */
static void __init pSeries_setup_mpic(void)
{
	unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS];
	unsigned long openpic_addr = 0;
	struct device_node *root;
	unsigned int *cell;
	int irq_count;

	/* Find the Open PIC if present */
	root = of_find_node_by_path("/");
	cell = (unsigned int *) get_property(root, "platform-open-pic", NULL);
	if (cell) {
		int remaining = prom_n_addr_cells(root);

		/* Fold the 32-bit address cells into one value, high cell first. */
		while (remaining > 0) {
			openpic_addr = (openpic_addr << 32) + *cell;
			cell++;
			remaining--;
		}
		printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
	}
	of_node_put(root);

	BUG_ON(openpic_addr == 0);

	/* Get the sense values from OF */
	prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS);

	/* Setup the openpic driver; the last 4 irqs are left for IPIs */
	irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4;
	pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY,
				  16, 16, irq_count, /* isu size, irq offset, irq count */
				  NR_IRQS - 4, /* ipi offset */
				  senses, irq_count, /* sense & sense size */
				  " MPIC ");
}
|
|
|
|
|
|
|
|
static void pseries_lpar_enable_pmcs(void)
|
|
|
|
{
|
|
|
|
unsigned long set, reset;
|
|
|
|
|
|
|
|
power4_enable_pmcs();
|
|
|
|
|
|
|
|
set = 1UL << 63;
|
|
|
|
reset = 0;
|
|
|
|
plpar_hcall_norets(H_PERFMON, set, reset);
|
|
|
|
|
|
|
|
/* instruct hypervisor to maintain PMCs */
|
|
|
|
if (firmware_has_feature(FW_FEATURE_SPLPAR))
|
|
|
|
get_paca()->lppaca.pmcregs_in_use = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Platform setup_arch hook: select the interrupt-controller callbacks,
 * bring up SMP support, FWNMI, PCI host bridges, EEH and NVRAM, then
 * choose the idle loop and the PMC enable routine.
 */
static void __init pSeries_setup_arch(void)
{
	/* Fixup ppc_md depending on the type of interrupt controller */
	switch (ppc64_interrupt_controller) {
	case IC_OPEN_PIC:
		ppc_md.init_IRQ = pSeries_init_mpic;
		ppc_md.get_irq = mpic_get_irq;
		/*
		 * Allocate the mpic now, so that find_and_init_phbs() can
		 * fill the ISUs
		 */
		pSeries_setup_mpic();
		break;
	default:
		ppc_md.init_IRQ = xics_init_IRQ;
		ppc_md.get_irq = xics_get_irq;
		break;
	}

#ifdef CONFIG_SMP
	smp_init_pSeries();
#endif

	/* init to some ~sane value until calibrate_delay() runs */
	loops_per_jiffy = 50000000;

	if (ROOT_DEV == 0) {
		printk("No ramdisk, default root is /dev/sda2\n");
		ROOT_DEV = Root_SDA2;
	}

	fwnmi_init();

	/* Find and initialize PCI host bridges */
	init_pci_config_tokens();
	find_and_init_phbs();
	eeh_init();

	pSeries_nvram_init();

	/* Choose an idle loop */
	if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
		printk(KERN_INFO "Using default idle loop\n");
		ppc_md.idle_loop = default_idle;
	} else {
		vpa_init(boot_cpuid);
		if (get_paca()->lppaca.shared_proc) {
			printk(KERN_INFO "Using shared processor idle loop\n");
			ppc_md.idle_loop = pseries_shared_idle;
		} else {
			printk(KERN_INFO "Using dedicated idle loop\n");
			ppc_md.idle_loop = pseries_dedicated_idle;
		}
	}

	/* LPAR needs the hcall-based PMC enable; otherwise use the POWER4 one */
	ppc_md.enable_pmcs = platform_is_lpar() ? pseries_lpar_enable_pmcs
						: power4_enable_pmcs;
}
|
|
|
|
|
|
|
|
/*
 * arch_initcall: manually leave the kernel version on the panel by
 * sending a banner line and then system_utsname.version through
 * ppc_md.progress.
 *
 * NOTE(review): ppc_md.progress is called without a NULL check; confirm
 * it is always populated by the time this initcall runs.
 *
 * Always returns 0.
 */
static int __init pSeries_init_panel(void)
{
	ppc_md.progress("Linux ppc64\n", 0);
	ppc_md.progress(system_utsname.version, 0);

	return 0;
}
arch_initcall(pSeries_init_panel);
|
|
|
|
|
|
|
|
|
|
|
|
/* Build up the ppc64_firmware_features bitmask field
|
|
|
|
* using contents of device-tree/ibm,hypertas-functions.
|
|
|
|
* Ultimately this functionality may be moved into prom.c prom_init().
|
|
|
|
*/
|
|
|
|
static void __init fw_feature_init(void)
|
|
|
|
{
|
|
|
|
struct device_node * dn;
|
|
|
|
char * hypertas;
|
|
|
|
unsigned int len;
|
|
|
|
|
|
|
|
DBG(" -> fw_feature_init()\n");
|
|
|
|
|
|
|
|
ppc64_firmware_features = 0;
|
|
|
|
dn = of_find_node_by_path("/rtas");
|
|
|
|
if (dn == NULL) {
|
|
|
|
printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n");
|
|
|
|
goto no_rtas;
|
|
|
|
}
|
|
|
|
|
|
|
|
hypertas = get_property(dn, "ibm,hypertas-functions", &len);
|
|
|
|
if (hypertas) {
|
|
|
|
while (len > 0){
|
|
|
|
int i, hypertas_len;
|
|
|
|
/* check value against table of strings */
|
|
|
|
for(i=0; i < FIRMWARE_MAX_FEATURES ;i++) {
|
|
|
|
if ((firmware_features_table[i].name) &&
|
|
|
|
(strcmp(firmware_features_table[i].name,hypertas))==0) {
|
|
|
|
/* we have a match */
|
|
|
|
ppc64_firmware_features |=
|
|
|
|
(firmware_features_table[i].val);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
hypertas_len = strlen(hypertas);
|
|
|
|
len -= hypertas_len +1;
|
|
|
|
hypertas+= hypertas_len +1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
of_node_put(dn);
|
|
|
|
no_rtas:
|
|
|
|
|
|
|
|
DBG(" <- fw_feature_init()\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Determine the interrupt controller type from the first device-tree
 * node named "interrupt-controller" and record it in
 * ppc64_interrupt_controller (IC_OPEN_PIC or IC_PPC_XIC).  The value is
 * left at IC_INVALID when no such node exists or when its "compatible"
 * property is missing or unrecognized.
 */
static void __init pSeries_discover_pic(void)
{
	struct device_node *np;
	const char *typep;

	/*
	 * Setup interrupt mapping options that are needed for finish_device_tree
	 * to properly parse the OF interrupt tree & do the virtual irq mapping
	 */
	__irq_offset_value = NUM_ISA_INTERRUPTS;
	ppc64_interrupt_controller = IC_INVALID;

	/* Only the first matching node is examined. */
	np = of_find_node_by_name(NULL, "interrupt-controller");
	if (np == NULL)
		return;

	/* A node without "compatible" is treated as unrecognized. */
	typep = (const char *)get_property(np, "compatible", NULL);
	if (typep && strstr(typep, "open-pic"))
		ppc64_interrupt_controller = IC_OPEN_PIC;
	else if (typep && strstr(typep, "ppc-xicp"))
		ppc64_interrupt_controller = IC_PPC_XIC;
	else
		printk("pSeries_discover_pic: failed to recognize"
		       " interrupt-controller\n");

	/* Drop the reference taken by of_find_node_by_name(). */
	of_node_put(np);
}
|
|
|
|
|
|
|
|
/*
 * Take the calling CPU down: disable interrupts, leave the idle task,
 * clear the CPPR and stop the CPU through RTAS.  Not expected to return.
 */
static void pSeries_mach_cpu_die(void)
{
	local_irq_disable();
	idle_task_exit();

	/*
	 * Some hardware requires clearing the CPPR, while other hardware
	 * does not; it is safe either way.
	 */
	pSeriesLP_cppr_info(0, 0);

	rtas_stop_self();

	/* Should never get here... */
	BUG();
	while (1)
		;
}
|
|
|
|
|
|
|
|
static int pseries_set_dabr(unsigned long dabr)
|
|
|
|
{
|
|
|
|
return plpar_hcall_norets(H_SET_DABR, dabr);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int pseries_set_xdabr(unsigned long dabr)
|
|
|
|
{
|
|
|
|
/* We want to catch accesses from kernel and userspace */
|
|
|
|
return plpar_hcall_norets(H_SET_XDABR, dabr,
|
|
|
|
H_DABRX_KERNEL | H_DABRX_USER);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Early initialization. Relocation is on but do not reference unbolted pages
|
|
|
|
*/
|
|
|
|
static void __init pSeries_init_early(void)
|
|
|
|
{
|
|
|
|
int iommu_off = 0;
|
|
|
|
|
|
|
|
DBG(" -> pSeries_init_early()\n");
|
|
|
|
|
|
|
|
fw_feature_init();
|
|
|
|
|
|
|
|
if (platform_is_lpar())
|
|
|
|
hpte_init_lpar();
|
|
|
|
else {
|
|
|
|
hpte_init_native();
|
|
|
|
iommu_off = (of_chosen &&
|
|
|
|
get_property(of_chosen, "linux,iommu-off", NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (platform_is_lpar())
|
|
|
|
find_udbg_vterm();
|
|
|
|
|
|
|
|
if (firmware_has_feature(FW_FEATURE_DABR))
|
|
|
|
ppc_md.set_dabr = pseries_set_dabr;
|
|
|
|
else if (firmware_has_feature(FW_FEATURE_XDABR))
|
|
|
|
ppc_md.set_dabr = pseries_set_xdabr;
|
|
|
|
|
|
|
|
iommu_init_early_pSeries();
|
|
|
|
|
|
|
|
pSeries_discover_pic();
|
|
|
|
|
|
|
|
DBG(" <- pSeries_init_early()\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int pSeries_check_legacy_ioport(unsigned int baseport)
|
|
|
|
{
|
|
|
|
struct device_node *np;
|
|
|
|
|
|
|
|
#define I8042_DATA_REG 0x60
|
|
|
|
#define FDC_BASE 0x3f0
|
|
|
|
|
|
|
|
|
|
|
|
switch(baseport) {
|
|
|
|
case I8042_DATA_REG:
|
|
|
|
np = of_find_node_by_type(NULL, "8042");
|
|
|
|
if (np == NULL)
|
|
|
|
return -ENODEV;
|
|
|
|
of_node_put(np);
|
|
|
|
break;
|
|
|
|
case FDC_BASE:
|
|
|
|
np = of_find_node_by_type(NULL, "fdc");
|
|
|
|
if (np == NULL)
|
|
|
|
return -ENODEV;
|
|
|
|
of_node_put(np);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Called very early, MMU is off, device-tree isn't unflattened
|
|
|
|
*/
|
|
|
|
extern struct machdep_calls pSeries_md;
|
|
|
|
|
|
|
|
static int __init pSeries_probe(int platform)
|
|
|
|
{
|
|
|
|
if (platform != PLATFORM_PSERIES &&
|
|
|
|
platform != PLATFORM_PSERIES_LPAR)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* if we have some ppc_md fixups for LPAR to do, do
|
|
|
|
* it here ...
|
|
|
|
*/
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
|
|
|
|
|
|
|
|
static inline void dedicated_idle_sleep(unsigned int cpu)
|
|
|
|
{
|
|
|
|
struct paca_struct *ppaca = &paca[cpu ^ 1];
|
|
|
|
|
|
|
|
/* Only sleep if the other thread is not idle */
|
|
|
|
if (!(ppaca->lppaca.idle)) {
|
|
|
|
local_irq_disable();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We are about to sleep the thread and so wont be polling any
|
|
|
|
* more.
|
|
|
|
*/
|
|
|
|
clear_thread_flag(TIF_POLLING_NRFLAG);
|
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task get's schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
19 years ago
|
|
|
smp_mb__after_clear_bit();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SMT dynamic mode. Cede will result in this thread going
|
|
|
|
* dormant, if the partner thread is still doing work. Thread
|
|
|
|
* wakes up if partner goes idle, an interrupt is presented, or
|
|
|
|
* a prod occurs. Returning from the cede enables external
|
|
|
|
* interrupts.
|
|
|
|
*/
|
|
|
|
if (!need_resched())
|
|
|
|
cede_processor();
|
|
|
|
else
|
|
|
|
local_irq_enable();
|
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task get's schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
19 years ago
|
|
|
set_thread_flag(TIF_POLLING_NRFLAG);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Give the HV an opportunity at the processor, since we are
|
|
|
|
* not doing any work.
|
|
|
|
*/
|
|
|
|
poll_pending();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void pseries_dedicated_idle(void)
|
|
|
|
{
|
|
|
|
struct paca_struct *lpaca = get_paca();
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
unsigned long start_snooze;
|
|
|
|
unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
|
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task get's schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
19 years ago
|
|
|
set_thread_flag(TIF_POLLING_NRFLAG);
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
/*
|
|
|
|
* Indicate to the HV that we are idle. Now would be
|
|
|
|
* a good time to find other work to dispatch.
|
|
|
|
*/
|
|
|
|
lpaca->lppaca.idle = 1;
|
|
|
|
|
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid. Improves efficiency of
resched_task and some cpu_idle routines.
* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
and as we hold it during resched_task, then there is no need for an
atomic test and set there. The only other time this should be set is
when the task's quantum expires, in the timer interrupt - this is
protected against because the rq lock is irq-safe.
- If TIF_NEED_RESCHED is set, then we don't need to do anything. It
won't get unset until the task get's schedule()d off.
- If we are running on the same CPU as the task we resched, then set
TIF_NEED_RESCHED and no further action is required.
- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
after TIF_NEED_RESCHED has been set, then we need to send an IPI.
Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.
* In idle routines:
- Enter cpu_idle with preempt disabled. When the need_resched() condition
becomes true, explicitly call schedule(). This makes things a bit clearer
(IMO), but haven't updated all architectures yet.
- Many do a test and clear of TIF_NEED_RESCHED for some reason. According
to the resched_task rules, this isn't needed (and actually breaks the
assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock
held). So remove that. Generally one less locked memory op when switching
to the idle thread.
- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner
most polling idle loops. The above resched_task semantics allow it to be
set until before the last time need_resched() is checked before going into
a halt requiring interrupt wakeup.
Many idle routines simply never enter such a halt, and so POLLING_NRFLAG
can be always left set, completely eliminating resched IPIs when rescheduling
the idle task.
POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
19 years ago
|
|
|
if (!need_resched()) {
|
|
|
|
start_snooze = get_tb() +
|
|
|
|
*smt_snooze_delay * tb_ticks_per_usec;
|
|
|
|
|
|
|
|
while (!need_resched() && !cpu_is_offline(cpu)) {
|
|
|
|
ppc64_runlatch_off();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go into low thread priority and possibly
|
|
|
|
* low power mode.
|
|
|
|
*/
|
|
|
|
HMT_low();
|
|
|
|
HMT_very_low();
|
|
|
|
|
|
|
|
if (*smt_snooze_delay != 0 &&
|
|
|
|
get_tb() > start_snooze) {
|
|
|
|
HMT_medium();
|
|
|
|
dedicated_idle_sleep(cpu);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
HMT_medium();
|
|
|
|
}
|
|
|
|
|
|
|
|
lpaca->lppaca.idle = 0;
|
|
|
|
ppc64_runlatch_on();
|
|
|
|
|
|
|
|
preempt_enable_no_resched();
|
|
|
|
schedule();
|
|
|
|
preempt_disable();
|
|
|
|
|
|
|
|
if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
|
|
|
|
cpu_die();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void pseries_shared_idle(void)
|
|
|
|
{
|
|
|
|
struct paca_struct *lpaca = get_paca();
|
|
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
/*
|
|
|
|
* Indicate to the HV that we are idle. Now would be
|
|
|
|
* a good time to find other work to dispatch.
|
|
|
|
*/
|
|
|
|
lpaca->lppaca.idle = 1;
|
|
|
|
|
|
|
|
while (!need_resched() && !cpu_is_offline(cpu)) {
|
|
|
|
local_irq_disable();
|
|
|
|
ppc64_runlatch_off();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Yield the processor to the hypervisor. We return if
|
|
|
|
* an external interrupt occurs (which are driven prior
|
|
|
|
* to returning here) or if a prod occurs from another
|
|
|
|
* processor. When returning here, external interrupts
|
|
|
|
* are enabled.
|
|
|
|
*
|
|
|
|
* Check need_resched() again with interrupts disabled
|
|
|
|
* to avoid a race.
|
|
|
|
*/
|
|
|
|
if (!need_resched())
|
|
|
|
cede_processor();
|
|
|
|
else
|
|
|
|
local_irq_enable();
|
|
|
|
|
|
|
|
HMT_medium();
|
|
|
|
}
|
|
|
|
|
|
|
|
lpaca->lppaca.idle = 0;
|
|
|
|
ppc64_runlatch_on();
|
|
|
|
|
|
|
|
preempt_enable_no_resched();
|
|
|
|
schedule();
|
|
|
|
preempt_disable();
|
|
|
|
|
|
|
|
if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
|
|
|
|
cpu_die();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
ppc64: Set up PCI tree from Open Firmware device tree
This adds code which gives us the option on ppc64 of instantiating the
PCI tree (the tree of pci_bus and pci_dev structs) from the Open
Firmware device tree rather than by probing PCI configuration space.
The OF device tree has a node for each PCI device and bridge in the
system, with properties that tell us what addresses the firmware has
configured for them and other details.
There are a couple of reasons why this is needed. First, on systems
with a hypervisor, there is a PCI-PCI bridge per slot under the PCI
host bridges. These PCI-PCI bridges have special isolation features
for virtualization. We can't write to their config space, and we are
not supposed to be reading their config space either. The firmware
tells us about the address ranges that they pass in the OF device
tree.
Secondly, on powermacs, the interrupt controller is in a PCI device
that may be behind a PCI-PCI bridge. If we happened to take an
interrupt just at the point when the device or a bridge on the path to
it was disabled for probing, we would crash when we try to access the
interrupt controller.
I have implemented a platform-specific function which is called for
each PCI bridge (host or PCI-PCI) to say whether the code should look
in the device tree or use normal PCI probing for the devices under
that bridge. On pSeries machines we use the device tree if we're
running under a hypervisor, otherwise we use normal probing. On
powermacs we use normal probing for the AGP bridge, since the device
for the AGP bridge itself isn't shown in the device tree (at least on
my G5), and the device tree for everything else.
This has been tested on a dual G5 powermac, a partition on a POWER5
machine (running under the hypervisor), and a legacy iSeries
partition.
Signed-off-by: Paul Mackerras <paulus@samba.org>
20 years ago
|
|
|
static int pSeries_pci_probe_mode(struct pci_bus *bus)
|
|
|
|
{
|
|
|
|
if (platform_is_lpar())
|
ppc64: Set up PCI tree from Open Firmware device tree
This adds code which gives us the option on ppc64 of instantiating the
PCI tree (the tree of pci_bus and pci_dev structs) from the Open
Firmware device tree rather than by probing PCI configuration space.
The OF device tree has a node for each PCI device and bridge in the
system, with properties that tell us what addresses the firmware has
configured for them and other details.
There are a couple of reasons why this is needed. First, on systems
with a hypervisor, there is a PCI-PCI bridge per slot under the PCI
host bridges. These PCI-PCI bridges have special isolation features
for virtualization. We can't write to their config space, and we are
not supposed to be reading their config space either. The firmware
tells us about the address ranges that they pass in the OF device
tree.
Secondly, on powermacs, the interrupt controller is in a PCI device
that may be behind a PCI-PCI bridge. If we happened to take an
interrupt just at the point when the device or a bridge on the path to
it was disabled for probing, we would crash when we try to access the
interrupt controller.
I have implemented a platform-specific function which is called for
each PCI bridge (host or PCI-PCI) to say whether the code should look
in the device tree or use normal PCI probing for the devices under
that bridge. On pSeries machines we use the device tree if we're
running under a hypervisor, otherwise we use normal probing. On
powermacs we use normal probing for the AGP bridge, since the device
for the AGP bridge itself isn't shown in the device tree (at least on
my G5), and the device tree for everything else.
This has been tested on a dual G5 powermac, a partition on a POWER5
machine (running under the hypervisor), and a legacy iSeries
partition.
Signed-off-by: Paul Mackerras <paulus@samba.org>
20 years ago
|
|
|
return PCI_PROBE_DEVTREE;
|
|
|
|
return PCI_PROBE_NORMAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_KEXEC
|
|
|
|
static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
|
|
|
|
{
|
|
|
|
/* Don't risk a hypervisor call if we're crashing */
|
|
|
|
if (!crash_shutdown) {
|
|
|
|
unsigned long vpa = __pa(&get_paca()->lppaca);
|
|
|
|
|
|
|
|
if (unregister_vpa(hard_smp_processor_id(), vpa)) {
|
|
|
|
printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
|
|
|
|
"failed\n", smp_processor_id(),
|
|
|
|
hard_smp_processor_id());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ppc64_interrupt_controller == IC_OPEN_PIC)
|
|
|
|
mpic_teardown_this_cpu(secondary);
|
|
|
|
else
|
|
|
|
xics_teardown_cpu(secondary);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * Machine-dependent callback table for the pSeries platform.
 *
 * Each member points at the pSeries- or RTAS-specific routine that
 * implements the corresponding machdep hook.  The kexec hooks are only
 * present when CONFIG_KEXEC is enabled.
 */
struct machdep_calls __initdata pSeries_md = {
	/* Platform detection and boot-time setup */
	.probe				= pSeries_probe,
	.init_early			= pSeries_init_early,
	.setup_arch			= pSeries_setup_arch,
	.show_cpuinfo			= pSeries_show_cpuinfo,
	.progress			= rtas_progress,
	.check_legacy_ioport		= pSeries_check_legacy_ioport,

	/* PCI and interrupt routing */
	.pcibios_fixup			= pSeries_final_fixup,
	.pci_probe_mode			= pSeries_pci_probe_mode,
	.irq_bus_setup			= pSeries_irq_bus_setup,

	/* Restart, shutdown and cpu offlining */
	.restart			= rtas_restart,
	.power_off			= rtas_power_off,
	.halt				= rtas_halt,
	.panic				= rtas_os_term,
	.cpu_die			= pSeries_mach_cpu_die,

	/* Timekeeping */
	.get_boot_time			= rtas_get_boot_time,
	.get_rtc_time			= rtas_get_rtc_time,
	.set_rtc_time			= rtas_set_rtc_time,
	.calibrate_decr			= generic_calibrate_decr,

	/* Error logging and exception handling */
	.log_error			= pSeries_log_error,
	.system_reset_exception		= pSeries_system_reset_exception,
	.machine_check_exception	= pSeries_machine_check_exception,

#ifdef CONFIG_KEXEC
	/* kexec: platform cpu-down hook plus the default implementation */
	.kexec_cpu_down			= pseries_kexec_cpu_down,
	.machine_kexec			= default_machine_kexec,
	.machine_kexec_prepare		= default_machine_kexec_prepare,
#endif
};
|