commit
a62e68488d
@ -0,0 +1,316 @@ |
||||
/*
|
||||
* eeh_cache.c |
||||
* PCI address cache; allows the lookup of PCI devices based on I/O address |
||||
* |
||||
* Copyright (C) 2004 Linas Vepstas <linas@austin.ibm.com> IBM Corporation |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License as published by |
||||
* the Free Software Foundation; either version 2 of the License, or |
||||
* (at your option) any later version. |
||||
* |
||||
* This program is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License |
||||
* along with this program; if not, write to the Free Software |
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||||
*/ |
||||
|
||||
#include <linux/list.h> |
||||
#include <linux/pci.h> |
||||
#include <linux/rbtree.h> |
||||
#include <linux/spinlock.h> |
||||
#include <asm/atomic.h> |
||||
#include <asm/pci-bridge.h> |
||||
#include <asm/ppc-pci.h> |
||||
|
||||
#undef DEBUG |
||||
|
||||
/**
|
||||
* The pci address cache subsystem. This subsystem places |
||||
* PCI device address resources into a red-black tree, sorted |
||||
* according to the address range, so that given only an i/o |
||||
* address, the corresponding PCI device can be **quickly** |
||||
* found. It is safe to perform an address lookup in an interrupt |
||||
* context; this ability is an important feature. |
||||
* |
||||
* Currently, the only customer of this code is the EEH subsystem; |
||||
* thus, this code has been somewhat tailored to suit EEH better. |
||||
* In particular, the cache does *not* hold the addresses of devices |
||||
* for which EEH is not enabled. |
||||
* |
||||
* (Implementation Note: The RB tree seems to be better/faster |
||||
* than any hash algo I could think of for this problem, even |
||||
* with the penalty of slow pointer chases for d-cache misses). |
||||
*/ |
||||
struct pci_io_addr_range |
||||
{ |
||||
struct rb_node rb_node; |
||||
unsigned long addr_lo; |
||||
unsigned long addr_hi; |
||||
struct pci_dev *pcidev; |
||||
unsigned int flags; |
||||
}; |
||||
|
||||
static struct pci_io_addr_cache |
||||
{ |
||||
struct rb_root rb_root; |
||||
spinlock_t piar_lock; |
||||
} pci_io_addr_cache_root; |
||||
|
||||
static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) |
||||
{ |
||||
struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; |
||||
|
||||
while (n) { |
||||
struct pci_io_addr_range *piar; |
||||
piar = rb_entry(n, struct pci_io_addr_range, rb_node); |
||||
|
||||
if (addr < piar->addr_lo) { |
||||
n = n->rb_left; |
||||
} else { |
||||
if (addr > piar->addr_hi) { |
||||
n = n->rb_right; |
||||
} else { |
||||
pci_dev_get(piar->pcidev); |
||||
return piar->pcidev; |
||||
} |
||||
} |
||||
} |
||||
|
||||
return NULL; |
||||
} |
||||
|
||||
/**
|
||||
* pci_get_device_by_addr - Get device, given only address |
||||
* @addr: mmio (PIO) phys address or i/o port number |
||||
* |
||||
* Given an mmio phys address, or a port number, find a pci device |
||||
* that implements this address. Be sure to pci_dev_put the device |
||||
* when finished. I/O port numbers are assumed to be offset |
||||
* from zero (that is, they do *not* have pci_io_addr added in). |
||||
* It is safe to call this function within an interrupt. |
||||
*/ |
||||
struct pci_dev *pci_get_device_by_addr(unsigned long addr) |
||||
{ |
||||
struct pci_dev *dev; |
||||
unsigned long flags; |
||||
|
||||
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); |
||||
dev = __pci_get_device_by_addr(addr); |
||||
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); |
||||
return dev; |
||||
} |
||||
|
||||
#ifdef DEBUG
/*
 * Debug aid: walk the address cache from first to last node and
 * print one line per cached range.
 */
static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
{
	struct rb_node *node;
	int idx = 0;

	for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
		struct pci_io_addr_range *range =
			rb_entry(node, struct pci_io_addr_range, rb_node);
		const char *kind = (range->flags & IORESOURCE_IO) ? "i/o" : "mem";

		printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",
		       kind, idx, range->addr_lo, range->addr_hi,
		       pci_name(range->pcidev));
		idx++;
	}
}
#endif
||||
|
||||
/* Insert address range into the rb tree. */ |
||||
static struct pci_io_addr_range * |
||||
pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, |
||||
unsigned long ahi, unsigned int flags) |
||||
{ |
||||
struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; |
||||
struct rb_node *parent = NULL; |
||||
struct pci_io_addr_range *piar; |
||||
|
||||
/* Walk tree, find a place to insert into tree */ |
||||
while (*p) { |
||||
parent = *p; |
||||
piar = rb_entry(parent, struct pci_io_addr_range, rb_node); |
||||
if (ahi < piar->addr_lo) { |
||||
p = &parent->rb_left; |
||||
} else if (alo > piar->addr_hi) { |
||||
p = &parent->rb_right; |
||||
} else { |
||||
if (dev != piar->pcidev || |
||||
alo != piar->addr_lo || ahi != piar->addr_hi) { |
||||
printk(KERN_WARNING "PIAR: overlapping address range\n"); |
||||
} |
||||
return piar; |
||||
} |
||||
} |
||||
piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); |
||||
if (!piar) |
||||
return NULL; |
||||
|
||||
piar->addr_lo = alo; |
||||
piar->addr_hi = ahi; |
||||
piar->pcidev = dev; |
||||
piar->flags = flags; |
||||
|
||||
#ifdef DEBUG |
||||
printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", |
||||
alo, ahi, pci_name (dev)); |
||||
#endif |
||||
|
||||
rb_link_node(&piar->rb_node, parent, p); |
||||
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); |
||||
|
||||
return piar; |
||||
} |
||||
|
||||
static void __pci_addr_cache_insert_device(struct pci_dev *dev) |
||||
{ |
||||
struct device_node *dn; |
||||
struct pci_dn *pdn; |
||||
int i; |
||||
int inserted = 0; |
||||
|
||||
dn = pci_device_to_OF_node(dev); |
||||
if (!dn) { |
||||
printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); |
||||
return; |
||||
} |
||||
|
||||
/* Skip any devices for which EEH is not enabled. */ |
||||
pdn = PCI_DN(dn); |
||||
if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || |
||||
pdn->eeh_mode & EEH_MODE_NOCHECK) { |
||||
#ifdef DEBUG |
||||
printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n", |
||||
pci_name(dev), pdn->node->full_name); |
||||
#endif |
||||
return; |
||||
} |
||||
|
||||
/* The cache holds a reference to the device... */ |
||||
pci_dev_get(dev); |
||||
|
||||
/* Walk resources on this device, poke them into the tree */ |
||||
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { |
||||
unsigned long start = pci_resource_start(dev,i); |
||||
unsigned long end = pci_resource_end(dev,i); |
||||
unsigned int flags = pci_resource_flags(dev,i); |
||||
|
||||
/* We are interested only bus addresses, not dma or other stuff */ |
||||
if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) |
||||
continue; |
||||
if (start == 0 || ~start == 0 || end == 0 || ~end == 0) |
||||
continue; |
||||
pci_addr_cache_insert(dev, start, end, flags); |
||||
inserted = 1; |
||||
} |
||||
|
||||
/* If there was nothing to add, the cache has no reference... */ |
||||
if (!inserted) |
||||
pci_dev_put(dev); |
||||
} |
||||
|
||||
/**
|
||||
* pci_addr_cache_insert_device - Add a device to the address cache |
||||
* @dev: PCI device whose I/O addresses we are interested in. |
||||
* |
||||
* In order to support the fast lookup of devices based on addresses, |
||||
* we maintain a cache of devices that can be quickly searched. |
||||
* This routine adds a device to that cache. |
||||
*/ |
||||
void pci_addr_cache_insert_device(struct pci_dev *dev) |
||||
{ |
||||
unsigned long flags; |
||||
|
||||
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); |
||||
__pci_addr_cache_insert_device(dev); |
||||
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); |
||||
} |
||||
|
||||
static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) |
||||
{ |
||||
struct rb_node *n; |
||||
int removed = 0; |
||||
|
||||
restart: |
||||
n = rb_first(&pci_io_addr_cache_root.rb_root); |
||||
while (n) { |
||||
struct pci_io_addr_range *piar; |
||||
piar = rb_entry(n, struct pci_io_addr_range, rb_node); |
||||
|
||||
if (piar->pcidev == dev) { |
||||
rb_erase(n, &pci_io_addr_cache_root.rb_root); |
||||
removed = 1; |
||||
kfree(piar); |
||||
goto restart; |
||||
} |
||||
n = rb_next(n); |
||||
} |
||||
|
||||
/* The cache no longer holds its reference to this device... */ |
||||
if (removed) |
||||
pci_dev_put(dev); |
||||
} |
||||
|
||||
/**
|
||||
* pci_addr_cache_remove_device - remove pci device from addr cache |
||||
* @dev: device to remove |
||||
* |
||||
* Remove a device from the addr-cache tree. |
||||
* This is potentially expensive, since it will walk |
||||
* the tree multiple times (once per resource). |
||||
* But so what; device removal doesn't need to be that fast. |
||||
*/ |
||||
void pci_addr_cache_remove_device(struct pci_dev *dev) |
||||
{ |
||||
unsigned long flags; |
||||
|
||||
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); |
||||
__pci_addr_cache_remove_device(dev); |
||||
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); |
||||
} |
||||
|
||||
/**
|
||||
* pci_addr_cache_build - Build a cache of I/O addresses |
||||
* |
||||
* Build a cache of pci i/o addresses. This cache will be used to |
||||
* find the pci device that corresponds to a given address. |
||||
* This routine scans all pci busses to build the cache. |
||||
* Must be run late in boot process, after the pci controllers |
||||
* have been scaned for devices (after all device resources are known). |
||||
*/ |
||||
void __init pci_addr_cache_build(void) |
||||
{ |
||||
struct device_node *dn; |
||||
struct pci_dev *dev = NULL; |
||||
|
||||
spin_lock_init(&pci_io_addr_cache_root.piar_lock); |
||||
|
||||
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
||||
/* Ignore PCI bridges */ |
||||
if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) |
||||
continue; |
||||
|
||||
pci_addr_cache_insert_device(dev); |
||||
|
||||
dn = pci_device_to_OF_node(dev); |
||||
pci_dev_get (dev); /* matching put is in eeh_remove_device() */ |
||||
PCI_DN(dn)->pcidev = dev; |
||||
} |
||||
|
||||
#ifdef DEBUG |
||||
/* Verify tree built up above, echo back the list of addrs. */ |
||||
pci_addr_cache_print(&pci_io_addr_cache_root); |
||||
#endif |
||||
} |
||||
|
@ -0,0 +1,376 @@ |
||||
/*
|
||||
* PCI Error Recovery Driver for RPA-compliant PPC64 platform. |
||||
* Copyright (C) 2004, 2005 Linas Vepstas <linas@linas.org> |
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License as published by |
||||
* the Free Software Foundation; either version 2 of the License, or (at |
||||
* your option) any later version. |
||||
* |
||||
* This program is distributed in the hope that it will be useful, but |
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or |
||||
* NON INFRINGEMENT. See the GNU General Public License for more |
||||
* details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License |
||||
* along with this program; if not, write to the Free Software |
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||||
* |
||||
* Send feedback to <linas@us.ibm.com> |
||||
* |
||||
*/ |
||||
#include <linux/delay.h> |
||||
#include <linux/irq.h> |
||||
#include <linux/interrupt.h> |
||||
#include <linux/notifier.h> |
||||
#include <linux/pci.h> |
||||
#include <asm/eeh.h> |
||||
#include <asm/eeh_event.h> |
||||
#include <asm/ppc-pci.h> |
||||
#include <asm/pci-bridge.h> |
||||
#include <asm/prom.h> |
||||
#include <asm/rtas.h> |
||||
|
||||
|
||||
static inline const char * pcid_name (struct pci_dev *pdev) |
||||
{ |
||||
if (pdev->dev.driver) |
||||
return pdev->dev.driver->name; |
||||
return ""; |
||||
} |
||||
|
||||
#ifdef DEBUG
/*
 * Debug aid: print @pdn, prefixed by @dent indent strings, then
 * recurse into each child of its device node with the indent
 * increased by 3.  A NULL @pdn prints nothing.
 */
static void print_device_node_tree (struct pci_dn *pdn, int dent)
{
	struct device_node *child;
	int col;

	if (!pdn)
		return;

	for (col = 0; col < dent; col++)
		printk(" ");
	printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
	       pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
	       pdn->eeh_pe_config_addr, pdn->node->full_name);

	dent += 3;
	for (child = pdn->node->child; child; child = child->sibling)
		print_device_node_tree(PCI_DN(child), dent);
}
#endif
||||
|
||||
/**
|
||||
* irq_in_use - return true if this irq is being used
|
||||
*/ |
||||
static int irq_in_use(unsigned int irq) |
||||
{ |
||||
int rc = 0; |
||||
unsigned long flags; |
||||
struct irq_desc *desc = irq_desc + irq; |
||||
|
||||
spin_lock_irqsave(&desc->lock, flags); |
||||
if (desc->action) |
||||
rc = 1; |
||||
spin_unlock_irqrestore(&desc->lock, flags); |
||||
return rc; |
||||
} |
||||
|
||||
/* ------------------------------------------------------- */ |
||||
/** eeh_report_error - report an EEH error to each device,
|
||||
* collect up and merge the device responses. |
||||
*/ |
||||
|
||||
static void eeh_report_error(struct pci_dev *dev, void *userdata) |
||||
{ |
||||
enum pci_ers_result rc, *res = userdata; |
||||
struct pci_driver *driver = dev->driver; |
||||
|
||||
dev->error_state = pci_channel_io_frozen; |
||||
|
||||
if (!driver) |
||||
return; |
||||
|
||||
if (irq_in_use (dev->irq)) { |
||||
struct device_node *dn = pci_device_to_OF_node(dev); |
||||
PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; |
||||
disable_irq_nosync(dev->irq); |
||||
} |
||||
if (!driver->err_handler) |
||||
return; |
||||
if (!driver->err_handler->error_detected) |
||||
return; |
||||
|
||||
rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); |
||||
if (*res == PCI_ERS_RESULT_NONE) *res = rc; |
||||
if (*res == PCI_ERS_RESULT_NEED_RESET) return; |
||||
if (*res == PCI_ERS_RESULT_DISCONNECT && |
||||
rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; |
||||
} |
||||
|
||||
/** eeh_report_reset -- tell this device that the pci slot
|
||||
* has been reset. |
||||
*/ |
||||
|
||||
static void eeh_report_reset(struct pci_dev *dev, void *userdata) |
||||
{ |
||||
struct pci_driver *driver = dev->driver; |
||||
struct device_node *dn = pci_device_to_OF_node(dev); |
||||
|
||||
if (!driver) |
||||
return; |
||||
|
||||
if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { |
||||
PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; |
||||
enable_irq(dev->irq); |
||||
} |
||||
if (!driver->err_handler) |
||||
return; |
||||
if (!driver->err_handler->slot_reset) |
||||
return; |
||||
|
||||
driver->err_handler->slot_reset(dev); |
||||
} |
||||
|
||||
static void eeh_report_resume(struct pci_dev *dev, void *userdata) |
||||
{ |
||||
struct pci_driver *driver = dev->driver; |
||||
|
||||
dev->error_state = pci_channel_io_normal; |
||||
|
||||
if (!driver) |
||||
return; |
||||
if (!driver->err_handler) |
||||
return; |
||||
if (!driver->err_handler->resume) |
||||
return; |
||||
|
||||
driver->err_handler->resume(dev); |
||||
} |
||||
|
||||
static void eeh_report_failure(struct pci_dev *dev, void *userdata) |
||||
{ |
||||
struct pci_driver *driver = dev->driver; |
||||
|
||||
dev->error_state = pci_channel_io_perm_failure; |
||||
|
||||
if (!driver) |
||||
return; |
||||
|
||||
if (irq_in_use (dev->irq)) { |
||||
struct device_node *dn = pci_device_to_OF_node(dev); |
||||
PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; |
||||
disable_irq_nosync(dev->irq); |
||||
} |
||||
if (!driver->err_handler) |
||||
return; |
||||
if (!driver->err_handler->error_detected) |
||||
return; |
||||
driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); |
||||
} |
||||
|
||||
/* ------------------------------------------------------- */ |
||||
/**
|
||||
* handle_eeh_events -- reset a PCI device after hard lockup. |
||||
* |
||||
* pSeries systems will isolate a PCI slot if the PCI-Host |
||||
* bridge detects address or data parity errors, DMA's |
||||
* occurring to wild addresses (which usually happen due to |
||||
* bugs in device drivers or in PCI adapter firmware). |
||||
* Slot isolations also occur if #SERR, #PERR or other misc |
||||
* PCI-related errors are detected. |
||||
* |
||||
* Recovery process consists of unplugging the device driver |
||||
* (which generated hotplug events to userspace), then issuing |
||||
* a PCI #RST to the device, then reconfiguring the PCI config |
||||
* space for all bridges & devices under this slot, and then |
||||
* finally restarting the device drivers (which cause a second |
||||
* set of hotplug events to go out to userspace). |
||||
*/ |
||||
|
||||
/**
|
||||
* eeh_reset_device() -- perform actual reset of a pci slot |
||||
* Args: bus: pointer to the pci bus structure corresponding |
||||
* to the isolated slot. A non-null value will |
||||
* cause all devices under the bus to be removed |
||||
* and then re-added. |
||||
* pe_dn: pointer to a "Partionable Endpoint" device node. |
||||
* This is the top-level structure on which pci |
||||
* bus resets can be performed. |
||||
*/ |
||||
|
||||
static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) |
||||
{ |
||||
int rc; |
||||
if (bus) |
||||
pcibios_remove_pci_devices(bus); |
||||
|
||||
/* Reset the pci controller. (Asserts RST#; resets config space).
|
||||
* Reconfigure bridges and devices. Don't try to bring the system |
||||
* up if the reset failed for some reason. */ |
||||
rc = rtas_set_slot_reset(pe_dn); |
||||
if (rc) |
||||
return rc; |
||||
|
||||
/* New-style config addrs might be shared across multiple devices,
|
||||
* Walk over all functions on this device */ |
||||
if (pe_dn->eeh_pe_config_addr) { |
||||
struct device_node *pe = pe_dn->node; |
||||
pe = pe->parent->child; |
||||
while (pe) { |
||||
struct pci_dn *ppe = PCI_DN(pe); |
||||
if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) { |
||||
rtas_configure_bridge(ppe); |
||||
eeh_restore_bars(ppe); |
||||
} |
||||
pe = pe->sibling; |
||||
} |
||||
} else { |
||||
rtas_configure_bridge(pe_dn); |
||||
eeh_restore_bars(pe_dn); |
||||
} |
||||
|
||||
/* Give the system 5 seconds to finish running the user-space
|
||||
* hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
|
||||
* this is a hack, but if we don't do this, and try to bring
|
||||
* the device up before the scripts have taken it down,
|
||||
* potentially weird things happen. |
||||
*/ |
||||
if (bus) { |
||||
ssleep (5); |
||||
pcibios_add_pci_devices(bus); |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
/* The longest amount of time to wait for a pci device
|
||||
* to come back on line, in seconds. |
||||
*/ |
||||
#define MAX_WAIT_FOR_RECOVERY 15 |
||||
|
||||
void handle_eeh_events (struct eeh_event *event) |
||||
{ |
||||
struct device_node *frozen_dn; |
||||
struct pci_dn *frozen_pdn; |
||||
struct pci_bus *frozen_bus; |
||||
int rc = 0; |
||||
enum pci_ers_result result = PCI_ERS_RESULT_NONE; |
||||
|
||||
frozen_dn = find_device_pe(event->dn); |
||||
frozen_bus = pcibios_find_pci_bus(frozen_dn); |
||||
|
||||
if (!frozen_dn) { |
||||
printk(KERN_ERR "EEH: Error: Cannot find partition endpoint for %s\n", |
||||
pci_name(event->dev)); |
||||
return; |
||||
} |
||||
|
||||
/* There are two different styles for coming up with the PE.
|
||||
* In the old style, it was the highest EEH-capable device |
||||
* which was always an EADS pci bridge. In the new style, |
||||
* there might not be any EADS bridges, and even when there are, |
||||
* the firmware marks them as "EEH incapable". So another |
||||
* two-step is needed to find the pci bus.. */ |
||||
if (!frozen_bus) |
||||
frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); |
||||
|
||||
if (!frozen_bus) { |
||||
printk(KERN_ERR "EEH: Cannot find PCI bus for %s\n", |
||||
frozen_dn->full_name); |
||||
return; |
||||
} |
||||
|
||||
#if 0 |
||||
/* We may get "permanent failure" messages on empty slots.
|
||||
* These are false alarms. Empty slots have no child dn. */ |
||||
if ((event->state == pci_channel_io_perm_failure) && (frozen_device == NULL)) |
||||
return; |
||||
#endif |
||||
|
||||
frozen_pdn = PCI_DN(frozen_dn); |
||||
frozen_pdn->eeh_freeze_count++; |
||||
|
||||
if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) |
||||
goto hard_fail; |
||||
|
||||
/* If the reset state is a '5' and the time to reset is 0 (infinity)
|
||||
* or is more then 15 seconds, then mark this as a permanent failure. |
||||
*/ |
||||
if ((event->state == pci_channel_io_perm_failure) && |
||||
((event->time_unavail <= 0) || |
||||
(event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000))) |
||||
goto hard_fail; |
||||
|
||||
eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); |
||||
printk(KERN_WARNING |
||||
"EEH: This PCI device has failed %d times since last reboot: %s - %s\n", |
||||
frozen_pdn->eeh_freeze_count, |
||||
pci_name (frozen_pdn->pcidev),
|
||||
pcid_name(frozen_pdn->pcidev)); |
||||
|
||||
/* Walk the various device drivers attached to this slot through
|
||||
* a reset sequence, giving each an opportunity to do what it needs |
||||
* to accomplish the reset. Each child gets a report of the |
||||
* status ... if any child can't handle the reset, then the entire |
||||
* slot is dlpar removed and added. |
||||
*/ |
||||
pci_walk_bus(frozen_bus, eeh_report_error, &result); |
||||
|
||||
/* If all device drivers were EEH-unaware, then shut
|
||||
* down all of the device drivers, and hope they |
||||
* go down willingly, without panicing the system. |
||||
*/ |
||||
if (result == PCI_ERS_RESULT_NONE) { |
||||
rc = eeh_reset_device(frozen_pdn, frozen_bus); |
||||
if (rc) |
||||
goto hard_fail; |
||||
} |
||||
|
||||
/* If any device called out for a reset, then reset the slot */ |
||||
if (result == PCI_ERS_RESULT_NEED_RESET) { |
||||
rc = eeh_reset_device(frozen_pdn, NULL); |
||||
if (rc) |
||||
goto hard_fail; |
||||
pci_walk_bus(frozen_bus, eeh_report_reset, 0); |
||||
} |
||||
|
||||
/* If all devices reported they can proceed, the re-enable PIO */ |
||||
if (result == PCI_ERS_RESULT_CAN_RECOVER) { |
||||
/* XXX Not supported; we brute-force reset the device */ |
||||
rc = eeh_reset_device(frozen_pdn, NULL); |
||||
if (rc) |
||||
goto hard_fail; |
||||
pci_walk_bus(frozen_bus, eeh_report_reset, 0); |
||||
} |
||||
|
||||
/* Tell all device drivers that they can resume operations */ |
||||
pci_walk_bus(frozen_bus, eeh_report_resume, 0); |
||||
|
||||
return; |
||||
|
||||
hard_fail: |
||||
/*
|
||||
* About 90% of all real-life EEH failures in the field |
||||
* are due to poorly seated PCI cards. Only 10% or so are |
||||
* due to actual, failed cards. |
||||
*/ |
||||
printk(KERN_ERR |
||||
"EEH: PCI device %s - %s has failed %d times \n" |
||||
"and has been permanently disabled. Please try reseating\n" |
||||
"this device or replacing it.\n", |
||||
pci_name (frozen_pdn->pcidev),
|
||||
pcid_name(frozen_pdn->pcidev),
|
||||
frozen_pdn->eeh_freeze_count); |
||||
|
||||
eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */); |
||||
|
||||
/* Notify all devices that they're about to go down. */ |
||||
pci_walk_bus(frozen_bus, eeh_report_failure, 0); |
||||
|
||||
/* Shut down the device drivers for good. */ |
||||
pcibios_remove_pci_devices(frozen_bus); |
||||
} |
||||
|
||||
/* ---------- end of file ---------- */ |
Loading…
Reference in new issue