Blame - arch/powerpc/platforms/powernv/eeh-ioda.c - kernel/mindspeed

blob: eba9cb10619ceac9108d09036b97528d2a03e45f [file] [log] [blame]

Gavin Shan	8747f36	2013-06-20 13:21:06 +0800	[diff] [blame]	1	/*
				2	* This file implements the functions needed by EEH on IODA
				3	* compliant chips. Most of the EEH related functions are built
				4	* on top of the OPAL APIs.
				5	*
				6	* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
				7	*
				8	* This program is free software; you can redistribute it and/or modify
				9	* it under the terms of the GNU General Public License as published by
				10	* the Free Software Foundation; either version 2 of the License, or
				11	* (at your option) any later version.
				12	*/
				13
				14	#include <linux/bootmem.h>
Gavin Shan	8998897	2013-06-20 18:13:26 +0800	[diff] [blame]	15	#include <linux/debugfs.h>
Gavin Shan	8747f36	2013-06-20 13:21:06 +0800	[diff] [blame]	16	#include <linux/delay.h>
Gavin Shan	8747f36	2013-06-20 13:21:06 +0800	[diff] [blame]	17	#include <linux/io.h>
				18	#include <linux/irq.h>
				19	#include <linux/kernel.h>
				20	#include <linux/msi.h>
Gavin Shan	7cb9d93	2013-06-20 18:13:24 +0800	[diff] [blame]	21	#include <linux/notifier.h>
Gavin Shan	8747f36	2013-06-20 13:21:06 +0800	[diff] [blame]	22	#include <linux/pci.h>
				23	#include <linux/string.h>
				24
				25	#include <asm/eeh.h>
				26	#include <asm/eeh_event.h>
				27	#include <asm/io.h>
				28	#include <asm/iommu.h>
				29	#include <asm/msi_bitmap.h>
				30	#include <asm/opal.h>
				31	#include <asm/pci-bridge.h>
				32	#include <asm/ppc-pci.h>
				33	#include <asm/tce.h>
				34
				35	#include "powernv.h"
				36	#include "pci.h"
				37
/* Non-zero once the OPAL event notifier has been registered */
static int ioda_eeh_nb_init;
				39
				40	static int ioda_eeh_event(struct notifier_block *nb,
				41	unsigned long events, void *change)
				42	{
				43	uint64_t changed_evts = (uint64_t)change;
				44
Gavin Shan	7f52a52	2014-04-24 18:00:18 +1000	[diff] [blame]	45	/*
				46	* We simply send special EEH event if EEH has
				47	* been enabled, or clear pending events in
				48	* case that we enable EEH soon
				49	*/
				50	if (!(changed_evts & OPAL_EVENT_PCI_ERROR) \|\|
				51	!(events & OPAL_EVENT_PCI_ERROR))
				52	return 0;
				53
				54	if (eeh_enabled())
Gavin Shan	7cb9d93	2013-06-20 18:13:24 +0800	[diff] [blame]	55	eeh_send_failure_event(NULL);
Gavin Shan	7f52a52	2014-04-24 18:00:18 +1000	[diff] [blame]	56	else
				57	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
Gavin Shan	7cb9d93	2013-06-20 18:13:24 +0800	[diff] [blame]	58
				59	return 0;
				60	}
				61
				62	static struct notifier_block ioda_eeh_nb = {
				63	.notifier_call = ioda_eeh_event,
				64	.next = NULL,
				65	.priority = 0
				66	};
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	67
Gavin Shan	8998897	2013-06-20 18:13:26 +0800	[diff] [blame]	68	#ifdef CONFIG_DEBUG_FS
Mike Qiu	7a06278	2014-09-30 12:38:57 +1000	[diff] [blame]	69	static ssize_t ioda_eeh_ei_write(struct file *filp,
				70	const char __user *user_buf,
				71	size_t count, loff_t *ppos)
				72	{
				73	struct pci_controller *hose = filp->private_data;
				74	struct pnv_phb *phb = hose->private_data;
				75	struct eeh_dev *edev;
				76	struct eeh_pe *pe;
				77	int pe_no, type, func;
				78	unsigned long addr, mask;
				79	char buf[50];
				80	int ret;
				81
				82	if (!phb->eeh_ops \|\| !phb->eeh_ops->err_inject)
				83	return -ENXIO;
				84
				85	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
				86	if (!ret)
				87	return -EFAULT;
				88
				89	/* Retrieve parameters */
				90	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
				91	&pe_no, &type, &func, &addr, &mask);
				92	if (ret != 5)
				93	return -EINVAL;
				94
				95	/* Retrieve PE */
				96	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
				97	if (!edev)
				98	return -ENOMEM;
				99	edev->phb = hose;
				100	edev->pe_config_addr = pe_no;
				101	pe = eeh_pe_get(edev);
				102	kfree(edev);
				103	if (!pe)
				104	return -ENODEV;
				105
				106	/* Do error injection */
				107	ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask);
				108	return ret < 0 ? ret : count;
				109	}
				110
				111	static const struct file_operations ioda_eeh_ei_fops = {
				112	.open = simple_open,
				113	.llseek = no_llseek,
				114	.write = ioda_eeh_ei_write,
				115	};
				116
Gavin Shan	ff6bdcd	2013-09-06 09:00:01 +0800	[diff] [blame]	117	static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val)
Gavin Shan	8998897	2013-06-20 18:13:26 +0800	[diff] [blame]	118	{
				119	struct pci_controller *hose = data;
				120	struct pnv_phb *phb = hose->private_data;
				121
Gavin Shan	ff6bdcd	2013-09-06 09:00:01 +0800	[diff] [blame]	122	out_be64(phb->regs + offset, val);
Gavin Shan	8998897	2013-06-20 18:13:26 +0800	[diff] [blame]	123	return 0;
				124	}
				125
Gavin Shan	ff6bdcd	2013-09-06 09:00:01 +0800	[diff] [blame]	126	static int ioda_eeh_dbgfs_get(void data, int offset, u64 val)
Gavin Shan	8998897	2013-06-20 18:13:26 +0800	[diff] [blame]	127	{
				128	struct pci_controller *hose = data;
				129	struct pnv_phb *phb = hose->private_data;
				130
Gavin Shan	ff6bdcd	2013-09-06 09:00:01 +0800	[diff] [blame]	131	*val = in_be64(phb->regs + offset);
Gavin Shan	8998897	2013-06-20 18:13:26 +0800	[diff] [blame]	132	return 0;
				133	}
				134
Gavin Shan	ff6bdcd	2013-09-06 09:00:01 +0800	[diff] [blame]	135	static int ioda_eeh_outb_dbgfs_set(void *data, u64 val)
				136	{
				137	return ioda_eeh_dbgfs_set(data, 0xD10, val);
				138	}
				139
				140	static int ioda_eeh_outb_dbgfs_get(void data, u64 val)
				141	{
				142	return ioda_eeh_dbgfs_get(data, 0xD10, val);
				143	}
				144
				145	static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val)
				146	{
				147	return ioda_eeh_dbgfs_set(data, 0xD90, val);
				148	}
				149
				150	static int ioda_eeh_inbA_dbgfs_get(void data, u64 val)
				151	{
				152	return ioda_eeh_dbgfs_get(data, 0xD90, val);
				153	}
				154
				155	static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val)
				156	{
				157	return ioda_eeh_dbgfs_set(data, 0xE10, val);
				158	}
				159
				160	static int ioda_eeh_inbB_dbgfs_get(void data, u64 val)
				161	{
				162	return ioda_eeh_dbgfs_get(data, 0xE10, val);
				163	}
				164
				165	DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get,
				166	ioda_eeh_outb_dbgfs_set, "0x%llx\n");
				167	DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get,
				168	ioda_eeh_inbA_dbgfs_set, "0x%llx\n");
				169	DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
				170	ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
Gavin Shan	8998897	2013-06-20 18:13:26 +0800	[diff] [blame]	171	#endif /* CONFIG_DEBUG_FS */
				172
Gavin Shan	9471660	2014-02-25 15:28:37 +0800	[diff] [blame]	173
Gavin Shan	73370c6	2013-06-20 13:21:07 +0800	[diff] [blame]	174	/**
				175	* ioda_eeh_post_init - Chip dependent post initialization
				176	* @hose: PCI controller
				177	*
				178	* The function will be called after eeh PEs and devices
				179	* have been built. That means the EEH is ready to supply
				180	* service with I/O cache.
				181	*/
				182	static int ioda_eeh_post_init(struct pci_controller *hose)
				183	{
				184	struct pnv_phb *phb = hose->private_data;
Gavin Shan	7cb9d93	2013-06-20 18:13:24 +0800	[diff] [blame]	185	int ret;
				186
				187	/* Register OPAL event notifier */
				188	if (!ioda_eeh_nb_init) {
				189	ret = opal_notifier_register(&ioda_eeh_nb);
				190	if (ret) {
				191	pr_err("%s: Can't register OPAL event notifier (%d)\n",
				192	__func__, ret);
				193	return ret;
				194	}
				195
				196	ioda_eeh_nb_init = 1;
				197	}
Gavin Shan	73370c6	2013-06-20 13:21:07 +0800	[diff] [blame]	198
Gavin Shan	8998897	2013-06-20 18:13:26 +0800	[diff] [blame]	199	#ifdef CONFIG_DEBUG_FS
Gavin Shan	7f52a52	2014-04-24 18:00:18 +1000	[diff] [blame]	200	if (!phb->has_dbgfs && phb->dbgfs) {
				201	phb->has_dbgfs = 1;
				202
Mike Qiu	7a06278	2014-09-30 12:38:57 +1000	[diff] [blame]	203	debugfs_create_file("err_injct", 0200,
				204	phb->dbgfs, hose,
				205	&ioda_eeh_ei_fops);
				206
Gavin Shan	ff6bdcd	2013-09-06 09:00:01 +0800	[diff] [blame]	207	debugfs_create_file("err_injct_outbound", 0600,
Gavin Shan	20bb842	2013-09-06 09:00:00 +0800	[diff] [blame]	208	phb->dbgfs, hose,
Gavin Shan	ff6bdcd	2013-09-06 09:00:01 +0800	[diff] [blame]	209	&ioda_eeh_outb_dbgfs_ops);
				210	debugfs_create_file("err_injct_inboundA", 0600,
				211	phb->dbgfs, hose,
				212	&ioda_eeh_inbA_dbgfs_ops);
				213	debugfs_create_file("err_injct_inboundB", 0600,
				214	phb->dbgfs, hose,
				215	&ioda_eeh_inbB_dbgfs_ops);
				216	}
Gavin Shan	8998897	2013-06-20 18:13:26 +0800	[diff] [blame]	217	#endif
				218
Gavin Shan	7f52a52	2014-04-24 18:00:18 +1000	[diff] [blame]	219	/* If EEH is enabled, we're going to rely on that.
				220	* Otherwise, we restore to conventional mechanism
				221	* to clear frozen PE during PCI config access.
				222	*/
				223	if (eeh_enabled())
				224	phb->flags \|= PNV_PHB_FLAG_EEH;
				225	else
				226	phb->flags &= ~PNV_PHB_FLAG_EEH;
Gavin Shan	73370c6	2013-06-20 13:21:07 +0800	[diff] [blame]	227
				228	return 0;
				229	}
				230
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	231	/**
				232	* ioda_eeh_set_option - Set EEH operation or I/O setting
				233	* @pe: EEH PE
				234	* @option: options
				235	*
				236	* Enable or disable EEH option for the indicated PE. The
				237	* function also can be used to enable I/O or DMA for the
				238	* PE.
				239	*/
				240	static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
				241	{
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	242	struct pci_controller *hose = pe->phb;
				243	struct pnv_phb *phb = hose->private_data;
Gavin Shan	0d5ee52	2014-09-30 12:38:52 +1000	[diff] [blame]	244	bool freeze_pe = false;
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	245	int enable, ret = 0;
				246	s64 rc;
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	247
				248	/* Check on PE number */
				249	if (pe->addr < 0 \|\| pe->addr >= phb->ioda.total_pe) {
				250	pr_err("%s: PE address %x out of range [0, %x] "
				251	"on PHB#%x\n",
				252	__func__, pe->addr, phb->ioda.total_pe,
				253	hose->global_number);
				254	return -EINVAL;
				255	}
				256
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	257	switch (option) {
				258	case EEH_OPT_DISABLE:
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	259	return -EPERM;
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	260	case EEH_OPT_ENABLE:
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	261	return 0;
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	262	case EEH_OPT_THAW_MMIO:
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	263	enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	264	break;
				265	case EEH_OPT_THAW_DMA:
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	266	enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	267	break;
Gavin Shan	0d5ee52	2014-09-30 12:38:52 +1000	[diff] [blame]	268	case EEH_OPT_FREEZE_PE:
				269	freeze_pe = true;
				270	enable = OPAL_EEH_ACTION_SET_FREEZE_ALL;
				271	break;
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	272	default:
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	273	pr_warn("%s: Invalid option %d\n",
				274	__func__, option);
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	275	return -EINVAL;
				276	}
				277
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	278	/* If PHB supports compound PE, to handle it */
Gavin Shan	0d5ee52	2014-09-30 12:38:52 +1000	[diff] [blame]	279	if (freeze_pe) {
				280	if (phb->freeze_pe) {
				281	phb->freeze_pe(phb, pe->addr);
				282	} else {
				283	rc = opal_pci_eeh_freeze_set(phb->opal_id,
				284	pe->addr,
				285	enable);
				286	if (rc != OPAL_SUCCESS) {
				287	pr_warn("%s: Failure %lld freezing "
				288	"PHB#%x-PE#%x\n",
				289	__func__, rc,
				290	phb->hose->global_number, pe->addr);
				291	ret = -EIO;
				292	}
				293	}
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	294	} else {
Gavin Shan	0d5ee52	2014-09-30 12:38:52 +1000	[diff] [blame]	295	if (phb->unfreeze_pe) {
				296	ret = phb->unfreeze_pe(phb, pe->addr, enable);
				297	} else {
				298	rc = opal_pci_eeh_freeze_clear(phb->opal_id,
				299	pe->addr,
				300	enable);
				301	if (rc != OPAL_SUCCESS) {
				302	pr_warn("%s: Failure %lld enable %d "
				303	"for PHB#%x-PE#%x\n",
				304	__func__, rc, option,
				305	phb->hose->global_number, pe->addr);
				306	ret = -EIO;
				307	}
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	308	}
				309	}
				310
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	311	return ret;
				312	}
				313
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	314	static void ioda_eeh_phb_diag(struct eeh_pe *pe)
Gavin Shan	9471660	2014-02-25 15:28:37 +0800	[diff] [blame]	315	{
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	316	struct pnv_phb *phb = pe->phb->private_data;
Gavin Shan	9471660	2014-02-25 15:28:37 +0800	[diff] [blame]	317	long rc;
				318
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	319	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
Gavin Shan	9471660	2014-02-25 15:28:37 +0800	[diff] [blame]	320	PNV_PCI_DIAG_BUF_SIZE);
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	321	if (rc != OPAL_SUCCESS)
Gavin Shan	0dae274	2014-07-17 14:41:41 +1000	[diff] [blame]	322	pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n",
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	323	__func__, pe->phb->global_number, rc);
Gavin Shan	9471660	2014-02-25 15:28:37 +0800	[diff] [blame]	324	}
				325
Gavin Shan	c979c70	2014-07-21 14:42:32 +1000	[diff] [blame]	326	static int ioda_eeh_get_phb_state(struct eeh_pe *pe)
				327	{
				328	struct pnv_phb *phb = pe->phb->private_data;
				329	u8 fstate;
				330	__be16 pcierr;
				331	s64 rc;
				332	int result = 0;
				333
				334	rc = opal_pci_eeh_freeze_status(phb->opal_id,
				335	pe->addr,
				336	&fstate,
				337	&pcierr,
				338	NULL);
				339	if (rc != OPAL_SUCCESS) {
				340	pr_warn("%s: Failure %lld getting PHB#%x state\n",
				341	__func__, rc, phb->hose->global_number);
				342	return EEH_STATE_NOT_SUPPORT;
				343	}
				344
				345	/*
				346	* Check PHB state. If the PHB is frozen for the
				347	* first time, to dump the PHB diag-data.
				348	*/
				349	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
				350	result = (EEH_STATE_MMIO_ACTIVE \|
				351	EEH_STATE_DMA_ACTIVE \|
				352	EEH_STATE_MMIO_ENABLED \|
				353	EEH_STATE_DMA_ENABLED);
				354	} else if (!(pe->state & EEH_PE_ISOLATED)) {
				355	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
				356	ioda_eeh_phb_diag(pe);
				357	}
				358
				359	return result;
				360	}
				361
				362	static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
				363	{
				364	struct pnv_phb *phb = pe->phb->private_data;
				365	u8 fstate;
				366	__be16 pcierr;
				367	s64 rc;
				368	int result;
				369
				370	/*
				371	* We don't clobber hardware frozen state until PE
				372	* reset is completed. In order to keep EEH core
				373	* moving forward, we have to return operational
				374	* state during PE reset.
				375	*/
Gavin Shan	8a6b371	2014-10-01 17:07:50 +1000	[diff] [blame]	376	if (pe->state & EEH_PE_CFG_BLOCKED) {
Gavin Shan	c979c70	2014-07-21 14:42:32 +1000	[diff] [blame]	377	result = (EEH_STATE_MMIO_ACTIVE \|
				378	EEH_STATE_DMA_ACTIVE \|
				379	EEH_STATE_MMIO_ENABLED \|
				380	EEH_STATE_DMA_ENABLED);
				381	return result;
				382	}
				383
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	384	/*
				385	* Fetch PE state from hardware. If the PHB
				386	* supports compound PE, let it handle that.
				387	*/
				388	if (phb->get_pe_state) {
				389	fstate = phb->get_pe_state(phb, pe->addr);
				390	} else {
				391	rc = opal_pci_eeh_freeze_status(phb->opal_id,
				392	pe->addr,
				393	&fstate,
				394	&pcierr,
				395	NULL);
				396	if (rc != OPAL_SUCCESS) {
				397	pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
				398	__func__, rc, phb->hose->global_number, pe->addr);
				399	return EEH_STATE_NOT_SUPPORT;
				400	}
Gavin Shan	c979c70	2014-07-21 14:42:32 +1000	[diff] [blame]	401	}
				402
				403	/* Figure out state */
				404	switch (fstate) {
				405	case OPAL_EEH_STOPPED_NOT_FROZEN:
				406	result = (EEH_STATE_MMIO_ACTIVE \|
				407	EEH_STATE_DMA_ACTIVE \|
				408	EEH_STATE_MMIO_ENABLED \|
				409	EEH_STATE_DMA_ENABLED);
				410	break;
				411	case OPAL_EEH_STOPPED_MMIO_FREEZE:
				412	result = (EEH_STATE_DMA_ACTIVE \|
				413	EEH_STATE_DMA_ENABLED);
				414	break;
				415	case OPAL_EEH_STOPPED_DMA_FREEZE:
				416	result = (EEH_STATE_MMIO_ACTIVE \|
				417	EEH_STATE_MMIO_ENABLED);
				418	break;
				419	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
				420	result = 0;
				421	break;
				422	case OPAL_EEH_STOPPED_RESET:
				423	result = EEH_STATE_RESET_ACTIVE;
				424	break;
				425	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
				426	result = EEH_STATE_UNAVAILABLE;
				427	break;
				428	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
				429	result = EEH_STATE_NOT_SUPPORT;
				430	break;
				431	default:
				432	result = EEH_STATE_NOT_SUPPORT;
				433	pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
				434	__func__, phb->hose->global_number,
				435	pe->addr, fstate);
				436	}
				437
				438	/*
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	439	* If PHB supports compound PE, to freeze all
				440	* slave PEs for consistency.
				441	*
Gavin Shan	c979c70	2014-07-21 14:42:32 +1000	[diff] [blame]	442	* If the PE is switching to frozen state for the
				443	* first time, to dump the PHB diag-data.
				444	*/
				445	if (!(result & EEH_STATE_NOT_SUPPORT) &&
				446	!(result & EEH_STATE_UNAVAILABLE) &&
				447	!(result & EEH_STATE_MMIO_ACTIVE) &&
				448	!(result & EEH_STATE_DMA_ACTIVE) &&
				449	!(pe->state & EEH_PE_ISOLATED)) {
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	450	if (phb->freeze_pe)
				451	phb->freeze_pe(phb, pe->addr);
				452
Gavin Shan	c979c70	2014-07-21 14:42:32 +1000	[diff] [blame]	453	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
				454	ioda_eeh_phb_diag(pe);
				455	}
				456
				457	return result;
				458	}
				459
Gavin Shan	8c41a7f	2013-06-20 13:21:09 +0800	[diff] [blame]	460	/**
				461	* ioda_eeh_get_state - Retrieve the state of PE
				462	* @pe: EEH PE
				463	*
				464	* The PE's state should be retrieved from the PEEV, PEST
				465	* IODA tables. Since the OPAL has exported the function
				466	* to do it, it'd better to use that.
				467	*/
				468	static int ioda_eeh_get_state(struct eeh_pe *pe)
				469	{
Gavin Shan	c979c70	2014-07-21 14:42:32 +1000	[diff] [blame]	470	struct pnv_phb *phb = pe->phb->private_data;
Gavin Shan	8c41a7f	2013-06-20 13:21:09 +0800	[diff] [blame]	471
Gavin Shan	c979c70	2014-07-21 14:42:32 +1000	[diff] [blame]	472	/* Sanity check on PE number. PHB PE should have 0 */
				473	if (pe->addr < 0 \|\|
				474	pe->addr >= phb->ioda.total_pe) {
				475	pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n",
				476	__func__, phb->hose->global_number,
				477	pe->addr, phb->ioda.total_pe);
Gavin Shan	8c41a7f	2013-06-20 13:21:09 +0800	[diff] [blame]	478	return EEH_STATE_NOT_SUPPORT;
				479	}
				480
Gavin Shan	c979c70	2014-07-21 14:42:32 +1000	[diff] [blame]	481	if (pe->type & EEH_PE_PHB)
				482	return ioda_eeh_get_phb_state(pe);
Gavin Shan	7895470	2014-04-24 18:00:14 +1000	[diff] [blame]	483
Gavin Shan	c979c70	2014-07-21 14:42:32 +1000	[diff] [blame]	484	return ioda_eeh_get_pe_state(pe);
Gavin Shan	8c41a7f	2013-06-20 13:21:09 +0800	[diff] [blame]	485	}
				486
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	487	static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
				488	{
				489	s64 rc = OPAL_HARDWARE;
				490
				491	while (1) {
				492	rc = opal_pci_poll(phb->opal_id);
				493	if (rc <= 0)
				494	break;
				495
Gavin Shan	361f2a2	2014-04-24 18:00:25 +1000	[diff] [blame]	496	if (system_state < SYSTEM_RUNNING)
				497	udelay(1000 * rc);
				498	else
				499	msleep(rc);
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	500	}
				501
				502	return rc;
				503	}
				504
Gavin Shan	361f2a2	2014-04-24 18:00:25 +1000	[diff] [blame]	505	int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	506	{
				507	struct pnv_phb *phb = hose->private_data;
				508	s64 rc = OPAL_HARDWARE;
				509
				510	pr_debug("%s: Reset PHB#%x, option=%d\n",
				511	__func__, hose->global_number, option);
				512
				513	/* Issue PHB complete reset request */
				514	if (option == EEH_RESET_FUNDAMENTAL \|\|
				515	option == EEH_RESET_HOT)
				516	rc = opal_pci_reset(phb->opal_id,
Gavin Shan	d1a85ee	2014-09-30 12:39:05 +1000	[diff] [blame]	517	OPAL_RESET_PHB_COMPLETE,
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	518	OPAL_ASSERT_RESET);
				519	else if (option == EEH_RESET_DEACTIVATE)
				520	rc = opal_pci_reset(phb->opal_id,
Gavin Shan	d1a85ee	2014-09-30 12:39:05 +1000	[diff] [blame]	521	OPAL_RESET_PHB_COMPLETE,
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	522	OPAL_DEASSERT_RESET);
				523	if (rc < 0)
				524	goto out;
				525
				526	/*
				527	* Poll state of the PHB until the request is done
Gavin Shan	26833a5	2014-04-24 18:00:23 +1000	[diff] [blame]	528	* successfully. The PHB reset is usually PHB complete
				529	* reset followed by hot reset on root bus. So we also
				530	* need the PCI bus settlement delay.
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	531	*/
				532	rc = ioda_eeh_phb_poll(phb);
Gavin Shan	361f2a2	2014-04-24 18:00:25 +1000	[diff] [blame]	533	if (option == EEH_RESET_DEACTIVATE) {
				534	if (system_state < SYSTEM_RUNNING)
				535	udelay(1000 * EEH_PE_RST_SETTLE_TIME);
				536	else
				537	msleep(EEH_PE_RST_SETTLE_TIME);
				538	}
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	539	out:
				540	if (rc != OPAL_SUCCESS)
				541	return -EIO;
				542
				543	return 0;
				544	}
				545
				546	static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
				547	{
				548	struct pnv_phb *phb = hose->private_data;
				549	s64 rc = OPAL_SUCCESS;
				550
				551	pr_debug("%s: Reset PHB#%x, option=%d\n",
				552	__func__, hose->global_number, option);
				553
				554	/*
				555	* During the reset deassert time, we needn't care
				556	* the reset scope because the firmware does nothing
				557	* for fundamental or hot reset during deassert phase.
				558	*/
				559	if (option == EEH_RESET_FUNDAMENTAL)
				560	rc = opal_pci_reset(phb->opal_id,
Gavin Shan	d1a85ee	2014-09-30 12:39:05 +1000	[diff] [blame]	561	OPAL_RESET_PCI_FUNDAMENTAL,
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	562	OPAL_ASSERT_RESET);
				563	else if (option == EEH_RESET_HOT)
				564	rc = opal_pci_reset(phb->opal_id,
Gavin Shan	d1a85ee	2014-09-30 12:39:05 +1000	[diff] [blame]	565	OPAL_RESET_PCI_HOT,
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	566	OPAL_ASSERT_RESET);
				567	else if (option == EEH_RESET_DEACTIVATE)
				568	rc = opal_pci_reset(phb->opal_id,
Gavin Shan	d1a85ee	2014-09-30 12:39:05 +1000	[diff] [blame]	569	OPAL_RESET_PCI_HOT,
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	570	OPAL_DEASSERT_RESET);
				571	if (rc < 0)
				572	goto out;
				573
				574	/* Poll state of the PHB until the request is done */
				575	rc = ioda_eeh_phb_poll(phb);
Gavin Shan	26833a5	2014-04-24 18:00:23 +1000	[diff] [blame]	576	if (option == EEH_RESET_DEACTIVATE)
				577	msleep(EEH_PE_RST_SETTLE_TIME);
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	578	out:
				579	if (rc != OPAL_SUCCESS)
				580	return -EIO;
				581
				582	return 0;
				583	}
				584
Gavin Shan	1d9a544	2014-04-24 18:00:13 +1000	[diff] [blame]	585	static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option)
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	586
Gavin Shan	1d9a544	2014-04-24 18:00:13 +1000	[diff] [blame]	587	{
				588	struct device_node *dn = pci_device_to_OF_node(dev);
Gavin Shan	d92a208	2014-04-24 18:00:24 +1000	[diff] [blame]	589	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
				590	int aer = edev ? edev->aer_cap : 0;
Benjamin Herrenschmidt	965b560	2014-05-20 10:20:49 +1000	[diff] [blame]	591	u32 ctrl;
Gavin Shan	1d9a544	2014-04-24 18:00:13 +1000	[diff] [blame]	592
				593	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
				594	__func__, pci_domain_nr(dev->bus),
				595	dev->bus->number, option);
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	596
				597	switch (option) {
				598	case EEH_RESET_FUNDAMENTAL:
				599	case EEH_RESET_HOT:
Benjamin Herrenschmidt	965b560	2014-05-20 10:20:49 +1000	[diff] [blame]	600	/* Don't report linkDown event */
Gavin Shan	d92a208	2014-04-24 18:00:24 +1000	[diff] [blame]	601	if (aer) {
Gavin Shan	b2b5efc	2014-04-24 18:00:27 +1000	[diff] [blame]	602	eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
Benjamin Herrenschmidt	965b560	2014-05-20 10:20:49 +1000	[diff] [blame]	603	4, &ctrl);
				604	ctrl \|= PCI_ERR_UNC_SURPDN;
Gavin Shan	b2b5efc	2014-04-24 18:00:27 +1000	[diff] [blame]	605	eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
Benjamin Herrenschmidt	965b560	2014-05-20 10:20:49 +1000	[diff] [blame]	606	4, ctrl);
				607	}
Gavin Shan	b2b5efc	2014-04-24 18:00:27 +1000	[diff] [blame]	608
Benjamin Herrenschmidt	965b560	2014-05-20 10:20:49 +1000	[diff] [blame]	609	eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
				610	ctrl \|= PCI_BRIDGE_CTL_BUS_RESET;
				611	eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
Gavin Shan	26833a5	2014-04-24 18:00:23 +1000	[diff] [blame]	612	msleep(EEH_PE_RST_HOLD_TIME);
Gavin Shan	d92a208	2014-04-24 18:00:24 +1000	[diff] [blame]	613
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	614	break;
				615	case EEH_RESET_DEACTIVATE:
Benjamin Herrenschmidt	965b560	2014-05-20 10:20:49 +1000	[diff] [blame]	616	eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
				617	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
				618	eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
Gavin Shan	26833a5	2014-04-24 18:00:23 +1000	[diff] [blame]	619	msleep(EEH_PE_RST_SETTLE_TIME);
Gavin Shan	d92a208	2014-04-24 18:00:24 +1000	[diff] [blame]	620
Benjamin Herrenschmidt	965b560	2014-05-20 10:20:49 +1000	[diff] [blame]	621	/* Continue reporting linkDown event */
Gavin Shan	d92a208	2014-04-24 18:00:24 +1000	[diff] [blame]	622	if (aer) {
				623	eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
Benjamin Herrenschmidt	965b560	2014-05-20 10:20:49 +1000	[diff] [blame]	624	4, &ctrl);
				625	ctrl &= ~PCI_ERR_UNC_SURPDN;
Gavin Shan	d92a208	2014-04-24 18:00:24 +1000	[diff] [blame]	626	eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
Benjamin Herrenschmidt	965b560	2014-05-20 10:20:49 +1000	[diff] [blame]	627	4, ctrl);
Gavin Shan	d92a208	2014-04-24 18:00:24 +1000	[diff] [blame]	628	}
				629
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	630	break;
				631	}
				632
				633	return 0;
				634	}
				635
Gavin Shan	d92a208	2014-04-24 18:00:24 +1000	[diff] [blame]	636	void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
				637	{
				638	struct pci_controller *hose;
				639
				640	if (pci_is_root_bus(dev->bus)) {
				641	hose = pci_bus_to_host(dev->bus);
				642	ioda_eeh_root_reset(hose, EEH_RESET_HOT);
				643	ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
				644	} else {
				645	ioda_eeh_bridge_reset(dev, EEH_RESET_HOT);
				646	ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
				647	}
				648	}
				649
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	650	/**
				651	* ioda_eeh_reset - Reset the indicated PE
				652	* @pe: EEH PE
				653	* @option: reset option
				654	*
				655	* Do reset on the indicated PE. For PCI bus sensitive PE,
				656	* we need to reset the parent p2p bridge. The PHB has to
				657	* be reinitialized if the p2p bridge is root bridge. For
				658	* PCI device sensitive PE, we will try to reset the device
				659	* through FLR. For now, we don't have OPAL APIs to do HARD
				660	* reset yet, so all reset would be SOFT (HOT) reset.
				661	*/
				662	static int ioda_eeh_reset(struct eeh_pe *pe, int option)
				663	{
				664	struct pci_controller *hose = pe->phb;
Gavin Shan	5b2e198	2014-02-12 15:24:54 +0800	[diff] [blame]	665	struct pci_bus *bus;
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	666	int ret;
				667
				668	/*
Gavin Shan	fd5cee7	2014-04-24 18:00:22 +1000	[diff] [blame]	669	* For PHB reset, we always have complete reset. For those PEs whose
				670	* primary bus derived from root complex (root bus) or root port
				671	* (usually bus#1), we apply hot or fundamental reset on the root port.
				672	* For other PEs, we always have hot reset on the PE primary bus.
Gavin Shan	7895470	2014-04-24 18:00:14 +1000	[diff] [blame]	673	*
				674	* Here, we have different design to pHyp, which always clear the
				675	* frozen state during PE reset. However, the good idea here from
				676	* benh is to keep frozen state before we get PE reset done completely
				677	* (until BAR restore). With the frozen state, HW drops illegal IO
				678	* or MMIO access, which can incur recrusive frozen PE during PE
				679	* reset. The side effect is that EEH core has to clear the frozen
				680	* state explicitly after BAR restore.
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	681	*/
				682	if (pe->type & EEH_PE_PHB) {
				683	ret = ioda_eeh_phb_reset(hose, option);
				684	} else {
Gavin Shan	d9df1b5	2014-09-30 12:38:58 +1000	[diff] [blame]	685	struct pnv_phb *phb;
				686	s64 rc;
				687
				688	/*
				689	* The frozen PE might be caused by PAPR error injection
				690	* registers, which are expected to be cleared after hitting
				691	* frozen PE as stated in the hardware spec. Unfortunately,
				692	* that's not true on P7IOC. So we have to clear it manually
				693	* to avoid recursive EEH errors during recovery.
				694	*/
				695	phb = hose->private_data;
				696	if (phb->model == PNV_PHB_MODEL_P7IOC &&
				697	(option == EEH_RESET_HOT \|\|
				698	option == EEH_RESET_FUNDAMENTAL)) {
				699	rc = opal_pci_reset(phb->opal_id,
Gavin Shan	d1a85ee	2014-09-30 12:39:05 +1000	[diff] [blame]	700	OPAL_RESET_PHB_ERROR,
Gavin Shan	d9df1b5	2014-09-30 12:38:58 +1000	[diff] [blame]	701	OPAL_ASSERT_RESET);
				702	if (rc != OPAL_SUCCESS) {
				703	pr_warn("%s: Failure %lld clearing "
				704	"error injection registers\n",
				705	__func__, rc);
				706	return -EIO;
				707	}
				708	}
				709
Gavin Shan	5b2e198	2014-02-12 15:24:54 +0800	[diff] [blame]	710	bus = eeh_pe_bus_get(pe);
Gavin Shan	fd5cee7	2014-04-24 18:00:22 +1000	[diff] [blame]	711	if (pci_is_root_bus(bus) \|\|
				712	pci_is_root_bus(bus->parent))
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	713	ret = ioda_eeh_root_reset(hose, option);
				714	else
Gavin Shan	1d9a544	2014-04-24 18:00:13 +1000	[diff] [blame]	715	ret = ioda_eeh_bridge_reset(bus->self, option);
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	716	}
				717
				718	return ret;
				719	}
				720
Gavin Shan	bf90dfe	2013-06-20 13:21:11 +0800	[diff] [blame]	721	/**
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	722	* ioda_eeh_get_log - Retrieve error log
				723	* @pe: frozen PE
				724	* @severity: permanent or temporary error
				725	* @drv_log: device driver log
				726	* @len: length of device driver log
				727	*
				728	* Retrieve error log, which contains log from device driver
				729	* and firmware.
				730	*/
Anton Blanchard	e51df2c	2014-08-20 08:55:18 +1000	[diff] [blame]	731	static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
				732	char *drv_log, unsigned long len)
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	733	{
				734	pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
				735
				736	return 0;
				737	}
				738
				739	/**
Gavin Shan	bf90dfe	2013-06-20 13:21:11 +0800	[diff] [blame]	740	* ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
				741	* @pe: EEH PE
				742	*
				743	* For particular PE, it might have included PCI bridges. In order
				744	* to make the PE work properly, those PCI bridges should be configured
				745	* correctly. However, we need do nothing on P7IOC since the reset
				746	* function will do everything that should be covered by the function.
				747	*/
				748	static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
				749	{
				750	return 0;
				751	}
				752
Gavin Shan	131c123	2014-09-30 12:38:56 +1000	[diff] [blame]	753	static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func,
				754	unsigned long addr, unsigned long mask)
				755	{
				756	struct pci_controller *hose = pe->phb;
				757	struct pnv_phb *phb = hose->private_data;
				758	s64 ret;
				759
				760	/* Sanity check on error type */
				761	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
				762	type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
				763	pr_warn("%s: Invalid error type %d\n",
				764	__func__, type);
				765	return -ERANGE;
				766	}
				767
				768	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR \|\|
				769	func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
				770	pr_warn("%s: Invalid error function %d\n",
				771	__func__, func);
				772	return -ERANGE;
				773	}
				774
				775	/* Firmware supports error injection ? */
				776	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
				777	pr_warn("%s: Firmware doesn't support error injection\n",
				778	__func__);
				779	return -ENXIO;
				780	}
				781
				782	/* Do error injection */
				783	ret = opal_pci_err_inject(phb->opal_id, pe->addr,
				784	type, func, addr, mask);
				785	if (ret != OPAL_SUCCESS) {
				786	pr_warn("%s: Failure %lld injecting error "
				787	"%d-%d to PHB#%x-PE#%x\n",
				788	__func__, ret, type, func,
				789	hose->global_number, pe->addr);
				790	return -EIO;
				791	}
				792
				793	return 0;
				794	}
				795
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	796	static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
				797	{
				798	/* GEM */
Gavin Shan	f18440f	2014-07-17 14:41:42 +1000	[diff] [blame]	799	if (data->gemXfir \|\| data->gemRfir \|\|
				800	data->gemRirqfir \|\| data->gemMask \|\| data->gemRwof)
				801	pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n",
				802	be64_to_cpu(data->gemXfir),
				803	be64_to_cpu(data->gemRfir),
				804	be64_to_cpu(data->gemRirqfir),
				805	be64_to_cpu(data->gemMask),
				806	be64_to_cpu(data->gemRwof));
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	807
				808	/* LEM */
Gavin Shan	f18440f	2014-07-17 14:41:42 +1000	[diff] [blame]	809	if (data->lemFir \|\| data->lemErrMask \|\|
				810	data->lemAction0 \|\| data->lemAction1 \|\| data->lemWof)
				811	pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n",
				812	be64_to_cpu(data->lemFir),
				813	be64_to_cpu(data->lemErrMask),
				814	be64_to_cpu(data->lemAction0),
				815	be64_to_cpu(data->lemAction1),
				816	be64_to_cpu(data->lemWof));
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	817	}
				818
				819	static void ioda_eeh_hub_diag(struct pci_controller *hose)
				820	{
				821	struct pnv_phb *phb = hose->private_data;
Brian W Hart	ca1de5d	2013-12-20 13:06:01 -0600	[diff] [blame]	822	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	823	long rc;
				824
Brian W Hart	ca1de5d	2013-12-20 13:06:01 -0600	[diff] [blame]	825	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	826	if (rc != OPAL_SUCCESS) {
Gavin Shan	0dae274	2014-07-17 14:41:41 +1000	[diff] [blame]	827	pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
				828	__func__, phb->hub_id, rc);
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	829	return;
				830	}
				831
				832	switch (data->type) {
				833	case OPAL_P7IOC_DIAG_TYPE_RGC:
				834	pr_info("P7IOC diag-data for RGC\n\n");
				835	ioda_eeh_hub_diag_common(data);
Gavin Shan	f18440f	2014-07-17 14:41:42 +1000	[diff] [blame]	836	if (data->rgc.rgcStatus \|\| data->rgc.rgcLdcp)
				837	pr_info(" RGC: %016llx %016llx\n",
				838	be64_to_cpu(data->rgc.rgcStatus),
				839	be64_to_cpu(data->rgc.rgcLdcp));
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	840	break;
				841	case OPAL_P7IOC_DIAG_TYPE_BI:
				842	pr_info("P7IOC diag-data for BI %s\n\n",
				843	data->bi.biDownbound ? "Downbound" : "Upbound");
				844	ioda_eeh_hub_diag_common(data);
Gavin Shan	f18440f	2014-07-17 14:41:42 +1000	[diff] [blame]	845	if (data->bi.biLdcp0 \|\| data->bi.biLdcp1 \|\|
				846	data->bi.biLdcp2 \|\| data->bi.biFenceStatus)
				847	pr_info(" BI: %016llx %016llx %016llx %016llx\n",
				848	be64_to_cpu(data->bi.biLdcp0),
				849	be64_to_cpu(data->bi.biLdcp1),
				850	be64_to_cpu(data->bi.biLdcp2),
				851	be64_to_cpu(data->bi.biFenceStatus));
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	852	break;
				853	case OPAL_P7IOC_DIAG_TYPE_CI:
Gavin Shan	f18440f	2014-07-17 14:41:42 +1000	[diff] [blame]	854	pr_info("P7IOC diag-data for CI Port %d\n\n",
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	855	data->ci.ciPort);
				856	ioda_eeh_hub_diag_common(data);
Gavin Shan	f18440f	2014-07-17 14:41:42 +1000	[diff] [blame]	857	if (data->ci.ciPortStatus \|\| data->ci.ciPortLdcp)
				858	pr_info(" CI: %016llx %016llx\n",
				859	be64_to_cpu(data->ci.ciPortStatus),
				860	be64_to_cpu(data->ci.ciPortLdcp));
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	861	break;
				862	case OPAL_P7IOC_DIAG_TYPE_MISC:
				863	pr_info("P7IOC diag-data for MISC\n\n");
				864	ioda_eeh_hub_diag_common(data);
				865	break;
				866	case OPAL_P7IOC_DIAG_TYPE_I2C:
				867	pr_info("P7IOC diag-data for I2C\n\n");
				868	ioda_eeh_hub_diag_common(data);
				869	break;
				870	default:
Gavin Shan	0dae274	2014-07-17 14:41:41 +1000	[diff] [blame]	871	pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
				872	__func__, phb->hub_id, data->type);
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	873	}
				874	}
				875
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	876	static int ioda_eeh_get_pe(struct pci_controller *hose,
				877	u16 pe_no, struct eeh_pe **pe)
				878	{
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	879	struct pnv_phb *phb = hose->private_data;
				880	struct pnv_ioda_pe *pnv_pe;
				881	struct eeh_pe *dev_pe;
				882	struct eeh_dev edev;
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	883
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	884	/*
				885	* If PHB supports compound PE, to fetch
				886	* the master PE because slave PE is invisible
				887	* to EEH core.
				888	*/
Gavin Shan	372fb80	2014-09-30 12:39:09 +1000	[diff] [blame]	889	pnv_pe = &phb->ioda.pe_array[pe_no];
				890	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
				891	pnv_pe = pnv_pe->master;
				892	WARN_ON(!pnv_pe \|\|
				893	!(pnv_pe->flags & PNV_IODA_PE_MASTER));
				894	pe_no = pnv_pe->pe_number;
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	895	}
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	896
				897	/* Find the PE according to PE# */
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	898	memset(&edev, 0, sizeof(struct eeh_dev));
				899	edev.phb = hose;
				900	edev.pe_config_addr = pe_no;
				901	dev_pe = eeh_pe_get(&edev);
				902	if (!dev_pe)
				903	return -EEXIST;
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	904
Gavin Shan	372fb80	2014-09-30 12:39:09 +1000	[diff] [blame]	905	/* Freeze the (compound) PE */
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	906	*pe = dev_pe;
Gavin Shan	372fb80	2014-09-30 12:39:09 +1000	[diff] [blame]	907	if (!(dev_pe->state & EEH_PE_ISOLATED))
Gavin Shan	5828790	2014-07-21 14:42:34 +1000	[diff] [blame]	908	phb->freeze_pe(phb, pe_no);
				909
Gavin Shan	372fb80	2014-09-30 12:39:09 +1000	[diff] [blame]	910	/*
				911	* At this point, we're sure the (compound) PE should
				912	* have been frozen. However, we still need poke until
				913	* hitting the frozen PE on top level.
				914	*/
				915	dev_pe = dev_pe->parent;
				916	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
				917	int ret;
				918	int active_flags = (EEH_STATE_MMIO_ACTIVE \|
				919	EEH_STATE_DMA_ACTIVE);
				920
				921	ret = eeh_ops->get_state(dev_pe, NULL);
				922	if (ret <= 0 \|\| (ret & active_flags) == active_flags) {
				923	dev_pe = dev_pe->parent;
				924	continue;
				925	}
				926
				927	/* Frozen parent PE */
				928	*pe = dev_pe;
				929	if (!(dev_pe->state & EEH_PE_ISOLATED))
				930	phb->freeze_pe(phb, dev_pe->addr);
				931
				932	/* Next one */
				933	dev_pe = dev_pe->parent;
				934	}
				935
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	936	return 0;
				937	}
				938
				939	/**
				940	* ioda_eeh_next_error - Retrieve next error for EEH core to handle
				941	* @pe: The affected PE
				942	*
				943	* The function is expected to be called by EEH core while it gets
				944	* special EEH event (without binding PE). The function calls to
				945	* OPAL APIs for next error to handle. The informational error is
				946	* handled internally by platform. However, the dead IOC, dead PHB,
				947	* fenced PHB and frozen PE should be handled by EEH core eventually.
				948	*/
				949	static int ioda_eeh_next_error(struct eeh_pe **pe)
				950	{
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	951	struct pci_controller *hose;
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	952	struct pnv_phb *phb;
Gavin Shan	1ad7a72	2014-05-05 09:29:03 +1000	[diff] [blame]	953	struct eeh_pe phb_pe, parent_pe;
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	954	__be64 frozen_pe_no;
				955	__be16 err_type, severity;
Gavin Shan	1ad7a72	2014-05-05 09:29:03 +1000	[diff] [blame]	956	int active_flags = (EEH_STATE_MMIO_ACTIVE \| EEH_STATE_DMA_ACTIVE);
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	957	long rc;
Gavin Shan	1ad7a72	2014-05-05 09:29:03 +1000	[diff] [blame]	958	int state, ret = EEH_NEXT_ERR_NONE;
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	959
Gavin Shan	7cb9d93	2013-06-20 18:13:24 +0800	[diff] [blame]	960	/*
				961	* While running here, it's safe to purge the event queue.
				962	* And we should keep the cached OPAL notifier event sychronized
				963	* between the kernel and firmware.
				964	*/
Gavin Shan	5c7a35e	2014-06-04 17:31:52 +1000	[diff] [blame]	965	eeh_remove_event(NULL, false);
Gavin Shan	7cb9d93	2013-06-20 18:13:24 +0800	[diff] [blame]	966	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	967
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	968	list_for_each_entry(hose, &hose_list, list_node) {
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	969	/*
				970	* If the subordinate PCI buses of the PHB has been
Gavin Shan	467f79a	2014-04-24 18:00:08 +1000	[diff] [blame]	971	* removed or is exactly under error recovery, we
				972	* needn't take care of it any more.
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	973	*/
				974	phb = hose->private_data;
Gavin Shan	467f79a	2014-04-24 18:00:08 +1000	[diff] [blame]	975	phb_pe = eeh_phb_pe_get(hose);
				976	if (!phb_pe \|\| (phb_pe->state & EEH_PE_ISOLATED))
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	977	continue;
				978
				979	rc = opal_pci_next_error(phb->opal_id,
				980	&frozen_pe_no, &err_type, &severity);
				981
				982	/* If OPAL API returns error, we needn't proceed */
				983	if (rc != OPAL_SUCCESS) {
Mike Qiu	2021270	2013-08-12 02:15:36 -0400	[diff] [blame]	984	pr_devel("%s: Invalid return value on "
				985	"PHB#%x (0x%lx) from opal_pci_next_error",
				986	__func__, hose->global_number, rc);
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	987	continue;
				988	}
				989
				990	/* If the PHB doesn't have error, stop processing */
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	991	if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR \|\|
				992	be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
Mike Qiu	2021270	2013-08-12 02:15:36 -0400	[diff] [blame]	993	pr_devel("%s: No error found on PHB#%x\n",
				994	__func__, hose->global_number);
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	995	continue;
				996	}
				997
				998	/*
				999	* Processing the error. We're expecting the error with
				1000	* highest priority reported upon multiple errors on the
				1001	* specific PHB.
				1002	*/
Mike Qiu	2021270	2013-08-12 02:15:36 -0400	[diff] [blame]	1003	pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	1004	__func__, be16_to_cpu(err_type), be16_to_cpu(severity),
				1005	be64_to_cpu(frozen_pe_no), hose->global_number);
				1006	switch (be16_to_cpu(err_type)) {
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	1007	case OPAL_EEH_IOC_ERROR:
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	1008	if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
Gavin Shan	56ca4fd	2013-06-27 13:46:46 +0800	[diff] [blame]	1009	pr_err("EEH: dead IOC detected\n");
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	1010	ret = EEH_NEXT_ERR_DEAD_IOC;
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	1011	} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
Gavin Shan	56ca4fd	2013-06-27 13:46:46 +0800	[diff] [blame]	1012	pr_info("EEH: IOC informative error "
				1013	"detected\n");
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	1014	ioda_eeh_hub_diag(hose);
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	1015	ret = EEH_NEXT_ERR_NONE;
Gavin Shan	56ca4fd	2013-06-27 13:46:46 +0800	[diff] [blame]	1016	}
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	1017
				1018	break;
				1019	case OPAL_EEH_PHB_ERROR:
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	1020	if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
Gavin Shan	467f79a	2014-04-24 18:00:08 +1000	[diff] [blame]	1021	*pe = phb_pe;
Gavin Shan	357b2f3	2014-06-11 18:26:44 +1000	[diff] [blame]	1022	pr_err("EEH: dead PHB#%x detected, "
				1023	"location: %s\n",
				1024	hose->global_number,
				1025	eeh_pe_loc_get(phb_pe));
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	1026	ret = EEH_NEXT_ERR_DEAD_PHB;
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	1027	} else if (be16_to_cpu(severity) ==
				1028	OPAL_EEH_SEV_PHB_FENCED) {
Gavin Shan	467f79a	2014-04-24 18:00:08 +1000	[diff] [blame]	1029	*pe = phb_pe;
Gavin Shan	357b2f3	2014-06-11 18:26:44 +1000	[diff] [blame]	1030	pr_err("EEH: Fenced PHB#%x detected, "
				1031	"location: %s\n",
				1032	hose->global_number,
				1033	eeh_pe_loc_get(phb_pe));
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	1034	ret = EEH_NEXT_ERR_FENCED_PHB;
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	1035	} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
Gavin Shan	56ca4fd	2013-06-27 13:46:46 +0800	[diff] [blame]	1036	pr_info("EEH: PHB#%x informative error "
Gavin Shan	357b2f3	2014-06-11 18:26:44 +1000	[diff] [blame]	1037	"detected, location: %s\n",
				1038	hose->global_number,
				1039	eeh_pe_loc_get(phb_pe));
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	1040	ioda_eeh_phb_diag(phb_pe);
				1041	pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	1042	ret = EEH_NEXT_ERR_NONE;
Gavin Shan	56ca4fd	2013-06-27 13:46:46 +0800	[diff] [blame]	1043	}
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	1044
				1045	break;
				1046	case OPAL_EEH_PE_ERROR:
Gavin Shan	cb5b242	2014-01-15 13:16:13 +0800	[diff] [blame]	1047	/*
Gavin Shan	71b540a	2014-05-05 09:29:04 +1000	[diff] [blame]	1048	* If we can't find the corresponding PE, we
				1049	* just try to unfreeze.
Gavin Shan	cb5b242	2014-01-15 13:16:13 +0800	[diff] [blame]	1050	*/
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	1051	if (ioda_eeh_get_pe(hose,
Gavin Shan	71b540a	2014-05-05 09:29:04 +1000	[diff] [blame]	1052	be64_to_cpu(frozen_pe_no), pe)) {
				1053	/* Try best to clear it */
				1054	pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
				1055	hose->global_number, frozen_pe_no);
Gavin Shan	357b2f3	2014-06-11 18:26:44 +1000	[diff] [blame]	1056	pr_info("EEH: PHB location: %s\n",
				1057	eeh_pe_loc_get(phb_pe));
Gavin Shan	71b540a	2014-05-05 09:29:04 +1000	[diff] [blame]	1058	opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
				1059	OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
				1060	ret = EEH_NEXT_ERR_NONE;
Gavin Shan	05ec424	2014-06-10 11:41:55 +1000	[diff] [blame]	1061	} else if ((*pe)->state & EEH_PE_ISOLATED \|\|
				1062	eeh_pe_passed(*pe)) {
Gavin Shan	6379655	2014-04-24 18:00:20 +1000	[diff] [blame]	1063	ret = EEH_NEXT_ERR_NONE;
Gavin Shan	cb5b242	2014-01-15 13:16:13 +0800	[diff] [blame]	1064	} else {
				1065	pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
				1066	(pe)->addr, (pe)->phb->global_number);
Gavin Shan	357b2f3	2014-06-11 18:26:44 +1000	[diff] [blame]	1067	pr_err("EEH: PE location: %s, PHB location: %s\n",
				1068	eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe));
Gavin Shan	cb5b242	2014-01-15 13:16:13 +0800	[diff] [blame]	1069	ret = EEH_NEXT_ERR_FROZEN_PE;
				1070	}
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	1071
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	1072	break;
				1073	default:
				1074	pr_warn("%s: Unexpected error type %d\n",
Guo Chao	ddf0322	2014-06-09 16:58:51 +0800	[diff] [blame]	1075	__func__, be16_to_cpu(err_type));
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	1076	}
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	1077
				1078	/*
Gavin Shan	9471660	2014-02-25 15:28:37 +0800	[diff] [blame]	1079	* EEH core will try recover from fenced PHB or
				1080	* frozen PE. In the time for frozen PE, EEH core
				1081	* enable IO path for that before collecting logs,
				1082	* but it ruins the site. So we have to dump the
				1083	* log in advance here.
				1084	*/
				1085	if ((ret == EEH_NEXT_ERR_FROZEN_PE \|\|
				1086	ret == EEH_NEXT_ERR_FENCED_PHB) &&
				1087	!((*pe)->state & EEH_PE_ISOLATED)) {
				1088	eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	1089	ioda_eeh_phb_diag(*pe);
Gavin Shan	9471660	2014-02-25 15:28:37 +0800	[diff] [blame]	1090	}
				1091
				1092	/*
Gavin Shan	1ad7a72	2014-05-05 09:29:03 +1000	[diff] [blame]	1093	* We probably have the frozen parent PE out there and
				1094	* we need have to handle frozen parent PE firstly.
				1095	*/
				1096	if (ret == EEH_NEXT_ERR_FROZEN_PE) {
				1097	parent_pe = (*pe)->parent;
				1098	while (parent_pe) {
				1099	/* Hit the ceiling ? */
				1100	if (parent_pe->type & EEH_PE_PHB)
				1101	break;
				1102
				1103	/* Frozen parent PE ? */
				1104	state = ioda_eeh_get_state(parent_pe);
				1105	if (state > 0 &&
				1106	(state & active_flags) != active_flags)
				1107	*pe = parent_pe;
				1108
				1109	/* Next parent level */
				1110	parent_pe = parent_pe->parent;
				1111	}
				1112
				1113	/* We possibly migrate to another PE */
				1114	eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
				1115	}
				1116
				1117	/*
Gavin Shan	7e4e786	2014-01-15 13:16:11 +0800	[diff] [blame]	1118	* If we have no errors on the specific PHB or only
				1119	* informative error there, we continue poking it.
				1120	* Otherwise, we need actions to be taken by upper
				1121	* layer.
				1122	*/
				1123	if (ret > EEH_NEXT_ERR_INF)
				1124	break;
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	1125	}
				1126
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	1127	return ret;
				1128	}
				1129
Gavin Shan	8747f36	2013-06-20 13:21:06 +0800	[diff] [blame]	1130	struct pnv_eeh_ops ioda_eeh_ops = {
Gavin Shan	73370c6	2013-06-20 13:21:07 +0800	[diff] [blame]	1131	.post_init = ioda_eeh_post_init,
Gavin Shan	eb00598	2013-06-20 13:21:08 +0800	[diff] [blame]	1132	.set_option = ioda_eeh_set_option,
Gavin Shan	8c41a7f	2013-06-20 13:21:09 +0800	[diff] [blame]	1133	.get_state = ioda_eeh_get_state,
Gavin Shan	9d5cab0	2013-06-20 13:21:10 +0800	[diff] [blame]	1134	.reset = ioda_eeh_reset,
Gavin Shan	bb593c0	2014-07-17 14:41:43 +1000	[diff] [blame]	1135	.get_log = ioda_eeh_get_log,
Gavin Shan	bf90dfe	2013-06-20 13:21:11 +0800	[diff] [blame]	1136	.configure_bridge = ioda_eeh_configure_bridge,
Gavin Shan	131c123	2014-09-30 12:38:56 +1000	[diff] [blame]	1137	.err_inject = ioda_eeh_err_inject,
Gavin Shan	70f942d	2013-06-20 13:21:12 +0800	[diff] [blame]	1138	.next_error = ioda_eeh_next_error
Gavin Shan	8747f36	2013-06-20 13:21:06 +0800	[diff] [blame]	1139	};