diff mbox

[v10,16/17] core/opal: Use PCI slot and new APIs for slot states

Message ID 1462251882-12762-17-git-send-email-gwshan@linux.vnet.ibm.com
State Changes Requested
Headers show

Commit Message

Gavin Shan May 3, 2016, 5:04 a.m. UTC
The various reset requests are completed by PHB's callbacks. All
of them (except reset on IODA table or error injection) are covered
by PCI slot. opal_pci_poll() faces similar situation.

This reimplements opal_pci_reset() and opal_pci_poll() based on
the callbacks provided by PCI slot instead of PHB. Also, couple of
new APIs are introduced based on the callbacks in PCI slot as below:

   * opal_pci_get_presence_state(): Check if there is adapter presented
     behind the specified PHB or PCI slot.
   * opal_pci_get_power_state(): Returns power supply state (on or off)
     on the specified PHB or PCI slot.
   * opal_pci_set_power_state(): Sets power supply state (on or off)
     on the specified PHB or PCI slot.
   * opal_pci_poll2(): Poll for PCI slot state. It accepts two arguments
     while opal_pci_poll() accepts only one.

Eventually, the definition of unused PHB's callbacks are removed.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 core/pci-opal.c    | 174 +++++++++++++++++++++++++++++++++++++++++++++--------
 include/opal-api.h |   6 +-
 include/pci.h      |  72 ----------------------
 3 files changed, 153 insertions(+), 99 deletions(-)

Comments

Alistair Popple May 5, 2016, 9:23 a.m. UTC | #1
On Tue, 3 May 2016 15:04:41 Gavin Shan wrote:
> The various reset requests are completed by PHB's callbacks. All
> of them (except reset on IODA table or error injection) are covered
> by PCI slot. opal_pci_poll() faces similar situation.
> 
> This reimplements opal_pci_reset() and opal_pci_poll() based on
> the callbacks provided by PCI slot instead of PHB. Also, couple of
> new APIs are introduced based on the callbacks in PCI slot as below:
> 
>    * opal_pci_get_presence_state(): Check if there is adapter presented
>      behind the specified PHB or PCI slot.
>    * opal_pci_get_power_state(): Returns power supply state (on or off)
>      on the specified PHB or PCI slot.
>    * opal_pci_set_power_state(): Sets power supply state (on or off)
>      on the specified PHB or PCI slot.
>    * opal_pci_poll2(): Poll for PCI slot state. It accepts two arguments
>      while opal_pci_poll() accepts only one.
> 
> Eventually, the definition of unused PHB's callbacks are removed.
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  core/pci-opal.c    | 174 +++++++++++++++++++++++++++++++++++++++++++++--------
>  include/opal-api.h |   6 +-
>  include/pci.h      |  72 ----------------------
>  3 files changed, 153 insertions(+), 99 deletions(-)
> 
> diff --git a/core/pci-opal.c b/core/pci-opal.c
> index 40eda39..f798c52 100644
> --- a/core/pci-opal.c
> +++ b/core/pci-opal.c
> @@ -18,6 +18,7 @@
>  #include <opal-api.h>
>  #include <pci.h>
>  #include <pci-cfg.h>
> +#include <pci-slot.h>
>  #include <timebase.h>
>  
>  #define OPAL_PCICFG_ACCESS(op, cb, type)	\
> @@ -461,16 +462,15 @@ static int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id,
>  }
>  opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW_REAL, opal_pci_map_pe_dma_window_real, 5);
>  
> -static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
> +static int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope,
>                                uint8_t assert_state)
>  {
> -	struct phb *phb = pci_get_phb(phb_id);
> +	struct pci_slot *slot = pci_slot_find(id);
> +	struct phb *phb = slot ? slot->phb : NULL;
>  	int64_t rc = OPAL_SUCCESS;
>  
> -	if (!phb)
> +	if (!slot || !phb)
>  		return OPAL_PARAMETER;
> -	if (!phb->ops)
> -		return OPAL_UNSUPPORTED;
>  	if (assert_state != OPAL_ASSERT_RESET &&
>  	    assert_state != OPAL_DEASSERT_RESET)
>  		return OPAL_PARAMETER;
> @@ -479,18 +479,22 @@ static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
>  
>  	switch(reset_scope) {
>  	case OPAL_RESET_PHB_COMPLETE:
> -		if (!phb->ops->complete_reset) {
> +		/* Complete reset is applicable to PHB slot only */
> +		if (!slot->ops.creset || slot->pd) {
>  			rc = OPAL_UNSUPPORTED;
>  			break;
>  		}
>  
> -		rc = phb->ops->complete_reset(phb, assert_state);
> +		if (assert_state != OPAL_ASSERT_RESET)
> +			break;
> +
> +		rc = slot->ops.creset(slot);
>  		if (rc < 0)
> -			prerror("PHB#%d: Failure on complete reset, rc=%lld\n",
> -				phb->opal_id, rc);
> +			prlog(PR_ERR, "SLOT-%016llx: Error %lld on complete reset\n",
> +			      slot->id, rc);
>  		break;
>  	case OPAL_RESET_PCI_FUNDAMENTAL:
> -		if (!phb->ops->fundamental_reset) {
> +		if (!slot->ops.freset) {
>  			rc = OPAL_UNSUPPORTED;
>  			break;
>  		}
> @@ -499,13 +503,13 @@ static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
>  		if (assert_state != OPAL_ASSERT_RESET)
>  			break;
>  
> -		rc = phb->ops->fundamental_reset(phb);
> +		rc = slot->ops.freset(slot);
>  		if (rc < 0)
> -			prerror("PHB#%d: Failure on fundamental reset, rc=%lld\n",
> -				phb->opal_id, rc);
> +			prlog(PR_ERR, "SLOT-%016llx: Error %lld on fundamental reset\n",
> +			      slot->id, rc);
>  		break;
>  	case OPAL_RESET_PCI_HOT:
> -		if (!phb->ops->hot_reset) {
> +		if (!slot->ops.hreset) {
>  			rc = OPAL_UNSUPPORTED;
>  			break;
>  		}
> @@ -514,22 +518,34 @@ static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
>  		if (assert_state != OPAL_ASSERT_RESET)
>  			break;
>  
> -		rc = phb->ops->hot_reset(phb);
> +		rc = slot->ops.hreset(slot);
>  		if (rc < 0)
> -			prerror("PHB#%d: Failure on hot reset, rc=%lld\n",
> -				phb->opal_id, rc);
> +			prlog(PR_ERR, "SLOT-%016llx: Error %lld on hot reset\n",
> +			      slot->id, rc);
>  		break;
>  	case OPAL_RESET_PCI_IODA_TABLE:
> +		/* It's allowed on PHB slot only */
> +		if (slot->pd || !phb->ops || !phb->ops->ioda_reset) {
> +			rc = OPAL_UNSUPPORTED;
> +			break;
> +		}
> +
>  		if (assert_state != OPAL_ASSERT_RESET)
>  			break;
> -		if (phb->ops->ioda_reset)
> -			phb->ops->ioda_reset(phb, true);
> +
> +		rc = phb->ops->ioda_reset(phb, true);
>  		break;
>  	case OPAL_RESET_PHB_ERROR:
> +		/* It's allowed on PHB slot only */
> +		if (slot->pd || !phb->ops || !phb->ops->papr_errinjct_reset) {
> +			rc = OPAL_UNSUPPORTED;
> +			break;
> +		}
> +
>  		if (assert_state != OPAL_ASSERT_RESET)
>  			break;
> -		if (phb->ops->papr_errinjct_reset)
> -			phb->ops->papr_errinjct_reset(phb);
> +
> +		rc = phb->ops->papr_errinjct_reset(phb);
>  		break;
>  	default:
>  		rc = OPAL_UNSUPPORTED;
> @@ -560,18 +576,19 @@ static int64_t opal_pci_reinit(uint64_t phb_id,
>  }
>  opal_call(OPAL_PCI_REINIT, opal_pci_reinit, 3);
>  
> -static int64_t opal_pci_poll(uint64_t phb_id)
> +static int64_t opal_pci_poll(uint64_t id)
>  {
> -	struct phb *phb = pci_get_phb(phb_id);
> +	struct pci_slot *slot = pci_slot_find(id);
> +	struct phb *phb = slot ? slot->phb : NULL;
>  	int64_t rc;
>  
> -	if (!phb)
> +	if (!slot || !phb)
>  		return OPAL_PARAMETER;
> -	if (!phb->ops || !phb->ops->poll)
> +	if (!slot->ops.poll)
>  		return OPAL_UNSUPPORTED;
>  
>  	phb_lock(phb);
> -	rc = phb->ops->poll(phb);
> +	rc = slot->ops.poll(slot, NULL);
>  	phb_unlock(phb);
>  
>  	/* Return milliseconds for caller to sleep: round up */
> @@ -585,6 +602,111 @@ static int64_t opal_pci_poll(uint64_t phb_id)
>  }
>  opal_call(OPAL_PCI_POLL, opal_pci_poll, 1);
>  
> +static int64_t opal_pci_get_presence_state(uint64_t id, uint64_t data)
> +{
> +	struct pci_slot *slot = pci_slot_find(id);
> +	struct phb *phb = slot ? slot->phb : NULL;
> +	uint8_t *presence = (uint8_t *)data;
> +	int64_t rc = OPAL_SUCCESS;
> +
> +	if (!slot || !phb)
> +		return OPAL_PARAMETER;
> +	if (!slot->ops.get_presence_status)
> +		return OPAL_UNSUPPORTED;
> +
> +	phb_lock(phb);
> +	rc = slot->ops.get_presence_status(slot, presence);
> +	phb_unlock(phb);
> +
> +	if (rc > 0) {
> +		rc = tb_to_msecs(rc);
> +		if (rc == 0)
> +			rc = 1;
> +	}
> +
> +	return rc;
> +}
> +opal_call(OPAL_PCI_GET_PRESENCE_STATE, opal_pci_get_presence_state, 2);
> +
> +static int64_t opal_pci_get_power_state(uint64_t id, uint64_t data)
> +{
> +	struct pci_slot *slot = pci_slot_find(id);
> +	struct phb *phb = slot ? slot->phb : NULL;
> +	uint8_t *power_state = (uint8_t *)data;
> +	int64_t rc = OPAL_SUCCESS;
> +
> +	if (!slot || !phb)
> +		return OPAL_PARAMETER;
> +	if (!slot->ops.get_power_status)
> +		return OPAL_UNSUPPORTED;
> +
> +	phb_lock(phb);
> +	rc = slot->ops.get_power_status(slot, power_state);
> +	phb_unlock(phb);
> +
> +	if (rc > 0) {
> +		rc = tb_to_msecs(rc);
> +		if (rc == 0)
> +			rc = 1;
> +	}
> +
> +	return rc;
> +}
> +opal_call(OPAL_PCI_GET_POWER_STATE, opal_pci_get_power_state, 2);
> +
> +static int64_t opal_pci_set_power_state(uint64_t id, uint64_t data)
> +{
> +	struct pci_slot *slot = pci_slot_find(id);
> +	struct phb *phb = slot ? slot->phb : NULL;
> +	uint8_t *power_state = (uint8_t *)data;
> +	int64_t rc = OPAL_SUCCESS;
> +
> +	if (!slot || !phb)
> +		return OPAL_PARAMETER;
> +	if (!slot->ops.get_power_status)
> +		return OPAL_UNSUPPORTED;
> +
> +	phb_lock(phb);
> +	rc = slot->ops.set_power_status(slot, *power_state);

As we discussed offline there needs to be much better separation of the
platform specific parts and the generic parts of setting the power state.

At the moment the calls to update device tree, etc. are all in the platform
specific code but they should really appear here as it will apply to all platforms
that eventually support hotplug. The platform specific code should only deal with
platform dependent functions such as actually removing power from the slots which
is completely different on say an FSP system versus a non-FSP system.

> +	phb_unlock(phb);
> +
> +	if (rc > 0) {
> +		rc = tb_to_msecs(rc);
> +		if (rc == 0)
> +			rc = 1;
> +	}
> +
> +	return rc;
> +}
> +opal_call(OPAL_PCI_SET_POWER_STATE, opal_pci_set_power_state, 2);
> +
> +static int64_t opal_pci_poll2(uint64_t id, uint64_t data)
> +{
> +	struct pci_slot *slot = pci_slot_find(id);
> +	struct phb *phb = slot ? slot->phb : NULL;
> +	uint8_t *state = (uint8_t *)data;
> +	int64_t rc;
> +
> +	if (!slot || !phb)
> +		return OPAL_PARAMETER;
> +	if (!slot->ops.poll)
> +		return OPAL_UNSUPPORTED;
> +
> +	phb_lock(phb);
> +	rc = slot->ops.poll(slot, state);
> +	phb_unlock(phb);
> +
> +	/* Return milliseconds for caller to sleep: round up */
> +	if (rc > 0) {
> +		rc = tb_to_msecs(rc);
> +		if (rc == 0)
> +			rc = 1;
> +	}
> +
> +	return rc;
> +}
> +opal_call(OPAL_PCI_POLL2, opal_pci_poll2, 2);
> +
>  static int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id,
>  					   uint64_t tce_mem_addr,
>  					   uint64_t tce_mem_size)
> diff --git a/include/opal-api.h b/include/opal-api.h
> index 50de6d1..d7b5a1e 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -164,7 +164,11 @@
>  #define OPAL_CEC_REBOOT2			116
>  #define OPAL_CONSOLE_FLUSH			117
>  #define OPAL_GET_DEVICE_TREE			118
> -#define OPAL_LAST				118
> +#define OPAL_PCI_GET_PRESENCE_STATE		119
> +#define OPAL_PCI_GET_POWER_STATE		120
> +#define OPAL_PCI_SET_POWER_STATE		121
> +#define OPAL_PCI_POLL2				122
> +#define OPAL_LAST				122
>  
>  /* Device tree flags */
>  
> diff --git a/include/pci.h b/include/pci.h
> index 1326a84..048ef0d 100644
> --- a/include/pci.h
> +++ b/include/pci.h
> @@ -293,78 +293,6 @@ struct phb_ops {
>  	 */
>  	int64_t (*pci_msi_eoi)(struct phb *phb, uint32_t hwirq);
>  
> -	/*
> -	 * Slot control
> -	 */
> -
> -	/* presence_detect - Check for a present device
> -	 *
> -	 * Immediate return of:
> -	 *
> -	 * OPAL_SHPC_DEV_NOT_PRESENT = 0,
> -	 * OPAL_SHPC_DEV_PRESENT = 1
> -	 *
> -	 * or a negative OPAL error code
> -	 */
> -	int64_t (*presence_detect)(struct phb *phb);
> -
> -	/* link_state - Check link state
> -	 *
> -	 * Immediate return of:
> -	 *
> -	 * OPAL_SHPC_LINK_DOWN = 0,
> -	 * OPAL_SHPC_LINK_UP_x1 = 1,
> -	 * OPAL_SHPC_LINK_UP_x2 = 2,
> -	 * OPAL_SHPC_LINK_UP_x4 = 4,
> -	 * OPAL_SHPC_LINK_UP_x8 = 8,
> -	 * OPAL_SHPC_LINK_UP_x16 = 16,
> -	 * OPAL_SHPC_LINK_UP_x32 = 32
> -	 *
> -	 * or a negative OPAL error code
> -	 */
> -	int64_t (*link_state)(struct phb *phb);
> -
> -	/* power_state - Check slot power state
> -	 *
> -	 * Immediate return of:
> -	 *
> -	 * OPAL_SLOT_POWER_OFF = 0,
> -	 * OPAL_SLOT_POWER_ON = 1,
> -	 *
> -	 * or a negative OPAL error code
> -	 */
> -	int64_t (*power_state)(struct phb *phb);
> -
> -	/* slot_power_off - Start slot power off sequence
> -	 *
> -	 * Asynchronous function, returns a positive delay
> -	 * or a negative error code
> -	 */
> -	int64_t (*slot_power_off)(struct phb *phb);
> -
> -	/* slot_power_on - Start slot power on sequence
> -	 *
> -	 * Asynchronous function, returns a positive delay
> -	 * or a negative error code.
> -	 */
> -	int64_t (*slot_power_on)(struct phb *phb);
> -
> -	/* PHB power off and on after complete init */
> -	int64_t (*complete_reset)(struct phb *phb, uint8_t assert);
> -
> -	/* hot_reset - Hot Reset sequence */
> -	int64_t (*hot_reset)(struct phb *phb);
> -
> -	/* Fundamental reset */
> -	int64_t (*fundamental_reset)(struct phb *phb);
> -
> -	/* poll - Poll and advance asynchronous operations
> -	 *
> -	 * Returns a positive delay, 0 for success or a
> -	 * negative OPAL error code
> -	 */
> -	int64_t (*poll)(struct phb *phb);
> -
>  	/* Put phb in capi mode or pcie mode */
>  	int64_t (*set_capi_mode)(struct phb *phb, uint64_t mode, uint64_t pe_number);
>  
>
Andrew Donnellan May 6, 2016, 1:03 a.m. UTC | #2
On 03/05/16 15:04, Gavin Shan wrote:
> The various reset requests are completed by PHB's callbacks. All
> of them (except reset on IODA table or error injection) are covered
> by PCI slot. opal_pci_poll() faces similar situation.
>
> This reimplements opal_pci_reset() and opal_pci_poll() based on
> the callbacks provided by PCI slot instead of PHB. Also, couple of
> new APIs are introduced based on the callbacks in PCI slot as below:
>
>     * opal_pci_get_presence_state(): Check if there is adapter presented
>       behind the specified PHB or PCI slot.
>     * opal_pci_get_power_state(): Returns power supply state (on or off)
>       on the specified PHB or PCI slot.
>     * opal_pci_set_power_state(): Sets power supply state (on or off)
>       on the specified PHB or PCI slot.

I notice that the name of the OPAL calls are {get,set}_power_state(), 
while the callbacks are called {get,set}_power_status(). IMHO they 
should be made consistent.
Gavin Shan May 6, 2016, 1:15 a.m. UTC | #3
On Fri, May 06, 2016 at 11:03:35AM +1000, Andrew Donnellan wrote:
>On 03/05/16 15:04, Gavin Shan wrote:
>>The various reset requests are completed by PHB's callbacks. All
>>of them (except reset on IODA table or error injection) are covered
>>by PCI slot. opal_pci_poll() faces similar situation.
>>
>>This reimplements opal_pci_reset() and opal_pci_poll() based on
>>the callbacks provided by PCI slot instead of PHB. Also, couple of
>>new APIs are introduced based on the callbacks in PCI slot as below:
>>
>>    * opal_pci_get_presence_state(): Check if there is adapter presented
>>      behind the specified PHB or PCI slot.
>>    * opal_pci_get_power_state(): Returns power supply state (on or off)
>>      on the specified PHB or PCI slot.
>>    * opal_pci_set_power_state(): Sets power supply state (on or off)
>>      on the specified PHB or PCI slot.
>
>I notice that the name of the OPAL calls are {get,set}_power_state(), while
>the callbacks are called {get,set}_power_status(). IMHO they should be made
>consistent.
>

Thanks for review. Yes, I will make all of them to have suffix _state().

	get_power_state()
	set_power_state()
	get_presence_state()

Thanks,
Gavin

>-- 
>Andrew Donnellan              OzLabs, ADL Canberra
>andrew.donnellan@au1.ibm.com  IBM Australia Limited
Gavin Shan May 6, 2016, 1:28 a.m. UTC | #4
On Thu, May 05, 2016 at 07:23:03PM +1000, Alistair Popple wrote:
>On Tue, 3 May 2016 15:04:41 Gavin Shan wrote:
>> The various reset requests are completed by PHB's callbacks. All
>> of them (except reset on IODA table or error injection) are covered
>> by PCI slot. opal_pci_poll() faces similar situation.
>> 
>> This reimplements opal_pci_reset() and opal_pci_poll() based on
>> the callbacks provided by PCI slot instead of PHB. Also, couple of
>> new APIs are introduced based on the callbacks in PCI slot as below:
>> 
>>    * opal_pci_get_presence_state(): Check if there is adapter presented
>>      behind the specified PHB or PCI slot.
>>    * opal_pci_get_power_state(): Returns power supply state (on or off)
>>      on the specified PHB or PCI slot.
>>    * opal_pci_set_power_state(): Sets power supply state (on or off)
>>      on the specified PHB or PCI slot.
>>    * opal_pci_poll2(): Poll for PCI slot state. It accepts two arguments
>>      while opal_pci_poll() accepts only one.
>> 
>> Eventually, the definition of unused PHB's callbacks are removed.
>> 
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  core/pci-opal.c    | 174 +++++++++++++++++++++++++++++++++++++++++++++--------
>>  include/opal-api.h |   6 +-
>>  include/pci.h      |  72 ----------------------
>>  3 files changed, 153 insertions(+), 99 deletions(-)
>> 
>> diff --git a/core/pci-opal.c b/core/pci-opal.c
>> index 40eda39..f798c52 100644
>> --- a/core/pci-opal.c
>> +++ b/core/pci-opal.c
>> @@ -18,6 +18,7 @@
>>  #include <opal-api.h>
>>  #include <pci.h>
>>  #include <pci-cfg.h>
>> +#include <pci-slot.h>
>>  #include <timebase.h>
>>  
>>  #define OPAL_PCICFG_ACCESS(op, cb, type)	\
>> @@ -461,16 +462,15 @@ static int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id,
>>  }
>>  opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW_REAL, opal_pci_map_pe_dma_window_real, 5);
>>  
>> -static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
>> +static int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope,
>>                                uint8_t assert_state)
>>  {
>> -	struct phb *phb = pci_get_phb(phb_id);
>> +	struct pci_slot *slot = pci_slot_find(id);
>> +	struct phb *phb = slot ? slot->phb : NULL;
>>  	int64_t rc = OPAL_SUCCESS;
>>  
>> -	if (!phb)
>> +	if (!slot || !phb)
>>  		return OPAL_PARAMETER;
>> -	if (!phb->ops)
>> -		return OPAL_UNSUPPORTED;
>>  	if (assert_state != OPAL_ASSERT_RESET &&
>>  	    assert_state != OPAL_DEASSERT_RESET)
>>  		return OPAL_PARAMETER;
>> @@ -479,18 +479,22 @@ static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
>>  
>>  	switch(reset_scope) {
>>  	case OPAL_RESET_PHB_COMPLETE:
>> -		if (!phb->ops->complete_reset) {
>> +		/* Complete reset is applicable to PHB slot only */
>> +		if (!slot->ops.creset || slot->pd) {
>>  			rc = OPAL_UNSUPPORTED;
>>  			break;
>>  		}
>>  
>> -		rc = phb->ops->complete_reset(phb, assert_state);
>> +		if (assert_state != OPAL_ASSERT_RESET)
>> +			break;
>> +
>> +		rc = slot->ops.creset(slot);
>>  		if (rc < 0)
>> -			prerror("PHB#%d: Failure on complete reset, rc=%lld\n",
>> -				phb->opal_id, rc);
>> +			prlog(PR_ERR, "SLOT-%016llx: Error %lld on complete reset\n",
>> +			      slot->id, rc);
>>  		break;
>>  	case OPAL_RESET_PCI_FUNDAMENTAL:
>> -		if (!phb->ops->fundamental_reset) {
>> +		if (!slot->ops.freset) {
>>  			rc = OPAL_UNSUPPORTED;
>>  			break;
>>  		}
>> @@ -499,13 +503,13 @@ static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
>>  		if (assert_state != OPAL_ASSERT_RESET)
>>  			break;
>>  
>> -		rc = phb->ops->fundamental_reset(phb);
>> +		rc = slot->ops.freset(slot);
>>  		if (rc < 0)
>> -			prerror("PHB#%d: Failure on fundamental reset, rc=%lld\n",
>> -				phb->opal_id, rc);
>> +			prlog(PR_ERR, "SLOT-%016llx: Error %lld on fundamental reset\n",
>> +			      slot->id, rc);
>>  		break;
>>  	case OPAL_RESET_PCI_HOT:
>> -		if (!phb->ops->hot_reset) {
>> +		if (!slot->ops.hreset) {
>>  			rc = OPAL_UNSUPPORTED;
>>  			break;
>>  		}
>> @@ -514,22 +518,34 @@ static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
>>  		if (assert_state != OPAL_ASSERT_RESET)
>>  			break;
>>  
>> -		rc = phb->ops->hot_reset(phb);
>> +		rc = slot->ops.hreset(slot);
>>  		if (rc < 0)
>> -			prerror("PHB#%d: Failure on hot reset, rc=%lld\n",
>> -				phb->opal_id, rc);
>> +			prlog(PR_ERR, "SLOT-%016llx: Error %lld on hot reset\n",
>> +			      slot->id, rc);
>>  		break;
>>  	case OPAL_RESET_PCI_IODA_TABLE:
>> +		/* It's allowed on PHB slot only */
>> +		if (slot->pd || !phb->ops || !phb->ops->ioda_reset) {
>> +			rc = OPAL_UNSUPPORTED;
>> +			break;
>> +		}
>> +
>>  		if (assert_state != OPAL_ASSERT_RESET)
>>  			break;
>> -		if (phb->ops->ioda_reset)
>> -			phb->ops->ioda_reset(phb, true);
>> +
>> +		rc = phb->ops->ioda_reset(phb, true);
>>  		break;
>>  	case OPAL_RESET_PHB_ERROR:
>> +		/* It's allowed on PHB slot only */
>> +		if (slot->pd || !phb->ops || !phb->ops->papr_errinjct_reset) {
>> +			rc = OPAL_UNSUPPORTED;
>> +			break;
>> +		}
>> +
>>  		if (assert_state != OPAL_ASSERT_RESET)
>>  			break;
>> -		if (phb->ops->papr_errinjct_reset)
>> -			phb->ops->papr_errinjct_reset(phb);
>> +
>> +		rc = phb->ops->papr_errinjct_reset(phb);
>>  		break;
>>  	default:
>>  		rc = OPAL_UNSUPPORTED;
>> @@ -560,18 +576,19 @@ static int64_t opal_pci_reinit(uint64_t phb_id,
>>  }
>>  opal_call(OPAL_PCI_REINIT, opal_pci_reinit, 3);
>>  
>> -static int64_t opal_pci_poll(uint64_t phb_id)
>> +static int64_t opal_pci_poll(uint64_t id)
>>  {
>> -	struct phb *phb = pci_get_phb(phb_id);
>> +	struct pci_slot *slot = pci_slot_find(id);
>> +	struct phb *phb = slot ? slot->phb : NULL;
>>  	int64_t rc;
>>  
>> -	if (!phb)
>> +	if (!slot || !phb)
>>  		return OPAL_PARAMETER;
>> -	if (!phb->ops || !phb->ops->poll)
>> +	if (!slot->ops.poll)
>>  		return OPAL_UNSUPPORTED;
>>  
>>  	phb_lock(phb);
>> -	rc = phb->ops->poll(phb);
>> +	rc = slot->ops.poll(slot, NULL);
>>  	phb_unlock(phb);
>>  
>>  	/* Return milliseconds for caller to sleep: round up */
>> @@ -585,6 +602,111 @@ static int64_t opal_pci_poll(uint64_t phb_id)
>>  }
>>  opal_call(OPAL_PCI_POLL, opal_pci_poll, 1);
>>  
>> +static int64_t opal_pci_get_presence_state(uint64_t id, uint64_t data)
>> +{
>> +	struct pci_slot *slot = pci_slot_find(id);
>> +	struct phb *phb = slot ? slot->phb : NULL;
>> +	uint8_t *presence = (uint8_t *)data;
>> +	int64_t rc = OPAL_SUCCESS;
>> +
>> +	if (!slot || !phb)
>> +		return OPAL_PARAMETER;
>> +	if (!slot->ops.get_presence_status)
>> +		return OPAL_UNSUPPORTED;
>> +
>> +	phb_lock(phb);
>> +	rc = slot->ops.get_presence_status(slot, presence);
>> +	phb_unlock(phb);
>> +
>> +	if (rc > 0) {
>> +		rc = tb_to_msecs(rc);
>> +		if (rc == 0)
>> +			rc = 1;
>> +	}
>> +
>> +	return rc;
>> +}
>> +opal_call(OPAL_PCI_GET_PRESENCE_STATE, opal_pci_get_presence_state, 2);
>> +
>> +static int64_t opal_pci_get_power_state(uint64_t id, uint64_t data)
>> +{
>> +	struct pci_slot *slot = pci_slot_find(id);
>> +	struct phb *phb = slot ? slot->phb : NULL;
>> +	uint8_t *power_state = (uint8_t *)data;
>> +	int64_t rc = OPAL_SUCCESS;
>> +
>> +	if (!slot || !phb)
>> +		return OPAL_PARAMETER;
>> +	if (!slot->ops.get_power_status)
>> +		return OPAL_UNSUPPORTED;
>> +
>> +	phb_lock(phb);
>> +	rc = slot->ops.get_power_status(slot, power_state);
>> +	phb_unlock(phb);
>> +
>> +	if (rc > 0) {
>> +		rc = tb_to_msecs(rc);
>> +		if (rc == 0)
>> +			rc = 1;
>> +	}
>> +
>> +	return rc;
>> +}
>> +opal_call(OPAL_PCI_GET_POWER_STATE, opal_pci_get_power_state, 2);
>> +
>> +static int64_t opal_pci_set_power_state(uint64_t id, uint64_t data)
>> +{
>> +	struct pci_slot *slot = pci_slot_find(id);
>> +	struct phb *phb = slot ? slot->phb : NULL;
>> +	uint8_t *power_state = (uint8_t *)data;
>> +	int64_t rc = OPAL_SUCCESS;
>> +
>> +	if (!slot || !phb)
>> +		return OPAL_PARAMETER;
>> +	if (!slot->ops.get_power_status)
>> +		return OPAL_UNSUPPORTED;
>> +
>> +	phb_lock(phb);
>> +	rc = slot->ops.set_power_status(slot, *power_state);
>
>As we discussed offline there needs to be much better separation of the
>platform specific parts and the generic parts of setting the power state.
>
>At the moment the calls to update device tree, etc. are all in the platform
>specific code but they should really appear here as it will apply to all platforms
>that eventually support hotplug. The platform specific code should only deal with
>platform dependent functions such as actually removing power from the slots which
>is completely different on say an FSP system versus a non-FSP system.
>

Thanks for review. Yeah, it's absolutely making sense. I will have the change
after collecting more comments.

Thanks,
Gavin

>> +	phb_unlock(phb);
>> +
>> +	if (rc > 0) {
>> +		rc = tb_to_msecs(rc);
>> +		if (rc == 0)
>> +			rc = 1;
>> +	}
>> +
>> +	return rc;
>> +}
>> +opal_call(OPAL_PCI_SET_POWER_STATE, opal_pci_set_power_state, 2);
>> +
>> +static int64_t opal_pci_poll2(uint64_t id, uint64_t data)
>> +{
>> +	struct pci_slot *slot = pci_slot_find(id);
>> +	struct phb *phb = slot ? slot->phb : NULL;
>> +	uint8_t *state = (uint8_t *)data;
>> +	int64_t rc;
>> +
>> +	if (!slot || !phb)
>> +		return OPAL_PARAMETER;
>> +	if (!slot->ops.poll)
>> +		return OPAL_UNSUPPORTED;
>> +
>> +	phb_lock(phb);
>> +	rc = slot->ops.poll(slot, state);
>> +	phb_unlock(phb);
>> +
>> +	/* Return milliseconds for caller to sleep: round up */
>> +	if (rc > 0) {
>> +		rc = tb_to_msecs(rc);
>> +		if (rc == 0)
>> +			rc = 1;
>> +	}
>> +
>> +	return rc;
>> +}
>> +opal_call(OPAL_PCI_POLL2, opal_pci_poll2, 2);
>> +
>>  static int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id,
>>  					   uint64_t tce_mem_addr,
>>  					   uint64_t tce_mem_size)
>> diff --git a/include/opal-api.h b/include/opal-api.h
>> index 50de6d1..d7b5a1e 100644
>> --- a/include/opal-api.h
>> +++ b/include/opal-api.h
>> @@ -164,7 +164,11 @@
>>  #define OPAL_CEC_REBOOT2			116
>>  #define OPAL_CONSOLE_FLUSH			117
>>  #define OPAL_GET_DEVICE_TREE			118
>> -#define OPAL_LAST				118
>> +#define OPAL_PCI_GET_PRESENCE_STATE		119
>> +#define OPAL_PCI_GET_POWER_STATE		120
>> +#define OPAL_PCI_SET_POWER_STATE		121
>> +#define OPAL_PCI_POLL2				122
>> +#define OPAL_LAST				122
>>  
>>  /* Device tree flags */
>>  
>> diff --git a/include/pci.h b/include/pci.h
>> index 1326a84..048ef0d 100644
>> --- a/include/pci.h
>> +++ b/include/pci.h
>> @@ -293,78 +293,6 @@ struct phb_ops {
>>  	 */
>>  	int64_t (*pci_msi_eoi)(struct phb *phb, uint32_t hwirq);
>>  
>> -	/*
>> -	 * Slot control
>> -	 */
>> -
>> -	/* presence_detect - Check for a present device
>> -	 *
>> -	 * Immediate return of:
>> -	 *
>> -	 * OPAL_SHPC_DEV_NOT_PRESENT = 0,
>> -	 * OPAL_SHPC_DEV_PRESENT = 1
>> -	 *
>> -	 * or a negative OPAL error code
>> -	 */
>> -	int64_t (*presence_detect)(struct phb *phb);
>> -
>> -	/* link_state - Check link state
>> -	 *
>> -	 * Immediate return of:
>> -	 *
>> -	 * OPAL_SHPC_LINK_DOWN = 0,
>> -	 * OPAL_SHPC_LINK_UP_x1 = 1,
>> -	 * OPAL_SHPC_LINK_UP_x2 = 2,
>> -	 * OPAL_SHPC_LINK_UP_x4 = 4,
>> -	 * OPAL_SHPC_LINK_UP_x8 = 8,
>> -	 * OPAL_SHPC_LINK_UP_x16 = 16,
>> -	 * OPAL_SHPC_LINK_UP_x32 = 32
>> -	 *
>> -	 * or a negative OPAL error code
>> -	 */
>> -	int64_t (*link_state)(struct phb *phb);
>> -
>> -	/* power_state - Check slot power state
>> -	 *
>> -	 * Immediate return of:
>> -	 *
>> -	 * OPAL_SLOT_POWER_OFF = 0,
>> -	 * OPAL_SLOT_POWER_ON = 1,
>> -	 *
>> -	 * or a negative OPAL error code
>> -	 */
>> -	int64_t (*power_state)(struct phb *phb);
>> -
>> -	/* slot_power_off - Start slot power off sequence
>> -	 *
>> -	 * Asynchronous function, returns a positive delay
>> -	 * or a negative error code
>> -	 */
>> -	int64_t (*slot_power_off)(struct phb *phb);
>> -
>> -	/* slot_power_on - Start slot power on sequence
>> -	 *
>> -	 * Asynchronous function, returns a positive delay
>> -	 * or a negative error code.
>> -	 */
>> -	int64_t (*slot_power_on)(struct phb *phb);
>> -
>> -	/* PHB power off and on after complete init */
>> -	int64_t (*complete_reset)(struct phb *phb, uint8_t assert);
>> -
>> -	/* hot_reset - Hot Reset sequence */
>> -	int64_t (*hot_reset)(struct phb *phb);
>> -
>> -	/* Fundamental reset */
>> -	int64_t (*fundamental_reset)(struct phb *phb);
>> -
>> -	/* poll - Poll and advance asynchronous operations
>> -	 *
>> -	 * Returns a positive delay, 0 for success or a
>> -	 * negative OPAL error code
>> -	 */
>> -	int64_t (*poll)(struct phb *phb);
>> -
>>  	/* Put phb in capi mode or pcie mode */
>>  	int64_t (*set_capi_mode)(struct phb *phb, uint64_t mode, uint64_t pe_number);
>>  
>> 
>
Alistair Popple May 6, 2016, 1:37 a.m. UTC | #5
Gavin,

<snip>

> >
> >As we discussed offline there needs to be much better separation of the
> >platform specific parts and the generic parts of setting the power state.
> >
> >At the moment the calls to update device tree, etc. are all in the platform
> >specific code but they should really appear here as it will apply to all platforms
> >that eventually support hotplug. The platform specific code should only deal with
> >platform dependent functions such as actually removing power from the slots which
> >is completely different on say an FSP system versus a non-FSP system.
> >
> 
> Thanks for review. Yeah, it's absolutely making sense. I will have the change
> after collecting more comments.

Yeah, no problem. I'm going to get stuck into the review of the kernel/skiboot
ABI next week so we can hopefully get it right in time for the kernel stuff
to go upstream in the next cycle.

Regards,

Alistair

> Thanks,
> Gavin
> 
> >> +	phb_unlock(phb);
> >> +
> >> +	if (rc > 0) {
> >> +		rc = tb_to_msecs(rc);
> >> +		if (rc == 0)
> >> +			rc = 1;
> >> +	}
> >> +
> >> +	return rc;
> >> +}
> >> +opal_call(OPAL_PCI_SET_POWER_STATE, opal_pci_set_power_state, 2);
> >> +
> >> +static int64_t opal_pci_poll2(uint64_t id, uint64_t data)
> >> +{
> >> +	struct pci_slot *slot = pci_slot_find(id);
> >> +	struct phb *phb = slot ? slot->phb : NULL;
> >> +	uint8_t *state = (uint8_t *)data;
> >> +	int64_t rc;
> >> +
> >> +	if (!slot || !phb)
> >> +		return OPAL_PARAMETER;
> >> +	if (!slot->ops.poll)
> >> +		return OPAL_UNSUPPORTED;
> >> +
> >> +	phb_lock(phb);
> >> +	rc = slot->ops.poll(slot, state);
> >> +	phb_unlock(phb);
> >> +
> >> +	/* Return milliseconds for caller to sleep: round up */
> >> +	if (rc > 0) {
> >> +		rc = tb_to_msecs(rc);
> >> +		if (rc == 0)
> >> +			rc = 1;
> >> +	}
> >> +
> >> +	return rc;
> >> +}
> >> +opal_call(OPAL_PCI_POLL2, opal_pci_poll2, 2);
> >> +
> >>  static int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id,
> >>  					   uint64_t tce_mem_addr,
> >>  					   uint64_t tce_mem_size)
> >> diff --git a/include/opal-api.h b/include/opal-api.h
> >> index 50de6d1..d7b5a1e 100644
> >> --- a/include/opal-api.h
> >> +++ b/include/opal-api.h
> >> @@ -164,7 +164,11 @@
> >>  #define OPAL_CEC_REBOOT2			116
> >>  #define OPAL_CONSOLE_FLUSH			117
> >>  #define OPAL_GET_DEVICE_TREE			118
> >> -#define OPAL_LAST				118
> >> +#define OPAL_PCI_GET_PRESENCE_STATE		119
> >> +#define OPAL_PCI_GET_POWER_STATE		120
> >> +#define OPAL_PCI_SET_POWER_STATE		121
> >> +#define OPAL_PCI_POLL2				122
> >> +#define OPAL_LAST				122
> >>  
> >>  /* Device tree flags */
> >>  
> >> diff --git a/include/pci.h b/include/pci.h
> >> index 1326a84..048ef0d 100644
> >> --- a/include/pci.h
> >> +++ b/include/pci.h
> >> @@ -293,78 +293,6 @@ struct phb_ops {
> >>  	 */
> >>  	int64_t (*pci_msi_eoi)(struct phb *phb, uint32_t hwirq);
> >>  
> >> -	/*
> >> -	 * Slot control
> >> -	 */
> >> -
> >> -	/* presence_detect - Check for a present device
> >> -	 *
> >> -	 * Immediate return of:
> >> -	 *
> >> -	 * OPAL_SHPC_DEV_NOT_PRESENT = 0,
> >> -	 * OPAL_SHPC_DEV_PRESENT = 1
> >> -	 *
> >> -	 * or a negative OPAL error code
> >> -	 */
> >> -	int64_t (*presence_detect)(struct phb *phb);
> >> -
> >> -	/* link_state - Check link state
> >> -	 *
> >> -	 * Immediate return of:
> >> -	 *
> >> -	 * OPAL_SHPC_LINK_DOWN = 0,
> >> -	 * OPAL_SHPC_LINK_UP_x1 = 1,
> >> -	 * OPAL_SHPC_LINK_UP_x2 = 2,
> >> -	 * OPAL_SHPC_LINK_UP_x4 = 4,
> >> -	 * OPAL_SHPC_LINK_UP_x8 = 8,
> >> -	 * OPAL_SHPC_LINK_UP_x16 = 16,
> >> -	 * OPAL_SHPC_LINK_UP_x32 = 32
> >> -	 *
> >> -	 * or a negative OPAL error code
> >> -	 */
> >> -	int64_t (*link_state)(struct phb *phb);
> >> -
> >> -	/* power_state - Check slot power state
> >> -	 *
> >> -	 * Immediate return of:
> >> -	 *
> >> -	 * OPAL_SLOT_POWER_OFF = 0,
> >> -	 * OPAL_SLOT_POWER_ON = 1,
> >> -	 *
> >> -	 * or a negative OPAL error code
> >> -	 */
> >> -	int64_t (*power_state)(struct phb *phb);
> >> -
> >> -	/* slot_power_off - Start slot power off sequence
> >> -	 *
> >> -	 * Asynchronous function, returns a positive delay
> >> -	 * or a negative error code
> >> -	 */
> >> -	int64_t (*slot_power_off)(struct phb *phb);
> >> -
> >> -	/* slot_power_on - Start slot power on sequence
> >> -	 *
> >> -	 * Asynchronous function, returns a positive delay
> >> -	 * or a negative error code.
> >> -	 */
> >> -	int64_t (*slot_power_on)(struct phb *phb);
> >> -
> >> -	/* PHB power off and on after complete init */
> >> -	int64_t (*complete_reset)(struct phb *phb, uint8_t assert);
> >> -
> >> -	/* hot_reset - Hot Reset sequence */
> >> -	int64_t (*hot_reset)(struct phb *phb);
> >> -
> >> -	/* Fundamental reset */
> >> -	int64_t (*fundamental_reset)(struct phb *phb);
> >> -
> >> -	/* poll - Poll and advance asynchronous operations
> >> -	 *
> >> -	 * Returns a positive delay, 0 for success or a
> >> -	 * negative OPAL error code
> >> -	 */
> >> -	int64_t (*poll)(struct phb *phb);
> >> -
> >>  	/* Put phb in capi mode or pcie mode */
> >>  	int64_t (*set_capi_mode)(struct phb *phb, uint64_t mode, uint64_t pe_number);
> >>  
> >> 
> >
>
Stewart Smith May 12, 2016, 5:18 a.m. UTC | #6
Gavin Shan <gwshan@linux.vnet.ibm.com> writes:
> The various reset requests are completed by PHB's callbacks. All
> of them (except reset on IODA table or error injection) are covered
> by PCI slot. opal_pci_poll() faces similar situation.
>
> This reimplements opal_pci_reset() and opal_pci_poll() based on
> the callbacks provided by PCI slot instead of PHB. Also, couple of
> new APIs are introduced based on the callbacks in PCI slot as below:
>
>    * opal_pci_get_presence_state(): Check if there is adapter presented
>      behind the specified PHB or PCI slot.
>    * opal_pci_get_power_state(): Returns power supply state (on or off)
>      on the specified PHB or PCI slot.
>    * opal_pci_set_power_state(): Sets power supply state (on or off)
>      on the specified PHB or PCI slot.
>    * opal_pci_poll2(): Poll for PCI slot state. It accepts two arguments
>      while opal_pci_poll() accepts only one.
>
> Eventually, the definition of unused PHB's callbacks are removed.
>
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  core/pci-opal.c    | 174 +++++++++++++++++++++++++++++++++++++++++++++--------
>  include/opal-api.h |   6 +-
>  include/pci.h      |  72 ----------------------
>  3 files changed, 153 insertions(+), 99 deletions(-)

Please also add documentation for the new calls in doc/opal-api/

> +static int64_t opal_pci_poll2(uint64_t id, uint64_t data)

If we're going to start fixing the PCI calls, we could at the same time
use the more modern OPAL convention of async calls, and there's a
standard way to wait for them in the kernel -
opal_async_wait_response() - although we may want to ensure that things
are cranked often enough.

We could also convert the existing users of opal_pci_poll() to proper
async completions too... we'd then be able to get rid of the old calls
for p9 and (eventually) remove them altogether.
Stewart Smith May 12, 2016, 6:13 a.m. UTC | #7
Alistair Popple <alistair@popple.id.au> writes:
>> +	rc = slot->ops.set_power_status(slot, *power_state);
>
> As we discussed offline there needs to be much better separation of the
> platform specific parts and the generic parts of setting the power state.
>
> At the moment the calls to update device tree, etc. are all in the platform
> specific code but they should really appear here as it will apply to all platforms
> that eventually support hotplug. The platform specific code should only deal with
> platform dependent functions such as actually removing power from the slots which
> is completely different on say an FSP system versus a non-FSP system.

My thought that is for P9 (as we can change ABI to kernel there :) we
could *always* hotplug - detect nothing before starting linux, just hotplug
it all in at runtime.
Alistair Popple May 12, 2016, 6:22 a.m. UTC | #8
On Thu, 12 May 2016 16:13:58 Stewart Smith wrote:
> Alistair Popple <alistair@popple.id.au> writes:
> >> +	rc = slot->ops.set_power_status(slot, *power_state);
> >
> > As we discussed offline there needs to be much better separation of the
> > platform specific parts and the generic parts of setting the power state.
> >
> > At the moment the calls to update device tree, etc. are all in the 
platform
> > specific code but they should really appear here as it will apply to all 
platforms
> > that eventually support hotplug. The platform specific code should only 
deal with
> > platform dependent functions such as actually removing power from the 
slots which
> > is completely different on say an FSP system versus a non-FSP system.
> 
> My thought that is for P9 (as we can change ABI to kernel there :) we
> could *always* hotplug - detect nothing before starting linux, just hotplug
> it all in at runtime.

We probably wouldn't even have to change the ABI would we? Just get skiboot to 
start up with all the slots "powered down" (even if they aren't physically 
powered down) with no device tree entries for anything plugged into the slots. 
Then once the system is up get the kernel to do a hotplug-in of each slot to 
do the scan and get dt entries...

Is PCI bus enumeration a significant part of skiboot startup time?

Regards,

Alistair
Gavin Shan May 12, 2016, 6:29 a.m. UTC | #9
On Thu, May 12, 2016 at 04:13:58PM +1000, Stewart Smith wrote:
>Alistair Popple <alistair@popple.id.au> writes:
>>> +	rc = slot->ops.set_power_status(slot, *power_state);
>>
>> As we discussed offline there needs to be much better separation of the
>> platform specific parts and the generic parts of setting the power state.
>>
>> At the moment the calls to update device tree, etc. are all in the platform
>> specific code but they should really appear here as it will apply to all platforms
>> that eventually support hotplug. The platform specific code should only deal with
>> platform dependent functions such as actually removing power from the slots which
>> is completely different on say an FSP system versus a non-FSP system.
>
>My thought that is for P9 (as we can change ABI to kernel there :) we
>could *always* hotplug - detect nothing before starting linux, just hotplug
>it all in at runtime.
>

It's a intresting idea. Before discussing it further, I have to understand
your idea. Stewart, could you please confirm below is something you want
to see?

(A) skiboot boots without PCI enumeration. It means no device nodes for
    PCI devices are created;
(B) kernel is loaded and booted. Prior to PCI enumeration, the PHB is
    hot added by OPAL API. After the OPAL API finishes, the PCI devices
    behind the PHB is probed and the device nodes are created. After
    that, the Linux PCI subsystem starts the enumeration.

A serious question is: what's the benefit from it? On the other hand,
It's going to change current PCI subsystem (skiboot) greatly. That part
written by Ben works well.

>-- 
>Stewart Smith
>OPAL Architect, IBM.
>
>_______________________________________________
>Skiboot mailing list
>Skiboot@lists.ozlabs.org
>https://lists.ozlabs.org/listinfo/skiboot
Stewart Smith May 13, 2016, 12:48 a.m. UTC | #10
Alistair Popple <alistair@popple.id.au> writes:
>> My thought that is for P9 (as we can change ABI to kernel there :) we
>> could *always* hotplug - detect nothing before starting linux, just hotplug
>> it all in at runtime.
>
> We probably wouldn't even have to change the ABI would we? Just get skiboot to 
> start up with all the slots "powered down" (even if they aren't physically 
> powered down) with no device tree entries for anything plugged into the slots. 
> Then once the system is up get the kernel to do a hotplug-in of each slot to 
> do the scan and get dt entries...

Well ABI change would be having to have the kernel support hotplug to be
able to boot, so ABI to the skiboot payload kernel at least.

> Is PCI bus enumeration a significant part of skiboot startup time?
[3237993291,5] PCI: Resetting PHBs...
[4912461686,5] PCI: Probing slots...

so 3.2 seconds between those two (on Tuleta) and then:
[5548605580,7] PCI Summary:

1.2 seconds to get to there.

Whole boot time after CHIPTOD sync? 12.8 seconds (Tuleta).

So, on Tuleta, PCI is (one of) the biggest contributor to skiboot boot
time. On OpenPower, it's the damn slow-as-continental-drift flash.
Stewart Smith May 13, 2016, 12:59 a.m. UTC | #11
Gavin Shan <gwshan@linux.vnet.ibm.com> writes:
> On Thu, May 12, 2016 at 04:13:58PM +1000, Stewart Smith wrote:
>>Alistair Popple <alistair@popple.id.au> writes:
>>>> +	rc = slot->ops.set_power_status(slot, *power_state);
>>>
>>> As we discussed offline there needs to be much better separation of the
>>> platform specific parts and the generic parts of setting the power state.
>>>
>>> At the moment the calls to update device tree, etc. are all in the platform
>>> specific code but they should really appear here as it will apply to all platforms
>>> that eventually support hotplug. The platform specific code should only deal with
>>> platform dependent functions such as actually removing power from the slots which
>>> is completely different on say an FSP system versus a non-FSP system.
>>
>>My thought that is for P9 (as we can change ABI to kernel there :) we
>>could *always* hotplug - detect nothing before starting linux, just hotplug
>>it all in at runtime.
>>
>
> It's a intresting idea. Before discussing it further, I have to understand
> your idea. Stewart, could you please confirm below is something you want
> to see?
>
> (A) skiboot boots without PCI enumeration. It means no device nodes for
>     PCI devices are created;
> (B) kernel is loaded and booted. Prior to PCI enumeration, the PHB is
>     hot added by OPAL API. After the OPAL API finishes, the PCI devices
>     behind the PHB is probed and the device nodes are created. After
>     that, the Linux PCI subsystem starts the enumeration.

We could at least provide PHBs to linux and hotplug everything beneath
it.

But yes, basically, that flow.

> A serious question is: what's the benefit from it? On the other hand,
> It's going to change current PCI subsystem (skiboot) greatly. That part
> written by Ben works well.

and I don't want to break what works.

My most naive implementation idea would be to have pci_scan_phb() jobs
run asynchronously to linux, with linux calling into OPAL until that's
done, and then as long as linux can understand the new hungs of DT being
passed to it, we may just win on boot time a bit - or at least on
*perceived* boot time :)
diff mbox

Patch

diff --git a/core/pci-opal.c b/core/pci-opal.c
index 40eda39..f798c52 100644
--- a/core/pci-opal.c
+++ b/core/pci-opal.c
@@ -18,6 +18,7 @@ 
 #include <opal-api.h>
 #include <pci.h>
 #include <pci-cfg.h>
+#include <pci-slot.h>
 #include <timebase.h>
 
 #define OPAL_PCICFG_ACCESS(op, cb, type)	\
@@ -461,16 +462,15 @@  static int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id,
 }
 opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW_REAL, opal_pci_map_pe_dma_window_real, 5);
 
-static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
+static int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope,
                               uint8_t assert_state)
 {
-	struct phb *phb = pci_get_phb(phb_id);
+	struct pci_slot *slot = pci_slot_find(id);
+	struct phb *phb = slot ? slot->phb : NULL;
 	int64_t rc = OPAL_SUCCESS;
 
-	if (!phb)
+	if (!slot || !phb)
 		return OPAL_PARAMETER;
-	if (!phb->ops)
-		return OPAL_UNSUPPORTED;
 	if (assert_state != OPAL_ASSERT_RESET &&
 	    assert_state != OPAL_DEASSERT_RESET)
 		return OPAL_PARAMETER;
@@ -479,18 +479,22 @@  static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
 
 	switch(reset_scope) {
 	case OPAL_RESET_PHB_COMPLETE:
-		if (!phb->ops->complete_reset) {
+		/* Complete reset is applicable to PHB slot only */
+		if (!slot->ops.creset || slot->pd) {
 			rc = OPAL_UNSUPPORTED;
 			break;
 		}
 
-		rc = phb->ops->complete_reset(phb, assert_state);
+		if (assert_state != OPAL_ASSERT_RESET)
+			break;
+
+		rc = slot->ops.creset(slot);
 		if (rc < 0)
-			prerror("PHB#%d: Failure on complete reset, rc=%lld\n",
-				phb->opal_id, rc);
+			prlog(PR_ERR, "SLOT-%016llx: Error %lld on complete reset\n",
+			      slot->id, rc);
 		break;
 	case OPAL_RESET_PCI_FUNDAMENTAL:
-		if (!phb->ops->fundamental_reset) {
+		if (!slot->ops.freset) {
 			rc = OPAL_UNSUPPORTED;
 			break;
 		}
@@ -499,13 +503,13 @@  static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
 		if (assert_state != OPAL_ASSERT_RESET)
 			break;
 
-		rc = phb->ops->fundamental_reset(phb);
+		rc = slot->ops.freset(slot);
 		if (rc < 0)
-			prerror("PHB#%d: Failure on fundamental reset, rc=%lld\n",
-				phb->opal_id, rc);
+			prlog(PR_ERR, "SLOT-%016llx: Error %lld on fundamental reset\n",
+			      slot->id, rc);
 		break;
 	case OPAL_RESET_PCI_HOT:
-		if (!phb->ops->hot_reset) {
+		if (!slot->ops.hreset) {
 			rc = OPAL_UNSUPPORTED;
 			break;
 		}
@@ -514,22 +518,34 @@  static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
 		if (assert_state != OPAL_ASSERT_RESET)
 			break;
 
-		rc = phb->ops->hot_reset(phb);
+		rc = slot->ops.hreset(slot);
 		if (rc < 0)
-			prerror("PHB#%d: Failure on hot reset, rc=%lld\n",
-				phb->opal_id, rc);
+			prlog(PR_ERR, "SLOT-%016llx: Error %lld on hot reset\n",
+			      slot->id, rc);
 		break;
 	case OPAL_RESET_PCI_IODA_TABLE:
+		/* It's allowed on PHB slot only */
+		if (slot->pd || !phb->ops || !phb->ops->ioda_reset) {
+			rc = OPAL_UNSUPPORTED;
+			break;
+		}
+
 		if (assert_state != OPAL_ASSERT_RESET)
 			break;
-		if (phb->ops->ioda_reset)
-			phb->ops->ioda_reset(phb, true);
+
+		rc = phb->ops->ioda_reset(phb, true);
 		break;
 	case OPAL_RESET_PHB_ERROR:
+		/* It's allowed on PHB slot only */
+		if (slot->pd || !phb->ops || !phb->ops->papr_errinjct_reset) {
+			rc = OPAL_UNSUPPORTED;
+			break;
+		}
+
 		if (assert_state != OPAL_ASSERT_RESET)
 			break;
-		if (phb->ops->papr_errinjct_reset)
-			phb->ops->papr_errinjct_reset(phb);
+
+		rc = phb->ops->papr_errinjct_reset(phb);
 		break;
 	default:
 		rc = OPAL_UNSUPPORTED;
@@ -560,18 +576,19 @@  static int64_t opal_pci_reinit(uint64_t phb_id,
 }
 opal_call(OPAL_PCI_REINIT, opal_pci_reinit, 3);
 
-static int64_t opal_pci_poll(uint64_t phb_id)
+static int64_t opal_pci_poll(uint64_t id)
 {
-	struct phb *phb = pci_get_phb(phb_id);
+	struct pci_slot *slot = pci_slot_find(id);
+	struct phb *phb = slot ? slot->phb : NULL;
 	int64_t rc;
 
-	if (!phb)
+	if (!slot || !phb)
 		return OPAL_PARAMETER;
-	if (!phb->ops || !phb->ops->poll)
+	if (!slot->ops.poll)
 		return OPAL_UNSUPPORTED;
 
 	phb_lock(phb);
-	rc = phb->ops->poll(phb);
+	rc = slot->ops.poll(slot, NULL);
 	phb_unlock(phb);
 
 	/* Return milliseconds for caller to sleep: round up */
@@ -585,6 +602,111 @@  static int64_t opal_pci_poll(uint64_t phb_id)
 }
 opal_call(OPAL_PCI_POLL, opal_pci_poll, 1);
 
+static int64_t opal_pci_get_presence_state(uint64_t id, uint64_t data)
+{
+	struct pci_slot *slot = pci_slot_find(id);
+	struct phb *phb = slot ? slot->phb : NULL;
+	uint8_t *presence = (uint8_t *)data;
+	int64_t rc = OPAL_SUCCESS;
+
+	if (!slot || !phb)
+		return OPAL_PARAMETER;
+	if (!slot->ops.get_presence_status)
+		return OPAL_UNSUPPORTED;
+
+	phb_lock(phb);
+	rc = slot->ops.get_presence_status(slot, presence);
+	phb_unlock(phb);
+
+	if (rc > 0) {
+		rc = tb_to_msecs(rc);
+		if (rc == 0)
+			rc = 1;
+	}
+
+	return rc;
+}
+opal_call(OPAL_PCI_GET_PRESENCE_STATE, opal_pci_get_presence_state, 2);
+
+static int64_t opal_pci_get_power_state(uint64_t id, uint64_t data)
+{
+	struct pci_slot *slot = pci_slot_find(id);
+	struct phb *phb = slot ? slot->phb : NULL;
+	uint8_t *power_state = (uint8_t *)data;
+	int64_t rc = OPAL_SUCCESS;
+
+	if (!slot || !phb)
+		return OPAL_PARAMETER;
+	if (!slot->ops.get_power_status)
+		return OPAL_UNSUPPORTED;
+
+	phb_lock(phb);
+	rc = slot->ops.get_power_status(slot, power_state);
+	phb_unlock(phb);
+
+	if (rc > 0) {
+		rc = tb_to_msecs(rc);
+		if (rc == 0)
+			rc = 1;
+	}
+
+	return rc;
+}
+opal_call(OPAL_PCI_GET_POWER_STATE, opal_pci_get_power_state, 2);
+
+static int64_t opal_pci_set_power_state(uint64_t id, uint64_t data)
+{
+	struct pci_slot *slot = pci_slot_find(id);
+	struct phb *phb = slot ? slot->phb : NULL;
+	uint8_t *power_state = (uint8_t *)data;
+	int64_t rc = OPAL_SUCCESS;
+
+	if (!slot || !phb)
+		return OPAL_PARAMETER;
+	if (!slot->ops.get_power_status)
+		return OPAL_UNSUPPORTED;
+
+	phb_lock(phb);
+	rc = slot->ops.set_power_status(slot, *power_state);
+	phb_unlock(phb);
+
+	if (rc > 0) {
+		rc = tb_to_msecs(rc);
+		if (rc == 0)
+			rc = 1;
+	}
+
+	return rc;
+}
+opal_call(OPAL_PCI_SET_POWER_STATE, opal_pci_set_power_state, 2);
+
+static int64_t opal_pci_poll2(uint64_t id, uint64_t data)
+{
+	struct pci_slot *slot = pci_slot_find(id);
+	struct phb *phb = slot ? slot->phb : NULL;
+	uint8_t *state = (uint8_t *)data;
+	int64_t rc;
+
+	if (!slot || !phb)
+		return OPAL_PARAMETER;
+	if (!slot->ops.poll)
+		return OPAL_UNSUPPORTED;
+
+	phb_lock(phb);
+	rc = slot->ops.poll(slot, state);
+	phb_unlock(phb);
+
+	/* Return milliseconds for caller to sleep: round up */
+	if (rc > 0) {
+		rc = tb_to_msecs(rc);
+		if (rc == 0)
+			rc = 1;
+	}
+
+	return rc;
+}
+opal_call(OPAL_PCI_POLL2, opal_pci_poll2, 2);
+
 static int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id,
 					   uint64_t tce_mem_addr,
 					   uint64_t tce_mem_size)
diff --git a/include/opal-api.h b/include/opal-api.h
index 50de6d1..d7b5a1e 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -164,7 +164,11 @@ 
 #define OPAL_CEC_REBOOT2			116
 #define OPAL_CONSOLE_FLUSH			117
 #define OPAL_GET_DEVICE_TREE			118
-#define OPAL_LAST				118
+#define OPAL_PCI_GET_PRESENCE_STATE		119
+#define OPAL_PCI_GET_POWER_STATE		120
+#define OPAL_PCI_SET_POWER_STATE		121
+#define OPAL_PCI_POLL2				122
+#define OPAL_LAST				122
 
 /* Device tree flags */
 
diff --git a/include/pci.h b/include/pci.h
index 1326a84..048ef0d 100644
--- a/include/pci.h
+++ b/include/pci.h
@@ -293,78 +293,6 @@  struct phb_ops {
 	 */
 	int64_t (*pci_msi_eoi)(struct phb *phb, uint32_t hwirq);
 
-	/*
-	 * Slot control
-	 */
-
-	/* presence_detect - Check for a present device
-	 *
-	 * Immediate return of:
-	 *
-	 * OPAL_SHPC_DEV_NOT_PRESENT = 0,
-	 * OPAL_SHPC_DEV_PRESENT = 1
-	 *
-	 * or a negative OPAL error code
-	 */
-	int64_t (*presence_detect)(struct phb *phb);
-
-	/* link_state - Check link state
-	 *
-	 * Immediate return of:
-	 *
-	 * OPAL_SHPC_LINK_DOWN = 0,
-	 * OPAL_SHPC_LINK_UP_x1 = 1,
-	 * OPAL_SHPC_LINK_UP_x2 = 2,
-	 * OPAL_SHPC_LINK_UP_x4 = 4,
-	 * OPAL_SHPC_LINK_UP_x8 = 8,
-	 * OPAL_SHPC_LINK_UP_x16 = 16,
-	 * OPAL_SHPC_LINK_UP_x32 = 32
-	 *
-	 * or a negative OPAL error code
-	 */
-	int64_t (*link_state)(struct phb *phb);
-
-	/* power_state - Check slot power state
-	 *
-	 * Immediate return of:
-	 *
-	 * OPAL_SLOT_POWER_OFF = 0,
-	 * OPAL_SLOT_POWER_ON = 1,
-	 *
-	 * or a negative OPAL error code
-	 */
-	int64_t (*power_state)(struct phb *phb);
-
-	/* slot_power_off - Start slot power off sequence
-	 *
-	 * Asynchronous function, returns a positive delay
-	 * or a negative error code
-	 */
-	int64_t (*slot_power_off)(struct phb *phb);
-
-	/* slot_power_on - Start slot power on sequence
-	 *
-	 * Asynchronous function, returns a positive delay
-	 * or a negative error code.
-	 */
-	int64_t (*slot_power_on)(struct phb *phb);
-
-	/* PHB power off and on after complete init */
-	int64_t (*complete_reset)(struct phb *phb, uint8_t assert);
-
-	/* hot_reset - Hot Reset sequence */
-	int64_t (*hot_reset)(struct phb *phb);
-
-	/* Fundamental reset */
-	int64_t (*fundamental_reset)(struct phb *phb);
-
-	/* poll - Poll and advance asynchronous operations
-	 *
-	 * Returns a positive delay, 0 for success or a
-	 * negative OPAL error code
-	 */
-	int64_t (*poll)(struct phb *phb);
-
 	/* Put phb in capi mode or pcie mode */
 	int64_t (*set_capi_mode)(struct phb *phb, uint64_t mode, uint64_t pe_number);