diff mbox series

[V2,5/9] PCI/TPH: Introduce API functions to manage steering tags

Message ID 20240531213841.3246055-6-wei.huang2@amd.com
State New
Headers show
Series PCIe TPH and cache direct injection support | expand

Commit Message

Wei Huang May 31, 2024, 9:38 p.m. UTC
This patch introduces three API functions, pcie_tph_intr_vec_supported(),
pcie_tph_get_st() and pcie_tph_set_st(), for a driver to query, retrieve
or configure device's steering tags. There are two possible locations for
steering tag table and the code automatically figure out the right
location to set the tags if pcie_tph_set_st() is called. Note the tag
value is always zero currently and will be extended in the follow-up
patches.

Co-developed-by: Eric Van Tassell <Eric.VanTassell@amd.com>
Signed-off-by: Eric Van Tassell <Eric.VanTassell@amd.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com> 
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
---
 drivers/pci/pcie/tph.c  | 402 ++++++++++++++++++++++++++++++++++++++++
 include/linux/pci-tph.h |  22 +++
 2 files changed, 424 insertions(+)

Comments

kernel test robot June 6, 2024, 10:30 p.m. UTC | #1
Hi Wei,

kernel test robot noticed the following build errors:

[auto build test ERROR on pci/next]
[also build test ERROR on pci/for-linus awilliam-vfio/next linus/master awilliam-vfio/for-linus v6.10-rc2 next-20240606]
[cannot apply to horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Wei-Huang/PCI-Introduce-PCIe-TPH-support-framework/20240601-054423
base:   https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git next
patch link:    https://lore.kernel.org/r/20240531213841.3246055-6-wei.huang2%40amd.com
patch subject: [PATCH V2 5/9] PCI/TPH: Introduce API functions to manage steering tags
config: arm-randconfig-r122-20240607 (https://download.01.org/0day-ci/archive/20240607/202406070602.DyLemS9q-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project d7d2d4f53fc79b4b58e8d8d08151b577c3699d4a)
reproduce: (https://download.01.org/0day-ci/archive/20240607/202406070602.DyLemS9q-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406070602.DyLemS9q-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from drivers/pci/pcie/tph.c:18:
   In file included from include/linux/pci.h:1653:
   In file included from include/linux/dmapool.h:14:
   In file included from include/linux/scatterlist.h:8:
   In file included from include/linux/mm.h:2253:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> drivers/pci/pcie/tph.c:95:15: error: no member named 'msix_base' in 'struct pci_dev'; did you mean 'msix_cap'?
      95 |         entry = dev->msix_base + msi_index * PCI_MSIX_ENTRY_SIZE;
         |                      ^~~~~~~~~
         |                      msix_cap
   include/linux/pci.h:350:6: note: 'msix_cap' declared here
     350 |         u8              msix_cap;       /* MSI-X capability offset */
         |                         ^
   1 warning and 1 error generated.


vim +95 drivers/pci/pcie/tph.c

    80	
    81	/*
    82	 * For a given device, return a pointer to the MSI table entry at msi_index.
    83	 */
    84	static void __iomem *tph_msix_table_entry(struct pci_dev *dev,
    85						  u16 msi_index)
    86	{
    87		void __iomem *entry;
    88		u16 tbl_sz;
    89		int ret;
    90	
    91		ret = tph_get_table_size(dev, &tbl_sz);
    92		if (ret || msi_index > tbl_sz)
    93			return NULL;
    94	
  > 95		entry = dev->msix_base + msi_index * PCI_MSIX_ENTRY_SIZE;
    96	
    97		return entry;
    98	}
    99
Jonathan Cameron June 7, 2024, 5:29 p.m. UTC | #2
On Fri, 31 May 2024 16:38:37 -0500
Wei Huang <wei.huang2@amd.com> wrote:

> This patch introduces three API functions, pcie_tph_intr_vec_supported(),
> pcie_tph_get_st() and pcie_tph_set_st(), for a driver to query, retrieve
> or configure device's steering tags. There are two possible locations for
> steering tag table and the code automatically figure out the right
> location to set the tags if pcie_tph_set_st() is called. Note the tag
> value is always zero currently and will be extended in the follow-up
> patches.
> 
> Co-developed-by: Eric Van Tassell <Eric.VanTassell@amd.com>
> Signed-off-by: Eric Van Tassell <Eric.VanTassell@amd.com>
> Signed-off-by: Wei Huang <wei.huang2@amd.com>
> Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
> Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com> 
> Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Hi. 

There are a lot of small functions in here.  I'd look at just calling
the things they are wrapping more directly as with the register
and field names inline it's pretty obvious what is going on.
Wrapping that in a helper function doesn't necessarily help readablity
and perhaps doubles the length of the code.

Various other comments inline.

Jonathan


> ---
>  drivers/pci/pcie/tph.c  | 402 ++++++++++++++++++++++++++++++++++++++++
>  include/linux/pci-tph.h |  22 +++
>  2 files changed, 424 insertions(+)
> 
> diff --git a/drivers/pci/pcie/tph.c b/drivers/pci/pcie/tph.c
> index d5f7309fdf52..320b99c60365 100644
> --- a/drivers/pci/pcie/tph.c
> +++ b/drivers/pci/pcie/tph.c
> @@ -43,6 +43,336 @@ static int tph_set_reg_field_u32(struct pci_dev *dev, u8 offset, u32 mask,
>  	return ret;
>  }
>  
> +static int tph_get_reg_field_u32(struct pci_dev *dev, u8 offset, u32 mask,
> +				 u8 shift, u32 *field)
> +{
> +	u32 reg_val;
> +	int ret;
> +
> +	if (!dev->tph_cap)
> +		return -EINVAL;
> +
> +	ret = pci_read_config_dword(dev, dev->tph_cap + offset, &reg_val);
> +	if (ret)
> +		return ret;
> +
> +	*field = (reg_val & mask) >> shift;
> +
> +	return 0;
> +}

Similar to earlier, I'm not seeing as strong reason for this.
Just do the tph_cap check at the external interface points rather than
in register read paths.

Mind you, if you do keep this a lot of the other helpers
could be flattened as they are simply passing particular parameters
to this and that could be done inline where the results are needed.


> +
> +static int tph_get_table_size(struct pci_dev *dev, u16 *size_out)
> +{
> +	int ret;
> +	u32 tmp;
> +
> +	ret = tph_get_reg_field_u32(dev, PCI_TPH_CAP,
> +				    PCI_TPH_CAP_ST_MASK,
> +				    PCI_TPH_CAP_ST_SHIFT, &tmp);
> +
> +	if (ret)
> +		return ret;
> +
> +	*size_out = (u16)tmp;
> +
> +	return 0;
> +}
> +
> +/*
> + * For a given device, return a pointer to the MSI table entry at msi_index.
> + */
> +static void __iomem *tph_msix_table_entry(struct pci_dev *dev,
> +					  u16 msi_index)
> +{
> +	void __iomem *entry;
> +	u16 tbl_sz;
> +	int ret;
> +
> +	ret = tph_get_table_size(dev, &tbl_sz);
> +	if (ret || msi_index > tbl_sz)
> +		return NULL;
Nice to return the error code via ERR_PTR() etc so ultimate caller gets
some information.
> +
> +	entry = dev->msix_base + msi_index * PCI_MSIX_ENTRY_SIZE;

return dev->msix_base + msi_index * PCI_MSIX_ENTRY_SIZE;

> +
> +	return entry;
> +}
> +
> +/*
> + * For a given device, return a pointer to the vector control register at
> + * offset 0xc of MSI table entry at msi_index.
> + */
> +static void __iomem *tph_msix_vector_control(struct pci_dev *dev,
> +					     u16 msi_index)
> +{
> +	void __iomem *vec_ctrl_addr = tph_msix_table_entry(dev, msi_index);
> +
> +	if (vec_ctrl_addr)
> +		vec_ctrl_addr += PCI_MSIX_ENTRY_VECTOR_CTRL;

I'd do this addition at the caller.  Then don't need this function.

> +
> +	return vec_ctrl_addr;
> +}
> +
> +/*
> + * Translate from MSI-X interrupt index to struct msi_desc *
> + */
> +static struct msi_desc *tph_msix_index_to_desc(struct pci_dev *dev, int index)
> +{
> +	struct msi_desc *entry;
> +
> +	msi_lock_descs(&dev->dev);

I'd take the lock at the caller as not obvious this is going to keep holding it
from the name.

If you call it tph_msix_get_desc_from_index() that would help.

> +	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ASSOCIATED) {
> +		if (entry->msi_index == index)
> +			return entry;
> +	}
> +	msi_unlock_descs(&dev->dev);
> +
> +	return NULL;
> +}

> +
> +static bool msix_nr_in_bounds(struct pci_dev *dev, int msix_nr)
> +{
> +	u16 tbl_sz;
> +
> +	if (tph_get_table_size(dev, &tbl_sz))
> +		return false;
> +
> +	return msix_nr <= tbl_sz;

Use the table entry request and just check reutrn for NULL.

So instead of calling this function,

if (!tph_msix_table_entry(dev, msix_nr));

and drop this function.

> +}
> +
> +/* Return root port capability - 0 means none */
> +static int get_root_port_completer_cap(struct pci_dev *dev)

I'd expect anything ending in _cap in PCI code to be giving
me the offset of a capability. 

static int root_port_tph_support() maybe?

> +{
> +	struct pci_dev *rp;
> +	int ret;
> +	int val;
> +
> +	rp = pcie_find_root_port(dev);
> +	if (!rp) {
> +		pr_err("cannot find root port of %s\n", dev_name(&dev->dev));
> +		return 0;
> +	}
> +
> +	ret = pcie_capability_read_dword(rp, PCI_EXP_DEVCAP2, &val);
> +	if (ret) {
> +		pr_err("cannot read device capabilities 2 of %s\n",
> +		       dev_name(&dev->dev));
> +		return 0;
> +	}
> +
> +	val &= PCI_EXP_DEVCAP2_TPH_COMP;
> +
> +	return val >> PCI_EXP_DEVCAP2_TPH_COMP_SHIFT;
> +}
> +
> +/*
> + * TPH device needs to be below a rootport with the TPH Completer and
> + * the completer must offer a compatible level of completer support to that
> + * requested by the device driver.
> + */
> +static bool completer_support_ok(struct pci_dev *dev, u8 req)
> +{
> +	int rp_cap;
> +
> +	rp_cap = get_root_port_completer_cap(dev);
> +
> +	if (req > rp_cap) {
> +		pr_err("root port lacks proper TPH completer capability\n");

Assumption is that any driver getting to here should really have checked
if this was fine before enabling TPH?  a pr_err() seems overly noisy
otherwise.

> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * The PCI Specification version 5.0 requires the "No ST Mode" mode
> + * be supported by any compatible device.

Why do we need to check something that 'must' be set to 1?
Are there known buggy devices?  If not, just don't check it
as we can assume this is true and save 20 lines of code.

> + */
> +static bool no_st_mode_supported(struct pci_dev *dev)
> +{
> +	bool no_st;
> +	int ret;
> +	u32 tmp;
> +
> +	ret = tph_get_reg_field_u32(dev, PCI_TPH_CAP, PCI_TPH_CAP_NO_ST,
> +				    PCI_TPH_CAP_NO_ST_SHIFT, &tmp);
> +	if (ret)
> +		return false;
> +
> +	no_st = !!tmp;
> +
> +	if (!no_st) {
> +		pr_err("TPH devices must support no ST mode\n");
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +static int tph_write_ctrl_reg(struct pci_dev *dev, u32 value)
> +{
> +	int ret;
> +
> +	ret = tph_set_reg_field_u32(dev, PCI_TPH_CTRL, ~0L, 0, value);
> +
> +	if (ret)
> +		goto err_out;
> +
> +	return 0;
> +
> +err_out:
> +	/* minimizing possible harm by disabling TPH */
> +	pcie_tph_disable(dev);

If the write failed, something is horribly wrong. Do we need
to defend against that case?  It complicates the code a fair bit so
it needs a strong reasoning.  Preferably why can this fail other
than in bug cases or surprise removal or similar.

> +	return ret;
> +}
> +
> +/* Update the ST Mode Select field of the TPH Control Register */
> +static int tph_set_ctrl_reg_mode_sel(struct pci_dev *dev, u8 st_mode)
> +{
> +	int ret;
> +	u32 ctrl_reg;
> +
> +	ret = tph_get_reg_field_u32(dev, PCI_TPH_CTRL, ~0L, 0, &ctrl_reg);
> +	if (ret)
> +		return ret;
> +
> +	/* clear the mode select and enable fields */
> +	ctrl_reg &= ~(PCI_TPH_CTRL_MODE_SEL_MASK);
> +	ctrl_reg |= ((u32)(st_mode << PCI_TPH_CTRL_MODE_SEL_SHIFT) &
> +		     PCI_TPH_CTRL_MODE_SEL_MASK);
> +
> +	ret = tph_write_ctrl_reg(dev, ctrl_reg);

return tph_write_ctrl_reg()

> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +/* Write the steering tag to MSI-X vector control register */
> +static void tph_write_tag_to_msix(struct pci_dev *dev, int msix_nr, u16 tag)
> +{
> +	u32 val;
> +	void __iomem *vec_ctrl;
> +	struct msi_desc *msi_desc;
> +
> +	msi_desc = tph_msix_index_to_desc(dev, msix_nr);
> +	if (!msi_desc) {
> +		pr_err("MSI-X descriptor for #%d not found\n", msix_nr);
> +		return;
return an error so we can handle it at caller.

> +	}
> +
> +	vec_ctrl = tph_msix_vector_control(dev, msi_desc->msi_index);

Whilst this would have already failed, I'd still check for vec_ctrl == NULL
as easier to read.

> +
> +	val = readl(vec_ctrl);
> +	val &= 0xffff;
> +	val |= (tag << 16);
> +	writel(val, vec_ctrl);
> +
> +	/* read back to flush the update */
> +	val = readl(vec_ctrl);
> +	msi_unlock_descs(&dev->dev);
Who took the lock? Not obvious from function naming - see above

> +}
> +

> +static bool pcie_tph_write_st(struct pci_dev *dev, unsigned int msix_nr,
> +			      u8 req_type, u16 tag)

Why bool for error or not?  Better to return an error code.

> +{
> +	int offset;
> +	u8  loc;

Unusual to align the local variables like this. I'd just have a single space before
loc..

> +	int ret;
> +
> +	/* setting ST isn't needed - not an error, just return true */
> +	if (!dev->tph_cap || pci_tph_disabled() || pci_tph_nostmode() ||
> +	    !dev->msix_enabled || !tph_int_vec_mode_supported(dev))
> +		return true;
> +
> +	/* setting ST is incorrect in the following cases - return error */
> +	if (!no_st_mode_supported(dev) || !msix_nr_in_bounds(dev, msix_nr) ||
> +	    !completer_support_ok(dev, req_type))
> +		return false;
> +
> +	/*
> +	 * disable TPH before updating the tag to avoid potential instability
> +	 * as cautioned about in the "ST Table Programming" of PCI-E spec
> +	 */
> +	pcie_tph_disable(dev);
> +
> +	ret = tph_get_table_location(dev, &loc);
> +	if (ret)
> +		return false;
> +
> +	switch (loc) {
> +	case PCI_TPH_LOC_MSIX:
> +		tph_write_tag_to_msix(dev, msix_nr, tag);
handle errors.
> +		break;
> +	case PCI_TPH_LOC_CAP:
> +		offset = dev->tph_cap + PCI_TPH_ST_TABLE
> +			  + msix_nr * sizeof(u16);
> +		pci_write_config_word(dev, offset, tag);
handle errors.
> +		break;
> +	default:
> +		pr_err("unable to write steering tag for device %s\n",
> +		       dev_name(&dev->dev));
> +		return false;
> +	}
> +
> +	/* select interrupt vector mode */
> +	tph_set_ctrl_reg_mode_sel(dev, PCI_TPH_INT_VEC_MODE);
handle errors
> +	tph_set_ctrl_reg_en(dev, req_type);
etc.

> +
> +	return true;
> +}
> +

> +
> +/**
> + * pcie_tph_get_st() - Retrieve steering tag for a specific CPU
> + * @dev: pci device
> + * @cpu: the acpi cpu_uid.
> + * @mem_type: memory type (vram, nvram)
> + * @req_type: request type (disable, tph, extended tph)
> + * @tag: steering tag return value
> + *
> + * Return:
> + *        true : success
> + *        false: failed
> + */
> +bool pcie_tph_get_st(struct pci_dev *dev, unsigned int cpu,

Rename so that it's obvious this isn't just reading back the st
previously set to the device (i.e. it isn't the opposite of
pci_tph_set_st())

pci_tph_get_st_to_use() or something like that.

int so we can have some error information once implemented.
I'm not keen on introducing a stub like this though that doesn't
yet do anything. Bring it in when useful.




> +		    enum tph_mem_type mem_type, u8 req_type,
> +		    u16 *tag)
> +{
> +	*tag = 0;
> +
> +	return true;
> +}
> +EXPORT_SYMBOL(pcie_tph_get_st);
> +
> +/**
> + * pcie_tph_set_st() - Set steering tag in ST table entry
> + * @dev: pci device
> + * @msix_nr: ordinal number of msix interrupt.
> + * @cpu: the acpi cpu_uid.

Given most linux CPU numbers are not necessarily the ACPI CPU ID
I'd call it that.  e.g. cpu_acpi_uid.


> + * @mem_type: memory type (vram, nvram)
> + * @req_type: request type (disable, tph, extended tph)
> + *
> + * Return:
> + *        true : success
> + *        false: failed
> + */
> +bool pcie_tph_set_st(struct pci_dev *dev, unsigned int msix_nr,
> +		     unsigned int cpu, enum tph_mem_type mem_type,
> +		     u8 req_type)
> +{
> +	u16 tag;
> +	bool ret = true;
> +
> +	ret = pcie_tph_get_st(dev, cpu, mem_type, req_type, &tag);
> +
> +	if (!ret)
> +		return false;
> +
> +	pr_debug("%s: writing tag %d for msi-x intr %d (cpu: %d)\n",
> +		 __func__, tag, msix_nr, cpu);
> +
> +	ret = pcie_tph_write_st(dev, msix_nr, req_type, tag);
> +
> +	return ret;
return pcie_tph_write_st() but make it return an integer so caller
gets some information on what when wrong.

> +}
> +EXPORT_SYMBOL(pcie_tph_set_st);
Bjorn Helgaas June 7, 2024, 5:45 p.m. UTC | #3
On Fri, May 31, 2024 at 04:38:37PM -0500, Wei Huang wrote:
> This patch introduces three API functions, pcie_tph_intr_vec_supported(),
> pcie_tph_get_st() and pcie_tph_set_st(), for a driver to query, retrieve
> or configure device's steering tags. There are two possible locations for
> steering tag table and the code automatically figure out the right
> location to set the tags if pcie_tph_set_st() is called. Note the tag
> value is always zero currently and will be extended in the follow-up
> patches.

> +static int tph_get_reg_field_u32(struct pci_dev *dev, u8 offset, u32 mask,
> +				 u8 shift, u32 *field)
> +{
> +	u32 reg_val;
> +	int ret;
> +
> +	if (!dev->tph_cap)
> +		return -EINVAL;
> +
> +	ret = pci_read_config_dword(dev, dev->tph_cap + offset, &reg_val);
> +	if (ret)
> +		return ret;
> +
> +	*field = (reg_val & mask) >> shift;
> +
> +	return 0;
> +}
> +
> +static int tph_get_table_size(struct pci_dev *dev, u16 *size_out)
> +{
> +	int ret;
> +	u32 tmp;
> +
> +	ret = tph_get_reg_field_u32(dev, PCI_TPH_CAP,
> +				    PCI_TPH_CAP_ST_MASK,
> +				    PCI_TPH_CAP_ST_SHIFT, &tmp);

Just use FIELD_GET() instead.

> +	if (ret)
> +		return ret;
> +
> +	*size_out = (u16)tmp;
> +
> +	return 0;
> +}
> +
> +/*
> + * For a given device, return a pointer to the MSI table entry at msi_index.

s/MSI/MSI-X/ to avoid any possible confusion.

> +static void __iomem *tph_msix_table_entry(struct pci_dev *dev,
> +					  u16 msi_index)

> +	ret = pcie_capability_read_dword(rp, PCI_EXP_DEVCAP2, &val);
> +	if (ret) {
> +		pr_err("cannot read device capabilities 2 of %s\n",
> +		       dev_name(&dev->dev));

Never use pr_err() when you can use pci_err() instead.  Obviously no
dev_name() needed with pci_err().  Other instances below.

> +	val &= PCI_EXP_DEVCAP2_TPH_COMP;
> +
> +	return val >> PCI_EXP_DEVCAP2_TPH_COMP_SHIFT;

FIELD_GET()

> + * The PCI Specification version 5.0 requires the "No ST Mode" mode
> + * be supported by any compatible device.

Cite r6.0 or newer and include section number.

> +	/* clear the mode select and enable fields and set new values*/

Space before closing */

> +	ctrl_reg &= ~(PCI_TPH_CTRL_REQ_EN_MASK);
> +	ctrl_reg |= (((u32)req_type << PCI_TPH_CTRL_REQ_EN_SHIFT) &
> +			PCI_TPH_CTRL_REQ_EN_MASK);

FIELD_GET()/FIELD_PREP()

> +static bool pcie_tph_write_st(struct pci_dev *dev, unsigned int msix_nr,
> +			      u8 req_type, u16 tag)

This function is not a predicate and testing for true/false gives no
indication of the sense.

For typical functions that do read/write/etc, returning 0 means
success and -errno means failure.  This is the opposite.

> +	/*
> +	 * disable TPH before updating the tag to avoid potential instability
> +	 * as cautioned about in the "ST Table Programming" of PCI-E spec

s/disable/Disable/

"PCIe r6.0, sec ..."

> +bool pcie_tph_set_st(struct pci_dev *dev, unsigned int msix_nr,
> +		     unsigned int cpu, enum tph_mem_type mem_type,
> +		     u8 req_type)

Should return 0 or -errno.
diff mbox series

Patch

diff --git a/drivers/pci/pcie/tph.c b/drivers/pci/pcie/tph.c
index d5f7309fdf52..320b99c60365 100644
--- a/drivers/pci/pcie/tph.c
+++ b/drivers/pci/pcie/tph.c
@@ -43,6 +43,336 @@  static int tph_set_reg_field_u32(struct pci_dev *dev, u8 offset, u32 mask,
 	return ret;
 }
 
+static int tph_get_reg_field_u32(struct pci_dev *dev, u8 offset, u32 mask,
+				 u8 shift, u32 *field)
+{
+	u32 reg_val;
+	int ret;
+
+	if (!dev->tph_cap)
+		return -EINVAL;
+
+	ret = pci_read_config_dword(dev, dev->tph_cap + offset, &reg_val);
+	if (ret)
+		return ret;
+
+	*field = (reg_val & mask) >> shift;
+
+	return 0;
+}
+
+static int tph_get_table_size(struct pci_dev *dev, u16 *size_out)
+{
+	int ret;
+	u32 tmp;
+
+	ret = tph_get_reg_field_u32(dev, PCI_TPH_CAP,
+				    PCI_TPH_CAP_ST_MASK,
+				    PCI_TPH_CAP_ST_SHIFT, &tmp);
+
+	if (ret)
+		return ret;
+
+	*size_out = (u16)tmp;
+
+	return 0;
+}
+
+/*
+ * For a given device, return a pointer to the MSI table entry at msi_index.
+ */
+static void __iomem *tph_msix_table_entry(struct pci_dev *dev,
+					  u16 msi_index)
+{
+	void __iomem *entry;
+	u16 tbl_sz;
+	int ret;
+
+	ret = tph_get_table_size(dev, &tbl_sz);
+	if (ret || msi_index > tbl_sz)
+		return NULL;
+
+	entry = dev->msix_base + msi_index * PCI_MSIX_ENTRY_SIZE;
+
+	return entry;
+}
+
+/*
+ * For a given device, return a pointer to the vector control register at
+ * offset 0xc of MSI table entry at msi_index.
+ */
+static void __iomem *tph_msix_vector_control(struct pci_dev *dev,
+					     u16 msi_index)
+{
+	void __iomem *vec_ctrl_addr = tph_msix_table_entry(dev, msi_index);
+
+	if (vec_ctrl_addr)
+		vec_ctrl_addr += PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+	return vec_ctrl_addr;
+}
+
+/*
+ * Translate from MSI-X interrupt index to struct msi_desc *
+ */
+static struct msi_desc *tph_msix_index_to_desc(struct pci_dev *dev, int index)
+{
+	struct msi_desc *entry;
+
+	msi_lock_descs(&dev->dev);
+	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ASSOCIATED) {
+		if (entry->msi_index == index)
+			return entry;
+	}
+	msi_unlock_descs(&dev->dev);
+
+	return NULL;
+}
+
+static bool tph_int_vec_mode_supported(struct pci_dev *dev)
+{
+	u32 mode = 0;
+	int ret;
+
+	ret = tph_get_reg_field_u32(dev, PCI_TPH_CAP,
+				    PCI_TPH_CAP_INT_VEC,
+				    PCI_TPH_CAP_INT_VEC_SHIFT, &mode);
+	if (ret)
+		return false;
+
+	return !!mode;
+}
+
+static int tph_get_table_location(struct pci_dev *dev, u8 *loc_out)
+{
+	u32 loc;
+	int ret;
+
+	ret = tph_get_reg_field_u32(dev, PCI_TPH_CAP, PCI_TPH_CAP_LOC_MASK,
+				    PCI_TPH_CAP_LOC_SHIFT, &loc);
+	if (ret)
+		return ret;
+
+	*loc_out = (u8)loc;
+
+	return 0;
+}
+
+static bool msix_nr_in_bounds(struct pci_dev *dev, int msix_nr)
+{
+	u16 tbl_sz;
+
+	if (tph_get_table_size(dev, &tbl_sz))
+		return false;
+
+	return msix_nr <= tbl_sz;
+}
+
+/* Return root port capability - 0 means none */
+static int get_root_port_completer_cap(struct pci_dev *dev)
+{
+	struct pci_dev *rp;
+	int ret;
+	int val;
+
+	rp = pcie_find_root_port(dev);
+	if (!rp) {
+		pr_err("cannot find root port of %s\n", dev_name(&dev->dev));
+		return 0;
+	}
+
+	ret = pcie_capability_read_dword(rp, PCI_EXP_DEVCAP2, &val);
+	if (ret) {
+		pr_err("cannot read device capabilities 2 of %s\n",
+		       dev_name(&dev->dev));
+		return 0;
+	}
+
+	val &= PCI_EXP_DEVCAP2_TPH_COMP;
+
+	return val >> PCI_EXP_DEVCAP2_TPH_COMP_SHIFT;
+}
+
+/*
+ * TPH device needs to be below a rootport with the TPH Completer and
+ * the completer must offer a compatible level of completer support to that
+ * requested by the device driver.
+ */
+static bool completer_support_ok(struct pci_dev *dev, u8 req)
+{
+	int rp_cap;
+
+	rp_cap = get_root_port_completer_cap(dev);
+
+	if (req > rp_cap) {
+		pr_err("root port lacks proper TPH completer capability\n");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * The PCI Specification version 5.0 requires the "No ST Mode" mode
+ * be supported by any compatible device.
+ */
+static bool no_st_mode_supported(struct pci_dev *dev)
+{
+	bool no_st;
+	int ret;
+	u32 tmp;
+
+	ret = tph_get_reg_field_u32(dev, PCI_TPH_CAP, PCI_TPH_CAP_NO_ST,
+				    PCI_TPH_CAP_NO_ST_SHIFT, &tmp);
+	if (ret)
+		return false;
+
+	no_st = !!tmp;
+
+	if (!no_st) {
+		pr_err("TPH devices must support no ST mode\n");
+		return false;
+	}
+
+	return true;
+}
+
+static int tph_write_ctrl_reg(struct pci_dev *dev, u32 value)
+{
+	int ret;
+
+	ret = tph_set_reg_field_u32(dev, PCI_TPH_CTRL, ~0L, 0, value);
+
+	if (ret)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	/* minimizing possible harm by disabling TPH */
+	pcie_tph_disable(dev);
+	return ret;
+}
+
+/* Update the ST Mode Select field of the TPH Control Register */
+static int tph_set_ctrl_reg_mode_sel(struct pci_dev *dev, u8 st_mode)
+{
+	int ret;
+	u32 ctrl_reg;
+
+	ret = tph_get_reg_field_u32(dev, PCI_TPH_CTRL, ~0L, 0, &ctrl_reg);
+	if (ret)
+		return ret;
+
+	/* clear the mode select and enable fields */
+	ctrl_reg &= ~(PCI_TPH_CTRL_MODE_SEL_MASK);
+	ctrl_reg |= ((u32)(st_mode << PCI_TPH_CTRL_MODE_SEL_SHIFT) &
+		     PCI_TPH_CTRL_MODE_SEL_MASK);
+
+	ret = tph_write_ctrl_reg(dev, ctrl_reg);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/* Write the steering tag to MSI-X vector control register */
+static void tph_write_tag_to_msix(struct pci_dev *dev, int msix_nr, u16 tag)
+{
+	u32 val;
+	void __iomem *vec_ctrl;
+	struct msi_desc *msi_desc;
+
+	msi_desc = tph_msix_index_to_desc(dev, msix_nr);
+	if (!msi_desc) {
+		pr_err("MSI-X descriptor for #%d not found\n", msix_nr);
+		return;
+	}
+
+	vec_ctrl = tph_msix_vector_control(dev, msi_desc->msi_index);
+
+	val = readl(vec_ctrl);
+	val &= 0xffff;
+	val |= (tag << 16);
+	writel(val, vec_ctrl);
+
+	/* read back to flush the update */
+	val = readl(vec_ctrl);
+	msi_unlock_descs(&dev->dev);
+}
+
+/* Update the TPH Requester Enable field of the TPH Control Register */
+static int tph_set_ctrl_reg_en(struct pci_dev *dev, u8 req_type)
+{
+	int ret;
+	u32 ctrl_reg;
+
+	ret = tph_get_reg_field_u32(dev, PCI_TPH_CTRL, ~0L, 0,
+				    &ctrl_reg);
+	if (ret)
+		return ret;
+
+	/* clear the mode select and enable fields and set new values*/
+	ctrl_reg &= ~(PCI_TPH_CTRL_REQ_EN_MASK);
+	ctrl_reg |= (((u32)req_type << PCI_TPH_CTRL_REQ_EN_SHIFT) &
+			PCI_TPH_CTRL_REQ_EN_MASK);
+
+	ret = tph_write_ctrl_reg(dev, ctrl_reg);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static bool pcie_tph_write_st(struct pci_dev *dev, unsigned int msix_nr,
+			      u8 req_type, u16 tag)
+{
+	int offset;
+	u8  loc;
+	int ret;
+
+	/* setting ST isn't needed - not an error, just return true */
+	if (!dev->tph_cap || pci_tph_disabled() || pci_tph_nostmode() ||
+	    !dev->msix_enabled || !tph_int_vec_mode_supported(dev))
+		return true;
+
+	/* setting ST is incorrect in the following cases - return error */
+	if (!no_st_mode_supported(dev) || !msix_nr_in_bounds(dev, msix_nr) ||
+	    !completer_support_ok(dev, req_type))
+		return false;
+
+	/*
+	 * disable TPH before updating the tag to avoid potential instability
+	 * as cautioned about in the "ST Table Programming" of PCI-E spec
+	 */
+	pcie_tph_disable(dev);
+
+	ret = tph_get_table_location(dev, &loc);
+	if (ret)
+		return false;
+
+	switch (loc) {
+	case PCI_TPH_LOC_MSIX:
+		tph_write_tag_to_msix(dev, msix_nr, tag);
+		break;
+	case PCI_TPH_LOC_CAP:
+		offset = dev->tph_cap + PCI_TPH_ST_TABLE
+			  + msix_nr * sizeof(u16);
+		pci_write_config_word(dev, offset, tag);
+		break;
+	default:
+		pr_err("unable to write steering tag for device %s\n",
+		       dev_name(&dev->dev));
+		return false;
+	}
+
+	/* select interrupt vector mode */
+	tph_set_ctrl_reg_mode_sel(dev, PCI_TPH_INT_VEC_MODE);
+	tph_set_ctrl_reg_en(dev, req_type);
+
+	return true;
+}
+
 int tph_set_dev_nostmode(struct pci_dev *dev)
 {
 	int ret;
@@ -77,3 +407,75 @@  void pcie_tph_init(struct pci_dev *dev)
 	dev->tph_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_TPH);
 }
 
+/**
+ * pcie_tph_intr_vec_supported() - Check if interrupt vector mode supported for dev
+ * @dev: pci device
+ *
+ * Return:
+ *        true : intr vector mode supported
+ *        false: intr vector mode not supported
+ */
+bool pcie_tph_intr_vec_supported(struct pci_dev *dev)
+{
+	if (!dev->tph_cap || pci_tph_disabled() || !dev->msix_enabled ||
+	    !tph_int_vec_mode_supported(dev))
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL(pcie_tph_intr_vec_supported);
+
+/**
+ * pcie_tph_get_st() - Retrieve steering tag for a specific CPU
+ * @dev: pci device
+ * @cpu: the acpi cpu_uid.
+ * @mem_type: memory type (vram, nvram)
+ * @req_type: request type (disable, tph, extended tph)
+ * @tag: steering tag return value
+ *
+ * Return:
+ *        true : success
+ *        false: failed
+ */
+bool pcie_tph_get_st(struct pci_dev *dev, unsigned int cpu,
+		    enum tph_mem_type mem_type, u8 req_type,
+		    u16 *tag)
+{
+	*tag = 0;
+
+	return true;
+}
+EXPORT_SYMBOL(pcie_tph_get_st);
+
+/**
+ * pcie_tph_set_st() - Set steering tag in ST table entry
+ * @dev: pci device
+ * @msix_nr: ordinal number of msix interrupt.
+ * @cpu: the acpi cpu_uid.
+ * @mem_type: memory type (vram, nvram)
+ * @req_type: request type (disable, tph, extended tph)
+ *
+ * Return:
+ *        true : success
+ *        false: failed
+ */
+bool pcie_tph_set_st(struct pci_dev *dev, unsigned int msix_nr,
+		     unsigned int cpu, enum tph_mem_type mem_type,
+		     u8 req_type)
+{
+	u16 tag;
+	bool ret = true;
+
+	ret = pcie_tph_get_st(dev, cpu, mem_type, req_type, &tag);
+
+	if (!ret)
+		return false;
+
+	pr_debug("%s: writing tag %d for msi-x intr %d (cpu: %d)\n",
+		 __func__, tag, msix_nr, cpu);
+
+	ret = pcie_tph_write_st(dev, msix_nr, req_type, tag);
+
+	return ret;
+}
+EXPORT_SYMBOL(pcie_tph_set_st);
diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h
index 95269afc8b7d..4fbd1e2fd98c 100644
--- a/include/linux/pci-tph.h
+++ b/include/linux/pci-tph.h
@@ -9,14 +9,36 @@ 
 #ifndef LINUX_PCI_TPH_H
 #define LINUX_PCI_TPH_H
 
+enum tph_mem_type {
+	TPH_MEM_TYPE_VM,	/* volatile memory type */
+	TPH_MEM_TYPE_PM		/* persistent memory type */
+};
+
 #ifdef CONFIG_PCIE_TPH
 int pcie_tph_disable(struct pci_dev *dev);
 int tph_set_dev_nostmode(struct pci_dev *dev);
+bool pcie_tph_intr_vec_supported(struct pci_dev *dev);
+bool pcie_tph_get_st(struct pci_dev *dev, unsigned int cpu,
+		     enum tph_mem_type tag_type, u8 req_enable,
+		     u16 *tag);
+bool pcie_tph_set_st(struct pci_dev *dev, unsigned int msix_nr,
+		     unsigned int cpu, enum tph_mem_type tag_type,
+		     u8 req_enable);
 #else
 static inline int pcie_tph_disable(struct pci_dev *dev)
 { return -EOPNOTSUPP; }
 static inline int tph_set_dev_nostmode(struct pci_dev *dev)
 { return -EOPNOTSUPP; }
+static inline bool pcie_tph_intr_vec_supported(struct pci_dev *dev)
+{ return false; }
+static inline bool pcie_tph_get_st(struct pci_dev *dev, unsigned int cpu,
+				   enum tph_mem_type tag_type, u8 req_enable,
+				   u16 *tag)
+{ return false; }
+static inline bool pcie_tph_set_st(struct pci_dev *dev, unsigned int msix_nr,
+				   unsigned int cpu, enum tph_mem_type tag_type,
+				   u8 req_enable)
+{ return true; }
 #endif
 
 #endif /* LINUX_PCI_TPH_H */