@@ -41,8 +41,6 @@ Example:
ibm,cpu-idle-state-flags = <0x11000 0x81003 0x47003>;
ibm,cpu-idle-state-names = "nap", "fastsleep_", "winkle";
ibm,cpu-idle-state-pmicr = <0x0 0x0 0x20 0x0 0x0 0x0>;
- ibm,pstate-vdds = <0x5758595a 0x5a5b5c5d 0x5e5e5f60 0x61626263 0x64656566 0x66676768 0x6869696a 0x6a6b6b6c 0x6c6d6d6e 0x6e6f6f70 0x70717272>;
- ibm,pstate-vcss = <0x4d4e4f4f 0x50505152 0x52535354 0x55555657 0x57585859 0x59595a5a 0x5a5b5b5c 0x5c5c5d5d 0x5d5e5e5f 0x5f5f6060 0x60616162>;
ibm,pstate-nominal = <0xffffffef>;
ibm,cpu-idle-state-residency-ns = <0x186a0 0x11e1a300 0x3b9aca00>;
ibm,cpu-idle-state-pmicr-mask = <0x0 0x0 0x30 0x0 0x0 0x0>;
@@ -104,20 +102,22 @@ ibm,pstate-max ibm,pstate-min ibm,pstate-nominal
These properties give the maximum, minimum and nominal pstate values, as an id
specified in the ibm,pstate-ids file.
-
-ibm,pstate-vcss ibm,pstate-vdds
--------------------------------
-
-These properties list a voltage-identifier of each of the pstates listed in
-ibm,pstate-ids for the Vcs and Vdd values used for that pstate. Each VID is a
-single byte.
-
ibm,pstate-ultra-turbo ibm,pstate-turbo
---------------------------------------
These properties are added when ultra-turbo(WOF) is enabled. These properties
give the max turbo and max ultra-turbo pstate.
+
+Example:
+
+.. code-block:: dts
+
+ power-mgt {
+ ibm,pstate-core-max = <0x0 0x0 0x0 0x0 0x0 0x0 0x0>;
+ ibm,pstate-turbo = <0xfffffffb>;
+ ibm,pstate-ultra-turbo = <0x0>;
+ };
+
ibm,pstate-core-max
-------------------
new file mode 100644
@@ -0,0 +1,38 @@
+ibm,opal/power-mgt/occ device tree entries
+==========================================
+
+This node exports the per-chip pstate table properties to the kernel.
+
+Example:
+
+.. code-block:: dts
+
+occ@7ffddf8000 {
+ ibm,pstate-vdds = [45 45 46 46 46 47 48 49 4a 4b 4c 4d 4f 50 51 52 53 54 55 57 58 59 5a 5b 5c 5d 5e 5f 5f 60 61 62 63 64 65 65 66 67 68 69 6a 6a 6b 6c 6d 6e 6f 70 70 71];
+ ibm,chip-id = <0x1>;
+ phandle = <0x100003b8>;
+ ibm,pstate-vcss = [3b 3d 3f 41 42 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f 50 50 51 52 53 54 55 56 56 57 57 58 58 59 59 5a 5a 5b 5b 5c 5c 5d 5d 5e 5e 5f 5f 60 60 61 61 62 62];
+ reg = <0x7f 0xfddf8000 0xb98>;
+};
+
+ibm,chip-id
+-----------
+
+This property denotes the ID of the chip to which the OCC belongs.
+
+reg
+---
+
+This tuple gives the starting address of the OPAL data in HOMER and
+the size of the OPAL data.
+
+The top-level /ibm,opal/power-mgt contains :
+ #size-cells = <1>
+ #address-cells = <2>
+
+ibm,pstate-vcss ibm,pstate-vdds
+-------------------------------
+
+These properties list a voltage-identifier of each of the pstates listed in
+ibm,pstate-ids for the Vcs and Vdd values used for that pstate in that chip.
+Each VID is a single byte.
@@ -30,44 +30,133 @@
/* OCC Communication Area for PStates */
-#define P8_HOMER_SAPPHIRE_DATA_OFFSET 0x1F8000
+#define P8_HOMER_OPAL_DATA_OFFSET 0x1F8000
+#define P9_HOMER_OPAL_DATA_OFFSET 0x0E2000
-#define MAX_PSTATES 256
+#define OPAL_DYNAMIC_DATA_OFFSET 0x0B80
+/* relative to HOMER_OPAL_DATA_OFFSET */
-#define chip_occ_data(chip) \
- ((struct occ_pstate_table *)(chip->homer_base + \
- P8_HOMER_SAPPHIRE_DATA_OFFSET))
+#define MAX_PSTATES 256
+#define MAX_P8_CORES 12
+#define MAX_P9_CORES 24
-static bool occ_reset;
-static struct lock occ_lock = LOCK_UNLOCKED;
-
-struct occ_pstate_entry {
- s8 id;
- u8 flags;
- u8 vdd;
- u8 vcs;
- u32 freq_khz;
-} __packed;
-
-/*
- * OCC-OPAL Shared Memory Region Version 2
- * https://github.com/open-power/occ/blob/master/src/occ/proc/proc_pstate.h
- * Interface defined in 'sapphire_table_t'
+/**
+ * OCC-OPAL Shared Memory Region
+ *
+ * Reference document :
+ * https://github.com/open-power/docs/blob/master/occ/OCC_OpenPwr_FW_Interfaces.pdf
+ *
+ * Supported layout versions:
+ * - 0x01, 0x02 : P8
+ * https://github.com/open-power/occ/blob/master_p8/src/occ/proc/proc_pstate.h
+ *
+ * - 0x90 : P9
+ * https://github.com/open-power/occ/blob/master/src/occ_405/proc/proc_pstate.h
+ * In 0x90 the data is separated into :-
+ * -- Static Data (struct occ_pstate_table): Data is written once by OCC
+ * -- Dynamic Data (struct occ_dynamic_data): Data is updated at runtime
+ *
+ * struct occ_pstate_table - Pstate table layout
+ * @valid: Indicates if data is valid
+ * @version: Layout version
+ * @v2.throttle: Reason for limiting the max pstate
+ * @v9.occ_role: OCC role (Master/Slave)
+ * @v#.pstate_min: Minimum pstate ever allowed
+ * @v#.pstate_nom: Nominal pstate
+ * @v#.pstate_turbo: Maximum turbo pstate
+ * @v#.pstate_ultra_turbo: Maximum ultra turbo pstate and the maximum
+ * pstate ever allowed
+ * @v#.pstates: Pstate-id and frequency list from Pmax to Pmin
+ * @v#.pstates.id: Pstate-id
+ * @v#.pstates.flags: Pstate-flag(reserved)
+ * @v2.pstates.vdd: Voltage Identifier
+ * @v2.pstates.vcs: Voltage Identifier
+ * @v#.pstates.freq_khz: Frequency in KHz
+ * @v#.core_max[1..N]: Max pstate with N active cores
+ * @spare/reserved/pad: Unused data
*/
struct occ_pstate_table {
u8 valid;
u8 version;
- u8 throttle;
- s8 pstate_min;
- s8 pstate_nom;
- s8 pstate_turbo;
- s8 pstate_ultra_turbo;
- u8 spare;
+ union __packed {
+ struct __packed { /* Version 0x01 and 0x02 */
+ u8 throttle;
+ s8 pstate_min;
+ s8 pstate_nom;
+ s8 pstate_turbo;
+ s8 pstate_ultra_turbo;
+ u8 spare;
+ u64 reserved;
+ struct __packed {
+ s8 id;
+ u8 flags;
+ u8 vdd;
+ u8 vcs;
+ u32 freq_khz;
+ } pstates[MAX_PSTATES];
+ s8 core_max[MAX_P8_CORES];
+ u8 pad[100];
+ } v2;
+ struct __packed { /* Version 0x90 */
+ u8 occ_role;
+ u8 pstate_min;
+ u8 pstate_nom;
+ u8 pstate_turbo;
+ u8 pstate_ultra_turbo;
+ u8 spare;
+ u64 reserved1;
+ u64 reserved2;
+ struct __packed {
+ u8 id;
+ u8 flags;
+ u16 reserved;
+ u32 freq_khz;
+ } pstates[MAX_PSTATES];
+ u8 core_max[MAX_P9_CORES];
+ u8 pad[56];
+ } v9;
+ };
+} __packed;
+
+/**
+ * OCC-OPAL Shared Memory Interface Dynamic Data Vx90
+ *
+ * struct occ_dynamic_data - Contains runtime attributes
+ * @occ_state: Current state of OCC
+ * @cpu_throttle: Reason for limiting the max pstate
+ * @mem_throttle: Reason for throttling memory
+ * @quick_pwr_drop: Indicates if QPD is asserted
+ * @pwr_shifting_ratio: Indicates the current percentage of power to
+ * take away from the CPU vs GPU when shifting
+ * power to maintain a power cap. Value of 100
+ * means take all power from CPU.
+ * @pwr_cap_type: Indicates type of power cap in effect
+ * @min_pwr_cap: Minimum allowed system power cap in Watts
+ * @max_pwr_cap: Maximum allowed system power cap in Watts
+ * @cur_pwr_cap: Current system power cap
+ * @spare/reserved: Unused data
+ */
+struct occ_dynamic_data {
+ u8 occ_state;
+ u8 spare1;
+ u8 spare2;
+ u8 spare3;
+ u8 spare4;
+ u8 cpu_throttle;
+ u8 mem_throttle;
+ u8 quick_pwr_drop;
+ u8 pwr_shifting_ratio;
+ u8 pwr_cap_type;
+ u16 min_pwr_cap;
+ u16 max_pwr_cap;
+ u16 cur_pwr_cap;
u64 reserved;
- struct occ_pstate_entry pstates[MAX_PSTATES];
- s8 core_max[16];
} __packed;
+static bool occ_reset;
+static struct lock occ_lock = LOCK_UNLOCKED;
+static unsigned long homer_opal_data_offset;
+
DEFINE_LOG_ENTRY(OPAL_RC_OCC_LOAD, OPAL_PLATFORM_ERR_EVT, OPAL_OCC,
OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_GENERAL,
OPAL_NA);
@@ -128,12 +217,26 @@ static int cmp_negative_pstates(int a, int b)
return 0;
}
+/*
+ * Return the OCC pstate table of @chip: the structure located at
+ * chip->homer_base + homer_opal_data_offset.
+ */
+static inline
+struct occ_pstate_table *get_occ_pstate_table(struct proc_chip *chip)
+{
+	uint64_t table_addr = chip->homer_base + homer_opal_data_offset;
+
+	return (struct occ_pstate_table *)table_addr;
+}
+
+/*
+ * Return the OCC dynamic data of @chip. It lives OPAL_DYNAMIC_DATA_OFFSET
+ * bytes past the start of the OPAL data area
+ * (chip->homer_base + homer_opal_data_offset).
+ */
+static inline
+struct occ_dynamic_data *get_occ_dynamic_data(struct proc_chip *chip)
+{
+	uint64_t opal_data_addr = chip->homer_base + homer_opal_data_offset;
+	uint64_t dynamic_addr = opal_data_addr + OPAL_DYNAMIC_DATA_OFFSET;
+
+	return (struct occ_dynamic_data *)dynamic_addr;
+}
+
/* Check each chip's HOMER/Sapphire area for PState valid bit */
static bool wait_for_all_occ_init(void)
{
struct proc_chip *chip;
struct dt_node *xn;
- uint64_t occ_data_area;
struct occ_pstate_table *occ_data;
int tries;
uint64_t start_time, end_time;
@@ -161,8 +264,7 @@ static bool wait_for_all_occ_init(void)
}
/* Get PState table address */
- occ_data_area = chip->homer_base + P8_HOMER_SAPPHIRE_DATA_OFFSET;
- occ_data = (struct occ_pstate_table *)occ_data_area;
+ occ_data = get_occ_pstate_table(chip);
/*
* Checking for occ_data->valid == 1 is ok because we clear all
@@ -193,8 +295,7 @@ static bool wait_for_all_occ_init(void)
chip->occ_functional = true;
prlog(PR_DEBUG, "OCC: Chip %02x Data (%016llx) = %016llx\n",
- chip->id, occ_data_area,
- *(uint64_t *)occ_data_area);
+ chip->id, (uint64_t)occ_data, *(uint64_t *)occ_data);
}
end_time = mftb();
prlog(PR_NOTICE, "OCC: All Chip Rdy after %lu ms\n",
@@ -210,39 +311,117 @@ static bool wait_for_all_occ_init(void)
return true;
}
+/*
+ * parse_pstates_v2 - Collect pstate ids and frequencies from a table that
+ *                    uses the v2 layout (versions 0x01 and 0x02)
+ * @data:       OCC pstate table, read through the v2 member
+ * @dt_id:      Output array with room for nr_pstates pstate ids
+ * @dt_freq:    Output array with room for nr_pstates frequencies, stored
+ *              as freq_khz / 1000
+ * @nr_pstates: Number of entries expected between pmax and pmin
+ * @pmax:       Highest pstate to export
+ * @pmin:       Lowest pstate to export
+ *
+ * OCC provides pstate table entries in continuous descending order.
+ * Entries whose id compares greater than pmax are skipped. Copying stops
+ * after the entry whose id equals pmin, as this is the last valid element
+ * in the pstate table.
+ *
+ * If fewer than nr_pstates entries were found, the mismatch is logged and
+ * the unfilled tail of both arrays is zeroed so that no uninitialised
+ * memory is handed back to the caller.
+ */
+static void parse_pstates_v2(struct occ_pstate_table *data, u32 *dt_id,
+			     u32 *dt_freq, int nr_pstates, int pmax, int pmin)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
+		if (cmp_pstates(data->v2.pstates[i].id, pmax) > 0)
+			continue;
+
+		dt_id[j] = data->v2.pstates[i].id;
+		dt_freq[j] = data->v2.pstates[i].freq_khz / 1000;
+		j++;
+
+		if (data->v2.pstates[i].id == pmin)
+			break;
+	}
+
+	if (j == nr_pstates)
+		return;
+
+	prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
+		nr_pstates, j);
+
+	/* Give the entries that were never written a defined value */
+	for (; j < nr_pstates; j++) {
+		dt_id[j] = 0;
+		dt_freq[j] = 0;
+	}
+}
+
+/*
+ * parse_pstates_v9 - Collect pstate ids and frequencies from a table that
+ *                    uses the v9 layout (version 0x90)
+ * @data:       OCC pstate table, read through the v9 member
+ * @dt_id:      Output array with room for nr_pstates pstate ids
+ * @dt_freq:    Output array with room for nr_pstates frequencies, stored
+ *              as freq_khz / 1000
+ * @nr_pstates: Number of entries expected between pmax and pmin
+ * @pmax:       Highest pstate to export
+ * @pmin:       Lowest pstate to export
+ *
+ * Entries whose id compares greater than pmax are skipped. Copying stops
+ * after the entry whose id equals pmin.
+ *
+ * If fewer than nr_pstates entries were found, the mismatch is logged and
+ * the unfilled tail of both arrays is zeroed so that no uninitialised
+ * memory is handed back to the caller.
+ */
+static void parse_pstates_v9(struct occ_pstate_table *data, u32 *dt_id,
+			     u32 *dt_freq, int nr_pstates, int pmax, int pmin)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
+		if (cmp_pstates(data->v9.pstates[i].id, pmax) > 0)
+			continue;
+
+		dt_id[j] = data->v9.pstates[i].id;
+		dt_freq[j] = data->v9.pstates[i].freq_khz / 1000;
+		j++;
+
+		if (data->v9.pstates[i].id == pmin)
+			break;
+	}
+
+	if (j == nr_pstates)
+		return;
+
+	prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
+		nr_pstates, j);
+
+	/* Give the entries that were never written a defined value */
+	for (; j < nr_pstates; j++) {
+		dt_id[j] = 0;
+		dt_freq[j] = 0;
+	}
+}
+
+/*
+ * parse_vid - Add the per-pstate voltage identifiers of one chip to the DT
+ * @occ_data:   OCC pstate table of the chip, read through the v2 member
+ * @node:       Device tree node that receives the properties
+ * @nr_pstates: Number of entries expected between pmax and pmin
+ * @pmax:       Highest pstate to export
+ * @pmin:       Lowest pstate to export
+ *
+ * Walks the table the same way the pstate id/frequency parsers do: entries
+ * whose id compares greater than pmax are skipped and the walk ends after
+ * the entry whose id equals pmin. The collected vdd and vcs bytes are added
+ * to @node as "ibm,pstate-vdds" and "ibm,pstate-vcss".
+ *
+ * If fewer than nr_pstates entries were found, the mismatch is logged and
+ * the unfilled tail of both arrays is zeroed so that no uninitialised
+ * memory ends up in the device tree.
+ */
+static void parse_vid(struct occ_pstate_table *occ_data,
+		      struct dt_node *node, u8 nr_pstates,
+		      int pmax, int pmin)
+{
+	u8 *dt_vdd, *dt_vcs;
+	int i, j;
+
+	dt_vdd = malloc(nr_pstates);
+	assert(dt_vdd);
+	dt_vcs = malloc(nr_pstates);
+	assert(dt_vcs);
+
+	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
+		if (cmp_pstates(occ_data->v2.pstates[i].id, pmax) > 0)
+			continue;
+
+		dt_vdd[j] = occ_data->v2.pstates[i].vdd;
+		dt_vcs[j] = occ_data->v2.pstates[i].vcs;
+		j++;
+
+		if (occ_data->v2.pstates[i].id == pmin)
+			break;
+	}
+
+	if (j != nr_pstates) {
+		prerror("OCC: Expected pstates(%d) is not equal to parsed VIDs(%d)\n",
+			nr_pstates, j);
+
+		/* Give the entries that were never written a defined value */
+		for (; j < nr_pstates; j++) {
+			dt_vdd[j] = 0;
+			dt_vcs[j] = 0;
+		}
+	}
+
+	dt_add_property(node, "ibm,pstate-vdds", dt_vdd, nr_pstates);
+	dt_add_property(node, "ibm,pstate-vcss", dt_vcs, nr_pstates);
+
+	free(dt_vdd);
+	free(dt_vcs);
+}
+
/* Add device tree properties to describe pstates states */
-/* Retrun nominal pstate to set in each core */
-static bool add_cpu_pstate_properties(s8 *pstate_nom)
+/* Return nominal pstate to set in each core */
+static bool add_cpu_pstate_properties(int *pstate_nom)
{
struct proc_chip *chip;
uint64_t occ_data_area;
struct occ_pstate_table *occ_data;
struct dt_node *power_mgt;
- u8 nr_pstates, nr_cores = 0;
- s8 pmax;
/* Arrays for device tree */
u32 *dt_id, *dt_freq;
- u8 *dt_vdd, *dt_vcs;
- s8 *dt_core_max = NULL;
- bool rc, ultra_turbo_en;
- int i, j;
+ int pmax, pmin, pnom;
+ u8 nr_pstates;
+ bool ultra_turbo_supported;
+ int i;
prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
- /* Find first chip and core */
+ /* Find first chip */
chip = next_chip(NULL);
/* Extract PState information from OCC */
+ occ_data = get_occ_pstate_table(chip);
- /* Dump state table */
- occ_data_area = chip->homer_base + P8_HOMER_SAPPHIRE_DATA_OFFSET;
-
+ /* Dump first 16 bytes of PState table */
+ occ_data_area = (uint64_t)occ_data;
prlog(PR_DEBUG, "OCC: Data (%16llx) = %16llx %16llx\n",
occ_data_area,
*(uint64_t *)occ_data_area,
- *(uint64_t *)(occ_data_area+8));
-
- occ_data = (struct occ_pstate_table *)occ_data_area;
+ *(uint64_t *)(occ_data_area + 8));
if (!occ_data->valid) {
/**
@@ -260,21 +439,70 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
/*
* Workload-Optimized-Frequency(WOF) or Ultra-Turbo is supported
- * from version 2 onwards. If WOF is disabled then, the max
+ * from version 0x02 onwards. If WOF is disabled then, the max
* ultra_turbo pstate will be equal to max turbo pstate.
*/
- if (occ_data->version > 1 &&
- cmp_pstates(occ_data->pstate_ultra_turbo,
- occ_data->pstate_turbo) > 0)
- ultra_turbo_en = true;
- else
- ultra_turbo_en = false;
-
- pmax = ultra_turbo_en ? occ_data->pstate_ultra_turbo :
- occ_data->pstate_turbo;
+ ultra_turbo_supported = true;
+
+ /* Parse Pmax, Pmin and Pnominal */
+ switch (occ_data->version) {
+ case 0x01:
+ ultra_turbo_supported = false;
+ case 0x02:
+ if (proc_gen == proc_gen_p9) {
+ /**
+ * @fwts-label OCCInvalidVersion02
+ * @fwts-advice The PState table layout version is not
+ * supported in P9. So OPAL will not parse the PState
+ * table. CPU frequency scaling will not be functional
+ * as frequency and pstate-ids are not added to DT.
+ */
+ prerror("OCC: Version %x is not supported in P9\n",
+ occ_data->version);
+ return false;
+ }
+ pmin = occ_data->v2.pstate_min;
+ pnom = occ_data->v2.pstate_nom;
+ if (ultra_turbo_supported)
+ pmax = occ_data->v2.pstate_ultra_turbo;
+ else
+ pmax = occ_data->v2.pstate_turbo;
+ break;
+ case 0x90:
+ if (proc_gen == proc_gen_p8) {
+ /**
+ * @fwts-label OCCInvalidVersion90
+ * @fwts-advice The PState table layout version is not
+ * supported in P8. So OPAL will not parse the PState
+ * table. CPU frequency scaling will not be functional
+ * as frequency and pstate-ids are not added to DT.
+ */
+ prerror("OCC: Version %x is not supported in P8\n",
+ occ_data->version);
+ return false;
+ }
+ pmin = occ_data->v9.pstate_min;
+ pnom = occ_data->v9.pstate_nom;
+ if (ultra_turbo_supported)
+ pmax = occ_data->v9.pstate_ultra_turbo;
+ else
+ pmax = occ_data->v9.pstate_turbo;
+ break;
+ default:
+ /**
+ * @fwts-label OCCUnsupportedVersion
+ * @fwts-advice The PState table layout version is not
+ * supported. So OPAL will not parse the PState table.
+ * CPU frequency scaling will not be functional as OPAL
+ * doesn't populate the device tree with pstates.
+ */
+ prerror("OCC: Unsupported pstate table layout version %d\n",
+ occ_data->version);
+ return false;
+ }
/* Sanity check for pstate limits */
- if (cmp_pstates(occ_data->pstate_min, pmax) > 0) {
+ if (cmp_pstates(pmin, pmax) > 0) {
/**
* @fwts-label OCCInvalidPStateLimits
* @fwts-advice The min pstate is greater than the
@@ -284,16 +512,27 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
* CPU Frequency management will not be functional in
* the host.
*/
- prlog(PR_ERR, "OCC: Invalid Pstate Limits. Pmin(%d) > Pmax (%d)\n",
- occ_data->pstate_min, pmax);
+ prerror("OCC: Invalid pstate limits. Pmin(%d) > Pmax (%d)\n",
+ pmin, pmax);
return false;
}
- nr_pstates = labs(pmax - occ_data->pstate_min) + 1;
- prlog(PR_DEBUG, "OCC: Min %d Nom %d Max %d Nr States %d\n",
- occ_data->pstate_min, occ_data->pstate_nom,
- pmax, nr_pstates);
+ if (cmp_pstates(pnom, pmax) > 0) {
+ /**
+ * @fwts-label OCCInvalidNominalPState
+ * @fwts-advice The nominal pstate is greater than the
+ * max pstate, this could be due to corrupted/invalid
+ * data in OCC-OPAL shared memory region. So OPAL has
+ * limited the nominal pstate to max pstate.
+ */
+ prerror("OCC: Clipping nominal pstate(%d) to Pmax(%d)\n",
+ pnom, pmax);
+ pnom = pmax;
+ }
+ nr_pstates = labs(pmax - pmin) + 1;
+ prlog(PR_DEBUG, "OCC: Version %x Min %d Nom %d Max %d Nr States %d\n",
+ occ_data->version, pmin, pnom, pmax, nr_pstates);
if (nr_pstates <= 1 || nr_pstates > 128) {
/**
* @fwts-label OCCInvalidPStateRange
@@ -304,7 +543,8 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
* that CPU idle states and CPU frequency scaling
* will not be functional.
*/
- prlog(PR_ERR, "OCC: OCC range is not valid\n");
+ prerror("OCC: OCC range is not valid; No of pstates = %d\n",
+ nr_pstates);
return false;
}
@@ -320,141 +560,134 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
return false;
}
- rc = false;
-
- /* Setup arrays for device-tree */
- /* Allocate memory */
dt_id = malloc(nr_pstates * sizeof(u32));
- if (!dt_id) {
- /**
- * @fwts-label OCCdt_idENOMEM
- * @fwts-advice Out of memory when allocating pstates array.
- * No Pstates added to device tree, pstates not functional.
- */
- prlog(PR_ERR, "OCC: dt_id array alloc failure\n");
- goto out;
- }
-
+ assert(dt_id);
dt_freq = malloc(nr_pstates * sizeof(u32));
- if (!dt_freq) {
- /**
- * @fwts-label OCCdt_freqENOMEM
- * @fwts-advice Out of memory when allocating pstates array.
- * No Pstates added to device tree, pstates not functional.
- */
- prlog(PR_ERR, "OCC: dt_freq array alloc failure\n");
- goto out_free_id;
- }
+ assert(dt_freq);
- dt_vdd = malloc(nr_pstates * sizeof(u8));
- if (!dt_vdd) {
- /**
- * @fwts-label OCCdt_vddENOMEM
- * @fwts-advice Out of memory when allocating pstates array.
- * No Pstates added to device tree, pstates not functional.
- */
- prlog(PR_ERR, "OCC: dt_vdd array alloc failure\n");
- goto out_free_freq;
- }
-
- dt_vcs = malloc(nr_pstates * sizeof(u8));
- if (!dt_vcs) {
- /**
- * @fwts-label OCCdt_vcsENOMEM
- * @fwts-advice Out of memory when allocating pstates array.
- * No Pstates added to device tree, pstates not functional.
- */
- prlog(PR_ERR, "OCC: dt_vcs array alloc failure\n");
- goto out_free_vdd;
+ switch (occ_data->version) {
+ case 0x01:
+ case 0x02:
+ parse_pstates_v2(occ_data, dt_id, dt_freq, nr_pstates,
+ pmax, pmin);
+ break;
+ case 0x90:
+ parse_pstates_v9(occ_data, dt_id, dt_freq, nr_pstates,
+ pmax, pmin);
+ break;
+ default:
+ return false;
}
- if (ultra_turbo_en) {
- nr_cores = get_available_nr_cores_in_chip(chip->id);
- dt_core_max = malloc(nr_cores * sizeof(s8));
- if (!dt_core_max) {
- /**
- * @fwts-label OCCdt_core_maxENOMEM
- * @fwts-advice Out of memory allocating dt_core_max
- * array. No PStates in Device Tree: non-functional
- * power/frequency management.
- */
- prlog(PR_ERR, "OCC: dt_core_max alloc failure\n");
- goto out_free_vcs;
- }
+ /* Add the device-tree entries */
+ dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
+ nr_pstates * sizeof(u32));
+ dt_add_property(power_mgt, "ibm,pstate-frequencies-mhz", dt_freq,
+ nr_pstates * sizeof(u32));
+ dt_add_property_cells(power_mgt, "ibm,pstate-min", pmin);
+ dt_add_property_cells(power_mgt, "ibm,pstate-nominal", pnom);
+ dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
- for (i = 0; i < nr_cores; i++)
- dt_core_max[i] = occ_data->core_max[i];
- }
+ free(dt_freq);
+ free(dt_id);
/*
- * OCC provides pstate table entries in continuous descending order.
- * Parse the pstate table to skip pstate_ids that are greater
- * than Pmax. If a pstate_id is equal to Pmin then add it to
- * the list and break from the loop as this is the last valid
- * element in the pstate table.
+ * Parse and add WOF properties: turbo, ultra-turbo and core_max array.
+ * core_max[i], for i in 1..n, provides the max sustainable pstate that can
+ * be achieved with i active cores in the chip.
*/
- for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
- if (cmp_pstates(occ_data->pstates[i].id, pmax) > 0)
- continue;
+ if (ultra_turbo_supported) {
+ int pturbo, pultra_turbo;
+ u8 nr_cores = get_available_nr_cores_in_chip(chip->id);
+ u32 *dt_cmax;
+
+ dt_cmax = malloc(nr_cores * sizeof(u32));
+ assert(dt_cmax);
+ switch (occ_data->version) {
+ case 0x02:
+ pturbo = occ_data->v2.pstate_turbo;
+ pultra_turbo = occ_data->v2.pstate_ultra_turbo;
+ for (i = 0; i < nr_cores; i++)
+ dt_cmax[i] = occ_data->v2.core_max[i];
+ break;
+ case 0x90:
+ pturbo = occ_data->v9.pstate_turbo;
+ pultra_turbo = occ_data->v9.pstate_ultra_turbo;
+ for (i = 0; i < nr_cores; i++)
+ dt_cmax[i] = occ_data->v9.core_max[i];
+ break;
+ default:
+ return false;
+ }
- dt_id[j] = occ_data->pstates[i].id;
- dt_freq[j] = occ_data->pstates[i].freq_khz / 1000;
- dt_vdd[j] = occ_data->pstates[i].vdd;
- dt_vcs[j] = occ_data->pstates[i].vcs;
- j++;
+ if (cmp_pstates(pturbo, pmax) > 0) {
+ prerror("OCC: Clipping turbo pstate(%d) to Pmax(%d)\n",
+ pturbo, pmax);
+ dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
+ pmax);
+ } else {
+ dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
+ pturbo);
+ }
- if (occ_data->pstates[i].id == occ_data->pstate_min)
- break;
- }
+ dt_add_property_cells(power_mgt, "ibm,pstate-ultra-turbo",
+ pultra_turbo);
+ dt_add_property(power_mgt, "ibm,pstate-core-max", dt_cmax,
+ nr_cores * sizeof(u32));
- if (j != nr_pstates) {
- prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
- nr_pstates, j);
- goto out_free_vcs;
+ free(dt_cmax);
}
- /* Add the device-tree entries */
- dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
- nr_pstates * sizeof(u32));
- dt_add_property(power_mgt, "ibm,pstate-frequencies-mhz", dt_freq,
- nr_pstates * sizeof(u32));
- dt_add_property(power_mgt, "ibm,pstate-vdds", dt_vdd, nr_pstates);
- dt_add_property(power_mgt, "ibm,pstate-vcss", dt_vcs, nr_pstates);
- dt_add_property_cells(power_mgt, "ibm,pstate-min", occ_data->pstate_min);
- dt_add_property_cells(power_mgt, "ibm,pstate-nominal", occ_data->pstate_nom);
- dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
+ if (occ_data->version > 0x02)
+ goto out;
- if (ultra_turbo_en) {
- dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
- occ_data->pstate_turbo);
- dt_add_property_cells(power_mgt, "ibm,pstate-ultra-turbo",
- occ_data->pstate_ultra_turbo);
- dt_add_property(power_mgt, "ibm,pstate-core-max", dt_core_max,
- nr_cores);
- free(dt_core_max);
- }
+ dt_add_property_cells(power_mgt, "#address-cells", 2);
+ dt_add_property_cells(power_mgt, "#size-cells", 1);
- /* Return pstate to set for each core */
- *pstate_nom = occ_data->pstate_nom;
- rc = true;
+ /* Add chip specific pstate properties */
+ for_each_chip(chip) {
+ struct dt_node *occ_node;
-out_free_vcs:
- free(dt_vcs);
-out_free_vdd:
- free(dt_vdd);
-out_free_id:
- free(dt_id);
-out_free_freq:
- free(dt_freq);
+ occ_data = get_occ_pstate_table(chip);
+ occ_node = dt_new_addr(power_mgt, "occ", (uint64_t)occ_data);
+ if (!occ_node) {
+ /**
+ * @fwts-label OCCDTFailedNodeCreation
+ * @fwts-advice Failed to create
+ * /ibm,opal/power-mgt/occ. Per-chip pstate properties
+ * are not added to Device Tree.
+ */
+ prerror("OCC: Failed to create /ibm,opal/power-mgt/occ@%llx\n",
+ (uint64_t)occ_data);
+ return false;
+ }
+
+ dt_add_property_cells(occ_node, "reg",
+ hi32((uint64_t)occ_data),
+ lo32((uint64_t)occ_data),
+ OPAL_DYNAMIC_DATA_OFFSET +
+ sizeof(struct occ_dynamic_data));
+ dt_add_property_cells(occ_node, "ibm,chip-id", chip->id);
+
+ /*
+ * Parse and add pstate Voltage Identifiers (VID) to DT which
+ * are provided by OCC in version 0x01 and 0x02
+ */
+ parse_vid(occ_data, occ_node, nr_pstates, pmax, pmin);
+ }
out:
- return rc;
+ /* Return pstate to set for each core */
+ *pstate_nom = pnom;
+ return true;
}
/*
* Prepare chip for pstate transitions
*/
-static bool cpu_pstates_prepare_core(struct proc_chip *chip, struct cpu_thread *c, s8 pstate_nom)
+static bool cpu_pstates_prepare_core(struct proc_chip *chip,
+ struct cpu_thread *c,
+ int pstate_nom)
{
uint32_t core = pir_to_core_id(c->pir);
uint64_t tmp, pstate;
@@ -542,6 +775,23 @@ static void occ_msg_consumed(void *data __unused)
unlock(&occ_lock);
}
+/*
+ * get_cpu_throttle - Read the CPU throttle status reported by OCC for @chip
+ *
+ * The location depends on the pstate table layout version:
+ *   0x01, 0x02: the throttle byte of the pstate table (v2 layout)
+ *   0x90:       the cpu_throttle byte of the dynamic data area
+ * Any other version yields 0.
+ */
+static inline u8 get_cpu_throttle(struct proc_chip *chip)
+{
+	struct occ_pstate_table *pdata = get_occ_pstate_table(chip);
+	struct occ_dynamic_data *data;
+
+	switch (pdata->version) {
+	case 0x01:
+	case 0x02:
+		return pdata->v2.throttle;
+	case 0x90:
+		data = get_occ_dynamic_data(chip);
+		return data->cpu_throttle;
+	default:
+		return 0;
+	}
+}
+
static void occ_throttle_poll(void *data __unused)
{
struct proc_chip *chip;
@@ -555,7 +805,7 @@ static void occ_throttle_poll(void *data __unused)
int inactive = 0;
for_each_chip(chip) {
- occ_data = chip_occ_data(chip);
+ occ_data = get_occ_pstate_table(chip);
if (occ_data->valid != 1) {
inactive = 1;
break;
@@ -578,18 +828,21 @@ static void occ_throttle_poll(void *data __unused)
if (occ_opal_msg_outstanding)
goto done;
for_each_chip(chip) {
- occ_data = chip_occ_data(chip);
+ u8 throttle;
+
+ occ_data = get_occ_pstate_table(chip);
+ throttle = get_cpu_throttle(chip);
if ((occ_data->valid == 1) &&
- (chip->throttle != occ_data->throttle) &&
- (occ_data->throttle <= OCC_MAX_THROTTLE_STATUS)) {
+ (chip->throttle != throttle) &&
+ (throttle <= OCC_MAX_THROTTLE_STATUS)) {
occ_msg.type = cpu_to_be64(OCC_THROTTLE);
occ_msg.chip = cpu_to_be64(chip->id);
- occ_msg.throttle_status = cpu_to_be64(occ_data->throttle);
+ occ_msg.throttle_status = cpu_to_be64(throttle);
rc = _opal_queue_msg(OPAL_MSG_OCC, NULL,
occ_msg_consumed,
3, (uint64_t *)&occ_msg);
if (!rc) {
- chip->throttle = occ_data->throttle;
+ chip->throttle = throttle;
occ_opal_msg_outstanding = true;
break;
}
@@ -601,16 +854,16 @@ done:
}
/* CPU-OCC PState init */
-/* Called after OCC init on P8 */
+/* Called after OCC init on P8 and P9 */
void occ_pstates_init(void)
{
struct proc_chip *chip;
struct cpu_thread *c;
- s8 pstate_nom;
+ int pstate_nom;
static bool occ_pstates_initialized;
- /* OCC is P8 only */
- if (proc_gen != proc_gen_p8)
+ /* OCC is supported in P8 and P9 */
+ if (proc_gen < proc_gen_p8)
return;
/* Handle fast reboots */
if (occ_pstates_initialized)
@@ -619,9 +872,11 @@ void occ_pstates_init(void)
switch (proc_gen) {
case proc_gen_p8:
cmp_pstates = cmp_negative_pstates;
+ homer_opal_data_offset = P8_HOMER_OPAL_DATA_OFFSET;
break;
case proc_gen_p9:
cmp_pstates = cmp_positive_pstates;
+ homer_opal_data_offset = P9_HOMER_OPAL_DATA_OFFSET;
break;
default:
return;
@@ -862,7 +1117,7 @@ int occ_msg_queue_occ_reset(void)
goto out;
}
/*
- * Set 'valid' byte of chip_occ_data to 0 since OCC
+ * Set 'valid' byte of occ_pstate_table to 0 since OCC
* may not clear this byte on a reset.
* OCC will set the 'valid' byte to 1 when it becomes
* active again.
@@ -870,7 +1125,7 @@ int occ_msg_queue_occ_reset(void)
for_each_chip(chip) {
struct occ_pstate_table *occ_data;
- occ_data = chip_occ_data(chip);
+ occ_data = get_occ_pstate_table(chip);
occ_data->valid = 0;
chip->throttle = 0;
}