diff mbox series

[v0,12/12] mlxsw: core: Add ports temperature measurement to thermal algorithm

Message ID 1529594915-20741-3-git-send-email-vadimp@mellanox.com
State Deferred, archived
Delegated to: David Miller
Headers show
Series mlxsw thermal monitoring amendments | expand

Commit Message

Vadim Pasternak June 21, 2018, 3:28 p.m. UTC
Port temperatures have the most significant impact on the system thermal
state and should be considered by the thermal algorithm. The thermal zone
temperature reading is extended to read the port temperatures along with
the chip temperature. The temperature value provided to the core thermal
algorithm will be the accumulated value of the chip and port temperature
readings, normalized according to the basic constant thresholds.

Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 66 ++++++++++++++++++++--
 1 file changed, 62 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 65962ed..23d6197 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -109,6 +109,8 @@  struct mlxsw_thermal {
 	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
 	enum thermal_device_mode mode;
+	int count;
+	int *ports_temp_cache;
 };
 
 static inline u8 mlxsw_state_to_duty(int state)
@@ -213,10 +215,11 @@  static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev,
 	return 0;
 }
 
-static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
-				  int *p_temp)
+static int mlxsw_thermal_init_temp(struct mlxsw_thermal *thermal,
+				   struct mlxsw_env_temp_thresh *delta,
+				   struct mlxsw_env_temp_multi *multi,
+				   int *p_temp, bool *p_crit)
 {
-	struct mlxsw_thermal *thermal = tzdev->devdata;
 	struct device *dev = thermal->bus_info->dev;
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
 	unsigned int temp;
@@ -231,10 +234,58 @@  static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
 	}
 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
 
-	*p_temp = (int) temp;
+	if (temp >= MLXSW_ENV_TEMP_CRIT) {
+		*p_crit = true;
+	} else if (temp < MLXSW_ENV_TEMP_NORM) {
+		multi->thresh.normal = temp;
+		delta->normal = MLXSW_ENV_TEMP_NORM - temp;
+	} else if (temp >= MLXSW_ENV_TEMP_HOT) {
+		multi->thresh.crit = temp;
+		delta->crit = temp - MLXSW_ENV_TEMP_HOT;
+		multi->mask |= MLXSW_ENV_CRIT_MASK;
+	} else {
+		multi->thresh.hot = temp;
+		delta->hot = temp - MLXSW_ENV_TEMP_NORM;
+		multi->mask |= MLXSW_ENV_HOT_MASK;
+	}
+	*p_temp = temp;
+
 	return 0;
 }
 
+static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
+				  int *p_temp)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+	struct device *dev = thermal->bus_info->dev;
+	struct mlxsw_env_temp_multi multi;
+	struct mlxsw_env_temp_thresh delta;
+	bool crit = false;
+	int err;
+
+	memset(&multi, 0, sizeof(struct mlxsw_env_temp_multi));
+	memset(&delta, 0, sizeof(struct mlxsw_env_temp_thresh));
+	/* Read ASIC temperature */
+	err = mlxsw_thermal_init_temp(thermal, &delta, &multi,
+				      p_temp, &crit);
+	if (err) {
+		dev_err(dev, "Failed to query ASIC temp sensor\n");
+		return err;
+	}
+
+	/* No need to read the ports temperature, since the ASIC temperature
+	 * is already critical and should result in a system shutdown.
+	 */
+	if (crit)
+		return 0;
+
+	/* Collect ports temperature */
+	return mlxsw_env_collect_port_temp(thermal->core,
+					   thermal->ports_temp_cache,
+					   thermal->count, &multi, &delta,
+					   NULL, p_temp);
+}
+
 static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
 				       int trip,
 				       enum thermal_trip_type *p_type)
@@ -436,6 +487,7 @@  int mlxsw_thermal_init(struct mlxsw_core *core,
 		       const struct mlxsw_bus_info *bus_info,
 		       struct mlxsw_thermal **p_thermal)
 {
+	unsigned int max_ports = mlxsw_core_max_ports(core);
 	char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
 	enum mlxsw_reg_mfcr_pwm_frequency freq;
 	struct device *dev = bus_info->dev;
@@ -452,6 +504,12 @@  int mlxsw_thermal_init(struct mlxsw_core *core,
 	thermal->core = core;
 	thermal->bus_info = bus_info;
 	memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
+	thermal->ports_temp_cache = devm_kmalloc_array(dev, max_ports,
+						       sizeof(int),
+						       GFP_KERNEL);
+	if (!thermal->ports_temp_cache)
+		return -ENOMEM;
+	thermal->count = max_ports;
 
 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
 	if (err) {