diff mbox series

[2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K

Message ID 20190430133615.25721-3-rasmus.villemoes@prevas.dk (mailing list archive)
State Not Applicable
Headers show
Series soc/fsl/qe: cleanups and new DT binding | expand

Commit Message

Rasmus Villemoes April 30, 2019, 1:36 p.m. UTC
The current array of struct qe_snum use 256*4 bytes for just keeping
track of the free/used state of each index, and the struct layout
means there's another 768 bytes of padding. If we just unzip that
structure, the array of snum values just use 256 bytes, while the
free/inuse state can be tracked in a 32 byte bitmap.

So this reduces the .data footprint by 1760 bytes. It also serves as
preparation for introducing another DT binding for specifying the snum
values.

Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
---
 drivers/soc/fsl/qe/qe.c | 37 ++++++++++++-------------------------
 1 file changed, 12 insertions(+), 25 deletions(-)

Comments

Christophe Leroy April 30, 2019, 5:12 p.m. UTC | #1
Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :
> The current array of struct qe_snum use 256*4 bytes for just keeping
> track of the free/used state of each index, and the struct layout
> means there's another 768 bytes of padding. If we just unzip that
> structure, the array of snum values just use 256 bytes, while the
> free/inuse state can be tracked in a 32 byte bitmap.
> 
> So this reduces the .data footprint by 1760 bytes. It also serves as
> preparation for introducing another DT binding for specifying the snum
> values.
> 
> Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
> ---
>   drivers/soc/fsl/qe/qe.c | 37 ++++++++++++-------------------------
>   1 file changed, 12 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
> index 855373deb746..d0393f83145c 100644
> --- a/drivers/soc/fsl/qe/qe.c
> +++ b/drivers/soc/fsl/qe/qe.c
> @@ -14,6 +14,7 @@
>    * Free Software Foundation;  either version 2 of the  License, or (at your
>    * option) any later version.
>    */
> +#include <linux/bitmap.h>
>   #include <linux/errno.h>
>   #include <linux/sched.h>
>   #include <linux/kernel.h>
> @@ -43,25 +44,14 @@ static DEFINE_SPINLOCK(qe_lock);
>   DEFINE_SPINLOCK(cmxgcr_lock);
>   EXPORT_SYMBOL(cmxgcr_lock);
>   
> -/* QE snum state */
> -enum qe_snum_state {
> -	QE_SNUM_STATE_USED,
> -	QE_SNUM_STATE_FREE
> -};
> -
> -/* QE snum */
> -struct qe_snum {
> -	u8 num;
> -	enum qe_snum_state state;
> -};
> -
>   /* We allocate this here because it is used almost exclusively for
>    * the communication processor devices.
>    */
>   struct qe_immap __iomem *qe_immr;
>   EXPORT_SYMBOL(qe_immr);
>   
> -static struct qe_snum snums[QE_NUM_OF_SNUM];	/* Dynamically allocated SNUMs */
> +static u8 snums[QE_NUM_OF_SNUM];	/* Dynamically allocated SNUMs */
> +static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
>   static unsigned int qe_num_of_snum;
>   
>   static phys_addr_t qebase = -1;
> @@ -308,6 +298,7 @@ static void qe_snums_init(void)
>   	};
>   	const u8 *snum_init;
>   
> +	bitmap_zero(snum_state, QE_NUM_OF_SNUM);

Doesn't make much importance, but wouldn't it be more logical to add 
this line where the setting of .state = QE_SNUM_STATE_FREE was done 
previously, ie around the for() loop below ?

>   	qe_num_of_snum = qe_get_num_of_snums();
>   
>   	if (qe_num_of_snum == 76)
> @@ -315,10 +306,8 @@ static void qe_snums_init(void)
>   	else
>   		snum_init = snum_init_46;
>   
> -	for (i = 0; i < qe_num_of_snum; i++) {
> -		snums[i].num = snum_init[i];
> -		snums[i].state = QE_SNUM_STATE_FREE;
> -	}
> +	for (i = 0; i < qe_num_of_snum; i++)
> +		snums[i] = snum_init[i];

Could use memcpy() instead ?

>   }
>   
>   int qe_get_snum(void)
> @@ -328,12 +317,10 @@ int qe_get_snum(void)
>   	int i;
>   
>   	spin_lock_irqsave(&qe_lock, flags);
> -	for (i = 0; i < qe_num_of_snum; i++) {
> -		if (snums[i].state == QE_SNUM_STATE_FREE) {
> -			snums[i].state = QE_SNUM_STATE_USED;
> -			snum = snums[i].num;
> -			break;
> -		}
> +	i = find_first_zero_bit(snum_state, qe_num_of_snum);
> +	if (i < qe_num_of_snum) {
> +		set_bit(i, snum_state);
> +		snum = snums[i];
>   	}
>   	spin_unlock_irqrestore(&qe_lock, flags);
>   
> @@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)
>   	int i;
>   
>   	for (i = 0; i < qe_num_of_snum; i++) {
> -		if (snums[i].num == snum) {
> -			snums[i].state = QE_SNUM_STATE_FREE;
> +		if (snums[i] == snum) {
> +			clear_bit(i, snum_state);
>   			break;
>   		}
>   	}

Can we replace this loop by memchr() ?

Christophe
Rasmus Villemoes May 1, 2019, 5:51 a.m. UTC | #2
On 30/04/2019 19.12, Christophe Leroy wrote:
>  
> Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :
>> The current array of struct qe_snum use 256*4 bytes for just keeping
>> track of the free/used state of each index, and the struct layout
>> means there's another 768 bytes of padding. If we just unzip that
>> structure, the array of snum values just use 256 bytes, while the
>> free/inuse state can be tracked in a 32 byte bitmap.
>>
>> So this reduces the .data footprint by 1760 bytes. It also serves as
>> preparation for introducing another DT binding for specifying the snum
>> values.
>>
>> Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
>> ---
>> -
>>   /* We allocate this here because it is used almost exclusively for
>>    * the communication processor devices.
>>    */
>>   struct qe_immap __iomem *qe_immr;
>>   EXPORT_SYMBOL(qe_immr);
>>   -static struct qe_snum snums[QE_NUM_OF_SNUM];    /* Dynamically
>> allocated SNUMs */
>> +static u8 snums[QE_NUM_OF_SNUM];    /* Dynamically allocated SNUMs */
>> +static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
>>   static unsigned int qe_num_of_snum;
>>     static phys_addr_t qebase = -1;
>> @@ -308,6 +298,7 @@ static void qe_snums_init(void)
>>       };
>>       const u8 *snum_init;
>>   +    bitmap_zero(snum_state, QE_NUM_OF_SNUM);
> 
> Doesn't make much importance, but wouldn't it be more logical to add
> this line where the setting of .state = QE_SNUM_STATE_FREE was done
> previously, ie around the for() loop below ?

This was on purpose, to avoid having to move it up in patch 4, where we
don't necessarily reach the for loop.

>>       qe_num_of_snum = qe_get_num_of_snums();
>>         if (qe_num_of_snum == 76)
>> @@ -315,10 +306,8 @@ static void qe_snums_init(void)
>>       else
>>           snum_init = snum_init_46;
>>   -    for (i = 0; i < qe_num_of_snum; i++) {
>> -        snums[i].num = snum_init[i];
>> -        snums[i].state = QE_SNUM_STATE_FREE;
>> -    }
>> +    for (i = 0; i < qe_num_of_snum; i++)
>> +        snums[i] = snum_init[i];
> 
> Could use memcpy() instead ?

Yes, I switch to that in 5/5. Sure, I could do it here already, but I
did it this way to keep close to the current style. I don't care either
way, so if you prefer introducing memcpy here, fine by me.


>>       spin_unlock_irqrestore(&qe_lock, flags);
>>   @@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)
>>       int i;
>>         for (i = 0; i < qe_num_of_snum; i++) {
>> -        if (snums[i].num == snum) {
>> -            snums[i].state = QE_SNUM_STATE_FREE;
>> +        if (snums[i] == snum) {
>> +            clear_bit(i, snum_state);
>>               break;
>>           }
>>       }
> 
> Can we replace this loop by memchr() ?

Hm, yes. So that would be

  const u8 *p = memchr(snums, snum, qe_num_of_snum)
  if (p)
    clear_bit(p - snums, snum_state);

I guess. Let me fold that in and see how it looks.

Thanks,
Rasmus
diff mbox series

Patch

diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 855373deb746..d0393f83145c 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -14,6 +14,7 @@ 
  * Free Software Foundation;  either version 2 of the  License, or (at your
  * option) any later version.
  */
+#include <linux/bitmap.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -43,25 +44,14 @@  static DEFINE_SPINLOCK(qe_lock);
 DEFINE_SPINLOCK(cmxgcr_lock);
 EXPORT_SYMBOL(cmxgcr_lock);
 
-/* QE snum state */
-enum qe_snum_state {
-	QE_SNUM_STATE_USED,
-	QE_SNUM_STATE_FREE
-};
-
-/* QE snum */
-struct qe_snum {
-	u8 num;
-	enum qe_snum_state state;
-};
-
 /* We allocate this here because it is used almost exclusively for
  * the communication processor devices.
  */
 struct qe_immap __iomem *qe_immr;
 EXPORT_SYMBOL(qe_immr);
 
-static struct qe_snum snums[QE_NUM_OF_SNUM];	/* Dynamically allocated SNUMs */
+static u8 snums[QE_NUM_OF_SNUM];	/* Dynamically allocated SNUMs */
+static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
 static unsigned int qe_num_of_snum;
 
 static phys_addr_t qebase = -1;
@@ -308,6 +298,7 @@  static void qe_snums_init(void)
 	};
 	const u8 *snum_init;
 
+	bitmap_zero(snum_state, QE_NUM_OF_SNUM);
 	qe_num_of_snum = qe_get_num_of_snums();
 
 	if (qe_num_of_snum == 76)
@@ -315,10 +306,8 @@  static void qe_snums_init(void)
 	else
 		snum_init = snum_init_46;
 
-	for (i = 0; i < qe_num_of_snum; i++) {
-		snums[i].num = snum_init[i];
-		snums[i].state = QE_SNUM_STATE_FREE;
-	}
+	for (i = 0; i < qe_num_of_snum; i++)
+		snums[i] = snum_init[i];
 }
 
 int qe_get_snum(void)
@@ -328,12 +317,10 @@  int qe_get_snum(void)
 	int i;
 
 	spin_lock_irqsave(&qe_lock, flags);
-	for (i = 0; i < qe_num_of_snum; i++) {
-		if (snums[i].state == QE_SNUM_STATE_FREE) {
-			snums[i].state = QE_SNUM_STATE_USED;
-			snum = snums[i].num;
-			break;
-		}
+	i = find_first_zero_bit(snum_state, qe_num_of_snum);
+	if (i < qe_num_of_snum) {
+		set_bit(i, snum_state);
+		snum = snums[i];
 	}
 	spin_unlock_irqrestore(&qe_lock, flags);
 
@@ -346,8 +333,8 @@  void qe_put_snum(u8 snum)
 	int i;
 
 	for (i = 0; i < qe_num_of_snum; i++) {
-		if (snums[i].num == snum) {
-			snums[i].state = QE_SNUM_STATE_FREE;
+		if (snums[i] == snum) {
+			clear_bit(i, snum_state);
 			break;
 		}
 	}