@@ -42,7 +42,23 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
         size -= BITS_PER_LONG;
         result += BITS_PER_LONG;
     }
-    while (size & ~(BITS_PER_LONG-1)) {
+    while (size >= 4*BITS_PER_LONG) {
+        unsigned long d1, d2, d3;
+        tmp = *p;
+        d1 = *(p+1);
+        d2 = *(p+2);
+        d3 = *(p+3);
+        if (tmp) {
+            goto found_middle;
+        }
+        if (d1 | d2 | d3) {
+            break;
+        }
+        p += 4;
+        result += 4*BITS_PER_LONG;
+        size -= 4*BITS_PER_LONG;
+    }
+    while (size >= BITS_PER_LONG) {
         if ((tmp = *(p++))) {
             goto found_middle;
         }
This patch adopts the loop unrolling idea of bitmap_is_zero() to speed up
the skipping of large areas with zeros in find_next_bit(). This routine is
extensively used to find dirty pages in live migration.

Testing only the find_next_bit performance on a zeroed bitfield, the loop
unrolling decreased execution time by approx. 50% on x86_64.

Signed-off-by: Peter Lieven <pl@kamp.de>
---
 util/bitops.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)
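
For anyone who wants to reproduce the measurement, something along these
lines should do. This is a minimal stand-alone sketch, not the harness
actually used; the find_next_bit below is a local copy of the patched
routine from util/bitops.c, with __builtin_ctzl standing in for QEMU's
ctzl(). Scanning an all-zero bitmap exercises only the skip loops, which
is exactly the hot path in dirty-page tracking.

    /* bench_find_next_bit.c -- rough micro-benchmark sketch (hypothetical,
     * not the harness actually used). Build: gcc -O2 -o bench bench.c */
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    #define BITS_PER_LONG   (8 * sizeof(unsigned long))
    #define BIT_WORD(nr)    ((nr) / BITS_PER_LONG)

    /* stand-alone copy of the patched routine from util/bitops.c */
    static unsigned long find_next_bit(const unsigned long *addr,
                                       unsigned long size,
                                       unsigned long offset)
    {
        const unsigned long *p = addr + BIT_WORD(offset);
        unsigned long result = offset & ~(BITS_PER_LONG - 1);
        unsigned long tmp;

        if (offset >= size) {
            return size;
        }
        size -= result;
        offset %= BITS_PER_LONG;
        if (offset) {
            tmp = *(p++);
            tmp &= (~0UL << offset);
            if (size < BITS_PER_LONG) {
                goto found_first;
            }
            if (tmp) {
                goto found_middle;
            }
            size -= BITS_PER_LONG;
            result += BITS_PER_LONG;
        }
        while (size >= 4 * BITS_PER_LONG) {
            unsigned long d1, d2, d3;
            tmp = *p;
            d1 = *(p + 1);
            d2 = *(p + 2);
            d3 = *(p + 3);
            if (tmp) {
                goto found_middle;
            }
            if (d1 | d2 | d3) {
                break;
            }
            p += 4;
            result += 4 * BITS_PER_LONG;
            size -= 4 * BITS_PER_LONG;
        }
        while (size >= BITS_PER_LONG) {
            if ((tmp = *(p++))) {
                goto found_middle;
            }
            result += BITS_PER_LONG;
            size -= BITS_PER_LONG;
        }
        if (!size) {
            return result;
        }
        tmp = *p;

    found_first:
        tmp &= (~0UL >> (BITS_PER_LONG - size));
        if (tmp == 0UL) {          /* any bits set? */
            return result + size;  /* none */
        }
    found_middle:
        return result + __builtin_ctzl(tmp);
    }

    int main(void)
    {
        const unsigned long nbits = 1UL << 24;  /* 16 Mbit, all zero */
        const int iters = 1000;
        unsigned long *bm = calloc(nbits / BITS_PER_LONG, sizeof(*bm));
        unsigned long sink = 0;
        struct timespec t0, t1;
        int i;

        if (!bm) {
            return 1;
        }
        clock_gettime(CLOCK_MONOTONIC, &t0);
        for (i = 0; i < iters; i++) {
            /* all-zero bitmap: only the skip loops run */
            sink += find_next_bit(bm, nbits, 0);
        }
        clock_gettime(CLOCK_MONOTONIC, &t1);
        printf("%d scans of %lu bits: %.1f ms (sink=%lu)\n", iters, nbits,
               (t1.tv_sec - t0.tv_sec) * 1e3 +
               (t1.tv_nsec - t0.tv_nsec) / 1e6, sink);
        free(bm);
        return 0;
    }

Comparing this binary built with and without the unrolled loop should show
roughly the ratio quoted above; the exact figure will depend on the
compiler and CPU.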