Message ID | SA1PR11MB67600F56B3AE6348E7D6FEBFF5FBA@SA1PR11MB6760.namprd11.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | cpu-throttle: Fix vcpu missed throttle work | expand |
Hi pbonzini: please take some time to review this patch. It fixes an auto-converge migration issue for workloads that dirty memory pages heavily. Any comments are welcome, thanks. On 2023/9/18 11:29, alloc.young@outlook.com wrote: > From: alloc <yangcg26@midea.com> > > During migrations, vcpu may run longer than 10ms and not exit > on time. If the vcpu runs over 20ms, then it'll miss a throttle > kick and will run the whole tick. When this happens and vcpu > dirties pages fast, the migration will take a long time or even > fail to auto converge. To fix this issue, take overrun > vcpu time into account and adjust the whole sleep time. > > Signed-off-by: yangchunguang <yangcg26@midea.com> > --- > include/hw/core/cpu.h | 5 ++++ > softmmu/cpu-throttle.c | 58 +++++++++++++++++++++++++++++++++++++----- > 2 files changed, 56 insertions(+), 7 deletions(-) > > diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h > index 92a4234439..0b3cc3e81e 100644 > --- a/include/hw/core/cpu.h > +++ b/include/hw/core/cpu.h > @@ -430,6 +430,11 @@ struct CPUState { > */ > bool throttle_thread_scheduled; > > + /* Used to keep last cpu throttle tick > + * > + */ > + int64_t throttle_last_tick; > + > /* > * Sleep throttle_us_per_full microseconds once dirty ring is full > * if dirty page rate limit is enabled. 
> diff --git a/softmmu/cpu-throttle.c b/softmmu/cpu-throttle.c > index d9bb30a223..bdec8dc954 100644 > --- a/softmmu/cpu-throttle.c > +++ b/softmmu/cpu-throttle.c > @@ -36,22 +36,66 @@ static unsigned int throttle_percentage; > #define CPU_THROTTLE_PCT_MIN 1 > #define CPU_THROTTLE_PCT_MAX 99 > #define CPU_THROTTLE_TIMESLICE_NS 10000000 > +#define CPU_THROTTLE_RUN_MIN_NS (CPU_THROTTLE_TIMESLICE_NS / 100) > > static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) > { > double pct; > double throttle_ratio; > - int64_t sleeptime_ns, endtime_ns; > + int64_t sleeptime_ns, endtime_ns, now, overrun_ns; > > if (!cpu_throttle_get_percentage()) { > return; > } > > + now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); > pct = (double)cpu_throttle_get_percentage() / 100; > throttle_ratio = pct / (1 - pct); > - /* Add 1ns to fix double's rounding error (like 0.9999999...) */ > - sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1); > - endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns; > + overrun_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) - cpu->throttle_last_tick; > + /* If vcpu runs longer than 20ms, then the vcpu will miss next throttle tick and > + * will run almost the full tick frame. When this happens and vcpu runs fast dirty > + * pages, migration may take long time or can't converge at all. 
> + * > + * Example of guest run longer than 30ms when cpu throttle is 99% > + * > + * guest run(x) throttle tick(*) guest sleep(+) > + * > + * +++++...+++++x xx+++++...++++++xxxxx...xxxxxx vcpu > + * > + * ----------*----...------*------...-----*------...----*---------- timeframe > + * > + */ > + if (overrun_ns > (CPU_THROTTLE_TIMESLICE_NS - CPU_THROTTLE_RUN_MIN_NS)) { > + int64_t timeframe = CPU_THROTTLE_TIMESLICE_NS / (1 - pct) + 1; > + int64_t new_ns = overrun_ns / (1 - pct) + 1; > + int frames; > + int64_t adj, remainder; > + > + frames = overrun_ns / CPU_THROTTLE_TIMESLICE_NS; > + sleeptime_ns = overrun_ns * throttle_ratio + 1; > + remainder = new_ns - frames * timeframe; > + if (remainder > 0) { > + int64_t left_ns = timeframe - remainder; > + int64_t left_run = (1 - pct) * left_ns; > + > + adj = left_run < CPU_THROTTLE_RUN_MIN_NS ? CPU_THROTTLE_RUN_MIN_NS - left_run : 0; > + sleeptime_ns += left_ns * pct; > + } else > + adj = CPU_THROTTLE_RUN_MIN_NS; > + > + /* Limit max vcpu sleep time to avoid guest hang, > + * max sleep time is 10s when cpu throttle is 99% > + */ > + if (sleeptime_ns > 10 * timeframe) { > + adj = remainder + CPU_THROTTLE_RUN_MIN_NS; > + sleeptime_ns = 10 * timeframe; > + } > + sleeptime_ns -= adj; > + } else > + /* Add 1ns to fix double's rounding error (like 0.9999999...) 
*/ > + sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1); > + > + endtime_ns = now + sleeptime_ns; > while (sleeptime_ns > 0 && !cpu->stop) { > if (sleeptime_ns > SCALE_MS) { > qemu_cond_timedwait_iothread(cpu->halt_cond, > @@ -70,6 +114,7 @@ static void cpu_throttle_timer_tick(void *opaque) > { > CPUState *cpu; > double pct; > + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); > > /* Stop the timer if needed */ > if (!cpu_throttle_get_percentage()) { > @@ -77,14 +122,13 @@ static void cpu_throttle_timer_tick(void *opaque) > } > CPU_FOREACH(cpu) { > if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) { > + cpu->throttle_last_tick = now; > async_run_on_cpu(cpu, cpu_throttle_thread, > RUN_ON_CPU_NULL); > } > } > - > pct = (double)cpu_throttle_get_percentage() / 100; > - timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + > - CPU_THROTTLE_TIMESLICE_NS / (1 - pct)); > + timer_mod(throttle_timer, now + CPU_THROTTLE_TIMESLICE_NS / (1 - pct)); > } > > void cpu_throttle_set(int new_throttle_pct)
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 92a4234439..0b3cc3e81e 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -430,6 +430,11 @@ struct CPUState { */ bool throttle_thread_scheduled; + /* Used to keep last cpu throttle tick + * + */ + int64_t throttle_last_tick; + /* * Sleep throttle_us_per_full microseconds once dirty ring is full * if dirty page rate limit is enabled. diff --git a/softmmu/cpu-throttle.c b/softmmu/cpu-throttle.c index d9bb30a223..bdec8dc954 100644 --- a/softmmu/cpu-throttle.c +++ b/softmmu/cpu-throttle.c @@ -36,22 +36,66 @@ static unsigned int throttle_percentage; #define CPU_THROTTLE_PCT_MIN 1 #define CPU_THROTTLE_PCT_MAX 99 #define CPU_THROTTLE_TIMESLICE_NS 10000000 +#define CPU_THROTTLE_RUN_MIN_NS (CPU_THROTTLE_TIMESLICE_NS / 100) static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) { double pct; double throttle_ratio; - int64_t sleeptime_ns, endtime_ns; + int64_t sleeptime_ns, endtime_ns, now, overrun_ns; if (!cpu_throttle_get_percentage()) { return; } + now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); pct = (double)cpu_throttle_get_percentage() / 100; throttle_ratio = pct / (1 - pct); - /* Add 1ns to fix double's rounding error (like 0.9999999...) */ - sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1); - endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns; + overrun_ns = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) - cpu->throttle_last_tick; + /* If vcpu runs longer than 20ms, then the vcpu will miss next throttle tick and + * will run almost the full tick frame. When this happens and vcpu runs fast dirty + * pages, migration may take long time or can't converge at all. 
+ * + * Example of guest run longer than 30ms when cpu throttle is 99% + * + * guest run(x) throttle tick(*) guest sleep(+) + * + * +++++...+++++x xx+++++...++++++xxxxx...xxxxxx vcpu + * + * ----------*----...------*------...-----*------...----*---------- timeframe + * + */ + if (overrun_ns > (CPU_THROTTLE_TIMESLICE_NS - CPU_THROTTLE_RUN_MIN_NS)) { + int64_t timeframe = CPU_THROTTLE_TIMESLICE_NS / (1 - pct) + 1; + int64_t new_ns = overrun_ns / (1 - pct) + 1; + int frames; + int64_t adj, remainder; + + frames = overrun_ns / CPU_THROTTLE_TIMESLICE_NS; + sleeptime_ns = overrun_ns * throttle_ratio + 1; + remainder = new_ns - frames * timeframe; + if (remainder > 0) { + int64_t left_ns = timeframe - remainder; + int64_t left_run = (1 - pct) * left_ns; + + adj = left_run < CPU_THROTTLE_RUN_MIN_NS ? CPU_THROTTLE_RUN_MIN_NS - left_run : 0; + sleeptime_ns += left_ns * pct; + } else + adj = CPU_THROTTLE_RUN_MIN_NS; + + /* Limit max vcpu sleep time to avoid guest hang, + * max sleep time is 10s when cpu throttle is 99% + */ + if (sleeptime_ns > 10 * timeframe) { + adj = remainder + CPU_THROTTLE_RUN_MIN_NS; + sleeptime_ns = 10 * timeframe; + } + sleeptime_ns -= adj; + } else + /* Add 1ns to fix double's rounding error (like 0.9999999...) 
*/ + sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1); + + endtime_ns = now + sleeptime_ns; while (sleeptime_ns > 0 && !cpu->stop) { if (sleeptime_ns > SCALE_MS) { qemu_cond_timedwait_iothread(cpu->halt_cond, @@ -70,6 +114,7 @@ static void cpu_throttle_timer_tick(void *opaque) { CPUState *cpu; double pct; + int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); /* Stop the timer if needed */ if (!cpu_throttle_get_percentage()) { @@ -77,14 +122,13 @@ static void cpu_throttle_timer_tick(void *opaque) } CPU_FOREACH(cpu) { if (!qatomic_xchg(&cpu->throttle_thread_scheduled, 1)) { + cpu->throttle_last_tick = now; async_run_on_cpu(cpu, cpu_throttle_thread, RUN_ON_CPU_NULL); } } - pct = (double)cpu_throttle_get_percentage() / 100; - timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + - CPU_THROTTLE_TIMESLICE_NS / (1 - pct)); + timer_mod(throttle_timer, now + CPU_THROTTLE_TIMESLICE_NS / (1 - pct)); } void cpu_throttle_set(int new_throttle_pct)