Message ID | 1348000626-16129-1-git-send-email-aliguori@us.ibm.com |
---|---|
State | New |
Headers | show |
On Tue, Sep 18, 2012 at 4:37 PM, Anthony Liguori <aliguori@us.ibm.com>wrote: > timerfd is a new(ish) mechanism provided in the 2.6.25+ Linux kernels that > allows for a high resolution event timer signaled through a file > descriptor. > This patch adds an alarm timer implementation using timerfd. > > timerfd has a couple advantages over dynticks. Namely, it's fd based > instead > of signal based which reduces the likelihood of receiving EINTR failure > codes. > > Because we can't process timers in a signal handler, we write to a pipe() > and > read that in the main loop. timerfd allows us to avoid the extra write to > a > pipe. > > Finally, since timerfd is just a file descriptor, it could conceivably > used to > implement per-thread alarm timers. It also would integrate very easily in > a > glib main loop. > > Unfortunately, there's a lot of Windows code in qemu-timer.c and > main-loop.c > right now otherwise the refactoring would be trivial. I'll leave that for > another day. > > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Jan Kiszka <jan.kiszka@siemens.com> > Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> > --- > Please note, this is lightly tested. Since this is such a fundamental > change, > I'd like to do some performance analysis before committing but wanted to > share > early. > --- > configure | 14 +++++++++ > qemu-timer.c | 86 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 100 insertions(+), 0 deletions(-) > > diff --git a/configure b/configure > index 8564142..8a3135b 100755 > --- a/configure > +++ b/configure > @@ -2532,6 +2532,17 @@ if compile_prog "" "" ; then > sync_file_range=yes > fi > > +# check for timerfd > +timerfd="no" > +cat > $TMPC << EOF > +#include <sys/timerfd.h> > +int main(void) { return timerfd_create(CLOCK_REALTIME, 0); } > +EOF > + > +if compile_prog "" "" ; then > + timerfd=yes > +fi > + > # check for linux/fiemap.h and FS_IOC_FIEMAP > fiemap=no > cat > $TMPC << EOF > @@ -3467,6 +3478,9 @@ fi > if test "$signalfd" = "yes" ; then > echo "CONFIG_SIGNALFD=y" >> $config_host_mak > fi > +if test "$timerfd" = "yes" ; then > + echo "CONFIG_TIMERFD=y" >> $config_host_mak > +fi > if test "$tcg_interpreter" = "yes" ; then > echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak > fi > diff --git a/qemu-timer.c b/qemu-timer.c > index c7a1551..6f7fa35 100644 > --- a/qemu-timer.c > +++ b/qemu-timer.c > @@ -35,6 +35,10 @@ > #include <mmsystem.h> > #endif > > +#ifdef CONFIG_TIMERFD > +#include <sys/timerfd.h> > +#endif > + > /***********************************************************/ > /* timers */ > > @@ -66,6 +70,9 @@ struct qemu_alarm_timer { > int (*start)(struct qemu_alarm_timer *t); > void (*stop)(struct qemu_alarm_timer *t); > void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns); > +#ifdef CONFIG_TIMERFD > + int timerfd; > +#endif > #if defined(__linux__) > timer_t timer; > int fd; > @@ -121,6 +128,12 @@ static void qemu_rearm_alarm_timer(struct > qemu_alarm_timer *t) > /* TODO: MIN_TIMER_REARM_NS should be optimized */ > #define MIN_TIMER_REARM_NS 250000 > > +#ifdef CONFIG_TIMERFD > +static int timerfd_start_timer(struct qemu_alarm_timer *t); > +static void timerfd_stop_timer(struct qemu_alarm_timer *t); > +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, int64_t > delta); > +#endif > + > #ifdef _WIN32 > > static int mm_start_timer(struct qemu_alarm_timer *t); > @@ -148,6 +161,10 @@ static void dynticks_rearm_timer(struct > qemu_alarm_timer *t, int64_t delta); > #endif /* _WIN32 */ > > static struct qemu_alarm_timer alarm_timers[] = { > +#ifdef CONFIG_TIMERFD > + {"timerfd", timerfd_start_timer, > + timerfd_stop_timer, timerfd_rearm_timer}, > +#endif > #ifndef _WIN32 > #ifdef __linux__ > {"dynticks", dynticks_start_timer, > @@ -476,6 +493,75 @@ static void host_alarm_handler(int host_signum) > > #include "compatfd.h" > > +static void timerfd_event(void *opaque) > +{ > + struct qemu_alarm_timer *t = opaque; > + ssize_t len; > + uint64_t count; > + > + len = read(t->timerfd, &count, sizeof(count)); > + g_assert(len == sizeof(count)); > + > + t->expired = true; > + t->pending = true; > + > + /* We already process pending timers at the end of the main loop > whereas > + * this function is called during I/O processing. That means we know > that > + * pending timers will be checked before select()'ing again which > means we > + * don't need to explicitly call qemu_notify_event() > + */ > +} > + > +static int timerfd_start_timer(struct qemu_alarm_timer *t) > +{ > + t->timerfd = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC); > + if (t->timerfd == -1) { > + return -errno; > + } > + > + qemu_set_fd_handler(t->timerfd, timerfd_event, NULL, t); > + > + return 0; > +} > + > +static void timerfd_stop_timer(struct qemu_alarm_timer *t) > +{ > + qemu_set_fd_handler(t->timerfd, NULL, NULL, NULL); > + close(t->timerfd); > + t->timerfd = -1; > +} > + > +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, > + int64_t nearest_delta_ns) > +{ > + int ret; > + struct itimerspec timeout; > + int64_t current_ns; > + > + if (nearest_delta_ns < MIN_TIMER_REARM_NS) { > + nearest_delta_ns = MIN_TIMER_REARM_NS; > + } > It has been a few months, but IIRC, this pattern can lead to spurious timeouts if these methods are not used correctly. If folks are interested, I can spend some time to dig up my past work on this to explain more. > + /* check whether a timer is already running */ > + ret = timerfd_gettime(t->timerfd, &timeout); > + g_assert(ret == 0); > + > + current_ns = timeout.it_value.tv_sec * 1000000000LL; > + current_ns += timeout.it_value.tv_nsec; > + > + if (current_ns && current_ns <= nearest_delta_ns) { > + return; > + } > + > + timeout.it_interval.tv_sec = 0; > + timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */ > + timeout.it_value.tv_sec = nearest_delta_ns / 1000000000; > + timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000; > + > + ret = timerfd_settime(t->timerfd, 0, &timeout, NULL); > + g_assert(ret == 0); > +} > + > static int dynticks_start_timer(struct qemu_alarm_timer *t) > { > struct sigevent ev; > -- > 1.7.5.4 > > >
diff --git a/configure b/configure index 8564142..8a3135b 100755 --- a/configure +++ b/configure @@ -2532,6 +2532,17 @@ if compile_prog "" "" ; then sync_file_range=yes fi +# check for timerfd +timerfd="no" +cat > $TMPC << EOF +#include <sys/timerfd.h> +int main(void) { return timerfd_create(CLOCK_REALTIME, 0); } +EOF + +if compile_prog "" "" ; then + timerfd=yes +fi + # check for linux/fiemap.h and FS_IOC_FIEMAP fiemap=no cat > $TMPC << EOF @@ -3467,6 +3478,9 @@ fi if test "$signalfd" = "yes" ; then echo "CONFIG_SIGNALFD=y" >> $config_host_mak fi +if test "$timerfd" = "yes" ; then + echo "CONFIG_TIMERFD=y" >> $config_host_mak +fi if test "$tcg_interpreter" = "yes" ; then echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak fi diff --git a/qemu-timer.c b/qemu-timer.c index c7a1551..6f7fa35 100644 --- a/qemu-timer.c +++ b/qemu-timer.c @@ -35,6 +35,10 @@ #include <mmsystem.h> #endif +#ifdef CONFIG_TIMERFD +#include <sys/timerfd.h> +#endif + /***********************************************************/ /* timers */ @@ -66,6 +70,9 @@ struct qemu_alarm_timer { int (*start)(struct qemu_alarm_timer *t); void (*stop)(struct qemu_alarm_timer *t); void (*rearm)(struct qemu_alarm_timer *t, int64_t nearest_delta_ns); +#ifdef CONFIG_TIMERFD + int timerfd; +#endif #if defined(__linux__) timer_t timer; int fd; @@ -121,6 +128,12 @@ static void qemu_rearm_alarm_timer(struct qemu_alarm_timer *t) /* TODO: MIN_TIMER_REARM_NS should be optimized */ #define MIN_TIMER_REARM_NS 250000 +#ifdef CONFIG_TIMERFD +static int timerfd_start_timer(struct qemu_alarm_timer *t); +static void timerfd_stop_timer(struct qemu_alarm_timer *t); +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, int64_t delta); +#endif + #ifdef _WIN32 static int mm_start_timer(struct qemu_alarm_timer *t); @@ -148,6 +161,10 @@ static void dynticks_rearm_timer(struct qemu_alarm_timer *t, int64_t delta); #endif /* _WIN32 */ static struct qemu_alarm_timer alarm_timers[] = { +#ifdef CONFIG_TIMERFD + {"timerfd", timerfd_start_timer, + timerfd_stop_timer, timerfd_rearm_timer}, +#endif #ifndef _WIN32 #ifdef __linux__ {"dynticks", dynticks_start_timer, @@ -476,6 +493,75 @@ static void host_alarm_handler(int host_signum) #include "compatfd.h" +static void timerfd_event(void *opaque) +{ + struct qemu_alarm_timer *t = opaque; + ssize_t len; + uint64_t count; + + len = read(t->timerfd, &count, sizeof(count)); + g_assert(len == sizeof(count)); + + t->expired = true; + t->pending = true; + + /* We already process pending timers at the end of the main loop whereas + * this function is called during I/O processing. That means we know that + * pending timers will be checked before select()'ing again which means we + * don't need to explicitly call qemu_notify_event() + */ +} + +static int timerfd_start_timer(struct qemu_alarm_timer *t) +{ + t->timerfd = timerfd_create(CLOCK_REALTIME, TFD_CLOEXEC); + if (t->timerfd == -1) { + return -errno; + } + + qemu_set_fd_handler(t->timerfd, timerfd_event, NULL, t); + + return 0; +} + +static void timerfd_stop_timer(struct qemu_alarm_timer *t) +{ + qemu_set_fd_handler(t->timerfd, NULL, NULL, NULL); + close(t->timerfd); + t->timerfd = -1; +} + +static void timerfd_rearm_timer(struct qemu_alarm_timer *t, + int64_t nearest_delta_ns) +{ + int ret; + struct itimerspec timeout; + int64_t current_ns; + + if (nearest_delta_ns < MIN_TIMER_REARM_NS) { + nearest_delta_ns = MIN_TIMER_REARM_NS; + } + + /* check whether a timer is already running */ + ret = timerfd_gettime(t->timerfd, &timeout); + g_assert(ret == 0); + + current_ns = timeout.it_value.tv_sec * 1000000000LL; + current_ns += timeout.it_value.tv_nsec; + + if (current_ns && current_ns <= nearest_delta_ns) { + return; + } + + timeout.it_interval.tv_sec = 0; + timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */ + timeout.it_value.tv_sec = nearest_delta_ns / 1000000000; + timeout.it_value.tv_nsec = nearest_delta_ns % 1000000000; + + ret = timerfd_settime(t->timerfd, 0, &timeout, NULL); + g_assert(ret == 0); +} + static int dynticks_start_timer(struct qemu_alarm_timer *t) { struct sigevent ev;
timerfd is a new(ish) mechanism provided in the 2.6.25+ Linux kernels that allows for a high resolution event timer signaled through a file descriptor. This patch adds an alarm timer implementation using timerfd. timerfd has a couple advantages over dynticks. Namely, it's fd based instead of signal based which reduces the likelihood of receiving EINTR failure codes. Because we can't process timers in a signal handler, we write to a pipe() and read that in the main loop. timerfd allows us to avoid the extra write to a pipe. Finally, since timerfd is just a file descriptor, it could conceivably used to implement per-thread alarm timers. It also would integrate very easily in a glib main loop. Unfortunately, there's a lot of Windows code in qemu-timer.c and main-loop.c right now otherwise the refactoring would be trivial. I'll leave that for another day. Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Jan Kiszka <jan.kiszka@siemens.com> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> --- Please note, this is lightly tested. Since this is such a fundamental change, I'd like to do some performance analysis before committing but wanted to share early. --- configure | 14 +++++++++ qemu-timer.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 0 deletions(-)