@@ -16,12 +16,15 @@
#include "cpu-defs.h"
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+/* Timeout for synchronous stats requests (in seconds) */
+#define QEMU_BALLOON_SYNC_TIMEOUT 5
+
+typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target, Monitor *mon);
void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
-void qemu_balloon(ram_addr_t target);
+int qemu_balloon(ram_addr_t target);
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(Monitor *mon);
#endif
@@ -19,6 +19,10 @@
#include "balloon.h"
#include "virtio-balloon.h"
#include "kvm.h"
+#include "monitor.h"
+#include "qlist.h"
+#include "qint.h"
+#include "qstring.h"
#if defined(__linux__)
#include <sys/mman.h>
@@ -27,9 +31,16 @@
typedef struct VirtIOBalloon
{
VirtIODevice vdev;
- VirtQueue *ivq, *dvq;
+ VirtQueue *ivq, *dvq, *svq; /* svq: queue handled by virtio_balloon_receive_stats() */
uint32_t num_pages;
uint32_t actual;
+ uint64_t stats[VIRTIO_BALLOON_S_NR]; /* last value per VIRTIO_BALLOON_S_* tag; -1 means unset */
+ VirtQueueElement stats_vq_elem; /* element last popped from svq; pushed back to request new stats */
+ size_t stats_vq_offset; /* bytes consumed from stats_vq_elem; used as the length when pushing it back */
+ QEMUTimer *stats_timer; /* runs stats_request_timeout(); armed for user-monitor requests */
+ uint64_t stats_updated; /* host_clock reading taken by the last reset_stats() */
+ Monitor *stats_mon; /* monitor that issued the outstanding request, NULL once completed */
+ bool stats_requested; /* true while a stats request is waiting for completion */
} VirtIOBalloon;
static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +57,50 @@ static void balloon_page(void *addr, int deflate)
#endif
}
+/*
+ * reset_stats - Mark every entry of the stats array as unset
+ *
+ * Call this at device initialization and again right before storing a newly
+ * received set of statistics, so that no stale value survives when the guest
+ * reports only a subset of the supported statistics.  The update timestamp
+ * (stats_updated) is refreshed from host_clock as part of the reset.
+ */
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+    int idx = 0;
+
+    while (idx < VIRTIO_BALLOON_S_NR) {
+        dev->stats[idx] = -1;
+        idx++;
+    }
+    dev->stats_updated = qemu_get_clock(host_clock);
+}
+
+/* Store "label": val in dict, skipping values that are still unset (-1). */
+static void stat_put(QDict *dict, const char *label, uint64_t val)
+{
+    if (val == -1) {
+        return;
+    }
+    qdict_put(dict, label, qint_from_int(val));
+}
+
+/*
+ * get_stats_qobject - Build a QDict describing the current balloon state
+ *
+ * The dictionary always contains "actual" (ram_size minus the ballooned
+ * amount, in bytes).  Each guest statistic is added only if it is set, i.e.
+ * its slot in dev->stats is not -1.  "age" (milliseconds since the stats
+ * were last reset) is added only once it reaches the synchronous timeout,
+ * so callers can tell that the values are stale.
+ *
+ * Returns a new QObject; the caller owns the reference.
+ */
+static QObject *get_stats_qobject(VirtIOBalloon *dev)
+{
+    QDict *dict = qdict_new();
+    /*
+     * Widen before shifting: dev->actual is a 32-bit page count, so a 32-bit
+     * shift (and a 32-bit result) would wrap once 4GB or more is involved.
+     */
+    uint64_t ballooned = (uint64_t)dev->actual << VIRTIO_BALLOON_PFN_SHIFT;
+    uint64_t actual = ram_size - ballooned;
+    uint64_t age;
+
+    /* "actual" is mandatory, so store it without the "unset" filter */
+    qdict_put(dict, "actual", qint_from_int(actual));
+    stat_put(dict, "mem_swapped_in", dev->stats[VIRTIO_BALLOON_S_SWAP_IN]);
+    stat_put(dict, "mem_swapped_out", dev->stats[VIRTIO_BALLOON_S_SWAP_OUT]);
+    stat_put(dict, "major_page_faults", dev->stats[VIRTIO_BALLOON_S_MAJFLT]);
+    stat_put(dict, "minor_page_faults", dev->stats[VIRTIO_BALLOON_S_MINFLT]);
+    stat_put(dict, "free_mem", dev->stats[VIRTIO_BALLOON_S_MEMFREE]);
+    stat_put(dict, "total_mem", dev->stats[VIRTIO_BALLOON_S_MEMTOT]);
+
+    /* If age is over the timeout threshold, report it */
+    age = (qemu_get_clock(host_clock) - dev->stats_updated) /
+          (get_ticks_per_sec() / 1000);
+    if (age >= QEMU_BALLOON_SYNC_TIMEOUT * 1000) {
+        stat_put(dict, "age", age);
+    }
+
+    return QOBJECT(dict);
+}
+
/* FIXME: once we do a virtio refactoring, this will get subsumed into common
* code */
static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
@@ -104,6 +159,73 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
}
}
+/*
+ * complete_stats_request - Clean up and report statistics.
+ *
+ * Does nothing unless a request is outstanding.  For a user monitor the
+ * timeout timer is cancelled, the stats are printed and the monitor is
+ * resumed; for a control-mode (QMP) monitor a BALLOON event is emitted.
+ * Afterwards the pending-request state is cleared.
+ */
+static void complete_stats_request(VirtIOBalloon *vb)
+{
+    QObject *stats;
+
+    /* Only respond to incoming stats if we requested them */
+    if (!vb->stats_requested) {
+        return;
+    }
+
+    stats = get_stats_qobject(vb);
+    if (!monitor_ctrl_mode(vb->stats_mon)) {
+        qemu_del_timer(vb->stats_timer);
+        monitor_print_balloon(vb->stats_mon, stats);
+        monitor_resume(vb->stats_mon);
+    } else {
+        monitor_protocol_event(QEVENT_BALLOON, stats);
+    }
+
+    /*
+     * Drop the reference obtained from get_stats_qobject(); without this the
+     * dictionary is leaked on every request.
+     * NOTE(review): assumes monitor_protocol_event() takes its own reference
+     * on the data object rather than consuming ours - confirm.
+     */
+    qobject_decref(stats);
+
+    vb->stats_mon = NULL;
+    vb->stats_requested = false;
+}
+
+/*
+ * stats_request_timeout - Timer callback for a synchronous request timeout
+ *
+ * When a synchronous request times out, the request is completed with the
+ * statistics currently held (i.e. the old ones).  opaque is the
+ * VirtIOBalloon the timer was created for.
+ */
+static void stats_request_timeout(void *opaque)
+{
+    complete_stats_request(opaque);
+}
+
+/*
+ * virtio_balloon_receive_stats - Output handler for the statistics virtqueue
+ *
+ * Pops one element from the queue into s->stats_vq_elem, resets the cached
+ * statistics, then copies every complete tag/value record from the element
+ * into s->stats.  Records with an unknown tag are ignored.  The number of
+ * bytes consumed is remembered in s->stats_vq_offset, and any outstanding
+ * stats request is completed.
+ */
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
+    VirtQueueElement *elem = &s->stats_vq_elem;
+    VirtIOBalloonStat stat;
+    size_t consumed;
+
+    if (!virtqueue_pop(vq, elem)) {
+        return;
+    }
+
+    /* Initialize the stats to get rid of any stale values.  This is only
+     * needed to handle the case where a guest supports fewer stats than it
+     * used to (ie. it has booted into an old kernel).
+     */
+    reset_stats(s);
+
+    for (consumed = 0;
+         memcpy_from_iovector(&stat, consumed, sizeof(stat), elem->out_sg,
+                              elem->out_num) == sizeof(stat);
+         consumed += sizeof(stat)) {
+        uint16_t tag = tswap16(stat.tag);
+        uint64_t val = tswap64(stat.val);
+
+        if (tag < VIRTIO_BALLOON_S_NR) {
+            s->stats[tag] = val;
+        }
+    }
+    s->stats_vq_offset = consumed;
+
+    complete_stats_request(s);
+}
+
static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
{
VirtIOBalloon *dev = to_virtio_balloon(vdev);
@@ -126,10 +248,56 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
{
+ /* Always offer the memory statistics virtqueue feature bit */
+ f |= (1 << VIRTIO_BALLOON_F_STATS_VQ);
return f;
}
-static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
+/*
+ * request_stats - Initiate a request for memory statistics
+ *
+ * If stats are not supported, just report 'actual'; otherwise, two types of
+ * requests are supported:
+ *
+ * Synchronous requests are intended to be used with the user monitor.  The
+ * monitor is suspended until the stats are received or the timer expires.
+ * Either the newly-updated stats or the current stats (in the case of a
+ * timeout) are printed to the monitor and the monitor is resumed.
+ *
+ * Asynchronous requests are intended for QMP.  A QMP event will be generated
+ * only if the stats are updated.
+ *
+ * A NULL mon is ignored: there is nobody to report to, and any request that
+ * is already pending is left untouched.
+ */
+static void request_stats(VirtIOBalloon *vb, Monitor *mon)
+{
+    /*
+     * qemu_balloon() hands the balloon handler a NULL monitor, and a target
+     * of 0 routes that call here.  monitor_ctrl_mode() dereferences its
+     * argument, so bail out instead of crashing.
+     */
+    if (!mon) {
+        return;
+    }
+
+    /* If a user-monitor is already waiting, resume that one first */
+    if (vb->stats_requested && !monitor_ctrl_mode(vb->stats_mon)) {
+        qemu_del_timer(vb->stats_timer);
+        monitor_resume(vb->stats_mon);
+    }
+    vb->stats_requested = true;
+    vb->stats_mon = mon;
+
+    /* Set up a synchronous request for a user-monitor. */
+    if (!monitor_ctrl_mode(mon)) {
+        /*
+         * NOTE(review): the deadline is taken from vm_clock.  If that clock
+         * does not advance while the guest is stopped, the suspended monitor
+         * would stay blocked until the guest runs again - confirm whether a
+         * real-time clock is wanted here.
+         */
+        uint64_t later = qemu_get_clock(vm_clock) +
+                         QEMU_BALLOON_SYNC_TIMEOUT * get_ticks_per_sec();
+        monitor_suspend(mon);
+        qemu_mod_timer(vb->stats_timer, later);
+    }
+
+    if (vb->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) {
+        /*
+         * Hand the stats buffer back to the guest to ask for an update.
+         * NOTE(review): this presumes the guest has already supplied a
+         * buffer (stats_vq_elem filled in by virtio_balloon_receive_stats)
+         * and that it is not pushed twice - verify.
+         */
+        virtqueue_push(vb->svq, &vb->stats_vq_elem, vb->stats_vq_offset);
+        virtio_notify(&vb->vdev, vb->svq);
+    } else {
+        /* Stats are not supported.  Clear out any stale values that might
+         * have been set by a more featureful guest kernel.
+         */
+        reset_stats(vb);
+        complete_stats_request(vb);
+    }
+}
+
+static void virtio_balloon_to_target(void *opaque, ram_addr_t target,
+ Monitor *mon)
{
VirtIOBalloon *dev = opaque;
@@ -139,9 +307,9 @@ static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
if (target) {
dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
virtio_notify_config(&dev->vdev);
+ } else {
+ request_stats(dev, mon);
}
-
- return ram_size - (dev->actual << VIRTIO_BALLOON_PFN_SHIFT);
}
static void virtio_balloon_save(QEMUFile *f, void *opaque)
@@ -152,6 +320,11 @@ static void virtio_balloon_save(QEMUFile *f, void *opaque)
qemu_put_be32(f, s->num_pages);
qemu_put_be32(f, s->actual);
+ qemu_put_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement));
+ qemu_put_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t));
+ qemu_put_timer(f, s->stats_timer);
+ qemu_put_be32(f, s->stats_updated);
+/* XXX save monitor and stats_requested */
}
static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
@@ -165,6 +338,11 @@ static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
s->num_pages = qemu_get_be32(f);
s->actual = qemu_get_be32(f);
+ qemu_get_buffer(f, (uint8_t *)&s->stats_vq_elem, sizeof(VirtQueueElement));
+ qemu_get_buffer(f, (uint8_t *)&s->stats_vq_offset, sizeof(size_t));
+ qemu_get_timer(f, s->stats_timer);
+ s->stats_updated = qemu_get_be32(f);
+/* XXX Load monitor and stats_requested */
return 0;
}
@@ -183,7 +361,10 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev)
s->ivq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
+ s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats);
+ s->stats_timer = qemu_new_timer(vm_clock, stats_request_timeout, s);
+ reset_stats(s);
qemu_add_balloon_handler(virtio_balloon_to_target, s);
register_savevm("virtio-balloon", -1, 1, virtio_balloon_save, virtio_balloon_load, s);
@@ -25,6 +25,7 @@
/* The feature bitmap for virtio balloon */
#define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */
+#define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory stats virtqueue */
/* Size of a PFN in the balloon interface. */
#define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -37,4 +38,18 @@ struct virtio_balloon_config
uint32_t actual;
};
+/* Memory Statistics */
+#define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */
+#define VIRTIO_BALLOON_S_SWAP_OUT 1 /* Amount of memory swapped out */
+#define VIRTIO_BALLOON_S_MAJFLT 2 /* Number of major faults */
+#define VIRTIO_BALLOON_S_MINFLT 3 /* Number of minor faults */
+#define VIRTIO_BALLOON_S_MEMFREE 4 /* Total amount of free memory */
+#define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */
+#define VIRTIO_BALLOON_S_NR 6
+
+/*
+ * One statistics record as read from a stats virtqueue buffer: a
+ * VIRTIO_BALLOON_S_* tag followed by its 64-bit value.  The struct is
+ * packed, so a record occupies 10 bytes with no padding between the fields.
+ */
+typedef struct VirtIOBalloonStat {
+ uint16_t tag;
+ uint64_t val;
+} __attribute__((packed)) VirtIOBalloonStat;
+
#endif
@@ -133,7 +133,7 @@ static void monitor_command_cb(Monitor *mon, const char *cmdline,
void *opaque);
/* Return true if in control mode, false otherwise */
-static inline int monitor_ctrl_mode(const Monitor *mon)
+int monitor_ctrl_mode(const Monitor *mon)
{
return (mon->flags & MONITOR_USE_CONTROL);
}
@@ -357,6 +357,9 @@ void monitor_protocol_event(MonitorEvent event, QObject *data)
case QEVENT_STOP:
event_name = "STOP";
break;
+ case QEVENT_BALLOON:
+ event_name = "BALLOON";
+ break;
default:
abort();
break;
@@ -2058,43 +2061,27 @@ static void do_info_status(Monitor *mon, QObject **ret_data)
vm_running, singlestep);
}
-static ram_addr_t balloon_get_value(void)
+static void print_balloon_stat(const char *key, QObject *obj, void *opaque)
{
- ram_addr_t actual;
-
- if (kvm_enabled() && !kvm_has_sync_mmu()) {
- qemu_error_new(QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
- return 0;
- }
-
- actual = qemu_balloon_status();
- if (actual == 0) {
- qemu_error_new(QERR_DEVICE_NOT_ACTIVE, "balloon");
- return 0;
- }
-
- return actual;
-}
+ Monitor *mon = opaque;
-/**
- * do_balloon(): Request VM to change its memory allocation
- */
-static void do_balloon(Monitor *mon, const QDict *qdict, QObject **ret_data)
-{
- if (balloon_get_value()) {
- /* ballooning is active */
- qemu_balloon(qdict_get_int(qdict, "value"));
- }
+ if (strcmp(key, "actual"))
+ monitor_printf(mon, ",%s=%" PRId64, key,
+ qint_get_int(qobject_to_qint(obj)));
}
-static void monitor_print_balloon(Monitor *mon, const QObject *data)
+/*
+ * Print a balloon statistics dictionary on a single line: "actual" first,
+ * shifted right by 20 bits (bytes to MB), then ",key=value" for every other
+ * entry via print_balloon_stat().  Prints nothing at all when the dictionary
+ * has no "actual" key.
+ */
+void monitor_print_balloon(Monitor *mon, const QObject *data)
{
QDict *qdict;
qdict = qobject_to_qdict(data);
+ if (!qdict_haskey(qdict, "actual"))
+ return;
- monitor_printf(mon, "balloon: actual=%" PRId64 "\n",
- qdict_get_int(qdict, "balloon") >> 20);
+ monitor_printf(mon, "balloon: actual=%" PRId64,
+ qdict_get_int(qdict, "actual") >> 20);
+ qdict_iter(qdict, print_balloon_stat, mon);
+ monitor_printf(mon, "\n");
}
/**
@@ -2102,21 +2089,52 @@ static void monitor_print_balloon(Monitor *mon, const QObject *data)
*
* Return a QDict with the following information:
*
- * - "balloon": current balloon value in bytes
+ * - "actual": current balloon value in bytes
+ * The following fields may or may not be present:
+ * - "mem_swapped_in": Amount of memory swapped in (bytes)
+ * - "mem_swapped_out": Amount of memory swapped out (bytes)
+ * - "major_page_faults": Number of major faults
+ * - "minor_page_faults": Number of minor faults
+ * - "free_mem": Total amount of free and unused memory (bytes)
+ * - "total_mem": Total amount of available memory (bytes)
*
* Example:
*
- * { "balloon": 1073741824 }
+ * { "actual": 1073741824, "mem_swapped_in": 0, "mem_swapped_out": 0,
+ * "major_page_faults": 142, "minor_page_faults": 239245,
+ * "free_mem": 1014185984, "total_mem": 1044668416 }
*/
static void do_info_balloon(Monitor *mon, QObject **ret_data)
{
- ram_addr_t actual;
+ int ret;
+
+ if (kvm_enabled() && !kvm_has_sync_mmu()) {
+ qemu_error_new(QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
+ return;
+ }
+
+ ret = qemu_balloon_status(mon);
+ if (!ret) {
+ qemu_error_new(QERR_DEVICE_NOT_ACTIVE, "balloon");
+ return;
+ }
+}
+
+/**
+ * do_balloon(): Request VM to change its memory allocation
+ */
+static void do_balloon(Monitor *mon, const QDict *qdict, QObject **ret_data)
+{
+ int ret;
- actual = balloon_get_value();
- if (actual != 0) {
- *ret_data = qobject_from_jsonf("{ 'balloon': %" PRId64 "}",
- (int64_t) actual);
+ if (kvm_enabled() && !kvm_has_sync_mmu()) {
+ qemu_error_new(QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
+ return;
}
+
+ ret = qemu_balloon(qdict_get_int(qdict, "value"));
+ if (ret == 0)
+ qemu_error_new(QERR_DEVICE_NOT_ACTIVE, "balloon");
}
static qemu_acl *find_acl(Monitor *mon, const char *name)
@@ -20,6 +20,7 @@ typedef enum MonitorEvent {
QEVENT_RESET,
QEVENT_POWERDOWN,
QEVENT_STOP,
+ QEVENT_BALLOON,
QEVENT_MAX,
} MonitorEvent;
@@ -32,6 +33,7 @@ void monitor_resume(Monitor *mon);
void monitor_read_bdrv_key_start(Monitor *mon, BlockDriverState *bs,
BlockDriverCompletionFunc *completion_cb,
void *opaque);
+void monitor_print_balloon(Monitor *mon, const QObject *data);
int monitor_get_fd(Monitor *mon, const char *fdname);
@@ -41,4 +43,6 @@ void monitor_printf(Monitor *mon, const char *fmt, ...)
void monitor_print_filename(Monitor *mon, const char *filename);
void monitor_flush(Monitor *mon);
+extern int monitor_ctrl_mode(const Monitor *mon);
+
#endif /* !MONITOR_H */
@@ -362,17 +362,24 @@ void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque)
qemu_balloon_event_opaque = opaque;
}
-void qemu_balloon(ram_addr_t target)
+/*
+ * Pass a new balloon target to the registered balloon handler (with no
+ * monitor attached).  Returns 1 if a handler is registered, 0 otherwise.
+ */
+int qemu_balloon(ram_addr_t target)
{
- if (qemu_balloon_event)
- qemu_balloon_event(qemu_balloon_event_opaque, target);
+    if (!qemu_balloon_event) {
+        return 0;
+    }
+    qemu_balloon_event(qemu_balloon_event_opaque, target, NULL);
+    return 1;
}
-ram_addr_t qemu_balloon_status(void)
+/*
+ * Ask the registered balloon handler for its status (target 0) on behalf of
+ * mon.  Returns 1 if a handler is registered, 0 otherwise.
+ */
+int qemu_balloon_status(Monitor *mon)
{
- if (qemu_balloon_event)
- return qemu_balloon_event(qemu_balloon_event_opaque, 0);
- return 0;
+    if (!qemu_balloon_event) {
+        return 0;
+    }
+    qemu_balloon_event(qemu_balloon_event_opaque, 0, mon);
+    return 1;
}
/***********************************************************/
This version improves support for multiple monitors and has been ported up to
HEAD as of 01/14.

Changes since V6:
 - Integrated with virtio qdev feature bit changes (specifically: Use
   VirtIODevice 'guest_features' to check if memory stats is a negotiated
   feature)
 - Track which monitor initiated the most recent stats request. Now it does
   the Right Thing(tm) with multiple monitors making parallel requests.

Changes since V5:
 - Asynchronous query-balloon mode for QMP
 - Add timeout to prevent hanging the user monitor in synchronous mode

Changes since V4:
 - Virtio spec updated:
   http://ozlabs.org/~rusty/virtio-spec/virtio-spec-0.8.2.pdf
 - Guest-side Linux implementation applied by Rusty
 - Start using the QObject infrastructure
 - All endian conversions done in the host
 - Report stats that reference a quantity of memory in bytes

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests. The current method
employs a daemon running in each guest that communicates memory statistics to
a host daemon at a specified time interval. The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure. This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate
with the host. A simpler approach is to collect memory statistics in the
virtio balloon driver and communicate them directly to the hypervisor.
Signed-off-by: Adam Litke <agl@us.ibm.com>
To: Anthony Liguori <aliguori@us.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: qemu-devel@nongnu.org