@@ -135,6 +135,19 @@ As older slaves don't support negotiating protocol features,
a feature bit was dedicated for this purpose:
#define VHOST_USER_F_PROTOCOL_FEATURES 30
+Multiple queue support
+-------------------
+Multiple queue is treated as a protocol extension, hence the slave has to
+implement protocol features first. Multiple queues is supported only when
+the protocol feature VHOST_USER_PROTOCOL_F_MQ(bit 0) is set.
+
+The max number of queues the slave supports can be queried with message
+VHOST_USER_GET_PROTOCOL_FEATURES. Master should stop when the number of
+requested queues is bigger than that.
+
+As all queues share one connection, the master uses a unique index for each
+queue in the sent message to identify a specified queue.
+
Message types
-------------
@@ -148,6 +148,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
fprintf(stderr, "vhost-net requires net backend to be setup\n");
goto fail;
}
+ net->nc = options->net_backend;
net->dev.max_queues = 1;
@@ -164,8 +165,10 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
net->dev.backend_features = 0;
net->dev.protocol_features = 0;
net->backend = -1;
+
+ /* vhost-user needs vq_index to initiate a specific queue pair */
+ net->dev.vq_index = net->nc->queue_index * 2;
}
- net->nc = options->net_backend;
net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
@@ -187,6 +187,23 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
0 : -1;
}
+static bool vhost_user_one_time_request(VhostUserRequest request)
+{
+ switch (request) {
+ case VHOST_USER_GET_FEATURES:
+ case VHOST_USER_SET_FEATURES:
+ case VHOST_USER_GET_PROTOCOL_FEATURES:
+ case VHOST_USER_SET_PROTOCOL_FEATURES:
+ case VHOST_USER_SET_OWNER:
+ case VHOST_USER_RESET_DEVICE:
+ case VHOST_USER_SET_MEM_TABLE:
+ case VHOST_USER_GET_QUEUE_NUM:
+ return true;
+ default:
+ return false;
+ }
+}
+
static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
void *arg)
{
@@ -207,6 +224,15 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
msg_request = request;
}
+ /*
+ * For none-vring specific requests, like VHOST_USER_GET_FEATURES,
+ * we just need send it once in the first time. For later such
+ * request, we just ignore it.
+ */
+ if (vhost_user_one_time_request(msg_request) && dev->vq_index != 0) {
+ return 0;
+ }
+
msg.request = msg_request;
msg.flags = VHOST_USER_VERSION;
msg.size = 0;
@@ -269,17 +295,20 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
case VHOST_USER_SET_VRING_NUM:
case VHOST_USER_SET_VRING_BASE:
memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+ msg.addr.index += dev->vq_index;
msg.size = sizeof(m.state);
break;
case VHOST_USER_GET_VRING_BASE:
memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
+ msg.addr.index += dev->vq_index;
msg.size = sizeof(m.state);
need_reply = 1;
break;
case VHOST_USER_SET_VRING_ADDR:
memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
+ msg.addr.index += dev->vq_index;
msg.size = sizeof(m.addr);
break;
@@ -287,7 +316,7 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
case VHOST_USER_SET_VRING_CALL:
case VHOST_USER_SET_VRING_ERR:
file = arg;
- msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
+ msg.u64 = (file->index + dev->vq_index) & VHOST_USER_VRING_IDX_MASK;
msg.size = sizeof(m.u64);
if (ioeventfd_enabled() && file->fd > 0) {
fds[fd_num++] = file->fd;
@@ -331,6 +360,7 @@ static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
error_report("Received bad msg size.");
return -1;
}
+ msg.state.index -= dev->vq_index;
memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
break;
default:
@@ -92,6 +92,7 @@ struct NetClientState {
NetClientDestructor *destructor;
unsigned int queue_index;
unsigned rxfilter_notify_enabled:1;
+ void *opaque;
};
typedef struct NICState {
@@ -29,48 +35,81 @@ typedef struct VhostUserChardevProps {
VHostNetState *vhost_user_get_vhost_net(NetClientState *nc)
{
- VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
+ VhostUserState *s = nc->opaque;
assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
- return s->vhost_net;
-}
-
-static int vhost_user_running(VhostUserState *s)
-{
- return (s->vhost_net) ? 1 : 0;
+ return s->pairs[nc->queue_index].vhost_net;
}
static int vhost_user_start(VhostUserState *s)
{
VhostNetOptions options;
+ VHostNetState *vhost_net;
+ int max_queues;
+ int i = 0;
- if (vhost_user_running(s)) {
+ if (s->running) {
return 0;
}
options.backend_type = VHOST_BACKEND_TYPE_USER;
- options.net_backend = &s->nc;
options.opaque = s->chr;
- s->vhost_net = vhost_net_init(&options);
+ options.net_backend = s->pairs[i].nc;
+ vhost_net = s->pairs[i++].vhost_net = vhost_net_init(&options);
+
+ max_queues = vhost_net_get_max_queues(vhost_net);
+ if (s->queues > max_queues) {
+ error_report("you are asking more queues than supported: %d\n",
+ max_queues);
+ return -1;
+ }
+
+ for (; i < s->queues; i++) {
+ options.net_backend = s->pairs[i].nc;
+
+ s->pairs[i].vhost_net = vhost_net_init(&options);
+ if (!s->pairs[i].vhost_net) {
+ return -1;
+ }
+ }
+ s->running = true;
- return vhost_user_running(s) ? 0 : -1;
+ return 0;
}
static void vhost_user_stop(VhostUserState *s)
{
- if (vhost_user_running(s)) {
- vhost_net_cleanup(s->vhost_net);
+ int i;
+ VHostNetState *vhost_net;
+
+ if (!s->running) {
+ return;
}
- s->vhost_net = 0;
+ for (i = 0; i < s->queues; i++) {
+ vhost_net = s->pairs[i].vhost_net;
+ if (vhost_net) {
+ vhost_net_cleanup(vhost_net);
+ }
+ }
+
+ s->running = false;
}
static void vhost_user_cleanup(NetClientState *nc)
{
- VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc);
+ VhostUserState *s = nc->opaque;
+ VHostNetState *vhost_net = s->pairs[nc->queue_index].vhost_net;
+
+ if (vhost_net) {
+ vhost_net_cleanup(vhost_net);
+ }
- vhost_user_stop(s);
qemu_purge_queued_packets(nc);
+
+ if (nc->queue_index == s->queues - 1) {
+ free(s);
+ }
}
static bool vhost_user_has_vnet_hdr(NetClientState *nc)
@@ -97,18 +136,25 @@ static NetClientInfo net_vhost_user_info = {
static void net_vhost_link_down(VhostUserState *s, bool link_down)
{
- s->nc.link_down = link_down;
+ NetClientState *nc;
+ int i;
- if (s->nc.peer) {
- s->nc.peer->link_down = link_down;
- }
+ for (i = 0; i < s->queues; i++) {
+ nc = s->pairs[i].nc;
- if (s->nc.info->link_status_changed) {
- s->nc.info->link_status_changed(&s->nc);
- }
+ nc->link_down = link_down;
+
+ if (nc->peer) {
+ nc->peer->link_down = link_down;
+ }
- if (s->nc.peer && s->nc.peer->info->link_status_changed) {
- s->nc.peer->info->link_status_changed(s->nc.peer);
+ if (nc->info->link_status_changed) {
+ nc->info->link_status_changed(nc);
+ }
+
+ if (nc->peer && nc->peer->info->link_status_changed) {
+ nc->peer->info->link_status_changed(nc->peer);
+ }
}
}
@@ -118,7 +164,9 @@ static void net_vhost_user_event(void *opaque, int event)
switch (event) {
case CHR_EVENT_OPENED:
- vhost_user_start(s);
+ if (vhost_user_start(s) < 0) {
+ exit(1);
+ }
net_vhost_link_down(s, false);
error_report("chardev \"%s\" went up", s->chr->label);
break;
@@ -131,23 +179,28 @@ static void net_vhost_user_event(void *opaque, int event)
}
static int net_vhost_user_init(NetClientState *peer, const char *device,
- const char *name, CharDriverState *chr)
+ const char *name, VhostUserState *s)
{
NetClientState *nc;
- VhostUserState *s;
+ CharDriverState *chr = s->chr;
+ int i;
- nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
+ for (i = 0; i < s->queues; i++) {
+ nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name);
- snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user to %s",
- chr->label);
+ snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s",
+ i, chr->label);
- s = DO_UPCAST(VhostUserState, nc, nc);
+ /* We don't provide a receive callback */
+ nc->receive_disabled = 1;
- /* We don't provide a receive callback */
- s->nc.receive_disabled = 1;
- s->chr = chr;
+ nc->queue_index = i;
+ nc->opaque = s;
- qemu_chr_add_handlers(s->chr, NULL, NULL, net_vhost_user_event, s);
+ s->pairs[i].nc = nc;
+ }
+
+ qemu_chr_add_handlers(chr, NULL, NULL, net_vhost_user_event, s);
return 0;
}
@@ -226,8 +279,10 @@ static int net_vhost_check_net(void *opaque, QemuOpts *opts, Error **errp)
int net_init_vhost_user(const NetClientOptions *opts, const char *name,
NetClientState *peer, Error **errp)
{
+ int queues;
const NetdevVhostUserOptions *vhost_user_opts;
CharDriverState *chr;
+ VhostUserState *s;
assert(opts->kind == NET_CLIENT_OPTIONS_KIND_VHOST_USER);
vhost_user_opts = opts->vhost_user;
@@ -243,6 +298,11 @@ int net_init_vhost_user(const NetClientOptions *opts, const char *name,
return -1;
}
+ queues = vhost_user_opts->has_queues ? vhost_user_opts->queues : 1;
+ s = g_malloc0(sizeof(VhostUserState) +
+ queues * sizeof(VhostUserNetPair));
+ s->queues = queues;
+ s->chr = chr;
- return net_vhost_user_init(peer, "vhost_user", name, chr);
+ return net_vhost_user_init(peer, "vhost_user", name, s);
}
@@ -2480,12 +2480,16 @@
#
# @vhostforce: #optional vhost on for non-MSIX virtio guests (default: false).
#
+# @queues: #optional number of queues to be created for multiqueue vhost-user
+# (default: 1) (Since 2.5)
+#
# Since 2.1
##
{ 'struct': 'NetdevVhostUserOptions',
'data': {
'chardev': 'str',
- '*vhostforce': 'bool' } }
+ '*vhostforce': 'bool',
+ '*queues': 'int' } }
##
# @NetClientOptions
@@ -1963,13 +1963,14 @@ The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the
required hub automatically.
-@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off]
+@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
Establish a vhost-user netdev, backed by a chardev @var{id}. The chardev should
be a unix domain socket backed one. The vhost-user uses a specifically defined
protocol to pass vhost ioctl replacement messages to an application on the other
end of the socket. On non-MSIX guests, the feature can be forced with
-@var{vhostforce}.
+@var{vhostforce}. Use 'queues=@var{n}' to specify the number of queues to
+be created for multiqueue vhost-user.
Example:
@example