@@ -15,6 +15,7 @@
#include "net.h"
#include "net/checksum.h"
#include "net/tap.h"
+#include "net/socket.h"
#include "qemu-timer.h"
#include "virtio-net.h"
@@ -133,6 +134,9 @@ static int peer_has_vnet_hdr(VirtIONet *n)
case NET_CLIENT_TYPE_TAP:
n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
break;
+ case NET_CLIENT_TYPE_SOCKET_RAW:
+ n->has_vnet_hdr = sock_raw_has_vnet_hdr(n->nic->nc.peer);
+ break;
default:
return 0;
}
@@ -149,6 +153,9 @@ static int peer_has_ufo(VirtIONet *n)
case NET_CLIENT_TYPE_TAP:
n->has_ufo = tap_has_ufo(n->nic->nc.peer);
break;
+ case NET_CLIENT_TYPE_SOCKET_RAW:
+ n->has_ufo = sock_raw_has_ufo(n->nic->nc.peer);
+ break;
default:
return 0;
}
@@ -165,6 +172,9 @@ static void peer_using_vnet_hdr(VirtIONet *n, int using_vnet_hdr)
case NET_CLIENT_TYPE_TAP:
tap_using_vnet_hdr(n->nic->nc.peer, using_vnet_hdr);
break;
+ case NET_CLIENT_TYPE_SOCKET_RAW:
+ sock_raw_using_vnet_hdr(n->nic->nc.peer, using_vnet_hdr);
+ break;
default:
break;
}
@@ -180,6 +190,9 @@ static void peer_set_offload(VirtIONet *n, int csum, int tso4, int tso6,
case NET_CLIENT_TYPE_TAP:
tap_set_offload(n->nic->nc.peer, csum, tso4, tso6, ecn, ufo);
break;
+ case NET_CLIENT_TYPE_SOCKET_RAW:
+ sock_raw_set_offload(n->nic->nc.peer, csum, tso4, tso6, ecn, ufo);
+ break;
default:
break;
}
@@ -1002,6 +1002,11 @@ static struct {
.type = QEMU_OPT_STRING,
.help = "UDP multicast address and port number",
},
+ {
+ .name = "ifname",
+ .type = QEMU_OPT_STRING,
+ .help = "interface name",
+ },
{ /* end of list */ }
},
#ifdef CONFIG_VDE
@@ -34,7 +34,8 @@ typedef enum {
NET_CLIENT_TYPE_TAP,
NET_CLIENT_TYPE_SOCKET,
NET_CLIENT_TYPE_VDE,
- NET_CLIENT_TYPE_DUMP
+ NET_CLIENT_TYPE_DUMP,
+ NET_CLIENT_TYPE_SOCKET_RAW,
} net_client_type;
typedef void (NetPoll)(VLANClientState *, bool enable);
@@ -32,6 +32,327 @@
#include "qemu_socket.h"
#include "sysemu.h"
+#include <netpacket/packet.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+/* Maximum GSO packet size (64k) plus plenty of room for
+ * the ethernet and virtio_net headers
+ */
+#define RAW_BUFSIZE (4096 + 65536)
+
+typedef struct NetSocketRawState { /* per-client state of a raw packet-socket backend */
+ VLANClientState nc; /* embedded client; the helpers below recover this struct from it via DO_UPCAST */
+ int fd; /* socket descriptor that is polled, read and written */
+ uint8_t buf[RAW_BUFSIZE]; /* receive buffer filled by sock_raw_send() */
+ int promisc; /* NOTE(review): not referenced by any code visible in this patch */
+ unsigned int read_poll:1; /* 1 while the read handlers are registered on fd */
+ unsigned int write_poll:1; /* 1 while the write handler is registered on fd */
+ unsigned int has_vnet_hdr:1; /* result of sock_raw_probe_vnet_hdr() at init time */
+ unsigned int using_vnet_hdr:1; /* set through sock_raw_using_vnet_hdr(); when clear, headers are added/stripped here */
+ unsigned int has_ufo:1; /* value reported by sock_raw_has_ufo(); set to 1 at init time */
+} NetSocketRawState;
+
+struct virtio_net_hdr /* header placed in front of a frame when has_vnet_hdr is set; this file only zero-fills it and uses its size */
+{
+ uint8_t flags; /* NOTE(review): field meanings are not established by this file — confirm against the virtio-net definition */
+ uint8_t gso_type;
+ uint16_t hdr_len;
+ uint16_t gso_size;
+ uint16_t csum_start;
+ uint16_t csum_offset;
+};
+
+static int sock_raw_can_send(void *opaque); /* forward declaration: used by sock_raw_update_fd_handler() before its definition */
+static void sock_raw_send(void *opaque); /* forward declaration: read handler, defined further down */
+static void sock_raw_writable(void *opaque); /* forward declaration: write handler, defined further down */
+
+/*
+ * Re-register the handlers for s->fd so they match the current
+ * read_poll / write_poll flags. A cleared flag registers NULL for the
+ * corresponding handler(s).
+ */
+static void sock_raw_update_fd_handler(NetSocketRawState *s)
+{
+    if (s->read_poll) {
+        qemu_set_fd_handler2(s->fd,
+                             sock_raw_can_send,
+                             sock_raw_send,
+                             s->write_poll ? sock_raw_writable : NULL,
+                             s);
+    } else {
+        qemu_set_fd_handler2(s->fd,
+                             NULL,
+                             NULL,
+                             s->write_poll ? sock_raw_writable : NULL,
+                             s);
+    }
+}
+
+/*
+ * Turn read polling on (enable != 0) or off (enable == 0) and
+ * re-register the fd handlers accordingly.
+ */
+static void sock_raw_read_poll(NetSocketRawState *s, int enable)
+{
+    s->read_poll = (enable != 0);
+
+    sock_raw_update_fd_handler(s);
+}
+
+/*
+ * Turn write polling on (enable != 0) or off (enable == 0) and
+ * re-register the fd handlers accordingly.
+ */
+static void sock_raw_write_poll(NetSocketRawState *s, int enable)
+{
+    s->write_poll = (enable != 0);
+
+    sock_raw_update_fd_handler(s);
+}
+
+/*
+ * Write handler for the socket: stop polling for writability, then
+ * flush the packets queued for this client.
+ */
+static void sock_raw_writable(void *opaque)
+{
+    NetSocketRawState *state = opaque;
+
+    /* Polling is disabled first; a later EAGAIN re-enables it. */
+    sock_raw_write_poll(state, 0);
+
+    qemu_flush_queued_packets(&state->nc);
+}
+
+/*
+ * Write one packet, described by iov[0..iovcnt-1], to the socket.
+ *
+ * The write is retried while it is interrupted (EINTR). When the socket
+ * would block (EAGAIN) write polling is enabled and 0 is returned.
+ *
+ * Returns the number of bytes written, 0 when the socket would block,
+ * or -1 on any other error (a diagnostic is printed to stderr).
+ */
+static ssize_t sock_raw_write_packet(NetSocketRawState *s,
+                                     const struct iovec *iov,
+                                     int iovcnt)
+{
+    ssize_t len;
+
+    do {
+        len = writev(s->fd, iov, iovcnt);
+    } while (len == -1 && errno == EINTR);
+
+    if (len == -1 && errno == EAGAIN) {
+        sock_raw_write_poll(s, 1);
+        return 0;
+    }
+
+    if (len == -1) {
+        /* Diagnostics belong on stderr, like every other error in this file. */
+        fprintf(stderr, "raw_write_packet: errno:%d\n", errno);
+    }
+
+    return len;
+}
+
+/*
+ * Scatter/gather receive callback: write the packet in iov[] to the socket.
+ *
+ * When the socket carries a vnet header but the peer is not supplying
+ * one, a zeroed virtio_net_hdr is inserted in front of the caller's
+ * vectors.
+ */
+static ssize_t sock_raw_receive_iov(VLANClientState *nc,
+                                    const struct iovec *iov,
+                                    int iovcnt)
+{
+    NetSocketRawState *s = DO_UPCAST(NetSocketRawState, nc, nc);
+    struct virtio_net_hdr hdr = { 0, };
+    struct iovec iov_copy[iovcnt + 1];
+
+    if (!s->has_vnet_hdr || s->using_vnet_hdr) {
+        /* No header needs to be added: pass the vectors through. */
+        return sock_raw_write_packet(s, iov, iovcnt);
+    }
+
+    iov_copy[0].iov_base = &hdr;
+    iov_copy[0].iov_len = sizeof(hdr);
+    memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
+
+    return sock_raw_write_packet(s, iov_copy, iovcnt + 1);
+}
+
+/*
+ * Write a packet that carries no vnet header of its own. A zeroed
+ * virtio_net_hdr is put in front of it whenever the socket has vnet
+ * headers.
+ */
+static ssize_t sock_raw_receive_raw(VLANClientState *nc, const uint8_t *buf,
+                                    size_t size)
+{
+    NetSocketRawState *s = DO_UPCAST(NetSocketRawState, nc, nc);
+    struct virtio_net_hdr hdr = { 0, };
+    struct iovec iov[2];
+    struct iovec *next = iov;
+
+    if (s->has_vnet_hdr) {
+        next->iov_base = &hdr;
+        next->iov_len = sizeof(hdr);
+        next++;
+    }
+
+    next->iov_base = (char *)buf;
+    next->iov_len = size;
+    next++;
+
+    return sock_raw_write_packet(s, iov, (int)(next - iov));
+}
+
+/*
+ * Receive callback for a single contiguous buffer. The buffer is written
+ * as-is, unless the socket has vnet headers and the peer is not using
+ * them, in which case sock_raw_receive_raw() adds the header.
+ */
+static ssize_t sock_raw_receive(VLANClientState *nc, const uint8_t *buf,
+                                size_t size)
+{
+    NetSocketRawState *s = DO_UPCAST(NetSocketRawState, nc, nc);
+    struct iovec iov[1];
+
+    if (!s->has_vnet_hdr || s->using_vnet_hdr) {
+        iov[0].iov_base = (char *)buf;
+        iov[0].iov_len = size;
+
+        return sock_raw_write_packet(s, iov, 1);
+    }
+
+    return sock_raw_receive_raw(nc, buf, size);
+}
+
+/*
+ * Read-poll predicate for the socket: returns whatever
+ * qemu_can_send_packet() reports for this client.
+ */
+static int sock_raw_can_send(void *opaque)
+{
+    NetSocketRawState *state = opaque;
+    VLANClientState *client = &state->nc;
+
+    return qemu_can_send_packet(client);
+}
+
+/*
+ * Read one packet from fd into buf (at most maxlen bytes) using recv()
+ * with the given flags. Returns recv()'s result.
+ */
+ssize_t sock_raw_read_packet(int fd, uint8_t *buf, int maxlen, int flags)
+{
+    const int nread = recv(fd, buf, maxlen, flags);
+
+    return nread;
+}
+
+/*
+ * Completion callback handed to qemu_send_packet_async(): turns read
+ * polling back on. The len argument is not used.
+ */
+static void sock_raw_send_completed(VLANClientState *nc, ssize_t len)
+{
+    NetSocketRawState *state = DO_UPCAST(NetSocketRawState, nc, nc);
+
+    sock_raw_read_poll(state, 1);
+}
+
+/*
+ * Read handler for the socket: pull packets off the socket and hand them
+ * to the peer until the socket is drained, the peer cannot accept more,
+ * or a packet had to be queued.
+ *
+ * Packets are read with MSG_TRUNC, so recv() reports the real packet
+ * length even when it is larger than s->buf. Such packets were only
+ * partially stored and are dropped instead of being forwarded with a
+ * length that runs past the buffer.
+ *
+ * When the socket supplies a vnet header the peer is not using, the
+ * header is stripped; packets too short to hold a header plus payload
+ * are dropped.
+ */
+static void sock_raw_send(void *opaque)
+{
+    NetSocketRawState *s = opaque;
+    ssize_t size;
+
+    do {
+        uint8_t *buf = s->buf;
+
+        size = sock_raw_read_packet(s->fd, s->buf, sizeof(s->buf), MSG_TRUNC);
+        if (size <= 0) {
+            break;
+        }
+
+        if ((size_t)size > sizeof(s->buf)) {
+            /* Truncated by the kernel: only sizeof(s->buf) bytes are valid. */
+            continue;
+        }
+
+        if (s->has_vnet_hdr && !s->using_vnet_hdr) {
+            if ((size_t)size <= sizeof(struct virtio_net_hdr)) {
+                /* Nothing left after the header: drop the runt. */
+                continue;
+            }
+            buf += sizeof(struct virtio_net_hdr);
+            size -= (ssize_t)sizeof(struct virtio_net_hdr);
+        }
+
+        size = qemu_send_packet_async(&s->nc, buf, size,
+                                      sock_raw_send_completed);
+        if (size == 0) {
+            /* Stop reading; sock_raw_send_completed() re-enables polling. */
+            sock_raw_read_poll(s, 0);
+        }
+
+    } while (size > 0 && qemu_can_send_packet(&s->nc));
+}
+
+/*
+ * Return the has_ufo flag of a raw-socket client. nc must be a client
+ * of type NET_CLIENT_TYPE_SOCKET_RAW (asserted).
+ */
+int sock_raw_has_ufo(VLANClientState *nc)
+{
+    NetSocketRawState *state;
+
+    assert(nc->info->type == NET_CLIENT_TYPE_SOCKET_RAW);
+
+    state = DO_UPCAST(NetSocketRawState, nc, nc);
+    return state->has_ufo;
+}
+
+/*
+ * Return the has_vnet_hdr flag of a raw-socket client. nc must be a
+ * client of type NET_CLIENT_TYPE_SOCKET_RAW (asserted).
+ */
+int sock_raw_has_vnet_hdr(VLANClientState *nc)
+{
+    NetSocketRawState *state;
+
+    assert(nc->info->type == NET_CLIENT_TYPE_SOCKET_RAW);
+
+    state = DO_UPCAST(NetSocketRawState, nc, nc);
+    return state->has_vnet_hdr;
+}
+
+/*
+ * Record whether the peer supplies and consumes vnet headers itself.
+ *
+ * using_vnet_hdr is normalised to 0/1 and must equal the client's
+ * has_vnet_hdr flag (asserted). nc must be a client of type
+ * NET_CLIENT_TYPE_SOCKET_RAW (asserted).
+ */
+void sock_raw_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr)
+{
+    NetSocketRawState *state = DO_UPCAST(NetSocketRawState, nc, nc);
+    const int enabled = using_vnet_hdr ? 1 : 0;
+
+    assert(nc->info->type == NET_CLIENT_TYPE_SOCKET_RAW);
+    assert(state->has_vnet_hdr == enabled);
+
+    state->using_vnet_hdr = enabled;
+}
+
+/*
+ * Offload configuration hook for raw-socket clients. It currently does
+ * nothing: all arguments are ignored.
+ */
+void sock_raw_set_offload(VLANClientState *nc, int csum, int tso4,
+                          int tso6, int ecn, int ufo)
+{
+    /* Intentionally empty. */
+}
+
+/*
+ * Cleanup callback: purge the packets queued for the client, unregister
+ * both fd handlers and close the socket.
+ */
+static void sock_raw_cleanup(VLANClientState *nc)
+{
+    NetSocketRawState *state = DO_UPCAST(NetSocketRawState, nc, nc);
+
+    qemu_purge_queued_packets(nc);
+
+    /* Drop the read and write handlers before the descriptor goes away. */
+    sock_raw_read_poll(state, 0);
+    sock_raw_write_poll(state, 0);
+
+    close(state->fd);
+}
+
+/*
+ * Report whether the packet socket fd has PACKET_VNET_HDR enabled.
+ *
+ * Returns 1 when the option can be queried and is switched on, 0 when
+ * the query fails or the option is off. Checking the value, and not
+ * just the success of getsockopt(), matters for descriptors whose
+ * socket supports the option without having it enabled.
+ */
+int sock_raw_probe_vnet_hdr(int fd)
+{
+    int val = 0;
+    socklen_t len = sizeof(val);
+
+    if (getsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &val, &len) != 0) {
+        return 0;
+    }
+
+    return val != 0;
+}
+
+static NetClientInfo net_raw_info = { /* callback table passed to qemu_new_net_client() for raw-socket clients */
+ .type = NET_CLIENT_TYPE_SOCKET_RAW,
+ .size = sizeof(NetSocketRawState), /* the full state struct, not just the embedded VLANClientState */
+ .receive = sock_raw_receive,
+ .receive_raw = NULL, /* NOTE(review): sock_raw_receive_raw() exists but is not registered here — confirm this is intended */
+ .receive_iov = sock_raw_receive_iov,
+ .cleanup = sock_raw_cleanup,
+};
+
+
+/*
+ * Create a raw-socket net client on top of an already opened fd.
+ *
+ * The vnet header capability is probed from the descriptor, the peer is
+ * assumed not to use vnet headers yet, UFO is flagged as available and
+ * read polling is switched on. Returns the new client state.
+ */
+static NetSocketRawState *net_socket_fd_init_raw(VLANState *vlan,
+                                                 const char *model,
+                                                 const char *name, int fd)
+{
+    VLANClientState *client;
+    NetSocketRawState *state;
+
+    client = qemu_new_net_client(&net_raw_info, vlan, NULL, model, name);
+    state = DO_UPCAST(NetSocketRawState, nc, client);
+
+    state->fd = fd;
+    state->has_vnet_hdr = sock_raw_probe_vnet_hdr(fd);
+    state->using_vnet_hdr = 0;
+    state->has_ufo = 1;
+
+    sock_raw_read_poll(state, 1);
+
+    return state;
+}
+
+/*
+ * Open a packet socket bound to the interface named ifname and create a
+ * raw-socket net client on it.
+ *
+ * The socket is bound for all protocols (ETH_P_ALL), PACKET_VNET_HDR is
+ * requested and the descriptor is made non-blocking.
+ *
+ * Returns 0 on success. On failure a diagnostic is printed to stderr,
+ * the socket (if any) is closed and -1 is returned, which is what the
+ * caller tests for.
+ */
+static int net_socket_raw_ifname_init(VLANState *vlan, const char *model,
+                                      const char *name, const char *ifname)
+{
+    struct ifreq req;
+    struct sockaddr_ll lladdr;
+    int fd, flags, val;
+
+    /* A truncated name could silently select a different interface. */
+    if (strlen(ifname) >= IFNAMSIZ) {
+        fprintf(stderr, "interface name too long\n");
+        return -1;
+    }
+
+    fd = qemu_socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+    if (fd < 0) {
+        fprintf(stderr, "packet socket failed\n");
+        return -1;
+    }
+
+    memset(&req, 0, sizeof(req));
+    strncpy(req.ifr_name, ifname, IFNAMSIZ - 1);
+    if (ioctl(fd, SIOCGIFINDEX, &req) < 0) {
+        fprintf(stderr, "SIOCGIFINDEX failed\n");
+        goto fail;
+    }
+
+    memset(&lladdr, 0, sizeof(lladdr));
+    lladdr.sll_family = AF_PACKET;
+    lladdr.sll_protocol = htons(ETH_P_ALL);
+    lladdr.sll_ifindex = req.ifr_ifindex;
+    if (bind(fd, (const struct sockaddr *)&lladdr, sizeof(lladdr)) < 0) {
+        fprintf(stderr, "bind failed\n");
+        goto fail;
+    }
+
+    /*
+     * Not fatal: net_socket_fd_init_raw() probes the descriptor and
+     * records whether vnet headers are actually available.
+     */
+    val = 1;
+    if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, (const char *)&val,
+                   sizeof(val)) < 0) {
+        fprintf(stderr, "setsockopt(SOL_PACKET, PACKET_VNET_HDR) failed\n");
+    }
+
+    flags = fcntl(fd, F_GETFL);
+    if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
+        fprintf(stderr, "fcntl(O_NONBLOCK) set failed\n");
+        goto fail;
+    }
+
+    net_socket_fd_init_raw(vlan, model, name, fd);
+
+    return 0;
+
+fail:
+    close(fd);
+    return -1;
+}
+
+
typedef struct NetSocketState {
VLANClientState nc;
int fd;
@@ -337,6 +658,8 @@ static NetSocketState *net_socket_fd_init(VLANState *vlan,
return net_socket_fd_init_dgram(vlan, model, name, fd, is_connected);
case SOCK_STREAM:
return net_socket_fd_init_stream(vlan, model, name, fd, is_connected);
+ case SOCK_RAW:
+ return (struct NetSocketState *)net_socket_fd_init_raw(vlan, model, name, fd);
default:
/* who knows ... this could be a eg. a pty, do warn and continue as stream */
fprintf(stderr, "qemu: warning: socket type=%d for fd=%d is not SOCK_DGRAM or SOCK_STREAM\n", so_type, fd);
@@ -519,6 +842,22 @@ int net_init_socket(QemuOpts *opts,
close(fd);
return -1;
}
+ } else if (qemu_opt_get(opts, "ifname")) {
+ const char *ifname;
+
+ if (qemu_opt_get(opts, "fd") ||
+ qemu_opt_get(opts, "connect") ||
+ qemu_opt_get(opts, "listen") ||
+ qemu_opt_get(opts, "mcast")) {
+ qemu_error("fd=, connect= and mcast= and listen= is invalid with ifname=\n");
+ return -1;
+ }
+
+ ifname = qemu_opt_get(opts, "ifname");
+
+ if (net_socket_raw_ifname_init(vlan, "socket", name, ifname) == -1) {
+ return -1;
+ }
} else if (qemu_opt_get(opts, "listen")) {
const char *listen;
@@ -30,4 +30,13 @@
int net_init_socket(QemuOpts *opts, Monitor *mon,
const char *name, VLANState *vlan);
+#define PACKET_VNET_HDR 15 /* SOL_PACKET option number used with getsockopt()/setsockopt() in net/socket.c */
+
+ssize_t sock_raw_read_packet(int fd, uint8_t *buf, int maxlen, int flags); /* recv() wrapper; returns recv()'s result */
+int sock_raw_has_ufo(VLANClientState *vc); /* has_ufo flag of a raw-socket client */
+int sock_raw_has_vnet_hdr(VLANClientState *vc); /* has_vnet_hdr flag of a raw-socket client */
+void sock_raw_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr); /* record whether the peer handles vnet headers itself */
+int sock_raw_probe_vnet_hdr(int fd); /* query PACKET_VNET_HDR on fd */
+void sock_raw_set_offload(VLANClientState *vc, int csum, int tso4, int tso6, int ecn, int ufo); /* currently a no-op */
+
#endif /* QEMU_NET_SOCKET_H */