@@ -1194,10 +1194,14 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
{
int error = 0;
int sock = 0;
+ ssize_t retval;
- struct sockaddr_ll sll;
- struct msghdr msg;
if (!is_tap_netdev(netdev_)) {
+ uint32_t resend_idx = 0;
+ struct mmsghdr *msg;
+ struct iovec *iov;
+ struct sockaddr_ll sll;
+
sock = af_packet_sock();
if (sock < 0) {
error = -sock;
@@ -1216,34 +1220,56 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
sll.sll_family = AF_PACKET;
sll.sll_ifindex = ifindex;
- msg.msg_name = &sll;
- msg.msg_namelen = sizeof sll;
- msg.msg_iovlen = 1;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
- }
+ msg = xmalloc(sizeof(*msg) * batch->count);
+ iov = xmalloc(sizeof(*iov) * batch->count);
- /* 'i' is incremented only if there's no error */
- for (int i = 0; i < batch->count; ) {
- const void *data = dp_packet_data(batch->packets[i]);
- size_t size = dp_packet_size(batch->packets[i]);
- ssize_t retval;
+ for (int i = 0; i < batch->count; i++) {
+ const void *data = dp_packet_data(batch->packets[i]);
+ size_t size = dp_packet_size(batch->packets[i]);
- /* Truncate the packet if it is configured. */
- size -= dp_packet_get_cutlen(batch->packets[i]);
+ /* Truncate the packet if it is configured. */
+ size -= dp_packet_get_cutlen(batch->packets[i]);
- if (!is_tap_netdev(netdev_)) {
/* Use our AF_PACKET socket to send to this device. */
- struct iovec iov;
- iov.iov_base = CONST_CAST(void *, data);
- iov.iov_len = size;
+ msg[i].msg_hdr.msg_name = &sll;
+ msg[i].msg_hdr.msg_namelen = sizeof sll;
+ msg[i].msg_hdr.msg_iovlen = 1;
+ msg[i].msg_hdr.msg_control = NULL;
+ msg[i].msg_hdr.msg_controllen = 0;
+ msg[i].msg_hdr.msg_flags = 0;
+ iov[i].iov_base = CONST_CAST(void *, data);
+ iov[i].iov_len = size;
+ msg[i].msg_hdr.msg_iov = &iov[i];
+ }
- msg.msg_iov = &iov;
+resend_batch:
+ retval = sendmmsg(sock, msg + resend_idx,
+ batch->count - resend_idx, 0);
+ if (retval < 0) {
+ if (errno == EINTR) {
+ goto resend_batch;
+ }
+ /* The Linux AF_PACKET implementation never blocks waiting for
+ * room for packets, instead returning ENOBUFS. Translate this
+ * into EAGAIN for the caller. */
+ error = errno == ENOBUFS ? EAGAIN : errno;
+ } else if(retval != batch->count - resend_idx) {
+ resend_idx += retval;
+ goto resend_batch;
+ }
+
+ free(msg);
+ free(iov);
+ } else {
+ /* 'i' is incremented only if there's no error */
+ for (int i = 0; i < batch->count; ) {
+ const void *data = dp_packet_data(batch->packets[i]);
+ size_t size = dp_packet_size(batch->packets[i]);
+
+ /* Truncate the packet if it is configured. */
+ size -= dp_packet_get_cutlen(batch->packets[i]);
- retval = sendmsg(sock, &msg, 0);
- } else {
/* Use the tap fd to send to this device. This is essential for
* tap devices, because packets sent to a tap device with an
* AF_PACKET socket will loop back to be *received* again on the
@@ -1252,45 +1278,43 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
struct netdev_linux *netdev = netdev_linux_cast(netdev_);
retval = write(netdev->tap_fd, data, size);
- }
- if (retval < 0) {
- if (errno == EINTR) {
- /* The send was interrupted by a signal. Retry the packet by
- * continuing without incrementing 'i'.*/
- continue;
- } else if (errno == EIO && is_tap_netdev(netdev_)) {
- /* The Linux tap driver returns EIO if the device is not up.
- * From the OVS side this is not an error, so ignore it. */
- } else {
- /* The Linux AF_PACKET implementation never blocks waiting for
- * room for packets, instead returning ENOBUFS. Translate this
- * into EAGAIN for the caller. */
- error = errno == ENOBUFS ? EAGAIN : errno;
+ if (retval < 0) {
+ if (errno == EINTR) {
+ /* The send was interrupted by a signal.
+ * Retry the packet by continuing without
+ * incrementing 'i'.*/
+ continue;
+ } else if (errno == EIO) {
+ /* The Linux tap driver returns EIO if the device
+ * is not up. From the OVS side this is not an error,
+ * so ignore it. */
+ } else {
+ break;
+ }
+ } else if (retval != size) {
+ VLOG_WARN_RL(&rl, "sent partial Ethernet packet"
+ " (%"PRIuSIZE" bytes"
+ " of %"PRIuSIZE") on %s", retval, size,
+ netdev_get_name(netdev_));
+ error = EMSGSIZE;
break;
}
- } else if (retval != size) {
- VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRIuSIZE" bytes"
- " of %"PRIuSIZE") on %s", retval, size,
- netdev_get_name(netdev_));
- error = EMSGSIZE;
- break;
- }
- /* Process the next packet in the batch */
- i++;
+ /* Process the next packet in the batch */
+ i++;
+ }
}
if (error && error != EAGAIN) {
- VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
- netdev_get_name(netdev_), ovs_strerror(error));
+ VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
+ netdev_get_name(netdev_), ovs_strerror(error));
}
free_batch:
dp_packet_delete_batch(batch, may_steal);
return error;
-
}
/* Registers with the poll loop to wake up from the next call to poll_block()
Sendmmsg can reduce cpu cycles in sending packets to kernel. Replace sendmsg with sendmmsg in function netdev_linux_send to send batch packets. Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com> --- lib/netdev-linux.c | 122 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 73 insertions(+), 49 deletions(-)