From 39ae200972704947ce86a55ec3de1926fd2caf36 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Wed, 19 Apr 2023 10:48:32 +0200 Subject: [PATCH] enable GSO on Linux, if available --- connection.go | 2 +- integrationtests/self/mitm_test.go | 10 ++++++--- packet_handler_map.go | 2 ++ send_conn.go | 9 +++++++- sys_conn.go | 2 +- sys_conn_df.go | 4 +++- sys_conn_df_linux.go | 36 ++++++++++++++++++++++++++++++ sys_conn_no_gso.go | 8 +++++++ sys_conn_oob.go | 24 +++++++++++++++++--- 9 files changed, 87 insertions(+), 10 deletions(-) create mode 100644 sys_conn_no_gso.go diff --git a/connection.go b/connection.go index e98a1344..c942cf1f 100644 --- a/connection.go +++ b/connection.go @@ -691,12 +691,12 @@ runLoop: s.cryptoStreamHandler.Close() <-handshaking + s.sendQueue.Close() // close the send queue before sending the CONNECTION_CLOSE s.handleCloseError(&closeErr) if e := (&errCloseForRecreating{}); !errors.As(closeErr.err, &e) && s.tracer != nil { s.tracer.Close() } s.logger.Infof("Connection %s closed.", s.logID) - s.sendQueue.Close() s.timer.Stop() return closeErr.err } diff --git a/integrationtests/self/mitm_test.go b/integrationtests/self/mitm_test.go index 35e0af91..fc7020f7 100644 --- a/integrationtests/self/mitm_test.go +++ b/integrationtests/self/mitm_test.go @@ -25,7 +25,7 @@ var _ = Describe("MITM test", func() { const connIDLen = 6 // explicitly set the connection ID length, so the proxy can parse it var ( - serverUDPConn, clientUDPConn *net.UDPConn + serverUDPConn, clientUDPConn net.PacketConn serverConn quic.Connection serverConfig *quic.Config ) @@ -33,7 +33,9 @@ var _ = Describe("MITM test", func() { startServerAndProxy := func(delayCb quicproxy.DelayCallback, dropCb quicproxy.DropCallback) (proxyPort int, closeFn func()) { addr, err := net.ResolveUDPAddr("udp", "localhost:0") Expect(err).ToNot(HaveOccurred()) - serverUDPConn, err = net.ListenUDP("udp", addr) + c, err := net.ListenUDP("udp", addr) + Expect(err).ToNot(HaveOccurred()) + serverUDPConn, 
err = quic.OptimizeConn(c) Expect(err).ToNot(HaveOccurred()) tr := &quic.Transport{ Conn: serverUDPConn, @@ -75,7 +77,9 @@ var _ = Describe("MITM test", func() { serverConfig = getQuicConfig(nil) addr, err := net.ResolveUDPAddr("udp", "localhost:0") Expect(err).ToNot(HaveOccurred()) - clientUDPConn, err = net.ListenUDP("udp", addr) + c, err := net.ListenUDP("udp", addr) + Expect(err).ToNot(HaveOccurred()) + clientUDPConn, err = quic.OptimizeConn(c) Expect(err).ToNot(HaveOccurred()) }) diff --git a/packet_handler_map.go b/packet_handler_map.go index 05f3eff2..4c309138 100644 --- a/packet_handler_map.go +++ b/packet_handler_map.go @@ -19,6 +19,8 @@ type connCapabilities struct { // This connection has the Don't Fragment (DF) bit set. // This means it makes to run DPLPMTUD. DF bool + // GSO (Generic Segmentation Offload) supported + GSO bool } // rawConn is a connection that allow reading of a receivedPackeh. diff --git a/send_conn.go b/send_conn.go index 399e1154..1ec26cf9 100644 --- a/send_conn.go +++ b/send_conn.go @@ -25,11 +25,18 @@ type sconn struct { var _ sendConn = &sconn{} func newSendConn(c rawConn, remote net.Addr, info *packetInfo) *sconn { + oob := info.OOB() + if c.capabilities().GSO { + // add 32 bytes, so we can add the UDP_SEGMENT msg + l := len(oob) + oob = append(oob, make([]byte, 32)...) + oob = oob[:l] + } return &sconn{ rawConn: c, remoteAddr: remote, info: info, - oob: info.OOB(), + oob: oob, } } diff --git a/sys_conn.go b/sys_conn.go index 72f84b0a..29c098a0 100644 --- a/sys_conn.go +++ b/sys_conn.go @@ -66,7 +66,7 @@ func wrapConn(pc net.PacketConn) (interface { return newConn(c, supportsDF) } -// The basicConn is the most trivial implementation of a connection. +// The basicConn is the most trivial implementation of a rawConn. // It reads a single packet from the underlying net.PacketConn. 
// It is used when // * the net.PacketConn is not a OOBCapablePacketConn, and diff --git a/sys_conn_df.go b/sys_conn_df.go index 7e94b80e..a2189412 100644 --- a/sys_conn_df.go +++ b/sys_conn_df.go @@ -2,7 +2,9 @@ package quic -import "syscall" +import ( + "syscall" +) func setDF(syscall.RawConn) (bool, error) { // no-op on unsupported platforms diff --git a/sys_conn_df_linux.go b/sys_conn_df_linux.go index fe75ab3d..8de9c78f 100644 --- a/sys_conn_df_linux.go +++ b/sys_conn_df_linux.go @@ -4,13 +4,20 @@ package quic import ( "errors" + "log" "syscall" + "unsafe" "golang.org/x/sys/unix" "github.com/quic-go/quic-go/internal/utils" ) +// UDP_SEGMENT controls GSO (Generic Segmentation Offload) +// +//nolint:stylecheck +const UDP_SEGMENT = 103 + func setDF(rawConn syscall.RawConn) (bool, error) { // Enabling IP_MTU_DISCOVER will force the kernel to return "sendto: message too long" // and the datagram will not be fragmented @@ -34,7 +41,36 @@ func setDF(rawConn syscall.RawConn) (bool, error) { return true, nil } +func maybeSetGSO(rawConn syscall.RawConn) bool { + var setErr error + if err := rawConn.Control(func(fd uintptr) { + setErr = unix.SetsockoptInt(int(fd), syscall.IPPROTO_UDP, UDP_SEGMENT, 1) + }); err != nil { + setErr = err + } + if setErr != nil { + log.Println("failed to enable GSO") + return false + } + return true +} + func isMsgSizeErr(err error) bool { // https://man7.org/linux/man-pages/man7/udp.7.html return errors.Is(err, unix.EMSGSIZE) } + +func appendUDPSegmentSizeMsg(b []byte, size int) []byte { + startLen := len(b) + const dataLen = 2 // payload is a uint16 + b = append(b, make([]byte, unix.CmsgSpace(dataLen))...) + h := (*unix.Cmsghdr)(unsafe.Pointer(&b[startLen])) + h.Level = syscall.IPPROTO_UDP + h.Type = UDP_SEGMENT + h.SetLen(unix.CmsgLen(dataLen)) + + // UnixRights uses the private `data` method, but I *think* this achieves the same goal. 
+ offset := startLen + unix.CmsgSpace(0) + *(*uint16)(unsafe.Pointer(&b[offset])) = uint16(size) + return b +} diff --git a/sys_conn_no_gso.go b/sys_conn_no_gso.go new file mode 100644 index 00000000..aa09f6bf --- /dev/null +++ b/sys_conn_no_gso.go @@ -0,0 +1,8 @@ +//go:build darwin || freebsd + +package quic + +import "syscall" + +func maybeSetGSO(_ syscall.RawConn) bool { return false } +func appendUDPSegmentSizeMsg(_ []byte, _ int) []byte { return nil } diff --git a/sys_conn_oob.go b/sys_conn_oob.go index 4f8963ed..f044d7bf 100644 --- a/sys_conn_oob.go +++ b/sys_conn_oob.go @@ -62,7 +62,7 @@ type oobConn struct { messages []ipv4.Message buffers [batchSize]*packetBuffer - supportsDF bool + cap connCapabilities } var _ rawConn = &oobConn{} @@ -124,6 +124,10 @@ func newConn(c OOBCapablePacketConn, supportsDF bool) (*oobConn, error) { bc = ipv4.NewPacketConn(c) } + // Try enabling GSO. + // This will only succeed on Linux, and only for kernels >= 4.18. + supportsGSO := maybeSetGSO(rawConn) + msgs := make([]ipv4.Message, batchSize) for i := range msgs { // preallocate the [][]byte @@ -134,8 +138,9 @@ func newConn(c OOBCapablePacketConn, supportsDF bool) (*oobConn, error) { batchConn: bc, messages: msgs, readPos: batchSize, - supportsDF: supportsDF, } + oobConn.cap.DF = supportsDF + oobConn.cap.GSO = supportsGSO for i := 0; i < batchSize; i++ { oobConn.messages[i].OOB = make([]byte, oobBufferSize) } @@ -232,13 +237,26 @@ func (c *oobConn) ReadPacket() (*receivedPacket, error) { }, nil } +// WriteTo (re)implements the net.PacketConn method. +// This is needed for users who call OptimizeConn to be able to send (non-QUIC) packets on the underlying connection. +// Without GSO, this override would not be needed; with GSO enabled, the kernel requires the UDP_SEGMENT message to be set. +func (c *oobConn) WriteTo(p []byte, addr net.Addr) (int, error) { + return c.WritePacket(p, addr, nil) +} + +// WritePacket writes a new packet. 
+// If the connection supports GSO (and we activated GSO support before), +// it appends the UDP_SEGMENT size message to oob. func (c *oobConn) WritePacket(b []byte, addr net.Addr, oob []byte) (n int, err error) { + if c.cap.GSO { + oob = appendUDPSegmentSizeMsg(oob, len(b)) + } n, _, err = c.OOBCapablePacketConn.WriteMsgUDP(b, oob, addr.(*net.UDPAddr)) return n, err } func (c *oobConn) capabilities() connCapabilities { - return connCapabilities{DF: c.supportsDF} + return c.cap } func (info *packetInfo) OOB() []byte {