加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0038-add-tso.patch 11.69 KB
一键复制 编辑 原始数据 按行查看 历史
kircher 提交于 2023-02-11 20:11 . fix TSO snd_nxt incorrectly update
From af8ac36acb103aa27b498dafa0ae8ba4332faac8 Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Sat, 3 Dec 2022 21:38:09 +0800
Subject: [PATCH] add-tso
---
src/core/ipv4/etharp.c | 17 +++-
src/core/ipv4/ip4.c | 10 ++-
src/core/tcp.c | 6 ++
src/core/tcp_out.c | 178 +++++++++++++++++++++++++++++++++++++--
src/include/dpdk_cksum.h | 2 +-
src/include/lwip/pbuf.h | 8 +-
src/include/lwipopts.h | 4 +
7 files changed, 211 insertions(+), 14 deletions(-)
diff --git a/src/core/ipv4/etharp.c b/src/core/ipv4/etharp.c
index effb7db..f1903e4 100644
--- a/src/core/ipv4/etharp.c
+++ b/src/core/ipv4/etharp.c
@@ -482,6 +482,13 @@ etharp_update_arp_entry(struct netif *netif, const ip4_addr_t *ipaddr, struct et
struct pbuf *p = arp_table[i].q;
arp_table[i].q = NULL;
#endif /* ARP_QUEUEING */
+#if USE_LIBOS
+ struct pbuf *tmp = p->next;
+ while (tmp != NULL) {
+ tmp->ref--;
+ tmp = tmp->next;
+ }
+#endif
/* send the queued IP packet */
ethernet_output(netif, p, (struct eth_addr *)(netif->hwaddr), ethaddr, ETHTYPE_IP);
/* free the queued IP packet */
@@ -1027,7 +1034,15 @@ etharp_query(struct netif *netif, const ip4_addr_t *ipaddr, struct pbuf *q)
} else {
/* referencing the old pbuf is enough */
p = q;
- pbuf_ref(p);
+#if USE_LIBOS
+ struct pbuf *tmp = p;
+ while (tmp != NULL) {
+ pbuf_ref(tmp);
+ tmp = tmp->next;
+ }
+#else
+ pbuf_ref(p);
+#endif
}
/* packet could be taken over? */
if (p != NULL) {
diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c
index 1334cdc..d823491 100644
--- a/src/core/ipv4/ip4.c
+++ b/src/core/ipv4/ip4.c
@@ -1034,9 +1034,15 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d
#endif /* ENABLE_LOOPBACK */
#if IP_FRAG
/* don't fragment if interface has mtu set to 0 [loopif] */
- if (netif->mtu && (p->tot_len > netif->mtu)) {
- return ip4_frag(p, netif, dest);
+#if USE_LIBOS
+ if (!(get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO)) {
+#endif
+ if (netif->mtu && (p->tot_len > netif->mtu)) {
+ return ip4_frag(p, netif, dest);
+ }
+#if USE_LIBOS
}
+#endif
#endif /* IP_FRAG */
LWIP_DEBUGF(IP_DEBUG, ("ip4_output_if: call netif->output()\n"));
diff --git a/src/core/tcp.c b/src/core/tcp.c
index 7c18408..51ada38 100644
--- a/src/core/tcp.c
+++ b/src/core/tcp.c
@@ -1756,7 +1756,9 @@ tcp_seg_free(struct tcp_seg *seg)
seg->p = NULL;
#endif /* TCP_DEBUG */
}
+#if !USE_LIBOS
memp_free(MEMP_TCP_SEG, seg);
+#endif
}
}
@@ -1792,10 +1794,14 @@ tcp_seg_copy(struct tcp_seg *seg)
LWIP_ASSERT("tcp_seg_copy: invalid seg", seg != NULL);
+#if USE_LIBOS
+ cseg = (struct tcp_seg *)((uint8_t *)seg->p + sizeof(struct pbuf_custom));
+#else
cseg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG);
if (cseg == NULL) {
return NULL;
}
+#endif
SMEMCPY((u8_t *)cseg, (const u8_t *)seg, sizeof(struct tcp_seg));
pbuf_ref(cseg->p);
return cseg;
diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c
index 2834ba3..ee6f40b 100644
--- a/src/core/tcp_out.c
+++ b/src/core/tcp_out.c
@@ -161,6 +161,40 @@ tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst)
* The TCP header is filled in except ackno and wnd.
* p is freed on failure.
*/
+#if USE_LIBOS
+void tcp_init_segment(struct tcp_seg *seg, const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags,
+ u32_t seqno, u8_t optflags)
+{
+ u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);
+
+ seg->flags = optflags;
+ seg->next = NULL;
+ seg->p = p;
+ seg->len = p->tot_len - optlen;
+
+ /* build TCP header */
+ pbuf_add_header(p, TCP_HLEN);
+ seg->tcphdr = (struct tcp_hdr *)seg->p->payload;
+ seg->tcphdr->src = lwip_htons(pcb->local_port);
+ seg->tcphdr->dest = lwip_htons(pcb->remote_port);
+ seg->tcphdr->seqno = lwip_htonl(seqno);
+
+ TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (TCP_HLEN + optlen) / 4, hdrflags);
+ seg->tcphdr->urgp = 0;
+}
+
+static struct tcp_seg *
+tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
+{
+ struct tcp_seg *seg;
+
+ seg = (struct tcp_seg *)((uint8_t *)p + sizeof(struct pbuf_custom));
+
+ tcp_init_segment(seg, pcb, p, hdrflags, seqno, optflags);
+
+ return seg;
+}
+#else
static struct tcp_seg *
tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
{
@@ -210,6 +244,7 @@ tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32
seg->tcphdr->urgp = 0;
return seg;
}
+#endif
/**
* Allocate a PBUF_RAM pbuf, perhaps with extra space at the end.
@@ -1272,6 +1307,60 @@ tcp_build_wnd_scale_option(u32_t *opts)
}
#endif
+#if USE_LIBOS
+static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, struct tcp_seg *useg)
+{
+ if (TCP_TCPLEN(seg) > 0) {
+ seg->next = NULL;
+ if (useg == NULL) {
+ pcb->unacked = seg;
+ useg = seg;
+ } else {
+ if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) {
+ /* add segment to before tail of unacked list, keeping the list sorted */
+ struct tcp_seg **cur_seg = &(pcb->unacked);
+ while (*cur_seg &&
+ TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
+ cur_seg = &((*cur_seg)->next );
+ }
+ seg->next = (*cur_seg);
+ (*cur_seg) = seg;
+ } else {
+ /* add segment to tail of unacked list */
+ useg->next = seg;
+ useg = seg;
+ }
+ }
+ } else {
+ tcp_seg_free(seg);
+ }
+
+ return useg;
+}
+static err_t tcp_output_seg(struct tcp_pcb *pcb, struct tcp_seg *seg, struct netif *netif, u32_t snd_nxt)
+{
+ if (pcb->state != SYN_SENT) {
+ TCPH_SET_FLAG(seg->tcphdr, TCP_ACK);
+ }
+
+ err_t err = tcp_output_segment(seg, pcb, netif);
+ if (err != ERR_OK) {
+ /* segment could not be sent, for whatever reason */
+ tcp_set_flags(pcb, TF_NAGLEMEMERR);
+ return err;
+ }
+
+ if (pcb->state != SYN_SENT) {
+ tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
+ }
+
+ if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) {
+ pcb->snd_nxt = snd_nxt;
+ }
+
+ return ERR_OK;
+}
+#endif
/**
* @ingroup tcp_raw
* Find out what we can send and send it
@@ -1376,16 +1465,88 @@ tcp_output(struct tcp_pcb *pcb)
for (; useg->next != NULL; useg = useg->next);
}
/* data available and window allows it to be sent? */
+
#if USE_LIBOS
- /* avoid send cose too much time, limit send pkts num max 10 */
- uint16_t send_pkt = 0;
- while (seg != NULL && send_pkt < 10 &&
- lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
- send_pkt++;
-#else
+ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) {
+ while(seg) {
+ /**
+ * 1)遍历unsent队列,找到所有的待发送seg. 将seg的buf串起来
+ * 2) 生成新的seg, 调用tcp_output_segment, 新的seg释放掉
+ * 3) 若成功,则更新snd_nxt, unacked队列,和unsent队列。
+ */
+ struct tcp_seg *start_seg = seg;
+ struct pbuf *first_pbuf = NULL;
+ struct pbuf *pre_pbuf = NULL;
+ u8_t pbuf_chain_len = 0;
+ u32_t next_seqno = lwip_ntohl(seg->tcphdr->seqno);
+ while (seg != NULL && pbuf_chain_len < MAX_PBUF_CHAIN_LEN) {
+ u32_t seg_seqno = lwip_ntohl(seg->tcphdr->seqno);
+ if (seg_seqno - pcb->lastack + seg->len > wnd) {
+ if (first_pbuf)
+ break;
+ else
+ goto output_done;
+ }
+
+ if ((tcp_do_output_nagle(pcb) == 0) && ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) {
+ if (first_pbuf)
+ break;
+ else
+ goto output_done;
+ }
+
+ if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= MAX_PBUF_CHAIN_LEN) {
+ break;
+ }
+ if (first_pbuf == NULL && (seg->next == NULL || seg->next->len < TCP_MSS)) {
+ break;
+ }
+
+ pbuf_remove_header(seg->p, seg->p->tot_len - seg->len);
+ if (first_pbuf == NULL) {
+ first_pbuf = seg->p;
+ } else {
+ first_pbuf->tot_len += seg->p->len;
+ pre_pbuf->next = seg->p;
+ }
+
+ pre_pbuf = seg->p;
+ next_seqno = seg_seqno + TCP_TCPLEN(seg);
+ seg = seg->next;
+ pcb->unsent = seg;
+ pbuf_chain_len++;
+ }
+
+ if (first_pbuf == NULL) {
+ err = tcp_output_seg(pcb, seg, netif, next_seqno + seg->len);
+ if (err != ERR_OK)
+ return err;
+ pcb->unsent = seg->next;
+ useg = tcp_output_over(pcb, seg, useg);
+ seg = pcb->unsent;
+ continue;
+ }
+
+ struct tcp_seg new_seg;
+ tcp_init_segment(&new_seg, pcb, first_pbuf, 0, lwip_ntohl(start_seg->tcphdr->seqno), 0);
+
+ err = tcp_output_seg(pcb, &new_seg, netif, next_seqno);
+
+ for (u32_t i = 0; i < pbuf_chain_len; i++) {
+ struct tcp_seg *next_seg = start_seg->next;
+ start_seg->p->next = NULL;
+ useg = tcp_output_over(pcb, start_seg, useg);
+ start_seg = next_seg;
+ }
+
+ pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCPH_HDRLEN_BYTES(new_seg.tcphdr));
+ new_seg.p->tot_len = new_seg.p->len;
+ }
+ } else
+#endif
+{
while (seg != NULL &&
lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
-#endif
LWIP_ASSERT("RST not expected here!",
(TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0);
/* Stop sending if the nagle algorithm would prevent it
@@ -1462,6 +1623,7 @@ tcp_output(struct tcp_pcb *pcb)
}
seg = pcb->unsent;
}
+}
#if TCP_OVERSIZE
if (pcb->unsent == NULL) {
/* last unsent has been removed, reset unsent_oversize */
@@ -1627,7 +1789,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif
IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
#if CHECKSUM_GEN_TCP_HW
if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) {
- tcph_cksum_set(seg->p, TCP_HLEN);
+ tcph_cksum_set(seg->p, TCPH_HDRLEN_BYTES(seg->tcphdr));
seg->tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP,seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
} else {
#if TCP_CHECKSUM_ON_COPY
diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h
index e57be4d..83c9c38 100644
--- a/src/include/dpdk_cksum.h
+++ b/src/include/dpdk_cksum.h
@@ -78,7 +78,7 @@ static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) {
#include <rte_ip.h>
static inline void tcph_cksum_set(struct pbuf *p, u16_t len) {
- (void)len;
+ p->l4_len = len;
p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
}
diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h
index 87cd960..ef879da 100644
--- a/src/include/lwip/pbuf.h
+++ b/src/include/lwip/pbuf.h
@@ -223,10 +223,14 @@ struct pbuf {
#if USE_LIBOS && CHECKSUM_OFFLOAD_ALL
/** checksum offload ol_flags */
u64_t ol_flags;
- /** checksum offload l2_len */
+ /* < L2 (MAC) Header Length for non-tunneling pkt. */
u64_t l2_len:7;
- /** checksum offload l3_len */
+ /* < L3 (IP) Header Length. */
u64_t l3_len:9;
+ /* < L4 (TCP/UDP) Header Length. */
+ u64_t l4_len:8;
+ u16_t header_off;
+ u8_t rexmit;
#endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */
/** In case the user needs to store data custom data on a pbuf */
diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h
index a5add21..7c819d0 100644
--- a/src/include/lwipopts.h
+++ b/src/include/lwipopts.h
@@ -173,6 +173,10 @@
#define ARP_QUEUE_LEN 32
+#define MAX_PBUF_CHAIN_LEN 40
+
+#define MIN_TSO_SEG_LEN 256
+
/* ---------------------------------------
* ------- NIC offloads --------
* ---------------------------------------
--
2.23.0
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化