Skip to content

Commit

Permalink
ipvs: adjust outbound window size for synproxy
Browse files Browse the repository at this point in the history
Syn-proxy uses the same wscale option value in the inbound direction, i.e., client->vs and vs->rs,
but does not do so in the outbound direction. When the wscale option values of rs->vs and vs->client
are different, rs's window size is not advertised to the client correctly. This causes serious
delays in bulk data transmission if the wscales differ too much. So window adjustment is
applied in the outbound direction for synproxy.

The table below shows the test results in our layer-2 network environment. The backend is a
fileserver, and the file upload (.u) and download (.d) times are measured under three TCP-window-related
configs: `synproxy`, `close_client_window` and `wscale`.

| synproxy | close_client_window | wscale | 10MB file .u | 100MB file .u | 500MB file .u | 10MB file .d | 100MB file .d | 500MB file .d |
| -------- | ------------------- | ------ | ------------ | ------------- | ------------- | ------------ | ------------- | ------------- |
| off      | x                   | x      | 0.055s       | 0.414s        | 1.764s        | 0.023s       | 0.116s        | 0.464s        |
| on       | off                 | 0      | 0.052s       | 0.405s        | 1.820s        | 0.036s       | 0.260s        | 1.178s        |
| on       | off                 | 1      | 0.060s       | 0.384s        | 1.724s        | 0.022s       | 0.118s        | 0.465s        |
| on       | off                 | 3      | 0.051s       | 0.377s        | 1.913s        | 0.023s       | 0.116s        | 0.465s        |
| on       | off                 | 6      | 0.048s       | 0.420s        | 1.871s        | 0.025s       | 0.116s        | 0.467s        |
| on       | off                 | 9      | 0.052s       | 0.392s        | 1.823s        | 0.024s       | 0.116s        | 0.463s        |
| on       | on                  | 0      | 0.054s       | 0.387s        | 1.920s        | 0.040s       | 0.254s        | 1.216s        |
| on       | on                  | 1      | 0.052s       | 0.387s        | 1.742s        | 0.026s       | 0.116s        | 0.463s        |
| on       | on                  | 3      | 0.050s       | 0.377s        | 1.791s        | 0.023s       | 0.115s        | 0.463s        |
| on       | on                  | 6      | 0.051s       | 0.394s        | 1.678s        | 0.023s       | 0.115s        | 0.474s        |
| on       | on                  | 9      | 0.057s       | 0.373s        | 1.734s        | 0.024s       | 0.117s        | 0.462s        |

For comparison, the test results without this patch are shown in the table below:

| synproxy | close_client_window | wscale | 10MB file .u | 100MB file .u | 500MB file .u | 10MB file .d | 100MB file .d | 500MB file .d |
| -------- | ------------------- | ------ | ------------ | ------------- | ------------- | ------------ | ------------- | ------------- |
| off      | x                   | x      | 0.049s       | 0.398s        | 1.767s        | 0.026s       | 0.118s        | 0.466s        |
| on       | off                 | 0      | 0.053s       | 0.403s        | 1.965s        | 0.039s       | 0.239s        | 1.216s        |
| on       | off                 | 1      | >10min       | x             | x             | 0.024s       | 0.116s        | 0.465s        |
| on       | on                  | 0      | 0.057s       | 0.474s        | 1.900s        | 0.039s       | 0.259s        | 1.227s        |
| on       | on                  | 1      | >10min       | x             | x             | 0.024s       | 0.116s        | 0.469s        |

The results show that the slow upload problem is solved by the patch, and that the other TCP-window-related
configs have only minor effects on the transmission time.

Signed-off-by: ywc689 <[email protected]>
  • Loading branch information
ywc689 committed Jul 4, 2023
1 parent 24d681e commit 9bcd301
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 5 deletions.
4 changes: 3 additions & 1 deletion include/ipvs/conn.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ struct dp_vs_conn {
/* synproxy related members */
struct dp_vs_seq syn_proxy_seq; /* seq used in synproxy */
struct list_head ack_mbuf; /* ack mbuf saved in step2 */
uint32_t ack_num; /* ack mbuf number stored */
uint16_t ack_num; /* ack mbuf number stored */
uint8_t wscale_vs; /* outbound wscale factor to client */
uint8_t wscale_rs; /* outbound wscale factor from rs */
struct rte_mbuf *syn_mbuf; /* saved rs syn packet for retransmition */
rte_atomic32_t syn_retry_max; /* syn retransmition max packets */

Expand Down
12 changes: 12 additions & 0 deletions src/ipvs/ip_vs_proto_tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,16 @@ static void tcp_out_save_seq(struct rte_mbuf *mbuf,
conn->rs_end_ack = th->ack_seq;
}

/*
 * Re-express the window advertised in an outbound TCP header using the
 * window scale factor that applies towards the client.
 *
 * The incoming th->window is interpreted with conn->wscale_rs (outbound
 * wscale factor from rs) and rewritten so that the same byte count is
 * expressed with conn->wscale_vs (outbound wscale factor to client).
 * The result is rounded down and saturated at 0xffff, the largest value
 * the 16-bit window field can hold.
 *
 * @conn: connection holding the two scale factors
 * @th:   TCP header whose window field is rewritten in place
 *
 * NOTE(review): both scale factors are expected to be valid TCP window
 * scale shift counts (at most 14 per RFC 7323) -- confirm at the places
 * that assign them.
 */
static void tcp_out_adjust_window(struct dp_vs_conn *conn, struct tcphdr *th)
{
    uint64_t wnd_client;

    /* Identical scale factors leave the window field unchanged. */
    if (conn->wscale_rs == conn->wscale_vs)
        return;

    /*
     * Scale with unsigned 64-bit shifts rather than signed int
     * multiply/divide, so the intermediate value cannot overflow for any
     * valid shift count.
     */
    wnd_client = ((uint64_t)ntohs(th->window) << conn->wscale_rs) >> conn->wscale_vs;
    if (unlikely(wnd_client > 0xffff))
        wnd_client = 0xffff;
    th->window = htons((uint16_t)wnd_client);
}

static void tcp_out_adjust_mss(int af, struct tcphdr *tcph)
{
unsigned char *ptr;
Expand Down Expand Up @@ -763,6 +773,8 @@ static int tcp_fnat_out_handler(struct dp_vs_proto *proto,
th->source = conn->vport;
th->dest = conn->cport;

tcp_out_adjust_window(conn, th);

if (th->syn && th->ack)
tcp_out_adjust_mss(af, th);

Expand Down
44 changes: 40 additions & 4 deletions src/ipvs/ip_vs_synproxy.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,12 +414,11 @@ syn_proxy_v4_cookie_check(struct rte_mbuf *mbuf, uint32_t cookie,
th->source, th->dest, seq, rte_atomic32_read(&g_minute_count),
DP_VS_SYNPROXY_COUNTER_TRIES);

memset(opt, 0, sizeof(struct dp_vs_synproxy_opt));
if ((uint32_t) -1 == res) /* count is invalid, g_minute_count' >> g_minute_count */
return 0;

mssind = (res & DP_VS_SYNPROXY_MSS_MASK) >> DP_VS_SYNPROXY_MSS_BITS;

memset(opt, 0, sizeof(struct dp_vs_synproxy_opt));
if ((mssind < NUM_MSS) && ((res & DP_VS_SYNPROXY_OTHER_MASK) == 0)) {
opt->mss_clamp = msstab[mssind] + 1;
opt->sack_ok = (res & DP_VS_SYNPROXY_SACKOK_MASK) >> DP_VS_SYNPROXY_SACKOK_BIT;
Expand Down Expand Up @@ -451,12 +450,11 @@ syn_proxy_v6_cookie_check(struct rte_mbuf *mbuf, uint32_t cookie,
th->source, th->dest, seq, rte_atomic32_read(&g_minute_count),
DP_VS_SYNPROXY_COUNTER_TRIES);

memset(opt, 0, sizeof(struct dp_vs_synproxy_opt));
if ((uint32_t) -1 == res) /* count is invalid, g_minute_count' >> g_minute_count */
return 0;

mssind = (res & DP_VS_SYNPROXY_MSS_MASK) >> DP_VS_SYNPROXY_MSS_BITS;

memset(opt, 0, sizeof(struct dp_vs_synproxy_opt));
if ((mssind < NUM_MSS) && ((res & DP_VS_SYNPROXY_OTHER_MASK) == 0)) {
opt->mss_clamp = msstab[mssind] + 1;
opt->sack_ok = (res & DP_VS_SYNPROXY_SACKOK_MASK) >> DP_VS_SYNPROXY_SACKOK_BIT;
Expand All @@ -479,6 +477,40 @@ syn_proxy_v6_cookie_check(struct rte_mbuf *mbuf, uint32_t cookie,
* Synproxy implementation
*/

/*
 * Extract the window scale shift count from the TCP options of a segment.
 *
 * @mbuf: packet carrying the segment (not used by this function)
 * @th:   TCP header; the options are read from the bytes that immediately
 *        follow it, for (th->doff * 4 - sizeof(struct tcphdr)) bytes
 *
 * Returns the shift count of the first well-formed window scale option, or
 * 0 when no such option is present, when the option list is malformed or
 * truncated, or when the shift count exceeds DP_VS_SYNPROXY_WSCALE_MAX.
 *
 * NOTE(review): assumes the caller has made all th->doff * 4 header bytes
 * contiguous and readable behind th -- confirm against the call site.
 */
static unsigned char syn_proxy_parse_wscale_opt(struct rte_mbuf *mbuf, struct tcphdr *th)
{
    enum { WSCALE_OPT_LEN = 3 };    /* kind + length + shift count */
    int length;
    unsigned char opcode, opsize;
    unsigned char *ptr;

    /* Keep the subtraction signed so a bogus doff (< 5) yields length <= 0. */
    length = (int)(th->doff * 4) - (int)sizeof(struct tcphdr);
    ptr = (unsigned char *)(th + 1);
    while (length > 0) {
        opcode = *ptr++;
        switch (opcode) {
        case TCPOPT_EOL:
            return 0;
        case TCPOPT_NOP:
            length--;
            continue;
        default:
            if (length < 2)     /* no room left for the length byte */
                return 0;
            opsize = *ptr++;
            if (opsize < 2)     /* silly options */
                return 0;
            if (opsize > length) /* partial options */
                return 0;
            /*
             * Only trust a window scale option of the exact expected
             * size; a shorter one has no shift-count byte to read.
             * Anything else is skipped like an unknown option.
             */
            if (opcode == TCPOPT_WINDOW && opsize == WSCALE_OPT_LEN) {
                if (*ptr > DP_VS_SYNPROXY_WSCALE_MAX) /* invalid wscale opt */
                    return 0;
                return *ptr;
            }
            ptr += opsize - 2;
            length -= opsize;
        }
    }
    return 0; /* options exhausted without a window scale option */
}

/* Replace tcp options in tcp header, called by syn_proxy_reuse_mbuf() */
static void syn_proxy_parse_set_opts(struct rte_mbuf *mbuf, struct tcphdr *th,
struct dp_vs_synproxy_opt *opt)
Expand Down Expand Up @@ -1191,6 +1223,9 @@ int dp_vs_synproxy_ack_rcv(int af, struct rte_mbuf *mbuf,
return 0;
}

if (opt.wscale_ok)
(*cpp)->wscale_vs = dp_vs_synproxy_ctrl_wscale;

/* Do nothing but print a error msg when fail, because session will be
* correctly freed in dp_vs_conn_expire */
if (EDPVS_OK != (res = syn_proxy_send_rs_syn(af, th, *cpp, mbuf, pp, &opt))) {
Expand Down Expand Up @@ -1409,6 +1444,7 @@ int dp_vs_synproxy_synack_rcv(struct rte_mbuf *mbuf, struct dp_vs_conn *cp,
if ((th->syn) && (th->ack) && (!th->rst) &&
(cp->flags & DPVS_CONN_F_SYNPROXY) &&
(cp->state == DPVS_TCP_S_SYN_SENT)) {
cp->wscale_rs = syn_proxy_parse_wscale_opt(mbuf, th);
cp->syn_proxy_seq.delta = ntohl(cp->syn_proxy_seq.isn) - ntohl(th->seq);
cp->state = DPVS_TCP_S_ESTABLISHED;
dp_vs_conn_set_timeout(cp, pp);
Expand Down

0 comments on commit 9bcd301

Please sign in to comment.