*** ../../vanilla3.0/tcp_output.c Tue Apr 28 10:28:01 1998 --- tcp_output.c Thu Sep 24 10:35:10 1998 *************** *** 12,18 **** */ /* ! * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without --- 12,18 ---- */ /* ! * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995, 1998 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without *************** *** 75,82 **** extern struct mbuf *m_copypack(); #endif ! #define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ /* * Tcp output routine: figure out what should be sent and send it. --- 75,162 ---- extern struct mbuf *m_copypack(); #endif + #ifdef SACK + extern int tcprexmtthresh; + #endif + + #ifdef SACK + #define MAX_TCPOPTLEN 40 /* need 40 at least for 3 SACKs + TIMESTAMP */ + #else + #define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ + #endif + + #ifdef SACK + #ifdef SACK_DEBUG + void + tcp_print_holes(tp) + struct tcpcb *tp; + { /* Debug aid, built only under SACK_DEBUG: prints a header and then one line per entry on the tp->snd_holes list (start, end, dups, rxmit); returns without printing when the list is empty. */ + struct sackhole *p = tp->snd_holes; + if (p == 0) + return; + printf("Hole report: start--end dups rxmit\n"); + while (p) { + printf("%x--%x d %d r %x\n", p->start, p->end, p->dups, + p->rxmit); + p = p->next; + } + printf("\n"); + } + #endif /* SACK_DEBUG */ + + /* + * Returns pointer to a sackhole if there are any pending retransmissions; + * NULL otherwise. A hole is returned when p->dups >= tcprexmtthresh and SEQ_LT(p->rxmit, p->end) both hold; holes for which SEQ_LT(p->rxmit, tp->snd_una) holds are skipped. Returns 0 immediately when tp->sack_disable is set. + */ + struct sackhole * + tcp_sack_output(tp) + register struct tcpcb *tp; + { + struct sackhole *p; + if (tp->sack_disable) + return 0; + p = tp->snd_holes; + while (p) { + if (p->dups >= tcprexmtthresh && SEQ_LT(p->rxmit, p->end)) { + if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */ + p = p->next; + continue; + } + #ifdef SACK_DEBUG + if (p) + tcp_print_holes(tp); /* the if (p) test above cannot fail: the enclosing while (p) already established it */ + #endif + return p; + } + p = p->next; + } + return 0; + } + + /* + * After a timeout, the SACK list may be rebuilt. 
This SACK information + * should be used to avoid retransmitting SACKed data. This function + * traverses the SACK list to see if snd_nxt should be moved forward. NOTE(review): the loop below reads tp->sackblks[] and tp->rcv_numsacks, the same fields that a later hunk of this patch copies into the outgoing SACK option, while tp->snd_nxt is a send-side sequence number; confirm that comparing the two is intended. The early break when snd_nxt precedes a block start presumably relies on sackblks[] being kept in ascending sequence order; verify against the code that maintains the array. + */ + void + tcp_sack_adjust(tp) + struct tcpcb *tp; + { + int i; ! for (i = 0; i < tp->rcv_numsacks; i++) { ! if (SEQ_LT(tp->snd_nxt, tp->sackblks[i].start)) ! break; ! if (SEQ_LEQ(tp->sackblks[i].end, tp->snd_nxt)) ! continue; ! if (tp->sackblks[i].start == 0 && tp->sackblks[i].end == 0) ! continue; /* all-zero slot is skipped, as the SACK option builder later in this patch also does; note this test runs only after the two sequence comparisons above */ ! /* snd_nxt must be in middle of block of SACKed data */ ! tp->snd_nxt = tp->sackblks[i].end; ! break; ! } ! } ! #endif /* SACK */ /* * Tcp output routine: figure out what should be sent and send it. *************** *** 94,99 **** --- 174,186 ---- unsigned optlen, hdrlen; int idle, sendalot; struct rtentry *rt; + #ifdef SACK + int i, sack_rxmit = 0; + struct sackhole *p; + #endif + #if defined(SACK) || defined(NEWRENO) + int maxburst = TCP_MAXBURST; + #endif /* * Determine length of data that should be transmitted, *************** *** 119,124 **** --- 206,220 ---- tp->t_flags &= ~TF_WASIDLE; again: sendalot = 0; + #ifdef SACK + /* + * If we've recently taken a timeout, snd_max will be greater than + * snd_nxt. There may be SACK information that allows us to avoid + * resending already delivered data. Adjust snd_nxt accordingly. + */ + if (!tp->sack_disable && SEQ_LT(tp->snd_nxt, tp->snd_max)) + tcp_sack_adjust(tp); + #endif off = tp->snd_nxt - tp->snd_una; win = min(tp->snd_wnd, tp->snd_cwnd); *************** *** 129,134 **** --- 225,256 ---- * and timer expired, we will send what we can * and go to transmit state. */ + + #ifdef SACK + /* + * Send any SACK-generated retransmissions. If we're explicitly trying + * to send out new data (when sendalot is 1), bypass this function. + * If we retransmit in fast recovery mode, decrement snd_cwnd, since + * we're replacing a (future) new transmission with a retransmission + * now, and we previously incremented snd_cwnd in tcp_input(). 
+ */ + if (!tp->sack_disable && !sendalot) { + if (p = tcp_sack_output(tp)) { + off = p->rxmit - tp->snd_una; + sack_rxmit = 1; + #if 0 + /* Coalesce holes into a single retransmission */ + #endif + len = min(tp->t_maxseg, p->end - p->rxmit); + #ifndef FACK + /* in FACK, hold snd_cwnd constant during recovery */ + if (SEQ_LT(tp->snd_una, tp->snd_recover)) + tp->snd_cwnd -= tp->t_maxseg; + #endif + } + } + #endif /* SACK */ + if (tp->t_force) { if (win == 0) { /* *************** *** 156,161 **** --- 278,286 ---- } } + #ifdef SACK + if (!sack_rxmit) { + #endif if (win < so->so_snd.sb_cc) { len = win - off; if (idle) { *************** *** 164,169 **** --- 289,307 ---- } } else len = so->so_snd.sb_cc - off; + #if defined(SACK) && defined(FACK) + /* + * If we're in fast recovery (SEQ_GT(tp->snd_recover, tp->snd_una)), and + * amount of outstanding data (snd_awnd) is >= snd_cwnd, then + * do not send data (like zero window conditions) + */ + if (!tp->sack_disable && len && SEQ_GT(tp->snd_recover, tp->snd_una) && + (tp->snd_awnd >= tp->snd_cwnd)) + len = 0; + #endif /* FACK */ + #ifdef SACK + } + #endif if (len < 0) { /* *************** *** 215,220 **** --- 353,362 ---- goto send; if (SEQ_LT(tp->snd_nxt, tp->snd_max)) goto send; + #ifdef SACK + if (sack_rxmit) + goto send; + #endif } /* *************** *** 314,320 **** mss = htons((u_short) tcp_send_mss(tp->t_inpcb)); bcopy((caddr_t)&mss, (caddr_t)(opt + 2), sizeof(mss)); optlen = 4; ! if (tp->t_flags & TF_USE_SCALE) { *((u_long *) (opt + optlen)) = htonl( TCPOPT_NOP << 24 | --- 456,476 ---- mss = htons((u_short) tcp_send_mss(tp->t_inpcb)); bcopy((caddr_t)&mss, (caddr_t)(opt + 2), sizeof(mss)); optlen = 4; ! #ifdef SACK ! /* ! * If this is the first SYN of connection (not a SYN ! * ACK), include SACK_PERMIT_HDR option. If this is a ! * SYN ACK, include SACK_PERMIT_HDR option if peer has ! * already done so. ! */ ! if (!tp->sack_disable && ((flags & TH_ACK) == 0 || ! (tp->t_flags & TF_SACK_PERMIT))) { ! 
*((u_long *) (opt + optlen)) = ! htonl(TCPOPT_SACK_PERMIT_HDR); ! optlen += 4; ! } ! #endif ! if (tp->t_flags & TF_USE_SCALE) { *((u_long *) (opt + optlen)) = htonl( TCPOPT_NOP << 24 | *************** *** 341,346 **** --- 497,530 ---- optlen += TCPOLEN_TSTAMP_APPA; } + #ifdef SACK + /* + * Send SACKs if necessary. This should be the last option processed. + * Only as many SACKs are sent as are permitted by the maximum options + * size. No more than three SACKs are sent. + */ + if (!tp->sack_disable && tp->t_state == TCPS_ESTABLISHED && + (tp->t_flags & (TF_SACK_PERMIT|TF_NOOPT)) == TF_SACK_PERMIT && + tp->rcv_numsacks) { + u_long *lp = (u_long *) (opt + optlen); + u_long *olp = lp++; + int count = 0; /* actual number of SACKs inserted */ + int maxsack = (MAX_TCPOPTLEN - (optlen + 4))/TCPOLEN_SACK; + + maxsack = min(maxsack, TCP_MAX_SACK); + for (i=0; (i < tp->rcv_numsacks && count < maxsack); i++) { + struct sackblk sack = tp->sackblks[i]; + if (sack.start == 0 && sack.end == 0) + continue; + *lp++ = htonl(sack.start); + *lp++ = htonl(sack.end); + count++; + } + *olp = htonl(TCPOPT_SACK_HDR|(TCPOLEN_SACK*count+2)); + optlen += TCPOLEN_SACK*count + 4; /* including leading NOPs */ + } + #endif /* SACK */ + hdrlen += optlen; #ifdef already_accounted_for *************** *** 465,470 **** --- 649,671 ---- ti->ti_seq = htonl(tp->snd_nxt); else ti->ti_seq = htonl(tp->snd_max); + #ifdef SACK + if (sack_rxmit) { + /* + * If sendalot was turned on (due to option stuffing), turn it + * off. Properly set th_seq field. Advance the ret'x pointer + * by len. 
+ */ + if (sendalot) + sendalot = 0; + ti->ti_seq = htonl(p->rxmit); + p->rxmit += len; + #if defined(SACK) && defined(FACK) + tp->retran_data += len; + #endif /* FACK */ + } + #endif /* SACK */ + ti->ti_ack = htonl(tp->rcv_nxt); if (optlen) { bcopy((caddr_t)opt, (caddr_t)(ti + 1), optlen); *************** *** 537,542 **** --- 738,750 ---- tp->t_flags |= TF_SENTFIN; } } + #ifdef SACK + if (!tp->sack_disable) { + if (sack_rxmit && (p->rxmit != tp->snd_nxt)) { + goto timer; + } + } + #endif tp->snd_nxt += len; if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { tp->snd_max = tp->snd_nxt; *************** *** 559,564 **** --- 767,785 ---- * Initialize shift counter which is used for backoff * of retransmit time. */ + #ifdef SACK + timer: + if (!tp->sack_disable && sack_rxmit && + tp->t_timer[TCPT_REXMT] == 0 && + tp->snd_nxt != tp->snd_max) { + tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + if (tp->t_timer[TCPT_PERSIST]) { + tp->t_timer[TCPT_PERSIST] = 0; + tp->t_rxtshift = 0; + } + } + #endif + if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; *************** *** 568,575 **** } } } else ! if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) tp->snd_max = tp->snd_nxt + len; /* * Trace. --- 789,797 ---- } } } else ! if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) { tp->snd_max = tp->snd_nxt + len; + } /* * Trace. *************** *** 606,611 **** --- 828,838 ---- error = ip_output(m, (struct mbuf *)0, &tp->t_inpcb->inp_route, so->so_options & SO_DONTROUTE); #endif + #if defined(SACK) && defined(FACK) + /* Update snd_awnd to reflect the new data that was sent. 
*/ + tp->snd_awnd = tcp_seq_subtract(tp->snd_max, tp->snd_fack) + + tp->retran_data; + #endif } if (error) { out: *************** *** 667,673 **** --- 894,906 ---- tp->last_ack_sent = tp->rcv_nxt; tp->t_flags &= ~TF_ACKNOW; tcp_delack_done(tp); + #if defined(SACK) || defined(NEWRENO) + if (maxburst < 0) + printf("Maxburst error %d\n", maxburst); + if (sendalot && --maxburst) + #else if (sendalot) + #endif goto again; return (0); }