/* * Copyright (c) 1995, 1998 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Daedalus Research * Group of the University of California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Berkeley snoop protocol to improve TCP/IP performance over networks * with wireless links. Deploy this at the base station. * Improves performance in both transfer directions. * The options LINKEMU, E2EELN, NOSNOOPREXMT, and SNOOP_SMART are for * experimental purposes only (at this point) and shouldn't be defined in * normal operation. If the kernel supports priority queueing for IPTOS * LOWDELAY, snoop should perform much better than if it doesn't. * Contact hari@cs.berkeley.edu for more information. Descriptions of the * Snoop protocol and papers can be found at http://www.cs.berkeley.edu/~hari */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef MOBILITY #include #endif #include snoop_state_t *snoopstate; /* the controlling snoop data structure */ #ifdef LINKEMU int snoop_linkemu_enable = 0; /*enable emulation of link-level retx scheme */ #endif #ifdef ELN int tcp_eln_enable; /* enable explicit (wireless) loss notification */ #endif #ifdef NOSNOOPREXMT int snoop_rexmt_disable = 0; /* disable local retransmission */ #endif #ifdef SMART_SNOOP int snoop_smart_enable = 0; /* SMART selective ack processing in snoop */ #endif #define SNOOP_IPTOS /* use low delay flag for local rexmissions */ #define SNOOP_RTX_THRESH 1 u_long snoop_base = 0xd0015b00; u_long snoop_mask = 0xffffff00; #ifdef MEMDEBUG #define dump(cs, p, seq) { printf("%x seq %d lastseen %d lastack %d h %d t %d\n", (p)->mb, (seq) - (cs)->iss, (cs)->last_seen - (cs)->iss, (cs)->last_ack - (cs)->iss, (cs)->bufhead, (cs)->buftail); } #endif /* * Snoop control function. This is the only point of entry into the * protocol, other than retransmissions driven by timers. * * Note: If you're using snoop without the low-latency handoff and mobility * code, fwdflag should always be SNOOP_FWD. * fromflag is FROM_WIRED or FROM_WLESS depending on the source of * the packet (or ack). */ void snoop_ctrl(struct mbuf *m, short fwdflag, short fromflag, int srcrt) { struct tcpiphdr *ti; struct ip *ip; int conn_id; u_short size; conn_state_t *cs; ip = mtod(m, struct ip *); /* Return if not TCP or if snoop is disabled. */ if (ip->ip_p != IPPROTO_TCP || snoopstate->disable) { if (fwdflag == SNOOP_FWD || fromflag == FROM_WLESS) ip_forward(m, 0); return; } /* Get TCP/IP header; it could straddle multiple mbufs. */ get_ti_hdr(m); ti = mtod(m, struct tcpiphdr *); size = ip->ip_len - (ip->ip_hl << 2) - (ti->ti_off << 2); conn_id = snoop_getconn(ti, fromflag); /* * SYN ==> initialize that side of connection. Other side could * already have been initialized, so check for this before cleaning. */ if (ti->ti_flags & TH_SYN) { if (conn_id != -1) { /* Either half-initialized or old connection. */ cs = snoopstate->cstate[conn_id]; if ((fromflag == FROM_WIRED && cs->wi_state & SNOOP_WIRED_ALIVE) || (fromflag == FROM_WLESS && cs->wl_state & SNOOP_WLESS_ALIVE)) snoop_done(conn_id, fromflag); } conn_id = snoop_conninit(m, size, fromflag); fwd_or_free(m, fwdflag, fromflag, 0); return; } /* * If conn_id is -1, then there is no snoop state for this * connection. This can happen either at the start of a new * connection (no SYN), or if the base station has just moved from * inactive to buffering or forwarding state for the first * time during the lifetime of this connection. */ if (conn_id == -1) { /* surely not a SYN */ /* Initialize only if size > 0, to avoid FIN ACK packet */ if (size) { conn_id = snoop_conninit(m, size,fromflag); } if (conn_id == -1) { fwd_or_free(m, fwdflag, fromflag, 0); return; } } /* Clean things up if we get a FIN or RST */ if (ti->ti_flags & (TH_FIN|TH_RST)) { snoop_done(conn_id, fromflag); /* adios for that side */ fwd_or_free(m, fwdflag, fromflag, srcrt); return; } /* * This is a check that needs to be done to handle the full-duplex * connection case, because the connection could be half-open now. */ cs = snoopstate->cstate[conn_id]; if (size && ((fromflag==FROM_WIRED && cs->wi_state==SNOOP_CLOSED) || (fromflag==FROM_WLESS && cs->wl_state==SNOOP_CLOSED))) { snoop_conninit(m, size,fromflag); } if (size) { if (fromflag == FROM_WIRED) cs->wi_state |= SNOOP_WIRED_ALIVE; else cs->wl_state |= SNOOP_WLESS_ALIVE; } if (fwdflag == SNOOP_BUF && fromflag == FROM_WIRED) { snoop_insert(cs, m, ntohl(ti->ti_t.th_seq), size); /* XXX do I need to free m here? */ return; } if (fromflag == FROM_WIRED) /* sending on to wireless link */ snoop_wired(m, conn_id, size, srcrt); else /* recd from wireless link */ snoop_wless(m, conn_id, size, srcrt); return; } /* * Initialize state for the snoop protocol (at boot time). */ void snoop_init() { int i, j; conn_state_t *cs; printf("Snoop initialization... "); snoopstate = (snoop_state_t *) snoop_malloc(sizeof (snoop_state_t)); snoopstate->num_connections = 0; snoopstate->disable = 0; for (i = 0; i < SNOOP_MAXCONN; i++) { cs = snoopstate->cstate[i] = (conn_state_t *) snoop_malloc(sizeof(conn_state_t)); cs->addr = cs->wladdr = 0; cs->port = cs->wlport = 0; cs->expected_dacks = 0; cs->bufhead = cs->buftail = 0; cs->wi_state = cs->wl_state = SNOOP_CLOSED; cs->alloc = 0; #ifdef SMART_SNOOP if (snoop_smart_enable) cs->smart_start = cs->smart_end = 0; #endif for (j = 0; j < SNOOP_MAXWIND; j++) { cs->pkts[j] = (packet_t *) snoop_malloc(sizeof (packet_t)); cs->pkts[j]->mb = 0; cs->pkts[j]->size = 0; cs->pkts[j]->snd_time.tv_sec = 0; cs->pkts[j]->snd_time.tv_usec = 0; cs->pkts[j]->num_rxmit = 0; cs->pkts[j]->sender_rxmit = 0; } cs->wl_bufhead = cs->wl_buftail = 0; for (j = 0; j < SNOOP_MAXBLKS; j++) { cs->wlseqs[j] = (seq_t *) snoop_malloc(sizeof(seq_t)); cs->wlseqs[j]->seq = 0; cs->wlseqs[j]->size = 0; } } printf("done\n"); } /* * Get the snoop connection id of the TCP connection to the mobile host. */ int snoop_getconn(struct tcpiphdr *tcpip_hdr, short fromflag) { int i, j, conn_id; u_long addr, wladdr; u_short port, wlport; if (fromflag == FROM_WIRED) { port = ntohs(tcpip_hdr->ti_t.th_sport); wlport = ntohs(tcpip_hdr->ti_t.th_dport); addr = ntohl(tcpip_hdr->ti_i.ih_src.s_addr); wladdr = ntohl(tcpip_hdr->ti_i.ih_dst.s_addr); } else { port = ntohs(tcpip_hdr->ti_t.th_dport); wlport = ntohs(tcpip_hdr->ti_t.th_sport); addr = ntohl(tcpip_hdr->ti_i.ih_dst.s_addr); wladdr = ntohl(tcpip_hdr->ti_i.ih_src.s_addr); } conn_id = (port ^ wlport) % SNOOP_MAXCONN; for (i=conn_id, j=0; jcstate[i]; if (cs->port == port && cs->wlport == wlport && cs->addr == addr && cs->wladdr == wladdr) return i; } return -1; } int snoop_addconn(struct tcpiphdr *tcpip_hdr, short fromflag) { int i, j, conn_id, accept = -1, halfopen = 0; u_long addr, wladdr; u_short port, wlport; conn_state_t *cs; if (fromflag == FROM_WIRED) { port = ntohs(tcpip_hdr->ti_t.th_sport); wlport = ntohs(tcpip_hdr->ti_t.th_dport); addr = ntohl(tcpip_hdr->ti_i.ih_src.s_addr); wladdr = ntohl(tcpip_hdr->ti_i.ih_dst.s_addr); } else { port = ntohs(tcpip_hdr->ti_t.th_dport); wlport = ntohs(tcpip_hdr->ti_t.th_sport); addr = ntohl(tcpip_hdr->ti_i.ih_dst.s_addr); wladdr = ntohl(tcpip_hdr->ti_i.ih_src.s_addr); } conn_id = (port ^ wlport) % SNOOP_MAXCONN; for (i = conn_id, j=0; jcstate[i]; if (cs->port == port && cs->wlport == wlport && cs->addr == addr && cs->wladdr == wladdr) { /* snoop already initialized for this connection */ halfopen = 1; conn_id = i; break; } else if (cs->wlport == 0 && accept == -1) accept = i; } if (accept >= 0) { snoopstate->num_connections++; cs = snoopstate->cstate[accept]; cs->conn_id = accept; cs->port = port; cs->wlport = wlport; cs->addr = addr; cs->wladdr = wladdr; } if (halfopen) accept = conn_id; return accept; } /* * Initialize connection state for snoop. */ int snoop_conninit(struct mbuf *m, u_short size, short fromflag) { int i, j, accept = -1, conn_id; struct tcpiphdr *tcpip_hdr = mtod(m, struct tcpiphdr *); conn_state_t *cs; tcp_seq seq = ntohl(tcpip_hdr->ti_t.th_seq); tcp_seq ack = ntohl(tcpip_hdr->ti_t.th_ack); conn_id = snoop_addconn(tcpip_hdr, fromflag); if (conn_id >= 0) cs = snoopstate->cstate[conn_id]; else { printf("snoop: not accepting connection\n"); return conn_id; } /* * The garbage timer cleans all the state up if there's been * no activity for a long while (where "long" is defined in snoop.h) */ if (cs->wi_state == SNOOP_CLOSED && cs->wl_state == SNOOP_CLOSED) timeout(snoop_garbage_timeout, (void *) cs, SNOOP_GARBAGE_TIMO/tick); if (fromflag == FROM_WIRED) { if (cs->pkts[cs->bufhead]->mb != 0) { printf("*** mbuf already allocated!\n"); m_freem(cs->pkts[cs->bufhead]->mb); } cs->alloc = 0; cs->wi_state = SNOOP_WIRED_ALIVE; cs->last_seen = seq; cs->last_size = size; cs->last_ack = seq - 1; cs->iss = seq; cs->expected_next_ack = cs->buftail; if (tcpip_hdr->ti_flags & TH_ACK) cs->wl_last_ack = ack; /* * Ideally, this should be initialized to the rtt estimate * from another connection to the same destination, if one * exists. For now, choose an uninformed and conservative * default. */ cs->srtt = SNOOP_RTTDEFAULT; cs->rttdev = SNOOP_RTTDEVDEFAULT; cs->timeout_pending = 0; } else { cs->wl_state = SNOOP_WLESS_ALIVE | SNOOP_WLEMPTY; cs->wl_last_seen = seq; if (tcpip_hdr->ti_flags & TH_ACK) cs->last_ack = ack; } return conn_id; } /* * snoop_wired() */ void snoop_wired(struct mbuf *m, int conn_id, short size, int srcrt) { struct tcpiphdr *ti = mtod(m, struct tcpiphdr *); if (ti->ti_flags & TH_ACK) snoop_wired_ack(m, conn_id, size, srcrt); if (size > 0) snoop_data(m, conn_id, size, srcrt); } void snoop_wless(struct mbuf *m, int conn_id, short size, int srcrt) { struct tcpiphdr *ti = mtod(m, struct tcpiphdr *); if (ti->ti_flags & TH_ACK) snoop_ack(m, conn_id, size); if (size > 0) snoop_wless_data(m, conn_id, size); } /* * snoop_data() handles data transfer from wired to wireless link. */ void snoop_data(struct mbuf *m, int conn_id, short size, int srcrt) { struct conn_state *cs = snoopstate->cstate[conn_id]; struct tcpiphdr *ti = mtod(m, struct tcpiphdr *); tcp_seq seq = ntohl(ti->ti_t.th_seq); packet_t *packet; /* * Check if the snoop is at the (highwater mark. If it is, * stop accepting packets later than cs->last_seen. The cache * is now reserved for lower-numbered packets. */ if (cs->wi_state & SNOOP_FULL) { #ifdef SNOOPFULL_DEBUG printf("%d:%d seq %d lastack %d snoopfull h %d t %d\n", cs->conn_id, cs->wlport, seq - cs->iss, cs->last_ack - cs->iss, cs->bufhead, cs->buftail); #endif } else if ((cs->wi_state&SNOOP_HIGHWATER)&&SEQ_GT(seq,cs->last_seen)) { snoop_insert(cs, m, seq, size); } else { if (snoop_insert(cs, m, seq, size) == SNOOP_OUTSEQ) snoop_untimeout(cs); snoop_timeout(cs); } if (SEQ_GT(seq, cs->last_seen)) { cs->last_seen = seq; cs->last_size = size; } ip_forward(m, srcrt); return; } /* * snoop_insert() does all the hard work for snoop_data(). It traverses the * snoop cache and looks for the right place to insert this packet (or * determines if its already been cached). It then decides whether * this is a packet in the normal increasing sequence, whether it * is a sender-rexmitted-but-lost-due-to-congestion (or network * out-of-order) packet, or if it is a sender-rexmitted packet that * was buffered by us before. */ int snoop_insert(conn_state_t *cs, struct mbuf *m, tcp_seq seq, short size) { packet_t *packet; int i, j, retval = SNOOP_OUTSEQ; if (SEQ_LT(seq, cs->last_ack)) return retval; if (cs->wi_state & SNOOP_FULL) { /* Act as an LRU cache freeing and reusing from the tail. */ snoop_freebuf(cs, cs->pkts[cs->buftail]); cs->buftail = NEXT(cs->buftail); } if (SEQ_GT(seq, cs->last_seen) || (cs->bufhead == cs->buftail)) { /* fast path in common case */ i = cs->bufhead; cs->bufhead = NEXT(cs->bufhead); retval = SNOOP_INSEQ; } else { for (i = cs->buftail; i != cs->bufhead; i = NEXT(i)) { if (cs->pkts[i]->seq == seq) { /* Either repeat packet or fragment thereof. */ packet = cs->pkts[i]; snoop_freebuf(cs, packet); save_ippkt(cs, packet, m, seq, size); packet->sender_rxmit = 1; microtime(&(packet->snd_time)); return retval; } else if (SEQ_GT(cs->pkts[i]->seq, seq)) { packet_t *temp = cs->pkts[PREV(cs->buftail)]; for (j = cs->buftail; j != i; j = NEXT(j)) { cs->pkts[PREV(j)] = cs->pkts[j]; } i = PREV(i); cs->pkts[i] = temp; cs->buftail = PREV(cs->buftail); break; } } } if (cs->bufhead == cs->buftail) { if (cs->wi_state & SNOOP_HIGHWATER) /* full cache */ cs->wi_state |= SNOOP_FULL; } else if (cs->bufhead > cs->buftail) { if ((cs->bufhead - cs->buftail) >= SNOOP_HIGH_THRESH) cs->wi_state |= SNOOP_HIGHWATER; } else if (cs->bufhead < cs->buftail) { if (cs->buftail-cs->bufhead <= SNOOP_MAXWIND-SNOOP_HIGH_THRESH) cs->wi_state |= SNOOP_HIGHWATER; } packet = cs->pkts[i]; microtime(&(packet->snd_time)); save_ippkt(cs, packet, m, seq, size); packet->sender_rxmit = 0; /* * If we have one of the following packets: * 1. a sender retransmission, or * 2. a network-out-of-order packet, * then seq will be < last_seen. * We are sure that this packet wasn't already cached. * We mark this packet as having been due to a sender rexmission * and use this information in snoop_ack(). We let the dupacks * for this packet go through according to expected_dacks. */ if (SEQ_LT(seq, cs->last_seen)) { if (cs->buftail == i) { /* otherwise probably out-of-order */ packet->sender_rxmit = 1; } cs->expected_next_ack = cs->buftail; } return retval; } /* * snoop_ack(): ack processing in the snoop protocol (wired --> wireless). */ void snoop_ack(struct mbuf *m, int conn_id, int size) { conn_state_t *cs = snoopstate->cstate[conn_id]; struct tcpiphdr *ti = mtod(m, struct tcpiphdr *); tcp_seq ack = ntohl(ti->ti_t.th_ack); u_short win = ntohs(ti->ti_t.th_win); if (!(cs->wi_state & SNOOP_WIRED_ALIVE) || !(ti->ti_flags & TH_ACK)) { ip_forward(m, 0); return; } /* SMART processing goes here (not done in this release). */ if (SEQ_GT(cs->last_ack, ack)) { /* spurious ack */ ip_forward(m, 0); return; } else if (cs->last_ack == ack) { /* duplicate ack */ snoop_dupack(cs, m, ack, win, size); } else { /* new ack */ snoop_newack(cs, m, ack); } cs->last_win = win; return; } void snoop_dupack(conn_state_t *cs, struct mbuf *m, tcp_seq ack, u_short win, int size) { packet_t *packet = cs->pkts[cs->buftail]; if ((cs->last_win != win || size > 0) || ((cs->bufhead == cs->buftail) && (~(cs->wi_state & SNOOP_FULL)))) { /* * This is not a duplicate ack but a change window ad, OR * the snoop cache is empty. */ ip_forward(m, 0); return; } if (SEQ_LT(ack, packet->seq)) { /* * We don't have the packet. It was probably lost due * to congestion and it now needs to be retransmitted * from the sender. So forward this ack to the sender. * Warning: Don't try generating any acks on our own... */ ip_forward(m, 0); return; } else { /* * We have the packet. One of three possibilities: * 1) We are not expecting any dupacks (expected == 0) * 2) We are expecting dupacks (expected > 0) * 3) We are totally confused and in an * inconsistent state (expected == -1) */ #ifdef E2EELN /* * If end-to-end ELN is enabled, set the corresponding * bit in the TCP header for the sender to react. */ if (tcp_eln_enable && (!(ti->ti_x2&TH_ELN))) snoop_seteln(ti); #endif E2EELN if (cs->expected_dacks == 0) { /* Not expecting it */ static int thresh = 0; if (thresh++ < SNOOP_RTX_THRESH) { #ifdef LINKEMU /* * Emulating link-level scheme -- let * the ack go thru to the sender. */ if (snoop_linkemu_enable) ip_forward(m, 0); else { #endif if (size == 0) m_freem(m); else ip_forward(m, 0); #ifdef LINKEMU } #endif return; } thresh = 0; /* * If the packet is a sender rexmission then * forward the dupack to the sender. */ if (packet->sender_rxmit) { ip_forward(m, 0); return; } /* Otherwise, not triggered by sender. * If this is the first dupack recd, we must * determine how many dupacks will arrive that * must be ignored and also retransmit the desired * packet. */ cs->expected_dacks = cs->bufhead-cs->expected_next_ack; if (cs->expected_dacks < 0) cs->expected_dacks += SNOOP_MAXWIND; cs->expected_dacks -= SNOOP_RTX_THRESH + 1; cs->expected_next_ack = NEXT(cs->buftail); if (packet->num_rxmit == 0) snoop_rexmt_pkt(cs, packet, IPTOS_LOWDELAY); #ifdef LINKEMU /* * emulating link-level scheme -- * let the ack go thru to the sender. */ if (snoop_linkemu_enable) ip_forward(m, 0); else { #endif if (size == 0) m_freem(m); else ip_forward(m, 0); #ifdef LINKEMU } #endif return; } else if (cs->expected_dacks > 0) { /* * These are subsequent dupacks. These should * be discarded unless the sender initiated * the rxmit of the packet (already checked). */ cs->expected_dacks--; #ifdef LINKEMU /* * emulating link-level scheme -- let * the ack go thru to the sender. */ if (snoop_linkemu_enable) ip_forward(m, 0); else { #endif if (size > 0) { ip_forward(m, 0); } else m_freem(m); #ifdef LINKEMU } #endif return; } else if (cs->expected_dacks == -1) { /* Inconsistent; rexmit last pkt */ if (packet->num_rxmit < 2) snoop_rexmt_pkt(cs, packet, IPTOS_LOWDELAY|IPTOS_RELIABILITY|IPTOS_THROUGHPUT); #ifdef LINKEMU /* * emulating link-level scheme -- let * the ack go thru to the sender. */ if (snoop_linkemu_enable) ip_forward(m, 0); else { #endif if (size > 0) { ip_forward(m, 0); } else m_freem(m); #ifdef LINKEMU } #endif return; } else { /* let sender deal with it */ ip_forward(m, 0); } return; } } /* * New ack from wireless side. Forward ack, clean snoop cache. */ void snoop_newack(conn_state_t *cs, struct mbuf *m, tcp_seq ack) { timev sndtime; int oldtail = -1; packet_t *packet; ip_forward(m, 0); if (cs->pkts[cs->buftail] && cs->pkts[cs->buftail]->num_rxmit) oldtail = cs->buftail; snoop_cleanbufs(cs, ack, &sndtime); packet = cs->pkts[cs->buftail]; if ((cs->wi_state & SNOOP_RTTFLAG) && timerisset(&sndtime)) snoop_rtt(cs, &sndtime); if (oldtail >= 0 && snoop_burst_loss(cs, packet, oldtail)) snoop_rexmt_pkt(cs, packet, IPTOS_LOWDELAY|IPTOS_RELIABILITY| IPTOS_THROUGHPUT); cs->wi_state |= SNOOP_RTTFLAG; cs->expected_dacks = 0; cs->expected_next_ack = cs->buftail; cs->last_ack = ack; return; } /* * Clear snoop cache of received (and ack'd) segments. */ void snoop_cleanbufs(conn_state_t *cs, tcp_seq ack, timev *sndtime) { int i = cs->buftail; snoop_untimeout(cs); timerclear(sndtime); if ((cs->bufhead == cs->buftail) && !(cs->wi_state & SNOOP_FULL)) return; do { packet_t *pkt = cs->pkts[i]; if (pkt->mb == 0) { /* already been cleared */ printf("ack %d %d:%d, already cleared in cleanbufs\n", cs->conn_id, cs->wlport, ack-cs->iss); cs->wi_state &= ~SNOOP_FULL; snoop_freebuf(cs, pkt); i = NEXT(i); continue; } if (SEQ_GT(pkt->seq + pkt->size, ack)) break; if (timerisset(&(pkt->snd_time)) && timercmp(&(pkt->snd_time), sndtime, >)) { sndtime->tv_sec = pkt->snd_time.tv_sec; sndtime->tv_usec = pkt->snd_time.tv_usec; } if (SEQ_LEQ(pkt->seq + pkt->size, ack)) { cs->wi_state &= ~SNOOP_FULL; snoop_freebuf(cs, pkt); } i = NEXT(i); } while (i != cs->bufhead); if ((i != cs->buftail) || (cs->bufhead != cs->buftail)) { cs->wi_state &= ~SNOOP_FULL; cs->buftail = i; } if (((cs->bufhead > cs->buftail) && (cs->bufhead - cs->buftail < SNOOP_HIGH_THRESH)) || ((cs->bufhead <= cs->buftail) && (cs->buftail - cs->bufhead > SNOOP_MAXWIND - SNOOP_HIGH_THRESH))) cs->wi_state &= ~SNOOP_HIGHWATER; if (cs->bufhead != cs->buftail && cs->wi_state & SNOOP_FULL) snoop_timeout(cs); return; } /* * Process an ack from the wired side. We know it's a valid ACK. */ void snoop_wired_ack(struct mbuf *m, int conn_id, short size, int srcrt) { struct tcpiphdr *ti = mtod(m, struct tcpiphdr *); struct conn_state *cs = snoopstate->cstate[conn_id]; tcp_seq ack = ntohl(ti->ti_ack); int i; if (cs->wl_state & SNOOP_WLESS_ALIVE == 0 && size == 0) { ip_forward(m, srcrt); return; } if (ack == cs->wl_last_ack) { if (snoop_wlessloss(cs, ack)) /* did we see data at all? */ snoop_seteln(ti); } else if (SEQ_GT(ack, cs->wl_last_ack)) { /* update info about unack'd data */ for (i = cs->wl_buftail; i != cs->wl_bufhead; i = WL_NEXT(i)) { seq_t *t = cs->wlseqs[i]; if (SEQ_LEQ(t->seq + t->size, ack)) { t->seq = t->size = 0; } else if (SEQ_LEQ(ack, t->seq)) { break; } else if (SEQ_LT(ack, t->seq+t->size)) { /* ack for part of a block */ t->size -= ack - t->seq; t->seq = ack; break; } } cs->wl_buftail = i; if (cs->wl_buftail == cs->wl_bufhead) cs->wl_state |= SNOOP_WLEMPTY; cs->wl_last_ack = ack; /* A new ack could still cause an ELN to be set. */ if (cs->wl_bufhead!=cs->wl_buftail && snoop_wlessloss(cs, ack)) snoop_seteln(ti); } if (size == 0) /* send it on if "pure" ack */ ip_forward(m, srcrt); return; } void snoop_wless_data(struct mbuf *m, int conn_id, short size) { struct tcpiphdr *ti = mtod(m, struct tcpiphdr *); struct conn_state *cs = snoopstate->cstate[conn_id]; tcp_seq seq = ntohl(ti->ti_t.th_seq); int i, j; if (cs->wl_state & SNOOP_WLEMPTY && SEQ_GEQ(seq, cs->wl_last_ack)) { cs->wlseqs[cs->wl_bufhead]->seq = seq; cs->wlseqs[cs->wl_bufhead]->size = size; cs->wl_buftail = cs->wl_bufhead; cs->wl_bufhead = WL_NEXT(cs->wl_bufhead); cs->wl_last_seen = seq; cs->wl_state &= ~SNOOP_WLEMPTY; return; } /* definitely not empty at this point */ if (SEQ_GT(seq, cs->wl_last_seen)) { cs->wl_last_seen = seq; i = WL_PREV(cs->wl_bufhead); if (cs->wlseqs[i]->seq + cs->wlseqs[i]->size == seq) { cs->wlseqs[i]->size += size; return; } i = cs->wl_bufhead; cs->wl_bufhead = WL_NEXT(cs->wl_bufhead); goto found; } else if (SEQ_LT(seq, cs->wlseqs[cs->wl_buftail]->seq)) { if (SEQ_GEQ(seq+size, cs->wlseqs[i = cs->wl_buftail]->seq)) { cs->wlseqs[i]->size += cs->wlseqs[i]->seq - seq; cs->wlseqs[i]->seq = seq; return; } i = cs->wl_buftail = WL_PREV(cs->wl_buftail); goto found; } /* XXX ignore out-of-ordering and rxmissions for now */ return; /* for (i = cs->wl_buftail; i != cs->wl_bufhead; i = WL_NEXT(i)) { if (seq == cs->wlseqs[i]->seq && size == cs->wlseqs[i]->size) { return; } else if (SEQ_GT(cs->wlseqs[i]->seq, seq)) { seq_t *temp = cs->wlseqs[WL_PREV(cs->wl_buftail)]; if (seq + size == cs->wlseqs[i]->seq) { cs->wlseqs[i]->seq = seq; cs->wlseqs[i]->size += size; return; } else if (cs->wlseqs[WL_PREV(i)]->size+size == seq) { cs->wlseqs[WL_PREV(i)]->size += size; return; } for (j = cs->wl_buftail; j != i; j = WL_NEXT(j)) cs->wlseqs[WL_PREV(j)] = cs->wlseqs[j]; i = WL_PREV(i); cs->wlseqs[i] = temp; cs->wl_buftail = WL_PREV(cs->wl_buftail); break; } } */ found: cs->wlseqs[i]->seq = seq; cs->wlseqs[i]->size = size; } #ifndef NOMIP int snoop_mip_change(struct in_addr n_addr, MIP_STATES startflag, MIP_STATES endflag) { } #endif #ifdef MOBILITY /* * Change state of snoop on hint from handoff routine. */ int snoop_mip_change(struct in_addr n_addr, MIP_STATES startflag, MIP_STATES endflag) { int conn_id, i; conn_state_t *cs; struct in_addr addr; short idx; if (snoopstate->disable) return; addr.s_addr = ntohl(n_addr.s_addr); if (startflag == MIP_UNENCAP_FWD) for (conn_id = 0; conn_id < SNOOP_MAXCONN; conn_id++) { cs = snoopstate->cstate[conn_id]; if (cs == NULL) continue; if (cs->addr == addr.s_addr) { snoop_untimeout(cs); /* untimeout(snoop_persist_timeout, (void *) cs);*/ /* * When moving from fwd'ing to buffering, want to * make sure that no rtt computation occurs when * acks start coming when the bs changes back to * fwd'ing. */ if (cs->bufhead == cs->buftail && !(cs->wi_state & SNOOP_FULL)) continue; i = cs->buftail; do { timerclear(&(cs->pkts[i]->snd_time)); i = NEXT(i); } while (i != cs->bufhead); } } if (endflag == MIP_UNENCAP_FWD) for (conn_id = 0; conn_id < SNOOP_MAXCONN; conn_id++) { cs = snoopstate->cstate[conn_id]; if (cs == NULL) continue; if (cs->addr == addr.s_addr) { idx = PREV(cs->bufhead); cs->expected_next_ack = cs->buftail; cs->expected_dacks = 0; do { packet_t *pkt; if (cs->pkts == NULL) break; pkt = cs->pkts[idx]; if (pkt != 0 && SEQ_LT(cs->last_ack, pkt->seq)) { snoop_rexmt_pkt(cs, pkt, IPTOS_LOWDELAY); } idx = NEXT(idx); } while (idx != cs->bufhead); /* * Re-enable timeouts. */ /* timeout(snoop_persist_timeout, (void *) cs, SNOOP_PERSIST_TIMO/tick);*/ if (cs->bufhead == cs->buftail && !(cs->wi_state & SNOOP_FULL)) continue; i = cs->buftail; do { timerclear(&(cs->pkts[i]->snd_time)); i = NEXT(i); } while (i != cs->bufhead); } } if (endflag == MIP_UNUSED) { snoopstate->num_connections--; for (conn_id = 0; conn_id < SNOOP_MAXCONN; conn_id++) { cs = snoopstate->cstate[conn_id]; if (cs == NULL) continue; if (cs->addr != addr.s_addr) continue; snoop_done(conn_id, FROM_WLESS|FROM_WIRED); } } } #endif /* MOBILITY */ /* * Local retransmissions from the snoop buffer to mobile host. */ void snoop_rexmt_pkt(conn_state_t *cs, packet_t *packet, u_char tos) { struct tcpiphdr *ti; struct tcphdr *tcp_hdr; struct ip* ip; struct mbuf *m; int ticks; timev now; #ifdef NOSNOOPREXMT /* if local retransmission has been disabled, just return */ if (snoop_rexmt_disable) return; #endif if (cs->last_ack == cs->iss - 1) return; microtime(&now); if (packet->mb != 0 && packet->num_rxmit < SNOOP_MAX_RXMIT) { if (tos == IPTOS_LOWDELAY || /* dupack-driven */ timerdiff(&now, &(packet->snd_time)) > cs->srtt) { /* Restore flags and other changed fields */ m = m_copym(packet->mb, 0, M_COPYALL, M_DONTWAIT); ip = mtod(m, struct ip *); ti = mtod(m, struct tcpiphdr *); ip->ip_len = packet->iph.len; ip->ip_id = packet->iph.id; ip->ip_ttl = packet->iph.ttl; ip->ip_off = packet->iph.off; /* * Set the tos flag on in the IP TOS field. This puts * this packet in the fast queue at the ether layer, * and is intended to get this packet out as early as * possible to the receiver so that we get as few * dupacks as possible. */ #ifdef SNOOP_IPTOS ip->ip_tos = tos; #endif ti->ti_t.th_sum = packet->tcp_sum; ip_forward(m, 0); packet->num_rxmit++; cs->wi_state &= ~SNOOP_RTTFLAG; packet->snd_time.tv_sec = now.tv_sec; packet->snd_time.tv_usec = now.tv_usec; } } snoop_untimeout(cs); snoop_timeout(cs); return; } /* * Adios. Clear snoop state for this connection. */ void snoop_done(int conn_id, short fromflag) { conn_state_t *cs = snoopstate->cstate[conn_id]; if (cs->wi_state == SNOOP_CLOSED && cs->wl_state == SNOOP_CLOSED) return; if (fromflag | FROM_WIRED) { snoop_wired_clear(cs); if (cs->wl_state & SNOOP_WLESS_ALIVE == 0) snoop_wless_clear(cs); } if (fromflag | FROM_WLESS) { snoop_wless_clear(cs); if (cs->wi_state & SNOOP_WIRED_ALIVE == 0) snoop_wired_clear(cs); } if (cs->wi_state == SNOOP_CLOSED && cs->wl_state == SNOOP_CLOSED) { snoopstate->num_connections--; cs->addr = cs->wladdr = 0; cs->port = cs->wlport = 0; } return; } /* * Utilities, cache management, timer handling, etc. */ /* * malloc() function for the snoop protocol. */ void *snoop_malloc(int size) { void *m; if ((m = malloc(size, M_SNOOP, M_NOWAIT)) == (void *) 0) { printf("snoop_malloc: out of memory\n"); panic("snoop_malloc"); } return m; } inline void save_ippkt(conn_state_t *cs, packet_t *packet, struct mbuf *m, tcp_seq seq, u_short size) { struct ip *ip = mtod(m, struct ip *); struct tcpiphdr *ti = mtod(m, struct tcpiphdr *); packet->iph.len = ip->ip_len; packet->iph.ttl = ip->ip_ttl; packet->iph.id = ip->ip_id; packet->iph.off = ip->ip_off; packet->tcp_sum = ti->ti_t.th_sum; packet->mb = m_copym(m, 0, M_COPYALL, M_DONTWAIT); cs->alloc++; packet->seq = seq; packet->size = size; packet->num_rxmit = 0; } /* * Free one of the snoop buffers. */ inline void snoop_freebuf(conn_state_t *cs, packet_t *pkt) { if (pkt->mb == 0) printf("already freed pkt %x\n", pkt); else cs->alloc--; m_freem(pkt->mb); pkt->mb = 0; pkt->seq = pkt->size = 0; pkt->snd_time.tv_sec = pkt->snd_time.tv_usec = 0; } /* * True if we never saw the data corresponding to this * (dup)ack before, false otherwise (implying the loss was congestion-related) */ inline int snoop_wlessloss(conn_state_t *cs, tcp_seq ack) { if (SEQ_GT(cs->wlseqs[cs->wl_buftail]->seq, ack)) return 1; return 0; } /* * Set ELN bit if we realize that this loss was because of a bad channel. */ inline void snoop_seteln(struct tcpiphdr *ti) { u_short tcp_cksum = ti->ti_sum; ti->ti_x2 |= TH_ELN; /* now adjust the TCP checksum */ tcp_cksum = ~tcp_cksum; #if BYTE_ORDER == LITTLE_ENDIAN tcp_cksum += TH_ELN; #endif #if BYTE_ORDER == BIG_ENDIAN tcp_cksum += TH_ELN*256; #endif ti->ti_sum = ~tcp_cksum; } void snoop_wired_clear(conn_state_t *cs) { int i; snoop_untimeout(cs); for (i = 0; i < SNOOP_MAXWIND; i++) if (cs->pkts[i] && cs->pkts[i]->mb) { snoop_freebuf(cs, cs->pkts[i]); } cs->bufhead = cs->buftail = 0; cs->wi_state = SNOOP_CLOSED; return; } void snoop_wless_clear(conn_state_t *cs) { int i; for (i = 0; i < SNOOP_MAXBLKS; i++) cs->wlseqs[i]->seq = cs->wlseqs[i]->size = 0; cs->wl_bufhead = cs->wl_buftail = 0; cs->wl_state = SNOOP_CLOSED; return; } int snoop_rtt(conn_state_t *cs, timev *sndtime) { long rtt, delta; timev now; microtime(&now); rtt = timerdiff(&now, sndtime); delta = rtt - cs->srtt; cs->srtt = cs->srtt ? cs->srtt + (delta >> 3) : rtt; if (delta < 0) delta = -delta; cs->rttdev = cs->rttdev ? cs->rttdev + ((delta - cs->rttdev) >> 2) : delta; } inline long snoop_rto(conn_state_t *cs) { int rto = cs->srtt + 4 * cs->rttdev; if (rto > SNOOP_MIN_TIMO) return rto/tick; return SNOOP_MIN_TIMO/tick; } inline void snoop_timeout(conn_state_t *cs) { if (!cs->timeout_pending) { cs->timeout_pending = 1; timeout(snoop_rexmt_timeout, (void *)cs, snoop_rto(cs)); } } inline void snoop_untimeout(conn_state_t *cs) { if (cs->timeout_pending) { untimeout(snoop_rexmt_timeout, (void *) cs); cs->timeout_pending = 0; } } void snoop_rexmt_timeout(void *arg) { int s = splnet(); conn_state_t *cs = (conn_state_t *) arg; short idx = cs->buftail; int ticks; #ifdef NOSNOOPREXMT /* * If local retransmission has been disabled, just return. */ if (snoop_rexmt_disable) { /* splx(s);*/ return; } #endif if (!((cs->bufhead == cs->buftail) && !(cs->wi_state & SNOOP_FULL))) { snoop_rexmt_pkt(cs, cs->pkts[idx], IPTOS_LOWDELAY|IPTOS_THROUGHPUT); cs->expected_next_ack = NEXT(idx); } splx(s); return; } /* XXX obsolete. */ /* void snoop_persist_timeout(void *arg) { int s = splnet(); conn_state_t *cs = (conn_state_t *) arg; short idx = cs->buftail; int ticks; static int count = 0; if (cs->wi_state & SNOOP_SL_REXMT) count = 0; else { ++count; if (cs->pkts[idx]->mb != 0 && cs->last_ack == cs->pkts[idx]->seq) cs->expected_dacks = 0; cs->expected_next_ack = idx; if (cs->pkts[idx]->num_rxmit >= SNOOP_MAX_RXMIT) cs->pkts[idx]->num_rxmit = SNOOP_MAX_RXMIT - 2; snoop_rexmt_pkt(cs, cs->pkts[idx], IPTOS_LOWDELAY|IPTOS_RELIABILITY); } ticks = SNOOP_PERSIST_TIMO / tick; cs->wi_state &= ~SNOOP_SL_REXMT; if (!(cs->wi_state & SNOOP_CLOSED)) if (count < SNOOP_MAXPERSIST) { timeout(snoop_persist_timeout, (void *) cs, ticks); if (!cs->timeout_pending) timeout(snoop_rexmt_timeout, (void *) cs, snoop_rto(cs)); } else snoop_done(cs->conn_id, FROM_WIRED); splx(s); } */ void snoop_garbage_timeout(void *arg) { int s = splnet(), settimer = 0, ticks; conn_state_t *cs = (conn_state_t *) arg; if (!cs || (cs->wi_state==SNOOP_CLOSED&&cs->wl_state==SNOOP_CLOSED)) { splx(s); return; } if (cs->wi_state&SNOOP_WIRED_ALIVE || cs->wl_state&SNOOP_WLESS_ALIVE) { if (cs->wi_state & SNOOP_WIRED_ALIVE) cs->wi_state &= ~SNOOP_WIRED_ALIVE; if (cs->wl_state & SNOOP_WLESS_ALIVE) cs->wl_state &= ~SNOOP_WLESS_ALIVE; ticks = SNOOP_GARBAGE_TIMO / tick; timeout(snoop_garbage_timeout, (void *)cs, ticks); } snoop_done(cs->conn_id, FROM_WIRED|FROM_WLESS); splx(s); return; } /* * Process SMART selective ack information. */ #ifdef SMART_SNOOP void snoop_smartoption(struct mbuf *m, conn_state_t *cs) { struct tcpiphdr *ti = mtod(m, struct tcpiphdr *); int off = (ti->ti_off << 2); int optlen = 0, cnt, opt; u_char *cp = mtod(m, u_char *) + sizeof (struct tcpiphdr); if (off < sizeof (struct tcphdr) || off > ((struct ip *)ti)->ip_len) cs->smart_start = cs->smart_end = 0; cnt = off - sizeof (struct tcphdr); /* length of options */ for (; cnt > 0; cnt -= optlen, cp += optlen) { /* process options */ opt = cp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { optlen = cp[1]; if (optlen <= 0) break; } switch (opt) { default: continue; case TCPOPT_SMART: { tcp_seq smart_start_last_rcv, smart_end_last_rcv; if (optlen != TCPOLEN_SMART) cs->smart_start = cs->smart_end = 0; bcopy((char *) cp + 2, (char *) &(cs->smart_start), sizeof(tcp_seq)); NTOHL(cs->smart_start); bcopy((char *) cp + 6, (char *) &(cs->smart_end), sizeof(tcp_seq)); NTOHL(cs->smart_end); } return; } } cs->smart_start = cs->smart_end = 0; return; } #endif /* SMART_SNOOP */ int snoop_burst_loss(conn_state_t *cs, packet_t *pkt, int oldtail) { int num_pkts = cs->bufhead - cs->buftail; if (num_pkts == 0) if (cs->wi_state & SNOOP_FULL) num_pkts = SNOOP_MAXWIND; else return 0; if (num_pkts < 0) num_pkts = -num_pkts; if (NEXT(oldtail) == cs->buftail && num_pkts>1 && pkt->num_rxmit == 0) return 1; /* XXX pretty much random */ /* if (num_pkts > 8) if (NEXT(NEXT(oldtail)) == cs->buftail && pkt->num_rxmit == 0) return 1; */ return 0; } inline void fwd_or_free(struct mbuf *m, short fwdflag, short fromflag, int srcrt) { if (fwdflag == SNOOP_FWD || fromflag == FROM_WLESS) ip_forward(m, srcrt); else m_freem(m); return; } inline void get_ti_hdr(struct mbuf *m) { struct mbuf *n; if (m->m_len < sizeof(struct tcpiphdr)) { if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { /* pullup frees m on failure */ ip_forward(n, 0); return; } m_freem(n); } return; }