/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Berkeley snoop protocol to improve TCP/IP performance over networks
 * with wireless links. Deploy this at base station.
 * The options LINKEMU, ELN, NOSNOOPREXMT, and SMART_SNOOP are for
 * experimental purposes only (at this point) and shouldn't be defined in
 * normal operation. If the kernel supports priority queueing for IPTOS
 * LOWDELAY, snoop should perform a little better than if it doesn't (the
 * Daedalus WaveLAN drivers on the ftp site daedalus.cs.berkeley.edu have
 * such priority routing enabled).
 */

/*
 * NOTE(review): the header names of the #include directives below are
 * missing from this copy of the file; they must be restored from the
 * original distribution before this file can be compiled.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifndef NOMIP
#include
#endif
#include

snoop_state_t *snoopstate;	/* the controlling snoop data structure */

/*
 * Run-time switches for the experimental compile-time options.  Each
 * variable exists only when the corresponding option is compiled in.
 */
#ifdef LINKEMU
int snoop_linkemu_enable = 0;	/*enable emulation of link-level retx scheme */
#endif
#ifdef ELN
int snoop_eln_enable;		/* enable explicit wireless loss notification */
#endif
#ifdef NOSNOOPREXMT
int snoop_rexmt_disable = 0;	/* disable local retransmission */
#endif
#ifdef SMART_SNOOP
int snoop_smart_enable = 0;	/* smart sack processing in snoop */
#endif

#define SNOOP_IPTOS		/* use low delay flag for local rexmissions */

/*
 * Snoop on TCP segments meant for a mobile host, buffer them in a cache,
 * and forward them on if required.  Also call functions to do ack processing.
 * In this function we do several things:
 * 1. Forward packet and return if the packet is not a TCP segment.
 * 2. The snoop agent can be in one of two states: forwarding or buffering.
 *    This is a consequence of the integration with the low-latency,
 *    multicast-based handoff algorithm (see ip_mobile.c).
 *    In the buffering state, no acks are seen and all the data (the last
 *    few packets) are cached round robin (using snoop_insert()).
 *    In the forwarding state, more interesting things happen -- packets
 *    are forwarded to the MH, acks are monitored and perhaps suppressed,
 *    and local timeouts enabled.
 *    If you don't have the handoff code incorporated, snoop will always be in
 *    forwarding mode.
This is ensured by the right actions in ipintr(). * * 3. If this is a connection establishment (SYN) request from the fixed host, * then do snoop_conninit() to initialize cache and state for this conn. * This is done if conn_id is -1, and the segment is not a SYN, a state * that occurs immediately following the first handoff. * 4. If this is a connection termination packet (FIN), do snoop_done(). * 5. If destination is mobile, do snoop_data(). * 6. If source is mobile and packet is an ack, do snoop_ack(). */ /* This particular version only has the data transfer to the mobile in it */ /* Note: If you're using snoop without the low-latency handoff and mobility * code, fwdflag should always be SNOOP_FWD. fromflag is FROM_FH or FROM_MH * depending on the source of the packet (or ack). */ void snoop_ctrl(struct mbuf *m, short fwdflag, short fromflag, int srcrt) { struct tcpiphdr *tcpip_hdr = mtod(m, struct tcpiphdr *); struct ip *ip = mtod(m, struct ip *); int conn_id; struct in_addr addr; u_short port, size; int drop = 0; if (ip->ip_p != IPPROTO_TCP) { /* not TCP */ if (fwdflag == SNOOP_FWD) ip_forward(m, 0); return; } if (fromflag == FROM_FH) { addr.s_addr = ntohl(tcpip_hdr->ti_i.ih_dst.s_addr); port = ntohs(tcpip_hdr->ti_t.th_dport); } else { /* sender is mobile */ addr.s_addr = ntohl(tcpip_hdr->ti_i.ih_src.s_addr); port = ntohs(tcpip_hdr->ti_t.th_sport); } /* * If snoop has been disabled, call ip_forward() and return. */ if (snoopstate->disable) { if (fwdflag == SNOOP_FWD) ip_forward(m, 0); return; } if (m->m_len < sizeof(struct tcpiphdr)) { /* * TCP/IP header could straddle multiple mbufs. 
*/ struct mbuf *n = m_copym(m, 0, M_COPYALL, M_DONTWAIT); if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { /* pullup frees m on failure */ ip_forward(n, 0); return; } m_freem(n); ip = mtod(m, struct ip *); tcpip_hdr = mtod(m, struct tcpiphdr *); } size = ip->ip_len - (ip->ip_hl << 2) - (tcpip_hdr->ti_off << 2); conn_id = snoop_getmobconn(addr, port); /* * If conn_id is -1, then there is no snoop state for this * connection. This can happen either at the start of a new * connection, or if the base station has just moved from * inactive to buffering (or forwarding) state for the first * time during the lifetime of this connection. */ if (fromflag == FROM_FH && tcpip_hdr->ti_t.th_flags & TH_SYN) { if (conn_id != -1) /* * Clean up old state which may be around if we * didn't see the SYN packet the last time around * and a timeout hasn't happened for it yet. */ snoop_done(conn_id); conn_id = snoop_conninit(m, addr, port, size); if (fwdflag == SNOOP_FWD) ip_forward(m, 0); else m_freem(m); return; } if (conn_id == -1) { /* not a SYN */ if (fromflag == FROM_FH) conn_id = snoop_conninit(m, addr, port, size); else { /* sender is mobile */ ip_forward(m, 0); return; } if (conn_id == -1) { /* still -1 after conninit */ ip_forward(m, 0); return; } if (!(tcpip_hdr->ti_t.th_flags & TH_SYN)) snoop_insert(snoopstate->cstate[conn_id], m, ntohl(tcpip_hdr->ti_t.th_seq), size); if (fwdflag == SNOOP_FWD) ip_forward(m, 0); return; } if (fromflag == FROM_MH && /* sender mobile */ (tcpip_hdr->ti_t.th_flags & TH_FIN || /* and its the end */ tcpip_hdr->ti_t.th_flags & TH_RST)) { snoop_done(conn_id); /* adios */ if (fwdflag == SNOOP_FWD) ip_forward(m, srcrt); return; } snoopstate->cstate[conn_id]->fstate |= SNOOP_SL_REXMT; snoopstate->cstate[conn_id]->fstate |= SNOOP_ALIVE; if (fwdflag == SNOOP_BUF) { snoop_insert(snoopstate->cstate[conn_id], m, ntohl(tcpip_hdr->ti_t.th_seq), size); return; } if (fromflag == FROM_FH) /* receiver mobile */ snoop_data(m, conn_id, size, drop, srcrt); else 
/* sender is mobile */ snoop_ack(m, conn_id, size); return; } /* * Initialize state for the snoop protocol (at boot time). */ void snoop_init() { int i, j; conn_state_t *cs; printf("Snoop initialization... "); snoopstate = (snoop_state_t *) snoop_malloc (sizeof (snoop_state_t)); snoopstate->num_connections = 0; snoopstate->disable = 0; for (i = 0; i < SNOOP_MAXCONN; i++) { cs = snoopstate->cstate[i] = (conn_state_t *) snoop_malloc(sizeof(conn_state_t)); cs->dest_addr.s_addr = cs->dest_port = 0; cs->expected_dacks = 0; cs->bufhead = cs->buftail = 0; #ifdef SMART_SNOOP if (snoop_smart_enable) cs->smart_start = cs->smart_end = 0; #endif for (j = 0; j < SNOOP_MAXWIND; j++) { cs->pkts[j] = (packet_t *) snoop_malloc (sizeof (packet_t)); cs->pkts[j]->mb = 0; cs->pkts[j]->size = 0; cs->pkts[j]->snd_time.tv_sec = 0; cs->pkts[j]->snd_time.tv_usec = 0; cs->pkts[j]->num_rxmit = 0; cs->pkts[j]->sender_rxmit = 0; } } printf("done\n"); } /* * Get the snoop connection id of the TCP connection to the mobile host. */ int snoop_getmobconn(struct in_addr addr, u_short port) { int i, j; int conn_id; conn_id = ((addr.s_addr & LASTBYTEMASK) * port) % SNOOP_MAXCONN; for (i = conn_id, j = 0; j < SNOOP_MAXCONN; j++) { if (snoopstate->cstate[i]->dest_port == port && snoopstate->cstate[i]->dest_addr.s_addr == addr.s_addr) return i; i = (i+1) % SNOOP_MAXCONN; } return -1; } /* * Initialize connection state for snoop. 
*/ int snoop_conninit(struct mbuf *m, struct in_addr addr, u_short port, u_short size) { int conn_id; int i, j; struct tcpiphdr *tcpip_hdr = mtod(m, struct tcpiphdr *); conn_state_t *cs; tcp_seq seq = ntohl(tcpip_hdr->ti_t.th_seq); short init_idx; int accept = 0; #ifdef DEBUG printf("conninit\n"); #endif conn_id = ((addr.s_addr & LASTBYTEMASK) * port) % SNOOP_MAXCONN; for (i = conn_id, j = 0; j < SNOOP_MAXCONN; j++) { if (snoopstate->cstate[i]->dest_port == 0) { accept = 1; break; } if (snoopstate->cstate[i]->dest_port == port && snoopstate->cstate[i]->dest_addr.s_addr == addr.s_addr) /* snoop already initialized for this connection */ return; i = (i+1) % SNOOP_MAXCONN; } if (accept) { ++snoopstate->num_connections; snoopstate->cstate[i]->dest_addr.s_addr = addr.s_addr; snoopstate->cstate[i]->dest_port = port; conn_id = i; } else { printf("snoop: not accepting connection\n"); return -1; } cs = snoopstate->cstate[conn_id]; init_idx = cs->bufhead; cs->conn_id = conn_id; cs->fstate = SNOOP_ACTIVE | SNOOP_NOACK | SNOOP_ALIVE | SNOOP_SL_REXMT; cs->dest_addr.s_addr = addr.s_addr; cs->dest_port = port; cs->last_seen = seq; cs->last_size = size; cs->last_ack = seq - 1; cs->initseq = seq; cs->buftail = init_idx; cs->bufhead = init_idx; cs->expected_next_ack = cs->buftail; /* * Ideally, this should be initialized to the rtt estimate from * some other connection to the same destination, if one exists. * For now, choose an uninformed and conservative default. 
*/ cs->srtt = SNOOP_RTTDEFAULT; cs->rttvar = 0; /* currently not used */ cs->timeout_pending = 0; /* * There are two long-duration timers used by the snoop agent: * a persist timer to keep the connection going if there has * been no activity for a while but there are unack'd pkts, and * a garbage timer that cleans all the state up if there's been * no activity for a long while (where "long" is defined in snoop.h */ timeout(snoop_persist_timeout, (void *) cs, SNOOP_PERSIST_TIMO/tick); timeout(snoop_garbage_timeout, (void *) cs, SNOOP_GARBAGE_TIMO/tick); return conn_id; } /* * snoop_data() handles data transfer from fixed to mobile host. */ void snoop_data(struct mbuf *m, int conn_id, short size, int drop, int srcrt) { struct tcpiphdr *tcpip_hdr = mtod(m, struct tcpiphdr *); struct conn_state *cs; struct tcphdr *tcp_hdr = &(tcpip_hdr->ti_t); tcp_seq seq = ntohl(tcp_hdr->th_seq); packet_t *packet; #ifdef DEBUG printf("data: seq = %x\t", seq); #endif if (size == 0) { ip_forward(m, srcrt); return; } cs = snoopstate->cstate[conn_id]; if (cs == 0) { printf("\tvery bad connection state!\n"); ip_forward(m, srcrt); return; } /* * Check if the snoop is 90% full (highwater mark). If it is, * stop accepting packets later than cs->last_seen. The snoop * is now reserved for lower-numbered packets. 
*/ if ((cs->fstate & SNOOP_HIGHWATER) && SEQ_GT(seq, cs->last_seen)) { #ifdef SNOOPFULL_DEBUG printf("Snoop high water mark, fwd'ing packet not caching\n"); #endif ip_forward(m, srcrt); if (SEQ_GT(seq, cs->last_seen)) { cs->last_seen = seq; cs->last_size = size; } return; } if (cs->fstate & SNOOP_FULL) { #ifdef SNOOPFULL_DEBUG printf("Snoop full, fwd'ing packet %x\n", seq); #endif ip_forward(m, srcrt); if (SEQ_GT(seq, cs->last_seen)) { cs->last_seen = seq; cs->last_size = size; } return; } snoop_insert(cs, m, seq, size); if (!cs->timeout_pending) { ++cs->timeout_pending; timeout(snoop_rexmt_timeout, (void *)cs, SNOOP_TIMO(cs)/tick); } ip_forward(m, srcrt); return; } inline void save_ippkt(packet_t *packet, struct mbuf *m) { struct ip *ip; /* to preserve some header fields */ struct tcpiphdr *tcpip_hdr = mtod(m, struct tcpiphdr *); ip = mtod(m, struct ip *); packet->iph.len = ip->ip_len; packet->iph.ttl = ip->ip_ttl; packet->iph.id = ip->ip_id; packet->iph.off = ip->ip_off; packet->mb = m_copym(m, 0, M_COPYALL, M_DONTWAIT); packet->tcp_sum = tcpip_hdr->ti_t.th_sum; } /* * snoop_insert() does all the hard work for snoop_data(). It traverses the * snoop cache and looks for the right place to insert this packet (or * determines if its already been cached). It then decides whether * this is a packet in the normal increasing sequence, whether it * is a sender-rexmitted-but-lost-due-to-congestion (or network * out-of-order) packet, or if it is a sender-rexmitted packet that * was buffered by us before. */ void snoop_insert(conn_state_t *cs, struct mbuf *m, tcp_seq seq, short size) { struct tcpiphdr *tcpip_hdr = mtod(m, struct tcpiphdr *); packet_t *packet; int i, j; if (SEQ_LT(seq, cs->last_ack)) { #ifdef DEBUG printf("\tway out-of-order pkt %x, lastack %x\n", seq, cs->last_ack); #endif return; } /* * snoop_insert() can be called with SNOOP_FULL only from the * mobility code in ipintr(). In this case, the packet must not * be forwarded to the mobile host. 
Also, the snoop cache now * behaves as an LRU cache, freeing the packet received earliest * and thus maintaining the last several packets sent. */ if (cs->fstate & SNOOP_FULL) { snoop_freebuf(cs->pkts[cs->buftail]); cs->buftail = NEXT(cs->buftail); } if (SEQ_GT(seq, cs->last_seen)) { /* fast path in common case */ i = cs->bufhead; cs->bufhead = NEXT(cs->bufhead); goto found; } else if (SEQ_LT(seq, cs->pkts[cs->buftail]->seq)) { cs->buftail = PREV(cs->buftail); i = cs->buftail; goto found; } for (i = cs->buftail; i != cs->bufhead; i = NEXT(i)) { if (cs->pkts[i]->seq == seq) { #ifdef DEBUG printf("\thave pkt %x at %d\n ", seq, i); #endif /* * Either a repeat packet or a fragment thereof. */ packet = cs->pkts[i]; if (packet->size <= size) { m_freem(packet->mb); save_ippkt(packet, m); packet->seq = seq; packet->size = size; } packet->num_rxmit = 0; packet->sender_rxmit = 1; microtime(&(packet->snd_time)); return; } else if (SEQ_GT(cs->pkts[i]->seq, seq)) { packet_t *temp = cs->pkts[PREV(cs->buftail)]; for (j = cs->buftail; j != i; j = NEXT(j)) cs->pkts[PREV(j)] = cs->pkts[j]; i = PREV(i); cs->pkts[i] = temp; cs->buftail = PREV(cs->buftail); #ifdef DEBUG printf("\tcache reorg; pkt %x, head %d, tail %d", seq, cs->bufhead, cs->buftail); #endif goto found; } } found: if (cs->bufhead == cs->buftail) cs->fstate |= SNOOP_FULL; else if (cs->bufhead > cs->buftail) { if ((cs->bufhead - cs->buftail) >= SNOOP_HIGH_THRESH) cs->fstate |= SNOOP_HIGHWATER; } else { if (cs->buftail-cs->bufhead <= SNOOP_MAXWIND-SNOOP_HIGH_THRESH) cs->fstate |= SNOOP_HIGHWATER; } packet = cs->pkts[i]; microtime(&(packet->snd_time)); /* * Must save IP header in host order or they will be set * network order. */ save_ippkt(packet, m); packet->seq = seq; packet->size = size; packet->num_rxmit = packet->sender_rxmit = 0; #ifdef DEBUG printf("\t%x at %d\n", seq, i); #endif /* * If we have one of the following packets: * 1. a network-out-of-order packet, or * 2. a fast rxmit packet, or * 3. 
a sender retransmission for some reason, * AND it hasn't already been buffered, * then seq will be < last_seen. * * We mark this packet as having been due to a sender rexmit * and use this information in snoop_ack(). We let the dupacks * for this packet go through according to expected_dacks. * */ if (SEQ_LT(seq, cs->last_seen)) { /* not in-order */ #ifdef DEBUG printf("\tpkt %x out of order, last %x\n", seq, cs->last_seen); #endif if (cs->buftail == i) { packet->sender_rxmit = 1; packet->num_rxmit = 0; } cs->expected_next_ack = cs->buftail; } else if (SEQ_GT(seq, cs->last_seen)) { cs->last_seen = seq; cs->last_size = size; } return; } /* * Process SMART selective ack information. */ #ifdef SMART_SNOOP void snoop_smartoption(struct mbuf *m, conn_state_t *cs) { struct tcpiphdr *ti = mtod(m, struct tcpiphdr *); int off = (ti->ti_off << 2); int optlen = 0, cnt, opt; u_char *cp = mtod(m, u_char *) + sizeof (struct tcpiphdr); if (off < sizeof (struct tcphdr) || off > ((struct ip *)ti)->ip_len) cs->smart_start = cs->smart_end = 0; cnt = off - sizeof (struct tcphdr); /* length of options */ for (; cnt > 0; cnt -= optlen, cp += optlen) { /* process options */ opt = cp[0]; if (opt == TCPOPT_EOL) break; if (opt == TCPOPT_NOP) optlen = 1; else { optlen = cp[1]; if (optlen <= 0) break; } switch (opt) { default: continue; case TCPOPT_SMART: { tcp_seq smart_start_last_rcv, smart_end_last_rcv; if (optlen != TCPOLEN_SMART) cs->smart_start = cs->smart_end = 0; bcopy((char *) cp + 2, (char *) &(cs->smart_start), sizeof(tcp_seq)); NTOHL(cs->smart_start); bcopy((char *) cp + 6, (char *) &(cs->smart_end), sizeof(tcp_seq)); NTOHL(cs->smart_end); #ifdef DEBUG printf("smart %x:%x\n", cs->smart_start, cs->smart_end); #endif } return; } } cs->smart_start = cs->smart_end = 0; return; } #endif /* SMART_SNOOP */ /* * snoop_ack(): ack processing in the snoop protocol. 
*/ void snoop_ack(struct mbuf *m, int conn_id, int size) { struct conn_state *cs = snoopstate->cstate[conn_id]; struct tcpiphdr *tcpip_hdr = mtod(m, struct tcpiphdr *); struct tcphdr *tcp_hdr = &(tcpip_hdr->ti_t); int idx; tcp_seq ack = ntohl(tcp_hdr->th_ack); tcp_seq seq = ntohl(tcp_hdr->th_seq); u_short win = ntohs(tcp_hdr->th_win); register packet_t *packet; /* * There are 3 cases: * 1. last_ack > ack. In this case what has happened is * that the ack's have come out of order, so we don't * do any local processing but forward it on. * 2. last_ack == ack. This is a duplicate ack. If we have * the packet we resend it, and drop the dupack. * Otherwise we never got it from the fixed host, so we * Need to let the dupack get through. * Set expected_dacks to number of packets already sent * This is the number of dup acks to ignore. * 3. last_ack < ack. last_ack <-- ack, and update * the head of the buffer queue. Also clean up buffers of ack'd * packets. */ #ifdef notdef /* NOTE notdef. This is for data xfer from MH */ if (size == 0) /* no data in this packet; maybe just an ack */ goto pure_ack; /* Now we know there's data in this packet. * First check if this packet is the next one in sequence. * If not, then it is very likely that one or more packets got * lost over the wireless link. Note that this is true for a * one-hop link that doesn't reorder packets. This test has to * be appropriately modified for other types of wireless networks, * especially those that have multiple wireless hops. */ if (SEQ_LT(cs->last_mhseq + cs->last_mhsize, seq)) /* some packet(s) in between have been lost. 
generate NACK */ snoop_sendnack(cs, seq); pure_ack: cs->last_mhseq = seq; cs->last_mhsize = size; #endif #ifdef DEBUG printf("ack %x, expect %x, dacks %d\tcached %x-%x\n", ack, cs->expected_next_ack, cs->expected_dacks, cs->pkts[cs->buftail]->seq, cs->pkts[cs->bufhead]->seq); #endif if (!(tcpip_hdr->ti_flags & TH_ACK)) { ip_forward(m, 0); return; } if (cs->fstate & SNOOP_CLOSED) { ip_forward(m, 0); return; } #ifdef SMART_SNOOP if (!snoop_smart_enable) goto ack; snoop_smartoption(m, cs); /* * Process smart sack first, then go into code to process acks * as before. cs->smart_start:cs->smart_end is the just received * block. */ if (cs->smart_end == 0 && cs->smart_start == 0) goto ack; /* * Now retransmit packets between ack and smart_start. */ #ifdef DEBUG printf("smstart %x smend %x ack %x lastack %x\n", cs->smart_start, cs->smart_end, ack, cs->last_ack); #endif for (idx = cs->buftail; idx != cs->bufhead; idx = NEXT(idx)) { packet_t *packet = cs->pkts[idx]; if (SEQ_LT(packet->seq, ack)) continue; if (SEQ_LT(packet->seq, cs->smart_start)) { if (!packet->sender_rxmit) { timev now; microtime(&now); if (packet->num_rxmit == 0 || (timerisset(&(packet->snd_time)) && timerdiff(&now, &(packet->snd_time)) >= cs->srtt)) if (packet->mb) { snoop_rexmt_pkt(cs, packet, IPTOS_LOWDELAY); } } } if (SEQ_GEQ(packet->seq, cs->smart_start) && SEQ_LEQ(packet->seq + packet->size, cs->smart_end)) { m_freem(packet->mb); packet->mb = 0; } } #endif SMART_SNOOP ack: if (SEQ_GT(cs->last_ack, ack)) { #ifdef DEBUG printf("\tspurious ack %x\n", ack); #endif /* * This is a spurious ack. Don't do anything but * forward the ack to the fixed host. */ ip_forward(m, 0); return; } else if (cs->last_ack == ack) { /* this is a duplicate ack */ if (cs->last_win != win || size > 0) { /* this is not a duplicate ack but a change window ad. 
*/ ip_forward(m, 0); cs->last_win = win; return; } cs->last_win = win; idx = cs->buftail; if ((packet = cs->pkts[idx]) == 0) { printf("\tbad connection state\n"); ip_forward(m, 0); return; } if (SEQ_LT(ack, packet->seq)) { #ifdef DEBUG printf("\tdon't have %x letting thru\n", ack); #endif /* * We don't have the packet. It was probably lost due * to congestion and it now needs to be retransmitted * from the sender. So forward this ack to the sender. * It *may* be a good idea to send a threshold number * of acks to initiate TCP fast retransmit. * (Piggyback on packets already destined to sender???) * This is A BAD idea! * However, simulations using REAL showed this not * to be a win. At this point, its best to let things * take their own course. */ ip_forward(m, 0); return; } else { /* * We have the packet. One of three possibilities: * 1) We are not expecting any dupacks (expected == 0) * 2) We are expecting dupacks (expected > 0) * 3) We are totally confused and in an * inconsistent state (expected == -1) */ #define RTX_THRESH 1 #ifdef DEBUG printf("\tin cache %x", ack); #endif #ifdef ELN /* * If explicit wireless loss notification is enabled, * set the corresponding bit in the TCP header. For * correctness, we do things here only if the the * bit is not already set (though the bit shouldn't * be already set). */ if (snoop_eln_enable && (!(tcpip_hdr->ti_x2&TH_ELN))) { u_short tcp_cksum = tcpip_hdr->ti_sum; tcpip_hdr->ti_x2 |= TH_ELN; /* now adjust the TCP checksum */ tcp_cksum = ~tcp_cksum; #if BYTE_ORDER == LITTLE_ENDIAN tcp_cksum += TH_ELN; #endif #if BYTE_ORDER == BIG_ENDIAN tcp_cksum += TH_ELN*256; #endif tcpip_hdr->ti_sum = ~tcp_cksum; } #endif /* ELN */ if (cs->expected_dacks == 0) { /* not expecting it */ static int thresh = 0; if (thresh++ < RTX_THRESH) { /* no action if under RTX_THRESH */ #ifdef LINKEMU /* * emulating link-level scheme -- let * the ack go thru to the sender. 
*/ if (snoop_linkemu_enable) ip_forward(m, 0); else { #endif if (size == 0) m_freem(m); else ip_forward(m, 0); #ifdef LINKEMU } #endif return; } thresh = 0; /* * If the packet is a sender rexmission then * forward the dupack to the sender. */ if (packet->sender_rxmit) { #ifdef DEBUG printf("\t%x sender rxmit\n", packet->seq); #endif ip_forward(m, 0); return; } /* Otherwise, not triggered by sender. * If this is the first dupack recd, * we must determine how many dupacks * will arrive that must be ignored * and also retransmit the desired * packet. */ /* expected_dacks = -1 if inconsistent */ cs->expected_dacks = cs->bufhead - cs->expected_next_ack; if (cs->expected_dacks < 0) cs->expected_dacks += SNOOP_MAXWIND; cs->expected_dacks -= RTX_THRESH + 1; #ifdef DEBUG printf(" ack %x expect %d more\n", ack, cs->expected_dacks); #endif cs->expected_next_ack = NEXT(cs->buftail); if (packet->num_rxmit == 0) snoop_rexmt_pkt(cs, packet, IPTOS_LOWDELAY); #ifdef LINKEMU /* * emulating link-level scheme -- let * the ack go thru to the sender. */ if (snoop_linkemu_enable) ip_forward(m, 0); else { #endif if (size == 0) m_freem(m); else ip_forward(m, 0); #ifdef LINKEMU } #endif return; } else if (cs->expected_dacks > 0) { /* * These are subsequent dupacks. These should * be discarded unless the sender initiated * the rxmit of the packet. */ --cs->expected_dacks; #ifdef LINKEMU /* * emulating link-level scheme -- let * the ack go thru to the sender. */ if (snoop_linkemu_enable) ip_forward(m, 0); else { #endif if (size > 0) { ip_forward(m, 0); } else m_freem(m); #ifdef LINKEMU } #endif return; } else if (cs->expected_dacks == -1) { /* Inconsistent; rexmit last pkt */ if (packet->num_rxmit < 2) snoop_rexmt_pkt(cs, packet, IPTOS_LOWDELAY|IPTOS_RELIABILITY|IPTOS_THROUGHPUT); #ifdef LINKEMU /* * emulating link-level scheme -- let * the ack go thru to the sender. 
*/ if (snoop_linkemu_enable) ip_forward(m, 0); else { #endif if (size > 0) { ip_forward(m, 0); } else m_freem(m); #ifdef LINKEMU } #endif return; } else { /* let sender deal with it */ ip_forward(m, 0); } return; } } else { /* a new ack */ timev sndtime; int oldtail = -1; ip_forward(m, 0); cs->fstate &= ~SNOOP_NOACK; /* have seen at least 1 new ack */ /* Free buffers first */ if (cs->pkts[cs->buftail] && cs->pkts[cs->buftail]->num_rxmit) oldtail = cs->buftail; snoop_cleanbufs(cs, ack, &sndtime); idx = cs->buftail; if ((packet = cs->pkts[idx]) == 0) return; if ((cs->fstate & SNOOP_RTTFLAG) && timerisset(&sndtime)) { long tao, err; timev now; microtime(&now); tao = timerdiff(&now, &sndtime); err = tao - cs->srtt; cs->srtt += err/8; #ifdef DEBUG printf("sample %d, srtt %d\n", tao, cs->srtt); #endif if (cs->srtt <= 0) cs->srtt = SNOOP_RTTDEFAULT; } if (oldtail >= 0 && snoop_close_pkt(cs, packet, oldtail)) snoop_rexmt_pkt(cs, packet, IPTOS_LOWDELAY|IPTOS_RELIABILITY|IPTOS_THROUGHPUT); cs->fstate |= SNOOP_RTTFLAG; cs->expected_dacks = 0; cs->expected_next_ack = cs->buftail; cs->last_ack = ack; cs->last_win = win; return; } } int snoop_close_pkt(conn_state_t *cs, packet_t *pkt, int oldtail) { int num_pkts = cs->bufhead - cs->buftail; if (num_pkts == 0) if (cs->fstate & SNOOP_FULL) num_pkts = SNOOP_MAXWIND; else return 0; if (num_pkts < 0) num_pkts = -num_pkts; if (NEXT(oldtail) == cs->buftail && num_pkts>1 && pkt->num_rxmit == 0) return 1; if (num_pkts > 8) /* XXX pretty much random */ if (NEXT(NEXT(oldtail)) == cs->buftail && pkt->num_rxmit == 0) return 1; return 0; } /* * Free one of the snoop buffers. */ inline void snoop_freebuf(packet_t *pkt) { m_freem(pkt->mb); pkt->mb = 0; pkt->seq = pkt->size = pkt->snd_time.tv_sec=pkt->snd_time.tv_usec = 0; } /* * Clear snoop cache of received (and ack'd) segments. 
*/ void snoop_cleanbufs(conn_state_t *cs, tcp_seq ack, timev *sndtime) { int i, prev, later_ack; packet_t *packet = cs->pkts[cs->buftail]; timerclear(sndtime); for (; cs->timeout_pending > 0; cs->timeout_pending--) untimeout(snoop_rexmt_timeout, (void *) cs); if (cs->bufhead == cs->buftail && !(cs->fstate & SNOOP_FULL)) return; prev = PREV(cs->bufhead); i = cs->buftail; do { packet_t *pkt = cs->pkts[i]; if (pkt->mb == 0) { /* already been cleared */ cs->fstate &= ~SNOOP_FULL; snoop_freebuf(pkt); i = NEXT(i); continue; } if (SEQ_GEQ(pkt->seq, ack)) break; if (timerisset(&(pkt->snd_time))) { sndtime->tv_sec = pkt->snd_time.tv_sec; sndtime->tv_usec=pkt->snd_time.tv_usec; } if (SEQ_LEQ(pkt->seq + pkt->size, ack)) { cs->fstate &= ~SNOOP_FULL; snoop_freebuf(pkt); } i = NEXT(i); } while (i != cs->bufhead); if ((i != cs->buftail) || (cs->bufhead != cs->buftail)) { cs->fstate &= ~SNOOP_FULL; cs->buftail = i; } if (((cs->bufhead > cs->buftail) && (cs->bufhead - cs->buftail < SNOOP_HIGH_THRESH)) || ((cs->bufhead <= cs->buftail) && (cs->buftail - cs->bufhead > SNOOP_MAXWIND - SNOOP_HIGH_THRESH))) cs->fstate &= ~SNOOP_HIGHWATER; if (!(cs->bufhead == cs->buftail && !(cs->fstate & SNOOP_FULL))) { ++cs->timeout_pending; timeout(snoop_rexmt_timeout, (void *)cs, SNOOP_TIMO(cs)/tick); } return; } #ifdef notdef /* * Generate a NACK in the form of a Selective ACK, SACK, and send it to the * mobile host (the sender). */ void snoop_sendnack(conn_state_t *cs, tcp_seq seq) { } #endif /* * Adios. Clear snoop state for this connection. This function is usually * called when a packet with FIN set arrives from the *mobile*, or RST set * arrives from either the source or the mobile host. 
*/ void snoop_done(int conn_id) { register conn_state_t *cs = snoopstate->cstate[conn_id]; int i; if (cs->fstate == SNOOP_CLOSED) { #ifdef DEBUG printf("snoop_done: already closed connection %d\n", conn_id); #endif return; } for (; cs->timeout_pending; cs->timeout_pending--) { #ifdef DEBUG printf("untimeout\n"); #endif untimeout(snoop_rexmt_timeout, (void *) cs); } untimeout(snoop_persist_timeout, (void *) cs); snoopstate->num_connections--; cs->dest_addr.s_addr = 0; cs->dest_port = 0; cs->fstate = SNOOP_CLOSED; #define DEBUG #ifdef DEBUG printf("srtt %d\n", cs->srtt); #endif #undef DEBUG for (i = 0; i < SNOOP_MAXWIND; i++) if (cs->pkts[i] && cs->pkts[i]->mb) snoop_freebuf(cs->pkts[i]); cs->bufhead = cs->buftail = 0; #ifdef DEBUG printf("Closed %d\n", conn_id); #endif return; } #ifndef NOMIP /* * Change state of snoop on hint from handoff routine. */ int snoop_mip_change(struct in_addr n_addr, MIP_STATES startflag, MIP_STATES endflag) { int conn_id, i; conn_state_t *cs; struct in_addr addr; short idx; addr.s_addr = ntohl(n_addr.s_addr); if (startflag == MIP_UNENCAP_FWD) for (conn_id = 0; conn_id < SNOOP_MAXCONN; conn_id++) { cs = snoopstate->cstate[conn_id]; if (cs == NULL) continue; if (cs->dest_addr.s_addr == addr.s_addr) { #ifdef DEBUG printf("mipchange: fwd to %d\n", endflag); #endif for (; cs->timeout_pending; cs->timeout_pending--) untimeout(snoop_rexmt_timeout, (void *) cs); untimeout(snoop_persist_timeout, (void *) cs); /* * When moving from fwd'ing to buffering, want to * make sure that no rtt computation occurs when * acks start coming when the bs changes back to * fwd'ing. 
*/ if (cs->bufhead == cs->buftail && !(cs->fstate & SNOOP_FULL)) continue; i = cs->buftail; do { timerclear(&(cs->pkts[i]->snd_time)); i = NEXT(i); } while (i != cs->bufhead); } } if (endflag == MIP_UNENCAP_FWD) for (conn_id = 0; conn_id < SNOOP_MAXCONN; conn_id++) { cs = snoopstate->cstate[conn_id]; if (cs == NULL) continue; if (cs->dest_addr.s_addr == addr.s_addr) { /* * Send latest pkt from each connection to * kick start it. * OUCHHH! Send ah, well, 4 packets to kick * start. This works well over wavelan. * I hate doing things like this. Sigh! */ #ifdef DEBUG printf("mipchange: %d to fwd\n"); #endif idx = PREV(PREV(PREV(PREV(cs->bufhead)))); cs->expected_next_ack = cs->buftail; cs->expected_dacks = 0; do { packet_t *pkt = cs->pkts[idx]; if (cs->pkts[idx] != 0 && SEQ_LT(cs->last_ack, pkt->seq)) { snoop_rexmt_pkt(cs, pkt, IPTOS_LOWDELAY); } idx = NEXT(idx); } while (idx != cs->bufhead); /* * Re-enable timeouts. */ timeout(snoop_persist_timeout, (void *) cs, SNOOP_PERSIST_TIMO/tick); if (cs->bufhead == cs->buftail && !(cs->fstate & SNOOP_FULL)) continue; i = cs->buftail; do { timerclear(&(cs->pkts[i]->snd_time)); i = NEXT(i); } while (i != cs->bufhead); } } if (endflag == MIP_UNUSED) { snoopstate->num_connections--; for (conn_id = 0; conn_id < SNOOP_MAXCONN; conn_id++) { cs = snoopstate->cstate[conn_id]; if (cs == NULL) continue; if (cs->dest_addr.s_addr != addr.s_addr) continue; snoopstate->cstate[conn_id]->dest_port = 0; snoopstate->cstate[conn_id]->dest_addr.s_addr = 0; cs->fstate = SNOOP_CLOSED; for (i = 0; i < SNOOP_MAXWIND; i++) { packet_t *pkt; if (pkt) if (pkt->mb) { m_freem(pkt->mb); pkt->mb = 0; } } cs->bufhead = cs->buftail = 0; #ifdef DEBUG printf("Closed in mipchange%d\n", conn_id); #endif } } } #endif /* NOMIP */ /* * malloc() function for the snoop protocol. 
*/ void *snoop_malloc(int size) { void *m; if ((m = malloc(size, M_SNOOP, M_NOWAIT)) == (void *) 0) { printf("snoop_malloc: out of memory\n"); panic("snoop_malloc"); } return m; } /* * Local retransmissions from the snoop buffer to mobile host. */ void snoop_rexmt_pkt(conn_state_t *cs, packet_t *packet, u_char tos) { struct tcpiphdr *tcpip_hdr; struct tcphdr *tcp_hdr; struct ip* ip; struct mbuf *m; int ticks; #ifdef NOSNOOPREXMT /* if local retransmission has been disabled, just return */ if (snoop_rexmt_disable) return; #endif if (packet->mb != 0) { #ifdef DEBUG printf("rexmt seq %x ", packet->seq); #endif /* Restore flags and other changed fields */ m = m_copym(packet->mb, 0, M_COPYALL, M_DONTWAIT); ip = mtod(m, struct ip *); tcpip_hdr = mtod(m, struct tcpiphdr *); ip->ip_len = packet->iph.len; ip->ip_id = packet->iph.id; ip->ip_ttl = packet->iph.ttl; ip->ip_off = packet->iph.off; /* * Set the low delay flag on in the IP TOS field. This puts * this packet in the fast queue at the ether layer, and is * intended to get this packet out as early as possible to * the receiver so that we get as few dupacks as possible. */ #ifdef SNOOP_IPTOS ip->ip_tos |= tos; #endif tcpip_hdr->ti_t.th_sum = packet->tcp_sum; if (packet->num_rxmit < SNOOP_MAX_RXMIT) { ip_forward(m, 0); ++packet->num_rxmit; cs->fstate &= ~SNOOP_RTTFLAG; microtime(&(packet->snd_time)); } else m_freem(m); } for (; cs->timeout_pending; cs->timeout_pending--) untimeout(snoop_rexmt_timeout, cs); ++cs->timeout_pending; timeout(snoop_rexmt_timeout, cs, SNOOP_TIMO(cs)/tick); return; } void snoop_rexmt_timeout(void *arg) { int s = splnet(); conn_state_t *cs = (conn_state_t *) arg; short idx = cs->buftail; int ticks; #ifdef NOSNOOPREXMT /* * If local retransmission has been disabled, just return. 
*/ if (snoop_rexmt_disable) { splx(s); return; } #endif if (!((cs->bufhead == cs->buftail) && !(cs->fstate & SNOOP_FULL))) { snoop_rexmt_pkt(cs, cs->pkts[idx], IPTOS_LOWDELAY|IPTOS_RELIABILITY); cs->expected_next_ack = NEXT(idx); } #ifdef DEBUG printf("timeout: next timeout in %d\n", SNOOP_TIMO(cs)/tick); #endif splx(s); return; } void snoop_persist_timeout(void *arg) { int s = splnet(); conn_state_t *cs = (conn_state_t *) arg; short idx = cs->buftail; int ticks; static int count = 0; if (cs->fstate & SNOOP_SL_REXMT) count = 0; else { ++count; if (cs->pkts[idx]->mb != 0 && cs->last_ack == cs->pkts[idx]->seq) cs->expected_dacks = 0; cs->expected_next_ack = idx; if (cs->pkts[idx]->num_rxmit >= SNOOP_MAX_RXMIT) cs->pkts[idx]->num_rxmit = SNOOP_MAX_RXMIT - 2; snoop_rexmt_pkt(cs, cs->pkts[idx], IPTOS_LOWDELAY|IPTOS_RELIABILITY); } #ifdef DEBUG else if ((cs->fstate & SNOOP_NOACK) && cs->pkts[idx] != 0) { if (cs->pkts[idx]->num_rxmit >= SNOOP_MAX_RXMIT) cs->pkts[idx]->num_rxmit = SNOOP_MAX_RXMIT - 2; snoop_rexmt_pkt(cs, cs->pkts[idx], IPTOS_LOWDELAY|IPTOS_RELIABILITY); cs->expected_next_ack = NEXT(idx); } else printf("\tno pkt either\n"); #endif ticks = SNOOP_PERSIST_TIMO / tick; /* tick: extern in kernel.h */ cs->fstate &= ~SNOOP_SL_REXMT; if (!(cs->fstate & SNOOP_CLOSED)) if (count < SNOOP_MAXPERSIST) { timeout(snoop_persist_timeout, (void *) cs, ticks); if (!cs->timeout_pending) timeout(snoop_rexmt_timeout, (void *) cs, SNOOP_TIMO(cs)/ tick); } else /* clean up state for this connection */ snoop_done(cs->conn_id); splx(s); } void snoop_garbage_timeout(void *arg) { int s = splnet(); conn_state_t *cs = (conn_state_t *) arg; int ticks; if (cs == NULL || cs->fstate == SNOOP_CLOSED) { splx(s); return; } if (cs->fstate & SNOOP_ALIVE) { cs->fstate &= ~SNOOP_ALIVE; ticks = SNOOP_GARBAGE_TIMO / tick; timeout(snoop_garbage_timeout, (void *) cs, ticks); } else { snoop_done(cs->conn_id); } splx(s); return; }