/*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 */ #include #include "tcp.h" #include "tcp_fsm.h" #include "tcp_seq.h" #include "tcp_timer.h" #include "tcp_var.h" #include "tcp_const.h" #include #include /* * samkumar: The V_nolocaltimewait variable corresponds to the * net.inet.tcp.nolocaltimewait option in FreeBSD. When set to 1, it skips the * TIME-WAIT state for TCP connections where both endpoints are local IP * addresses, to save resources on HTTP accelerators, database servers/clients, * etc. In TCPlp, I eliminated support for this feature, but I have kept the * code for it, commented out with "#if 0", in case we choose to bring it back * at a later time. * * See also the "#if 0" block in tcp_twstart. */ #if 0 enum tcp_timewait_consts { V_nolocaltimewait = 0 }; #endif /* * samkumar: The FreeBSD code used a separate, smaller structure, called * struct tcptw, to respresent connections in the TIME-WAIT state. In TCPlp, * we use the full struct tcpcb structure even in the TIME-WAIT state. This * consumes more memory, but switching to a different structure like * struct tcptw to save memory would be difficult because the host system or * application has allocated these structures; we can't simply "free" the * struct tcpcb. It would have to have been done via a callback or something, * and in the common case of statically allocated sockets, this would actually * result in more memory (since an application would need to allocate both the * struct tcpcb and the struct tcptw, if it uses a static allocation approach). * * Below, I've changed the function signatures to accept "struct tcpcb* tp" * instead of "struct tcptw *tw" and I have reimplemented the functions * to work using tp (of type struct tcpcb) instead of tw (of type * struct tcptw). * * Conceptually, the biggest change is in how timers are handled. The FreeBSD * code had a 2MSL timer, which was set for sockets that enter certain * "closing" states of the TCP state machine. But when the TIME-WAIT state was * entered, the state is transferred from struct tcpcb into struct tcptw. * The final timeout is handled as follows; the function tcp_tw_2msl_scan is * called periodically on the slow timer, and it iterates over a linked list * of all the struct tcptw and checks the tw->tw_time field to identify which * TIME-WAIT sockets have expired. * * In our switch to using struct tcpcb even in the TIME-WAIT state, we rely on * the timer system for struct tcpcb. I modified the 2msl callback in * tcp_timer.c to check for the TIME-WAIT case and handle it correctly. */ static void tcp_tw_2msl_reset(struct tcpcb* tp, int rearm) { /* * samkumar: This function used to set tw->tw_time to ticks + 2 * tcp_msl * and insert tw into the linked list V_twq_2msl. I've replaced this, along * with the associated locking logic, with the following call, which uses * the timer system in place for full TCBs. */ tcp_timer_activate(tp, TT_2MSL, 2 * tcp_msl); } /* * samkumar: I've rewritten this code since I need to send out packets via the * host system for TCPlp: allocating buffers from the host system, populate * them, and then pass them back to the host system. I simplified the code by * only using the logic that was fully necessary, eliminating the code for IPv4 * packets and keeping only the code for IPv6 packets. I also removed all of * the mbuf logic, instead using the logic for using the host system's * buffering. * * This rewritten code always returns 0. The original code would return * whatever is returned by ip_output or ip6_output (FreeBSD's functions for * sending out IP packets). I believe 0 indicates success, and a nonzero * value represents an error code. It seems that the return value of * tcp_twrespond is ignored by all instances of its use in TCPlp (maybe even * in all of FreeBSD), so this is a moot point. */ static int tcp_twrespond(struct tcpcb* tp, int flags) { struct tcphdr* nth; struct tcpopt to; uint32_t optlen = 0; uint8_t opt[TCP_MAXOLEN]; to.to_flags = 0; /* * Send a timestamp and echo-reply if both our side and our peer * have sent timestamps in our SYN's and this is not a RST. */ if ((tp->t_flags & TF_RCVD_TSTMP) && flags == TH_ACK) { to.to_flags |= TOF_TS; to.to_tsval = tcp_ts_getticks() + tp->ts_offset; to.to_tsecr = tp->ts_recent; } optlen = tcp_addoptions(&to, opt); otMessage* message = tcplp_sys_new_message(tp->instance); if (message == NULL) { return 0; // drop the message } if (otMessageSetLength(message, sizeof(struct tcphdr) + optlen) != OT_ERROR_NONE) { tcplp_sys_free_message(tp->instance, message); return 0; // drop the message } char outbuf[sizeof(struct tcphdr) + optlen]; nth = (struct tcphdr*) &outbuf[0]; otMessageInfo ip6info; memset(&ip6info, 0x00, sizeof(ip6info)); memcpy(&ip6info.mSockAddr, &tp->laddr, sizeof(ip6info.mSockAddr)); memcpy(&ip6info.mPeerAddr, &tp->faddr, sizeof(ip6info.mPeerAddr)); nth->th_sport = tp->lport; nth->th_dport = tp->fport; nth->th_seq = htonl(tp->snd_nxt); nth->th_ack = htonl(tp->rcv_nxt); nth->th_off_x2 = ((sizeof(struct tcphdr) + optlen) >> 2) << TH_OFF_SHIFT; nth->th_flags = flags; nth->th_win = htons(tp->tw_last_win); nth->th_urp = 0; nth->th_sum = 0; memcpy(nth + 1, opt, optlen); otMessageWrite(message, 0, outbuf, sizeof(struct tcphdr) + optlen); tcplp_sys_send_message(tp->instance, message, &ip6info); return 0; } /* * Move a TCP connection into TIME_WAIT state. * tcbinfo is locked. * inp is locked, and is unlocked before returning. */ /* * samkumar: Locking is removed (so above comments regarding locks are no * not relevant for TCPlp). Rather than allocating a struct tcptw and * discarding the struct tcpcb, this function just switches the tcpcb state * to correspond to TIME-WAIT (updating variables as appropriate). We also * eliminate the "V_nolocaltimewait" optimization. */ void tcp_twstart(struct tcpcb *tp) { int acknow; /* * samkumar: The following code, commented out using "#if 0", handles the * net.inet.tcp.nolocaltimewait option in FreeBSD. The option skips the * TIME-WAIT state for TCP connections where both endpoints are local. * I'm removing this optimization for TCPlp, but I've left the code * commented out as it's a potentially useful feature that we may choose * to restore later. * * See also the "#if 0" block near the top of this file. */ #if 0 if (V_nolocaltimewait) { int error = 0; #ifdef INET6 if (isipv6) error = in6_localaddr(&inp->in6p_faddr); #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET error = in_localip(inp->inp_faddr); #endif if (error) { tp = tcp_close(tp); if (tp != NULL) INP_WUNLOCK(inp); return; } } #endif /* * For use only by DTrace. We do not reference the state * after this point so modifying it in place is not a problem. */ /* * samkumar: The above comment is not true anymore. I use this state, since * I don't associate every struct tcpcb with a struct inpcb. */ tcp_state_change(tp, TCPS_TIME_WAIT); /* * samkumar: There used to be code here to allocate a struct tcptw * using "tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);" and if it fails, close * an existing TIME-WAIT connection, in LRU fashion, to allocate memory. */ /* * Recover last window size sent. */ if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) tp->tw_last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale; else tp->tw_last_win = 0; /* * Set t_recent if timestamps are used on the connection. */ if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == (TF_REQ_TSTMP|TF_RCVD_TSTMP)) { /* * samkumar: This used to do: * tw->t_recent = tp->ts_recent; * tw->ts_offset = tp->ts_offset; * But since we're keeping the state in tp, we don't need to do this * anymore. */ } else { tp->ts_recent = 0; tp->ts_offset = 0; } /* * samkumar: There used to be code here to populate various fields in * tw based on their values in tp, but there's no need for that now since * we can just read the values from tp. tw->tw_time was set to 0, but we * don't need to do that either since we're relying on the old timer system * anyway. */ /* XXX * If this code will * be used for fin-wait-2 state also, then we may need * a ts_recent from the last segment. */ acknow = tp->t_flags & TF_ACKNOW; /* * First, discard tcpcb state, which includes stopping its timers and * freeing it. tcp_discardcb() used to also release the inpcb, but * that work is now done in the caller. * * Note: soisdisconnected() call used to be made in tcp_discardcb(), * and might not be needed here any longer. */ /* * samkumar: Below, I removed the code to discard tp, update inpcb and * release a reference to socket, but kept the rest. I also added a call * to cancel any pending timers on the TCB (which discarding it, as the * original code did, would have done). */ tcp_cancel_timers(tp); if (acknow) tcp_twrespond(tp, TH_ACK); tcp_tw_2msl_reset(tp, 0); } /* * Returns 1 if the TIME_WAIT state was killed and we should start over, * looking for a pcb in the listen state. Returns 0 otherwise. */ /* * samkumar: Old signature was * int * tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th, * struct mbuf *m, int tlen) */ int tcp_twcheck(struct tcpcb* tp, struct tcphdr *th, int tlen) { int thflags; tcp_seq seq; /* * samkumar: There used to be code here that obtains the struct tcptw from * the inpcb, and does "goto drop" if that fails. */ thflags = th->th_flags; /* * NOTE: for FIN_WAIT_2 (to be added later), * must validate sequence number before accepting RST */ /* * If the segment contains RST: * Drop the segment - see Stevens, vol. 2, p. 964 and * RFC 1337. */ if (thflags & TH_RST) goto drop; /* * samkumar: This was commented out (using #if 0) in the original FreeBSD * code. */ #if 0 /* PAWS not needed at the moment */ /* * RFC 1323 PAWS: If we have a timestamp reply on this segment * and it's less than ts_recent, drop it. */ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to.to_tsval, tp->ts_recent)) { if ((thflags & TH_ACK) == 0) goto drop; goto ack; } /* * ts_recent is never updated because we never accept new segments. */ #endif /* * If a new connection request is received * while in TIME_WAIT, drop the old connection * and start over if the sequence numbers * are above the previous ones. */ if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tp->rcv_nxt)) { /* * samkumar: The FreeBSD code would call tcp_twclose(tw, 0); but we * do it as below since TCPlp represents TIME-WAIT connects as * struct tcpcb's. */ tcp_close(tp); tcplp_sys_connection_lost(tp, CONN_LOST_NORMAL); return (1); } /* * Drop the segment if it does not contain an ACK. */ if ((thflags & TH_ACK) == 0) goto drop; /* * Reset the 2MSL timer if this is a duplicate FIN. */ if (thflags & TH_FIN) { seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0); if (seq + 1 == tp->rcv_nxt) tcp_tw_2msl_reset(tp, 1); } /* * Acknowledge the segment if it has data or is not a duplicate ACK. */ if (thflags != TH_ACK || tlen != 0 || th->th_seq != tp->rcv_nxt || th->th_ack != tp->snd_nxt) tcp_twrespond(tp, TH_ACK); drop: return (0); }