396 lines
13 KiB
C
396 lines
13 KiB
C
/*-
|
|
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
|
|
*/
|
|
|
|
#include <string.h>
|
|
|
|
#include "tcp.h"
|
|
#include "tcp_fsm.h"
|
|
#include "tcp_seq.h"
|
|
#include "tcp_timer.h"
|
|
#include "tcp_var.h"
|
|
|
|
#include "tcp_const.h"
|
|
#include <openthread/ip6.h>
|
|
#include <openthread/message.h>
|
|
|
|
/*
|
|
* samkumar: The V_nolocaltimewait variable corresponds to the
|
|
* net.inet.tcp.nolocaltimewait option in FreeBSD. When set to 1, it skips the
|
|
* TIME-WAIT state for TCP connections where both endpoints are local IP
|
|
* addresses, to save resources on HTTP accelerators, database servers/clients,
|
|
* etc. In TCPlp, I eliminated support for this feature, but I have kept the
|
|
* code for it, commented out with "#if 0", in case we choose to bring it back
|
|
* at a later time.
|
|
*
|
|
* See also the "#if 0" block in tcp_twstart.
|
|
*/
|
|
#if 0
/*
 * Disabled stand-in for FreeBSD's net.inet.tcp.nolocaltimewait option.
 * 0 leaves the option off, i.e. TIME-WAIT is not skipped for local peers.
 */
enum tcp_timewait_consts { V_nolocaltimewait = 0 };
#endif
|
|
|
|
/*
|
|
* samkumar: The FreeBSD code used a separate, smaller structure, called
|
|
* struct tcptw, to represent connections in the TIME-WAIT state. In TCPlp,
|
|
* we use the full struct tcpcb structure even in the TIME-WAIT state. This
|
|
* consumes more memory, but switching to a different structure like
|
|
* struct tcptw to save memory would be difficult because the host system or
|
|
* application has allocated these structures; we can't simply "free" the
|
|
* struct tcpcb. It would have to have been done via a callback or something,
|
|
* and in the common case of statically allocated sockets, this would actually
|
|
* result in more memory (since an application would need to allocate both the
|
|
* struct tcpcb and the struct tcptw, if it uses a static allocation approach).
|
|
*
|
|
* Below, I've changed the function signatures to accept "struct tcpcb* tp"
|
|
* instead of "struct tcptw *tw" and I have reimplemented the functions
|
|
* to work using tp (of type struct tcpcb) instead of tw (of type
|
|
* struct tcptw).
|
|
*
|
|
* Conceptually, the biggest change is in how timers are handled. The FreeBSD
|
|
* code had a 2MSL timer, which was set for sockets that enter certain
|
|
* "closing" states of the TCP state machine. But when the TIME-WAIT state was
|
|
* entered, the state is transferred from struct tcpcb into struct tcptw.
|
|
* The final timeout is handled as follows; the function tcp_tw_2msl_scan is
|
|
* called periodically on the slow timer, and it iterates over a linked list
|
|
* of all the struct tcptw and checks the tw->tw_time field to identify which
|
|
* TIME-WAIT sockets have expired.
|
|
*
|
|
* In our switch to using struct tcpcb even in the TIME-WAIT state, we rely on
|
|
* the timer system for struct tcpcb. I modified the 2msl callback in
|
|
* tcp_timer.c to check for the TIME-WAIT case and handle it correctly.
|
|
*/
|
|
|
|
static void
|
|
tcp_tw_2msl_reset(struct tcpcb* tp, int rearm)
|
|
{
|
|
/*
|
|
* samkumar: This function used to set tw->tw_time to ticks + 2 * tcp_msl
|
|
* and insert tw into the linked list V_twq_2msl. I've replaced this, along
|
|
* with the associated locking logic, with the following call, which uses
|
|
* the timer system in place for full TCBs.
|
|
*/
|
|
tcp_timer_activate(tp, TT_2MSL, 2 * tcp_msl);
|
|
}
|
|
|
|
/*
|
|
* samkumar: I've rewritten this code since I need to send out packets via the
|
|
* host system for TCPlp: allocating buffers from the host system, populate
|
|
* them, and then pass them back to the host system. I simplified the code by
|
|
* only using the logic that was fully necessary, eliminating the code for IPv4
|
|
* packets and keeping only the code for IPv6 packets. I also removed all of
|
|
* the mbuf logic, instead using the logic for using the host system's
|
|
* buffering.
|
|
*
|
|
* This rewritten code always returns 0. The original code would return
|
|
* whatever is returned by ip_output or ip6_output (FreeBSD's functions for
|
|
* sending out IP packets). I believe 0 indicates success, and a nonzero
|
|
* value represents an error code. It seems that the return value of
|
|
* tcp_twrespond is ignored by all instances of its use in TCPlp (maybe even
|
|
* in all of FreeBSD), so this is a moot point.
|
|
*/
|
|
static int
|
|
tcp_twrespond(struct tcpcb* tp, int flags)
|
|
{
|
|
struct tcphdr* nth;
|
|
struct tcpopt to;
|
|
uint32_t optlen = 0;
|
|
uint8_t opt[TCP_MAXOLEN];
|
|
|
|
to.to_flags = 0;
|
|
|
|
/*
|
|
* Send a timestamp and echo-reply if both our side and our peer
|
|
* have sent timestamps in our SYN's and this is not a RST.
|
|
*/
|
|
if ((tp->t_flags & TF_RCVD_TSTMP) && flags == TH_ACK) {
|
|
to.to_flags |= TOF_TS;
|
|
to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
|
|
to.to_tsecr = tp->ts_recent;
|
|
}
|
|
optlen = tcp_addoptions(&to, opt);
|
|
|
|
otMessage* message = tcplp_sys_new_message(tp->instance);
|
|
if (message == NULL) {
|
|
return 0; // drop the message
|
|
}
|
|
if (otMessageSetLength(message, sizeof(struct tcphdr) + optlen) != OT_ERROR_NONE) {
|
|
tcplp_sys_free_message(tp->instance, message);
|
|
return 0; // drop the message
|
|
}
|
|
|
|
char outbuf[sizeof(struct tcphdr) + optlen];
|
|
nth = (struct tcphdr*) &outbuf[0];
|
|
otMessageInfo ip6info;
|
|
memset(&ip6info, 0x00, sizeof(ip6info));
|
|
|
|
memcpy(&ip6info.mSockAddr, &tp->laddr, sizeof(ip6info.mSockAddr));
|
|
memcpy(&ip6info.mPeerAddr, &tp->faddr, sizeof(ip6info.mPeerAddr));
|
|
nth->th_sport = tp->lport;
|
|
nth->th_dport = tp->fport;
|
|
nth->th_seq = htonl(tp->snd_nxt);
|
|
nth->th_ack = htonl(tp->rcv_nxt);
|
|
nth->th_off_x2 = ((sizeof(struct tcphdr) + optlen) >> 2) << TH_OFF_SHIFT;
|
|
nth->th_flags = flags;
|
|
nth->th_win = htons(tp->tw_last_win);
|
|
nth->th_urp = 0;
|
|
nth->th_sum = 0;
|
|
|
|
memcpy(nth + 1, opt, optlen);
|
|
otMessageWrite(message, 0, outbuf, sizeof(struct tcphdr) + optlen);
|
|
tcplp_sys_send_message(tp->instance, message, &ip6info);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Move a TCP connection into TIME_WAIT state.
|
|
* tcbinfo is locked.
|
|
* inp is locked, and is unlocked before returning.
|
|
*/
|
|
/*
|
|
* samkumar: Locking is removed (so above comments regarding locks are no
|
|
* not relevant for TCPlp). Rather than allocating a struct tcptw and
|
|
* discarding the struct tcpcb, this function just switches the tcpcb state
|
|
* to correspond to TIME-WAIT (updating variables as appropriate). We also
|
|
* eliminate the "V_nolocaltimewait" optimization.
|
|
*/
|
|
void
|
|
tcp_twstart(struct tcpcb *tp)
|
|
{
|
|
int acknow;
|
|
|
|
/*
|
|
* samkumar: The following code, commented out using "#if 0", handles the
|
|
* net.inet.tcp.nolocaltimewait option in FreeBSD. The option skips the
|
|
* TIME-WAIT state for TCP connections where both endpoints are local.
|
|
* I'm removing this optimization for TCPlp, but I've left the code
|
|
* commented out as it's a potentially useful feature that we may choose
|
|
* to restore later.
|
|
*
|
|
* See also the "#if 0" block near the top of this file.
|
|
*/
|
|
#if 0
|
|
if (V_nolocaltimewait) {
|
|
int error = 0;
|
|
#ifdef INET6
|
|
if (isipv6)
|
|
error = in6_localaddr(&inp->in6p_faddr);
|
|
#endif
|
|
#if defined(INET6) && defined(INET)
|
|
else
|
|
#endif
|
|
#ifdef INET
|
|
error = in_localip(inp->inp_faddr);
|
|
#endif
|
|
if (error) {
|
|
tp = tcp_close(tp);
|
|
if (tp != NULL)
|
|
INP_WUNLOCK(inp);
|
|
return;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* For use only by DTrace. We do not reference the state
|
|
* after this point so modifying it in place is not a problem.
|
|
*/
|
|
/*
|
|
* samkumar: The above comment is not true anymore. I use this state, since
|
|
* I don't associate every struct tcpcb with a struct inpcb.
|
|
*/
|
|
tcp_state_change(tp, TCPS_TIME_WAIT);
|
|
|
|
/*
|
|
* samkumar: There used to be code here to allocate a struct tcptw
|
|
* using "tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);" and if it fails, close
|
|
* an existing TIME-WAIT connection, in LRU fashion, to allocate memory.
|
|
*/
|
|
|
|
/*
|
|
* Recover last window size sent.
|
|
*/
|
|
if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
|
|
tp->tw_last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
|
|
else
|
|
tp->tw_last_win = 0;
|
|
|
|
/*
|
|
* Set t_recent if timestamps are used on the connection.
|
|
*/
|
|
if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
|
|
(TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
|
|
/*
|
|
* samkumar: This used to do:
|
|
* tw->t_recent = tp->ts_recent;
|
|
* tw->ts_offset = tp->ts_offset;
|
|
* But since we're keeping the state in tp, we don't need to do this
|
|
* anymore. */
|
|
} else {
|
|
tp->ts_recent = 0;
|
|
tp->ts_offset = 0;
|
|
}
|
|
|
|
/*
|
|
* samkumar: There used to be code here to populate various fields in
|
|
* tw based on their values in tp, but there's no need for that now since
|
|
* we can just read the values from tp. tw->tw_time was set to 0, but we
|
|
* don't need to do that either since we're relying on the old timer system
|
|
* anyway.
|
|
*/
|
|
|
|
/* XXX
|
|
* If this code will
|
|
* be used for fin-wait-2 state also, then we may need
|
|
* a ts_recent from the last segment.
|
|
*/
|
|
acknow = tp->t_flags & TF_ACKNOW;
|
|
|
|
/*
|
|
* First, discard tcpcb state, which includes stopping its timers and
|
|
* freeing it. tcp_discardcb() used to also release the inpcb, but
|
|
* that work is now done in the caller.
|
|
*
|
|
* Note: soisdisconnected() call used to be made in tcp_discardcb(),
|
|
* and might not be needed here any longer.
|
|
*/
|
|
/*
|
|
* samkumar: Below, I removed the code to discard tp, update inpcb and
|
|
* release a reference to socket, but kept the rest. I also added a call
|
|
* to cancel any pending timers on the TCB (which discarding it, as the
|
|
* original code did, would have done).
|
|
*/
|
|
tcp_cancel_timers(tp);
|
|
if (acknow)
|
|
tcp_twrespond(tp, TH_ACK);
|
|
tcp_tw_2msl_reset(tp, 0);
|
|
}
|
|
|
|
/*
|
|
* Returns 1 if the TIME_WAIT state was killed and we should start over,
|
|
* looking for a pcb in the listen state. Returns 0 otherwise.
|
|
*/
|
|
/*
|
|
* samkumar: Old signature was
|
|
* int
|
|
* tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
|
|
* struct mbuf *m, int tlen)
|
|
*/
|
|
int
|
|
tcp_twcheck(struct tcpcb* tp, struct tcphdr *th, int tlen)
|
|
{
|
|
int thflags;
|
|
tcp_seq seq;
|
|
|
|
/*
|
|
* samkumar: There used to be code here that obtains the struct tcptw from
|
|
* the inpcb, and does "goto drop" if that fails.
|
|
*/
|
|
|
|
thflags = th->th_flags;
|
|
|
|
/*
|
|
* NOTE: for FIN_WAIT_2 (to be added later),
|
|
* must validate sequence number before accepting RST
|
|
*/
|
|
|
|
/*
|
|
* If the segment contains RST:
|
|
* Drop the segment - see Stevens, vol. 2, p. 964 and
|
|
* RFC 1337.
|
|
*/
|
|
if (thflags & TH_RST)
|
|
goto drop;
|
|
|
|
/*
|
|
* samkumar: This was commented out (using #if 0) in the original FreeBSD
|
|
* code.
|
|
*/
|
|
#if 0
|
|
/* PAWS not needed at the moment */
|
|
/*
|
|
* RFC 1323 PAWS: If we have a timestamp reply on this segment
|
|
* and it's less than ts_recent, drop it.
|
|
*/
|
|
if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
|
|
TSTMP_LT(to.to_tsval, tp->ts_recent)) {
|
|
if ((thflags & TH_ACK) == 0)
|
|
goto drop;
|
|
goto ack;
|
|
}
|
|
/*
|
|
* ts_recent is never updated because we never accept new segments.
|
|
*/
|
|
#endif
|
|
|
|
/*
|
|
* If a new connection request is received
|
|
* while in TIME_WAIT, drop the old connection
|
|
* and start over if the sequence numbers
|
|
* are above the previous ones.
|
|
*/
|
|
if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tp->rcv_nxt)) {
|
|
/*
|
|
* samkumar: The FreeBSD code would call tcp_twclose(tw, 0); but we
|
|
* do it as below since TCPlp represents TIME-WAIT connects as
|
|
* struct tcpcb's.
|
|
*/
|
|
tcp_close(tp);
|
|
tcplp_sys_connection_lost(tp, CONN_LOST_NORMAL);
|
|
return (1);
|
|
}
|
|
|
|
/*
|
|
* Drop the segment if it does not contain an ACK.
|
|
*/
|
|
if ((thflags & TH_ACK) == 0)
|
|
goto drop;
|
|
|
|
/*
|
|
* Reset the 2MSL timer if this is a duplicate FIN.
|
|
*/
|
|
if (thflags & TH_FIN) {
|
|
seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
|
|
if (seq + 1 == tp->rcv_nxt)
|
|
tcp_tw_2msl_reset(tp, 1);
|
|
}
|
|
|
|
/*
|
|
* Acknowledge the segment if it has data or is not a duplicate ACK.
|
|
*/
|
|
if (thflags != TH_ACK || tlen != 0 ||
|
|
th->th_seq != tp->rcv_nxt || th->th_ack != tp->snd_nxt)
|
|
tcp_twrespond(tp, TH_ACK);
|
|
drop:
|
|
return (0);
|
|
}
|