tcp_v4_connect

/*
 * tcp_v4_connect - initiate an outgoing IPv4 TCP connection.
 * @sk:       socket being connected
 * @uaddr:    destination address; must be a struct sockaddr_in (AF_INET)
 * @addr_len: length of @uaddr in bytes
 *
 * Resolves a route to the destination (or to the first hop when a source
 * route option is set), records the source/destination addresses and the
 * destination port on the socket, moves the socket to TCP_SYN_SENT, lets
 * inet_hash_connect() bind a source port and hash the socket, chooses the
 * initial write sequence number and finally hands over to tcp_connect()
 * (or tcp_repair_connect() when the socket is in repair mode).  Per the
 * original notes, tcp_connect() is what builds and transmits the SYN.
 *
 * Returns 0 on success.  On failure returns a negative errno: -EINVAL for a
 * short address or a zero destination with source routing, -EAFNOSUPPORT for
 * a non-AF_INET family, -ENETUNREACH for multicast/broadcast routes, or the
 * error reported by the routing, hashing or connect helpers.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * By default route straight to the requested destination.  If a
	 * source-route option is set on the socket, route to the option's
	 * first-hop address instead.
	 */
	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	/*
	 * Look up a route from the next hop, the ports and the bound device.
	 * The ports used for this first lookup are remembered so the route
	 * can be re-keyed once the real source port is known.
	 */
	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	/* Connecting over a multicast or broadcast route is refused. */
	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	/* Without source routing, take the destination chosen by routing. */
	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	/* Adopt the routed source address if none was set on the socket. */
	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq = 0;
	}

	/*
	 * With tw_recycle enabled and no timestamp state on the socket,
	 * fetch it for this route.  NOTE(review): the original note
	 * describes this as "the time the socket was most recently used";
	 * confirm against tcp_fetch_timewait_stamp().
	 */
	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);

	/* Bind a local port (if needed) and hash the socket. */
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	/* Re-key the route now that the final source port is known. */
	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}

	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	/* Choose the initial sequence number unless one is already set. */
	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	if (likely(!tp->repair))
		err = tcp_connect(sk);
	else
		err = tcp_repair_connect(sk);

	/*
	 * From here on the failure path must not drop the route.
	 * NOTE(review): presumably the reference now belongs to the socket
	 * after sk_setup_caps() - confirm.
	 */
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}

/*
 * Bind a port for a connect operation and hash it.
 */
int inet_hash_connect(struct inet_timewait_death_row *death_row,
		      struct sock *sk)
{
	/*
	 * Thin wrapper: feed the socket's port offset and the
	 * __inet_check_established / __inet_hash_nolisten callbacks to the
	 * generic helper below.
	 */
	u32 port_offset = inet_sk_port_offset(sk);

	return __inet_hash_connect(death_row, sk, port_offset,
				   __inet_check_established,
				   __inet_hash_nolisten);
}

/*
 * __inet_hash_connect - pick/confirm the local port for connect() and hash
 * the socket.
 * @death_row:         supplies the hash tables (death_row->hashinfo) and is
 *                     passed through to the callbacks / timewait helpers
 * @sk:                connecting socket
 * @port_offset:       added to a static hint to choose where the port
 *                     search starts
 * @check_established: callback that checks the candidate port against the
 *                     established table; 0 means the port is usable
 * @hash:              callback that inserts @sk into the established table
 *
 * If the socket has no local port yet (inet_num == 0) the local port range
 * is scanned for a usable port.  Otherwise the already-bound port is either
 * hashed directly (when @sk is the only owner of its bind bucket) or checked
 * through @check_established.
 *
 * Returns 0 on success, -EADDRNOTAVAIL when the scan finds no usable port,
 * or the result of @check_established for an already-bound port.
 */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u32 port_offset,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **),
		int (*hash)(struct sock *sk, struct inet_timewait_sock *twp))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	const unsigned short bound_port = inet_sk(sk)->inet_num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int rc;
	struct net *net = sock_net(sk);
	int tw_refs = 1;

	if (!bound_port) {
		/* No local port bound yet: search the local port range. */
		int attempt, span, lo, hi, port;
		static u32 search_hint;
		u32 start = search_hint + port_offset;
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		inet_get_local_port_range(&lo, &hi);
		span = (hi - lo) + 1;

		local_bh_disable();
		for (attempt = 1; attempt <= span; attempt++) {
			port = lo + (attempt + start) % span;
			if (inet_is_reserved_local_port(port))
				continue;

			head = &hinfo->bhash[inet_bhashfn(net, port,
							  hinfo->bhash_size)];
			spin_lock(&head->lock);

			/*
			 * Does not bother with rcv_saddr checks, because the
			 * established check is already unique enough.
			 * A socket may own a port either through bind() or
			 * because an earlier connect() selected it here.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (net_eq(ib_net(tb), net) &&
				    tb->port == port) {
					if (tb->fastreuse >= 0)
						goto next_port;
					WARN_ON(hlist_empty(&tb->owners));
					if (check_established(death_row, sk,
							      port, &tw))
						goto next_port;
					goto ok;
				}
			}

			/* No bucket exists for this port yet: create one. */
			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
						     net, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		search_hint += attempt;

		/*
		 * Head lock still held and bh's disabled.
		 * Attach the socket to the bucket, i.e. associate it with
		 * the chosen port.
		 */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			/* Not hashed yet: record sport and hash the socket. */
			inet_sk(sk)->inet_sport = htons(port);
			tw_refs += hash(sk, tw);
		}
		if (tw)
			tw_refs += inet_twsk_bind_unhash(tw, hinfo);
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw, death_row);
			/* Drop every timewait reference counted above. */
			for (; tw_refs; tw_refs--)
				inet_twsk_put(tw);
		}

		local_bh_enable();
		return 0;
	}

	/* A local port is already set on the socket. */
	head = &hinfo->bhash[inet_bhashfn(net, bound_port, hinfo->bhash_size)];
	/* Bind bucket currently recorded on the socket. */
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		/* sk is the sole owner of the bucket: no conflict check. */
		hash(sk, NULL);
		spin_unlock_bh(&head->lock);
		return 0;
	}

	/*
	 * Other sockets share the port, so a successful bind() alone does
	 * not prove the port is usable for this connection.
	 */
	spin_unlock(&head->lock);
	/* No definite answer... Walk to established hash table */
	rc = check_established(death_row, sk, bound_port, NULL);
	local_bh_enable();
	return rc;
}

创建一个套接字,设置SO_REUSEADDR选项,建立连接后立即关闭,关闭后立即又重复同样的过程,发现在第二次调用connect()的时候返回EADDRNOTAVAIL错误
可以看到返回EADDRNOTAVAIL错误的有两种情况:
   1、在TIME_WAIT传输控制块中找到匹配的端口,并且twsk_unique()返回0(假,即不允许复用)时
   2、在除TIME_WAIT和LISTEN状态外的传输控制块中存在匹配的端口。
  第二种情况很容易理解,只要状态在FIN_WAIT_1、ESTABLISHED等的传输控制块使用的端口和要查找的匹配,就会返回EADDRNOTAVAIL错误。
第一种情况还要取决于twsk_unique()的返回值。

__inet_hash_connect的主要功能与bind系统调用中的inet_csk_get_port类似,都是:
1、如果没有选取端口则选定一个;

2、将socket与端口绑定;

3、将socket加入到连接表中(这个功能inet_csk_get_port没有)。

  另外一点不同是:inet_csk_get_port进行冲突检查时关注的是绑定冲突
而__inet_hash_connect检查的是当前socket是否与“已建立连接的socket”的冲突。
__inet_hash_connect检查冲突的函数是__inet_check_established:

/*
 * __inet_check_established - check whether @sk's 4-tuple with local port
 * @lport is free in the established hash bucket and, if so, insert @sk.
 * @death_row: supplies the hash tables; also used to deschedule a recycled
 *             timewait socket
 * @sk:        connecting socket
 * @lport:     candidate local port
 * @twp:       if non-NULL, receives the matching timewait socket (or NULL);
 *             if NULL, a matching timewait socket is descheduled and put here
 *
 * The TIME-WAIT chain is examined first and then the established chain; a
 * conflict in either one makes the port unusable.
 *
 * Returns 0 when @sk was hashed, -EADDRNOTAVAIL on a conflict.
 *
 * called with local bh disabled
 */
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
	/* Addresses are named from the point of view of an incoming packet. */
	__be32 daddr = inet->inet_rcv_saddr;
	__be32 saddr = inet->inet_daddr;
	int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr)
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	struct net *net = sock_net(sk);
	unsigned int hash = inet_ehashfn(net, daddr, lport,
					 saddr, inet->inet_dport);
	/* Bucket of the established table selected by the hash. */
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	spinlock_t *bucket_lock = inet_ehash_lockp(hinfo, hash);
	struct sock *other;
	const struct hlist_nulls_node *node;
	struct inet_timewait_sock *tw;
	int tw_unhashed = 0;

	spin_lock(bucket_lock);

	/* Check TIME-WAIT sockets first. */
	sk_nulls_for_each(other, node, &head->twchain) {
		tw = inet_twsk(other);

		if (INET_TW_MATCH(other, net, hash, acookie,
				  saddr, daddr, ports, dif)) {
			if (!twsk_unique(sk, other, twp))
				goto not_unique;
			goto unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_nulls_for_each(other, node, &head->chain) {
		if (INET_MATCH(other, net, hash, acookie,
			       saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity. */
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
	sk->sk_hash = hash;
	WARN_ON(!sk_unhashed(sk));
	__sk_nulls_add_node_rcu(sk, &head->chain);
	if (tw) {
		/* Reusing a timewait entry: unhash it and count the recycle. */
		tw_unhashed = inet_twsk_unhash(tw);
		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
	spin_unlock(bucket_lock);
	if (tw_unhashed)
		inet_twsk_put(tw);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	if (twp) {
		/* Caller wants to dispose of the timewait socket itself. */
		*twp = tw;
		return 0;
	}
	if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, death_row);
		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	spin_unlock(bucket_lock);
	return -EADDRNOTAVAIL;
}
 在listen系统调用中,inet_hash函数会将socket加入到listen连接表中: static void __inet_hash(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) {
__inet_hash_nolisten(sk, NULL);
return;
} WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; spin_lock(&ilb->lock);
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
spin_unlock(&ilb->lock);
} int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
spinlock_t *lock;
struct inet_ehash_bucket *head;
int twrefcnt = 0; WARN_ON(!sk_unhashed(sk)); sk->sk_hash = inet_sk_ehashfn(sk);
head = inet_ehash_bucket(hashinfo, sk->sk_hash);
list = &head->chain;
lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock(lock);
__sk_nulls_add_node_rcu(sk, list);
if (tw) {
WARN_ON(sk->sk_hash != tw->tw_hash);
twrefcnt = inet_twsk_unhash(tw);
}
spin_unlock(lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
return twrefcnt;
} static inline struct inet_ehash_bucket *inet_ehash_bucket(
struct inet_hashinfo *hashinfo,
unsigned int hash)
{
return &hashinfo->ehash[hash & hashinfo->ehash_mask];
}/*
可见server端的socket在进行listen系统调用后被加入到sk->sk_prot->h.hashinfo->listening_hash中,
client端的socket在进行connect系统调用后被加入到sk->sk_prot->h.hashinfo->ehash中,
而对于TCPv4和TCPv6,sk->sk_prot->h.hashinfo指向的都是tcp_hashinfo。*/

最新文章

  1. 快速将一个表的数据生成SQL插入语句
  2. 表单 - Form - 无刷新提交原理
  3. Java eclipse下 Ant build.xml实例详解
  4. Javascript 多浏览器兼容性问题及解决方案
  5. package XXX.i386.rpm is not installed(检查在Linux上安装Oracle所需的pkg时)
  6. 【LEETCODE OJ】Clone Graph
  7. Easy Climb
  8. Java如何等待子线程执行结束
  9. android——api
  10. Web项目中用模板Jsp页面引入所有静态样式脚本文件(js,css等)
  11. <转>maven发布第三方jar的一些问题
  12. 数据库比对脚本(PHP版)
  13. ResourceBundle读取文件学习
  14. 深入JS原型与原型链
  15. Python的开发之路
  16. MapReduce论文学习
  17. hadoop学习笔记肆--元数据管理机制
  18. bouncing-balls-evil-circle
  19. windows 8 update to windows 8.1
  20. The Doors(几何+最短路,好题)

热门文章

  1. MeteoInfoLab脚本示例:格点数据散点图
  2. centos8用firewalld搭建防火墙
  3. Model实体类
  4. Spring笔记(4) - Spring的编程式事务和声明式事务详解
  5. asp.net core的授权过滤器中获取action上的Attribute
  6. 理解 Android Binder 机制(一):驱动篇
  7. IDEA上运行Flink任务
  8. 【论文阅读】DGCNN:Dynamic Graph CNN for Learning on Point Clouds
  9. kubelet拉取pause镜像报错pull access denied for 172.20.59.190:81/kubernetes/pause-amd64, repository does not exist or may require 'docker login': denied
  10. C#+Arduino Uno 实现声控系统完全实施手册