(1) 在inet_init中,为ETH_P_IP类型的数据包注册了接收回调函数ip_rcv
(2) 当二层数据包接收完毕,会调用netif_receive_skb根据协议进行向上层分发
(3) ETH_P_IP类型的数据包被传递到三层,由ip_rcv函数处理
(4) ip_rcv完成基本的校验和处理工作后,经过PRE_ROUTING钩子点
(5) 经过PRE_ROUTING钩子点之后,调用ip_rcv_finish完成数据包接收,包括选项处理,路由查询,并且根据路由决定数据包是发往本机还是转发
1 static struct packet_type ip_packet_type __read_mostly = { 2 .type = cpu_to_be16(ETH_P_IP), 3 .func = ip_rcv, 4 };
1 /* 2 * Main IP Receive routine. 3 */ 4 int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 5 { 6 const struct iphdr *iph; 7 struct net *net; 8 u32 len; 9 10 /* When the interface is in promisc. mode, drop all the crap 11 * that it receives, do not try to analyse it. 12 */ 13 /* 混杂模式下,非本机包 */ 14 if (skb->pkt_type == PACKET_OTHERHOST) 15 goto drop; 16 17 18 /* 获取net */ 19 net = dev_net(dev); 20 __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); 21 22 /* 检查skb共享 */ 23 skb = skb_share_check(skb, GFP_ATOMIC); 24 if (!skb) { 25 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 26 goto out; 27 } 28 29 /* 测试是否可以取得ip头 */ 30 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 31 goto inhdr_error; 32 33 /* 取ip头 */ 34 iph = ip_hdr(skb); 35 36 /* 37 * RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum. 38 * 39 * Is the datagram acceptable? 40 * 41 * 1. Length at least the size of an ip header 42 * 2. Version of 4 43 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] 44 * 4. 
Doesn‘t have a bogus length 45 */ 46 47 /* 头部长度不足20 或者版本不是4 */ 48 if (iph->ihl < 5 || iph->version != 4) 49 goto inhdr_error; 50 51 BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); 52 BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); 53 BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); 54 __IP_ADD_STATS(net, 55 IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), 56 max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); 57 58 /* 测试实际应取的ip头 */ 59 if (!pskb_may_pull(skb, iph->ihl*4)) 60 goto inhdr_error; 61 62 /* 取ip头 */ 63 iph = ip_hdr(skb); 64 65 /* 校验和错误 */ 66 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 67 goto csum_error; 68 69 /* 取总长度 */ 70 len = ntohs(iph->tot_len); 71 72 /* skb长度比ip包总长度小 */ 73 if (skb->len < len) { 74 __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); 75 goto drop; 76 } 77 /* 比头部长度还小 */ 78 else if (len < (iph->ihl*4)) 79 goto inhdr_error; 80 81 /* Our transport medium may have padded the buffer out. Now we know it 82 * is IP we can trim to the true length of the frame. 83 * Note this now means skb->len holds ntohs(iph->tot_len). 84 */ 85 /* 设置总长度为ip包的长度 */ 86 if (pskb_trim_rcsum(skb, len)) { 87 __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); 88 goto drop; 89 } 90 91 /* 取得传输层头部 */ 92 skb->transport_header = skb->network_header + iph->ihl*4; 93 94 /* Remove any debris in the socket control block */ 95 /* 重置cb */ 96 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 97 98 /* 保存输入设备信息 */ 99 IPCB(skb)->iif = skb->skb_iif; 100 101 /* Must drop socket now because of tproxy. */ 102 skb_orphan(skb); 103 104 /* 经过PRE_ROUTING钩子点 */ 105 return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, 106 net, NULL, skb, dev, NULL, 107 ip_rcv_finish); 108 109 csum_error: 110 __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); 111 inhdr_error: 112 __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); 113 drop: 114 kfree_skb(skb); 115 out: 116 return NET_RX_DROP; 117 }
/*
 * Second half of IPv4 receive processing; ip_rcv() passes this function to
 * NF_HOOK() as the continuation of the PRE_ROUTING hook.
 *
 * Optionally runs the transport protocol's early-demux handler, makes sure
 * the skb has a valid dst (doing a route lookup if needed), processes IP
 * options, updates multicast/broadcast counters and finally calls
 * dst_input().
 *
 * @net: network namespace used for the sysctl and the statistics counters
 * @sk:  not used by this function
 * @skb: the packet being received
 *
 * Returns NET_RX_SUCCESS if l3mdev_ip_rcv() returns NULL, NET_RX_DROP if the
 * packet is dropped here (the skb is freed), otherwise the value returned
 * by dst_input().
 */
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct rtable *rt;
	/* device is sampled before l3mdev_ip_rcv() gets to see the skb */
	struct net_device *dev = skb->dev;
	void (*edemux)(struct sk_buff *skb);

	/* if ingress device is enslaved to an L3 master device pass the
	 * skb to its handler for processing
	 */
	skb = l3mdev_ip_rcv(skb);
	if (!skb)
		return NET_RX_SUCCESS;

	/* Early demux is attempted only when it is enabled by sysctl, the
	 * skb has no dst yet, no socket is attached and the packet is not
	 * a fragment.
	 */
	if (net->ipv4.sysctl_ip_early_demux &&
	    !skb_dst(skb) &&
	    !skb->sk &&
	    !ip_is_fragment(iph)) {
		const struct net_protocol *ipprot;
		/* Upper-layer protocol number from the IP header */
		int protocol = iph->protocol;

		/* Look up the handler table entry for that protocol */
		ipprot = rcu_dereference(inet_protos[protocol]);
		/* Fetch its early_demux callback (e.g. tcp_v4_early_demux,
		 * per the original note)
		 */
		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
			/* Run it; per the original note this caches routing
			 * information in skb->refdst
			 */
			edemux(skb);
			/* must reload iph, skb->head might have changed */
			iph = ip_hdr(skb);
		}
	}

	/*
	 *	Initialise the virtual path cache for the packet. It describes
	 *	how the packet travels inside Linux networking.
	 */
	/* No valid dst on the skb yet */
	if (!skb_valid_dst(skb)) {
		/* Do the input route lookup */
		int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					       iph->tos, dev);
		if (unlikely(err)) {
			/* -EXDEV is accounted in the IPRPFILTER counter */
			if (err == -EXDEV)
				__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
			goto drop;
		}
	}

#ifdef CONFIG_IP_ROUTE_CLASSID
	/* Per-CPU accounting keyed by the dst's tclassid: the low byte
	 * indexes the "o_" counters, bits 16-23 index the "i_" counters.
	 */
	if (unlikely(skb_dst(skb)->tclassid)) {
		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
		u32 idx = skb_dst(skb)->tclassid;
		st[idx&0xFF].o_packets++;
		st[idx&0xFF].o_bytes += skb->len;
		st[(idx>>16)&0xFF].i_packets++;
		st[(idx>>16)&0xFF].i_bytes += skb->len;
	}
#endif

	/* Process IP options when the header is longer than 20 bytes */
	if (iph->ihl > 5 && ip_rcv_options(skb))
		goto drop;

	/* Route entry attached to the skb */
	rt = skb_rtable(skb);
	if (rt->rt_type == RTN_MULTICAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
	} else if (rt->rt_type == RTN_BROADCAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
	} else if (skb->pkt_type == PACKET_BROADCAST ||
		   skb->pkt_type == PACKET_MULTICAST) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		/* RFC 1122 3.3.6:
		 *
		 *   When a host sends a datagram to a link-layer broadcast
		 *   address, the IP destination address MUST be a legal IP
		 *   broadcast or IP multicast address.
		 *
		 *   A host SHOULD silently discard a datagram that is received
		 *   via a link-layer broadcast (see Section 2.4) but does not
		 *   specify an IP multicast or broadcast destination address.
		 *
		 * This doesn't explicitly say L2 *broadcast*, but broadcast is
		 * in a way a form of multicast and the most common use case for
		 * this is 802.11 protecting against cross-station spoofing (the
		 * so-called "hole-196" attack) so do it for both.
		 */
		if (in_dev &&
		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
			goto drop;
	}

	/* Call the route's input function; per the original note this may be
	 * ip_local_deliver or ip_forward
	 */
	return dst_input(skb);

drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}
时间: 2024-10-30 02:14:43