对于输出的报文,主要做如下操作:
1. 调用ip_route_output_flow路由查找
2. 在ip_route_output_flow里面会调用xfrm_lookup进行ipsec policy查找
3. 如果policy的action为XFRM_POLICY_ALLOW,则先调用xfrm_find_bundle查找已有的dst链表,找不到时再调用xfrm_bundle_create创建新的dst链表
4. 应用链表中的dst->output函数进行ipsec加密或发包
例如需要分别对报文进行IP COMP, ESP, AH变换的链表如图所示
下面主要分析bundle的创建过程,skb->dst在__xfrm4_bundle_create前后变化如下:
传输模式:
隧道模式:
图片中黄色的为原始报文的路由缓存,蓝色的为隧道外层的路由缓存
/*
 * Allocate chain of dst_entry's, attach known xfrm's, calculate
 * all the metrics... Shortly, bundle a bundle.
 *
 * @policy: not referenced anywhere in this function body.
 * @xfrm:   array of @nx states; one dst_entry is allocated per element,
 *          in array order.
 * @nx:     number of entries in @xfrm.
 *          NOTE(review): with nx == 0 the first loop is skipped, dst_prev
 *          stays NULL and "dst_prev->child = ..." dereferences it;
 *          presumably callers guarantee nx > 0 -- confirm.
 * @fl:     flow of the packet; its IPv4 src/dst/tos seed fl_tunnel and
 *          the whole flow is copied into every bundle entry.
 * @dst_p:  in: the route of the original packet (treated as a
 *          struct rtable); out: head of the newly built chain.
 *
 * Returns 0 on success, -ENOBUFS when dst_alloc() fails, the error
 * returned by xfrm_dst_lookup(), or -EAFNOSUPPORT when
 * xfrm_state_get_afinfo() returns NULL.
 *
 * NOTE(review): *dst_p is overwritten before the second loop runs, so on
 * the -EAFNOSUPPORT path it is left pointing at the chain that has just
 * been handed to dst_free(); callers presumably must not use *dst_p
 * after an error -- confirm.
 */
static int
__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
		      struct flowi *fl, struct dst_entry **dst_p)
{
	struct dst_entry *dst, *dst_prev;
	/* rt0 always refers to the caller's route; rt is the "current"
	 * route and is replaced by xfrm_dst_lookup() in tunnel mode.
	 */
	struct rtable *rt0 = (struct rtable*)(*dst_p);
	struct rtable *rt = rt0;
	/* Lookup key for the outer (tunnel) route, seeded from the original
	 * flow; src/dst are overwritten per tunnel-mode state below.
	 */
	struct flowi fl_tunnel = {
		.nl_u = {
			.ip4_u = {
				.saddr = fl->fl4_src,
				.daddr = fl->fl4_dst,
				.tos = fl->fl4_tos
			}
		}
	};
	int i;
	int err;
	int header_len = 0;
	int trailer_len = 0;

	dst = dst_prev = NULL;
	dst_hold(&rt->u.dst);

	/* One dst_entry is created per xfrm; child points to the next
	 * transform or to the real route of the outermost packet.
	 * The original note states that a packet undergoes at most one tunnel
	 * transform, so there are at most two rt's: the route of the packet
	 * inside the tunnel and the route of the tunnel packet itself.
	 * NOTE(review): the loop performs a lookup for every tunnel-mode
	 * state it meets -- confirm the "at most one" assumption.
	 */
	for (i = 0; i < nx; i++) {
		struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
		struct xfrm_dst *xdst;

		if (unlikely(dst1 == NULL)) {
			err = -ENOBUFS;
			/* Drop a reference on the current route before bailing out. */
			dst_release(&rt->u.dst);
			goto error;
		}

		/* The first entry becomes the head of the chain; every later
		 * entry is hooked in through the previous entry's child, flagged
		 * DST_NOHASH and passed through dst_clone().
		 */
		if (!dst)
			dst = dst1;
		else {
			dst_prev->child = dst1;
			dst1->flags |= DST_NOHASH;
			dst_clone(dst1);
		}

		/* Record the route that is current *before* any tunnel lookup
		 * done for this state.
		 */
		xdst = (struct xfrm_dst *)dst1;
		xdst->route = &rt->u.dst;
		xdst->genid = xfrm[i]->genid;

		/* next links back to the previously allocated entry. */
		dst1->next = dst_prev;
		dst_prev = dst1;

		/* Running totals over all states; distributed per entry below. */
		header_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;

		/* In tunnel mode, look up the route for the outer IP packet. */
		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
			unsigned short encap_family = xfrm[i]->props.family;
			switch(encap_family) {
			case AF_INET:
				fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
				fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
				break;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
			case AF_INET6:
				ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6);
				ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6);
				break;
#endif
			default:
				BUG_ON(1);
			}
			/* rt is replaced by the result of the lookup. */
			err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
					      &fl_tunnel, encap_family);
			if (err)
				goto error;
		} else
			/* Non-tunnel state: rt is unchanged, take one more
			 * reference on it (presumably for the next user of
			 * rt -- confirm).
			 */
			dst_hold(&rt->u.dst);
	}

	/* The last child points to the route of the outermost packet. */
	dst_prev->child = &rt->u.dst;
	dst->path = &rt->u.dst;

	/* Publish the head of the chain to the caller; from here on dst
	 * names the last allocated entry and dst_prev restarts at the head.
	 */
	*dst_p = dst;
	dst = dst_prev;

	dst_prev = *dst_p;
	i = 0;
	/* Second pass: walk head -> child -> ... until the real route is
	 * reached, filling in each entry; i follows the xfrm array in the
	 * same order.
	 */
	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
		struct xfrm_state_afinfo *afinfo;
		x->u.rt.fl = *fl;

		dst_prev->xfrm = xfrm[i++];
		dst_prev->dev = rt->u.dst.dev;
		if (rt->u.dst.dev)
			dev_hold(rt->u.dst.dev);
		dst_prev->obsolete = -1;
		dst_prev->flags |= DST_HOST;
		dst_prev->lastuse = jiffies;
		/* Lengths still remaining from this entry onward; this entry's
		 * own share is subtracted at the bottom of the loop body.
		 */
		dst_prev->header_len = header_len;
		dst_prev->nfheader_len = 0;
		dst_prev->trailer_len = trailer_len;
		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));

		/* Copy neighbour for reachability confirmation */
		dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour);
		dst_prev->input = rt->u.dst.input;
		/* XXX: When IPv6 module can be unloaded, we should manage reference
		 * to xfrm6_output in afinfo->output. Miyazawa
		 * */
		afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family);
		if (!afinfo) {
			/* Point dst back at the head so the error path hands the
			 * chain to dst_free() starting from its first entry.
			 */
			dst = *dst_p;
			err = -EAFNOSUPPORT;
			goto error;
		}
		/* The entry's output handler comes from the address-family info
		 * (xfrm4_output for IPv4, according to the original note).
		 */
		dst_prev->output = afinfo->output;
		xfrm_state_put_afinfo(afinfo);
		/* NOTE(review): the peer refcount is only bumped for AF_INET
		 * states, yet the pointer is copied unconditionally -- confirm
		 * this is intended.
		 */
		if (dst_prev->xfrm->props.family == AF_INET && rt->peer)
			atomic_inc(&rt->peer->refcnt);
		x->u.rt.peer = rt->peer;
		/* Sheit... I remember I did this right. Apparently,
		 * it was magically lost, so this code needs audit */
		/* rt_type and rt_gateway are taken from rt (the current/outer
		 * route); the remaining fields come from rt0 (the caller's route).
		 */
		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
		x->u.rt.rt_type = rt->rt_type;
		x->u.rt.rt_src = rt0->rt_src;
		x->u.rt.rt_dst = rt0->rt_dst;
		x->u.rt.rt_gateway = rt->rt_gateway;
		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
		x->u.rt.idev = rt0->idev;
		in_dev_hold(rt0->idev);
		header_len -= x->u.dst.xfrm->props.header_len;
		trailer_len -= x->u.dst.xfrm->props.trailer_len;
	}

	/* dst is the last allocated entry of the chain at this point. */
	xfrm_init_pmtu(dst);
	return 0;

error:
	/* Hand whatever has been built so far to dst_free(). */
	if (dst)
		dst_free(dst);
	return err;
}
最后,在xfrm4_output_finish2中沿着dst_entry->child链逐个调用xfrm4_output_one,每次执行一次xfrm变换,直到到达最后一个真正的路由dst_entry,此时调用dst_output进行路由输出。