Openvswitch原理与代码分析(6):用户态流表flow table的操作

当内核无法查找到流表项的时候,则会通过upcall来调用用户态ovs-vswitchd中的flow table。

会调用ofproto-dpif-upcall.c中的udpif_upcall_handler函数。

  1. static void *
  2. udpif_upcall_handler(void *arg)
  3. {
  4.     struct handler *handler = arg;
  5.     struct udpif *udpif = handler->udpif;
  6.
  7.     while (!latch_is_set(&handler->udpif->exit_latch)) {
  8.         if (recv_upcalls(handler)) {
  9.             poll_immediate_wake();
  10.         } else {
  11.             dpif_recv_wait(udpif->dpif, handler->handler_id);
  12.             latch_wait(&udpif->exit_latch);
  13.         }
  14.         poll_block();
  15.     }
  16.
  17.     return NULL;
  18. }

?

会调用static size_t recv_upcalls(struct handler *handler)

在这个函数里面

(1) 首先读取upcall,调用static int upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, const struct dp_packet *packet, enum dpif_upcall_type type, const struct nlattr *userdata, const struct flow *flow, const unsigned int mru, const ovs_u128 *ufid, const unsigned pmd_id)

(2) 其次提取包头,调用void flow_extract(struct dp_packet *packet, struct flow *flow),提取出的flow如下:

  1. struct flow {
  2.     /* Metadata */
  3.     struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
  4.     ovs_be64 metadata; /* OpenFlow Metadata. */
  5.     uint32_t regs[FLOW_N_REGS]; /* Registers. */
  6.     uint32_t skb_priority; /* Packet priority for QoS. */
  7.     uint32_t pkt_mark; /* Packet mark. */
  8.     uint32_t dp_hash; /* Datapath computed hash value. The exact
  9.                                  * computation is opaque to the user space. */
  10.     union flow_in_port in_port; /* Input port.*/
  11.     uint32_t recirc_id; /* Must be exact match. */
  12.     uint16_t ct_state; /* Connection tracking state. */
  13.     uint16_t ct_zone; /* Connection tracking zone. */
  14.     uint32_t ct_mark; /* Connection mark.*/
  15.     uint8_t pad1[4]; /* Pad to 64 bits. */
  16.     ovs_u128 ct_label; /* Connection label. */
  17.     uint32_t conj_id; /* Conjunction ID. */
  18.     ofp_port_t actset_output; /* Output port in action set. */
  19.     uint8_t pad2[2]; /* Pad to 64 bits. */
  20.
  21.     /* L2, Order the same as in the Ethernet header! (64-bit aligned) */
  22.     struct eth_addr dl_dst; /* Ethernet destination address. */
  23.     struct eth_addr dl_src; /* Ethernet source address. */
  24.     ovs_be16 dl_type; /* Ethernet frame type. */
  25.     ovs_be16 vlan_tci; /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */
  26.     ovs_be32 mpls_lse[ROUND_UP(FLOW_MAX_MPLS_LABELS, 2)]; /* MPLS label stack
  27.                                                              (with padding). */
  28.     /* L3 (64-bit aligned) */
  29.     ovs_be32 nw_src; /* IPv4 source address. */
  30.     ovs_be32 nw_dst; /* IPv4 destination address. */
  31.     struct in6_addr ipv6_src; /* IPv6 source address. */
  32.     struct in6_addr ipv6_dst; /* IPv6 destination address. */
  33.     ovs_be32 ipv6_label; /* IPv6 flow label. */
  34.     uint8_t nw_frag; /* FLOW_FRAG_* flags. */
  35.     uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */
  36.     uint8_t nw_ttl; /* IP TTL/Hop Limit. */
  37.     uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */
  38.     struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */
  39.     struct eth_addr arp_sha; /* ARP/ND source hardware address. */
  40.     struct eth_addr arp_tha; /* ARP/ND target hardware address. */
  41.     ovs_be16 tcp_flags; /* TCP flags. With L3 to avoid matching L4. */
  42.     ovs_be16 pad3; /* Pad to 64 bits. */
  43.
  44.     /* L4 (64-bit aligned) */
  45.     ovs_be16 tp_src; /* TCP/UDP/SCTP source port/ICMP type. */
  46.     ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. */
  47.     ovs_be32 igmp_group_ip4; /* IGMP group IPv4 address.
  48.                                  * Keep last for BUILD_ASSERT_DECL below. */
  49. };

?

(3) 然后调用static int process_upcall(struct udpif *udpif, struct upcall *upcall, struct ofpbuf *odp_actions, struct flow_wildcards *wc)来处理upcall。

?

对于MISS_UPCALL,调用static void upcall_xlate(struct udpif *udpif, struct upcall *upcall, struct ofpbuf *odp_actions, struct flow_wildcards *wc)

  1. switch (classify_upcall(upcall->type, userdata)) {
  2. case MISS_UPCALL:
  3.     upcall_xlate(udpif, upcall, odp_actions, wc);
  4.     return 0;

?

会调用enum xlate_error xlate_actions(struct xlate_in *xin, struct xlate_out *xout)

在这个函数里面,会在flow table里面查找rule

ctx.rule = rule_dpif_lookup_from_table( ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc, ctx.xin->resubmit_stats, &ctx.table_id, flow->in_port.ofp_port, true, true);

找到rule之后,调用static void do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, struct xlate_ctx *ctx)。在这个函数里面,根据action的不同,修改flow的内容。

  1. switch (a->type) {
  2. case OFPACT_OUTPUT:
  3.     xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
  4.                         ofpact_get_OUTPUT(a)->max_len, true);
  5.     break;
  6.
  7. case OFPACT_SET_VLAN_VID:
  8.     wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
  9.     if (flow->vlan_tci & htons(VLAN_CFI) ||
  10.         ofpact_get_SET_VLAN_VID(a)->push_vlan_if_needed) {
  11.         flow->vlan_tci &= ~htons(VLAN_VID_MASK);
  12.         flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
  13.                            | htons(VLAN_CFI));
  14.     }
  15.     break;
  16.
  17. case OFPACT_SET_ETH_SRC:
  18.     WC_MASK_FIELD(wc, dl_src);
  19.     flow->dl_src = ofpact_get_SET_ETH_SRC(a)->mac;
  20.     break;
  21.
  22. case OFPACT_SET_ETH_DST:
  23.     WC_MASK_FIELD(wc, dl_dst);
  24.     flow->dl_dst = ofpact_get_SET_ETH_DST(a)->mac;
  25.     break;
  26.
  27. case OFPACT_SET_IPV4_SRC:
  28.     CHECK_MPLS_RECIRCULATION();
  29.     if (flow->dl_type == htons(ETH_TYPE_IP)) {
  30.         memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
  31.         flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
  32.     }
  33.     break;
  34.
  35. case OFPACT_SET_IPV4_DST:
  36.     CHECK_MPLS_RECIRCULATION();
  37.     if (flow->dl_type == htons(ETH_TYPE_IP)) {
  38.         memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
  39.         flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
  40.     }
  41.     break;
  42.
  43. case OFPACT_SET_L4_SRC_PORT:
  44.     CHECK_MPLS_RECIRCULATION();
  45.     if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
  46.         memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
  47.         memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
  48.         flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
  49.     }
  50.     break;
  51.
  52. case OFPACT_SET_L4_DST_PORT:
  53.     CHECK_MPLS_RECIRCULATION();
  54.     if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
  55.         memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
  56.         memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
  57.         flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
  58.     }
  59.     break;

?

?

(4) 最后调用static void handle_upcalls(struct udpif *udpif, struct upcall *upcalls, size_t n_upcalls)将flow rule添加到内核中的datapath

它会调用void dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops),后者会调用dpif->dpif_class->operate(dpif, ops, chunk);

会调用dpif_netlink_operate()

  1. static void
  2. dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
  3. {
  4.     struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
  5.
  6.     while (n_ops > 0) {
  7.         size_t chunk = dpif_netlink_operate__(dpif, ops, n_ops);
  8.         ops += chunk;
  9.         n_ops -= chunk;
  10.     }
  11. }

?

在static size_t dpif_netlink_operate__(struct dpif_netlink *dpif, struct dpif_op **ops, size_t n_ops)中,有以下的代码:

  1. switch (op->type) {
  2. case DPIF_OP_FLOW_PUT:
  3.     put = &op->u.flow_put;
  4.     dpif_netlink_init_flow_put(dpif, put, &flow);
  5.     if (put->stats) {
  6.         flow.nlmsg_flags |= NLM_F_ECHO;
  7.         aux->txn.reply = &aux->reply;
  8.     }
  9.     dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
  10.     break;
  11.
  12. case DPIF_OP_FLOW_DEL:
  13.     del = &op->u.flow_del;
  14.     dpif_netlink_init_flow_del(dpif, del, &flow);
  15.     if (del->stats) {
  16.         flow.nlmsg_flags |= NLM_F_ECHO;
  17.         aux->txn.reply = &aux->reply;
  18.     }
  19.     dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
  20.     break;

?

会调用netlink修改内核中datapath的规则。

时间: 2024-08-08 01:10:54

Openvswitch原理与代码分析(6):用户态流表flow table的操作的相关文章

Openvswitch原理与代码分析(5): 内核中的流表flow table操作

? 当一个数据包到达网卡的时候,首先要经过内核Openvswitch.ko,流表Flow Table在内核中有一份,通过key查找内核中的flow table,即可以得到action,然后执行action之后,直接发送这个包,只有在内核无法查找到流表项的时候,才会到用户态查找用户态的流表.仅仅查找内核中flow table的情况被称为fast path. ? ? 第一步:从数据包中提取出key ? 实现函数为int ovs_flow_key_extract(const struct ip_tun

Openvswitch原理与代码分析(1):总体架构

一.Openvswitch总体架构 Openvswitch的架构网上有如下的图表示: 每个模块都有不同的功能 ovs-vswitchd 为主要模块,实现交换机的守护进程daemon 在Openvswitch所在的服务器进行ps aux可以看到以下的进程 root 1008 0.1 0.8 242948 31712 ? S<Ll Aug06 32:17 ovs-vswitchd unix:/var/run/openvswitch/db.sock -vconsole:emer -vsyslog:err

Openvswitch原理与代码分析(4):网络包的处理过程

? 在上一节提到,Openvswitch的内核模块openvswitch.ko会在网卡上注册一个函数netdev_frame_hook,每当有网络包到达网卡的时候,这个函数就会被调用. ? static struct sk_buff *netdev_frame_hook(struct sk_buff *skb) { ???if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) ??????return skb; ? ???port_receive(skb)

Openvswitch原理与代码分析(2): ovs-vswitchd的启动

ovs-vswitchd.c的main函数最终会进入一个while循环,在这个无限循环中,里面最重要的两个函数是bridge_run()和netdev_run(). ? ? Openvswitch主要管理两种类型的设备,一个是创建的虚拟网桥,一个是连接到虚拟网桥上的设备. ? 其中bridge_run就是初始化数据库中已经创建的虚拟网桥. ? 一.虚拟网桥的初始化bridge_run ? bridge_run会调用bridge_run__,bridge_run__中最重要的是对于所有的网桥,都调

Openvswitch原理与代码分析(3): openvswitch内核模块的加载

在datapath/datapath.c中会调用module_init(dp_init);来初始化内核模块. static int __init dp_init(void){   int err;    BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));    pr_info("Open vSwitch switching datapath %s\n", VERSION);    

Openvswitch原理与代码分析(7): 添加一条流表flow

添加一个flow,调用的命令为 ovs-ofctl add-flow hello "hard_timeout=0 idle_timeout=0 priority=1 table=21 pkt_mark=0x55 tun_id=0x55 actions=mod_nw_dst:192.168.56.101,output:2" 这里调用的是调用ovs/utilities/ovs-ofctl.c的命令行工具 这个命令行工具支持的所有的命令及处理函数定义如下: static const stru

Openvswitch原理与代码分析(8): 修改Openvswitch代码添加自定义action

有时候我们需要自定义一些自己的action,根据包头里面的信息,做一些自己的操作. ? 例如添加一个action名为handle_example ? 第一.修改ofp-actions.c文件 ? 首先在ofp-actions.c里面添加Openflow各个版本的这个action static const struct ofpact_map * get_ofpact_map(enum ofp_version version) { ????/* OpenFlow 1.0 actions. */ ??

免费的Lucene 原理与代码分析完整版下载

Lucene是一个基于Java的高效的全文检索库.那么什么是全文检索,为什么需要全文检索?目前人们生活中出现的数据总的来说分为两类:结构化数据和非结构化数据.很容易理解,结构化数据是有固定格式和结构的或者有限长度的数据,比如数据库,元数据等.非结构化数据则是不定长或者没有固定格式的数据,如图片,邮件,文档等.还有一种较少的分类为半结构化数据,如XML,HTML等,在一定程度上我们可以将其按照结构化数据来处理,也可以抽取纯文本按照非结构化数据来处理.非结构化数据又称为全文数据.,对其搜索主要有两种

Mesos原理与代码分析(5): Mesos Master的启动之四

? 5. Create an instance of allocator. ? 代码如下 ? Mesos源码中默认的Allocator,即HierarchicalDRFAllocator的位置在$MESOS_HOME/src/master/allocator/mesos/hierarchical.hpp,而DRF中对每个Framework排序的Sorter位于$MESOS_HOME/src/master/allocator/sorter/drf/sorter.cpp,可以查看其源码了解它的工作原