virtio: the QEMU bus and device model

(Much of this material was gathered from the web, plus a bit of my own understanding. I recommend reading http://mnstory.net/2014/10/qemu-device-simulation )

When QEMU starts with virtio devices configured, it emulates the guest's PCI bus, the virtio devices, and so on. Let's first look at QEMU device emulation in general, taking the i8254/PIT as an example (the PIT hardware spec is skipped here; if interested, see http://wiki.osdev.org/Programmable_Interval_Timer).

hw/timer/i8254.c implements the PIT device emulation, defining the device object model through QOM, e.g.

static const TypeInfo pit_info = {
    .name          = TYPE_I8254,
    .parent        = TYPE_PIT_COMMON,
    .instance_size = sizeof(PITCommonState),
    .class_init    = pit_class_initfn,
    .class_size    = sizeof(PITClass),
};

static void pit_register_types(void)
{
    type_register_static(&pit_info);
}

type_init(pit_register_types)

The type_init macro follows QOM's type-definition convention; it actually expands to the module_init macro, whose generated function carries the __attribute__((constructor)) attribute. Like a C++ static object constructor, such a function runs before main().

typedef enum {
    MODULE_INIT_BLOCK,
    MODULE_INIT_MACHINE,
    MODULE_INIT_QAPI,
    MODULE_INIT_QOM,
    MODULE_INIT_MAX
} module_init_type;

#define type_init(function) module_init(function, MODULE_INIT_QOM)
/* This should not be used directly.  Use block_init etc. instead.  */
#define module_init(function, type)                                         \
static void __attribute__((constructor)) do_qemu_init_ ## function(void)   \
{                                                                           \
    register_module_init(function, type);                                   \
}
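
To see the constructor mechanism in isolation, here is a minimal standalone sketch (not QEMU code) showing that constructor-attribute functions run before main():

#include <stdio.h>

/* invoked by the C runtime during program startup, before main() */
static void __attribute__((constructor)) do_qemu_init_demo(void)
{
    printf("registered before main\n");
}

int main(void)
{
    printf("main runs last\n");
    return 0;
}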

register_module_init allocates a ModuleEntry of the given type and appends it to the QTAILQ list init_type_list[type]. module_call_init then walks every ModuleEntry of that type and calls its registered init function.

typedef struct ModuleEntry
{
    void (*init)(void);
    QTAILQ_ENTRY(ModuleEntry) node;
    module_init_type type;
} ModuleEntry;

typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList;

static ModuleTypeList init_type_list[MODULE_INIT_MAX];

static ModuleTypeList *find_type(module_init_type type)
{
    ModuleTypeList *l;

    init_lists();

    l = &init_type_list[type];

    return l;
}

void module_call_init(module_init_type type)
{
    ModuleTypeList *l;
    ModuleEntry *e;

    module_load(type);
    l = find_type(type);

    QTAILQ_FOREACH(e, l, node) {
        e->init();
    }
}
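
For reference, a sketch of register_module_init consistent with the structures above (the real function lives in util/module.c):

void register_module_init(void (*fn)(void), module_init_type type)
{
    ModuleEntry *e;
    ModuleTypeList *l;

    /* record the init function and its type in a new entry */
    e = g_malloc0(sizeof(*e));
    e->init = fn;
    e->type = type;

    /* append to the per-type list that module_call_init walks */
    l = find_type(type);
    QTAILQ_INSERT_TAIL(l, e, node);
}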

Back to the PIT registration function pit_register_types: ultimately it builds a TypeImpl from the TypeInfo passed in, and inserts that TypeImpl into type_table, a global static GHashTable:

TypeImpl *type_register_static(const TypeInfo *info)
{
    return type_register(info);
}

TypeImpl *type_register(const TypeInfo *info)
{
    assert(info->parent);
    return type_register_internal(info);
}

static TypeImpl *type_register_internal(const TypeInfo *info)
{
    TypeImpl *ti;
    ti = type_new(info);

    type_table_add(ti);
    return ti;
}

static void type_table_add(TypeImpl *ti)
{
    assert(!enumerating_types);
    g_hash_table_insert(type_table_get(), (void *)ti->name, ti);
}
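
The type_table itself is created lazily, and later stages (object_new and friends) look types up by name from the same table; a sketch along the lines of qom/object.c:

static GHashTable *type_table_get(void)
{
    static GHashTable *type_table;

    /* created on first use, keyed by type name */
    if (type_table == NULL) {
        type_table = g_hash_table_new(g_str_hash, g_str_equal);
    }

    return type_table;
}

static TypeImpl *type_table_lookup(const char *name)
{
    return g_hash_table_lookup(type_table_get(), name);
}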

As PITClass suggests, QOM essentially turns QEMU's device model into C++-style objects: parent and child classes, inheritance, virtual functions, and so on. Device types form a tree whose root is object, and TypeInfo, TypeImpl, Object, and ObjectClass all mirror this tree structure, e.g.

static const TypeInfo pit_info = {
    .name          = TYPE_I8254,
    .parent        = TYPE_PIT_COMMON,
    .instance_size = sizeof(PITCommonState),
    .class_init    = pit_class_initfn,
    .class_size    = sizeof(PITClass),
};

static const TypeInfo pit_common_type = {
    .name          = TYPE_PIT_COMMON,
    .parent        = TYPE_ISA_DEVICE,
    .instance_size = sizeof(PITCommonState),
    .class_size    = sizeof(PITCommonClass),
    .class_init    = pit_common_class_init,
    .abstract      = true,
};

static const TypeInfo isa_device_type_info = {
    .name = TYPE_ISA_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(ISADevice),
    .instance_init = isa_device_init,
    .abstract = true,
    .class_size = sizeof(ISADeviceClass),
    .class_init = isa_device_class_init,
};

static const TypeInfo device_type_info = {
    .name = TYPE_DEVICE,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(DeviceState),
    .instance_init = device_initfn,
    .instance_post_init = device_post_init,
    .instance_finalize = device_finalize,
    .class_base_init = device_class_base_init,
    .class_init = device_class_init,
    .abstract = true,
    .class_size = sizeof(DeviceClass),
};

static TypeInfo object_info = {
    .name = TYPE_OBJECT,
    .instance_size = sizeof(Object),
    .instance_init = object_instance_init,
    .abstract = true,
};

The ObjectClass layer reflects the same inheritance relationship. ObjectClass is also how polymorphism is implemented: the ObjectClass pointer held by an Object is effectively a pointer to a table of virtual functions, e.g.

typedef struct PITCommonClass {
    ISADeviceClass parent_class;

    void (*set_channel_gate)(PITCommonState *s, PITChannelState *sc, int val);
    void (*get_channel_info)(PITCommonState *s, PITChannelState *sc,
                             PITChannelInfo *info);
    void (*pre_save)(PITCommonState *s);
    void (*post_load)(PITCommonState *s);
} PITCommonClass;

typedef struct ISADeviceClass {
    DeviceClass parent_class;
} ISADeviceClass;

typedef struct DeviceClass {
    /*< private >*/
    ObjectClass parent_class;
    /*< public >*/

    DECLARE_BITMAP(categories, DEVICE_CATEGORY_MAX);
    const char *fw_name;
    const char *desc;
    Property *props;
    ...
} DeviceClass;

struct ObjectClass
{
    /*< private >*/
    Type type;
    GSList *interfaces;

    const char *object_cast_cache[OBJECT_CLASS_CAST_CACHE];
    const char *class_cast_cache[OBJECT_CLASS_CAST_CACHE];

    ObjectUnparent *unparent;
};

The instance-side inheritance chain of the device objects looks like this:

typedef struct PITCommonState {
    ISADevice dev;
    MemoryRegion ioports;
    uint32_t iobase;
    PITChannelState channels[3];
} PITCommonState;

struct ISADevice {
    /*< private >*/
    DeviceState parent_obj;
    /*< public >*/

    uint32_t isairq[2];
    int nirqs;
    int ioport_id;
};

struct DeviceState {
    /*< private >*/
    Object parent_obj;
    /*< public >*/

    const char *id;
    bool realized;
    bool pending_deleted_event;
    QemuOpts *opts;
    int hotplugged;
    BusState *parent_bus;
    QLIST_HEAD(, NamedGPIOList) gpios;
    QLIST_HEAD(, BusState) child_bus;
    int num_child_bus;
    int instance_id_alias;
    int alias_required_for_version;
};

struct Object
{
    ObjectClass *class;
    ObjectFree *free;
    QTAILQ_HEAD(, ObjectProperty) properties;
    uint32_t ref;
    Object *parent;
};

(The original post includes a diagram here illustrating how the polymorphism works; the key point is that the ObjectClass * pointer inside the PIT's Object actually points to a PITCommonClass.)
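
To make the dispatch concrete, here is a sketch of how a caller would invoke one of PITCommonClass's hooks; the wrapper function is illustrative, while PIT_COMMON_GET_CLASS is the usual QOM cast macro for this type:

static void pit_set_gate_example(PITCommonState *s, PITChannelState *sc,
                                 int val)
{
    PITCommonClass *c = PIT_COMMON_GET_CLASS(s);

    /* the call dispatches to whatever implementation the concrete
     * class (here PITClass) installed in its class_init */
    c->set_channel_gate(s, sc, val);
}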

Back to the main topic: the virtio devices on the QEMU side. In QEMU, the virtio device type (TYPE_VIRTIO_DEVICE) is the abstract parent of all concrete virtio devices, e.g. virtio-blk, virtio-net, virtio-balloon. It is defined as follows:

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)

The virtio device structure is shown below. Its most important member is the VirtQueue pointer; as I understand it, VirtIODevice is mainly a wrapper around its VirtQueues.

struct VirtIODevice
{
    DeviceState parent_obj;
    const char *name;
    uint8_t status;
    uint8_t isr;
    uint16_t queue_sel;
    uint32_t guest_features;
    size_t config_len;
    void *config;
    uint16_t config_vector;
    int nvectors;
    VirtQueue *vq;
    uint16_t device_id;
    bool vm_running;
    VMChangeStateEntry *vmstate;
    char *bus_name;
    uint8_t device_endian;
};

typedef struct VirtioDeviceClass {
    /*< private >*/
    DeviceClass parent;
    /*< public >*/

    /* This is what a VirtioDevice must implement */
    DeviceRealize realize;
    DeviceUnrealize unrealize;
    uint32_t (*get_features)(VirtIODevice *vdev, uint32_t requested_features);
    uint32_t (*bad_features)(VirtIODevice *vdev);
    void (*set_features)(VirtIODevice *vdev, uint32_t val);
    void (*get_config)(VirtIODevice *vdev, uint8_t *config);
    void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
    void (*reset)(VirtIODevice *vdev);
    void (*set_status)(VirtIODevice *vdev, uint8_t val);
    void (*set_version)(VirtIODevice *vdev, uint32_t val);
    /* Test and clear event pending status.
     * Should be called after unmask to avoid losing events.
     * If backend does not support masking,
     * must check in frontend instead.
     */
    bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
    /* Mask/unmask events from this vq. Any events reported
     * while masked will become pending.
     * If backend does not support masking,
     * must mask in frontend instead.
     */
    void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);
    void (*save)(VirtIODevice *vdev, QEMUFile *f);
    int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id);
} VirtioDeviceClass;
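
As an example of how a concrete device fills in these hooks, here is a sketch of a class_init loosely modeled on hw/block/virtio-blk.c (function names approximate):

static void virtio_blk_class_init(ObjectClass *klass, void *data)
{
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    vdc->realize = virtio_blk_device_realize;    /* creates the VirtQueue(s) */
    vdc->get_config = virtio_blk_update_config;  /* fills struct virtio_blk_config */
    vdc->set_config = virtio_blk_set_config;
    vdc->get_features = virtio_blk_get_features;
    vdc->set_status = virtio_blk_set_status;
    vdc->reset = virtio_blk_reset;
}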

QEMU defines its own VirtQueue and VRing structures. Note that the guest-side virtio driver and QEMU must keep the vring ABI identical, i.e. the two sides must agree exactly on the in-memory layout, e.g.

/* The standard layout for the ring is a continuous chunk of memory which looks
 * like this.  We assume num is a power of 2.
 *
 * struct vring
 * {
 *  // The actual descriptors (16 bytes each)
 *  struct vring_desc desc[num];
 *
 *  // A ring of available descriptor heads with free-running index.
 *  __u16 avail_flags;
 *  __u16 avail_idx;
 *  __u16 available[num];
 *  __u16 used_event_idx;
 *
 *  // Padding to the next align boundary.
 *  char pad[];
 *
 *  // A ring of used descriptor heads with free-running index.
 *  __u16 used_flags;
 *  __u16 used_idx;
 *  struct vring_used_elem used[num];
 *  __u16 avail_event_idx;
 * };
 */

Note that QEMU's writes to the VRing are generally limited to the VRingUsed part, plus the update of the avail_event index.

#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;
    hwaddr pa;
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used contains a valid value */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;  /* whether guest notification is enabled; only effective once event_idx is enabled */

    uint16_t queue_index;

    int inuse;

    uint16_t vector;
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    VirtIODevice *vdev;
    EventNotifier guest_notifier; /* guest notifier fd; currently only used by vhost */
    EventNotifier host_notifier;  /* host notifier fd; currently only used by vhost */
};
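
The three guest-physical addresses in VRing are derived from the single base address programmed by the guest, following the layout comment above; a sketch matching virtqueue_init in hw/virtio/virtio.c:

static void virtqueue_init(VirtQueue *vq)
{
    hwaddr pa = vq->pa;  /* base address written by the guest */

    vq->vring.desc = pa;
    vq->vring.avail = pa + vq->vring.num * sizeof(VRingDesc);
    /* the used ring starts at the next align boundary after avail */
    vq->vring.used = vring_align(vq->vring.avail +
                                 offsetof(VRingAvail, ring[vq->vring.num]),
                                 vq->vring.align);
}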

QEMU detects newly arrived avail buffers by comparing the avail ring's index against its private last_avail_idx (the avail_event index is used to suppress unnecessary guest kicks). VirtQueue and VRing are covered in more detail later.
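
A sketch of that check, simplified from virtio_queue_empty in hw/virtio/virtio.c:

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa = vq->vring.avail + offsetof(VRingAvail, idx);

    return virtio_lduw_phys(vq->vdev, pa);  /* read the guest-written index */
}

int virtio_queue_empty(VirtQueue *vq)
{
    /* equal indices mean the device has consumed everything the
     * guest has made available so far */
    return vring_avail_idx(vq) == vq->last_avail_idx;
}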

Besides the TYPE_DEVICE -> TYPE_VIRTIO_DEVICE -> TYPE_VIRTIO_XXX_DEVICE inheritance chain, QEMU's virtio devices are also PCI devices, so there is a parallel chain TYPE_DEVICE -> TYPE_PCI_DEVICE -> TYPE_VIRTIO_PCI -> TYPE_VIRTIO_XXX_PCI; on the bus side there is likewise TYPE_BUS -> TYPE_VIRTIO_BUS -> TYPE_VIRTIO_PCI_BUS.

The virtio bus is defined as follows:

static const TypeInfo virtio_bus_info = {
    .name = TYPE_VIRTIO_BUS,
    .parent = TYPE_BUS,
    .instance_size = sizeof(VirtioBusState),
    .abstract = true,
    .class_size = sizeof(VirtioBusClass),
    .class_init = virtio_bus_class_init
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_bus_info);
}

type_init(virtio_register_types)

The bus device virtio-bus corresponds to the class VirtioBusClass, which declares a number of virtual function hooks. There are two concrete implementations, TYPE_VIRTIO_PCI_BUS and TYPE_VIRTIO_MMIO_BUS, corresponding to the virtio-pci-bus and virtio-mmio-bus devices:

static const TypeInfo virtio_pci_bus_info = {
    .name          = TYPE_VIRTIO_PCI_BUS,
    .parent        = TYPE_VIRTIO_BUS,
    .instance_size = sizeof(VirtioPCIBusState),
    .class_init    = virtio_pci_bus_class_init,
};

static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
{
    BusClass *bus_class = BUS_CLASS(klass);
    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
    bus_class->max_dev = 1;
    k->notify = virtio_pci_notify;
    k->save_config = virtio_pci_save_config;
    k->load_config = virtio_pci_load_config;
    k->save_queue = virtio_pci_save_queue;
    k->load_queue = virtio_pci_load_queue;
    k->get_features = virtio_pci_get_features;
    k->query_guest_notifiers = virtio_pci_query_guest_notifiers;
    k->set_host_notifier = virtio_pci_set_host_notifier;
    k->start_host_notifier = virtio_pci_start_host_notifier;
    k->set_guest_notifiers = virtio_pci_set_guest_notifiers;
    k->vmstate_change = virtio_pci_vmstate_change;
    k->device_plugged = virtio_pci_device_plugged;
    k->device_unplugged = virtio_pci_device_unplugged;
}

Another related type is virtio-pci, the PCI device model through which a virtio device is attached to the PCI bus (each proxy embeds a VirtioBusState). It is defined as follows:

static const TypeInfo virtio_pci_info = {
    .name          = TYPE_VIRTIO_PCI,
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(VirtIOPCIProxy),
    .class_init    = virtio_pci_class_init,
    .class_size    = sizeof(VirtioPCIClass),
    .abstract      = true,
};

struct VirtIOPCIProxy {
    PCIDevice pci_dev;
    MemoryRegion bar;
    uint32_t flags;
    uint32_t class_code;
    uint32_t nvectors;
    uint32_t host_features;
    bool ioeventfd_disabled;
    bool ioeventfd_started;
    VirtIOIRQFD *vector_irqfd;
    int nvqs_with_notifiers;
    VirtioBusState bus;
};

typedef struct VirtioPCIClass {
    PCIDeviceClass parent_class;
    int (*init)(VirtIOPCIProxy *vpci_dev);
} VirtioPCIClass;

Apart from supplying the virtual function implementations, VirtioPCIBusState is identical to VirtioBusState, and likewise VirtioPCIBusClass is identical to VirtioBusClass. The implementation closely parallels the kernel's virtio PCI device code.

virtio_pci_notify delivers an interrupt to the guest, choosing the path according to whether the PCI device has MSI-X enabled:

static void virtio_pci_notify(DeviceState *d, uint16_t vector)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d);

    if (msix_enabled(&proxy->pci_dev))
        msix_notify(&proxy->pci_dev, vector); /* deliver the interrupt as an MSI-X message */
    else {
        VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
        pci_set_irq(&proxy->pci_dev, vdev->isr & 1);  /* deliver via the legacy INTx line */
    }
}

virtio_pci_get_features returns the host feature bits that the device exposes through the HOST_FEATURES register of the virtio config space:

static unsigned virtio_pci_get_features(DeviceState *d)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    return proxy->host_features;
}

virtio_pci_save_config / virtio_pci_load_config and virtio_pci_save_queue / virtio_pci_load_queue save and load the PCI configuration and the queue state, e.g. for migration.
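
The save side, for instance, writes out the PCI device state, the MSI-X state, and the config vector; a sketch modeled on virtio-pci.c:

static void virtio_pci_save_config(DeviceState *d, QEMUFile *f)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);

    pci_device_save(&proxy->pci_dev, f);   /* PCI config space and state */
    msix_save(&proxy->pci_dev, f);         /* MSI-X table state */
    if (msix_present(&proxy->pci_dev)) {
        qemu_put_be16(f, vdev->config_vector);
    }
}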

virtio_pci_device_plugged is called when a virtio device is plugged onto the virtio-pci-bus:

static void virtio_pci_device_plugged(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    VirtioBusState *bus = &proxy->bus;
    uint8_t *config;
    uint32_t size;

    config = proxy->pci_dev.config;
    if (proxy->class_code) {
        pci_config_set_class(config, proxy->class_code); /* set the class code in the PCI config space */
    }
    pci_set_word(config + PCI_SUBSYSTEM_VENDOR_ID,
                 pci_get_word(config + PCI_VENDOR_ID)); /* subsystem vendor ID mirrors the vendor ID */
    pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus)); /* subsystem ID is the virtio device ID */
    config[PCI_INTERRUPT_PIN] = 1;

    if (proxy->nvectors &&
        msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, 1)) { /* init an exclusive BAR for the MSI-X table */
        error_report("unable to init msix vectors to %" PRIu32,
                     proxy->nvectors);
        proxy->nvectors = 0;
    }

    proxy->pci_dev.config_write = virtio_write_config;

    size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev)
         + virtio_bus_get_vdev_config_len(bus);
    if (size & (size - 1)) {
        size = 1 << qemu_fls(size);
    }

    memory_region_init_io(&proxy->bar, OBJECT(proxy), &virtio_pci_config_ops,
                          proxy, "virtio-pci", size); /* back BAR 0 with the virtio config space ops */
    pci_register_bar(&proxy->pci_dev, 0, PCI_BASE_ADDRESS_SPACE_IO,
                     &proxy->bar); /* register BAR 0 of the virtio PCI device */

    if (!kvm_has_many_ioeventfds()) {
        proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
    }

    proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
    proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE;
    proxy->host_features = virtio_bus_get_vdev_features(bus,
                                                      proxy->host_features);
}

virtio_pci_device_unplugged is called when the device is stopped and removed from the bus. For vhost, the ioeventfd is how the guest notifies the host through a PCI write, and the irqfd is how the host injects interrupts into the guest. Here virtio_pci_stop_ioeventfd is called, which ultimately configures the ioeventfd through virtio_pci_set_host_notifier_internal:

static void virtio_pci_device_unplugged(DeviceState *d)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);

    virtio_pci_stop_ioeventfd(proxy);
}

VirtioBusClass must support the operations on the guest notifier and the host notifier, e.g.

static int virtio_pci_set_host_notifier(DeviceState *d, int n, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);

    /* Stop using ioeventfd for virtqueue kick if the device starts using host
     * notifiers.  This makes it easy to avoid stepping on each others' toes.
     */
    proxy->ioeventfd_disabled = assign; /* assign == true means a host notifier is being set; false means it is being cleaned up */
    if (assign) { /* when assigning a new fd, tear down the old one first */
        virtio_pci_stop_ioeventfd(proxy);
    }
    /* We don't need to start here: it's not needed because backend
     * currently only stops on status change away from ok,
     * reset, vmstop and such. If we do add code to start here,
     * need to check vmstate, device state etc. */
    return virtio_pci_set_host_notifier_internal(proxy, n, assign, false);
}

static int virtio_pci_set_host_notifier_internal(VirtIOPCIProxy *proxy,
                                                 int n, bool assign, bool set_handler)
{
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, n);
    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
    int r = 0;

    if (assign) {
        r = event_notifier_init(notifier, 1);
        if (r < 0) {
            error_report("%s: unable to init event notifier: %d",
                         __func__, r);
            return r;
        }
        virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); /* set_handler is false when called via set_host_notifier, */
                                                                          /* true when called via start_ioeventfd */
        memory_region_add_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2,
                                  true, n, notifier); /* bind the fd to the ioport/iomem memory region */
    } else {
        memory_region_del_eventfd(&proxy->bar, VIRTIO_PCI_QUEUE_NOTIFY, 2,
                                  true, n, notifier);
        virtio_queue_set_host_notifier_fd_handler(vq, false, false);
        event_notifier_cleanup(notifier);
    }
    return r;
}

The host notifier above is in fact the read side of the VirtQueue's EventNotifier, usually called the ioeventfd; it is controlled through virtio_pci_start_ioeventfd and virtio_pci_stop_ioeventfd. The guest notifier corresponds to the VirtIOPCIProxy's vector_irqfd, usually called the irqfd, e.g.

static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
{
    VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int r, n;
    bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
        kvm_msi_via_irqfd_enabled();

    nvqs = MIN(nvqs, VIRTIO_PCI_QUEUE_MAX);

    /* When deassigning, pass a consistent nvqs value
     * to avoid leaking notifiers.
     */
    assert(assign || nvqs == proxy->nvqs_with_notifiers);

    proxy->nvqs_with_notifiers = nvqs;

    /* Must unset vector notifier while guest notifier is still assigned */
    if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) {
        msix_unset_vector_notifiers(&proxy->pci_dev); /* update the PCI config space to tear down the MSI-X vectors */
        if (proxy->vector_irqfd) {
            kvm_virtio_pci_vector_release(proxy, nvqs);
            g_free(proxy->vector_irqfd);
            proxy->vector_irqfd = NULL;
        }
    }

    for (n = 0; n < nvqs; n++) {
        if (!virtio_queue_get_num(vdev, n)) {
            break;
        }

        r = virtio_pci_set_guest_notifier(d, n, assign, with_irqfd); /* set up an irqfd for each VirtQueue */
        if (r < 0) {
            goto assign_error;
        }
    }

    /* Must set vector notifier after guest notifier has been assigned */
    if ((with_irqfd || k->guest_notifier_mask) && assign) {
        if (with_irqfd) {
            proxy->vector_irqfd =
                g_malloc0(sizeof(*proxy->vector_irqfd) *
                          msix_nr_vectors_allocated(&proxy->pci_dev));
            r = kvm_virtio_pci_vector_use(proxy, nvqs);
            if (r < 0) {
                goto assign_error;
            }
        }
        r = msix_set_vector_notifiers(&proxy->pci_dev,
                                      virtio_pci_vector_unmask,
                                      virtio_pci_vector_mask,
                                      virtio_pci_vector_poll);  /* likewise update the PCI space to install the MSI-X vectors */
        if (r < 0) {
            goto notifiers_error;
        }
    }

    return 0;

notifiers_error:
    if (with_irqfd) {
        assert(assign);
        kvm_virtio_pci_vector_release(proxy, nvqs);
    }

assign_error:
    /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
    assert(assign);
    while (--n >= 0) {
        virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
    }
    return r;
}
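
The per-vector irqfd wiring ultimately hands the guest notifier eventfd to KVM so that signaling it injects the MSI-X vector directly into the guest. A sketch loosely based on kvm_virtio_pci_irqfd_use (note that the signature of kvm_irqchip_add_irqfd_notifier has varied across QEMU versions):

static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
                                    int queue_no, unsigned int vector)
{
    VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
    EventNotifier *n = virtio_queue_get_guest_notifier(vq);

    /* once registered, a write to the eventfd raises irqfd->virq in the
     * guest without a round trip through QEMU userspace */
    return kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
}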

Finally, the read/write operations on virtio's PCI configuration space (the I/O space of BAR 0) handle accesses at a given offset and size, in one-, two-, and four-byte variants. Reads and writes within the first 20 bytes of the virtio config space go through I/O ports; accesses past 20 bytes go through MMIO.

static const MemoryRegionOps virtio_pci_config_ops = {
    .read = virtio_pci_config_read,
    .write = virtio_pci_config_write,
    .impl = {
        .min_access_size = 1,
        .max_access_size = 4,
    },
    .endianness = DEVICE_LITTLE_ENDIAN,
};
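
A sketch of the read path behind these ops (simplified; the MSI-X offset shift and endianness fixups are omitted):

static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
                                       unsigned size)
{
    VirtIOPCIProxy *proxy = opaque;
    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev); /* header size: 20, or 24 with MSI-X */

    if (addr < config) {
        return virtio_ioport_read(proxy, addr);  /* common virtio registers */
    }
    addr -= config;

    switch (size) {                              /* device-specific config */
    case 1:
        return virtio_config_readb(vdev, addr);
    case 2:
        return virtio_config_readw(vdev, addr);
    case 4:
        return virtio_config_readl(vdev, addr);
    }
    return 0;
}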