virtio blk原理

別人寫的virtio blk原理

virtio的原理說起來挺簡單的,兩邊是front-driver,backend-driver,中間是virtio和vring。關鍵是front-driver如何將io操作傳到backend-driver,backend-driver又是如何將這些io操作解析出來,執行完,通知回front-driver的?這里面的細節,細思極恐!
問題一:Guest,Hypervisor,Host是如何建立聯系的?本質就是初始化過程!

virtio初始化函數很有規律,基本就是virtio_*_init這種形式,那就好搞了,用systemtap抓取初始化的調用:

各種class初始化:
call virtio_device_class_init
call virtio_blk_class_init
call virtio_rng_class_init
call virtio_serial_port_class_init
call virtio_pci_class_init
call virtio_serial_pci_class_init
call virtio_bus_class_init
call virtio_pci_bus_class_init
call virtio_scsi_pci_class_init
call virtio_balloon_pci_class_init
call virtio_serial_class_init
call virtio_balloon_class_init
call virtio_mmio_bus_class_init
call virtio_scsi_common_class_init
call virtio_net_class_init
call virtio_mmio_class_init
call virtio_9p_class_init
call virtio_blk_pci_class_init
call virtio_scsi_class_init
call virtio_9p_pci_class_init
call virtio_rng_pci_class_init
call virtio_net_pci_class_init

virtio-serial-pci初始化(這個是個什么設備):
call virtio_serial_pci_instance_init
call virtio_pci_init
call virtio_serial_pci_init

我們的主角virtio blk初始化:
call virtio_blk_pci_instance_init
call virtio_pci_init
call virtio_blk_pci_init

我在虛擬機里面添加了virtio balloon設備:
call virtio_balloon_pci_instance_init
call virtio_pci_init
call virtio_balloon_pci_init

virtio blk的Qemu相關參數有兩個:

-drive file=/home/mq/Documents/IDV/idv-update/login.img,if=none,id=drive-virtio-disk1,format=qcow2 
-device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x8,drive=drive-virtio-disk1,id=virtio-disk1,bootindex=1

可以看到virtio blk的TypeInfo name是:"virtio-blk-pci"。
Instance結構:

/*
 * State of a single PCI device.
 *
 * The first member is the embedded DeviceState (qdev). The remaining
 * members hold the config-space buffer and its masks, the device's bus
 * and devfn, I/O and memory regions, interrupt bookkeeping (INTx, MSI,
 * MSI-X), PCI Express and SHPC state, and option-ROM information.
 */
struct PCIDevice {
    /* Embedded generic device state; placed first in the struct. */
    DeviceState qdev;

    /* PCI config space */
    uint8_t *config;

    /* Used to enable config checks on load. Note that writable bits are
     * never checked even if set in cmask. */
    uint8_t *cmask;

    /* Used to implement R/W bytes */
    uint8_t *wmask;

    /* Used to implement RW1C(Write 1 to Clear) bytes */
    uint8_t *w1cmask;

    /* Used to allocate config space for capabilities. */
    uint8_t *used;

    /* the following fields are read only */
    PCIBus *bus;
    /* NOTE(review): presumably the encoded device/function number on
     * the bus - confirm against the PCI core code. */
    int32_t devfn;
    /* Cached device to fetch requester ID from, to avoid the PCI
     * tree walking every time we invoke PCI request (e.g.,
     * MSI). For conventional PCI root complex, this field is
     * meaningless. */
    PCIReqIDCache requester_id_cache;
    char name[64];
    /* One entry per region index, PCI_NUM_REGIONS in total. */
    PCIIORegion io_regions[PCI_NUM_REGIONS];
    /* NOTE(review): the names suggest the address space and regions
     * used for device-initiated (bus master) accesses - confirm. */
    AddressSpace bus_master_as;
    MemoryRegion bus_master_container_region;
    MemoryRegion bus_master_enable_region;

    /* do not access the following fields */
    PCIConfigReadFunc *config_read;
    PCIConfigWriteFunc *config_write;

    /* Legacy PCI VGA regions */
    MemoryRegion *vga_regions[QEMU_PCI_VGA_NUM_REGIONS];
    bool has_vga;

    /* Current IRQ levels.  Used internally by the generic PCI code.  */
    uint8_t irq_state;

    /* Capability bits */
    uint32_t cap_present;

    /* Offset of MSI-X capability in config space */
    uint8_t msix_cap;

    /* MSI-X entries */
    int msix_entries_nr;

    /* Space to store MSIX table & pending bit array */
    uint8_t *msix_table;
    uint8_t *msix_pba;
    /* MemoryRegion container for msix exclusive BAR setup */
    MemoryRegion msix_exclusive_bar;
    /* Memory Regions for MSIX table and pending bit entries. */
    MemoryRegion msix_table_mmio;
    MemoryRegion msix_pba_mmio;
    /* Reference-count for entries actually in use by driver. */
    unsigned *msix_entry_used;
    /* MSIX function mask set or MSIX disabled */
    bool msix_function_masked;
    /* Version id needed for VMState */
    int32_t version_id;

    /* Offset of MSI capability in config space */
    uint8_t msi_cap;

    /* PCI Express */
    PCIExpressDevice exp;

    /* SHPC */
    SHPCDevice *shpc;

    /* Location of option rom */
    char *romfile;
    bool has_rom;
    MemoryRegion rom;
    uint32_t rom_bar;

    /* INTx routing notifier */
    PCIINTxRoutingNotifier intx_routing_notifier;

    /* MSI-X notifiers */
    MSIVectorUseNotifier msix_vector_use_notifier;
    MSIVectorReleaseNotifier msix_vector_release_notifier;
    MSIVectorPollNotifier msix_vector_poll_notifier;
};
/*
 * PCI-side state of a virtio device.
 *
 * Embeds a PCIDevice as its first member (pci_dev) and a VirtioBusState
 * (bus) as its last member. In between are memory regions, BAR indices,
 * feature/selector words, per-queue state and notifier bookkeeping.
 */
struct VirtIOPCIProxy {
    /* Embedded PCI device state; placed first in the struct. */
    PCIDevice pci_dev;
    MemoryRegion bar;
    /* NOTE(review): the names suggest one region per virtio PCI
     * structure (common config, ISR, device config, notify) - confirm
     * against the virtio-pci implementation. */
    VirtIOPCIRegion common;
    VirtIOPCIRegion isr;
    VirtIOPCIRegion device;
    VirtIOPCIRegion notify;
    VirtIOPCIRegion notify_pio;
    MemoryRegion modern_bar;
    MemoryRegion io_bar;
    MemoryRegion modern_cfg;
    AddressSpace modern_as;
    /* BAR indices. NOTE(review): presumably which PCI BAR each of the
     * legacy I/O, MSI-X and modern I/O/memory regions is mapped to -
     * confirm. */
    uint32_t legacy_io_bar_idx;
    uint32_t msix_bar_idx;
    uint32_t modern_io_bar_idx;
    uint32_t modern_mem_bar_idx;
    int config_cap;
    uint32_t flags;
    bool disable_modern;
    bool ignore_backend_features;
    OnOffAuto disable_legacy;
    uint32_t class_code;
    uint32_t nvectors;
    /* NOTE(review): dfselect/gfselect look like selectors for the
     * device/guest feature words, with guest_features holding two
     * 32-bit words - confirm. */
    uint32_t dfselect;
    uint32_t gfselect;
    uint32_t guest_features[2];
    /* One entry per virtqueue slot, VIRTIO_QUEUE_MAX in total. */
    VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX];

    VirtIOIRQFD *vector_irqfd;
    int nvqs_with_notifiers;
    /* Embedded virtio bus state. */
    VirtioBusState bus;
};
/*
 * Instance layout for the virtio block device on PCI: a VirtIOPCIProxy
 * (parent_obj, placed first) followed by the embedded VirtIOBlock (vdev).
 */
struct VirtIOBlkPCI {
    /* PCI proxy state; first member of the struct. */
    VirtIOPCIProxy parent_obj;
    /* Embedded block-device state. */
    VirtIOBlock vdev;
};

上面的結構體有個規律,逐漸由pci device往blk device轉變,VirtIOPCIProxy,VirtIOBlock兩個就是pci device和blk device的橋梁。

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

  • virtio Virtio是IO虛擬化中的一個優化方案,屬于para-virtualization的一種實現,即Gu...
    goldhorn閱讀 9,201評論 2 11
  • 一、KVM簡介 KVM的全稱是Kernel Virtual Machine,翻譯過來就是內核虛擬機。它是一個 Li...
    愛吃土豆的程序猿閱讀 9,685評論 0 13
  • Blocks Blocks Blocks 是帶有局部變量的匿名函數 截取自動變量值 int main(){ ...
    南京小伙閱讀 1,080評論 1 3
  • 這份文檔旨在幫助開發者理解 QEMU 構建系統的架構。正如使用 GNU autotools 的項目一樣,QEMU ...
    hanpfei閱讀 2,018評論 0 0
  • 又到了喜聞樂見的總結時間了,我測試了一下virtio接口的虛擬磁盤的性能,基本上和native差了一半。不知道是不...
    mqddb閱讀 865評論 0 0

友情鏈接更多精彩內容