Qemu重入漏洞梳理 & CVE-2024-3446分析

Qemu重入漏洞梳理 & CVE-2024-3446分析

原创 胡志斌 华为安全应急响应中心 2024-12-27 10:10

1

简介

本文主要是分析CVE-2024-3446漏洞的成因和漏洞的补丁,以及之前的补丁为何失效,顺便对Qemu历史重入漏洞进行了分析梳理

2

时间线

  • 2020/07/21 e1000e重入导致的UAF(还没归为重入问题)

  • 2021/08/23 重入漏洞整理为一类问题
    https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com/

  • 2023/04/28 给出第一个通用修复方案
    https://gitlab.com/qemu-project/qemu/-/commit/c40ca2301c7603524eaddb5308a3f524c6f89d24

  • 2024/04/09 修复后再次爆出的重入漏洞(CVE-2024-3446)
    https://bugzilla.redhat.com/show_bug.cgi?id=2274211

  • 2024/04/10 修复CVE-2024-3446
    https://gitlab.com/qemu-project/qemu/-/commit/f243175727903a0d2b52422e7baef86c1838a895

3

漏洞成因

通常是在设备交互的函数中执行了DMA操作,即调用了cpu_physical_memory_write、address_space_write、pci_dma_write等函数;当DMA的目标地址是设备的MMIO地址时,会再次进入设备的交互函数,从而引发一些安全问题

设备交互

众所周知操作设备有以下3种方式:
  • BH

  • Timer

  • PMIO/MMIO

接下来我们分别了解一下这三种方式的操作及原理

BH

BH 被注册到全局的qemu_aio_context结构中,在事件轮询处理时做响应。BH 维护一个链表,新的操作会插入到尾部,在主线程的qemu_main_loop中依次处理,所以仅仅使用BH不会导致重入

/*
 * Bottom-half (BH) descriptor as quoted by the article. The per-field notes
 * are the article's own annotations, translated to English.
 */
struct QEMUBH {
    AioContext *ctx;    // context the bottom half belongs to (qemu_aio_context)
    const char *name;
    QEMUBHFunc *cb;    // function the bottom half will execute
    void *opaque;        // argument passed to the function
    QSLIST_ENTRY(QEMUBH) next;    // next bottom half to execute
    unsigned flags;        // set to BH_PENDING | BH_SCHEDULED after qemu_bh_schedule is called; becomes 0 once used
    MemReentrancyGuard *reentrancy_guard;    // newly added for re-entrancy protection
};

qemu_bh_schedule触发后插入到BH事件处理链表

/*
 * BH callback of the bhtest demo device.
 *
 * opaque is the bhtestState that was passed when the BH was created.
 * The callback calls print_timestamp_ms() and then schedules the same BH
 * again, so every run re-queues the callback.
 */
static void bh_test_cb(void *opaque)
{
    bhtestState *obj = opaque;

    print_timestamp_ms();
    qemu_bh_schedule(obj->bh);
}

/*
 * pci_bhtest_realize: creates the demo device's BH and schedules it once.
 * errp is not used in this snippet.
 */
static void pci_bhtest_realize(PCIDevice *pdev, Error **errp)
{
    bhtestState *obj = DO_UPCAST(bhtestState, pdev, pdev);

    // initialize: create the BH with bh_test_cb as callback and obj as its argument
    obj->bh = qemu_bh_new(bh_test_cb, obj);
    // activate: schedule the BH
    qemu_bh_schedule(obj->bh);
}

Timer

Timer 对象会被注册到main_loop_tlg结构中,初始化时会设置timer_list指向main_loop_tlg中对应类型的链表,Timer 处理同样在主线程的qemu_main_loop函数中,所以仅仅使用Timer也不会导致重入

/*
 * Timer callback of the mmiotest demo device.
 * It only prints a marker line; the device state is fetched but not used
 * further in this snippet.
 */
static void timer_cb(void *opaque)
{
    mmiotestState *obj = opaque;
    printf("in timer_cb\n");
}


/*
 * pci_mmiotest_realize (timer variant): creates the demo device's timer.
 * This snippet only creates the timer; it does not set an expiry time here.
 * errp is not used in this snippet.
 */
static void pci_mmiotest_realize(PCIDevice *pdev, Error **errp)
{
    mmiotestState *obj = DO_UPCAST(mmiotestState, pdev, pdev);
    // initialize: nanosecond timer on QEMU_CLOCK_VIRTUAL, callback timer_cb, argument obj
    obj->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, timer_cb, obj);
}

激活时会将timer对象放到main_loop_tlg的链表中,并设置到期时间

/* Activate the timer, using the current QEMU_CLOCK_VIRTUAL time (ns) as its expiry time. */
timer_mod(obj->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));

PMIO/MMIO

IO内存注册,当对内存读写时会执行注册的MemoryRegionOps操作,当IO函数中再次执行了DMA操作时,会再次进入对应的操作函数,就会导致重入

/*
 * MemoryRegionOps table of the mmiotest demo device.
 * Reads go to mmiotest_mmio_read and writes to mmiotest_mmio_write.
 * Both the .valid and the .impl access sizes are declared as 1..8 bytes.
 */
static const MemoryRegionOps mmiotest_mmio_ops = {
    .read = mmiotest_mmio_read,
    .write = mmiotest_mmio_write,
    .endianness = DEVICE_NATIVE_ENDIAN,    /* no fixed device byte order is declared */
    .valid = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
    .impl = {
        .min_access_size = 1,
        .max_access_size = 8,
    },
};

/*
 * pci_mmiotest_realize (MMIO variant): sets up the demo device's MMIO region.
 * errp is not used in this snippet.
 */
static void pci_mmiotest_realize(PCIDevice *pdev, Error **errp)
{
    mmiotestState *obj = DO_UPCAST(mmiotestState, pdev, pdev);

    /* 0x100-byte IO region named "mmiotest-mmio"; obj is both the owner and the opaque handed to the ops */
    memory_region_init_io(&obj->mmio, OBJECT(obj), &mmiotest_mmio_ops, obj, "mmiotest-mmio",0x100);
    /* expose the region as BAR 0, memory space */
    pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &obj->mmio);
}

mmio重入调用栈样例:Guest的MMIO写操作(#14 io_writex)首先进入了mr_mmio_write(#10),然后在函数中调用cpu_physical_memory_write(#9)发起DMA写,目标地址又落在同一个MMIO区域,于是再次进入mr_mmio_write(#0),导致了重入

#0  0x0000555555a3d6c7 in mr_mmio_write (opaque=0x555557779930, addr=3840, val=0, size=8) at ../hw/pci/mr.c:132
#1  0x0000555555bf58c3 in memory_region_write_accessor (mr=0x55555777a2a0, addr=3840, value=0x7ffff6acfdf8, size=8, shift=0, mask=18446744073709551615, attrs=...) at ../softmmu/memory.c:492
#2  0x0000555555bf5b11 in access_with_adjusted_size (addr=3840, value=0x7ffff6acfdf8, size=8, access_size_min=1, access_size_max=8, access_fn=0x555555bf57c9 <memory_region_write_accessor>, mr=0x55555777a2a0, attrs=...) at ../softmmu/memory.c:554
#3  0x0000555555bf8c0f in memory_region_dispatch_write (mr=0x55555777a2a0, addr=3840, data=0, op=MO_64, attrs=...) at ../softmmu/memory.c:1504
#4  0x0000555555beb934 in flatview_write_continue (fv=0x7fff302b57c0, addr=4273938176, attrs=..., ptr=0x55555777a500, len=8, addr1=3840, l=8, mr=0x55555777a2a0) at ../softmmu/physmem.c:2782
#5  0x0000555555beba7d in flatview_write (fv=0x7fff302b57c0, addr=4273938176, attrs=..., buf=0x55555777a500, len=8) at ../softmmu/physmem.c:2822
#6  0x0000555555bebdf7 in address_space_write (as=0x555556768ec0 <address_space_memory>, addr=4273938176, attrs=..., buf=0x55555777a500, len=8) at ../softmmu/physmem.c:2914
#7  0x0000555555bebe68 in address_space_rw (as=0x555556768ec0 <address_space_memory>, addr=4273938176, attrs=..., buf=0x55555777a500, len=8, is_write=true) at ../softmmu/physmem.c:2924
#8  0x0000555555bebed6 in cpu_physical_memory_rw (addr=4273938176, buf=0x55555777a500, len=8, is_write=true) at ../softmmu/physmem.c:2933
#9  0x0000555555a3d167 in cpu_physical_memory_write (addr=4273938176, buf=0x55555777a500, len=8) at /home/test/Desktop/qemu-6.2.0/include/exec/cpu-common.h:82

#10 0x0000555555a3d5c2 in mr_mmio_write (opaque=0x555557779930, addr=3840, val=313778176, size=8) at ../hw/pci/mr.c:117
#11 0x0000555555bf58c3 in memory_region_write_accessor (mr=0x55555777a2a0, addr=3840, value=0x7ffff6ad0148, size=8, shift=0, mask=18446744073709551615, attrs=...) at ../softmmu/memory.c:492
#12 0x0000555555bf5b11 in access_with_adjusted_size (addr=3840, value=0x7ffff6ad0148, size=8, access_size_min=1, access_size_max=8, access_fn=0x555555bf57c9 <memory_region_write_accessor>, mr=0x55555777a2a0, attrs=...) at ../softmmu/memory.c:554
#13 0x0000555555bf8c0f in memory_region_dispatch_write (mr=0x55555777a2a0, addr=3840, data=313778176, op=MO_64, attrs=...) at ../softmmu/memory.c:1504

#14 0x0000555555d30f75 in io_writex (env=0x555556a49410, iotlbentry=0x7fff307ba9a0, mmu_idx=1, val=313778176, addr=140444038311680, retaddr=140735287403492, op=MO_64) at ../accel/tcg/cputlb.c:1420
#15 0x0000555555d33592 in store_helper (env=0x555556a49410, addr=140444038311680, val=313778176, oi=49, retaddr=140735287403492, op=MO_64) at ../accel/tcg/cputlb.c:2355
#16 0x0000555555d33a2e in helper_le_stq_mmu (env=0x555556a49410, addr=140444038311680, val=313778176, oi=49, retaddr=140735287403492) at ../accel/tcg/cputlb.c:2469
#17 0x00007fff7cd023e4 in code_gen_buffer ()
#18 0x0000555555d1e243 in cpu_tb_exec (cpu=0x555556a40b60, itb=0x7fffba817180, tb_exit=0x7ffff6ad0864) at ../accel/tcg/cpu-exec.c:357
#19 0x0000555555d1f108 in cpu_loop_exec_tb (cpu=0x555556a40b60, tb=0x7fffba817180, last_tb=0x7ffff6ad0870, tb_exit=0x7ffff6ad0864) at ../accel/tcg/cpu-exec.c:842
#20 0x0000555555d1f4c6 in cpu_exec (cpu=0x555556a40b60) at ../accel/tcg/cpu-exec.c:1001
#21 0x0000555555d41bea in tcg_cpus_exec (cpu=0x555556a40b60) at ../accel/tcg/tcg-accel-ops.c:67
#22 0x0000555555d41f7a in mttcg_cpu_thread_fn (arg=0x555556a40b60) at ../accel/tcg/tcg-accel-ops-mttcg.c:95
#23 0x0000555555efd63b in qemu_thread_start (args=0x555556a60820) at ../util/qemu-thread-posix.c:556
#24 0x00007ffff7938609 in start_thread () from target:/lib/x86_64-linux-gnu/libpthread.so.0
#25 0x00007ffff785d353 in clone () from target:/lib/x86_64-linux-gnu/libc.so.6

场景

漏洞场景

  • mmio -> dma -> mmio

  • bh -> dma -> mmio

  • timer -> dma -> mmio

利用场景

  • 单设备重复进入(A -> A -> …)

  • 多设备重复进入(A -> B -> A -> B -> … )(是否存在存疑)

4

漏洞危害

stack overflow

因为不断地进入mmio的操作函数,形成无限递归,最终导致栈空间被耗尽、进程崩溃,造成拒绝服务(DoS)

UAF

在触发重入后,比较常见的场景是去执行reset函数,导致一些对象被释放,函数返回继续执行导致UAF

Fix

@a1xndr在2023/04/28提交了多个commit修复re-entrancy问题

分析其中3个重要的Fix

Fix01

这个补丁主要针对场景:
mmio -> dma -> mmio (√)

  1. 在mr结构中增加了dev对象和disable_reentrancy_guard

  2. 在dev对象中增加mem_reentrancy_guard标志

  3. access_with_adjusted_size函数中对mem_reentrancy_guard.engaged_in_io标志做了判断

https://github.com/qemu/qemu/commit/a2e1753b8054344f32cf94f31c6399a58794a380#diff-164a41822a3316a554081cfd1c7202bb60723035af488f3fe5a16bff05cdc130L770

/*
 * Splits one access of `size` bytes to memory region `mr` into pieces of
 * access_size bytes and calls access_fn once per piece, OR-ing the results.
 *
 * access_size is `size` clamped to [access_size_min, access_size_max];
 * a zero access_size_min defaults to 1 and a zero access_size_max to 4.
 *
 * Re-entrancy guard (the part added by the fix discussed in the article):
 * when the region has an owning device, the guard is not disabled for the
 * region, and the region is not ram / ram_device / rom_device / readonly,
 * the device's mem_reentrancy_guard.engaged_in_io flag is checked.
 * If it is already set, the access is rejected with MEMTX_ACCESS_ERROR
 * (after a one-time warning) and access_fn is never called. Otherwise the
 * flag is set for the duration of the access and cleared before returning.
 *
 * Returns the OR of all access_fn results, or MEMTX_ACCESS_ERROR when the
 * access was blocked as re-entrant.
 */
static MemTxResult access_with_adjusted_size(hwaddr addr,
                                      uint64_t *value,
                                      unsigned size,
                                      unsigned access_size_min,
                                      unsigned access_size_max,
                                      MemTxResult (*access_fn)
                                                  (MemoryRegion *mr,
                                                   hwaddr addr,
                                                   uint64_t *value,
                                                   unsigned size,
                                                   signed shift,
                                                   uint64_t mask,
                                                   MemTxAttrs attrs),
                                      MemoryRegion *mr,
                                      MemTxAttrs attrs)
{
    uint64_t access_mask;
    unsigned access_size;
    unsigned i;
    MemTxResult r = MEMTX_OK;
    /* remembers whether THIS call set the flag, so only this call clears it */
    bool reentrancy_guard_applied = false;

    if (!access_size_min) {
        access_size_min = 1;
    }
    if (!access_size_max) {
        access_size_max = 4;
    }

    /* Do not allow more than one simultaneous access to a device's IO Regions */
    if (mr->dev && !mr->disable_reentrancy_guard &&
        !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
        if (mr->dev->mem_reentrancy_guard.engaged_in_io) {
            /* the device is already inside an IO handler: block the nested access */
            warn_report_once("Blocked re-entrant IO on MemoryRegion: "
                             "%s at addr: 0x%" HWADDR_PRIX,
                             memory_region_name(mr), addr);
            return MEMTX_ACCESS_ERROR;
        }
        mr->dev->mem_reentrancy_guard.engaged_in_io = true;
        reentrancy_guard_applied = true;
    }

    /* FIXME: support unaligned access? */
    access_size = MAX(MIN(size, access_size_max), access_size_min);
    access_mask = MAKE_64BIT_MASK(0, access_size * 8);
    if (memory_region_big_endian(mr)) {
        /* big-endian region: the first piece gets the largest shift */
        for (i = 0; i < size; i += access_size) {
            r |= access_fn(mr, addr + i, value, access_size,
                        (size - access_size - i) * 8, access_mask, attrs);
        }
    } else {
        /* otherwise the piece at offset i gets shift i * 8 */
        for (i = 0; i < size; i += access_size) {
            r |= access_fn(mr, addr + i, value, access_size, i * 8,
                        access_mask, attrs);
        }
    }
    /* release the guard only if it was taken by this call */
    if (mr->dev && reentrancy_guard_applied) {
        mr->dev->mem_reentrancy_guard.engaged_in_io = false;
    }
    return r;
}

Fix02

这个补丁主要针对场景:
bh -> dma -> mmio (√)

  1. 在BH结构中增加了reentrancy_guard指针

  2. 在执行BH回调前会将reentrancy_guard->engaged_in_io置位,回调中再进入mr操作则会被阻止

https://gitlab.com/qemu-project/qemu/-/commit/9c86c97f12c060bf7484dd931f38634e166a81f0

/*
 * Runs the callback of bottom half `bh` with the re-entrancy guard engaged.
 *
 * If the BH has a guard, its previous engaged_in_io value is saved, the flag
 * is set to true while bh->cb runs, and the saved value is restored afterwards.
 * If the flag was already set on entry, this is only traced via
 * trace_reentrant_aio(); the callback is still executed.
 * A BH without a guard (NULL reentrancy_guard) just has its callback called.
 */
void aio_bh_call(QEMUBH *bh)
{
    bool last_engaged_in_io = false;

    /* Make a copy of the guard-pointer as cb may free the bh */
    MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard;
    if (reentrancy_guard) {
        /* save the previous state so it can be restored rather than cleared */
        last_engaged_in_io = reentrancy_guard->engaged_in_io;
        if (reentrancy_guard->engaged_in_io) {
            trace_reentrant_aio(bh->ctx, bh->name);
        }
        reentrancy_guard->engaged_in_io = true;
    }

    bh->cb(bh->opaque);

    /* uses the copied pointer, not bh, which may no longer be valid here */
    if (reentrancy_guard) {
        reentrancy_guard->engaged_in_io = last_engaged_in_io;
    }
}

Fix03

通过替换以下函数,对设备开启防护:

  1. qemu_bh_new -> qemu_bh_new_guarded

  2. aio_bh_new -> aio_bh_new_guarded

https://gitlab.com/qemu-project/qemu/-/commit/f63192b0544af5d3e4d5edfd85ab520fcf671377

被修改的设备列表:

1. xen9pfs
2. virtio-blk
3. xen-block
4. virtio-serial-bus
5. qxl
6. virtio-gpu
7. ahci
8. imx_rngc
9. mac_dbdma
10. virtio-net
11. nvme
12. scsi/*
13. usb/*
14. virtio-balloon
15. virtio-crypto

新增的guard类函数,主要是添加了reentrancy_guard参数

/*
 * Convenience wrapper around aio_bh_new_full(): passes the stringified
 * callback expression as the BH name and forwards `guard` as the
 * re-entrancy guard.
 */
#define aio_bh_new_guarded(ctx, cb, opaque, guard) \
    aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard)

/*
 * Creates a BH on the global qemu_aio_context.
 * All arguments, including reentrancy_guard, are forwarded unchanged to
 * aio_bh_new_full(), whose result is returned.
 */
QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
                         MemReentrancyGuard *reentrancy_guard)
{
    return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
                           reentrancy_guard);
}

/*
 * Allocates a new QEMUBH and initializes it with the given context,
 * callback, callback argument, name and re-entrancy guard.
 * Fields not named in the initializer are zero-initialized by the compound
 * literal. Returns the new BH; this function does not schedule it.
 */
QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
                        const char *name, MemReentrancyGuard *reentrancy_guard)
{
    QEMUBH *bh;
    bh = g_new(QEMUBH, 1);
    *bh = (QEMUBH){
        .ctx = ctx,
        .cb = cb,
        .opaque = opaque,
        .name = name,
        .reentrancy_guard = reentrancy_guard,
    };
    return bh;
}

/*
 * Creates a BH for a virtio device, guarded by the re-entrancy guard of the
 * device that owns dev's parent bus (named `transport` here), not by dev's
 * own guard.
 */
QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
                                   QEMUBHFunc *cb, void *opaque,
                                   const char *name)
{
    /* parent of the bus that dev is plugged into */
    DeviceState *transport = qdev_get_parent_bus(dev)->parent;

    return qemu_bh_new_full(cb, opaque, name,
                            &transport->mem_reentrancy_guard);
}

这里有一个问题,补丁只修复了两种情况,第三种情况是没有修复的

mmio -> dma -> mmio (√)

bh -> dma -> mmio (√)

timer -> dma -> mmio (x)

5

CVE-2024-3446

漏洞描述

在 QEMU virtio 设备(virtio-gpu、virtio-serial-bus、virtio-crypto)中发现了双重释放漏洞,其中 mem_reentrancy_guard 标志不足以防止 DMA 重入问题。此问题可能允许恶意特权来宾用户破坏主机上的 QEMU 进程,从而导致拒绝服务或允许在主机上的 QEMU 进程上下文中执行任意代码。

之前的补丁为何无效?

重新修复了这三个设备的代码(其它设备为什么不受影响?)

  1. virtio-gpu

  2. virtio-crypto

  3. virtio-serial-bus

https://gitlab.com/qemu-project/qemu/-/commits/master?search=CVE-2024-3446

Fix分析

以virtio-gpu为样例,补丁用virtio_bh_new_guarded_full函数替换了qemu_bh_new_guarded函数

virtio_bh_new_guarded_full函数将判断重入时使用的设备从dev变成了dev->parent_bus->parent,parent指向的是谁?这就涉及virtio设备的注册过程:如果是普通的pci设备,它的结构是 pci-bus -> pci-device;而virtio设备会注册一个代理设备virtio-xx-pci,以virtio-gpu为例,它的结构是 pci-bus -> virtio-gpu-pci -> virtio-bus -> virtio-gpu,所以parent指向的是virtio-gpu-pci设备。为什么要使用它呢?

/*
 * Creates a BH for a virtio device, guarded by the re-entrancy guard of the
 * device that owns dev's parent bus instead of dev's own guard.
 */
QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev,
                                   QEMUBHFunc *cb, void *opaque,
                                   const char *name)
{
    // dev->parent_bus->parent (the virtio-xx-pci proxy device)
    DeviceState *transport = qdev_get_parent_bus(dev)->parent;

    return qemu_bh_new_full(cb, opaque, name,
                            &transport->mem_reentrancy_guard);
}

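跟一下virtio设备mmio初始化流程:virtio设备调用realize函数初始化时会执行到virtio_bus_device_plugged,进而调用到下面的virtio_pci_device_plugged,其中会注册mmio操作。注册io操作时指定的owner就是proxy对象(virtio-xx-pci),它会被保存到mr->dev中,所以mr->dev指向的是virtio-xx-pci。也就是说,MMIO路径检查的是virtio-xx-pci的mem_reentrancy_guard,而之前的补丁给BH绑定的是virtio设备自身(dev)的mem_reentrancy_guard,两者不是同一个标志,BH回调中置位的标志在MMIO路径上检查不到,这就是之前补丁无效的原因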

/*
 * Excerpt of virtio_pci_device_plugged; the "..." lines mark code omitted
 * by the article.
 *
 * The visible part initializes the modern virtio-pci memory regions of the
 * proxy and maps the common, isr, device and notify regions.
 * NOTE(review): the declarations of `cap` and `notify`, and the condition
 * enclosing the indented calls, are in the omitted part.
 */
static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
{
    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
    VirtioBusState *bus = &proxy->bus;
    bool legacy = virtio_pci_legacy(proxy);
    bool modern;
    bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
    uint8_t *config;
    uint32_t size;
    VirtIODevice *vdev = virtio_bus_get_device(bus);

    ...

        /* create the IO regions; see virtio_pci_modern_regions_init for the owner used */
        virtio_pci_modern_regions_init(proxy, vdev->name);

        virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap);
        virtio_pci_modern_mem_region_map(proxy, &proxy->isr, &cap);
        virtio_pci_modern_mem_region_map(proxy, &proxy->device, &cap);
        virtio_pci_modern_mem_region_map(proxy, &proxy->notify, &notify.cap);

    ....
}

/*
 * Excerpt of virtio_pci_modern_regions_init; the "..." line marks code
 * omitted by the article (presumably including the declaration of `name`
 * and the ops tables — not visible here).
 *
 * Initializes five IO memory regions on the proxy: common, isr, device,
 * notify and notify-pio. Each region is named "virtio-pci-<kind>-<vdev_name>".
 * In every call the proxy is passed both as the region owner (OBJECT(proxy))
 * and as the opaque argument for the ops.
 */
static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy,
                                           const char *vdev_name)
{
    ...

    g_string_printf(name, "virtio-pci-common-%s", vdev_name);
    memory_region_init_io(&proxy->common.mr, OBJECT(proxy),
                          &common_ops,
                          proxy,
                          name->str,
                          proxy->common.size);

    g_string_printf(name, "virtio-pci-isr-%s", vdev_name);
    memory_region_init_io(&proxy->isr.mr, OBJECT(proxy),
                          &isr_ops,
                          proxy,
                          name->str,
                          proxy->isr.size);

    g_string_printf(name, "virtio-pci-device-%s", vdev_name);
    memory_region_init_io(&proxy->device.mr, OBJECT(proxy),
                          &device_ops,
                          proxy,
                          name->str,
                          proxy->device.size);

    g_string_printf(name, "virtio-pci-notify-%s", vdev_name);
    memory_region_init_io(&proxy->notify.mr, OBJECT(proxy),
                          &notify_ops,
                          proxy,
                          name->str,
                          proxy->notify.size);

    g_string_printf(name, "virtio-pci-notify-pio-%s", vdev_name);
    memory_region_init_io(&proxy->notify_pio.mr, OBJECT(proxy),
                          &notify_pio_ops,
                          proxy,
                          name->str,
                          proxy->notify_pio.size);
}

6

如何发现此类漏洞

fuzz

之前外界公开过一个fuzz工具的实现,它对Qemu代码进行了定制,通过hook dma相关操作,喂给dma操作特定的数据,这样就能捕获到dma操作导致的异常

codeql

《Hunting and Exploiting Recursive MMIO Flaws in QEMU/KVM》中提供的Codeql代码,主要功能就是路径搜索,源为内存io操作的注册函数,目标为dma write的功能函数

/**
* @kind path-problem
*/

/**
 * A function used as an MMIO handler: its string form equals child 1 of the
 * initializer of a global variable that
 *  - is defined in a file under `qemu-6.1.0/hw/`,
 *  - has a type whose name contains `MemoryRegionOps`, and
 *  - has a name containing `mmio`.
 *
 * NOTE(review): child 1 is presumably the `.write` entry of the ops table;
 * the match is by string form, not by resolved function — confirm.
 */
class MMIOFn extends Function {
  MMIOFn() {
    exists(GlobalVariable gv |
    gv.getFile().getAbsolutePath().regexpMatch(".*qemu-6.1.0/hw/.*") and
    gv.getType().getName().regexpMatch(".*MemoryRegionOps.*") and
    gv.getName().regexpMatch(".*mmio.*") and
    gv.getInitializer().getExpr().getChild(1).toString() = this.toString()
    )
  }
}
/**
 * A function registered as a BH or timer callback: its string form equals
 * child 0 or child 1 of a call to `qemu_bh_new_full` or `timer_new_ns`
 * located in a file under `qemu-6.1.0/hw/`.
 *
 * This class is not referenced by the query shown in this listing.
 */
class BHTFn extends Function {
  BHTFn() {
    exists(FunctionCall fc |
    fc.getTarget().getName().regexpMatch("qemu_bh_new_full|timer_new_ns") and
    fc.getFile().getAbsolutePath().regexpMatch(".*qemu-6.1.0/hw/.*") and
    (fc.getChild(0).toString() = this.toString() or fc.getChild(1).toString() = this.toString())
    )
  }
}

/**
 * A function that directly contains a call to `g_free`, excluding functions
 * whose name contains `shutdown` and functions in files whose relative path
 * matches error / test / replay / translate-all / xen / qapi-visit.
 *
 * This class is not referenced by the query shown in this listing.
 */
class FreeFn extends Function {
  FreeFn() {
    exists(FunctionCall fc |
    fc.getTarget().getName().matches("g_free") and
    fc.getEnclosingFunction() = this and
    not this.getName().regexpMatch(".*shutdown.*") and
    not this.getFile()
    .getRelativePath()
    .regexpMatch(".*error.*|.*test.*|.*replay.*|.*translate-all.*|.*xen.*|.*qapi-visit.*")
    )
  }
}

/**
 * A function treated as a re-entry sink: its name matches one of the listed
 * DMA / address-space write helpers.
 *
 * The regex must stay on a single line: a QL string literal cannot span
 * lines, and the previous line break split the name `stl_le_dma` in two.
 *
 * NOTE(review): the trailing `...` appears to stand for names omitted from
 * the listing. As a regex it would also match any three-character function
 * name, so replace it with the remaining names before running the query.
 */
class ReentryFn extends Function {
  ReentryFn() {
    this.getName()
    .regexpMatch("address_space_write|dma_memory_write|stb_dma|stl_be_dma|stl_le_dma|stq_be_dma|stq_le_dma|stw_be_dma|stw_le_dma|pci_dma_write|dma_buf_read|...")
  }
}

/**
 * Edge relation used for path search: `a` -> `b` holds when function `a`
 * calls function `b`.
 */
query predicate edges(Function a, Function b) {
  a.calls(b)
}

// Report every (MMIO handler, re-entry sink) pair where the sink is reachable
// from the handler through one or more call edges (transitive closure of edges).
from MMIOFn entry_fn, ReentryFn end_fn
where edges+(entry_fn, end_fn)
select end_fn, entry_fn, end_fn, "MMIO -> Reentry: from " + entry_fn.getName() + " to " +
end_fn.getName()

扩展分析

对比上面的修复列表还有3个virtio设备没有更新补丁,是否还存在漏洞?

  1. virtio-blk:v8.2.0 移除了BH功能函数 (https://github.com/qemu/qemu/commit/073458da5619c8914a91440ef243d1e2b888c1b7)

  2. virtio-net

  3. virtio-balloon

是否存在: timer -> dma -> mmio攻击场景

用codeql搜索未找到此类攻击路径,但应该存在这种攻击风险

7

参考

https://i.blackhat.com/Asia-22/Thursday-Materials/AS-22-Qiuhao-Recursive-MMIO-final.pdf

https://conference.hitb.org/hitbsecconf2023ams/materials/D1T1 – Leveraging Advanced Techniques of DMA Reentrancy to Escape QEMU – Quan Jin & Ao Wang.pdf

本公众号发布、转载的文章所涉及的技术、思路、工具仅供学习交流,任何人不得将其用于非法用途及盈利等目的,否则后果自行承担!

推荐阅读


docker历史上的第一个漏洞:关于shocker的一切

华为终端安全奖励计划诚邀您参与鸿蒙安全研究,赢取丰厚漏洞奖金

“协作共御、洞见未来” | 首届华为漏洞管理与应急响应技术大会于深圳成功举办

点这里
关注我们,一键三连~