r110 でも再現しました。
しかし、以下のように、vm1 に割り当てる CPU を 1 個のみにしたところ、再現しませんでした。
./install_to_usb.sh -c 'vm0.boot_int18 vm=vm0,vm1 vm1.mem=100000000-21f5fffff vm1.cpu=6 vm1.pci=00:1c.7,00:1c.5 shell=0' /dev/sdd
gcc 4.4.7 で SPT3 を無効化した VMM をコンパイルしてみましたが、再現しました。
[ 0.097504] CPU0: Intel(R) Core(TM) i5-3470 CPU @ 3.20GHz stepping 09 [ 0.185119] Performance Events: PEBS fmt1+, generic architected perfmon, Intel PMU driver. [ 0.188694] ... version: 3 [ 0.190419] ... bit width: 48 [ 0.192183] ... generic registers: 8 [ 0.193909] ... value mask: 0000ffffffffffff [ 0.196203] ... max period: 000000007fffffff [ 0.198492] ... fixed-purpose events: 3 [ 0.200218] ... event mask: 00000007000000ff [ 0.202567] Booting Node 0, Processors #1 Ok. [ 0.204554] smpboot cpu 1: start_ip = 9a000 Reset cpu 0x6 [ 0.455188] Brought up 2 CPUs [ 0.456501] Total of 2 processors activated (22564.55 BogoMIPS).
CPU6 panic: Fatal interrupt in VMM: 0x02 NMI CR0: 0x80000039 CR2: 0x00000000 CR3: 0xC4D56000 CR4: 0x00042668 RSP on interrupt: 0x4033FAA8 Stack information: +00 Error code / RIP : 0x4010EEBD +32 RSP / SS : 0x00000058 +08 RIP / CS : 0x00000050 +40 SS / : 0xFEE000B0 +16 CS / RFLAGS : 0x00000002 +48 / : 0x00000004 +24 RFLAGS / RSP : 0x4033FAD0 +56 / : 0x4033FB10 RAX: 0x00000000 RCX: 0x0000002F RDX: 0x40500000 RBX: 0x00000000 RSP: 0x4033FA50 RBP: 0x4033FAE0 RSI: 0x00000003 RDI: 0x00000000 R8: 0x00000004 R9: 0x00000001 R10: 0xFFFFFFFFF000 R11: 0xC0000000 R12: 0x00000004 R13: 0x00000001 R14: 0x4033FB9C R15: 0x00000004 ES: 0x0058 CS: 0x0050 SS: 0x0058 DS: 0x0058 FS: 0x0058 GS: 0x0080 VMM state of CPU6 ------------------------------ RAX 00000031 RCX 000036BA RDX 00000003 RBX 40146AA8 RSP 4033F6A0 RBP 4033F900 RSI 00000000 RDI 00000000 R8 00000000 R9 FFFFFFFFFFFFFFFF R10 00000000 R11 00000000 R12 4033F9F0 R13 4033F910 R14 00000000 R15 000706BA CR0 80000039 CR2 00000000 CR3 C4D56000 CR4 00042668 RFLAGS 00000002 GDTR 4032A400+0000FFFF IDTR 401AFC40+0000FFFF backtrace: 0x4033f698: 0x4011bb17 0x4033f908: 0x4011c368 0x4033f968: 0x401118ae 0x4033f988: 0x40111aac 0x4033f9e8: 0x40116177 0x4033fae8: 0x40107575 0x4033fb18: 0x4011ac06 0x4033fb88: 0x4010fe37 0x4033fbb8: 0x4010db6b 0x4033fc08: 0x40109779 0x4033fc48: 0x4010a15f 0x4033fc88: 0x4010a987 0x4033fd38: 0x4010b831 0x4033fe18: 0x401371b6 0x4033fe38: 0x4012c78d 0x4033fe88: 0x4012d0ee 0x4033ffc8: 0x40116ecb 0x4033fff8: 0x0 ------------------------------------------------ Guest state of CPU6 ---------------------------- RAX FFFFFFFF81027A54 RCX FFFF88014A4BC7B0 RDX 00000001 RBX 0000DC80 RSP FFFF880150303E38 RBP FFFF88014A4C3E88 RSI 00000000 RDI 000000B0 R8 00000005 R9 00000004 R10 00000020 R11 FFFF88015030DC80 R12 00000000 R13 00000002 R14 00000000 R15 FFFF88014A4C3FD8 CR0 8005003B CR2 00000000 CR3 01605000 CR4 001406E0 ACR ES 0001C000 CS 0000A09B SS 0001C000 DS 0001C000 FS 0001C000 GS 0001C000 LIMIT ES FFFFFFFFFFFFFFFF CS 
FFFFFFFFFFFFFFFF SS FFFFFFFFFFFFFFFF DS FFFFFFFFFFFFFFFF FS FFFFFFFFFFFFFFFF GS FFFFFFFFFFFFFFFF BASE ES 00000000 CS 00000000 SS 00000000 DS 00000000 FS 00000000 GS FFFF880150300000 SEL ES 00000000 CS 00000010 SS 00000000 DS 00000000 FS 00000000 GS 00000000 RIP FFFFFFFF81027A56 RFLAGS 00010046 GDTR FFFF880150304000+0000007F IDTR FFFFFFFF8172D000+00000FFF EFER 00000D01 Exit reason: 0x30 (EPT Violation) Exit qualification 00000182 Exit intr info 00000000 Exit intr errcode 00000000 VMCS IDTR ffffffff8172d000+00000fff VMCS RFLAGS 00010046 Guest phys addr fee000b0 Guest linear addr ffffffffff5fb0b0 pe=1 pg=1 sw:en=0x0 es=0x9a00 cs=0x9a00 ss=0x9a00 ds=0x9a00 fs=0x0 gs=0x0 ------------------------------------------------
(gdb) list *0x4010EEBD 0x4010eebd is in extint_apic_eoi (asm.h:852). 847 848 static inline ulong 849 asm_lock_test_and_set_ulong (ulong *mem, int bit) 850 { 851 ulong oldval; 852 asm volatile ("lock bts %2,%0\n" 853 "setc %%al" 854 : "=m" (*mem) 855 , "=a" (oldval) 856 : "r" (bit)
(gdb) list *0x40107575 0x40107575 is in apic_mmio_handler (apic_pass.c:393). 388 { 389 if (!wr) { 390 /* Read */ 391 return 0; 392 } 393 extint_apic_eoi(); 394 return 1; /* emulated */ 395 } 396 397 static int (gdb) list *0x4011ac06 0x4011ac06 is in mmio_access_memory (mmio.c:147). 142 if (gphys & ((len << 1) - 1)) { 143 break; 144 } 145 } 146 emulated = handle->handler(handle->data, gphys, wr, buf, len, flags); 147 if (!emulated) { 148 mmio_gphys_access(gphys, wr, buf, len, flags); 149 } 150 gphys += len; 151 length -= len;
SPT3 を有効(デフォルト)にして、再現評価をしてみました。
Guest の RIP は、SPT3 無効時と同一です。
------------------------------------------------ CPU6 panic: Fatal interrupt in VMM: 0x02 NMI CR0: 0x80000039 CR2: 0x00000000 CR3: 0xC4D5B000 CR4: 0x00042668 RSP on interrupt: 0x40377988 Stack information: +00 Error code / RIP : 0x4010EFF1 +32 RSP / SS : 0x00000058 +08 RIP / CS : 0x00000050 +40 SS / : 0x00000000 +16 CS / RFLAGS : 0x00000097 +48 / : 0x01605FF8 +24 RFLAGS / RSP : 0x403779B0 +56 / : 0x403779F0 RAX: 0x437A6000 RCX: 0x437A6000 RDX: 0x00000000 RBX: 0x01605FF8 RSP: 0x40377930 RBP: 0x403779C0 RSI: 0x00000001 RDI: 0x01605FF8 R8: 0x00000000 R9: 0x00000000 R10: 0xC0000000 R11: 0xFFFFFFFFF000 R12: 0x40377A08 R13: 0x00000000 R14: 0x00000000 R15: 0x00000020 ES: 0x0058 CS: 0x0050 SS: 0x0058 DS: 0x0058 FS: 0x0058 GS: 0x0080 VMM state of CPU6 ------------------------------ RAX 00000031 RCX 000036D2 RDX 00000003 RBX 403778D0 RSP 403776C0 RBP 40377800 RSI 00000000 RDI 00000000 R8 00000000 R9 40377581 R10 00000006 R11 00000000 R12 40149C10 R13 40377818 R14 00000001 R15 403E0370 CR0 80000039 CR2 00000000 CR3 C4D5B000 CR4 00042668 RFLAGS 00000006 GDTR 4032FC00+0000FFFF IDTR 401B4B20+0000FFFF backtrace: 0x403776b8: 0x4011a4fb 0x40377808: 0x4011ad28 0x40377868: 0x401411d2 0x40377888: 0x401147d1 0x403778c8: 0x40114a17 0x403779c8: 0x4010eb5c 0x403779f8: 0x401163dd 0x40377a38: 0x4010bd92 0x40377b88: 0x4010c289 0x40377c28: 0x4010c4d6 0x40377c68: 0x4010cb81 0x40377ca8: 0x4010954c 0x40377d08: 0x4010a8d5 0x40377e18: 0x40139387 0x40377e38: 0x4012a88d 0x40377e88: 0x4012ae86 0x40377fc8: 0x4011562d 0x40377ff8: 0x0 ------------------------------------------------ Guest state of CPU6 ---------------------------- RAX FFFFFFFF81027A54 RCX FFFF88014A4BC7B0 RDX 00000001 RBX 0000DC80 RSP FFFF880150303E38 RBP FFFF88014A4C3E88 RSI 00000000 RDI 000000B0 R8 00000005 R9 00000004 R10 00000020 R11 FFFF88015030DC80 R12 00000000 R13 00000002 R14 00000000 R15 FFFF88014A4C3FD8 CR0 8005003B CR2 00000000 CR3 01605000 CR4 001406E0 ACR ES 0001C000 CS 0000A09B SS 0001C000 DS 0001C000 FS 0001C000 GS 
0001C000 LIMIT ES FFFFFFFFFFFFFFFF CS FFFFFFFFFFFFFFFF SS FFFFFFFFFFFFFFFF DS FFFFFFFFFFFFFFFF FS FFFFFFFFFFFFFFFF GS FFFFFFFFFFFFFFFF BASE ES 00000000 CS 00000000 SS 00000000 DS 00000000 FS 00000000 GS FFFF880150300000 SEL ES 00000000 CS 00000010 SS 00000000 DS 00000000 FS 00000000 GS 00000000 RIP FFFFFFFF81027A56 RFLAGS 00010046 GDTR FFFF880150304000+0000007F IDTR FFFFFFFF8172D000+00000FFF EFER 00000D01 Exit reason: 0x30 (EPT Violation) Exit qualification 00000182 Exit intr info 00000000 Exit intr errcode 00000000 VMCS IDTR ffffffff8172d000+00000fff VMCS RFLAGS 00010046 Guest phys addr fee000b0 Guest linear addr ffffffffff5fb0b0 pe=1 pg=1 sw:en=0x0 es=0x9a00 cs=0x9a00 ss=0x9a00 ds=0x9a00 fs=0x0 gs=0x0 ------------------------------------------------
(gdb) list *0x4010EFF1 0x4010eff1 is in gmm_trans_gp2hp (gmm_trans.c:70). 65 66 while (low <= high) { 67 mid = (low + high) / 2; 68 if (gphys < mem_map[mid].gphys_start) { 69 high = mid - 1; 70 } else if (gphys > mem_map[mid].gphys_end) { 71 low = mid + 1; 72 } else { 73 return mem_map[mid].hphys.start 74 + (gphys - mem_map[mid].gphys_start); (gdb) list *0x4010eb5c 0x4010eb5c is in cmpxchg_gphys_q (gmm_access.c:210). 205 mmio_lock (); 206 if (mmio_access_memory (phys, false, olddata, 8, flags)) 207 panic ("CMPXCHG MMIO!"); 208 mmio_unlock (); 209 phys = current->vm->gmm.gp2hp(phys); 210 if (phys == GMM_NO_MAPPING) { 211 panic ("cmpxchg_gphys_l modifying no-mapping memory."); 212 } 213 return cmpxchg_hphys_q (phys, olddata, data, flags); 214 } (gdb) list *0x401163dd 0x401163dd is in pmap_read (mm.c:528). 523 break; 524 case PMAP_TYPE_GUEST: 525 read_gphys_q (phys, &r, attr); 526 break; 527 case PMAP_TYPE_GUEST_ATOMIC: 528 cmpxchg_gphys_q (phys, &r, r, attr); 529 break; 530 } 531 return r; 532 } (gdb) list *0x4010bd92 0x4010bd92 is in mmu_get_pte (cpu_mmu.c:197). 192 entries[levels] = entry; 193 for (i = levels; i >= 1; i--) { 194 pmap_setlevel (&m, i); 195 retry: 196 entry = pmap_read (&m); 197 if (!(entry & PDE_P_BIT)) 198 goto ret_nopage; 199 if (test_pmap_entry_reserved_bit (entry, i, levels, d)) 200 goto ret_reserved; 201 if (levels == 3 && i == 3) /* simplify2 */ (gdb) list *0x4010c289 0x4010c289 is in get_pte (cpu_mmu.c:272). 267 current->vmctl.read_control_reg (CONTROL_REG_CR3, &cr3); 268 current->vmctl.read_control_reg (CONTROL_REG_CR4, &cr4); 269 current->vmctl.read_msr (MSR_IA32_EFER, &efer); 270 r = mmu_get_pte (virt, cr0, cr3, cr4, efer, wr, us, ex, entries, 271 &levels); 272 if (r == VMMERR_SUCCESS) 273 *pte = entries[0]; 274 return r; 275 } 276 (gdb) list *0x4010c4d6 0x4010c4d6 is in read_linearaddr_b (cpu_mmu.c:374). 
369 bool us; 370 vmmerr_t err; 371 372 us = USER_MODE(); 373 err = get_pte(linear, false, us, false /* FIXME */, &pte); 374 if (err) { 375 mmu_generate_pagefault(err, false, us, linear); 376 return err; 377 } 378 read_gphys_b ((pte & PTE_ADDR_MASK64) | (linear & 0xFFF), data, (gdb) list *0x4010cb81 0x4010cb81 is in cpu_seg_read_b (cpu_seg.c:56). 51 /* FIXME: expand-down */ 52 /* FIXME: limit check */ 53 /* FIXME: CPL check */ 54 /* FIXME: access rights check */ 55 linear = base + offset; 56 RIE (read_linearaddr_b (linear, data)); 57 return VMMERR_SUCCESS; 58 } 59 60 vmmerr_t 0x4010954c is in get_modrm (cpu_interpreter.c:1074). 1069 case 2: 1070 READ_NEXT_W (op, &tmp2); 1071 op->disp = tmp2; 1072 break; 1073 case 4: 1074 READ_NEXT_L (op, &op->disp); 1075 break; 1076 default: 1077 op->disp = 0; 1078 }
CPU6 で、割り込みベクタ 239 (0xef) に対する EOI が繰り返し発行されていた。
arch/x86/include/asm/irq_vectors.h で 0xef の用途を調べると、ローカルタイマ割り込みだった。
#define LOCAL_TIMER_VECTOR 0xef
4GB 以上の物理メモリも VMM の仮想アドレス空間に固定的にマップするようにしたところ、4GB 以上のメモリのみを割り当てた vm1 で Linux の起動に成功した。
4GB 以上の物理メモリに対するマップ/アンマップのオーバヘッドが大きすぎて、ストールしてしまったようです。
Index: vmm/core/mm.h =================================================================== --- vmm/core/mm.h (リビジョン 110) +++ vmm/core/mm.h (作業コピー) @@ -77,7 +77,7 @@ #ifdef __x86_64__ # define PDPE_ATTR (PDE_P_BIT | PDE_RW_BIT | PDE_US_BIT) -# define NUM_OF_HPHYS_PAGES (1 * 1024 * 1024) +# define NUM_OF_HPHYS_PAGES (2 * 1024 * 1024) # define HPHYS_ADDR (1ULL << (12 + 9 + 9 + 9)) #else # define PDPE_ATTR PDE_P_BIT
r111 で、PC に実装されている物理メモリをすべて VMM の仮想アドレス空間に固定的にマップするようにしました。仮に 128GB の物理メモリが実装されていても、必要なページテーブルは 2MB なので、メモリ使用量としても問題ないと思います。
4GB 以上のメモリを vm1 に割り当てると、Linux が起動中にストールする。