[PATCH 01/40] KVM: MMU: Update shadow ptes on partial guest pte writes



From: Dong, Eddie <eddie.dong@xxxxxxxxx>

A guest partial guest pte write will leave shadow_trap_nonpresent_pte
in spte, which generates a vmexit at the next guest access through that pte.

This patch improves this by reading the full guest pte in advance and thus
being able to update the spte and eliminate the vmexit.

This helps pae guests which use two 32-bit writes to set a single 64-bit pte.

[truncation fix by Eric]

Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@xxxxxxxxx>
Signed-off-by: Feng (Eric) Liu <eric.e.liu@xxxxxxxxx>
Signed-off-by: Avi Kivity <avi@xxxxxxxxxxxx>
---
arch/x86/kvm/mmu.c | 23 ++++++++++++++++-------
arch/x86/kvm/paging_tmpl.h | 7 ++-----
2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e55af12..28f9a44 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1329,8 +1329,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
u64 *spte,
- const void *new, int bytes,
- int offset_in_pte)
+ const void *new)
{
if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
++vcpu->kvm->stat.mmu_pde_zapped;
@@ -1339,9 +1338,9 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,

++vcpu->kvm->stat.mmu_pte_updated;
if (sp->role.glevels == PT32_ROOT_LEVEL)
- paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+ paging32_update_pte(vcpu, sp, spte, new);
else
- paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+ paging64_update_pte(vcpu, sp, spte, new);
}

static bool need_remote_flush(u64 old, u64 new)
@@ -1423,7 +1422,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
struct hlist_node *node, *n;
struct hlist_head *bucket;
unsigned index;
- u64 entry;
+ u64 entry, gentry;
u64 *spte;
unsigned offset = offset_in_page(gpa);
unsigned pte_size;
@@ -1433,6 +1432,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int level;
int flooded = 0;
int npte;
+ int r;

pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
@@ -1496,11 +1496,20 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
continue;
}
spte = &sp->spt[page_offset / sizeof(*spte)];
+ if ((gpa & (pte_size - 1)) || (bytes < pte_size)) {
+ gentry = 0;
+ r = kvm_read_guest_atomic(vcpu->kvm,
+ gpa & ~(u64)(pte_size - 1),
+ &gentry, pte_size);
+ new = (const void *)&gentry;
+ if (r < 0)
+ new = NULL;
+ }
while (npte--) {
entry = *spte;
mmu_pte_write_zap_pte(vcpu, sp, spte);
- mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes,
- page_offset & (pte_size - 1));
+ if (new)
+ mmu_pte_write_new_pte(vcpu, sp, spte, new);
mmu_pte_write_flush_tlb(vcpu, entry, *spte);
++spte;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ecc0856..c2fd2b9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -243,8 +243,7 @@ err:
}

static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
- u64 *spte, const void *pte, int bytes,
- int offset_in_pte)
+ u64 *spte, const void *pte)
{
pt_element_t gpte;
unsigned pte_access;
@@ -252,12 +251,10 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,

gpte = *(const pt_element_t *)pte;
if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
- if (!offset_in_pte && !is_present_pte(gpte))
+ if (!is_present_pte(gpte))
set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
return;
}
- if (bytes < sizeof(pt_element_t))
- return;
pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
--
1.5.4.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



Relevant Pages

  • [PATCH 4/33] KVM: MMU: Fold fetch_guest() into init_walker()
    ... It is never necessary to fetch a guest entry from an intermediate page table ... * Fetch a guest pte from a specific level in the paging hierarchy. ... Fetch a shadow pte for a specific level in the paging hierarchy. ...
    (Linux-Kernel)
  • Re: [kvm-devel] mmu notifier #v14
    ... You should not assume a struct page exists for any given spte. ... Avi can correct me if I'm wrong, but I don't think the consensus of that discussion was that we're going to avoid putting mmio pages in the rmap. ... Does anything else ever call mmu_notifier_unregister that would implicitly destroy the VM? ... It seems a little strange to me as a KVM guest isn't really tied to the current mm. ...
    (Linux-Kernel)
  • [PATCH] kvm mmu: add support for 1GB pages in shadow paging code
    ... guest unstable when running with this patch and GB pages enabled. ... KVM is loaded using shadow paging ... while (spte) { ... #undef PT_MAX_FULL_LEVELS ...
    (Linux-Kernel)
  • [PATCH 06/47] KVM: MMU audit: audit_mappings tweaks
    ... For the pre pte write case, avoid spurious "gva is valid but spte is notrap" ... messages (the emulation code does the guest write first, ...
    (Linux-Kernel)
  • [PATCH 06/10] KVM: SVM: Add tracepoint for injected #vmexit
    ... This patch adds a tracepoint for a nested #vmexit that gets ... re-injected to the guest. ...
    (Linux-Kernel)