ANDROID: fix a race between speculative page walk and unmap operations

Speculative page fault walks the page tables under RCU protection and
assumes that page tables are stable after the ptl lock is taken. The
current implementation has three issues:
1. While the pmd can't be destroyed while in an RCU read-side section,
it can be cleared, which results in an invalid ptl lock address. Fix
that by rechecking the pmd value after obtaining the ptl lock.
2. With CONFIG_ALLOC_SPLIT_PTLOCKS, the ptl lock is separate from the
pmd and is destroyed by a synchronous call to pgtable_pmd_page_dtor,
which can happen while the page walker is in an RCU read-side section.
Prevent this by making CONFIG_SPECULATIVE_PAGE_FAULT depend on
!CONFIG_ALLOC_SPLIT_PTLOCKS.
3. The sequence below, in which do_mmap happens after the last mmap_seq
check, results in a use-after-free:

__pte_map_lock
      rcu_read_lock()
      mmap_seq_read_check()

      ptl = pte_lockptr(vmf->pmd)

      spin_trylock(ptl)
      mmap_seq_read_check()
                             mmap_write_lock()
                             do_mmap()
                               unmap_region()
                                 unmap_vmas()
                                 free_pgtables()
                                   ...
                                   free_pte_range
                                   pmd_clear
                                     pte_free_tlb
                                        ...
                                        call_rcu(tlb_remove_table_rcu)

      rcu_read_unlock()
                             tlb_remove_table_rcu
      spin_unlock(ptl) <-- UAF!

To prevent this, free_pte_range needs to be blocked while the ptl is
locked and still in use; the resulting ordering is sketched below.
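
A simplified sketch of that ordering, in the same format as the trace
above (abbreviated, not the exact kernel code), for the case where the
speculative walker takes the ptl first:

__pte_map_lock
      rcu_read_lock()
      mmap_seq_read_check()
      pmdval = READ_ONCE(*vmf->pmd)  <-- detects a cleared pmd (issue 1)
      ptl = pte_lockptr(&pmdval)
      spin_trylock(ptl)
      mmap_seq_read_check()
                             mmap_write_lock()
                             do_mmap()
                               unmap_region()
                                 unmap_vmas()
                                 free_pgtables()
                                   ...
                                   free_pte_range
                                     pmd_lock() <-- spins: walker holds ptl
      spin_unlock(ptl)
      rcu_read_unlock()
                                     spin_unlock()
                                     pmd_clear
                                       pte_free_tlb
                                          ...
                                          call_rcu(tlb_remove_table_rcu)

The page table is queued for RCU freeing only after the walker has
dropped the ptl, so the walker's spin_unlock can no longer hit freed
memory. In the opposite case, where free_pte_range passes its barrier
first, mmap_write_lock has already bumped the sequence count, so the
second mmap_seq_read_check fails and the walker drops the ptl while
still inside its RCU read-side section, before the page table can be
freed.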

Bug: 245389404
Change-Id: I7b353f0995fc59e92bb2069bcdc7d1ac29b521b9
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
(cherry picked from commit 365ffc56b4862e9b49097450a1ad10392723bbe0)
 2 files changed, 20 insertions(+), 6 deletions(-)

@@ -951,6 +951,8 @@ config SPECULATIVE_PAGE_FAULT
bool "Speculative page faults" bool "Speculative page faults"
default y default y
depends on ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT && MMU && SMP depends on ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT && MMU && SMP
# split ptl lock can result in PTL destruction under RCU
depends on !ALLOC_SPLIT_PTLOCKS
help help
Try to handle user space page faults without holding the mmap lock. Try to handle user space page faults without holding the mmap lock.

@@ -217,6 +217,16 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 			   unsigned long addr)
 {
 	pgtable_t token = pmd_pgtable(*pmd);
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	/*
+	 * Ensure page table destruction is blocked if __pte_map_lock managed
+	 * to take this lock. Without this barrier tlb_remove_table_rcu can
+	 * destroy ptl after __pte_map_lock locked it and during unlock would
+	 * cause a use-after-free.
+	 */
+	spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
+	spin_unlock(ptl);
+#endif
 	pmd_clear(pmd);
 	pte_free_tlb(tlb, token, addr);
 	mm_dec_nr_ptes(tlb->mm);
@@ -2741,9 +2751,7 @@ EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
 bool __pte_map_lock(struct vm_fault *vmf)
 {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	pmd_t pmdval;
-#endif
 	pte_t *pte = vmf->pte;
 	spinlock_t *ptl;
@@ -2764,20 +2772,20 @@ bool __pte_map_lock(struct vm_fault *vmf)
 	 * tables are still valid at that point, and
 	 * speculative_page_walk_begin() ensures that they stay around.
 	 */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/*
 	 * We check if the pmd value is still the same to ensure that there
 	 * is not a huge collapse operation in progress in our back.
+	 * It also ensures that pmd was not cleared by pmd_clear in
+	 * free_pte_range and ptl is still valid.
 	 */
 	pmdval = READ_ONCE(*vmf->pmd);
 	if (!pmd_same(pmdval, vmf->orig_pmd)) {
 		count_vm_spf_event(SPF_ABORT_PTE_MAP_LOCK_PMD);
 		goto fail;
 	}
-#endif
-	ptl = pte_lockptr(vmf->vma->vm_mm, vmf->pmd);
+	ptl = pte_lockptr(vmf->vma->vm_mm, &pmdval);
 	if (!pte)
-		pte = pte_offset_map(vmf->pmd, vmf->address);
+		pte = pte_offset_map(&pmdval, vmf->address);
 	/*
 	 * Try locking the page table.
 	 *
@@ -2794,6 +2802,10 @@ bool __pte_map_lock(struct vm_fault *vmf)
 		count_vm_spf_event(SPF_ABORT_PTE_MAP_LOCK_PTL);
 		goto fail;
 	}
+	/*
+	 * The check below will fail if free_pte_range passed its ptl barrier
+	 * before we took the ptl lock.
+	 */
 	if (!mmap_seq_read_check(vmf->vma->vm_mm, vmf->seq,
 				 SPF_ABORT_PTE_MAP_LOCK_SEQ2))
 		goto unlock_fail;