Merge tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm and dax updates from Dan Williams:
"Save for a few late fixes, all of these commits have shipped in -next
releases since before the merge window opened, and 0day has given a
build success notification.
The ext4 touches came from Jan, and the xfs touches have Darrick's
reviewed-by. An xfstest for the MAP_SYNC feature has been through
a few round of reviews and is on track to be merged.
- Introduce MAP_SYNC and MAP_SHARED_VALIDATE, a mechanism to enable
'userspace flush' of persistent memory updates via filesystem-dax
mappings. It arranges for any filesystem metadata updates that may
be required to satisfy a write fault to also be flushed ("on disk")
before the kernel returns to userspace from the fault handler.
Effectively every write-fault that dirties metadata completes an
fsync() before returning from the fault handler. The new
MAP_SHARED_VALIDATE mapping type guarantees that the MAP_SYNC flag
is validated as supported by the filesystem's ->mmap() file
operation.
- Add support for the standard ACPI 6.2 label access methods that
replace the NVDIMM_FAMILY_INTEL (vendor specific) label methods.
This enables interoperability with environments that only implement
the standardized methods.
- Add support for the ACPI 6.2 NVDIMM media error injection methods.
- Add support for the NVDIMM_FAMILY_INTEL v1.6 DIMM commands for
latch last shutdown status, firmware update, SMART error injection,
and SMART alarm threshold control.
- Cleanup physical address information disclosures to be root-only.
- Fix revalidation of the DIMM "locked label area" status to support
dynamic unlock of the label area.
- Expand unit test infrastructure to mock the ACPI 6.2 Translate SPA
(system-physical-address) command and error injection commands.
Acknowledgements that came after the commits were pushed to -next:
- 957ac8c421 ("dax: fix PMD faults on zero-length files"):
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
- a39e596baa ("xfs: support for synchronous DAX faults") and
7b565c9f96 ("xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()")
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>"
* tag 'libnvdimm-for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (49 commits)
acpi, nfit: add 'Enable Latch System Shutdown Status' command support
dax: fix general protection fault in dax_alloc_inode
dax: fix PMD faults on zero-length files
dax: stop requiring a live device for dax_flush()
brd: remove dax support
dax: quiet bdev_dax_supported()
fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core
tools/testing/nvdimm: unit test clear-error commands
acpi, nfit: validate commands against the device type
tools/testing/nvdimm: stricter bounds checking for error injection commands
xfs: support for synchronous DAX faults
xfs: Implement xfs_filemap_pfn_mkwrite() using __xfs_filemap_fault()
ext4: Support for synchronous DAX faults
ext4: Simplify error handling in ext4_dax_huge_fault()
dax: Implement dax_finish_sync_fault()
dax, iomap: Add support for synchronous faults
mm: Define MAP_SYNC and VM_SYNC flags
dax: Allow tuning whether dax_insert_mapping_entry() dirties entry
dax: Allow dax_iomap_fault() to return pfn
dax: Fix comment describing dax_iomap_fault()
...
This commit is contained in:
323
fs/dax.c
323
fs/dax.c
@@ -526,13 +526,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
|
||||
static void *dax_insert_mapping_entry(struct address_space *mapping,
|
||||
struct vm_fault *vmf,
|
||||
void *entry, sector_t sector,
|
||||
unsigned long flags)
|
||||
unsigned long flags, bool dirty)
|
||||
{
|
||||
struct radix_tree_root *page_tree = &mapping->page_tree;
|
||||
void *new_entry;
|
||||
pgoff_t index = vmf->pgoff;
|
||||
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
if (dirty)
|
||||
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
|
||||
|
||||
if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
|
||||
@@ -569,7 +569,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
|
||||
entry = new_entry;
|
||||
}
|
||||
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
if (dirty)
|
||||
radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
|
||||
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
@@ -825,38 +825,42 @@ out:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
|
||||
|
||||
static int dax_insert_mapping(struct address_space *mapping,
|
||||
struct block_device *bdev, struct dax_device *dax_dev,
|
||||
sector_t sector, size_t size, void *entry,
|
||||
struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
|
||||
{
|
||||
unsigned long vaddr = vmf->address;
|
||||
void *ret, *kaddr;
|
||||
pgoff_t pgoff;
|
||||
int id, rc;
|
||||
pfn_t pfn;
|
||||
return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
|
||||
}
|
||||
|
||||
rc = bdev_dax_pgoff(bdev, sector, size, &pgoff);
|
||||
static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
|
||||
pfn_t *pfnp)
|
||||
{
|
||||
const sector_t sector = dax_iomap_sector(iomap, pos);
|
||||
pgoff_t pgoff;
|
||||
void *kaddr;
|
||||
int id, rc;
|
||||
long length;
|
||||
|
||||
rc = bdev_dax_pgoff(iomap->bdev, sector, size, &pgoff);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
id = dax_read_lock();
|
||||
rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn);
|
||||
if (rc < 0) {
|
||||
dax_read_unlock(id);
|
||||
return rc;
|
||||
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
|
||||
&kaddr, pfnp);
|
||||
if (length < 0) {
|
||||
rc = length;
|
||||
goto out;
|
||||
}
|
||||
rc = -EINVAL;
|
||||
if (PFN_PHYS(length) < size)
|
||||
goto out;
|
||||
if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
|
||||
goto out;
|
||||
/* For larger pages we need devmap */
|
||||
if (length > 1 && !pfn_t_devmap(*pfnp))
|
||||
goto out;
|
||||
rc = 0;
|
||||
out:
|
||||
dax_read_unlock(id);
|
||||
|
||||
ret = dax_insert_mapping_entry(mapping, vmf, entry, sector, 0);
|
||||
if (IS_ERR(ret))
|
||||
return PTR_ERR(ret);
|
||||
|
||||
trace_dax_insert_mapping(mapping->host, vmf, ret);
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
return vm_insert_mixed_mkwrite(vma, vaddr, pfn);
|
||||
else
|
||||
return vm_insert_mixed(vma, vaddr, pfn);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -882,7 +886,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
|
||||
}
|
||||
|
||||
entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
|
||||
RADIX_DAX_ZERO_PAGE);
|
||||
RADIX_DAX_ZERO_PAGE, false);
|
||||
if (IS_ERR(entry2)) {
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
goto out;
|
||||
@@ -941,11 +945,6 @@ int __dax_zero_page_range(struct block_device *bdev,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
|
||||
|
||||
static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
|
||||
{
|
||||
return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
|
||||
}
|
||||
|
||||
static loff_t
|
||||
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
|
||||
struct iomap *iomap)
|
||||
@@ -1085,19 +1084,33 @@ static int dax_fault_return(int error)
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
/*
|
||||
* MAP_SYNC on a dax mapping guarantees dirty metadata is
|
||||
* flushed on write-faults (non-cow), but not read-faults.
|
||||
*/
|
||||
static bool dax_fault_is_synchronous(unsigned long flags,
|
||||
struct vm_area_struct *vma, struct iomap *iomap)
|
||||
{
|
||||
return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
|
||||
&& (iomap->flags & IOMAP_F_DIRTY);
|
||||
}
|
||||
|
||||
static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
||||
const struct iomap_ops *ops)
|
||||
{
|
||||
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
struct address_space *mapping = vma->vm_file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
unsigned long vaddr = vmf->address;
|
||||
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
|
||||
sector_t sector;
|
||||
struct iomap iomap = { 0 };
|
||||
unsigned flags = IOMAP_FAULT;
|
||||
int error, major = 0;
|
||||
bool write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
bool sync;
|
||||
int vmf_ret = 0;
|
||||
void *entry;
|
||||
pfn_t pfn;
|
||||
|
||||
trace_dax_pte_fault(inode, vmf, vmf_ret);
|
||||
/*
|
||||
@@ -1110,7 +1123,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
|
||||
if (write && !vmf->cow_page)
|
||||
flags |= IOMAP_WRITE;
|
||||
|
||||
entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
|
||||
@@ -1145,9 +1158,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
goto error_finish_iomap;
|
||||
}
|
||||
|
||||
sector = dax_iomap_sector(&iomap, pos);
|
||||
|
||||
if (vmf->cow_page) {
|
||||
sector_t sector = dax_iomap_sector(&iomap, pos);
|
||||
|
||||
switch (iomap.type) {
|
||||
case IOMAP_HOLE:
|
||||
case IOMAP_UNWRITTEN:
|
||||
@@ -1173,22 +1186,55 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
goto finish_iomap;
|
||||
}
|
||||
|
||||
sync = dax_fault_is_synchronous(flags, vma, &iomap);
|
||||
|
||||
switch (iomap.type) {
|
||||
case IOMAP_MAPPED:
|
||||
if (iomap.flags & IOMAP_F_NEW) {
|
||||
count_vm_event(PGMAJFAULT);
|
||||
count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
|
||||
count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
|
||||
major = VM_FAULT_MAJOR;
|
||||
}
|
||||
error = dax_insert_mapping(mapping, iomap.bdev, iomap.dax_dev,
|
||||
sector, PAGE_SIZE, entry, vmf->vma, vmf);
|
||||
error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
|
||||
if (error < 0)
|
||||
goto error_finish_iomap;
|
||||
|
||||
entry = dax_insert_mapping_entry(mapping, vmf, entry,
|
||||
dax_iomap_sector(&iomap, pos),
|
||||
0, write && !sync);
|
||||
if (IS_ERR(entry)) {
|
||||
error = PTR_ERR(entry);
|
||||
goto error_finish_iomap;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are doing synchronous page fault and inode needs fsync,
|
||||
* we can insert PTE into page tables only after that happens.
|
||||
* Skip insertion for now and return the pfn so that caller can
|
||||
* insert it after fsync is done.
|
||||
*/
|
||||
if (sync) {
|
||||
if (WARN_ON_ONCE(!pfnp)) {
|
||||
error = -EIO;
|
||||
goto error_finish_iomap;
|
||||
}
|
||||
*pfnp = pfn;
|
||||
vmf_ret = VM_FAULT_NEEDDSYNC | major;
|
||||
goto finish_iomap;
|
||||
}
|
||||
trace_dax_insert_mapping(inode, vmf, entry);
|
||||
if (write)
|
||||
error = vm_insert_mixed_mkwrite(vma, vaddr, pfn);
|
||||
else
|
||||
error = vm_insert_mixed(vma, vaddr, pfn);
|
||||
|
||||
/* -EBUSY is fine, somebody else faulted on the same PTE */
|
||||
if (error == -EBUSY)
|
||||
error = 0;
|
||||
break;
|
||||
case IOMAP_UNWRITTEN:
|
||||
case IOMAP_HOLE:
|
||||
if (!(vmf->flags & FAULT_FLAG_WRITE)) {
|
||||
if (!write) {
|
||||
vmf_ret = dax_load_hole(mapping, entry, vmf);
|
||||
goto finish_iomap;
|
||||
}
|
||||
@@ -1223,53 +1269,11 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FS_DAX_PMD
|
||||
static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
|
||||
loff_t pos, void *entry)
|
||||
{
|
||||
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
|
||||
const sector_t sector = dax_iomap_sector(iomap, pos);
|
||||
struct dax_device *dax_dev = iomap->dax_dev;
|
||||
struct block_device *bdev = iomap->bdev;
|
||||
struct inode *inode = mapping->host;
|
||||
const size_t size = PMD_SIZE;
|
||||
void *ret = NULL, *kaddr;
|
||||
long length = 0;
|
||||
pgoff_t pgoff;
|
||||
pfn_t pfn = {};
|
||||
int id;
|
||||
|
||||
if (bdev_dax_pgoff(bdev, sector, size, &pgoff) != 0)
|
||||
goto fallback;
|
||||
|
||||
id = dax_read_lock();
|
||||
length = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn);
|
||||
if (length < 0)
|
||||
goto unlock_fallback;
|
||||
length = PFN_PHYS(length);
|
||||
|
||||
if (length < size)
|
||||
goto unlock_fallback;
|
||||
if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR)
|
||||
goto unlock_fallback;
|
||||
if (!pfn_t_devmap(pfn))
|
||||
goto unlock_fallback;
|
||||
dax_read_unlock(id);
|
||||
|
||||
ret = dax_insert_mapping_entry(mapping, vmf, entry, sector,
|
||||
RADIX_DAX_PMD);
|
||||
if (IS_ERR(ret))
|
||||
goto fallback;
|
||||
|
||||
trace_dax_pmd_insert_mapping(inode, vmf, length, pfn, ret);
|
||||
return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
|
||||
pfn, vmf->flags & FAULT_FLAG_WRITE);
|
||||
|
||||
unlock_fallback:
|
||||
dax_read_unlock(id);
|
||||
fallback:
|
||||
trace_dax_pmd_insert_mapping_fallback(inode, vmf, length, pfn, ret);
|
||||
return VM_FAULT_FALLBACK;
|
||||
}
|
||||
/*
|
||||
* The 'colour' (ie low bits) within a PMD of a page offset. This comes up
|
||||
* more often than one might expect in the below functions.
|
||||
*/
|
||||
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
|
||||
|
||||
static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
|
||||
void *entry)
|
||||
@@ -1288,7 +1292,7 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
|
||||
goto fallback;
|
||||
|
||||
ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
|
||||
RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE);
|
||||
RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
|
||||
if (IS_ERR(ret))
|
||||
goto fallback;
|
||||
|
||||
@@ -1310,13 +1314,14 @@ fallback:
|
||||
return VM_FAULT_FALLBACK;
|
||||
}
|
||||
|
||||
static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
||||
const struct iomap_ops *ops)
|
||||
{
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
struct address_space *mapping = vma->vm_file->f_mapping;
|
||||
unsigned long pmd_addr = vmf->address & PMD_MASK;
|
||||
bool write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
bool sync;
|
||||
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
|
||||
struct inode *inode = mapping->host;
|
||||
int result = VM_FAULT_FALLBACK;
|
||||
@@ -1325,6 +1330,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
void *entry;
|
||||
loff_t pos;
|
||||
int error;
|
||||
pfn_t pfn;
|
||||
|
||||
/*
|
||||
* Check whether offset isn't beyond end of file now. Caller is
|
||||
@@ -1332,7 +1338,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
* this is a reliable test.
|
||||
*/
|
||||
pgoff = linear_page_index(vma, pmd_addr);
|
||||
max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
|
||||
max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
|
||||
|
||||
trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
|
||||
|
||||
@@ -1356,13 +1362,13 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
|
||||
goto fallback;
|
||||
|
||||
if (pgoff > max_pgoff) {
|
||||
if (pgoff >= max_pgoff) {
|
||||
result = VM_FAULT_SIGBUS;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* If the PMD would extend beyond the file size */
|
||||
if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
|
||||
if ((pgoff | PG_PMD_COLOUR) >= max_pgoff)
|
||||
goto fallback;
|
||||
|
||||
/*
|
||||
@@ -1400,9 +1406,37 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
if (iomap.offset + iomap.length < pos + PMD_SIZE)
|
||||
goto finish_iomap;
|
||||
|
||||
sync = dax_fault_is_synchronous(iomap_flags, vma, &iomap);
|
||||
|
||||
switch (iomap.type) {
|
||||
case IOMAP_MAPPED:
|
||||
result = dax_pmd_insert_mapping(vmf, &iomap, pos, entry);
|
||||
error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
|
||||
if (error < 0)
|
||||
goto finish_iomap;
|
||||
|
||||
entry = dax_insert_mapping_entry(mapping, vmf, entry,
|
||||
dax_iomap_sector(&iomap, pos),
|
||||
RADIX_DAX_PMD, write && !sync);
|
||||
if (IS_ERR(entry))
|
||||
goto finish_iomap;
|
||||
|
||||
/*
|
||||
* If we are doing synchronous page fault and inode needs fsync,
|
||||
* we can insert PMD into page tables only after that happens.
|
||||
* Skip insertion for now and return the pfn so that caller can
|
||||
* insert it after fsync is done.
|
||||
*/
|
||||
if (sync) {
|
||||
if (WARN_ON_ONCE(!pfnp))
|
||||
goto finish_iomap;
|
||||
*pfnp = pfn;
|
||||
result = VM_FAULT_NEEDDSYNC;
|
||||
goto finish_iomap;
|
||||
}
|
||||
|
||||
trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
|
||||
result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
|
||||
write);
|
||||
break;
|
||||
case IOMAP_UNWRITTEN:
|
||||
case IOMAP_HOLE:
|
||||
@@ -1442,7 +1476,7 @@ out:
|
||||
return result;
|
||||
}
|
||||
#else
|
||||
static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
|
||||
const struct iomap_ops *ops)
|
||||
{
|
||||
return VM_FAULT_FALLBACK;
|
||||
@@ -1452,7 +1486,9 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
/**
|
||||
* dax_iomap_fault - handle a page fault on a DAX file
|
||||
* @vmf: The description of the fault
|
||||
* @ops: iomap ops passed from the file system
|
||||
* @pe_size: Size of the page to fault in
|
||||
* @pfnp: PFN to insert for synchronous faults if fsync is required
|
||||
* @ops: Iomap ops passed from the file system
|
||||
*
|
||||
* When a page fault occurs, filesystems may call this helper in
|
||||
* their fault handler for DAX files. dax_iomap_fault() assumes the caller
|
||||
@@ -1460,15 +1496,98 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
|
||||
* successfully.
|
||||
*/
|
||||
int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
|
||||
const struct iomap_ops *ops)
|
||||
pfn_t *pfnp, const struct iomap_ops *ops)
|
||||
{
|
||||
switch (pe_size) {
|
||||
case PE_SIZE_PTE:
|
||||
return dax_iomap_pte_fault(vmf, ops);
|
||||
return dax_iomap_pte_fault(vmf, pfnp, ops);
|
||||
case PE_SIZE_PMD:
|
||||
return dax_iomap_pmd_fault(vmf, ops);
|
||||
return dax_iomap_pmd_fault(vmf, pfnp, ops);
|
||||
default:
|
||||
return VM_FAULT_FALLBACK;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_iomap_fault);
|
||||
|
||||
/**
|
||||
* dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables
|
||||
* @vmf: The description of the fault
|
||||
* @pe_size: Size of entry to be inserted
|
||||
* @pfn: PFN to insert
|
||||
*
|
||||
* This function inserts writeable PTE or PMD entry into page tables for mmaped
|
||||
* DAX file. It takes care of marking corresponding radix tree entry as dirty
|
||||
* as well.
|
||||
*/
|
||||
static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
|
||||
enum page_entry_size pe_size,
|
||||
pfn_t pfn)
|
||||
{
|
||||
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
|
||||
void *entry, **slot;
|
||||
pgoff_t index = vmf->pgoff;
|
||||
int vmf_ret, error;
|
||||
|
||||
spin_lock_irq(&mapping->tree_lock);
|
||||
entry = get_unlocked_mapping_entry(mapping, index, &slot);
|
||||
/* Did we race with someone splitting entry or so? */
|
||||
if (!entry ||
|
||||
(pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
|
||||
(pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
|
||||
put_unlocked_mapping_entry(mapping, index, entry);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
|
||||
VM_FAULT_NOPAGE);
|
||||
return VM_FAULT_NOPAGE;
|
||||
}
|
||||
radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
|
||||
entry = lock_slot(mapping, slot);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
switch (pe_size) {
|
||||
case PE_SIZE_PTE:
|
||||
error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
|
||||
vmf_ret = dax_fault_return(error);
|
||||
break;
|
||||
#ifdef CONFIG_FS_DAX_PMD
|
||||
case PE_SIZE_PMD:
|
||||
vmf_ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
|
||||
pfn, true);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
vmf_ret = VM_FAULT_FALLBACK;
|
||||
}
|
||||
put_locked_mapping_entry(mapping, index);
|
||||
trace_dax_insert_pfn_mkwrite(mapping->host, vmf, vmf_ret);
|
||||
return vmf_ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* dax_finish_sync_fault - finish synchronous page fault
|
||||
* @vmf: The description of the fault
|
||||
* @pe_size: Size of entry to be inserted
|
||||
* @pfn: PFN to insert
|
||||
*
|
||||
* This function ensures that the file range touched by the page fault is
|
||||
* stored persistently on the media and handles inserting of appropriate page
|
||||
* table entry.
|
||||
*/
|
||||
int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
|
||||
pfn_t pfn)
|
||||
{
|
||||
int err;
|
||||
loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
|
||||
size_t len = 0;
|
||||
|
||||
if (pe_size == PE_SIZE_PTE)
|
||||
len = PAGE_SIZE;
|
||||
else if (pe_size == PE_SIZE_PMD)
|
||||
len = PMD_SIZE;
|
||||
else
|
||||
WARN_ON_ONCE(1);
|
||||
err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1);
|
||||
if (err)
|
||||
return VM_FAULT_SIGBUS;
|
||||
return dax_insert_pfn_mkwrite(vmf, pe_size, pfn);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
|
||||
|
||||
Reference in New Issue
Block a user