Files
kernel_arpi/fs/sync.c
Greg Kroah-Hartman 7a32f9d6ff Merge 5.15.36 into android14-5.15
Changes in 5.15.36
	fs: remove __sync_filesystem
	block: remove __sync_blockdev
	block: simplify the block device syncing code
	vfs: make sync_filesystem return errors from ->sync_fs
	xfs: return errors in xfs_fs_sync_fs
	dma-mapping: remove bogus test for pfn_valid from dma_map_resource
	arm64/mm: drop HAVE_ARCH_PFN_VALID
	etherdevice: Adjust ether_addr* prototypes to silence -Wstringop-overead
	mm: page_alloc: fix building error on -Werror=array-compare
	perf tools: Fix segfault accessing sample_id xyarray
	mm, kfence: support kmem_dump_obj() for KFENCE objects
	gfs2: assign rgrp glock before compute_bitstructs
	scsi: ufs: core: scsi_get_lba() error fix
	net/sched: cls_u32: fix netns refcount changes in u32_change()
	ALSA: usb-audio: Clear MIDI port active flag after draining
	ALSA: hda/realtek: Add quirk for Clevo NP70PNP
	ASoC: atmel: Remove system clock tree configuration for at91sam9g20ek
	ASoC: topology: Correct error handling in soc_tplg_dapm_widget_create()
	ASoC: rk817: Use devm_clk_get() in rk817_platform_probe
	ASoC: msm8916-wcd-digital: Check failure for devm_snd_soc_register_component
	ASoC: codecs: wcd934x: do not switch off SIDO Buck when codec is in use
	dmaengine: idxd: fix device cleanup on disable
	dmaengine: imx-sdma: Fix error checking in sdma_event_remap
	dmaengine: mediatek:Fix PM usage reference leak of mtk_uart_apdma_alloc_chan_resources
	dmaengine: dw-edma: Fix unaligned 64bit access
	spi: spi-mtk-nor: initialize spi controller after resume
	esp: limit skb_page_frag_refill use to a single page
	spi: cadence-quadspi: fix incorrect supports_op() return value
	igc: Fix infinite loop in release_swfw_sync
	igc: Fix BUG: scheduling while atomic
	igc: Fix suspending when PTM is active
	ALSA: hda/hdmi: fix warning about PCM count when used with SOF
	rxrpc: Restore removed timer deletion
	net/smc: Fix sock leak when release after smc_shutdown()
	net/packet: fix packet_sock xmit return value checking
	ip6_gre: Avoid updating tunnel->tun_hlen in __gre6_xmit()
	ip6_gre: Fix skb_under_panic in __gre6_xmit()
	net: restore alpha order to Ethernet devices in config
	net/sched: cls_u32: fix possible leak in u32_init_knode()
	l3mdev: l3mdev_master_upper_ifindex_by_index_rcu should be using netdev_master_upper_dev_get_rcu
	ipv6: make ip6_rt_gc_expire an atomic_t
	can: isotp: stop timeout monitoring when no first frame was sent
	net: dsa: hellcreek: Calculate checksums in tagger
	net: mscc: ocelot: fix broken IP multicast flooding
	netlink: reset network and mac headers in netlink_dump()
	drm/i915/display/psr: Unset enable_psr2_sel_fetch if other checks in intel_psr2_config_valid() fails
	net: stmmac: Use readl_poll_timeout_atomic() in atomic state
	dmaengine: idxd: add RO check for wq max_batch_size write
	dmaengine: idxd: add RO check for wq max_transfer_size write
	dmaengine: idxd: skip clearing device context when device is read-only
	selftests: mlxsw: vxlan_flooding: Prevent flooding of unwanted packets
	arm64: mm: fix p?d_leaf()
	ARM: vexpress/spc: Avoid negative array index when !SMP
	reset: renesas: Check return value of reset_control_deassert()
	reset: tegra-bpmp: Restore Handle errors in BPMP response
	platform/x86: samsung-laptop: Fix an unsigned comparison which can never be negative
	ALSA: usb-audio: Fix undefined behavior due to shift overflowing the constant
	drm/msm/disp: check the return value of kzalloc()
	arm64: dts: imx: Fix imx8*-var-som touchscreen property sizes
	vxlan: fix error return code in vxlan_fdb_append
	cifs: Check the IOCB_DIRECT flag, not O_DIRECT
	net: atlantic: Avoid out-of-bounds indexing
	mt76: Fix undefined behavior due to shift overflowing the constant
	brcmfmac: sdio: Fix undefined behavior due to shift overflowing the constant
	dpaa_eth: Fix missing of_node_put in dpaa_get_ts_info()
	drm/msm/mdp5: check the return of kzalloc()
	net: macb: Restart tx only if queue pointer is lagging
	scsi: iscsi: Release endpoint ID when its freed
	scsi: iscsi: Merge suspend fields
	scsi: iscsi: Fix NOP handling during conn recovery
	scsi: qedi: Fix failed disconnect handling
	stat: fix inconsistency between struct stat and struct compat_stat
	VFS: filename_create(): fix incorrect intent.
	nvme: add a quirk to disable namespace identifiers
	nvme-pci: disable namespace identifiers for the MAXIO MAP1002/1202
	nvme-pci: disable namespace identifiers for Qemu controllers
	EDAC/synopsys: Read the error count from the correct register
	mm/memory-failure.c: skip huge_zero_page in memory_failure()
	memcg: sync flush only if periodic flush is delayed
	mm, hugetlb: allow for "high" userspace addresses
	oom_kill.c: futex: delay the OOM reaper to allow time for proper futex cleanup
	mm/mmu_notifier.c: fix race in mmu_interval_notifier_remove()
	ata: pata_marvell: Check the 'bmdma_addr' beforing reading
	dma: at_xdmac: fix a missing check on list iterator
	dmaengine: imx-sdma: fix init of uart scripts
	net: atlantic: invert deep par in pm functions, preventing null derefs
	Input: omap4-keypad - fix pm_runtime_get_sync() error checking
	scsi: sr: Do not leak information in ioctl
	sched/pelt: Fix attach_entity_load_avg() corner case
	perf/core: Fix perf_mmap fail when CONFIG_PERF_USE_VMALLOC enabled
	drm/panel/raspberrypi-touchscreen: Avoid NULL deref if not initialised
	drm/panel/raspberrypi-touchscreen: Initialise the bridge in prepare
	KVM: PPC: Fix TCE handling for VFIO
	drm/vc4: Use pm_runtime_resume_and_get to fix pm_runtime_get_sync() usage
	powerpc/perf: Fix power9 event alternatives
	powerpc/perf: Fix power10 event alternatives
	perf script: Always allow field 'data_src' for auxtrace
	perf report: Set PERF_SAMPLE_DATA_SRC bit for Arm SPE event
	xtensa: patch_text: Fixup last cpu should be master
	xtensa: fix a7 clobbering in coprocessor context load/store
	openvswitch: fix OOB access in reserve_sfa_size()
	gpio: Request interrupts after IRQ is initialized
	ASoC: soc-dapm: fix two incorrect uses of list iterator
	e1000e: Fix possible overflow in LTR decoding
	ARC: entry: fix syscall_trace_exit argument
	arm_pmu: Validate single/group leader events
	KVM: x86/pmu: Update AMD PMC sample period to fix guest NMI-watchdog
	KVM: x86: Pend KVM_REQ_APICV_UPDATE during vCPU creation to fix a race
	KVM: nVMX: Defer APICv updates while L2 is active until L1 is active
	KVM: SVM: Flush when freeing encrypted pages even on SME_COHERENT CPUs
	netfilter: conntrack: convert to refcount_t api
	netfilter: conntrack: avoid useless indirection during conntrack destruction
	ext4: fix fallocate to use file_modified to update permissions consistently
	ext4: fix symlink file size not match to file content
	ext4: fix use-after-free in ext4_search_dir
	ext4: limit length to bitmap_maxbytes - blocksize in punch_hole
	ext4, doc: fix incorrect h_reserved size
	ext4: fix overhead calculation to account for the reserved gdt blocks
	ext4: force overhead calculation if the s_overhead_cluster makes no sense
	netfilter: nft_ct: fix use after free when attaching zone template
	jbd2: fix a potential race while discarding reserved buffers after an abort
	spi: atmel-quadspi: Fix the buswidth adjustment between spi-mem and controller
	block/compat_ioctl: fix range check in BLKGETSIZE
	arm64: dts: qcom: add IPA qcom,qmp property
	Linux 5.15.36

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Iba23a60bda8fd07f26fd7f9217f208c2e6ee26c2
2022-06-06 11:12:02 +02:00

384 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* High-level sync()-related operations
*/
#include <linux/blkdev.h>
#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/namei.h>
#include <linux/sched/xacct.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
#include <linux/linkage.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/backing-dev.h>
#include "internal.h"
#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
SYNC_FILE_RANGE_WAIT_AFTER)
/*
* Write out and wait upon all dirty data associated with this
* superblock. Filesystem data as well as the underlying block
* device. Takes the superblock lock.
*/
int sync_filesystem(struct super_block *sb)
{
int ret = 0;
/*
* We need to be protected against the filesystem going from
* r/o to r/w or vice versa.
*/
WARN_ON(!rwsem_is_locked(&sb->s_umount));
/*
* No point in syncing out anything if the filesystem is read-only.
*/
if (sb_rdonly(sb))
return 0;
/*
* Do the filesystem syncing work. For simple filesystems
* writeback_inodes_sb(sb) just dirties buffers with inodes so we have
* to submit I/O for these buffers via sync_blockdev(). This also
* speeds up the wait == 1 case since in that case write_inode()
* methods call sync_dirty_buffer() and thus effectively write one block
* at a time.
*/
writeback_inodes_sb(sb, WB_REASON_SYNC);
if (sb->s_op->sync_fs) {
ret = sb->s_op->sync_fs(sb, 0);
if (ret)
return ret;
}
ret = sync_blockdev_nowait(sb->s_bdev);
if (ret)
return ret;
sync_inodes_sb(sb);
if (sb->s_op->sync_fs) {
ret = sb->s_op->sync_fs(sb, 1);
if (ret)
return ret;
}
return sync_blockdev(sb->s_bdev);
}
EXPORT_SYMBOL_NS(sync_filesystem, ANDROID_GKI_VFS_EXPORT_ONLY);
static void sync_inodes_one_sb(struct super_block *sb, void *arg)
{
if (!sb_rdonly(sb))
sync_inodes_sb(sb);
}
static void sync_fs_one_sb(struct super_block *sb, void *arg)
{
if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, *(int *)arg);
}
/*
* Sync everything. We start by waking flusher threads so that most of
* writeback runs on all devices in parallel. Then we sync all inodes reliably
* which effectively also waits for all flusher threads to finish doing
* writeback. At this point all data is on disk so metadata should be stable
* and we tell filesystems to sync their metadata via ->sync_fs() calls.
* Finally, we writeout all block devices because some filesystems (e.g. ext2)
* just write metadata (such as inodes or bitmaps) to block device page cache
* and do not sync it on their own in ->sync_fs().
*/
void ksys_sync(void)
{
int nowait = 0, wait = 1;
wakeup_flusher_threads(WB_REASON_SYNC);
iterate_supers(sync_inodes_one_sb, NULL);
iterate_supers(sync_fs_one_sb, &nowait);
iterate_supers(sync_fs_one_sb, &wait);
sync_bdevs(false);
sync_bdevs(true);
if (unlikely(laptop_mode))
laptop_sync_completion();
}
SYSCALL_DEFINE0(sync)
{
ksys_sync();
return 0;
}
static void do_sync_work(struct work_struct *work)
{
int nowait = 0;
/*
* Sync twice to reduce the possibility we skipped some inodes / pages
* because they were temporarily locked
*/
iterate_supers(sync_inodes_one_sb, &nowait);
iterate_supers(sync_fs_one_sb, &nowait);
sync_bdevs(false);
iterate_supers(sync_inodes_one_sb, &nowait);
iterate_supers(sync_fs_one_sb, &nowait);
sync_bdevs(false);
printk("Emergency Sync complete\n");
kfree(work);
}
void emergency_sync(void)
{
struct work_struct *work;
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (work) {
INIT_WORK(work, do_sync_work);
schedule_work(work);
}
}
/*
* sync a single super
*/
SYSCALL_DEFINE1(syncfs, int, fd)
{
struct fd f = fdget(fd);
struct super_block *sb;
int ret, ret2;
if (!f.file)
return -EBADF;
sb = f.file->f_path.dentry->d_sb;
down_read(&sb->s_umount);
ret = sync_filesystem(sb);
up_read(&sb->s_umount);
ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
fdput(f);
return ret ? ret : ret2;
}
/**
* vfs_fsync_range - helper to sync a range of data & metadata to disk
* @file: file to sync
* @start: offset in bytes of the beginning of data range to sync
* @end: offset in bytes of the end of data range (inclusive)
* @datasync: perform only datasync
*
* Write back data in range @start..@end and metadata for @file to disk. If
* @datasync is set only metadata needed to access modified file data is
* written.
*/
int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
if (!file->f_op->fsync)
return -EINVAL;
if (!datasync && (inode->i_state & I_DIRTY_TIME))
mark_inode_dirty_sync(inode);
return file->f_op->fsync(file, start, end, datasync);
}
EXPORT_SYMBOL(vfs_fsync_range);
/**
* vfs_fsync - perform a fsync or fdatasync on a file
* @file: file to sync
* @datasync: only perform a fdatasync operation
*
* Write back data and metadata for @file to disk. If @datasync is
* set only metadata needed to access modified file data is written.
*/
int vfs_fsync(struct file *file, int datasync)
{
return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
}
EXPORT_SYMBOL(vfs_fsync);
static int do_fsync(unsigned int fd, int datasync)
{
struct fd f = fdget(fd);
int ret = -EBADF;
if (f.file) {
ret = vfs_fsync(f.file, datasync);
fdput(f);
inc_syscfs(current);
}
return ret;
}
SYSCALL_DEFINE1(fsync, unsigned int, fd)
{
return do_fsync(fd, 0);
}
SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
{
return do_fsync(fd, 1);
}
int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
unsigned int flags)
{
int ret;
struct address_space *mapping;
loff_t endbyte; /* inclusive */
umode_t i_mode;
ret = -EINVAL;
if (flags & ~VALID_FLAGS)
goto out;
endbyte = offset + nbytes;
if ((s64)offset < 0)
goto out;
if ((s64)endbyte < 0)
goto out;
if (endbyte < offset)
goto out;
if (sizeof(pgoff_t) == 4) {
if (offset >= (0x100000000ULL << PAGE_SHIFT)) {
/*
* The range starts outside a 32 bit machine's
* pagecache addressing capabilities. Let it "succeed"
*/
ret = 0;
goto out;
}
if (endbyte >= (0x100000000ULL << PAGE_SHIFT)) {
/*
* Out to EOF
*/
nbytes = 0;
}
}
if (nbytes == 0)
endbyte = LLONG_MAX;
else
endbyte--; /* inclusive */
i_mode = file_inode(file)->i_mode;
ret = -ESPIPE;
if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
!S_ISLNK(i_mode))
goto out;
mapping = file->f_mapping;
ret = 0;
if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
ret = file_fdatawait_range(file, offset, endbyte);
if (ret < 0)
goto out;
}
if (flags & SYNC_FILE_RANGE_WRITE) {
int sync_mode = WB_SYNC_NONE;
if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) ==
SYNC_FILE_RANGE_WRITE_AND_WAIT)
sync_mode = WB_SYNC_ALL;
ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
sync_mode);
if (ret < 0)
goto out;
}
if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
ret = file_fdatawait_range(file, offset, endbyte);
out:
return ret;
}
/*
* ksys_sync_file_range() permits finely controlled syncing over a segment of
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
* zero then ksys_sync_file_range() will operate from offset out to EOF.
*
* The flag bits are:
*
* SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
* before performing the write.
*
* SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
* range which are not presently under writeback. Note that this may block for
* significant periods due to exhaustion of disk request structures.
*
* SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
* after performing the write.
*
* Useful combinations of the flag bits are:
*
* SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
* in the range which were dirty on entry to ksys_sync_file_range() are placed
* under writeout. This is a start-write-for-data-integrity operation.
*
* SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
* are not presently under writeout. This is an asynchronous flush-to-disk
* operation. Not suitable for data integrity operations.
*
* SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
* completion of writeout of all pages in the range. This will be used after an
* earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
* for that operation to complete and to return the result.
*
* SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER
* (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT):
* a traditional sync() operation. This is a write-for-data-integrity operation
* which will ensure that all pages in the range which were dirty on entry to
* ksys_sync_file_range() are written to disk. It should be noted that disk
* caches are not flushed by this call, so there are no guarantees here that the
* data will be available on disk after a crash.
*
*
* SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
* I/O errors or ENOSPC conditions and will return those to the caller, after
* clearing the EIO and ENOSPC flags in the address_space.
*
* It should be noted that none of these operations write out the file's
* metadata. So unless the application is strictly performing overwrites of
* already-instantiated disk blocks, there are no guarantees here that the data
* will be available after a crash.
*/
int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
unsigned int flags)
{
int ret;
struct fd f;
ret = -EBADF;
f = fdget(fd);
if (f.file)
ret = sync_file_range(f.file, offset, nbytes, flags);
fdput(f);
return ret;
}
SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
unsigned int, flags)
{
return ksys_sync_file_range(fd, offset, nbytes, flags);
}
/* It would be nice if people remember that not all the world's an i386
when they introduce new system calls */
SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags,
loff_t, offset, loff_t, nbytes)
{
return ksys_sync_file_range(fd, offset, nbytes, flags);
}