Merge remote-tracking branch 'aosp/upstream-f2fs-stable-linux-5.15.y' into android13-5.15

* aosp/upstream-f2fs-stable-linux-5.15.y:
  erofs: fix deadlock when shrink erofs slab
  erofs: remove useless cache strategy of DELAYEDALLOC
  erofs: fix unsafe pagevec reuse of hooked pclusters
  erofs: don't trigger WARN() when decompression fails
  erofs: get rid of ->lru usage
  erofs: lzma compression support
  erofs: rename some generic methods in decompressor
  lib/xz, lib/decompress_unxz.c: Fix spelling in comments
  lib/xz: Add MicroLZMA decoder
  lib/xz: Move s->lzma.len = 0 initialization to lzma_reset()
  lib/xz: Validate the value before assigning it to an enum variable
  lib/xz: Avoid overlapping memcpy() with invalid input with in-place decompression
  erofs: introduce readmore decompression strategy
  erofs: introduce the secondary compression head
  erofs: get compression algorithms directly on mapping
  erofs: add multiple device support
  erofs: decouple basic mount options from fs_context
  erofs: remove the fast path of per-CPU buffer decompression
  fscrypt: improve a few comments
  fscrypt: allow 256-bit master keys with AES-256-XTS
  fscrypt: improve documentation for inline encryption
  fscrypt: clean up comments in bio.c
  fscrypt: remove fscrypt_operations::max_namelen
  f2fs: fix UAF in f2fs_available_free_memory
  f2fs: invalidate META_MAPPING before IPU/DIO write
  f2fs: support fault injection for dquot_initialize()
  f2fs: fix incorrect return value in f2fs_sanity_check_ckpt()
  f2fs: compress: disallow disabling compress on non-empty compressed file
  f2fs: compress: fix overwrite may reduce compress ratio unproperly
  f2fs: multidevice: support direct IO
  f2fs: introduce fragment allocation mode mount option
  f2fs: replace snprintf in show functions with sysfs_emit
  f2fs: include non-compressed blocks in compr_written_block
  f2fs: fix wrong condition to trigger background checkpoint correctly
  f2fs: fix to use WHINT_MODE
  f2fs: fix up f2fs_lookup tracepoints
  f2fs: set SBI_NEED_FSCK flag when inconsistent node block found
  f2fs: introduce excess_dirty_threshold()
  f2fs: avoid attaching SB_ACTIVE flag during mount
  f2fs: quota: fix potential deadlock
  f2fs: should use GFP_NOFS for directory inodes

Bug: 199807319
Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
Change-Id: I87a218cae12a7b2379b057eb52ac53d82f1847ee
This commit is contained in:
Jaegeuk Kim
2021-12-02 16:03:13 -08:00
53 changed files with 1688 additions and 469 deletions

View File

@@ -512,3 +512,19 @@ Date: July 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: You can control the multiplier value of bdi device readahead window size
between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
What: /sys/fs/f2fs/<disk>/max_fragment_chunk
Date: August 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: With "mode=fragment:block" mount options, we can scatter block allocation.
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.
What: /sys/fs/f2fs/<disk>/max_fragment_hole
Date: August 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: With "mode=fragment:block" mount options, we can scatter block allocation.
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.

View File

@@ -1,5 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
.. _inline_encryption:
=================
Inline Encryption
=================

View File

@@ -19,9 +19,10 @@ It is designed as a better filesystem solution for the following scenarios:
immutable and bit-for-bit identical to the official golden image for
their releases due to security and other considerations and
- hope to save some extra storage space with guaranteed end-to-end performance
by using reduced metadata and transparent file compression, especially
for those embedded devices with limited memory (ex, smartphone);
- hope to minimize extra storage space with guaranteed end-to-end performance
by using compact layout, transparent file compression and direct access,
especially for those embedded devices with limited memory and high-density
hosts with numerous containers;
Here is the main features of EROFS:
@@ -51,7 +52,9 @@ Here is the main features of EROFS:
- Support POSIX.1e ACLs by using xattrs;
- Support transparent data compression as an option:
LZ4 algorithm with the fixed-sized output compression for high performance.
LZ4 algorithm with the fixed-sized output compression for high performance;
- Multiple device support for multi-layer container images.
The following git tree provides the file system user-space tools under
development (ex, formatting tool mkfs.erofs):
@@ -87,6 +90,7 @@ cache_strategy=%s Select a strategy for cached decompression from now on:
dax={always,never} Use direct access (no page cache). See
Documentation/filesystems/dax.rst.
dax A legacy option which is an alias for ``dax=always``.
device=%s Specify a path to an extra device to be used together.
=================== =========================================================
On-disk details

View File

@@ -197,10 +197,29 @@ fault_type=%d Support configuring fault injection type, should be
FAULT_DISCARD 0x000002000
FAULT_WRITE_IO 0x000004000
FAULT_SLAB_ALLOC 0x000008000
FAULT_DQUOT_INIT 0x000010000
=================== ===========
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
"fragment:segment" and "fragment:block" are newly added here.
These are developer options for experiments to simulate filesystem
fragmentation/after-GC situation itself. The developers use these
modes to understand filesystem fragmentation/after-GC condition well,
and eventually get some insights to handle them better.
In "fragment:segment", f2fs allocates a new segment in ramdom
position. With this, we can simulate the after-GC condition.
In "fragment:block", we can scatter block allocation with
"max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
We added some randomness to both chunk and hole size to make
it close to realistic IO pattern. So, in this mode, f2fs will allocate
1..<max_fragment_chunk> blocks in a chunk and make a hole in the
length of 1..<max_fragment_hole> by turns. With this, the newly
allocated blocks will be scattered throughout the whole partition.
Note that "fragment:block" implicitly enables "fragment:segment"
option for more randomness.
Please, use these options for your experiments and we strongly
recommend to re-format the filesystem after using these options.
io_bits=%u Set the bit size of write IO requests. It should be set
with "mode=lfs".
usrquota Enable plain user disk quota accounting.

View File

@@ -77,11 +77,11 @@ Side-channel attacks
fscrypt is only resistant to side-channel attacks, such as timing or
electromagnetic attacks, to the extent that the underlying Linux
Cryptographic API algorithms are. If a vulnerable algorithm is used,
such as a table-based implementation of AES, it may be possible for an
attacker to mount a side channel attack against the online system.
Side channel attacks may also be mounted against applications
consuming decrypted data.
Cryptographic API algorithms or inline encryption hardware are. If a
vulnerable algorithm is used, such as a table-based implementation of
AES, it may be possible for an attacker to mount a side channel attack
against the online system. Side channel attacks may also be mounted
against applications consuming decrypted data.
Unauthorized file access
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1135,6 +1135,50 @@ where applications may later write sensitive data. It is recommended
that systems implementing a form of "verified boot" take advantage of
this by validating all top-level encryption policies prior to access.
Inline encryption support
=========================
By default, fscrypt uses the kernel crypto API for all cryptographic
operations (other than HKDF, which fscrypt partially implements
itself). The kernel crypto API supports hardware crypto accelerators,
but only ones that work in the traditional way where all inputs and
outputs (e.g. plaintexts and ciphertexts) are in memory. fscrypt can
take advantage of such hardware, but the traditional acceleration
model isn't particularly efficient and fscrypt hasn't been optimized
for it.
Instead, many newer systems (especially mobile SoCs) have *inline
encryption hardware* that can encrypt/decrypt data while it is on its
way to/from the storage device. Linux supports inline encryption
through a set of extensions to the block layer called *blk-crypto*.
blk-crypto allows filesystems to attach encryption contexts to bios
(I/O requests) to specify how the data will be encrypted or decrypted
in-line. For more information about blk-crypto, see
:ref:`Documentation/block/inline-encryption.rst <inline_encryption>`.
On supported filesystems (currently ext4 and f2fs), fscrypt can use
blk-crypto instead of the kernel crypto API to encrypt/decrypt file
contents. To enable this, set CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y in
the kernel configuration, and specify the "inlinecrypt" mount option
when mounting the filesystem.
Note that the "inlinecrypt" mount option just specifies to use inline
encryption when possible; it doesn't force its use. fscrypt will
still fall back to using the kernel crypto API on files where the
inline encryption hardware doesn't have the needed crypto capabilities
(e.g. support for the needed encryption algorithm and data unit size)
and where blk-crypto-fallback is unusable. (For blk-crypto-fallback
to be usable, it must be enabled in the kernel configuration with
CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y.)
Currently fscrypt always uses the filesystem block size (which is
usually 4096 bytes) as the data unit size. Therefore, it can only use
inline encryption hardware that supports that data unit size.
Inline encryption doesn't affect the ciphertext or other aspects of
the on-disk format, so users may freely switch back and forth between
using "inlinecrypt" and not using "inlinecrypt".
Implementation details
======================
@@ -1184,6 +1228,13 @@ keys`_ and `DIRECT_KEY policies`_.
Data path changes
-----------------
When inline encryption is used, filesystems just need to associate
encryption contexts with bios to specify how the block layer or the
inline encryption hardware will encrypt/decrypt the file contents.
When inline encryption isn't used, filesystems must encrypt/decrypt
the file contents themselves, as described below:
For the read path (->readpage()) of regular files, filesystems can
read the ciphertext into the page cache and decrypt it in-place. The
page lock must be held until decryption has finished, to prevent the
@@ -1197,18 +1248,6 @@ buffer. Some filesystems, such as UBIFS, already use temporary
buffers regardless of encryption. Other filesystems, such as ext4 and
F2FS, have to allocate bounce pages specially for encryption.
Fscrypt is also able to use inline encryption hardware instead of the
kernel crypto API for en/decryption of file contents. When possible,
and if directed to do so (by specifying the 'inlinecrypt' mount option
for an ext4/F2FS filesystem), it adds encryption contexts to bios and
uses blk-crypto to perform the en/decryption instead of making use of
the above read/write path changes. Of course, even if directed to
make use of inline encryption, fscrypt will only be able to do so if
either hardware inline encryption support is available for the
selected encryption algorithm or CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK
is selected. If neither is the case, fscrypt will fall back to using
the above mentioned read/write path changes for en/decryption.
Filename hashing and encoding
-----------------------------

View File

@@ -1,23 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/*
* This contains encryption functions for per-file encryption.
* Utility functions for file contents encryption/decryption on
* block device-based filesystems.
*
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility
*
* Written by Michael Halcrow, 2014.
*
* Filename encryption additions
* Uday Savagaonkar, 2014
* Encryption policy handling additions
* Ildar Muslukhov, 2014
* Add fscrypt_pullback_bio_page()
* Jaegeuk Kim, 2015.
*
* This has not yet undergone a rigorous security audit.
*
* The usage of AES-XTS should conform to recommendations in NIST
* Special Publication 800-38E and IEEE P1619/D16.
*/
#include <linux/pagemap.h>
@@ -26,6 +13,21 @@
#include <linux/namei.h>
#include "fscrypt_private.h"
/**
* fscrypt_decrypt_bio() - decrypt the contents of a bio
* @bio: the bio to decrypt
*
* Decrypt the contents of a "read" bio following successful completion of the
* underlying disk read. The bio must be reading a whole number of blocks of an
* encrypted file directly into the page cache. If the bio is reading the
* ciphertext into bounce pages instead of the page cache (for example, because
* the file is also compressed, so decompression is required after decryption),
* then this function isn't applicable. This function may sleep, so it must be
* called from a workqueue rather than from the bio's bi_end_io callback.
*
* This function sets PG_error on any pages that contain any blocks that failed
* to be decrypted. The filesystem must not mark such pages uptodate.
*/
void fscrypt_decrypt_bio(struct bio *bio)
{
struct bio_vec *bv;

View File

@@ -429,8 +429,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
if (fscrypt_has_encryption_key(dir)) {
if (!fscrypt_fname_encrypted_size(&dir->i_crypt_info->ci_policy,
iname->len,
dir->i_sb->s_cop->max_namelen,
iname->len, NAME_MAX,
&fname->crypto_buf.len))
return -ENAMETOOLONG;
fname->crypto_buf.name = kmalloc(fname->crypto_buf.len,

View File

@@ -20,6 +20,11 @@
#define FSCRYPT_FILE_NONCE_SIZE 16
/*
* Minimum size of an fscrypt master key. Note: a longer key will be required
* if ciphers with a 256-bit security strength are used. This is just the
* absolute minimum, which applies when only 128-bit encryption is used.
*/
#define FSCRYPT_MIN_KEY_SIZE 16
#define FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE 128
@@ -437,7 +442,11 @@ struct fscrypt_master_key_secret {
*/
struct fscrypt_hkdf hkdf;
/* Size of the raw key in bytes. Set even if ->raw isn't set. */
/*
* Size of the raw key in bytes. This remains set even if ->raw was
* zeroized due to no longer being needed. I.e. we still remember the
* size of the key even if we don't need to remember the key itself.
*/
u32 size;
/* True if the key in ->raw is a hardware-wrapped key. */

View File

@@ -122,8 +122,9 @@ err_free_tfm:
/*
* Prepare the crypto transform object or blk-crypto key in @prep_key, given the
* raw key, encryption mode, and flag indicating which encryption implementation
* (fs-layer or blk-crypto) will be used.
* raw key, encryption mode (@ci->ci_mode), flag indicating which encryption
* implementation (fs-layer or blk-crypto) will be used (@ci->ci_inlinecrypt),
* and IV generation method (@ci->ci_policy.flags).
*/
int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key,
const u8 *raw_key, unsigned int raw_key_size,

View File

@@ -6,16 +6,22 @@ config EROFS_FS
select FS_IOMAP
select LIBCRC32C
help
EROFS (Enhanced Read-Only File System) is a lightweight
read-only file system with modern designs (eg. page-sized
blocks, inline xattrs/data, etc.) for scenarios which need
high-performance read-only requirements, e.g. Android OS
for mobile phones and LIVECDs.
EROFS (Enhanced Read-Only File System) is a lightweight read-only
file system with modern designs (e.g. no buffer heads, inline
xattrs/data, chunk-based deduplication, multiple devices, etc.) for
scenarios which need high-performance read-only solutions, e.g.
smartphones with Android OS, LiveCDs and high-density hosts with
numerous containers;
It also provides fixed-sized output compression support,
which improves storage density, keeps relatively higher
compression ratios, which is more useful to achieve high
performance for embedded devices with limited memory.
It also provides fixed-sized output compression support in order to
improve storage density as well as keep relatively higher compression
ratios and implements in-place decompression to reuse the file page
for compressed data temporarily with proper strategies, which is
quite useful to ensure guaranteed end-to-end runtime decompression
performance under extremely memory pressure without extra cost.
See the documentation at <file:Documentation/filesystems/erofs.rst>
for more details.
If unsure, say N.
@@ -76,3 +82,19 @@ config EROFS_FS_ZIP
Enable fixed-sized output compression for EROFS.
If you don't want to enable compression feature, say N.
config EROFS_FS_ZIP_LZMA
bool "EROFS LZMA compressed data support"
depends on EROFS_FS_ZIP
select XZ_DEC
select XZ_DEC_MICROLZMA
help
Saying Y here includes support for reading EROFS file systems
containing LZMA compressed data, specifically called microLZMA. it
gives better compression ratios than the LZ4 algorithm, at the
expense of more CPU overhead.
LZMA support is an experimental feature for now and so most file
systems will be readable without selecting this option.
If unsure, say N.

View File

@@ -4,3 +4,4 @@ obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o

View File

@@ -8,11 +8,6 @@
#include "internal.h"
enum {
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
Z_EROFS_COMPRESSION_RUNTIME_MAX
};
struct z_erofs_decompress_req {
struct super_block *sb;
struct page **in, **out;
@@ -25,6 +20,12 @@ struct z_erofs_decompress_req {
bool inplace_io, partial_decoding;
};
struct z_erofs_decompressor {
int (*decompress)(struct z_erofs_decompress_req *rq,
struct page **pagepool);
char *name;
};
/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
@@ -63,7 +64,7 @@ static inline bool z_erofs_is_shortlived_page(struct page *page)
return true;
}
static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
static inline bool z_erofs_put_shortlivedpage(struct page **pagepool,
struct page *page)
{
if (!z_erofs_is_shortlived_page(page))
@@ -74,13 +75,22 @@ static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
put_page(page);
} else {
/* follow the pcluster rule above. */
set_page_private(page, 0);
list_add(&page->lru, pagepool);
erofs_pagepool_add(pagepool, page);
}
return true;
}
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);
#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
struct page *page)
{
return page->mapping == MNGD_MAPPING(sbi);
}
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);
/* prototypes for specific algorithms */
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);
#endif

View File

@@ -89,6 +89,7 @@ static int erofs_map_blocks(struct inode *inode,
erofs_off_t pos;
int err = 0;
map->m_deviceid = 0;
if (map->m_la >= inode->i_size) {
/* leave out-of-bound access unmapped */
map->m_flags = 0;
@@ -135,14 +136,8 @@ static int erofs_map_blocks(struct inode *inode,
map->m_flags = 0;
break;
default:
/* only one device is supported for now */
if (idx->device_id) {
erofs_err(sb, "invalid device id %u @ %llu for nid %llu",
le16_to_cpu(idx->device_id),
chunknr, vi->nid);
err = -EFSCORRUPTED;
goto out_unlock;
}
map->m_deviceid = le16_to_cpu(idx->device_id) &
EROFS_SB(sb)->device_id_mask;
map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
map->m_flags = EROFS_MAP_MAPPED;
break;
@@ -155,11 +150,55 @@ out:
return err;
}
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
struct erofs_device_info *dif;
int id;
/* primary device by default */
map->m_bdev = sb->s_bdev;
map->m_daxdev = EROFS_SB(sb)->dax_dev;
if (map->m_deviceid) {
down_read(&devs->rwsem);
dif = idr_find(&devs->tree, map->m_deviceid - 1);
if (!dif) {
up_read(&devs->rwsem);
return -ENODEV;
}
map->m_bdev = dif->bdev;
map->m_daxdev = dif->dax_dev;
up_read(&devs->rwsem);
} else if (devs->extra_devices) {
down_read(&devs->rwsem);
idr_for_each_entry(&devs->tree, dif, id) {
erofs_off_t startoff, length;
if (!dif->mapped_blkaddr)
continue;
startoff = blknr_to_addr(dif->mapped_blkaddr);
length = blknr_to_addr(dif->blocks);
if (map->m_pa >= startoff &&
map->m_pa < startoff + length) {
map->m_pa -= startoff;
map->m_bdev = dif->bdev;
map->m_daxdev = dif->dax_dev;
break;
}
}
up_read(&devs->rwsem);
}
return 0;
}
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
int ret;
struct erofs_map_blocks map;
struct erofs_map_dev mdev;
map.m_la = offset;
map.m_llen = length;
@@ -168,8 +207,16 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (ret < 0)
return ret;
iomap->bdev = inode->i_sb->s_bdev;
iomap->dax_dev = EROFS_I_SB(inode)->dax_dev;
mdev = (struct erofs_map_dev) {
.m_deviceid = map.m_deviceid,
.m_pa = map.m_pa,
};
ret = erofs_map_dev(inode->i_sb, &mdev);
if (ret)
return ret;
iomap->bdev = mdev.m_bdev;
iomap->dax_dev = mdev.m_daxdev;
iomap->offset = map.m_la;
iomap->length = map.m_llen;
iomap->flags = 0;
@@ -188,15 +235,15 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->type = IOMAP_INLINE;
ipage = erofs_get_meta_page(inode->i_sb,
erofs_blknr(map.m_pa));
erofs_blknr(mdev.m_pa));
if (IS_ERR(ipage))
return PTR_ERR(ipage);
iomap->inline_data = page_address(ipage) +
erofs_blkoff(map.m_pa);
erofs_blkoff(mdev.m_pa);
iomap->private = ipage;
} else {
iomap->type = IOMAP_MAPPED;
iomap->addr = map.m_pa;
iomap->addr = mdev.m_pa;
}
return 0;
}

View File

@@ -16,17 +16,6 @@
#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
#endif
struct z_erofs_decompressor {
/*
* if destpages have sparsed pages, fill them with bounce pages.
* it also check whether destpages indicate continuous physical memory.
*/
int (*prepare_destpages)(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);
int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out);
char *name;
};
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int size)
@@ -63,8 +52,12 @@ int z_erofs_load_lz4_config(struct super_block *sb,
return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
}
static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
/*
* Fill all gaps with bounce pages if it's a sparse page list. Also check if
* all physical pages are consecutive, which can be seen for moderate CR.
*/
static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
const unsigned int nr =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -119,7 +112,7 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
return kaddr ? 1 : 0;
}
static void *z_erofs_handle_inplace_io(struct z_erofs_decompress_req *rq,
static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq,
void *inpage, unsigned int *inputmargin, int *maptype,
bool support_0padding)
{
@@ -189,7 +182,8 @@ docopy:
return src;
}
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq,
u8 *out)
{
unsigned int inputmargin;
u8 *headpage, *src;
@@ -216,8 +210,8 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
}
rq->inputsize -= inputmargin;
src = z_erofs_handle_inplace_io(rq, headpage, &inputmargin, &maptype,
support_0padding);
src = z_erofs_lz4_handle_inplace_io(rq, headpage, &inputmargin,
&maptype, support_0padding);
if (IS_ERR(src))
return PTR_ERR(src);
@@ -241,6 +235,8 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
if (ret >= 0)
memset(out + ret, 0, rq->outputsize - ret);
ret = -EIO;
} else {
ret = 0;
}
if (maptype == 0) {
@@ -256,86 +252,25 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
return ret;
}
static struct z_erofs_decompressor decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
.name = "shifted"
},
[Z_EROFS_COMPRESSION_LZ4] = {
.prepare_destpages = z_erofs_lz4_prepare_destpages,
.decompress = z_erofs_lz4_decompress,
.name = "lz4"
},
};
static void copy_from_pcpubuf(struct page **out, const char *dst,
unsigned short pageofs_out,
unsigned int outputsize)
{
const char *end = dst + outputsize;
const unsigned int righthalf = PAGE_SIZE - pageofs_out;
const char *cur = dst - pageofs_out;
while (cur < end) {
struct page *const page = *out++;
if (page) {
char *buf = kmap_atomic(page);
if (cur >= dst) {
memcpy(buf, cur, min_t(uint, PAGE_SIZE,
end - cur));
} else {
memcpy(buf + pageofs_out, cur + pageofs_out,
min_t(uint, righthalf, end - cur));
}
kunmap_atomic(buf);
}
cur += PAGE_SIZE;
}
}
static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const struct z_erofs_decompressor *alg = decompressors + rq->alg;
unsigned int dst_maptype;
void *dst;
int ret;
/* two optimized fast paths only for non bigpcluster cases yet */
if (rq->inputsize <= PAGE_SIZE) {
if (nrpages_out == 1 && !rq->inplace_io) {
/* one optimized fast path only for non bigpcluster cases yet */
if (rq->inputsize <= PAGE_SIZE && nrpages_out == 1 && !rq->inplace_io) {
DBG_BUGON(!*rq->out);
dst = kmap_atomic(*rq->out);
dst_maptype = 0;
goto dstmap_out;
}
/*
* For the case of small output size (especially much less
* than PAGE_SIZE), memcpy the decompressed data rather than
* compressed data is preferred.
*/
if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
dst = erofs_get_pcpubuf(1);
if (IS_ERR(dst))
return PTR_ERR(dst);
rq->inplace_io = false;
ret = alg->decompress(rq, dst);
if (!ret)
copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
rq->outputsize);
erofs_put_pcpubuf(dst);
return ret;
}
}
/* general decoding path which can be used for all cases */
ret = alg->prepare_destpages(rq, pagepool);
ret = z_erofs_lz4_prepare_dstpages(rq, pagepool);
if (ret < 0)
return ret;
if (ret) {
@@ -350,7 +285,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
dst_maptype = 2;
dstmap_out:
ret = alg->decompress(rq, dst + rq->pageofs_out);
ret = z_erofs_lz4_decompress_mem(rq, dst + rq->pageofs_out);
if (!dst_maptype)
kunmap_atomic(dst);
@@ -359,8 +294,8 @@ dstmap_out:
return ret;
}
static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -398,10 +333,25 @@ static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq,
return 0;
}
static struct z_erofs_decompressor decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
.decompress = z_erofs_shifted_transform,
.name = "shifted"
},
[Z_EROFS_COMPRESSION_LZ4] = {
.decompress = z_erofs_lz4_decompress,
.name = "lz4"
},
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
[Z_EROFS_COMPRESSION_LZMA] = {
.decompress = z_erofs_lzma_decompress,
.name = "lzma"
},
#endif
};
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
struct page **pagepool)
{
if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED)
return z_erofs_shifted_transform(rq, pagepool);
return z_erofs_decompress_generic(rq, pagepool);
return decompressors[rq->alg].decompress(rq, pagepool);
}

View File

@@ -0,0 +1,290 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/xz.h>
#include <linux/module.h>
#include "compress.h"
struct z_erofs_lzma {
struct z_erofs_lzma *next;
struct xz_dec_microlzma *state;
struct xz_buf buf;
u8 bounce[PAGE_SIZE];
};
/* considering the LZMA performance, no need to use a lockless list for now */
static DEFINE_SPINLOCK(z_erofs_lzma_lock);
static unsigned int z_erofs_lzma_max_dictsize;
static unsigned int z_erofs_lzma_nstrms, z_erofs_lzma_avail_strms;
static struct z_erofs_lzma *z_erofs_lzma_head;
static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq);
module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444);
void z_erofs_lzma_exit(void)
{
/* there should be no running fs instance */
while (z_erofs_lzma_avail_strms) {
struct z_erofs_lzma *strm;
spin_lock(&z_erofs_lzma_lock);
strm = z_erofs_lzma_head;
if (!strm) {
spin_unlock(&z_erofs_lzma_lock);
DBG_BUGON(1);
return;
}
z_erofs_lzma_head = NULL;
spin_unlock(&z_erofs_lzma_lock);
while (strm) {
struct z_erofs_lzma *n = strm->next;
if (strm->state)
xz_dec_microlzma_end(strm->state);
kfree(strm);
--z_erofs_lzma_avail_strms;
strm = n;
}
}
}
int z_erofs_lzma_init(void)
{
unsigned int i;
/* by default, use # of possible CPUs instead */
if (!z_erofs_lzma_nstrms)
z_erofs_lzma_nstrms = num_possible_cpus();
for (i = 0; i < z_erofs_lzma_nstrms; ++i) {
struct z_erofs_lzma *strm = kzalloc(sizeof(*strm), GFP_KERNEL);
if (!strm) {
z_erofs_lzma_exit();
return -ENOMEM;
}
spin_lock(&z_erofs_lzma_lock);
strm->next = z_erofs_lzma_head;
z_erofs_lzma_head = strm;
spin_unlock(&z_erofs_lzma_lock);
++z_erofs_lzma_avail_strms;
}
return 0;
}
int z_erofs_load_lzma_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lzma_cfgs *lzma, int size)
{
static DEFINE_MUTEX(lzma_resize_mutex);
unsigned int dict_size, i;
struct z_erofs_lzma *strm, *head = NULL;
int err;
if (!lzma || size < sizeof(struct z_erofs_lzma_cfgs)) {
erofs_err(sb, "invalid lzma cfgs, size=%u", size);
return -EINVAL;
}
if (lzma->format) {
erofs_err(sb, "unidentified lzma format %x, please check kernel version",
le16_to_cpu(lzma->format));
return -EINVAL;
}
dict_size = le32_to_cpu(lzma->dict_size);
if (dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE || dict_size < 4096) {
erofs_err(sb, "unsupported lzma dictionary size %u",
dict_size);
return -EINVAL;
}
erofs_info(sb, "EXPERIMENTAL MicroLZMA in use. Use at your own risk!");
/* in case 2 z_erofs_load_lzma_config() race to avoid deadlock */
mutex_lock(&lzma_resize_mutex);
if (z_erofs_lzma_max_dictsize >= dict_size) {
mutex_unlock(&lzma_resize_mutex);
return 0;
}
/* 1. collect/isolate all streams for the following check */
for (i = 0; i < z_erofs_lzma_avail_strms; ++i) {
struct z_erofs_lzma *last;
again:
spin_lock(&z_erofs_lzma_lock);
strm = z_erofs_lzma_head;
if (!strm) {
spin_unlock(&z_erofs_lzma_lock);
wait_event(z_erofs_lzma_wq,
READ_ONCE(z_erofs_lzma_head));
goto again;
}
z_erofs_lzma_head = NULL;
spin_unlock(&z_erofs_lzma_lock);
for (last = strm; last->next; last = last->next)
++i;
last->next = head;
head = strm;
}
err = 0;
/* 2. walk each isolated stream and grow max dict_size if needed */
for (strm = head; strm; strm = strm->next) {
if (strm->state)
xz_dec_microlzma_end(strm->state);
strm->state = xz_dec_microlzma_alloc(XZ_PREALLOC, dict_size);
if (!strm->state)
err = -ENOMEM;
}
/* 3. push back all to the global list and update max dict_size */
spin_lock(&z_erofs_lzma_lock);
DBG_BUGON(z_erofs_lzma_head);
z_erofs_lzma_head = head;
spin_unlock(&z_erofs_lzma_lock);
z_erofs_lzma_max_dictsize = dict_size;
mutex_unlock(&lzma_resize_mutex);
return err;
}
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int nrpages_in =
PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
unsigned int inputmargin, inlen, outlen, pageofs;
struct z_erofs_lzma *strm;
u8 *kin;
bool bounced = false;
int no, ni, j, err = 0;
/* 1. get the exact LZMA compressed size */
kin = kmap(*rq->in);
inputmargin = 0;
while (!kin[inputmargin & ~PAGE_MASK])
if (!(++inputmargin & ~PAGE_MASK))
break;
if (inputmargin >= PAGE_SIZE) {
kunmap(*rq->in);
return -EFSCORRUPTED;
}
rq->inputsize -= inputmargin;
/* 2. get an available lzma context */
again:
spin_lock(&z_erofs_lzma_lock);
strm = z_erofs_lzma_head;
if (!strm) {
spin_unlock(&z_erofs_lzma_lock);
wait_event(z_erofs_lzma_wq, READ_ONCE(z_erofs_lzma_head));
goto again;
}
z_erofs_lzma_head = strm->next;
spin_unlock(&z_erofs_lzma_lock);
/* 3. multi-call decompress */
inlen = rq->inputsize;
outlen = rq->outputsize;
xz_dec_microlzma_reset(strm->state, inlen, outlen,
!rq->partial_decoding);
pageofs = rq->pageofs_out;
strm->buf.in = kin + inputmargin;
strm->buf.in_pos = 0;
strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin);
inlen -= strm->buf.in_size;
strm->buf.out = NULL;
strm->buf.out_pos = 0;
strm->buf.out_size = 0;
for (ni = 0, no = -1;;) {
enum xz_ret xz_err;
if (strm->buf.out_pos == strm->buf.out_size) {
if (strm->buf.out) {
kunmap(rq->out[no]);
strm->buf.out = NULL;
}
if (++no >= nrpages_out || !outlen) {
erofs_err(rq->sb, "decompressed buf out of bound");
err = -EFSCORRUPTED;
break;
}
strm->buf.out_pos = 0;
strm->buf.out_size = min_t(u32, outlen,
PAGE_SIZE - pageofs);
outlen -= strm->buf.out_size;
if (rq->out[no])
strm->buf.out = kmap(rq->out[no]) + pageofs;
pageofs = 0;
} else if (strm->buf.in_pos == strm->buf.in_size) {
kunmap(rq->in[ni]);
if (++ni >= nrpages_in || !inlen) {
erofs_err(rq->sb, "compressed buf out of bound");
err = -EFSCORRUPTED;
break;
}
strm->buf.in_pos = 0;
strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE);
inlen -= strm->buf.in_size;
kin = kmap(rq->in[ni]);
strm->buf.in = kin;
bounced = false;
}
/*
* Handle overlapping: Use bounced buffer if the compressed
* data is under processing; Otherwise, Use short-lived pages
* from the on-stack pagepool where pages share with the same
* request.
*/
if (!bounced && rq->out[no] == rq->in[ni]) {
memcpy(strm->bounce, strm->buf.in, strm->buf.in_size);
strm->buf.in = strm->bounce;
bounced = true;
}
for (j = ni + 1; j < nrpages_in; ++j) {
struct page *tmppage;
if (rq->out[no] != rq->in[j])
continue;
DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
rq->in[j]));
tmppage = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
copy_highpage(tmppage, rq->in[j]);
rq->in[j] = tmppage;
}
xz_err = xz_dec_microlzma_run(strm->state, &strm->buf);
DBG_BUGON(strm->buf.out_pos > strm->buf.out_size);
DBG_BUGON(strm->buf.in_pos > strm->buf.in_size);
if (xz_err != XZ_OK) {
if (xz_err == XZ_STREAM_END && !outlen)
break;
erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]",
xz_err, rq->inputsize, rq->outputsize);
err = -EFSCORRUPTED;
break;
}
}
if (no < nrpages_out && strm->buf.out)
kunmap(rq->in[no]);
if (ni < nrpages_in)
kunmap(rq->in[ni]);
/* 4. push back LZMA stream context to the global list */
spin_lock(&z_erofs_lzma_lock);
strm->next = z_erofs_lzma_head;
z_erofs_lzma_head = strm;
spin_unlock(&z_erofs_lzma_lock);
wake_up(&z_erofs_lzma_wq);
return err;
}

View File

@@ -21,14 +21,29 @@
#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004
#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
#define EROFS_ALL_FEATURE_INCOMPAT \
(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE)
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
EROFS_FEATURE_INCOMPAT_COMPR_HEAD2)
#define EROFS_SB_EXTSLOT_SIZE 16
struct erofs_deviceslot {
union {
u8 uuid[16]; /* used for device manager later */
u8 userdata[64]; /* digest(sha256), etc. */
} u;
__le32 blocks; /* total fs blocks of this device */
__le32 mapped_blkaddr; /* map starting at mapped_blkaddr */
u8 reserved[56];
};
#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot)
/* erofs on-disk super block (currently 128 bytes) */
struct erofs_super_block {
__le32 magic; /* file system magic number */
@@ -54,7 +69,9 @@ struct erofs_super_block {
/* customized sliding window size instead of 64k by default */
__le16 lz4_max_distance;
} __packed u1;
__u8 reserved2[42];
__le16 extra_devices; /* # of devices besides the primary device */
__le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */
__u8 reserved2[38];
};
/*
@@ -238,7 +255,7 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
/* 8-byte inode chunk indexes */
struct erofs_inode_chunk_index {
__le16 advise; /* always 0, don't care for now */
__le16 device_id; /* back-end storage id, always 0 for now */
__le16 device_id; /* back-end storage id (with bits masked) */
__le32 blkaddr; /* start block address of this inode chunk */
};
@@ -248,9 +265,10 @@ struct erofs_inode_chunk_index {
/* available compression algorithm types (for h_algorithmtype) */
enum {
Z_EROFS_COMPRESSION_LZ4 = 0,
Z_EROFS_COMPRESSION_LZMA = 1,
Z_EROFS_COMPRESSION_MAX
};
#define Z_EROFS_ALL_COMPR_ALGS (1 << (Z_EROFS_COMPRESSION_MAX - 1))
#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1)
/* 14 bytes (+ length field = 16 bytes) */
struct z_erofs_lz4_cfgs {
@@ -259,6 +277,15 @@ struct z_erofs_lz4_cfgs {
u8 reserved[10];
} __packed;
/* 14 bytes (+ length field = 16 bytes) */
struct z_erofs_lzma_cfgs {
__le32 dict_size;
__le16 format;
u8 reserved[8];
} __packed;
#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
/*
* bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
* e.g. for 4k logical cluster size, 4B if compacted 2B is off;
@@ -288,35 +315,34 @@ struct z_erofs_map_header {
#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8
/*
* Fixed-sized output compression ondisk Logical Extent cluster type:
* 0 - literal (uncompressed) cluster
* 1 - compressed cluster (for the head logical cluster)
* 2 - compressed cluster (for the other logical clusters)
* Fixed-sized output compression on-disk logical cluster type:
* 0 - literal (uncompressed) lcluster
* 1,3 - compressed lcluster (for HEAD lclusters)
* 2 - compressed lcluster (for NONHEAD lclusters)
*
* In detail,
* 0 - literal (uncompressed) cluster,
* 0 - literal (uncompressed) lcluster,
* di_advise = 0
* di_clusterofs = the literal data offset of the cluster
* di_blkaddr = the blkaddr of the literal cluster
* di_clusterofs = the literal data offset of the lcluster
* di_blkaddr = the blkaddr of the literal pcluster
*
* 1 - compressed cluster (for the head logical cluster)
* di_advise = 1
* di_clusterofs = the decompressed data offset of the cluster
* di_blkaddr = the blkaddr of the compressed cluster
* 1,3 - compressed lcluster (for HEAD lclusters)
* di_advise = 1 or 3
* di_clusterofs = the decompressed data offset of the lcluster
* di_blkaddr = the blkaddr of the compressed pcluster
*
* 2 - compressed cluster (for the other logical clusters)
* 2 - compressed lcluster (for NONHEAD lclusters)
* di_advise = 2
* di_clusterofs =
* the decompressed data offset in its own head cluster
* di_u.delta[0] = distance to its corresponding head cluster
* di_u.delta[1] = distance to its corresponding tail cluster
* (di_advise could be 0, 1 or 2)
* the decompressed data offset in its own HEAD lcluster
* di_u.delta[0] = distance to this HEAD lcluster
* di_u.delta[1] = distance to the next HEAD lcluster
*/
enum {
Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0,
Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1,
Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 = 1,
Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2,
Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3,
Z_EROFS_VLE_CLUSTER_TYPE_HEAD2 = 3,
Z_EROFS_VLE_CLUSTER_TYPE_MAX
};
@@ -384,6 +410,7 @@ static inline void erofs_check_ondisk_layout_definitions(void)
/* keep in sync between 2 index structures for better extendibility */
BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) !=
sizeof(struct z_erofs_vle_decompressed_index));
BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128);
BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);

View File

@@ -192,7 +192,7 @@ static struct page *erofs_read_inode(struct inode *inode,
inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
inode->i_flags &= ~S_DAX;
if (test_opt(&sbi->ctx, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
vi->datalayout == EROFS_INODE_FLAT_PLAIN)
inode->i_flags |= S_DAX;
if (!nblks)

View File

@@ -47,7 +47,16 @@ typedef u64 erofs_off_t;
/* data type for filesystem-wide blocks number */
typedef u32 erofs_blk_t;
struct erofs_fs_context {
struct erofs_device_info {
char *path;
struct block_device *bdev;
struct dax_device *dax_dev;
u32 blocks;
u32 mapped_blkaddr;
};
struct erofs_mount_opts {
#ifdef CONFIG_EROFS_FS_ZIP
/* current strategy of how to use managed cache */
unsigned char cache_strategy;
@@ -60,6 +69,18 @@ struct erofs_fs_context {
unsigned int mount_opt;
};
struct erofs_dev_context {
struct idr tree;
struct rw_semaphore rwsem;
unsigned int extra_devices;
};
struct erofs_fs_context {
struct erofs_mount_opts opt;
struct erofs_dev_context *devs;
};
/* all filesystem-wide lz4 configurations */
struct erofs_sb_lz4_info {
/* # of pages needed for EROFS lz4 rolling decompression */
@@ -69,6 +90,7 @@ struct erofs_sb_lz4_info {
};
struct erofs_sb_info {
struct erofs_mount_opts opt; /* options */
#ifdef CONFIG_EROFS_FS_ZIP
/* list for all registered superblocks, mainly for shrinker */
struct list_head list;
@@ -85,12 +107,16 @@ struct erofs_sb_info {
struct erofs_sb_lz4_info lz4;
#endif /* CONFIG_EROFS_FS_ZIP */
struct erofs_dev_context *devs;
struct dax_device *dax_dev;
u32 blocks;
u64 total_blocks;
u32 primarydevice_blocks;
u32 meta_blkaddr;
#ifdef CONFIG_EROFS_FS_XATTR
u32 xattr_blkaddr;
#endif
u16 device_id_mask; /* valid bits of device id to be used */
/* inode slot unit size in bit shift */
unsigned char islotbits;
@@ -108,8 +134,6 @@ struct erofs_sb_info {
u8 volume_name[16]; /* volume name */
u32 feature_compat;
u32 feature_incompat;
struct erofs_fs_context ctx; /* options */
};
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
@@ -121,9 +145,9 @@ struct erofs_sb_info {
#define EROFS_MOUNT_DAX_ALWAYS 0x00000040
#define EROFS_MOUNT_DAX_NEVER 0x00000080
#define clear_opt(ctx, option) ((ctx)->mount_opt &= ~EROFS_MOUNT_##option)
#define set_opt(ctx, option) ((ctx)->mount_opt |= EROFS_MOUNT_##option)
#define test_opt(ctx, option) ((ctx)->mount_opt & EROFS_MOUNT_##option)
#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option)
#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option)
#define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option)
enum {
EROFS_ZIP_CACHE_DISABLED,
@@ -237,6 +261,7 @@ static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
/* atomic flag definitions */
@@ -307,6 +332,19 @@ static inline unsigned int erofs_inode_datalayout(unsigned int value)
EROFS_I_DATALAYOUT_BITS);
}
/*
* Different from grab_cache_page_nowait(), reclaiming is never triggered
* when allocating new pages.
*/
static inline
struct page *erofs_grab_cache_page_nowait(struct address_space *mapping,
pgoff_t index)
{
return pagecache_get_page(mapping, index,
FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
readahead_gfp_mask(mapping) & ~__GFP_RECLAIM);
}
extern const struct super_operations erofs_sops;
extern const struct address_space_operations erofs_raw_access_aops;
@@ -338,7 +376,7 @@ extern const struct address_space_operations z_erofs_aops;
* of the corresponding uncompressed data in the file.
*/
enum {
BH_Zipped = BH_PrivateStart,
BH_Encoded = BH_PrivateStart,
BH_FullMapped,
};
@@ -346,8 +384,8 @@ enum {
#define EROFS_MAP_MAPPED (1 << BH_Mapped)
/* Located in metadata (could be copied from bd_inode) */
#define EROFS_MAP_META (1 << BH_Meta)
/* The extent has been compressed */
#define EROFS_MAP_ZIPPED (1 << BH_Zipped)
/* The extent is encoded */
#define EROFS_MAP_ENCODED (1 << BH_Encoded)
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
@@ -355,6 +393,8 @@ struct erofs_map_blocks {
erofs_off_t m_pa, m_la;
u64 m_plen, m_llen;
unsigned short m_deviceid;
char m_algorithmformat;
unsigned int m_flags;
struct page *mpage;
@@ -367,6 +407,13 @@ struct erofs_map_blocks {
* approach instead if possible since it's more metadata lightweight.)
*/
#define EROFS_GET_BLOCKS_FIEMAP 0x0002
/* Used to map the whole extent if non-negligible data is requested for LZMA */
#define EROFS_GET_BLOCKS_READMORE 0x0004
enum {
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
Z_EROFS_COMPRESSION_RUNTIME_MAX
};
/* zmap.c */
extern const struct iomap_ops z_erofs_iomap_report_ops;
@@ -386,9 +433,18 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode,
}
#endif /* !CONFIG_EROFS_FS_ZIP */
struct erofs_map_dev {
struct block_device *m_bdev;
struct dax_device *m_daxdev;
erofs_off_t m_pa;
unsigned int m_deviceid;
};
/* data.c */
extern const struct file_operations erofs_file_fops;
struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr);
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
@@ -443,7 +499,14 @@ void erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
/* utils.c / zdata.c */
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp);
static inline void erofs_pagepool_add(struct page **pagepool,
struct page *page)
{
set_page_private(page, (unsigned long)*pagepool);
*pagepool = page;
}
void erofs_release_pages(struct page **pagepool);
#ifdef CONFIG_EROFS_FS_ZIP
int erofs_workgroup_put(struct erofs_workgroup *grp);
@@ -483,6 +546,26 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
}
#endif /* !CONFIG_EROFS_FS_ZIP */
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
int z_erofs_lzma_init(void);
void z_erofs_lzma_exit(void);
int z_erofs_load_lzma_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lzma_cfgs *lzma, int size);
#else
static inline int z_erofs_lzma_init(void) { return 0; }
static inline int z_erofs_lzma_exit(void) { return 0; }
static inline int z_erofs_load_lzma_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lzma_cfgs *lzma, int size) {
if (lzma) {
erofs_err(sb, "lzma algorithm isn't enabled");
return -EINVAL;
}
return 0;
}
#endif /* !CONFIG_EROFS_FS_ZIP */
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
#endif /* __EROFS_INTERNAL_H */

View File

@@ -49,7 +49,7 @@ int erofs_pcpubuf_growsize(unsigned int nrpages)
{
static DEFINE_MUTEX(pcb_resize_mutex);
static unsigned int pcb_nrpages;
LIST_HEAD(pagepool);
struct page *pagepool = NULL;
int delta, cpu, ret, i;
mutex_lock(&pcb_resize_mutex);
@@ -102,13 +102,13 @@ int erofs_pcpubuf_growsize(unsigned int nrpages)
vunmap(old_ptr);
free_pagearray:
while (i)
list_add(&oldpages[--i]->lru, &pagepool);
erofs_pagepool_add(&pagepool, oldpages[--i]);
kfree(oldpages);
if (ret)
break;
}
pcb_nrpages = nrpages;
put_pages_list(&pagepool);
erofs_release_pages(&pagepool);
out:
mutex_unlock(&pcb_resize_mutex);
return ret;

View File

@@ -225,6 +225,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
case Z_EROFS_COMPRESSION_LZ4:
ret = z_erofs_load_lz4_config(sb, dsb, data, size);
break;
case Z_EROFS_COMPRESSION_LZMA:
ret = z_erofs_load_lzma_config(sb, dsb, data, size);
break;
default:
DBG_BUGON(1);
ret = -EFAULT;
@@ -252,6 +255,79 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
}
#endif
static int erofs_init_devices(struct super_block *sb,
struct erofs_super_block *dsb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
unsigned int ondisk_extradevs;
erofs_off_t pos;
struct page *page = NULL;
struct erofs_device_info *dif;
struct erofs_deviceslot *dis;
void *ptr;
int id, err = 0;
sbi->total_blocks = sbi->primarydevice_blocks;
if (!erofs_sb_has_device_table(sbi))
ondisk_extradevs = 0;
else
ondisk_extradevs = le16_to_cpu(dsb->extra_devices);
if (ondisk_extradevs != sbi->devs->extra_devices) {
erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
ondisk_extradevs, sbi->devs->extra_devices);
return -EINVAL;
}
if (!ondisk_extradevs)
return 0;
sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
down_read(&sbi->devs->rwsem);
idr_for_each_entry(&sbi->devs->tree, dif, id) {
erofs_blk_t blk = erofs_blknr(pos);
struct block_device *bdev;
if (!page || page->index != blk) {
if (page) {
kunmap(page);
unlock_page(page);
put_page(page);
}
page = erofs_get_meta_page(sb, blk);
if (IS_ERR(page)) {
up_read(&sbi->devs->rwsem);
return PTR_ERR(page);
}
ptr = kmap(page);
}
dis = ptr + erofs_blkoff(pos);
bdev = blkdev_get_by_path(dif->path,
FMODE_READ | FMODE_EXCL,
sb->s_type);
if (IS_ERR(bdev)) {
err = PTR_ERR(bdev);
goto err_out;
}
dif->bdev = bdev;
dif->dax_dev = fs_dax_get_by_bdev(bdev);
dif->blocks = le32_to_cpu(dis->blocks);
dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
sbi->total_blocks += dif->blocks;
pos += EROFS_DEVT_SLOT_SIZE;
}
err_out:
up_read(&sbi->devs->rwsem);
if (page) {
kunmap(page);
unlock_page(page);
put_page(page);
}
return err;
}
static int erofs_read_superblock(struct super_block *sb)
{
struct erofs_sb_info *sbi;
@@ -303,7 +379,7 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->sb_size);
goto out;
}
sbi->blocks = le32_to_cpu(dsb->blocks);
sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
@@ -330,6 +406,11 @@ static int erofs_read_superblock(struct super_block *sb)
ret = erofs_load_compr_cfgs(sb, dsb);
else
ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0);
if (ret < 0)
goto out;
/* handle multiple devices */
ret = erofs_init_devices(sb, dsb);
out:
kunmap(page);
put_page(page);
@@ -340,15 +421,15 @@ out:
static void erofs_default_options(struct erofs_fs_context *ctx)
{
#ifdef CONFIG_EROFS_FS_ZIP
ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
ctx->max_sync_decompress_pages = 3;
ctx->readahead_sync_decompress = false;
ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
ctx->opt.max_sync_decompress_pages = 3;
ctx->opt.readahead_sync_decompress = false;
#endif
#ifdef CONFIG_EROFS_FS_XATTR
set_opt(ctx, XATTR_USER);
set_opt(&ctx->opt, XATTR_USER);
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
set_opt(ctx, POSIX_ACL);
set_opt(&ctx->opt, POSIX_ACL);
#endif
}
@@ -358,6 +439,7 @@ enum {
Opt_cache_strategy,
Opt_dax,
Opt_dax_enum,
Opt_device,
Opt_err
};
@@ -381,6 +463,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
erofs_param_cache_strategy),
fsparam_flag("dax", Opt_dax),
fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums),
fsparam_string("device", Opt_device),
{}
};
@@ -392,12 +475,12 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
switch (mode) {
case EROFS_MOUNT_DAX_ALWAYS:
warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
set_opt(ctx, DAX_ALWAYS);
clear_opt(ctx, DAX_NEVER);
set_opt(&ctx->opt, DAX_ALWAYS);
clear_opt(&ctx->opt, DAX_NEVER);
return true;
case EROFS_MOUNT_DAX_NEVER:
set_opt(ctx, DAX_NEVER);
clear_opt(ctx, DAX_ALWAYS);
set_opt(&ctx->opt, DAX_NEVER);
clear_opt(&ctx->opt, DAX_ALWAYS);
return true;
default:
DBG_BUGON(1);
@@ -412,9 +495,10 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
static int erofs_fc_parse_param(struct fs_context *fc,
struct fs_parameter *param)
{
struct erofs_fs_context *ctx __maybe_unused = fc->fs_private;
struct erofs_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
int opt;
struct erofs_device_info *dif;
int opt, ret;
opt = fs_parse(fc, erofs_fs_parameters, param, &result);
if (opt < 0)
@@ -424,9 +508,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
case Opt_user_xattr:
#ifdef CONFIG_EROFS_FS_XATTR
if (result.boolean)
set_opt(ctx, XATTR_USER);
set_opt(&ctx->opt, XATTR_USER);
else
clear_opt(ctx, XATTR_USER);
clear_opt(&ctx->opt, XATTR_USER);
#else
errorfc(fc, "{,no}user_xattr options not supported");
#endif
@@ -434,16 +518,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
case Opt_acl:
#ifdef CONFIG_EROFS_FS_POSIX_ACL
if (result.boolean)
set_opt(ctx, POSIX_ACL);
set_opt(&ctx->opt, POSIX_ACL);
else
clear_opt(ctx, POSIX_ACL);
clear_opt(&ctx->opt, POSIX_ACL);
#else
errorfc(fc, "{,no}acl options not supported");
#endif
break;
case Opt_cache_strategy:
#ifdef CONFIG_EROFS_FS_ZIP
ctx->cache_strategy = result.uint_32;
ctx->opt.cache_strategy = result.uint_32;
#else
errorfc(fc, "compression not supported, cache_strategy ignored");
#endif
@@ -456,6 +540,25 @@ static int erofs_fc_parse_param(struct fs_context *fc,
if (!erofs_fc_set_dax_mode(fc, result.uint_32))
return -EINVAL;
break;
case Opt_device:
dif = kzalloc(sizeof(*dif), GFP_KERNEL);
if (!dif)
return -ENOMEM;
dif->path = kstrdup(param->string, GFP_KERNEL);
if (!dif->path) {
kfree(dif);
return -ENOMEM;
}
down_write(&ctx->devs->rwsem);
ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
up_write(&ctx->devs->rwsem);
if (ret < 0) {
kfree(dif->path);
kfree(dif);
return ret;
}
++ctx->devs->extra_devices;
break;
default:
return -ENOPARAM;
}
@@ -540,15 +643,19 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return -ENOMEM;
sb->s_fs_info = sbi;
sbi->opt = ctx->opt;
sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
sbi->devs = ctx->devs;
ctx->devs = NULL;
err = erofs_read_superblock(sb);
if (err)
return err;
if (test_opt(ctx, DAX_ALWAYS) &&
if (test_opt(&sbi->opt, DAX_ALWAYS) &&
!dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
clear_opt(ctx, DAX_ALWAYS);
clear_opt(&sbi->opt, DAX_ALWAYS);
}
sb->s_flags |= SB_RDONLY | SB_NOATIME;
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -557,13 +664,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_op = &erofs_sops;
sb->s_xattr = erofs_xattr_handlers;
if (test_opt(ctx, POSIX_ACL))
if (test_opt(&sbi->opt, POSIX_ACL))
sb->s_flags |= SB_POSIXACL;
else
sb->s_flags &= ~SB_POSIXACL;
sbi->ctx = *ctx;
#ifdef CONFIG_EROFS_FS_ZIP
xa_init(&sbi->managed_pslots);
#endif
@@ -607,20 +712,44 @@ static int erofs_fc_reconfigure(struct fs_context *fc)
DBG_BUGON(!sb_rdonly(sb));
if (test_opt(ctx, POSIX_ACL))
if (test_opt(&ctx->opt, POSIX_ACL))
fc->sb_flags |= SB_POSIXACL;
else
fc->sb_flags &= ~SB_POSIXACL;
sbi->ctx = *ctx;
sbi->opt = ctx->opt;
fc->sb_flags |= SB_RDONLY;
return 0;
}
static int erofs_release_device_info(int id, void *ptr, void *data)
{
struct erofs_device_info *dif = ptr;
fs_put_dax(dif->dax_dev);
if (dif->bdev)
blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL);
kfree(dif->path);
kfree(dif);
return 0;
}
static void erofs_free_dev_context(struct erofs_dev_context *devs)
{
if (!devs)
return;
idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
idr_destroy(&devs->tree);
kfree(devs);
}
static void erofs_fc_free(struct fs_context *fc)
{
kfree(fc->fs_private);
struct erofs_fs_context *ctx = fc->fs_private;
erofs_free_dev_context(ctx->devs);
kfree(ctx);
}
static const struct fs_context_operations erofs_context_ops = {
@@ -632,15 +761,21 @@ static const struct fs_context_operations erofs_context_ops = {
static int erofs_init_fs_context(struct fs_context *fc)
{
fc->fs_private = kzalloc(sizeof(struct erofs_fs_context), GFP_KERNEL);
if (!fc->fs_private)
struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
if (!ctx->devs) {
kfree(ctx);
return -ENOMEM;
}
fc->fs_private = ctx;
/* set default mount options */
erofs_default_options(fc->fs_private);
idr_init(&ctx->devs->tree);
init_rwsem(&ctx->devs->rwsem);
erofs_default_options(ctx);
fc->ops = &erofs_context_ops;
return 0;
}
@@ -659,6 +794,8 @@ static void erofs_kill_sb(struct super_block *sb)
sbi = EROFS_SB(sb);
if (!sbi)
return;
erofs_free_dev_context(sbi->devs);
fs_put_dax(sbi->dax_dev);
kfree(sbi);
sb->s_fs_info = NULL;
@@ -706,6 +843,10 @@ static int __init erofs_module_init(void)
if (err)
goto shrinker_err;
err = z_erofs_lzma_init();
if (err)
goto lzma_err;
erofs_pcpubuf_init();
err = z_erofs_init_zip_subsystem();
if (err)
@@ -720,6 +861,8 @@ static int __init erofs_module_init(void)
fs_err:
z_erofs_exit_zip_subsystem();
zip_err:
z_erofs_lzma_exit();
lzma_err:
erofs_exit_shrinker();
shrinker_err:
kmem_cache_destroy(erofs_inode_cachep);
@@ -730,11 +873,13 @@ icache_err:
static void __exit erofs_module_exit(void)
{
unregister_filesystem(&erofs_fs_type);
z_erofs_exit_zip_subsystem();
erofs_exit_shrinker();
/* Ensure all RCU free inodes are safe before cache is destroyed. */
/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
rcu_barrier();
z_erofs_exit_zip_subsystem();
z_erofs_lzma_exit();
erofs_exit_shrinker();
kmem_cache_destroy(erofs_inode_cachep);
erofs_pcpubuf_exit();
}
@@ -748,7 +893,7 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_type = sb->s_magic;
buf->f_bsize = EROFS_BLKSIZ;
buf->f_blocks = sbi->blocks;
buf->f_blocks = sbi->total_blocks;
buf->f_bfree = buf->f_bavail = 0;
buf->f_files = ULLONG_MAX;
@@ -763,31 +908,31 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
static int erofs_show_options(struct seq_file *seq, struct dentry *root)
{
struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
struct erofs_fs_context *ctx = &sbi->ctx;
struct erofs_mount_opts *opt = &sbi->opt;
#ifdef CONFIG_EROFS_FS_XATTR
if (test_opt(ctx, XATTR_USER))
if (test_opt(opt, XATTR_USER))
seq_puts(seq, ",user_xattr");
else
seq_puts(seq, ",nouser_xattr");
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
if (test_opt(ctx, POSIX_ACL))
if (test_opt(opt, POSIX_ACL))
seq_puts(seq, ",acl");
else
seq_puts(seq, ",noacl");
#endif
#ifdef CONFIG_EROFS_FS_ZIP
if (ctx->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
seq_puts(seq, ",cache_strategy=disabled");
else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
seq_puts(seq, ",cache_strategy=readahead");
else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
seq_puts(seq, ",cache_strategy=readaround");
#endif
if (test_opt(ctx, DAX_ALWAYS))
if (test_opt(opt, DAX_ALWAYS))
seq_puts(seq, ",dax=always");
if (test_opt(ctx, DAX_NEVER))
if (test_opt(opt, DAX_NEVER))
seq_puts(seq, ",dax=never");
return 0;
}

View File

@@ -6,20 +6,29 @@
#include "internal.h"
#include <linux/pagevec.h>
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
{
struct page *page;
struct page *page = *pagepool;
if (!list_empty(pool)) {
page = lru_to_page(pool);
if (page) {
DBG_BUGON(page_ref_count(page) != 1);
list_del(&page->lru);
*pagepool = (struct page *)page_private(page);
} else {
page = alloc_page(gfp);
}
return page;
}
void erofs_release_pages(struct page **pagepool)
{
while (*pagepool) {
struct page *page = *pagepool;
*pagepool = (struct page *)page_private(page);
put_page(page);
}
}
#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

View File

@@ -429,7 +429,7 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
static bool erofs_xattr_user_list(struct dentry *dentry)
{
return test_opt(&EROFS_SB(dentry->d_sb)->ctx, XATTR_USER);
return test_opt(&EROFS_SB(dentry->d_sb)->opt, XATTR_USER);
}
static bool erofs_xattr_trusted_list(struct dentry *dentry)
@@ -476,7 +476,7 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
switch (handler->flags) {
case EROFS_XATTR_INDEX_USER:
if (!test_opt(&sbi->ctx, XATTR_USER))
if (!test_opt(&sbi->opt, XATTR_USER))
return -EOPNOTSUPP;
break;
case EROFS_XATTR_INDEX_TRUSTED:

View File

@@ -96,16 +96,9 @@ static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
DBG_BUGON(1);
}
/*
* a compressed_pages[] placeholder in order to avoid
* being filled with file pages for in-place decompression.
*/
#define PAGE_UNALLOCATED ((void *)0x5F0E4B1D)
/* how to allocate cached pages for a pcluster */
enum z_erofs_cache_alloctype {
DONTALLOC, /* don't allocate any cached pages */
DELAYEDALLOC, /* delayed allocation (at the time of submitting io) */
/*
* try to use cached I/O if page allocation succeeds or fallback
* to in-place I/O instead to avoid any direct reclaim.
@@ -236,7 +229,7 @@ static DEFINE_MUTEX(z_pagemap_global_lock);
static void preload_compressed_pages(struct z_erofs_collector *clt,
struct address_space *mc,
enum z_erofs_cache_alloctype type,
struct list_head *pagepool)
struct page **pagepool)
{
struct z_erofs_pcluster *pcl = clt->pcl;
bool standalone = true;
@@ -267,10 +260,6 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
/* I/O is needed, no possible to decompress directly */
standalone = false;
switch (type) {
case DELAYEDALLOC:
t = tagptr_init(compressed_page_t,
PAGE_UNALLOCATED);
break;
case TRYALLOC:
newpage = erofs_allocpage(pagepool, gfp);
if (!newpage)
@@ -287,12 +276,10 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
continue;
if (page) {
if (page)
put_page(page);
} else if (newpage) {
set_page_private(newpage, 0);
list_add(&newpage->lru, pagepool);
}
else if (newpage)
erofs_pagepool_add(pagepool, newpage);
}
/*
@@ -476,6 +463,11 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
struct erofs_workgroup *grp;
int err;
if (!(map->m_flags & EROFS_MAP_ENCODED)) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
/* no available pcluster, let's allocate one */
pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT);
if (IS_ERR(pcl))
@@ -483,16 +475,11 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt,
atomic_set(&pcl->obj.refcount, 1);
pcl->obj.index = map->m_pa >> PAGE_SHIFT;
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
(map->m_flags & EROFS_MAP_FULL_MAPPED ?
Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
if (map->m_flags & EROFS_MAP_ZIPPED)
pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4;
else
pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
/* new pclusters should be claimed as type 1, primary and followed */
pcl->next = clt->owned_head;
clt->mode = COLLECT_PRIMARY_FOLLOWED;
@@ -643,7 +630,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
}
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct page *page, struct list_head *pagepool)
struct page *page, struct page **pagepool)
{
struct inode *const inode = fe->inode;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -695,7 +682,7 @@ restart_now:
goto err_out;
/* preload all compressed pages (maybe downgrade role if necessary) */
if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la))
if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la))
cache_strategy = TRYALLOC;
else
cache_strategy = DONTALLOC;
@@ -797,7 +784,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
/* Use workqueue and sync decompression for atomic contexts only */
if (in_atomic() || irqs_disabled()) {
queue_work(z_erofs_workqueue, &io->u.work);
sbi->ctx.readahead_sync_decompress = true;
sbi->opt.readahead_sync_decompress = true;
return;
}
z_erofs_decompressqueue_work(&io->u.work);
@@ -837,7 +824,7 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
static int z_erofs_decompress_pcluster(struct super_block *sb,
struct z_erofs_pcluster *pcl,
struct list_head *pagepool)
struct page **pagepool)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
struct z_erofs_pagevec_ctor ctor;
@@ -1037,7 +1024,7 @@ out:
}
static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
struct list_head *pagepool)
struct page **pagepool)
{
z_erofs_next_pcluster_t owned = io->head;
@@ -1061,18 +1048,18 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
{
struct z_erofs_decompressqueue *bgq =
container_of(work, struct z_erofs_decompressqueue, u.work);
LIST_HEAD(pagepool);
struct page *pagepool = NULL;
DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
z_erofs_decompress_queue(bgq, &pagepool);
put_pages_list(&pagepool);
erofs_release_pages(&pagepool);
kvfree(bgq);
}
static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
unsigned int nr,
struct list_head *pagepool,
struct page **pagepool,
struct address_space *mc,
gfp_t gfp)
{
@@ -1092,15 +1079,6 @@ repeat:
if (!page)
goto out_allocpage;
/*
* the cached page has not been allocated and
* an placeholder is out there, prepare it now.
*/
if (page == PAGE_UNALLOCATED) {
tocache = true;
goto out_allocpage;
}
/* process the target tagged pointer */
t = tagptr_init(compressed_page_t, page);
justfound = tagptr_unfold_tags(t);
@@ -1174,7 +1152,7 @@ repeat:
out_allocpage:
page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
list_add(&page->lru, pagepool);
erofs_pagepool_add(pagepool, page);
cond_resched();
goto repeat;
}
@@ -1258,7 +1236,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
static void z_erofs_submit_queue(struct super_block *sb,
struct z_erofs_decompress_frontend *f,
struct list_head *pagepool,
struct page **pagepool,
struct z_erofs_decompressqueue *fgq,
bool *force_fg)
{
@@ -1267,8 +1245,9 @@ static void z_erofs_submit_queue(struct super_block *sb,
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
void *bi_private;
z_erofs_next_pcluster_t owned_head = f->clt.owned_head;
/* since bio will be NULL, no need to initialize last_index */
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
pgoff_t last_index;
struct block_device *last_bdev;
unsigned int nr_bios = 0;
struct bio *bio = NULL;
@@ -1280,6 +1259,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
q[JQ_SUBMIT]->head = owned_head;
do {
struct erofs_map_dev mdev;
struct z_erofs_pcluster *pcl;
pgoff_t cur, end;
unsigned int i = 0;
@@ -1291,7 +1271,13 @@ static void z_erofs_submit_queue(struct super_block *sb,
pcl = container_of(owned_head, struct z_erofs_pcluster, next);
cur = pcl->obj.index;
/* no device id here, thus it will always succeed */
mdev = (struct erofs_map_dev) {
.m_pa = blknr_to_addr(pcl->obj.index),
};
(void)erofs_map_dev(sb, &mdev);
cur = erofs_blknr(mdev.m_pa);
end = cur + pcl->pclusterpages;
/* close the main owned chain at first */
@@ -1307,7 +1293,8 @@ static void z_erofs_submit_queue(struct super_block *sb,
if (!page)
continue;
if (bio && cur != last_index + 1) {
if (bio && (cur != last_index + 1 ||
last_bdev != mdev.m_bdev)) {
submit_bio_retry:
submit_bio(bio);
bio = NULL;
@@ -1315,9 +1302,10 @@ submit_bio_retry:
if (!bio) {
bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
bio->bi_end_io = z_erofs_decompressqueue_endio;
bio_set_dev(bio, sb->s_bdev);
bio_set_dev(bio, mdev.m_bdev);
last_bdev = mdev.m_bdev;
bio->bi_iter.bi_sector = (sector_t)cur <<
LOG_SECTORS_PER_BLOCK;
bio->bi_private = bi_private;
@@ -1356,7 +1344,7 @@ submit_bio_retry:
static void z_erofs_runqueue(struct super_block *sb,
struct z_erofs_decompress_frontend *f,
struct list_head *pagepool, bool force_fg)
struct page **pagepool, bool force_fg)
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
@@ -1378,18 +1366,87 @@ static void z_erofs_runqueue(struct super_block *sb,
z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
}
/*
* Since partial uptodate is still unimplemented for now, we have to use
* approximate readmore strategies as a start.
*/
static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
struct readahead_control *rac,
erofs_off_t end,
struct page **pagepool,
bool backmost)
{
struct inode *inode = f->inode;
struct erofs_map_blocks *map = &f->map;
erofs_off_t cur;
int err;
if (backmost) {
map->m_la = end;
err = z_erofs_map_blocks_iter(inode, map,
EROFS_GET_BLOCKS_READMORE);
if (err)
return;
/* expend ra for the trailing edge if readahead */
if (rac) {
loff_t newstart = readahead_pos(rac);
cur = round_up(map->m_la + map->m_llen, PAGE_SIZE);
readahead_expand(rac, newstart, cur - newstart);
return;
}
end = round_up(end, PAGE_SIZE);
} else {
end = round_up(map->m_la, PAGE_SIZE);
if (!map->m_llen)
return;
}
cur = map->m_la + map->m_llen - 1;
while (cur >= end) {
pgoff_t index = cur >> PAGE_SHIFT;
struct page *page;
page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
if (!page)
goto skip;
if (PageUptodate(page)) {
unlock_page(page);
put_page(page);
goto skip;
}
err = z_erofs_do_read_page(f, page, pagepool);
if (err)
erofs_err(inode->i_sb,
"readmore error at page %lu @ nid %llu",
index, EROFS_I(inode)->nid);
put_page(page);
skip:
if (cur < PAGE_SIZE)
break;
cur = (index << PAGE_SHIFT) - 1;
}
}
static int z_erofs_readpage(struct file *file, struct page *page)
{
struct inode *const inode = page->mapping->host;
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *pagepool = NULL;
int err;
LIST_HEAD(pagepool);
trace_erofs_readpage(page, false);
f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
z_erofs_pcluster_readmore(&f, NULL, f.headoffset + PAGE_SIZE - 1,
&pagepool, true);
err = z_erofs_do_read_page(&f, page, &pagepool);
z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false);
(void)z_erofs_collector_end(&f.clt);
/* if some compressed cluster ready, need submit them anyway */
@@ -1401,8 +1458,7 @@ static int z_erofs_readpage(struct file *file, struct page *page)
if (f.map.mpage)
put_page(f.map.mpage);
/* clean up the remaining free pages */
put_pages_list(&pagepool);
erofs_release_pages(&pagepool);
return err;
}
@@ -1410,29 +1466,19 @@ static void z_erofs_readahead(struct readahead_control *rac)
{
struct inode *const inode = rac->mapping->host;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
unsigned int nr_pages = readahead_count(rac);
bool sync = (sbi->ctx.readahead_sync_decompress &&
nr_pages <= sbi->ctx.max_sync_decompress_pages);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *page, *head = NULL;
LIST_HEAD(pagepool);
trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
struct page *pagepool = NULL, *head = NULL, *page;
unsigned int nr_pages;
f.readahead = true;
f.headoffset = readahead_pos(rac);
z_erofs_pcluster_readmore(&f, rac, f.headoffset +
readahead_length(rac) - 1, &pagepool, true);
nr_pages = readahead_count(rac);
trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
while ((page = readahead_page(rac))) {
prefetchw(&page->flags);
/*
* A pure asynchronous readahead is indicated if
* a PG_readahead marked page is hitted at first.
* Let's also do asynchronous decompression for this case.
*/
sync &= !(PageReadahead(page) && !head);
set_page_private(page, (unsigned long)head);
head = page;
}
@@ -1451,16 +1497,15 @@ static void z_erofs_readahead(struct readahead_control *rac)
page->index, EROFS_I(inode)->nid);
put_page(page);
}
z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
(void)z_erofs_collector_end(&f.clt);
z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync);
z_erofs_runqueue(inode->i_sb, &f, &pagepool,
sbi->opt.readahead_sync_decompress &&
nr_pages <= sbi->opt.max_sync_decompress_pages);
if (f.map.mpage)
put_page(f.map.mpage);
/* clean up the remaining free pages */
put_pages_list(&pagepool);
erofs_release_pages(&pagepool);
}
const struct address_space_operations z_erofs_aops = {

View File

@@ -94,13 +94,6 @@ struct z_erofs_decompressqueue {
} u;
};
#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
struct page *page)
{
return page->mapping == MNGD_MAPPING(sbi);
}
#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
@@ -186,4 +179,3 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
#define Z_EROFS_VMAP_GLOBAL_PAGES 2048
#endif

View File

@@ -28,7 +28,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
struct super_block *const sb = inode->i_sb;
int err;
int err, headnr;
erofs_off_t pos;
struct page *page;
void *kaddr;
@@ -68,9 +68,11 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) {
erofs_err(sb, "unknown compression format %u for nid %llu, please upgrade kernel",
vi->z_algorithmtype[0], vi->nid);
headnr = 0;
if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) {
erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel",
headnr + 1, vi->z_algorithmtype[headnr], vi->nid);
err = -EOPNOTSUPP;
goto unmap_done;
}
@@ -111,7 +113,7 @@ struct z_erofs_maprecorder {
unsigned long lcn;
/* compression extent information gathered */
u8 type;
u8 type, headtype;
u16 clusterofs;
u16 delta[2];
erofs_blk_t pblk, compressedlcs;
@@ -178,7 +180,8 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
m->clusterofs = 1 << vi->z_logical_clusterbits;
m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
@@ -189,7 +192,8 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
break;
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
m->clusterofs = le16_to_cpu(di->di_clusterofs);
m->pblk = le32_to_cpu(di->di_u.blkaddr);
break;
@@ -446,9 +450,9 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
}
return z_erofs_extent_lookback(m, m->delta[0]);
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
map->m_flags &= ~EROFS_MAP_ZIPPED;
fallthrough;
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
m->headtype = m->type;
map->m_la = (lcn << lclusterbits) | m->clusterofs;
break;
default:
@@ -471,13 +475,18 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
int err;
DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
if (!(map->m_flags & EROFS_MAP_ZIPPED) ||
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 &&
m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD2);
DBG_BUGON(m->type != m->headtype);
if (m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD1) &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
map->m_plen = 1 << lclusterbits;
return 0;
}
lcn = m->lcn + 1;
if (m->compressedlcs)
goto out;
@@ -499,7 +508,8 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
switch (m->type) {
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
/*
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
* rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
@@ -554,7 +564,8 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
DBG_BUGON(!m->delta[1] &&
m->clusterofs != 1 << lclusterbits);
} else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN ||
m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD) {
m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 ||
m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) {
/* go on until the next HEAD lcluster */
if (lcn != headlcn)
break;
@@ -609,16 +620,15 @@ int z_erofs_map_blocks_iter(struct inode *inode,
if (err)
goto unmap_out;
map->m_flags = EROFS_MAP_ZIPPED; /* by default, compressed */
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
end = (m.lcn + 1ULL) << lclusterbits;
switch (m.type) {
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
if (endoff >= m.clusterofs)
map->m_flags &= ~EROFS_MAP_ZIPPED;
fallthrough;
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2:
if (endoff >= m.clusterofs) {
m.headtype = m.type;
map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
break;
}
@@ -650,13 +660,22 @@ int z_erofs_map_blocks_iter(struct inode *inode,
map->m_llen = end - map->m_la;
map->m_pa = blknr_to_addr(m.pblk);
map->m_flags |= EROFS_MAP_MAPPED;
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
if (err)
goto out;
if (flags & EROFS_GET_BLOCKS_FIEMAP) {
if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN)
map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2)
map->m_algorithmformat = vi->z_algorithmtype[1];
else
map->m_algorithmformat = vi->z_algorithmtype[0];
if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
((flags & EROFS_GET_BLOCKS_READMORE) &&
map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA &&
map->m_llen >= EROFS_BLKSIZ)) {
err = z_erofs_get_extent_decompressedlen(&m);
if (!err)
map->m_flags |= EROFS_MAP_FULL_MAPPED;

View File

@@ -1572,7 +1572,6 @@ static const struct fscrypt_operations ext4_cryptops = {
.set_context = ext4_set_context,
.get_dummy_policy = ext4_get_dummy_policy,
.empty_dir = ext4_empty_dir,
.max_namelen = EXT4_NAME_LEN,
.has_stable_inodes = ext4_has_stable_inodes,
.get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits,
};

View File

@@ -653,7 +653,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
return PTR_ERR(inode);
}
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err) {
iput(inode);
goto err_out;
@@ -705,9 +705,6 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= SB_ACTIVE;
/*
* Turn on quotas which were not enabled for read-only mounts if
* filesystem has quota feature, so that they are updated correctly.

View File

@@ -881,6 +881,25 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
return is_page_in_cluster(cc, index);
}
bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
int index, int nr_pages)
{
unsigned long pgidx;
int i;
if (nr_pages - index < cc->cluster_size)
return false;
pgidx = pvec->pages[index]->index;
for (i = 1; i < cc->cluster_size; i++) {
if (pvec->pages[index + i]->index != pgidx + i)
return false;
}
return true;
}
static bool cluster_has_invalid_data(struct compress_ctx *cc)
{
loff_t i_size = i_size_read(cc->inode);

View File

@@ -1470,10 +1470,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
struct extent_info ei = {0, };
block_t blkaddr;
unsigned int start_pgofs;
int bidx = 0;
if (!maxblocks)
return 0;
map->m_bdev = inode->i_sb->s_bdev;
map->m_multidev_dio =
f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
map->m_len = 0;
map->m_flags = 0;
@@ -1496,6 +1501,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
if (flag == F2FS_GET_BLOCK_DIO)
f2fs_wait_on_block_writeback_range(inode,
map->m_pblk, map->m_len);
if (map->m_multidev_dio) {
block_t blk_addr = map->m_pblk;
bidx = f2fs_target_device_index(sbi, map->m_pblk);
map->m_bdev = FDEV(bidx).bdev;
map->m_pblk -= FDEV(bidx).start_blk;
map->m_len = min(map->m_len,
FDEV(bidx).end_blk + 1 - map->m_pblk);
if (map->m_may_create)
f2fs_update_device_state(sbi, inode->i_ino,
blk_addr, map->m_len);
}
goto out;
}
@@ -1614,6 +1634,9 @@ next_block:
if (flag == F2FS_GET_BLOCK_PRE_AIO)
goto skip;
if (map->m_multidev_dio)
bidx = f2fs_target_device_index(sbi, blkaddr);
if (map->m_len == 0) {
/* preallocated unwritten block should be mapped for fiemap. */
if (blkaddr == NEW_ADDR)
@@ -1622,10 +1645,15 @@ next_block:
map->m_pblk = blkaddr;
map->m_len = 1;
if (map->m_multidev_dio)
map->m_bdev = FDEV(bidx).bdev;
} else if ((map->m_pblk != NEW_ADDR &&
blkaddr == (map->m_pblk + ofs)) ||
(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
flag == F2FS_GET_BLOCK_PRE_DIO) {
if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
goto sync_out;
ofs++;
map->m_len++;
} else {
@@ -1678,10 +1706,32 @@ skip:
sync_out:
/* for hardware encryption, but to avoid potential issue in future */
if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
/*
* for hardware encryption, but to avoid potential issue
* in future
*/
f2fs_wait_on_block_writeback_range(inode,
map->m_pblk, map->m_len);
invalidate_mapping_pages(META_MAPPING(sbi),
map->m_pblk, map->m_pblk);
if (map->m_multidev_dio) {
block_t blk_addr = map->m_pblk;
bidx = f2fs_target_device_index(sbi, map->m_pblk);
map->m_bdev = FDEV(bidx).bdev;
map->m_pblk -= FDEV(bidx).start_blk;
if (map->m_may_create)
f2fs_update_device_state(sbi, inode->i_ino,
blk_addr, map->m_len);
f2fs_bug_on(sbi, blk_addr + map->m_len >
FDEV(bidx).end_blk + 1);
}
}
if (flag == F2FS_GET_BLOCK_PRECACHE) {
if (map->m_flags & F2FS_MAP_MAPPED) {
@@ -1701,7 +1751,7 @@ unlock_out:
f2fs_balance_fs(sbi, dn.node_changed);
}
out:
trace_f2fs_map_blocks(inode, map, err);
trace_f2fs_map_blocks(inode, map, create, flag, err);
return err;
}
@@ -1760,6 +1810,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
bh->b_size = blks_to_bytes(inode, map.m_len);
if (map.m_multidev_dio)
bh->b_bdev = map.m_bdev;
}
return err;
}
@@ -2994,6 +3047,10 @@ readd:
need_readd = false;
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_compressed_file(inode)) {
void *fsdata = NULL;
struct page *pagep;
int ret2;
ret = f2fs_init_compress_ctx(&cc);
if (ret) {
done = 1;
@@ -3012,10 +3069,8 @@ readd:
if (unlikely(f2fs_cp_error(sbi)))
goto lock_page;
if (f2fs_cluster_is_empty(&cc)) {
void *fsdata = NULL;
struct page *pagep;
int ret2;
if (!f2fs_cluster_is_empty(&cc))
goto lock_page;
ret2 = f2fs_prepare_compress_overwrite(
inode, &pagep,
@@ -3025,15 +3080,13 @@ readd:
done = 1;
break;
} else if (ret2 &&
!f2fs_compress_write_end(inode,
fsdata, page->index,
1)) {
(!f2fs_compress_write_end(inode,
fsdata, page->index, 1) ||
!f2fs_all_cluster_page_loaded(&cc,
&pvec, i, nr_pages))) {
retry = 1;
break;
}
} else {
goto lock_page;
}
}
#endif
/* give a priority to WB_SYNC threads */

View File

@@ -55,6 +55,7 @@ enum {
FAULT_DISCARD,
FAULT_WRITE_IO,
FAULT_SLAB_ALLOC,
FAULT_DQUOT_INIT,
FAULT_MAX,
};
@@ -561,6 +562,9 @@ enum {
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
/* dirty segments threshold for triggering CP */
#define DEFAULT_DIRTY_THRESHOLD 4
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
@@ -617,6 +621,7 @@ struct extent_tree {
F2FS_MAP_UNWRITTEN)
struct f2fs_map_blocks {
struct block_device *m_bdev; /* for multi-device dio */
block_t m_pblk;
block_t m_lblk;
unsigned int m_len;
@@ -625,6 +630,7 @@ struct f2fs_map_blocks {
pgoff_t *m_next_extent; /* point to next possible extent */
int m_seg_type;
bool m_may_create; /* indicate it is from write path */
bool m_multidev_dio; /* indicate it allows multi-device dio */
};
/* for flag in get_data_block */
@@ -1286,6 +1292,8 @@ enum {
enum {
FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */
FS_MODE_LFS, /* use lfs allocation only */
FS_MODE_FRAGMENT_SEG, /* segment fragmentation mode */
FS_MODE_FRAGMENT_BLK, /* block fragmentation mode */
};
enum {
@@ -1728,12 +1736,15 @@ struct f2fs_sb_info {
/* For shrinker support */
struct list_head s_list;
struct mutex umount_mutex;
unsigned int shrinker_run_no;
/* For multi devices */
int s_ndevs; /* number of devices */
struct f2fs_dev_info *devs; /* for device list */
unsigned int dirty_device; /* for checkpoint data flush */
spinlock_t dev_lock; /* protect dirty_device */
struct mutex umount_mutex;
unsigned int shrinker_run_no;
bool aligned_blksize; /* all devices has the same logical blksize */
/* For write statistics */
u64 sectors_written_start;
@@ -1756,6 +1767,9 @@ struct f2fs_sb_info {
unsigned long seq_file_ra_mul; /* multiplier for ra_pages of seq. files in fadvise */
int max_fragment_chunk; /* max chunk size for block fragmentation mode */
int max_fragment_hole; /* max hole size for block fragmentation mode */
#ifdef CONFIG_F2FS_FS_COMPRESSION
struct kmem_cache *page_array_slab; /* page array entry */
unsigned int page_array_slab_size; /* default page array slab size */
@@ -3363,6 +3377,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
*/
int f2fs_inode_dirtied(struct inode *inode, bool sync);
void f2fs_inode_synced(struct inode *inode);
int f2fs_dquot_initialize(struct inode *inode);
int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
int f2fs_quota_sync(struct super_block *sb, int type);
loff_t max_file_blocks(struct inode *inode);
@@ -3492,6 +3507,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio);
void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
block_t blkaddr, unsigned int blkcnt);
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered, bool locked);
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -3516,6 +3533,16 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
unsigned int segno);
#define DEF_FRAGMENT_SIZE 4
#define MIN_FRAGMENT_SIZE 1
#define MAX_FRAGMENT_SIZE 512
static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi)
{
return F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG ||
F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK;
}
/*
* checkpoint.c
*/
@@ -4027,6 +4054,8 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed,
block_t blkaddr);
bool f2fs_cluster_is_empty(struct compress_ctx *cc);
bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
int index, int nr_pages);
bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
int f2fs_write_multi_pages(struct compress_ctx *cc,
@@ -4301,6 +4330,16 @@ static inline int block_unaligned_IO(struct inode *inode,
return align & blocksize_mask;
}
static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi,
int flag)
{
if (!f2fs_is_multi_device(sbi))
return false;
if (flag != F2FS_GET_BLOCK_DIO)
return false;
return sbi->aligned_blksize;
}
static inline bool f2fs_force_buffered_io(struct inode *inode,
struct kiocb *iocb, struct iov_iter *iter)
{
@@ -4313,7 +4352,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
return true;
if (f2fs_compressed_file(inode))
return true;
if (f2fs_is_multi_device(sbi))
/* disallow direct IO if any of devices has unaligned blksize */
if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
return true;
/*
* for blkzoned device, fallback direct IO to buffered IO, so

View File

@@ -786,7 +786,7 @@ int f2fs_truncate(struct inode *inode)
return -EIO;
}
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
return err;
@@ -916,7 +916,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return err;
if (is_quota_modification(inode, attr)) {
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
return err;
}
@@ -3020,7 +3020,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
}
f2fs_put_page(ipage, 1);
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
return err;

View File

@@ -14,6 +14,7 @@
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>
#include "f2fs.h"
#include "node.h"
@@ -257,7 +258,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->max_search = sbi->max_victim_search;
/* let's select beginning hot/small space first in no_heap mode*/
if (test_opt(sbi, NOHEAP) &&
if (f2fs_need_rand_seg(sbi))
p->offset = prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
else if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
p->offset = 0;
else

View File

@@ -210,7 +210,7 @@ int f2fs_convert_inline_inode(struct inode *inode)
f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb))
return 0;
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
return err;

View File

@@ -754,7 +754,7 @@ void f2fs_evict_inode(struct inode *inode)
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err) {
err = 0;
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);

View File

@@ -74,7 +74,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (err)
goto fail_drop;
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
goto fail_drop;
@@ -345,7 +345,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
@@ -404,7 +404,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
F2FS_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
@@ -460,7 +460,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
return 0;
}
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
@@ -598,10 +598,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
goto fail;
}
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
goto fail;
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
goto fail;
@@ -675,7 +675,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
if (err)
return err;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
@@ -746,7 +746,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
@@ -803,7 +803,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
@@ -841,7 +841,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
@@ -965,16 +965,16 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
return err;
}
err = dquot_initialize(old_dir);
err = f2fs_dquot_initialize(old_dir);
if (err)
goto out;
err = dquot_initialize(new_dir);
err = f2fs_dquot_initialize(new_dir);
if (err)
goto out;
if (new_inode) {
err = dquot_initialize(new_inode);
err = f2fs_dquot_initialize(new_inode);
if (err)
goto out;
}
@@ -1138,11 +1138,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
F2FS_I(new_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(old_dir);
err = f2fs_dquot_initialize(old_dir);
if (err)
goto out;
err = dquot_initialize(new_dir);
err = f2fs_dquot_initialize(new_dir);
if (err)
goto out;

View File

@@ -138,11 +138,6 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
}
static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
{
return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
}
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */

View File

@@ -81,7 +81,7 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
if (IS_ERR(inode))
return ERR_CAST(inode);
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
goto err_out;
@@ -203,7 +203,7 @@ retry:
goto out_put;
}
err = dquot_initialize(einode);
err = f2fs_dquot_initialize(einode);
if (err) {
iput(einode);
goto out_put;
@@ -508,7 +508,7 @@ got_it:
if (IS_ERR(inode))
return PTR_ERR(inode);
ret = dquot_initialize(inode);
ret = f2fs_dquot_initialize(inode);
if (ret) {
iput(inode);
return ret;
@@ -787,8 +787,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= SB_ACTIVE;
/* Turn on quotas so that they are updated correctly */
quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif
@@ -816,10 +814,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
else {
/* restore s_flags to let iput() trash data */
sbi->sb->s_flags = s_flags;
}
else
f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
skip:
fix_curseg_write_pointer = !check_only || list_empty(&inode_list);

View File

@@ -15,6 +15,7 @@
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>
#include "f2fs.h"
#include "segment.h"
@@ -529,6 +530,25 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
}
}
static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
{
int factor = rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
unsigned int threshold = sbi->blocks_per_seg * factor *
DEFAULT_DIRTY_THRESHOLD;
unsigned int global_threshold = threshold * 3 / 2;
if (dents >= threshold || qdata >= threshold ||
nodes >= threshold || meta >= threshold ||
imeta >= threshold)
return true;
return dents + qdata + nodes + meta + imeta > global_threshold;
}
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
@@ -547,8 +567,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
else
f2fs_build_free_nids(sbi, false, false);
if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) ||
excess_prefree_segs(sbi))
if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
goto do_sync;
/* there is background inflight IO or foreground operation recently */
@@ -2630,6 +2650,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
unsigned short seg_type = curseg->seg_type;
sanity_check_seg_type(sbi, seg_type);
if (f2fs_need_rand_seg(sbi))
return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
@@ -2681,6 +2703,9 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
curseg->next_segno = segno;
reset_curseg(sbi, type, 1);
curseg->alloc_type = LFS;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
curseg->fragment_remained_chunk =
prandom_u32() % sbi->max_fragment_chunk + 1;
}
static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -2707,12 +2732,22 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi,
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
struct curseg_info *seg)
{
if (seg->alloc_type == SSR)
if (seg->alloc_type == SSR) {
seg->next_blkoff =
__next_free_blkoff(sbi, seg->segno,
seg->next_blkoff + 1);
else
} else {
seg->next_blkoff++;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
/* To allocate block chunks in different sizes, use random number */
if (--seg->fragment_remained_chunk <= 0) {
seg->fragment_remained_chunk =
prandom_u32() % sbi->max_fragment_chunk + 1;
seg->next_blkoff +=
prandom_u32() % sbi->max_fragment_hole + 1;
}
}
}
}
bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
@@ -3485,18 +3520,18 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
up_read(&SM_I(sbi)->curseg_lock);
}
static void update_device_state(struct f2fs_io_info *fio)
void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
block_t blkaddr, unsigned int blkcnt)
{
struct f2fs_sb_info *sbi = fio->sbi;
unsigned int devidx;
if (!f2fs_is_multi_device(sbi))
return;
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
while (1) {
unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
/* update device state for fsync */
f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
/* update device state for checkpoint */
if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
@@ -3504,6 +3539,12 @@ static void update_device_state(struct f2fs_io_info *fio)
f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
spin_unlock(&sbi->dev_lock);
}
if (blkcnt <= blks)
break;
blkcnt -= blks;
blkaddr += blks;
}
}
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
@@ -3529,7 +3570,7 @@ reallocate:
goto reallocate;
}
update_device_state(fio);
f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
if (keep_order)
up_read(&fio->sbi->io_order_lock);
@@ -3611,6 +3652,9 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
goto drop_bio;
}
invalidate_mapping_pages(META_MAPPING(sbi),
fio->new_blkaddr, fio->new_blkaddr);
stat_inc_inplace_blocks(fio->sbi);
if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
@@ -3618,7 +3662,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
else
err = f2fs_submit_page_bio(fio);
if (!err) {
update_device_state(fio);
f2fs_update_device_state(fio->sbi, fio->ino,
fio->new_blkaddr, 1);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
}

View File

@@ -314,6 +314,7 @@ struct curseg_info {
unsigned short next_blkoff; /* next block offset to write */
unsigned int zone; /* current zone number */
unsigned int next_segno; /* preallocated segment */
int fragment_remained_chunk; /* remained block size in a chunk for block fragmentation mode */
bool inited; /* indicate inmem log is inited */
};

View File

@@ -58,6 +58,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_DISCARD] = "discard error",
[FAULT_WRITE_IO] = "write IO error",
[FAULT_SLAB_ALLOC] = "slab alloc",
[FAULT_DQUOT_INIT] = "dquot initialize",
};
void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -817,6 +818,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
} else if (!strcmp(name, "lfs")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
} else if (!strcmp(name, "fragment:segment")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG;
} else if (!strcmp(name, "fragment:block")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK;
} else {
kfree(name);
return -EINVAL;
@@ -1896,6 +1901,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, "adaptive");
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS)
seq_puts(seq, "lfs");
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG)
seq_puts(seq, "fragment:segment");
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
seq_puts(seq, "fragment:block");
seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
if (test_opt(sbi, RESERVE_ROOT))
seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
@@ -2491,6 +2500,16 @@ retry:
return len - towrite;
}
int f2fs_dquot_initialize(struct inode *inode)
{
if (time_to_inject(F2FS_I_SB(inode), FAULT_DQUOT_INIT)) {
f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_DQUOT_INIT);
return -ESRCH;
}
return dquot_initialize(inode);
}
static struct dquot **f2fs_get_dquots(struct inode *inode)
{
return F2FS_I(inode)->i_dquot;
@@ -2875,6 +2894,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
.get_nextdqblk = dquot_get_next_dqblk,
};
#else
int f2fs_dquot_initialize(struct inode *inode)
{
return 0;
}
int f2fs_quota_sync(struct super_block *sb, int type)
{
return 0;
@@ -2976,7 +3000,6 @@ static const struct fscrypt_operations f2fs_cryptops = {
.set_context = f2fs_set_context,
.get_dummy_policy = f2fs_get_dummy_policy,
.empty_dir = f2fs_empty_dir,
.max_namelen = F2FS_NAME_LEN,
.has_stable_inodes = f2fs_has_stable_inodes,
.get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits,
.get_num_devices = f2fs_get_num_devices,
@@ -3523,6 +3546,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
sbi->migration_granularity = sbi->segs_per_sec;
sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -3747,6 +3772,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
unsigned int max_devices = MAX_DEVICES;
unsigned int logical_blksize;
int i;
/* Initialize single device information */
@@ -3767,6 +3793,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
if (!sbi->devs)
return -ENOMEM;
logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
sbi->aligned_blksize = true;
for (i = 0; i < max_devices; i++) {
if (i > 0 && !RDEV(i).path[0])
@@ -3803,6 +3832,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
/* to release errored devices */
sbi->s_ndevs = i + 1;
if (logical_blksize != bdev_logical_block_size(FDEV(i).bdev))
sbi->aligned_blksize = false;
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
!f2fs_sb_has_blkzoned(sbi)) {

View File

@@ -196,7 +196,7 @@ static ssize_t encoding_show(struct f2fs_attr *a,
struct super_block *sb = sbi->sb;
if (f2fs_sb_has_casefold(sbi))
return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
return sysfs_emit(buf, "%s (%d.%d.%d)\n",
sb->s_encoding->charset,
(sb->s_encoding->version >> 16) & 0xff,
(sb->s_encoding->version >> 8) & 0xff,
@@ -245,7 +245,7 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a,
static ssize_t main_blkaddr_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%llu\n",
return sysfs_emit(buf, "%llu\n",
(unsigned long long)MAIN_BLKADDR(sbi));
}
@@ -551,6 +551,22 @@ out:
return count;
}
if (!strcmp(a->attr.name, "max_fragment_chunk")) {
if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
sbi->max_fragment_chunk = t;
else
return -EINVAL;
return count;
}
if (!strcmp(a->attr.name, "max_fragment_hole")) {
if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
sbi->max_fragment_hole = t;
else
return -EINVAL;
return count;
}
*ui = (unsigned int)t;
return count;
@@ -781,6 +797,8 @@ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -859,6 +877,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(seq_file_ra_mul),
ATTR_LIST(gc_segment_mode),
ATTR_LIST(gc_reclaimed_segments),
ATTR_LIST(max_fragment_chunk),
ATTR_LIST(max_fragment_hole),
NULL,
};
ATTRIBUTE_GROUPS(f2fs);

View File

@@ -136,7 +136,7 @@ static int f2fs_begin_enable_verity(struct file *filp)
* here and not rely on ->open() doing it. This must be done before
* evicting the inline data.
*/
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
return err;

View File

@@ -773,7 +773,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
return err;

View File

@@ -82,5 +82,4 @@ const struct fscrypt_operations ubifs_crypt_operations = {
.get_context = ubifs_crypt_get_context,
.set_context = ubifs_crypt_set_context,
.empty_dir = ubifs_crypt_empty_dir,
.max_namelen = UBIFS_MAX_NLEN,
};

View File

@@ -118,9 +118,6 @@ struct fscrypt_operations {
*/
bool (*empty_dir)(struct inode *inode);
/* The filesystem's maximum ciphertext filename length, in bytes */
unsigned int max_namelen;
/*
* Check whether the filesystem's inode numbers and UUID are stable,
* meaning that they will never be changed even by offline operations

View File

@@ -233,6 +233,112 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s);
*/
XZ_EXTERN void xz_dec_end(struct xz_dec *s);
/*
* Decompressor for MicroLZMA, an LZMA variant with a very minimal header.
* See xz_dec_microlzma_alloc() below for details.
*
* These functions aren't used or available in preboot code and thus aren't
* marked with XZ_EXTERN. This avoids warnings about static functions that
* are never defined.
*/
/**
* struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state
*/
struct xz_dec_microlzma;
/**
* xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder
* @mode XZ_SINGLE or XZ_PREALLOC
* @dict_size LZMA dictionary size. This must be at least 4 KiB and
* at most 3 GiB.
*
* In contrast to xz_dec_init(), this function only allocates the memory
* and remembers the dictionary size. xz_dec_microlzma_reset() must be used
* before calling xz_dec_microlzma_run().
*
* The amount of allocated memory is a little less than 30 KiB with XZ_SINGLE.
* With XZ_PREALLOC also a dictionary buffer of dict_size bytes is allocated.
*
* On success, xz_dec_microlzma_alloc() returns a pointer to
* struct xz_dec_microlzma. If memory allocation fails or
* dict_size is invalid, NULL is returned.
*
* The compressed format supported by this decoder is a raw LZMA stream
* whose first byte (always 0x00) has been replaced with bitwise-negation
* of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is
* 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00.
* Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream
* marker must not be used. The unused values are reserved for future use.
* This MicroLZMA header format was created for use in EROFS but may be used
* by others too.
*/
extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
uint32_t dict_size);
/**
* xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state
* @s Decoder state allocated using xz_dec_microlzma_alloc()
* @comp_size Compressed size of the input stream
* @uncomp_size Uncompressed size of the input stream. A value smaller
* than the real uncompressed size of the input stream can
* be specified if uncomp_size_is_exact is set to false.
* uncomp_size can never be set to a value larger than the
* expected real uncompressed size because it would eventually
* result in XZ_DATA_ERROR.
* @uncomp_size_is_exact This is an int instead of bool to avoid
* requiring stdbool.h. This should normally be set to true.
* When this is set to false, error detection is weaker.
*/
extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s,
uint32_t comp_size, uint32_t uncomp_size,
int uncomp_size_is_exact);
/**
* xz_dec_microlzma_run() - Run the MicroLZMA decoder
* @s Decoder state initialized using xz_dec_microlzma_reset()
* @b: Input and output buffers
*
* This works similarly to xz_dec_run() with a few important differences.
* Only the differences are documented here.
*
* The only possible return values are XZ_OK, XZ_STREAM_END, and
* XZ_DATA_ERROR. This function cannot return XZ_BUF_ERROR: if no progress
* is possible due to lack of input data or output space, this function will
* keep returning XZ_OK. Thus, the calling code must be written so that it
* will eventually provide input and output space matching (or exceeding)
* comp_size and uncomp_size arguments given to xz_dec_microlzma_reset().
* If the caller cannot do this (for example, if the input file is truncated
* or otherwise corrupt), the caller must detect this error by itself to
* avoid an infinite loop.
*
* If the compressed data seems to be corrupt, XZ_DATA_ERROR is returned.
* This can happen also when incorrect dictionary, uncompressed, or
* compressed sizes have been specified.
*
* With XZ_PREALLOC only: As an extra feature, b->out may be NULL to skip over
* uncompressed data. This way the caller doesn't need to provide a temporary
* output buffer for the bytes that will be ignored.
*
* With XZ_SINGLE only: In contrast to xz_dec_run(), the return value XZ_OK
* is also possible and thus XZ_SINGLE is actually a limited multi-call mode.
* After XZ_OK the bytes decoded so far may be read from the output buffer.
* It is possible to continue decoding but the variables b->out and b->out_pos
* MUST NOT be changed by the caller. Increasing the value of b->out_size is
* allowed to make more output space available; one doesn't need to provide
* space for the whole uncompressed data on the first call. The input buffer
* may be changed normally like with XZ_PREALLOC. This way input data can be
* provided from non-contiguous memory.
*/
extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s,
struct xz_buf *b);
/**
* xz_dec_microlzma_end() - Free the memory allocated for the decoder state
* @s: Decoder state allocated using xz_dec_microlzma_alloc().
* If s is NULL, this function does nothing.
*/
extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s);
/*
* Standalone build (userspace build or in-kernel build for boot time use)
* needs a CRC32 implementation. For normal in-kernel use, kernel's own

View File

@@ -24,7 +24,7 @@ struct erofs_map_blocks;
#define show_mflags(flags) __print_flags(flags, "", \
{ EROFS_MAP_MAPPED, "M" }, \
{ EROFS_MAP_META, "I" }, \
{ EROFS_MAP_ZIPPED, "Z" })
{ EROFS_MAP_ENCODED, "E" })
TRACE_EVENT(erofs_lookup,

View File

@@ -570,9 +570,10 @@ TRACE_EVENT(f2fs_file_write_iter,
);
TRACE_EVENT(f2fs_map_blocks,
TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret),
TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag, int ret),
TP_ARGS(inode, map, ret),
TP_ARGS(inode, map, create, flag, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -583,11 +584,14 @@ TRACE_EVENT(f2fs_map_blocks,
__field(unsigned int, m_flags)
__field(int, m_seg_type)
__field(bool, m_may_create)
__field(bool, m_multidev_dio)
__field(int, create)
__field(int, flag)
__field(int, ret)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->dev = map->m_bdev->bd_dev;
__entry->ino = inode->i_ino;
__entry->m_lblk = map->m_lblk;
__entry->m_pblk = map->m_pblk;
@@ -595,12 +599,16 @@ TRACE_EVENT(f2fs_map_blocks,
__entry->m_flags = map->m_flags;
__entry->m_seg_type = map->m_seg_type;
__entry->m_may_create = map->m_may_create;
__entry->m_multidev_dio = map->m_multidev_dio;
__entry->create = create;
__entry->flag = flag;
__entry->ret = ret;
),
TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
"start blkaddr = 0x%llx, len = 0x%llx, flags = %u, "
"seg_type = %d, may_create = %d, err = %d",
"seg_type = %d, may_create = %d, multidevice = %d, "
"create = %d, flag = %d, err = %d",
show_dev_ino(__entry),
(unsigned long long)__entry->m_lblk,
(unsigned long long)__entry->m_pblk,
@@ -608,6 +616,9 @@ TRACE_EVENT(f2fs_map_blocks,
__entry->m_flags,
__entry->m_seg_type,
__entry->m_may_create,
__entry->m_multidev_dio,
__entry->create,
__entry->flag,
__entry->ret)
);

View File

@@ -20,8 +20,8 @@
*
* The worst case for in-place decompression is that the beginning of
* the file is compressed extremely well, and the rest of the file is
* uncompressible. Thus, we must look for worst-case expansion when the
* compressor is encoding uncompressible data.
* incompressible. Thus, we must look for worst-case expansion when the
* compressor is encoding incompressible data.
*
* The structure of the .xz file in case of a compressed kernel is as follows.
* Sizes (as bytes) of the fields are in parenthesis.
@@ -58,7 +58,7 @@
* uncompressed size of the payload is in practice never less than the
* payload size itself. The LZMA2 format would allow uncompressed size
* to be less than the payload size, but no sane compressor creates such
* files. LZMA2 supports storing uncompressible data in uncompressed form,
* files. LZMA2 supports storing incompressible data in uncompressed form,
* so there's never a need to create payloads whose uncompressed size is
* smaller than the compressed size.
*

View File

@@ -39,6 +39,19 @@ config XZ_DEC_SPARC
default y
select XZ_DEC_BCJ
config XZ_DEC_MICROLZMA
bool "MicroLZMA decoder"
default n
help
MicroLZMA is a header format variant where the first byte
of a raw LZMA stream (without the end of stream marker) has
been replaced with a bitwise-negation of the lc/lp/pb
properties byte. MicroLZMA was created to be used in EROFS
but can be used by other things too where wasting minimal
amount of space for headers is important.
Unless you know that you need this, say N.
endif
config XZ_DEC_BCJ

View File

@@ -248,6 +248,10 @@ struct lzma2_dec {
* before the first LZMA chunk.
*/
bool need_props;
#ifdef XZ_DEC_MICROLZMA
bool pedantic_microlzma;
#endif
};
struct xz_dec_lzma2 {
@@ -419,6 +423,12 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
}
}
#ifdef XZ_DEC_MICROLZMA
# define DICT_FLUSH_SUPPORTS_SKIPPING true
#else
# define DICT_FLUSH_SUPPORTS_SKIPPING false
#endif
/*
* Flush pending data from dictionary to b->out. It is assumed that there is
* enough space in b->out. This is guaranteed because caller uses dict_limit()
@@ -437,7 +447,12 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
* decompression because in multi-call mode dict->buf
* has been allocated by us in this file; it's not
* provided by the caller like in single-call mode.
*
* With MicroLZMA, b->out can be NULL to skip bytes that
* the caller doesn't need. This cannot be done with XZ
* because it would break BCJ filters.
*/
if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL)
memcpy(b->out + b->out_pos, dict->buf + dict->start,
copy_size);
}
@@ -505,7 +520,7 @@ static __always_inline void rc_normalize(struct rc_dec *rc)
* functions so that the compiler is supposed to be able to more easily avoid
* an extra branch. In this particular version of the LZMA decoder, this
* doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
* on x86). Using a non-splitted version results in nicer looking code too.
* on x86). Using a non-split version results in nicer looking code too.
*
* NOTE: This must return an int. Do not make it return a bool or the speed
* of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
@@ -791,6 +806,7 @@ static void lzma_reset(struct xz_dec_lzma2 *s)
s->lzma.rep1 = 0;
s->lzma.rep2 = 0;
s->lzma.rep3 = 0;
s->lzma.len = 0;
/*
* All probabilities are initialized to the same value. This hack
@@ -1174,8 +1190,6 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
}
}
s->lzma.len = 0;
s->lzma2.sequence = SEQ_CONTROL;
s->lzma2.need_dict_reset = true;
@@ -1191,3 +1205,140 @@ XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
kfree(s);
}
#ifdef XZ_DEC_MICROLZMA
/* This is a wrapper struct to have a nice struct name in the public API. */
struct xz_dec_microlzma {
struct xz_dec_lzma2 s;
};
enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
struct xz_buf *b)
{
struct xz_dec_lzma2 *s = &s_ptr->s;
/*
* sequence is SEQ_PROPERTIES before the first input byte,
* SEQ_LZMA_PREPARE until a total of five bytes have been read,
* and SEQ_LZMA_RUN for the rest of the input stream.
*/
if (s->lzma2.sequence != SEQ_LZMA_RUN) {
if (s->lzma2.sequence == SEQ_PROPERTIES) {
/* One byte is needed for the props. */
if (b->in_pos >= b->in_size)
return XZ_OK;
/*
* Don't increment b->in_pos here. The same byte is
* also passed to rc_read_init() which will ignore it.
*/
if (!lzma_props(s, ~b->in[b->in_pos]))
return XZ_DATA_ERROR;
s->lzma2.sequence = SEQ_LZMA_PREPARE;
}
/*
* xz_dec_microlzma_reset() doesn't validate the compressed
* size so we do it here. We have to limit the maximum size
* to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice
* round number and much more than users of this code should
* ever need.
*/
if (s->lzma2.compressed < RC_INIT_BYTES
|| s->lzma2.compressed > (3U << 30))
return XZ_DATA_ERROR;
if (!rc_read_init(&s->rc, b))
return XZ_OK;
s->lzma2.compressed -= RC_INIT_BYTES;
s->lzma2.sequence = SEQ_LZMA_RUN;
dict_reset(&s->dict, b);
}
/* This is to allow increasing b->out_size between calls. */
if (DEC_IS_SINGLE(s->dict.mode))
s->dict.end = b->out_size - b->out_pos;
while (true) {
dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos,
s->lzma2.uncompressed));
if (!lzma2_lzma(s, b))
return XZ_DATA_ERROR;
s->lzma2.uncompressed -= dict_flush(&s->dict, b);
if (s->lzma2.uncompressed == 0) {
if (s->lzma2.pedantic_microlzma) {
if (s->lzma2.compressed > 0 || s->lzma.len > 0
|| !rc_is_finished(&s->rc))
return XZ_DATA_ERROR;
}
return XZ_STREAM_END;
}
if (b->out_pos == b->out_size)
return XZ_OK;
if (b->in_pos == b->in_size
&& s->temp.size < s->lzma2.compressed)
return XZ_OK;
}
}
struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
uint32_t dict_size)
{
struct xz_dec_microlzma *s;
/* Restrict dict_size to the same range as in the LZMA2 code. */
if (dict_size < 4096 || dict_size > (3U << 30))
return NULL;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (s == NULL)
return NULL;
s->s.dict.mode = mode;
s->s.dict.size = dict_size;
if (DEC_IS_MULTI(mode)) {
s->s.dict.end = dict_size;
s->s.dict.buf = vmalloc(dict_size);
if (s->s.dict.buf == NULL) {
kfree(s);
return NULL;
}
}
return s;
}
void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
uint32_t uncomp_size, int uncomp_size_is_exact)
{
/*
* comp_size is validated in xz_dec_microlzma_run().
* uncomp_size can safely be anything.
*/
s->s.lzma2.compressed = comp_size;
s->s.lzma2.uncompressed = uncomp_size;
s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact;
s->s.lzma2.sequence = SEQ_PROPERTIES;
s->s.temp.size = 0;
}
void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
{
if (DEC_IS_MULTI(s->s.dict.mode))
vfree(s->s.dict.buf);
kfree(s);
}
#endif

View File

@@ -15,8 +15,15 @@ EXPORT_SYMBOL(xz_dec_reset);
EXPORT_SYMBOL(xz_dec_run);
EXPORT_SYMBOL(xz_dec_end);
#ifdef CONFIG_XZ_DEC_MICROLZMA
EXPORT_SYMBOL(xz_dec_microlzma_alloc);
EXPORT_SYMBOL(xz_dec_microlzma_reset);
EXPORT_SYMBOL(xz_dec_microlzma_run);
EXPORT_SYMBOL(xz_dec_microlzma_end);
#endif
MODULE_DESCRIPTION("XZ decompressor");
MODULE_VERSION("1.0");
MODULE_VERSION("1.1");
MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org> and Igor Pavlov");
/*

View File

@@ -37,6 +37,9 @@
# ifdef CONFIG_XZ_DEC_SPARC
# define XZ_DEC_SPARC
# endif
# ifdef CONFIG_XZ_DEC_MICROLZMA
# define XZ_DEC_MICROLZMA
# endif
# define memeq(a, b, size) (memcmp(a, b, size) == 0)
# define memzero(buf, size) memset(buf, 0, size)
# endif