Files
kernel_arpi/kernel/time/timekeeping.c
Greg Kroah-Hartman 47c7e57022 Merge 5.15.61 into android14-5.15
Changes in 5.15.61
        Makefile: link with -z noexecstack --no-warn-rwx-segments
        x86: link vdso and boot with -z noexecstack --no-warn-rwx-segments
        Revert "pNFS: nfs3_set_ds_client should set NFS_CS_NOPING"
        scsi: Revert "scsi: qla2xxx: Fix disk failure to rediscover"
        pNFS/flexfiles: Report RDMA connection errors to the server
        NFSD: Clean up the show_nf_flags() macro
        nfsd: eliminate the NFSD_FILE_BREAK_* flags
        ALSA: usb-audio: Add quirk for Behringer UMC202HD
        ALSA: bcd2000: Fix a UAF bug on the error path of probing
        ALSA: hda/realtek: Add quirk for Clevo NV45PZ
        ALSA: hda/realtek: Add quirk for HP Spectre x360 15-eb0xxx
        wifi: mac80211_hwsim: fix race condition in pending packet
        wifi: mac80211_hwsim: add back erroneously removed cast
        wifi: mac80211_hwsim: use 32-bit skb cookie
        add barriers to buffer_uptodate and set_buffer_uptodate
        lockd: detect and reject lock arguments that overflow
        HID: hid-input: add Surface Go battery quirk
        HID: wacom: Only report rotation for art pen
        HID: wacom: Don't register pad_input for touch switch
        KVM: nVMX: Snapshot pre-VM-Enter BNDCFGS for !nested_run_pending case
        KVM: nVMX: Snapshot pre-VM-Enter DEBUGCTL for !nested_run_pending case
        KVM: SVM: Don't BUG if userspace injects an interrupt with GIF=0
        KVM: s390: pv: don't present the ecall interrupt twice
        KVM: x86: Split kvm_is_valid_cr4() and export only the non-vendor bits
        KVM: nVMX: Let userspace set nVMX MSR to any _host_ supported value
        KVM: nVMX: Account for KVM reserved CR4 bits in consistency checks
        KVM: nVMX: Inject #UD if VMXON is attempted with incompatible CR0/CR4
        KVM: x86: Mark TSS busy during LTR emulation _after_ all fault checks
        KVM: x86: Set error code to segment selector on LLDT/LTR non-canonical #GP
        KVM: nVMX: Always enable TSC scaling for L2 when it was enabled for L1
        KVM: x86: Tag kvm_mmu_x86_module_init() with __init
        KVM: x86: do not report preemption if the steal time cache is stale
        KVM: x86: revalidate steal time cache if MSR value changes
        riscv: set default pm_power_off to NULL
        ALSA: hda/conexant: Add quirk for LENOVO 20149 Notebook model
        ALSA: hda/cirrus - support for iMac 12,1 model
        ALSA: hda/realtek: Add quirk for another Asus K42JZ model
        ALSA: hda/realtek: Add a quirk for HP OMEN 15 (8786) mute LED
        tty: vt: initialize unicode screen buffer
        vfs: Check the truncate maximum size in inode_newsize_ok()
        fs: Add missing umask strip in vfs_tmpfile
        thermal: sysfs: Fix cooling_device_stats_setup() error code path
        fbcon: Fix boundary checks for fbcon=vc:n1-n2 parameters
        fbcon: Fix accelerated fbdev scrolling while logo is still shown
        usbnet: Fix linkwatch use-after-free on disconnect
        fix short copy handling in copy_mc_pipe_to_iter()
        crypto: ccp - Use kzalloc for sev ioctl interfaces to prevent kernel memory leak
        ovl: drop WARN_ON() dentry is NULL in ovl_encode_fh()
        parisc: Fix device names in /proc/iomem
        parisc: Drop pa_swapper_pg_lock spinlock
        parisc: Check the return value of ioremap() in lba_driver_probe()
        parisc: io_pgetevents_time64() needs compat syscall in 32-bit compat mode
        riscv:uprobe fix SR_SPIE set/clear handling
        dt-bindings: riscv: fix SiFive l2-cache's cache-sets
        RISC-V: kexec: Fixup use of smp_processor_id() in preemptible context
        RISC-V: Fixup get incorrect user mode PC for kernel mode regs
        RISC-V: Fixup schedule out issue in machine_crash_shutdown()
        RISC-V: Add modules to virtual kernel memory layout dump
        rtc: rx8025: fix 12/24 hour mode detection on RX-8035
        drm/gem: Properly annotate WW context on drm_gem_lock_reservations() error
        drm/shmem-helper: Add missing vunmap on error
        drm/vc4: hdmi: Disable audio if dmas property is present but empty
        drm/hyperv-drm: Include framebuffer and EDID headers
        drm/nouveau: fix another off-by-one in nvbios_addr
        drm/nouveau: Don't pm_runtime_put_sync(), only pm_runtime_put_autosuspend()
        drm/nouveau/acpi: Don't print error when we get -EINPROGRESS from pm_runtime
        drm/nouveau/kms: Fix failure path for creating DP connectors
        drm/amdgpu: Check BO's requested pinning domains against its preferred_domains
        drm/amdgpu: fix check in fbdev init
        bpf: Fix KASAN use-after-free Read in compute_effective_progs
        btrfs: reject log replay if there is unsupported RO compat flag
        mtd: rawnand: arasan: Fix clock rate in NV-DDR
        mtd: rawnand: arasan: Update NAND bus clock instead of system clock
        um: Remove straying parenthesis
        um: seed rng using host OS rng
        iio: fix iio_format_avail_range() printing for none IIO_VAL_INT
        iio: light: isl29028: Fix the warning in isl29028_remove()
        scsi: sg: Allow waiting for commands to complete on removed device
        scsi: qla2xxx: Fix incorrect display of max frame size
        scsi: qla2xxx: Zero undefined mailbox IN registers
        soundwire: qcom: Check device status before reading devid
        ksmbd: fix memory leak in smb2_handle_negotiate
        ksmbd: prevent out of bound read for SMB2_TREE_CONNNECT
        ksmbd: fix use-after-free bug in smb2_tree_disconect
        fuse: limit nsec
        fuse: ioctl: translate ENOSYS
        serial: mvebu-uart: uart2 error bits clearing
        md-raid: destroy the bitmap after destroying the thread
        md-raid10: fix KASAN warning
        mbcache: don't reclaim used entries
        mbcache: add functions to delete entry if unused
        media: [PATCH] pci: atomisp_cmd: fix three missing checks on list iterator
        ia64, processor: fix -Wincompatible-pointer-types in ia64_get_irr()
        PCI: Add defines for normal and subtractive PCI bridges
        powerpc/fsl-pci: Fix Class Code of PCIe Root Port
        powerpc/ptdump: Fix display of RW pages on FSL_BOOK3E
        powerpc/powernv: Avoid crashing if rng is NULL
        MIPS: cpuinfo: Fix a warning for CONFIG_CPUMASK_OFFSTACK
        coresight: Clear the connection field properly
        usb: typec: ucsi: Acknowledge the GET_ERROR_STATUS command completion
        USB: HCD: Fix URB giveback issue in tasklet function
        ARM: dts: uniphier: Fix USB interrupts for PXs2 SoC
        arm64: dts: uniphier: Fix USB interrupts for PXs3 SoC
        usb: dwc3: gadget: refactor dwc3_repare_one_trb
        usb: dwc3: gadget: fix high speed multiplier setting
        netfilter: nf_tables: do not allow SET_ID to refer to another table
        netfilter: nf_tables: do not allow CHAIN_ID to refer to another table
        netfilter: nf_tables: do not allow RULE_ID to refer to another chain
        netfilter: nf_tables: fix null deref due to zeroed list head
        epoll: autoremove wakers even more aggressively
        x86: Handle idle=nomwait cmdline properly for x86_idle
        arch: make TRACE_IRQFLAGS_NMI_SUPPORT generic
        arm64: Do not forget syscall when starting a new thread.
        arm64: fix oops in concurrently setting insn_emulation sysctls
        arm64: kasan: Revert "arm64: mte: reset the page tag in page->flags"
        ext2: Add more validity checks for inode counts
        sched/fair: Introduce SIS_UTIL to search idle CPU based on sum of util_avg
        genirq: Don't return error on missing optional irq_request_resources()
        irqchip/mips-gic: Only register IPI domain when SMP is enabled
        genirq: GENERIC_IRQ_IPI depends on SMP
        sched/core: Always flush pending blk_plug
        irqchip/mips-gic: Check the return value of ioremap() in gic_of_init()
        wait: Fix __wait_event_hrtimeout for RT/DL tasks
        ARM: dts: imx6ul: add missing properties for sram
        ARM: dts: imx6ul: change operating-points to uint32-matrix
        ARM: dts: imx6ul: fix keypad compatible
        ARM: dts: imx6ul: fix csi node compatible
        ARM: dts: imx6ul: fix lcdif node compatible
        ARM: dts: imx6ul: fix qspi node compatible
        ARM: dts: BCM5301X: Add DT for Meraki MR26
        ARM: dts: ux500: Fix Codina accelerometer mounting matrix
        ARM: dts: ux500: Fix Gavini accelerometer mounting matrix
        spi: synquacer: Add missing clk_disable_unprepare()
        ARM: OMAP2+: display: Fix refcount leak bug
        ARM: OMAP2+: pdata-quirks: Fix refcount leak bug
        ACPI: EC: Remove duplicate ThinkPad X1 Carbon 6th entry from DMI quirks
        ACPI: EC: Drop the EC_FLAGS_IGNORE_DSDT_GPE quirk
        ACPI: PM: save NVS memory for Lenovo G40-45
        ACPI: LPSS: Fix missing check in register_device_clock()
        ARM: dts: qcom: sdx55: Fix the IRQ trigger type for UART
        arm64: dts: qcom: ipq8074: fix NAND node name
        arm64: dts: allwinner: a64: orangepi-win: Fix LED node name
        ARM: shmobile: rcar-gen2: Increase refcount for new reference
        firmware: tegra: Fix error check return value of debugfs_create_file()
        hwmon: (dell-smm) Add Dell XPS 13 7390 to fan control whitelist
        hwmon: (sht15) Fix wrong assumptions in device remove callback
        PM: hibernate: defer device probing when resuming from hibernation
        selinux: fix memleak in security_read_state_kernel()
        selinux: Add boundary check in put_entry()
        kasan: test: Silence GCC 12 warnings
        drm/amdgpu: Remove one duplicated ef removal
        powerpc/64s: Disable stack variable initialisation for prom_init
        spi: spi-rspi: Fix PIO fallback on RZ platforms
        ARM: findbit: fix overflowing offset
        meson-mx-socinfo: Fix refcount leak in meson_mx_socinfo_init
        arm64: dts: renesas: beacon: Fix regulator node names
        spi: spi-altera-dfl: Fix an error handling path
        ARM: bcm: Fix refcount leak in bcm_kona_smc_init
        ACPI: processor/idle: Annotate more functions to live in cpuidle section
        ARM: dts: imx7d-colibri-emmc: add cpu1 supply
        soc: renesas: r8a779a0-sysc: Fix A2DP1 and A2CV[2357] PDR values
        scsi: hisi_sas: Use managed PCI functions
        dt-bindings: iio: accel: Add DT binding doc for ADXL355
        soc: amlogic: Fix refcount leak in meson-secure-pwrc.c
        arm64: dts: renesas: Fix thermal-sensors on single-zone sensors
        x86/pmem: Fix platform-device leak in error path
        ARM: dts: ast2500-evb: fix board compatible
        ARM: dts: ast2600-evb: fix board compatible
        ARM: dts: ast2600-evb-a1: fix board compatible
        arm64: dts: mt8192: Fix idle-states nodes naming scheme
        arm64: dts: mt8192: Fix idle-states entry-method
        arm64: select TRACE_IRQFLAGS_NMI_SUPPORT
        arm64: cpufeature: Allow different PMU versions in ID_DFR0_EL1
        locking/lockdep: Fix lockdep_init_map_*() confusion
        arm64: dts: qcom: sc7180: Remove ipa_fw_mem node on trogdor
        soc: fsl: guts: machine variable might be unset
        block: fix infinite loop for invalid zone append
        ARM: dts: qcom: mdm9615: add missing PMIC GPIO reg
        ARM: OMAP2+: Fix refcount leak in omapdss_init_of
        ARM: OMAP2+: Fix refcount leak in omap3xxx_prm_late_init
        arm64: dts: qcom: sdm630: disable GPU by default
        arm64: dts: qcom: sdm630: fix the qusb2phy ref clock
        arm64: dts: qcom: sdm630: fix gpu's interconnect path
        arm64: dts: qcom: sdm636-sony-xperia-ganges-mermaid: correct sdc2 pinconf
        cpufreq: zynq: Fix refcount leak in zynq_get_revision
        regulator: qcom_smd: Fix pm8916_pldo range
        ACPI: APEI: Fix _EINJ vs EFI_MEMORY_SP
        ARM: dts: qcom-msm8974: fix irq type on blsp2_uart1
        soc: qcom: ocmem: Fix refcount leak in of_get_ocmem
        soc: qcom: aoss: Fix refcount leak in qmp_cooling_devices_register
        ARM: dts: qcom: pm8841: add required thermal-sensor-cells
        bus: hisi_lpc: fix missing platform_device_put() in hisi_lpc_acpi_probe()
        stack: Declare {randomize_,}kstack_offset to fix Sparse warnings
        arm64: dts: qcom: msm8916: Fix typo in pronto remoteproc node
        ACPI: APEI: explicit init of HEST and GHES in apci_init()
        drivers/iio: Remove all strcpy() uses
        ACPI: VIOT: Fix ACS setup
        arm64: dts: qcom: sm6125: Move sdc2 pinctrl from seine-pdx201 to sm6125
        arm64: dts: qcom: sm6125: Append -state suffix to pinctrl nodes
        arm64: dts: qcom: sm8250: add missing PCIe PHY clock-cells
        arm64: dts: mt7622: fix BPI-R64 WPS button
        arm64: tegra: Fixup SYSRAM references
        arm64: tegra: Update Tegra234 BPMP channel addresses
        arm64: tegra: Mark BPMP channels as no-memory-wc
        arm64: tegra: Fix SDMMC1 CD on P2888
        erofs: avoid consecutive detection for Highmem memory
        blk-mq: don't create hctx debugfs dir until q->debugfs_dir is created
        spi: Fix simplification of devm_spi_register_controller
        spi: tegra20-slink: fix UAF in tegra_slink_remove()
        hwmon: (drivetemp) Add module alias
        blktrace: Trace remapped requests correctly
        PM: domains: Ensure genpd_debugfs_dir exists before remove
        dm writecache: return void from functions
        dm writecache: count number of blocks read, not number of read bios
        dm writecache: count number of blocks written, not number of write bios
        dm writecache: count number of blocks discarded, not number of discard bios
        regulator: of: Fix refcount leak bug in of_get_regulation_constraints()
        soc: qcom: Make QCOM_RPMPD depend on PM
        arm64: dts: qcom: qcs404: Fix incorrect USB2 PHYs assignment
        irqdomain: Report irq number for NOMAP domains
        drivers/perf: arm_spe: Fix consistency of SYS_PMSCR_EL1.CX
        nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt()
        x86/extable: Fix ex_handler_msr() print condition
        selftests/seccomp: Fix compile warning when CC=clang
        thermal/tools/tmon: Include pthread and time headers in tmon.h
        dm: return early from dm_pr_call() if DM device is suspended
        pwm: sifive: Simplify offset calculation for PWMCMP registers
        pwm: sifive: Ensure the clk is enabled exactly once per running PWM
        pwm: sifive: Shut down hardware only after pwmchip_remove() completed
        pwm: lpc18xx-sct: Reduce number of devm memory allocations
        pwm: lpc18xx-sct: Simplify driver by not using pwm_[gs]et_chip_data()
        pwm: lpc18xx: Fix period handling
        drm/dp: Export symbol / kerneldoc fixes for DP AUX bus
        drm/bridge: tc358767: Move (e)DP bridge endpoint parsing into dedicated function
        ath10k: do not enforce interrupt trigger type
        drm/st7735r: Fix module autoloading for Okaya RH128128T
        drm/panel: Fix build error when CONFIG_DRM_PANEL_SAMSUNG_ATNA33XC20=y && CONFIG_DRM_DISPLAY_HELPER=m
        wifi: rtlwifi: fix error codes in rtl_debugfs_set_write_h2c()
        ath11k: fix netdev open race
        drm/mipi-dbi: align max_chunk to 2 in spi_transfer
        ath11k: Fix incorrect debug_mask mappings
        drm/radeon: fix potential buffer overflow in ni_set_mc_special_registers()
        drm/mediatek: Modify dsi funcs to atomic operations
        drm/mediatek: Separate poweron/poweroff from enable/disable and define new funcs
        drm/mediatek: Add pull-down MIPI operation in mtk_dsi_poweroff function
        drm/meson: encoder_hdmi: switch to bridge DRM_BRIDGE_ATTACH_NO_CONNECTOR
        drm/meson: encoder_hdmi: Fix refcount leak in meson_encoder_hdmi_init
        drm/bridge: lt9611uxc: Cancel only driver's work
        i2c: npcm: Remove own slave addresses 2:10
        i2c: npcm: Correct slave role behavior
        i2c: mxs: Silence a clang warning
        virtio-gpu: fix a missing check to avoid NULL dereference
        drm/shmem-helper: Unexport drm_gem_shmem_create_with_handle()
        drm/shmem-helper: Export dedicated wrappers for GEM object functions
        drm/shmem-helper: Pass GEM shmem object in public interfaces
        drm/virtio: Fix NULL vs IS_ERR checking in virtio_gpu_object_shmem_init
        drm: adv7511: override i2c address of cec before accessing it
        crypto: sun8i-ss - do not allocate memory when handling hash requests
        crypto: sun8i-ss - fix error codes in allocate_flows()
        net: fix sk_wmem_schedule() and sk_rmem_schedule() errors
        can: netlink: allow configuring of fixed bit rates without need for do_set_bittiming callback
        can: netlink: allow configuring of fixed data bit rates without need for do_set_data_bittiming callback
        i2c: Fix a potential use after free
        crypto: sun8i-ss - fix infinite loop in sun8i_ss_setup_ivs()
        media: atmel: atmel-sama7g5-isc: fix warning in configs without OF
        media: tw686x: Register the irq at the end of probe
        media: imx-jpeg: Correct some definition according specification
        media: imx-jpeg: Leave a blank space before the configuration data
        media: imx-jpeg: Add pm-runtime support for imx-jpeg
        media: imx-jpeg: use NV12M to represent non contiguous NV12
        media: imx-jpeg: Set V4L2_BUF_FLAG_LAST at eos
        media: imx-jpeg: Refactor function mxc_jpeg_parse
        media: imx-jpeg: Identify and handle precision correctly
        media: imx-jpeg: Handle source change in a function
        media: imx-jpeg: Support dynamic resolution change
        media: imx-jpeg: Align upwards buffer size
        media: imx-jpeg: Implement drain using v4l2-mem2mem helpers
        ath9k: fix use-after-free in ath9k_hif_usb_rx_cb
        wifi: iwlegacy: 4965: fix potential off-by-one overflow in il4965_rs_fill_link_cmd()
        drm/radeon: fix incorrrect SPDX-License-Identifiers
        rcutorture: Warn on individual rcu_torture_init() error conditions
        rcutorture: Don't cpuhp_remove_state() if cpuhp_setup_state() failed
        rcutorture: Fix ksoftirqd boosting timing and iteration
        test_bpf: fix incorrect netdev features
        crypto: ccp - During shutdown, check SEV data pointer before using
        drm: bridge: adv7511: Add check for mipi_dsi_driver_register
        media: imx-jpeg: Disable slot interrupt when frame done
        drm/mcde: Fix refcount leak in mcde_dsi_bind
        media: hdpvr: fix error value returns in hdpvr_read
        media: v4l2-mem2mem: prevent pollerr when last_buffer_dequeued is set
        media: driver/nxp/imx-jpeg: fix a unexpected return value problem
        media: tw686x: Fix memory leak in tw686x_video_init
        drm/vc4: plane: Remove subpixel positioning check
        drm/vc4: plane: Fix margin calculations for the right/bottom edges
        drm/bridge: Add a function to abstract away panels
        drm/vc4: dsi: Switch to devm_drm_of_get_bridge
        drm/vc4: Use of_device_get_match_data()
        drm/vc4: dsi: Release workaround buffer and DMA
        drm/vc4: dsi: Correct DSI divider calculations
        drm/vc4: dsi: Correct pixel order for DSI0
        drm/vc4: dsi: Register dsi0 as the correct vc4 encoder type
        drm/vc4: dsi: Fix dsi0 interrupt support
        drm/vc4: dsi: Add correct stop condition to vc4_dsi_encoder_disable iteration
        drm/vc4: hdmi: Fix HPD GPIO detection
        drm/vc4: hdmi: Avoid full hdmi audio fifo writes
        drm/vc4: hdmi: Reset HDMI MISC_CONTROL register
        drm/vc4: hdmi: Fix timings for interlaced modes
        drm/vc4: hdmi: Correct HDMI timing registers for interlaced modes
        crypto: arm64/gcm - Select AEAD for GHASH_ARM64_CE
        selftests/xsk: Destroy BPF resources only when ctx refcount drops to 0
        drm/rockchip: vop: Don't crash for invalid duplicate_state()
        drm/rockchip: Fix an error handling path rockchip_dp_probe()
        drm/mediatek: dpi: Remove output format of YUV
        drm/mediatek: dpi: Only enable dpi after the bridge is enabled
        drm: bridge: sii8620: fix possible off-by-one
        hinic: Use the bitmap API when applicable
        net: hinic: fix bug that ethtool get wrong stats
        net: hinic: avoid kernel hung in hinic_get_stats64()
        drm/msm/mdp5: Fix global state lock backoff
        crypto: hisilicon/sec - don't sleep when in softirq
        crypto: hisilicon - Kunpeng916 crypto driver don't sleep when in softirq
        media: platform: mtk-mdp: Fix mdp_ipi_comm structure alignment
        drm/msm: Avoid dirtyfb stalls on video mode displays (v2)
        drm/msm/dpu: Fix for non-visible planes
        mt76: mt76x02u: fix possible memory leak in __mt76x02u_mcu_send_msg
        mt76: mt7615: do not update pm stats in case of error
        ieee80211: add EHT 1K aggregation definitions
        mt76: mt7921: fix aggregation subframes setting to HE max
        mt76: mt7921: enlarge maximum VHT MPDU length to 11454
        mediatek: mt76: mac80211: Fix missing of_node_put() in mt76_led_init()
        mediatek: mt76: eeprom: fix missing of_node_put() in mt76_find_power_limits_node()
        skmsg: Fix invalid last sg check in sk_msg_recvmsg()
        drm/exynos/exynos7_drm_decon: free resources when clk_set_parent() failed.
        tcp: make retransmitted SKB fit into the send window
        libbpf: Fix the name of a reused map
        selftests: timers: valid-adjtimex: build fix for newer toolchains
        selftests: timers: clocksource-switch: fix passing errors from child
        bpf: Fix subprog names in stack traces.
        fs: check FMODE_LSEEK to control internal pipe splicing
        media: cedrus: h265: Fix flag name
        media: hantro: postproc: Fix motion vector space size
        media: hantro: Simplify postprocessor
        media: hevc: Embedded indexes in RPS
        media: staging: media: hantro: Fix typos
        wifi: wil6210: debugfs: fix info leak in wil_write_file_wmi()
        wifi: p54: Fix an error handling path in p54spi_probe()
        wifi: p54: add missing parentheses in p54_flush()
        selftests/bpf: fix a test for snprintf() overflow
        libbpf: fix an snprintf() overflow check
        can: pch_can: do not report txerr and rxerr during bus-off
        can: rcar_can: do not report txerr and rxerr during bus-off
        can: sja1000: do not report txerr and rxerr during bus-off
        can: hi311x: do not report txerr and rxerr during bus-off
        can: sun4i_can: do not report txerr and rxerr during bus-off
        can: kvaser_usb_hydra: do not report txerr and rxerr during bus-off
        can: kvaser_usb_leaf: do not report txerr and rxerr during bus-off
        can: usb_8dev: do not report txerr and rxerr during bus-off
        can: error: specify the values of data[5..7] of CAN error frames
        can: pch_can: pch_can_error(): initialize errc before using it
        Bluetooth: hci_intel: Add check for platform_driver_register
        i2c: cadence: Support PEC for SMBus block read
        i2c: mux-gpmux: Add of_node_put() when breaking out of loop
        wifi: wil6210: debugfs: fix uninitialized variable use in `wil_write_file_wmi()`
        wifi: iwlwifi: mvm: fix double list_add at iwl_mvm_mac_wake_tx_queue
        wifi: libertas: Fix possible refcount leak in if_usb_probe()
        media: cedrus: hevc: Add check for invalid timestamp
        net/mlx5e: Remove WARN_ON when trying to offload an unsupported TLS cipher/version
        net/mlx5e: Fix the value of MLX5E_MAX_RQ_NUM_MTTS
        net/mlx5: Adjust log_max_qp to be 18 at most
        crypto: hisilicon/hpre - don't use GFP_KERNEL to alloc mem during softirq
        crypto: inside-secure - Add missing MODULE_DEVICE_TABLE for of
        crypto: hisilicon/sec - fix auth key size error
        inet: add READ_ONCE(sk->sk_bound_dev_if) in INET_MATCH()
        ipv6: add READ_ONCE(sk->sk_bound_dev_if) in INET6_MATCH()
        net: allow unbound socket for packets in VRF when tcp_l3mdev_accept set
        netdevsim: fib: Fix reference count leak on route deletion failure
        wifi: rtw88: check the return value of alloc_workqueue()
        iavf: Fix max_rate limiting
        iavf: Fix 'tc qdisc show' listing too many queues
        netdevsim: Avoid allocation warnings triggered from user space
        net: rose: fix netdev reference changes
        net: ionic: fix error check for vlan flags in ionic_set_nic_features()
        dccp: put dccp_qpolicy_full() and dccp_qpolicy_push() in the same lock
        net: usb: make USB_RTL8153_ECM non user configurable
        wireguard: ratelimiter: use hrtimer in selftest
        wireguard: allowedips: don't corrupt stack when detecting overflow
        HID: amd_sfh: Don't show client init failed as error when discovery fails
        clk: renesas: r9a06g032: Fix UART clkgrp bitsel
        mtd: maps: Fix refcount leak in of_flash_probe_versatile
        mtd: maps: Fix refcount leak in ap_flash_init
        mtd: rawnand: meson: Fix a potential double free issue
        of: check previous kernel's ima-kexec-buffer against memory bounds
        scsi: qla2xxx: edif: Reduce Initiator-Initiator thrashing
        scsi: qla2xxx: edif: Fix potential stuck session in sa update
        scsi: qla2xxx: edif: Reduce connection thrash
        scsi: qla2xxx: edif: Fix inconsistent check of db_flags
        scsi: qla2xxx: edif: Synchronize NPIV deletion with authentication application
        scsi: qla2xxx: edif: Add retry for ELS passthrough
        scsi: qla2xxx: edif: Fix n2n discovery issue with secure target
        scsi: qla2xxx: edif: Fix n2n login retry for secure device
        KVM: SVM: Unwind "speculative" RIP advancement if INTn injection "fails"
        KVM: SVM: Stuff next_rip on emulated INT3 injection if NRIPS is supported
        phy: samsung: exynosautov9-ufs: correct TSRV register configurations
        PCI: microchip: Fix refcount leak in mc_pcie_init_irq_domains()
        PCI: tegra194: Fix PM error handling in tegra_pcie_config_ep()
        HID: cp2112: prevent a buffer overflow in cp2112_xfer()
        mtd: sm_ftl: Fix deadlock caused by cancel_work_sync in sm_release
        mtd: partitions: Fix refcount leak in parse_redboot_of
        mtd: parsers: ofpart: Fix refcount leak in bcm4908_partitions_fw_offset
        mtd: st_spi_fsm: Add a clk_disable_unprepare() in .probe()'s error path
        PCI: mediatek-gen3: Fix refcount leak in mtk_pcie_init_irq_domains()
        fpga: altera-pr-ip: fix unsigned comparison with less than zero
        usb: host: Fix refcount leak in ehci_hcd_ppc_of_probe
        usb: ohci-nxp: Fix refcount leak in ohci_hcd_nxp_probe
        usb: gadget: tegra-xudc: Fix error check in tegra_xudc_powerdomain_init()
        usb: xhci: tegra: Fix error check
        netfilter: xtables: Bring SPDX identifier back
        scsi: qla2xxx: edif: Send LOGO for unexpected IKE message
        scsi: qla2xxx: edif: Reduce disruption due to multiple app start
        scsi: qla2xxx: edif: Fix no login after app start
        scsi: qla2xxx: edif: Tear down session if keys have been removed
        scsi: qla2xxx: edif: Fix session thrash
        scsi: qla2xxx: edif: Fix no logout on delete for N2N
        iio: accel: bma400: Fix the scale min and max macro values
        platform/chrome: cros_ec: Always expose last resume result
        iio: accel: bma400: Reordering of header files
        clk: mediatek: reset: Fix written reset bit offset
        lib/test_hmm: avoid accessing uninitialized pages
        memremap: remove support for external pgmap refcounts
        mm/memremap: fix memunmap_pages() race with get_dev_pagemap()
        KVM: Don't set Accessed/Dirty bits for ZERO_PAGE
        mwifiex: Ignore BTCOEX events from the 88W8897 firmware
        mwifiex: fix sleep in atomic context bugs caused by dev_coredumpv
        scsi: iscsi: Allow iscsi_if_stop_conn() to be called from kernel
        scsi: iscsi: Add helper to remove a session from the kernel
        scsi: iscsi: Fix session removal on shutdown
        dmaengine: dw-edma: Fix eDMA Rd/Wr-channels and DMA-direction semantics
        mtd: dataflash: Add SPI ID table
        clk: qcom: camcc-sm8250: Fix halt on boot by reducing driver's init level
        misc: rtsx: Fix an error handling path in rtsx_pci_probe()
        driver core: fix potential deadlock in __driver_attach
        clk: qcom: clk-krait: unlock spin after mux completion
        clk: qcom: gcc-msm8939: Add missing SYSTEM_MM_NOC_BFDCD_CLK_SRC
        clk: qcom: gcc-msm8939: Fix bimc_ddr_clk_src rcgr base address
        clk: qcom: gcc-msm8939: Add missing system_mm_noc_bfdcd_clk_src
        clk: qcom: gcc-msm8939: Point MM peripherals to system_mm_noc clock
        usb: host: xhci: use snprintf() in xhci_decode_trb()
        RDMA/rxe: Fix deadlock in rxe_do_local_ops()
        clk: qcom: ipq8074: fix NSS core PLL-s
        clk: qcom: ipq8074: SW workaround for UBI32 PLL lock
        clk: qcom: ipq8074: fix NSS port frequency tables
        clk: qcom: ipq8074: set BRANCH_HALT_DELAY flag for UBI clocks
        clk: qcom: camcc-sdm845: Fix topology around titan_top power domain
        clk: qcom: camcc-sm8250: Fix topology around titan_top power domain
        clk: qcom: clk-rcg2: Fail Duty-Cycle configuration if MND divider is not enabled.
        clk: qcom: clk-rcg2: Make sure to not write d=0 to the NMD register
        mm/mempolicy: fix get_nodes out of bound access
        PCI: dwc: Stop link on host_init errors and de-initialization
        PCI: dwc: Add unroll iATU space support to dw_pcie_disable_atu()
        PCI: dwc: Disable outbound windows only for controllers using iATU
        PCI: dwc: Set INCREASE_REGION_SIZE flag based on limit address
        PCI: dwc: Deallocate EPC memory on dw_pcie_ep_init() errors
        PCI: dwc: Always enable CDM check if "snps,enable-cdm-check" exists
        soundwire: bus_type: fix remove and shutdown support
        soundwire: revisit driver bind/unbind and callbacks
        KVM: arm64: Don't return from void function
        dmaengine: sf-pdma: Add multithread support for a DMA channel
        PCI: endpoint: Don't stop controller when unbinding endpoint function
        scsi: qla2xxx: Check correct variable in qla24xx_async_gffid()
        intel_th: Fix a resource leak in an error handling path
        intel_th: msu-sink: Potential dereference of null pointer
        intel_th: msu: Fix vmalloced buffers
        binder: fix redefinition of seq_file attributes
        staging: rtl8192u: Fix sleep in atomic context bug in dm_fsync_timer_callback
        mmc: sdhci-of-esdhc: Fix refcount leak in esdhc_signal_voltage_switch
        mmc: mxcmmc: Silence a clang warning
        mmc: renesas_sdhi: Get the reset handle early in the probe
        memstick/ms_block: Fix some incorrect memory allocation
        memstick/ms_block: Fix a memory leak
        mmc: sdhci-of-at91: fix set_uhs_signaling rewriting of MC1R
        of: device: Fix missing of_node_put() in of_dma_set_restricted_buffer
        mmc: block: Add single read for 4k sector cards
        KVM: s390: pv: leak the topmost page table when destroy fails
        PCI/portdrv: Don't disable AER reporting in get_port_device_capability()
        PCI: qcom: Set up rev 2.1.0 PARF_PHY before enabling clocks
        scsi: smartpqi: Fix DMA direction for RAID requests
        xtensa: iss/network: provide release() callback
        xtensa: iss: fix handling error cases in iss_net_configure()
        usb: gadget: udc: amd5536 depends on HAS_DMA
        usb: aspeed-vhub: Fix refcount leak bug in ast_vhub_init_desc()
        usb: dwc3: core: Deprecate GCTL.CORESOFTRESET
        usb: dwc3: core: Do not perform GCTL_CORE_SOFTRESET during bootup
        usb: dwc3: qcom: fix missing optional irq warnings
        eeprom: idt_89hpesx: uninitialized data in idt_dbgfs_csr_write()
        phy: stm32: fix error return in stm32_usbphyc_phy_init
        interconnect: imx: fix max_node_id
        um: random: Don't initialise hwrng struct with zero
        RDMA/irdma: Fix a window for use-after-free
        RDMA/irdma: Fix VLAN connection with wildcard address
        RDMA/irdma: Fix setting of QP context err_rq_idx_valid field
        RDMA/rtrs-srv: Fix modinfo output for stringify
        RDMA/rtrs: Fix warning when use poll mode on client side.
        RDMA/rtrs: Replace duplicate check with is_pollqueue helper
        RDMA/rtrs: Introduce destroy_cq helper
        RDMA/rtrs: Do not allow sessname to contain special symbols / and .
        RDMA/rtrs: Rename rtrs_sess to rtrs_path
        RDMA/rtrs-srv: Rename rtrs_srv_sess to rtrs_srv_path
        RDMA/rtrs-clt: Rename rtrs_clt_sess to rtrs_clt_path
        RDMA/rtrs-clt: Replace list_next_or_null_rr_rcu with an inline function
        RDMA/qedr: Fix potential memory leak in __qedr_alloc_mr()
        RDMA/hns: Fix incorrect clearing of interrupt status register
        RDMA/siw: Fix duplicated reported IW_CM_EVENT_CONNECT_REPLY event
        iio: cros: Register FIFO callback after sensor is registered
        clk: qcom: gcc-msm8939: Fix weird field spacing in ftbl_gcc_camss_cci_clk
        RDMA/hfi1: fix potential memory leak in setup_base_ctxt()
        gpio: gpiolib-of: Fix refcount bugs in of_mm_gpiochip_add_data()
        HID: mcp2221: prevent a buffer overflow in mcp_smbus_write()
        HID: amd_sfh: Add NULL check for hid device
        dmaengine: imx-dma: Cast of_device_get_match_data() with (uintptr_t)
        scripts/gdb: lx-dmesg: read records individually
        scripts/gdb: fix 'lx-dmesg' on 32 bits arch
        RDMA/rxe: Fix mw bind to allow any consumer key portion
        mmc: cavium-octeon: Add of_node_put() when breaking out of loop
        mmc: cavium-thunderx: Add of_node_put() when breaking out of loop
        HID: alps: Declare U1_UNICORN_LEGACY support
        RDMA/rxe: For invalidate compare according to set keys in mr
        PCI: tegra194: Fix Root Port interrupt handling
        PCI: tegra194: Fix link up retry sequence
        HID: amd_sfh: Handle condition of "no sensors"
        USB: serial: fix tty-port initialized comments
        usb: cdns3: change place of 'priv_ep' assignment in cdns3_gadget_ep_dequeue(), cdns3_gadget_ep_enable()
        mtd: spi-nor: fix spi_nor_spimem_setup_op() call in spi_nor_erase_{sector,chip}()
        KVM: nVMX: Set UMIP bit CR4_FIXED1 MSR when emulating UMIP
        platform/olpc: Fix uninitialized data in debugfs write
        RDMA/srpt: Duplicate port name members
        RDMA/srpt: Introduce a reference count in struct srpt_device
        RDMA/srpt: Fix a use-after-free
        android: binder: stop saving a pointer to the VMA
        mm/mmap.c: fix missing call to vm_unacct_memory in mmap_region
        selftests: kvm: set rax before vmcall
        of/fdt: declared return type does not match actual return type
        RDMA/mlx5: Add missing check for return value in get namespace flow
        RDMA/rxe: Add memory barriers to kernel queues
        RDMA/rxe: Remove the is_user members of struct rxe_sq/rxe_rq/rxe_srq
        RDMA/rxe: Fix error unwind in rxe_create_qp()
        block/rnbd-srv: Set keep_id to true after mutex_trylock
        null_blk: fix ida error handling in null_add_dev()
        nvme: use command_id instead of req->tag in trace_nvme_complete_rq()
        nvme: define compat_ioctl again to unbreak 32-bit userspace.
        nvme: disable namespace access for unsupported metadata
        nvme: don't return an error from nvme_configure_metadata
        nvme: catch -ENODEV from nvme_revalidate_zones again
        block/bio: remove duplicate append pages code
        block: ensure iov_iter advances for added pages
        jbd2: fix outstanding credits assert in jbd2_journal_commit_transaction()
        ext4: recover csum seed of tmp_inode after migrating to extents
        jbd2: fix assertion 'jh->b_frozen_data == NULL' failure when journal aborted
        usb: cdns3: Don't use priv_dev uninitialized in cdns3_gadget_ep_enable()
        opp: Fix error check in dev_pm_opp_attach_genpd()
        ASoC: cros_ec_codec: Fix refcount leak in cros_ec_codec_platform_probe
        ASoC: samsung: Fix error handling in aries_audio_probe
        ASoC: imx-audmux: Silence a clang warning
        ASoC: mediatek: mt8173: Fix refcount leak in mt8173_rt5650_rt5676_dev_probe
        ASoC: mt6797-mt6351: Fix refcount leak in mt6797_mt6351_dev_probe
        ASoC: codecs: da7210: add check for i2c_add_driver
        ASoC: mediatek: mt8173-rt5650: Fix refcount leak in mt8173_rt5650_dev_probe
        serial: 8250: Export ICR access helpers for internal use
        serial: 8250: dma: Allow driver operations before starting DMA transfers
        serial: 8250_dw: Store LSR into lsr_saved_flags in dw8250_tx_wait_empty()
        ASoC: codecs: msm8916-wcd-digital: move gains from SX_TLV to S8_TLV
        ASoC: codecs: wcd9335: move gains from SX_TLV to S8_TLV
        rpmsg: char: Add mutex protection for rpmsg_eptdev_open()
        rpmsg: mtk_rpmsg: Fix circular locking dependency
        remoteproc: k3-r5: Fix refcount leak in k3_r5_cluster_of_init
        selftests/livepatch: better synchronize test_klp_callbacks_busy
        profiling: fix shift too large makes kernel panic
        remoteproc: imx_rproc: Fix refcount leak in imx_rproc_addr_init
        ASoC: samsung: h1940_uda1380: include proepr GPIO consumer header
        powerpc/perf: Optimize clearing the pending PMI and remove WARN_ON for PMI check in power_pmu_disable
        ASoC: samsung: change gpiod_speaker_power and rx1950_audio from global to static variables
        tty: n_gsm: Delete gsmtty open SABM frame when config requester
        tty: n_gsm: fix user open not possible at responder until initiator open
        tty: n_gsm: fix tty registration before control channel open
        tty: n_gsm: fix wrong queuing behavior in gsm_dlci_data_output()
        tty: n_gsm: fix missing timer to handle stalled links
        tty: n_gsm: fix non flow control frames during mux flow off
        tty: n_gsm: fix packet re-transmission without open control channel
        tty: n_gsm: fix race condition in gsmld_write()
        tty: n_gsm: fix resource allocation order in gsm_activate_mux()
        ASoC: qcom: Fix missing of_node_put() in asoc_qcom_lpass_cpu_platform_probe()
        ASoC: imx-card: Fix DSD/PDM mclk frequency
        remoteproc: qcom: wcnss: Fix handling of IRQs
        vfio/ccw: Do not change FSM state in subchannel event
        serial: 8250_fsl: Don't report FE, PE and OE twice
        tty: n_gsm: fix wrong T1 retry count handling
        tty: n_gsm: fix DM command
        tty: n_gsm: fix missing corner cases in gsmld_poll()
        MIPS: vdso: Utilize __pa() for gic_pfn
        swiotlb: fail map correctly with failed io_tlb_default_mem
        ASoC: mt6359: Fix refcount leak bug
        serial: 8250_bcm7271: Save/restore RTS in suspend/resume
        iommu/exynos: Handle failed IOMMU device registration properly
        9p: fix a bunch of checkpatch warnings
        9p: Drop kref usage
        9p: Add client parameter to p9_req_put()
        net: 9p: fix refcount leak in p9_read_work() error handling
        MIPS: Fixed __debug_virt_addr_valid()
        rpmsg: qcom_smd: Fix refcount leak in qcom_smd_parse_edge
        kfifo: fix kfifo_to_user() return type
        lib/smp_processor_id: fix imbalanced instrumentation_end() call
        proc: fix a dentry lock race between release_task and lookup
        remoteproc: qcom: pas: Check if coredump is enabled
        remoteproc: sysmon: Wait for SSCTL service to come up
        mfd: t7l66xb: Drop platform disable callback
        mfd: max77620: Fix refcount leak in max77620_initialise_fps
        iommu/arm-smmu: qcom_iommu: Add of_node_put() when breaking out of loop
        perf tools: Fix dso_id inode generation comparison
        s390/dump: fix old lowcore virtual vs physical address confusion
        s390/maccess: fix semantics of memcpy_real() and its callers
        s390/crash: fix incorrect number of bytes to copy to user space
        s390/zcore: fix race when reading from hardware system area
        ASoC: fsl_asrc: force cast the asrc_format type
        ASoC: fsl-asoc-card: force cast the asrc_format type
        ASoC: fsl_easrc: use snd_pcm_format_t type for sample_format
        ASoC: imx-card: use snd_pcm_format_t type for asrc_format
        ASoC: qcom: q6dsp: Fix an off-by-one in q6adm_alloc_copp()
        fuse: Remove the control interface for virtio-fs
        ASoC: audio-graph-card: Add of_node_put() in fail path
        watchdog: sp5100_tco: Fix a memory leak of EFCH MMIO resource
        watchdog: armada_37xx_wdt: check the return value of devm_ioremap() in armada_37xx_wdt_probe()
        video: fbdev: amba-clcd: Fix refcount leak bugs
        video: fbdev: sis: fix typos in SiS_GetModeID()
        ASoC: mchp-spdifrx: disable end of block interrupt on failures
        powerpc/32: Call mmu_mark_initmem_nx() regardless of data block mapping.
        powerpc/32: Do not allow selection of e5500 or e6500 CPUs on PPC32
        powerpc/iommu: Fix iommu_table_in_use for a small default DMA window case
        powerpc/pci: Prefer PCI domain assignment via DT 'linux,pci-domain' and alias
        tty: serial: fsl_lpuart: correct the count of break characters
        s390/dump: fix os_info virtual vs physical address confusion
        s390/smp: cleanup target CPU callback starting
        s390/smp: cleanup control register update routines
        s390/maccess: rework absolute lowcore accessors
        s390/smp: enforce lowcore protection on CPU restart
        f2fs: fix to remove F2FS_COMPR_FL and tag F2FS_NOCOMP_FL at the same time
        powerpc/spufs: Fix refcount leak in spufs_init_isolated_loader
        powerpc/xive: Fix refcount leak in xive_get_max_prio
        powerpc/cell/axon_msi: Fix refcount leak in setup_msi_msg_address
        perf symbol: Fail to read phdr workaround
        kprobes: Forbid probing on trampoline and BPF code areas
        x86/bus_lock: Don't assume the init value of DEBUGCTLMSR.BUS_LOCK_DETECT to be zero
        powerpc/pci: Fix PHB numbering when using opal-phbid
        genelf: Use HAVE_LIBCRYPTO_SUPPORT, not the never defined HAVE_LIBCRYPTO
        scripts/faddr2line: Fix vmlinux detection on arm64
        sched/deadline: Merge dl_task_can_attach() and dl_cpu_busy()
        sched, cpuset: Fix dl_cpu_busy() panic due to empty cs->cpus_allowed
        x86/numa: Use cpumask_available instead of hardcoded NULL check
        video: fbdev: arkfb: Fix a divide-by-zero bug in ark_set_pixclock()
        tools/thermal: Fix possible path truncations
        sched: Fix the check of nr_running at queue wakelist
        sched: Remove the limitation of WF_ON_CPU on wakelist if wakee cpu is idle
        sched/core: Do not requeue task on CPU excluded from cpus_mask
        x86/entry: Build thunk_$(BITS) only if CONFIG_PREEMPTION=y
        f2fs: allow compression for mmap files in compress_mode=user
        f2fs: do not allow to decompress files have FI_COMPRESS_RELEASED
        video: fbdev: vt8623fb: Check the size of screen before memset_io()
        video: fbdev: arkfb: Check the size of screen before memset_io()
        video: fbdev: s3fb: Check the size of screen before memset_io()
        scsi: ufs: core: Correct ufshcd_shutdown() flow
        scsi: zfcp: Fix missing auto port scan and thus missing target ports
        scsi: qla2xxx: Fix imbalance vha->vref_count
        scsi: qla2xxx: Fix discovery issues in FC-AL topology
        scsi: qla2xxx: Turn off multi-queue for 8G adapters
        scsi: qla2xxx: Fix crash due to stale SRB access around I/O timeouts
        scsi: qla2xxx: Fix excessive I/O error messages by default
        scsi: qla2xxx: Fix erroneous mailbox timeout after PCI error injection
        scsi: qla2xxx: Wind down adapter after PCIe error
        scsi: qla2xxx: Fix losing FCP-2 targets on long port disable with I/Os
        scsi: qla2xxx: Fix losing target when it reappears during delete
        scsi: qla2xxx: Fix losing FCP-2 targets during port perturbation tests
        x86/bugs: Enable STIBP for IBPB mitigated RETBleed
        ftrace/x86: Add back ftrace_expected assignment
        x86/kprobes: Update kcb status flag after singlestepping
        x86/olpc: fix 'logical not is only applied to the left hand side'
        SMB3: fix lease break timeout when multiple deferred close handles for the same file.
        posix-cpu-timers: Cleanup CPU timers before freeing them during exec
        Input: gscps2 - check return value of ioremap() in gscps2_probe()
        __follow_mount_rcu(): verify that mount_lock remains unchanged
        spmi: trace: fix stack-out-of-bound access in SPMI tracing functions
        drm/mediatek: Allow commands to be sent during video mode
        drm/mediatek: Keep dsi as LP00 before dcs cmds transfer
        crypto: blake2s - remove shash module
        drm/dp/mst: Read the extended DPCD capabilities during system resume
        drm/vc4: drv: Adopt the dma configuration from the HVS or V3D component
        usbnet: smsc95xx: Don't clear read-only PHY interrupt
        usbnet: smsc95xx: Avoid link settings race on interrupt reception
        usbnet: smsc95xx: Forward PHY interrupts to PHY driver to avoid polling
        usbnet: smsc95xx: Fix deadlock on runtime resume
        firmware: arm_scpi: Ensure scpi_info is not assigned if the probe fails
        scsi: lpfc: Fix EEH support for NVMe I/O
        scsi: lpfc: SLI path split: Refactor lpfc_iocbq
        scsi: lpfc: SLI path split: Refactor fast and slow paths to native SLI4
        scsi: lpfc: SLI path split: Refactor SCSI paths
        scsi: lpfc: Remove extra atomic_inc on cmd_pending in queuecommand after VMID
        intel_th: pci: Add Meteor Lake-P support
        intel_th: pci: Add Raptor Lake-S PCH support
        intel_th: pci: Add Raptor Lake-S CPU support
        KVM: set_msr_mce: Permit guests to ignore single-bit ECC errors
        KVM: x86: Signal #GP, not -EPERM, on bad WRMSR(MCi_CTL/STATUS)
        iommu/vt-d: avoid invalid memory access via node_online(NUMA_NO_NODE)
        PCI/AER: Iterate over error counters instead of error strings
        PCI: qcom: Power on PHY before IPQ8074 DBI register accesses
        serial: 8250_pci: Refactor the loop in pci_ite887x_init()
        serial: 8250_pci: Replace dev_*() by pci_*() macros
        serial: 8250: Fold EndRun device support into OxSemi Tornado code
        serial: 8250: Add proper clock handling for OxSemi PCIe devices
        tty: 8250: Add support for Brainboxes PX cards.
        dm writecache: set a default MAX_WRITEBACK_JOBS
        kexec, KEYS, s390: Make use of built-in and secondary keyring for signature verification
        dm thin: fix use-after-free crash in dm_sm_register_threshold_callback
        net/9p: Initialize the iounit field during fid creation
        ARM: remove some dead code
        timekeeping: contribute wall clock to rng on time change
        locking/csd_lock: Change csdlock_debug from early_param to __setup
        block: remove the struct blk_queue_ctx forward declaration
        block: don't allow the same type rq_qos add more than once
        btrfs: ensure pages are unlocked on cow_file_range() failure
        btrfs: reset block group chunk force if we have to wait
        btrfs: properly flag filesystem with BTRFS_FEATURE_INCOMPAT_BIG_METADATA
        ACPI: CPPC: Do not prevent CPPC from working in the future
        powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
        KVM: x86/pmu: Introduce the ctrl_mask value for fixed counter
        KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if there's no vPMU
        KVM: x86/pmu: Ignore pmu->global_ctrl check if vPMU doesn't support global_ctrl
        KVM: VMX: Add helper to check if the guest PMU has PERF_GLOBAL_CTRL
        KVM: nVMX: Attempt to load PERF_GLOBAL_CTRL on nVMX xfer iff it exists
        dm raid: fix address sanitizer warning in raid_status
        dm raid: fix address sanitizer warning in raid_resume
        tracing: Add '__rel_loc' using trace event macros
        tracing: Avoid -Warray-bounds warning for __rel_loc macro
        ext4: update s_overhead_clusters in the superblock during an on-line resize
        ext4: fix extent status tree race in writeback error recovery path
        ext4: add EXT4_INODE_HAS_XATTR_SPACE macro in xattr.h
        ext4: fix use-after-free in ext4_xattr_set_entry
        ext4: correct max_inline_xattr_value_size computing
        ext4: correct the misjudgment in ext4_iget_extra_inode
        ext4: fix warning in ext4_iomap_begin as race between bmap and write
        ext4: check if directory block is within i_size
        ext4: make sure ext4_append() always allocates new block
        ext4: remove EA inode entry from mbcache on inode eviction
        ext4: use kmemdup() to replace kmalloc + memcpy
        ext4: unindent codeblock in ext4_xattr_block_set()
        ext4: fix race when reusing xattr blocks
        KEYS: asymmetric: enforce SM2 signature use pkey algo
        tpm: eventlog: Fix section mismatch for DEBUG_SECTION_MISMATCH
        xen-blkback: fix persistent grants negotiation
        xen-blkback: Apply 'feature_persistent' parameter when connect
        xen-blkfront: Apply 'feature_persistent' parameter when connect
        powerpc: Fix eh field when calling lwarx on PPC32
        tracing: Use a struct alignof to determine trace event field alignment
        net_sched: cls_route: remove from list when handle is 0
        mac80211: fix a memory leak where sta_info is not freed
        tcp: fix over estimation in sk_forced_mem_schedule()
        crypto: lib/blake2s - reduce stack frame usage in self test
        Revert "mwifiex: fix sleep in atomic context bugs caused by dev_coredumpv"
        Revert "s390/smp: enforce lowcore protection on CPU restart"
        drm/bridge: tc358767: Fix (e)DP bridge endpoint parsing in dedicated function
        net: phy: smsc: Disable Energy Detect Power-Down in interrupt mode
        drm/vc4: change vc4_dma_range_matches from a global to static
        tracing/perf: Avoid -Warray-bounds warning for __rel_loc macro
        drm/msm: Fix dirtyfb refcounting
        drm/meson: Fix refcount leak in meson_encoder_hdmi_init
        io_uring: mem-account pbuf buckets
        Revert "net: usb: ax88179_178a needs FLAG_SEND_ZLP"
        Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm regression
        drm/bridge: Move devm_drm_of_get_bridge to bridge/panel.c
        scsi: lpfc: Fix locking for lpfc_sli_iocbq_lookup()
        scsi: lpfc: Fix element offset in __lpfc_sli_release_iocbq_s4()
        scsi: lpfc: Resolve some cleanup issues following SLI path refactoring
        Linux 5.15.61

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Iec359ed301bcbcd6e19b67ee8534418fab26850b
2022-09-21 17:30:12 +02:00

2490 lines
70 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Kernel timekeeping code and accessor functions. Based on code from
* timer.c, moved in commit 8524070b7982.
*/
#include <linux/timekeeper_internal.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/clock.h>
#include <linux/syscore_ops.h>
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <linux/tick.h>
#include <linux/stop_machine.h>
#include <linux/pvclock_gtod.h>
#include <linux/compiler.h>
#include <linux/audit.h>
#include <linux/random.h>
#include <trace/hooks/timekeeping.h>
#include "tick-internal.h"
#include "ntp_internal.h"
#include "timekeeping_internal.h"
/*
 * Bit flags: each macro selects a distinct bit, so they can be OR-ed
 * together into one flags word.
 * NOTE(review): the consumers are outside this excerpt - presumably these
 * are action flags for the timekeeper update path; confirm at the call sites.
 */
#define TK_CLEAR_NTP (1 << 0)
#define TK_MIRROR (1 << 1)
#define TK_CLOCK_WAS_SET (1 << 2)
/*
 * Reason for which the timekeeper is being advanced.
 * NOTE(review): the function taking this mode is outside this excerpt.
 */
enum timekeeping_adv_mode {
/* Update timekeeper when a tick has passed */
TK_ADV_TICK,
/* Update timekeeper on a direct frequency change */
TK_ADV_FREQ
};
/* Raw spinlock that the tk_core.seq sequence counter below is tied to. */
DEFINE_RAW_SPINLOCK(timekeeper_lock);
/*
 * The most important data for readout fits into a single 64 byte
 * cache line.
 *
 * @seq is a raw-spinlock seqcount initialised against timekeeper_lock;
 * readers in this file bracket their accesses to @timekeeper with
 * read_seqcount_begin()/read_seqcount_retry() on it.
 */
static struct {
seqcount_raw_spinlock_t seq;
struct timekeeper timekeeper;
} tk_core ____cacheline_aligned = {
.seq = SEQCNT_RAW_SPINLOCK_ZERO(tk_core.seq, &timekeeper_lock),
};
/*
 * Second timekeeper instance kept next to tk_core.timekeeper.
 * NOTE(review): its users are outside this excerpt - presumably a working
 * copy that updates are prepared in; confirm.
 */
static struct timekeeper shadow_timekeeper;
/*
 * Flag for if timekeeping is suspended. While it is non-zero,
 * dummy_clock_read() hands out the frozen cycles_at_suspend value.
 */
int __read_mostly timekeeping_suspended;
/**
 * struct tk_fast - NMI safe timekeeper
 * @seq:	Sequence counter for protecting updates. The lowest bit
 *		is the index for the tk_read_base array
 * @base:	tk_read_base array. Access is indexed by the lowest bit of
 *		@seq.
 *
 * Writers go through update_fast_timekeeper(), which refreshes the two
 * @base entries one after the other; __ktime_get_fast_ns() picks the entry
 * to read with (seq & 0x01).
 */
struct tk_fast {
seqcount_latch_t seq;
struct tk_read_base base[2];
};
/* Suspend-time cycles value for halted fast timekeeper. */
static u64 cycles_at_suspend;

/*
 * read() callback of the placeholder clocksource: report the frozen
 * suspend-time value while timekeeping is suspended, local_clock() otherwise.
 * The clocksource argument is not used.
 */
static u64 dummy_clock_read(struct clocksource *cs)
{
	return timekeeping_suspended ? cycles_at_suspend : local_clock();
}
/*
 * Placeholder clocksource referenced by FAST_TK_INIT; only its read()
 * callback is populated.
 */
static struct clocksource dummy_clock = {
.read = dummy_clock_read,
};
/*
 * Boot time initialization which allows local_clock() to be utilized
 * during early boot when clocksources are not available. local_clock()
 * returns nanoseconds already so no conversion is required, hence mult=1
 * and shift=0. When the first proper clocksource is installed then
 * the fast time keepers are updated with the correct values.
 *
 * Expands to a brace-enclosed initializer used for each base[] entry of
 * tk_fast_mono and tk_fast_raw.
 */
#define FAST_TK_INIT \
{ \
.clock = &dummy_clock, \
.mask = CLOCKSOURCE_MASK(64), \
.mult = 1, \
.shift = 0, \
}
/*
 * Fast timekeeper read by ktime_get_mono_fast_ns(). Both base entries start
 * out on the dummy clock via FAST_TK_INIT.
 */
static struct tk_fast tk_fast_mono ____cacheline_aligned = {
.seq = SEQCNT_LATCH_ZERO(tk_fast_mono.seq),
.base[0] = FAST_TK_INIT,
.base[1] = FAST_TK_INIT,
};
/*
 * Fast timekeeper read by ktime_get_raw_fast_ns(). Both base entries start
 * out on the dummy clock via FAST_TK_INIT.
 */
static struct tk_fast tk_fast_raw ____cacheline_aligned = {
.seq = SEQCNT_LATCH_ZERO(tk_fast_raw.seq),
.base[0] = FAST_TK_INIT,
.base[1] = FAST_TK_INIT,
};
static inline void tk_normalize_xtime(struct timekeeper *tk)
{
while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
tk->xtime_sec++;
}
while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
tk->raw_sec++;
}
}
static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
{
struct timespec64 ts;
ts.tv_sec = tk->xtime_sec;
ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
return ts;
}
static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
{
tk->xtime_sec = ts->tv_sec;
tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
}
static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
{
tk->xtime_sec += ts->tv_sec;
tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
tk_normalize_xtime(tk);
}
static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
{
struct timespec64 tmp;
/*
* Verify consistency of: offset_real = -wall_to_monotonic
* before modifying anything
*/
set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
-tk->wall_to_monotonic.tv_nsec);
WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
tk->wall_to_monotonic = wtm;
set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
tk->offs_real = timespec64_to_ktime(tmp);
tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
}
/* Advance the boot offset by @delta and refresh its cached timespec form. */
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
	ktime_t boot_offs = ktime_add(tk->offs_boot, delta);

	tk->offs_boot = boot_offs;
	/*
	 * Keep a timespec copy so the VDSO update does not need a 64bit
	 * division every time.
	 */
	tk->monotonic_to_boot = ktime_to_timespec64(boot_offs);
}
/*
 * tk_clock_read - atomic clocksource read() helper
 *
 * The clocksource pointer is loaded exactly once and that same pointer is
 * used both to find the read() callback and as its argument. Read paths need
 * this: the seqcount guarantees a bad value is never returned while the
 * structures are being updated, but it does not prevent a crash if the tkr's
 * clocksource changes between fetching the callback and fetching the
 * argument, which would hand one clocksource to another one's read function.
 *
 * Not required when holding the timekeeper_lock or when reading the
 * fast-timekeeper tkrs (which are protected by their own locking and update
 * logic).
 */
static inline u64 tk_clock_read(const struct tk_read_base *tkr)
{
	struct clocksource *cs = READ_ONCE(tkr->clock);

	return cs->read(cs);
}
#ifdef CONFIG_DEBUG_TIMEKEEPING
#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */

/*
 * timekeeping_check_update - debug sanity checks for a timekeeper update
 * @tk:		timekeeper to check; its underflow_seen/overflow_seen flags
 *		are reported and cleared here
 * @offset:	cycle offset to compare against the mono clocksource's
 *		max_cycles
 *
 * Warns when @offset exceeds max_cycles, and informs when it exceeds half of
 * it. Then reports any underflow/overflow that timekeeping_get_delta()
 * flagged; those two reports share tk->last_warning and are emitted at most
 * once per WARNING_FREQ jiffies, but the flags are cleared either way.
 */
static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
{
	u64 max_cycles = tk->tkr_mono.clock->max_cycles;
	const char *name = tk->tkr_mono.clock->name;

	/* offset and max_cycles are u64, so they are printed with %llu. */
	if (offset > max_cycles) {
		printk_deferred("WARNING: timekeeping: Cycle offset (%llu) is larger than allowed by the '%s' clock's max_cycles value (%llu): time overflow danger\n",
				offset, name, max_cycles);
		printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
	} else if (offset > (max_cycles >> 1)) {
		printk_deferred("INFO: timekeeping: Cycle offset (%llu) is larger than the '%s' clock's 50%% safety margin (%llu)\n",
				offset, name, max_cycles >> 1);
		printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
	}

	if (tk->underflow_seen) {
		if (jiffies - tk->last_warning > WARNING_FREQ) {
			printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
			printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
			printk_deferred(" Your kernel is probably still fine.\n");
			tk->last_warning = jiffies;
		}
		tk->underflow_seen = 0;
	}

	if (tk->overflow_seen) {
		if (jiffies - tk->last_warning > WARNING_FREQ) {
			printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
			printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
			printk_deferred(" Your kernel is probably still fine.\n");
			tk->last_warning = jiffies;
		}
		tk->overflow_seen = 0;
	}
}
/*
 * timekeeping_get_delta - debug variant: cycles elapsed since cycle_last
 * @tkr:	read base to sample
 *
 * Returns the masked cycle delta of @tkr's clocksource. A delta that looks
 * like an underflow is replaced by 0 and one above the clocksource's
 * max_cycles is capped to it; in both cases the matching *_seen flag is set
 * in tk_core.timekeeper so timekeeping_check_update() can report it.
 */
static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	u64 now, last, mask, max, delta;
	unsigned int seq;

	/*
	 * Since we're called holding a seqcount, the data may shift
	 * under us while we're doing the calculation. This can cause
	 * false positives, since we'd note a problem but throw the
	 * results away. So nest another seqcount here to atomically
	 * grab the points we are checking with.
	 */
	do {
		seq = read_seqcount_begin(&tk_core.seq);
		now = tk_clock_read(tkr);
		last = tkr->cycle_last;
		mask = tkr->mask;
		max = tkr->clock->max_cycles;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	delta = clocksource_delta(now, last, mask);

	/*
	 * Try to catch underflows by checking if we are seeing small
	 * mask-relative negative values.
	 */
	if (unlikely((~delta & mask) < (mask >> 3))) {
		tk->underflow_seen = 1;
		delta = 0;
	}

	/*
	 * Cap delta value to the max_cycles values to avoid mult overflows.
	 * Use the value snapshotted under the seqcount: it is the limit delta
	 * was just compared against, whereas tkr->clock may have been
	 * switched since the snapshot was taken.
	 */
	if (unlikely(delta > max)) {
		tk->overflow_seen = 1;
		delta = max;
	}

	return delta;
}
#else
/* Without CONFIG_DEBUG_TIMEKEEPING the update sanity check is a no-op. */
static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
{
}
/*
 * Non-debug variant: read the clocksource and return the masked cycle delta
 * relative to @tkr's cycle_last, without any underflow/overflow checks.
 */
static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
{
	/* Sample the clocksource first, then measure against the last update */
	u64 now = tk_clock_read(tkr);

	return clocksource_delta(now, tkr->cycle_last, tkr->mask);
}
#endif
/**
 * tk_setup_internals - Set up internals to use clocksource clock.
 *
 * @tk:		The target timekeeper to setup.
 * @clock:		Pointer to clocksource.
 *
 * Installs @clock in both read bases, derives the cycle and shifted-nsec
 * intervals for one NTP interval from @clock's mult/shift, rescales the
 * accumulated xtime_nsec values when a previously installed clocksource used
 * a different shift, and resets the NTP error bookkeeping.
 *
 * Unless you're the timekeeping code, you should not be using this!
 */
static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
{
	struct clocksource *prev;
	u64 ntp_len, cycles;

	++tk->cs_was_changed_seq;
	prev = tk->tkr_mono.clock;

	/* Switch the mono base over and take a fresh cycle sample from it */
	tk->tkr_mono.clock = clock;
	tk->tkr_mono.mask = clock->mask;
	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);

	/* The raw base shares the clocksource and the very same sample */
	tk->tkr_raw.clock = clock;
	tk->tkr_raw.mask = clock->mask;
	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;

	/* ns -> cycle conversion comes first and uses the original mult */
	ntp_len = NTP_INTERVAL_LENGTH;
	ntp_len <<= clock->shift;
	cycles = ntp_len + clock->mult / 2;
	do_div(cycles, clock->mult);
	if (!cycles)
		cycles = 1;

	tk->cycle_interval = cycles;

	/* Then back again from cycles to shifted ns */
	tk->xtime_interval = cycles * clock->mult;
	tk->xtime_remainder = ntp_len - tk->xtime_interval;
	tk->raw_interval = cycles * clock->mult;

	/* A clocksource change may alter the shift units of xtime_nsec */
	if (prev) {
		int shift_diff = clock->shift - prev->shift;

		if (shift_diff >= 0) {
			tk->tkr_mono.xtime_nsec <<= shift_diff;
			tk->tkr_raw.xtime_nsec <<= shift_diff;
		} else {
			tk->tkr_mono.xtime_nsec >>= -shift_diff;
			tk->tkr_raw.xtime_nsec >>= -shift_diff;
		}
	}

	tk->tkr_mono.shift = clock->shift;
	tk->tkr_raw.shift = clock->shift;

	tk->ntp_error = 0;
	tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
	tk->ntp_tick = ntp_len << tk->ntp_error_shift;

	/*
	 * The timekeeper keeps its own mult values for the currently
	 * active clocksource. These values will be adjusted via NTP
	 * to counteract clock drifting.
	 */
	tk->tkr_mono.mult = clock->mult;
	tk->tkr_raw.mult = clock->mult;
	tk->ntp_err_mult = 0;
	tk->skip_second_overflow = 0;
}
/* Timekeeper helper functions. */
/*
 * Convert a cycle delta to nanoseconds for @tkr: scale by mult, add the
 * accumulated shifted nanoseconds, then shift the sum down.
 */
static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta)
{
	u64 shifted_ns = delta * tkr->mult + tkr->xtime_nsec;

	return shifted_ns >> tkr->shift;
}
/* Read @tkr's clocksource and return the resulting nanosecond value. */
static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
	return timekeeping_delta_to_ns(tkr, timekeeping_get_delta(tkr));
}
/*
 * Same conversion as timekeeping_get_ns(), but for a caller-supplied cycle
 * count instead of a fresh clocksource read.
 */
static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
	/* cycles elapsed since the last update_wall_time */
	u64 elapsed = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);

	return timekeeping_delta_to_ns(tkr, elapsed);
}
/**
 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
 * @tkr: Timekeeping readout base from which we take the update
 * @tkf: Pointer to NMI safe timekeeper
 *
 * We want to use this from any context including NMI and tracing /
 * instrumenting the timekeeping code itself.
 *
 * Employ the latch technique; see @raw_write_seqcount_latch.
 *
 * So if a NMI hits the update of base[0] then it will use base[1]
 * which is still consistent. In the worst case this can result in a
 * slightly wrong timestamp (a few nanoseconds). See
 * @ktime_get_mono_fast_ns.
 *
 * The statement order below is the protocol: each latch write is followed
 * by the update of the entry readers were just steered away from.
 */
static void update_fast_timekeeper(const struct tk_read_base *tkr,
struct tk_fast *tkf)
{
struct tk_read_base *base = tkf->base;
/* Force readers off to base[1] */
raw_write_seqcount_latch(&tkf->seq);
/* Update base[0] */
memcpy(base, tkr, sizeof(*base));
/* Force readers back to base[0] */
raw_write_seqcount_latch(&tkf->seq);
/* Update base[1] from the freshly written base[0] */
memcpy(base + 1, base, sizeof(*base));
}
/*
 * Latch reader for a fast timekeeper: select the base entry indicated by the
 * low bit of the sequence count, compute base time plus the converted
 * clocksource delta from it, and redo the whole computation if the latch
 * moved in the meantime.
 */
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
	struct tk_read_base *snap;
	unsigned int latch;
	u64 cycles_delta;
	u64 ns;

	do {
		latch = raw_read_seqcount_latch(&tkf->seq);
		snap = tkf->base + (latch & 0x01);

		ns = ktime_to_ns(snap->base);
		cycles_delta = clocksource_delta(tk_clock_read(snap),
						 snap->cycle_last,
						 snap->mask);
		ns += timekeeping_delta_to_ns(snap, cycles_delta);
	} while (read_seqcount_latch_retry(&tkf->seq, latch));

	return ns;
}
/**
 * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
 *
 * The returned timestamp is not guaranteed to be monotonic across an
 * update of the fast timekeeper. It is computed as:
 *
 *	now = base_mono + clock_delta * slope
 *
 * If an update lowers the slope, readers which are still directed to the
 * not yet updated second array element keep using the old, steeper slope:
 *
 *	tmono
 *	^
 *	|    o  n
 *	|   o n
 *	|  u
 *	| o
 *	|o
 *	|12345678---> reader order
 *
 *	o = old slope
 *	u = update
 *	n = new slope
 *
 * Reader 6 therefore observes time going backwards relative to reader 5.
 *
 * Other CPUs are likely to be able to observe that. On the local CPU it
 * can only be observed when an NMI hits in the middle of the update:
 * timestamps taken from that NMI context might be ahead of the timestamps
 * taken afterwards. Callers need to be aware of that and deal with it.
 */
u64 notrace ktime_get_mono_fast_ns(void)
{
	struct tk_fast *tkf = &tk_fast_mono;

	return __ktime_get_fast_ns(tkf);
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
/**
 * ktime_get_raw_fast_ns - Fast NMI safe access to clock monotonic raw
 *
 * Unlike ktime_get_mono_fast_ns() the result is always correct, because
 * the conversion factor is not affected by NTP/PTP correction.
 */
u64 notrace ktime_get_raw_fast_ns(void)
{
	struct tk_fast *tkf = &tk_fast_raw;

	return __ktime_get_fast_ns(tkf);
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
/**
 * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
 *
 * To stay NMI safe (this is used from tracing) no separate timekeeper is
 * maintained for the boot clock; the result is the fast monotonic
 * timestamp plus tk->offs_boot, read without seqcount protection. This
 * has the following minor side effects:
 *
 * (1) It's possible that a timestamp is taken after the boot offset is
 *     updated but before the timekeeper is updated. If this happens, the
 *     new boot offset is added to the old timekeeping, making the clock
 *     appear to update slightly earlier:
 *
 *	CPU 0                                        CPU 1
 *	timekeeping_inject_sleeptime64()
 *	__timekeeping_inject_sleeptime(tk, delta);
 *	                                             timestamp();
 *	timekeeping_update(tk, TK_CLEAR_NTP...);
 *
 * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
 *     partially updated. Since the tk->offs_boot update is a rare event,
 *     this should be a rare occurrence which postprocessing should be
 *     able to handle.
 *
 * The caveats vs. timestamp ordering documented for
 * ktime_get_mono_fast_ns() apply as well.
 */
u64 notrace ktime_get_boot_fast_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	u64 mono_ns = ktime_get_mono_fast_ns();

	return mono_ns + ktime_to_ns(tk->offs_boot);
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
/*
 * Lockless readout of clock realtime from the latch protected fast
 * timekeeper @tkf. When @mono is not NULL, the matching clock monotonic
 * value (same cycle delta, monotonic base) is stored there as well.
 */
static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
{
	struct tk_read_base *rb;
	u64 mono_base, real_base, elapsed, ns;
	unsigned int latch;

	do {
		latch = raw_read_seqcount_latch(&tkf->seq);
		rb = tkf->base + (latch & 0x01);
		mono_base = ktime_to_ns(rb->base);
		real_base = ktime_to_ns(rb->base_real);
		elapsed = clocksource_delta(tk_clock_read(rb), rb->cycle_last,
					    rb->mask);
		ns = timekeeping_delta_to_ns(rb, elapsed);
	} while (read_seqcount_latch_retry(&tkf->seq, latch));

	if (mono)
		*mono = mono_base + ns;

	return real_base + ns;
}
/**
 * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
 *
 * The time stamp ordering caveats documented for
 * ktime_get_mono_fast_ns() apply here too.
 */
u64 ktime_get_real_fast_ns(void)
{
	struct tk_fast *tkf = &tk_fast_mono;

	return __ktime_get_real_fast(tkf, NULL);
}
EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
/**
* ktime_get_fast_timestamps: - NMI safe timestamps
* @snapshot: Pointer to timestamp storage
*
* Stores clock monotonic, boottime and realtime timestamps.
*
* Boot time is a racy access on 32bit systems if the sleep time injection
* happens late during resume and not in timekeeping_resume(). That could
* be avoided by expanding struct tk_read_base with boot offset for 32bit
* and adding more overhead to the update. As this is a hard to observe
* once per resume event which can be filtered with reasonable effort using
* the accurate mono/real timestamps, it's probably not worth the trouble.
*
* Aside of that it might be possible on 32 and 64 bit to observe the
* following when the sleep time injection happens late:
*
*   CPU 0                                        CPU 1
*   timekeeping_resume()
*   ktime_get_fast_timestamps()
*     mono, real = __ktime_get_real_fast()
*                                                inject_sleep_time()
*                                                  update boot offset
*     boot = mono + bootoffset;
*
* That means that boot time already has the sleep time adjustment, but
* real time does not. On the next readout both are in sync again.
*
* Preventing this for 64bit is not really feasible without destroying the
* careful cache layout of the timekeeper because the sequence count and
* struct tk_read_base would then need two cache lines instead of one.
*
* Access to the time keeper clock source is disabled across the innermost
* steps of suspend/resume. The accessors still work, but the timestamps
* are frozen until time keeping is resumed which happens very early.
*
* For regular suspend/resume there is no observable difference vs. sched
* clock, but it might affect some of the nasty low level debug printks.
*
* OTOH, access to sched clock is not guaranteed across suspend/resume on
* all systems either so it depends on the hardware in use.
*
* If that turns out to be a real problem then this could be mitigated by
* using sched clock in a similar way as during early boot. But it's not as
* trivial as on early boot because it needs some careful protection
* against the clock monotonic timestamp jumping backwards on resume.
*/
void ktime_get_fast_timestamps(struct ktime_timestamps *snapshot)
{
struct timekeeper *tk = &tk_core.timekeeper;
/* One latch protected readout yields both realtime and monotonic */
snapshot->real = __ktime_get_real_fast(&tk_fast_mono, &snapshot->mono);
/* Boot time is derived afterwards from an unprotected offs_boot read */
snapshot->boot = snapshot->mono + ktime_to_ns(data_race(tk->offs_boot));
}
/**
* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
* @tk: Timekeeper to snapshot.
*
* It generally is unsafe to access the clocksource after timekeeping has been
* suspended, so take a snapshot of the readout base of @tk and use it as the
* fast timekeeper's readout base while suspended. It will return the same
* number of cycles every time until timekeeping is resumed at which time the
* proper readout base for the fast timekeeper will be restored automatically.
*/
static void halt_fast_timekeeper(const struct timekeeper *tk)
{
/* Scratch copy; reused below for both the mono and the raw base */
static struct tk_read_base tkr_dummy;
const struct tk_read_base *tkr = &tk->tkr_mono;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
/* Record the counter value read from the real clocksource */
cycles_at_suspend = tk_clock_read(tkr);
/* Redirect the snapshot's clock to dummy_clock */
tkr_dummy.clock = &dummy_clock;
tkr_dummy.base_real = tkr->base + tk->offs_real;
update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
/* Same treatment for the raw readout base */
tkr = &tk->tkr_raw;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
tkr_dummy.clock = &dummy_clock;
update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}
/* Notifier chain of pvclock timedata update listeners */
static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
/*
* Run the pvclock gtod notifier chain, handing @was_set and @tk through
* to raw_notifier_call_chain().
*/
static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
{
raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
}
/**
* pvclock_gtod_register_notifier - register a pvclock timedata update listener
* @nb: Pointer to the notifier block to register
*/
int pvclock_gtod_register_notifier(struct notifier_block *nb)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned long flags;
int ret;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
update_pvclock_gtod(tk, true);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
/**
 * pvclock_gtod_unregister_notifier - unregister a pvclock
 * timedata update listener
 * @nb: Pointer to the notifier block to unregister
 *
 * Removes @nb from the pvclock gtod notifier chain under timekeeper_lock.
 *
 * Return: the result of raw_notifier_chain_unregister().
 */
int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
{
	unsigned long irqflags;
	int err;

	raw_spin_lock_irqsave(&timekeeper_lock, irqflags);
	err = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
	raw_spin_unlock_irqrestore(&timekeeper_lock, irqflags);

	return err;
}
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
/*
 * tk_update_leap_state - helper to update the next_leap_ktime
 *
 * Fetches the next leap second time from NTP. Unless that is KTIME_MAX,
 * it is converted to monotonic time by subtracting tk->offs_real.
 */
static inline void tk_update_leap_state(struct timekeeper *tk)
{
	tk->next_leap_ktime = ntp_get_next_leap();
	if (tk->next_leap_ktime == KTIME_MAX)
		return;

	/* Convert to monotonic time */
	tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
}
/*
 * Update the ktime_t based scalar nsec members of the timekeeper
 */
static inline void tk_update_ktime_data(struct timekeeper *tk)
{
	u64 mono_sec;
	u32 mono_nsec;

	/*
	 * The xtime based monotonic readout is:
	 *	nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
	 * The ktime based monotonic readout is:
	 *	nsec = base_mono + now();
	 * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
	 */
	mono_sec = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
	mono_nsec = (u32) tk->wall_to_monotonic.tv_nsec;
	tk->tkr_mono.base = ns_to_ktime(mono_sec * NSEC_PER_SEC + mono_nsec);

	/*
	 * The nanosecond parts of xtime and wall_to_monotonic can add up
	 * to one second or more; carry that into tk->ktime_sec.
	 */
	mono_nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
	tk->ktime_sec = (mono_nsec >= NSEC_PER_SEC) ? mono_sec + 1 : mono_sec;

	/* Update the monotonic raw base */
	tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}
/*
* timekeeping_update - propagate a changed timekeeper to all derived state
* @tk: timekeeper that was modified
* @action: bitmask of TK_CLEAR_NTP, TK_CLOCK_WAS_SET and TK_MIRROR
*
* Refreshes the leap state and ktime bases, then updates the vsyscall
* data, the pvclock notifier chain and both fast timekeepers.
*
* must hold timekeeper_lock
*/
static void timekeeping_update(struct timekeeper *tk, unsigned int action)
{
/* Drop the accumulated NTP error and reset NTP state on request */
if (action & TK_CLEAR_NTP) {
tk->ntp_error = 0;
ntp_clear();
}
tk_update_leap_state(tk);
tk_update_ktime_data(tk);
update_vsyscall(tk);
update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
/* base_real has to be current before the fast timekeeper copies it */
tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
if (action & TK_CLOCK_WAS_SET)
tk->clock_was_set_seq++;
/*
* The mirroring of the data to the shadow-timekeeper needs
* to happen last here to ensure we don't over-write the
* timekeeper structure on the next update with stale data
*/
if (action & TK_MIRROR)
memcpy(&shadow_timekeeper, &tk_core.timekeeper,
sizeof(tk_core.timekeeper));
}
/**
 * timekeeping_forward_now - update clock to the current time
 * @tk:		Pointer to the timekeeper to update
 *
 * Accumulate the cycles which elapsed since the last call to
 * update_wall_time() into both the monotonic and the raw readout base.
 * Doing this before a significant clock change spares the caller from
 * dealing with that pending offset explicitly.
 */
static void timekeeping_forward_now(struct timekeeper *tk)
{
	u64 now, elapsed;

	now = tk_clock_read(&tk->tkr_mono);
	elapsed = clocksource_delta(now, tk->tkr_mono.cycle_last,
				    tk->tkr_mono.mask);

	tk->tkr_mono.cycle_last = now;
	tk->tkr_mono.xtime_nsec += elapsed * tk->tkr_mono.mult;

	tk->tkr_raw.cycle_last = now;
	tk->tkr_raw.xtime_nsec += elapsed * tk->tkr_raw.mult;

	tk_normalize_xtime(tk);
}
/**
 * ktime_get_real_ts64 - Returns the time of day in a timespec64.
 * @ts:		pointer to the timespec to be set
 *
 * Stores the time of day in @ts (WARN if suspended). The seconds and the
 * nanoseconds since the last update are read consistently under the
 * timekeeper sequence counter.
 */
void ktime_get_real_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seqno;
	u64 ns;

	WARN_ON(timekeeping_suspended);

	do {
		seqno = read_seqcount_begin(&tk_core.seq);

		ts->tv_sec = tk->xtime_sec;
		ns = timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seqno));

	ts->tv_nsec = 0;
	timespec64_add_ns(ts, ns);
}
EXPORT_SYMBOL(ktime_get_real_ts64);
/*
 * ktime_get - monotonic clock readout in ktime_t format
 *
 * Returns the monotonic base plus the nanoseconds accumulated since the
 * last timekeeper update (WARN if suspended).
 */
ktime_t ktime_get(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seqno;
	ktime_t mono_base;
	u64 ns;

	WARN_ON(timekeeping_suspended);

	do {
		seqno = read_seqcount_begin(&tk_core.seq);
		mono_base = tk->tkr_mono.base;
		ns = timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seqno));

	return ktime_add_ns(mono_base, ns);
}
EXPORT_SYMBOL_GPL(ktime_get);
/*
 * ktime_get_resolution_ns - mult >> shift of the monotonic readout base,
 * read under the timekeeper sequence counter (WARN if suspended).
 */
u32 ktime_get_resolution_ns(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seqno;
	u32 res_ns;

	WARN_ON(timekeeping_suspended);

	do {
		seqno = read_seqcount_begin(&tk_core.seq);
		res_ns = tk->tkr_mono.mult >> tk->tkr_mono.shift;
	} while (read_seqcount_retry(&tk_core.seq, seqno));

	return res_ns;
}
EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
/*
* Lookup table indexed by enum tk_offsets: each entry points at the
* matching offset member of the core timekeeper.
*/
static ktime_t *offsets[TK_OFFS_MAX] = {
[TK_OFFS_REAL] = &tk_core.timekeeper.offs_real,
[TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot,
[TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai,
};
/*
 * ktime_get_with_offset - monotonic clock plus the offset selected by @offs
 *
 * @offs indexes the offsets[] table and is not range checked here.
 * WARNs if timekeeping is suspended.
 */
ktime_t ktime_get_with_offset(enum tk_offsets offs)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	ktime_t *offset = offsets[offs];
	unsigned int seqno;
	ktime_t shifted_base;
	u64 ns;

	WARN_ON(timekeeping_suspended);

	do {
		seqno = read_seqcount_begin(&tk_core.seq);
		shifted_base = ktime_add(tk->tkr_mono.base, *offset);
		ns = timekeeping_get_ns(&tk->tkr_mono);

	} while (read_seqcount_retry(&tk_core.seq, seqno));

	return ktime_add_ns(shifted_base, ns);
}
EXPORT_SYMBOL_GPL(ktime_get_with_offset);
/*
 * ktime_get_coarse_with_offset - coarse variant of ktime_get_with_offset()
 *
 * Uses the nanoseconds stored at the last timekeeper update
 * (xtime_nsec >> shift) instead of reading the clocksource.
 * @offs indexes the offsets[] table and is not range checked here.
 */
ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	ktime_t *offset = offsets[offs];
	unsigned int seqno;
	ktime_t shifted_base;
	u64 ns;

	WARN_ON(timekeeping_suspended);

	do {
		seqno = read_seqcount_begin(&tk_core.seq);
		shifted_base = ktime_add(tk->tkr_mono.base, *offset);
		ns = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;

	} while (read_seqcount_retry(&tk_core.seq, seqno));

	return ktime_add_ns(shifted_base, ns);
}
EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
/**
 * ktime_mono_to_any() - convert monotonic time to any other time
 * @tmono:	time to convert.
 * @offs:	which offset to use
 *
 * Return: @tmono plus the offset selected by @offs, with the offset read
 * under the timekeeper sequence counter.
 */
ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
{
	ktime_t *offset = offsets[offs];
	unsigned int seqno;
	ktime_t converted;

	do {
		seqno = read_seqcount_begin(&tk_core.seq);
		converted = ktime_add(tmono, *offset);
	} while (read_seqcount_retry(&tk_core.seq, seqno));

	return converted;
}
EXPORT_SYMBOL_GPL(ktime_mono_to_any);
/**
 * ktime_get_raw - Returns the raw monotonic time in ktime_t format
 *
 * Return: the raw base plus the nanoseconds accumulated on the raw
 * readout base since the last timekeeper update.
 */
ktime_t ktime_get_raw(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seqno;
	ktime_t raw_base;
	u64 ns;

	do {
		seqno = read_seqcount_begin(&tk_core.seq);
		raw_base = tk->tkr_raw.base;
		ns = timekeeping_get_ns(&tk->tkr_raw);

	} while (read_seqcount_retry(&tk_core.seq, seqno));

	return ktime_add_ns(raw_base, ns);
}
EXPORT_SYMBOL_GPL(ktime_get_raw);
/**
* ktime_get_ts64 - get the monotonic clock in timespec64 format
* @ts: pointer to timespec variable
*
* The function calculates the monotonic clock from the realtime
* clock and the wall_to_monotonic offset and stores the result
* in normalized timespec64 format in the variable pointed to by @ts.
*/
void ktime_get_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct timespec64 tomono;
unsigned int seq;
u64 nsec;
WARN_ON(timekeeping_suspended);
do {
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
nsec = timekeeping_get_ns(&tk->tkr_mono);
tomono = tk->wall_to_monotonic;
} while (read_seqcount_retry(&tk_core.seq, seq));
ts->tv_sec += tomono.tv_sec;
ts->tv_nsec = 0;
timespec64_add_ns(ts, nsec + tomono.tv_nsec);
}
EXPORT_SYMBOL_GPL(ktime_get_ts64);
/**
 * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
 *
 * Returns the seconds portion of CLOCK_MONOTONIC with a single,
 * non-serialized read. tk->ktime_sec is of type 'unsigned long', so this
 * works on both 32 and 64 bit systems. On 32 bit systems the readout
 * covers ~136 years of uptime, which should be enough to prevent
 * premature wrap arounds.
 */
time64_t ktime_get_seconds(void)
{
	WARN_ON(timekeeping_suspended);

	return tk_core.timekeeper.ktime_sec;
}
EXPORT_SYMBOL_GPL(ktime_get_seconds);
/**
 * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME
 *
 * Returns the wall clock seconds since 1970.
 *
 * On 64bit systems tk->xtime_sec is read directly. On 32bit systems the
 * read is wrapped in the sequence counter to provide "atomic" access to
 * the 64bit tk->xtime_sec value.
 */
time64_t ktime_get_real_seconds(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seqno;
	time64_t sec;

	if (IS_ENABLED(CONFIG_64BIT))
		return tk->xtime_sec;

	do {
		seqno = read_seqcount_begin(&tk_core.seq);
		sec = tk->xtime_sec;

	} while (read_seqcount_retry(&tk_core.seq, seqno));

	return sec;
}
EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
/**
 * __ktime_get_real_seconds - The same as ktime_get_real_seconds
 * but without the sequence counter protection. This internal function
 * is only to be called when the timekeeping lock is already held.
 */
noinstr time64_t __ktime_get_real_seconds(void)
{
	return tk_core.timekeeper.xtime_sec;
}
/**
 * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
 * @systime_snapshot:	pointer to struct receiving the system time snapshot
 *
 * Captures, consistently under the timekeeper sequence counter, the
 * current counter value, the clocksource id, the clocksource-change and
 * clock-was-set sequence numbers and the realtime and raw monotonic
 * times which correspond to that counter value.
 */
void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	ktime_t real_base, raw_base;
	u64 real_ns, raw_ns;
	unsigned int seqno;
	u64 counter;

	WARN_ON_ONCE(timekeeping_suspended);

	do {
		seqno = read_seqcount_begin(&tk_core.seq);
		counter = tk_clock_read(&tk->tkr_mono);
		systime_snapshot->cs_id = tk->tkr_mono.clock->id;
		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
		real_base = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_real);
		raw_base = tk->tkr_raw.base;
		real_ns = timekeeping_cycles_to_ns(&tk->tkr_mono, counter);
		raw_ns = timekeeping_cycles_to_ns(&tk->tkr_raw, counter);
	} while (read_seqcount_retry(&tk_core.seq, seqno));

	systime_snapshot->cycles = counter;
	systime_snapshot->real = ktime_add_ns(real_base, real_ns);
	systime_snapshot->raw = ktime_add_ns(raw_base, raw_ns);
}
EXPORT_SYMBOL_GPL(ktime_get_snapshot);
/*
 * Scale *base by mult/div checking for overflow.
 *
 * *base is split into quotient and remainder of the division by @div so
 * that both partial products can be checked against the bits left free
 * by @mult. Returns 0 on success or -EOVERFLOW, in which case *base is
 * left untouched.
 */
static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
{
	const int headroom = (int)sizeof(u64) * 8 - fls64(mult);
	u64 quot, rem;

	quot = div64_u64_rem(*base, div, &rem);

	if (headroom < fls64(quot) || headroom < fls64(rem))
		return -EOVERFLOW;

	*base = quot * mult + div64_u64(rem * mult, div);
	return 0;
}
/**
* adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
* @history: Snapshot representing start of history
* @partial_history_cycles: Cycle offset into history (fractional part)
* @total_history_cycles: Total history length in cycles
* @discontinuity: True indicates clock was set on history period
* @ts: Cross timestamp that should be adjusted using
* partial/total ratio
*
* Helper function used by get_device_system_crosststamp() to correct the
* crosstimestamp corresponding to the start of the current interval to the
* system counter value (timestamp point) provided by the driver. The
* total_history_* quantities are the total history starting at the provided
* reference point and ending at the start of the current interval. The cycle
* count between the driver timestamp point and the start of the current
* interval is partial_history_cycles.
*
* Return: 0 on success (including the case where either cycle count is
* zero and nothing needs adjusting), or the error returned by
* scale64_check_overflow().
*/
static int adjust_historical_crosststamp(struct system_time_snapshot *history,
u64 partial_history_cycles,
u64 total_history_cycles,
bool discontinuity,
struct system_device_crosststamp *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
u64 corr_raw, corr_real;
bool interp_forward;
int ret;
/* Nothing to scale; this also avoids dividing by a zero total */
if (total_history_cycles == 0 || partial_history_cycles == 0)
return 0;
/* Interpolate shortest distance from beginning or end of history */
interp_forward = partial_history_cycles > total_history_cycles / 2;
partial_history_cycles = interp_forward ?
total_history_cycles - partial_history_cycles :
partial_history_cycles;
/*
* Scale the monotonic raw time delta by:
* partial_history_cycles / total_history_cycles
*/
corr_raw = (u64)ktime_to_ns(
ktime_sub(ts->sys_monoraw, history->raw));
ret = scale64_check_overflow(partial_history_cycles,
total_history_cycles, &corr_raw);
if (ret)
return ret;
/*
* If there is a discontinuity in the history, scale monotonic raw
* correction by:
* mult(real)/mult(raw) yielding the realtime correction
* Otherwise, calculate the realtime correction similar to monotonic
* raw calculation
*/
if (discontinuity) {
corr_real = mul_u64_u32_div
(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
} else {
corr_real = (u64)ktime_to_ns(
ktime_sub(ts->sys_realtime, history->real));
ret = scale64_check_overflow(partial_history_cycles,
total_history_cycles, &corr_real);
if (ret)
return ret;
}
/* Fixup monotonic raw and real time values */
if (interp_forward) {
/* Forward: add the correction to the start of history */
ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
ts->sys_realtime = ktime_add_ns(history->real, corr_real);
} else {
/* Backward: subtract the correction from the current values */
ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
}
return 0;
}
/*
 * cycle_between - true if test occurs chronologically between before and after
 *
 * Both bounds are exclusive. Two cases are handled:
 *  - no counter wrap (before < after): before < test < after
 *  - counter wrapped between before and after (before > after): test lies
 *    either above before (not yet wrapped) or below after (already
 *    wrapped).
 */
static bool cycle_between(u64 before, u64 test, u64 after)
{
	if (test > before && test < after)
		return true;
	if (before > after && (test > before || test < after))
		return true;
	return false;
}
/**
 * get_device_system_crosststamp - Synchronously capture system/device timestamp
 * @get_time_fn:	Callback to get simultaneous device time and
 *	system counter from the device driver
 * @ctx:		Context passed to get_time_fn()
 * @history_begin:	Historical reference point used to interpolate system
 *	time when counter provided by the driver is before the current interval
 * @xtstamp:		Receives simultaneously captured system and device time
 *
 * Reads a timestamp from a device and correlates it to system time
 *
 * Return: 0 on success, the error returned by @get_time_fn, -ENODEV if
 * the driver's counter does not belong to the current timekeeper
 * clocksource, -EINVAL if interpolation is required but @history_begin is
 * missing or unusable, or the error from adjust_historical_crosststamp().
 */
int get_device_system_crosststamp(int (*get_time_fn)
				  (ktime_t *device_time,
				   struct system_counterval_t *sys_counterval,
				   void *ctx),
				  void *ctx,
				  struct system_time_snapshot *history_begin,
				  struct system_device_crosststamp *xtstamp)
{
	struct system_counterval_t system_counterval;
	struct timekeeper *tk = &tk_core.timekeeper;
	u64 cycles, now, interval_start;
	unsigned int clock_was_set_seq = 0;
	ktime_t base_real, base_raw;
	u64 nsec_real, nsec_raw;
	u8 cs_was_changed_seq;
	unsigned int seq;
	bool do_interp;
	int ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		/*
		 * Try to synchronously capture device time and a system
		 * counter value calling back into the device driver
		 */
		ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
		if (ret)
			return ret;

		/*
		 * Verify that the clocksource associated with the captured
		 * system counter value is the same as the currently installed
		 * timekeeper clocksource
		 */
		if (tk->tkr_mono.clock != system_counterval.cs)
			return -ENODEV;
		cycles = system_counterval.cycles;

		/*
		 * Check whether the system counter value provided by the
		 * device driver is on the current timekeeping interval.
		 */
		now = tk_clock_read(&tk->tkr_mono);
		interval_start = tk->tkr_mono.cycle_last;
		if (!cycle_between(interval_start, cycles, now)) {
			clock_was_set_seq = tk->clock_was_set_seq;
			cs_was_changed_seq = tk->cs_was_changed_seq;
			cycles = interval_start;
			do_interp = true;
		} else {
			do_interp = false;
		}

		base_real = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_real);
		base_raw = tk->tkr_raw.base;

		/*
		 * Convert "cycles", not system_counterval.cycles: when
		 * interpolating, the driver's counter value lies before
		 * cycle_last, which timekeeping_cycles_to_ns() cannot
		 * convert meaningfully. "cycles" then holds the interval
		 * start, which yields the time of the last update that
		 * adjust_historical_crosststamp() expects as its input.
		 */
		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, cycles);
		nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, cycles);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
	xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);

	/*
	 * Interpolate if necessary, adjusting back from the start of the
	 * current interval
	 */
	if (do_interp) {
		u64 partial_history_cycles, total_history_cycles;
		bool discontinuity;

		/*
		 * Check that the counter value occurs after the provided
		 * history reference and that the history doesn't cross a
		 * clocksource change
		 */
		if (!history_begin ||
		    !cycle_between(history_begin->cycles,
				   system_counterval.cycles, cycles) ||
		    history_begin->cs_was_changed_seq != cs_was_changed_seq)
			return -EINVAL;
		partial_history_cycles = cycles - system_counterval.cycles;
		total_history_cycles = cycles - history_begin->cycles;
		discontinuity =
			history_begin->clock_was_set_seq != clock_was_set_seq;

		ret = adjust_historical_crosststamp(history_begin,
						    partial_history_cycles,
						    total_history_cycles,
						    discontinuity, xtstamp);
		if (ret)
			return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
/**
* do_settimeofday64 - Sets the time of day.
* @ts: pointer to the timespec64 variable containing the new time
*
* Sets the time of day to the new time and update NTP and notify hrtimers
*
* Return: 0 on success, -EINVAL if @ts fails timespec64_valid_settod() or
* if wall_to_monotonic compares greater than the requested time delta.
*/
int do_settimeofday64(const struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct timespec64 ts_delta, xt;
unsigned long flags;
int ret = 0;
if (!timespec64_valid_settod(ts))
return -EINVAL;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
/* Accumulate pending cycles so xtime is current before computing the delta */
timekeeping_forward_now(tk);
xt = tk_xtime(tk);
ts_delta = timespec64_sub(*ts, xt);
if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) {
ret = -EINVAL;
goto out;
}
/* Shift wall_to_monotonic by the same delta that is applied to xtime */
tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta));
tk_set_xtime(tk, ts);
out:
/* Reached on error too: the clock was already forwarded above */
timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
trace_android_rvh_tk_based_time_sync(tk);
/* Signal hrtimers about time change */
clock_was_set(CLOCK_SET_WALL);
/* Audit and feed the entropy pool only when the time was really set */
if (!ret) {
audit_tk_injoffset(ts_delta);
add_device_randomness(ts, sizeof(*ts));
}
return ret;
}
EXPORT_SYMBOL(do_settimeofday64);
/**
* timekeeping_inject_offset - Adds or subtracts from the current time.
* @ts: Pointer to the timespec variable containing the offset
*
* Adds or subtracts an offset value from the current time.
*
* Return: 0 on success, -EINVAL if @ts->tv_nsec is out of range, if
* wall_to_monotonic compares greater than @ts, or if the resulting time
* fails timespec64_valid_settod().
*/
static int timekeeping_inject_offset(const struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned long flags;
struct timespec64 tmp;
int ret = 0;
/* The nanosecond part must be normalized; tv_sec carries the sign */
if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
return -EINVAL;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
timekeeping_forward_now(tk);
/* Make sure the proposed value is valid */
tmp = timespec64_add(tk_xtime(tk), *ts);
if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 ||
!timespec64_valid_settod(&tmp)) {
ret = -EINVAL;
goto error;
}
/* Move xtime by the offset and wall_to_monotonic by the opposite amount */
tk_xtime_add(tk, ts);
tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts));
error: /* even if we error out, we forwarded the time, so call update */
timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
/* Signal hrtimers about time change */
clock_was_set(CLOCK_SET_WALL);
return ret;
}
/*
* Indicates if there is an offset between the system clock and the hardware
* clock/persistent clock/rtc. Set to 1 by timekeeping_warp_clock() when it
* warps the clock by a non-zero timezone offset.
*/
int persistent_clock_is_local;
/*
 * Adjust the time obtained from the CMOS to be UTC time instead of
 * local time.
 *
 * This is ugly, but preferable to the alternatives.  Otherwise we
 * would either need to write a program to do it in /etc/rc (and risk
 * confusion if the program gets run more than once; it would also be
 * hard to make the program warp the clock precisely n hours)  or
 * compile in the timezone information into the kernel.  Bad, bad....
 *
 *						- TYT, 1992-01-01
 *
 * The best thing to do is to keep the CMOS clock in universal time (UTC)
 * as real UNIX machines always do it. This avoids all headaches about
 * daylight saving times and warping kernel clocks.
 *
 * Nothing is done when sys_tz.tz_minuteswest is zero.
 */
void timekeeping_warp_clock(void)
{
	struct timespec64 adjust;

	if (sys_tz.tz_minuteswest == 0)
		return;

	persistent_clock_is_local = 1;
	adjust.tv_sec = sys_tz.tz_minuteswest * 60;
	adjust.tv_nsec = 0;
	timekeeping_inject_offset(&adjust);
}
/*
 * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
 *
 * Records @tai_offset (seconds) and recomputes offs_tai as offs_real plus
 * that many seconds.
 */
static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
{
	ktime_t tai_delta = ktime_set(tai_offset, 0);

	tk->tai_offset = tai_offset;
	tk->offs_tai = ktime_add(tk->offs_real, tai_delta);
}
/*
* change_clocksource - Swaps clocksources if a new one is available
* @data: the new struct clocksource, passed as void pointer
*
* Accumulates current time interval and initializes new clocksource
*
* Always returns 0; timekeeping_notify() detects a failed switch by
* checking which clocksource is installed afterwards.
*/
static int change_clocksource(void *data)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct clocksource *new, *old = NULL;
unsigned long flags;
bool change = false;
new = (struct clocksource *) data;
/*
* If the cs is in module, get a module reference. Succeeds
* for built-in code (owner == NULL) as well.
*/
if (try_module_get(new->owner)) {
/* No enable callback, or a successful one, permits the switch */
if (!new->enable || new->enable(new) == 0)
change = true;
else
module_put(new->owner);
}
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
/* Account the time elapsed on the old clocksource before switching */
timekeeping_forward_now(tk);
if (change) {
old = tk->tkr_mono.clock;
tk_setup_internals(tk, new);
}
timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
/* Shut down the replaced clocksource and drop its module reference */
if (old) {
if (old->disable)
old->disable(old);
module_put(old->owner);
}
return 0;
}
/**
 * timekeeping_notify - Install a new clock source
 * @clock:		pointer to the clock source
 *
 * This function is called from clocksource.c after a new, better clock
 * source has been registered. The caller holds the clocksource_mutex.
 *
 * Return: 0 if @clock is the timekeeper clocksource when the function
 * returns, -1 otherwise.
 */
int timekeeping_notify(struct clocksource *clock)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	/* Already installed, nothing to do */
	if (tk->tkr_mono.clock == clock)
		return 0;

	stop_machine(change_clocksource, clock, NULL);
	tick_clock_notify();

	if (tk->tkr_mono.clock != clock)
		return -1;

	return 0;
}
/**
 * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec
 * @ts:		pointer to the timespec64 to be set
 *
 * Stores the raw monotonic time (completely un-modified by ntp) in @ts.
 */
void ktime_get_raw_ts64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seqno;
	u64 ns;

	do {
		seqno = read_seqcount_begin(&tk_core.seq);
		ts->tv_sec = tk->raw_sec;
		ns = timekeeping_get_ns(&tk->tkr_raw);

	} while (read_seqcount_retry(&tk_core.seq, seqno));

	ts->tv_nsec = 0;
	timespec64_add_ns(ts, ns);
}
EXPORT_SYMBOL(ktime_get_raw_ts64);
/**
 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
 *
 * Return: the CLOCK_SOURCE_VALID_FOR_HRES bit of the current clocksource
 * flags, i.e. non-zero when the flag is set.
 */
int timekeeping_valid_for_hres(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seqno;
	int valid;

	do {
		seqno = read_seqcount_begin(&tk_core.seq);

		valid = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;

	} while (read_seqcount_retry(&tk_core.seq, seqno));

	return valid;
}
/**
 * timekeeping_max_deferment - Returns max time the clocksource can be deferred
 *
 * Return: max_idle_ns of the current timekeeper clocksource.
 */
u64 timekeeping_max_deferment(void)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	unsigned int seqno;
	u64 max_ns;

	do {
		seqno = read_seqcount_begin(&tk_core.seq);

		max_ns = tk->tkr_mono.clock->max_idle_ns;

	} while (read_seqcount_retry(&tk_core.seq, seqno));

	return max_ns;
}
/**
 * read_persistent_clock64 -  Return time from the persistent clock.
 * @ts: Pointer to the storage for the readout value
 *
 * Weak dummy function for arches that do not yet support it.
 * An architecture override reads the time from the battery backed
 * persistent clock; this default reports "unsupported" by storing
 * tv_sec = 0 and tv_nsec = 0 in @ts.
 *
 *  XXX - Do be sure to remove it once all arches implement it.
 */
void __weak read_persistent_clock64(struct timespec64 *ts)
{
	ts->tv_nsec = 0;
	ts->tv_sec = 0;
}
/**
* read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
* from the boot.
*
* Weak dummy function for arches that do not yet support it.
* @wall_time: - current time as returned by persistent clock
* @boot_offset: - offset that is defined as wall_time - boot_time
*
* The default function calculates offset based on the current value of
* local_clock(). This way architectures that support sched_clock() but don't
* support dedicated boot time clock will provide the best estimate of the
* boot time.
*/
void __weak __init
read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
struct timespec64 *boot_offset)
{
read_persistent_clock64(wall_time);
/* local_clock() nanoseconds converted to a timespec64 */
*boot_offset = ns_to_timespec64(local_clock());
}
/*
* Flag reflecting whether timekeeping_resume() has injected sleeptime.
*
* The flag starts off false and is only set when a suspend reaches
* timekeeping_suspend(). timekeeping_resume() sets it to false when the
* timekeeper clocksource is not stopping across suspend and has been
* used to update sleep time. If the timekeeper clocksource has stopped
* then the flag stays true and is used by the RTC resume code to decide
* whether sleeptime must be injected; if so, the flag is set to false then.
*
* If a suspend fails before reaching timekeeping_resume() then the flag
* stays false and prevents erroneous sleeptime injection.
*/
static bool suspend_timing_needed;
/* Flag for if there is a persistent clock on this platform */
static bool persistent_clock_exists;
/*
 * timekeeping_init - Initializes the clocksource and common timekeeping values
 *
 * Reads the persistent clock and the boot offset, sanitizes both values and
 * then sets up the core timekeeper with the default clocksource, the initial
 * wall time and the wall_to_monotonic offset.
 */
void __init timekeeping_init(void)
{
	struct timespec64 wall_time, boot_offset, wall_to_mono;
	struct timekeeper *tk = &tk_core.timekeeper;
	struct clocksource *clock;
	unsigned long flags;

	read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);

	/*
	 * A valid, strictly positive readout means a persistent clock exists.
	 * A zero readout means "not supported" and is silently accepted.
	 * Anything else is bogus: warn and fall back to zero.
	 */
	if (timespec64_valid_settod(&wall_time) &&
	    timespec64_to_ns(&wall_time) > 0) {
		persistent_clock_exists = true;
	} else if (timespec64_to_ns(&wall_time) != 0) {
		/* Terminate the line so it cannot be merged with later output */
		pr_warn("Persistent clock returned invalid value\n");
		wall_time = (struct timespec64){0};
	}

	/* A boot offset beyond the wall time is not usable, drop it */
	if (timespec64_compare(&wall_time, &boot_offset) < 0)
		boot_offset = (struct timespec64){0};

	/*
	 * We want to set wall_to_mono, so the following is true:
	 * wall time + wall_to_mono = boot time
	 */
	wall_to_mono = timespec64_sub(boot_offset, wall_time);

	raw_spin_lock_irqsave(&timekeeper_lock, flags);
	write_seqcount_begin(&tk_core.seq);
	ntp_init();

	clock = clocksource_default_clock();
	/* The enable callback is optional */
	if (clock->enable)
		clock->enable(clock);
	tk_setup_internals(tk, clock);

	tk_set_xtime(tk, &wall_time);
	tk->raw_sec = 0;

	tk_set_wall_to_mono(tk, wall_to_mono);

	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);

	write_seqcount_end(&tk_core.seq);
	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
}
/*
* Persistent clock readout taken when suspend began. It is stored (and
* possibly drift-compensated) by timekeeping_suspend() and compared against
* a fresh readout in timekeeping_resume() to compute the time spent asleep.
*/
static struct timespec64 timekeeping_suspend_time;
/**
* __timekeeping_inject_sleeptime - Internal function to add sleep interval
* @tk: Pointer to the timekeeper to be updated
* @delta: Pointer to the delta value in timespec64 format
*
* Takes a timespec offset measuring a suspend interval and properly
* adds the sleep offset to the timekeeping variables.
*/
static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
const struct timespec64 *delta)
{
if (!timespec64_valid_strict(delta)) {
printk_deferred(KERN_WARNING
"__timekeeping_inject_sleeptime: Invalid "
"sleep delta value!\n");
return;
}
tk_xtime_add(tk, delta);
tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
tk_debug_account_sleep_time(delta);
}
#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
/**
* We have three kinds of time sources to use for sleep time
* injection, the preference order is:
* 1) non-stop clocksource
* 2) persistent clock (ie: RTC accessible when irqs are off)
* 3) RTC
*
* 1) and 2) are used by timekeeping, 3) by RTC subsystem.
* If system has neither 1) nor 2), 3) will be used finally.
*
*
* If timekeeping has injected sleeptime via either 1) or 2),
* 3) becomes needless, so in this case we don't need to call
* rtc_resume(), and this is what timekeeping_rtc_skipresume()
* means.
*/
bool timekeeping_rtc_skipresume(void)
{
return !suspend_timing_needed;
}
/**
 * timekeeping_rtc_skipsuspend - Tell whether rtc_suspend() can be skipped
 *
 * Whether the non-stop clocksource 1) can be used is only known when doing
 * timekeeping_resume(), which is invoked after rtc_suspend(), so
 * rtc_suspend() can't be skipped just because the system has 1).
 *
 * But if the system has a persistent clock 2), 2) will definitely be used,
 * so in this case rtc_suspend() does not need to be called.
 *
 * Return: true when a persistent clock exists on this platform.
 */
bool timekeeping_rtc_skipsuspend(void)
{
	bool have_persistent_clock = persistent_clock_exists;

	return have_persistent_clock;
}
/**
* timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values
* @delta: pointer to a timespec64 delta value
*
* This hook is for architectures that cannot support read_persistent_clock64
* because their RTC/persistent clock is only accessible when irqs are enabled,
* and that also don't have an effective nonstop clocksource.
*
* This function should only be called by rtc_resume(), and allows
* a suspend offset to be injected into the timekeeping values.
*/
void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned long flags;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
/* The injection happens right here, so nothing is pending afterwards */
suspend_timing_needed = false;
/* Forward the timekeeper to now before adding the sleep interval */
timekeeping_forward_now(tk);
__timekeeping_inject_sleeptime(tk, delta);
timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
/* Signal hrtimers about time change */
clock_was_set(CLOCK_SET_WALL | CLOCK_SET_BOOT);
}
#endif
/**
* timekeeping_resume - Resumes the generic timekeeping subsystem.
*
* Determines the time spent in suspend from the suspend-nonstop clocksource
* or, failing that, from the persistent clock, injects it into the
* timekeeper and re-bases the clocksource cycle values.
*/
void timekeeping_resume(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct clocksource *clock = tk->tkr_mono.clock;
unsigned long flags;
struct timespec64 ts_new, ts_delta;
u64 cycle_now, nsec;
bool inject_sleeptime = false;
/* Read the persistent clock before timekeeper_lock is taken */
read_persistent_clock64(&ts_new);
clockevents_resume();
clocksource_resume();
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
/*
* After system resumes, we need to calculate the suspended time and
* compensate it for the OS time. There are 3 sources that could be
* used: Nonstop clocksource during suspend, persistent clock and rtc
* device.
*
* One specific platform may have 1 or 2 or all of them, and the
* preference will be:
* suspend-nonstop clocksource -> persistent clock -> rtc
* The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code.
*/
cycle_now = tk_clock_read(&tk->tkr_mono);
nsec = clocksource_stop_suspend_timing(clock, cycle_now);
if (nsec > 0) {
/* The suspend timing clocksource delivered the sleep interval */
ts_delta = ns_to_timespec64(nsec);
inject_sleeptime = true;
} else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
/* Fall back to the difference of the persistent clock readouts */
ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
inject_sleeptime = true;
}
/*
* If no delta was found, suspend_timing_needed is left untouched so
* the RTC resume code can still inject the sleep time.
*/
if (inject_sleeptime) {
suspend_timing_needed = false;
__timekeeping_inject_sleeptime(tk, &ts_delta);
}
/* Re-base the last cycle value */
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
tk->ntp_error = 0;
timekeeping_suspended = 0;
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
touch_softlockup_watchdog();
/* Resume the clockevent device(s) and hrtimers */
tick_resume();
/* Notify timerfd as resume is equivalent to clock_was_set() */
timerfd_resume();
}
/**
* timekeeping_suspend - Suspends the generic timekeeping subsystem.
*
* Records the persistent clock readout and the current clocksource cycle
* value so that timekeeping_resume() can compute the time spent asleep,
* then halts the timekeeper and suspends tick, clocksources and clockevents.
*
* Return: always 0.
*/
int timekeeping_suspend(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned long flags;
struct timespec64 delta, delta_delta;
/* Difference of system time and persistent clock at the last reset */
static struct timespec64 old_delta;
struct clocksource *curr_clock;
u64 cycle_now;
read_persistent_clock64(&timekeeping_suspend_time);
/*
* On some systems the persistent_clock can not be detected at
* timekeeping_init by its return value, so if we see a valid
* value returned, update the persistent_clock_exists flag.
*/
if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
persistent_clock_exists = true;
/* Sleep time has to be injected on resume, by whichever source */
suspend_timing_needed = true;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
timekeeping_forward_now(tk);
timekeeping_suspended = 1;
/*
* Since we've called forward_now, cycle_last stores the value
* just read from the current clocksource. Save this to potentially
* use in suspend timing.
*/
curr_clock = tk->tkr_mono.clock;
cycle_now = tk->tkr_mono.cycle_last;
clocksource_start_suspend_timing(curr_clock, cycle_now);
if (persistent_clock_exists) {
/*
* To avoid drift caused by repeated suspend/resumes,
* which each can add ~1 second drift error,
* try to compensate so the difference in system time
* and persistent_clock time stays close to constant.
*/
delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
delta_delta = timespec64_sub(delta, old_delta);
if (abs(delta_delta.tv_sec) >= 2) {
/*
* if delta_delta is too large, assume time correction
* has occurred and set old_delta to the current delta.
*/
old_delta = delta;
} else {
/*
* Otherwise fold the small difference into
* timekeeping_suspend_time to compensate.
*/
timekeeping_suspend_time =
timespec64_add(timekeeping_suspend_time, delta_delta);
}
}
timekeeping_update(tk, TK_MIRROR);
halt_fast_timekeeper(tk);
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
tick_suspend();
clocksource_suspend();
clockevents_suspend();
return 0;
}
/* syscore resume/suspend hooks for timekeeping */
static struct syscore_ops timekeeping_syscore_ops = {
.resume = timekeeping_resume,
.suspend = timekeeping_suspend,
};
/*
* timekeeping_init_ops - Register the timekeeping syscore suspend/resume hooks
*
* Runs as a device initcall and always returns 0.
*/
static int __init timekeeping_init_ops(void)
{
register_syscore_ops(&timekeeping_syscore_ops);
return 0;
}
device_initcall(timekeeping_init_ops);
/*
 * Apply a multiplier adjustment to the timekeeper
 *
 * @tk:		timekeeper to adjust
 * @offset:	non-accumulated cycles
 * @mult_adj:	signed change to apply to tkr_mono.mult
 */
static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
							 s64 offset,
							 s32 mult_adj)
{
	s64 interval = tk->cycle_interval;

	/* Scale interval and offset by mult_adj; +1 and -1 need no multiply */
	switch (mult_adj) {
	case 0:
		/* Nothing to adjust */
		return;
	case 1:
		break;
	case -1:
		interval = -interval;
		offset = -offset;
		break;
	default:
		interval *= mult_adj;
		offset *= mult_adj;
		break;
	}

	/*
	 * So the following can be confusing.
	 *
	 * To keep things simple, lets assume mult_adj == 1 for now.
	 *
	 * When mult_adj != 1, remember that the interval and offset values
	 * have been appropriately scaled so the math is the same.
	 *
	 * The basic idea here is that we're increasing the multiplier
	 * by one, this causes the xtime_interval to be incremented by
	 * one cycle_interval. This is because:
	 *	xtime_interval = cycle_interval * mult
	 * So if mult is being incremented by one:
	 *	xtime_interval = cycle_interval * (mult + 1)
	 * Its the same as:
	 *	xtime_interval = (cycle_interval * mult) + cycle_interval
	 * Which can be shortened to:
	 *	xtime_interval += cycle_interval
	 *
	 * So offset stores the non-accumulated cycles. Thus the current
	 * time (in shifted nanoseconds) is:
	 *	now = (offset * adj) + xtime_nsec
	 * Now, even though we're adjusting the clock frequency, we have
	 * to keep time consistent. In other words, we can't jump back
	 * in time, and we also want to avoid jumping forward in time.
	 *
	 * So given the same offset value, we need the time to be the same
	 * both before and after the freq adjustment.
	 *	now = (offset * adj_1) + xtime_nsec_1
	 *	now = (offset * adj_2) + xtime_nsec_2
	 * So:
	 *	(offset * adj_1) + xtime_nsec_1 =
	 *		(offset * adj_2) + xtime_nsec_2
	 * And we know:
	 *	adj_2 = adj_1 + 1
	 * So:
	 *	(offset * adj_1) + xtime_nsec_1 =
	 *		(offset * (adj_1+1)) + xtime_nsec_2
	 *	(offset * adj_1) + xtime_nsec_1 =
	 *		(offset * adj_1) + offset + xtime_nsec_2
	 * Canceling the sides:
	 *	xtime_nsec_1 = offset + xtime_nsec_2
	 * Which gives us:
	 *	xtime_nsec_2 = xtime_nsec_1 - offset
	 * Which simplifies to:
	 *	xtime_nsec -= offset
	 */
	if (mult_adj > 0 && tk->tkr_mono.mult + mult_adj < mult_adj) {
		/* NTP adjustment caused clocksource mult overflow */
		WARN_ON_ONCE(1);
		return;
	}

	tk->tkr_mono.mult += mult_adj;
	tk->xtime_interval += interval;
	tk->tkr_mono.xtime_nsec -= offset;
}
/*
* Adjust the timekeeper's multiplier to the correct frequency
* and also to reduce the accumulated error value.
*
* @tk: timekeeper to adjust
* @offset: non-accumulated cycles, handed on to
* timekeeping_apply_adjustment()
*/
static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
{
u32 mult;
/*
* Determine the multiplier from the current NTP tick length.
* Avoid expensive division when the tick length doesn't change.
*/
if (likely(tk->ntp_tick == ntp_tick_length())) {
/* Strip the error correction applied by the previous call */
mult = tk->tkr_mono.mult - tk->ntp_err_mult;
} else {
tk->ntp_tick = ntp_tick_length();
mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
tk->xtime_remainder, tk->cycle_interval);
}
/*
* If the clock is behind the NTP time, increase the multiplier by 1
* to catch up with it. If it's ahead and there was a remainder in the
* tick division, the clock will slow down. Otherwise it will stay
* ahead until the tick length changes to a non-divisible value.
*/
tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0;
mult += tk->ntp_err_mult;
/* Apply only the difference to the multiplier currently in use */
timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult);
/* Warn once if the multiplier moved beyond the clocksource's maxadj */
if (unlikely(tk->tkr_mono.clock->maxadj &&
(abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
> tk->tkr_mono.clock->maxadj))) {
printk_once(KERN_WARNING
"Adjusting %s more than 11%% (%ld vs %ld)\n",
tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
(long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
}
/*
* It may be possible that when we entered this function, xtime_nsec
* was very small. Further, if we're slightly speeding the clocksource
* in the code above, its possible the required corrective factor to
* xtime_nsec could cause it to underflow.
*
* Now, since we have already accumulated the second and the NTP
* subsystem has been notified via second_overflow(), we need to skip
* the next update.
*/
if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
/* Borrow one second back from xtime_sec */
tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC <<
tk->tkr_mono.shift;
tk->xtime_sec--;
tk->skip_second_overflow = 1;
}
}
/*
* accumulate_nsecs_to_secs - Accumulates nsecs into secs
*
* Helper function that accumulates the nsecs greater than a second
* from the xtime_nsec field to the xtime_secs field.
* It also calls into the NTP code to handle leapsecond processing.
*
* Returns TK_CLOCK_WAS_SET if a leap second was applied, 0 otherwise.
*/
static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
{
/* One second in shifted nanoseconds */
u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
unsigned int clock_set = 0;
while (tk->tkr_mono.xtime_nsec >= nsecps) {
int leap;
tk->tkr_mono.xtime_nsec -= nsecps;
tk->xtime_sec++;
/*
* Skip NTP update if this second was accumulated before,
* i.e. xtime_nsec underflowed in timekeeping_adjust()
*/
if (unlikely(tk->skip_second_overflow)) {
tk->skip_second_overflow = 0;
continue;
}
/* Figure out if its a leap sec and apply if needed */
leap = second_overflow(tk->xtime_sec);
if (unlikely(leap)) {
struct timespec64 ts;
tk->xtime_sec += leap;
/* Move wall_to_monotonic the opposite way by the same amount */
ts.tv_sec = leap;
ts.tv_nsec = 0;
tk_set_wall_to_mono(tk,
timespec64_sub(tk->wall_to_monotonic, ts));
__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
clock_set = TK_CLOCK_WAS_SET;
}
}
return clock_set;
}
/*
* logarithmic_accumulation - shifted accumulation of cycles
*
* @tk: timekeeper to accumulate into
* @offset: cycles which have not been accumulated yet
* @shift: log2 of the number of cycle_intervals to accumulate at once
* @clock_set: OR-ed with the result of accumulate_nsecs_to_secs()
*
* This function accumulates a shifted interval of cycles into
* a shifted interval nanoseconds. Allows for O(log) accumulation
* loop.
*
* Returns the unconsumed cycles.
*/
static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
u32 shift, unsigned int *clock_set)
{
u64 interval = tk->cycle_interval << shift;
u64 snsec_per_sec;
/* If the offset is smaller than a shifted interval, do nothing */
if (offset < interval)
return offset;
/* Accumulate one shifted interval */
offset -= interval;
tk->tkr_mono.cycle_last += interval;
tk->tkr_raw.cycle_last += interval;
tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
*clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
/* Carry whole seconds of raw time over into raw_sec */
while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
tk->tkr_raw.xtime_nsec -= snsec_per_sec;
tk->raw_sec++;
}
/* Accumulate error between NTP and clock interval */
tk->ntp_error += tk->ntp_tick << shift;
tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
(tk->ntp_error_shift + shift);
return offset;
}
/*
* timekeeping_advance - Updates the timekeeper to the current time and
* current NTP tick length
*
* @mode: TK_ADV_TICK returns early when less than one cycle_interval has
* elapsed; any other mode performs the update regardless.
*
* All accumulation is done on shadow_timekeeper, which is copied over the
* real timekeeper inside the tk_core.seq write section at the end.
*
* Returns true if the clock was set while accumulating (clock_set is only
* changed by the accumulate_nsecs_to_secs() results), false otherwise.
*/
static bool timekeeping_advance(enum timekeeping_adv_mode mode)
{
struct timekeeper *real_tk = &tk_core.timekeeper;
struct timekeeper *tk = &shadow_timekeeper;
u64 offset;
int shift = 0, maxshift;
unsigned int clock_set = 0;
unsigned long flags;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
/* Make sure we're fully resumed: */
if (unlikely(timekeeping_suspended))
goto out;
/* Cycles elapsed since the last accumulation */
offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
/* Check if there's really nothing to do */
if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
goto out;
/* Do some additional sanity checking */
timekeeping_check_update(tk, offset);
/*
* With NO_HZ we may have to accumulate many cycle_intervals
* (think "ticks") worth of time at once. To do this efficiently,
* we calculate the largest doubling multiple of cycle_intervals
* that is smaller than the offset. We then accumulate that
* chunk in one go, and then try to consume the next smaller
* doubled multiple.
*/
shift = ilog2(offset) - ilog2(tk->cycle_interval);
shift = max(0, shift);
/* Bound shift to one less than what overflows tick_length */
maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
shift = min(shift, maxshift);
while (offset >= tk->cycle_interval) {
offset = logarithmic_accumulation(tk, offset, shift,
&clock_set);
/* Step down to the next smaller doubled multiple */
if (offset < tk->cycle_interval<<shift)
shift--;
}
/* Adjust the multiplier to correct NTP error */
timekeeping_adjust(tk, offset);
/*
* Finally, make sure that after the rounding
* xtime_nsec isn't larger than NSEC_PER_SEC
*/
clock_set |= accumulate_nsecs_to_secs(tk);
write_seqcount_begin(&tk_core.seq);
/*
* Update the real timekeeper.
*
* We could avoid this memcpy by switching pointers, but that
* requires changes to all other timekeeper usage sites as
* well, i.e. move the timekeeper pointer getter into the
* spinlocked/seqcount protected sections. And we trade this
* memcpy under the tk_core.seq against one before we start
* updating.
*/
timekeeping_update(tk, clock_set);
memcpy(real_tk, tk, sizeof(*tk));
/* The memcpy must come last. Do not put anything here! */
write_seqcount_end(&tk_core.seq);
out:
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
return !!clock_set;
}
/**
* update_wall_time - Uses the current clocksource to increment the wall time
*
*/
void update_wall_time(void)
{
if (timekeeping_advance(TK_ADV_TICK))
clock_was_set_delayed();
}
/**
 * getboottime64 - Return the real time of system boot.
 * @ts:		pointer to the timespec64 to be set
 *
 * Stores the wall-time of boot in @ts, computed as the realtime offset
 * minus the boottime offset of the timekeeper.
 *
 * This is based on the wall_to_monotonic offset and the total suspend
 * time. Calls to settimeofday will affect the value returned (which
 * basically means that however wrong your real time clock is at boot time,
 * you get the right time here).
 */
void getboottime64(struct timespec64 *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;

	*ts = ktime_to_timespec64(ktime_sub(tk->offs_real, tk->offs_boot));
}
EXPORT_SYMBOL_GPL(getboottime64);
void ktime_get_coarse_real_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
do {
seq = read_seqcount_begin(&tk_core.seq);
*ts = tk_xtime(tk);
} while (read_seqcount_retry(&tk_core.seq, seq));
}
EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
void ktime_get_coarse_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct timespec64 now, mono;
unsigned int seq;
do {
seq = read_seqcount_begin(&tk_core.seq);
now = tk_xtime(tk);
mono = tk->wall_to_monotonic;
} while (read_seqcount_retry(&tk_core.seq, seq));
set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
now.tv_nsec + mono.tv_nsec);
}
EXPORT_SYMBOL(ktime_get_coarse_ts64);
/*
* do_timer - Advance jiffies_64 by @ticks and update the global load
*
* Must hold jiffies_lock
*/
void do_timer(unsigned long ticks)
{
jiffies_64 += ticks;
calc_global_load();
}
/**
* ktime_get_update_offsets_now - hrtimer helper
* @cwsseq: pointer to check and store the clock was set sequence number
* @offs_real: pointer to storage for monotonic -> realtime offset
* @offs_boot: pointer to storage for monotonic -> boottime offset
* @offs_tai: pointer to storage for monotonic -> clock tai offset
*
* Returns current monotonic time and updates the offsets if the
* sequence number in @cwsseq and timekeeper.clock_was_set_seq are
* different.
*
* Called from hrtimer_interrupt() or retrigger_next_event()
*/
ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
ktime_t *offs_boot, ktime_t *offs_tai)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base;
u64 nsecs;
do {
seq = read_seqcount_begin(&tk_core.seq);
/* Current monotonic time: base plus nanoseconds since the base */
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
base = ktime_add_ns(base, nsecs);
/* Refresh the caller's offsets only if the clock was set meanwhile */
if (*cwsseq != tk->clock_was_set_seq) {
*cwsseq = tk->clock_was_set_seq;
*offs_real = tk->offs_real;
*offs_boot = tk->offs_boot;
*offs_tai = tk->offs_tai;
}
/* Handle leapsecond insertion adjustments */
if (unlikely(base >= tk->next_leap_ktime))
*offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
} while (read_seqcount_retry(&tk_core.seq, seq));
return base;
}
/*
* timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
*
* @txc: timex structure to validate; it is not modified
*
* Returns 0 if @txc is acceptable, -EINVAL for malformed values and
* -EPERM if the caller lacks CAP_SYS_TIME for the requested modes.
*/
static int timekeeping_validate_timex(const struct __kernel_timex *txc)
{
if (txc->modes & ADJ_ADJTIME) {
/* singleshot must not be used with any other mode bits */
if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
return -EINVAL;
/* Only the read-only variant is allowed without CAP_SYS_TIME */
if (!(txc->modes & ADJ_OFFSET_READONLY) &&
!capable(CAP_SYS_TIME))
return -EPERM;
} else {
/* In order to modify anything, you gotta be super-user! */
if (txc->modes && !capable(CAP_SYS_TIME))
return -EPERM;
/*
* if the quartz is off by more than 10% then
* something is VERY wrong!
*/
if (txc->modes & ADJ_TICK &&
(txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ))
return -EINVAL;
}
if (txc->modes & ADJ_SETOFFSET) {
/* In order to inject time, you gotta be super-user! */
if (!capable(CAP_SYS_TIME))
return -EPERM;
/*
* Validate if a timespec/timeval used to inject a time
* offset is valid. Offsets can be positive or negative, so
* we don't check tv_sec. The value of the timeval/timespec
* is the sum of its fields, but *NOTE*:
* The field tv_usec/tv_nsec must always be non-negative and
* we can't have more nanoseconds/microseconds than a second.
*/
if (txc->time.tv_usec < 0)
return -EINVAL;
/* With ADJ_NANO the tv_usec field carries nanoseconds */
if (txc->modes & ADJ_NANO) {
if (txc->time.tv_usec >= NSEC_PER_SEC)
return -EINVAL;
} else {
if (txc->time.tv_usec >= USEC_PER_SEC)
return -EINVAL;
}
}
/*
* Check for potential multiplication overflows that can
* only happen on 64-bit systems:
*/
if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
if (LLONG_MIN / PPM_SCALE > txc->freq)
return -EINVAL;
if (LLONG_MAX / PPM_SCALE < txc->freq)
return -EINVAL;
}
return 0;
}
/**
 * random_get_entropy_fallback - Returns the raw clock source value,
 * used by random.c for platforms with no valid random_get_entropy().
 *
 * Return: the value read from the current timekeeper clocksource, or 0
 * while timekeeping is suspended or no clocksource is installed yet.
 */
unsigned long random_get_entropy_fallback(void)
{
	struct clocksource *cs = READ_ONCE(tk_core.timekeeper.tkr_mono.clock);

	if (unlikely(timekeeping_suspended || !cs))
		return 0;

	return cs->read(cs);
}
EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
/**
* do_adjtimex() - Accessor function to NTP __do_adjtimex function
*/
int do_adjtimex(struct __kernel_timex *txc)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct audit_ntp_data ad;
bool clock_set = false;
struct timespec64 ts;
unsigned long flags;
s32 orig_tai, tai;
int ret;
/* Validate the data before disabling interrupts */
ret = timekeeping_validate_timex(txc);
if (ret)
return ret;
add_device_randomness(txc, sizeof(*txc));
if (txc->modes & ADJ_SETOFFSET) {
struct timespec64 delta;
delta.tv_sec = txc->time.tv_sec;
delta.tv_nsec = txc->time.tv_usec;
if (!(txc->modes & ADJ_NANO))
delta.tv_nsec *= 1000;
ret = timekeeping_inject_offset(&delta);
if (ret)
return ret;
audit_tk_injoffset(delta);
}
audit_ntp_init(&ad);
ktime_get_real_ts64(&ts);
add_device_randomness(&ts, sizeof(ts));
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
orig_tai = tai = tk->tai_offset;
ret = __do_adjtimex(txc, &ts, &tai, &ad);
if (tai != orig_tai) {
__timekeeping_set_tai_offset(tk, tai);
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
clock_set = true;
}
tk_update_leap_state(tk);
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
audit_ntp_log(&ad);
/* Update the multiplier immediately if frequency was set directly */
if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
clock_set |= timekeeping_advance(TK_ADV_FREQ);
if (clock_set)
clock_was_set(CLOCK_REALTIME);
ntp_notify_cmos_timer();
return ret;
}
#ifdef CONFIG_NTP_PPS
/**
* hardpps() - Accessor function to NTP __hardpps function
* @phase_ts: passed through unchanged as first argument of __hardpps()
* @raw_ts: passed through unchanged as second argument of __hardpps()
*
* Calls __hardpps() with timekeeper_lock held and inside a tk_core.seq
* write section.
*/
void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
{
unsigned long flags;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
__hardpps(phase_ts, raw_ts);
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
}
EXPORT_SYMBOL(hardpps);
#endif /* CONFIG_NTP_PPS */