Compare commits

...

657 Commits

Author SHA1 Message Date
Linus Torvalds
ac3fd01e4c Linux 6.18-rc7 2025-11-23 14:53:16 -08:00
Linus Torvalds
d0e88704d9 Merge tag 'clk-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux
Pull clk fixes from Stephen Boyd:
 "Fixes for the Allwinner A523 clk driver:

   - Lower the minimum rate for the A523 audio PLL to support
     frequencies required by audio devices

   - Mark a couple clks critical on A523 so that Linux doesn't turn them
     off when they're used by other code like TF-A"

* tag 'clk-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux:
  clk: sunxi-ng: sun55i-a523-ccu: Lower audio0 pll minimum rate
  clk: sunxi-ng: sun55i-a523-r-ccu: Mark bus-r-dma as critical
  clk: sunxi-ng: Mark A523 bus-r-cpucfg clock as critical
2025-11-23 12:03:28 -08:00
Linus Torvalds
1af5c1d3a9 Merge tag 'timers-urgent-2025-11-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer fixes from Ingo Molnar:

 - Fix a race in timer->function clearing in timer_shutdown_sync()

 - Fix a timekeeper sysfs-setup resource leak in error paths

 - Fix the NOHZ report_idle_softirq() syslog rate-limiting
   logic to have no side effects on the return value

* tag 'timers-urgent-2025-11-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  timers: Fix NULL function pointer race in timer_shutdown_sync()
  timekeeping: Fix resource leak in tk_aux_sysfs_init() error paths
  tick/sched: Fix bogus condition in report_idle_softirq()
2025-11-23 08:23:30 -08:00
Linus Torvalds
e624f73775 Merge tag 'perf-urgent-2025-11-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "Fix perf CPU-clock counters, and address a static checker warning"

* tag 'perf-urgent-2025-11-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Fix 0 count issue of cpu-clock
  perf/x86/intel/uncore: Remove superfluous check
2025-11-23 08:20:15 -08:00
Yipeng Zou
20739af073 timers: Fix NULL function pointer race in timer_shutdown_sync()
There is a race condition between timer_shutdown_sync() and timer
expiration that can lead to hitting a WARN_ON in expire_timers().

The issue occurs when timer_shutdown_sync() clears the timer function
to NULL while the timer is still running on another CPU. The race
scenario looks like this:

CPU0					CPU1
					<SOFTIRQ>
					lock_timer_base()
					expire_timers()
					base->running_timer = timer;
					unlock_timer_base()
					[call_timer_fn enter]
					mod_timer()
					...
timer_shutdown_sync()
lock_timer_base()
// For now, will not detach the timer but only clear its function to NULL
if (base->running_timer != timer)
	ret = detach_if_pending(timer, base, true);
if (shutdown)
	timer->function = NULL;
unlock_timer_base()
					[call_timer_fn exit]
					lock_timer_base()
					base->running_timer = NULL;
					unlock_timer_base()
					...
					// Now timer is pending while its function set to NULL.
					// next timer trigger
					<SOFTIRQ>
					expire_timers()
					WARN_ON_ONCE(!fn) // hit
					...
lock_timer_base()
// Now timer will detach
if (base->running_timer != timer)
	ret = detach_if_pending(timer, base, true);
if (shutdown)
	timer->function = NULL;
unlock_timer_base()

The problem is that timer_shutdown_sync() clears the timer function
regardless of whether the timer is currently running. This can leave a
pending timer with a NULL function pointer, which triggers the
WARN_ON_ONCE(!fn) check in expire_timers().

Fix this by only clearing the timer function when actually detaching the
timer. If the timer is running, leave the function pointer intact, which is
safe because the timer will be properly detached when it finishes running.

Fixes: 0cc04e8045 ("timers: Add shutdown mechanism to the internal functions")
Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251122093942.301559-1-zouyipeng@huawei.com
2025-11-22 22:55:26 +01:00
Linus Torvalds
d13f3ac64e Merge tag 'mips-fixes_6.18_1' of git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux
Pull MIPS fixes from Thomas Bogendoerfer:

 - Fix CPU type in DT for econet

 - Fix for Malta PCI MMIO breakage for SOC-it

 - Fix TLB shutdown caused by iniital uniquification

 - Fix random seg faults due to missed vdso storage requirement

* tag 'mips-fixes_6.18_1' of git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux:
  MIPS: kernel: Fix random segmentation faults
  MIPS: mm: Prevent a TLB shutdown on initial uniquification
  mips: dts: econet: fix EN751221 core type
  MIPS: Malta: Fix !EVA SOC-it PCI MMIO
2025-11-22 12:55:18 -08:00
Linus Torvalds
0629dcf772 Merge tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull crypto library fix from Eric Biggers:
 "Fix another KMSAN warning that made it in while KMSAN wasn't working
  reliably"

* tag 'libcrypto-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
  lib/crypto: tests: Fix KMSAN warning in test_sha256_finup_2x()
2025-11-22 11:53:53 -08:00
Linus Torvalds
89edd36fd8 Merge tag 'xfs-fixes-6.18-rc7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fix from Carlos Maiolino:
 "A single out-of-bounds fix, nothing special"

* tag 'xfs-fixes-6.18-rc7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: fix out of bounds memory read error in symlink repair
2025-11-22 10:23:34 -08:00
Linus Torvalds
7e29f07760 Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI fixes from James Bottomley:
 "One target driver fix and one scsi-generic one. The latter is 10 lines
  because the problem lock has to be dropped and re-taken around the
  call causing the sleep in atomic"

* tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi:
  scsi: sg: Do not sleep in atomic context
  scsi: target: tcm_loop: Fix segfault in tcm_loop_tpg_address_show()
2025-11-22 10:16:21 -08:00
Linus Torvalds
ebd975458d Merge tag 'input-for-v6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
Pull input fixes from Dmitry Torokhov:

 - INPUT_PROP_HAPTIC_TOUCHPAD definition added early in 6.18 cycle has
   been renamed to INPUT_PROP_PRESSUREPAD to better reflect the kind of
   devices it is supposed to be set for

 - a new ID for a touchscreen found in Ayaneo Flip DS in Goodix driver

 - Goodix driver no longer tries to set reset pin as "input" as it
   causes issues when there is no pull up resistor installed on the
   board

 - fixes for cros_ec_keyb, imx_sc_key, and pegasus-notetaker drivers to
   deal with potential out-of-bounds access and memory corruption issues

* tag 'input-for-v6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input:
  Input: rename INPUT_PROP_HAPTIC_TOUCHPAD to INPUT_PROP_PRESSUREPAD
  Input: cros_ec_keyb - fix an invalid memory access
  Input: imx_sc_key - fix memory corruption on unload
  Input: pegasus-notetaker - fix potential out-of-bounds access
  Input: goodix - remove setting of RST pin to input
  Input: goodix - add support for ACPI ID GDIX1003
2025-11-22 09:58:41 -08:00
Linus Torvalds
a6ff0d85eb Merge tag 'riscv-for-linus-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull RISC-V fixes from Paul Walmsley:

 - Correct the MIPS RISC-V/JEDEC vendor ID

 - Fix the system shutdown behavior in the legacy case where
   CONFIG_RISCV_SBI_V01 is set, but the firmware implementation
   doesn't support the older v0.1 system shutdown method

 - Align some tools/ macro definitions with the corresponding
   kernel headers

* tag 'riscv-for-linus-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux:
  tools: riscv: Fixed misalignment of CSR related definitions
  riscv: sbi: Prefer SRST shutdown over legacy
  riscv: Update MIPS vendor id to 0x127
2025-11-22 09:44:50 -08:00
Linus Torvalds
5703357ede Merge tag 'selinux-pr-20251121' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux
Pull selinux fixes from Paul Moore:
 "Three SELinux patches for v6.18 to fix issues around accessing the
  per-task decision cache that we introduced in v6.16 to help reduce
  SELinux overhead on path walks. The problem was that despite the cache
  being located in the SELinux "task_security_struct", the parent struct
  wasn't actually tied to the task, it was tied to a cred.

  Historically SELinux did locate the task_security_struct in the
  task_struct's security blob, but it was later relocated to the cred
  struct when the cred work happened, as it made the most sense at the
  time.

  Unfortunately we never did the task_security_struct to
  cred_security_struct rename work (avoid code churn maybe? who knows)
  because it didn't really matter at the time. However, it suddenly
  became a problem when we added a per-task cache to a per-cred object
  and didn't notice because of the old, no-longer-correct struct naming.

  Thanks to KCSAN for flagging this, as the silly humans running things
  forgot that the task_security_struct was a big lie.

  This contains three patches, only one of which actually fixes the
  problem described above and moves the SELinux decision cache from the
  per-cred struct to a newly (re)created per-task struct.

  The other two patches, which form the bulk of the diffstat, take care
  of the associated renaming tasks so we can hopefully avoid making the
  same stupid mistake in the future.

  For the record, I did contemplate sending just a fix for the cache,
  leaving the renaming patches for the upcoming merge window, but the
  type/variable naming ended up being pretty awful and would have made
  v6.18 an outlier stuck between the "old" names and the "new" names in
  v6.19. The renaming patches are also fairly mechanical/trivial and
  shouldn't pose much risk despite their size.

  TLDR; naming things may be hard, but if you mess it up bad things
  happen"

* tag 'selinux-pr-20251121' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux:
  selinux: rename the cred_security_struct variables to "crsec"
  selinux: move avdcache to per-task security struct
  selinux: rename task_security_struct to cred_security_struct
2025-11-22 09:24:36 -08:00
Linus Torvalds
2eba5e05d9 Merge tag 'loongarch-fixes-6.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
Pull LoongArch fixes from Huacai Chen:
 "Use UAPI types in ptrace UAPI header to fix nolibc ptrace.

  Fix CPU name display, NUMA node parsing, kexec/kdump, PCI init and BPF
  trampoline"

* tag 'loongarch-fixes-6.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson:
  LoongArch: BPF: Disable trampoline for kernel module function trace
  LoongArch: Don't panic if no valid cache info for PCI
  LoongArch: Mask all interrupts during kexec/kdump
  LoongArch: Fix NUMA node parsing with numa_memblks
  LoongArch: Consolidate CPU names in /proc/cpuinfo
  LoongArch: Use UAPI types in ptrace UAPI header
2025-11-21 11:16:14 -08:00
Linus Torvalds
e3fe48f9bd Merge tag 'v6.18-rc6-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6
Pull smb client fixes from Steve French:

 - Fix potential memory leak in mount

 - Add some missing read tracepoints

 - Fix locking issue with directory leases

* tag 'v6.18-rc6-smb3-client-fixes' of git://git.samba.org/sfrench/cifs-2.6:
  cifs: Add the smb3_read_* tracepoints to SMB1
  cifs: fix memory leak in smb3_fs_context_parse_param error path
  smb: client: introduce close_cached_dir_locked()
2025-11-21 11:14:21 -08:00
Linus Torvalds
a07a003ce6 Merge tag 'io_uring-6.18-20251120' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fix from Jens Axboe:
 "Just a single fix for a mixup of arguments for the skb_queue_splice()
  call, in the io_uring timestamp retrieval code"

* tag 'io_uring-6.18-20251120' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/cmd_net: fix wrong argument types for skb_queue_splice()
2025-11-21 11:09:57 -08:00
Linus Torvalds
a4165ffc83 Merge tag 'block-6.18-20251120' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block fixes from Jens Axboe:
 "NVMe pull request via Keith:

   - Admin queue use-after-free fix (Keith)

   - Target authentication fix (Alistar)

   - Multipath lockdeup fix (Shin'ichiro)

   - FC transport teardown fixes (Ewan)"

* tag 'block-6.18-20251120' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  nvme: nvme-fc: Ensure ->ioerr_work is cancelled in nvme_fc_delete_ctrl()
  nvme: nvme-fc: move tagset removal to nvme_fc_delete_ctrl()
  nvme-multipath: fix lockdep WARN due to partition scan work
  nvmet-auth: update sc_c in target host hash calculation
  nvme: fix admin request_queue lifetime
2025-11-21 10:59:35 -08:00
Linus Torvalds
317c4d8a2a Merge tag 'ata-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux
Pull ata fixes from Niklas Cassel:

 - Add a missing refcount decrement in ata_scsi_dev_rescan() when
   the device or its queue is not running.

   In the case where the device is running, the recount is already
   decremented properly (Yihang Li)

 - Generate the proper sense code for a Security locked device.

   There was a regression caused by a recent change of how sense
   data is generated for commands that did not provide any sense
   data. This broke system suspend for Security locked devices.

   Generate the sense data that the SCSI disk driver expects for a
   Security locked device so that system suspend works again (me)

 - Set capacity to zero for a Security locked device.

   All I/O commands will be aborted by a Security locked device.
   Thus, the block layer disk partition scanning will result in
   a bunch of, for the user, confusing I/O errors in dmesg during
   boot.

   Since a Security locked device is unusable anyway, set the capacity
   to zero, to avoid the disk partition scanning during boot. We still
   create the block device in /dev such that the user may unlock the
   device using e.g. hdparm (me)

* tag 'ata-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/libata/linux:
  ata: libata-core: Set capacity to zero for a security locked drive
  ata: libata-scsi: Fix system suspend for a security locked drive
  ata: libata-scsi: Add missing scsi_device_put() in ata_scsi_dev_rescan()
2025-11-21 10:53:23 -08:00
Linus Torvalds
68d804c64a Merge tag 'pinctrl-v6.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl
Pull pin control fixes from Linus Walleij:

 - Fix register naming in the Mediatek mt8189 driver

 - Select REGMAP_MMIO for the Realtek RTD driver

 - Fix the number of items in groups in the Toshiba Visconti driver

 - Fix a memory leak in the Cirrus CS42L43 driver

 - Fix a deadlock (!) in Qualcomm pinmux configuration

 - Fix use of uninitialized memory and list initialization in the S32CC
   pin controller

* tag 'pinctrl-v6.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl:
  dt-bindings: pinctrl: xlnx,versal-pinctrl: Add missing unevaluatedProperties on '^conf' nodes
  pinctrl: s32cc: initialize gpio_pin_config::list after kmalloc()
  pinctrl: s32cc: fix uninitialized memory in s32_pinctrl_desc
  pinctrl: qcom: msm: Fix deadlock in pinmux configuration
  pinctrl: cirrus: Fix fwnode leak in cs42l43_pin_probe()
  dt-bindings: pinctrl: toshiba,visconti: Fix number of items in groups
  pinctrl: realtek: Select REGMAP_MMIO for RTD driver
  pinctrl: mediatek: mt8189: align register base names to dt-bindings ones
  pinctrl: mediatek: mt8196: align register base names to dt-bindings ones
2025-11-21 10:47:24 -08:00
Linus Torvalds
2c26574cc4 Merge tag 'gpio-fixes-for-v6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux
Pull gpio fixes from Bartosz Golaszewski:

 - fix a use-after-free bug in GPIO character device code

 - update MAINTAINERS

* tag 'gpio-fixes-for-v6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux:
  MAINTAINERS: update my email address
  gpio: cdev: make sure the cdev fd is still active before emitting events
2025-11-21 10:43:58 -08:00
Eric Biggers
141fbbecec lib/crypto: tests: Fix KMSAN warning in test_sha256_finup_2x()
Fully initialize *ctx, including the buf field which sha256_init()
doesn't initialize, to avoid a KMSAN warning when comparing *ctx to
orig_ctx.  This KMSAN warning slipped in while KMSAN was not working
reliably due to a stackdepot bug, which has now been fixed.

Fixes: 6733968be7 ("lib/crypto: tests: Add tests and benchmark for sha256_finup_2x()")
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20251121033431.34406-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
2025-11-21 10:22:24 -08:00
Linus Torvalds
c6d732c3bd Merge tag 'drm-fixes-2025-11-21' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "A range of small fixes across the board, the i915 display
  disambiguation is probably the biggest otherwise amdgpu and xe as
  usual with tegra, nouveau, radeon and a core atomic fix.

  Looks mostly normal.

  atomic:
   - Return error codes on failed blob creation for planes

  nouveau:
   - Fix memory leak

  tegra:
   - Fix device ref counting
   - Fix pid ref counting
   - Revert booting on Pixel C

  xe:
   - Fix out-of-bounds access with BIT()
   - Fix kunit test checking wrong condition
   - Drop duplicate kconfig select
   - Fix guc2host irq handler with MSI-X

  i915:
   - Wildcat Lake and Panther Lake detangled for display fixes

  amdgpu:
   - DTBCLK gating fix
   - EDID fetching retry improvements
   - HDMI HPD debounce filtering
   - DCN 2.0 cursor fix
   - DP MST PBN fix
   - VPE fix
   - GC 11 fix
   - PRT fix
   - MMIO remap page fix
   - SR-IOV fix

  radeon:
   - Fence deadlock fix"

* tag 'drm-fixes-2025-11-21' of https://gitlab.freedesktop.org/drm/kernel: (25 commits)
  drm/amdgpu: Add sriov vf check for VCN per queue reset support.
  drm/amdgpu/ttm: Fix crash when handling MMIO_REMAP in PDE flags
  drm/amdgpu/vm: Check PRT uAPI flag instead of PTE flag
  drm/amdgpu: Skip emit de meta data on gfx11 with rs64 enabled
  drm/amd: Skip power ungate during suspend for VPE
  drm/plane: Fix create_in_format_blob() return value
  drm/xe/irq: Handle msix vector0 interrupt
  drm/xe: Remove duplicate DRM_EXEC selection from Kconfig
  drm/xe/kunit: Fix forcewake assertion in mocs test
  drm/xe: Prevent BIT() overflow when handling invalid prefetch region
  drm/radeon: delete radeon_fence_process in is_signaled, no deadlock
  drm/amd/display: Fix pbn to kbps Conversion
  drm/amd/display: Clear the CUR_ENABLE register on DCN20 on DPP5
  drm/amd/display: Add an HPD filter for HDMI
  drm/amd/display: Increase DPCD read retries
  drm/amd/display: Move sleep into each retry for retrieve_link_cap()
  drm/amd/display: Prevent Gating DTBCLK before It Is Properly Latched
  drm/i915/xe3: Restrict PTL intel_encoder_is_c10phy() to only PHY A
  drm/i915/display: Add definition for wcl as subplatform
  drm/pcids: Split PTL pciids group to make wcl subplatform
  ...
2025-11-21 09:55:55 -08:00
Linus Torvalds
a48f822908 samples: work around glibc redefining some of our defines wrong
Apparently as of version 2.42, glibc headers define AT_RENAME_NOREPLACE
and some of the other flags for renameat2() and friends in <stdio.h>.

Which would all be fine, except for inexplicable reasons glibc decided
to define them _differently_ from the kernel definitions, which then
makes some of our sample code that includes both kernel headers and user
space headers unhappy, because the compiler will (correctly) complain
about redefining things.

Now, mixing kernel headers and user space headers is always a somewhat
iffy proposition due to namespacing issues, but it's kind of inevitable
in our sample and selftest code.  And this is just glibc being stupid.

Those defines come from the kernel, glibc is exposing the kernel
interfaces, and glibc shouldn't make up some random new expressions for
these values.

It's not like glibc headers changed the actual result values, but they
arbitrarily just decided to use a different expression to describe those
values.  The kernel just does

    #define AT_RENAME_NOREPLACE  0x0001

while glibc does

    # define RENAME_NOREPLACE (1 << 0)
    # define AT_RENAME_NOREPLACE RENAME_NOREPLACE

instead.  Same value in the end, but very different macro definition.

For absolutely no reason.

This has since been fixed in the glibc development tree, so eventually
we'll end up with the canonical expressions and no clashes.  But in the
meantime the broken headers are in the glibc-2.42 release and have made
it out into distributions.

Do a minimal work-around to make the samples build cleanly by just
undefining the affected macros in between the user space header include
and the kernel header includes.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2025-11-21 09:29:02 -08:00
Thomas Bogendoerfer
14b46ba92b MIPS: kernel: Fix random segmentation faults
Commit 69896119dc ("MIPS: vdso: Switch to generic storage
implementation") switches to a generic vdso storage, which increases
the number of data pages from 1 to 4. But there is only one page
reserved, which causes segementation faults depending where the VDSO
area is randomized to. To fix this use the same size of reservation
and allocation of the VDSO data pages.

Fixes: 69896119dc ("MIPS: vdso: Switch to generic storage implementation")
Reviewed-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de>
Reviewed-by: Huacai Chen <chenhuacai@loongson.cn>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
2025-11-21 13:24:05 +01:00
Maciej W. Rozycki
9f048fa487 MIPS: mm: Prevent a TLB shutdown on initial uniquification
Depending on the particular CPU implementation a TLB shutdown may occur
if multiple matching entries are detected upon the execution of a TLBP
or the TLBWI/TLBWR instructions.  Given that we don't know what entries
we have been handed we need to be very careful with the initial TLB
setup and avoid all these instructions.

Therefore read all the TLB entries one by one with the TLBR instruction,
bypassing the content addressing logic, and truncate any large pages in
place so as to avoid a case in the second step where an incoming entry
for a large page at a lower address overlaps with a replacement entry
chosen at another index.  Then preinitialize the TLB using addresses
outside our usual unique range and avoiding clashes with any entries
received, before making the usual call to local_flush_tlb_all().

This fixes (at least) R4x00 cores if TLBP hits multiple matching TLB
entries (SGI IP22 PROM for examples sets up all TLBs to the same virtual
address).

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: 35ad7e1815 ("MIPS: mm: tlb-r4k: Uniquify TLB entries on init")
Cc: stable@vger.kernel.org
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Tested-by: Jiaxun Yang <jiaxun.yang@flygoat.com> # Boston I6400, M5150 sim
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
2025-11-21 13:23:21 +01:00
Dave Airlie
40b53db87c Merge tag 'drm-xe-fixes-2025-11-21' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes:
 - Fix out-of-bounds access with BIT() (Shuicheng Lin)
 - Fix kunit test checking wrong condition (Matt Roper)
 - Drop duplicate kconfig select (Shuicheng Lin)
 - Fix guc2host irq handler with MSI-X (Venkata Ramana Nayana)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patch.msgid.link/uadbrmftcud3wg32c6tje7mmfcr7wgmpnkzxwubk6fletahje2@coek2ciunkvz
2025-11-21 18:33:07 +10:00
Dave Airlie
4e9fd472d1 Merge tag 'amd-drm-fixes-6.18-2025-11-20' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-6.18-2025-11-20:

amdgpu:
- DTBCLK gating fix
- EDID fetching retry improvements
- HDMI HPD debounce filtering
- DCN 2.0 cursor fix
- DP MST PBN fix
- VPE fix
- GC 11 fix
- PRT fix
- MMIO remap page fix
- SR-IOV fix

radeon:
- Fence deadlock fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20251120164110.1077973-1-alexander.deucher@amd.com
2025-11-21 18:20:10 +10:00
Dave Airlie
201a32e61b Merge tag 'drm-misc-fixes-2025-11-20' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-fixes
Short summary of fixes pull:

atomic:
- Return error codes on failed blob creation for planes

nouveau:
- Fix memory leak

tegra:
- Fix device ref counting
- Fix pid ref counting
- Revert booting on Pixel C

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patch.msgid.link/20251120151308.GA589436@linux.fritz.box
2025-11-21 17:51:19 +10:00
Dave Airlie
9b571b3231 Merge tag 'drm-intel-fixes-2025-11-20' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-fixes
- Wildcat Lake and Panther Lake detangled for display fixes (Dnyaneshwar)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/aR8jByCwjIThpnpk@intel.com
2025-11-21 17:12:20 +10:00
Paul Moore
3ded250b97 selinux: rename the cred_security_struct variables to "crsec"
Along with the renaming from task_security_struct to cred_security_struct,
rename the local variables to "crsec" from "tsec".  This both fits with
existing conventions and helps distinguish between task and cred related
variables.

No functional changes.

Acked-by: Stephen Smalley <stephen.smalley.work@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2025-11-20 16:47:50 -05:00
Stephen Smalley
dde3a5d0f4 selinux: move avdcache to per-task security struct
The avdcache is meant to be per-task; move it to a new
task_security_struct that is duplicated per-task.

Cc: stable@vger.kernel.org
Fixes: 5d7ddc59b3 ("selinux: reduce path walk overhead")
Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com>
[PM: line length fixes]
Signed-off-by: Paul Moore <paul@paul-moore.com>
2025-11-20 16:43:51 -05:00
Stephen Smalley
75f72fe289 selinux: rename task_security_struct to cred_security_struct
Before Linux had cred structures, the SELinux task_security_struct was
per-task and although the structure was switched to being per-cred
long ago, the name was never updated. This change renames it to
cred_security_struct to avoid confusion and pave the way for the
introduction of an actual per-task security structure for SELinux. No
functional change.

Cc: stable@vger.kernel.org
Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
2025-11-20 16:43:50 -05:00
Linus Torvalds
fd95357fd8 Merge tag 'sched_ext-for-6.18-rc6-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fix from Tejun Heo:
 "One low risk and obvious fix: scx_enable() was dereferencing an error
  pointer on helper kthread creation failure. Fixed"

* tag 'sched_ext-for-6.18-rc6-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Fix scx_enable() crash on helper kthread creation failure
2025-11-20 11:04:37 -08:00
Linus Torvalds
c966813ea1 Merge tag 'slab-for-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab fix from Vlastimil Babka:

 - Fix mempool poisoning order>0 pages with CONFIG_HIGHMEM (Vlastimil Babka)

* tag 'slab-for-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/mempool: fix poisoning order>0 pages with HIGHMEM
2025-11-20 10:49:12 -08:00
Saket Kumar Bhaskar
7b6216baae sched_ext: Fix scx_enable() crash on helper kthread creation failure
A crash was observed when the sched_ext selftests runner was
terminated with Ctrl+\ while test 15 was running:

NIP [c00000000028fa58] scx_enable.constprop.0+0x358/0x12b0
LR [c00000000028fa2c] scx_enable.constprop.0+0x32c/0x12b0
Call Trace:
scx_enable.constprop.0+0x32c/0x12b0 (unreliable)
bpf_struct_ops_link_create+0x18c/0x22c
__sys_bpf+0x23f8/0x3044
sys_bpf+0x2c/0x6c
system_call_exception+0x124/0x320
system_call_vectored_common+0x15c/0x2ec

kthread_run_worker() returns an ERR_PTR() on failure rather than NULL,
but the current code in scx_alloc_and_add_sched() only checks for a NULL
helper. Incase of failure on SIGQUIT, the error is not handled in
scx_alloc_and_add_sched() and scx_enable() ends up dereferencing an
error pointer.

Error handling is fixed in scx_alloc_and_add_sched() to propagate
PTR_ERR() into ret, so that scx_enable() jumps to the existing error
path, avoiding random dereference on failure.

Fixes: bff3b5aec1 ("sched_ext: Move disable machinery into scx_sched")
Cc: stable@vger.kernel.org # v6.16+
Reported-and-tested-by: Samir Mulani <samir@linux.ibm.com>
Signed-off-by: Saket Kumar Bhaskar <skb99@linux.ibm.com>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Reviewed-by: Vishal Chourasia <vishalc@linux.ibm.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2025-11-20 08:45:43 -10:00
Jens Axboe
46447367a5 io_uring/cmd_net: fix wrong argument types for skb_queue_splice()
If timestamp retriving needs to be retried and the local list of
SKB's already has entries, then it's spliced back into the socket
queue. However, the arguments for the splice helper are transposed,
causing exactly the wrong direction of splicing into the on-stack
list. Fix that up.

Cc: stable@vger.kernel.org
Reported-by: Google Big Sleep <big-sleep-vuln-reports+bigsleep-462435176@google.com>
Fixes: 9e4ed359b8 ("io_uring/netcmd: add tx timestamping cmd support")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2025-11-20 11:40:15 -07:00
Linus Torvalds
07e09c3233 Merge tag 'pm-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fix from Rafael Wysocki:
 "Fix a regression introduced during the 6.16 development cycle that may
  cause runtime PM to be enabled by mistake for devices that do not
  support it (which may lead to some serious trouble) if there is a
  system wakeup event during the "late suspend" phase of system suspend"

* tag 'pm-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  PM: sleep: core: Fix runtime PM enabling in device_resume_early()
2025-11-20 09:46:52 -08:00
Linus Torvalds
1753d40dce Merge tag 'acpi-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI fix from Rafael Wysocki:
 "This fixes EINJV2 support introduced during the 6.17 cycle by
  unbreaking the initialization broken by a previous attempted fix,
  adding sanity checks for data coming from the platform firmware, and
  updating the code to handle injecting legacy error types on an EINJV2
  capable systems properly (Tony Luck)"

* tag 'acpi-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  ACPI: APEI: EINJ: Fix EINJV2 initialization and injection
2025-11-20 09:44:27 -08:00
Linus Torvalds
6ba3bb3348 Merge tag 'platform-drivers-x86-v6.18-4' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86
Pull x86 platform driver fixes from Ilpo Järvinen:
 "This one has lots of new HW entries which adds to the size in diffstat
  but the individual changes are simple.

  Fixes

   - acer-wmi: Ignore backlight event

   - alienware-wmi-wmax: Fix quirk match table order & drop redundant
     entries

   - amd/pmc:
      - Add Xbox Ally to spurious 8042 quirk list
      - Quirk list Lenovo Legion Go 2 NVMe resume

   - msi-wmi-platform:
      - Correct GUID to uppercase
      - GUID is uncleverly copy-pasted from an example so add a DMI
        whitelist

   - intel/speed_select_if: PCIBIOS_* return code conversion

   - intel-uncore-freq & ISST: Fix kernel doc warnings

  New HW support

   - alienware-wmi-wmax:
      - Alienware 16 Aurora support
      - Alienware M support
      - Alienware X support
      - Dell G support

   - amd/pmc:
      - ROG Xbox Ally (non-X) support

   - huaway-wmi: HONOR MagicBoox X16/X14 PrintScreen & YOYO keys

   - hp-wmi:
      - Omen 16-wf1xxx fan support
      - Omen MAX 16-ah0xx fan + thermal profile support
      - Victus 16-r0 and 16-s0 fan + thermal profile support

   - intel/hid: Intel Nova Lake support

   - intel-uncore-freq:
      - Intel Panther Lake support
      - Intel Wildcat Lake support
      - Intel Nova Lake support"

* tag 'platform-drivers-x86-v6.18-4' of git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86: (21 commits)
  platform/x86: intel-uncore-freq: fix all header kernel-doc warnings
  platform/x86: acer-wmi: Ignore backlight event
  platform/x86/intel/speed_select_if: Convert PCIBIOS_* return codes to errnos
  platform/x86/intel/hid: Add Nova Lake support
  platform/x86: alienware-wmi-wmax: Add AWCC support to Alienware 16 Aurora
  platform/x86: hp-wmi: Add Omen MAX 16-ah0xx fan support and thermal profile
  platform/x86: msi-wmi-platform: Fix typo in WMI GUID
  platform/x86: msi-wmi-platform: Only load on MSI devices
  platform/x86/amd: pmc: Add Lenovo Legion Go 2 to pmc quirk list
  platform/x86/amd/pmc: Add spurious_8042 to Xbox Ally
  platform/x86/amd/pmc: Add support for Van Gogh SoC
  platform/x86: alienware-wmi-wmax: Add support for the whole "G" family
  platform/x86: alienware-wmi-wmax: Add support for the whole "X" family
  platform/x86: alienware-wmi-wmax: Add support for the whole "M" family
  platform/x86: alienware-wmi-wmax: Drop redundant DMI entries
  platform/x86: alienware-wmi-wmax: Fix "Alienware m16 R1 AMD" quirk order
  platform/x86: ISST: isst_if.h: fix all kernel-doc warnings
  platform/x86: intel-uncore-freq: Add additional client processors
  platform/x86: hp-wmi: Add Omen 16-wf1xxx fan support
  platform/x86: huawei-wmi: add keys for HONOR models
  ...
2025-11-20 09:39:34 -08:00
Linus Torvalds
8e621c9a33 Merge tag 'net-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski:
 "Including fixes from IPsec and wireless.

  Previous releases - regressions:

   - prevent NULL deref in generic_hwtstamp_ioctl_lower(),
     newer APIs don't populate all the pointers in the request

   - phylink: add missing supported link modes for the fixed-link

   - mptcp: fix false positive warning in mptcp_pm_nl_rm_addr

  Previous releases - always broken:

   - openvswitch: remove never-working support for setting NSH fields

   - xfrm: number of fixes for error paths of xfrm_state creation/
     modification/deletion

   - xfrm: fixes for offload
      - fix the determination of the protocol of the inner packet
      - don't push locally generated packets directly to L2 tunnel
        mode offloading, they still need processing from the standard
        xfrm path

   - mptcp: fix a couple of corner cases in fallback and fastclose
     handling

   - wifi: rtw89: hw_scan: prevent connections from getting stuck,
     work around apparent bug in FW by tweaking messages we send

   - af_unix: fix duplicate data if PEEK w/ peek_offset needs to wait

   - veth: more robust handing of race to avoid txq getting stuck

   - eth: ps3_gelic_net: handle skb allocation failures"

* tag 'net-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (47 commits)
  vsock: Ignore signal/timeout on connect() if already established
  be2net: pass wrb_params in case of OS2BMC
  l2tp: reset skb control buffer on xmit
  net: dsa: microchip: lan937x: Fix RGMII delay tuning
  selftests: mptcp: add a check for 'add_addr_accepted'
  mptcp: fix address removal logic in mptcp_pm_nl_rm_addr
  selftests: mptcp: join: userspace: longer timeout
  selftests: mptcp: join: endpoints: longer timeout
  selftests: mptcp: join: fastclose: remove flaky marks
  mptcp: fix duplicate reset on fastclose
  mptcp: decouple mptcp fastclose from tcp close
  mptcp: do not fallback when OoO is present
  mptcp: fix premature close in case of fallback
  mptcp: avoid unneeded subflow-level drops
  mptcp: fix ack generation for fallback msk
  wifi: rtw89: hw_scan: Don't let the operating channel be last
  net: phylink: add missing supported link modes for the fixed-link
  selftest: af_unix: Add test for SO_PEEK_OFF.
  af_unix: Read sk_peek_offset() again after sleeping in unix_stream_read_generic().
  net/mlx5: Clean up only new IRQ glue on request_irq() failure
  ...
2025-11-20 08:52:07 -08:00
Malaya Kumar Rout
7b5ab04f03 timekeeping: Fix resource leak in tk_aux_sysfs_init() error paths
tk_aux_sysfs_init() returns immediately on error during the auxiliary clock
initialization loop without cleaning up previously allocated kobjects and
sysfs groups.

If kobject_create_and_add() or sysfs_create_group() fails during loop
iteration, the parent kobjects (tko and auxo) and any previously created
child kobjects are leaked.

Fix this by adding proper error handling with goto labels to ensure all
allocated resources are cleaned up on failure. kobject_put() on the
parent kobjects will handle cleanup of their children.

Fixes: 7b95663a3d ("timekeeping: Provide interface to control auxiliary clocks")
Signed-off-by: Malaya Kumar Rout <mrout@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://patch.msgid.link/20251120150213.246777-1-mrout@redhat.com
2025-11-20 16:40:48 +01:00
Michal Luczaj
002541ef65 vsock: Ignore signal/timeout on connect() if already established
During connect(), acting on a signal/timeout by disconnecting an already
established socket leads to several issues:

1. connect() invoking vsock_transport_cancel_pkt() ->
   virtio_transport_purge_skbs() may race with sendmsg() invoking
   virtio_transport_get_credit(). This results in a permanently elevated
   `vvs->bytes_unsent`. Which, in turn, confuses the SOCK_LINGER handling.

2. connect() resetting a connected socket's state may race with socket
   being placed in a sockmap. A disconnected socket remaining in a sockmap
   breaks sockmap's assumptions. And gives rise to WARNs.

3. connect() transitioning SS_CONNECTED -> SS_UNCONNECTED allows for a
   transport change/drop after TCP_ESTABLISHED. Which poses a problem for
   any simultaneous sendmsg() or connect() and may result in a
   use-after-free/null-ptr-deref.

Do not disconnect socket on signal/timeout. Keep the logic for unconnected
sockets: they don't linger, can't be placed in a sockmap, are rejected by
sendmsg().

[1]: https://lore.kernel.org/netdev/e07fd95c-9a38-4eea-9638-133e38c2ec9b@rbox.co/
[2]: https://lore.kernel.org/netdev/20250317-vsock-trans-signal-race-v4-0-fc8837f3f1d4@rbox.co/
[3]: https://lore.kernel.org/netdev/60f1b7db-3099-4f6a-875e-af9f6ef194f6@rbox.co/

Fixes: d021c34405 ("VSOCK: Introduce VM Sockets")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://patch.msgid.link/20251119-vsock-interrupted-connect-v2-1-70734cf1233f@rbox.co
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-20 07:40:06 -08:00
Andrey Vatoropin
7d277a7a58 be2net: pass wrb_params in case of OS2BMC
be_insert_vlan_in_pkt() is called with the wrb_params argument being NULL
at be_send_pkt_to_bmc() call site.  This may lead to dereferencing a NULL
pointer when processing a workaround for specific packet, as commit
bc0c3405ab ("be2net: fix a Tx stall bug caused by a specific ipv6
packet") states.

The correct way would be to pass the wrb_params from be_xmit().

Fixes: 760c295e0e ("be2net: Support for OS2BMC.")
Cc: stable@vger.kernel.org
Signed-off-by: Andrey Vatoropin <a.vatoropin@crpt.ru>
Link: https://patch.msgid.link/20251119105015.194501-1-a.vatoropin@crpt.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-20 07:39:54 -08:00
Jens Axboe
49c2d5941c Merge tag 'nvme-6.18-2025-11-20' of git://git.infradead.org/nvme into block-6.18
Pull NVMe fixes from Keith:

"nvme fixes for Linux 6.18

 - Admin queue use-after-free fix (Keith)
 - Target authentication fix (Alistar)
 - Multipath lockdeup fix (Shin'ichiro)
 - FC transport teardown fixes (Ewan)"

* tag 'nvme-6.18-2025-11-20' of git://git.infradead.org/nvme:
  nvme: nvme-fc: Ensure ->ioerr_work is cancelled in nvme_fc_delete_ctrl()
  nvme: nvme-fc: move tagset removal to nvme_fc_delete_ctrl()
  nvme-multipath: fix lockdep WARN due to partition scan work
  nvmet-auth: update sc_c in target host hash calculation
  nvme: fix admin request_queue lifetime
2025-11-20 08:39:17 -07:00
Niklas Cassel
91842ed844 ata: libata-core: Set capacity to zero for a security locked drive
For Security locked drives (drives that have Security enabled, and have
not been Security unlocked by boot firmware), the automatic partition
scanning will result in the user being spammed with errors such as:

  ata5.00: failed command: READ DMA
  ata5.00: cmd c8/00:08:00:00:00/00:00:00:00:00/e0 tag 7 dma 4096 in
           res 51/04:08:00:00:00/00:00:00:00:00/e0 Emask 0x1 (device error)
  ata5.00: status: { DRDY ERR }
  ata5.00: error: { ABRT }
  sd 4:0:0:0: [sda] tag#7 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
  sd 4:0:0:0: [sda] tag#7 Sense Key : Aborted Command [current]
  sd 4:0:0:0: [sda] tag#7 Add. Sense: No additional sense information

during boot, because most commands except for IDENTIFY will be aborted by
a Security locked drive.

For a Security locked drive, set capacity to zero, so that no automatic
partition scanning will happen.

If the user later unlocks the drive using e.g. hdparm, the close() by the
user space application should trigger a revalidation of the drive.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
2025-11-20 13:37:18 +01:00
Niklas Cassel
b118906833 ata: libata-scsi: Fix system suspend for a security locked drive
Commit cf3fc03762 ("ata: libata-scsi: Fix ata_to_sense_error() status
handling") fixed ata_to_sense_error() to properly generate sense key
ABORTED COMMAND (without any additional sense code), instead of the
previous bogus sense key ILLEGAL REQUEST with the additional sense code
UNALIGNED WRITE COMMAND, for a failed command.

However, this broke suspend for Security locked drives (drives that have
Security enabled, and have not been Security unlocked by boot firmware).

The reason for this is that the SCSI disk driver, for the Synchronize
Cache command only, treats any sense data with sense key ILLEGAL REQUEST
as a successful command (regardless of ASC / ASCQ).

After commit cf3fc03762 ("ata: libata-scsi: Fix ata_to_sense_error()
status handling") the code that treats any sense data with sense key
ILLEGAL REQUEST as a successful command is no longer applicable, so the
command fails, which causes the system suspend to be aborted:

  sd 1:0:0:0: PM: dpm_run_callback(): scsi_bus_suspend returns -5
  sd 1:0:0:0: PM: failed to suspend async: error -5
  PM: Some devices failed to suspend, or early wake event detected

To make suspend work once again, for a Security locked device only,
return sense data LOGICAL UNIT ACCESS NOT AUTHORIZED, the actual sense
data which a real SCSI device would have returned if locked.
The SCSI disk driver treats this sense data as a successful command.

Cc: stable@vger.kernel.org
Reported-by: Ilia Baryshnikov <qwelias@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220704
Fixes: cf3fc03762 ("ata: libata-scsi: Fix ata_to_sense_error() status handling")
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
2025-11-20 13:37:17 +01:00
Yihang Li
b32cc17d60 ata: libata-scsi: Add missing scsi_device_put() in ata_scsi_dev_rescan()
Call scsi_device_put() in ata_scsi_dev_rescan() if the device or its
queue are not running.

Fixes: 0c76106cb9 ("scsi: sd: Fix TCG OPAL unlock on system resume")
Cc: stable@vger.kernel.org
Signed-off-by: Yihang Li <liyihang9@h-partners.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
2025-11-20 13:12:26 +01:00
Paolo Abeni
dc9e7e652f Merge tag 'wireless-2025-11-20' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless
Johannes Berg says:

====================
wireless-2025-11-20

A single fix for scanning on some rtw89 devices.

* tag 'wireless-2025-11-20' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless:
  wifi: rtw89: hw_scan: Don't let the operating channel be last
====================

Link: https://patch.msgid.link/20251120085433.8601-3-johannes@sipsolutions.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-20 13:03:43 +01:00
David Bauer
d70b592551 l2tp: reset skb control buffer on xmit
The L2TP stack did not reset the skb control buffer before sending the
encapsulated package.

In a setup with an ath10k radio and batman-adv over an L2TP tunnel
massive fragmentations happen sporadically if the L2TP tunnel is
established over IPv4.

L2TP might reset some of the fields in the IP control buffer, but L2TP
assumes the type of the control buffer to be of an IPv4 packet.

In case the L2TP interface is used as a batadv hardif or the packet is
an IPv6 packet, this assumption breaks.

Clear the entire control buffer to avoid such mishaps altogether.

Fixes: f77ae93904 ("[PPPOL2TP]: Reset meta-data in xmit function")
Signed-off-by: David Bauer <mail@david-bauer.net>
Link: https://patch.msgid.link/20251118001619.242107-1-mail@david-bauer.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-20 11:52:24 +01:00
Oleksij Rempel
3ceb6ac211 net: dsa: microchip: lan937x: Fix RGMII delay tuning
Correct RGMII delay application logic in lan937x_set_tune_adj().

The function was missing `data16 &= ~PORT_TUNE_ADJ` before setting the
new delay value. This caused the new value to be bitwise-OR'd with the
existing PORT_TUNE_ADJ field instead of replacing it.

For example, when setting the RGMII 2 TX delay on port 4, the
intended TUNE_ADJUST value of 0 (RGMII_2_TX_DELAY_2NS) was
incorrectly OR'd with the default 0x1B (from register value 0xDA3),
leaving the delay at the wrong setting.

This patch adds the missing mask to clear the field, ensuring the
correct delay value is written. Physical measurements on the RGMII TX
lines confirm the fix, showing the delay changing from ~1ns (before
change) to ~2ns.

While testing on i.MX 8MP showed this was within the platform's timing
tolerance, it did not match the intended hardware-characterized value.

Fixes: b19ac41faa ("net: dsa: microchip: apply rgmii tx and rx delay in phylink mac config")
Cc: stable@vger.kernel.org
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://patch.msgid.link/20251114090951.4057261-1-o.rempel@pengutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-20 11:26:14 +01:00
Darrick J. Wong
678e1cc2f4 xfs: fix out of bounds memory read error in symlink repair
xfs/286 produced this report on my test fleet:

 ==================================================================
 BUG: KFENCE: out-of-bounds read in memcpy_orig+0x54/0x110

 Out-of-bounds read at 0xffff88843fe9e038 (184B right of kfence-#184):
  memcpy_orig+0x54/0x110
  xrep_symlink_salvage_inline+0xb3/0xf0 [xfs]
  xrep_symlink_salvage+0x100/0x110 [xfs]
  xrep_symlink+0x2e/0x80 [xfs]
  xrep_attempt+0x61/0x1f0 [xfs]
  xfs_scrub_metadata+0x34f/0x5c0 [xfs]
  xfs_ioc_scrubv_metadata+0x387/0x560 [xfs]
  xfs_file_ioctl+0xe23/0x10e0 [xfs]
  __x64_sys_ioctl+0x76/0xc0
  do_syscall_64+0x4e/0x1e0
  entry_SYSCALL_64_after_hwframe+0x4b/0x53

 kfence-#184: 0xffff88843fe9df80-0xffff88843fe9dfea, size=107, cache=kmalloc-128

 allocated by task 3470 on cpu 1 at 263329.131592s (192823.508886s ago):
  xfs_init_local_fork+0x79/0xe0 [xfs]
  xfs_iformat_local+0xa4/0x170 [xfs]
  xfs_iformat_data_fork+0x148/0x180 [xfs]
  xfs_inode_from_disk+0x2cd/0x480 [xfs]
  xfs_iget+0x450/0xd60 [xfs]
  xfs_bulkstat_one_int+0x6b/0x510 [xfs]
  xfs_bulkstat_iwalk+0x1e/0x30 [xfs]
  xfs_iwalk_ag_recs+0xdf/0x150 [xfs]
  xfs_iwalk_run_callbacks+0xb9/0x190 [xfs]
  xfs_iwalk_ag+0x1dc/0x2f0 [xfs]
  xfs_iwalk_args.constprop.0+0x6a/0x120 [xfs]
  xfs_iwalk+0xa4/0xd0 [xfs]
  xfs_bulkstat+0xfa/0x170 [xfs]
  xfs_ioc_fsbulkstat.isra.0+0x13a/0x230 [xfs]
  xfs_file_ioctl+0xbf2/0x10e0 [xfs]
  __x64_sys_ioctl+0x76/0xc0
  do_syscall_64+0x4e/0x1e0
  entry_SYSCALL_64_after_hwframe+0x4b/0x53

 CPU: 1 UID: 0 PID: 1300113 Comm: xfs_scrub Not tainted 6.18.0-rc4-djwx #rc4 PREEMPT(lazy)  3d744dd94e92690f00a04398d2bd8631dcef1954
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-4.module+el8.8.0+21164+ed375313 04/01/2014
 ==================================================================

On further analysis, I realized that the second parameter to min() is
not correct.  xfs_ifork::if_bytes is the size of the xfs_ifork::if_data
buffer.  if_bytes can be smaller than the data fork size because:

(a) the forkoff code tries to keep the data area as large as possible
(b) for symbolic links, if_bytes is the ondisk file size + 1
(c) forkoff is always a multiple of 8.

Case in point: for a single-byte symlink target, forkoff will be
8 but the buffer will only be 2 bytes long.

In other words, the logic here is wrong and we walk off the end of the
incore buffer.  Fix that.

Cc: stable@vger.kernel.org # v6.10
Fixes: 2651923d8d ("xfs: online repair of symbolic links")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
2025-11-20 11:06:24 +01:00
Dapeng Mi
f1f96511b1 perf: Fix 0 count issue of cpu-clock
Currently cpu-clock event always returns 0 count, e.g.,

perf stat -e cpu-clock -- sleep 1

 Performance counter stats for 'sleep 1':
                 0      cpu-clock                        #    0.000 CPUs utilized
       1.002308394 seconds time elapsed

The root cause is the commit 'bc4394e5e79c ("perf: Fix the throttle
 error of some clock events")' adds PERF_EF_UPDATE flag check before
calling cpu_clock_event_update() to update the count, however the
PERF_EF_UPDATE flag is never set when the cpu-clock event is stopped in
counting mode (pmu->dev() -> cpu_clock_event_del() ->
cpu_clock_event_stop()). This leads to the cpu-clock event count is
never updated.

To fix this issue, force to set PERF_EF_UPDATE flag for cpu-clock event
just like what task-clock does.

Fixes: bc4394e5e7 ("perf: Fix the throttle error of some clock events")
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://patch.msgid.link/20251112080526.3971392-1-dapeng1.mi@linux.intel.com
2025-11-20 10:42:12 +01:00
David Howells
d5227c8817 cifs: Add the smb3_read_* tracepoints to SMB1
Add the smb3_read_* tracepoints to SMB1's cifs_async_readv() and
cifs_readv_callback().

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Paulo Alcantara <pc@manguebit.org>
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-20 03:12:05 -06:00
Shaurya Rane
7e4d9120cf cifs: fix memory leak in smb3_fs_context_parse_param error path
Add proper cleanup of ctx->source and fc->source to the
cifs_parse_mount_err error handler. This ensures that memory allocated
for the source strings is correctly freed on all error paths, matching
the cleanup already performed in the success path by
smb3_cleanup_fs_context_contents().
Pointers are also set to NULL after freeing to prevent potential
double-free issues.

This change fixes a memory leak originally detected by syzbot. The
leak occurred when processing Opt_source mount options if an error
happened after ctx->source and fc->source were successfully
allocated but before the function completed.

The specific leak sequence was:
1. ctx->source = smb3_fs_context_fullpath(ctx, '/') allocates memory
2. fc->source = kstrdup(ctx->source, GFP_KERNEL) allocates more memory
3. A subsequent error jumps to cifs_parse_mount_err
4. The old error handler freed passwords but not the source strings,
causing the memory to leak.

This issue was not addressed by commit e8c73eb7db ("cifs: client:
fix memory leak in smb3_fs_context_parse_param"), which only fixed
leaks from repeated fsconfig() calls but not this error path.

Patch updated with minor change suggested by kernel test robot

Reported-by: syzbot+87be6809ed9bf6d718e3@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=87be6809ed9bf6d718e3
Fixes: 24e0a1eff9 ("cifs: switch to new mount api")
Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Shaurya Rane <ssrane_b23@ee.vjti.ac.in>
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-20 03:06:25 -06:00
Henrique Carvalho
a9d1f38df7 smb: client: introduce close_cached_dir_locked()
Replace close_cached_dir() calls under cfid_list_lock with a new
close_cached_dir_locked() variant that uses kref_put() instead of
kref_put_lock() to avoid recursive locking when dropping references.

While the existing code works if the refcount >= 2 invariant holds,
this area has proven error-prone. Make deadlocks impossible and WARN
on invariant violations.

Cc: stable@vger.kernel.org
Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Henrique Carvalho <henrique.carvalho@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-20 03:03:30 -06:00
Johannes Berg
0ff8eeafba Merge tag 'rtw-2025-11-20' of https://github.com/pkshih/rtw
Ping-Ke Shih says:
==================
rtw patches for v6.18-rc7

Fix firmware goes wrong and causes device unusable after scanning. This
issue presents under certain regulatory domain reported from end users.
==================

Link: https://patch.msgid.link/8217bee0-96c4-44c1-9593-2e9ca12eccc5@RTKEXHMBS03.realtek.com.tw
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2025-11-20 09:44:08 +01:00
Vincent Li
677e6123e3 LoongArch: BPF: Disable trampoline for kernel module function trace
The current LoongArch BPF trampoline implementation is incompatible
with tracing functions in kernel modules. This causes several severe
and user-visible problems:

* The `bpf_selftests/module_attach` test fails consistently.
* Kernel lockup when a BPF program is attached to a module function [1].
* Critical kernel modules like WireGuard experience traffic disruption
  when their functions are traced with fentry [2].

Given the severity and the potential for other unknown side-effects, it
is safest to disable the feature entirely for now. This patch prevents
the BPF subsystem from allowing trampoline attachments to kernel module
functions on LoongArch.

This is a temporary mitigation until the core issues in the trampoline
code for kernel module handling can be identified and fixed.

[root@fedora bpf]# ./test_progs -a module_attach -v
bpf_testmod.ko is already unloaded.
Loading bpf_testmod.ko...
Successfully loaded bpf_testmod.ko.
test_module_attach:PASS:skel_open 0 nsec
test_module_attach:PASS:set_attach_target 0 nsec
test_module_attach:PASS:set_attach_target_explicit 0 nsec
test_module_attach:PASS:skel_load 0 nsec
libbpf: prog 'handle_fentry': failed to attach: -ENOTSUPP
libbpf: prog 'handle_fentry': failed to auto-attach: -ENOTSUPP
test_module_attach:FAIL:skel_attach skeleton attach failed: -524
Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED
Successfully unloaded bpf_testmod.ko.

[1]: https://lore.kernel.org/loongarch/CAK3+h2wDmpC-hP4u4pJY8T-yfKyk4yRzpu2LMO+C13FMT58oqQ@mail.gmail.com/
[2]: https://lore.kernel.org/loongarch/CAK3+h2wYcpc+OwdLDUBvg2rF9rvvyc5amfHT-KcFaK93uoELPg@mail.gmail.com/

Cc: stable@vger.kernel.org
Fixes: f9b6b41f0c ("LoongArch: BPF: Add basic bpf trampoline support")
Acked-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Vincent Li <vincent.mc.li@gmail.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-20 14:42:05 +08:00
Huacai Chen
a6b533adfc LoongArch: Don't panic if no valid cache info for PCI
If there is no valid cache info detected (may happen in virtual machine)
for pci_dfl_cache_line_size, kernel shouldn't panic. Because in the PCI
core it will be evaluated to (L1_CACHE_BYTES >> 2).

Cc: <stable@vger.kernel.org>
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-20 14:42:05 +08:00
Huacai Chen
863a320dc6 LoongArch: Mask all interrupts during kexec/kdump
If the default state of the interrupt controllers in the first kernel
don't mask any interrupts, it may cause the second kernel to potentially
receive interrupts (which were previously allocated by the first kernel)
immediately after a CPU becomes online during its boot process. These
interrupts cannot be properly routed, leading to bad IRQ issues.

This patch calls machine_kexec_mask_interrupts() to mask all interrupts
during the kexec/kdump process.

Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-20 14:42:05 +08:00
Bibo Mao
acf5de1b23 LoongArch: Fix NUMA node parsing with numa_memblks
On physical machine, NUMA node id comes from high bit 44:48 of physical
address. However it is not true on virt machine. With general method, it
comes from ACPI SRAT table.

Here the common function numa_memblks_init() is used to parse NUMA node
information with numa_memblks.

Cc: <stable@vger.kernel.org>
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-20 14:42:05 +08:00
Huacai Chen
1c004609fd LoongArch: Consolidate CPU names in /proc/cpuinfo
Some processors have no IOCSR.VENDOR and IOCSR.CPUNAME, some processors
have these registers but there is no valid information.

Consolidate CPU names in /proc/cpuinfo:
1. Add "PRID" to display the PRID & Core-Name;
2. Let "Model Name" display "Unknown" if no valid name.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-20 14:42:05 +08:00
Thomas Weißschuh
20d7338f2d LoongArch: Use UAPI types in ptrace UAPI header
The kernel UAPI headers already contain fixed-width integer types, there
is no need to rely on the libc types. There may not be a libc available
or the libc may not provides the <stdint.h>, like for example on nolibc.

This also aligns the header with the rest of the LoongArch UAPI headers.

Fixes: 803b0fc5c3 ("LoongArch: Add process management")
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-20 14:42:05 +08:00
Jakub Kicinski
f170b1dc26 Merge branch '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2025-11-18 (idpf, ice)

This series contains updates to idpf and ice drivers.

Emil adds a check for NULL vport_config during removal to avoid NULL
pointer dereference in idpf.

Grzegorz fixes PTP teardown paths to account for some missed cleanups
for ice driver.

* '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue:
  ice: fix PTP cleanup on driver removal in error path
  idpf: fix possible vport_config NULL pointer deref in remove
====================

Link: https://patch.msgid.link/20251118235207.2165495-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:10:53 -08:00
Jakub Kicinski
4026310a04 Merge branch 'mptcp-misc-fixes-for-v6-18-rc7'
Matthieu Baerts says:

====================
mptcp: misc fixes for v6.18-rc7

Here are various unrelated fixes:

- Patch 1: Fix window space computation for fallback connections which
  can affect ACK generation. A fix for v5.11.

- Patch 2: Avoid unneeded subflow-level drops due to unsynced received
  window. A fix for v5.11.

- Patch 3: Avoid premature close for fallback connections with PREEMPT
  kernels. A fix for v5.12.

- Patch 4: Reset instead of fallback in case of data in the MPTCP
  out-of-order queue. A fix for v5.7.

- Patches 5-7: Avoid also sending "plain" TCP reset when closing with an
  MP_FASTCLOSE. A fix for v6.1.

- Patches 8-9: Longer timeout for background connections in MPTCP Join
  selftests. An additional fix for recent patches for v5.13/v6.1.

- Patches 10-11: Fix typo in a check introduce in a recent refactoring.
  A fix for v6.15.
====================

Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-0-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:19 -08:00
Gang Yan
0eee0fdf9b selftests: mptcp: add a check for 'add_addr_accepted'
The previous patch fixed an issue with the 'add_addr_accepted' counter.
This was not spot by the test suite.

Check this counter and 'add_addr_signal' in MPTCP Join 'delete re-add
signal' test. This should help spotting similar regressions later on.
These counters are crucial for ensuring the MPTCP path manager correctly
handles the subflow creation via 'ADD_ADDR'.

Signed-off-by: Gang Yan <yangang@kylinos.cn>
Reviewed-by: Geliang Tang <geliang@kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-11-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:16 -08:00
Gang Yan
92e239e36d mptcp: fix address removal logic in mptcp_pm_nl_rm_addr
Fix inverted WARN_ON_ONCE condition that prevented normal address
removal counter updates. The current code only executes decrement
logic when the counter is already 0 (abnormal state), while
normal removals (counter > 0) are ignored.

Signed-off-by: Gang Yan <yangang@kylinos.cn>
Fixes: 6361139185 ("mptcp: pm: remove '_nl' from mptcp_pm_nl_rm_addr_received")
Cc: stable@vger.kernel.org
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-10-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:15 -08:00
Matthieu Baerts (NGI0)
0e4ec14dc1 selftests: mptcp: join: userspace: longer timeout
In rare cases, when the test environment is very slow, some userspace
tests can fail because some expected events have not been seen.

Because the tests are expecting a long on-going connection, and they are
not waiting for the end of the transfer, it is fine to have a longer
timeout, and even go over the default one. This connection will be
killed at the end, after the verifications: increasing the timeout
doesn't change anything, apart from avoiding it to end before the end of
the verifications.

To play it safe, all userspace tests not waiting for the end of the
transfer are now having a longer timeout: 2 minutes.

The Fixes commit was making the connection longer, but still, the
default timeout would have stopped it after 1 minute, which might not be
enough in very slow environments.

Fixes: 290493078b ("selftests: mptcp: join: userspace: longer transfer")
Cc: stable@vger.kernel.org
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Geliang Tang <geliang@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-9-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:15 -08:00
Matthieu Baerts (NGI0)
fb13c6bb81 selftests: mptcp: join: endpoints: longer timeout
In rare cases, when the test environment is very slow, some endpoints
tests can fail because some expected events have not been seen.

Because the tests are expecting a long on-going connection, and they are
not waiting for the end of the transfer, it is fine to have a longer
timeout, and even go over the default one. This connection will be
killed at the end, after the verifications: increasing the timeout
doesn't change anything, apart from avoiding it to end before the end of
the verifications.

To play it safe, all endpoints tests not waiting for the end of the
transfer are now having a longer timeout: 2 minutes.

The Fixes commit was making the connection longer, but still, the
default timeout would have stopped it after 1 minute, which might not be
enough in very slow environments.

Fixes: 6457595db9 ("selftests: mptcp: join: endpoints: longer transfer")
Cc: stable@vger.kernel.org
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Geliang Tang <geliang@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-8-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:15 -08:00
Matthieu Baerts (NGI0)
efff6cd53a selftests: mptcp: join: fastclose: remove flaky marks
After recent fixes like the parent commit, and "selftests: mptcp:
connect: trunc: read all recv data", the two fastclose subtests no
longer look flaky any more.

It then feels fine to remove these flaky marks, to no longer ignore
these subtests in case of errors.

Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-7-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:15 -08:00
Paolo Abeni
ae15506024 mptcp: fix duplicate reset on fastclose
The CI reports sporadic failures of the fastclose self-tests. The root
cause is a duplicate reset, not carrying the relevant MPTCP option.
In the failing scenario the bad reset is received by the peer before
the fastclose one, preventing the reception of the latter.

Indeed there is window of opportunity at fastclose time for the
following race:

  mptcp_do_fastclose
    __mptcp_close_ssk
      __tcp_close()
        tcp_set_state() [1]
        tcp_send_active_reset() [2]

After [1] the stack will send reset to in-flight data reaching the now
closed port. Such reset may race with [2].

Address the issue explicitly sending a single reset on fastclose before
explicitly moving the subflow to close status.

Fixes: d21f834855 ("mptcp: use fastclose on more edge scenarios")
Cc: stable@vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/596
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Geliang Tang <geliang@kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-6-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:15 -08:00
Paolo Abeni
fff0c87996 mptcp: decouple mptcp fastclose from tcp close
With the current fastclose implementation, the mptcp_do_fastclose()
helper is in charge of two distinct actions: send the fastclose reset
and cleanup the subflows.

Formally decouple the two steps, ensuring that mptcp explicitly closes
all the subflows after the mentioned helper.

This will make the upcoming fix simpler, and allows dropping the 2nd
argument from mptcp_destroy_common(). The Fixes tag is then the same as
in the next commit to help with the backports.

Fixes: d21f834855 ("mptcp: use fastclose on more edge scenarios")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Geliang Tang <geliang@kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-5-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:15 -08:00
Paolo Abeni
1bba3f219c mptcp: do not fallback when OoO is present
In case of DSS corruption, the MPTCP protocol tries to avoid the subflow
reset if fallback is possible. Such corruptions happen in the receive
path; to ensure fallback is possible the stack additionally needs to
check for OoO data, otherwise the fallback will break the data stream.

Fixes: e32d262c89 ("mptcp: handle consistently DSS corruption")
Cc: stable@vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/598
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-4-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:14 -08:00
Paolo Abeni
17393fa7b7 mptcp: fix premature close in case of fallback
I'm observing very frequent self-tests failures in case of fallback when
running on a CONFIG_PREEMPT kernel.

The root cause is that subflow_sched_work_if_closed() closes any subflow
as soon as it is half-closed and has no incoming data pending.

That works well for regular subflows - MPTCP needs bi-directional
connectivity to operate on a given subflow - but for fallback socket is
race prone.

When TCP peer closes the connection before the MPTCP one,
subflow_sched_work_if_closed() will schedule the MPTCP worker to
gracefully close the subflow, and shortly after will do another schedule
to inject and process a dummy incoming DATA_FIN.

On CONFIG_PREEMPT kernel, the MPTCP worker can kick-in and close the
fallback subflow before subflow_sched_work_if_closed() is able to create
the dummy DATA_FIN, unexpectedly interrupting the transfer.

Address the issue explicitly avoiding closing fallback subflows on when
the peer is only half-closed.

Note that, when the subflow is able to create the DATA_FIN before the
worker invocation, the worker will change the msk state before trying to
close the subflow and will skip the latter operation as the msk will not
match anymore the precondition in __mptcp_close_subflow().

Fixes: f09b0ad55a ("mptcp: close subflow when receiving TCP+FIN")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-3-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:14 -08:00
Paolo Abeni
4f102d747c mptcp: avoid unneeded subflow-level drops
The rcv window is shared among all the subflows. Currently, MPTCP sync
the TCP-level rcv window with the MPTCP one at tcp_transmit_skb() time.

The above means that incoming data may sporadically observe outdated
TCP-level rcv window and being wrongly dropped by TCP.

Address the issue checking for the edge condition before queuing the
data at TCP level, and eventually syncing the rcv window as needed.

Note that the issue is actually present from the very first MPTCP
implementation, but backports older than the blamed commit below will
range from impossible to useless.

Before:

  $ nstat -n; sleep 1; nstat -z TcpExtBeyondWindow
  TcpExtBeyondWindow              14                 0.0

After:

  $ nstat -n; sleep 1; nstat -z TcpExtBeyondWindow
  TcpExtBeyondWindow              0                  0.0

Fixes: fa3fe2b150 ("mptcp: track window announced to peer")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-2-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:14 -08:00
Paolo Abeni
5e15395f6d mptcp: fix ack generation for fallback msk
mptcp_cleanup_rbuf() needs to know the last most recent, mptcp-level
rcv_wnd sent, and such information is tracked into the msk->old_wspace
field, updated at ack transmission time by mptcp_write_options().

Fallback socket do not add any mptcp options, such helper is never
invoked, and msk->old_wspace value remain stale. That in turn makes
ack generation at recvmsg() time quite random.

Address the issue ensuring mptcp_write_options() is invoked even for
fallback sockets, and just update the needed info in such a case.

The issue went unnoticed for a long time, as mptcp currently overshots
the fallback socket receive buffer autotune significantly. It is going
to change in the near future.

Fixes: e3859603ba ("mptcp: better msk receive window updates")
Cc: stable@vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/594
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Geliang Tang <geliang@kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-1-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 20:07:14 -08:00
Bart Van Assche
90449f2d1e scsi: sg: Do not sleep in atomic context
sg_finish_rem_req() calls blk_rq_unmap_user(). The latter function may
sleep. Hence, call sg_finish_rem_req() with interrupts enabled instead
of disabled.

Reported-by: syzbot+c01f8e6e73f20459912e@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-scsi/691560c4.a70a0220.3124cb.001a.GAE@google.com/
Cc: Hannes Reinecke <hare@suse.de>
Cc: stable@vger.kernel.org
Fixes: 97d27b0dd0 ("scsi: sg: close race condition in sg_remove_sfp_usercontext()")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20251113181643.1108973-1-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2025-11-19 22:46:36 -05:00
Bitterblue Smith
e837b9091b wifi: rtw89: hw_scan: Don't let the operating channel be last
Scanning can be offloaded to the firmware. To that end, the driver
prepares a list of channels to scan, including periodic visits back to
the operating channel, and sends the list to the firmware.

When the channel list is too long to fit in a single H2C message, the
driver splits the list, sends the first part, and tells the firmware to
scan. When the scan is complete, the driver sends the next part of the
list and tells the firmware to scan.

When the last channel that fit in the H2C message is the operating
channel something seems to go wrong in the firmware. It will
acknowledge receiving the list of channels but apparently it will not
do anything more. The AP can't be pinged anymore. The driver still
receives beacons, though.

One way to avoid this is to split the list of channels before the
operating channel.

Affected devices:

* RTL8851BU with firmware 0.29.41.3
* RTL8832BU with firmware 0.29.29.8
* RTL8852BE with firmware 0.29.29.8

The commit 57a5fbe39a ("wifi: rtw89: refactor flow that hw scan handles channel list")
is found by git blame, but it is actually to refine the scan flow, but not
a culprit, so skip Fixes tag.

Reported-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
Closes: https://lore.kernel.org/linux-wireless/0abbda91-c5c2-4007-84c8-215679e652e1@gmail.com/
Cc: stable@vger.kernel.org # 6.16+
Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
Acked-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Link: https://patch.msgid.link/c1e61744-8db4-4646-867f-241b47d30386@gmail.com
2025-11-20 11:36:01 +08:00
Shikang Fan
c156c7f27e drm/amdgpu: Add sriov vf check for VCN per queue reset support.
Add SRIOV check when setting VCN ring's supported reset mask.

Signed-off-by: Shikang Fan <shikang.fan@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit ee9b603ad43f9870eb75184f9fb0a84f8c3cc852)
Cc: stable@vger.kernel.org
2025-11-19 18:08:37 -05:00
Srinivasan Shanmugam
21f46f5476 drm/amdgpu/ttm: Fix crash when handling MMIO_REMAP in PDE flags
The MMIO_REMAP BO is a special 4K IO page that does not have a ttm_tt
behind it. However, amdgpu_ttm_tt_pde_flags() was treating it like
normal TT/doorbell/preempt memory and unconditionally accessed
ttm->caching. For the MMIO_REMAP BO, ttm is NULL, so this leads to a
NULL pointer dereference when computing PDE flags.

Fix this by checking that ttm is non-NULL before reading ttm->caching.
This prevents the crash for MMIO_REMAP and also makes the code more
defensive if other BOs ever come through without a ttm_tt.

Fixes: fb5a52dbe9 ("drm/amdgpu: Implement TTM handling for MMIO_REMAP placement")
Suggested-by: Jesse Zhang <Jesse.Zhang@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Reviewed-by: Jesse Zhang <Jesse.Zhang@amd.com>
Tested-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 0db94da5a0a1cacda080b9ec8425fcbe4babc141)
2025-11-19 18:08:37 -05:00
Timur Kristóf
a445923393 drm/amdgpu/vm: Check PRT uAPI flag instead of PTE flag
This fixes sparse mappings (aka. partially resident textures).

Check the correct flags.
Since a recent refactor, the code works with uAPI flags (for
mapping buffer objects), and not PTE (page table entry) flags.

Fixes: 6716a823d1 ("drm/amdgpu: rework how PTE flags are generated v3")
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 8feeab26c80635b802f72b3ed986c693ff8f3212)
2025-11-19 18:08:37 -05:00
Yifan Zha
80d8a9ad15 drm/amdgpu: Skip emit de meta data on gfx11 with rs64 enabled
[Why]
Accoreding to CP updated to RS64 on gfx11,
WRITE_DATA with PREEMPTION_META_MEMORY(dst_sel=8) is illegal for CP FW.
That packet is used for MCBP on F32 based system.
So it would lead to incorrect GRBM write and FW is not handling that
extra case correctly.

[How]
With gfx11 rs64 enabled, skip emit de meta data.

Signed-off-by: Yifan Zha <Yifan.Zha@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 8366cd442d226463e673bed5d199df916f4ecbcf)
Cc: stable@vger.kernel.org
2025-11-19 18:08:37 -05:00
Mario Limonciello
31ab31433c drm/amd: Skip power ungate during suspend for VPE
During the suspend sequence VPE is already going to be power gated
as part of vpe_suspend().  It's unnecessary to call during calls to
amdgpu_device_set_pg_state().

It actually can expose a race condition with the firmware if s0i3
sequence starts as well.  Drop these calls.

Cc: Peyton.Lee@amd.com
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 2a6c826cfeedd7714611ac115371a959ead55bda)
Cc: stable@vger.kernel.org
2025-11-19 18:08:36 -05:00
Wen Yang
807e0d187d tick/sched: Fix bogus condition in report_idle_softirq()
In commit 0345691b24 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle") the
new function report_idle_softirq() was created by breaking code out of the
existing can_stop_idle_tick() for kernels v5.18 and newer.

In doing so, the code essentially went from this form:

	if (A) {
		static int ratelimit;
		if (ratelimit < 10 && !C && A&D) {
                       pr_warn("NOHZ tick-stop error: ...");
		       ratelimit++;
		}
		return false;
	}

to a new function:

static bool report_idle_softirq(void)
{
       static int ratelimit;

       if (likely(!A))
               return false;

       if (ratelimit < 10)
               return false;
...
       pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
               pending);
       ratelimit++;

       return true;
}

commit a7e282c777 ("tick/rcu: Fix bogus ratelimit condition") realized
ratelimit was essentially set to zero instead of ten, and hence *no*
softirq pending messages would ever be issued, but "fixed" it as:

-       if (ratelimit < 10)
+       if (ratelimit >= 10)
                return false;

However, this fix introduced another issue:

When ratelimit is greater than or equal 10, even if A is true, it will
directly return false. While ratelimit in the original code was only used
to control printing and will not affect the return value.

Restore the original logic and restrict ratelimit to control the printk and
not the return value.

Fixes: 0345691b24 ("tick/rcu: Stop allowing RCU_SOFTIRQ in idle")
Fixes: a7e282c777 ("tick/rcu: Fix bogus ratelimit condition")
Signed-off-by: Wen Yang <wen.yang@linux.dev>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://patch.msgid.link/20251119174525.29470-1-wen.yang@linux.dev
2025-11-19 19:30:45 +01:00
Stephen Boyd
3c493b524f Merge tag 'sunxi-clk-fixes-for-6.18' of https://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux into clk-fixes
Pull Allwinner clk driver fixes from Chen-Yu Tsai:

Just a couple fixes for the A523 family. A couple clocks are marked as
critical, and the lower bound of the audio PLL was lowered to match
the datasheet.

* tag 'sunxi-clk-fixes-for-6.18' of https://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux:
  clk: sunxi-ng: sun55i-a523-ccu: Lower audio0 pll minimum rate
  clk: sunxi-ng: sun55i-a523-r-ccu: Mark bus-r-dma as critical
  clk: sunxi-ng: Mark A523 bus-r-cpucfg clock as critical
2025-11-19 10:59:21 -07:00
Ville Syrjälä
cead55e24c drm/plane: Fix create_in_format_blob() return value
create_in_format_blob() is either supposed to return a valid
pointer or an error, but never NULL. The caller will dereference
the blob when it is not an error, and thus will oops if NULL
returned. Return proper error values in the failure cases.

Cc: stable@vger.kernel.org
Cc: Arun R Murthy <arun.r.murthy@intel.com>
Fixes: 0d6dcd741c ("drm/plane: modify create_in_formats to acommodate async")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patch.msgid.link/20251112233030.24117-2-ville.syrjala@linux.intel.com
Reviewed-by: Arun R Murthy <arun.r.murthy@intel.com>
2025-11-19 19:37:04 +02:00
Linus Torvalds
23cb64fb76 Merge tag 'soc-fixes-6.18-3' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc
Pull SoC fixes from Arnd Bergmann:
 "These are mainly devicetree fixes for the arm platforms from Rockchips
  NXP, ASpeed and Broadcom, addressing issues with accidental
  overclocking, pinctrl, network and dtc warnings.

  There are additional fixes for regressions with the i.MX reset and
  memory controller drivers as well as the Tegra memory controller
  driver.

  Minor updates to the MAINTAINERS file, tee documentation and
  defconfigs bring those up to date with recent changes elsewhere"

* tag 'soc-fixes-6.18-3' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc: (29 commits)
  MAINTAINERS: sync omap devicetree maintainers with omap platform
  MAINTAINERS: Update Krzysztof Kozlowski's email
  arm64: dts: rockchip: fix PCIe 3.3V regulator voltage on orangepi-5
  arm64: dts: rockchip: disable HS400 on RK3588 Tiger
  arm64: dts: rockchip: drop reset from rk3576 i2c9 node
  tee: <uapi/linux/tee.h: fix all kernel-doc issues
  arm64: dts: rockchip: Fix USB power enable pin for BTT CB2 and Pi2
  arm64: dts: broadcom: bcm2712: rpi-5: Add ethernet0 alias
  arm64: dts: broadcom: Assign clock rates in eth node for RPi5
  reset: imx8mp-audiomix: Fix bad mask values
  ARM: dts: BCM53573: Fix address of Luxul XAP-1440's Ethernet PHY
  arm64: defconfig: Fix V3D deferred probe timeout
  arm64: dts: rockchip: Fix vccio4-supply on rk3566-pinetab2
  arm64: dts: rockchip: include rk3399-base instead of rk3399 in rk3399-op1
  arm64: dts: imx8mp-kontron: Fix USB OTG role switching
  arm64: dts: imx95: Fix MSI mapping for PCIe endpoint nodes
  arm64: dts: imx8-ss-img: Avoid gpio0_mipi_csi GPIOs being deferred
  arm: imx_v6_v7_defconfig: enable ext4 directly
  memory: tegra210: Fix incorrect client ids
  arm64: dts: rockchip: Fix indentation on rk3399 haikou demo dtso
  ...
2025-11-19 09:36:04 -08:00
Linus Torvalds
fdeee1cc7f Merge tag 'pwm/for-6.18-rc7-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/ukleinek/linux
Pull pwm fix from Uwe Kleine-König:
 "Correct mismatched pwm chip info for adp5585.

  Luke Wang found a problem in the pwm-adp5585 driver about how register
  information is mapped to the different device variants. This
  effectively made the driver non-functional.

  That didn't pop up before because the driver change was developed as
  part of a bigger mfd series and the original author didn't retest PWM
  functionality after it was tested in an earlier revision but then
  reworked"

* tag 'pwm/for-6.18-rc7-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/ukleinek/linux:
  pwm: adp5585: Correct mismatched pwm chip info
2025-11-19 09:26:09 -08:00
Linus Torvalds
0678f56304 Merge tag 'hid-for-linus-2025111901' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid
Pull HID fixes from Jiri Kosina:

 - memory leak fixes in hid-uclogic, hid-ntrig and hid-playstation
   drivers (Abdun Nihaal, Masami Ichikawa)

 - regression fix for playback handling in hid-pidff (Tomasz Pakuła)

 - initialization fix for some amd_sfh platforms (Mario Limonciello)

 - a few assorted device-specific ID additions and quirks

* tag 'hid-for-linus-2025111901' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid:
  HID: uclogic: Fix potential memory leak in error path
  HID: playstation: Fix memory leak in dualshock4_get_calibration_data()
  HID: pidff: Fix needs_playback check
  HID: corsair-void: Use %pe for printing PTR_ERR
  HID: elecom: Add support for ELECOM M-XT3URBK (018F)
  HID: hid-input: Extend Elan ignore battery quirk to USB
  HID: hid-ntrig: Prevent memory leak in ntrig_report_version()
  HID: amd_sfh: Stop sensor before starting
  HID: apple: Add SONiX AK870 PRO to non_apple_keyboards quirk list
  HID: lenovo: fixup Lenovo Yoga Slim 7x Keyboard rdesc
  HID: quirks: work around VID/PID conflict for 0x4c4a/0x4155
2025-11-19 08:54:58 -08:00
Wei Fang
e31a11be41 net: phylink: add missing supported link modes for the fixed-link
Pause, Asym_Pause and Autoneg bits are not set when pl->supported is
initialized, so these link modes will not work for the fixed-link. This
leads to a TCP performance degradation issue observed on the i.MX943
platform.

The switch CPU port of i.MX943 is connected to an ENETC MAC, this link
is a fixed link and the link speed is 2.5Gbps. And one of the switch
user ports is the RGMII interface, and its link speed is 1Gbps. If the
flow-control of the fixed link is not enabled, we can easily observe
the iperf performance of TCP packets is very low. Because the inbound
rate on the CPU port is greater than the outbound rate on the user port,
the switch is prone to congestion, leading to the loss of some TCP
packets and requiring multiple retransmissions.

Solving this problem should be as simple as setting the Asym_Pause and
Pause bits. The reason why the Autoneg bit needs to be set, Russell
has gave a very good explanation in the thread [1], see below.

"As the advertising and lp_advertising bitmasks have to be non-empty,
and the swphy reports aneg capable, aneg complete, and AN enabled, then
for consistency with that state, Autoneg should be set. This is how it
was prior to the blamed commit."

Fixes: de7d3f87be ("net: phylink: Use phy_caps_lookup for fixed-link configuration")
Link: https://lore.kernel.org/aRjqLN8eQDIQfBjS@shell.armlinux.org.uk # [1]
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Link: https://patch.msgid.link/20251117102943.1862680-1-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-19 08:31:11 -08:00
Linus Torvalds
2df79e4d72 Merge tag 'fixes-2025-11-19' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock
Pull memblock fix from Mike Rapoport:
 "Fix memblock_estimated_nr_free_pages() for soft-reserved memory

  The "soft-reserved" memory regions (EFI_MEMORY_SP) are added to the
  memblock.reserved, but not to the memblock.memory. It causes
  memblock_estimated_nr_free_pages() to return a value smaller value
  than expected, or if it underflows, an extremely large value.

  Calculate the number of estimated free pages using
  memblock_reserved_kern_size() instead of memblock_reserved_size() to
  fix the issue"

* tag 'fixes-2025-11-19' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock:
  memblock: fix memblock_estimated_nr_free_pages() for soft-reserved memory
2025-11-19 08:27:05 -08:00
Thomas Zimmermann
d4f14fa5c6 Merge drm/drm-fixes into drm-misc-fixes
Backmerging to get fixes from v6.18-rc6.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
2025-11-19 15:26:25 +01:00
Rob Herring (Arm)
79afd3c5ed dt-bindings: pinctrl: xlnx,versal-pinctrl: Add missing unevaluatedProperties on '^conf' nodes
Add the missing unevaluatedProperties to disallow extra properties on
the '^conf' nodes.

Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2025-11-19 15:21:44 +01:00
Tony Luck
d2932a59c2 ACPI: APEI: EINJ: Fix EINJV2 initialization and injection
ACPI 6.6 specification for EINJV2 appends an extra structure to
the end of the existing struct set_error_type_with_address.

Several issues showed up in testing.

 1) Initialization was broken by an earlier fix [1] since is_v2 is only
    set while performing an injection, not during initialization.

 2) A buggy BIOS provided invalid "revision" and "length" for the
    extension structure. Add several sanity checks.

 3) When injecting legacy error types on an EINJV2 capable system,
    don't copy the component arrays.

Fixes: 6c70585149 ("ACPI: APEI: EINJ: Check if user asked for EINJV2 injection") # [1]
Fixes: b47610296d ("ACPI: APEI: EINJ: Enable EINJv2 error injections")
Signed-off-by: Tony Luck <tony.luck@intel.com>
[ rjw: Changelog edits ]
Cc: 6.17+ <stable@vger.kernel.org> # 6.17+
Link: https://patch.msgid.link/20251119012712.178715-1-tony.luck@intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-19 13:36:29 +01:00
Bartosz Golaszewski
2b6d546ba8 MAINTAINERS: update my email address
Due to an upcoming change in my professional situation, I will need to
start using my kernel.org address. Update all my MAINTAINERS entries.

Signed-off-by: Bartosz Golaszewski <brgl@kernel.org>
Link: https://lore.kernel.org/r/20251118121816.23018-1-brgl@bgdev.pl
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
2025-11-19 12:02:59 +01:00
Bartosz Golaszewski
d4cd0902c1 gpio: cdev: make sure the cdev fd is still active before emitting events
With the final call to fput() on a file descriptor, the release action
may be deferred and scheduled on a work queue. The reference count of
that descriptor is still zero and it must not be used. It's possible
that a GPIO change, we want to notify the user-space about, happens
AFTER the reference count on the file descriptor associated with the
character device went down to zero but BEFORE the .release() callback
was called from the workqueue and so BEFORE we unregistered from the
notifier.

Using the regular get_file() routine in this situation triggers the
following warning:

  struct file::f_count incremented from zero; use-after-free condition present!

So use the get_file_active() variant that will return NULL on file
descriptors that have been or are being released.

Fixes: 40b7c49950 ("gpio: cdev: put emitting the line state events on a workqueue")
Reported-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
Closes: https://lore.kernel.org/all/5d605f7fc99456804911403102a4fe999a14cc85.camel@siemens.com/
Tested-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
Link: https://lore.kernel.org/r/20251117-gpio-cdev-get-file-v1-1-28a16b5985b8@linaro.org
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
2025-11-19 12:00:28 +01:00
Jiri Slaby (SUSE)
a24074ca88 perf/x86/intel/uncore: Remove superfluous check
The 'pmu' pointer cannot be NULL, as it is taken as a pointer to an array.
Remove the superfluous NULL check.

Found by Coverity: CID#1497507.

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Liang Kan <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://patch.msgid.link/20251119091538.825307-1-jirislaby@kernel.org
2025-11-19 10:26:31 +01:00
Jakub Kicinski
106a67494c Merge branch 'af_unix-fix-so_peek_off-bug-in-unix_stream_read_generic'
Kuniyuki Iwashima says:

====================
af_unix: Fix SO_PEEK_OFF bug in unix_stream_read_generic().

Miao Wang reported a bug of SO_PEEK_OFF on AF_UNIX SOCK_STREAM socket.

Patch 1 fixes the bug and Patch 2 adds a new selftest to cover the case.
====================

Link: https://patch.msgid.link/20251117174740.3684604-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-18 19:19:29 -08:00
Kuniyuki Iwashima
e1bb28bf13 selftest: af_unix: Add test for SO_PEEK_OFF.
The test covers various cases to verify SO_PEEK_OFF behaviour
for all AF_UNIX socket types.

two_chunks_blocking and two_chunks_overlap_blocking reproduce
the issue mentioned in the previous patch.

Without the patch, the two tests fail:

  #  RUN           so_peek_off.stream.two_chunks_blocking ...
  # so_peek_off.c:121:two_chunks_blocking:Expected 'bbbb' == 'aaaabbbb'.
  # two_chunks_blocking: Test terminated by assertion
  #          FAIL  so_peek_off.stream.two_chunks_blocking
  not ok 3 so_peek_off.stream.two_chunks_blocking

  #  RUN           so_peek_off.stream.two_chunks_overlap_blocking ...
  # so_peek_off.c:159:two_chunks_overlap_blocking:Expected 'bbbb' == 'aaaabbbb'.
  # two_chunks_overlap_blocking: Test terminated by assertion
  #          FAIL  so_peek_off.stream.two_chunks_overlap_blocking
  not ok 5 so_peek_off.stream.two_chunks_overlap_blocking

With the patch, all tests pass:

  # PASSED: 15 / 15 tests passed.
  # Totals: pass:15 fail:0 xfail:0 xpass:0 skip:0 error:0

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251117174740.3684604-3-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-18 19:19:09 -08:00
Kuniyuki Iwashima
7bf3a476ce af_unix: Read sk_peek_offset() again after sleeping in unix_stream_read_generic().
Miao Wang reported a bug of SO_PEEK_OFF on AF_UNIX SOCK_STREAM
socket.

The unexpected behaviour is triggered when the peek offset is
larger than the recv queue and the thread is unblocked by new
data.

Let's assume a socket which has "aaaa" in the recv queue and
the peek offset is 4.

First, unix_stream_read_generic() reads the offset 4 and skips
the skb(s) of "aaaa" with the code below:

	skip = max(sk_peek_offset(sk, flags), 0);	/* @skip is 4. */

	do {
	...
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
		...
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;		/* @skip is 0. */
		}

The thread jumps to the 'again' label and goes to sleep since
new data has not arrived yet.

Later, new data "bbbb" unblocks the thread, and the thread jumps
to the 'redo:' label to restart the entire process from the first
skb in the recv queue.

	do {
		...
redo:
		...
		last = skb = skb_peek(&sk->sk_receive_queue);
		...
again:
		if (skb == NULL) {
			...
			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);
			...
			goto redo;			/* @skip is 0 !! */

However, the peek offset is not reset in the path.

If the buffer size is 8, recv() will return "aaaabbbb" without
skipping any data, and the final offset will be 12 (the original
offset 4 + peeked skbs' length 8).

After sleeping in unix_stream_read_generic(), we have to fetch the
peek offset again.

Let's move the redo label before mutex_lock(&u->iolock).

Fixes: 9f389e3567 ("af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag")
Reported-by: Miao Wang <shankerwangmiao@gmail.com>
Closes: https://lore.kernel.org/netdev/3B969F90-F51F-4B9D-AB1A-994D9A54D460@gmail.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251117174740.3684604-2-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-18 19:19:09 -08:00
Pradyumn Rahar
d47515af6c net/mlx5: Clean up only new IRQ glue on request_irq() failure
The mlx5_irq_alloc() function can inadvertently free the entire rmap
and end up in a crash[1] when the other threads tries to access this,
when request_irq() fails due to exhausted IRQ vectors. This commit
modifies the cleanup to remove only the specific IRQ mapping that was
just added.

This prevents removal of other valid mappings and ensures precise
cleanup of the failed IRQ allocation's associated glue object.

Note: This error is observed when both fwctl and rds configs are enabled.

[1]
mlx5_core 0000:05:00.0: Successfully registered panic handler for port 1
mlx5_core 0000:05:00.0: mlx5_irq_alloc:293:(pid 66740): Failed to
request irq. err = -28
infiniband mlx5_0: mlx5_ib_test_wc:290:(pid 66740): Error -28 while
trying to test write-combining support
mlx5_core 0000:05:00.0: Successfully unregistered panic handler for port 1
mlx5_core 0000:06:00.0: Successfully registered panic handler for port 1
mlx5_core 0000:06:00.0: mlx5_irq_alloc:293:(pid 66740): Failed to
request irq. err = -28
infiniband mlx5_0: mlx5_ib_test_wc:290:(pid 66740): Error -28 while
trying to test write-combining support
mlx5_core 0000:06:00.0: Successfully unregistered panic handler for port 1
mlx5_core 0000:03:00.0: mlx5_irq_alloc:293:(pid 28895): Failed to
request irq. err = -28
mlx5_core 0000:05:00.0: mlx5_irq_alloc:293:(pid 28895): Failed to
request irq. err = -28
general protection fault, probably for non-canonical address
0xe277a58fde16f291: 0000 [#1] SMP NOPTI

RIP: 0010:free_irq_cpu_rmap+0x23/0x7d
Call Trace:
   <TASK>
   ? show_trace_log_lvl+0x1d6/0x2f9
   ? show_trace_log_lvl+0x1d6/0x2f9
   ? mlx5_irq_alloc.cold+0x5d/0xf3 [mlx5_core]
   ? __die_body.cold+0x8/0xa
   ? die_addr+0x39/0x53
   ? exc_general_protection+0x1c4/0x3e9
   ? dev_vprintk_emit+0x5f/0x90
   ? asm_exc_general_protection+0x22/0x27
   ? free_irq_cpu_rmap+0x23/0x7d
   mlx5_irq_alloc.cold+0x5d/0xf3 [mlx5_core]
   irq_pool_request_vector+0x7d/0x90 [mlx5_core]
   mlx5_irq_request+0x2e/0xe0 [mlx5_core]
   mlx5_irq_request_vector+0xad/0xf7 [mlx5_core]
   comp_irq_request_pci+0x64/0xf0 [mlx5_core]
   create_comp_eq+0x71/0x385 [mlx5_core]
   ? mlx5e_open_xdpsq+0x11c/0x230 [mlx5_core]
   mlx5_comp_eqn_get+0x72/0x90 [mlx5_core]
   ? xas_load+0x8/0x91
   mlx5_comp_irqn_get+0x40/0x90 [mlx5_core]
   mlx5e_open_channel+0x7d/0x3c7 [mlx5_core]
   mlx5e_open_channels+0xad/0x250 [mlx5_core]
   mlx5e_open_locked+0x3e/0x110 [mlx5_core]
   mlx5e_open+0x23/0x70 [mlx5_core]
   __dev_open+0xf1/0x1a5
   __dev_change_flags+0x1e1/0x249
   dev_change_flags+0x21/0x5c
   do_setlink+0x28b/0xcc4
   ? __nla_parse+0x22/0x3d
   ? inet6_validate_link_af+0x6b/0x108
   ? cpumask_next+0x1f/0x35
   ? __snmp6_fill_stats64.constprop.0+0x66/0x107
   ? __nla_validate_parse+0x48/0x1e6
   __rtnl_newlink+0x5ff/0xa57
   ? kmem_cache_alloc_trace+0x164/0x2ce
   rtnl_newlink+0x44/0x6e
   rtnetlink_rcv_msg+0x2bb/0x362
   ? __netlink_sendskb+0x4c/0x6c
   ? netlink_unicast+0x28f/0x2ce
   ? rtnl_calcit.isra.0+0x150/0x146
   netlink_rcv_skb+0x5f/0x112
   netlink_unicast+0x213/0x2ce
   netlink_sendmsg+0x24f/0x4d9
   __sock_sendmsg+0x65/0x6a
   ____sys_sendmsg+0x28f/0x2c9
   ? import_iovec+0x17/0x2b
   ___sys_sendmsg+0x97/0xe0
   __sys_sendmsg+0x81/0xd8
   do_syscall_64+0x35/0x87
   entry_SYSCALL_64_after_hwframe+0x6e/0x0
RIP: 0033:0x7fc328603727
Code: c3 66 90 41 54 41 89 d4 55 48 89 f5 53 89 fb 48 83 ec 10 e8 0b ed
ff ff 44 89 e2 48 89 ee 89 df 41 89 c0 b8 2e 00 00 00 0f 05 <48> 3d 00
f0 ff ff 77 35 44 89 c7 48 89 44 24 08 e8 44 ed ff ff 48
RSP: 002b:00007ffe8eb3f1a0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007fc328603727
RDX: 0000000000000000 RSI: 00007ffe8eb3f1f0 RDI: 000000000000000d
RBP: 00007ffe8eb3f1f0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000000
R13: 0000000000000000 R14: 00007ffe8eb3f3c8 R15: 00007ffe8eb3f3bc
   </TASK>
---[ end trace f43ce73c3c2b13a2 ]---
RIP: 0010:free_irq_cpu_rmap+0x23/0x7d
Code: 0f 1f 80 00 00 00 00 48 85 ff 74 6b 55 48 89 fd 53 66 83 7f 06 00
74 24 31 db 48 8b 55 08 0f b7 c3 48 8b 04 c2 48 85 c0 74 09 <8b> 38 31
f6 e8 c4 0a b8 ff 83 c3 01 66 3b 5d 06 72 de b8 ff ff ff
RSP: 0018:ff384881640eaca0 EFLAGS: 00010282
RAX: e277a58fde16f291 RBX: 0000000000000000 RCX: 0000000000000000
RDX: ff2335e2e20b3600 RSI: 0000000000000000 RDI: ff2335e2e20b3400
RBP: ff2335e2e20b3400 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 00000000ffffffe4 R12: ff384881640ead88
R13: ff2335c3760751e0 R14: ff2335e2e1672200 R15: ff2335c3760751f8
FS:  00007fc32ac22480(0000) GS:ff2335e2d6e00000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f651ab54000 CR3: 00000029f1206003 CR4: 0000000000771ef0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
Kernel panic - not syncing: Fatal exception
Kernel Offset: 0x1dc00000 from 0xffffffff81000000 (relocation range:
0xffffffff80000000-0xffffffffbfffffff)
kvm-guest: disable async PF for cpu 0

Fixes: 3354822cde ("net/mlx5: Use dynamic msix vectors allocation")
Signed-off-by: Mohith Kumar Thummaluru<mohith.k.kumar.thummaluru@oracle.com>
Tested-by: Mohith Kumar Thummaluru<mohith.k.kumar.thummaluru@oracle.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drori <shayd@nvidia.com>
Signed-off-by: Pradyumn Rahar <pradyumn.rahar@oracle.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1763381768-1234998-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-18 18:47:09 -08:00
Eric Dumazet
426358d9be mptcp: fix a race in mptcp_pm_del_add_timer()
mptcp_pm_del_add_timer() can call sk_stop_timer_sync(sk, &entry->add_timer)
while another might have free entry already, as reported by syzbot.

Add RCU protection to fix this issue.

Also change confusing add_timer variable with stop_timer boolean.

syzbot report:

BUG: KASAN: slab-use-after-free in __timer_delete_sync+0x372/0x3f0 kernel/time/timer.c:1616
Read of size 4 at addr ffff8880311e4150 by task kworker/1:1/44

CPU: 1 UID: 0 PID: 44 Comm: kworker/1:1 Not tainted syzkaller #0 PREEMPT_{RT,(full)}
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025
Workqueue: events mptcp_worker
Call Trace:
 <TASK>
  dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
  print_address_description mm/kasan/report.c:378 [inline]
  print_report+0xca/0x240 mm/kasan/report.c:482
  kasan_report+0x118/0x150 mm/kasan/report.c:595
  __timer_delete_sync+0x372/0x3f0 kernel/time/timer.c:1616
  sk_stop_timer_sync+0x1b/0x90 net/core/sock.c:3631
  mptcp_pm_del_add_timer+0x283/0x310 net/mptcp/pm.c:362
  mptcp_incoming_options+0x1357/0x1f60 net/mptcp/options.c:1174
  tcp_data_queue+0xca/0x6450 net/ipv4/tcp_input.c:5361
  tcp_rcv_established+0x1335/0x2670 net/ipv4/tcp_input.c:6441
  tcp_v4_do_rcv+0x98b/0xbf0 net/ipv4/tcp_ipv4.c:1931
  tcp_v4_rcv+0x252a/0x2dc0 net/ipv4/tcp_ipv4.c:2374
  ip_protocol_deliver_rcu+0x221/0x440 net/ipv4/ip_input.c:205
  ip_local_deliver_finish+0x3bb/0x6f0 net/ipv4/ip_input.c:239
  NF_HOOK+0x30c/0x3a0 include/linux/netfilter.h:318
  NF_HOOK+0x30c/0x3a0 include/linux/netfilter.h:318
  __netif_receive_skb_one_core net/core/dev.c:6079 [inline]
  __netif_receive_skb+0x143/0x380 net/core/dev.c:6192
  process_backlog+0x31e/0x900 net/core/dev.c:6544
  __napi_poll+0xb6/0x540 net/core/dev.c:7594
  napi_poll net/core/dev.c:7657 [inline]
  net_rx_action+0x5f7/0xda0 net/core/dev.c:7784
  handle_softirqs+0x22f/0x710 kernel/softirq.c:622
  __do_softirq kernel/softirq.c:656 [inline]
  __local_bh_enable_ip+0x1a0/0x2e0 kernel/softirq.c:302
  mptcp_pm_send_ack net/mptcp/pm.c:210 [inline]
 mptcp_pm_addr_send_ack+0x41f/0x500 net/mptcp/pm.c:-1
  mptcp_pm_worker+0x174/0x320 net/mptcp/pm.c:1002
  mptcp_worker+0xd5/0x1170 net/mptcp/protocol.c:2762
  process_one_work kernel/workqueue.c:3263 [inline]
  process_scheduled_works+0xae1/0x17b0 kernel/workqueue.c:3346
  worker_thread+0x8a0/0xda0 kernel/workqueue.c:3427
  kthread+0x711/0x8a0 kernel/kthread.c:463
  ret_from_fork+0x4bc/0x870 arch/x86/kernel/process.c:158
  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
 </TASK>

Allocated by task 44:
  kasan_save_stack mm/kasan/common.c:56 [inline]
  kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
  poison_kmalloc_redzone mm/kasan/common.c:400 [inline]
  __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:417
  kasan_kmalloc include/linux/kasan.h:262 [inline]
  __kmalloc_cache_noprof+0x1ef/0x6c0 mm/slub.c:5748
  kmalloc_noprof include/linux/slab.h:957 [inline]
  mptcp_pm_alloc_anno_list+0x104/0x460 net/mptcp/pm.c:385
  mptcp_pm_create_subflow_or_signal_addr+0xf9d/0x1360 net/mptcp/pm_kernel.c:355
  mptcp_pm_nl_fully_established net/mptcp/pm_kernel.c:409 [inline]
  __mptcp_pm_kernel_worker+0x417/0x1ef0 net/mptcp/pm_kernel.c:1529
  mptcp_pm_worker+0x1ee/0x320 net/mptcp/pm.c:1008
  mptcp_worker+0xd5/0x1170 net/mptcp/protocol.c:2762
  process_one_work kernel/workqueue.c:3263 [inline]
  process_scheduled_works+0xae1/0x17b0 kernel/workqueue.c:3346
  worker_thread+0x8a0/0xda0 kernel/workqueue.c:3427
  kthread+0x711/0x8a0 kernel/kthread.c:463
  ret_from_fork+0x4bc/0x870 arch/x86/kernel/process.c:158
  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245

Freed by task 6630:
  kasan_save_stack mm/kasan/common.c:56 [inline]
  kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
  __kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:587
  kasan_save_free_info mm/kasan/kasan.h:406 [inline]
  poison_slab_object mm/kasan/common.c:252 [inline]
  __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284
  kasan_slab_free include/linux/kasan.h:234 [inline]
  slab_free_hook mm/slub.c:2523 [inline]
  slab_free mm/slub.c:6611 [inline]
  kfree+0x197/0x950 mm/slub.c:6818
  mptcp_remove_anno_list_by_saddr+0x2d/0x40 net/mptcp/pm.c:158
  mptcp_pm_flush_addrs_and_subflows net/mptcp/pm_kernel.c:1209 [inline]
  mptcp_nl_flush_addrs_list net/mptcp/pm_kernel.c:1240 [inline]
  mptcp_pm_nl_flush_addrs_doit+0x593/0xbb0 net/mptcp/pm_kernel.c:1281
  genl_family_rcv_msg_doit+0x215/0x300 net/netlink/genetlink.c:1115
  genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline]
  genl_rcv_msg+0x60e/0x790 net/netlink/genetlink.c:1210
  netlink_rcv_skb+0x208/0x470 net/netlink/af_netlink.c:2552
  genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219
  netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline]
  netlink_unicast+0x846/0xa10 net/netlink/af_netlink.c:1346
  netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1896
  sock_sendmsg_nosec net/socket.c:727 [inline]
  __sock_sendmsg+0x21c/0x270 net/socket.c:742
  ____sys_sendmsg+0x508/0x820 net/socket.c:2630
  ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2684
  __sys_sendmsg net/socket.c:2716 [inline]
  __do_sys_sendmsg net/socket.c:2721 [inline]
  __se_sys_sendmsg net/socket.c:2719 [inline]
  __x64_sys_sendmsg+0x1a1/0x260 net/socket.c:2719
  do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
  do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Cc: stable@vger.kernel.org
Fixes: 00cfd77b90 ("mptcp: retransmit ADD_ADDR when timeout")
Reported-by: syzbot+2a6fbf0f0530375968df@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/691ad3c3.a70a0220.f6df1.0004.GAE@google.com
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Geliang Tang <geliang@kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251117100745.1913963-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-18 18:33:01 -08:00
Jakub Kicinski
c3995fc1a8 Merge tag 'ipsec-2025-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec
Steffen Klassert says:

====================
pull request (net): ipsec 2025-11-18

1) Misc fixes for xfrm_state creation/modification/deletion.
   Patchset from Sabrina Dubroca.

2) Fix inner packet family determination for xfrm offloads.
   From Jianbo Liu.

3) Don't push locally generated packets directly to L2 tunnel
   mode offloading, they still need processing from the standard
   xfrm path. From Jianbo Liu.

4) Fix memory leaks in xfrm_add_acquire for policy offloads and policy
   security contexts. From Zilin Guan.

* tag 'ipsec-2025-11-18' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec:
  xfrm: fix memory leak in xfrm_add_acquire()
  xfrm: Prevent locally generated packets from direct output in tunnel mode
  xfrm: Determine inner GSO type from packet inner protocol
  xfrm: Check inner packet family directly from skb_dst
  xfrm: check all hash buckets for leftover states during netns deletion
  xfrm: set err and extack on failure to create pcpu SA
  xfrm: call xfrm_dev_state_delete when xfrm_state_migrate fails to add the state
  xfrm: make state as DEAD before final put when migrate fails
  xfrm: also call xfrm_state_delete_tunnel at destroy time for states that were never added
  xfrm: drop SA reference in xfrm_state_update if dir doesn't match
====================

Link: https://patch.msgid.link/20251118085344.2199815-1-steffen.klassert@secunet.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-18 17:58:44 -08:00
Shay Drory
f94c1a114a devlink: rate: Unset parent pointer in devl_rate_nodes_destroy
The function devl_rate_nodes_destroy is documented to "Unset parent for
all rate objects". However, it was only calling the driver-specific
`rate_leaf_parent_set` or `rate_node_parent_set` ops and decrementing
the parent's refcount, without actually setting the
`devlink_rate->parent` pointer to NULL.

This leaves a dangling pointer in the `devlink_rate` struct, which cause
refcount error in netdevsim[1] and mlx5[2]. In addition, this is
inconsistent with the behavior of `devlink_nl_rate_parent_node_set`,
where the parent pointer is correctly cleared.

This patch fixes the issue by explicitly setting `devlink_rate->parent`
to NULL after notifying the driver, thus fulfilling the function's
documented behavior for all rate objects.

[1]
repro steps:
echo 1 > /sys/bus/netdevsim/new_device
devlink dev eswitch set netdevsim/netdevsim1 mode switchdev
echo 1 > /sys/bus/netdevsim/devices/netdevsim1/sriov_numvfs
devlink port function rate add netdevsim/netdevsim1/test_node
devlink port function rate set netdevsim/netdevsim1/128 parent test_node
echo 1 > /sys/bus/netdevsim/del_device

dmesg:
refcount_t: decrement hit 0; leaking memory.
WARNING: CPU: 8 PID: 1530 at lib/refcount.c:31 refcount_warn_saturate+0x42/0xe0
CPU: 8 UID: 0 PID: 1530 Comm: bash Not tainted 6.18.0-rc4+ #1 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
RIP: 0010:refcount_warn_saturate+0x42/0xe0
Call Trace:
 <TASK>
 devl_rate_leaf_destroy+0x8d/0x90
 __nsim_dev_port_del+0x6c/0x70 [netdevsim]
 nsim_dev_reload_destroy+0x11c/0x140 [netdevsim]
 nsim_drv_remove+0x2b/0xb0 [netdevsim]
 device_release_driver_internal+0x194/0x1f0
 bus_remove_device+0xc6/0x130
 device_del+0x159/0x3c0
 device_unregister+0x1a/0x60
 del_device_store+0x111/0x170 [netdevsim]
 kernfs_fop_write_iter+0x12e/0x1e0
 vfs_write+0x215/0x3d0
 ksys_write+0x5f/0xd0
 do_syscall_64+0x55/0x10f0
 entry_SYSCALL_64_after_hwframe+0x4b/0x53

[2]
devlink dev eswitch set pci/0000:08:00.0 mode switchdev
devlink port add pci/0000:08:00.0 flavour pcisf pfnum 0 sfnum 1000
devlink port function rate add pci/0000:08:00.0/group1
devlink port function rate set pci/0000:08:00.0/32768 parent group1
modprobe -r mlx5_ib mlx5_fwctl mlx5_core

dmesg:
refcount_t: decrement hit 0; leaking memory.
WARNING: CPU: 7 PID: 16151 at lib/refcount.c:31 refcount_warn_saturate+0x42/0xe0
CPU: 7 UID: 0 PID: 16151 Comm: bash Not tainted 6.17.0-rc7_for_upstream_min_debug_2025_10_02_12_44 #1 NONE
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
RIP: 0010:refcount_warn_saturate+0x42/0xe0
Call Trace:
 <TASK>
 devl_rate_leaf_destroy+0x8d/0x90
 mlx5_esw_offloads_devlink_port_unregister+0x33/0x60 [mlx5_core]
 mlx5_esw_offloads_unload_rep+0x3f/0x50 [mlx5_core]
 mlx5_eswitch_unload_sf_vport+0x40/0x90 [mlx5_core]
 mlx5_sf_esw_event+0xc4/0x120 [mlx5_core]
 notifier_call_chain+0x33/0xa0
 blocking_notifier_call_chain+0x3b/0x50
 mlx5_eswitch_disable_locked+0x50/0x110 [mlx5_core]
 mlx5_eswitch_disable+0x63/0x90 [mlx5_core]
 mlx5_unload+0x1d/0x170 [mlx5_core]
 mlx5_uninit_one+0xa2/0x130 [mlx5_core]
 remove_one+0x78/0xd0 [mlx5_core]
 pci_device_remove+0x39/0xa0
 device_release_driver_internal+0x194/0x1f0
 unbind_store+0x99/0xa0
 kernfs_fop_write_iter+0x12e/0x1e0
 vfs_write+0x215/0x3d0
 ksys_write+0x5f/0xd0
 do_syscall_64+0x53/0x1f0
 entry_SYSCALL_64_after_hwframe+0x4b/0x53

Fixes: d755598450 ("devlink: Allow setting parent node of rate objects")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1763381149-1234377-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-18 17:12:21 -08:00
Jared Kangas
6010d4d8b5 pinctrl: s32cc: initialize gpio_pin_config::list after kmalloc()
s32_pmx_gpio_request_enable() does not initialize the newly-allocated
gpio_pin_config::list before adding it to s32_pinctrl::gpio_configs.
This could result in a linked list corruption.

Initialize the new list_head with INIT_LIST_HEAD() to fix this.

Fixes: fd84aaa817 ("pinctrl: add NXP S32 SoC family support")
Signed-off-by: Jared Kangas <jkangas@redhat.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2025-11-18 23:56:11 +01:00
Jared Kangas
97ea34defb pinctrl: s32cc: fix uninitialized memory in s32_pinctrl_desc
s32_pinctrl_desc is allocated with devm_kmalloc(), but not all of its
fields are initialized. Notably, num_custom_params is used in
pinconf_generic_parse_dt_config(), resulting in intermittent allocation
errors, such as the following splat when probing i2c-imx:

        WARNING: CPU: 0 PID: 176 at mm/page_alloc.c:4795 __alloc_pages_noprof+0x290/0x300
        [...]
        Hardware name: NXP S32G3 Reference Design Board 3 (S32G-VNP-RDB3) (DT)
        [...]
        Call trace:
         __alloc_pages_noprof+0x290/0x300 (P)
         ___kmalloc_large_node+0x84/0x168
         __kmalloc_large_node_noprof+0x34/0x120
         __kmalloc_noprof+0x2ac/0x378
         pinconf_generic_parse_dt_config+0x68/0x1a0
         s32_dt_node_to_map+0x104/0x248
         dt_to_map_one_config+0x154/0x1d8
         pinctrl_dt_to_map+0x12c/0x280
         create_pinctrl+0x6c/0x270
         pinctrl_get+0xc0/0x170
         devm_pinctrl_get+0x50/0xa0
         pinctrl_bind_pins+0x60/0x2a0
         really_probe+0x60/0x3a0
        [...]
         __platform_driver_register+0x2c/0x40
         i2c_adap_imx_init+0x28/0xff8 [i2c_imx]
        [...]

This results in later parse failures that can cause issues in dependent
drivers:

        s32g-siul2-pinctrl 4009c240.pinctrl: /soc@0/pinctrl@4009c240/i2c0-pins/i2c0-grp0: could not parse node property
        s32g-siul2-pinctrl 4009c240.pinctrl: /soc@0/pinctrl@4009c240/i2c0-pins/i2c0-grp0: could not parse node property
        [...]
        pca953x 0-0022: failed writing register: -6
        i2c i2c-0: IMX I2C adapter registered
        s32g-siul2-pinctrl 4009c240.pinctrl: /soc@0/pinctrl@4009c240/i2c2-pins/i2c2-grp0: could not parse node property
        s32g-siul2-pinctrl 4009c240.pinctrl: /soc@0/pinctrl@4009c240/i2c2-pins/i2c2-grp0: could not parse node property
        i2c i2c-1: IMX I2C adapter registered
        s32g-siul2-pinctrl 4009c240.pinctrl: /soc@0/pinctrl@4009c240/i2c4-pins/i2c4-grp0: could not parse node property
        s32g-siul2-pinctrl 4009c240.pinctrl: /soc@0/pinctrl@4009c240/i2c4-pins/i2c4-grp0: could not parse node property
        i2c i2c-2: IMX I2C adapter registered

Fix this by initializing s32_pinctrl_desc with devm_kzalloc() instead of
devm_kmalloc() in s32_pinctrl_probe(), which sets the previously
uninitialized fields to zero.

Fixes: fd84aaa817 ("pinctrl: add NXP S32 SoC family support")
Signed-off-by: Jared Kangas <jkangas@redhat.com>
Tested-by: Jan Petrous (OSS) <jan.petrous@oss.nxp.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2025-11-18 23:56:11 +01:00
Grzegorz Nitka
23a5b9b12d ice: fix PTP cleanup on driver removal in error path
Improve the cleanup on releasing PTP resources in error path.
The error case might happen either at the driver probe and PTP
feature initialization or on PTP restart (errors in reset handling, NVM
update etc). In both cases, calls to PF PTP cleanup (ice_ptp_cleanup_pf
function) and 'ps_lock' mutex deinitialization were missed.
Additionally, ptp clock was not unregistered in the latter case.

Keep PTP state as 'uninitialized' on init to distinguish between error
scenarios and to avoid resource release duplication at driver removal.

The consequence of missing ice_ptp_cleanup_pf call is the following call
trace dumped when ice_adapter object is freed (port list is not empty,
as it is required at this stage):

[  T93022] ------------[ cut here ]------------
[  T93022] WARNING: CPU: 10 PID: 93022 at
ice/ice_adapter.c:67 ice_adapter_put+0xef/0x100 [ice]
...
[  T93022] RIP: 0010:ice_adapter_put+0xef/0x100 [ice]
...
[  T93022] Call Trace:
[  T93022]  <TASK>
[  T93022]  ? ice_adapter_put+0xef/0x100 [ice
33d2647ad4f6d866d41eefff1806df37c68aef0c]
[  T93022]  ? __warn.cold+0xb0/0x10e
[  T93022]  ? ice_adapter_put+0xef/0x100 [ice
33d2647ad4f6d866d41eefff1806df37c68aef0c]
[  T93022]  ? report_bug+0xd8/0x150
[  T93022]  ? handle_bug+0xe9/0x110
[  T93022]  ? exc_invalid_op+0x17/0x70
[  T93022]  ? asm_exc_invalid_op+0x1a/0x20
[  T93022]  ? ice_adapter_put+0xef/0x100 [ice
33d2647ad4f6d866d41eefff1806df37c68aef0c]
[  T93022]  pci_device_remove+0x42/0xb0
[  T93022]  device_release_driver_internal+0x19f/0x200
[  T93022]  driver_detach+0x48/0x90
[  T93022]  bus_remove_driver+0x70/0xf0
[  T93022]  pci_unregister_driver+0x42/0xb0
[  T93022]  ice_module_exit+0x10/0xdb0 [ice
33d2647ad4f6d866d41eefff1806df37c68aef0c]
...
[  T93022] ---[ end trace 0000000000000000 ]---
[  T93022] ice: module unloaded

Fixes: e800654e85 ("ice: Use ice_adapter for PTP shared data instead of auxdev")
Signed-off-by: Grzegorz Nitka <grzegorz.nitka@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Tested-by: Rinitha S <sx.rinitha@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2025-11-18 14:30:11 -08:00
Andreas Kemnade
7a601324ac MAINTAINERS: sync omap devicetree maintainers with omap platform
Both used to go through Tony's branches, so lets keep things together.
This was missed at the time when Co-Maintainers were added.

Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
Acked-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Acked-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Acked-by: Kevin Hilman <khilman@baylibre.com>
Link: https://patch.msgid.link/20240915195321.1071967-1-andreas@kemnade.info
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Link: https://lore.kernel.org/r/20251118192652.316198-1-khilman@baylibre.com
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-18 22:46:13 +01:00
Arnd Bergmann
c10519c8c8 Merge tag 'v6.18-rockchip-dtsfixes1' of git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip into arm/fixes
Regulator/supply fixes for a number of boards, removed too fast
cpu OPPs from rk3576 (not supported in newer vendor TF-A and never
supported in upstream TF-A). As well as some DTS validation fixes
and one pinctrl fix for the odroid-m1.

* tag 'v6.18-rockchip-dtsfixes1' of git://git.kernel.org/pub/scm/linux/kernel/git/mmind/linux-rockchip:
  arm64: dts: rockchip: fix PCIe 3.3V regulator voltage on orangepi-5
  arm64: dts: rockchip: disable HS400 on RK3588 Tiger
  arm64: dts: rockchip: drop reset from rk3576 i2c9 node
  arm64: dts: rockchip: Fix USB power enable pin for BTT CB2 and Pi2
  arm64: dts: rockchip: Fix vccio4-supply on rk3566-pinetab2
  arm64: dts: rockchip: include rk3399-base instead of rk3399 in rk3399-op1
  arm64: dts: rockchip: Fix indentation on rk3399 haikou demo dtso
  arm64: dts: rockchip: Make RK3588 GPU OPP table naming less generic
  arm64: dts: rockchip: Drop 'rockchip,grf' prop from tsadc on rk3328
  arm64: dts: rockchip: Remove non-functioning CPU OPPs from RK3576
  arm64: dts: rockchip: Fix PCIe power enable pin for BigTreeTech CB2 and Pi2
  arm64: dts: rockchip: Set correct pinctrl for I2S1 8ch TX on odroid-m1

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-18 22:45:25 +01:00
Emil Tantilov
118082368c idpf: fix possible vport_config NULL pointer deref in remove
Attempting to remove the driver will cause a crash in cases where
the vport failed to initialize. Following trace is from an instance where
the driver failed during an attempt to create a VF:
[ 1661.543624] idpf 0000:84:00.7: Device HW Reset initiated
[ 1722.923726] idpf 0000:84:00.7: Transaction timed-out (op:1 cookie:2900 vc_op:1 salt:29 timeout:60000ms)
[ 1723.353263] BUG: kernel NULL pointer dereference, address: 0000000000000028
...
[ 1723.358472] RIP: 0010:idpf_remove+0x11c/0x200 [idpf]
...
[ 1723.364973] Call Trace:
[ 1723.365475]  <TASK>
[ 1723.365972]  pci_device_remove+0x42/0xb0
[ 1723.366481]  device_release_driver_internal+0x1a9/0x210
[ 1723.366987]  pci_stop_bus_device+0x6d/0x90
[ 1723.367488]  pci_stop_and_remove_bus_device+0x12/0x20
[ 1723.367971]  pci_iov_remove_virtfn+0xbd/0x120
[ 1723.368309]  sriov_disable+0x34/0xe0
[ 1723.368643]  idpf_sriov_configure+0x58/0x140 [idpf]
[ 1723.368982]  sriov_numvfs_store+0xda/0x1c0

Avoid the NULL pointer dereference by adding NULL pointer check for
vport_config[i], before freeing user_config.q_coalesce.

Fixes: e1e3fec3e3 ("idpf: preserve coalescing settings across resets")
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Reviewed-by: Chittim Madhu <madhu.chittim@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
2025-11-18 13:22:11 -08:00
Venkata Ramana Nayana
5b38c22687 drm/xe/irq: Handle msix vector0 interrupt
Current gu2host handler registered as MSI-X vector 0 and as per bspec for
a msix vector 0 interrupt, the driver must check the legacy registers
190008(TILE_INT_REG), 190060h (GT INTR Identity Reg 0) and other registers
mentioned in "Interrupt Service Routine Pseudocode" otherwise it will block
the next interrupts. To overcome this issue replacing guc2host handler
with legacy xe_irq_handler.

Fixes: da889070be ("drm/xe/irq: Separate MSI and MSI-X flows")
Bspec: 62357
Signed-off-by: Venkata Ramana Nayana <venkata.ramana.nayana@intel.com>
Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com>
Link: https://patch.msgid.link/20251107083141.2080189-1-venkata.ramana.nayana@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit c34a14bce7090862ebe5a64abe8d85df75e62737)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
2025-11-18 10:05:03 -08:00
Shuicheng Lin
27c0a54e48 drm/xe: Remove duplicate DRM_EXEC selection from Kconfig
There are 2 identical "select DRM_EXEC" lines for DRM_XE.
Remove one to clean up the configuration.

Fixes: d490ecf577 ("drm/xe: Rework xe_exec and the VM rebind worker to use the drm_exec helper")
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Reviewed-by: Nitin Gote <nitin.r.gote@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patch.msgid.link/20251110232657.1807998-2-shuicheng.lin@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
(cherry picked from commit b1aa02acd03bfef3ed39c511d33c4a4303d2f9b1)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
2025-11-18 10:04:53 -08:00
Matt Roper
905a3468ec drm/xe/kunit: Fix forcewake assertion in mocs test
The MOCS kunit test calls KUNIT_ASSERT_TRUE_MSG() with a condition of
'true;' this prevents the assertion from ever failing.  Replace
KUNIT_ASSERT_TRUE_MSG with KUNIT_FAIL_AND_ABORT to get the intended
failure behavior in cases where forcewake was not acquired successfully.

Fixes: 51c0ee84e4 ("drm/xe/tests/mocs: Hold XE_FORCEWAKE_ALL for LNCF regs")
Cc: Tejas Upadhyay <tejas.upadhyay@intel.com>
Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Link: https://patch.msgid.link/20251113234038.2256106-2-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 9be4f0f687048ba77428ceca11994676736507b7)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
2025-11-18 10:04:48 -08:00
Shuicheng Lin
d52dea485c drm/xe: Prevent BIT() overflow when handling invalid prefetch region
If user provides a large value (such as 0x80) for parameter
prefetch_mem_region_instance in vm_bind ioctl, it will cause
BIT(prefetch_region) overflow as below:
"
 ------------[ cut here ]------------
 UBSAN: shift-out-of-bounds in drivers/gpu/drm/xe/xe_vm.c:3414:7
 shift exponent 128 is too large for 64-bit type 'long unsigned int'
 CPU: 8 UID: 0 PID: 53120 Comm: xe_exec_system_ Tainted: G        W           6.18.0-rc1-lgci-xe-kernel+ #200 PREEMPT(voluntary)
 Tainted: [W]=WARN
 Hardware name: ASUS System Product Name/PRIME Z790-P WIFI, BIOS 0812 02/24/2023
 Call Trace:
  <TASK>
  dump_stack_lvl+0xa0/0xc0
  dump_stack+0x10/0x20
  ubsan_epilogue+0x9/0x40
  __ubsan_handle_shift_out_of_bounds+0x10e/0x170
  ? mutex_unlock+0x12/0x20
  xe_vm_bind_ioctl.cold+0x20/0x3c [xe]
 ...
"
Fix it by validating prefetch_region before the BIT() usage.

v2: Add Closes and Cc stable kernels. (Matt)

Reported-by: Koen Koning <koen.koning@intel.com>
Reported-by: Peter Senna Tschudin <peter.senna@linux.intel.com>
Fixes: dd08ebf6c3 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6478
Cc: <stable@vger.kernel.org> # v6.8+
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Shuicheng Lin <shuicheng.lin@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patch.msgid.link/20251112181005.2120521-2-shuicheng.lin@intel.com
(cherry picked from commit 8f565bdd14eec5611cc041dba4650e42ccdf71d9)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
2025-11-18 10:04:41 -08:00
Linus Torvalds
8b690556d8 Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "Arm:

   - Only adjust the ID registers when no irqchip has been created once
     per VM run, instead of doing it once per vcpu, as this otherwise
     triggers a pretty bad conbsistency check failure in the sysreg code

   - Make sure the per-vcpu Fine Grain Traps are computed before we load
     the system registers on the HW, as we otherwise start running
     without anything set until the first preemption of the vcpu

  x86:

   - Fix selftests failure on AMD, checking for an optimization that was
     not happening anymore"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SVM: Fix redundant updates of LBR MSR intercepts
  KVM: arm64: VHE: Compute fgt traps before activating them
  KVM: arm64: Finalize ID registers only once per VM
2025-11-18 10:02:22 -08:00
Robert McClinton
9eb00b5f56 drm/radeon: delete radeon_fence_process in is_signaled, no deadlock
Delete the attempt to progress the queue when checking if fence is
signaled. This avoids deadlock.

dma-fence_ops::signaled can be called with the fence lock in unknown
state. For radeon, the fence lock is also the wait queue lock. This can
cause a self deadlock when signaled() tries to make forward progress on
the wait queue. But advancing the queue is unneeded because incorrectly
returning false from signaled() is perfectly acceptable.

Link: https://github.com/brave/brave-browser/issues/49182
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4641
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Robert McClinton <rbmccav@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 527ba26e50ec2ca2be9c7c82f3ad42998a75d0db)
Cc: stable@vger.kernel.org
2025-11-18 12:09:47 -05:00
Fangzhi Zuo
1788ef3072 drm/amd/display: Fix pbn to kbps Conversion
[Why]
Existing routine has two conversion sequence,
pbn_to_kbps and kbps_to_pbn with margin.
Non of those has without-margin calculation.

kbps_to_pbn with margin conversion includes
fec overhead which has already been included in
pbn_div calculation with 0.994 factor considered.
It is a double counted fec overhead factor that causes
potential bw loss.

[How]
Add without-margin calculation.
Fix fec overhead double counted issue.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3735
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Fangzhi Zuo <Jerry.Zuo@amd.com>
Signed-off-by: Ivan Lipski <ivan.lipski@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit e0dec00f3d05e8c0eceaaebfdca217f8d10d380c)
Cc: stable@vger.kernel.org
2025-11-18 12:08:51 -05:00
Abdun Nihaal
a78eb69d60 HID: uclogic: Fix potential memory leak in error path
In uclogic_params_ugee_v2_init_event_hooks(), the memory allocated for
event_hook is not freed in the next error path. Fix that by freeing it.

Fixes: a251d6576d ("HID: uclogic: Handle wireless device reconnection")
Signed-off-by: Abdun Nihaal <nihaal@cse.iitm.ac.in>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-11-18 18:08:20 +01:00
Ivan Lipski
5bab4c8939 drm/amd/display: Clear the CUR_ENABLE register on DCN20 on DPP5
[Why]
On DCN20 & DCN30, the 6th DPP's & HUBP's are powered on permanently and
cannot be power gated. Thus, when dpp_reset() is invoked for the DPP5,
while it's still powered on, the cached cursor_state
(dpp_base->pos.cur0_ctl.bits.cur0_enable)
and the actual state (CUR0_ENABLE) bit are unsycned. This can cause a
double cursor in full screen with non-native scaling.

[How]
Force disable cursor on DPP5 on plane powerdown for ASICs w/ 6 DPPs/HUBPs.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4673
Reviewed-by: Aric Cyr <aric.cyr@amd.com>
Signed-off-by: Ivan Lipski <ivan.lipski@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 79b3c037f972dcb13e325a8eabfb8da835764e15)
Cc: stable@vger.kernel.org
2025-11-18 12:07:50 -05:00
Abdun Nihaal
8513c154f8 HID: playstation: Fix memory leak in dualshock4_get_calibration_data()
The memory allocated for buf is not freed in the error paths when
ps_get_report() fails. Free buf before jumping to transfer_failed label

Fixes: 947992c7fa ("HID: playstation: DS4: Fix calibration workaround for clone devices")
Signed-off-by: Abdun Nihaal <nihaal@cse.iitm.ac.in>
Reviewed-by: Silvan Jegen <s.jegen@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-11-18 18:07:14 +01:00
Ivan Lipski
c97da4785b drm/amd/display: Add an HPD filter for HDMI
[Why]
Some monitors perform rapid “autoscan” HPD re‑assertions right after a
disconnect or powersaving mode enablement. These appear as a quick
disconnect→reconnect with an identical EDID. Since Linux has no HDMI
hotplug detection (HPD) filter, these quick reconnects are seen as hotplug
events, which can unintentionally wake a system with DPMS off.

An example: https://gitlab.freedesktop.org/drm/amd/-/issues/2876

Such 'fake reconnects' are considered when the interval between a
disconnect and a connect is within 1500ms (experimentally chosen using
several monitors), and the two connections have the same EDID.

[How]
Implement a time-based debounce mechanism:

1. On HDMI disconnect detection, instead of immediately processing the
HPD event, save the current sink and schedule delayed work (default 1500ms)

2. If another HDMI disconnect HPD event arrives during the debounce period,
it reschedules the pending work, ensuring only the final state is processed.

3. When the debounce timer expires, re-detect the display and compare the
new sink with the cached one using EDID comparison.

4. If sinks match (same EDID), this was a spontaneous HPD toggle:
   - Update connector state internally
   - Skip hotplug event to prevent desktop rearrangement

   If sinks differ, this was a real display change:
   - Process normally with the hotplug event

The debounce delay is configurable via module parameter
'hdmi_hpd_debounce_delay_ms'.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2876
Reviewed-by: Sun peng (Leo) Li <sunpeng.li@amd.com>
Signed-off-by: Ivan Lipski <ivan.lipski@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit c918e75e1ed95be76f8e3156a411188f650fe03f)
2025-11-18 12:07:07 -05:00
Mario Limonciello (AMD)
8612badc33 drm/amd/display: Increase DPCD read retries
[Why]
Empirical measurement of some monitors that fail to read EDID while
booting shows that the number of retries with a 30ms delay between
tries is as high as 16.

[How]
Increase number of retries to 20.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4672
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Ivan Lipski <ivan.lipski@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit ad1c59ad7cf74ec06e32fe2c330ac1e957222288)
Cc: stable@vger.kernel.org
2025-11-18 12:06:34 -05:00
Tomasz Pakuła
9d7b89a102 HID: pidff: Fix needs_playback check
A small bug made it's way here when rewriting code to Linux quality.
Currently, if an effect is not infinite and a program requests it's
playback with the same number of loops, the play command won't be fired
and if an effect is infinite, the spam will continue.

We want every playback update for non-infinite effects and only some
for infinite (detecting when a program requests stop with 0 which will
be different than previous value which is usually 1 or 255).

Signed-off-by: Tomasz Pakuła <tomasz.pakula.oficjalny@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-11-18 18:04:28 +01:00
Stuart Hayhurst
4e127a7478 HID: corsair-void: Use %pe for printing PTR_ERR
Use %pe to print a PTR_ERR to silence a cocci warning

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Julia Lawall <julia.lawall@inria.fr>
Closes: https://lore.kernel.org/r/202510300342.WtPn2jF3-lkp@intel.com/
Signed-off-by: Stuart Hayhurst <stuart.a.hayhurst@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-11-18 18:03:48 +01:00
Mario Limonciello (AMD)
71ad9054c1 drm/amd/display: Move sleep into each retry for retrieve_link_cap()
[Why]
When a monitor is booting it's possible that it isn't ready to retrieve
link caps and this can lead to an EDID read failure:

```
[drm:retrieve_link_cap [amdgpu]] *ERROR* retrieve_link_cap: Read receiver caps dpcd data failed.
amdgpu 0000:c5:00.0: [drm] *ERROR* No EDID read.
```

[How]
Rather than msleep once and try a few times, msleep each time.  Should
be no changes for existing working monitors, but should correct reading
caps on a monitor that is slow to boot.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4672
Reviewed-by: Alex Hung <alex.hung@amd.com>
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Ivan Lipski <ivan.lipski@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 669dca37b3348a447db04bbdcbb3def94d5997cc)
Cc: stable@vger.kernel.org
2025-11-18 12:03:42 -05:00
Naoki Ueki
cdcbb8e8d1 HID: elecom: Add support for ELECOM M-XT3URBK (018F)
The ELECOM M-XT3URBK trackball has an additional device ID (0x018F), which
shares the same report descriptor as the existing device (0x00FB). However,
the driver does not currently recognize this new ID, resulting in only five
buttons being functional.

This patch adds the new device ID so that all six buttons work properly.

Signed-off-by: Naoki Ueki <naoki25519@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-11-18 17:54:49 +01:00
Yosry Ahmed
3fa05f96fc KVM: SVM: Fix redundant updates of LBR MSR intercepts
Don't update the LBR MSR intercept bitmaps if they're already up-to-date,
as unconditionally updating the intercepts forces KVM to recalculate the
MSR bitmaps for vmcb02 on every nested VMRUN.  The redundant updates are
functionally okay; however, they neuter an optimization in Hyper-V
nested virtualization enlightenments and this manifests as a self-test
failure.

In particular, Hyper-V lets L1 mark "nested enlightenments" as clean, i.e.
tell KVM that no changes were made to the MSR bitmap since the last VMRUN.
The hyperv_svm_test KVM selftest intentionally changes the MSR bitmap
"without telling KVM about it" to verify that KVM honors the clean hint,
correctly fails because KVM notices the changed bitmap anyway:

  ==== Test Assertion Failure ====
  x86/hyperv_svm_test.c:120: vmcb->control.exit_code == 0x081
  pid=193558 tid=193558 errno=4 - Interrupted system call
     1	0x0000000000411361: assert_on_unhandled_exception at processor.c:659
     2	0x0000000000406186: _vcpu_run at kvm_util.c:1699
     3	 (inlined by) vcpu_run at kvm_util.c:1710
     4	0x0000000000401f2a: main at hyperv_svm_test.c:175
     5	0x000000000041d0d3: __libc_start_call_main at libc-start.o:?
     6	0x000000000041f27c: __libc_start_main_impl at ??:?
     7	0x00000000004021a0: _start at ??:?
  vmcb->control.exit_code == SVM_EXIT_VMMCALL

Do *not* fix this by skipping svm_hv_vmcb_dirty_nested_enlightenments()
when svm_set_intercept_for_msr() performs a no-op change.  changes to
the L0 MSR interception bitmap are only triggered by full CPUID updates
and MSR filter updates, both of which should be rare.  Changing
svm_set_intercept_for_msr() risks hiding unintended pessimizations
like this one, and is actually more complex than this change.

Fixes: fbe5e5f030 ("KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv()")
Cc: stable@vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251112013017.1836863-1-yosry.ahmed@linux.dev
[Rewritten commit message based on mailing list discussion. - Paolo]
Reviewed-by: Sean Christopherson <seanjc@google.com>
Tested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2025-11-18 17:52:20 +01:00
Fangzhi Zuo
cfa0904a35 drm/amd/display: Prevent Gating DTBCLK before It Is Properly Latched
[why]
1. With allow_0_dtb_clk enabled, the time required to latch DTBCLK to 600 MHz
depends on the SMU. If DTBCLK is not latched to 600 MHz before set_mode completes,
gating DTBCLK causes the DP2 sink to lose its clock source.

2. The existing DTBCLK gating sequence ungates DTBCLK based on both pix_clk and ref_dtbclk,
but gates DTBCLK when either pix_clk or ref_dtbclk is zero.
pix_clk can be zero outside the set_mode sequence before DTBCLK is properly latched,
which can lead to DTBCLK being gated by mistake.

[how]
Consider both pixel_clk and ref_dtbclk when determining when it is safe to gate DTBCLK;
this is more accurate.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4701
Fixes: 5949e7c489 ("drm/amd/display: Enable Dynamic DTBCLK Switch")
Reviewed-by: Charlene Liu <charlene.liu@amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Fangzhi Zuo <Jerry.Zuo@amd.com>
Signed-off-by: Roman Li <roman.li@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit d04eb0c402780ca037b62a6aecf23b863545ebca)
Cc: stable@vger.kernel.org
2025-11-18 11:52:18 -05:00
Paolo Bonzini
b82ebaf298 Merge tag 'kvmarm-fixes-6.18-3' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 6.18, take #3

- Only adjust the ID registers when no irqchip has been created once
  per VM run, instead of doing it once per vcpu, as this otherwise
  triggers a pretty bad conbsistency check failure in the sysreg code.

- Make sure the per-vcpu Fine Grain Traps are computed before we load
  the system registers on the HW, as we otherwise start running without
  anything set until the first preemption of the vcpu.
2025-11-18 17:38:01 +01:00
Linus Torvalds
5bebe8de19 mm/huge_memory: Fix initialization of huge zero folio
The recent fix to properly initialize the tags of the huge zero folio
had an unfortunate not-so-subtle side effect: it caused the actual
*contents* of the huge zero folio to not be initialized at all when the
hardware didn't support the memory tagging.

The reason was the unfortunate semantics of tag_clear_highpage(): on
hardware that didn't do the tagging, it would silently just not do
anything at all.  And since this is done only on arm64 with MTE support,
that basically meant most hardware.

It wasn't necessarily immediately obvious since the huge zero page isn't
necessarily very heavily used - or because it might already be zero
because all-zeroes is the most common pattern.  But it ends up causing
random odd user space failures when you do hit it.

The unfortunate semantics have been around for a while, but became a
real bug only when we started actively using __GFP_ZEROTAGS in the
generic get_huge_zero_folio() function - before that, it had only ever
been used in code that checked that the hardware supported it.

Fix this by simply changing the semantics of tag_clear_highpage() to
return whether it actually successfully did something or not.  While at
it, also make it initialize multiple pages in one go, since that's
actually what the only caller wants it to do and it simplifies the whole
logic.

Fixes: adfb6609c6 ("mm/huge_memory: initialise the tags of the huge zero folio")
Link: https://lore.kernel.org/all/20251117082023.90176-1-00107082@163.com/
Reviewed-by: David Hildenbrand (Red Hat) <david@kernel.org>
Reported-and-tested-by: David Wang <00107082@163.com>
Reported-and-tested-by: Carlos Llamas <cmllamas@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2025-11-18 08:21:27 -08:00
Rafael J. Wysocki
f384497a76 PM: sleep: core: Fix runtime PM enabling in device_resume_early()
Runtime PM should only be enabled in device_resume_early() if it has
been disabled for the given device by device_suspend_late().  Otherwise,
it may cause runtime PM callbacks to run prematurely in some cases
which leads to further functional issues.

Make two changes to address this problem.

First, reorder device_suspend_late() to only disable runtime PM for a
device when it is going to look for the device's callback or if the
device is a "syscore" one.  In all of the other cases, disabling runtime
PM for the device is not in fact necessary.  However, if the device's
callback returns an error and the power.is_late_suspended flag is not
going to be set, enable runtime PM so it only remains disabled when
power.is_late_suspended is set.

Second, make device_resume_early() only enable runtime PM for the
devices with the power.is_late_suspended flag set.

Fixes: 443046d1ad ("PM: sleep: Make suspend of devices more asynchronous")
Reported-by: Rose Wu <ya-jou.wu@mediatek.com>
Closes: https://lore.kernel.org/linux-pm/70b25dca6f8c2756d78f076f4a7dee7edaaffc33.camel@mediatek.com/
Cc: 6.16+ <stable@vger.kernel.org> # 6.16+
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://patch.msgid.link/12784270.O9o76ZdvQC@rafael.j.wysocki
2025-11-18 15:47:55 +01:00
Dnyaneshwar Bhadane
5474560381 drm/i915/xe3: Restrict PTL intel_encoder_is_c10phy() to only PHY A
On PTL, no combo PHY is connected to PORT B. However, PORT B can
still be used for Type-C and will utilize the C20 PHY for eDP
over Type-C. In such configurations, VBTs also enumerate PORT B.

This leads to issues where PORT B is incorrectly identified as using the
C10 PHY, due to the assumption that returning true for PORT B in
intel_encoder_is_c10phy() would not cause problems.

From PTL's perspective, only PORT A/PHY A uses the C10 PHY.

Update the helper intel_encoder_is_c10phy() to return true only for
PORT A/PHY on PTL.

v2: Change the condition code style for ptl/wcl

Bspec: 72571,73944
Fixes: 9d10de78a3 ("drm/i915/wcl: C10 phy connected to port A and B")
Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Suraj Kandpal <suraj.kandpal@intel.com>
Link: https://lore.kernel.org/r/20250922150317.2334680-4-dnyaneshwar.bhadane@intel.com
(cherry picked from commit 8147f7a1c083fd565fb958824f7c552de3b2dc46)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2025-11-18 08:48:59 -05:00
Dnyaneshwar Bhadane
913253ed47 drm/i915/display: Add definition for wcl as subplatform
We will need to differentiate between WCL and PTL in
intel_encoder_is_c10phy(). Since WCL and PTL use the same display
architecture, let's define WCL as a subplatform of PTL to allow the
differentiation.

v2: Update commit message and reorder wcl define (Gustavo)

Fixes: 3c0f211bc8 ("drm/xe: Add Wildcat Lake device IDs to PTL list")
Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Suraj Kandpal <suraj.kandpal@intel.com>
Link: https://lore.kernel.org/r/20250922150317.2334680-3-dnyaneshwar.bhadane@intel.com
(cherry picked from commit 4dfaae643e59cf3ab71b88689dce1b874f036f00)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo added Fixes tag when porting it to fixes]
2025-11-18 08:48:39 -05:00
Dnyaneshwar Bhadane
6eb2e056b0 drm/pcids: Split PTL pciids group to make wcl subplatform
To form the WCL platform as a subplatform of PTL in definition,
WCL pci ids are splited into saparate group from PTL.
So update the pciidlist struct to cover all the pci ids.

v2:
- Squash wcl description in single patch for display and xe.(jani,gustavo)

Fixes: 3c0f211bc8 ("drm/xe: Add Wildcat Lake device IDs to PTL list")
Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Suraj Kandpal <suraj.kandpal@intel.com>
Link: https://lore.kernel.org/r/20250922150317.2334680-2-dnyaneshwar.bhadane@intel.com
(cherry picked from commit 32620e176443bf23ec81bfe8f177c6721a904864)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo added the Fixes tag when porting it to fixes]
2025-11-18 08:47:58 -05:00
Florian Fuchs
0f08f0b0fb net: ps3_gelic_net: handle skb allocation failures
Handle skb allocation failures in RX path, to avoid NULL pointer
dereference and RX stalls under memory pressure. If the refill fails
with -ENOMEM, complete napi polling and wake up later to retry via timer.
Also explicitly re-enable RX DMA after oom, so the dmac doesn't remain
stopped in this situation.

Previously, memory pressure could lead to skb allocation failures and
subsequent Oops like:

	Oops: Kernel access of bad area, sig: 11 [#2]
	Hardware name: SonyPS3 Cell Broadband Engine 0x701000 PS3
	NIP [c0003d0000065900] gelic_net_poll+0x6c/0x2d0 [ps3_gelic] (unreliable)
	LR [c0003d00000659c4] gelic_net_poll+0x130/0x2d0 [ps3_gelic]
	Call Trace:
	  gelic_net_poll+0x130/0x2d0 [ps3_gelic] (unreliable)
	  __napi_poll+0x44/0x168
	  net_rx_action+0x178/0x290

Steps to reproduce the issue:
	1. Start a continuous network traffic, like scp of a 20GB file
	2. Inject failslab errors using the kernel fault injection:
	    echo -1 > /sys/kernel/debug/failslab/times
	    echo 30 > /sys/kernel/debug/failslab/interval
	    echo 100 > /sys/kernel/debug/failslab/probability
	3. After some time, traces start to appear, kernel Oopses
	   and the system stops

Step 2 is not always necessary, as it is usually already triggered by
the transfer of a big enough file.

Fixes: 02c1889166 ("ps3: gigabit ethernet driver for PS3, take3")
Signed-off-by: Florian Fuchs <fuchsfl@gmail.com>
Link: https://patch.msgid.link/20251113181000.3914980-1-fuchsfl@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-18 12:31:09 +01:00
Pavel Zhigulin
896f1a2493 net: qlogic/qede: fix potential out-of-bounds read in qede_tpa_cont() and qede_tpa_end()
The loops in 'qede_tpa_cont()' and 'qede_tpa_end()', iterate
over 'cqe->len_list[]' using only a zero-length terminator as
the stopping condition. If the terminator was missing or
malformed, the loop could run past the end of the fixed-size array.

Add an explicit bound check using ARRAY_SIZE() in both loops to prevent
a potential out-of-bounds access.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 55482edc25 ("qede: Add slowpath/fastpath support and enable hardware GRO")
Signed-off-by: Pavel Zhigulin <Pavel.Zhigulin@kaspersky.com>
Link: https://patch.msgid.link/20251113112757.4166625-1-Pavel.Zhigulin@kaspersky.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-18 11:09:58 +01:00
Randy Dunlap
db30233361 platform/x86: intel-uncore-freq: fix all header kernel-doc warnings
In file uncore-frequency/uncore-frequency-common.h,
correct all kernel-doc warnings by adding missing leading " *" to some
lines, adding a missing kernel-doc entry, and fixing a name typo.

Warning: uncore-frequency-common.h:50 bad line:
   Storage for kobject attribute elc_low_threshold_percent
Warning: uncore-frequency-common.h:52 bad line:
   Storage for kobject attribute elc_high_threshold_percent
Warning: uncore-frequency-common.h:54 bad line:
   Storage for kobject attribute elc_high_threshold_enable
Warning: uncore-frequency-common.h:92 struct member
 'min_freq_khz_kobj_attr' not described in 'uncore_data'
Warning: uncore-frequency-common.h:92 struct member
 'die_id_kobj_attr' not described in 'uncore_data'

Fixes: 24b6616355 ("platform/x86/intel-uncore-freq: Add efficiency latency control to sysfs interface")
Fixes: 416de0246f ("platform/x86: intel-uncore-freq: Fix types in sysfs callbacks")
Fixes: 247b43fcd8 ("platform/x86/intel-uncore-freq: Add attributes to show die_id")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20251111060938.1998542-1-rdunlap@infradead.org
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-18 09:37:21 +02:00
Armin Wolf
444a9256f8 platform/x86: acer-wmi: Ignore backlight event
On the Acer Nitro AN515-58, the event 4 - 0 is send by the ACPI
firmware when the backlight up/down keys are pressed. Ignore this
event to avoid spamming the kernel log with error messages, as the
acpi-video driver already handles brightness up/down events.

Reported-by: Bugaddr <Bugaddr@protonmail.com>
Closes: https://bugaddr.tech/posts/2025-11-16-debugging-the-acer-nitro-5-an515-58-fn-f10-keyboard-backlight-bug-on-linux/#wmi-interface-issues
Tested-by: Bugaddr <Bugaddr@protonmail.com>
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20251117155938.3030-1-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-18 09:35:53 +02:00
Haotian Zhang
d8bb447efc platform/x86/intel/speed_select_if: Convert PCIBIOS_* return codes to errnos
isst_if_probe() uses pci_read_config_dword() that returns PCIBIOS_*
codes. The return code is returned from the probe function as is but
probe functions should return normal errnos. A proper implementation
can be found in drivers/leds/leds-ss4200.c.

Convert PCIBIOS_* return codes using pcibios_err_to_errno() into
normal errno before returning.

Fixes: d3a2358429 ("platform/x86: ISST: Add Intel Speed Select mmio interface")
Signed-off-by: Haotian Zhang <vulab@iscas.ac.cn>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://patch.msgid.link/20251117033354.132-1-vulab@iscas.ac.cn
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-18 09:35:12 +02:00
Srinivas Pandruvada
ddf5ffff3a platform/x86/intel/hid: Add Nova Lake support
Add ACPI ID for Nova Lake.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://patch.msgid.link/20251110235041.123685-1-srinivas.pandruvada@linux.intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-18 09:34:24 +02:00
Anthony Wong
6f91ad24c6 platform/x86: alienware-wmi-wmax: Add AWCC support to Alienware 16 Aurora
Add AWCC support to Alienware 16 Aurora

Cc: stable@vger.kernel.org
Signed-off-by: Anthony Wong <anthony.wong@ubuntu.com>
Reviewed-by: Kurt Borja <kuurtb@gmail.com>
Link: https://patch.msgid.link/20251116185311.18074-1-anthony.wong@canonical.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-18 09:32:23 +02:00
Peter Hutterer
ae8966b7b5 Input: rename INPUT_PROP_HAPTIC_TOUCHPAD to INPUT_PROP_PRESSUREPAD
And expand it to encompass all pressure pads.

Definition: "pressure pad" as used here as includes all touchpads that
use physical pressure to convert to click, without physical hinges. Also
called haptic touchpads in general parlance, Synaptics calls them
ForcePads.

Most (all?) pressure pads are currently advertised as
INPUT_PROP_BUTTONPAD. The suggestion to identify them as pressure pads
by defining the resolution on ABS_MT_PRESSURE has been in the docs since
commit 20ccc8dd38 ("Documentation: input: define
ABS_PRESSURE/ABS_MT_PRESSURE resolution as grams") but few devices
provide this information.

In userspace it's thus impossible to determine whether a device is a
true pressure pad (pressure equals pressure) or a normal clickpad with
(pressure equals finger size).

Commit 7075ae4ac9 ("Input: add INPUT_PROP_HAPTIC_TOUCHPAD") introduces
INPUT_PROP_HAPTIC_TOUCHPAD but restricted it to those touchpads that
have support for userspace-controlled effects. Let's expand and rename
that definition to include all pressure pad touchpads since those that
do support FF effects can be identified by the presence of the
FF_HAPTIC bit.

This means:
- clickpad: INPUT_PROP_BUTTONPAD
- pressurepad: INPUT_PROP_BUTTONPAD + INPUT_PROP_PRESSUREPAD
- pressurepad with configurable haptics:
  INPUT_PROP_BUTTONPAD + INPUT_PROP_PRESSUREPAD + FF_HAPTIC

Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Acked-by: Benjamin Tissoires <bentiss@kernel.org>
Link: https://patch.msgid.link/20251106114534.GA405512@tassie
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
2025-11-17 23:18:32 -08:00
Dmitry Torokhov
f39b6c468c Merge tag 'v6.18-rc6' into for-linus
Sync up with the mainline to bring in definition of
INPUT_PROP_HAPTIC_TOUCHPAD.
2025-11-17 23:16:55 -08:00
Lorenzo Bianconi
8e0a754b08 net: airoha: Do not loopback traffic to GDM2 if it is available on the device
Airoha_eth driver forwards offloaded uplink traffic (packets received
on GDM1 and forwarded to GDM{3,4}) to GDM2 in order to apply hw QoS.
This is correct if the device does not support a dedicated GDM2 port.
In this case, in order to enable hw offloading for uplink traffic,
the packets should be sent to GDM{3,4} directly.

Fixes: 9cd451d414 ("net: airoha: Add loopback support for GDM2")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://patch.msgid.link/20251113-airoha-hw-offload-gdm2-fix-v1-1-7e4ca300872f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-17 20:00:07 -08:00
Ido Schimmel
bed22c7b90 selftests: net: lib: Do not overwrite error messages
ret_set_ksft_status() calls ksft_status_merge() with the current return
status and the last one. It treats a non-zero return code from
ksft_status_merge() as an indication that the return status was
overwritten by the last one and therefore overwrites the return message
with the last one.

Currently, ksft_status_merge() returns a non-zero return code even if
the current return status and the last one are equal. This results in
return messages being overwritten which is counter-productive since we
are more interested in the first failure message and not the last one.

Fix by changing ksft_status_merge() to only return a non-zero return
code if the current return status was actually changed.

Add a test case which checks that the first error message is not
overwritten.

Before:

 # ./lib_sh_test.sh
 [...]
 TEST: RET tfail2 tfail -> fail                                      [FAIL]
        retmsg=tfail expected tfail2
 [...]
 # echo $?
 1

After:

 # ./lib_sh_test.sh
 [...]
 TEST: RET tfail2 tfail -> fail                                      [ OK ]
 [...]
 # echo $?
 0

Fixes: 596c8819cb ("selftests: forwarding: Have RET track kselftest framework constants")
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20251116081029.69112-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-17 19:32:12 -08:00
Aleksei Nikiforov
da02a18248 s390/ctcm: Fix double-kfree
The function 'mpc_rcvd_sweep_req(mpcginfo)' is called conditionally
from function 'ctcmpc_unpack_skb'. It frees passed mpcginfo.
After that a call to function 'kfree' in function 'ctcmpc_unpack_skb'
frees it again.

Remove 'kfree' call in function 'mpc_rcvd_sweep_req(mpcginfo)'.

Bug detected by the clang static analyzer.

Fixes: 0c0b20587b ("s390/ctcm: fix potential memory leak")
Reviewed-by: Aswin Karuvally <aswin@linux.ibm.com>
Signed-off-by: Aleksei Nikiforov <aleksei.nikiforov@linux.ibm.com>
Signed-off-by: Aswin Karuvally <aswin@linux.ibm.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251112182724.1109474-1-aswin@linux.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-17 16:58:25 -08:00
Nam Cao
949f1fd222 nouveau/firmware: Add missing kfree() of nvkm_falcon_fw::boot
nvkm_falcon_fw::boot is allocated, but no one frees it. This causes a
kmemleak warning.

Make sure this data is deallocated.

Fixes: 2541626cfb ("drm/nouveau/acr: use common falcon HS FW code for ACR FWs")
Signed-off-by: Nam Cao <namcao@linutronix.de>
Cc: stable@vger.kernel.org
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Lyude Paul <lyude@redhat.com>
Link: https://patch.msgid.link/20251117084231.2910561-1-namcao@linutronix.de
2025-11-17 18:12:17 -05:00
Dnyaneshwar Bhadane
fa766e759f drm/i915/xe3lpd: Load DMC for Xe3_LPD version 30.02
Load the DMC for Xe3_LPD version 30.02.

Fixes: 3c0f211bc8 ("drm/xe: Add Wildcat Lake device IDs to PTL list")
Signed-off-by: Dnyaneshwar Bhadane <dnyaneshwar.bhadane@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Reviewed-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
Link: https://lore.kernel.org/r/20251016131517.2032684-1-dnyaneshwar.bhadane@intel.com
Signed-off-by: Gustavo Sousa <gustavo.sousa@intel.com>
(cherry picked from commit a63db39a578b543f5e5719b9f14dd82d3b8648d1)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo added the Fixes tag while cherry-picking to fixes]
2025-11-17 16:30:34 -05:00
Linus Torvalds
e7c375b181 Merge tag 'vfs-6.18-rc7.fixes' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner:

 - Fix unitialized variable in statmount_string()

 - Fix hostfs mounting when passing host root during boot

 - Fix dynamic lookup to fail on cell lookup failure

 - Fix missing file type when reading bfs inodes from disk

 - Enforce checking of sb_min_blocksize() calls and update all callers
   accordingly

 - Restore write access before closing files opened by open_exec() in
   binfmt_misc

 - Always freeze efivarfs during suspend/hibernate cycles

 - Fix statmount()'s and listmount()'s grab_requested_mnt_ns() helper to
   actually allow mount namespace file descriptor in addition to mount
   namespace ids

 - Fix tmpfs remount when noswap is specified

 - Switch Landlock to iput_not_last() to remove false-positives from
   might_sleep() annotations in iput()

 - Remove dead node_to_mnt_ns() code

 - Ensure that per-queue kobjects are successfully created

* tag 'vfs-6.18-rc7.fixes' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs:
  landlock: fix splats from iput() after it started calling might_sleep()
  fs: add iput_not_last()
  shmem: fix tmpfs reconfiguration (remount) when noswap is set
  fs/namespace: correctly handle errors returned by grab_requested_mnt_ns
  power: always freeze efivarfs
  binfmt_misc: restore write access before closing files opened by open_exec()
  block: add __must_check attribute to sb_min_blocksize()
  virtio-fs: fix incorrect check for fsvq->kobj
  xfs: check the return value of sb_min_blocksize() in xfs_fs_fill_super
  isofs: check the return value of sb_min_blocksize() in isofs_fill_super
  exfat: check return value of sb_min_blocksize in exfat_read_boot_sector
  vfat: fix missing sb_min_blocksize() return value checks
  mnt: Remove dead code which might prevent from building
  bfs: Reconstruct file type when loading from disk
  afs: Fix dynamic lookup to fail on cell lookup failure
  hostfs: Fix only passing host root in boot stage with new mount
  fs: Fix uninitialized 'offp' in statmount_string()
2025-11-17 09:11:27 -08:00
Linus Torvalds
418592a040 Merge tag 'sched_ext-for-6.18-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:
 "Five fixes addressing PREEMPT_RT compatibility and locking issues.

  Three commits fix potential deadlocks and sleeps in atomic contexts on
  RT kernels by converting locks to raw spinlocks and ensuring IRQ work
  runs in hard-irq context. The remaining two fix unsafe locking in the
  debug dump path and a variable dereference typo"

* tag 'sched_ext-for-6.18-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: Use IRQ_WORK_INIT_HARD() to initialize rq->scx.kick_cpus_irq_work
  sched_ext: Fix possible deadlock in the deferred_irq_workfn()
  sched/ext: convert scx_tasks_lock to raw spinlock
  sched_ext: Fix unsafe locking in the scx_dump_state()
  sched_ext: Fix use of uninitialized variable in scx_bpf_cpuperf_set()
2025-11-17 09:01:22 -08:00
Ewan D. Milne
0a2c5495b6 nvme: nvme-fc: Ensure ->ioerr_work is cancelled in nvme_fc_delete_ctrl()
nvme_fc_delete_assocation() waits for pending I/O to complete before
returning, and an error can cause ->ioerr_work to be queued after
cancel_work_sync() had been called.  Move the call to cancel_work_sync() to
be after nvme_fc_delete_association() to ensure ->ioerr_work is not running
when the nvme_fc_ctrl object is freed.  Otherwise the following can occur:

[ 1135.911754] list_del corruption, ff2d24c8093f31f8->next is NULL
[ 1135.917705] ------------[ cut here ]------------
[ 1135.922336] kernel BUG at lib/list_debug.c:52!
[ 1135.926784] Oops: invalid opcode: 0000 [#1] SMP NOPTI
[ 1135.931851] CPU: 48 UID: 0 PID: 726 Comm: kworker/u449:23 Kdump: loaded Not tainted 6.12.0 #1 PREEMPT(voluntary)
[ 1135.943490] Hardware name: Dell Inc. PowerEdge R660/0HGTK9, BIOS 2.5.4 01/16/2025
[ 1135.950969] Workqueue:  0x0 (nvme-wq)
[ 1135.954673] RIP: 0010:__list_del_entry_valid_or_report.cold+0xf/0x6f
[ 1135.961041] Code: c7 c7 98 68 72 94 e8 26 45 fe ff 0f 0b 48 c7 c7 70 68 72 94 e8 18 45 fe ff 0f 0b 48 89 fe 48 c7 c7 80 69 72 94 e8 07 45 fe ff <0f> 0b 48 89 d1 48 c7 c7 a0 6a 72 94 48 89 c2 e8 f3 44 fe ff 0f 0b
[ 1135.979788] RSP: 0018:ff579b19482d3e50 EFLAGS: 00010046
[ 1135.985015] RAX: 0000000000000033 RBX: ff2d24c8093f31f0 RCX: 0000000000000000
[ 1135.992148] RDX: 0000000000000000 RSI: ff2d24d6bfa1d0c0 RDI: ff2d24d6bfa1d0c0
[ 1135.999278] RBP: ff2d24c8093f31f8 R08: 0000000000000000 R09: ffffffff951e2b08
[ 1136.006413] R10: ffffffff95122ac8 R11: 0000000000000003 R12: ff2d24c78697c100
[ 1136.013546] R13: fffffffffffffff8 R14: 0000000000000000 R15: ff2d24c78697c0c0
[ 1136.020677] FS:  0000000000000000(0000) GS:ff2d24d6bfa00000(0000) knlGS:0000000000000000
[ 1136.028765] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1136.034510] CR2: 00007fd207f90b80 CR3: 000000163ea22003 CR4: 0000000000f73ef0
[ 1136.041641] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1136.048776] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
[ 1136.055910] PKRU: 55555554
[ 1136.058623] Call Trace:
[ 1136.061074]  <TASK>
[ 1136.063179]  ? show_trace_log_lvl+0x1b0/0x2f0
[ 1136.067540]  ? show_trace_log_lvl+0x1b0/0x2f0
[ 1136.071898]  ? move_linked_works+0x4a/0xa0
[ 1136.075998]  ? __list_del_entry_valid_or_report.cold+0xf/0x6f
[ 1136.081744]  ? __die_body.cold+0x8/0x12
[ 1136.085584]  ? die+0x2e/0x50
[ 1136.088469]  ? do_trap+0xca/0x110
[ 1136.091789]  ? do_error_trap+0x65/0x80
[ 1136.095543]  ? __list_del_entry_valid_or_report.cold+0xf/0x6f
[ 1136.101289]  ? exc_invalid_op+0x50/0x70
[ 1136.105127]  ? __list_del_entry_valid_or_report.cold+0xf/0x6f
[ 1136.110874]  ? asm_exc_invalid_op+0x1a/0x20
[ 1136.115059]  ? __list_del_entry_valid_or_report.cold+0xf/0x6f
[ 1136.120806]  move_linked_works+0x4a/0xa0
[ 1136.124733]  worker_thread+0x216/0x3a0
[ 1136.128485]  ? __pfx_worker_thread+0x10/0x10
[ 1136.132758]  kthread+0xfa/0x240
[ 1136.135904]  ? __pfx_kthread+0x10/0x10
[ 1136.139657]  ret_from_fork+0x31/0x50
[ 1136.143236]  ? __pfx_kthread+0x10/0x10
[ 1136.146988]  ret_from_fork_asm+0x1a/0x30
[ 1136.150915]  </TASK>

Fixes: 19fce0470f ("nvme-fc: avoid calling _nvme_fc_abort_outstanding_ios from interrupt context")
Cc: stable@vger.kernel.org
Tested-by: Marco Patalano <mpatalan@redhat.com>
Reviewed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2025-11-17 09:00:42 -08:00
Ewan D. Milne
ea3442efab nvme: nvme-fc: move tagset removal to nvme_fc_delete_ctrl()
Now target is removed from nvme_fc_ctrl_free() which is the ctrl->ref
release handler. And even admin queue is unquiesced there, this way
is definitely wrong because the ctr->ref is grabbed when submitting
command.

And Marco observed that nvme_fc_ctrl_free() can be called from request
completion code path, and trigger kernel warning since request completes
from softirq context.

Fix the issue by moveing target removal into nvme_fc_delete_ctrl(),
which is also aligned with nvme-tcp and nvme-rdma.

Patch originally proposed by Ming Lei, then modified to move the tagset
removal down to after nvme_fc_delete_association() after further testing.

Cc: Marco Patalano <mpatalan@redhat.com>
Cc: Ewan Milne <emilne@redhat.com>
Cc: James Smart <james.smart@broadcom.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Cc: stable@vger.kernel.org
Tested-by: Marco Patalano <mpatalan@redhat.com>
Reviewed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2025-11-17 09:00:42 -08:00
Linus Torvalds
e97c61885b Merge tag 'mtd/fixes-for-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux
Pull MTD fixes from Miquel Raynal:
 "Mostly small misc fixes, here they are sorted by sub-subsystem:

  ECC fixes:
   - Realtek Kconfig fix

  SPI NAND fixes:
   - Remove nonexistent QE bit on FMSH FM25S01A

  Raw NAND fixes:
   - Prevent DMA device NULL pointer dereference in Cadence driver

  MTD device fixes:
   - Possible integer overflow in read/write ioctls
   - Fix the IRQ handler pointer in the onenand driver, even if in
     practice it is never dereferenced.

* tag 'mtd/fixes-for-6.18-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux:
  mtd: onenand: Pass correct pointer to IRQ handler
  mtd: spinand: fmsh: remove QE bit for FM25S01A flash
  mtd: rawnand: cadence: fix DMA device NULL pointer dereference
  mtd: rawnand: realtek: Make rtl_ecc_engine_ops const
  mtd: nand: MTD_NAND_ECC_REALTEK should depend on HAS_DMA
  mtd: nand: realtek-ecc: Fix a IS_ERR() vs NULL bug in probe
  mtdchar: fix integer overflow in read/write ioctls
2025-11-17 08:57:21 -08:00
Shin'ichiro Kawasaki
6d87cd5335 nvme-multipath: fix lockdep WARN due to partition scan work
Blktests test cases nvme/014, 057 and 058 fail occasionally due to a
lockdep WARN. As reported in the Closes tag URL, the WARN indicates that
a deadlock can happen due to the dependency among disk->open_mutex,
kblockd workqueue completion and partition_scan_work completion.

To avoid the lockdep WARN and the potential deadlock, cut the dependency
by running the partition_scan_work not by kblockd workqueue but by
nvme_wq.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/linux-block/CAHj4cs8mJ+R_GmQm9R8ebResKAWUE8kF5+_WVg0v8zndmqd6BQ@mail.gmail.com/
Link: https://lore.kernel.org/linux-block/oeyzci6ffshpukpfqgztsdeke5ost5hzsuz4rrsjfmvpqcevax@5nhnwbkzbrpa/
Fixes: 1f021341ee ("nvme-multipath: defer partition scanning")
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2025-11-17 08:08:00 -08:00
Alistair Francis
159de7a825 nvmet-auth: update sc_c in target host hash calculation
Commit 7e091add9c "nvme-auth: update sc_c in host response" added
the sc_c variable to the dhchap queue context structure which is
appropriately set during negotiate and then used in the host response.

This breaks secure concat connections with a Linux target as the target
code wasn't updated at the same time. This patch fixes this by adding a
new sc_c variable to the host hash calculations.

Fixes: 7e091add9c ("nvme-auth: update sc_c in host response")
Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Martin George <marting@netapp.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2025-11-17 08:05:58 -08:00
Zqiang
36c6f3c03d sched_ext: Use IRQ_WORK_INIT_HARD() to initialize rq->scx.kick_cpus_irq_work
For PREEMPT_RT kernels, the kick_cpus_irq_workfn() be invoked in
the per-cpu irq_work/* task context and there is no rcu-read critical
section to protect. this commit therefore use IRQ_WORK_INIT_HARD() to
initialize the per-cpu rq->scx.kick_cpus_irq_work in the
init_sched_ext_class().

Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Tejun Heo <tj@kernel.org>
2025-11-17 05:07:22 -10:00
Linus Torvalds
6a23ae0a96 Linux 6.18-rc6 2025-11-16 14:25:38 -08:00
Linus Torvalds
998ccc327b Merge tag 'perf-tools-fixes-for-v6.18-2-2025-11-16' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fix writing bpf_prog (infos|btfs)_cnt to data file, to not generate
   invalid perf.data files in some corner cases.

 - Fix 'perf top' segfault by ensuring libbfd is initialized. This is an
   opt-in feature due to license incompatibilities.

 - Fix segfault in 'perf lock' due to missing kernel map.

 - Fix 'perf lock contention' test.

 - Don't fail fast path detection if binutils-devel isn't available.

 - Sync KVM's vmx.h with the kernel to pick SEAMCALL exit reason.

* tag 'perf-tools-fixes-for-v6.18-2-2025-11-16' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools:
  perf libbfd: Ensure libbfd is initialized prior to use
  perf test: Fix lock contention test
  perf lock: Fix segfault due to missing kernel map
  tools headers UAPI: Sync KVM's vmx.h with the kernel to pick SEAMCALL exit reason
  perf build: Don't fail fast path feature detection when binutils-devel is not available
  perf header: Write bpf_prog (infos|btfs)_cnt to data file
2025-11-16 13:45:03 -08:00
Linus Torvalds
7ba45f1504 Merge tag 'mm-hotfixes-stable-2025-11-16-10-40' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
 "7 hotfixes.  5 are cc:stable, 4 are against mm/

  All are singletons - please see the respective changelogs for details"

* tag 'mm-hotfixes-stable-2025-11-16-10-40' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm, swap: fix potential UAF issue for VMA readahead
  selftests/user_events: fix type cast for write_index packed member in perf_test
  lib/test_kho: check if KHO is enabled
  mm/huge_memory: fix folio split check for anon folios in swapcache
  MAINTAINERS: update David Hildenbrand's email address
  crash: fix crashkernel resource shrink
  mm: fix MAX_FOLIO_ORDER on powerpc configs with hugetlb
2025-11-16 13:31:14 -08:00
Chen Pei
e2cb69263e tools: riscv: Fixed misalignment of CSR related definitions
The file tools/arch/riscv/include/asm/csr.h borrows from
arch/riscv/include/asm/csr.h, and subsequent modifications
related to CSR should maintain consistency.

Signed-off-by: Chen Pei <cp0613@linux.alibaba.com>
Link: https://patch.msgid.link/20251114071215.816-1-cp0613@linux.alibaba.com
[pjw@kernel.org: dropped Fixes: lines for patches that weren't broken; removed superfluous blank line]
Signed-off-by: Paul Walmsley <pjw@kernel.org>
2025-11-16 10:37:38 -07:00
Mayuresh Chitale
7b090e7b91 riscv: sbi: Prefer SRST shutdown over legacy
Currently, the sbi_init() always attempts to register the legacy shutdown
function as the sys-off handler which is fine when RISCV_SBI_V01 is not
enabled. However, if RISCV_SBI_V01 is enabled in the kernel and the SBI
v0.1 is not supported by the underlying SBI implementation then the
legacy shutdown fails. Fix this by not registering the legacy shutdown
when SRST shutdown is available.

Fixes: 70ddf86d76 ("riscv: sbi: Switch to new sys-off handler API")
Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://patch.msgid.link/20251114065808.304430-1-mchitale@ventanamicro.com
Signed-off-by: Paul Walmsley <pjw@kernel.org>
2025-11-16 10:37:27 -07:00
Linus Torvalds
7254a2b522 Merge tag 'firewire-fixes-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394
Pull firewire fixes from Takashi Sakamoto:
 "This includes some fixes for the topology map, newly introduced in
  v6.18 kernel"

* tag 'firewire-fixes-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394:
  firewire: core: fix to update generation field in topology map
  firewire: core: Initialize topology_map.lock
2025-11-16 07:08:28 -08:00
Linus Torvalds
87751e715e Merge tag 'edac_urgent_for_v6.18_rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC fixes from Borislav Petkov:

 - In Versalnet, handle the reporting of non-standard hw errors whose
   information can come in more than one remote processor message.

 - Explicitly reenable ECC checking after a warm reset in Altera OCRAM
   as those registers are reset to default otherwise

 - Fix single-bit error injection in Altera EDAC to not inject errors
   directly in ECC RAM and thus lead to false double-bit errors due to
   same ECC RAM being in concurrent use

* tag 'edac_urgent_for_v6.18_rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/altera: Use INTTEST register for Ethernet and USB SBE injection
  EDAC/altera: Handle OCRAM ECC enable after warm reset
  EDAC/versalnet: Handle split messages for non-standard errors
2025-11-16 07:05:24 -08:00
Takashi Sakamoto
1107aac1ad firewire: core: fix to update generation field in topology map
The generation field of topology map is updated after initialized by zero.
The updated value of generation field is always zero, and is against
specification.

This commit fixes the bug.

Fixes: 7d138cb269 ("firewire: core: use spin lock specific to topology map")
Link: https://lore.kernel.org/r/20251114144421.415278-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
2025-11-16 21:30:26 +09:00
Chao-ying Fu
91f815b707 riscv: Update MIPS vendor id to 0x127
[1] defines MIPS vendor id as 0x127. All previous MIPS RISC-V patches
were tested on QEMU, also modified to use 0x722 as MIPS_VENDOR_ID. This
new value should reflect real hardware.

[1] https://mips.com/wp-content/uploads/2025/06/P8700_Programmers_Reference_Manual_Rev1.84_5-31-2025.pdf

Fixes: a8fed1bc03 ("riscv: Add xmipsexectl as a vendor extension")
Signed-off-by: Chao-ying Fu <cfu@wavecomp.com>
Signed-off-by: Aleksa Paunovic <aleksa.paunovic@htecgroup.com>
Link: https://patch.msgid.link/20251113-mips-vendorid-v2-1-3279489b7f84@htecgroup.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Paul WAlmsley <pjw@kernel.org>
2025-11-15 15:27:02 -07:00
Kairui Song
1c2a936edd mm, swap: fix potential UAF issue for VMA readahead
Since commit 78524b05f1 ("mm, swap: avoid redundant swap device
pinning"), the common helper for allocating and preparing a folio in the
swap cache layer no longer tries to get a swap device reference
internally, because all callers of __read_swap_cache_async are already
holding a swap entry reference.  The repeated swap device pinning isn't
needed on the same swap device.

Caller of VMA readahead is also holding a reference to the target entry's
swap device, but VMA readahead walks the page table, so it might encounter
swap entries from other devices, and call __read_swap_cache_async on
another device without holding a reference to it.

So it is possible to cause a UAF when swapoff of device A raced with
swapin on device B, and VMA readahead tries to read swap entries from
device A.  It's not easy to trigger, but in theory, it could cause real
issues.

Make VMA readahead try to get the device reference first if the swap
device is a different one from the target entry.

Link: https://lkml.kernel.org/r/20251111-swap-fix-vma-uaf-v1-1-41c660e58562@tencent.com
Fixes: 78524b05f1 ("mm, swap: avoid redundant swap device pinning")
Suggested-by: Huang Ying <ying.huang@linux.alibaba.com>
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: Chris Li <chrisl@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-15 10:52:02 -08:00
Ankit Khushwaha
216158f063 selftests/user_events: fix type cast for write_index packed member in perf_test
Accessing 'reg.write_index' directly triggers a -Waddress-of-packed-member
warning due to potential unaligned pointer access:

perf_test.c:239:38: warning: taking address of packed member 'write_index'
of class or structure 'user_reg' may result in an unaligned pointer value
[-Waddress-of-packed-member]
  239 |         ASSERT_NE(-1, write(self->data_fd, &reg.write_index,
      |                                             ^~~~~~~~~~~~~~~

Since write(2) works with any alignment. Casting '&reg.write_index'
explicitly to 'void *' to suppress this warning.

Link: https://lkml.kernel.org/r/20251106095532.15185-1-ankitkhushwaha.linux@gmail.com
Fixes: 42187bdc3c ("selftests/user_events: Add perf self-test for empty arguments events")
Signed-off-by: Ankit Khushwaha <ankitkhushwaha.linux@gmail.com>
Cc: Beau Belgrave <beaub@linux.microsoft.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: sunliming <sunliming@kylinos.cn>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-15 10:52:02 -08:00
Pasha Tatashin
a26ec8f3d4 lib/test_kho: check if KHO is enabled
We must check whether KHO is enabled prior to issuing KHO commands,
otherwise KHO internal data structures are not initialized.

Link: https://lkml.kernel.org/r/20251106220635.2608494-1-pasha.tatashin@soleen.com
Fixes: b753522bed ("kho: add test for kexec handover")
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202511061629.e242724-lkp@intel.com
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-15 10:52:01 -08:00
Zi Yan
f1d47cafe5 mm/huge_memory: fix folio split check for anon folios in swapcache
Both uniform and non uniform split check missed the check to prevent
splitting anon folios in swapcache to non-zero order.

Splitting anon folios in swapcache to non-zero order can cause data
corruption since swapcache only support PMD order and order-0 entries. 
This can happen when one use split_huge_pages under debugfs to split
anon folios in swapcache.

In-tree callers do not perform such an illegal operation.  Only debugfs
interface could trigger it.  I will put adding a test case on my TODO
list.

Fix the check.

Link: https://lkml.kernel.org/r/20251105162910.752266-1-ziy@nvidia.com
Fixes: 58729c04cf ("mm/huge_memory: add buddy allocator like (non-uniform) folio_split()")
Signed-off-by: Zi Yan <ziy@nvidia.com>
Reported-by: "David Hildenbrand (Red Hat)" <david@kernel.org>
Closes: https://lore.kernel.org/all/dc0ecc2c-4089-484f-917f-920fdca4c898@kernel.org/
Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-15 10:52:01 -08:00
David Hildenbrand (Red Hat)
3470715e5c MAINTAINERS: update David Hildenbrand's email address
Switch to kernel.org email address as I will be leaving Red Hat.  The old
address will remain active until end of January 2026, so performing the
change now should make sure that most mails will reach me.

Link: https://lkml.kernel.org/r/20251103103659.379335-1-david@kernel.org
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: David Hildenbrand (Red Hat) <david@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-15 10:52:01 -08:00
Sourabh Jain
00fbff75c5 crash: fix crashkernel resource shrink
When crashkernel is configured with a high reservation, shrinking its
value below the low crashkernel reservation causes two issues:

1. Invalid crashkernel resource objects
2. Kernel crash if crashkernel shrinking is done twice

For example, with crashkernel=200M,high, the kernel reserves 200MB of high
memory and some default low memory (say 256MB).  The reservation appears
as:

cat /proc/iomem | grep -i crash
af000000-beffffff : Crash kernel
433000000-43f7fffff : Crash kernel

If crashkernel is then shrunk to 50MB (echo 52428800 >
/sys/kernel/kexec_crash_size), /proc/iomem still shows 256MB reserved:
af000000-beffffff : Crash kernel

Instead, it should show 50MB:
af000000-b21fffff : Crash kernel

Further shrinking crashkernel to 40MB causes a kernel crash with the
following trace (x86):

BUG: kernel NULL pointer dereference, address: 0000000000000038
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
<snip...>
Call Trace: <TASK>
? __die_body.cold+0x19/0x27
? page_fault_oops+0x15a/0x2f0
? search_module_extables+0x19/0x60
? search_bpf_extables+0x5f/0x80
? exc_page_fault+0x7e/0x180
? asm_exc_page_fault+0x26/0x30
? __release_resource+0xd/0xb0
release_resource+0x26/0x40
__crash_shrink_memory+0xe5/0x110
crash_shrink_memory+0x12a/0x190
kexec_crash_size_store+0x41/0x80
kernfs_fop_write_iter+0x141/0x1f0
vfs_write+0x294/0x460
ksys_write+0x6d/0xf0
<snip...>

This happens because __crash_shrink_memory()/kernel/crash_core.c
incorrectly updates the crashk_res resource object even when
crashk_low_res should be updated.

Fix this by ensuring the correct crashkernel resource object is updated
when shrinking crashkernel memory.

Link: https://lkml.kernel.org/r/20251101193741.289252-1-sourabhjain@linux.ibm.com
Fixes: 16c6006af4 ("kexec: enable kexec_crash_size to support two crash kernel regions")
Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Zhen Lei <thunder.leizhen@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-15 10:52:01 -08:00
David Hildenbrand (Red Hat)
39231e8d6b mm: fix MAX_FOLIO_ORDER on powerpc configs with hugetlb
In the past, CONFIG_ARCH_HAS_GIGANTIC_PAGE indicated that we support
runtime allocation of gigantic hugetlb folios.  In the meantime it evolved
into a generic way for the architecture to state that it supports gigantic
hugetlb folios.

In commit fae7d834c4 ("mm: add __dump_folio()") we started using
CONFIG_ARCH_HAS_GIGANTIC_PAGE to decide MAX_FOLIO_ORDER: whether we could
have folios larger than what the buddy can handle.  In the context of that
commit, we started using MAX_FOLIO_ORDER to detect page corruptions when
dumping tail pages of folios.  Before that commit, we assumed that we
cannot have folios larger than the highest buddy order, which was
obviously wrong.

In commit 7b4f21f5e0 ("mm/hugetlb: check for unreasonable folio sizes
when registering hstate"), we used MAX_FOLIO_ORDER to detect
inconsistencies, and in fact, we found some now.

Powerpc allows for configs that can allocate gigantic folio during boot
(not at runtime), that do not set CONFIG_ARCH_HAS_GIGANTIC_PAGE and can
exceed PUD_ORDER.

To fix it, let's make powerpc select CONFIG_ARCH_HAS_GIGANTIC_PAGE with
hugetlb on powerpc, and increase the maximum folio size with hugetlb to 16
GiB on 64bit (possible on arm64 and powerpc) and 1 GiB on 32 bit
(powerpc).  Note that on some powerpc configurations, whether we actually
have gigantic pages depends on the setting of CONFIG_ARCH_FORCE_MAX_ORDER,
but there is nothing really problematic about setting it unconditionally:
we just try to keep the value small so we can better detect problems in
__dump_folio() and inconsistencies around the expected largest folio in
the system.

Ideally, we'd have a better way to obtain the maximum hugetlb folio size
and detect ourselves whether we really end up with gigantic folios.  Let's
defer bigger changes and fix the warnings first.

While at it, handle gigantic DAX folios more clearly: DAX can only end up
creating gigantic folios with HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD.

Add a new Kconfig option HAVE_GIGANTIC_FOLIOS to make both cases clearer. 
In particular, worry about ARCH_HAS_GIGANTIC_PAGE only with HUGETLB_PAGE.

Note: with enabling CONFIG_ARCH_HAS_GIGANTIC_PAGE on powerpc, we will now
also allow for runtime allocations of folios in some more powerpc configs.
I don't think this is a problem, but if it is we could handle it through
__HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED.

While __dump_page()/__dump_folio was also problematic (not handling
dumping of tail pages of such gigantic folios correctly), it doesn't seem
critical enough to mark it as a fix.

Link: https://lkml.kernel.org/r/20251114214920.2550676-1-david@kernel.org
Fixes: 7b4f21f5e0 ("mm/hugetlb: check for unreasonable folio sizes when registering hstate")
Reported-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Closes: https://lore.kernel.org/r/3e043453-3f27-48ad-b987-cc39f523060a@csgroup.eu/
Reported-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Closes: https://lore.kernel.org/r/94377f5c-d4f0-4c0f-b0f6-5bf1cd7305b1@linux.ibm.com/
Signed-off-by: David Hildenbrand (Red Hat) <david@kernel.org>
Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Donet Tom <donettom@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-15 10:52:00 -08:00
Linus Torvalds
f824272b6e Merge tag 's390-6.18-4' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 fix from Heiko Carstens:

 - Fix a bug in the __ptep_rdp() inline assembly which may lead to
   missing TLB flushes

* tag 's390-6.18-4' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/mm: Fix __ptep_rdp() inline assembly
2025-11-15 09:01:00 -08:00
Linus Torvalds
b8a2c32b22 Merge tag 'x86-urgent-2025-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Ingo Molnar:

 - Update the list of AMD microcode minimum Entrysign revisions

 - Add additional fixed AMD RDSEED microcode revisions

 - Update the language transliteration for Kiryl Shutsemau's name
   in the MAINTAINERS entry

* tag 'x86-urgent-2025-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/microcode/AMD: Add Zen5 model 0x44, stepping 0x1 minrev
  x86/CPU/AMD: Add additional fixed RDSEED microcode revisions
  MAINTAINERS: Update name spelling
2025-11-15 08:55:29 -08:00
Linus Torvalds
bb1a6ddcfa Merge tag 'timers-urgent-2025-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer fix from Ingo Molnar:
 "Fix a memory leak in the posix timer creation logic"

* tag 'timers-urgent-2025-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  posix-timers: Plug potential memory leak in do_timer_create()
2025-11-15 08:51:43 -08:00
Linus Torvalds
fa3ade5e8e Merge tag 'irq-urgent-2025-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull irq fix from Ingo Molnar:
 "Fix an irqchip driver release bug in the riscv-intc irqchip driver"

* tag 'irq-urgent-2025-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  irqchip/riscv-intc: Add missing free() callback in riscv_intc_domain_ops
2025-11-15 08:48:51 -08:00
Linus Torvalds
e256f7b4e5 Merge tag 'core-urgent-2025-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core fix from Ingo Molnar:
 "Fix a broken #ifndef in the <linux/entry-virt.h> header.

  It hasn't caused problems upstream yet because no arch overrides
  arch_xfer_to_guest_mode_handle_work() at this moment"

* tag 'core-urgent-2025-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  entry: Fix ifndef around arch_xfer_to_guest_mode_handle_work() stub
2025-11-15 08:46:18 -08:00
Jesper Dangaard Brouer
5442a9da69 veth: more robust handing of race to avoid txq getting stuck
Commit dc82a33297 ("veth: apply qdisc backpressure on full ptr_ring to
reduce TX drops") introduced a race condition that can lead to a permanently
stalled TXQ. This was observed in production on ARM64 systems (Ampere Altra
Max).

The race occurs in veth_xmit(). The producer observes a full ptr_ring and
stops the queue (netif_tx_stop_queue()). The subsequent conditional logic,
intended to re-wake the queue if the consumer had just emptied it (if
(__ptr_ring_empty(...)) netif_tx_wake_queue()), can fail. This leads to a
"lost wakeup" where the TXQ remains stopped (QUEUE_STATE_DRV_XOFF) and
traffic halts.

This failure is caused by an incorrect use of the __ptr_ring_empty() API
from the producer side. As noted in kernel comments, this check is not
guaranteed to be correct if a consumer is operating on another CPU. The
empty test is based on ptr_ring->consumer_head, making it reliable only for
the consumer. Using this check from the producer side is fundamentally racy.

This patch fixes the race by adopting the more robust logic from an earlier
version V4 of the patchset, which always flushed the peer:

(1) In veth_xmit(), the racy conditional wake-up logic and its memory barrier
are removed. Instead, after stopping the queue, we unconditionally call
__veth_xdp_flush(rq). This guarantees that the NAPI consumer is scheduled,
making it solely responsible for re-waking the TXQ.
  This handles the race where veth_poll() consumes all packets and completes
NAPI *before* veth_xmit() on the producer side has called netif_tx_stop_queue.
The __veth_xdp_flush(rq) will observe rx_notify_masked is false and schedule
NAPI.

(2) On the consumer side, the logic for waking the peer TXQ is moved out of
veth_xdp_rcv() and placed at the end of the veth_poll() function. This
placement is part of fixing the race, as the netif_tx_queue_stopped() check
must occur after rx_notify_masked is potentially set to false during NAPI
completion.
  This handles the race where veth_poll() consumes all packets, but haven't
finished (rx_notify_masked is still true). The producer veth_xmit() stops the
TXQ and __veth_xdp_flush(rq) will observe rx_notify_masked is true, meaning
not starting NAPI.  Then veth_poll() change rx_notify_masked to false and
stops NAPI.  Before exiting veth_poll() will observe TXQ is stopped and wake
it up.

Fixes: dc82a33297 ("veth: apply qdisc backpressure on full ptr_ring to reduce TX drops")
Reviewed-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
Signed-off-by: Jesper Dangaard Brouer <hawk@kernel.org>
Link: https://patch.msgid.link/176295323282.307447.14790015927673763094.stgit@firesoul
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-14 18:16:53 -08:00
Ilya Maximets
dfe28c4167 net: openvswitch: remove never-working support for setting nsh fields
The validation of the set(nsh(...)) action is completely wrong.
It runs through the nsh_key_put_from_nlattr() function that is the
same function that validates NSH keys for the flow match and the
push_nsh() action.  However, the set(nsh(...)) has a very different
memory layout.  Nested attributes in there are doubled in size in
case of the masked set().  That makes proper validation impossible.

There is also confusion in the code between the 'masked' flag, that
says that the nested attributes are doubled in size containing both
the value and the mask, and the 'is_mask' that says that the value
we're parsing is the mask.  This is causing kernel crash on trying to
write into mask part of the match with SW_FLOW_KEY_PUT() during
validation, while validate_nsh() doesn't allocate any memory for it:

  BUG: kernel NULL pointer dereference, address: 0000000000000018
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 1c2383067 P4D 1c2383067 PUD 20b703067 PMD 0
  Oops: Oops: 0000 [#1] SMP NOPTI
  CPU: 8 UID: 0 Kdump: loaded Not tainted 6.17.0-rc4+ #107 PREEMPT(voluntary)
  RIP: 0010:nsh_key_put_from_nlattr+0x19d/0x610 [openvswitch]
  Call Trace:
   <TASK>
   validate_nsh+0x60/0x90 [openvswitch]
   validate_set.constprop.0+0x270/0x3c0 [openvswitch]
   __ovs_nla_copy_actions+0x477/0x860 [openvswitch]
   ovs_nla_copy_actions+0x8d/0x100 [openvswitch]
   ovs_packet_cmd_execute+0x1cc/0x310 [openvswitch]
   genl_family_rcv_msg_doit+0xdb/0x130
   genl_family_rcv_msg+0x14b/0x220
   genl_rcv_msg+0x47/0xa0
   netlink_rcv_skb+0x53/0x100
   genl_rcv+0x24/0x40
   netlink_unicast+0x280/0x3b0
   netlink_sendmsg+0x1f7/0x430
   ____sys_sendmsg+0x36b/0x3a0
   ___sys_sendmsg+0x87/0xd0
   __sys_sendmsg+0x6d/0xd0
   do_syscall_64+0x7b/0x2c0
   entry_SYSCALL_64_after_hwframe+0x76/0x7e

The third issue with this process is that while trying to convert
the non-masked set into masked one, validate_set() copies and doubles
the size of the OVS_KEY_ATTR_NSH as if it didn't have any nested
attributes.  It should be copying each nested attribute and doubling
them in size independently.  And the process must be properly reversed
during the conversion back from masked to a non-masked variant during
the flow dump.

In the end, the only two outcomes of trying to use this action are
either validation failure or a kernel crash.  And if somehow someone
manages to install a flow with such an action, it will most definitely
not do what it is supposed to, since all the keys and the masks are
mixed up.

Fixing all the issues is a complex task as it requires re-writing
most of the validation code.

Given that and the fact that this functionality never worked since
introduction, let's just remove it altogether.  It's better to
re-introduce it later with a proper implementation instead of trying
to fix it in stable releases.

Fixes: b2d0f5d5dc ("openvswitch: enable NSH support")
Reported-by: Junvy Yang <zhuque@tencent.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Reviewed-by: Aaron Conole <aconole@redhat.com>
Link: https://patch.msgid.link/20251112112246.95064-1-i.maximets@ovn.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-14 18:13:24 -08:00
Eric Dumazet
035bca3f01 mptcp: fix race condition in mptcp_schedule_work()
syzbot reported use-after-free in mptcp_schedule_work() [1]

Issue here is that mptcp_schedule_work() schedules a work,
then gets a refcount on sk->sk_refcnt if the work was scheduled.
This refcount will be released by mptcp_worker().

[A] if (schedule_work(...)) {
[B]     sock_hold(sk);
        return true;
    }

Problem is that mptcp_worker() can run immediately and complete before [B]

We need instead :

    sock_hold(sk);
    if (schedule_work(...))
        return true;
    sock_put(sk);

[1]
refcount_t: addition on 0; use-after-free.
 WARNING: CPU: 1 PID: 29 at lib/refcount.c:25 refcount_warn_saturate+0xfa/0x1d0 lib/refcount.c:25
Call Trace:
 <TASK>
 __refcount_add include/linux/refcount.h:-1 [inline]
  __refcount_inc include/linux/refcount.h:366 [inline]
  refcount_inc include/linux/refcount.h:383 [inline]
  sock_hold include/net/sock.h:816 [inline]
  mptcp_schedule_work+0x164/0x1a0 net/mptcp/protocol.c:943
  mptcp_tout_timer+0x21/0xa0 net/mptcp/protocol.c:2316
  call_timer_fn+0x17e/0x5f0 kernel/time/timer.c:1747
  expire_timers kernel/time/timer.c:1798 [inline]
  __run_timers kernel/time/timer.c:2372 [inline]
  __run_timer_base+0x648/0x970 kernel/time/timer.c:2384
  run_timer_base kernel/time/timer.c:2393 [inline]
  run_timer_softirq+0xb7/0x180 kernel/time/timer.c:2403
  handle_softirqs+0x22f/0x710 kernel/softirq.c:622
  __do_softirq kernel/softirq.c:656 [inline]
  run_ktimerd+0xcf/0x190 kernel/softirq.c:1138
  smpboot_thread_fn+0x542/0xa60 kernel/smpboot.c:160
  kthread+0x711/0x8a0 kernel/kthread.c:463
  ret_from_fork+0x4bc/0x870 arch/x86/kernel/process.c:158
  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245

Cc: stable@vger.kernel.org
Fixes: 3b1d6210a9 ("mptcp: implement and use MPTCP-level retransmission")
Reported-by: syzbot+355158e7e301548a1424@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/6915b46f.050a0220.3565dc.0028.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251113103924.3737425-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-14 18:12:35 -08:00
Pavel Zhigulin
b0c959fec1 net: mlxsw: linecards: fix missing error check in mlxsw_linecard_devlink_info_get()
The call to devlink_info_version_fixed_put() in
mlxsw_linecard_devlink_info_get() did not check for errors,
although it is checked everywhere in the code.

Add missed 'err' check to the mlxsw_linecard_devlink_info_get()

Fixes: 3fc0c51905 ("mlxsw: core_linecards: Expose device PSID over device info")
Signed-off-by: Pavel Zhigulin <Pavel.Zhigulin@kaspersky.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20251113161922.813828-1-Pavel.Zhigulin@kaspersky.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-14 18:01:32 -08:00
Pavel Zhigulin
e6751b0b19 net: dsa: hellcreek: fix missing error handling in LED registration
The LED setup routine registered both led_sync_good
and led_is_gm devices without checking the return
values of led_classdev_register(). If either registration
failed, the function continued silently, leaving the
driver in a partially-initialized state and leaking
a registered LED classdev.

Add proper error handling

Fixes: 7d9ee2e8ff ("net: dsa: hellcreek: Add PTP status LEDs")
Signed-off-by: Pavel Zhigulin <Pavel.Zhigulin@kaspersky.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Kurt Kanzenbach <kurt@linutronix.de>
Link: https://patch.msgid.link/20251113135745.92375-1-Pavel.Zhigulin@kaspersky.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-14 17:46:32 -08:00
Linus Torvalds
7a0892d283 Merge tag 'pci-v6.18-fixes-5' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
Pull pci fixes from Bjorn Helgaas:

 - Cache the ASPM L0s/L1 Supported bits early so quirks can override
   them if necessary (Bjorn Helgaas)

 - Add quirks for PA Semi and Freescale Root Ports and a HiSilicon Wi-Fi
   device that are reported to have broken L0s and L1 (Shawn Lin, Bjorn
   Helgaas)

* tag 'pci-v6.18-fixes-5' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci:
  PCI/ASPM: Avoid L0s and L1 on Hi1105 [19e5:1105] Wi-Fi
  PCI/ASPM: Avoid L0s and L1 on PA Semi [1959:a002] Root Ports
  PCI/ASPM: Avoid L0s and L1 on Freescale [1957:0451] Root Ports
  PCI/ASPM: Convert quirks to override advertised link states
  PCI/ASPM: Add pcie_aspm_remove_cap() to override advertised link states
  PCI/ASPM: Cache L0s/L1 Supported so advertised link states can be overridden
2025-11-14 15:45:31 -08:00
Linus Torvalds
cbba5d1b53 Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Alexei Starovoitov:

 - Fix interaction between livepatch and BPF fexit programs (Song Liu)
   With Steven and Masami acks.

 - Fix stack ORC unwind from BPF kprobe_multi (Jiri Olsa)
   With Steven and Masami acks.

 - Fix out of bounds access in widen_imprecise_scalars() in the verifier
   (Eduard Zingerman)

 - Fix conflicts between MPTCP and BPF sockmap (Jiayuan Chen)

 - Fix net_sched storage collision with BPF data_meta/data_end (Eric
   Dumazet)

 - Add _impl suffix to BPF kfuncs with implicit args to avoid breaking
   them in bpf-next when KF_IMPLICIT_ARGS is added (Mykyta Yatsenko)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Test widen_imprecise_scalars() with different stack depth
  bpf: account for current allocated stack depth in widen_imprecise_scalars()
  bpf: Add bpf_prog_run_data_pointers()
  selftests/bpf: Add mptcp test with sockmap
  mptcp: Fix proto fallback detection with BPF
  mptcp: Disallow MPTCP subflows from sockmap
  selftests/bpf: Add stacktrace ips test for raw_tp
  selftests/bpf: Add stacktrace ips test for kprobe_multi/kretprobe_multi
  x86/fgraph,bpf: Fix stack ORC unwind from kprobe_multi return probe
  Revert "perf/x86: Always store regs->ip in perf_callchain_kernel()"
  bpf: add _impl suffix for bpf_stream_vprintk() kfunc
  bpf:add _impl suffix for bpf_task_work_schedule* kfuncs
  selftests/bpf: Add tests for livepatch + bpf trampoline
  ftrace: bpf: Fix IPMODIFY + DIRECT in modify_ftrace_direct()
  ftrace: Fix BPF fexit with livepatch
2025-11-14 15:39:39 -08:00
Linus Torvalds
a752782a28 Merge tag 'rust-fixes-6.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ojeda/linux
Pull Rust fix from Miguel Ojeda:

 - Fix a Rust 1.91.0 build issue due to 'bindings.o' not containing
   DWARF debug information anymore by teaching gendwarfksyms to skip
   object files without exports

* tag 'rust-fixes-6.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/ojeda/linux:
  gendwarfksyms: Skip files with no exports
2025-11-14 15:36:15 -08:00
Linus Torvalds
1cc41c88ef Merge tag 'nfs-for-6.18-3' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client fixes from Anna Schumaker:

 - Various fixes when using NFS with TLS

 - Localio direct-IO fixes

 - Fix error handling in nfs_atomic_open_v23()

 - Fix sysfs memory leak when nfs_client kobject add fails

 - Fix an incorrect parameter when calling nfs4_call_sync()

 - Fix a failing LTP test when using delegated timestamps

* tag 'nfs-for-6.18-3' of git://git.linux-nfs.org/projects/anna/linux-nfs:
  NFS: Fix LTP test failures when timestamps are delegated
  NFSv4: Fix an incorrect parameter when calling nfs4_call_sync()
  NFS: sysfs: fix leak when nfs_client kobject add fails
  NFSv2/v3: Fix error handling in nfs_atomic_open_v23()
  nfs/localio: do not issue misaligned DIO out-of-order
  nfs/localio: Ensure DIO WRITE's IO on stable storage upon completion
  nfs/localio: backfill missing partial read support for misaligned DIO
  nfs/localio: add refcounting for each iocb IO associated with NFS pgio header
  nfs/localio: remove unecessary ENOTBLK handling in DIO WRITE support
  NFS: Check the TLS certificate fields in nfs_match_client()
  pnfs: Set transport security policy to RPC_XPRTSEC_NONE unless using TLS
  pnfs: Fix TLS logic in _nfs4_pnfs_v4_ds_connect()
  pnfs: Fix TLS logic in _nfs4_pnfs_v3_ds_connect()
2025-11-14 13:44:23 -08:00
Linus Torvalds
d5c1b4b432 Merge tag 'drm-fixes-2025-11-15' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "Weekly fixes, amdgpu and vmwgfx making up the most of it, along with
  panthor and i915/xe.

  Seems about right for this time of development, nothing major
  outstanding.

  client:
   - Fix description of module parameter

  panthor:
   - Flush writes before mapping buffers

  vmwgfx:
   - Improve command validation
   - Improve ref counting
   - Fix cursor-plane support

  amdgpu:
   - Disallow P2P DMA for GC 12 DCC surfaces
   - ctx error handling fix
   - UserQ fixes
   - VRR fix
   - ISP fix
   - JPEG 5.0.1 fix

  amdkfd:
   - Save area check fix
   - Fix GPU mappings for APU after prefetch

  i915:
   - Fix PSR's pipe to vblank conversion
   - Disable Panel Replay on MST links

  xe:
   - New HW workarounds affecting PTL and WCL platforms

* tag 'drm-fixes-2025-11-15' of https://gitlab.freedesktop.org/drm/kernel:
  drm/client: fix MODULE_PARM_DESC string for "active"
  drm/i915/dp_mst: Disable Panel Replay
  drm/amdkfd: Fix GPU mappings for APU after prefetch
  drm/amdkfd: relax checks for over allocation of save area
  drm/amdgpu/jpeg: Add parse_cs for JPEG5_0_1
  drm/amd/amdgpu: Ensure isp_kernel_buffer_alloc() creates a new BO
  drm/amd/display: Allow VRR params change if unsynced with the stream
  drm/amdgpu: fix lock warning in amdgpu_userq_fence_driver_process
  drm/amdgpu: jump to the correct label on failure
  drm/amdgpu: disable peer-to-peer access for DCC-enabled GC12 VRAM surfaces
  drm/xe/xe3lpg: Extend Wa_15016589081 for xe3lpg
  drm/xe/xe3: Extend wa_14023061436
  drm/xe/xe3: Add WA_14024681466 for Xe3_LPG
  drm/i915/psr: fix pipe to vblank conversion
  drm/panthor: Flush shmem writes before mapping buffers CPU-uncached
  drm/vmwgfx: Restore Guest-Backed only cursor plane support
  drm/vmwgfx: Use kref in vmw_bo_dirty
  drm/vmwgfx: Validate command header size against SVGA_CMD_MAX_DATASIZE
2025-11-14 13:39:15 -08:00
Linus Torvalds
ccc0011804 Merge tag 'mmc-v6.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc
Pull MMC fixes from Ulf Hansson:

 - dw_mmc-rockchip: Fix internal phase calculation

 - pxamci: Simplify and fix ->probe() error handling

 - sdhci-of-dwcmshc: Fix strbin signal delay

 - wmt-sdmmc: Fix compile test default

* tag 'mmc-v6.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc:
  mmc: dw_mmc-rockchip: Fix wrong internal phase calculate
  mmc: pxamci: Simplify pxamci_probe() error handling using devm APIs
  mmc: sdhci-of-dwcmshc: Change DLL_STRBIN_TAPNUM_DEFAULT to 0x4
  mmc: wmt-sdmmc: fix compile test default
2025-11-14 13:34:36 -08:00
Linus Torvalds
241e99dbdc Merge tag 'pmdomain-v6.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm
Pull pmdomain fixes from Ulf Hansson:

 - imx: Fix reference count leak in ->remove()

 - samsung: Rework legacy splash-screen handover workaround

 - samsung: Fix potential memleak during ->probe()

 - arm: Fix genpd leak on provider registration failure for scmi

* tag 'pmdomain-v6.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/linux-pm:
  pmdomain: imx: Fix reference count leak in imx_gpc_remove
  pmdomain: samsung: Rework legacy splash-screen handover workaround
  pmdomain: arm: scmi: Fix genpd leak on provider registration failure
  pmdomain: samsung: plug potential memleak during probe
2025-11-14 13:29:15 -08:00
Linus Torvalds
6014e75e87 Merge tag 'cxl-fixes-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull cxl fixes from Dave Jiang:

 - Fix incorrect device handle check for Generic Initiator

 - Fix offset calculation for extended linear cache poison injection

 - Fix lockdep warning for hmem_register_resource()

* tag 'cxl-fixes-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  acpi/hmat: Fix lockdep warning for hmem_register_resource()
  cxl: Adjust offset calculation for poison injection
  acpi,srat: Fix incorrect device handle check for Generic Initiator
2025-11-14 13:25:00 -08:00
Arnd Bergmann
124c98b100 Merge tag 'tee-fix-for-v6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/jenswi/linux-tee into arm/fixes
TEE kernel-doc fixes for v6.18

* tag 'tee-fix-for-v6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/jenswi/linux-tee:
  tee: <uapi/linux/tee.h: fix all kernel-doc issues

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:22:00 +01:00
Arnd Bergmann
4e08ec0054 Merge tag 'arm-soc/for-6.18/devicetree-arm64-fixes-v2' of https://github.com/Broadcom/stblinux into arm/fixes
This pull request contains Broadcom ARM64-based SoCs Device Tree fixes
for 6.18, please pull the following:

- Andrea assigns clocks rates for the Ethernet controller for the
  Raspberry Pi 5 systems

- Laurent adds an ethernet0 alias to allow client programs consuming
  that alias to populate the correct Ethernet address for the Raspberry
  Pi 5 systems

* tag 'arm-soc/for-6.18/devicetree-arm64-fixes-v2' of https://github.com/Broadcom/stblinux:
  arm64: dts: broadcom: bcm2712: rpi-5: Add ethernet0 alias
  arm64: dts: broadcom: Assign clock rates in eth node for RPi5

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:20:06 +01:00
Arnd Bergmann
c3216f1d62 Merge tag 'reset-fixes-for-v6.18' of https://git.pengutronix.de/git/pza/linux into arm/fixes
Reset controller fixes for v6.18

* Fix incorrect EARC reset masks in the reset-imx8mp-audiomix driver,
  introduced in commit a83bc87cd3.

* tag 'reset-fixes-for-v6.18' of https://git.pengutronix.de/git/pza/linux:
  reset: imx8mp-audiomix: Fix bad mask values

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:18:11 +01:00
Arnd Bergmann
7b52117eea Merge tag 'arm-soc/for-6.18/devicetree-fixes-part2' of https://github.com/Broadcom/stblinux into arm/fixes
This pull request contains Broadcom ARM-based SoCs Device Tree files updates
for 6.18, please pull the following:

- Rafal fixes the Ethernet PHY address on the Luxul XAP-1440

* tag 'arm-soc/for-6.18/devicetree-fixes-part2' of https://github.com/Broadcom/stblinux:
  ARM: dts: BCM53573: Fix address of Luxul XAP-1440's Ethernet PHY

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:14:49 +01:00
Arnd Bergmann
020e792ca3 Merge tag 'arm-soc/for-6.18/defconfig-arm64-fixes' of https://github.com/Broadcom/stblinux into arm/fixes
This pull request contains Broadcom ARM64 defconfig updates for 6.18,
please pull the following:

- Stefan ensures that the clk-raspberrypi driver which is now the clock
  provider is built into the kernel image to satisfy root over NFS

* tag 'arm-soc/for-6.18/defconfig-arm64-fixes' of https://github.com/Broadcom/stblinux:
  arm64: defconfig: Fix V3D deferred probe timeout

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:13:21 +01:00
Arnd Bergmann
f4f012b0ee Merge tag 'imx-fixes-6.18' of https://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux into arm/fixes
i.MX fixes for 6.18:

- Enable ext4 support explicitly in imx_v6_v7_defconfig to fix a regression
  caused by commit d6ace46c82 ("ext4: remove obsolete EXT3 config options")
- Correct report-rate-hz setting for imx6ull-engicam-microgea-rmm board
- Fix MSI mapping for i.MX95 PCIe endpoint device nodes
- Fix USB OTG role switching for imx8mp-kontron board
- Fix a dt-schema warning caused by audmux node for imx51-zii-rdu1 board
- Avoid gpio0_mipi_csi GPIOs being deferred for i.MX8 img subsystem

* tag 'imx-fixes-6.18' of https://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux:
  arm64: dts: imx8mp-kontron: Fix USB OTG role switching
  arm64: dts: imx95: Fix MSI mapping for PCIe endpoint nodes
  arm64: dts: imx8-ss-img: Avoid gpio0_mipi_csi GPIOs being deferred
  arm: imx_v6_v7_defconfig: enable ext4 directly
  ARM: dts: imx51-zii-rdu1: Fix audmux node names
  ARM: dts: imx6ull-engicam-microgea-rmm: fix report-rate-hz value

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:10:31 +01:00
Krzysztof Kozlowski
ec0ca4be11 MAINTAINERS: Update Krzysztof Kozlowski's email
Update Krzysztof Kozlowski's email address in mailmap to stay reachable.

Link: https://patch.msgid.link/20251021095426.86549-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20251031135041.78789-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:09:51 +01:00
Arnd Bergmann
64bdb49f2d Merge tag 'memory-controller-drv-fixes-6.18' of https://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl into arm/fixes
Memory controller drivers - fixes for v6.18

Correct incorrect ID used for the memory controller client IDs in
Tegra210 Memory Controller driver, introduced in v6.18-rc1.

* tag 'memory-controller-drv-fixes-6.18' of https://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl:
  memory: tegra210: Fix incorrect client ids

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:05:41 +01:00
Linus Torvalds
de90cc610e Merge tag 'spi-fix-v6.18-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi
Pull spi fixes from Mark Brown:
 "A few standard fixes here, plus one more interesting one from Hans
  which addresses an issue where a move in when we requested GPIOs on
  ACPI systems caused us to stop doing pinmuxing and leave things
  floating that we'd really rather not have floating"

* tag 'spi-fix-v6.18-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi:
  spi: Add TODO comment about ACPI GPIO setup
  spi: xilinx: increase number of retries before declaring stall
  spi: imx: keep dma request disabled before dma transfer setup
  spi: Try to get ACPI GPIO IRQ earlier
2025-11-14 13:04:35 -08:00
Arnd Bergmann
dc00a9fea2 Merge tag 'aspeed-6.18-fixes-0' of https://git.kernel.org/pub/scm/linux/kernel/git/bmc/linux into arm/fixes
First batch of ASPEED fixes for 6.18

This time it's just the one fix addressing a PHY configuration regression in the
Fuji (Meta) platform's mac3 devicetree node.

* tag 'aspeed-6.18-fixes-0' of https://git.kernel.org/pub/scm/linux/kernel/git/bmc/linux:
  ARM: dts: aspeed: fuji-data64: Enable mac3 controller

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:03:53 +01:00
Arnd Bergmann
3e40c91981 Merge tag 'tegra-for-6.18-arm64-dt-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux into arm/fixes
arm64: tegra: Fixes for v6.18

This contains a simple fix to mark the Ethernet PHY on Jetson Xavier NX
as a wakeup source so the device can support WoL.

* tag 'tegra-for-6.18-arm64-dt-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tegra/linux:
  arm64: tegra: Mark Jetson Xavier NX's PHY as a wakeup source

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
2025-11-14 22:02:49 +01:00
Linus Torvalds
809bd27452 Merge tag 'regulator-fix-v6.18-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator
Pull regulator fix from Mark Brown:
 "One simple fix for a GPIO descriptor leak in the probe error handling
  for the fixed regulator"

* tag 'regulator-fix-v6.18-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator:
  regulator: fixed: fix GPIO descriptor leak on register failure
2025-11-14 13:01:23 -08:00
Linus Torvalds
27f518609e Merge tag 'sound-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
Pull sound fixes from Takashi Iwai:
 "A collection of small fixes. All changes are device-specific, and
  nothing stands out.

   - A regression fix for HD-audio HDMI probe

   - USB-audio hardening patches for issues spotted by fuzzers

   - ASoC fixes for TAS278x, SoundWire and Cirrus

   - Usual HD-audio and USB-audio quirks"

* tag 'sound-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound:
  ALSA: usb-audio: Add native DSD quirks for PureAudio DAC series
  ASoC: rsnd: fix OF node reference leak in rsnd_ssiu_probe()
  ALSA: hda/tas2781: Correct the wrong project ID
  ALSA: usb-audio: Fix NULL pointer dereference in snd_usb_mixer_controls_badd
  ASoC: SDCA: bug fix while parsing mipi-sdca-control-cn-list
  ALSA: usb-audio: Fix potential overflow of PCM transfer buffer
  ALSA: hda/tas2781: Add new quirk for HP new projects
  ASoC: tas2781: fix getting the wrong device number
  ASoC: codecs: va-macro: fix resource leak in probe error path
  ASoC: tas2783A: Fix issues in firmware parsing
  ASoC: sdw_utils: fix device reference leak in is_sdca_endpoint_present()
  ASoC: cs4271: Fix regulator leak on probe failure
  ALSA: hda/hdmi: Fix breakage at probing nvhdmi-mcp driver
  ASoC: da7213: Use component driver suspend/resume
  ALSA: usb-audio: add min_mute quirk for SteelSeries Arctis
  ASoC: doc: cs35l56: Update firmware filename description for B0 silicon
2025-11-14 12:50:08 -08:00
Linus Torvalds
d4f8cccc62 Merge tag 'block-6.18-20251114' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block fixlet from Jens Axboe:
 "Been sitting on this one for a week or two, planning on sending it out
  when there were other block changes for 6.18. But as that hasn't
  materialized in the second week of sitting on it, let's flush it out.

  A previous commit updated my git tree locations, but one was missed as
  it was already set to the git.kernel.org one. But the git location swap
  also renamed the actual tree from linux-block to just linux, let's get
  that last one updated too"

* tag 'block-6.18-20251114' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  MAINTAINERS: correct git location for block layer tree
2025-11-14 10:18:45 -08:00
Linus Torvalds
ac9f4f306d Merge tag 'io_uring-6.18-20251113' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fixes from Jens Axboe:

 - Use the actual segments in a request when for bvec based buffers

 - Fix an odd case where the iovec might get leaked for a read/write
   request, if it was newly allocated, overflowed the alloc cache, and
   hit an early error

 - Minor tweak to the query API added in this release, returning the
   number of available entries

* tag 'io_uring-6.18-20251113' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring/rsrc: don't use blk_rq_nr_phys_segments() as number of bvecs
  io_uring/query: return number of available queries
  io_uring/rw: ensure allocated iovec gets cleared for early failure
2025-11-14 09:57:30 -08:00
Eduard Zingerman
6c762611fe selftests/bpf: Test widen_imprecise_scalars() with different stack depth
A test case for a situation when widen_imprecise_scalars() is called
with old->allocated_stack > cur->allocated_stack. Test structure:

    def widening_stack_size_bug():
      r1 = 0
      for r6 in 0..1:
        iterator_with_diff_stack_depth(r1)
        r1 = 42

    def iterator_with_diff_stack_depth(r1):
      if r1 != 42:
        use 128 bytes of stack
      iterator based loop

iterator_with_diff_stack_depth() is verified with r1 == 0 first and
r1 == 42 next. Causing stack usage of 128 bytes on a first visit and 8
bytes on a second. Such arrangement triggered a KASAN error in
widen_imprecise_scalars().

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20251114025730.772723-2-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-11-14 09:26:28 -08:00
Eduard Zingerman
b0c8e6d3d8 bpf: account for current allocated stack depth in widen_imprecise_scalars()
The usage pattern for widen_imprecise_scalars() looks as follows:

    prev_st = find_prev_entry(env, ...);
    queued_st = push_stack(...);
    widen_imprecise_scalars(env, prev_st, queued_st);

Where prev_st is an ancestor of the queued_st in the explored states
tree. This ancestor is not guaranteed to have same allocated stack
depth as queued_st. E.g. in the following case:

    def main():
      for i in 1..2:
        foo(i)        // same callsite, differnt param

    def foo(i):
      if i == 1:
        use 128 bytes of stack
      iterator based loop

Here, for a second 'foo' call prev_st->allocated_stack is 128,
while queued_st->allocated_stack is much smaller.
widen_imprecise_scalars() needs to take this into account and avoid
accessing bpf_verifier_state->frame[*]->stack out of bounds.

Fixes: 2793a8b015 ("bpf: exact states comparison for iterator convergence checks")
Reported-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20251114025730.772723-1-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-11-14 09:26:05 -08:00
Diogo Ivo
660b299bed Revert "drm/tegra: dsi: Clear enable register if powered by bootloader"
Commit b6bcbce335 ("soc/tegra: pmc: Ensure power-domains are in a
known state") was introduced so that all power domains get initialized
to a known working state when booting and it does this by shutting them
down (including asserting resets and disabling clocks) before registering
each power domain with the genpd framework, leaving it to each driver to
later on power its needed domains.

This caused the Google Pixel C to hang when booting due to a workaround
in the DSI driver introduced in commit b22fd0b963 ("drm/tegra: dsi:
Clear enable register if powered by bootloader") meant to handle the case
where the bootloader enabled the DSI hardware module. The workaround relies
on reading a hardware register to determine the current status and after
b6bcbce335 that now happens in a powered down state thus leading to
the boot hang.

Fix this by reverting b22fd0b963 since currently we are guaranteed
that the hardware will be fully reset by the time we start enabling the
DSI module.

Fixes: b6bcbce335 ("soc/tegra: pmc: Ensure power-domains are in a known state")
Cc: stable@vger.kernel.org
Signed-off-by: Diogo Ivo <diogo.ivo@tecnico.ulisboa.pt>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Link: https://patch.msgid.link/20251103-diogo-smaug_ec_typec-v1-1-be656ccda391@tecnico.ulisboa.pt
2025-11-14 18:21:11 +01:00
Prateek Agarwal
6cbab9f0da drm/tegra: Add call to put_pid()
Add a call to put_pid() corresponding to get_task_pid().
host1x_memory_context_alloc() does not take ownership of the PID so we
need to free it here to avoid leaking.

Signed-off-by: Prateek Agarwal <praagarwal@nvidia.com>
Fixes: e09db97889 ("drm/tegra: Support context isolation")
[mperttunen@nvidia.com: reword commit message]
Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Link: https://patch.msgid.link/20250919-host1x-put-pid-v1-1-19c2163dfa87@nvidia.com
2025-11-14 18:17:51 +01:00
Ma Ke
4c5376b4b1 drm/tegra: dc: Fix reference leak in tegra_dc_couple()
driver_find_device() calls get_device() to increment the reference
count once a matching device is found, but there is no put_device() to
balance the reference count. To avoid reference count leakage, add
put_device() to decrease the reference count.

Found by code review.

Cc: stable@vger.kernel.org
Fixes: a31500fe70 ("drm/tegra: dc: Restore coupling of display controllers")
Signed-off-by: Ma Ke <make24@iscas.ac.cn>
Acked-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Link: https://patch.msgid.link/20251022114720.24937-1-make24@iscas.ac.cn
2025-11-14 17:58:26 +01:00
Eric Dumazet
4ef9274362 bpf: Add bpf_prog_run_data_pointers()
syzbot found that cls_bpf_classify() is able to change
tc_skb_cb(skb)->drop_reason triggering a warning in sk_skb_reason_drop().

WARNING: CPU: 0 PID: 5965 at net/core/skbuff.c:1192 __sk_skb_reason_drop net/core/skbuff.c:1189 [inline]
WARNING: CPU: 0 PID: 5965 at net/core/skbuff.c:1192 sk_skb_reason_drop+0x76/0x170 net/core/skbuff.c:1214

struct tc_skb_cb has been added in commit ec624fe740 ("net/sched:
Extend qdisc control block with tc control block"), which added a wrong
interaction with db58ba4592 ("bpf: wire in data and data_end for
cls_act_bpf").

drop_reason was added later.

Add bpf_prog_run_data_pointers() helper to save/restore the net_sched
storage colliding with BPF data_meta/data_end.

Fixes: ec624fe740 ("net/sched: Extend qdisc control block with tc control block")
Reported-by: syzbot <syzkaller@googlegroups.com>
Closes: https://lore.kernel.org/netdev/6913437c.a70a0220.22f260.013b.GAE@google.com/
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://patch.msgid.link/20251112125516.1563021-1-edumazet@google.com
2025-11-14 08:56:49 -08:00
Vlastimil Babka
ec33b59542 mm/mempool: fix poisoning order>0 pages with HIGHMEM
The kernel test has reported:

  BUG: unable to handle page fault for address: fffba000
  #PF: supervisor write access in kernel mode
  #PF: error_code(0x0002) - not-present page
  *pde = 03171067 *pte = 00000000
  Oops: Oops: 0002 [#1]
  CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Tainted: G                T   6.18.0-rc2-00031-gec7f31b2a2d3 #1 NONE  a1d066dfe789f54bc7645c7989957d2bdee593ca
  Tainted: [T]=RANDSTRUCT
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
  EIP: memset (arch/x86/include/asm/string_32.h:168 arch/x86/lib/memcpy_32.c:17)
  Code: a5 8b 4d f4 83 e1 03 74 02 f3 a4 83 c4 04 5e 5f 5d 2e e9 73 41 01 00 90 90 90 3e 8d 74 26 00 55 89 e5 57 56 89 c6 89 d0 89 f7 <f3> aa 89 f0 5e 5f 5d 2e e9 53 41 01 00 cc cc cc 55 89 e5 53 57 56
  EAX: 0000006b EBX: 00000015 ECX: 001fefff EDX: 0000006b
  ESI: fffb9000 EDI: fffba000 EBP: c611fbf0 ESP: c611fbe8
  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00010287
  CR0: 80050033 CR2: fffba000 CR3: 0316e000 CR4: 00040690
  Call Trace:
   poison_element (mm/mempool.c:83 mm/mempool.c:102)
   mempool_init_node (mm/mempool.c:142 mm/mempool.c:226)
   mempool_init_noprof (mm/mempool.c:250 (discriminator 1))
   ? mempool_alloc_pages (mm/mempool.c:640)
   bio_integrity_initfn (block/bio-integrity.c:483 (discriminator 8))
   ? mempool_alloc_pages (mm/mempool.c:640)
   do_one_initcall (init/main.c:1283)

Christoph found out this is due to the poisoning code not dealing
properly with CONFIG_HIGHMEM because only the first page is mapped but
then the whole potentially high-order page is accessed.

We could give up on HIGHMEM here, but it's straightforward to fix this
with a loop that's mapping, poisoning or checking and unmapping
individual pages.

Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202511111411.9ebfa1ba-lkp@intel.com
Analyzed-by: Christoph Hellwig <hch@lst.de>
Fixes: bdfedb76f4 ("mm, mempool: poison elements backed by slab allocator")
Cc: stable@vger.kernel.org
Tested-by: kernel test robot <oliver.sang@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20251113-mempool-poison-v1-1-233b3ef984c3@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
2025-11-14 17:55:23 +01:00
Linus Torvalds
b86caedd0b Merge tag 'v6.18-p5' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto fix from Herbert Xu:

 - Fix device reference leak in hisilicon

* tag 'v6.18-p5' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  crypto: hisilicon/qm - Fix device reference leak in qm_get_qos_value
2025-11-14 08:32:58 -08:00
Linus Torvalds
95baf63fe8 Merge tag 'v6.18-rc5-smb-client-fixes' of git://git.samba.org/sfrench/cifs-2.6
Pull smb client fixes from Steve French:

 - Multichannel reconnect channel selection fix

 - Fix for smbdirect (RDMA) disconnect bug

 - Fix for incorrect username length check

 - Fix memory leak in mount parm processing

* tag 'v6.18-rc5-smb-client-fixes' of git://git.samba.org/sfrench/cifs-2.6:
  smb: client: let smbd_disconnect_rdma_connection() turn CREATED into DISCONNECTED
  smb: fix invalid username check in smb3_fs_context_parse_param()
  cifs: client: fix memory leak in smb3_fs_context_parse_param
  smb: client: fix cifs_pick_channel when channel needs reconnect
2025-11-14 08:30:48 -08:00
Eslam Khafagy
e0fd4d42e2 posix-timers: Plug potential memory leak in do_timer_create()
When posix timer creation is set to allocate a given timer ID and the
access to the user space value faults, the function terminates without
freeing the already allocated posix timer structure.

Move the allocation after the user space access to cure that.

[ tglx: Massaged change log ]

Fixes: ec2d0c0462 ("posix-timers: Provide a mechanism to allocate a given timer ID")
Reported-by: syzbot+9c47ad18f978d4394986@syzkaller.appspotmail.com
Suggested-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Eslam Khafagy <eslam.medhat1993@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://patch.msgid.link/20251114122739.994326-1-eslam.medhat1993@gmail.com
Closes: https://lore.kernel.org/all/69155df4.a70a0220.3124cb.0017.GAE@google.com/T/
2025-11-14 16:58:31 +01:00
Nick Hu
14473a1f88 irqchip/riscv-intc: Add missing free() callback in riscv_intc_domain_ops
The irq_domain_free_irqs() helper requires that the irq_domain_ops->free
callback is implemented. Otherwise, the kernel reports the warning message
"NULL pointer, cannot free irq" when irq_dispose_mapping() is invoked to
release the per-HART local interrupts.

Set irq_domain_ops->free to irq_domain_free_irqs_top() to cure that.

Fixes: 832f15f426 ("RISC-V: Treat IPIs as normal Linux IRQs")
Signed-off-by: Nick Hu <nick.hu@sifive.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://patch.msgid.link/20251114-rv-intc-fix-v1-1-a3edd1c1a868@sifive.com
2025-11-14 16:52:34 +01:00
Heiko Carstens
31475b8811 s390/mm: Fix __ptep_rdp() inline assembly
When a zero ASCE is passed to the __ptep_rdp() inline assembly, the
generated instruction should have the R3 field of the instruction set to
zero. However the inline assembly is written incorrectly: for such cases a
zero is loaded into a register allocated by the compiler and this register
is then used by the instruction.

This means that selected TLB entries may not be flushed since the specified
ASCE does not match the one which was used when the selected TLB entries
were created.

Fix this by removing the asce and opt parameters of __ptep_rdp(), since
all callers always pass zero, and use a hard-coded register zero for
the R3 field.

Fixes: 0807b85652 ("s390/mm: add support for RDP (Reset DAT-Protection)")
Cc: stable@vger.kernel.org
Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2025-11-14 15:58:20 +01:00
Ville Syrjälä
37339122a7 firewire: core: Initialize topology_map.lock
Lockdep barfs on the new uninitialized spinlock.
Initialize it.

protip: enable lockdep (CONFIG_PROVE_LOCKING=y) when
        doing locking changes

firewire_ohci 0000:02:01.1: added OHCI v1.10 device as card 0, 4 IR + 4 IT contexts, quirks 0x11
INFO: trying to register non-static key.
The code is fine but needs lockdep annotation, or maybe
you didn't initialize this object before use?
turning off the locking correctness validator.
CPU: 0 UID: 0 PID: 1042 Comm: irq/17-firewire Not tainted 6.17.0-rc2-cl-bisect2-00026-g7d138cb269db #136 PREEMPT
Hardware name: Dell Inc. Latitude E5400                  /0D695C, BIOS A19 06/13/2013
Call Trace:
 <TASK>
 dump_stack_lvl+0x6d/0xa0
 register_lock_class+0x783/0x790
 ? find_held_lock+0x2b/0x80
 ? __mod_timer+0x110/0x320
 ? __mod_timer+0x110/0x320
 __lock_acquire+0x405/0x2600
 lock_acquire+0xca/0x2e0
 ? fw_core_handle_bus_reset+0x888/0xca0 [firewire_core]
 ? fw_core_handle_bus_reset+0x878/0xca0 [firewire_core]
 ? fw_core_handle_bus_reset+0x878/0xca0 [firewire_core]
 _raw_spin_lock+0x2e/0x40
 ? fw_core_handle_bus_reset+0x888/0xca0 [firewire_core]
 fw_core_handle_bus_reset+0x888/0xca0 [firewire_core]
 handle_selfid_complete_event+0x35c/0x7a0 [firewire_ohci]
 ? irq_thread+0x8d/0x280
 irq_thread_fn+0x18/0x50
 irq_thread+0x15a/0x280
 ? irq_check_status_bit+0x100/0x100
 ? lockdep_hardirqs_on+0x78/0x100
 ? irq_finalize_oneshot.part.0+0xc0/0xc0
 ? irq_forced_thread_fn+0x60/0x60
 kthread+0x114/0x200
 ? kthreads_online_cpu+0x110/0x110
 ret_from_fork+0x158/0x1e0
 ? kthreads_online_cpu+0x110/0x110
 ret_from_fork_asm+0x11/0x20
 </TASK>

Reported-by: Erhard Furtner <erhard_f@mailbox.org>
Fixes: 7d138cb269 ("firewire: core: use spin lock specific to topology map")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
2025-11-14 23:18:50 +09:00
Lushih Hsieh
21a9ab5b90 ALSA: usb-audio: Add native DSD quirks for PureAudio DAC series
The PureAudio APA DAC and Lotus DAC5 series are USB Audio
2.0 Class devices that support native Direct Stream Digital (DSD)
playback via specific vendor protocols.

Without these quirks, the devices may only function in standard
PCM mode, or fail to correctly report their DSD format capabilities
to the ALSA framework, preventing native DSD playback under Linux.

This commit adds new quirk entries for the mentioned DAC models
based on their respective Vendor/Product IDs (VID:PID), for example:
0x16d0:0x0ab1 (APA DAC), 0x16d0:0xeca1 (DAC5 series), etc.

The quirk ensures correct DSD format handling by setting the required
SNDRV_PCM_FMTBIT_DSD_U32_BE format bit and defining the DSD-specific
Audio Class 2.0 (AC2.0) endpoint configurations. This allows the ALSA
DSD API to correctly address the device for high-bitrate DSD streams,
bypassing the need for DoP (DSD over PCM).

Test on APA DAC and Lotus DAC5 SE under Arch Linux.

Tested-by: Lushih Hsieh <bruce@mail.kh.edu.tw>
Signed-off-by: Lushih Hsieh <bruce@mail.kh.edu.tw>
Link: https://patch.msgid.link/20251114052053.54989-1-bruce@mail.kh.edu.tw
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2025-11-14 14:19:47 +01:00
Borislav Petkov (AMD)
dd14022a7c x86/microcode/AMD: Add Zen5 model 0x44, stepping 0x1 minrev
Add the minimum Entrysign revision for that model+stepping to the list
of minimum revisions.

Fixes: 50cef76d5c ("x86/microcode/AMD: Load only SHA256-checksummed patches")
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: <stable@kernel.org>
Link: https://lore.kernel.org/r/e94dd76b-4911-482f-8500-5c848a3df026@citrix.com
2025-11-14 14:04:49 +01:00
Mario Limonciello
e1a97a627c x86/CPU/AMD: Add additional fixed RDSEED microcode revisions
Microcode that resolves the RDSEED failure (SB-7055 [1]) has been released for
additional Zen5 models to linux-firmware [2]. Update the zen5_rdseed_microcode
array to cover these new models.

Fixes: 607b9fb2ce ("x86/CPU/AMD: Add RDSEED fix for Zen5")
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: <stable@kernel.org>
Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-7055.html [1]
Link: 6167e55669 [2]
Link: https://patch.msgid.link/20251113223608.1495655-1-mario.limonciello@amd.com
2025-11-14 13:02:11 +01:00
Luke Wang
f84fd5bec5 pwm: adp5585: Correct mismatched pwm chip info
The register addresses of ADP5585 and ADP5589 are swapped.

Fixes: 75024f97e8 ("pwm: adp5585: add support for adp5589")
Signed-off-by: Luke Wang <ziniu.wang_1@nxp.com>
Acked-by: Nuno Sá <nuno.sa@analog.com>
Tested-by: Liu Ying <victor.liu@nxp.com> # ADP5585 PWM
Link: https://patch.msgid.link/20251114065308.2074893-1-ziniu.wang_1@nxp.com
Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
2025-11-14 11:55:56 +01:00
Zilin Guan
a55ef3bff8 xfrm: fix memory leak in xfrm_add_acquire()
The xfrm_add_acquire() function constructs an xfrm policy by calling
xfrm_policy_construct(). This allocates the policy structure and
potentially associates a security context and a device policy with it.

However, at the end of the function, the policy object is freed using
only kfree() . This skips the necessary cleanup for the security context
and device policy, leading to a memory leak.

To fix this, invoke the proper cleanup functions xfrm_dev_policy_delete(),
xfrm_dev_policy_free(), and security_xfrm_policy_free() before freeing the
policy object. This approach mirrors the error handling path in
xfrm_add_policy(), ensuring that all associated resources are correctly
released.

Fixes: 980ebd2579 ("[IPSEC]: Sync series - acquire insert")
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-11-14 10:12:36 +01:00
Takashi Iwai
fa3c727e05 Merge tag 'asoc-fix-v6.18-rc5' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Fixes for v6.18

A small collection of fixes, all driver specific and none especially
remarkable unless you have the hardware (many not even then).
2025-11-14 09:47:28 +01:00
Dave Airlie
362a7d4fd5 Merge tag 'drm-xe-fixes-2025-11-13' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes:
 - New HW workarounds affecting PTL and WCL platforms
   (Nitin Gote, Tangudu Tilak Tirumalesh)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patch.msgid.link/ay2qztgonodwson6tuzcv5napjmqbgwzv27so4ybfola34guux@xgufrrmbzyws
2025-11-14 17:51:17 +10:00
Dave Airlie
538e0110fe Merge tag 'drm-intel-fixes-2025-11-13' of https://gitlab.freedesktop.org/drm/i915/kernel into drm-fixes
- Fix PSR's pipe to vblank conversion (Jani)
- Disable Panel Replay on MST links (Imre)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patch.msgid.link/aRXdQnitzyFcokhF@intel.com
2025-11-14 17:50:52 +10:00
Dave Airlie
15ebea1bdf Merge tag 'drm-misc-fixes-2025-11-13' of https://gitlab.freedesktop.org/drm/misc/kernel into drm-fixes
Short summary of fixes pull:

client:
- Fix description of module parameter

panthor:
- Flush writes before mapping buffers

vmwgfx:
- Improve command validation
- Improve ref counting
- Fix cursor-plane support

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patch.msgid.link/20251113132317.GA451885@linux.fritz.box
2025-11-14 17:24:57 +10:00
Dave Airlie
63444b4ca4 Merge tag 'amd-drm-fixes-6.18-2025-11-12' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-6.18-2025-11-12:

amdgpu:
- Disallow P2P DMA for GC 12 DCC surfaces
- ctx error handling fix
- UserQ fixes
- VRR fix
- ISP fix
- JPEG 5.0.1 fix

amdkfd:
- Save area check fix
- Fix GPU mappings for APU after prefetch

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20251112200930.8788-1-alexander.deucher@amd.com
2025-11-14 17:24:27 +10:00
Zilin Guan
407a06507c mlxsw: spectrum: Fix memory leak in mlxsw_sp_flower_stats()
The function mlxsw_sp_flower_stats() calls mlxsw_sp_acl_ruleset_get() to
obtain a ruleset reference. If the subsequent call to
mlxsw_sp_acl_rule_lookup() fails to find a rule, the function returns
an error without releasing the ruleset reference, causing a memory leak.

Fix this by using a goto to the existing error handling label, which
calls mlxsw_sp_acl_ruleset_put() to properly release the reference.

Fixes: 7c1b8eb175 ("mlxsw: spectrum: Add support for TC flower offload statistics")
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://patch.msgid.link/20251112052114.1591695-1-zilin@seu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-13 17:25:33 -08:00
Jiaming Zhang
f796a8dec9 net: core: prevent NULL deref in generic_hwtstamp_ioctl_lower()
The ethtool tsconfig Netlink path can trigger a null pointer
dereference. A call chain such as:

  tsconfig_prepare_data() ->
  dev_get_hwtstamp_phylib() ->
  vlan_hwtstamp_get() ->
  generic_hwtstamp_get_lower() ->
  generic_hwtstamp_ioctl_lower()

results in generic_hwtstamp_ioctl_lower() being called with
kernel_cfg->ifr as NULL.

The generic_hwtstamp_ioctl_lower() function does not expect
a NULL ifr and dereferences it, leading to a system crash.

Fix this by adding a NULL check for kernel_cfg->ifr in
generic_hwtstamp_ioctl_lower(). If ifr is NULL, return -EINVAL.

Fixes: 6e9e2eed4f ("net: ethtool: Add support for tsconfig command to get/set hwtstamp config")
Closes: https://lore.kernel.org/cd6a7056-fa6d-43f8-b78a-f5e811247ba8@linux.dev
Signed-off-by: Jiaming Zhang <r772577952@gmail.com>
Reviewed-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://patch.msgid.link/20251111173652.749159-2-r772577952@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-13 17:23:21 -08:00
Linus Torvalds
6da43bbeb6 Merge tag 'vfio-v6.18-rc6' of https://github.com/awilliam/linux-vfio
Pull VFIO seftest fixes from Alex Williamson:

 - Fix vfio selftests to remove the expectation that the IOMMU supports
   a 64-bit IOVA space.

   These manifest both in the original set of tests introduced this
   development cycle in identity mapping the IOVA to buffer virtual
   address space, as well as the more recent boundary testing.

   Implement facilities for collecting the valid IOVA ranges from the
   backend, implement a simple IOVA allocator, and use the information
   for determining extents (Alex Mastro)

* tag 'vfio-v6.18-rc6' of https://github.com/awilliam/linux-vfio:
  vfio: selftests: replace iova=vaddr with allocated iovas
  vfio: selftests: add iova allocator
  vfio: selftests: fix map limit tests to use last available iova
  vfio: selftests: add iova range query helpers
2025-11-13 17:00:40 -08:00
Linus Torvalds
01814e11e5 Merge tag 'hwmon-for-v6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging
Pull hwmon fixes from Guenter Roeck:

 - gpd-fan: Fix compilation error for non-ACPI builds, and initialize EC
   when loading the driver

* tag 'hwmon-for-v6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging:
  hwmon: (gpd-fan) initialize EC on driver load for Win 4
  hwmon: (gpd-fan) Fix compilation error in non-ACPI builds
2025-11-13 16:54:36 -08:00
Linus Torvalds
aecba2e013 Merge tag 'pm-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fixes from Rafael Wysocki:
 "These fix issues related to the handling of compressed hibernation
  images and a recent intel_pstate driver regression:

   - Fix issues related to using inadequate data types and incorrect use
     of atomic variables in the compressed hibernation images handling
     code that were introduced during the 6.9 development cycle (Mario
     Limonciello)

   - Move a X86_FEATURE_IDA check from turbo_is_disabled() to the places
     where a new value for MSR_IA32_PERF_CTL is computed in intel_pstate
     to address a regression preventing users from enabling turbo
     frequencies post-boot (Srinivas Pandruvada)"

* tag 'pm-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: intel_pstate: Check IDA only before MSR_IA32_PERF_CTL writes
  PM: hibernate: Fix style issues in save_compressed_image()
  PM: hibernate: Use atomic64_t for compressed_size variable
  PM: hibernate: Emit an error when image writing fails
2025-11-13 16:31:07 -08:00
Baruch Siach
7410c86fc0 MAINTAINERS: Remove eth bridge website
Ethernet bridge website URL shows "This page isn’t available".

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/0a32aaf7fa4473e7574f7327480e8fbc4fef2741.1762946223.git.baruch@tkos.co.il
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-13 16:24:14 -08:00
Linus Torvalds
6a3cc1b749 Merge tag 'acpi-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI fixes from Rafael Wysocki:
 "These fix issues in the ACPI CPPC library and in the recently added
  parser for the ACPI MRRM table:

   - Limit some checks in the ACPI CPPC library to online CPUs to avoid
     accessing uninitialized per-CPU variables when some CPUs are
     offline to start with, like during boot with 'nosmt=force' (Gautham
     Shenoy)

   - Rework add_boot_memory_ranges() in the ACPI MRRM table parser to
     fix memory leaks and improve error handling (Kaushlendra Kumar)"

* tag 'acpi-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  ACPI: MRRM: Fix memory leaks and improve error handling
  ACPI: CPPC: Limit perf ctrs in PCC check only to online CPUs
  ACPI: CPPC: Perform fast check switch only for online CPUs
  ACPI: CPPC: Check _CPC validity for only the online CPUs
  ACPI: CPPC: Detect preferred core availability on online CPUs
2025-11-13 16:22:36 -08:00
Mykola Kvach
b541452079 arm64: dts: rockchip: fix PCIe 3.3V regulator voltage on orangepi-5
The vcc3v3_pcie20 fixed regulator powers the PCIe device-side 3.3V rail
for pcie2x1l2 via vpcie3v3-supply. The DTS mistakenly set its
regulator-min/max-microvolt to 1800000 (1.8 V). Correct both to 3300000
(3.3 V) to match the rail name, the PCIe/M.2 power requirement, and the
actual hardware wiring on Orange Pi 5.

Fixes: b6bc755d80 ("arm64: dts: rockchip: Add Orange Pi 5")
Cc: stable@vger.kernel.org
Signed-off-by: Mykola Kvach <xakep.amatop@gmail.com>
Reviewed-by: Michael Riesch <michael.riesch@collabora.com>
Link: https://patch.msgid.link/cf6e08dfdfbf1c540685d12388baab1326f95d2c.1762165324.git.xakep.amatop@gmail.com
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-11-14 00:15:33 +01:00
Quentin Schulz
baa18d577c arm64: dts: rockchip: disable HS400 on RK3588 Tiger
We've had reports from the field that some RK3588 Tiger have random
issues with eMMC errors.

Applying commit a28352cf2d ("mmc: sdhci-of-dwcmshc: Change
DLL_STRBIN_TAPNUM_DEFAULT to 0x4") didn't help and seemed to have made
things worse for our board.

Our HW department checked the eMMC lines and reported that they are too
long and don't look great so signal integrity is probably not the best.

Note that not all Tigers with the same eMMC chip have errors, so the
suspicion is that we're really on the edge in terms of signal integrity
and only a handful devices are failing. Additionally, we have RK3588
Jaguars with the same eMMC chip but the layout is different and we also
haven't received reports about those so far.

Lowering the max-frequency to 150MHz from 200MHz instead of simply
disabling HS400 was briefly tested and seem to work as well. We've
disabled HS400 downstream and haven't received reports since so we'll go
with that instead of lowering the max-frequency.

Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
Fixes: 6173ef24b3 ("arm64: dts: rockchip: add RK3588-Q7 (Tiger) SoM")
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251112-tiger-hs200-v1-1-b50adac107c0@cherry.de
[added Fixes tag and stable-cc from 2nd mail]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-11-14 00:08:14 +01:00
Chukun Pan
264152a97e arm64: dts: rockchip: drop reset from rk3576 i2c9 node
The reset property is not part of the binding, so drop it.
It is also not used by the driver, so it was likely copied
from some vendor-kernel node.

Fixes: 57b1ce9039 ("arm64: dts: rockchip: Add rk3576 SoC base DT")
Signed-off-by: Chukun Pan <amadeus@jmu.edu.cn>
Link: https://patch.msgid.link/20251101140101.302229-1-amadeus@jmu.edu.cn
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-11-13 23:55:53 +01:00
Martin KaFai Lau
91a78ce994 Merge branch 'mptcp-fix-conflicts-between-mptcp-and-sockmap'
Jiayuan Chen says:

====================
mptcp: Fix conflicts between MPTCP and sockmap

Overall, we encountered a warning [1] that can be triggered by running the
selftest I provided.

sockmap works by replacing sk_data_ready, recvmsg, sendmsg operations and
implementing fast socket-level forwarding logic:
1. Users can obtain file descriptors through userspace socket()/accept()
   interfaces, then call BPF syscall to perform these replacements.
2. Users can also use the bpf_sock_hash_update helper (in sockops programs)
   to replace handlers when TCP connections enter ESTABLISHED state
  (BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB/BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)

However, when combined with MPTCP, an issue arises: MPTCP creates subflow
sk's and performs TCP handshakes, so the BPF program obtains subflow sk's
and may incorrectly replace their sk_prot. We need to reject such
operations. In patch 1, we set psock_update_sk_prot to NULL in the
subflow's custom sk_prot.

Additionally, if the server's listening socket has MPTCP enabled and the
client's TCP also uses MPTCP, we should allow the combination of subflow
and sockmap. This is because the latest Golang programs have enabled MPTCP
for listening sockets by default [2]. For programs already using sockmap,
upgrading Golang should not cause sockmap functionality to fail.

Patch 2 prevents the WARNING from occurring.

Despite these patches fixing stream corruption, users of sockmap must set
GODEBUG=multipathtcp=0 to disable MPTCP until sockmap fully supports it.

[1] truncated warning:
------------[ cut here ]------------
WARNING: CPU: 1 PID: 388 at net/mptcp/protocol.c:68 mptcp_stream_accept+0x34c/0x380
Modules linked in:
RIP: 0010:mptcp_stream_accept+0x34c/0x380
RSP: 0018:ffffc90000cf3cf8 EFLAGS: 00010202
PKRU: 55555554
Call Trace:
 <TASK>
 do_accept+0xeb/0x190
 ? __x64_sys_pselect6+0x61/0x80
 ? _raw_spin_unlock+0x12/0x30
 ? alloc_fd+0x11e/0x190
 __sys_accept4+0x8c/0x100
 __x64_sys_accept+0x1f/0x30
 x64_sys_call+0x202f/0x20f0
 do_syscall_64+0x72/0x9a0
 ? switch_fpu_return+0x60/0xf0
 ? irqentry_exit_to_user_mode+0xdb/0x1e0
 ? irqentry_exit+0x3f/0x50
 ? clear_bhb_loop+0x50/0xa0
 ? clear_bhb_loop+0x50/0xa0
 ? clear_bhb_loop+0x50/0xa0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
 </TASK>
---[ end trace 0000000000000000 ]---

[2]: https://go-review.googlesource.com/c/go/+/607715
====================

Link: https://patch.msgid.link/20251111060307.194196-1-jiayuan.chen@linux.dev
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
2025-11-13 13:18:27 -08:00
Jiayuan Chen
cb730e4ac1 selftests/bpf: Add mptcp test with sockmap
Add test cases to verify that when MPTCP falls back to plain TCP sockets,
they can properly work with sockmap.

Additionally, add test cases to ensure that sockmap correctly rejects
MPTCP sockets as expected.

Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Acked-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251111060307.194196-4-jiayuan.chen@linux.dev
2025-11-13 13:18:25 -08:00
Jiayuan Chen
c77b3b79a9 mptcp: Fix proto fallback detection with BPF
The sockmap feature allows bpf syscall from userspace, or based
on bpf sockops, replacing the sk_prot of sockets during protocol stack
processing with sockmap's custom read/write interfaces.
'''
tcp_rcv_state_process()
  syn_recv_sock()/subflow_syn_recv_sock()
    tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
      bpf_skops_established       <== sockops
        bpf_sock_map_update(sk)   <== call bpf helper
          tcp_bpf_update_proto()  <== update sk_prot
'''

When the server has MPTCP enabled but the client sends a TCP SYN
without MPTCP, subflow_syn_recv_sock() performs a fallback on the
subflow, replacing the subflow sk's sk_prot with the native sk_prot.
'''
subflow_syn_recv_sock()
  subflow_ulp_fallback()
    subflow_drop_ctx()
      mptcp_subflow_ops_undo_override()
'''

Then, this subflow can be normally used by sockmap, which replaces the
native sk_prot with sockmap's custom sk_prot. The issue occurs when the
user executes accept::mptcp_stream_accept::mptcp_fallback_tcp_ops().
Here, it uses sk->sk_prot to compare with the native sk_prot, but this
is incorrect when sockmap is used, as we may incorrectly set
sk->sk_socket->ops.

This fix uses the more generic sk_family for the comparison instead.

Additionally, this also prevents a WARNING from occurring:

result from ./scripts/decode_stacktrace.sh:
------------[ cut here ]------------
WARNING: CPU: 0 PID: 337 at net/mptcp/protocol.c:68 mptcp_stream_accept \
(net/mptcp/protocol.c:4005)
Modules linked in:
...

PKRU: 55555554
Call Trace:
<TASK>
do_accept (net/socket.c:1989)
__sys_accept4 (net/socket.c:2028 net/socket.c:2057)
__x64_sys_accept (net/socket.c:2067)
x64_sys_call (arch/x86/entry/syscall_64.c:41)
do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
RIP: 0033:0x7f87ac92b83d

---[ end trace 0000000000000000 ]---

Fixes: 0b4f33def7 ("mptcp: fix tcp fallback crash")
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Cc: <stable@vger.kernel.org>
Link: https://patch.msgid.link/20251111060307.194196-3-jiayuan.chen@linux.dev
2025-11-13 13:11:15 -08:00
Ian Rogers
b72b8132d8 perf libbfd: Ensure libbfd is initialized prior to use
Multiple threads may be creating and destroying BFD objects in
situations like `perf top`.

Without appropriate initialization crashes may occur during libbfd's
cache management.

BFD's locks require recursive mutexes, add support for these.

Committer testing:

This happens only when building with 'make BUILD_NONDISTRO=1' and having
the binutils-devel package (or equivalent) installed, i.e. linking with
binutils devel files, an opt-in perf build.

Before:

  root@x1:~# perf top
  perf: Segmentation fault
  -------- backtrace --------
  <SNIP multiple failed attempts at printing a backtrace>
  root@x1:~#

After this patch it works as before.

Closes: https://lore.kernel.org/lkml/aQt66zhfxSA80xwt@gentoo.org/
Fixes: 95931d9a59 ("perf libbfd: Move libbfd functionality to its own file")
Reported-by: Guilherme Amadio <amadio@gentoo.org>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-11-13 17:55:19 -03:00
Ravi Bangoria
3c723f4497 perf test: Fix lock contention test
Couple of independent fixes:

1. Wire in SIGSEGV handler that terminates the test with a failure code.

2. Use "--lock-cgroup" instead of "-g"; "-g" was proposed but never
   merged. See commit 4d1792d0a2 ("perf lock contention: Add
   --lock-cgroup option")

3. Call cleanup() on every normal exit so trap_cleanup() doesn't mistake
   it for an unexpected signal and emit a false-negative "Unexpected
   signal in main" message.

Before patch:

  # ./perf test -vv "lock contention"
   85: kernel lock contention analysis test:
  --- start ---
  test child forked, pid 610711
  Testing perf lock record and perf lock contention
  Testing perf lock contention --use-bpf
  Testing perf lock record and perf lock contention at the same time
  Testing perf lock contention --threads
  Testing perf lock contention --lock-addr
  Testing perf lock contention --lock-cgroup
  Unexpected signal in test_aggr_cgroup
  ---- end(0) ----
   85: kernel lock contention analysis test                            : Ok

After patch:

  # ./perf test -vv "lock contention"
   85: kernel lock contention analysis test:
  --- start ---
  test child forked, pid 602637
  Testing perf lock record and perf lock contention
  Testing perf lock contention --use-bpf
  Testing perf lock record and perf lock contention at the same time
  Testing perf lock contention --threads
  Testing perf lock contention --lock-addr
  Testing perf lock contention --lock-cgroup
  Testing perf lock contention --type-filter (w/ spinlock)
  Testing perf lock contention --lock-filter (w/ tasklist_lock)
  Testing perf lock contention --callstack-filter (w/ unix_stream)
  [Skip] Could not find 'unix_stream'
  Testing perf lock contention --callstack-filter with task aggregation
  [Skip] Could not find 'unix_stream'
  Testing perf lock contention --cgroup-filter
  Testing perf lock contention CSV output
  ---- end(0) ----
   85: kernel lock contention analysis test                            : Ok

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Tycho Andersen <tycho@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-11-13 17:29:00 -03:00
Ravi Bangoria
d0206db94b perf lock: Fix segfault due to missing kernel map
Kernel maps are encoded in PERF_RECORD_MMAP2 samples but "perf lock
report" and "perf lock contention" do not process MMAP2 samples.

Because of that, machine->vmlinux_map stays NULL and any later access
triggers a segmentation fault.

Fix it by adding ->mmap2() callbacks.

Fixes: 53b00ff358 ("perf record: Make --buildid-mmap the default")
Reported-by: Tycho Andersen (AMD) <tycho@kernel.org>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Tested-by: Tycho Andersen (AMD) <tycho@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-11-13 17:17:41 -03:00
Arnaldo Carvalho de Melo
84003ab3d0 tools headers UAPI: Sync KVM's vmx.h with the kernel to pick SEAMCALL exit reason
To pick the changes in:

  9d7dfb95da ("KVM: VMX: Inject #UD if guest tries to execute SEAMCALL or TDCALL")

The 'perf kvm-stat' tool uses the exit reasons that are included in the
VMX_EXIT_REASONS define, this new SEAMCALL isn't included there (TDCALL
is), so shouldn't be causing any change in behaviour, this patch ends up
being just addressess the following perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h

Please see tools/include/uapi/README for further details.

Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-11-13 17:16:34 -03:00
Arnaldo Carvalho de Melo
a09e5967ad perf build: Don't fail fast path feature detection when binutils-devel is not available
This is one more remnant of the BUILD_NONDISTRO series to make building
with binutils-devel opt-in due to license incompatibility.

In this case just the references at link time were still in place, which
make building the test-all.bin file fail, which wasn't detected before
probably because the last test was done with binutils-devel available,
doh.

Now:

  $ rpm -q binutils-devel
  package binutils-devel is not installed
  $ file /tmp/build/perf-tools/feature/test-all.bin
  /tmp/build/perf-tools/feature/test-all.bin: ELF 64-bit LSB executable, x86-64, version 1 (SYSV),
  dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2,
  BuildID[sha1]=4b5388a346b51f1b993f0b0dbd49f4570769b03c, for GNU/Linux 3.2.0, not stripped
  $

Fixes: 970ae86307 ("perf build: The bfd features are opt-in, stop testing for them by default")
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-11-13 17:16:34 -03:00
Thomas Falcon
85c894a80a perf header: Write bpf_prog (infos|btfs)_cnt to data file
With commit f0d0f978f3 ("perf header: Don't write empty BPF/BTF
info"), the write_bpf_( prog_info() | btf() ) functions exit without
writing anything if env->bpf_prog.(infos| btfs)_cnt is zero.

process_bpf_( prog_info() | btf() ), however, still expect a "count"
value to exist in the data file. If btf information is empty, for
example, process_bpf_btf will read garbage or some other data as the
number of btf nodes in the data file. As a result, the data file will
not be processed correctly.

Instead, write the count to the data file and exit if it is zero.

Fixes: f0d0f978f3 ("perf header: Don't write empty BPF/BTF info")
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Thomas Falcon <thomas.falcon@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2025-11-13 17:16:33 -03:00
Rafael J. Wysocki
161284b26f Merge branch 'pm-sleep'
Merge fixes for issues related to the handling of compressed hibernation
images that were introduced during the 6.9 development cycle.

* pm-sleep:
  PM: hibernate: Fix style issues in save_compressed_image()
  PM: hibernate: Use atomic64_t for compressed_size variable
  PM: hibernate: Emit an error when image writing fails
2025-11-13 21:05:46 +01:00
Linus Torvalds
9b9e43704d Merge tag 'slab-for-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab fix from Vlastimil Babka:

 - Fix memory leak of objects from remote NUMA node when bulk freeing to
   a cache with sheaves (Harry Yoo)

* tag 'slab-for-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/slub: fix memory leak in free_to_pcs_bulk()
2025-11-13 11:42:44 -08:00
Rafael J. Wysocki
7564f3543c Merge branches 'acpi-cppc' and 'acpi-tables'
Merge ACPI CPPC library fixes and an ACPI MRRM table parser fix for
6.18-rc6.

* acpi-cppc:
  ACPI: CPPC: Limit perf ctrs in PCC check only to online CPUs
  ACPI: CPPC: Perform fast check switch only for online CPUs
  ACPI: CPPC: Check _CPC validity for only the online CPUs
  ACPI: CPPC: Detect preferred core availability on online CPUs

* acpi-tables:
  ACPI: MRRM: Fix memory leaks and improve error handling
2025-11-13 20:40:51 +01:00
Linus Torvalds
8b4a014e28 Merge tag 'linux_kselftest-fixes-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest
Pull kselftest fix from Shuah Khan:
 "Fixes event-filter-function.tc tracing test failure caused when a
  first run to sample events triggers kmem_cache_free which interferes
  with the rest of the test.

  Fix this by calling sample_events twice to eliminate the
  kmem_cache_free related noise from the sampling"

* tag 'linux_kselftest-fixes-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest:
  selftests/tracing: Run sample events to clear page cache events
2025-11-13 11:37:40 -08:00
Linus Torvalds
d0309c0543 Merge tag 'net-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
 "Including fixes from Bluetooth and Wireless. No known outstanding
  regressions.

  Current release - regressions:

   - eth:
      - bonding: fix mii_status when slave is down
      - mlx5e: fix missing error assignment in mlx5e_xfrm_add_state()

  Previous releases - regressions:

   - sched: limit try_bulk_dequeue_skb() batches

   - ipv4: route: prevent rt_bind_exception() from rebinding stale fnhe

   - af_unix: initialise scc_index in unix_add_edge()

   - netpoll: fix incorrect refcount handling causing incorrect cleanup

   - bluetooth: don't hold spin lock over sleeping functions

   - hsr: Fix supervision frame sending on HSRv0

   - sctp: prevent possible shift out-of-bounds

   - tipc: fix use-after-free in tipc_mon_reinit_self().

   - dsa: tag_brcm: do not mark link local traffic as offloaded

   - eth: virtio-net: fix incorrect flags recording in big mode

  Previous releases - always broken:

   - sched: initialize struct tc_ife to fix kernel-infoleak

   - wifi:
      - mac80211: reject address change while connecting
      - iwlwifi: avoid toggling links due to wrong element use

   - bluetooth: cancel mesh send timer when hdev removed

   - strparser: fix signed/unsigned mismatch bug

   - handshake: fix memory leak in tls_handshake_accept()

  Misc:

   - selftests: mptcp: fix some flaky tests"

* tag 'net-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (60 commits)
  hsr: Follow standard for HSRv0 supervision frames
  hsr: Fix supervision frame sending on HSRv0
  virtio-net: fix incorrect flags recording in big mode
  ipv4: route: Prevent rt_bind_exception() from rebinding stale fnhe
  wifi: iwlwifi: mld: always take beacon ies in link grading
  wifi: iwlwifi: mvm: fix beacon template/fixed rate
  wifi: iwlwifi: fix aux ROC time event iterator usage
  net_sched: limit try_bulk_dequeue_skb() batches
  selftests: mptcp: join: properly kill background tasks
  selftests: mptcp: connect: trunc: read all recv data
  selftests: mptcp: join: userspace: longer transfer
  selftests: mptcp: join: endpoints: longer transfer
  selftests: mptcp: join: rm: set backup flag
  selftests: mptcp: connect: fix fallback note due to OoO
  ethtool: fix incorrect kernel-doc style comment in ethtool.h
  mlx5: Fix default values in create CQ
  Bluetooth: btrtl: Avoid loading the config file on security chips
  net/mlx5e: Fix potentially misleading debug message
  net/mlx5e: Fix wraparound in rate limiting for values above 255 Gbps
  net/mlx5e: Fix maxrate wraparound in threshold between units
  ...
2025-11-13 11:20:25 -08:00
Harry Yoo
cbcff934fa mm/slub: fix memory leak in free_to_pcs_bulk()
The commit 989b09b739 ("slab: skip percpu sheaves for remote object
freeing") introduced the remote_objects array in free_to_pcs_bulk() to
skip sheaves when objects from a remote node are freed.

However, the array is flushed only when:
  1) the array becomes full (++remote_nr >= PCS_BATCH_MAX), or
  2) slab_free_hook() returns false and size becomes zero.

When neither of the conditions is met, objects in the array are leaked.
This resulted in a memory leak [1], where 82 GiB of memory was allocated
for the maple_node cache.

Flush the array after successfully freeing objects to sheaves
in the do_free: path.

In the meantime, move the snippet if (!size) goto flush_remote; outside
the while loop for readability. Let's say all objects in the array are
from a remote node: then we acquire s->cpu_sheaves->lock and try to free
an object even when size is zero. This doesn't appear to be harmful,
but isn't really readable.

Reported-by: Tytus Rogalewski <admin@simplepod.ai>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220765 [1]
Closes: https://lore.kernel.org/linux-mm/20251107094809.12e9d705b7bf4815783eb184@linux-foundation.org
Closes: https://lore.kernel.org/all/aRGDTwbt2EIz2CYn@hyeyoo
Fixes: 989b09b739 ("slab: skip percpu sheaves for remote object freeing")
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
Link: https://patch.msgid.link/20251111125331.12246-1-harry.yoo@oracle.com
Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Tested-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Tytus Rogalewski <admin@simplepod.ai>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
2025-11-13 19:56:46 +01:00
Zqiang
a257e97421 sched_ext: Fix possible deadlock in the deferred_irq_workfn()
For PREEMPT_RT=y kernels, the deferred_irq_workfn() is executed in
the per-cpu irq_work/* task context and not disable-irq, if the rq
returned by container_of() is current CPU's rq, the following scenarios
may occur:

lock(&rq->__lock);
<Interrupt>
  lock(&rq->__lock);

This commit use IRQ_WORK_INIT_HARD() to replace init_irq_work() to
initialize rq->scx.deferred_irq_work, make the deferred_irq_workfn()
is always invoked in hard-irq context.

Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Tejun Heo <tj@kernel.org>
2025-11-13 08:29:28 -10:00
Jiayuan Chen
fbade4bd08 mptcp: Disallow MPTCP subflows from sockmap
The sockmap feature allows bpf syscall from userspace, or based on bpf
sockops, replacing the sk_prot of sockets during protocol stack processing
with sockmap's custom read/write interfaces.
'''
tcp_rcv_state_process()
  subflow_syn_recv_sock()
    tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
      bpf_skops_established       <== sockops
        bpf_sock_map_update(sk)   <== call bpf helper
          tcp_bpf_update_proto()  <== update sk_prot
'''
Consider two scenarios:

1. When the server has MPTCP enabled and the client also requests MPTCP,
   the sk passed to the BPF program is a subflow sk. Since subflows only
   handle partial data, replacing their sk_prot is meaningless and will
   cause traffic disruption.

2. When the server has MPTCP enabled but the client sends a TCP SYN
   without MPTCP, subflow_syn_recv_sock() performs a fallback on the
   subflow, replacing the subflow sk's sk_prot with the native sk_prot.
   '''
   subflow_ulp_fallback()
    subflow_drop_ctx()
      mptcp_subflow_ops_undo_override()
   '''
   Subsequently, accept::mptcp_stream_accept::mptcp_fallback_tcp_ops()
   converts the subflow to plain TCP.

For the first case, we should prevent it from being combined with sockmap
by setting sk_prot->psock_update_sk_prot to NULL, which will be blocked by
sockmap's own flow.

For the second case, since subflow_syn_recv_sock() has already restored
sk_prot to native tcp_prot/tcpv6_prot, no further action is needed.

Fixes: cec37a6e41 ("mptcp: Handle MP_CAPABLE options for outgoing connections")
Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Cc: <stable@vger.kernel.org>
Link: https://patch.msgid.link/20251111060307.194196-2-jiayuan.chen@linux.dev
2025-11-13 09:15:41 -08:00
Kiryl Shutsemau
0a8fb03fe7 MAINTAINERS: Update name spelling
Use transliteration from the Belarusian language instead of Russian.

Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://patch.msgid.link/20251113121006.651992-1-kas%40kernel.org
2025-11-13 07:58:47 -08:00
Andrew Donnellan
ebd4469e7a entry: Fix ifndef around arch_xfer_to_guest_mode_handle_work() stub
The stub implementation of arch_xfer_to_guest_mode_handle_work() is
guarded by an #ifndef that incorrectly checks for the name
arch_xfer_to_guest_mode_work instead. It seems the function was renamed
to add "_handle" as a late change to the original patch, and the #ifndef
wasn't updated to go with it.

Change the #ifndef to match the name of the function. No users right now,
so no need to update any architecture code.

Fixes: 935ace2fb5 ("entry: Provide infrastructure for work before transitioning to guest mode")
Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://patch.msgid.link/20251105-entry-fix-ifndef-v1-1-d8d28045b627@linux.ibm.com
2025-11-13 16:27:56 +01:00
Paolo Abeni
94909c53e4 Merge branch 'hsr-send-correct-hsrv0-supervision-frames'
Felix Maurer says:

====================
hsr: Send correct HSRv0 supervision frames

Hangbin recently reported that the hsr selftests were failing and noted
that the entries in the node table were not merged, i.e., had
00:00:00:00:00:00 as MacAddressB forever [1].

This failure only occured with HSRv0 because it was not sending
supervision frames anymore. While debugging this I found that we were
not really following the HSRv0 standard for the supervision frames we
sent, so I additionally made a few changes to get closer to the standard
and restore a more correct behavior we had a while ago.

The selftests can still fail because they take a while and run into the
timeout. I did not include a change of the timeout because I have more
improvements to the selftests mostly ready that change the test duration
but are net-next material.

[1]: https://lore.kernel.org/netdev/aMONxDXkzBZZRfE5@fedora/
====================

Link: https://patch.msgid.link/cover.1762876095.git.fmaurer@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-13 15:55:06 +01:00
Felix Maurer
b2c26c82f7 hsr: Follow standard for HSRv0 supervision frames
For HSRv0, the path_id has the following meaning:
- 0000: PRP supervision frame
- 0001-1001: HSR ring identifier
- 1010-1011: Frames from PRP network (A/B, with RedBoxes)
- 1111: HSR supervision frame

Follow the IEC 62439-3:2010 standard more closely by setting the right
path_id for HSRv0 supervision frames (actually, it is correctly set when
the frame is constructed, but hsr_set_path_id() overwrites it) and set a
fixed HSR ring identifier of 1. The ring identifier seems to be generally
unused and we ignore it anyways on reception, but some fixed identifier is
definitely better than using one identifier in one direction and a wrong
identifier in the other.

This was also the behavior before commit f266a683a4 ("net/hsr: Better
frame dispatch") which introduced the alternating path_id. This was later
moved to hsr_set_path_id() in commit 451d8123f8 ("net: prp: add packet
handling support").

The IEC 62439-3:2010 also contains 6 unused bytes after the MacAddressA in
the HSRv0 supervision frames. Adjust a TODO comment accordingly.

Fixes: f266a683a4 ("net/hsr: Better frame dispatch")
Fixes: 451d8123f8 ("net: prp: add packet handling support")
Signed-off-by: Felix Maurer <fmaurer@redhat.com>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/ea0d5133cd593856b2fa673d6e2067bf1d4d1794.1762876095.git.fmaurer@redhat.com
Tested-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-13 15:55:04 +01:00
Felix Maurer
96a3a03abf hsr: Fix supervision frame sending on HSRv0
On HSRv0, no supervision frames were sent. The supervison frames were
generated successfully, but failed the check for a sufficiently long mac
header, i.e., at least sizeof(struct hsr_ethhdr), in hsr_fill_frame_info()
because the mac header only contained the ethernet header.

Fix this by including the HSR header in the mac header when generating HSR
supervision frames. Note that the mac header now also includes the TLV
fields. This matches how we set the headers on rx and also the size of
struct hsrv0_ethhdr_sp.

Reported-by: Hangbin Liu <liuhangbin@gmail.com>
Closes: https://lore.kernel.org/netdev/aMONxDXkzBZZRfE5@fedora/
Fixes: 9cfb5e7f0d ("net: hsr: fix hsr_init_sk() vs network/transport headers.")
Signed-off-by: Felix Maurer <fmaurer@redhat.com>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/4354114fea9a642fe71f49aeeb6c6159d1d61840.1762876095.git.fmaurer@redhat.com
Tested-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-13 15:55:04 +01:00
Randy Dunlap
0a4a18e888 drm/client: fix MODULE_PARM_DESC string for "active"
The MODULE_PARM_DESC string for the "active" parameter is missing a
space and has an extraneous trailing ']' character. Correct these.

Before patch:
$ modinfo -p ./drm_client_lib.ko
active:Choose which drm client to start, default isfbdev] (string)

After patch:
$ modinfo -p ./drm_client_lib.ko
active:Choose which drm client to start, default is fbdev (string)

Fixes: f7b42442c4 ("drm/log: Introduce a new boot logger to draw the kmsg on the screen")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Jocelyn Falempe <jfalempe@redhat.com>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patch.msgid.link/20251112010920.2355712-1-rdunlap@infradead.org
2025-11-13 14:15:24 +01:00
Linus Torvalds
2ccec59446 Merge tag 'erofs-for-6.18-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs fixes from Gao Xiang:

 - Add Chunhai Guo as a EROFS reviewer to get more eyes from interested
   industry vendors

 - Fix infinite loop caused by incomplete crafted zstd-compressed data
   (thanks to Robert again!)

* tag 'erofs-for-6.18-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: avoid infinite loop due to incomplete zstd-compressed data
  MAINTAINERS: erofs: add myself as reviewer
2025-11-13 05:02:59 -08:00
Linus Torvalds
967a72fa7f Merge tag 'v6.18-rc5-smb-server-fixes' of git://git.samba.org/ksmbd
Pull smb server fixes from Steve French:

 - Fix smbdirect (RDMA) disconnect hang bug

 - Fix potential Denial of Service when connection limit exceeded

 - Fix smbdirect (RDMA) connection (potentially accessing freed memory)
   bug

* tag 'v6.18-rc5-smb-server-fixes' of git://git.samba.org/ksmbd:
  smb: server: let smb_direct_disconnect_rdma_connection() turn CREATED into DISCONNECTED
  ksmbd: close accepted socket when per-IP limit rejects connection
  smb: server: rdma: avoid unmapping posted recv on accept failure
2025-11-13 04:57:38 -08:00
Shawn Lin
921b3f59b7 PCI/ASPM: Avoid L0s and L1 on Hi1105 [19e5:1105] Wi-Fi
This Wi-Fi advertises the L0s and L1 capabilities but actually it doesn't
support them. This is confirmed by HiSilicon team in actual productization.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/1762916319-139532-1-git-send-email-shawn.lin@rock-chips.com
2025-11-13 06:17:23 -06:00
Xuan Zhuo
0eff2eaa53 virtio-net: fix incorrect flags recording in big mode
The purpose of commit 703eec1b24 ("virtio_net: fixing XDP for fully
checksummed packets handling") is to record the flags in advance, as
their value may be overwritten in the XDP case. However, the flags
recorded under big mode are incorrect, because in big mode, the passed
buf does not point to the rx buffer, but rather to the page of the
submitted buffer. This commit fixes this issue.

For the small mode, the commit c11a49d58a ("virtio_net: Fix mismatched
buf address when unmapping for small packets") fixed it.

Tested-by: Alyssa Ross <hi@alyssa.is>
Fixes: 703eec1b24 ("virtio_net: fixing XDP for fully checksummed packets handling")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://patch.msgid.link/20251111090828.23186-1-xuanzhuo@linux.alibaba.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-13 13:16:30 +01:00
Linus Torvalds
6fa9041b71 Merge tag 'nfsd-6.18-3' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd fixes from Chuck Lever:
 "Address recently reported issues or issues found at the recent NFS
  bake-a-thon held in Raleigh, NC.

  Issues reported with v6.18-rc:
   - Address a kernel build issue
   - Reorder SEQUENCE processing to avoid spurious NFS4ERR_SEQ_MISORDERED

  Issues that need expedient stable backports:
   - Close a refcount leak exposure
   - Report support for NFSv4.2 CLONE correctly
   - Fix oops during COPY_NOTIFY processing
   - Prevent rare crash after XDR encoding failure
   - Prevent crash due to confused or malicious NFSv4.1 client"

* tag 'nfsd-6.18-3' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux:
  Revert "SUNRPC: Make RPCSEC_GSS_KRB5 select CRYPTO instead of depending on it"
  nfsd: ensure SEQUENCE replay sends a valid reply.
  NFSD: Never cache a COMPOUND when the SEQUENCE operation fails
  NFSD: Skip close replay processing if XDR encoding fails
  NFSD: free copynotify stateid in nfs4_free_ol_stateid()
  nfsd: add missing FATTR4_WORD2_CLONE_BLKSIZE from supported attributes
  nfsd: fix refcount leak in nfsd_set_fh_dentry()
2025-11-12 18:41:01 -08:00
Linus Torvalds
92385a075a Merge tag 'dma-mapping-6.18-2025-11-12' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux
Pull dma-mapping fixes from Marek Szyprowski:

 - two minor fixes for DMA API infrastructure: restoring proper
   structure padding used in benchmark tests (Qinxin Xia) and global
   DMA_BIT_MASK macro rework to make it a bit more clang friendly (James
   Clark)

* tag 'dma-mapping-6.18-2025-11-12' of git://git.kernel.org/pub/scm/linux/kernel/git/mszyprowski/linux:
  dma-mapping: Allow use of DMA_BIT_MASK(64) in global scope
  dma-mapping: benchmark: Restore padding to ensure uABI remained consistent
2025-11-12 18:31:22 -08:00
Linus Torvalds
e927c520e1 Merge tag 'loongarch-fixes-6.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
Pull LoongArch fixes from Huacai Chen:

 - Fix a Rust build error

 - Fix exception/interrupt, memory management, perf event, hardware
   breakpoint, kexec and KVM bugs

* tag 'loongarch-fixes-6.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson:
  LoongArch: KVM: Fix max supported vCPUs set with EIOINTC
  LoongArch: KVM: Skip PMU checking on vCPU context switch
  LoongArch: KVM: Restore guest PMU if it is enabled
  LoongArch: KVM: Add delay until timer interrupt injected
  LoongArch: KVM: Set page with write attribute if dirty track disabled
  LoongArch: kexec: Print out debugging message if required
  LoongArch: kexec: Initialize the kexec_buf structure
  LoongArch: Use correct accessor to read FWPC/MWPC
  LoongArch: Refine the init_hw_perf_events() function
  LoongArch: Remove __GFP_HIGHMEM masking in pud_alloc_one()
  LoongArch: Let {pte,pmd}_modify() record the status of _PAGE_DIRTY
  LoongArch: Consolidate max_pfn & max_low_pfn calculation
  LoongArch: Consolidate early_ioremap()/ioremap_prot()
  LoongArch: Use physical addresses for CSR_MERRENTRY/CSR_TLBRENTRY
  LoongArch: Clarify 3 MSG interrupt features
  rust: Add -fno-isolate-erroneous-paths-dereference to bindgen_skip_c_flags
2025-11-12 18:21:30 -08:00
Linus Torvalds
89ee862a4d Merge tag 'alpha-fixes-v6.18-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88/alpha
Pull alpha fix from Matt Turner:
 "Add Magnus as a maintainer of the alpha port"

* tag 'alpha-fixes-v6.18-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/mattst88/alpha:
  MAINTAINERS: Add Magnus Lindholm as maintainer for alpha port
2025-11-12 18:18:12 -08:00
Bjorn Helgaas
823576c894 PCI/ASPM: Avoid L0s and L1 on PA Semi [1959:a002] Root Ports
Christian reported that f3ac2ff148 ("PCI/ASPM: Enable all ClockPM and
ASPM states for devicetree platforms") broke booting on the A-EON AmigaOne
X1000.

Override the L0s and L1 Support advertised in Link Capabilities by the
X1000 Root Ports ([1959:a002]) so we don't try to enable those states.

Fixes: f3ac2ff148 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms")
Fixes: df5192d9bb ("PCI/ASPM: Enable only L0s and L1 for devicetree platforms")
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Link: https://lore.kernel.org/r/a41d2ca1-fcd9-c416-b111-a958e92e94bf@xenosoft.de
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2025-11-12 18:51:39 -06:00
Bjorn Helgaas
5b40a5080c PCI/ASPM: Avoid L0s and L1 on Freescale [1957:0451] Root Ports
Christian reported that f3ac2ff148 ("PCI/ASPM: Enable all ClockPM and
ASPM states for devicetree platforms") broke booting on the A-EON X5000.

Override the L0s and L1 Support advertised in Link Capabilities by the
X5000 Root Ports ([1957:0451]) so we don't try to enable those states.

Fixes: f3ac2ff148 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms")
Fixes: df5192d9bb ("PCI/ASPM: Enable only L0s and L1 for devicetree platforms")
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Link: https://lore.kernel.org/r/db5c95a1-cf3e-46f9-8045-a1b04908051a@xenosoft.de
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Shawn Lin <shawn.lin@rock-chips.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Link: https://patch.msgid.link/20251110222929.2140564-5-helgaas@kernel.org
2025-11-12 18:51:39 -06:00
Bjorn Helgaas
30579eebba PCI/ASPM: Convert quirks to override advertised link states
Existing quirks to disable ASPM L0s and L1 use pci_disable_link_state(),
which disables ASPM states and prevents their use in the future.  But since
they are FINAL quirks, they happen after ASPM has already been enabled.
Here's a typical call path:

  pci_host_probe
    pci_scan_root_bus_bridge
      pci_scan_child_bus
        pci_scan_slot
          pci_scan_single_device
            pci_device_add
              pci_fixup_device(pci_fixup_header)  # HEADER quirks
          pcie_aspm_init_link_state
            pcie_config_aspm_path
              pcie_config_aspm_link
                pcie_config_aspm_dev              # ASPM may be enabled
    pci_bus_add_devices
      pci_bus_add_devices
        pci_fixup_device(pci_fixup_final)         # FINAL quirks
          quirk_disable_aspm_l0s
            pci_disable_link_state(dev, PCIE_LINK_STATE_L0S)

Sometimes enabling ASPM can make the link non-functional, so if we know
ASPM is broken on a device, we shouldn't enable it at all, even
temporarily.

Convert the existing quirks to use pcie_aspm_remove_cap() instead, which
overrides the ASPM Support advertised in PCIe Link Capabilities, and make
them HEADER quirks so they run before pcie_aspm_init_link_state() has a
chance to enable ASPM.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Shawn Lin <shawn.lin@rock-chips.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Link: https://patch.msgid.link/20251110222929.2140564-4-helgaas@kernel.org
2025-11-12 18:51:39 -06:00
Bjorn Helgaas
575b98e39d PCI/ASPM: Add pcie_aspm_remove_cap() to override advertised link states
Add pcie_aspm_remove_cap().  A quirk can use this to prevent use of ASPM
L0s or L1 link states, even if the device advertised support for them.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Shawn Lin <shawn.lin@rock-chips.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Link: https://patch.msgid.link/20251110222929.2140564-3-helgaas@kernel.org
2025-11-12 18:51:27 -06:00
Bjorn Helgaas
4495bffd86 PCI/ASPM: Cache L0s/L1 Supported so advertised link states can be overridden
Defective devices sometimes advertise support for ASPM L0s or L1 states
even if they don't work correctly.

Cache the L0s Supported and L1 Supported bits early in enumeration so
HEADER quirks can override the ASPM states advertised in Link Capabilities
before pcie_aspm_cap_init() enables ASPM.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Shawn Lin <shawn.lin@rock-chips.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Link: https://patch.msgid.link/20251110222929.2140564-2-helgaas@kernel.org
2025-11-12 18:47:16 -06:00
Haotian Zhang
360b3730f8 ASoC: rsnd: fix OF node reference leak in rsnd_ssiu_probe()
rsnd_ssiu_probe() leaks an OF node reference obtained by
rsnd_ssiu_of_node(). The node reference is acquired but
never released across all return paths.

Fix it by declaring the device node with the __free(device_node)
cleanup construct to ensure automatic release when the variable goes
out of scope.

Fixes: 4e7788fb80 ("ASoC: rsnd: add SSIU BUSIF support")
Signed-off-by: Haotian Zhang <vulab@iscas.ac.cn>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://patch.msgid.link/20251112065709.1522-1-vulab@iscas.ac.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-13 00:36:01 +00:00
Dave Jiang
214291cbaa acpi/hmat: Fix lockdep warning for hmem_register_resource()
The following lockdep splat was observed while kernel auto-online a CXL
memory region:

======================================================
WARNING: possible circular locking dependency detected
6.17.0djtest+ #53 Tainted: G        W
------------------------------------------------------
systemd-udevd/3334 is trying to acquire lock:
ffffffff90346188 (hmem_resource_lock){+.+.}-{4:4}, at: hmem_register_resource+0x31/0x50

but task is already holding lock:
ffffffff90338890 ((node_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x2e/0x70

which lock already depends on the new lock.
[..]
Chain exists of:
  hmem_resource_lock --> mem_hotplug_lock --> (node_chain).rwsem

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  rlock((node_chain).rwsem);
                               lock(mem_hotplug_lock);
                               lock((node_chain).rwsem);
  lock(hmem_resource_lock);

The lock ordering can cause potential deadlock. There are instances
where hmem_resource_lock is taken after (node_chain).rwsem, and vice
versa.

Split out the target update section of hmat_register_target() so that
hmat_callback() only envokes that section instead of attempt to register
hmem devices that it does not need to.

[ dj: Fix up comment to be closer to 80cols. (Jonathan) ]

Fixes: cf8741ac57 ("ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device")
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20251105235115.85062-3-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-11-12 14:47:55 -07:00
Cryolitia PukNgae
c55a8e24cd hwmon: (gpd-fan) initialize EC on driver load for Win 4
The original implement will re-init the EC when it reports a zero
value, and it's a workaround for the black box buggy firmware.

Now a contributer test and report that, the bug is that, the firmware
won't initialize the EC on boot, so the EC ramains in unusable status.
And it won't need to re-init it during runtime. The original implement
is not perfect, any write command will be ignored until we first read
it. Just re-init it unconditionally when the driver load could work.

Fixes: 0ab88e2394 ("hwmon: add GPD devices sensor driver")
Co-developed-by: kylon <3252255+kylon@users.noreply.github.com>
Signed-off-by: kylon <3252255+kylon@users.noreply.github.com>
Link: https://github.com/Cryolitia/gpd-fan-driver/pull/20
Signed-off-by: Cryolitia PukNgae <cryolitia@uniontech.com>
Link: https://lore.kernel.org/r/20251030-win4-v1-1-c374dcb86985@uniontech.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2025-11-12 11:54:37 -08:00
Gopi Krishna Menon
9efb297c52 hwmon: (gpd-fan) Fix compilation error in non-ACPI builds
Building gpd-fan driver without CONFIG_ACPI results in the following
build errors:

drivers/hwmon/gpd-fan.c: In function ‘gpd_ecram_read’:
drivers/hwmon/gpd-fan.c:228:9: error: implicit declaration of function ‘outb’ [-Werror=implicit-function-declaration]
  228 |         outb(0x2E, addr_port);
      |         ^~~~
drivers/hwmon/gpd-fan.c:241:16: error: implicit declaration of function ‘inb’ [-Werror=implicit-function-declaration]
  241 |         *val = inb(data_port);

The definitions for inb() and outb() come from <linux/io.h>
(specifically through <asm/io.h>), which is implicitly included via
<acpi_io.h>. When CONFIG_ACPI is not set, <acpi_io.h> is not included
resulting in <linux/io.h> to be omitted as well.

Since the driver does not depend on ACPI, remove <linux/acpi.h> and add
<linux/io.h> directly to fix the compilation errors.

Signed-off-by: Gopi Krishna Menon <krishnagopi487@gmail.com>
Link: https://lore.kernel.org/r/20251024202042.752160-1-krishnagopi487@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
2025-11-12 11:54:37 -08:00
Emil Tsalapatis
c87488a123 sched/ext: convert scx_tasks_lock to raw spinlock
Update scx_task_locks so that it's safe to lock/unlock in a
non-sleepable context in PREEMPT_RT kernels. scx_task_locks is
(non-raw) spinlock used to protect the list of tasks under SCX.
This list is updated during from finish_task_switch(), which
cannot sleep. Regular spinlocks can be locked in such a context
in non-RT kernels, but are sleepable under when CONFIG_PREEMPT_RT=y.

Convert scx_task_locks into a raw spinlock, which is not sleepable
even on RT kernels.

Sample backtrace:

<TASK>
dump_stack_lvl+0x83/0xa0
__might_resched+0x14a/0x200
rt_spin_lock+0x61/0x1c0
? sched_ext_dead+0x2d/0xf0
? lock_release+0xc6/0x280
sched_ext_dead+0x2d/0xf0
? srso_alias_return_thunk+0x5/0xfbef5
finish_task_switch.isra.0+0x254/0x360
__schedule+0x584/0x11d0
? srso_alias_return_thunk+0x5/0xfbef5
? srso_alias_return_thunk+0x5/0xfbef5
? tick_nohz_idle_exit+0x7e/0x120
schedule_idle+0x23/0x40
cpu_startup_entry+0x29/0x30
start_secondary+0xf8/0x100
common_startup_64+0x13e/0x148
</TASK>

Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2025-11-12 08:42:02 -10:00
Jakub Kicinski
fe82c4f8a2 Merge tag 'wireless-2025-11-12' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless
Johannes Berg says:

====================
Couple more fixes:
 - mwl8k: work around FW expecting a DSSS element in beacons
 - ath11k: report correct TX status
 - iwlwifi: avoid toggling links due to wrong element use
 - iwlwifi: fix beacon template rate on older devices
 - iwlwifi: fix loop iterator being used after loop
 - mac80211: disallow address changes while using the address
 - mac80211: avoid bad rate warning in monitor/sniffer mode
 - hwsim: fix potential NULL deref (on monitor injection)

* tag 'wireless-2025-11-12' of https://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless:
  wifi: iwlwifi: mld: always take beacon ies in link grading
  wifi: iwlwifi: mvm: fix beacon template/fixed rate
  wifi: iwlwifi: fix aux ROC time event iterator usage
  wifi: mwl8k: inject DSSS Parameter Set element into beacons if missing
  wifi: mac80211_hwsim: Fix possible NULL dereference
  wifi: mac80211: skip rate verification for not captured PSDUs
  wifi: mac80211: reject address change while connecting
  wifi: ath11k: zero init info->status in wmi_process_mgmt_tx_comp()
====================

Link: https://patch.msgid.link/20251112114621.15716-5-johannes@sipsolutions.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-12 09:33:09 -08:00
Srinivas Pandruvada
4b747cc628 cpufreq: intel_pstate: Check IDA only before MSR_IA32_PERF_CTL writes
Commit ac4e04d9e3 ("cpufreq: intel_pstate: Unchecked MSR aceess in
legacy mode") introduced a check for feature X86_FEATURE_IDA to verify
turbo mode support. Although this is the correct way to check for turbo
mode support, it causes issues on some platforms that disable turbo
during OS boot, but enable it later [1]. Before adding this feature
check, users were able to get turbo mode frequencies by writing 0 to
/sys/devices/system/cpu/intel_pstate/no_turbo post-boot.

To restore the old behavior on the affected systems while still
addressing the unchecked MSR issue on some Skylake-X systems, check
X86_FEATURE_IDA only immediately before updates of MSR_IA32_PERF_CTL
that may involve setting the Turbo Engage Bit (bit 32).

Fixes: ac4e04d9e3 ("cpufreq: intel_pstate: Unchecked MSR aceess in legacy mode")
Reported-by: Aaron Rainbolt <arainbolt@kfocus.org>
Closes: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2122531 [1]
Tested-by: Aaron Rainbolt <arainbolt@kfocus.org>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
[ rjw: Subject adjustment, changelog edits ]
Link: https://patch.msgid.link/20251111010840.141490-1-srinivas.pandruvada@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-12 17:59:37 +01:00
Zqiang
5f02151c41 sched_ext: Fix unsafe locking in the scx_dump_state()
For built with CONFIG_PREEMPT_RT=y kernels, the dump_lock will be converted
sleepable spinlock and not disable-irq, so the following scenarios occur:

inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
irq_work/0/27 [HC0[0]:SC0[0]:HE1:SE1] takes:
(&rq->__lock){?...}-{2:2}, at: raw_spin_rq_lock_nested+0x2b/0x40
{IN-HARDIRQ-W} state was registered at:
   lock_acquire+0x1e1/0x510
   _raw_spin_lock_nested+0x42/0x80
   raw_spin_rq_lock_nested+0x2b/0x40
   sched_tick+0xae/0x7b0
   update_process_times+0x14c/0x1b0
   tick_periodic+0x62/0x1f0
   tick_handle_periodic+0x48/0xf0
   timer_interrupt+0x55/0x80
   __handle_irq_event_percpu+0x20a/0x5c0
   handle_irq_event_percpu+0x18/0xc0
   handle_irq_event+0xb5/0x150
   handle_level_irq+0x220/0x460
   __common_interrupt+0xa2/0x1e0
   common_interrupt+0xb0/0xd0
   asm_common_interrupt+0x2b/0x40
   _raw_spin_unlock_irqrestore+0x45/0x80
   __setup_irq+0xc34/0x1a30
   request_threaded_irq+0x214/0x2f0
   hpet_time_init+0x3e/0x60
   x86_late_time_init+0x5b/0xb0
   start_kernel+0x308/0x410
   x86_64_start_reservations+0x1c/0x30
   x86_64_start_kernel+0x96/0xa0
   common_startup_64+0x13e/0x148

 other info that might help us debug this:
 Possible unsafe locking scenario:

        CPU0
        ----
   lock(&rq->__lock);
   <Interrupt>
     lock(&rq->__lock);

  *** DEADLOCK ***

 stack backtrace:
 CPU: 0 UID: 0 PID: 27 Comm: irq_work/0
 Call Trace:
  <TASK>
  dump_stack_lvl+0x8c/0xd0
  dump_stack+0x14/0x20
  print_usage_bug+0x42e/0x690
  mark_lock.part.44+0x867/0xa70
  ? __pfx_mark_lock.part.44+0x10/0x10
  ? string_nocheck+0x19c/0x310
  ? number+0x739/0x9f0
  ? __pfx_string_nocheck+0x10/0x10
  ? __pfx_check_pointer+0x10/0x10
  ? kvm_sched_clock_read+0x15/0x30
  ? sched_clock_noinstr+0xd/0x20
  ? local_clock_noinstr+0x1c/0xe0
  __lock_acquire+0xc4b/0x62b0
  ? __pfx_format_decode+0x10/0x10
  ? __pfx_string+0x10/0x10
  ? __pfx___lock_acquire+0x10/0x10
  ? __pfx_vsnprintf+0x10/0x10
  lock_acquire+0x1e1/0x510
  ? raw_spin_rq_lock_nested+0x2b/0x40
  ? __pfx_lock_acquire+0x10/0x10
  ? dump_line+0x12e/0x270
  ? raw_spin_rq_lock_nested+0x20/0x40
  _raw_spin_lock_nested+0x42/0x80
  ? raw_spin_rq_lock_nested+0x2b/0x40
  raw_spin_rq_lock_nested+0x2b/0x40
  scx_dump_state+0x3b3/0x1270
  ? finish_task_switch+0x27e/0x840
  scx_ops_error_irq_workfn+0x67/0x80
  irq_work_single+0x113/0x260
  irq_work_run_list.part.3+0x44/0x70
  run_irq_workd+0x6b/0x90
  ? __pfx_run_irq_workd+0x10/0x10
  smpboot_thread_fn+0x529/0x870
  ? __pfx_smpboot_thread_fn+0x10/0x10
  kthread+0x305/0x3f0
  ? __pfx_kthread+0x10/0x10
  ret_from_fork+0x40/0x70
  ? __pfx_kthread+0x10/0x10
  ret_from_fork_asm+0x1a/0x30
  </TASK>

This commit therefore use rq_lock_irqsave/irqrestore() to replace
rq_lock/unlock() in the scx_dump_state().

Fixes: 07814a9439 ("sched_ext: Print debug dump after an error exit")
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Signed-off-by: Tejun Heo <tj@kernel.org>
2025-11-12 06:28:32 -10:00
Caleb Sander Mateos
2d0e88f3fd io_uring/rsrc: don't use blk_rq_nr_phys_segments() as number of bvecs
io_buffer_register_bvec() currently uses blk_rq_nr_phys_segments() as
the number of bvecs in the request. However, bvecs may be split into
multiple segments depending on the queue limits. Thus, the number of
segments may overestimate the number of bvecs. For ublk devices, the
only current users of io_buffer_register_bvec(), virt_boundary_mask,
seg_boundary_mask, max_segments, and max_segment_size can all be set
arbitrarily by the ublk server process.
Set imu->nr_bvecs based on the number of bvecs the rq_for_each_bvec()
loop actually yields. However, continue using blk_rq_nr_phys_segments()
as an upper bound on the number of bvecs when allocating imu to avoid
needing to iterate the bvecs a second time.

Link: https://lore.kernel.org/io-uring/20251111191530.1268875-1-csander@purestorage.com/
Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Fixes: 27cb27b6d5 ("io_uring: add support for kernel registered bvecs")
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2025-11-12 08:25:33 -07:00
Alex Mastro
d323ad7396 vfio: selftests: replace iova=vaddr with allocated iovas
vfio_dma_mapping_test and vfio_pci_driver_test currently use iova=vaddr
as part of DMA mapping operations. However, not all IOMMUs support the
same virtual address width as the processor. For instance, older Intel
consumer platforms only support 39-bits of IOMMU address space. On such
platforms, using the virtual address as the IOVA fails.

Make the tests more robust by using iova_allocator to vend IOVAs, which
queries legally accessible IOVAs from the underlying IOMMUFD or VFIO
container.

Reviewed-by: David Matlack <dmatlack@google.com>
Tested-by: David Matlack <dmatlack@google.com>
Signed-off-by: Alex Mastro <amastro@fb.com>
Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-4-7960244642c5@fb.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
2025-11-12 08:04:42 -07:00
Alex Mastro
ce0e3c403e vfio: selftests: add iova allocator
Add struct iova_allocator, which gives tests a convenient way to generate
legally-accessible IOVAs to map. This allocator traverses the sorted
available IOVA ranges linearly, requires power-of-two size allocations,
and does not support freeing iova allocations. The assumption is that
tests are not IOVA space-bounded, and will not need to recycle IOVAs.

This is based on Alex Williamson's patch series for adding an IOVA
allocator [1].

[1] https://lore.kernel.org/all/20251108212954.26477-1-alex@shazbot.org/

Reviewed-by: David Matlack <dmatlack@google.com>
Tested-by: David Matlack <dmatlack@google.com>
Signed-off-by: Alex Mastro <amastro@fb.com>
Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-3-7960244642c5@fb.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
2025-11-12 08:04:42 -07:00
Alex Mastro
a77fa0b922 vfio: selftests: fix map limit tests to use last available iova
Use the newly available vfio_pci_iova_ranges() to determine the last
legal IOVA, and use this as the basis for vfio_dma_map_limit_test tests.

Fixes: de8d1f2fd5 ("vfio: selftests: add end of address space DMA map/unmap tests")
Reviewed-by: David Matlack <dmatlack@google.com>
Tested-by: David Matlack <dmatlack@google.com>
Signed-off-by: Alex Mastro <amastro@fb.com>
Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-2-7960244642c5@fb.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
2025-11-12 08:04:42 -07:00
Alex Mastro
7c44656ab3 vfio: selftests: add iova range query helpers
VFIO selftests need to map IOVAs from legally accessible ranges, which
could vary between hardware. Tests in vfio_dma_mapping_test.c are making
excessively strong assumptions about which IOVAs can be mapped.

Add vfio_iommu_iova_ranges(), which queries IOVA ranges from the
IOMMUFD or VFIO container associated with the device. The queried ranges
are normalized to IOMMUFD's iommu_iova_range representation so that
handling of IOVA ranges up the stack can be implementation-agnostic.
iommu_iova_range and vfio_iova_range are equivalent, so bias to using the
new interface's struct.

Query IOMMUFD's ranges with IOMMU_IOAS_IOVA_RANGES.
Query VFIO container's ranges with VFIO_IOMMU_GET_INFO and
VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE.

The underlying vfio_iommu_type1_info buffer-related functionality has
been kept generic so the same helpers can be used to query other
capability chain information, if needed.

Reviewed-by: David Matlack <dmatlack@google.com>
Tested-by: David Matlack <dmatlack@google.com>
Signed-off-by: Alex Mastro <amastro@fb.com>
Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-1-7960244642c5@fb.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
2025-11-12 08:04:42 -07:00
Chuang Wang
ac1499fcd4 ipv4: route: Prevent rt_bind_exception() from rebinding stale fnhe
The sit driver's packet transmission path calls: sit_tunnel_xmit() ->
update_or_create_fnhe(), which lead to fnhe_remove_oldest() being called
to delete entries exceeding FNHE_RECLAIM_DEPTH+random.

The race window is between fnhe_remove_oldest() selecting fnheX for
deletion and the subsequent kfree_rcu(). During this time, the
concurrent path's __mkroute_output() -> find_exception() can fetch the
soon-to-be-deleted fnheX, and rt_bind_exception() then binds it with a
new dst using a dst_hold(). When the original fnheX is freed via RCU,
the dst reference remains permanently leaked.

CPU 0                             CPU 1
__mkroute_output()
  find_exception() [fnheX]
                                  update_or_create_fnhe()
                                    fnhe_remove_oldest() [fnheX]
  rt_bind_exception() [bind dst]
                                  RCU callback [fnheX freed, dst leak]

This issue manifests as a device reference count leak and a warning in
dmesg when unregistering the net device:

  unregister_netdevice: waiting for sitX to become free. Usage count = N

Ido Schimmel provided the simple test validation method [1].

The fix clears 'oldest->fnhe_daddr' before calling fnhe_flush_routes().
Since rt_bind_exception() checks this field, setting it to zero prevents
the stale fnhe from being reused and bound to a new dst just before it
is freed.

[1]
ip netns add ns1
ip -n ns1 link set dev lo up
ip -n ns1 address add 192.0.2.1/32 dev lo
ip -n ns1 link add name dummy1 up type dummy
ip -n ns1 route add 192.0.2.2/32 dev dummy1
ip -n ns1 link add name gretap1 up arp off type gretap \
    local 192.0.2.1 remote 192.0.2.2
ip -n ns1 route add 198.51.0.0/16 dev gretap1
taskset -c 0 ip netns exec ns1 mausezahn gretap1 \
    -A 198.51.100.1 -B 198.51.0.0/16 -t udp -p 1000 -c 0 -q &
taskset -c 2 ip netns exec ns1 mausezahn gretap1 \
    -A 198.51.100.1 -B 198.51.0.0/16 -t udp -p 1000 -c 0 -q &
sleep 10
ip netns pids ns1 | xargs kill
ip netns del ns1

Cc: stable@vger.kernel.org
Fixes: 67d6d681e1 ("ipv4: make exception cache less predictible")
Signed-off-by: Chuang Wang <nashuiliang@gmail.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251111064328.24440-1-nashuiliang@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-12 06:46:36 -08:00
Imre Deak
f2687d3cc9 drm/i915/dp_mst: Disable Panel Replay
Disable Panel Replay on MST links until it's properly implemented. For
instance the required VSC SDP is not programmed on MST and FEC is not
enabled if Panel Replay is enabled.

Fixes: 3257e55d3e ("drm/i915/panelreplay: enable/disable panel replay")
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15174
Cc: Jouni Högander <jouni.hogander@intel.com>
Cc: Animesh Manna <animesh.manna@intel.com>
Cc: stable@vger.kernel.org # v6.8+
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Link: https://patch.msgid.link/20251107124141.911895-1-imre.deak@intel.com
(cherry picked from commit e109f644b871df8440c886a69cdce971ed533088)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2025-11-12 09:44:54 -05:00
Praveen Talari
1c2e70397b pinctrl: qcom: msm: Fix deadlock in pinmux configuration
Replace disable_irq() with disable_irq_nosync() in msm_pinmux_set_mux()
to prevent deadlock when wakeup IRQ is triggered on the same
GPIO being reconfigured.

The issue occurs when a wakeup IRQ is triggered on a GPIO and the IRQ
handler attempts to reconfigure the same GPIO's pinmux. In this scenario,
msm_pinmux_set_mux() calls disable_irq() which waits for the currently
running IRQ handler to complete, creating a circular dependency that
results in deadlock.

Using disable_irq_nosync() avoids waiting for the IRQ handler to
complete, preventing the deadlock condition while still properly
disabling the interrupt during pinmux reconfiguration.

Suggested-by: Prasad Sodagudi <prasad.sodagudi@oss.qualcomm.com>
Signed-off-by: Praveen Talari <praveen.talari@oss.qualcomm.com>
Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2025-11-12 13:41:34 +01:00
Marcos Vega
fa0498f804 platform/x86: hp-wmi: Add Omen MAX 16-ah0xx fan support and thermal profile
New HP Omen laptops follow the same WMI thermal profile as Victus
16-r1000 and 16-s1000.

Add DMI board 8D41 to victus_s_thermal_profile_boards.

Signed-off-by: Marcos Vega <marcosmola2@gmail.com>
Link: https://patch.msgid.link/20251108114739.9255-3-marcosmola2@gmail.com
[ij: changelog taken partially from v1]
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-12 14:28:44 +02:00
Alexandru Elisei
85592114ff KVM: arm64: VHE: Compute fgt traps before activating them
On VHE, the Fine Grain Traps registers are written to hardware in
kvm_arch_vcpu_load()->..->__activate_traps_hfgxtr(), but the fgt array is
computed later, in kvm_vcpu_load_fgt(). This can lead to zero being written
to the FGT registers the first time a VCPU is loaded. Also, any changes to
the fgt array will be visible only after the VCPU is scheduled out, and
then back in, which is not the intended behaviour.

Fix it by computing the fgt array just before the fgt traps are written
to hardware.

Fixes: fb10ddf35c ("KVM: arm64: Compute per-vCPU FGTs at vcpu_load()")
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Oliver Upton <oupton@kernel.org>
Link: https://patch.msgid.link/20251112102853.47759-1-alexandru.elisei@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-12 10:52:58 +00:00
Christian Brauner
e9d50b78fd Merge patch series "fs: add iput_not_last()"
Add iput_not_last() and switch landlock over to it to remove
false-positives from might_sleep() annotations in iput().

Link: https://patch.msgid.link/20251105212025.807549-1-mjguzik@gmail.com

* patches from https://patch.msgid.link/20251105212025.807549-1-mjguzik@gmail.com:
  landlock: fix splats from iput() after it started calling might_sleep()
  fs: add iput_not_last()

Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-12 10:47:52 +01:00
Mateusz Guzik
56325e8c68 landlock: fix splats from iput() after it started calling might_sleep()
At this point it is guaranteed this is not the last reference.

However, a recent addition of might_sleep() at top of iput() started
generating false-positives as it was executing for all values.

Remedy the problem by using the newly introduced iput_not_last().

Reported-by: syzbot+12479ae15958fc3f54ec@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68d32659.a70a0220.4f78.0012.GAE@google.com/
Fixes: 2ef435a872 ("fs: add might_sleep() annotation to iput() and more")
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://patch.msgid.link/20251105212025.807549-2-mjguzik@gmail.com
Reviewed-by: Mickaël Salaün <mic@digikod.net>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-12 10:47:42 +01:00
Mateusz Guzik
1274162464 fs: add iput_not_last()
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://patch.msgid.link/20251105212025.807549-1-mjguzik@gmail.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-12 10:47:42 +01:00
Mike Yuan
3cd1548a27 shmem: fix tmpfs reconfiguration (remount) when noswap is set
In systemd we're trying to switch the internal credentials setup logic
to new mount API [1], and I noticed fsconfig(FSCONFIG_CMD_RECONFIGURE)
consistently fails on tmpfs with noswap option. This can be trivially
reproduced with the following:

```
int fs_fd = fsopen("tmpfs", 0);
fsconfig(fs_fd, FSCONFIG_SET_FLAG, "noswap", NULL, 0);
fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
fsmount(fs_fd, 0, 0);
fsconfig(fs_fd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);  <------ EINVAL
```

After some digging the culprit is shmem_reconfigure() rejecting
!(ctx->seen & SHMEM_SEEN_NOSWAP) && sbinfo->noswap, which is bogus
as ctx->seen serves as a mask for whether certain options are touched
at all. On top of that, noswap option doesn't use fsparam_flag_no,
hence it's not really possible to "reenable" swap to begin with.
Drop the check and redundant SHMEM_SEEN_NOSWAP flag.

[1] https://github.com/systemd/systemd/pull/39637

Fixes: 2c6efe9cf2 ("shmem: add support to ignore swap")
Signed-off-by: Mike Yuan <me@yhndnzj.com>
Link: https://patch.msgid.link/20251108190930.440685-1-me@yhndnzj.com
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-12 10:43:22 +01:00
Andrei Vagin
78f0e33cd6 fs/namespace: correctly handle errors returned by grab_requested_mnt_ns
grab_requested_mnt_ns was changed to return error codes on failure, but
its callers were not updated to check for error pointers, still checking
only for a NULL return value.

This commit updates the callers to use IS_ERR() or IS_ERR_OR_NULL() and
PTR_ERR() to correctly check for and propagate errors.

This also makes sure that the logic actually works and mount namespace
file descriptors can be used to refere to mounts.

Christian Brauner <brauner@kernel.org> says:

Rework the patch to be more ergonomic and in line with our overall error
handling patterns.

Fixes: 7b9d14af87 ("fs: allow mount namespace fd")
Cc: Christian Brauner <brauner@kernel.org>
Signed-off-by: Andrei Vagin <avagin@google.com>
Link: https://patch.msgid.link/20251111062815.2546189-1-avagin@google.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-12 10:42:49 +01:00
Baojun Xu
f6fdd77b3e ALSA: hda/tas2781: Correct the wrong project ID
The project hardware ID should be ALC287_FIXUP_TXNW2781_I2C,
not ALC287_FIXUP_TAS2781_I2C for HP Lampass projects.

Fixes: 7a39c723b7 ("ALSA: hda/tas2781: Add new quirk for HP new projects")
Signed-off-by: Baojun Xu <baojun.xu@ti.com>
Link: https://patch.msgid.link/20251112092609.15865-1-baojun.xu@ti.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2025-11-12 10:33:17 +01:00
Christian Brauner
a3f8f86627 power: always freeze efivarfs
The efivarfs filesystems must always be frozen and thawed to resync
variable state. Make it so.

Link: https://patch.msgid.link/20251105-vorbild-zutreffen-fe00d1dd98db@brauner
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-12 10:12:39 +01:00
Johannes Berg
a35f64a216 Merge tag 'iwlwifi-fixes-2025-11-12' of https://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-next
Miri Korenblit says:
====================
iwlwifi fixes:

- avoid link toggling
- fix beacon template rate
- don't use iterator outside the loop
====================

Link: https://patch.msgid.link/DM3PPF63A6024A9E52FF4A7B23F283B7FC7A3CCA@DM3PPF63A6024A9.namprd11.prod.outlook.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2025-11-12 09:51:05 +01:00
Miri Korenblit
1a222625b4 wifi: iwlwifi: mld: always take beacon ies in link grading
One of the factors of a link's grade is the channel load, which is
calculated from the AP's bss load element.
The current code takes this element from the beacon for an active link,
and from bss->ies for an inactive link.

bss->ies is set to either the beacon's ies or to the probe response
ones, with preference to the probe response (meaning that if there was
even one probe response, the ies of it will be stored in bss->ies and
won't be overiden by the beacon ies).

The probe response can be very old, i.e. from the connection time,
where a beacon is updated before each link selection (which is
triggered only after a passive scan).

In such case, the bss load element in the probe response will not
include the channel load caused by the STA, where the beacon will.

This will cause the inactive link to always have a lower channel
load, and therefore an higher grade than the active link's one.

This causes repeated link switches, causing the throughput to drop.

Fix this by always taking the ies from the beacon, as those are for
sure new.

Fixes: d1e879ec60 ("wifi: iwlwifi: add iwlmld sub-driver")
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20251110145652.b493dbb1853a.I058ba7309c84159f640cc9682d1bda56dd56a536@changeid
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
2025-11-12 09:54:46 +02:00
Johannes Berg
3592c0083f wifi: iwlwifi: mvm: fix beacon template/fixed rate
During the development of the rate changes, I evidently made
some changes that shouldn't have been there; beacon templates
with rate_n_flags are only in old versions, so no changes to
them should have been necessary, and evidently broke on some
devices. This also would have broken fixed (injection) rates,
it would seem. Restore the old handling of this.

Fixes: dabc88cb3b ("wifi: iwlwifi: handle v3 rates")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220558
Reviewed-by: Benjamin Berg <benjamin.berg@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://patch.msgid.link/20251008112044.3bb8ea849d8d.I90f4d2b2c1f62eaedaf304a61d2ab9e50c491c2d@changeid
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
2025-11-12 09:54:46 +02:00
Junjie Cao
f4c737d449 wifi: iwlwifi: fix aux ROC time event iterator usage
The list_for_each_entry() iterator must not be used outside the loop.
Even though we break and check for NULL, doing so still violates kernel
iteration rules and triggers Coccinelle's use_after_iter.cocci warning.

Cache the matched entry in aux_roc_te and use it consistently after the
loop. This follows iterator best practices, resolves the warning, and
makes the code more maintainable.

Signed-off-by: Junjie Cao <junjie.cao@intel.com>
Link: https://patch.msgid.link/20251016014919.383565-1-junjie.cao@intel.com
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
2025-11-12 09:54:46 +02:00
Harish Kasiviswanathan
eac32ff423 drm/amdkfd: Fix GPU mappings for APU after prefetch
Fix the following corner case:-
 Consider a 2M huge page SVM allocation, followed by prefetch call for
the first 4K page. The whole range is initially mapped with single PTE.
After the prefetch, this range gets split to first page + rest of the
pages. Currently, the first page mapping is not updated on MI300A (APU)
since page hasn't migrated. However, after range split PTE mapping it not
valid.

Fix this by forcing page table update for the whole range when prefetch
is called.  Calling prefetch on APU doesn't improve performance. If all
it deteriotes. However, functionality has to be supported.

v2: Use apu_prefer_gtt as this issue doesn't apply to APUs with carveout
VRAM

v3: Simplify by setting the flag for all ASICs as it doesn't affect dGPU

v4: Remove v2 and v3 changes. Force update_mapping when range is split
at a size that is not aligned to prange granularity

Suggested-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Philip Yang<Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 076470b9f6f8d9c7c8ca73a9f054942a686f9ba7)
2025-11-11 22:52:51 -05:00
Jonathan Kim
d15deafab5 drm/amdkfd: relax checks for over allocation of save area
Over allocation of save area is not fatal, only under allocation is.
ROCm has various components that independently claim authority over save
area size.

Unless KFD decides to claim single authority, relax size checks.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Philip Yang <philip.yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 15bd4958fe38e763bc17b607ba55155254a01f55)
Cc: stable@vger.kernel.org
2025-11-11 22:52:27 -05:00
Sathishkumar S
bbe3c11503 drm/amdgpu/jpeg: Add parse_cs for JPEG5_0_1
enable parse_cs callback for JPEG5_0_1.

Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 547985579932c1de13f57f8bcf62cd9361b9d3d3)
Cc: stable@vger.kernel.org
2025-11-11 22:51:49 -05:00
Sultan Alsawaf
7132f7e025 drm/amd/amdgpu: Ensure isp_kernel_buffer_alloc() creates a new BO
When the BO pointer provided to amdgpu_bo_create_kernel() points to
non-NULL, amdgpu_bo_create_kernel() takes it as a hint to pin that address
rather than allocate a new BO.

This functionality is never desired for allocating ISP buffers. A new BO
should always be created when isp_kernel_buffer_alloc() is called, per the
description for isp_kernel_buffer_alloc().

Ensure this by zeroing *bo right before the amdgpu_bo_create_kernel() call.

Fixes: 55d42f6169 ("drm/amd/amdgpu: Add helper functions for isp buffers")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Reviewed-by: Pratap Nirujogi <pratap.nirujogi@amd.com>
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 73c8c29baac7f0c7e703d92eba009008cbb5228e)
2025-11-11 22:51:27 -05:00
Ivan Lipski
33c9957091 drm/amd/display: Allow VRR params change if unsynced with the stream
[Why]
When changing resolution (e.g., 4K → FHD) in mirror/clone mode with
certain monitors, the monitor blanks and loses connection due to an early
exit in vrr_settings_require_update(). The function only checks if VRR
state, fixed refresh target, or min/max refresh rate range has changed.

During mode changes, if the calculated min/max refresh values remain the
same even though the stream's v_total changed, the function returns early
without updating vrr_params.adjust.v_total_min/max, leaving the monitor's
VRR timing parameters unsynced with the new mode, causing it to blank out.

[How]
Explicitly adjust VRR parameters to the stream's nominal v_total when VRR
is supported, but inactive.

Fixes: 6d31602a9f ("drm/amd/display: more liberal vmin/vmax update for freesync")
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Ivan Lipski <ivan.lipski@amd.com>
Signed-off-by: Fangzhi Zuo <jerry.zuo@amd.com>
Tested-by: Dan Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 607df8248a011524211ee34850345305a1913f9e)
2025-11-11 22:50:53 -05:00
Jesse.Zhang
6623c5f9fd drm/amdgpu: fix lock warning in amdgpu_userq_fence_driver_process
Fix a potential deadlock caused by inconsistent spinlock usage
between interrupt and process contexts in the userq fence driver.

The issue occurs when amdgpu_userq_fence_driver_process() is called
from both:
- Interrupt context: gfx_v11_0_eop_irq() -> amdgpu_userq_fence_driver_process()
- Process context: amdgpu_eviction_fence_suspend_worker() ->
  amdgpu_userq_fence_driver_force_completion() -> amdgpu_userq_fence_driver_process()

In interrupt context, the spinlock was acquired without disabling
interrupts, leaving it in {IN-HARDIRQ-W} state. When the same lock
is acquired in process context, the kernel detects inconsistent
locking since the process context acquisition would enable interrupts
while holding a lock previously acquired in interrupt context.

Kernel log shows:
[ 4039.310790] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
[ 4039.310804] kworker/7:2/409 [HC0[0]:SC0[0]:HE1:SE1] takes:
[ 4039.310818] ffff9284e1bed000 (&fence_drv->fence_list_lock){?...}-{3:3},
[ 4039.310993] {IN-HARDIRQ-W} state was registered at:
[ 4039.311004]   lock_acquire+0xc6/0x300
[ 4039.311018]   _raw_spin_lock+0x39/0x80
[ 4039.311031]   amdgpu_userq_fence_driver_process.part.0+0x30/0x180 [amdgpu]
[ 4039.311146]   amdgpu_userq_fence_driver_process+0x17/0x30 [amdgpu]
[ 4039.311257]   gfx_v11_0_eop_irq+0x132/0x170 [amdgpu]

Fix by using spin_lock_irqsave()/spin_unlock_irqrestore() to properly
manage interrupt state regardless of calling context.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit ded3ad780cf97a04927773c4600823b84f7f3cc2)
Cc: stable@vger.kernel.org
2025-11-11 22:50:22 -05:00
Pierre-Eric Pelloux-Prayer
9f8fd538e2 drm/amdgpu: jump to the correct label on failure
drm_sched_entity_init wasn't called yet, so the only thing to
do is to release allocated memory.
This doesn't fix any bug since entity is zero allocated and
drm_sched_entity_fini does nothing in this case.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit ec49374ccb8da86b465beaf09c367f3dfd648d8f)
2025-11-11 22:49:46 -05:00
Vitaly Prosyak
22a36e660d drm/amdgpu: disable peer-to-peer access for DCC-enabled GC12 VRAM surfaces
Certain multi-GPU configurations (especially GFX12) may hit
data corruption when a DCC-compressed VRAM surface is shared across GPUs
using peer-to-peer (P2P) DMA transfers.

Such surfaces rely on device-local metadata and cannot be safely accessed
through a remote GPU’s page tables. Attempting to import a DCC-enabled
surface through P2P leads to incorrect rendering or GPU faults.

This change disables P2P for DCC-enabled VRAM buffers that are contiguous
and allocated on GFX12+ hardware.  In these cases, the importer falls back
to the standard system-memory path, avoiding invalid access to compressed
surfaces.

Future work could consider optional migration (VRAM→System→VRAM) if a
performance regression is observed when `attach->peer2peer = false`.

Tested on:
 - Dual RX 9700 XT (Navi4x) setup
 - GNOME and Wayland compositor scenarios
 - Confirmed no corruption after disabling P2P under these conditions
v2: Remove check TTM_PL_VRAM & TTM_PL_FLAG_CONTIGUOUS.
v3: simplify for upsteam and fix ip version check (Alex)

Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 9dff2bb709e6fbd97e263fd12bf12802d2b5a0cf)
Cc: stable@vger.kernel.org
2025-11-11 22:49:19 -05:00
Eric Dumazet
0345552a65 net_sched: limit try_bulk_dequeue_skb() batches
After commit 100dfa74cad9 ("inet: dev_queue_xmit() llist adoption")
I started seeing many qdisc requeues on IDPF under high TX workload.

$ tc -s qd sh dev eth1 handle 1: ; sleep 1; tc -s qd sh dev eth1 handle 1:
qdisc mq 1: root
 Sent 43534617319319 bytes 268186451819 pkt (dropped 0, overlimits 0 requeues 3532840114)
 backlog 1056Kb 6675p requeues 3532840114
qdisc mq 1: root
 Sent 43554665866695 bytes 268309964788 pkt (dropped 0, overlimits 0 requeues 3537737653)
 backlog 781164b 4822p requeues 3537737653

This is caused by try_bulk_dequeue_skb() being only limited by BQL budget.

perf record -C120-239 -e qdisc:qdisc_dequeue sleep 1 ; perf script
...
 netperf 75332 [146]  2711.138269: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1292 skbaddr=0xff378005a1e9f200
 netperf 75332 [146]  2711.138953: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1213 skbaddr=0xff378004d607a500
 netperf 75330 [144]  2711.139631: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1233 skbaddr=0xff3780046be20100
 netperf 75333 [147]  2711.140356: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1093 skbaddr=0xff37800514845b00
 netperf 75337 [151]  2711.141037: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1353 skbaddr=0xff37800460753300
 netperf 75337 [151]  2711.141877: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1367 skbaddr=0xff378004e72c7b00
 netperf 75330 [144]  2711.142643: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1202 skbaddr=0xff3780045bd60000
...

This is bad because :

1) Large batches hold one victim cpu for a very long time.

2) Driver often hit their own TX ring limit (all slots are used).

3) We call dev_requeue_skb()

4) Requeues are using a FIFO (q->gso_skb), breaking qdisc ability to
   implement FQ or priority scheduling.

5) dequeue_skb() gets packets from q->gso_skb one skb at a time
   with no xmit_more support. This is causing many spinlock games
   between the qdisc and the device driver.

Requeues were supposed to be very rare, lets keep them this way.

Limit batch sizes to /proc/sys/net/core/dev_weight (default 64) as
__qdisc_run() was designed to use.

Fixes: 5772e9a346 ("qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Link: https://patch.msgid.link/20251109161215.2574081-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:56:50 -08:00
Magnus Lindholm
d58041d2c6 MAINTAINERS: Add Magnus Lindholm as maintainer for alpha port
Acked-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Acked-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Magnus Lindholm <linmag7@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
2025-11-11 20:52:04 -05:00
Jakub Kicinski
7a6fa4f89e Merge branch 'selftests-mptcp-join-fix-some-flaky-tests'
Matthieu Baerts says:

====================
selftests: mptcp: join: fix some flaky tests

When looking at the recent CI results on NIPA and MPTCP CIs, a few MPTCP
Join tests are marked as unstable. Here are some fixes for that.

- Patch 1: a small fix for mptcp_connect.sh, printing a note as
  initially intended. For >=v5.13.

- Patch 2: avoid unexpected reset when closing subflows. For >= 5.13.

- Patches 3-4: longer transfer when not waiting for the end. For >=5.18.

- Patch 5: read all received data when expecting a reset. For >= v6.1.

- Patch 6: a fix to properly kill background tasks. For >= v6.5.
====================

Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-0-a4332c714e10@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:49:53 -08:00
Matthieu Baerts (NGI0)
852b644acb selftests: mptcp: join: properly kill background tasks
The 'run_tests' function is executed in the background, but killing its
associated PID would not kill the children tasks running in the
background.

To properly kill all background tasks, 'kill -- -PID' could be used, but
this requires kill from procps-ng. Instead, all children tasks are
listed using 'ps', and 'kill' is called with all PIDs of this group.

Fixes: 31ee4ad86a ("selftests: mptcp: join: stop transfer when check is done (part 1)")
Cc: stable@vger.kernel.org
Fixes: 04b57c9e09 ("selftests: mptcp: join: stop transfer when check is done (part 2)")
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-6-a4332c714e10@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:49:49 -08:00
Matthieu Baerts (NGI0)
ee79980f7a selftests: mptcp: connect: trunc: read all recv data
MPTCP Join "fastclose server" selftest is sometimes failing because the
client output file doesn't have the expected size, e.g. 296B instead of
1024B.

When looking at a packet trace when this happens, the server sent the
expected 1024B in two parts -- 100B, then 924B -- then the MP_FASTCLOSE.
It is then strange to see the client only receiving 296B, which would
mean it only got a part of the second packet. The problem is then not on
the networking side, but rather on the data reception side.

When mptcp_connect is launched with '-f -1', it means the connection
might stop before having sent everything, because a reset has been
received. When this happens, the program was directly stopped. But it is
also possible there are still some data to read, simply because the
previous 'read' step was done with a buffer smaller than the pending
data, see do_rnd_read(). In this case, it is important to read what's
left in the kernel buffers before stopping without error like before.

SIGPIPE is now ignored, not to quit the app before having read
everything.

Fixes: 6bf41020b7 ("selftests: mptcp: update and extend fastclose test-cases")
Cc: stable@vger.kernel.org
Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-5-a4332c714e10@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:49:49 -08:00
Matthieu Baerts (NGI0)
290493078b selftests: mptcp: join: userspace: longer transfer
In rare cases, when the test environment is very slow, some userspace
tests can fail because some expected events have not been seen.

Because the tests are expecting a long on-going connection, and they are
not waiting for the end of the transfer, it is fine to make the
connection longer. This connection will be killed at the end, after the
verifications, so making it longer doesn't change anything, apart from
avoid it to end before the end of the verifications

To play it safe, all userspace tests not waiting for the end of the
transfer are now sharing a longer file (128KB) at slow speed.

Fixes: 4369c198e5 ("selftests: mptcp: test userspace pm out of transfer")
Cc: stable@vger.kernel.org
Fixes: b2e2248f36 ("selftests: mptcp: userspace pm create id 0 subflow")
Fixes: e3b47e460b ("selftests: mptcp: userspace pm remove initial subflow")
Fixes: b9fb176081 ("selftests: mptcp: userspace pm send RM_ADDR for ID 0")
Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-4-a4332c714e10@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:49:49 -08:00
Matthieu Baerts (NGI0)
6457595db9 selftests: mptcp: join: endpoints: longer transfer
In rare cases, when the test environment is very slow, some userspace
tests can fail because some expected events have not been seen.

Because the tests are expecting a long on-going connection, and they are
not waiting for the end of the transfer, it is fine to make the
connection longer. This connection will be killed at the end, after the
verifications, so making it longer doesn't change anything, apart from
avoid it to end before the end of the verifications

To play it safe, all endpoints tests not waiting for the end of the
transfer are now sharing a longer file (128KB) at slow speed.

Fixes: 69c6ce7b6e ("selftests: mptcp: add implicit endpoint test case")
Cc: stable@vger.kernel.org
Fixes: e274f71540 ("selftests: mptcp: add subflow limits test-cases")
Fixes: b5e2fb832f ("selftests: mptcp: add explicit test case for remove/readd")
Fixes: e06959e9ee ("selftests: mptcp: join: test for flush/re-add endpoints")
Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-3-a4332c714e10@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:49:48 -08:00
Matthieu Baerts (NGI0)
aea73bae66 selftests: mptcp: join: rm: set backup flag
Some of these 'remove' tests rarely fail because a subflow has been
reset instead of cleanly removed. This can happen when one extra subflow
which has never carried data is being closed (FIN) on one side, while
the other is sending data for the first time.

To avoid such subflows to be used right at the end, the backup flag has
been added. With that, data will be only carried on the initial subflow.

Fixes: d2c4333a80 ("selftests: mptcp: add testcases for removing addrs")
Cc: stable@vger.kernel.org
Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-2-a4332c714e10@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:49:48 -08:00
Matthieu Baerts (NGI0)
63c643aa7b selftests: mptcp: connect: fix fallback note due to OoO
The "fallback due to TCP OoO" was never printed because the stat_ooo_now
variable was checked twice: once in the parent if-statement, and one in
the child one. The second condition was then always true then, and the
'else' branch was never taken.

The idea is that when there are more ACK + MP_CAPABLE than expected, the
test either fails if there was no out of order packets, or a notice is
printed.

Fixes: 69ca3d29a7 ("mptcp: update selftest for fallback due to OoO")
Cc: stable@vger.kernel.org
Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-1-a4332c714e10@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:49:47 -08:00
Jakub Kicinski
27bcc05b88 Merge tag 'for-net-2025-11-11' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth
Luiz Augusto von Dentz says:

====================
bluetooth pull request for net:

 - hci_conn: Fix not cleaning up PA_LINK connections
 - hci_event: Fix not handling PA Sync Lost event
 - MGMT: cancel mesh send timer when hdev removed
 - 6lowpan: reset link-local header on ipv6 recv path
 - 6lowpan: fix BDADDR_LE vs ADDR_LE_DEV address type confusion
 - L2CAP: export l2cap_chan_hold for modules
 - 6lowpan: Don't hold spin lock over sleeping functions
 - 6lowpan: add missing l2cap_chan_lock()
 - btusb: reorder cleanup in btusb_disconnect to avoid UAF
 - btrtl: Avoid loading the config file on security chips

* tag 'for-net-2025-11-11' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth:
  Bluetooth: btrtl: Avoid loading the config file on security chips
  Bluetooth: hci_event: Fix not handling PA Sync Lost event
  Bluetooth: hci_conn: Fix not cleaning up PA_LINK connections
  Bluetooth: 6lowpan: add missing l2cap_chan_lock()
  Bluetooth: 6lowpan: Don't hold spin lock over sleeping functions
  Bluetooth: L2CAP: export l2cap_chan_hold for modules
  Bluetooth: 6lowpan: fix BDADDR_LE vs ADDR_LE_DEV address type confusion
  Bluetooth: 6lowpan: reset link-local header on ipv6 recv path
  Bluetooth: btusb: reorder cleanup in btusb_disconnect to avoid UAF
  Bluetooth: MGMT: cancel mesh send timer when hdev removed
====================

Link: https://patch.msgid.link/20251111141357.1983153-1-luiz.dentz@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:43:32 -08:00
Kriish Sharma
bb8336a516 ethtool: fix incorrect kernel-doc style comment in ethtool.h
Building documentation produced the following warning:

  WARNING: ./include/linux/ethtool.h:495 This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for

This comment was not intended to be parsed as kernel-doc, so replace
the '/**' with '/*' to silence the warning and align with normal
comment style in header files.

No functional changes.

Signed-off-by: Kriish Sharma <kriish.sharma2006@gmail.com>
Link: https://patch.msgid.link/20251110182545.2112596-1-kriish.sharma2006@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-11 17:38:48 -08:00
Sami Tolvanen
fdf302e6be gendwarfksyms: Skip files with no exports
Starting with Rust 1.91.0 (released 2025-10-30), in upstream commit
ab91a63d403b ("Ignore intrinsic calls in cross-crate-inlining cost model")
[1][2], `bindings.o` stops containing DWARF debug information because the
`Default` implementations contained `write_bytes()` calls which are now
ignored in that cost model (note that `CLIPPY=1` does not reproduce it).

This means `gendwarfksyms` complains:

      RUSTC L rust/bindings.o
    error: gendwarfksyms: process_module: dwarf_get_units failed: no debugging information?

There are several alternatives that would work here: conditionally
skipping in the cases needed (but that is subtle and brittle), forcing
DWARF generation with e.g. a dummy `static` (ugly and we may need to
do it in several crates), skipping the call to the tool in the Kbuild
command when there are no exports (fine) or teaching the tool to do so
itself (simple and clean).

Thus do the last one: don't attempt to process files if we have no symbol
versions to calculate.

  [ I used the commit log of my patch linked below since it explained the
    root issue and expanded it a bit more to summarize the alternatives.

      - Miguel ]

Cc: stable@vger.kernel.org # Needed in 6.17.y.
Reported-by: Haiyue Wang <haiyuewa@163.com>
Closes: https://lore.kernel.org/rust-for-linux/b8c1c73d-bf8b-4bf2-beb1-84ffdcd60547@163.com/
Suggested-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://lore.kernel.org/rust-for-linux/CANiq72nKC5r24VHAp9oUPR1HVPqT+=0ab9N0w6GqTF-kJOeiSw@mail.gmail.com/
Link: ab91a63d40 [1]
Link: https://github.com/rust-lang/rust/pull/145910 [2]
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Tested-by: Haiyue Wang <haiyuewa@163.com>
Reviewed-by: Alice Ryhl <aliceryhl@google.com>
Link: https://patch.msgid.link/20251110131913.1789896-1-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
2025-11-11 20:37:11 +01:00
Linus Torvalds
24172e0d79 Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 fixes from Will Deacon:
 "There's more here than I would ideally like at this stage, but there's
  been a steady trickle of fixes and some of them took a few rounds of
  review.

  The bulk of the changes are fixing some fallout from the recent BBM
  level two support which allows the linear map to be split from block
  to page mappings at runtime, but inadvertently led to sleeping in
  atomic context on some paths where the linear map was already mapped
  with page granularity. The fix is simply to avoid splitting in those
  cases but the implementation of that is a little involved.

  The other interesting fix is addressing a catastophic performance
  issue with our per-cpu atomics discovered by Paul in the SRCU locking
  code but which took some interactions with the hardware folks to
  resolve.

  Summary:

   - Avoid sleeping in atomic context when changing linear map
     permissions for DEBUG_PAGEALLOC or KFENCE

   - Rework printing of Spectre mitigation status to avoid hardlockup
     when enabling per-task mitigations on the context-switch path

   - Reject kernel modules when instruction patching fails either due to
     the DWARF-based SCS patching or because of an alternatives callback
     residing outside of the core kernel text

   - Propagate error when updating kernel memory permissions in kprobes

   - Drop pointless, incorrect message when enabling the ACPI SPCR
     console

   - Use value-returning LSE instructions for per-cpu atomics to reduce
     latency in SRCU locking routines"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: Reject modules with internal alternative callbacks
  arm64: Fail module loading if dynamic SCS patching fails
  arm64: proton-pack: Fix hard lockup due to print in scheduler context
  arm64: proton-pack: Drop print when !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
  arm64: mm: Tidy up force_pte_mapping()
  arm64: mm: Optimize range_split_to_ptes()
  arm64: mm: Don't sleep in split_kernel_leaf_mapping() when in atomic context
  arm64: kprobes: check the return value of set_memory_rox()
  arm64: acpi: Drop message logging SPCR default console
  Revert "ACPI: Suppress misleading SPCR console message when SPCR table is absent"
  arm64: Use load LSE atomics for the non-return per-CPU atomic operations
2025-11-11 10:31:17 -08:00
Linus Torvalds
8341374f67 Merge tag 'for-6.18-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

 - fix new inode name tracking in tree-log

 - fix conventional zone and stripe calculations in zoned mode

 - fix bio reference counts on error paths in relocation and scrub

* tag 'for-6.18-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: release root after error in data_reloc_print_warning_inode()
  btrfs: scrub: put bio after errors in scrub_raid56_parity_stripe()
  btrfs: do not update last_log_commit when logging inode due to a new name
  btrfs: zoned: fix stripe width calculation
  btrfs: zoned: fix conventional zone capacity calculation
2025-11-11 10:13:17 -08:00
Linus Torvalds
537d196186 Merge tag 'mm-hotfixes-stable-2025-11-10-19-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
 "26 hotfixes.  22(!) are cc:stable, 22 are MM.

   - address some Kexec Handover issues (Pasha Tatashin)

   - fix handling of large folios which are mapped outside i_size (Kiryl
     Shutsemau)

   - fix some DAMON time issues on 32-bit machines (Quanmin Yan)

  Plus the usual shower of singletons"

* tag 'mm-hotfixes-stable-2025-11-10-19-30' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (26 commits)
  kho: warn and exit when unpreserved page wasn't preserved
  kho: fix unpreservation of higher-order vmalloc preservations
  kho: fix out-of-bounds access of vmalloc chunk
  MAINTAINERS: add Chris and Kairui as the swap maintainer
  mm/secretmem: fix use-after-free race in fault handler
  mm/huge_memory: initialise the tags of the huge zero folio
  nilfs2: avoid having an active sc_timer before freeing sci
  scripts/decode_stacktrace.sh: fix build ID and PC source parsing
  mm/damon/sysfs: change next_update_jiffies to a global variable
  mm/damon/stat: change last_refresh_jiffies to a global variable
  maple_tree: fix tracepoint string pointers
  codetag: debug: handle existing CODETAG_EMPTY in mark_objexts_empty for slabobj_ext
  mm/mremap: honour writable bit in mremap pte batching
  gcov: add support for GCC 15
  mm/mm_init: fix hash table order logging in alloc_large_system_hash()
  mm/truncate: unmap large folio on split failure
  mm/memory: do not populate page table entries beyond i_size
  fs/proc: fix uaf in proc_readdir_de()
  mm/huge_memory: preserve PG_has_hwpoisoned if a folio is split to >0 order
  ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
  ...
2025-11-11 09:49:56 -08:00
Stefan Metzmacher
d93a89684d smb: client: let smbd_disconnect_rdma_connection() turn CREATED into DISCONNECTED
When smbd_disconnect_rdma_connection() turns SMBDIRECT_SOCKET_CREATED
into SMBDIRECT_SOCKET_ERROR, we'll have the situation that
smbd_disconnect_rdma_work() will set SMBDIRECT_SOCKET_DISCONNECTING
and call rdma_disconnect(), which likely fails as we never reached
the RDMA_CM_EVENT_ESTABLISHED. it means that
wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED)
in smbd_destroy() will hang forever in SMBDIRECT_SOCKET_DISCONNECTING
never reaching SMBDIRECT_SOCKET_DISCONNECTED.

So we directly go from SMBDIRECT_SOCKET_CREATED to
SMBDIRECT_SOCKET_DISCONNECTED.

Fixes: ffbfc73e84 ("smb: client: let smbd_disconnect_rdma_connection() set SMBDIRECT_SOCKET_ERROR...")
Cc: Steve French <smfrench@gmail.com>
Cc: Tom Talpey <tom@talpey.com>
Cc: Long Li <longli@microsoft.com>
Cc: Namjae Jeon <linkinjeon@kernel.org>
Cc: linux-cifs@vger.kernel.org
Cc: samba-technical@lists.samba.org
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-11 11:05:35 -06:00
Haein Lee
632108ec07 ALSA: usb-audio: Fix NULL pointer dereference in snd_usb_mixer_controls_badd
In snd_usb_create_streams(), for UAC version 3 devices, the Interface
Association Descriptor (IAD) is retrieved via usb_ifnum_to_if(). If this
call fails, a fallback routine attempts to obtain the IAD from the next
interface and sets a BADD profile. However, snd_usb_mixer_controls_badd()
assumes that the IAD retrieved from usb_ifnum_to_if() is always valid,
without performing a NULL check. This can lead to a NULL pointer
dereference when usb_ifnum_to_if() fails to find the interface descriptor.

This patch adds a NULL pointer check after calling usb_ifnum_to_if() in
snd_usb_mixer_controls_badd() to prevent the dereference.

This issue was discovered by syzkaller, which triggered the bug by sending
a crafted USB device descriptor.

Fixes: 17156f23e9 ("ALSA: usb: add UAC3 BADD profiles support")
Signed-off-by: Haein Lee <lhi0729@kaist.ac.kr>
Link: https://patch.msgid.link/vwhzmoba9j2f.vwhzmob9u9e2.g6@dooray.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2025-11-11 17:57:44 +01:00
Shawn Lin
739f04f4a4 mmc: dw_mmc-rockchip: Fix wrong internal phase calculate
ciu clock is 2 times of io clock, but the sample clk used is
derived from io clock provided to the card. So we should use
io clock to calculate the phase.

Fixes: 59903441f5 ("mmc: dw_mmc-rockchip: Add internal phase support")
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2025-11-11 17:47:47 +01:00
Rakuram Eswaran
9e80562521 mmc: pxamci: Simplify pxamci_probe() error handling using devm APIs
This patch refactors pxamci_probe() to use devm-managed resource
allocation (e.g. devm_dma_request_chan) and dev_err_probe() for
improved readability and automatic cleanup on probe failure.

It also removes redundant NULL assignments and manual resource release
logic from pxamci_probe(), and eliminates the corresponding release
calls from pxamci_remove().

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/r/202510041841.pRlunIfl-lkp@intel.com/
Fixes: 58c40f3faf ("mmc: pxamci: Use devm_mmc_alloc_host() helper")
Suggested-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Signed-off-by: Rakuram Eswaran <rakuram.e96@gmail.com>
Reviewed-by: Khalid Aziz <khalid@kernel.org>
Acked-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2025-11-11 17:34:28 +01:00
Akinobu Mita
c42af83c59 memblock: fix memblock_estimated_nr_free_pages() for soft-reserved memory
memblock_estimated_nr_free_pages() returns the difference between the total
size of the "memory" memblock type and the "reserved" memblock type.

The "soft-reserved" memory regions are added to the "reserved" memblock
type, but not to the "memory" memblock type. Therefore,
memblock_estimated_nr_free_pages() may return a smaller value than
expected, or if it underflows, an extremely large value.

/proc/sys/kernel/threads-max is determined by the value of
memblock_estimated_nr_free_pages().  This issue was discovered on machines
with CXL memory because kernel.threads-max was either smaller than expected
or extremely large for the installed DRAM size.

This fixes the issue by replacing memblock_reserved_size() with
memblock_reserved_kern_size() that tells how much memory was
reserved from the actual RAM.

Suggested-by: Mike Rapoport <rppt@kernel.org>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Link: https://patch.msgid.link/20251111010010.7800-1-akinobu.mita@gmail.com
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
2025-11-11 18:15:35 +02:00
Yiqi Sun
ed6612165b smb: fix invalid username check in smb3_fs_context_parse_param()
Since the maximum return value of strnlen(..., CIFS_MAX_USERNAME_LEN)
is CIFS_MAX_USERNAME_LEN, length check in smb3_fs_context_parse_param()
is always FALSE and invalid.

Fix the comparison in if statement.

Signed-off-by: Yiqi Sun <sunyiqixm@gmail.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-11 10:01:47 -06:00
Stefan Metzmacher
55286b1e1b smb: server: let smb_direct_disconnect_rdma_connection() turn CREATED into DISCONNECTED
When smb_direct_disconnect_rdma_connection() turns SMBDIRECT_SOCKET_CREATED
into SMBDIRECT_SOCKET_ERROR, we'll have the situation that
smb_direct_disconnect_rdma_work() will set SMBDIRECT_SOCKET_DISCONNECTING
and call rdma_disconnect(), which likely fails as we never reached
the RDMA_CM_EVENT_ESTABLISHED. it means that
wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED)
in free_transport() will hang forever in SMBDIRECT_SOCKET_DISCONNECTING
never reaching SMBDIRECT_SOCKET_DISCONNECTED.

So we directly go from SMBDIRECT_SOCKET_CREATED to
SMBDIRECT_SOCKET_DISCONNECTED.

Fixes: b3fd52a0d8 ("smb: server: let smb_direct_disconnect_rdma_connection() set SMBDIRECT_SOCKET_ERROR...")
Cc: Namjae Jeon <linkinjeon@kernel.org>
Cc: Steve French <smfrench@gmail.com>
Cc: Tom Talpey <tom@talpey.com>
Cc: linux-cifs@vger.kernel.org
Cc: samba-technical@lists.samba.org
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-11 09:50:35 -06:00
Akiva Goldberger
e5eba42f01 mlx5: Fix default values in create CQ
Currently, CQs without a completion function are assigned the
mlx5_add_cq_to_tasklet function by default. This is problematic since
only user CQs created through the mlx5_ib driver are intended to use
this function.

Additionally, all CQs that will use doorbells instead of polling for
completions must call mlx5_cq_arm. However, the default CQ creation flow
leaves a valid value in the CQ's arm_db field, allowing FW to send
interrupts to polling-only CQs in certain corner cases.

These two factors would allow a polling-only kernel CQ to be triggered
by an EQ interrupt and call a completion function intended only for user
CQs, causing a null pointer exception.

Some areas in the driver have prevented this issue with one-off fixes
but did not address the root cause.

This patch fixes the described issue by adding defaults to the create CQ
flow. It adds a default dummy completion function to protect against
null pointer exceptions, and it sets an invalid command sequence number
by default in kernel CQs to prevent the FW from sending an interrupt to
the CQ until it is armed. User CQs are responsible for their own
initialization values.

Callers of mlx5_core_create_cq are responsible for changing the
completion function and arming the CQ per their needs.

Fixes: cdd04f4d4d ("net/mlx5: Add support to create SQ and CQ for ASO")
Signed-off-by: Akiva Goldberger <agoldberger@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Leon Romanovsky <leon@kernel.org>
Link: https://patch.msgid.link/1762681743-1084694-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:12:18 +01:00
Max Chou
cd8dbd9ef6 Bluetooth: btrtl: Avoid loading the config file on security chips
For chips with security enabled, it's only possible to load firmware
with a valid signature pattern.
If key_id is not zero, it indicates a security chip, and the driver will
not load the config file.

- Example log for a security chip.

Bluetooth: hci0: RTL: examining hci_ver=0c hci_rev=000a
  lmp_ver=0c lmp_subver=8922
Bluetooth: hci0: RTL: rom_version status=0 version=1
Bluetooth: hci0: RTL: btrtl_initialize: key id 1
Bluetooth: hci0: RTL: loading rtl_bt/rtl8922au_fw.bin
Bluetooth: hci0: RTL: cfg_sz 0, total sz 71301
Bluetooth: hci0: RTL: fw version 0x41c0c905

- Example log for a normal chip.

Bluetooth: hci0: RTL: examining hci_ver=0c hci_rev=000a
  lmp_ver=0c lmp_subver=8922
Bluetooth: hci0: RTL: rom_version status=0 version=1
Bluetooth: hci0: RTL: btrtl_initialize: key id 0
Bluetooth: hci0: RTL: loading rtl_bt/rtl8922au_fw.bin
Bluetooth: hci0: RTL: loading rtl_bt/rtl8922au_config.bin
Bluetooth: hci0: RTL: cfg_sz 6, total sz 71307
Bluetooth: hci0: RTL: fw version 0x41c0c905

Tested-by: Hilda Wu <hildawu@realtek.com>
Signed-off-by: Nial Ni <niall_ni@realsil.com.cn>
Signed-off-by: Max Chou <max.chou@realtek.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-11 09:06:57 -05:00
Paolo Abeni
ed6b5632e0 Merge branch 'mlx5e-misc-fixes-2025-11-09'
Tariq Toukan says:

====================
mlx5e misc fixes 2025-11-09

This patchset provides misc bug fixes from the team to the mlx5 Eth
driver.
====================

Link: https://patch.msgid.link/1762681073-1084058-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:05:46 +01:00
Gal Pressman
9fcc2b6c10 net/mlx5e: Fix potentially misleading debug message
Change the debug message to print the correct units instead of always
assuming Gbps, as the value can be in either 100 Mbps or 1 Gbps units.

Fixes: 5da8bc3eff ("net/mlx5e: DCBNL, Add debug messages log")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Nimrod Oren <noren@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1762681073-1084058-6-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:05:44 +01:00
Gal Pressman
43b27d1bd8 net/mlx5e: Fix wraparound in rate limiting for values above 255 Gbps
Add validation to reject rates exceeding 255 Gbps that would overflow
the 8 bits max bandwidth field.

Fixes: d8880795da ("net/mlx5e: Implement DCBNL IEEE max rate")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Nimrod Oren <noren@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1762681073-1084058-5-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:05:44 +01:00
Gal Pressman
a7bf4d5063 net/mlx5e: Fix maxrate wraparound in threshold between units
The previous calculation used roundup() which caused an overflow for
rates between 25.5Gbps and 26Gbps.
For example, a rate of 25.6Gbps would result in using 100Mbps units with
value of 256, which would overflow the 8 bits field.

Simplify the upper_limit_mbps calculation by removing the
unnecessary roundup, and adjust the comparison to use <= to correctly
handle the boundary condition.

Fixes: d8880795da ("net/mlx5e: Implement DCBNL IEEE max rate")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Nimrod Oren <noren@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1762681073-1084058-4-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:05:44 +01:00
Cosmin Ratiu
2dc768c052 net/mlx5e: Trim the length of the num_doorbell error
When trying to set num_doorbells to a value greater than the max number
of channels, the error message was going over the netlink limit of 80
chars, truncating the most important part of the message, the number of
channels.

Fix that by trimming the length a bit.

Fixes: 11bbcfb766 ("net/mlx5e: Use the 'num_doorbells' devlink param")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1762681073-1084058-3-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:05:44 +01:00
Carolina Jubran
0bcd5b3b50 net/mlx5e: Fix missing error assignment in mlx5e_xfrm_add_state()
Assign the return value of mlx5_eswitch_block_mode() to 'err' before
checking it to avoid returning an uninitialized error code.

Fixes: 22239eb258 ("net/mlx5e: Prevent tunnel reformat when tunnel mode not allowed")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/r/202510271649.uwsIxD6O-lkp@intel.com/
Closes: http://lore.kernel.org/linux-rdma/aPIEK4rLB586FdDt@stanley.mountain/
Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1762681073-1084058-2-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:05:44 +01:00
Paolo Abeni
02e9578c3e Merge branch 'net-sched-initialize-struct-tc_ife-to-fix-kernel-infoleak'
Ranganath says:

====================
net: sched: initialize struct tc_ife to fix kernel-infoleak

This series addresses the uninitialization of the struct which has
2 bytes of padding. And copying this uninitialized data to userspace
can leak info from kernel memory.

This series ensures all members and padding are cleared prior to
begin copied.

This change silences the KMSAN report and prevents potential information
leaks from the kernel memory.

v3: https://lore.kernel.org/lkml/20251106195635.2438-1-vnranganath.20@gmail.com/#t
v2: https://lore.kernel.org/r/20251101-infoleak-v2-0-01a501d41c09@gmail.com
v1: https://lore.kernel.org/r/20251031-infoleak-v1-1-9f7250ee33aa@gmail.com

Signed-off-by: Ranganath V N <vnranganath.20@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
====================

Link: https://patch.msgid.link/20251109091336.9277-1-vnranganath.20@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:00:11 +01:00
Ranganath V N
ce50039be4 net: sched: act_ife: initialize struct tc_ife to fix KMSAN kernel-infoleak
Fix a KMSAN kernel-infoleak detected  by the syzbot .

[net?] KMSAN: kernel-infoleak in __skb_datagram_iter

In tcf_ife_dump(), the variable 'opt' was partially initialized using a
designatied initializer. While the padding bytes are reamined
uninitialized. nla_put() copies the entire structure into a
netlink message, these uninitialized bytes leaked to userspace.

Initialize the structure with memset before assigning its fields
to ensure all members and padding are cleared prior to beign copied.

This change silences the KMSAN report and prevents potential information
leaks from the kernel memory.

This fix has been tested and validated by syzbot. This patch closes the
bug reported at the following syzkaller link and ensures no infoleak.

Reported-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=0c85cae3350b7d486aee
Tested-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com
Fixes: ef6980b6be ("introduce IFE action")
Signed-off-by: Ranganath V N <vnranganath.20@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251109091336.9277-3-vnranganath.20@gmail.com
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:00:08 +01:00
Ranganath V N
62b656e43e net: sched: act_connmark: initialize struct tc_ife to fix kernel leak
In tcf_connmark_dump(), the variable 'opt' was partially initialized using a
designatied initializer. While the padding bytes are reamined
uninitialized. nla_put() copies the entire structure into a
netlink message, these uninitialized bytes leaked to userspace.

Initialize the structure with memset before assigning its fields
to ensure all members and padding are cleared prior to beign copied.

Reported-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=0c85cae3350b7d486aee
Tested-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com
Fixes: 22a5dc0e5e ("net: sched: Introduce connmark action")
Signed-off-by: Ranganath V N <vnranganath.20@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251109091336.9277-2-vnranganath.20@gmail.com
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 15:00:08 +01:00
Niravkumar L Rabara
281326be67 EDAC/altera: Use INTTEST register for Ethernet and USB SBE injection
The current single-bit error injection mechanism flips bits directly in ECC RAM
by performing write and read operations. When the ECC RAM is actively used by
the Ethernet or USB controller, this approach sometimes trigger a false
double-bit error.

Switch both Ethernet and USB EDAC devices to use the INTTEST register
(altr_edac_a10_device_inject_fops) for single-bit error injection, similar to
the existing double-bit error injection method.

Fixes: 064acbd4f4 ("EDAC, altera: Add Stratix10 peripheral support")
Signed-off-by: Niravkumar L Rabara <niravkumarlaxmidas.rabara@altera.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Dinh Nguyen <dinguyen@kernel.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251111081333.1279635-1-niravkumarlaxmidas.rabara@altera.com
2025-11-11 14:59:04 +01:00
Luiz Augusto von Dentz
485e0626e5 Bluetooth: hci_event: Fix not handling PA Sync Lost event
This handles PA Sync Lost event which previously was assumed to be
handled with BIG Sync Lost but their lifetime are not the same thus why
there are 2 different events to inform when each sync is lost.

Fixes: b2a5f2e1c1 ("Bluetooth: hci_event: Add support for handling LE BIG Sync Lost event")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-11 08:55:18 -05:00
Niravkumar L Rabara
fd3ecda38f EDAC/altera: Handle OCRAM ECC enable after warm reset
The OCRAM ECC is always enabled either by the BootROM or by the Secure Device
Manager (SDM) during a power-on reset on SoCFPGA.

However, during a warm reset, the OCRAM content is retained to preserve data,
while the control and status registers are reset to their default values. As
a result, ECC must be explicitly re-enabled after a warm reset.

Fixes: 17e47dc6db ("EDAC/altera: Add Stratix10 OCRAM ECC support")
Signed-off-by: Niravkumar L Rabara <niravkumarlaxmidas.rabara@altera.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Dinh Nguyen <dinguyen@kernel.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20251111080801.1279401-1-niravkumarlaxmidas.rabara@altera.com
2025-11-11 13:48:13 +01:00
Kuniyuki Iwashima
60e6489f8e af_unix: Initialise scc_index in unix_add_edge().
Quang Le reported that the AF_UNIX GC could garbage-collect a
receive queue of an alive in-flight socket, with a nice repro.

The repro consists of three stages.

  1)
    1-a. Create a single cyclic reference with many sockets
    1-b. close() all sockets
    1-c. Trigger GC

  2)
    2-a. Pass sk-A to an embryo sk-B
    2-b. Pass sk-X to sk-X
    2-c. Trigger GC

  3)
    3-a. accept() the embryo sk-B
    3-b. Pass sk-B to sk-C
    3-c. close() the in-flight sk-A
    3-d. Trigger GC

As of 2-c, sk-A and sk-X are linked to unix_unvisited_vertices,
and unix_walk_scc() groups them into two different SCCs:

  unix_sk(sk-A)->vertex->scc_index = 2 (UNIX_VERTEX_INDEX_START)
  unix_sk(sk-X)->vertex->scc_index = 3

Once GC completes, unix_graph_grouped is set to true.
Also, unix_graph_maybe_cyclic is set to true due to sk-X's
cyclic self-reference, which makes close() trigger GC.

At 3-b, unix_add_edge() allocates unix_sk(sk-B)->vertex and
links it to unix_unvisited_vertices.

unix_update_graph() is called at 3-a. and 3-b., but neither
unix_graph_grouped nor unix_graph_maybe_cyclic is changed
because both sk-B's listener and sk-C are not in-flight.

3-c decrements sk-A's file refcnt to 1.

Since unix_graph_grouped is true at 3-d, unix_walk_scc_fast()
is finally called and iterates 3 sockets sk-A, sk-B, and sk-X:

  sk-A -> sk-B (-> sk-C)
  sk-X -> sk-X

This is totally fine.  All of them are not yet close()d and
should be grouped into different SCCs.

However, unix_vertex_dead() misjudges that sk-A and sk-B are
in the same SCC and sk-A is dead.

  unix_sk(sk-A)->scc_index == unix_sk(sk-B)->scc_index <-- Wrong!
  &&
  sk-A's file refcnt == unix_sk(sk-A)->vertex->out_degree
                                       ^-- 1 in-flight count for sk-B
  -> sk-A is dead !?

The problem is that unix_add_edge() does not initialise scc_index.

Stage 1) is used for heap spraying, making a newly allocated
vertex have vertex->scc_index == 2 (UNIX_VERTEX_INDEX_START)
set by unix_walk_scc() at 1-c.

Let's track the max SCC index from the previous unix_walk_scc()
call and assign the max + 1 to a new vertex's scc_index.

This way, we can continue to avoid Tarjan's algorithm while
preventing misjudgments.

Fixes: ad081928a8 ("af_unix: Avoid Tarjan's algorithm if unnecessary.")
Reported-by: Quang Le <quanglex97@gmail.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251109025233.3659187-1-kuniyu@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2025-11-11 13:42:33 +01:00
Marc Zyngier
0f559cd91e KVM: arm64: Finalize ID registers only once per VM
Owing to the ID registers being global to the VM, there is no point
in computing them more than once.  However, recent changes making
use of kvm_set_vm_id_reg() outlined that we repeatedly hammer
the ID registers when we shouldn't.

Gate the ID reg update on the VM having never run.

Fixes: 50e7cce81b ("KVM: arm64: Limit clearing of ID_{AA64PFR0,PFR1}_EL1.GIC to userspace irqchip")
Fixes: 5cb57a1aff ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest")
Closes: https://lore.kernel.org/r/aRHf6x5umkTYhYJ3@finisterre.sirena.org.uk
Reported-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20251110173010.1918424-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-11 12:24:22 +00:00
Aleksander Jan Bajkowski
09782e72ee mips: dts: econet: fix EN751221 core type
In fact, it is a multi-threaded MIPS34Kc, not a single-threaded MIPS24Kc.

Fixes: 0ec4887009 ("mips: dts: Add EcoNet DTS with EN751221 and SmartFiber XP8421-B board")
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
2025-11-11 12:59:30 +01:00
Maciej W. Rozycki
ebd729fef3 MIPS: Malta: Fix !EVA SOC-it PCI MMIO
Fix a regression that has caused accesses to the PCI MMIO window to
complete unclaimed in non-EVA configurations with the SOC-it family of
system controllers, preventing PCI devices from working that use MMIO.

In the non-EVA case PHYS_OFFSET is set to 0, meaning that PCI_BAR0 is
set with an empty mask (and PCI_HEAD4 matches addresses starting from 0
accordingly).  Consequently all addresses are matched for incoming DMA
accesses from PCI.  This seems to confuse the system controller's logic
and outgoing bus cycles targeting the PCI MMIO window seem not to make
it to the intended devices.

This happens as well when a wider mask is used with PCI_BAR0, such as
0x80000000 or 0xe0000000, that makes addresses match that overlap with
the PCI MMIO window, which starts at 0x10000000 in our configuration.

Set the mask in PCI_BAR0 to 0xf0000000 for non-EVA then, covering the
non-EVA maximum 256 MiB of RAM, which is what YAMON does and which used
to work correctly up to the offending commit.  Set PCI_P2SCMSKL to match
PCI_BAR0 as required by the system controller's specification, and match
PCI_P2SCMAPL to PCI_HEAD4 for identity mapping.

Verified with:

Core board type/revision =      0x0d (Core74K) / 0x01
System controller/revision =    MIPS SOC-it 101 OCP / 1.3   SDR-FW-4:1
Processor Company ID/options =  0x01 (MIPS Technologies, Inc.) / 0x1c
Processor ID/revision =         0x97 (MIPS 74Kf) / 0x4c

for non-EVA and with:

Core board type/revision =      0x0c (CoreFPGA-5) / 0x00
System controller/revision =    MIPS ROC-it2 / 0.0   FW-1:1 (CLK_unknown) GIC
Processor Company ID/options =  0x01 (MIPS Technologies, Inc.) / 0x00
Processor ID/revision =         0xa0 (MIPS interAptiv UP) / 0x20

for EVA/non-EVA, fixing:

defxx 0000:00:12.0: assign IRQ: got 10
defxx: v1.12 2021/03/10  Lawrence V. Stefani and others
0000:00:12.0: Could not read adapter factory MAC address!

vs:

defxx 0000:00:12.0: assign IRQ: got 10
defxx: v1.12 2021/03/10  Lawrence V. Stefani and others
0000:00:12.0: DEFPA at MMIO addr = 0x10142000, IRQ = 10, Hardware addr = 00-00-f8-xx-xx-xx
0000:00:12.0: registered as fddi0

for non-EVA and causing no change for EVA.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: 422dd25664 ("MIPS: Malta: Allow PCI devices DMA to lower 2GB physical")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
2025-11-11 12:58:49 +01:00
Pawel Dembicki
c4e1ac09ee wifi: mwl8k: inject DSSS Parameter Set element into beacons if missing
Some Marvell AP firmware used with mwl8k misbehaves when beacon frames
do not contain a WLAN_EID_DS_PARAMS element with the current channel.
It was reported on OpenWrt Github issues [0].

When hostapd/mac80211 omits DSSS Parameter Set from the beacon (which is
valid on some bands), the firmware stops transmitting sane frames and RX
status starts reporting bogus channel information. This makes AP mode
unusable.

Newer Marvell drivers (mwlwifi [1]) hard-code DSSS Parameter Set into
AP beacons for all chips, which suggests this is a firmware requirement
rather than a mwl8k-specific quirk.

Mirror that behaviour in mwl8k: when setting the beacon, check if
WLAN_EID_DS_PARAMS is present, and if not, extend the beacon and inject
a DSSS Parameter Set element, using the current channel from
hw->conf.chandef.chan.

Tested on Linksys EA4500 (88W8366).

[0] https://github.com/openwrt/openwrt/issues/19088
[1] db97edf20f/hif/fwcmd.c (L675)

Fixes: b64fe619e3 ("mwl8k: basic AP interface support")
Tested-by: Antony Kolitsos <zeusomighty@hotmail.com>
Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
Link: https://patch.msgid.link/20251111100733.2825970-3-paweldembicki@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2025-11-11 11:38:57 +01:00
Ilan Peer
eaa7ce66c3 wifi: mac80211_hwsim: Fix possible NULL dereference
The 'vif' pointer in the Tx information might be NULL, e.g., in
case of injected frames etc. and is not checked in all paths. Fix it.
While at it, also directly use the local 'vif' pointer.

Fixes: a37a6f5443 ("wifi: mac80211_hwsim: Add simulation support for NAN device")
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/linux-wireless/aNJUlyIiSTW9zZdr@stanley.mountain
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20251110140128.ec00ae795a32.I9c65659b52434189d8b2ba06710d482669a3887a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2025-11-11 09:25:18 +01:00
Benjamin Berg
7fe0d21f56 wifi: mac80211: skip rate verification for not captured PSDUs
If for example the sniffer did not follow any AIDs in an MU frame, then
some of the information may not be filled in or is even expected to be
invalid. As an example, in that case it is expected that Nss is zero.

Fixes: 2ff5e52e78 ("radiotap: add 0-length PSDU "not captured" type")
Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://patch.msgid.link/20251110142554.83a2858ee15b.I9f78ce7984872f474722f9278691ae16378f0a3e@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2025-11-11 09:25:17 +01:00
Johannes Berg
2027e3bcbf Merge tag 'ath-current-20251110' of git://git.kernel.org/pub/scm/linux/kernel/git/ath/ath
Jeff Johnson says:
==================
ath.git update for v6.18-rc6

Fix an ath11k transmit status reporting issue. This issue has always
been present, but not reported until recently.

Bringing this through the current release since there is now a
userspace entity that wants to leverage this.
==================

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2025-11-11 09:25:11 +01:00
Jakub Kicinski
8c0726e861 Merge branch 'net-netpoll-fix-memory-leak-and-add-comprehensive-selftests'
Breno Leitao says:

====================
net: netpoll: fix memory leak and add comprehensive selftests

Fix a memory leak in netpoll and introduce netconsole selftests that
expose the issue when running with kmemleak detection enabled.

This patchset includes a selftest for netpoll with multiple concurrent
users (netconsole + bonding), which simulates the scenario from test[1]
that originally demonstrated the issue allegedly fixed by commit
efa95b01da ("netpoll: fix use after free") - a commit that is now
being reverted.

Sending this to "net" branch because this is a fix, and the selftest
might help with the backports validation.

Link: https://lore.kernel.org/lkml/96b940137a50e5c387687bb4f57de8b0435a653f.1404857349.git.decot@googlers.com/ [1]
====================

Link: https://patch.msgid.link/20251107-netconsole_torture-v10-0-749227b55f63@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:34:47 -08:00
Breno Leitao
236682db3b selftest: netcons: add test for netconsole over bonded interfaces
This patch adds a selftest that verifies netconsole functionality
over bonded network interfaces using netdevsim. It sets up two bonded
interfaces acting as transmit (TX) and receive (RX) ends, placed in
separate network namespaces. The test sends kernel log messages and
verifies that they are properly received on the bonded RX interfaces
with both IPv4 and IPv6, and using basic and extended netconsole
formats.

This patchset aims to test a long-standing netpoll subsystem where
netpoll has multiple users. (in this case netconsole and bonding). A
similar selftest has been discussed in [1] and [2].

This test also tries to enable bonding and netpoll in different order,
just to guarantee that all the possibilities are exercised.

Link: https://lore.kernel.org/all/20250905-netconsole_torture-v3-0-875c7febd316@debian.org/ [1]
Link: https://lore.kernel.org/lkml/96b940137a50e5c387687bb4f57de8b0435a653f.1404857349.git.decot@googlers.com/ [2]
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251107-netconsole_torture-v10-4-749227b55f63@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:34:44 -08:00
Breno Leitao
6701896eb9 selftest: netcons: create a torture test
Create a netconsole test that puts a lot of pressure on the netconsole
list manipulation. Do it by creating dynamic targets and deleting
targets while messages are being sent. Also put interface down while the
messages are being sent, as creating parallel targets.

The code launches three background jobs on distinct schedules:

 * Toggle netcons target every 30 iterations
 * create and delete random_target every 50 iterations
 * toggle iface every 70 iterations

This creates multiple concurrency sources that interact with netconsole
states. This is good practice to simulate stress, and exercise netpoll
and netconsole locks.

This test already found an issue as reported in [1]

Link: https://lore.kernel.org/all/20250901-netpoll_memleak-v1-1-34a181977dfc@debian.org/ [1]
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Andre Carvalho <asantostc@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251107-netconsole_torture-v10-3-749227b55f63@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:34:44 -08:00
Breno Leitao
39acc6a95e selftest: netcons: refactor target creation
Extract the netconsole target creation from create_dynamic_target(), by
moving it from create_dynamic_target() into a new helper function. This
enables other tests to use the creation of netconsole targets with
arbitrary parameters and no sleep.

The new helper will be utilized by forthcoming torture-type selftests
that require dynamic target management.

Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251107-netconsole_torture-v10-2-749227b55f63@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:34:44 -08:00
Breno Leitao
49c8d2c1f9 net: netpoll: fix incorrect refcount handling causing incorrect cleanup
commit efa95b01da ("netpoll: fix use after free") incorrectly
ignored the refcount and prematurely set dev->npinfo to NULL during
netpoll cleanup, leading to improper behavior and memory leaks.

Scenario causing lack of proper cleanup:

1) A netpoll is associated with a NIC (e.g., eth0) and netdev->npinfo is
   allocated, and refcnt = 1
   - Keep in mind that npinfo is shared among all netpoll instances. In
     this case, there is just one.

2) Another netpoll is also associated with the same NIC and
   npinfo->refcnt += 1.
   - Now dev->npinfo->refcnt = 2;
   - There is just one npinfo associated to the netdev.

3) When the first netpolls goes to clean up:
   - The first cleanup succeeds and clears np->dev->npinfo, ignoring
     refcnt.
     - It basically calls `RCU_INIT_POINTER(np->dev->npinfo, NULL);`
   - Set dev->npinfo = NULL, without proper cleanup
   - No ->ndo_netpoll_cleanup() is either called

4) Now the second target tries to clean up
   - The second cleanup fails because np->dev->npinfo is already NULL.
     * In this case, ops->ndo_netpoll_cleanup() was never called, and
       the skb pool is not cleaned as well (for the second netpoll
       instance)
  - This leaks npinfo and skbpool skbs, which is clearly reported by
    kmemleak.

Revert commit efa95b01da ("netpoll: fix use after free") and adds
clarifying comments emphasizing that npinfo cleanup should only happen
once the refcount reaches zero, ensuring stable and correct netpoll
behavior.

Cc: <stable@vger.kernel.org> # 3.17.x
Cc: Jay Vosburgh <jv@jvosburgh.net>
Fixes: efa95b01da ("netpoll: fix use after free")
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251107-netconsole_torture-v10-1-749227b55f63@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:34:44 -08:00
Buday Csaba
e6ca8f533e net: mdio: fix resource leak in mdiobus_register_device()
Fix a possible leak in mdiobus_register_device() when both a
reset-gpio and a reset-controller are present.
Clean up the already claimed reset-gpio, when the registration of
the reset-controller fails, so when an error code is returned, the
device retains its state before the registration attempt.

Link: https://lore.kernel.org/all/20251106144603.39053c81@kernel.org/
Fixes: 71dd6c0dff ("net: phy: add support for reset-controller")
Signed-off-by: Buday Csaba <buday.csaba@prolan.hu>
Link: https://patch.msgid.link/4b419377f8dd7d2f63f919d0f74a336c734f8fff.1762584481.git.buday.csaba@prolan.hu
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:15:13 -08:00
Kuniyuki Iwashima
0725e6afb5 tipc: Fix use-after-free in tipc_mon_reinit_self().
syzbot reported use-after-free of tipc_net(net)->monitors[]
in tipc_mon_reinit_self(). [0]

The array is protected by RTNL, but tipc_mon_reinit_self()
iterates over it without RTNL.

tipc_mon_reinit_self() is called from tipc_net_finalize(),
which is always under RTNL except for tipc_net_finalize_work().

Let's hold RTNL in tipc_net_finalize_work().

[0]:
BUG: KASAN: slab-use-after-free in __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
BUG: KASAN: slab-use-after-free in _raw_spin_lock_irqsave+0xa7/0xf0 kernel/locking/spinlock.c:162
Read of size 1 at addr ffff88805eae1030 by task kworker/0:7/5989

CPU: 0 UID: 0 PID: 5989 Comm: kworker/0:7 Not tainted syzkaller #0 PREEMPT_{RT,(full)}
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025
Workqueue: events tipc_net_finalize_work
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:378 [inline]
 print_report+0xca/0x240 mm/kasan/report.c:482
 kasan_report+0x118/0x150 mm/kasan/report.c:595
 __kasan_check_byte+0x2a/0x40 mm/kasan/common.c:568
 kasan_check_byte include/linux/kasan.h:399 [inline]
 lock_acquire+0x8d/0x360 kernel/locking/lockdep.c:5842
 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
 _raw_spin_lock_irqsave+0xa7/0xf0 kernel/locking/spinlock.c:162
 rtlock_slowlock kernel/locking/rtmutex.c:1894 [inline]
 rwbase_rtmutex_lock_state kernel/locking/spinlock_rt.c:160 [inline]
 rwbase_write_lock+0xd3/0x7e0 kernel/locking/rwbase_rt.c:244
 rt_write_lock+0x76/0x110 kernel/locking/spinlock_rt.c:243
 write_lock_bh include/linux/rwlock_rt.h:99 [inline]
 tipc_mon_reinit_self+0x79/0x430 net/tipc/monitor.c:718
 tipc_net_finalize+0x115/0x190 net/tipc/net.c:140
 process_one_work kernel/workqueue.c:3236 [inline]
 process_scheduled_works+0xade/0x17b0 kernel/workqueue.c:3319
 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3400
 kthread+0x70e/0x8a0 kernel/kthread.c:463
 ret_from_fork+0x439/0x7d0 arch/x86/kernel/process.c:148
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245
 </TASK>

Allocated by task 6089:
 kasan_save_stack mm/kasan/common.c:47 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:68
 poison_kmalloc_redzone mm/kasan/common.c:388 [inline]
 __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:405
 kasan_kmalloc include/linux/kasan.h:260 [inline]
 __kmalloc_cache_noprof+0x1a8/0x320 mm/slub.c:4407
 kmalloc_noprof include/linux/slab.h:905 [inline]
 kzalloc_noprof include/linux/slab.h:1039 [inline]
 tipc_mon_create+0xc3/0x4d0 net/tipc/monitor.c:657
 tipc_enable_bearer net/tipc/bearer.c:357 [inline]
 __tipc_nl_bearer_enable+0xe16/0x13f0 net/tipc/bearer.c:1047
 __tipc_nl_compat_doit net/tipc/netlink_compat.c:371 [inline]
 tipc_nl_compat_doit+0x3bc/0x5f0 net/tipc/netlink_compat.c:393
 tipc_nl_compat_handle net/tipc/netlink_compat.c:-1 [inline]
 tipc_nl_compat_recv+0x83c/0xbe0 net/tipc/netlink_compat.c:1321
 genl_family_rcv_msg_doit+0x215/0x300 net/netlink/genetlink.c:1115
 genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline]
 genl_rcv_msg+0x60e/0x790 net/netlink/genetlink.c:1210
 netlink_rcv_skb+0x208/0x470 net/netlink/af_netlink.c:2552
 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219
 netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline]
 netlink_unicast+0x846/0xa10 net/netlink/af_netlink.c:1346
 netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1896
 sock_sendmsg_nosec net/socket.c:714 [inline]
 __sock_sendmsg+0x21c/0x270 net/socket.c:729
 ____sys_sendmsg+0x508/0x820 net/socket.c:2614
 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668
 __sys_sendmsg net/socket.c:2700 [inline]
 __do_sys_sendmsg net/socket.c:2705 [inline]
 __se_sys_sendmsg net/socket.c:2703 [inline]
 __x64_sys_sendmsg+0x1a1/0x260 net/socket.c:2703
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Freed by task 6088:
 kasan_save_stack mm/kasan/common.c:47 [inline]
 kasan_save_track+0x3e/0x80 mm/kasan/common.c:68
 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576
 poison_slab_object mm/kasan/common.c:243 [inline]
 __kasan_slab_free+0x5b/0x80 mm/kasan/common.c:275
 kasan_slab_free include/linux/kasan.h:233 [inline]
 slab_free_hook mm/slub.c:2422 [inline]
 slab_free mm/slub.c:4695 [inline]
 kfree+0x195/0x550 mm/slub.c:4894
 tipc_l2_device_event+0x380/0x650 net/tipc/bearer.c:-1
 notifier_call_chain+0x1b3/0x3e0 kernel/notifier.c:85
 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline]
 call_netdevice_notifiers net/core/dev.c:2281 [inline]
 unregister_netdevice_many_notify+0x14d7/0x1fe0 net/core/dev.c:12166
 unregister_netdevice_many net/core/dev.c:12229 [inline]
 unregister_netdevice_queue+0x33c/0x380 net/core/dev.c:12073
 unregister_netdevice include/linux/netdevice.h:3385 [inline]
 __tun_detach+0xe4d/0x1620 drivers/net/tun.c:621
 tun_detach drivers/net/tun.c:637 [inline]
 tun_chr_close+0x10d/0x1c0 drivers/net/tun.c:3433
 __fput+0x458/0xa80 fs/file_table.c:468
 task_work_run+0x1d4/0x260 kernel/task_work.c:227
 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
 exit_to_user_mode_loop+0xec/0x110 kernel/entry/common.c:43
 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline]
 syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline]
 syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline]
 do_syscall_64+0x2bd/0x3b0 arch/x86/entry/syscall_64.c:100
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

Fixes: 46cb01eeeb ("tipc: update mon's self addr when node addr generated")
Reported-by: syzbot+d7dad7fd4b3921104957@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/690c323a.050a0220.baf87.007f.GAE@google.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20251107064038.2361188-1-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:14:40 -08:00
Jakub Kicinski
142b02cc48 Merge branch 'fix-iet-verification-implementation-for-cpsw-driver'
Aksh Garg says:

====================
Fix IET verification implementation for CPSW driver

The CPSW module supports Intersperse Express Traffic (IET) and allows
the MAC layer to verify whether the peer supports IET through its MAC
merge sublayer, by sending a verification packet and waiting for its
response until the timeout. As defined in IEEE 802.3 Clause 99, the
verification process involves up to 3 verification attempts to
establish support.

This patch series fixes issues in the implementation of this IET
verification process.
====================

Link: https://patch.msgid.link/20251106092305.1437347-1-a-garg7@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:00:44 -08:00
Aksh Garg
d4b00d132d net: ethernet: ti: am65-cpsw-qos: fix IET verify retry mechanism
The am65_cpsw_iet_verify_wait() function attempts verification 20 times,
toggling the AM65_CPSW_PN_IET_MAC_LINKFAIL bit in each iteration. When
the LINKFAIL bit transitions from 1 to 0, the MAC merge layer initiates
the verification process and waits for the timeout configured in
MAC_VERIFY_CNT before automatically retransmitting. The MAC_VERIFY_CNT
register is configured according to the user-defined verify/response
timeout in am65_cpsw_iet_set_verify_timeout_count(). As per IEEE 802.3
Clause 99, the hardware performs this automatic retry up to 3 times.

Current implementation toggles LINKFAIL after the user-configured
verify/response timeout in each iteration, forcing the hardware to
restart verification instead of respecting the MAC_VERIFY_CNT timeout.
This bypasses the hardware's automatic retry mechanism.

Fix this by moving the LINKFAIL bit toggle outside the retry loop and
reducing the retry count from 20 to 3. The software now only monitors
the status register while the hardware autonomously handles the 3
verification attempts at proper MAC_VERIFY_CNT intervals.

Fixes: 49a2eb9068 ("net: ethernet: ti: am65-cpsw-qos: Add Frame Preemption MAC Merge support")
Signed-off-by: Aksh Garg <a-garg7@ti.com>
Link: https://patch.msgid.link/20251106092305.1437347-3-a-garg7@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:00:40 -08:00
Aksh Garg
49b3916465 net: ethernet: ti: am65-cpsw-qos: fix IET verify/response timeout
The CPSW module uses the MAC_VERIFY_CNT bit field in the
CPSW_PN_IET_VERIFY_REG_k register to set the verify/response timeout
count. This register specifies the number of clock cycles to wait before
resending a verify packet if the verification fails.

The verify/response timeout count, as being set by the function
am65_cpsw_iet_set_verify_timeout_count() is hardcoded for 125MHz
clock frequency, which varies based on PHY mode and link speed.

The respective clock frequencies are as follows:
- RGMII mode:
  * 1000 Mbps: 125 MHz
  * 100 Mbps: 25 MHz
  * 10 Mbps: 2.5 MHz
- QSGMII/SGMII mode: 125 MHz (all speeds)

Fix this by adding logic to calculate the correct timeout counts
based on the actual PHY interface mode and link speed.

Fixes: 49a2eb9068 ("net: ethernet: ti: am65-cpsw-qos: Add Frame Preemption MAC Merge support")
Signed-off-by: Aksh Garg <a-garg7@ti.com>
Link: https://patch.msgid.link/20251106092305.1437347-2-a-garg7@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 18:00:40 -08:00
Zilin Guan
3072f00bba net/handshake: Fix memory leak in tls_handshake_accept()
In tls_handshake_accept(), a netlink message is allocated using
genlmsg_new(). In the error handling path, genlmsg_cancel() is called
to cancel the message construction, but the message itself is not freed.
This leads to a memory leak.

Fix this by calling nlmsg_free() in the error path after genlmsg_cancel()
to release the allocated memory.

Fixes: 2fd5532044 ("net/handshake: Add a kernel API for requesting a TLSv1.3 handshake")
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Link: https://patch.msgid.link/20251106144511.3859535-1-zilin@seu.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 17:53:47 -08:00
D. Wythe
ec33f2e5a2 net/smc: fix mismatch between CLC header and proposal
The current CLC proposal message construction uses a mix of
`ini->smc_type_v1/v2` and `pclc_base->hdr.typev1/v2` to decide whether
to include optional extensions (IPv6 prefix extension for v1, and v2
extension). This leads to a critical inconsistency: when
`smc_clc_prfx_set()` fails - for example, in IPv6-only environments with
only link-local addresses, or when the local IP address and the outgoing
interface’s network address are not in the same subnet.

As a result, the proposal message is assembled using the stale
`ini->smc_type_v1` value—causing the IPv6 prefix extension to be
included even though the header indicates v1 is not supported.
The peer then receives a malformed CLC proposal where the header type
does not match the payload, and immediately resets the connection.

The fix ensures consistency between the CLC header flags and the actual
payload by synchronizing `ini->smc_type_v1` with `pclc_base->hdr.typev1`
when prefix setup fails.

Fixes: 8c3dca341a ("net/smc: build and send V2 CLC proposal")
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
Link: https://patch.msgid.link/20251107024029.88753-1-alibuda@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 17:52:09 -08:00
Nicolas Dichtel
2554559aba bonding: fix mii_status when slave is down
netif_carrier_ok() doesn't check if the slave is up. Before the below
commit, netif_running() was also checked.

Fixes: 23a6037ce7 ("bonding: Remove support for use_carrier")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Jay Vosburgh <jv@jvosburgh.net>
Link: https://patch.msgid.link/20251106180252.3974772-1-nicolas.dichtel@6wind.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 17:51:30 -08:00
Zahari Doychev
41d0c31be2 tools: ynl: call nested attribute free function for indexed arrays
When freeing indexed arrays, the corresponding free function should
be called for each entry of the indexed array. For example, for
for 'struct tc_act_attrs' 'tc_act_attrs_free(...)' needs to be called
for each entry.

Previously, memory leaks were reported when enabling the ASAN
analyzer.

=================================================================
==874==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 24 byte(s) in 1 object(s) allocated from:
    #0 0x7f221fd20cb5 in malloc ./debug/gcc/gcc/libsanitizer/asan/asan_malloc_linux.cpp:67
    #1 0x55c98db048af in tc_act_attrs_set_options_vlan_parms ../generated/tc-user.h:2813
    #2 0x55c98db048af in main  ./linux/tools/net/ynl/samples/tc-filter-add.c:71

Direct leak of 24 byte(s) in 1 object(s) allocated from:
    #0 0x7f221fd20cb5 in malloc ./debug/gcc/gcc/libsanitizer/asan/asan_malloc_linux.cpp:67
    #1 0x55c98db04a93 in tc_act_attrs_set_options_vlan_parms ../generated/tc-user.h:2813
    #2 0x55c98db04a93 in main ./linux/tools/net/ynl/samples/tc-filter-add.c:74

Direct leak of 10 byte(s) in 2 object(s) allocated from:
    #0 0x7f221fd20cb5 in malloc ./debug/gcc/gcc/libsanitizer/asan/asan_malloc_linux.cpp:67
    #1 0x55c98db0527d in tc_act_attrs_set_kind ../generated/tc-user.h:1622

SUMMARY: AddressSanitizer: 58 byte(s) leaked in 4 allocation(s).

The following diff illustrates the changes introduced compared to the
previous version of the code.

 void tc_flower_attrs_free(struct tc_flower_attrs *obj)
 {
+	unsigned int i;
+
 	free(obj->indev);
+	for (i = 0; i < obj->_count.act; i++)
+		tc_act_attrs_free(&obj->act[i]);
 	free(obj->act);
 	free(obj->key_eth_dst);
 	free(obj->key_eth_dst_mask);

Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://patch.msgid.link/20251106151529.453026-3-zahari.doychev@linux.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 17:18:05 -08:00
Jonas Gorski
762e7e174d net: dsa: tag_brcm: do not mark link local traffic as offloaded
Broadcom switches locally terminate link local traffic and do not
forward it, so we should not mark it as offloaded.

In some situations we still want/need to flood this traffic, e.g. if STP
is disabled, or it is explicitly enabled via the group_fwd_mask. But if
the skb is marked as offloaded, the kernel will assume this was already
done in hardware, and the packets never reach other bridge ports.

So ensure that link local traffic is never marked as offloaded, so that
the kernel can forward/flood these packets in software if needed.

Since the local termination in not configurable, check the destination
MAC, and never mark packets as offloaded if it is a link local ether
address.

While modern switches set the tag reason code to BRCM_EG_RC_PROT_TERM
for trapped link local traffic, they also set it for link local traffic
that is flooded (01:80:c2:00:00:10 to 01:80:c2:00:00:2f), so we cannot
use it and need to look at the destination address for them as well.

Fixes: 964dbf186e ("net: dsa: tag_brcm: add support for legacy tags")
Fixes: 0e62f543be ("net: dsa: Fix duplicate frames flooded by learning")
Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Link: https://patch.msgid.link/20251109134635.243951-1-jonas.gorski@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 17:04:19 -08:00
Steven Rostedt
dd4adb986a selftests/tracing: Run sample events to clear page cache events
The tracing selftest "event-filter-function.tc" was failing because it
first runs the "sample_events" function that triggers the kmem_cache_free
event and it looks at what function was used during a call to "ls".

But the first time it calls this, it could trigger events that are used to
pull pages into the page cache.

The rest of the test uses the function it finds during that call to see if
it will be called in subsequent "sample_events" calls. But if there's no
need to pull pages into the page cache, it will not trigger that function
and the test will fail.

Call the "sample_events" twice to trigger all the page cache work before
it calls it to find a function to use in subsequent checks.

Cc: stable@vger.kernel.org
Fixes: eb50d0f250 ("selftests/ftrace: Choose target function for filter test from samples")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2025-11-10 18:00:07 -07:00
Victor Nogueira
60260ad935 selftests/tc-testing: Create tests trying to add children to clsact/ingress qdiscs
In response to Wang's bug report [1], add the following test cases:

- Try and fail to add an fq child to an ingress qdisc
- Try and fail to add an fq child to a clsact qdisc

[1] https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/

Reviewed-by: Pedro Tammela <pctammela@mojatatu.ai>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Reviewed-by: Cong Wang <cwang@multikernel.io>
Link: https://patch.msgid.link/20251106205621.3307639-2-victor@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 16:57:56 -08:00
Victor Nogueira
e781122d76 net/sched: Abort __tc_modify_qdisc if parent is a clsact/ingress qdisc
Wang reported an illegal configuration [1] where the user attempts to add a
child qdisc to the ingress qdisc as follows:

tc qdisc add dev eth0 handle ffff:0 ingress
tc qdisc add dev eth0 handle ffe0:0 parent ffff:a fq

To solve this, we reject any configuration attempt to add a child qdisc to
ingress or clsact.

[1] https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/

Fixes: 5e50da01d0 ("[NET_SCHED]: Fix endless loops (part 2): "simple" qdiscs")
Reported-by: Wang Liang <wangliang74@huawei.com>
Closes: https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/
Reviewed-by: Pedro Tammela <pctammela@mojatatu.ai>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Victor Nogueira <victor@mojatatu.com>
Reviewed-by: Cong Wang <cwang@multikernel.io>
Link: https://patch.msgid.link/20251106205621.3307639-1-victor@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 16:57:56 -08:00
Eric Dumazet
1534ff7775 sctp: prevent possible shift-out-of-bounds in sctp_transport_update_rto
syzbot reported a possible shift-out-of-bounds [1]

Blamed commit added rto_alpha_max and rto_beta_max set to 1000.

It is unclear if some sctp users are setting very large rto_alpha
and/or rto_beta.

In order to prevent user regression, perform the test at run time.

Also add READ_ONCE() annotations as sysctl values can change under us.

[1]

UBSAN: shift-out-of-bounds in net/sctp/transport.c:509:41
shift exponent 64 is too large for 32-bit type 'unsigned int'
CPU: 0 UID: 0 PID: 16704 Comm: syz.2.2320 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025
Call Trace:
 <TASK>
  __dump_stack lib/dump_stack.c:94 [inline]
  dump_stack_lvl+0x16c/0x1f0 lib/dump_stack.c:120
  ubsan_epilogue lib/ubsan.c:233 [inline]
  __ubsan_handle_shift_out_of_bounds+0x27f/0x420 lib/ubsan.c:494
  sctp_transport_update_rto.cold+0x1c/0x34b net/sctp/transport.c:509
  sctp_check_transmitted+0x11c4/0x1c30 net/sctp/outqueue.c:1502
  sctp_outq_sack+0x4ef/0x1b20 net/sctp/outqueue.c:1338
  sctp_cmd_process_sack net/sctp/sm_sideeffect.c:840 [inline]
  sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1372 [inline]

Fixes: b58537a1f5 ("net: sctp: fix permissions for rto_alpha and rto_beta knobs")
Reported-by: syzbot+f8c46c8b2b7f6e076e99@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/690c81ae.050a0220.3d0d33.014e.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Xin Long <lucien.xin@gmail.com>
Link: https://patch.msgid.link/20251106111054.3288127-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-10 16:21:05 -08:00
Linus Torvalds
4427259cc7 Merge tag 'riscv-for-linus-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull RISC-V fixes from Paul Walmsley:

 - fix broken clang build on versions earlier than 19 and binutils
   versions earlier than 2.38.

   (This exposed that we're not properly testing earlier toolchain
   versions in our linux-next builds and PR submissions. This was fixed
   for this PR, and is being addressed more generally for -next builds.)

 - remove some redundant Makefile code

 - avoid building Canaan Kendryte K210-specific code on targets that
   don't build for the K210

* tag 'riscv-for-linus-6.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux:
  riscv: Fix CONFIG_AS_HAS_INSN for new .insn usage
  riscv: Remove redundant judgment for the default build target
  riscv: Build loader.bin exclusively for Canaan K210
2025-11-10 15:35:45 -08:00
Pavel Begunkov
6a77267d97 io_uring/query: return number of available queries
It's useful to know which query opcodes are available. Extend the
structure and return that. It's a trivial change, and even though it can
be painlessly extended later, it'd still require adding a v2 of the
structure.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2025-11-10 14:59:35 -07:00
Jens Axboe
d3c9c213c0 io_uring/rw: ensure allocated iovec gets cleared for early failure
A previous commit reused the recyling infrastructure for early cleanup,
but this is not enough for the case where our internal caches have
overflowed. If this happens, then the allocated iovec can get leaked if
the request is also aborted early.

Reinstate the previous forced free of the iovec for that situation.

Cc: stable@vger.kernel.org
Reported-by: syzbot+3c93637d7648c24e1fd0@syzkaller.appspotmail.com
Tested-by: syzbot+3c93637d7648c24e1fd0@syzkaller.appspotmail.com
Fixes: 9ac273ae3d ("io_uring/rw: use io_rw_recycle() from cleanup path")
Link: https://lore.kernel.org/io-uring/69122a59.a70a0220.22f260.00fd.GAE@google.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2025-11-10 14:59:08 -07:00
Dai Ngo
b623390045 NFS: Fix LTP test failures when timestamps are delegated
The utimes01 and utime06 tests fail when delegated timestamps are
enabled, specifically in subtests that modify the atime and mtime
fields using the 'nobody' user ID.

The problem can be reproduced as follow:

# echo "/media *(rw,no_root_squash,sync)" >> /etc/exports
# export -ra
# mount -o rw,nfsvers=4.2 127.0.0.1:/media /tmpdir
# cd /opt/ltp
# ./runltp -d /tmpdir -s utimes01
# ./runltp -d /tmpdir -s utime06

This issue occurs because nfs_setattr does not verify the inode's
UID against the caller's fsuid when delegated timestamps are
permitted for the inode.

This patch adds the UID check and if it does not match then the
request is sent to the server for permission checking.

Fixes: e12912d941 ("NFSv4: Add support for delegated atime and mtime attributes")
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 16:55:12 -05:00
Luiz Augusto von Dentz
41bf23338a Bluetooth: hci_conn: Fix not cleaning up PA_LINK connections
Contrary to what was stated on d36349ea73 ("Bluetooth: hci_conn:
Fix running bis_cleanup for hci_conn->type PA_LINK") the PA_LINK does
in fact needs to run bis_cleanup in order to terminate the PA Sync,
since that is bond to the listening socket which is the entity that
controls the lifetime of PA Sync, so if it is closed/released the PA
Sync shall be terminated, terminating the PA Sync shall not result in
the BIG Sync being terminated since once the later is established it
doesn't depend on the former anymore.

If the use user wants to reconnect/rebind a number of BIS(s) it shall
keep the socket open until it no longer needs the PA Sync, which means
it retains full control of the lifetime of both PA and BIG Syncs.

Fixes: d36349ea73 ("Bluetooth: hci_conn: Fix running bis_cleanup for hci_conn->type PA_LINK")
Fixes: a7bcffc673 ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-10 16:09:00 -05:00
Pauli Virtanen
15f32cabf4 Bluetooth: 6lowpan: add missing l2cap_chan_lock()
l2cap_chan_close() needs to be called in l2cap_chan_lock(), otherwise
l2cap_le_sig_cmd() etc. may run concurrently.

Add missing locks around l2cap_chan_close().

Fixes: 6b8d4a6a03 ("Bluetooth: 6LoWPAN: Use connected oriented channel instead of fixed one")
Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-10 16:08:41 -05:00
Pauli Virtanen
98454bc812 Bluetooth: 6lowpan: Don't hold spin lock over sleeping functions
disconnect_all_peers() calls sleeping function (l2cap_chan_close) under
spinlock.  Holding the lock doesn't actually do any good -- we work on a
local copy of the list, and the lock doesn't protect against peer->chan
having already been freed.

Fix by taking refcounts of peer->chan instead.  Clean up the code and
old comments a bit.

Take devices_lock instead of RCU, because the kfree_rcu();
l2cap_chan_put(); construct in chan_close_cb() does not guarantee
peer->chan is necessarily valid in RCU.

Also take l2cap_chan_lock() which is required for l2cap_chan_close().

Log: (bluez 6lowpan-tester Client Connect - Disable)
------
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:575
...
<TASK>
...
l2cap_send_disconn_req (net/bluetooth/l2cap_core.c:938 net/bluetooth/l2cap_core.c:1495)
...
? __pfx_l2cap_chan_close (net/bluetooth/l2cap_core.c:809)
do_enable_set (net/bluetooth/6lowpan.c:1048 net/bluetooth/6lowpan.c:1068)
------

Fixes: 9030582963 ("Bluetooth: 6lowpan: Converting rwlocks to use RCU")
Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-10 16:08:21 -05:00
Pauli Virtanen
e060088db0 Bluetooth: L2CAP: export l2cap_chan_hold for modules
l2cap_chan_put() is exported, so export also l2cap_chan_hold() for
modules.

l2cap_chan_hold() has use case in net/bluetooth/6lowpan.c

Signed-off-by: Pauli Virtanen <pav@iki.fi>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-10 16:08:00 -05:00
Pauli Virtanen
b454505bf5 Bluetooth: 6lowpan: fix BDADDR_LE vs ADDR_LE_DEV address type confusion
Bluetooth 6lowpan.c confuses BDADDR_LE and ADDR_LE_DEV address types,
e.g. debugfs "connect" command takes the former, and "disconnect" and
"connect" to already connected device take the latter.  This is due to
using same value both for l2cap_chan_connect and hci_conn_hash_lookup_le
which take different dst_type values.

Fix address type passed to hci_conn_hash_lookup_le().

Retain the debugfs API difference between "connect" and "disconnect"
commands since it's been like this since 2015 and nobody apparently
complained.

Fixes: f5ad4ffceb ("Bluetooth: 6lowpan: Use hci_conn_hash_lookup_le() when possible")
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-10 16:07:41 -05:00
Pauli Virtanen
3b78f50918 Bluetooth: 6lowpan: reset link-local header on ipv6 recv path
Bluetooth 6lowpan.c netdev has header_ops, so it must set link-local
header for RX skb, otherwise things crash, eg. with AF_PACKET SOCK_RAW

Add missing skb_reset_mac_header() for uncompressed ipv6 RX path.

For the compressed one, it is done in lowpan_header_decompress().

Log: (BlueZ 6lowpan-tester Client Recv Raw - Success)
------
kernel BUG at net/core/skbuff.c:212!
Call Trace:
<IRQ>
...
packet_rcv (net/packet/af_packet.c:2152)
...
<TASK>
__local_bh_enable_ip (kernel/softirq.c:407)
netif_rx (net/core/dev.c:5648)
chan_recv_cb (net/bluetooth/6lowpan.c:294 net/bluetooth/6lowpan.c:359)
------

Fixes: 18722c2470 ("Bluetooth: Enable 6LoWPAN support for BT LE devices")
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-10 16:07:21 -05:00
Raphael Pinsonneault-Thibeault
23d22f2f71 Bluetooth: btusb: reorder cleanup in btusb_disconnect to avoid UAF
There is a KASAN: slab-use-after-free read in btusb_disconnect().
Calling "usb_driver_release_interface(&btusb_driver, data->intf)" will
free the btusb data associated with the interface. The same data is
then used later in the function, hence the UAF.

Fix by moving the accesses to btusb data to before the data is free'd.

Reported-by: syzbot+2fc81b50a4f8263a159b@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=2fc81b50a4f8263a159b
Tested-by: syzbot+2fc81b50a4f8263a159b@syzkaller.appspotmail.com
Fixes: fd913ef7ce ("Bluetooth: btusb: Add out-of-band wakeup support")
Signed-off-by: Raphael Pinsonneault-Thibeault <rpthibeault@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-10 16:07:01 -05:00
Pauli Virtanen
55fb52ffdd Bluetooth: MGMT: cancel mesh send timer when hdev removed
mesh_send_done timer is not canceled when hdev is removed, which causes
crash if the timer triggers after hdev is gone.

Cancel the timer when MGMT removes the hdev, like other MGMT timers.

Should fix the BUG: sporadically seen by BlueZ test bot
(in "Mesh - Send cancel - 1" test).

Log:
------
BUG: KASAN: slab-use-after-free in run_timer_softirq+0x76b/0x7d0
...
Freed by task 36:
 kasan_save_stack+0x24/0x50
 kasan_save_track+0x14/0x30
 __kasan_save_free_info+0x3a/0x60
 __kasan_slab_free+0x43/0x70
 kfree+0x103/0x500
 device_release+0x9a/0x210
 kobject_put+0x100/0x1e0
 vhci_release+0x18b/0x240
------

Fixes: b338d91703 ("Bluetooth: Implement support for Mesh")
Link: https://lore.kernel.org/linux-bluetooth/67364c09.0c0a0220.113cba.39ff@mx.google.com/
Signed-off-by: Pauli Virtanen <pav@iki.fi>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
2025-11-10 16:00:44 -05:00
Trond Myklebust
1f214e9c3a NFSv4: Fix an incorrect parameter when calling nfs4_call_sync()
The Smatch static checker noted that in _nfs4_proc_lookupp(), the flag
RPC_TASK_TIMEOUT is being passed as an argument to nfs4_init_sequence(),
which is clearly incorrect.
Since LOOKUPP is an idempotent operation, nfs4_init_sequence() should
not ask the server to cache the result. The RPC_TASK_TIMEOUT flag needs
to be passed down to the RPC layer.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Reported-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Fixes: 76998ebb91 ("NFSv4: Observe the NFS_MOUNT_SOFTREVAL flag in _nfs4_proc_lookupp")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 14:30:46 -05:00
Yang Xiuwei
7a7a345652 NFS: sysfs: fix leak when nfs_client kobject add fails
If adding the second kobject fails, drop both references to avoid sysfs
residue and memory leak.

Fixes: e96f9268ee ("NFS: Make all of /sys/fs/nfs network-namespace unique")

Signed-off-by: Yang Xiuwei <yangxiuwei@kylinos.cn>
Reviewed-by: Benjamin Coddington <ben.coddington@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 14:30:45 -05:00
Trond Myklebust
85d2c2392a NFSv2/v3: Fix error handling in nfs_atomic_open_v23()
When nfs_do_create() returns an EEXIST error, it means that a regular
file could not be created. That could mean that a symlink needs to be
resolved. If that's the case, a lookup needs to be kicked off.

Reported-by: Stephen Abbene <sabbene87@gmail.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=220710
Fixes: 7c6c5249f0 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 14:30:45 -05:00
Mike Snitzer
6a218b9c31 nfs/localio: do not issue misaligned DIO out-of-order
From https://lore.kernel.org/linux-nfs/aQHASIumLJyOoZGH@infradead.org/

On Wed, Oct 29, 2025 at 12:20:40AM -0700, Christoph Hellwig wrote:
> On Mon, Oct 27, 2025 at 12:18:30PM -0400, Mike Snitzer wrote:
> > LOCALIO's misaligned DIO will issue head/tail followed by O_DIRECT
> > middle (via AIO completion of that aligned middle). So out of order
> > relative to file offset.
>
> That's in general a really bad idea.  It will obviously work, but
> both on SSDs and out of place write file systems it is a sure way
> to increase your garbage collection overhead a lot down the line.

Fix this by never issuing misaligned DIO out of order. This fix means
the DIO-aligned middle will only use AIO completion if there is no
misaligned end segment. Otherwise, all 3 segments of a misaligned DIO
will be issued without AIO completion to ensure file offset increases
properly for all partial READ or WRITE situations.

Factoring out nfs_local_iter_setup() helps standardize repetitive
nfs_local_iters_setup_dio() code and is inspired by cleanup work that
Chuck Lever did on the NFSD Direct code.

Fixes: c817248fc8 ("nfs/localio: add proper O_DIRECT support for READ and WRITE")
Reported-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 13:28:45 -05:00
Nitin Gote
240372edaf drm/xe/xe3lpg: Extend Wa_15016589081 for xe3lpg
Wa_15016589081 applies to Xe3_LPG renderCS

Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
Link: https://patch.msgid.link/20251106100516.318863-2-nitin.r.gote@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 715974499a2199bd199fb4630501f55545342ea4)
Cc: stable@vger.kernel.org # v6.16+
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
2025-11-10 09:41:44 -08:00
Tangudu Tilak Tirumalesh
fa3376319b drm/xe/xe3: Extend wa_14023061436
Extend wa_14023061436 to Graphics Versions 30.03, 30.04
and 30.05.

Signed-off-by: Tangudu Tilak Tirumalesh <tilak.tirumalesh.tangudu@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patch.msgid.link/20251030154626.3124565-1-tilak.tirumalesh.tangudu@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 0dd656d06f50ae4cedf160634cf13fd9e0944cf7)
Cc: stable@vger.kernel.org # v6.17+
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
2025-11-10 09:41:29 -08:00
Nitin Gote
0b2f7be548 drm/xe/xe3: Add WA_14024681466 for Xe3_LPG
Apply WA_14024681466 to Xe3_LPG graphics IP versions from 30.00 to 30.05.

v2: (Matthew Roper)
   - Remove stepping filter as workaround applies to all steppings.
   - Add an engine class filter so it only applies to the RENDER engine.

Signed-off-by: Nitin Gote <nitin.r.gote@intel.com>
Link: https://patch.msgid.link/20251027092643.335904-1-nitin.r.gote@intel.com
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 071089a69e199bd810ff31c4c933bd528e502743)
Cc: stable@vger.kernel.org # v6.16+
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
2025-11-10 09:41:09 -08:00
Armin Wolf
97b726eb1d platform/x86: msi-wmi-platform: Fix typo in WMI GUID
The WMI driver core only supports GUID strings containing only
uppercase characters, however the GUID string used by the
msi-wmi-platform driver contains a single lowercase character.
This prevents the WMI driver core from matching said driver to
its WMI device.

Fix this by turning the lowercase character into a uppercase
character. Also update the WMI driver development guide to warn
about this.

Reported-by: Antheas Kapenekakis <lkml@antheas.dev>
Fixes: 9c0beb6b29 ("platform/x86: wmi: Add MSI WMI Platform driver")
Tested-by: Antheas Kapenekakis <lkml@antheas.dev>
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20251110111253.16204-3-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-10 19:13:29 +02:00
Armin Wolf
c93433fd4e platform/x86: msi-wmi-platform: Only load on MSI devices
It turns out that the GUID used by the msi-wmi-platform driver
(ABBC0F60-8EA1-11D1-00A0-C90629100000) is not unique, but was instead
copied from the WIndows Driver Samples. This means that this driver
could load on devices from other manufacturers that also copied this
GUID, potentially causing hardware errors.

Prevent this by only loading on devices whitelisted via DMI. The DMI
matches where taken from the msi-ec driver.

Reported-by: Antheas Kapenekakis <lkml@antheas.dev>
Fixes: 9c0beb6b29 ("platform/x86: wmi: Add MSI WMI Platform driver")
Tested-by: Antheas Kapenekakis <lkml@antheas.dev>
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://patch.msgid.link/20251110111253.16204-2-W_Armin@gmx.de
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-10 19:12:40 +02:00
Linus Torvalds
4ea7c1717f Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "Arm:

   - Fix trapping regression when no in-kernel irqchip is present

   - Check host-provided, untrusted ranges and offsets in pKVM

   - Fix regression restoring the ID_PFR1_EL1 register

   - Fix vgic ITS locking issues when LPIs are not directly injected

  Arm selftests:

   - Correct target CPU programming in vgic_lpi_stress selftest

   - Fix exposure of SCTLR2_EL2 and ZCR_EL2 in get-reg-list selftest

  RISC-V:

   - Fix check for local interrupts on riscv32

   - Read HGEIP CSR on the correct cpu when checking for IMSIC
     interrupts

   - Remove automatic I/O mapping from kvm_arch_prepare_memory_region()

  x86:

   - Inject #UD if the guest attempts to execute SEAMCALL or TDCALL as
     KVM doesn't support virtualization the instructions, but the
     instructions are gated only by VMXON. That is, they will VM-Exit
     instead of taking a #UD and until now this resulted in KVM exiting
     to userspace with an emulation error.

   - Unload the "FPU" when emulating INIT of XSTATE features if and only
     if the FPU is actually loaded, instead of trying to predict when
     KVM will emulate an INIT (CET support missed the MP_STATE path).
     Add sanity checks to detect and harden against similar bugs in the
     future.

   - Unregister KVM's GALog notifier (for AVIC) when kvm-amd.ko is
     unloaded.

   - Use a raw spinlock for svm->ir_list_lock as the lock is taken
     during schedule(), and "normal" spinlocks are sleepable locks when
     PREEMPT_RT=y.

   - Remove guest_memfd bindings on memslot deletion when a gmem file is
     dying to fix a use-after-free race found by syzkaller.

   - Fix a goof in the EPT Violation handler where KVM checks the wrong
     variable when determining if the reported GVA is valid.

   - Fix and simplify the handling of LBR virtualization on AMD, which
     was made buggy and unnecessarily complicated by nested VM support

  Misc:

   - Update Oliver's email address"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  KVM: nSVM: Fix and simplify LBR virtualization handling with nested
  KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv()
  KVM: SVM: Mark VMCB_LBR dirty when MSR_IA32_DEBUGCTLMSR is updated
  MAINTAINERS: Switch myself to using kernel.org address
  KVM: arm64: vgic-v3: Release reserved slot outside of lpi_xa's lock
  KVM: arm64: vgic-v3: Reinstate IRQ lock ordering for LPI xarray
  KVM: arm64: Limit clearing of ID_{AA64PFR0,PFR1}_EL1.GIC to userspace irqchip
  KVM: arm64: Set ID_{AA64PFR0,PFR1}_EL1.GIC when GICv3 is configured
  KVM: arm64: Make all 32bit ID registers fully writable
  KVM: VMX: Fix check for valid GVA on an EPT violation
  KVM: guest_memfd: Remove bindings on memslot deletion when gmem is dying
  KVM: SVM: switch to raw spinlock for svm->ir_list_lock
  KVM: SVM: Make avic_ga_log_notifier() local to avic.c
  KVM: SVM: Unregister KVM's GALog notifier on kvm-amd.ko exit
  KVM: SVM: Initialize per-CPU svm_data at the end of hardware setup
  KVM: x86: Call out MSR_IA32_S_CET is not handled by XSAVES
  KVM: x86: Harden KVM against imbalanced load/put of guest FPU state
  KVM: x86: Unload "FPU" state on INIT if and only if its currently in-use
  KVM: arm64: Check the untrusted offset in FF-A memory share
  KVM: arm64: Check range args for pKVM mem transitions
  ...
2025-11-10 08:54:36 -08:00
Niranjan H Y
eb2d6774cc ASoC: SDCA: bug fix while parsing mipi-sdca-control-cn-list
"struct sdca_control" declares "values" field as integer array.
But the memory allocated to it is of char array. This causes
crash for sdca_parse_function API. This patch addresses the
issue by allocating correct data size.

Signed-off-by: Niranjan H Y <niranjan.hy@ti.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20251110152646.192-1-niranjan.hy@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-10 16:22:49 +00:00
Mike Snitzer
d32ddfeb55 nfs/localio: Ensure DIO WRITE's IO on stable storage upon completion
LOCALIO's misaligned DIO WRITE support requires synchronous IO for any
misaligned head and/or tail that are issued using buffered IO.  In
addition, it is important that the O_DIRECT middle be on stable
storage upon its completion via AIO.

Otherwise, a misaligned DIO WRITE could mix buffered IO for the
head/tail and direct IO for the DIO-aligned middle -- which could lead
to problems associated with deferred writes to stable storage (such as
out of order partial completions causing incorrect advancement of the
file's offset, etc).

Fixes: c817248fc8 ("nfs/localio: add proper O_DIRECT support for READ and WRITE")
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 10:32:28 -05:00
Mike Snitzer
d0497dd274 nfs/localio: backfill missing partial read support for misaligned DIO
Misaligned DIO read can be split into 3 IOs, must handle potential for
short read from each component IO (follows same pattern used for
handling partial writes, except upper layer read code handles advancing
offset before retry).

Fixes: c817248fc8 ("nfs/localio: add proper O_DIRECT support for READ and WRITE")
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 10:32:28 -05:00
Mike Snitzer
f2060bdc21 nfs/localio: add refcounting for each iocb IO associated with NFS pgio header
Improve completion handling of as many as 3 IOs associated with each
misaligned DIO by using a atomic_t to track completion of each IO.

Update nfs_local_pgio_done() to use precise atomic_t accounting for
remaining iov_iter (up to 3) associated with each iocb, so that each
NFS LOCALIO pgio header is only released after all IOs have completed.
But also allow early return if/when a short read or write occurs.

Fixes reported BUG: KASAN: slab-use-after-free in nfs_local_call_read:
https://lore.kernel.org/linux-nfs/aPSvi5Yr2lGOh5Jh@dell-per750-06-vm-07.rhts.eng.pek2.redhat.com/

Reported-by: Yongcheng Yang <yoyang@redhat.com>
Fixes: c817248fc8 ("nfs/localio: add proper O_DIRECT support for READ and WRITE")
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 10:32:28 -05:00
Mike Snitzer
51a491f270 nfs/localio: remove unecessary ENOTBLK handling in DIO WRITE support
Each filesystem is meant to fallback to retrying DIO in terms buffered
IO when it might encounter -ENOTBLK when issuing DIO (which can happen
if the VFS cannot invalidate the page cache).

So NFS doesn't need special handling for -ENOTBLK.

Also, explicitly initialize a couple DIO related iocb members rather
than simply rely on data structure zeroing.

Fixes: c817248fc8 ("nfs/localio: add proper O_DIRECT support for READ and WRITE")
Reported-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 10:32:28 -05:00
Trond Myklebust
fb2cba0854 NFS: Check the TLS certificate fields in nfs_match_client()
If the TLS security policy is of type RPC_XPRTSEC_TLS_X509, then the
cert_serial and privkey_serial fields need to match as well since they
define the client's identity, as presented to the server.

Fixes: 90c9550a8d ("NFS: support the kernel keyring for TLS")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 10:32:28 -05:00
Trond Myklebust
8ab523ce78 pnfs: Set transport security policy to RPC_XPRTSEC_NONE unless using TLS
The default setting for the transport security policy must be
RPC_XPRTSEC_NONE, when using a TCP or RDMA connection without TLS.
Conversely, when using TLS, the security policy needs to be set.

Fixes: 6c0a8c5fcf ("NFS: Have struct nfs_client carry a TLS policy field")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 10:32:28 -05:00
Trond Myklebust
28e19737e1 pnfs: Fix TLS logic in _nfs4_pnfs_v4_ds_connect()
Don't try to add an RDMA transport to a client that is already marked as
being a TCP/TLS transport.

Fixes: a35518cae4 ("NFSv4.1/pnfs: fix NFS with TLS in pnfs")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 10:32:28 -05:00
Trond Myklebust
7aca00d950 pnfs: Fix TLS logic in _nfs4_pnfs_v3_ds_connect()
Don't try to add an RDMA transport to a client that is already marked as
being a TCP/TLS transport.

Fixes: 04a1526366 ("pnfs/flexfiles: connect to NFSv3 DS using TLS if MDS connection uses TLS")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <anna.schumaker@oracle.com>
2025-11-10 10:32:27 -05:00
Jani Nikula
994dec1099 drm/i915/psr: fix pipe to vblank conversion
First, we can't assume pipe == crtc index. If a pipe is fused off in
between, it no longer holds. intel_crtc_for_pipe() is the only proper
way to get from a pipe to the corresponding crtc.

Second, drivers aren't supposed to access or index drm->vblank[]
directly. There's drm_crtc_vblank_crtc() for this.

Use both functions to fix the pipe to vblank conversion.

Fixes: f02658c46c ("drm/i915/psr: Add mechanism to notify PSR of pipe enable/disable")
Cc: Jouni Högander <jouni.hogander@intel.com>
Cc: stable@vger.kernel.org # v6.16+
Reviewed-by: Jouni Högander <jouni.hogander@intel.com>
Link: https://patch.msgid.link/20251106200000.1455164-1-jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
(cherry picked from commit 2750f6765d6974f7e163c5d540a96c8703f6d8dd)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
2025-11-10 10:12:31 -05:00
Boris Brezillon
576c930e5e drm/panthor: Flush shmem writes before mapping buffers CPU-uncached
The shmem layer zeroes out the new pages using cached mappings, and if
we don't CPU-flush we might leave dirty cachelines behind, leading to
potential data leaks and/or asynchronous buffer corruption when dirty
cachelines are evicted.

Fixes: 8a1cc07578 ("drm/panthor: Add GEM logical block")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Link: https://patch.msgid.link/20251107171214.1186299-1-boris.brezillon@collabora.com
2025-11-10 14:56:06 +00:00
Chuck Lever
324be6dcbf Revert "SUNRPC: Make RPCSEC_GSS_KRB5 select CRYPTO instead of depending on it"
Geert reports:
> This is now commit d8e97cc476 ("SUNRPC: Make RPCSEC_GSS_KRB5
> select CRYPTO instead of depending on it") in v6.18-rc1.
> As RPCSEC_GSS_KRB5 defaults to "y", CRYPTO is now auto-enabled in
> defconfigs that didn't enable it before.

Revert while we work out a proper solution and then test it.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Closes: https://lore.kernel.org/linux-nfs/b97cea29-4ab7-4fb6-85ba-83f9830e524f@kernel.org/T/#t
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2025-11-10 09:31:52 -05:00
NeilBrown
1cff14b7fc nfsd: ensure SEQUENCE replay sends a valid reply.
nfsd4_enc_sequence_replay() uses nfsd4_encode_operation() to encode a
new SEQUENCE reply when replaying a request from the slot cache - only
ops after the SEQUENCE are replayed from the cache in ->sl_data.

However it does this in nfsd4_replay_cache_entry() which is called
*before* nfsd4_sequence() has filled in reply fields.

This means that in the replayed SEQUENCE reply:
 maxslots will be whatever the client sent
 target_maxslots will be -1 (assuming init to zero, and
      nfsd4_encode_sequence() subtracts 1)
 status_flags will be zero

The incorrect maxslots value, in particular, can cause the client to
think the slot table has been reduced in size so it can discard its
knowledge of current sequence number of the later slots, though the
server has not discarded those slots.  When the client later wants to
use a later slot, it can get NFS4ERR_SEQ_MISORDERED from the server.

This patch moves the setup of the reply into a new helper function and
call it *before* nfsd4_replay_cache_entry() is called.  Only one of the
updated fields was used after this point - maxslots.  So the
nfsd4_sequence struct has been extended to have separate maxslots for
the request and the response.

Reported-by: Olga Kornievskaia <okorniev@redhat.com>
Closes: https://lore.kernel.org/linux-nfs/20251010194449.10281-1-okorniev@redhat.com/
Tested-by: Olga Kornievskaia <okorniev@redhat.com>
Signed-off-by: NeilBrown <neil@brown.name>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2025-11-10 09:31:52 -05:00
Chuck Lever
c96573c0d7 NFSD: Never cache a COMPOUND when the SEQUENCE operation fails
RFC 8881 normatively mandates that operations where the initial
SEQUENCE operation in a compound fails must not modify the slot's
replay cache.

nfsd4_cache_this() doesn't prevent such caching. So when SEQUENCE
fails, cstate.data_offset is not set, allowing
read_bytes_from_xdr_buf() to access uninitialized memory.

Reported-by: rtm@csail.mit.edu
Closes: https://lore.kernel.org/linux-nfs/c3628d57-94ae-48cf-8c9e-49087a28cec9@oracle.com/T/#t
Fixes: 468de9e54a ("nfsd41: expand solo sequence check")
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2025-11-10 09:31:52 -05:00
Chuck Lever
ff8141e49c NFSD: Skip close replay processing if XDR encoding fails
The replay logic added by commit 9411b1d4c7 ("nfsd4: cleanup
handling of nfsv4.0 closed stateid's") cannot be done if encoding
failed due to a short send buffer; there's no guarantee that the
operation encoder has actually encoded the data that is being copied
to the replay cache.

Reported-by: rtm@csail.mit.edu
Closes: https://lore.kernel.org/linux-nfs/c3628d57-94ae-48cf-8c9e-49087a28cec9@oracle.com/T/#t
Fixes: 9411b1d4c7 ("nfsd4: cleanup handling of nfsv4.0 closed stateid's")
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2025-11-10 09:31:52 -05:00
Olga Kornievskaia
4aa17144d5 NFSD: free copynotify stateid in nfs4_free_ol_stateid()
Typically copynotify stateid is freed either when parent's stateid
is being close/freed or in nfsd4_laundromat if the stateid hasn't
been used in a lease period.

However, in case when the server got an OPEN (which created
a parent stateid), followed by a COPY_NOTIFY using that stateid,
followed by a client reboot. New client instance while doing
CREATE_SESSION would force expire previous state of this client.
It leads to the open state being freed thru release_openowner->
nfs4_free_ol_stateid() and it finds that it still has copynotify
stateid associated with it. We currently print a warning and is
triggerred

WARNING: CPU: 1 PID: 8858 at fs/nfsd/nfs4state.c:1550 nfs4_free_ol_stateid+0xb0/0x100 [nfsd]

This patch, instead, frees the associated copynotify stateid here.

If the parent stateid is freed (without freeing the copynotify
stateids associated with it), it leads to the list corruption
when laundromat ends up freeing the copynotify state later.

[ 1626.839430] Internal error: Oops - BUG: 00000000f2000800 [#1]  SMP
[ 1626.842828] Modules linked in: nfnetlink_queue nfnetlink_log bluetooth cfg80211 rpcrdma rdma_cm iw_cm ib_cm ib_core nfsd nfs_acl lockd grace nfs_localio ext4 crc16 mbcache jbd2 overlay uinput snd_seq_dummy snd_hrtimer qrtr rfkill vfat fat uvcvideo snd_hda_codec_generic videobuf2_vmalloc videobuf2_memops snd_hda_intel uvc snd_intel_dspcfg videobuf2_v4l2 videobuf2_common snd_hda_codec snd_hda_core videodev snd_hwdep snd_seq mc snd_seq_device snd_pcm snd_timer snd soundcore sg loop auth_rpcgss vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs 8021q garp stp llc mrp nvme ghash_ce e1000e nvme_core sr_mod nvme_keyring nvme_auth cdrom vmwgfx drm_ttm_helper ttm sunrpc dm_mirror dm_region_hash dm_log iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse dm_multipath dm_mod nfnetlink
[ 1626.855594] CPU: 2 UID: 0 PID: 199 Comm: kworker/u24:33 Kdump: loaded Tainted: G    B   W           6.17.0-rc7+ #22 PREEMPT(voluntary)
[ 1626.857075] Tainted: [B]=BAD_PAGE, [W]=WARN
[ 1626.857573] Hardware name: VMware, Inc. VMware20,1/VBSA, BIOS VMW201.00V.24006586.BA64.2406042154 06/04/2024
[ 1626.858724] Workqueue: nfsd4 laundromat_main [nfsd]
[ 1626.859304] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[ 1626.860010] pc : __list_del_entry_valid_or_report+0x148/0x200
[ 1626.860601] lr : __list_del_entry_valid_or_report+0x148/0x200
[ 1626.861182] sp : ffff8000881d7a40
[ 1626.861521] x29: ffff8000881d7a40 x28: 0000000000000018 x27: ffff0000c2a98200
[ 1626.862260] x26: 0000000000000600 x25: 0000000000000000 x24: ffff8000881d7b20
[ 1626.862986] x23: ffff0000c2a981e8 x22: 1fffe00012410e7d x21: ffff0000920873e8
[ 1626.863701] x20: ffff0000920873e8 x19: ffff000086f22998 x18: 0000000000000000
[ 1626.864421] x17: 20747562202c3839 x16: 3932326636383030 x15: 3030666666662065
[ 1626.865092] x14: 6220646c756f6873 x13: 0000000000000001 x12: ffff60004fd9e4a3
[ 1626.865713] x11: 1fffe0004fd9e4a2 x10: ffff60004fd9e4a2 x9 : dfff800000000000
[ 1626.866320] x8 : 00009fffb0261b5e x7 : ffff00027ecf2513 x6 : 0000000000000001
[ 1626.866938] x5 : ffff00027ecf2510 x4 : ffff60004fd9e4a3 x3 : 0000000000000000
[ 1626.867553] x2 : 0000000000000000 x1 : ffff000096069640 x0 : 000000000000006d
[ 1626.868167] Call trace:
[ 1626.868382]  __list_del_entry_valid_or_report+0x148/0x200 (P)
[ 1626.868876]  _free_cpntf_state_locked+0xd0/0x268 [nfsd]
[ 1626.869368]  nfs4_laundromat+0x6f8/0x1058 [nfsd]
[ 1626.869813]  laundromat_main+0x24/0x60 [nfsd]
[ 1626.870231]  process_one_work+0x584/0x1050
[ 1626.870595]  worker_thread+0x4c4/0xc60
[ 1626.870893]  kthread+0x2f8/0x398
[ 1626.871146]  ret_from_fork+0x10/0x20
[ 1626.871422] Code: aa1303e1 aa1403e3 910e8000 97bc55d7 (d4210000)
[ 1626.871892] SMP: stopping secondary CPUs

Reported-by: rtm@csail.mit.edu
Closes: https://lore.kernel.org/linux-nfs/d8f064c1-a26f-4eed-b4f0-1f7f608f415f@oracle.com/T/#t
Fixes: 624322f1ad ("NFSD add COPY_NOTIFY operation")
Cc: stable@vger.kernel.org
Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2025-11-10 09:31:52 -05:00
Hans de Goede
66e9feb03e spi: Add TODO comment about ACPI GPIO setup
Add a TODO comment that ideally the ACPI/gpiolib core code should take care
of setting GPIO direction and/or bias according to ACPI GPIO resources.

If this TODO gets implemented then the acpi_dev_gpio_irq_get() call in
acpi_register_spi_device() can be dropped.

Suggested-by: Andy Shevchenko <andy@kernel.org>
Signed-off-by: Hans de Goede <johannes.goede@oss.qualcomm.com>
Reviewed-by: Andy Shevchenko <andy@kernel.org>
Link: https://patch.msgid.link/20251109155340.26199-1-johannes.goede@oss.qualcomm.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-10 14:03:53 +00:00
Takashi Iwai
05a1fc5efd ALSA: usb-audio: Fix potential overflow of PCM transfer buffer
The PCM stream data in USB-audio driver is transferred over USB URB
packet buffers, and each packet size is determined dynamically.  The
packet sizes are limited by some factors such as wMaxPacketSize USB
descriptor.  OTOH, in the current code, the actually used packet sizes
are determined only by the rate and the PPS, which may be bigger than
the size limit above.  This results in a buffer overflow, as reported
by syzbot.

Basically when the limit is smaller than the calculated packet size,
it implies that something is wrong, most likely a weird USB
descriptor.  So the best option would be just to return an error at
the parameter setup time before doing any further operations.

This patch introduces such a sanity check, and returns -EINVAL when
the packet size is greater than maxpacksize.  The comparison with
ep->packsize[1] alone should suffice since it's always equal or
greater than ep->packsize[0].

Reported-by: syzbot+bfd77469c8966de076f7@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=bfd77469c8966de076f7
Link: https://lore.kernel.org/690b6b46.050a0220.3d0d33.0054.GAE@google.com
Cc: Lizhi Xu <lizhi.xu@windriver.com>
Cc: <stable@vger.kernel.org>
Link: https://patch.msgid.link/20251109091211.12739-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2025-11-10 10:48:45 +01:00
Randy Dunlap
aaf46c6a6d tee: <uapi/linux/tee.h: fix all kernel-doc issues
Fix kernel-doc warnings so that there no other kernel-doc issues
in <uapi/linux/tee.h>:

- add ending ':' to some struct members as needed for kernel-doc
- change struct name in kernel-doc to match the actual struct name (2x)
- add a @params: kernel-doc entry multiple times

Warning: tee.h:265 struct member 'ret_origin' not described
 in 'tee_ioctl_open_session_arg'
Warning: tee.h:265 struct member 'num_params' not described
 in 'tee_ioctl_open_session_arg'
Warning: tee.h:265 struct member 'params' not described
 in 'tee_ioctl_open_session_arg'
Warning: tee.h:351 struct member 'num_params' not described
 in 'tee_iocl_supp_recv_arg'
Warning: tee.h:351 struct member 'params' not described
 in 'tee_iocl_supp_recv_arg'
Warning: tee.h:372 struct member 'num_params' not described
 in 'tee_iocl_supp_send_arg'
Warning: tee.h:372 struct member 'params' not described
 in 'tee_iocl_supp_send_arg'
Warning: tee.h:298: expecting prototype for struct
 tee_ioctl_invoke_func_arg. Prototype was for
 struct tee_ioctl_invoke_arg instead
Warning: tee.h:473: expecting prototype for struct
 tee_ioctl_invoke_func_arg. Prototype was for struct
 tee_ioctl_object_invoke_arg instead

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Sumit Garg <sumit.garg@oss.qualcomm.com>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
2025-11-10 09:47:54 +01:00
Pratyush Yadav
b05addf6f0 kho: warn and exit when unpreserved page wasn't preserved
Calling __kho_unpreserve() on a pair of (pfn, end_pfn) that wasn't
preserved is a bug.  Currently, if that is done, the physxa or bits can be
NULL.  This results in a soft lockup since a NULL physxa or bits results
in redoing the loop without ever making any progress.

Return when physxa or bits are not found, but WARN first to loudly
indicate invalid behaviour.

Link: https://lkml.kernel.org/r/20251103180235.71409-3-pratyush@kernel.org
Fixes: fc33e4b44b ("kexec: enable KHO support for memory preservation")
Signed-off-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:47 -08:00
Pratyush Yadav
7ecd2e439d kho: fix unpreservation of higher-order vmalloc preservations
kho_vmalloc_unpreserve_chunk() calls __kho_unpreserve() with end_pfn as
pfn + 1.  This happens to work for 0-order pages, but leaks higher order
pages.

For example, say order 2 pages back the allocation.  During preservation,
they get preserved in the order 2 bitmaps, but
kho_vmalloc_unpreserve_chunk() would try to unpreserve them from the order
0 bitmaps, which should not have these bits set anyway, leaving the order
2 bitmaps untouched.  This results in the pages being carried over to the
next kernel.  Nothing will free those pages in the next boot, leaking
them.

Fix this by taking the order into account when calculating the end PFN for
__kho_unpreserve().

Link: https://lkml.kernel.org/r/20251103180235.71409-2-pratyush@kernel.org
Fixes: a667300bd5 ("kho: add support for preserving vmalloc allocations")
Signed-off-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:47 -08:00
Pratyush Yadav
0b07092d09 kho: fix out-of-bounds access of vmalloc chunk
The list of pages in a vmalloc chunk is NULL-terminated.  So when looping
through the pages in a vmalloc chunk, both kho_restore_vmalloc() and
kho_vmalloc_unpreserve_chunk() rightly make sure to stop when encountering
a NULL page.  But when the chunk is full, the loops do not stop and go
past the bounds of chunk->phys, resulting in out-of-bounds memory access,
and possibly the restoration or unpreservation of an invalid page.

Fix this by making sure the processing of chunk stops at the end of the
array.

Link: https://lkml.kernel.org/r/20251103110159.8399-1-pratyush@kernel.org
Fixes: a667300bd5 ("kho: add support for preserving vmalloc allocations")
Signed-off-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:47 -08:00
Chris Li
bba717bbc4 MAINTAINERS: add Chris and Kairui as the swap maintainer
We have been collaborating on a systematic effort to clean up and improve
the Linux swap system, and might as well take responsibility for it.

Link: https://lkml.kernel.org/r/20251102-swap-m-v1-1-582f275d5bce@kernel.org
Signed-off-by: Chris Li <chrisl@kernel.org>
Acked-by: Kairui Song <kasong@tencent.com>
Acked-by: Barry Song <baohua@kernel.org>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: SeongJae Park <sj@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:46 -08:00
Lance Yang
6f86d0534f mm/secretmem: fix use-after-free race in fault handler
When a page fault occurs in a secret memory file created with
`memfd_secret(2)`, the kernel will allocate a new folio for it, mark the
underlying page as not-present in the direct map, and add it to the file
mapping.

If two tasks cause a fault in the same page concurrently, both could end
up allocating a folio and removing the page from the direct map, but only
one would succeed in adding the folio to the file mapping.  The task that
failed undoes the effects of its attempt by (a) freeing the folio again
and (b) putting the page back into the direct map.  However, by doing
these two operations in this order, the page becomes available to the
allocator again before it is placed back in the direct mapping.

If another task attempts to allocate the page between (a) and (b), and the
kernel tries to access it via the direct map, it would result in a
supervisor not-present page fault.

Fix the ordering to restore the direct map before the folio is freed.

Link: https://lkml.kernel.org/r/20251031120955.92116-1-lance.yang@linux.dev
Fixes: 1507f51255 ("mm: introduce memfd_secret system call to create "secret" memory areas")
Signed-off-by: Lance Yang <lance.yang@linux.dev>
Reported-by: Google Big Sleep <big-sleep-vuln-reports@google.com>
Closes: https://lore.kernel.org/linux-mm/CAEXGt5QeDpiHTu3K9tvjUTPqo+d-=wuCNYPa+6sWKrdQJ-ATdg@mail.gmail.com/
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:46 -08:00
Catalin Marinas
adfb6609c6 mm/huge_memory: initialise the tags of the huge zero folio
On arm64 with MTE enabled, a page mapped as Normal Tagged (PROT_MTE) in
user space will need to have its allocation tags initialised.  This is
normally done in the arm64 set_pte_at() after checking the memory
attributes.  Such page is also marked with the PG_mte_tagged flag to avoid
subsequent clearing.  Since this relies on having a struct page,
pte_special() mappings are ignored.

Commit d82d09e482 ("mm/huge_memory: mark PMD mappings of the huge zero
folio special") maps the huge zero folio special and the arm64
set_pmd_at() will no longer zero the tags.  There is no guarantee that the
tags are zero, especially if parts of this huge page have been previously
tagged.

It's fairly easy to detect this by regularly dropping the caches to
force the reallocation of the huge zero folio.

Allocate the huge zero folio with the __GFP_ZEROTAGS flag.  In addition,
do not warn in the arm64 __access_remote_tags() when reading tags from the
huge zero page.

I bundled the arm64 change in here as well since they are both related to
the commit mapping the huge zero folio as special.

[catalin.marinas@arm.com: handle arch mte_zero_clear_page_tags() code issuing MTE instructions]
  Link: https://lkml.kernel.org/r/aQi8dA_QpXM8XqrE@arm.com
Link: https://lkml.kernel.org/r/20251031170133.280742-1-catalin.marinas@arm.com
Fixes: d82d09e482 ("mm/huge_memory: mark PMD mappings of the huge zero folio special")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Tested-by: Beleswar Padhi <b-padhi@ti.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Aishwarya TCV <aishwarya.tcv@arm.com>
Cc: David Hildenbrand (Red Hat) <david@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:46 -08:00
Edward Adam Davis
9a6b60cb14 nilfs2: avoid having an active sc_timer before freeing sci
Because kthread_stop did not stop sc_task properly and returned -EINTR,
the sc_timer was not properly closed, ultimately causing the problem [1]
reported by syzbot when freeing sci due to the sc_timer not being closed.

Because the thread sc_task main function nilfs_segctor_thread() returns 0
when it succeeds, when the return value of kthread_stop() is not 0 in
nilfs_segctor_destroy(), we believe that it has not properly closed
sc_timer.

We use timer_shutdown_sync() to sync wait for sc_timer to shutdown, and
set the value of sc_task to NULL under the protection of lock
sc_state_lock, so as to avoid the issue caused by sc_timer not being
properly shutdowned.

[1]
ODEBUG: free active (active state 0) object: 00000000dacb411a object type: timer_list hint: nilfs_construction_timeout
Call trace:
 nilfs_segctor_destroy fs/nilfs2/segment.c:2811 [inline]
 nilfs_detach_log_writer+0x668/0x8cc fs/nilfs2/segment.c:2877
 nilfs_put_super+0x4c/0x12c fs/nilfs2/super.c:509

Link: https://lkml.kernel.org/r/20251029225226.16044-1-konishi.ryusuke@gmail.com
Fixes: 3f66cc261c ("nilfs2: use kthread_create and kthread_stop for the log writer thread")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Reported-by: syzbot+24d8b70f039151f65590@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=24d8b70f039151f65590
Tested-by: syzbot+24d8b70f039151f65590@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
Cc: <stable@vger.kernel.org>	[6.12+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:46 -08:00
Carlos Llamas
7d9f7d390f scripts/decode_stacktrace.sh: fix build ID and PC source parsing
Support for parsing PC source info in stacktraces (e.g.  '(P)') was added
in commit 2bff77c665 ("scripts/decode_stacktrace.sh: fix decoding of
lines with an additional info").  However, this logic was placed after the
build ID processing.  This incorrect order fails to parse lines containing
both elements, e.g.:

  drm_gem_mmap_obj+0x114/0x200 [drm 03d0564e0529947d67bb2008c3548be77279fd27] (P)

This patch fixes the problem by extracting the PC source info first and
then processing the module build ID.  With this change, the line above is
now properly parsed as such:

  drm_gem_mmap_obj (./include/linux/mmap_lock.h:212 ./include/linux/mm.h:811 drivers/gpu/drm/drm_gem.c:1177) drm (P)

While here, also add a brief explanation the build ID section.

Link: https://lkml.kernel.org/r/20251030010347.2731925-1-cmllamas@google.com
Fixes: 2bff77c665 ("scripts/decode_stacktrace.sh: fix decoding of lines with an additional info")
Signed-off-by: Carlos Llamas <cmllamas@google.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Cc: Breno Leitao <leitao@debian.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Rutland <mark.rutland@arm.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Matthieu Baerts <matttbe@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Puranjay Mohan <puranjay@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:45 -08:00
Quanmin Yan
9fd7bb5083 mm/damon/sysfs: change next_update_jiffies to a global variable
In DAMON's damon_sysfs_repeat_call_fn(), time_before() is used to compare
the current jiffies with next_update_jiffies to determine whether to
update the sysfs files at this moment.

On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier. However, this causes time_before() in
damon_sysfs_repeat_call_fn() to unexpectedly return true during the first
5 minutes after boot on 32-bit systems (see [1] for more explanation,
which fixes another jiffies-related issue before). As a result, DAMON
does not update sysfs files during that period.

There is also an issue unrelated to the system's word size[2]: if the
user stops DAMON just after next_update_jiffies is updated and restarts
it after 'refresh_ms' or a longer delay, next_update_jiffies will retain
an older value, causing time_before() to return false and the update to
happen earlier than expected.

Fix these issues by making next_update_jiffies a global variable and
initializing it each time DAMON is started.

Link: https://lkml.kernel.org/r/20251030020746.967174-3-yanquanmin1@huawei.com
Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com [1]
Link: https://lore.kernel.org/all/20251029013038.66625-1-sj@kernel.org/ [2]
Fixes: d809a7c64b ("mm/damon/sysfs: implement refresh_ms file internal work")
Suggested-by: SeongJae Park <sj@kernel.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Quanmin Yan <yanquanmin1@huawei.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: ze zuo <zuoze1@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:45 -08:00
Quanmin Yan
2f6ce7e714 mm/damon/stat: change last_refresh_jiffies to a global variable
Patch series "mm/damon: fixes for the jiffies-related issues", v2.

On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier.  However, this may cause the
time_before() series of functions to return unexpected values, resulting
in DAMON not functioning as intended.  Meanwhile, similar issues exist in
some specific user operation scenarios.

This patchset addresses these issues.  The first patch is about the
DAMON_STAT module, and the second patch is about the core layer's sysfs.


This patch (of 2):

In DAMON_STAT's damon_stat_damon_call_fn(), time_before_eq() is used to
avoid unnecessarily frequent stat update.

On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier.  However, this causes time_before_eq()
in DAMON_STAT to unexpectedly return true during the first 5 minutes after
boot on 32-bit systems (see [1] for more explanation, which fixes another
jiffies-related issue before).  As a result, DAMON_STAT does not update
any monitoring results during that period, which becomes more confusing
when DAMON_STAT_ENABLED_DEFAULT is enabled.

There is also an issue unrelated to the system's word size[2]: if the user
stops DAMON_STAT just after last_refresh_jiffies is updated and restarts
it after 5 seconds or a longer delay, last_refresh_jiffies will retain an
older value, causing time_before_eq() to return false and the update to
happen earlier than expected.

Fix these issues by making last_refresh_jiffies a global variable and
initializing it each time DAMON_STAT is started.

Link: https://lkml.kernel.org/r/20251030020746.967174-2-yanquanmin1@huawei.com
Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com [1]
Link: https://lore.kernel.org/all/20251028143250.50144-1-sj@kernel.org/ [2]
Fixes: fabdd1e911 ("mm/damon/stat: calculate and expose estimated memory bandwidth")
Signed-off-by: Quanmin Yan <yanquanmin1@huawei.com>
Suggested-by: SeongJae Park <sj@kernel.org>
Reviewed-by: SeongJae Park <sj@kernel.org>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: ze zuo <zuoze1@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:45 -08:00
Martin Kaiser
91a5409002 maple_tree: fix tracepoint string pointers
maple_tree tracepoints contain pointers to function names. Such a pointer
is saved when a tracepoint logs an event. There's no guarantee that it's
still valid when the event is parsed later and the pointer is dereferenced.

The kernel warns about these unsafe pointers.

	event 'ma_read' has unsafe pointer field 'fn'
	WARNING: kernel/trace/trace.c:3779 at ignore_event+0x1da/0x1e4

Mark the function names as tracepoint_string() to fix the events.

One case that doesn't work without my patch would be trace-cmd record
to save the binary ringbuffer and trace-cmd report to parse it in
userspace.  The address of __func__ can't be dereferenced from
userspace but tracepoint_string will add an entry to
/sys/kernel/tracing/printk_formats

Link: https://lkml.kernel.org/r/20251030155537.87972-1-martin@kaiser.cx
Fixes: 54a611b605 ("Maple Tree: add new data structure")
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Acked-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:45 -08:00
Hao Ge
1abbdf3d57 codetag: debug: handle existing CODETAG_EMPTY in mark_objexts_empty for slabobj_ext
When alloc_slab_obj_exts() fails and then later succeeds in allocating a
slab extension vector, it calls handle_failed_objexts_alloc() to mark all
objects in the vector as empty.  As a result all objects in this slab
(slabA) will have their extensions set to CODETAG_EMPTY.

Later on if this slabA is used to allocate a slabobj_ext vector for
another slab (slabB), we end up with the slabB->obj_exts pointing to a
slabobj_ext vector that itself has a non-NULL slabobj_ext equal to
CODETAG_EMPTY.  When slabB gets freed, free_slab_obj_exts() is called to
free slabB->obj_exts vector.  

free_slab_obj_exts() calls mark_objexts_empty(slabB->obj_exts) which will
generate a warning because it expects slabobj_ext vectors to have a NULL
obj_ext, not CODETAG_EMPTY.

Modify mark_objexts_empty() to skip the warning and setting the obj_ext
value if it's already set to CODETAG_EMPTY.


To quickly detect this WARN, I modified the code from
WARN_ON(slab_exts[offs].ref.ct) to BUG_ON(slab_exts[offs].ref.ct == 1);

We then obtained this message:

[21630.898561] ------------[ cut here ]------------
[21630.898596] kernel BUG at mm/slub.c:2050!
[21630.898611] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
[21630.900372] Modules linked in: squashfs isofs vfio_iommu_type1 
vhost_vsock vfio vhost_net vmw_vsock_virtio_transport_common vhost tap 
vhost_iotlb iommufd vsock binfmt_misc nfsv3 nfs_acl nfs lockd grace 
netfs tls rds dns_resolver tun brd overlay ntfs3 exfat btrfs 
blake2b_generic xor xor_neon raid6_pq loop sctp ip6_udp_tunnel 
udp_tunnel nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib 
nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct 
nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 
nf_tables rfkill ip_set sunrpc vfat fat joydev sg sch_fq_codel nfnetlink 
virtio_gpu sr_mod cdrom drm_client_lib virtio_dma_buf drm_shmem_helper 
drm_kms_helper drm ghash_ce backlight virtio_net virtio_blk virtio_scsi 
net_failover virtio_console failover virtio_mmio dm_mirror 
dm_region_hash dm_log dm_multipath dm_mod fuse i2c_dev virtio_pci 
virtio_pci_legacy_dev virtio_pci_modern_dev virtio virtio_ring autofs4 
aes_neon_bs aes_ce_blk [last unloaded: hwpoison_inject]
[21630.909177] CPU: 3 UID: 0 PID: 3787 Comm: kylin-process-m Kdump: 
loaded Tainted: G        W           6.18.0-rc1+ #74 PREEMPT(voluntary)
[21630.910495] Tainted: [W]=WARN
[21630.910867] Hardware name: QEMU KVM Virtual Machine, BIOS unknown 
2/2/2022
[21630.911625] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS 
BTYPE=--)
[21630.912392] pc : __free_slab+0x228/0x250
[21630.912868] lr : __free_slab+0x18c/0x250[21630.913334] sp : 
ffff8000a02f73e0
[21630.913830] x29: ffff8000a02f73e0 x28: fffffdffc43fc800 x27: 
ffff0000c0011c40
[21630.914677] x26: ffff0000c000cac0 x25: ffff00010fe5e5f0 x24: 
ffff000102199b40
[21630.915469] x23: 0000000000000003 x22: 0000000000000003 x21: 
ffff0000c0011c40
[21630.916259] x20: fffffdffc4086600 x19: fffffdffc43fc800 x18: 
0000000000000000
[21630.917048] x17: 0000000000000000 x16: 0000000000000000 x15: 
0000000000000000
[21630.917837] x14: 0000000000000000 x13: 0000000000000000 x12: 
ffff70001405ee66
[21630.918640] x11: 1ffff0001405ee65 x10: ffff70001405ee65 x9 : 
ffff800080a295dc
[21630.919442] x8 : ffff8000a02f7330 x7 : 0000000000000000 x6 : 
0000000000003000
[21630.920232] x5 : 0000000024924925 x4 : 0000000000000001 x3 : 
0000000000000007
[21630.921021] x2 : 0000000000001b40 x1 : 000000000000001f x0 : 
0000000000000001
[21630.921810] Call trace:
[21630.922130]  __free_slab+0x228/0x250 (P)
[21630.922669]  free_slab+0x38/0x118
[21630.923079]  free_to_partial_list+0x1d4/0x340
[21630.923591]  __slab_free+0x24c/0x348
[21630.924024]  ___cache_free+0xf0/0x110
[21630.924468]  qlist_free_all+0x78/0x130
[21630.924922]  kasan_quarantine_reduce+0x114/0x148
[21630.925525]  __kasan_slab_alloc+0x7c/0xb0
[21630.926006]  kmem_cache_alloc_noprof+0x164/0x5c8
[21630.926699]  __alloc_object+0x44/0x1f8
[21630.927153]  __create_object+0x34/0xc8
[21630.927604]  kmemleak_alloc+0xb8/0xd8
[21630.928052]  kmem_cache_alloc_noprof+0x368/0x5c8
[21630.928606]  getname_flags.part.0+0xa4/0x610
[21630.929112]  getname_flags+0x80/0xd8
[21630.929557]  vfs_fstatat+0xc8/0xe0
[21630.929975]  __do_sys_newfstatat+0xa0/0x100
[21630.930469]  __arm64_sys_newfstatat+0x90/0xd8
[21630.931046]  invoke_syscall+0xd4/0x258
[21630.931685]  el0_svc_common.constprop.0+0xb4/0x240
[21630.932467]  do_el0_svc+0x48/0x68
[21630.932972]  el0_svc+0x40/0xe0
[21630.933472]  el0t_64_sync_handler+0xa0/0xe8
[21630.934151]  el0t_64_sync+0x1ac/0x1b0
[21630.934923] Code: aa1803e0 97ffef2b a9446bf9 17ffff9c (d4210000)
[21630.936461] SMP: stopping secondary CPUs
[21630.939550] Starting crashdump kernel...
[21630.940108] Bye!

Link: https://lkml.kernel.org/r/20251029014317.1533488-1-hao.ge@linux.dev
Fixes: 09c46563ff ("codetag: debug: introduce OBJEXTS_ALLOC_FAIL to mark failed slab_ext allocations")
Signed-off-by: Hao Ge <gehao@kylinos.cn>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Cc: Christoph Lameter (Ampere) <cl@gentwo.org>
Cc: David Rientjes <rientjes@google.com>
Cc: gehao <gehao@kylinos.cn>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:44 -08:00
Dev Jain
04d1c9d60c mm/mremap: honour writable bit in mremap pte batching
Currently mremap folio pte batch ignores the writable bit during figuring
out a set of similar ptes mapping the same folio.  Suppose that the first
pte of the batch is writable while the others are not - set_ptes will end
up setting the writable bit on the other ptes, which is a violation of
mremap semantics.  Therefore, use FPB_RESPECT_WRITE to check the writable
bit while determining the pte batch.

Link: https://lkml.kernel.org/r/20251028063952.90313-1-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain@arm.com>
Fixes: f822a9a81a ("mm: optimize mremap() by PTE batching")
Reported-by: David Hildenbrand <david@redhat.com>
Debugged-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Pedro Falcato <pfalcato@suse.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>	[6.17+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:44 -08:00
Peter Oberparleiter
ec4d11fc4b gcov: add support for GCC 15
Using gcov on kernels compiled with GCC 15 results in truncated 16-byte
long .gcda files with no usable data.  To fix this, update GCOV_COUNTERS
to match the value defined by GCC 15.

Tested with GCC 14.3.0 and GCC 15.2.0.

Link: https://lkml.kernel.org/r/20251028115125.1319410-1-oberpar@linux.ibm.com
Signed-off-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Reported-by: Matthieu Baerts <matttbe@kernel.org>
Closes: https://github.com/linux-test-project/lcov/issues/445
Tested-by: Matthieu Baerts <matttbe@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:44 -08:00
Isaac J. Manjarres
0d6c356dd6 mm/mm_init: fix hash table order logging in alloc_large_system_hash()
When emitting the order of the allocation for a hash table,
alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from log
base 2 of the allocation size.  This is not correct if the allocation size
is smaller than a page, and yields a negative value for the order as seen
below:

TCP established hash table entries: 32 (order: -4, 256 bytes, linear) TCP
bind hash table entries: 32 (order: -2, 1024 bytes, linear)

Use get_order() to compute the order when emitting the hash table
information to correctly handle cases where the allocation size is smaller
than a page:

TCP established hash table entries: 32 (order: 0, 256 bytes, linear) TCP
bind hash table entries: 32 (order: 0, 1024 bytes, linear)

Link: https://lkml.kernel.org/r/20251028191020.413002-1-isaacmanjarres@google.com
Fixes: 1da177e4c3 ("Linux-2.6.12-rc2")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:44 -08:00
Kiryl Shutsemau
fa04f5b60f mm/truncate: unmap large folio on split failure
Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.

This behavior might not be respected on truncation.

During truncation, the kernel splits a large folio in order to reclaim
memory.  As a side effect, it unmaps the folio and destroys PMD mappings
of the folio.  The folio will be refaulted as PTEs and SIGBUS semantics
are preserved.

However, if the split fails, PMD mappings are preserved and the user will
not receive SIGBUS on any accesses within the PMD.

Unmap the folio on split failure.  It will lead to refault as PTEs and
preserve SIGBUS semantics.

Make an exception for shmem/tmpfs that for long time intentionally mapped
with PMDs across i_size.

Link: https://lkml.kernel.org/r/20251027115636.82382-3-kirill@shutemov.name
Fixes: b9a8a4195c ("truncate,shmem: Handle truncates that split large folios")
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:43 -08:00
Kiryl Shutsemau
74207de2ba mm/memory: do not populate page table entries beyond i_size
Patch series "Fix SIGBUS semantics with large folios", v3.

Accessing memory within a VMA, but beyond i_size rounded up to the next
page size, is supposed to generate SIGBUS.

Darrick reported[1] an xfstests regression in v6.18-rc1.  generic/749
failed due to missing SIGBUS.  This was caused by my recent changes that
try to fault in the whole folio where possible:

        19773df031 ("mm/fault: try to map the entire file folio in finish_fault()")
        357b92761d ("mm/filemap: map entire large folio faultaround")

These changes did not consider i_size when setting up PTEs, leading to
xfstest breakage.

However, the problem has been present in the kernel for a long time -
since huge tmpfs was introduced in 2016.  The kernel happily maps
PMD-sized folios as PMD without checking i_size.  And huge=always tmpfs
allocates PMD-size folios on any writes.

I considered this corner case when I implemented a large tmpfs, and my
conclusion was that no one in their right mind should rely on receiving a
SIGBUS signal when accessing beyond i_size.  I cannot imagine how it could
be useful for the workload.

But apparently filesystem folks care a lot about preserving strict SIGBUS
semantics.

Generic/749 was introduced last year with reference to POSIX, but no real
workloads were mentioned.  It also acknowledged the tmpfs deviation from
the test case.

POSIX indeed says[3]:

        References within the address range starting at pa and
        continuing for len bytes to whole pages following the end of an
        object shall result in delivery of a SIGBUS signal.

The patchset fixes the regression introduced by recent changes as well as
more subtle SIGBUS breakage due to split failure on truncation.


This patch (of 2):

Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are
supposed to generate SIGBUS.

Recent changes attempted to fault in full folio where possible.  They did
not respect i_size, which led to populating PTEs beyond i_size and
breaking SIGBUS semantics.

Darrick reported generic/749 breakage because of this.

However, the problem existed before the recent changes.  With huge=always
tmpfs, any write to a file leads to PMD-size allocation.  Following the
fault-in of the folio will install PMD mapping regardless of i_size.

Fix filemap_map_pages() and finish_fault() to not install:
  - PTEs beyond i_size;
  - PMD mappings across i_size;

Make an exception for shmem/tmpfs that for long time intentionally
mapped with PMDs across i_size.

Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name
Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name
Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
Fixes: 6795801366 ("xfs: Support large folios")
Reported-by: "Darrick J. Wong" <djwong@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:43 -08:00
Wei Yang
895b4c0c79 fs/proc: fix uaf in proc_readdir_de()
Pde is erased from subdir rbtree through rb_erase(), but not set the node
to EMPTY, which may result in uaf access.  We should use RB_CLEAR_NODE()
set the erased node to EMPTY, then pde_subdir_next() will return NULL to
avoid uaf access.

We found an uaf issue while using stress-ng testing, need to run testcase
getdent and tun in the same time.  The steps of the issue is as follows:

1) use getdent to traverse dir /proc/pid/net/dev_snmp6/, and current
   pde is tun3;

2) in the [time windows] unregister netdevice tun3 and tun2, and erase
   them from rbtree.  erase tun3 first, and then erase tun2.  the
   pde(tun2) will be released to slab;

3) continue to getdent process, then pde_subdir_next() will return
   pde(tun2) which is released, it will case uaf access.

CPU 0                                      |    CPU 1
-------------------------------------------------------------------------
traverse dir /proc/pid/net/dev_snmp6/      |   unregister_netdevice(tun->dev)   //tun3 tun2
sys_getdents64()                           |
  iterate_dir()                            |
    proc_readdir()                         |
      proc_readdir_de()                    |     snmp6_unregister_dev()
        pde_get(de);                       |       proc_remove()
        read_unlock(&proc_subdir_lock);    |         remove_proc_subtree()
                                           |           write_lock(&proc_subdir_lock);
        [time window]                      |           rb_erase(&root->subdir_node, &parent->subdir);
                                           |           write_unlock(&proc_subdir_lock);
        read_lock(&proc_subdir_lock);      |
        next = pde_subdir_next(de);        |
        pde_put(de);                       |
        de = next;    //UAF                |

rbtree of dev_snmp6
                        |
                    pde(tun3)
                     /    \
                  NULL  pde(tun2)

Link: https://lkml.kernel.org/r/20251025024233.158363-1-albin_yang@163.com
Signed-off-by: Wei Yang <albinwyang@tencent.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: wangzijie <wangzijie1@honor.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:43 -08:00
Zi Yan
fa5a061700 mm/huge_memory: preserve PG_has_hwpoisoned if a folio is split to >0 order
folio split clears PG_has_hwpoisoned, but the flag should be preserved in
after-split folios containing pages with PG_hwpoisoned flag if the folio
is split to >0 order folios.  Scan all pages in a to-be-split folio to
determine which after-split folios need the flag.

An alternatives is to change PG_has_hwpoisoned to PG_maybe_hwpoisoned to
avoid the scan and set it on all after-split folios, but resulting false
positive has undesirable negative impact.  To remove false positive,
caller of folio_test_has_hwpoisoned() and folio_contain_hwpoisoned_page()
needs to do the scan.  That might be causing a hassle for current and
future callers and more costly than doing the scan in the split code. 
More details are discussed in [1].

This issue can be exposed via:
1. splitting a has_hwpoisoned folio to >0 order from debugfs interface;
2. truncating part of a has_hwpoisoned folio in
   truncate_inode_partial_folio().

And later accesses to a hwpoisoned page could be possible due to the
missing has_hwpoisoned folio flag.  This will lead to MCE errors.

Link: https://lore.kernel.org/all/CAHbLzkoOZm0PXxE9qwtF4gKR=cpRXrSrJ9V9Pm2DJexs985q4g@mail.gmail.com/ [1]
Link: https://lkml.kernel.org/r/20251023030521.473097-1-ziy@nvidia.com
Fixes: c010d47f10 ("mm: thp: split huge page to any lower order pages")
Signed-off-by: Zi Yan <ziy@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Yang Shi <yang@os.amperecomputing.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Cc: Pankaj Raghav <kernel@pankajraghav.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Luis Chamberalin <mcgrof@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:43 -08:00
Pedro Demarchi Gomes
f5548c318d ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
Currently, scan_get_next_rmap_item() walks every page address in a VMA to
locate mergeable pages.  This becomes highly inefficient when scanning
large virtual memory areas that contain mostly unmapped regions, causing
ksmd to use large amount of cpu without deduplicating much pages.

This patch replaces the per-address lookup with a range walk using
walk_page_range().  The range walker allows KSM to skip over entire
unmapped holes in a VMA, avoiding unnecessary lookups.  This problem was
previously discussed in [1].

Consider the following test program which creates a 32 TiB mapping in the
virtual address space but only populates a single page:

#include <unistd.h>
#include <stdio.h>
#include <sys/mman.h>

/* 32 TiB */
const size_t size = 32ul * 1024 * 1024 * 1024 * 1024;

int main() {
        char *area = mmap(NULL, size, PROT_READ | PROT_WRITE,
                          MAP_NORESERVE | MAP_PRIVATE | MAP_ANON, -1, 0);

        if (area == MAP_FAILED) {
                perror("mmap() failed\n");
                return -1;
        }

        /* Populate a single page such that we get an anon_vma. */
        *area = 0;

        /* Enable KSM. */
        madvise(area, size, MADV_MERGEABLE);
        pause();
        return 0;
}

$ ./ksm-sparse  &
$ echo 1 > /sys/kernel/mm/ksm/run 

Without this patch ksmd uses 100% of the cpu for a long time (more then 1
hour in my test machine) scanning all the 32 TiB virtual address space
that contain only one mapped page.  This makes ksmd essentially deadlocked
not able to deduplicate anything of value.  With this patch ksmd walks
only the one mapped page and skips the rest of the 32 TiB virtual address
space, making the scan fast using little cpu.

Link: https://lkml.kernel.org/r/20251023035841.41406-1-pedrodemargomes@gmail.com
Link: https://lkml.kernel.org/r/20251022153059.22763-1-pedrodemargomes@gmail.com
Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1]
Fixes: 31dbd01f31 ("ksm: Kernel SamePage Merging")
Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
Co-developed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Reported-by: craftfever <craftfever@airmail.cc>
Closes: https://lkml.kernel.org/r/020cf8de6e773bb78ba7614ef250129f11a63781@murena.io
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:42 -08:00
Aleksei Nikiforov
7e76b75e5a mm/kmsan: fix kmsan kmalloc hook when no stack depots are allocated yet
If no stack depot is allocated yet, due to masking out __GFP_RECLAIM flags
kmsan called from kmalloc cannot allocate stack depot.  kmsan fails to
record origin and report issues.  This may result in KMSAN failing to
report issues.

Reusing flags from kmalloc without modifying them should be safe for kmsan.
For example, such chain of calls is possible:
test_uninit_kmalloc -> kmalloc -> __kmalloc_cache_noprof ->
slab_alloc_node -> slab_post_alloc_hook ->
kmsan_slab_alloc -> kmsan_internal_poison_memory.

Only when it is called in a context without flags present should
__GFP_RECLAIM flags be masked.

With this change all kmsan tests start working reliably.

Eric reported:

: Yes, KMSAN seems to be at least partially broken currently.  Besides the
: fact that the kmsan KUnit test is currently failing (which I reported at
: https://lore.kernel.org/r/20250911175145.GA1376@sol), I've confirmed that
: the poly1305 KUnit test causes a KMSAN warning with Aleksei's patch
: applied but does not cause a warning without it.  The warning did get
: reached via syzbot somehow
: (https://lore.kernel.org/r/751b3d80293a6f599bb07770afcef24f623c7da0.1761026343.git.xiaopei01@kylinos.cn/),
: so KMSAN must still work in some cases.  But it didn't work for me.

Link: https://lkml.kernel.org/r/20250930115600.709776-2-aleksei.nikiforov@linux.ibm.com
Link: https://lkml.kernel.org/r/20251022030213.GA35717@sol
Fixes: 97769a53f1 ("mm, bpf: Introduce try_alloc_pages() for opportunistic page allocation")
Signed-off-by: Aleksei Nikiforov <aleksei.nikiforov@linux.ibm.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Ilya Leoshkevich <iii@linux.ibm.com>
Cc: Marco Elver <elver@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:42 -08:00
Kairui Song
fc745ff317 mm/shmem: fix THP allocation and fallback loop
The order check and fallback loop is updating the index value on every
loop.  This will cause the index to be wrongly aligned by a larger value
while the loop shrinks the order.

This may result in inserting and returning a folio of the wrong index and
cause data corruption with some userspace workloads [1].

[kasong@tencent.com: introduce a temporary variable to improve code]
  Link: https://lkml.kernel.org/r/20251023065913.36925-1-ryncsn@gmail.com
  Link: https://lore.kernel.org/linux-mm/CAMgjq7DqgAmj25nDUwwu1U2cSGSn8n4-Hqpgottedy0S6YYeUw@mail.gmail.com/ [1]
Link: https://lkml.kernel.org/r/20251022105719.18321-1-ryncsn@gmail.com
Link: https://lore.kernel.org/linux-mm/CAMgjq7DqgAmj25nDUwwu1U2cSGSn8n4-Hqpgottedy0S6YYeUw@mail.gmail.com/ [1]
Fixes: e7a2ab7b3b ("mm: shmem: add mTHP support for anonymous shmem")
Closes: https://lore.kernel.org/linux-mm/CAMgjq7DqgAmj25nDUwwu1U2cSGSn8n4-Hqpgottedy0S6YYeUw@mail.gmail.com/
Signed-off-by: Kairui Song <kasong@tencent.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:42 -08:00
Pasha Tatashin
fa759cd75b kho: allocate metadata directly from the buddy allocator
KHO allocates metadata for its preserved memory map using the slab
allocator via kzalloc().  This metadata is temporary and is used by the
next kernel during early boot to find preserved memory.

A problem arises when KFENCE is enabled.  kzalloc() calls can be randomly
intercepted by kfence_alloc(), which services the allocation from a
dedicated KFENCE memory pool.  This pool is allocated early in boot via
memblock.

When booting via KHO, the memblock allocator is restricted to a "scratch
area", forcing the KFENCE pool to be allocated within it.  This creates a
conflict, as the scratch area is expected to be ephemeral and
overwriteable by a subsequent kexec.  If KHO metadata is placed in this
KFENCE pool, it leads to memory corruption when the next kernel is loaded.

To fix this, modify KHO to allocate its metadata directly from the buddy
allocator instead of slab.

Link: https://lkml.kernel.org/r/20251021000852.2924827-4-pasha.tatashin@soleen.com
Fixes: fc33e4b44b ("kexec: enable KHO support for memory preservation")
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: David Matlack <dmatlack@google.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Samiullah Khawaja <skhawaja@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:42 -08:00
Pasha Tatashin
a2fff99f92 kho: increase metadata bitmap size to PAGE_SIZE
KHO memory preservation metadata is preserved in 512 byte chunks which
requires their allocation from slab allocator.  Slabs are not safe to be
used with KHO because of kfence, and because partial slabs may lead leaks
to the next kernel.  Change the size to be PAGE_SIZE.

The kfence specifically may cause memory corruption, where it randomly
provides slab objects that can be within the scratch area.  The reason for
that is that kfence allocates its objects prior to KHO scratch is marked
as CMA region.

While this change could potentially increase metadata overhead on systems
with sparsely preserved memory, this is being mitigated by ongoing work to
reduce sparseness during preservation via 1G guest pages.  Furthermore,
this change aligns with future work on a stateless KHO, which will also
use page-sized bitmaps for its radix tree metadata.

Link: https://lkml.kernel.org/r/20251021000852.2924827-3-pasha.tatashin@soleen.com
Fixes: fc33e4b44b ("kexec: enable KHO support for memory preservation")
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Matlack <dmatlack@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Samiullah Khawaja <skhawaja@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:41 -08:00
Pasha Tatashin
e38f65d317 kho: warn and fail on metadata or preserved memory in scratch area
Patch series "KHO: kfence + KHO memory corruption fix", v3.

This series fixes a memory corruption bug in KHO that occurs when KFENCE
is enabled.

The root cause is that KHO metadata, allocated via kzalloc(), can be
randomly serviced by kfence_alloc().  When a kernel boots via KHO, the
early memblock allocator is restricted to a "scratch area".  This forces
the KFENCE pool to be allocated within this scratch area, creating a
conflict.  If KHO metadata is subsequently placed in this pool, it gets
corrupted during the next kexec operation.

Google is using KHO and have had obscure crashes due to this memory
corruption, with stacks all over the place.  I would prefer this fix to be
properly backported to stable so we can also automatically consume it once
we switch to the upstream KHO.

Patch 1/3 introduces a debug-only feature (CONFIG_KEXEC_HANDOVER_DEBUG)
that adds checks to detect and fail any operation that attempts to place
KHO metadata or preserved memory within the scratch area.  This serves as
a validation and diagnostic tool to confirm the problem without affecting
production builds.

Patch 2/3 Increases bitmap to PAGE_SIZE, so buddy allocator can be used.

Patch 3/3 Provides the fix by modifying KHO to allocate its metadata
directly from the buddy allocator instead of slab.  This bypasses the
KFENCE interception entirely.


This patch (of 3):

It is invalid for KHO metadata or preserved memory regions to be located
within the KHO scratch area, as this area is overwritten when the next
kernel is loaded, and used early in boot by the next kernel.  This can
lead to memory corruption.

Add checks to kho_preserve_* and KHO's internal metadata allocators
(xa_load_or_alloc, new_chunk) to verify that the physical address of the
memory does not overlap with any defined scratch region.  If an overlap is
detected, the operation will fail and a WARN_ON is triggered.  To avoid
performance overhead in production kernels, these checks are enabled only
when CONFIG_KEXEC_HANDOVER_DEBUG is selected.

[rppt@kernel.org: fix KEXEC_HANDOVER_DEBUG Kconfig dependency]
  Link: https://lkml.kernel.org/r/aQHUyyFtiNZhx8jo@kernel.org
[pasha.tatashin@soleen.com: build fix]
  Link: https://lkml.kernel.org/r/CA+CK2bBnorfsTymKtv4rKvqGBHs=y=MjEMMRg_tE-RME6n-zUw@mail.gmail.com
Link: https://lkml.kernel.org/r/20251021000852.2924827-1-pasha.tatashin@soleen.com
Link: https://lkml.kernel.org/r/20251021000852.2924827-2-pasha.tatashin@soleen.com
Fixes: fc33e4b44b ("kexec: enable KHO support for memory preservation")
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Mike Rapoport <rppt@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: David Matlack <dmatlack@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Samiullah Khawaja <skhawaja@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:41 -08:00
Zi Yan
77008e1b2e mm/huge_memory: do not change split_huge_page*() target order silently
Page cache folios from a file system that support large block size (LBS)
can have minimal folio order greater than 0, thus a high order folio might
not be able to be split down to order-0.  Commit e220917fa5 ("mm: split
a folio in minimum folio order chunks") bumps the target order of
split_huge_page*() to the minimum allowed order when splitting a LBS
folio.  This causes confusion for some split_huge_page*() callers like
memory failure handling code, since they expect after-split folios all
have order-0 when split succeeds but in reality get min_order_for_split()
order folios and give warnings.

Fix it by failing a split if the folio cannot be split to the target
order.  Rename try_folio_split() to try_folio_split_to_order() to reflect
the added new_order parameter.  Remove its unused list parameter.

[The test poisons LBS folios, which cannot be split to order-0 folios, and
also tries to poison all memory.  The non split LBS folios take more
memory than the test anticipated, leading to OOM.  The patch fixed the
kernel warning and the test needs some change to avoid OOM.]

Link: https://lkml.kernel.org/r/20251017013630.139907-1-ziy@nvidia.com
Fixes: e220917fa5 ("mm: split a folio in minimum folio order chunks")
Signed-off-by: Zi Yan <ziy@nvidia.com>
Reported-by: syzbot+e6367ea2fdab6ed46056@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68d2c943.a70a0220.1b52b.02b3.GAE@google.com/
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-11-09 21:19:41 -08:00
Bibo Mao
237e74bfa2 LoongArch: KVM: Fix max supported vCPUs set with EIOINTC
VM fails to boot with 256 vCPUs, the detailed command is

  qemu-system-loongarch64 -smp 256

and there is an error reported as follows:

  KVM_LOONGARCH_EXTIOI_INIT_NUM_CPU failed: Invalid argument

There is typo issue in function kvm_eiointc_ctrl_access() when set
max supported vCPUs.

Cc: stable@vger.kernel.org
Fixes: 47256c4c8b ("LoongArch: KVM: Avoid copy_*_user() with lock hold in kvm_eiointc_ctrl_access()")
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:07 +08:00
Bibo Mao
11f340ece4 LoongArch: KVM: Skip PMU checking on vCPU context switch
PMU hardware about VM is switched on VM exit to host rather than vCPU
context sched off, PMU is checked and restored on return to VM. It is
not necessary to check PMU on vCPU context sched on callback, since the
request is made on the VM exit entry or VM PMU CSR access abort routine
already.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:07 +08:00
Bibo Mao
5001bcf86e LoongArch: KVM: Restore guest PMU if it is enabled
On LoongArch system, guest PMU hardware is shared by guest and host but
PMU interrupt is separated. PMU is pass-through to VM, and there is PMU
context switch when exit to host and return to guest.

There is optimiation to check whether PMU is enabled by guest. If not,
it is not necessary to return to guest. However, if it is enabled, PMU
context for guest need switch on. Now KVM_REQ_PMU notification is set
on vCPU context switch, but it is missing if there is no vCPU context
switch while PMU is used by guest VM, so fix it.

Cc: <stable@vger.kernel.org>
Fixes: f4e40ea9f7 ("LoongArch: KVM: Add PMU support for guest")
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:07 +08:00
Bibo Mao
d3c9515e4f LoongArch: KVM: Add delay until timer interrupt injected
When timer is fired in oneshot mode, CSR.TVAL will stop with value -1
rather than 0. However when the register CSR.TVAL is restored, it will
continue to count down rather than stop there.

Now the method is to write 0 to CSR.TVAL, wait to count down for 1 cycle
at least, which is 10ns with a timer freq 100MHz, and then retore timer
interrupt status. Here add 2 cycles delay to assure that timer interrupt
is injected.

With this patch, timer selftest case passes to run always.

Cc: stable@vger.kernel.org
Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:07 +08:00
Bibo Mao
37e9d1a913 LoongArch: KVM: Set page with write attribute if dirty track disabled
With secondary MMU page table, if there is a read page fault, the page's
write attribute will not set even if it is writable from master MMU page
table. This logic only works if dirty tracking is enabled, so page table
should be set with _PAGE_WRITE if dirty tracking is disabled.

It reduces extra page fault on secondary MMU page table if a VM finishes
migration, when the master MMU page table is ready and the secondary MMU
page is fresh.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:07 +08:00
Qiang Ma
62cda5e54f LoongArch: kexec: Print out debugging message if required
When specifying '-d' for kexec_file_load interface, loaded locations of
kernel/initrd/cmdline etc can be printed out to help debug.

Commit eb7622d908 ("kexec_file, riscv: print out debugging message if
required") fixes the same issue on RISC-V.

So, remove kexec_image_info() because the content has been printed out
in generic code.

And on Loongson-3A5000, the printed messages look like below:

 kexec_file: kernel: 00000000d9aad283 kernel_size: 0x2e77f30
 kexec_file(EFI): No LoongArch PE image header.
 kexec_file: Loaded initrd at 0x80000000 bufsz=0x1637cd0 memsz=0x1638000
 kexec_file(ELF): Loaded kernel at 0x9c20000 bufsz=0x27f1800 memsz=0x2950000
 kexec_file: nr_segments = 2
 kexec_file: segment[0]: buf=0x00000000cc3e6c33 bufsz=0x27f1800 mem=0x9c20000 memsz=0x2950000
 kexec_file: segment[1]: buf=0x00000000bb75a541 bufsz=0x1637cd0 mem=0x80000000 memsz=0x1638000
 kexec_file: kexec_file_load: type:0, start:0xb15d000 head:0x18db60002 flags:0x8

Signed-off-by: Qiang Ma <maqianga@uniontech.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:07 +08:00
Youling Tang
df16b8956c LoongArch: kexec: Initialize the kexec_buf structure
The kexec_buf structure was previously declared without initialization.
commit bf454ec31a ("kexec_file: allow to place kexec_buf randomly")
added a field that is always read but not consistently populated by all
architectures. This un-initialized field will contain garbage.

This is also triggering a UBSAN warning when the uninitialized data is
accessed:

        ------------[ cut here ]------------
        UBSAN: invalid-load in ./include/linux/kexec.h:210:10
        load of value 252 is not a valid value for type '_Bool'

Zero-initializing kexec_buf at declaration ensures all fields are
cleanly set, preventing future instances of uninitialized memory being
used.

Fixes: bf454ec31a ("kexec_file: allow to place kexec_buf randomly")
Link: https://lore.kernel.org/r/20250827-kbuf_all-v1-2-1df9882bb01a@debian.org
Signed-off-by: Youling Tang <tangyouling@kylinos.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:07 +08:00
Huacai Chen
eeeeaafa62 LoongArch: Use correct accessor to read FWPC/MWPC
CSR.FWPC and CSR.MWPC are 32bit registers, so use csr_read32() rather
than csr_read64() to read the values of FWPC/MWPC.

Cc: stable@vger.kernel.org
Fixes: edffa33c7b ("LoongArch: Add hardware breakpoints/watchpoints support")
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:06 +08:00
Tiezhu Yang
4c8a7c9827 LoongArch: Refine the init_hw_perf_events() function
(1) Use the existing CPUCFG6_PMNUM_SHIFT macro definition instead of
the magic value 4 to get the PMU number.

(2) Detect the value of PMU bits via CPUCFG instruction according to
the ISA manual instead of hard-coded as 64, because the value may be
different for various micro-architectures.

Link: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_cpucfg
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:06 +08:00
Vishal Moola (Oracle)
17f838512a LoongArch: Remove __GFP_HIGHMEM masking in pud_alloc_one()
Remove the unnecessary __GFP_HIGHMEM masking in pud_alloc_one(), which
was introduced with commit 382739797f ("loongarch: convert various
functions to use ptdescs"). GFP_KERNEL doesn't contain __GFP_HIGHMEM.

Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:06 +08:00
Tianyang Zhang
a073d637c8 LoongArch: Let {pte,pmd}_modify() record the status of _PAGE_DIRTY
Now if the PTE/PMD is dirty with _PAGE_DIRTY but without _PAGE_MODIFIED,
after {pte,pmd}_modify() we lose _PAGE_DIRTY, then {pte,pmd}_dirty()
return false and lead to data loss. This can happen in certain scenarios
such as HW PTW doesn't set _PAGE_MODIFIED automatically, so here we need
_PAGE_MODIFIED to record the dirty status (_PAGE_DIRTY).

The new modification involves checking whether the original PTE/PMD has
the _PAGE_DIRTY flag. If it exists, the _PAGE_MODIFIED bit is also set,
ensuring that the {pte,pmd}_dirty() interface can always return accurate
information.

Cc: stable@vger.kernel.org
Co-developed-by: Liupu Wang <wangliupu@loongson.cn>
Signed-off-by: Liupu Wang <wangliupu@loongson.cn>
Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn>
2025-11-10 08:37:06 +08:00
Huacai Chen
ce5ad03e45 LoongArch: Consolidate max_pfn & max_low_pfn calculation
Now there 5 places which calculate max_pfn & max_low_pfn:
1. in fdt_setup() for FDT systems;
2. in memblock_init() for ACPI systems;
3. in init_numa_memory() for NUMA systems;
4. in arch_mem_init() to recalculate for "mem=" cmdline;
5. in paging_init() to recalculate for NUMA systems.

Since memblock_init() is called both for ACPI and FDT systems, move the
calculation out of the for_each_efi_memory_desc() loop can eliminate the
first case. The last case is very questionable (may be derived from the
MIPS/Loongson code) and breaks the "mem=" cmdline, so should be removed.
And then the NUMA version of paging_init() can be also eliminated.

After consolidation there are 3 places of calculation:
1. in memblock_init() for both ACPI and FDT systems;
2. in init_numa_memory() to recalculate for NUMA systems;
3. in arch_mem_init() to recalculate for the "mem=" cmdline.

For all cases the calculation is:
max_pfn = PFN_DOWN(memblock_end_of_DRAM());
max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn);

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:06 +08:00
Huacai Chen
43a9e6a10b LoongArch: Consolidate early_ioremap()/ioremap_prot()
1. Use phys_addr_t instead of u64, which can work for both 32/64 bits.
2. Check whether the input physical address is above TO_PHYS_MASK (and
   return NULL if yes) for the DMW version.

Note: In theory early_ioremap() also need the TO_PHYS_MASK checking, but
the UEFI BIOS pass some DMW virtual addresses.

Cc: stable@vger.kernel.org
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:06 +08:00
Huacai Chen
4e67526840 LoongArch: Use physical addresses for CSR_MERRENTRY/CSR_TLBRENTRY
Now we use virtual addresses to fill CSR_MERRENTRY/CSR_TLBRENTRY, but
hardware hope physical addresses. Now it works well because the high
bits are ignored above PA_BITS (48 bits), but explicitly use physical
addresses can avoid potential bugs. So fix it.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:06 +08:00
Huacai Chen
f28abb9f96 LoongArch: Clarify 3 MSG interrupt features
LoongArch's MSG interrupt features are used across multiple subsystems.
Clarify these features to avoid misuse, existing users will be adjusted
if necessary.

MSGINT: Infrastructure, means the CPU core supports message interupts.
Indicated by CPUCFG1.MSGINT.

AVECINT: AVEC interrupt controller based on MSGINT, means the CPU chip
supports direct message interrupts. Indicated by IOCSR.FEATURES.DMSI.

REDIRECTINT: REDIRECT interrupt controller based on MSGINT and AVECINT,
means the CPU chip supports redirect message interrupts. Indicated by
IOCSR.FEATURES.RMSI.

For example:
Loongson-3A5000/3C5000 doesn't support MSGINT/AVECINT/REDIRECTINT;
Loongson-3A6000 supports MSGINT but doesn't support AVECINT/REDIRECTINT;
Loongson-3C6000 supports MSGINT/AVECINT/REDIRECTINT.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:06 +08:00
Xi Ruoyao
fe4b3a34e9 rust: Add -fno-isolate-erroneous-paths-dereference to bindgen_skip_c_flags
It's used to work around an objtool issue since commit abb2a55722
("LoongArch: Add cflag -fno-isolate-erroneous-paths-dereference"), but
it's then passed to bindgen and cause an error because Clang does not
have this option.

Fixes: abb2a55722 ("LoongArch: Add cflag -fno-isolate-erroneous-paths-dereference")
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Tested-by: Mingcong Bai <jeffbai@aosc.io>
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2025-11-10 08:37:06 +08:00
Joshua Rogers
98a5fd31cb ksmbd: close accepted socket when per-IP limit rejects connection
When the per-IP connection limit is exceeded in ksmbd_kthread_fn(),
the code sets ret = -EAGAIN and continues the accept loop without
closing the just-accepted socket. That leaks one socket per rejected
attempt from a single IP and enables a trivial remote DoS.

Release client_sk before continuing.

This bug was found with ZeroPath.

Cc: stable@vger.kernel.org
Signed-off-by: Joshua Rogers <linux@joshua.hu>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-09 17:47:52 -06:00
Joshua Rogers
e904d81ad1 smb: server: rdma: avoid unmapping posted recv on accept failure
smb_direct_prepare_negotiation() posts a recv and then, if
smb_direct_accept_client() fails, calls put_recvmsg() on the same
buffer. That unmaps and recycles a buffer that is still posted on
the QP., which can lead to device DMA into unmapped or reused memory.

Track whether the recv was posted and only return it if it was never
posted. If accept fails after a post, leave it for teardown to drain
and complete safely.

Signed-off-by: Joshua Rogers <linux@joshua.hu>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-09 17:47:49 -06:00
Edward Adam Davis
e8c73eb7db cifs: client: fix memory leak in smb3_fs_context_parse_param
The user calls fsconfig twice, but when the program exits, free() only
frees ctx->source for the second fsconfig, not the first.
Regarding fc->source, there is no code in the fs context related to its
memory reclamation.

To fix this memory leak, release the source memory corresponding to ctx
or fc before each parsing.

syzbot reported:
BUG: memory leak
unreferenced object 0xffff888128afa360 (size 96):
  backtrace (crc 79c9c7ba):
    kstrdup+0x3c/0x80 mm/util.c:84
    smb3_fs_context_parse_param+0x229b/0x36c0 fs/smb/client/fs_context.c:1444

BUG: memory leak
unreferenced object 0xffff888112c7d900 (size 96):
  backtrace (crc 79c9c7ba):
    smb3_fs_context_fullpath+0x70/0x1b0 fs/smb/client/fs_context.c:629
    smb3_fs_context_parse_param+0x2266/0x36c0 fs/smb/client/fs_context.c:1438

Reported-by: syzbot+72afd4c236e6bc3f4bac@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=72afd4c236e6bc3f4bac
Cc: stable@vger.kernel.org
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.org>
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-09 17:30:17 -06:00
Henrique Carvalho
79280191c2 smb: client: fix cifs_pick_channel when channel needs reconnect
cifs_pick_channel iterates candidate channels using cur. The
reconnect-state test mistakenly used a different variable.

This checked the wrong slot and would cause us to skip a healthy channel
and to dispatch on one that needs reconnect, occasionally failing
operations when a channel was down.

Fix by replacing for the correct variable.

Fixes: fc43a8ac39 ("cifs: cifs_pick_channel should try selecting active channels")
Cc: stable@vger.kernel.org
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Henrique Carvalho <henrique.carvalho@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
2025-11-09 17:30:17 -06:00
Haotian Zhang
9b07cdf86a pinctrl: cirrus: Fix fwnode leak in cs42l43_pin_probe()
The driver calls fwnode_get_named_child_node() which takes a reference
on the child node, but never releases it, which causes a reference leak.

Fix by using devm_add_action_or_reset() to automatically release the
reference when the device is removed.

Fixes: d5282a5392 ("pinctrl: cs42l43: Add support for the cs42l43")
Suggested-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Haotian Zhang <vulab@iscas.ac.cn>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2025-11-10 00:01:22 +01:00
Baojun Xu
7a39c723b7 ALSA: hda/tas2781: Add new quirk for HP new projects
Add new vendor_id and subsystem_id in quirk for HP new projects.

Signed-off-by: Baojun Xu <baojun.xu@ti.com>
Link: https://patch.msgid.link/20251108142325.2563-1-baojun.xu@ti.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2025-11-09 09:52:59 +01:00
Yosry Ahmed
8a4821412c KVM: nSVM: Fix and simplify LBR virtualization handling with nested
The current scheme for handling LBRV when nested is used is very
complicated, especially when L1 does not enable LBRV (i.e. does not set
LBR_CTL_ENABLE_MASK).

To avoid copying LBRs between VMCB01 and VMCB02 on every nested
transition, the current implementation switches between using VMCB01 or
VMCB02 as the source of truth for the LBRs while L2 is running. If L2
enables LBR, VMCB02 is used as the source of truth. When L2 disables
LBR, the LBRs are copied to VMCB01 and VMCB01 is used as the source of
truth. This introduces significant complexity, and incorrect behavior in
some cases.

For example, on a nested #VMEXIT, the LBRs are only copied from VMCB02
to VMCB01 if LBRV is enabled in VMCB01. This is because L2's writes to
MSR_IA32_DEBUGCTLMSR to enable LBR are intercepted and propagated to
VMCB01 instead of VMCB02. However, LBRV is only enabled in VMCB02 when
L2 is running.

This means that if L2 enables LBR and exits to L1, the LBRs will not be
propagated from VMCB02 to VMCB01, because LBRV is disabled in VMCB01.

There is no meaningful difference in CPUID rate in L2 when copying LBRs
on every nested transition vs. the current approach, so do the simple
and correct thing and always copy LBRs between VMCB01 and VMCB02 on
nested transitions (when LBRV is disabled by L1). Drop the conditional
LBRs copying in __svm_{enable/disable}_lbrv() as it is now unnecessary.

VMCB02 becomes the only source of truth for LBRs when L2 is running,
regardless of LBRV being enabled by L1, drop svm_get_lbr_vmcb() and use
svm->vmcb directly in its place.

Fixes: 1d5a1b5860 ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
Cc: stable@vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251108004524.1600006-4-yosry.ahmed@linux.dev
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2025-11-09 08:50:13 +01:00
Yosry Ahmed
fbe5e5f030 KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv()
svm_update_lbrv() is called when MSR_IA32_DEBUGCTLMSR is updated, and on
nested transitions where LBRV is used. It checks whether LBRV enablement
needs to be changed in the current VMCB, and if it does, it also
recalculate intercepts to LBR MSRs.

However, there are cases where intercepts need to be updated even when
LBRV enablement doesn't. Example scenario:
- L1 has MSR_IA32_DEBUGCTLMSR cleared.
- L1 runs L2 without LBR_CTL_ENABLE (no LBRV).
- L2 sets DEBUGCTLMSR_LBR in MSR_IA32_DEBUGCTLMSR, svm_update_lbrv()
  sets LBR_CTL_ENABLE in VMCB02 and disables intercepts to LBR MSRs.
- L2 exits to L1, svm_update_lbrv() is not called on this transition.
- L1 clears MSR_IA32_DEBUGCTLMSR, svm_update_lbrv() finds that
  LBR_CTL_ENABLE is already cleared in VMCB01 and does nothing.
- Intercepts remain disabled, L1 reads to LBR MSRs read the host MSRs.

Fix it by always recalculating intercepts in svm_update_lbrv().

Fixes: 1d5a1b5860 ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
Cc: stable@vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251108004524.1600006-3-yosry.ahmed@linux.dev
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2025-11-09 08:49:52 +01:00
Yosry Ahmed
dc55b3c3f6 KVM: SVM: Mark VMCB_LBR dirty when MSR_IA32_DEBUGCTLMSR is updated
The APM lists the DbgCtlMsr field as being tracked by the VMCB_LBR clean
bit.  Always clear the bit when MSR_IA32_DEBUGCTLMSR is updated.

The history is complicated, it was correctly cleared for L1 before
commit 1d5a1b5860 ("KVM: x86: nSVM: correctly virtualize LBR msrs when
L2 is running").  At that point svm_set_msr() started to rely on
svm_update_lbrv() to clear the bit, but when nested virtualization
is enabled the latter does not always clear it even if MSR_IA32_DEBUGCTLMSR
changed. Go back to clearing it directly in svm_set_msr().

Fixes: 1d5a1b5860 ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running")
Reported-by: Matteo Rizzo <matteorizzo@google.com>
Reported-by: evn@google.com
Co-developed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251108004524.1600006-2-yosry.ahmed@linux.dev
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2025-11-09 08:49:48 +01:00
Paolo Bonzini
ca00c3af8e Merge tag 'kvmarm-fixes-6.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm654 fixes for 6.18, take #2

* Core fixes

  - Fix trapping regression when no in-kernel irqchip is present
    (20251021094358.1963807-1-sascha.bischoff@arm.com)

  - Check host-provided, untrusted ranges and offsets in pKVM
    (20251016164541.3771235-1-vdonnefort@google.com)
    (20251017075710.2605118-1-sebastianene@google.com)

  - Fix regression restoring the ID_PFR1_EL1 register
    (20251030122707.2033690-1-maz@kernel.org

  - Fix vgic ITS locking issues when LPIs are not directly injected
    (20251107184847.1784820-1-oupton@kernel.org)

* Test fixes

  - Correct target CPU programming in vgic_lpi_stress selftest
    (20251020145946.48288-1-mdittgen@amazon.de)

  - Fix exposure of SCTLR2_EL2 and ZCR_EL2 in get-reg-list selftest
    (20251023-b4-kvm-arm64-get-reg-list-sctlr-el2-v1-1-088f88ff992a@kernel.org)
    (20251024-kvm-arm64-get-reg-list-zcr-el2-v1-1-0cd0ff75e22f@kernel.org)

* Misc

  - Update Oliver's email address
    (20251107012830.1708225-1-oupton@kernel.org)
2025-11-09 08:07:55 +01:00
Paolo Bonzini
0e5ba55750 Merge tag 'kvm-x86-fixes-6.18-rc5' of https://github.com/kvm-x86/linux into HEAD
KVM x86 fixes for 6.18:

 - Inject #UD if the guest attempts to execute SEAMCALL or TDCALL as KVM
   doesn't support virtualization the instructions, but the instructions
   are gated only by VMXON, i.e. will VM-Exit instead of taking a #UD and
   thus result in KVM exiting to userspace with an emulation error.

 - Unload the "FPU" when emulating INIT of XSTATE features if and only if
   the FPU is actually loaded, instead of trying to predict when KVM will
   emulate an INIT (CET support missed the MP_STATE path).  Add sanity
   checks to detect and harden against similar bugs in the future.

 - Unregister KVM's GALog notifier (for AVIC) when kvm-amd.ko is unloaded.

 - Use a raw spinlock for svm->ir_list_lock as the lock is taken during
   schedule(), and "normal" spinlocks are sleepable locks when PREEMPT_RT=y.

 - Remove guest_memfd bindings on memslot deletion when a gmem file is dying
   to fix a use-after-free race found by syzkaller.

 - Fix a goof in the EPT Violation handler where KVM checks the wrong
   variable when determining if the reported GVA is valid.
2025-11-09 08:07:32 +01:00
Paolo Bonzini
36567f1de1 Merge tag 'kvm-riscv-fixes-6.18-2' of https://github.com/kvm-riscv/linux into HEAD
KVM/riscv fixes for 6.18, take #2

- Fix check for local interrupts on riscv32
- Read HGEIP CSR on the correct cpu when checking for IMSIC interrupts
- Remove automatic I/O mapping from kvm_arch_prepare_memory_region()
2025-11-09 08:07:03 +01:00
Hamza Mahfooz
e6965188f8 scsi: target: tcm_loop: Fix segfault in tcm_loop_tpg_address_show()
If the allocation of tl_hba->sh fails in tcm_loop_driver_probe() and we
attempt to dereference it in tcm_loop_tpg_address_show() we will get a
segfault, see below for an example. So, check tl_hba->sh before
dereferencing it.

  Unable to allocate struct scsi_host
  BUG: kernel NULL pointer dereference, address: 0000000000000194
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 0 P4D 0
  Oops: 0000 [#1] PREEMPT SMP NOPTI
  CPU: 1 PID: 8356 Comm: tokio-runtime-w Not tainted 6.6.104.2-4.azl3 #1
  Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 09/28/2024
  RIP: 0010:tcm_loop_tpg_address_show+0x2e/0x50 [tcm_loop]
...
  Call Trace:
   <TASK>
   configfs_read_iter+0x12d/0x1d0 [configfs]
   vfs_read+0x1b5/0x300
   ksys_read+0x6f/0xf0
...

Cc: stable@vger.kernel.org
Fixes: 2628b352c3 ("tcm_loop: Show address of tpg in configfs")
Signed-off-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Allen Pais <apais@linux.microsoft.com>
Link: https://patch.msgid.link/1762370746-6304-1-git-send-email-hamzamahfooz@linux.microsoft.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2025-11-08 12:30:02 -05:00
Oliver Upton
4af235bf64 MAINTAINERS: Switch myself to using kernel.org address
I've been running into issues with the linux.dev email
semi-periodically, switching to my kernel.org address while I go figure
out a better home for my inbox.

Signed-off-by: Oliver Upton <oupton@kernel.org>
Link: https://patch.msgid.link/20251107012830.1708225-1-oupton@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-08 11:21:20 +00:00
Oliver Upton
66768669f2 KVM: arm64: vgic-v3: Release reserved slot outside of lpi_xa's lock
xa_release() expects to be called outside of the xa_lock. Fix
vgic_add_lpi() to drop the lock before calling and restructure to get
rid of the goto label.

Reported-by: Zenghui Yu <yuzenghui@huawei.com>
Closes: https://lore.kernel.org/kvmarm/d0853e82-7d95-5025-7abf-c6f1e0cdf7b5@huawei.com/
Fixes: 481c9ee846 ("KVM: arm64: vgic-its: Get rid of the lpi_list_lock")
Signed-off-by: Oliver Upton <oupton@kernel.org>
Link: https://patch.msgid.link/20251107184847.1784820-3-oupton@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-08 11:19:32 +00:00
Oliver Upton
75360a9a33 KVM: arm64: vgic-v3: Reinstate IRQ lock ordering for LPI xarray
Zenghui reports that running a KVM guest with an assigned device and
lockdep enabled produces an unfriendly splat due to an inconsistent irq
context when taking the lpi_xa's spinlock.

This is no good as in rare cases the last reference to an LPI can get
dropped after injection of a cached LPI translation. In this case,
vgic_put_irq() will release the IRQ struct and take the lpi_xa's
spinlock to erase it from the xarray.

Reinstate the IRQ ordering and update the lockdep hint accordingly. Note
that there is no irqsave equivalent of might_lock(), so just explictly
grab and release the spinlock on lockdep kernels.

Reported-by: Zenghui Yu <yuzenghui@huawei.com>
Closes: https://lore.kernel.org/kvmarm/b4d7cb0f-f007-0b81-46d1-998b15cc14bc@huawei.com/
Fixes: 982f31bbb5 ("KVM: arm64: vgic-v3: Don't require IRQs be disabled for LPI xarray lock")
Signed-off-by: Oliver Upton <oupton@kernel.org>
Link: https://patch.msgid.link/20251107184847.1784820-2-oupton@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-08 11:19:32 +00:00
Marc Zyngier
50e7cce81b KVM: arm64: Limit clearing of ID_{AA64PFR0,PFR1}_EL1.GIC to userspace irqchip
Now that the idreg's GIC field is in sync with the irqchip, limit
the runtime clearing of these fields to the pathological case where
we do not have an in-kernel GIC.

While we're at it, use the existing API instead of open-coded
accessors to access the ID regs.

Fixes: 5cb57a1aff ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest")
Reviewed-by: Oliver Upton <oupton@kernel.org>
Link: https://patch.msgid.link/20251030122707.2033690-4-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-08 11:17:28 +00:00
Marc Zyngier
8a9866ff86 KVM: arm64: Set ID_{AA64PFR0,PFR1}_EL1.GIC when GICv3 is configured
Drive the idreg fields indicating the presence of GICv3 directly from
the vgic code. This avoids having to do any sort of runtime clearing
of the idreg.

Fixes: 5cb57a1aff ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest")
Reviewed-by: Oliver Upton <oupton@kernel.org>
Link: https://patch.msgid.link/20251030122707.2033690-3-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-08 11:17:28 +00:00
Marc Zyngier
3f9eacf4f0 KVM: arm64: Make all 32bit ID registers fully writable
32bit ID registers aren't getting much love these days, and are
often missed in updates. One of these updates broke restoring
a GICv2 guest on a GICv3 machine.

Instead of performing a piecemeal fix, just bite the bullet
and make all 32bit ID regs fully writable. KVM itself never
relies on them for anything, and if the VMM wants to mess up
the guest, so be it.

Fixes: 5cb57a1aff ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest")
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Cc: stable@vger.kernel.org
Reviewed-by: Oliver Upton <oupton@kernel.org>
Link: https://patch.msgid.link/20251030122707.2033690-2-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-08 11:17:28 +00:00
Horatiu Vultur
96a9178a29 net: phy: micrel: lan8814 fix reset of the QSGMII interface
The lan8814 is a quad-phy and it is using QSGMII towards the MAC.
The problem is that everytime when one of the ports is configured then
the PCS is reseted for all the PHYs. Meaning that the other ports can
loose traffic until the link is establish again.
To fix this, do the reset one time for the entire PHY package.

Fixes: ece1950283 ("net: phy: micrel: 1588 support for LAN8814 phy")
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Divya Koppera <Divya.Koppera@microchip.com >
Link: https://patch.msgid.link/20251106090637.2030625-1-horatiu.vultur@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-07 19:00:38 -08:00
Wei Fang
ad17e7e92a net: fec: correct rx_bytes statistic for the case SHIFT16 is set
Two additional bytes in front of each frame received into the RX FIFO if
SHIFT16 is set, so we need to subtract the extra two bytes from pkt_len
to correct the statistic of rx_bytes.

Fixes: 3ac72b7b63 ("net: fec: align IP header in hardware")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20251106021421.2096585-1-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-07 18:55:27 -08:00
Alexander Sverdlin
57531b3416 selftests: net: local_termination: Wait for interfaces to come up
It seems that most of the tests prepare the interfaces once before the test
run (setup_prepare()), rely on setup_wait() to wait for link and only then
run the test(s).

local_termination brings the physical interfaces down and up during test
run but never wait for them to come up. If the auto-negotiation takes
some seconds, first test packets are being lost, which leads to
false-negative test results.

Use setup_wait() in run_test() to make sure auto-negotiation has been
completed after all simple_if_init() calls on physical interfaces and test
packets will not be lost because of the race against link establishment.

Fixes: 90b9566aa5 ("selftests: forwarding: add a test for local_termination.sh")
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
Link: https://patch.msgid.link/20251106161213.459501-1-alexander.sverdlin@siemens.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-07 18:46:36 -08:00
Nate Karstens
4da4e4bde1 strparser: Fix signed/unsigned mismatch bug
The `len` member of the sk_buff is an unsigned int. This is cast to
`ssize_t` (a signed type) for the first sk_buff in the comparison,
but not the second sk_buff. On 32-bit systems, this can result in
an integer underflow for certain values because unsigned arithmetic
is being used.

This appears to be an oversight: if the intention was to use unsigned
arithmetic, then the first cast would have been omitted. The change
ensures both len values are cast to `ssize_t`.

The underflow causes an issue with ktls when multiple TLS PDUs are
included in a single TCP segment. The mainline kernel does not use
strparser for ktls anymore, but this is still useful for other
features that still use strparser, and for backporting.

Signed-off-by: Nate Karstens <nate.karstens@garmin.com>
Cc: stable@vger.kernel.org
Fixes: 43a0c6751a ("strparser: Stream parser for messages")
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Link: https://patch.msgid.link/20251106222835.1871628-1-nate.karstens@garmin.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-07 18:17:16 -08:00
Nathan Chancellor
dc20452e6c riscv: Fix CONFIG_AS_HAS_INSN for new .insn usage
After commit 44aa25c000 ("riscv: asm: use .insn for making custom
instructions"), builds using LLVM older that 19 or binutils older than
2.38 fail with:

  arch/riscv/include/asm/vdso/processor.h: Assembler messages:
  arch/riscv/include/asm/vdso/processor.h:27: Error: unrecognized opcode `0x100000f'
  arch/riscv/include/asm/vdso/processor.h:27: Error: unrecognized opcode `0x100000f'
  arch/riscv/include/asm/vdso/processor.h:27: Error: unrecognized opcode `0x100000f'
  arch/riscv/include/asm/vdso/processor.h:27: Error: unrecognized opcode `0x100000f'
  make[4]: *** [scripts/Makefile.build:287: arch/riscv/kernel/vdso/vgettimeofday.o] Error 1

  In file included from <built-in>:4:
  In file included from lib/vdso/gettimeofday.c:6:
  In file included from include/vdso/datapage.h:21:
  In file included from include/vdso/processor.h:10:
  arch/riscv/include/asm/vdso/processor.h:23:2: error: expected instruction format
     23 |         ALT_RISCV_PAUSE();
        |         ^
  arch/riscv/include/asm/errata_list.h:47:3: note: expanded from macro 'ALT_RISCV_PAUSE'
     47 |                 RISCV_PAUSE, /* Original RISC‑V pause insn */ \
        |                 ^
  arch/riscv/include/asm/insn-def.h:259:21: note: expanded from macro 'RISCV_PAUSE'
    259 | #define RISCV_PAUSE     ASM_INSN_I("0x100000f")
        |                         ^
  arch/riscv/include/asm/asm.h:16:26: note: expanded from macro 'ASM_INSN_I'
     16 | #define ASM_INSN_I(__x) ".insn " __x
        |                          ^
  <inline asm>:5:7: note: instantiated into assembly here
      5 | .insn 0x100000f
        |       ^

binutils gained support for '.insn <value>' in 2.38 [1] and LLVM gained
support in 19 [2]. Adjust the test for CONFIG_AS_HAS_INSN to ensure that
all versions of .insn are supported before being used.

Fixes: 44aa25c000 ("riscv: asm: use .insn for making custom instructions")
Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=a262b82fdbf4cda3b0648b1adc32245ca3f78b7a [1]
Link: 2a086dce69 [2]
Suggested-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://patch.msgid.link/20251107-riscv-fix-new-insn-usage-v1-1-9a186c5928a0@kernel.org
Signed-off-by: Paul Walmsley <pjw@kernel.org>
2025-11-07 17:39:07 -07:00
Feng Jiang
5e8632987d riscv: Remove redundant judgment for the default build target
The value of KBUILD_IMAGE is derived from $(boot-image-y),
so there's no need for redundant checks before this.

Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
Reviewed-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
Link: https://lore.kernel.org/r/20251029094429.553842-2-jiangfeng@kylinos.cn
Signed-off-by: Paul Walmsley <pjw@kernel.org>
2025-11-07 17:39:07 -07:00
Feng Jiang
3ad1b71fdc riscv: Build loader.bin exclusively for Canaan K210
According to the explanation in commit ef10bdf9c3 ("riscv:
Kconfig.socs: Split ARCH_CANAAN and SOC_CANAAN_K210"),
loader.bin is a special feature of the Canaan K210 and
is not applicable to other SoCs.

Fixes: e79dfcbfb9 ("riscv: make image compression configurable")
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
Reviewed-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
Link: https://lore.kernel.org/r/20251029094429.553842-1-jiangfeng@kylinos.cn
Signed-off-by: Paul Walmsley <pjw@kernel.org>
2025-11-07 17:39:06 -07:00
Kaushlendra Kumar
4b93d211bb ACPI: MRRM: Fix memory leaks and improve error handling
Add proper error handling and resource cleanup to prevent memory leaks
in add_boot_memory_ranges(). The function now checks for NULL return
from kobject_create_and_add(), uses local buffer for range names to
avoid dynamic allocation, and implements a cleanup path that removes
previously created sysfs groups and kobjects on failure.

This prevents resource leaks when kobject creation or sysfs group
creation fails during boot memory range initialization.

Signed-off-by: Kaushlendra Kumar <kaushlendra.kumar@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://patch.msgid.link/20251030023228.3956296-1-kaushlendra.kumar@intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-07 21:48:49 +01:00
Shubhrajyoti Datta
2cf95b9baa EDAC/versalnet: Handle split messages for non-standard errors
The current code assumes that only DDR errors have split messages.  Ensure
proper logging of non-standard event errors that may be split across multiple
messages too.

  [ bp: Massage, move comment too, fix it up. ]

Fixes: d5fe2fec6c ("EDAC: Add a driver for the AMD Versal NET DDR controller")
Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://patch.msgid.link/20251023113108.3467132-1-shubhrajyoti.datta@amd.com
2025-11-07 20:15:14 +01:00
Gautham R. Shenoy
0fce758706 ACPI: CPPC: Limit perf ctrs in PCC check only to online CPUs
per_cpu(cpc_desc_ptr, cpu) object is initialized for only the online
CPU via acpi_soft_cpu_online() --> __acpi_processor_start() -->
acpi_cppc_processor_probe().

However the function cppc_perf_ctrs_in_pcc() checks if the CPPC
perf-ctrs are in a PCC region for all the present CPUs, which breaks
when the kernel is booted with "nosmt=force".

Hence, limit the check only to the online CPUs.

Fixes: ae2df912d1 ("ACPI: CPPC: Disable FIE if registers in PCC regions")
Reviewed-by: "Mario Limonciello (AMD) (kernel.org)" <superm1@kernel.org>
Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Link: https://patch.msgid.link/20251107074145.2340-5-gautham.shenoy@amd.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-07 18:37:42 +01:00
Gautham R. Shenoy
8821c8e80a ACPI: CPPC: Perform fast check switch only for online CPUs
per_cpu(cpc_desc_ptr, cpu) object is initialized for only the online
CPUs via acpi_soft_cpu_online() --> __acpi_processor_start() -->
acpi_cppc_processor_probe().

However the function cppc_allow_fast_switch() checks for the validity
of the _CPC object for all the present CPUs. This breaks when the
kernel is booted with "nosmt=force".

Check fast_switch capability only on online CPUs

Fixes: 15eece6c5b ("ACPI: CPPC: Fix NULL pointer dereference when nosmp is used")
Reviewed-by: "Mario Limonciello (AMD) (kernel.org)" <superm1@kernel.org>
Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Link: https://patch.msgid.link/20251107074145.2340-4-gautham.shenoy@amd.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-07 18:37:42 +01:00
Gautham R. Shenoy
6dd3b8a709 ACPI: CPPC: Check _CPC validity for only the online CPUs
per_cpu(cpc_desc_ptr, cpu) object is initialized for only the online
CPUs via acpi_soft_cpu_online() --> __acpi_processor_start() -->
acpi_cppc_processor_probe().

However the function acpi_cpc_valid() checks for the validity of the
_CPC object for all the present CPUs. This breaks when the kernel is
booted with "nosmt=force".

Hence check the validity of the _CPC objects of only the online CPUs.

Fixes: 2aeca6bd02 ("ACPI: CPPC: Check present CPUs for determining _CPC is valid")
Reported-by: Christopher Harris <chris.harris79@gmail.com>
Closes: https://lore.kernel.org/lkml/CAM+eXpdDT7KjLV0AxEwOLkSJ2QtrsvGvjA2cCHvt1d0k2_C4Cw@mail.gmail.com/
Suggested-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: "Mario Limonciello (AMD) (kernel.org)" <superm1@kernel.org>
Tested-by: Chrisopher Harris <chris.harris79@gmail.com>
Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Link: https://patch.msgid.link/20251107074145.2340-3-gautham.shenoy@amd.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-07 18:37:42 +01:00
Gautham R. Shenoy
4fe5934db4 ACPI: CPPC: Detect preferred core availability on online CPUs
Commit 279f838a61 ("x86/amd: Detect preferred cores in
amd_get_boost_ratio_numerator()") introduced the ability to detect the
preferred core on AMD platforms by checking if there at least two
distinct highest_perf values.

However, it uses for_each_present_cpu() to iterate through all the
CPUs in the platform, which is problematic when the kernel is booted
with "nosmt=force" commandline option.

Hence limit the search to only the online CPUs.

Fixes: 279f838a61 ("x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()")
Reported-by: Christopher Harris <chris.harris79@gmail.com>
Closes: https://lore.kernel.org/lkml/CAM+eXpdDT7KjLV0AxEwOLkSJ2QtrsvGvjA2cCHvt1d0k2_C4Cw@mail.gmail.com/
Reviewed-by: "Mario Limonciello (AMD) (kernel.org)" <superm1@kernel.org>
Tested-by: Chrisopher Harris <chris.harris79@gmail.com>
Signed-off-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Link: https://patch.msgid.link/20251107074145.2340-2-gautham.shenoy@amd.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-07 18:37:42 +01:00
Dave Jiang
b6cfddd26e cxl: Adjust offset calculation for poison injection
The HPA to DPA translation for poison injection assumes that the
base address starts from where the CXL region begins. When the
extended linear cache is active, the offset can be within the DRAM
region. Adjust the offset so that it correctly reflects the offset
within the CXL region.

[ dj: Add fixes tag from Alison ]

Fixes: c3dd67681c ("cxl/region: Add inject and clear poison by region offset")
Link: https://patch.msgid.link/20251031173224.3537030-5-dave.jiang@intel.com
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-11-07 09:22:10 -07:00
Mario Limonciello (AMD)
0b6c10cb84 PM: hibernate: Fix style issues in save_compressed_image()
Address two issues indicated by checkpatch:

 - Trailing statements should be on next line.
 - Prefer 'unsigned int' to bare use of 'unsigned'.

Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
[ rjw: Changelog edits ]
Link: https://patch.msgid.link/20251106045158.3198061-4-superm1@kernel.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-07 16:53:57 +01:00
Mario Limonciello (AMD)
66ededc694 PM: hibernate: Use atomic64_t for compressed_size variable
`compressed_size` can overflow, showing nonsensical values.

Change from `atomic_t` to `atomic64_t` to prevent overflow.

Fixes: a06c6f5d3c ("PM: hibernate: Move to crypto APIs for LZO compression")
Reported-by: Askar Safin <safinaskar@gmail.com>
Closes: https://lore.kernel.org/linux-pm/20251105180506.137448-1-safinaskar@gmail.com/
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Tested-by: Askar Safin <safinaskar@gmail.com>
Cc: 6.9+ <stable@vger.kernel.org> # 6.9+
Link: https://patch.msgid.link/20251106045158.3198061-3-superm1@kernel.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-07 16:53:56 +01:00
Mario Limonciello (AMD)
62b9ca1706 PM: hibernate: Emit an error when image writing fails
If image writing fails, a return code is passed up to the caller, but
none of the callers log anything to the log and so the only record
of it is the return code that userspace gets.

Adjust the logging so that the image size and speed of writing is
only emitted on success and if there is an error, it's saved to the
logs.

Fixes: a06c6f5d3c ("PM: hibernate: Move to crypto APIs for LZO compression")
Reported-by: Askar Safin <safinaskar@gmail.com>
Closes: https://lore.kernel.org/linux-pm/20251105180506.137448-1-safinaskar@gmail.com/
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Tested-by: Askar Safin <safinaskar@gmail.com>
Cc: 6.9+ <stable@vger.kernel.org> # 6.9+
[ rjw: Added missing braces after "else", changelog edits ]
Link: https://patch.msgid.link/20251106045158.3198061-2-superm1@kernel.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2025-11-07 16:53:56 +01:00
Adrian Barnaś
8e8ae78896 arm64: Reject modules with internal alternative callbacks
During module loading, check if a callback function used by the
alternatives specified in the '.altinstruction' ELF section (if present)
is located in core kernel .text. If not fail module loading before
callback is called.

Reported-by: Fanqin Cui <cuifq1@chinatelecom.cn>
Closes: https://lore.kernel.org/all/20250807072700.348514-1-fanqincui@163.com/
Signed-off-by: Adrian Barnaś <abarnas@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
[will: Folded in 'noinstr' tweak from Mark]
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 15:00:14 +00:00
Adrian Barnaś
6d4a0fbd34 arm64: Fail module loading if dynamic SCS patching fails
Disallow a module to load if SCS dynamic patching fails for its code. For
module loading, instead of running a dry-run to check for patching errors,
try to run patching in the first run and propagate any errors so module
loading will fail.

Signed-off-by: Adrian Barnaś <abarnas@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 15:00:09 +00:00
shechenglong
7f16357378 arm64: proton-pack: Fix hard lockup due to print in scheduler context
Relocate the printk() calls from spectre_v4_mitigations_off() and
spectre_v2_mitigations_off() into setup_system_capabilities() function,
preventing hard lockups caused by printk calls in scheduler context:

  | _raw_spin_lock_nested+168
  | ttwu_queue+180 (rq_lock(rq, &rf); 2nd acquiring the rq->__lock)
  | try_to_wake_up+548
  | wake_up_process+32
  | __up+88
  | up+100
  | __up_console_sem+96
  | console_unlock+696
  | vprintk_emit+428
  | vprintk_default+64
  | vprintk_func+220
  | printk+104
  | spectre_v4_enable_task_mitigation+344
  | __switch_to+100
  | __schedule+1028 (rq_lock(rq, &rf); 1st acquiring the rq->__lock)
  | schedule_idle+48
  | do_idle+388
  | cpu_startup_entry+44
  | secondary_start_kernel+352

Suggested-by: Mark Rutland <mark.rutland@arm.com>
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Suggested-by: Will Deacon <will@kernel.org>
Signed-off-by: shechenglong <shechenglong@xfusion.com>
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 14:49:12 +00:00
shechenglong
62e72463ca arm64: proton-pack: Drop print when !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
Following the pattern established with other Spectre mitigations,
do not print a message when the CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
Kconfig option is disabled.

Suggested-by: Will Deacon <will@kernel.org>
Signed-off-by: shechenglong <shechenglong@xfusion.com>
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 14:49:09 +00:00
Ryan Roberts
53357f14f9 arm64: mm: Tidy up force_pte_mapping()
Tidy up the implementation of force_pte_mapping() to make it easier to
read and introduce the split_leaf_mapping_possible() helper to reduce
code duplication in split_kernel_leaf_mapping() and
arch_kfence_init_pool().

Suggested-by: David Hildenbrand (Red Hat) <david@kernel.org>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: David Hildenbrand (Red Hat) <david@kernel.org>
Reviewed-by: Yang Shi <yang@os.amperecomputing.com>
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 14:43:15 +00:00
Ryan Roberts
40a292f701 arm64: mm: Optimize range_split_to_ptes()
Enter lazy_mmu mode while splitting a range of memory to pte mappings.
This causes barriers, which would otherwise be emitted after every pte
(and pmd/pud) write, to be deferred until exiting lazy_mmu mode.

For large systems, this is expected to significantly speed up fallback
to pte-mapping the linear map for the case where the boot CPU has
BBML2_NOABORT, but secondary CPUs do not. I haven't directly measured
it, but this is equivalent to commit 1fcb7cea8a ("arm64: mm: Batch dsb
and isb when populating pgtables").

Note that for the path from arch_kfence_init_pool(), we may sleep while
allocating memory inside the lazy_mmu mode. Sleeping is not allowed by
generic code inside lazy_mmu, but we know that the arm64 implementation
is sleep-safe. So this is ok and follows the same pattern already used
by split_kernel_leaf_mapping().

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Yang Shi <yang@os.amperecomputing.com>
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 14:43:15 +00:00
Ryan Roberts
ce2b3a50ad arm64: mm: Don't sleep in split_kernel_leaf_mapping() when in atomic context
It has been reported that split_kernel_leaf_mapping() is trying to sleep
in non-sleepable context. It does this when acquiring the
pgtable_split_lock mutex, when either CONFIG_DEBUG_PAGEALLOC or
CONFIG_KFENCE are enabled, which change linear map permissions within
softirq context during memory allocation and/or freeing. All other paths
into this function are called from sleepable context and so are safe.

But it turns out that the memory for which these 2 features may attempt
to modify the permissions is always mapped by pte, so there is no need
to attempt to split the mapping. So let's exit early in these cases and
avoid attempting to take the mutex.

There is one wrinkle to this approach; late-initialized kfence allocates
it's pool from the buddy which may be block mapped. So we must hook that
allocation and convert it to pte-mappings up front. Previously this was
done as a side-effect of kfence protecting all the individual pages in
its pool at init-time, but this no longer works due to the added early
exit path in split_kernel_leaf_mapping().

So instead, do this via the existing arch_kfence_init_pool() arch hook,
and reuse the existing linear_map_split_to_ptes() infrastructure.

Closes: https://lore.kernel.org/all/f24b9032-0ec9-47b1-8b95-c0eeac7a31c5@roeck-us.net/
Fixes: a166563e7e ("arm64: mm: support large block mapping when rodata=full")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <groeck@google.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: David Hildenbrand (Red Hat) <david@kernel.org>
Reviewed-by: Yang Shi <yang@os.amperecomputing.com>
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 14:43:15 +00:00
Yang Shi
0ec364c0c9 arm64: kprobes: check the return value of set_memory_rox()
Since commit a166563e7e ("arm64: mm: support large block mapping when
rodata=full"), __change_memory_common has more chance to fail due to
memory allocation failure when splitting page table. So check the return
value of set_memory_rox(), then bail out if it fails otherwise we may have
RW memory mapping for kprobes insn page.

Fixes: 195a1b7d83 ("arm64: kprobes: call set_memory_rox() for kprobe page")
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Reviewed-by: Dev Jain <dev.jain@arm.com>
Signed-off-by: Yang Shi <yang@os.amperecomputing.com>
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 14:30:22 +00:00
Punit Agrawal
7991fda619 arm64: acpi: Drop message logging SPCR default console
Commit f5a4af3c75 ("ACPI: Add acpi=nospcr to disable ACPI SPCR as
default console on ARM64") introduced a command line parameter to
prevent using SPCR provided console as default. It also introduced a
message to log this choice.

Drop the message as it is not particularly useful and can be incorrect
in situations where no SPCR is provided by the firmware.

Link: https://lore.kernel.org/all/aQN0YWUYaPYWpgJM@willie-the-truck/
Signed-off-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 14:23:33 +00:00
Punit Agrawal
eeb8c19896 Revert "ACPI: Suppress misleading SPCR console message when SPCR table is absent"
This reverts commit bad3fa2fb9.

Commit bad3fa2fb9 ("ACPI: Suppress misleading SPCR console message
when SPCR table is absent") mistakenly assumes acpi_parse_spcr()
returning 0 to indicate a failure to parse SPCR. While addressing the
resultant incorrect logging it was deemed that dropping the message is
a better approach as it is not particularly useful.

Roll back the commit introducing the bug as a step towards dropping
the log message.

Link: https://lore.kernel.org/all/aQN0YWUYaPYWpgJM@willie-the-truck/
Signed-off-by: Punit Agrawal <punit.agrawal@oss.qualcomm.com>
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 14:23:33 +00:00
Catalin Marinas
535fdfc5a2 arm64: Use load LSE atomics for the non-return per-CPU atomic operations
The non-return per-CPU this_cpu_*() atomic operations are implemented as
STADD/STCLR/STSET when FEAT_LSE is available. On many microarchitecture
implementations, these instructions tend to be executed "far" in the
interconnect or memory subsystem (unless the data is already in the L1
cache). This is in general more efficient when there is contention as it
avoids bouncing cache lines between CPUs. The load atomics (e.g. LDADD
without XZR as destination), OTOH, tend to be executed "near" with the
data loaded into the L1 cache.

STADD executed back to back as in srcu_read_{lock,unlock}*() incur an
additional overhead due to the default posting behaviour on several CPU
implementations. Since the per-CPU atomics are unlikely to be used
concurrently on the same memory location, encourage the hardware to to
execute them "near" by issuing load atomics - LDADD/LDCLR/LDSET - with
the destination register unused (but not XZR).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop
Reported-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Palmer Dabbelt <palmer@dabbelt.com>
[will: Add comment and link to the discussion thread]
Signed-off-by: Will Deacon <will@kernel.org>
2025-11-07 14:20:07 +00:00
Alvaro Gamez Machado
939edfaa10 spi: xilinx: increase number of retries before declaring stall
SPI devices using a (relative) slow frequency need a larger time.

For instance, microblaze running at 83.25MHz and performing a
3 bytes transaction using a 10MHz/16 = 625kHz needed this stall
value increased to at least 20. The SPI device is quite slow, but
also is the microblaze, so set this value to 32 to give it even
more margin.

Signed-off-by: Alvaro Gamez Machado <alvaro.gamez@hazent.com>
Reviewed-by: Ricardo Ribalda <ribalda@chromium.org>
Link: https://patch.msgid.link/20251106134545.31942-1-alvaro.gamez@hazent.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-07 09:37:46 +00:00
Shenghao Ding
29528c8e64 ASoC: tas2781: fix getting the wrong device number
The return value of device_property_read_u32_array used for getting the
property is the status instead of the number of the property.

Fixes: ef3bcde75d ("ASoC: tas2781: Add tas2781 driver")
Signed-off-by: Shenghao Ding <shenghao-ding@ti.com>
Link: https://patch.msgid.link/20251107054959.950-1-shenghao-ding@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-07 09:37:36 +00:00
Ian Forbes
eef295a850 drm/vmwgfx: Restore Guest-Backed only cursor plane support
The referenced fixes commit broke the cursor plane for configurations
which have Guest-Backed surfaces but no cursor MOB support.

Fixes: 965544150d ("drm/vmwgfx: Refactor cursor handling")
Signed-off-by: Ian Forbes <ian.forbes@broadcom.com>
Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
Link: https://patch.msgid.link/20251103201920.381503-1-ian.forbes@broadcom.com
2025-11-07 00:01:15 -05:00
Ian Forbes
c1962742ff drm/vmwgfx: Use kref in vmw_bo_dirty
Rather than using an ad hoc reference count use kref which is atomic
and has underflow warnings.

Signed-off-by: Ian Forbes <ian.forbes@broadcom.com>
Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
Link: https://patch.msgid.link/20251030193640.153697-1-ian.forbes@broadcom.com
2025-11-07 00:00:53 -05:00
Ian Forbes
32b415a9dc drm/vmwgfx: Validate command header size against SVGA_CMD_MAX_DATASIZE
This data originates from userspace and is used in buffer offset
calculations which could potentially overflow causing an out-of-bounds
access.

Fixes: 8ce75f8ab9 ("drm/vmwgfx: Update device includes for DX device functionality")
Reported-by: Rohit Keshri <rkeshri@redhat.com>
Signed-off-by: Ian Forbes <ian.forbes@broadcom.com>
Reviewed-by: Maaz Mombasawala <maaz.mombasawala@broadcom.com>
Signed-off-by: Zack Rusin <zack.rusin@broadcom.com>
Link: https://patch.msgid.link/20251021190128.13014-1-ian.forbes@broadcom.com
2025-11-06 23:59:40 -05:00
Gal Pressman
74d4432421 docs: netlink: Couple of intro-specs documentation fixes
Fix typo "handul" to "handful" and remove outdated limitation
stating only generic netlink is supported (we have netlink-raw).

Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
Signed-off-by: Gal Pressman <gal@nvidia.com>
Link: https://patch.msgid.link/20251105192908.686458-1-gal@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-11-06 14:50:59 -08:00
Andrey Leonchikov
a59e927ff4 arm64: dts: rockchip: Fix USB power enable pin for BTT CB2 and Pi2
Fix typo into regulator GPIO definition. With current
 definition - USB powered off. Valid definition can be found on "pinctrl"
 section:
 		vcc5v0_usb2t_en: vcc5v0-usb2t-en {
 				rockchip,pins = <3 RK_PD5 RK_FUNC_GPIO &pcfg_pull_none>;
 				 		};

 		vcc5v0_usb2b_en: vcc5v0-usb2b-en {
 			rockchip,pins = <4 RK_PC4 RK_FUNC_GPIO &pcfg_pull_none>;
 		};

Fixes: bfbc663d27 ("arm64: dts: rockchip: Add BigTreeTech CB2 and Pi2")
Signed-off-by: Andrey Leonchikov <andreil499@gmail.com>
Link: https://patch.msgid.link/20251105210741.850031-1-andreil499@gmail.com
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-11-06 23:28:34 +01:00
Gao Xiang
f2a12cc3b9 erofs: avoid infinite loop due to incomplete zstd-compressed data
Currently, the decompression logic incorrectly spins if compressed
data is truncated in crafted (deliberately corrupted) images.

Fixes: 7c35de4df1 ("erofs: Zstandard compression support")
Reported-by: Robert Morris <rtm@csail.mit.edu>
Closes: https://lore.kernel.org/r/50958.1761605413@localhost
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Chunhai Guo <guochunhai@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
2025-11-07 04:10:45 +08:00
Johannes Berg
a9da90e618 wifi: mac80211: reject address change while connecting
While connecting, the MAC address can already no longer be
changed. The change is already rejected if netif_carrier_ok(),
but of course that's not true yet while connecting. Check for
auth_data or assoc_data, so the MAC address cannot be changed.

Also more comprehensively check that there are no stations on
the interface being changed - if any peer station is added it
will know about our address already, so we cannot change it.

Cc: stable@vger.kernel.org
Fixes: 3c06e91b40 ("wifi: mac80211: Support POWERED_ADDR_CHANGE feature")
Link: https://patch.msgid.link/20251105154119.f9f6c1df81bb.I9bb3760ede650fb96588be0d09a5a7bdec21b217@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
2025-11-06 19:07:47 +01:00
Haotian Zhang
3dc8c73365 ASoC: codecs: va-macro: fix resource leak in probe error path
In the commit referenced by the Fixes tag, clk_hw_get_clk()
was added in va_macro_probe() to get the fsgen clock,
but forgot to add the corresponding clk_put() in va_macro_remove().
This leads to a clock reference leak when the driver is unloaded.

Switch to devm_clk_hw_get_clk() to automatically manage the
clock resource.

Fixes: 30097967e0 ("ASoC: codecs: va-macro: use fsgen as clock")
Suggested-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Signed-off-by: Haotian Zhang <vulab@iscas.ac.cn>
Reviewed-by: Konrad Dybcio <konrad.dybcio@oss.qualcomm.com>
Link: https://patch.msgid.link/20251106143114.729-1-vulab@iscas.ac.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-06 17:07:05 +00:00
Nicolas Escande
9065b96875 wifi: ath11k: zero init info->status in wmi_process_mgmt_tx_comp()
When reporting tx completion using ieee80211_tx_status_xxx() family of
functions, the status part of the struct ieee80211_tx_info nested in the
skb is used to report things like transmit rates & retry count to mac80211

On the TX data path, this is correctly memset to 0 before calling
ieee80211_tx_status_ext(), but on the tx mgmt path this was not done.

This leads to mac80211 treating garbage values as valid transmit counters
(like tx retries for example) and accounting them as real statistics that
makes their way to userland via station dump.

The same issue was resolved in ath12k by commit 9903c0986f ("wifi:
ath12k: Add memset and update default rate value in wmi tx completion")

Tested-on: QCN9074 PCI WLAN.HK.2.9.0.1-01977-QCAHKSWPL_SILICONZ-1

Fixes: d5c65159f2 ("ath11k: driver for Qualcomm IEEE 802.11ax devices")
Signed-off-by: Nicolas Escande <nico.escande@gmail.com>
Reviewed-by: Vasanthakumar Thiagarajan <vasanthakumar.thiagarajan@oss.qualcomm.com>
Reviewed-by: Baochen Qiang <baochen.qiang@oss.qualcomm.com>
Link: https://patch.msgid.link/20251104083957.717825-1-nico.escande@gmail.com
Signed-off-by: Jeff Johnson <jeff.johnson@oss.qualcomm.com>
2025-11-06 07:26:21 -08:00
Sukrit Bhatnagar
d0164c1619 KVM: VMX: Fix check for valid GVA on an EPT violation
On an EPT violation, bit 7 of the exit qualification is set if the
guest linear-address is valid. The derived page fault error code
should not be checked for this bit.

Fixes: f300948251 ("KVM: VMX: Set PFERR_GUEST_{FINAL,PAGE}_MASK if and only if the GVA is valid")
Cc: stable@vger.kernel.org
Signed-off-by: Sukrit Bhatnagar <Sukrit.Bhatnagar@sony.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://patch.msgid.link/20251106052853.3071088-1-Sukrit.Bhatnagar@sony.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-11-06 06:06:18 -08:00
Robin Gong
86d57d9c07 spi: imx: keep dma request disabled before dma transfer setup
Since sdma hardware configure postpone to transfer phase, have to disable
dma request before dma transfer setup because there is a hardware
limitation on sdma event enable(ENBLn) as below:

"It is thus essential for the Arm platform to program them before any DMA
 request is triggered to the SDMA, otherwise an unpredictable combination
 of channels may be started."

Signed-off-by: Carlos Song <carlos.song@nxp.com>
Signed-off-by: Robin Gong <yibin.gong@nxp.com>
Link: https://patch.msgid.link/20251024055320.408482-1-carlos.song@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-06 13:13:57 +00:00
Niranjan H Y
84f5526e4d ASoC: tas2783A: Fix issues in firmware parsing
During firmware download, if the size of the firmware is too small,
it wrongly assumes the firmware download is successful. If there is
size mismatch with chunk's header, invalid memory is accessed.
Fix these issues by throwing error during these cases.

Fixes: 4cc9bd8d7b (ASoc: tas2783A: Add soundwire based codec driver)
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Closes: https://lore.kernel.org/r/202510291226.2R3fbYNh-lkp@intel.com/
Signed-off-by: Niranjan H Y <niranjan.hy@ti.com>
Link: https://patch.msgid.link/20251030151637.566-1-niranjan.hy@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-06 13:12:34 +00:00
Miaoqian Lin
1a58d865f4 ASoC: sdw_utils: fix device reference leak in is_sdca_endpoint_present()
The bus_find_device_by_name() function returns a device pointer with an
incremented reference count, but the original code was missing put_device()
calls in some return paths, leading to reference count leaks.

Fix this by ensuring put_device() is called before function exit after
  bus_find_device_by_name() succeeds

This follows the same pattern used elsewhere in the kernel where
bus_find_device_by_name() is properly paired with put_device().

Found via static analysis and code review.

Fixes: 4f8ef33dd4 ("ASoC: soc_sdw_utils: skip the endpoint that doesn't present")
Cc: stable@vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Link: https://patch.msgid.link/20251029071804.8425-1-linmq006@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-06 13:12:33 +00:00
Haotian Zhang
6b6eddc63c ASoC: cs4271: Fix regulator leak on probe failure
The probe function enables regulators at the beginning
but fails to disable them in its error handling path.
If any operation after enabling the regulators fails,
the probe will exit with an error, leaving the regulators
permanently enabled, which could lead to a resource leak.

Add a proper error handling path to call regulator_bulk_disable()
before returning an error.

Fixes: 9a397f4736 ("ASoC: cs4271: add regulator consumer support")
Signed-off-by: Haotian Zhang <vulab@iscas.ac.cn>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://patch.msgid.link/20251105062246.1955-1-vulab@iscas.ac.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-06 13:12:32 +00:00
Antheas Kapenekakis
f945afe01c platform/x86/amd: pmc: Add Lenovo Legion Go 2 to pmc quirk list
The Lenovo Legion Go 2 takes a long time to resume from suspend.
This is due to it having an nvme resume handler that interferes
with IOMMU mappings. It is a common issue with older Lenovo
laptops. Adding it to that quirk list fixes this issue.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4618
Suggested-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Antheas Kapenekakis <lkml@antheas.dev>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Link: https://patch.msgid.link/20251008135057.731928-1-lkml@antheas.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:32 +02:00
Antheas Kapenekakis
c0ddc54016 platform/x86/amd/pmc: Add spurious_8042 to Xbox Ally
The Xbox Ally features a Van Gogh SoC that has spurious interrupts
during resume. We get the following logs:

atkbd_receive_byte: 20 callbacks suppressed
atkbd serio0: Spurious ACK on isa0060/serio0. Some program might be trying to access hardware directly.

So, add the spurious_8042 quirk for it. It does not have a keyboard, so
this does not result in any functional loss.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4659
Signed-off-by: Antheas Kapenekakis <lkml@antheas.dev>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Link: https://patch.msgid.link/20251024152152.3981721-3-lkml@antheas.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:31 +02:00
Antheas Kapenekakis
db4a3f0fbe platform/x86/amd/pmc: Add support for Van Gogh SoC
The ROG Xbox Ally (non-X) SoC features a similar architecture to the
Steam Deck. While the Steam Deck supports S3 (s2idle causes a crash),
this support was dropped by the Xbox Ally which only S0ix suspend.

Since the handler is missing here, this causes the device to not suspend
and the AMD GPU driver to crash while trying to resume afterwards due to
a power hang.

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4659
Signed-off-by: Antheas Kapenekakis <lkml@antheas.dev>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Acked-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Link: https://patch.msgid.link/20251024152152.3981721-2-lkml@antheas.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:29 +02:00
Kurt Borja
a6003d90f0 platform/x86: alienware-wmi-wmax: Add support for the whole "G" family
Add support for the whole "Dell G" laptop family.

Cc: stable@vger.kernel.org
Signed-off-by: Kurt Borja <kuurtb@gmail.com>
Link: https://patch.msgid.link/20251103-family-supp-v1-5-a241075d1787@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:27 +02:00
Kurt Borja
21ebfff1cf platform/x86: alienware-wmi-wmax: Add support for the whole "X" family
Add support for the whole "Alienware X" laptop family.

Cc: stable@vger.kernel.org
Signed-off-by: Kurt Borja <kuurtb@gmail.com>
Link: https://patch.msgid.link/20251103-family-supp-v1-4-a241075d1787@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:26 +02:00
Kurt Borja
e8c3c875e1 platform/x86: alienware-wmi-wmax: Add support for the whole "M" family
Add support for the whole "Alienware M" laptop family.

Cc: stable@vger.kernel.org
Signed-off-by: Kurt Borja <kuurtb@gmail.com>
Link: https://patch.msgid.link/20251103-family-supp-v1-3-a241075d1787@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:24 +02:00
Kurt Borja
173b238087 platform/x86: alienware-wmi-wmax: Drop redundant DMI entries
The awcc_dmi_table[] uses DMI_MATCH() that supports partial matches. As
there is already "Alienware Area-51m" entry, "Alienware Area-51m R2" entry
is redundant.

Signed-off-by: Kurt Borja <kuurtb@gmail.com>
Link: https://patch.msgid.link/20251103-family-supp-v1-2-a241075d1787@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:23 +02:00
Kurt Borja
bd4f9f113d platform/x86: alienware-wmi-wmax: Fix "Alienware m16 R1 AMD" quirk order
Quirks are matched using dmi_first_match(), therefore move the
"Alienware m16 R1 AMD" entry above other m16 entries.

Reported-by: Cihan Ozakca <cozakca@outlook.com>
Fixes: e2468dc700 ("Revert "platform/x86: alienware-wmi-wmax: Add G-Mode support to Alienware m16 R1"")
Cc: stable@vger.kernel.org
Signed-off-by: Kurt Borja <kuurtb@gmail.com>
Link: https://patch.msgid.link/20251103-family-supp-v1-1-a241075d1787@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:21 +02:00
Randy Dunlap
5f20bc206b platform/x86: ISST: isst_if.h: fix all kernel-doc warnings
Fix all kernel-doc warnings in <uapi/linux/isst_if.h>:

- don't use "[]" in the variable name in kernel-doc
- add a few missing entries
- change "power_domain" to "power_domain_id" in kernel-doc to match
  the struct member name
- add a leading '@' on a few existing kernel-doc lines
- use '_' instead of '-' in struct member names

Examples (but not all 27 warnings):

Warning: include/uapi/linux/isst_if.h:63 struct member 'cpu_map'
 not described in 'isst_if_cpu_maps'
Warning: ../include/uapi/linux/isst_if.h:95 struct member 'req_count'
 not described in 'isst_if_io_regs'
Warning: include/uapi/linux/isst_if.h:132 struct member 'mbox_cmd'
 not described in 'isst_if_mbox_cmds'
Warning: ../include/uapi/linux/isst_if.h:183 struct member 'supported'
 not described in 'isst_core_power'
Warning: ../include/uapi/linux/isst_if.h:206 struct member
 'power_domain_id' not described in 'isst_clos_param'
Warning: ../include/uapi/linux/isst_if.h:239 struct member 'assoc_info'
 not described in 'isst_if_clos_assoc_cmds'
Warning: ../include/uapi/linux/isst_if.h:286 struct member 'sst_tf_support'
 not described in 'isst_perf_level_info'
Warning: ../include/uapi/linux/isst_if.h:375 struct member 'trl_freq_mhz'
 not described in 'isst_perf_level_data_info'
Warning: ../include/uapi/linux/isst_if.h:475 struct member 'max_buckets'
 not described in 'isst_turbo_freq_info'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://patch.msgid.link/20251023194615.180824-1-rdunlap@infradead.org
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:20 +02:00
Kuppuswamy Sathyanarayanan
a229809c18 platform/x86: intel-uncore-freq: Add additional client processors
Add Intel uncore frequency driver support for Pantherlake, Wildcatlake
and Novalake processors.

Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Link: https://patch.msgid.link/20251022211733.3565526-1-sathyanarayanan.kuppuswamy@linux.intel.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:18 +02:00
Krishna Chomal
fb146a38cb platform/x86: hp-wmi: Add Omen 16-wf1xxx fan support
The newer HP Omen laptops, such as Omen 16-wf1xxx, use the same
WMI-based thermal profile interface as Victus 16-r1000 and 16-s1000
models.

Add the DMI board name "8C78" to the victus_s_thermal_profile_boards
list to enable proper fan and thermal mode control.

Tested on: HP Omen 16-wf1xxx (board 8C78)
Result:
* Fan RPMs are readable
* echo 0 | sudo tee /sys/devices/platform/hp-wmi/hwmon/*/pwm1_enable
  allows the fans to run on max RPM.

Signed-off-by: Krishna Chomal <krishna.chomal108@gmail.com>
Link: https://patch.msgid.link/20251018111001.56625-1-krishna.chomal108@gmail.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:19:06 +02:00
Jia Ston
5c72329716 platform/x86: huawei-wmi: add keys for HONOR models
HONOR MagicBook X16/X14 models produced in 2025 cannot use the Print
Screen and YOYO keys properly, with the system reporting them as
unknown key presses (codes: 0x028b and 0x028e).

To resolve this, a key_entry is added for both the HONOR Print Screen
key and the HONOR YOYO key, ensuring they function correctly on these
models.

Signed-off-by: Ston Jia <ston.jia@outlook.com>
Link: https://patch.msgid.link/20251029051804.220111-1-ston.jia@outlook.com
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:11:43 +02:00
Edip Hazuri
54afb047cd platform/x86: hp-wmi: mark Victus 16-r0 and 16-s0 for victus_s fan and thermal profile support
This patch adds Victus 16-r0 (8bbe) and Victus 16-s0(8bd4, 8bd5) laptop
DMI board name into existing list

Signed-off-by: Edip Hazuri <edip@medip.dev>
Link: https://patch.msgid.link/20251015181042.23961-3-edip@medip.dev
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
2025-11-06 14:10:59 +02:00
Takashi Iwai
82420bd4e1 ALSA: hda/hdmi: Fix breakage at probing nvhdmi-mcp driver
After restructuring and splitting the HDMI codec driver code, each
HDMI codec driver contains the own build_controls and build_pcms ops.
A copy-n-paste error put the wrong entries for nvhdmi-mcp driver; both
build_controls and build_pcms are swapped.  Unfortunately both
callbacks have the very same form, and the compiler didn't complain
it, either.  This resulted in a NULL dereference because the PCM
instance hasn't been initialized at calling the build_controls
callback.

Fix it by passing the proper entries.

Fixes: ad781b550f ("ALSA: hda/hdmi: Rewrite to new probe method")
Cc: <stable@vger.kernel.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=220743
Link: https://patch.msgid.link/20251106104647.25805-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2025-11-06 11:49:35 +01:00
Miaoqian Lin
59b0afd01b crypto: hisilicon/qm - Fix device reference leak in qm_get_qos_value
The qm_get_qos_value() function calls bus_find_device_by_name() which
increases the device reference count, but fails to call put_device()
to balance the reference count and lead to a device reference leak.

Add put_device() calls in both the error path and success path to
properly balance the reference count.

Found via static analysis.

Fixes: 22d7a6c39c ("crypto: hisilicon/qm - add pci bdf number check")
Cc: stable@vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Reviewed-by: Longfang Liu <liulongfang@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2025-11-06 14:29:49 +08:00
Alexei Starovoitov
e427054ae7 Merge branch 'x86-fgraph-bpf-fix-orc-stack-unwind-from-return-probe'
Jiri Olsa says:

====================
x86/fgraph,bpf: Fix ORC stack unwind from return probe

sending fix for ORC stack unwind issue reported in here [1], where
the ORC unwinder won't go pass the return_to_handler function and
we get no stacktrace.

Sending fix for that together with unrelated stacktrace fix (patch 1),
so the attached test can work properly.

It's based on:
  https://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
  probes/for-next

v1: https://lore.kernel.org/bpf/20251027131354.1984006-1-jolsa@kernel.org/
v2: https://lore.kernel.org/bpf/20251103220924.36371-3-jolsa@kernel.org/

v3 changes:
- fix assert condition in test

thanks,
jirka

[1] https://lore.kernel.org/bpf/aObSyt3qOnS_BMcy@krava/
====================

Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Link: https://patch.msgid.link/20251104215405.168643-1-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-11-05 17:14:42 -08:00
Jiri Olsa
3490d29964 selftests/bpf: Add stacktrace ips test for raw_tp
Adding test that verifies we get expected initial 2 entries from
stacktrace for rawtp probe via ORC unwind.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20251104215405.168643-5-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-11-05 17:05:19 -08:00
Jiri Olsa
c9e208fa93 selftests/bpf: Add stacktrace ips test for kprobe_multi/kretprobe_multi
Adding test that attaches kprobe/kretprobe multi and verifies the
ORC stacktrace matches expected functions.

Adding bpf_testmod_stacktrace_test function to bpf_testmod kernel
module which is called through several functions so we get reliable
call path for stacktrace.

The test is only for ORC unwinder to keep it simple.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20251104215405.168643-4-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-11-05 17:05:19 -08:00
Jiri Olsa
20a0bc1027 x86/fgraph,bpf: Fix stack ORC unwind from kprobe_multi return probe
Currently we don't get stack trace via ORC unwinder on top of fgraph exit
handler. We can see that when generating stacktrace from kretprobe_multi
bpf program which is based on fprobe/fgraph.

The reason is that the ORC unwind code won't get pass the return_to_handler
callback installed by fgraph return probe machinery.

Solving this by creating stack frame in return_to_handler expected by
ftrace_graph_ret_addr function to recover original return address and
continue with the unwind.

Also updating the pt_regs data with cs/flags/rsp which are needed for
successful stack retrieval from ebpf bpf_get_stackid helper.
 - in get_perf_callchain we check user_mode(regs) so CS has to be set
 - in perf_callchain_kernel we call perf_hw_regs(regs), so EFLAGS/FIXED
    has to be unset

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20251104215405.168643-3-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-11-05 17:05:19 -08:00
Jiri Olsa
6d08340d1e Revert "perf/x86: Always store regs->ip in perf_callchain_kernel()"
This reverts commit 83f44ae0f8.

Currently we store initial stacktrace entry twice for non-HW ot_regs, which
means callers that fail perf_hw_regs(regs) condition in perf_callchain_kernel.

It's easy to reproduce this bpftrace:

  # bpftrace -e 'tracepoint:sched:sched_process_exec { print(kstack()); }'
  Attaching 1 probe...

        bprm_execve+1767
        bprm_execve+1767
        do_execveat_common.isra.0+425
        __x64_sys_execve+56
        do_syscall_64+133
        entry_SYSCALL_64_after_hwframe+118

When perf_callchain_kernel calls unwind_start with first_frame, AFAICS
we do not skip regs->ip, but it's added as part of the unwind process.
Hence reverting the extra perf_callchain_store for non-hw regs leg.

I was not able to bisect this, so I'm not really sure why this was needed
in v5.2 and why it's not working anymore, but I could see double entries
as far as v5.10.

I did the test for both ORC and framepointer unwind with and without the
this fix and except for the initial entry the stacktraces are the same.

Acked-by: Song Liu <song@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20251104215405.168643-2-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-11-05 17:05:19 -08:00
Keith Busch
03b3bcd319 nvme: fix admin request_queue lifetime
The namespaces can access the controller's admin request_queue, and
stale references on the namespaces may exist after tearing down the
controller. Ensure the admin request_queue is active by moving the
controller's 'put' to after all controller references have been released
to ensure no one is can access the request_queue. This fixes a reported
use-after-free bug:

  BUG: KASAN: slab-use-after-free in blk_queue_enter+0x41c/0x4a0
  Read of size 8 at addr ffff88c0a53819f8 by task nvme/3287
  CPU: 67 UID: 0 PID: 3287 Comm: nvme Tainted: G            E       6.13.2-ga1582f1a031e #15
  Tainted: [E]=UNSIGNED_MODULE
  Hardware name: Jabil /EGS 2S MB1, BIOS 1.00 06/18/2025
  Call Trace:
   <TASK>
   dump_stack_lvl+0x4f/0x60
   print_report+0xc4/0x620
   ? _raw_spin_lock_irqsave+0x70/0xb0
   ? _raw_read_unlock_irqrestore+0x30/0x30
   ? blk_queue_enter+0x41c/0x4a0
   kasan_report+0xab/0xe0
   ? blk_queue_enter+0x41c/0x4a0
   blk_queue_enter+0x41c/0x4a0
   ? __irq_work_queue_local+0x75/0x1d0
   ? blk_queue_start_drain+0x70/0x70
   ? irq_work_queue+0x18/0x20
   ? vprintk_emit.part.0+0x1cc/0x350
   ? wake_up_klogd_work_func+0x60/0x60
   blk_mq_alloc_request+0x2b7/0x6b0
   ? __blk_mq_alloc_requests+0x1060/0x1060
   ? __switch_to+0x5b7/0x1060
   nvme_submit_user_cmd+0xa9/0x330
   nvme_user_cmd.isra.0+0x240/0x3f0
   ? force_sigsegv+0xe0/0xe0
   ? nvme_user_cmd64+0x400/0x400
   ? vfs_fileattr_set+0x9b0/0x9b0
   ? cgroup_update_frozen_flag+0x24/0x1c0
   ? cgroup_leave_frozen+0x204/0x330
   ? nvme_ioctl+0x7c/0x2c0
   blkdev_ioctl+0x1a8/0x4d0
   ? blkdev_common_ioctl+0x1930/0x1930
   ? fdget+0x54/0x380
   __x64_sys_ioctl+0x129/0x190
   do_syscall_64+0x5b/0x160
   entry_SYSCALL_64_after_hwframe+0x4b/0x53
  RIP: 0033:0x7f765f703b0b
  Code: ff ff ff 85 c0 79 9b 49 c7 c4 ff ff ff ff 5b 5d 4c 89 e0 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d dd 52 0f 00 f7 d8 64 89 01 48
  RSP: 002b:00007ffe2cefe808 EFLAGS: 00000202 ORIG_RAX: 0000000000000010
  RAX: ffffffffffffffda RBX: 00007ffe2cefe860 RCX: 00007f765f703b0b
  RDX: 00007ffe2cefe860 RSI: 00000000c0484e41 RDI: 0000000000000003
  RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000000000000
  R10: 00007f765f611d50 R11: 0000000000000202 R12: 0000000000000003
  R13: 00000000c0484e41 R14: 0000000000000001 R15: 00007ffe2cefea60
   </TASK>

Reported-by: Casey Chen <cachen@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
2025-11-05 12:50:46 -08:00
Zilin Guan
c367af440e btrfs: release root after error in data_reloc_print_warning_inode()
data_reloc_print_warning_inode() calls btrfs_get_fs_root() to obtain
local_root, but fails to release its reference when paths_from_inode()
returns an error. This causes a potential memory leak.

Add a missing btrfs_put_root() call in the error path to properly
decrease the reference count of local_root.

Fixes: b9a9a85059 ("btrfs: output affected files when relocation fails")
CC: stable@vger.kernel.org # 6.6+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2025-11-05 20:01:12 +01:00
Zilin Guan
5fea61aa1c btrfs: scrub: put bio after errors in scrub_raid56_parity_stripe()
scrub_raid56_parity_stripe() allocates a bio with bio_alloc(), but
fails to release it on some error paths, leading to a potential
memory leak.

Add the missing bio_put() calls to properly drop the bio reference
in those error cases.

Fixes: 1009254bf2 ("btrfs: scrub: use scrub_stripe to implement RAID56 P/Q scrub")
CC: stable@vger.kernel.org # 6.6+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2025-11-05 20:01:12 +01:00
Filipe Manana
bfe3d755ef btrfs: do not update last_log_commit when logging inode due to a new name
When logging that a new name exists, we skip updating the inode's
last_log_commit field to prevent a later explicit fsync against the inode
from doing nothing (as updating last_log_commit makes btrfs_inode_in_log()
return true). We are detecting, at btrfs_log_inode(), that logging a new
name is happening by checking the logging mode is not LOG_INODE_EXISTS,
but that is not enough because we may log parent directories when logging
a new name of a file in LOG_INODE_ALL mode - we need to check that the
logging_new_name field of the log context too.

An example scenario where this results in an explicit fsync against a
directory not persisting changes to the directory is the following:

  $ mkfs.btrfs -f /dev/sdc
  $ mount /dev/sdc /mnt

  $ touch /mnt/foo

  $ sync

  $ mkdir /mnt/dir

  # Write some data to our file and fsync it.
  $ xfs_io -c "pwrite -S 0xab 0 64K" -c "fsync" /mnt/foo

  # Add a new link to our file. Since the file was logged before, we
  # update it in the log tree by calling btrfs_log_new_name().
  $ ln /mnt/foo /mnt/dir/bar

  # fsync the root directory - we expect it to persist the dentry for
  # the new directory "dir".
  $ xfs_io -c "fsync" /mnt

  <power fail>

After mounting the fs the entry for directory "dir" does not exists,
despite the explicit fsync on the root directory.

Here's why this happens:

1) When we fsync the file we log the inode, so that it's present in the
   log tree;

2) When adding the new link we enter btrfs_log_new_name(), and since the
   inode is in the log tree we proceed to updating the inode in the log
   tree;

3) We first set the inode's last_unlink_trans to the current transaction
   (early in btrfs_log_new_name());

4) We then eventually enter btrfs_log_inode_parent(), and after logging
   the file's inode, we call btrfs_log_all_parents() because the inode's
   last_unlink_trans matches the current transaction's ID (updated in the
   previous step);

5) So btrfs_log_all_parents() logs the root directory by calling
   btrfs_log_inode() for the root's inode with a log mode of LOG_INODE_ALL
   so that new dentries are logged;

6) At btrfs_log_inode(), because the log mode is LOG_INODE_ALL, we
   update root inode's last_log_commit to the last transaction that
   changed the inode (->last_sub_trans field of the inode), which
   corresponds to the current transaction's ID;

7) Then later when user space explicitly calls fsync against the root
   directory, we enter btrfs_sync_file(), which calls skip_inode_logging()
   and that returns true, since its call to btrfs_inode_in_log() returns
   true and there are no ordered extents (it's a directory, never has
   ordered extents). This results in btrfs_sync_file() returning without
   syncing the log or committing the current transaction, so all the
   updates we did when logging the new name, including logging the root
   directory,  are not persisted.

So fix this by but updating the inode's last_log_commit if we are sure
we are not logging a new name (if ctx->logging_new_name is false).

A test case for fstests will follow soon.

Reported-by: Vyacheslav Kovalevsky <slava.kovalevskiy.2014@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/03c5d7ec-5b3d-49d1-95bc-8970a7f82d87@gmail.com/
Fixes: 130341be7f ("btrfs: always update the logged transaction when logging new names")
CC: stable@vger.kernel.org # 6.1+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2025-11-05 20:01:01 +01:00
Naohiro Aota
6a1ab50135 btrfs: zoned: fix stripe width calculation
The stripe offset calculation in the zoned code for raid0 and raid10
wrongly uses map->stripe_size to calculate it. In fact, map->stripe_size is
the size of the device extent composing the block group, which always is
the zone_size on the zoned setup.

Fix it by using BTRFS_STRIPE_LEN and BTRFS_STRIPE_LEN_SHIFT. Also, optimize
the calculation a bit by doing the common calculation only once.

Fixes: c0d90a79e8 ("btrfs: zoned: fix alloc_offset calculation for partly conventional block groups")
CC: stable@vger.kernel.org # 6.17+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2025-11-05 20:00:08 +01:00
Naohiro Aota
94f54924b9 btrfs: zoned: fix conventional zone capacity calculation
When a block group contains both conventional zone and sequential zone, the
capacity of the block group is wrongly set to the block group's full
length. The capacity should be calculated in btrfs_load_block_group_* using
the last allocation offset.

Fixes: 568220fa96 ("btrfs: zoned: support RAID0/1/10 on top of raid stripe tree")
CC: stable@vger.kernel.org # v6.12+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2025-11-05 20:00:06 +01:00
Laurent Pinchart
5e44c5a2cc arm64: dts: broadcom: bcm2712: rpi-5: Add ethernet0 alias
The RP1 ethernet controller DT node contains a local-mac-address
property to pass the MAC address from the boot loader to the kernel. The
boot loader does not fill the MAC address as the ethernet0 alias is
missing. Add it.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Andrea della Porta <andrea.porta@suse.com>
Link: https://lore.kernel.org/r/20251102111443.18206-1-laurent.pinchart@ideasonboard.com
Fixes: 43456fdfc0 ("arm64: dts: broadcom: Enable RP1 ethernet for Raspberry Pi 5")
Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
2025-11-05 10:28:16 -08:00
Andrea della Porta
a7da9c6a2f arm64: dts: broadcom: Assign clock rates in eth node for RPi5
In Raspberry Pi 5 DTS, the Ethernet clock rates must be assigned
as the default clock register values are not valid for the
Ethernet interface to function.
This can be done either in rp1_clocks node or in rp1_eth node.

Define the rates in rp1_eth node, as those clocks are 'leaf' clocks
used specifically by the Ethernet device only.

Fixes: 43456fdfc0 ("arm64: dts: broadcom: Enable RP1 ethernet for Raspberry Pi 5")
Signed-off-by: Andrea della Porta <andrea.porta@suse.com>
Link: https://lore.kernel.org/r/20251021135533.5517-1-andrea.porta@suse.com
Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
2025-11-05 10:28:09 -08:00
Laurentiu Mihalcea
997c06330f reset: imx8mp-audiomix: Fix bad mask values
As per the i.MX8MP TRM, section 14.2 "AUDIO_BLK_CTRL", table 14.2.3.1.1
"memory map", the definition of the EARC control register shows that the
EARC controller software reset is controlled via bit 0, while the EARC PHY
software reset is controlled via bit 1.

This means that the current definitions of IMX8MP_AUDIOMIX_EARC_RESET_MASK
and IMX8MP_AUDIOMIX_EARC_PHY_RESET_MASK are wrong since their values would
imply that the EARC controller software reset is controlled via bit 1 and
the EARC PHY software reset is controlled via bit 2. Fix them.

Fixes: a83bc87cd3 ("reset: imx8mp-audiomix: Prepare the code for more reset bits")
Cc: stable@vger.kernel.org
Reviewed-by: Shengjiu Wang <shengjiu.wang@gmail.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Signed-off-by: Laurentiu Mihalcea <laurentiu.mihalcea@nxp.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
2025-11-05 16:12:28 +01:00
Hans de Goede
3cd2018e15 spi: Try to get ACPI GPIO IRQ earlier
Since commit d24cfee7f6 ("spi: Fix acpi deferred irq probe"), the
acpi_dev_gpio_irq_get() call gets delayed till spi_probe() is called
on the SPI device.

If there is no driver for the SPI device then the move to spi_probe()
results in acpi_dev_gpio_irq_get() never getting called. This may
cause problems by leaving the GPIO pin floating because this call is
responsible for setting up the GPIO pin direction and/or bias according
to the values from the ACPI tables.

Re-add the removed acpi_dev_gpio_irq_get() in acpi_register_spi_device()
to ensure the GPIO pin is always correctly setup, while keeping the
acpi_dev_gpio_irq_get() call added to spi_probe() to deal with
-EPROBE_DEFER returns caused by the GPIO controller not having a driver
yet.

Link: https://bbs.archlinux.org/viewtopic.php?id=302348
Fixes: d24cfee7f6 ("spi: Fix acpi deferred irq probe")
Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hansg@kernel.org>
Link: https://patch.msgid.link/20251102190921.30068-1-hansg@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-05 13:08:48 +00:00
Christian Brauner
75fdd57499 Merge patch series "sb_min_blocksize() fixes"
Enforce checking of sb_min_blocksize() calls and update all callers
accordingly.

* patches from https://patch.msgid.link/20251104125009.2111925-2-yangyongpeng.storage@gmail.com:
  block: add __must_check attribute to sb_min_blocksize()
  xfs: check the return value of sb_min_blocksize() in xfs_fs_fill_super
  isofs: check the return value of sb_min_blocksize() in isofs_fill_super
  exfat: check return value of sb_min_blocksize in exfat_read_boot_sector
  vfat: fix missing sb_min_blocksize() return value checks

Link: https://patch.msgid.link/20251104125009.2111925-2-yangyongpeng.storage@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-05 14:00:17 +01:00
Zilin Guan
90f601b497 binfmt_misc: restore write access before closing files opened by open_exec()
bm_register_write() opens an executable file using open_exec(), which
internally calls do_open_execat() and denies write access on the file to
avoid modification while it is being executed.

However, when an error occurs, bm_register_write() closes the file using
filp_close() directly. This does not restore the write permission, which
may cause subsequent write operations on the same file to fail.

Fix this by calling exe_file_allow_write_access() before filp_close() to
restore the write permission properly.

Fixes: e7850f4d84 ("binfmt_misc: fix possible deadlock in bm_register_write")
Signed-off-by: Zilin Guan <zilin@seu.edu.cn>
Link: https://patch.msgid.link/20251105022923.1813587-1-zilin@seu.edu.cn
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-05 14:00:16 +01:00
Yongpeng Yang
8637fa89e6 block: add __must_check attribute to sb_min_blocksize()
When sb_min_blocksize() returns 0 and the return value is not checked,
it may lead to a situation where sb->s_blocksize is 0 when
accessing the filesystem super block. After commit a64e5a5960
("bdev: add back PAGE_SIZE block size validation for
sb_set_blocksize()"), this becomes more likely to happen when the
block device’s logical_block_size is larger than PAGE_SIZE and the
filesystem is unformatted. Add the __must_check attribute to ensure
callers always check the return value.

Cc: stable@vger.kernel.org # v6.15
Suggested-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
Link: https://patch.msgid.link/20251104125009.2111925-6-yangyongpeng.storage@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-05 14:00:16 +01:00
Alok Tiwari
c014021253 virtio-fs: fix incorrect check for fsvq->kobj
In virtio_fs_add_queues_sysfs(), the code incorrectly checks fs->mqs_kobj
after calling kobject_create_and_add(). Change the check to fsvq->kobj
(fs->mqs_kobj -> fsvq->kobj) to ensure the per-queue kobject is
successfully created.

Fixes: 87cbdc396a ("virtio_fs: add sysfs entries for queue information")
Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Link: https://patch.msgid.link/20251027104658.1668537-1-alok.a.tiwari@oracle.com
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-05 14:00:16 +01:00
Yongpeng Yang
124af0868e xfs: check the return value of sb_min_blocksize() in xfs_fs_fill_super
sb_min_blocksize() may return 0. Check its return value to avoid the
filesystem super block when sb->s_blocksize is 0.

Cc: stable@vger.kernel.org # v6.15
Fixes: a64e5a5960 ("bdev: add back PAGE_SIZE block size validation for sb_set_blocksize()")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
Link: https://patch.msgid.link/20251104125009.2111925-5-yangyongpeng.storage@gmail.com
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-05 14:00:16 +01:00
Yongpeng Yang
e106e269c5 isofs: check the return value of sb_min_blocksize() in isofs_fill_super
sb_min_blocksize() may return 0. Check its return value to avoid
opt->blocksize and sb->s_blocksize is 0.

Cc: stable@vger.kernel.org # v6.15
Fixes: 1b17a46c92 ("isofs: convert isofs to use the new mount API")
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
Link: https://patch.msgid.link/20251104125009.2111925-4-yangyongpeng.storage@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-05 14:00:16 +01:00
Yongpeng Yang
f2c1f63163 exfat: check return value of sb_min_blocksize in exfat_read_boot_sector
sb_min_blocksize() may return 0. Check its return value to avoid
accessing the filesystem super block when sb->s_blocksize is 0.

Cc: stable@vger.kernel.org # v6.15
Fixes: 719c1e1829 ("exfat: add super block operations")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
Link: https://patch.msgid.link/20251104125009.2111925-3-yangyongpeng.storage@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-05 14:00:16 +01:00
Yongpeng Yang
63b5aa01da vfat: fix missing sb_min_blocksize() return value checks
When emulating an nvme device on qemu with both logical_block_size and
physical_block_size set to 8 KiB, but without format, a kernel panic
was triggered during the early boot stage while attempting to mount a
vfat filesystem.

[95553.682035] EXT4-fs (nvme0n1): unable to set blocksize
[95553.684326] EXT4-fs (nvme0n1): unable to set blocksize
[95553.686501] EXT4-fs (nvme0n1): unable to set blocksize
[95553.696448] ISOFS: unsupported/invalid hardware sector size 8192
[95553.697117] ------------[ cut here ]------------
[95553.697567] kernel BUG at fs/buffer.c:1582!
[95553.697984] Oops: invalid opcode: 0000 [#1] SMP NOPTI
[95553.698602] CPU: 0 UID: 0 PID: 7212 Comm: mount Kdump: loaded Not tainted 6.18.0-rc2+ #38 PREEMPT(voluntary)
[95553.699511] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[95553.700534] RIP: 0010:folio_alloc_buffers+0x1bb/0x1c0
[95553.701018] Code: 48 8b 15 e8 93 18 02 65 48 89 35 e0 93 18 02 48 83 c4 10 5b 41 5c 41 5d 41 5e 41 5f 5d 31 d2 31 c9 31 f6 31 ff c3 cc cc cc cc <0f> 0b 90 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f
[95553.702648] RSP: 0018:ffffd1b0c676f990 EFLAGS: 00010246
[95553.703132] RAX: ffff8cfc4176d820 RBX: 0000000000508c48 RCX: 0000000000000001
[95553.703805] RDX: 0000000000002000 RSI: 0000000000000000 RDI: 0000000000000000
[95553.704481] RBP: ffffd1b0c676f9c8 R08: 0000000000000000 R09: 0000000000000000
[95553.705148] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001
[95553.705816] R13: 0000000000002000 R14: fffff8bc8257e800 R15: 0000000000000000
[95553.706483] FS:  000072ee77315840(0000) GS:ffff8cfdd2c8d000(0000) knlGS:0000000000000000
[95553.707248] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[95553.707782] CR2: 00007d8f2a9e5a20 CR3: 0000000039d0c006 CR4: 0000000000772ef0
[95553.708439] PKRU: 55555554
[95553.708734] Call Trace:
[95553.709015]  <TASK>
[95553.709266]  __getblk_slow+0xd2/0x230
[95553.709641]  ? find_get_block_common+0x8b/0x530
[95553.710084]  bdev_getblk+0x77/0xa0
[95553.710449]  __bread_gfp+0x22/0x140
[95553.710810]  fat_fill_super+0x23a/0xfc0
[95553.711216]  ? __pfx_setup+0x10/0x10
[95553.711580]  ? __pfx_vfat_fill_super+0x10/0x10
[95553.712014]  vfat_fill_super+0x15/0x30
[95553.712401]  get_tree_bdev_flags+0x141/0x1e0
[95553.712817]  get_tree_bdev+0x10/0x20
[95553.713177]  vfat_get_tree+0x15/0x20
[95553.713550]  vfs_get_tree+0x2a/0x100
[95553.713910]  vfs_cmd_create+0x62/0xf0
[95553.714273]  __do_sys_fsconfig+0x4e7/0x660
[95553.714669]  __x64_sys_fsconfig+0x20/0x40
[95553.715062]  x64_sys_call+0x21ee/0x26a0
[95553.715453]  do_syscall_64+0x80/0x670
[95553.715816]  ? __fs_parse+0x65/0x1e0
[95553.716172]  ? fat_parse_param+0x103/0x4b0
[95553.716587]  ? vfs_parse_fs_param_source+0x21/0xa0
[95553.717034]  ? __do_sys_fsconfig+0x3d9/0x660
[95553.717548]  ? __x64_sys_fsconfig+0x20/0x40
[95553.717957]  ? x64_sys_call+0x21ee/0x26a0
[95553.718360]  ? do_syscall_64+0xb8/0x670
[95553.718734]  ? __x64_sys_fsconfig+0x20/0x40
[95553.719141]  ? x64_sys_call+0x21ee/0x26a0
[95553.719545]  ? do_syscall_64+0xb8/0x670
[95553.719922]  ? x64_sys_call+0x1405/0x26a0
[95553.720317]  ? do_syscall_64+0xb8/0x670
[95553.720702]  ? __x64_sys_close+0x3e/0x90
[95553.721080]  ? x64_sys_call+0x1b5e/0x26a0
[95553.721478]  ? do_syscall_64+0xb8/0x670
[95553.721841]  ? irqentry_exit+0x43/0x50
[95553.722211]  ? exc_page_fault+0x90/0x1b0
[95553.722681]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[95553.723166] RIP: 0033:0x72ee774f3afe
[95553.723562] Code: 73 01 c3 48 8b 0d 0a 33 0f 00 f7 d8 64 89 01 48 83 c8 ff c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 49 89 ca b8 af 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d da 32 0f 00 f7 d8 64 89 01 48
[95553.725188] RSP: 002b:00007ffe97148978 EFLAGS: 00000246 ORIG_RAX: 00000000000001af
[95553.725892] RAX: ffffffffffffffda RBX: 00005dcfe53d0080 RCX: 000072ee774f3afe
[95553.726526] RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003
[95553.727176] RBP: 00007ffe97148ac0 R08: 0000000000000000 R09: 000072ee775e7ac0
[95553.727818] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
[95553.728459] R13: 00005dcfe53d04b0 R14: 000072ee77670b00 R15: 00005dcfe53d1a28
[95553.729086]  </TASK>

The panic occurs as follows:
1. logical_block_size is 8KiB, causing {struct super_block *sb}->s_blocksize
is initialized to 0.
vfat_fill_super
 - fat_fill_super
  - sb_min_blocksize
   - sb_set_blocksize //return 0 when size is 8KiB.
2. __bread_gfp is called with size == 0, causing folio_alloc_buffers() to
compute an offset equal to folio_size(folio), which triggers a BUG_ON.
fat_fill_super
 - sb_bread
  - __bread_gfp  // size == {struct super_block *sb}->s_blocksize == 0
   - bdev_getblk
    - __getblk_slow
     - grow_buffers
      - grow_dev_folio
       - folio_alloc_buffers  // size == 0
        - folio_set_bh //offset == folio_size(folio) and panic

To fix this issue, add proper return value checks for
sb_min_blocksize().

Cc: stable@vger.kernel.org # v6.15
Fixes: a64e5a5960 ("bdev: add back PAGE_SIZE block size validation for sb_set_blocksize()")
Reviewed-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Yongpeng Yang <yangyongpeng@xiaomi.com>
Link: https://patch.msgid.link/20251104125009.2111925-2-yangyongpeng.storage@gmail.com
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-11-05 14:00:16 +01:00
James Clark
a50f7456f8 dma-mapping: Allow use of DMA_BIT_MASK(64) in global scope
Clang doesn't like that (1ULL<<(64)) overflows when initializing a
global scope variable, even if that part of the ternary isn't used when
n = 64. The same initialization can be done without warnings in function
scopes, and GCC doesn't mind either way.

The build failure that highlighted this was already fixed in a different
way [1], which also has detailed links to the Clang issues. However it's
not going to be long before the same thing happens again, so it's better
to fix the root cause.

Fix it by using GENMASK_ULL() which does exactly the same thing, is much
more readable anyway, and doesn't have a shift that overflows.

[1]: https://lore.kernel.org/all/20250918-mmp-pdma-simplify-dma-addressing-v1-1-5c2be2b85696@riscstar.com/

Signed-off-by: James Clark <james.clark@linaro.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20251030-james-fix-dma_bit_mask-v1-1-ad1ce7cfab6e@linaro.org
2025-11-05 13:43:41 +01:00
Haotian Zhang
636f4618b1 regulator: fixed: fix GPIO descriptor leak on register failure
In the commit referenced by the Fixes tag,
devm_gpiod_get_optional() was replaced by manual
GPIO management, relying on the regulator core to release the
GPIO descriptor. However, this approach does not account for the
error path: when regulator registration fails, the core never
takes over the GPIO, resulting in a resource leak.

Add gpiod_put() before returning on regulator registration failure.

Fixes: 5e6f3ae5c1 ("regulator: fixed: Let core handle GPIO descriptor")
Signed-off-by: Haotian Zhang <vulab@iscas.ac.cn>
Link: https://patch.msgid.link/20251028172828.625-1-vulab@iscas.ac.cn
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-05 11:56:16 +00:00
Alexei Starovoitov
44e8f13f07 Merge branch 'bpf-add-_impl-suffix-for-kfuncs-with-implicit-args'
Mykyta Yatsenko says:

====================
bpf: Add _impl suffix for kfuncs with implicit args

We have established a pattern of function naming win "_impl" suffix;
those functions accept verifier-provided bpf_prog_aux argument.
Following uniform convention will allow for transparent backwards
compatibility with the upcoming KF_IMPLICIT_ARGS feature. This patch
set aims to fix current deviation from the convention to eliminate
unnecessary backwards incompatibility in the future.

Three kfuncs added in 6.18 don’t follow this *_impl convention and
therefore won’t participate in the new KF_IMPLICIT_ARGS mechanism:
 * bpf_task_work_schedule_resume()
 * bpf_task_work_schedule_signal()
 * bpf_stream_vprintk()

Rename them to align with the implicit-arg flow:
bpf_task_work_schedule_resume() -> bpf_task_work_schedule_resume_impl()
bpf_task_work_schedule_signal() -> bpf_task_work_schedule_signal_impl()
bpf_stream_vprintk() -> bpf_stream_vprintk_impl()

The KF_IMPLICIT_ARGS mechanism is not in tree yet, so callers must
switch to the *_impl names for now. Once the new mechanism lands, the
plain names (without _impl) will be reintroduced.

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
Acked-by: Ihor Solodrai <ihor.solodrai@linux.dev>
---
Changes in v3:
- Fix commit messages
- Link to v2: https://lore.kernel.org/r/20251104-implv2-v2-0-6dbc35f39f28@meta.com

Changes in v1:
- Split commit into 2
- Rebase on the correct branch
- Link to v1: https://lore.kernel.org/all/20251103232319.122965-1-mykyta.yatsenko5@gmail.com/
====================

Link: https://patch.msgid.link/20251104-implv2-v3-0-4772b9ae0e06@meta.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-11-04 17:50:33 -08:00
Mykyta Yatsenko
137cc92ffe bpf: add _impl suffix for bpf_stream_vprintk() kfunc
Rename bpf_stream_vprintk() to bpf_stream_vprintk_impl().

This makes bpf_stream_vprintk() follow the already established "_impl"
suffix-based naming convention for kfuncs with the bpf_prog_aux
argument provided by the verifier implicitly. This convention will be
taken advantage of with the upcoming KF_IMPLICIT_ARGS feature to
preserve backwards compatibility to BPF programs.

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
Link: https://lore.kernel.org/r/20251104-implv2-v3-2-4772b9ae0e06@meta.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2025-11-04 17:50:25 -08:00
Mykyta Yatsenko
ea0714d61d bpf:add _impl suffix for bpf_task_work_schedule* kfuncs
Rename:
bpf_task_work_schedule_resume()->bpf_task_work_schedule_resume_impl()
bpf_task_work_schedule_signal()->bpf_task_work_schedule_signal_impl()

This aligns task work scheduling kfuncs with the established naming
scheme for kfuncs with the bpf_prog_aux argument provided by the
verifier implicitly. This convention will be taken advantage of with the
upcoming KF_IMPLICIT_ARGS feature to preserve backwards compatibility to
BPF programs.

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
Link: https://lore.kernel.org/r/20251104-implv2-v3-1-4772b9ae0e06@meta.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Ihor Solodrai <ihor.solodrai@linux.dev>
2025-11-04 17:50:25 -08:00
Tzung-Bi Shih
e08969c4d6 Input: cros_ec_keyb - fix an invalid memory access
If cros_ec_keyb_register_matrix() isn't called (due to
`buttons_switches_only`) in cros_ec_keyb_probe(), `ckdev->idev` remains
NULL.  An invalid memory access is observed in cros_ec_keyb_process()
when receiving an EC_MKBP_EVENT_KEY_MATRIX event in cros_ec_keyb_work()
in such case.

  Unable to handle kernel read from unreadable memory at virtual address 0000000000000028
  ...
  x3 : 0000000000000000 x2 : 0000000000000000
  x1 : 0000000000000000 x0 : 0000000000000000
  Call trace:
  input_event
  cros_ec_keyb_work
  blocking_notifier_call_chain
  ec_irq_thread

It's still unknown about why the kernel receives such malformed event,
in any cases, the kernel shouldn't access `ckdev->idev` and friends if
the driver doesn't intend to initialize them.

Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
Link: https://patch.msgid.link/20251104070310.3212712-1-tzungbi@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
2025-11-04 16:03:26 -08:00
Rafał Miłecki
3d1c795bde ARM: dts: BCM53573: Fix address of Luxul XAP-1440's Ethernet PHY
Luxul XAP-1440 has BCM54210E PHY at address 25.

Fixes: 44ad820780 ("ARM: dts: BCM53573: Fix Ethernet info for Luxul devices")
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Link: https://lore.kernel.org/r/20251002194852.13929-1-zajec5@gmail.com
Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
2025-11-04 14:23:39 -08:00
Stefan Wahren
9c16e4d216 arm64: defconfig: Fix V3D deferred probe timeout
The commit 4adc20ba95 ("ARM: dts: broadcom: rpi: Switch to V3D firmware
clock") causes a regression in arm64 developer setups, which stores the
kernel modules via NFS. Before this change the involved V3D clock provider
was builtin, but after this DT change the clk-raspberrypi is responsible
for V3D and for arm64/defconfig this driver is build as a kernel module.
In case these kernel modules are provided via NFS this takes too long and
the PM domain core give up before the clock driver could be loaded:

  v3d fec00000.gpu: deferred probe timeout, ignoring dependency

So resolve this issue by making this critical driver builtin.

Reported-by: Mark Brown <broonie@kernel.org>
Closes: https://lore.kernel.org/linux-arm-kernel/9ebda74e-e700-4fbe-bca5-382f92417a9c@sirena.org.uk/
Fixes: 4adc20ba95 ("ARM: dts: broadcom: rpi: Switch to V3D firmware clock")
Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
Link: https://lore.kernel.org/r/20251104174518.11783-1-wahrenst@gmx.net
Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
2025-11-04 14:20:40 -08:00
Dan Carpenter
d83f151275 Input: imx_sc_key - fix memory corruption on unload
This is supposed to be "priv" but we accidentally pass "&priv" which is
an address in the stack and so it will lead to memory corruption when
the imx_sc_key_action() function is called.  Remove the &.

Fixes: 768062fd12 ("Input: imx_sc_key - use devm_add_action_or_reset() to handle all cleanups")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Peng Fan <peng.fan@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/aQYKR75r2VMFJutT@stanley.mountain
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
2025-11-04 10:57:36 -08:00
Sean Christopherson
ae431059e7 KVM: guest_memfd: Remove bindings on memslot deletion when gmem is dying
When unbinding a memslot from a guest_memfd instance, remove the bindings
even if the guest_memfd file is dying, i.e. even if its file refcount has
gone to zero.  If the memslot is freed before the file is fully released,
nullifying the memslot side of the binding in kvm_gmem_release() will
write to freed memory, as detected by syzbot+KASAN:

  ==================================================================
  BUG: KASAN: slab-use-after-free in kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353
  Write of size 8 at addr ffff88807befa508 by task syz.0.17/6022

  CPU: 0 UID: 0 PID: 6022 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full)
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025
  Call Trace:
   <TASK>
   dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
   print_address_description mm/kasan/report.c:378 [inline]
   print_report+0xca/0x240 mm/kasan/report.c:482
   kasan_report+0x118/0x150 mm/kasan/report.c:595
   kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353
   __fput+0x44c/0xa70 fs/file_table.c:468
   task_work_run+0x1d4/0x260 kernel/task_work.c:227
   resume_user_mode_work include/linux/resume_user_mode.h:50 [inline]
   exit_to_user_mode_loop+0xe9/0x130 kernel/entry/common.c:43
   exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline]
   syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline]
   syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline]
   do_syscall_64+0x2bd/0xfa0 arch/x86/entry/syscall_64.c:100
   entry_SYSCALL_64_after_hwframe+0x77/0x7f
  RIP: 0033:0x7fbeeff8efc9
   </TASK>

  Allocated by task 6023:
   kasan_save_stack mm/kasan/common.c:56 [inline]
   kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
   poison_kmalloc_redzone mm/kasan/common.c:397 [inline]
   __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:414
   kasan_kmalloc include/linux/kasan.h:262 [inline]
   __kmalloc_cache_noprof+0x3e2/0x700 mm/slub.c:5758
   kmalloc_noprof include/linux/slab.h:957 [inline]
   kzalloc_noprof include/linux/slab.h:1094 [inline]
   kvm_set_memory_region+0x747/0xb90 virt/kvm/kvm_main.c:2104
   kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154
   kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201
   vfs_ioctl fs/ioctl.c:51 [inline]
   __do_sys_ioctl fs/ioctl.c:597 [inline]
   __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583
   do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
   do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
   entry_SYSCALL_64_after_hwframe+0x77/0x7f

  Freed by task 6023:
   kasan_save_stack mm/kasan/common.c:56 [inline]
   kasan_save_track+0x3e/0x80 mm/kasan/common.c:77
   kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:584
   poison_slab_object mm/kasan/common.c:252 [inline]
   __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284
   kasan_slab_free include/linux/kasan.h:234 [inline]
   slab_free_hook mm/slub.c:2533 [inline]
   slab_free mm/slub.c:6622 [inline]
   kfree+0x19a/0x6d0 mm/slub.c:6829
   kvm_set_memory_region+0x9c4/0xb90 virt/kvm/kvm_main.c:2130
   kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154
   kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201
   vfs_ioctl fs/ioctl.c:51 [inline]
   __do_sys_ioctl fs/ioctl.c:597 [inline]
   __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583
   do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
   do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
   entry_SYSCALL_64_after_hwframe+0x77/0x7f

Deliberately don't acquire filemap invalid lock when the file is dying as
the lifecycle of f_mapping is outside the purview of KVM.  Dereferencing
the mapping is *probably* fine, but there's no need to invalidate anything
as memslot deletion is responsible for zapping SPTEs, and the only code
that can access the dying file is kvm_gmem_release(), whose core code is
mutually exclusive with unbinding.

Note, the mutual exclusivity is also what makes it safe to access the
bindings on a dying gmem instance.  Unbinding either runs with slots_lock
held, or after the last reference to the owning "struct kvm" is put, and
kvm_gmem_release() nullifies the slot pointer under slots_lock, and puts
its reference to the VM after that is done.

Reported-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68fa7a22.a70a0220.3bf6c6.008b.GAE@google.com
Tested-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com
Fixes: a7800aa80e ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory")
Cc: stable@vger.kernel.org
Cc: Hillf Danton <hdanton@sina.com>
Reviewed-By: Vishal Annapurve <vannapurve@google.com>
Link: https://patch.msgid.link/20251104011205.3853541-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-11-04 09:16:53 -08:00
Maxim Levitsky
fd92bd3b44 KVM: SVM: switch to raw spinlock for svm->ir_list_lock
Use a raw spinlock for vcpu_svm.ir_list_lock as the lock can be taken
during schedule() via kvm_sched_out() => __avic_vcpu_put(), and "normal"
spinlocks are sleepable locks when PREEMPT_RT=y.

This fixes the following lockdep warning:

  =============================
  [ BUG: Invalid wait context ]
  6.12.0-146.1640_2124176644.el10.x86_64+debug #1 Not tainted
  -----------------------------
  qemu-kvm/38299 is trying to lock:
  ff11000239725600 (&svm->ir_list_lock){....}-{3:3}, at: __avic_vcpu_put+0xfd/0x300 [kvm_amd]
  other info that might help us debug this:
  context-{5:5}
  2 locks held by qemu-kvm/38299:
   #0: ff11000239723ba8 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0x240/0xe00 [kvm]
   #1: ff11000b906056d8 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested+0x2e/0x130
  stack backtrace:
  CPU: 1 UID: 0 PID: 38299 Comm: qemu-kvm Kdump: loaded Not tainted 6.12.0-146.1640_2124176644.el10.x86_64+debug #1 PREEMPT(voluntary)
  Hardware name: AMD Corporation QUARTZ/QUARTZ, BIOS RQZ100AB 09/14/2023
  Call Trace:
   <TASK>
   dump_stack_lvl+0x6f/0xb0
   __lock_acquire+0x921/0xb80
   lock_acquire.part.0+0xbe/0x270
   _raw_spin_lock_irqsave+0x46/0x90
   __avic_vcpu_put+0xfd/0x300 [kvm_amd]
   svm_vcpu_put+0xfa/0x130 [kvm_amd]
   kvm_arch_vcpu_put+0x48c/0x790 [kvm]
   kvm_sched_out+0x161/0x1c0 [kvm]
   prepare_task_switch+0x36b/0xf60
   __schedule+0x4f7/0x1890
   schedule+0xd4/0x260
   xfer_to_guest_mode_handle_work+0x54/0xc0
   vcpu_run+0x69a/0xa70 [kvm]
   kvm_arch_vcpu_ioctl_run+0xdc0/0x17e0 [kvm]
   kvm_vcpu_ioctl+0x39f/0xe00 [kvm]

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Link: https://patch.msgid.link/20251030194130.307900-1-mlevitsk@redhat.com
[sean: massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-11-04 09:14:28 -08:00
Sean Christopherson
aaac099459 KVM: SVM: Make avic_ga_log_notifier() local to avic.c
Make amd_iommu_register_ga_log_notifier() a local symbol now that it's
defined and used purely within avic.c.

No functional change intended.

Fixes: 4bdec12aa8 ("KVM: SVM: Detect X2APIC virtualization (x2AVIC) support")
Link: https://patch.msgid.link/20251016190643.80529-4-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-11-04 09:14:28 -08:00
Sean Christopherson
adc6ae9729 KVM: SVM: Unregister KVM's GALog notifier on kvm-amd.ko exit
Unregister the GALog notifier (used to get notified of wake events for
blocking vCPUs) on kvm-amd.ko exit so that a KVM or IOMMU driver bug that
results in a spurious GALog event "only" results in a spurious IRQ, and
doesn't trigger a use-after-free due to executing unloaded module code.

Fixes: 5881f73757 ("svm: Introduce AMD IOMMU avic_ga_log_notifier")
Reported-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Closes: https://lore.kernel.org/all/20250918130320.GA119526@k08j02272.eu95sqa
Link: https://patch.msgid.link/20251016190643.80529-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-11-04 09:14:27 -08:00
Sean Christopherson
59a217ced3 KVM: SVM: Initialize per-CPU svm_data at the end of hardware setup
Setup the per-CPU SVM data structures at the very end of hardware setup so
that svm_hardware_unsetup() can be used in svm_hardware_setup() to unwind
AVIC setup (for the GALog notifier).  Alternatively, the error path could
do an explicit, manual unwind, e.g. by adding a helper to free the per-CPU
structures.  But the per-CPU allocations have no interactions or
dependencies, i.e. can comfortably live at the end, and so converting to
a manual unwind would introduce churn and code without providing any
immediate advantage.

Link: https://patch.msgid.link/20251016190643.80529-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-11-04 09:14:26 -08:00
Chao Gao
cab4098be4 KVM: x86: Call out MSR_IA32_S_CET is not handled by XSAVES
Update the comment above is_xstate_managed_msr() to note that
MSR_IA32_S_CET isn't saved/restored by XSAVES/XRSTORS.

MSR_IA32_S_CET isn't part of CET_U/S state as the SDM states:
  The register state used by Control-Flow Enforcement Technology (CET)
  comprises the two 64-bit MSRs (IA32_U_CET and IA32_PL3_SSP) that manage
  CET when CPL = 3 (CET_U state); and the three 64-bit MSRs
  (IA32_PL0_SSP–IA32_PL2_SSP) that manage CET when CPL < 3 (CET_S state).

Opportunistically shift the snippet about the safety of loading certain
MSRs to the function comment for kvm_access_xstate_msr(), which is where
the MSRs are actually loaded into hardware.

Fixes: e44eb58334 ("KVM: x86: Load guest FPU state when access XSAVE-managed MSRs")
Signed-off-by: Chao Gao <chao.gao@intel.com>
Link: https://patch.msgid.link/20251028060142.29830-1-chao.gao@intel.com
[sean: shift snippet about safety to kvm_access_xstate_msr()]
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-11-04 09:14:26 -08:00
Sean Christopherson
9bc610b6a2 KVM: x86: Harden KVM against imbalanced load/put of guest FPU state
Assert, via KVM_BUG_ON(), that guest FPU state isn't/is in use when
loading/putting the FPU to help detect KVM bugs without needing an assist
from KASAN.  If an imbalanced load/put is detected, skip the redundant
load/put to avoid clobbering guest state and/or crashing the host.

Note, kvm_access_xstate_msr() already provides a similar assertion.

Reviewed-by: Yao Yuan <yaoyuan@linux.alibaba.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Link: https://patch.msgid.link/20251030185802.3375059-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-11-04 09:14:21 -08:00
Sean Christopherson
8819a49f9f KVM: x86: Unload "FPU" state on INIT if and only if its currently in-use
Replace the hack added by commit f958bd2314 ("KVM: x86: Fix potential
put_fpu() w/o load_fpu() on MPX platform") with a more robust approach of
unloading+reloading guest FPU state based on whether or not the vCPU's FPU
is currently in-use, i.e. currently loaded.  This fixes a bug on hosts
that support CET but not MPX, where kvm_arch_vcpu_ioctl_get_mpstate()
neglects to load FPU state (it only checks for MPX support) and leads to
KVM attempting to put FPU state due to kvm_apic_accept_events() triggering
INIT emulation.  E.g. on a host with CET but not MPX, syzkaller+KASAN
generates:

  Oops: general protection fault, probably for non-canonical address 0xdffffc0000000004: 0000 [#1] SMP KASAN NOPTI
  KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027]
  CPU: 211 UID: 0 PID: 20451 Comm: syz.9.26 Tainted: G S                  6.18.0-smp-DEV #7 NONE
  Tainted: [S]=CPU_OUT_OF_SPEC
  Hardware name: Google Izumi/izumi, BIOS 0.20250729.1-0 07/29/2025
  RIP: 0010:fpu_swap_kvm_fpstate+0x3ce/0x610 ../arch/x86/kernel/fpu/core.c:377
  RSP: 0018:ff1100410c167cc0 EFLAGS: 00010202
  RAX: 0000000000000004 RBX: 0000000000000020 RCX: 00000000000001aa
  RDX: 00000000000001ab RSI: ffffffff817bb960 RDI: 0000000022600000
  RBP: dffffc0000000000 R08: ff110040d23c8007 R09: 1fe220081a479000
  R10: dffffc0000000000 R11: ffe21c081a479001 R12: ff110040d23c8d98
  R13: 00000000fffdc578 R14: 0000000000000000 R15: ff110040d23c8d90
  FS:  00007f86dd1876c0(0000) GS:ff11007fc969b000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f86dd186fa8 CR3: 00000040d1dfa003 CR4: 0000000000f73ef0
  PKRU: 80000000
  Call Trace:
   <TASK>
   kvm_vcpu_reset+0x80d/0x12c0 ../arch/x86/kvm/x86.c:11818
   kvm_apic_accept_events+0x1cb/0x500 ../arch/x86/kvm/lapic.c:3489
   kvm_arch_vcpu_ioctl_get_mpstate+0xd0/0x4e0 ../arch/x86/kvm/x86.c:12145
   kvm_vcpu_ioctl+0x5e2/0xed0 ../virt/kvm/kvm_main.c:4539
   __se_sys_ioctl+0x11d/0x1b0 ../fs/ioctl.c:51
   do_syscall_x64 ../arch/x86/entry/syscall_64.c:63 [inline]
   do_syscall_64+0x6e/0x940 ../arch/x86/entry/syscall_64.c:94
   entry_SYSCALL_64_after_hwframe+0x76/0x7e
  RIP: 0033:0x7f86de71d9c9
   </TASK>

with a very simple reproducer:

  r0 = openat$kvm(0xffffffffffffff9c, &(0x7f0000000000), 0x80b00, 0x0)
  r1 = ioctl$KVM_CREATE_VM(r0, 0xae01, 0x0)
  ioctl$KVM_CREATE_IRQCHIP(r1, 0xae60)
  r2 = ioctl$KVM_CREATE_VCPU(r1, 0xae41, 0x0)
  ioctl$KVM_SET_IRQCHIP(r1, 0x8208ae63, ...)
  ioctl$KVM_GET_MP_STATE(r2, 0x8004ae98, &(0x7f00000000c0))

Alternatively, the MPX hack in GET_MP_STATE could be extended to cover CET,
but from a "don't break existing functionality" perspective, that isn't any
less risky than peeking at the state of in_use, and it's far less robust
for a long term solution (as evidenced by this bug).

Reported-by: Alexander Potapenko <glider@google.com>
Fixes: 69cc3e8865 ("KVM: x86: Add XSS support for CET_KERNEL and CET_USER")
Reviewed-by: Yao Yuan <yaoyuan@linux.alibaba.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Link: https://patch.msgid.link/20251030185802.3375059-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-11-04 09:14:11 -08:00
Miaoqian Lin
bbde14682e pmdomain: imx: Fix reference count leak in imx_gpc_remove
of_get_child_by_name() returns a node pointer with refcount incremented, we
should use of_node_put() on it when not needed anymore. Add the missing
of_node_put() to avoid refcount leak.

Fixes: 721cabf6c6 ("soc: imx: move PGC handling to a new GPC driver")
Cc: stable@vger.kernel.org
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2025-11-04 17:29:00 +01:00
Marek Szyprowski
fccac54b0d pmdomain: samsung: Rework legacy splash-screen handover workaround
Limit the workaround for the lack of the proper splash-screen handover
handling to the legacy ARM 32bit systems and replace forcing a sync_state
by explicite power domain shutdown. This approach lets compiler to
optimize it out on newer ARM 64bit systems.

Suggested-by: Ulf Hansson <ulf.hansson@linaro.org>
Fixes: 0745658aeb ("pmdomain: samsung: Fix splash-screen handover by enforcing a sync_state")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2025-11-04 17:16:05 +01:00
Olga Kornievskaia
4d3dbc2386 nfsd: add missing FATTR4_WORD2_CLONE_BLKSIZE from supported attributes
RFC 7862 Section 4.1.2 says that if the server supports CLONE it MUST
support clone_blksize attribute.

Fixes: d6ca7d2643 ("NFSD: Implement FATTR4_CLONE_BLKSIZE attribute")
Cc: stable@vger.kernel.org
Signed-off-by: Olga Kornievskaia <okorniev@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2025-11-04 11:02:31 -05:00
NeilBrown
8a7348a9ed nfsd: fix refcount leak in nfsd_set_fh_dentry()
nfsd exports a "pseudo root filesystem" which is used by NFSv4 to find
the various exported filesystems using LOOKUP requests from a known root
filehandle.  NFSv3 uses the MOUNT protocol to find those exported
filesystems and so is not given access to the pseudo root filesystem.

If a v3 (or v2) client uses a filehandle from that filesystem,
nfsd_set_fh_dentry() will report an error, but still stores the export
in "struct svc_fh" even though it also drops the reference (exp_put()).
This means that when fh_put() is called an extra reference will be dropped
which can lead to use-after-free and possible denial of service.

Normal NFS usage will not provide a pseudo-root filehandle to a v3
client.  This bug can only be triggered by the client synthesising an
incorrect filehandle.

To fix this we move the assignments to the svc_fh later, after all
possible error cases have been detected.

Reported-and-tested-by: tianshuo han <hantianshuo233@gmail.com>
Fixes: ef7f6c4904 ("nfsd: move V4ROOT version check to nfsd_set_fh_dentry()")
Signed-off-by: NeilBrown <neil@brown.name>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
2025-11-04 11:02:31 -05:00
Claudiu Beznea
249d96b492 ASoC: da7213: Use component driver suspend/resume
Since snd_soc_suspend() is invoked through snd_soc_pm_ops->suspend(),
and snd_soc_pm_ops is associated with the soc_driver (defined in
sound/soc/soc-core.c), and there is no parent-child relationship between
the soc_driver and the DA7213 codec driver, the power management subsystem
does not enforce a specific suspend/resume order between the DA7213 driver
and the soc_driver.

Because of this, the different codec component functionalities, called from
snd_soc_resume() to reconfigure various functions, can race with the
DA7213 struct dev_pm_ops::resume function, leading to misapplied
configuration. This occasionally results in clipped sound.

Fix this by dropping the struct dev_pm_ops::{suspend, resume} and use
instead struct snd_soc_component_driver::{suspend, resume}. This ensures
the proper configuration sequence is handled by the ASoC subsystem.

Cc: stable@vger.kernel.org
Fixes: 431e040065 ("ASoC: da7213: Add suspend to RAM support")
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
Link: https://patch.msgid.link/20251104114914.2060603-1-claudiu.beznea.uj@bp.renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-04 13:53:26 +00:00
Pauli Virtanen
b98b69c385 ALSA: usb-audio: add min_mute quirk for SteelSeries Arctis
ID 1038:1294 SteelSeries ApS Arctis Pro Wireless
is reported to have muted min playback volume. Apply quirk for that.

Link: https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/4229#note_3174448
Signed-off-by: Pauli Virtanen <pav@iki.fi>
Link: https://patch.msgid.link/a83f2694b1f8c37e4667a3cf057ffdc408b0f70d.1762108507.git.pav@iki.fi
Signed-off-by: Takashi Iwai <tiwai@suse.de>
2025-11-04 12:06:09 +01:00
Alexei Starovoitov
156c75f596 Merge branch 'fix-ftrace-for-livepatch-bpf-fexit-programs'
Song Liu says:

====================
Fix ftrace for livepatch + BPF fexit programs

livepatch and BPF trampoline are two special users of ftrace. livepatch
uses ftrace with IPMODIFY flag and BPF trampoline uses ftrace direct
functions. When livepatch and BPF trampoline with fexit programs attach to
the same kernel function, BPF trampoline needs to call into the patched
version of the kernel function.

1/3 and 2/3 of this patchset fix two issues with livepatch + fexit cases,
one in the register_ftrace_direct path, the other in the
modify_ftrace_direct path.

3/3 adds selftests for both cases.
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
v4: https://patch.msgid.link/20251027175023.1521602-1-song@kernel.org

Changes v3 => v4:
1. Add helper reset_direct. (Steven)
2. Add Reviewed-by from Jiri.
3. Fix minor typo in comments.

v3: https://lore.kernel.org/bpf/20251026205445.1639632-1-song@kernel.org/

Changes v2 => v3:
1. Incorporate feedback by AI, which also fixes build error reported by
   Steven and kernel test robot.

v2: https://lore.kernel.org/bpf/20251024182901.3247573-1-song@kernel.org/

Changes v1 => v2:
1. Target bpf tree. (Alexei)
2. Bring back the FTRACE_WARN_ON in __ftrace_hash_update_ipmodify
   for valid code paths. (Steven)
3. Update selftests with cleaner way to find livepatch-sample.ko.
   (offlline discussion with Ihor)

v1: https://lore.kernel.org/bpf/20251024071257.3956031-1-song@kernel.org/
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2025-11-03 17:39:26 -08:00
Song Liu
62d2d0a338 selftests/bpf: Add tests for livepatch + bpf trampoline
Both livepatch and BPF trampoline use ftrace. Special attention is needed
when livepatch and fexit program touch the same function at the same
time, because livepatch updates a kernel function and the BPF trampoline
need to call into the right version of the kernel function.

Use samples/livepatch/livepatch-sample.ko for the test.

The test covers two cases:
  1) When a fentry program is loaded first. This exercises the
     modify_ftrace_direct code path.
  2) When a fentry program is loaded first. This exercises the
     register_ftrace_direct code path.

Signed-off-by: Song Liu <song@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20251027175023.1521602-4-song@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-11-03 17:22:06 -08:00
Song Liu
3e9a18e1c3 ftrace: bpf: Fix IPMODIFY + DIRECT in modify_ftrace_direct()
ftrace_hash_ipmodify_enable() checks IPMODIFY and DIRECT ftrace_ops on
the same kernel function. When needed, ftrace_hash_ipmodify_enable()
calls ops->ops_func() to prepare the direct ftrace (BPF trampoline) to
share the same function as the IPMODIFY ftrace (livepatch).

ftrace_hash_ipmodify_enable() is called in register_ftrace_direct() path,
but not called in modify_ftrace_direct() path. As a result, the following
operations will break livepatch:

1. Load livepatch to a kernel function;
2. Attach fentry program to the kernel function;
3. Attach fexit program to the kernel function.

After 3, the kernel function being used will not be the livepatched
version, but the original version.

Fix this by adding __ftrace_hash_update_ipmodify() to
__modify_ftrace_direct() and adjust some logic around the call.

Signed-off-by: Song Liu <song@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20251027175023.1521602-3-song@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-11-03 17:22:06 -08:00
Song Liu
56b3c85e15 ftrace: Fix BPF fexit with livepatch
When livepatch is attached to the same function as bpf trampoline with
a fexit program, bpf trampoline code calls register_ftrace_direct()
twice. The first time will fail with -EAGAIN, and the second time it
will succeed. This requires register_ftrace_direct() to unregister
the address on the first attempt. Otherwise, the bpf trampoline cannot
attach. Here is an easy way to reproduce this issue:

  insmod samples/livepatch/livepatch-sample.ko
  bpftrace -e 'fexit:cmdline_proc_show {}'
  ERROR: Unable to attach probe: fexit:vmlinux:cmdline_proc_show...

Fix this by cleaning up the hash when register_ftrace_function_nolock hits
errors.

Also, move the code that resets ops->func and ops->trampoline to the error
path of register_ftrace_direct(); and add a helper function reset_direct()
in register_ftrace_direct() and unregister_ftrace_direct().

Fixes: d05cb47066 ("ftrace: Fix modification of direct_function hash while in use")
Cc: stable@vger.kernel.org # v6.6+
Reported-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Closes: https://lore.kernel.org/live-patching/c5058315a39d4615b333e485893345be@crowdstrike.com/
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@crowdstrike.com>
Signed-off-by: Song Liu <song@kernel.org>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20251027175023.1521602-2-song@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
2025-11-03 17:22:06 -08:00
Jens Axboe
8f05967b02 MAINTAINERS: correct git location for block layer tree
As part of a recent move go exclusively listing git.kernel.org trees
for the block and io_uring development, the "BLOCK LAYER" entry wasn't
updated as it already used git.kernel.org. However, outside of just
moving from git.kernel.dk to git.kernel.org, the "block" part of the
trees was also dropped, as the tree serves both block and io_uring
development trees.

Fix up the "BLOCK LAYER" entry so they all use the same tree.

Reported-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2025-11-03 08:55:12 -07:00
Richard Fitzgerald
8da0efc3da ASoC: doc: cs35l56: Update firmware filename description for B0 silicon
Update the text for firmware file naming to show that the l?u? suffix is
supported on CS35L56 B0 silicon and ampN was only used on early firmware.

The previous version of this text only said that B0 silicon used the ampN
suffix. Since kernel 6.16 the driver supports both the old ampN and
new l?u? suffix for B0 silicon. New firmwares will use the l?u? suffix.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://patch.msgid.link/20251103115809.33953-1-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
2025-11-03 13:09:14 +00:00
Dan Carpenter
97315e7c90 mtd: onenand: Pass correct pointer to IRQ handler
This was supposed to pass "onenand" instead of "&onenand" with the
ampersand.  Passing a random stack address which will be gone when the
function ends makes no sense.  However the good thing is that the pointer
is never used, so this doesn't cause a problem at run time.

Fixes: e23abf4b77 ("mtd: OneNAND: S5PC110: Implement DMA interrupt method")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2025-11-03 11:37:57 +01:00
Mikhail Kshevetskiy
a1d3bc606b mtd: spinand: fmsh: remove QE bit for FM25S01A flash
According to datasheet (http://eng.fmsh.com/nvm/FM25S01A_ds_eng.pdf)
there is no QE (Quad Enable) bit for FM25S01A flash, so remove it.

Fixes: 5f284dc15c ("mtd: spinand: add support for FudanMicro FM25S01A")
Signed-off-by: Mikhail Kshevetskiy <mikhail.kshevetskiy@iopsys.eu>
Tested-by: Tianling Shen <cnsztl@gmail.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2025-11-03 11:33:04 +01:00
Chunhai Guo
083d7af335 MAINTAINERS: erofs: add myself as reviewer
In the past two years, I have focused on EROFS and contributed features
including the reserved buffer pool, configurable global buffer pool, and
the ongoing direct I/O support for compressed data.

I would like to continue contributing to EROFS and help with code
reviews. Please CC me on EROFS-related changes.

Signed-off-by: Chunhai Guo <guochunhai@vivo.com>
Acked-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Acked-by: Chao Yu <chao@kernel.org>
Acked-by: Hongbo Li <lihongbo22@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
2025-11-03 12:32:46 +08:00
Diederik de Haas
03c7e964a0 arm64: dts: rockchip: Fix vccio4-supply on rk3566-pinetab2
Page 13 of the PineTab2 v2 schematic dd 20230417 shows VCCIO4's power
source is VCCIO_WL. Page 19 shows that VCCIO_WL is connected to
VCCA1V8_PMU, so fix the PineTab2 dtsi to reflect that.

Fixes: 1b7e19448f ("arm64: dts: rockchip: Add devicetree for Pine64 PineTab2")
Cc: stable@vger.kernel.org
Reviewed-by: Dragan Simic <dsimic@manjaro.org>
Signed-off-by: Diederik de Haas <diederik@cknow-tech.com>
Link: https://patch.msgid.link/20251027155724.138096-1-diederik@cknow-tech.com
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-10-31 23:49:21 +01:00
Quentin Schulz
08d70143e3 arm64: dts: rockchip: include rk3399-base instead of rk3399 in rk3399-op1
In commit 296602b8e5 ("arm64: dts: rockchip: Move RK3399 OPPs to dtsi
files for SoC variants"), everything shared between variants of RK3399
was put into rk3399-base.dtsi and the rest in variant-specific DTSI,
such as rk3399-t, rk3399-op1, rk3399, etc.
Therefore, the variant-specific DTSI should include rk3399-base.dtsi and
not another variant's DTSI.

rk3399-op1 wrongly includes rk3399 (a variant) DTSI instead of
rk3399-base DTSI, let's fix this oversight by including the intended
DTSI.

Fortunately, this had no impact on the resulting DTB since all nodes
were named the same and all node properties were overridden in
rk3399-op1.dtsi. This was checked by doing a checksum of rk3399-op1 DTBs
before and after this commit.

No intended change in behavior.

Fixes: 296602b8e5 ("arm64: dts: rockchip: Move RK3399 OPPs to dtsi files for SoC variants")
Cc: stable@vger.kernel.org
Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
Reviewed-by: Dragan Simic <dsimic@manjaro.org>
Link: https://patch.msgid.link/20251029-rk3399-op1-include-v1-1-2472ee60e7f8@cherry.de
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-10-31 23:47:05 +01:00
Mario Limonciello (AMD)
534ca75e8e HID: hid-input: Extend Elan ignore battery quirk to USB
USB Elan devices have the same problem as the I2C ones with a fake
battery device showing up.

Reviewed-by: Hans de Goede <hansg@kernel.org>
Reported-by: André Barata <andretiagob@protonmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220722
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-10-31 10:06:36 +01:00
Masami Ichikawa
53f731f5bb HID: hid-ntrig: Prevent memory leak in ntrig_report_version()
Use a scope-based cleanup helper for the buffer allocated with kmalloc()
in ntrig_report_version() to simplify the cleanup logic and prevent
memory leaks (specifically the !hid_is_usb()-case one).

[jkosina@suse.com: elaborate on the actual existing leak]
Fixes: 185c926283 ("HID: hid-ntrig: fix unable to handle page fault in ntrig_report_version()")
Signed-off-by: Masami Ichikawa <masami256@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-10-31 09:57:10 +01:00
Sebastian Ene
103e17aac0 KVM: arm64: Check the untrusted offset in FF-A memory share
Verify the offset to prevent OOB access in the hypervisor
FF-A buffer in case an untrusted large enough value
[U32_MAX - sizeof(struct ffa_composite_mem_region) + 1, U32_MAX]
is set from the host kernel.

Signed-off-by: Sebastian Ene <sebastianene@google.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://patch.msgid.link/20251017075710.2605118-1-sebastianene@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-10-30 16:14:58 +00:00
Vincent Donnefort
f71f7afd0a KVM: arm64: Check range args for pKVM mem transitions
There's currently no verification for host issued ranges in most of the
pKVM memory transitions. The end boundary might therefore be subject to
overflow and later checks could be evaded.

Close this loophole with an additional pfn_range_is_valid() check on a
per public function basis. Once this check has passed, it is safe to
convert pfn and nr_pages into a phys_addr_t and a size.

host_unshare_guest transition is already protected via
__check_host_shared_guest(), while assert_host_shared_guest() callers
are already ignoring host checks.

Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Link: https://patch.msgid.link/20251016164541.3771235-1-vdonnefort@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-10-30 16:14:37 +00:00
Mark Brown
a186fbcfd8 KVM: arm64: selftests: Filter ZCR_EL2 in get-reg-list
get-reg-list includes ZCR_EL2 in the list of EL2 registers that it looks
for when NV is enabled but does not have any feature gate for this register,
meaning that testing any combination of features that includes EL2 but does
not include SVE will result in a test failure due to a missing register
being reported:

| The following lines are missing registers:
|
|	ARM64_SYS_REG(3, 4, 1, 2, 0),

Add ZCR_EL2 to feat_id_regs so that the test knows not to expect to see it
without SVE being enabled.

Fixes: 3a90b6f279 ("KVM: arm64: selftests: get-reg-list: Add base EL2 registers")
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20251024-kvm-arm64-get-reg-list-zcr-el2-v1-1-0cd0ff75e22f@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-10-30 16:13:27 +00:00
Mark Brown
92e781c93e KVM: arm64: selftests: Add SCTLR2_EL2 to get-reg-list
We recently added support for SCTLR2_EL2 to the kernel but did not add it
to get-reg-list, resulting in it reporting the missing register when it
is available. Add it.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20251023-b4-kvm-arm64-get-reg-list-sctlr-el2-v1-1-088f88ff992a@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-10-30 16:13:04 +00:00
Maximilian Dittgen
a24f7afce0 KVM: selftests: fix MAPC RDbase target formatting in vgic_lpi_stress
Since GITS_TYPER.PTA == 0, the ITS MAPC command demands a CPU ID,
rather than a physical redistributor address, for its RDbase
command argument.

As such, when MAPC-ing guest ITS collections, vgic_lpi_stress iterates
over CPU IDs in the range [0, nr_cpus), passing them as the RDbase
vcpu_id argument to its_send_mapc_cmd().

However, its_encode_target() in the its_send_mapc_cmd() selftest
handler expects RDbase arguments to be formatted with a 16 bit
offset, as shown by the 16-bit target_addr right shift its implementation:

        its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16)

At the moment, all CPU IDs passed into its_send_mapc_cmd() have no
offset, therefore becoming 0x0 after the bit shift. Thus, when
vgic_its_cmd_handle_mapc() receives the ITS command in vgic-its.c,
it always interprets the RDbase target CPU as CPU 0. All interrupts
sent to collections will be processed by vCPU 0, which defeats the
purpose of this multi-vCPU test.

Fix by creating procnum_to_rdbase() helper function, which left-shifts
the vCPU parameter received by its_send_mapc_cmd 16 bits before passing
it to its_encode_target for encoding.

Signed-off-by: Maximilian Dittgen <mdittgen@amazon.de>
Link: https://patch.msgid.link/20251020145946.48288-1-mdittgen@amazon.de
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-10-30 16:12:30 +00:00
Sascha Bischoff
da888524c3 KVM: arm64: vgic-v3: Trap all if no in-kernel irqchip
If there is no in-kernel irqchip for a GICv3 host set all of the trap
bits to block all accesses. This fixes the no-vgic-v3 selftest again.

Fixes: 3193287ddf ("KVM: arm64: gic-v3: Only set ICH_HCR traps for v2-on-v3 or v3 guests")
Reported-by: Mark Brown <broonie@kernel.org>
Closes: https://lore.kernel.org/all/23072856-6b8c-41e2-93d1-ea8a240a7079@sirena.org.uk
Signed-off-by: Sascha Bischoff <sascha.bischoff@arm.com>
Reviewed-by: Sebastian Ott <sebott@redhat.com>
Tested-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20251021094358.1963807-1-sascha.bischoff@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-10-30 16:11:21 +00:00
Mario Limonciello (AMD)
4d3a13afa8 HID: amd_sfh: Stop sensor before starting
Titas reports that the accelerometer sensor on their laptop only
works after a warm boot or unloading/reloading the amd-sfh kernel
module.

Presumably the sensor is in a bad state on cold boot and failing to
start, so explicitly stop it before starting.

Cc: stable@vger.kernel.org
Fixes: 93ce5e0231 ("HID: amd_sfh: Implement SFH1.1 functionality")
Reported-by: Titas <novatitas366@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220670
Tested-by: Titas <novatitas366@gmail.com>
Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-10-30 11:58:41 +01:00
April Grimoire
743c81cdc9 HID: apple: Add SONiX AK870 PRO to non_apple_keyboards quirk list
SONiX AK870 PRO keyboard pretends to be an apple keyboard by VID:PID,
rendering function keys not treated properly. Despite being a
SONiX USB DEVICE, it uses a different name, so adding it to the list.

Signed-off-by: April Grimoire <april@aprilg.moe>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-10-30 11:54:00 +01:00
Jianbo Liu
59630e2ccd xfrm: Prevent locally generated packets from direct output in tunnel mode
Add a check to ensure locally generated packets (skb->sk != NULL) do
not use direct output in tunnel mode, as these packets require proper
L2 header setup that is handled by the normal XFRM processing path.

Fixes: 5eddd76ec2 ("xfrm: fix tunnel mode TX datapath in packet offload mode")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-10-30 11:52:38 +01:00
Jianbo Liu
61fafbee6c xfrm: Determine inner GSO type from packet inner protocol
The GSO segmentation functions for ESP tunnel mode
(xfrm4_tunnel_gso_segment and xfrm6_tunnel_gso_segment) were
determining the inner packet's L2 protocol type by checking the static
x->inner_mode.family field from the xfrm state.

This is unreliable. In tunnel mode, the state's actual inner family
could be defined by x->inner_mode.family or by
x->inner_mode_iaf.family. Checking only the former can lead to a
mismatch with the actual packet being processed, causing GSO to create
segments with the wrong L2 header type.

This patch fixes the bug by deriving the inner mode directly from the
packet's inner protocol stored in XFRM_MODE_SKB_CB(skb)->protocol.

Instead of replicating the code, this patch modifies the
xfrm_ip2inner_mode helper function. It now correctly returns
&x->inner_mode if the selector family (x->sel.family) is already
specified, thereby handling both specific and AF_UNSPEC cases
appropriately.

With this change, ESP GSO can use xfrm_ip2inner_mode to get the
correct inner mode. It doesn't affect existing callers, as the updated
logic now mirrors the checks they were already performing externally.

Fixes: 26dbd66eab ("esp: choose the correct inner protocol for GSO on inter address family tunnels")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-10-30 11:52:31 +01:00
Jianbo Liu
082ef944e5 xfrm: Check inner packet family directly from skb_dst
In the output path, xfrm_dev_offload_ok and xfrm_get_inner_ipproto
need to determine the protocol family of the inner packet (skb) before
it gets encapsulated.

In xfrm_dev_offload_ok, the code checked x->inner_mode.family. This is
unreliable because, for states handling both IPv4 and IPv6, the
relevant inner family could be either x->inner_mode.family or
x->inner_mode_iaf.family. Checking only the former can lead to a
mismatch with the actual packet being processed.

In xfrm_get_inner_ipproto, the code checked x->outer_mode.family. This
is also incorrect for tunnel mode, as the inner packet's family can be
different from the outer header's family.

At both of these call sites, the skb variable holds the original inner
packet. The most direct and reliable source of truth for its protocol
family is its destination entry. This patch fixes the issue by using
skb_dst(skb)->ops->family to ensure protocol-specific headers are only
accessed for the correct packet type.

Fixes: 91d8a53db2 ("xfrm: fix offloading of cross-family tunnels")
Fixes: 45a98ef492 ("net/xfrm: IPsec tunnel mode fix inner_ipproto setting in sec_path")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-10-30 11:52:06 +01:00
Lauri Tirkkonen
a45f15808f HID: lenovo: fixup Lenovo Yoga Slim 7x Keyboard rdesc
The keyboard of this device has the following in its report description
for Usage (Keyboard) in Collection (Application):

	# 0x15, 0x00,                    //  Logical Minimum (0)                52
	# 0x25, 0x65,                    //  Logical Maximum (101)              54
	# 0x05, 0x07,                    //  Usage Page (Keyboard)              56
	# 0x19, 0x00,                    //  Usage Minimum (0)                  58
	# 0x29, 0xdd,                    //  Usage Maximum (221)                60
	# 0x81, 0x00,                    //  Input (Data,Arr,Abs)               62

Since the Usage Min/Max range exceeds the Logical Min/Max range,
keypresses outside the Logical range are not recognized. This includes,
for example, the Japanese language keyboard variant's keys for |, _ and
\.

Fixup the report description to make the Logical range match the Usage
range, fixing the interpretation of keypresses above 101 on this device.

Signed-off-by: Lauri Tirkkonen <lauri@hacktheplanet.fi>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-10-30 11:40:52 +01:00
Zhang Heng
beab067dbc HID: quirks: work around VID/PID conflict for 0x4c4a/0x4155
Based on available evidence, the USB ID 4c4a:4155 used by multiple
devices has been attributed to Jieli. The commit 1a8953f4f7
("HID: Add IGNORE quirk for SMARTLINKTECHNOLOGY") affected touchscreen
functionality. Added checks for manufacturer and serial number to
maintain microphone compatibility, enabling both devices to function
properly.

[jkosina@suse.com: edit shortlog]
Fixes: 1a8953f4f7 ("HID: Add IGNORE quirk for SMARTLINKTECHNOLOGY")
Cc: stable@vger.kernel.org
Tested-by: staffan.melin@oscillator.se
Reviewed-by: Terry Junge <linuxhid@cosmicgizmosystems.com>
Signed-off-by: Zhang Heng <zhangheng@kylinos.cn>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
2025-10-30 11:25:44 +01:00
Andrea Righi
f4fa7c25f6 sched_ext: Fix use of uninitialized variable in scx_bpf_cpuperf_set()
scx_bpf_cpuperf_set() has a typo where it dereferences the local
variable @sch, instead of the global @scx_root pointer. Fix by
dereferencing the correct variable.

Fixes: 956f2b11a8 ("sched_ext: Drop kf_cpu_valid()")
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2025-10-29 05:14:39 -10:00
Andy Shevchenko
9db8d46712 mnt: Remove dead code which might prevent from building
Clang, in particular, is not happy about dead code:

fs/namespace.c:135:37: error: unused function 'node_to_mnt_ns' [-Werror,-Wunused-function]
  135 | static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
      |                                     ^~~~~~~~~~~~~~
1 error generated.

Remove a leftover from the previous cleanup.

Fixes: 7d7d164989 ("mnt: support ns lookup")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://patch.msgid.link/20251024132336.1666382-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-10-29 15:57:24 +01:00
Tetsuo Handa
34ab4c7558 bfs: Reconstruct file type when loading from disk
syzbot is reporting that S_IFMT bits of inode->i_mode can become bogus when
the S_IFMT bits of the 32bits "mode" field loaded from disk are corrupted
or when the 32bits "attributes" field loaded from disk are corrupted.

A documentation says that BFS uses only lower 9 bits of the "mode" field.
But I can't find an explicit explanation that the unused upper 23 bits
(especially, the S_IFMT bits) are initialized with 0.

Therefore, ignore the S_IFMT bits of the "mode" field loaded from disk.
Also, verify that the value of the "attributes" field loaded from disk is
either BFS_VREG or BFS_VDIR (because BFS supports only regular files and
the root directory).

Reported-by: syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Link: https://patch.msgid.link/fabce673-d5b9-4038-8287-0fd65d80203b@I-love.SAKURA.ne.jp
Reviewed-by: Tigran Aivazian <aivazian.tigran@gmail.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-10-29 14:39:34 +01:00
David Howells
330e2c5148 afs: Fix dynamic lookup to fail on cell lookup failure
When a process tries to access an entry in /afs, normally what happens is
that an automount dentry is created by ->lookup() and then triggered, which
jumps through the ->d_automount() op.  Currently, afs_dynroot_lookup() does
not do cell DNS lookup, leaving that to afs_d_automount() to perform -
however, it is possible to use access() or stat() on the automount point,
which will always return successfully, have briefly created an afs_cell
record if one did not already exist.

This means that something like:

        test -d "/afs/.west" && echo Directory exists

will print "Directory exists" even though no such cell is configured.  This
breaks the "west" python module available on PIP as it expects this access
to fail.

Now, it could be possible to make afs_dynroot_lookup() perform the DNS[*]
lookup, but that would make "ls --color /afs" do this for each cell in /afs
that is listed but not yet probed.  kafs-client, probably wrongly, preloads
the entire cell database and all the known cells are then listed in /afs -
and doing ls /afs would be very, very slow, especially if any cell supplied
addresses but was wholly inaccessible.

 [*] When I say "DNS", actually read getaddrinfo(), which could use any one
     of a host of mechanisms.  Could also use static configuration.

To fix this, make the following changes:

 (1) Create an enum to specify the origination point of a call to
     afs_lookup_cell() and pass this value into that function in place of
     the "excl" parameter (which can be derived from it).  There are six
     points of origination:

        - Cell preload through /proc/net/afs/cells
        - Root cell config through /proc/net/afs/rootcell
        - Lookup in dynamic root
        - Automount trigger
        - Direct mount with mount() syscall
        - Alias check where YFS tells us the cell name is different

 (2) Add an extra state into the afs_cell state machine to indicate a cell
     that's been initialised, but not yet looked up.  This is separate from
     one that can be considered active and has been looked up at least
     once.

 (3) Make afs_lookup_cell() vary its behaviour more, depending on where it
     was called from:

     If called from preload or root cell config, DNS lookup will not happen
     until we definitely want to use the cell (dynroot mount, automount,
     direct mount or alias check).  The cell will appear in /afs but stat()
     won't trigger DNS lookup.

     If the cell already exists, dynroot will not wait for the DNS lookup
     to complete.  If the cell did not already exist, dynroot will wait.

     If called from automount, direct mount or alias check, it will wait
     for the DNS lookup to complete.

 (4) Make afs_lookup_cell() return an error if lookup failed in one way or
     another.  We try to return -ENOENT if the DNS says the cell does not
     exist and -EDESTADDRREQ if we couldn't access the DNS.

Reported-by: Markus Suvanto <markus.suvanto@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220685
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://patch.msgid.link/1784747.1761158912@warthog.procyon.org.uk
Fixes: 1d0b929fc0 ("afs: Change dynroot to create contents on demand")
Tested-by: Markus Suvanto <markus.suvanto@gmail.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-10-29 13:51:38 +01:00
Qinxin Xia
23ee8a2563 dma-mapping: benchmark: Restore padding to ensure uABI remained consistent
The padding field in the structure was previously reserved to
maintain a stable interface for potential new fields, ensuring
compatibility with user-space shared data structures.
However,it was accidentally removed by tiantao in a prior commit,
which may lead to incompatibility between user space and the kernel.

This patch reinstates the padding to restore the original structure
layout and preserve compatibility.

Fixes: 8ddde07a3d ("dma-mapping: benchmark: extract a common header file for map_benchmark definition")
Cc: stable@vger.kernel.org
Acked-by: Barry Song <baohua@kernel.org>
Signed-off-by: Qinxin Xia <xiaqinxin@huawei.com>
Reported-by: Barry Song <baohua@kernel.org>
Closes: https://lore.kernel.org/lkml/CAGsJ_4waiZ2+NBJG+SCnbNk+nQ_ZF13_Q5FHJqZyxyJTcEop2A@mail.gmail.com/
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20251028120900.2265511-2-xiaqinxin@huawei.com
2025-10-29 09:41:40 +01:00
Frieder Schrempf
6504297872 arm64: dts: imx8mp-kontron: Fix USB OTG role switching
The VBUS supply regulator is currently assigned to the PHY node.
This causes the VBUS to be always on, even when the controller
needs to be switched to peripheral mode.

Fix the OTG role switching by adding a connector node and moving
the VBUS supply regulator to that node. This way the VBUS gets
correctly switched according to the current role.

Fixes: 946ab10e3f ("arm64: dts: Add support for Kontron OSM-S i.MX8MP SoM and BL carrier board")
Signed-off-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
2025-10-28 17:01:42 +08:00
Frank Li
1eb42bacd7 arm64: dts: imx95: Fix MSI mapping for PCIe endpoint nodes
The msi-map property was incorrectly applied to pcie0-ep instead of
pcie1-ep. Correct the msi-map for both pcie0-ep and pcie1-ep nodes.

Fixes: bbe4b2f7d6 ("arm64: dts: imx95: Add msi-map for pci-ep device")
Signed-off-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
2025-10-27 14:46:19 +08:00
João Paulo Gonçalves
ec4daace64 arm64: dts: imx8-ss-img: Avoid gpio0_mipi_csi GPIOs being deferred
The gpio0_mipi_csi DT nodes are enabled by default, but they are
dependent on the irqsteer_csi nodes, which are not enabled. This causes
the gpio0_mipi_csi GPIOs to be probe deferred. Since these GPIOs can be
used independently of the CSI controller, enable irqsteer_csi by default
too to prevent them from being deferred and to ensure they work out of
the box.

Fixes: 2217f82437 ("arm64: dts: imx8: add capture controller for i.MX8's img subsystem")
Signed-off-by: João Paulo Gonçalves <joao.goncalves@toradex.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
2025-10-27 13:58:33 +08:00
Andreas Kemnade
ff7b5a2743 arm: imx_v6_v7_defconfig: enable ext4 directly
In former times, ext4 was enabled implicitely by enabling ext3 but with
the ext3 fs gone, it does not get enabled, which lets devices fail to
mount root on non-initrd based boots with an ext4 root.

Fixes: d6ace46c82 ("ext4: remove obsolete EXT3 config options")
Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
2025-10-27 13:52:29 +08:00
Fangyu Yu
8c5fa3764f RISC-V: KVM: Remove automatic I/O mapping for VM_PFNMAP
As of commit aac6db75a9 ("vfio/pci: Use unmap_mapping_range()"),
vm_pgoff may no longer guaranteed to hold the PFN for VM_PFNMAP
regions. Using vma->vm_pgoff to derive the HPA here may therefore
produce incorrect mappings.

Instead, I/O mappings for such regions can be established on-demand
during g-stage page faults, making the upfront ioremap in this path
is unnecessary.

Fixes: aac6db75a9 ("vfio/pci: Use unmap_mapping_range()")
Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
Tested-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Guo Ren <guoren@kernel.org>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20251021142131.78796-1-fangyu.yu@linux.alibaba.com
Signed-off-by: Anup Patel <anup@brainfault.org>
2025-10-24 21:24:36 +05:30
Aaron Kling
6f37469a93 memory: tegra210: Fix incorrect client ids
The original commit had typos for two of the memory client ids. Fix them
to reference the correct bindings.

Fixes: 3804cef4c5 ("memory: tegra210: Use bindings for client ids")
Signed-off-by: Aaron Kling <webgeek1234@gmail.com>
Link: https://patch.msgid.link/20251021-t210-mem-clientid-fixup-v1-1-5094946faa31@gmail.com
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
2025-10-23 15:32:21 +02:00
Krzysztof Kozlowski
316e361b5d dt-bindings: pinctrl: toshiba,visconti: Fix number of items in groups
The "groups" property can hold multiple entries (e.g.
toshiba/tmpv7708-rm-mbrc.dts file), so allow that by dropping incorrect
type (pinmux-node.yaml schema already defines that as string-array) and
adding constraints for items.  This fixes dtbs_check warnings like:

  toshiba/tmpv7708-rm-mbrc.dtb: pinctrl@24190000 (toshiba,tmpv7708-pinctrl):
    pwm-pins:groups: ['pwm0_gpio16_grp', 'pwm1_gpio17_grp', 'pwm2_gpio18_grp', 'pwm3_gpio19_grp'] is too long

Fixes: 1825c1fe00 ("pinctrl: Add DT bindings for Toshiba Visconti TMPV7700 SoC")
Cc: stable@vger.kernel.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2025-10-23 15:14:54 +02:00
Yu-Chun Lin
369f772299 pinctrl: realtek: Select REGMAP_MMIO for RTD driver
The pinctrl-rtd driver uses 'devm_regmap_init_mmio', which requires
'REGMAP_MMIO' to be enabled.

Without this selection, the build fails with an undefined reference:
aarch64-none-linux-gnu-ld: drivers/pinctrl/realtek/pinctrl-rtd.o: in
function rtd_pinctrl_probe': pinctrl-rtd.c:(.text+0x5a0): undefined
reference to __devm_regmap_init_mmio_clk'

Fix this by selecting 'REGMAP_MMIO' in the Kconfig.

Fixes: e99ce78030 ("pinctrl: realtek: Add common pinctrl driver for Realtek DHC RTD SoCs")
Signed-off-by: Yu-Chun Lin <eleanor.lin@realtek.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2025-10-23 15:09:01 +02:00
Niravkumar L Rabara
5c56bf214a mtd: rawnand: cadence: fix DMA device NULL pointer dereference
The DMA device pointer `dma_dev` was being dereferenced before ensuring
that `cdns_ctrl->dmac` is properly initialized.

Move the assignment of `dma_dev` after successfully acquiring the DMA
channel to ensure the pointer is valid before use.

Fixes: d76d22b509 ("mtd: rawnand: cadence: use dma_map_resource for sdma address")
Cc: stable@vger.kernel.org
Signed-off-by: Niravkumar L Rabara <niravkumarlaxmidas.rabara@altera.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2025-10-23 09:03:13 +02:00
Chen-Yu Tsai
2050280a4b clk: sunxi-ng: sun55i-a523-ccu: Lower audio0 pll minimum rate
While the user manual states that the PLL's rate should be between 180
MHz and 3 GHz in the register defninition section, it also says the
actual operating frequency is 22.5792*4 MHz in the PLL features table.

22.5792*4 MHz is one of the actual clock rates that we want and is
is available in the SDM table. Lower the minimum clock rate to 90 MHz
so that both rates in the SDM table can be used.

Fixes: 7cae1e2b55 ("clk: sunxi-ng: Add support for the A523/T527 CCU PLLs")
Reviewed-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Link: https://patch.msgid.link/20251020171059.2786070-7-wens@kernel.org
Signed-off-by: Chen-Yu Tsai <wens@kernel.org>
2025-10-23 02:06:47 +08:00
Chen-Yu Tsai
5888533c60 clk: sunxi-ng: sun55i-a523-r-ccu: Mark bus-r-dma as critical
The "bus-r-dma" clock in the A523's PRCM clock controller is also
referred to as "DMA_CLKEN_SW" or "DMA ADB400 gating". It is unclear how
this ties into the DMA controller MBUS clock gate; however if the clock
is not enabled, the DMA controller in the MCU block will fail to access
DRAM, even failing to retrieve the DMA descriptors.

Mark this clock as critical. This sort of mirrors what is done for the
main DMA controller's MBUS clock, which has a separate toggle that is
currently left out of the main clock controller driver.

Fixes: 8cea339cfb ("clk: sunxi-ng: add support for the A523/T527 PRCM CCU")
Acked-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Link: https://patch.msgid.link/20251020171059.2786070-6-wens@kernel.org
Signed-off-by: Chen-Yu Tsai <wens@kernel.org>
2025-10-23 02:06:47 +08:00
Sudeep Holla
7458f72cc2 pmdomain: arm: scmi: Fix genpd leak on provider registration failure
If of_genpd_add_provider_onecell() fails during probe, the previously
created generic power domains are not removed, leading to a memory leak
and potential kernel crash later in genpd_debug_add().

Add proper error handling to unwind the initialized domains before
returning from probe to ensure all resources are correctly released on
failure.

Example crash trace observed without this fix:

  | Unable to handle kernel paging request at virtual address fffffffffffffc70
  | CPU: 1 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.18.0-rc1 #405 PREEMPT
  | Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform
  | pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
  | pc : genpd_debug_add+0x2c/0x160
  | lr : genpd_debug_init+0x74/0x98
  | Call trace:
  |  genpd_debug_add+0x2c/0x160 (P)
  |  genpd_debug_init+0x74/0x98
  |  do_one_initcall+0xd0/0x2d8
  |  do_initcall_level+0xa0/0x140
  |  do_initcalls+0x60/0xa8
  |  do_basic_setup+0x28/0x40
  |  kernel_init_freeable+0xe8/0x170
  |  kernel_init+0x2c/0x140
  |  ret_from_fork+0x10/0x20

Fixes: 898216c97e ("firmware: arm_scmi: add device power domain support using genpd")
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Reviewed-by: Peng Fan <peng.fan@nxp.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2025-10-22 16:16:14 +02:00
Li Qiang
9631350885 mtd: rawnand: realtek: Make rtl_ecc_engine_ops const
The rtl_ecc_engine_ops structure is only used to provide a set of
callback functions and is never modified after initialization.
Mark it as const so it can be placed in the read-only section, which
improves safety and allows better compiler optimization.

Signed-off-by: Li Qiang <liqiang01@kylinos.cn>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2025-10-22 11:54:42 +02:00
Geert Uytterhoeven
0d9c80aa57 mtd: nand: MTD_NAND_ECC_REALTEK should depend on HAS_DMA
If CONFIG_NO_DMA=y:

    ERROR: modpost: "dma_free_pages" [drivers/mtd/nand/ecc-realtek.ko] undefined!
    ERROR: modpost: "dma_alloc_pages" [drivers/mtd/nand/ecc-realtek.ko] undefined!

The driver cannot function without DMA, hence fix this by adding a
dependency on HAS_DMA.

Fixes: 3148d0e5b1 ("mtd: nand: realtek-ecc: Add Realtek external ECC engine support")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2025-10-22 11:54:29 +02:00
Dan Carpenter
9225f02ff2 mtd: nand: realtek-ecc: Fix a IS_ERR() vs NULL bug in probe
The dma_alloc_noncoherent() function doesn't return error pointers, it
returns NULL on error.  Fix the error checking to match.

Fixes: 3148d0e5b1 ("mtd: nand: realtek-ecc: Add Realtek external ECC engine support")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2025-10-22 11:54:18 +02:00
Dan Carpenter
e4185bed73 mtdchar: fix integer overflow in read/write ioctls
The "req.start" and "req.len" variables are u64 values that come from the
user at the start of the function.  We mask away the high 32 bits of
"req.len" so that's capped at U32_MAX but the "req.start" variable can go
up to U64_MAX which means that the addition can still integer overflow.

Use check_add_overflow() to fix this bug.

Fixes: 095bb6e44e ("mtdchar: add MEMREAD ioctl")
Fixes: 6420ac0af9 ("mtdchar: prevent unbounded allocation in MEMWRITE ioctl")
Cc: stable@vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
2025-10-22 11:53:59 +02:00
Shawn Lin
a28352cf2d mmc: sdhci-of-dwcmshc: Change DLL_STRBIN_TAPNUM_DEFAULT to 0x4
strbin signal delay under 0x8 configuration is not stable after massive
test. The recommandation of it should be 0x4.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Tested-by: Alexey Charkov <alchark@gmail.com>
Tested-by: Hugh Cole-Baker <sigmaris@gmail.com>
Fixes: 08f3dff799 ("mmc: sdhci-of-dwcmshc: add rockchip platform support")
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2025-10-22 11:46:13 +02:00
Jernej Skrabec
1dba74abf3 clk: sunxi-ng: Mark A523 bus-r-cpucfg clock as critical
bus-r-cpucfg clock is important for peripheral which takes care of
powering CPU cores on and off. Since this operation is done by firmware
(TF-A), mark it as critical. That way Linux won't interfere with that
clock.

Fixes: 8cea339cfb ("clk: sunxi-ng: add support for the A523/T527 PRCM CCU")
Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Tested-by: Andre Przywara <andre.przywara@arm.com>
Link: https://patch.msgid.link/20251020152704.4804-1-jernej.skrabec@gmail.com
Signed-off-by: Chen-Yu Tsai <wens@kernel.org>
2025-10-22 00:54:29 +08:00
Shuai Xue
7c3643f204 acpi,srat: Fix incorrect device handle check for Generic Initiator
The Generic Initiator Affinity Structure in SRAT table uses device
handle type field to indicate the device type. According to ACPI
specification, the device handle type value of 1 represents PCI device,
not 0.

Fixes: 894c26a1c2 ("ACPI: Support Generic Initiator only domains")
Reported-by: Wu Zongyong <wuzongyong@linux.alibaba.com>
Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20250913023224.39281-1-xueshuai@linux.alibaba.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-10-21 07:49:58 -07:00
André Draszik
90c82941ad pmdomain: samsung: plug potential memleak during probe
of_genpd_add_provider_simple() could fail, in which case this code
leaks the domain name, pd->pd.name.

Use devm_kstrdup_const() to plug this leak. As a side-effect, we can
simplify existing error handling.

Fixes: c09a3e6c97 ("soc: samsung: pm_domains: Convert to regular platform driver")
Cc: stable@vger.kernel.org
Reviewed-by: Peter Griffin <peter.griffin@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: André Draszik <andre.draszik@linaro.org>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2025-10-21 15:52:14 +02:00
Hongbo Li
2c2b67af5f hostfs: Fix only passing host root in boot stage with new mount
In the old mount proceedure, hostfs could only pass root directory during
boot. This is because it constructed the root directory using the @root_ino
event without any mount options. However, when using it with the new mount
API, this step is no longer triggered. As a result, if users mounts without
specifying any mount options, the @host_root_path remains uninitialized. To
prevent this issue, the @host_root_path should be initialized at the time
of allocation.

Reported-by: Geoffrey Thorpe <geoff@geoffthorpe.net>
Closes: https://lore.kernel.org/all/643333a0-f434-42fb-82ac-d25a0b56f3b7@geoffthorpe.net/
Fixes: cd140ce9f6 ("hostfs: convert hostfs to use the new mount API")
Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
Link: https://patch.msgid.link/20251011092235.29880-1-lihongbo22@huawei.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-10-21 14:22:42 +02:00
Zhen Ni
0778ac7df5 fs: Fix uninitialized 'offp' in statmount_string()
In statmount_string(), most flags assign an output offset pointer (offp)
which is later updated with the string offset. However, the
STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP cases directly set the
struct fields instead of using offp. This leaves offp uninitialized,
leading to a possible uninitialized dereference when *offp is updated.

Fix it by assigning offp for UIDMAP and GIDMAP as well, keeping the code
path consistent.

Fixes: 37c4a9590e ("statmount: allow to retrieve idmappings")
Fixes: e52e97f09f ("statmount: let unset strings be empty")
Cc: stable@vger.kernel.org
Signed-off-by: Zhen Ni <zhen.ni@easystack.cn>
Link: https://patch.msgid.link/20251013114151.664341-1-zhen.ni@easystack.cn
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-10-21 14:21:46 +02:00
Johan Hovold
a7b17ece40 mmc: wmt-sdmmc: fix compile test default
Enabling compile testing should not enable every individual driver (we
have "allyesconfig" for that).

Fixes: 7cd8db0fb0 ("mmc: add COMPILE_TEST to multiple drivers")
Cc: Mikko Rapeli <mikko.rapeli@linaro.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2025-10-21 13:37:18 +02:00
Sabrina Dubroca
f2bc8231fd xfrm: check all hash buckets for leftover states during netns deletion
The current hlist_empty checks only test the first bucket of each
hashtable, ignoring any other bucket. They should be caught by the
WARN_ON for state_all, but better to make all the checks accurate.

Fixes: 73d189dce4 ("netns xfrm: per-netns xfrm_state_bydst hash")
Fixes: d320bbb306 ("netns xfrm: per-netns xfrm_state_bysrc hash")
Fixes: b754a4fd8f ("netns xfrm: per-netns xfrm_state_byspi hash")
Fixes: fe9f1d8779 ("xfrm: add state hashtable keyed by seq")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-10-21 10:42:45 +02:00
Sabrina Dubroca
1dcf617bec xfrm: set err and extack on failure to create pcpu SA
xfrm_state_construct can fail without setting an error if the
requested pcpu_num value is too big. Set err and add an extack message
to avoid confusing userspace.

Fixes: 1ddf9916ac ("xfrm: Add support for per cpu xfrm state handling.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-10-21 10:42:45 +02:00
Sabrina Dubroca
7f02285764 xfrm: call xfrm_dev_state_delete when xfrm_state_migrate fails to add the state
In case xfrm_state_migrate fails after calling xfrm_dev_state_add, we
directly release the last reference and destroy the new state, without
calling xfrm_dev_state_delete (this only happens in
__xfrm_state_delete, which we're not calling on this path, since the
state was never added).

Call xfrm_dev_state_delete on error when an offload configuration was
provided.

Fixes: ab244a394c ("xfrm: Migrate offload configuration")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-10-21 10:42:44 +02:00
Sabrina Dubroca
5502bc4746 xfrm: make state as DEAD before final put when migrate fails
xfrm_state_migrate/xfrm_state_clone_and_setup create a new state, and
call xfrm_state_put to destroy it in case of
failure. __xfrm_state_destroy expects the state to be in
XFRM_STATE_DEAD, but we currently don't do that.

Reported-by: syzbot+5cd6299ede4d4f70987b@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=5cd6299ede4d4f70987b
Fixes: 78347c8c6b ("xfrm: Fix xfrm_state_migrate leak")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-10-21 10:42:43 +02:00
Sabrina Dubroca
10deb69864 xfrm: also call xfrm_state_delete_tunnel at destroy time for states that were never added
In commit b441cf3f8c ("xfrm: delete x->tunnel as we delete x"), I
missed the case where state creation fails between full
initialization (->init_state has been called) and being inserted on
the lists.

In this situation, ->init_state has been called, so for IPcomp
tunnels, the fallback tunnel has been created and added onto the
lists, but the user state never gets added, because we fail before
that. The user state doesn't go through __xfrm_state_delete, so we
don't call xfrm_state_delete_tunnel for those states, and we end up
leaking the FB tunnel.

There are several codepaths affected by this: the add/update paths, in
both net/key and xfrm, and the migrate code (xfrm_migrate,
xfrm_state_migrate). A "proper" rollback of the init_state work would
probably be doable in the add/update code, but for migrate it gets
more complicated as multiple states may be involved.

At some point, the new (not-inserted) state will be destroyed, so call
xfrm_state_delete_tunnel during xfrm_state_gc_destroy. Most states
will have their fallback tunnel cleaned up during __xfrm_state_delete,
which solves the issue that b441cf3f8c (and other patches before it)
aimed at. All states (including FB tunnels) will be removed from the
lists once xfrm_state_fini has called flush_work(&xfrm_state_gc_work).

Reported-by: syzbot+999eb23467f83f9bf9bf@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=999eb23467f83f9bf9bf
Fixes: b441cf3f8c ("xfrm: delete x->tunnel as we delete x")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-10-21 10:42:43 +02:00
Sabrina Dubroca
8d2a2a49c3 xfrm: drop SA reference in xfrm_state_update if dir doesn't match
We're not updating x1, but we still need to put() it.

Fixes: a4a87fa4e9 ("xfrm: Add Direction to the SA in or out")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
2025-10-21 10:42:42 +02:00
Heiko Stuebner
26f0f122f9 arm64: dts: rockchip: Fix indentation on rk3399 haikou demo dtso
The regulator-cam-dovdd-1v8 uses spaces for indentation, where it should
use tabs. Fix this.

Fixes: 066a69db9d ("arm64: dts: rockchip: add overlay for RK3399 Puma Haikou Video Demo adapter")
Signed-off-by: Heiko Stuebner <heiko.stuebner@cherry.de>
Reviewed-by: Quentin Schulz <quentin.schulz@cherry.de>
Link: https://patch.msgid.link/20251020091139.3652738-1-heiko@sntech.de
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-10-21 08:58:43 +02:00
Sean Christopherson
9d7dfb95da KVM: VMX: Inject #UD if guest tries to execute SEAMCALL or TDCALL
Add VMX exit handlers for SEAMCALL and TDCALL to inject a #UD if a non-TD
guest attempts to execute SEAMCALL or TDCALL.  Neither SEAMCALL nor TDCALL
is gated by any software enablement other than VMXON, and so will generate
a VM-Exit instead of e.g. a native #UD when executed from the guest kernel.

Note!  No unprivileged DoS of the L1 kernel is possible as TDCALL and
SEAMCALL #GP at CPL > 0, and the CPL check is performed prior to the VMX
non-root (VM-Exit) check, i.e. userspace can't crash the VM. And for a
nested guest, KVM forwards unknown exits to L1, i.e. an L2 kernel can
crash itself, but not L1.

Note #2!  The Intel® Trust Domain CPU Architectural Extensions spec's
pseudocode shows the CPL > 0 check for SEAMCALL coming _after_ the VM-Exit,
but that appears to be a documentation bug (likely because the CPL > 0
check was incorrectly bundled with other lower-priority #GP checks).
Testing on SPR and EMR shows that the CPL > 0 check is performed before
the VMX non-root check, i.e. SEAMCALL #GPs when executed in usermode.

Note #3!  The aforementioned Trust Domain spec uses confusing pseudocode
that says that SEAMCALL will #UD if executed "inSEAM", but "inSEAM"
specifically means in SEAM Root Mode, i.e. in the TDX-Module.  The long-
form description explicitly states that SEAMCALL generates an exit when
executed in "SEAM VMX non-root operation".  But that's a moot point as the
TDX-Module injects #UD if the guest attempts to execute SEAMCALL, as
documented in the "Unconditionally Blocked Instructions" section of the
TDX-Module base specification.

Cc: stable@vger.kernel.org
Cc: Kai Huang <kai.huang@intel.com>
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20251016182148.69085-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
2025-10-20 09:37:04 -07:00
Jihed Chaibi
f31e261712 ARM: dts: imx51-zii-rdu1: Fix audmux node names
Rename the 'ssi2' and 'aud3' nodes to 'mux-ssi2' and 'mux-aud3' in the
audmux configuration of imx51-zii-rdu1.dts to comply with the naming
convention in imx-audmux.yaml.

This fixes the following dt-schema warning:

  imx51-zii-rdu1.dtb: audmux@83fd0000 (fsl,imx51-audmux): 'aud3', 'ssi2'
  do not match any of the regexes: '^mux-[0-9a-z]*$', '^pinctrl-[0-9]+$'

Fixes: ceef0396f3 ("ARM: dts: imx: add ZII RDU1 board")
Signed-off-by: Jihed Chaibi <jihed.chaibi.dev@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
2025-10-20 20:14:21 +08:00
Dario Binacchi
62bf7708fe ARM: dts: imx6ull-engicam-microgea-rmm: fix report-rate-hz value
The 'report-rate-hz' property for the edt-ft5x06 driver was added and
handled in the Linux kernel by me with patches [1] and [2] for this
specific board.

The v1 upstream version, which was the one applied to the customer's
kernel, used the 'report-rate' property, which was written directly to
the controller register. During review, the 'hz' suffix was added,
changing its handling so that writing the value directly to the register
was no longer possible for the M06 controller.

Once the patches were accepted in mainline, I did not reapply them to
the customer's kernel, and when upstreaming the DTS for this board, I
forgot to correct the 'report-rate-hz' property value.

The property must be set to 60 because this board uses the M06 controller,
which expects the report rate in units of 10 Hz, meaning the actual value
written to the register is 6.

[1] 625f829586 ("dt-bindings: input: touchscreen: edt-ft5x06: add report-rate-hz")
[2] 5bcee83a40 ("Input: edt-ft5x06 - set report rate by dts property")
Fixes: ffea3cac94 ("ARM: dts: imx6ul: support Engicam MicroGEA RMM board")
Co-developed-by: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
2025-10-20 19:59:54 +08:00
Seungjin Bae
69aeb50731 Input: pegasus-notetaker - fix potential out-of-bounds access
In the pegasus_notetaker driver, the pegasus_probe() function allocates
the URB transfer buffer using the wMaxPacketSize value from
the endpoint descriptor. An attacker can use a malicious USB descriptor
to force the allocation of a very small buffer.

Subsequently, if the device sends an interrupt packet with a specific
pattern (e.g., where the first byte is 0x80 or 0x42),
the pegasus_parse_packet() function parses the packet without checking
the allocated buffer size. This leads to an out-of-bounds memory access.

Fixes: 1afca2b66a ("Input: add Pegasus Notetaker tablet driver")
Signed-off-by: Seungjin Bae <eeodqql09@gmail.com>
Link: https://lore.kernel.org/r/20251007214131.3737115-2-eeodqql09@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
2025-10-17 18:04:15 -07:00
Fangyu Yu
873f10cf8e RISC-V: KVM: Read HGEIP CSR on the correct cpu
When executing kvm_riscv_vcpu_aia_has_interrupts, the vCPU may have
migrated and the IMSIC VS-file have not been updated yet, currently
the HGEIP CSR should be read from the imsic->vsfile_cpu ( the pCPU
before migration ) via on_each_cpu_mask, but this will trigger an
IPI call and repeated IPI within a period of time is expensive in
a many-core systems.

Just let the vCPU execute and update the correct IMSIC VS-file via
kvm_riscv_vcpu_aia_imsic_update may be a simple solution.

Fixes: 4cec89db80 ("RISC-V: KVM: Move HGEI[E|P] CSR access to IMSIC virtualization")
Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
Reviewed-by: Guo Ren <guoren@kernel.org>
Reviewed-by: Anup Patel <anup@brainfault.org>
Tested-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20251016012659.82998-1-fangyu.yu@linux.alibaba.com
Signed-off-by: Anup Patel <anup@brainfault.org>
2025-10-17 13:10:01 +05:30
Tao Ren
8589309453 ARM: dts: aspeed: fuji-data64: Enable mac3 controller
"mac3" controller was removed from the initial version of fuji-data64
dts because the rgmii setting is incorrect, but dropping mac3 leads to
regression in the existing fuji platform, because fuji.dts simply
includes fuji-data64.dts.

This patch adds mac3 back to fuji-data64.dts to fix the fuji regression[1],
and rgmii settings need to be fixed later.

Fixes: b0f294fdfc ("ARM: dts: aspeed: facebook-fuji: Include facebook-fuji-data64.dts")
Link: https://lore.kernel.org/all/79ddc7b9-ef26-4959-9a16-aa4e006eb145@roeck-us.net/ [1]
Signed-off-by: Tao Ren <rentao.bupt@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Andrew Jeffery <andrew@codeconstruct.com.au>
2025-10-17 16:29:40 +10:30
Samuel Holland
ea138a6077 RISC-V: KVM: Fix check for local interrupts on riscv32
To set all 64 bits in the mask on a 32-bit system, the constant must
have type `unsigned long long`.

Fixes: 6b1e8ba4ba ("RISC-V: KVM: Use bitmap for irqs_pending and irqs_pending_mask")
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20251016001714.3889380-1-samuel.holland@sifive.com
Signed-off-by: Anup Patel <anup@brainfault.org>
2025-10-16 17:17:29 +05:30
Russell King (Oracle)
ce121914f3 arm64: tegra: Mark Jetson Xavier NX's PHY as a wakeup source
Mark the RTL8211F PHY as a wakeup source for the Jetson Xavier NX.
This allows the reworked RTL8211F driver to know that the PHY is
wired to wakeup capable hardware, and thus to expose WoL capabilities.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
2025-10-16 11:49:08 +02:00
Dragan Simic
b3fd04e23f arm64: dts: rockchip: Make RK3588 GPU OPP table naming less generic
Unify the naming of the existing GPU OPP table nodes found in the RK3588
and RK3588J SoC dtsi files with the other SoC's GPU OPP nodes, following
the more "modern" node naming scheme.

Fixes: a7b2070505 ("arm64: dts: rockchip: Split GPU OPPs of RK3588 and RK3588j")
Signed-off-by: Dragan Simic <dsimic@manjaro.org>
[opp-table also is way too generic on systems with like 4-5 opp-tables]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-10-14 20:04:56 +02:00
Diederik de Haas
afb5f84b21 arm64: dts: rockchip: Drop 'rockchip,grf' prop from tsadc on rk3328
The 'rockchip,grf' property for tsadc in rk3328 wasn't actually used in
the driver and is no longer allowed in the DT since commit
e881662aa0 ("dt-bindings: thermal: rockchip: Tighten grf requirements")

So remove that property which fixes the following DT validation issue

  tsadc@ff250000 (rockchip,rk3328-tsadc): rockchip,grf: False schema does not allow [[58]]

Signed-off-by: Diederik de Haas <didi.debian@cknow.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-10-14 18:27:35 +02:00
Alexey Charkov
05b80cd1f3 arm64: dts: rockchip: Remove non-functioning CPU OPPs from RK3576
Drop the top-frequency OPPs from both the LITTLE and big CPU clusters on
RK3576, as neither the opensource TF-A [1] nor the recent (after v1.08)
binary BL31 images provided by Rockchip expose those.

This fixes the problem [2] when the cpufreq governor tries to jump
directly to the highest-frequency OPP, which results in a failed SCMI call
leaving the system stuck at the previous OPP before the attempted change.

[1] https://github.com/ARM-software/arm-trusted-firmware/blob/master/plat/rockchip/rk3576/scmi/rk3576_clk.c#L264-L304
[2] https://lore.kernel.org/linux-rockchip/CABjd4Yz4NbqzZH4Qsed3ias56gcga9K6CmYA+BLDBxtbG915Ag@mail.gmail.com/

Fixes: 57b1ce9039 ("arm64: dts: rockchip: Add rk3576 SoC base DT")
Cc: stable@vger.kernel.org
Signed-off-by: Alexey Charkov <alchark@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-10-14 18:25:50 +02:00
Andrey Leonchikov
e179de737d arm64: dts: rockchip: Fix PCIe power enable pin for BigTreeTech CB2 and Pi2
Fix typo into regulator GPIO definition. With current definition, PCIe
doesn't start up. Valid definition is already used in  "pinctrl" section,
"pcie_drv" (gpio4, RK_PB1).

Fixes: bfbc663d27 ("arm64: dts: rockchip: Add BigTreeTech CB2 and Pi2")
Signed-off-by: Andrey Leonchikov <andreil499@gmail.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-10-14 18:22:50 +02:00
Anand Moon
d425aef66e arm64: dts: rockchip: Set correct pinctrl for I2S1 8ch TX on odroid-m1
Enable proper pin multiplexing for the I2S1 8-channel transmit interface by
adding the default pinctrl configuration which esures correct signal routing
and avoids pinmux conflicts during audio playback.

Changes fix the error
[  116.856643] [    T782] rockchip-pinctrl pinctrl: pin gpio1-10 already requested by affinity_hint; cannot claim for fe410000.i2s
[  116.857567] [    T782] rockchip-pinctrl pinctrl: error -EINVAL: pin-42 (fe410000.i2s)
[  116.857618] [    T782] rockchip-pinctrl pinctrl: error -EINVAL: could not request pin 42 (gpio1-10) from group i2s1m0-sdi1 on device rockchip-pinctrl
[  116.857659] [    T782] rockchip-i2s-tdm fe410000.i2s: Error applying setting, reverse things back

I2S1 on the M1 to the codec in the RK809 only uses the SCLK, LRCK, SDI0
and SDO0 signals, so limit the claimed pins to those.

With this change audio output works as expected:

$ aplay -l
**** List of PLAYBACK Hardware Devices ****
card 0: HDMI [HDMI], device 0: fe400000.i2s-i2s-hifi i2s-hifi-0 [fe400000.i2s-i2s-hifi i2s-hifi-0]
  Subdevices: 1/1
  Subdevice #0: subdevice #0
card 1: RK817 [Analog RK817], device 0: fe410000.i2s-rk817-hifi rk817-hifi-0 [fe410000.i2s-rk817-hifi rk817-hifi-0]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

Fixes: 78f858447c ("arm64: dts: rockchip: Add analog audio on ODROID-M1")
Cc: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Anand Moon <linux.amoon@gmail.com>
[adapted the commit message a bit]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
2025-10-14 18:15:12 +02:00
Martyn Welch
7363096a5a Input: goodix - remove setting of RST pin to input
The reset line is being set to input on non-ACPI devices apparently to
save power. This isn't being done on ACPI devices as it's been found
that some ACPI devices don't have a pull-up resistor fitted. This can
also be the case for non-ACPI devices, resulting in:

[  941.672207] Goodix-TS 1-0014: Error reading 10 bytes from 0x814e: -110
[  942.696168] Goodix-TS 1-0014: Error reading 10 bytes from 0x814e: -110
[  945.832208] Goodix-TS 1-0014: Error reading 10 bytes from 0x814e: -110

This behaviour appears to have been initialing introduced in
ec6e1b4082. This doesn't seem to be based on information in either the
GT911 or GT9271 datasheets cited as sources of information for this
change. Thus it seems likely that it is based on functionality in the
Android driver which it also lists. This behaviour may be viable in very
specific instances where the hardware is well known, but seems unwise in
the upstream kernel where such hardware requirements can't be
guaranteed.

Remove this over optimisation to improve reliability on non-ACPI
devices.

Signed-off-by: Martyn Welch <martyn.welch@collabora.com>
Reviewed-by: Hans de Goede <hansg@kernel.org>
Link: https://lore.kernel.org/r/20251009134138.686215-1-martyn.welch@collabora.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
2025-10-13 09:35:26 -07:00
Hans de Goede
c6d99e4881 Input: goodix - add support for ACPI ID GDIX1003
Some newer devices use an ACPI hardware ID of GDIX1003 for their Goodix
touchscreen controller, instead of GDIX1001 / GDIX1002. Add GDIX1003
to the goodix_acpi_match[] table.

Reported-by: Weikang Guo <guoweikang.kernel@gmail.com>
Closes: https://lore.kernel.org/linux-input/20250225024409.1467040-1-guoweikang.kernel@gmail.com/
Tested-by: Weikang Guo <guoweikang.kernel@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20251013121022.44333-1-hansg@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
2025-10-13 09:16:11 -07:00
Louis-Alexis Eyraud
518919276c pinctrl: mediatek: mt8189: align register base names to dt-bindings ones
The mt8189-pinctrl driver requires to probe that a device tree uses
in the device node the same names than mt8189_pinctrl_register_base_names
array. But they are not matching the required ones in the
"mediatek,mt8189-pinctrl" dt-bindings, leading to possible dtbs check
issues. The mt8189_pinctrl_register_base_names entry order is also
different.
So, align all mt8189_pinctrl_register_base_names entry names and order
on dt-bindings.

Fixes: a3fe1324c3 ("pinctrl: mediatek: Add pinctrl driver for mt8189")
Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2025-10-13 13:08:36 +02:00
Louis-Alexis Eyraud
404ee89b40 pinctrl: mediatek: mt8196: align register base names to dt-bindings ones
The mt8196-pinctrl driver requires to probe that a device tree uses
in the device node the same names than mt8196_pinctrl_register_base_names
array. But they are not matching the required ones in the
"mediatek,mt8196-pinctrl" dt-bindings, leading to possible dtbs check
issues.
So, align all mt8196_pinctrl_register_base_names entries on dt-bindings
ones.

Fixes: f7a29377c2 ("pinctrl: mediatek: Add pinctrl driver on mt8196")
Signed-off-by: Louis-Alexis Eyraud <louisalexis.eyraud@collabora.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
2025-10-13 13:07:36 +02:00
532 changed files with 6189 additions and 2604 deletions

View File

@@ -206,6 +206,7 @@ Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com>
David Brownell <david-b@pacbell.net>
David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
David Heidelberg <david@ixit.cz> <d.okias@gmail.com>
David Hildenbrand <david@kernel.org> <david@redhat.com>
David Rheinsberg <david@readahead.eu> <dh.herrmann@gmail.com>
David Rheinsberg <david@readahead.eu> <dh.herrmann@googlemail.com>
David Rheinsberg <david@readahead.eu> <david.rheinsberg@gmail.com>
@@ -426,7 +427,7 @@ Kenneth W Chen <kenneth.w.chen@intel.com>
Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org>
Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org>
Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com>
Kirill A. Shutemov <kas@kernel.org> <kirill.shutemov@linux.intel.com>
Kiryl Shutsemau <kas@kernel.org> <kirill.shutemov@linux.intel.com>
Kishon Vijay Abraham I <kishon@kernel.org> <kishon@ti.com>
Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@linaro.org>
Konrad Dybcio <konradybcio@kernel.org> <konrad.dybcio@somainline.org>
@@ -437,6 +438,7 @@ Krishna Manikandan <quic_mkrishn@quicinc.com> <mkrishn@codeaurora.org>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@canonical.com>
Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@linaro.org>
Krzysztof Wilczyński <kwilczynski@kernel.org> <krzysztof.wilczynski@linux.com>
Krzysztof Wilczyński <kwilczynski@kernel.org> <kw@linux.com>
Kshitiz Godara <quic_kgodara@quicinc.com> <kgodara@codeaurora.org>
@@ -605,7 +607,8 @@ Oleksij Rempel <o.rempel@pengutronix.de>
Oleksij Rempel <o.rempel@pengutronix.de> <ore@pengutronix.de>
Oliver Hartkopp <socketcan@hartkopp.net> <oliver.hartkopp@volkswagen.de>
Oliver Hartkopp <socketcan@hartkopp.net> <oliver@hartkopp.net>
Oliver Upton <oliver.upton@linux.dev> <oupton@google.com>
Oliver Upton <oupton@kernel.org> <oupton@google.com>
Oliver Upton <oupton@kernel.org> <oliver.upton@linux.dev>
Ondřej Jirman <megi@xff.cz> <megous@megous.com>
Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org>
Pali Rohár <pali@kernel.org> <pali.rohar@gmail.com>

View File

@@ -50,18 +50,20 @@ patternProperties:
groups:
description:
Name of the pin group to use for the functions.
$ref: /schemas/types.yaml#/definitions/string
enum: [i2c0_grp, i2c1_grp, i2c2_grp, i2c3_grp, i2c4_grp,
i2c5_grp, i2c6_grp, i2c7_grp, i2c8_grp,
spi0_grp, spi0_cs0_grp, spi0_cs1_grp, spi0_cs2_grp,
spi1_grp, spi2_grp, spi3_grp, spi4_grp, spi5_grp, spi6_grp,
uart0_grp, uart1_grp, uart2_grp, uart3_grp,
pwm0_gpio4_grp, pwm0_gpio8_grp, pwm0_gpio12_grp,
pwm0_gpio16_grp, pwm1_gpio5_grp, pwm1_gpio9_grp,
pwm1_gpio13_grp, pwm1_gpio17_grp, pwm2_gpio6_grp,
pwm2_gpio10_grp, pwm2_gpio14_grp, pwm2_gpio18_grp,
pwm3_gpio7_grp, pwm3_gpio11_grp, pwm3_gpio15_grp,
pwm3_gpio19_grp, pcmif_out_grp, pcmif_in_grp]
items:
enum: [i2c0_grp, i2c1_grp, i2c2_grp, i2c3_grp, i2c4_grp,
i2c5_grp, i2c6_grp, i2c7_grp, i2c8_grp,
spi0_grp, spi0_cs0_grp, spi0_cs1_grp, spi0_cs2_grp,
spi1_grp, spi2_grp, spi3_grp, spi4_grp, spi5_grp, spi6_grp,
uart0_grp, uart1_grp, uart2_grp, uart3_grp,
pwm0_gpio4_grp, pwm0_gpio8_grp, pwm0_gpio12_grp,
pwm0_gpio16_grp, pwm1_gpio5_grp, pwm1_gpio9_grp,
pwm1_gpio13_grp, pwm1_gpio17_grp, pwm2_gpio6_grp,
pwm2_gpio10_grp, pwm2_gpio14_grp, pwm2_gpio18_grp,
pwm3_gpio7_grp, pwm3_gpio11_grp, pwm3_gpio15_grp,
pwm3_gpio19_grp, pcmif_out_grp, pcmif_in_grp]
minItems: 1
maxItems: 8
drive-strength:
enum: [2, 4, 6, 8, 16, 24, 32]

View File

@@ -74,6 +74,7 @@ patternProperties:
'^conf':
type: object
unevaluatedProperties: false
description:
Pinctrl node's client devices use subnodes for pin configurations,
which in turn use the standard properties below.

View File

@@ -400,19 +400,30 @@ can report through the rotational axes (absolute and/or relative rx, ry, rz).
All other axes retain their meaning. A device must not mix
regular directional axes and accelerometer axes on the same event node.
INPUT_PROP_HAPTIC_TOUCHPAD
--------------------------
INPUT_PROP_PRESSUREPAD
----------------------
The INPUT_PROP_PRESSUREPAD property indicates that the device provides
simulated haptic feedback (e.g. a vibrator motor situated below the surface)
instead of physical haptic feedback (e.g. a hinge). This property is only set
if the device:
The INPUT_PROP_HAPTIC_TOUCHPAD property indicates that device:
- supports simple haptic auto and manual triggering
- can differentiate between at least 5 fingers
- uses correct resolution for the X/Y (units and value)
- reports correct force per touch, and correct units for them (newtons or grams)
- follows the MT protocol type B
If the simulated haptic feedback is controllable by userspace the device must:
- support simple haptic auto and manual triggering, and
- report correct force per touch, and correct units for them (newtons or grams), and
- provide the EV_FF FF_HAPTIC force feedback effect.
Summing up, such devices follow the MS spec for input devices in
Win8 and Win8.1, and in addition support the Simple haptic controller HID table,
and report correct units for the pressure.
Win8 and Win8.1, and in addition may support the Simple haptic controller HID
table, and report correct units for the pressure.
Where applicable, this property is set in addition to INPUT_PROP_BUTTONPAD, it
does not replace that property.
Guidelines
==========

View File

@@ -105,10 +105,10 @@ In this example the SSID is 10280c63.
The format of the firmware file names is:
SoundWire (except CS35L56 Rev B0):
SoundWire:
cs35lxx-b0-dsp1-misc-SSID[-spkidX]-l?u?
SoundWire CS35L56 Rev B0:
SoundWire CS35L56 Rev B0 firmware released before kernel version 6.16:
cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN
Non-SoundWire (HDA and I2S):
@@ -127,9 +127,8 @@ Where:
* spkidX is an optional part, used for laptops that have firmware
configurations for different makes and models of internal speakers.
The CS35L56 Rev B0 continues to use the old filename scheme because a
large number of firmware files have already been published with these
names.
Early firmware for CS35L56 Rev B0 used the ALSA prefix (ampN) as the
filename qualifier. Support for the l?u? qualifier was added in kernel 6.16.
Sound Open Firmware and ALSA topology files
-------------------------------------------

View File

@@ -13,10 +13,10 @@ Simple CLI
Kernel comes with a simple CLI tool which should be useful when
developing Netlink related code. The tool is implemented in Python
and can use a YAML specification to issue Netlink requests
to the kernel. Only Generic Netlink is supported.
to the kernel.
The tool is located at ``tools/net/ynl/pyynl/cli.py``. It accepts
a handul of arguments, the most important ones are:
a handful of arguments, the most important ones are:
- ``--spec`` - point to the spec file
- ``--do $name`` / ``--dump $name`` - issue request ``$name``

View File

@@ -54,6 +54,7 @@ to matching WMI devices using a struct wmi_device_id table:
::
static const struct wmi_device_id foo_id_table[] = {
/* Only use uppercase letters! */
{ "936DA01F-9ABD-4D9D-80C7-02AF85C822A8", NULL },
{ }
};

View File

@@ -915,6 +915,7 @@ F: drivers/staging/media/sunxi/cedrus/
ALPHA PORT
M: Richard Henderson <richard.henderson@linaro.org>
M: Matt Turner <mattst88@gmail.com>
M: Magnus Lindholm <linmag7@gmail.com>
L: linux-alpha@vger.kernel.org
S: Odd Fixes
F: arch/alpha/
@@ -3925,7 +3926,7 @@ F: crypto/async_tx/
F: include/linux/async_tx.h
AT24 EEPROM DRIVER
M: Bartosz Golaszewski <brgl@bgdev.pl>
M: Bartosz Golaszewski <brgl@kernel.org>
L: linux-i2c@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
@@ -4398,7 +4399,7 @@ BLOCK LAYER
M: Jens Axboe <axboe@kernel.dk>
L: linux-block@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git
F: Documentation/ABI/stable/sysfs-block
F: Documentation/block/
F: block/
@@ -9208,6 +9209,7 @@ R: Yue Hu <zbestahu@gmail.com>
R: Jeffle Xu <jefflexu@linux.alibaba.com>
R: Sandeep Dhavale <dhavale@google.com>
R: Hongbo Li <lihongbo22@huawei.com>
R: Chunhai Guo <guochunhai@vivo.com>
L: linux-erofs@lists.ozlabs.org
S: Maintained
W: https://erofs.docs.kernel.org
@@ -9264,7 +9266,6 @@ M: Ido Schimmel <idosch@nvidia.com>
L: bridge@lists.linux.dev
L: netdev@vger.kernel.org
S: Maintained
W: http://www.linuxfoundation.org/en/Net:Bridge
F: include/linux/if_bridge.h
F: include/uapi/linux/if_bridge.h
F: include/linux/netfilter_bridge/
@@ -10677,7 +10678,7 @@ F: tools/gpio/gpio-sloppy-logic-analyzer.sh
GPIO SUBSYSTEM
M: Linus Walleij <linus.walleij@linaro.org>
M: Bartosz Golaszewski <brgl@bgdev.pl>
M: Bartosz Golaszewski <brgl@kernel.org>
L: linux-gpio@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
@@ -10694,7 +10695,7 @@ K: GPIOD_FLAGS_BIT_NONEXCLUSIVE
K: devm_gpiod_unhinge
GPIO UAPI
M: Bartosz Golaszewski <brgl@bgdev.pl>
M: Bartosz Golaszewski <brgl@kernel.org>
R: Kent Gibson <warthog618@gmail.com>
L: linux-gpio@vger.kernel.org
S: Maintained
@@ -11526,7 +11527,7 @@ F: include/linux/platform_data/huawei-gaokun-ec.h
HUGETLB SUBSYSTEM
M: Muchun Song <muchun.song@linux.dev>
M: Oscar Salvador <osalvador@suse.de>
R: David Hildenbrand <david@redhat.com>
R: David Hildenbrand <david@kernel.org>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -13659,7 +13660,7 @@ F: virt/kvm/*
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
M: Marc Zyngier <maz@kernel.org>
M: Oliver Upton <oliver.upton@linux.dev>
M: Oliver Upton <oupton@kernel.org>
R: Joey Gouly <joey.gouly@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Zenghui Yu <yuzenghui@huawei.com>
@@ -13733,7 +13734,7 @@ KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@linux.ibm.com>
M: Janosch Frank <frankja@linux.ibm.com>
M: Claudio Imbrenda <imbrenda@linux.ibm.com>
R: David Hildenbrand <david@redhat.com>
R: David Hildenbrand <david@kernel.org>
L: kvm@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
@@ -15308,7 +15309,7 @@ F: drivers/pwm/pwm-max7360.c
F: include/linux/mfd/max7360.h
MAXIM MAX77650 PMIC MFD DRIVER
M: Bartosz Golaszewski <brgl@bgdev.pl>
M: Bartosz Golaszewski <brgl@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/*/*max77650.yaml
@@ -16204,7 +16205,7 @@ MEMORY CONTROLLER DRIVERS
M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
B: mailto:krzysztof.kozlowski@linaro.org
B: mailto:krzk@kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git
F: Documentation/devicetree/bindings/memory-controllers/
F: drivers/memory/
@@ -16220,7 +16221,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git
F: drivers/devfreq/tegra30-devfreq.c
MEMORY HOT(UN)PLUG
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
M: Oscar Salvador <osalvador@suse.de>
L: linux-mm@kvack.org
S: Maintained
@@ -16245,7 +16246,7 @@ F: tools/mm/
MEMORY MANAGEMENT - CORE
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
@@ -16301,7 +16302,7 @@ F: mm/execmem.c
MEMORY MANAGEMENT - GUP (GET USER PAGES)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
R: Jason Gunthorpe <jgg@nvidia.com>
R: John Hubbard <jhubbard@nvidia.com>
R: Peter Xu <peterx@redhat.com>
@@ -16317,7 +16318,7 @@ F: tools/testing/selftests/mm/gup_test.c
MEMORY MANAGEMENT - KSM (Kernel Samepage Merging)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
R: Xu Xin <xu.xin16@zte.com.cn>
R: Chengming Zhou <chengming.zhou@linux.dev>
L: linux-mm@kvack.org
@@ -16333,7 +16334,7 @@ F: mm/mm_slot.h
MEMORY MANAGEMENT - MEMORY POLICY AND MIGRATION
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
R: Zi Yan <ziy@nvidia.com>
R: Matthew Brost <matthew.brost@intel.com>
R: Joshua Hahn <joshua.hahnjy@gmail.com>
@@ -16373,7 +16374,7 @@ F: mm/workingset.c
MEMORY MANAGEMENT - MISC
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
@@ -16461,7 +16462,7 @@ F: mm/shuffle.h
MEMORY MANAGEMENT - RECLAIM
M: Andrew Morton <akpm@linux-foundation.org>
M: Johannes Weiner <hannes@cmpxchg.org>
R: David Hildenbrand <david@redhat.com>
R: David Hildenbrand <david@kernel.org>
R: Michal Hocko <mhocko@kernel.org>
R: Qi Zheng <zhengqi.arch@bytedance.com>
R: Shakeel Butt <shakeel.butt@linux.dev>
@@ -16474,7 +16475,7 @@ F: mm/workingset.c
MEMORY MANAGEMENT - RMAP (REVERSE MAPPING)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Rik van Riel <riel@surriel.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
@@ -16498,12 +16499,12 @@ F: mm/secretmem.c
MEMORY MANAGEMENT - SWAP
M: Andrew Morton <akpm@linux-foundation.org>
M: Chris Li <chrisl@kernel.org>
M: Kairui Song <kasong@tencent.com>
R: Kemeng Shi <shikemeng@huaweicloud.com>
R: Kairui Song <kasong@tencent.com>
R: Nhat Pham <nphamcs@gmail.com>
R: Baoquan He <bhe@redhat.com>
R: Barry Song <baohua@kernel.org>
R: Chris Li <chrisl@kernel.org>
L: linux-mm@kvack.org
S: Maintained
F: Documentation/mm/swap-table.rst
@@ -16519,7 +16520,7 @@ F: mm/swapfile.c
MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Zi Yan <ziy@nvidia.com>
R: Baolin Wang <baolin.wang@linux.alibaba.com>
@@ -16621,7 +16622,7 @@ MEMORY MAPPING - MADVISE (MEMORY ADVICE)
M: Andrew Morton <akpm@linux-foundation.org>
M: Liam R. Howlett <Liam.Howlett@oracle.com>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
R: Vlastimil Babka <vbabka@suse.cz>
R: Jann Horn <jannh@google.com>
L: linux-mm@kvack.org
@@ -18779,6 +18780,10 @@ S: Maintained
F: arch/arm/*omap*/*clock*
OMAP DEVICE TREE SUPPORT
M: Aaro Koskinen <aaro.koskinen@iki.fi>
M: Andreas Kemnade <andreas@kemnade.info>
M: Kevin Hilman <khilman@baylibre.com>
M: Roger Quadros <rogerq@kernel.org>
M: Tony Lindgren <tony@atomide.com>
L: linux-omap@vger.kernel.org
L: devicetree@vger.kernel.org
@@ -19898,7 +19903,7 @@ F: drivers/pci/p2pdma.c
F: include/linux/pci-p2pdma.h
PCI POWER CONTROL
M: Bartosz Golaszewski <brgl@bgdev.pl>
M: Bartosz Golaszewski <brgl@kernel.org>
L: linux-pci@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git
@@ -20495,7 +20500,7 @@ F: include/linux/powercap.h
F: kernel/configs/nopm.config
POWER SEQUENCING
M: Bartosz Golaszewski <brgl@bgdev.pl>
M: Bartosz Golaszewski <brgl@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
@@ -21177,7 +21182,7 @@ F: Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml
F: drivers/i2c/busses/i2c-qcom-cci.c
QUALCOMM INTERCONNECT BWMON DRIVER
M: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-arm-msm@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml
@@ -21298,7 +21303,7 @@ F: Documentation/tee/qtee.rst
F: drivers/tee/qcomtee/
QUALCOMM TRUST ZONE MEMORY ALLOCATOR
M: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
M: Bartosz Golaszewski <brgl@kernel.org>
L: linux-arm-msm@vger.kernel.org
S: Maintained
F: drivers/firmware/qcom/qcom_tzmem.c
@@ -25666,7 +25671,7 @@ F: Documentation/devicetree/bindings/crypto/ti,am62l-dthev2.yaml
F: drivers/crypto/ti/
TI DAVINCI MACHINE SUPPORT
M: Bartosz Golaszewski <brgl@bgdev.pl>
M: Bartosz Golaszewski <brgl@kernel.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
@@ -27088,7 +27093,7 @@ F: net/vmw_vsock/virtio_transport_common.c
VIRTIO BALLOON
M: "Michael S. Tsirkin" <mst@redhat.com>
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
L: virtualization@lists.linux.dev
S: Maintained
F: drivers/virtio/virtio_balloon.c
@@ -27243,7 +27248,7 @@ F: drivers/iommu/virtio-iommu.c
F: include/uapi/linux/virtio_iommu.h
VIRTIO MEM DRIVER
M: David Hildenbrand <david@redhat.com>
M: David Hildenbrand <david@kernel.org>
L: virtualization@lists.linux.dev
S: Maintained
W: https://virtio-mem.gitlab.io/
@@ -27849,7 +27854,7 @@ F: arch/x86/kernel/stacktrace.c
F: arch/x86/kernel/unwind_*.c
X86 TRUST DOMAIN EXTENSIONS (TDX)
M: Kirill A. Shutemov <kas@kernel.org>
M: Kiryl Shutsemau <kas@kernel.org>
R: Dave Hansen <dave.hansen@linux.intel.com>
R: Rick Edgecombe <rick.p.edgecombe@intel.com>
L: x86@kernel.org

View File

@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 18
SUBLEVEL = 0
EXTRAVERSION = -rc5
EXTRAVERSION = -rc7
NAME = Baby Opossum Posse
# *DOCUMENTATION*

View File

@@ -1254,3 +1254,17 @@
max-frequency = <25000000>;
bus-width = <4>;
};
/*
* FIXME: rgmii delay is introduced by MAC (configured in u-boot now)
* instead of PCB on fuji board, so the "phy-mode" should be updated to
* "rgmii-[tx|rx]id" when the aspeed-mac driver can handle the delay
* properly.
*/
&mac3 {
status = "okay";
phy-mode = "rgmii";
phy-handle = <&ethphy3>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_rgmii4_default>;
};

View File

@@ -55,8 +55,8 @@
mdio {
/delete-node/ switch@1e;
bcm54210e: ethernet-phy@0 {
reg = <0>;
bcm54210e: ethernet-phy@25 {
reg = <25>;
};
};
};

View File

@@ -259,7 +259,7 @@
pinctrl-0 = <&pinctrl_audmux>;
status = "okay";
ssi2 {
mux-ssi2 {
fsl,audmux-port = <1>;
fsl,port-config = <
(IMX_AUDMUX_V2_PTCR_SYN |
@@ -271,7 +271,7 @@
>;
};
aud3 {
mux-aud3 {
fsl,audmux-port = <2>;
fsl,port-config = <
IMX_AUDMUX_V2_PTCR_SYN

View File

@@ -136,7 +136,7 @@
interrupt-parent = <&gpio2>;
interrupts = <8 IRQ_TYPE_EDGE_FALLING>;
reset-gpios = <&gpio2 14 GPIO_ACTIVE_LOW>;
report-rate-hz = <6>;
report-rate-hz = <60>;
/* settings valid only for Hycon touchscreen */
touchscreen-size-x = <1280>;
touchscreen-size-y = <800>;

View File

@@ -18,11 +18,21 @@
#include "bcm2712-rpi-5-b-ovl-rp1.dts"
/ {
aliases {
ethernet0 = &rp1_eth;
};
};
&pcie2 {
#include "rp1-nexus.dtsi"
};
&rp1_eth {
assigned-clocks = <&rp1_clocks RP1_CLK_ETH_TSU>,
<&rp1_clocks RP1_CLK_ETH>;
assigned-clock-rates = <50000000>,
<125000000>;
status = "okay";
phy-mode = "rgmii-id";
phy-handle = <&phy1>;

View File

@@ -67,7 +67,6 @@ img_subsys: bus@58000000 {
power-domains = <&pd IMX_SC_R_CSI_0>;
fsl,channel = <0>;
fsl,num-irqs = <32>;
status = "disabled";
};
gpio0_mipi_csi0: gpio@58222000 {
@@ -144,7 +143,6 @@ img_subsys: bus@58000000 {
power-domains = <&pd IMX_SC_R_CSI_1>;
fsl,channel = <0>;
fsl,num-irqs = <32>;
status = "disabled";
};
gpio0_mipi_csi1: gpio@58242000 {

View File

@@ -16,11 +16,20 @@
ethernet1 = &eqos;
};
extcon_usbc: usbc {
compatible = "linux,extcon-usb-gpio";
connector {
compatible = "gpio-usb-b-connector", "usb-b-connector";
id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>;
label = "Type-C";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usb1_id>;
id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>;
type = "micro";
vbus-supply = <&reg_usb1_vbus>;
port {
usb_dr_connector: endpoint {
remote-endpoint = <&usb3_dwc>;
};
};
};
leds {
@@ -244,9 +253,15 @@
hnp-disable;
srp-disable;
dr_mode = "otg";
extcon = <&extcon_usbc>;
usb-role-switch;
role-switch-default-mode = "peripheral";
status = "okay";
port {
usb3_dwc: endpoint {
remote-endpoint = <&usb_dr_connector>;
};
};
};
&usb_dwc3_1 {
@@ -273,7 +288,6 @@
};
&usb3_phy0 {
vbus-supply = <&reg_usb1_vbus>;
status = "okay";
};

View File

@@ -1886,7 +1886,7 @@
assigned-clock-rates = <3600000000>, <100000000>, <10000000>;
assigned-clock-parents = <0>, <0>,
<&scmi_clk IMX95_CLK_SYSPLL1_PFD1_DIV2>;
msi-map = <0x0 &its 0x98 0x1>;
msi-map = <0x0 &its 0x10 0x1>;
power-domains = <&scmi_devpd IMX95_PD_HSIO_TOP>;
status = "disabled";
};
@@ -1963,6 +1963,7 @@
assigned-clock-rates = <3600000000>, <100000000>, <10000000>;
assigned-clock-parents = <0>, <0>,
<&scmi_clk IMX95_CLK_SYSPLL1_PFD1_DIV2>;
msi-map = <0x0 &its 0x98 0x1>;
power-domains = <&scmi_devpd IMX95_PD_HSIO_TOP>;
status = "disabled";
};

View File

@@ -42,6 +42,7 @@
interrupt-parent = <&gpio>;
interrupts = <TEGRA194_MAIN_GPIO(G, 4) IRQ_TYPE_LEVEL_LOW>;
#phy-cells = <0>;
wakeup-source;
};
};
};

View File

@@ -598,7 +598,6 @@
pinctrl-2 = <&otp_pin>;
resets = <&cru SRST_TSADC>;
reset-names = "tsadc-apb";
rockchip,grf = <&grf>;
rockchip,hw-tshut-temp = <100000>;
#thermal-sensor-cells = <1>;
status = "disabled";

View File

@@ -3,7 +3,7 @@
* Copyright (c) 2016-2017 Fuzhou Rockchip Electronics Co., Ltd
*/
#include "rk3399.dtsi"
#include "rk3399-base.dtsi"
/ {
cluster0_opp: opp-table-0 {

View File

@@ -45,11 +45,11 @@
cam_dovdd_1v8: regulator-cam-dovdd-1v8 {
compatible = "regulator-fixed";
gpio = <&pca9670 3 GPIO_ACTIVE_LOW>;
regulator-max-microvolt = <1800000>;
regulator-min-microvolt = <1800000>;
regulator-name = "cam-dovdd-1v8";
vin-supply = <&vcc1v8_video>;
gpio = <&pca9670 3 GPIO_ACTIVE_LOW>;
regulator-max-microvolt = <1800000>;
regulator-min-microvolt = <1800000>;
regulator-name = "cam-dovdd-1v8";
vin-supply = <&vcc1v8_video>;
};
cam_dvdd_1v2: regulator-cam-dvdd-1v2 {

View File

@@ -120,7 +120,7 @@
compatible = "regulator-fixed";
regulator-name = "vcc3v3_pcie";
enable-active-high;
gpios = <&gpio0 RK_PB1 GPIO_ACTIVE_HIGH>;
gpios = <&gpio4 RK_PB1 GPIO_ACTIVE_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&pcie_drv>;
regulator-always-on;
@@ -187,7 +187,7 @@
vcc5v0_usb2b: regulator-vcc5v0-usb2b {
compatible = "regulator-fixed";
enable-active-high;
gpio = <&gpio0 RK_PC4 GPIO_ACTIVE_HIGH>;
gpio = <&gpio4 RK_PC4 GPIO_ACTIVE_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&vcc5v0_usb2b_en>;
regulator-name = "vcc5v0_usb2b";
@@ -199,7 +199,7 @@
vcc5v0_usb2t: regulator-vcc5v0-usb2t {
compatible = "regulator-fixed";
enable-active-high;
gpios = <&gpio0 RK_PD5 GPIO_ACTIVE_HIGH>;
gpios = <&gpio3 RK_PD5 GPIO_ACTIVE_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&vcc5v0_usb2t_en>;
regulator-name = "vcc5v0_usb2t";

View File

@@ -789,7 +789,7 @@
vccio1-supply = <&vccio_acodec>;
vccio2-supply = <&vcc_1v8>;
vccio3-supply = <&vccio_sd>;
vccio4-supply = <&vcc_1v8>;
vccio4-supply = <&vcca1v8_pmu>;
vccio5-supply = <&vcc_1v8>;
vccio6-supply = <&vcc1v8_dvp>;
vccio7-supply = <&vcc_3v3>;

View File

@@ -482,6 +482,8 @@
};
&i2s1_8ch {
pinctrl-names = "default";
pinctrl-0 = <&i2s1m0_sclktx &i2s1m0_lrcktx &i2s1m0_sdi0 &i2s1m0_sdo0>;
rockchip,trcm-sync-tx-only;
status = "okay";
};

View File

@@ -276,12 +276,6 @@
opp-microvolt = <900000 900000 950000>;
clock-latency-ns = <40000>;
};
opp-2208000000 {
opp-hz = /bits/ 64 <2208000000>;
opp-microvolt = <950000 950000 950000>;
clock-latency-ns = <40000>;
};
};
cluster1_opp_table: opp-table-cluster1 {
@@ -348,12 +342,6 @@
opp-microvolt = <925000 925000 950000>;
clock-latency-ns = <40000>;
};
opp-2304000000 {
opp-hz = /bits/ 64 <2304000000>;
opp-microvolt = <950000 950000 950000>;
clock-latency-ns = <40000>;
};
};
gpu_opp_table: opp-table-gpu {
@@ -2561,8 +2549,6 @@
interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&i2c9m0_xfer>;
resets = <&cru SRST_I2C9>, <&cru SRST_P_I2C9>;
reset-names = "i2c", "apb";
#address-cells = <1>;
#size-cells = <0>;
status = "disabled";

View File

@@ -115,7 +115,7 @@
};
};
gpu_opp_table: opp-table {
gpu_opp_table: opp-table-gpu {
compatible = "operating-points-v2";
opp-300000000 {

View File

@@ -382,14 +382,12 @@
cap-mmc-highspeed;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
mmc-hs400-1_8v;
mmc-hs400-enhanced-strobe;
mmc-pwrseq = <&emmc_pwrseq>;
no-sdio;
no-sd;
non-removable;
pinctrl-names = "default";
pinctrl-0 = <&emmc_bus8 &emmc_cmd &emmc_clk &emmc_data_strobe>;
pinctrl-0 = <&emmc_bus8 &emmc_cmd &emmc_clk>;
vmmc-supply = <&vcc_3v3_s3>;
vqmmc-supply = <&vcc_1v8_s3>;
status = "okay";

View File

@@ -66,7 +66,7 @@
};
};
gpu_opp_table: opp-table {
gpu_opp_table: opp-table-gpu {
compatible = "operating-points-v2";
opp-300000000 {

View File

@@ -14,8 +14,8 @@
gpios = <&gpio0 RK_PC5 GPIO_ACTIVE_HIGH>;
regulator-name = "vcc3v3_pcie20";
regulator-boot-on;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
startup-delay-us = <50000>;
vin-supply = <&vcc5v0_sys>;
};

View File

@@ -1341,7 +1341,7 @@ CONFIG_COMMON_CLK_RS9_PCIE=y
CONFIG_COMMON_CLK_VC3=y
CONFIG_COMMON_CLK_VC5=y
CONFIG_COMMON_CLK_BD718XX=m
CONFIG_CLK_RASPBERRYPI=m
CONFIG_CLK_RASPBERRYPI=y
CONFIG_CLK_IMX8MM=y
CONFIG_CLK_IMX8MN=y
CONFIG_CLK_IMX8MP=y

View File

@@ -26,9 +26,12 @@ void __init apply_alternatives_all(void);
bool alternative_is_applied(u16 cpucap);
#ifdef CONFIG_MODULES
void apply_alternatives_module(void *start, size_t length);
int apply_alternatives_module(void *start, size_t length);
#else
static inline void apply_alternatives_module(void *start, size_t length) { }
static inline int apply_alternatives_module(void *start, size_t length)
{
return 0;
}
#endif
void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,

View File

@@ -10,8 +10,6 @@
#include <asm/set_memory.h>
static inline bool arch_kfence_init_pool(void) { return true; }
static inline bool kfence_protect_page(unsigned long addr, bool protect)
{
set_memory_valid(addr, 1, !protect);
@@ -25,6 +23,7 @@ static inline bool arm64_kfence_can_set_direct_map(void)
{
return !kfence_early_init;
}
bool arch_kfence_init_pool(void);
#else /* CONFIG_KFENCE */
static inline bool arm64_kfence_can_set_direct_map(void) { return false; }
#endif /* CONFIG_KFENCE */

View File

@@ -33,8 +33,8 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
unsigned long vaddr);
#define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
void tag_clear_highpage(struct page *to);
#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
bool tag_clear_highpages(struct page *to, int numpages);
#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)

View File

@@ -77,7 +77,7 @@ __percpu_##name##_case_##sz(void *ptr, unsigned long val) \
" stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \
" cbnz %w[loop], 1b", \
/* LSE atomics */ \
#op_lse "\t%" #w "[val], %[ptr]\n" \
#op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \
__nops(3)) \
: [loop] "=&r" (loop), [tmp] "=&r" (tmp), \
[ptr] "+Q"(*(u##sz *)ptr) \
@@ -124,9 +124,16 @@ PERCPU_RW_OPS(8)
PERCPU_RW_OPS(16)
PERCPU_RW_OPS(32)
PERCPU_RW_OPS(64)
PERCPU_OP(add, add, stadd)
PERCPU_OP(andnot, bic, stclr)
PERCPU_OP(or, orr, stset)
/*
* Use value-returning atomics for CPU-local ops as they are more likely
* to execute "near" to the CPU (e.g. in L1$).
*
* https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop
*/
PERCPU_OP(add, add, ldadd)
PERCPU_OP(andnot, bic, ldclr)
PERCPU_OP(or, orr, ldset)
PERCPU_RET_OP(add, add, ldadd)
#undef PERCPU_RW_OPS

View File

@@ -53,7 +53,7 @@ enum {
EDYNSCS_INVALID_CFA_OPCODE = 4,
};
int __pi_scs_patch(const u8 eh_frame[], int size);
int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run);
#endif /* __ASSEMBLY __ */

View File

@@ -117,6 +117,7 @@ void spectre_bhb_patch_wa3(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void spectre_bhb_patch_clearbhb(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void spectre_print_disabled_mitigations(void);
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SPECTRE_H */

View File

@@ -197,8 +197,6 @@ out:
*/
void __init acpi_boot_table_init(void)
{
int ret;
/*
* Enable ACPI instead of device tree unless
* - ACPI has been disabled explicitly (acpi=off), or
@@ -252,12 +250,8 @@ done:
* behaviour, use acpi=nospcr to disable console in ACPI SPCR
* table as default serial console.
*/
ret = acpi_parse_spcr(earlycon_acpi_spcr_enable,
acpi_parse_spcr(earlycon_acpi_spcr_enable,
!param_acpi_nospcr);
if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE))
pr_info("Use ACPI SPCR as default console: No\n");
else
pr_info("Use ACPI SPCR as default console: Yes\n");
if (IS_ENABLED(CONFIG_ACPI_BGRT))
acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);

View File

@@ -139,9 +139,9 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
} while (cur += d_size, cur < end);
}
static void __apply_alternatives(const struct alt_region *region,
bool is_module,
unsigned long *cpucap_mask)
static int __apply_alternatives(const struct alt_region *region,
bool is_module,
unsigned long *cpucap_mask)
{
struct alt_instr *alt;
__le32 *origptr, *updptr;
@@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region,
updptr = is_module ? origptr : lm_alias(origptr);
nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
if (ALT_HAS_CB(alt))
if (ALT_HAS_CB(alt)) {
alt_cb = ALT_REPL_PTR(alt);
else
if (is_module && !core_kernel_text((unsigned long)alt_cb))
return -ENOEXEC;
} else {
alt_cb = patch_alternative;
}
alt_cb(alt, origptr, updptr, nr_inst);
@@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region,
bitmap_and(applied_alternatives, applied_alternatives,
system_cpucaps, ARM64_NCAPS);
}
return 0;
}
static void __init apply_alternatives_vdso(void)
@@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void)
}
#ifdef CONFIG_MODULES
void apply_alternatives_module(void *start, size_t length)
int apply_alternatives_module(void *start, size_t length)
{
struct alt_region region = {
.begin = start,
@@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length)
bitmap_fill(all_capabilities, ARM64_NCAPS);
__apply_alternatives(&region, true, &all_capabilities[0]);
return __apply_alternatives(&region, true, &all_capabilities[0]);
}
#endif

View File

@@ -95,6 +95,7 @@
#include <asm/vectors.h>
#include <asm/virt.h>
#include <asm/spectre.h>
/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
@@ -3875,6 +3876,11 @@ static void __init setup_system_capabilities(void)
*/
if (system_uses_ttbr0_pan())
pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
/*
* Report Spectre mitigations status.
*/
spectre_print_disabled_mitigations();
}
void __init setup_system_features(void)

View File

@@ -489,16 +489,29 @@ int module_finalize(const Elf_Ehdr *hdr,
int ret;
s = find_section(hdr, sechdrs, ".altinstructions");
if (s)
apply_alternatives_module((void *)s->sh_addr, s->sh_size);
if (s) {
ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size);
if (ret < 0) {
pr_err("module %s: error occurred when applying alternatives\n", me->name);
return ret;
}
}
if (scs_is_dynamic()) {
s = find_section(hdr, sechdrs, ".init.eh_frame");
if (s) {
ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size);
if (ret)
/*
* Because we can reject modules that are malformed
* so SCS patching fails, skip dry run and try to patch
* it in place. If patching fails, the module would not
* be loaded anyway.
*/
ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true);
if (ret) {
pr_err("module %s: error occurred during dynamic SCS patching (%d)\n",
me->name, ret);
return -ENOEXEC;
}
}
}

View File

@@ -476,7 +476,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
folio = page_folio(page);
if (folio_test_hugetlb(folio))
WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio));
WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) &&
!is_huge_zero_folio(folio));
else
WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page));

View File

@@ -104,7 +104,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
if (enable_scs) {
scs_patch(__eh_frame_start + va_offset,
__eh_frame_end - __eh_frame_start);
__eh_frame_end - __eh_frame_start, false);
asm("ic ialluis");
dynamic_scs_is_enabled = true;

View File

@@ -225,7 +225,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame,
return 0;
}
int scs_patch(const u8 eh_frame[], int size)
int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run)
{
int code_alignment_factor = 1;
bool fde_use_sdata8 = false;
@@ -277,11 +277,13 @@ int scs_patch(const u8 eh_frame[], int size)
}
} else {
ret = scs_handle_fde_frame(frame, code_alignment_factor,
fde_use_sdata8, true);
fde_use_sdata8, !skip_dry_run);
if (ret)
return ret;
scs_handle_fde_frame(frame, code_alignment_factor,
fde_use_sdata8, false);
if (!skip_dry_run)
scs_handle_fde_frame(frame, code_alignment_factor,
fde_use_sdata8, false);
}
p += sizeof(frame->size) + frame->size;

View File

@@ -27,7 +27,7 @@ extern pgd_t init_pg_dir[], init_pg_end[];
void init_feature_override(u64 boot_status, const void *fdt, int chosen);
u64 kaslr_early_init(void *fdt, int chosen);
void relocate_kernel(u64 offset);
int scs_patch(const u8 eh_frame[], int size);
int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run);
void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,

View File

@@ -49,7 +49,10 @@ void *alloc_insn_page(void)
addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!addr)
return NULL;
set_memory_rox((unsigned long)addr, 1);
if (set_memory_rox((unsigned long)addr, 1)) {
execmem_free(addr);
return NULL;
}
return addr;
}

View File

@@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param);
static bool spectre_v2_mitigations_off(void)
{
bool ret = __nospectre_v2 || cpu_mitigations_off();
if (ret)
pr_info_once("spectre-v2 mitigation disabled by command line option\n");
return ret;
return __nospectre_v2 || cpu_mitigations_off();
}
static const char *get_bhb_affected_string(enum mitigation_state bhb_state)
@@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param);
*/
static bool spectre_v4_mitigations_off(void)
{
bool ret = cpu_mitigations_off() ||
__spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
if (ret)
pr_info_once("spectre-v4 mitigation disabled by command-line option\n");
return ret;
return cpu_mitigations_off() ||
__spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
}
/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */
@@ -1042,10 +1032,6 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) {
/* No point mitigating Spectre-BHB alone. */
} else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
} else if (cpu_mitigations_off() || __nospectre_bhb) {
pr_info_once("spectre-bhb mitigation disabled by command line option\n");
} else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
state = SPECTRE_MITIGATED;
set_bit(BHB_HW, &system_bhb_mitigations);
@@ -1199,3 +1185,18 @@ void unpriv_ebpf_notify(int new_state)
pr_err("WARNING: %s", EBPF_WARN);
}
#endif
void spectre_print_disabled_mitigations(void)
{
/* Keep a single copy of the common message suffix to avoid duplication. */
const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n";
if (spectre_v2_mitigations_off())
pr_info("spectre-v2 %s", spectre_disabled_suffix);
if (spectre_v4_mitigations_off())
pr_info("spectre-v4 %s", spectre_disabled_suffix);
if (__nospectre_bhb || cpu_mitigations_off())
pr_info("spectre-bhb %s", spectre_disabled_suffix);
}

View File

@@ -624,6 +624,7 @@ nommu:
kvm_timer_vcpu_load(vcpu);
kvm_vgic_load(vcpu);
kvm_vcpu_load_debug(vcpu);
kvm_vcpu_load_fgt(vcpu);
if (has_vhe())
kvm_vcpu_load_vhe(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
@@ -642,7 +643,6 @@ nommu:
vcpu->arch.hcr_el2 |= HCR_TWI;
vcpu_set_pauth_traps(vcpu);
kvm_vcpu_load_fgt(vcpu);
if (is_protected_kvm_enabled()) {
kvm_call_hyp_nvhe(__pkvm_vcpu_load,

View File

@@ -479,7 +479,7 @@ static void __do_ffa_mem_xfer(const u64 func_id,
struct ffa_mem_region_attributes *ep_mem_access;
struct ffa_composite_mem_region *reg;
struct ffa_mem_region *buf;
u32 offset, nr_ranges;
u32 offset, nr_ranges, checked_offset;
int ret = 0;
if (addr_mbz || npages_mbz || fraglen > len ||
@@ -516,7 +516,12 @@ static void __do_ffa_mem_xfer(const u64 func_id,
goto out_unlock;
}
if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) {
if (check_add_overflow(offset, sizeof(struct ffa_composite_mem_region), &checked_offset)) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}
if (fraglen < checked_offset) {
ret = FFA_RET_INVALID_PARAMETERS;
goto out_unlock;
}

View File

@@ -367,6 +367,19 @@ static int host_stage2_unmap_dev_all(void)
return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}
/*
* Ensure the PFN range is contained within PA-range.
*
* This check is also robust to overflows and is therefore a requirement before
* using a pfn/nr_pages pair from an untrusted source.
*/
static bool pfn_range_is_valid(u64 pfn, u64 nr_pages)
{
u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT);
return pfn < limit && ((limit - pfn) >= nr_pages);
}
struct kvm_mem_range {
u64 start;
u64 end;
@@ -776,6 +789,9 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
void *virt = __hyp_va(phys);
int ret;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
host_lock_component();
hyp_lock_component();
@@ -804,6 +820,9 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
u64 virt = (u64)__hyp_va(phys);
int ret;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
host_lock_component();
hyp_lock_component();
@@ -887,6 +906,9 @@ int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
u64 size = PAGE_SIZE * nr_pages;
int ret;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
host_lock_component();
ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
if (!ret)
@@ -902,6 +924,9 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
u64 size = PAGE_SIZE * nr_pages;
int ret;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
host_lock_component();
ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
if (!ret)
@@ -945,6 +970,9 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu
if (prot & ~KVM_PGTABLE_PROT_RWX)
return -EINVAL;
if (!pfn_range_is_valid(pfn, nr_pages))
return -EINVAL;
ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
if (ret)
return ret;

View File

@@ -2595,19 +2595,23 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
.val = 0, \
}
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define AA32_ID_SANITISED(name) { \
ID_DESC(name), \
.visibility = aa32_id_visibility, \
.val = 0, \
}
/* sys_reg_desc initialiser for writable ID registers */
#define ID_WRITABLE(name, mask) { \
ID_DESC(name), \
.val = mask, \
}
/*
* 32bit ID regs are fully writable when the guest is 32bit
* capable. Nothing in the KVM code should rely on 32bit features
* anyway, only 64bit, so let the VMM do its worse.
*/
#define AA32_ID_WRITABLE(name) { \
ID_DESC(name), \
.visibility = aa32_id_visibility, \
.val = GENMASK(31, 0), \
}
/* sys_reg_desc initialiser for cpufeature ID registers that need filtering */
#define ID_FILTERED(sysreg, name, mask) { \
ID_DESC(sysreg), \
@@ -3128,40 +3132,39 @@ static const struct sys_reg_desc sys_reg_descs[] = {
/* AArch64 mappings of the AArch32 ID registers */
/* CRm=1 */
AA32_ID_SANITISED(ID_PFR0_EL1),
AA32_ID_SANITISED(ID_PFR1_EL1),
AA32_ID_WRITABLE(ID_PFR0_EL1),
AA32_ID_WRITABLE(ID_PFR1_EL1),
{ SYS_DESC(SYS_ID_DFR0_EL1),
.access = access_id_reg,
.get_user = get_id_reg,
.set_user = set_id_dfr0_el1,
.visibility = aa32_id_visibility,
.reset = read_sanitised_id_dfr0_el1,
.val = ID_DFR0_EL1_PerfMon_MASK |
ID_DFR0_EL1_CopDbg_MASK, },
.val = GENMASK(31, 0) },
ID_HIDDEN(ID_AFR0_EL1),
AA32_ID_SANITISED(ID_MMFR0_EL1),
AA32_ID_SANITISED(ID_MMFR1_EL1),
AA32_ID_SANITISED(ID_MMFR2_EL1),
AA32_ID_SANITISED(ID_MMFR3_EL1),
AA32_ID_WRITABLE(ID_MMFR0_EL1),
AA32_ID_WRITABLE(ID_MMFR1_EL1),
AA32_ID_WRITABLE(ID_MMFR2_EL1),
AA32_ID_WRITABLE(ID_MMFR3_EL1),
/* CRm=2 */
AA32_ID_SANITISED(ID_ISAR0_EL1),
AA32_ID_SANITISED(ID_ISAR1_EL1),
AA32_ID_SANITISED(ID_ISAR2_EL1),
AA32_ID_SANITISED(ID_ISAR3_EL1),
AA32_ID_SANITISED(ID_ISAR4_EL1),
AA32_ID_SANITISED(ID_ISAR5_EL1),
AA32_ID_SANITISED(ID_MMFR4_EL1),
AA32_ID_SANITISED(ID_ISAR6_EL1),
AA32_ID_WRITABLE(ID_ISAR0_EL1),
AA32_ID_WRITABLE(ID_ISAR1_EL1),
AA32_ID_WRITABLE(ID_ISAR2_EL1),
AA32_ID_WRITABLE(ID_ISAR3_EL1),
AA32_ID_WRITABLE(ID_ISAR4_EL1),
AA32_ID_WRITABLE(ID_ISAR5_EL1),
AA32_ID_WRITABLE(ID_MMFR4_EL1),
AA32_ID_WRITABLE(ID_ISAR6_EL1),
/* CRm=3 */
AA32_ID_SANITISED(MVFR0_EL1),
AA32_ID_SANITISED(MVFR1_EL1),
AA32_ID_SANITISED(MVFR2_EL1),
AA32_ID_WRITABLE(MVFR0_EL1),
AA32_ID_WRITABLE(MVFR1_EL1),
AA32_ID_WRITABLE(MVFR2_EL1),
ID_UNALLOCATED(3,3),
AA32_ID_SANITISED(ID_PFR2_EL1),
AA32_ID_WRITABLE(ID_PFR2_EL1),
ID_HIDDEN(ID_DFR1_EL1),
AA32_ID_SANITISED(ID_MMFR5_EL1),
AA32_ID_WRITABLE(ID_MMFR5_EL1),
ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
@@ -5606,11 +5609,17 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
guard(mutex)(&kvm->arch.config_lock);
if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) &&
irqchip_in_kernel(kvm) &&
kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) {
kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK;
kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK;
/*
* This hacks into the ID registers, so only perform it when the
* first vcpu runs, or the kvm_set_vm_id_reg() helper will scream.
*/
if (!irqchip_in_kernel(kvm) && !kvm_vm_has_ran_once(kvm)) {
u64 val;
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val);
}
if (vcpu_has_nv(vcpu)) {

View File

@@ -64,29 +64,37 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
static int iter_mark_lpis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned long intid, flags;
struct vgic_irq *irq;
unsigned long intid;
int nr_lpis = 0;
xa_lock_irqsave(&dist->lpi_xa, flags);
xa_for_each(&dist->lpi_xa, intid, irq) {
if (!vgic_try_get_irq_ref(irq))
continue;
xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
__xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
nr_lpis++;
}
xa_unlock_irqrestore(&dist->lpi_xa, flags);
return nr_lpis;
}
static void iter_unmark_lpis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned long intid, flags;
struct vgic_irq *irq;
unsigned long intid;
xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) {
xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
xa_lock_irqsave(&dist->lpi_xa, flags);
__xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
/* vgic_put_irq() expects to be called outside of the xa_lock */
vgic_put_irq(kvm, irq);
}
}

View File

@@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
xa_init(&dist->lpi_xa);
xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
/* CREATION */
@@ -71,6 +71,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type);
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
struct kvm_vcpu *vcpu;
u64 aa64pfr0, pfr1;
unsigned long i;
int ret;
@@ -161,10 +162,19 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
if (type == KVM_DEV_TYPE_ARM_VGIC_V2) {
kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
else
} else {
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP);
pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3);
}
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0);
kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1);
if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
kvm->arch.vgic.nassgicap = system_supports_direct_sgis();

View File

@@ -78,6 +78,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq;
unsigned long flags;
int ret;
/* In this case there is no put, since we keep the reference. */
@@ -88,7 +89,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
if (!irq)
return ERR_PTR(-ENOMEM);
ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
if (ret) {
kfree(irq);
return ERR_PTR(ret);
@@ -103,7 +104,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
irq->target_vcpu = vcpu;
irq->group = 1;
xa_lock(&dist->lpi_xa);
xa_lock_irqsave(&dist->lpi_xa, flags);
/*
* There could be a race with another vgic_add_lpi(), so we need to
@@ -114,21 +115,18 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
/* Someone was faster with adding this LPI, lets use that. */
kfree(irq);
irq = oldirq;
goto out_unlock;
} else {
ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0));
}
ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0));
xa_unlock_irqrestore(&dist->lpi_xa, flags);
if (ret) {
xa_release(&dist->lpi_xa, intid);
kfree(irq);
}
out_unlock:
xa_unlock(&dist->lpi_xa);
if (ret)
return ERR_PTR(ret);
}
/*
* We "cache" the configuration table entries in our struct vgic_irq's.

View File

@@ -301,7 +301,8 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
return;
/* Hide GICv3 sysreg if necessary */
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 ||
!irqchip_in_kernel(vcpu->kvm)) {
vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
ICH_HCR_EL2_TC);
return;

View File

@@ -28,7 +28,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* kvm->arch.config_lock (mutex)
* its->cmd_lock (mutex)
* its->its_lock (mutex)
* vgic_dist->lpi_xa.xa_lock
* vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
* vgic_cpu->ap_list_lock must be taken with IRQs disabled
* vgic_irq->irq_lock must be taken with IRQs disabled
*
@@ -141,32 +141,39 @@ static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned long flags;
if (irq->intid >= VGIC_MIN_LPI)
might_lock(&dist->lpi_xa.xa_lock);
/*
* Normally the lock is only taken when the refcount drops to 0.
* Acquire/release it early on lockdep kernels to make locking issues
* in rare release paths a bit more obvious.
*/
if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) {
guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock);
}
if (!__vgic_put_irq(kvm, irq))
return;
xa_lock(&dist->lpi_xa);
xa_lock_irqsave(&dist->lpi_xa, flags);
vgic_release_lpi_locked(dist, irq);
xa_unlock(&dist->lpi_xa);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
}
static void vgic_release_deleted_lpis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned long intid;
unsigned long flags, intid;
struct vgic_irq *irq;
xa_lock(&dist->lpi_xa);
xa_lock_irqsave(&dist->lpi_xa, flags);
xa_for_each(&dist->lpi_xa, intid, irq) {
if (irq->pending_release)
vgic_release_lpi_locked(dist, irq);
}
xa_unlock(&dist->lpi_xa);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
}
void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)

View File

@@ -967,10 +967,21 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
return vma_alloc_folio(flags, 0, vma, vaddr);
}
void tag_clear_highpage(struct page *page)
bool tag_clear_highpages(struct page *page, int numpages)
{
/* Newly allocated page, shouldn't have been tagged yet */
WARN_ON_ONCE(!try_page_mte_tagging(page));
mte_zero_clear_page_tags(page_address(page));
set_page_mte_tagged(page);
/*
* Check if MTE is supported and fall back to clear_highpage().
* get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and
* post_alloc_hook() will invoke tag_clear_highpages().
*/
if (!system_supports_mte())
return false;
/* Newly allocated pages, shouldn't have been tagged yet */
for (int i = 0; i < numpages; i++, page++) {
WARN_ON_ONCE(!try_page_mte_tagging(page));
mte_zero_clear_page_tags(page_address(page));
set_page_mte_tagged(page);
}
return true;
}

View File

@@ -708,6 +708,30 @@ out:
return ret;
}
static inline bool force_pte_mapping(void)
{
const bool bbml2 = system_capabilities_finalized() ?
system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort();
if (debug_pagealloc_enabled())
return true;
if (bbml2)
return false;
return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world();
}
static inline bool split_leaf_mapping_possible(void)
{
/*
* !BBML2_NOABORT systems should never run into scenarios where we would
* have to split. So exit early and let calling code detect it and raise
* a warning.
*/
if (!system_supports_bbml2_noabort())
return false;
return !force_pte_mapping();
}
static DEFINE_MUTEX(pgtable_split_lock);
int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
@@ -715,12 +739,11 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
int ret;
/*
* !BBML2_NOABORT systems should not be trying to change permissions on
* anything that is not pte-mapped in the first place. Just return early
* and let the permission change code raise a warning if not already
* pte-mapped.
* Exit early if the region is within a pte-mapped area or if we can't
* split. For the latter case, the permission change code will raise a
* warning if not already pte-mapped.
*/
if (!system_supports_bbml2_noabort())
if (!split_leaf_mapping_possible() || is_kfence_address((void *)start))
return 0;
/*
@@ -758,30 +781,30 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
return ret;
}
static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
unsigned long next,
struct mm_walk *walk)
static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
gfp_t gfp = *(gfp_t *)walk->private;
pud_t pud = pudp_get(pudp);
int ret = 0;
if (pud_leaf(pud))
ret = split_pud(pudp, pud, GFP_ATOMIC, false);
ret = split_pud(pudp, pud, gfp, false);
return ret;
}
static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
unsigned long next,
struct mm_walk *walk)
static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
gfp_t gfp = *(gfp_t *)walk->private;
pmd_t pmd = pmdp_get(pmdp);
int ret = 0;
if (pmd_leaf(pmd)) {
if (pmd_cont(pmd))
split_contpmd(pmdp);
ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false);
ret = split_pmd(pmdp, pmd, gfp, false);
/*
* We have split the pmd directly to ptes so there is no need to
@@ -793,9 +816,8 @@ static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
return ret;
}
static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
unsigned long next,
struct mm_walk *walk)
static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
pte_t pte = __ptep_get(ptep);
@@ -805,12 +827,24 @@ static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
return 0;
}
static const struct mm_walk_ops split_to_ptes_ops __initconst = {
static const struct mm_walk_ops split_to_ptes_ops = {
.pud_entry = split_to_ptes_pud_entry,
.pmd_entry = split_to_ptes_pmd_entry,
.pte_entry = split_to_ptes_pte_entry,
};
static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp)
{
int ret;
arch_enter_lazy_mmu_mode();
ret = walk_kernel_page_table_range_lockless(start, end,
&split_to_ptes_ops, NULL, &gfp);
arch_leave_lazy_mmu_mode();
return ret;
}
static bool linear_map_requires_bbml2 __initdata;
u32 idmap_kpti_bbml2_flag;
@@ -847,11 +881,9 @@ static int __init linear_map_split_to_ptes(void *__unused)
* PTE. The kernel alias remains static throughout runtime so
* can continue to be safely mapped with large mappings.
*/
ret = walk_kernel_page_table_range_lockless(lstart, kstart,
&split_to_ptes_ops, NULL, NULL);
ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC);
if (!ret)
ret = walk_kernel_page_table_range_lockless(kend, lend,
&split_to_ptes_ops, NULL, NULL);
ret = range_split_to_ptes(kend, lend, GFP_ATOMIC);
if (ret)
panic("Failed to split linear map\n");
flush_tlb_kernel_range(lstart, lend);
@@ -1002,6 +1034,33 @@ static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp)
memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
__kfence_pool = phys_to_virt(kfence_pool);
}
bool arch_kfence_init_pool(void)
{
unsigned long start = (unsigned long)__kfence_pool;
unsigned long end = start + KFENCE_POOL_SIZE;
int ret;
/* Exit early if we know the linear map is already pte-mapped. */
if (!split_leaf_mapping_possible())
return true;
/* Kfence pool is already pte-mapped for the early init case. */
if (kfence_early_init)
return true;
mutex_lock(&pgtable_split_lock);
ret = range_split_to_ptes(start, end, GFP_PGTABLE_KERNEL);
mutex_unlock(&pgtable_split_lock);
/*
* Since the system supports bbml2_noabort, tlb invalidation is not
* required here; the pgtable mappings have been split to pte but larger
* entries may safely linger in the TLB.
*/
return !ret;
}
#else /* CONFIG_KFENCE */
static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; }
@@ -1009,16 +1068,6 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) {
#endif /* CONFIG_KFENCE */
static inline bool force_pte_mapping(void)
{
bool bbml2 = system_capabilities_finalized() ?
system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort();
return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() ||
is_realm_world())) ||
debug_pagealloc_enabled();
}
static void __init map_mem(pgd_t *pgdp)
{
static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);

View File

@@ -67,6 +67,8 @@
#define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR)
#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW)
#define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW)
#define cpu_has_msgint cpu_opt(LOONGARCH_CPU_MSGINT)
#define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT)
#define cpu_has_redirectint cpu_opt(LOONGARCH_CPU_REDIRECTINT)
#endif /* __ASM_CPU_FEATURES_H */

View File

@@ -55,6 +55,27 @@ enum cpu_type_enum {
CPU_LAST
};
static inline char *id_to_core_name(unsigned int id)
{
if ((id & PRID_COMP_MASK) != PRID_COMP_LOONGSON)
return "Unknown";
switch (id & PRID_SERIES_MASK) {
case PRID_SERIES_LA132:
return "LA132";
case PRID_SERIES_LA264:
return "LA264";
case PRID_SERIES_LA364:
return "LA364";
case PRID_SERIES_LA464:
return "LA464";
case PRID_SERIES_LA664:
return "LA664";
default:
return "Unknown";
}
}
#endif /* !__ASSEMBLER__ */
/*
@@ -101,7 +122,9 @@ enum cpu_type_enum {
#define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */
#define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */
#define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */
#define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */
#define CPU_FEATURE_MSGINT 29 /* CPU has MSG interrupt */
#define CPU_FEATURE_AVECINT 30 /* CPU has AVEC interrupt */
#define CPU_FEATURE_REDIRECTINT 31 /* CPU has interrupt remapping */
#define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG)
#define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM)
@@ -132,6 +155,8 @@ enum cpu_type_enum {
#define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR)
#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW)
#define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW)
#define LOONGARCH_CPU_MSGINT BIT_ULL(CPU_FEATURE_MSGINT)
#define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT)
#define LOONGARCH_CPU_REDIRECTINT BIT_ULL(CPU_FEATURE_REDIRECTINT)
#endif /* _ASM_CPU_H */

View File

@@ -134,13 +134,13 @@ static inline void hw_breakpoint_thread_switch(struct task_struct *next)
/* Determine number of BRP registers available. */
static inline int get_num_brps(void)
{
return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
return csr_read32(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
}
/* Determine number of WRP registers available. */
static inline int get_num_wrps(void)
{
return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
return csr_read32(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
}
#endif /* __KERNEL__ */

View File

@@ -14,7 +14,7 @@
#include <asm/pgtable-bits.h>
#include <asm/string.h>
extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
extern void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size);
extern void __init early_iounmap(void __iomem *addr, unsigned long size);
#define early_memremap early_ioremap
@@ -25,6 +25,9 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size);
static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
pgprot_t prot)
{
if (offset > TO_PHYS_MASK)
return NULL;
switch (pgprot_val(prot) & _CACHE_MASK) {
case _CACHE_CC:
return (void __iomem *)(unsigned long)(CACHE_BASE + offset);

View File

@@ -128,6 +128,7 @@
#define CPUCFG6_PMNUM GENMASK(7, 4)
#define CPUCFG6_PMNUM_SHIFT 4
#define CPUCFG6_PMBITS GENMASK(13, 8)
#define CPUCFG6_PMBITS_SHIFT 8
#define CPUCFG6_UPM BIT(14)
#define LOONGARCH_CPUCFG16 0x10
@@ -1137,6 +1138,7 @@
#define IOCSRF_FLATMODE BIT_ULL(10)
#define IOCSRF_VM BIT_ULL(11)
#define IOCSRF_AVEC BIT_ULL(15)
#define IOCSRF_REDIRECT BIT_ULL(16)
#define LOONGARCH_IOCSR_VENDOR 0x10

View File

@@ -88,7 +88,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
pud_t *pud;
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0);
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (!ptdesc)
return NULL;

View File

@@ -424,6 +424,9 @@ static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a)
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
if (pte_val(pte) & _PAGE_DIRTY)
pte_val(pte) |= _PAGE_MODIFIED;
return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
(pgprot_val(newprot) & ~_PAGE_CHG_MASK));
}
@@ -547,9 +550,11 @@ static inline struct page *pmd_page(pmd_t pmd)
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) |
(pgprot_val(newprot) & ~_HPAGE_CHG_MASK);
return pmd;
if (pmd_val(pmd) & _PAGE_DIRTY)
pmd_val(pmd) |= _PAGE_MODIFIED;
return __pmd((pmd_val(pmd) & _HPAGE_CHG_MASK) |
(pgprot_val(newprot) & ~_HPAGE_CHG_MASK));
}
static inline pmd_t pmd_mkinvalid(pmd_t pmd)

View File

@@ -10,10 +10,6 @@
#include <linux/types.h>
#ifndef __KERNEL__
#include <stdint.h>
#endif
/*
* For PTRACE_{POKE,PEEK}USR. 0 - 31 are GPRs,
* 32 is syscall's original ARG0, 33 is PC, 34 is BADVADDR.
@@ -41,44 +37,44 @@ struct user_pt_regs {
} __attribute__((aligned(8)));
struct user_fp_state {
uint64_t fpr[32];
uint64_t fcc;
uint32_t fcsr;
__u64 fpr[32];
__u64 fcc;
__u32 fcsr;
};
struct user_lsx_state {
/* 32 registers, 128 bits width per register. */
uint64_t vregs[32*2];
__u64 vregs[32*2];
};
struct user_lasx_state {
/* 32 registers, 256 bits width per register. */
uint64_t vregs[32*4];
__u64 vregs[32*4];
};
struct user_lbt_state {
uint64_t scr[4];
uint32_t eflags;
uint32_t ftop;
__u64 scr[4];
__u32 eflags;
__u32 ftop;
};
struct user_watch_state {
uint64_t dbg_info;
__u64 dbg_info;
struct {
uint64_t addr;
uint64_t mask;
uint32_t ctrl;
uint32_t pad;
__u64 addr;
__u64 mask;
__u32 ctrl;
__u32 pad;
} dbg_regs[8];
};
struct user_watch_state_v2 {
uint64_t dbg_info;
__u64 dbg_info;
struct {
uint64_t addr;
uint64_t mask;
uint32_t ctrl;
uint32_t pad;
__u64 addr;
__u64 mask;
__u32 ctrl;
__u32 pad;
} dbg_regs[14];
};

View File

@@ -157,6 +157,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options |= LOONGARCH_CPU_TLB;
if (config & CPUCFG1_IOCSR)
c->options |= LOONGARCH_CPU_IOCSR;
if (config & CPUCFG1_MSGINT)
c->options |= LOONGARCH_CPU_MSGINT;
if (config & CPUCFG1_UAL) {
c->options |= LOONGARCH_CPU_UAL;
elf_hwcap |= HWCAP_LOONGARCH_UAL;
@@ -275,7 +277,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int
uint32_t config;
uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]);
uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]);
const char *core_name = "Unknown";
const char *core_name = id_to_core_name(c->processor_id);
switch (BIT(fls(c->isa_level) - 1)) {
case LOONGARCH_CPU_ISA_LA32R:
@@ -289,35 +291,23 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int
break;
}
switch (c->processor_id & PRID_SERIES_MASK) {
case PRID_SERIES_LA132:
core_name = "LA132";
break;
case PRID_SERIES_LA264:
core_name = "LA264";
break;
case PRID_SERIES_LA364:
core_name = "LA364";
break;
case PRID_SERIES_LA464:
core_name = "LA464";
break;
case PRID_SERIES_LA664:
core_name = "LA664";
break;
}
pr_info("%s Processor probed (%s Core)\n", __cpu_family[cpu], core_name);
if (!cpu_has_iocsr)
if (!cpu_has_iocsr) {
__cpu_full_name[cpu] = "Unknown";
return;
if (!__cpu_full_name[cpu])
__cpu_full_name[cpu] = cpu_full_name;
}
*vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR);
*cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME);
if (!__cpu_full_name[cpu]) {
if (((char *)vendor)[0] == 0)
__cpu_full_name[cpu] = "Unknown";
else
__cpu_full_name[cpu] = cpu_full_name;
}
config = iocsr_read32(LOONGARCH_IOCSR_FEATURES);
if (config & IOCSRF_CSRIPI)
c->options |= LOONGARCH_CPU_CSRIPI;
@@ -331,6 +321,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int
c->options |= LOONGARCH_CPU_EIODECODE;
if (config & IOCSRF_AVEC)
c->options |= LOONGARCH_CPU_AVECINT;
if (config & IOCSRF_REDIRECT)
c->options |= LOONGARCH_CPU_REDIRECTINT;
if (config & IOCSRF_VM)
c->options |= LOONGARCH_CPU_HYPERVISOR;
}

View File

@@ -42,7 +42,7 @@ static void *efi_kexec_load(struct kimage *image,
{
int ret;
unsigned long text_offset, kernel_segment_number;
struct kexec_buf kbuf;
struct kexec_buf kbuf = {};
struct kexec_segment *kernel_segment;
struct loongarch_image_header *h;

View File

@@ -59,7 +59,7 @@ static void *elf_kexec_load(struct kimage *image,
int ret;
unsigned long text_offset, kernel_segment_number;
struct elfhdr ehdr;
struct kexec_buf kbuf;
struct kexec_buf kbuf = {};
struct kexec_elf_info elf_info;
struct kexec_segment *kernel_segment;

View File

@@ -39,34 +39,12 @@ static unsigned long systable_ptr;
static unsigned long start_addr;
static unsigned long first_ind_entry;
static void kexec_image_info(const struct kimage *kimage)
{
unsigned long i;
pr_debug("kexec kimage info:\n");
pr_debug("\ttype: %d\n", kimage->type);
pr_debug("\tstart: %lx\n", kimage->start);
pr_debug("\thead: %lx\n", kimage->head);
pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
for (i = 0; i < kimage->nr_segments; i++) {
pr_debug("\t segment[%lu]: %016lx - %016lx", i,
kimage->segment[i].mem,
kimage->segment[i].mem + kimage->segment[i].memsz);
pr_debug("\t\t0x%lx bytes, %lu pages\n",
(unsigned long)kimage->segment[i].memsz,
(unsigned long)kimage->segment[i].memsz / PAGE_SIZE);
}
}
int machine_kexec_prepare(struct kimage *kimage)
{
int i;
char *bootloader = "kexec";
void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR;
kexec_image_info(kimage);
kimage->arch.efi_boot = fw_arg0;
kimage->arch.systable_ptr = fw_arg2;
@@ -259,6 +237,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
#ifdef CONFIG_SMP
crash_smp_send_stop();
#endif
machine_kexec_mask_interrupts();
cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
pr_info("Starting crashdump kernel...\n");
@@ -296,6 +275,7 @@ void machine_kexec(struct kimage *image)
/* We do not want to be bothered. */
local_irq_disable();
machine_kexec_mask_interrupts();
pr_notice("EFI boot flag: 0x%lx\n", efi_boot);
pr_notice("Command line addr: 0x%lx\n", cmdline_ptr);

View File

@@ -143,7 +143,7 @@ int load_other_segments(struct kimage *image,
unsigned long initrd_load_addr = 0;
unsigned long orig_segments = image->nr_segments;
char *modified_cmdline = NULL;
struct kexec_buf kbuf;
struct kexec_buf kbuf = {};
kbuf.image = image;
/* Don't allocate anything below the kernel */

View File

@@ -13,7 +13,7 @@
void __init memblock_init(void)
{
u32 mem_type;
u64 mem_start, mem_end, mem_size;
u64 mem_start, mem_size;
efi_memory_desc_t *md;
/* Parse memory information */
@@ -21,7 +21,6 @@ void __init memblock_init(void)
mem_type = md->type;
mem_start = md->phys_addr;
mem_size = md->num_pages << EFI_PAGE_SHIFT;
mem_end = mem_start + mem_size;
switch (mem_type) {
case EFI_LOADER_CODE:
@@ -31,8 +30,6 @@ void __init memblock_init(void)
case EFI_PERSISTENT_MEMORY:
case EFI_CONVENTIONAL_MEMORY:
memblock_add(mem_start, mem_size);
if (max_low_pfn < (mem_end >> PAGE_SHIFT))
max_low_pfn = mem_end >> PAGE_SHIFT;
break;
case EFI_PAL_CODE:
case EFI_UNUSABLE_MEMORY:
@@ -49,6 +46,8 @@ void __init memblock_init(void)
}
}
max_pfn = PFN_DOWN(memblock_end_of_DRAM());
max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn);
memblock_set_current_limit(PFN_PHYS(max_low_pfn));
/* Reserve the first 2MB */

View File

@@ -158,35 +158,9 @@ static void __init node_mem_init(unsigned int node)
#ifdef CONFIG_ACPI_NUMA
/*
* add_numamem_region
*
* Add a uasable memory region described by BIOS. The
* routine gets each intersection between BIOS's region
* and node's region, and adds them into node's memblock
* pool.
*
*/
static void __init add_numamem_region(u64 start, u64 end, u32 type)
{
u32 node = pa_to_nid(start);
u64 size = end - start;
static unsigned long num_physpages;
static unsigned long num_physpages;
if (start >= end) {
pr_debug("Invalid region: %016llx-%016llx\n", start, end);
return;
}
num_physpages += (size >> PAGE_SHIFT);
pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
node, type, start, size);
pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
start >> PAGE_SHIFT, end >> PAGE_SHIFT, num_physpages);
memblock_set_node(start, size, &memblock.memory, node);
}
static void __init init_node_memblock(void)
static void __init info_node_memblock(void)
{
u32 mem_type;
u64 mem_end, mem_start, mem_size;
@@ -206,12 +180,20 @@ static void __init init_node_memblock(void)
case EFI_BOOT_SERVICES_DATA:
case EFI_PERSISTENT_MEMORY:
case EFI_CONVENTIONAL_MEMORY:
add_numamem_region(mem_start, mem_end, mem_type);
num_physpages += (mem_size >> PAGE_SHIFT);
pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
(u32)pa_to_nid(mem_start), mem_type, mem_start, mem_size);
pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
mem_start >> PAGE_SHIFT, mem_end >> PAGE_SHIFT, num_physpages);
break;
case EFI_PAL_CODE:
case EFI_UNUSABLE_MEMORY:
case EFI_ACPI_RECLAIM_MEMORY:
add_numamem_region(mem_start, mem_end, mem_type);
num_physpages += (mem_size >> PAGE_SHIFT);
pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n",
(u32)pa_to_nid(mem_start), mem_type, mem_start, mem_size);
pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
mem_start >> PAGE_SHIFT, mem_end >> PAGE_SHIFT, num_physpages);
fallthrough;
case EFI_RESERVED_TYPE:
case EFI_RUNTIME_SERVICES_CODE:
@@ -249,22 +231,16 @@ int __init init_numa_memory(void)
for (i = 0; i < NR_CPUS; i++)
set_cpuid_to_node(i, NUMA_NO_NODE);
numa_reset_distance();
nodes_clear(numa_nodes_parsed);
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
WARN_ON(memblock_clear_hotplug(0, PHYS_ADDR_MAX));
/* Parse SRAT and SLIT if provided by firmware. */
ret = acpi_disabled ? fake_numa_init() : acpi_numa_init();
if (!acpi_disabled)
ret = numa_memblks_init(acpi_numa_init, false);
else
ret = numa_memblks_init(fake_numa_init, false);
if (ret < 0)
return ret;
node_possible_map = numa_nodes_parsed;
if (WARN_ON(nodes_empty(node_possible_map)))
return -EINVAL;
init_node_memblock();
info_node_memblock();
if (!memblock_validate_numa_coverage(SZ_1M))
return -EINVAL;
@@ -272,7 +248,8 @@ int __init init_numa_memory(void)
node_mem_init(node);
node_set_online(node);
}
max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
max_pfn = PFN_DOWN(memblock_end_of_DRAM());
max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn);
setup_nr_node_ids();
loongson_sysconf.nr_nodes = nr_node_ids;
@@ -283,26 +260,6 @@ int __init init_numa_memory(void)
#endif
void __init paging_init(void)
{
unsigned int node;
unsigned long zones_size[MAX_NR_ZONES] = {0, };
for_each_online_node(node) {
unsigned long start_pfn, end_pfn;
get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
if (end_pfn > max_low_pfn)
max_low_pfn = end_pfn;
}
#ifdef CONFIG_ZONE_DMA32
zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
zones_size[ZONE_NORMAL] = max_low_pfn;
free_area_init(zones_size);
}
int pcibus_to_node(struct pci_bus *bus)
{
return dev_to_node(&bus->dev);

View File

@@ -845,13 +845,14 @@ static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config
static int __init init_hw_perf_events(void)
{
int counters;
int bits, counters;
if (!cpu_has_pmp)
return -ENODEV;
pr_info("Performance counters: ");
counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1;
bits = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1;
counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1;
loongarch_pmu.num_counters = counters;
loongarch_pmu.max_period = (1ULL << 63) - 1;
@@ -867,7 +868,7 @@ static int __init init_hw_perf_events(void)
on_each_cpu(reset_counters, NULL, 1);
pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
loongarch_pmu.name, counters, 64);
loongarch_pmu.name, counters, bits);
perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);

View File

@@ -17,6 +17,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long n = (unsigned long) v - 1;
unsigned int isa = cpu_data[n].isa_level;
unsigned int prid = cpu_data[n].processor_id;
unsigned int version = cpu_data[n].processor_id & 0xff;
unsigned int fp_version = cpu_data[n].fpu_vers;
@@ -37,6 +38,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "global_id\t\t: %d\n", cpu_data[n].global_id);
seq_printf(m, "CPU Family\t\t: %s\n", __cpu_family[n]);
seq_printf(m, "Model Name\t\t: %s\n", __cpu_full_name[n]);
seq_printf(m, "PRID\t\t\t: %s (%08x)\n", id_to_core_name(prid), prid);
seq_printf(m, "CPU Revision\t\t: 0x%02x\n", version);
seq_printf(m, "FPU Revision\t\t: 0x%02x\n", fp_version);
seq_printf(m, "CPU MHz\t\t\t: %llu.%02llu\n",

View File

@@ -294,8 +294,6 @@ static void __init fdt_setup(void)
early_init_dt_scan(fdt_pointer, __pa(fdt_pointer));
early_init_fdt_reserve_self();
max_low_pfn = PFN_PHYS(memblock_end_of_DRAM());
#endif
}
@@ -390,7 +388,8 @@ static void __init check_kernel_sections_mem(void)
static void __init arch_mem_init(char **cmdline_p)
{
/* Recalculate max_low_pfn for "mem=xxx" */
max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
max_pfn = PFN_DOWN(memblock_end_of_DRAM());
max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn);
if (usermem)
pr_info("User-defined physical RAM map overwrite\n");

View File

@@ -1131,8 +1131,8 @@ static void configure_exception_vector(void)
tlbrentry = (unsigned long)exception_handlers + 80*VECSIZE;
csr_write64(eentry, LOONGARCH_CSR_EENTRY);
csr_write64(eentry, LOONGARCH_CSR_MERRENTRY);
csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY);
csr_write64(__pa(eentry), LOONGARCH_CSR_MERRENTRY);
csr_write64(__pa(tlbrentry), LOONGARCH_CSR_TLBRENTRY);
}
void per_cpu_trap_init(int cpu)

View File

@@ -439,7 +439,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev,
spin_lock_irqsave(&s->lock, flags);
switch (type) {
case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU:
if (val >= EIOINTC_ROUTE_MAX_VCPUS)
if (val > EIOINTC_ROUTE_MAX_VCPUS)
ret = -EINVAL;
else
s->num_cpu = val;

View File

@@ -857,7 +857,7 @@ retry:
if (writeable) {
prot_bits = kvm_pte_mkwriteable(prot_bits);
if (write)
if (write || !kvm_slot_dirty_track_enabled(memslot))
prot_bits = kvm_pte_mkdirty(prot_bits);
}

View File

@@ -4,6 +4,7 @@
*/
#include <linux/kvm_host.h>
#include <asm/delay.h>
#include <asm/kvm_csr.h>
#include <asm/kvm_vcpu.h>
@@ -95,6 +96,7 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
* and set CSR TVAL with -1
*/
write_gcsr_timertick(0);
__delay(2); /* Wait cycles until timer interrupt injected */
/*
* Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear

View File

@@ -132,6 +132,9 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu)
* Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when
* exiting the guest, so that the next time trap into the guest.
* We don't need to deal with PMU CSRs contexts.
*
* Otherwise set the request bit KVM_REQ_PMU to restore guest PMU
* before entering guest VM
*/
val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
@@ -139,16 +142,12 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu)
val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
if (!(val & KVM_PMU_EVENT_ENABLED))
vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU;
else
kvm_make_request(KVM_REQ_PMU, vcpu);
kvm_restore_host_pmu(vcpu);
}
static void kvm_restore_pmu(struct kvm_vcpu *vcpu)
{
if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU))
kvm_make_request(KVM_REQ_PMU, vcpu);
}
static void kvm_check_pmu(struct kvm_vcpu *vcpu)
{
if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
@@ -299,7 +298,10 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST;
if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) {
kvm_lose_pmu(vcpu);
if (vcpu->arch.aux_inuse & KVM_LARCH_PMU) {
kvm_lose_pmu(vcpu);
kvm_make_request(KVM_REQ_PMU, vcpu);
}
/* make sure the vcpu mode has been written */
smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE);
local_irq_enable();
@@ -1604,9 +1606,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_restore_timer(vcpu);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
/* Restore hardware PMU CSRs */
kvm_restore_pmu(vcpu);
/* Don't bother restoring registers multiple times unless necessary */
if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
return 0;

View File

@@ -60,7 +60,6 @@ int __ref page_is_ram(unsigned long pfn)
return memblock_is_memory(addr) && !memblock_is_reserved(addr);
}
#ifndef CONFIG_NUMA
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -72,7 +71,6 @@ void __init paging_init(void)
free_area_init(max_zone_pfns);
}
#endif /* !CONFIG_NUMA */
void __ref free_initmem(void)
{

View File

@@ -6,7 +6,7 @@
#include <asm/io.h>
#include <asm-generic/early_ioremap.h>
void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size)
void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size)
{
return ((void __iomem *)TO_CACHE(phys_addr));
}

View File

@@ -1624,6 +1624,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
/* Direct jump skips 5 NOP instructions */
else if (is_bpf_text_address((unsigned long)orig_call))
orig_call += LOONGARCH_BPF_FENTRY_NBYTES;
/* Module tracing not supported - cause kernel lockups */
else if (is_module_text_address((unsigned long)orig_call))
return -ENOTSUPP;
if (flags & BPF_TRAMP_F_CALL_ORIG) {
move_addr(ctx, LOONGARCH_GPR_A0, (const u64)im);

View File

@@ -50,11 +50,11 @@ static int __init pcibios_init(void)
*/
lsize = cpu_last_level_cache_line_size();
BUG_ON(!lsize);
if (lsize) {
pci_dfl_cache_line_size = lsize >> 2;
pci_dfl_cache_line_size = lsize >> 2;
pr_debug("PCI: pci_cache_line_size set to %d bytes\n", lsize);
pr_debug("PCI: pci_cache_line_size set to %d bytes\n", lsize);
}
return 0;
}

View File

@@ -18,7 +18,7 @@
cpu@0 {
device_type = "cpu";
compatible = "mips,mips24KEc";
compatible = "mips,mips34Kc";
reg = <0>;
};
};

View File

@@ -692,7 +692,7 @@ unsigned long mips_stack_top(void)
/* Space for the VDSO, data page & GIC user page */
if (current->thread.abi) {
top -= PAGE_ALIGN(current->thread.abi->vdso->size);
top -= PAGE_SIZE;
top -= VDSO_NR_PAGES * PAGE_SIZE;
top -= mips_gic_present() ? PAGE_SIZE : 0;
/* Space to randomize the VDSO base */

View File

@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/export.h>
#include <linux/sort.h>
#include <asm/cpu.h>
#include <asm/cpu-type.h>
@@ -508,54 +509,78 @@ static int __init set_ntlb(char *str)
__setup("ntlb=", set_ntlb);
/* Initialise all TLB entries with unique values */
/* Comparison function for EntryHi VPN fields. */
static int r4k_vpn_cmp(const void *a, const void *b)
{
long v = *(unsigned long *)a - *(unsigned long *)b;
int s = sizeof(long) > sizeof(int) ? sizeof(long) * 8 - 1: 0;
return s ? (v != 0) | v >> s : v;
}
/*
* Initialise all TLB entries with unique values that do not clash with
* what we have been handed over and what we'll be using ourselves.
*/
static void r4k_tlb_uniquify(void)
{
int entry = num_wired_entries();
unsigned long tlb_vpns[1 << MIPS_CONF1_TLBS_SIZE];
int tlbsize = current_cpu_data.tlbsize;
int start = num_wired_entries();
unsigned long vpn_mask;
int cnt, ent, idx, i;
vpn_mask = GENMASK(cpu_vmbits - 1, 13);
vpn_mask |= IS_ENABLED(CONFIG_64BIT) ? 3ULL << 62 : 1 << 31;
htw_stop();
for (i = start, cnt = 0; i < tlbsize; i++, cnt++) {
unsigned long vpn;
write_c0_index(i);
mtc0_tlbr_hazard();
tlb_read();
tlb_read_hazard();
vpn = read_c0_entryhi();
vpn &= vpn_mask & PAGE_MASK;
tlb_vpns[cnt] = vpn;
/* Prevent any large pages from overlapping regular ones. */
write_c0_pagemask(read_c0_pagemask() & PM_DEFAULT_MASK);
mtc0_tlbw_hazard();
tlb_write_indexed();
tlbw_use_hazard();
}
sort(tlb_vpns, cnt, sizeof(tlb_vpns[0]), r4k_vpn_cmp, NULL);
write_c0_pagemask(PM_DEFAULT_MASK);
write_c0_entrylo0(0);
write_c0_entrylo1(0);
while (entry < current_cpu_data.tlbsize) {
unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
unsigned long asid = 0;
int idx;
idx = 0;
ent = tlbsize;
for (i = start; i < tlbsize; i++)
while (1) {
unsigned long entryhi, vpn;
/* Skip wired MMID to make ginvt_mmid work */
if (cpu_has_mmid)
asid = MMID_KERNEL_WIRED + 1;
entryhi = UNIQUE_ENTRYHI(ent);
vpn = entryhi & vpn_mask & PAGE_MASK;
/* Check for match before using UNIQUE_ENTRYHI */
do {
if (cpu_has_mmid) {
write_c0_memorymapid(asid);
write_c0_entryhi(UNIQUE_ENTRYHI(entry));
} else {
write_c0_entryhi(UNIQUE_ENTRYHI(entry) | asid);
}
mtc0_tlbw_hazard();
tlb_probe();
tlb_probe_hazard();
idx = read_c0_index();
/* No match or match is on current entry */
if (idx < 0 || idx == entry)
if (idx >= cnt || vpn < tlb_vpns[idx]) {
write_c0_entryhi(entryhi);
write_c0_index(i);
mtc0_tlbw_hazard();
tlb_write_indexed();
ent++;
break;
/*
* If we hit a match, we need to try again with
* a different ASID.
*/
asid++;
} while (asid < asid_mask);
if (idx >= 0 && idx != entry)
panic("Unable to uniquify TLB entry %d", idx);
write_c0_index(entry);
mtc0_tlbw_hazard();
tlb_write_indexed();
entry++;
}
} else if (vpn == tlb_vpns[idx]) {
ent++;
} else {
idx++;
}
}
tlbw_use_hazard();
htw_start();
@@ -602,6 +627,7 @@ static void r4k_tlb_configure(void)
/* From this point on the ARC firmware is dead. */
r4k_tlb_uniquify();
local_flush_tlb_all();
/* Did I tell you that ARC SUCKS? */
}

View File

@@ -241,16 +241,22 @@ mips_pci_controller:
#endif
/*
* Setup the Malta max (2GB) memory for PCI DMA in host bridge
* in transparent addressing mode.
* Set up memory mapping in host bridge for PCI DMA masters,
* in transparent addressing mode. For EVA use the Malta
* maximum of 2 GiB memory in the alias space at 0x80000000
* as per PHYS_OFFSET. Otherwise use 256 MiB of memory in
* the regular space, avoiding mapping the PCI MMIO window
* for DMA as it seems to confuse the system controller's
* logic, causing PCI MMIO to stop working.
*/
mask = PHYS_OFFSET | PCI_BASE_ADDRESS_MEM_PREFETCH;
MSC_WRITE(MSC01_PCI_BAR0, mask);
MSC_WRITE(MSC01_PCI_HEAD4, mask);
mask = PHYS_OFFSET ? PHYS_OFFSET : 0xf0000000;
MSC_WRITE(MSC01_PCI_BAR0,
mask | PCI_BASE_ADDRESS_MEM_PREFETCH);
MSC_WRITE(MSC01_PCI_HEAD4,
PHYS_OFFSET | PCI_BASE_ADDRESS_MEM_PREFETCH);
mask &= MSC01_PCI_BAR0_SIZE_MSK;
MSC_WRITE(MSC01_PCI_P2SCMSKL, mask);
MSC_WRITE(MSC01_PCI_P2SCMAPL, mask);
MSC_WRITE(MSC01_PCI_P2SCMAPL, PHYS_OFFSET);
/* Don't handle target retries indefinitely. */
if ((data & MSC01_PCI_CFG_MAXRTRY_MSK) ==

View File

@@ -137,6 +137,7 @@ config PPC
select ARCH_HAS_DMA_OPS if PPC64
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE if ARCH_SUPPORTS_HUGETLBFS
select ARCH_HAS_KCOV
select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU
select ARCH_HAS_MEMBARRIER_CALLBACKS

View File

@@ -423,7 +423,6 @@ config PPC_64S_HASH_MMU
config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
select ARCH_HAS_GIGANTIC_PAGE
default y
help
Enable support for the Power ISA 3.0 Radix style MMU. Currently this

View File

@@ -367,7 +367,7 @@ config RISCV_NONSTANDARD_CACHE_OPS
systems to handle cache management.
config AS_HAS_INSN
def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero)
def_bool $(as-instr,.insn 0x100000f)
config AS_HAS_OPTION_ARCH
# https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4

View File

@@ -134,21 +134,6 @@ endif
CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS)
# Default target when executing plain make
boot := arch/riscv/boot
ifeq ($(CONFIG_XIP_KERNEL),y)
KBUILD_IMAGE := $(boot)/xipImage
else
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy)
KBUILD_IMAGE := $(boot)/loader.bin
else
ifeq ($(CONFIG_EFI_ZBOOT),)
KBUILD_IMAGE := $(boot)/Image.gz
else
KBUILD_IMAGE := $(boot)/vmlinuz.efi
endif
endif
endif
boot := arch/riscv/boot
boot-image-y := Image
boot-image-$(CONFIG_KERNEL_BZIP2) := Image.bz2
@@ -159,7 +144,7 @@ boot-image-$(CONFIG_KERNEL_LZO) := Image.lzo
boot-image-$(CONFIG_KERNEL_ZSTD) := Image.zst
boot-image-$(CONFIG_KERNEL_XZ) := Image.xz
ifdef CONFIG_RISCV_M_MODE
boot-image-$(CONFIG_ARCH_CANAAN) := loader.bin
boot-image-$(CONFIG_SOC_CANAAN_K210) := loader.bin
endif
boot-image-$(CONFIG_EFI_ZBOOT) := vmlinuz.efi
boot-image-$(CONFIG_XIP_KERNEL) := xipImage

View File

@@ -7,8 +7,8 @@
#define ANDES_VENDOR_ID 0x31e
#define MICROCHIP_VENDOR_ID 0x029
#define MIPS_VENDOR_ID 0x127
#define SIFIVE_VENDOR_ID 0x489
#define THEAD_VENDOR_ID 0x5b7
#define MIPS_VENDOR_ID 0x722
#endif

View File

@@ -648,9 +648,9 @@ int sbi_debug_console_read(char *bytes, unsigned int num_bytes)
void __init sbi_init(void)
{
bool srst_power_off = false;
int ret;
sbi_set_power_off();
ret = sbi_get_spec_version();
if (ret > 0)
sbi_spec_version = ret;
@@ -683,6 +683,7 @@ void __init sbi_init(void)
sbi_probe_extension(SBI_EXT_SRST)) {
pr_info("SBI SRST extension detected\n");
register_platform_power_off(sbi_srst_power_off);
srst_power_off = true;
sbi_srst_reboot_nb.notifier_call = sbi_srst_reboot;
sbi_srst_reboot_nb.priority = 192;
register_restart_handler(&sbi_srst_reboot_nb);
@@ -702,4 +703,7 @@ void __init sbi_init(void)
__sbi_send_ipi = __sbi_send_ipi_v01;
__sbi_rfence = __sbi_rfence_v01;
}
if (!srst_power_off)
sbi_set_power_off();
}

View File

@@ -689,8 +689,20 @@ bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu)
*/
read_lock_irqsave(&imsic->vsfile_lock, flags);
if (imsic->vsfile_cpu > -1)
ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei));
if (imsic->vsfile_cpu > -1) {
/*
* This function is typically called from kvm_vcpu_block() via
* kvm_arch_vcpu_runnable() upon WFI trap. The kvm_vcpu_block()
* can be preempted and the blocking VCPU might resume on a
* different CPU. This means it is possible that current CPU
* does not match the imsic->vsfile_cpu hence this function
* must check imsic->vsfile_cpu before accessing HGEIP CSR.
*/
if (imsic->vsfile_cpu != vcpu->cpu)
ret = true;
else
ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei));
}
read_unlock_irqrestore(&imsic->vsfile_lock, flags);
return ret;

View File

@@ -171,7 +171,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
enum kvm_mr_change change)
{
hva_t hva, reg_end, size;
gpa_t base_gpa;
bool writable;
int ret = 0;
@@ -190,15 +189,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
hva = new->userspace_addr;
size = new->npages << PAGE_SHIFT;
reg_end = hva + size;
base_gpa = new->base_gfn << PAGE_SHIFT;
writable = !(new->flags & KVM_MEM_READONLY);
mmap_read_lock(current->mm);
/*
* A memory region could potentially cover multiple VMAs, and
* any holes between them, so iterate over all of them to find
* out if we can map any of them right now.
* any holes between them, so iterate over all of them.
*
* +--------------------------------------------+
* +---------------+----------------+ +----------------+
@@ -209,7 +206,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
*/
do {
struct vm_area_struct *vma;
hva_t vm_start, vm_end;
hva_t vm_end;
vma = find_vma_intersection(current->mm, hva, reg_end);
if (!vma)
@@ -225,36 +222,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
}
/* Take the intersection of this VMA with the memory region */
vm_start = max(hva, vma->vm_start);
vm_end = min(reg_end, vma->vm_end);
if (vma->vm_flags & VM_PFNMAP) {
gpa_t gpa = base_gpa + (vm_start - hva);
phys_addr_t pa;
pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
pa += vm_start - vma->vm_start;
/* IO region dirty page logging not allowed */
if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
goto out;
}
ret = kvm_riscv_mmu_ioremap(kvm, gpa, pa, vm_end - vm_start,
writable, false);
if (ret)
break;
}
hva = vm_end;
} while (hva < reg_end);
if (change == KVM_MR_FLAGS_ONLY)
goto out;
if (ret)
kvm_riscv_mmu_iounmap(kvm, base_gpa, size);
out:
mmap_read_unlock(current->mm);
return ret;

View File

@@ -212,7 +212,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) &&
!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}

View File

@@ -1154,17 +1154,15 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_NODAT 0x400
#define IPTE_GUEST_ASCE 0x800
static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
unsigned long opt, unsigned long asce,
int local)
static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, int local)
{
unsigned long pto;
pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%%r0,%[m4]"
: "+m" (*ptep)
: [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
[asce] "a" (asce), [m4] "i" (local));
: [r1] "a" (pto), [r2] "a" (addr & PAGE_MASK),
[m4] "i" (local));
}
static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
@@ -1348,7 +1346,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
* A local RDP can be used to do the flush.
*/
if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT))
__ptep_rdp(address, ptep, 0, 0, 1);
__ptep_rdp(address, ptep, 1);
}
#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault

View File

@@ -274,9 +274,9 @@ void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
preempt_disable();
atomic_inc(&mm->context.flush_count);
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
__ptep_rdp(addr, ptep, 0, 0, 1);
__ptep_rdp(addr, ptep, 1);
else
__ptep_rdp(addr, ptep, 0, 0, 0);
__ptep_rdp(addr, ptep, 0);
/*
* PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
* means it is still valid and active, and must not be changed according

View File

@@ -2789,13 +2789,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
return;
}
if (perf_callchain_store(entry, regs->ip))
return;
if (perf_hw_regs(regs))
if (perf_hw_regs(regs)) {
if (perf_callchain_store(entry, regs->ip))
return;
unwind_start(&state, current, regs, NULL);
else
} else {
unwind_start(&state, current, NULL, (void *)regs->sp);
}
for (; !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);

Some files were not shown because too many files have changed in this diff Show More