Pull drm updates from Dave Airlie:
 "cross-subsystem:
   - i2c-hid: Make elan touch controllers power on after panel is
     enabled
   - dt bindings for STM32MP25 SoC
   - pci vgaarb: use screen_info helpers
   - rust pin-init updates
   - add MEI driver for late binding firmware update/load

  uapi:
   - add ioctl for reassigning GEM handles
   - provide boot_display attribute on boot-up devices

  core:
   - document DRM_MODE_PAGE_FLIP_EVENT
   - add vendor specific recovery method to drm device wedged uevent

  gem:
   - Simplify gpuvm locking

  ttm:
   - add interface to populate buffers

  sched:
   - Fix race condition in trace code

  atomic:
   - Reallow no-op async page flips

  display:
   - dp: Fix command length

  video:
   - Improve pixel-format handling for struct screen_info

  rust:
   - drop Opaque<> from ioctl args
   - Alloc:
       - BorrowedPage type and AsPageIter traits
       - Implement Vmalloc::to_page() and VmallocPageIter
   - DMA/Scatterlist:
       - Add dma::DataDirection and type alias for dma_addr_t
       - Abstraction for struct scatterlist and sg_table
   - DRM:
       - simplify use of generics
       - add DriverFile type alias
       - drop Object::SIZE
   - Rust:
       - pin-init tree merge
       - Various methods for AsBytes and FromBytes traits

  gpuvm:
   - Support madvise in Xe driver

  gpusvm:
   - fix hmm_pfn_to_map_order usage in gpusvm

  bridge:
   - Improve and fix ref counting on bridge management
   - cdns-dsi: Various improvements to mode setting
   - Support Solomon SSD2825 plus DT bindings
   - Support Waveshare DSI2DPI plus DT bindings
   - Support Content Protection property
   - display-connector: Improve DP display detection
   - Add support for Radxa Ra620 plus DT bindings
   - adv7511: Provide SPD and HDMI infoframes
   - it6505: Replace crypto_shash with sha()
   - synopsys: Add support for DW DPTX Controller plus DT bindings
   - adv7511: Write full Audio infoframe
   - ite6263: Support vendor-specific infoframes
   - simple: Add support for Realtek RTD2171 DP-to-HDMI plus DT bindings

  panel:
   - panel-edp: Support mt8189 Chromebooks; Support BOE NV140WUM-N64;
     Support SHP LQ134Z1; Fixes
   - panel-simple: Support Olimex LCD-OLinuXino-5CTS plus DT bindings
   - Support Samsung AMS561RA01
   - Support Hydis HV101HD1 plus DT bindings
   - ilitek-ili9881c: Refactor mode setting; Add support for Bestar
     BSD1218-A101KL68 LCD plus DT bindings
   - lvds: Add support for Ampire AMP19201200B5TZQW-T03 to DT bindings
   - edp: Add support for additional mt8189 Chromebook panels
   - lvds: Add DT bindings for EDT ETML0700Z8DHA

  amdgpu:
   - add CRIU support for gem objects
   - RAS updates
   - VCN SRAM load fixes
   - EDID read fixes
   - eDP ALPM support
   - Documentation updates
   - Rework PTE flag generation
   - DCE6 fixes
   - VCN devcoredump cleanup
   - MMHUB client id fixes
   - VCN 5.0.1 RAS support
   - SMU 13.0.x updates
   - Expanded PCIe DPC support
   - Expanded VCN reset support
   - VPE per queue reset support
   - give kernel jobs unique id for tracing
   - pre-populate exported buffers
   - cyan skillfish updates
   - make vbios build number available in sysfs
   - userq updates
   - HDCP updates
   - support MMIO remap page as ttm pool
   - JPEG parser updates
   - DCE6 DC updates
   - use devm for i2c buses
   - GPUVM locking updates
   - Drop non-DC DCE11 code
   - improve fallback handling for pixel encoding

  amdkfd:
   - SVM/page migration fixes
   - debugfs fixes
   - add CRIU support for gem objects
   - SVM updates

  radeon:
   - use dev_warn_once in CS parsers

  xe:
   - add madvise interface
   - add DRM_IOCTL_XE_VM_QUERY_MEMORY_RANGE_ATTRS to query VMA count
     and memory attributes
   - drop L# bank mask reporting from media GT3 on Xe3+.
   - add SLPC power_profile sysfs interface
   - add configs attribs to add post/mid context-switch commands
   - handle firmware reported hardware errors notifying userspace with
     device wedged uevent
   - use same dir structure across sysfs/debugfs
   - cleanup and future proof vram region init
   - add G-states and PCI link states to debugfs
   - Add SRIOV support for CCS surfaces on Xe2+
   - Enable SRIOV PF mode by default on supported platforms
   - move flush to common code
   - extended core workarounds for Xe2/3
   - use DRM scheduler for delayed GT TLB invalidations
   - configs improvements and allow VF device enablement
   - prep work to expose mmio regions to userspace
   - VF migration support added
   - prepare GPU SVM for THP migration
   - start fixing XE_PAGE_SIZE vs PAGE_SIZE
   - add PSMI support for hw validation
   - resize VF bars to max possible size according to number of VFs
   - Ensure GT is in C0 during resume
   - pre-populate exported buffers
   - replace xe_hmm with gpusvm
   - add more SVM GT stats to debugfs
   - improve fake pci and WA kunit handling for new platform testing
   - Test GuC to GuC comms to add debugging
   - use attribute groups to simplify sysfs registration
   - add Late Binding firmware code to interact with MEI

  i915:
   - apply multiple JSL/EHL/Gen7/Gen6 workarounds properly
   - protect against overflow in active_engine()
   - Use try_cmpxchg64() in __active_lookup()
   - include GuC registers in error state
   - get rid of dev->struct_mutex
   - iopoll: generalize read_poll_timeout
   - lots more display refactoring
   - Reject HBR3 in any eDP Panel
   - Prune modes for YUV420
   - Display Wa fix, additions, and updates
   - DP: Fix 2.7 Gbps link training on g4x
   - DP: Adjust the idle pattern handling
   - DP: Shuffle the link training code a bit
   - Don't set/read the DSI C clock divider on GLK
   - Enable_psr kernel parameter changes
   - Type-C enabled/disconnected dp-alt sink
   - Wildcat Lake enabling
   - DP HDR updates
   - DRAM detection
   - wait PSR idle on dsb commit
   - Remove FBC modulo 4 restriction for ADL-P+
   - panic: refactor framebuffer allocation

  habanalabs:
   - debug/visibility improvements
   - vmalloc-backed coherent mmap support
   - HLDIO infrastructure

  nova-core:
   - various register!() macro improvements
   - minor vbios/firmware fixes/refactoring
   - advance firmware boot stages; process Booter and patch signatures
   - process GSP and GSP bootloader
   - Add r570.144 firmware bindings and update to it
   - Move GSP boot code to own module
   - Use new pin-init features to store driver's private data in a
     single allocation
   - Update ARef import from sync::aref

  nova-drm:
   - Update ARef import from sync::aref

  tyr:
   - initial driver skeleton for a rust driver for ARM Mali GPUs
   - capable of powering up, querying metadata and providing it to userspace.

  msm:
   - GPU and Core:
      - in DT bindings describe clocks per GPU type
      - GMU bandwidth voting for x1-85
      - a623/a663 speedbins
      - cleanup some remaining no-iommu leftovers after VM_BIND conversion
      - fix GEM obj 32b size truncation
      - add missing VM_BIND param validation
      - IFPC for x1-85 and a750
      - register xml and gen_header.py sync from mesa
   - Display:
      - add missing bindings for display on SC8180X
      - added DisplayPort MST bindings
      - conversion from round_rate() to determine_rate()

  amdxdna:
   - add IOCTL_AMDXDNA_GET_ARRAY
   - support user space allocated buffers
   - streamline PM interfaces
   - Refactoring wrt. hardware contexts
   - improve error reporting

  nouveau:
   - use GSP firmware by default
   - improve error reporting
   - Pre-populate exported buffers

  ast:
   - Clean up detection of DRAM config

  exynos:
   - add DSIM bridge driver support for Exynos7870
   - Document Exynos7870 DSIM compatible in dt-binding

  panthor:
   - Print task/pid on errors
   - Add support for Mali G710, G510, G310, Gx15, Gx20, Gx25
   - Improve cache flushing
   - Fail VM bind if BO has offset

  renesas:
   - convert to RUNTIME_PM_OPS

  rcar-du:
   - Make number of lanes configurable
   - Use RUNTIME_PM_OPS
   - Add support for DSI commands

  rocket:
   - Add driver for Rockchip NPU plus DT bindings
   - Use kfree() and sizeof() correctly
   - Test DMA status

  rockchip:
   - dsi2: Add support for RK3576 plus DT bindings
   - Add support for RK3588 DPTX output

  tidss:
   - Use crtc_ fields for programming display mode
   - Remove other drivers from aperture

  pixpaper:
   - Add support for Mayqueen Pixpaper plus DT bindings

  v3d:
   - Support querying number of GPU resets for KHR_robustness

  stm:
   - Clean up logging
   - ltdc: Add support for STM32MP257F-EV1 plus DT bindings

  sitronix:
   - st7571-i2c: Add support for inverted displays and 2-bit grayscale

  tidss:
   - Convert to kernel's FIELD_ macros

  vesadrm:
   - Support 8-bit palette mode

  imagination:
   - Improve power management
   - Add support for TH1520 GPU
   - Support Risc-V architectures

  v3d:
   - Improve job management and locking

  vkms:
   - Support variants of ARGB8888, ARGB16161616, RGB565, RGB888 and P01x
   - Support YUV with 16-bit components"

* tag 'drm-next-2025-10-01' of https://gitlab.freedesktop.org/drm/kernel: (1455 commits)
  drm/amd: Add name to modes from amdgpu_connector_add_common_modes()
  drm/amd: Drop some common modes from amdgpu_connector_add_common_modes()
  drm/amdgpu: update MODULE_PARM_DESC for freesync_video
  drm/amd: Use dynamic array size declaration for amdgpu_connector_add_common_modes()
  drm/amd/display: Share dce100_validate_global with DCE6-8
  drm/amd/display: Share dce100_validate_bandwidth with DCE6-8
  drm/amdgpu: Fix fence signaling race condition in userqueue
  amd/amdkfd: enhance kfd process check in switch partition
  amd/amdkfd: resolve a race in amdgpu_amdkfd_device_fini_sw
  drm/amd/display: Reject modes with too high pixel clock on DCE6-10
  drm/amd: Drop unnecessary check in amdgpu_connector_add_common_modes()
  drm/amd/display: Only enable common modes for eDP and LVDS
  drm/amdgpu: remove the redeclaration of variable i
  drm/amdgpu/userq: assign an error code for invalid userq va
  drm/amdgpu: revert "rework reserved VMID handling" v2
  drm/amdgpu: remove leftover from enforcing isolation by VMID
  drm/amdgpu: Add fallback to pipe reset if KCQ ring reset fails
  accel/habanalabs: add Infineon version check
  accel/habanalabs/gaudi2: read preboot status after recovering from dirty state
  accel/habanalabs: add HL_GET_P_STATE passthrough type
  ...
This commit is contained in:
Linus Torvalds
2025-10-02 12:47:25 -07:00
1254 changed files with 52110 additions and 19221 deletions

View File

@@ -165,6 +165,8 @@ Casey Connolly <casey.connolly@linaro.org> <caleb@connolly.tech>
Casey Connolly <casey.connolly@linaro.org> <caleb@postmarketos.org>
Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
Carl Vanderlip <carl.vanderlip@oss.qualcomm.com> <carlv@codeaurora.org>
Carl Vanderlip <carl.vanderlip@oss.qualcomm.com> <quic_carlv@quicinc.com>
Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao@amd.com>
Carlos Bilbao <carlos.bilbao@kernel.org> <carlos.bilbao.osdev@gmail.com>
Carlos Bilbao <carlos.bilbao@kernel.org> <bilbao@vt.edu>

View File

@@ -0,0 +1,8 @@
What: /sys/class/drm/.../boot_display
Date: January 2026
Contact: Linux DRI developers <dri-devel@vger.kernel.org>
Description:
This file indicates that displays connected to the device were
used to display the boot sequence. If a display connected to
the device was used to display the boot sequence the file will
be present and contain "1".

View File

@@ -223,13 +223,13 @@ Userspace components
Compiler
--------
Peano is an LLVM based open-source compiler for AMD XDNA Array compute tile
available at:
Peano is an LLVM based open-source single core compiler for AMD XDNA Array
compute tile. Peano is available at:
https://github.com/Xilinx/llvm-aie
The open-source IREE compiler supports graph compilation of ML models for AMD
NPU and uses Peano underneath. It is available at:
https://github.com/nod-ai/iree-amd-aie
IRON is an open-source array compiler for AMD XDNA Array based NPU which uses
Peano underneath. IRON is available at:
https://github.com/Xilinx/mlir-aie
Usermode Driver (UMD)
---------------------

View File

@@ -10,6 +10,7 @@ Compute Accelerators
introduction
amdxdna/index
qaic/index
rocket/index
.. only:: subproject and html

View File

@@ -0,0 +1,19 @@
.. SPDX-License-Identifier: GPL-2.0-only
=====================================
accel/rocket Rockchip NPU driver
=====================================
The accel/rocket driver supports the Neural Processing Units (NPUs) inside some
Rockchip SoCs such as the RK3588. Rockchip calls it RKNN and sometimes RKNPU.
The hardware is described in chapter 36 in the RK3588 TRM.
This driver just powers the hardware on and off, allocates and maps buffers to
the device and submits jobs to the frontend unit. Everything else is done in
userspace, as a Gallium driver (also called rocket) that is part of the Mesa3D
project.
Hardware currently supported:
* RK3588

View File

@@ -36,20 +36,31 @@ properties:
clocks:
maxItems: 1
"#clock-cells":
const: 0
required:
- compatible
- reg
if:
properties:
compatible:
contains:
enum:
- st,stm32mp157-syscfg
- st,stm32f4-gcan
then:
required:
- clocks
allOf:
- if:
properties:
compatible:
contains:
enum:
- st,stm32mp157-syscfg
- st,stm32f4-gcan
then:
required:
- clocks
- if:
properties:
compatible:
const: st,stm32mp25-syscfg
then:
required:
- "#clock-cells"
additionalProperties: false

View File

@@ -24,6 +24,7 @@ properties:
- samsung,exynos5410-mipi-dsi
- samsung,exynos5422-mipi-dsi
- samsung,exynos5433-mipi-dsi
- samsung,exynos7870-mipi-dsi
- fsl,imx8mm-mipi-dsim
- fsl,imx8mp-mipi-dsim
- items:
@@ -144,6 +145,32 @@ required:
allOf:
- $ref: ../dsi-controller.yaml#
- if:
properties:
compatible:
contains:
const: samsung,exynos7870-mipi-dsi
then:
properties:
clocks:
minItems: 4
maxItems: 4
clock-names:
items:
- const: bus
- const: pll
- const: byte
- const: esc
ports:
required:
- port@0
required:
- ports
- if:
properties:
compatible:

View File

@@ -28,6 +28,8 @@ properties:
- enum:
- adi,adv7123
- dumb-vga-dac
- radxa,ra620
- realtek,rtd2171
- ti,opa362
- ti,ths8134
- ti,ths8135

View File

@@ -0,0 +1,141 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/bridge/solomon,ssd2825.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Solomon SSD2825 RGB to MIPI-DSI bridge
maintainers:
- Svyatoslav Ryhel <clamor95@gmail.com>
allOf:
- $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
const: solomon,ssd2825
reg:
maxItems: 1
reset-gpios: true
dvdd-supply:
description: Regulator for 1.2V digital power supply.
avdd-supply:
description: Regulator for 1.2V analog power supply.
vddio-supply:
description: Regulator for 1.8V IO power supply.
spi-max-frequency:
maximum: 1000000
spi-cpha: true
spi-cpol: true
clocks:
maxItems: 1
description: Reference TX_CLK used before PLL is locked.
solomon,hs-zero-delay-ns:
description:
HS zero delay period
minimum: 0
maximum: 1700
default: 133
solomon,hs-prep-delay-ns:
description:
HS prep delay period
minimum: 0
maximum: 1728
default: 40
ports:
$ref: /schemas/graph.yaml#/properties/ports
properties:
port@0:
$ref: /schemas/graph.yaml#/$defs/port-base
unevaluatedProperties: false
description:
Video port for RGB input
properties:
endpoint:
$ref: /schemas/graph.yaml#/$defs/endpoint-base
unevaluatedProperties: false
properties:
bus-width:
enum: [ 16, 18, 24 ]
port@1:
$ref: /schemas/graph.yaml#/properties/port
description:
Video port for DSI output (panel or connector)
required:
- port@0
- port@1
required:
- compatible
- ports
additionalProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
spi {
#address-cells = <1>;
#size-cells = <0>;
dsi@2 {
compatible = "solomon,ssd2825";
reg = <2>;
spi-max-frequency = <1000000>;
spi-cpha;
spi-cpol;
reset-gpios = <&gpio 114 GPIO_ACTIVE_LOW>;
dvdd-supply = <&vdd_1v2>;
avdd-supply = <&vdd_1v2>;
vddio-supply = <&vdd_1v8_io>;
solomon,hs-zero-delay-ns = <300>;
solomon,hs-prep-delay-ns = <65>;
clocks = <&ssd2825_tx_clk>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
bridge_input: endpoint {
remote-endpoint = <&dpi_output>;
bus-width = <24>;
};
};
port@1 {
reg = <1>;
bridge_output: endpoint {
remote-endpoint = <&panel_input>;
};
};
};
};
};

View File

@@ -0,0 +1,103 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/bridge/waveshare,dsi2dpi.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Waveshare MIPI-DSI to DPI Converter bridge
maintainers:
- Joseph Guo <qijian.guo@nxp.com>
description:
Waveshare bridge board is part of Waveshare panel which converts DSI to DPI.
properties:
compatible:
const: waveshare,dsi2dpi
reg:
maxItems: 1
description: base I2C address of the device
power-supply: true
ports:
$ref: /schemas/graph.yaml#/properties/ports
properties:
port@0:
$ref: /schemas/graph.yaml#/$defs/port-base
unevaluatedProperties: false
description:
Video port for MIPI DSI input
properties:
endpoint:
$ref: /schemas/media/video-interfaces.yaml#
unevaluatedProperties: false
properties:
data-lanes:
description: array of physical DSI data lane indexes.
items:
- const: 1
- const: 2
required:
- data-lanes
port@1:
$ref: /schemas/graph.yaml#/properties/port
description:
Video port for MIPI DPI output panel.
required:
- port@0
- port@1
required:
- compatible
- reg
- ports
- power-supply
additionalProperties: false
examples:
- |
i2c {
#address-cells = <1>;
#size-cells = <0>;
bridge@45 {
compatible = "waveshare,dsi2dpi";
reg = <0x45>;
power-supply = <&reg_3p3v>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
waveshare_from_dsim: endpoint {
data-lanes = <1 2>;
remote-endpoint = <&dsim_to_waveshare>;
};
};
port@1 {
reg = <1>;
waveshare_to_panel: endpoint {
remote-endpoint = <&panel_to_waveshare>;
};
};
};
};
};
...

View File

@@ -0,0 +1,63 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/mayqueen,pixpaper.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Mayqueen Pixpaper e-ink display panel
maintainers:
- LiangCheng Wang <zaq14760@gmail.com>
description:
The Pixpaper is an e-ink display panel controlled via an SPI interface.
The panel has a resolution of 122x250 pixels and requires GPIO pins for
reset, busy, and data/command control.
allOf:
- $ref: /schemas/spi/spi-peripheral-props.yaml#
properties:
compatible:
const: mayqueen,pixpaper
reg:
maxItems: 1
spi-max-frequency:
maximum: 1000000
default: 1000000
reset-gpios:
maxItems: 1
busy-gpios:
maxItems: 1
dc-gpios:
maxItems: 1
required:
- compatible
- reg
- reset-gpios
- busy-gpios
- dc-gpios
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
spi {
#address-cells = <1>;
#size-cells = <0>;
display@0 {
compatible = "mayqueen,pixpaper";
reg = <0>;
spi-max-frequency = <1000000>;
reset-gpios = <&gpio1 17 GPIO_ACTIVE_HIGH>;
busy-gpios = <&gpio1 18 GPIO_ACTIVE_HIGH>;
dc-gpios = <&gpio1 19 GPIO_ACTIVE_HIGH>;
};
};

View File

@@ -29,15 +29,30 @@ properties:
- qcom,sdm845-dp
- qcom,sm8350-dp
- qcom,sm8650-dp
- qcom,x1e80100-dp
- items:
- enum:
- qcom,sm6350-dp
- const: qcom,sc7180-dp
# deprecated entry for compatibility with old DT
- items:
- enum:
- qcom,sm6350-dp
- const: qcom,sm8350-dp
deprecated: true
- items:
- enum:
- qcom,sar2130p-dp
- qcom,sm6350-dp
- qcom,sm7150-dp
- qcom,sm8150-dp
- qcom,sm8250-dp
- qcom,sm8450-dp
- qcom,sm8550-dp
- const: qcom,sm8350-dp
- items:
- enum:
- qcom,sm8750-dp
@@ -51,35 +66,37 @@ properties:
- description: link register block
- description: p0 register block
- description: p1 register block
- description: p2 register block
- description: p3 register block
- description: mst2link register block
- description: mst3link register block
interrupts:
maxItems: 1
clocks:
minItems: 5
items:
- description: AHB clock to enable register access
- description: Display Port AUX clock
- description: Display Port Link clock
- description: Link interface clock between DP and PHY
- description: Display Port Pixel clock
- description: Display Port stream 0 Pixel clock
- description: Display Port stream 1 Pixel clock
- description: Display Port stream 2 Pixel clock
- description: Display Port stream 3 Pixel clock
clock-names:
minItems: 5
items:
- const: core_iface
- const: core_aux
- const: ctrl_link
- const: ctrl_link_iface
- const: stream_pixel
assigned-clocks:
items:
- description: link clock source
- description: pixel clock source
assigned-clock-parents:
items:
- description: phy 0 parent
- description: phy 1 parent
- const: stream_1_pixel
- const: stream_2_pixel
- const: stream_3_pixel
phys:
maxItems: 1
@@ -161,7 +178,6 @@ required:
allOf:
# AUX BUS does not exist on DP controllers
# Audio output also is present only on DP output
# p1 regions is present on DP, but not on eDP
- if:
properties:
compatible:
@@ -174,12 +190,110 @@ allOf:
properties:
"#sound-dai-cells": false
else:
if:
properties:
compatible:
contains:
enum:
- qcom,sa8775p-dp
- qcom,x1e80100-dp
then:
oneOf:
- required:
- aux-bus
- required:
- "#sound-dai-cells"
else:
properties:
aux-bus: false
required:
- "#sound-dai-cells"
- if:
properties:
compatible:
contains:
enum:
# these platforms support SST only
- qcom,sc7180-dp
- qcom,sc7280-dp
- qcom,sc7280-edp
- qcom,sc8180x-edp
- qcom,sc8280xp-edp
then:
properties:
aux-bus: false
required:
- "#sound-dai-cells"
reg:
minItems: 5
maxItems: 5
clocks:
minItems: 5
maxItems: 5
clock-names:
minItems: 5
maxItems: 5
- if:
properties:
compatible:
contains:
enum:
# these platforms support 2 streams MST on some interfaces,
# others are SST only
- qcom,sc8280xp-dp
- qcom,x1e80100-dp
then:
properties:
reg:
minItems: 5
maxItems: 5
clocks:
minItems: 5
maxItems: 6
clock-names:
minItems: 5
maxItems: 6
- if:
properties:
compatible:
contains:
# 2 streams MST
enum:
- qcom,sc8180x-dp
- qcom,sdm845-dp
- qcom,sm8350-dp
- qcom,sm8650-dp
then:
properties:
reg:
minItems: 5
maxItems: 5
clocks:
minItems: 6
maxItems: 6
clock-names:
minItems: 6
maxItems: 6
- if:
properties:
compatible:
contains:
enum:
# these platforms support 4 stream MST on first DP,
# 2 streams MST on the second one.
- qcom,sa8775p-dp
then:
properties:
reg:
minItems: 9
maxItems: 9
clocks:
minItems: 6
maxItems: 8
clock-names:
minItems: 6
maxItems: 8
additionalProperties: false

View File

@@ -27,6 +27,7 @@ properties:
- qcom,sar2130p-dsi-ctrl
- qcom,sc7180-dsi-ctrl
- qcom,sc7280-dsi-ctrl
- qcom,sc8180x-dsi-ctrl
- qcom,sdm660-dsi-ctrl
- qcom,sdm670-dsi-ctrl
- qcom,sdm845-dsi-ctrl
@@ -332,6 +333,7 @@ allOf:
- qcom,sar2130p-dsi-ctrl
- qcom,sc7180-dsi-ctrl
- qcom,sc7280-dsi-ctrl
- qcom,sc8180x-dsi-ctrl
- qcom,sdm845-dsi-ctrl
- qcom,sm6115-dsi-ctrl
- qcom,sm6125-dsi-ctrl

View File

@@ -124,6 +124,40 @@ allOf:
contains:
enum:
- qcom,adreno-gmu-623.0
then:
properties:
reg:
items:
- description: Core GMU registers
- description: Resource controller registers
- description: GMU PDC registers
reg-names:
items:
- const: gmu
- const: rscc
- const: gmu_pdc
clocks:
items:
- description: GMU clock
- description: GPU CX clock
- description: GPU AXI clock
- description: GPU MEMNOC clock
- description: GPU AHB clock
- description: GPU HUB CX clock
clock-names:
items:
- const: gmu
- const: cxo
- const: axi
- const: memnoc
- const: ahb
- const: hub
- if:
properties:
compatible:
contains:
enum:
- qcom,adreno-gmu-635.0
- qcom,adreno-gmu-660.1
- qcom,adreno-gmu-663.0

View File

@@ -146,39 +146,209 @@ allOf:
properties:
compatible:
contains:
pattern: '^qcom,adreno-[3-5][0-9][0-9]\.[0-9]+$'
oneOf:
- pattern: '^qcom,adreno-305\.[0-9]+$'
- pattern: '^qcom,adreno-330\.[0-9]+$'
then:
properties:
clocks:
minItems: 2
maxItems: 7
minItems: 3
maxItems: 3
clock-names:
items:
anyOf:
- const: core
description: GPU Core clock
- const: iface
description: GPU Interface clock
- const: mem
description: GPU Memory clock
- const: mem_iface
description: GPU Memory Interface clock
- const: alt_mem_iface
description: GPU Alternative Memory Interface clock
- const: gfx3d
description: GPU 3D engine clock
- const: rbbmtimer
description: GPU RBBM Timer for Adreno 5xx series
- const: rbcpr
description: GPU RB Core Power Reduction clock
minItems: 2
maxItems: 7
- const: core
description: GPU Core clock
- const: iface
description: GPU Interface clock
- const: mem_iface
description: GPU Memory Interface clock
required:
- clocks
- clock-names
- if:
properties:
compatible:
contains:
pattern: '^qcom,adreno-306\.[0-9]+$'
then:
properties:
clocks:
minItems: 5
maxItems: 6
clock-names:
oneOf:
- items:
- const: core
description: GPU Core clock
- const: iface
description: GPU Interface clock
- const: mem_iface
description: GPU Memory Interface clock
- const: alt_mem_iface
description: GPU Alternative Memory Interface clock
- const: gfx3d
description: GPU 3D engine clock
- items:
- const: core
description: GPU Core clock
- const: iface
description: GPU Interface clock
- const: mem
description: GPU Memory clock
- const: mem_iface
description: GPU Memory Interface clock
- const: alt_mem_iface
description: GPU Alternative Memory Interface clock
- const: gfx3d
description: GPU 3D engine clock
- if:
properties:
compatible:
contains:
pattern: '^qcom,adreno-320\.[0-9]+$'
then:
properties:
clocks:
minItems: 4
maxItems: 4
clock-names:
items:
- const: core
description: GPU Core clock
- const: iface
description: GPU Interface clock
- const: mem
description: GPU Memory clock
- const: mem_iface
description: GPU Memory Interface clock
- if:
properties:
compatible:
contains:
pattern: '^qcom,adreno-405\.[0-9]+$'
then:
properties:
clocks:
minItems: 7
maxItems: 7
clock-names:
items:
- const: core
description: GPU Core clock
- const: iface
description: GPU Interface clock
- const: mem
description: GPU Memory clock
- const: mem_iface
description: GPU Memory Interface clock
- const: alt_mem_iface
description: GPU Alternative Memory Interface clock
- const: gfx3d
description: GPU 3D engine clock
- const: rbbmtimer
description: GPU RBBM Timer for Adreno 5xx series
- if:
properties:
compatible:
contains:
pattern: '^qcom,adreno-50[56]\.[0-9]+$'
then:
properties:
clocks:
minItems: 6
maxItems: 6
clock-names:
items:
- const: core
description: GPU Core clock
- const: iface
description: GPU Interface clock
- const: mem_iface
description: GPU Memory Interface clock
- const: alt_mem_iface
description: GPU Alternative Memory Interface clock
- const: rbbmtimer
description: GPU RBBM Timer for Adreno 5xx series
- const: alwayson
description: GPU AON clock
- if:
properties:
compatible:
contains:
oneOf:
- pattern: '^qcom,adreno-508\.[0-9]+$'
- pattern: '^qcom,adreno-509\.[0-9]+$'
- pattern: '^qcom,adreno-512\.[0-9]+$'
- pattern: '^qcom,adreno-540\.[0-9]+$'
then:
properties:
clocks:
minItems: 6
maxItems: 6
clock-names:
items:
- const: iface
description: GPU Interface clock
- const: rbbmtimer
description: GPU RBBM Timer for Adreno 5xx series
- const: mem
description: GPU Memory clock
- const: mem_iface
description: GPU Memory Interface clock
- const: rbcpr
description: GPU RB Core Power Reduction clock
- const: core
description: GPU Core clock
- if:
properties:
compatible:
contains:
pattern: '^qcom,adreno-510\.[0-9]+$'
then:
properties:
clocks:
minItems: 6
maxItems: 6
clock-names:
items:
- const: core
description: GPU Core clock
- const: iface
description: GPU Interface clock
- const: mem
description: GPU Memory clock
- const: mem_iface
description: GPU Memory Interface clock
- const: rbbmtimer
description: GPU RBBM Timer for Adreno 5xx series
- const: alwayson
description: GPU AON clock
- if:
properties:
compatible:
contains:
pattern: '^qcom,adreno-530\.[0-9]+$'
then:
properties:
clocks:
minItems: 5
maxItems: 5
clock-names:
items:
- const: core
description: GPU Core clock
- const: iface
description: GPU Interface clock
- const: rbbmtimer
description: GPU RBBM Timer for Adreno 5xx series
- const: mem
description: GPU Memory clock
- const: mem_iface
description: GPU Memory Interface clock
- if:
properties:
@@ -187,6 +357,7 @@ allOf:
enum:
- qcom,adreno-610.0
- qcom,adreno-619.1
- qcom,adreno-07000200
then:
properties:
clocks:
@@ -222,7 +393,9 @@ allOf:
properties:
compatible:
contains:
pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]+$'
oneOf:
- pattern: '^qcom,adreno-[67][0-9][0-9]\.[0-9]+$'
- pattern: '^qcom,adreno-[0-9a-f]{8}$'
then: # Starting with A6xx, the clocks are usually defined in the GMU node
properties:

View File

@@ -375,7 +375,11 @@ examples:
<0xaf54200 0x0c0>,
<0xaf55000 0x770>,
<0xaf56000 0x09c>,
<0xaf57000 0x09c>;
<0xaf57000 0x09c>,
<0xaf58000 0x09c>,
<0xaf59000 0x09c>,
<0xaf5a000 0x23c>,
<0xaf5b000 0x23c>;
interrupt-parent = <&mdss0>;
interrupts = <12>;
@@ -384,16 +388,28 @@ examples:
<&dispcc_dptx0_aux_clk>,
<&dispcc_dptx0_link_clk>,
<&dispcc_dptx0_link_intf_clk>,
<&dispcc_dptx0_pixel0_clk>;
<&dispcc_dptx0_pixel0_clk>,
<&dispcc_dptx0_pixel1_clk>,
<&dispcc_dptx0_pixel2_clk>,
<&dispcc_dptx0_pixel3_clk>;
clock-names = "core_iface",
"core_aux",
"ctrl_link",
"ctrl_link_iface",
"stream_pixel";
"stream_pixel",
"stream_1_pixel",
"stream_2_pixel",
"stream_3_pixel";
assigned-clocks = <&dispcc_mdss_dptx0_link_clk_src>,
<&dispcc_mdss_dptx0_pixel0_clk_src>;
assigned-clock-parents = <&mdss0_dp0_phy 0>, <&mdss0_dp0_phy 1>;
<&dispcc_mdss_dptx0_pixel0_clk_src>,
<&dispcc_mdss_dptx0_pixel1_clk_src>,
<&dispcc_mdss_dptx0_pixel2_clk_src>,
<&dispcc_mdss_dptx0_pixel3_clk_src>;
assigned-clock-parents = <&mdss0_dp0_phy 0>,
<&mdss0_dp0_phy 1>,
<&mdss0_dp0_phy 1>,
<&mdss0_dp0_phy 1>;
phys = <&mdss0_dp0_phy>;
phy-names = "dp";

View File

@@ -207,16 +207,20 @@ examples:
<&dispcc_disp_cc_mdss_dptx0_aux_clk>,
<&dispcc_disp_cc_mdss_dptx0_link_clk>,
<&dispcc_disp_cc_mdss_dptx0_link_intf_clk>,
<&dispcc_disp_cc_mdss_dptx0_pixel0_clk>;
<&dispcc_disp_cc_mdss_dptx0_pixel0_clk>,
<&dispcc_disp_cc_mdss_dptx0_pixel1_clk>;
clock-names = "core_iface",
"core_aux",
"ctrl_link",
"ctrl_link_iface",
"stream_pixel";
"stream_pixel",
"stream_1_pixel";
assigned-clocks = <&dispcc_disp_cc_mdss_dptx0_link_clk_src>,
<&dispcc_disp_cc_mdss_dptx0_pixel0_clk_src>;
<&dispcc_disp_cc_mdss_dptx0_pixel0_clk_src>,
<&dispcc_disp_cc_mdss_dptx0_pixel1_clk_src>;
assigned-clock-parents = <&usb_dp_qmpphy_QMP_USB43DP_DP_LINK_CLK>,
<&usb_dp_qmpphy_QMP_USB43DP_DP_VCO_DIV_CLK>,
<&usb_dp_qmpphy_QMP_USB43DP_DP_VCO_DIV_CLK>;
phys = <&usb_dp_qmpphy QMP_USB43DP_DP_PHY>;

View File

@@ -281,7 +281,8 @@ examples:
reg = <0xaea0000 0x200>,
<0xaea0200 0x200>,
<0xaea0400 0xc00>,
<0xaea1000 0x400>;
<0xaea1000 0x400>,
<0xaea1400 0x400>;
interrupt-parent = <&mdss>;
interrupts = <14>;

View File

@@ -0,0 +1,103 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/msm/qcom,sc8180x-dpu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm SC8180X Display DPU
maintainers:
- Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
$ref: /schemas/display/msm/dpu-common.yaml#
properties:
compatible:
const: qcom,sc8180x-dpu
reg:
items:
- description: Address offset and size for mdp register set
- description: Address offset and size for vbif register set
reg-names:
items:
- const: mdp
- const: vbif
clocks:
items:
- description: Display AHB clock
- description: Display HF AXI clock
- description: Display core clock
- description: Display vsync clock
- description: Display rotator clock
- description: Display LUT clock
clock-names:
items:
- const: iface
- const: bus
- const: core
- const: vsync
- const: rot
- const: lut
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/qcom,dispcc-sm8250.h>
#include <dt-bindings/clock/qcom,gcc-sc8180x.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/interconnect/qcom,sc8180x.h>
#include <dt-bindings/power/qcom-rpmpd.h>
display-controller@ae01000 {
compatible = "qcom,sc8180x-dpu";
reg = <0x0ae01000 0x8f000>,
<0x0aeb0000 0x2008>;
reg-names = "mdp", "vbif";
clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
<&gcc GCC_DISP_HF_AXI_CLK>,
<&dispcc DISP_CC_MDSS_MDP_CLK>,
<&dispcc DISP_CC_MDSS_VSYNC_CLK>,
<&dispcc DISP_CC_MDSS_ROT_CLK>,
<&dispcc DISP_CC_MDSS_MDP_LUT_CLK>;
clock-names = "iface",
"bus",
"core",
"vsync",
"rot",
"lut";
assigned-clocks = <&dispcc DISP_CC_MDSS_VSYNC_CLK>;
assigned-clock-rates = <19200000>;
operating-points-v2 = <&mdp_opp_table>;
power-domains = <&rpmhpd SC8180X_MMCX>;
interrupt-parent = <&mdss>;
interrupts = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
endpoint {
remote-endpoint = <&dsi0_in>;
};
};
port@1 {
reg = <1>;
endpoint {
remote-endpoint = <&dsi1_in>;
};
};
};
};
...

View File

@@ -0,0 +1,359 @@
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/msm/qcom,sc8180x-mdss.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Qualcomm SC8180X Display MDSS
maintainers:
- Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
description:
Device tree bindings for the MSM Mobile Display Subsystem (MDSS) that encapsulates
sub-blocks like the DPU display controller, DSI and DP interfaces etc. The MDSS
device tree bindings are described here for the SC8180X target.
$ref: /schemas/display/msm/mdss-common.yaml#
properties:
compatible:
items:
- const: qcom,sc8180x-mdss
clocks:
items:
- description: Display AHB clock from gcc
- description: Display hf axi clock
- description: Display sf axi clock
- description: Display core clock
clock-names:
items:
- const: iface
- const: bus
- const: nrt_bus
- const: core
iommus:
maxItems: 1
interconnects:
maxItems: 3
interconnect-names:
maxItems: 3
patternProperties:
"^display-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,sc8180x-dpu
"^displayport-controller@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
enum:
- qcom,sc8180x-dp
- qcom,sc8180x-edp
"^dsi@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
contains:
const: qcom,sc8180x-dsi-ctrl
"^phy@[0-9a-f]+$":
type: object
additionalProperties: true
properties:
compatible:
const: qcom,dsi-phy-7nm
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/qcom,dispcc-sm8250.h>
#include <dt-bindings/clock/qcom,gcc-sc8180x.h>
#include <dt-bindings/clock/qcom,rpmh.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/interconnect/qcom,sc8180x.h>
#include <dt-bindings/power/qcom-rpmpd.h>
display-subsystem@ae00000 {
compatible = "qcom,sc8180x-mdss";
reg = <0x0ae00000 0x1000>;
reg-names = "mdss";
interconnects = <&mmss_noc MASTER_MDP_PORT0 &mc_virt SLAVE_EBI_CH0>,
<&mmss_noc MASTER_MDP_PORT1 &mc_virt SLAVE_EBI_CH0>,
<&gem_noc MASTER_AMPSS_M0 &config_noc SLAVE_DISPLAY_CFG>;
interconnect-names = "mdp0-mem",
"mdp1-mem",
"cpu-cfg";
power-domains = <&dispcc MDSS_GDSC>;
clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
<&gcc GCC_DISP_HF_AXI_CLK>,
<&gcc GCC_DISP_SF_AXI_CLK>,
<&dispcc DISP_CC_MDSS_MDP_CLK>;
clock-names = "iface", "bus", "nrt_bus", "core";
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
interrupt-controller;
#interrupt-cells = <1>;
iommus = <&apps_smmu 0x800 0x420>;
#address-cells = <1>;
#size-cells = <1>;
ranges;
display-controller@ae01000 {
compatible = "qcom,sc8180x-dpu";
reg = <0x0ae01000 0x8f000>,
<0x0aeb0000 0x2008>;
reg-names = "mdp", "vbif";
clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
<&gcc GCC_DISP_HF_AXI_CLK>,
<&dispcc DISP_CC_MDSS_MDP_CLK>,
<&dispcc DISP_CC_MDSS_VSYNC_CLK>,
<&dispcc DISP_CC_MDSS_ROT_CLK>,
<&dispcc DISP_CC_MDSS_MDP_LUT_CLK>;
clock-names = "iface",
"bus",
"core",
"vsync",
"rot",
"lut";
assigned-clocks = <&dispcc DISP_CC_MDSS_VSYNC_CLK>;
assigned-clock-rates = <19200000>;
operating-points-v2 = <&mdp_opp_table>;
power-domains = <&rpmhpd SC8180X_MMCX>;
interrupt-parent = <&mdss>;
interrupts = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dpu_intf1_out: endpoint {
remote-endpoint = <&dsi0_in>;
};
};
port@1 {
reg = <1>;
dpu_intf2_out: endpoint {
remote-endpoint = <&dsi1_in>;
};
};
};
mdp_opp_table: opp-table {
compatible = "operating-points-v2";
opp-171428571 {
opp-hz = /bits/ 64 <171428571>;
required-opps = <&rpmhpd_opp_low_svs>;
};
opp-300000000 {
opp-hz = /bits/ 64 <300000000>;
required-opps = <&rpmhpd_opp_svs>;
};
opp-345000000 {
opp-hz = /bits/ 64 <345000000>;
required-opps = <&rpmhpd_opp_svs_l1>;
};
opp-460000000 {
opp-hz = /bits/ 64 <460000000>;
required-opps = <&rpmhpd_opp_nom>;
};
};
};
dsi@ae94000 {
compatible = "qcom,sc8180x-dsi-ctrl",
"qcom,mdss-dsi-ctrl";
reg = <0x0ae94000 0x400>;
reg-names = "dsi_ctrl";
interrupt-parent = <&mdss>;
interrupts = <4>;
clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK>,
<&dispcc DISP_CC_MDSS_BYTE0_INTF_CLK>,
<&dispcc DISP_CC_MDSS_PCLK0_CLK>,
<&dispcc DISP_CC_MDSS_ESC0_CLK>,
<&dispcc DISP_CC_MDSS_AHB_CLK>,
<&gcc GCC_DISP_HF_AXI_CLK>;
clock-names = "byte",
"byte_intf",
"pixel",
"core",
"iface",
"bus";
assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE0_CLK_SRC>,
<&dispcc DISP_CC_MDSS_PCLK0_CLK_SRC>;
assigned-clock-parents = <&dsi0_phy 0>, <&dsi0_phy 1>;
operating-points-v2 = <&dsi_opp_table>;
power-domains = <&rpmhpd SC8180X_MMCX>;
phys = <&dsi0_phy>;
phy-names = "dsi";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dsi0_in: endpoint {
remote-endpoint = <&dpu_intf1_out>;
};
};
port@1 {
reg = <1>;
dsi0_out: endpoint {
};
};
};
dsi_opp_table: opp-table {
compatible = "operating-points-v2";
opp-187500000 {
opp-hz = /bits/ 64 <187500000>;
required-opps = <&rpmhpd_opp_low_svs>;
};
opp-300000000 {
opp-hz = /bits/ 64 <300000000>;
required-opps = <&rpmhpd_opp_svs>;
};
opp-358000000 {
opp-hz = /bits/ 64 <358000000>;
required-opps = <&rpmhpd_opp_svs_l1>;
};
};
};
dsi0_phy: phy@ae94400 {
compatible = "qcom,dsi-phy-7nm";
reg = <0x0ae94400 0x200>,
<0x0ae94600 0x280>,
<0x0ae94900 0x260>;
reg-names = "dsi_phy",
"dsi_phy_lane",
"dsi_pll";
#clock-cells = <1>;
#phy-cells = <0>;
clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
<&rpmhcc RPMH_CXO_CLK>;
clock-names = "iface", "ref";
vdds-supply = <&vreg_dsi_phy>;
};
dsi@ae96000 {
compatible = "qcom,sc8180x-dsi-ctrl",
"qcom,mdss-dsi-ctrl";
reg = <0x0ae96000 0x400>;
reg-names = "dsi_ctrl";
interrupt-parent = <&mdss>;
interrupts = <5>;
clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK>,
<&dispcc DISP_CC_MDSS_BYTE1_INTF_CLK>,
<&dispcc DISP_CC_MDSS_PCLK1_CLK>,
<&dispcc DISP_CC_MDSS_ESC1_CLK>,
<&dispcc DISP_CC_MDSS_AHB_CLK>,
<&gcc GCC_DISP_HF_AXI_CLK>;
clock-names = "byte",
"byte_intf",
"pixel",
"core",
"iface",
"bus";
assigned-clocks = <&dispcc DISP_CC_MDSS_BYTE1_CLK_SRC>,
<&dispcc DISP_CC_MDSS_PCLK1_CLK_SRC>;
assigned-clock-parents = <&dsi1_phy 0>, <&dsi1_phy 1>;
operating-points-v2 = <&dsi_opp_table>;
power-domains = <&rpmhpd SC8180X_MMCX>;
phys = <&dsi1_phy>;
phy-names = "dsi";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dsi1_in: endpoint {
remote-endpoint = <&dpu_intf2_out>;
};
};
port@1 {
reg = <1>;
dsi1_out: endpoint {
};
};
};
};
dsi1_phy: phy@ae96400 {
compatible = "qcom,dsi-phy-7nm";
reg = <0x0ae96400 0x200>,
<0x0ae96600 0x280>,
<0x0ae96900 0x260>;
reg-names = "dsi_phy",
"dsi_phy_lane",
"dsi_pll";
#clock-cells = <1>;
#phy-cells = <0>;
clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
<&rpmhcc RPMH_CXO_CLK>;
clock-names = "iface", "ref";
vdds-supply = <&vreg_dsi_phy>;
};
};
...

View File

@@ -61,7 +61,8 @@ patternProperties:
additionalProperties: true
properties:
compatible:
const: qcom,sm7150-dp
contains:
const: qcom,sm7150-dp
"^dsi@[0-9a-f]+$":
type: object
@@ -378,7 +379,8 @@ examples:
};
displayport-controller@ae90000 {
compatible = "qcom,sm7150-dp";
compatible = "qcom,sm7150-dp",
"qcom,sm8350-dp";
reg = <0xae90000 0x200>,
<0xae90200 0x200>,
<0xae90400 0xc00>,
@@ -392,16 +394,20 @@ examples:
<&dispcc_mdss_dp_aux_clk>,
<&dispcc_mdss_dp_link_clk>,
<&dispcc_mdss_dp_link_intf_clk>,
<&dispcc_mdss_dp_pixel_clk>;
<&dispcc_mdss_dp_pixel_clk>,
<&dispcc_mdss_dp_pixel1_clk>;
clock-names = "core_iface",
"core_aux",
"ctrl_link",
"ctrl_link_iface",
"stream_pixel";
"stream_pixel",
"stream_1_pixel";
assigned-clocks = <&dispcc_mdss_dp_link_clk_src>,
<&dispcc_mdss_dp_pixel_clk_src>;
<&dispcc_mdss_dp_pixel_clk_src>,
<&dispcc_mdss_dp_pixel1_clk_src>;
assigned-clock-parents = <&dp_phy 0>,
<&dp_phy 1>,
<&dp_phy 1>;
operating-points-v2 = <&dp_opp_table>;

View File

@@ -401,16 +401,20 @@ examples:
<&disp_cc_mdss_dptx0_aux_clk>,
<&disp_cc_mdss_dptx0_link_clk>,
<&disp_cc_mdss_dptx0_link_intf_clk>,
<&disp_cc_mdss_dptx0_pixel0_clk>;
<&disp_cc_mdss_dptx0_pixel0_clk>,
<&disp_cc_mdss_dptx0_pixel1_clk>;
clock-names = "core_iface",
"core_aux",
"ctrl_link",
"ctrl_link_iface",
"stream_pixel";
"stream_pixel",
"stream_1_pixel";
assigned-clocks = <&disp_cc_mdss_dptx0_link_clk_src>,
<&disp_cc_mdss_dptx0_pixel0_clk_src>;
<&disp_cc_mdss_dptx0_pixel0_clk_src>,
<&disp_cc_mdss_dptx0_pixel1_clk_src>;
assigned-clock-parents = <&usb_dp_qmpphy QMP_USB43DP_DP_LINK_CLK>,
<&usb_dp_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>,
<&usb_dp_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>;
operating-points-v2 = <&dp_opp_table>;

View File

@@ -170,11 +170,11 @@ examples:
displayport-controller@ae90000 {
compatible = "qcom,x1e80100-dp";
reg = <0 0xae90000 0 0x200>,
<0 0xae90200 0 0x200>,
<0 0xae90400 0 0x600>,
<0 0xae91000 0 0x400>,
<0 0xae91400 0 0x400>;
reg = <0xae90000 0x200>,
<0xae90200 0x200>,
<0xae90400 0x600>,
<0xae91000 0x400>,
<0xae91400 0x400>;
interrupt-parent = <&mdss>;
interrupts = <12>;
@@ -183,15 +183,19 @@ examples:
<&dispcc_dptx0_aux_clk>,
<&dispcc_dptx0_link_clk>,
<&dispcc_dptx0_link_intf_clk>,
<&dispcc_dptx0_pixel0_clk>;
<&dispcc_dptx0_pixel0_clk>,
<&dispcc_dptx0_pixel1_clk>;
clock-names = "core_iface", "core_aux",
"ctrl_link",
"ctrl_link_iface",
"stream_pixel";
"stream_pixel",
"stream_1_pixel";
assigned-clocks = <&dispcc_mdss_dptx0_link_clk_src>,
<&dispcc_mdss_dptx0_pixel0_clk_src>;
<&dispcc_mdss_dptx0_pixel0_clk_src>,
<&dispcc_mdss_dptx0_pixel1_clk_src>;
assigned-clock-parents = <&usb_1_ss0_qmpphy QMP_USB43DP_DP_LINK_CLK>,
<&usb_1_ss0_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>,
<&usb_1_ss0_qmpphy QMP_USB43DP_DP_VCO_DIV_CLK>;
operating-points-v2 = <&mdss_dp0_opp_table>;

View File

@@ -0,0 +1,60 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/hydis,hv101hd1.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Hydis HV101HD1 DSI Display Panel
maintainers:
- Svyatoslav Ryhel <clamor95@gmail.com>
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
const: hydis,hv101hd1
reg:
maxItems: 1
vdd-supply: true
vio-supply: true
backlight: true
port: true
required:
- compatible
- vdd-supply
- vio-supply
- backlight
additionalProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
dsi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "hydis,hv101hd1";
reg = <0>;
vdd-supply = <&vdd_lcd>;
vio-supply = <&vddio_lcd>;
backlight = <&backlight>;
port {
panel_in: endpoint {
remote-endpoint = <&dsi_out>;
};
};
};
};
...

View File

@@ -18,6 +18,7 @@ properties:
- enum:
- ampire,am8001280g
- bananapi,lhr050h41
- bestar,bsd1218-a101kl68
- feixin,k101-im2byl02
- raspberrypi,dsi-7inch
- startek,kd050hdfia020

View File

@@ -41,11 +41,15 @@ properties:
- enum:
# Admatec 9904379 10.1" 1024x600 LVDS panel
- admatec,9904379
# Ampire AMP19201200B5TZQW-T03 10.1" WUXGA (1920x1200) color TFT LCD panel
- ampire,amp19201200b5tzqw-t03
- auo,b101ew05
# AUO G084SN05 V9 8.4" 800x600 LVDS panel
- auo,g084sn05
# Chunghwa Picture Tubes Ltd. 7" WXGA (800x1280) TFT LCD LVDS panel
- chunghwa,claa070wp03xg
# EDT ETML0700Z8DHA 7.0" Full HD (1920x1080) color TFT LCD LVDS panel
- edt,etml0700z8dha
# EDT ETML0700Z9NDHA 7.0" WSVGA (1024x600) color TFT LCD LVDS panel
- edt,etml0700z9ndha
# HannStar Display Corp. HSD101PWW2 10.1" WXGA (1280x800) LVDS panel

View File

@@ -240,6 +240,8 @@ properties:
- okaya,rs800480t-7x0gp
# Olimex 4.3" TFT LCD panel
- olimex,lcd-olinuxino-43-ts
# Olimex 5.0" TFT LCD panel
- olimex,lcd-olinuxino-5-cts
# On Tat Industrial Company 5" DPI TFT panel.
- ontat,kd50g21-40nt-a1
# On Tat Industrial Company 7" DPI TFT panel.
@@ -325,6 +327,10 @@ properties:
- vivax,tpc9150-panel
# VXT 800x480 color TFT LCD panel
- vxt,vl050-8048nt-c01
# Waveshare 13.3" FHD (1920x1080) LCD panel
- waveshare,13.3inch-panel
# Waveshare 7.0" WSVGA (1024x600) LCD panel
- waveshare,7.0inch-c-panel
# Winstar Display Corporation 3.5" QVGA (320x240) TFT LCD panel
- winstar,wf35ltiacd
# Yes Optoelectronics YTC700TLAG-05-201C 7" TFT LCD panel

View File

@@ -21,6 +21,10 @@ properties:
- enum:
# Samsung 13" 3K (2880×1920 pixels) eDP AMOLED panel
- samsung,atna30dw01
# Samsung 14" FHD+ (1920x1200 pixels) eDP AMOLED panel
- samsung,atna40ct06
# Samsung 14" WQXGA+ (2880x1800 pixels) eDP AMOLED panel
- samsung,atna40cu11
# Samsung 14" WQXGA+ (2880×1800 pixels) eDP AMOLED panel
- samsung,atna40yk20
# Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel

View File

@@ -0,0 +1,55 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/panel/samsung,s6e8aa5x01-ams561ra01.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Samsung AMS561RA01 panel with S6E8AA5X01 controller
maintainers:
- Kaustabh Chakraborty <kauschluss@disroot.org>
allOf:
- $ref: panel-common.yaml#
properties:
compatible:
const: samsung,s6e8aa5x01-ams561ra01
reg:
maxItems: 1
vdd-supply:
description: core voltage supply
vci-supply:
description: voltage supply for analog circuits
reset-gpios: true
required:
- compatible
- reg
additionalProperties: false
examples:
- |
#include <dt-bindings/gpio/gpio.h>
dsi {
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "samsung,s6e8aa5x01-ams561ra01";
reg = <0>;
vdd-supply = <&panel_vdd_reg>;
vci-supply = <&panel_vci_reg>;
reset-gpios = <&gpd3 4 GPIO_ACTIVE_HIGH>;
};
};
...

View File

@@ -0,0 +1,150 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/rockchip/rockchip,dw-dp.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Rockchip DW DisplayPort Transmitter
maintainers:
- Andy Yan <andy.yan@rock-chips.com>
description: |
The Rockchip RK3588 SoC integrates the Synopsys DesignWare DPTX controller
which is compliant with the DisplayPort Specification Version 1.4 with the
following features:
* DisplayPort 1.4a
* Main Link: 1/2/4 lanes
* Main Link Support 1.62Gbps, 2.7Gbps, 5.4Gbps and 8.1Gbps
* AUX channel 1Mbps
* Single Stream Transport (SST)
* Multistream Transport (MST)
* Type-C support (alternate mode)
* HDCP 2.2, HDCP 1.3
* Supports up to 8/10 bits per color component
* Supports RGB, YCbCr4:4:4, YCbCr4:2:2, YCbCr4:2:0
* Pixel clock up to 594MHz
* I2S, SPDIF audio interface
allOf:
- $ref: /schemas/sound/dai-common.yaml#
properties:
compatible:
enum:
- rockchip,rk3588-dp
reg:
maxItems: 1
interrupts:
maxItems: 1
clocks:
items:
- description: Peripheral/APB bus clock
- description: DisplayPort AUX clock
- description: HDCP clock
- description: I2S interface clock
- description: SPDIF interface clock
clock-names:
items:
- const: apb
- const: aux
- const: hdcp
- const: i2s
- const: spdif
phys:
maxItems: 1
ports:
$ref: /schemas/graph.yaml#/properties/ports
properties:
port@0:
$ref: /schemas/graph.yaml#/properties/port
description: Video port for RGB/YUV input.
port@1:
$ref: /schemas/graph.yaml#/properties/port
description: Video port for DP output.
required:
- port@0
- port@1
power-domains:
maxItems: 1
resets:
maxItems: 1
"#sound-dai-cells":
const: 0
required:
- compatible
- reg
- clocks
- clock-names
- interrupts
- phys
- ports
- resets
unevaluatedProperties: false
examples:
- |
#include <dt-bindings/clock/rockchip,rk3588-cru.h>
#include <dt-bindings/phy/phy.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/power/rk3588-power.h>
#include <dt-bindings/reset/rockchip,rk3588-cru.h>
soc {
#address-cells = <2>;
#size-cells = <2>;
dp@fde50000 {
compatible = "rockchip,rk3588-dp";
reg = <0x0 0xfde50000 0x0 0x4000>;
interrupts = <GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH 0>;
clocks = <&cru PCLK_DP0>, <&cru CLK_AUX16M_0>,
<&cru CLK_DP0>, <&cru MCLK_I2S4_8CH_TX>,
<&cru MCLK_SPDIF2_DP0>;
clock-names = "apb", "aux", "hdcp", "i2s", "spdif";
assigned-clocks = <&cru CLK_AUX16M_0>;
assigned-clock-rates = <16000000>;
resets = <&cru SRST_DP0>;
phys = <&usbdp_phy0 PHY_TYPE_DP>;
power-domains = <&power RK3588_PD_VO0>;
#sound-dai-cells = <0>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dp0_in_vp2: endpoint {
remote-endpoint = <&vp2_out_dp0>;
};
};
port@1 {
reg = <1>;
dp0_out_con0: endpoint {
remote-endpoint = <&dp_con0_in>;
};
};
};
};
};

View File

@@ -12,6 +12,7 @@ maintainers:
properties:
compatible:
enum:
- rockchip,rk3576-mipi-dsi2
- rockchip,rk3588-mipi-dsi2
reg:

View File

@@ -80,6 +80,21 @@ properties:
- const: vsync
- const: lcd_sys
iommus:
maxItems: 1
memory-region:
maxItems: 1
description:
A phandle to a node describing a reserved framebuffer memory region.
For example, the splash memory region set up by the bootloader.
port:
$ref: /schemas/graph.yaml#/properties/port
description:
Output port which is connected to either a Mobile Image Compressor
(MIC) or a DSI Master device.
power-domains:
maxItems: 1
@@ -92,6 +107,7 @@ required:
- clock-names
- interrupts
- interrupt-names
- port
- reg
additionalProperties: false
@@ -118,4 +134,9 @@ examples:
"decon0_vclk";
pinctrl-0 = <&lcd_clk &pwm1_out>;
pinctrl-names = "default";
port {
decon_to_dsi: endpoint {
remote-endpoint = <&dsi_to_decon>;
};
};
};

View File

@@ -23,6 +23,11 @@ properties:
reg:
maxItems: 1
sitronix,inverted:
type: boolean
description:
Display pixels are inverted, i.e. 0 is white and 1 is black.
width-mm: true
height-mm: true
panel-timing: true

View File

@@ -28,6 +28,11 @@ properties:
description:
Display supports 4-level grayscale.
sitronix,inverted:
type: boolean
description:
Display pixels are inverted, i.e. 0 is white and 1 is black.
reset-gpios: true
width-mm: true
height-mm: true

View File

@@ -12,7 +12,10 @@ maintainers:
properties:
compatible:
const: st,stm32-ltdc
enum:
- st,stm32-ltdc
- st,stm32mp251-ltdc
- st,stm32mp255-ltdc
reg:
maxItems: 1
@@ -24,15 +27,23 @@ properties:
minItems: 1
clocks:
maxItems: 1
minItems: 1
maxItems: 4
clock-names:
items:
- const: lcd
- const: bus
- const: ref
- const: lvds
minItems: 1
resets:
maxItems: 1
access-controllers:
maxItems: 1
port:
$ref: /schemas/graph.yaml#/properties/port
description: |
@@ -51,6 +62,46 @@ required:
- resets
- port
allOf:
- if:
properties:
compatible:
contains:
enum:
- st,stm32-ltdc
then:
properties:
clocks:
maxItems: 1
clock-names:
maxItems: 1
- if:
properties:
compatible:
contains:
enum:
- st,stm32mp251-ltdc
then:
properties:
clocks:
minItems: 2
maxItems: 2
clock-names:
minItems: 2
maxItems: 2
- if:
properties:
compatible:
contains:
enum:
- st,stm32mp255-ltdc
then:
properties:
clocks:
minItems: 4
clock-names:
minItems: 4
additionalProperties: false
examples:

View File

@@ -31,7 +31,12 @@ description: |
properties:
compatible:
const: st,stm32mp25-lvds
oneOf:
- items:
- enum:
- st,stm32mp255-lvds
- const: st,stm32mp25-lvds
- const: st,stm32mp25-lvds
"#clock-cells":
const: 0
@@ -54,6 +59,12 @@ properties:
resets:
maxItems: 1
access-controllers:
maxItems: 1
power-domains:
maxItems: 1
ports:
$ref: /schemas/graph.yaml#/properties/ports

View File

@@ -21,6 +21,11 @@ properties:
# work with newer dts.
- const: img,img-axe
- const: img,img-rogue
- items:
- enum:
- thead,th1520-gpu
- const: img,img-bxm-4-64
- const: img,img-rogue
- items:
- enum:
- ti,j721s2-gpu
@@ -77,18 +82,6 @@ required:
additionalProperties: false
allOf:
# Constraints added alongside the new compatible strings that would otherwise
# create an ABI break.
- if:
properties:
compatible:
contains:
const: img,img-rogue
then:
required:
- power-domains
- power-domain-names
- if:
properties:
compatible:
@@ -97,9 +90,32 @@ allOf:
then:
properties:
power-domains:
maxItems: 1
items:
- description: Power domain A
power-domain-names:
maxItems: 1
required:
- power-domains
- power-domain-names
- if:
properties:
compatible:
contains:
const: thead,th1520-gpu
then:
properties:
clocks:
minItems: 3
clock-names:
minItems: 3
power-domains:
items:
- description: The single, unified power domain for the GPU on the
TH1520 SoC, integrating all internal IP power domains.
power-domain-names: false
required:
- power-domains
- if:
properties:
@@ -109,9 +125,14 @@ allOf:
then:
properties:
power-domains:
minItems: 2
items:
- description: Power domain A
- description: Power domain B
power-domain-names:
minItems: 2
required:
- power-domains
- power-domain-names
- if:
properties:

View File

@@ -0,0 +1,112 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/npu/rockchip,rk3588-rknn-core.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Neural Processing Unit IP from Rockchip
maintainers:
- Tomeu Vizoso <tomeu@tomeuvizoso.net>
description:
Rockchip IP for accelerating inference of neural networks.
There is one node per NPU core in the SoC, and each core node should reference all
the resources that it needs to function, such as clocks, power domains, and resets.
properties:
$nodename:
pattern: '^npu@[a-f0-9]+$'
compatible:
enum:
- rockchip,rk3588-rknn-core
reg:
maxItems: 3
reg-names:
items:
- const: pc # Program Control-related registers
- const: cna # Convolution Neural Network Accelerator registers
- const: core # Main NPU core processing unit registers
clocks:
maxItems: 4
clock-names:
items:
- const: aclk
- const: hclk
- const: npu
- const: pclk
interrupts:
maxItems: 1
iommus:
maxItems: 1
npu-supply: true
power-domains:
maxItems: 1
resets:
maxItems: 2
reset-names:
items:
- const: srst_a
- const: srst_h
sram-supply: true
required:
- compatible
- reg
- reg-names
- clocks
- clock-names
- interrupts
- iommus
- power-domains
- resets
- reset-names
- npu-supply
- sram-supply
additionalProperties: false
examples:
- |
#include <dt-bindings/clock/rockchip,rk3588-cru.h>
#include <dt-bindings/interrupt-controller/irq.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/power/rk3588-power.h>
#include <dt-bindings/reset/rockchip,rk3588-cru.h>
bus {
#address-cells = <2>;
#size-cells = <2>;
npu@fdab0000 {
compatible = "rockchip,rk3588-rknn-core";
reg = <0x0 0xfdab0000 0x0 0x1000>,
<0x0 0xfdab1000 0x0 0x1000>,
<0x0 0xfdab3000 0x0 0x1000>;
reg-names = "pc", "cna", "core";
clocks = <&cru ACLK_NPU0>, <&cru HCLK_NPU0>,
<&scmi_clk SCMI_CLK_NPU>, <&cru PCLK_NPU_ROOT>;
clock-names = "aclk", "hclk", "npu", "pclk";
interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH 0>;
iommus = <&rknn_mmu_0>;
npu-supply = <&vdd_npu_s0>;
power-domains = <&power RK3588_PD_NPUTOP>;
resets = <&cru SRST_A_RKNN0>, <&cru SRST_H_RKNN0>;
reset-names = "srst_a", "srst_h";
sram-supply = <&vdd_npu_mem_s0>;
};
};
...

View File

@@ -225,6 +225,8 @@ patternProperties:
description: BeagleBoard.org Foundation
"^belling,.*":
description: Shanghai Belling Co., Ltd.
"^bestar,.*":
description: Shenzhen Bestar Electronic Technology Co., Ltd.
"^bhf,.*":
description: Beckhoff Automation GmbH & Co. KG
"^bigtreetech,.*":
@@ -967,6 +969,8 @@ patternProperties:
description: MaxLinear Inc.
"^maxtor,.*":
description: Maxtor Corporation
"^mayqueen,.*":
description: Mayqueen Technologies Ltd.
"^mbvl,.*":
description: Mobiveil Inc.
"^mcube,.*":

View File

@@ -10,7 +10,7 @@ Accelerated Processing Units (APU) Info
.. csv-table::
:header-rows: 1
:widths: 3, 2, 2, 1, 1, 1, 1
:widths: 3, 2, 2, 1, 1, 1, 1, 1
:file: ./apu-asic-info-table.csv
Discrete GPU Info
@@ -18,6 +18,6 @@ Discrete GPU Info
.. csv-table::
:header-rows: 1
:widths: 3, 2, 2, 1, 1, 1
:widths: 3, 2, 2, 1, 1, 1, 1, 1
:file: ./dgpu-asic-info-table.csv

View File

@@ -1,17 +1,18 @@
Product Name, Code Reference, DCN/DCE version, GC version, VCE/UVD/VCN version, SDMA version, MP0 version
Radeon R* Graphics, CARRIZO/STONEY, DCE 11, 8, VCE 3 / UVD 6, 3, n/a
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN/PICASSO, DCN 1.0, 9.1.0, VCN 1.0, 4.1.0, 10.0.0
Ryzen 4000 series, RENOIR, DCN 2.1, 9.3, VCN 2.2, 4.1.2, 11.0.3
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN2, DCN 1.0, 9.2.2, VCN 1.0.1, 4.1.1, 10.0.1
SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0
Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1
Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 9000 series (AM5), Granite Ridge, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x45 series (FL1), Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
Ryzen AI 300 series, Strix Point, 3.5.0, 11.5.0, 4.0.5, 6.1.0, 14.0.0
Ryzen AI 350 series, Krackan Point, 3.5.0, 11.5.2, 4.0.5, 6.1.2, 14.0.4
Ryzen AI Max 300 series, Strix Halo, 3.5.1, 11.5.1, 4.0.6, 6.1.1, 14.0.1
Product Name, Code Reference, DCN/DCE version, GC version, VCE/UVD/VCN version, SDMA version, MP0 version, MP1 version
Radeon R* Graphics, CARRIZO/STONEY, DCE 11, 8, VCE 3 / UVD 6, 3, n/a, 8
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN/PICASSO, DCN 1.0, 9.1.0, VCN 1.0, 4.1.0, 10.0.0, 10.0.0
Ryzen 4000 series, RENOIR, DCN 2.1, 9.3, VCN 2.2, 4.1.2, 11.0.3, 12.0.1
Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx, RAVEN2, DCN 1.0, 9.2.2, VCN 1.0.1, 4.1.1, 10.0.1, 10.0.1
SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0, 11.5.0
Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1, 12.0.1
Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3, 13.0.3
Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5, 13.0.5
Ryzen 9000 series (AM5), Granite Ridge, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5, 13.0.5
Ryzen 7x45 series (FL1), Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5, 13.0.5
Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8, 13.0.8
Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11, 13.0.4 / 13.0.11
Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11, 13.0.4 / 13.0.11
Ryzen AI 300 series, Strix Point, 3.5.0, 11.5.0, 4.0.5, 6.1.0, 14.0.0, 14.0.0
Ryzen AI 330 series, Krackan Point, 3.6.0, 11.5.3, 4.0.5, 6.1.3, 14.0.5, 14.0.5
Ryzen AI 350 series, Krackan Point, 3.5.0, 11.5.2, 4.0.5, 6.1.2, 14.0.4, 14.0.4
Ryzen AI Max 300 series, Strix Halo, 3.5.1, 11.5.1, 4.0.6, 6.1.1, 14.0.1, 14.0.1
1 Product Name Code Reference DCN/DCE version GC version VCE/UVD/VCN version SDMA version MP0 version MP1 version
2 Radeon R* Graphics CARRIZO/STONEY DCE 11 8 VCE 3 / UVD 6 3 n/a 8
3 Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx RAVEN/PICASSO DCN 1.0 9.1.0 VCN 1.0 4.1.0 10.0.0 10.0.0
4 Ryzen 4000 series RENOIR DCN 2.1 9.3 VCN 2.2 4.1.2 11.0.3 12.0.1
5 Ryzen 3000 series / AMD Ryzen Embedded V1*/R1* with Radeon Vega Gfx RAVEN2 DCN 1.0 9.2.2 VCN 1.0.1 4.1.1 10.0.1 10.0.1
6 SteamDeck VANGOGH DCN 3.0.1 10.3.1 VCN 3.1.0 5.2.1 11.5.0 11.5.0
7 Ryzen 5000 series / Ryzen 7x30 series GREEN SARDINE / Cezanne / Barcelo / Barcelo-R DCN 2.1 9.3 VCN 2.2 4.1.1 12.0.1 12.0.1
8 Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series YELLOW CARP / Rembrandt / Rembrandt-R 3.1.2 10.3.3 VCN 3.1.1 5.2.3 13.0.3 13.0.3
9 Ryzen 7000 series (AM5) Raphael 3.1.5 10.3.6 3.1.2 5.2.6 13.0.5 13.0.5
10 Ryzen 9000 series (AM5) Granite Ridge 3.1.5 10.3.6 3.1.2 5.2.6 13.0.5 13.0.5
11 Ryzen 7x45 series (FL1) Dragon Range 3.1.5 10.3.6 3.1.2 5.2.6 13.0.5 13.0.5
12 Ryzen 7x20 series Mendocino 3.1.6 10.3.7 3.1.1 5.2.7 13.0.8 13.0.8
13 Ryzen 7x40 series Phoenix 3.1.4 11.0.1 / 11.0.4 4.0.2 6.0.1 13.0.4 / 13.0.11 13.0.4 / 13.0.11
14 Ryzen 8x40 series Hawk Point 3.1.4 11.0.1 / 11.0.4 4.0.2 6.0.1 13.0.4 / 13.0.11 13.0.4 / 13.0.11
15 Ryzen AI 300 series Strix Point 3.5.0 11.5.0 4.0.5 6.1.0 14.0.0 14.0.0
16 Ryzen AI 350 series Ryzen AI 330 series Krackan Point 3.5.0 3.6.0 11.5.2 11.5.3 4.0.5 6.1.2 6.1.3 14.0.4 14.0.5 14.0.5
17 Ryzen AI Max 300 series Ryzen AI 350 series Strix Halo Krackan Point 3.5.1 3.5.0 11.5.1 11.5.2 4.0.6 4.0.5 6.1.1 6.1.2 14.0.1 14.0.4 14.0.4
18 Ryzen AI Max 300 series Strix Halo 3.5.1 11.5.1 4.0.6 6.1.1 14.0.1 14.0.1

View File

@@ -94,7 +94,7 @@ amdgpu_error_<name>
-------------------
Provides an interface to set an error code on the dma fences associated with
ring <name>. The error code specified is propogated to all fences associated
ring <name>. The error code specified is propagated to all fences associated
with the ring. Use this to inject a fence error into a ring.
amdgpu_pm_info
@@ -165,7 +165,7 @@ GTT memory.
amdgpu_regs_*
-------------
Provides direct access to various register aperatures on the GPU. Used
Provides direct access to various register apertures on the GPU. Used
by tools like UMR to access GPU registers.
amdgpu_regs2

View File

@@ -1,28 +1,30 @@
Product Name, Code Reference, DCN/DCE version, GC version, VCN version, SDMA version
AMD Radeon (TM) HD 8500M/ 8600M /M200 /M320 /M330 /M335 Series, HAINAN, --, 6, --, --
AMD Radeon HD 7800 /7900 /FireGL Series, TAHITI, DCE 6, 6, VCE 1 / UVD 3, --
AMD Radeon R7 (TM|HD) M265 /M370 /8500M /8600 /8700 /8700M, OLAND, DCE 6, 6, VCE 1 / UVD 3, --
AMD Radeon (TM) (HD|R7) 7800 /7970 /8800 /8970 /370/ Series, PITCAIRN, DCE 6, 6, VCE 1 / UVD 3, --
AMD Radeon (TM|R7|R9|HD) E8860 /M360 /7700 /7800 /8800 /9000(M) /W4100 Series, VERDE, DCE 6, 6, VCE 1 / UVD 3, --
AMD Radeon HD M280X /M380 /7700 /8950 /W5100, BONAIRE, DCE 8, 7, VCE 2 / UVD 4.2, 1
AMD Radeon (R9|TM) 200 /390 /W8100 /W9100 Series, HAWAII, DCE 8, 7, VCE 2 / UVD 4.2, 1
AMD Radeon (TM) R(5|7) M315 /M340 /M360, TOPAZ, *, 8, --, 2
AMD Radeon (TM) R9 200 /380 /W7100 /S7150 /M390 /M395 Series, TONGA, DCE 10, 8, VCE 3 / UVD 5, 3
AMD Radeon (FirePro) (TM) R9 Fury Series, FIJI, DCE 10, 8, VCE 3 / UVD 6, 3
Radeon RX 470 /480 /570 /580 /590 Series - AMD Radeon (TM) (Pro WX) 5100 /E9390 /E9560 /E9565 /V7350 /7100 /P30PH, POLARIS10, DCE 11.2, 8, VCE 3.4 / UVD 6.3, 3
Radeon (TM) (RX|Pro WX) E9260 /460 /V5300X /550 /560(X) Series, POLARIS11, DCE 11.2, 8, VCE 3.4 / UVD 6.3, 3
Radeon (RX/Pro) 500 /540(X) /550 /640 /WX2100 /WX3100 /WX200 Series, POLARIS12, DCE 11.2, 8, VCE 3.4 / UVD 6.3, 3
Radeon (RX|TM) (PRO|WX) Vega /MI25 /V320 /V340L /8200 /9100 /SSG MxGPU, VEGA10, DCE 12, 9.0.1, VCE 4.0.0 / UVD 7.0.0, 4.0.0
AMD Radeon (Pro) VII /MI50 /MI60, VEGA20, DCE 12, 9.4.0, VCE 4.1.0 / UVD 7.2.0, 4.2.0
MI100, ARCTURUS, *, 9.4.1, VCN 2.5.0, 4.2.2
MI200 Series, ALDEBARAN, *, 9.4.2, VCN 2.6.0, 4.4.0
MI300 Series, AQUA_VANJARAM, *, 9.4.3, VCN 4.0.3, 4.4.2
AMD Radeon (RX|Pro) 5600(M|XT) /5700 (M|XT|XTB) /W5700, NAVI10, DCN 2.0.0, 10.1.10, VCN 2.0.0, 5.0.0
AMD Radeon (Pro) 5300 /5500XTB/5500(XT|M) /W5500M /W5500, NAVI14, DCN 2.0.0, 10.1.1, VCN 2.0.2, 5.0.2
AMD Radeon RX 6800(XT) /6900(XT) /W6800, SIENNA_CICHLID, DCN 3.0.0, 10.3.0, VCN 3.0.0, 5.2.0
AMD Radeon RX 6700 XT / 6800M / 6700M, NAVY_FLOUNDER, DCN 3.0.0, 10.3.2, VCN 3.0.0, 5.2.2
AMD Radeon RX 6600(XT) /6600M /W6600 /W6600M, DIMGREY_CAVEFISH, DCN 3.0.2, 10.3.4, VCN 3.0.16, 5.2.4
AMD Radeon RX 6500M /6300M /W6500M /W6300M, BEIGE_GOBY, DCN 3.0.3, 10.3.5, VCN 3.0.33, 5.2.5
AMD Radeon RX 7900 XT /XTX, , DCN 3.2.0, 11.0.0, VCN 4.0.0, 6.0.0
AMD Radeon RX 7800 XT, , DCN 3.2.0, 11.0.3, VCN 4.0.0, 6.0.3
AMD Radeon RX 7600M (XT) /7700S /7600S, , DCN 3.2.1, 11.0.2, VCN 4.0.4, 6.0.2
Product Name, Code Reference, DCN/DCE version, GC version, VCN version, SDMA version, MP0 version, MP1 version
AMD Radeon (TM) HD 8500M/ 8600M /M200 /M320 /M330 /M335 Series, HAINAN, --, 6, --, --, --, 6
AMD Radeon HD 7800 /7900 /FireGL Series, TAHITI, DCE 6, 6, VCE 1 / UVD 3, --, --, 6
AMD Radeon R7 (TM|HD) M265 /M370 /8500M /8600 /8700 /8700M, OLAND, DCE 6, 6, -- / UVD 3, --, --, 6
AMD Radeon (TM) (HD|R7) 7800 /7970 /8800 /8970 /370/ Series, PITCAIRN, DCE 6, 6, VCE 1 / UVD 3, --, --, 6
AMD Radeon (TM|R7|R9|HD) E8860 /M360 /7700 /7800 /8800 /9000(M) /W4100 Series, VERDE, DCE 6, 6, VCE 1 / UVD 3, --, --, 6
AMD Radeon HD M280X /M380 /7700 /8950 /W5100, BONAIRE, DCE 8, 7, VCE 2 / UVD 4.2, 1, --, 7
AMD Radeon (R9|TM) 200 /390 /W8100 /W9100 Series, HAWAII, DCE 8, 7, VCE 2 / UVD 4.2, 1, --, 7
AMD Radeon (TM) R(5|7) M315 /M340 /M360, TOPAZ, *, 8, --, 2, n/a, 7
AMD Radeon (TM) R9 200 /380 /W7100 /S7150 /M390 /M395 Series, TONGA, DCE 10, 8, VCE 3 / UVD 5, 3, n/a, 7
AMD Radeon (FirePro) (TM) R9 Fury Series, FIJI, DCE 10, 8, VCE 3 / UVD 6, 3, n/a, 7
Radeon RX 470 /480 /570 /580 /590 Series - AMD Radeon (TM) (Pro WX) 5100 /E9390 /E9560 /E9565 /V7350 /7100 /P30PH, POLARIS10, DCE 11.2, 8, VCE 3.4 / UVD 6.3, 3, n/a, 7
Radeon (TM) (RX|Pro WX) E9260 /460 /V5300X /550 /560(X) Series, POLARIS11, DCE 11.2, 8, VCE 3.4 / UVD 6.3, 3, n/a, 7
Radeon (RX/Pro) 500 /540(X) /550 /640 /WX2100 /WX3100 /WX200 Series, POLARIS12, DCE 11.2, 8, VCE 3.4 / UVD 6.3, 3, n/a, 7
Radeon (RX|TM) (PRO|WX) Vega /MI25 /V320 /V340L /8200 /9100 /SSG MxGPU, VEGA10, DCE 12, 9.0.1, VCE 4.0.0 / UVD 7.0.0, 4.0.0, 9.0.0, 9.0.0
AMD Radeon (Pro) VII /MI50 /MI60, VEGA20, DCE 12, 9.4.0, VCE 4.1.0 / UVD 7.2.0, 4.2.0, 11.0.2, 11.0.2
MI100, ARCTURUS, *, 9.4.1, VCN 2.5.0, 4.2.2, 11.0.4, 11.0.2
MI200 Series, ALDEBARAN, *, 9.4.2, VCN 2.6.0, 4.4.0, 13.0.2, 13.0.2
MI300 Series, AQUA_VANJARAM, *, 9.4.3, VCN 4.0.3, 4.4.2, 13.0.6, 13.0.6
AMD Radeon (RX|Pro) 5600(M|XT) /5700 (M|XT|XTB) /W5700, NAVI10, DCN 2.0.0, 10.1.10, VCN 2.0.0, 5.0.0, 11.0.0, 11.0.0
AMD Radeon (Pro) 5300 /5500XTB/5500(XT|M) /W5500M /W5500, NAVI14, DCN 2.0.0, 10.1.1, VCN 2.0.2, 5.0.2, 11.0.5, 11.0.5
AMD Radeon RX 6800(XT) /6900(XT) /W6800, SIENNA_CICHLID, DCN 3.0.0, 10.3.0, VCN 3.0.0, 5.2.0, 11.0.7, 11.0.7
AMD Radeon RX 6700 XT / 6800M / 6700M, NAVY_FLOUNDER, DCN 3.0.0, 10.3.2, VCN 3.0.0, 5.2.2, 11.0.11, 11.0.11
AMD Radeon RX 6600(XT) /6600M /W6600 /W6600M, DIMGREY_CAVEFISH, DCN 3.0.2, 10.3.4, VCN 3.0.16, 5.2.4, 11.0.12, 11.0.12
AMD Radeon RX 6500M /6300M /W6500M /W6300M, BEIGE_GOBY, DCN 3.0.3, 10.3.5, VCN 3.0.33, 5.2.5, 11.0.13, 11.0.13
AMD Radeon RX 7900 XT /XTX, , DCN 3.2.0, 11.0.0, VCN 4.0.0, 6.0.0, 13.0.0, 13.0.0
AMD Radeon RX 7800 XT, , DCN 3.2.0, 11.0.3, VCN 4.0.0, 6.0.3, 13.0.10, 13.0.10
AMD Radeon RX 7600M (XT) /7700S /7600S, , DCN 3.2.1, 11.0.2, VCN 4.0.4, 6.0.2, 13.0.7, 13.0.7
AMD Radeon RX 9070 (XT), , DCN 4.0.1, 12.0.1, VCN 5.0.0, 7.0.1, 14.0.3, 14.0.3
AMD Radeon RX 9060 XT, , DCN 4.0.1, 12.0.0, VCN 5.0.0, 7.0.0, 14.0.2, 14.0.2
1 Product Name Code Reference DCN/DCE version GC version VCN version SDMA version MP0 version MP1 version
2 AMD Radeon (TM) HD 8500M/ 8600M /M200 /M320 /M330 /M335 Series HAINAN -- 6 -- -- -- 6
3 AMD Radeon HD 7800 /7900 /FireGL Series TAHITI DCE 6 6 VCE 1 / UVD 3 -- -- 6
4 AMD Radeon R7 (TM|HD) M265 /M370 /8500M /8600 /8700 /8700M OLAND DCE 6 6 -- / UVD 3 -- -- 6
5 AMD Radeon (TM) (HD|R7) 7800 /7970 /8800 /8970 /370/ Series PITCAIRN DCE 6 6 VCE 1 / UVD 3 -- -- 6
6 AMD Radeon (TM|R7|R9|HD) E8860 /M360 /7700 /7800 /8800 /9000(M) /W4100 Series VERDE DCE 6 6 VCE 1 / UVD 3 -- -- 6
7 AMD Radeon HD M280X /M380 /7700 /8950 /W5100 BONAIRE DCE 8 7 VCE 2 / UVD 4.2 1 -- 7
8 AMD Radeon (R9|TM) 200 /390 /W8100 /W9100 Series HAWAII DCE 8 7 VCE 2 / UVD 4.2 1 -- 7
9 AMD Radeon (TM) R(5|7) M315 /M340 /M360 TOPAZ * 8 -- 2 n/a 7
10 AMD Radeon (TM) R9 200 /380 /W7100 /S7150 /M390 /M395 Series TONGA DCE 10 8 VCE 3 / UVD 5 3 n/a 7
11 AMD Radeon (FirePro) (TM) R9 Fury Series FIJI DCE 10 8 VCE 3 / UVD 6 3 n/a 7
12 Radeon RX 470 /480 /570 /580 /590 Series - AMD Radeon (TM) (Pro WX) 5100 /E9390 /E9560 /E9565 /V7350 /7100 /P30PH POLARIS10 DCE 11.2 8 VCE 3.4 / UVD 6.3 3 n/a 7
13 Radeon (TM) (RX|Pro WX) E9260 /460 /V5300X /550 /560(X) Series POLARIS11 DCE 11.2 8 VCE 3.4 / UVD 6.3 3 n/a 7
14 Radeon (RX/Pro) 500 /540(X) /550 /640 /WX2100 /WX3100 /WX200 Series POLARIS12 DCE 11.2 8 VCE 3.4 / UVD 6.3 3 n/a 7
15 Radeon (RX|TM) (PRO|WX) Vega /MI25 /V320 /V340L /8200 /9100 /SSG MxGPU VEGA10 DCE 12 9.0.1 VCE 4.0.0 / UVD 7.0.0 4.0.0 9.0.0 9.0.0
16 AMD Radeon (Pro) VII /MI50 /MI60 VEGA20 DCE 12 9.4.0 VCE 4.1.0 / UVD 7.2.0 4.2.0 11.0.2 11.0.2
17 MI100 ARCTURUS * 9.4.1 VCN 2.5.0 4.2.2 11.0.4 11.0.2
18 MI200 Series ALDEBARAN * 9.4.2 VCN 2.6.0 4.4.0 13.0.2 13.0.2
19 MI300 Series AQUA_VANJARAM * 9.4.3 VCN 4.0.3 4.4.2 13.0.6 13.0.6
20 AMD Radeon (RX|Pro) 5600(M|XT) /5700 (M|XT|XTB) /W5700 NAVI10 DCN 2.0.0 10.1.10 VCN 2.0.0 5.0.0 11.0.0 11.0.0
21 AMD Radeon (Pro) 5300 /5500XTB/5500(XT|M) /W5500M /W5500 NAVI14 DCN 2.0.0 10.1.1 VCN 2.0.2 5.0.2 11.0.5 11.0.5
22 AMD Radeon RX 6800(XT) /6900(XT) /W6800 SIENNA_CICHLID DCN 3.0.0 10.3.0 VCN 3.0.0 5.2.0 11.0.7 11.0.7
23 AMD Radeon RX 6700 XT / 6800M / 6700M NAVY_FLOUNDER DCN 3.0.0 10.3.2 VCN 3.0.0 5.2.2 11.0.11 11.0.11
24 AMD Radeon RX 6600(XT) /6600M /W6600 /W6600M DIMGREY_CAVEFISH DCN 3.0.2 10.3.4 VCN 3.0.16 5.2.4 11.0.12 11.0.12
25 AMD Radeon RX 6500M /6300M /W6500M /W6300M BEIGE_GOBY DCN 3.0.3 10.3.5 VCN 3.0.33 5.2.5 11.0.13 11.0.13
26 AMD Radeon RX 7900 XT /XTX DCN 3.2.0 11.0.0 VCN 4.0.0 6.0.0 13.0.0 13.0.0
27 AMD Radeon RX 7800 XT DCN 3.2.0 11.0.3 VCN 4.0.0 6.0.3 13.0.10 13.0.10
28 AMD Radeon RX 7600M (XT) /7700S /7600S DCN 3.2.1 11.0.2 VCN 4.0.4 6.0.2 13.0.7 13.0.7
29 AMD Radeon RX 9070 (XT) DCN 4.0.1 12.0.1 VCN 5.0.0 7.0.1 14.0.3 14.0.3
30 AMD Radeon RX 9060 XT DCN 4.0.1 12.0.0 VCN 5.0.0 7.0.0 14.0.2 14.0.2

View File

@@ -5,7 +5,7 @@ DC Glossary
On this page, we try to keep track of acronyms related to the display
component. If you do not find what you are looking for, look at the
'Documentation/gpu/amdgpu/amdgpu-glossary.rst'; if you cannot find it anywhere,
consider asking in the amdgfx and update this page.
consider asking on the amd-gfx mailing list and update this page.
.. glossary::

View File

@@ -9,8 +9,8 @@ contribution to the display code, and for that, we say thank you :)
This page summarizes some of the issues you can help with; keep in mind that
this is a static page, and it is always a good idea to try to reach developers
in the amdgfx or some of the maintainers. Finally, this page follows the DRM
way of creating a TODO list; for more information, check
on the amd-gfx mailing list or some of the maintainers. Finally, this page
follows the DRM way of creating a TODO list; for more information, check
'Documentation/gpu/todo.rst'.
Gitlab issues

View File

@@ -100,7 +100,7 @@ represents the connected display.
For historical reasons, we used the name `dc_link`, which gives the
wrong impression that this abstraction only deals with physical connections
that the developer can easily manipulate. However, this also covers
conections like eDP or cases where the output is connected to other devices.
connections like eDP or cases where the output is connected to other devices.
There are two structs that are not represented in the diagram since they were
elaborated in the DCN overview page (check the DCN block diagram :ref:`Display

View File

@@ -65,7 +65,7 @@ SDMA (System DMA)
GC (Graphics and Compute)
This is the graphics and compute engine, i.e., the block that
encompasses the 3D pipeline and and shader blocks. This is by far the
encompasses the 3D pipeline and shader blocks. This is by far the
largest block on the GPU. The 3D pipeline has tons of sub-blocks. In
addition to that, it also contains the CP microcontrollers (ME, PFP, CE,
MEC) and the RLC microcontroller. It's exposed to userspace for user mode
@@ -210,4 +210,4 @@ IP Blocks
:doc: IP Blocks
.. kernel-doc:: drivers/gpu/drm/amd/include/amd_shared.h
:identifiers: amd_ip_block_type amd_ip_funcs DC_DEBUG_MASK
:identifiers: amd_ip_block_type amd_ip_funcs DC_FEATURE_MASK DC_DEBUG_MASK

View File

@@ -12,6 +12,7 @@ Next (GCN), Radeon DNA (RDNA), and Compute DNA (CDNA) architectures.
module-parameters
gc/index
display/index
userq
flashing
xgmi
ras

View File

@@ -26,7 +26,7 @@ Example of enabling enforce isolation on a GPU with multiple partitions:
$ cat /sys/class/drm/card0/device/enforce_isolation
1 0 1 0
The output indicates that enforce isolation is enabled on zeroth and second parition and disabled on first and fourth parition.
The output indicates that enforce isolation is enabled on zeroth and second partition and disabled on first and third partition.
For devices with a single partition or those that do not support partitions, there will be only one element:

View File

@@ -0,0 +1,203 @@
==================
User Mode Queues
==================
Introduction
============
Similar to the KFD, GPU engine queues move into userspace. The idea is to let
user processes manage their submissions to the GPU engines directly, bypassing
IOCTL calls to the driver to submit work. This reduces overhead and also allows
the GPU to submit work to itself. Applications can set up work graphs of jobs
across multiple GPU engines without needing trips through the CPU.
UMDs directly interface with firmware via per application shared memory areas.
The main vehicle for this is queue. A queue is a ring buffer with a read
pointer (rptr) and a write pointer (wptr). The UMD writes IP specific packets
into the queue and the firmware processes those packets, kicking off work on the
GPU engines. The CPU in the application (or another queue or device) updates
the wptr to tell the firmware how far into the ring buffer to process packets
and the rptr provides feedback to the UMD on how far the firmware has progressed
in executing those packets. When the wptr and the rptr are equal, the queue is
idle.
Theory of Operation
===================
The various engines on modern AMD GPUs support multiple queues per engine with a
scheduling firmware which handles dynamically scheduling user queues on the
available hardware queue slots. When the number of user queues outnumbers the
available hardware queue slots, the scheduling firmware dynamically maps and
unmaps queues based on priority and time quanta. The state of each user queue
is managed in the kernel driver in an MQD (Memory Queue Descriptor). This is a
buffer in GPU accessible memory that stores the state of a user queue. The
scheduling firmware uses the MQD to load the queue state into an HQD (Hardware
Queue Descriptor) when a user queue is mapped. Each user queue requires a
number of additional buffers which represent the ring buffer and any metadata
needed by the engine for runtime operation. On most engines this consists of
the ring buffer itself, a rptr buffer (where the firmware will shadow the rptr
to userspace), a wptr buffer (where the application will write the wptr for the
firmware to fetch it), and a doorbell. A doorbell is a piece of one of the
device's MMIO BARs which can be mapped to specific user queues. When the
application writes to the doorbell, it will signal the firmware to take some
action. Writing to the doorbell wakes the firmware and causes it to fetch the
wptr and start processing the packets in the queue. Each 4K page of the doorbell
BAR supports specific offset ranges for specific engines. The doorbell of a
queue must be mapped into the aperture aligned to the IP used by the queue
(e.g., GFX, VCN, SDMA, etc.). These doorbell apertures are set up via NBIO
registers. Doorbells are 32 bit or 64 bit (depending on the engine) chunks of
the doorbell BAR. A 4K doorbell page provides 512 64-bit doorbells for up to
512 user queues. A subset of each page is reserved for each IP type supported
on the device. The user can query the doorbell ranges for each IP via the INFO
IOCTL. See the IOCTL Interfaces section for more information.
When an application wants to create a user queue, it allocates the necessary
buffers for the queue (ring buffer, wptr and rptr, context save areas, etc.).
These can be separate buffers or all part of one larger buffer. The application
would map the buffer(s) into its GPUVM and use the GPU virtual addresses for
the areas of memory they want to use for the user queue. They would also
allocate a doorbell page for the doorbells used by the user queues. The
application would then populate the MQD in the USERQ IOCTL structure with the
GPU virtual addresses and doorbell index they want to use. The user can also
specify the attributes for the user queue (priority, whether the queue is secure
for protected content, etc.). The application would then call the USERQ
CREATE IOCTL to create the queue using the specified MQD details in the IOCTL.
The kernel driver then validates the MQD provided by the application and
translates the MQD into the engine specific MQD format for the IP. The IP
specific MQD would be allocated and the queue would be added to the run list
maintained by the scheduling firmware. Once the queue has been created, the
application can write packets directly into the queue, update the wptr, and
write to the doorbell offset to kick off work in the user queue.
When the application is done with the user queue, it would call the USERQ
FREE IOCTL to destroy it. The kernel driver would preempt the queue and
remove it from the scheduling firmware's run list. Then the IP specific MQD
would be freed and the user queue state would be cleaned up.
Some engines may require the aggregated doorbell too if the engine does not
support doorbells from unmapped queues. The aggregated doorbell is a special
page of doorbell space which wakes the scheduler. In cases where the engine may
be oversubscribed, some queues may not be mapped. If the doorbell is rung when
the queue is not mapped, the engine firmware may miss the request. Some
scheduling firmware may work around this by polling wptr shadows when the
hardware is oversubscribed, other engines may support doorbell updates from
unmapped queues. In the event that one of these options is not available, the
kernel driver will map a page of aggregated doorbell space into each GPUVM
space. The UMD will then update the doorbell and wptr as normal and then write
to the aggregated doorbell as well.
Special Packets
---------------
In order to support legacy implicit synchronization, as well as mixed user and
kernel queues, we need a synchronization mechanism that is secure. Because
kernel queues or memory management tasks depend on kernel fences, we need a way
for user queues to update memory that the kernel can use for a fence, that can't
be messed with by a bad actor. To support this, we've added a protected fence
packet. This packet works by writing a monotonically increasing value to
a memory location that only privileged clients have write access to. User
queues only have read access. When this packet is executed, the memory location
is updated and other queues (kernel or user) can see the results. The
user application would submit this packet in their command stream. The actual
packet format varies from IP to IP (GFX/Compute, SDMA, VCN, etc.), but the
behavior is the same. The packet submission is handled in userspace. The
kernel driver sets up the privileged memory used for each user queue when it
sets the queues up when the application creates them.
Memory Management
=================
It is assumed that all buffers mapped into the GPUVM space for the process are
valid when engines on the GPU are running. The kernel driver will only allow
user queues to run when all buffers are mapped. If there is a memory event that
requires buffer migration, the kernel driver will preempt the user queues,
migrate buffers to where they need to be, update the GPUVM page tables and
invalidate the TLB, and then resume the user queues.
Interaction with Kernel Queues
==============================
Depending on the IP and the scheduling firmware, you can enable kernel queues
and user queues at the same time, however, you are limited by the HQD slots.
Kernel queues are always mapped so any work that goes into kernel queues will
take priority. This limits the available HQD slots for user queues.
Not all IPs will support user queues on all GPUs. As such, UMDs will need to
support both user queues and kernel queues depending on the IP. For example, a
GPU may support user queues for GFX, compute, and SDMA, but not for VCN, JPEG,
and VPE. UMDs need to support both. The kernel driver provides a way to
determine if user queues and kernel queues are supported on a per IP basis.
UMDs can query this information via the INFO IOCTL and determine whether to use
kernel queues or user queues for each IP.
Queue Resets
============
For most engines, queues can be reset individually. GFX, compute, and SDMA
queues can be reset individually. When a hung queue is detected, it can be
reset either via the scheduling firmware or MMIO. Since there are no kernel
fences for most user queues, they will usually only be detected when some other
event happens; e.g., a memory event which requires migration of buffers. When
the queues are preempted, if the queue is hung, the preemption will fail.
Driver will then look up the queues that failed to preempt and reset them and
record which queues are hung.
On the UMD side, we will add a USERQ QUERY_STATUS IOCTL to query the queue
status. UMD will provide the queue id in the IOCTL and the kernel driver
will check if it has already recorded the queue as hung (e.g., due to failed
preemption) and report back the status.
IOCTL Interfaces
================
GPU virtual addresses used for queues and related data (rptrs, wptrs, context
save areas, etc.) should be validated by the kernel mode driver to prevent the
user from specifying invalid GPU virtual addresses. If the user provides
invalid GPU virtual addresses or doorbell indices, the IOCTL should return an
error message. These buffers should also be tracked in the kernel driver so
that if the user attempts to unmap the buffer(s) from the GPUVM, the unmap call
would return an error.
INFO
----
There are several new INFO queries related to user queues in order to query the
size of user queue meta data needed for a user queue (e.g., context save areas
or shadow buffers), whether kernel or user queues or both are supported
for each IP type, and the offsets for each IP type in each doorbell page.
USERQ
-----
The USERQ IOCTL is used for creating, freeing, and querying the status of user
queues. It supports 3 opcodes:
1. CREATE - Create a user queue. The application provides an MQD-like structure
that defines the type of queue and associated metadata and flags for that
queue type. Returns the queue id.
2. FREE - Free a user queue.
3. QUERY_STATUS - Query the status of a queue. Used to check if the queue is
healthy or not. E.g., if the queue has been reset. (WIP)
USERQ_SIGNAL
------------
The USERQ_SIGNAL IOCTL is used to provide a list of sync objects to be signaled.
USERQ_WAIT
----------
The USERQ_WAIT IOCTL is used to provide a list of sync objects to be waited on.
Kernel and User Queues
======================
In order to properly validate and test performance, we have a driver option to
select what type of queues are enabled (kernel queues, user queues or both).
The user_queue driver parameter allows you to enable kernel queues only (0),
user queues and kernel queues (1), and user queues only (2). Enabling user
queues only will free up static queue assignments that would otherwise be used
by kernel queues for use by the scheduling firmware. Some kernel queues are
required for kernel driver operation and they will always be created. When the
kernel queues are not enabled, they are not registered with the drm scheduler
and the CS IOCTL will reject any incoming command submissions which target those
queue types. Kernel queues only mirrors the behavior on all existing GPUs.
Enabling both queues allows for backwards compatibility with old userspace while
still supporting user queues.

View File

@@ -418,13 +418,12 @@ needed.
Recovery
--------
Current implementation defines three recovery methods, out of which, drivers
Current implementation defines four recovery methods, out of which, drivers
can use any one, multiple or none. Method(s) of choice will be sent in the
uevent environment as ``WEDGED=<method1>[,..,<methodN>]`` in order of less to
more side-effects. If driver is unsure about recovery or method is unknown
(like soft/hard system reboot, firmware flashing, physical device replacement
or any other procedure which can't be attempted on the fly), ``WEDGED=unknown``
will be sent instead.
more side-effects. See the section `Vendor Specific Recovery`_
for ``WEDGED=vendor-specific``. If driver is unsure about recovery or
method is unknown, ``WEDGED=unknown`` will be sent instead.
Userspace consumers can parse this event and attempt recovery as per the
following expectations.
@@ -435,6 +434,7 @@ following expectations.
none optional telemetry collection
rebind unbind + bind driver
bus-reset unbind + bus reset/re-enumeration + bind
vendor-specific vendor specific recovery method
unknown consumer policy
=============== ========================================
@@ -446,6 +446,35 @@ telemetry information (devcoredump, syslog). This is useful because the first
hang is usually the most critical one which can result in consequential hangs or
complete wedging.
Vendor Specific Recovery
------------------------
When ``WEDGED=vendor-specific`` is sent, it indicates that the device requires
a recovery procedure specific to the hardware vendor and is not one of the
standardized approaches.
``WEDGED=vendor-specific`` may be used to indicate different cases within a
single vendor driver, each requiring a distinct recovery procedure.
In such scenarios, the vendor driver must provide comprehensive documentation
that describes each case, includes additional hints to identify the specific
case, and outlines the corresponding recovery procedure. The documentation includes:
Case - A list of all cases that send the ``WEDGED=vendor-specific`` recovery method.
Hints - Additional Information to assist the userspace consumer in identifying and
differentiating between different cases. This can be exposed through sysfs, debugfs,
traces, dmesg etc.
Recovery Procedure - Clear instructions and guidance for recovering each case.
This may include userspace scripts, tools needed for the recovery procedure.
It is the responsibility of the admin/userspace consumer to identify the case and
verify additional identification hints before attempting a recovery procedure.
Example: If the device uses the Xe driver, then userspace consumer should refer to
:ref:`Xe Device Wedging <xe-device-wedging>` for the detailed documentation.
Task information
----------------
@@ -472,8 +501,12 @@ erroring out, all device memory should be unmapped and file descriptors should
be closed to prevent leaks or undefined behaviour. The idea here is to clear the
device of all user context beforehand and set the stage for a clean recovery.
Example
-------
For ``WEDGED=vendor-specific`` recovery method, it is the responsibility of the
consumer to check the driver documentation and the usecase before attempting
a recovery.
Example - rebind
----------------
Udev rule::

View File

@@ -358,8 +358,6 @@ Locking Guidelines
#. All locking rules and interface contracts with cross-driver interfaces
(dma-buf, dma_fence) need to be followed.
#. No struct_mutex anywhere in the code
#. dma_resv will be the outermost lock (when needed) and ww_acquire_ctx
is to be hoisted at highest level and passed down within i915_gem_ctx
in the call chain
@@ -367,11 +365,6 @@ Locking Guidelines
#. While holding lru/memory manager (buddy, drm_mm, whatever) locks
system memory allocations are not allowed
* Enforce this by priming lockdep (with fs_reclaim). If we
allocate memory while holding these looks we get a rehash
of the shrinker vs. struct_mutex saga, and that would be
real bad.
#. Do not nest different lru/memory manager locks within each other.
Take them in turn to update memory allocations, relying on the objects
dma_resv ww_mutex to serialize against other operations.

View File

@@ -131,8 +131,6 @@ crate so it can be used by other components as well.
Features desired before this happens:
* Relative register with build-time base address validation,
* Arrays of registers with build-time index validation,
* Make I/O optional I/O (for field values that are not registers),
* Support other sizes than `u32`,
* Allow visibility control for registers and individual fields,
@@ -231,23 +229,6 @@ Rust abstraction for debugfs APIs.
GPU (general)
=============
Parse firmware headers
----------------------
Parse ELF headers from the firmware files loaded from the filesystem.
| Reference: ELF utils
| Complexity: Beginner
| Contact: Abdiel Janulgue
Build radix3 page table
-----------------------
Build the radix3 page table to map the firmware.
| Complexity: Intermediate
| Contact: Abdiel Janulgue
Initial Devinit support
-----------------------

View File

@@ -173,31 +173,6 @@ Contact: Simona Vetter
Level: Intermediate
Get rid of dev->struct_mutex from GEM drivers
---------------------------------------------
``dev->struct_mutex`` is the Big DRM Lock from legacy days and infested
everything. Nowadays in modern drivers the only bit where it's mandatory is
serializing GEM buffer object destruction. Which unfortunately means drivers
have to keep track of that lock and either call ``unreference`` or
``unreference_locked`` depending upon context.
Core GEM doesn't have a need for ``struct_mutex`` any more since kernel 4.8,
and there's a GEM object ``free`` callback for any drivers which are
entirely ``struct_mutex`` free.
For drivers that need ``struct_mutex`` it should be replaced with a driver-
private lock. The tricky part is the BO free functions, since those can't
reliably take that lock any more. Instead state needs to be protected with
suitable subordinate locks or some cleanup work pushed to a worker thread. For
performance-critical drivers it might also be better to go with a more
fine-grained per-buffer object and per-context lockings scheme. Currently only
the ``msm`` and `i915` drivers use ``struct_mutex``.
Contact: Simona Vetter, respective driver maintainers
Level: Advanced
Move Buffer Object Locking to dma_resv_lock()
---------------------------------------------
@@ -497,19 +472,19 @@ Contact: Douglas Anderson <dianders@chromium.org>
Level: Intermediate
Transition away from using mipi_dsi_*_write_seq()
-------------------------------------------------
Transition away from using deprecated MIPI DSI functions
--------------------------------------------------------
The macros mipi_dsi_generic_write_seq() and mipi_dsi_dcs_write_seq() are
non-intuitive because, if there are errors, they return out of the *caller's*
function. We should move all callers to use mipi_dsi_generic_write_seq_multi()
and mipi_dsi_dcs_write_seq_multi() macros instead.
There are many functions defined in ``drm_mipi_dsi.c`` which have been
deprecated. Each deprecated function was deprecated in favor of its `multi`
variant (e.g. `mipi_dsi_generic_write()` and `mipi_dsi_generic_write_multi()`).
The `multi` variant of a function includes improved error handling and logic
which makes it more convenient to make several calls in a row, as most MIPI
drivers do.
Once all callers are transitioned, the macros and the functions that they call,
mipi_dsi_generic_write_chatty() and mipi_dsi_dcs_write_buffer_chatty(), can
probably be removed. Alternatively, if people feel like the _multi() variants
are overkill for some use cases, we could keep the mipi_dsi_*_write_seq()
variants but change them not to return out of the caller.
Drivers should be updated to use undeprecated functions. Once all usages of the
deprecated MIPI DSI functions have been removed, their definitions may be
removed from ``drm_mipi_dsi.c``.
Contact: Douglas Anderson <dianders@chromium.org>

View File

@@ -25,5 +25,6 @@ DG2, etc is provided to prototype the driver.
xe_tile
xe_debugging
xe_devcoredump
xe_device
xe-drm-usage-stats.rst
xe_configfs

View File

@@ -0,0 +1,10 @@
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
.. _xe-device-wedging:
==================
Xe Device Wedging
==================
.. kernel-doc:: drivers/gpu/drm/xe/xe_device.c
:doc: Xe Device Wedging

View File

@@ -13,9 +13,11 @@ Internal API
.. kernel-doc:: drivers/gpu/drm/xe/xe_pcode.c
:internal:
.. _xe-survivability-mode:
==================
Boot Survivability
Survivability Mode
==================
.. kernel-doc:: drivers/gpu/drm/xe/xe_survivability_mode.c
:doc: Xe Boot Survivability
:doc: Survivability Mode

View File

@@ -1243,7 +1243,7 @@ F: drivers/spi/spi-amd.c
F: drivers/spi/spi-amd.h
AMD XDNA DRIVER
M: Min Ma <min.ma@amd.com>
M: Min Ma <mamin506@gmail.com>
M: Lizhi Hou <lizhi.hou@amd.com>
L: dri-devel@lists.freedesktop.org
S: Supported
@@ -2096,6 +2096,19 @@ F: Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml
F: drivers/gpu/drm/panthor/
F: include/uapi/drm/panthor_drm.h
ARM MALI TYR DRM DRIVER
M: Daniel Almeida <daniel.almeida@collabora.com>
M: Alice Ryhl <aliceryhl@google.com>
L: dri-devel@lists.freedesktop.org
S: Supported
W: https://rust-for-linux.com/tyr-gpu-driver
W: https://drm.pages.freedesktop.org/maintainer-tools/drm-rust.html
B: https://gitlab.freedesktop.org/panfrost/linux/-/issues
T: git https://gitlab.freedesktop.org/drm/rust/kernel.git
F: Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml
F: drivers/gpu/drm/tyr/
F: include/uapi/drm/panthor_drm.h
ARM MALI-DP DRM DRIVER
M: Liviu Dudau <liviu.dudau@arm.com>
S: Supported
@@ -7297,7 +7310,7 @@ F: include/linux/dma-mapping.h
F: include/linux/swiotlb.h
F: kernel/dma/
DMA MAPPING HELPERS DEVICE DRIVER API [RUST]
DMA MAPPING & SCATTERLIST API [RUST]
M: Danilo Krummrich <dakr@kernel.org>
R: Abdiel Janulgue <abdiel.janulgue@gmail.com>
R: Daniel Almeida <daniel.almeida@collabora.com>
@@ -7308,7 +7321,9 @@ S: Supported
W: https://rust-for-linux.com
T: git git://git.kernel.org/pub/scm/linux/kernel/git/driver-core/driver-core.git
F: rust/helpers/dma.c
F: rust/helpers/scatterlist.c
F: rust/kernel/dma.rs
F: rust/kernel/scatterlist.rs
F: samples/rust/rust_dma.rs
DMA-BUF HEAPS FRAMEWORK
@@ -7553,14 +7568,24 @@ F: drivers/soc/ti/smartreflex.c
F: include/linux/power/smartreflex.h
DRM ACCEL DRIVERS FOR INTEL VPU
M: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
M: Maciej Falkowski <maciej.falkowski@linux.intel.com>
M: Karol Wachowski <karol.wachowski@linux.intel.com>
L: dri-devel@lists.freedesktop.org
S: Supported
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: drivers/accel/ivpu/
F: include/uapi/drm/ivpu_accel.h
DRM ACCEL DRIVER FOR ROCKCHIP NPU
M: Tomeu Vizoso <tomeu@tomeuvizoso.net>
L: dri-devel@lists.freedesktop.org
S: Supported
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: Documentation/accel/rocket/
F: Documentation/devicetree/bindings/npu/rockchip,rk3588-rknn-core.yaml
F: drivers/accel/rocket/
F: include/uapi/drm/rocket_accel.h
DRM COMPUTE ACCELERATORS DRIVERS AND FRAMEWORK
M: Oded Gabbay <ogabbay@kernel.org>
L: dri-devel@lists.freedesktop.org
@@ -7892,6 +7917,7 @@ M: Danilo Krummrich <dakr@kernel.org>
M: Alexandre Courbot <acourbot@nvidia.com>
L: nouveau@lists.freedesktop.org
S: Supported
W: https://rust-for-linux.com/nova-gpu-driver
Q: https://patchwork.freedesktop.org/project/nouveau/
B: https://gitlab.freedesktop.org/drm/nova/-/issues
C: irc://irc.oftc.net/nouveau
@@ -7903,6 +7929,7 @@ DRM DRIVER FOR NVIDIA GPUS [RUST]
M: Danilo Krummrich <dakr@kernel.org>
L: nouveau@lists.freedesktop.org
S: Supported
W: https://rust-for-linux.com/nova-gpu-driver
Q: https://patchwork.freedesktop.org/project/nouveau/
B: https://gitlab.freedesktop.org/drm/nova/-/issues
C: irc://irc.oftc.net/nouveau
@@ -7929,6 +7956,13 @@ T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: Documentation/devicetree/bindings/display/repaper.txt
F: drivers/gpu/drm/tiny/repaper.c
DRM DRIVER FOR PIXPAPER E-INK PANEL
M: LiangCheng Wang <zaq14760@gmail.com>
L: dri-devel@lists.freedesktop.org
S: Maintained
F: Documentation/devicetree/bindings/display/mayqueen,pixpaper.yaml
F: drivers/gpu/drm/tiny/pixpaper.c
DRM DRIVER FOR QEMU'S CIRRUS DEVICE
M: Dave Airlie <airlied@redhat.com>
M: Gerd Hoffmann <kraxel@redhat.com>
@@ -8051,6 +8085,14 @@ S: Maintained
F: Documentation/devicetree/bindings/display/panel/synaptics,r63353.yaml
F: drivers/gpu/drm/panel/panel-synaptics-r63353.c
DRM DRIVER FOR SYNOPSYS DESIGNWARE DISPLAYPORT BRIDGE
M: Andy Yan <andy.yan@rock-chips.com>
S: Maintained
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
F: Documentation/devicetree/bindings/display/rockchip/rockchip,dw-dp.yaml
F: drivers/gpu/drm/bridge/synopsys/dw-dp.c
F: include/drm/bridge/dw_dp.h
DRM DRIVER FOR TI DLPC3433 MIPI DSI TO DMD BRIDGE
M: Jagan Teki <jagan@amarulasolutions.com>
S: Maintained
@@ -20869,7 +20911,7 @@ F: include/dt-bindings/clock/qcom,*
QUALCOMM CLOUD AI (QAIC) DRIVER
M: Jeff Hugo <jeff.hugo@oss.qualcomm.com>
R: Carl Vanderlip <quic_carlv@quicinc.com>
R: Carl Vanderlip <carl.vanderlip@oss.qualcomm.com>
L: linux-arm-msm@vger.kernel.org
L: dri-devel@lists.freedesktop.org
S: Supported

View File

@@ -6,7 +6,7 @@
struct device;
#if defined(CONFIG_STI_CORE)
#if defined(CONFIG_STI_CORE) && defined(CONFIG_VIDEO)
bool video_is_primary_device(struct device *dev);
#define video_is_primary_device video_is_primary_device
#endif

View File

@@ -19,8 +19,10 @@ static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
#define pgprot_framebuffer pgprot_framebuffer
#endif
#ifdef CONFIG_VIDEO
bool video_is_primary_device(struct device *dev);
#define video_is_primary_device video_is_primary_device
#endif
static inline void fb_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n)
{

View File

@@ -13,8 +13,10 @@ pgprot_t pgprot_framebuffer(pgprot_t prot,
unsigned long offset);
#define pgprot_framebuffer pgprot_framebuffer
#ifdef CONFIG_VIDEO
bool video_is_primary_device(struct device *dev);
#define video_is_primary_device video_is_primary_device
#endif
#include <asm-generic/video.h>

View File

@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/screen_info.h>
#include <linux/vgaarb.h>
#include <asm/video.h>
@@ -27,6 +28,11 @@ EXPORT_SYMBOL(pgprot_framebuffer);
bool video_is_primary_device(struct device *dev)
{
#ifdef CONFIG_SCREEN_INFO
struct screen_info *si = &screen_info;
struct resource res[SCREEN_INFO_MAX_RESOURCES];
ssize_t i, numres;
#endif
struct pci_dev *pdev;
if (!dev_is_pci(dev))
@@ -34,7 +40,24 @@ bool video_is_primary_device(struct device *dev)
pdev = to_pci_dev(dev);
return (pdev == vga_default_device());
if (!pci_is_display(pdev))
return false;
if (pdev == vga_default_device())
return true;
#ifdef CONFIG_SCREEN_INFO
numres = screen_info_resources(si, res, ARRAY_SIZE(res));
for (i = 0; i < numres; ++i) {
if (!(res[i].flags & IORESOURCE_MEM))
continue;
if (pci_find_resource(pdev, &res[i]))
return true;
}
#endif
return false;
}
EXPORT_SYMBOL(video_is_primary_device);

View File

@@ -28,5 +28,6 @@ source "drivers/accel/amdxdna/Kconfig"
source "drivers/accel/habanalabs/Kconfig"
source "drivers/accel/ivpu/Kconfig"
source "drivers/accel/qaic/Kconfig"
source "drivers/accel/rocket/Kconfig"
endif

View File

@@ -4,3 +4,4 @@ obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
obj-$(CONFIG_DRM_ACCEL_ROCKET) += rocket/

View File

@@ -15,6 +15,7 @@ amdxdna-y := \
amdxdna_mailbox_helper.o \
amdxdna_pci_drv.o \
amdxdna_sysfs.o \
amdxdna_ubuf.o \
npu1_regs.o \
npu2_regs.o \
npu4_regs.o \

View File

@@ -46,6 +46,17 @@ static void aie2_job_put(struct amdxdna_sched_job *job)
kref_put(&job->refcnt, aie2_job_release);
}
/* Save the current context status into old_status and mark the context stopped. */
static void aie2_hwctx_status_shift_stop(struct amdxdna_hwctx *hwctx)
{
	hwctx->old_status = hwctx->status;
	hwctx->status = HWCTX_STAT_STOP;
}
/* Restore the context status previously saved by aie2_hwctx_status_shift_stop(). */
static void aie2_hwctx_status_restore(struct amdxdna_hwctx *hwctx)
{
	hwctx->status = hwctx->old_status;
}
/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
struct drm_sched_job *bad_job)
@@ -89,25 +100,6 @@ out:
return ret;
}
void aie2_restart_ctx(struct amdxdna_client *client)
{
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_hwctx *hwctx;
unsigned long hwctx_id;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
mutex_lock(&client->hwctx_lock);
amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
if (hwctx->status != HWCTX_STAT_STOP)
continue;
hwctx->status = hwctx->old_status;
XDNA_DBG(xdna, "Resetting %s", hwctx->name);
aie2_hwctx_restart(xdna, hwctx);
}
mutex_unlock(&client->hwctx_lock);
}
static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
struct dma_fence *fence, *out_fence = NULL;
@@ -141,34 +133,49 @@ static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
dma_fence_put(fence);
}
void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx)
/*
 * Per-context suspend step, used as an amdxdna_hwctx_walk() callback:
 * wait until the context is idle, stop it, then save its status so it
 * can be restored on resume. Always returns 0 so the walk continues.
 */
static int aie2_hwctx_suspend_cb(struct amdxdna_hwctx *hwctx, void *arg)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	aie2_hwctx_wait_for_idle(hwctx);
	aie2_hwctx_stop(xdna, hwctx, NULL);
	aie2_hwctx_status_shift_stop(hwctx);

	return 0;
}
void aie2_hwctx_suspend(struct amdxdna_client *client)
{
struct amdxdna_dev *xdna = client->xdna;
/*
* Command timeout is unlikely. But if it happens, it doesn't
* break the system. aie2_hwctx_stop() will destroy mailbox
* and abort all commands.
*/
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
aie2_hwctx_wait_for_idle(hwctx);
aie2_hwctx_stop(xdna, hwctx, NULL);
hwctx->old_status = hwctx->status;
hwctx->status = HWCTX_STAT_STOP;
amdxdna_hwctx_walk(client, NULL, aie2_hwctx_suspend_cb);
}
void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
/*
 * Per-context resume step, used as an amdxdna_hwctx_walk() callback:
 * restore the status saved at suspend time and restart the context.
 * Returns the restart result; non-zero aborts the walk.
 */
static int aie2_hwctx_resume_cb(struct amdxdna_hwctx *hwctx, void *arg)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	aie2_hwctx_status_restore(hwctx);

	return aie2_hwctx_restart(xdna, hwctx);
}
int aie2_hwctx_resume(struct amdxdna_client *client)
{
struct amdxdna_dev *xdna = client->xdna;
/*
* The resume path cannot guarantee that mailbox channel can be
* regenerated. If this happen, when submit message to this
* mailbox channel, error will return.
*/
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
hwctx->status = hwctx->old_status;
aie2_hwctx_restart(xdna, hwctx);
return amdxdna_hwctx_walk(client, NULL, aie2_hwctx_resume_cb);
}
static void
@@ -192,7 +199,7 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
{
struct amdxdna_sched_job *job = handle;
struct amdxdna_gem_obj *cmd_abo;
u32 ret = 0;
int ret = 0;
u32 status;
cmd_abo = job->cmd_bo;
@@ -222,7 +229,7 @@ static int
aie2_sched_nocmd_resp_handler(void *handle, void __iomem *data, size_t size)
{
struct amdxdna_sched_job *job = handle;
u32 ret = 0;
int ret = 0;
u32 status;
if (unlikely(!data))
@@ -250,7 +257,7 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
u32 fail_cmd_status;
u32 fail_cmd_idx;
u32 cmd_status;
u32 ret = 0;
int ret = 0;
cmd_abo = job->cmd_bo;
if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {

View File

@@ -290,18 +290,25 @@ int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u6
return 0;
}
/*
 * amdxdna_hwctx_walk() callback: OR the AIE columns occupied by @hwctx
 * (start_col .. start_col + num_col - 1) into the u32 bitmap at @arg.
 */
static int amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx, void *arg)
{
	u32 *bitmap = arg;

	*bitmap |= GENMASK(hwctx->start_col + hwctx->num_col - 1, hwctx->start_col);

	return 0;
}
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
u32 size, u32 *cols_filled)
{
DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS);
struct amdxdna_dev *xdna = ndev->xdna;
struct amdxdna_client *client;
struct amdxdna_hwctx *hwctx;
unsigned long hwctx_id;
dma_addr_t dma_addr;
u32 aie_bitmap = 0;
u8 *buff_addr;
int ret, idx;
int ret;
buff_addr = dma_alloc_noncoherent(xdna->ddev.dev, size, &dma_addr,
DMA_FROM_DEVICE, GFP_KERNEL);
@@ -309,12 +316,8 @@ int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
return -ENOMEM;
/* Go through each hardware context and mark the AIE columns that are active */
list_for_each_entry(client, &xdna->client_list, node) {
idx = srcu_read_lock(&client->hwctx_srcu);
amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
aie_bitmap |= amdxdna_hwctx_col_map(hwctx);
srcu_read_unlock(&client->hwctx_srcu, idx);
}
list_for_each_entry(client, &xdna->client_list, node)
amdxdna_hwctx_walk(client, &aie_bitmap, amdxdna_hwctx_col_map);
*cols_filled = 0;
req.dump_buff_addr = dma_addr;

View File

@@ -10,6 +10,7 @@
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/firmware.h>
#include <linux/iommu.h>
@@ -440,6 +441,40 @@ disable_dev:
return ret;
}
/*
 * Device suspend op: suspend the hardware contexts of every open client,
 * then stop the hardware. The whole sequence runs under dev_lock (taken
 * via the scope-based guard()). Always returns 0.
 */
static int aie2_hw_suspend(struct amdxdna_dev *xdna)
{
	struct amdxdna_client *client;

	guard(mutex)(&xdna->dev_lock);
	list_for_each_entry(client, &xdna->client_list, node)
		aie2_hwctx_suspend(client);

	aie2_hw_stop(xdna);

	return 0;
}
/*
 * Device resume op: restart the hardware and then resume the hardware
 * contexts of every open client, all under dev_lock. Bails out early if
 * the hardware fails to start; otherwise stops at the first client whose
 * contexts fail to resume and returns that error (0 on full success).
 */
static int aie2_hw_resume(struct amdxdna_dev *xdna)
{
	struct amdxdna_client *client;
	int ret;

	guard(mutex)(&xdna->dev_lock);
	ret = aie2_hw_start(xdna);
	if (ret) {
		XDNA_ERR(xdna, "Start hardware failed, %d", ret);
		return ret;
	}

	list_for_each_entry(client, &xdna->client_list, node) {
		ret = aie2_hwctx_resume(client);
		if (ret)
			break;
	}

	return ret;
}
static int aie2_init(struct amdxdna_dev *xdna)
{
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
@@ -520,14 +555,14 @@ static int aie2_init(struct amdxdna_dev *xdna)
if (!ndev->psp_hdl) {
XDNA_ERR(xdna, "failed to create psp");
ret = -ENOMEM;
goto free_irq;
goto release_fw;
}
xdna->dev_handle = ndev;
ret = aie2_hw_start(xdna);
if (ret) {
XDNA_ERR(xdna, "start npu failed, ret %d", ret);
goto free_irq;
goto release_fw;
}
ret = aie2_mgmt_fw_query(ndev);
@@ -578,8 +613,6 @@ async_event_free:
aie2_error_async_events_free(ndev);
stop_hw:
aie2_hw_stop(xdna);
free_irq:
pci_free_irq_vectors(pdev);
release_fw:
release_firmware(fw);
@@ -588,12 +621,10 @@ release_fw:
static void aie2_fini(struct amdxdna_dev *xdna)
{
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
aie2_hw_stop(xdna);
aie2_error_async_events_free(ndev);
pci_free_irq_vectors(pdev);
}
static int aie2_get_aie_status(struct amdxdna_client *client,
@@ -752,65 +783,68 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
return ret;
}
static int aie2_get_hwctx_status(struct amdxdna_client *client,
struct amdxdna_drm_get_info *args)
static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg)
{
struct amdxdna_drm_query_hwctx __user *buf;
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_drm_query_hwctx *tmp;
struct amdxdna_client *tmp_client;
struct amdxdna_hwctx *hwctx;
unsigned long hwctx_id;
bool overflow = false;
u32 req_bytes = 0;
u32 hw_i = 0;
int ret = 0;
int idx;
struct amdxdna_drm_hwctx_entry *tmp __free(kfree) = NULL;
struct amdxdna_drm_get_array *array_args = arg;
struct amdxdna_drm_hwctx_entry __user *buf;
u32 size;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
if (!array_args->num_element)
return -EINVAL;
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
if (!tmp)
return -ENOMEM;
buf = u64_to_user_ptr(args->buffer);
tmp->pid = hwctx->client->pid;
tmp->context_id = hwctx->id;
tmp->start_col = hwctx->start_col;
tmp->num_col = hwctx->num_col;
tmp->command_submissions = hwctx->priv->seq;
tmp->command_completions = hwctx->priv->completed;
tmp->pasid = hwctx->client->pasid;
tmp->priority = hwctx->qos.priority;
tmp->gops = hwctx->qos.gops;
tmp->fps = hwctx->qos.fps;
tmp->dma_bandwidth = hwctx->qos.dma_bandwidth;
tmp->latency = hwctx->qos.latency;
tmp->frame_exec_time = hwctx->qos.frame_exec_time;
tmp->state = AMDXDNA_HWCTX_STATE_ACTIVE;
buf = u64_to_user_ptr(array_args->buffer);
size = min(sizeof(*tmp), array_args->element_size);
if (copy_to_user(buf, tmp, size))
return -EFAULT;
array_args->buffer += size;
array_args->num_element--;
return 0;
}
static int aie2_get_hwctx_status(struct amdxdna_client *client,
struct amdxdna_drm_get_info *args)
{
struct amdxdna_drm_get_array array_args;
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_client *tmp_client;
int ret;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
array_args.element_size = sizeof(struct amdxdna_drm_query_hwctx);
array_args.buffer = args->buffer;
array_args.num_element = args->buffer_size / array_args.element_size;
list_for_each_entry(tmp_client, &xdna->client_list, node) {
idx = srcu_read_lock(&tmp_client->hwctx_srcu);
amdxdna_for_each_hwctx(tmp_client, hwctx_id, hwctx) {
req_bytes += sizeof(*tmp);
if (args->buffer_size < req_bytes) {
/* Continue iterating to get the required size */
overflow = true;
continue;
}
memset(tmp, 0, sizeof(*tmp));
tmp->pid = tmp_client->pid;
tmp->context_id = hwctx->id;
tmp->start_col = hwctx->start_col;
tmp->num_col = hwctx->num_col;
tmp->command_submissions = hwctx->priv->seq;
tmp->command_completions = hwctx->priv->completed;
if (copy_to_user(&buf[hw_i], tmp, sizeof(*tmp))) {
ret = -EFAULT;
srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
goto out;
}
hw_i++;
}
srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
ret = amdxdna_hwctx_walk(tmp_client, &array_args,
aie2_hwctx_status_cb);
if (ret)
break;
}
if (overflow) {
XDNA_ERR(xdna, "Invalid buffer size. Given: %u Need: %u.",
args->buffer_size, req_bytes);
ret = -EINVAL;
}
out:
kfree(tmp);
args->buffer_size = req_bytes;
args->buffer_size -= (u32)(array_args.buffer - args->buffer);
return ret;
}
@@ -854,6 +888,58 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
return ret;
}
/*
 * Fill the user-supplied array described by @args with one hardware
 * context entry per context across all clients, by walking each
 * client's contexts with aie2_hwctx_status_cb(). Must be called with
 * dev_lock held (asserted below).
 *
 * The element size is clamped to the kernel's struct size so older or
 * newer userspace layouts remain compatible; the element count is
 * recomputed from the total buffer space at that clamped size. On
 * return, args->num_element is the number of entries actually written
 * (derived from how far the walk advanced the buffer pointer).
 */
static int aie2_query_ctx_status_array(struct amdxdna_client *client,
				       struct amdxdna_drm_get_array *args)
{
	struct amdxdna_drm_get_array array_args;
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_client *tmp_client;
	int ret;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));

	array_args.element_size = min(args->element_size,
				      sizeof(struct amdxdna_drm_hwctx_entry));
	array_args.buffer = args->buffer;
	array_args.num_element = args->num_element * args->element_size /
				array_args.element_size;
	list_for_each_entry(tmp_client, &xdna->client_list, node) {
		ret = amdxdna_hwctx_walk(tmp_client, &array_args,
					 aie2_hwctx_status_cb);
		if (ret)
			break;
	}

	args->element_size = array_args.element_size;
	args->num_element = (u32)((array_args.buffer - args->buffer) /
				  args->element_size);

	return ret;
}
/*
 * Device get_array op: dispatch a DRM_IOCTL_AMDXDNA_GET_ARRAY request by
 * parameter. Only DRM_AMDXDNA_HW_CONTEXT_ALL is currently supported;
 * anything else returns -EOPNOTSUPP. A drm_dev_enter() reference is held
 * for the duration of the query, and -ENODEV is returned if the device
 * is already unplugged.
 */
static int aie2_get_array(struct amdxdna_client *client,
			  struct amdxdna_drm_get_array *args)
{
	struct amdxdna_dev *xdna = client->xdna;
	int ret, idx;

	if (!drm_dev_enter(&xdna->ddev, &idx))
		return -ENODEV;

	switch (args->param) {
	case DRM_AMDXDNA_HW_CONTEXT_ALL:
		ret = aie2_query_ctx_status_array(client, args);
		break;
	default:
		XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
		ret = -EOPNOTSUPP;
	}
	XDNA_DBG(xdna, "Got param %d", args->param);

	drm_dev_exit(idx);
	return ret;
}
static int aie2_set_power_mode(struct amdxdna_client *client,
struct amdxdna_drm_set_state *args)
{
@@ -903,17 +989,16 @@ static int aie2_set_state(struct amdxdna_client *client,
}
const struct amdxdna_dev_ops aie2_ops = {
.init = aie2_init,
.fini = aie2_fini,
.resume = aie2_hw_start,
.suspend = aie2_hw_stop,
.get_aie_info = aie2_get_info,
.set_aie_state = aie2_set_state,
.hwctx_init = aie2_hwctx_init,
.hwctx_fini = aie2_hwctx_fini,
.hwctx_config = aie2_hwctx_config,
.cmd_submit = aie2_cmd_submit,
.init = aie2_init,
.fini = aie2_fini,
.resume = aie2_hw_resume,
.suspend = aie2_hw_suspend,
.get_aie_info = aie2_get_info,
.set_aie_state = aie2_set_state,
.hwctx_init = aie2_hwctx_init,
.hwctx_fini = aie2_hwctx_fini,
.hwctx_config = aie2_hwctx_config,
.cmd_submit = aie2_cmd_submit,
.hmm_invalidate = aie2_hmm_invalidate,
.hwctx_suspend = aie2_hwctx_suspend,
.hwctx_resume = aie2_hwctx_resume,
.get_array = aie2_get_array,
};

View File

@@ -288,10 +288,9 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx);
void aie2_hwctx_suspend(struct amdxdna_client *client);
int aie2_hwctx_resume(struct amdxdna_client *client);
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
void aie2_restart_ctx(struct amdxdna_client *client);
#endif /* _AIE2_PCI_H_ */

View File

@@ -60,32 +60,6 @@ static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
return &fence->base;
}
void amdxdna_hwctx_suspend(struct amdxdna_client *client)
{
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_hwctx *hwctx;
unsigned long hwctx_id;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
mutex_lock(&client->hwctx_lock);
amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
xdna->dev_info->ops->hwctx_suspend(hwctx);
mutex_unlock(&client->hwctx_lock);
}
void amdxdna_hwctx_resume(struct amdxdna_client *client)
{
struct amdxdna_dev *xdna = client->xdna;
struct amdxdna_hwctx *hwctx;
unsigned long hwctx_id;
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
mutex_lock(&client->hwctx_lock);
amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
xdna->dev_info->ops->hwctx_resume(hwctx);
mutex_unlock(&client->hwctx_lock);
}
static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
struct srcu_struct *ss)
{
@@ -94,14 +68,30 @@ static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
synchronize_srcu(ss);
/* At this point, user is not able to submit new commands */
mutex_lock(&xdna->dev_lock);
xdna->dev_info->ops->hwctx_fini(hwctx);
mutex_unlock(&xdna->dev_lock);
kfree(hwctx->name);
kfree(hwctx);
}
/*
 * Iterate over every hardware context of @client under SRCU read
 * protection and invoke @walk on each, passing @arg through. The walk
 * stops at the first callback returning non-zero and that value is
 * returned; 0 means every context was visited successfully.
 */
int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg,
		       int (*walk)(struct amdxdna_hwctx *hwctx, void *arg))
{
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;
	int ret = 0, idx;

	idx = srcu_read_lock(&client->hwctx_srcu);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
		ret = walk(hwctx, arg);
		if (ret)
			break;
	}
	srcu_read_unlock(&client->hwctx_srcu, idx);

	return ret;
}
void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
{
struct amdxdna_cmd *cmd = abo->mem.kva;
@@ -152,16 +142,12 @@ void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
struct amdxdna_hwctx *hwctx;
unsigned long hwctx_id;
mutex_lock(&client->hwctx_lock);
amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
XDNA_DBG(client->xdna, "PID %d close HW context %d",
client->pid, hwctx->id);
xa_erase(&client->hwctx_xa, hwctx->id);
mutex_unlock(&client->hwctx_lock);
amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
mutex_lock(&client->hwctx_lock);
}
mutex_unlock(&client->hwctx_lock);
}
int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -251,6 +237,7 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d
if (!drm_dev_enter(dev, &idx))
return -ENODEV;
mutex_lock(&xdna->dev_lock);
hwctx = xa_erase(&client->hwctx_xa, args->handle);
if (!hwctx) {
ret = -EINVAL;
@@ -267,6 +254,7 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d
XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
out:
mutex_unlock(&xdna->dev_lock);
drm_dev_exit(idx);
return ret;
}

View File

@@ -139,16 +139,10 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx)
{
return GENMASK(hwctx->start_col + hwctx->num_col - 1,
hwctx->start_col);
}
void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
void amdxdna_hwctx_suspend(struct amdxdna_client *client);
void amdxdna_hwctx_resume(struct amdxdna_client *client);
int amdxdna_hwctx_walk(struct amdxdna_client *client, void *arg,
int (*walk)(struct amdxdna_hwctx *hwctx, void *arg));
int amdxdna_cmd_submit(struct amdxdna_client *client,
u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,

View File

@@ -18,6 +18,7 @@
#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"
#include "amdxdna_ubuf.h"
#define XDNA_MAX_CMD_BO_SIZE SZ_32K
@@ -296,7 +297,7 @@ static int amdxdna_insert_pages(struct amdxdna_gem_obj *abo,
vma->vm_private_data = NULL;
vma->vm_ops = NULL;
ret = dma_buf_mmap(to_gobj(abo)->dma_buf, vma, 0);
ret = dma_buf_mmap(abo->dma_buf, vma, 0);
if (ret) {
XDNA_ERR(xdna, "Failed to mmap dma buf %d", ret);
return ret;
@@ -391,10 +392,47 @@ static const struct dma_buf_ops amdxdna_dmabuf_ops = {
.vunmap = drm_gem_dmabuf_vunmap,
};
/*
 * GEM .vmap callback: map the object into kernel address space. Imported
 * objects are mapped through their backing dma-buf; native objects use
 * the shmem helper. The reservation lock must be held by the caller
 * (asserted below).
 */
static int amdxdna_gem_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
{
	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);

	iosys_map_clear(map);

	dma_resv_assert_held(obj->resv);

	if (is_import_bo(abo))
		dma_buf_vmap(abo->dma_buf, map);
	else
		drm_gem_shmem_object_vmap(obj, map);

	/* Either helper fills @map on success; a NULL vaddr means the mapping failed. */
	if (!map->vaddr)
		return -ENOMEM;

	return 0;
}
/*
 * GEM .vunmap callback: undo amdxdna_gem_obj_vmap(), picking the dma-buf
 * or shmem path to match how the object was mapped. The reservation lock
 * must be held by the caller.
 */
static void amdxdna_gem_obj_vunmap(struct drm_gem_object *obj, struct iosys_map *map)
{
	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);

	dma_resv_assert_held(obj->resv);

	if (is_import_bo(abo))
		dma_buf_vunmap(abo->dma_buf, map);
	else
		drm_gem_shmem_object_vunmap(obj, map);
}
static struct dma_buf *amdxdna_gem_prime_export(struct drm_gem_object *gobj, int flags)
{
struct amdxdna_gem_obj *abo = to_xdna_obj(gobj);
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
if (abo->dma_buf) {
get_dma_buf(abo->dma_buf);
return abo->dma_buf;
}
exp_info.ops = &amdxdna_dmabuf_ops;
exp_info.size = gobj->size;
exp_info.flags = flags;
@@ -451,8 +489,8 @@ static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = {
.pin = drm_gem_shmem_object_pin,
.unpin = drm_gem_shmem_object_unpin,
.get_sg_table = drm_gem_shmem_object_get_sg_table,
.vmap = drm_gem_shmem_object_vmap,
.vunmap = drm_gem_shmem_object_vunmap,
.vmap = amdxdna_gem_obj_vmap,
.vunmap = amdxdna_gem_obj_vunmap,
.mmap = amdxdna_gem_obj_mmap,
.vm_ops = &drm_gem_shmem_vm_ops,
.export = amdxdna_gem_prime_export,
@@ -494,6 +532,68 @@ amdxdna_gem_create_object_cb(struct drm_device *dev, size_t size)
return to_gobj(abo);
}
/*
 * Allocate a shmem-backed GEM object of @size bytes. map_wc is cleared
 * so CPU mappings are cached rather than write-combined. Returns the
 * driver object wrapper, or an ERR_PTR from drm_gem_shmem_create().
 */
static struct amdxdna_gem_obj *
amdxdna_gem_create_shmem_object(struct drm_device *dev, size_t size)
{
	struct drm_gem_shmem_object *shmem = drm_gem_shmem_create(dev, size);

	if (IS_ERR(shmem))
		return ERR_CAST(shmem);

	shmem->map_wc = false;
	return to_xdna_obj(&shmem->base);
}
/*
 * Create a BO backed by user-provided memory. Userspace passes a
 * struct amdxdna_drm_va_tbl at args->vaddr: when it lists VA entries, a
 * ubuf dma-buf is built from the entries that follow the table (command
 * BOs additionally request DMA mapping); otherwise the dmabuf_fd from
 * the table is used. The resulting dma-buf is imported as a GEM object;
 * the local dma-buf reference is dropped in both the success and error
 * paths since the import path manages its own reference.
 */
static struct amdxdna_gem_obj *
amdxdna_gem_create_ubuf_object(struct drm_device *dev, struct amdxdna_drm_create_bo *args)
{
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	enum amdxdna_ubuf_flag flags = 0;
	struct amdxdna_drm_va_tbl va_tbl;
	struct drm_gem_object *gobj;
	struct dma_buf *dma_buf;

	if (copy_from_user(&va_tbl, u64_to_user_ptr(args->vaddr), sizeof(va_tbl))) {
		XDNA_DBG(xdna, "Access va table failed");
		return ERR_PTR(-EINVAL);
	}

	if (va_tbl.num_entries) {
		if (args->type == AMDXDNA_BO_CMD)
			flags |= AMDXDNA_UBUF_FLAG_MAP_DMA;

		dma_buf = amdxdna_get_ubuf(dev, flags, va_tbl.num_entries,
					   u64_to_user_ptr(args->vaddr + sizeof(va_tbl)));
	} else {
		dma_buf = dma_buf_get(va_tbl.dmabuf_fd);
	}

	if (IS_ERR(dma_buf))
		return ERR_CAST(dma_buf);

	gobj = amdxdna_gem_prime_import(dev, dma_buf);
	if (IS_ERR(gobj)) {
		dma_buf_put(dma_buf);
		return ERR_CAST(gobj);
	}

	dma_buf_put(dma_buf);

	return to_xdna_obj(gobj);
}
/*
 * Allocate the GEM object for a create-BO request: user-memory backed
 * when args->vaddr is set, otherwise a page-aligned shmem object.
 */
static struct amdxdna_gem_obj *
amdxdna_gem_create_object(struct drm_device *dev,
			  struct amdxdna_drm_create_bo *args)
{
	size_t aligned_sz = PAGE_ALIGN(args->size);

	if (args->vaddr)
		return amdxdna_gem_create_ubuf_object(dev, args);

	return amdxdna_gem_create_shmem_object(dev, aligned_sz);
}
struct drm_gem_object *
amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
{
@@ -545,16 +645,12 @@ amdxdna_drm_alloc_shmem(struct drm_device *dev,
struct drm_file *filp)
{
struct amdxdna_client *client = filp->driver_priv;
struct drm_gem_shmem_object *shmem;
struct amdxdna_gem_obj *abo;
shmem = drm_gem_shmem_create(dev, args->size);
if (IS_ERR(shmem))
return ERR_CAST(shmem);
abo = amdxdna_gem_create_object(dev, args);
if (IS_ERR(abo))
return ERR_CAST(abo);
shmem->map_wc = false;
abo = to_xdna_obj(&shmem->base);
abo->client = client;
abo->type = AMDXDNA_BO_SHMEM;
@@ -569,7 +665,6 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
struct amdxdna_client *client = filp->driver_priv;
struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct drm_gem_shmem_object *shmem;
struct amdxdna_gem_obj *abo;
int ret;
@@ -586,14 +681,12 @@ amdxdna_drm_create_dev_heap(struct drm_device *dev,
goto mm_unlock;
}
shmem = drm_gem_shmem_create(dev, args->size);
if (IS_ERR(shmem)) {
ret = PTR_ERR(shmem);
abo = amdxdna_gem_create_object(dev, args);
if (IS_ERR(abo)) {
ret = PTR_ERR(abo);
goto mm_unlock;
}
shmem->map_wc = false;
abo = to_xdna_obj(&shmem->base);
abo->type = AMDXDNA_BO_DEV_HEAP;
abo->client = client;
abo->mem.dev_addr = client->xdna->dev_info->dev_mem_base;
@@ -657,7 +750,6 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
{
struct iosys_map map = IOSYS_MAP_INIT_VADDR(NULL);
struct amdxdna_dev *xdna = to_xdna_dev(dev);
struct drm_gem_shmem_object *shmem;
struct amdxdna_gem_obj *abo;
int ret;
@@ -671,12 +763,9 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
return ERR_PTR(-EINVAL);
}
shmem = drm_gem_shmem_create(dev, args->size);
if (IS_ERR(shmem))
return ERR_CAST(shmem);
shmem->map_wc = false;
abo = to_xdna_obj(&shmem->base);
abo = amdxdna_gem_create_object(dev, args);
if (IS_ERR(abo))
return ERR_CAST(abo);
abo->type = AMDXDNA_BO_CMD;
abo->client = filp->driver_priv;
@@ -691,7 +780,7 @@ amdxdna_drm_create_cmd_bo(struct drm_device *dev,
return abo;
release_obj:
drm_gem_shmem_free(shmem);
drm_gem_object_put(to_gobj(abo));
return ERR_PTR(ret);
}
@@ -702,7 +791,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
struct amdxdna_gem_obj *abo;
int ret;
if (args->flags || args->vaddr || !args->size)
if (args->flags)
return -EINVAL;
XDNA_DBG(xdna, "BO arg type %d vaddr 0x%llx size 0x%llx flags 0x%llx",

View File

@@ -26,6 +26,13 @@ MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin");
MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin");
MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
/*
* 0.0: Initial version
* 0.1: Support getting all hardware contexts by DRM_IOCTL_AMDXDNA_GET_ARRAY
*/
#define AMDXDNA_DRIVER_MAJOR 0
#define AMDXDNA_DRIVER_MINOR 1
/*
* Bind the driver base on (vendor_id, device_id) pair and later use the
* (device_id, rev_id) pair as a key to select the devices. The devices with
@@ -81,7 +88,6 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
ret = -ENODEV;
goto unbind_sva;
}
mutex_init(&client->hwctx_lock);
init_srcu_struct(&client->hwctx_srcu);
xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
mutex_init(&client->mm_lock);
@@ -116,7 +122,6 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
xa_destroy(&client->hwctx_xa);
cleanup_srcu_struct(&client->hwctx_srcu);
mutex_destroy(&client->hwctx_lock);
mutex_destroy(&client->mm_lock);
if (client->dev_heap)
drm_gem_object_put(to_gobj(client->dev_heap));
@@ -142,8 +147,8 @@ static int amdxdna_flush(struct file *f, fl_owner_t id)
mutex_lock(&xdna->dev_lock);
list_del_init(&client->node);
mutex_unlock(&xdna->dev_lock);
amdxdna_hwctx_remove_all(client);
mutex_unlock(&xdna->dev_lock);
drm_dev_exit(idx);
return 0;
@@ -166,6 +171,23 @@ static int amdxdna_drm_get_info_ioctl(struct drm_device *dev, void *data, struct
return ret;
}
/*
 * DRM_IOCTL_AMDXDNA_GET_ARRAY entry point: reject devices without a
 * get_array op (-EOPNOTSUPP) and malformed requests (non-zero pad, zero
 * element count or size), then forward to the device-specific handler
 * under dev_lock (held via the scope-based guard()).
 */
static int amdxdna_drm_get_array_ioctl(struct drm_device *dev, void *data,
				       struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_drm_get_array *args = data;

	if (!xdna->dev_info->ops->get_array)
		return -EOPNOTSUPP;

	if (args->pad || !args->num_element || !args->element_size)
		return -EINVAL;

	guard(mutex)(&xdna->dev_lock);
	return xdna->dev_info->ops->get_array(client, args);
}
static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdxdna_client *client = filp->driver_priv;
@@ -197,6 +219,7 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
/* AIE hardware */
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_ARRAY, amdxdna_drm_get_array_ioctl, 0),
DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
};
@@ -220,6 +243,8 @@ const struct drm_driver amdxdna_drm_drv = {
.fops = &amdxdna_fops,
.name = "amdxdna_accel_driver",
.desc = "AMD XDNA DRM implementation",
.major = AMDXDNA_DRIVER_MAJOR,
.minor = AMDXDNA_DRIVER_MINOR,
.open = amdxdna_drm_open,
.postclose = amdxdna_drm_close,
.ioctls = amdxdna_drm_ioctls,
@@ -330,11 +355,8 @@ static void amdxdna_remove(struct pci_dev *pdev)
struct amdxdna_client, node);
while (client) {
list_del_init(&client->node);
mutex_unlock(&xdna->dev_lock);
amdxdna_hwctx_remove_all(client);
mutex_lock(&xdna->dev_lock);
client = list_first_entry_or_null(&xdna->client_list,
struct amdxdna_client, node);
}
@@ -343,89 +365,29 @@ static void amdxdna_remove(struct pci_dev *pdev)
mutex_unlock(&xdna->dev_lock);
}
/*
 * Quiesce the device via the per-generation suspend callback.
 * Callers invoke this with xdna->dev_lock held (hence the _nolock suffix).
 * NOTE(review): the callback's result is discarded here while
 * amdxdna_dev_resume_nolock() propagates resume's — confirm the asymmetry
 * is intentional.
 */
static int amdxdna_dev_suspend_nolock(struct amdxdna_dev *xdna)
{
	if (xdna->dev_info->ops->suspend)
		xdna->dev_info->ops->suspend(xdna);

	return 0;
}
/*
 * Bring the device back up via the per-generation resume callback,
 * propagating its error code. Callers hold xdna->dev_lock.
 */
static int amdxdna_dev_resume_nolock(struct amdxdna_dev *xdna)
{
	if (xdna->dev_info->ops->resume)
		return xdna->dev_info->ops->resume(xdna);

	return 0;
}
/*
 * System-sleep suspend handler: suspend each client's hardware contexts,
 * then quiesce the device firmware.
 *
 * NOTE(review): this body looks like a mis-merged diff — two
 * implementations are interleaved: the -EOPNOTSUPP path returns while
 * dev_lock is still held, and the final direct ops->suspend() call is
 * unreachable (the function already returned 0). Reconcile against the
 * intended version before relying on this.
 */
static int amdxdna_pmops_suspend(struct device *dev)
{
	struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
	struct amdxdna_client *client;

	mutex_lock(&xdna->dev_lock);
	list_for_each_entry(client, &xdna->client_list, node)
		amdxdna_hwctx_suspend(client);

	if (!xdna->dev_info->ops->suspend)
		return -EOPNOTSUPP; /* NOTE(review): returns with dev_lock held */

	amdxdna_dev_suspend_nolock(xdna);
	mutex_unlock(&xdna->dev_lock);

	return 0;
	return xdna->dev_info->ops->suspend(xdna); /* NOTE(review): unreachable */
}
/*
 * System-sleep resume handler: resume the firmware first, then each
 * client's hardware contexts.
 *
 * NOTE(review): the "!ops->resume" check in the middle looks like a
 * mis-merged diff line — it returns -EOPNOTSUPP while dev_lock is still
 * held, and amdxdna_dev_resume_nolock() above already handles a missing
 * callback. Reconcile before relying on this.
 */
static int amdxdna_pmops_resume(struct device *dev)
{
	struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
	struct amdxdna_client *client;
	int ret;

	XDNA_INFO(xdna, "firmware resuming...");
	mutex_lock(&xdna->dev_lock);
	ret = amdxdna_dev_resume_nolock(xdna);
	if (ret) {
		XDNA_ERR(xdna, "resume NPU firmware failed");
		mutex_unlock(&xdna->dev_lock);
		return ret;
	}

	if (!xdna->dev_info->ops->resume)
		return -EOPNOTSUPP; /* NOTE(review): returns with dev_lock held */

	XDNA_INFO(xdna, "hardware context resuming...");
	list_for_each_entry(client, &xdna->client_list, node)
		amdxdna_hwctx_resume(client);
	mutex_unlock(&xdna->dev_lock);

	return 0;
}
/*
 * Runtime-PM suspend: quiesce the device under dev_lock.
 * Hardware contexts are untouched here (unlike system sleep), since
 * runtime suspend only happens when the device is idle.
 */
static int amdxdna_rpmops_suspend(struct device *dev)
{
	struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
	int ret;

	mutex_lock(&xdna->dev_lock);
	ret = amdxdna_dev_suspend_nolock(xdna);
	mutex_unlock(&xdna->dev_lock);

	XDNA_DBG(xdna, "Runtime suspend done ret: %d", ret);
	return ret;
}
/*
 * Runtime-PM resume: bring the device back up under dev_lock and
 * propagate the firmware resume result.
 *
 * NOTE(review): the trailing direct ops->resume() call is unreachable
 * (the function returns ret above) — looks like a mis-merged diff line.
 */
static int amdxdna_rpmops_resume(struct device *dev)
{
	struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
	int ret;

	mutex_lock(&xdna->dev_lock);
	ret = amdxdna_dev_resume_nolock(xdna);
	mutex_unlock(&xdna->dev_lock);

	XDNA_DBG(xdna, "Runtime resume done ret: %d", ret);
	return ret;
	return xdna->dev_info->ops->resume(xdna); /* NOTE(review): unreachable */
}
/*
 * NOTE(review): two RUNTIME_PM_OPS() initializers are present — this
 * looks like a mis-merged diff (the old rpmops_* line alongside the new
 * pmops_* line). With duplicate designated initializers the later one
 * takes effect; keep exactly one.
 */
static const struct dev_pm_ops amdxdna_pm_ops = {
	SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
	RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL)
	RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
};
static struct pci_driver amdxdna_pci_driver = {

View File

@@ -50,16 +50,15 @@ struct amdxdna_dev_ops {
int (*init)(struct amdxdna_dev *xdna);
void (*fini)(struct amdxdna_dev *xdna);
int (*resume)(struct amdxdna_dev *xdna);
void (*suspend)(struct amdxdna_dev *xdna);
int (*suspend)(struct amdxdna_dev *xdna);
int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
void (*hwctx_suspend)(struct amdxdna_hwctx *hwctx);
void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
int (*get_aie_info)(struct amdxdna_client *client, struct amdxdna_drm_get_info *args);
int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
};
/*
@@ -118,8 +117,6 @@ struct amdxdna_device_id {
struct amdxdna_client {
struct list_head node;
pid_t pid;
struct mutex hwctx_lock; /* protect hwctx */
/* do NOT wait this srcu when hwctx_lock is held */
struct srcu_struct hwctx_srcu;
struct xarray hwctx_xa;
u32 next_hwctxid;

View File

@@ -0,0 +1,232 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2025, Advanced Micro Devices, Inc.
*/
#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_print.h>
#include <linux/dma-buf.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include "amdxdna_pci_drv.h"
#include "amdxdna_ubuf.h"
/* Private payload of a user-buffer dma-buf exported by amdxdna_get_ubuf(). */
struct amdxdna_ubuf_priv {
	struct page **pages;		/* pinned user pages backing the buffer */
	u64 nr_pages;			/* number of entries in @pages */
	enum amdxdna_ubuf_flag flags;	/* AMDXDNA_UBUF_FLAG_* behavior flags */
	struct mm_struct *mm;		/* owner mm, for pinned_vm accounting */
};
/*
 * dma-buf map_dma_buf callback: build an sg_table over the pinned user
 * pages and, when AMDXDNA_UBUF_FLAG_MAP_DMA is set, DMA-map it for the
 * attaching device.
 *
 * Fixes resource leaks in the original error paths: @sg was leaked when
 * sg_alloc_table_from_pages() failed, and both the table and @sg were
 * leaked when dma_map_sgtable() failed.
 */
static struct sg_table *amdxdna_ubuf_map(struct dma_buf_attachment *attach,
					 enum dma_data_direction direction)
{
	struct amdxdna_ubuf_priv *ubuf = attach->dmabuf->priv;
	struct sg_table *sg;
	int ret;

	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
	if (!sg)
		return ERR_PTR(-ENOMEM);

	ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->nr_pages, 0,
					ubuf->nr_pages << PAGE_SHIFT, GFP_KERNEL);
	if (ret)
		goto free_sg;

	if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA) {
		ret = dma_map_sgtable(attach->dev, sg, direction, 0);
		if (ret)
			goto free_table;
	}

	return sg;

free_table:
	sg_free_table(sg);
free_sg:
	kfree(sg);
	return ERR_PTR(ret);
}
static void amdxdna_ubuf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sg,
enum dma_data_direction direction)
{
struct amdxdna_ubuf_priv *ubuf = attach->dmabuf->priv;
if (ubuf->flags & AMDXDNA_UBUF_FLAG_MAP_DMA)
dma_unmap_sgtable(attach->dev, sg, direction, 0);
sg_free_table(sg);
kfree(sg);
}
/*
 * dma-buf release callback: drop the page pins taken at export time,
 * undo the pinned_vm accounting, and release the mm reference.
 */
static void amdxdna_ubuf_release(struct dma_buf *dbuf)
{
	struct amdxdna_ubuf_priv *ubuf = dbuf->priv;

	unpin_user_pages(ubuf->pages, ubuf->nr_pages);
	kvfree(ubuf->pages);
	atomic64_sub(ubuf->nr_pages, &ubuf->mm->pinned_vm);
	mmdrop(ubuf->mm);
	kfree(ubuf);
}
/*
 * Page-fault handler for CPU mmaps of the exported buffer: insert the
 * PFN of the pinned page that backs the faulting offset.
 */
static vm_fault_t amdxdna_ubuf_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct amdxdna_ubuf_priv *ubuf = vma->vm_private_data;
	pgoff_t idx = (vmf->address - vma->vm_start) >> PAGE_SHIFT;

	return vmf_insert_pfn(vma, vmf->address, page_to_pfn(ubuf->pages[idx]));
}
/* Fault handler only: PTEs are inserted lazily on first touch. */
static const struct vm_operations_struct amdxdna_ubuf_vm_ops = {
	.fault = amdxdna_ubuf_vm_fault,
};
/*
 * dma-buf mmap callback: arm the VMA for lazy PFN insertion via
 * amdxdna_ubuf_vm_fault().
 */
static int amdxdna_ubuf_mmap(struct dma_buf *dbuf, struct vm_area_struct *vma)
{
	vma->vm_private_data = dbuf->priv;
	vma->vm_ops = &amdxdna_ubuf_vm_ops;
	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);

	return 0;
}
/*
 * dma-buf vmap callback: build a contiguous kernel mapping over the
 * pinned pages and hand it back through @map.
 */
static int amdxdna_ubuf_vmap(struct dma_buf *dbuf, struct iosys_map *map)
{
	struct amdxdna_ubuf_priv *ubuf = dbuf->priv;
	void *vaddr = vmap(ubuf->pages, ubuf->nr_pages, VM_MAP, PAGE_KERNEL);

	if (!vaddr)
		return -EINVAL;

	iosys_map_set_vaddr(map, vaddr);
	return 0;
}
/* dma-buf vunmap callback: tear down the mapping from amdxdna_ubuf_vmap(). */
static void amdxdna_ubuf_vunmap(struct dma_buf *dbuf, struct iosys_map *map)
{
	vunmap(map->vaddr);
}
/* Exporter callbacks for user-buffer backed dma-bufs. */
static const struct dma_buf_ops amdxdna_ubuf_dmabuf_ops = {
	.map_dma_buf = amdxdna_ubuf_map,
	.unmap_dma_buf = amdxdna_ubuf_unmap,
	.release = amdxdna_ubuf_release,
	.mmap = amdxdna_ubuf_mmap,
	.vmap = amdxdna_ubuf_vmap,
	.vunmap = amdxdna_ubuf_vunmap,
};
/**
 * amdxdna_get_ubuf() - export user VA ranges as a dma-buf
 * @dev: DRM device
 * @flags: AMDXDNA_UBUF_FLAG_* behavior flags
 * @num_entries: number of elements in @va_entries
 * @va_entries: user pointer to an array of {vaddr, len} entries, each
 *              page-aligned
 *
 * Pins the user pages long-term (accounted against RLIMIT_MEMLOCK) and
 * wraps them in a dma-buf served by amdxdna_ubuf_dmabuf_ops.
 *
 * Fixes vs. the original: a short pin_user_pages_fast() result is now
 * accounted in @start so the unwind path unpins those pages (they were
 * leaked before), and the error log reports the real return value
 * instead of the -ENOMEM it was overwritten with.
 *
 * Return: the exported dma-buf on success, ERR_PTR() on failure.
 */
struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev,
				 enum amdxdna_ubuf_flag flags,
				 u32 num_entries, void __user *va_entries)
{
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	unsigned long lock_limit, new_pinned;
	struct amdxdna_drm_va_entry *va_ent;
	struct amdxdna_ubuf_priv *ubuf;
	u32 npages, start = 0;
	struct dma_buf *dbuf;
	int i, ret;
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
	if (!ubuf)
		return ERR_PTR(-ENOMEM);

	ubuf->flags = flags;
	ubuf->mm = current->mm;
	mmgrab(ubuf->mm);

	va_ent = kvcalloc(num_entries, sizeof(*va_ent), GFP_KERNEL);
	if (!va_ent) {
		ret = -ENOMEM;
		goto free_ubuf;
	}

	if (copy_from_user(va_ent, va_entries, sizeof(*va_ent) * num_entries)) {
		XDNA_DBG(xdna, "Access va entries failed");
		ret = -EINVAL;
		goto free_ent;
	}

	/* Validate alignment and compute the total buffer size. */
	for (i = 0, exp_info.size = 0; i < num_entries; i++) {
		if (!IS_ALIGNED(va_ent[i].vaddr, PAGE_SIZE) ||
		    !IS_ALIGNED(va_ent[i].len, PAGE_SIZE)) {
			XDNA_ERR(xdna, "Invalid address or len %llx, %llx",
				 va_ent[i].vaddr, va_ent[i].len);
			ret = -EINVAL;
			goto free_ent;
		}

		exp_info.size += va_ent[i].len;
	}

	ubuf->nr_pages = exp_info.size >> PAGE_SHIFT;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	new_pinned = atomic64_add_return(ubuf->nr_pages, &ubuf->mm->pinned_vm);
	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
		XDNA_DBG(xdna, "New pin %ld, limit %ld, cap %d",
			 new_pinned, lock_limit, capable(CAP_IPC_LOCK));
		ret = -ENOMEM;
		goto sub_pin_cnt;
	}

	ubuf->pages = kvmalloc_array(ubuf->nr_pages, sizeof(*ubuf->pages), GFP_KERNEL);
	if (!ubuf->pages) {
		ret = -ENOMEM;
		goto sub_pin_cnt;
	}

	for (i = 0; i < num_entries; i++) {
		npages = va_ent[i].len >> PAGE_SHIFT;

		ret = pin_user_pages_fast(va_ent[i].vaddr, npages,
					  FOLL_WRITE | FOLL_LONGTERM,
					  &ubuf->pages[start]);
		if (ret < 0 || ret != npages) {
			/* Log the real return value before normalizing it. */
			XDNA_ERR(xdna, "Failed to pin pages ret %d", ret);
			/*
			 * A short pin still pinned @ret pages; account them
			 * in @start so the unwind path unpins them too.
			 */
			if (ret > 0)
				start += ret;
			if (ret >= 0)
				ret = -ENOMEM;
			goto destroy_pages;
		}
		start += ret;
	}

	exp_info.ops = &amdxdna_ubuf_dmabuf_ops;
	exp_info.priv = ubuf;
	exp_info.flags = O_RDWR | O_CLOEXEC;

	dbuf = dma_buf_export(&exp_info);
	if (IS_ERR(dbuf)) {
		ret = PTR_ERR(dbuf);
		goto destroy_pages;
	}
	kvfree(va_ent);

	return dbuf;

destroy_pages:
	if (start)
		unpin_user_pages(ubuf->pages, start);
	kvfree(ubuf->pages);
sub_pin_cnt:
	atomic64_sub(ubuf->nr_pages, &ubuf->mm->pinned_vm);
free_ent:
	kvfree(va_ent);
free_ubuf:
	mmdrop(ubuf->mm);
	kfree(ubuf);
	return ERR_PTR(ret);
}

View File

@@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2025, Advanced Micro Devices, Inc.
*/
#ifndef _AMDXDNA_UBUF_H_
#define _AMDXDNA_UBUF_H_
#include <drm/drm_device.h>
#include <linux/dma-buf.h>
enum amdxdna_ubuf_flag {
	/* DMA-map/unmap the sg_table for the attaching device on map_dma_buf */
	AMDXDNA_UBUF_FLAG_MAP_DMA = 1,
};
struct dma_buf *amdxdna_get_ubuf(struct drm_device *dev,
enum amdxdna_ubuf_flag flags,
u32 num_entries, void __user *va_entries);
#endif /* _AMDXDNA_UBUF_H_ */

View File

@@ -27,3 +27,26 @@ config DRM_ACCEL_HABANALABS
To compile this driver as a module, choose M here: the
module will be called habanalabs.
if DRM_ACCEL_HABANALABS
config HL_HLDIO
bool "Habanalabs NVMe Direct I/O (HLDIO)"
depends on PCI_P2PDMA
depends on BLOCK
help
Enable NVMe peer-to-peer direct I/O support for Habanalabs AI
accelerators.
This allows direct data transfers between NVMe storage devices
and Habanalabs accelerators without involving system memory,
using PCI peer-to-peer DMA capabilities.
Requirements:
- CONFIG_PCI_P2PDMA=y
- NVMe device and Habanalabs accelerator under same PCI root complex
- IOMMU disabled or in passthrough mode
- Hardware supporting PCI P2P DMA
If unsure, say N.
endif # DRM_ACCEL_HABANALABS

View File

@@ -13,3 +13,8 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/command_submission.o common/firmware_if.o \
common/security.o common/state_dump.o \
common/memory_mgr.o common/decoder.o
# Conditionally add HLDIO support
ifdef CONFIG_HL_HLDIO
HL_COMMON_FILES += common/hldio.o
endif

View File

@@ -6,6 +6,7 @@
*/
#include "habanalabs.h"
#include "hldio.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include <linux/pci.h>
@@ -602,6 +603,198 @@ static int engines_show(struct seq_file *s, void *data)
return 0;
}
#ifdef CONFIG_HL_HLDIO
/* DIO debugfs functions following the standard pattern */
/*
 * Debugfs read: print usage help and the size of the last SSD->device
 * transfer. Uses proper "\n" escapes so each item lands on its own line
 * (the previous strings contained literal backslash-n sequences).
 */
static int dio_ssd2hl_show(struct seq_file *s, void *data)
{
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;

	if (!hdev->asic_prop.supports_nvme) {
		seq_puts(s, "NVMe Direct I/O not supported\n");
		return 0;
	}

	seq_puts(s, "Usage: echo \"fd=N va=0xADDR off=N len=N\" > dio_ssd2hl\n");
	seq_printf(s, "Last transfer: %zu bytes\n", dev_entry->dio_stats.last_len_read);
	seq_puts(s, "Note: All parameters must be page-aligned (4KB)\n");

	return 0;
}
/*
 * Debugfs trigger for an SSD->device direct read.
 *
 * Input format: "fd=N va=0xADDR off=N len=N"; fd is an open O_DIRECT
 * file, va a device virtual address, off/len the file range. All values
 * must be page-aligned and len capped at 1GB. Log strings use proper
 * "\n" escapes (previously literal backslash-n).
 */
static ssize_t dio_ssd2hl_write(struct file *file, const char __user *buf,
				size_t count, loff_t *f_pos)
{
	struct seq_file *s = file->private_data;
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;
	struct hl_ctx *ctx = hdev->kernel_ctx;
	char kbuf[128];
	u64 device_va = 0, off_bytes = 0, len_bytes = 0;
	u32 fd = 0;
	size_t len_read = 0;
	int rc, parsed;

	if (!hdev->asic_prop.supports_nvme)
		return -EOPNOTSUPP;

	if (count >= sizeof(kbuf))
		return -EINVAL;

	if (copy_from_user(kbuf, buf, count))
		return -EFAULT;
	kbuf[count] = 0;

	/* Parse: fd=N va=0xADDR off=N len=N */
	parsed = sscanf(kbuf, "fd=%u va=0x%llx off=%llu len=%llu",
			&fd, &device_va, &off_bytes, &len_bytes);
	if (parsed != 4) {
		dev_err(hdev->dev, "Invalid format. Expected: fd=N va=0xADDR off=N len=N\n");
		return -EINVAL;
	}

	/* Validate file descriptor */
	if (fd == 0) {
		dev_err(hdev->dev, "Invalid file descriptor: %u\n", fd);
		return -EINVAL;
	}

	/* Validate alignment requirements */
	if (!IS_ALIGNED(device_va, PAGE_SIZE) ||
	    !IS_ALIGNED(off_bytes, PAGE_SIZE) ||
	    !IS_ALIGNED(len_bytes, PAGE_SIZE)) {
		dev_err(hdev->dev,
			"All parameters must be page-aligned (4KB)\n");
		return -EINVAL;
	}

	/* Validate transfer size */
	if (len_bytes == 0 || len_bytes > SZ_1G) {
		dev_err(hdev->dev, "Invalid length: %llu (max 1GB)\n",
			len_bytes);
		return -EINVAL;
	}

	dev_dbg(hdev->dev, "DIO SSD2HL: fd=%u va=0x%llx off=%llu len=%llu\n",
		fd, device_va, off_bytes, len_bytes);

	rc = hl_dio_ssd2hl(hdev, ctx, fd, device_va, off_bytes, len_bytes, &len_read);
	if (rc < 0) {
		dev_entry->dio_stats.failed_ops++;
		dev_err(hdev->dev, "SSD2HL operation failed: %d\n", rc);
		return rc;
	}

	/* Update statistics */
	dev_entry->dio_stats.total_ops++;
	dev_entry->dio_stats.successful_ops++;
	dev_entry->dio_stats.bytes_transferred += len_read;
	dev_entry->dio_stats.last_len_read = len_read;

	dev_dbg(hdev->dev, "DIO SSD2HL completed: %zu bytes transferred\n", len_read);

	return count;
}
/* Debugfs read: device->SSD direction is not implemented yet. */
static int dio_hl2ssd_show(struct seq_file *s, void *data)
{
	seq_puts(s, "HL2SSD (device-to-SSD) transfers not implemented\n");
	return 0;
}
/* Debugfs write stub: device->SSD transfers are not implemented yet. */
static ssize_t dio_hl2ssd_write(struct file *file, const char __user *buf,
				size_t count, loff_t *f_pos)
{
	struct seq_file *s = file->private_data;
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;

	if (!hdev->asic_prop.supports_nvme)
		return -EOPNOTSUPP;

	dev_dbg(hdev->dev, "HL2SSD operation not implemented\n");
	return -EOPNOTSUPP;
}
/*
 * Debugfs read: dump aggregate direct-I/O statistics. One item per line
 * using real "\n" escapes (previously literal backslash-n sequences).
 */
static int dio_stats_show(struct seq_file *s, void *data)
{
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;
	struct hl_dio_stats *stats = &dev_entry->dio_stats;
	u64 avg_bytes_per_op = 0, success_rate = 0;

	if (!hdev->asic_prop.supports_nvme) {
		seq_puts(s, "NVMe Direct I/O not supported\n");
		return 0;
	}

	/* Derive averages only when the denominators are non-zero. */
	if (stats->successful_ops > 0)
		avg_bytes_per_op = stats->bytes_transferred / stats->successful_ops;
	if (stats->total_ops > 0)
		success_rate = (stats->successful_ops * 100) / stats->total_ops;

	seq_puts(s, "=== Habanalabs Direct I/O Statistics ===\n");
	seq_printf(s, "Total operations: %llu\n", stats->total_ops);
	seq_printf(s, "Successful ops: %llu\n", stats->successful_ops);
	seq_printf(s, "Failed ops: %llu\n", stats->failed_ops);
	seq_printf(s, "Success rate: %llu%%\n", success_rate);
	seq_printf(s, "Total bytes: %llu\n", stats->bytes_transferred);
	seq_printf(s, "Avg bytes per op: %llu\n", avg_bytes_per_op);
	seq_printf(s, "Last transfer: %zu bytes\n", stats->last_len_read);

	return 0;
}
/* Debugfs read: print usage for the statistics-reset control. */
static int dio_reset_show(struct seq_file *s, void *data)
{
	seq_puts(s, "Write '1' to reset DIO statistics\n");
	return 0;
}
/*
 * Debugfs write: "1" clears the direct-I/O statistics; anything else is
 * rejected with -EINVAL. Log strings use real "\n" escapes.
 */
static ssize_t dio_reset_write(struct file *file, const char __user *buf,
			       size_t count, loff_t *f_pos)
{
	struct seq_file *s = file->private_data;
	struct hl_debugfs_entry *entry = s->private;
	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
	struct hl_device *hdev = dev_entry->hdev;
	char kbuf[8];
	unsigned long val;
	int rc;

	if (!hdev->asic_prop.supports_nvme)
		return -EOPNOTSUPP;

	if (count >= sizeof(kbuf))
		return -EINVAL;

	if (copy_from_user(kbuf, buf, count))
		return -EFAULT;
	kbuf[count] = 0;

	rc = kstrtoul(kbuf, 0, &val);
	if (rc)
		return rc;

	if (val == 1) {
		memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));
		dev_dbg(hdev->dev, "DIO statistics reset\n");
	} else {
		dev_err(hdev->dev, "Write '1' to reset statistics\n");
		return -EINVAL;
	}

	return count;
}
#endif
static ssize_t hl_memory_scrub(struct file *f, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -788,6 +981,113 @@ static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val,
}
}
static void dump_cfg_access_entry(struct hl_device *hdev,
struct hl_debugfs_cfg_access_entry *entry)
{
char *access_type = "";
struct tm tm;
switch (entry->debugfs_type) {
case DEBUGFS_READ32:
access_type = "READ32 from";
break;
case DEBUGFS_WRITE32:
access_type = "WRITE32 to";
break;
case DEBUGFS_READ64:
access_type = "READ64 from";
break;
case DEBUGFS_WRITE64:
access_type = "WRITE64 to";
break;
default:
dev_err(hdev->dev, "Invalid DEBUGFS access type (%u)\n", entry->debugfs_type);
return;
}
time64_to_tm(entry->seconds_since_epoch, 0, &tm);
dev_info(hdev->dev,
"%ld-%02d-%02d %02d:%02d:%02d (UTC): %s %#llx\n", tm.tm_year + 1900, tm.tm_mon + 1,
tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, access_type, entry->addr);
}
/*
 * Dump the recent debugfs CFG-access history (newest first) to the
 * kernel log, marking entries consumed as they are printed. Entries
 * older than HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC stop the walk.
 *
 * NOTE(review): the lock is dropped around the printk; a concurrent
 * writer may overwrite slot @i in that window, so the copy printed is
 * consistent but the "mark consumed" below may clear a newer entry —
 * confirm this loss is acceptable for a best-effort debug dump.
 */
void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev)
{
	struct hl_debugfs_cfg_access *dbgfs = &hdev->debugfs_cfg_accesses;
	u32 i, head, count = 0;
	time64_t entry_time, now;
	unsigned long flags;

	now = ktime_get_real_seconds();

	spin_lock_irqsave(&dbgfs->lock, flags);
	head = dbgfs->head;

	/* Start from the most recently written slot (head points past it). */
	if (head == 0)
		i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;
	else
		i = head - 1;

	/* Walk back until timeout or invalid entry */
	while (dbgfs->cfg_access_list[i].valid) {
		entry_time = dbgfs->cfg_access_list[i].seconds_since_epoch;

		/* Stop when entry is older than timeout */
		if (now - entry_time > HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC)
			break;

		/* print single entry under lock */
		{
			struct hl_debugfs_cfg_access_entry entry = dbgfs->cfg_access_list[i];

			/*
			 * We copy the entry out under lock and then print after
			 * releasing the lock to minimize time under lock.
			 */
			spin_unlock_irqrestore(&dbgfs->lock, flags);
			dump_cfg_access_entry(hdev, &entry);
			spin_lock_irqsave(&dbgfs->lock, flags);
		}

		/* mark consumed */
		dbgfs->cfg_access_list[i].valid = false;

		if (i == 0)
			i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;
		else
			i--;

		count++;
		/* One full lap of the ring is the hard stop. */
		if (count >= HL_DBGFS_CFG_ACCESS_HIST_LEN)
			break;
	}
	spin_unlock_irqrestore(&dbgfs->lock, flags);
}
/*
 * Record a debugfs access into the CFG-access history ring when the
 * whole access falls inside the PCI CFG region; otherwise do nothing.
 */
static void check_if_cfg_access_and_log(struct hl_device *hdev, u64 addr, size_t access_size,
					enum debugfs_access_type access_type)
{
	struct hl_debugfs_cfg_access *hist = &hdev->debugfs_cfg_accesses;
	struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG];
	struct hl_debugfs_cfg_access_entry *slot;
	unsigned long flags;

	/* Ignore accesses that do not fit entirely inside the CFG region. */
	if (addr < cfg_region->region_base ||
	    cfg_region->region_size < access_size ||
	    addr > cfg_region->region_base + cfg_region->region_size - access_size)
		return;

	spin_lock_irqsave(&hist->lock, flags);
	slot = &hist->cfg_access_list[hist->head];
	slot->seconds_since_epoch = ktime_get_real_seconds();
	slot->addr = addr;
	slot->debugfs_type = access_type;
	slot->valid = true;
	hist->head = (hist->head + 1) % HL_DBGFS_CFG_ACCESS_HIST_LEN;
	spin_unlock_irqrestore(&hist->lock, flags);
}
static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type)
{
@@ -805,6 +1105,7 @@ static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,
return rc;
}
check_if_cfg_access_and_log(hdev, addr, acc_size, acc_type);
rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found);
if (rc) {
dev_err(hdev->dev,
@@ -1525,6 +1826,13 @@ static const struct hl_info_list hl_debugfs_list[] = {
{"mmu", mmu_show, mmu_asid_va_write},
{"mmu_error", mmu_ack_error, mmu_ack_error_value_write},
{"engines", engines_show, NULL},
#ifdef CONFIG_HL_HLDIO
/* DIO entries - only created if NVMe is supported */
{"dio_ssd2hl", dio_ssd2hl_show, dio_ssd2hl_write},
{"dio_stats", dio_stats_show, NULL},
{"dio_reset", dio_reset_show, dio_reset_write},
{"dio_hl2ssd", dio_hl2ssd_show, dio_hl2ssd_write},
#endif
};
static int hl_debugfs_open(struct inode *inode, struct file *file)
@@ -1723,6 +2031,11 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hdev->asic_prop.server_type);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
/* Skip DIO entries if NVMe is not supported */
if (strncmp(hl_debugfs_list[i].name, "dio_", 4) == 0 &&
!hdev->asic_prop.supports_nvme)
continue;
debugfs_create_file(hl_debugfs_list[i].name,
0644,
root,
@@ -1762,6 +2075,14 @@ int hl_debugfs_device_init(struct hl_device *hdev)
spin_lock_init(&dev_entry->userptr_spinlock);
mutex_init(&dev_entry->ctx_mem_hash_mutex);
spin_lock_init(&hdev->debugfs_cfg_accesses.lock);
hdev->debugfs_cfg_accesses.head = 0; /* already zero by alloc but explicit init is fine */
#ifdef CONFIG_HL_HLDIO
/* Initialize DIO statistics */
memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));
#endif
return 0;
}
@@ -1780,6 +2101,7 @@ void hl_debugfs_device_fini(struct hl_device *hdev)
vfree(entry->state_dump[i]);
kfree(entry->entry_arr);
}
void hl_debugfs_add_device(struct hl_device *hdev)
@@ -1792,6 +2114,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
if (!hdev->asic_prop.fw_security_enabled)
add_secured_nodes(dev_entry, dev_entry->root);
}
void hl_debugfs_add_file(struct hl_fpriv *hpriv)
@@ -1924,3 +2247,4 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
up_write(&dev_entry->state_dump_sem);
}

View File

@@ -1630,6 +1630,11 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR);
reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release;
if (hdev->cpld_shutdown) {
dev_err(hdev->dev, "Cannot reset device, cpld is shutdown! Device is NOT usable\n");
return -EIO;
}
if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) {
dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n");
return 0;
@@ -2576,6 +2581,14 @@ void hl_device_fini(struct hl_device *hdev)
if (rc)
dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc);
/* Reset the H/W (if it accessible). It will be in idle state after this returns */
if (!hdev->cpld_shutdown) {
rc = hdev->asic_funcs->hw_fini(hdev, true, false);
if (rc)
dev_err(hdev->dev,
"hw_fini failed in device fini while removing device %d\n", rc);
}
hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
/* Release kernel context */
@@ -2943,3 +2956,13 @@ void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *eve
mutex_unlock(&clk_throttle->lock);
}
/*
 * Event-queue handler for a CPLD shutdown event: record it as a critical
 * HW error, flag the device as unavailable to user space, and block any
 * further hardware access by marking the device disabled and shut down.
 */
void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask)
{
	hl_handle_critical_hw_err(hdev, event_id, event_mask);
	*event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;

	/* Avoid any new accesses to the H/W */
	hdev->disabled = true;
	hdev->cpld_shutdown = true;
}

View File

@@ -90,7 +90,9 @@ struct hl_fpriv;
#define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF
#define HL_COMMON_DEC_INTERRUPT_ID 0xFFE
#define HL_STATE_DUMP_HIST_LEN 5
#define HL_STATE_DUMP_HIST_LEN 5
#define HL_DBGFS_CFG_ACCESS_HIST_LEN 20
#define HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC 2 /* 2s */
/* Default value for device reset trigger , an invalid value */
#define HL_RESET_TRIGGER_DEFAULT 0xFF
@@ -702,6 +704,7 @@ struct hl_hints_range {
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
* @supports_engine_modes: true if changing engines/engine_cores modes is supported.
* @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
* @supports_nvme: indicates whether the asic supports NVMe P2P DMA.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -822,6 +825,7 @@ struct asic_fixed_properties {
u8 supports_advanced_cpucp_rc;
u8 supports_engine_modes;
u8 support_dynamic_resereved_fw_size;
u8 supports_nvme;
};
/**
@@ -2274,6 +2278,9 @@ struct hl_vm {
u8 init_done;
};
#ifdef CONFIG_HL_HLDIO
#include "hldio.h"
#endif
/*
* DEBUG, PROFILING STRUCTURE
@@ -2344,7 +2351,6 @@ struct hl_fpriv {
struct mutex ctx_lock;
};
/*
* DebugFS
*/
@@ -2372,6 +2378,7 @@ struct hl_debugfs_entry {
struct hl_dbg_device_entry *dev_entry;
};
/**
* struct hl_dbg_device_entry - ASIC specific debugfs manager.
* @root: root dentry.
@@ -2403,6 +2410,7 @@ struct hl_debugfs_entry {
* @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
* @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
* @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read.
* @dio_stats: Direct I/O statistics
*/
struct hl_dbg_device_entry {
struct dentry *root;
@@ -2434,6 +2442,35 @@ struct hl_dbg_device_entry {
u8 i2c_addr;
u8 i2c_reg;
u8 i2c_len;
#ifdef CONFIG_HL_HLDIO
struct hl_dio_stats dio_stats;
#endif
};
/**
* struct hl_debugfs_cfg_access_entry - single debugfs config access object, member of
* hl_debugfs_cfg_access.
* @seconds_since_epoch: seconds since January 1, 1970, used for time comparisons.
* @debugfs_type: the debugfs operation requested, can be READ32, WRITE32, READ64 or WRITE64.
* @addr: the requested address to access.
* @valid: if set, this entry has valid data for dumping at interrupt time.
*/
struct hl_debugfs_cfg_access_entry {
ktime_t seconds_since_epoch;
enum debugfs_access_type debugfs_type;
u64 addr;
bool valid;
};
/**
 * struct hl_debugfs_cfg_access - saves debugfs config region access requests history.
 * @cfg_access_list: list of objects describing config region access requests.
 * @head: next valid index to add new entry to in cfg_access_list.
 * @lock: protects @head and the ring entries against concurrent access.
 */
struct hl_debugfs_cfg_access {
	struct hl_debugfs_cfg_access_entry cfg_access_list[HL_DBGFS_CFG_ACCESS_HIST_LEN];
	u32 head;
	spinlock_t lock; /* protects head and entries */
};
/**
@@ -3281,6 +3318,7 @@ struct eq_heartbeat_debug_info {
* @hl_chip_info: ASIC's sensors information.
* @device_status_description: device status description.
* @hl_debugfs: device's debugfs manager.
* @debugfs_cfg_accesses: list of last debugfs config region accesses.
* @cb_pool: list of pre allocated CBs.
* @cb_pool_lock: protects the CB pool.
* @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
@@ -3305,6 +3343,7 @@ struct eq_heartbeat_debug_info {
* @captured_err_info: holds information about errors.
* @reset_info: holds current device reset information.
* @heartbeat_debug_info: counters used to debug heartbeat failures.
* @hldio: describes habanalabs direct storage interaction interface.
* @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling.
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @fw_inner_major_ver: the major of current loaded preboot inner version.
@@ -3357,6 +3396,7 @@ struct eq_heartbeat_debug_info {
* addresses.
* @is_in_dram_scrub: true if dram scrub operation is on going.
* @disabled: is device disabled.
* @cpld_shutdown: is cpld shutdown.
* @late_init_done: is late init stage was done during initialization.
* @hwmon_initialized: is H/W monitor sensors was initialized.
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false
@@ -3461,6 +3501,7 @@ struct hl_device {
struct hwmon_chip_info *hl_chip_info;
struct hl_dbg_device_entry hl_debugfs;
struct hl_debugfs_cfg_access debugfs_cfg_accesses;
struct list_head cb_pool;
spinlock_t cb_pool_lock;
@@ -3496,7 +3537,9 @@ struct hl_device {
struct hl_reset_info reset_info;
struct eq_heartbeat_debug_info heartbeat_debug_info;
#ifdef CONFIG_HL_HLDIO
struct hl_dio hldio;
#endif
cpumask_t irq_affinity_mask;
u32 *stream_master_qid_arr;
@@ -3532,6 +3575,7 @@ struct hl_device {
u16 cpu_pci_msb_addr;
u8 is_in_dram_scrub;
u8 disabled;
u8 cpld_shutdown;
u8 late_init_done;
u8 hwmon_initialized;
u8 reset_on_lockup;
@@ -4089,6 +4133,7 @@ void hl_init_cpu_for_irq(struct hl_device *hdev);
void hl_set_irq_affinity(struct hl_device *hdev, int irq);
void hl_eq_heartbeat_event_handle(struct hl_device *hdev);
void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask);
void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask);
#ifdef CONFIG_DEBUG_FS
@@ -4110,6 +4155,7 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
unsigned long length);
void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev);
#else
@@ -4185,6 +4231,10 @@ static inline void hl_debugfs_set_state_dump(struct hl_device *hdev,
{
}
static inline void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev)
{
}
#endif
/* Security */

View File

@@ -961,6 +961,12 @@ static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *
case HL_PASSTHROUGH_VERSIONS:
need_input_buff = false;
break;
case HL_GET_ERR_COUNTERS_CMD:
need_input_buff = true;
break;
case HL_GET_P_STATE:
need_input_buff = false;
break;
default:
return -EINVAL;
}

View File

@@ -0,0 +1,437 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2024 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "habanalabs.h"
#include "hldio.h"
#include <generated/uapi/linux/version.h>
#include <linux/pci-p2pdma.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
/*
* NVMe Direct I/O implementation for habanalabs driver
*
* ASSUMPTIONS
* ===========
* 1. No IOMMU (well, technically it can work with IOMMU, but it is *almost useless).
* 2. Only READ operations (can extend in the future).
* 3. No sparse files (can overcome this in the future).
* 4. Kernel version >= 6.9
* 5. Requiring page alignment is OK (I don't see a solution to this one right,
* now, how do we read partial pages?)
* 6. Kernel compiled with CONFIG_PCI_P2PDMA. This requires a CUSTOM kernel.
* Theoretically I have a slight idea on how this could be solvable, but it
* is probably inacceptable for the upstream. Also may not work in the end.
* 7. Either make sure our cards and disks are under the same PCI bridge, or
* compile a custom kernel to hack around this.
*/
#define IO_STABILIZE_TIMEOUT 10000000 /* 10 seconds in microseconds */
/*
* This struct contains all the useful data I could milk out of the file handle
* provided by the user.
* @TODO: right now it is retrieved on each IO, but can be done once with some
* dedicated IOCTL, call it for example HL_REGISTER_HANDLE.
*/
/* A user file handle bound to a habanalabs context for direct I/O. */
struct hl_dio_fd {
	/* Back pointer in case we need it in async completion */
	struct hl_ctx *ctx;
	/* Associated fd struct */
	struct file *filp;
};
/*
* This is a single IO descriptor
*/
struct hl_direct_io {
	struct hl_dio_fd f;	/* registered source file (see hl_dio_fd_register()) */
	struct kiocb kio;	/* kernel IO control block for the read_iter call */
	struct bio_vec *bv;	/* page vector backing @iter — presumably allocated per IO, see hl_direct_io() */
	struct iov_iter iter;	/* iterator handed to ->read_iter */
	u64 device_va;		/* device virtual address (page aligned) */
	u64 off_bytes;		/* file offset in bytes (page aligned) */
	u64 len_bytes;		/* transfer length in bytes (page aligned) */
	u32 type;		/* IO type — NOTE(review): semantics not visible here, confirm */
};
/* True when the ASIC properties advertise NVMe P2P direct I/O support. */
bool hl_device_supports_nvme(struct hl_device *hdev)
{
	return hdev->asic_prop.supports_nvme;
}
static int hl_dio_fd_register(struct hl_ctx *ctx, int fd, struct hl_dio_fd *f)
{
struct hl_device *hdev = ctx->hdev;
struct block_device *bd;
struct super_block *sb;
struct inode *inode;
struct gendisk *gd;
struct device *disk_dev;
int rc;
f->filp = fget(fd);
if (!f->filp) {
rc = -ENOENT;
goto out;
}
if (!(f->filp->f_flags & O_DIRECT)) {
dev_err(hdev->dev, "file is not in the direct mode\n");
rc = -EINVAL;
goto fput;
}
if (!f->filp->f_op->read_iter) {
dev_err(hdev->dev, "read iter is not supported, need to fall back to legacy\n");
rc = -EINVAL;
goto fput;
}
inode = file_inode(f->filp);
sb = inode->i_sb;
bd = sb->s_bdev;
gd = bd->bd_disk;
if (inode->i_blocks << sb->s_blocksize_bits < i_size_read(inode)) {
dev_err(hdev->dev, "sparse files are not currently supported\n");
rc = -EINVAL;
goto fput;
}
if (!bd || !gd) {
dev_err(hdev->dev, "invalid block device\n");
rc = -ENODEV;
goto fput;
}
/* Get the underlying device from the block device */
disk_dev = disk_to_dev(gd);
if (!dma_pci_p2pdma_supported(disk_dev)) {
dev_err(hdev->dev, "device does not support PCI P2P DMA\n");
rc = -EOPNOTSUPP;
goto fput;
}
/*
* @TODO: Maybe we need additional checks here
*/
f->ctx = ctx;
rc = 0;
goto out;
fput:
fput(f->filp);
out:
return rc;
}
/* Release the file reference taken by hl_dio_fd_register(). */
static void hl_dio_fd_unregister(struct hl_dio_fd *f)
{
	fput(f->filp);
}
static long hl_dio_count_io(struct hl_device *hdev)
{
s64 sum = 0;
int i;
for_each_possible_cpu(i)
sum += per_cpu(*hdev->hldio.inflight_ios, i);
return sum;
}
/*
 * Take an IO-path reference for a new IO.
 *
 * Returns true if the IO may proceed. The inflight counter is incremented
 * before re-checking io_enabled so that hl_dio_stop(), which first clears
 * io_enabled and then waits for the counter to drain, cannot miss an IO
 * that raced with the disable. On success a context reference is also held
 * until hl_dio_put_iopath().
 */
static bool hl_dio_get_iopath(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	if (hdev->hldio.io_enabled) {
		this_cpu_inc(*hdev->hldio.inflight_ios);
		/* Avoid race conditions: io_enabled may have been cleared
		 * between the check above and the increment, so re-check and
		 * back out if the IO path was shut down meanwhile.
		 */
		if (!hdev->hldio.io_enabled) {
			this_cpu_dec(*hdev->hldio.inflight_ios);
			return false;
		}
		hl_ctx_get(ctx);
		return true;
	}
	return false;
}
/*
 * Release the IO-path reference taken by hl_dio_get_iopath():
 * drop the context reference and decrement the inflight counter.
 */
static void hl_dio_put_iopath(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	hl_ctx_put(ctx);
	this_cpu_dec(*hdev->hldio.inflight_ios);
}
/*
 * Enable/disable submission of new direct IOs.
 *
 * NOTE(review): plain store with no explicit barrier; relies on the
 * double-check in hl_dio_get_iopath() and the drain poll in hl_dio_stop()
 * for correctness — confirm ordering guarantees are sufficient.
 */
static void hl_dio_set_io_enabled(struct hl_device *hdev, bool enabled)
{
	hdev->hldio.io_enabled = enabled;
}
/*
 * Sanity-check a single direct IO: device VA, length and file offset must
 * all be page (4K) aligned. Returns true when the IO is acceptable.
 */
static bool hl_dio_validate_io(struct hl_device *hdev, struct hl_direct_io *io)
{
	/* device_va is already u64, no cast needed for the mask test */
	if (io->device_va & ~PAGE_MASK) {
		dev_dbg(hdev->dev, "device address must be 4K aligned\n");
		return false;
	}

	if (io->len_bytes & ~PAGE_MASK) {
		dev_dbg(hdev->dev, "IO length must be 4K aligned\n");
		return false;
	}

	if (io->off_bytes & ~PAGE_MASK) {
		dev_dbg(hdev->dev, "IO offset must be 4K aligned\n");
		return false;
	}

	return true;
}
/*
 * Translate a device virtual address to the struct page backing it in one
 * of the registered P2P regions.
 *
 * Returns NULL if the MMU translation fails or the resulting physical
 * address is not covered by any P2P region.
 */
static struct page *hl_dio_va2page(struct hl_device *hdev, struct hl_ctx *ctx, u64 device_va)
{
	struct hl_dio *hldio = &hdev->hldio;
	u64 device_pa;
	int rc, i;

	rc = hl_mmu_va_to_pa(ctx, device_va, &device_pa);
	if (rc) {
		dev_err(hdev->dev, "device virtual address translation error: %#llx (%d)",
			device_va, rc);
		return NULL;
	}

	for (i = 0 ; i < hldio->np2prs ; ++i) {
		struct hl_p2p_region *p2pr = &hldio->p2prs[i];

		if (device_pa < p2pr->device_pa ||
		    device_pa >= p2pr->device_pa + p2pr->size)
			continue;

		return p2pr->p2ppages[(device_pa - p2pr->device_pa) >> PAGE_SHIFT];
	}

	return NULL;
}
static ssize_t hl_direct_io(struct hl_device *hdev, struct hl_direct_io *io)
{
u64 npages, device_va;
ssize_t rc;
int i;
if (!hl_dio_validate_io(hdev, io))
return -EINVAL;
if (!hl_dio_get_iopath(io->f.ctx)) {
dev_info(hdev->dev, "can't schedule a new IO, IO is disabled\n");
return -ESHUTDOWN;
}
init_sync_kiocb(&io->kio, io->f.filp);
io->kio.ki_pos = io->off_bytes;
npages = (io->len_bytes >> PAGE_SHIFT);
/* @TODO: this can be implemented smarter, vmalloc in iopath is not
* ideal. Maybe some variation of genpool. Number of pages may differ
* greatly, so maybe even use pools of different sizes and chose the
* closest one.
*/
io->bv = vzalloc(npages * sizeof(struct bio_vec));
if (!io->bv)
return -ENOMEM;
for (i = 0, device_va = io->device_va; i < npages ; ++i, device_va += PAGE_SIZE) {
io->bv[i].bv_page = hl_dio_va2page(hdev, io->f.ctx, device_va);
if (!io->bv[i].bv_page) {
dev_err(hdev->dev, "error getting page struct for device va %#llx",
device_va);
rc = -EFAULT;
goto cleanup;
}
io->bv[i].bv_offset = 0;
io->bv[i].bv_len = PAGE_SIZE;
}
iov_iter_bvec(&io->iter, io->type, io->bv, 1, io->len_bytes);
if (io->f.filp->f_op && io->f.filp->f_op->read_iter)
rc = io->f.filp->f_op->read_iter(&io->kio, &io->iter);
else
rc = -EINVAL;
cleanup:
vfree(io->bv);
hl_dio_put_iopath(io->f.ctx);
dev_dbg(hdev->dev, "IO ended with %ld\n", rc);
return rc;
}
/*
 * @TODO: This function can be used as a callback for io completion under
 * kio->ki_complete in order to implement async IO.
 * Note that on more recent kernels there is no ret2.
 *
 * Currently unused (hence __maybe_unused); the sync path in hl_dio_ssd2hl()
 * releases the iopath and the fd itself.
 */
__maybe_unused static void hl_direct_io_complete(struct kiocb *kio, long ret, long ret2)
{
	struct hl_direct_io *io = container_of(kio, struct hl_direct_io, kio);

	dev_dbg(io->f.ctx->hdev->dev, "IO completed with %ld\n", ret);

	/* Do something to copy result to user / notify completion */
	hl_dio_put_iopath(io->f.ctx);
	hl_dio_fd_unregister(&io->f);
}
/*
 * DMA disk to ASIC, wait for results. Must be invoked from the user context
 */
int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
		  u64 device_va, off_t off_bytes, size_t len_bytes,
		  size_t *len_read)
{
	struct hl_direct_io *io;
	ssize_t rc;

	dev_dbg(hdev->dev, "SSD2HL fd=%d va=%#llx len=%#lx\n", fd, device_va, len_bytes);

	io = kzalloc(sizeof(*io), GFP_KERNEL);
	if (!io)
		return -ENOMEM;

	io->device_va = device_va;
	io->len_bytes = len_bytes;
	io->off_bytes = off_bytes;
	io->type = READ;

	rc = hl_dio_fd_register(ctx, fd, &io->f);
	if (rc)
		goto free_io;

	rc = hl_direct_io(hdev, io);
	if (rc >= 0) {
		*len_read = rc;
		rc = 0;
	}

	/* This shall be called only in the case of a sync IO */
	hl_dio_fd_unregister(&io->f);

free_io:
	kfree(io);
	return rc;
}
/* Release the page table and the P2P allocation of a single region */
static void hl_p2p_region_fini(struct hl_device *hdev, struct hl_p2p_region *p2pr)
{
	/* vfree(NULL) is a no-op, so no guard is needed */
	vfree(p2pr->p2ppages);
	p2pr->p2ppages = NULL;

	if (p2pr->p2pmem) {
		dev_dbg(hdev->dev, "freeing P2P mem from %p, size=%#llx\n",
			p2pr->p2pmem, p2pr->size);
		pci_free_p2pmem(hdev->pdev, p2pr->p2pmem, p2pr->size);
		p2pr->p2pmem = NULL;
	}
}
/* Tear down every registered P2P region and free the region array itself */
void hl_p2p_region_fini_all(struct hl_device *hdev)
{
	int i;

	for (i = 0 ; i < hdev->hldio.np2prs ; ++i)
		hl_p2p_region_fini(hdev, &hdev->hldio.p2prs[i]);

	kvfree(hdev->hldio.p2prs);
	hdev->hldio.p2prs = NULL;
	hdev->hldio.np2prs = 0;
}
int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
{
void *addr;
int rc, i;
/* Start by publishing our p2p memory */
rc = pci_p2pdma_add_resource(hdev->pdev, p2pr->bar, p2pr->size, p2pr->bar_offset);
if (rc) {
dev_err(hdev->dev, "error adding p2p resource: %d\n", rc);
goto err;
}
/* Alloc all p2p mem */
p2pr->p2pmem = pci_alloc_p2pmem(hdev->pdev, p2pr->size);
if (!p2pr->p2pmem) {
dev_err(hdev->dev, "error allocating p2p memory\n");
rc = -ENOMEM;
goto err;
}
p2pr->p2ppages = vmalloc((p2pr->size >> PAGE_SHIFT) * sizeof(struct page *));
if (!p2pr->p2ppages) {
rc = -ENOMEM;
goto err;
}
for (i = 0, addr = p2pr->p2pmem ; i < (p2pr->size >> PAGE_SHIFT) ; ++i, addr += PAGE_SIZE) {
p2pr->p2ppages[i] = virt_to_page(addr);
if (!p2pr->p2ppages[i]) {
rc = -EFAULT;
goto err;
}
}
return 0;
err:
hl_p2p_region_fini(hdev, p2pr);
return rc;
}
/*
 * Allocate the per-cpu inflight-IO counter and enable direct IO submission.
 * Return: 0 on success, -ENOMEM if the per-cpu allocation fails.
 */
int hl_dio_start(struct hl_device *hdev)
{
	dev_dbg(hdev->dev, "initializing HLDIO\n");

	/* Initialize the IO counter and enable IO */
	hdev->hldio.inflight_ios = alloc_percpu(s64);
	if (!hdev->hldio.inflight_ios)
		return -ENOMEM;

	hl_dio_set_io_enabled(hdev, true);

	return 0;
}
/*
 * Disable new direct-IO submission, wait for in-flight IOs to drain, then
 * release the per-cpu counter. Safe to call when hl_dio_start() failed or
 * was never called.
 */
void hl_dio_stop(struct hl_device *hdev)
{
	dev_dbg(hdev->dev, "deinitializing HLDIO\n");

	if (hdev->hldio.io_enabled) {
		/* Block new submissions, then wait for all the IO to finish */
		hl_dio_set_io_enabled(hdev, false);
		hl_poll_timeout_condition(hdev, !hl_dio_count_io(hdev), 1000, IO_STABILIZE_TIMEOUT);
	}

	/* free_percpu(NULL) is a no-op, so no guard is needed */
	free_percpu(hdev->hldio.inflight_ios);
	hdev->hldio.inflight_ios = NULL;
}

View File

@@ -0,0 +1,146 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* hldio.h - NVMe Direct I/O (HLDIO) infrastructure for Habana Labs Driver
*
* This feature requires specific hardware setup and must not be built
* under COMPILE_TEST.
*/
#ifndef __HL_HLDIO_H__
#define __HL_HLDIO_H__
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/ktime.h> /* ktime functions */
#include <linux/delay.h> /* usleep_range */
#include <linux/kernel.h> /* might_sleep_if */
#include <linux/errno.h> /* error codes */
/* Forward declarations */
struct hl_device;
struct file;
/* Enable only if Kconfig selected */
#ifdef CONFIG_HL_HLDIO
/**
 * struct hl_p2p_region - describes a single P2P memory region
 * @p2ppages: array of page structs for the P2P memory, one per 4K page
 * @p2pmem: CPU virtual address of the P2P memory, as returned by
 *          pci_alloc_p2pmem()
 * @device_pa: physical address on the device
 * @bar_offset: offset within the BAR
 * @size: size of the region in bytes
 * @bar: BAR number containing this region
 */
struct hl_p2p_region {
	struct page **p2ppages;
	void *p2pmem;
	u64 device_pa;
	u64 bar_offset;
	u64 size;
	int bar;
};
/**
 * struct hl_dio_stats - Direct I/O statistics
 * @total_ops: total number of operations attempted
 * @successful_ops: number of successful operations
 * @failed_ops: number of failed operations
 * @bytes_transferred: total bytes successfully transferred
 * @last_len_read: length, in bytes, of the last read operation
 *
 * NOTE(review): counters are not updated anywhere in the visible code;
 * verify the ioctl/debugfs layer maintains them.
 */
struct hl_dio_stats {
	u64 total_ops;
	u64 successful_ops;
	u64 failed_ops;
	u64 bytes_transferred;
	size_t last_len_read;
};
/**
 * struct hl_dio - describes habanalabs direct storage interaction interface
 * @p2prs: array of p2p regions
 * @inflight_ios: percpu counter for inflight ios; summed across CPUs by the
 *                stop path to wait for IOs to drain
 * @np2prs: number of elements in p2prs
 * @io_enabled: 1 if io is enabled 0 otherwise; cleared first during
 *              teardown so no new IO can take an iopath reference
 */
struct hl_dio {
	struct hl_p2p_region *p2prs;
	s64 __percpu *inflight_ios;
	u8 np2prs;
	u8 io_enabled;
};
int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
u64 device_va, off_t off_bytes, size_t len_bytes,
size_t *len_read);
void hl_p2p_region_fini_all(struct hl_device *hdev);
int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr);
int hl_dio_start(struct hl_device *hdev);
void hl_dio_stop(struct hl_device *hdev);
/* Init/teardown */
int hl_hldio_init(struct hl_device *hdev);
void hl_hldio_fini(struct hl_device *hdev);
/* File operations */
long hl_hldio_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
/* DebugFS hooks */
#ifdef CONFIG_DEBUG_FS
void hl_hldio_debugfs_init(struct hl_device *hdev);
void hl_hldio_debugfs_fini(struct hl_device *hdev);
#else
static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
#endif
#else /* !CONFIG_HL_HLDIO */
struct hl_p2p_region;
/*
 * Stubs when HLDIO is disabled: data-path entry points report -EOPNOTSUPP,
 * the ioctl stub reports -ENOTTY (unknown ioctl), and init/teardown succeed
 * silently so callers need no special casing.
 */
static inline int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd,
				u64 device_va, off_t off_bytes, size_t len_bytes,
				size_t *len_read)
{ return -EOPNOTSUPP; }
static inline void hl_p2p_region_fini_all(struct hl_device *hdev) {}
static inline int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr)
{ return -EOPNOTSUPP; }
static inline int hl_dio_start(struct hl_device *hdev) { return -EOPNOTSUPP; }
static inline void hl_dio_stop(struct hl_device *hdev) {}
static inline int hl_hldio_init(struct hl_device *hdev) { return 0; }
static inline void hl_hldio_fini(struct hl_device *hdev) { }
static inline long hl_hldio_ioctl(struct file *f, unsigned int c,
				  unsigned long a)
{ return -ENOTTY; }
static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { }
static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { }
#endif /* CONFIG_HL_HLDIO */
/* Simplified polling macro for HLDIO (no simulator support) */
/*
 * Poll until @cond becomes true or @timeout_us microseconds elapse.
 * Evaluates to 0 on success, -ETIMEDOUT otherwise. A zero @timeout_us
 * polls forever; a zero @sleep_us busy-polls.
 *
 * NOTE: @cond and @sleep_us are evaluated multiple times — pass only
 * side-effect-free expressions.
 */
#define hl_poll_timeout_condition(hdev, cond, sleep_us, timeout_us) \
({ \
	ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \
	might_sleep_if(sleep_us); \
	(void)(hdev); /* keep signature consistent, hdev unused */ \
	for (;;) { \
		mb(); /* ensure ordering of memory operations */ \
		if (cond) \
			break; \
		if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \
			break; \
		if (sleep_us) \
			usleep_range((sleep_us >> 2) + 1, sleep_us); \
	} \
	(cond) ? 0 : -ETIMEDOUT; \
})
/* Reports the ASIC's NVMe direct-IO capability; always false when HLDIO is off */
#ifdef CONFIG_HL_HLDIO
bool hl_device_supports_nvme(struct hl_device *hdev);
#else
static inline bool hl_device_supports_nvme(struct hl_device *hdev) { return false; }
#endif
#endif /* __HL_HLDIO_H__ */

View File

@@ -1837,7 +1837,12 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf)
atomic_dec(&ctx->hdev->dmabuf_export_cnt);
hl_ctx_put(ctx);
/* Paired with get_file() in export_dmabuf() */
/*
* Paired with get_file() in export_dmabuf().
* 'ctx' can be still used here to get the file pointer, even after hl_ctx_put() was called,
* because releasing the compute device file involves another reference decrement, and it
* would be possible only after calling fput().
*/
fput(ctx->hpriv->file_priv->filp);
kfree(hl_dmabuf);
@@ -2332,7 +2337,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
if (rc < 0)
goto destroy_pages;
npages = rc;
rc = -EFAULT;
rc = -ENOMEM;
goto put_pages;
}
userptr->npages = npages;

View File

@@ -259,13 +259,8 @@ int hl_mem_mgr_mmap(struct hl_mem_mgr *mmg, struct vm_area_struct *vma,
goto put_mem;
}
#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start,
user_mem_size)) {
#else
if (!access_ok((void __user *)(uintptr_t)vma->vm_start,
user_mem_size)) {
#endif
dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n",
buf->behavior->topic, vma->vm_start);

View File

@@ -96,14 +96,21 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c
infineon_second_stage_third_instance =
(infineon_second_stage_version >> 16) & mask;
if (cpucp_info->infineon_second_stage_version)
if (cpucp_info->infineon_version && cpucp_info->infineon_second_stage_version)
return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n",
le32_to_cpu(cpucp_info->infineon_version),
infineon_second_stage_first_instance,
infineon_second_stage_second_instance,
infineon_second_stage_third_instance);
else
else if (cpucp_info->infineon_second_stage_version)
return sprintf(buf, "%#04x:%#04x:%#04x\n",
infineon_second_stage_first_instance,
infineon_second_stage_second_instance,
infineon_second_stage_third_instance);
else if (cpucp_info->infineon_version)
return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
return 0;
}
static DEVICE_ATTR_RO(vrm_ver);

View File

@@ -4168,10 +4168,29 @@ static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
VM_DONTCOPY | VM_NORESERVE);
#ifdef _HAS_DMA_MMAP_COHERENT
/*
* If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
* so vm_insert_page() can handle it safely. Without this, the kernel
* may BUG_ON due to VM_PFNMAP.
*/
if (is_vmalloc_addr(cpu_addr))
vm_flags_set(vma, VM_MIXEDMAP);
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
(dma_addr - HOST_PHYS_BASE), size);
if (rc)
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
#else
rc = remap_pfn_range(vma, vma->vm_start,
virt_to_phys(cpu_addr) >> PAGE_SHIFT,
size, vma->vm_page_prot);
if (rc)
dev_err(hdev->dev, "remap_pfn_range error %d", rc);
#endif
return rc;
}

View File

@@ -728,6 +728,354 @@ static const int gaudi2_dma_core_async_event_id[] = {
[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};
const char *gaudi2_engine_id_str[] = {
__stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_0),
__stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_1),
__stringify(GAUDI2_DCORE0_ENGINE_ID_MME),
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_0),
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_1),
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_2),
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_3),
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_4),
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_5),
__stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_0),
__stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_1),
__stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_0),
__stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_1),
__stringify(GAUDI2_DCORE1_ENGINE_ID_MME),
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_0),
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_1),
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_2),
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_3),
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_4),
__stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_5),
__stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_0),
__stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_1),
__stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_0),
__stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_1),
__stringify(GAUDI2_DCORE2_ENGINE_ID_MME),
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_0),
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_1),
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_2),
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_3),
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_4),
__stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_5),
__stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_0),
__stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_1),
__stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_0),
__stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_1),
__stringify(GAUDI2_DCORE3_ENGINE_ID_MME),
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_0),
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_1),
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_2),
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_3),
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_4),
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_5),
__stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_0),
__stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_1),
__stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_6),
__stringify(GAUDI2_ENGINE_ID_PDMA_0),
__stringify(GAUDI2_ENGINE_ID_PDMA_1),
__stringify(GAUDI2_ENGINE_ID_ROT_0),
__stringify(GAUDI2_ENGINE_ID_ROT_1),
__stringify(GAUDI2_PCIE_ENGINE_ID_DEC_0),
__stringify(GAUDI2_PCIE_ENGINE_ID_DEC_1),
__stringify(GAUDI2_ENGINE_ID_NIC0_0),
__stringify(GAUDI2_ENGINE_ID_NIC0_1),
__stringify(GAUDI2_ENGINE_ID_NIC1_0),
__stringify(GAUDI2_ENGINE_ID_NIC1_1),
__stringify(GAUDI2_ENGINE_ID_NIC2_0),
__stringify(GAUDI2_ENGINE_ID_NIC2_1),
__stringify(GAUDI2_ENGINE_ID_NIC3_0),
__stringify(GAUDI2_ENGINE_ID_NIC3_1),
__stringify(GAUDI2_ENGINE_ID_NIC4_0),
__stringify(GAUDI2_ENGINE_ID_NIC4_1),
__stringify(GAUDI2_ENGINE_ID_NIC5_0),
__stringify(GAUDI2_ENGINE_ID_NIC5_1),
__stringify(GAUDI2_ENGINE_ID_NIC6_0),
__stringify(GAUDI2_ENGINE_ID_NIC6_1),
__stringify(GAUDI2_ENGINE_ID_NIC7_0),
__stringify(GAUDI2_ENGINE_ID_NIC7_1),
__stringify(GAUDI2_ENGINE_ID_NIC8_0),
__stringify(GAUDI2_ENGINE_ID_NIC8_1),
__stringify(GAUDI2_ENGINE_ID_NIC9_0),
__stringify(GAUDI2_ENGINE_ID_NIC9_1),
__stringify(GAUDI2_ENGINE_ID_NIC10_0),
__stringify(GAUDI2_ENGINE_ID_NIC10_1),
__stringify(GAUDI2_ENGINE_ID_NIC11_0),
__stringify(GAUDI2_ENGINE_ID_NIC11_1),
__stringify(GAUDI2_ENGINE_ID_PCIE),
__stringify(GAUDI2_ENGINE_ID_PSOC),
__stringify(GAUDI2_ENGINE_ID_ARC_FARM),
__stringify(GAUDI2_ENGINE_ID_KDMA),
__stringify(GAUDI2_ENGINE_ID_SIZE),
};
const char *gaudi2_queue_id_str[] = {
__stringify(GAUDI2_QUEUE_ID_PDMA_0_0),
__stringify(GAUDI2_QUEUE_ID_PDMA_0_1),
__stringify(GAUDI2_QUEUE_ID_PDMA_0_2),
__stringify(GAUDI2_QUEUE_ID_PDMA_0_3),
__stringify(GAUDI2_QUEUE_ID_PDMA_1_0),
__stringify(GAUDI2_QUEUE_ID_PDMA_1_1),
__stringify(GAUDI2_QUEUE_ID_PDMA_1_2),
__stringify(GAUDI2_QUEUE_ID_PDMA_1_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_3),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_0),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_1),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_2),
__stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_3),
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0),
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1),
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2),
__stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3),
__stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_0),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_1),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_2),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_3),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_0),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_1),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_2),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_3),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_0),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_1),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_2),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_3),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_0),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_1),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_2),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_3),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_0),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_1),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_2),
__stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_3),
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0),
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1),
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2),
__stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3),
__stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_0),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_1),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_2),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_3),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_0),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_1),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_2),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_3),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_0),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_1),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_2),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_3),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_0),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_1),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_2),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_3),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_0),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_1),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_2),
__stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_3),
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0),
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1),
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2),
__stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3),
__stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_0),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_1),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_2),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_3),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_0),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_1),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_2),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_3),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_0),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_1),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_2),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_3),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_0),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_1),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_2),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_3),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_0),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_1),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_2),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_3),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_0),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_1),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_2),
__stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_3),
__stringify(GAUDI2_QUEUE_ID_NIC_0_0),
__stringify(GAUDI2_QUEUE_ID_NIC_0_1),
__stringify(GAUDI2_QUEUE_ID_NIC_0_2),
__stringify(GAUDI2_QUEUE_ID_NIC_0_3),
__stringify(GAUDI2_QUEUE_ID_NIC_1_0),
__stringify(GAUDI2_QUEUE_ID_NIC_1_1),
__stringify(GAUDI2_QUEUE_ID_NIC_1_2),
__stringify(GAUDI2_QUEUE_ID_NIC_1_3),
__stringify(GAUDI2_QUEUE_ID_NIC_2_0),
__stringify(GAUDI2_QUEUE_ID_NIC_2_1),
__stringify(GAUDI2_QUEUE_ID_NIC_2_2),
__stringify(GAUDI2_QUEUE_ID_NIC_2_3),
__stringify(GAUDI2_QUEUE_ID_NIC_3_0),
__stringify(GAUDI2_QUEUE_ID_NIC_3_1),
__stringify(GAUDI2_QUEUE_ID_NIC_3_2),
__stringify(GAUDI2_QUEUE_ID_NIC_3_3),
__stringify(GAUDI2_QUEUE_ID_NIC_4_0),
__stringify(GAUDI2_QUEUE_ID_NIC_4_1),
__stringify(GAUDI2_QUEUE_ID_NIC_4_2),
__stringify(GAUDI2_QUEUE_ID_NIC_4_3),
__stringify(GAUDI2_QUEUE_ID_NIC_5_0),
__stringify(GAUDI2_QUEUE_ID_NIC_5_1),
__stringify(GAUDI2_QUEUE_ID_NIC_5_2),
__stringify(GAUDI2_QUEUE_ID_NIC_5_3),
__stringify(GAUDI2_QUEUE_ID_NIC_6_0),
__stringify(GAUDI2_QUEUE_ID_NIC_6_1),
__stringify(GAUDI2_QUEUE_ID_NIC_6_2),
__stringify(GAUDI2_QUEUE_ID_NIC_6_3),
__stringify(GAUDI2_QUEUE_ID_NIC_7_0),
__stringify(GAUDI2_QUEUE_ID_NIC_7_1),
__stringify(GAUDI2_QUEUE_ID_NIC_7_2),
__stringify(GAUDI2_QUEUE_ID_NIC_7_3),
__stringify(GAUDI2_QUEUE_ID_NIC_8_0),
__stringify(GAUDI2_QUEUE_ID_NIC_8_1),
__stringify(GAUDI2_QUEUE_ID_NIC_8_2),
__stringify(GAUDI2_QUEUE_ID_NIC_8_3),
__stringify(GAUDI2_QUEUE_ID_NIC_9_0),
__stringify(GAUDI2_QUEUE_ID_NIC_9_1),
__stringify(GAUDI2_QUEUE_ID_NIC_9_2),
__stringify(GAUDI2_QUEUE_ID_NIC_9_3),
__stringify(GAUDI2_QUEUE_ID_NIC_10_0),
__stringify(GAUDI2_QUEUE_ID_NIC_10_1),
__stringify(GAUDI2_QUEUE_ID_NIC_10_2),
__stringify(GAUDI2_QUEUE_ID_NIC_10_3),
__stringify(GAUDI2_QUEUE_ID_NIC_11_0),
__stringify(GAUDI2_QUEUE_ID_NIC_11_1),
__stringify(GAUDI2_QUEUE_ID_NIC_11_2),
__stringify(GAUDI2_QUEUE_ID_NIC_11_3),
__stringify(GAUDI2_QUEUE_ID_NIC_12_0),
__stringify(GAUDI2_QUEUE_ID_NIC_12_1),
__stringify(GAUDI2_QUEUE_ID_NIC_12_2),
__stringify(GAUDI2_QUEUE_ID_NIC_12_3),
__stringify(GAUDI2_QUEUE_ID_NIC_13_0),
__stringify(GAUDI2_QUEUE_ID_NIC_13_1),
__stringify(GAUDI2_QUEUE_ID_NIC_13_2),
__stringify(GAUDI2_QUEUE_ID_NIC_13_3),
__stringify(GAUDI2_QUEUE_ID_NIC_14_0),
__stringify(GAUDI2_QUEUE_ID_NIC_14_1),
__stringify(GAUDI2_QUEUE_ID_NIC_14_2),
__stringify(GAUDI2_QUEUE_ID_NIC_14_3),
__stringify(GAUDI2_QUEUE_ID_NIC_15_0),
__stringify(GAUDI2_QUEUE_ID_NIC_15_1),
__stringify(GAUDI2_QUEUE_ID_NIC_15_2),
__stringify(GAUDI2_QUEUE_ID_NIC_15_3),
__stringify(GAUDI2_QUEUE_ID_NIC_16_0),
__stringify(GAUDI2_QUEUE_ID_NIC_16_1),
__stringify(GAUDI2_QUEUE_ID_NIC_16_2),
__stringify(GAUDI2_QUEUE_ID_NIC_16_3),
__stringify(GAUDI2_QUEUE_ID_NIC_17_0),
__stringify(GAUDI2_QUEUE_ID_NIC_17_1),
__stringify(GAUDI2_QUEUE_ID_NIC_17_2),
__stringify(GAUDI2_QUEUE_ID_NIC_17_3),
__stringify(GAUDI2_QUEUE_ID_NIC_18_0),
__stringify(GAUDI2_QUEUE_ID_NIC_18_1),
__stringify(GAUDI2_QUEUE_ID_NIC_18_2),
__stringify(GAUDI2_QUEUE_ID_NIC_18_3),
__stringify(GAUDI2_QUEUE_ID_NIC_19_0),
__stringify(GAUDI2_QUEUE_ID_NIC_19_1),
__stringify(GAUDI2_QUEUE_ID_NIC_19_2),
__stringify(GAUDI2_QUEUE_ID_NIC_19_3),
__stringify(GAUDI2_QUEUE_ID_NIC_20_0),
__stringify(GAUDI2_QUEUE_ID_NIC_20_1),
__stringify(GAUDI2_QUEUE_ID_NIC_20_2),
__stringify(GAUDI2_QUEUE_ID_NIC_20_3),
__stringify(GAUDI2_QUEUE_ID_NIC_21_0),
__stringify(GAUDI2_QUEUE_ID_NIC_21_1),
__stringify(GAUDI2_QUEUE_ID_NIC_21_2),
__stringify(GAUDI2_QUEUE_ID_NIC_21_3),
__stringify(GAUDI2_QUEUE_ID_NIC_22_0),
__stringify(GAUDI2_QUEUE_ID_NIC_22_1),
__stringify(GAUDI2_QUEUE_ID_NIC_22_2),
__stringify(GAUDI2_QUEUE_ID_NIC_22_3),
__stringify(GAUDI2_QUEUE_ID_NIC_23_0),
__stringify(GAUDI2_QUEUE_ID_NIC_23_1),
__stringify(GAUDI2_QUEUE_ID_NIC_23_2),
__stringify(GAUDI2_QUEUE_ID_NIC_23_3),
__stringify(GAUDI2_QUEUE_ID_ROT_0_0),
__stringify(GAUDI2_QUEUE_ID_ROT_0_1),
__stringify(GAUDI2_QUEUE_ID_ROT_0_2),
__stringify(GAUDI2_QUEUE_ID_ROT_0_3),
__stringify(GAUDI2_QUEUE_ID_ROT_1_0),
__stringify(GAUDI2_QUEUE_ID_ROT_1_1),
__stringify(GAUDI2_QUEUE_ID_ROT_1_2),
__stringify(GAUDI2_QUEUE_ID_ROT_1_3),
__stringify(GAUDI2_QUEUE_ID_CPU_PQ),
__stringify(GAUDI2_QUEUE_ID_SIZE),
};
static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
"qman sei intr",
"arc sei intr"
@@ -3150,7 +3498,6 @@ static int gaudi2_early_init(struct hl_device *hdev)
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
/* we are already on failure flow, so don't check if hw_fini fails. */
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
@@ -3162,6 +3509,13 @@ static int gaudi2_early_init(struct hl_device *hdev)
dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
goto pci_fini;
}
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
}
return 0;
@@ -4836,7 +5190,7 @@ static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw
else
wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
if (fw_reset)
if (fw_reset || hdev->cpld_shutdown)
goto skip_engines;
gaudi2_stop_dma_qmans(hdev);
@@ -6484,6 +6838,13 @@ static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
VM_DONTCOPY | VM_NORESERVE);
#ifdef _HAS_DMA_MMAP_COHERENT
/*
* If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
* so vm_insert_page() can handle it safely. Without this, the kernel
* may BUG_ON due to VM_PFNMAP.
*/
if (is_vmalloc_addr(cpu_addr))
vm_flags_set(vma, VM_MIXEDMAP);
rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
if (rc)
@@ -6774,7 +7135,8 @@ static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parse
struct gaudi2_device *gaudi2 = hdev->asic_specific;
if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
dev_err(hdev->dev, "h/w queue %s is disabled\n",
GAUDI2_QUEUE_ID_TO_STR(parser->hw_queue_id));
return -EINVAL;
}
@@ -7026,7 +7388,8 @@ static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
if (rc)
dev_err(hdev->dev,
"Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
"Failed to send msg_short packet to H/W queue %s\n",
GAUDI2_QUEUE_ID_TO_STR(hw_queue_id));
return rc;
}
@@ -7052,8 +7415,8 @@ static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queu
timeout_usec);
if (rc == -ETIMEDOUT) {
dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
hw_queue_id, tmp);
dev_err(hdev->dev, "H/W queue %s test failed (SOB_OBJ_0 == 0x%x)\n",
GAUDI2_QUEUE_ID_TO_STR(hw_queue_id), tmp);
rc = -EIO;
}
@@ -9603,8 +9966,8 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
gaudi2_print_event(hdev, event_type, true,
"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
engine_id, intr_type, q->queue_index);
"ARC DCCM Full event: Eng: %s, Intr_type: %u, Qidx: %u",
GAUDI2_ENG_ID_TO_STR(engine_id), intr_type, q->queue_index);
return 1;
default:
gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
@@ -10172,7 +10535,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
le64_to_cpu(eq_entry->data[0]));
error_count = GAUDI2_NA_EVENT_CAUSE;
event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
hl_eq_cpld_shutdown_event_handle(hdev, event_type, &event_mask);
break;
case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
@@ -10260,6 +10623,7 @@ reset_device:
if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
hl_handle_critical_hw_err(hdev, event_type, &event_mask);
hl_debugfs_cfg_access_history_dump(hdev);
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
hl_device_cond_reset(hdev, reset_flags, event_mask);
}
@@ -10296,8 +10660,8 @@ static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
if (rc)
dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n",
hw_queue_id);
dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %s\n",
GAUDI2_QUEUE_ID_TO_STR(hw_queue_id));
return rc;
}

View File

@@ -240,6 +240,15 @@
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
extern const char *gaudi2_engine_id_str[];
extern const char *gaudi2_queue_id_str[];
#define GAUDI2_ENG_ID_TO_STR(initiator) ((initiator) >= GAUDI2_ENGINE_ID_SIZE ? "not found" : \
gaudi2_engine_id_str[initiator])
#define GAUDI2_QUEUE_ID_TO_STR(initiator) ((initiator) >= GAUDI2_QUEUE_ID_SIZE ? "not found" : \
gaudi2_queue_id_str[initiator])
enum gaudi2_reserved_sob_id {
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,
GAUDI2_RESERVED_SOB_CS_COMPLETION_LAST =

View File

@@ -2426,7 +2426,7 @@ static int gaudi2_config_bmon(struct hl_device *hdev, struct hl_debug_params *pa
WREG32(base_reg + mmBMON_ADDRH_E3_OFFSET, 0);
WREG32(base_reg + mmBMON_REDUCTION_OFFSET, 0);
WREG32(base_reg + mmBMON_STM_TRC_OFFSET, 0x7 | (0xA << 8));
WREG32(base_reg + mmBMON_CR_OFFSET, 0x77 | 0xf << 24);
WREG32(base_reg + mmBMON_CR_OFFSET, 0x41);
}
return 0;

View File

@@ -45,7 +45,7 @@ struct ivpu_fw_info {
int ivpu_fw_init(struct ivpu_device *vdev);
void ivpu_fw_fini(struct ivpu_device *vdev);
void ivpu_fw_load(struct ivpu_device *vdev);
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp);
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params);
static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)
{

View File

@@ -33,7 +33,6 @@
#define PLL_CDYN_DEFAULT 0x80
#define PLL_EPP_DEFAULT 0x80
#define PLL_CONFIG_DEFAULT 0x0
#define PLL_REF_CLK_FREQ 50000000ull
#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
@@ -303,7 +302,7 @@ static void prepare_wp_request(struct ivpu_device *vdev, struct wp_request *wp,
wp->epp = 0;
} else {
wp->target = hw->pll.pn_ratio;
wp->cfg = enable ? PLL_CONFIG_DEFAULT : 0;
wp->cfg = 0;
wp->cdyn = enable ? PLL_CDYN_DEFAULT : 0;
wp->epp = enable ? PLL_EPP_DEFAULT : 0;
}

View File

@@ -36,7 +36,7 @@ u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev);
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq);
bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq);
int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable);
void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent);
void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 active_percent);
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);

View File

@@ -141,7 +141,6 @@ ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_rx_msg *rx_msg;
lockdep_assert_held(&ipc->cons_lock);
lockdep_assert_irqs_disabled();
rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
if (!rx_msg) {

View File

@@ -0,0 +1,24 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_ACCEL_ROCKET
tristate "Rocket (support for Rockchip NPUs)"
depends on DRM_ACCEL
depends on (ARCH_ROCKCHIP && ARM64) || COMPILE_TEST
depends on ROCKCHIP_IOMMU || COMPILE_TEST
depends on MMU
select DRM_SCHED
select DRM_GEM_SHMEM_HELPER
help
Choose this option if you have a Rockchip SoC that contains a
compatible Neural Processing Unit (NPU), such as the RK3588. Rockchip
calls this unit either RKNN or RKNPU; it accelerates inference of
neural networks.
The interface exposed to userspace is described in
include/uapi/drm/rocket_accel.h and is used by the Rocket userspace
driver in Mesa3D.
If unsure, say N.
To compile this driver as a module, choose M here: the
module will be called rocket.

View File

@@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_DRM_ACCEL_ROCKET) := rocket.o
rocket-y := \
rocket_core.o \
rocket_device.o \
rocket_drv.o \
rocket_gem.o \
rocket_job.o

View File

@@ -0,0 +1,110 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dev_printk.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/iommu.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include "rocket_core.h"
#include "rocket_job.h"
/*
 * rocket_core_init() - Bring up one Rockchip NPU core.
 * @core: core to initialize; @core->dev and @core->index must already be set.
 *
 * Acquires the core's resets, clocks and the three MMIO regions ("pc",
 * "cna", "core"), configures DMA, initializes the job scheduler and
 * enables autosuspending runtime PM. On failure, every non-devm resource
 * acquired here (IOMMU group reference, job scheduler, runtime PM state)
 * is released again — the original code leaked these on the later error
 * paths.
 *
 * Return: 0 on success, negative errno on failure.
 */
int rocket_core_init(struct rocket_core *core)
{
	struct device *dev = core->dev;
	struct platform_device *pdev = to_platform_device(dev);
	u32 version;
	int err = 0;

	core->resets[0].id = "srst_a";
	core->resets[1].id = "srst_h";
	err = devm_reset_control_bulk_get_exclusive(&pdev->dev, ARRAY_SIZE(core->resets),
						    core->resets);
	if (err)
		return dev_err_probe(dev, err, "failed to get resets for core %d\n", core->index);

	err = devm_clk_bulk_get(dev, ARRAY_SIZE(core->clks), core->clks);
	if (err)
		return dev_err_probe(dev, err, "failed to get clocks for core %d\n", core->index);

	core->pc_iomem = devm_platform_ioremap_resource_byname(pdev, "pc");
	if (IS_ERR(core->pc_iomem)) {
		dev_err(dev, "couldn't find PC registers %ld\n", PTR_ERR(core->pc_iomem));
		return PTR_ERR(core->pc_iomem);
	}

	core->cna_iomem = devm_platform_ioremap_resource_byname(pdev, "cna");
	if (IS_ERR(core->cna_iomem)) {
		dev_err(dev, "couldn't find CNA registers %ld\n", PTR_ERR(core->cna_iomem));
		return PTR_ERR(core->cna_iomem);
	}

	core->core_iomem = devm_platform_ioremap_resource_byname(pdev, "core");
	if (IS_ERR(core->core_iomem)) {
		dev_err(dev, "couldn't find CORE registers %ld\n", PTR_ERR(core->core_iomem));
		return PTR_ERR(core->core_iomem);
	}

	/* The NPU addresses up to 40 bits of physical memory. */
	dma_set_max_seg_size(dev, UINT_MAX);
	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
	if (err)
		return err;

	core->iommu_group = iommu_group_get(dev);

	err = rocket_job_init(core);
	if (err)
		goto err_put_group;

	pm_runtime_use_autosuspend(dev);

	/*
	 * As this NPU will be most often used as part of a media pipeline that
	 * ends presenting in a display, choose 50 ms (~3 frames at 60Hz) as an
	 * autosuspend delay as that will keep the device powered up while the
	 * pipeline is running.
	 */
	pm_runtime_set_autosuspend_delay(dev, 50);
	pm_runtime_enable(dev);

	err = pm_runtime_resume_and_get(dev);
	if (err)
		goto err_disable_pm;

	version = rocket_pc_readl(core, VERSION);
	version += rocket_pc_readl(core, VERSION_NUM) & 0xffff;

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

	dev_info(dev, "Rockchip NPU core %d version: %d\n", core->index, version);

	return 0;

err_disable_pm:
	/* Undo the runtime-PM setup done just above. */
	pm_runtime_dont_use_autosuspend(dev);
	pm_runtime_disable(dev);
	rocket_job_fini(core);
err_put_group:
	/* Drop the reference taken by iommu_group_get(). */
	iommu_group_put(core->iommu_group);
	core->iommu_group = NULL;
	return err;
}
/*
 * rocket_core_fini() - Tear down one NPU core.
 * @core: core previously initialized by rocket_core_init().
 *
 * Disables runtime PM, drops the IOMMU group reference taken at init
 * time and shuts down the job scheduler.
 */
void rocket_core_fini(struct rocket_core *core)
{
	struct device *dev = core->dev;

	pm_runtime_dont_use_autosuspend(dev);
	pm_runtime_disable(dev);

	iommu_group_put(core->iommu_group);
	core->iommu_group = NULL;

	rocket_job_fini(core);
}
/*
 * rocket_core_reset() - Pulse the core's reset lines.
 * @core: core whose "srst_a"/"srst_h" resets are toggled.
 *
 * Asserts both bulk resets, holds them for 10 us, then releases them.
 */
void rocket_core_reset(struct rocket_core *core)
{
	const unsigned int num_resets = ARRAY_SIZE(core->resets);

	reset_control_bulk_assert(num_resets, core->resets);

	udelay(10);

	reset_control_bulk_deassert(num_resets, core->resets);
}

View File

@@ -0,0 +1,64 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
#ifndef __ROCKET_CORE_H__
#define __ROCKET_CORE_H__
#include <drm/gpu_scheduler.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/mutex_types.h>
#include <linux/reset.h>
#include "rocket_registers.h"
/*
 * MMIO accessors for the three register regions of one NPU core.
 * The CNA and CORE mappings appear to start at their respective S_STATUS
 * register, so absolute REG_* offsets are rebased by subtracting that
 * base before dereferencing the ioremapped pointer — TODO confirm
 * against rocket_registers.h.
 */
#define rocket_pc_readl(core, reg) \
readl((core)->pc_iomem + (REG_PC_##reg))
#define rocket_pc_writel(core, reg, value) \
writel(value, (core)->pc_iomem + (REG_PC_##reg))
#define rocket_cna_readl(core, reg) \
readl((core)->cna_iomem + (REG_CNA_##reg) - REG_CNA_S_STATUS)
#define rocket_cna_writel(core, reg, value) \
writel(value, (core)->cna_iomem + (REG_CNA_##reg) - REG_CNA_S_STATUS)
#define rocket_core_readl(core, reg) \
readl((core)->core_iomem + (REG_CORE_##reg) - REG_CORE_S_STATUS)
#define rocket_core_writel(core, reg, value) \
writel(value, (core)->core_iomem + (REG_CORE_##reg) - REG_CORE_S_STATUS)
/* Per-core state of one Rockchip NPU core. */
struct rocket_core {
/* Platform device backing this core. */
struct device *dev;
/* Owning device; presumably set by the platform driver — verify against caller. */
struct rocket_device *rdev;
/* Core number, used in probe/info log messages. */
unsigned int index;
/* Interrupt line; requested elsewhere — not visible in this view. */
int irq;
/* MMIO regions mapped from the "pc", "cna" and "core" resources. */
void __iomem *pc_iomem;
void __iomem *cna_iomem;
void __iomem *core_iomem;
/* Bulk clocks fetched via devm_clk_bulk_get(); IDs filled in elsewhere. */
struct clk_bulk_data clks[4];
/* "srst_a" and "srst_h" resets (IDs assigned in rocket_core_init()). */
struct reset_control_bulk_data resets[2];
/* Reference taken with iommu_group_get() at init, put at fini. */
struct iommu_group *iommu_group;
/* Job submission state; managed by rocket_job.c — semantics not visible here. */
struct mutex job_lock;
struct rocket_job *in_flight_job;
spinlock_t fence_lock;
/* Deferred reset machinery; presumably driven by the job timeout path. */
struct {
struct workqueue_struct *wq;
struct work_struct work;
atomic_t pending;
} reset;
/* DRM GPU scheduler instance for this core. */
struct drm_gpu_scheduler sched;
/* Fence context and last emitted seqno for this core's fences. */
u64 fence_context;
u64 emit_seqno;
};
int rocket_core_init(struct rocket_core *core);
void rocket_core_fini(struct rocket_core *core);
void rocket_core_reset(struct rocket_core *core);
#endif

View File

@@ -0,0 +1,60 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright 2024-2025 Tomeu Vizoso <tomeu@tomeuvizoso.net> */
#include <drm/drm_drv.h>
#include <linux/array_size.h>
#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include "rocket_device.h"
struct rocket_device *rocket_device_init(struct platform_device *pdev,
const struct drm_driver *rocket_drm_driver)
{
struct device *dev = &pdev->dev;
struct device_node *core_node;
struct rocket_device *rdev;
struct drm_device *ddev;
unsigned int num_cores = 0;
int err;
rdev = devm_drm_dev_alloc(dev, rocket_drm_driver, struct rocket_device, ddev);
if (IS_ERR(rdev))
return rdev;
ddev = &rdev->ddev;
dev_set_drvdata(dev, rdev);
for_each_compatible_node(core_node, NULL, "rockchip,rk3588-rknn-core")
if (of_device_is_available(core_node))
num_cores++;
rdev->cores = devm_kcalloc(dev, num_cores, sizeof(*rdev->cores), GFP_KERNEL);
if (!rdev->cores)
return ERR_PTR(-ENOMEM);
dma_set_max_seg_size(dev, UINT_MAX);
err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
if (err)
return ERR_PTR(err);
err = devm_mutex_init(dev, &rdev->sched_lock);
if (err)
return ERR_PTR(-ENOMEM);
err = drm_dev_register(ddev, 0);
if (err)
return ERR_PTR(err);
return rdev;
}
/*
 * rocket_device_fini() - Unregister the Rocket DRM device.
 * @rdev: device previously set up by rocket_device_init().
 *
 * All cores are expected to have been torn down already; warn if any
 * are still accounted for.
 */
void rocket_device_fini(struct rocket_device *rdev)
{
	struct drm_device *ddev = &rdev->ddev;

	WARN_ON(rdev->num_cores > 0);

	drm_dev_unregister(ddev);
}

Some files were not shown because too many files have changed in this diff Show More