Merge commit c288d9cd71 ("Merge tag 'for-5.14/io_uring-2021-06-30' of git://git.kernel.dk/linux-block") into android-mainline
Another small step en route to v5.14-rc1 Change-Id: I24899ab78da7d367574ed69ceaa82ab0837d9556 Signed-off-by: Lee Jones <lee.jones@linaro.org>
This commit is contained in:
@@ -197,8 +197,24 @@ Description:
|
||||
Drivers may emit a CHANGE uevent when a password is set or unset
|
||||
userspace may check it again.
|
||||
|
||||
On Dell systems, if Admin password is set, then all BIOS attributes
|
||||
On Dell and Lenovo systems, if Admin password is set, then all BIOS attributes
|
||||
require password validation.
|
||||
On Lenovo systems if you change the Admin password the new password is not active until
|
||||
the next boot.
|
||||
|
||||
Lenovo specific class extensions
|
||||
------------------------------
|
||||
|
||||
On Lenovo systems the following additional settings are available:
|
||||
|
||||
lenovo_encoding:
|
||||
The encoding method that is used. This can be either "ascii"
|
||||
or "scancode". Default is set to "ascii"
|
||||
|
||||
lenovo_kbdlang:
|
||||
The keyboard language method that is used. This is generally a
|
||||
two char code (e.g. "us", "fr", "gr") and may vary per platform.
|
||||
Default is set to "us"
|
||||
|
||||
What: /sys/class/firmware-attributes/*/attributes/pending_reboot
|
||||
Date: February 2021
|
||||
|
||||
78
Documentation/ABI/testing/sysfs-devices-platform-soc-ipa
Normal file
78
Documentation/ABI/testing/sysfs-devices-platform-soc-ipa
Normal file
@@ -0,0 +1,78 @@
|
||||
What: /sys/devices/platform/soc@X/XXXXXXX.ipa/
|
||||
Date: June 2021
|
||||
KernelVersion: v5.14
|
||||
Contact: Alex Elder <elder@kernel.org>
|
||||
Description:
|
||||
The /sys/devices/platform/soc@X/XXXXXXX.ipa/ directory
|
||||
contains read-only attributes exposing information about
|
||||
an IPA device. The X values could vary, but are typically
|
||||
"soc@0/1e40000.ipa".
|
||||
|
||||
What: .../XXXXXXX.ipa/version
|
||||
Date: June 2021
|
||||
KernelVersion: v5.14
|
||||
Contact: Alex Elder <elder@kernel.org>
|
||||
Description:
|
||||
The .../XXXXXXX.ipa/version file contains the IPA hardware
|
||||
version, as a period-separated set of two or three integers
|
||||
(e.g., "3.5.1" or "4.2").
|
||||
|
||||
What: .../XXXXXXX.ipa/feature/
|
||||
Date: June 2021
|
||||
KernelVersion: v5.14
|
||||
Contact: Alex Elder <elder@kernel.org>
|
||||
Description:
|
||||
The .../XXXXXXX.ipa/feature/ directory contains a set of
|
||||
attributes describing features implemented by the IPA
|
||||
hardware.
|
||||
|
||||
What: .../XXXXXXX.ipa/feature/rx_offload
|
||||
Date: June 2021
|
||||
KernelVersion: v5.14
|
||||
Contact: Alex Elder <elder@kernel.org>
|
||||
Description:
|
||||
The .../XXXXXXX.ipa/feature/rx_offload file contains a
|
||||
string indicating the type of receive checksum offload
|
||||
that is supported by the hardware. The possible values
|
||||
are "MAPv4" or "MAPv5".
|
||||
|
||||
What: .../XXXXXXX.ipa/feature/tx_offload
|
||||
Date: June 2021
|
||||
KernelVersion: v5.14
|
||||
Contact: Alex Elder <elder@kernel.org>
|
||||
Description:
|
||||
The .../XXXXXXX.ipa/feature/tx_offload file contains a
|
||||
string indicating the type of transmit checksum offload
|
||||
that is supported by the hardware. The possible values
|
||||
are "MAPv4" or "MAPv5".
|
||||
|
||||
What: .../XXXXXXX.ipa/modem/
|
||||
Date: June 2021
|
||||
KernelVersion: v5.14
|
||||
Contact: Alex Elder <elder@kernel.org>
|
||||
Description:
|
||||
The .../XXXXXXX.ipa/modem/ directory contains a set of
|
||||
attributes describing properties of the modem execution
|
||||
environment reachable by the IPA hardware.
|
||||
|
||||
What: .../XXXXXXX.ipa/modem/rx_endpoint_id
|
||||
Date: June 2021
|
||||
KernelVersion: v5.14
|
||||
Contact: Alex Elder <elder@kernel.org>
|
||||
Description:
|
||||
The .../XXXXXXX.ipa/modem/rx_endpoint_id file contains
|
||||
the AP endpoint ID that receives packets originating from
|
||||
the modem execution environment. The "rx" is from the
|
||||
perspective of the AP; this endpoint is considered an "IPA
|
||||
producer". An endpoint ID is a small unsigned integer.
|
||||
|
||||
What: .../XXXXXXX.ipa/modem/tx_endpoint_id
|
||||
Date: June 2021
|
||||
KernelVersion: v5.14
|
||||
Contact: Alex Elder <elder@kernel.org>
|
||||
Description:
|
||||
The .../XXXXXXX.ipa/modem/tx_endpoint_id file contains
|
||||
the AP endpoint ID used to transmit packets destined for
|
||||
the modem execution environment. The "tx" is from the
|
||||
perspective of the AP; this endpoint is considered an "IPA
|
||||
consumer". An endpoint ID is a small unsigned integer.
|
||||
55
Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
Normal file
55
Documentation/ABI/testing/sysfs-platform-dell-privacy-wmi
Normal file
@@ -0,0 +1,55 @@
|
||||
What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_supported_type
|
||||
Date: Apr 2021
|
||||
KernelVersion: 5.13
|
||||
Contact: "<perry.yuan@dell.com>"
|
||||
Description:
|
||||
Display which Dell hardware-level privacy devices are supported.
|
||||
“Dell Privacy” is a set of HW, FW, and SW features to enhance
|
||||
Dell’s commitment to platform privacy for MIC, Camera, and
|
||||
ePrivacy screens.
|
||||
The supported hardware privacy devices are:
|
||||
Attributes:
|
||||
Microphone Mute:
|
||||
Indicates that the local microphone can be muted by hardware, so that no
|
||||
application is able to capture system mic sound
|
||||
|
||||
Camera Shutter:
|
||||
Identifies camera shutter controlled by hardware, which is a micromechanical
|
||||
shutter assembly that is built onto the camera module to block capturing images
|
||||
from outside the laptop
|
||||
|
||||
supported:
|
||||
The privacy device is supported by this system
|
||||
|
||||
unsupported:
|
||||
The privacy device is not supported on this system
|
||||
|
||||
For example to check which privacy devices are supported:
|
||||
|
||||
# cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_supported_type
|
||||
[Microphone Mute] [supported]
|
||||
[Camera Shutter] [supported]
|
||||
[ePrivacy Screen] [unsupported]
|
||||
|
||||
What: /sys/bus/wmi/devices/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_current_state
|
||||
Date: Apr 2021
|
||||
KernelVersion: 5.13
|
||||
Contact: "<perry.yuan@dell.com>"
|
||||
Description:
|
||||
Allow user space to check current dell privacy device state.
|
||||
Describes the Device State class exposed by BIOS which can be
|
||||
consumed by various applications interested in knowing the Privacy
|
||||
feature capabilities
|
||||
Attributes:
|
||||
muted:
|
||||
Indicates that the privacy device is turned off and cannot send a stream to OS applications
|
||||
|
||||
unmuted:
|
||||
Indicates that the privacy device is turned on; the audio or camera driver can get a
|
||||
stream from mic and camera module to OS applications
|
||||
|
||||
For example to check all supported current privacy device states:
|
||||
|
||||
# cat /sys/bus/wmi/drivers/dell-privacy/6932965F-1671-4CEB-B988-D3AB0A901919/dell_privacy_current_state
|
||||
[Microphone] [unmuted]
|
||||
[Camera Shutter] [unmuted]
|
||||
@@ -211,27 +211,40 @@ over a rather long period of time, but improvements are always welcome!
|
||||
of the system, especially to real-time workloads running on
|
||||
the rest of the system.
|
||||
|
||||
7. As of v4.20, a given kernel implements only one RCU flavor,
|
||||
which is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
|
||||
If the updater uses call_rcu() or synchronize_rcu(),
|
||||
then the corresponding readers may use rcu_read_lock() and
|
||||
rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(),
|
||||
or any pair of primitives that disables and re-enables preemption,
|
||||
for example, rcu_read_lock_sched() and rcu_read_unlock_sched().
|
||||
If the updater uses synchronize_srcu() or call_srcu(),
|
||||
then the corresponding readers must use srcu_read_lock() and
|
||||
srcu_read_unlock(), and with the same srcu_struct. The rules for
|
||||
the expedited primitives are the same as for their non-expedited
|
||||
counterparts. Mixing things up will result in confusion and
|
||||
broken kernels, and has even resulted in an exploitable security
|
||||
issue.
|
||||
7. As of v4.20, a given kernel implements only one RCU flavor, which
|
||||
is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
|
||||
If the updater uses call_rcu() or synchronize_rcu(), then
|
||||
the corresponding readers may use: (1) rcu_read_lock() and
|
||||
rcu_read_unlock(), (2) any pair of primitives that disables
|
||||
and re-enables softirq, for example, rcu_read_lock_bh() and
|
||||
rcu_read_unlock_bh(), or (3) any pair of primitives that disables
|
||||
and re-enables preemption, for example, rcu_read_lock_sched() and
|
||||
rcu_read_unlock_sched(). If the updater uses synchronize_srcu()
|
||||
or call_srcu(), then the corresponding readers must use
|
||||
srcu_read_lock() and srcu_read_unlock(), and with the same
|
||||
srcu_struct. The rules for the expedited RCU grace-period-wait
|
||||
primitives are the same as for their non-expedited counterparts.
|
||||
|
||||
One exception to this rule: rcu_read_lock() and rcu_read_unlock()
|
||||
may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
|
||||
in cases where local bottom halves are already known to be
|
||||
disabled, for example, in irq or softirq context. Commenting
|
||||
such cases is a must, of course! And the jury is still out on
|
||||
whether the increased speed is worth it.
|
||||
If the updater uses call_rcu_tasks() or synchronize_rcu_tasks(),
|
||||
then the readers must refrain from executing voluntary
|
||||
context switches, that is, from blocking. If the updater uses
|
||||
call_rcu_tasks_trace() or synchronize_rcu_tasks_trace(), then
|
||||
the corresponding readers must use rcu_read_lock_trace() and
|
||||
rcu_read_unlock_trace(). If an updater uses call_rcu_tasks_rude()
|
||||
or synchronize_rcu_tasks_rude(), then the corresponding readers
|
||||
must use anything that disables interrupts.
|
||||
|
||||
Mixing things up will result in confusion and broken kernels, and
|
||||
has even resulted in an exploitable security issue. Therefore,
|
||||
when using non-obvious pairs of primitives, commenting is
|
||||
of course a must. One example of non-obvious pairing is
|
||||
the XDP feature in networking, which calls BPF programs from
|
||||
network-driver NAPI (softirq) context. BPF relies heavily on RCU
|
||||
protection for its data structures, but because the BPF program
|
||||
invocation happens entirely within a single local_bh_disable()
|
||||
section in a NAPI poll cycle, this usage is safe. The reason
|
||||
that this usage is safe is that readers can use anything that
|
||||
disables BH when updaters use call_rcu() or synchronize_rcu().
|
||||
|
||||
8. Although synchronize_rcu() is slower than is call_rcu(), it
|
||||
usually results in simpler code. So, unless update performance is
|
||||
|
||||
@@ -17,36 +17,37 @@ level logical devices like device mapper.
|
||||
|
||||
HOWTO
|
||||
=====
|
||||
|
||||
Throttling/Upper Limit policy
|
||||
-----------------------------
|
||||
- Enable Block IO controller::
|
||||
Enable Block IO controller::
|
||||
|
||||
CONFIG_BLK_CGROUP=y
|
||||
|
||||
- Enable throttling in block layer::
|
||||
Enable throttling in block layer::
|
||||
|
||||
CONFIG_BLK_DEV_THROTTLING=y
|
||||
|
||||
- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::
|
||||
Mount blkio controller (see cgroups.txt, Why are cgroups needed?)::
|
||||
|
||||
mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
|
||||
|
||||
- Specify a bandwidth rate on particular device for root group. The format
|
||||
for policy is "<major>:<minor> <bytes_per_second>"::
|
||||
Specify a bandwidth rate on particular device for root group. The format
|
||||
for policy is "<major>:<minor> <bytes_per_second>"::
|
||||
|
||||
echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
|
||||
|
||||
Above will put a limit of 1MB/second on reads happening for root group
|
||||
on device having major/minor number 8:16.
|
||||
This will put a limit of 1MB/second on reads happening for root group
|
||||
on device having major/minor number 8:16.
|
||||
|
||||
- Run dd to read a file and see if rate is throttled to 1MB/s or not::
|
||||
Run dd to read a file and see if rate is throttled to 1MB/s or not::
|
||||
|
||||
# dd iflag=direct if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
|
||||
1024+0 records in
|
||||
1024+0 records out
|
||||
4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
|
||||
|
||||
Limits for writes can be put using blkio.throttle.write_bps_device file.
|
||||
Limits for writes can be put using blkio.throttle.write_bps_device file.
|
||||
|
||||
Hierarchical Cgroups
|
||||
====================
|
||||
@@ -79,85 +80,89 @@ following::
|
||||
|
||||
Various user visible config options
|
||||
===================================
|
||||
CONFIG_BLK_CGROUP
|
||||
- Block IO controller.
|
||||
|
||||
CONFIG_BFQ_CGROUP_DEBUG
|
||||
- Debug help. Right now some additional stats file show up in cgroup
|
||||
CONFIG_BLK_CGROUP
|
||||
Block IO controller.
|
||||
|
||||
CONFIG_BFQ_CGROUP_DEBUG
|
||||
Debug help. Right now some additional stats files show up in cgroup
|
||||
if this option is enabled.
|
||||
|
||||
CONFIG_BLK_DEV_THROTTLING
|
||||
- Enable block device throttling support in block layer.
|
||||
CONFIG_BLK_DEV_THROTTLING
|
||||
Enable block device throttling support in block layer.
|
||||
|
||||
Details of cgroup files
|
||||
=======================
|
||||
|
||||
Proportional weight policy files
|
||||
--------------------------------
|
||||
- blkio.weight
|
||||
- Specifies per cgroup weight. This is default weight of the group
|
||||
on all the devices until and unless overridden by per device rule.
|
||||
(See blkio.weight_device).
|
||||
Currently allowed range of weights is from 10 to 1000.
|
||||
|
||||
- blkio.weight_device
|
||||
- One can specify per cgroup per device rules using this interface.
|
||||
These rules override the default value of group weight as specified
|
||||
by blkio.weight.
|
||||
blkio.bfq.weight
|
||||
Specifies per cgroup weight. This is default weight of the group
|
||||
on all the devices until and unless overridden by per device rule
|
||||
(see `blkio.bfq.weight_device` below).
|
||||
|
||||
Currently allowed range of weights is from 1 to 1000. For more details,
|
||||
see Documentation/block/bfq-iosched.rst.
|
||||
|
||||
blkio.bfq.weight_device
|
||||
Specifies per cgroup per device weights, overriding the default group
|
||||
weight. For more details, see Documentation/block/bfq-iosched.rst.
|
||||
|
||||
Following is the format::
|
||||
|
||||
# echo dev_maj:dev_minor weight > blkio.weight_device
|
||||
# echo dev_maj:dev_minor weight > blkio.bfq.weight_device
|
||||
|
||||
Configure weight=300 on /dev/sdb (8:16) in this cgroup::
|
||||
|
||||
# echo 8:16 300 > blkio.weight_device
|
||||
# cat blkio.weight_device
|
||||
# echo 8:16 300 > blkio.bfq.weight_device
|
||||
# cat blkio.bfq.weight_device
|
||||
dev weight
|
||||
8:16 300
|
||||
|
||||
Configure weight=500 on /dev/sda (8:0) in this cgroup::
|
||||
|
||||
# echo 8:0 500 > blkio.weight_device
|
||||
# cat blkio.weight_device
|
||||
# echo 8:0 500 > blkio.bfq.weight_device
|
||||
# cat blkio.bfq.weight_device
|
||||
dev weight
|
||||
8:0 500
|
||||
8:16 300
|
||||
|
||||
Remove specific weight for /dev/sda in this cgroup::
|
||||
|
||||
# echo 8:0 0 > blkio.weight_device
|
||||
# cat blkio.weight_device
|
||||
# echo 8:0 0 > blkio.bfq.weight_device
|
||||
# cat blkio.bfq.weight_device
|
||||
dev weight
|
||||
8:16 300
|
||||
|
||||
- blkio.time
|
||||
- disk time allocated to cgroup per device in milliseconds. First
|
||||
blkio.time
|
||||
Disk time allocated to cgroup per device in milliseconds. First
|
||||
two fields specify the major and minor number of the device and
|
||||
third field specifies the disk time allocated to group in
|
||||
milliseconds.
|
||||
|
||||
- blkio.sectors
|
||||
- number of sectors transferred to/from disk by the group. First
|
||||
blkio.sectors
|
||||
Number of sectors transferred to/from disk by the group. First
|
||||
two fields specify the major and minor number of the device and
|
||||
third field specifies the number of sectors transferred by the
|
||||
group to/from the device.
|
||||
|
||||
- blkio.io_service_bytes
|
||||
- Number of bytes transferred to/from the disk by the group. These
|
||||
blkio.io_service_bytes
|
||||
Number of bytes transferred to/from the disk by the group. These
|
||||
are further divided by the type of operation - read or write, sync
|
||||
or async. First two fields specify the major and minor number of the
|
||||
device, third field specifies the operation type and the fourth field
|
||||
specifies the number of bytes.
|
||||
|
||||
- blkio.io_serviced
|
||||
- Number of IOs (bio) issued to the disk by the group. These
|
||||
blkio.io_serviced
|
||||
Number of IOs (bio) issued to the disk by the group. These
|
||||
are further divided by the type of operation - read or write, sync
|
||||
or async. First two fields specify the major and minor number of the
|
||||
device, third field specifies the operation type and the fourth field
|
||||
specifies the number of IOs.
|
||||
|
||||
- blkio.io_service_time
|
||||
- Total amount of time between request dispatch and request completion
|
||||
blkio.io_service_time
|
||||
Total amount of time between request dispatch and request completion
|
||||
for the IOs done by this cgroup. This is in nanoseconds to make it
|
||||
meaningful for flash devices too. For devices with queue depth of 1,
|
||||
this time represents the actual service time. When queue_depth > 1,
|
||||
@@ -170,8 +175,8 @@ Proportional weight policy files
|
||||
specifies the operation type and the fourth field specifies the
|
||||
io_service_time in ns.
|
||||
|
||||
- blkio.io_wait_time
|
||||
- Total amount of time the IOs for this cgroup spent waiting in the
|
||||
blkio.io_wait_time
|
||||
Total amount of time the IOs for this cgroup spent waiting in the
|
||||
scheduler queues for service. This can be greater than the total time
|
||||
elapsed since it is cumulative io_wait_time for all IOs. It is not a
|
||||
measure of total time the cgroup spent waiting but rather a measure of
|
||||
@@ -185,24 +190,24 @@ Proportional weight policy files
|
||||
minor number of the device, third field specifies the operation type
|
||||
and the fourth field specifies the io_wait_time in ns.
|
||||
|
||||
- blkio.io_merged
|
||||
- Total number of bios/requests merged into requests belonging to this
|
||||
blkio.io_merged
|
||||
Total number of bios/requests merged into requests belonging to this
|
||||
cgroup. This is further divided by the type of operation - read or
|
||||
write, sync or async.
|
||||
|
||||
- blkio.io_queued
|
||||
- Total number of requests queued up at any given instant for this
|
||||
blkio.io_queued
|
||||
Total number of requests queued up at any given instant for this
|
||||
cgroup. This is further divided by the type of operation - read or
|
||||
write, sync or async.
|
||||
|
||||
- blkio.avg_queue_size
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
blkio.avg_queue_size
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
The average queue size for this cgroup over the entire time of this
|
||||
cgroup's existence. Queue size samples are taken each time one of the
|
||||
queues of this cgroup gets a timeslice.
|
||||
|
||||
- blkio.group_wait_time
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
blkio.group_wait_time
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
This is the amount of time the cgroup had to wait since it became busy
|
||||
(i.e., went from 0 to 1 request queued) to get a timeslice for one of
|
||||
its queues. This is different from the io_wait_time which is the
|
||||
@@ -212,8 +217,8 @@ Proportional weight policy files
|
||||
will only report the group_wait_time accumulated till the last time it
|
||||
got a timeslice and will not include the current delta.
|
||||
|
||||
- blkio.empty_time
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
blkio.empty_time
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
This is the amount of time a cgroup spends without any pending
|
||||
requests when not being served, i.e., it does not include any time
|
||||
spent idling for one of the queues of the cgroup. This is in
|
||||
@@ -221,8 +226,8 @@ Proportional weight policy files
|
||||
the stat will only report the empty_time accumulated till the last
|
||||
time it had a pending request and will not include the current delta.
|
||||
|
||||
- blkio.idle_time
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
blkio.idle_time
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y.
|
||||
This is the amount of time spent by the IO scheduler idling for a
|
||||
given cgroup in anticipation of a better request than the existing ones
|
||||
from other queues/cgroups. This is in nanoseconds. If this is read
|
||||
@@ -230,60 +235,60 @@ Proportional weight policy files
|
||||
idle_time accumulated till the last idle period and will not include
|
||||
the current delta.
|
||||
|
||||
- blkio.dequeue
|
||||
- Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
|
||||
blkio.dequeue
|
||||
Debugging aid only enabled if CONFIG_BFQ_CGROUP_DEBUG=y. This
|
||||
gives the statistics about how many times a group was dequeued
|
||||
from service tree of the device. First two fields specify the major
|
||||
and minor number of the device and third field specifies the number
|
||||
of times a group was dequeued from a particular device.
|
||||
|
||||
- blkio.*_recursive
|
||||
- Recursive version of various stats. These files show the
|
||||
blkio.*_recursive
|
||||
Recursive version of various stats. These files show the
|
||||
same information as their non-recursive counterparts but
|
||||
include stats from all the descendant cgroups.
|
||||
|
||||
Throttling/Upper limit policy files
|
||||
-----------------------------------
|
||||
- blkio.throttle.read_bps_device
|
||||
- Specifies upper limit on READ rate from the device. IO rate is
|
||||
blkio.throttle.read_bps_device
|
||||
Specifies upper limit on READ rate from the device. IO rate is
|
||||
specified in bytes per second. Rules are per device. Following is
|
||||
the format::
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
|
||||
|
||||
- blkio.throttle.write_bps_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
blkio.throttle.write_bps_device
|
||||
Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in bytes per second. Rules are per device. Following is
|
||||
the format::
|
||||
|
||||
echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
|
||||
|
||||
- blkio.throttle.read_iops_device
|
||||
- Specifies upper limit on READ rate from the device. IO rate is
|
||||
blkio.throttle.read_iops_device
|
||||
Specifies upper limit on READ rate from the device. IO rate is
|
||||
specified in IO per second. Rules are per device. Following is
|
||||
the format::
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
|
||||
|
||||
- blkio.throttle.write_iops_device
|
||||
- Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
blkio.throttle.write_iops_device
|
||||
Specifies upper limit on WRITE rate to the device. IO rate is
|
||||
specified in IO per second. Rules are per device. Following is
|
||||
the format::
|
||||
|
||||
echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
|
||||
|
||||
Note: If both BW and IOPS rules are specified for a device, then IO is
|
||||
Note: If both BW and IOPS rules are specified for a device, then IO is
|
||||
subjected to both the constraints.
|
||||
|
||||
- blkio.throttle.io_serviced
|
||||
- Number of IOs (bio) issued to the disk by the group. These
|
||||
blkio.throttle.io_serviced
|
||||
Number of IOs (bio) issued to the disk by the group. These
|
||||
are further divided by the type of operation - read or write, sync
|
||||
or async. First two fields specify the major and minor number of the
|
||||
device, third field specifies the operation type and the fourth field
|
||||
specifies the number of IOs.
|
||||
|
||||
- blkio.throttle.io_service_bytes
|
||||
- Number of bytes transferred to/from the disk by the group. These
|
||||
blkio.throttle.io_service_bytes
|
||||
Number of bytes transferred to/from the disk by the group. These
|
||||
are further divided by the type of operation - read or write, sync
|
||||
or async. First two fields specify the major and minor number of the
|
||||
device, third field specifies the operation type and the fourth field
|
||||
@@ -291,6 +296,6 @@ Note: If both BW and IOPS rules are specified for a device, then IO is
|
||||
|
||||
Common files among various policies
|
||||
-----------------------------------
|
||||
- blkio.reset_stats
|
||||
- Writing an int to this file will result in resetting all the stats
|
||||
blkio.reset_stats
|
||||
Writing an int to this file will result in resetting all the stats
|
||||
for that cgroup.
|
||||
|
||||
@@ -56,6 +56,7 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
|
||||
5-3-3. IO Latency
|
||||
5-3-3-1. How IO Latency Throttling Works
|
||||
5-3-3-2. IO Latency Interface Files
|
||||
5-3-4. IO Priority
|
||||
5-4. PID
|
||||
5-4-1. PID Interface Files
|
||||
5-5. Cpuset
|
||||
@@ -1866,6 +1867,60 @@ IO Latency Interface Files
|
||||
duration of time between evaluation events. Windows only elapse
|
||||
with IO activity. Idle periods extend the most recent window.
|
||||
|
||||
IO Priority
|
||||
~~~~~~~~~~~
|
||||
|
||||
A single attribute controls the behavior of the I/O priority cgroup policy,
|
||||
namely the blkio.prio.class attribute. The following values are accepted for
|
||||
that attribute:
|
||||
|
||||
no-change
|
||||
Do not modify the I/O priority class.
|
||||
|
||||
none-to-rt
|
||||
For requests that do not have an I/O priority class (NONE),
|
||||
change the I/O priority class into RT. Do not modify
|
||||
the I/O priority class of other requests.
|
||||
|
||||
restrict-to-be
|
||||
For requests that do not have an I/O priority class or that have I/O
|
||||
priority class RT, change it into BE. Do not modify the I/O priority
|
||||
class of requests that have priority class IDLE.
|
||||
|
||||
idle
|
||||
Change the I/O priority class of all requests into IDLE, the lowest
|
||||
I/O priority class.
|
||||
|
||||
The following numerical values are associated with the I/O priority policies:
|
||||
|
||||
+-------------+---+
|
||||
| no-change | 0 |
|
||||
+-------------+---+
|
||||
| none-to-rt | 1 |
|
||||
+-------------+---+
|
||||
| restrict-to-be | 2 |
|
||||
+-------------+---+
|
||||
| idle | 3 |
|
||||
+-------------+---+
|
||||
|
||||
The numerical value that corresponds to each I/O priority class is as follows:
|
||||
|
||||
+-------------------------------+---+
|
||||
| IOPRIO_CLASS_NONE | 0 |
|
||||
+-------------------------------+---+
|
||||
| IOPRIO_CLASS_RT (real-time) | 1 |
|
||||
+-------------------------------+---+
|
||||
| IOPRIO_CLASS_BE (best effort) | 2 |
|
||||
+-------------------------------+---+
|
||||
| IOPRIO_CLASS_IDLE | 3 |
|
||||
+-------------------------------+---+
|
||||
|
||||
The algorithm to set the I/O priority class for a request is as follows:
|
||||
|
||||
- Translate the I/O priority class policy into a number.
|
||||
- Change the request I/O priority class into the maximum of the I/O priority
|
||||
class policy number and the numerical I/O priority class.
|
||||
|
||||
PID
|
||||
---
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ first sector should contain valid superblock from previous invocation.
|
||||
Constructor parameters:
|
||||
|
||||
1. type of the cache device - "p" or "s"
|
||||
|
||||
- p - persistent memory
|
||||
- s - SSD
|
||||
2. the underlying device that will be cached
|
||||
@@ -21,7 +20,6 @@ Constructor parameters:
|
||||
size)
|
||||
5. the number of optional parameters (the parameters with an argument
|
||||
count as two)
|
||||
|
||||
start_sector n (default: 0)
|
||||
offset from the start of cache device in 512-byte sectors
|
||||
high_watermark n (default: 50)
|
||||
@@ -53,6 +51,27 @@ Constructor parameters:
|
||||
|
||||
- some underlying devices perform better with fua, some
|
||||
with nofua. The user should test it
|
||||
cleaner
|
||||
when this option is activated (either in the constructor
|
||||
arguments or by a message), the cache will not promote
|
||||
new writes (however, writes to already cached blocks are
|
||||
promoted, to avoid data corruption due to misordered
|
||||
writes) and it will gradually writeback any cached
|
||||
data. The userspace can then monitor the cleaning
|
||||
process with "dmsetup status". When the number of cached
|
||||
blocks drops to zero, userspace can unload the
|
||||
dm-writecache target and replace it with dm-linear or
|
||||
other targets.
|
||||
max_age n
|
||||
specifies the maximum age of a block in milliseconds. If
|
||||
a block is stored in the cache for too long, it will be
|
||||
written to the underlying device and cleaned up.
|
||||
metadata_only
|
||||
only metadata is promoted to the cache. This option
|
||||
improves performance for heavier REQ_META workloads.
|
||||
pause_writeback n (default: 3000)
|
||||
pause writeback if there was some write I/O redirected to
|
||||
the origin volume in the last n milliseconds
|
||||
|
||||
Status:
|
||||
1. error indicator - 0 if there was no error, otherwise error number
|
||||
@@ -77,3 +96,5 @@ Messages:
|
||||
5. resume the device, so that it will use the linear
|
||||
target
|
||||
6. the cache device is now inactive and it can be deleted
|
||||
cleaner
|
||||
See above "cleaner" constructor documentation.
|
||||
|
||||
@@ -113,7 +113,7 @@
|
||||
the GPE dispatcher.
|
||||
This facility can be used to prevent such uncontrolled
|
||||
GPE floodings.
|
||||
Format: <byte>
|
||||
Format: <byte> or <bitmap-list>
|
||||
|
||||
acpi_no_auto_serialize [HW,ACPI]
|
||||
Disable auto-serialization of AML methods
|
||||
@@ -586,6 +586,28 @@
|
||||
loops can be debugged more effectively on production
|
||||
systems.
|
||||
|
||||
clocksource.max_cswd_read_retries= [KNL]
|
||||
Number of clocksource_watchdog() retries due to
|
||||
external delays before the clock will be marked
|
||||
unstable. Defaults to three retries, that is,
|
||||
four attempts to read the clock under test.
|
||||
|
||||
clocksource.verify_n_cpus= [KNL]
|
||||
Limit the number of CPUs checked for clocksources
|
||||
marked with CLOCK_SOURCE_VERIFY_PERCPU that
|
||||
are marked unstable due to excessive skew.
|
||||
A negative value says to check all CPUs, while
|
||||
zero says not to check any. Values larger than
|
||||
nr_cpu_ids are silently truncated to nr_cpu_ids.
|
||||
The actual CPUs are chosen randomly, with
|
||||
no replacement if the same CPU is chosen twice.
|
||||
|
||||
clocksource-wdtest.holdoff= [KNL]
|
||||
Set the time in seconds that the clocksource
|
||||
watchdog test waits before commencing its tests.
|
||||
Defaults to zero when built as a module and to
|
||||
10 seconds when built into the kernel.
|
||||
|
||||
clearcpuid=BITNUM[,BITNUM...] [X86]
|
||||
Disable CPUID feature X for the kernel. See
|
||||
arch/x86/include/asm/cpufeatures.h for the valid bit
|
||||
@@ -3578,6 +3600,12 @@
|
||||
off: turn off poisoning (default)
|
||||
on: turn on poisoning
|
||||
|
||||
page_reporting.page_reporting_order=
|
||||
[KNL] Minimal page reporting order
|
||||
Format: <integer>
|
||||
Adjust the minimal page reporting order. The page
|
||||
reporting is disabled when it exceeds (MAX_ORDER-1).
|
||||
|
||||
panic= [KNL] Kernel behaviour on panic: delay <timeout>
|
||||
timeout > 0: seconds before rebooting
|
||||
timeout = 0: wait forever
|
||||
|
||||
@@ -101,17 +101,6 @@ this results in concentration of disk activity in a small time interval which
|
||||
occurs only once every 10 minutes, or whenever the disk is forced to spin up by
|
||||
a cache miss. The disk can then be spun down in the periods of inactivity.
|
||||
|
||||
If you want to find out which process caused the disk to spin up, you can
|
||||
gather information by setting the flag /proc/sys/vm/block_dump. When this flag
|
||||
is set, Linux reports all disk read and write operations that take place, and
|
||||
all block dirtyings done to files. This makes it possible to debug why a disk
|
||||
needs to spin up, and to increase battery life even more. The output of
|
||||
block_dump is written to the kernel output, and it can be retrieved using
|
||||
"dmesg". When you use block_dump and your kernel logging level also includes
|
||||
kernel debugging messages, you probably want to turn off klogd, otherwise
|
||||
the output of block_dump will be logged, causing disk activity that is not
|
||||
normally there.
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
@@ -39,7 +39,7 @@ in principle, they should work in any architecture where these
|
||||
subsystems are present.
|
||||
|
||||
A periodic hrtimer runs to generate interrupts and kick the watchdog
|
||||
task. An NMI perf event is generated every "watchdog_thresh"
|
||||
job. An NMI perf event is generated every "watchdog_thresh"
|
||||
(compile-time initialized to 10 and configurable through sysctl of the
|
||||
same name) seconds to check for hardlockups. If any CPU in the system
|
||||
does not receive any hrtimer interrupt during that time the
|
||||
@@ -47,7 +47,7 @@ does not receive any hrtimer interrupt during that time the
|
||||
generate a kernel warning or call panic, depending on the
|
||||
configuration.
|
||||
|
||||
The watchdog task is a high priority kernel thread that updates a
|
||||
The watchdog job runs in a stop scheduling thread that updates a
|
||||
timestamp every time it is scheduled. If that timestamp is not updated
|
||||
for 2*watchdog_thresh seconds (the softlockup threshold) the
|
||||
'softlockup detector' (coded inside the hrtimer callback function)
|
||||
|
||||
@@ -347,81 +347,8 @@ for tickless systems. It follows the same basic strategy as the ``menu`` `one
|
||||
<menu-gov_>`_: it always tries to find the deepest idle state suitable for the
|
||||
given conditions. However, it applies a different approach to that problem.
|
||||
|
||||
First, it does not use sleep length correction factors, but instead it attempts
|
||||
to correlate the observed idle duration values with the available idle states
|
||||
and use that information to pick up the idle state that is most likely to
|
||||
"match" the upcoming CPU idle interval. Second, it does not take the tasks
|
||||
that were running on the given CPU in the past and are waiting on some I/O
|
||||
operations to complete now at all (there is no guarantee that they will run on
|
||||
the same CPU when they become runnable again) and the pattern detection code in
|
||||
it avoids taking timer wakeups into account. It also only uses idle duration
|
||||
values less than the current time till the closest timer (with the scheduler
|
||||
tick excluded) for that purpose.
|
||||
|
||||
Like in the ``menu`` governor `case <menu-gov_>`_, the first step is to obtain
|
||||
the *sleep length*, which is the time until the closest timer event with the
|
||||
assumption that the scheduler tick will be stopped (that also is the upper bound
|
||||
on the time until the next CPU wakeup). That value is then used to preselect an
|
||||
idle state on the basis of three metrics maintained for each idle state provided
|
||||
by the ``CPUIdle`` driver: ``hits``, ``misses`` and ``early_hits``.
|
||||
|
||||
The ``hits`` and ``misses`` metrics measure the likelihood that a given idle
|
||||
state will "match" the observed (post-wakeup) idle duration if it "matches" the
|
||||
sleep length. They both are subject to decay (after a CPU wakeup) every time
|
||||
the target residency of the idle state corresponding to them is less than or
|
||||
equal to the sleep length and the target residency of the next idle state is
|
||||
greater than the sleep length (that is, when the idle state corresponding to
|
||||
them "matches" the sleep length). The ``hits`` metric is increased if the
|
||||
former condition is satisfied and the target residency of the given idle state
|
||||
is less than or equal to the observed idle duration and the target residency of
|
||||
the next idle state is greater than the observed idle duration at the same time
|
||||
(that is, it is increased when the given idle state "matches" both the sleep
|
||||
length and the observed idle duration). In turn, the ``misses`` metric is
|
||||
increased when the given idle state "matches" the sleep length only and the
|
||||
observed idle duration is too short for its target residency.
|
||||
|
||||
The ``early_hits`` metric measures the likelihood that a given idle state will
|
||||
"match" the observed (post-wakeup) idle duration if it does not "match" the
|
||||
sleep length. It is subject to decay on every CPU wakeup and it is increased
|
||||
when the idle state corresponding to it "matches" the observed (post-wakeup)
|
||||
idle duration and the target residency of the next idle state is less than or
|
||||
equal to the sleep length (i.e. the idle state "matching" the sleep length is
|
||||
deeper than the given one).
|
||||
|
||||
The governor walks the list of idle states provided by the ``CPUIdle`` driver
|
||||
and finds the last (deepest) one with the target residency less than or equal
|
||||
to the sleep length. Then, the ``hits`` and ``misses`` metrics of that idle
|
||||
state are compared with each other and it is preselected if the ``hits`` one is
|
||||
greater (which means that that idle state is likely to "match" the observed idle
|
||||
duration after CPU wakeup). If the ``misses`` one is greater, the governor
|
||||
preselects the shallower idle state with the maximum ``early_hits`` metric
|
||||
(or if there are multiple shallower idle states with equal ``early_hits``
|
||||
metric which also is the maximum, the shallowest of them will be preselected).
|
||||
[If there is a wakeup latency constraint coming from the `PM QoS framework
|
||||
<cpu-pm-qos_>`_ which is hit before reaching the deepest idle state with the
|
||||
target residency within the sleep length, the deepest idle state with the exit
|
||||
latency within the constraint is preselected without consulting the ``hits``,
|
||||
``misses`` and ``early_hits`` metrics.]
|
||||
|
||||
Next, the governor takes several idle duration values observed most recently
|
||||
into consideration and if at least a half of them are greater than or equal to
|
||||
the target residency of the preselected idle state, that idle state becomes the
|
||||
final candidate to ask for. Otherwise, the average of the most recent idle
|
||||
duration values below the target residency of the preselected idle state is
|
||||
computed and the governor walks the idle states shallower than the preselected
|
||||
one and finds the deepest of them with the target residency within that average.
|
||||
That idle state is then taken as the final candidate to ask for.
|
||||
|
||||
Still, at this point the governor may need to refine the idle state selection if
|
||||
it has not decided to `stop the scheduler tick <idle-cpus-and-tick_>`_. That
|
||||
generally happens if the target residency of the idle state selected so far is
|
||||
less than the tick period and the tick has not been stopped already (in a
|
||||
previous iteration of the idle loop). Then, like in the ``menu`` governor
|
||||
`case <menu-gov_>`_, the sleep length used in the previous computations may not
|
||||
reflect the real time until the closest timer event and if it really is greater
|
||||
than that time, a shallower state with a suitable target residency may need to
|
||||
be selected.
|
||||
|
||||
.. kernel-doc:: drivers/cpuidle/governors/teo.c
|
||||
:doc: teo-description
|
||||
|
||||
.. _idle-states-representation:
|
||||
|
||||
|
||||
@@ -365,6 +365,9 @@ argument is passed to the kernel in the command line.
|
||||
inclusive) including both turbo and non-turbo P-states (see
|
||||
`Turbo P-states Support`_).
|
||||
|
||||
This attribute is present only if the value exposed by it is the same
|
||||
for all of the CPUs in the system.
|
||||
|
||||
The value of this attribute is not affected by the ``no_turbo``
|
||||
setting described `below <no_turbo_attr_>`_.
|
||||
|
||||
@@ -374,6 +377,9 @@ argument is passed to the kernel in the command line.
|
||||
Ratio of the `turbo range <turbo_>`_ size to the size of the entire
|
||||
range of supported P-states, in percent.
|
||||
|
||||
This attribute is present only if the value exposed by it is the same
|
||||
for all of the CPUs in the system.
|
||||
|
||||
This attribute is read-only.
|
||||
|
||||
.. _no_turbo_attr:
|
||||
|
||||
@@ -1297,11 +1297,11 @@ This parameter can be used to control the soft lockup detector.
|
||||
= =================================
|
||||
|
||||
The soft lockup detector monitors CPUs for threads that are hogging the CPUs
|
||||
without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
|
||||
from running. The mechanism depends on the CPUs ability to respond to timer
|
||||
interrupts which are needed for the 'watchdog/N' threads to be woken up by
|
||||
the watchdog timer function, otherwise the NMI watchdog — if enabled — can
|
||||
detect a hard lockup condition.
|
||||
without rescheduling voluntarily, and thus prevent the 'migration/N' threads
|
||||
from running, causing the watchdog work to fail to execute. The mechanism depends
|
||||
on the CPUs' ability to respond to timer interrupts which are needed for the
|
||||
watchdog work to be queued by the watchdog timer function, otherwise the NMI
|
||||
watchdog — if enabled — can detect a hard lockup condition.
|
||||
|
||||
|
||||
stack_erasing
|
||||
|
||||
@@ -25,7 +25,6 @@ files can be found in mm/swap.c.
|
||||
Currently, these files are in /proc/sys/vm:
|
||||
|
||||
- admin_reserve_kbytes
|
||||
- block_dump
|
||||
- compact_memory
|
||||
- compaction_proactiveness
|
||||
- compact_unevictable_allowed
|
||||
@@ -65,7 +64,7 @@ Currently, these files are in /proc/sys/vm:
|
||||
- overcommit_ratio
|
||||
- page-cluster
|
||||
- panic_on_oom
|
||||
- percpu_pagelist_fraction
|
||||
- percpu_pagelist_high_fraction
|
||||
- stat_interval
|
||||
- stat_refresh
|
||||
- numa_stat
|
||||
@@ -107,13 +106,6 @@ On x86_64 this is about 128MB.
|
||||
Changing this takes effect whenever an application requests memory.
|
||||
|
||||
|
||||
block_dump
|
||||
==========
|
||||
|
||||
block_dump enables block I/O debugging when set to a nonzero value. More
|
||||
information on block I/O debugging is in Documentation/admin-guide/laptops/laptop-mode.rst.
|
||||
|
||||
|
||||
compact_memory
|
||||
==============
|
||||
|
||||
@@ -806,22 +798,24 @@ panic_on_oom=2+kdump gives you very strong tool to investigate
|
||||
why oom happens. You can get snapshot.
|
||||
|
||||
|
||||
percpu_pagelist_fraction
|
||||
========================
|
||||
percpu_pagelist_high_fraction
|
||||
=============================
|
||||
|
||||
This is the fraction of pages at most (high mark pcp->high) in each zone that
|
||||
are allocated for each per cpu page list. The min value for this is 8. It
|
||||
means that we don't allow more than 1/8th of pages in each zone to be
|
||||
allocated in any single per_cpu_pagelist. This entry only changes the value
|
||||
of hot per cpu pagelists. User can specify a number like 100 to allocate
|
||||
1/100th of each zone to each per cpu page list.
|
||||
This is the fraction of pages in each zone that can be stored to
|
||||
per-cpu page lists. It is an upper boundary that is divided depending
|
||||
on the number of online CPUs. The min value for this is 8 which means
|
||||
that we do not allow more than 1/8th of pages in each zone to be stored
|
||||
on per-cpu page lists. This entry only changes the value of hot per-cpu
|
||||
page lists. A user can specify a number like 100 to allocate 1/100th of
|
||||
each zone between per-cpu lists.
|
||||
|
||||
The batch value of each per cpu pagelist is also updated as a result. It is
|
||||
set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)
|
||||
The batch value of each per-cpu page list remains the same regardless of
|
||||
the value of the high fraction so allocation latencies are unaffected.
|
||||
|
||||
The initial value is zero. Kernel does not use this value at boot time to set
|
||||
the high water marks for each per cpu page list. If the user writes '0' to this
|
||||
sysctl, it will revert to this default behavior.
|
||||
The initial value is zero. Kernel uses this value to set the high pcp->high
|
||||
mark based on the low watermark for the zone and the number of local
|
||||
online CPUs. If the user writes '0' to this sysctl, it will revert to
|
||||
this default behavior.
|
||||
|
||||
|
||||
stat_interval
|
||||
@@ -952,12 +946,12 @@ allocations, THP and hugetlbfs pages.
|
||||
|
||||
To make it sensible with respect to the watermark_scale_factor
|
||||
parameter, the unit is in fractions of 10,000. The default value of
|
||||
15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
|
||||
watermark will be reclaimed in the event of a pageblock being mixed due
|
||||
to fragmentation. The level of reclaim is determined by the number of
|
||||
fragmentation events that occurred in the recent past. If this value is
|
||||
smaller than a pageblock then a pageblocks worth of pages will be reclaimed
|
||||
(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature.
|
||||
15,000 means that up to 150% of the high watermark will be reclaimed in the
|
||||
event of a pageblock being mixed due to fragmentation. The level of reclaim
|
||||
is determined by the number of fragmentation events that occurred in the
|
||||
recent past. If this value is smaller than a pageblock then a pageblock's
|
||||
worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor
|
||||
of 0 will disable the feature.
|
||||
|
||||
|
||||
watermark_scale_factor
|
||||
|
||||
@@ -553,20 +553,36 @@ throughput sustainable with bfq, because updating the blkio.bfq.*
|
||||
stats is rather costly, especially for some of the stats enabled by
|
||||
CONFIG_BFQ_CGROUP_DEBUG.
|
||||
|
||||
Parameters to set
|
||||
-----------------
|
||||
Parameters
|
||||
----------
|
||||
|
||||
For each group, there is only the following parameter to set.
|
||||
For each group, the following parameters can be set:
|
||||
|
||||
weight (namely blkio.bfq.weight or io.bfq-weight): the weight of the
|
||||
group inside its parent. Available values: 1..1000 (default 100). The
|
||||
linear mapping between ioprio and weights, described at the beginning
|
||||
of the tunable section, is still valid, but all weights higher than
|
||||
IOPRIO_BE_NR*10 are mapped to ioprio 0.
|
||||
weight
|
||||
This specifies the default weight for the cgroup inside its parent.
|
||||
Available values: 1..1000 (default: 100).
|
||||
|
||||
Recall that, if low-latency is set, then BFQ automatically raises the
|
||||
weight of the queues associated with interactive and soft real-time
|
||||
applications. Unset this tunable if you need/want to control weights.
|
||||
For cgroup v1, it is set by writing the value to `blkio.bfq.weight`.
|
||||
|
||||
For cgroup v2, it is set by writing the value to `io.bfq.weight`
|
||||
(with an optional prefix of `default` and a space).
|
||||
|
||||
The linear mapping between ioprio and weights, described at the beginning
|
||||
of the tunable section, is still valid, but all weights higher than
|
||||
IOPRIO_BE_NR*10 are mapped to ioprio 0.
|
||||
|
||||
Recall that, if low-latency is set, then BFQ automatically raises the
|
||||
weight of the queues associated with interactive and soft real-time
|
||||
applications. Unset this tunable if you need/want to control weights.
|
||||
|
||||
weight_device
|
||||
This specifies a per-device weight for the cgroup. The syntax is
|
||||
`major:minor weight`. A weight of `0` may be used to reset to the default
|
||||
weight.
|
||||
|
||||
For cgroup v1, it is set by writing the value to `blkio.bfq.weight_device`.
|
||||
|
||||
For cgroup v2, the file name is `io.bfq.weight`.
|
||||
|
||||
|
||||
[1]
|
||||
|
||||
@@ -12,6 +12,19 @@ BPF instruction-set.
|
||||
The Cilium project also maintains a `BPF and XDP Reference Guide`_
|
||||
that goes into great technical depth about the BPF Architecture.
|
||||
|
||||
libbpf
|
||||
======
|
||||
|
||||
Libbpf is a userspace library for loading and interacting with bpf programs.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
libbpf/libbpf
|
||||
libbpf/libbpf_api
|
||||
libbpf/libbpf_build
|
||||
libbpf/libbpf_naming_convention
|
||||
|
||||
BPF Type Format (BTF)
|
||||
=====================
|
||||
|
||||
@@ -84,6 +97,7 @@ Other
|
||||
:maxdepth: 1
|
||||
|
||||
ringbuf
|
||||
llvm_reloc
|
||||
|
||||
.. Links:
|
||||
.. _networking-filter: ../networking/filter.rst
|
||||
|
||||
14
Documentation/bpf/libbpf/libbpf.rst
Normal file
14
Documentation/bpf/libbpf/libbpf.rst
Normal file
@@ -0,0 +1,14 @@
|
||||
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
|
||||
libbpf
|
||||
======
|
||||
|
||||
This is documentation for libbpf, a userspace library for loading and
|
||||
interacting with bpf programs.
|
||||
|
||||
All general BPF questions, including kernel functionality, libbpf APIs and
|
||||
their application, should be sent to bpf@vger.kernel.org mailing list.
|
||||
You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the
|
||||
mailing list and search its `archive <https://lore.kernel.org/bpf/>`_.
|
||||
Please search the archive before asking new questions. It very well might
|
||||
be that this was already addressed or answered before.
|
||||
27
Documentation/bpf/libbpf/libbpf_api.rst
Normal file
27
Documentation/bpf/libbpf/libbpf_api.rst
Normal file
@@ -0,0 +1,27 @@
|
||||
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
|
||||
API
|
||||
===
|
||||
|
||||
This documentation is autogenerated from header files in libbpf, tools/lib/bpf
|
||||
|
||||
.. kernel-doc:: tools/lib/bpf/libbpf.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: tools/lib/bpf/bpf.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: tools/lib/bpf/btf.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: tools/lib/bpf/xsk.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: tools/lib/bpf/bpf_tracing.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: tools/lib/bpf/bpf_core_read.h
|
||||
:internal:
|
||||
|
||||
.. kernel-doc:: tools/lib/bpf/bpf_endian.h
|
||||
:internal:
|
||||
37
Documentation/bpf/libbpf/libbpf_build.rst
Normal file
37
Documentation/bpf/libbpf/libbpf_build.rst
Normal file
@@ -0,0 +1,37 @@
|
||||
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
|
||||
Building libbpf
|
||||
===============
|
||||
|
||||
libelf and zlib are internal dependencies of libbpf and thus are required to link
|
||||
against and must be installed on the system for applications to work.
|
||||
pkg-config is used by default to find libelf, and the program called
|
||||
can be overridden with PKG_CONFIG.
|
||||
|
||||
If using pkg-config at build time is not desired, it can be disabled by
|
||||
setting NO_PKG_CONFIG=1 when calling make.
|
||||
|
||||
To build both static libbpf.a and shared libbpf.so:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd src
|
||||
$ make
|
||||
|
||||
To build only the static libbpf.a library in directory build/ and install it
|
||||
together with libbpf headers in a staging directory root/:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd src
|
||||
$ mkdir build root
|
||||
$ BUILD_STATIC_ONLY=y OBJDIR=build DESTDIR=root make install
|
||||
|
||||
To build both static libbpf.a and shared libbpf.so against a custom libelf
|
||||
dependency installed in /build/root/ and install them together with libbpf
|
||||
headers in a build directory /build/root/:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd src
|
||||
$ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make
|
||||
@@ -1,7 +1,7 @@
|
||||
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
|
||||
libbpf API naming convention
|
||||
============================
|
||||
API naming convention
|
||||
=====================
|
||||
|
||||
libbpf API provides access to a few logically separated groups of
|
||||
functions and types. Every group has its own naming convention
|
||||
@@ -10,14 +10,14 @@ new function or type is added to keep libbpf API clean and consistent.
|
||||
|
||||
All types and functions provided by libbpf API should have one of the
|
||||
following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
|
||||
``perf_buffer_``.
|
||||
``btf_dump_``, ``ring_buffer_``, ``perf_buffer_``.
|
||||
|
||||
System call wrappers
|
||||
--------------------
|
||||
|
||||
System call wrappers are simple wrappers for commands supported by
|
||||
sys_bpf system call. These wrappers should go to ``bpf.h`` header file
|
||||
and map one-on-one to corresponding commands.
|
||||
and map one to one to corresponding commands.
|
||||
|
||||
For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM``
|
||||
command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc.
|
||||
@@ -49,10 +49,6 @@ object, ``bpf_object``, double underscore and ``open`` that defines the
|
||||
purpose of the function to open ELF file and create ``bpf_object`` from
|
||||
it.
|
||||
|
||||
Another example: ``bpf_program__load`` is named for corresponding
|
||||
object, ``bpf_program``, that is separated from other part of the name
|
||||
by double underscore.
|
||||
|
||||
All objects and corresponding functions other than BTF related should go
|
||||
to ``libbpf.h``. BTF types and functions should go to ``btf.h``.
|
||||
|
||||
@@ -72,11 +68,7 @@ of both low-level ring access functions and high-level configuration
|
||||
functions. These can be mixed and matched. Note that these functions
|
||||
are not reentrant for performance reasons.
|
||||
|
||||
Please take a look at Documentation/networking/af_xdp.rst in the Linux
|
||||
kernel source tree on how to use XDP sockets and for some common
|
||||
mistakes in case you do not get any traffic up to user space.
|
||||
|
||||
libbpf ABI
|
||||
ABI
|
||||
==========
|
||||
|
||||
libbpf can be either linked statically or used as a DSO. To avoid possible
|
||||
@@ -116,7 +108,8 @@ This bump in ABI version is at most once per kernel development cycle.
|
||||
|
||||
For example, if current state of ``libbpf.map`` is:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: c
|
||||
|
||||
LIBBPF_0.0.1 {
|
||||
global:
|
||||
bpf_func_a;
|
||||
@@ -128,7 +121,8 @@ For example, if current state of ``libbpf.map`` is:
|
||||
, and a new symbol ``bpf_func_c`` is being introduced, then
|
||||
``libbpf.map`` should be changed like this:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: c
|
||||
|
||||
LIBBPF_0.0.1 {
|
||||
global:
|
||||
bpf_func_a;
|
||||
@@ -148,7 +142,7 @@ Format of version script and ways to handle ABI changes, including
|
||||
incompatible ones, are described in detail in [1].
|
||||
|
||||
Stand-alone build
|
||||
=================
|
||||
-------------------
|
||||
|
||||
Under https://github.com/libbpf/libbpf there is a (semi-)automated
|
||||
mirror of the mainline's version of libbpf for a stand-alone build.
|
||||
@@ -157,12 +151,12 @@ However, all changes to libbpf's code base must be upstreamed through
|
||||
the mainline kernel tree.
|
||||
|
||||
License
|
||||
=======
|
||||
-------------------
|
||||
|
||||
libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause.
|
||||
|
||||
Links
|
||||
=====
|
||||
-------------------
|
||||
|
||||
[1] https://www.akkadia.org/drepper/dsohowto.pdf
|
||||
(Chapter 3. Maintaining APIs and ABIs).
|
||||
240
Documentation/bpf/llvm_reloc.rst
Normal file
240
Documentation/bpf/llvm_reloc.rst
Normal file
@@ -0,0 +1,240 @@
|
||||
.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
|
||||
|
||||
====================
|
||||
BPF LLVM Relocations
|
||||
====================
|
||||
|
||||
This document describes LLVM BPF backend relocation types.
|
||||
|
||||
Relocation Record
|
||||
=================
|
||||
|
||||
LLVM BPF backend records each relocation with the following 16-byte
|
||||
ELF structure::
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Elf64_Addr r_offset; // Offset from the beginning of section.
|
||||
Elf64_Xword r_info; // Relocation type and symbol index.
|
||||
} Elf64_Rel;
|
||||
|
||||
For example, for the following code::
|
||||
|
||||
int g1 __attribute__((section("sec")));
|
||||
int g2 __attribute__((section("sec")));
|
||||
static volatile int l1 __attribute__((section("sec")));
|
||||
static volatile int l2 __attribute__((section("sec")));
|
||||
int test() {
|
||||
return g1 + g2 + l1 + l2;
|
||||
}
|
||||
|
||||
Compiled with ``clang -target bpf -O2 -c test.c``, the following is
|
||||
the code with ``llvm-objdump -dr test.o``::
|
||||
|
||||
0: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
|
||||
0000000000000000: R_BPF_64_64 g1
|
||||
2: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
|
||||
3: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
|
||||
0000000000000018: R_BPF_64_64 g2
|
||||
5: 61 20 00 00 00 00 00 00 r0 = *(u32 *)(r2 + 0)
|
||||
6: 0f 10 00 00 00 00 00 00 r0 += r1
|
||||
7: 18 01 00 00 08 00 00 00 00 00 00 00 00 00 00 00 r1 = 8 ll
|
||||
0000000000000038: R_BPF_64_64 sec
|
||||
9: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
|
||||
10: 0f 10 00 00 00 00 00 00 r0 += r1
|
||||
11: 18 01 00 00 0c 00 00 00 00 00 00 00 00 00 00 00 r1 = 12 ll
|
||||
0000000000000058: R_BPF_64_64 sec
|
||||
13: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
|
||||
14: 0f 10 00 00 00 00 00 00 r0 += r1
|
||||
15: 95 00 00 00 00 00 00 00 exit
|
||||
|
||||
There are four relocations in the above for four ``LD_imm64`` instructions.
|
||||
The following ``llvm-readelf -r test.o`` shows the binary values of the four
|
||||
relocations::
|
||||
|
||||
Relocation section '.rel.text' at offset 0x190 contains 4 entries:
|
||||
Offset Info Type Symbol's Value Symbol's Name
|
||||
0000000000000000 0000000600000001 R_BPF_64_64 0000000000000000 g1
|
||||
0000000000000018 0000000700000001 R_BPF_64_64 0000000000000004 g2
|
||||
0000000000000038 0000000400000001 R_BPF_64_64 0000000000000000 sec
|
||||
0000000000000058 0000000400000001 R_BPF_64_64 0000000000000000 sec
|
||||
|
||||
Each relocation is represented by ``Offset`` (8 bytes) and ``Info`` (8 bytes).
|
||||
For example, the first relocation corresponds to the first instruction
|
||||
(Offset 0x0) and the corresponding ``Info`` indicates the relocation type
|
||||
of ``R_BPF_64_64`` (type 1) and the entry in the symbol table (entry 6).
|
||||
The following is the symbol table with ``llvm-readelf -s test.o``::
|
||||
|
||||
Symbol table '.symtab' contains 8 entries:
|
||||
Num: Value Size Type Bind Vis Ndx Name
|
||||
0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
|
||||
1: 0000000000000000 0 FILE LOCAL DEFAULT ABS test.c
|
||||
2: 0000000000000008 4 OBJECT LOCAL DEFAULT 4 l1
|
||||
3: 000000000000000c 4 OBJECT LOCAL DEFAULT 4 l2
|
||||
4: 0000000000000000 0 SECTION LOCAL DEFAULT 4 sec
|
||||
5: 0000000000000000 128 FUNC GLOBAL DEFAULT 2 test
|
||||
6: 0000000000000000 4 OBJECT GLOBAL DEFAULT 4 g1
|
||||
7: 0000000000000004 4 OBJECT GLOBAL DEFAULT 4 g2
|
||||
|
||||
The 6th entry is global variable ``g1`` with value 0.
|
||||
|
||||
Similarly, the second relocation is at ``.text`` offset ``0x18``, instruction 3,
|
||||
for global variable ``g2`` which has a symbol value 4, the offset
|
||||
from the start of the ``sec`` section.
|
||||
|
||||
The third and fourth relocations refer to static variables ``l1``
|
||||
and ``l2``. From ``.rel.text`` section above, it is not clear
|
||||
which symbols they really refer to as they both refer to
|
||||
symbol table entry 4, symbol ``sec``, which has ``STT_SECTION`` type
|
||||
and represents a section. So for static variable or function,
|
||||
the section offset is written to the original insn
|
||||
buffer, which is called ``A`` (addend). Looking at
|
||||
above insn ``7`` and ``11``, they have section offset ``8`` and ``12``.
|
||||
From symbol table, we can find that they correspond to entries ``2``
|
||||
and ``3`` for ``l1`` and ``l2``.
|
||||
|
||||
In general, the ``A`` is 0 for global variables and functions,
|
||||
and is the section offset or some computation result based on
|
||||
section offset for static variables/functions. The non-section-offset
|
||||
case refers to function calls. See below for more details.
|
||||
|
||||
Different Relocation Types
|
||||
==========================
|
||||
|
||||
Six relocation types are supported. The following is an overview and
|
||||
``S`` represents the value of the symbol in the symbol table::
|
||||
|
||||
Enum ELF Reloc Type Description BitSize Offset Calculation
|
||||
0 R_BPF_NONE None
|
||||
1 R_BPF_64_64 ld_imm64 insn 32 r_offset + 4 S + A
|
||||
2 R_BPF_64_ABS64 normal data 64 r_offset S + A
|
||||
3 R_BPF_64_ABS32 normal data 32 r_offset S + A
|
||||
4 R_BPF_64_NODYLD32 .BTF[.ext] data 32 r_offset S + A
|
||||
10 R_BPF_64_32 call insn 32 r_offset + 4 (S + A) / 8 - 1
|
||||
|
||||
For example, ``R_BPF_64_64`` relocation type is used for ``ld_imm64`` instruction.
|
||||
The actual to-be-relocated data (0 or section offset)
|
||||
is stored at ``r_offset + 4`` and the read/write
|
||||
data bitsize is 32 (4 bytes). The relocation can be resolved with
|
||||
the symbol value plus implicit addend. Note that the ``BitSize`` is 32 which
|
||||
means the section offset must be less than or equal to ``UINT32_MAX`` and this
|
||||
is enforced by LLVM BPF backend.
|
||||
|
||||
In another case, ``R_BPF_64_ABS64`` relocation type is used for normal 64-bit data.
|
||||
The actual to-be-relocated data is stored at ``r_offset`` and the read/write data
|
||||
bitsize is 64 (8 bytes). The relocation can be resolved with
|
||||
the symbol value plus implicit addend.
|
||||
|
||||
Both ``R_BPF_64_ABS32`` and ``R_BPF_64_NODYLD32`` types are for 32-bit data.
|
||||
But ``R_BPF_64_NODYLD32`` specifically refers to relocations in ``.BTF`` and
|
||||
``.BTF.ext`` sections. For cases like bcc where llvm ``ExecutionEngine RuntimeDyld``
|
||||
is involved, ``R_BPF_64_NODYLD32`` types of relocations should not be resolved
|
||||
to actual function/variable address. Otherwise, ``.BTF`` and ``.BTF.ext``
|
||||
become unusable by bcc and kernel.
|
||||
|
||||
Type ``R_BPF_64_32`` is used for call instruction. The call target section
|
||||
offset is stored at ``r_offset + 4`` (32bit) and calculated as
|
||||
``(S + A) / 8 - 1``.
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
Types ``R_BPF_64_64`` and ``R_BPF_64_32`` are used to resolve ``ld_imm64``
|
||||
and ``call`` instructions. For example::
|
||||
|
||||
__attribute__((noinline)) __attribute__((section("sec1")))
|
||||
int gfunc(int a, int b) {
|
||||
return a * b;
|
||||
}
|
||||
static __attribute__((noinline)) __attribute__((section("sec1")))
|
||||
int lfunc(int a, int b) {
|
||||
return a + b;
|
||||
}
|
||||
int global __attribute__((section("sec2")));
|
||||
int test(int a, int b) {
|
||||
return gfunc(a, b) + lfunc(a, b) + global;
|
||||
}
|
||||
|
||||
Compiled with ``clang -target bpf -O2 -c test.c``, we will have
|
||||
the following code with ``llvm-objdump -dr test.o``::
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
0000000000000000 <test>:
|
||||
0: bf 26 00 00 00 00 00 00 r6 = r2
|
||||
1: bf 17 00 00 00 00 00 00 r7 = r1
|
||||
2: 85 10 00 00 ff ff ff ff call -1
|
||||
0000000000000010: R_BPF_64_32 gfunc
|
||||
3: bf 08 00 00 00 00 00 00 r8 = r0
|
||||
4: bf 71 00 00 00 00 00 00 r1 = r7
|
||||
5: bf 62 00 00 00 00 00 00 r2 = r6
|
||||
6: 85 10 00 00 02 00 00 00 call 2
|
||||
0000000000000030: R_BPF_64_32 sec1
|
||||
7: 0f 80 00 00 00 00 00 00 r0 += r8
|
||||
8: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
|
||||
0000000000000040: R_BPF_64_64 global
|
||||
10: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
|
||||
11: 0f 10 00 00 00 00 00 00 r0 += r1
|
||||
12: 95 00 00 00 00 00 00 00 exit
|
||||
|
||||
Disassembly of section sec1:
|
||||
|
||||
0000000000000000 <gfunc>:
|
||||
0: bf 20 00 00 00 00 00 00 r0 = r2
|
||||
1: 2f 10 00 00 00 00 00 00 r0 *= r1
|
||||
2: 95 00 00 00 00 00 00 00 exit
|
||||
|
||||
0000000000000018 <lfunc>:
|
||||
3: bf 20 00 00 00 00 00 00 r0 = r2
|
||||
4: 0f 10 00 00 00 00 00 00 r0 += r1
|
||||
5: 95 00 00 00 00 00 00 00 exit
|
||||
|
||||
The first relocation corresponds to ``gfunc(a, b)`` where ``gfunc`` has a value of 0,
|
||||
so the ``call`` instruction offset is ``(0 + 0)/8 - 1 = -1``.
|
||||
The second relocation corresponds to ``lfunc(a, b)`` where ``lfunc`` has a section
|
||||
offset ``0x18``, so the ``call`` instruction offset is ``(0 + 0x18)/8 - 1 = 2``.
|
||||
The third relocation corresponds to ``ld_imm64`` of ``global``, which has a section
|
||||
offset ``0``.
|
||||
|
||||
The following is an example to show how ``R_BPF_64_ABS64`` could be generated::
|
||||
|
||||
int global() { return 0; }
|
||||
struct t { void *g; } gbl = { global };
|
||||
|
||||
Compiled with ``clang -target bpf -O2 -g -c test.c``, we will see a
|
||||
relocation below in ``.data`` section with command
|
||||
``llvm-readelf -r test.o``::
|
||||
|
||||
Relocation section '.rel.data' at offset 0x458 contains 1 entries:
|
||||
Offset Info Type Symbol's Value Symbol's Name
|
||||
0000000000000000 0000000700000002 R_BPF_64_ABS64 0000000000000000 global
|
||||
|
||||
The relocation says the first 8 bytes of the ``.data`` section should be
|
||||
filled with the address of the ``global`` function.
|
||||
|
||||
With ``llvm-readelf`` output, we can see that DWARF sections have a bunch of
|
||||
``R_BPF_64_ABS32`` and ``R_BPF_64_ABS64`` relocations::
|
||||
|
||||
Relocation section '.rel.debug_info' at offset 0x468 contains 13 entries:
|
||||
Offset Info Type Symbol's Value Symbol's Name
|
||||
0000000000000006 0000000300000003 R_BPF_64_ABS32 0000000000000000 .debug_abbrev
|
||||
000000000000000c 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
|
||||
0000000000000012 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
|
||||
0000000000000016 0000000600000003 R_BPF_64_ABS32 0000000000000000 .debug_line
|
||||
000000000000001a 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
|
||||
000000000000001e 0000000200000002 R_BPF_64_ABS64 0000000000000000 .text
|
||||
000000000000002b 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
|
||||
0000000000000037 0000000800000002 R_BPF_64_ABS64 0000000000000000 gbl
|
||||
0000000000000040 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str
|
||||
......
|
||||
|
||||
The ``.BTF``/``.BTF.ext`` sections have ``R_BPF_64_NODYLD32`` relocations::
|
||||
|
||||
Relocation section '.rel.BTF' at offset 0x538 contains 1 entries:
|
||||
Offset Info Type Symbol's Value Symbol's Name
|
||||
0000000000000084 0000000800000004 R_BPF_64_NODYLD32 0000000000000000 gbl
|
||||
|
||||
Relocation section '.rel.BTF.ext' at offset 0x548 contains 2 entries:
|
||||
Offset Info Type Symbol's Value Symbol's Name
|
||||
000000000000002c 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text
|
||||
0000000000000040 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text
|
||||
@@ -146,7 +146,6 @@ Legacy
|
||||
|
||||
irq_domain_add_simple()
|
||||
irq_domain_add_legacy()
|
||||
irq_domain_add_legacy_isa()
|
||||
irq_domain_create_simple()
|
||||
irq_domain_create_legacy()
|
||||
|
||||
|
||||
@@ -513,9 +513,10 @@ Time and date
|
||||
::
|
||||
|
||||
%pt[RT] YYYY-mm-ddTHH:MM:SS
|
||||
%pt[RT]s YYYY-mm-dd HH:MM:SS
|
||||
%pt[RT]d YYYY-mm-dd
|
||||
%pt[RT]t HH:MM:SS
|
||||
%pt[RT][dt][r]
|
||||
%pt[RT][dt][r][s]
|
||||
|
||||
For printing date and time as represented by::
|
||||
|
||||
@@ -527,6 +528,10 @@ in human readable format.
|
||||
By default year will be incremented by 1900 and month by 1.
|
||||
Use %pt[RT]r (raw) to suppress this behaviour.
|
||||
|
||||
The %pt[RT]s (space) will override ISO 8601 separator by using ' ' (space)
|
||||
instead of 'T' (Capital T) between date and time. It won't have any effect
|
||||
when date or time is omitted.
|
||||
|
||||
Passed by reference.
|
||||
|
||||
struct clk
|
||||
|
||||
@@ -447,11 +447,10 @@ When a test fails due to a failed ``kmalloc``::
|
||||
|
||||
When a test fails due to a missing KASAN report::
|
||||
|
||||
# kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629
|
||||
Expected kasan_data->report_expected == kasan_data->report_found, but
|
||||
kasan_data->report_expected == 1
|
||||
kasan_data->report_found == 0
|
||||
not ok 28 - kmalloc_double_kzfree
|
||||
# kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:974
|
||||
KASAN failure expected in "kfree_sensitive(ptr)", but none occurred
|
||||
not ok 44 - kmalloc_double_kzfree
|
||||
|
||||
|
||||
At the end the cumulative status of all KASAN tests is printed. On success::
|
||||
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
NVIDIA Tegra Activity Monitor
|
||||
|
||||
The activity monitor block collects statistics about the behaviour of other
|
||||
components in the system. This information can be used to derive the rate at
|
||||
which the external memory needs to be clocked in order to serve all requests
|
||||
from the monitored clients.
|
||||
|
||||
Required properties:
|
||||
- compatible: should be "nvidia,tegra<chip>-actmon"
|
||||
- reg: offset and length of the register set for the device
|
||||
- interrupts: standard interrupt property
|
||||
- clocks: Must contain a phandle and clock specifier pair for each entry in
|
||||
clock-names. See ../../clock/clock-bindings.txt for details.
|
||||
- clock-names: Must include the following entries:
|
||||
- actmon
|
||||
- emc
|
||||
- resets: Must contain an entry for each entry in reset-names. See
|
||||
../../reset/reset.txt for details.
|
||||
- reset-names: Must include the following entries:
|
||||
- actmon
|
||||
- operating-points-v2: See ../bindings/opp/opp.txt for details.
|
||||
- interconnects: Should contain entries for memory clients sitting on
|
||||
MC->EMC memory interconnect path.
|
||||
- interconnect-names: Should include name of the interconnect path for each
|
||||
interconnect entry. Consult TRM documentation for
|
||||
information about available memory clients, see MEMORY
|
||||
CONTROLLER section.
|
||||
|
||||
For each opp entry in 'operating-points-v2' table:
|
||||
- opp-supported-hw: bitfield indicating SoC speedo ID mask
|
||||
- opp-peak-kBps: peak bandwidth of the memory channel
|
||||
|
||||
Example:
|
||||
dfs_opp_table: opp-table {
|
||||
compatible = "operating-points-v2";
|
||||
|
||||
opp@12750000 {
|
||||
opp-hz = /bits/ 64 <12750000>;
|
||||
opp-supported-hw = <0x000F>;
|
||||
opp-peak-kBps = <51000>;
|
||||
};
|
||||
...
|
||||
};
|
||||
|
||||
actmon@6000c800 {
|
||||
compatible = "nvidia,tegra124-actmon";
|
||||
reg = <0x0 0x6000c800 0x0 0x400>;
|
||||
interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&tegra_car TEGRA124_CLK_ACTMON>,
|
||||
<&tegra_car TEGRA124_CLK_EMC>;
|
||||
clock-names = "actmon", "emc";
|
||||
resets = <&tegra_car 119>;
|
||||
reset-names = "actmon";
|
||||
operating-points-v2 = <&dfs_opp_table>;
|
||||
interconnects = <&mc TEGRA124_MC_MPCORER &emc>;
|
||||
interconnect-names = "cpu";
|
||||
};
|
||||
@@ -0,0 +1,126 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/devfreq/nvidia,tegra30-actmon.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: NVIDIA Tegra30 Activity Monitor
|
||||
|
||||
maintainers:
|
||||
- Dmitry Osipenko <digetx@gmail.com>
|
||||
- Jon Hunter <jonathanh@nvidia.com>
|
||||
- Thierry Reding <thierry.reding@gmail.com>
|
||||
|
||||
description: |
|
||||
The activity monitor block collects statistics about the behaviour of other
|
||||
components in the system. This information can be used to derive the rate at
|
||||
which the external memory needs to be clocked in order to serve all requests
|
||||
from the monitored clients.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- nvidia,tegra30-actmon
|
||||
- nvidia,tegra114-actmon
|
||||
- nvidia,tegra124-actmon
|
||||
- nvidia,tegra210-actmon
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
clocks:
|
||||
maxItems: 2
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: actmon
|
||||
- const: emc
|
||||
|
||||
resets:
|
||||
maxItems: 1
|
||||
|
||||
reset-names:
|
||||
items:
|
||||
- const: actmon
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
interconnects:
|
||||
minItems: 1
|
||||
maxItems: 12
|
||||
|
||||
interconnect-names:
|
||||
minItems: 1
|
||||
maxItems: 12
|
||||
description:
|
||||
Should include name of the interconnect path for each interconnect
|
||||
entry. Consult TRM documentation for information about available
|
||||
memory clients, see MEMORY CONTROLLER and ACTIVITY MONITOR sections.
|
||||
|
||||
operating-points-v2:
|
||||
description:
|
||||
Should contain freqs and voltages and opp-supported-hw property, which
|
||||
is a bitfield indicating SoC speedo ID mask.
|
||||
|
||||
"#cooling-cells":
|
||||
const: 2
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- clocks
|
||||
- clock-names
|
||||
- resets
|
||||
- reset-names
|
||||
- interrupts
|
||||
- interconnects
|
||||
- interconnect-names
|
||||
- operating-points-v2
|
||||
- "#cooling-cells"
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/memory/tegra30-mc.h>
|
||||
|
||||
mc: memory-controller@7000f000 {
|
||||
compatible = "nvidia,tegra30-mc";
|
||||
reg = <0x7000f000 0x400>;
|
||||
clocks = <&clk 32>;
|
||||
clock-names = "mc";
|
||||
|
||||
interrupts = <0 77 4>;
|
||||
|
||||
#iommu-cells = <1>;
|
||||
#reset-cells = <1>;
|
||||
#interconnect-cells = <1>;
|
||||
};
|
||||
|
||||
emc: external-memory-controller@7000f400 {
|
||||
compatible = "nvidia,tegra30-emc";
|
||||
reg = <0x7000f400 0x400>;
|
||||
interrupts = <0 78 4>;
|
||||
clocks = <&clk 57>;
|
||||
|
||||
nvidia,memory-controller = <&mc>;
|
||||
operating-points-v2 = <&dvfs_opp_table>;
|
||||
power-domains = <&domain>;
|
||||
|
||||
#interconnect-cells = <0>;
|
||||
};
|
||||
|
||||
actmon@6000c800 {
|
||||
compatible = "nvidia,tegra30-actmon";
|
||||
reg = <0x6000c800 0x400>;
|
||||
interrupts = <0 45 4>;
|
||||
clocks = <&clk 119>, <&clk 57>;
|
||||
clock-names = "actmon", "emc";
|
||||
resets = <&rst 119>;
|
||||
reset-names = "actmon";
|
||||
operating-points-v2 = <&dvfs_opp_table>;
|
||||
interconnects = <&mc TEGRA30_MC_MPCORER &emc>;
|
||||
interconnect-names = "cpu-read";
|
||||
#cooling-cells = <2>;
|
||||
};
|
||||
@@ -145,6 +145,19 @@ properties:
|
||||
required:
|
||||
- affinity
|
||||
|
||||
clocks:
|
||||
maxItems: 1
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: aclk
|
||||
|
||||
power-domains:
|
||||
maxItems: 1
|
||||
|
||||
resets:
|
||||
maxItems: 1
|
||||
|
||||
dependencies:
|
||||
mbi-ranges: [ msi-controller ]
|
||||
msi-controller: [ mbi-ranges ]
|
||||
|
||||
@@ -29,6 +29,7 @@ properties:
|
||||
- renesas,intc-ex-r8a774c0 # RZ/G2E
|
||||
- renesas,intc-ex-r8a7795 # R-Car H3
|
||||
- renesas,intc-ex-r8a7796 # R-Car M3-W
|
||||
- renesas,intc-ex-r8a77961 # R-Car M3-W+
|
||||
- renesas,intc-ex-r8a77965 # R-Car M3-N
|
||||
- renesas,intc-ex-r8a77970 # R-Car V3M
|
||||
- renesas,intc-ex-r8a77980 # R-Car V3H
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/ipmi/aspeed,ast2400-kcs-bmc.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: ASPEED BMC KCS Devices
|
||||
|
||||
maintainers:
|
||||
- Andrew Jeffery <andrew@aj.id.au>
|
||||
|
||||
description: |
|
||||
The Aspeed BMC SoCs typically use the Keyboard-Controller-Style (KCS)
|
||||
interfaces on the LPC bus for in-band IPMI communication with their host.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
oneOf:
|
||||
- description: Channel ID derived from reg
|
||||
items:
|
||||
enum:
|
||||
- aspeed,ast2400-kcs-bmc-v2
|
||||
- aspeed,ast2500-kcs-bmc-v2
|
||||
- aspeed,ast2600-kcs-bmc
|
||||
|
||||
- description: Old-style with explicit channel ID, no reg
|
||||
deprecated: true
|
||||
items:
|
||||
enum:
|
||||
- aspeed,ast2400-kcs-bmc
|
||||
- aspeed,ast2500-kcs-bmc
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
reg:
|
||||
# maxItems: 3
|
||||
items:
|
||||
- description: IDR register
|
||||
- description: ODR register
|
||||
- description: STR register
|
||||
|
||||
aspeed,lpc-io-reg:
|
||||
$ref: '/schemas/types.yaml#/definitions/uint32-array'
|
||||
minItems: 1
|
||||
maxItems: 2
|
||||
description: |
|
||||
The host CPU LPC IO data and status addresses for the device. For most
|
||||
channels the status address is derived from the data address, but the
|
||||
status address may be optionally provided.
|
||||
|
||||
aspeed,lpc-interrupts:
|
||||
$ref: "/schemas/types.yaml#/definitions/uint32-array"
|
||||
minItems: 2
|
||||
maxItems: 2
|
||||
description: |
|
||||
A 2-cell property expressing the LPC SerIRQ number and the interrupt
|
||||
level/sense encoding (specified in the standard fashion).
|
||||
|
||||
Note that the generated interrupt is issued from the BMC to the host, and
|
||||
thus the target interrupt controller is not captured by the BMC's
|
||||
devicetree.
|
||||
|
||||
kcs_chan:
|
||||
deprecated: true
|
||||
$ref: '/schemas/types.yaml#/definitions/uint32'
|
||||
description: The LPC channel number in the controller
|
||||
|
||||
kcs_addr:
|
||||
deprecated: true
|
||||
$ref: '/schemas/types.yaml#/definitions/uint32'
|
||||
description: The host CPU IO map address
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- interrupts
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
allOf:
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- aspeed,ast2400-kcs-bmc
|
||||
- aspeed,ast2500-kcs-bmc
|
||||
then:
|
||||
required:
|
||||
- kcs_chan
|
||||
- kcs_addr
|
||||
else:
|
||||
required:
|
||||
- reg
|
||||
- aspeed,lpc-io-reg
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/irq.h>
|
||||
kcs3: kcs@24 {
|
||||
compatible = "aspeed,ast2600-kcs-bmc";
|
||||
reg = <0x24 0x1>, <0x30 0x1>, <0x3c 0x1>;
|
||||
aspeed,lpc-io-reg = <0xca2>;
|
||||
aspeed,lpc-interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
|
||||
interrupts = <8>;
|
||||
};
|
||||
@@ -1,33 +0,0 @@
|
||||
# Aspeed KCS (Keyboard Controller Style) IPMI interface
|
||||
|
||||
The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs
|
||||
(Baseboard Management Controllers) and the KCS interface can be
|
||||
used to perform in-band IPMI communication with their host.
|
||||
|
||||
## v1
|
||||
Required properties:
|
||||
- compatible : should be one of
|
||||
"aspeed,ast2400-kcs-bmc"
|
||||
"aspeed,ast2500-kcs-bmc"
|
||||
- interrupts : interrupt generated by the controller
|
||||
- kcs_chan : The LPC channel number in the controller
|
||||
- kcs_addr : The host CPU IO map address
|
||||
|
||||
## v2
|
||||
Required properties:
|
||||
- compatible : should be one of
|
||||
"aspeed,ast2400-kcs-bmc-v2"
|
||||
"aspeed,ast2500-kcs-bmc-v2"
|
||||
- reg : The address and size of the IDR, ODR and STR registers
|
||||
- interrupts : interrupt generated by the controller
|
||||
- aspeed,lpc-io-reg : The host CPU LPC IO address for the device
|
||||
|
||||
Example:
|
||||
|
||||
kcs3: kcs@24 {
|
||||
compatible = "aspeed,ast2500-kcs-bmc-v2";
|
||||
reg = <0x24 0x1>, <0x30 0x1>, <0x3c 0x1>;
|
||||
aspeed,lpc-reg = <0xca2>;
|
||||
interrupts = <8>;
|
||||
status = "okay";
|
||||
};
|
||||
@@ -26,6 +26,7 @@ properties:
|
||||
oneOf:
|
||||
- const: fsl,imx6sx-mu
|
||||
- const: fsl,imx7ulp-mu
|
||||
- const: fsl,imx8ulp-mu
|
||||
- const: fsl,imx8-mu-scu
|
||||
- items:
|
||||
- enum:
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: "http://devicetree.org/schemas/mailbox/microchip,polarfire-soc-mailbox.yaml#"
|
||||
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
|
||||
|
||||
title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) mailbox controller
|
||||
|
||||
maintainers:
|
||||
- Conor Dooley <conor.dooley@microchip.com>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: microchip,polarfire-soc-mailbox
|
||||
|
||||
reg:
|
||||
items:
|
||||
- description: mailbox data registers
|
||||
- description: mailbox interrupt registers
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
"#mbox-cells":
|
||||
const: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
- "#mbox-cells"
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
soc {
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
mbox: mailbox@37020000 {
|
||||
compatible = "microchip,polarfire-soc-mailbox";
|
||||
reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318c 0x0 0x40>;
|
||||
interrupt-parent = <&L1>;
|
||||
interrupts = <96>;
|
||||
#mbox-cells = <1>;
|
||||
};
|
||||
};
|
||||
@@ -19,6 +19,7 @@ properties:
|
||||
- qcom,ipq6018-apcs-apps-global
|
||||
- qcom,ipq8074-apcs-apps-global
|
||||
- qcom,msm8916-apcs-kpss-global
|
||||
- qcom,msm8939-apcs-kpss-global
|
||||
- qcom,msm8994-apcs-kpss-global
|
||||
- qcom,msm8996-apcs-hmss-global
|
||||
- qcom,msm8998-apcs-hmss-global
|
||||
@@ -27,6 +28,7 @@ properties:
|
||||
- qcom,sc8180x-apss-shared
|
||||
- qcom,sdm660-apcs-hmss-global
|
||||
- qcom,sdm845-apss-shared
|
||||
- qcom,sm6125-apcs-hmss-global
|
||||
- qcom,sm8150-apss-shared
|
||||
|
||||
reg:
|
||||
@@ -75,6 +77,7 @@ allOf:
|
||||
- qcom,sc7180-apss-shared
|
||||
- qcom,sdm660-apcs-hmss-global
|
||||
- qcom,sdm845-apss-shared
|
||||
- qcom,sm6125-apcs-hmss-global
|
||||
- qcom,sm8150-apss-shared
|
||||
then:
|
||||
properties:
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
* Broadcom iProc MDIO bus controller
|
||||
|
||||
Required properties:
|
||||
- compatible: should be "brcm,iproc-mdio"
|
||||
- reg: address and length of the register set for the MDIO interface
|
||||
- #size-cells: must be 1
|
||||
- #address-cells: must be 0
|
||||
|
||||
Child nodes of this MDIO bus controller node are standard Ethernet PHY device
|
||||
nodes as described in Documentation/devicetree/bindings/net/phy.txt
|
||||
|
||||
Example:
|
||||
|
||||
mdio@18002000 {
|
||||
compatible = "brcm,iproc-mdio";
|
||||
reg = <0x18002000 0x8>;
|
||||
#size-cells = <1>;
|
||||
#address-cells = <0>;
|
||||
|
||||
enet-gphy@0 {
|
||||
reg = <0>;
|
||||
};
|
||||
};
|
||||
38
Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml
Normal file
38
Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/brcm,iproc-mdio.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Broadcom iProc MDIO bus controller
|
||||
|
||||
maintainers:
|
||||
- Rafał Miłecki <rafal@milecki.pl>
|
||||
|
||||
allOf:
|
||||
- $ref: mdio.yaml#
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: brcm,iproc-mdio
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
required:
|
||||
- reg
|
||||
|
||||
examples:
|
||||
- |
|
||||
mdio@18002000 {
|
||||
compatible = "brcm,iproc-mdio";
|
||||
reg = <0x18002000 0x8>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
ethernet-phy@0 {
|
||||
reg = <0>;
|
||||
};
|
||||
};
|
||||
@@ -1,80 +0,0 @@
|
||||
Renesas R-Car CAN controller Device Tree Bindings
|
||||
-------------------------------------------------
|
||||
|
||||
Required properties:
|
||||
- compatible: "renesas,can-r8a7742" if CAN controller is a part of R8A7742 SoC.
|
||||
"renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC.
|
||||
"renesas,can-r8a7744" if CAN controller is a part of R8A7744 SoC.
|
||||
"renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
|
||||
"renesas,can-r8a77470" if CAN controller is a part of R8A77470 SoC.
|
||||
"renesas,can-r8a774a1" if CAN controller is a part of R8A774A1 SoC.
|
||||
"renesas,can-r8a774b1" if CAN controller is a part of R8A774B1 SoC.
|
||||
"renesas,can-r8a774c0" if CAN controller is a part of R8A774C0 SoC.
|
||||
"renesas,can-r8a774e1" if CAN controller is a part of R8A774E1 SoC.
|
||||
"renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
|
||||
"renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
|
||||
"renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC.
|
||||
"renesas,can-r8a7791" if CAN controller is a part of R8A7791 SoC.
|
||||
"renesas,can-r8a7792" if CAN controller is a part of R8A7792 SoC.
|
||||
"renesas,can-r8a7793" if CAN controller is a part of R8A7793 SoC.
|
||||
"renesas,can-r8a7794" if CAN controller is a part of R8A7794 SoC.
|
||||
"renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC.
|
||||
"renesas,can-r8a7796" if CAN controller is a part of R8A77960 SoC.
|
||||
"renesas,can-r8a77961" if CAN controller is a part of R8A77961 SoC.
|
||||
"renesas,can-r8a77965" if CAN controller is a part of R8A77965 SoC.
|
||||
"renesas,can-r8a77990" if CAN controller is a part of R8A77990 SoC.
|
||||
"renesas,can-r8a77995" if CAN controller is a part of R8A77995 SoC.
|
||||
"renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device.
|
||||
"renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1
|
||||
compatible device.
|
||||
"renesas,rcar-gen3-can" for a generic R-Car Gen3 or RZ/G2
|
||||
compatible device.
|
||||
When compatible with the generic version, nodes must list the
|
||||
SoC-specific version corresponding to the platform first
|
||||
followed by the generic version.
|
||||
|
||||
- reg: physical base address and size of the R-Car CAN register map.
|
||||
- interrupts: interrupt specifier for the sole interrupt.
|
||||
- clocks: phandles and clock specifiers for 3 CAN clock inputs.
|
||||
- clock-names: 3 clock input name strings: "clkp1", "clkp2", and "can_clk".
|
||||
- pinctrl-0: pin control group to be used for this controller.
|
||||
- pinctrl-names: must be "default".
|
||||
|
||||
Required properties for R8A774A1, R8A774B1, R8A774C0, R8A774E1, R8A7795,
|
||||
R8A77960, R8A77961, R8A77965, R8A77990, and R8A77995:
|
||||
For the denoted SoCs, "clkp2" can be CANFD clock. This is a div6 clock and can
|
||||
be used by both CAN and CAN FD controller at the same time. It needs to be
|
||||
scaled to maximum frequency if any of these controllers use it. This is done
|
||||
using the below properties:
|
||||
|
||||
- assigned-clocks: phandle of clkp2(CANFD) clock.
|
||||
- assigned-clock-rates: maximum frequency of this clock.
|
||||
|
||||
Optional properties:
|
||||
- renesas,can-clock-select: R-Car CAN Clock Source Select. Valid values are:
|
||||
<0x0> (default) : Peripheral clock (clkp1)
|
||||
<0x1> : Peripheral clock (clkp2)
|
||||
<0x3> : External input clock
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
SoC common .dtsi file:
|
||||
|
||||
can0: can@e6e80000 {
|
||||
compatible = "renesas,can-r8a7791", "renesas,rcar-gen2-can";
|
||||
reg = <0 0xe6e80000 0 0x1000>;
|
||||
interrupts = <0 186 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&mstp9_clks R8A7791_CLK_RCAN0>,
|
||||
<&cpg_clocks R8A7791_CLK_RCAN>, <&can_clk>;
|
||||
clock-names = "clkp1", "clkp2", "can_clk";
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
Board specific .dts file:
|
||||
|
||||
&can0 {
|
||||
pinctrl-0 = <&can0_pins>;
|
||||
pinctrl-names = "default";
|
||||
status = "okay";
|
||||
};
|
||||
@@ -1,107 +0,0 @@
|
||||
Renesas R-Car CAN FD controller Device Tree Bindings
|
||||
----------------------------------------------------
|
||||
|
||||
Required properties:
|
||||
- compatible: Must contain one or more of the following:
|
||||
- "renesas,rcar-gen3-canfd" for R-Car Gen3 and RZ/G2 compatible controllers.
|
||||
- "renesas,r8a774a1-canfd" for R8A774A1 (RZ/G2M) compatible controller.
|
||||
- "renesas,r8a774b1-canfd" for R8A774B1 (RZ/G2N) compatible controller.
|
||||
- "renesas,r8a774c0-canfd" for R8A774C0 (RZ/G2E) compatible controller.
|
||||
- "renesas,r8a774e1-canfd" for R8A774E1 (RZ/G2H) compatible controller.
|
||||
- "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller.
|
||||
- "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller.
|
||||
- "renesas,r8a77965-canfd" for R8A77965 (R-Car M3-N) compatible controller.
|
||||
- "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller.
|
||||
- "renesas,r8a77980-canfd" for R8A77980 (R-Car V3H) compatible controller.
|
||||
- "renesas,r8a77990-canfd" for R8A77990 (R-Car E3) compatible controller.
|
||||
- "renesas,r8a77995-canfd" for R8A77995 (R-Car D3) compatible controller.
|
||||
|
||||
When compatible with the generic version, nodes must list the
|
||||
SoC-specific version corresponding to the platform first, followed by the
|
||||
family-specific and/or generic versions.
|
||||
|
||||
- reg: physical base address and size of the R-Car CAN FD register map.
|
||||
- interrupts: interrupt specifiers for the Channel & Global interrupts
|
||||
- clocks: phandles and clock specifiers for 3 clock inputs.
|
||||
- clock-names: 3 clock input name strings: "fck", "canfd", "can_clk".
|
||||
- pinctrl-0: pin control group to be used for this controller.
|
||||
- pinctrl-names: must be "default".
|
||||
|
||||
Required child nodes:
|
||||
The controller supports two channels and each is represented as a child node.
|
||||
The names of the child nodes are "channel0" and "channel1" respectively. Each
|
||||
child node supports the "status" property only, which is used to
|
||||
enable/disable the respective channel.
|
||||
|
||||
Required properties for R8A774A1, R8A774B1, R8A774C0, R8A774E1, R8A7795,
|
||||
R8A7796, R8A77965, R8A77990, and R8A77995:
|
||||
In the denoted SoCs, canfd clock is a div6 clock and can be used by both CAN
|
||||
and CAN FD controller at the same time. It needs to be scaled to maximum
|
||||
frequency if any of these controllers use it. This is done using the below
|
||||
properties:
|
||||
|
||||
- assigned-clocks: phandle of canfd clock.
|
||||
- assigned-clock-rates: maximum frequency of this clock.
|
||||
|
||||
Optional property:
|
||||
The controller can operate in either CAN FD only mode (default) or
|
||||
Classical CAN only mode. The mode is global to both the channels. In order to
|
||||
enable the latter, define the following optional property.
|
||||
- renesas,no-can-fd: puts the controller in Classical CAN only mode.
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
SoC common .dtsi file:
|
||||
|
||||
canfd: can@e66c0000 {
|
||||
compatible = "renesas,r8a7795-canfd",
|
||||
"renesas,rcar-gen3-canfd";
|
||||
reg = <0 0xe66c0000 0 0x8000>;
|
||||
interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&cpg CPG_MOD 914>,
|
||||
<&cpg CPG_CORE R8A7795_CLK_CANFD>,
|
||||
<&can_clk>;
|
||||
clock-names = "fck", "canfd", "can_clk";
|
||||
assigned-clocks = <&cpg CPG_CORE R8A7795_CLK_CANFD>;
|
||||
assigned-clock-rates = <40000000>;
|
||||
power-domains = <&cpg>;
|
||||
status = "disabled";
|
||||
|
||||
channel0 {
|
||||
status = "disabled";
|
||||
};
|
||||
|
||||
channel1 {
|
||||
status = "disabled";
|
||||
};
|
||||
};
|
||||
|
||||
Board specific .dts file:
|
||||
|
||||
E.g. below enables Channel 1 alone in the board in Classical CAN only mode.
|
||||
|
||||
&canfd {
|
||||
pinctrl-0 = <&canfd1_pins>;
|
||||
pinctrl-names = "default";
|
||||
renesas,no-can-fd;
|
||||
status = "okay";
|
||||
|
||||
channel1 {
|
||||
status = "okay";
|
||||
};
|
||||
};
|
||||
|
||||
E.g. below enables Channel 0 alone in the board using External clock
|
||||
as fCAN clock.
|
||||
|
||||
&canfd {
|
||||
pinctrl-0 = <&canfd0_pins>, <&can_clk_pins>;
|
||||
pinctrl-names = "default";
|
||||
status = "okay";
|
||||
|
||||
channel0 {
|
||||
status = "okay";
|
||||
};
|
||||
};
|
||||
139
Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml
Normal file
139
Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml
Normal file
@@ -0,0 +1,139 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/can/renesas,rcar-can.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Renesas R-Car CAN Controller
|
||||
|
||||
maintainers:
|
||||
- Sergei Shtylyov <sergei.shtylyov@gmail.com>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
oneOf:
|
||||
- items:
|
||||
- enum:
|
||||
- renesas,can-r8a7778 # R-Car M1-A
|
||||
- renesas,can-r8a7779 # R-Car H1
|
||||
- const: renesas,rcar-gen1-can # R-Car Gen1
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- renesas,can-r8a7742 # RZ/G1H
|
||||
- renesas,can-r8a7743 # RZ/G1M
|
||||
- renesas,can-r8a7744 # RZ/G1N
|
||||
- renesas,can-r8a7745 # RZ/G1E
|
||||
- renesas,can-r8a77470 # RZ/G1C
|
||||
- renesas,can-r8a7790 # R-Car H2
|
||||
- renesas,can-r8a7791 # R-Car M2-W
|
||||
- renesas,can-r8a7792 # R-Car V2H
|
||||
- renesas,can-r8a7793 # R-Car M2-N
|
||||
- renesas,can-r8a7794 # R-Car E2
|
||||
- const: renesas,rcar-gen2-can # R-Car Gen2 and RZ/G1
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- renesas,can-r8a774a1 # RZ/G2M
|
||||
- renesas,can-r8a774b1 # RZ/G2N
|
||||
- renesas,can-r8a774c0 # RZ/G2E
|
||||
- renesas,can-r8a774e1 # RZ/G2H
|
||||
- renesas,can-r8a7795 # R-Car H3
|
||||
- renesas,can-r8a7796 # R-Car M3-W
|
||||
- renesas,can-r8a77961 # R-Car M3-W+
|
||||
- renesas,can-r8a77965 # R-Car M3-N
|
||||
- renesas,can-r8a77990 # R-Car E3
|
||||
- renesas,can-r8a77995 # R-Car D3
|
||||
- const: renesas,rcar-gen3-can # R-Car Gen3 and RZ/G2
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
clocks:
|
||||
maxItems: 3
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: clkp1
|
||||
- const: clkp2
|
||||
- const: can_clk
|
||||
|
||||
power-domains:
|
||||
maxItems: 1
|
||||
|
||||
resets:
|
||||
maxItems: 1
|
||||
|
||||
renesas,can-clock-select:
|
||||
$ref: /schemas/types.yaml#/definitions/uint32
|
||||
enum: [ 0, 1, 3 ]
|
||||
default: 0
|
||||
description: |
|
||||
R-Car CAN Clock Source Select. Valid values are:
|
||||
<0x0> (default) : Peripheral clock (clkp1)
|
||||
<0x1> : Peripheral clock (clkp2)
|
||||
<0x3> : External input clock
|
||||
|
||||
assigned-clocks:
|
||||
description:
|
||||
Reference to the clkp2 (CANFD) clock.
|
||||
On R-Car Gen3 and RZ/G2 SoCs, "clkp2" is the CANFD clock. This is a div6
|
||||
clock and can be used by both CAN and CAN FD controllers at the same
|
||||
time. It needs to be scaled to maximum frequency if any of these
|
||||
controllers use it.
|
||||
|
||||
assigned-clock-rates:
|
||||
description: Maximum frequency of the CANFD clock.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
- clocks
|
||||
- clock-names
|
||||
- power-domains
|
||||
|
||||
allOf:
|
||||
- $ref: can-controller.yaml#
|
||||
|
||||
- if:
|
||||
not:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: renesas,rcar-gen1-can
|
||||
then:
|
||||
required:
|
||||
- resets
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: renesas,rcar-gen3-can
|
||||
then:
|
||||
required:
|
||||
- assigned-clocks
|
||||
- assigned-clock-rates
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/r8a7791-cpg-mssr.h>
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
#include <dt-bindings/power/r8a7791-sysc.h>
|
||||
|
||||
can0: can@e6e80000 {
|
||||
compatible = "renesas,can-r8a7791", "renesas,rcar-gen2-can";
|
||||
reg = <0xe6e80000 0x1000>;
|
||||
interrupts = <GIC_SPI 186 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&cpg CPG_MOD 916>,
|
||||
<&cpg CPG_CORE R8A7791_CLK_RCAN>, <&can_clk>;
|
||||
clock-names = "clkp1", "clkp2", "can_clk";
|
||||
power-domains = <&sysc R8A7791_PD_ALWAYS_ON>;
|
||||
resets = <&cpg 916>;
|
||||
};
|
||||
@@ -0,0 +1,122 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/can/renesas,rcar-canfd.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Renesas R-Car CAN FD Controller
|
||||
|
||||
maintainers:
|
||||
- Fabrizio Castro <fabrizio.castro.jz@renesas.com>
|
||||
|
||||
allOf:
|
||||
- $ref: can-controller.yaml#
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
oneOf:
|
||||
- items:
|
||||
- enum:
|
||||
- renesas,r8a774a1-canfd # RZ/G2M
|
||||
- renesas,r8a774b1-canfd # RZ/G2N
|
||||
- renesas,r8a774c0-canfd # RZ/G2E
|
||||
- renesas,r8a774e1-canfd # RZ/G2H
|
||||
- renesas,r8a7795-canfd # R-Car H3
|
||||
- renesas,r8a7796-canfd # R-Car M3-W
|
||||
- renesas,r8a77965-canfd # R-Car M3-N
|
||||
- renesas,r8a77970-canfd # R-Car V3M
|
||||
- renesas,r8a77980-canfd # R-Car V3H
|
||||
- renesas,r8a77990-canfd # R-Car E3
|
||||
- renesas,r8a77995-canfd # R-Car D3
|
||||
- const: renesas,rcar-gen3-canfd # R-Car Gen3 and RZ/G2
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
items:
|
||||
- description: Channel interrupt
|
||||
- description: Global interrupt
|
||||
|
||||
clocks:
|
||||
maxItems: 3
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: fck
|
||||
- const: canfd
|
||||
- const: can_clk
|
||||
|
||||
power-domains:
|
||||
maxItems: 1
|
||||
|
||||
resets:
|
||||
maxItems: 1
|
||||
|
||||
renesas,no-can-fd:
|
||||
$ref: /schemas/types.yaml#/definitions/flag
|
||||
description:
|
||||
The controller can operate in either CAN FD only mode (default) or
|
||||
Classical CAN only mode. The mode is global to both the channels.
|
||||
Specify this property to put the controller in Classical CAN only mode.
|
||||
|
||||
assigned-clocks:
|
||||
description:
|
||||
Reference to the CANFD clock. The CANFD clock is a div6 clock and can be
|
||||
used by both CAN (if present) and CAN FD controllers at the same time.
|
||||
It needs to be scaled to maximum frequency if any of these controllers
|
||||
use it.
|
||||
|
||||
assigned-clock-rates:
|
||||
description: Maximum frequency of the CANFD clock.
|
||||
|
||||
patternProperties:
|
||||
"^channel[01]$":
|
||||
type: object
|
||||
description:
|
||||
The controller supports two channels and each is represented as a child
|
||||
node. Each child node supports the "status" property only, which
|
||||
is used to enable/disable the respective channel.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
- clocks
|
||||
- clock-names
|
||||
- power-domains
|
||||
- resets
|
||||
- assigned-clocks
|
||||
- assigned-clock-rates
|
||||
- channel0
|
||||
- channel1
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/r8a7795-cpg-mssr.h>
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
#include <dt-bindings/power/r8a7795-sysc.h>
|
||||
|
||||
canfd: can@e66c0000 {
|
||||
compatible = "renesas,r8a7795-canfd",
|
||||
"renesas,rcar-gen3-canfd";
|
||||
reg = <0xe66c0000 0x8000>;
|
||||
interrupts = <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
|
||||
clocks = <&cpg CPG_MOD 914>,
|
||||
<&cpg CPG_CORE R8A7795_CLK_CANFD>,
|
||||
<&can_clk>;
|
||||
clock-names = "fck", "canfd", "can_clk";
|
||||
assigned-clocks = <&cpg CPG_CORE R8A7795_CLK_CANFD>;
|
||||
assigned-clock-rates = <40000000>;
|
||||
power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
|
||||
resets = <&cpg 914>;
|
||||
|
||||
channel0 {
|
||||
};
|
||||
|
||||
channel1 {
|
||||
};
|
||||
};
|
||||
@@ -81,6 +81,12 @@ Optional properties:
|
||||
- gpio-controller: Boolean; if defined, MT7530's LED controller will run on
|
||||
GPIO mode.
|
||||
- #gpio-cells: Must be 2 if gpio-controller is defined.
|
||||
- interrupt-controller: Boolean; Enables the internal interrupt controller.
|
||||
|
||||
If interrupt-controller is defined, the following properties are required.
|
||||
|
||||
- #interrupt-cells: Must be 1.
|
||||
- interrupts: Parent interrupt for the interrupt controller.
|
||||
|
||||
See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
|
||||
required, optional properties and how the integrated switch subnodes must
|
||||
|
||||
132
Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
Normal file
132
Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
Normal file
@@ -0,0 +1,132 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/dsa/nxp,sja1105.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: NXP SJA1105 Automotive Ethernet Switch Family Device Tree Bindings
|
||||
|
||||
description:
|
||||
The SJA1105 SPI interface requires a CS-to-CLK time (t2 in UM10944.pdf) of at
|
||||
least one half of t_CLK. At an SPI frequency of 1MHz, this means a minimum
|
||||
cs_sck_delay of 500ns. Ensuring that this SPI timing requirement is observed
|
||||
depends on the SPI bus master driver.
|
||||
|
||||
allOf:
|
||||
- $ref: "dsa.yaml#"
|
||||
|
||||
maintainers:
|
||||
- Vladimir Oltean <vladimir.oltean@nxp.com>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- nxp,sja1105e
|
||||
- nxp,sja1105t
|
||||
- nxp,sja1105p
|
||||
- nxp,sja1105q
|
||||
- nxp,sja1105r
|
||||
- nxp,sja1105s
|
||||
- nxp,sja1110a
|
||||
- nxp,sja1110b
|
||||
- nxp,sja1110c
|
||||
- nxp,sja1110d
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
# Optional container node for the 2 internal MDIO buses of the SJA1110
|
||||
# (one for the internal 100base-T1 PHYs and the other for the single
|
||||
# 100base-TX PHY). The "reg" property does not have physical significance.
|
||||
# The PHY addresses to port correspondence is as follows: for 100base-T1,
|
||||
# port 5 has PHY 1, port 6 has PHY 2 etc, while for 100base-TX, port 1 has
|
||||
# PHY 1.
|
||||
mdios:
|
||||
type: object
|
||||
|
||||
properties:
|
||||
'#address-cells':
|
||||
const: 1
|
||||
'#size-cells':
|
||||
const: 0
|
||||
|
||||
patternProperties:
|
||||
"^mdio@[0-1]$":
|
||||
type: object
|
||||
|
||||
allOf:
|
||||
- $ref: "http://devicetree.org/schemas/net/mdio.yaml#"
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
oneOf:
|
||||
- enum:
|
||||
- nxp,sja1110-base-t1-mdio
|
||||
- nxp,sja1110-base-tx-mdio
|
||||
|
||||
reg:
|
||||
oneOf:
|
||||
- enum:
|
||||
- 0
|
||||
- 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
spi {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
ethernet-switch@1 {
|
||||
reg = <0x1>;
|
||||
compatible = "nxp,sja1105t";
|
||||
|
||||
ethernet-ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
port@0 {
|
||||
phy-handle = <&rgmii_phy6>;
|
||||
phy-mode = "rgmii-id";
|
||||
reg = <0>;
|
||||
};
|
||||
|
||||
port@1 {
|
||||
phy-handle = <&rgmii_phy3>;
|
||||
phy-mode = "rgmii-id";
|
||||
reg = <1>;
|
||||
};
|
||||
|
||||
port@2 {
|
||||
phy-handle = <&rgmii_phy4>;
|
||||
phy-mode = "rgmii-id";
|
||||
reg = <2>;
|
||||
};
|
||||
|
||||
port@3 {
|
||||
phy-mode = "rgmii-id";
|
||||
reg = <3>;
|
||||
};
|
||||
|
||||
port@4 {
|
||||
ethernet = <&enet2>;
|
||||
phy-mode = "rgmii";
|
||||
reg = <4>;
|
||||
|
||||
fixed-link {
|
||||
speed = <1000>;
|
||||
full-duplex;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -3,6 +3,7 @@
|
||||
Required properties:
|
||||
|
||||
- compatible: should be one of:
|
||||
"qca,qca8327"
|
||||
"qca,qca8334"
|
||||
"qca,qca8337"
|
||||
|
||||
@@ -20,6 +21,10 @@ described in dsa/dsa.txt. If the QCA8K switch is connect to a SoC's external
|
||||
mdio-bus each subnode describing a port needs to have a valid phandle
|
||||
referencing the internal PHY it is connected to. This is because there's no
|
||||
N:N mapping of port and PHY id.
|
||||
To declare the internal mdio-bus configuration, declare a mdio node in the
|
||||
switch node and declare the phandle for the port referencing the internal
|
||||
PHY it is connected to. In this config an internal mdio-bus is registered and
|
||||
the mdio MASTER is used for communication.
|
||||
|
||||
Don't use mixed external and internal mdio-bus configurations, as this is
|
||||
not supported by the hardware.
|
||||
@@ -149,26 +154,61 @@ for the internal master mdio-bus configuration:
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
label = "lan1";
|
||||
phy-mode = "internal";
|
||||
phy-handle = <&phy_port1>;
|
||||
};
|
||||
|
||||
port@2 {
|
||||
reg = <2>;
|
||||
label = "lan2";
|
||||
phy-mode = "internal";
|
||||
phy-handle = <&phy_port2>;
|
||||
};
|
||||
|
||||
port@3 {
|
||||
reg = <3>;
|
||||
label = "lan3";
|
||||
phy-mode = "internal";
|
||||
phy-handle = <&phy_port3>;
|
||||
};
|
||||
|
||||
port@4 {
|
||||
reg = <4>;
|
||||
label = "lan4";
|
||||
phy-mode = "internal";
|
||||
phy-handle = <&phy_port4>;
|
||||
};
|
||||
|
||||
port@5 {
|
||||
reg = <5>;
|
||||
label = "wan";
|
||||
phy-mode = "internal";
|
||||
phy-handle = <&phy_port5>;
|
||||
};
|
||||
};
|
||||
|
||||
mdio {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
phy_port1: phy@0 {
|
||||
reg = <0>;
|
||||
};
|
||||
|
||||
phy_port2: phy@1 {
|
||||
reg = <1>;
|
||||
};
|
||||
|
||||
phy_port3: phy@2 {
|
||||
reg = <2>;
|
||||
};
|
||||
|
||||
phy_port4: phy@3 {
|
||||
reg = <3>;
|
||||
};
|
||||
|
||||
phy_port5: phy@4 {
|
||||
reg = <4>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,156 +0,0 @@
|
||||
NXP SJA1105 switch driver
|
||||
=========================
|
||||
|
||||
Required properties:
|
||||
|
||||
- compatible:
|
||||
Must be one of:
|
||||
- "nxp,sja1105e"
|
||||
- "nxp,sja1105t"
|
||||
- "nxp,sja1105p"
|
||||
- "nxp,sja1105q"
|
||||
- "nxp,sja1105r"
|
||||
- "nxp,sja1105s"
|
||||
|
||||
Although the device ID could be detected at runtime, explicit bindings
|
||||
are required in order to be able to statically check their validity.
|
||||
For example, SGMII can only be specified on port 4 of R and S devices,
|
||||
and the non-SGMII devices, while pin-compatible, are not equal in terms
|
||||
of support for RGMII internal delays (supported on P/Q/R/S, but not on
|
||||
E/T).
|
||||
|
||||
Optional properties:
|
||||
|
||||
- sja1105,role-mac:
|
||||
- sja1105,role-phy:
|
||||
Boolean properties that can be assigned under each port node. By
|
||||
default (unless otherwise specified) a port is configured as MAC if it
|
||||
is driving a PHY (phy-handle is present) or as PHY if it is PHY-less
|
||||
(fixed-link specified, presumably because it is connected to a MAC).
|
||||
The effect of this property (in either its implicit or explicit form)
|
||||
is:
|
||||
- In the case of MII or RMII it specifies whether the SJA1105 port is a
|
||||
clock source or sink for this interface (not applicable for RGMII
|
||||
where there is a Tx and an Rx clock).
|
||||
- In the case of RGMII it affects the behavior regarding internal
|
||||
delays:
|
||||
1. If sja1105,role-mac is specified, and the phy-mode property is one
|
||||
of "rgmii-id", "rgmii-txid" or "rgmii-rxid", then the entity
|
||||
designated to apply the delay/clock skew necessary for RGMII
|
||||
is the PHY. The SJA1105 MAC does not apply any internal delays.
|
||||
2. If sja1105,role-phy is specified, and the phy-mode property is one
|
||||
of the above, the designated entity to apply the internal delays
|
||||
is the SJA1105 MAC (if hardware-supported). This is only supported
|
||||
by the second-generation (P/Q/R/S) hardware. On a first-generation
|
||||
E or T device, it is an error to specify an RGMII phy-mode other
|
||||
than "rgmii" for a port that is in fixed-link mode. In that case,
|
||||
the clock skew must either be added by the MAC at the other end of
|
||||
the fixed-link, or by PCB serpentine traces on the board.
|
||||
These properties are required, for example, in the case where SJA1105
|
||||
ports are at both ends of a MII/RMII PHY-less setup. One end would need
|
||||
to have sja1105,role-mac, while the other sja1105,role-phy.
|
||||
|
||||
See Documentation/devicetree/bindings/net/dsa/dsa.txt for the list of standard
|
||||
DSA required and optional properties.
|
||||
|
||||
Other observations
|
||||
------------------
|
||||
|
||||
The SJA1105 SPI interface requires a CS-to-CLK time (t2 in UM10944) of at least
|
||||
one half of t_CLK. At an SPI frequency of 1MHz, this means a minimum
|
||||
cs_sck_delay of 500ns. Ensuring that this SPI timing requirement is observed
|
||||
depends on the SPI bus master driver.
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
Ethernet switch connected via SPI to the host, CPU port wired to enet2:
|
||||
|
||||
arch/arm/boot/dts/ls1021a-tsn.dts:
|
||||
|
||||
/* SPI controller of the LS1021 */
|
||||
&dspi0 {
|
||||
sja1105@1 {
|
||||
reg = <0x1>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
compatible = "nxp,sja1105t";
|
||||
spi-max-frequency = <4000000>;
|
||||
fsl,spi-cs-sck-delay = <1000>;
|
||||
fsl,spi-sck-cs-delay = <1000>;
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
port@0 {
|
||||
/* ETH5 written on chassis */
|
||||
label = "swp5";
|
||||
phy-handle = <&rgmii_phy6>;
|
||||
phy-mode = "rgmii-id";
|
||||
reg = <0>;
|
||||
/* Implicit "sja1105,role-mac;" */
|
||||
};
|
||||
port@1 {
|
||||
/* ETH2 written on chassis */
|
||||
label = "swp2";
|
||||
phy-handle = <&rgmii_phy3>;
|
||||
phy-mode = "rgmii-id";
|
||||
reg = <1>;
|
||||
/* Implicit "sja1105,role-mac;" */
|
||||
};
|
||||
port@2 {
|
||||
/* ETH3 written on chassis */
|
||||
label = "swp3";
|
||||
phy-handle = <&rgmii_phy4>;
|
||||
phy-mode = "rgmii-id";
|
||||
reg = <2>;
|
||||
/* Implicit "sja1105,role-mac;" */
|
||||
};
|
||||
port@3 {
|
||||
/* ETH4 written on chassis */
|
||||
phy-handle = <&rgmii_phy5>;
|
||||
label = "swp4";
|
||||
phy-mode = "rgmii-id";
|
||||
reg = <3>;
|
||||
/* Implicit "sja1105,role-mac;" */
|
||||
};
|
||||
port@4 {
|
||||
/* Internal port connected to eth2 */
|
||||
ethernet = <&enet2>;
|
||||
phy-mode = "rgmii";
|
||||
reg = <4>;
|
||||
/* Implicit "sja1105,role-phy;" */
|
||||
fixed-link {
|
||||
speed = <1000>;
|
||||
full-duplex;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
/* MDIO controller of the LS1021 */
|
||||
&mdio0 {
|
||||
/* BCM5464 */
|
||||
rgmii_phy3: ethernet-phy@3 {
|
||||
reg = <0x3>;
|
||||
};
|
||||
rgmii_phy4: ethernet-phy@4 {
|
||||
reg = <0x4>;
|
||||
};
|
||||
rgmii_phy5: ethernet-phy@5 {
|
||||
reg = <0x5>;
|
||||
};
|
||||
rgmii_phy6: ethernet-phy@6 {
|
||||
reg = <0x6>;
|
||||
};
|
||||
};
|
||||
|
||||
/* Ethernet master port of the LS1021 */
|
||||
&enet2 {
|
||||
phy-connection-type = "rgmii";
|
||||
status = "ok";
|
||||
fixed-link {
|
||||
speed = <1000>;
|
||||
full-duplex;
|
||||
};
|
||||
};
|
||||
@@ -68,6 +68,7 @@ properties:
|
||||
- tbi
|
||||
- rev-mii
|
||||
- rmii
|
||||
- rev-rmii
|
||||
|
||||
# RX and TX delays are added by the MAC when required
|
||||
- rgmii
|
||||
@@ -97,6 +98,7 @@ properties:
|
||||
- 10gbase-kr
|
||||
- usxgmii
|
||||
- 10gbase-r
|
||||
- 25gbase-r
|
||||
|
||||
phy-mode:
|
||||
$ref: "#/properties/phy-connection-type"
|
||||
|
||||
76
Documentation/devicetree/bindings/net/ingenic,mac.yaml
Normal file
76
Documentation/devicetree/bindings/net/ingenic,mac.yaml
Normal file
@@ -0,0 +1,76 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/ingenic,mac.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Bindings for MAC in Ingenic SoCs
|
||||
|
||||
maintainers:
|
||||
- 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
|
||||
|
||||
description:
|
||||
The Ethernet Media Access Controller in Ingenic SoCs.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- ingenic,jz4775-mac
|
||||
- ingenic,x1000-mac
|
||||
- ingenic,x1600-mac
|
||||
- ingenic,x1830-mac
|
||||
- ingenic,x2000-mac
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
interrupt-names:
|
||||
const: macirq
|
||||
|
||||
clocks:
|
||||
maxItems: 1
|
||||
|
||||
clock-names:
|
||||
const: stmmaceth
|
||||
|
||||
mode-reg:
|
||||
description: An extra syscon register that controls the ethernet interface and timing delay
|
||||
|
||||
rx-clk-delay-ps:
|
||||
description: RGMII receive clock delay defined in pico seconds
|
||||
|
||||
tx-clk-delay-ps:
|
||||
description: RGMII transmit clock delay defined in pico seconds
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
- interrupt-names
|
||||
- clocks
|
||||
- clock-names
|
||||
- mode-reg
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/x1000-cgu.h>
|
||||
|
||||
mac: ethernet@134b0000 {
|
||||
compatible = "ingenic,x1000-mac";
|
||||
reg = <0x134b0000 0x2000>;
|
||||
|
||||
interrupt-parent = <&intc>;
|
||||
interrupts = <55>;
|
||||
interrupt-names = "macirq";
|
||||
|
||||
clocks = <&cgu X1000_CLK_MAC>;
|
||||
clock-names = "stmmaceth";
|
||||
|
||||
mode-reg = <&mac_phy_ctrl>;
|
||||
};
|
||||
...
|
||||
@@ -0,0 +1,226 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/microchip,sparx5-switch.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Microchip Sparx5 Ethernet switch controller
|
||||
|
||||
maintainers:
|
||||
- Steen Hegelund <steen.hegelund@microchip.com>
|
||||
- Lars Povlsen <lars.povlsen@microchip.com>
|
||||
|
||||
description: |
|
||||
The SparX-5 Enterprise Ethernet switch family provides a rich set of
|
||||
Enterprise switching features such as advanced TCAM-based VLAN and
|
||||
QoS processing enabling delivery of differentiated services, and
|
||||
security through TCAM-based frame processing using versatile content
|
||||
aware processor (VCAP).
|
||||
|
||||
IPv4/IPv6 Layer 3 (L3) unicast and multicast routing is supported
|
||||
with up to 18K IPv4/9K IPv6 unicast LPM entries and up to 9K IPv4/3K
|
||||
IPv6 (S,G) multicast groups.
|
||||
|
||||
L3 security features include source guard and reverse path
|
||||
forwarding (uRPF) tasks. Additional L3 features include VRF-Lite and
|
||||
IP tunnels (IP over GRE/IP).
|
||||
|
||||
The SparX-5 switch family targets managed Layer 2 and Layer 3
|
||||
equipment in SMB, SME, and Enterprise where high port count
|
||||
1G/2.5G/5G/10G switching with 10G/25G aggregation links is required.
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
pattern: "^switch@[0-9a-f]+$"
|
||||
|
||||
compatible:
|
||||
const: microchip,sparx5-switch
|
||||
|
||||
reg:
|
||||
items:
|
||||
- description: cpu target
|
||||
- description: devices target
|
||||
- description: general control block target
|
||||
|
||||
reg-names:
|
||||
items:
|
||||
- const: cpu
|
||||
- const: devices
|
||||
- const: gcb
|
||||
|
||||
interrupts:
|
||||
minItems: 1
|
||||
items:
|
||||
- description: register based extraction
|
||||
- description: frame dma based extraction
|
||||
|
||||
interrupt-names:
|
||||
minItems: 1
|
||||
items:
|
||||
- const: xtr
|
||||
- const: fdma
|
||||
|
||||
resets:
|
||||
items:
|
||||
- description: Reset controller used for switch core reset (soft reset)
|
||||
|
||||
reset-names:
|
||||
items:
|
||||
- const: switch
|
||||
|
||||
mac-address: true
|
||||
|
||||
ethernet-ports:
|
||||
type: object
|
||||
patternProperties:
|
||||
"^port@[0-9a-f]+$":
|
||||
type: object
|
||||
|
||||
properties:
|
||||
'#address-cells':
|
||||
const: 1
|
||||
'#size-cells':
|
||||
const: 0
|
||||
|
||||
reg:
|
||||
description: Switch port number
|
||||
|
||||
phys:
|
||||
maxItems: 1
|
||||
description:
|
||||
phandle of an Ethernet SerDes PHY. This defines which SerDes
|
||||
instance will handle the Ethernet traffic.
|
||||
|
||||
phy-mode:
|
||||
description:
|
||||
This specifies the interface used by the Ethernet SerDes towards
|
||||
the PHY or SFP.
|
||||
|
||||
microchip,bandwidth:
|
||||
description: Specifies bandwidth in Mbit/s allocated to the port.
|
||||
$ref: "/schemas/types.yaml#/definitions/uint32"
|
||||
maximum: 25000
|
||||
|
||||
phy-handle:
|
||||
description:
|
||||
phandle of an Ethernet PHY. This is optional and if provided it
|
||||
points to the cuPHY used by the Ethernet SerDes.
|
||||
|
||||
sfp:
|
||||
description:
|
||||
phandle of an SFP. This is optional and used when not specifying
|
||||
a cuPHY. It points to the SFP node that describes the SFP used by
|
||||
the Ethernet SerDes.
|
||||
|
||||
managed: true
|
||||
|
||||
microchip,sd-sgpio:
|
||||
description:
|
||||
Index of the port's Signal Detect SGPIO in the set of 384 SGPIOs.
|
||||
This is optional, and only needed if the default used index is
|
||||
not correct.
|
||||
$ref: "/schemas/types.yaml#/definitions/uint32"
|
||||
minimum: 0
|
||||
maximum: 383
|
||||
|
||||
required:
|
||||
- reg
|
||||
- phys
|
||||
- phy-mode
|
||||
- microchip,bandwidth
|
||||
|
||||
oneOf:
|
||||
- required:
|
||||
- phy-handle
|
||||
- required:
|
||||
- sfp
|
||||
- managed
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- reg-names
|
||||
- interrupts
|
||||
- interrupt-names
|
||||
- resets
|
||||
- reset-names
|
||||
- ethernet-ports
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
switch: switch@600000000 {
|
||||
compatible = "microchip,sparx5-switch";
|
||||
reg = <0 0x401000>,
|
||||
<0x10004000 0x7fc000>,
|
||||
<0x11010000 0xaf0000>;
|
||||
reg-names = "cpu", "devices", "gcb";
|
||||
interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupt-names = "xtr";
|
||||
resets = <&reset 0>;
|
||||
reset-names = "switch";
|
||||
ethernet-ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
port0: port@0 {
|
||||
reg = <0>;
|
||||
microchip,bandwidth = <1000>;
|
||||
phys = <&serdes 13>;
|
||||
phy-handle = <&phy0>;
|
||||
phy-mode = "qsgmii";
|
||||
};
|
||||
/* ... */
|
||||
/* Then the 25G interfaces */
|
||||
port60: port@60 {
|
||||
reg = <60>;
|
||||
microchip,bandwidth = <25000>;
|
||||
phys = <&serdes 29>;
|
||||
phy-mode = "10gbase-r";
|
||||
sfp = <&sfp_eth60>;
|
||||
managed = "in-band-status";
|
||||
microchip,sd-sgpio = <365>;
|
||||
};
|
||||
port61: port@61 {
|
||||
reg = <61>;
|
||||
microchip,bandwidth = <25000>;
|
||||
phys = <&serdes 30>;
|
||||
phy-mode = "10gbase-r";
|
||||
sfp = <&sfp_eth61>;
|
||||
managed = "in-band-status";
|
||||
microchip,sd-sgpio = <369>;
|
||||
};
|
||||
port62: port@62 {
|
||||
reg = <62>;
|
||||
microchip,bandwidth = <25000>;
|
||||
phys = <&serdes 31>;
|
||||
phy-mode = "10gbase-r";
|
||||
sfp = <&sfp_eth62>;
|
||||
managed = "in-band-status";
|
||||
microchip,sd-sgpio = <373>;
|
||||
};
|
||||
port63: port@63 {
|
||||
reg = <63>;
|
||||
microchip,bandwidth = <25000>;
|
||||
phys = <&serdes 32>;
|
||||
phy-mode = "10gbase-r";
|
||||
sfp = <&sfp_eth63>;
|
||||
managed = "in-band-status";
|
||||
microchip,sd-sgpio = <377>;
|
||||
};
|
||||
/* Finally the Management interface */
|
||||
port64: port@64 {
|
||||
reg = <64>;
|
||||
microchip,bandwidth = <1000>;
|
||||
phys = <&serdes 0>;
|
||||
phy-handle = <&phy64>;
|
||||
phy-mode = "sgmii";
|
||||
mac-address = [ 00 00 00 01 02 03 ];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
...
|
||||
# vim: set ts=2 sw=2 sts=2 tw=80 et cc=80 ft=yaml :
|
||||
@@ -27,6 +27,9 @@ properties:
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
clocks:
|
||||
maxItems: 1
|
||||
|
||||
wake-gpios:
|
||||
maxItems: 1
|
||||
description:
|
||||
@@ -80,6 +83,8 @@ examples:
|
||||
|
||||
en-gpios = <&gpf1 4 GPIO_ACTIVE_HIGH>;
|
||||
wake-gpios = <&gpj0 2 GPIO_ACTIVE_HIGH>;
|
||||
|
||||
clocks = <&rpmcc 20>;
|
||||
};
|
||||
};
|
||||
# UART example on Raspberry Pi
|
||||
|
||||
@@ -44,6 +44,7 @@ description:
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- qcom,msm8998-ipa
|
||||
- qcom,sc7180-ipa
|
||||
- qcom,sc7280-ipa
|
||||
- qcom,sdm845-ipa
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
Qualcomm Bluetooth Chips
|
||||
---------------------
|
||||
|
||||
This documents the binding structure and common properties for serial
|
||||
attached Qualcomm devices.
|
||||
|
||||
Serial attached Qualcomm devices shall be a child node of the host UART
|
||||
device the slave device is attached to.
|
||||
|
||||
Required properties:
|
||||
- compatible: should contain one of the following:
|
||||
* "qcom,qca6174-bt"
|
||||
* "qcom,qca9377-bt"
|
||||
* "qcom,wcn3990-bt"
|
||||
* "qcom,wcn3991-bt"
|
||||
* "qcom,wcn3998-bt"
|
||||
* "qcom,qca6390-bt"
|
||||
|
||||
Optional properties for compatible string qcom,qca6174-bt:
|
||||
|
||||
- enable-gpios: gpio specifier used to enable chip
|
||||
- clocks: clock provided to the controller (SUSCLK_32KHZ)
|
||||
- firmware-name: specify the name of nvm firmware to load
|
||||
|
||||
Optional properties for compatible string qcom,qca9377-bt:
|
||||
|
||||
- max-speed: see Documentation/devicetree/bindings/serial/serial.yaml
|
||||
|
||||
Required properties for compatible string qcom,wcn399x-bt:
|
||||
|
||||
- vddio-supply: VDD_IO supply regulator handle.
|
||||
- vddxo-supply: VDD_XO supply regulator handle.
|
||||
- vddrf-supply: VDD_RF supply regulator handle.
|
||||
- vddch0-supply: VDD_CH0 supply regulator handle.
|
||||
|
||||
Optional properties for compatible string qcom,wcn399x-bt:
|
||||
|
||||
- max-speed: see Documentation/devicetree/bindings/serial/serial.yaml
|
||||
- firmware-name: specify the name of nvm firmware to load
|
||||
- clocks: clock provided to the controller
|
||||
|
||||
Examples:
|
||||
|
||||
serial@7570000 {
|
||||
label = "BT-UART";
|
||||
status = "okay";
|
||||
|
||||
bluetooth {
|
||||
compatible = "qcom,qca6174-bt";
|
||||
|
||||
enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
|
||||
clocks = <&divclk4>;
|
||||
firmware-name = "nvm_00440302.bin";
|
||||
};
|
||||
};
|
||||
|
||||
serial@898000 {
|
||||
bluetooth {
|
||||
compatible = "qcom,wcn3990-bt";
|
||||
|
||||
vddio-supply = <&vreg_s4a_1p8>;
|
||||
vddxo-supply = <&vreg_l7a_1p8>;
|
||||
vddrf-supply = <&vreg_l17a_1p3>;
|
||||
vddch0-supply = <&vreg_l25a_3p3>;
|
||||
max-speed = <3200000>;
|
||||
firmware-name = "crnv21.bin";
|
||||
clocks = <&rpmhcc RPMH_RF_CLK2>;
|
||||
};
|
||||
};
|
||||
183
Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml
Normal file
183
Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml
Normal file
@@ -0,0 +1,183 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/qualcomm-bluetooth.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm Bluetooth Chips
|
||||
|
||||
maintainers:
|
||||
- Balakrishna Godavarthi <bgodavar@codeaurora.org>
|
||||
- Rocky Liao <rjliao@codeaurora.org>
|
||||
|
||||
description:
|
||||
This binding describes Qualcomm UART-attached bluetooth chips.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- qcom,qca6174-bt
|
||||
- qcom,qca9377-bt
|
||||
- qcom,wcn3990-bt
|
||||
- qcom,wcn3991-bt
|
||||
- qcom,wcn3998-bt
|
||||
- qcom,qca6390-bt
|
||||
- qcom,wcn6750-bt
|
||||
|
||||
enable-gpios:
|
||||
maxItems: 1
|
||||
description: gpio specifier used to enable chip
|
||||
|
||||
swctrl-gpios:
|
||||
maxItems: 1
|
||||
description: gpio specifier is used to find status
|
||||
of clock supply to SoC
|
||||
|
||||
clocks:
|
||||
maxItems: 1
|
||||
description: clock provided to the controller (SUSCLK_32KHZ)
|
||||
|
||||
vddio-supply:
|
||||
description: VDD_IO supply regulator handle
|
||||
|
||||
vddxo-supply:
|
||||
description: VDD_XO supply regulator handle
|
||||
|
||||
vddrf-supply:
|
||||
description: VDD_RF supply regulator handle
|
||||
|
||||
vddch0-supply:
|
||||
description: VDD_CH0 supply regulator handle
|
||||
|
||||
vddaon-supply:
|
||||
description: VDD_AON supply regulator handle
|
||||
|
||||
vddbtcxmx-supply:
|
||||
description: VDD_BT_CXMX supply regulator handle
|
||||
|
||||
vddrfacmn-supply:
|
||||
description: VDD_RFA_CMN supply regulator handle
|
||||
|
||||
vddrfa0p8-supply:
|
||||
description: VDD_RFA_0P8 supply regulator handle
|
||||
|
||||
vddrfa1p7-supply:
|
||||
description: VDD_RFA_1P7 supply regulator handle
|
||||
|
||||
vddrfa1p2-supply:
|
||||
description: VDD_RFA_1P2 supply regulator handle
|
||||
|
||||
vddrfa2p2-supply:
|
||||
description: VDD_RFA_2P2 supply regulator handle
|
||||
|
||||
vddasd-supply:
|
||||
description: VDD_ASD supply regulator handle
|
||||
|
||||
max-speed:
|
||||
description: see Documentation/devicetree/bindings/serial/serial.yaml
|
||||
|
||||
firmware-name:
|
||||
description: specify the name of nvm firmware to load
|
||||
|
||||
local-bd-address:
|
||||
description: see Documentation/devicetree/bindings/net/bluetooth.txt
|
||||
|
||||
|
||||
required:
|
||||
- compatible
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
allOf:
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,qca6174-bt
|
||||
then:
|
||||
required:
|
||||
- enable-gpios
|
||||
- clocks
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,wcn3990-bt
|
||||
- qcom,wcn3991-bt
|
||||
- qcom,wcn3998-bt
|
||||
then:
|
||||
required:
|
||||
- vddio-supply
|
||||
- vddxo-supply
|
||||
- vddrf-supply
|
||||
- vddch0-supply
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- qcom,wcn6750-bt
|
||||
then:
|
||||
required:
|
||||
- enable-gpios
|
||||
- swctrl-gpios
|
||||
- vddio-supply
|
||||
- vddaon-supply
|
||||
- vddbtcxmx-supply
|
||||
- vddrfacmn-supply
|
||||
- vddrfa0p8-supply
|
||||
- vddrfa1p7-supply
|
||||
- vddrfa1p2-supply
|
||||
- vddasd-supply
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/gpio/gpio.h>
|
||||
serial {
|
||||
|
||||
bluetooth {
|
||||
compatible = "qcom,qca6174-bt";
|
||||
enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
|
||||
clocks = <&divclk4>;
|
||||
firmware-name = "nvm_00440302.bin";
|
||||
};
|
||||
};
|
||||
- |
|
||||
serial {
|
||||
|
||||
bluetooth {
|
||||
compatible = "qcom,wcn3990-bt";
|
||||
vddio-supply = <&vreg_s4a_1p8>;
|
||||
vddxo-supply = <&vreg_l7a_1p8>;
|
||||
vddrf-supply = <&vreg_l17a_1p3>;
|
||||
vddch0-supply = <&vreg_l25a_3p3>;
|
||||
max-speed = <3200000>;
|
||||
firmware-name = "crnv21.bin";
|
||||
};
|
||||
};
|
||||
- |
|
||||
serial {
|
||||
|
||||
bluetooth {
|
||||
compatible = "qcom,wcn6750-bt";
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&bt_en_default>;
|
||||
enable-gpios = <&tlmm 85 GPIO_ACTIVE_HIGH>;
|
||||
swctrl-gpios = <&tlmm 86 GPIO_ACTIVE_HIGH>;
|
||||
vddio-supply = <&vreg_l19b_1p8>;
|
||||
vddaon-supply = <&vreg_s7b_0p9>;
|
||||
vddbtcxmx-supply = <&vreg_s7b_0p9>;
|
||||
vddrfacmn-supply = <&vreg_s7b_0p9>;
|
||||
vddrfa0p8-supply = <&vreg_s7b_0p9>;
|
||||
vddrfa1p7-supply = <&vreg_s1b_1p8>;
|
||||
vddrfa1p2-supply = <&vreg_s8b_1p2>;
|
||||
vddrfa2p2-supply = <&vreg_s1c_2p2>;
|
||||
vddasd-supply = <&vreg_l11c_2p8>;
|
||||
max-speed = <3200000>;
|
||||
firmware-name = "msnv11.bin";
|
||||
};
|
||||
};
|
||||
45
Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
Normal file
45
Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml
Normal file
@@ -0,0 +1,45 @@
|
||||
# SPDX-License-Identifier: GPL-2.0+
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/net/realtek,rtl82xx.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Realtek RTL82xx PHY
|
||||
|
||||
maintainers:
|
||||
- Andrew Lunn <andrew@lunn.ch>
|
||||
- Florian Fainelli <f.fainelli@gmail.com>
|
||||
- Heiner Kallweit <hkallweit1@gmail.com>
|
||||
|
||||
description:
|
||||
Bindings for Realtek RTL82xx PHYs
|
||||
|
||||
allOf:
|
||||
- $ref: ethernet-phy.yaml#
|
||||
|
||||
properties:
|
||||
realtek,clkout-disable:
|
||||
type: boolean
|
||||
description:
|
||||
Disable CLKOUT clock, CLKOUT clock default is enabled after hardware reset.
|
||||
|
||||
|
||||
realtek,aldps-enable:
|
||||
type: boolean
|
||||
description:
|
||||
Enable ALDPS mode, ALDPS mode default is disabled after hardware reset.
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
mdio {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
ethphy1: ethernet-phy@1 {
|
||||
reg = <1>;
|
||||
realtek,clkout-disable;
|
||||
realtek,aldps-enable;
|
||||
};
|
||||
};
|
||||
@@ -19,10 +19,12 @@ select:
|
||||
- rockchip,rk3128-gmac
|
||||
- rockchip,rk3228-gmac
|
||||
- rockchip,rk3288-gmac
|
||||
- rockchip,rk3308-gmac
|
||||
- rockchip,rk3328-gmac
|
||||
- rockchip,rk3366-gmac
|
||||
- rockchip,rk3368-gmac
|
||||
- rockchip,rk3399-gmac
|
||||
- rockchip,rk3568-gmac
|
||||
- rockchip,rv1108-gmac
|
||||
required:
|
||||
- compatible
|
||||
@@ -32,17 +34,23 @@ allOf:
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
oneOf:
|
||||
- items:
|
||||
- enum:
|
||||
- rockchip,px30-gmac
|
||||
- rockchip,rk3128-gmac
|
||||
- rockchip,rk3228-gmac
|
||||
- rockchip,rk3288-gmac
|
||||
- rockchip,rk3308-gmac
|
||||
- rockchip,rk3328-gmac
|
||||
- rockchip,rk3366-gmac
|
||||
- rockchip,rk3368-gmac
|
||||
- rockchip,rk3399-gmac
|
||||
- rockchip,rv1108-gmac
|
||||
- items:
|
||||
- enum:
|
||||
- rockchip,rk3568-gmac
|
||||
- const: snps,dwmac-4.20a
|
||||
|
||||
clocks:
|
||||
minItems: 5
|
||||
|
||||
@@ -51,11 +51,20 @@ properties:
|
||||
- allwinner,sun8i-r40-emac
|
||||
- allwinner,sun8i-v3s-emac
|
||||
- allwinner,sun50i-a64-emac
|
||||
- loongson,ls2k-dwmac
|
||||
- loongson,ls7a-dwmac
|
||||
- amlogic,meson6-dwmac
|
||||
- amlogic,meson8b-dwmac
|
||||
- amlogic,meson8m2-dwmac
|
||||
- amlogic,meson-gxbb-dwmac
|
||||
- amlogic,meson-axg-dwmac
|
||||
- loongson,ls2k-dwmac
|
||||
- loongson,ls7a-dwmac
|
||||
- ingenic,jz4775-mac
|
||||
- ingenic,x1000-mac
|
||||
- ingenic,x1600-mac
|
||||
- ingenic,x1830-mac
|
||||
- ingenic,x2000-mac
|
||||
- rockchip,px30-gmac
|
||||
- rockchip,rk3128-gmac
|
||||
- rockchip,rk3228-gmac
|
||||
@@ -310,6 +319,11 @@ allOf:
|
||||
- allwinner,sun8i-r40-emac
|
||||
- allwinner,sun8i-v3s-emac
|
||||
- allwinner,sun50i-a64-emac
|
||||
- ingenic,jz4775-mac
|
||||
- ingenic,x1000-mac
|
||||
- ingenic,x1600-mac
|
||||
- ingenic,x1830-mac
|
||||
- ingenic,x2000-mac
|
||||
- snps,dwxgmac
|
||||
- snps,dwxgmac-2.10
|
||||
- st,spear600-gmac
|
||||
@@ -353,6 +367,13 @@ allOf:
|
||||
- allwinner,sun8i-r40-emac
|
||||
- allwinner,sun8i-v3s-emac
|
||||
- allwinner,sun50i-a64-emac
|
||||
- loongson,ls2k-dwmac
|
||||
- loongson,ls7a-dwmac
|
||||
- ingenic,jz4775-mac
|
||||
- ingenic,x1000-mac
|
||||
- ingenic,x1600-mac
|
||||
- ingenic,x1830-mac
|
||||
- ingenic,x2000-mac
|
||||
- snps,dwmac-4.00
|
||||
- snps,dwmac-4.10a
|
||||
- snps,dwmac-4.20a
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: "http://devicetree.org/schemas/soc/microchip/microchip,polarfire-soc-sys-controller.yaml#"
|
||||
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
|
||||
|
||||
title: Microchip PolarFire SoC (MPFS) MSS (microprocessor subsystem) system controller
|
||||
|
||||
maintainers:
|
||||
- Conor Dooley <conor.dooley@microchip.com>
|
||||
|
||||
description: |
|
||||
The PolarFire SoC system controller is communicated with via a mailbox.
|
||||
This document describes the bindings for the client portion of that mailbox.
|
||||
|
||||
|
||||
properties:
|
||||
mboxes:
|
||||
maxItems: 1
|
||||
|
||||
compatible:
|
||||
const: microchip,polarfire-soc-sys-controller
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- mboxes
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
syscontroller: syscontroller {
|
||||
compatible = "microchip,polarfire-soc-sys-controller";
|
||||
mboxes = <&mbox 0>;
|
||||
};
|
||||
@@ -1,9 +1,8 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
.. |u8| replace:: :c:type:`u8 <u8>`
|
||||
.. |u16| replace:: :c:type:`u16 <u16>`
|
||||
.. |ssam_cdev_request| replace:: :c:type:`struct ssam_cdev_request <ssam_cdev_request>`
|
||||
.. |ssam_cdev_request_flags| replace:: :c:type:`enum ssam_cdev_request_flags <ssam_cdev_request_flags>`
|
||||
.. |ssam_cdev_event| replace:: :c:type:`struct ssam_cdev_event <ssam_cdev_event>`
|
||||
|
||||
==============================
|
||||
User-Space EC Interface (cdev)
|
||||
@@ -23,6 +22,40 @@ These IOCTLs and their respective input/output parameter structs are defined in
|
||||
A small python library and scripts for accessing this interface can be found
|
||||
at https://github.com/linux-surface/surface-aggregator-module/tree/master/scripts/ssam.
|
||||
|
||||
.. contents::
|
||||
|
||||
|
||||
Receiving Events
|
||||
================
|
||||
|
||||
Events can be received by reading from the device-file. They are represented by
|
||||
the |ssam_cdev_event| datatype.
|
||||
|
||||
Before events are available to be read, however, the desired notifiers must be
|
||||
registered via the ``SSAM_CDEV_NOTIF_REGISTER`` IOCTL. Notifiers are, in
|
||||
essence, callbacks, called when the EC sends an event. They are, in this
|
||||
interface, associated with a specific target category and device-file-instance.
|
||||
They forward any event of this category to the buffer of the corresponding
|
||||
instance, from which it can then be read.
|
||||
|
||||
Notifiers themselves do not enable events on the EC. Thus, it may additionally
|
||||
be necessary to enable events via the ``SSAM_CDEV_EVENT_ENABLE`` IOCTL. While
|
||||
notifiers work per-client (i.e. per-device-file-instance), events are enabled
|
||||
globally, for the EC and all of its clients (regardless of userspace or
|
||||
non-userspace). The ``SSAM_CDEV_EVENT_ENABLE`` and ``SSAM_CDEV_EVENT_DISABLE``
|
||||
IOCTLs take care of reference counting the events, such that an event is
|
||||
enabled as long as there is a client that has requested it.
|
||||
|
||||
Note that enabled events are not automatically disabled once the client
|
||||
instance is closed. Therefore any client process (or group of processes) should
|
||||
balance their event enable calls with the corresponding event disable calls. It
|
||||
is, however, perfectly valid to enable and disable events on different client
|
||||
instances. For example, it is valid to set up notifiers and read events on
|
||||
client instance ``A``, enable those events on instance ``B`` (note that these
|
||||
will also be received by A since events are enabled/disabled globally), and
|
||||
after no more events are desired, disable the previously enabled events via
|
||||
instance ``C``.
|
||||
|
||||
|
||||
Controller IOCTLs
|
||||
=================
|
||||
@@ -45,9 +78,33 @@ The following IOCTLs are provided:
|
||||
- ``REQUEST``
|
||||
- Perform synchronous SAM request.
|
||||
|
||||
* - ``0xA5``
|
||||
- ``2``
|
||||
- ``W``
|
||||
- ``NOTIF_REGISTER``
|
||||
- Register event notifier.
|
||||
|
||||
``REQUEST``
|
||||
-----------
|
||||
* - ``0xA5``
|
||||
- ``3``
|
||||
- ``W``
|
||||
- ``NOTIF_UNREGISTER``
|
||||
- Unregister event notifier.
|
||||
|
||||
* - ``0xA5``
|
||||
- ``4``
|
||||
- ``W``
|
||||
- ``EVENT_ENABLE``
|
||||
- Enable event source.
|
||||
|
||||
* - ``0xA5``
|
||||
- ``5``
|
||||
- ``W``
|
||||
- ``EVENT_DISABLE``
|
||||
- Disable event source.
|
||||
|
||||
|
||||
``SSAM_CDEV_REQUEST``
|
||||
---------------------
|
||||
|
||||
Defined as ``_IOWR(0xA5, 1, struct ssam_cdev_request)``.
|
||||
|
||||
@@ -82,6 +139,66 @@ submitted, and completed (i.e. handed back to user-space) successfully from
|
||||
inside the IOCTL, but the request ``status`` member may still be negative in
|
||||
case the actual execution of the request failed after it has been submitted.
|
||||
|
||||
A full definition of the argument struct is provided below:
|
||||
A full definition of the argument struct is provided below.
|
||||
|
||||
``SSAM_CDEV_NOTIF_REGISTER``
|
||||
----------------------------
|
||||
|
||||
Defined as ``_IOW(0xA5, 2, struct ssam_cdev_notifier_desc)``.
|
||||
|
||||
Register a notifier for the event target category specified in the given
|
||||
notifier description with the specified priority. Notifier registration is
|
||||
required to receive events, but does not enable events themselves. After a
|
||||
notifier for a specific target category has been registered, all events of that
|
||||
category will be forwarded to the userspace client and can then be read from
|
||||
the device file instance. Note that events may have to be enabled, e.g. via the
|
||||
``SSAM_CDEV_EVENT_ENABLE`` IOCTL, before the EC will send them.
|
||||
|
||||
Only one notifier can be registered per target category and client instance. If
|
||||
a notifier has already been registered, this IOCTL will fail with ``-EEXIST``.
|
||||
|
||||
Notifiers will automatically be removed when the device file instance is
|
||||
closed.
|
||||
|
||||
``SSAM_CDEV_NOTIF_UNREGISTER``
|
||||
------------------------------
|
||||
|
||||
Defined as ``_IOW(0xA5, 3, struct ssam_cdev_notifier_desc)``.
|
||||
|
||||
Unregisters the notifier associated with the specified target category. The
|
||||
priority field will be ignored by this IOCTL. If no notifier has been
|
||||
registered for this client instance and the given category, this IOCTL will
|
||||
fail with ``-ENOENT``.
|
||||
|
||||
``SSAM_CDEV_EVENT_ENABLE``
|
||||
--------------------------
|
||||
|
||||
Defined as ``_IOW(0xA5, 4, struct ssam_cdev_event_desc)``.
|
||||
|
||||
Enable the event associated with the given event descriptor.
|
||||
|
||||
Note that this call will not register a notifier itself, it will only enable
|
||||
events on the controller. If you want to receive events by reading from the
|
||||
device file, you will need to register the corresponding notifier(s) on that
|
||||
instance.
|
||||
|
||||
Events are not automatically disabled when the device file is closed. This must
|
||||
be done manually, via a call to the ``SSAM_CDEV_EVENT_DISABLE`` IOCTL.
|
||||
|
||||
``SSAM_CDEV_EVENT_DISABLE``
|
||||
---------------------------
|
||||
|
||||
Defined as ``_IOW(0xA5, 5, struct ssam_cdev_event_desc)``.
|
||||
|
||||
Disable the event associated with the given event descriptor.
|
||||
|
||||
Note that this will not unregister any notifiers. Events may still be received
|
||||
and forwarded to user-space after this call. The only safe way of stopping
|
||||
events from being received is unregistering all previously registered
|
||||
notifiers.
|
||||
|
||||
|
||||
Structures and Enums
|
||||
====================
|
||||
|
||||
.. kernel-doc:: include/uapi/linux/surface_aggregator/cdev.h
|
||||
|
||||
@@ -4,14 +4,14 @@ Journal (jbd2)
|
||||
--------------
|
||||
|
||||
Introduced in ext3, the ext4 filesystem employs a journal to protect the
|
||||
filesystem against corruption in the case of a system crash. A small
|
||||
continuous region of disk (default 128MiB) is reserved inside the
|
||||
filesystem as a place to land “important” data writes on-disk as quickly
|
||||
as possible. Once the important data transaction is fully written to the
|
||||
disk and flushed from the disk write cache, a record of the data being
|
||||
committed is also written to the journal. At some later point in time,
|
||||
the journal code writes the transactions to their final locations on
|
||||
disk (this could involve a lot of seeking or a lot of small
|
||||
filesystem against metadata inconsistencies in the case of a system crash. Up
|
||||
to 10,240,000 file system blocks (see man mke2fs(8) for more details on journal
|
||||
size limits) can be reserved inside the filesystem as a place to land
|
||||
“important” data writes on-disk as quickly as possible. Once the important
|
||||
data transaction is fully written to the disk and flushed from the disk write
|
||||
cache, a record of the data being committed is also written to the journal. At
|
||||
some later point in time, the journal code writes the transactions to their
|
||||
final locations on disk (this could involve a lot of seeking or a lot of small
|
||||
read-write-erases) before erasing the commit record. Should the system
|
||||
crash during the second slow write, the journal can be replayed all the
|
||||
way to the latest commit record, guaranteeing the atomicity of whatever
|
||||
@@ -731,3 +731,26 @@ point, the refcount for inode 11 is not reliable, but that gets fixed by the
|
||||
replay of last inode 11 tag. Thus, by converting a non-idempotent procedure
|
||||
into a series of idempotent outcomes, fast commits ensured idempotence during
|
||||
the replay.
|
||||
|
||||
Journal Checkpoint
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Checkpointing the journal ensures all transactions and their associated buffers
|
||||
are submitted to the disk. In-progress transactions are waited upon and included
|
||||
in the checkpoint. Checkpointing is used internally during critical updates to
|
||||
the filesystem including journal recovery, filesystem resizing, and freeing of
|
||||
the journal_t structure.
|
||||
|
||||
A journal checkpoint can be triggered from userspace via the ioctl
|
||||
EXT4_IOC_CHECKPOINT. This ioctl takes a single, u64 argument for flags.
|
||||
Currently, three flags are supported. First, EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN
|
||||
can be used to verify input to the ioctl. It returns error if there is any
|
||||
invalid input, otherwise it returns success without performing
|
||||
any checkpointing. This can be used to check whether the ioctl exists on a
|
||||
system and to verify there are no issues with arguments or flags. The
|
||||
other two flags are EXT4_IOC_CHECKPOINT_FLAG_DISCARD and
|
||||
EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT. These flags cause the journal blocks to be
|
||||
discarded or zero-filled, respectively, after the journal checkpoint is
|
||||
complete. EXT4_IOC_CHECKPOINT_FLAG_DISCARD and EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT
|
||||
cannot both be set. The ioctl may be useful when snapshotting a system or for
|
||||
complying with content deletion SLOs.
|
||||
|
||||
@@ -480,7 +480,7 @@ prototypes::
|
||||
locking rules:
|
||||
|
||||
======================= ===================
|
||||
ops bd_mutex
|
||||
ops open_mutex
|
||||
======================= ===================
|
||||
open: yes
|
||||
release: yes
|
||||
|
||||
199
Documentation/firmware-guide/acpi/dsd/phy.rst
Normal file
199
Documentation/firmware-guide/acpi/dsd/phy.rst
Normal file
@@ -0,0 +1,199 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=========================
|
||||
MDIO bus and PHYs in ACPI
|
||||
=========================
|
||||
|
||||
The PHYs on an MDIO bus [1] are probed and registered using
|
||||
fwnode_mdiobus_register_phy().
|
||||
|
||||
Later, for connecting these PHYs to their respective MACs, the PHYs registered
|
||||
on the MDIO bus have to be referenced.
|
||||
|
||||
This document introduces two _DSD properties that are to be used
|
||||
for connecting PHYs on the MDIO bus [3] to the MAC layer.
|
||||
|
||||
These properties are defined in accordance with the "Device
|
||||
Properties UUID For _DSD" [2] document and the
|
||||
daffd814-6eba-4d8c-8a91-bc9bbf4aa301 UUID must be used in the Device
|
||||
Data Descriptors containing them.
|
||||
|
||||
phy-handle
|
||||
----------
|
||||
For each MAC node, a device property "phy-handle" is used to reference
|
||||
the PHY that is registered on an MDIO bus. This is mandatory for
|
||||
network interfaces that have PHYs connected to MAC via MDIO bus.
|
||||
|
||||
During the MDIO bus driver initialization, PHYs on this bus are probed
|
||||
using the _ADR object as shown below and are registered on the MDIO bus.
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Scope(\_SB.MDI0)
|
||||
{
|
||||
Device(PHY1) {
|
||||
Name (_ADR, 0x1)
|
||||
} // end of PHY1
|
||||
|
||||
Device(PHY2) {
|
||||
Name (_ADR, 0x2)
|
||||
} // end of PHY2
|
||||
}
|
||||
|
||||
Later, during the MAC driver initialization, the registered PHY devices
|
||||
have to be retrieved from the MDIO bus. For this, the MAC driver needs
|
||||
references to the previously registered PHYs which are provided
|
||||
as device object references (e.g. \_SB.MDI0.PHY1).
|
||||
|
||||
phy-mode
|
||||
--------
|
||||
The "phy-mode" _DSD property is used to describe the connection to
|
||||
the PHY. The valid values for "phy-mode" are defined in [4].
|
||||
|
||||
managed
|
||||
-------
|
||||
Optional property, which specifies the PHY management type.
|
||||
The valid values for "managed" are defined in [4].
|
||||
|
||||
fixed-link
|
||||
----------
|
||||
The "fixed-link" is described by a data-only subnode of the
|
||||
MAC port, which is linked in the _DSD package via
|
||||
hierarchical data extension (UUID dbb8e3e6-5886-4ba6-8795-1319f52a966b
|
||||
in accordance with [5] "_DSD Implementation Guide" document).
|
||||
The subnode should comprise a required property ("speed") and
|
||||
possibly the optional ones - complete list of parameters and
|
||||
their values are specified in [4].
|
||||
|
||||
The following ASL example illustrates the usage of these properties.
|
||||
|
||||
DSDT entry for MDIO node
|
||||
------------------------
|
||||
|
||||
The MDIO bus has an SoC component (MDIO controller) and a platform
|
||||
component (PHYs on the MDIO bus).
|
||||
|
||||
a) Silicon Component
|
||||
This node describes the MDIO controller, MDI0
|
||||
---------------------------------------------
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Scope(_SB)
|
||||
{
|
||||
Device(MDI0) {
|
||||
Name(_HID, "NXP0006")
|
||||
Name(_CCA, 1)
|
||||
Name(_UID, 0)
|
||||
Name(_CRS, ResourceTemplate() {
|
||||
Memory32Fixed(ReadWrite, MDI0_BASE, MDI_LEN)
|
||||
Interrupt(ResourceConsumer, Level, ActiveHigh, Shared)
|
||||
{
|
||||
MDI0_IT
|
||||
}
|
||||
}) // end of _CRS for MDI0
|
||||
} // end of MDI0
|
||||
}
|
||||
|
||||
b) Platform Component
|
||||
The PHY1 and PHY2 nodes represent the PHYs connected to MDIO bus MDI0
|
||||
---------------------------------------------------------------------
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Scope(\_SB.MDI0)
|
||||
{
|
||||
Device(PHY1) {
|
||||
Name (_ADR, 0x1)
|
||||
} // end of PHY1
|
||||
|
||||
Device(PHY2) {
|
||||
Name (_ADR, 0x2)
|
||||
} // end of PHY2
|
||||
}
|
||||
|
||||
DSDT entries representing MAC nodes
|
||||
-----------------------------------
|
||||
|
||||
Below are the MAC nodes where PHY nodes are referenced.
|
||||
phy-mode and phy-handle are used as explained earlier.
|
||||
------------------------------------------------------
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Scope(\_SB.MCE0.PR17)
|
||||
{
|
||||
Name (_DSD, Package () {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package (2) {"phy-mode", "rgmii-id"},
|
||||
Package (2) {"phy-handle", \_SB.MDI0.PHY1}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Scope(\_SB.MCE0.PR18)
|
||||
{
|
||||
Name (_DSD, Package () {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package (2) {"phy-mode", "rgmii-id"},
|
||||
Package (2) {"phy-handle", \_SB.MDI0.PHY2}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
MAC node example where "managed" property is specified.
|
||||
-------------------------------------------------------
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Scope(\_SB.PP21.ETH0)
|
||||
{
|
||||
Name (_DSD, Package () {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () {"phy-mode", "sgmii"},
|
||||
Package () {"managed", "in-band-status"}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
MAC node example with a "fixed-link" subnode.
|
||||
---------------------------------------------
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Scope(\_SB.PP21.ETH1)
|
||||
{
|
||||
Name (_DSD, Package () {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () {"phy-mode", "sgmii"},
|
||||
},
|
||||
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
|
||||
Package () {
|
||||
Package () {"fixed-link", "LNK0"}
|
||||
}
|
||||
})
|
||||
Name (LNK0, Package(){ // Data-only subnode of port
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () {"speed", 1000},
|
||||
Package () {"full-duplex", 1}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
[1] Documentation/networking/phy.rst
|
||||
|
||||
[2] https://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
|
||||
|
||||
[3] Documentation/firmware-guide/acpi/DSD-properties-rules.rst
|
||||
|
||||
[4] Documentation/devicetree/bindings/net/ethernet-controller.yaml
|
||||
|
||||
[5] https://github.com/UEFI/DSD-Guide/blob/main/dsd-guide.pdf
|
||||
@@ -11,6 +11,7 @@ ACPI Support
|
||||
dsd/graph
|
||||
dsd/data-node-references
|
||||
dsd/leds
|
||||
dsd/phy
|
||||
enumeration
|
||||
osi
|
||||
method-customizing
|
||||
|
||||
@@ -27,32 +27,134 @@ these MAP frames and send them to appropriate PDN's.
|
||||
2. Packet format
|
||||
================
|
||||
|
||||
a. MAP packet (data / control)
|
||||
a. MAP packet v1 (data / control)
|
||||
|
||||
MAP header has the same endianness of the IP packet.
|
||||
MAP header fields are in big endian format.
|
||||
|
||||
Packet format::
|
||||
|
||||
Bit 0 1 2-7 8 - 15 16 - 31
|
||||
Bit 0 1 2-7 8-15 16-31
|
||||
Function Command / Data Reserved Pad Multiplexer ID Payload length
|
||||
Bit 32 - x
|
||||
Function Raw Bytes
|
||||
|
||||
Bit 32-x
|
||||
Function Raw bytes
|
||||
|
||||
Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command
|
||||
or data packet. Control packet is used for transport level flow control. Data
|
||||
or data packet. Command packet is used for transport level flow control. Data
|
||||
packets are standard IP packets.
|
||||
|
||||
Reserved bits are usually zeroed out and to be ignored by receiver.
|
||||
Reserved bits must be zero when sent and ignored when received.
|
||||
|
||||
Padding is number of bytes to be added for 4 byte alignment if required by
|
||||
hardware.
|
||||
Padding is the number of bytes to be appended to the payload to
|
||||
ensure 4 byte alignment.
|
||||
|
||||
Multiplexer ID is to indicate the PDN on which data has to be sent.
|
||||
|
||||
Payload length includes the padding length but does not include MAP header
|
||||
length.
|
||||
|
||||
b. MAP packet (command specific)::
|
||||
b. MAP packet v4 (data / control)
|
||||
|
||||
MAP header fields are in big endian format.
|
||||
|
||||
Packet format::
|
||||
|
||||
Bit 0 1 2-7 8-15 16-31
|
||||
Function Command / Data Reserved Pad Multiplexer ID Payload length
|
||||
|
||||
Bit 32-(x-33) (x-32)-x
|
||||
Function Raw bytes Checksum offload header
|
||||
|
||||
Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command
|
||||
or data packet. Command packet is used for transport level flow control. Data
|
||||
packets are standard IP packets.
|
||||
|
||||
Reserved bits must be zero when sent and ignored when received.
|
||||
|
||||
Padding is the number of bytes to be appended to the payload to
|
||||
ensure 4 byte alignment.
|
||||
|
||||
Multiplexer ID is to indicate the PDN on which data has to be sent.
|
||||
|
||||
Payload length includes the padding length but does not include MAP header
|
||||
length.
|
||||
|
||||
Checksum offload header has the information about the checksum processing done
|
||||
by the hardware. Checksum offload header fields are in big endian format.
|
||||
|
||||
Packet format::
|
||||
|
||||
Bit 0-14 15 16-31
|
||||
Function Reserved Valid Checksum start offset
|
||||
|
||||
Bit 32-47 48-63
|
||||
Function Checksum length Checksum value
|
||||
|
||||
Reserved bits must be zero when sent and ignored when received.
|
||||
|
||||
Valid bit indicates whether the partial checksum is calculated and is valid.
|
||||
Set to 1 if it is valid. Set to 0 otherwise.
|
||||
|
||||
Padding is the number of bytes to be appended to the payload to
|
||||
ensure 4 byte alignment.
|
||||
|
||||
Checksum start offset indicates the offset in bytes from the beginning of the
|
||||
IP header, from which modem computed checksum.
|
||||
|
||||
Checksum length is the length in bytes starting from CKSUM_START_OFFSET,
|
||||
over which checksum is computed.
|
||||
|
||||
Checksum value indicates the checksum computed.
|
||||
|
||||
c. MAP packet v5 (data / control)
|
||||
|
||||
MAP header fields are in big endian format.
|
||||
|
||||
Packet format::
|
||||
|
||||
Bit 0 1 2-7 8-15 16-31
|
||||
Function Command / Data Next header Pad Multiplexer ID Payload length
|
||||
|
||||
Bit 32-x
|
||||
Function Raw bytes
|
||||
|
||||
Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command
|
||||
or data packet. Command packet is used for transport level flow control. Data
|
||||
packets are standard IP packets.
|
||||
|
||||
Next header is used to indicate the presence of another header; currently it is
|
||||
limited to checksum header.
|
||||
|
||||
Padding is the number of bytes to be appended to the payload to
|
||||
ensure 4 byte alignment.
|
||||
|
||||
Multiplexer ID is to indicate the PDN on which data has to be sent.
|
||||
|
||||
Payload length includes the padding length but does not include MAP header
|
||||
length.
|
||||
|
||||
d. Checksum offload header v5
|
||||
|
||||
Checksum offload header fields are in big endian format.
|
||||
|
||||
Bit 0 - 6 7 8-15 16-31
|
||||
Function Header Type Next Header Checksum Valid Reserved
|
||||
|
||||
Header Type is to indicate the type of header; this is usually set to CHECKSUM
|
||||
|
||||
Header types
|
||||
= ==========================================
|
||||
0 Reserved
|
||||
1 Reserved
|
||||
2 checksum header
|
||||
|
||||
Checksum Valid is to indicate whether the header checksum is valid. Value of 1
|
||||
implies that checksum is calculated on this packet and is valid, value of 0
|
||||
indicates that the calculated packet checksum is invalid.
|
||||
|
||||
Reserved bits must be zero when sent and ignored when received.
|
||||
|
||||
e. MAP packet v1/v5 (command specific)::
|
||||
|
||||
Bit 0 1 2-7 8 - 15 16 - 31
|
||||
Function Command Reserved Pad Multiplexer ID Payload length
|
||||
@@ -74,7 +176,7 @@ Command types
|
||||
3 is for error during processing of commands
|
||||
= ==========================================
|
||||
|
||||
c. Aggregation
|
||||
f. Aggregation
|
||||
|
||||
Aggregation is multiple MAP packets (can be data or command) delivered to
|
||||
rmnet in a single linear skb. rmnet will process the individual
|
||||
|
||||
@@ -11,12 +11,12 @@ ENA is a networking interface designed to make good use of modern CPU
|
||||
features and system architectures.
|
||||
|
||||
The ENA device exposes a lightweight management interface with a
|
||||
minimal set of memory mapped registers and extendable command set
|
||||
minimal set of memory mapped registers and extendible command set
|
||||
through an Admin Queue.
|
||||
|
||||
The driver supports a range of ENA devices, is link-speed independent
|
||||
(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
|
||||
a negotiated and extendable feature set.
|
||||
(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
|
||||
a negotiated and extendible feature set.
|
||||
|
||||
Some ENA devices support SR-IOV. This driver is used for both the
|
||||
SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
|
||||
@@ -27,9 +27,9 @@ is advertised by the device via the Admin Queue), a dedicated MSI-X
|
||||
interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation,
|
||||
and CPU cacheline optimized data placement.
|
||||
|
||||
The ENA driver supports industry standard TCP/IP offload features such
|
||||
as checksum offload and TCP transmit segmentation offload (TSO).
|
||||
Receive-side scaling (RSS) is supported for multi-core scaling.
|
||||
The ENA driver supports industry standard TCP/IP offload features such as
|
||||
checksum offload. Receive-side scaling (RSS) is supported for multi-core
|
||||
scaling.
|
||||
|
||||
The ENA driver and its corresponding devices implement health
|
||||
monitoring mechanisms such as watchdog, enabling the device and driver
|
||||
@@ -38,7 +38,6 @@ debug logs.
|
||||
|
||||
Some of the ENA devices support a working mode called Low-latency
|
||||
Queue (LLQ), which saves several more microseconds.
|
||||
|
||||
ENA Source Code Directory Structure
|
||||
===================================
|
||||
|
||||
@@ -53,7 +52,6 @@ ena_eth_io_defs.h Definition of ENA data path interface.
|
||||
ena_common_defs.h Common definitions for ena_com layer.
|
||||
ena_regs_defs.h Definition of ENA PCI memory-mapped (MMIO) registers.
|
||||
ena_netdev.[ch] Main Linux kernel driver.
|
||||
ena_syfsfs.[ch] Sysfs files.
|
||||
ena_ethtool.c ethtool callbacks.
|
||||
ena_pci_id_tbl.h Supported device IDs.
|
||||
================= ======================================================
|
||||
@@ -69,7 +67,7 @@ ENA management interface is exposed by means of:
|
||||
- Asynchronous Event Notification Queue (AENQ)
|
||||
|
||||
ENA device MMIO Registers are accessed only during driver
|
||||
initialization and are not involved in further normal device
|
||||
initialization and are not used during further normal device
|
||||
operation.
|
||||
|
||||
AQ is used for submitting management commands, and the
|
||||
@@ -100,28 +98,27 @@ group may have multiple syndromes, as shown below
|
||||
|
||||
The events are:
|
||||
|
||||
==================== ===============
|
||||
Group Syndrome
|
||||
==================== ===============
|
||||
Link state change **X**
|
||||
Fatal error **X**
|
||||
Notification Suspend traffic
|
||||
Notification Resume traffic
|
||||
Keep-Alive **X**
|
||||
==================== ===============
|
||||
==================== ===============
|
||||
Group Syndrome
|
||||
==================== ===============
|
||||
Link state change **X**
|
||||
Fatal error **X**
|
||||
Notification Suspend traffic
|
||||
Notification Resume traffic
|
||||
Keep-Alive **X**
|
||||
==================== ===============
|
||||
|
||||
ACQ and AENQ share the same MSI-X vector.
|
||||
|
||||
Keep-Alive is a special mechanism that allows monitoring of the
|
||||
device's health. The driver maintains a watchdog (WD) handler which,
|
||||
if fired, logs the current state and statistics then resets and
|
||||
restarts the ENA device and driver. A Keep-Alive event is delivered by
|
||||
the device every second. The driver re-arms the WD upon reception of a
|
||||
Keep-Alive event. A missed Keep-Alive event causes the WD handler to
|
||||
fire.
|
||||
Keep-Alive is a special mechanism that allows monitoring the device's health.
|
||||
A Keep-Alive event is delivered by the device every second.
|
||||
The driver maintains a watchdog (WD) handler which logs the current state and
|
||||
statistics. If the keep-alive events aren't delivered as expected the WD resets
|
||||
the device and the driver.
|
||||
|
||||
Data Path Interface
|
||||
===================
|
||||
|
||||
I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx
|
||||
SQ correspondingly). Each SQ has a completion queue (CQ) associated
|
||||
with it.
|
||||
@@ -131,26 +128,24 @@ physical memory.
|
||||
|
||||
The ENA driver supports two Queue Operation modes for Tx SQs:
|
||||
|
||||
- Regular mode
|
||||
|
||||
* In this mode the Tx SQs reside in the host's memory. The ENA
|
||||
- **Regular mode:**
|
||||
In this mode the Tx SQs reside in the host's memory. The ENA
|
||||
device fetches the ENA Tx descriptors and packet data from host
|
||||
memory.
|
||||
|
||||
- Low Latency Queue (LLQ) mode or "push-mode".
|
||||
|
||||
* In this mode the driver pushes the transmit descriptors and the
|
||||
- **Low Latency Queue (LLQ) mode or "push-mode":**
|
||||
In this mode the driver pushes the transmit descriptors and the
|
||||
first 128 bytes of the packet directly to the ENA device memory
|
||||
space. The rest of the packet payload is fetched by the
|
||||
device. For this operation mode, the driver uses a dedicated PCI
|
||||
device memory BAR, which is mapped with write-combine capability.
|
||||
|
||||
The Rx SQs support only the regular mode.
|
||||
|
||||
Note: Not all ENA devices support LLQ, and this feature is negotiated
|
||||
**Note that** not all ENA devices support LLQ, and this feature is negotiated
|
||||
with the device upon initialization. If the ENA device does not
|
||||
support LLQ mode, the driver falls back to the regular mode.
|
||||
|
||||
The Rx SQs support only the regular mode.
|
||||
|
||||
The driver supports multi-queue for both Tx and Rx. This has various
|
||||
benefits:
|
||||
|
||||
@@ -165,6 +160,7 @@ benefits:
|
||||
|
||||
Interrupt Modes
|
||||
===============
|
||||
|
||||
The driver assigns a single MSI-X vector per queue pair (for both Tx
|
||||
and Rx directions). The driver assigns an additional dedicated MSI-X vector
|
||||
for management (for ACQ and AENQ).
|
||||
@@ -190,20 +186,21 @@ unmasked by the driver after NAPI processing is complete.
|
||||
|
||||
Interrupt Moderation
|
||||
====================
|
||||
|
||||
ENA driver and device can operate in conventional or adaptive interrupt
|
||||
moderation mode.
|
||||
|
||||
In conventional mode the driver instructs device to postpone interrupt
|
||||
**In conventional mode** the driver instructs device to postpone interrupt
|
||||
posting according to static interrupt delay value. The interrupt delay
|
||||
value can be configured through ethtool(8). The following ethtool
|
||||
parameters are supported by the driver: tx-usecs, rx-usecs
|
||||
value can be configured through `ethtool(8)`. The following `ethtool`
|
||||
parameters are supported by the driver: ``tx-usecs``, ``rx-usecs``
|
||||
|
||||
In adaptive interrupt moderation mode the interrupt delay value is
|
||||
**In adaptive interrupt** moderation mode the interrupt delay value is
|
||||
updated by the driver dynamically and adjusted every NAPI cycle
|
||||
according to the traffic nature.
|
||||
|
||||
Adaptive coalescing can be switched on/off through ethtool(8)
|
||||
adaptive_rx on|off parameter.
|
||||
Adaptive coalescing can be switched on/off through `ethtool(8)`'s
|
||||
:code:`adaptive_rx on|off` parameter.
|
||||
|
||||
More information about Adaptive Interrupt Moderation (DIM) can be found in
|
||||
Documentation/networking/net_dim.rst
|
||||
@@ -214,17 +211,10 @@ The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
|
||||
and can be configured by the ETHTOOL_STUNABLE command of the
|
||||
SIOCETHTOOL ioctl.
|
||||
|
||||
SKB
|
||||
===
|
||||
The driver-allocated SKB for frames received from Rx handling using
|
||||
NAPI context. The allocation method depends on the size of the packet.
|
||||
If the frame length is larger than rx_copybreak, napi_get_frags()
|
||||
is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer
|
||||
content is copied (by CPU) to the SKB, and the buffer is recycled.
|
||||
|
||||
Statistics
|
||||
==========
|
||||
The user can obtain ENA device and driver statistics using ethtool.
|
||||
|
||||
The user can obtain ENA device and driver statistics using `ethtool`.
|
||||
The driver can collect regular or extended statistics (including
|
||||
per-queue stats) from the device.
|
||||
|
||||
@@ -232,22 +222,23 @@ In addition the driver logs the stats to syslog upon device reset.
|
||||
|
||||
MTU
|
||||
===
|
||||
|
||||
The driver supports an arbitrarily large MTU with a maximum that is
|
||||
negotiated with the device. The driver configures MTU using the
|
||||
SetFeature command (ENA_ADMIN_MTU property). The user can change MTU
|
||||
via ip(8) and similar legacy tools.
|
||||
via `ip(8)` and similar legacy tools.
|
||||
|
||||
Stateless Offloads
|
||||
==================
|
||||
|
||||
The ENA driver supports:
|
||||
|
||||
- TSO over IPv4/IPv6
|
||||
- TSO with ECN
|
||||
- IPv4 header checksum offload
|
||||
- TCP/UDP over IPv4/IPv6 checksum offloads
|
||||
|
||||
RSS
|
||||
===
|
||||
|
||||
- The ENA device supports RSS that allows flexible Rx traffic
|
||||
steering.
|
||||
- Toeplitz and CRC32 hash functions are supported.
|
||||
@@ -260,41 +251,42 @@ RSS
|
||||
function delivered in the Rx CQ descriptor is set in the received
|
||||
SKB.
|
||||
- The user can provide a hash key, hash function, and configure the
|
||||
indirection table through ethtool(8).
|
||||
indirection table through `ethtool(8)`.
|
||||
|
||||
DATA PATH
|
||||
=========
|
||||
|
||||
Tx
|
||||
--
|
||||
|
||||
ena_start_xmit() is called by the stack. This function does the following:
|
||||
:code:`ena_start_xmit()` is called by the stack. This function does the following:
|
||||
|
||||
- Maps data buffers (skb->data and frags).
|
||||
- Populates ena_buf for the push buffer (if the driver and device are
|
||||
in push mode.)
|
||||
- Maps data buffers (``skb->data`` and frags).
|
||||
- Populates ``ena_buf`` for the push buffer (if the driver and device are
|
||||
in push mode).
|
||||
- Prepares ENA bufs for the remaining frags.
|
||||
- Allocates a new request ID from the empty req_id ring. The request
|
||||
- Allocates a new request ID from the empty ``req_id`` ring. The request
|
||||
ID is the index of the packet in the Tx info. This is used for
|
||||
out-of-order TX completions.
|
||||
out-of-order Tx completions.
|
||||
- Adds the packet to the proper place in the Tx ring.
|
||||
- Calls ena_com_prepare_tx(), an ENA communication layer that converts
|
||||
the ena_bufs to ENA descriptors (and adds meta ENA descriptors as
|
||||
needed.)
|
||||
- Calls :code:`ena_com_prepare_tx()`, an ENA communication layer that converts
|
||||
the ``ena_bufs`` to ENA descriptors (and adds meta ENA descriptors as
|
||||
needed).
|
||||
|
||||
* This function also copies the ENA descriptors and the push buffer
|
||||
to the Device memory space (if in push mode.)
|
||||
to the Device memory space (if in push mode).
|
||||
|
||||
- Writes doorbell to the ENA device.
|
||||
- Writes a doorbell to the ENA device.
|
||||
- When the ENA device finishes sending the packet, a completion
|
||||
interrupt is raised.
|
||||
- The interrupt handler schedules NAPI.
|
||||
- The ena_clean_tx_irq() function is called. This function handles the
|
||||
- The :code:`ena_clean_tx_irq()` function is called. This function handles the
|
||||
completion descriptors generated by the ENA, with a single
|
||||
completion descriptor per completed packet.
|
||||
|
||||
* req_id is retrieved from the completion descriptor. The tx_info of
|
||||
the packet is retrieved via the req_id. The data buffers are
|
||||
unmapped and req_id is returned to the empty req_id ring.
|
||||
* ``req_id`` is retrieved from the completion descriptor. The ``tx_info`` of
|
||||
the packet is retrieved via the ``req_id``. The data buffers are
|
||||
unmapped and ``req_id`` is returned to the empty ``req_id`` ring.
|
||||
* The function stops when the completion descriptors are completed or
|
||||
the budget is reached.
|
||||
|
||||
@@ -303,12 +295,11 @@ Rx
|
||||
|
||||
- When a packet is received from the ENA device.
|
||||
- The interrupt handler schedules NAPI.
|
||||
- The ena_clean_rx_irq() function is called. This function calls
|
||||
ena_rx_pkt(), an ENA communication layer function, which returns the
|
||||
number of descriptors used for a new unhandled packet, and zero if
|
||||
- The :code:`ena_clean_rx_irq()` function is called. This function calls
|
||||
:code:`ena_com_rx_pkt()`, an ENA communication layer function, which returns the
|
||||
number of descriptors used for a new packet, and zero if
|
||||
no new packet is found.
|
||||
- Then it calls the ena_clean_rx_irq() function.
|
||||
- ena_eth_rx_skb() checks packet length:
|
||||
- :code:`ena_rx_skb()` checks packet length:
|
||||
|
||||
* If the packet is small (len < rx_copybreak), the driver allocates
|
||||
a SKB for the new packet, and copies the packet payload into the
|
||||
@@ -317,9 +308,10 @@ Rx
|
||||
- In this way the original data buffer is not passed to the stack
|
||||
and is reused for future Rx packets.
|
||||
|
||||
* Otherwise the function unmaps the Rx buffer, then allocates the
|
||||
new SKB structure and hooks the Rx buffer to the SKB frags.
|
||||
* Otherwise the function unmaps the Rx buffer, sets the first
|
||||
descriptor as `skb`'s linear part and the other descriptors as the
|
||||
`skb`'s frags.
|
||||
|
||||
- The new SKB is updated with the necessary information (protocol,
|
||||
checksum hw verify result, etc.), and then passed to the network
|
||||
stack, using the NAPI interface function napi_gro_receive().
|
||||
checksum hw verify result, etc), and then passed to the network
|
||||
stack, using the NAPI interface function :code:`napi_gro_receive()`.
|
||||
|
||||
@@ -47,13 +47,24 @@ The driver interacts with the device in the following ways:
|
||||
- Transmit and Receive Queues
|
||||
- See description below
|
||||
|
||||
Descriptor Formats
|
||||
------------------
|
||||
GVE supports two descriptor formats: GQI and DQO. These two formats have
|
||||
entirely different descriptors, which will be described below.
|
||||
|
||||
Registers
|
||||
---------
|
||||
All registers are MMIO and big endian.
|
||||
All registers are MMIO.
|
||||
|
||||
The registers are used for initializing and configuring the device as well as
|
||||
querying device status in response to management interrupts.
|
||||
|
||||
Endianness
|
||||
----------
|
||||
- Admin Queue messages and registers are all Big Endian.
|
||||
- GQI descriptors and datapath registers are Big Endian.
|
||||
- DQO descriptors and datapath registers are Little Endian.
|
||||
|
||||
Admin Queue (AQ)
|
||||
----------------
|
||||
The Admin Queue is a PAGE_SIZE memory block, treated as an array of AQ
|
||||
@@ -97,10 +108,10 @@ the queues associated with that interrupt.
|
||||
The handler for these irqs schedule the napi for that block to run
|
||||
and poll the queues.
|
||||
|
||||
Traffic Queues
|
||||
--------------
|
||||
gVNIC's queues are composed of a descriptor ring and a buffer and are
|
||||
assigned to a notification block.
|
||||
GQI Traffic Queues
|
||||
------------------
|
||||
GQI queues are composed of a descriptor ring and a buffer and are assigned to a
|
||||
notification block.
|
||||
|
||||
The descriptor rings are power-of-two-sized ring buffers consisting of
|
||||
fixed-size descriptors. They advance their head pointer using a __be32
|
||||
@@ -121,3 +132,35 @@ Receive
|
||||
The buffers for receive rings are put into a data ring that is the same
|
||||
length as the descriptor ring and the head and tail pointers advance over
|
||||
the rings together.
|
||||
|
||||
DQO Traffic Queues
|
||||
------------------
|
||||
- Every TX and RX queue is assigned a notification block.
|
||||
|
||||
- TX and RX buffers queues, which send descriptors to the device, use MMIO
|
||||
doorbells to notify the device of new descriptors.
|
||||
|
||||
- RX and TX completion queues, which receive descriptors from the device, use a
|
||||
"generation bit" to know when a descriptor was populated by the device. The
|
||||
driver initializes all bits with the "current generation". The device will
|
||||
populate received descriptors with the "next generation" which is inverted
|
||||
from the current generation. When the ring wraps, the current/next generation
|
||||
are swapped.
|
||||
|
||||
- It's the driver's responsibility to ensure that the RX and TX completion
|
||||
queues are not overrun. This can be accomplished by limiting the number of
|
||||
descriptors posted to HW.
|
||||
|
||||
- TX packets have a 16 bit completion_tag and RX buffers have a 16 bit
|
||||
buffer_id. These will be returned on the TX completion and RX queues
|
||||
respectively to let the driver know which packet/buffer was completed.
|
||||
|
||||
Transmit
|
||||
~~~~~~~~
|
||||
A packet's buffers are DMA mapped for the device to access before transmission.
|
||||
After the packet was successfully transmitted, the buffers are unmapped.
|
||||
|
||||
Receive
|
||||
~~~~~~~
|
||||
The driver posts fixed sized buffers to HW on the RX buffer queue. The packet
|
||||
received on the associated RX queue may span multiple descriptors.
|
||||
|
||||
@@ -12,6 +12,7 @@ Contents
|
||||
- `Enabling the driver and kconfig options`_
|
||||
- `Devlink info`_
|
||||
- `Devlink parameters`_
|
||||
- `Bridge offload`_
|
||||
- `mlx5 subfunction`_
|
||||
- `mlx5 function attributes`_
|
||||
- `Devlink health reporters`_
|
||||
@@ -217,6 +218,37 @@ users try to enable them.
|
||||
|
||||
$ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
|
||||
|
||||
Bridge offload
|
||||
==============
|
||||
The mlx5 driver implements support for offloading bridge rules when in switchdev
|
||||
mode. Linux bridge FDBs are automatically offloaded when mlx5 switchdev
|
||||
representor is attached to bridge.
|
||||
|
||||
- Change device to switchdev mode::
|
||||
|
||||
$ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
|
||||
|
||||
- Attach mlx5 switchdev representor 'enp8s0f0' to bridge netdev 'bridge1'::
|
||||
|
||||
$ ip link set enp8s0f0 master bridge1
|
||||
|
||||
VLANs
|
||||
-----
|
||||
Following bridge VLAN functions are supported by mlx5:
|
||||
|
||||
- VLAN filtering (including multiple VLANs per port)::
|
||||
|
||||
$ ip link set bridge1 type bridge vlan_filtering 1
|
||||
$ bridge vlan add dev enp8s0f0 vid 2-3
|
||||
|
||||
- VLAN push on bridge ingress::
|
||||
|
||||
$ bridge vlan add dev enp8s0f0 vid 3 pvid
|
||||
|
||||
- VLAN pop on bridge egress::
|
||||
|
||||
$ bridge vlan add dev enp8s0f0 vid 3 untagged
|
||||
|
||||
mlx5 subfunction
|
||||
================
|
||||
mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>`) interface.
|
||||
@@ -568,3 +600,59 @@ tc and eswitch offloads tracepoints:
|
||||
$ cat /sys/kernel/debug/tracing/trace
|
||||
...
|
||||
kworker/u48:7-2221 [009] ...1 1475.387435: mlx5e_rep_neigh_update: netdev: ens1f0 MAC: 24:8a:07:9a:17:9a IPv4: 1.1.1.10 IPv6: ::ffff:1.1.1.10 neigh_connected=1
|
||||
|
||||
Bridge offloads tracepoints:
|
||||
|
||||
- mlx5_esw_bridge_fdb_entry_init: trace bridge FDB entry offloaded to mlx5::
|
||||
|
||||
$ echo mlx5:mlx5_esw_bridge_fdb_entry_init >> set_event
|
||||
$ cat /sys/kernel/debug/tracing/trace
|
||||
...
|
||||
kworker/u20:9-2217 [003] ...1 318.582243: mlx5_esw_bridge_fdb_entry_init: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=0 flags=0 used=0
|
||||
|
||||
- mlx5_esw_bridge_fdb_entry_cleanup: trace bridge FDB entry deleted from mlx5::
|
||||
|
||||
$ echo mlx5:mlx5_esw_bridge_fdb_entry_cleanup >> set_event
|
||||
$ cat /sys/kernel/debug/tracing/trace
|
||||
...
|
||||
ip-2581 [005] ...1 318.629871: mlx5_esw_bridge_fdb_entry_cleanup: net_device=enp8s0f0_1 addr=e4:fd:05:08:00:03 vid=0 flags=0 used=16
|
||||
|
||||
- mlx5_esw_bridge_fdb_entry_refresh: trace bridge FDB entry offload refreshed in
|
||||
mlx5::
|
||||
|
||||
$ echo mlx5:mlx5_esw_bridge_fdb_entry_refresh >> set_event
|
||||
$ cat /sys/kernel/debug/tracing/trace
|
||||
...
|
||||
kworker/u20:8-3849 [003] ...1 466716: mlx5_esw_bridge_fdb_entry_refresh: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=3 flags=0 used=0
|
||||
|
||||
- mlx5_esw_bridge_vlan_create: trace bridge VLAN object add on mlx5
|
||||
representor::
|
||||
|
||||
$ echo mlx5:mlx5_esw_bridge_vlan_create >> set_event
|
||||
$ cat /sys/kernel/debug/tracing/trace
|
||||
...
|
||||
ip-2560 [007] ...1 318.460258: mlx5_esw_bridge_vlan_create: vid=1 flags=6
|
||||
|
||||
- mlx5_esw_bridge_vlan_cleanup: trace bridge VLAN object delete from mlx5
|
||||
representor::
|
||||
|
||||
$ echo mlx5:mlx5_esw_bridge_vlan_cleanup >> set_event
|
||||
$ cat /sys/kernel/debug/tracing/trace
|
||||
...
|
||||
bridge-2582 [007] ...1 318.653496: mlx5_esw_bridge_vlan_cleanup: vid=2 flags=8
|
||||
|
||||
- mlx5_esw_bridge_vport_init: trace mlx5 vport assigned with bridge upper
|
||||
device::
|
||||
|
||||
$ echo mlx5:mlx5_esw_bridge_vport_init >> set_event
|
||||
$ cat /sys/kernel/debug/tracing/trace
|
||||
...
|
||||
ip-2560 [007] ...1 318.458915: mlx5_esw_bridge_vport_init: vport_num=1
|
||||
|
||||
- mlx5_esw_bridge_vport_cleanup: trace mlx5 vport removed from bridge upper
|
||||
device::
|
||||
|
||||
$ echo mlx5:mlx5_esw_bridge_vport_cleanup >> set_event
|
||||
$ cat /sys/kernel/debug/tracing/trace
|
||||
...
|
||||
ip-5387 [000] ...1 573713: mlx5_esw_bridge_vport_cleanup: vport_num=1
|
||||
|
||||
@@ -18,6 +18,7 @@ Contents:
|
||||
qlogic/index
|
||||
wan/index
|
||||
wifi/index
|
||||
wwan/index
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
||||
18
Documentation/networking/device_drivers/wwan/index.rst
Normal file
18
Documentation/networking/device_drivers/wwan/index.rst
Normal file
@@ -0,0 +1,18 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
WWAN Device Drivers
|
||||
===================
|
||||
|
||||
Contents:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
iosm
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
Indices
|
||||
=======
|
||||
|
||||
* :ref:`genindex`
|
||||
96
Documentation/networking/device_drivers/wwan/iosm.rst
Normal file
96
Documentation/networking/device_drivers/wwan/iosm.rst
Normal file
@@ -0,0 +1,96 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
.. Copyright (C) 2020-21 Intel Corporation
|
||||
|
||||
.. _iosm_driver_doc:
|
||||
|
||||
===========================================
|
||||
IOSM Driver for Intel M.2 PCIe based Modems
|
||||
===========================================
|
||||
The IOSM (IPC over Shared Memory) driver is a WWAN PCIe host driver developed
|
||||
for Linux or Chrome platforms for data exchange over PCIe interface between
|
||||
Host platform & Intel M.2 Modem. The driver exposes interface conforming to the
|
||||
MBIM protocol [1]. Any front end application (e.g. Modem Manager) could easily
|
||||
manage the MBIM interface to enable data communication towards WWAN.
|
||||
|
||||
Basic usage
|
||||
===========
|
||||
MBIM functions are inactive when unmanaged. The IOSM driver only provides a
|
||||
userspace interface MBIM "WWAN PORT" representing MBIM control channel and does
|
||||
not play any role in managing the functionality. It is the job of a userspace
|
||||
application to detect port enumeration and enable MBIM functionality.
|
||||
|
||||
Examples of a few such userspace applications are:
|
||||
- mbimcli (included with the libmbim [2] library), and
|
||||
- Modem Manager [3]
|
||||
|
||||
Management applications must carry out the following actions to establish an
|
||||
MBIM IP session:
|
||||
- open the MBIM control channel
|
||||
- configure network connection settings
|
||||
- connect to network
|
||||
- configure IP network interface
|
||||
|
||||
Management application development
|
||||
==================================
|
||||
The driver and userspace interfaces are described below. The MBIM protocol is
|
||||
described in [1] Mobile Broadband Interface Model v1.0 Errata-1.
|
||||
|
||||
MBIM control channel userspace ABI
|
||||
----------------------------------
|
||||
|
||||
/dev/wwan0mbim0 character device
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
The driver exposes an MBIM interface to the MBIM function by implementing
|
||||
MBIM WWAN Port. The userspace end of the control channel pipe is a
|
||||
/dev/wwan0mbim0 character device. Application shall use this interface for
|
||||
MBIM protocol communication.
|
||||
|
||||
Fragmentation
|
||||
~~~~~~~~~~~~~
|
||||
The userspace application is responsible for all control message fragmentation
|
||||
and defragmentation as per MBIM specification.
|
||||
|
||||
/dev/wwan0mbim0 write()
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
The MBIM control messages from the management application must not exceed the
|
||||
negotiated control message size.
|
||||
|
||||
/dev/wwan0mbim0 read()
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
The management application must accept control messages of up to the negotiated
|
||||
control message size.
|
||||
|
||||
MBIM data channel userspace ABI
|
||||
-------------------------------
|
||||
|
||||
wwan0-X network device
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
The IOSM driver exposes IP link interface "wwan0-X" of type "wwan" for IP
|
||||
traffic. Iproute network utility is used for creating "wwan0-X" network
|
||||
interface and for associating it with MBIM IP session. The Driver supports
|
||||
up to 8 IP sessions for simultaneous IP communication.
|
||||
|
||||
The userspace management application is responsible for creating new IP link
|
||||
prior to establishing MBIM IP session where the SessionId is greater than 0.
|
||||
|
||||
For example, creating new IP link for a MBIM IP session with SessionId 1:
|
||||
|
||||
ip link add dev wwan0-1 parentdev-name wwan0 type wwan linkid 1
|
||||
|
||||
The driver will automatically map the "wwan0-1" network device to MBIM IP
|
||||
session 1.
|
||||
|
||||
References
|
||||
==========
|
||||
[1] "MBIM (Mobile Broadband Interface Model) Errata-1"
|
||||
- https://www.usb.org/document-library/
|
||||
|
||||
[2] libmbim - "a glib-based library for talking to WWAN modems and
|
||||
devices which speak the Mobile Interface Broadband Model (MBIM)
|
||||
protocol"
|
||||
- http://www.freedesktop.org/wiki/Software/libmbim/
|
||||
|
||||
[3] Modem Manager - "a DBus-activated daemon which controls mobile
|
||||
broadband (2G/3G/4G) devices and connections"
|
||||
- http://www.freedesktop.org/wiki/Software/ModemManager/
|
||||
@@ -164,6 +164,41 @@ device to instantiate the subfunction device on particular PCI function.
|
||||
A subfunction device is created on the :ref:`Documentation/driver-api/auxiliary_bus.rst <auxiliary_bus>`.
|
||||
At this point a matching subfunction driver binds to the subfunction's auxiliary device.
|
||||
|
||||
Rate object management
|
||||
======================
|
||||
|
||||
Devlink provides an API to manage tx rates of a single devlink port or a group.
|
||||
This is done through rate objects, which can be one of the two types:
|
||||
|
||||
``leaf``
|
||||
Represents a single devlink port; created/destroyed by the driver. Since a leaf
|
||||
has a 1-to-1 mapping to its devlink port, in user space it is referred to as
|
||||
``pci/<bus_addr>/<port_index>``;
|
||||
|
||||
``node``
|
||||
Represents a group of rate objects (leafs and/or nodes); created/deleted by
|
||||
request from the userspace; initially empty (no rate objects added). In
|
||||
userspace it is referred to as ``pci/<bus_addr>/<node_name>``, where
|
||||
``node_name`` can be any identifier, except decimal number, to avoid
|
||||
collisions with leafs.
|
||||
|
||||
The API allows configuring the following rate object parameters:
|
||||
|
||||
``tx_share``
|
||||
Minimum TX rate value shared among all other rate objects, or rate objects
|
||||
that parts of the parent group, if it is a part of the same group.
|
||||
|
||||
``tx_max``
|
||||
Maximum TX rate value.
|
||||
|
||||
``parent``
|
||||
Parent node name. Parent node rate limits are considered as additional limits
|
||||
to all node children limits. ``tx_max`` is an upper limit for children.
|
||||
``tx_share`` is a total bandwidth distributed among children.
|
||||
|
||||
Driver implementations are allowed to support both or either rate object types
|
||||
and setting methods of their parameters.
|
||||
|
||||
Terms and Definitions
|
||||
=====================
|
||||
|
||||
|
||||
@@ -497,6 +497,7 @@ drivers:
|
||||
|
||||
* Documentation/networking/devlink/netdevsim.rst
|
||||
* Documentation/networking/devlink/mlxsw.rst
|
||||
* Documentation/networking/devlink/prestera.rst
|
||||
|
||||
.. _Generic-Packet-Trap-Groups:
|
||||
|
||||
|
||||
@@ -46,3 +46,4 @@ parameters, info versions, and other features it supports.
|
||||
qed
|
||||
ti-cpsw-switch
|
||||
am65-nuss-cpsw-switch
|
||||
prestera
|
||||
|
||||
@@ -57,6 +57,32 @@ entries, FIB rule entries and nexthops that the driver will allow.
|
||||
$ devlink resource set netdevsim/netdevsim0 path /nexthops size 16
|
||||
$ devlink dev reload netdevsim/netdevsim0
|
||||
|
||||
Rate objects
|
||||
============
|
||||
|
||||
The ``netdevsim`` driver supports rate objects management, which includes:
|
||||
|
||||
- registering/unregistering leaf rate objects per VF devlink port;
|
||||
- creation/deletion node rate objects;
|
||||
- setting tx_share and tx_max rate values for any rate object type;
|
||||
- setting parent node for any rate object type.
|
||||
|
||||
Rate nodes and their parameters are exposed in ``netdevsim`` debugfs in RO mode.
|
||||
For example created rate node with name ``some_group``:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
$ ls /sys/kernel/debug/netdevsim/netdevsim0/rate_groups/some_group
|
||||
rate_parent tx_max tx_share
|
||||
|
||||
Same parameters are exposed for leaf objects in corresponding ports directories.
|
||||
For ex.:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
$ ls /sys/kernel/debug/netdevsim/netdevsim0/ports/1
|
||||
dev ethtool rate_parent tx_max tx_share
|
||||
|
||||
Driver-specific Traps
|
||||
=====================
|
||||
|
||||
|
||||
141
Documentation/networking/devlink/prestera.rst
Normal file
141
Documentation/networking/devlink/prestera.rst
Normal file
@@ -0,0 +1,141 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
========================
|
||||
prestera devlink support
|
||||
========================
|
||||
|
||||
This document describes the devlink features implemented by the ``prestera``
|
||||
device driver.
|
||||
|
||||
Driver-specific Traps
|
||||
=====================
|
||||
|
||||
.. list-table:: List of Driver-specific Traps Registered by ``prestera``
|
||||
:widths: 5 5 90
|
||||
|
||||
* - Name
|
||||
- Type
|
||||
- Description
|
||||
.. list-table:: List of Driver-specific Traps Registered by ``prestera``
|
||||
:widths: 5 5 90
|
||||
|
||||
* - Name
|
||||
- Type
|
||||
- Description
|
||||
* - ``arp_bc``
|
||||
- ``trap``
|
||||
- Traps ARP broadcast packets (both requests/responses)
|
||||
* - ``is_is``
|
||||
- ``trap``
|
||||
- Traps IS-IS packets
|
||||
* - ``ospf``
|
||||
- ``trap``
|
||||
- Traps OSPF packets
|
||||
* - ``ip_bc_mac``
|
||||
- ``trap``
|
||||
- Traps IPv4 packets with broadcast DA Mac address
|
||||
* - ``stp``
|
||||
- ``trap``
|
||||
- Traps STP BPDU
|
||||
* - ``lacp``
|
||||
- ``trap``
|
||||
- Traps LACP packets
|
||||
* - ``lldp``
|
||||
- ``trap``
|
||||
- Traps LLDP packets
|
||||
* - ``router_mc``
|
||||
- ``trap``
|
||||
- Traps multicast packets
|
||||
* - ``vrrp``
|
||||
- ``trap``
|
||||
- Traps VRRP packets
|
||||
* - ``dhcp``
|
||||
- ``trap``
|
||||
- Traps DHCP packets
|
||||
* - ``mtu_error``
|
||||
- ``trap``
|
||||
- Traps (exception) packets that exceeded port's MTU
|
||||
* - ``mac_to_me``
|
||||
- ``trap``
|
||||
- Traps packets with switch-port's DA Mac address
|
||||
* - ``ttl_error``
|
||||
- ``trap``
|
||||
- Traps (exception) IPv4 packets whose TTL exceeded
|
||||
* - ``ipv4_options``
|
||||
- ``trap``
|
||||
- Traps (exception) packets due to the malformed IPV4 header options
|
||||
* - ``ip_default_route``
|
||||
- ``trap``
|
||||
- Traps packets that have no specific IP interface (IP to me) and no forwarding prefix
|
||||
* - ``local_route``
|
||||
- ``trap``
|
||||
- Traps packets that have been sent to one of the switch's IP interface addresses
|
||||
* - ``ipv4_icmp_redirect``
|
||||
- ``trap``
|
||||
- Traps (exception) IPV4 ICMP redirect packets
|
||||
* - ``arp_response``
|
||||
- ``trap``
|
||||
- Traps ARP replies packets that have switch-port's DA Mac address
|
||||
* - ``acl_code_0``
|
||||
- ``trap``
|
||||
- Traps packets that have ACL priority set to 0 (tc pref 0)
|
||||
* - ``acl_code_1``
|
||||
- ``trap``
|
||||
- Traps packets that have ACL priority set to 1 (tc pref 1)
|
||||
* - ``acl_code_2``
|
||||
- ``trap``
|
||||
- Traps packets that have ACL priority set to 2 (tc pref 2)
|
||||
* - ``acl_code_3``
|
||||
- ``trap``
|
||||
- Traps packets that have ACL priority set to 3 (tc pref 3)
|
||||
* - ``acl_code_4``
|
||||
- ``trap``
|
||||
- Traps packets that have ACL priority set to 4 (tc pref 4)
|
||||
* - ``acl_code_5``
|
||||
- ``trap``
|
||||
- Traps packets that have ACL priority set to 5 (tc pref 5)
|
||||
* - ``acl_code_6``
|
||||
- ``trap``
|
||||
- Traps packets that have ACL priority set to 6 (tc pref 6)
|
||||
* - ``acl_code_7``
|
||||
- ``trap``
|
||||
- Traps packets that have ACL priority set to 7 (tc pref 7)
|
||||
* - ``ipv4_bgp``
|
||||
- ``trap``
|
||||
- Traps IPv4 BGP packets
|
||||
* - ``ssh``
|
||||
- ``trap``
|
||||
- Traps SSH packets
|
||||
* - ``telnet``
|
||||
- ``trap``
|
||||
- Traps Telnet packets
|
||||
* - ``icmp``
|
||||
- ``trap``
|
||||
- Traps ICMP packets
|
||||
* - ``rxdma_drop``
|
||||
- ``drop``
|
||||
- Drops packets (RxDMA) due to the lack of ingress buffers etc.
|
||||
* - ``port_no_vlan``
|
||||
- ``drop``
|
||||
- Drops packets due to faulty-configured network or due to internal bug (config issue).
|
||||
* - ``local_port``
|
||||
- ``drop``
|
||||
- Drops packets whose decision (FDB entry) is to bridge packet back to the incoming port/trunk.
|
||||
* - ``invalid_sa``
|
||||
- ``drop``
|
||||
- Drops packets with multicast source MAC address.
|
||||
* - ``illegal_ip_addr``
|
||||
- ``drop``
|
||||
- Drops packets with illegal SIP/DIP multicast/unicast addresses.
|
||||
* - ``illegal_ipv4_hdr``
|
||||
- ``drop``
|
||||
- Drops packets with illegal IPV4 header.
|
||||
* - ``ip_uc_dip_da_mismatch``
|
||||
- ``drop``
|
||||
- Drops packets with destination MAC being unicast, but destination IP address being multicast.
|
||||
* - ``ip_sip_is_zero``
|
||||
- ``drop``
|
||||
- Drops packets with zero (0) IPV4 source address.
|
||||
* - ``met_red``
|
||||
- ``drop``
|
||||
- Drops non-conforming packets (dropped by Ingress policer, metering drop), e.g. packet rate exceeded configured bandwidth.
|
||||
@@ -292,3 +292,71 @@ configuration.
|
||||
|
||||
# bring up the bridge devices
|
||||
ip link set br0 up
|
||||
|
||||
Forwarding database (FDB) management
|
||||
------------------------------------
|
||||
|
||||
The existing DSA switches do not have the necessary hardware support to keep
|
||||
the software FDB of the bridge in sync with the hardware tables, so the two
|
||||
tables are managed separately (``bridge fdb show`` queries both, and depending
|
||||
on whether the ``self`` or ``master`` flags are being used, a ``bridge fdb
|
||||
add`` or ``bridge fdb del`` command acts upon entries from one or both tables).
|
||||
|
||||
Up until kernel v4.14, DSA only supported user space management of bridge FDB
|
||||
entries using the bridge bypass operations (which do not update the software
|
||||
FDB, just the hardware one) using the ``self`` flag (which is optional and can
|
||||
be omitted).
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
bridge fdb add dev swp0 00:01:02:03:04:05 self static
|
||||
# or shorthand
|
||||
bridge fdb add dev swp0 00:01:02:03:04:05 static
|
||||
|
||||
Due to a bug, the bridge bypass FDB implementation provided by DSA did not
|
||||
distinguish between ``static`` and ``local`` FDB entries (``static`` are meant
|
||||
to be forwarded, while ``local`` are meant to be locally terminated, i.e. sent
|
||||
to the host port). Instead, all FDB entries with the ``self`` flag (implicit or
|
||||
explicit) are treated by DSA as ``static`` even if they are ``local``.
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# This command:
|
||||
bridge fdb add dev swp0 00:01:02:03:04:05 static
|
||||
# behaves the same for DSA as this command:
|
||||
bridge fdb add dev swp0 00:01:02:03:04:05 local
|
||||
# or shorthand, because the 'local' flag is implicit if 'static' is not
|
||||
# specified, it also behaves the same as:
|
||||
bridge fdb add dev swp0 00:01:02:03:04:05
|
||||
|
||||
The last command is an incorrect way of adding a static bridge FDB entry to a
|
||||
DSA switch using the bridge bypass operations, and works by mistake. Other
|
||||
drivers will treat an FDB entry added by the same command as ``local`` and as
|
||||
such, will not forward it, as opposed to DSA.
|
||||
|
||||
Between kernel v4.14 and v5.14, DSA has supported in parallel two modes of
|
||||
adding a bridge FDB entry to the switch: the bridge bypass discussed above, as
|
||||
well as a new mode using the ``master`` flag which installs FDB entries in the
|
||||
software bridge too.
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
bridge fdb add dev swp0 00:01:02:03:04:05 master static
|
||||
|
||||
Since kernel v5.14, DSA has gained stronger integration with the bridge's
|
||||
software FDB, and the support for its bridge bypass FDB implementation (using
|
||||
the ``self`` flag) has been removed. This results in the following changes:
|
||||
|
||||
.. code-block:: sh
|
||||
|
||||
# This is the only valid way of adding an FDB entry that is supported,
|
||||
# compatible with v4.14 kernels and later:
|
||||
bridge fdb add dev swp0 00:01:02:03:04:05 master static
|
||||
# This command is no longer buggy and the entry is properly treated as
|
||||
# 'local' instead of being forwarded:
|
||||
bridge fdb add dev swp0 00:01:02:03:04:05
|
||||
# This command no longer installs a static FDB entry to hardware:
|
||||
bridge fdb add dev swp0 00:01:02:03:04:05 static
|
||||
|
||||
Script writers are therefore encouraged to use the ``master static`` set of
|
||||
flags when working with bridge FDB entries on DSA switch interfaces.
|
||||
|
||||
@@ -93,14 +93,15 @@ A tagging protocol may tag all packets with switch tags of the same length, or
|
||||
the tag length might vary (for example packets with PTP timestamps might
|
||||
require an extended switch tag, or there might be one tag length on TX and a
|
||||
different one on RX). Either way, the tagging protocol driver must populate the
|
||||
``struct dsa_device_ops::overhead`` with the length in octets of the longest
|
||||
switch frame header. The DSA framework will automatically adjust the MTU of the
|
||||
master interface to accomodate for this extra size in order for DSA user ports
|
||||
to support the standard MTU (L2 payload length) of 1500 octets. The ``overhead``
|
||||
is also used to request from the network stack, on a best-effort basis, the
|
||||
allocation of packets with a ``needed_headroom`` or ``needed_tailroom``
|
||||
sufficient such that the act of pushing the switch tag on transmission of a
|
||||
packet does not cause it to reallocate due to lack of memory.
|
||||
``struct dsa_device_ops::needed_headroom`` and/or ``struct dsa_device_ops::needed_tailroom``
|
||||
with the length in octets of the longest switch frame header/trailer. The DSA
|
||||
framework will automatically adjust the MTU of the master interface to
|
||||
accommodate for this extra size in order for DSA user ports to support the
|
||||
standard MTU (L2 payload length) of 1500 octets. The ``needed_headroom`` and
|
||||
``needed_tailroom`` properties are also used to request from the network stack,
|
||||
on a best-effort basis, the allocation of packets with enough extra space such
|
||||
that the act of pushing the switch tag on transmission of a packet does not
|
||||
cause it to reallocate due to lack of memory.
|
||||
|
||||
Even though applications are not expected to parse DSA-specific frame headers,
|
||||
the format on the wire of the tagging protocol represents an Application Binary
|
||||
@@ -169,8 +170,8 @@ The job of this method is to prepare the skb in a way that the switch will
|
||||
understand what egress port the packet is for (and not deliver it towards other
|
||||
ports). Typically this is fulfilled by pushing a frame header. Checking for
|
||||
insufficient size in the skb headroom or tailroom is unnecessary provided that
|
||||
the ``overhead`` and ``tail_tag`` properties were filled out properly, because
|
||||
DSA ensures there is enough space before calling this method.
|
||||
the ``needed_headroom`` and ``needed_tailroom`` properties were filled out
|
||||
properly, because DSA ensures there is enough space before calling this method.
|
||||
|
||||
The reception of a packet goes through the tagger's ``rcv`` function. The
|
||||
passed ``struct sk_buff *skb`` has ``skb->data`` pointing at
|
||||
|
||||
@@ -5,7 +5,7 @@ NXP SJA1105 switch driver
|
||||
Overview
|
||||
========
|
||||
|
||||
The NXP SJA1105 is a family of 6 devices:
|
||||
The NXP SJA1105 is a family of 10 SPI-managed automotive switches:
|
||||
|
||||
- SJA1105E: First generation, no TTEthernet
|
||||
- SJA1105T: First generation, TTEthernet
|
||||
@@ -13,9 +13,11 @@ The NXP SJA1105 is a family of 6 devices:
|
||||
- SJA1105Q: Second generation, TTEthernet, no SGMII
|
||||
- SJA1105R: Second generation, no TTEthernet, SGMII
|
||||
- SJA1105S: Second generation, TTEthernet, SGMII
|
||||
|
||||
These are SPI-managed automotive switches, with all ports being gigabit
|
||||
capable, and supporting MII/RMII/RGMII and optionally SGMII on one port.
|
||||
- SJA1110A: Third generation, TTEthernet, SGMII, integrated 100base-T1 and
|
||||
100base-TX PHYs
|
||||
- SJA1110B: Third generation, TTEthernet, SGMII, 100base-T1, 100base-TX
|
||||
- SJA1110C: Third generation, TTEthernet, SGMII, 100base-T1, 100base-TX
|
||||
- SJA1110D: Third generation, TTEthernet, SGMII, 100base-T1
|
||||
|
||||
Being automotive parts, their configuration interface is geared towards
|
||||
set-and-forget use, with minimal dynamic interaction at runtime. They
|
||||
@@ -579,3 +581,54 @@ A board would need to hook up the PHYs connected to the switch to any other
|
||||
MDIO bus available to Linux within the system (e.g. to the DSA master's MDIO
|
||||
bus). Link state management then works by the driver manually keeping in sync
|
||||
(over SPI commands) the MAC link speed with the settings negotiated by the PHY.
|
||||
|
||||
By comparison, the SJA1110 supports an MDIO slave access point over which its
|
||||
internal 100base-T1 PHYs can be accessed from the host. This is, however, not
|
||||
used by the driver; instead, the internal 100base-T1 and 100base-TX PHYs are
|
||||
accessed through SPI commands, modeled in Linux as virtual MDIO buses.
|
||||
|
||||
The microcontroller attached to the SJA1110 port 0 also has an MDIO controller
|
||||
operating in master mode, however the driver does not support this either,
|
||||
since the microcontroller gets disabled when the Linux driver operates.
|
||||
Discrete PHYs connected to the switch ports should have their MDIO interface
|
||||
attached to an MDIO controller from the host system and not to the switch,
|
||||
similar to SJA1105.
|
||||
|
||||
Port compatibility matrix
|
||||
-------------------------
|
||||
|
||||
The SJA1105 port compatibility matrix is:
|
||||
|
||||
===== ============== ============== ==============
|
||||
Port SJA1105E/T SJA1105P/Q SJA1105R/S
|
||||
===== ============== ============== ==============
|
||||
0 xMII xMII xMII
|
||||
1 xMII xMII xMII
|
||||
2 xMII xMII xMII
|
||||
3 xMII xMII xMII
|
||||
4 xMII xMII SGMII
|
||||
===== ============== ============== ==============
|
||||
|
||||
|
||||
The SJA1110 port compatibility matrix is:
|
||||
|
||||
===== ============== ============== ============== ==============
|
||||
Port SJA1110A SJA1110B SJA1110C SJA1110D
|
||||
===== ============== ============== ============== ==============
|
||||
0 RevMII (uC) RevMII (uC) RevMII (uC) RevMII (uC)
|
||||
1 100base-TX 100base-TX 100base-TX
|
||||
or SGMII SGMII
|
||||
2 xMII xMII xMII xMII
|
||||
or SGMII or SGMII
|
||||
3 xMII xMII xMII
|
||||
or SGMII or SGMII SGMII
|
||||
or 2500base-X or 2500base-X or 2500base-X
|
||||
4 SGMII SGMII SGMII SGMII
|
||||
or 2500base-X or 2500base-X or 2500base-X or 2500base-X
|
||||
5 100base-T1 100base-T1 100base-T1 100base-T1
|
||||
6 100base-T1 100base-T1 100base-T1 100base-T1
|
||||
7 100base-T1 100base-T1 100base-T1 100base-T1
|
||||
8 100base-T1 100base-T1 n/a n/a
|
||||
9 100base-T1 100base-T1 n/a n/a
|
||||
10 100base-T1 n/a n/a n/a
|
||||
===== ============== ============== ============== ==============
|
||||
|
||||
@@ -1363,8 +1363,8 @@ in an implementation specific way.
|
||||
``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP
|
||||
module parameters. This does not mean autonegotiation.
|
||||
|
||||
MODULE_EEPROM
|
||||
=============
|
||||
MODULE_EEPROM_GET
|
||||
=================
|
||||
|
||||
Fetch module EEPROM data dump.
|
||||
This interface is designed to allow dumps of at most 1/2 page at once. This
|
||||
@@ -1383,12 +1383,14 @@ Request contents:
|
||||
``ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS`` u8 page I2C address
|
||||
======================================= ====== ==========================
|
||||
|
||||
If ``ETHTOOL_A_MODULE_EEPROM_BANK`` is not specified, bank 0 is assumed.
|
||||
|
||||
Kernel response contents:
|
||||
|
||||
+---------------------------------------------+--------+---------------------+
|
||||
| ``ETHTOOL_A_MODULE_EEPROM_HEADER`` | nested | reply header |
|
||||
+---------------------------------------------+--------+---------------------+
|
||||
| ``ETHTOOL_A_MODULE_EEPROM_DATA`` | nested | array of bytes from |
|
||||
| ``ETHTOOL_A_MODULE_EEPROM_DATA`` | binary | array of bytes from |
|
||||
| | | module EEPROM |
|
||||
+---------------------------------------------+--------+---------------------+
|
||||
|
||||
|
||||
@@ -99,6 +99,35 @@ fib_multipath_hash_policy - INTEGER
|
||||
- 0 - Layer 3
|
||||
- 1 - Layer 4
|
||||
- 2 - Layer 3 or inner Layer 3 if present
|
||||
- 3 - Custom multipath hash. Fields used for multipath hash calculation
|
||||
are determined by fib_multipath_hash_fields sysctl
|
||||
|
||||
fib_multipath_hash_fields - UNSIGNED INTEGER
|
||||
When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
|
||||
fields used for multipath hash calculation are determined by this
|
||||
sysctl.
|
||||
|
||||
This value is a bitmask which enables various fields for multipath hash
|
||||
calculation.
|
||||
|
||||
Possible fields are:
|
||||
|
||||
====== ============================
|
||||
0x0001 Source IP address
|
||||
0x0002 Destination IP address
|
||||
0x0004 IP protocol
|
||||
0x0008 Unused (Flow Label)
|
||||
0x0010 Source port
|
||||
0x0020 Destination port
|
||||
0x0040 Inner source IP address
|
||||
0x0080 Inner destination IP address
|
||||
0x0100 Inner IP protocol
|
||||
0x0200 Inner Flow Label
|
||||
0x0400 Inner source port
|
||||
0x0800 Inner destination port
|
||||
====== ============================
|
||||
|
||||
Default: 0x0007 (source IP, destination IP and IP protocol)
|
||||
|
||||
fib_sync_mem - UNSIGNED INTEGER
|
||||
Amount of dirty memory from fib entries that can be backlogged before
|
||||
@@ -732,6 +761,31 @@ tcp_syncookies - INTEGER
|
||||
network connections you can set this knob to 2 to enable
|
||||
unconditional generation of syncookies.
|
||||
|
||||
tcp_migrate_req - BOOLEAN
|
||||
The incoming connection is tied to a specific listening socket when
|
||||
the initial SYN packet is received during the three-way handshake.
|
||||
When a listener is closed, in-flight request sockets during the
|
||||
handshake and established sockets in the accept queue are aborted.
|
||||
|
||||
If the listener has SO_REUSEPORT enabled, other listeners on the
|
||||
same port should have been able to accept such connections. This
|
||||
option makes it possible to migrate such child sockets to another
|
||||
listener after close() or shutdown().
|
||||
|
||||
The BPF_SK_REUSEPORT_SELECT_OR_MIGRATE type of eBPF program should
|
||||
usually be used to define the policy to pick an alive listener.
|
||||
Otherwise, the kernel will randomly pick an alive listener only if
|
||||
this option is enabled.
|
||||
|
||||
Note that migration between listeners with different settings may
|
||||
crash applications. Let's say migration happens from listener A to
|
||||
B, and only B has TCP_SAVE_SYN enabled. B cannot read SYN data from
|
||||
the requests migrated from A. To avoid such a situation, cancel
|
||||
migration by returning SK_DROP in the type of eBPF program, or
|
||||
disable this option.
|
||||
|
||||
Default: 0
|
||||
|
||||
tcp_fastopen - INTEGER
|
||||
Enable TCP Fast Open (RFC7413) to send and accept data in the opening
|
||||
SYN packet.
|
||||
@@ -1743,6 +1797,35 @@ fib_multipath_hash_policy - INTEGER
|
||||
- 0 - Layer 3 (source and destination addresses plus flow label)
|
||||
- 1 - Layer 4 (standard 5-tuple)
|
||||
- 2 - Layer 3 or inner Layer 3 if present
|
||||
- 3 - Custom multipath hash. Fields used for multipath hash calculation
|
||||
are determined by fib_multipath_hash_fields sysctl
|
||||
|
||||
fib_multipath_hash_fields - UNSIGNED INTEGER
|
||||
When fib_multipath_hash_policy is set to 3 (custom multipath hash), the
|
||||
fields used for multipath hash calculation are determined by this
|
||||
sysctl.
|
||||
|
||||
This value is a bitmask which enables various fields for multipath hash
|
||||
calculation.
|
||||
|
||||
Possible fields are:
|
||||
|
||||
====== ============================
|
||||
0x0001 Source IP address
|
||||
0x0002 Destination IP address
|
||||
0x0004 IP protocol
|
||||
0x0008 Flow Label
|
||||
0x0010 Source port
|
||||
0x0020 Destination port
|
||||
0x0040 Inner source IP address
|
||||
0x0080 Inner destination IP address
|
||||
0x0100 Inner IP protocol
|
||||
0x0200 Inner Flow Label
|
||||
0x0400 Inner source port
|
||||
0x0800 Inner destination port
|
||||
====== ============================
|
||||
|
||||
Default: 0x0007 (source IP, destination IP and IP protocol)
|
||||
|
||||
anycast_src_echo_reply - BOOLEAN
|
||||
Controls the use of anycast addresses as source addresses for ICMPv6
|
||||
@@ -2751,6 +2834,18 @@ encap_port - INTEGER
|
||||
|
||||
Default: 0
|
||||
|
||||
plpmtud_probe_interval - INTEGER
|
||||
The time interval (in milliseconds) for the PLPMTUD probe timer,
|
||||
which is configured to expire after this period to receive an
|
||||
acknowledgment to a probe packet. This is also the time interval
|
||||
between the probes for the current pmtu when the probe search
|
||||
is done.
|
||||
|
||||
PLPMTUD will be disabled when 0 is set, and other values for it
|
||||
must be >= 5000.
|
||||
|
||||
Default: 0
|
||||
|
||||
|
||||
``/proc/sys/net/core/*``
|
||||
========================
|
||||
|
||||
@@ -7,13 +7,13 @@ MPTCP Sysfs variables
|
||||
/proc/sys/net/mptcp/* Variables
|
||||
===============================
|
||||
|
||||
enabled - INTEGER
|
||||
enabled - BOOLEAN
|
||||
Control whether MPTCP sockets can be created.
|
||||
|
||||
MPTCP sockets can be created if the value is nonzero. This is
|
||||
a per-namespace sysctl.
|
||||
MPTCP sockets can be created if the value is 1. This is a
|
||||
per-namespace sysctl.
|
||||
|
||||
Default: 1
|
||||
Default: 1 (enabled)
|
||||
|
||||
add_addr_timeout - INTEGER (seconds)
|
||||
Set the timeout after which an ADD_ADDR control message will be
|
||||
@@ -24,3 +24,24 @@ add_addr_timeout - INTEGER (seconds)
|
||||
sysctl.
|
||||
|
||||
Default: 120
|
||||
|
||||
checksum_enabled - BOOLEAN
|
||||
Control whether DSS checksum can be enabled.
|
||||
|
||||
DSS checksum can be enabled if the value is nonzero. This is a
|
||||
per-namespace sysctl.
|
||||
|
||||
Default: 0
|
||||
|
||||
allow_join_initial_addr_port - BOOLEAN
|
||||
Allow peers to send join requests to the IP address and port number used
|
||||
by the initial subflow if the value is 1. This controls a flag that is
|
||||
sent to the peer at connection time, and whether such join requests are
|
||||
accepted or denied.
|
||||
|
||||
Joins to addresses advertised with ADD_ADDR are not affected by this
|
||||
value.
|
||||
|
||||
This is a per-namespace sysctl.
|
||||
|
||||
Default: 1
|
||||
|
||||
@@ -177,3 +177,27 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds)
|
||||
|
||||
This extended timeout will be used in case there is a GRE stream
|
||||
detected.
|
||||
|
||||
nf_flowtable_tcp_timeout - INTEGER (seconds)
|
||||
default 30
|
||||
|
||||
Control offload timeout for tcp connections.
|
||||
TCP connections may be offloaded from nf conntrack to nf flow table.
|
||||
Once aged, the connection is returned to nf conntrack with tcp pickup timeout.
|
||||
|
||||
nf_flowtable_tcp_pickup - INTEGER (seconds)
|
||||
default 120
|
||||
|
||||
TCP connection timeout after being aged from nf flow table offload.
|
||||
|
||||
nf_flowtable_udp_timeout - INTEGER (seconds)
|
||||
default 30
|
||||
|
||||
Control offload timeout for udp connections.
|
||||
UDP connections may be offloaded from nf conntrack to nf flow table.
|
||||
Once aged, the connection is returned to nf conntrack with udp pickup timeout.
|
||||
|
||||
nf_flowtable_udp_pickup - INTEGER (seconds)
|
||||
default 30
|
||||
|
||||
UDP connection timeout after being aged from nf flow table offload.
|
||||
|
||||
@@ -292,6 +292,12 @@ Some of the interface modes are described below:
|
||||
Note: due to legacy usage, some 10GBASE-R usage incorrectly makes
|
||||
use of this definition.
|
||||
|
||||
``PHY_INTERFACE_MODE_25GBASER``
|
||||
This is the IEEE 802.3 PCS Clause 107 defined 25GBASE-R protocol.
|
||||
The PCS is identical to 10GBASE-R, i.e. 64B/66B encoded
|
||||
running 2.5 times as fast, giving a fixed bit rate of 25.78125 Gbaud.
|
||||
Please refer to the IEEE standard for further information.
|
||||
|
||||
``PHY_INTERFACE_MODE_100BASEX``
|
||||
This defines IEEE 802.3 Clause 24. The link operates at a fixed data
|
||||
rate of 125Mbps using a 4B/5B encoding scheme, resulting in an underlying
|
||||
|
||||
@@ -378,7 +378,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
|
||||
|
||||
`int pm_runtime_get_sync(struct device *dev);`
|
||||
- increment the device's usage counter, run pm_runtime_resume(dev) and
|
||||
return its result
|
||||
return its result;
|
||||
note that it does not drop the device's usage counter on errors, so
|
||||
consider using pm_runtime_resume_and_get() instead of it, especially
|
||||
if its return value is checked by the caller, as this is likely to
|
||||
result in cleaner code.
|
||||
|
||||
`int pm_runtime_get_if_in_use(struct device *dev);`
|
||||
- return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the
|
||||
@@ -827,6 +831,15 @@ or driver about runtime power changes. Instead, the driver for the device's
|
||||
parent must take responsibility for telling the device's driver when the
|
||||
parent's power state changes.
|
||||
|
||||
Note that, in some cases it may not be desirable for subsystems/drivers to call
|
||||
pm_runtime_no_callbacks() for their devices. This could be because a subset of
|
||||
the runtime PM callbacks needs to be implemented, a platform dependent PM
|
||||
domain could get attached to the device or that the device is power managed
|
||||
through a supplier device link. For these reasons and to avoid boilerplate code
|
||||
in subsystems/drivers, the PM core allows runtime PM callbacks to be
|
||||
unassigned. More precisely, if a callback pointer is NULL, the PM core will act
|
||||
as though there was a callback and it returned 0.
|
||||
|
||||
9. Autosuspend, or automatically-delayed suspends
|
||||
=================================================
|
||||
|
||||
|
||||
@@ -325,7 +325,7 @@ Code Seq# Include File Comments
|
||||
0xA3 90-9F linux/dtlk.h
|
||||
0xA4 00-1F uapi/linux/tee.h Generic TEE subsystem
|
||||
0xA4 00-1F uapi/asm/sgx.h <mailto:linux-sgx@vger.kernel.org>
|
||||
0xA5 01 linux/surface_aggregator/cdev.h Microsoft Surface Platform System Aggregator
|
||||
0xA5 01-05 linux/surface_aggregator/cdev.h Microsoft Surface Platform System Aggregator
|
||||
<mailto:luzmaximilian@gmail.com>
|
||||
0xA5 20-2F linux/surface_aggregator/dtx.h Microsoft Surface DTX driver
|
||||
<mailto:luzmaximilian@gmail.com>
|
||||
|
||||
@@ -14,15 +14,11 @@ for the CPU. Then there could be several contiguous ranges at
|
||||
completely distinct addresses. And, don't forget about NUMA, where
|
||||
different memory banks are attached to different CPUs.
|
||||
|
||||
Linux abstracts this diversity using one of the three memory models:
|
||||
FLATMEM, DISCONTIGMEM and SPARSEMEM. Each architecture defines what
|
||||
Linux abstracts this diversity using one of the two memory models:
|
||||
FLATMEM and SPARSEMEM. Each architecture defines what
|
||||
memory models it supports, what the default memory model is and
|
||||
whether it is possible to manually override that default.
|
||||
|
||||
.. note::
|
||||
At time of this writing, DISCONTIGMEM is considered deprecated,
|
||||
although it is still in use by several architectures.
|
||||
|
||||
All the memory models track the status of physical page frames using
|
||||
struct page arranged in one or more arrays.
|
||||
|
||||
@@ -63,43 +59,6 @@ straightforward: `PFN - ARCH_PFN_OFFSET` is an index to the
|
||||
The `ARCH_PFN_OFFSET` defines the first page frame number for
|
||||
systems with physical memory starting at address different from 0.
|
||||
|
||||
DISCONTIGMEM
|
||||
============
|
||||
|
||||
The DISCONTIGMEM model treats the physical memory as a collection of
|
||||
`nodes` similarly to how Linux NUMA support does. For each node Linux
|
||||
constructs an independent memory management subsystem represented by
|
||||
`struct pglist_data` (or `pg_data_t` for short). Among other
|
||||
things, `pg_data_t` holds the `node_mem_map` array that maps
|
||||
physical pages belonging to that node. The `node_start_pfn` field of
|
||||
`pg_data_t` is the number of the first page frame belonging to that
|
||||
node.
|
||||
|
||||
The architecture setup code should call :c:func:`free_area_init_node` for
|
||||
each node in the system to initialize the `pg_data_t` object and its
|
||||
`node_mem_map`.
|
||||
|
||||
Every `node_mem_map` behaves exactly as FLATMEM's `mem_map` -
|
||||
every physical page frame in a node has a `struct page` entry in the
|
||||
`node_mem_map` array. When DISCONTIGMEM is enabled, a portion of the
|
||||
`flags` field of the `struct page` encodes the node number of the
|
||||
node hosting that page.
|
||||
|
||||
The conversion between a PFN and the `struct page` in the
|
||||
DISCONTIGMEM model became slightly more complex as it has to determine
|
||||
which node hosts the physical page and which `pg_data_t` object
|
||||
holds the `struct page`.
|
||||
|
||||
Architectures that support DISCONTIGMEM provide :c:func:`pfn_to_nid`
|
||||
to convert PFN to the node number. The opposite conversion helper
|
||||
:c:func:`page_to_nid` is generic as it uses the node number encoded in
|
||||
page->flags.
|
||||
|
||||
Once the node number is known, the PFN can be used to index
|
||||
appropriate `node_mem_map` array to access the `struct page` and
|
||||
the offset of the `struct page` from the `node_mem_map` plus
|
||||
`node_start_pfn` is the PFN of that page.
|
||||
|
||||
SPARSEMEM
|
||||
=========
|
||||
|
||||
|
||||
90
MAINTAINERS
90
MAINTAINERS
@@ -973,7 +973,7 @@ F: drivers/net/ethernet/amd/xgbe/
|
||||
|
||||
AMD SENSOR FUSION HUB DRIVER
|
||||
M: Nehal Shah <nehal-bakulchandra.shah@amd.com>
|
||||
M: Sandeep Singh <sandeep.singh@amd.com>
|
||||
M: Basavaraj Natikar <basavaraj.natikar@amd.com>
|
||||
L: linux-input@vger.kernel.org
|
||||
S: Maintained
|
||||
F: Documentation/hid/amd-sfh*
|
||||
@@ -4447,6 +4447,18 @@ F: include/linux/compiler-clang.h
|
||||
F: scripts/clang-tools/
|
||||
K: \b(?i:clang|llvm)\b
|
||||
|
||||
CLANG CONTROL FLOW INTEGRITY SUPPORT
|
||||
M: Sami Tolvanen <samitolvanen@google.com>
|
||||
M: Kees Cook <keescook@chromium.org>
|
||||
R: Nathan Chancellor <nathan@kernel.org>
|
||||
R: Nick Desaulniers <ndesaulniers@google.com>
|
||||
L: clang-built-linux@googlegroups.com
|
||||
S: Supported
|
||||
B: https://github.com/ClangBuiltLinux/linux/issues
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/clang/features
|
||||
F: include/linux/cfi.h
|
||||
F: kernel/cfi.c
|
||||
|
||||
CLEANCACHE API
|
||||
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
L: linux-kernel@vger.kernel.org
|
||||
@@ -5187,7 +5199,14 @@ DELL WMI NOTIFICATIONS DRIVER
|
||||
M: Matthew Garrett <mjg59@srcf.ucam.org>
|
||||
M: Pali Rohár <pali@kernel.org>
|
||||
S: Maintained
|
||||
F: drivers/platform/x86/dell/dell-wmi.c
|
||||
F: drivers/platform/x86/dell/dell-wmi-base.c
|
||||
|
||||
DELL WMI HARDWARE PRIVACY SUPPORT
|
||||
M: Perry Yuan <Perry.Yuan@dell.com>
|
||||
L: Dell.Client.Kernel@dell.com
|
||||
L: platform-driver-x86@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/platform/x86/dell/dell-wmi-privacy.c
|
||||
|
||||
DELTA ST MEDIA DRIVER
|
||||
M: Hugues Fruchet <hugues.fruchet@foss.st.com>
|
||||
@@ -6460,10 +6479,11 @@ F: Documentation/filesystems/ecryptfs.rst
|
||||
F: fs/ecryptfs/
|
||||
|
||||
EDAC-AMD64
|
||||
M: Borislav Petkov <bp@alien8.de>
|
||||
M: Yazen Ghannam <yazen.ghannam@amd.com>
|
||||
L: linux-edac@vger.kernel.org
|
||||
S: Maintained
|
||||
S: Supported
|
||||
F: drivers/edac/amd64_edac*
|
||||
F: drivers/edac/mce_amd*
|
||||
|
||||
EDAC-ARMADA
|
||||
M: Jan Luebbe <jlu@pengutronix.de>
|
||||
@@ -6827,6 +6847,8 @@ F: Documentation/devicetree/bindings/net/mdio*
|
||||
F: Documentation/devicetree/bindings/net/qca,ar803x.yaml
|
||||
F: Documentation/networking/phy.rst
|
||||
F: drivers/net/mdio/
|
||||
F: drivers/net/mdio/acpi_mdio.c
|
||||
F: drivers/net/mdio/fwnode_mdio.c
|
||||
F: drivers/net/mdio/of_mdio.c
|
||||
F: drivers/net/pcs/
|
||||
F: drivers/net/phy/
|
||||
@@ -9150,6 +9172,7 @@ F: Documentation/networking/device_drivers/ethernet/intel/
|
||||
F: drivers/net/ethernet/intel/
|
||||
F: drivers/net/ethernet/intel/*/
|
||||
F: include/linux/avf/virtchnl.h
|
||||
F: include/linux/net/intel/iidc.h
|
||||
|
||||
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
|
||||
M: Maik Broemme <mbroemme@libmpq.org>
|
||||
@@ -9404,6 +9427,11 @@ S: Maintained
|
||||
F: arch/x86/include/asm/intel_scu_ipc.h
|
||||
F: drivers/platform/x86/intel_scu_*
|
||||
|
||||
INTEL SKYLAKE INT3472 ACPI DEVICE DRIVER
|
||||
M: Daniel Scally <djrscally@gmail.com>
|
||||
S: Maintained
|
||||
F: drivers/platform/x86/intel/int3472/
|
||||
|
||||
INTEL SPEED SELECT TECHNOLOGY
|
||||
M: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
||||
L: platform-driver-x86@vger.kernel.org
|
||||
@@ -9424,7 +9452,7 @@ F: include/linux/firmware/intel/stratix10-smc.h
|
||||
F: include/linux/firmware/intel/stratix10-svc-client.h
|
||||
|
||||
INTEL TELEMETRY DRIVER
|
||||
M: Rajneesh Bhardwaj <rajneesh.bhardwaj@linux.intel.com>
|
||||
M: Rajneesh Bhardwaj <irenic.rajneesh@gmail.com>
|
||||
M: "David E. Box" <david.e.box@linux.intel.com>
|
||||
L: platform-driver-x86@vger.kernel.org
|
||||
S: Maintained
|
||||
@@ -9469,6 +9497,13 @@ L: Dell.Client.Kernel@dell.com
|
||||
S: Maintained
|
||||
F: drivers/platform/x86/intel-wmi-thunderbolt.c
|
||||
|
||||
INTEL WWAN IOSM DRIVER
|
||||
M: M Chetan Kumar <m.chetan.kumar@intel.com>
|
||||
M: Intel Corporation <linuxwwan@intel.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/net/wwan/iosm/
|
||||
|
||||
INTEL(R) TRACE HUB
|
||||
M: Alexander Shishkin <alexander.shishkin@linux.intel.com>
|
||||
S: Supported
|
||||
@@ -10879,6 +10914,7 @@ S: Maintained
|
||||
F: drivers/mailbox/
|
||||
F: include/linux/mailbox_client.h
|
||||
F: include/linux/mailbox_controller.h
|
||||
F: include/dt-bindings/mailbox/
|
||||
F: Documentation/devicetree/bindings/mailbox/
|
||||
|
||||
MAILBOX ARM MHUv2
|
||||
@@ -12215,7 +12251,7 @@ M: Maximilian Luz <luzmaximilian@gmail.com>
|
||||
L: platform-driver-x86@vger.kernel.org
|
||||
S: Maintained
|
||||
W: https://github.com/linux-surface/surface-aggregator-module
|
||||
C: irc://chat.freenode.net/##linux-surface
|
||||
C: irc://irc.libera.chat/linux-surface
|
||||
F: Documentation/driver-api/surface_aggregator/
|
||||
F: drivers/platform/surface/aggregator/
|
||||
F: drivers/platform/surface/surface_acpi_notify.c
|
||||
@@ -12411,6 +12447,12 @@ F: Documentation/userspace-api/media/drivers/meye*
|
||||
F: drivers/media/pci/meye/
|
||||
F: include/uapi/linux/meye.h
|
||||
|
||||
MOTORCOMM PHY DRIVER
|
||||
M: Peter Geis <pgwipeout@gmail.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/net/phy/motorcomm.c
|
||||
|
||||
MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD
|
||||
S: Orphan
|
||||
F: Documentation/driver-api/serial/moxa-smartio.rst
|
||||
@@ -12682,6 +12724,7 @@ W: http://www.netfilter.org/
|
||||
W: http://www.iptables.org/
|
||||
W: http://www.nftables.org/
|
||||
Q: http://patchwork.ozlabs.org/project/netfilter-devel/list/
|
||||
C: irc://irc.libera.chat/netfilter
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git
|
||||
F: include/linux/netfilter*
|
||||
@@ -13225,6 +13268,7 @@ M: Vladimir Oltean <olteanv@gmail.com>
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/net/dsa/sja1105
|
||||
F: drivers/net/pcs/pcs-xpcs-nxp.c
|
||||
|
||||
NXP TDA998X DRM DRIVER
|
||||
M: Russell King <linux@armlinux.org.uk>
|
||||
@@ -15612,6 +15656,13 @@ F: include/linux/rpmsg/
|
||||
F: include/uapi/linux/rpmsg.h
|
||||
F: samples/rpmsg/
|
||||
|
||||
REMOTE PROCESSOR MESSAGING (RPMSG) WWAN CONTROL DRIVER
|
||||
M: Stephan Gerhold <stephan@gerhold.net>
|
||||
L: netdev@vger.kernel.org
|
||||
L: linux-remoteproc@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/net/wwan/rpmsg_wwan_ctrl.c
|
||||
|
||||
RENESAS CLOCK DRIVERS
|
||||
M: Geert Uytterhoeven <geert+renesas@glider.be>
|
||||
L: linux-renesas-soc@vger.kernel.org
|
||||
@@ -15741,6 +15792,14 @@ F: arch/riscv/
|
||||
N: riscv
|
||||
K: riscv
|
||||
|
||||
RISC-V/MICROCHIP POLARFIRE SOC SUPPORT
|
||||
M: Lewis Hanly <lewis.hanly@microchip.com>
|
||||
L: linux-riscv@lists.infradead.org
|
||||
S: Supported
|
||||
F: drivers/mailbox/mailbox-mpfs.c
|
||||
F: drivers/soc/microchip/
|
||||
F: include/soc/microchip/mpfs.h
|
||||
|
||||
RNBD BLOCK DRIVERS
|
||||
M: Md. Haris Iqbal <haris.iqbal@ionos.com>
|
||||
M: Jack Wang <jinpu.wang@ionos.com>
|
||||
@@ -17717,6 +17776,7 @@ M: Jose Abreu <Jose.Abreu@synopsys.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/net/pcs/pcs-xpcs.c
|
||||
F: drivers/net/pcs/pcs-xpcs.h
|
||||
F: include/linux/pcs/pcs-xpcs.h
|
||||
|
||||
SYNOPSYS DESIGNWARE I2C DRIVER
|
||||
@@ -18219,6 +18279,13 @@ W: http://thinkwiki.org/wiki/Ibm-acpi
|
||||
T: git git://repo.or.cz/linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git
|
||||
F: drivers/platform/x86/thinkpad_acpi.c
|
||||
|
||||
THINKPAD LMI DRIVER
|
||||
M: Mark Pearson <markpearson@lenovo.com>
|
||||
L: platform-driver-x86@vger.kernel.org
|
||||
S: Maintained
|
||||
F: Documentation/ABI/testing/sysfs-class-firmware-attributes
|
||||
F: drivers/platform/x86/think-lmi.?
|
||||
|
||||
THUNDERBOLT DMA TRAFFIC TEST DRIVER
|
||||
M: Isaac Hazan <isaac.hazan@intel.com>
|
||||
L: linux-usb@vger.kernel.org
|
||||
@@ -19642,6 +19709,7 @@ S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk.git
|
||||
F: Documentation/core-api/printk-formats.rst
|
||||
F: lib/test_printf.c
|
||||
F: lib/test_scanf.c
|
||||
F: lib/vsprintf.c
|
||||
|
||||
VT1211 HARDWARE MONITOR DRIVER
|
||||
@@ -19825,6 +19893,16 @@ F: Documentation/core-api/workqueue.rst
|
||||
F: include/linux/workqueue.h
|
||||
F: kernel/workqueue.c
|
||||
|
||||
WWAN DRIVERS
|
||||
M: Loic Poulain <loic.poulain@linaro.org>
|
||||
M: Sergey Ryazanov <ryazanov.s.a@gmail.com>
|
||||
R: Johannes Berg <johannes@sipsolutions.net>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/net/wwan/
|
||||
F: include/linux/wwan.h
|
||||
F: include/uapi/linux/wwan.h
|
||||
|
||||
X-POWERS AXP288 PMIC DRIVERS
|
||||
M: Hans de Goede <hdegoede@redhat.com>
|
||||
S: Maintained
|
||||
|
||||
@@ -285,6 +285,13 @@ config ARCH_THREAD_STACK_ALLOCATOR
|
||||
config ARCH_WANTS_DYNAMIC_TASK_STRUCT
|
||||
bool
|
||||
|
||||
config ARCH_WANTS_NO_INSTR
|
||||
bool
|
||||
help
|
||||
An architecture should select this if the noinstr macro is being used on
|
||||
functions to denote that the toolchain should avoid instrumenting such
|
||||
functions and is required for correctness.
|
||||
|
||||
config ARCH_32BIT_OFF_T
|
||||
bool
|
||||
depends on !64BIT
|
||||
|
||||
@@ -549,29 +549,12 @@ config NR_CPUS
|
||||
MARVEL support can handle a maximum of 32 CPUs, all the others
|
||||
with working support have a maximum of 4 CPUs.
|
||||
|
||||
config ARCH_DISCONTIGMEM_ENABLE
|
||||
bool "Discontiguous Memory Support"
|
||||
depends on BROKEN
|
||||
help
|
||||
Say Y to support efficient handling of discontiguous physical memory,
|
||||
for architectures which are either NUMA (Non-Uniform Memory Access)
|
||||
or have huge holes in the physical address space for other reasons.
|
||||
See <file:Documentation/vm/numa.rst> for more.
|
||||
|
||||
config ARCH_SPARSEMEM_ENABLE
|
||||
bool "Sparse Memory Support"
|
||||
help
|
||||
Say Y to support efficient handling of discontiguous physical memory,
|
||||
for systems that have huge holes in the physical address space.
|
||||
|
||||
config NUMA
|
||||
bool "NUMA Support (EXPERIMENTAL)"
|
||||
depends on DISCONTIGMEM && BROKEN
|
||||
help
|
||||
Say Y to compile the kernel to support NUMA (Non-Uniform Memory
|
||||
Access). This option is for configuring high-end multiprocessor
|
||||
server machines. If in doubt, say N.
|
||||
|
||||
config ALPHA_WTINT
|
||||
bool "Use WTINT" if ALPHA_SRM || ALPHA_GENERIC
|
||||
default y if ALPHA_QEMU
|
||||
@@ -596,11 +579,6 @@ config ALPHA_WTINT
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config NODES_SHIFT
|
||||
int
|
||||
default "7"
|
||||
depends on NEED_MULTIPLE_NODES
|
||||
|
||||
# LARGE_VMALLOC is racy, if you *really* need it then fix it first
|
||||
config ALPHA_LARGE_VMALLOC
|
||||
bool
|
||||
|
||||
@@ -99,12 +99,6 @@ struct alpha_machine_vector
|
||||
|
||||
const char *vector_name;
|
||||
|
||||
/* NUMA information */
|
||||
int (*pa_to_nid)(unsigned long);
|
||||
int (*cpuid_to_nid)(int);
|
||||
unsigned long (*node_mem_start)(int);
|
||||
unsigned long (*node_mem_size)(int);
|
||||
|
||||
/* System specific parameters. */
|
||||
union {
|
||||
struct {
|
||||
|
||||
@@ -1,100 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Written by Kanoj Sarcar (kanoj@sgi.com) Aug 99
|
||||
* Adapted for the alpha wildfire architecture Jan 2001.
|
||||
*/
|
||||
#ifndef _ASM_MMZONE_H_
|
||||
#define _ASM_MMZONE_H_
|
||||
|
||||
#ifdef CONFIG_DISCONTIGMEM
|
||||
|
||||
#include <asm/smp.h>
|
||||
|
||||
/*
|
||||
* Following are macros that are specific to this numa platform.
|
||||
*/
|
||||
|
||||
extern pg_data_t node_data[];
|
||||
|
||||
#define alpha_pa_to_nid(pa) \
|
||||
(alpha_mv.pa_to_nid \
|
||||
? alpha_mv.pa_to_nid(pa) \
|
||||
: (0))
|
||||
#define node_mem_start(nid) \
|
||||
(alpha_mv.node_mem_start \
|
||||
? alpha_mv.node_mem_start(nid) \
|
||||
: (0UL))
|
||||
#define node_mem_size(nid) \
|
||||
(alpha_mv.node_mem_size \
|
||||
? alpha_mv.node_mem_size(nid) \
|
||||
: ((nid) ? (0UL) : (~0UL)))
|
||||
|
||||
#define pa_to_nid(pa) alpha_pa_to_nid(pa)
|
||||
#define NODE_DATA(nid) (&node_data[(nid)])
|
||||
|
||||
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)
|
||||
|
||||
#if 1
|
||||
#define PLAT_NODE_DATA_LOCALNR(p, n) \
|
||||
(((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn)
|
||||
#else
|
||||
static inline unsigned long
|
||||
PLAT_NODE_DATA_LOCALNR(unsigned long p, int n)
|
||||
{
|
||||
unsigned long temp;
|
||||
temp = p >> PAGE_SHIFT;
|
||||
return temp - PLAT_NODE_DATA(n)->gendata.node_start_pfn;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Following are macros that each numa implementation must define.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Given a kernel address, find the home node of the underlying memory.
|
||||
*/
|
||||
#define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr))
|
||||
|
||||
/*
|
||||
* Given a kaddr, LOCAL_BASE_ADDR finds the owning node of the memory
|
||||
* and returns the kaddr corresponding to first physical page in the
|
||||
* node's mem_map.
|
||||
*/
|
||||
#define LOCAL_BASE_ADDR(kaddr) \
|
||||
((unsigned long)__va(NODE_DATA(kvaddr_to_nid(kaddr))->node_start_pfn \
|
||||
<< PAGE_SHIFT))
|
||||
|
||||
/* XXX: FIXME -- nyc */
|
||||
#define kern_addr_valid(kaddr) (0)
|
||||
|
||||
#define mk_pte(page, pgprot) \
|
||||
({ \
|
||||
pte_t pte; \
|
||||
unsigned long pfn; \
|
||||
\
|
||||
pfn = page_to_pfn(page) << 32; \
|
||||
pte_val(pte) = pfn | pgprot_val(pgprot); \
|
||||
\
|
||||
pte; \
|
||||
})
|
||||
|
||||
#define pte_page(x) \
|
||||
({ \
|
||||
unsigned long kvirt; \
|
||||
struct page * __xx; \
|
||||
\
|
||||
kvirt = (unsigned long)__va(pte_val(x) >> (32-PAGE_SHIFT)); \
|
||||
__xx = virt_to_page(kvirt); \
|
||||
\
|
||||
__xx; \
|
||||
})
|
||||
|
||||
#define pfn_to_nid(pfn) pa_to_nid(((u64)(pfn) << PAGE_SHIFT))
|
||||
#define pfn_valid(pfn) \
|
||||
(((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \
|
||||
node_spanned_pages(pfn_to_nid(pfn))) \
|
||||
|
||||
#endif /* CONFIG_DISCONTIGMEM */
|
||||
|
||||
#endif /* _ASM_MMZONE_H_ */
|
||||
@@ -206,7 +206,6 @@ extern unsigned long __zero_page(void);
|
||||
#define page_to_pa(page) (page_to_pfn(page) << PAGE_SHIFT)
|
||||
#define pte_pfn(pte) (pte_val(pte) >> 32)
|
||||
|
||||
#ifndef CONFIG_DISCONTIGMEM
|
||||
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
|
||||
#define mk_pte(page, pgprot) \
|
||||
({ \
|
||||
@@ -215,7 +214,6 @@ extern unsigned long __zero_page(void);
|
||||
pte_val(pte) = (page_to_pfn(page) << 32) | pgprot_val(pgprot); \
|
||||
pte; \
|
||||
})
|
||||
#endif
|
||||
|
||||
extern inline pte_t pfn_pte(unsigned long physpfn, pgprot_t pgprot)
|
||||
{ pte_t pte; pte_val(pte) = (PHYS_TWIDDLE(physpfn) << 32) | pgprot_val(pgprot); return pte; }
|
||||
@@ -330,9 +328,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
|
||||
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
|
||||
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
|
||||
|
||||
#ifndef CONFIG_DISCONTIGMEM
|
||||
#define kern_addr_valid(addr) (1)
|
||||
#endif
|
||||
|
||||
#define pte_ERROR(e) \
|
||||
printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
|
||||
|
||||
@@ -7,45 +7,6 @@
|
||||
#include <linux/numa.h>
|
||||
#include <asm/machvec.h>
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
static inline int cpu_to_node(int cpu)
|
||||
{
|
||||
int node;
|
||||
|
||||
if (!alpha_mv.cpuid_to_nid)
|
||||
return 0;
|
||||
|
||||
node = alpha_mv.cpuid_to_nid(cpu);
|
||||
|
||||
#ifdef DEBUG_NUMA
|
||||
BUG_ON(node < 0);
|
||||
#endif
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
extern struct cpumask node_to_cpumask_map[];
|
||||
/* FIXME: This is dumb, recalculating every time. But simple. */
|
||||
static const struct cpumask *cpumask_of_node(int node)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (node == NUMA_NO_NODE)
|
||||
return cpu_all_mask;
|
||||
|
||||
cpumask_clear(&node_to_cpumask_map[node]);
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cpu_to_node(cpu) == node)
|
||||
cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
|
||||
}
|
||||
|
||||
return &node_to_cpumask_map[node];
|
||||
}
|
||||
|
||||
#define cpumask_of_pcibus(bus) (cpu_online_mask)
|
||||
|
||||
#endif /* !CONFIG_NUMA */
|
||||
# include <asm-generic/topology.h>
|
||||
|
||||
#endif /* _ASM_ALPHA_TOPOLOGY_H */
|
||||
|
||||
@@ -127,6 +127,8 @@
|
||||
#define SO_PREFER_BUSY_POLL 69
|
||||
#define SO_BUSY_POLL_BUDGET 70
|
||||
|
||||
#define SO_NETNS_COOKIE 71
|
||||
|
||||
#if !defined(__KERNEL__)
|
||||
|
||||
#if __BITS_PER_LONG == 64
|
||||
|
||||
@@ -287,8 +287,7 @@ io7_init_hose(struct io7 *io7, int port)
|
||||
/*
|
||||
* Set up window 0 for scatter-gather 8MB at 8MB.
|
||||
*/
|
||||
hose->sg_isa = iommu_arena_new_node(marvel_cpuid_to_nid(io7->pe),
|
||||
hose, 0x00800000, 0x00800000, 0);
|
||||
hose->sg_isa = iommu_arena_new_node(0, hose, 0x00800000, 0x00800000, 0);
|
||||
hose->sg_isa->align_entry = 8; /* cache line boundary */
|
||||
csrs->POx_WBASE[0].csr =
|
||||
hose->sg_isa->dma_base | wbase_m_ena | wbase_m_sg;
|
||||
@@ -305,8 +304,7 @@ io7_init_hose(struct io7 *io7, int port)
|
||||
/*
|
||||
* Set up window 2 for scatter-gather (up-to) 1GB at 3GB.
|
||||
*/
|
||||
hose->sg_pci = iommu_arena_new_node(marvel_cpuid_to_nid(io7->pe),
|
||||
hose, 0xc0000000, 0x40000000, 0);
|
||||
hose->sg_pci = iommu_arena_new_node(0, hose, 0xc0000000, 0x40000000, 0);
|
||||
hose->sg_pci->align_entry = 8; /* cache line boundary */
|
||||
csrs->POx_WBASE[2].csr =
|
||||
hose->sg_pci->dma_base | wbase_m_ena | wbase_m_sg;
|
||||
@@ -843,52 +841,7 @@ EXPORT_SYMBOL(marvel_ioportmap);
|
||||
EXPORT_SYMBOL(marvel_ioread8);
|
||||
EXPORT_SYMBOL(marvel_iowrite8);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* NUMA Support
|
||||
*/
|
||||
/**********
|
||||
* FIXME - for now each cpu is a node by itself
|
||||
* -- no real support for striped mode
|
||||
**********
|
||||
*/
|
||||
int
|
||||
marvel_pa_to_nid(unsigned long pa)
|
||||
{
|
||||
int cpuid;
|
||||
|
||||
if ((pa >> 43) & 1) /* I/O */
|
||||
cpuid = (~(pa >> 35) & 0xff);
|
||||
else /* mem */
|
||||
cpuid = ((pa >> 34) & 0x3) | ((pa >> (37 - 2)) & (0x1f << 2));
|
||||
|
||||
return marvel_cpuid_to_nid(cpuid);
|
||||
}
|
||||
|
||||
int
|
||||
marvel_cpuid_to_nid(int cpuid)
|
||||
{
|
||||
return cpuid;
|
||||
}
|
||||
|
||||
unsigned long
|
||||
marvel_node_mem_start(int nid)
|
||||
{
|
||||
unsigned long pa;
|
||||
|
||||
pa = (nid & 0x3) | ((nid & (0x1f << 2)) << 1);
|
||||
pa <<= 34;
|
||||
|
||||
return pa;
|
||||
}
|
||||
|
||||
unsigned long
|
||||
marvel_node_mem_size(int nid)
|
||||
{
|
||||
return 16UL * 1024 * 1024 * 1024; /* 16GB */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* AGP GART Support.
|
||||
*/
|
||||
|
||||
@@ -440,33 +440,6 @@ struct pci_ops wildfire_pci_ops =
|
||||
.write = wildfire_write_config,
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* NUMA Support
|
||||
*/
|
||||
int wildfire_pa_to_nid(unsigned long pa)
|
||||
{
|
||||
return pa >> 36;
|
||||
}
|
||||
|
||||
int wildfire_cpuid_to_nid(int cpuid)
|
||||
{
|
||||
/* assume 4 CPUs per node */
|
||||
return cpuid >> 2;
|
||||
}
|
||||
|
||||
unsigned long wildfire_node_mem_start(int nid)
|
||||
{
|
||||
/* 64GB per node */
|
||||
return (unsigned long)nid * (64UL * 1024 * 1024 * 1024);
|
||||
}
|
||||
|
||||
unsigned long wildfire_node_mem_size(int nid)
|
||||
{
|
||||
/* 64GB per node */
|
||||
return 64UL * 1024 * 1024 * 1024;
|
||||
}
|
||||
|
||||
#if DEBUG_DUMP_REGS
|
||||
|
||||
static void __init
|
||||
|
||||
@@ -71,33 +71,6 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
|
||||
if (align < mem_size)
|
||||
align = mem_size;
|
||||
|
||||
|
||||
#ifdef CONFIG_DISCONTIGMEM
|
||||
|
||||
arena = memblock_alloc_node(sizeof(*arena), align, nid);
|
||||
if (!NODE_DATA(nid) || !arena) {
|
||||
printk("%s: couldn't allocate arena from node %d\n"
|
||||
" falling back to system-wide allocation\n",
|
||||
__func__, nid);
|
||||
arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
|
||||
if (!arena)
|
||||
panic("%s: Failed to allocate %zu bytes\n", __func__,
|
||||
sizeof(*arena));
|
||||
}
|
||||
|
||||
arena->ptes = memblock_alloc_node(sizeof(*arena), align, nid);
|
||||
if (!NODE_DATA(nid) || !arena->ptes) {
|
||||
printk("%s: couldn't allocate arena ptes from node %d\n"
|
||||
" falling back to system-wide allocation\n",
|
||||
__func__, nid);
|
||||
arena->ptes = memblock_alloc(mem_size, align);
|
||||
if (!arena->ptes)
|
||||
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
|
||||
__func__, mem_size, align);
|
||||
}
|
||||
|
||||
#else /* CONFIG_DISCONTIGMEM */
|
||||
|
||||
arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
|
||||
if (!arena)
|
||||
panic("%s: Failed to allocate %zu bytes\n", __func__,
|
||||
@@ -107,8 +80,6 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
|
||||
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
|
||||
__func__, mem_size, align);
|
||||
|
||||
#endif /* CONFIG_DISCONTIGMEM */
|
||||
|
||||
spin_lock_init(&arena->lock);
|
||||
arena->hose = hose;
|
||||
arena->dma_base = base;
|
||||
|
||||
@@ -49,10 +49,6 @@ extern void marvel_init_arch(void);
|
||||
extern void marvel_kill_arch(int);
|
||||
extern void marvel_machine_check(unsigned long, unsigned long);
|
||||
extern void marvel_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
|
||||
extern int marvel_pa_to_nid(unsigned long);
|
||||
extern int marvel_cpuid_to_nid(int);
|
||||
extern unsigned long marvel_node_mem_start(int);
|
||||
extern unsigned long marvel_node_mem_size(int);
|
||||
extern struct _alpha_agp_info *marvel_agp_info(void);
|
||||
struct io7 *marvel_find_io7(int pe);
|
||||
struct io7 *marvel_next_io7(struct io7 *prev);
|
||||
@@ -101,10 +97,6 @@ extern void wildfire_init_arch(void);
|
||||
extern void wildfire_kill_arch(int);
|
||||
extern void wildfire_machine_check(unsigned long vector, unsigned long la_ptr);
|
||||
extern void wildfire_pci_tbi(struct pci_controller *, dma_addr_t, dma_addr_t);
|
||||
extern int wildfire_pa_to_nid(unsigned long);
|
||||
extern int wildfire_cpuid_to_nid(int);
|
||||
extern unsigned long wildfire_node_mem_start(int);
|
||||
extern unsigned long wildfire_node_mem_size(int);
|
||||
|
||||
/* console.c */
|
||||
#ifdef CONFIG_VGA_HOSE
|
||||
|
||||
@@ -79,11 +79,6 @@ int alpha_l3_cacheshape;
|
||||
unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
struct cpumask node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
|
||||
EXPORT_SYMBOL(node_to_cpumask_map);
|
||||
#endif
|
||||
|
||||
/* Which processor we booted from. */
|
||||
int boot_cpuid;
|
||||
|
||||
@@ -305,7 +300,6 @@ move_initrd(unsigned long mem_limit)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_DISCONTIGMEM
|
||||
static void __init
|
||||
setup_memory(void *kernel_end)
|
||||
{
|
||||
@@ -389,9 +383,6 @@ setup_memory(void *kernel_end)
|
||||
}
|
||||
#endif /* CONFIG_BLK_DEV_INITRD */
|
||||
}
|
||||
#else
|
||||
extern void setup_memory(void *);
|
||||
#endif /* !CONFIG_DISCONTIGMEM */
|
||||
|
||||
int __init
|
||||
page_is_ram(unsigned long pfn)
|
||||
@@ -618,13 +609,6 @@ setup_arch(char **cmdline_p)
|
||||
"VERBOSE_MCHECK "
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DISCONTIGMEM
|
||||
"DISCONTIGMEM "
|
||||
#ifdef CONFIG_NUMA
|
||||
"NUMA "
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_SPINLOCK
|
||||
"DEBUG_SPINLOCK "
|
||||
#endif
|
||||
|
||||
@@ -461,10 +461,5 @@ struct alpha_machine_vector marvel_ev7_mv __initmv = {
|
||||
.kill_arch = marvel_kill_arch,
|
||||
.pci_map_irq = marvel_map_irq,
|
||||
.pci_swizzle = common_swizzle,
|
||||
|
||||
.pa_to_nid = marvel_pa_to_nid,
|
||||
.cpuid_to_nid = marvel_cpuid_to_nid,
|
||||
.node_mem_start = marvel_node_mem_start,
|
||||
.node_mem_size = marvel_node_mem_size,
|
||||
};
|
||||
ALIAS_MV(marvel_ev7)
|
||||
|
||||
@@ -337,10 +337,5 @@ struct alpha_machine_vector wildfire_mv __initmv = {
|
||||
.kill_arch = wildfire_kill_arch,
|
||||
.pci_map_irq = wildfire_map_irq,
|
||||
.pci_swizzle = common_swizzle,
|
||||
|
||||
.pa_to_nid = wildfire_pa_to_nid,
|
||||
.cpuid_to_nid = wildfire_cpuid_to_nid,
|
||||
.node_mem_start = wildfire_node_mem_start,
|
||||
.node_mem_size = wildfire_node_mem_size,
|
||||
};
|
||||
ALIAS_MV(wildfire)
|
||||
|
||||
@@ -482,7 +482,7 @@
|
||||
550 common process_madvise sys_process_madvise
|
||||
551 common epoll_pwait2 sys_epoll_pwait2
|
||||
552 common mount_setattr sys_mount_setattr
|
||||
# 553 reserved for quotactl_path
|
||||
553 common quotactl_fd sys_quotactl_fd
|
||||
554 common landlock_create_ruleset sys_landlock_create_ruleset
|
||||
555 common landlock_add_rule sys_landlock_add_rule
|
||||
556 common landlock_restrict_self sys_landlock_restrict_self
|
||||
|
||||
@@ -6,5 +6,3 @@
|
||||
ccflags-y := -Werror
|
||||
|
||||
obj-y := init.o fault.o
|
||||
|
||||
obj-$(CONFIG_DISCONTIGMEM) += numa.o
|
||||
|
||||
@@ -235,8 +235,6 @@ callback_init(void * kernel_end)
|
||||
return kernel_end;
|
||||
}
|
||||
|
||||
|
||||
#ifndef CONFIG_DISCONTIGMEM
|
||||
/*
|
||||
* paging_init() sets up the memory map.
|
||||
*/
|
||||
@@ -257,7 +255,6 @@ void __init paging_init(void)
|
||||
/* Initialize the kernel's ZERO_PGE. */
|
||||
memset((void *)ZERO_PGE, 0, PAGE_SIZE);
|
||||
}
|
||||
#endif /* CONFIG_DISCONTIGMEM */
|
||||
|
||||
#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM)
|
||||
void
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user