Merge 996fe06160 ("Merge tag 'kgdb-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/danielt/linux") into android-mainline
Steps on the way to 5.15-rc1

Change-Id: I3806b714a5a783a7132b1daf766ebb71985fc640
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
@@ -125,6 +125,71 @@ Note that kprobe and synthetic event definitions can be written under
instance node, but those are also visible from other instances. So please
take care to avoid event name conflicts.

Ftrace Histogram Options
------------------------

Since it is too long to write a histogram action as a string for the per-event
'actions' option, there are tree-style options under a per-event 'hist' subkey
for the histogram actions. For the details of each parameter,
please read the event histogram document [3]_.

.. [3] See :ref:`Documentation/trace/histogram.rst <histogram>`

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]keys = KEY1[, KEY2[...]]
   Set histogram key parameters. (Mandatory)
   The 'N' is a digit string used when there are multiple histograms. You can
   omit it if there is only one histogram on the event.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]values = VAL1[, VAL2[...]]
   Set histogram value parameters.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]sort = SORT1[, SORT2[...]]
   Set histogram sort parameter options.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]size = NR_ENTRIES
   Set histogram size (number of entries).

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]name = NAME
   Set histogram name.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]var.VARIABLE = EXPR
   Define a new VARIABLE by the EXPR expression.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<pause|continue|clear>
   Set a histogram control parameter. You can set only one of them.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmatch.[M.]event = GROUP.EVENT
   Set the histogram 'onmatch' handler matching event parameter.
   The 'M' is a digit string used when there are multiple 'onmatch' handlers.
   You can omit it if there is only one 'onmatch' handler on this histogram.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmatch.[M.]trace = EVENT[, ARG1[...]]
   Set the histogram 'trace' action for 'onmatch'.
   EVENT must be a synthetic event name, and ARG1... are parameters
   for that event. Mandatory if the 'onmatch.event' option is set.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmax.[M.]var = VAR
   Set the histogram 'onmax' handler variable parameter.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onchange.[M.]var = VAR
   Set the histogram 'onchange' handler variable parameter.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<onmax|onchange>.[M.]save = ARG1[, ARG2[...]]
   Set the histogram 'save' action parameters for the 'onmax' or 'onchange' handler.
   This option or the 'snapshot' option below is mandatory if 'onmax.var' or
   'onchange.var' is set.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<onmax|onchange>.[M.]snapshot
   Set the histogram 'snapshot' action for the 'onmax' or 'onchange' handler.
   This option or the 'save' option above is mandatory if 'onmax.var' or
   'onchange.var' is set.

ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.filter = FILTER_EXPR
   Set the histogram filter expression. You don't need 'if' in the FILTER_EXPR.

Note that this 'hist' option can conflict with the per-event 'actions'
option if the 'actions' option has a histogram action.
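As a quick orientation, here is a minimal sketch of how several of the options
above combine (an editor-added example; the event and field names are
illustrative and not taken from this patch). The boot-time tracing example
under "When to Start" below shows the same structure with real initcall
events::

	ftrace.event.sched.sched_switch.hist {
		keys = next_pid			# mandatory
		values = prev_prio
		sort = prev_prio
		size = 2048
		filter = "prev_prio < 100"
	}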


When to Start
=============
@@ -159,13 +224,23 @@ below::
}
synthetic.initcall_latency {
fields = "unsigned long func", "u64 lat"
actions = "hist:keys=func.sym,lat:vals=lat:sort=lat"
hist {
keys = func.sym, lat
values = lat
sort = lat
}
}
initcall.initcall_start {
actions = "hist:keys=func:ts0=common_timestamp.usecs"
initcall.initcall_start.hist {
keys = func
var.ts0 = common_timestamp.usecs
}
initcall.initcall_finish {
actions = "hist:keys=func:lat=common_timestamp.usecs-$ts0:onmatch(initcall.initcall_start).initcall_latency(func,$lat)"
initcall.initcall_finish.hist {
keys = func
var.lat = common_timestamp.usecs - $ts0
onmatch {
event = initcall.initcall_start
trace = initcall_latency, func, $lat
}
}
}

@@ -70,15 +70,16 @@ Documentation written by Tom Zanussi
modified by appending any of the following modifiers to the field
name:

=========== ==========================================
.hex        display a number as a hex value
.sym        display an address as a symbol
.sym-offset display an address as a symbol and offset
.syscall    display a syscall id as a system call name
.execname   display a common_pid as a program name
.log2       display log2 value rather than raw number
.usecs      display a common_timestamp in microseconds
=========== ==========================================
============= =================================================
.hex          display a number as a hex value
.sym          display an address as a symbol
.sym-offset   display an address as a symbol and offset
.syscall      display a syscall id as a system call name
.execname     display a common_pid as a program name
.log2         display log2 value rather than raw number
.buckets=size display grouping of values rather than raw number
.usecs        display a common_timestamp in microseconds
============= =================================================
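For illustration only (an editor-added sketch, not part of this patch hunk; it
reuses the kmem:kmalloc event and fields that appear in the example further
below), a modifier is simply appended to the field it qualifies in the trigger
string::

  # echo 'hist:keys=call_site.sym-offset:vals=bytes_req' > \
        /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger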

Note that in general the semantics of a given field aren't
interpreted when applying a modifier to it, but there are some
@@ -228,7 +229,7 @@ Extended error information
that lists the total number of bytes requested for each function in
the kernel that made one or more calls to kmalloc::

  # echo 'hist:key=call_site:val=bytes_req' > \
  # echo 'hist:key=call_site:val=bytes_req.buckets=32' > \
        /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger

This tells the tracing system to create a 'hist' trigger using the
@@ -1823,20 +1824,99 @@ and variables defined on other events (see Section 2.2.3 below on
how that is done using hist trigger 'onmatch' action). Once that is
done, the 'wakeup_latency' synthetic event instance is created.

A histogram can now be defined for the new synthetic event::

  # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger

The new event is created under the tracing/events/synthetic/ directory
and looks and behaves just like any other event::

  # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
        enable  filter  format  hist  id  trigger

A histogram can now be defined for the new synthetic event::

  # echo 'hist:keys=pid,prio,lat.log2:sort=lat' >> \
        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger

The above shows the latency "lat" in a power of 2 grouping.

Like any other event, once a histogram is enabled for the event, the
output can be displayed by reading the event's 'hist' file.

  # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist

  # event histogram
  #
  # trigger info: hist:keys=pid,prio,lat.log2:vals=hitcount:sort=lat.log2:size=2048 [active]
  #

  { pid: 2035, prio: 9, lat: ~ 2^2 } hitcount: 43
  { pid: 2034, prio: 9, lat: ~ 2^2 } hitcount: 60
  { pid: 2029, prio: 9, lat: ~ 2^2 } hitcount: 965
  { pid: 2034, prio: 120, lat: ~ 2^2 } hitcount: 9
  { pid: 2033, prio: 120, lat: ~ 2^2 } hitcount: 5
  { pid: 2030, prio: 9, lat: ~ 2^2 } hitcount: 335
  { pid: 2030, prio: 120, lat: ~ 2^2 } hitcount: 10
  { pid: 2032, prio: 120, lat: ~ 2^2 } hitcount: 1
  { pid: 2035, prio: 120, lat: ~ 2^2 } hitcount: 2
  { pid: 2031, prio: 9, lat: ~ 2^2 } hitcount: 176
  { pid: 2028, prio: 120, lat: ~ 2^2 } hitcount: 15
  { pid: 2033, prio: 9, lat: ~ 2^2 } hitcount: 91
  { pid: 2032, prio: 9, lat: ~ 2^2 } hitcount: 125
  { pid: 2029, prio: 120, lat: ~ 2^2 } hitcount: 4
  { pid: 2031, prio: 120, lat: ~ 2^2 } hitcount: 3
  { pid: 2029, prio: 120, lat: ~ 2^3 } hitcount: 2
  { pid: 2035, prio: 9, lat: ~ 2^3 } hitcount: 41
  { pid: 2030, prio: 120, lat: ~ 2^3 } hitcount: 1
  { pid: 2032, prio: 9, lat: ~ 2^3 } hitcount: 32
  { pid: 2031, prio: 9, lat: ~ 2^3 } hitcount: 44
  { pid: 2034, prio: 9, lat: ~ 2^3 } hitcount: 40
  { pid: 2030, prio: 9, lat: ~ 2^3 } hitcount: 29
  { pid: 2033, prio: 9, lat: ~ 2^3 } hitcount: 31
  { pid: 2029, prio: 9, lat: ~ 2^3 } hitcount: 31
  { pid: 2028, prio: 120, lat: ~ 2^3 } hitcount: 18
  { pid: 2031, prio: 120, lat: ~ 2^3 } hitcount: 2
  { pid: 2028, prio: 120, lat: ~ 2^4 } hitcount: 1
  { pid: 2029, prio: 9, lat: ~ 2^4 } hitcount: 4
  { pid: 2031, prio: 120, lat: ~ 2^7 } hitcount: 1
  { pid: 2032, prio: 120, lat: ~ 2^7 } hitcount: 1

  Totals:
  Hits: 2122
  Entries: 30
  Dropped: 0


The latency values can also be grouped linearly by a given size with
the ".buckets" modifier, specifying the group size (in this case, groups of 10).

  # echo 'hist:keys=pid,prio,lat.buckets=10:sort=lat' >> \
        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger

  # event histogram
  #
  # trigger info: hist:keys=pid,prio,lat.buckets=10:vals=hitcount:sort=lat.buckets=10:size=2048 [active]
  #

  { pid: 2067, prio: 9, lat: ~ 0-9 } hitcount: 220
  { pid: 2068, prio: 9, lat: ~ 0-9 } hitcount: 157
  { pid: 2070, prio: 9, lat: ~ 0-9 } hitcount: 100
  { pid: 2067, prio: 120, lat: ~ 0-9 } hitcount: 6
  { pid: 2065, prio: 120, lat: ~ 0-9 } hitcount: 2
  { pid: 2066, prio: 120, lat: ~ 0-9 } hitcount: 2
  { pid: 2069, prio: 9, lat: ~ 0-9 } hitcount: 122
  { pid: 2069, prio: 120, lat: ~ 0-9 } hitcount: 8
  { pid: 2070, prio: 120, lat: ~ 0-9 } hitcount: 1
  { pid: 2068, prio: 120, lat: ~ 0-9 } hitcount: 7
  { pid: 2066, prio: 9, lat: ~ 0-9 } hitcount: 365
  { pid: 2064, prio: 120, lat: ~ 0-9 } hitcount: 35
  { pid: 2065, prio: 9, lat: ~ 0-9 } hitcount: 998
  { pid: 2071, prio: 9, lat: ~ 0-9 } hitcount: 85
  { pid: 2065, prio: 9, lat: ~ 10-19 } hitcount: 2
  { pid: 2064, prio: 120, lat: ~ 10-19 } hitcount: 2

  Totals:
  Hits: 2112
  Entries: 16
  Dropped: 0

2.2.3 Hist trigger 'handlers' and 'actions'
-------------------------------------------


MAINTAINERS
@@ -18986,6 +18986,20 @@ F: arch/x86/mm/testmmiotrace.c
F: include/linux/mmiotrace.h
F: kernel/trace/trace_mmiotrace.c

TRACING OS NOISE / LATENCY TRACERS
M: Steven Rostedt <rostedt@goodmis.org>
M: Daniel Bristot de Oliveira <bristot@kernel.org>
S: Maintained
F: kernel/trace/trace_osnoise.c
F: include/trace/events/osnoise.h
F: kernel/trace/trace_hwlat.c
F: kernel/trace/trace_irqsoff.c
F: kernel/trace/trace_sched_wakeup.c
F: Documentation/trace/osnoise-tracer.rst
F: Documentation/trace/timerlat-tracer.rst
F: Documentation/trace/hwlat_detector.rst
F: arch/*/kernel/trace.c

TRADITIONAL CHINESE DOCUMENTATION
M: Hu Haowen <src.res@email.cn>
L: linux-doc-tw-discuss@lists.sourceforge.net
@@ -19166,9 +19180,8 @@ W: http://dotat.at/prog/unifdef
F: scripts/unifdef.c

UNIFORM CDROM DRIVER
M: Jens Axboe <axboe@kernel.dk>
M: Phillip Potter <phil@philpotter.co.uk>
S: Maintained
W: http://www.kernel.dk
F: Documentation/cdrom/
F: drivers/cdrom/cdrom.c
F: include/linux/cdrom.h

@@ -197,6 +197,9 @@ config HAVE_FUNCTION_ERROR_INJECTION
|
||||
config HAVE_NMI
|
||||
bool
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
bool
|
||||
|
||||
#
|
||||
# An arch should select this if it provides all these things:
|
||||
#
|
||||
|
||||
@@ -49,9 +49,7 @@ config ARC
|
||||
select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
|
||||
select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
|
||||
select SET_FS
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
|
||||
config LOCKDEP_SUPPORT
|
||||
def_bool y
|
||||
@@ -116,16 +114,9 @@ choice
|
||||
default ARC_CPU_770 if ISA_ARCOMPACT
|
||||
default ARC_CPU_HS if ISA_ARCV2
|
||||
|
||||
if ISA_ARCOMPACT
|
||||
|
||||
config ARC_CPU_750D
|
||||
bool "ARC750D"
|
||||
select ARC_CANT_LLSC
|
||||
help
|
||||
Support for ARC750 core
|
||||
|
||||
config ARC_CPU_770
|
||||
bool "ARC770"
|
||||
depends on ISA_ARCOMPACT
|
||||
select ARC_HAS_SWAPE
|
||||
help
|
||||
Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
|
||||
@@ -135,8 +126,6 @@ config ARC_CPU_770
|
||||
-Caches: New Prog Model, Region Flush
|
||||
-Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
|
||||
|
||||
endif #ISA_ARCOMPACT
|
||||
|
||||
config ARC_CPU_HS
|
||||
bool "ARC-HS"
|
||||
depends on ISA_ARCV2
|
||||
@@ -274,33 +263,17 @@ config ARC_DCCM_BASE
|
||||
|
||||
choice
|
||||
prompt "MMU Version"
|
||||
default ARC_MMU_V3 if ARC_CPU_770
|
||||
default ARC_MMU_V2 if ARC_CPU_750D
|
||||
default ARC_MMU_V4 if ARC_CPU_HS
|
||||
|
||||
if ISA_ARCOMPACT
|
||||
|
||||
config ARC_MMU_V1
|
||||
bool "MMU v1"
|
||||
help
|
||||
Orig ARC700 MMU
|
||||
|
||||
config ARC_MMU_V2
|
||||
bool "MMU v2"
|
||||
help
|
||||
Fixed the deficiency of v1 - possible thrashing in memcpy scenario
|
||||
when 2 D-TLB and 1 I-TLB entries index into same 2way set.
|
||||
default ARC_MMU_V3 if ISA_ARCOMPACT
|
||||
default ARC_MMU_V4 if ISA_ARCV2
|
||||
|
||||
config ARC_MMU_V3
|
||||
bool "MMU v3"
|
||||
depends on ARC_CPU_770
|
||||
depends on ISA_ARCOMPACT
|
||||
help
|
||||
Introduced with ARC700 4.10: New Features
|
||||
Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
|
||||
Shared Address Spaces (SASID)
|
||||
|
||||
endif
|
||||
|
||||
config ARC_MMU_V4
|
||||
bool "MMU v4"
|
||||
depends on ISA_ARCV2
|
||||
@@ -319,7 +292,6 @@ config ARC_PAGE_SIZE_8K
|
||||
|
||||
config ARC_PAGE_SIZE_16K
|
||||
bool "16KB"
|
||||
depends on ARC_MMU_V3 || ARC_MMU_V4
|
||||
|
||||
config ARC_PAGE_SIZE_4K
|
||||
bool "4KB"
|
||||
@@ -340,6 +312,10 @@ config ARC_HUGEPAGE_16M
|
||||
|
||||
endchoice
|
||||
|
||||
config PGTABLE_LEVELS
|
||||
int "Number of Page table levels"
|
||||
default 2
|
||||
|
||||
config ARC_COMPACT_IRQ_LEVELS
|
||||
depends on ISA_ARCOMPACT
|
||||
bool "Setup Timer IRQ as high Priority"
|
||||
@@ -563,9 +539,6 @@ config ARC_DW2_UNWIND
|
||||
If you don't debug the kernel, you can say N, but we may not be able
|
||||
to solve problems without frame unwind information
|
||||
|
||||
config ARC_DBG_TLB_PARANOIA
|
||||
bool "Paranoia Checks in Low Level TLB Handlers"
|
||||
|
||||
config ARC_DBG_JUMP_LABEL
|
||||
bool "Paranoid checks in Static Keys (jump labels) code"
|
||||
depends on JUMP_LABEL
|
||||
|
||||
arch/arc/include/asm/atomic-llsc.h (new file, 97 lines)
@@ -0,0 +1,97 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
|
||||
#ifndef _ASM_ARC_ATOMIC_LLSC_H
|
||||
#define _ASM_ARC_ATOMIC_LLSC_H
|
||||
|
||||
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
|
||||
|
||||
#define ATOMIC_OP(op, c_op, asm_op) \
|
||||
static inline void arch_atomic_##op(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned int val; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %[val], [%[ctr]] \n" \
|
||||
" " #asm_op " %[val], %[val], %[i] \n" \
|
||||
" scond %[val], [%[ctr]] \n" \
|
||||
" bnz 1b \n" \
|
||||
: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
|
||||
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
|
||||
[i] "ir" (i) \
|
||||
: "cc"); \
|
||||
} \
|
||||
|
||||
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned int val; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %[val], [%[ctr]] \n" \
|
||||
" " #asm_op " %[val], %[val], %[i] \n" \
|
||||
" scond %[val], [%[ctr]] \n" \
|
||||
" bnz 1b \n" \
|
||||
: [val] "=&r" (val) \
|
||||
: [ctr] "r" (&v->counter), \
|
||||
[i] "ir" (i) \
|
||||
: "cc"); \
|
||||
\
|
||||
return val; \
|
||||
}
|
||||
|
||||
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
|
||||
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
|
||||
|
||||
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned int val, orig; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %[orig], [%[ctr]] \n" \
|
||||
" " #asm_op " %[val], %[orig], %[i] \n" \
|
||||
" scond %[val], [%[ctr]] \n" \
|
||||
" bnz 1b \n" \
|
||||
: [val] "=&r" (val), \
|
||||
[orig] "=&r" (orig) \
|
||||
: [ctr] "r" (&v->counter), \
|
||||
[i] "ir" (i) \
|
||||
: "cc"); \
|
||||
\
|
||||
return orig; \
|
||||
}
|
||||
|
||||
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
|
||||
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
|
||||
|
||||
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
|
||||
#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed
|
||||
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
|
||||
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
|
||||
|
||||
#define ATOMIC_OPS(op, c_op, asm_op) \
|
||||
ATOMIC_OP(op, c_op, asm_op) \
|
||||
ATOMIC_OP_RETURN(op, c_op, asm_op) \
|
||||
ATOMIC_FETCH_OP(op, c_op, asm_op)
|
||||
|
||||
ATOMIC_OPS(add, +=, add)
|
||||
ATOMIC_OPS(sub, -=, sub)
|
||||
|
||||
#undef ATOMIC_OPS
|
||||
#define ATOMIC_OPS(op, c_op, asm_op) \
|
||||
ATOMIC_OP(op, c_op, asm_op) \
|
||||
ATOMIC_FETCH_OP(op, c_op, asm_op)
|
||||
|
||||
ATOMIC_OPS(and, &=, and)
|
||||
ATOMIC_OPS(andnot, &= ~, bic)
|
||||
ATOMIC_OPS(or, |=, or)
|
||||
ATOMIC_OPS(xor, ^=, xor)
|
||||
|
||||
#define arch_atomic_andnot arch_atomic_andnot
|
||||
|
||||
#undef ATOMIC_OPS
|
||||
#undef ATOMIC_FETCH_OP
|
||||
#undef ATOMIC_OP_RETURN
|
||||
#undef ATOMIC_OP
|
||||
|
||||
#endif
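For orientation only (an editor-added sketch, not part of this patch; the
exact fallback header name is an assumption): since atomic-llsc.h defines just
the _relaxed forms of the return/fetch ops, the kernel's generic atomic
fallback layer (atomic-arch-fallback.h) is what supplies the fully ordered
variants, roughly like this:

	/* Generated fallback: build the ordered op from the relaxed one. */
	static __always_inline int
	arch_atomic_add_return(int i, atomic_t *v)
	{
		int ret;

		__atomic_pre_full_fence();	/* full barrier before */
		ret = arch_atomic_add_return_relaxed(i, v);
		__atomic_post_full_fence();	/* full barrier after */

		return ret;
	}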
|
||||
arch/arc/include/asm/atomic-spinlock.h (new file, 102 lines)
@@ -0,0 +1,102 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
|
||||
#ifndef _ASM_ARC_ATOMIC_SPLOCK_H
|
||||
#define _ASM_ARC_ATOMIC_SPLOCK_H
|
||||
|
||||
/*
|
||||
* Non hardware assisted Atomic-R-M-W
|
||||
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
|
||||
*/
|
||||
|
||||
static inline void arch_atomic_set(atomic_t *v, int i)
|
||||
{
|
||||
/*
|
||||
* Independent of hardware support, all of the atomic_xxx() APIs need
|
||||
* to follow the same locking rules to make sure that a "hardware"
|
||||
* atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
|
||||
* sequence
|
||||
*
|
||||
* Thus atomic_set() despite being 1 insn (and seemingly atomic)
|
||||
* requires the locking.
|
||||
*/
|
||||
unsigned long flags;
|
||||
|
||||
atomic_ops_lock(flags);
|
||||
WRITE_ONCE(v->counter, i);
|
||||
atomic_ops_unlock(flags);
|
||||
}
|
||||
|
||||
#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
|
||||
|
||||
#define ATOMIC_OP(op, c_op, asm_op) \
|
||||
static inline void arch_atomic_##op(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned long flags; \
|
||||
\
|
||||
atomic_ops_lock(flags); \
|
||||
v->counter c_op i; \
|
||||
atomic_ops_unlock(flags); \
|
||||
}
|
||||
|
||||
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned long flags; \
|
||||
unsigned int temp; \
|
||||
\
|
||||
/* \
|
||||
* spin lock/unlock provides the needed smp_mb() before/after \
|
||||
*/ \
|
||||
atomic_ops_lock(flags); \
|
||||
temp = v->counter; \
|
||||
temp c_op i; \
|
||||
v->counter = temp; \
|
||||
atomic_ops_unlock(flags); \
|
||||
\
|
||||
return temp; \
|
||||
}
|
||||
|
||||
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned long flags; \
|
||||
unsigned int orig; \
|
||||
\
|
||||
/* \
|
||||
* spin lock/unlock provides the needed smp_mb() before/after \
|
||||
*/ \
|
||||
atomic_ops_lock(flags); \
|
||||
orig = v->counter; \
|
||||
v->counter c_op i; \
|
||||
atomic_ops_unlock(flags); \
|
||||
\
|
||||
return orig; \
|
||||
}
|
||||
|
||||
#define ATOMIC_OPS(op, c_op, asm_op) \
|
||||
ATOMIC_OP(op, c_op, asm_op) \
|
||||
ATOMIC_OP_RETURN(op, c_op, asm_op) \
|
||||
ATOMIC_FETCH_OP(op, c_op, asm_op)
|
||||
|
||||
ATOMIC_OPS(add, +=, add)
|
||||
ATOMIC_OPS(sub, -=, sub)
|
||||
|
||||
#undef ATOMIC_OPS
|
||||
#define ATOMIC_OPS(op, c_op, asm_op) \
|
||||
ATOMIC_OP(op, c_op, asm_op) \
|
||||
ATOMIC_FETCH_OP(op, c_op, asm_op)
|
||||
|
||||
ATOMIC_OPS(and, &=, and)
|
||||
ATOMIC_OPS(andnot, &= ~, bic)
|
||||
ATOMIC_OPS(or, |=, or)
|
||||
ATOMIC_OPS(xor, ^=, xor)
|
||||
|
||||
#define arch_atomic_andnot arch_atomic_andnot
|
||||
#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
|
||||
|
||||
#undef ATOMIC_OPS
|
||||
#undef ATOMIC_FETCH_OP
|
||||
#undef ATOMIC_OP_RETURN
|
||||
#undef ATOMIC_OP
|
||||
|
||||
#endif
|
||||
@@ -17,435 +17,43 @@
|
||||
#define arch_atomic_read(v) READ_ONCE((v)->counter)
|
||||
|
||||
#ifdef CONFIG_ARC_HAS_LLSC
|
||||
|
||||
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
|
||||
|
||||
#define ATOMIC_OP(op, c_op, asm_op) \
|
||||
static inline void arch_atomic_##op(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned int val; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %[val], [%[ctr]] \n" \
|
||||
" " #asm_op " %[val], %[val], %[i] \n" \
|
||||
" scond %[val], [%[ctr]] \n" \
|
||||
" bnz 1b \n" \
|
||||
: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
|
||||
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
|
||||
[i] "ir" (i) \
|
||||
: "cc"); \
|
||||
} \
|
||||
|
||||
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned int val; \
|
||||
\
|
||||
/* \
|
||||
* Explicit full memory barrier needed before/after as \
|
||||
* LLOCK/SCOND themselves don't provide any such semantics \
|
||||
*/ \
|
||||
smp_mb(); \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %[val], [%[ctr]] \n" \
|
||||
" " #asm_op " %[val], %[val], %[i] \n" \
|
||||
" scond %[val], [%[ctr]] \n" \
|
||||
" bnz 1b \n" \
|
||||
: [val] "=&r" (val) \
|
||||
: [ctr] "r" (&v->counter), \
|
||||
[i] "ir" (i) \
|
||||
: "cc"); \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
return val; \
|
||||
}
|
||||
|
||||
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned int val, orig; \
|
||||
\
|
||||
/* \
|
||||
* Explicit full memory barrier needed before/after as \
|
||||
* LLOCK/SCOND themselves don't provide any such semantics \
|
||||
*/ \
|
||||
smp_mb(); \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %[orig], [%[ctr]] \n" \
|
||||
" " #asm_op " %[val], %[orig], %[i] \n" \
|
||||
" scond %[val], [%[ctr]] \n" \
|
||||
" bnz 1b \n" \
|
||||
: [val] "=&r" (val), \
|
||||
[orig] "=&r" (orig) \
|
||||
: [ctr] "r" (&v->counter), \
|
||||
[i] "ir" (i) \
|
||||
: "cc"); \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
return orig; \
|
||||
}
|
||||
|
||||
#else /* !CONFIG_ARC_HAS_LLSC */
|
||||
|
||||
#ifndef CONFIG_SMP
|
||||
|
||||
/* violating atomic_xxx API locking protocol in UP for optimization sake */
|
||||
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
|
||||
|
||||
#include <asm/atomic-llsc.h>
|
||||
#else
|
||||
#include <asm/atomic-spinlock.h>
|
||||
#endif
|
||||
|
||||
static inline void arch_atomic_set(atomic_t *v, int i)
|
||||
{
|
||||
/*
|
||||
* Independent of hardware support, all of the atomic_xxx() APIs need
|
||||
* to follow the same locking rules to make sure that a "hardware"
|
||||
* atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
|
||||
* sequence
|
||||
*
|
||||
* Thus atomic_set() despite being 1 insn (and seemingly atomic)
|
||||
* requires the locking.
|
||||
*/
|
||||
unsigned long flags;
|
||||
#define arch_atomic_cmpxchg(v, o, n) \
|
||||
({ \
|
||||
arch_cmpxchg(&((v)->counter), (o), (n)); \
|
||||
})
|
||||
|
||||
atomic_ops_lock(flags);
|
||||
WRITE_ONCE(v->counter, i);
|
||||
atomic_ops_unlock(flags);
|
||||
}
|
||||
#ifdef arch_cmpxchg_relaxed
|
||||
#define arch_atomic_cmpxchg_relaxed(v, o, n) \
|
||||
({ \
|
||||
arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
|
||||
#define arch_atomic_xchg(v, n) \
|
||||
({ \
|
||||
arch_xchg(&((v)->counter), (n)); \
|
||||
})
|
||||
|
||||
#ifdef arch_xchg_relaxed
|
||||
#define arch_atomic_xchg_relaxed(v, n) \
|
||||
({ \
|
||||
arch_xchg_relaxed(&((v)->counter), (n)); \
|
||||
})
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Non hardware assisted Atomic-R-M-W
|
||||
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
|
||||
* 64-bit atomics
|
||||
*/
|
||||
|
||||
#define ATOMIC_OP(op, c_op, asm_op) \
|
||||
static inline void arch_atomic_##op(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned long flags; \
|
||||
\
|
||||
atomic_ops_lock(flags); \
|
||||
v->counter c_op i; \
|
||||
atomic_ops_unlock(flags); \
|
||||
}
|
||||
|
||||
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned long flags; \
|
||||
unsigned long temp; \
|
||||
\
|
||||
/* \
|
||||
* spin lock/unlock provides the needed smp_mb() before/after \
|
||||
*/ \
|
||||
atomic_ops_lock(flags); \
|
||||
temp = v->counter; \
|
||||
temp c_op i; \
|
||||
v->counter = temp; \
|
||||
atomic_ops_unlock(flags); \
|
||||
\
|
||||
return temp; \
|
||||
}
|
||||
|
||||
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned long flags; \
|
||||
unsigned long orig; \
|
||||
\
|
||||
/* \
|
||||
* spin lock/unlock provides the needed smp_mb() before/after \
|
||||
*/ \
|
||||
atomic_ops_lock(flags); \
|
||||
orig = v->counter; \
|
||||
v->counter c_op i; \
|
||||
atomic_ops_unlock(flags); \
|
||||
\
|
||||
return orig; \
|
||||
}
|
||||
|
||||
#endif /* !CONFIG_ARC_HAS_LLSC */
|
||||
|
||||
#define ATOMIC_OPS(op, c_op, asm_op) \
|
||||
ATOMIC_OP(op, c_op, asm_op) \
|
||||
ATOMIC_OP_RETURN(op, c_op, asm_op) \
|
||||
ATOMIC_FETCH_OP(op, c_op, asm_op)
|
||||
|
||||
ATOMIC_OPS(add, +=, add)
|
||||
ATOMIC_OPS(sub, -=, sub)
|
||||
|
||||
#undef ATOMIC_OPS
|
||||
#define ATOMIC_OPS(op, c_op, asm_op) \
|
||||
ATOMIC_OP(op, c_op, asm_op) \
|
||||
ATOMIC_FETCH_OP(op, c_op, asm_op)
|
||||
|
||||
ATOMIC_OPS(and, &=, and)
|
||||
ATOMIC_OPS(andnot, &= ~, bic)
|
||||
ATOMIC_OPS(or, |=, or)
|
||||
ATOMIC_OPS(xor, ^=, xor)
|
||||
|
||||
#define arch_atomic_andnot arch_atomic_andnot
|
||||
#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
|
||||
|
||||
#undef ATOMIC_OPS
|
||||
#undef ATOMIC_FETCH_OP
|
||||
#undef ATOMIC_OP_RETURN
|
||||
#undef ATOMIC_OP
|
||||
|
||||
#ifdef CONFIG_GENERIC_ATOMIC64
|
||||
|
||||
#include <asm-generic/atomic64.h>
|
||||
|
||||
#else /* Kconfig ensures this is only enabled with needed h/w assist */
|
||||
|
||||
/*
|
||||
* ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
|
||||
* - The address HAS to be 64-bit aligned
|
||||
* - There are 2 semantics involved here:
|
||||
* = exclusive implies no interim update between load/store to same addr
|
||||
* = both words are observed/updated together: this is guaranteed even
|
||||
* for regular 64-bit load (LDD) / store (STD). Thus atomic64_set()
|
||||
* is NOT required to use LLOCKD+SCONDD, STD suffices
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
s64 __aligned(8) counter;
|
||||
} atomic64_t;
|
||||
|
||||
#define ATOMIC64_INIT(a) { (a) }
|
||||
|
||||
static inline s64 arch_atomic64_read(const atomic64_t *v)
|
||||
{
|
||||
s64 val;
|
||||
|
||||
__asm__ __volatile__(
|
||||
" ldd %0, [%1] \n"
|
||||
: "=r"(val)
|
||||
: "r"(&v->counter));
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void arch_atomic64_set(atomic64_t *v, s64 a)
|
||||
{
|
||||
/*
|
||||
* This could have been a simple assignment in "C" but would need
|
||||
* explicit volatile. Otherwise gcc optimizers could elide the store
|
||||
* which borked atomic64 self-test
|
||||
* In the inline asm version, memory clobber needed for exact same
|
||||
* reason, to tell gcc about the store.
|
||||
*
|
||||
* This however is not needed for sibling atomic64_add() etc since both
|
||||
* load/store are explicitly done in inline asm. As long as API is used
|
||||
* for each access, gcc has no way to optimize away any load/store
|
||||
*/
|
||||
__asm__ __volatile__(
|
||||
" std %0, [%1] \n"
|
||||
:
|
||||
: "r"(a), "r"(&v->counter)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
#define ATOMIC64_OP(op, op1, op2) \
|
||||
static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
|
||||
{ \
|
||||
s64 val; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: \n" \
|
||||
" llockd %0, [%1] \n" \
|
||||
" " #op1 " %L0, %L0, %L2 \n" \
|
||||
" " #op2 " %H0, %H0, %H2 \n" \
|
||||
" scondd %0, [%1] \n" \
|
||||
" bnz 1b \n" \
|
||||
: "=&r"(val) \
|
||||
: "r"(&v->counter), "ir"(a) \
|
||||
: "cc"); \
|
||||
} \
|
||||
|
||||
#define ATOMIC64_OP_RETURN(op, op1, op2) \
|
||||
static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \
|
||||
{ \
|
||||
s64 val; \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: \n" \
|
||||
" llockd %0, [%1] \n" \
|
||||
" " #op1 " %L0, %L0, %L2 \n" \
|
||||
" " #op2 " %H0, %H0, %H2 \n" \
|
||||
" scondd %0, [%1] \n" \
|
||||
" bnz 1b \n" \
|
||||
: [val] "=&r"(val) \
|
||||
: "r"(&v->counter), "ir"(a) \
|
||||
: "cc"); /* memory clobber comes from smp_mb() */ \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
return val; \
|
||||
}
|
||||
|
||||
#define ATOMIC64_FETCH_OP(op, op1, op2) \
|
||||
static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \
|
||||
{ \
|
||||
s64 val, orig; \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: \n" \
|
||||
" llockd %0, [%2] \n" \
|
||||
" " #op1 " %L1, %L0, %L3 \n" \
|
||||
" " #op2 " %H1, %H0, %H3 \n" \
|
||||
" scondd %1, [%2] \n" \
|
||||
" bnz 1b \n" \
|
||||
: "=&r"(orig), "=&r"(val) \
|
||||
: "r"(&v->counter), "ir"(a) \
|
||||
: "cc"); /* memory clobber comes from smp_mb() */ \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
return orig; \
|
||||
}
|
||||
|
||||
#define ATOMIC64_OPS(op, op1, op2) \
|
||||
ATOMIC64_OP(op, op1, op2) \
|
||||
ATOMIC64_OP_RETURN(op, op1, op2) \
|
||||
ATOMIC64_FETCH_OP(op, op1, op2)
|
||||
|
||||
ATOMIC64_OPS(add, add.f, adc)
|
||||
ATOMIC64_OPS(sub, sub.f, sbc)
|
||||
ATOMIC64_OPS(and, and, and)
|
||||
ATOMIC64_OPS(andnot, bic, bic)
|
||||
ATOMIC64_OPS(or, or, or)
|
||||
ATOMIC64_OPS(xor, xor, xor)
|
||||
|
||||
#define arch_atomic64_andnot arch_atomic64_andnot
|
||||
#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot
|
||||
|
||||
#undef ATOMIC64_OPS
|
||||
#undef ATOMIC64_FETCH_OP
|
||||
#undef ATOMIC64_OP_RETURN
|
||||
#undef ATOMIC64_OP
|
||||
|
||||
static inline s64
|
||||
arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
|
||||
{
|
||||
s64 prev;
|
||||
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: llockd %0, [%1] \n"
|
||||
" brne %L0, %L2, 2f \n"
|
||||
" brne %H0, %H2, 2f \n"
|
||||
" scondd %3, [%1] \n"
|
||||
" bnz 1b \n"
|
||||
"2: \n"
|
||||
: "=&r"(prev)
|
||||
: "r"(ptr), "ir"(expected), "r"(new)
|
||||
: "cc"); /* memory clobber comes from smp_mb() */
|
||||
|
||||
smp_mb();
|
||||
|
||||
return prev;
|
||||
}
|
||||
|
||||
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
|
||||
{
|
||||
s64 prev;
|
||||
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: llockd %0, [%1] \n"
|
||||
" scondd %2, [%1] \n"
|
||||
" bnz 1b \n"
|
||||
"2: \n"
|
||||
: "=&r"(prev)
|
||||
: "r"(ptr), "r"(new)
|
||||
: "cc"); /* memory clobber comes from smp_mb() */
|
||||
|
||||
smp_mb();
|
||||
|
||||
return prev;
|
||||
}
|
||||
|
||||
/**
|
||||
* arch_atomic64_dec_if_positive - decrement by 1 if old value positive
|
||||
* @v: pointer of type atomic64_t
|
||||
*
|
||||
* The function returns the old value of *v minus 1, even if
|
||||
* the atomic variable, v, was not decremented.
|
||||
*/
|
||||
|
||||
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
|
||||
{
|
||||
s64 val;
|
||||
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: llockd %0, [%1] \n"
|
||||
" sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
|
||||
" sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
|
||||
" brlt %H0, 0, 2f \n"
|
||||
" scondd %0, [%1] \n"
|
||||
" bnz 1b \n"
|
||||
"2: \n"
|
||||
: "=&r"(val)
|
||||
: "r"(&v->counter)
|
||||
: "cc"); /* memory clobber comes from smp_mb() */
|
||||
|
||||
smp_mb();
|
||||
|
||||
return val;
|
||||
}
|
||||
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
|
||||
|
||||
/**
|
||||
* arch_atomic64_fetch_add_unless - add unless the number is a given value
|
||||
* @v: pointer of type atomic64_t
|
||||
* @a: the amount to add to v...
|
||||
* @u: ...unless v is equal to u.
|
||||
*
|
||||
* Atomically adds @a to @v, if it was not @u.
|
||||
* Returns the old value of @v
|
||||
*/
|
||||
static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
|
||||
{
|
||||
s64 old, temp;
|
||||
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: llockd %0, [%2] \n"
|
||||
" brne %L0, %L4, 2f # continue to add since v != u \n"
|
||||
" breq.d %H0, %H4, 3f # return since v == u \n"
|
||||
"2: \n"
|
||||
" add.f %L1, %L0, %L3 \n"
|
||||
" adc %H1, %H0, %H3 \n"
|
||||
" scondd %1, [%2] \n"
|
||||
" bnz 1b \n"
|
||||
"3: \n"
|
||||
: "=&r"(old), "=&r" (temp)
|
||||
: "r"(&v->counter), "r"(a), "r"(u)
|
||||
: "cc"); /* memory clobber comes from smp_mb() */
|
||||
|
||||
smp_mb();
|
||||
|
||||
return old;
|
||||
}
|
||||
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
|
||||
|
||||
#endif /* !CONFIG_GENERIC_ATOMIC64 */
|
||||
#else
|
||||
#include <asm/atomic64-arcv2.h>
|
||||
#endif
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
|
||||
arch/arc/include/asm/atomic64-arcv2.h (new file, 250 lines)
@@ -0,0 +1,250 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
|
||||
/*
|
||||
* ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
|
||||
* - The address HAS to be 64-bit aligned
|
||||
*/
|
||||
|
||||
#ifndef _ASM_ARC_ATOMIC64_ARCV2_H
|
||||
#define _ASM_ARC_ATOMIC64_ARCV2_H
|
||||
|
||||
typedef struct {
|
||||
s64 __aligned(8) counter;
|
||||
} atomic64_t;
|
||||
|
||||
#define ATOMIC64_INIT(a) { (a) }
|
||||
|
||||
static inline s64 arch_atomic64_read(const atomic64_t *v)
|
||||
{
|
||||
s64 val;
|
||||
|
||||
__asm__ __volatile__(
|
||||
" ldd %0, [%1] \n"
|
||||
: "=r"(val)
|
||||
: "r"(&v->counter));
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void arch_atomic64_set(atomic64_t *v, s64 a)
|
||||
{
|
||||
/*
|
||||
* This could have been a simple assignment in "C" but would need
|
||||
* explicit volatile. Otherwise gcc optimizers could elide the store
|
||||
* which borked atomic64 self-test
|
||||
* In the inline asm version, memory clobber needed for exact same
|
||||
* reason, to tell gcc about the store.
|
||||
*
|
||||
* This however is not needed for sibling atomic64_add() etc since both
|
||||
* load/store are explicitly done in inline asm. As long as API is used
|
||||
* for each access, gcc has no way to optimize away any load/store
|
||||
*/
|
||||
__asm__ __volatile__(
|
||||
" std %0, [%1] \n"
|
||||
:
|
||||
: "r"(a), "r"(&v->counter)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
#define ATOMIC64_OP(op, op1, op2) \
|
||||
static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
|
||||
{ \
|
||||
s64 val; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: \n" \
|
||||
" llockd %0, [%1] \n" \
|
||||
" " #op1 " %L0, %L0, %L2 \n" \
|
||||
" " #op2 " %H0, %H0, %H2 \n" \
|
||||
" scondd %0, [%1] \n" \
|
||||
" bnz 1b \n" \
|
||||
: "=&r"(val) \
|
||||
: "r"(&v->counter), "ir"(a) \
|
||||
: "cc"); \
|
||||
} \
|
||||
|
||||
#define ATOMIC64_OP_RETURN(op, op1, op2) \
|
||||
static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
|
||||
{ \
|
||||
s64 val; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: \n" \
|
||||
" llockd %0, [%1] \n" \
|
||||
" " #op1 " %L0, %L0, %L2 \n" \
|
||||
" " #op2 " %H0, %H0, %H2 \n" \
|
||||
" scondd %0, [%1] \n" \
|
||||
" bnz 1b \n" \
|
||||
: [val] "=&r"(val) \
|
||||
: "r"(&v->counter), "ir"(a) \
|
||||
: "cc"); /* memory clobber comes from smp_mb() */ \
|
||||
\
|
||||
return val; \
|
||||
}
|
||||
|
||||
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
|
||||
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
|
||||
|
||||
#define ATOMIC64_FETCH_OP(op, op1, op2) \
|
||||
static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
|
||||
{ \
|
||||
s64 val, orig; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: \n" \
|
||||
" llockd %0, [%2] \n" \
|
||||
" " #op1 " %L1, %L0, %L3 \n" \
|
||||
" " #op2 " %H1, %H0, %H3 \n" \
|
||||
" scondd %1, [%2] \n" \
|
||||
" bnz 1b \n" \
|
||||
: "=&r"(orig), "=&r"(val) \
|
||||
: "r"(&v->counter), "ir"(a) \
|
||||
: "cc"); /* memory clobber comes from smp_mb() */ \
|
||||
\
|
||||
return orig; \
|
||||
}
|
||||
|
||||
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
|
||||
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
|
||||
|
||||
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
|
||||
#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed
|
||||
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
|
||||
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
|
||||
|
||||
#define ATOMIC64_OPS(op, op1, op2) \
|
||||
ATOMIC64_OP(op, op1, op2) \
|
||||
ATOMIC64_OP_RETURN(op, op1, op2) \
|
||||
ATOMIC64_FETCH_OP(op, op1, op2)
|
||||
|
||||
ATOMIC64_OPS(add, add.f, adc)
|
||||
ATOMIC64_OPS(sub, sub.f, sbc)
|
||||
|
||||
#undef ATOMIC64_OPS
|
||||
#define ATOMIC64_OPS(op, op1, op2) \
|
||||
ATOMIC64_OP(op, op1, op2) \
|
||||
ATOMIC64_FETCH_OP(op, op1, op2)
|
||||
|
||||
ATOMIC64_OPS(and, and, and)
|
||||
ATOMIC64_OPS(andnot, bic, bic)
|
||||
ATOMIC64_OPS(or, or, or)
|
||||
ATOMIC64_OPS(xor, xor, xor)
|
||||
|
||||
#define arch_atomic64_andnot arch_atomic64_andnot
|
||||
|
||||
#undef ATOMIC64_OPS
|
||||
#undef ATOMIC64_FETCH_OP
|
||||
#undef ATOMIC64_OP_RETURN
|
||||
#undef ATOMIC64_OP
|
||||
|
||||
static inline s64
|
||||
arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
|
||||
{
|
||||
s64 prev;
|
||||
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: llockd %0, [%1] \n"
|
||||
" brne %L0, %L2, 2f \n"
|
||||
" brne %H0, %H2, 2f \n"
|
||||
" scondd %3, [%1] \n"
|
||||
" bnz 1b \n"
|
||||
"2: \n"
|
||||
: "=&r"(prev)
|
||||
: "r"(ptr), "ir"(expected), "r"(new)
|
||||
: "cc"); /* memory clobber comes from smp_mb() */
|
||||
|
||||
smp_mb();
|
||||
|
||||
return prev;
|
||||
}
|
||||
|
||||
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
|
||||
{
|
||||
s64 prev;
|
||||
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: llockd %0, [%1] \n"
|
||||
" scondd %2, [%1] \n"
|
||||
" bnz 1b \n"
|
||||
"2: \n"
|
||||
: "=&r"(prev)
|
||||
: "r"(ptr), "r"(new)
|
||||
: "cc"); /* memory clobber comes from smp_mb() */
|
||||
|
||||
smp_mb();
|
||||
|
||||
return prev;
|
||||
}
|
||||
|
||||
/**
|
||||
* arch_atomic64_dec_if_positive - decrement by 1 if old value positive
|
||||
* @v: pointer of type atomic64_t
|
||||
*
|
||||
* The function returns the old value of *v minus 1, even if
|
||||
* the atomic variable, v, was not decremented.
|
||||
*/
|
||||
|
||||
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
|
||||
{
|
||||
s64 val;
|
||||
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: llockd %0, [%1] \n"
|
||||
" sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
|
||||
" sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
|
||||
" brlt %H0, 0, 2f \n"
|
||||
" scondd %0, [%1] \n"
|
||||
" bnz 1b \n"
|
||||
"2: \n"
|
||||
: "=&r"(val)
|
||||
: "r"(&v->counter)
|
||||
: "cc"); /* memory clobber comes from smp_mb() */
|
||||
|
||||
smp_mb();
|
||||
|
||||
return val;
|
||||
}
|
||||
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
|
||||
|
||||
/**
|
||||
* arch_atomic64_fetch_add_unless - add unless the number is a given value
|
||||
* @v: pointer of type atomic64_t
|
||||
* @a: the amount to add to v...
|
||||
* @u: ...unless v is equal to u.
|
||||
*
|
||||
* Atomically adds @a to @v, if it was not @u.
|
||||
* Returns the old value of @v
|
||||
*/
|
||||
static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
|
||||
{
|
||||
s64 old, temp;
|
||||
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: llockd %0, [%2] \n"
|
||||
" brne %L0, %L4, 2f # continue to add since v != u \n"
|
||||
" breq.d %H0, %H4, 3f # return since v == u \n"
|
||||
"2: \n"
|
||||
" add.f %L1, %L0, %L3 \n"
|
||||
" adc %H1, %H0, %H3 \n"
|
||||
" scondd %1, [%2] \n"
|
||||
" bnz 1b \n"
|
||||
"3: \n"
|
||||
: "=&r"(old), "=&r" (temp)
|
||||
: "r"(&v->counter), "r"(a), "r"(u)
|
||||
: "cc"); /* memory clobber comes from smp_mb() */
|
||||
|
||||
smp_mb();
|
||||
|
||||
return old;
|
||||
}
|
||||
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
|
||||
|
||||
#endif
|
||||
@@ -14,188 +14,6 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <asm/barrier.h>
|
||||
#ifndef CONFIG_ARC_HAS_LLSC
|
||||
#include <asm/smp.h>
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARC_HAS_LLSC
|
||||
|
||||
/*
|
||||
* Hardware assisted Atomic-R-M-W
|
||||
*/
|
||||
|
||||
#define BIT_OP(op, c_op, asm_op) \
|
||||
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
|
||||
{ \
|
||||
unsigned int temp; \
|
||||
\
|
||||
m += nr >> 5; \
|
||||
\
|
||||
nr &= 0x1f; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %0, [%1] \n" \
|
||||
" " #asm_op " %0, %0, %2 \n" \
|
||||
" scond %0, [%1] \n" \
|
||||
" bnz 1b \n" \
|
||||
: "=&r"(temp) /* Early clobber, to prevent reg reuse */ \
|
||||
: "r"(m), /* Not "m": llock only supports reg direct addr mode */ \
|
||||
"ir"(nr) \
|
||||
: "cc"); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Semantically:
|
||||
* Test the bit
|
||||
* if clear
|
||||
* set it and return 0 (old value)
|
||||
* else
|
||||
* return 1 (old value).
|
||||
*
|
||||
* Since ARC lacks an equivalent h/w primitive, the bit is set unconditionally
|
||||
* and the old value of bit is returned
|
||||
*/
|
||||
#define TEST_N_BIT_OP(op, c_op, asm_op) \
|
||||
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
|
||||
{ \
|
||||
unsigned long old, temp; \
|
||||
\
|
||||
m += nr >> 5; \
|
||||
\
|
||||
nr &= 0x1f; \
|
||||
\
|
||||
/* \
|
||||
* Explicit full memory barrier needed before/after as \
|
||||
* LLOCK/SCOND themselves don't provide any such semantics \
|
||||
*/ \
|
||||
smp_mb(); \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %0, [%2] \n" \
|
||||
" " #asm_op " %1, %0, %3 \n" \
|
||||
" scond %1, [%2] \n" \
|
||||
" bnz 1b \n" \
|
||||
: "=&r"(old), "=&r"(temp) \
|
||||
: "r"(m), "ir"(nr) \
|
||||
: "cc"); \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
return (old & (1 << nr)) != 0; \
|
||||
}
|
||||
|
||||
#else /* !CONFIG_ARC_HAS_LLSC */
|
||||
|
||||
/*
|
||||
* Non hardware assisted Atomic-R-M-W
|
||||
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
|
||||
*
|
||||
* There's "significant" micro-optimization in writing our own variants of
|
||||
* bitops (over generic variants)
|
||||
*
|
||||
* (1) The generic APIs have "signed" @nr while we have it "unsigned"
|
||||
* This avoids extra code to be generated for pointer arithmetic, since
* it is "not sure" that the index is NOT -ve
|
||||
* (2) Utilize the fact that ARCompact bit fiddling insns (BSET/BCLR/ASL) etc
|
||||
* only consider bottom 5 bits of @nr, so NO need to mask them off.
|
||||
* (GCC Quirk: however for constant @nr we still need to do the masking
|
||||
* at compile time)
|
||||
*/
|
||||
|
||||
#define BIT_OP(op, c_op, asm_op) \
|
||||
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
|
||||
{ \
|
||||
unsigned long temp, flags; \
|
||||
m += nr >> 5; \
|
||||
\
|
||||
/* \
|
||||
* spin lock/unlock provide the needed smp_mb() before/after \
|
||||
*/ \
|
||||
bitops_lock(flags); \
|
||||
\
|
||||
temp = *m; \
|
||||
*m = temp c_op (1UL << (nr & 0x1f)); \
|
||||
\
|
||||
bitops_unlock(flags); \
|
||||
}
|
||||
|
||||
#define TEST_N_BIT_OP(op, c_op, asm_op) \
|
||||
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
|
||||
{ \
|
||||
unsigned long old, flags; \
|
||||
m += nr >> 5; \
|
||||
\
|
||||
bitops_lock(flags); \
|
||||
\
|
||||
old = *m; \
|
||||
*m = old c_op (1UL << (nr & 0x1f)); \
|
||||
\
|
||||
bitops_unlock(flags); \
|
||||
\
|
||||
return (old & (1UL << (nr & 0x1f))) != 0; \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/***************************************
|
||||
* Non atomic variants
|
||||
**************************************/
|
||||
|
||||
#define __BIT_OP(op, c_op, asm_op) \
|
||||
static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \
|
||||
{ \
|
||||
unsigned long temp; \
|
||||
m += nr >> 5; \
|
||||
\
|
||||
temp = *m; \
|
||||
*m = temp c_op (1UL << (nr & 0x1f)); \
|
||||
}
|
||||
|
||||
#define __TEST_N_BIT_OP(op, c_op, asm_op) \
|
||||
static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
|
||||
{ \
|
||||
unsigned long old; \
|
||||
m += nr >> 5; \
|
||||
\
|
||||
old = *m; \
|
||||
*m = old c_op (1UL << (nr & 0x1f)); \
|
||||
\
|
||||
return (old & (1UL << (nr & 0x1f))) != 0; \
|
||||
}
|
||||
|
||||
#define BIT_OPS(op, c_op, asm_op) \
|
||||
\
|
||||
/* set_bit(), clear_bit(), change_bit() */ \
|
||||
BIT_OP(op, c_op, asm_op) \
|
||||
\
|
||||
/* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
|
||||
TEST_N_BIT_OP(op, c_op, asm_op) \
|
||||
\
|
||||
/* __set_bit(), __clear_bit(), __change_bit() */ \
|
||||
__BIT_OP(op, c_op, asm_op) \
|
||||
\
|
||||
/* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
|
||||
__TEST_N_BIT_OP(op, c_op, asm_op)
|
||||
|
||||
BIT_OPS(set, |, bset)
|
||||
BIT_OPS(clear, & ~, bclr)
|
||||
BIT_OPS(change, ^, bxor)
|
||||
|
||||
/*
|
||||
* This routine doesn't need to be atomic.
|
||||
*/
|
||||
static inline int
|
||||
test_bit(unsigned int nr, const volatile unsigned long *addr)
|
||||
{
|
||||
unsigned long mask;
|
||||
|
||||
addr += nr >> 5;
|
||||
|
||||
mask = 1UL << (nr & 0x1f);
|
||||
|
||||
return ((mask & *addr) != 0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ISA_ARCOMPACT
|
||||
|
||||
@@ -296,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
|
||||
* @result: [1-32]
|
||||
* fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
|
||||
*/
|
||||
static inline __attribute__ ((const)) int fls(unsigned long x)
|
||||
static inline __attribute__ ((const)) int fls(unsigned int x)
|
||||
{
|
||||
int n;
|
||||
|
||||
@@ -323,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
|
||||
* ffs = Find First Set in word (LSB to MSB)
|
||||
* @result: [1-32], 0 if all 0's
|
||||
*/
|
||||
static inline __attribute__ ((const)) int ffs(unsigned long x)
|
||||
static inline __attribute__ ((const)) int ffs(unsigned int x)
|
||||
{
|
||||
int n;
|
||||
|
||||
@@ -368,6 +186,8 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
|
||||
#include <asm-generic/bitops/fls64.h>
|
||||
#include <asm-generic/bitops/sched.h>
|
||||
#include <asm-generic/bitops/lock.h>
|
||||
#include <asm-generic/bitops/atomic.h>
|
||||
#include <asm-generic/bitops/non-atomic.h>
|
||||
|
||||
#include <asm-generic/bitops/find.h>
|
||||
#include <asm-generic/bitops/le.h>
|
||||
|
||||
@@ -62,10 +62,6 @@
|
||||
#define ARCH_SLAB_MINALIGN 8
|
||||
#endif
|
||||
|
||||
extern void arc_cache_init(void);
|
||||
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
|
||||
extern void read_decode_cache_bcr(void);
|
||||
|
||||
extern int ioc_enable;
|
||||
extern unsigned long perip_base, perip_end;
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#ifndef __ASM_ARC_CMPXCHG_H
|
||||
#define __ASM_ARC_CMPXCHG_H
|
||||
|
||||
#include <linux/build_bug.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <asm/barrier.h>
|
||||
@@ -13,146 +14,130 @@
|
||||
|
||||
#ifdef CONFIG_ARC_HAS_LLSC
|
||||
|
||||
static inline unsigned long
|
||||
__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
|
||||
{
|
||||
unsigned long prev;
|
||||
|
||||
/*
|
||||
* Explicit full memory barrier needed before/after as
|
||||
* LLOCK/SCOND themselves don't provide any such semantics
|
||||
*/
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: llock %0, [%1] \n"
|
||||
" brne %0, %2, 2f \n"
|
||||
" scond %3, [%1] \n"
|
||||
" bnz 1b \n"
|
||||
"2: \n"
|
||||
: "=&r"(prev) /* Early clobber, to prevent reg reuse */
|
||||
: "r"(ptr), /* Not "m": llock only supports reg direct addr mode */
|
||||
"ir"(expected),
|
||||
"r"(new) /* can't be "ir". scond can't take LIMM for "b" */
|
||||
: "cc", "memory"); /* so that gcc knows memory is being written here */
|
||||
|
||||
smp_mb();
|
||||
|
||||
return prev;
|
||||
}
|
||||
|
||||
#else /* !CONFIG_ARC_HAS_LLSC */
|
||||
|
||||
static inline unsigned long
|
||||
__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
|
||||
{
|
||||
unsigned long flags;
|
||||
int prev;
|
||||
volatile unsigned long *p = ptr;
|
||||
|
||||
/*
|
||||
* spin lock/unlock provide the needed smp_mb() before/after
|
||||
*/
|
||||
atomic_ops_lock(flags);
|
||||
prev = *p;
|
||||
if (prev == expected)
|
||||
*p = new;
|
||||
atomic_ops_unlock(flags);
|
||||
return prev;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#define arch_cmpxchg(ptr, o, n) ({ \
|
||||
(typeof(*(ptr)))__cmpxchg((ptr), \
|
||||
(unsigned long)(o), \
|
||||
(unsigned long)(n)); \
|
||||
/*
|
||||
* if (*ptr == @old)
|
||||
* *ptr = @new
|
||||
*/
|
||||
#define __cmpxchg(ptr, old, new) \
|
||||
({ \
|
||||
__typeof__(*(ptr)) _prev; \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %0, [%1] \n" \
|
||||
" brne %0, %2, 2f \n" \
|
||||
" scond %3, [%1] \n" \
|
||||
" bnz 1b \n" \
|
||||
"2: \n" \
|
||||
: "=&r"(_prev) /* Early clobber prevent reg reuse */ \
|
||||
: "r"(ptr), /* Not "m": llock only supports reg */ \
|
||||
"ir"(old), \
|
||||
"r"(new) /* Not "ir": scond can't take LIMM */ \
|
||||
: "cc", \
|
||||
"memory"); /* gcc knows memory is clobbered */ \
|
||||
\
|
||||
_prev; \
|
||||
})
|
||||
|
||||
/*
|
||||
* atomic_cmpxchg is same as cmpxchg
|
||||
* LLSC: only different in data-type, semantics are exactly same
|
||||
* !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
|
||||
* semantics, and this lock also happens to be used by atomic_*()
|
||||
*/
|
||||
#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
|
||||
|
||||
|
||||
/*
|
||||
* xchg (reg with memory) based on "Native atomic" EX insn
|
||||
*/
|
||||
static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
|
||||
int size)
|
||||
{
|
||||
extern unsigned long __xchg_bad_pointer(void);
|
||||
|
||||
switch (size) {
|
||||
case 4:
|
||||
smp_mb();
|
||||
|
||||
__asm__ __volatile__(
|
||||
" ex %0, [%1] \n"
|
||||
: "+r"(val)
|
||||
: "r"(ptr)
|
||||
: "memory");
|
||||
|
||||
smp_mb();
|
||||
|
||||
return val;
|
||||
}
|
||||
return __xchg_bad_pointer();
|
||||
}
|
||||
|
||||
#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
|
||||
sizeof(*(ptr))))
|
||||
|
||||
/*
|
||||
* xchg() maps directly to ARC EX instruction which guarantees atomicity.
|
||||
* However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
|
||||
* due to a subtle reason:
|
||||
* - For !LLSC, cmpxchg() needs to use that lock (see above) and there is a lot
*   of kernel code which calls xchg()/cmpxchg() on the same data (see llist.h)
|
||||
* Hence xchg() needs to follow same locking rules.
|
||||
*
|
||||
* Technically the lock is also needed for UP (boils down to irq save/restore)
|
||||
* but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
|
||||
* be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
|
||||
* Other way around, xchg is one instruction anyways, so can't be interrupted
|
||||
* as such
|
||||
*/
|
||||
|
||||
#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
|
||||
|
||||
#define arch_xchg(ptr, with) \
|
||||
({ \
|
||||
unsigned long flags; \
|
||||
typeof(*(ptr)) old_val; \
|
||||
\
|
||||
atomic_ops_lock(flags); \
|
||||
old_val = _xchg(ptr, with); \
|
||||
atomic_ops_unlock(flags); \
|
||||
old_val; \
|
||||
#define arch_cmpxchg_relaxed(ptr, old, new)     \
({                                              \
        __typeof__(ptr) _p_ = (ptr);            \
        __typeof__(*(ptr)) _o_ = (old);         \
        __typeof__(*(ptr)) _n_ = (new);         \
        __typeof__(*(ptr)) _prev_;              \
                                                \
        switch(sizeof((_p_))) {                 \
        case 4:                                 \
                _prev_ = __cmpxchg(_p_, _o_, _n_); \
                break;                          \
        default:                                \
                BUILD_BUG();                    \
        }                                       \
        _prev_;                                 \
})

#else

#define arch_xchg(ptr, with) _xchg(ptr, with)
#define arch_cmpxchg(ptr, old, new)             \
({                                              \
        volatile __typeof__(ptr) _p_ = (ptr);   \
        __typeof__(*(ptr)) _o_ = (old);         \
        __typeof__(*(ptr)) _n_ = (new);         \
        __typeof__(*(ptr)) _prev_;              \
        unsigned long __flags;                  \
                                                \
        BUILD_BUG_ON(sizeof(_p_) != 4);         \
                                                \
        /*                                      \
         * spin lock/unlock provide the needed smp_mb() before/after \
         */                                     \
        atomic_ops_lock(__flags);               \
        _prev_ = *_p_;                          \
        if (_prev_ == _o_)                      \
                *_p_ = _n_;                     \
        atomic_ops_unlock(__flags);             \
        _prev_;                                 \
})

#endif

/*
 * "atomic" variant of xchg()
 * REQ: It needs to follow the same serialization rules as other atomic_xxx()
 * Since xchg() doesn't always do that, it would seem that following definition
 * is incorrect. But here's the rationale:
 *   SMP : Even xchg() takes the atomic_ops_lock, so OK.
 *   LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
 *         is natively "SMP safe", no serialization required).
 *   UP  : other atomics disable IRQ, so no way a difft ctxt atomic_xchg()
 *         could clobber them. atomic_xchg() itself would be 1 insn, so it
 *         can't be clobbered by others. Thus no serialization required when
 *         atomic_xchg is involved.
 * xchg
 */
#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
#ifdef CONFIG_ARC_HAS_LLSC

#define __xchg(ptr, val)                                \
({                                                      \
        __asm__ __volatile__(                           \
        " ex %0, [%1] \n" /* set new value */           \
        : "+r"(val)                                     \
        : "r"(ptr)                                      \
        : "memory");                                    \
        _val_; /* get old value */                      \
})

#define arch_xchg_relaxed(ptr, val)                     \
({                                                      \
        __typeof__(ptr) _p_ = (ptr);                    \
        __typeof__(*(ptr)) _val_ = (val);               \
                                                        \
        switch(sizeof(*(_p_))) {                        \
        case 4:                                         \
                _val_ = __xchg(_p_, _val_);             \
                break;                                  \
        default:                                        \
                BUILD_BUG();                            \
        }                                               \
        _val_;                                          \
})

#else /* !CONFIG_ARC_HAS_LLSC */

/*
 * EX instructions is baseline and present in !LLSC too. But in this
 * regime it still needs use @atomic_ops_lock spinlock to allow interop
 * with cmpxchg() which uses spinlock in !LLSC
 * (llist.h use xchg and cmpxchg on sama data)
 */

#define arch_xchg(ptr, val)                             \
({                                                      \
        __typeof__(ptr) _p_ = (ptr);                    \
        __typeof__(*(ptr)) _val_ = (val);               \
                                                        \
        unsigned long __flags;                          \
                                                        \
        atomic_ops_lock(__flags);                       \
                                                        \
        __asm__ __volatile__(                           \
        " ex %0, [%1] \n"                               \
        : "+r"(_val_)                                   \
        : "r"(_p_)                                      \
        : "memory");                                    \
                                                        \
        atomic_ops_unlock(__flags);                     \
        _val_;                                          \
})

#endif

#endif
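For readers following the locking argument in the comment above, here is a minimal stand-alone sketch (a user-space analogue with hypothetical names, not code from this patch) of why xchg() and cmpxchg() must take the same lock when the hardware has no LLSC: if the exchange bypassed the lock, it could land between cmpxchg()'s read and conditional store and its update would be lost.

/* Illustrative user-space analogue of the !LLSC scheme: one shared
 * lock stands in for atomic_ops_lock(); both primitives must use it.
 */
#include <pthread.h>

static pthread_mutex_t ops_lock = PTHREAD_MUTEX_INITIALIZER;

static unsigned long emu_cmpxchg(unsigned long *p, unsigned long old,
                                 unsigned long new)
{
        unsigned long prev;

        pthread_mutex_lock(&ops_lock);   /* atomic_ops_lock() analogue */
        prev = *p;
        if (prev == old)
                *p = new;
        pthread_mutex_unlock(&ops_lock);
        return prev;
}

static unsigned long emu_xchg(unsigned long *p, unsigned long new)
{
        unsigned long prev;

        pthread_mutex_lock(&ops_lock);   /* must be the SAME lock */
        prev = *p;
        *p = new;
        pthread_mutex_unlock(&ops_lock);
        return prev;
}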
@@ -126,19 +126,11 @@
|
||||
* to be saved again on kernel mode stack, as part of pt_regs.
|
||||
*-------------------------------------------------------------*/
|
||||
.macro PROLOG_FREEUP_REG reg, mem
|
||||
#ifndef ARC_USE_SCRATCH_REG
|
||||
sr \reg, [ARC_REG_SCRATCH_DATA0]
|
||||
#else
|
||||
st \reg, [\mem]
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro PROLOG_RESTORE_REG reg, mem
|
||||
#ifndef ARC_USE_SCRATCH_REG
|
||||
lr \reg, [ARC_REG_SCRATCH_DATA0]
|
||||
#else
|
||||
ld \reg, [\mem]
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/*--------------------------------------------------------------
|
||||
|
||||
@@ -58,14 +58,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
|
||||
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
|
||||
pmd_t *pmd);
|
||||
|
||||
/* Generic variants assume pgtable_t is struct page *, hence need for these */
|
||||
#define __HAVE_ARCH_PGTABLE_DEPOSIT
|
||||
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
|
||||
pgtable_t pgtable);
|
||||
|
||||
#define __HAVE_ARCH_PGTABLE_WITHDRAW
|
||||
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
|
||||
|
||||
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
|
||||
extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
|
||||
unsigned long end);
|
||||
|
||||
arch/arc/include/asm/mmu-arcv2.h (new file, 103 lines)
@@ -0,0 +1,103 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed.
|
||||
* This file contains the TLB access registers and commands
|
||||
*/
|
||||
|
||||
#ifndef _ASM_ARC_MMU_ARCV2_H
|
||||
#define _ASM_ARC_MMU_ARCV2_H
|
||||
|
||||
/*
|
||||
* TLB Management regs
|
||||
*/
|
||||
#define ARC_REG_MMU_BCR 0x06f
|
||||
|
||||
#ifdef CONFIG_ARC_MMU_V3
|
||||
#define ARC_REG_TLBPD0 0x405
|
||||
#define ARC_REG_TLBPD1 0x406
|
||||
#define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */
|
||||
#define ARC_REG_TLBINDEX 0x407
|
||||
#define ARC_REG_TLBCOMMAND 0x408
|
||||
#define ARC_REG_PID 0x409
|
||||
#define ARC_REG_SCRATCH_DATA0 0x418
|
||||
#else
|
||||
#define ARC_REG_TLBPD0 0x460
|
||||
#define ARC_REG_TLBPD1 0x461
|
||||
#define ARC_REG_TLBPD1HI 0x463
|
||||
#define ARC_REG_TLBINDEX 0x464
|
||||
#define ARC_REG_TLBCOMMAND 0x465
|
||||
#define ARC_REG_PID 0x468
|
||||
#define ARC_REG_SCRATCH_DATA0 0x46c
|
||||
#endif
|
||||
|
||||
/* Bits in MMU PID reg */
|
||||
#define __TLB_ENABLE (1 << 31)
|
||||
#define __PROG_ENABLE (1 << 30)
|
||||
#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
|
||||
|
||||
/* Bits in TLB Index reg */
|
||||
#define TLB_LKUP_ERR 0x80000000
|
||||
|
||||
#ifdef CONFIG_ARC_MMU_V3
|
||||
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
|
||||
#else
|
||||
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* TLB Commands
|
||||
*/
|
||||
#define TLBWrite 0x1
|
||||
#define TLBRead 0x2
|
||||
#define TLBGetIndex 0x3
|
||||
#define TLBProbe 0x4
|
||||
#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
|
||||
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
|
||||
|
||||
#ifdef CONFIG_ARC_MMU_V4
|
||||
#define TLBInsertEntry 0x7
|
||||
#define TLBDeleteEntry 0x8
|
||||
#endif
|
||||
|
||||
/* Masks for actual TLB "PD"s */
|
||||
#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
|
||||
#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
|
||||
|
||||
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
struct mm_struct;
|
||||
extern int pae40_exist_but_not_enab(void);
|
||||
|
||||
static inline int is_pae40_enabled(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
|
||||
}
|
||||
|
||||
static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid)
|
||||
{
|
||||
write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE);
|
||||
}
|
||||
|
||||
static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
|
||||
{
|
||||
/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
|
||||
#ifdef CONFIG_ISA_ARCV2
|
||||
write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd);
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
.macro ARC_MMU_REENABLE reg
|
||||
lr \reg, [ARC_REG_PID]
|
||||
or \reg, \reg, MMU_ENABLE
|
||||
sr \reg, [ARC_REG_PID]
|
||||
.endm
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#endif
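As a rough sketch only (it assumes write_aux_reg() behaves as elsewhere in arch/arc; the authoritative sequence lives in arch/arc/mm/tlb.c), a software TLB refill built from the registers and commands this header defines might look like:

/* Hypothetical sketch: install one translation, where pd0 carries
 * vaddr | ASID | flags and pd1 carries paddr | permissions.
 */
static void sketch_tlb_install(unsigned int pd0, unsigned int pd1)
{
        write_aux_reg(ARC_REG_TLBPD0, pd0);
        write_aux_reg(ARC_REG_TLBPD1, pd1);

#ifdef CONFIG_ARC_MMU_V4
        /* MMUv4 can pick a victim slot by itself */
        write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
#else
        /* MMUv3: ask hardware for a slot, then write it */
        write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);
        write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
#endif
}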
@@ -7,98 +7,15 @@
|
||||
#define _ASM_ARC_MMU_H
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/threads.h> /* NR_CPUS */
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARC_MMU_V1)
|
||||
#define CONFIG_ARC_MMU_VER 1
|
||||
#elif defined(CONFIG_ARC_MMU_V2)
|
||||
#define CONFIG_ARC_MMU_VER 2
|
||||
#elif defined(CONFIG_ARC_MMU_V3)
|
||||
#define CONFIG_ARC_MMU_VER 3
|
||||
#elif defined(CONFIG_ARC_MMU_V4)
|
||||
#define CONFIG_ARC_MMU_VER 4
|
||||
#endif
|
||||
|
||||
/* MMU Management regs */
|
||||
#define ARC_REG_MMU_BCR 0x06f
|
||||
#if (CONFIG_ARC_MMU_VER < 4)
|
||||
#define ARC_REG_TLBPD0 0x405
|
||||
#define ARC_REG_TLBPD1 0x406
|
||||
#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */
|
||||
#define ARC_REG_TLBINDEX 0x407
|
||||
#define ARC_REG_TLBCOMMAND 0x408
|
||||
#define ARC_REG_PID 0x409
|
||||
#define ARC_REG_SCRATCH_DATA0 0x418
|
||||
#else
|
||||
#define ARC_REG_TLBPD0 0x460
|
||||
#define ARC_REG_TLBPD1 0x461
|
||||
#define ARC_REG_TLBPD1HI 0x463
|
||||
#define ARC_REG_TLBINDEX 0x464
|
||||
#define ARC_REG_TLBCOMMAND 0x465
|
||||
#define ARC_REG_PID 0x468
|
||||
#define ARC_REG_SCRATCH_DATA0 0x46c
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
|
||||
#define ARC_USE_SCRATCH_REG
|
||||
#endif
|
||||
|
||||
/* Bits in MMU PID register */
|
||||
#define __TLB_ENABLE (1 << 31)
|
||||
#define __PROG_ENABLE (1 << 30)
|
||||
#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
|
||||
|
||||
/* Error code if probe fails */
|
||||
#define TLB_LKUP_ERR 0x80000000
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER < 4)
|
||||
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
|
||||
#else
|
||||
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
|
||||
#endif
|
||||
|
||||
/* TLB Commands */
|
||||
#define TLBWrite 0x1
|
||||
#define TLBRead 0x2
|
||||
#define TLBGetIndex 0x3
|
||||
#define TLBProbe 0x4
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER >= 2)
|
||||
#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
|
||||
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
|
||||
#else
|
||||
#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */
|
||||
#endif
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER >= 4)
|
||||
#define TLBInsertEntry 0x7
|
||||
#define TLBDeleteEntry 0x8
|
||||
#endif
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
typedef struct {
|
||||
unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */
|
||||
} mm_context_t;
|
||||
|
||||
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
|
||||
void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
|
||||
#else
|
||||
#define tlb_paranoid_check(a, b)
|
||||
#endif
|
||||
|
||||
void arc_mmu_init(void);
|
||||
extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
|
||||
void read_decode_mmu_bcr(void);
|
||||
|
||||
static inline int is_pae40_enabled(void)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
|
||||
}
|
||||
|
||||
extern int pae40_exist_but_not_enab(void);
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
#include <asm/mmu-arcv2.h>
|
||||
|
||||
#endif
|
||||
|
||||
@@ -15,22 +15,23 @@
|
||||
#ifndef _ASM_ARC_MMU_CONTEXT_H
|
||||
#define _ASM_ARC_MMU_CONTEXT_H
|
||||
|
||||
#include <asm/arcregs.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include <asm/tlb.h>
|
||||
#include <asm-generic/mm_hooks.h>
|
||||
|
||||
/* ARC700 ASID Management
|
||||
/* ARC ASID Management
|
||||
*
|
||||
* ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries
|
||||
* with same vaddr (different tasks) to co-exit. This provides for
|
||||
* "Fast Context Switch" i.e. no TLB flush on ctxt-switch
|
||||
* MMU tags TLBs with an 8-bit ASID, avoiding need to flush the TLB on
|
||||
* context-switch.
|
||||
*
|
||||
* Linux assigns each task a unique ASID. A simple round-robin allocation
|
||||
* of H/w ASID is done using software tracker @asid_cpu.
|
||||
* When it reaches max 255, the allocation cycle starts afresh by flushing
|
||||
* the entire TLB and wrapping ASID back to zero.
|
||||
* ASID is managed per cpu, so task threads across CPUs can have different
|
||||
* ASID. Global ASID management is needed if hardware supports TLB shootdown
|
||||
* and/or shared TLB across cores, which ARC doesn't.
|
||||
*
|
||||
* Each task is assigned unique ASID, with a simple round-robin allocator
|
||||
* tracked in @asid_cpu. When 8-bit value rolls over,a new cycle is started
|
||||
* over from 0, and TLB is flushed
|
||||
*
|
||||
* A new allocation cycle, post rollover, could potentially reassign an ASID
|
||||
* to a different task. Thus the rule is to refresh the ASID in a new cycle.
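A minimal sketch of the allocation policy the comment describes, simplified to one CPU and without the generation counter the real allocator keeps in the upper bits (names are hypothetical, not from this patch):

/* Round-robin hardware ASIDs; when the 8-bit space wraps, flush the
 * whole TLB so a recycled ASID cannot match stale entries.
 */
#define HW_ASID_MAX     255

static unsigned int next_asid;          /* last ASID handed out */

static unsigned int sketch_get_asid(void (*flush_whole_tlb)(void))
{
        next_asid++;
        if (next_asid > HW_ASID_MAX) {  /* 8-bit space exhausted */
                flush_whole_tlb();      /* drop every tagged entry */
                next_asid = 1;          /* start a new cycle, keep 0 unused */
        }
        return next_asid;
}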
@@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
|
||||
asid_mm(mm, cpu) = asid_cpu(cpu);
|
||||
|
||||
set_hw:
|
||||
write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
|
||||
mmu_setup_asid(mm, hw_pid(mm, cpu));
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
@@ -146,10 +147,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
||||
*/
|
||||
cpumask_set_cpu(cpu, mm_cpumask(next));
|
||||
|
||||
#ifdef ARC_USE_SCRATCH_REG
|
||||
/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
|
||||
write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
|
||||
#endif
|
||||
mmu_setup_pgd(next, next->pgd);
|
||||
|
||||
get_new_mmu_context(next);
|
||||
}
|
||||
|
||||
@@ -34,12 +34,35 @@ void copy_user_highpage(struct page *to, struct page *from,
|
||||
unsigned long u_vaddr, struct vm_area_struct *vma);
|
||||
void clear_user_page(void *to, unsigned long u_vaddr, struct page *page);
|
||||
|
||||
#undef STRICT_MM_TYPECHECKS
|
||||
typedef struct {
|
||||
unsigned long pgd;
|
||||
} pgd_t;
|
||||
|
||||
#define pgd_val(x) ((x).pgd)
|
||||
#define __pgd(x) ((pgd_t) { (x) })
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
|
||||
typedef struct {
|
||||
unsigned long pud;
|
||||
} pud_t;
|
||||
|
||||
#define pud_val(x) ((x).pud)
|
||||
#define __pud(x) ((pud_t) { (x) })
|
||||
|
||||
#endif
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
|
||||
typedef struct {
|
||||
unsigned long pmd;
|
||||
} pmd_t;
|
||||
|
||||
#define pmd_val(x) ((x).pmd)
|
||||
#define __pmd(x) ((pmd_t) { (x) })
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef STRICT_MM_TYPECHECKS
|
||||
/*
|
||||
* These are used to make use of C type-checking..
|
||||
*/
|
||||
typedef struct {
|
||||
#ifdef CONFIG_ARC_HAS_PAE40
|
||||
unsigned long long pte;
|
||||
@@ -47,44 +70,19 @@ typedef struct {
|
||||
unsigned long pte;
|
||||
#endif
|
||||
} pte_t;
|
||||
typedef struct {
|
||||
unsigned long pgd;
|
||||
} pgd_t;
|
||||
|
||||
#define pte_val(x) ((x).pte)
|
||||
#define __pte(x) ((pte_t) { (x) })
|
||||
|
||||
typedef struct {
|
||||
unsigned long pgprot;
|
||||
} pgprot_t;
|
||||
|
||||
#define pte_val(x) ((x).pte)
|
||||
#define pgd_val(x) ((x).pgd)
|
||||
#define pgprot_val(x) ((x).pgprot)
|
||||
#define pgprot_val(x) ((x).pgprot)
|
||||
#define __pgprot(x) ((pgprot_t) { (x) })
|
||||
#define pte_pgprot(x) __pgprot(pte_val(x))
|
||||
|
||||
#define __pte(x) ((pte_t) { (x) })
|
||||
#define __pgd(x) ((pgd_t) { (x) })
|
||||
#define __pgprot(x) ((pgprot_t) { (x) })
|
||||
|
||||
#define pte_pgprot(x) __pgprot(pte_val(x))
|
||||
|
||||
#else /* !STRICT_MM_TYPECHECKS */
|
||||
|
||||
#ifdef CONFIG_ARC_HAS_PAE40
|
||||
typedef unsigned long long pte_t;
|
||||
#else
|
||||
typedef unsigned long pte_t;
|
||||
#endif
|
||||
typedef unsigned long pgd_t;
|
||||
typedef unsigned long pgprot_t;
|
||||
|
||||
#define pte_val(x) (x)
|
||||
#define pgd_val(x) (x)
|
||||
#define pgprot_val(x) (x)
|
||||
#define __pte(x) (x)
|
||||
#define __pgd(x) (x)
|
||||
#define __pgprot(x) (x)
|
||||
#define pte_pgprot(x) (x)
|
||||
|
||||
#endif
|
||||
|
||||
typedef pte_t * pgtable_t;
|
||||
typedef struct page *pgtable_t;
|
||||
|
||||
/*
|
||||
* Use virt_to_pfn with caution:
|
||||
@@ -122,8 +120,8 @@ extern int pfn_valid(unsigned long pfn);
|
||||
* virt here means link-address/program-address as embedded in object code.
|
||||
* And for ARC, link-addr = physical address
|
||||
*/
|
||||
#define __pa(vaddr) ((unsigned long)(vaddr))
|
||||
#define __va(paddr) ((void *)((unsigned long)(paddr)))
|
||||
#define __pa(vaddr) ((unsigned long)(vaddr))
|
||||
#define __va(paddr) ((void *)((unsigned long)(paddr)))
|
||||
|
||||
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
|
||||
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
|
||||
|
||||
@@ -31,30 +31,32 @@
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/log2.h>
|
||||
#include <asm-generic/pgalloc.h>
|
||||
|
||||
static inline void
|
||||
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
|
||||
{
|
||||
pmd_set(pmd, pte);
|
||||
/*
|
||||
* The cast to long below is OK in 32-bit PAE40 regime with long long pte
|
||||
* Despite "wider" pte, the pte table needs to be in non-PAE low memory
|
||||
* as all higher levels can only hold long pointers.
|
||||
*
|
||||
* The cast itself is needed given simplistic definition of set_pmd()
|
||||
*/
|
||||
set_pmd(pmd, __pmd((unsigned long)pte));
|
||||
}
|
||||
|
||||
static inline void
|
||||
pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
|
||||
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page)
|
||||
{
|
||||
pmd_set(pmd, (pte_t *) ptep);
|
||||
}
|
||||
|
||||
static inline int __get_order_pgd(void)
|
||||
{
|
||||
return get_order(PTRS_PER_PGD * sizeof(pgd_t));
|
||||
set_pmd(pmd, __pmd((unsigned long)page_address(pte_page)));
|
||||
}
|
||||
|
||||
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
|
||||
{
|
||||
int num, num2;
|
||||
pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd());
|
||||
pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL);
|
||||
|
||||
if (ret) {
|
||||
int num, num2;
|
||||
num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
|
||||
memzero(ret, num * sizeof(pgd_t));
|
||||
|
||||
@@ -68,64 +70,27 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
|
||||
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
|
||||
{
|
||||
free_pages((unsigned long)pgd, __get_order_pgd());
|
||||
set_p4d(p4dp, __p4d((unsigned long)pudp));
|
||||
}
|
||||
|
||||
#define __pud_free_tlb(tlb, pmd, addr) pud_free((tlb)->mm, pmd)
|
||||
|
||||
/*
|
||||
* With software-only page-tables, addr-split for traversal is tweakable and
|
||||
* that directly governs how big tables would be at each level.
|
||||
* Further, the MMU page size is configurable.
|
||||
* Thus we need to programatically assert the size constraint
|
||||
* All of this is const math, allowing gcc to do constant folding/propagation.
|
||||
*/
|
||||
#endif
|
||||
|
||||
static inline int __get_order_pte(void)
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
|
||||
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
|
||||
{
|
||||
return get_order(PTRS_PER_PTE * sizeof(pte_t));
|
||||
set_pud(pudp, __pud((unsigned long)pmdp));
|
||||
}
|
||||
|
||||
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
|
||||
{
|
||||
pte_t *pte;
|
||||
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
|
||||
|
||||
pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
|
||||
__get_order_pte());
|
||||
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pgtable_t
|
||||
pte_alloc_one(struct mm_struct *mm)
|
||||
{
|
||||
pgtable_t pte_pg;
|
||||
struct page *page;
|
||||
|
||||
pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
|
||||
if (!pte_pg)
|
||||
return 0;
|
||||
memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
|
||||
page = virt_to_page(pte_pg);
|
||||
if (!pgtable_pte_page_ctor(page)) {
|
||||
__free_page(page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return pte_pg;
|
||||
}
|
||||
|
||||
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
|
||||
{
|
||||
free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */
|
||||
}
|
||||
|
||||
static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
|
||||
{
|
||||
pgtable_pte_page_dtor(virt_to_page(ptep));
|
||||
free_pages((unsigned long)ptep, __get_order_pte());
|
||||
}
|
||||
#endif
|
||||
|
||||
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
|
||||
|
||||
|
||||
arch/arc/include/asm/pgtable-bits-arcv2.h (new file, 149 lines)
@@ -0,0 +1,149 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*/
|
||||
|
||||
/*
|
||||
* page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS)
|
||||
* There correspond to the corresponding bits in the TLB
|
||||
*/
|
||||
|
||||
#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H
|
||||
#define _ASM_ARC_PGTABLE_BITS_ARCV2_H
|
||||
|
||||
#ifdef CONFIG_ARC_CACHE_PAGES
|
||||
#define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */
|
||||
#else
|
||||
#define _PAGE_CACHEABLE 0
|
||||
#endif
|
||||
|
||||
#define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */
|
||||
#define _PAGE_WRITE (1 << 2) /* User Write (H) */
|
||||
#define _PAGE_READ (1 << 3) /* User Read (H) */
|
||||
#define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */
|
||||
#define _PAGE_DIRTY (1 << 5) /* Modified (s) */
|
||||
#define _PAGE_SPECIAL (1 << 6)
|
||||
#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */
|
||||
#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */
|
||||
|
||||
#ifdef CONFIG_ARC_MMU_V4
|
||||
#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */
|
||||
#else
|
||||
#define _PAGE_HW_SZ 0
|
||||
#endif
|
||||
|
||||
/* Defaults for every user page */
|
||||
#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
|
||||
|
||||
/* Set of bits not changed in pte_modify */
|
||||
#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
|
||||
_PAGE_SPECIAL)
|
||||
|
||||
/* More Abbrevaited helpers */
|
||||
#define PAGE_U_NONE __pgprot(___DEF)
|
||||
#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
|
||||
#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
|
||||
#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
|
||||
#define PAGE_U_X_W_R __pgprot(___DEF \
|
||||
| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
|
||||
#define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \
|
||||
| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
|
||||
|
||||
#define PAGE_SHARED PAGE_U_W_R
|
||||
|
||||
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
|
||||
|
||||
/*
|
||||
* Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
|
||||
*
|
||||
* Certain cases have 1:1 mapping
|
||||
* e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
|
||||
* which directly corresponds to PAGE_U_X_R
|
||||
*
|
||||
* Other rules which cause the divergence from 1:1 mapping
|
||||
*
|
||||
* 1. Although ARC700 can do exclusive execute/write protection (meaning R
|
||||
* can be tracked independet of X/W unlike some other CPUs), still to
|
||||
* keep things consistent with other archs:
|
||||
* -Write implies Read: W => R
|
||||
* -Execute implies Read: X => R
|
||||
*
|
||||
* 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
|
||||
* This is to enable COW mechanism
|
||||
*/
|
||||
/* xwr */
|
||||
#define __P000 PAGE_U_NONE
|
||||
#define __P001 PAGE_U_R
|
||||
#define __P010 PAGE_U_R /* Pvt-W => !W */
|
||||
#define __P011 PAGE_U_R /* Pvt-W => !W */
|
||||
#define __P100 PAGE_U_X_R /* X => R */
|
||||
#define __P101 PAGE_U_X_R
|
||||
#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */
|
||||
#define __P111 PAGE_U_X_R /* Pvt-W => !W */
|
||||
|
||||
#define __S000 PAGE_U_NONE
|
||||
#define __S001 PAGE_U_R
|
||||
#define __S010 PAGE_U_W_R /* W => R */
|
||||
#define __S011 PAGE_U_W_R
|
||||
#define __S100 PAGE_U_X_R /* X => R */
|
||||
#define __S101 PAGE_U_X_R
|
||||
#define __S110 PAGE_U_X_W_R /* X => R */
|
||||
#define __S111 PAGE_U_X_W_R
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
|
||||
#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
|
||||
#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
|
||||
#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
|
||||
|
||||
#define PTE_BIT_FUNC(fn, op) \
|
||||
static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
|
||||
|
||||
PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
|
||||
PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
|
||||
PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
|
||||
PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
|
||||
PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
|
||||
PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
|
||||
PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
|
||||
PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
|
||||
PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
|
||||
|
||||
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
||||
{
|
||||
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
|
||||
}
|
||||
|
||||
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pteval)
|
||||
{
|
||||
set_pte(ptep, pteval);
|
||||
}
|
||||
|
||||
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
|
||||
pte_t *ptep);
|
||||
|
||||
/* Encode swap {type,off} tuple into PTE
|
||||
* We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
|
||||
* PAGE_PRESENT is zero in a PTE holding swap "identifier"
|
||||
*/
|
||||
#define __swp_entry(type, off) ((swp_entry_t) \
|
||||
{ ((type) & 0x1f) | ((off) << 13) })
|
||||
|
||||
/* Decode a PTE containing swap "identifier "into constituents */
|
||||
#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
|
||||
#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
|
||||
|
||||
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
|
||||
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
|
||||
|
||||
#define kern_addr_valid(addr) (1)
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
#include <asm/hugepage.h>
|
||||
#endif
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#endif
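A worked example of the swap encoding noted above (illustrative only, not part of the patch): the 5-bit type sits in bits 4:0, the offset starts at bit 13, and bits 12:5, which include _PAGE_PRESENT (bit 9), stay clear.

/* Encode/decode a swap identifier per the scheme above. */
#include <assert.h>

static unsigned long sketch_swp_entry(unsigned long type, unsigned long off)
{
        return (type & 0x1f) | (off << 13);
}

static void sketch_swp_demo(void)
{
        unsigned long e = sketch_swp_entry(3, 0x1234);  /* 0x2468003 */

        assert((e & 0x1f) == 3);        /* what __swp_type() recovers   */
        assert((e >> 13) == 0x1234);    /* what __swp_offset() recovers */
        assert((e & (1 << 9)) == 0);    /* _PAGE_PRESENT stays clear    */
}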
arch/arc/include/asm/pgtable-levels.h (new file, 189 lines)
@@ -0,0 +1,189 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
|
||||
*/
|
||||
|
||||
/*
|
||||
* Helpers for implemenintg paging levels
|
||||
*/
|
||||
|
||||
#ifndef _ASM_ARC_PGTABLE_LEVELS_H
|
||||
#define _ASM_ARC_PGTABLE_LEVELS_H
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS == 2
|
||||
|
||||
/*
|
||||
* 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS)
|
||||
*
|
||||
* [31] 32 bit virtual address [0]
|
||||
* -------------------------------------------------------
|
||||
* | | <---------- PGDIR_SHIFT ----------> |
|
||||
* | | | <-- PAGE_SHIFT --> |
|
||||
* -------------------------------------------------------
|
||||
* | | |
|
||||
* | | --> off in page frame
|
||||
* | ---> index into Page Table
|
||||
* ----> index into Page Directory
|
||||
*
|
||||
* Given software walk, the vaddr split is arbitrary set to 11:8:13
|
||||
* However enabling of super page in a 2 level regime pegs PGDIR_SHIFT to
|
||||
* super page size.
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_ARC_HUGEPAGE_16M)
|
||||
#define PGDIR_SHIFT 24
|
||||
#elif defined(CONFIG_ARC_HUGEPAGE_2M)
|
||||
#define PGDIR_SHIFT 21
|
||||
#else
|
||||
/*
|
||||
* No Super page case
|
||||
* Default value provides 11:8:13 (8K), 10:10:12 (4K)
|
||||
* Limits imposed by pgtable_t only PAGE_SIZE long
|
||||
* (so 4K page can only have 1K entries: or 10 bits)
|
||||
*/
|
||||
#ifdef CONFIG_ARC_PAGE_SIZE_4K
|
||||
#define PGDIR_SHIFT 22
|
||||
#else
|
||||
#define PGDIR_SHIFT 21
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#else /* CONFIG_PGTABLE_LEVELS != 2 */
|
||||
|
||||
/*
|
||||
* A default 3 level paging testing setup in software walked MMU
|
||||
* MMUv4 (8K page): <4> : <7> : <8> : <13>
|
||||
* A default 4 level paging testing setup in software walked MMU
|
||||
* MMUv4 (8K page): <4> : <3> : <4> : <8> : <13>
|
||||
*/
|
||||
#define PGDIR_SHIFT 28
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
#define PUD_SHIFT 25
|
||||
#endif
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
#define PMD_SHIFT 21
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_PGTABLE_LEVELS */
|
||||
|
||||
#define PGDIR_SIZE BIT(PGDIR_SHIFT)
|
||||
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
|
||||
#define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT)
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
#define PUD_SIZE BIT(PUD_SHIFT)
|
||||
#define PUD_MASK (~(PUD_SIZE - 1))
|
||||
#define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT)
|
||||
#endif
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
#define PMD_SIZE BIT(PMD_SHIFT)
|
||||
#define PMD_MASK (~(PMD_SIZE - 1))
|
||||
#define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT)
|
||||
#endif
|
||||
|
||||
#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
#include <asm-generic/pgtable-nop4d.h>
|
||||
#elif CONFIG_PGTABLE_LEVELS > 2
|
||||
#include <asm-generic/pgtable-nopud.h>
|
||||
#else
|
||||
#include <asm-generic/pgtable-nopmd.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 1st level paging: pgd
|
||||
*/
|
||||
#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
|
||||
#define pgd_offset(mm, addr) (((mm)->pgd) + pgd_index(addr))
|
||||
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
|
||||
#define pgd_ERROR(e) \
|
||||
pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
|
||||
/* In 4 level paging, p4d_* macros work on pgd */
|
||||
#define p4d_none(x) (!p4d_val(x))
|
||||
#define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK))
|
||||
#define p4d_present(x) (p4d_val(x))
|
||||
#define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0)
|
||||
#define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK))
|
||||
#define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d))
|
||||
#define set_p4d(p4dp, p4d) (*(p4dp) = p4d)
|
||||
|
||||
/*
|
||||
* 2nd level paging: pud
|
||||
*/
|
||||
#define pud_ERROR(e) \
|
||||
pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
|
||||
|
||||
#endif
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
|
||||
/*
|
||||
* In 3 level paging, pud_* macros work on pgd
|
||||
* In 4 level paging, pud_* macros work on pud
|
||||
*/
|
||||
#define pud_none(x) (!pud_val(x))
|
||||
#define pud_bad(x) ((pud_val(x) & ~PAGE_MASK))
|
||||
#define pud_present(x) (pud_val(x))
|
||||
#define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0)
|
||||
#define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & PAGE_MASK))
|
||||
#define pud_page(pud) virt_to_page(pud_pgtable(pud))
|
||||
#define set_pud(pudp, pud) (*(pudp) = pud)
|
||||
|
||||
/*
|
||||
* 3rd level paging: pmd
|
||||
*/
|
||||
#define pmd_ERROR(e) \
|
||||
pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
|
||||
|
||||
#define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT)
|
||||
#define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
|
||||
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Due to the strange way generic pgtable level folding works, the pmd_* macros
|
||||
* - are valid even for 2 levels (which supposedly only has pgd - pte)
|
||||
* - behave differently for 2 vs. 3
|
||||
* In 2 level paging (pgd -> pte), pmd_* macros work on pgd
|
||||
* In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd
|
||||
*/
|
||||
#define pmd_none(x) (!pmd_val(x))
|
||||
#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
|
||||
#define pmd_present(x) (pmd_val(x))
|
||||
#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
|
||||
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
|
||||
#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
|
||||
#define set_pmd(pmdp, pmd) (*(pmdp) = pmd)
|
||||
#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
|
||||
|
||||
/*
|
||||
* 4th level paging: pte
|
||||
*/
|
||||
#define pte_ERROR(e) \
|
||||
pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
|
||||
|
||||
#define pte_none(x) (!pte_val(x))
|
||||
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
|
||||
#define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0))
|
||||
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
|
||||
#define set_pte(ptep, pte) ((*(ptep)) = (pte))
|
||||
#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
|
||||
#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
|
||||
#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
|
||||
|
||||
#ifdef CONFIG_ISA_ARCV2
|
||||
#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
|
||||
#endif
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#endif
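To make the 2-level address-split diagram above concrete, here is a stand-alone sketch for the default 8K-page, 11:8:13 layout (constants and names are illustrative, not from this patch):

#include <stdio.h>

#define SK_PAGE_SHIFT   13              /* 8K pages */
#define SK_PGDIR_SHIFT  21              /* 11:8:13 split */

static void sketch_split(unsigned long vaddr)
{
        unsigned long pgd_idx = vaddr >> SK_PGDIR_SHIFT;
        unsigned long pte_idx = (vaddr >> SK_PAGE_SHIFT) &
                                ((1UL << (SK_PGDIR_SHIFT - SK_PAGE_SHIFT)) - 1);
        unsigned long offset  = vaddr & ((1UL << SK_PAGE_SHIFT) - 1);

        /* e.g. 0x80042042 -> pgd 0x400, pte 0x21, offset 0x42 */
        printf("pgd %lx pte %lx off %lx\n", pgd_idx, pte_idx, offset);
}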
@@ -1,220 +1,17 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* vineetg: May 2011
|
||||
* -Folded PAGE_PRESENT (used by VM) and PAGE_VALID (used by MMU) into 1.
|
||||
* They are semantically the same although in different contexts
|
||||
* VALID marks a TLB entry exists and it will only happen if PRESENT
|
||||
* - Utilise some unused free bits to confine PTE flags to 12 bits
|
||||
* This is a must for 4k pg-sz
|
||||
*
|
||||
* vineetg: Mar 2011 - changes to accommodate MMU TLB Page Descriptor mods
|
||||
* -TLB Locking never really existed, except for initial specs
|
||||
* -SILENT_xxx not needed for our port
|
||||
* -Per my request, MMU V3 changes the layout of some of the bits
|
||||
* to avoid a few shifts in TLB Miss handlers.
|
||||
*
|
||||
* vineetg: April 2010
|
||||
* -PGD entry no longer contains any flags. If empty it is 0, otherwise has
|
||||
* Pg-Tbl ptr. Thus pmd_present(), pmd_valid(), pmd_set( ) become simpler
|
||||
*
|
||||
* vineetg: April 2010
|
||||
* -Switched form 8:11:13 split for page table lookup to 11:8:13
|
||||
* -this speeds up page table allocation itself as we now have to memset 1K
|
||||
* instead of 8k per page table.
|
||||
* -TODO: Right now page table alloc is 8K and rest 7K is unused
|
||||
* need to optimise it
|
||||
*
|
||||
* Amit Bhor, Sameer Dhavale: Codito Technologies 2004
|
||||
*/
|
||||
|
||||
#ifndef _ASM_ARC_PGTABLE_H
|
||||
#define _ASM_ARC_PGTABLE_H
|
||||
|
||||
#include <linux/bits.h>
|
||||
#include <asm-generic/pgtable-nopmd.h>
|
||||
|
||||
#include <asm/pgtable-levels.h>
|
||||
#include <asm/pgtable-bits-arcv2.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/mmu.h> /* to propagate CONFIG_ARC_MMU_VER <n> */
|
||||
|
||||
/**************************************************************************
|
||||
* Page Table Flags
|
||||
*
|
||||
* ARC700 MMU only deals with softare managed TLB entries.
|
||||
* Page Tables are purely for Linux VM's consumption and the bits below are
|
||||
* suited to that (uniqueness). Hence some are not implemented in the TLB and
|
||||
* some have different value in TLB.
|
||||
* e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in
|
||||
* seperate PD0 and PD1, which combined forms a translation entry)
|
||||
* while for PTE perspective, they are 8 and 9 respectively
|
||||
* with MMU v3: Most bits (except SHARED) represent the exact hardware pos
|
||||
* (saves some bit shift ops in TLB Miss hdlrs)
|
||||
*/
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER <= 2)
|
||||
|
||||
#define _PAGE_ACCESSED (1<<1) /* Page is accessed (S) */
|
||||
#define _PAGE_CACHEABLE (1<<2) /* Page is cached (H) */
|
||||
#define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */
|
||||
#define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */
|
||||
#define _PAGE_READ (1<<5) /* Page has user read perm (H) */
|
||||
#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */
|
||||
#define _PAGE_SPECIAL (1<<7)
|
||||
#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
|
||||
#define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */
|
||||
|
||||
#else /* MMU v3 onwards */
|
||||
|
||||
#define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */
|
||||
#define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */
|
||||
#define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */
|
||||
#define _PAGE_READ (1<<3) /* Page has user read perm (H) */
|
||||
#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */
|
||||
#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */
|
||||
#define _PAGE_SPECIAL (1<<6)
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER >= 4)
|
||||
#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */
|
||||
#endif
|
||||
|
||||
#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
|
||||
#define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER >= 4)
|
||||
#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */
|
||||
#endif
|
||||
|
||||
#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr
|
||||
usable for shared TLB entries (H) */
|
||||
|
||||
#define _PAGE_UNUSED_BIT (1<<12)
|
||||
#endif
|
||||
|
||||
/* vmalloc permissions */
|
||||
#define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
|
||||
_PAGE_GLOBAL | _PAGE_PRESENT)
|
||||
|
||||
#ifndef CONFIG_ARC_CACHE_PAGES
|
||||
#undef _PAGE_CACHEABLE
|
||||
#define _PAGE_CACHEABLE 0
|
||||
#endif
|
||||
|
||||
#ifndef _PAGE_HW_SZ
|
||||
#define _PAGE_HW_SZ 0
|
||||
#endif
|
||||
|
||||
/* Defaults for every user page */
|
||||
#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
|
||||
|
||||
/* Set of bits not changed in pte_modify */
|
||||
#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
|
||||
_PAGE_SPECIAL)
|
||||
/* More Abbrevaited helpers */
|
||||
#define PAGE_U_NONE __pgprot(___DEF)
|
||||
#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
|
||||
#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
|
||||
#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
|
||||
#define PAGE_U_X_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE | \
|
||||
_PAGE_EXECUTE)
|
||||
|
||||
#define PAGE_SHARED PAGE_U_W_R
|
||||
|
||||
/* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of
|
||||
* user vaddr space - visible in all addr spaces, but kernel mode only
|
||||
* Thus Global, all-kernel-access, no-user-access, cached
|
||||
*/
|
||||
#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE)
|
||||
|
||||
/* ioremap */
|
||||
#define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
|
||||
|
||||
/* Masks for actual TLB "PD"s */
|
||||
#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
|
||||
#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
|
||||
|
||||
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
|
||||
|
||||
/**************************************************************************
|
||||
* Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
|
||||
*
|
||||
* Certain cases have 1:1 mapping
|
||||
* e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
|
||||
* which directly corresponds to PAGE_U_X_R
|
||||
*
|
||||
* Other rules which cause the divergence from 1:1 mapping
|
||||
*
|
||||
* 1. Although ARC700 can do exclusive execute/write protection (meaning R
|
||||
* can be tracked independet of X/W unlike some other CPUs), still to
|
||||
* keep things consistent with other archs:
|
||||
* -Write implies Read: W => R
|
||||
* -Execute implies Read: X => R
|
||||
*
|
||||
* 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
|
||||
* This is to enable COW mechanism
|
||||
*/
|
||||
/* xwr */
|
||||
#define __P000 PAGE_U_NONE
|
||||
#define __P001 PAGE_U_R
|
||||
#define __P010 PAGE_U_R /* Pvt-W => !W */
|
||||
#define __P011 PAGE_U_R /* Pvt-W => !W */
|
||||
#define __P100 PAGE_U_X_R /* X => R */
|
||||
#define __P101 PAGE_U_X_R
|
||||
#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */
|
||||
#define __P111 PAGE_U_X_R /* Pvt-W => !W */
|
||||
|
||||
#define __S000 PAGE_U_NONE
|
||||
#define __S001 PAGE_U_R
|
||||
#define __S010 PAGE_U_W_R /* W => R */
|
||||
#define __S011 PAGE_U_W_R
|
||||
#define __S100 PAGE_U_X_R /* X => R */
|
||||
#define __S101 PAGE_U_X_R
|
||||
#define __S110 PAGE_U_X_W_R /* X => R */
|
||||
#define __S111 PAGE_U_X_W_R
|
||||
|
||||
/****************************************************************
|
||||
* 2 tier (PGD:PTE) software page walker
|
||||
*
|
||||
* [31] 32 bit virtual address [0]
|
||||
* -------------------------------------------------------
|
||||
* | | <------------ PGDIR_SHIFT ----------> |
|
||||
* | | |
|
||||
* | BITS_FOR_PGD | BITS_FOR_PTE | <-- PAGE_SHIFT --> |
|
||||
* -------------------------------------------------------
|
||||
* | | |
|
||||
* | | --> off in page frame
|
||||
* | ---> index into Page Table
|
||||
* ----> index into Page Directory
|
||||
*
|
||||
* In a single page size configuration, only PAGE_SHIFT is fixed
|
||||
* So both PGD and PTE sizing can be tweaked
|
||||
* e.g. 8K page (PAGE_SHIFT 13) can have
|
||||
* - PGDIR_SHIFT 21 -> 11:8:13 address split
|
||||
* - PGDIR_SHIFT 24 -> 8:11:13 address split
|
||||
*
|
||||
* If Super Page is configured, PGDIR_SHIFT becomes fixed too,
|
||||
* so the sizing flexibility is gone.
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_ARC_HUGEPAGE_16M)
|
||||
#define PGDIR_SHIFT 24
|
||||
#elif defined(CONFIG_ARC_HUGEPAGE_2M)
|
||||
#define PGDIR_SHIFT 21
|
||||
#else
|
||||
/*
|
||||
* Only Normal page support so "hackable" (see comment above)
|
||||
* Default value provides 11:8:13 (8K), 11:9:12 (4K)
|
||||
*/
|
||||
#define PGDIR_SHIFT 21
|
||||
#endif
|
||||
|
||||
#define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT)
|
||||
#define BITS_FOR_PGD (32 - PGDIR_SHIFT)
|
||||
|
||||
#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */
|
||||
#define PGDIR_MASK (~(PGDIR_SIZE-1))
|
||||
|
||||
#define PTRS_PER_PTE BIT(BITS_FOR_PTE)
|
||||
#define PTRS_PER_PGD BIT(BITS_FOR_PGD)
|
||||
#include <asm/mmu.h>
|
||||
|
||||
/*
|
||||
* Number of entries a user land program use.
|
||||
@@ -222,143 +19,17 @@
|
||||
*/
|
||||
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
|
||||
|
||||
|
||||
/****************************************************************
|
||||
* Bucket load of VM Helpers
|
||||
*/
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#define pte_ERROR(e) \
|
||||
pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
|
||||
#define pgd_ERROR(e) \
|
||||
pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
|
||||
|
||||
/* the zero page used for uninitialized and anonymous pages */
|
||||
extern char empty_zero_page[PAGE_SIZE];
|
||||
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
||||
|
||||
#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
|
||||
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
|
||||
|
||||
/* find the page descriptor of the Page Tbl ref by PMD entry */
|
||||
#define pmd_page(pmd) virt_to_page(pmd_val(pmd) & PAGE_MASK)
|
||||
|
||||
/* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */
|
||||
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
|
||||
|
||||
/* In a 2 level sys, setup the PGD entry with PTE value */
|
||||
static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
|
||||
{
|
||||
pmd_val(*pmdp) = (unsigned long)ptep;
|
||||
}
|
||||
|
||||
#define pte_none(x) (!pte_val(x))
|
||||
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
|
||||
#define pte_clear(mm, addr, ptep) set_pte_at(mm, addr, ptep, __pte(0))
|
||||
|
||||
#define pmd_none(x) (!pmd_val(x))
|
||||
#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
|
||||
#define pmd_present(x) (pmd_val(x))
|
||||
#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
|
||||
#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
|
||||
|
||||
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
|
||||
#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
|
||||
#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
|
||||
|
||||
/* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
|
||||
#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
|
||||
|
||||
/* Zoo of pte_xxx function */
|
||||
#define pte_read(pte) (pte_val(pte) & _PAGE_READ)
|
||||
#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
|
||||
#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
|
||||
#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
|
||||
#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
|
||||
|
||||
#define PTE_BIT_FUNC(fn, op) \
|
||||
static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
|
||||
|
||||
PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
|
||||
PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
|
||||
PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
|
||||
PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
|
||||
PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
|
||||
PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
|
||||
PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
|
||||
PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE));
|
||||
PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE));
|
||||
PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
|
||||
PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
|
||||
|
||||
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
||||
{
|
||||
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
|
||||
}
|
||||
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
|
||||
|
||||
/* Macro to mark a page protection as uncacheable */
|
||||
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
|
||||
|
||||
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pteval)
|
||||
{
|
||||
set_pte(ptep, pteval);
|
||||
}
|
||||
|
||||
/*
|
||||
* Macro to quickly access the PGD entry, utlising the fact that some
|
||||
* arch may cache the pointer to Page Directory of "current" task
|
||||
* in a MMU register
|
||||
*
|
||||
* Thus task->mm->pgd (3 pointer dereferences, cache misses etc simply
|
||||
* becomes read a register
|
||||
*
|
||||
* ********CAUTION*******:
|
||||
* Kernel code might be dealing with some mm_struct of NON "current"
|
||||
* Thus use this macro only when you are certain that "current" is current
|
||||
* e.g. when dealing with signal frame setup code etc
|
||||
*/
|
||||
#ifdef ARC_USE_SCRATCH_REG
|
||||
#define pgd_offset_fast(mm, addr) \
|
||||
({ \
|
||||
pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \
|
||||
pgd_base + pgd_index(addr); \
|
||||
})
|
||||
#else
|
||||
#define pgd_offset_fast(mm, addr) pgd_offset(mm, addr)
|
||||
#endif
|
||||
|
||||
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
|
||||
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
|
||||
pte_t *ptep);
|
||||
|
||||
/* Encode swap {type,off} tuple into PTE
|
||||
* We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
|
||||
* PAGE_PRESENT is zero in a PTE holding swap "identifier"
|
||||
*/
|
||||
#define __swp_entry(type, off) ((swp_entry_t) { \
|
||||
((type) & 0x1f) | ((off) << 13) })
|
||||
|
||||
/* Decode a PTE containing swap "identifier "into constituents */
|
||||
#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
|
||||
#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
|
||||
|
||||
/* NOPs, to keep generic kernel happy */
|
||||
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
|
||||
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
|
||||
|
||||
#define kern_addr_valid(addr) (1)
|
||||
|
||||
#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
|
||||
|
||||
/*
|
||||
* remap a physical page `pfn' of size `size' with page protection `prot'
|
||||
* into virtual address `from'
|
||||
*/
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
#include <asm/hugepage.h>
|
||||
#endif
|
||||
|
||||
/* to cope with aliasing VIPT cache */
|
||||
#define HAVE_ARCH_UNMAPPED_AREA
|
||||
|
||||
@@ -93,7 +93,7 @@ extern unsigned int get_wchan(struct task_struct *p);
|
||||
#define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
|
||||
|
||||
/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */
|
||||
#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
|
||||
#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4)
|
||||
|
||||
#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*/
|
||||
#ifndef __ASMARC_SETUP_H
|
||||
#define __ASMARC_SETUP_H
|
||||
#ifndef __ASM_ARC_SETUP_H
|
||||
#define __ASM_ARC_SETUP_H
|
||||
|
||||
|
||||
#include <linux/types.h>
|
||||
@@ -34,4 +34,12 @@ long __init arc_get_mem_sz(void);
|
||||
#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
|
||||
#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
|
||||
|
||||
extern void arc_mmu_init(void);
|
||||
extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
|
||||
extern void read_decode_mmu_bcr(void);
|
||||
|
||||
extern void arc_cache_init(void);
|
||||
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
|
||||
extern void read_decode_cache_bcr(void);
|
||||
|
||||
#endif /* __ASMARC_SETUP_H */
|
||||
|
||||
@@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void)
|
||||
#include <asm/spinlock.h>
|
||||
|
||||
extern arch_spinlock_t smp_atomic_ops_lock;
|
||||
extern arch_spinlock_t smp_bitops_lock;
|
||||
|
||||
#define atomic_ops_lock(flags) do { \
|
||||
local_irq_save(flags); \
|
||||
@@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock;
|
||||
local_irq_restore(flags); \
|
||||
} while (0)
|
||||
|
||||
#define bitops_lock(flags) do { \
|
||||
local_irq_save(flags); \
|
||||
arch_spin_lock(&smp_bitops_lock); \
|
||||
} while (0)
|
||||
|
||||
#define bitops_unlock(flags) do { \
|
||||
arch_spin_unlock(&smp_bitops_lock); \
|
||||
local_irq_restore(flags); \
|
||||
} while (0)
|
||||
|
||||
#else /* !CONFIG_SMP */
|
||||
|
||||
#define atomic_ops_lock(flags) local_irq_save(flags)
|
||||
#define atomic_ops_unlock(flags) local_irq_restore(flags)
|
||||
|
||||
#define bitops_lock(flags) local_irq_save(flags)
|
||||
#define bitops_unlock(flags) local_irq_restore(flags)
|
||||
|
||||
#endif /* !CONFIG_SMP */
|
||||
|
||||
#endif /* !CONFIG_ARC_HAS_LLSC */
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*/
|
||||
|
||||
#ifndef __ASM_TLB_MMU_V1_H__
|
||||
#define __ASM_TLB_MMU_V1_H__
|
||||
|
||||
#include <asm/mmu.h>
|
||||
|
||||
#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1)
|
||||
|
||||
.macro TLB_WRITE_HEURISTICS
|
||||
|
||||
#define JH_HACK1
|
||||
#undef JH_HACK2
|
||||
#undef JH_HACK3
|
||||
|
||||
#ifdef JH_HACK3
|
||||
; Calculate set index for 2-way MMU
|
||||
; -avoiding use of GetIndex from MMU
|
||||
; and its unpleasant LFSR pseudo-random sequence
|
||||
;
|
||||
; r1 = TLBPD0 from TLB_RELOAD above
|
||||
;
|
||||
; -- jh_ex_way_set not cleared on startup
|
||||
; didn't want to change setup.c
|
||||
; hence extra instruction to clean
|
||||
;
|
||||
; -- should be in cache since in same line
|
||||
; as r0/r1 saves above
|
||||
;
|
||||
ld r0,[jh_ex_way_sel] ; victim pointer
|
||||
and r0,r0,1 ; clean
|
||||
xor.f r0,r0,1 ; flip
|
||||
st r0,[jh_ex_way_sel] ; store back
|
||||
asr r0,r1,12 ; get set # <<1, note bit 12=R=0
|
||||
or.nz r0,r0,1 ; set way bit
|
||||
and r0,r0,0xff ; clean
|
||||
sr r0,[ARC_REG_TLBINDEX]
|
||||
#endif
|
||||
|
||||
#ifdef JH_HACK2
|
||||
; JH hack #2
|
||||
; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU
|
||||
; Slower in thrash case (where it matters) because more code is executed
|
||||
; Inefficient due to two-register paradigm of this miss handler
|
||||
;
|
||||
/* r1 = data TLBPD0 at this point */
|
||||
lr r0,[eret] /* instruction address */
|
||||
xor r0,r0,r1 /* compare set # */
|
||||
and.f r0,r0,0x000fe000 /* 2-way MMU mask */
|
||||
bne 88f /* not in same set - no need to probe */
|
||||
|
||||
lr r0,[eret] /* instruction address */
|
||||
and r0,r0,PAGE_MASK /* VPN of instruction address */
|
||||
; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/
|
||||
and r1,r1,0xff /* Data ASID */
|
||||
or r0,r0,r1 /* Instruction address + Data ASID */
|
||||
|
||||
lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/
|
||||
sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
|
||||
sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
|
||||
lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
|
||||
sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */
|
||||
|
||||
xor r0,r0,1 /* flip bottom bit of data index */
|
||||
b.d 89f
|
||||
sr r0,[ARC_REG_TLBINDEX] /* and put it back */
|
||||
88:
|
||||
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
|
||||
89:
|
||||
#endif
|
||||
|
||||
#ifdef JH_HACK1
|
||||
;
|
||||
; Always checks whether instruction will be kicked out by dtlb miss
|
||||
;
|
||||
mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3
|
||||
lr r0,[eret] /* instruction address */
|
||||
and r0,r0,PAGE_MASK /* VPN of instruction address */
|
||||
bmsk r1,r3,7 /* Data ASID, bits 7-0 */
|
||||
or_s r0,r0,r1 /* Instruction address + Data ASID */
|
||||
|
||||
sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
|
||||
sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
|
||||
lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
|
||||
sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */
|
||||
|
||||
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
|
||||
lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */
|
||||
cmp r0,r1 /* if no match on indices, go around */
|
||||
xor.eq r1,r1,1 /* flip bottom bit of data index */
|
||||
sr r1,[ARC_REG_TLBINDEX] /* and put it back */
|
||||
#endif
|
||||
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <asm/errno.h>
|
||||
#include <asm/arcregs.h>
|
||||
#include <asm/irqflags.h>
|
||||
#include <asm/mmu.h>
|
||||
|
||||
; A maximum number of supported interrupts in the core interrupt controller.
|
||||
; This number is not equal to the maximum interrupt number (256) because
|
||||
|
||||
@@ -101,11 +101,8 @@ ENTRY(EV_MachineCheck)
|
||||
lr r0, [efa]
|
||||
mov r1, sp
|
||||
|
||||
; hardware auto-disables MMU, re-enable it to allow kernel vaddr
|
||||
; access for say stack unwinding of modules for crash dumps
|
||||
lr r3, [ARC_REG_PID]
|
||||
or r3, r3, MMU_ENABLE
|
||||
sr r3, [ARC_REG_PID]
|
||||
; MC excpetions disable MMU
|
||||
ARC_MMU_REENABLE r3
|
||||
|
||||
lsr r3, r2, 8
|
||||
bmsk r3, r3, 7
|
||||
|
||||
@@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
|
||||
* Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
|
||||
* Here local_irq_enable( ) shd not re-enable lower priority interrupts
|
||||
* -If called from soft-ISR, it must re-enable all interrupts
|
||||
* soft ISR are low prioity jobs which can be very slow, thus all IRQs
|
||||
* soft ISR are low priority jobs which can be very slow, thus all IRQs
|
||||
* must be enabled while they run.
|
||||
* Now hardware context wise we may still be in L2 ISR (not done rtie)
|
||||
* still we must re-enable both L1 and L2 IRQs
|
||||
|
||||
@@ -29,10 +29,8 @@
|
||||
|
||||
#ifndef CONFIG_ARC_HAS_LLSC
|
||||
arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
|
||||
arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
|
||||
|
||||
EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
|
||||
EXPORT_SYMBOL_GPL(smp_bitops_lock);
|
||||
#endif
|
||||
|
||||
struct plat_smp_ops __weak plat_smp_ops;
|
||||
@@ -283,7 +281,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
|
||||
/*
|
||||
* Call the platform specific IPI kick function, but avoid if possible:
|
||||
* Only do so if there's no pending msg from other concurrent sender(s).
|
||||
* Otherwise, recevier will see this msg as well when it takes the
|
||||
* Otherwise, receiver will see this msg as well when it takes the
|
||||
* IPI corresponding to that msg. This is true, even if it is already in
|
||||
* IPI handler, because !@old means it has not yet dequeued the msg(s)
|
||||
* so @new msg can be a free-loader
|
||||
|
||||
@@ -149,7 +149,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
|
||||
#else
|
||||
/* On ARC, only Dward based unwinder works. fp based backtracing is
|
||||
* not possible (-fno-omit-frame-pointer) because of the way function
|
||||
* prelogue is setup (callee regs saved and then fp set and not other
|
||||
* prologue is setup (callee regs saved and then fp set and not other
|
||||
* way around
|
||||
*/
|
||||
pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
|
||||
|
||||
@@ -205,93 +205,24 @@ slc_chk:
|
||||
#define OP_INV_IC 0x4
|
||||
|
||||
/*
|
||||
* I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
|
||||
* Cache Flush programming model
|
||||
*
|
||||
* ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
|
||||
* The orig Cache Management Module "CDU" only required paddr to invalidate a
|
||||
* certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
|
||||
* Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
|
||||
* the exact same line.
|
||||
* ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias.
|
||||
* Programming model requires both paddr and vaddr irrespecive of aliasing
|
||||
* considerations:
|
||||
* - vaddr in {I,D}C_IV?L
|
||||
* - paddr in {I,D}C_PTAG
|
||||
*
|
||||
* However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
|
||||
* paddr alone could not be used to correctly index the cache.
|
||||
* In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias.
|
||||
* Programming model is different for aliasing vs. non-aliasing I$
|
||||
* - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L
|
||||
* - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$)
|
||||
*
|
||||
* ------------------
|
||||
* MMU v1/v2 (Fixed Page Size 8k)
|
||||
* ------------------
|
||||
* The solution was to provide CDU with these additonal vaddr bits. These
|
||||
* would be bits [x:13], x would depend on cache-geometry, 13 comes from
|
||||
* standard page size of 8k.
|
||||
* H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
|
||||
* of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
|
||||
* orig 5 bits of paddr were anyways ignored by CDU line ops, as they
|
||||
* represent the offset within cache-line. The adv of using this "clumsy"
|
||||
* interface for additional info was no new reg was needed in CDU programming
|
||||
* model.
|
||||
*
|
||||
* 17:13 represented the max num of bits passable, actual bits needed were
|
||||
* fewer, based on the num-of-aliases possible.
|
||||
* -for 2 alias possibility, only bit 13 needed (32K cache)
|
||||
* -for 4 alias possibility, bits 14:13 needed (64K cache)
|
||||
*
|
||||
* ------------------
|
||||
* MMU v3
|
||||
* ------------------
|
||||
* This ver of MMU supports variable page sizes (1k-16k): although Linux will
|
||||
* only support 8k (default), 16k and 4k.
|
||||
* However from hardware perspective, smaller page sizes aggravate aliasing
|
||||
* meaning more vaddr bits needed to disambiguate the cache-line-op ;
|
||||
* the existing scheme of piggybacking won't work for certain configurations.
|
||||
 * Two new registers IC_PTAG and DC_PTAG were introduced.
|
||||
* "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
|
||||
* - If PAE40 is enabled, independent of aliasing considerations, the higher
|
||||
* bits needs to be written into PTAG_HI
|
||||
*/
|
||||
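A minimal userspace sketch of the vaddr "stuffing" described above: with the 8k page assumed in the comment, vaddr bits [17:13] are folded into paddr bits [4:0], which the CDU line ops otherwise ignore as the within-line offset. The constants, helper name, and example addresses are assumptions for illustration, not kernel definitions.

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT      13              /* assumed 8k page, as in the comment   */
#define CACHE_LINE_MASK (~0x1FUL)       /* assumed 32-byte line: 5 offset bits  */

/* Fold vaddr bits [17:13] into paddr bits [4:0] (the ignored line offset). */
static uint32_t stuff_vaddr_into_paddr(uint32_t paddr, uint32_t vaddr)
{
	return (uint32_t)(paddr & CACHE_LINE_MASK) | ((vaddr >> PAGE_SHIFT) & 0x1F);
}

int main(void)
{
	uint32_t paddr = 0x80124040;    /* example physical address */
	uint32_t vaddr = 0x2001a040;    /* example virtual address  */

	printf("value written for the line op: 0x%08x\n",
	       (unsigned)stuff_vaddr_into_paddr(paddr, vaddr));
	return 0;
}
```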
|
||||
static inline
|
||||
void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
|
||||
unsigned long sz, const int op, const int full_page)
|
||||
{
|
||||
unsigned int aux_cmd;
|
||||
int num_lines;
|
||||
|
||||
if (op == OP_INV_IC) {
|
||||
aux_cmd = ARC_REG_IC_IVIL;
|
||||
} else {
|
||||
/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
|
||||
aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
|
||||
}
|
||||
|
||||
/* Ensure we properly floor/ceil the non-line aligned/sized requests
|
||||
* and have @paddr - aligned to cache line and integral @num_lines.
|
||||
 * This however can be avoided for page-sized requests since:
|
||||
* -@paddr will be cache-line aligned already (being page aligned)
|
||||
* -@sz will be integral multiple of line size (being page sized).
|
||||
*/
|
||||
if (!full_page) {
|
||||
sz += paddr & ~CACHE_LINE_MASK;
|
||||
paddr &= CACHE_LINE_MASK;
|
||||
vaddr &= CACHE_LINE_MASK;
|
||||
}
|
||||
|
||||
num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
|
||||
|
||||
/* MMUv2 and before: paddr contains stuffed vaddrs bits */
|
||||
paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
|
||||
|
||||
while (num_lines-- > 0) {
|
||||
write_aux_reg(aux_cmd, paddr);
|
||||
paddr += L1_CACHE_BYTES;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* For ARC700 MMUv3 I-cache and D-cache flushes
|
||||
 * - ARC700 programming model requires paddr and vaddr be passed in separate
|
||||
* AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the
|
||||
* caches actually alias or not.
|
||||
* - For HS38, only the aliasing I-cache configuration uses the PTAG reg
|
||||
* (non aliasing I-cache version doesn't; while D-cache can't possibly alias)
|
||||
*/
|
||||
static inline
|
||||
void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
|
||||
unsigned long sz, const int op, const int full_page)
|
||||
{
|
||||
@@ -350,17 +281,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
|
||||
#ifndef USE_RGN_FLSH
|
||||
|
||||
/*
|
||||
* In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
|
||||
* Here's how cache ops are implemented
|
||||
*
|
||||
* - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
|
||||
* - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
|
||||
* - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
|
||||
* respectively, similar to MMU v3 programming model, hence
|
||||
* __cache_line_loop_v3() is used)
|
||||
*
|
||||
* If PAE40 is enabled, independent of aliasing considerations, the higher bits
|
||||
* needs to be written into PTAG_HI
|
||||
*/
|
||||
static inline
|
||||
void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
|
||||
@@ -460,11 +380,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
|
||||
|
||||
#endif
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER < 3)
|
||||
#define __cache_line_loop __cache_line_loop_v2
|
||||
#elif (CONFIG_ARC_MMU_VER == 3)
|
||||
#ifdef CONFIG_ARC_MMU_V3
|
||||
#define __cache_line_loop __cache_line_loop_v3
|
||||
#elif (CONFIG_ARC_MMU_VER > 3)
|
||||
#else
|
||||
#define __cache_line_loop __cache_line_loop_v4
|
||||
#endif
|
||||
|
||||
@@ -1123,7 +1041,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
|
||||
clear_page(to);
|
||||
clear_bit(PG_dc_clean, &page->flags);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(clear_user_page);
|
||||
|
||||
/**********************************************************************
|
||||
* Explicit Cache flush request from user space via syscall
|
||||
|
||||
@@ -33,28 +33,34 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
|
||||
pud_t *pud, *pud_k;
|
||||
pmd_t *pmd, *pmd_k;
|
||||
|
||||
pgd = pgd_offset_fast(current->active_mm, address);
|
||||
pgd = pgd_offset(current->active_mm, address);
|
||||
pgd_k = pgd_offset_k(address);
|
||||
|
||||
if (!pgd_present(*pgd_k))
|
||||
if (pgd_none (*pgd_k))
|
||||
goto bad_area;
|
||||
if (!pgd_present(*pgd))
|
||||
set_pgd(pgd, *pgd_k);
|
||||
|
||||
p4d = p4d_offset(pgd, address);
|
||||
p4d_k = p4d_offset(pgd_k, address);
|
||||
if (!p4d_present(*p4d_k))
|
||||
if (p4d_none(*p4d_k))
|
||||
goto bad_area;
|
||||
if (!p4d_present(*p4d))
|
||||
set_p4d(p4d, *p4d_k);
|
||||
|
||||
pud = pud_offset(p4d, address);
|
||||
pud_k = pud_offset(p4d_k, address);
|
||||
if (!pud_present(*pud_k))
|
||||
if (pud_none(*pud_k))
|
||||
goto bad_area;
|
||||
if (!pud_present(*pud))
|
||||
set_pud(pud, *pud_k);
|
||||
|
||||
pmd = pmd_offset(pud, address);
|
||||
pmd_k = pmd_offset(pud_k, address);
|
||||
if (!pmd_present(*pmd_k))
|
||||
if (pmd_none(*pmd_k))
|
||||
goto bad_area;
|
||||
|
||||
set_pmd(pmd, *pmd_k);
|
||||
if (!pmd_present(*pmd))
|
||||
set_pmd(pmd, *pmd_k);
|
||||
|
||||
/* XXX: create the TLB entry here */
|
||||
return 0;
|
||||
|
||||
@@ -189,6 +189,11 @@ void __init mem_init(void)
|
||||
{
|
||||
memblock_free_all();
|
||||
highmem_init();
|
||||
|
||||
BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE);
|
||||
BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE);
|
||||
BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE);
|
||||
BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
|
||||
@@ -39,7 +39,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
|
||||
if (arc_uncached_addr_space(paddr))
|
||||
return (void __iomem *)(u32)paddr;
|
||||
|
||||
return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE);
|
||||
return ioremap_prot(paddr, size,
|
||||
pgprot_val(pgprot_noncached(PAGE_KERNEL)));
|
||||
}
|
||||
EXPORT_SYMBOL(ioremap);
|
||||
|
||||
|
||||
@@ -1,51 +1,9 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* TLB Management (flush/create/diagnostics) for ARC700
|
||||
* TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4
|
||||
*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* vineetg: Aug 2011
|
||||
* -Reintroduce duplicate PD fixup - some customer chips still have the issue
|
||||
*
|
||||
* vineetg: May 2011
|
||||
* -No need to flush_cache_page( ) for each call to update_mmu_cache()
|
||||
* some of the LMBench tests improved amazingly
|
||||
* = page-fault thrice as fast (75 usec to 28 usec)
|
||||
* = mmap twice as fast (9.6 msec to 4.6 msec),
|
||||
* = fork (5.3 msec to 3.7 msec)
|
||||
*
|
||||
* vineetg: April 2011 :
|
||||
* -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
|
||||
* helps avoid a shift when preparing PD0 from PTE
|
||||
*
|
||||
* vineetg: April 2011 : Preparing for MMU V3
|
||||
* -MMU v2/v3 BCRs decoded differently
|
||||
* -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512
|
||||
* -tlb_entry_erase( ) can be void
|
||||
* -local_flush_tlb_range( ):
|
||||
* = need not "ceil" @end
|
||||
* = walks MMU only if range spans < 32 entries, as opposed to 256
|
||||
*
|
||||
* Vineetg: Sept 10th 2008
|
||||
* -Changes related to MMU v2 (Rel 4.8)
|
||||
*
|
||||
* Vineetg: Aug 29th 2008
|
||||
* -In TLB Flush operations (Metal Fix MMU) there is a explicit command to
|
||||
* flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
|
||||
* it fails. Thus need to load it with ANY valid value before invoking
|
||||
* TLBIVUTLB cmd
|
||||
*
|
||||
* Vineetg: Aug 21th 2008:
|
||||
* -Reduced the duration of IRQ lockouts in TLB Flush routines
|
||||
* -Multiple copies of TLB erase code separated into a "single" function
|
||||
* -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
|
||||
* in interrupt-safe region.
|
||||
*
|
||||
* Vineetg: April 23rd Bug #93131
|
||||
* Problem: tlb_flush_kernel_range() doesn't do anything if the range to
|
||||
* flush is more than the size of TLB itself.
|
||||
*
|
||||
* Rahul Trivedi : Codito Technologies 2004
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
@@ -57,47 +15,6 @@
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/mmu.h>
|
||||
|
||||
/* Need for ARC MMU v2
|
||||
*
|
||||
* ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc.
|
||||
* For a memcpy operation with 3 players (src/dst/code) such that all 3 pages
|
||||
* map into same set, there would be contention for the 2 ways causing severe
|
||||
* Thrashing.
|
||||
*
|
||||
 * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBs which have
|
||||
* much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
|
||||
* Given this, the thrashing problem should never happen because once the 3
|
||||
* J-TLB entries are created (even though 3rd will knock out one of the prev
|
||||
* two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
|
||||
*
|
||||
 * Yet we still see the Thrashing because a J-TLB Write causes a flush of u-TLBs.
|
||||
* This is a simple design for keeping them in sync. So what do we do?
|
||||
 * The solution which James came up with was pretty neat. It utilised the assoc
|
||||
* of uTLBs by not invalidating always but only when absolutely necessary.
|
||||
*
|
||||
* - Existing TLB commands work as before
|
||||
* - New command (TLBWriteNI) for TLB write without clearing uTLBs
|
||||
* - New command (TLBIVUTLB) to invalidate uTLBs.
|
||||
*
|
||||
* The uTLBs need only be invalidated when pages are being removed from the
|
||||
* OS page table. If a 'victim' TLB entry is being overwritten in the main TLB
|
||||
* as a result of a miss, the removed entry is still allowed to exist in the
|
||||
* uTLBs as it is still valid and present in the OS page table. This allows the
|
||||
* full associativity of the uTLBs to hide the limited associativity of the main
|
||||
* TLB.
|
||||
*
|
||||
* During a miss handler, the new "TLBWriteNI" command is used to load
|
||||
* entries without clearing the uTLBs.
|
||||
*
|
||||
* When the OS page table is updated, TLB entries that may be associated with a
|
||||
* removed page are removed (flushed) from the TLB using TLBWrite. In this
|
||||
* circumstance, the uTLBs must also be cleared. This is done by using the
|
||||
* existing TLBWrite command. An explicit IVUTLB is also required for those
|
||||
* corner cases when TLBWrite was not executed at all because the corresp
|
||||
* J-TLB entry got evicted/replaced.
|
||||
*/
|
||||
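A small plain-C model of the invalidation policy spelled out above: a miss refill may leave the uTLBs alone (TLBWriteNI), while removing a page from the OS page table must also clear them (TLBWrite, or an explicit IVUTLB when no J-TLB write happens). The decision function and its inputs are invented for the sketch.

```c
#include <stdio.h>
#include <stdbool.h>

enum tlb_cmd {
	TLB_WRITE_NI,	/* write J-TLB entry, leave uTLBs alone       */
	TLB_WRITE,	/* write J-TLB entry and clear uTLBs          */
	TLB_IVUTLB,	/* explicitly invalidate the uTLBs only       */
};

/*
 * Per the policy above: a miss refill keeps stale-but-still-valid uTLB
 * entries; removing a page from the OS page table must also clear the
 * uTLBs, via TLBWrite or, if no write is issued at all, via IVUTLB.
 */
static enum tlb_cmd pick_cmd(bool removing_page, bool writing_jtlb_entry)
{
	if (!removing_page)
		return TLB_WRITE_NI;
	return writing_jtlb_entry ? TLB_WRITE : TLB_IVUTLB;
}

int main(void)
{
	printf("miss refill        -> %d\n", pick_cmd(false, true));
	printf("unmap, entry found -> %d\n", pick_cmd(true, true));
	printf("unmap, no entry    -> %d\n", pick_cmd(true, false));
	return 0;
}
```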
|
||||
|
||||
/* A copy of the ASID from the PID reg is kept in asid_cache */
|
||||
DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
|
||||
|
||||
@@ -120,32 +37,10 @@ static inline void __tlb_entry_erase(void)
|
||||
|
||||
static void utlb_invalidate(void)
|
||||
{
|
||||
#if (CONFIG_ARC_MMU_VER >= 2)
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER == 2)
|
||||
/* MMU v2 introduced the uTLB Flush command.
|
||||
* There was however an obscure hardware bug, where uTLB flush would
|
||||
* fail when a prior probe for J-TLB (both totally unrelated) would
|
||||
* return lkup err - because the entry didn't exist in MMU.
|
||||
* The Workaround was to set Index reg with some valid value, prior to
|
||||
* flush. This was fixed in MMU v3
|
||||
*/
|
||||
unsigned int idx;
|
||||
|
||||
/* make sure INDEX Reg is valid */
|
||||
idx = read_aux_reg(ARC_REG_TLBINDEX);
|
||||
|
||||
/* If not write some dummy val */
|
||||
if (unlikely(idx & TLB_LKUP_ERR))
|
||||
write_aux_reg(ARC_REG_TLBINDEX, 0xa);
|
||||
#endif
|
||||
|
||||
write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER < 4)
|
||||
#ifdef CONFIG_ARC_MMU_V3
|
||||
|
||||
static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
|
||||
{
|
||||
@@ -176,7 +71,7 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
|
||||
}
|
||||
}
|
||||
|
||||
static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
|
||||
static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
|
||||
{
|
||||
unsigned int idx;
|
||||
|
||||
@@ -206,7 +101,7 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
|
||||
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
|
||||
}
|
||||
|
||||
#else /* CONFIG_ARC_MMU_VER >= 4) */
|
||||
#else /* MMUv4 */
|
||||
|
||||
static void tlb_entry_erase(unsigned int vaddr_n_asid)
|
||||
{
|
||||
@@ -214,13 +109,16 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
|
||||
write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
|
||||
}
|
||||
|
||||
static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
|
||||
static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
|
||||
{
|
||||
write_aux_reg(ARC_REG_TLBPD0, pd0);
|
||||
write_aux_reg(ARC_REG_TLBPD1, pd1);
|
||||
|
||||
if (is_pae40_enabled())
|
||||
if (!is_pae40_enabled()) {
|
||||
write_aux_reg(ARC_REG_TLBPD1, pd1);
|
||||
} else {
|
||||
write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF);
|
||||
write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
|
||||
}
|
||||
|
||||
write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
|
||||
}
|
||||
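The PAE40 branch above splits one wide physical descriptor across the two 32-bit TLBPD1/TLBPD1HI writes. A standalone sketch of that split, assuming a 40-bit physical address; the plain variables stand in for the AUX register writes.

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t pd1 = 0x12345678ABULL;		/* example wide descriptor      */
	uint32_t tlbpd1, tlbpd1hi;

	tlbpd1   = (uint32_t)(pd1 & 0xFFFFFFFF);	/* low 32 bits -> PD1   */
	tlbpd1hi = (uint32_t)(pd1 >> 32);		/* upper bits  -> PD1HI */

	printf("PD1=0x%08x PD1HI=0x%02x\n", (unsigned)tlbpd1, (unsigned)tlbpd1hi);
	return 0;
}
```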
@@ -496,7 +394,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
|
||||
unsigned long flags;
|
||||
unsigned int asid_or_sasid, rwx;
|
||||
unsigned long pd0;
|
||||
pte_t pd1;
|
||||
phys_addr_t pd1;
|
||||
|
||||
/*
|
||||
* create_tlb() assumes that current->mm == vma->mm, since
|
||||
@@ -505,7 +403,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
|
||||
*
|
||||
* Removing the assumption involves
|
||||
* -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg.
|
||||
* -Fix the TLB paranoid debug code to not trigger false negatives.
|
||||
* -More importantly it makes this handler inconsistent with fast-path
|
||||
* TLB Refill handler which always deals with "current"
|
||||
*
|
||||
@@ -528,8 +425,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr);
|
||||
|
||||
vaddr &= PAGE_MASK;
|
||||
|
||||
/* update this PTE credentials */
|
||||
@@ -639,43 +534,6 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
|
||||
update_mmu_cache(vma, addr, &pte);
|
||||
}
|
||||
|
||||
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
|
||||
pgtable_t pgtable)
|
||||
{
|
||||
struct list_head *lh = (struct list_head *) pgtable;
|
||||
|
||||
assert_spin_locked(&mm->page_table_lock);
|
||||
|
||||
/* FIFO */
|
||||
if (!pmd_huge_pte(mm, pmdp))
|
||||
INIT_LIST_HEAD(lh);
|
||||
else
|
||||
list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
|
||||
pmd_huge_pte(mm, pmdp) = pgtable;
|
||||
}
|
||||
|
||||
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
|
||||
{
|
||||
struct list_head *lh;
|
||||
pgtable_t pgtable;
|
||||
|
||||
assert_spin_locked(&mm->page_table_lock);
|
||||
|
||||
pgtable = pmd_huge_pte(mm, pmdp);
|
||||
lh = (struct list_head *) pgtable;
|
||||
if (list_empty(lh))
|
||||
pmd_huge_pte(mm, pmdp) = NULL;
|
||||
else {
|
||||
pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
|
||||
list_del(lh);
|
||||
}
|
||||
|
||||
pte_val(pgtable[0]) = 0;
|
||||
pte_val(pgtable[1]) = 0;
|
||||
|
||||
return pgtable;
|
||||
}
|
||||
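The deposit/withdraw pair above keeps a FIFO of pre-allocated page tables threaded through the pages themselves. A userspace sketch of the same deposit/withdraw discipline using an ordinary singly linked queue; the types and ids are invented for the illustration.

```c
#include <stdio.h>
#include <stdlib.h>

struct deposit {
	int id;
	struct deposit *next;
};

static struct deposit *head, *tail;

/* deposit: append at the tail, so withdraw returns the oldest first (FIFO) */
static void deposit(int id)
{
	struct deposit *d = malloc(sizeof(*d));

	d->id = id;
	d->next = NULL;
	if (tail)
		tail->next = d;
	else
		head = d;
	tail = d;
}

/* withdraw: pop from the head; returns -1 when nothing is deposited */
static int withdraw(void)
{
	struct deposit *d = head;
	int id;

	if (!d)
		return -1;
	head = d->next;
	if (!head)
		tail = NULL;
	id = d->id;
	free(d);
	return id;
}

int main(void)
{
	deposit(1);
	deposit(2);
	printf("%d ", withdraw());
	printf("%d ", withdraw());
	printf("%d\n", withdraw());	/* prints: 1 2 -1 */
	return 0;
}
```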
|
||||
void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
@@ -706,14 +564,6 @@ void read_decode_mmu_bcr(void)
|
||||
{
|
||||
struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
|
||||
unsigned int tmp;
|
||||
struct bcr_mmu_1_2 {
|
||||
#ifdef CONFIG_CPU_BIG_ENDIAN
|
||||
unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8;
|
||||
#else
|
||||
unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8;
|
||||
#endif
|
||||
} *mmu2;
|
||||
|
||||
struct bcr_mmu_3 {
|
||||
#ifdef CONFIG_CPU_BIG_ENDIAN
|
||||
unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
|
||||
@@ -738,23 +588,14 @@ void read_decode_mmu_bcr(void)
|
||||
tmp = read_aux_reg(ARC_REG_MMU_BCR);
|
||||
mmu->ver = (tmp >> 24);
|
||||
|
||||
if (is_isa_arcompact()) {
|
||||
if (mmu->ver <= 2) {
|
||||
mmu2 = (struct bcr_mmu_1_2 *)&tmp;
|
||||
mmu->pg_sz_k = TO_KB(0x2000);
|
||||
mmu->sets = 1 << mmu2->sets;
|
||||
mmu->ways = 1 << mmu2->ways;
|
||||
mmu->u_dtlb = mmu2->u_dtlb;
|
||||
mmu->u_itlb = mmu2->u_itlb;
|
||||
} else {
|
||||
mmu3 = (struct bcr_mmu_3 *)&tmp;
|
||||
mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
|
||||
mmu->sets = 1 << mmu3->sets;
|
||||
mmu->ways = 1 << mmu3->ways;
|
||||
mmu->u_dtlb = mmu3->u_dtlb;
|
||||
mmu->u_itlb = mmu3->u_itlb;
|
||||
mmu->sasid = mmu3->sasid;
|
||||
}
|
||||
if (is_isa_arcompact() && mmu->ver == 3) {
|
||||
mmu3 = (struct bcr_mmu_3 *)&tmp;
|
||||
mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
|
||||
mmu->sets = 1 << mmu3->sets;
|
||||
mmu->ways = 1 << mmu3->ways;
|
||||
mmu->u_dtlb = mmu3->u_dtlb;
|
||||
mmu->u_itlb = mmu3->u_itlb;
|
||||
mmu->sasid = mmu3->sasid;
|
||||
} else {
|
||||
mmu4 = (struct bcr_mmu_4 *)&tmp;
|
||||
mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
|
||||
@@ -780,8 +621,8 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
|
||||
IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
|
||||
|
||||
n += scnprintf(buf + n, len - n,
|
||||
"MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
|
||||
p_mmu->ver, p_mmu->pg_sz_k, super_pg,
|
||||
"MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
|
||||
p_mmu->ver, p_mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
|
||||
p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
|
||||
p_mmu->u_dtlb, p_mmu->u_itlb,
|
||||
IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
|
||||
@@ -815,22 +656,17 @@ void arc_mmu_init(void)
|
||||
|
||||
/*
|
||||
* Ensure that MMU features assumed by kernel exist in hardware.
|
||||
* For older ARC700 cpus, it has to be exact match, since the MMU
|
||||
* revisions were not backwards compatible (MMUv3 TLB layout changed
|
||||
* so even if kernel for v2 didn't use any new cmds of v3, it would
|
||||
* still not work.
|
||||
* For HS cpus, MMUv4 was baseline and v5 is backwards compatible
|
||||
* (will run older software).
|
||||
* - For older ARC700 cpus, only v3 supported
|
||||
* - For HS cpus, v4 was baseline and v5 is backwards compatible
|
||||
* (will run older software).
|
||||
*/
|
||||
if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER)
|
||||
if (is_isa_arcompact() && mmu->ver == 3)
|
||||
compat = 1;
|
||||
else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER)
|
||||
else if (is_isa_arcv2() && mmu->ver >= 4)
|
||||
compat = 1;
|
||||
|
||||
if (!compat) {
|
||||
panic("MMU ver %d doesn't match kernel built for %d...\n",
|
||||
mmu->ver, CONFIG_ARC_MMU_VER);
|
||||
}
|
||||
if (!compat)
|
||||
panic("MMU ver %d doesn't match kernel built for\n", mmu->ver);
|
||||
|
||||
if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
|
||||
panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
|
||||
@@ -843,14 +679,11 @@ void arc_mmu_init(void)
|
||||
if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
|
||||
panic("Hardware doesn't support PAE40\n");
|
||||
|
||||
/* Enable the MMU */
|
||||
write_aux_reg(ARC_REG_PID, MMU_ENABLE);
|
||||
/* Enable the MMU with ASID 0 */
|
||||
mmu_setup_asid(NULL, 0);
|
||||
|
||||
/* In smp we use this reg for interrupt 1 scratch */
|
||||
#ifdef ARC_USE_SCRATCH_REG
|
||||
/* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
|
||||
write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
|
||||
#endif
|
||||
/* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */
|
||||
mmu_setup_pgd(NULL, swapper_pg_dir);
|
||||
|
||||
if (pae40_exist_but_not_enab())
|
||||
write_aux_reg(ARC_REG_TLBPD1HI, 0);
|
||||
@@ -945,40 +778,3 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
* Diagnostic Routines
|
||||
 * -Called from Low Level TLB Handlers if things don't look good
|
||||
**********************************************************************/
|
||||
|
||||
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
|
||||
|
||||
/*
|
||||
* Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS
|
||||
* don't match
|
||||
*/
|
||||
void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path)
|
||||
{
|
||||
pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
|
||||
is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid);
|
||||
|
||||
__asm__ __volatile__("flag 1");
|
||||
}
|
||||
|
||||
void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr)
|
||||
{
|
||||
unsigned int mmu_asid;
|
||||
|
||||
mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff;
|
||||
|
||||
/*
|
||||
* At the time of a TLB miss/installation
|
||||
* - HW version needs to match SW version
|
||||
* - SW needs to have a valid ASID
|
||||
*/
|
||||
if (addr < 0x70000000 &&
|
||||
((mm_asid == MM_CTXT_NO_ASID) ||
|
||||
(mmu_asid != (mm_asid & MM_CTXT_ASID_MASK))))
|
||||
print_asid_mismatch(mm_asid, mmu_asid, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -39,7 +39,6 @@
|
||||
#include <asm/arcregs.h>
|
||||
#include <asm/cache.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/tlb-mmu1.h>
|
||||
|
||||
#ifdef CONFIG_ISA_ARCOMPACT
|
||||
;-----------------------------------------------------------------
|
||||
@@ -94,11 +93,6 @@ ex_saved_reg1:
|
||||
st_s r1, [r0, 4]
|
||||
st_s r2, [r0, 8]
|
||||
st_s r3, [r0, 12]
|
||||
|
||||
; VERIFY if the ASID in MMU-PID Reg is same as
|
||||
; one in Linux data structures
|
||||
|
||||
tlb_paranoid_check_asm
|
||||
.endm
|
||||
|
||||
.macro TLBMISS_RESTORE_REGS
|
||||
@@ -147,55 +141,18 @@ ex_saved_reg1:
|
||||
|
||||
#endif
|
||||
|
||||
;============================================================================
|
||||
; Troubleshooting Stuff
|
||||
;============================================================================
|
||||
|
||||
; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid
|
||||
; When Creating TLB Entries, instead of doing 3 dependent loads from memory,
|
||||
; we use the MMU PID Reg to get current ASID.
|
||||
; In bizarre scenarios SW and HW ASID can get out-of-sync, which is trouble.
|
||||
; So we try to detect this in the TLB Miss handler
|
||||
|
||||
.macro tlb_paranoid_check_asm
|
||||
|
||||
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
|
||||
|
||||
GET_CURR_TASK_ON_CPU r3
|
||||
ld r0, [r3, TASK_ACT_MM]
|
||||
ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
|
||||
breq r0, 0, 55f ; Error if no ASID allocated
|
||||
|
||||
lr r1, [ARC_REG_PID]
|
||||
and r1, r1, 0xFF
|
||||
|
||||
and r2, r0, 0xFF ; MMU PID bits only for comparison
|
||||
breq r1, r2, 5f
|
||||
|
||||
55:
|
||||
; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
|
||||
lr r2, [erstatus]
|
||||
bbit0 r2, STATUS_U_BIT, 5f
|
||||
|
||||
; We sure are in troubled waters, Flag the error, but to do so
|
||||
; need to switch to kernel mode stack to call error routine
|
||||
GET_TSK_STACK_BASE r3, sp
|
||||
|
||||
; Call printk to shoutout aloud
|
||||
mov r2, 1
|
||||
j print_asid_mismatch
|
||||
|
||||
5: ; ASIDs match so proceed normally
|
||||
nop
|
||||
|
||||
#endif
|
||||
|
||||
.endm
|
||||
|
||||
;============================================================================
|
||||
;TLB Miss handling Code
|
||||
;============================================================================
|
||||
|
||||
#ifndef PMD_SHIFT
|
||||
#define PMD_SHIFT PUD_SHIFT
|
||||
#endif
|
||||
|
||||
#ifndef PUD_SHIFT
|
||||
#define PUD_SHIFT PGDIR_SHIFT
|
||||
#endif
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; This macro does the page-table lookup for the faulting address.
|
||||
; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address
|
||||
@@ -203,7 +160,7 @@ ex_saved_reg1:
|
||||
|
||||
lr r2, [efa]
|
||||
|
||||
#ifdef ARC_USE_SCRATCH_REG
|
||||
#ifdef CONFIG_ISA_ARCV2
|
||||
lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
|
||||
#else
|
||||
GET_CURR_TASK_ON_CPU r1
|
||||
@@ -216,6 +173,24 @@ ex_saved_reg1:
|
||||
tst r3, r3
|
||||
bz do_slow_path_pf ; if no Page Table, do page fault
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 3
|
||||
lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD
|
||||
and r0, r0, (PTRS_PER_PUD - 1)
|
||||
ld.as r1, [r3, r0] ; PUD entry
|
||||
tst r1, r1
|
||||
bz do_slow_path_pf
|
||||
mov r3, r1
|
||||
#endif
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD
|
||||
and r0, r0, (PTRS_PER_PMD - 1)
|
||||
ld.as r1, [r3, r0] ; PMD entry
|
||||
tst r1, r1
|
||||
bz do_slow_path_pf
|
||||
mov r3, r1
|
||||
#endif
|
||||
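The lookup code above indexes each page-table level with a shift and a mask derived from PTRS_PER_*. A C sketch of the same index arithmetic for a faulting address, using assumed shift values for an 8k-page, two-level layout; the real values depend on CONFIG_PGTABLE_LEVELS and the configured page size.

```c
#include <stdio.h>
#include <stdint.h>

/* assumed example geometry: 8k pages, two-level table on a 32-bit space */
#define PAGE_SHIFT	13
#define PGDIR_SHIFT	24
#define PTRS_PER_PGD	(1u << (32 - PGDIR_SHIFT))
#define PTRS_PER_PTE	(1u << (PGDIR_SHIFT - PAGE_SHIFT))

int main(void)
{
	uint32_t vaddr = 0x2001a040;	/* example faulting address */

	uint32_t pgd_idx = (vaddr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
	uint32_t pte_idx = (vaddr >> PAGE_SHIFT)  & (PTRS_PER_PTE - 1);

	printf("pgd index %u, pte index %u\n",
	       (unsigned)pgd_idx, (unsigned)pte_idx);
	return 0;
}
```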
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp)
|
||||
add2.nz r1, r1, r0
|
||||
@@ -279,7 +254,7 @@ ex_saved_reg1:
|
||||
; Commit the TLB entry into MMU
|
||||
|
||||
.macro COMMIT_ENTRY_TO_MMU
|
||||
#if (CONFIG_ARC_MMU_VER < 4)
|
||||
#ifdef CONFIG_ARC_MMU_V3
|
||||
|
||||
/* Get free TLB slot: Set = computed from vaddr, way = random */
|
||||
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
|
||||
@@ -375,13 +350,6 @@ ENTRY(EV_TLBMissD)
|
||||
|
||||
CONV_PTE_TO_TLB
|
||||
|
||||
#if (CONFIG_ARC_MMU_VER == 1)
|
||||
; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
|
||||
; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
|
||||
; But only for old MMU or one with Metal Fix
|
||||
TLB_WRITE_HEURISTICS
|
||||
#endif
|
||||
|
||||
COMMIT_ENTRY_TO_MMU
|
||||
TLBMISS_RESTORE_REGS
|
||||
EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation
|
||||
|
||||
@@ -126,6 +126,7 @@ config ARM
|
||||
select RTC_LIB
|
||||
select SET_FS
|
||||
select SYS_SUPPORTS_APM_EMULATION
|
||||
select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
|
||||
# Above selects are sorted alphabetically; please add new ones
|
||||
# according to that. Thanks.
|
||||
help
|
||||
@@ -189,10 +190,6 @@ config LOCKDEP_SUPPORT
|
||||
bool
|
||||
default y
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
bool
|
||||
default !CPU_V7M
|
||||
|
||||
config ARCH_HAS_ILOG2_U32
|
||||
bool
|
||||
|
||||
|
||||
@@ -220,6 +220,7 @@ config ARM64
|
||||
select SYSCTL_EXCEPTION_TRACE
|
||||
select THREAD_INFO_IN_TASK
|
||||
select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
help
|
||||
ARM 64-bit (AArch64) Linux support.
|
||||
|
||||
@@ -287,9 +288,6 @@ config ILLEGAL_POINTER_VALUE
|
||||
config LOCKDEP_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config GENERIC_BUG
|
||||
def_bool y
|
||||
depends on BUG
|
||||
|
||||
@@ -82,6 +82,7 @@ config CSKY
|
||||
select PCI_SYSCALL if PCI
|
||||
select PCI_MSI if PCI
|
||||
select SET_FS
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
|
||||
config LOCKDEP_SUPPORT
|
||||
def_bool y
|
||||
@@ -139,9 +140,6 @@ config STACKTRACE_SUPPORT
|
||||
config TIME_LOW_RES
|
||||
def_bool y
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config CPU_TLB_SIZE
|
||||
int
|
||||
default "128" if (CPU_CK610 || CPU_CK807 || CPU_CK810)
|
||||
|
||||
@@ -32,6 +32,7 @@ config HEXAGON
|
||||
select GENERIC_CPU_DEVICES
|
||||
select SET_FS
|
||||
select ARCH_WANT_LD_ORPHAN_WARN
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
help
|
||||
Qualcomm Hexagon is a processor architecture designed for high
|
||||
performance and low power across a wide variety of applications.
|
||||
@@ -53,9 +54,6 @@ config EARLY_PRINTK
|
||||
config MMU
|
||||
def_bool y
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config GENERIC_CSUM
|
||||
def_bool y
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@ config MICROBLAZE
|
||||
select SPARSE_IRQ
|
||||
select SET_FS
|
||||
select ZONE_DMA
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
|
||||
# Endianness selection
|
||||
choice
|
||||
|
||||
@@ -1,6 +1 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
# For a description of the syntax of this configuration file,
|
||||
# see Documentation/kbuild/kconfig-language.rst.
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
@@ -98,6 +98,7 @@ config MIPS
|
||||
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
|
||||
select RTC_LIB
|
||||
select SYSCTL_EXCEPTION_TRACE
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
select VIRT_TO_BUS
|
||||
select ARCH_HAS_ELFCORE_COMPAT
|
||||
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
bool
|
||||
default y
|
||||
|
||||
config EARLY_PRINTK
|
||||
bool "Early printk" if EXPERT
|
||||
depends on SYS_HAS_EARLY_PRINTK
|
||||
|
||||
@@ -46,6 +46,7 @@ config NDS32
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select SET_FS
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
help
|
||||
Andes(nds32) Linux support.
|
||||
|
||||
@@ -62,9 +63,6 @@ config GENERIC_LOCKBREAK
|
||||
def_bool y
|
||||
depends on PREEMPTION
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config STACKTRACE_SUPPORT
|
||||
def_bool y
|
||||
|
||||
|
||||
@@ -41,9 +41,6 @@ config NO_IOPORT_MAP
|
||||
config FPU
|
||||
def_bool n
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool n
|
||||
|
||||
menu "Kernel features"
|
||||
|
||||
source "kernel/Kconfig.hz"
|
||||
|
||||
@@ -37,6 +37,7 @@ config OPENRISC
|
||||
select GENERIC_IRQ_MULTI_HANDLER
|
||||
select MMU_GATHER_NO_RANGE if MMU
|
||||
select SET_FS
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
|
||||
config CPU_BIG_ENDIAN
|
||||
def_bool y
|
||||
@@ -50,9 +51,6 @@ config GENERIC_HWEIGHT
|
||||
config NO_IOPORT_MAP
|
||||
def_bool y
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
# For now, use generic checksum functions
|
||||
#These can be reimplemented in assembly later if so inclined
|
||||
config GENERIC_CSUM
|
||||
|
||||
@@ -66,6 +66,7 @@ config PARISC
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_REGS
|
||||
select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS
|
||||
select SET_FS
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
|
||||
help
|
||||
The PA-RISC microprocessor is designed by Hewlett-Packard and used
|
||||
|
||||
@@ -1,4 +1 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
@@ -94,10 +94,6 @@ config STACKTRACE_SUPPORT
|
||||
bool
|
||||
default y
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
bool
|
||||
default y
|
||||
|
||||
config LOCKDEP_SUPPORT
|
||||
bool
|
||||
default y
|
||||
@@ -270,6 +266,7 @@ config PPC
|
||||
select STRICT_KERNEL_RWX if STRICT_MODULE_RWX
|
||||
select SYSCTL_EXCEPTION_TRACE
|
||||
select THREAD_INFO_IN_TASK
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
select VIRT_TO_BUS if !PPC64
|
||||
#
|
||||
# Please keep this list sorted alphabetically.
|
||||
|
||||
@@ -113,6 +113,7 @@ config RISCV
|
||||
select SPARSE_IRQ
|
||||
select SYSCTL_EXCEPTION_TRACE
|
||||
select THREAD_INFO_IN_TASK
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
select UACCESS_MEMCPY if !MMU
|
||||
select ZONE_DMA32 if 64BIT
|
||||
|
||||
@@ -182,9 +183,6 @@ config ARCH_SUPPORTS_UPROBES
|
||||
config STACKTRACE_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config GENERIC_BUG
|
||||
def_bool y
|
||||
depends on BUG
|
||||
|
||||
@@ -209,6 +209,7 @@ config S390
|
||||
select SWIOTLB
|
||||
select SYSCTL_EXCEPTION_TRACE
|
||||
select THREAD_INFO_IN_TASK
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
select TTY
|
||||
select VIRT_CPU_ACCOUNTING
|
||||
select ZONE_DMA
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config EARLY_PRINTK
|
||||
def_bool y
|
||||
|
||||
|
||||
@@ -822,7 +822,7 @@ void do_secure_storage_access(struct pt_regs *regs)
|
||||
break;
|
||||
case KERNEL_FAULT:
|
||||
page = phys_to_page(addr);
|
||||
if (unlikely(!try_get_compound_head(page, 1)))
|
||||
if (unlikely(!try_get_page(page)))
|
||||
break;
|
||||
rc = arch_make_page_accessible(page);
|
||||
put_page(page);
|
||||
|
||||
@@ -69,6 +69,7 @@ config SUPERH
|
||||
select RTC_LIB
|
||||
select SET_FS
|
||||
select SPARSE_IRQ
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
help
|
||||
The SuperH is a RISC processor targeted for use in embedded systems
|
||||
and consumer electronics; it was also used in the Sega Dreamcast
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config SH_STANDARD_BIOS
|
||||
bool "Use LinuxSH standard BIOS"
|
||||
help
|
||||
|
||||
@@ -47,6 +47,7 @@ config SPARC
|
||||
select NEED_DMA_MAP_STATE
|
||||
select NEED_SG_DMA_LENGTH
|
||||
select SET_FS
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
|
||||
config SPARC32
|
||||
def_bool !64BIT
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
bool
|
||||
default y
|
||||
|
||||
config DEBUG_DCFLUSH
|
||||
bool "D-cache flush debugging"
|
||||
depends on SPARC64 && DEBUG_KERNEL
|
||||
|
||||
@@ -22,6 +22,7 @@ config UML
|
||||
select GENERIC_CPU_DEVICES
|
||||
select HAVE_GCC_PLUGINS
|
||||
select SET_FS
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
select TTY # Needed for line.c
|
||||
|
||||
config MMU
|
||||
@@ -52,10 +53,6 @@ config ISA
|
||||
config SBUS
|
||||
bool
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
bool
|
||||
default y
|
||||
|
||||
config LOCKDEP_SUPPORT
|
||||
bool
|
||||
default y
|
||||
|
||||
@@ -259,6 +259,7 @@ config X86
|
||||
select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
|
||||
select SYSCTL_EXCEPTION_TRACE
|
||||
select THREAD_INFO_IN_TASK
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
select USER_STACKTRACE_SUPPORT
|
||||
select VIRT_TO_BUS
|
||||
select HAVE_ARCH_KCSAN if X86_64
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config TRACE_IRQFLAGS_NMI_SUPPORT
|
||||
def_bool y
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ config XTENSA
|
||||
select MODULES_USE_ELF_RELA
|
||||
select PERF_USE_VMALLOC
|
||||
select SET_FS
|
||||
select TRACE_IRQFLAGS_SUPPORT
|
||||
select VIRT_TO_BUS
|
||||
help
|
||||
Xtensa processors are 32-bit RISC machines designed by Tensilica
|
||||
@@ -73,9 +74,6 @@ config LOCKDEP_SUPPORT
|
||||
config STACKTRACE_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config TRACE_IRQFLAGS_SUPPORT
|
||||
def_bool y
|
||||
|
||||
config MMU
|
||||
def_bool n
|
||||
|
||||
|
||||
@@ -2662,6 +2662,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
|
||||
* are likely to increase the throughput.
|
||||
*/
|
||||
bfqq->new_bfqq = new_bfqq;
|
||||
/*
|
||||
* The above assignment schedules the following redirections:
|
||||
* each time some I/O for bfqq arrives, the process that
|
||||
* generated that I/O is disassociated from bfqq and
|
||||
 * associated with new_bfqq. Here we increase new_bfqq->ref
|
||||
* in advance, adding the number of processes that are
|
||||
* expected to be associated with new_bfqq as they happen to
|
||||
* issue I/O.
|
||||
*/
|
||||
new_bfqq->ref += process_refs;
|
||||
return new_bfqq;
|
||||
}
|
||||
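The comment above describes taking the references for the expected future users up front, before the processes are actually redirected. A toy sketch of that pattern; the struct and counter names are illustrative only.

```c
#include <stdio.h>

struct queue { int ref; };

/* take the references for N expected future users in advance */
static void take_refs_in_advance(struct queue *q, int expected_users)
{
	q->ref += expected_users;
}

int main(void)
{
	struct queue q = { .ref = 1 };

	take_refs_in_advance(&q, 3);	/* three processes expected to follow */
	printf("ref = %d\n", q.ref);	/* 4 */
	return 0;
}
```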
@@ -2724,6 +2733,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
{
|
||||
struct bfq_queue *in_service_bfqq, *new_bfqq;
|
||||
|
||||
/* if a merge has already been setup, then proceed with that first */
|
||||
if (bfqq->new_bfqq)
|
||||
return bfqq->new_bfqq;
|
||||
|
||||
/*
|
||||
* Check delayed stable merge for rotational or non-queueing
|
||||
* devs. For this branch to be executed, bfqq must not be
|
||||
@@ -2825,9 +2838,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
if (bfq_too_late_for_merging(bfqq))
|
||||
return NULL;
|
||||
|
||||
if (bfqq->new_bfqq)
|
||||
return bfqq->new_bfqq;
|
||||
|
||||
if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
|
||||
return NULL;
|
||||
|
||||
|
||||
@@ -1691,7 +1691,7 @@ EXPORT_SYMBOL(bioset_init_from_src);
|
||||
/**
|
||||
* bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
|
||||
* @kiocb: kiocb describing the IO
|
||||
* @nr_iovecs: number of iovecs to pre-allocate
|
||||
* @nr_vecs: number of iovecs to pre-allocate
|
||||
* @bs: bio_set to allocate from
|
||||
*
|
||||
* Description:
|
||||
|
||||
@@ -270,12 +270,6 @@ deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
|
||||
deadline_remove_request(rq->q, per_prio, rq);
|
||||
}
|
||||
|
||||
/* Number of requests queued for a given priority level. */
|
||||
static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
|
||||
{
|
||||
return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
|
||||
}
|
||||
|
||||
/*
|
||||
* deadline_check_fifo returns 0 if there are no expired requests on the fifo,
|
||||
* 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
|
||||
@@ -953,6 +947,12 @@ static int dd_async_depth_show(void *data, struct seq_file *m)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Number of requests queued for a given priority level. */
|
||||
static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
|
||||
{
|
||||
return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
|
||||
}
|
||||
|
||||
static int dd_queued_show(void *data, struct seq_file *m)
|
||||
{
|
||||
struct request_queue *q = data;
|
||||
|
||||
@@ -2186,6 +2186,25 @@ not_supported:
|
||||
dev->flags &= ~ATA_DFLAG_NCQ_PRIO;
|
||||
}
|
||||
|
||||
static bool ata_dev_check_adapter(struct ata_device *dev,
|
||||
unsigned short vendor_id)
|
||||
{
|
||||
struct pci_dev *pcidev = NULL;
|
||||
struct device *parent_dev = NULL;
|
||||
|
||||
for (parent_dev = dev->tdev.parent; parent_dev != NULL;
|
||||
parent_dev = parent_dev->parent) {
|
||||
if (dev_is_pci(parent_dev)) {
|
||||
pcidev = to_pci_dev(parent_dev);
|
||||
if (pcidev->vendor == vendor_id)
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int ata_dev_config_ncq(struct ata_device *dev,
|
||||
char *desc, size_t desc_sz)
|
||||
{
|
||||
@@ -2204,6 +2223,13 @@ static int ata_dev_config_ncq(struct ata_device *dev,
|
||||
snprintf(desc, desc_sz, "NCQ (not used)");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (dev->horkage & ATA_HORKAGE_NO_NCQ_ON_ATI &&
|
||||
ata_dev_check_adapter(dev, PCI_VENDOR_ID_ATI)) {
|
||||
snprintf(desc, desc_sz, "NCQ (not used)");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ap->flags & ATA_FLAG_NCQ) {
|
||||
hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE);
|
||||
dev->flags |= ATA_DFLAG_NCQ;
|
||||
@@ -3970,6 +3996,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
|
||||
ATA_HORKAGE_ZERO_AFTER_TRIM, },
|
||||
{ "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
|
||||
ATA_HORKAGE_ZERO_AFTER_TRIM, },
|
||||
{ "Samsung SSD 860*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
|
||||
ATA_HORKAGE_ZERO_AFTER_TRIM |
|
||||
ATA_HORKAGE_NO_NCQ_ON_ATI, },
|
||||
{ "Samsung SSD 870*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
|
||||
ATA_HORKAGE_ZERO_AFTER_TRIM |
|
||||
ATA_HORKAGE_NO_NCQ_ON_ATI, },
|
||||
{ "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
|
||||
ATA_HORKAGE_ZERO_AFTER_TRIM, },
|
||||
|
||||
@@ -6124,6 +6156,8 @@ static int __init ata_parse_force_one(char **cur,
|
||||
{ "ncq", .horkage_off = ATA_HORKAGE_NONCQ },
|
||||
{ "noncqtrim", .horkage_on = ATA_HORKAGE_NO_NCQ_TRIM },
|
||||
{ "ncqtrim", .horkage_off = ATA_HORKAGE_NO_NCQ_TRIM },
|
||||
{ "noncqati", .horkage_on = ATA_HORKAGE_NO_NCQ_ON_ATI },
|
||||
{ "ncqati", .horkage_off = ATA_HORKAGE_NO_NCQ_ON_ATI },
|
||||
{ "dump_id", .horkage_on = ATA_HORKAGE_DUMP_ID },
|
||||
{ "pio0", .xfer_mask = 1 << (ATA_SHIFT_PIO + 0) },
|
||||
{ "pio1", .xfer_mask = 1 << (ATA_SHIFT_PIO + 1) },
|
||||
|
||||
@@ -2111,18 +2111,6 @@ int loop_register_transfer(struct loop_func_table *funcs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int unregister_transfer_cb(int id, void *ptr, void *data)
|
||||
{
|
||||
struct loop_device *lo = ptr;
|
||||
struct loop_func_table *xfer = data;
|
||||
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
if (lo->lo_encryption == xfer)
|
||||
loop_release_xfer(lo);
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int loop_unregister_transfer(int number)
|
||||
{
|
||||
unsigned int n = number;
|
||||
@@ -2130,9 +2118,20 @@ int loop_unregister_transfer(int number)
|
||||
|
||||
if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
|
||||
return -EINVAL;
|
||||
/*
|
||||
 * This function is called only from cleanup_cryptoloop().
|
||||
* Given that each loop device that has a transfer enabled holds a
|
||||
* reference to the module implementing it we should never get here
|
||||
* with a transfer that is set (unless forced module unloading is
|
||||
* requested). Thus, check module's refcount and warn if this is
|
||||
* not a clean unloading.
|
||||
*/
|
||||
#ifdef CONFIG_MODULE_UNLOAD
|
||||
if (xfer->owner && module_refcount(xfer->owner) != -1)
|
||||
pr_err("Danger! Unregistering an in use transfer function.\n");
|
||||
#endif
|
||||
|
||||
xfer_funcs[n] = NULL;
|
||||
idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2323,8 +2322,9 @@ static int loop_add(int i)
|
||||
} else {
|
||||
err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
|
||||
}
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
if (err < 0)
|
||||
goto out_unlock;
|
||||
goto out_free_dev;
|
||||
i = err;
|
||||
|
||||
err = -ENOMEM;
|
||||
@@ -2393,15 +2393,19 @@ static int loop_add(int i)
|
||||
disk->events = DISK_EVENT_MEDIA_CHANGE;
|
||||
disk->event_flags = DISK_EVENT_FLAG_UEVENT;
|
||||
sprintf(disk->disk_name, "loop%d", i);
|
||||
/* Make this loop device reachable from pathname. */
|
||||
add_disk(disk);
|
||||
/* Show this loop device. */
|
||||
mutex_lock(&loop_ctl_mutex);
|
||||
lo->idr_visible = true;
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
return i;
|
||||
|
||||
out_cleanup_tags:
|
||||
blk_mq_free_tag_set(&lo->tag_set);
|
||||
out_free_idr:
|
||||
mutex_lock(&loop_ctl_mutex);
|
||||
idr_remove(&loop_index_idr, i);
|
||||
out_unlock:
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
out_free_dev:
|
||||
kfree(lo);
|
||||
@@ -2411,9 +2415,14 @@ out:
|
||||
|
||||
static void loop_remove(struct loop_device *lo)
|
||||
{
|
||||
/* Make this loop device unreachable from pathname. */
|
||||
del_gendisk(lo->lo_disk);
|
||||
blk_cleanup_disk(lo->lo_disk);
|
||||
blk_mq_free_tag_set(&lo->tag_set);
|
||||
mutex_lock(&loop_ctl_mutex);
|
||||
idr_remove(&loop_index_idr, lo->lo_number);
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
/* There is no route which can find this loop device. */
|
||||
mutex_destroy(&lo->lo_mutex);
|
||||
kfree(lo);
|
||||
}
|
||||
@@ -2437,31 +2446,40 @@ static int loop_control_remove(int idx)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Hide this loop device for serialization. */
|
||||
ret = mutex_lock_killable(&loop_ctl_mutex);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
lo = idr_find(&loop_index_idr, idx);
|
||||
if (!lo) {
|
||||
if (!lo || !lo->idr_visible)
|
||||
ret = -ENODEV;
|
||||
goto out_unlock_ctrl;
|
||||
}
|
||||
else
|
||||
lo->idr_visible = false;
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Check whether this loop device can be removed. */
|
||||
ret = mutex_lock_killable(&lo->lo_mutex);
|
||||
if (ret)
|
||||
goto out_unlock_ctrl;
|
||||
goto mark_visible;
|
||||
if (lo->lo_state != Lo_unbound ||
|
||||
atomic_read(&lo->lo_refcnt) > 0) {
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
ret = -EBUSY;
|
||||
goto out_unlock_ctrl;
|
||||
goto mark_visible;
|
||||
}
|
||||
/* Mark this loop device no longer open()-able. */
|
||||
lo->lo_state = Lo_deleting;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
|
||||
idr_remove(&loop_index_idr, lo->lo_number);
|
||||
loop_remove(lo);
|
||||
out_unlock_ctrl:
|
||||
return 0;
|
||||
|
||||
mark_visible:
|
||||
/* Show this loop device again. */
|
||||
mutex_lock(&loop_ctl_mutex);
|
||||
lo->idr_visible = true;
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
return ret;
|
||||
}
|
||||
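The remove path above hides the device in the idr first, drops the global lock, re-checks under the per-device lock, and makes the device visible again if removal must be aborted. A compact pthread sketch of that hide / check / restore-or-destroy shape; the structure, names, and the busy test are assumptions made for the example.

```c
#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t ctl_lock = PTHREAD_MUTEX_INITIALIZER;

struct dev {
	pthread_mutex_t lock;
	bool visible;	/* protected by ctl_lock   */
	int refcnt;	/* protected by dev->lock  */
};

static int try_remove(struct dev *d)
{
	/* hide the device so concurrent lookups no longer see it */
	pthread_mutex_lock(&ctl_lock);
	if (!d->visible) {
		pthread_mutex_unlock(&ctl_lock);
		return -1;		/* already being removed */
	}
	d->visible = false;
	pthread_mutex_unlock(&ctl_lock);

	/* check removability under the per-device lock */
	pthread_mutex_lock(&d->lock);
	if (d->refcnt > 0) {
		pthread_mutex_unlock(&d->lock);
		/* busy: make it visible again and report failure */
		pthread_mutex_lock(&ctl_lock);
		d->visible = true;
		pthread_mutex_unlock(&ctl_lock);
		return -2;
	}
	pthread_mutex_unlock(&d->lock);

	/* nobody can find it any more; safe to tear down */
	return 0;
}

int main(void)
{
	struct dev d = { .lock = PTHREAD_MUTEX_INITIALIZER,
			 .visible = true, .refcnt = 0 };

	printf("remove -> %d\n", try_remove(&d));	/* 0: removed      */
	printf("remove -> %d\n", try_remove(&d));	/* -1: already gone */
	return 0;
}
```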
@@ -2475,7 +2493,8 @@ static int loop_control_get_free(int idx)
|
||||
if (ret)
|
||||
return ret;
|
||||
idr_for_each_entry(&loop_index_idr, lo, id) {
|
||||
if (lo->lo_state == Lo_unbound)
|
||||
/* Hitting a race results in creating a new loop device which is harmless. */
|
||||
if (lo->idr_visible && data_race(lo->lo_state) == Lo_unbound)
|
||||
goto found;
|
||||
}
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
@@ -2591,10 +2610,14 @@ static void __exit loop_exit(void)
|
||||
unregister_blkdev(LOOP_MAJOR, "loop");
|
||||
misc_deregister(&loop_misc);
|
||||
|
||||
mutex_lock(&loop_ctl_mutex);
|
||||
/*
|
||||
* There is no need to use loop_ctl_mutex here, for nobody else can
|
||||
* access loop_index_idr when this module is unloading (unless forced
|
||||
* module unloading is requested). If this is not a clean unloading,
|
||||
* we have no means to avoid kernel crash.
|
||||
*/
|
||||
idr_for_each_entry(&loop_index_idr, lo, id)
|
||||
loop_remove(lo);
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
|
||||
idr_destroy(&loop_index_idr);
|
||||
}
|
||||
|
||||
@@ -68,6 +68,7 @@ struct loop_device {
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct gendisk *lo_disk;
|
||||
struct mutex lo_mutex;
|
||||
bool idr_visible;
|
||||
};
|
||||
|
||||
struct loop_cmd {
|
||||
|
||||
@@ -49,14 +49,14 @@ void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
|
||||
err = efivar_entry_get(pnvm_efivar, NULL, &package_size, data);
|
||||
if (err) {
|
||||
IWL_DEBUG_FW(trans,
|
||||
"PNVM UEFI variable not found %d (len %zd)\n",
|
||||
"PNVM UEFI variable not found %d (len %lu)\n",
|
||||
err, package_size);
|
||||
kfree(data);
|
||||
data = ERR_PTR(err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
IWL_DEBUG_FW(trans, "Read PNVM from UEFI with size %zd\n", package_size);
|
||||
IWL_DEBUG_FW(trans, "Read PNVM from UEFI with size %lu\n", package_size);
|
||||
*len = package_size;
|
||||
|
||||
out:
|
||||
|
||||
@@ -2206,23 +2206,13 @@ static void tb_test_credit_alloc_dma_multiple(struct kunit *test)
|
||||
tb_tunnel_free(tunnel2);
|
||||
}
|
||||
|
||||
static void tb_test_credit_alloc_all(struct kunit *test)
|
||||
static struct tb_tunnel *TB_TEST_PCIE_TUNNEL(struct kunit *test,
|
||||
struct tb_switch *host, struct tb_switch *dev)
|
||||
{
|
||||
struct tb_port *up, *down, *in, *out, *nhi, *port;
|
||||
struct tb_tunnel *pcie_tunnel, *dp_tunnel1, *dp_tunnel2, *usb3_tunnel;
|
||||
struct tb_tunnel *dma_tunnel1, *dma_tunnel2;
|
||||
struct tb_switch *host, *dev;
|
||||
struct tb_port *up, *down;
|
||||
struct tb_tunnel *pcie_tunnel;
|
||||
struct tb_path *path;
|
||||
|
||||
/*
|
||||
* Create PCIe, 2 x DP, USB 3.x and two DMA tunnels from host to
|
||||
* device. Expectation is that all these can be established with
|
||||
* the default credit allocation found in Intel hardware.
|
||||
*/
|
||||
|
||||
host = alloc_host_usb4(test);
|
||||
dev = alloc_dev_usb4(test, host, 0x1, true);
|
||||
|
||||
down = &host->ports[8];
|
||||
up = &dev->ports[9];
|
||||
pcie_tunnel = tb_tunnel_alloc_pci(NULL, up, down);
|
||||
@@ -2243,9 +2233,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 64U);
|
||||
|
||||
return pcie_tunnel;
|
||||
}
|
||||
|
||||
static struct tb_tunnel *TB_TEST_DP_TUNNEL1(struct kunit *test,
|
||||
struct tb_switch *host, struct tb_switch *dev)
|
||||
{
|
||||
struct tb_port *in, *out;
|
||||
struct tb_tunnel *dp_tunnel1;
|
||||
struct tb_path *path;
|
||||
|
||||
in = &host->ports[5];
|
||||
out = &dev->ports[13];
|
||||
|
||||
dp_tunnel1 = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
|
||||
KUNIT_ASSERT_TRUE(test, dp_tunnel1 != NULL);
|
||||
KUNIT_ASSERT_EQ(test, dp_tunnel1->npaths, (size_t)3);
|
||||
@@ -2271,9 +2270,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 1U);
|
||||
|
||||
return dp_tunnel1;
|
||||
}
|
||||
|
||||
static struct tb_tunnel *TB_TEST_DP_TUNNEL2(struct kunit *test,
|
||||
struct tb_switch *host, struct tb_switch *dev)
|
||||
{
|
||||
struct tb_port *in, *out;
|
||||
struct tb_tunnel *dp_tunnel2;
|
||||
struct tb_path *path;
|
||||
|
||||
in = &host->ports[6];
|
||||
out = &dev->ports[14];
|
||||
|
||||
dp_tunnel2 = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
|
||||
KUNIT_ASSERT_TRUE(test, dp_tunnel2 != NULL);
|
||||
KUNIT_ASSERT_EQ(test, dp_tunnel2->npaths, (size_t)3);
|
||||
@@ -2299,6 +2307,16 @@ static void tb_test_credit_alloc_all(struct kunit *test)
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 1U);
|
||||
|
||||
return dp_tunnel2;
|
||||
}
|
||||
|
||||
static struct tb_tunnel *TB_TEST_USB3_TUNNEL(struct kunit *test,
|
||||
struct tb_switch *host, struct tb_switch *dev)
|
||||
{
|
||||
struct tb_port *up, *down;
|
||||
struct tb_tunnel *usb3_tunnel;
|
||||
struct tb_path *path;
|
||||
|
||||
down = &host->ports[12];
|
||||
up = &dev->ports[16];
|
||||
usb3_tunnel = tb_tunnel_alloc_usb3(NULL, up, down, 0, 0);
|
||||
@@ -2319,9 +2337,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 32U);
|
||||
|
||||
return usb3_tunnel;
|
||||
}
|
||||
|
||||
static struct tb_tunnel *TB_TEST_DMA_TUNNEL1(struct kunit *test,
|
||||
struct tb_switch *host, struct tb_switch *dev)
|
||||
{
|
||||
struct tb_port *nhi, *port;
|
||||
struct tb_tunnel *dma_tunnel1;
|
||||
struct tb_path *path;
|
||||
|
||||
nhi = &host->ports[7];
|
||||
port = &dev->ports[3];
|
||||
|
||||
dma_tunnel1 = tb_tunnel_alloc_dma(NULL, nhi, port, 8, 1, 8, 1);
|
||||
KUNIT_ASSERT_TRUE(test, dma_tunnel1 != NULL);
|
||||
KUNIT_ASSERT_EQ(test, dma_tunnel1->npaths, (size_t)2);
|
||||
@@ -2340,6 +2367,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 14U);
|
||||
|
||||
return dma_tunnel1;
|
||||
}
|
||||
|
||||
static struct tb_tunnel *TB_TEST_DMA_TUNNEL2(struct kunit *test,
|
||||
struct tb_switch *host, struct tb_switch *dev)
|
||||
{
|
||||
struct tb_port *nhi, *port;
|
||||
struct tb_tunnel *dma_tunnel2;
|
||||
struct tb_path *path;
|
||||
|
||||
nhi = &host->ports[7];
|
||||
port = &dev->ports[3];
|
||||
dma_tunnel2 = tb_tunnel_alloc_dma(NULL, nhi, port, 9, 2, 9, 2);
|
||||
KUNIT_ASSERT_TRUE(test, dma_tunnel2 != NULL);
|
||||
KUNIT_ASSERT_EQ(test, dma_tunnel2->npaths, (size_t)2);
|
||||
@@ -2358,6 +2397,31 @@ static void tb_test_credit_alloc_all(struct kunit *test)
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
|
||||
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 1U);
|
||||
|
||||
return dma_tunnel2;
|
||||
}
|
||||
|
||||
static void tb_test_credit_alloc_all(struct kunit *test)
|
||||
{
|
||||
struct tb_tunnel *pcie_tunnel, *dp_tunnel1, *dp_tunnel2, *usb3_tunnel;
|
||||
struct tb_tunnel *dma_tunnel1, *dma_tunnel2;
|
||||
struct tb_switch *host, *dev;
|
||||
|
||||
/*
|
||||
* Create PCIe, 2 x DP, USB 3.x and two DMA tunnels from host to
|
||||
* device. Expectation is that all these can be established with
|
||||
* the default credit allocation found in Intel hardware.
|
||||
*/
|
||||
|
||||
host = alloc_host_usb4(test);
|
||||
dev = alloc_dev_usb4(test, host, 0x1, true);
|
||||
|
||||
pcie_tunnel = TB_TEST_PCIE_TUNNEL(test, host, dev);
|
||||
dp_tunnel1 = TB_TEST_DP_TUNNEL1(test, host, dev);
|
||||
dp_tunnel2 = TB_TEST_DP_TUNNEL2(test, host, dev);
|
||||
usb3_tunnel = TB_TEST_USB3_TUNNEL(test, host, dev);
|
||||
dma_tunnel1 = TB_TEST_DMA_TUNNEL1(test, host, dev);
|
||||
dma_tunnel2 = TB_TEST_DMA_TUNNEL2(test, host, dev);
|
||||
|
||||
tb_tunnel_free(dma_tunnel2);
|
||||
tb_tunnel_free(dma_tunnel1);
|
||||
tb_tunnel_free(usb3_tunnel);
|
||||
|
||||
fs/io-wq.c
@@ -23,8 +23,7 @@ enum {
|
||||
IO_WORKER_F_UP = 1, /* up and active */
|
||||
IO_WORKER_F_RUNNING = 2, /* account as running */
|
||||
IO_WORKER_F_FREE = 4, /* worker on free list */
|
||||
IO_WORKER_F_FIXED = 8, /* static idle worker */
|
||||
IO_WORKER_F_BOUND = 16, /* is doing bounded work */
|
||||
IO_WORKER_F_BOUND = 8, /* is doing bounded work */
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -32,7 +31,7 @@ enum {
|
||||
};
|
||||
|
||||
enum {
|
||||
IO_WQE_FLAG_STALLED = 1, /* stalled on hash */
|
||||
IO_ACCT_STALLED_BIT = 0, /* stalled on hash */
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -55,7 +54,10 @@ struct io_worker {
|
||||
struct callback_head create_work;
|
||||
int create_index;
|
||||
|
||||
struct rcu_head rcu;
|
||||
union {
|
||||
struct rcu_head rcu;
|
||||
struct work_struct work;
|
||||
};
|
||||
};
|
||||
|
||||
#if BITS_PER_LONG == 64
|
||||
@@ -71,25 +73,24 @@ struct io_wqe_acct {
|
||||
unsigned max_workers;
|
||||
int index;
|
||||
atomic_t nr_running;
|
||||
struct io_wq_work_list work_list;
|
||||
unsigned long flags;
|
||||
};
|
||||
|
||||
enum {
|
||||
IO_WQ_ACCT_BOUND,
|
||||
IO_WQ_ACCT_UNBOUND,
|
||||
IO_WQ_ACCT_NR,
|
||||
};
|
||||
|
||||
/*
|
||||
* Per-node worker thread pool
|
||||
*/
|
||||
struct io_wqe {
|
||||
struct {
|
||||
raw_spinlock_t lock;
|
||||
struct io_wq_work_list work_list;
|
||||
unsigned flags;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
raw_spinlock_t lock;
|
||||
struct io_wqe_acct acct[2];
|
||||
|
||||
int node;
|
||||
struct io_wqe_acct acct[2];
|
||||
|
||||
struct hlist_nulls_head free_list;
|
||||
struct list_head all_list;
|
||||
@@ -133,8 +134,11 @@ struct io_cb_cancel_data {
|
||||
bool cancel_all;
|
||||
};
|
||||
|
||||
static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first);
|
||||
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
|
||||
static void io_wqe_dec_running(struct io_worker *worker);
|
||||
static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
|
||||
struct io_wqe_acct *acct,
|
||||
struct io_cb_cancel_data *match);
|
||||
|
||||
static bool io_worker_get(struct io_worker *worker)
|
||||
{
|
||||
@@ -195,11 +199,10 @@ static void io_worker_exit(struct io_worker *worker)
|
||||
do_exit(0);
|
||||
}
|
||||
|
||||
static inline bool io_wqe_run_queue(struct io_wqe *wqe)
|
||||
__must_hold(wqe->lock)
|
||||
static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
|
||||
{
|
||||
if (!wq_list_empty(&wqe->work_list) &&
|
||||
!(wqe->flags & IO_WQE_FLAG_STALLED))
|
||||
if (!wq_list_empty(&acct->work_list) &&
|
||||
!test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
@@ -208,7 +211,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
|
||||
* Check head of free list for an available worker. If one isn't available,
|
||||
* caller must create one.
|
||||
*/
|
||||
static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
|
||||
static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
|
||||
struct io_wqe_acct *acct)
|
||||
__must_hold(RCU)
|
||||
{
|
||||
struct hlist_nulls_node *n;
|
||||
@@ -222,6 +226,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
|
||||
hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
|
||||
if (!io_worker_get(worker))
|
||||
continue;
|
||||
if (io_wqe_get_acct(worker) != acct) {
|
||||
io_worker_release(worker);
|
||||
continue;
|
||||
}
|
||||
if (wake_up_process(worker->task)) {
|
||||
io_worker_release(worker);
|
||||
return true;
|
||||
@@ -236,9 +244,9 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
|
||||
* We need a worker. If we find a free one, we're good. If not, and we're
|
||||
* below the max number of workers, create one.
|
||||
*/
|
||||
static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
|
||||
static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
|
||||
{
|
||||
bool ret;
|
||||
bool do_create = false;
|
||||
|
||||
/*
|
||||
* Most likely an attempt to queue unbounded work on an io_wq that
|
||||
@@ -247,27 +255,19 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
|
||||
if (unlikely(!acct->max_workers))
|
||||
pr_warn_once("io-wq is not configured for unbound workers");
|
||||
|
||||
rcu_read_lock();
|
||||
ret = io_wqe_activate_free_worker(wqe);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!ret) {
|
||||
bool do_create = false, first = false;
|
||||
|
||||
raw_spin_lock(&wqe->lock);
|
||||
if (acct->nr_workers < acct->max_workers) {
|
||||
if (!acct->nr_workers)
|
||||
first = true;
|
||||
acct->nr_workers++;
|
||||
do_create = true;
|
||||
}
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
if (do_create) {
|
||||
atomic_inc(&acct->nr_running);
|
||||
atomic_inc(&wqe->wq->worker_refs);
|
||||
create_io_worker(wqe->wq, wqe, acct->index, first);
|
||||
}
|
||||
raw_spin_lock(&wqe->lock);
|
||||
if (acct->nr_workers < acct->max_workers) {
|
||||
acct->nr_workers++;
|
||||
do_create = true;
|
||||
}
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
if (do_create) {
|
||||
atomic_inc(&acct->nr_running);
|
||||
atomic_inc(&wqe->wq->worker_refs);
|
||||
return create_io_worker(wqe->wq, wqe, acct->index);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void io_wqe_inc_running(struct io_worker *worker)
|
||||
@@ -283,7 +283,7 @@ static void create_worker_cb(struct callback_head *cb)
|
||||
struct io_wq *wq;
|
||||
struct io_wqe *wqe;
|
||||
struct io_wqe_acct *acct;
|
||||
bool do_create = false, first = false;
|
||||
bool do_create = false;
|
||||
|
||||
worker = container_of(cb, struct io_worker, create_work);
|
||||
wqe = worker->wqe;
|
||||
@@ -291,14 +291,12 @@ static void create_worker_cb(struct callback_head *cb)
|
||||
acct = &wqe->acct[worker->create_index];
|
||||
raw_spin_lock(&wqe->lock);
|
||||
if (acct->nr_workers < acct->max_workers) {
|
||||
if (!acct->nr_workers)
|
||||
first = true;
|
||||
acct->nr_workers++;
|
||||
do_create = true;
|
||||
}
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
if (do_create) {
|
||||
create_io_worker(wq, wqe, worker->create_index, first);
|
||||
create_io_worker(wq, wqe, worker->create_index);
|
||||
} else {
|
||||
atomic_dec(&acct->nr_running);
|
||||
io_worker_ref_put(wq);
|
||||
@@ -307,9 +305,11 @@ static void create_worker_cb(struct callback_head *cb)
|
||||
io_worker_release(worker);
|
||||
}
|
||||
|
||||
static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
|
||||
struct io_wqe_acct *acct)
|
||||
static bool io_queue_worker_create(struct io_worker *worker,
|
||||
struct io_wqe_acct *acct,
|
||||
task_work_func_t func)
|
||||
{
|
||||
struct io_wqe *wqe = worker->wqe;
|
||||
struct io_wq *wq = wqe->wq;
|
||||
|
||||
/* raced with exit, just ignore create call */
|
||||
@@ -327,16 +327,17 @@ static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
|
||||
test_and_set_bit_lock(0, &worker->create_state))
|
||||
goto fail_release;
|
||||
|
||||
init_task_work(&worker->create_work, create_worker_cb);
|
||||
init_task_work(&worker->create_work, func);
|
||||
worker->create_index = acct->index;
|
||||
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
|
||||
return;
|
||||
return true;
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
fail_release:
|
||||
io_worker_release(worker);
|
||||
fail:
|
||||
atomic_dec(&acct->nr_running);
|
||||
io_worker_ref_put(wq);
|
||||
return false;
|
||||
}
|
||||
|
||||
static void io_wqe_dec_running(struct io_worker *worker)
|
||||
@@ -348,10 +349,10 @@ static void io_wqe_dec_running(struct io_worker *worker)
|
||||
if (!(worker->flags & IO_WORKER_F_UP))
|
||||
return;
|
||||
|
||||
if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
|
||||
if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
|
||||
atomic_inc(&acct->nr_running);
|
||||
atomic_inc(&wqe->wq->worker_refs);
|
||||
io_queue_worker_create(wqe, worker, acct);
|
||||
io_queue_worker_create(worker, acct, create_worker_cb);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -363,29 +364,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
|
||||
struct io_wq_work *work)
|
||||
__must_hold(wqe->lock)
|
||||
{
|
||||
bool worker_bound, work_bound;
|
||||
|
||||
BUILD_BUG_ON((IO_WQ_ACCT_UNBOUND ^ IO_WQ_ACCT_BOUND) != 1);
|
||||
|
||||
if (worker->flags & IO_WORKER_F_FREE) {
|
||||
worker->flags &= ~IO_WORKER_F_FREE;
|
||||
hlist_nulls_del_init_rcu(&worker->nulls_node);
|
||||
}
|
||||
|
||||
/*
|
||||
* If worker is moving from bound to unbound (or vice versa), then
|
||||
* ensure we update the running accounting.
|
||||
*/
|
||||
worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
|
||||
work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
|
||||
if (worker_bound != work_bound) {
|
||||
int index = work_bound ? IO_WQ_ACCT_UNBOUND : IO_WQ_ACCT_BOUND;
|
||||
io_wqe_dec_running(worker);
|
||||
worker->flags ^= IO_WORKER_F_BOUND;
|
||||
wqe->acct[index].nr_workers--;
|
||||
wqe->acct[index ^ 1].nr_workers++;
|
||||
io_wqe_inc_running(worker);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -413,7 +395,7 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
|
||||
{
|
||||
struct io_wq *wq = wqe->wq;
|
||||
|
||||
spin_lock(&wq->hash->wait.lock);
|
||||
spin_lock_irq(&wq->hash->wait.lock);
|
||||
if (list_empty(&wqe->wait.entry)) {
|
||||
__add_wait_queue(&wq->hash->wait, &wqe->wait);
|
||||
if (!test_bit(hash, &wq->hash->map)) {
|
||||
@@ -421,48 +403,26 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
|
||||
list_del_init(&wqe->wait.entry);
|
||||
}
|
||||
}
|
||||
spin_unlock(&wq->hash->wait.lock);
|
||||
spin_unlock_irq(&wq->hash->wait.lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* We can always run the work if the worker is currently the same type as
|
||||
* the work (eg both are bound, or both are unbound). If they are not the
|
||||
* same, only allow it if incrementing the worker count would be allowed.
|
||||
*/
|
||||
static bool io_worker_can_run_work(struct io_worker *worker,
|
||||
struct io_wq_work *work)
|
||||
{
|
||||
struct io_wqe_acct *acct;
|
||||
|
||||
if (!(worker->flags & IO_WORKER_F_BOUND) !=
|
||||
!(work->flags & IO_WQ_WORK_UNBOUND))
|
||||
return true;
|
||||
|
||||
/* not the same type, check if we'd go over the limit */
|
||||
acct = io_work_get_acct(worker->wqe, work);
|
||||
return acct->nr_workers < acct->max_workers;
|
||||
}
|
||||
|
||||
static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
|
||||
struct io_worker *worker,
|
||||
bool *stalled)
|
||||
static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
|
||||
struct io_worker *worker)
|
||||
__must_hold(wqe->lock)
|
||||
{
|
||||
struct io_wq_work_node *node, *prev;
|
||||
struct io_wq_work *work, *tail;
|
||||
unsigned int stall_hash = -1U;
|
||||
struct io_wqe *wqe = worker->wqe;
|
||||
|
||||
wq_list_for_each(node, prev, &wqe->work_list) {
|
||||
wq_list_for_each(node, prev, &acct->work_list) {
|
||||
unsigned int hash;
|
||||
|
||||
work = container_of(node, struct io_wq_work, list);
|
||||
|
||||
if (!io_worker_can_run_work(worker, work))
|
||||
break;
|
||||
|
||||
/* not hashed, can run anytime */
|
||||
if (!io_wq_is_hashed(work)) {
|
||||
wq_list_del(&wqe->work_list, node, prev);
|
||||
wq_list_del(&acct->work_list, node, prev);
|
||||
return work;
|
||||
}
|
||||
|
||||
@@ -473,7 +433,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
|
||||
/* hashed, can run if not already running */
|
||||
if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
|
||||
wqe->hash_tail[hash] = NULL;
|
||||
wq_list_cut(&wqe->work_list, &tail->list, prev);
|
||||
wq_list_cut(&acct->work_list, &tail->list, prev);
|
||||
return work;
|
||||
}
|
||||
if (stall_hash == -1U)
|
||||
@@ -483,10 +443,14 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
|
||||
}
|
||||
|
||||
if (stall_hash != -1U) {
|
||||
/*
|
||||
* Set this before dropping the lock to avoid racing with new
|
||||
* work being added and clearing the stalled bit.
|
||||
*/
|
||||
set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
io_wait_on_hash(wqe, stall_hash);
|
||||
raw_spin_lock(&wqe->lock);
|
||||
*stalled = true;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
@@ -520,13 +484,13 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
|
||||
static void io_worker_handle_work(struct io_worker *worker)
|
||||
__releases(wqe->lock)
|
||||
{
|
||||
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
|
||||
struct io_wqe *wqe = worker->wqe;
|
||||
struct io_wq *wq = wqe->wq;
|
||||
bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
|
||||
|
||||
do {
|
||||
struct io_wq_work *work;
|
||||
bool stalled;
|
||||
get_next:
|
||||
/*
|
||||
* If we got some work, mark us as busy. If we didn't, but
|
||||
@@ -535,12 +499,9 @@ get_next:
|
||||
* can't make progress, any work completion or insertion will
|
||||
* clear the stalled flag.
|
||||
*/
|
||||
stalled = false;
|
||||
work = io_get_next_work(wqe, worker, &stalled);
|
||||
work = io_get_next_work(acct, worker);
|
||||
if (work)
|
||||
__io_worker_busy(wqe, worker, work);
|
||||
else if (stalled)
|
||||
wqe->flags |= IO_WQE_FLAG_STALLED;
|
||||
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
if (!work)
|
||||
@@ -572,10 +533,10 @@ get_next:
|
||||
|
||||
if (hash != -1U && !next_hashed) {
|
||||
clear_bit(hash, &wq->hash->map);
|
||||
clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
|
||||
if (wq_has_sleeper(&wq->hash->wait))
|
||||
wake_up(&wq->hash->wait);
|
||||
raw_spin_lock(&wqe->lock);
|
||||
wqe->flags &= ~IO_WQE_FLAG_STALLED;
|
||||
/* skip unnecessary unlock-lock wqe->lock */
|
||||
if (!work)
|
||||
goto get_next;
|
||||
@@ -590,8 +551,10 @@ get_next:
|
||||
static int io_wqe_worker(void *data)
|
||||
{
|
||||
struct io_worker *worker = data;
|
||||
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
|
||||
struct io_wqe *wqe = worker->wqe;
|
||||
struct io_wq *wq = wqe->wq;
|
||||
bool last_timeout = false;
|
||||
char buf[TASK_COMM_LEN];
|
||||
|
||||
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
|
||||
@@ -605,10 +568,17 @@ static int io_wqe_worker(void *data)
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
loop:
|
||||
raw_spin_lock(&wqe->lock);
|
||||
if (io_wqe_run_queue(wqe)) {
|
||||
if (io_acct_run_queue(acct)) {
|
||||
io_worker_handle_work(worker);
|
||||
goto loop;
|
||||
}
|
||||
/* timed out, exit unless we're the last worker */
|
||||
if (last_timeout && acct->nr_workers > 1) {
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
break;
|
||||
}
|
||||
last_timeout = false;
|
||||
__io_worker_idle(wqe, worker);
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
if (io_flush_signals())
|
||||
@@ -619,13 +589,11 @@ loop:
|
||||
|
||||
if (!get_signal(&ksig))
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
if (ret)
|
||||
if (fatal_signal_pending(current))
|
||||
break;
|
||||
continue;
|
||||
/* timed out, exit unless we're the fixed worker */
|
||||
if (!(worker->flags & IO_WORKER_F_FIXED))
|
||||
break;
|
||||
}
|
||||
last_timeout = !ret;
|
||||
}
|
||||
|
||||
if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
|
||||
@@ -676,36 +644,9 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
|
||||
raw_spin_unlock(&worker->wqe->lock);
|
||||
}
|
||||
|
||||
static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
|
||||
static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
struct io_wqe_acct *acct = &wqe->acct[index];
|
||||
struct io_worker *worker;
|
||||
struct task_struct *tsk;
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
|
||||
if (!worker)
|
||||
goto fail;
|
||||
|
||||
refcount_set(&worker->ref, 1);
|
||||
worker->nulls_node.pprev = NULL;
|
||||
worker->wqe = wqe;
|
||||
spin_lock_init(&worker->lock);
|
||||
init_completion(&worker->ref_done);
|
||||
|
||||
tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
|
||||
if (IS_ERR(tsk)) {
|
||||
kfree(worker);
|
||||
fail:
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&wqe->lock);
|
||||
acct->nr_workers--;
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
io_worker_ref_put(wq);
|
||||
return;
|
||||
}
|
||||
|
||||
tsk->pf_io_worker = worker;
|
||||
worker->task = tsk;
|
||||
set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
|
||||
@@ -715,14 +656,118 @@ fail:
|
||||
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
|
||||
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
|
||||
worker->flags |= IO_WORKER_F_FREE;
|
||||
if (index == IO_WQ_ACCT_BOUND)
|
||||
worker->flags |= IO_WORKER_F_BOUND;
|
||||
if (first && (worker->flags & IO_WORKER_F_BOUND))
|
||||
worker->flags |= IO_WORKER_F_FIXED;
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
wake_up_new_task(tsk);
|
||||
}
|
||||
|
||||
static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool io_should_retry_thread(long err)
|
||||
{
|
||||
switch (err) {
|
||||
case -EAGAIN:
|
||||
case -ERESTARTSYS:
|
||||
case -ERESTARTNOINTR:
|
||||
case -ERESTARTNOHAND:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void create_worker_cont(struct callback_head *cb)
|
||||
{
|
||||
struct io_worker *worker;
|
||||
struct task_struct *tsk;
|
||||
struct io_wqe *wqe;
|
||||
|
||||
worker = container_of(cb, struct io_worker, create_work);
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
wqe = worker->wqe;
|
||||
tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
|
||||
if (!IS_ERR(tsk)) {
|
||||
io_init_new_worker(wqe, worker, tsk);
|
||||
io_worker_release(worker);
|
||||
return;
|
||||
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
|
||||
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
|
||||
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&wqe->lock);
|
||||
acct->nr_workers--;
|
||||
if (!acct->nr_workers) {
|
||||
struct io_cb_cancel_data match = {
|
||||
.fn = io_wq_work_match_all,
|
||||
.cancel_all = true,
|
||||
};
|
||||
|
||||
while (io_acct_cancel_pending_work(wqe, acct, &match))
|
||||
raw_spin_lock(&wqe->lock);
|
||||
}
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
io_worker_ref_put(wqe->wq);
|
||||
return;
|
||||
}
|
||||
|
||||
/* re-create attempts grab a new worker ref, drop the existing one */
|
||||
io_worker_release(worker);
|
||||
schedule_work(&worker->work);
|
||||
}
|
||||
|
||||
static void io_workqueue_create(struct work_struct *work)
|
||||
{
|
||||
struct io_worker *worker = container_of(work, struct io_worker, work);
|
||||
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
|
||||
|
||||
if (!io_queue_worker_create(worker, acct, create_worker_cont)) {
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
io_worker_release(worker);
|
||||
}
|
||||
}
|
||||
|
||||
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
|
||||
{
|
||||
struct io_wqe_acct *acct = &wqe->acct[index];
|
||||
struct io_worker *worker;
|
||||
struct task_struct *tsk;
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
|
||||
if (!worker) {
|
||||
fail:
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&wqe->lock);
|
||||
acct->nr_workers--;
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
io_worker_ref_put(wq);
|
||||
return false;
|
||||
}
|
||||
|
||||
refcount_set(&worker->ref, 1);
|
||||
worker->wqe = wqe;
|
||||
spin_lock_init(&worker->lock);
|
||||
init_completion(&worker->ref_done);
|
||||
|
||||
if (index == IO_WQ_ACCT_BOUND)
|
||||
worker->flags |= IO_WORKER_F_BOUND;
|
||||
|
||||
tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
|
||||
if (!IS_ERR(tsk)) {
|
||||
io_init_new_worker(wqe, worker, tsk);
|
||||
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
|
||||
goto fail;
|
||||
} else {
|
||||
INIT_WORK(&worker->work, io_workqueue_create);
|
||||
schedule_work(&worker->work);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate the passed in list and call the specific function for each
|
||||
* worker that isn't exiting
|
||||
@@ -755,11 +800,6 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
|
||||
{
|
||||
struct io_wq *wq = wqe->wq;
|
||||
@@ -773,12 +813,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
|
||||
|
||||
static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
|
||||
{
|
||||
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
|
||||
unsigned int hash;
|
||||
struct io_wq_work *tail;
|
||||
|
||||
if (!io_wq_is_hashed(work)) {
|
||||
append:
|
||||
wq_list_add_tail(&work->list, &wqe->work_list);
|
||||
wq_list_add_tail(&work->list, &acct->work_list);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -788,13 +829,14 @@ append:
|
||||
if (!tail)
|
||||
goto append;
|
||||
|
||||
wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
|
||||
wq_list_add_after(&work->list, &tail->list, &acct->work_list);
|
||||
}
|
||||
|
||||
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
|
||||
{
|
||||
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
|
||||
bool do_wake;
|
||||
unsigned work_flags = work->flags;
|
||||
bool do_create;
|
||||
|
||||
/*
|
||||
* If io-wq is exiting for this task, or if the request has explicitly
|
||||
@@ -802,19 +844,36 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
|
||||
*/
|
||||
if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
|
||||
(work->flags & IO_WQ_WORK_CANCEL)) {
|
||||
run_cancel:
|
||||
io_run_cancel(work, wqe);
|
||||
return;
|
||||
}
|
||||
|
||||
raw_spin_lock(&wqe->lock);
|
||||
io_wqe_insert_work(wqe, work);
|
||||
wqe->flags &= ~IO_WQE_FLAG_STALLED;
|
||||
do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
|
||||
!atomic_read(&acct->nr_running);
|
||||
clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
|
||||
|
||||
rcu_read_lock();
|
||||
do_create = !io_wqe_activate_free_worker(wqe, acct);
|
||||
rcu_read_unlock();
|
||||
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
|
||||
if (do_wake)
|
||||
io_wqe_wake_worker(wqe, acct);
|
||||
if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) ||
|
||||
!atomic_read(&acct->nr_running))) {
|
||||
bool did_create;
|
||||
|
||||
did_create = io_wqe_create_worker(wqe, acct);
|
||||
if (unlikely(!did_create)) {
|
||||
raw_spin_lock(&wqe->lock);
|
||||
/* fatal condition, failed to create the first worker */
|
||||
if (!acct->nr_workers) {
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
goto run_cancel;
|
||||
}
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
|
||||
@@ -859,6 +918,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
|
||||
struct io_wq_work *work,
|
||||
struct io_wq_work_node *prev)
|
||||
{
|
||||
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
|
||||
unsigned int hash = io_get_work_hash(work);
|
||||
struct io_wq_work *prev_work = NULL;
|
||||
|
||||
@@ -870,18 +930,18 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
|
||||
else
|
||||
wqe->hash_tail[hash] = NULL;
|
||||
}
|
||||
wq_list_del(&wqe->work_list, &work->list, prev);
|
||||
wq_list_del(&acct->work_list, &work->list, prev);
|
||||
}
|
||||
|
||||
static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
|
||||
struct io_cb_cancel_data *match)
|
||||
static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
|
||||
struct io_wqe_acct *acct,
|
||||
struct io_cb_cancel_data *match)
|
||||
__releases(wqe->lock)
|
||||
{
|
||||
struct io_wq_work_node *node, *prev;
|
||||
struct io_wq_work *work;
|
||||
|
||||
retry:
|
||||
raw_spin_lock(&wqe->lock);
|
||||
wq_list_for_each(node, prev, &wqe->work_list) {
|
||||
wq_list_for_each(node, prev, &acct->work_list) {
|
||||
work = container_of(node, struct io_wq_work, list);
|
||||
if (!match->fn(work, match->data))
|
||||
continue;
|
||||
@@ -889,11 +949,27 @@ retry:
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
io_run_cancel(work, wqe);
|
||||
match->nr_pending++;
|
||||
if (!match->cancel_all)
|
||||
return;
|
||||
|
||||
/* not safe to continue after unlock */
|
||||
goto retry;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
|
||||
struct io_cb_cancel_data *match)
|
||||
{
|
||||
int i;
|
||||
retry:
|
||||
raw_spin_lock(&wqe->lock);
|
||||
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
|
||||
struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
|
||||
|
||||
if (io_acct_cancel_pending_work(wqe, acct, match)) {
|
||||
if (match->cancel_all)
|
||||
goto retry;
|
||||
return;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock(&wqe->lock);
|
||||
}
|
||||
@@ -954,18 +1030,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
|
||||
int sync, void *key)
|
||||
{
|
||||
struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
|
||||
int i;
|
||||
|
||||
list_del_init(&wait->entry);
|
||||
|
||||
rcu_read_lock();
|
||||
io_wqe_activate_free_worker(wqe);
|
||||
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
|
||||
struct io_wqe_acct *acct = &wqe->acct[i];
|
||||
|
||||
if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
|
||||
io_wqe_activate_free_worker(wqe, acct);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
|
||||
{
|
||||
int ret, node;
|
||||
int ret, node, i;
|
||||
struct io_wq *wq;
|
||||
|
||||
if (WARN_ON_ONCE(!data->free_work || !data->do_work))
|
||||
@@ -1000,18 +1082,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
|
||||
cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
|
||||
wq->wqes[node] = wqe;
|
||||
wqe->node = alloc_node;
|
||||
wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
|
||||
wqe->acct[IO_WQ_ACCT_UNBOUND].index = IO_WQ_ACCT_UNBOUND;
|
||||
wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
|
||||
atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
|
||||
wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
|
||||
task_rlimit(current, RLIMIT_NPROC);
|
||||
atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
|
||||
wqe->wait.func = io_wqe_hash_wake;
|
||||
INIT_LIST_HEAD(&wqe->wait.entry);
|
||||
wqe->wait.func = io_wqe_hash_wake;
|
||||
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
|
||||
struct io_wqe_acct *acct = &wqe->acct[i];
|
||||
|
||||
acct->index = i;
|
||||
atomic_set(&acct->nr_running, 0);
|
||||
INIT_WQ_LIST(&acct->work_list);
|
||||
}
|
||||
wqe->wq = wq;
|
||||
raw_spin_lock_init(&wqe->lock);
|
||||
INIT_WQ_LIST(&wqe->work_list);
|
||||
INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
|
||||
INIT_LIST_HEAD(&wqe->all_list);
|
||||
}
|
||||
@@ -1038,7 +1122,7 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
|
||||
{
|
||||
struct io_worker *worker;
|
||||
|
||||
if (cb->func != create_worker_cb)
|
||||
if (cb->func != create_worker_cb || cb->func != create_worker_cont)
|
||||
return false;
|
||||
worker = container_of(cb, struct io_worker, create_work);
|
||||
return worker->wqe->wq == data;
|
||||
@@ -1193,7 +1277,7 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
|
||||
for_each_node(node) {
|
||||
struct io_wqe_acct *acct;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
|
||||
acct = &wq->wqes[node]->acct[i];
|
||||
prev = max_t(int, acct->max_workers, prev);
|
||||
if (new_count[i])
|
||||
|
||||
@@ -1021,6 +1021,7 @@ static const struct io_op_def io_op_defs[] = {
|
||||
},
|
||||
[IORING_OP_WRITE] = {
|
||||
.needs_file = 1,
|
||||
.hash_reg_file = 1,
|
||||
.unbound_nonreg_file = 1,
|
||||
.pollout = 1,
|
||||
.plug = 1,
|
||||
@@ -1851,6 +1852,17 @@ static void io_req_complete_failed(struct io_kiocb *req, long res)
|
||||
io_req_complete_post(req, res, 0);
|
||||
}
|
||||
|
||||
static void io_req_complete_fail_submit(struct io_kiocb *req)
|
||||
{
|
||||
/*
|
||||
* We don't submit, fail them all, for that replace hardlinks with
|
||||
* normal links. Extra REQ_F_LINK is tolerated.
|
||||
*/
|
||||
req->flags &= ~REQ_F_HARDLINK;
|
||||
req->flags |= REQ_F_LINK;
|
||||
io_req_complete_failed(req, req->result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't initialise the fields below on every allocation, but do that in
|
||||
* advance and keep them valid across allocations.
|
||||
@@ -2119,6 +2131,9 @@ static void tctx_task_work(struct callback_head *cb)
|
||||
while (1) {
|
||||
struct io_wq_work_node *node;
|
||||
|
||||
if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
|
||||
io_submit_flush_completions(ctx);
|
||||
|
||||
spin_lock_irq(&tctx->task_lock);
|
||||
node = tctx->task_list.first;
|
||||
INIT_WQ_LIST(&tctx->task_list);
|
||||
@@ -2673,7 +2688,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
|
||||
{
|
||||
if (__io_complete_rw_common(req, res))
|
||||
return;
|
||||
__io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
|
||||
__io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
|
||||
}
|
||||
|
||||
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
|
||||
@@ -3410,6 +3425,12 @@ static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static bool need_read_all(struct io_kiocb *req)
|
||||
{
|
||||
return req->flags & REQ_F_ISREG ||
|
||||
S_ISBLK(file_inode(req->file)->i_mode);
|
||||
}
|
||||
|
||||
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
|
||||
@@ -3464,7 +3485,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
|
||||
} else if (ret == -EIOCBQUEUED) {
|
||||
goto out_free;
|
||||
} else if (ret <= 0 || ret == io_size || !force_nonblock ||
|
||||
(req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
|
||||
(req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
|
||||
/* read all, failed, already did sync or don't want to retry */
|
||||
goto done;
|
||||
}
|
||||
@@ -5249,7 +5270,7 @@ static void io_poll_remove_double(struct io_kiocb *req)
|
||||
}
|
||||
}
|
||||
|
||||
static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
|
||||
static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
|
||||
__must_hold(&req->ctx->completion_lock)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
@@ -5271,10 +5292,19 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
|
||||
if (flags & IORING_CQE_F_MORE)
|
||||
ctx->cq_extra++;
|
||||
|
||||
io_commit_cqring(ctx);
|
||||
return !(flags & IORING_CQE_F_MORE);
|
||||
}
|
||||
|
||||
static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
|
||||
__must_hold(&req->ctx->completion_lock)
|
||||
{
|
||||
bool done;
|
||||
|
||||
done = __io_poll_complete(req, mask);
|
||||
io_commit_cqring(req->ctx);
|
||||
return done;
|
||||
}
|
||||
|
||||
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
@@ -5285,7 +5315,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
|
||||
} else {
|
||||
bool done;
|
||||
|
||||
done = io_poll_complete(req, req->result);
|
||||
done = __io_poll_complete(req, req->result);
|
||||
if (done) {
|
||||
io_poll_remove_double(req);
|
||||
hash_del(&req->hash_node);
|
||||
@@ -5293,6 +5323,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
|
||||
req->result = 0;
|
||||
add_wait_queue(req->poll.head, &req->poll.wait);
|
||||
}
|
||||
io_commit_cqring(ctx);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
io_cqring_ev_posted(ctx);
|
||||
|
||||
@@ -6398,6 +6429,11 @@ static bool io_drain_req(struct io_kiocb *req)
|
||||
int ret;
|
||||
u32 seq;
|
||||
|
||||
if (req->flags & REQ_F_FAIL) {
|
||||
io_req_complete_fail_submit(req);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we need to drain a request in the middle of a link, drain the
|
||||
* head request and the next request/link after the current link.
|
||||
@@ -6914,7 +6950,7 @@ static inline void io_queue_sqe(struct io_kiocb *req)
|
||||
if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
|
||||
__io_queue_sqe(req);
|
||||
} else if (req->flags & REQ_F_FAIL) {
|
||||
io_req_complete_failed(req, req->result);
|
||||
io_req_complete_fail_submit(req);
|
||||
} else {
|
||||
int ret = io_req_prep_async(req);
|
||||
|
||||
@@ -10498,26 +10534,46 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
|
||||
static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
|
||||
void __user *arg)
|
||||
{
|
||||
struct io_uring_task *tctx = current->io_uring;
|
||||
struct io_uring_task *tctx = NULL;
|
||||
struct io_sq_data *sqd = NULL;
|
||||
__u32 new_count[2];
|
||||
int i, ret;
|
||||
|
||||
if (!tctx || !tctx->io_wq)
|
||||
return -EINVAL;
|
||||
if (copy_from_user(new_count, arg, sizeof(new_count)))
|
||||
return -EFAULT;
|
||||
for (i = 0; i < ARRAY_SIZE(new_count); i++)
|
||||
if (new_count[i] > INT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
if (ctx->flags & IORING_SETUP_SQPOLL) {
|
||||
sqd = ctx->sq_data;
|
||||
if (sqd) {
|
||||
mutex_lock(&sqd->lock);
|
||||
tctx = sqd->thread->io_uring;
|
||||
}
|
||||
} else {
|
||||
tctx = current->io_uring;
|
||||
}
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!tctx || !tctx->io_wq)
|
||||
goto err;
|
||||
|
||||
ret = io_wq_max_workers(tctx->io_wq, new_count);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err;
|
||||
|
||||
if (sqd)
|
||||
mutex_unlock(&sqd->lock);
|
||||
|
||||
if (copy_to_user(arg, new_count, sizeof(new_count)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
err:
|
||||
if (sqd)
|
||||
mutex_unlock(&sqd->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool io_register_op_must_quiesce(int op)
|
||||
|
||||
@@ -2941,12 +2941,10 @@ static int __init filelock_init(void)
|
||||
int i;
|
||||
|
||||
flctx_cache = kmem_cache_create("file_lock_ctx",
|
||||
sizeof(struct file_lock_context), 0,
|
||||
SLAB_PANIC | SLAB_ACCOUNT, NULL);
|
||||
sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
|
||||
|
||||
filelock_cache = kmem_cache_create("file_lock_cache",
|
||||
sizeof(struct file_lock), 0,
|
||||
SLAB_PANIC | SLAB_ACCOUNT, NULL);
|
||||
sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
|
||||
|
||||
@@ -191,7 +191,7 @@ EXPORT_SYMBOL(generic_pipe_buf_try_steal);
|
||||
*/
|
||||
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
|
||||
{
|
||||
return try_get_compound_head(buf->page, 1);
|
||||
return try_get_page(buf->page);
|
||||
}
|
||||
EXPORT_SYMBOL(generic_pipe_buf_get);
|
||||
|
||||
|
||||
@@ -655,7 +655,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
|
||||
goto out_nofds;
|
||||
|
||||
alloc_size = 6 * size;
|
||||
bits = kvmalloc(alloc_size, GFP_KERNEL_ACCOUNT);
|
||||
bits = kvmalloc(alloc_size, GFP_KERNEL);
|
||||
if (!bits)
|
||||
goto out_nofds;
|
||||
}
|
||||
@@ -1000,7 +1000,7 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
|
||||
|
||||
len = min(todo, POLLFD_PER_PAGE);
|
||||
walk = walk->next = kmalloc(struct_size(walk, entries, len),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
GFP_KERNEL);
|
||||
if (!walk) {
|
||||
err = -ENOMEM;
|
||||
goto out_fds;
|
||||
|
||||
@@ -13,6 +13,8 @@
* Copyright (C) 2009 Jason Wessel <jason.wessel@windriver.com>
*/

#include <linux/list.h>

/* Shifted versions of the command enable bits are be used if the command
* has no arguments (see kdb_check_flags). This allows commands, such as
* go, to have different permissions depending upon whether it is called
@@ -64,6 +66,17 @@ typedef enum {

typedef int (*kdb_func_t)(int, const char **);

/* The KDB shell command table */
typedef struct _kdbtab {
char *name; /* Command name */
kdb_func_t func; /* Function to execute command */
char *usage; /* Usage String for this command */
char *help; /* Help message for this command */
short minlen; /* Minimum legal # cmd chars required */
kdb_cmdflags_t flags; /* Command behaviour flags */
struct list_head list_node; /* Command list */
} kdbtab_t;

#ifdef CONFIG_KGDB_KDB
#include <linux/init.h>
#include <linux/sched.h>
@@ -193,19 +206,13 @@ static inline const char *kdb_walk_kallsyms(loff_t *pos)
#endif /* ! CONFIG_KALLSYMS */

/* Dynamic kdb shell command registration */
extern int kdb_register(char *, kdb_func_t, char *, char *, short);
extern int kdb_register_flags(char *, kdb_func_t, char *, char *,
short, kdb_cmdflags_t);
extern int kdb_unregister(char *);
extern int kdb_register(kdbtab_t *cmd);
extern void kdb_unregister(kdbtab_t *cmd);
#else /* ! CONFIG_KGDB_KDB */
static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
static inline void kdb_init(int level) {}
static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
char *help, short minlen) { return 0; }
static inline int kdb_register_flags(char *cmd, kdb_func_t func, char *usage,
char *help, short minlen,
kdb_cmdflags_t flags) { return 0; }
static inline int kdb_unregister(char *cmd) { return 0; }
static inline int kdb_register(kdbtab_t *cmd) { return 0; }
static inline void kdb_unregister(kdbtab_t *cmd) {}
#endif /* CONFIG_KGDB_KDB */
enum {
KDB_NOT_INITIALIZED,
|
||||
@@ -426,6 +426,7 @@ enum {
|
||||
ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */
|
||||
ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */
|
||||
ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */
|
||||
ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */
|
||||
|
||||
/* DMA mask for user DMA control: User visible values; DO NOT
|
||||
renumber */
|
||||
|
||||
@@ -1218,7 +1218,15 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags);
|
||||
struct page *try_grab_compound_head(struct page *page, int refs,
|
||||
unsigned int flags);
|
||||
|
||||
struct page *try_get_compound_head(struct page *page, int refs);
|
||||
|
||||
static inline __must_check bool try_get_page(struct page *page)
|
||||
{
|
||||
page = compound_head(page);
|
||||
if (WARN_ON_ONCE(page_ref_count(page) <= 0))
|
||||
return false;
|
||||
page_ref_inc(page);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void put_page(struct page *page)
|
||||
{
|
||||
|
||||
@@ -310,8 +310,10 @@ enum {
|
||||
TRACE_EVENT_FL_NO_SET_FILTER_BIT,
|
||||
TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
|
||||
TRACE_EVENT_FL_TRACEPOINT_BIT,
|
||||
TRACE_EVENT_FL_DYNAMIC_BIT,
|
||||
TRACE_EVENT_FL_KPROBE_BIT,
|
||||
TRACE_EVENT_FL_UPROBE_BIT,
|
||||
TRACE_EVENT_FL_EPROBE_BIT,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -321,8 +323,10 @@ enum {
|
||||
* NO_SET_FILTER - Set when filter has error and is to be ignored
|
||||
* IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
|
||||
* TRACEPOINT - Event is a tracepoint
|
||||
* DYNAMIC - Event is a dynamic event (created at run time)
|
||||
* KPROBE - Event is a kprobe
|
||||
* UPROBE - Event is a uprobe
|
||||
* EPROBE - Event is an event probe
|
||||
*/
|
||||
enum {
|
||||
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
|
||||
@@ -330,8 +334,10 @@ enum {
|
||||
TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
|
||||
TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
|
||||
TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
|
||||
TRACE_EVENT_FL_DYNAMIC = (1 << TRACE_EVENT_FL_DYNAMIC_BIT),
|
||||
TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT),
|
||||
TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT),
|
||||
TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT),
|
||||
};
|
||||
|
||||
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
|
||||
@@ -347,7 +353,14 @@ struct trace_event_call {
|
||||
struct trace_event event;
|
||||
char *print_fmt;
|
||||
struct event_filter *filter;
|
||||
void *mod;
|
||||
/*
|
||||
* Static events can disappear with modules,
|
||||
* where as dynamic ones need their own ref count.
|
||||
*/
|
||||
union {
|
||||
void *module;
|
||||
atomic_t refcnt;
|
||||
};
|
||||
void *data;
|
||||
|
||||
/* See the TRACE_EVENT_FL_* flags above */
|
||||
@@ -363,6 +376,42 @@ struct trace_event_call {
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_EVENTS
|
||||
bool trace_event_dyn_try_get_ref(struct trace_event_call *call);
|
||||
void trace_event_dyn_put_ref(struct trace_event_call *call);
|
||||
bool trace_event_dyn_busy(struct trace_event_call *call);
|
||||
#else
|
||||
static inline bool trace_event_dyn_try_get_ref(struct trace_event_call *call)
|
||||
{
|
||||
/* Without DYNAMIC_EVENTS configured, nothing should be calling this */
|
||||
return false;
|
||||
}
|
||||
static inline void trace_event_dyn_put_ref(struct trace_event_call *call)
|
||||
{
|
||||
}
|
||||
static inline bool trace_event_dyn_busy(struct trace_event_call *call)
|
||||
{
|
||||
/* Nothing should call this without DYNAIMIC_EVENTS configured. */
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline bool trace_event_try_get_ref(struct trace_event_call *call)
|
||||
{
|
||||
if (call->flags & TRACE_EVENT_FL_DYNAMIC)
|
||||
return trace_event_dyn_try_get_ref(call);
|
||||
else
|
||||
return try_module_get(call->module);
|
||||
}
|
||||
|
||||
static inline void trace_event_put_ref(struct trace_event_call *call)
|
||||
{
|
||||
if (call->flags & TRACE_EVENT_FL_DYNAMIC)
|
||||
trace_event_dyn_put_ref(call);
|
||||
else
|
||||
module_put(call->module);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
static inline bool bpf_prog_array_valid(struct trace_event_call *call)
|
||||
{
|
||||
@@ -634,6 +683,7 @@ enum event_trigger_type {
|
||||
ETT_EVENT_ENABLE = (1 << 3),
|
||||
ETT_EVENT_HIST = (1 << 4),
|
||||
ETT_HIST_ENABLE = (1 << 5),
|
||||
ETT_EVENT_EPROBE = (1 << 6),
|
||||
};
|
||||
|
||||
extern int filter_match_preds(struct event_filter *filter, void *rec);
|
||||
|
||||
@@ -475,7 +475,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
|
||||
* *
|
||||
* * The declared 'local variable' is called '__entry'
|
||||
* *
|
||||
* * __field(pid_t, prev_prid) is equivalent to a standard declaration:
|
||||
* * __field(pid_t, prev_pid) is equivalent to a standard declaration:
|
||||
* *
|
||||
* * pid_t prev_pid;
|
||||
* *
|
||||
|
||||
@@ -295,14 +295,14 @@ TRACE_EVENT(io_uring_fail_link,
|
||||
*/
|
||||
TRACE_EVENT(io_uring_complete,
|
||||
|
||||
TP_PROTO(void *ctx, u64 user_data, long res, unsigned cflags),
|
||||
TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),
|
||||
|
||||
TP_ARGS(ctx, user_data, res, cflags),
|
||||
|
||||
TP_STRUCT__entry (
|
||||
__field( void *, ctx )
|
||||
__field( u64, user_data )
|
||||
__field( long, res )
|
||||
__field( int, res )
|
||||
__field( unsigned, cflags )
|
||||
),
|
||||
|
||||
@@ -313,7 +313,7 @@ TRACE_EVENT(io_uring_complete,
|
||||
__entry->cflags = cflags;
|
||||
),
|
||||
|
||||
TP_printk("ring %p, user_data 0x%llx, result %ld, cflags %x",
|
||||
TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
|
||||
__entry->ctx, (unsigned long long)__entry->user_data,
|
||||
__entry->res, __entry->cflags)
|
||||
);
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Kernel Debug Core
|
||||
*
|
||||
@@ -22,10 +23,6 @@
|
||||
*
|
||||
* Original KGDB stub: David Grothe <dave@gcom.com>,
|
||||
* Tigran Aivazian <tigran@sco.com>
|
||||
*
|
||||
* This file is licensed under the terms of the GNU General Public License
|
||||
* version 2. This program is licensed "as is" without any warranty of any
|
||||
* kind, whether express or implied.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "KGDB: " fmt
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Kernel Debug Core
|
||||
*
|
||||
@@ -22,10 +23,6 @@
|
||||
*
|
||||
* Original KGDB stub: David Grothe <dave@gcom.com>,
|
||||
* Tigran Aivazian <tigran@sco.com>
|
||||
*
|
||||
* This file is licensed under the terms of the GNU General Public License
|
||||
* version 2. This program is licensed "as is" without any warranty of any
|
||||
* kind, whether express or implied.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
|
||||
@@ -523,51 +523,51 @@ static int kdb_ss(int argc, const char **argv)
|
||||
}
|
||||
|
||||
static kdbtab_t bptab[] = {
|
||||
{ .cmd_name = "bp",
|
||||
.cmd_func = kdb_bp,
|
||||
.cmd_usage = "[<vaddr>]",
|
||||
.cmd_help = "Set/Display breakpoints",
|
||||
.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
|
||||
{ .name = "bp",
|
||||
.func = kdb_bp,
|
||||
.usage = "[<vaddr>]",
|
||||
.help = "Set/Display breakpoints",
|
||||
.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "bl",
|
||||
.cmd_func = kdb_bp,
|
||||
.cmd_usage = "[<vaddr>]",
|
||||
.cmd_help = "Display breakpoints",
|
||||
.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
|
||||
{ .name = "bl",
|
||||
.func = kdb_bp,
|
||||
.usage = "[<vaddr>]",
|
||||
.help = "Display breakpoints",
|
||||
.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "bc",
|
||||
.cmd_func = kdb_bc,
|
||||
.cmd_usage = "<bpnum>",
|
||||
.cmd_help = "Clear Breakpoint",
|
||||
.cmd_flags = KDB_ENABLE_FLOW_CTRL,
|
||||
{ .name = "bc",
|
||||
.func = kdb_bc,
|
||||
.usage = "<bpnum>",
|
||||
.help = "Clear Breakpoint",
|
||||
.flags = KDB_ENABLE_FLOW_CTRL,
|
||||
},
|
||||
{ .cmd_name = "be",
|
||||
.cmd_func = kdb_bc,
|
||||
.cmd_usage = "<bpnum>",
|
||||
.cmd_help = "Enable Breakpoint",
|
||||
.cmd_flags = KDB_ENABLE_FLOW_CTRL,
|
||||
{ .name = "be",
|
||||
.func = kdb_bc,
|
||||
.usage = "<bpnum>",
|
||||
.help = "Enable Breakpoint",
|
||||
.flags = KDB_ENABLE_FLOW_CTRL,
|
||||
},
|
||||
{ .cmd_name = "bd",
|
||||
.cmd_func = kdb_bc,
|
||||
.cmd_usage = "<bpnum>",
|
||||
.cmd_help = "Disable Breakpoint",
|
||||
.cmd_flags = KDB_ENABLE_FLOW_CTRL,
|
||||
{ .name = "bd",
|
||||
.func = kdb_bc,
|
||||
.usage = "<bpnum>",
|
||||
.help = "Disable Breakpoint",
|
||||
.flags = KDB_ENABLE_FLOW_CTRL,
|
||||
},
|
||||
{ .cmd_name = "ss",
|
||||
.cmd_func = kdb_ss,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Single Step",
|
||||
.cmd_minlen = 1,
|
||||
.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
|
||||
{ .name = "ss",
|
||||
.func = kdb_ss,
|
||||
.usage = "",
|
||||
.help = "Single Step",
|
||||
.minlen = 1,
|
||||
.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
|
||||
},
|
||||
};
|
||||
|
||||
static kdbtab_t bphcmd = {
|
||||
.cmd_name = "bph",
|
||||
.cmd_func = kdb_bp,
|
||||
.cmd_usage = "[<vaddr>]",
|
||||
.cmd_help = "[datar [length]|dataw [length]] Set hw brk",
|
||||
.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
|
||||
.name = "bph",
|
||||
.func = kdb_bp,
|
||||
.usage = "[<vaddr>]",
|
||||
.help = "[datar [length]|dataw [length]] Set hw brk",
|
||||
.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
|
||||
};
|
||||
|
||||
/* Initialize the breakpoint table and register breakpoint commands. */
|
||||
|
||||
@@ -140,7 +140,6 @@ int kdb_stub(struct kgdb_state *ks)
|
||||
*/
|
||||
kdb_common_deinit_state();
|
||||
KDB_STATE_CLEAR(PAGER);
|
||||
kdbnearsym_cleanup();
|
||||
if (error == KDB_CMD_KGDB) {
|
||||
if (KDB_STATE(DOING_KGDB))
|
||||
KDB_STATE_CLEAR(DOING_KGDB);
|
||||
|
||||
@@ -33,7 +33,6 @@
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/kgdb.h>
|
||||
#include <linux/kdb.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/delay.h>
|
||||
@@ -654,16 +653,17 @@ static void kdb_cmderror(int diag)
|
||||
* Returns:
|
||||
* zero for success, a kdb diagnostic if error
|
||||
*/
|
||||
struct defcmd_set {
|
||||
int count;
|
||||
bool usable;
|
||||
char *name;
|
||||
char *usage;
|
||||
char *help;
|
||||
char **command;
|
||||
struct kdb_macro {
|
||||
kdbtab_t cmd; /* Macro command */
|
||||
struct list_head statements; /* Associated statement list */
|
||||
};
|
||||
static struct defcmd_set *defcmd_set;
|
||||
static int defcmd_set_count;
|
||||
|
||||
struct kdb_macro_statement {
|
||||
char *statement; /* Statement text */
|
||||
struct list_head list_node; /* Statement list node */
|
||||
};
|
||||
|
||||
static struct kdb_macro *kdb_macro;
|
||||
static bool defcmd_in_progress;
|
||||
|
||||
/* Forward references */
|
||||
@@ -671,53 +671,55 @@ static int kdb_exec_defcmd(int argc, const char **argv);
|
||||
|
||||
static int kdb_defcmd2(const char *cmdstr, const char *argv0)
|
||||
{
|
||||
struct defcmd_set *s = defcmd_set + defcmd_set_count - 1;
|
||||
char **save_command = s->command;
|
||||
struct kdb_macro_statement *kms;
|
||||
|
||||
if (!kdb_macro)
|
||||
return KDB_NOTIMP;
|
||||
|
||||
if (strcmp(argv0, "endefcmd") == 0) {
|
||||
defcmd_in_progress = false;
|
||||
if (!s->count)
|
||||
s->usable = false;
|
||||
if (s->usable)
|
||||
/* macros are always safe because when executed each
|
||||
* internal command re-enters kdb_parse() and is
|
||||
* safety checked individually.
|
||||
*/
|
||||
kdb_register_flags(s->name, kdb_exec_defcmd, s->usage,
|
||||
s->help, 0,
|
||||
KDB_ENABLE_ALWAYS_SAFE);
|
||||
if (!list_empty(&kdb_macro->statements))
|
||||
kdb_register(&kdb_macro->cmd);
|
||||
return 0;
|
||||
}
|
||||
if (!s->usable)
|
||||
return KDB_NOTIMP;
|
||||
s->command = kcalloc(s->count + 1, sizeof(*(s->command)), GFP_KDB);
|
||||
if (!s->command) {
|
||||
kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
|
||||
|
||||
kms = kmalloc(sizeof(*kms), GFP_KDB);
|
||||
if (!kms) {
|
||||
kdb_printf("Could not allocate new kdb macro command: %s\n",
|
||||
cmdstr);
|
||||
s->usable = false;
|
||||
return KDB_NOTIMP;
|
||||
}
|
||||
memcpy(s->command, save_command, s->count * sizeof(*(s->command)));
|
||||
s->command[s->count++] = kdb_strdup(cmdstr, GFP_KDB);
|
||||
kfree(save_command);
|
||||
|
||||
kms->statement = kdb_strdup(cmdstr, GFP_KDB);
|
||||
list_add_tail(&kms->list_node, &kdb_macro->statements);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kdb_defcmd(int argc, const char **argv)
|
||||
{
|
||||
struct defcmd_set *save_defcmd_set = defcmd_set, *s;
|
||||
kdbtab_t *mp;
|
||||
|
||||
if (defcmd_in_progress) {
|
||||
kdb_printf("kdb: nested defcmd detected, assuming missing "
|
||||
"endefcmd\n");
|
||||
kdb_defcmd2("endefcmd", "endefcmd");
|
||||
}
|
||||
if (argc == 0) {
|
||||
int i;
|
||||
for (s = defcmd_set; s < defcmd_set + defcmd_set_count; ++s) {
|
||||
kdb_printf("defcmd %s \"%s\" \"%s\"\n", s->name,
|
||||
s->usage, s->help);
|
||||
for (i = 0; i < s->count; ++i)
|
||||
kdb_printf("%s", s->command[i]);
|
||||
kdb_printf("endefcmd\n");
|
||||
kdbtab_t *kp;
|
||||
struct kdb_macro *kmp;
|
||||
struct kdb_macro_statement *kms;
|
||||
|
||||
list_for_each_entry(kp, &kdb_cmds_head, list_node) {
|
||||
if (kp->func == kdb_exec_defcmd) {
|
||||
kdb_printf("defcmd %s \"%s\" \"%s\"\n",
|
||||
kp->name, kp->usage, kp->help);
|
||||
kmp = container_of(kp, struct kdb_macro, cmd);
|
||||
list_for_each_entry(kms, &kmp->statements,
|
||||
list_node)
|
||||
kdb_printf("%s", kms->statement);
|
||||
kdb_printf("endefcmd\n");
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -727,45 +729,43 @@ static int kdb_defcmd(int argc, const char **argv)
|
||||
kdb_printf("Command only available during kdb_init()\n");
|
||||
return KDB_NOTIMP;
|
||||
}
|
||||
defcmd_set = kmalloc_array(defcmd_set_count + 1, sizeof(*defcmd_set),
|
||||
GFP_KDB);
|
||||
if (!defcmd_set)
|
||||
kdb_macro = kzalloc(sizeof(*kdb_macro), GFP_KDB);
|
||||
if (!kdb_macro)
|
||||
goto fail_defcmd;
|
||||
memcpy(defcmd_set, save_defcmd_set,
|
||||
defcmd_set_count * sizeof(*defcmd_set));
|
||||
s = defcmd_set + defcmd_set_count;
|
||||
memset(s, 0, sizeof(*s));
|
||||
s->usable = true;
|
||||
s->name = kdb_strdup(argv[1], GFP_KDB);
|
||||
if (!s->name)
|
||||
|
||||
mp = &kdb_macro->cmd;
|
||||
mp->func = kdb_exec_defcmd;
|
||||
mp->minlen = 0;
|
||||
mp->flags = KDB_ENABLE_ALWAYS_SAFE;
|
||||
mp->name = kdb_strdup(argv[1], GFP_KDB);
|
||||
if (!mp->name)
|
||||
goto fail_name;
|
||||
s->usage = kdb_strdup(argv[2], GFP_KDB);
|
||||
if (!s->usage)
|
||||
mp->usage = kdb_strdup(argv[2], GFP_KDB);
|
||||
if (!mp->usage)
|
||||
goto fail_usage;
|
||||
s->help = kdb_strdup(argv[3], GFP_KDB);
|
||||
if (!s->help)
|
||||
mp->help = kdb_strdup(argv[3], GFP_KDB);
|
||||
if (!mp->help)
|
||||
goto fail_help;
|
||||
if (s->usage[0] == '"') {
|
||||
strcpy(s->usage, argv[2]+1);
|
||||
s->usage[strlen(s->usage)-1] = '\0';
|
||||
if (mp->usage[0] == '"') {
|
||||
strcpy(mp->usage, argv[2]+1);
|
||||
mp->usage[strlen(mp->usage)-1] = '\0';
|
||||
}
|
||||
if (s->help[0] == '"') {
|
||||
strcpy(s->help, argv[3]+1);
|
||||
s->help[strlen(s->help)-1] = '\0';
|
||||
if (mp->help[0] == '"') {
|
||||
strcpy(mp->help, argv[3]+1);
|
||||
mp->help[strlen(mp->help)-1] = '\0';
|
||||
}
|
||||
++defcmd_set_count;
|
||||
|
||||
INIT_LIST_HEAD(&kdb_macro->statements);
|
||||
defcmd_in_progress = true;
|
||||
kfree(save_defcmd_set);
|
||||
return 0;
|
||||
fail_help:
|
||||
kfree(s->usage);
|
||||
kfree(mp->usage);
|
||||
fail_usage:
|
||||
kfree(s->name);
|
||||
kfree(mp->name);
|
||||
fail_name:
|
||||
kfree(defcmd_set);
|
||||
kfree(kdb_macro);
|
||||
fail_defcmd:
|
||||
kdb_printf("Could not allocate new defcmd_set entry for %s\n", argv[1]);
|
||||
defcmd_set = save_defcmd_set;
|
||||
kdb_printf("Could not allocate new kdb_macro entry for %s\n", argv[1]);
|
||||
return KDB_NOTIMP;
|
||||
}
|
||||
|
||||
@@ -780,25 +780,31 @@ fail_defcmd:
|
||||
*/
|
||||
static int kdb_exec_defcmd(int argc, const char **argv)
|
||||
{
|
||||
int i, ret;
|
||||
struct defcmd_set *s;
|
||||
int ret;
|
||||
kdbtab_t *kp;
|
||||
struct kdb_macro *kmp;
|
||||
struct kdb_macro_statement *kms;
|
||||
|
||||
if (argc != 0)
|
||||
return KDB_ARGCOUNT;
|
||||
for (s = defcmd_set, i = 0; i < defcmd_set_count; ++i, ++s) {
|
||||
if (strcmp(s->name, argv[0]) == 0)
|
||||
|
||||
list_for_each_entry(kp, &kdb_cmds_head, list_node) {
|
||||
if (strcmp(kp->name, argv[0]) == 0)
|
||||
break;
|
||||
}
|
||||
if (i == defcmd_set_count) {
|
||||
if (list_entry_is_head(kp, &kdb_cmds_head, list_node)) {
|
||||
kdb_printf("kdb_exec_defcmd: could not find commands for %s\n",
|
||||
argv[0]);
|
||||
return KDB_NOTIMP;
|
||||
}
|
||||
for (i = 0; i < s->count; ++i) {
|
||||
/* Recursive use of kdb_parse, do not use argv after
|
||||
* this point */
|
||||
kmp = container_of(kp, struct kdb_macro, cmd);
|
||||
list_for_each_entry(kms, &kmp->statements, list_node) {
|
||||
/*
|
||||
* Recursive use of kdb_parse, do not use argv after this point.
|
||||
*/
|
||||
argv = NULL;
|
||||
kdb_printf("[%s]kdb> %s\n", s->name, s->command[i]);
|
||||
ret = kdb_parse(s->command[i]);
|
||||
kdb_printf("[%s]kdb> %s\n", kmp->cmd.name, kms->statement);
|
||||
ret = kdb_parse(kms->statement);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@@ -1009,11 +1015,11 @@ int kdb_parse(const char *cmdstr)
|
||||
* If this command is allowed to be abbreviated,
|
||||
* check to see if this is it.
|
||||
*/
|
||||
if (tp->cmd_minlen && (strlen(argv[0]) <= tp->cmd_minlen) &&
|
||||
(strncmp(argv[0], tp->cmd_name, tp->cmd_minlen) == 0))
|
||||
if (tp->minlen && (strlen(argv[0]) <= tp->minlen) &&
|
||||
(strncmp(argv[0], tp->name, tp->minlen) == 0))
|
||||
break;
|
||||
|
||||
if (strcmp(argv[0], tp->cmd_name) == 0)
|
||||
if (strcmp(argv[0], tp->name) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1024,8 +1030,7 @@ int kdb_parse(const char *cmdstr)
|
||||
*/
|
||||
if (list_entry_is_head(tp, &kdb_cmds_head, list_node)) {
|
||||
list_for_each_entry(tp, &kdb_cmds_head, list_node) {
|
||||
if (strncmp(argv[0], tp->cmd_name,
|
||||
strlen(tp->cmd_name)) == 0)
|
||||
if (strncmp(argv[0], tp->name, strlen(tp->name)) == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1033,19 +1038,19 @@ int kdb_parse(const char *cmdstr)
|
||||
if (!list_entry_is_head(tp, &kdb_cmds_head, list_node)) {
|
||||
int result;
|
||||
|
||||
if (!kdb_check_flags(tp->cmd_flags, kdb_cmd_enabled, argc <= 1))
|
||||
if (!kdb_check_flags(tp->flags, kdb_cmd_enabled, argc <= 1))
|
||||
return KDB_NOPERM;
|
||||
|
||||
KDB_STATE_SET(CMD);
|
||||
result = (*tp->cmd_func)(argc-1, (const char **)argv);
|
||||
result = (*tp->func)(argc-1, (const char **)argv);
|
||||
if (result && ignore_errors && result > KDB_CMD_GO)
|
||||
result = 0;
|
||||
KDB_STATE_CLEAR(CMD);
|
||||
|
||||
if (tp->cmd_flags & KDB_REPEAT_WITH_ARGS)
|
||||
if (tp->flags & KDB_REPEAT_WITH_ARGS)
|
||||
return result;
|
||||
|
||||
argc = tp->cmd_flags & KDB_REPEAT_NO_ARGS ? 1 : 0;
|
||||
argc = tp->flags & KDB_REPEAT_NO_ARGS ? 1 : 0;
|
||||
if (argv[argc])
|
||||
*(argv[argc]) = '\0';
|
||||
return result;
|
||||
@@ -2412,12 +2417,12 @@ static int kdb_help(int argc, const char **argv)
|
||||
char *space = "";
|
||||
if (KDB_FLAG(CMD_INTERRUPT))
|
||||
return 0;
|
||||
if (!kdb_check_flags(kt->cmd_flags, kdb_cmd_enabled, true))
|
||||
if (!kdb_check_flags(kt->flags, kdb_cmd_enabled, true))
|
||||
continue;
|
||||
if (strlen(kt->cmd_usage) > 20)
|
||||
if (strlen(kt->usage) > 20)
|
||||
space = "\n ";
|
||||
kdb_printf("%-15.15s %-20s%s%s\n", kt->cmd_name,
|
||||
kt->cmd_usage, space, kt->cmd_help);
|
||||
kdb_printf("%-15.15s %-20s%s%s\n", kt->name,
|
||||
kt->usage, space, kt->help);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -2613,56 +2618,32 @@ static int kdb_grep_help(int argc, const char **argv)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* kdb_register_flags - This function is used to register a kernel
|
||||
* debugger command.
|
||||
* Inputs:
|
||||
* cmd Command name
|
||||
* func Function to execute the command
|
||||
* usage A simple usage string showing arguments
|
||||
* help A simple help string describing command
|
||||
* repeat Does the command auto repeat on enter?
|
||||
* Returns:
|
||||
* zero for success, one if a duplicate command.
|
||||
/**
|
||||
* kdb_register() - This function is used to register a kernel debugger
|
||||
* command.
|
||||
* @cmd: pointer to kdb command
|
||||
*
|
||||
* Note that it's the job of the caller to keep the memory for the cmd
|
||||
* allocated until unregister is called.
|
||||
*/
|
||||
int kdb_register_flags(char *cmd,
|
||||
kdb_func_t func,
|
||||
char *usage,
|
||||
char *help,
|
||||
short minlen,
|
||||
kdb_cmdflags_t flags)
|
||||
int kdb_register(kdbtab_t *cmd)
|
||||
{
|
||||
kdbtab_t *kp;
|
||||
|
||||
list_for_each_entry(kp, &kdb_cmds_head, list_node) {
|
||||
if (strcmp(kp->cmd_name, cmd) == 0) {
|
||||
kdb_printf("Duplicate kdb command registered: "
|
||||
"%s, func %px help %s\n", cmd, func, help);
|
||||
if (strcmp(kp->name, cmd->name) == 0) {
|
||||
kdb_printf("Duplicate kdb cmd: %s, func %p help %s\n",
|
||||
cmd->name, cmd->func, cmd->help);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
kp = kmalloc(sizeof(*kp), GFP_KDB);
|
||||
if (!kp) {
|
||||
kdb_printf("Could not allocate new kdb_command table\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
kp->cmd_name = cmd;
|
||||
kp->cmd_func = func;
|
||||
kp->cmd_usage = usage;
|
||||
kp->cmd_help = help;
|
||||
kp->cmd_minlen = minlen;
|
||||
kp->cmd_flags = flags;
|
||||
kp->is_dynamic = true;
|
||||
|
||||
list_add_tail(&kp->list_node, &kdb_cmds_head);
|
||||
|
||||
list_add_tail(&cmd->list_node, &kdb_cmds_head);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kdb_register_flags);
|
||||
EXPORT_SYMBOL_GPL(kdb_register);
|
||||
|
||||
/*
|
||||
/**
|
||||
* kdb_register_table() - This function is used to register a kdb command
|
||||
* table.
|
||||
* @kp: pointer to kdb command table
|
||||
@@ -2676,266 +2657,231 @@ void kdb_register_table(kdbtab_t *kp, size_t len)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* kdb_register - Compatibility register function for commands that do
|
||||
* not need to specify a repeat state. Equivalent to
|
||||
* kdb_register_flags with flags set to 0.
|
||||
* Inputs:
|
||||
* cmd Command name
|
||||
* func Function to execute the command
|
||||
* usage A simple usage string showing arguments
|
||||
* help A simple help string describing command
|
||||
* Returns:
|
||||
* zero for success, one if a duplicate command.
|
||||
/**
|
||||
* kdb_unregister() - This function is used to unregister a kernel debugger
|
||||
* command. It is generally called when a module which
|
||||
* implements kdb command is unloaded.
|
||||
* @cmd: pointer to kdb command
|
||||
*/
|
||||
int kdb_register(char *cmd,
|
||||
kdb_func_t func,
|
||||
char *usage,
|
||||
char *help,
|
||||
short minlen)
|
||||
void kdb_unregister(kdbtab_t *cmd)
|
||||
{
|
||||
return kdb_register_flags(cmd, func, usage, help, minlen, 0);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kdb_register);
|
||||
|
||||
/*
|
||||
* kdb_unregister - This function is used to unregister a kernel
|
||||
* debugger command. It is generally called when a module which
|
||||
* implements kdb commands is unloaded.
|
||||
* Inputs:
|
||||
* cmd Command name
|
||||
* Returns:
|
||||
* zero for success, one command not registered.
|
||||
*/
|
||||
int kdb_unregister(char *cmd)
|
||||
{
|
||||
kdbtab_t *kp;
|
||||
|
||||
/*
|
||||
* find the command.
|
||||
*/
|
||||
list_for_each_entry(kp, &kdb_cmds_head, list_node) {
|
||||
if (strcmp(kp->cmd_name, cmd) == 0) {
|
||||
list_del(&kp->list_node);
|
||||
if (kp->is_dynamic)
|
||||
kfree(kp);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Couldn't find it. */
|
||||
return 1;
|
||||
list_del(&cmd->list_node);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kdb_unregister);
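
/*
 * Illustrative sketch, not part of this patch: with the list-based
 * kdbtab_t a module keeps the command definition in storage that
 * outlives registration, since kdb_register() no longer copies it.
 * The command below ("hello"/kdb_hello) is hypothetical.
 */
static int kdb_hello(int argc, const char **argv)
{
        kdb_printf("hello from kdb\n");
        return 0;
}

static kdbtab_t hello_cmd = {
        .name   = "hello",
        .func   = kdb_hello,
        .usage  = "",
        .help   = "Print a greeting",
        .flags  = KDB_ENABLE_ALWAYS_SAFE,
};

/*
 * Module init would call kdb_register(&hello_cmd) and module exit
 * kdb_unregister(&hello_cmd); built-in arrays such as maintab[] below
 * are registered with kdb_register_table() instead.
 */
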
static kdbtab_t maintab[] = {
|
||||
{ .cmd_name = "md",
|
||||
.cmd_func = kdb_md,
|
||||
.cmd_usage = "<vaddr>",
|
||||
.cmd_help = "Display Memory Contents, also mdWcN, e.g. md8c1",
|
||||
.cmd_minlen = 1,
|
||||
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
|
||||
{ .name = "md",
|
||||
.func = kdb_md,
|
||||
.usage = "<vaddr>",
|
||||
.help = "Display Memory Contents, also mdWcN, e.g. md8c1",
|
||||
.minlen = 1,
|
||||
.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "mdr",
|
||||
.cmd_func = kdb_md,
|
||||
.cmd_usage = "<vaddr> <bytes>",
|
||||
.cmd_help = "Display Raw Memory",
|
||||
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
|
||||
{ .name = "mdr",
|
||||
.func = kdb_md,
|
||||
.usage = "<vaddr> <bytes>",
|
||||
.help = "Display Raw Memory",
|
||||
.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "mdp",
|
||||
.cmd_func = kdb_md,
|
||||
.cmd_usage = "<paddr> <bytes>",
|
||||
.cmd_help = "Display Physical Memory",
|
||||
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
|
||||
{ .name = "mdp",
|
||||
.func = kdb_md,
|
||||
.usage = "<paddr> <bytes>",
|
||||
.help = "Display Physical Memory",
|
||||
.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "mds",
|
||||
.cmd_func = kdb_md,
|
||||
.cmd_usage = "<vaddr>",
|
||||
.cmd_help = "Display Memory Symbolically",
|
||||
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
|
||||
{ .name = "mds",
|
||||
.func = kdb_md,
|
||||
.usage = "<vaddr>",
|
||||
.help = "Display Memory Symbolically",
|
||||
.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "mm",
|
||||
.cmd_func = kdb_mm,
|
||||
.cmd_usage = "<vaddr> <contents>",
|
||||
.cmd_help = "Modify Memory Contents",
|
||||
.cmd_flags = KDB_ENABLE_MEM_WRITE | KDB_REPEAT_NO_ARGS,
|
||||
{ .name = "mm",
|
||||
.func = kdb_mm,
|
||||
.usage = "<vaddr> <contents>",
|
||||
.help = "Modify Memory Contents",
|
||||
.flags = KDB_ENABLE_MEM_WRITE | KDB_REPEAT_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "go",
|
||||
.cmd_func = kdb_go,
|
||||
.cmd_usage = "[<vaddr>]",
|
||||
.cmd_help = "Continue Execution",
|
||||
.cmd_minlen = 1,
|
||||
.cmd_flags = KDB_ENABLE_REG_WRITE |
|
||||
{ .name = "go",
|
||||
.func = kdb_go,
|
||||
.usage = "[<vaddr>]",
|
||||
.help = "Continue Execution",
|
||||
.minlen = 1,
|
||||
.flags = KDB_ENABLE_REG_WRITE |
|
||||
KDB_ENABLE_ALWAYS_SAFE_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "rd",
|
||||
.cmd_func = kdb_rd,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Display Registers",
|
||||
.cmd_flags = KDB_ENABLE_REG_READ,
|
||||
{ .name = "rd",
|
||||
.func = kdb_rd,
|
||||
.usage = "",
|
||||
.help = "Display Registers",
|
||||
.flags = KDB_ENABLE_REG_READ,
|
||||
},
|
||||
{ .cmd_name = "rm",
|
||||
.cmd_func = kdb_rm,
|
||||
.cmd_usage = "<reg> <contents>",
|
||||
.cmd_help = "Modify Registers",
|
||||
.cmd_flags = KDB_ENABLE_REG_WRITE,
|
||||
{ .name = "rm",
|
||||
.func = kdb_rm,
|
||||
.usage = "<reg> <contents>",
|
||||
.help = "Modify Registers",
|
||||
.flags = KDB_ENABLE_REG_WRITE,
|
||||
},
|
||||
{ .cmd_name = "ef",
|
||||
.cmd_func = kdb_ef,
|
||||
.cmd_usage = "<vaddr>",
|
||||
.cmd_help = "Display exception frame",
|
||||
.cmd_flags = KDB_ENABLE_MEM_READ,
|
||||
{ .name = "ef",
|
||||
.func = kdb_ef,
|
||||
.usage = "<vaddr>",
|
||||
.help = "Display exception frame",
|
||||
.flags = KDB_ENABLE_MEM_READ,
|
||||
},
|
||||
{ .cmd_name = "bt",
|
||||
.cmd_func = kdb_bt,
|
||||
.cmd_usage = "[<vaddr>]",
|
||||
.cmd_help = "Stack traceback",
|
||||
.cmd_minlen = 1,
|
||||
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
|
||||
{ .name = "bt",
|
||||
.func = kdb_bt,
|
||||
.usage = "[<vaddr>]",
|
||||
.help = "Stack traceback",
|
||||
.minlen = 1,
|
||||
.flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "btp",
|
||||
.cmd_func = kdb_bt,
|
||||
.cmd_usage = "<pid>",
|
||||
.cmd_help = "Display stack for process <pid>",
|
||||
.cmd_flags = KDB_ENABLE_INSPECT,
|
||||
{ .name = "btp",
|
||||
.func = kdb_bt,
|
||||
.usage = "<pid>",
|
||||
.help = "Display stack for process <pid>",
|
||||
.flags = KDB_ENABLE_INSPECT,
|
||||
},
|
||||
{ .cmd_name = "bta",
|
||||
.cmd_func = kdb_bt,
|
||||
.cmd_usage = "[D|R|S|T|C|Z|E|U|I|M|A]",
|
||||
.cmd_help = "Backtrace all processes matching state flag",
|
||||
.cmd_flags = KDB_ENABLE_INSPECT,
|
||||
{ .name = "bta",
|
||||
.func = kdb_bt,
|
||||
.usage = "[D|R|S|T|C|Z|E|U|I|M|A]",
|
||||
.help = "Backtrace all processes matching state flag",
|
||||
.flags = KDB_ENABLE_INSPECT,
|
||||
},
|
||||
{ .cmd_name = "btc",
|
||||
.cmd_func = kdb_bt,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Backtrace current process on each cpu",
|
||||
.cmd_flags = KDB_ENABLE_INSPECT,
|
||||
{ .name = "btc",
|
||||
.func = kdb_bt,
|
||||
.usage = "",
|
||||
.help = "Backtrace current process on each cpu",
|
||||
.flags = KDB_ENABLE_INSPECT,
|
||||
},
|
||||
{ .cmd_name = "btt",
|
||||
.cmd_func = kdb_bt,
|
||||
.cmd_usage = "<vaddr>",
|
||||
.cmd_help = "Backtrace process given its struct task address",
|
||||
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
|
||||
{ .name = "btt",
|
||||
.func = kdb_bt,
|
||||
.usage = "<vaddr>",
|
||||
.help = "Backtrace process given its struct task address",
|
||||
.flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "env",
|
||||
.cmd_func = kdb_env,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Show environment variables",
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
{ .name = "env",
|
||||
.func = kdb_env,
|
||||
.usage = "",
|
||||
.help = "Show environment variables",
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
},
|
||||
{ .cmd_name = "set",
|
||||
.cmd_func = kdb_set,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Set environment variables",
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
{ .name = "set",
|
||||
.func = kdb_set,
|
||||
.usage = "",
|
||||
.help = "Set environment variables",
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
},
|
||||
{ .cmd_name = "help",
|
||||
.cmd_func = kdb_help,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Display Help Message",
|
||||
.cmd_minlen = 1,
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
{ .name = "help",
|
||||
.func = kdb_help,
|
||||
.usage = "",
|
||||
.help = "Display Help Message",
|
||||
.minlen = 1,
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
},
|
||||
{ .cmd_name = "?",
|
||||
.cmd_func = kdb_help,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Display Help Message",
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
{ .name = "?",
|
||||
.func = kdb_help,
|
||||
.usage = "",
|
||||
.help = "Display Help Message",
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
},
|
||||
{ .cmd_name = "cpu",
|
||||
.cmd_func = kdb_cpu,
|
||||
.cmd_usage = "<cpunum>",
|
||||
.cmd_help = "Switch to new cpu",
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE_NO_ARGS,
|
||||
{ .name = "cpu",
|
||||
.func = kdb_cpu,
|
||||
.usage = "<cpunum>",
|
||||
.help = "Switch to new cpu",
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE_NO_ARGS,
|
||||
},
|
||||
{ .cmd_name = "kgdb",
|
||||
.cmd_func = kdb_kgdb,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Enter kgdb mode",
|
||||
.cmd_flags = 0,
|
||||
{ .name = "kgdb",
|
||||
.func = kdb_kgdb,
|
||||
.usage = "",
|
||||
.help = "Enter kgdb mode",
|
||||
.flags = 0,
|
||||
},
|
||||
{ .cmd_name = "ps",
|
||||
.cmd_func = kdb_ps,
|
||||
.cmd_usage = "[<flags>|A]",
|
||||
.cmd_help = "Display active task list",
|
||||
.cmd_flags = KDB_ENABLE_INSPECT,
|
||||
{ .name = "ps",
|
||||
.func = kdb_ps,
|
||||
.usage = "[<flags>|A]",
|
||||
.help = "Display active task list",
|
||||
.flags = KDB_ENABLE_INSPECT,
|
||||
},
|
||||
{ .cmd_name = "pid",
|
||||
.cmd_func = kdb_pid,
|
||||
.cmd_usage = "<pidnum>",
|
||||
.cmd_help = "Switch to another task",
|
||||
.cmd_flags = KDB_ENABLE_INSPECT,
|
||||
{ .name = "pid",
|
||||
.func = kdb_pid,
|
||||
.usage = "<pidnum>",
|
||||
.help = "Switch to another task",
|
||||
.flags = KDB_ENABLE_INSPECT,
|
||||
},
|
||||
{ .cmd_name = "reboot",
|
||||
.cmd_func = kdb_reboot,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Reboot the machine immediately",
|
||||
.cmd_flags = KDB_ENABLE_REBOOT,
|
||||
{ .name = "reboot",
|
||||
.func = kdb_reboot,
|
||||
.usage = "",
|
||||
.help = "Reboot the machine immediately",
|
||||
.flags = KDB_ENABLE_REBOOT,
|
||||
},
|
||||
#if defined(CONFIG_MODULES)
|
||||
{ .cmd_name = "lsmod",
|
||||
.cmd_func = kdb_lsmod,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "List loaded kernel modules",
|
||||
.cmd_flags = KDB_ENABLE_INSPECT,
|
||||
{ .name = "lsmod",
|
||||
.func = kdb_lsmod,
|
||||
.usage = "",
|
||||
.help = "List loaded kernel modules",
|
||||
.flags = KDB_ENABLE_INSPECT,
|
||||
},
|
||||
#endif
|
||||
#if defined(CONFIG_MAGIC_SYSRQ)
|
||||
{ .cmd_name = "sr",
|
||||
.cmd_func = kdb_sr,
|
||||
.cmd_usage = "<key>",
|
||||
.cmd_help = "Magic SysRq key",
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
{ .name = "sr",
|
||||
.func = kdb_sr,
|
||||
.usage = "<key>",
|
||||
.help = "Magic SysRq key",
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
},
|
||||
#endif
|
||||
#if defined(CONFIG_PRINTK)
|
||||
{ .cmd_name = "dmesg",
|
||||
.cmd_func = kdb_dmesg,
|
||||
.cmd_usage = "[lines]",
|
||||
.cmd_help = "Display syslog buffer",
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
{ .name = "dmesg",
|
||||
.func = kdb_dmesg,
|
||||
.usage = "[lines]",
|
||||
.help = "Display syslog buffer",
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
},
|
||||
#endif
|
||||
{ .cmd_name = "defcmd",
|
||||
.cmd_func = kdb_defcmd,
|
||||
.cmd_usage = "name \"usage\" \"help\"",
|
||||
.cmd_help = "Define a set of commands, down to endefcmd",
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
{ .name = "defcmd",
|
||||
.func = kdb_defcmd,
|
||||
.usage = "name \"usage\" \"help\"",
|
||||
.help = "Define a set of commands, down to endefcmd",
|
||||
/*
|
||||
* Macros are always safe because when executed each
|
||||
* internal command re-enters kdb_parse() and is safety
|
||||
* checked individually.
|
||||
*/
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
},
|
||||
{ .cmd_name = "kill",
|
||||
.cmd_func = kdb_kill,
|
||||
.cmd_usage = "<-signal> <pid>",
|
||||
.cmd_help = "Send a signal to a process",
|
||||
.cmd_flags = KDB_ENABLE_SIGNAL,
|
||||
{ .name = "kill",
|
||||
.func = kdb_kill,
|
||||
.usage = "<-signal> <pid>",
|
||||
.help = "Send a signal to a process",
|
||||
.flags = KDB_ENABLE_SIGNAL,
|
||||
},
|
||||
{ .cmd_name = "summary",
|
||||
.cmd_func = kdb_summary,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Summarize the system",
|
||||
.cmd_minlen = 4,
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
{ .name = "summary",
|
||||
.func = kdb_summary,
|
||||
.usage = "",
|
||||
.help = "Summarize the system",
|
||||
.minlen = 4,
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
},
|
||||
{ .cmd_name = "per_cpu",
|
||||
.cmd_func = kdb_per_cpu,
|
||||
.cmd_usage = "<sym> [<bytes>] [<cpu>]",
|
||||
.cmd_help = "Display per_cpu variables",
|
||||
.cmd_minlen = 3,
|
||||
.cmd_flags = KDB_ENABLE_MEM_READ,
|
||||
{ .name = "per_cpu",
|
||||
.func = kdb_per_cpu,
|
||||
.usage = "<sym> [<bytes>] [<cpu>]",
|
||||
.help = "Display per_cpu variables",
|
||||
.minlen = 3,
|
||||
.flags = KDB_ENABLE_MEM_READ,
|
||||
},
|
||||
{ .cmd_name = "grephelp",
|
||||
.cmd_func = kdb_grep_help,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Display help on | grep",
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
{ .name = "grephelp",
|
||||
.func = kdb_grep_help,
|
||||
.usage = "",
|
||||
.help = "Display help on | grep",
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
},
|
||||
};
|
||||
|
||||
static kdbtab_t nmicmd = {
|
||||
.cmd_name = "disable_nmi",
|
||||
.cmd_func = kdb_disable_nmi,
|
||||
.cmd_usage = "",
|
||||
.cmd_help = "Disable NMI entry to KDB",
|
||||
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
.name = "disable_nmi",
|
||||
.func = kdb_disable_nmi,
|
||||
.usage = "",
|
||||
.help = "Disable NMI entry to KDB",
|
||||
.flags = KDB_ENABLE_ALWAYS_SAFE,
|
||||
};
|
||||
|
||||
/* Initialize the kdb command table. */
|
||||
|
||||
@@ -109,7 +109,6 @@ extern int kdbgetaddrarg(int, const char **, int*, unsigned long *,
|
||||
long *, char **);
|
||||
extern int kdbgetsymval(const char *, kdb_symtab_t *);
|
||||
extern int kdbnearsym(unsigned long, kdb_symtab_t *);
|
||||
extern void kdbnearsym_cleanup(void);
|
||||
extern char *kdb_strdup(const char *str, gfp_t type);
|
||||
extern void kdb_symbol_print(unsigned long, const kdb_symtab_t *, unsigned int);
|
||||
|
||||
@@ -165,19 +164,6 @@ typedef struct _kdb_bp {
|
||||
#ifdef CONFIG_KGDB_KDB
|
||||
extern kdb_bp_t kdb_breakpoints[/* KDB_MAXBPT */];
|
||||
|
||||
/* The KDB shell command table */
|
||||
typedef struct _kdbtab {
        char *cmd_name;                 /* Command name */
        kdb_func_t cmd_func;            /* Function to execute command */
        char *cmd_usage;                /* Usage String for this command */
        char *cmd_help;                 /* Help message for this command */
        short cmd_minlen;               /* Minimum legal # command
                                         * chars required */
        kdb_cmdflags_t cmd_flags;       /* Command behaviour flags */
        struct list_head list_node;     /* Command list */
        bool is_dynamic;                /* Command table allocation type */
} kdbtab_t;
|
||||
|
||||
extern void kdb_register_table(kdbtab_t *kp, size_t len);
|
||||
extern int kdb_bt(int, const char **); /* KDB display back trace */
|
||||
|
||||
@@ -233,10 +219,6 @@ extern struct task_struct *kdb_curr_task(int);
|
||||
|
||||
#define GFP_KDB (in_dbg_master() ? GFP_ATOMIC : GFP_KERNEL)
|
||||
|
||||
extern void *debug_kmalloc(size_t size, gfp_t flags);
|
||||
extern void debug_kfree(void *);
|
||||
extern void debug_kusage(void);
|
||||
|
||||
extern struct task_struct *kdb_current_task;
|
||||
extern struct pt_regs *kdb_current_regs;
|
||||
|
||||
|
||||
@@ -51,48 +51,48 @@ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab)
|
||||
}
|
||||
EXPORT_SYMBOL(kdbgetsymval);
|
||||
|
||||
static char *kdb_name_table[100]; /* arbitrary size */
|
||||
|
||||
/*
|
||||
* kdbnearsym - Return the name of the symbol with the nearest address
|
||||
* less than 'addr'.
|
||||
/**
|
||||
* kdbnearsym() - Return the name of the symbol with the nearest address
|
||||
* less than @addr.
|
||||
* @addr: Address to check for near symbol
|
||||
* @symtab: Structure to receive results
|
||||
*
|
||||
* Parameters:
|
||||
* addr Address to check for symbol near
|
||||
* symtab Structure to receive results
|
||||
* Returns:
|
||||
* 0 No sections contain this address, symtab zero filled
|
||||
* 1 Address mapped to module/symbol/section, data in symtab
|
||||
* Remarks:
|
||||
* 2.6 kallsyms has a "feature" where it unpacks the name into a
|
||||
* string. If that string is reused before the caller expects it
|
||||
* then the caller sees its string change without warning. To
|
||||
* avoid cluttering up the main kdb code with lots of kdb_strdup,
|
||||
* tests and kfree calls, kdbnearsym maintains an LRU list of the
|
||||
* last few unique strings. The list is sized large enough to
|
||||
* hold active strings, no kdb caller of kdbnearsym makes more
|
||||
* than ~20 later calls before using a saved value.
|
||||
* WARNING: This function may return a pointer to a single statically
|
||||
* allocated buffer (namebuf). kdb's unusual calling context (single
|
||||
* threaded, all other CPUs halted) provides us sufficient locking for
|
||||
* this to be safe. The only constraint imposed by the static buffer is
|
||||
* that the caller must consume any previous reply prior to another call
|
||||
* to lookup a new symbol.
|
||||
*
|
||||
* Note that, strictly speaking, some architectures may re-enter the kdb
|
||||
* trap if the system turns out to be very badly damaged and this breaks
|
||||
* the single-threaded assumption above. In these circumstances successful
|
||||
* continuation and exit from the inner trap is unlikely to work and any
|
||||
* user attempting this receives a prominent warning before being allowed
|
||||
* to progress. In these circumstances we remain memory safe because
|
||||
* namebuf[KSYM_NAME_LEN-1] will never change from '\0' although we do
|
||||
* tolerate the possibility of garbled symbol display from the outer kdb
|
||||
* trap.
|
||||
*
|
||||
* Return:
|
||||
* * 0 - No sections contain this address, symtab zero filled
|
||||
* * 1 - Address mapped to module/symbol/section, data in symtab
|
||||
*/
|
||||
int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned long symbolsize = 0;
|
||||
unsigned long offset = 0;
|
||||
#define knt1_size 128 /* must be >= kallsyms table size */
|
||||
char *knt1 = NULL;
|
||||
static char namebuf[KSYM_NAME_LEN];
|
||||
|
||||
kdb_dbg_printf(AR, "addr=0x%lx, symtab=%px\n", addr, symtab);
|
||||
memset(symtab, 0, sizeof(*symtab));
|
||||
|
||||
if (addr < 4096)
|
||||
goto out;
|
||||
knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC);
|
||||
if (!knt1) {
|
||||
kdb_func_printf("addr=0x%lx cannot kmalloc knt1\n", addr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
symtab->sym_name = kallsyms_lookup(addr, &symbolsize, &offset,
|
||||
(char **)(&symtab->mod_name), knt1);
|
||||
(char **)(&symtab->mod_name), namebuf);
|
||||
if (offset > 8*1024*1024) {
|
||||
symtab->sym_name = NULL;
|
||||
addr = offset = symbolsize = 0;
|
||||
@@ -101,63 +101,14 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
|
||||
symtab->sym_end = symtab->sym_start + symbolsize;
|
||||
ret = symtab->sym_name != NULL && *(symtab->sym_name) != '\0';
|
||||
|
||||
if (ret) {
|
||||
int i;
|
||||
/* Another 2.6 kallsyms "feature". Sometimes the sym_name is
|
||||
* set but the buffer passed into kallsyms_lookup is not used,
|
||||
* so it contains garbage. The caller has to work out which
|
||||
* buffer needs to be saved.
|
||||
*
|
||||
* What was Rusty smoking when he wrote that code?
|
||||
*/
|
||||
if (symtab->sym_name != knt1) {
|
||||
strncpy(knt1, symtab->sym_name, knt1_size);
|
||||
knt1[knt1_size-1] = '\0';
|
||||
}
|
||||
for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
|
||||
if (kdb_name_table[i] &&
|
||||
strcmp(kdb_name_table[i], knt1) == 0)
|
||||
break;
|
||||
}
|
||||
if (i >= ARRAY_SIZE(kdb_name_table)) {
|
||||
debug_kfree(kdb_name_table[0]);
|
||||
memmove(kdb_name_table, kdb_name_table+1,
|
||||
sizeof(kdb_name_table[0]) *
|
||||
(ARRAY_SIZE(kdb_name_table)-1));
|
||||
} else {
|
||||
debug_kfree(knt1);
|
||||
knt1 = kdb_name_table[i];
|
||||
memmove(kdb_name_table+i, kdb_name_table+i+1,
|
||||
sizeof(kdb_name_table[0]) *
|
||||
(ARRAY_SIZE(kdb_name_table)-i-1));
|
||||
}
|
||||
i = ARRAY_SIZE(kdb_name_table) - 1;
|
||||
kdb_name_table[i] = knt1;
|
||||
symtab->sym_name = kdb_name_table[i];
|
||||
knt1 = NULL;
|
||||
}
|
||||
|
||||
if (symtab->mod_name == NULL)
|
||||
symtab->mod_name = "kernel";
|
||||
kdb_dbg_printf(AR, "returns %d symtab->sym_start=0x%lx, symtab->mod_name=%px, symtab->sym_name=%px (%s)\n",
|
||||
ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name);
|
||||
|
||||
out:
|
||||
debug_kfree(knt1);
|
||||
return ret;
|
||||
}
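
/*
 * Illustrative sketch (hypothetical helper, not part of this patch):
 * because sym_name may point at the single static namebuf above, a
 * caller that needs two symbols at once must copy the first result
 * before issuing the next lookup.
 */
static void kdb_print_two_syms(unsigned long a, unsigned long b)
{
        char first[KSYM_NAME_LEN];
        kdb_symtab_t symtab;

        first[0] = '\0';
        if (kdbnearsym(a, &symtab))
                strscpy(first, symtab.sym_name, sizeof(first));
        if (kdbnearsym(b, &symtab))
                kdb_printf("%s / %s\n", first, symtab.sym_name);
}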
void kdbnearsym_cleanup(void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
|
||||
if (kdb_name_table[i]) {
|
||||
debug_kfree(kdb_name_table[i]);
|
||||
kdb_name_table[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static char ks_namebuf[KSYM_NAME_LEN+1], ks_namebuf_prev[KSYM_NAME_LEN+1];
|
||||
|
||||
/*
|
||||
@@ -655,230 +606,6 @@ unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask)
|
||||
return (mask & kdb_task_state_string(state)) != 0;
|
||||
}
|
||||
|
||||
/* Last ditch allocator for debugging, so we can still debug even when
|
||||
* the GFP_ATOMIC pool has been exhausted. The algorithms are tuned
|
||||
* for space usage, not for speed. One smallish memory pool, the free
|
||||
* chain is always in ascending address order to allow coalescing,
|
||||
* allocations are done in brute force best fit.
|
||||
*/
|
||||
|
||||
struct debug_alloc_header {
|
||||
u32 next; /* offset of next header from start of pool */
|
||||
u32 size;
|
||||
void *caller;
|
||||
};
|
||||
|
||||
/* The memory returned by this allocator must be aligned, which means
|
||||
* so must the header size. Do not assume that sizeof(struct
|
||||
* debug_alloc_header) is a multiple of the alignment, explicitly
|
||||
* calculate the overhead of this header, including the alignment.
|
||||
* The rest of this code must not use sizeof() on any header or
|
||||
* pointer to a header.
|
||||
*/
|
||||
#define dah_align 8
|
||||
#define dah_overhead ALIGN(sizeof(struct debug_alloc_header), dah_align)
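
/*
 * Illustrative arithmetic, not part of this patch: with 64-bit pointers
 * sizeof(struct debug_alloc_header) is 4 + 4 + 8 = 16, so dah_overhead
 * is ALIGN(16, 8) = 16; with 32-bit pointers it is 4 + 4 + 4 = 12,
 * rounded up to 16. Every allocation therefore pays 16 bytes of header
 * in front of its aligned payload.
 */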
static u64 debug_alloc_pool_aligned[256*1024/dah_align]; /* 256K pool */
|
||||
static char *debug_alloc_pool = (char *)debug_alloc_pool_aligned;
|
||||
static u32 dah_first, dah_first_call = 1, dah_used, dah_used_max;
|
||||
|
||||
/* Locking is awkward. The debug code is called from all contexts,
|
||||
* including non maskable interrupts. A normal spinlock is not safe
|
||||
* in NMI context. Try to get the debug allocator lock, if it cannot
|
||||
* be obtained after a second then give up. If the lock could not be
|
||||
* previously obtained on this cpu then only try once.
|
||||
*
|
||||
* sparse has no annotation for "this function _sometimes_ acquires a
|
||||
* lock", so fudge the acquire/release notation.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(dap_lock);
|
||||
static int get_dap_lock(void)
|
||||
__acquires(dap_lock)
|
||||
{
|
||||
static int dap_locked = -1;
|
||||
int count;
|
||||
if (dap_locked == smp_processor_id())
|
||||
count = 1;
|
||||
else
|
||||
count = 1000;
|
||||
while (1) {
|
||||
if (spin_trylock(&dap_lock)) {
|
||||
dap_locked = -1;
|
||||
return 1;
|
||||
}
|
||||
if (!count--)
|
||||
break;
|
||||
udelay(1000);
|
||||
}
|
||||
dap_locked = smp_processor_id();
|
||||
__acquire(dap_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *debug_kmalloc(size_t size, gfp_t flags)
|
||||
{
|
||||
unsigned int rem, h_offset;
|
||||
struct debug_alloc_header *best, *bestprev, *prev, *h;
|
||||
void *p = NULL;
|
||||
if (!get_dap_lock()) {
|
||||
__release(dap_lock); /* we never actually got it */
|
||||
return NULL;
|
||||
}
|
||||
h = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
|
||||
if (dah_first_call) {
|
||||
h->size = sizeof(debug_alloc_pool_aligned) - dah_overhead;
|
||||
dah_first_call = 0;
|
||||
}
|
||||
size = ALIGN(size, dah_align);
|
||||
prev = best = bestprev = NULL;
|
||||
while (1) {
|
||||
if (h->size >= size && (!best || h->size < best->size)) {
|
||||
best = h;
|
||||
bestprev = prev;
|
||||
if (h->size == size)
|
||||
break;
|
||||
}
|
||||
if (!h->next)
|
||||
break;
|
||||
prev = h;
|
||||
h = (struct debug_alloc_header *)(debug_alloc_pool + h->next);
|
||||
}
|
||||
if (!best)
|
||||
goto out;
|
||||
rem = best->size - size;
|
||||
/* The pool must always contain at least one header */
|
||||
if (best->next == 0 && bestprev == NULL && rem < dah_overhead)
|
||||
goto out;
|
||||
if (rem >= dah_overhead) {
|
||||
best->size = size;
|
||||
h_offset = ((char *)best - debug_alloc_pool) +
|
||||
dah_overhead + best->size;
|
||||
h = (struct debug_alloc_header *)(debug_alloc_pool + h_offset);
|
||||
h->size = rem - dah_overhead;
|
||||
h->next = best->next;
|
||||
} else
|
||||
h_offset = best->next;
|
||||
best->caller = __builtin_return_address(0);
|
||||
dah_used += best->size;
|
||||
dah_used_max = max(dah_used, dah_used_max);
|
||||
if (bestprev)
|
||||
bestprev->next = h_offset;
|
||||
else
|
||||
dah_first = h_offset;
|
||||
p = (char *)best + dah_overhead;
|
||||
memset(p, POISON_INUSE, best->size - 1);
|
||||
*((char *)p + best->size - 1) = POISON_END;
|
||||
out:
|
||||
spin_unlock(&dap_lock);
|
||||
return p;
|
||||
}
|
||||
|
||||
void debug_kfree(void *p)
|
||||
{
|
||||
struct debug_alloc_header *h;
|
||||
unsigned int h_offset;
|
||||
if (!p)
|
||||
return;
|
||||
if ((char *)p < debug_alloc_pool ||
|
||||
(char *)p >= debug_alloc_pool + sizeof(debug_alloc_pool_aligned)) {
|
||||
kfree(p);
|
||||
return;
|
||||
}
|
||||
if (!get_dap_lock()) {
|
||||
__release(dap_lock); /* we never actually got it */
|
||||
return; /* memory leak, cannot be helped */
|
||||
}
|
||||
h = (struct debug_alloc_header *)((char *)p - dah_overhead);
|
||||
memset(p, POISON_FREE, h->size - 1);
|
||||
*((char *)p + h->size - 1) = POISON_END;
|
||||
h->caller = NULL;
|
||||
dah_used -= h->size;
|
||||
h_offset = (char *)h - debug_alloc_pool;
|
||||
if (h_offset < dah_first) {
|
||||
h->next = dah_first;
|
||||
dah_first = h_offset;
|
||||
} else {
|
||||
struct debug_alloc_header *prev;
|
||||
unsigned int prev_offset;
|
||||
prev = (struct debug_alloc_header *)(debug_alloc_pool +
|
||||
dah_first);
|
||||
while (1) {
|
||||
if (!prev->next || prev->next > h_offset)
|
||||
break;
|
||||
prev = (struct debug_alloc_header *)
|
||||
(debug_alloc_pool + prev->next);
|
||||
}
|
||||
prev_offset = (char *)prev - debug_alloc_pool;
|
||||
if (prev_offset + dah_overhead + prev->size == h_offset) {
|
||||
prev->size += dah_overhead + h->size;
|
||||
memset(h, POISON_FREE, dah_overhead - 1);
|
||||
*((char *)h + dah_overhead - 1) = POISON_END;
|
||||
h = prev;
|
||||
h_offset = prev_offset;
|
||||
} else {
|
||||
h->next = prev->next;
|
||||
prev->next = h_offset;
|
||||
}
|
||||
}
|
||||
if (h_offset + dah_overhead + h->size == h->next) {
|
||||
struct debug_alloc_header *next;
|
||||
next = (struct debug_alloc_header *)
|
||||
(debug_alloc_pool + h->next);
|
||||
h->size += dah_overhead + next->size;
|
||||
h->next = next->next;
|
||||
memset(next, POISON_FREE, dah_overhead - 1);
|
||||
*((char *)next + dah_overhead - 1) = POISON_END;
|
||||
}
|
||||
spin_unlock(&dap_lock);
|
||||
}
|
||||
|
||||
void debug_kusage(void)
|
||||
{
|
||||
struct debug_alloc_header *h_free, *h_used;
|
||||
#ifdef CONFIG_IA64
|
||||
/* FIXME: using dah for ia64 unwind always results in a memory leak.
|
||||
* Fix that memory leak first, then set debug_kusage_one_time = 1 for
|
||||
* all architectures.
|
||||
*/
|
||||
static int debug_kusage_one_time;
|
||||
#else
|
||||
static int debug_kusage_one_time = 1;
|
||||
#endif
|
||||
if (!get_dap_lock()) {
|
||||
__release(dap_lock); /* we never actually got it */
|
||||
return;
|
||||
}
|
||||
h_free = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
|
||||
if (dah_first == 0 &&
|
||||
(h_free->size == sizeof(debug_alloc_pool_aligned) - dah_overhead ||
|
||||
dah_first_call))
|
||||
goto out;
|
||||
if (!debug_kusage_one_time)
|
||||
goto out;
|
||||
debug_kusage_one_time = 0;
|
||||
kdb_func_printf("debug_kmalloc memory leak dah_first %d\n", dah_first);
|
||||
if (dah_first) {
|
||||
h_used = (struct debug_alloc_header *)debug_alloc_pool;
|
||||
kdb_func_printf("h_used %px size %d\n", h_used, h_used->size);
|
||||
}
|
||||
do {
|
||||
h_used = (struct debug_alloc_header *)
|
||||
((char *)h_free + dah_overhead + h_free->size);
|
||||
kdb_func_printf("h_used %px size %d caller %px\n",
|
||||
h_used, h_used->size, h_used->caller);
|
||||
h_free = (struct debug_alloc_header *)
|
||||
(debug_alloc_pool + h_free->next);
|
||||
} while (h_free->next);
|
||||
h_used = (struct debug_alloc_header *)
|
||||
((char *)h_free + dah_overhead + h_free->size);
|
||||
if ((char *)h_used - debug_alloc_pool !=
|
||||
sizeof(debug_alloc_pool_aligned))
|
||||
kdb_func_printf("h_used %px size %d caller %px\n",
|
||||
h_used, h_used->size, h_used->caller);
|
||||
out:
|
||||
spin_unlock(&dap_lock);
|
||||
}
|
||||
|
||||
/* Maintain a small stack of kdb_flags to allow recursion without disturbing
|
||||
* the global kdb state.
|
||||
*/
|
||||
|
||||
@@ -135,10 +135,9 @@ config TRACING_SUPPORT
|
||||
depends on STACKTRACE_SUPPORT
|
||||
default y
|
||||
|
||||
if TRACING_SUPPORT
|
||||
|
||||
menuconfig FTRACE
|
||||
bool "Tracers"
|
||||
depends on TRACING_SUPPORT
|
||||
default y if DEBUG_KERNEL
|
||||
help
|
||||
Enable the kernel tracing infrastructure.
|
||||
@@ -1038,6 +1037,3 @@ config HIST_TRIGGERS_DEBUG
|
||||
If unsure, say N.
|
||||
|
||||
endif # FTRACE
|
||||
|
||||
endif # TRACING_SUPPORT
|
||||
|
||||
|
||||
@@ -77,6 +77,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
|
||||
endif
|
||||
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
|
||||
obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
|
||||
obj-$(CONFIG_PROBE_EVENTS) += trace_eprobe.o
|
||||
obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o
|
||||
obj-$(CONFIG_SYNTH_EVENTS) += trace_events_synth.o
|
||||
obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
|
||||
|
||||
@@ -2111,7 +2111,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
|
||||
}
|
||||
}
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
/*
|
||||
* Fire off all the required work handlers
|
||||
* We can't schedule on offline CPUs, but it's not necessary
|
||||
@@ -2143,7 +2143,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
|
||||
cpu_buffer->nr_pages_to_update = 0;
|
||||
}
|
||||
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
} else {
|
||||
cpu_buffer = buffer->buffers[cpu_id];
|
||||
|
||||
@@ -2171,7 +2171,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
get_online_cpus();
|
||||
cpus_read_lock();
|
||||
|
||||
/* Can't run something on an offline CPU. */
|
||||
if (!cpu_online(cpu_id))
|
||||
@@ -2183,7 +2183,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
|
||||
}
|
||||
|
||||
cpu_buffer->nr_pages_to_update = 0;
|
||||
put_online_cpus();
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
out:
|
||||
|
||||
@@ -3698,11 +3698,11 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str)
|
||||
return false;
|
||||
|
||||
event = container_of(trace_event, struct trace_event_call, event);
|
||||
if (!event->mod)
|
||||
if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
|
||||
return false;
|
||||
|
||||
/* Would rather have rodata, but this will suffice */
|
||||
if (within_module_core(addr, event->mod))
|
||||
if (within_module_core(addr, event->module))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
@@ -5544,6 +5544,7 @@ static const char readme_msg[] =
|
||||
#ifdef CONFIG_HIST_TRIGGERS
|
||||
"\t s:[synthetic/]<event> <field> [<field>]\n"
|
||||
#endif
|
||||
"\t e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
|
||||
"\t -:[<group>/]<event>\n"
|
||||
#ifdef CONFIG_KPROBE_EVENTS
|
||||
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
|
||||
@@ -5553,7 +5554,7 @@ static const char readme_msg[] =
|
||||
" place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
|
||||
#endif
|
||||
"\t args: <name>=fetcharg[:type]\n"
|
||||
"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
|
||||
"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
|
||||
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
|
||||
"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
|
||||
#else
|
||||
@@ -5568,6 +5569,8 @@ static const char readme_msg[] =
|
||||
"\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
|
||||
"\t [unsigned] char/int/long\n"
|
||||
#endif
|
||||
"\t efield: For event probes ('e' types), the field is on of the fields\n"
|
||||
"\t of the <attached-group>/<attached-event>.\n"
|
||||
#endif
|
||||
" events/\t\t- Directory containing all trace event subsystems:\n"
|
||||
" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
|
||||
@@ -5655,6 +5658,7 @@ static const char readme_msg[] =
|
||||
"\t .execname display a common_pid as a program name\n"
|
||||
"\t .syscall display a syscall id as a syscall name\n"
|
||||
"\t .log2 display log2 value rather than raw number\n"
|
||||
"\t .buckets=size display values in groups of size rather than raw number\n"
|
||||
"\t .usecs display a common_timestamp in microseconds\n\n"
|
||||
"\t The 'pause' parameter can be used to pause an existing hist\n"
|
||||
"\t trigger or to start a hist trigger but not log any events\n"
|
||||
|
||||
@@ -126,6 +126,11 @@ struct kprobe_trace_entry_head {
|
||||
unsigned long ip;
|
||||
};
|
||||
|
||||
struct eprobe_trace_entry_head {
|
||||
struct trace_entry ent;
|
||||
unsigned int type;
|
||||
};
|
||||
|
||||
struct kretprobe_trace_entry_head {
|
||||
struct trace_entry ent;
|
||||
unsigned long func;
|
||||
@@ -1508,9 +1513,14 @@ static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; }
|
||||
extern int register_trigger_cmds(void);
|
||||
extern void clear_event_triggers(struct trace_array *tr);
|
||||
|
||||
enum {
|
||||
EVENT_TRIGGER_FL_PROBE = BIT(0),
|
||||
};
|
||||
|
||||
struct event_trigger_data {
|
||||
unsigned long count;
|
||||
int ref;
|
||||
int flags;
|
||||
struct event_trigger_ops *ops;
|
||||
struct event_command *cmd_ops;
|
||||
struct event_filter __rcu *filter;
|
||||
@@ -1918,6 +1928,14 @@ static inline bool is_good_name(const char *name)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Convert certain expected symbols into '_' when generating event names */
|
||||
static inline void sanitize_event_name(char *name)
|
||||
{
|
||||
while (*name++ != '\0')
|
||||
if (*name == ':' || *name == '.')
|
||||
*name = '_';
|
||||
}
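
/*
 * Illustrative example, not part of this patch: a generated name such as
 * "myprobe.with:colons" becomes "myprobe_with_colons", which satisfies
 * is_good_name() above.
 */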
/*
|
||||
* This is a generic way to read and write a u64 value from a file in tracefs.
|
||||
*
|
||||
|
||||
@@ -171,6 +171,290 @@ trace_boot_add_synth_event(struct xbc_node *node, const char *event)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HIST_TRIGGERS
|
||||
static int __init __printf(3, 4)
|
||||
append_printf(char **bufp, char *end, const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
int ret;
|
||||
|
||||
if (*bufp == end)
|
||||
return -ENOSPC;
|
||||
|
||||
va_start(args, fmt);
|
||||
ret = vsnprintf(*bufp, end - *bufp, fmt, args);
|
||||
if (ret < end - *bufp) {
|
||||
*bufp += ret;
|
||||
} else {
|
||||
*bufp = end;
|
||||
ret = -ERANGE;
|
||||
}
|
||||
va_end(args);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __init
|
||||
append_str_nospace(char **bufp, char *end, const char *str)
|
||||
{
|
||||
char *p = *bufp;
|
||||
int len;
|
||||
|
||||
while (p < end - 1 && *str != '\0') {
|
||||
if (!isspace(*str))
|
||||
*(p++) = *str;
|
||||
str++;
|
||||
}
|
||||
*p = '\0';
|
||||
if (p == end - 1) {
|
||||
*bufp = end;
|
||||
return -ENOSPC;
|
||||
}
|
||||
len = p - *bufp;
|
||||
*bufp = p;
|
||||
return (int)len;
|
||||
}
|
||||
|
||||
static int __init
|
||||
trace_boot_hist_add_array(struct xbc_node *hnode, char **bufp,
|
||||
char *end, const char *key)
|
||||
{
|
||||
struct xbc_node *knode, *anode;
|
||||
const char *p;
|
||||
char sep;
|
||||
|
||||
knode = xbc_node_find_child(hnode, key);
|
||||
if (knode) {
|
||||
anode = xbc_node_get_child(knode);
|
||||
if (!anode) {
|
||||
pr_err("hist.%s requires value(s).\n", key);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
append_printf(bufp, end, ":%s", key);
|
||||
sep = '=';
|
||||
xbc_array_for_each_value(anode, p) {
|
||||
append_printf(bufp, end, "%c%s", sep, p);
|
||||
if (sep == '=')
|
||||
sep = ',';
|
||||
}
|
||||
} else
|
||||
return -ENOENT;
|
||||
|
||||
return 0;
|
||||
}
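
/*
 * Illustrative example, not part of this patch: a bootconfig node such as
 * "keys = pid, comm" is appended to the command buffer as ":keys=pid,comm".
 * A missing node returns -ENOENT so that optional arrays ("values", "sort")
 * can simply be skipped by the caller.
 */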
static int __init
|
||||
trace_boot_hist_add_one_handler(struct xbc_node *hnode, char **bufp,
|
||||
char *end, const char *handler,
|
||||
const char *param)
|
||||
{
|
||||
struct xbc_node *knode, *anode;
|
||||
const char *p;
|
||||
char sep;
|
||||
|
||||
/* Compose 'handler' parameter */
|
||||
p = xbc_node_find_value(hnode, param, NULL);
|
||||
if (!p) {
|
||||
pr_err("hist.%s requires '%s' option.\n",
|
||||
xbc_node_get_data(hnode), param);
|
||||
return -EINVAL;
|
||||
}
|
||||
append_printf(bufp, end, ":%s(%s)", handler, p);
|
||||
|
||||
/* Compose 'action' parameter */
|
||||
knode = xbc_node_find_child(hnode, "trace");
|
||||
if (!knode)
|
||||
knode = xbc_node_find_child(hnode, "save");
|
||||
|
||||
if (knode) {
|
||||
anode = xbc_node_get_child(knode);
|
||||
if (!anode || !xbc_node_is_value(anode)) {
|
||||
pr_err("hist.%s.%s requires value(s).\n",
|
||||
xbc_node_get_data(hnode),
|
||||
xbc_node_get_data(knode));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
append_printf(bufp, end, ".%s", xbc_node_get_data(knode));
|
||||
sep = '(';
|
||||
xbc_array_for_each_value(anode, p) {
|
||||
append_printf(bufp, end, "%c%s", sep, p);
|
||||
if (sep == '(')
|
||||
sep = ',';
|
||||
}
|
||||
append_printf(bufp, end, ")");
|
||||
} else if (xbc_node_find_child(hnode, "snapshot")) {
|
||||
append_printf(bufp, end, ".snapshot()");
|
||||
} else {
|
||||
pr_err("hist.%s requires an action.\n",
|
||||
xbc_node_get_data(hnode));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
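
/*
 * Illustrative example (hypothetical variable name), not part of this
 * patch: an "onmax" node with "var = wakeup_lat" and "save = pid, comm"
 * is appended as ":onmax(wakeup_lat).save(pid,comm)"; a "snapshot" child
 * is appended as ".snapshot()" instead of a trace/save action.
 */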
static int __init
|
||||
trace_boot_hist_add_handlers(struct xbc_node *hnode, char **bufp,
|
||||
char *end, const char *param)
|
||||
{
|
||||
struct xbc_node *node;
|
||||
const char *p, *handler;
|
||||
int ret;
|
||||
|
||||
handler = xbc_node_get_data(hnode);
|
||||
|
||||
xbc_node_for_each_subkey(hnode, node) {
|
||||
p = xbc_node_get_data(node);
|
||||
if (!isdigit(p[0]))
|
||||
continue;
|
||||
/* All nodes whose names start with a digit should be instances. */
|
||||
ret = trace_boot_hist_add_one_handler(node, bufp, end, handler, param);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (xbc_node_find_child(hnode, param))
|
||||
ret = trace_boot_hist_add_one_handler(hnode, bufp, end, handler, param);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Histogram boottime tracing syntax.
 *
 * ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist[.N] {
 *      keys = <KEY>[,...]
 *      values = <VAL>[,...]
 *      sort = <SORT-KEY>[,...]
 *      size = <ENTRIES>
 *      name = <HISTNAME>
 *      var { <VAR> = <EXPR> ... }
 *      pause|continue|clear
 *      onmax|onchange[.N] { var = <VAR>; <ACTION> [= <PARAM>] }
 *      onmatch[.N] { event = <EVENT>; <ACTION> [= <PARAM>] }
 *      filter = <FILTER>
 * }
 *
 * Where <ACTION> is one of:
 *
 * trace = <EVENT>, <ARG1>[, ...]
 * save = <ARG1>[, ...]
 * snapshot
*/
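
/*
 * Illustrative sketch (hypothetical event), not part of this patch: a
 * fragment such as
 *
 *   ftrace.event.sched.sched_waking.hist {
 *       keys = pid
 *       values = prio
 *       sort = prio
 *   }
 *
 * is composed by trace_boot_compose_hist_cmd() below into the trigger
 * string "hist:keys=pid:values=prio:sort=prio", which
 * trace_boot_init_histograms() then applies via trigger_process_regex().
 */
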
static int __init
|
||||
trace_boot_compose_hist_cmd(struct xbc_node *hnode, char *buf, size_t size)
|
||||
{
|
||||
struct xbc_node *node, *knode;
|
||||
char *end = buf + size;
|
||||
const char *p;
|
||||
int ret = 0;
|
||||
|
||||
append_printf(&buf, end, "hist");
|
||||
|
||||
ret = trace_boot_hist_add_array(hnode, &buf, end, "keys");
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
pr_err("hist requires keys.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = trace_boot_hist_add_array(hnode, &buf, end, "values");
|
||||
if (ret == -EINVAL)
|
||||
return ret;
|
||||
ret = trace_boot_hist_add_array(hnode, &buf, end, "sort");
|
||||
if (ret == -EINVAL)
|
||||
return ret;
|
||||
|
||||
p = xbc_node_find_value(hnode, "size", NULL);
|
||||
if (p)
|
||||
append_printf(&buf, end, ":size=%s", p);
|
||||
|
||||
p = xbc_node_find_value(hnode, "name", NULL);
|
||||
if (p)
|
||||
append_printf(&buf, end, ":name=%s", p);
|
||||
|
||||
node = xbc_node_find_child(hnode, "var");
|
||||
if (node) {
|
||||
xbc_node_for_each_key_value(node, knode, p) {
|
||||
/* Expression must not include spaces. */
|
||||
append_printf(&buf, end, ":%s=",
|
||||
xbc_node_get_data(knode));
|
||||
append_str_nospace(&buf, end, p);
|
||||
}
|
||||
}
|
||||
|
||||
/* Histogram control attributes (mutual exclusive) */
|
||||
if (xbc_node_find_child(hnode, "pause"))
|
||||
append_printf(&buf, end, ":pause");
|
||||
else if (xbc_node_find_child(hnode, "continue"))
|
||||
append_printf(&buf, end, ":continue");
|
||||
else if (xbc_node_find_child(hnode, "clear"))
|
||||
append_printf(&buf, end, ":clear");
|
||||
|
||||
/* Histogram handler and actions */
|
||||
node = xbc_node_find_child(hnode, "onmax");
|
||||
if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0)
|
||||
return -EINVAL;
|
||||
node = xbc_node_find_child(hnode, "onchange");
|
||||
if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0)
|
||||
return -EINVAL;
|
||||
node = xbc_node_find_child(hnode, "onmatch");
|
||||
if (node && trace_boot_hist_add_handlers(node, &buf, end, "event") < 0)
|
||||
return -EINVAL;
|
||||
|
||||
p = xbc_node_find_value(hnode, "filter", NULL);
|
||||
if (p)
|
||||
append_printf(&buf, end, " if %s", p);
|
||||
|
||||
if (buf == end) {
|
||||
pr_err("hist exceeds the max command length.\n");
|
||||
return -E2BIG;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __init
|
||||
trace_boot_init_histograms(struct trace_event_file *file,
|
||||
struct xbc_node *hnode, char *buf, size_t size)
|
||||
{
|
||||
struct xbc_node *node;
|
||||
const char *p;
|
||||
char *tmp;
|
||||
|
||||
xbc_node_for_each_subkey(hnode, node) {
|
||||
p = xbc_node_get_data(node);
|
||||
if (!isdigit(p[0]))
|
||||
continue;
|
||||
/* All nodes whose names start with a digit should be instances. */
|
||||
if (trace_boot_compose_hist_cmd(node, buf, size) == 0) {
|
||||
tmp = kstrdup(buf, GFP_KERNEL);
|
||||
if (trigger_process_regex(file, buf) < 0)
|
||||
pr_err("Failed to apply hist trigger: %s\n", tmp);
|
||||
kfree(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
if (xbc_node_find_child(hnode, "keys")) {
|
||||
if (trace_boot_compose_hist_cmd(hnode, buf, size) == 0) {
|
||||
tmp = kstrdup(buf, GFP_KERNEL);
|
||||
if (trigger_process_regex(file, buf) < 0)
|
||||
pr_err("Failed to apply hist trigger: %s\n", tmp);
|
||||
kfree(tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
static void __init
|
||||
trace_boot_init_histograms(struct trace_event_file *file,
|
||||
struct xbc_node *hnode, char *buf, size_t size)
|
||||
{
|
||||
/* do nothing */
|
||||
}
|
||||
#endif
|
||||
|
||||
static void __init
|
||||
trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
|
||||
struct xbc_node *enode)
|
||||
@@ -205,12 +489,18 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
|
||||
pr_err("Failed to apply filter: %s\n", buf);
|
||||
}
|
||||
|
||||
xbc_node_for_each_array_value(enode, "actions", anode, p) {
|
||||
if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
|
||||
pr_err("action string is too long: %s\n", p);
|
||||
else if (trigger_process_regex(file, buf) < 0)
|
||||
pr_err("Failed to apply an action: %s\n", buf);
|
||||
}
|
||||
if (IS_ENABLED(CONFIG_HIST_TRIGGERS)) {
|
||||
xbc_node_for_each_array_value(enode, "actions", anode, p) {
|
||||
if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
|
||||
pr_err("action string is too long: %s\n", p);
|
||||
else if (trigger_process_regex(file, buf) < 0)
|
||||
pr_err("Failed to apply an action: %s\n", p);
|
||||
}
|
||||
anode = xbc_node_find_child(enode, "hist");
|
||||
if (anode)
|
||||
trace_boot_init_histograms(file, anode, buf, ARRAY_SIZE(buf));
|
||||
} else if (xbc_node_find_value(enode, "actions", NULL))
|
||||
pr_err("Failed to apply event actions because CONFIG_HIST_TRIGGERS is not set.\n");
|
||||
|
||||
if (xbc_node_find_value(enode, "enable", NULL)) {
|
||||
if (trace_event_enable_disable(file, 1, 0) < 0)
|
||||
|
||||
@@ -13,11 +13,49 @@
|
||||
#include <linux/tracefs.h>
|
||||
|
||||
#include "trace.h"
|
||||
#include "trace_output.h" /* for trace_event_sem */
|
||||
#include "trace_dynevent.h"
|
||||
|
||||
static DEFINE_MUTEX(dyn_event_ops_mutex);
|
||||
static LIST_HEAD(dyn_event_ops_list);
|
||||
|
||||
bool trace_event_dyn_try_get_ref(struct trace_event_call *dyn_call)
|
||||
{
|
||||
struct trace_event_call *call;
|
||||
bool ret = false;
|
||||
|
||||
if (WARN_ON_ONCE(!(dyn_call->flags & TRACE_EVENT_FL_DYNAMIC)))
|
||||
return false;
|
||||
|
||||
down_read(&trace_event_sem);
|
||||
list_for_each_entry(call, &ftrace_events, list) {
|
||||
if (call == dyn_call) {
|
||||
atomic_inc(&dyn_call->refcnt);
|
||||
ret = true;
|
||||
}
|
||||
}
|
||||
up_read(&trace_event_sem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void trace_event_dyn_put_ref(struct trace_event_call *call)
|
||||
{
|
||||
if (WARN_ON_ONCE(!(call->flags & TRACE_EVENT_FL_DYNAMIC)))
|
||||
return;
|
||||
|
||||
if (WARN_ON_ONCE(atomic_read(&call->refcnt) <= 0)) {
|
||||
atomic_set(&call->refcnt, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
atomic_dec(&call->refcnt);
|
||||
}
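
/*
 * Illustrative usage sketch, not part of this patch: code that needs to
 * keep a dynamic event definition alive brackets the access with the
 * helpers above, e.g.
 *
 *      if (trace_event_dyn_try_get_ref(call)) {
 *              ... use call ...
 *              trace_event_dyn_put_ref(call);
 *      }
 *
 * so that a removal path can check trace_event_dyn_busy() before tearing
 * the event down.
 */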
bool trace_event_dyn_busy(struct trace_event_call *call)
|
||||
{
|
||||
return atomic_read(&call->refcnt) != 0;
|
||||
}
|
||||
|
||||
int dyn_event_register(struct dyn_event_operations *ops)
|
||||
{
|
||||
if (!ops || !ops->create || !ops->show || !ops->is_busy ||
|
||||
|
||||
@@ -76,13 +76,15 @@ int dyn_event_init(struct dyn_event *ev, struct dyn_event_operations *ops)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int dyn_event_add(struct dyn_event *ev)
|
||||
static inline int dyn_event_add(struct dyn_event *ev,
|
||||
struct trace_event_call *call)
|
||||
{
|
||||
lockdep_assert_held(&event_mutex);
|
||||
|
||||
if (!ev || !ev->ops)
|
||||
return -EINVAL;
|
||||
|
||||
call->flags |= TRACE_EVENT_FL_DYNAMIC;
|
||||
list_add_tail(&ev->list, &dyn_event_list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
903
kernel/trace/trace_eprobe.c
Normal file
@@ -0,0 +1,903 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* event probes
|
||||
*
|
||||
* Part of this code was copied from kernel/trace/trace_kprobe.c written by
|
||||
* Masami Hiramatsu <mhiramat@kernel.org>
|
||||
*
|
||||
* Copyright (C) 2021, VMware Inc, Steven Rostedt <rostedt@goodmis.org>
|
||||
* Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
|
||||
*
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/ftrace.h>
|
||||
|
||||
#include "trace_dynevent.h"
|
||||
#include "trace_probe.h"
|
||||
#include "trace_probe_tmpl.h"
|
||||
|
||||
#define EPROBE_EVENT_SYSTEM "eprobes"
|
||||
|
||||
struct trace_eprobe {
|
||||
/* tracepoint system */
|
||||
const char *event_system;
|
||||
|
||||
/* tracepoint event */
|
||||
const char *event_name;
|
||||
|
||||
struct trace_event_call *event;
|
||||
|
||||
struct dyn_event devent;
|
||||
struct trace_probe tp;
|
||||
};
|
||||
|
||||
struct eprobe_data {
|
||||
struct trace_event_file *file;
|
||||
struct trace_eprobe *ep;
|
||||
};
|
||||
|
||||
static int __trace_eprobe_create(int argc, const char *argv[]);
|
||||
|
||||
static void trace_event_probe_cleanup(struct trace_eprobe *ep)
|
||||
{
|
||||
if (!ep)
|
||||
return;
|
||||
trace_probe_cleanup(&ep->tp);
|
||||
kfree(ep->event_name);
|
||||
kfree(ep->event_system);
|
||||
if (ep->event)
|
||||
trace_event_put_ref(ep->event);
|
||||
kfree(ep);
|
||||
}
|
||||
|
||||
static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
|
||||
{
|
||||
return container_of(ev, struct trace_eprobe, devent);
|
||||
}
|
||||
|
||||
static int eprobe_dyn_event_create(const char *raw_command)
|
||||
{
|
||||
return trace_probe_create(raw_command, __trace_eprobe_create);
|
||||
}
|
||||
|
||||
static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
|
||||
{
|
||||
struct trace_eprobe *ep = to_trace_eprobe(ev);
|
||||
int i;
|
||||
|
||||
seq_printf(m, "e:%s/%s", trace_probe_group_name(&ep->tp),
|
||||
trace_probe_name(&ep->tp));
|
||||
seq_printf(m, " %s.%s", ep->event_system, ep->event_name);
|
||||
|
||||
for (i = 0; i < ep->tp.nr_args; i++)
|
||||
seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
|
||||
seq_putc(m, '\n');
|
||||
|
||||
return 0;
|
||||
}
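
/*
 * Illustrative example (hypothetical probe), not part of this patch: an
 * event probe created with "e:eprobes/p1 sched.sched_waking pid=pid" is
 * shown by this function as "e:eprobes/p1 sched.sched_waking pid=pid",
 * i.e. group/name, the attached system.event, then the argument
 * definitions as originally given.
 */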
static int unregister_trace_eprobe(struct trace_eprobe *ep)
|
||||
{
|
||||
/* If other probes are on the event, just unregister eprobe */
|
||||
if (trace_probe_has_sibling(&ep->tp))
|
||||
goto unreg;
|
||||
|
||||
/* Enabled event can not be unregistered */
|
||||
if (trace_probe_is_enabled(&ep->tp))
|
||||
return -EBUSY;
|
||||
|
||||
/* Will fail if probe is being used by ftrace or perf */
|
||||
if (trace_probe_unregister_event_call(&ep->tp))
|
||||
return -EBUSY;
|
||||
|
||||
unreg:
|
||||
dyn_event_remove(&ep->devent);
|
||||
trace_probe_unlink(&ep->tp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int eprobe_dyn_event_release(struct dyn_event *ev)
|
||||
{
|
||||
struct trace_eprobe *ep = to_trace_eprobe(ev);
|
||||
int ret = unregister_trace_eprobe(ep);
|
||||
|
||||
if (!ret)
|
||||
trace_event_probe_cleanup(ep);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool eprobe_dyn_event_is_busy(struct dyn_event *ev)
|
||||
{
|
||||
struct trace_eprobe *ep = to_trace_eprobe(ev);
|
||||
|
||||
return trace_probe_is_enabled(&ep->tp);
|
||||
}
|
||||
|
||||
static bool eprobe_dyn_event_match(const char *system, const char *event,
|
||||
int argc, const char **argv, struct dyn_event *ev)
|
||||
{
|
||||
struct trace_eprobe *ep = to_trace_eprobe(ev);
|
||||
|
||||
return strcmp(trace_probe_name(&ep->tp), event) == 0 &&
|
||||
(!system || strcmp(trace_probe_group_name(&ep->tp), system) == 0) &&
|
||||
trace_probe_match_command_args(&ep->tp, argc, argv);
|
||||
}
|
||||
|
||||
static struct dyn_event_operations eprobe_dyn_event_ops = {
|
||||
.create = eprobe_dyn_event_create,
|
||||
.show = eprobe_dyn_event_show,
|
||||
.is_busy = eprobe_dyn_event_is_busy,
|
||||
.free = eprobe_dyn_event_release,
|
||||
.match = eprobe_dyn_event_match,
|
||||
};
|
||||
|
||||
static struct trace_eprobe *alloc_event_probe(const char *group,
|
||||
const char *this_event,
|
||||
struct trace_event_call *event,
|
||||
int nargs)
|
||||
{
|
||||
struct trace_eprobe *ep;
|
||||
const char *event_name;
|
||||
const char *sys_name;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
if (!event)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
sys_name = event->class->system;
|
||||
event_name = trace_event_name(event);
|
||||
|
||||
ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
|
||||
if (!ep) {
|
||||
trace_event_put_ref(event); /* ep is NULL here; drop the ref on the attached event directly */
|
||||
goto error;
|
||||
}
|
||||
ep->event = event;
|
||||
ep->event_name = kstrdup(event_name, GFP_KERNEL);
|
||||
if (!ep->event_name)
|
||||
goto error;
|
||||
ep->event_system = kstrdup(sys_name, GFP_KERNEL);
|
||||
if (!ep->event_system)
|
||||
goto error;
|
||||
|
||||
ret = trace_probe_init(&ep->tp, this_event, group, false);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
dyn_event_init(&ep->devent, &eprobe_dyn_event_ops);
|
||||
return ep;
|
||||
error:
|
||||
trace_event_probe_cleanup(ep);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
|
||||
{
|
||||
struct probe_arg *parg = &ep->tp.args[i];
|
||||
struct ftrace_event_field *field;
|
||||
struct list_head *head;
|
||||
|
||||
head = trace_get_fields(ep->event);
|
||||
list_for_each_entry(field, head, link) {
|
||||
if (!strcmp(parg->code->data, field->name)) {
|
||||
kfree(parg->code->data);
|
||||
parg->code->data = field;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
kfree(parg->code->data);
|
||||
parg->code->data = NULL;
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static int eprobe_event_define_fields(struct trace_event_call *event_call)
|
||||
{
|
||||
int ret;
|
||||
struct eprobe_trace_entry_head field;
|
||||
struct trace_probe *tp;
|
||||
|
||||
tp = trace_probe_primary_from_call(event_call);
|
||||
if (WARN_ON_ONCE(!tp))
|
||||
return -ENOENT;
|
||||
|
||||
DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
|
||||
|
||||
return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
|
||||
}
|
||||
|
||||
static struct trace_event_fields eprobe_fields_array[] = {
|
||||
{ .type = TRACE_FUNCTION_TYPE,
|
||||
.define_fields = eprobe_event_define_fields },
|
||||
{}
|
||||
};
|
||||
|
||||
/* Event entry printers */
|
||||
static enum print_line_t
|
||||
print_eprobe_event(struct trace_iterator *iter, int flags,
|
||||
struct trace_event *event)
|
||||
{
|
||||
struct eprobe_trace_entry_head *field;
|
||||
struct trace_event_call *pevent;
|
||||
struct trace_event *probed_event;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
struct trace_probe *tp;
|
||||
|
||||
field = (struct eprobe_trace_entry_head *)iter->ent;
|
||||
tp = trace_probe_primary_from_call(
|
||||
container_of(event, struct trace_event_call, event));
|
||||
if (WARN_ON_ONCE(!tp))
|
||||
goto out;
|
||||
|
||||
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
|
||||
|
||||
probed_event = ftrace_find_event(field->type);
|
||||
if (probed_event) {
|
||||
pevent = container_of(probed_event, struct trace_event_call, event);
|
||||
trace_seq_printf(s, "%s.%s", pevent->class->system,
|
||||
trace_event_name(pevent));
|
||||
} else {
|
||||
trace_seq_printf(s, "%u", field->type);
|
||||
}
|
||||
|
||||
trace_seq_putc(s, ')');
|
||||
|
||||
if (print_probe_args(s, tp->args, tp->nr_args,
|
||||
(u8 *)&field[1], field) < 0)
|
||||
goto out;
|
||||
|
||||
trace_seq_putc(s, '\n');
|
||||
out:
|
||||
return trace_handle_return(s);
|
||||
}
|
||||
|
||||
static unsigned long get_event_field(struct fetch_insn *code, void *rec)
|
||||
{
|
||||
struct ftrace_event_field *field = code->data;
|
||||
unsigned long val;
|
||||
void *addr;
|
||||
|
||||
addr = rec + field->offset;
|
||||
|
||||
switch (field->size) {
|
||||
case 1:
|
||||
if (field->is_signed)
|
||||
val = *(char *)addr;
|
||||
else
|
||||
val = *(unsigned char *)addr;
|
||||
break;
|
||||
case 2:
|
||||
if (field->is_signed)
|
||||
val = *(short *)addr;
|
||||
else
|
||||
val = *(unsigned short *)addr;
|
||||
break;
|
||||
case 4:
|
||||
if (field->is_signed)
|
||||
val = *(int *)addr;
|
||||
else
|
||||
val = *(unsigned int *)addr;
|
||||
break;
|
||||
default:
|
||||
if (field->is_signed)
|
||||
val = *(long *)addr;
|
||||
else
|
||||
val = *(unsigned long *)addr;
|
||||
break;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
static int get_eprobe_size(struct trace_probe *tp, void *rec)
|
||||
{
|
||||
struct probe_arg *arg;
|
||||
int i, len, ret = 0;
|
||||
|
||||
for (i = 0; i < tp->nr_args; i++) {
|
||||
arg = tp->args + i;
|
||||
if (unlikely(arg->dynamic)) {
|
||||
unsigned long val;
|
||||
|
||||
val = get_event_field(arg->code, rec);
|
||||
len = process_fetch_insn_bottom(arg->code + 1, val, NULL, NULL);
|
||||
if (len > 0)
|
||||
ret += len;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Kprobe specific fetch functions */

/* Note that we don't verify it, since the code does not come from user space */
static int
process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
		   void *base)
{
	unsigned long val;

	val = get_event_field(code, rec);
	return process_fetch_insn_bottom(code + 1, val, dest, base);
}
NOKPROBE_SYMBOL(process_fetch_insn)

/* Return the length of string -- including null terminal byte */
static nokprobe_inline int
fetch_store_strlen_user(unsigned long addr)
{
	const void __user *uaddr = (__force const void __user *)addr;

	return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
}

/* Return the length of string -- including null terminal byte */
static nokprobe_inline int
fetch_store_strlen(unsigned long addr)
{
	int ret, len = 0;
	u8 c;

#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	if (addr < TASK_SIZE)
		return fetch_store_strlen_user(addr);
#endif

	do {
		ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);

	return (ret < 0) ? ret : len;
}

/*
 * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
 * with max length and relative data location.
 */
static nokprobe_inline int
fetch_store_string_user(unsigned long addr, void *dest, void *base)
{
	const void __user *uaddr = (__force const void __user *)addr;
	int maxlen = get_loc_len(*(u32 *)dest);
	void *__dest;
	long ret;

	if (unlikely(!maxlen))
		return -ENOMEM;

	__dest = get_loc_data(dest, base);

	ret = strncpy_from_user_nofault(__dest, uaddr, maxlen);
	if (ret >= 0)
		*(u32 *)dest = make_data_loc(ret, __dest - base);

	return ret;
}

/*
 * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
 * length and relative data location.
 */
static nokprobe_inline int
fetch_store_string(unsigned long addr, void *dest, void *base)
{
	int maxlen = get_loc_len(*(u32 *)dest);
	void *__dest;
	long ret;

#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	if ((unsigned long)addr < TASK_SIZE)
		return fetch_store_string_user(addr, dest, base);
#endif

	if (unlikely(!maxlen))
		return -ENOMEM;

	__dest = get_loc_data(dest, base);

	/*
	 * Try to get string again, since the string can be changed while
	 * probing.
	 */
	ret = strncpy_from_kernel_nofault(__dest, (void *)addr, maxlen);
	if (ret >= 0)
		*(u32 *)dest = make_data_loc(ret, __dest - base);

	return ret;
}

static nokprobe_inline int
probe_mem_read_user(void *dest, void *src, size_t size)
{
	const void __user *uaddr = (__force const void __user *)src;

	return copy_from_user_nofault(dest, uaddr, size);
}

static nokprobe_inline int
probe_mem_read(void *dest, void *src, size_t size)
{
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	if ((unsigned long)src < TASK_SIZE)
		return probe_mem_read_user(dest, src, size);
#endif
	return copy_from_kernel_nofault(dest, src, size);
}

/* eprobe handler */
static inline void
__eprobe_trace_func(struct eprobe_data *edata, void *rec)
{
	struct eprobe_trace_entry_head *entry;
	struct trace_event_call *call = trace_probe_event_call(&edata->ep->tp);
	struct trace_event_buffer fbuffer;
	int dsize;

	if (WARN_ON_ONCE(call != edata->file->event_call))
		return;

	if (trace_trigger_soft_disabled(edata->file))
		return;

	fbuffer.trace_ctx = tracing_gen_ctx();
	fbuffer.trace_file = edata->file;

	dsize = get_eprobe_size(&edata->ep->tp, rec);
	fbuffer.regs = NULL;

	fbuffer.event =
		trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file,
					call->event.type,
					sizeof(*entry) + edata->ep->tp.size + dsize,
					fbuffer.trace_ctx);
	if (!fbuffer.event)
		return;

	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
	if (edata->ep->event)
		entry->type = edata->ep->event->event.type;
	else
		entry->type = 0;
	store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);

	trace_event_buffer_commit(&fbuffer);
}

/*
 * The event probe implementation uses event triggers to get access to
 * the event it is attached to, but is not an actual trigger. The below
 * functions are just stubs to fulfill what is needed to use the trigger
 * infrastructure.
 */
static int eprobe_trigger_init(struct event_trigger_ops *ops,
			       struct event_trigger_data *data)
{
	return 0;
}

static void eprobe_trigger_free(struct event_trigger_ops *ops,
				struct event_trigger_data *data)
{

}

static int eprobe_trigger_print(struct seq_file *m,
				struct event_trigger_ops *ops,
				struct event_trigger_data *data)
{
	/* Do not print eprobe event triggers */
	return 0;
}

static void eprobe_trigger_func(struct event_trigger_data *data,
				struct trace_buffer *buffer, void *rec,
				struct ring_buffer_event *rbe)
{
	struct eprobe_data *edata = data->private_data;

	__eprobe_trace_func(edata, rec);
}

static struct event_trigger_ops eprobe_trigger_ops = {
	.func = eprobe_trigger_func,
	.print = eprobe_trigger_print,
	.init = eprobe_trigger_init,
	.free = eprobe_trigger_free,
};

static int eprobe_trigger_cmd_func(struct event_command *cmd_ops,
				   struct trace_event_file *file,
				   char *glob, char *cmd, char *param)
{
	return -1;
}

static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops,
				   struct event_trigger_data *data,
				   struct trace_event_file *file)
{
	return -1;
}

static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops,
				      struct event_trigger_data *data,
				      struct trace_event_file *file)
{

}

static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
							char *param)
{
	return &eprobe_trigger_ops;
}

static struct event_command event_trigger_cmd = {
	.name = "eprobe",
	.trigger_type = ETT_EVENT_EPROBE,
	.flags = EVENT_CMD_FL_NEEDS_REC,
	.func = eprobe_trigger_cmd_func,
	.reg = eprobe_trigger_reg_func,
	.unreg = eprobe_trigger_unreg_func,
	.unreg_all = NULL,
	.get_trigger_ops = eprobe_trigger_get_ops,
	.set_filter = NULL,
};

static struct event_trigger_data *
new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
{
	struct event_trigger_data *trigger;
	struct eprobe_data *edata;

	edata = kzalloc(sizeof(*edata), GFP_KERNEL);
	trigger = kzalloc(sizeof(*trigger), GFP_KERNEL);
	if (!trigger || !edata) {
		kfree(edata);
		kfree(trigger);
		return ERR_PTR(-ENOMEM);
	}

	trigger->flags = EVENT_TRIGGER_FL_PROBE;
	trigger->count = -1;
	trigger->ops = &eprobe_trigger_ops;

	/*
	 * EVENT PROBE triggers are not registered as commands with
	 * register_event_command(), as they are not controlled by the user
	 * from the trigger file
	 */
	trigger->cmd_ops = &event_trigger_cmd;

	INIT_LIST_HEAD(&trigger->list);
	RCU_INIT_POINTER(trigger->filter, NULL);

	edata->file = file;
	edata->ep = ep;
	trigger->private_data = edata;

	return trigger;
}

static int enable_eprobe(struct trace_eprobe *ep,
			 struct trace_event_file *eprobe_file)
{
	struct event_trigger_data *trigger;
	struct trace_event_file *file;
	struct trace_array *tr = eprobe_file->tr;

	file = find_event_file(tr, ep->event_system, ep->event_name);
	if (!file)
		return -ENOENT;
	trigger = new_eprobe_trigger(ep, eprobe_file);
	if (IS_ERR(trigger))
		return PTR_ERR(trigger);

	list_add_tail_rcu(&trigger->list, &file->triggers);

	trace_event_trigger_enable_disable(file, 1);
	update_cond_flag(file);

	return 0;
}

static struct trace_event_functions eprobe_funcs = {
	.trace = print_eprobe_event
};

static int disable_eprobe(struct trace_eprobe *ep,
			  struct trace_array *tr)
{
	struct event_trigger_data *trigger;
	struct trace_event_file *file;
	struct eprobe_data *edata;

	file = find_event_file(tr, ep->event_system, ep->event_name);
	if (!file)
		return -ENOENT;

	list_for_each_entry(trigger, &file->triggers, list) {
		if (!(trigger->flags & EVENT_TRIGGER_FL_PROBE))
			continue;
		edata = trigger->private_data;
		if (edata->ep == ep)
			break;
	}
	if (list_entry_is_head(trigger, &file->triggers, list))
		return -ENODEV;

	list_del_rcu(&trigger->list);

	trace_event_trigger_enable_disable(file, 0);
	update_cond_flag(file);
	return 0;
}

static int enable_trace_eprobe(struct trace_event_call *call,
			       struct trace_event_file *file)
{
	struct trace_probe *pos, *tp;
	struct trace_eprobe *ep;
	bool enabled;
	int ret = 0;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return -ENODEV;
	enabled = trace_probe_is_enabled(tp);

	/* This also changes "enabled" state */
	if (file) {
		ret = trace_probe_add_file(tp, file);
		if (ret)
			return ret;
	} else
		trace_probe_set_flag(tp, TP_FLAG_PROFILE);

	if (enabled)
		return 0;

	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
		ep = container_of(pos, struct trace_eprobe, tp);
		ret = enable_eprobe(ep, file);
		if (ret)
			break;
		enabled = true;
	}

	if (ret) {
		/* Failed to enable one of them. Roll back all */
		if (enabled)
			disable_eprobe(ep, file->tr);
		if (file)
			trace_probe_remove_file(tp, file);
		else
			trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
	}

	return ret;
}

static int disable_trace_eprobe(struct trace_event_call *call,
				struct trace_event_file *file)
{
	struct trace_probe *pos, *tp;
	struct trace_eprobe *ep;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return -ENODEV;

	if (file) {
		if (!trace_probe_get_file_link(tp, file))
			return -ENOENT;
		if (!trace_probe_has_single_file(tp))
			goto out;
		trace_probe_clear_flag(tp, TP_FLAG_TRACE);
	} else
		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);

	if (!trace_probe_is_enabled(tp)) {
		list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
			ep = container_of(pos, struct trace_eprobe, tp);
			disable_eprobe(ep, file->tr);
		}
	}

out:
	if (file)
		/*
		 * Synchronization is done in below function. For perf event,
		 * file == NULL and perf_trace_event_unreg() calls
		 * tracepoint_synchronize_unregister() to ensure synchronize
		 * event. We don't need to care about it.
		 */
		trace_probe_remove_file(tp, file);

	return 0;
}

static int eprobe_register(struct trace_event_call *event,
			   enum trace_reg type, void *data)
{
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return enable_trace_eprobe(event, file);
	case TRACE_REG_UNREGISTER:
		return disable_trace_eprobe(event, file);
#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
	case TRACE_REG_PERF_UNREGISTER:
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static inline void init_trace_eprobe_call(struct trace_eprobe *ep)
{
	struct trace_event_call *call = trace_probe_event_call(&ep->tp);

	call->flags = TRACE_EVENT_FL_EPROBE;
	call->event.funcs = &eprobe_funcs;
	call->class->fields_array = eprobe_fields_array;
	call->class->reg = eprobe_register;
}

static struct trace_event_call *
find_and_get_event(const char *system, const char *event_name)
{
	struct trace_event_call *tp_event;
	const char *name;

	list_for_each_entry(tp_event, &ftrace_events, list) {
		/* Skip other probes and ftrace events */
		if (tp_event->flags &
		    (TRACE_EVENT_FL_IGNORE_ENABLE |
		     TRACE_EVENT_FL_KPROBE |
		     TRACE_EVENT_FL_UPROBE |
		     TRACE_EVENT_FL_EPROBE))
			continue;
		if (!tp_event->class->system ||
		    strcmp(system, tp_event->class->system))
			continue;
		name = trace_event_name(tp_event);
		if (!name || strcmp(event_name, name))
			continue;
		if (!trace_event_try_get_ref(tp_event)) {
			return NULL;
			break;
		}
		return tp_event;
		break;
	}
	return NULL;
}

static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
{
	unsigned int flags = TPARG_FL_KERNEL | TPARG_FL_TPOINT;
	int ret;

	ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], flags);
	if (ret)
		return ret;

	if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG)
		ret = trace_eprobe_tp_arg_update(ep, i);

	return ret;
}

static int __trace_eprobe_create(int argc, const char *argv[])
{
	/*
	 * Argument syntax:
	 *	e[:[GRP/]ENAME] SYSTEM.EVENT [FETCHARGS]
	 * Fetch args:
	 *	<name>=$<field>[:TYPE]
	 */
	const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
	const char *sys_event = NULL, *sys_name = NULL;
	struct trace_event_call *event_call;
	struct trace_eprobe *ep = NULL;
	char buf1[MAX_EVENT_NAME_LEN];
	char buf2[MAX_EVENT_NAME_LEN];
	int ret = 0;
	int i;

	if (argc < 2 || argv[0][0] != 'e')
		return -ECANCELED;

	trace_probe_log_init("event_probe", argc, argv);

	event = strchr(&argv[0][1], ':');
	if (event) {
		event++;
		ret = traceprobe_parse_event_name(&event, &group, buf1,
						  event - argv[0]);
		if (ret)
			goto parse_error;
	} else {
		strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
		sanitize_event_name(buf1);
		event = buf1;
	}
	if (!is_good_name(event) || !is_good_name(group))
		goto parse_error;

	sys_event = argv[1];
	ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2,
					  sys_event - argv[1]);
	if (ret || !sys_name)
		goto parse_error;
	if (!is_good_name(sys_event) || !is_good_name(sys_name))
		goto parse_error;

	mutex_lock(&event_mutex);
	event_call = find_and_get_event(sys_name, sys_event);
	ep = alloc_event_probe(group, event, event_call, argc - 2);
	mutex_unlock(&event_mutex);

	if (IS_ERR(ep)) {
		ret = PTR_ERR(ep);
		/* This must return -ENOMEM, else there is a bug */
		WARN_ON_ONCE(ret != -ENOMEM);
		goto error;	/* We know ep is not allocated */
	}

	argc -= 2; argv += 2;
	/* parse arguments */
	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
		trace_probe_log_set_index(i + 2);
		ret = trace_eprobe_tp_update_arg(ep, argv, i);
		if (ret)
			goto error;
	}
	ret = traceprobe_set_print_fmt(&ep->tp, PROBE_PRINT_EVENT);
	if (ret < 0)
		goto error;
	init_trace_eprobe_call(ep);
	mutex_lock(&event_mutex);
	ret = trace_probe_register_event_call(&ep->tp);
	if (ret) {
		if (ret == -EEXIST) {
			trace_probe_log_set_index(0);
			trace_probe_log_err(0, EVENT_EXIST);
		}
		mutex_unlock(&event_mutex);
		goto error;
	}
	ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
	mutex_unlock(&event_mutex);
	return ret;

parse_error:
	ret = -EINVAL;
error:
	trace_event_probe_cleanup(ep);
	return ret;
}

/*
 * Register dynevent at core_initcall. This allows kernel to setup eprobe
 * events in postcore_initcall without tracefs.
 */
static __init int trace_events_eprobe_init_early(void)
{
	int err = 0;

	err = dyn_event_register(&eprobe_dyn_event_ops);
	if (err)
		pr_warn("Could not register eprobe_dyn_event_ops\n");

	return err;
}
core_initcall(trace_events_eprobe_init_early);
@@ -177,7 +177,7 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
		}
	}
out:
	module_put(tp_event->mod);
	trace_event_put_ref(tp_event);
}

static int perf_trace_event_open(struct perf_event *p_event)
@@ -224,10 +224,10 @@ int perf_trace_init(struct perf_event *p_event)
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    try_module_get(tp_event->mod)) {
		    trace_event_try_get_ref(tp_event)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				module_put(tp_event->mod);
				trace_event_put_ref(tp_event);
			break;
		}
	}

@@ -2525,7 +2525,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
		return ret;

	list_add(&call->list, &ftrace_events);
	call->mod = mod;
	if (call->flags & TRACE_EVENT_FL_DYNAMIC)
		atomic_set(&call->refcnt, 0);
	else
		call->module = mod;

	return 0;
}
@@ -2839,7 +2842,9 @@ static void trace_module_remove_events(struct module *mod)

	down_write(&trace_event_sem);
	list_for_each_entry_safe(call, p, &ftrace_events, list) {
		if (call->mod == mod)
		if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
			continue;
		if (call->module == mod)
			__trace_remove_event_call(call);
	}
	up_write(&trace_event_sem);
@@ -2982,7 +2987,7 @@ struct trace_event_file *trace_get_event_file(const char *instance,
	}

	/* Don't let event modules unload while in use */
	ret = try_module_get(file->event_call->mod);
	ret = trace_event_try_get_ref(file->event_call);
	if (!ret) {
		trace_array_put(tr);
		ret = -EBUSY;
@@ -3012,7 +3017,7 @@ EXPORT_SYMBOL_GPL(trace_get_event_file);
void trace_put_event_file(struct trace_event_file *file)
{
	mutex_lock(&event_mutex);
	module_put(file->event_call->mod);
	trace_event_put_ref(file->event_call);
	mutex_unlock(&event_mutex);

	trace_array_put(file->tr);
@@ -3147,7 +3152,7 @@ static int free_probe_data(void *data)
	if (!edata->ref) {
		/* Remove the SOFT_MODE flag */
		__ftrace_event_enable_disable(edata->file, 0, 1);
		module_put(edata->file->event_call->mod);
		trace_event_put_ref(edata->file->event_call);
		kfree(edata);
	}
	return 0;
@@ -3280,7 +3285,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,

out_reg:
	/* Don't let event modules unload while probe registered */
	ret = try_module_get(file->event_call->mod);
	ret = trace_event_try_get_ref(file->event_call);
	if (!ret) {
		ret = -EBUSY;
		goto out_free;
@@ -3310,7 +3315,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
out_disable:
	__ftrace_event_enable_disable(file, 0, 1);
out_put:
	module_put(file->event_call->mod);
	trace_event_put_ref(file->event_call);
out_free:
	kfree(data);
	goto out;
@@ -3376,7 +3381,8 @@ void __trace_early_add_events(struct trace_array *tr)

	list_for_each_entry(call, &ftrace_events, list) {
		/* Early boot up should not have any modules loaded */
		if (WARN_ON_ONCE(call->mod))
		if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
		    WARN_ON_ONCE(call->module))
			continue;

		ret = __trace_early_add_new_event(call, tr);