Merge 996fe06160 ("Merge tag 'kgdb-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/danielt/linux") into android-mainline

Steps on the way to 5.15-rc1

Change-Id: I3806b714a5a783a7132b1daf766ebb71985fc640
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
This commit is contained in:
Greg Kroah-Hartman
2021-09-14 16:06:34 +02:00
287 changed files with 44082 additions and 35293 deletions

View File

@@ -125,6 +125,71 @@ Note that kprobe and synthetic event definitions can be written under
instance node, but those are also visible from other instances. So please
take care to avoid event name conflicts.
Ftrace Histogram Options
------------------------
Since it is too long to write a histogram action as a string for per-event
action option, there are tree-style options under per-event 'hist' subkey
for the histogram actions. For the details of each parameter,
please read the event histogram document [3]_.
.. [3] See :ref:`Documentation/trace/histogram.rst <histogram>`
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]keys = KEY1[, KEY2[...]]
Set histogram key parameters. (Mandatory)
The 'N' is a digit string for multiple histograms. You can omit it
if there is only one histogram on the event.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]values = VAL1[, VAL2[...]]
Set histogram value parameters.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]sort = SORT1[, SORT2[...]]
Set histogram sort parameter options.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]size = NR_ENTRIES
Set histogram size (number of entries).
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]name = NAME
Set histogram name.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]var.VARIABLE = EXPR
Define a new VARIABLE by EXPR expression.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<pause|continue|clear>
Set histogram control parameter. You can set one of them.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmatch.[M.]event = GROUP.EVENT
Set histogram 'onmatch' handler matching event parameter.
The 'M' is a digit string for the multiple 'onmatch' handler. You can omit it
if there is one 'onmatch' handler on this histogram.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmatch.[M.]trace = EVENT[, ARG1[...]]
Set histogram 'trace' action for 'onmatch'.
EVENT must be a synthetic event name, and ARG1... are parameters
for that event. Mandatory if 'onmatch.event' option is set.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onmax.[M.]var = VAR
Set histogram 'onmax' handler variable parameter.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]onchange.[M.]var = VAR
Set histogram 'onchange' handler variable parameter.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<onmax|onchange>.[M.]save = ARG1[, ARG2[...]]
Set histogram 'save' action parameters for 'onmax' or 'onchange' handler.
Either this option or the 'snapshot' option below is mandatory if the
'onmax.var' or 'onchange.var' option is set.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.[N.]<onmax|onchange>.[M.]snapshot
Set histogram 'snapshot' action for 'onmax' or 'onchange' handler.
Either this option or the 'save' option above is mandatory if the
'onmax.var' or 'onchange.var' option is set.
ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist.filter = FILTER_EXPR
Set histogram filter expression. You don't need 'if' in the FILTER_EXPR.
Note that this 'hist' option can conflict with the per-event 'actions'
option if the 'actions' option has a histogram action.
When to Start
=============
@@ -159,13 +224,23 @@ below::
}
synthetic.initcall_latency {
fields = "unsigned long func", "u64 lat"
actions = "hist:keys=func.sym,lat:vals=lat:sort=lat"
hist {
keys = func.sym, lat
values = lat
sort = lat
}
}
initcall.initcall_start {
actions = "hist:keys=func:ts0=common_timestamp.usecs"
initcall.initcall_start.hist {
keys = func
var.ts0 = common_timestamp.usecs
}
initcall.initcall_finish {
actions = "hist:keys=func:lat=common_timestamp.usecs-$ts0:onmatch(initcall.initcall_start).initcall_latency(func,$lat)"
initcall.initcall_finish.hist {
keys = func
var.lat = common_timestamp.usecs - $ts0
onmatch {
event = initcall.initcall_start
trace = initcall_latency, func, $lat
}
}
}

View File

@@ -70,15 +70,16 @@ Documentation written by Tom Zanussi
modified by appending any of the following modifiers to the field
name:
=========== ==========================================
.hex display a number as a hex value
.sym display an address as a symbol
.sym-offset display an address as a symbol and offset
.syscall display a syscall id as a system call name
.execname display a common_pid as a program name
.log2 display log2 value rather than raw number
.usecs display a common_timestamp in microseconds
=========== ==========================================
============= =================================================
.hex display a number as a hex value
.sym display an address as a symbol
.sym-offset display an address as a symbol and offset
.syscall display a syscall id as a system call name
.execname display a common_pid as a program name
.log2 display log2 value rather than raw number
.buckets=size display grouping of values rather than raw number
.usecs display a common_timestamp in microseconds
============= =================================================
Note that in general the semantics of a given field aren't
interpreted when applying a modifier to it, but there are some
@@ -228,7 +229,7 @@ Extended error information
that lists the total number of bytes requested for each function in
the kernel that made one or more calls to kmalloc::
# echo 'hist:key=call_site:val=bytes_req' > \
# echo 'hist:key=call_site:val=bytes_req.buckets=32' > \
/sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
This tells the tracing system to create a 'hist' trigger using the
@@ -1823,20 +1824,99 @@ and variables defined on other events (see Section 2.2.3 below on
how that is done using hist trigger 'onmatch' action). Once that is
done, the 'wakeup_latency' synthetic event instance is created.
A histogram can now be defined for the new synthetic event::
# echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
/sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
The new event is created under the tracing/events/synthetic/ directory
and looks and behaves just like any other event::
# ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
enable filter format hist id trigger
A histogram can now be defined for the new synthetic event::
# echo 'hist:keys=pid,prio,lat.log2:sort=lat' >> \
/sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
The above shows the latency "lat" in a power of 2 grouping.
Like any other event, once a histogram is enabled for the event, the
output can be displayed by reading the event's 'hist' file.
# cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist
# event histogram
#
# trigger info: hist:keys=pid,prio,lat.log2:vals=hitcount:sort=lat.log2:size=2048 [active]
#
{ pid: 2035, prio: 9, lat: ~ 2^2 } hitcount: 43
{ pid: 2034, prio: 9, lat: ~ 2^2 } hitcount: 60
{ pid: 2029, prio: 9, lat: ~ 2^2 } hitcount: 965
{ pid: 2034, prio: 120, lat: ~ 2^2 } hitcount: 9
{ pid: 2033, prio: 120, lat: ~ 2^2 } hitcount: 5
{ pid: 2030, prio: 9, lat: ~ 2^2 } hitcount: 335
{ pid: 2030, prio: 120, lat: ~ 2^2 } hitcount: 10
{ pid: 2032, prio: 120, lat: ~ 2^2 } hitcount: 1
{ pid: 2035, prio: 120, lat: ~ 2^2 } hitcount: 2
{ pid: 2031, prio: 9, lat: ~ 2^2 } hitcount: 176
{ pid: 2028, prio: 120, lat: ~ 2^2 } hitcount: 15
{ pid: 2033, prio: 9, lat: ~ 2^2 } hitcount: 91
{ pid: 2032, prio: 9, lat: ~ 2^2 } hitcount: 125
{ pid: 2029, prio: 120, lat: ~ 2^2 } hitcount: 4
{ pid: 2031, prio: 120, lat: ~ 2^2 } hitcount: 3
{ pid: 2029, prio: 120, lat: ~ 2^3 } hitcount: 2
{ pid: 2035, prio: 9, lat: ~ 2^3 } hitcount: 41
{ pid: 2030, prio: 120, lat: ~ 2^3 } hitcount: 1
{ pid: 2032, prio: 9, lat: ~ 2^3 } hitcount: 32
{ pid: 2031, prio: 9, lat: ~ 2^3 } hitcount: 44
{ pid: 2034, prio: 9, lat: ~ 2^3 } hitcount: 40
{ pid: 2030, prio: 9, lat: ~ 2^3 } hitcount: 29
{ pid: 2033, prio: 9, lat: ~ 2^3 } hitcount: 31
{ pid: 2029, prio: 9, lat: ~ 2^3 } hitcount: 31
{ pid: 2028, prio: 120, lat: ~ 2^3 } hitcount: 18
{ pid: 2031, prio: 120, lat: ~ 2^3 } hitcount: 2
{ pid: 2028, prio: 120, lat: ~ 2^4 } hitcount: 1
{ pid: 2029, prio: 9, lat: ~ 2^4 } hitcount: 4
{ pid: 2031, prio: 120, lat: ~ 2^7 } hitcount: 1
{ pid: 2032, prio: 120, lat: ~ 2^7 } hitcount: 1
Totals:
Hits: 2122
Entries: 30
Dropped: 0
The latency values can also be grouped linearly by a given size by using
the ".buckets" modifier and specifying a size (in this case groups of 10).
# echo 'hist:keys=pid,prio,lat.buckets=10:sort=lat' >> \
/sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
# event histogram
#
# trigger info: hist:keys=pid,prio,lat.buckets=10:vals=hitcount:sort=lat.buckets=10:size=2048 [active]
#
{ pid: 2067, prio: 9, lat: ~ 0-9 } hitcount: 220
{ pid: 2068, prio: 9, lat: ~ 0-9 } hitcount: 157
{ pid: 2070, prio: 9, lat: ~ 0-9 } hitcount: 100
{ pid: 2067, prio: 120, lat: ~ 0-9 } hitcount: 6
{ pid: 2065, prio: 120, lat: ~ 0-9 } hitcount: 2
{ pid: 2066, prio: 120, lat: ~ 0-9 } hitcount: 2
{ pid: 2069, prio: 9, lat: ~ 0-9 } hitcount: 122
{ pid: 2069, prio: 120, lat: ~ 0-9 } hitcount: 8
{ pid: 2070, prio: 120, lat: ~ 0-9 } hitcount: 1
{ pid: 2068, prio: 120, lat: ~ 0-9 } hitcount: 7
{ pid: 2066, prio: 9, lat: ~ 0-9 } hitcount: 365
{ pid: 2064, prio: 120, lat: ~ 0-9 } hitcount: 35
{ pid: 2065, prio: 9, lat: ~ 0-9 } hitcount: 998
{ pid: 2071, prio: 9, lat: ~ 0-9 } hitcount: 85
{ pid: 2065, prio: 9, lat: ~ 10-19 } hitcount: 2
{ pid: 2064, prio: 120, lat: ~ 10-19 } hitcount: 2
Totals:
Hits: 2112
Entries: 16
Dropped: 0
2.2.3 Hist trigger 'handlers' and 'actions'
-------------------------------------------

View File

@@ -18986,6 +18986,20 @@ F: arch/x86/mm/testmmiotrace.c
F: include/linux/mmiotrace.h
F: kernel/trace/trace_mmiotrace.c
TRACING OS NOISE / LATENCY TRACERS
M: Steven Rostedt <rostedt@goodmis.org>
M: Daniel Bristot de Oliveira <bristot@kernel.org>
S: Maintained
F: kernel/trace/trace_osnoise.c
F: include/trace/events/osnoise.h
F: kernel/trace/trace_hwlat.c
F: kernel/trace/trace_irqsoff.c
F: kernel/trace/trace_sched_wakeup.c
F: Documentation/trace/osnoise-tracer.rst
F: Documentation/trace/timerlat-tracer.rst
F: Documentation/trace/hwlat_detector.rst
F: arch/*/kernel/trace.c
TRADITIONAL CHINESE DOCUMENTATION
M: Hu Haowen <src.res@email.cn>
L: linux-doc-tw-discuss@lists.sourceforge.net
@@ -19166,9 +19180,8 @@ W: http://dotat.at/prog/unifdef
F: scripts/unifdef.c
UNIFORM CDROM DRIVER
M: Jens Axboe <axboe@kernel.dk>
M: Phillip Potter <phil@philpotter.co.uk>
S: Maintained
W: http://www.kernel.dk
F: Documentation/cdrom/
F: drivers/cdrom/cdrom.c
F: include/linux/cdrom.h

View File

@@ -197,6 +197,9 @@ config HAVE_FUNCTION_ERROR_INJECTION
config HAVE_NMI
bool
config TRACE_IRQFLAGS_SUPPORT
bool
#
# An arch should select this if it provides all these things:
#

View File

@@ -49,9 +49,7 @@ config ARC
select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
select SET_FS
config TRACE_IRQFLAGS_SUPPORT
def_bool y
select TRACE_IRQFLAGS_SUPPORT
config LOCKDEP_SUPPORT
def_bool y
@@ -116,16 +114,9 @@ choice
default ARC_CPU_770 if ISA_ARCOMPACT
default ARC_CPU_HS if ISA_ARCV2
if ISA_ARCOMPACT
config ARC_CPU_750D
bool "ARC750D"
select ARC_CANT_LLSC
help
Support for ARC750 core
config ARC_CPU_770
bool "ARC770"
depends on ISA_ARCOMPACT
select ARC_HAS_SWAPE
help
Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
@@ -135,8 +126,6 @@ config ARC_CPU_770
-Caches: New Prog Model, Region Flush
-Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
endif #ISA_ARCOMPACT
config ARC_CPU_HS
bool "ARC-HS"
depends on ISA_ARCV2
@@ -274,33 +263,17 @@ config ARC_DCCM_BASE
choice
prompt "MMU Version"
default ARC_MMU_V3 if ARC_CPU_770
default ARC_MMU_V2 if ARC_CPU_750D
default ARC_MMU_V4 if ARC_CPU_HS
if ISA_ARCOMPACT
config ARC_MMU_V1
bool "MMU v1"
help
Orig ARC700 MMU
config ARC_MMU_V2
bool "MMU v2"
help
Fixed the deficiency of v1 - possible thrashing in memcpy scenario
when 2 D-TLB and 1 I-TLB entries index into same 2way set.
default ARC_MMU_V3 if ISA_ARCOMPACT
default ARC_MMU_V4 if ISA_ARCV2
config ARC_MMU_V3
bool "MMU v3"
depends on ARC_CPU_770
depends on ISA_ARCOMPACT
help
Introduced with ARC700 4.10: New Features
Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
Shared Address Spaces (SASID)
endif
config ARC_MMU_V4
bool "MMU v4"
depends on ISA_ARCV2
@@ -319,7 +292,6 @@ config ARC_PAGE_SIZE_8K
config ARC_PAGE_SIZE_16K
bool "16KB"
depends on ARC_MMU_V3 || ARC_MMU_V4
config ARC_PAGE_SIZE_4K
bool "4KB"
@@ -340,6 +312,10 @@ config ARC_HUGEPAGE_16M
endchoice
config PGTABLE_LEVELS
int "Number of Page table levels"
default 2
config ARC_COMPACT_IRQ_LEVELS
depends on ISA_ARCOMPACT
bool "Setup Timer IRQ as high Priority"
@@ -563,9 +539,6 @@ config ARC_DW2_UNWIND
If you don't debug the kernel, you can say N, but we may not be able
to solve problems without frame unwind information
config ARC_DBG_TLB_PARANOIA
bool "Paranoia Checks in Low Level TLB Handlers"
config ARC_DBG_JUMP_LABEL
bool "Paranoid checks in Static Keys (jump labels) code"
depends on JUMP_LABEL

View File

@@ -0,0 +1,97 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_ARC_ATOMIC_LLSC_H
#define _ASM_ARC_ATOMIC_LLSC_H
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
/*
 * ATOMIC_OP - generate arch_atomic_<op>(i, v).
 *
 * The generated function applies <asm_op> with operand @i to v->counter
 * inside an LLOCK/SCOND loop (the "bnz 1b" branches back to the llock) and
 * returns nothing. No smp_mb() is issued. @c_op is not used by this
 * variant; it is only part of the shared macro signature.
 *
 * The macro ends at the closing brace: there is deliberately no trailing
 * line continuation, so the definition never swallows whatever line follows.
 */
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
	unsigned int val; \
	\
	__asm__ __volatile__( \
	"1: llock %[val], [%[ctr]] \n" \
	" " #asm_op " %[val], %[val], %[i] \n" \
	" scond %[val], [%[ctr]] \n" \
	" bnz 1b \n" \
	: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
	: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
	[i] "ir" (i) \
	: "cc"); \
}
/*
 * ATOMIC_OP_RETURN - generate arch_atomic_<op>_return_relaxed(i, v).
 *
 * Same llock/<asm_op>/scond loop as ATOMIC_OP, but the value that was
 * computed and stored (val) is returned. No smp_mb() is issued in this
 * function, in line with the _relaxed suffix. @c_op is unused here.
 */
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
{ \
unsigned int val; \
\
__asm__ __volatile__( \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
\
return val; \
}
/*
 * NOTE(review): self-referential defines; presumably they tell the generic
 * atomic headers that the arch supplies these _relaxed ops - confirm.
 */
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
/*
 * ATOMIC_FETCH_OP - generate arch_atomic_fetch_<op>_relaxed(i, v).
 *
 * The llock loads the current counter into 'orig', <asm_op> computes the
 * new value into 'val', and scond stores 'val'. The function returns
 * 'orig', i.e. the value loaded before the operation. No smp_mb() is
 * issued in this function. @c_op is unused here.
 */
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
{ \
unsigned int val, orig; \
\
__asm__ __volatile__( \
"1: llock %[orig], [%[ctr]] \n" \
" " #asm_op " %[val], %[orig], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val), \
[orig] "=&r" (orig) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
\
return orig; \
}
/*
 * NOTE(review): self-referential defines; presumably they tell the generic
 * atomic headers that the arch supplies these _relaxed ops - confirm.
 */
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
/* add/sub: emit all three flavours (void op, op_return, fetch_op) */
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_OP_RETURN(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#undef ATOMIC_OPS
/* bitwise ops: only the void op and fetch_op flavours, no op_return */
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(and, &=, and)
/* andnot is implemented with the 'bic' instruction */
ATOMIC_OPS(andnot, &= ~, bic)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
#define arch_atomic_andnot arch_atomic_andnot
/* the generator macros are private to this header */
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#endif

View File

@@ -0,0 +1,102 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_ARC_ATOMIC_SPLOCK_H
#define _ASM_ARC_ATOMIC_SPLOCK_H
/*
* Non hardware assisted Atomic-R-M-W
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
*/
/*
 * arch_atomic_set - store @i into v->counter.
 * @v: atomic variable to update
 * @i: value to store
 *
 * The store is done with WRITE_ONCE() between atomic_ops_lock() and
 * atomic_ops_unlock(); see the comment in the body for the reason.
 */
static inline void arch_atomic_set(atomic_t *v, int i)
{
/*
* Independent of hardware support, all of the atomic_xxx() APIs need
* to follow the same locking rules to make sure that a "hardware"
* atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
* sequence
*
* Thus atomic_set() despite being 1 insn (and seemingly atomic)
* requires the locking.
*/
unsigned long flags;
atomic_ops_lock(flags);
WRITE_ONCE(v->counter, i);
atomic_ops_unlock(flags);
}
/* The release variant maps straight onto the locked arch_atomic_set() */
#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
/*
 * ATOMIC_OP - generate arch_atomic_<op>(i, v).
 *
 * Applies the C compound-assignment operator @c_op with operand @i to
 * v->counter between atomic_ops_lock() and atomic_ops_unlock(); nothing is
 * returned. @asm_op is unused in this lock-based variant.
 */
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
unsigned long flags; \
\
atomic_ops_lock(flags); \
v->counter c_op i; \
atomic_ops_unlock(flags); \
}
/*
 * ATOMIC_OP_RETURN - generate arch_atomic_<op>_return(i, v).
 *
 * Under the atomic_ops lock: reads v->counter into an unsigned int
 * temporary, applies @c_op with @i, writes the result back and returns it
 * (converted to int by the return type). @asm_op is unused here.
 */
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned long flags; \
unsigned int temp; \
\
/* \
* spin lock/unlock provides the needed smp_mb() before/after \
*/ \
atomic_ops_lock(flags); \
temp = v->counter; \
temp c_op i; \
v->counter = temp; \
atomic_ops_unlock(flags); \
\
return temp; \
}
/*
 * ATOMIC_FETCH_OP - generate arch_atomic_fetch_<op>(i, v).
 *
 * Under the atomic_ops lock: saves the current v->counter in 'orig', then
 * applies @c_op with @i directly to v->counter. Returns 'orig', the value
 * from before the operation. @asm_op is unused here.
 */
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
unsigned long flags; \
unsigned int orig; \
\
/* \
* spin lock/unlock provides the needed smp_mb() before/after \
*/ \
atomic_ops_lock(flags); \
orig = v->counter; \
v->counter c_op i; \
atomic_ops_unlock(flags); \
\
return orig; \
}
/* add/sub: emit all three flavours (void op, op_return, fetch_op) */
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_OP_RETURN(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#undef ATOMIC_OPS
/* bitwise ops: only the void op and fetch_op flavours, no op_return */
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(and, &=, and)
/* andnot expands to "v->counter &= ~ i" */
ATOMIC_OPS(andnot, &= ~, bic)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
#define arch_atomic_andnot arch_atomic_andnot
#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
/* the generator macros are private to this header */
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#endif

View File

@@ -17,435 +17,43 @@
#define arch_atomic_read(v) READ_ONCE((v)->counter)
#ifdef CONFIG_ARC_HAS_LLSC
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
unsigned int val; \
\
__asm__ __volatile__( \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
[i] "ir" (i) \
: "cc"); \
} \
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned int val; \
\
/* \
* Explicit full memory barrier needed before/after as \
* LLOCK/SCOND themselves don't provide any such semantics \
*/ \
smp_mb(); \
\
__asm__ __volatile__( \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
\
smp_mb(); \
\
return val; \
}
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
unsigned int val, orig; \
\
/* \
* Explicit full memory barrier needed before/after as \
* LLOCK/SCOND themselves don't provide any such semantics \
*/ \
smp_mb(); \
\
__asm__ __volatile__( \
"1: llock %[orig], [%[ctr]] \n" \
" " #asm_op " %[val], %[orig], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" bnz 1b \n" \
: [val] "=&r" (val), \
[orig] "=&r" (orig) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
\
smp_mb(); \
\
return orig; \
}
#else /* !CONFIG_ARC_HAS_LLSC */
#ifndef CONFIG_SMP
/* violating atomic_xxx API locking protocol in UP for optimization sake */
#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#include <asm/atomic-llsc.h>
#else
#include <asm/atomic-spinlock.h>
#endif
static inline void arch_atomic_set(atomic_t *v, int i)
{
/*
* Independent of hardware support, all of the atomic_xxx() APIs need
* to follow the same locking rules to make sure that a "hardware"
* atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
* sequence
*
* Thus atomic_set() despite being 1 insn (and seemingly atomic)
* requires the locking.
*/
unsigned long flags;
#define arch_atomic_cmpxchg(v, o, n) \
({ \
arch_cmpxchg(&((v)->counter), (o), (n)); \
})
atomic_ops_lock(flags);
WRITE_ONCE(v->counter, i);
atomic_ops_unlock(flags);
}
#ifdef arch_cmpxchg_relaxed
#define arch_atomic_cmpxchg_relaxed(v, o, n) \
({ \
arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \
})
#endif
#define arch_atomic_set_release(v, i) arch_atomic_set((v), (i))
#define arch_atomic_xchg(v, n) \
({ \
arch_xchg(&((v)->counter), (n)); \
})
#ifdef arch_xchg_relaxed
#define arch_atomic_xchg_relaxed(v, n) \
({ \
arch_xchg_relaxed(&((v)->counter), (n)); \
})
#endif
/*
* Non hardware assisted Atomic-R-M-W
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
* 64-bit atomics
*/
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void arch_atomic_##op(int i, atomic_t *v) \
{ \
unsigned long flags; \
\
atomic_ops_lock(flags); \
v->counter c_op i; \
atomic_ops_unlock(flags); \
}
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned long flags; \
unsigned long temp; \
\
/* \
* spin lock/unlock provides the needed smp_mb() before/after \
*/ \
atomic_ops_lock(flags); \
temp = v->counter; \
temp c_op i; \
v->counter = temp; \
atomic_ops_unlock(flags); \
\
return temp; \
}
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
{ \
unsigned long flags; \
unsigned long orig; \
\
/* \
* spin lock/unlock provides the needed smp_mb() before/after \
*/ \
atomic_ops_lock(flags); \
orig = v->counter; \
v->counter c_op i; \
atomic_ops_unlock(flags); \
\
return orig; \
}
#endif /* !CONFIG_ARC_HAS_LLSC */
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_OP_RETURN(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(and, &=, and)
ATOMIC_OPS(andnot, &= ~, bic)
ATOMIC_OPS(or, |=, or)
ATOMIC_OPS(xor, ^=, xor)
#define arch_atomic_andnot arch_atomic_andnot
#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#ifdef CONFIG_GENERIC_ATOMIC64
#include <asm-generic/atomic64.h>
#else /* Kconfig ensures this is only enabled with needed h/w assist */
/*
* ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
* - The address HAS to be 64-bit aligned
* - There are 2 semantics involved here:
* = exclusive implies no interim update between load/store to same addr
* = both words are observed/updated together: this is guaranteed even
* for regular 64-bit load (LDD) / store (STD). Thus atomic64_set()
* is NOT required to use LLOCKD+SCONDD, STD suffices
*/
typedef struct {
s64 __aligned(8) counter;
} atomic64_t;
#define ATOMIC64_INIT(a) { (a) }
static inline s64 arch_atomic64_read(const atomic64_t *v)
{
s64 val;
__asm__ __volatile__(
" ldd %0, [%1] \n"
: "=r"(val)
: "r"(&v->counter));
return val;
}
static inline void arch_atomic64_set(atomic64_t *v, s64 a)
{
/*
* This could have been a simple assignment in "C" but would need
* explicit volatile. Otherwise gcc optimizers could elide the store
* which borked atomic64 self-test
* In the inline asm version, memory clobber needed for exact same
* reason, to tell gcc about the store.
*
* This however is not needed for sibling atomic64_add() etc since both
* load/store are explicitly done in inline asm. As long as API is used
* for each access, gcc has no way to optimize away any load/store
*/
__asm__ __volatile__(
" std %0, [%1] \n"
:
: "r"(a), "r"(&v->counter)
: "memory");
}
#define ATOMIC64_OP(op, op1, op2) \
static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
{ \
s64 val; \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%1] \n" \
" " #op1 " %L0, %L0, %L2 \n" \
" " #op2 " %H0, %H0, %H2 \n" \
" scondd %0, [%1] \n" \
" bnz 1b \n" \
: "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); \
} \
#define ATOMIC64_OP_RETURN(op, op1, op2) \
static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \
{ \
s64 val; \
\
smp_mb(); \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%1] \n" \
" " #op1 " %L0, %L0, %L2 \n" \
" " #op2 " %H0, %H0, %H2 \n" \
" scondd %0, [%1] \n" \
" bnz 1b \n" \
: [val] "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); /* memory clobber comes from smp_mb() */ \
\
smp_mb(); \
\
return val; \
}
#define ATOMIC64_FETCH_OP(op, op1, op2) \
static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \
{ \
s64 val, orig; \
\
smp_mb(); \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%2] \n" \
" " #op1 " %L1, %L0, %L3 \n" \
" " #op2 " %H1, %H0, %H3 \n" \
" scondd %1, [%2] \n" \
" bnz 1b \n" \
: "=&r"(orig), "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); /* memory clobber comes from smp_mb() */ \
\
smp_mb(); \
\
return orig; \
}
#define ATOMIC64_OPS(op, op1, op2) \
ATOMIC64_OP(op, op1, op2) \
ATOMIC64_OP_RETURN(op, op1, op2) \
ATOMIC64_FETCH_OP(op, op1, op2)
ATOMIC64_OPS(add, add.f, adc)
ATOMIC64_OPS(sub, sub.f, sbc)
ATOMIC64_OPS(and, and, and)
ATOMIC64_OPS(andnot, bic, bic)
ATOMIC64_OPS(or, or, or)
ATOMIC64_OPS(xor, xor, xor)
#define arch_atomic64_andnot arch_atomic64_andnot
#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot
#undef ATOMIC64_OPS
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
static inline s64
arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
{
s64 prev;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" brne %L0, %L2, 2f \n"
" brne %H0, %H2, 2f \n"
" scondd %3, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(prev)
: "r"(ptr), "ir"(expected), "r"(new)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return prev;
}
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
{
s64 prev;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" scondd %2, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(prev)
: "r"(ptr), "r"(new)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return prev;
}
/**
* arch_atomic64_dec_if_positive - decrement by 1 if old value positive
* @v: pointer of type atomic64_t
*
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 val;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
" sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
" brlt %H0, 0, 2f \n"
" scondd %0, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(val)
: "r"(&v->counter)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return val;
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
/**
* arch_atomic64_fetch_add_unless - add unless the number is a given value
* @v: pointer of type atomic64_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
*
* Atomically adds @a to @v, if it was not @u.
* Returns the old value of @v
*/
static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 old, temp;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%2] \n"
" brne %L0, %L4, 2f # continue to add since v != u \n"
" breq.d %H0, %H4, 3f # return since v == u \n"
"2: \n"
" add.f %L1, %L0, %L3 \n"
" adc %H1, %H0, %H3 \n"
" scondd %1, [%2] \n"
" bnz 1b \n"
"3: \n"
: "=&r"(old), "=&r" (temp)
: "r"(&v->counter), "r"(a), "r"(u)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return old;
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif /* !CONFIG_GENERIC_ATOMIC64 */
#else
#include <asm/atomic64-arcv2.h>
#endif
#endif /* !__ASSEMBLY__ */

View File

@@ -0,0 +1,250 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
* - The address HAS to be 64-bit aligned
*/
#ifndef _ASM_ARC_ATOMIC64_ARCV2_H
#define _ASM_ARC_ATOMIC64_ARCV2_H
/*
 * 64-bit atomic counter. The counter is 8-byte aligned; per the note at the
 * top of this file, LLOCKD/SCONDD require a 64-bit aligned address.
 */
typedef struct {
s64 __aligned(8) counter;
} atomic64_t;
/* Static initializer: ATOMIC64_INIT(a) sets the counter to (a) */
#define ATOMIC64_INIT(a) { (a) }
/*
 * arch_atomic64_read - read the 64-bit counter of @v.
 *
 * Uses a single 'ldd' instruction in volatile inline asm and returns the
 * loaded value.
 */
static inline s64 arch_atomic64_read(const atomic64_t *v)
{
s64 val;
__asm__ __volatile__(
" ldd %0, [%1] \n"
: "=r"(val)
: "r"(&v->counter));
return val;
}
/*
 * arch_atomic64_set - store @a into the 64-bit counter of @v.
 *
 * Uses a single 'std' instruction in volatile inline asm with a "memory"
 * clobber; the comment in the body explains why the clobber is needed.
 */
static inline void arch_atomic64_set(atomic64_t *v, s64 a)
{
/*
* This could have been a simple assignment in "C" but would need
* explicit volatile. Otherwise gcc optimizers could elide the store
* which borked atomic64 self-test
* In the inline asm version, memory clobber needed for exact same
* reason, to tell gcc about the store.
*
* This however is not needed for sibling atomic64_add() etc since both
* load/store are explicitly done in inline asm. As long as API is used
* for each access, gcc has no way to optimize away any load/store
*/
__asm__ __volatile__(
" std %0, [%1] \n"
:
: "r"(a), "r"(&v->counter)
: "memory");
}
/*
 * ATOMIC64_OP - generate arch_atomic64_<op>(a, v).
 *
 * Inside an LLOCKD/SCONDD loop (the "bnz 1b" branches back to the llockd),
 * @op1 is applied to the low words (%L operands) and @op2 to the high words
 * (%H operands) of the counter and @a. Nothing is returned and no smp_mb()
 * is issued.
 *
 * The macro ends at the closing brace: there is deliberately no trailing
 * line continuation, so the definition never swallows whatever line follows.
 */
#define ATOMIC64_OP(op, op1, op2) \
static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
{ \
	s64 val; \
	\
	__asm__ __volatile__( \
	"1: \n" \
	" llockd %0, [%1] \n" \
	" " #op1 " %L0, %L0, %L2 \n" \
	" " #op2 " %H0, %H0, %H2 \n" \
	" scondd %0, [%1] \n" \
	" bnz 1b \n" \
	: "=&r"(val) \
	: "r"(&v->counter), "ir"(a) \
	: "cc"); \
}
#define ATOMIC64_OP_RETURN(op, op1, op2) \
static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
{ \
s64 val; \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%1] \n" \
" " #op1 " %L0, %L0, %L2 \n" \
" " #op2 " %H0, %H0, %H2 \n" \
" scondd %0, [%1] \n" \
" bnz 1b \n" \
: [val] "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); /* memory clobber comes from smp_mb() */ \
\
return val; \
}
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
#define ATOMIC64_FETCH_OP(op, op1, op2) \
static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
{ \
s64 val, orig; \
\
__asm__ __volatile__( \
"1: \n" \
" llockd %0, [%2] \n" \
" " #op1 " %L1, %L0, %L3 \n" \
" " #op2 " %H1, %H0, %H3 \n" \
" scondd %1, [%2] \n" \
" bnz 1b \n" \
: "=&r"(orig), "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
: "cc"); /* memory clobber comes from smp_mb() */ \
\
return orig; \
}
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
/* add/sub: emit all three flavours (void op, op_return, fetch_op) */
#define ATOMIC64_OPS(op, op1, op2) \
ATOMIC64_OP(op, op1, op2) \
ATOMIC64_OP_RETURN(op, op1, op2) \
ATOMIC64_FETCH_OP(op, op1, op2)
/* low word uses the flag-setting form (.f); high word uses adc/sbc */
ATOMIC64_OPS(add, add.f, adc)
ATOMIC64_OPS(sub, sub.f, sbc)
#undef ATOMIC64_OPS
/* bitwise ops: only the void op and fetch_op flavours, no op_return */
#define ATOMIC64_OPS(op, op1, op2) \
ATOMIC64_OP(op, op1, op2) \
ATOMIC64_FETCH_OP(op, op1, op2)
/* bitwise ops use the same instruction on both words */
ATOMIC64_OPS(and, and, and)
ATOMIC64_OPS(andnot, bic, bic)
ATOMIC64_OPS(or, or, or)
ATOMIC64_OPS(xor, xor, xor)
#define arch_atomic64_andnot arch_atomic64_andnot
/* the generator macros are private to this header */
#undef ATOMIC64_OPS
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
/*
 * arch_atomic64_cmpxchg - compare-and-exchange on a 64-bit atomic.
 * @ptr: atomic64_t to operate on
 * @expected: value the counter must hold for the store to happen
 * @new: value to store on a match
 *
 * llockd loads the current value into 'prev'; if either the low or the high
 * word differs from @expected, the code branches to label 2 and skips the
 * store. Otherwise scondd stores @new, and "bnz 1b" branches back to the
 * llockd. smp_mb() is issued before and after the asm. Returns 'prev', the
 * value that was loaded.
 */
static inline s64
arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
{
s64 prev;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" brne %L0, %L2, 2f \n"
" brne %H0, %H2, 2f \n"
" scondd %3, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(prev)
: "r"(ptr), "ir"(expected), "r"(new)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return prev;
}
/*
 * arch_atomic64_xchg - unconditionally replace the 64-bit counter
 * @ptr: atomic64_t to operate on
 * @new: value to store
 *
 * Loads the current value with llockd, stores @new with scondd and loops to
 * label 1 while the bnz after scondd is taken.  Returns the loaded value.
 * smp_mb() is issued before and after the sequence.
 */
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
{
s64 prev;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" scondd %2, [%1] \n"
" bnz 1b \n"
/* label 2 is not the target of any branch in this sequence */
"2: \n"
: "=&r"(prev)
: "r"(ptr), "r"(new)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return prev;
}
/**
 * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
 * @v: pointer of type atomic64_t
 *
 * The function returns the old value of *v minus 1, even if
 * the atomic variable, v, was not decremented.
 *
 * The decrement is computed in the register pair that is also returned; the
 * store is skipped when the high word of the result is negative.
 * smp_mb() is issued before and after the llockd/scondd sequence.
 */
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 val;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%1] \n"
" sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n"
" sub.c %H0, %H0, 1 # if C set, w1 - 1\n"
/* result < 0: jump past the store, leaving the counter untouched */
" brlt %H0, 0, 2f \n"
" scondd %0, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(val)
: "r"(&v->counter)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return val;
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
/**
 * arch_atomic64_fetch_add_unless - add unless the number is a given value
 * @v: pointer of type atomic64_t
 * @a: the amount to add to v...
 * @u: ...unless v is equal to u.
 *
 * Atomically adds @a to @v, if it was not @u.
 * Returns the old value of @v
 *
 * smp_mb() is issued before and after the llockd/scondd sequence.
 */
static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
/* old: value loaded from the counter; temp: old + a, the value stored */
s64 old, temp;
smp_mb();
__asm__ __volatile__(
"1: llockd %0, [%2] \n"
" brne %L0, %L4, 2f # continue to add since v != u \n"
/*
 * NOTE(review): breq.d looks like the delay-slot form, in which case the
 * add.f below also runs when the branch to 3 is taken; it only writes
 * temp, which is not returned - confirm against the ARC ISA manual.
 */
" breq.d %H0, %H4, 3f # return since v == u \n"
"2: \n"
" add.f %L1, %L0, %L3 \n"
" adc %H1, %H0, %H3 \n"
" scondd %1, [%2] \n"
" bnz 1b \n"
"3: \n"
: "=&r"(old), "=&r" (temp)
: "r"(&v->counter), "r"(a), "r"(u)
: "cc"); /* memory clobber comes from smp_mb() */
smp_mb();
return old;
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
#endif

View File

@@ -14,188 +14,6 @@
#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/barrier.h>
#ifndef CONFIG_ARC_HAS_LLSC
#include <asm/smp.h>
#endif
#ifdef CONFIG_ARC_HAS_LLSC
/*
* Hardware assisted Atomic-R-M-W
*/
/*
 * BIT_OP(op, c_op, asm_op) - emit the llock/scond based <op>_bit()
 *
 * The generated function advances @m by nr >> 5 words, reduces @nr to 0..31
 * and applies @asm_op to that word inside an llock/scond retry loop.
 * @c_op is not used by this variant, and no smp_mb() is issued.
 */
#define BIT_OP(op, c_op, asm_op) \
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned int temp; \
\
m += nr >> 5; \
\
nr &= 0x1f; \
\
__asm__ __volatile__( \
"1: llock %0, [%1] \n" \
" " #asm_op " %0, %0, %2 \n" \
" scond %0, [%1] \n" \
" bnz 1b \n" \
: "=&r"(temp) /* Early clobber, to prevent reg reuse */ \
: "r"(m), /* Not "m": llock only supports reg direct addr mode */ \
"ir"(nr) \
: "cc"); \
}
/*
* Semantically:
* Test the bit
* if clear
* set it and return 0 (old value)
* else
* return 1 (old value).
*
* Since ARC lacks an equivalent h/w primitive, the bit is set unconditionally
* and the old value of the bit is returned
*/
/*
 * TEST_N_BIT_OP(op, c_op, asm_op) - emit the llock/scond based
 * test_and_<op>_bit()
 *
 * The generated function advances @m by nr >> 5 words, reduces @nr to 0..31,
 * applies @asm_op to that word inside an llock/scond retry loop and returns
 * 1 if the bit was set in the value that was loaded, 0 otherwise.
 * @c_op is not used by this variant.
 */
#define TEST_N_BIT_OP(op, c_op, asm_op)					\
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{									\
	unsigned long old, temp;					\
									\
	m += nr >> 5;							\
									\
	nr &= 0x1f;							\
									\
	/*								\
	 * Explicit full memory barrier needed before/after as		\
	 * LLOCK/SCOND themselves don't provide any such semantics	\
	 */								\
	smp_mb();							\
									\
	__asm__ __volatile__(						\
	"1:	llock       %0, [%2]	\n"				\
	"	" #asm_op " %1, %0, %3	\n"				\
	"	scond       %1, [%2]	\n"				\
	"	bnz         1b		\n"				\
	: "=&r"(old), "=&r"(temp)					\
	: "r"(m), "ir"(nr)						\
	: "cc");							\
									\
	smp_mb();							\
									\
	/* 1UL, not 1: nr may be 31 and must not shift into int's sign bit */ \
	return (old & (1UL << nr)) != 0;				\
}
#else /* !CONFIG_ARC_HAS_LLSC */
/*
* Non hardware assisted Atomic-R-M-W
* Locking would change to irq-disabling only (UP) and spinlocks (SMP)
*
* There's "significant" micro-optimization in writing our own variants of
* bitops (over generic variants)
*
* (1) The generic APIs have "signed" @nr while we have it "unsigned"
* This avoids extra code being generated for pointer arithmetic, since
* the compiler is "not sure" that index is NOT -ve
* (2) Utilize the fact that ARCompact bit fiddling insns (BSET/BCLR/ASL etc)
* only consider bottom 5 bits of @nr, so NO need to mask them off.
* (GCC Quirk: however for constant @nr we still need to do the masking
* at compile time)
*/
/*
 * BIT_OP(op, c_op, asm_op) - emit the lock based <op>_bit()
 *
 * The generated function advances @m by nr >> 5 words and, between
 * bitops_lock() and bitops_unlock(), rewrites that word as
 * "word c_op (1UL << (nr & 0x1f))".  @c_op is pasted as raw tokens
 * (e.g. "& ~"), and @asm_op is not used by this variant.
 */
#define BIT_OP(op, c_op, asm_op) \
static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long temp, flags; \
m += nr >> 5; \
\
/* \
* spin lock/unlock provide the needed smp_mb() before/after \
*/ \
bitops_lock(flags); \
\
temp = *m; \
*m = temp c_op (1UL << (nr & 0x1f)); \
\
bitops_unlock(flags); \
}
/*
 * TEST_N_BIT_OP(op, c_op, asm_op) - emit the lock based test_and_<op>_bit()
 *
 * Same read-modify-write as the lock based <op>_bit(), done between
 * bitops_lock() and bitops_unlock(); returns 1 if the bit was set in the
 * word as read before the update, 0 otherwise.  @asm_op is not used here.
 */
#define TEST_N_BIT_OP(op, c_op, asm_op) \
static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long old, flags; \
m += nr >> 5; \
\
bitops_lock(flags); \
\
old = *m; \
*m = old c_op (1UL << (nr & 0x1f)); \
\
bitops_unlock(flags); \
\
return (old & (1UL << (nr & 0x1f))) != 0; \
}
#endif
/***************************************
* Non atomic variants
**************************************/
/*
 * __BIT_OP(op, c_op, asm_op) - emit __<op>_bit()
 *
 * Plain C read-modify-write of the word at m + (nr >> 5): no lock, no
 * barrier, no llock/scond.  @c_op is pasted as raw tokens; @asm_op is unused.
 */
#define __BIT_OP(op, c_op, asm_op) \
static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \
{ \
unsigned long temp; \
m += nr >> 5; \
\
temp = *m; \
*m = temp c_op (1UL << (nr & 0x1f)); \
}
/*
 * __TEST_N_BIT_OP(op, c_op, asm_op) - emit __test_and_<op>_bit()
 *
 * Plain C read-modify-write of the word at m + (nr >> 5) without any lock
 * or barrier; returns 1 if the bit was set in the word as read before the
 * update, 0 otherwise.  @asm_op is unused.
 */
#define __TEST_N_BIT_OP(op, c_op, asm_op) \
static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
{ \
unsigned long old; \
m += nr >> 5; \
\
old = *m; \
*m = old c_op (1UL << (nr & 0x1f)); \
\
return (old & (1UL << (nr & 0x1f))) != 0; \
}
/*
 * BIT_OPS(op, c_op, asm_op) - instantiate all four helpers for one operation
 *
 * Expands BIT_OP, TEST_N_BIT_OP, __BIT_OP and __TEST_N_BIT_OP with the same
 * arguments: @op names the functions, @c_op is the C operator used by the
 * plain C variants and @asm_op the instruction used by the llock/scond ones.
 */
#define BIT_OPS(op, c_op, asm_op) \
\
/* set_bit(), clear_bit(), change_bit() */ \
BIT_OP(op, c_op, asm_op) \
\
/* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
TEST_N_BIT_OP(op, c_op, asm_op) \
\
/* __set_bit(), __clear_bit(), __change_bit() */ \
__BIT_OP(op, c_op, asm_op) \
\
/* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
__TEST_N_BIT_OP(op, c_op, asm_op)
BIT_OPS(set, |, bset)
BIT_OPS(clear, & ~, bclr)
BIT_OPS(change, ^, bxor)
/*
 * test_bit - report whether bit @nr is set in the bitmap starting at @addr
 *
 * A single plain read of one word; this routine doesn't need to be atomic.
 * nr >> 5 selects the word and nr & 0x1f the bit inside it.
 * Returns 1 when the bit is set, 0 otherwise.
 */
static inline int
test_bit(unsigned int nr, const volatile unsigned long *addr)
{
	const volatile unsigned long *word = addr + (nr >> 5);
	unsigned int bit = nr & 0x1f;

	return ((*word >> bit) & 1UL) != 0;
}
#ifdef CONFIG_ISA_ARCOMPACT
@@ -296,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
* @result: [1-32]
* fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
*/
static inline __attribute__ ((const)) int fls(unsigned long x)
static inline __attribute__ ((const)) int fls(unsigned int x)
{
int n;
@@ -323,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
* ffs = Find First Set in word (LSB to MSB)
* @result: [1-32], 0 if all 0's
*/
static inline __attribute__ ((const)) int ffs(unsigned long x)
static inline __attribute__ ((const)) int ffs(unsigned int x)
{
int n;
@@ -368,6 +186,8 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/atomic.h>
#include <asm-generic/bitops/non-atomic.h>
#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/le.h>

View File

@@ -62,10 +62,6 @@
#define ARCH_SLAB_MINALIGN 8
#endif
extern void arc_cache_init(void);
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_cache_bcr(void);
extern int ioc_enable;
extern unsigned long perip_base, perip_end;

View File

@@ -6,6 +6,7 @@
#ifndef __ASM_ARC_CMPXCHG_H
#define __ASM_ARC_CMPXCHG_H
#include <linux/build_bug.h>
#include <linux/types.h>
#include <asm/barrier.h>
@@ -13,146 +14,130 @@
#ifdef CONFIG_ARC_HAS_LLSC
/*
 * __cmpxchg - llock/scond compare-and-exchange on one word
 * @ptr: address of the word
 * @expected: value the word must hold for the store to happen
 * @new: value stored when the comparison matches
 *
 * Returns the value that was loaded; when it differs from @expected the
 * sequence branches to label 2 and nothing is stored.
 */
static inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
{
unsigned long prev;
/*
* Explicit full memory barrier needed before/after as
* LLOCK/SCOND themselves don't provide any such semantics
*/
smp_mb();
__asm__ __volatile__(
"1: llock %0, [%1] \n"
" brne %0, %2, 2f \n"
" scond %3, [%1] \n"
" bnz 1b \n"
"2: \n"
: "=&r"(prev) /* Early clobber, to prevent reg reuse */
: "r"(ptr), /* Not "m": llock only supports reg direct addr mode */
"ir"(expected),
"r"(new) /* can't be "ir". scond can't take LIMM for "b" */
: "cc", "memory"); /* so that gcc knows memory is being written here */
smp_mb();
return prev;
}
#else /* !CONFIG_ARC_HAS_LLSC */
/*
 * __cmpxchg - lock based compare-and-exchange on one word
 * @ptr: address of the word
 * @expected: value the word must hold for the store to happen
 * @new: value stored when the comparison matches
 *
 * The load, compare and conditional store run between atomic_ops_lock()
 * and atomic_ops_unlock().  Returns the value that was loaded.
 */
static inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
{
	unsigned long flags;
	/* same type as the word and the return value, so nothing is converted */
	unsigned long prev;
	volatile unsigned long *p = ptr;

	/*
	 * spin lock/unlock provide the needed smp_mb() before/after
	 */
	atomic_ops_lock(flags);
	prev = *p;
	if (prev == expected)
		*p = new;
	atomic_ops_unlock(flags);
	return prev;
}
#endif
#define arch_cmpxchg(ptr, o, n) ({ \
(typeof(*(ptr)))__cmpxchg((ptr), \
(unsigned long)(o), \
(unsigned long)(n)); \
/*
 * __cmpxchg(ptr, old, new) - llock/scond compare-and-exchange, no barriers
 *
 * if (*ptr == @old)
 *	*ptr = @new
 *
 * Evaluates to the value loaded from *ptr, typed as *(ptr).  When that value
 * differs from @old the sequence branches to label 2 and nothing is stored.
 * No smp_mb() is issued by this macro.
 */
#define __cmpxchg(ptr, old, new) \
({ \
__typeof__(*(ptr)) _prev; \
\
__asm__ __volatile__( \
"1: llock %0, [%1] \n" \
" brne %0, %2, 2f \n" \
" scond %3, [%1] \n" \
" bnz 1b \n" \
"2: \n" \
: "=&r"(_prev) /* Early clobber prevent reg reuse */ \
: "r"(ptr), /* Not "m": llock only supports reg */ \
"ir"(old), \
"r"(new) /* Not "ir": scond can't take LIMM */ \
: "cc", \
"memory"); /* gcc knows memory is clobbered */ \
\
_prev; \
})
/*
* atomic_cmpxchg is same as cmpxchg
* LLSC: only different in data-type, semantics are exactly same
* !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
* semantics, and this lock also happens to be used by atomic_*()
*/
#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
/*
 * xchg (reg with memory) based on "Native atomic" EX insn
 *
 * @val: value to store
 * @ptr: address of the word to exchange with
 * @size: size in bytes of the object at @ptr; only 4 is handled
 *
 * For size 4, returns the value that EX left in @val, with smp_mb() issued
 * before and after the instruction.  Any other size falls through to
 * __xchg_bad_pointer(), which is only declared here.
 */
static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
int size)
{
extern unsigned long __xchg_bad_pointer(void);
switch (size) {
case 4:
smp_mb();
__asm__ __volatile__(
" ex %0, [%1] \n"
: "+r"(val)
: "r"(ptr)
: "memory");
smp_mb();
return val;
}
return __xchg_bad_pointer();
}
#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
sizeof(*(ptr))))
/*
* xchg() maps directly to ARC EX instruction which guarantees atomicity.
* However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
* due to a subtle reason:
* - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
* of kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
* Hence xchg() needs to follow same locking rules.
*
* Technically the lock is also needed for UP (boils down to irq save/restore)
* but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
* be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
* Other way around, xchg is one instruction anyways, so can't be interrupted
* as such
*/
#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
#define arch_xchg(ptr, with) \
({ \
unsigned long flags; \
typeof(*(ptr)) old_val; \
\
atomic_ops_lock(flags); \
old_val = _xchg(ptr, with); \
atomic_ops_unlock(flags); \
old_val; \
/*
 * arch_cmpxchg_relaxed(ptr, old, new) - compare-and-exchange, no barriers
 *
 * Evaluates each argument once, then hands 4-byte objects to __cmpxchg().
 * Any other object size hits BUILD_BUG().  Evaluates to the value
 * __cmpxchg() produced, typed as *(ptr).
 */
#define arch_cmpxchg_relaxed(ptr, old, new)				\
({									\
	__typeof__(ptr) _p_ = (ptr);					\
	__typeof__(*(ptr)) _o_ = (old);					\
	__typeof__(*(ptr)) _n_ = (new);					\
	__typeof__(*(ptr)) _prev_;					\
									\
	/* size of the pointed-to object, not of the pointer itself */	\
	switch (sizeof(*(_p_))) {					\
	case 4:								\
		_prev_ = __cmpxchg(_p_, _o_, _n_);			\
		break;							\
	default:							\
		BUILD_BUG();						\
	}								\
	_prev_;								\
})
#else
#define arch_xchg(ptr, with) _xchg(ptr, with)
/*
 * arch_cmpxchg(ptr, old, new) - lock based compare-and-exchange
 *
 * Only 4-byte objects are accepted (BUILD_BUG_ON otherwise).  The load,
 * compare and conditional store run between atomic_ops_lock() and
 * atomic_ops_unlock().  Evaluates to the value that was loaded.
 *
 * NOTE(review): "volatile __typeof__(ptr)" qualifies the local pointer
 * variable, not the object it points to - confirm that is what is wanted.
 */
#define arch_cmpxchg(ptr, old, new)					\
({									\
	volatile __typeof__(ptr) _p_ = (ptr);				\
	__typeof__(*(ptr)) _o_ = (old);					\
	__typeof__(*(ptr)) _n_ = (new);					\
	__typeof__(*(ptr)) _prev_;					\
	unsigned long __flags;						\
									\
	/* size of the pointed-to object, not of the pointer itself */	\
	BUILD_BUG_ON(sizeof(*(_p_)) != 4);				\
									\
	/*								\
	 * spin lock/unlock provide the needed smp_mb() before/after	\
	 */								\
	atomic_ops_lock(__flags);					\
	_prev_ = *_p_;							\
	if (_prev_ == _o_)						\
		*_p_ = _n_;						\
	atomic_ops_unlock(__flags);					\
	_prev_;								\
})
#endif
/*
* "atomic" variant of xchg()
* REQ: It needs to follow the same serialization rules as other atomic_xxx()
* Since xchg() doesn't always do that, it would seem that following definition
* is incorrect. But here's the rationale:
* SMP : Even xchg() takes the atomic_ops_lock, so OK.
* LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
* is natively "SMP safe", no serialization required).
* UP : other atomics disable IRQ, so no way a difft ctxt atomic_xchg()
* could clobber them. atomic_xchg() itself would be 1 insn, so it
* can't be clobbered by others. Thus no serialization required when
* atomic_xchg is involved.
* xchg
*/
#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
#ifdef CONFIG_ARC_HAS_LLSC
/*
 * __xchg(ptr, val) - swap @val with the word at @ptr using the EX insn
 *
 * @val is an in/out asm operand, so it must be a modifiable lvalue: it
 * supplies the value to store and receives the value that was in memory.
 * The macro evaluates to @val after the exchange.  No barrier is issued.
 */
#define __xchg(ptr, val)						\
({									\
	__asm__ __volatile__(						\
	"	ex  %0, [%1]	\n"	/* set new value */		\
	: "+r"(val)							\
	: "r"(ptr)							\
	: "memory");							\
	/* use the macro parameter, not a name from the caller's scope */ \
	(val);			/* get old value */			\
})
/*
 * arch_xchg_relaxed(ptr, val) - exchange without barriers
 *
 * Evaluates @ptr and @val once into locals, then hands 4-byte objects to
 * __xchg(); any other object size hits BUILD_BUG().  Evaluates to the local
 * _val_, which is assigned the result of __xchg().
 */
#define arch_xchg_relaxed(ptr, val) \
({ \
__typeof__(ptr) _p_ = (ptr); \
__typeof__(*(ptr)) _val_ = (val); \
\
switch(sizeof(*(_p_))) { \
case 4: \
_val_ = __xchg(_p_, _val_); \
break; \
default: \
BUILD_BUG(); \
} \
_val_; \
})
#else /* !CONFIG_ARC_HAS_LLSC */
/*
* The EX instruction is baseline and present in !LLSC too. But in this
* regime it still needs to use the @atomic_ops_lock spinlock to allow interop
* with cmpxchg(), which uses the spinlock in !LLSC
* (llist.h uses xchg and cmpxchg on the same data)
*
* Evaluates to the local _val_ after the EX instruction has run on it.
*/
#define arch_xchg(ptr, val) \
({ \
__typeof__(ptr) _p_ = (ptr); \
__typeof__(*(ptr)) _val_ = (val); \
\
unsigned long __flags; \
\
atomic_ops_lock(__flags); \
\
__asm__ __volatile__( \
" ex %0, [%1] \n" \
: "+r"(_val_) \
: "r"(_p_) \
: "memory"); \
\
atomic_ops_unlock(__flags); \
_val_; \
})
#endif
#endif

View File

@@ -126,19 +126,11 @@
* to be saved again on kernel mode stack, as part of pt_regs.
*-------------------------------------------------------------*/
.macro PROLOG_FREEUP_REG reg, mem
#ifndef ARC_USE_SCRATCH_REG
sr \reg, [ARC_REG_SCRATCH_DATA0]
#else
st \reg, [\mem]
#endif
.endm
.macro PROLOG_RESTORE_REG reg, mem
#ifndef ARC_USE_SCRATCH_REG
lr \reg, [ARC_REG_SCRATCH_DATA0]
#else
ld \reg, [\mem]
#endif
.endm
/*--------------------------------------------------------------

View File

@@ -58,14 +58,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
/* Generic variants assume pgtable_t is struct page *, hence need for these */
#define __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable);
#define __HAVE_ARCH_PGTABLE_WITHDRAW
extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);

View File

@@ -0,0 +1,103 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com)
*
* MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed.
* This file contains the TLB access registers and commands
*/
#ifndef _ASM_ARC_MMU_ARCV2_H
#define _ASM_ARC_MMU_ARCV2_H
/*
* TLB Management regs
*/
#define ARC_REG_MMU_BCR 0x06f
#ifdef CONFIG_ARC_MMU_V3
#define ARC_REG_TLBPD0 0x405
#define ARC_REG_TLBPD1 0x406
#define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */
#define ARC_REG_TLBINDEX 0x407
#define ARC_REG_TLBCOMMAND 0x408
#define ARC_REG_PID 0x409
#define ARC_REG_SCRATCH_DATA0 0x418
#else
#define ARC_REG_TLBPD0 0x460
#define ARC_REG_TLBPD1 0x461
#define ARC_REG_TLBPD1HI 0x463
#define ARC_REG_TLBINDEX 0x464
#define ARC_REG_TLBCOMMAND 0x465
#define ARC_REG_PID 0x468
#define ARC_REG_SCRATCH_DATA0 0x46c
#endif
/* Bits in MMU PID reg */
#define __TLB_ENABLE (1 << 31)
#define __PROG_ENABLE (1 << 30)
#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
/* Bits in TLB Index reg */
#define TLB_LKUP_ERR 0x80000000
#ifdef CONFIG_ARC_MMU_V3
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
#else
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
#endif
/*
* TLB Commands
*/
#define TLBWrite 0x1
#define TLBRead 0x2
#define TLBGetIndex 0x3
#define TLBProbe 0x4
#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
#ifdef CONFIG_ARC_MMU_V4
#define TLBInsertEntry 0x7
#define TLBDeleteEntry 0x8
#endif
/* Masks for actual TLB "PD"s */
#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
#ifndef __ASSEMBLY__
struct mm_struct;
extern int pae40_exist_but_not_enab(void);
/* Returns the value of IS_ENABLED(CONFIG_ARC_HAS_PAE40). */
static inline int is_pae40_enabled(void)
{
return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
}
/*
 * mmu_setup_asid - write @asid, OR-ed with MMU_ENABLE, to the ARC_REG_PID
 * aux register.  @mm is not used by this implementation.
 */
static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid)
{
write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE);
}
/*
 * mmu_setup_pgd - write @pgd to the ARC_REG_SCRATCH_DATA0 aux register
 *
 * The write is compiled in only under CONFIG_ISA_ARCV2; otherwise the
 * function body is empty.  @mm is not used by this implementation.
 */
static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
{
/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
#ifdef CONFIG_ISA_ARCV2
write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd);
#endif
}
#else
.macro ARC_MMU_REENABLE reg
lr \reg, [ARC_REG_PID]
or \reg, \reg, MMU_ENABLE
sr \reg, [ARC_REG_PID]
.endm
#endif /* !__ASSEMBLY__ */
#endif

View File

@@ -7,98 +7,15 @@
#define _ASM_ARC_MMU_H
#ifndef __ASSEMBLY__
#include <linux/threads.h> /* NR_CPUS */
#endif
#if defined(CONFIG_ARC_MMU_V1)
#define CONFIG_ARC_MMU_VER 1
#elif defined(CONFIG_ARC_MMU_V2)
#define CONFIG_ARC_MMU_VER 2
#elif defined(CONFIG_ARC_MMU_V3)
#define CONFIG_ARC_MMU_VER 3
#elif defined(CONFIG_ARC_MMU_V4)
#define CONFIG_ARC_MMU_VER 4
#endif
/* MMU Management regs */
#define ARC_REG_MMU_BCR 0x06f
#if (CONFIG_ARC_MMU_VER < 4)
#define ARC_REG_TLBPD0 0x405
#define ARC_REG_TLBPD1 0x406
#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */
#define ARC_REG_TLBINDEX 0x407
#define ARC_REG_TLBCOMMAND 0x408
#define ARC_REG_PID 0x409
#define ARC_REG_SCRATCH_DATA0 0x418
#else
#define ARC_REG_TLBPD0 0x460
#define ARC_REG_TLBPD1 0x461
#define ARC_REG_TLBPD1HI 0x463
#define ARC_REG_TLBINDEX 0x464
#define ARC_REG_TLBCOMMAND 0x465
#define ARC_REG_PID 0x468
#define ARC_REG_SCRATCH_DATA0 0x46c
#endif
#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
#define ARC_USE_SCRATCH_REG
#endif
/* Bits in MMU PID register */
#define __TLB_ENABLE (1 << 31)
#define __PROG_ENABLE (1 << 30)
#define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE)
/* Error code if probe fails */
#define TLB_LKUP_ERR 0x80000000
#if (CONFIG_ARC_MMU_VER < 4)
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001)
#else
#define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000)
#endif
/* TLB Commands */
#define TLBWrite 0x1
#define TLBRead 0x2
#define TLBGetIndex 0x3
#define TLBProbe 0x4
#if (CONFIG_ARC_MMU_VER >= 2)
#define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */
#define TLBIVUTLB 0x6 /* explicitly inv uTLBs */
#else
#define TLBWriteNI TLBWrite /* Not present in hardware, fallback */
#endif
#if (CONFIG_ARC_MMU_VER >= 4)
#define TLBInsertEntry 0x7
#define TLBDeleteEntry 0x8
#endif
#ifndef __ASSEMBLY__
typedef struct {
unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */
} mm_context_t;
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
#else
#define tlb_paranoid_check(a, b)
#endif
void arc_mmu_init(void);
extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
void read_decode_mmu_bcr(void);
static inline int is_pae40_enabled(void)
{
return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
}
extern int pae40_exist_but_not_enab(void);
#endif /* !__ASSEMBLY__ */
#include <asm/mmu-arcv2.h>
#endif

View File

@@ -15,22 +15,23 @@
#ifndef _ASM_ARC_MMU_CONTEXT_H
#define _ASM_ARC_MMU_CONTEXT_H
#include <asm/arcregs.h>
#include <asm/tlb.h>
#include <linux/sched/mm.h>
#include <asm/tlb.h>
#include <asm-generic/mm_hooks.h>
/* ARC700 ASID Management
/* ARC ASID Management
*
* ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries
* with same vaddr (different tasks) to co-exit. This provides for
* "Fast Context Switch" i.e. no TLB flush on ctxt-switch
* MMU tags TLBs with an 8-bit ASID, avoiding need to flush the TLB on
* context-switch.
*
* Linux assigns each task a unique ASID. A simple round-robin allocation
* of H/w ASID is done using software tracker @asid_cpu.
* When it reaches max 255, the allocation cycle starts afresh by flushing
* the entire TLB and wrapping ASID back to zero.
* ASID is managed per cpu, so task threads across CPUs can have different
* ASID. Global ASID management is needed if hardware supports TLB shootdown
* and/or shared TLB across cores, which ARC doesn't.
*
* Each task is assigned unique ASID, with a simple round-robin allocator
* tracked in @asid_cpu. When 8-bit value rolls over, a new cycle is started
* over from 0, and TLB is flushed
*
* A new allocation cycle, post rollover, could potentially reassign an ASID
* to a different task. Thus the rule is to refresh the ASID in a new cycle.
@@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
asid_mm(mm, cpu) = asid_cpu(cpu);
set_hw:
write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
mmu_setup_asid(mm, hw_pid(mm, cpu));
local_irq_restore(flags);
}
@@ -146,10 +147,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
cpumask_set_cpu(cpu, mm_cpumask(next));
#ifdef ARC_USE_SCRATCH_REG
/* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
#endif
mmu_setup_pgd(next, next->pgd);
get_new_mmu_context(next);
}

View File

@@ -34,12 +34,35 @@ void copy_user_highpage(struct page *to, struct page *from,
unsigned long u_vaddr, struct vm_area_struct *vma);
void clear_user_page(void *to, unsigned long u_vaddr, struct page *page);
#undef STRICT_MM_TYPECHECKS
typedef struct {
unsigned long pgd;
} pgd_t;
#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) })
#if CONFIG_PGTABLE_LEVELS > 3
typedef struct {
unsigned long pud;
} pud_t;
#define pud_val(x) ((x).pud)
#define __pud(x) ((pud_t) { (x) })
#endif
#if CONFIG_PGTABLE_LEVELS > 2
typedef struct {
unsigned long pmd;
} pmd_t;
#define pmd_val(x) ((x).pmd)
#define __pmd(x) ((pmd_t) { (x) })
#endif
#ifdef STRICT_MM_TYPECHECKS
/*
* These are used to make use of C type-checking..
*/
typedef struct {
#ifdef CONFIG_ARC_HAS_PAE40
unsigned long long pte;
@@ -47,44 +70,19 @@ typedef struct {
unsigned long pte;
#endif
} pte_t;
typedef struct {
unsigned long pgd;
} pgd_t;
#define pte_val(x) ((x).pte)
#define __pte(x) ((pte_t) { (x) })
typedef struct {
unsigned long pgprot;
} pgprot_t;
#define pte_val(x) ((x).pte)
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) })
#define pte_pgprot(x) __pgprot(pte_val(x))
#define __pte(x) ((pte_t) { (x) })
#define __pgd(x) ((pgd_t) { (x) })
#define __pgprot(x) ((pgprot_t) { (x) })
#define pte_pgprot(x) __pgprot(pte_val(x))
#else /* !STRICT_MM_TYPECHECKS */
#ifdef CONFIG_ARC_HAS_PAE40
typedef unsigned long long pte_t;
#else
typedef unsigned long pte_t;
#endif
typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;
#define pte_val(x) (x)
#define pgd_val(x) (x)
#define pgprot_val(x) (x)
#define __pte(x) (x)
#define __pgd(x) (x)
#define __pgprot(x) (x)
#define pte_pgprot(x) (x)
#endif
typedef pte_t * pgtable_t;
typedef struct page *pgtable_t;
/*
* Use virt_to_pfn with caution:
@@ -122,8 +120,8 @@ extern int pfn_valid(unsigned long pfn);
* virt here means link-address/program-address as embedded in object code.
* And for ARC, link-addr = physical address
*/
#define __pa(vaddr) ((unsigned long)(vaddr))
#define __va(paddr) ((void *)((unsigned long)(paddr)))
#define __pa(vaddr) ((unsigned long)(vaddr))
#define __va(paddr) ((void *)((unsigned long)(paddr)))
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))

View File

@@ -31,30 +31,32 @@
#include <linux/mm.h>
#include <linux/log2.h>
#include <asm-generic/pgalloc.h>
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
{
pmd_set(pmd, pte);
/*
* The cast to long below is OK in 32-bit PAE40 regime with long long pte
* Despite "wider" pte, the pte table needs to be in non-PAE low memory
* as all higher levels can only hold long pointers.
*
* The cast itself is needed given simplistic definition of set_pmd()
*/
set_pmd(pmd, __pmd((unsigned long)pte));
}
static inline void
pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page)
{
pmd_set(pmd, (pte_t *) ptep);
}
static inline int __get_order_pgd(void)
{
return get_order(PTRS_PER_PGD * sizeof(pgd_t));
set_pmd(pmd, __pmd((unsigned long)page_address(pte_page)));
}
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
int num, num2;
pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd());
pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL);
if (ret) {
int num, num2;
num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
memzero(ret, num * sizeof(pgd_t));
@@ -68,64 +70,27 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return ret;
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
#if CONFIG_PGTABLE_LEVELS > 3
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
{
free_pages((unsigned long)pgd, __get_order_pgd());
set_p4d(p4dp, __p4d((unsigned long)pudp));
}
#define __pud_free_tlb(tlb, pmd, addr) pud_free((tlb)->mm, pmd)
/*
* With software-only page-tables, addr-split for traversal is tweakable and
* that directly governs how big tables would be at each level.
* Further, the MMU page size is configurable.
* Thus we need to programatically assert the size constraint
* All of this is const math, allowing gcc to do constant folding/propagation.
*/
#endif
static inline int __get_order_pte(void)
#if CONFIG_PGTABLE_LEVELS > 2
static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
{
return get_order(PTRS_PER_PTE * sizeof(pte_t));
set_pud(pudp, __pud((unsigned long)pmdp));
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
__get_order_pte());
return pte;
}
static inline pgtable_t
pte_alloc_one(struct mm_struct *mm)
{
pgtable_t pte_pg;
struct page *page;
pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
if (!pte_pg)
return 0;
memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
page = virt_to_page(pte_pg);
if (!pgtable_pte_page_ctor(page)) {
__free_page(page);
return 0;
}
return pte_pg;
}
/*
 * pte_free_kernel - release a page table with free_pages(), using the order
 * computed by __get_order_pte().  @mm is not used.
 */
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */
}
/*
 * pte_free - run pgtable_pte_page_dtor() on the page backing @ptep, then
 * release the table with free_pages() using the order computed by
 * __get_order_pte().  @mm is not used.
 */
static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
{
pgtable_pte_page_dtor(virt_to_page(ptep));
free_pages((unsigned long)ptep, __get_order_pte());
}
#endif
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)

View File

@@ -0,0 +1,149 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
/*
* page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS)
* These correspond to the matching bits in the TLB
*/
#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H
#define _ASM_ARC_PGTABLE_BITS_ARCV2_H
#ifdef CONFIG_ARC_CACHE_PAGES
#define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */
#else
#define _PAGE_CACHEABLE 0
#endif
#define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */
#define _PAGE_WRITE (1 << 2) /* User Write (H) */
#define _PAGE_READ (1 << 3) /* User Read (H) */
#define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */
#define _PAGE_DIRTY (1 << 5) /* Modified (s) */
#define _PAGE_SPECIAL (1 << 6)
#define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */
#define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */
#ifdef CONFIG_ARC_MMU_V4
#define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */
#else
#define _PAGE_HW_SZ 0
#endif
/* Defaults for every user page */
#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
_PAGE_SPECIAL)
/* More abbreviated helpers */
#define PAGE_U_NONE __pgprot(___DEF)
#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
#define PAGE_U_X_W_R __pgprot(___DEF \
| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
#define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \
| _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
#define PAGE_SHARED PAGE_U_W_R
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
/*
* Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
*
* Certain cases have 1:1 mapping
* e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
* which directly corresponds to PAGE_U_X_R
*
* Other rules which cause the divergence from 1:1 mapping
*
* 1. Although ARC700 can do exclusive execute/write protection (meaning R
* can be tracked independently of X/W unlike some other CPUs), still to
* keep things consistent with other archs:
* -Write implies Read: W => R
* -Execute implies Read: X => R
*
* 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
* This is to enable COW mechanism
*/
/* xwr */
#define __P000 PAGE_U_NONE
#define __P001 PAGE_U_R
#define __P010 PAGE_U_R /* Pvt-W => !W */
#define __P011 PAGE_U_R /* Pvt-W => !W */
#define __P100 PAGE_U_X_R /* X => R */
#define __P101 PAGE_U_X_R
#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */
#define __P111 PAGE_U_X_R /* Pvt-W => !W */
#define __S000 PAGE_U_NONE
#define __S001 PAGE_U_R
#define __S010 PAGE_U_W_R /* W => R */
#define __S011 PAGE_U_W_R
#define __S100 PAGE_U_X_R /* X => R */
#define __S101 PAGE_U_X_R
#define __S110 PAGE_U_X_W_R /* X => R */
#define __S111 PAGE_U_X_W_R
#ifndef __ASSEMBLY__
#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
/*
 * PTE_BIT_FUNC(fn, op) - emit pte_<fn>(), which applies the compound
 * assignment @op to pte_val() of its by-value argument and returns the
 * modified copy.  The caller's pte is not changed.
 */
#define PTE_BIT_FUNC(fn, op) \
static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
/*
 * pte_modify - build a pte that keeps only the _PAGE_CHG_MASK bits of @pte
 * and ORs in every bit of @newprot.
 */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
}
/*
 * set_pte_at - store @pteval at @ptep via set_pte().
 * @mm and @addr are not used by this implementation.
 */
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
set_pte(ptep, pteval);
}
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep);
/* Encode swap {type,off} tuple into PTE
* We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
* PAGE_PRESENT is zero in a PTE holding swap "identifier"
*/
#define __swp_entry(type, off) ((swp_entry_t) \
{ ((type) & 0x1f) | ((off) << 13) })
/* Decode a PTE containing swap "identifier" into constituents */
#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#define kern_addr_valid(addr) (1)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#include <asm/hugepage.h>
#endif
#endif /* __ASSEMBLY__ */
#endif

View File

@@ -0,0 +1,189 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
*/
/*
* Helpers for implementing paging levels
*/
#ifndef _ASM_ARC_PGTABLE_LEVELS_H
#define _ASM_ARC_PGTABLE_LEVELS_H
#if CONFIG_PGTABLE_LEVELS == 2
/*
* 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS)
*
* [31] 32 bit virtual address [0]
* -------------------------------------------------------
* | | <---------- PGDIR_SHIFT ----------> |
* | | | <-- PAGE_SHIFT --> |
* -------------------------------------------------------
* | | |
* | | --> off in page frame
* | ---> index into Page Table
* ----> index into Page Directory
*
* Given software walk, the vaddr split is arbitrarily set to 11:8:13
* However enabling of super page in a 2 level regime pegs PGDIR_SHIFT to
* super page size.
*/
#if defined(CONFIG_ARC_HUGEPAGE_16M)
#define PGDIR_SHIFT 24
#elif defined(CONFIG_ARC_HUGEPAGE_2M)
#define PGDIR_SHIFT 21
#else
/*
* No Super page case
* Default value provides 11:8:13 (8K), 10:10:12 (4K)
* Limits imposed by pgtable_t only PAGE_SIZE long
* (so 4K page can only have 1K entries: or 10 bits)
*/
#ifdef CONFIG_ARC_PAGE_SIZE_4K
#define PGDIR_SHIFT 22
#else
#define PGDIR_SHIFT 21
#endif
#endif
#else /* CONFIG_PGTABLE_LEVELS != 2 */
/*
* A default 3 level paging testing setup in software walked MMU
* MMUv4 (8K page): <4> : <7> : <8> : <13>
* A default 4 level paging testing setup in software walked MMU
* MMUv4 (8K page): <4> : <3> : <4> : <8> : <13>
*/
#define PGDIR_SHIFT 28
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SHIFT 25
#endif
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SHIFT 21
#endif
#endif /* CONFIG_PGTABLE_LEVELS */
/*
 * For each level present: <LEVEL>_SIZE is the span of virtual address covered
 * by one entry at that level, <LEVEL>_MASK rounds an address down to that
 * span, and PTRS_PER_<LEVEL> is the number of entries in one table, derived
 * from the difference between adjacent shift values.
 */
#define PGDIR_SIZE BIT(PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/* pgd indexes the address bits from PGDIR_SHIFT up to bit 31 */
#define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT)
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SIZE BIT(PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE - 1))
#define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT)
#endif
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SIZE BIT(PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
/*
 * NOTE(review): with exactly 3 levels this header does not define PUD_SHIFT;
 * presumably it is supplied by <asm-generic/pgtable-nopud.h>, which is
 * included further down - confirm.
 */
#define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT)
#endif
/*
 * NOTE(review): with 2 levels this header does not define PMD_SHIFT;
 * presumably it is supplied by <asm-generic/pgtable-nopmd.h>, which is
 * included further down - confirm.
 */
#define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT)
#ifndef __ASSEMBLY__
#if CONFIG_PGTABLE_LEVELS > 3
#include <asm-generic/pgtable-nop4d.h>
#elif CONFIG_PGTABLE_LEVELS > 2
#include <asm-generic/pgtable-nopud.h>
#else
#include <asm-generic/pgtable-nopmd.h>
#endif
/*
* 1st level paging: pgd
*/
/* Index into the page directory: the bits of @addr from PGDIR_SHIFT upward */
#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
/* Pointer to the pgd entry for @addr within @mm's page directory */
#define pgd_offset(mm, addr) (((mm)->pgd) + pgd_index(addr))
/* Same lookup, performed against init_mm */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
/* Report a bad pgd entry at critical log level, tagged with file and line */
#define pgd_ERROR(e) \
pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
#if CONFIG_PGTABLE_LEVELS > 3
/* In 4 level paging, p4d_* macros work on pgd */
/* Entry is empty when its raw value is zero */
#define p4d_none(x) (!p4d_val(x))
/* Entry is bad when any bit outside PAGE_MASK is set */
#define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK))
/* Entry is present when its raw value is non-zero */
#define p4d_present(x) (p4d_val(x))
/* Zero the entry that @xp points to */
#define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0)
/* PAGE_MASK-ed value of the entry, typed as a pointer to the pud table */
#define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK))
/* struct page of the table that the entry refers to */
#define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d))
/* Plain store of @p4d into the slot @p4dp */
#define set_p4d(p4dp, p4d) (*(p4dp) = p4d)
/*
* 2nd level paging: pud
*/
/* Report a bad pud entry at critical log level, tagged with file and line */
#define pud_ERROR(e) \
pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
#endif
#if CONFIG_PGTABLE_LEVELS > 2
/*
* In 3 level paging, pud_* macros work on pgd
* In 4 level paging, pud_* macros work on pud
*/
/* Entry is empty when its raw value is zero */
#define pud_none(x) (!pud_val(x))
/* Entry is bad when any bit outside PAGE_MASK is set */
#define pud_bad(x) ((pud_val(x) & ~PAGE_MASK))
/* Entry is present when its raw value is non-zero */
#define pud_present(x) (pud_val(x))
/* Zero the entry that @xp points to */
#define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0)
/* PAGE_MASK-ed value of the entry, typed as a pointer to the pmd table */
#define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & PAGE_MASK))
/* struct page of the table that the entry refers to */
#define pud_page(pud) virt_to_page(pud_pgtable(pud))
/* Plain store of @pud into the slot @pudp */
#define set_pud(pudp, pud) (*(pudp) = pud)
/*
* 3rd level paging: pmd
*/
/* Report a bad pmd entry at critical log level, tagged with file and line */
#define pmd_ERROR(e) \
pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
/* Frame number from a pmd entry: bits kept by PMD_MASK, shifted by PAGE_SHIFT */
#define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT)
/* Build a pmd value from frame number @pfn and protection bits @prot */
#define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
/* Same as pfn_pmd(), taking the frame number from @page via page_to_pfn() */
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#endif
/*
* Due to the strange way generic pgtable level folding works, the pmd_* macros
* - are valid even for 2 levels (which supposedly only has pgd - pte)
* - behave differently for 2 vs. 3
* In 2 level paging (pgd -> pte), pmd_* macros work on pgd
* In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd
*/
/* Entry is empty when its raw value is zero */
#define pmd_none(x) (!pmd_val(x))
/* Entry is bad when any bit outside PAGE_MASK is set */
#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
/* Entry is present when its raw value is non-zero */
#define pmd_present(x) (pmd_val(x))
/* Zero the entry that @xp points to */
#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
/* PAGE_MASK-ed value of the entry, as an integer address */
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
/* struct page for the address returned by pmd_page_vaddr() */
#define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd))
/* Plain store of @pmd into the slot @pmdp */
#define set_pmd(pmdp, pmd) (*(pmdp) = pmd)
/* The pmd_page_vaddr() address cast to pgtable_t */
#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
/*
* 4th level paging: pte
*/
/* Report a bad pte at critical log level, tagged with file and line */
#define pte_ERROR(e) \
pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
/* PTE is empty when its raw value is zero */
#define pte_none(x) (!pte_val(x))
/* PTE is present when _PAGE_PRESENT is set (result is the masked bit, not 0/1) */
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
/* Clearing a PTE is a set_pte_at() of an all-zero value */
#define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0))
/* struct page for the frame number stored in the PTE */
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
/* Plain store of @pte into the slot @ptep */
#define set_pte(ptep, pte) ((*(ptep)) = (pte))
/* Frame number: raw PTE value shifted right by PAGE_SHIFT */
#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
/* Build a PTE from the physical address of @pfn OR-ed with @prot bits */
#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
/* Same as pfn_pte(), taking the frame number from @page via page_to_pfn() */
#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
#ifdef CONFIG_ISA_ARCV2
/* A pmd is a leaf when _PAGE_HW_SZ is set in it; only defined for ARCv2 */
#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
#endif
#endif /* !__ASSEMBLY__ */
#endif

View File

@@ -1,220 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* vineetg: May 2011
* -Folded PAGE_PRESENT (used by VM) and PAGE_VALID (used by MMU) into 1.
* They are semantically the same although in different contexts
* VALID marks a TLB entry exists and it will only happen if PRESENT
* - Utilise some unused free bits to confine PTE flags to 12 bits
* This is a must for 4k pg-sz
*
* vineetg: Mar 2011 - changes to accommodate MMU TLB Page Descriptor mods
* -TLB Locking never really existed, except for initial specs
* -SILENT_xxx not needed for our port
* -Per my request, MMU V3 changes the layout of some of the bits
* to avoid a few shifts in TLB Miss handlers.
*
* vineetg: April 2010
* -PGD entry no longer contains any flags. If empty it is 0, otherwise has
* Pg-Tbl ptr. Thus pmd_present(), pmd_valid(), pmd_set( ) become simpler
*
* vineetg: April 2010
* -Switched form 8:11:13 split for page table lookup to 11:8:13
* -this speeds up page table allocation itself as we now have to memset 1K
* instead of 8k per page table.
* -TODO: Right now page table alloc is 8K and rest 7K is unused
* need to optimise it
*
* Amit Bhor, Sameer Dhavale: Codito Technologies 2004
*/
#ifndef _ASM_ARC_PGTABLE_H
#define _ASM_ARC_PGTABLE_H
#include <linux/bits.h>
#include <asm-generic/pgtable-nopmd.h>
#include <asm/pgtable-levels.h>
#include <asm/pgtable-bits-arcv2.h>
#include <asm/page.h>
#include <asm/mmu.h> /* to propagate CONFIG_ARC_MMU_VER <n> */
/**************************************************************************
* Page Table Flags
*
* ARC700 MMU only deals with software managed TLB entries.
* Page Tables are purely for Linux VM's consumption and the bits below are
* suited to that (uniqueness). Hence some are not implemented in the TLB and
* some have different value in TLB.
* e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in
* seperate PD0 and PD1, which combined forms a translation entry)
* while for PTE perspective, they are 8 and 9 respectively
* with MMU v3: Most bits (except SHARED) represent the exact hardware pos
* (saves some bit shift ops in TLB Miss hdlrs)
*/
#if (CONFIG_ARC_MMU_VER <= 2)
#define _PAGE_ACCESSED (1<<1) /* Page is accessed (S) */
#define _PAGE_CACHEABLE (1<<2) /* Page is cached (H) */
#define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */
#define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */
#define _PAGE_READ (1<<5) /* Page has user read perm (H) */
#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */
#define _PAGE_SPECIAL (1<<7)
#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
#define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */
#else /* MMU v3 onwards */
#define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */
#define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */
#define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */
#define _PAGE_READ (1<<3) /* Page has user read perm (H) */
#define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */
#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */
#define _PAGE_SPECIAL (1<<6)
#if (CONFIG_ARC_MMU_VER >= 4)
#define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */
#endif
#define _PAGE_GLOBAL (1<<8) /* Page is global (H) */
#define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */
#if (CONFIG_ARC_MMU_VER >= 4)
#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */
#endif
#define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr
usable for shared TLB entries (H) */
#define _PAGE_UNUSED_BIT (1<<12)
#endif
/* vmalloc permissions */
#define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
_PAGE_GLOBAL | _PAGE_PRESENT)
#ifndef CONFIG_ARC_CACHE_PAGES
#undef _PAGE_CACHEABLE
#define _PAGE_CACHEABLE 0
#endif
#ifndef _PAGE_HW_SZ
#define _PAGE_HW_SZ 0
#endif
/* Defaults for every user page */
#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
_PAGE_SPECIAL)
/* More Abbreviated helpers */
#define PAGE_U_NONE __pgprot(___DEF)
#define PAGE_U_R __pgprot(___DEF | _PAGE_READ)
#define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
#define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
#define PAGE_U_X_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE | \
_PAGE_EXECUTE)
#define PAGE_SHARED PAGE_U_W_R
/* While kernel runs out of untranslated space, vmalloc/modules use a chunk of
* user vaddr space - visible in all addr spaces, but kernel mode only
* Thus Global, all-kernel-access, no-user-access, cached
*/
#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE)
/* ioremap */
#define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
/* Masks for actual TLB "PD"s */
#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
#define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
/**************************************************************************
* Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
*
* Certain cases have 1:1 mapping
* e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
* which directly corresponds to PAGE_U_X_R
*
* Other rules which cause the divergence from 1:1 mapping
*
* 1. Although ARC700 can do exclusive execute/write protection (meaning R
* can be tracked independet of X/W unlike some other CPUs), still to
* keep things consistent with other archs:
* -Write implies Read: W => R
* -Execute implies Read: X => R
*
* 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
* This is to enable COW mechanism
*/
/* xwr */
#define __P000 PAGE_U_NONE
#define __P001 PAGE_U_R
#define __P010 PAGE_U_R /* Pvt-W => !W */
#define __P011 PAGE_U_R /* Pvt-W => !W */
#define __P100 PAGE_U_X_R /* X => R */
#define __P101 PAGE_U_X_R
#define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */
#define __P111 PAGE_U_X_R /* Pvt-W => !W */
#define __S000 PAGE_U_NONE
#define __S001 PAGE_U_R
#define __S010 PAGE_U_W_R /* W => R */
#define __S011 PAGE_U_W_R
#define __S100 PAGE_U_X_R /* X => R */
#define __S101 PAGE_U_X_R
#define __S110 PAGE_U_X_W_R /* X => R */
#define __S111 PAGE_U_X_W_R
/****************************************************************
* 2 tier (PGD:PTE) software page walker
*
* [31] 32 bit virtual address [0]
* -------------------------------------------------------
* | | <------------ PGDIR_SHIFT ----------> |
* | | |
* | BITS_FOR_PGD | BITS_FOR_PTE | <-- PAGE_SHIFT --> |
* -------------------------------------------------------
* | | |
* | | --> off in page frame
* | ---> index into Page Table
* ----> index into Page Directory
*
* In a single page size configuration, only PAGE_SHIFT is fixed
* So both PGD and PTE sizing can be tweaked
* e.g. 8K page (PAGE_SHIFT 13) can have
* - PGDIR_SHIFT 21 -> 11:8:13 address split
* - PGDIR_SHIFT 24 -> 8:11:13 address split
*
* If Super Page is configured, PGDIR_SHIFT becomes fixed too,
* so the sizing flexibility is gone.
*/
#if defined(CONFIG_ARC_HUGEPAGE_16M)
#define PGDIR_SHIFT 24
#elif defined(CONFIG_ARC_HUGEPAGE_2M)
#define PGDIR_SHIFT 21
#else
/*
* Only Normal page support so "hackable" (see comment above)
* Default value provides 11:8:13 (8K), 11:9:12 (4K)
*/
#define PGDIR_SHIFT 21
#endif
#define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT)
#define BITS_FOR_PGD (32 - PGDIR_SHIFT)
#define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PTE BIT(BITS_FOR_PTE)
#define PTRS_PER_PGD BIT(BITS_FOR_PGD)
#include <asm/mmu.h>
/*
* Number of entries a user land program use.
@@ -222,143 +19,17 @@
*/
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
/****************************************************************
* Bucket load of VM Helpers
*/
#ifndef __ASSEMBLY__
#define pte_ERROR(e) \
pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
#define pgd_ERROR(e) \
pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
/* the zero page used for uninitialized and anonymous pages */
extern char empty_zero_page[PAGE_SIZE];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
/* find the page descriptor of the Page Tbl ref by PMD entry */
#define pmd_page(pmd) virt_to_page(pmd_val(pmd) & PAGE_MASK)
/* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */
#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
/* In a 2 level sys, setup the PGD entry with PTE value */
static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
{
pmd_val(*pmdp) = (unsigned long)ptep;
}
#define pte_none(x) (!pte_val(x))
#define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
#define pte_clear(mm, addr, ptep) set_pte_at(mm, addr, ptep, __pte(0))
#define pmd_none(x) (!pmd_val(x))
#define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK))
#define pmd_present(x) (pmd_val(x))
#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ)
#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
#define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
/* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
#define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT)
/* Zoo of pte_xxx function */
#define pte_read(pte) (pte_val(pte) & _PAGE_READ)
#define pte_write(pte) (pte_val(pte) & _PAGE_WRITE)
#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY)
#define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED)
#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL)
#define PTE_BIT_FUNC(fn, op) \
static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT));
PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE));
PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE));
PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY));
PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY));
PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED));
PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED));
PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE));
PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE));
PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL));
PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ));
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
}
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
/* Macro to mark a page protection as uncacheable */
#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
set_pte(ptep, pteval);
}
/*
* Macro to quickly access the PGD entry, utilising the fact that some
* arch may cache the pointer to Page Directory of "current" task
* in a MMU register
*
* Thus task->mm->pgd (3 pointer dereferences, cache misses etc simply
* becomes read a register
*
* ********CAUTION*******:
* Kernel code might be dealing with some mm_struct of NON "current"
* Thus use this macro only when you are certain that "current" is current
* e.g. when dealing with signal frame setup code etc
*/
#ifdef ARC_USE_SCRATCH_REG
#define pgd_offset_fast(mm, addr) \
({ \
pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \
pgd_base + pgd_index(addr); \
})
#else
#define pgd_offset_fast(mm, addr) pgd_offset(mm, addr)
#endif
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep);
/* Encode swap {type,off} tuple into PTE
* We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
* PAGE_PRESENT is zero in a PTE holding swap "identifier"
*/
#define __swp_entry(type, off) ((swp_entry_t) { \
((type) & 0x1f) | ((off) << 13) })
/* Decode a PTE containing swap "identifier "into constituents */
#define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f)
#define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13)
/* NOPs, to keep generic kernel happy */
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
#define kern_addr_valid(addr) (1)
#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd))
/*
* remap a physical page `pfn' of size `size' with page protection `prot'
* into virtual address `from'
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#include <asm/hugepage.h>
#endif
/* to cope with aliasing VIPT cache */
#define HAVE_ARCH_UNMAPPED_AREA

View File

@@ -93,7 +93,7 @@ extern unsigned int get_wchan(struct task_struct *p);
#define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
/* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */
#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
#define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4)
#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)

View File

@@ -2,8 +2,8 @@
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
#ifndef __ASMARC_SETUP_H
#define __ASMARC_SETUP_H
#ifndef __ASM_ARC_SETUP_H
#define __ASM_ARC_SETUP_H
#include <linux/types.h>
@@ -34,4 +34,12 @@ long __init arc_get_mem_sz(void);
#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
extern void arc_mmu_init(void);
extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_mmu_bcr(void);
extern void arc_cache_init(void);
extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
extern void read_decode_cache_bcr(void);
#endif /* __ASM_ARC_SETUP_H */

View File

@@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void)
#include <asm/spinlock.h>
extern arch_spinlock_t smp_atomic_ops_lock;
extern arch_spinlock_t smp_bitops_lock;
#define atomic_ops_lock(flags) do { \
local_irq_save(flags); \
@@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock;
local_irq_restore(flags); \
} while (0)
#define bitops_lock(flags) do { \
local_irq_save(flags); \
arch_spin_lock(&smp_bitops_lock); \
} while (0)
#define bitops_unlock(flags) do { \
arch_spin_unlock(&smp_bitops_lock); \
local_irq_restore(flags); \
} while (0)
#else /* !CONFIG_SMP */
#define atomic_ops_lock(flags) local_irq_save(flags)
#define atomic_ops_unlock(flags) local_irq_restore(flags)
#define bitops_lock(flags) local_irq_save(flags)
#define bitops_unlock(flags) local_irq_restore(flags)
#endif /* !CONFIG_SMP */
#endif /* !CONFIG_ARC_HAS_LLSC */

View File

@@ -1,101 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*/
#ifndef __ASM_TLB_MMU_V1_H__
#define __ASM_TLB_MMU_V1_H__
#include <asm/mmu.h>
#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1)
.macro TLB_WRITE_HEURISTICS
#define JH_HACK1
#undef JH_HACK2
#undef JH_HACK3
#ifdef JH_HACK3
; Calculate set index for 2-way MMU
; -avoiding use of GetIndex from MMU
; and its unpleasant LFSR pseudo-random sequence
;
; r1 = TLBPD0 from TLB_RELOAD above
;
; -- jh_ex_way_set not cleared on startup
; didn't want to change setup.c
; hence extra instruction to clean
;
; -- should be in cache since in same line
; as r0/r1 saves above
;
ld r0,[jh_ex_way_sel] ; victim pointer
and r0,r0,1 ; clean
xor.f r0,r0,1 ; flip
st r0,[jh_ex_way_sel] ; store back
asr r0,r1,12 ; get set # <<1, note bit 12=R=0
or.nz r0,r0,1 ; set way bit
and r0,r0,0xff ; clean
sr r0,[ARC_REG_TLBINDEX]
#endif
#ifdef JH_HACK2
; JH hack #2
; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU
; Slower in thrash case (where it matters) because more code is executed
; Inefficient due to two-register paradigm of this miss handler
;
/* r1 = data TLBPD0 at this point */
lr r0,[eret] /* instruction address */
xor r0,r0,r1 /* compare set # */
and.f r0,r0,0x000fe000 /* 2-way MMU mask */
bne 88f /* not in same set - no need to probe */
lr r0,[eret] /* instruction address */
and r0,r0,PAGE_MASK /* VPN of instruction address */
; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/
and r1,r1,0xff /* Data ASID */
or r0,r0,r1 /* Instruction address + Data ASID */
lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/
sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */
xor r0,r0,1 /* flip bottom bit of data index */
b.d 89f
sr r0,[ARC_REG_TLBINDEX] /* and put it back */
88:
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
89:
#endif
#ifdef JH_HACK1
;
; Always checks whether instruction will be kicked out by dtlb miss
;
; Sequence:
;  1. stash the data PD0 (in r1 on entry) in r3
;  2. build a probe PD0 from the VPN of the address in eret plus the
;     data ASID, and issue TLBProbe to find the index of that entry
;  3. restore the data PD0 and issue TLBGetIndex to obtain the index
;     chosen for the data entry
;  4. if both indices are equal, flip bit 0 of the data index before
;     writing it back to ARC_REG_TLBINDEX
; Uses r0, r1 and r3 as scratch.
;
mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3
lr r0,[eret] /* instruction address */
and r0,r0,PAGE_MASK /* VPN of instruction address */
bmsk r1,r3,7 /* Data ASID, bits 7-0 */
or_s r0,r0,r1 /* Instruction address + Data ASID */
sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */
sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */
sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */
cmp r0,r1 /* if no match on indices, go around */
xor.eq r1,r1,1 /* flip bottom bit of data index */
sr r1,[ARC_REG_TLBINDEX] /* and put it back */
#endif
.endm
#endif
#endif

View File

@@ -10,6 +10,7 @@
#include <asm/errno.h>
#include <asm/arcregs.h>
#include <asm/irqflags.h>
#include <asm/mmu.h>
; A maximum number of supported interrupts in the core interrupt controller.
; This number is not equal to the maximum interrupt number (256) because

View File

@@ -101,11 +101,8 @@ ENTRY(EV_MachineCheck)
lr r0, [efa]
mov r1, sp
; hardware auto-disables MMU, re-enable it to allow kernel vaddr
; access for say stack unwinding of modules for crash dumps
lr r3, [ARC_REG_PID]
or r3, r3, MMU_ENABLE
sr r3, [ARC_REG_PID]
; MC exceptions disable MMU
ARC_MMU_REENABLE r3
lsr r3, r2, 8
bmsk r3, r3, 7

View File

@@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
* Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
* Here local_irq_enable( ) shd not re-enable lower priority interrupts
* -If called from soft-ISR, it must re-enable all interrupts
* soft ISR are low prioity jobs which can be very slow, thus all IRQs
* soft ISR are low priority jobs which can be very slow, thus all IRQs
* must be enabled while they run.
* Now hardware context wise we may still be in L2 ISR (not done rtie)
* still we must re-enable both L1 and L2 IRQs

View File

@@ -29,10 +29,8 @@
#ifndef CONFIG_ARC_HAS_LLSC
arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
EXPORT_SYMBOL_GPL(smp_bitops_lock);
#endif
struct plat_smp_ops __weak plat_smp_ops;
@@ -283,7 +281,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
/*
* Call the platform specific IPI kick function, but avoid if possible:
* Only do so if there's no pending msg from other concurrent sender(s).
* Otherwise, recevier will see this msg as well when it takes the
* Otherwise, receiver will see this msg as well when it takes the
* IPI corresponding to that msg. This is true, even if it is already in
* IPI handler, because !@old means it has not yet dequeued the msg(s)
* so @new msg can be a free-loader

View File

@@ -149,7 +149,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
#else
/* On ARC, only Dward based unwinder works. fp based backtracing is
* not possible (-fno-omit-frame-pointer) because of the way function
* prelogue is setup (callee regs saved and then fp set and not other
* prologue is setup (callee regs saved and then fp set and not other
* way around
*/
pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");

View File

@@ -205,93 +205,24 @@ slc_chk:
#define OP_INV_IC 0x4
/*
* I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
* Cache Flush programming model
*
* ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
* The orig Cache Management Module "CDU" only required paddr to invalidate a
* certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
* Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
* the exact same line.
* ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias.
* Programming model requires both paddr and vaddr irrespective of aliasing
* considerations:
* - vaddr in {I,D}C_IV?L
* - paddr in {I,D}C_PTAG
*
* However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
* paddr alone could not be used to correctly index the cache.
* In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias.
* Programming model is different for aliasing vs. non-aliasing I$
* - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L
* - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$)
*
* ------------------
* MMU v1/v2 (Fixed Page Size 8k)
* ------------------
* The solution was to provide CDU with these additonal vaddr bits. These
* would be bits [x:13], x would depend on cache-geometry, 13 comes from
* standard page size of 8k.
* H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
* of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
* orig 5 bits of paddr were anyways ignored by CDU line ops, as they
* represent the offset within cache-line. The adv of using this "clumsy"
* interface for additional info was no new reg was needed in CDU programming
* model.
*
* 17:13 represented the max num of bits passable, actual bits needed were
* fewer, based on the num-of-aliases possible.
* -for 2 alias possibility, only bit 13 needed (32K cache)
* -for 4 alias possibility, bits 14:13 needed (64K cache)
*
* ------------------
* MMU v3
* ------------------
* This ver of MMU supports variable page sizes (1k-16k): although Linux will
* only support 8k (default), 16k and 4k.
* However from hardware perspective, smaller page sizes aggravate aliasing
* meaning more vaddr bits needed to disambiguate the cache-line-op ;
* the existing scheme of piggybacking won't work for certain configurations.
* Two new registers IC_PTAG and DC_PTAG inttoduced.
* "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
* - If PAE40 is enabled, independent of aliasing considerations, the higher
* bits needs to be written into PTAG_HI
*/
/*
 * Issue one cache line operation per line over a region.
 *
 * @paddr:     physical start address of the region
 * @vaddr:     virtual start address; bits taken from (vaddr >> PAGE_SHIFT)
 *             are stuffed into the low 5 bits of the value written
 * @sz:        region length in bytes
 * @op:        OP_INV_IC selects the I-cache invalidate register; otherwise
 *             the D-cache register is chosen by whether OP_INV is set
 * @full_page: non-zero skips the line-alignment fixup of @paddr/@vaddr/@sz
 */
static inline
void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
			  unsigned long sz, const int op, const int full_page)
{
	unsigned int aux_cmd;
	int num_lines;

	if (op == OP_INV_IC)
		aux_cmd = ARC_REG_IC_IVIL;
	else if (op & OP_INV)
		/* d$ cmd: INV (discard or wback-n-discard) */
		aux_cmd = ARC_REG_DC_IVDL;
	else
		/* d$ cmd: FLUSH (wback) */
		aux_cmd = ARC_REG_DC_FLDL;

	/*
	 * Floor/ceil requests that are not line aligned/sized so that @paddr
	 * is cache line aligned and the line count is integral. Page sized
	 * requests skip this: @paddr is already line aligned (being page
	 * aligned) and @sz is an integral multiple of the line size.
	 */
	if (!full_page) {
		sz += paddr & ~CACHE_LINE_MASK;
		paddr &= CACHE_LINE_MASK;
		vaddr &= CACHE_LINE_MASK;
	}

	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;

	for (num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
	     num_lines > 0; num_lines--) {
		write_aux_reg(aux_cmd, paddr);
		paddr += L1_CACHE_BYTES;
	}
}
/*
* For ARC700 MMUv3 I-cache and D-cache flushes
* - ARC700 programming model requires paddr and vaddr be passed in seperate
* AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the
* caches actually alias or not.
* - For HS38, only the aliasing I-cache configuration uses the PTAG reg
* (non aliasing I-cache version doesn't; while D-cache can't possibly alias)
*/
static inline
void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int op, const int full_page)
{
@@ -350,17 +281,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
#ifndef USE_RGN_FLSH
/*
* In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
* Here's how cache ops are implemented
*
* - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
* - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
* - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
* respectively, similar to MMU v3 programming model, hence
* __cache_line_loop_v3() is used)
*
* If PAE40 is enabled, independent of aliasing considerations, the higher bits
* needs to be written into PTAG_HI
*/
static inline
void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
@@ -460,11 +380,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
#endif
#if (CONFIG_ARC_MMU_VER < 3)
#define __cache_line_loop __cache_line_loop_v2
#elif (CONFIG_ARC_MMU_VER == 3)
#ifdef CONFIG_ARC_MMU_V3
#define __cache_line_loop __cache_line_loop_v3
#elif (CONFIG_ARC_MMU_VER > 3)
#else
#define __cache_line_loop __cache_line_loop_v4
#endif
@@ -1123,7 +1041,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
clear_page(to);
clear_bit(PG_dc_clean, &page->flags);
}
EXPORT_SYMBOL(clear_user_page);
/**********************************************************************
* Explicit Cache flush request from user space via syscall

View File

@@ -33,28 +33,34 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pgd = pgd_offset_fast(current->active_mm, address);
pgd = pgd_offset(current->active_mm, address);
pgd_k = pgd_offset_k(address);
if (!pgd_present(*pgd_k))
if (pgd_none (*pgd_k))
goto bad_area;
if (!pgd_present(*pgd))
set_pgd(pgd, *pgd_k);
p4d = p4d_offset(pgd, address);
p4d_k = p4d_offset(pgd_k, address);
if (!p4d_present(*p4d_k))
if (p4d_none(*p4d_k))
goto bad_area;
if (!p4d_present(*p4d))
set_p4d(p4d, *p4d_k);
pud = pud_offset(p4d, address);
pud_k = pud_offset(p4d_k, address);
if (!pud_present(*pud_k))
if (pud_none(*pud_k))
goto bad_area;
if (!pud_present(*pud))
set_pud(pud, *pud_k);
pmd = pmd_offset(pud, address);
pmd_k = pmd_offset(pud_k, address);
if (!pmd_present(*pmd_k))
if (pmd_none(*pmd_k))
goto bad_area;
set_pmd(pmd, *pmd_k);
if (!pmd_present(*pmd))
set_pmd(pmd, *pmd_k);
/* XXX: create the TLB entry here */
return 0;

View File

@@ -189,6 +189,11 @@ void __init mem_init(void)
{
memblock_free_all();
highmem_init();
BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE);
BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE);
}
#ifdef CONFIG_HIGHMEM

View File

@@ -39,7 +39,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
if (arc_uncached_addr_space(paddr))
return (void __iomem *)(u32)paddr;
return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE);
return ioremap_prot(paddr, size,
pgprot_val(pgprot_noncached(PAGE_KERNEL)));
}
EXPORT_SYMBOL(ioremap);

View File

@@ -1,51 +1,9 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* TLB Management (flush/create/diagnostics) for ARC700
* TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4
*
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* vineetg: Aug 2011
* -Reintroduce duplicate PD fixup - some customer chips still have the issue
*
* vineetg: May 2011
* -No need to flush_cache_page( ) for each call to update_mmu_cache()
* some of the LMBench tests improved amazingly
* = page-fault thrice as fast (75 usec to 28 usec)
* = mmap twice as fast (9.6 msec to 4.6 msec),
* = fork (5.3 msec to 3.7 msec)
*
* vineetg: April 2011 :
* -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
* helps avoid a shift when preparing PD0 from PTE
*
* vineetg: April 2011 : Preparing for MMU V3
* -MMU v2/v3 BCRs decoded differently
* -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512
* -tlb_entry_erase( ) can be void
* -local_flush_tlb_range( ):
* = need not "ceil" @end
* = walks MMU only if range spans < 32 entries, as opposed to 256
*
* Vineetg: Sept 10th 2008
* -Changes related to MMU v2 (Rel 4.8)
*
* Vineetg: Aug 29th 2008
* -In TLB Flush operations (Metal Fix MMU) there is a explicit command to
* flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
* it fails. Thus need to load it with ANY valid value before invoking
* TLBIVUTLB cmd
*
* Vineetg: Aug 21th 2008:
* -Reduced the duration of IRQ lockouts in TLB Flush routines
* -Multiple copies of TLB erase code separated into a "single" function
* -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
* in interrupt-safe region.
*
* Vineetg: April 23rd Bug #93131
* Problem: tlb_flush_kernel_range() doesn't do anything if the range to
* flush is more than the size of TLB itself.
*
* Rahul Trivedi : Codito Technologies 2004
*/
#include <linux/module.h>
@@ -57,47 +15,6 @@
#include <asm/mmu_context.h>
#include <asm/mmu.h>
/* Need for ARC MMU v2
*
* ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc.
* For a memcpy operation with 3 players (src/dst/code) such that all 3 pages
* map into same set, there would be contention for the 2 ways causing severe
* Thrashing.
*
* Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has
* much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
* Given this, the thrashing problem should never happen because once the 3
* J-TLB entries are created (even though 3rd will knock out one of the prev
* two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
*
* Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs.
* This is a simple design for keeping them in sync. So what do we do?
* The solution which James came up was pretty neat. It utilised the assoc
* of uTLBs by not invalidating always but only when absolutely necessary.
*
* - Existing TLB commands work as before
* - New command (TLBWriteNI) for TLB write without clearing uTLBs
* - New command (TLBIVUTLB) to invalidate uTLBs.
*
* The uTLBs need only be invalidated when pages are being removed from the
* OS page table. If a 'victim' TLB entry is being overwritten in the main TLB
* as a result of a miss, the removed entry is still allowed to exist in the
* uTLBs as it is still valid and present in the OS page table. This allows the
* full associativity of the uTLBs to hide the limited associativity of the main
* TLB.
*
* During a miss handler, the new "TLBWriteNI" command is used to load
* entries without clearing the uTLBs.
*
* When the OS page table is updated, TLB entries that may be associated with a
* removed page are removed (flushed) from the TLB using TLBWrite. In this
* circumstance, the uTLBs must also be cleared. This is done by using the
* existing TLBWrite command. An explicit IVUTLB is also required for those
* corner cases when TLBWrite was not executed at all because the corresp
* J-TLB entry got evicted/replaced.
*/
/* A copy of the ASID from the PID reg is kept in asid_cache */
DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
@@ -120,32 +37,10 @@ static inline void __tlb_entry_erase(void)
static void utlb_invalidate(void)
{
#if (CONFIG_ARC_MMU_VER >= 2)
#if (CONFIG_ARC_MMU_VER == 2)
/* MMU v2 introduced the uTLB Flush command.
* There was however an obscure hardware bug, where uTLB flush would
* fail when a prior probe for J-TLB (both totally unrelated) would
* return lkup err - because the entry didn't exist in MMU.
* The Workaround was to set Index reg with some valid value, prior to
* flush. This was fixed in MMU v3
*/
unsigned int idx;
/* make sure INDEX Reg is valid */
idx = read_aux_reg(ARC_REG_TLBINDEX);
/* If not write some dummy val */
if (unlikely(idx & TLB_LKUP_ERR))
write_aux_reg(ARC_REG_TLBINDEX, 0xa);
#endif
write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
#endif
}
#if (CONFIG_ARC_MMU_VER < 4)
#ifdef CONFIG_ARC_MMU_V3
static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
{
@@ -176,7 +71,7 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
}
}
static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
{
unsigned int idx;
@@ -206,7 +101,7 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
#else /* CONFIG_ARC_MMU_VER >= 4) */
#else /* MMUv4 */
static void tlb_entry_erase(unsigned int vaddr_n_asid)
{
@@ -214,13 +109,16 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
}
static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
{
write_aux_reg(ARC_REG_TLBPD0, pd0);
write_aux_reg(ARC_REG_TLBPD1, pd1);
if (is_pae40_enabled())
if (!is_pae40_enabled()) {
write_aux_reg(ARC_REG_TLBPD1, pd1);
} else {
write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF);
write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
}
write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
}
@@ -496,7 +394,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
unsigned long flags;
unsigned int asid_or_sasid, rwx;
unsigned long pd0;
pte_t pd1;
phys_addr_t pd1;
/*
* create_tlb() assumes that current->mm == vma->mm, since
@@ -505,7 +403,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
*
* Removing the assumption involves
* -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg.
* -Fix the TLB paranoid debug code to not trigger false negatives.
* -More importantly it makes this handler inconsistent with fast-path
* TLB Refill handler which always deals with "current"
*
@@ -528,8 +425,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
local_irq_save(flags);
tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr);
vaddr &= PAGE_MASK;
/* update this PTE credentials */
@@ -639,43 +534,6 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
update_mmu_cache(vma, addr, &pte);
}
/*
 * Deposit @pgtable on the list anchored at pmd_huge_pte(mm, pmdp).
 * The page table's own memory is reinterpreted as the struct list_head that
 * links deposited tables together, so no separate node is allocated.
 * The caller must hold mm->page_table_lock (asserted below).
 */
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable)
{
struct list_head *lh = (struct list_head *) pgtable;
assert_spin_locked(&mm->page_table_lock);
/* FIFO */
/* Nothing deposited yet: start a new list; otherwise link into the existing one */
if (!pmd_huge_pte(mm, pmdp))
INIT_LIST_HEAD(lh);
else
list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
/* The table just deposited becomes the new anchor */
pmd_huge_pte(mm, pmdp) = pgtable;
}
/*
 * Withdraw and return the page table currently anchored at
 * pmd_huge_pte(mm, pmdp). The anchor moves on to the next deposited table,
 * or becomes NULL when the withdrawn table was the only one on the list.
 * The caller must hold mm->page_table_lock (asserted below).
 */
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
struct list_head *lh;
pgtable_t pgtable;
assert_spin_locked(&mm->page_table_lock);
pgtable = pmd_huge_pte(mm, pmdp);
lh = (struct list_head *) pgtable;
if (list_empty(lh))
pmd_huge_pte(mm, pmdp) = NULL;
else {
pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
list_del(lh);
}
/*
 * Zero the first two entries of the returned table.
 * NOTE(review): presumably these overlap the embedded list_head used while
 * the table was deposited - confirm pte_t is pointer-sized on this config.
 */
pte_val(pgtable[0]) = 0;
pte_val(pgtable[1]) = 0;
return pgtable;
}
void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
@@ -706,14 +564,6 @@ void read_decode_mmu_bcr(void)
{
struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
unsigned int tmp;
struct bcr_mmu_1_2 {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8;
#else
unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8;
#endif
} *mmu2;
struct bcr_mmu_3 {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
@@ -738,23 +588,14 @@ void read_decode_mmu_bcr(void)
tmp = read_aux_reg(ARC_REG_MMU_BCR);
mmu->ver = (tmp >> 24);
if (is_isa_arcompact()) {
if (mmu->ver <= 2) {
mmu2 = (struct bcr_mmu_1_2 *)&tmp;
mmu->pg_sz_k = TO_KB(0x2000);
mmu->sets = 1 << mmu2->sets;
mmu->ways = 1 << mmu2->ways;
mmu->u_dtlb = mmu2->u_dtlb;
mmu->u_itlb = mmu2->u_itlb;
} else {
mmu3 = (struct bcr_mmu_3 *)&tmp;
mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
mmu->sets = 1 << mmu3->sets;
mmu->ways = 1 << mmu3->ways;
mmu->u_dtlb = mmu3->u_dtlb;
mmu->u_itlb = mmu3->u_itlb;
mmu->sasid = mmu3->sasid;
}
if (is_isa_arcompact() && mmu->ver == 3) {
mmu3 = (struct bcr_mmu_3 *)&tmp;
mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
mmu->sets = 1 << mmu3->sets;
mmu->ways = 1 << mmu3->ways;
mmu->u_dtlb = mmu3->u_dtlb;
mmu->u_itlb = mmu3->u_itlb;
mmu->sasid = mmu3->sasid;
} else {
mmu4 = (struct bcr_mmu_4 *)&tmp;
mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
@@ -780,8 +621,8 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
n += scnprintf(buf + n, len - n,
"MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
p_mmu->ver, p_mmu->pg_sz_k, super_pg,
"MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
p_mmu->ver, p_mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
p_mmu->u_dtlb, p_mmu->u_itlb,
IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
@@ -815,22 +656,17 @@ void arc_mmu_init(void)
/*
* Ensure that MMU features assumed by kernel exist in hardware.
* For older ARC700 cpus, it has to be exact match, since the MMU
* revisions were not backwards compatible (MMUv3 TLB layout changed
* so even if kernel for v2 didn't use any new cmds of v3, it would
* still not work.
* For HS cpus, MMUv4 was baseline and v5 is backwards compatible
* (will run older software).
* - For older ARC700 cpus, only v3 supported
* - For HS cpus, v4 was baseline and v5 is backwards compatible
* (will run older software).
*/
if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER)
if (is_isa_arcompact() && mmu->ver == 3)
compat = 1;
else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER)
else if (is_isa_arcv2() && mmu->ver >= 4)
compat = 1;
if (!compat) {
panic("MMU ver %d doesn't match kernel built for %d...\n",
mmu->ver, CONFIG_ARC_MMU_VER);
}
if (!compat)
panic("MMU ver %d doesn't match kernel built for\n", mmu->ver);
if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
@@ -843,14 +679,11 @@ void arc_mmu_init(void)
if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
panic("Hardware doesn't support PAE40\n");
/* Enable the MMU */
write_aux_reg(ARC_REG_PID, MMU_ENABLE);
/* Enable the MMU with ASID 0 */
mmu_setup_asid(NULL, 0);
/* In smp we use this reg for interrupt 1 scratch */
#ifdef ARC_USE_SCRATCH_REG
/* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
#endif
/* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */
mmu_setup_pgd(NULL, swapper_pg_dir);
if (pae40_exist_but_not_enab())
write_aux_reg(ARC_REG_TLBPD1HI, 0);
@@ -945,40 +778,3 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
local_irq_restore(flags);
}
/***********************************************************************
* Diagnostic Routines
* -Called from Low Level TLB Handlers if things don't look good
**********************************************************************/
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
/*
 * Report that the software ASID (@mm_asid) and the ASID held by the MMU
 * (@mmu_asid) disagree. The low level ASM TLB handler calls this when it
 * detects the mismatch; @is_fast_path selects the "Fast" or "Slow" wording
 * in the log line.
 */
void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path)
{
	const char *path_name;

	if (is_fast_path)
		path_name = "Fast";
	else
		path_name = "Slow";

	pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
		 path_name, mm_asid, mmu_asid);

	/* NOTE(review): "flag 1" presumably halts the core for inspection - confirm */
	__asm__ __volatile__("flag 1");
}
/*
 * Sanity check performed at TLB miss/installation time: the ASID programmed
 * in the MMU PID register must match the software ASID, and the software
 * side must actually own a valid ASID. Only addresses below 0x70000000 are
 * checked; a violation is reported via print_asid_mismatch() as slow path.
 */
void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr)
{
	/* Low 8 bits of the PID register are compared against the SW ASID */
	unsigned int hw_asid = read_aux_reg(ARC_REG_PID) & 0xff;

	if (addr >= 0x70000000)
		return;

	/* Valid SW ASID which agrees with the hardware: nothing to report */
	if (mm_asid != MM_CTXT_NO_ASID &&
	    hw_asid == (mm_asid & MM_CTXT_ASID_MASK))
		return;

	print_asid_mismatch(mm_asid, hw_asid, 0);
}
#endif

View File

@@ -39,7 +39,6 @@
#include <asm/arcregs.h>
#include <asm/cache.h>
#include <asm/processor.h>
#include <asm/tlb-mmu1.h>
#ifdef CONFIG_ISA_ARCOMPACT
;-----------------------------------------------------------------
@@ -94,11 +93,6 @@ ex_saved_reg1:
st_s r1, [r0, 4]
st_s r2, [r0, 8]
st_s r3, [r0, 12]
; VERIFY if the ASID in MMU-PID Reg is same as
; one in Linux data structures
tlb_paranoid_check_asm
.endm
.macro TLBMISS_RESTORE_REGS
@@ -147,55 +141,18 @@ ex_saved_reg1:
#endif
;============================================================================
; Troubleshooting Stuff
;============================================================================
; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid
; When Creating TLB Entries, instead of doing 3 dependent loads from memory,
; we use the MMU PID Reg to get current ASID.
; In bizarre scenarios SW and HW ASID can get out-of-sync which is trouble.
; So we try to detect this in the TLB Miss handler
.macro tlb_paranoid_check_asm
#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
GET_CURR_TASK_ON_CPU r3
ld r0, [r3, TASK_ACT_MM]
ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
breq r0, 0, 55f ; Error if no ASID allocated
lr r1, [ARC_REG_PID]
and r1, r1, 0xFF
and r2, r0, 0xFF ; MMU PID bits only for comparison
breq r1, r2, 5f
55:
; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
lr r2, [erstatus]
bbit0 r2, STATUS_U_BIT, 5f
; We sure are in troubled waters, Flag the error, but to do so
; need to switch to kernel mode stack to call error routine
GET_TSK_STACK_BASE r3, sp
; Call printk to shoutout aloud
mov r2, 1
j print_asid_mismatch
5: ; ASIDs match so proceed normally
nop
#endif
.endm
;============================================================================
;TLB Miss handling Code
;============================================================================
#ifndef PMD_SHIFT
#define PMD_SHIFT PUD_SHIFT
#endif
#ifndef PUD_SHIFT
#define PUD_SHIFT PGDIR_SHIFT
#endif
;-----------------------------------------------------------------------------
; This macro does the page-table lookup for the faulting address.
; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address
@@ -203,7 +160,7 @@ ex_saved_reg1:
lr r2, [efa]
#ifdef ARC_USE_SCRATCH_REG
#ifdef CONFIG_ISA_ARCV2
lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
#else
GET_CURR_TASK_ON_CPU r1
@@ -216,6 +173,24 @@ ex_saved_reg1:
tst r3, r3
bz do_slow_path_pf ; if no Page Table, do page fault
#if CONFIG_PGTABLE_LEVELS > 3
lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD
and r0, r0, (PTRS_PER_PUD - 1)
ld.as r1, [r3, r0] ; PMD entry
tst r1, r1
bz do_slow_path_pf
mov r3, r1
#endif
#if CONFIG_PGTABLE_LEVELS > 2
lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD
and r0, r0, (PTRS_PER_PMD - 1)
ld.as r1, [r3, r0] ; PMD entry
tst r1, r1
bz do_slow_path_pf
mov r3, r1
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp)
add2.nz r1, r1, r0
@@ -279,7 +254,7 @@ ex_saved_reg1:
; Commit the TLB entry into MMU
.macro COMMIT_ENTRY_TO_MMU
#if (CONFIG_ARC_MMU_VER < 4)
#ifdef CONFIG_ARC_MMU_V3
/* Get free TLB slot: Set = computed from vaddr, way = random */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
@@ -375,13 +350,6 @@ ENTRY(EV_TLBMissD)
CONV_PTE_TO_TLB
#if (CONFIG_ARC_MMU_VER == 1)
; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
; But only for old MMU or one with Metal Fix
TLB_WRITE_HEURISTICS
#endif
COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS
EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation

View File

@@ -126,6 +126,7 @@ config ARM
select RTC_LIB
select SET_FS
select SYS_SUPPORTS_APM_EMULATION
select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
# Above selects are sorted alphabetically; please add new ones
# according to that. Thanks.
help
@@ -189,10 +190,6 @@ config LOCKDEP_SUPPORT
bool
default y
config TRACE_IRQFLAGS_SUPPORT
bool
default !CPU_V7M
config ARCH_HAS_ILOG2_U32
bool

View File

@@ -220,6 +220,7 @@ config ARM64
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
select TRACE_IRQFLAGS_SUPPORT
help
ARM 64-bit (AArch64) Linux support.
@@ -287,9 +288,6 @@ config ILLEGAL_POINTER_VALUE
config LOCKDEP_SUPPORT
def_bool y
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config GENERIC_BUG
def_bool y
depends on BUG

View File

@@ -82,6 +82,7 @@ config CSKY
select PCI_SYSCALL if PCI
select PCI_MSI if PCI
select SET_FS
select TRACE_IRQFLAGS_SUPPORT
config LOCKDEP_SUPPORT
def_bool y
@@ -139,9 +140,6 @@ config STACKTRACE_SUPPORT
config TIME_LOW_RES
def_bool y
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config CPU_TLB_SIZE
int
default "128" if (CPU_CK610 || CPU_CK807 || CPU_CK810)

View File

@@ -32,6 +32,7 @@ config HEXAGON
select GENERIC_CPU_DEVICES
select SET_FS
select ARCH_WANT_LD_ORPHAN_WARN
select TRACE_IRQFLAGS_SUPPORT
help
Qualcomm Hexagon is a processor architecture designed for high
performance and low power across a wide variety of applications.
@@ -53,9 +54,6 @@ config EARLY_PRINTK
config MMU
def_bool y
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config GENERIC_CSUM
def_bool y

View File

@@ -44,6 +44,7 @@ config MICROBLAZE
select SPARSE_IRQ
select SET_FS
select ZONE_DMA
select TRACE_IRQFLAGS_SUPPORT
# Endianness selection
choice

View File

@@ -1,6 +1 @@
# SPDX-License-Identifier: GPL-2.0-only
# For a description of the syntax of this configuration file,
# see Documentation/kbuild/kconfig-language.rst.
config TRACE_IRQFLAGS_SUPPORT
def_bool y

View File

@@ -98,6 +98,7 @@ config MIPS
select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
select RTC_LIB
select SYSCTL_EXCEPTION_TRACE
select TRACE_IRQFLAGS_SUPPORT
select VIRT_TO_BUS
select ARCH_HAS_ELFCORE_COMPAT

View File

@@ -1,9 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
config TRACE_IRQFLAGS_SUPPORT
bool
default y
config EARLY_PRINTK
bool "Early printk" if EXPERT
depends on SYS_HAS_EARLY_PRINTK

View File

@@ -46,6 +46,7 @@ config NDS32
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select SET_FS
select TRACE_IRQFLAGS_SUPPORT
help
Andes(nds32) Linux support.
@@ -62,9 +63,6 @@ config GENERIC_LOCKBREAK
def_bool y
depends on PREEMPTION
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config STACKTRACE_SUPPORT
def_bool y

View File

@@ -41,9 +41,6 @@ config NO_IOPORT_MAP
config FPU
def_bool n
config TRACE_IRQFLAGS_SUPPORT
def_bool n
menu "Kernel features"
source "kernel/Kconfig.hz"

View File

@@ -37,6 +37,7 @@ config OPENRISC
select GENERIC_IRQ_MULTI_HANDLER
select MMU_GATHER_NO_RANGE if MMU
select SET_FS
select TRACE_IRQFLAGS_SUPPORT
config CPU_BIG_ENDIAN
def_bool y
@@ -50,9 +51,6 @@ config GENERIC_HWEIGHT
config NO_IOPORT_MAP
def_bool y
config TRACE_IRQFLAGS_SUPPORT
def_bool y
# For now, use generic checksum functions
#These can be reimplemented in assembly later if so inclined
config GENERIC_CSUM

View File

@@ -66,6 +66,7 @@ config PARISC
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS
select SET_FS
select TRACE_IRQFLAGS_SUPPORT
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used

View File

@@ -1,4 +1 @@
# SPDX-License-Identifier: GPL-2.0
config TRACE_IRQFLAGS_SUPPORT
def_bool y

View File

@@ -94,10 +94,6 @@ config STACKTRACE_SUPPORT
bool
default y
config TRACE_IRQFLAGS_SUPPORT
bool
default y
config LOCKDEP_SUPPORT
bool
default y
@@ -270,6 +266,7 @@ config PPC
select STRICT_KERNEL_RWX if STRICT_MODULE_RWX
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select VIRT_TO_BUS if !PPC64
#
# Please keep this list sorted alphabetically.

View File

@@ -113,6 +113,7 @@ config RISCV
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select UACCESS_MEMCPY if !MMU
select ZONE_DMA32 if 64BIT
@@ -182,9 +183,6 @@ config ARCH_SUPPORTS_UPROBES
config STACKTRACE_SUPPORT
def_bool y
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config GENERIC_BUG
def_bool y
depends on BUG

View File

@@ -209,6 +209,7 @@ config S390
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select TTY
select VIRT_CPU_ACCOUNTING
select ZONE_DMA

View File

@@ -1,8 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config EARLY_PRINTK
def_bool y

View File

@@ -822,7 +822,7 @@ void do_secure_storage_access(struct pt_regs *regs)
break;
case KERNEL_FAULT:
page = phys_to_page(addr);
if (unlikely(!try_get_compound_head(page, 1)))
if (unlikely(!try_get_page(page)))
break;
rc = arch_make_page_accessible(page);
put_page(page);

View File

@@ -69,6 +69,7 @@ config SUPERH
select RTC_LIB
select SET_FS
select SPARSE_IRQ
select TRACE_IRQFLAGS_SUPPORT
help
The SuperH is a RISC processor targeted for use in embedded systems
and consumer electronics; it was also used in the Sega Dreamcast

View File

@@ -1,8 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config SH_STANDARD_BIOS
bool "Use LinuxSH standard BIOS"
help

View File

@@ -47,6 +47,7 @@ config SPARC
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
select SET_FS
select TRACE_IRQFLAGS_SUPPORT
config SPARC32
def_bool !64BIT

View File

@@ -1,9 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
config TRACE_IRQFLAGS_SUPPORT
bool
default y
config DEBUG_DCFLUSH
bool "D-cache flush debugging"
depends on SPARC64 && DEBUG_KERNEL

View File

@@ -22,6 +22,7 @@ config UML
select GENERIC_CPU_DEVICES
select HAVE_GCC_PLUGINS
select SET_FS
select TRACE_IRQFLAGS_SUPPORT
select TTY # Needed for line.c
config MMU
@@ -52,10 +53,6 @@ config ISA
config SBUS
bool
config TRACE_IRQFLAGS_SUPPORT
bool
default y
config LOCKDEP_SUPPORT
bool
default y

View File

@@ -259,6 +259,7 @@ config X86
select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select HAVE_ARCH_KCSAN if X86_64

View File

@@ -1,8 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config TRACE_IRQFLAGS_NMI_SUPPORT
def_bool y

View File

@@ -42,6 +42,7 @@ config XTENSA
select MODULES_USE_ELF_RELA
select PERF_USE_VMALLOC
select SET_FS
select TRACE_IRQFLAGS_SUPPORT
select VIRT_TO_BUS
help
Xtensa processors are 32-bit RISC machines designed by Tensilica
@@ -73,9 +74,6 @@ config LOCKDEP_SUPPORT
config STACKTRACE_SUPPORT
def_bool y
config TRACE_IRQFLAGS_SUPPORT
def_bool y
config MMU
def_bool n

View File

@@ -2662,6 +2662,15 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
* are likely to increase the throughput.
*/
bfqq->new_bfqq = new_bfqq;
/*
* The above assignment schedules the following redirections:
* each time some I/O for bfqq arrives, the process that
* generated that I/O is disassociated from bfqq and
* associated with new_bfqq. Here we increase new_bfqq->ref
* in advance, adding the number of processes that are
* expected to be associated with new_bfqq as they happen to
* issue I/O.
*/
new_bfqq->ref += process_refs;
return new_bfqq;
}
@@ -2724,6 +2733,10 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
{
struct bfq_queue *in_service_bfqq, *new_bfqq;
/* if a merge has already been setup, then proceed with that first */
if (bfqq->new_bfqq)
return bfqq->new_bfqq;
/*
* Check delayed stable merge for rotational or non-queueing
* devs. For this branch to be executed, bfqq must not be
@@ -2825,9 +2838,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (bfq_too_late_for_merging(bfqq))
return NULL;
if (bfqq->new_bfqq)
return bfqq->new_bfqq;
if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
return NULL;

View File

@@ -1691,7 +1691,7 @@ EXPORT_SYMBOL(bioset_init_from_src);
/**
* bio_alloc_kiocb - Allocate a bio from bio_set based on kiocb
* @kiocb: kiocb describing the IO
* @nr_iovecs: number of iovecs to pre-allocate
* @nr_vecs: number of iovecs to pre-allocate
* @bs: bio_set to allocate from
*
* Description:

View File

@@ -270,12 +270,6 @@ deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
deadline_remove_request(rq->q, per_prio, rq);
}
/*
 * Number of requests queued for a given priority level: the "inserted"
 * count minus the "completed" count for @prio, each obtained via dd_sum().
 */
static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
{
return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
}
/*
* deadline_check_fifo returns 0 if there are no expired requests on the fifo,
* 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
@@ -953,6 +947,12 @@ static int dd_async_depth_show(void *data, struct seq_file *m)
return 0;
}
/* Number of requests queued for a given priority level. */
static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
{
return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
}
static int dd_queued_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;

View File

@@ -2186,6 +2186,25 @@ not_supported:
dev->flags &= ~ATA_DFLAG_NCQ_PRIO;
}
static bool ata_dev_check_adapter(struct ata_device *dev,
unsigned short vendor_id)
{
struct pci_dev *pcidev = NULL;
struct device *parent_dev = NULL;
for (parent_dev = dev->tdev.parent; parent_dev != NULL;
parent_dev = parent_dev->parent) {
if (dev_is_pci(parent_dev)) {
pcidev = to_pci_dev(parent_dev);
if (pcidev->vendor == vendor_id)
return true;
break;
}
}
return false;
}
static int ata_dev_config_ncq(struct ata_device *dev,
char *desc, size_t desc_sz)
{
@@ -2204,6 +2223,13 @@ static int ata_dev_config_ncq(struct ata_device *dev,
snprintf(desc, desc_sz, "NCQ (not used)");
return 0;
}
if (dev->horkage & ATA_HORKAGE_NO_NCQ_ON_ATI &&
ata_dev_check_adapter(dev, PCI_VENDOR_ID_ATI)) {
snprintf(desc, desc_sz, "NCQ (not used)");
return 0;
}
if (ap->flags & ATA_FLAG_NCQ) {
hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE);
dev->flags |= ATA_DFLAG_NCQ;
@@ -3970,6 +3996,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
ATA_HORKAGE_ZERO_AFTER_TRIM, },
{ "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM, },
{ "Samsung SSD 860*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM |
ATA_HORKAGE_NO_NCQ_ON_ATI, },
{ "Samsung SSD 870*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM |
ATA_HORKAGE_NO_NCQ_ON_ATI, },
{ "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM, },
@@ -6124,6 +6156,8 @@ static int __init ata_parse_force_one(char **cur,
{ "ncq", .horkage_off = ATA_HORKAGE_NONCQ },
{ "noncqtrim", .horkage_on = ATA_HORKAGE_NO_NCQ_TRIM },
{ "ncqtrim", .horkage_off = ATA_HORKAGE_NO_NCQ_TRIM },
{ "noncqati", .horkage_on = ATA_HORKAGE_NO_NCQ_ON_ATI },
{ "ncqati", .horkage_off = ATA_HORKAGE_NO_NCQ_ON_ATI },
{ "dump_id", .horkage_on = ATA_HORKAGE_DUMP_ID },
{ "pio0", .xfer_mask = 1 << (ATA_SHIFT_PIO + 0) },
{ "pio1", .xfer_mask = 1 << (ATA_SHIFT_PIO + 1) },

View File

@@ -2111,18 +2111,6 @@ int loop_register_transfer(struct loop_func_table *funcs)
return 0;
}
/*
 * idr_for_each() callback used while unregistering a transfer function.
 * @id:   idr index of the entry (unused)
 * @ptr:  the struct loop_device stored at that index
 * @data: the struct loop_func_table being unregistered
 *
 * If the device is currently using that transfer, release it while holding
 * the device's lo_mutex. Always returns 0.
 */
static int unregister_transfer_cb(int id, void *ptr, void *data)
{
struct loop_device *lo = ptr;
struct loop_func_table *xfer = data;
mutex_lock(&lo->lo_mutex);
if (lo->lo_encryption == xfer)
loop_release_xfer(lo);
mutex_unlock(&lo->lo_mutex);
return 0;
}
int loop_unregister_transfer(int number)
{
unsigned int n = number;
@@ -2130,9 +2118,20 @@ int loop_unregister_transfer(int number)
if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
return -EINVAL;
/*
* This function is called only from cleanup_cryptoloop().
* Given that each loop device that has a transfer enabled holds a
* reference to the module implementing it we should never get here
* with a transfer that is set (unless forced module unloading is
* requested). Thus, check module's refcount and warn if this is
* not a clean unloading.
*/
#ifdef CONFIG_MODULE_UNLOAD
if (xfer->owner && module_refcount(xfer->owner) != -1)
pr_err("Danger! Unregistering an in use transfer function.\n");
#endif
xfer_funcs[n] = NULL;
idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
return 0;
}
@@ -2323,8 +2322,9 @@ static int loop_add(int i)
} else {
err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL);
}
mutex_unlock(&loop_ctl_mutex);
if (err < 0)
goto out_unlock;
goto out_free_dev;
i = err;
err = -ENOMEM;
@@ -2393,15 +2393,19 @@ static int loop_add(int i)
disk->events = DISK_EVENT_MEDIA_CHANGE;
disk->event_flags = DISK_EVENT_FLAG_UEVENT;
sprintf(disk->disk_name, "loop%d", i);
/* Make this loop device reachable from pathname. */
add_disk(disk);
/* Show this loop device. */
mutex_lock(&loop_ctl_mutex);
lo->idr_visible = true;
mutex_unlock(&loop_ctl_mutex);
return i;
out_cleanup_tags:
blk_mq_free_tag_set(&lo->tag_set);
out_free_idr:
mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, i);
out_unlock:
mutex_unlock(&loop_ctl_mutex);
out_free_dev:
kfree(lo);
@@ -2411,9 +2415,14 @@ out:
/*
 * Tear down and free a loop device: unregister and clean up its gendisk,
 * free its blk-mq tag set, drop its entry from loop_index_idr (under
 * loop_ctl_mutex), then destroy its mutex and free the structure itself.
 * @lo must not be used after this returns.
 */
static void loop_remove(struct loop_device *lo)
{
/* Make this loop device unreachable from pathname. */
del_gendisk(lo->lo_disk);
blk_cleanup_disk(lo->lo_disk);
blk_mq_free_tag_set(&lo->tag_set);
mutex_lock(&loop_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
mutex_unlock(&loop_ctl_mutex);
/* There is no route which can find this loop device. */
mutex_destroy(&lo->lo_mutex);
kfree(lo);
}
@@ -2437,31 +2446,40 @@ static int loop_control_remove(int idx)
return -EINVAL;
}
/* Hide this loop device for serialization. */
ret = mutex_lock_killable(&loop_ctl_mutex);
if (ret)
return ret;
lo = idr_find(&loop_index_idr, idx);
if (!lo) {
if (!lo || !lo->idr_visible)
ret = -ENODEV;
goto out_unlock_ctrl;
}
else
lo->idr_visible = false;
mutex_unlock(&loop_ctl_mutex);
if (ret)
return ret;
/* Check whether this loop device can be removed. */
ret = mutex_lock_killable(&lo->lo_mutex);
if (ret)
goto out_unlock_ctrl;
goto mark_visible;
if (lo->lo_state != Lo_unbound ||
atomic_read(&lo->lo_refcnt) > 0) {
mutex_unlock(&lo->lo_mutex);
ret = -EBUSY;
goto out_unlock_ctrl;
goto mark_visible;
}
/* Mark this loop device no longer open()-able. */
lo->lo_state = Lo_deleting;
mutex_unlock(&lo->lo_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
loop_remove(lo);
out_unlock_ctrl:
return 0;
mark_visible:
/* Show this loop device again. */
mutex_lock(&loop_ctl_mutex);
lo->idr_visible = true;
mutex_unlock(&loop_ctl_mutex);
return ret;
}
@@ -2475,7 +2493,8 @@ static int loop_control_get_free(int idx)
if (ret)
return ret;
idr_for_each_entry(&loop_index_idr, lo, id) {
if (lo->lo_state == Lo_unbound)
/* Hitting a race results in creating a new loop device which is harmless. */
if (lo->idr_visible && data_race(lo->lo_state) == Lo_unbound)
goto found;
}
mutex_unlock(&loop_ctl_mutex);
@@ -2591,10 +2610,14 @@ static void __exit loop_exit(void)
unregister_blkdev(LOOP_MAJOR, "loop");
misc_deregister(&loop_misc);
mutex_lock(&loop_ctl_mutex);
/*
* There is no need to use loop_ctl_mutex here, for nobody else can
* access loop_index_idr when this module is unloading (unless forced
* module unloading is requested). If this is not a clean unloading,
* we have no means to avoid kernel crash.
*/
idr_for_each_entry(&loop_index_idr, lo, id)
loop_remove(lo);
mutex_unlock(&loop_ctl_mutex);
idr_destroy(&loop_index_idr);
}

View File

@@ -68,6 +68,7 @@ struct loop_device {
struct blk_mq_tag_set tag_set;
struct gendisk *lo_disk;
struct mutex lo_mutex;
bool idr_visible;
};
struct loop_cmd {

View File

@@ -49,14 +49,14 @@ void *iwl_uefi_get_pnvm(struct iwl_trans *trans, size_t *len)
err = efivar_entry_get(pnvm_efivar, NULL, &package_size, data);
if (err) {
IWL_DEBUG_FW(trans,
"PNVM UEFI variable not found %d (len %zd)\n",
"PNVM UEFI variable not found %d (len %lu)\n",
err, package_size);
kfree(data);
data = ERR_PTR(err);
goto out;
}
IWL_DEBUG_FW(trans, "Read PNVM from UEFI with size %zd\n", package_size);
IWL_DEBUG_FW(trans, "Read PNVM from UEFI with size %lu\n", package_size);
*len = package_size;
out:

View File

@@ -2206,23 +2206,13 @@ static void tb_test_credit_alloc_dma_multiple(struct kunit *test)
tb_tunnel_free(tunnel2);
}
static void tb_test_credit_alloc_all(struct kunit *test)
static struct tb_tunnel *TB_TEST_PCIE_TUNNEL(struct kunit *test,
struct tb_switch *host, struct tb_switch *dev)
{
struct tb_port *up, *down, *in, *out, *nhi, *port;
struct tb_tunnel *pcie_tunnel, *dp_tunnel1, *dp_tunnel2, *usb3_tunnel;
struct tb_tunnel *dma_tunnel1, *dma_tunnel2;
struct tb_switch *host, *dev;
struct tb_port *up, *down;
struct tb_tunnel *pcie_tunnel;
struct tb_path *path;
/*
* Create PCIe, 2 x DP, USB 3.x and two DMA tunnels from host to
* device. Expectation is that all these can be established with
* the default credit allocation found in Intel hardware.
*/
host = alloc_host_usb4(test);
dev = alloc_dev_usb4(test, host, 0x1, true);
down = &host->ports[8];
up = &dev->ports[9];
pcie_tunnel = tb_tunnel_alloc_pci(NULL, up, down);
@@ -2243,9 +2233,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 64U);
return pcie_tunnel;
}
static struct tb_tunnel *TB_TEST_DP_TUNNEL1(struct kunit *test,
struct tb_switch *host, struct tb_switch *dev)
{
struct tb_port *in, *out;
struct tb_tunnel *dp_tunnel1;
struct tb_path *path;
in = &host->ports[5];
out = &dev->ports[13];
dp_tunnel1 = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
KUNIT_ASSERT_TRUE(test, dp_tunnel1 != NULL);
KUNIT_ASSERT_EQ(test, dp_tunnel1->npaths, (size_t)3);
@@ -2271,9 +2270,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 1U);
return dp_tunnel1;
}
static struct tb_tunnel *TB_TEST_DP_TUNNEL2(struct kunit *test,
struct tb_switch *host, struct tb_switch *dev)
{
struct tb_port *in, *out;
struct tb_tunnel *dp_tunnel2;
struct tb_path *path;
in = &host->ports[6];
out = &dev->ports[14];
dp_tunnel2 = tb_tunnel_alloc_dp(NULL, in, out, 0, 0);
KUNIT_ASSERT_TRUE(test, dp_tunnel2 != NULL);
KUNIT_ASSERT_EQ(test, dp_tunnel2->npaths, (size_t)3);
@@ -2299,6 +2307,16 @@ static void tb_test_credit_alloc_all(struct kunit *test)
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 1U);
return dp_tunnel2;
}
static struct tb_tunnel *TB_TEST_USB3_TUNNEL(struct kunit *test,
struct tb_switch *host, struct tb_switch *dev)
{
struct tb_port *up, *down;
struct tb_tunnel *usb3_tunnel;
struct tb_path *path;
down = &host->ports[12];
up = &dev->ports[16];
usb3_tunnel = tb_tunnel_alloc_usb3(NULL, up, down, 0, 0);
@@ -2319,9 +2337,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 32U);
return usb3_tunnel;
}
static struct tb_tunnel *TB_TEST_DMA_TUNNEL1(struct kunit *test,
struct tb_switch *host, struct tb_switch *dev)
{
struct tb_port *nhi, *port;
struct tb_tunnel *dma_tunnel1;
struct tb_path *path;
nhi = &host->ports[7];
port = &dev->ports[3];
dma_tunnel1 = tb_tunnel_alloc_dma(NULL, nhi, port, 8, 1, 8, 1);
KUNIT_ASSERT_TRUE(test, dma_tunnel1 != NULL);
KUNIT_ASSERT_EQ(test, dma_tunnel1->npaths, (size_t)2);
@@ -2340,6 +2367,18 @@ static void tb_test_credit_alloc_all(struct kunit *test)
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 14U);
return dma_tunnel1;
}
static struct tb_tunnel *TB_TEST_DMA_TUNNEL2(struct kunit *test,
struct tb_switch *host, struct tb_switch *dev)
{
struct tb_port *nhi, *port;
struct tb_tunnel *dma_tunnel2;
struct tb_path *path;
nhi = &host->ports[7];
port = &dev->ports[3];
dma_tunnel2 = tb_tunnel_alloc_dma(NULL, nhi, port, 9, 2, 9, 2);
KUNIT_ASSERT_TRUE(test, dma_tunnel2 != NULL);
KUNIT_ASSERT_EQ(test, dma_tunnel2->npaths, (size_t)2);
@@ -2358,6 +2397,31 @@ static void tb_test_credit_alloc_all(struct kunit *test)
KUNIT_EXPECT_EQ(test, path->hops[1].nfc_credits, 0U);
KUNIT_EXPECT_EQ(test, path->hops[1].initial_credits, 1U);
return dma_tunnel2;
}
static void tb_test_credit_alloc_all(struct kunit *test)
{
struct tb_tunnel *pcie_tunnel, *dp_tunnel1, *dp_tunnel2, *usb3_tunnel;
struct tb_tunnel *dma_tunnel1, *dma_tunnel2;
struct tb_switch *host, *dev;
/*
* Create PCIe, 2 x DP, USB 3.x and two DMA tunnels from host to
* device. Expectation is that all these can be established with
* the default credit allocation found in Intel hardware.
*/
host = alloc_host_usb4(test);
dev = alloc_dev_usb4(test, host, 0x1, true);
pcie_tunnel = TB_TEST_PCIE_TUNNEL(test, host, dev);
dp_tunnel1 = TB_TEST_DP_TUNNEL1(test, host, dev);
dp_tunnel2 = TB_TEST_DP_TUNNEL2(test, host, dev);
usb3_tunnel = TB_TEST_USB3_TUNNEL(test, host, dev);
dma_tunnel1 = TB_TEST_DMA_TUNNEL1(test, host, dev);
dma_tunnel2 = TB_TEST_DMA_TUNNEL2(test, host, dev);
tb_tunnel_free(dma_tunnel2);
tb_tunnel_free(dma_tunnel1);
tb_tunnel_free(usb3_tunnel);

View File

@@ -23,8 +23,7 @@ enum {
IO_WORKER_F_UP = 1, /* up and active */
IO_WORKER_F_RUNNING = 2, /* account as running */
IO_WORKER_F_FREE = 4, /* worker on free list */
IO_WORKER_F_FIXED = 8, /* static idle worker */
IO_WORKER_F_BOUND = 16, /* is doing bounded work */
IO_WORKER_F_BOUND = 8, /* is doing bounded work */
};
enum {
@@ -32,7 +31,7 @@ enum {
};
enum {
IO_WQE_FLAG_STALLED = 1, /* stalled on hash */
IO_ACCT_STALLED_BIT = 0, /* stalled on hash */
};
/*
@@ -55,7 +54,10 @@ struct io_worker {
struct callback_head create_work;
int create_index;
struct rcu_head rcu;
union {
struct rcu_head rcu;
struct work_struct work;
};
};
#if BITS_PER_LONG == 64
@@ -71,25 +73,24 @@ struct io_wqe_acct {
unsigned max_workers;
int index;
atomic_t nr_running;
struct io_wq_work_list work_list;
unsigned long flags;
};
/*
 * Indices into the per-wqe acct[] array, one per accounting class
 * (bounded and unbounded work). IO_WQ_ACCT_NR is the number of classes and
 * is used as the loop bound when every acct slot has to be visited.
 */
enum {
IO_WQ_ACCT_BOUND,
IO_WQ_ACCT_UNBOUND,
IO_WQ_ACCT_NR,
};
/*
* Per-node worker thread pool
*/
struct io_wqe {
struct {
raw_spinlock_t lock;
struct io_wq_work_list work_list;
unsigned flags;
} ____cacheline_aligned_in_smp;
raw_spinlock_t lock;
struct io_wqe_acct acct[2];
int node;
struct io_wqe_acct acct[2];
struct hlist_nulls_head free_list;
struct list_head all_list;
@@ -133,8 +134,11 @@ struct io_cb_cancel_data {
bool cancel_all;
};
static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first);
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
static void io_wqe_dec_running(struct io_worker *worker);
static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
struct io_wqe_acct *acct,
struct io_cb_cancel_data *match);
static bool io_worker_get(struct io_worker *worker)
{
@@ -195,11 +199,10 @@ static void io_worker_exit(struct io_worker *worker)
do_exit(0);
}
static inline bool io_wqe_run_queue(struct io_wqe *wqe)
__must_hold(wqe->lock)
static inline bool io_acct_run_queue(struct io_wqe_acct *acct)
{
if (!wq_list_empty(&wqe->work_list) &&
!(wqe->flags & IO_WQE_FLAG_STALLED))
if (!wq_list_empty(&acct->work_list) &&
!test_bit(IO_ACCT_STALLED_BIT, &acct->flags))
return true;
return false;
}
@@ -208,7 +211,8 @@ static inline bool io_wqe_run_queue(struct io_wqe *wqe)
* Check head of free list for an available worker. If one isn't available,
* caller must create one.
*/
static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
struct io_wqe_acct *acct)
__must_hold(RCU)
{
struct hlist_nulls_node *n;
@@ -222,6 +226,10 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) {
if (!io_worker_get(worker))
continue;
if (io_wqe_get_acct(worker) != acct) {
io_worker_release(worker);
continue;
}
if (wake_up_process(worker->task)) {
io_worker_release(worker);
return true;
@@ -236,9 +244,9 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
* We need a worker. If we find a free one, we're good. If not, and we're
* below the max number of workers, create one.
*/
static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
{
bool ret;
bool do_create = false;
/*
* Most likely an attempt to queue unbounded work on an io_wq that
@@ -247,27 +255,19 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
if (unlikely(!acct->max_workers))
pr_warn_once("io-wq is not configured for unbound workers");
rcu_read_lock();
ret = io_wqe_activate_free_worker(wqe);
rcu_read_unlock();
if (!ret) {
bool do_create = false, first = false;
raw_spin_lock(&wqe->lock);
if (acct->nr_workers < acct->max_workers) {
if (!acct->nr_workers)
first = true;
acct->nr_workers++;
do_create = true;
}
raw_spin_unlock(&wqe->lock);
if (do_create) {
atomic_inc(&acct->nr_running);
atomic_inc(&wqe->wq->worker_refs);
create_io_worker(wqe->wq, wqe, acct->index, first);
}
raw_spin_lock(&wqe->lock);
if (acct->nr_workers < acct->max_workers) {
acct->nr_workers++;
do_create = true;
}
raw_spin_unlock(&wqe->lock);
if (do_create) {
atomic_inc(&acct->nr_running);
atomic_inc(&wqe->wq->worker_refs);
return create_io_worker(wqe->wq, wqe, acct->index);
}
return true;
}
static void io_wqe_inc_running(struct io_worker *worker)
@@ -283,7 +283,7 @@ static void create_worker_cb(struct callback_head *cb)
struct io_wq *wq;
struct io_wqe *wqe;
struct io_wqe_acct *acct;
bool do_create = false, first = false;
bool do_create = false;
worker = container_of(cb, struct io_worker, create_work);
wqe = worker->wqe;
@@ -291,14 +291,12 @@ static void create_worker_cb(struct callback_head *cb)
acct = &wqe->acct[worker->create_index];
raw_spin_lock(&wqe->lock);
if (acct->nr_workers < acct->max_workers) {
if (!acct->nr_workers)
first = true;
acct->nr_workers++;
do_create = true;
}
raw_spin_unlock(&wqe->lock);
if (do_create) {
create_io_worker(wq, wqe, worker->create_index, first);
create_io_worker(wq, wqe, worker->create_index);
} else {
atomic_dec(&acct->nr_running);
io_worker_ref_put(wq);
@@ -307,9 +305,11 @@ static void create_worker_cb(struct callback_head *cb)
io_worker_release(worker);
}
static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
struct io_wqe_acct *acct)
static bool io_queue_worker_create(struct io_worker *worker,
struct io_wqe_acct *acct,
task_work_func_t func)
{
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
/* raced with exit, just ignore create call */
@@ -327,16 +327,17 @@ static void io_queue_worker_create(struct io_wqe *wqe, struct io_worker *worker,
test_and_set_bit_lock(0, &worker->create_state))
goto fail_release;
init_task_work(&worker->create_work, create_worker_cb);
init_task_work(&worker->create_work, func);
worker->create_index = acct->index;
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
return;
return true;
clear_bit_unlock(0, &worker->create_state);
fail_release:
io_worker_release(worker);
fail:
atomic_dec(&acct->nr_running);
io_worker_ref_put(wq);
return false;
}
static void io_wqe_dec_running(struct io_worker *worker)
@@ -348,10 +349,10 @@ static void io_wqe_dec_running(struct io_worker *worker)
if (!(worker->flags & IO_WORKER_F_UP))
return;
if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe)) {
if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
atomic_inc(&acct->nr_running);
atomic_inc(&wqe->wq->worker_refs);
io_queue_worker_create(wqe, worker, acct);
io_queue_worker_create(worker, acct, create_worker_cb);
}
}
@@ -363,29 +364,10 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
struct io_wq_work *work)
__must_hold(wqe->lock)
{
bool worker_bound, work_bound;
BUILD_BUG_ON((IO_WQ_ACCT_UNBOUND ^ IO_WQ_ACCT_BOUND) != 1);
if (worker->flags & IO_WORKER_F_FREE) {
worker->flags &= ~IO_WORKER_F_FREE;
hlist_nulls_del_init_rcu(&worker->nulls_node);
}
/*
* If worker is moving from bound to unbound (or vice versa), then
* ensure we update the running accounting.
*/
worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
if (worker_bound != work_bound) {
int index = work_bound ? IO_WQ_ACCT_UNBOUND : IO_WQ_ACCT_BOUND;
io_wqe_dec_running(worker);
worker->flags ^= IO_WORKER_F_BOUND;
wqe->acct[index].nr_workers--;
wqe->acct[index ^ 1].nr_workers++;
io_wqe_inc_running(worker);
}
}
/*
@@ -413,7 +395,7 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
{
struct io_wq *wq = wqe->wq;
spin_lock(&wq->hash->wait.lock);
spin_lock_irq(&wq->hash->wait.lock);
if (list_empty(&wqe->wait.entry)) {
__add_wait_queue(&wq->hash->wait, &wqe->wait);
if (!test_bit(hash, &wq->hash->map)) {
@@ -421,48 +403,26 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
list_del_init(&wqe->wait.entry);
}
}
spin_unlock(&wq->hash->wait.lock);
spin_unlock_irq(&wq->hash->wait.lock);
}
/*
* We can always run the work if the worker is currently the same type as
* the work (eg both are bound, or both are unbound). If they are not the
* same, only allow it if incrementing the worker count would be allowed.
*/
static bool io_worker_can_run_work(struct io_worker *worker,
struct io_wq_work *work)
{
struct io_wqe_acct *acct;
if (!(worker->flags & IO_WORKER_F_BOUND) !=
!(work->flags & IO_WQ_WORK_UNBOUND))
return true;
/* not the same type, check if we'd go over the limit */
acct = io_work_get_acct(worker->wqe, work);
return acct->nr_workers < acct->max_workers;
}
static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
struct io_worker *worker,
bool *stalled)
static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
struct io_worker *worker)
__must_hold(wqe->lock)
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work, *tail;
unsigned int stall_hash = -1U;
struct io_wqe *wqe = worker->wqe;
wq_list_for_each(node, prev, &wqe->work_list) {
wq_list_for_each(node, prev, &acct->work_list) {
unsigned int hash;
work = container_of(node, struct io_wq_work, list);
if (!io_worker_can_run_work(worker, work))
break;
/* not hashed, can run anytime */
if (!io_wq_is_hashed(work)) {
wq_list_del(&wqe->work_list, node, prev);
wq_list_del(&acct->work_list, node, prev);
return work;
}
@@ -473,7 +433,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
/* hashed, can run if not already running */
if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
wqe->hash_tail[hash] = NULL;
wq_list_cut(&wqe->work_list, &tail->list, prev);
wq_list_cut(&acct->work_list, &tail->list, prev);
return work;
}
if (stall_hash == -1U)
@@ -483,10 +443,14 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe,
}
if (stall_hash != -1U) {
/*
* Set this before dropping the lock to avoid racing with new
* work being added and clearing the stalled bit.
*/
set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
raw_spin_unlock(&wqe->lock);
io_wait_on_hash(wqe, stall_hash);
raw_spin_lock(&wqe->lock);
*stalled = true;
}
return NULL;
@@ -520,13 +484,13 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
static void io_worker_handle_work(struct io_worker *worker)
__releases(wqe->lock)
{
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state);
do {
struct io_wq_work *work;
bool stalled;
get_next:
/*
* If we got some work, mark us as busy. If we didn't, but
@@ -535,12 +499,9 @@ get_next:
* can't make progress, any work completion or insertion will
* clear the stalled flag.
*/
stalled = false;
work = io_get_next_work(wqe, worker, &stalled);
work = io_get_next_work(acct, worker);
if (work)
__io_worker_busy(wqe, worker, work);
else if (stalled)
wqe->flags |= IO_WQE_FLAG_STALLED;
raw_spin_unlock(&wqe->lock);
if (!work)
@@ -572,10 +533,10 @@ get_next:
if (hash != -1U && !next_hashed) {
clear_bit(hash, &wq->hash->map);
clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
if (wq_has_sleeper(&wq->hash->wait))
wake_up(&wq->hash->wait);
raw_spin_lock(&wqe->lock);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
/* skip unnecessary unlock-lock wqe->lock */
if (!work)
goto get_next;
@@ -590,8 +551,10 @@ get_next:
static int io_wqe_worker(void *data)
{
struct io_worker *worker = data;
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
bool last_timeout = false;
char buf[TASK_COMM_LEN];
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
@@ -605,10 +568,17 @@ static int io_wqe_worker(void *data)
set_current_state(TASK_INTERRUPTIBLE);
loop:
raw_spin_lock(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
if (io_acct_run_queue(acct)) {
io_worker_handle_work(worker);
goto loop;
}
/* timed out, exit unless we're the last worker */
if (last_timeout && acct->nr_workers > 1) {
raw_spin_unlock(&wqe->lock);
__set_current_state(TASK_RUNNING);
break;
}
last_timeout = false;
__io_worker_idle(wqe, worker);
raw_spin_unlock(&wqe->lock);
if (io_flush_signals())
@@ -619,13 +589,11 @@ loop:
if (!get_signal(&ksig))
continue;
break;
}
if (ret)
if (fatal_signal_pending(current))
break;
continue;
/* timed out, exit unless we're the fixed worker */
if (!(worker->flags & IO_WORKER_F_FIXED))
break;
}
last_timeout = !ret;
}
if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
@@ -676,36 +644,9 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
raw_spin_unlock(&worker->wqe->lock);
}
static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
struct task_struct *tsk)
{
struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;
struct task_struct *tsk;
__set_current_state(TASK_RUNNING);
worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
if (!worker)
goto fail;
refcount_set(&worker->ref, 1);
worker->nulls_node.pprev = NULL;
worker->wqe = wqe;
spin_lock_init(&worker->lock);
init_completion(&worker->ref_done);
tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
if (IS_ERR(tsk)) {
kfree(worker);
fail:
atomic_dec(&acct->nr_running);
raw_spin_lock(&wqe->lock);
acct->nr_workers--;
raw_spin_unlock(&wqe->lock);
io_worker_ref_put(wq);
return;
}
tsk->pf_io_worker = worker;
worker->task = tsk;
set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
@@ -715,14 +656,118 @@ fail:
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
worker->flags |= IO_WORKER_F_FREE;
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;
if (first && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
raw_spin_unlock(&wqe->lock);
wake_up_new_task(tsk);
}
/*
 * Match callback that accepts every work item; @work and @data are ignored.
 * Used as the .fn of an io_cb_cancel_data when all pending work must go.
 */
static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
{
return true;
}
/*
 * Tell whether a failed worker-thread creation should be attempted again.
 *
 * @err is the negative errno obtained from the failed create_io_thread()
 * call. Only -EAGAIN and the -ERESTART* codes count as retryable; any other
 * value is treated as a permanent failure.
 */
static inline bool io_should_retry_thread(long err)
{
	return err == -EAGAIN ||
	       err == -ERESTARTSYS ||
	       err == -ERESTARTNOINTR ||
	       err == -ERESTARTNOHAND;
}
/*
 * task_work callback that retries creating the thread for an already
 * allocated worker (the worker is recovered from its embedded create_work).
 *
 * Three outcomes:
 *  - thread created: finish worker setup and drop one worker reference;
 *  - non-retryable error: undo the nr_running/nr_workers accounting, cancel
 *    all pending work of this acct if no worker is left to run it, and drop
 *    the wq worker reference;
 *  - retryable error: drop the worker reference and hand the worker back to
 *    the workqueue for another attempt.
 */
static void create_worker_cont(struct callback_head *cb)
{
struct io_worker *worker;
struct task_struct *tsk;
struct io_wqe *wqe;
worker = container_of(cb, struct io_worker, create_work);
/* release the create_state bit so a later retry can be queued again */
clear_bit_unlock(0, &worker->create_state);
wqe = worker->wqe;
tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
if (!IS_ERR(tsk)) {
io_init_new_worker(wqe, worker, tsk);
io_worker_release(worker);
return;
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
atomic_dec(&acct->nr_running);
raw_spin_lock(&wqe->lock);
acct->nr_workers--;
/* no worker left for this acct: queued work would never run, cancel it */
if (!acct->nr_workers) {
struct io_cb_cancel_data match = {
.fn = io_wq_work_match_all,
.cancel_all = true,
};
/*
 * The helper returns true after dropping wqe->lock to cancel one
 * item, so retake the lock before every further call. It returns
 * false with the lock still held.
 */
while (io_acct_cancel_pending_work(wqe, acct, &match))
raw_spin_lock(&wqe->lock);
}
raw_spin_unlock(&wqe->lock);
io_worker_ref_put(wqe->wq);
/* NOTE(review): worker itself is not freed on this path - confirm it is not leaked */
return;
}
/* re-create attempts grab a new worker ref, drop the existing one */
io_worker_release(worker);
schedule_work(&worker->work);
}
/*
 * Workqueue handler for a deferred worker-creation retry.
 *
 * Queues create_worker_cont() as task_work through io_queue_worker_create().
 * If that queueing fails, the create_state bit is cleared and a worker
 * reference is released here.
 */
static void io_workqueue_create(struct work_struct *work)
{
struct io_worker *worker = container_of(work, struct io_worker, work);
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
if (!io_queue_worker_create(worker, acct, create_worker_cont)) {
clear_bit_unlock(0, &worker->create_state);
io_worker_release(worker);
/* NOTE(review): worker is not freed after a failed re-queue - confirm it is not leaked */
}
}
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
{
struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;
struct task_struct *tsk;
__set_current_state(TASK_RUNNING);
worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
if (!worker) {
fail:
atomic_dec(&acct->nr_running);
raw_spin_lock(&wqe->lock);
acct->nr_workers--;
raw_spin_unlock(&wqe->lock);
io_worker_ref_put(wq);
return false;
}
refcount_set(&worker->ref, 1);
worker->wqe = wqe;
spin_lock_init(&worker->lock);
init_completion(&worker->ref_done);
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;
tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
if (!IS_ERR(tsk)) {
io_init_new_worker(wqe, worker, tsk);
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
goto fail;
} else {
INIT_WORK(&worker->work, io_workqueue_create);
schedule_work(&worker->work);
}
return true;
}
/*
* Iterate the passed in list and call the specific function for each
* worker that isn't exiting
@@ -755,11 +800,6 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data)
return false;
}
static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
{
return true;
}
static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
{
struct io_wq *wq = wqe->wq;
@@ -773,12 +813,13 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
unsigned int hash;
struct io_wq_work *tail;
if (!io_wq_is_hashed(work)) {
append:
wq_list_add_tail(&work->list, &wqe->work_list);
wq_list_add_tail(&work->list, &acct->work_list);
return;
}
@@ -788,13 +829,14 @@ append:
if (!tail)
goto append;
wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
wq_list_add_after(&work->list, &tail->list, &acct->work_list);
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
bool do_wake;
unsigned work_flags = work->flags;
bool do_create;
/*
* If io-wq is exiting for this task, or if the request has explicitly
@@ -802,19 +844,36 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
*/
if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
(work->flags & IO_WQ_WORK_CANCEL)) {
run_cancel:
io_run_cancel(work, wqe);
return;
}
raw_spin_lock(&wqe->lock);
io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
!atomic_read(&acct->nr_running);
clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
rcu_read_lock();
do_create = !io_wqe_activate_free_worker(wqe, acct);
rcu_read_unlock();
raw_spin_unlock(&wqe->lock);
if (do_wake)
io_wqe_wake_worker(wqe, acct);
if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) ||
!atomic_read(&acct->nr_running))) {
bool did_create;
did_create = io_wqe_create_worker(wqe, acct);
if (unlikely(!did_create)) {
raw_spin_lock(&wqe->lock);
/* fatal condition, failed to create the first worker */
if (!acct->nr_workers) {
raw_spin_unlock(&wqe->lock);
goto run_cancel;
}
raw_spin_unlock(&wqe->lock);
}
}
}
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
@@ -859,6 +918,7 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
struct io_wq_work *work,
struct io_wq_work_node *prev)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
unsigned int hash = io_get_work_hash(work);
struct io_wq_work *prev_work = NULL;
@@ -870,18 +930,18 @@ static inline void io_wqe_remove_pending(struct io_wqe *wqe,
else
wqe->hash_tail[hash] = NULL;
}
wq_list_del(&wqe->work_list, &work->list, prev);
wq_list_del(&acct->work_list, &work->list, prev);
}
static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
struct io_cb_cancel_data *match)
static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
struct io_wqe_acct *acct,
struct io_cb_cancel_data *match)
__releases(wqe->lock)
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work;
retry:
raw_spin_lock(&wqe->lock);
wq_list_for_each(node, prev, &wqe->work_list) {
wq_list_for_each(node, prev, &acct->work_list) {
work = container_of(node, struct io_wq_work, list);
if (!match->fn(work, match->data))
continue;
@@ -889,11 +949,27 @@ retry:
raw_spin_unlock(&wqe->lock);
io_run_cancel(work, wqe);
match->nr_pending++;
if (!match->cancel_all)
return;
/* not safe to continue after unlock */
goto retry;
return true;
}
return false;
}
/*
 * Cancel pending (not yet running) work on @wqe that matches @match, looking
 * through every accounting class.
 *
 * wqe->lock is taken here and handed to io_acct_cancel_pending_work(), which
 * drops it when it cancels an item and returns true. With match->cancel_all
 * set the scan restarts from the first acct with the lock retaken; otherwise
 * the function returns after the first cancelled item.
 */
static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
struct io_cb_cancel_data *match)
{
int i;
retry:
raw_spin_lock(&wqe->lock);
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
/* presumably i == 0 selects the bound acct - confirm against io_get_acct() */
struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
if (io_acct_cancel_pending_work(wqe, acct, match)) {
/* the helper already released wqe->lock on this path */
if (match->cancel_all)
goto retry;
return;
}
}
/* nothing matched in any acct: the lock is still ours to release */
raw_spin_unlock(&wqe->lock);
}
@@ -954,18 +1030,24 @@ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
int sync, void *key)
{
struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
int i;
list_del_init(&wait->entry);
rcu_read_lock();
io_wqe_activate_free_worker(wqe);
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
struct io_wqe_acct *acct = &wqe->acct[i];
if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
io_wqe_activate_free_worker(wqe, acct);
}
rcu_read_unlock();
return 1;
}
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
{
int ret, node;
int ret, node, i;
struct io_wq *wq;
if (WARN_ON_ONCE(!data->free_work || !data->do_work))
@@ -1000,18 +1082,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
cpumask_copy(wqe->cpu_mask, cpumask_of_node(node));
wq->wqes[node] = wqe;
wqe->node = alloc_node;
wqe->acct[IO_WQ_ACCT_BOUND].index = IO_WQ_ACCT_BOUND;
wqe->acct[IO_WQ_ACCT_UNBOUND].index = IO_WQ_ACCT_UNBOUND;
wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
task_rlimit(current, RLIMIT_NPROC);
atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
wqe->wait.func = io_wqe_hash_wake;
INIT_LIST_HEAD(&wqe->wait.entry);
wqe->wait.func = io_wqe_hash_wake;
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
struct io_wqe_acct *acct = &wqe->acct[i];
acct->index = i;
atomic_set(&acct->nr_running, 0);
INIT_WQ_LIST(&acct->work_list);
}
wqe->wq = wq;
raw_spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list);
INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
INIT_LIST_HEAD(&wqe->all_list);
}
@@ -1038,7 +1122,7 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
{
struct io_worker *worker;
if (cb->func != create_worker_cb)
/*
 * Only the two io-wq worker-creation callbacks are candidates. The test has
 * to be a conjunction: with "||" it is true for every callback (func cannot
 * equal both), so nothing would ever match.
 */
if (cb->func != create_worker_cb && cb->func != create_worker_cont)
return false;
worker = container_of(cb, struct io_worker, create_work);
return worker->wqe->wq == data;
@@ -1193,7 +1277,7 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
for_each_node(node) {
struct io_wqe_acct *acct;
for (i = 0; i < 2; i++) {
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
acct = &wq->wqes[node]->acct[i];
prev = max_t(int, acct->max_workers, prev);
if (new_count[i])

View File

@@ -1021,6 +1021,7 @@ static const struct io_op_def io_op_defs[] = {
},
[IORING_OP_WRITE] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.plug = 1,
@@ -1851,6 +1852,17 @@ static void io_req_complete_failed(struct io_kiocb *req, long res)
io_req_complete_post(req, res, 0);
}
/*
 * Complete, as failed, a request that is not going to be submitted. The
 * completion result is taken from req->result.
 */
static void io_req_complete_fail_submit(struct io_kiocb *req)
{
	/*
	 * Nothing gets submitted, so every linked request has to fail as
	 * well: turn a hard link into a normal link. Setting REQ_F_LINK on a
	 * request that already has it (or has no link) is tolerated.
	 */
	req->flags = (req->flags & ~REQ_F_HARDLINK) | REQ_F_LINK;

	io_req_complete_failed(req, req->result);
}
/*
* Don't initialise the fields below on every allocation, but do that in
* advance and keep them valid across allocations.
@@ -2119,6 +2131,9 @@ static void tctx_task_work(struct callback_head *cb)
while (1) {
struct io_wq_work_node *node;
if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr)
io_submit_flush_completions(ctx);
spin_lock_irq(&tctx->task_lock);
node = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
@@ -2673,7 +2688,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
{
if (__io_complete_rw_common(req, res))
return;
__io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
__io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
@@ -3410,6 +3425,12 @@ static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
return -EINVAL;
}
static bool need_read_all(struct io_kiocb *req)
{
return req->flags & REQ_F_ISREG ||
S_ISBLK(file_inode(req->file)->i_mode);
}
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -3464,7 +3485,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
} else if (ret == -EIOCBQUEUED) {
goto out_free;
} else if (ret <= 0 || ret == io_size || !force_nonblock ||
(req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
(req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
}
@@ -5249,7 +5270,7 @@ static void io_poll_remove_double(struct io_kiocb *req)
}
}
static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
__must_hold(&req->ctx->completion_lock)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -5271,10 +5292,19 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
if (flags & IORING_CQE_F_MORE)
ctx->cq_extra++;
io_commit_cqring(ctx);
return !(flags & IORING_CQE_F_MORE);
}
/*
 * Post the poll completion for @req through __io_poll_complete() and then
 * commit the CQ ring. Returns whatever __io_poll_complete() returned.
 * Must be called with the ctx completion_lock held.
 */
static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
	__must_hold(&req->ctx->completion_lock)
{
	bool finished = __io_poll_complete(req, mask);

	io_commit_cqring(req->ctx);
	return finished;
}
static void io_poll_task_func(struct io_kiocb *req, bool *locked)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -5285,7 +5315,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
} else {
bool done;
done = io_poll_complete(req, req->result);
done = __io_poll_complete(req, req->result);
if (done) {
io_poll_remove_double(req);
hash_del(&req->hash_node);
@@ -5293,6 +5323,7 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
req->result = 0;
add_wait_queue(req->poll.head, &req->poll.wait);
}
io_commit_cqring(ctx);
spin_unlock(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
@@ -6398,6 +6429,11 @@ static bool io_drain_req(struct io_kiocb *req)
int ret;
u32 seq;
if (req->flags & REQ_F_FAIL) {
io_req_complete_fail_submit(req);
return true;
}
/*
* If we need to drain a request in the middle of a link, drain the
* head request and the next request/link after the current link.
@@ -6914,7 +6950,7 @@ static inline void io_queue_sqe(struct io_kiocb *req)
if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) {
__io_queue_sqe(req);
} else if (req->flags & REQ_F_FAIL) {
io_req_complete_failed(req, req->result);
io_req_complete_fail_submit(req);
} else {
int ret = io_req_prep_async(req);
@@ -10498,26 +10534,46 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
/*
 * Apply user-supplied io-wq worker limits.
 *
 * @ctx: ring the registration was issued on
 * @arg: user pointer to two __u32 values; each must not exceed INT_MAX
 *
 * For SQPOLL rings the io-wq of the SQ thread is used (under sqd->lock),
 * otherwise the io-wq of the current task. After io_wq_max_workers() has
 * run, new_count is copied back to @arg (presumably carrying the previous
 * limits -- confirm against io_wq_max_workers()).
 *
 * Returns 0 on success, -EFAULT if @arg cannot be read or written,
 * -EINVAL for out-of-range values or when no io-wq is available, or the
 * error returned by io_wq_max_workers().
 */
static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
					void __user *arg)
{
	struct io_uring_task *tctx = NULL;
	struct io_sq_data *sqd = NULL;
	__u32 new_count[2];
	int i, ret;

	if (copy_from_user(new_count, arg, sizeof(new_count)))
		return -EFAULT;
	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i] > INT_MAX)
			return -EINVAL;

	if (ctx->flags & IORING_SETUP_SQPOLL) {
		sqd = ctx->sq_data;
		if (sqd) {
			mutex_lock(&sqd->lock);
			/*
			 * The SQ thread may already be gone; leave tctx NULL
			 * in that case so we fail with -EINVAL below instead
			 * of dereferencing a NULL task pointer.
			 */
			if (sqd->thread)
				tctx = sqd->thread->io_uring;
		}
	} else {
		tctx = current->io_uring;
	}

	ret = -EINVAL;
	if (!tctx || !tctx->io_wq)
		goto err;

	ret = io_wq_max_workers(tctx->io_wq, new_count);
	if (ret)
		goto err;

	/* Drop the lock before touching user memory again. */
	if (sqd)
		mutex_unlock(&sqd->lock);

	if (copy_to_user(arg, new_count, sizeof(new_count)))
		return -EFAULT;

	return 0;
err:
	if (sqd)
		mutex_unlock(&sqd->lock);
	return ret;
}
static bool io_register_op_must_quiesce(int op)

View File

@@ -2941,12 +2941,10 @@ static int __init filelock_init(void)
int i;
flctx_cache = kmem_cache_create("file_lock_ctx",
sizeof(struct file_lock_context), 0,
SLAB_PANIC | SLAB_ACCOUNT, NULL);
sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
filelock_cache = kmem_cache_create("file_lock_cache",
sizeof(struct file_lock), 0,
SLAB_PANIC | SLAB_ACCOUNT, NULL);
sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
for_each_possible_cpu(i) {
struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);

View File

@@ -191,7 +191,7 @@ EXPORT_SYMBOL(generic_pipe_buf_try_steal);
*/
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
return try_get_compound_head(buf->page, 1);
return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);

View File

@@ -655,7 +655,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
goto out_nofds;
alloc_size = 6 * size;
bits = kvmalloc(alloc_size, GFP_KERNEL_ACCOUNT);
bits = kvmalloc(alloc_size, GFP_KERNEL);
if (!bits)
goto out_nofds;
}
@@ -1000,7 +1000,7 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
len = min(todo, POLLFD_PER_PAGE);
walk = walk->next = kmalloc(struct_size(walk, entries, len),
GFP_KERNEL_ACCOUNT);
GFP_KERNEL);
if (!walk) {
err = -ENOMEM;
goto out_fds;

View File

@@ -13,6 +13,8 @@
* Copyright (C) 2009 Jason Wessel <jason.wessel@windriver.com>
*/
#include <linux/list.h>
/* Shifted versions of the command enable bits are used if the command
* has no arguments (see kdb_check_flags). This allows commands, such as
* go, to have different permissions depending upon whether it is called
@@ -64,6 +66,17 @@ typedef enum {
typedef int (*kdb_func_t)(int, const char **);
/*
 * The KDB shell command table entry.
 *
 * One kdbtab_t describes a single kdb shell command. kdb_register() links
 * the entry into the global command list through list_node, so the caller
 * owns the storage and must keep it valid until kdb_unregister().
 */
typedef struct _kdbtab {
char *name; /* Command name */
kdb_func_t func; /* Function to execute command */
char *usage; /* Usage String for this command */
char *help; /* Help message for this command */
short minlen; /* Minimum legal # cmd chars required */
kdb_cmdflags_t flags; /* Command behaviour flags */
struct list_head list_node; /* Command list */
} kdbtab_t;
#ifdef CONFIG_KGDB_KDB
#include <linux/init.h>
#include <linux/sched.h>
@@ -193,19 +206,13 @@ static inline const char *kdb_walk_kallsyms(loff_t *pos)
#endif /* ! CONFIG_KALLSYMS */
/* Dynamic kdb shell command registration */
extern int kdb_register(char *, kdb_func_t, char *, char *, short);
extern int kdb_register_flags(char *, kdb_func_t, char *, char *,
short, kdb_cmdflags_t);
extern int kdb_unregister(char *);
extern int kdb_register(kdbtab_t *cmd);
extern void kdb_unregister(kdbtab_t *cmd);
#else /* ! CONFIG_KGDB_KDB */
static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; }
static inline void kdb_init(int level) {}
static inline int kdb_register(char *cmd, kdb_func_t func, char *usage,
char *help, short minlen) { return 0; }
static inline int kdb_register_flags(char *cmd, kdb_func_t func, char *usage,
char *help, short minlen,
kdb_cmdflags_t flags) { return 0; }
static inline int kdb_unregister(char *cmd) { return 0; }
static inline int kdb_register(kdbtab_t *cmd) { return 0; }
static inline void kdb_unregister(kdbtab_t *cmd) {}
#endif /* CONFIG_KGDB_KDB */
enum {
KDB_NOT_INITIALIZED,

View File

@@ -426,6 +426,7 @@ enum {
ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */
ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */
ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */
ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */
/* DMA mask for user DMA control: User visible values; DO NOT
renumber */

View File

@@ -1218,7 +1218,15 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags);
struct page *try_grab_compound_head(struct page *page, int refs,
unsigned int flags);
struct page *try_get_compound_head(struct page *page, int refs);
/*
 * Take one reference on the compound head of @page.
 *
 * Returns false (after a one-time warning) when the head page's reference
 * count is not positive; otherwise increments it and returns true.
 */
static inline __must_check bool try_get_page(struct page *page)
{
	struct page *head = compound_head(page);

	if (WARN_ON_ONCE(page_ref_count(head) <= 0))
		return false;

	page_ref_inc(head);
	return true;
}
static inline void put_page(struct page *page)
{

View File

@@ -310,8 +310,10 @@ enum {
TRACE_EVENT_FL_NO_SET_FILTER_BIT,
TRACE_EVENT_FL_IGNORE_ENABLE_BIT,
TRACE_EVENT_FL_TRACEPOINT_BIT,
TRACE_EVENT_FL_DYNAMIC_BIT,
TRACE_EVENT_FL_KPROBE_BIT,
TRACE_EVENT_FL_UPROBE_BIT,
TRACE_EVENT_FL_EPROBE_BIT,
};
/*
@@ -321,8 +323,10 @@ enum {
* NO_SET_FILTER - Set when filter has error and is to be ignored
* IGNORE_ENABLE - For trace internal events, do not enable with debugfs file
* TRACEPOINT - Event is a tracepoint
* DYNAMIC - Event is a dynamic event (created at run time)
* KPROBE - Event is a kprobe
* UPROBE - Event is a uprobe
* EPROBE - Event is an event probe
*/
enum {
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -330,8 +334,10 @@ enum {
TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT),
TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT),
TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
TRACE_EVENT_FL_DYNAMIC = (1 << TRACE_EVENT_FL_DYNAMIC_BIT),
TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT),
TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT),
TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT),
};
#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE)
@@ -347,7 +353,14 @@ struct trace_event_call {
struct trace_event event;
char *print_fmt;
struct event_filter *filter;
void *mod;
/*
* Static events can disappear with modules,
* whereas dynamic ones need their own ref count.
*/
union {
void *module;
atomic_t refcnt;
};
void *data;
/* See the TRACE_EVENT_FL_* flags above */
@@ -363,6 +376,42 @@ struct trace_event_call {
#endif
};
#ifdef CONFIG_DYNAMIC_EVENTS
bool trace_event_dyn_try_get_ref(struct trace_event_call *call);
void trace_event_dyn_put_ref(struct trace_event_call *call);
bool trace_event_dyn_busy(struct trace_event_call *call);
#else
/*
 * Stub for builds without CONFIG_DYNAMIC_EVENTS: never grants a reference.
 */
static inline bool trace_event_dyn_try_get_ref(struct trace_event_call *call)
{
/* Without DYNAMIC_EVENTS configured, nothing should be calling this */
return false;
}
/*
 * Stub for builds without CONFIG_DYNAMIC_EVENTS: there is no dynamic
 * reference to drop, so this does nothing.
 */
static inline void trace_event_dyn_put_ref(struct trace_event_call *call)
{
}
/*
 * Stub for builds without CONFIG_DYNAMIC_EVENTS: always reports busy.
 */
static inline bool trace_event_dyn_busy(struct trace_event_call *call)
{
/* Nothing should call this without DYNAMIC_EVENTS configured. */
return true;
}
#endif
/*
 * Try to pin @call. Events without TRACE_EVENT_FL_DYNAMIC are pinned by
 * taking a reference on call->module; dynamic events go through
 * trace_event_dyn_try_get_ref(). Returns true if the reference was taken.
 */
static inline bool trace_event_try_get_ref(struct trace_event_call *call)
{
	if (!(call->flags & TRACE_EVENT_FL_DYNAMIC))
		return try_module_get(call->module);

	return trace_event_dyn_try_get_ref(call);
}
/*
 * Release a reference on @call: dynamic events go through
 * trace_event_dyn_put_ref(), all others drop the reference on
 * call->module.
 */
static inline void trace_event_put_ref(struct trace_event_call *call)
{
	if (call->flags & TRACE_EVENT_FL_DYNAMIC) {
		trace_event_dyn_put_ref(call);
		return;
	}

	module_put(call->module);
}
#ifdef CONFIG_PERF_EVENTS
static inline bool bpf_prog_array_valid(struct trace_event_call *call)
{
@@ -634,6 +683,7 @@ enum event_trigger_type {
ETT_EVENT_ENABLE = (1 << 3),
ETT_EVENT_HIST = (1 << 4),
ETT_HIST_ENABLE = (1 << 5),
ETT_EVENT_EPROBE = (1 << 6),
};
extern int filter_match_preds(struct event_filter *filter, void *rec);

View File

@@ -475,7 +475,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
* *
* * The declared 'local variable' is called '__entry'
* *
* * __field(pid_t, prev_prid) is equivalent to a standard declaration:
* * __field(pid_t, prev_pid) is equivalent to a standard declaration:
* *
* * pid_t prev_pid;
* *

View File

@@ -295,14 +295,14 @@ TRACE_EVENT(io_uring_fail_link,
*/
TRACE_EVENT(io_uring_complete,
TP_PROTO(void *ctx, u64 user_data, long res, unsigned cflags),
TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),
TP_ARGS(ctx, user_data, res, cflags),
TP_STRUCT__entry (
__field( void *, ctx )
__field( u64, user_data )
__field( long, res )
__field( int, res )
__field( unsigned, cflags )
),
@@ -313,7 +313,7 @@ TRACE_EVENT(io_uring_complete,
__entry->cflags = cflags;
),
TP_printk("ring %p, user_data 0x%llx, result %ld, cflags %x",
TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
__entry->ctx, (unsigned long long)__entry->user_data,
__entry->res, __entry->cflags)
);

View File

@@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel Debug Core
*
@@ -22,10 +23,6 @@
*
* Original KGDB stub: David Grothe <dave@gcom.com>,
* Tigran Aivazian <tigran@sco.com>
*
* This file is licensed under the terms of the GNU General Public License
* version 2. This program is licensed "as is" without any warranty of any
* kind, whether express or implied.
*/
#define pr_fmt(fmt) "KGDB: " fmt

View File

@@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Kernel Debug Core
*
@@ -22,10 +23,6 @@
*
* Original KGDB stub: David Grothe <dave@gcom.com>,
* Tigran Aivazian <tigran@sco.com>
*
* This file is licensed under the terms of the GNU General Public License
* version 2. This program is licensed "as is" without any warranty of any
* kind, whether express or implied.
*/
#include <linux/kernel.h>

View File

@@ -523,51 +523,51 @@ static int kdb_ss(int argc, const char **argv)
}
static kdbtab_t bptab[] = {
{ .cmd_name = "bp",
.cmd_func = kdb_bp,
.cmd_usage = "[<vaddr>]",
.cmd_help = "Set/Display breakpoints",
.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
{ .name = "bp",
.func = kdb_bp,
.usage = "[<vaddr>]",
.help = "Set/Display breakpoints",
.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
},
{ .cmd_name = "bl",
.cmd_func = kdb_bp,
.cmd_usage = "[<vaddr>]",
.cmd_help = "Display breakpoints",
.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
{ .name = "bl",
.func = kdb_bp,
.usage = "[<vaddr>]",
.help = "Display breakpoints",
.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
},
{ .cmd_name = "bc",
.cmd_func = kdb_bc,
.cmd_usage = "<bpnum>",
.cmd_help = "Clear Breakpoint",
.cmd_flags = KDB_ENABLE_FLOW_CTRL,
{ .name = "bc",
.func = kdb_bc,
.usage = "<bpnum>",
.help = "Clear Breakpoint",
.flags = KDB_ENABLE_FLOW_CTRL,
},
{ .cmd_name = "be",
.cmd_func = kdb_bc,
.cmd_usage = "<bpnum>",
.cmd_help = "Enable Breakpoint",
.cmd_flags = KDB_ENABLE_FLOW_CTRL,
{ .name = "be",
.func = kdb_bc,
.usage = "<bpnum>",
.help = "Enable Breakpoint",
.flags = KDB_ENABLE_FLOW_CTRL,
},
{ .cmd_name = "bd",
.cmd_func = kdb_bc,
.cmd_usage = "<bpnum>",
.cmd_help = "Disable Breakpoint",
.cmd_flags = KDB_ENABLE_FLOW_CTRL,
{ .name = "bd",
.func = kdb_bc,
.usage = "<bpnum>",
.help = "Disable Breakpoint",
.flags = KDB_ENABLE_FLOW_CTRL,
},
{ .cmd_name = "ss",
.cmd_func = kdb_ss,
.cmd_usage = "",
.cmd_help = "Single Step",
.cmd_minlen = 1,
.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
{ .name = "ss",
.func = kdb_ss,
.usage = "",
.help = "Single Step",
.minlen = 1,
.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
},
};
static kdbtab_t bphcmd = {
.cmd_name = "bph",
.cmd_func = kdb_bp,
.cmd_usage = "[<vaddr>]",
.cmd_help = "[datar [length]|dataw [length]] Set hw brk",
.cmd_flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
.name = "bph",
.func = kdb_bp,
.usage = "[<vaddr>]",
.help = "[datar [length]|dataw [length]] Set hw brk",
.flags = KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS,
};
/* Initialize the breakpoint table and register breakpoint commands. */

View File

@@ -140,7 +140,6 @@ int kdb_stub(struct kgdb_state *ks)
*/
kdb_common_deinit_state();
KDB_STATE_CLEAR(PAGER);
kdbnearsym_cleanup();
if (error == KDB_CMD_KGDB) {
if (KDB_STATE(DOING_KGDB))
KDB_STATE_CLEAR(DOING_KGDB);

View File

@@ -33,7 +33,6 @@
#include <linux/kallsyms.h>
#include <linux/kgdb.h>
#include <linux/kdb.h>
#include <linux/list.h>
#include <linux/notifier.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
@@ -654,16 +653,17 @@ static void kdb_cmderror(int diag)
* Returns:
* zero for success, a kdb diagnostic if error
*/
struct defcmd_set {
int count;
bool usable;
char *name;
char *usage;
char *help;
char **command;
struct kdb_macro {
kdbtab_t cmd; /* Macro command */
struct list_head statements; /* Associated statement list */
};
static struct defcmd_set *defcmd_set;
static int defcmd_set_count;
struct kdb_macro_statement {
char *statement; /* Statement text */
struct list_head list_node; /* Statement list node */
};
static struct kdb_macro *kdb_macro;
static bool defcmd_in_progress;
/* Forward references */
@@ -671,53 +671,55 @@ static int kdb_exec_defcmd(int argc, const char **argv);
static int kdb_defcmd2(const char *cmdstr, const char *argv0)
{
struct defcmd_set *s = defcmd_set + defcmd_set_count - 1;
char **save_command = s->command;
struct kdb_macro_statement *kms;
if (!kdb_macro)
return KDB_NOTIMP;
if (strcmp(argv0, "endefcmd") == 0) {
defcmd_in_progress = false;
if (!s->count)
s->usable = false;
if (s->usable)
/* macros are always safe because when executed each
* internal command re-enters kdb_parse() and is
* safety checked individually.
*/
kdb_register_flags(s->name, kdb_exec_defcmd, s->usage,
s->help, 0,
KDB_ENABLE_ALWAYS_SAFE);
if (!list_empty(&kdb_macro->statements))
kdb_register(&kdb_macro->cmd);
return 0;
}
if (!s->usable)
return KDB_NOTIMP;
s->command = kcalloc(s->count + 1, sizeof(*(s->command)), GFP_KDB);
if (!s->command) {
kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
kms = kmalloc(sizeof(*kms), GFP_KDB);
if (!kms) {
kdb_printf("Could not allocate new kdb macro command: %s\n",
cmdstr);
s->usable = false;
return KDB_NOTIMP;
}
memcpy(s->command, save_command, s->count * sizeof(*(s->command)));
s->command[s->count++] = kdb_strdup(cmdstr, GFP_KDB);
kfree(save_command);
kms->statement = kdb_strdup(cmdstr, GFP_KDB);
list_add_tail(&kms->list_node, &kdb_macro->statements);
return 0;
}
static int kdb_defcmd(int argc, const char **argv)
{
struct defcmd_set *save_defcmd_set = defcmd_set, *s;
kdbtab_t *mp;
if (defcmd_in_progress) {
kdb_printf("kdb: nested defcmd detected, assuming missing "
"endefcmd\n");
kdb_defcmd2("endefcmd", "endefcmd");
}
if (argc == 0) {
int i;
for (s = defcmd_set; s < defcmd_set + defcmd_set_count; ++s) {
kdb_printf("defcmd %s \"%s\" \"%s\"\n", s->name,
s->usage, s->help);
for (i = 0; i < s->count; ++i)
kdb_printf("%s", s->command[i]);
kdb_printf("endefcmd\n");
kdbtab_t *kp;
struct kdb_macro *kmp;
struct kdb_macro_statement *kms;
list_for_each_entry(kp, &kdb_cmds_head, list_node) {
if (kp->func == kdb_exec_defcmd) {
kdb_printf("defcmd %s \"%s\" \"%s\"\n",
kp->name, kp->usage, kp->help);
kmp = container_of(kp, struct kdb_macro, cmd);
list_for_each_entry(kms, &kmp->statements,
list_node)
kdb_printf("%s", kms->statement);
kdb_printf("endefcmd\n");
}
}
return 0;
}
@@ -727,45 +729,43 @@ static int kdb_defcmd(int argc, const char **argv)
kdb_printf("Command only available during kdb_init()\n");
return KDB_NOTIMP;
}
defcmd_set = kmalloc_array(defcmd_set_count + 1, sizeof(*defcmd_set),
GFP_KDB);
if (!defcmd_set)
kdb_macro = kzalloc(sizeof(*kdb_macro), GFP_KDB);
if (!kdb_macro)
goto fail_defcmd;
memcpy(defcmd_set, save_defcmd_set,
defcmd_set_count * sizeof(*defcmd_set));
s = defcmd_set + defcmd_set_count;
memset(s, 0, sizeof(*s));
s->usable = true;
s->name = kdb_strdup(argv[1], GFP_KDB);
if (!s->name)
mp = &kdb_macro->cmd;
mp->func = kdb_exec_defcmd;
mp->minlen = 0;
mp->flags = KDB_ENABLE_ALWAYS_SAFE;
mp->name = kdb_strdup(argv[1], GFP_KDB);
if (!mp->name)
goto fail_name;
s->usage = kdb_strdup(argv[2], GFP_KDB);
if (!s->usage)
mp->usage = kdb_strdup(argv[2], GFP_KDB);
if (!mp->usage)
goto fail_usage;
s->help = kdb_strdup(argv[3], GFP_KDB);
if (!s->help)
mp->help = kdb_strdup(argv[3], GFP_KDB);
if (!mp->help)
goto fail_help;
if (s->usage[0] == '"') {
strcpy(s->usage, argv[2]+1);
s->usage[strlen(s->usage)-1] = '\0';
if (mp->usage[0] == '"') {
strcpy(mp->usage, argv[2]+1);
mp->usage[strlen(mp->usage)-1] = '\0';
}
if (s->help[0] == '"') {
strcpy(s->help, argv[3]+1);
s->help[strlen(s->help)-1] = '\0';
if (mp->help[0] == '"') {
strcpy(mp->help, argv[3]+1);
mp->help[strlen(mp->help)-1] = '\0';
}
++defcmd_set_count;
INIT_LIST_HEAD(&kdb_macro->statements);
defcmd_in_progress = true;
kfree(save_defcmd_set);
return 0;
fail_help:
kfree(s->usage);
kfree(mp->usage);
fail_usage:
kfree(s->name);
kfree(mp->name);
fail_name:
kfree(defcmd_set);
kfree(kdb_macro);
fail_defcmd:
kdb_printf("Could not allocate new defcmd_set entry for %s\n", argv[1]);
defcmd_set = save_defcmd_set;
kdb_printf("Could not allocate new kdb_macro entry for %s\n", argv[1]);
return KDB_NOTIMP;
}
@@ -780,25 +780,31 @@ fail_defcmd:
*/
static int kdb_exec_defcmd(int argc, const char **argv)
{
int i, ret;
struct defcmd_set *s;
int ret;
kdbtab_t *kp;
struct kdb_macro *kmp;
struct kdb_macro_statement *kms;
if (argc != 0)
return KDB_ARGCOUNT;
for (s = defcmd_set, i = 0; i < defcmd_set_count; ++i, ++s) {
if (strcmp(s->name, argv[0]) == 0)
list_for_each_entry(kp, &kdb_cmds_head, list_node) {
if (strcmp(kp->name, argv[0]) == 0)
break;
}
if (i == defcmd_set_count) {
if (list_entry_is_head(kp, &kdb_cmds_head, list_node)) {
kdb_printf("kdb_exec_defcmd: could not find commands for %s\n",
argv[0]);
return KDB_NOTIMP;
}
for (i = 0; i < s->count; ++i) {
/* Recursive use of kdb_parse, do not use argv after
* this point */
kmp = container_of(kp, struct kdb_macro, cmd);
list_for_each_entry(kms, &kmp->statements, list_node) {
/*
* Recursive use of kdb_parse, do not use argv after this point.
*/
argv = NULL;
kdb_printf("[%s]kdb> %s\n", s->name, s->command[i]);
ret = kdb_parse(s->command[i]);
kdb_printf("[%s]kdb> %s\n", kmp->cmd.name, kms->statement);
ret = kdb_parse(kms->statement);
if (ret)
return ret;
}
@@ -1009,11 +1015,11 @@ int kdb_parse(const char *cmdstr)
* If this command is allowed to be abbreviated,
* check to see if this is it.
*/
if (tp->cmd_minlen && (strlen(argv[0]) <= tp->cmd_minlen) &&
(strncmp(argv[0], tp->cmd_name, tp->cmd_minlen) == 0))
if (tp->minlen && (strlen(argv[0]) <= tp->minlen) &&
(strncmp(argv[0], tp->name, tp->minlen) == 0))
break;
if (strcmp(argv[0], tp->cmd_name) == 0)
if (strcmp(argv[0], tp->name) == 0)
break;
}
@@ -1024,8 +1030,7 @@ int kdb_parse(const char *cmdstr)
*/
if (list_entry_is_head(tp, &kdb_cmds_head, list_node)) {
list_for_each_entry(tp, &kdb_cmds_head, list_node) {
if (strncmp(argv[0], tp->cmd_name,
strlen(tp->cmd_name)) == 0)
if (strncmp(argv[0], tp->name, strlen(tp->name)) == 0)
break;
}
}
@@ -1033,19 +1038,19 @@ int kdb_parse(const char *cmdstr)
if (!list_entry_is_head(tp, &kdb_cmds_head, list_node)) {
int result;
if (!kdb_check_flags(tp->cmd_flags, kdb_cmd_enabled, argc <= 1))
if (!kdb_check_flags(tp->flags, kdb_cmd_enabled, argc <= 1))
return KDB_NOPERM;
KDB_STATE_SET(CMD);
result = (*tp->cmd_func)(argc-1, (const char **)argv);
result = (*tp->func)(argc-1, (const char **)argv);
if (result && ignore_errors && result > KDB_CMD_GO)
result = 0;
KDB_STATE_CLEAR(CMD);
if (tp->cmd_flags & KDB_REPEAT_WITH_ARGS)
if (tp->flags & KDB_REPEAT_WITH_ARGS)
return result;
argc = tp->cmd_flags & KDB_REPEAT_NO_ARGS ? 1 : 0;
argc = tp->flags & KDB_REPEAT_NO_ARGS ? 1 : 0;
if (argv[argc])
*(argv[argc]) = '\0';
return result;
@@ -2412,12 +2417,12 @@ static int kdb_help(int argc, const char **argv)
char *space = "";
if (KDB_FLAG(CMD_INTERRUPT))
return 0;
if (!kdb_check_flags(kt->cmd_flags, kdb_cmd_enabled, true))
if (!kdb_check_flags(kt->flags, kdb_cmd_enabled, true))
continue;
if (strlen(kt->cmd_usage) > 20)
if (strlen(kt->usage) > 20)
space = "\n ";
kdb_printf("%-15.15s %-20s%s%s\n", kt->cmd_name,
kt->cmd_usage, space, kt->cmd_help);
kdb_printf("%-15.15s %-20s%s%s\n", kt->name,
kt->usage, space, kt->help);
}
return 0;
}
@@ -2613,56 +2618,32 @@ static int kdb_grep_help(int argc, const char **argv)
return 0;
}
/*
* kdb_register_flags - This function is used to register a kernel
* debugger command.
* Inputs:
* cmd Command name
* func Function to execute the command
* usage A simple usage string showing arguments
* help A simple help string describing command
* repeat Does the command auto repeat on enter?
* Returns:
* zero for success, one if a duplicate command.
/**
* kdb_register() - This function is used to register a kernel debugger
* command.
* @cmd: pointer to kdb command
*
* Note that it's the job of the caller to keep the memory for the cmd
* allocated until unregister is called.
*/
int kdb_register_flags(char *cmd,
kdb_func_t func,
char *usage,
char *help,
short minlen,
kdb_cmdflags_t flags)
int kdb_register(kdbtab_t *cmd)
{
kdbtab_t *kp;
list_for_each_entry(kp, &kdb_cmds_head, list_node) {
if (strcmp(kp->cmd_name, cmd) == 0) {
kdb_printf("Duplicate kdb command registered: "
"%s, func %px help %s\n", cmd, func, help);
if (strcmp(kp->name, cmd->name) == 0) {
kdb_printf("Duplicate kdb cmd: %s, func %p help %s\n",
cmd->name, cmd->func, cmd->help);
return 1;
}
}
kp = kmalloc(sizeof(*kp), GFP_KDB);
if (!kp) {
kdb_printf("Could not allocate new kdb_command table\n");
return 1;
}
kp->cmd_name = cmd;
kp->cmd_func = func;
kp->cmd_usage = usage;
kp->cmd_help = help;
kp->cmd_minlen = minlen;
kp->cmd_flags = flags;
kp->is_dynamic = true;
list_add_tail(&kp->list_node, &kdb_cmds_head);
list_add_tail(&cmd->list_node, &kdb_cmds_head);
return 0;
}
EXPORT_SYMBOL_GPL(kdb_register_flags);
EXPORT_SYMBOL_GPL(kdb_register);
/*
/**
* kdb_register_table() - This function is used to register a kdb command
* table.
* @kp: pointer to kdb command table
@@ -2676,266 +2657,231 @@ void kdb_register_table(kdbtab_t *kp, size_t len)
}
}
/*
* kdb_register - Compatibility register function for commands that do
* not need to specify a repeat state. Equivalent to
* kdb_register_flags with flags set to 0.
* Inputs:
* cmd Command name
* func Function to execute the command
* usage A simple usage string showing arguments
* help A simple help string describing command
* Returns:
* zero for success, one if a duplicate command.
/**
* kdb_unregister() - This function is used to unregister a kernel debugger
* command. It is generally called when a module which
* implements kdb command is unloaded.
* @cmd: pointer to kdb command
*/
int kdb_register(char *cmd,
kdb_func_t func,
char *usage,
char *help,
short minlen)
void kdb_unregister(kdbtab_t *cmd)
{
return kdb_register_flags(cmd, func, usage, help, minlen, 0);
}
EXPORT_SYMBOL_GPL(kdb_register);
/*
* kdb_unregister - This function is used to unregister a kernel
* debugger command. It is generally called when a module which
* implements kdb commands is unloaded.
* Inputs:
* cmd Command name
* Returns:
* zero for success, one command not registered.
*/
int kdb_unregister(char *cmd)
{
kdbtab_t *kp;
/*
* find the command.
*/
list_for_each_entry(kp, &kdb_cmds_head, list_node) {
if (strcmp(kp->cmd_name, cmd) == 0) {
list_del(&kp->list_node);
if (kp->is_dynamic)
kfree(kp);
return 0;
}
}
/* Couldn't find it. */
return 1;
list_del(&cmd->list_node);
}
EXPORT_SYMBOL_GPL(kdb_unregister);
static kdbtab_t maintab[] = {
{ .cmd_name = "md",
.cmd_func = kdb_md,
.cmd_usage = "<vaddr>",
.cmd_help = "Display Memory Contents, also mdWcN, e.g. md8c1",
.cmd_minlen = 1,
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
{ .name = "md",
.func = kdb_md,
.usage = "<vaddr>",
.help = "Display Memory Contents, also mdWcN, e.g. md8c1",
.minlen = 1,
.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
},
{ .cmd_name = "mdr",
.cmd_func = kdb_md,
.cmd_usage = "<vaddr> <bytes>",
.cmd_help = "Display Raw Memory",
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
{ .name = "mdr",
.func = kdb_md,
.usage = "<vaddr> <bytes>",
.help = "Display Raw Memory",
.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
},
{ .cmd_name = "mdp",
.cmd_func = kdb_md,
.cmd_usage = "<paddr> <bytes>",
.cmd_help = "Display Physical Memory",
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
{ .name = "mdp",
.func = kdb_md,
.usage = "<paddr> <bytes>",
.help = "Display Physical Memory",
.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
},
{ .cmd_name = "mds",
.cmd_func = kdb_md,
.cmd_usage = "<vaddr>",
.cmd_help = "Display Memory Symbolically",
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
{ .name = "mds",
.func = kdb_md,
.usage = "<vaddr>",
.help = "Display Memory Symbolically",
.flags = KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS,
},
{ .cmd_name = "mm",
.cmd_func = kdb_mm,
.cmd_usage = "<vaddr> <contents>",
.cmd_help = "Modify Memory Contents",
.cmd_flags = KDB_ENABLE_MEM_WRITE | KDB_REPEAT_NO_ARGS,
{ .name = "mm",
.func = kdb_mm,
.usage = "<vaddr> <contents>",
.help = "Modify Memory Contents",
.flags = KDB_ENABLE_MEM_WRITE | KDB_REPEAT_NO_ARGS,
},
{ .cmd_name = "go",
.cmd_func = kdb_go,
.cmd_usage = "[<vaddr>]",
.cmd_help = "Continue Execution",
.cmd_minlen = 1,
.cmd_flags = KDB_ENABLE_REG_WRITE |
{ .name = "go",
.func = kdb_go,
.usage = "[<vaddr>]",
.help = "Continue Execution",
.minlen = 1,
.flags = KDB_ENABLE_REG_WRITE |
KDB_ENABLE_ALWAYS_SAFE_NO_ARGS,
},
{ .cmd_name = "rd",
.cmd_func = kdb_rd,
.cmd_usage = "",
.cmd_help = "Display Registers",
.cmd_flags = KDB_ENABLE_REG_READ,
{ .name = "rd",
.func = kdb_rd,
.usage = "",
.help = "Display Registers",
.flags = KDB_ENABLE_REG_READ,
},
{ .cmd_name = "rm",
.cmd_func = kdb_rm,
.cmd_usage = "<reg> <contents>",
.cmd_help = "Modify Registers",
.cmd_flags = KDB_ENABLE_REG_WRITE,
{ .name = "rm",
.func = kdb_rm,
.usage = "<reg> <contents>",
.help = "Modify Registers",
.flags = KDB_ENABLE_REG_WRITE,
},
{ .cmd_name = "ef",
.cmd_func = kdb_ef,
.cmd_usage = "<vaddr>",
.cmd_help = "Display exception frame",
.cmd_flags = KDB_ENABLE_MEM_READ,
{ .name = "ef",
.func = kdb_ef,
.usage = "<vaddr>",
.help = "Display exception frame",
.flags = KDB_ENABLE_MEM_READ,
},
{ .cmd_name = "bt",
.cmd_func = kdb_bt,
.cmd_usage = "[<vaddr>]",
.cmd_help = "Stack traceback",
.cmd_minlen = 1,
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
{ .name = "bt",
.func = kdb_bt,
.usage = "[<vaddr>]",
.help = "Stack traceback",
.minlen = 1,
.flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
},
{ .cmd_name = "btp",
.cmd_func = kdb_bt,
.cmd_usage = "<pid>",
.cmd_help = "Display stack for process <pid>",
.cmd_flags = KDB_ENABLE_INSPECT,
{ .name = "btp",
.func = kdb_bt,
.usage = "<pid>",
.help = "Display stack for process <pid>",
.flags = KDB_ENABLE_INSPECT,
},
{ .cmd_name = "bta",
.cmd_func = kdb_bt,
.cmd_usage = "[D|R|S|T|C|Z|E|U|I|M|A]",
.cmd_help = "Backtrace all processes matching state flag",
.cmd_flags = KDB_ENABLE_INSPECT,
{ .name = "bta",
.func = kdb_bt,
.usage = "[D|R|S|T|C|Z|E|U|I|M|A]",
.help = "Backtrace all processes matching state flag",
.flags = KDB_ENABLE_INSPECT,
},
{ .cmd_name = "btc",
.cmd_func = kdb_bt,
.cmd_usage = "",
.cmd_help = "Backtrace current process on each cpu",
.cmd_flags = KDB_ENABLE_INSPECT,
{ .name = "btc",
.func = kdb_bt,
.usage = "",
.help = "Backtrace current process on each cpu",
.flags = KDB_ENABLE_INSPECT,
},
{ .cmd_name = "btt",
.cmd_func = kdb_bt,
.cmd_usage = "<vaddr>",
.cmd_help = "Backtrace process given its struct task address",
.cmd_flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
{ .name = "btt",
.func = kdb_bt,
.usage = "<vaddr>",
.help = "Backtrace process given its struct task address",
.flags = KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS,
},
{ .cmd_name = "env",
.cmd_func = kdb_env,
.cmd_usage = "",
.cmd_help = "Show environment variables",
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
{ .name = "env",
.func = kdb_env,
.usage = "",
.help = "Show environment variables",
.flags = KDB_ENABLE_ALWAYS_SAFE,
},
{ .cmd_name = "set",
.cmd_func = kdb_set,
.cmd_usage = "",
.cmd_help = "Set environment variables",
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
{ .name = "set",
.func = kdb_set,
.usage = "",
.help = "Set environment variables",
.flags = KDB_ENABLE_ALWAYS_SAFE,
},
{ .cmd_name = "help",
.cmd_func = kdb_help,
.cmd_usage = "",
.cmd_help = "Display Help Message",
.cmd_minlen = 1,
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
{ .name = "help",
.func = kdb_help,
.usage = "",
.help = "Display Help Message",
.minlen = 1,
.flags = KDB_ENABLE_ALWAYS_SAFE,
},
{ .cmd_name = "?",
.cmd_func = kdb_help,
.cmd_usage = "",
.cmd_help = "Display Help Message",
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
{ .name = "?",
.func = kdb_help,
.usage = "",
.help = "Display Help Message",
.flags = KDB_ENABLE_ALWAYS_SAFE,
},
{ .cmd_name = "cpu",
.cmd_func = kdb_cpu,
.cmd_usage = "<cpunum>",
.cmd_help = "Switch to new cpu",
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE_NO_ARGS,
{ .name = "cpu",
.func = kdb_cpu,
.usage = "<cpunum>",
.help = "Switch to new cpu",
.flags = KDB_ENABLE_ALWAYS_SAFE_NO_ARGS,
},
{ .cmd_name = "kgdb",
.cmd_func = kdb_kgdb,
.cmd_usage = "",
.cmd_help = "Enter kgdb mode",
.cmd_flags = 0,
{ .name = "kgdb",
.func = kdb_kgdb,
.usage = "",
.help = "Enter kgdb mode",
.flags = 0,
},
{ .cmd_name = "ps",
.cmd_func = kdb_ps,
.cmd_usage = "[<flags>|A]",
.cmd_help = "Display active task list",
.cmd_flags = KDB_ENABLE_INSPECT,
{ .name = "ps",
.func = kdb_ps,
.usage = "[<flags>|A]",
.help = "Display active task list",
.flags = KDB_ENABLE_INSPECT,
},
{ .cmd_name = "pid",
.cmd_func = kdb_pid,
.cmd_usage = "<pidnum>",
.cmd_help = "Switch to another task",
.cmd_flags = KDB_ENABLE_INSPECT,
{ .name = "pid",
.func = kdb_pid,
.usage = "<pidnum>",
.help = "Switch to another task",
.flags = KDB_ENABLE_INSPECT,
},
{ .cmd_name = "reboot",
.cmd_func = kdb_reboot,
.cmd_usage = "",
.cmd_help = "Reboot the machine immediately",
.cmd_flags = KDB_ENABLE_REBOOT,
{ .name = "reboot",
.func = kdb_reboot,
.usage = "",
.help = "Reboot the machine immediately",
.flags = KDB_ENABLE_REBOOT,
},
#if defined(CONFIG_MODULES)
{ .cmd_name = "lsmod",
.cmd_func = kdb_lsmod,
.cmd_usage = "",
.cmd_help = "List loaded kernel modules",
.cmd_flags = KDB_ENABLE_INSPECT,
{ .name = "lsmod",
.func = kdb_lsmod,
.usage = "",
.help = "List loaded kernel modules",
.flags = KDB_ENABLE_INSPECT,
},
#endif
#if defined(CONFIG_MAGIC_SYSRQ)
{ .cmd_name = "sr",
.cmd_func = kdb_sr,
.cmd_usage = "<key>",
.cmd_help = "Magic SysRq key",
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
{ .name = "sr",
.func = kdb_sr,
.usage = "<key>",
.help = "Magic SysRq key",
.flags = KDB_ENABLE_ALWAYS_SAFE,
},
#endif
#if defined(CONFIG_PRINTK)
{ .cmd_name = "dmesg",
.cmd_func = kdb_dmesg,
.cmd_usage = "[lines]",
.cmd_help = "Display syslog buffer",
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
{ .name = "dmesg",
.func = kdb_dmesg,
.usage = "[lines]",
.help = "Display syslog buffer",
.flags = KDB_ENABLE_ALWAYS_SAFE,
},
#endif
{ .cmd_name = "defcmd",
.cmd_func = kdb_defcmd,
.cmd_usage = "name \"usage\" \"help\"",
.cmd_help = "Define a set of commands, down to endefcmd",
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
{ .name = "defcmd",
.func = kdb_defcmd,
.usage = "name \"usage\" \"help\"",
.help = "Define a set of commands, down to endefcmd",
/*
* Macros are always safe because when executed each
* internal command re-enters kdb_parse() and is safety
* checked individually.
*/
.flags = KDB_ENABLE_ALWAYS_SAFE,
},
{ .cmd_name = "kill",
.cmd_func = kdb_kill,
.cmd_usage = "<-signal> <pid>",
.cmd_help = "Send a signal to a process",
.cmd_flags = KDB_ENABLE_SIGNAL,
{ .name = "kill",
.func = kdb_kill,
.usage = "<-signal> <pid>",
.help = "Send a signal to a process",
.flags = KDB_ENABLE_SIGNAL,
},
{ .cmd_name = "summary",
.cmd_func = kdb_summary,
.cmd_usage = "",
.cmd_help = "Summarize the system",
.cmd_minlen = 4,
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
{ .name = "summary",
.func = kdb_summary,
.usage = "",
.help = "Summarize the system",
.minlen = 4,
.flags = KDB_ENABLE_ALWAYS_SAFE,
},
{ .cmd_name = "per_cpu",
.cmd_func = kdb_per_cpu,
.cmd_usage = "<sym> [<bytes>] [<cpu>]",
.cmd_help = "Display per_cpu variables",
.cmd_minlen = 3,
.cmd_flags = KDB_ENABLE_MEM_READ,
{ .name = "per_cpu",
.func = kdb_per_cpu,
.usage = "<sym> [<bytes>] [<cpu>]",
.help = "Display per_cpu variables",
.minlen = 3,
.flags = KDB_ENABLE_MEM_READ,
},
{ .cmd_name = "grephelp",
.cmd_func = kdb_grep_help,
.cmd_usage = "",
.cmd_help = "Display help on | grep",
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
{ .name = "grephelp",
.func = kdb_grep_help,
.usage = "",
.help = "Display help on | grep",
.flags = KDB_ENABLE_ALWAYS_SAFE,
},
};
static kdbtab_t nmicmd = {
.cmd_name = "disable_nmi",
.cmd_func = kdb_disable_nmi,
.cmd_usage = "",
.cmd_help = "Disable NMI entry to KDB",
.cmd_flags = KDB_ENABLE_ALWAYS_SAFE,
.name = "disable_nmi",
.func = kdb_disable_nmi,
.usage = "",
.help = "Disable NMI entry to KDB",
.flags = KDB_ENABLE_ALWAYS_SAFE,
};
/* Initialize the kdb command table. */

View File

@@ -109,7 +109,6 @@ extern int kdbgetaddrarg(int, const char **, int*, unsigned long *,
long *, char **);
extern int kdbgetsymval(const char *, kdb_symtab_t *);
extern int kdbnearsym(unsigned long, kdb_symtab_t *);
extern void kdbnearsym_cleanup(void);
extern char *kdb_strdup(const char *str, gfp_t type);
extern void kdb_symbol_print(unsigned long, const kdb_symtab_t *, unsigned int);
@@ -165,19 +164,6 @@ typedef struct _kdb_bp {
#ifdef CONFIG_KGDB_KDB
extern kdb_bp_t kdb_breakpoints[/* KDB_MAXBPT */];
/* The KDB shell command table */
typedef struct _kdbtab {
char *cmd_name; /* Command name */
kdb_func_t cmd_func; /* Function to execute command */
char *cmd_usage; /* Usage String for this command */
char *cmd_help; /* Help message for this command */
short cmd_minlen; /* Minimum legal # command
* chars required */
kdb_cmdflags_t cmd_flags; /* Command behaviour flags */
struct list_head list_node; /* Command list */
bool is_dynamic; /* Command table allocation type */
} kdbtab_t;
extern void kdb_register_table(kdbtab_t *kp, size_t len);
extern int kdb_bt(int, const char **); /* KDB display back trace */
@@ -233,10 +219,6 @@ extern struct task_struct *kdb_curr_task(int);
#define GFP_KDB (in_dbg_master() ? GFP_ATOMIC : GFP_KERNEL)
extern void *debug_kmalloc(size_t size, gfp_t flags);
extern void debug_kfree(void *);
extern void debug_kusage(void);
extern struct task_struct *kdb_current_task;
extern struct pt_regs *kdb_current_regs;

View File

@@ -51,48 +51,48 @@ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab)
}
EXPORT_SYMBOL(kdbgetsymval);
static char *kdb_name_table[100]; /* arbitrary size */
/*
* kdbnearsym - Return the name of the symbol with the nearest address
* less than 'addr'.
/**
* kdbnearsym() - Return the name of the symbol with the nearest address
* less than @addr.
* @addr: Address to check for near symbol
* @symtab: Structure to receive results
*
* Parameters:
* addr Address to check for symbol near
* symtab Structure to receive results
* Returns:
* 0 No sections contain this address, symtab zero filled
* 1 Address mapped to module/symbol/section, data in symtab
* Remarks:
* 2.6 kallsyms has a "feature" where it unpacks the name into a
* string. If that string is reused before the caller expects it
* then the caller sees its string change without warning. To
* avoid cluttering up the main kdb code with lots of kdb_strdup,
* tests and kfree calls, kdbnearsym maintains an LRU list of the
* last few unique strings. The list is sized large enough to
* hold active strings, no kdb caller of kdbnearsym makes more
* than ~20 later calls before using a saved value.
* WARNING: This function may return a pointer to a single statically
* allocated buffer (namebuf). kdb's unusual calling context (single
* threaded, all other CPUs halted) provides us sufficient locking for
* this to be safe. The only constraint imposed by the static buffer is
* that the caller must consume any previous reply prior to another call
* to lookup a new symbol.
*
* Note that, strictly speaking, some architectures may re-enter the kdb
* trap if the system turns out to be very badly damaged and this breaks
* the single-threaded assumption above. In these circumstances successful
* continuation and exit from the inner trap is unlikely to work and any
* user attempting this receives a prominent warning before being allowed
* to progress. In these circumstances we remain memory safe because
* namebuf[KSYM_NAME_LEN-1] will never change from '\0' although we do
* tolerate the possibility of garbled symbol display from the outer kdb
* trap.
*
* Return:
* * 0 - No sections contain this address, symtab zero filled
* * 1 - Address mapped to module/symbol/section, data in symtab
*/
int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
{
int ret = 0;
unsigned long symbolsize = 0;
unsigned long offset = 0;
#define knt1_size 128 /* must be >= kallsyms table size */
char *knt1 = NULL;
static char namebuf[KSYM_NAME_LEN];
kdb_dbg_printf(AR, "addr=0x%lx, symtab=%px\n", addr, symtab);
memset(symtab, 0, sizeof(*symtab));
if (addr < 4096)
goto out;
knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC);
if (!knt1) {
kdb_func_printf("addr=0x%lx cannot kmalloc knt1\n", addr);
goto out;
}
symtab->sym_name = kallsyms_lookup(addr, &symbolsize , &offset,
(char **)(&symtab->mod_name), knt1);
(char **)(&symtab->mod_name), namebuf);
if (offset > 8*1024*1024) {
symtab->sym_name = NULL;
addr = offset = symbolsize = 0;
@@ -101,63 +101,14 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
symtab->sym_end = symtab->sym_start + symbolsize;
ret = symtab->sym_name != NULL && *(symtab->sym_name) != '\0';
if (ret) {
int i;
/* Another 2.6 kallsyms "feature". Sometimes the sym_name is
* set but the buffer passed into kallsyms_lookup is not used,
* so it contains garbage. The caller has to work out which
* buffer needs to be saved.
*
* What was Rusty smoking when he wrote that code?
*/
if (symtab->sym_name != knt1) {
strncpy(knt1, symtab->sym_name, knt1_size);
knt1[knt1_size-1] = '\0';
}
for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
if (kdb_name_table[i] &&
strcmp(kdb_name_table[i], knt1) == 0)
break;
}
if (i >= ARRAY_SIZE(kdb_name_table)) {
debug_kfree(kdb_name_table[0]);
memmove(kdb_name_table, kdb_name_table+1,
sizeof(kdb_name_table[0]) *
(ARRAY_SIZE(kdb_name_table)-1));
} else {
debug_kfree(knt1);
knt1 = kdb_name_table[i];
memmove(kdb_name_table+i, kdb_name_table+i+1,
sizeof(kdb_name_table[0]) *
(ARRAY_SIZE(kdb_name_table)-i-1));
}
i = ARRAY_SIZE(kdb_name_table) - 1;
kdb_name_table[i] = knt1;
symtab->sym_name = kdb_name_table[i];
knt1 = NULL;
}
if (symtab->mod_name == NULL)
symtab->mod_name = "kernel";
kdb_dbg_printf(AR, "returns %d symtab->sym_start=0x%lx, symtab->mod_name=%px, symtab->sym_name=%px (%s)\n",
ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name);
out:
debug_kfree(knt1);
return ret;
}
void kdbnearsym_cleanup(void)
{
int i;
for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
if (kdb_name_table[i]) {
debug_kfree(kdb_name_table[i]);
kdb_name_table[i] = NULL;
}
}
}
static char ks_namebuf[KSYM_NAME_LEN+1], ks_namebuf_prev[KSYM_NAME_LEN+1];
/*
@@ -655,230 +606,6 @@ unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask)
return (mask & kdb_task_state_string(state)) != 0;
}
/* Last ditch allocator for debugging, so we can still debug even when
* the GFP_ATOMIC pool has been exhausted. The algorithms are tuned
* for space usage, not for speed. One smallish memory pool, the free
* chain is always in ascending address order to allow coalescing,
* allocations are done in brute force best fit.
*/
struct debug_alloc_header {
u32 next; /* offset of next header from start of pool */
u32 size;
void *caller;
};
/* The memory returned by this allocator must be aligned, which means
* so must the header size. Do not assume that sizeof(struct
* debug_alloc_header) is a multiple of the alignment, explicitly
* calculate the overhead of this header, including the alignment.
* The rest of this code must not use sizeof() on any header or
* pointer to a header.
*/
#define dah_align 8
#define dah_overhead ALIGN(sizeof(struct debug_alloc_header), dah_align)
static u64 debug_alloc_pool_aligned[256*1024/dah_align]; /* 256K pool */
static char *debug_alloc_pool = (char *)debug_alloc_pool_aligned;
static u32 dah_first, dah_first_call = 1, dah_used, dah_used_max;
/* Locking is awkward. The debug code is called from all contexts,
* including non maskable interrupts. A normal spinlock is not safe
* in NMI context. Try to get the debug allocator lock, if it cannot
* be obtained after a second then give up. If the lock could not be
* previously obtained on this cpu then only try once.
*
* sparse has no annotation for "this function _sometimes_ acquires a
* lock", so fudge the acquire/release notation.
*/
static DEFINE_SPINLOCK(dap_lock);
static int get_dap_lock(void)
__acquires(dap_lock)
{
static int dap_locked = -1;
int count;
if (dap_locked == smp_processor_id())
count = 1;
else
count = 1000;
while (1) {
if (spin_trylock(&dap_lock)) {
dap_locked = -1;
return 1;
}
if (!count--)
break;
udelay(1000);
}
dap_locked = smp_processor_id();
__acquire(dap_lock);
return 0;
}
void *debug_kmalloc(size_t size, gfp_t flags)
{
unsigned int rem, h_offset;
struct debug_alloc_header *best, *bestprev, *prev, *h;
void *p = NULL;
if (!get_dap_lock()) {
__release(dap_lock); /* we never actually got it */
return NULL;
}
h = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
if (dah_first_call) {
h->size = sizeof(debug_alloc_pool_aligned) - dah_overhead;
dah_first_call = 0;
}
size = ALIGN(size, dah_align);
prev = best = bestprev = NULL;
while (1) {
if (h->size >= size && (!best || h->size < best->size)) {
best = h;
bestprev = prev;
if (h->size == size)
break;
}
if (!h->next)
break;
prev = h;
h = (struct debug_alloc_header *)(debug_alloc_pool + h->next);
}
if (!best)
goto out;
rem = best->size - size;
/* The pool must always contain at least one header */
if (best->next == 0 && bestprev == NULL && rem < dah_overhead)
goto out;
if (rem >= dah_overhead) {
best->size = size;
h_offset = ((char *)best - debug_alloc_pool) +
dah_overhead + best->size;
h = (struct debug_alloc_header *)(debug_alloc_pool + h_offset);
h->size = rem - dah_overhead;
h->next = best->next;
} else
h_offset = best->next;
best->caller = __builtin_return_address(0);
dah_used += best->size;
dah_used_max = max(dah_used, dah_used_max);
if (bestprev)
bestprev->next = h_offset;
else
dah_first = h_offset;
p = (char *)best + dah_overhead;
memset(p, POISON_INUSE, best->size - 1);
*((char *)p + best->size - 1) = POISON_END;
out:
spin_unlock(&dap_lock);
return p;
}
void debug_kfree(void *p)
{
struct debug_alloc_header *h;
unsigned int h_offset;
if (!p)
return;
if ((char *)p < debug_alloc_pool ||
(char *)p >= debug_alloc_pool + sizeof(debug_alloc_pool_aligned)) {
kfree(p);
return;
}
if (!get_dap_lock()) {
__release(dap_lock); /* we never actually got it */
return; /* memory leak, cannot be helped */
}
h = (struct debug_alloc_header *)((char *)p - dah_overhead);
memset(p, POISON_FREE, h->size - 1);
*((char *)p + h->size - 1) = POISON_END;
h->caller = NULL;
dah_used -= h->size;
h_offset = (char *)h - debug_alloc_pool;
if (h_offset < dah_first) {
h->next = dah_first;
dah_first = h_offset;
} else {
struct debug_alloc_header *prev;
unsigned int prev_offset;
prev = (struct debug_alloc_header *)(debug_alloc_pool +
dah_first);
while (1) {
if (!prev->next || prev->next > h_offset)
break;
prev = (struct debug_alloc_header *)
(debug_alloc_pool + prev->next);
}
prev_offset = (char *)prev - debug_alloc_pool;
if (prev_offset + dah_overhead + prev->size == h_offset) {
prev->size += dah_overhead + h->size;
memset(h, POISON_FREE, dah_overhead - 1);
*((char *)h + dah_overhead - 1) = POISON_END;
h = prev;
h_offset = prev_offset;
} else {
h->next = prev->next;
prev->next = h_offset;
}
}
if (h_offset + dah_overhead + h->size == h->next) {
struct debug_alloc_header *next;
next = (struct debug_alloc_header *)
(debug_alloc_pool + h->next);
h->size += dah_overhead + next->size;
h->next = next->next;
memset(next, POISON_FREE, dah_overhead - 1);
*((char *)next + dah_overhead - 1) = POISON_END;
}
spin_unlock(&dap_lock);
}
void debug_kusage(void)
{
struct debug_alloc_header *h_free, *h_used;
#ifdef CONFIG_IA64
/* FIXME: using dah for ia64 unwind always results in a memory leak.
* Fix that memory leak first, then set debug_kusage_one_time = 1 for
* all architectures.
*/
static int debug_kusage_one_time;
#else
static int debug_kusage_one_time = 1;
#endif
if (!get_dap_lock()) {
__release(dap_lock); /* we never actually got it */
return;
}
h_free = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
if (dah_first == 0 &&
(h_free->size == sizeof(debug_alloc_pool_aligned) - dah_overhead ||
dah_first_call))
goto out;
if (!debug_kusage_one_time)
goto out;
debug_kusage_one_time = 0;
kdb_func_printf("debug_kmalloc memory leak dah_first %d\n", dah_first);
if (dah_first) {
h_used = (struct debug_alloc_header *)debug_alloc_pool;
kdb_func_printf("h_used %px size %d\n", h_used, h_used->size);
}
do {
h_used = (struct debug_alloc_header *)
((char *)h_free + dah_overhead + h_free->size);
kdb_func_printf("h_used %px size %d caller %px\n",
h_used, h_used->size, h_used->caller);
h_free = (struct debug_alloc_header *)
(debug_alloc_pool + h_free->next);
} while (h_free->next);
h_used = (struct debug_alloc_header *)
((char *)h_free + dah_overhead + h_free->size);
if ((char *)h_used - debug_alloc_pool !=
sizeof(debug_alloc_pool_aligned))
kdb_func_printf("h_used %px size %d caller %px\n",
h_used, h_used->size, h_used->caller);
out:
spin_unlock(&dap_lock);
}
/* Maintain a small stack of kdb_flags to allow recursion without disturbing
* the global kdb state.
*/

View File

@@ -135,10 +135,9 @@ config TRACING_SUPPORT
depends on STACKTRACE_SUPPORT
default y
if TRACING_SUPPORT
menuconfig FTRACE
bool "Tracers"
depends on TRACING_SUPPORT
default y if DEBUG_KERNEL
help
Enable the kernel tracing infrastructure.
@@ -1038,6 +1037,3 @@ config HIST_TRIGGERS_DEBUG
If unsure, say N.
endif # FTRACE
endif # TRACING_SUPPORT

View File

@@ -77,6 +77,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
obj-$(CONFIG_PROBE_EVENTS) += trace_eprobe.o
obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o
obj-$(CONFIG_SYNTH_EVENTS) += trace_events_synth.o
obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o

View File

@@ -2111,7 +2111,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
}
}
get_online_cpus();
cpus_read_lock();
/*
* Fire off all the required work handlers
* We can't schedule on offline CPUs, but it's not necessary
@@ -2143,7 +2143,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
cpu_buffer->nr_pages_to_update = 0;
}
put_online_cpus();
cpus_read_unlock();
} else {
cpu_buffer = buffer->buffers[cpu_id];
@@ -2171,7 +2171,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
goto out_err;
}
get_online_cpus();
cpus_read_lock();
/* Can't run something on an offline CPU. */
if (!cpu_online(cpu_id))
@@ -2183,7 +2183,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
}
cpu_buffer->nr_pages_to_update = 0;
put_online_cpus();
cpus_read_unlock();
}
out:

View File

@@ -3698,11 +3698,11 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str)
return false;
event = container_of(trace_event, struct trace_event_call, event);
if (!event->mod)
if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
return false;
/* Would rather have rodata, but this will suffice */
if (within_module_core(addr, event->mod))
if (within_module_core(addr, event->module))
return true;
return false;
@@ -5544,6 +5544,7 @@ static const char readme_msg[] =
#ifdef CONFIG_HIST_TRIGGERS
"\t s:[synthetic/]<event> <field> [<field>]\n"
#endif
"\t e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
"\t -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
@@ -5553,7 +5554,7 @@ static const char readme_msg[] =
" place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
"\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#else
@@ -5568,6 +5569,8 @@ static const char readme_msg[] =
"\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
"\t [unsigned] char/int/long\n"
#endif
"\t efield: For event probes ('e' types), the field is on of the fields\n"
"\t of the <attached-group>/<attached-event>.\n"
#endif
" events/\t\t- Directory containing all trace event subsystems:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
@@ -5655,6 +5658,7 @@ static const char readme_msg[] =
"\t .execname display a common_pid as a program name\n"
"\t .syscall display a syscall id as a syscall name\n"
"\t .log2 display log2 value rather than raw number\n"
"\t .buckets=size display values in groups of size rather than raw number\n"
"\t .usecs display a common_timestamp in microseconds\n\n"
"\t The 'pause' parameter can be used to pause an existing hist\n"
"\t trigger or to start a hist trigger but not log any events\n"

View File

@@ -126,6 +126,11 @@ struct kprobe_trace_entry_head {
unsigned long ip;
};
/*
 * Fixed-size head of an event probe trace record: the common trace_entry
 * followed by one unsigned int.
 * NOTE(review): the meaning of 'type' is not visible here - confirm against
 * the code that fills in the record.
 */
struct eprobe_trace_entry_head {
struct trace_entry ent;
unsigned int type;
};
struct kretprobe_trace_entry_head {
struct trace_entry ent;
unsigned long func;
@@ -1508,9 +1513,14 @@ static inline int register_trigger_hist_enable_disable_cmds(void) { return 0; }
extern int register_trigger_cmds(void);
extern void clear_event_triggers(struct trace_array *tr);
/*
 * Bit flags; presumably stored in event_trigger_data::flags - TODO confirm
 * against the users of EVENT_TRIGGER_FL_PROBE.
 */
enum {
EVENT_TRIGGER_FL_PROBE = BIT(0),
};
struct event_trigger_data {
unsigned long count;
int ref;
int flags;
struct event_trigger_ops *ops;
struct event_command *cmd_ops;
struct event_filter __rcu *filter;
@@ -1918,6 +1928,14 @@ static inline bool is_good_name(const char *name)
return true;
}
/*
 * Convert certain expected symbols into '_' when generating event names.
 *
 * Every ':' and '.' in @name is replaced in place with '_', including one
 * in the very first position. @name must be a writable, NUL-terminated
 * string.
 */
static inline void sanitize_event_name(char *name)
{
	/* Test the current character before advancing so index 0 is covered. */
	for (; *name != '\0'; name++) {
		if (*name == ':' || *name == '.')
			*name = '_';
	}
}
/*
* This is a generic way to read and write a u64 value from a file in tracefs.
*

View File

@@ -171,6 +171,290 @@ trace_boot_add_synth_event(struct xbc_node *node, const char *event)
}
#endif
#ifdef CONFIG_HIST_TRIGGERS
/*
 * Append formatted text at *bufp, never writing at or beyond @end.
 *
 * Returns the number of characters appended and advances *bufp on success.
 * Returns -ENOSPC if the buffer was already marked full (*bufp == end), or
 * -ERANGE if the text did not fit; in the latter case *bufp is set to @end
 * so that later appends and the caller can detect the overflow.
 */
static int __init __printf(3, 4)
append_printf(char **bufp, char *end, const char *fmt, ...)
{
	va_list ap;
	int written;

	if (*bufp == end)
		return -ENOSPC;

	va_start(ap, fmt);
	written = vsnprintf(*bufp, end - *bufp, fmt, ap);
	va_end(ap);

	if (written >= end - *bufp) {
		/* Truncated: mark the buffer as exhausted. */
		*bufp = end;
		return -ERANGE;
	}

	*bufp += written;
	return written;
}
/*
 * Append @str at *bufp with all whitespace characters removed.
 *
 * Returns the number of characters appended and advances *bufp on success.
 * Returns -ENOSPC when the buffer is (or becomes) full; *bufp is then equal
 * to @end, the same "buffer exhausted" marker append_printf() uses.
 */
static int __init
append_str_nospace(char **bufp, char *end, const char *str)
{
	char *p = *bufp;
	int len;

	/*
	 * A previous append may already have exhausted the buffer. Storing
	 * the terminator below would then write one byte past @end.
	 */
	if (p >= end)
		return -ENOSPC;

	while (p < end - 1 && *str != '\0') {
		if (!isspace(*str))
			*(p++) = *str;
		str++;
	}
	*p = '\0';

	/* Reaching the last byte is treated as "did not fit". */
	if (p == end - 1) {
		*bufp = end;
		return -ENOSPC;
	}

	len = p - *bufp;
	*bufp = p;
	return len;
}
/*
 * Append ":<key>=<v1>[,<v2>...]" built from the array values of the @key
 * child of @hnode.
 *
 * Returns 0 on success, -ENOENT if @hnode has no @key child, or -EINVAL if
 * the child exists but does not carry value(s).
 */
static int __init
trace_boot_hist_add_array(struct xbc_node *hnode, char **bufp,
			  char *end, const char *key)
{
	struct xbc_node *knode, *anode;
	const char *p;
	char sep;

	knode = xbc_node_find_child(hnode, key);
	if (!knode)
		return -ENOENT;

	anode = xbc_node_get_child(knode);
	/*
	 * The child must be a value node, not a subkey; this is the same
	 * check trace_boot_hist_add_one_handler() applies to its arrays.
	 */
	if (!anode || !xbc_node_is_value(anode)) {
		pr_err("hist.%s requires value(s).\n", key);
		return -EINVAL;
	}

	append_printf(bufp, end, ":%s", key);
	sep = '=';
	xbc_array_for_each_value(anode, p) {
		append_printf(bufp, end, "%c%s", sep, p);
		if (sep == '=')
			sep = ',';
	}

	return 0;
}
/*
 * Append one ":<handler>(<param value>).<action>(...)" clause for @hnode.
 *
 * The handler parameter is read from the @param option of @hnode. The
 * action is taken from the first of "trace", "save" or "snapshot" found
 * under @hnode; "trace" and "save" need at least one value.
 *
 * Returns 0 on success or -EINVAL if the parameter or the action is missing
 * or malformed.
 */
static int __init
trace_boot_hist_add_one_handler(struct xbc_node *hnode, char **bufp,
				char *end, const char *handler,
				const char *param)
{
	struct xbc_node *action, *args;
	const char *val;
	char delim = '(';

	/* Compose 'handler' parameter */
	val = xbc_node_find_value(hnode, param, NULL);
	if (!val) {
		pr_err("hist.%s requires '%s' option.\n",
		       xbc_node_get_data(hnode), param);
		return -EINVAL;
	}
	append_printf(bufp, end, ":%s(%s)", handler, val);

	/* Compose 'action' parameter */
	action = xbc_node_find_child(hnode, "trace");
	if (!action)
		action = xbc_node_find_child(hnode, "save");

	if (!action) {
		/* Neither trace nor save: only a bare snapshot is left. */
		if (!xbc_node_find_child(hnode, "snapshot")) {
			pr_err("hist.%s requires an action.\n",
			       xbc_node_get_data(hnode));
			return -EINVAL;
		}
		append_printf(bufp, end, ".snapshot()");
		return 0;
	}

	args = xbc_node_get_child(action);
	if (!args || !xbc_node_is_value(args)) {
		pr_err("hist.%s.%s requires value(s).\n",
		       xbc_node_get_data(hnode),
		       xbc_node_get_data(action));
		return -EINVAL;
	}

	append_printf(bufp, end, ".%s", xbc_node_get_data(action));
	xbc_array_for_each_value(args, val) {
		append_printf(bufp, end, "%c%s", delim, val);
		/* '(' opens the list, every later value is comma separated */
		delim = ',';
	}
	append_printf(bufp, end, ")");

	return 0;
}
/*
 * Append all handler clauses found under the handler node @hnode.
 *
 * Subkeys of @hnode whose name starts with a digit are treated as separate
 * handler instances. If @hnode itself has a @param child it is also handled
 * as an un-numbered instance.
 *
 * Returns 0 if nothing had to be added, the result of the last
 * trace_boot_hist_add_one_handler() call on success, or its negative error
 * code as soon as one instance fails.
 */
static int __init
trace_boot_hist_add_handlers(struct xbc_node *hnode, char **bufp,
			     char *end, const char *param)
{
	struct xbc_node *node;
	const char *p, *handler;
	/* Must start defined: both the loop and the tail may add nothing. */
	int ret = 0;

	handler = xbc_node_get_data(hnode);

	xbc_node_for_each_subkey(hnode, node) {
		p = xbc_node_get_data(node);
		if (!isdigit(p[0]))
			continue;
		/* All digit started node should be instances. */
		ret = trace_boot_hist_add_one_handler(node, bufp, end, handler, param);
		/* Report the failure; do not let the default handler mask it. */
		if (ret < 0)
			return ret;
	}

	if (xbc_node_find_child(hnode, param))
		ret = trace_boot_hist_add_one_handler(hnode, bufp, end, handler, param);

	return ret;
}
/*
 * Histogram boottime tracing syntax.
 *
 * ftrace.[instance.INSTANCE.]event.GROUP.EVENT.hist[.N] {
 *	keys = <KEY>[,...]
 *	values = <VAL>[,...]
 *	sort = <SORT-KEY>[,...]
 *	size = <ENTRIES>
 *	name = <HISTNAME>
 *	var { <VAR> = <EXPR> ... }
 *	pause|continue|clear
 *	onmax|onchange[.N] { var = <VAR>; <ACTION> [= <PARAM>] }
 *	onmatch[.N] { event = <EVENT>; <ACTION> [= <PARAM>] }
 *	filter = <FILTER>
 * }
 *
 * Where <ACTION> are;
 *
 *	trace = <EVENT>, <ARG1>[, ...]
 *	save = <ARG1>[, ...]
 *	snapshot
 *
 * trace_boot_compose_hist_cmd() turns the options under @hnode into a
 * "hist:..." trigger command string in @buf (at most @size bytes).
 *
 * Returns 0 on success, -EINVAL if "keys" is missing or an option is
 * malformed, or -E2BIG if the command filled the whole buffer.
 */
static int __init
trace_boot_compose_hist_cmd(struct xbc_node *hnode, char *buf, size_t size)
{
	/* Handler nodes and the option naming each handler's parameter. */
	const struct {
		const char *key;
		const char *param;
	} handlers[] = {
		{ "onmax",    "var"   },
		{ "onchange", "var"   },
		{ "onmatch",  "event" },
	};
	struct xbc_node *sub, *var;
	char *limit = buf + size;
	const char *val;
	unsigned int i;
	int err;

	append_printf(&buf, limit, "hist");

	/* "keys" is the only mandatory option. */
	err = trace_boot_hist_add_array(hnode, &buf, limit, "keys");
	if (err < 0) {
		if (err == -ENOENT)
			pr_err("hist requires keys.\n");
		return -EINVAL;
	}

	/* "values" and "sort" may be absent; only a malformed one is fatal. */
	err = trace_boot_hist_add_array(hnode, &buf, limit, "values");
	if (err == -EINVAL)
		return err;

	err = trace_boot_hist_add_array(hnode, &buf, limit, "sort");
	if (err == -EINVAL)
		return err;

	val = xbc_node_find_value(hnode, "size", NULL);
	if (val)
		append_printf(&buf, limit, ":size=%s", val);

	val = xbc_node_find_value(hnode, "name", NULL);
	if (val)
		append_printf(&buf, limit, ":name=%s", val);

	var = xbc_node_find_child(hnode, "var");
	if (var) {
		xbc_node_for_each_key_value(var, sub, val) {
			/* Expression must not include spaces. */
			append_printf(&buf, limit, ":%s=",
				      xbc_node_get_data(sub));
			append_str_nospace(&buf, limit, val);
		}
	}

	/* Histogram control attributes (mutually exclusive, first one wins) */
	if (xbc_node_find_child(hnode, "pause"))
		append_printf(&buf, limit, ":pause");
	else if (xbc_node_find_child(hnode, "continue"))
		append_printf(&buf, limit, ":continue");
	else if (xbc_node_find_child(hnode, "clear"))
		append_printf(&buf, limit, ":clear");

	/* Histogram handler and actions */
	for (i = 0; i < ARRAY_SIZE(handlers); i++) {
		sub = xbc_node_find_child(hnode, handlers[i].key);
		if (sub && trace_boot_hist_add_handlers(sub, &buf, limit,
							handlers[i].param) < 0)
			return -EINVAL;
	}

	val = xbc_node_find_value(hnode, "filter", NULL);
	if (val)
		append_printf(&buf, limit, " if %s", val);

	/* The append helpers park the cursor at the end on overflow. */
	if (buf == limit) {
		pr_err("hist exceeds the max command length.\n");
		return -E2BIG;
	}

	return 0;
}
static void __init
trace_boot_init_histograms(struct trace_event_file *file,
struct xbc_node *hnode, char *buf, size_t size)
{
struct xbc_node *node;
const char *p;
char *tmp;
xbc_node_for_each_subkey(hnode, node) {
p = xbc_node_get_data(node);
if (!isdigit(p[0]))
continue;
/* All digit started node should be instances. */
if (trace_boot_compose_hist_cmd(node, buf, size) == 0) {
tmp = kstrdup(buf, GFP_KERNEL);
if (trigger_process_regex(file, buf) < 0)
pr_err("Failed to apply hist trigger: %s\n", tmp);
kfree(tmp);
}
}
if (xbc_node_find_child(hnode, "keys")) {
if (trace_boot_compose_hist_cmd(hnode, buf, size) == 0) {
tmp = kstrdup(buf, GFP_KERNEL);
if (trigger_process_regex(file, buf) < 0)
pr_err("Failed to apply hist trigger: %s\n", tmp);
kfree(tmp);
}
}
}
#else
/* Stub used when CONFIG_HIST_TRIGGERS is not set: 'hist' options are ignored. */
static void __init
trace_boot_init_histograms(struct trace_event_file *file,
struct xbc_node *hnode, char *buf, size_t size)
{
/* do nothing */
}
#endif
static void __init
trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
struct xbc_node *enode)
@@ -205,12 +489,18 @@ trace_boot_init_one_event(struct trace_array *tr, struct xbc_node *gnode,
pr_err("Failed to apply filter: %s\n", buf);
}
xbc_node_for_each_array_value(enode, "actions", anode, p) {
if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
pr_err("action string is too long: %s\n", p);
else if (trigger_process_regex(file, buf) < 0)
pr_err("Failed to apply an action: %s\n", buf);
}
if (IS_ENABLED(CONFIG_HIST_TRIGGERS)) {
xbc_node_for_each_array_value(enode, "actions", anode, p) {
if (strlcpy(buf, p, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
pr_err("action string is too long: %s\n", p);
else if (trigger_process_regex(file, buf) < 0)
pr_err("Failed to apply an action: %s\n", p);
}
anode = xbc_node_find_child(enode, "hist");
if (anode)
trace_boot_init_histograms(file, anode, buf, ARRAY_SIZE(buf));
} else if (xbc_node_find_value(enode, "actions", NULL))
pr_err("Failed to apply event actions because CONFIG_HIST_TRIGGERS is not set.\n");
if (xbc_node_find_value(enode, "enable", NULL)) {
if (trace_event_enable_disable(file, 1, 0) < 0)

View File

@@ -13,11 +13,49 @@
#include <linux/tracefs.h>
#include "trace.h"
#include "trace_output.h" /* for trace_event_sem */
#include "trace_dynevent.h"
static DEFINE_MUTEX(dyn_event_ops_mutex);
static LIST_HEAD(dyn_event_ops_list);
/*
 * Take a reference on the dynamic event @dyn_call, but only if it is still
 * on the ftrace_events list. The list is walked under trace_event_sem held
 * for reading.
 *
 * Returns true if the reference count was incremented, false if the event
 * was not found or is not flagged TRACE_EVENT_FL_DYNAMIC (which also warns
 * once).
 */
bool trace_event_dyn_try_get_ref(struct trace_event_call *dyn_call)
{
	struct trace_event_call *iter;
	bool found = false;

	if (WARN_ON_ONCE(!(dyn_call->flags & TRACE_EVENT_FL_DYNAMIC)))
		return found;

	down_read(&trace_event_sem);
	list_for_each_entry(iter, &ftrace_events, list) {
		if (iter != dyn_call)
			continue;
		atomic_inc(&dyn_call->refcnt);
		found = true;
	}
	up_read(&trace_event_sem);

	return found;
}
/*
 * Drop one reference on the dynamic event @call.
 *
 * Warns once and does nothing if @call is not flagged
 * TRACE_EVENT_FL_DYNAMIC. Warns once and resets the count to 0 if it is
 * already zero or negative.
 */
void trace_event_dyn_put_ref(struct trace_event_call *call)
{
	if (WARN_ON_ONCE(!(call->flags & TRACE_EVENT_FL_DYNAMIC)))
		return;

	if (!WARN_ON_ONCE(atomic_read(&call->refcnt) <= 0)) {
		atomic_dec(&call->refcnt);
		return;
	}

	/* Unbalanced put: clamp rather than going (further) negative. */
	atomic_set(&call->refcnt, 0);
}
/* Returns true while the reference count of @call is non-zero. */
bool trace_event_dyn_busy(struct trace_event_call *call)
{
	int refs = atomic_read(&call->refcnt);

	return refs != 0;
}
int dyn_event_register(struct dyn_event_operations *ops)
{
if (!ops || !ops->create || !ops->show || !ops->is_busy ||

View File

@@ -76,13 +76,15 @@ int dyn_event_init(struct dyn_event *ev, struct dyn_event_operations *ops)
return 0;
}
static inline int dyn_event_add(struct dyn_event *ev)
static inline int dyn_event_add(struct dyn_event *ev,
struct trace_event_call *call)
{
lockdep_assert_held(&event_mutex);
if (!ev || !ev->ops)
return -EINVAL;
call->flags |= TRACE_EVENT_FL_DYNAMIC;
list_add_tail(&ev->list, &dyn_event_list);
return 0;
}

903
kernel/trace/trace_eprobe.c Normal file
View File

@@ -0,0 +1,903 @@
// SPDX-License-Identifier: GPL-2.0
/*
* event probes
*
* Part of this code was copied from kernel/trace/trace_kprobe.c written by
* Masami Hiramatsu <mhiramat@kernel.org>
*
* Copyright (C) 2021, VMware Inc, Steven Rostedt <rostedt@goodmis.org>
* Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
*
*/
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>
#include "trace_dynevent.h"
#include "trace_probe.h"
#include "trace_probe_tmpl.h"
#define EPROBE_EVENT_SYSTEM "eprobes"
/*
 * One event probe: a probe attached to an existing trace event.
 *
 * event_system and event_name are private kstrdup() copies taken from
 * 'event' in alloc_event_probe() and freed in trace_event_probe_cleanup().
 * 'event' is released with trace_event_put_ref() on cleanup.
 * 'tp' is allocated together with its trailing args via
 * struct_size(ep, tp.args, nargs), so it has to stay the last member.
 */
struct trace_eprobe {
/* tracepoint system */
const char *event_system;
/* tracepoint event */
const char *event_name;
struct trace_event_call *event;
struct dyn_event devent;
struct trace_probe tp;
};
/*
 * Pairs a trace event file with an event probe.
 * NOTE(review): the users of this struct are outside the visible code;
 * presumably it is per-trigger private data - confirm.
 */
struct eprobe_data {
struct trace_event_file *file;
struct trace_eprobe *ep;
};
static int __trace_eprobe_create(int argc, const char *argv[]);
static void trace_event_probe_cleanup(struct trace_eprobe *ep)
{
if (!ep)
return;
trace_probe_cleanup(&ep->tp);
kfree(ep->event_name);
kfree(ep->event_system);
if (ep->event)
trace_event_put_ref(ep->event);
kfree(ep);
}
/* Map an embedded dyn_event back to the trace_eprobe that contains it. */
static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
{
	struct trace_eprobe *ep = container_of(ev, struct trace_eprobe, devent);

	return ep;
}
/*
 * dyn_event 'create' callback: hand @raw_command to trace_probe_create(),
 * with __trace_eprobe_create() as the creation function.
 */
static int eprobe_dyn_event_create(const char *raw_command)
{
	int ret = trace_probe_create(raw_command, __trace_eprobe_create);

	return ret;
}
static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev)
{
struct trace_eprobe *ep = to_trace_eprobe(ev);
int i;
seq_printf(m, "e:%s/%s", trace_probe_group_name(&ep->tp),
trace_probe_name(&ep->tp));
seq_printf(m, " %s.%s", ep->event_system, ep->event_name);
for (i = 0; i < ep->tp.nr_args; i++)
seq_printf(m, " %s=%s", ep->tp.args[i].name, ep->tp.args[i].comm);
seq_putc(m, '\n');
return 0;
}
/*
 * Remove @ep from the dynamic event list and unlink its trace_probe.
 *
 * When @ep is the only probe on its event, the event call is unregistered
 * too; that is refused with -EBUSY while the probe is enabled or while
 * unregistering the event call fails. Returns 0 on success.
 */
static int unregister_trace_eprobe(struct trace_eprobe *ep)
{
	struct trace_probe *tp = &ep->tp;

	/* If other probes are on the event, just unregister eprobe */
	if (!trace_probe_has_sibling(tp)) {
		/* Enabled event can not be unregistered */
		if (trace_probe_is_enabled(tp))
			return -EBUSY;

		/* Will fail if probe is being used by ftrace or perf */
		if (trace_probe_unregister_event_call(tp))
			return -EBUSY;
	}

	dyn_event_remove(&ep->devent);
	trace_probe_unlink(tp);

	return 0;
}
/*
 * dyn_event 'free' callback: unregister the probe and, only if that
 * succeeded, free it. Returns the result of unregister_trace_eprobe().
 */
static int eprobe_dyn_event_release(struct dyn_event *ev)
{
	struct trace_eprobe *ep = to_trace_eprobe(ev);
	int err;

	err = unregister_trace_eprobe(ep);
	if (err)
		return err;

	trace_event_probe_cleanup(ep);
	return 0;
}
static bool eprobe_dyn_event_is_busy(struct dyn_event *ev)
{
struct trace_eprobe *ep = to_trace_eprobe(ev);
return trace_probe_is_enabled(&ep->tp);
}
/*
 * dyn_event 'match' callback: true if @ev is named @event, belongs to
 * @system (a NULL @system matches any group) and its arguments match the
 * given @argc/@argv command arguments.
 */
static bool eprobe_dyn_event_match(const char *system, const char *event,
			int argc, const char **argv, struct dyn_event *ev)
{
	struct trace_eprobe *ep = to_trace_eprobe(ev);

	if (strcmp(trace_probe_name(&ep->tp), event) != 0)
		return false;

	if (system && strcmp(trace_probe_group_name(&ep->tp), system) != 0)
		return false;

	return trace_probe_match_command_args(&ep->tp, argc, argv);
}
/*
 * Callbacks that tie event probes into the dynamic event framework; this
 * table is passed to dyn_event_init() when a probe is allocated.
 */
static struct dyn_event_operations eprobe_dyn_event_ops = {
.create = eprobe_dyn_event_create,
.show = eprobe_dyn_event_show,
.is_busy = eprobe_dyn_event_is_busy,
.free = eprobe_dyn_event_release,
.match = eprobe_dyn_event_match,
};
/*
 * Allocate and initialise an event probe attached to @event.
 *
 * @group:      group name, passed to trace_probe_init()
 * @this_event: probe event name, passed to trace_probe_init()
 * @event:      event to attach to; NULL yields ERR_PTR(-ENODEV)
 * @nargs:      number of probe arguments to reserve room for
 *
 * On every failure after the NULL check, trace_event_put_ref() is called
 * on @event, either directly or through trace_event_probe_cleanup().
 *
 * Returns the new probe, or an ERR_PTR() on failure.
 */
static struct trace_eprobe *alloc_event_probe(const char *group,
					      const char *this_event,
					      struct trace_event_call *event,
					      int nargs)
{
	struct trace_eprobe *ep;
	const char *event_name;
	const char *sys_name;
	int ret = -ENOMEM;

	if (!event)
		return ERR_PTR(-ENODEV);

	sys_name = event->class->system;
	event_name = trace_event_name(event);

	ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
	if (!ep) {
		/*
		 * There is no probe to clean up yet, so release @event
		 * directly; 'ep' is NULL here and must not be dereferenced.
		 */
		trace_event_put_ref(event);
		return ERR_PTR(-ENOMEM);
	}

	/* From here on trace_event_probe_cleanup() releases @event. */
	ep->event = event;

	ep->event_name = kstrdup(event_name, GFP_KERNEL);
	if (!ep->event_name)
		goto error;

	ep->event_system = kstrdup(sys_name, GFP_KERNEL);
	if (!ep->event_system)
		goto error;

	ret = trace_probe_init(&ep->tp, this_event, group, false);
	if (ret < 0)
		goto error;

	dyn_event_init(&ep->devent, &eprobe_dyn_event_ops);
	return ep;

error:
	trace_event_probe_cleanup(ep);
	return ERR_PTR(ret);
}
static int trace_eprobe_tp_arg_update(struct trace_eprobe *ep, int i)
{
struct probe_arg *parg = &ep->tp.args[i];
struct ftrace_event_field *field;
struct list_head *head;
head = trace_get_fields(ep->event);
list_for_each_entry(field, head, link) {
if (!strcmp(parg->code->data, field->name)) {
kfree(parg->code->data);
parg->code->data = field;
return 0;
}
}
kfree(parg->code->data);
parg->code->data = NULL;
return -ENOENT;
}
/*
 * ".define_fields" callback for eprobe events: defines the fixed "type"
 * field of the entry header and then the per-argument fields of the probe.
 * Returns -ENOENT when no primary trace_probe is attached to @event_call,
 * otherwise the result of traceprobe_define_arg_fields().
 */
static int eprobe_event_define_fields(struct trace_event_call *event_call)
{
/*
 * NOTE(review): 'ret' and 'field' are not referenced directly below;
 * presumably DEFINE_FIELD() uses them by name -- keep the names as they are.
 */
int ret;
struct eprobe_trace_entry_head field;
struct trace_probe *tp;
tp = trace_probe_primary_from_call(event_call);
if (WARN_ON_ONCE(!tp))
return -ENOENT;
DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
}
/*
 * Field description table installed as the class fields_array of every
 * eprobe event by init_trace_eprobe_call(). The single real entry defers to
 * eprobe_event_define_fields(); the empty entry terminates the array.
 */
static struct trace_event_fields eprobe_fields_array[] = {
{ .type = TRACE_FUNCTION_TYPE,
.define_fields = eprobe_event_define_fields },
{}
};
/* Event entry printers */

/*
 * Format one eprobe record as
 *   NAME: (SYSTEM.EVENT) ARGS...\n
 * where SYSTEM.EVENT is the event the record was taken from; if that event
 * can no longer be looked up by type, its numeric type is printed instead.
 * Output stops early (no newline) if printing the arguments fails.
 */
static enum print_line_t
print_eprobe_event(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct eprobe_trace_entry_head *entry =
		(struct eprobe_trace_entry_head *)iter->ent;
	struct trace_seq *seq = &iter->seq;
	struct trace_event_call *src_call;
	struct trace_event *src_event;
	struct trace_probe *tp;

	tp = trace_probe_primary_from_call(
		container_of(event, struct trace_event_call, event));
	if (WARN_ON_ONCE(!tp))
		return trace_handle_return(seq);

	trace_seq_printf(seq, "%s: (", trace_probe_name(tp));

	src_event = ftrace_find_event(entry->type);
	if (!src_event) {
		trace_seq_printf(seq, "%u", entry->type);
	} else {
		src_call = container_of(src_event, struct trace_event_call,
					event);
		trace_seq_printf(seq, "%s.%s", src_call->class->system,
				 trace_event_name(src_call));
	}

	trace_seq_putc(seq, ')');

	/* The argument data starts right behind the entry header. */
	if (print_probe_args(seq, tp->args, tp->nr_args,
			     (u8 *)&entry[1], entry) >= 0)
		trace_seq_putc(seq, '\n');

	return trace_handle_return(seq);
}
/*
 * Load the value of an event field from the raw record @rec.
 *
 * @code: fetch instruction whose ->data points at the ftrace_event_field
 *        describing the field (set up by trace_eprobe_tp_arg_update())
 * @rec:  start of the record the field offset is relative to
 *
 * The field is read with the width given by field->size (1, 2 or 4 bytes;
 * every other size is read as a long) and is sign- or zero-extended to
 * unsigned long according to field->is_signed.
 */
static unsigned long get_event_field(struct fetch_insn *code, void *rec)
{
	struct ftrace_event_field *field = code->data;
	unsigned long val;
	void *addr;

	addr = rec + field->offset;

	switch (field->size) {
	case 1:
		/*
		 * Use an explicitly signed type: plain 'char' is unsigned on
		 * some architectures, which would zero-extend signed fields.
		 */
		if (field->is_signed)
			val = *(signed char *)addr;
		else
			val = *(unsigned char *)addr;
		break;
	case 2:
		if (field->is_signed)
			val = *(short *)addr;
		else
			val = *(unsigned short *)addr;
		break;
	case 4:
		if (field->is_signed)
			val = *(int *)addr;
		else
			val = *(unsigned int *)addr;
		break;
	default:
		if (field->is_signed)
			val = *(long *)addr;
		else
			val = *(unsigned long *)addr;
		break;
	}

	return val;
}
/*
 * Sum up the space needed for the dynamic arguments of @tp when recording
 * @rec. Only arguments flagged dynamic contribute; for each one the fetch
 * program is run without a destination and a positive result is added to
 * the total.
 */
static int get_eprobe_size(struct trace_probe *tp, void *rec)
{
	int total = 0;
	int idx;

	for (idx = 0; idx < tp->nr_args; idx++) {
		struct probe_arg *arg = &tp->args[idx];
		unsigned long val;
		int len;

		if (likely(!arg->dynamic))
			continue;

		val = get_event_field(arg->code, rec);
		len = process_fetch_insn_bottom(arg->code + 1, val, NULL, NULL);
		if (len > 0)
			total += len;
	}

	return total;
}
/*
 * Fetch entry point for eprobe arguments: the first instruction always reads
 * an event field from @rec, the remaining instructions are run by
 * process_fetch_insn_bottom() on that value.
 */
/* Note that we don't verify it, since the code does not come from user space */
static int
process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
		   void *base)
{
	return process_fetch_insn_bottom(code + 1, get_event_field(code, rec),
					 dest, base);
}
NOKPROBE_SYMBOL(process_fetch_insn)
/* Return the length of string -- including null terminal byte */
static nokprobe_inline int
fetch_store_strlen_user(unsigned long addr)
{
const void __user *uaddr = (__force const void __user *)addr;
return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
}
/*
 * Length of the string at @addr, including the terminating NUL, bounded by
 * MAX_STRING_SIZE. Returns the negative error of the failing read instead
 * if a byte could not be read.
 */
static nokprobe_inline int
fetch_store_strlen(unsigned long addr)
{
	int err, count = 0;
	u8 ch;

#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	if (addr < TASK_SIZE)
		return fetch_store_strlen_user(addr);
#endif

	/* Walk the string one byte at a time with a fault-safe read. */
	for (;;) {
		err = copy_from_kernel_nofault(&ch, (u8 *)addr + count, 1);
		count++;
		if (!ch || err != 0 || count >= MAX_STRING_SIZE)
			break;
	}

	if (err < 0)
		return err;
	return count;
}
/*
 * Copy a NUL-terminated user-space string into the dynamic data area.
 *
 * The caller MUST have stored the maximum length and the relative data
 * location in *(u32 *)dest. On success that word is rewritten with the
 * copied length and the data offset from @base.
 *
 * Returns the result of strncpy_from_user_nofault(), or -ENOMEM when the
 * maximum length is zero.
 */
static nokprobe_inline int
fetch_store_string_user(unsigned long addr, void *dest, void *base)
{
	const void __user *uaddr = (__force const void __user *)addr;
	int maxlen = get_loc_len(*(u32 *)dest);
	void *out;
	long copied;

	if (unlikely(!maxlen))
		return -ENOMEM;

	out = get_loc_data(dest, base);

	copied = strncpy_from_user_nofault(out, uaddr, maxlen);
	if (copied < 0)
		return copied;

	*(u32 *)dest = make_data_loc(copied, out - base);
	return copied;
}
/*
 * Copy a NUL-terminated string into the dynamic data area.
 *
 * The caller MUST have stored the maximum length and the relative data
 * location in *(u32 *)dest. On success that word is rewritten with the
 * copied length and the data offset from @base.
 *
 * Returns the result of the underlying nofault string copy, or -ENOMEM when
 * the maximum length is zero.
 */
static nokprobe_inline int
fetch_store_string(unsigned long addr, void *dest, void *base)
{
	int maxlen = get_loc_len(*(u32 *)dest);
	void *out;
	long copied;

#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	if ((unsigned long)addr < TASK_SIZE)
		return fetch_store_string_user(addr, dest, base);
#endif

	if (unlikely(!maxlen))
		return -ENOMEM;

	out = get_loc_data(dest, base);

	/*
	 * Try to get string again, since the string can be changed while
	 * probing.
	 */
	copied = strncpy_from_kernel_nofault(out, (void *)addr, maxlen);
	if (copied < 0)
		return copied;

	*(u32 *)dest = make_data_loc(copied, out - base);
	return copied;
}
/* Fault-safe read of @size bytes from the user-space address @src into @dest. */
static nokprobe_inline int
probe_mem_read_user(void *dest, void *src, size_t size)
{
	return copy_from_user_nofault(dest,
				      (__force const void __user *)src, size);
}
/*
 * Fault-safe read of @size bytes from @src into @dest. With
 * CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE, addresses below TASK_SIZE
 * are read as user memory; everything else is read as kernel memory.
 */
static nokprobe_inline int
probe_mem_read(void *dest, void *src, size_t size)
{
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
	if ((unsigned long)src < TASK_SIZE)
		return copy_from_user_nofault(dest,
				(__force const void __user *)src, size);
#endif
	return copy_from_kernel_nofault(dest, src, size);
}
/* eprobe handler */
static inline void
__eprobe_trace_func(struct eprobe_data *edata, void *rec)
{
struct eprobe_trace_entry_head *entry;
struct trace_event_call *call = trace_probe_event_call(&edata->ep->tp);
struct trace_event_buffer fbuffer;
int dsize;
if (WARN_ON_ONCE(call != edata->file->event_call))
return;
if (trace_trigger_soft_disabled(edata->file))
return;
fbuffer.trace_ctx = tracing_gen_ctx();
fbuffer.trace_file = edata->file;
dsize = get_eprobe_size(&edata->ep->tp, rec);
fbuffer.regs = NULL;
fbuffer.event =
trace_event_buffer_lock_reserve(&fbuffer.buffer, edata->file,
call->event.type,
sizeof(*entry) + edata->ep->tp.size + dsize,
fbuffer.trace_ctx);
if (!fbuffer.event)
return;
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
if (edata->ep->event)
entry->type = edata->ep->event->event.type;
else
entry->type = 0;
store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
}
/*
* The event probe implementation uses event triggers to get access to
* the event it is attached to, but is not an actual trigger. The below
* functions are just stubs to fulfill what is needed to use the trigger
* infrastructure.
*/
static int eprobe_trigger_init(struct event_trigger_ops *ops,
struct event_trigger_data *data)
{
return 0;
}
static void eprobe_trigger_free(struct event_trigger_ops *ops,
struct event_trigger_data *data)
{
}
static int eprobe_trigger_print(struct seq_file *m,
struct event_trigger_ops *ops,
struct event_trigger_data *data)
{
/* Do not print eprobe event triggers */
return 0;
}
/*
 * ".func" trigger callback: forwards the triggering record @rec to the
 * eprobe handler, using the eprobe_data stored in the trigger's
 * private_data. @buffer and @rbe are unused.
 */
static void eprobe_trigger_func(struct event_trigger_data *data,
				struct trace_buffer *buffer, void *rec,
				struct ring_buffer_event *rbe)
{
	__eprobe_trace_func(data->private_data, rec);
}
/*
 * Trigger operations used for every eprobe trigger (see
 * new_eprobe_trigger()). Only ".func" does real work; the other callbacks
 * are stubs.
 */
static struct event_trigger_ops eprobe_trigger_ops = {
.func = eprobe_trigger_func,
.print = eprobe_trigger_print,
.init = eprobe_trigger_init,
.free = eprobe_trigger_free,
};
static int eprobe_trigger_cmd_func(struct event_command *cmd_ops,
struct trace_event_file *file,
char *glob, char *cmd, char *param)
{
return -1;
}
static int eprobe_trigger_reg_func(char *glob, struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file)
{
return -1;
}
static void eprobe_trigger_unreg_func(char *glob, struct event_trigger_ops *ops,
struct event_trigger_data *data,
struct trace_event_file *file)
{
}
/* ".get_trigger_ops" callback: always returns eprobe_trigger_ops; arguments are ignored. */
static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
char *param)
{
return &eprobe_trigger_ops;
}
/*
 * Event command descriptor attached to eprobe triggers as their cmd_ops
 * (see new_eprobe_trigger()). Its func/reg/unreg callbacks are the stubs
 * above; EVENT_CMD_FL_NEEDS_REC is set in .flags.
 */
static struct event_command event_trigger_cmd = {
.name = "eprobe",
.trigger_type = ETT_EVENT_EPROBE,
.flags = EVENT_CMD_FL_NEEDS_REC,
.func = eprobe_trigger_cmd_func,
.reg = eprobe_trigger_reg_func,
.unreg = eprobe_trigger_unreg_func,
.unreg_all = NULL,
.get_trigger_ops = eprobe_trigger_get_ops,
.set_filter = NULL,
};
/*
 * Allocate the trigger that connects eprobe @ep to its source event.
 *
 * @ep:   the eprobe that should fire
 * @file: the event file stored in the trigger's private eprobe_data
 *
 * Returns the new, not yet linked trigger, or ERR_PTR(-ENOMEM).
 */
static struct event_trigger_data *
new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
{
	struct eprobe_data *edata = kzalloc(sizeof(*edata), GFP_KERNEL);
	struct event_trigger_data *trigger = kzalloc(sizeof(*trigger),
						     GFP_KERNEL);

	if (!edata || !trigger)
		goto nomem;

	edata->file = file;
	edata->ep = ep;

	trigger->flags = EVENT_TRIGGER_FL_PROBE;
	trigger->count = -1;
	trigger->ops = &eprobe_trigger_ops;
	/*
	 * EVENT PROBE triggers are not registered as commands with
	 * register_event_command(), as they are not controlled by the user
	 * from the trigger file
	 */
	trigger->cmd_ops = &event_trigger_cmd;
	trigger->private_data = edata;

	INIT_LIST_HEAD(&trigger->list);
	RCU_INIT_POINTER(trigger->filter, NULL);

	return trigger;

nomem:
	/* kfree(NULL) is fine, so whichever allocation succeeded is released. */
	kfree(edata);
	kfree(trigger);
	return ERR_PTR(-ENOMEM);
}
/*
 * Attach eprobe @ep to its source event in the trace array of @eprobe_file
 * by adding an eprobe trigger to the source event's file.
 *
 * Returns 0, -ENOENT when the source event file cannot be found, or the
 * allocation error from new_eprobe_trigger().
 */
static int enable_eprobe(struct trace_eprobe *ep,
			 struct trace_event_file *eprobe_file)
{
	struct trace_event_file *target;
	struct event_trigger_data *trig;

	target = find_event_file(eprobe_file->tr, ep->event_system,
				 ep->event_name);
	if (!target)
		return -ENOENT;

	trig = new_eprobe_trigger(ep, eprobe_file);
	if (IS_ERR(trig))
		return PTR_ERR(trig);

	list_add_tail_rcu(&trig->list, &target->triggers);

	trace_event_trigger_enable_disable(target, 1);
	update_cond_flag(target);

	return 0;
}
/* Output callbacks for eprobe events; installed by init_trace_eprobe_call(). */
static struct trace_event_functions eprobe_funcs = {
.trace = print_eprobe_event
};
static int disable_eprobe(struct trace_eprobe *ep,
struct trace_array *tr)
{
struct event_trigger_data *trigger;
struct trace_event_file *file;
struct eprobe_data *edata;
file = find_event_file(tr, ep->event_system, ep->event_name);
if (!file)
return -ENOENT;
list_for_each_entry(trigger, &file->triggers, list) {
if (!(trigger->flags & EVENT_TRIGGER_FL_PROBE))
continue;
edata = trigger->private_data;
if (edata->ep == ep)
break;
}
if (list_entry_is_head(trigger, &file->triggers, list))
return -ENODEV;
list_del_rcu(&trigger->list);
trace_event_trigger_enable_disable(file, 0);
update_cond_flag(file);
return 0;
}
/*
 * Enable the eprobe event @call for @file.
 *
 * The file is linked to the probe (or, without a file, the PROFILE flag is
 * set). If the probe was not enabled before, every eprobe on the probe list
 * is attached to its source event. If attaching one of them fails, all
 * eprobes attached by this call are detached again and the file link/flag is
 * undone.
 *
 * Returns 0 on success, -ENODEV when @call has no primary trace_probe, or
 * the error of the failing step.
 *
 * NOTE(review): enable_eprobe() dereferences @file, so the file == NULL
 * branch cannot complete if the probe was not already enabled; the visible
 * caller always passes a file.
 */
static int enable_trace_eprobe(struct trace_event_call *call,
			       struct trace_event_file *file)
{
	struct trace_probe *pos, *tp;
	struct trace_eprobe *ep;
	bool enabled;
	int ret = 0;
	int cnt = 0;

	tp = trace_probe_primary_from_call(call);
	if (WARN_ON_ONCE(!tp))
		return -ENODEV;
	enabled = trace_probe_is_enabled(tp);

	/* This also changes "enabled" state */
	if (file) {
		ret = trace_probe_add_file(tp, file);
		if (ret)
			return ret;
	} else
		trace_probe_set_flag(tp, TP_FLAG_PROFILE);

	if (enabled)
		return 0;

	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
		ep = container_of(pos, struct trace_eprobe, tp);
		ret = enable_eprobe(ep, file);
		if (ret)
			break;
		/* Count successes so a later failure can undo exactly these. */
		cnt++;
	}

	if (ret) {
		/*
		 * Failed to enable one of them. Roll back all that were
		 * enabled before the failure: they are the first 'cnt'
		 * entries of the list. The entry that failed was never
		 * attached and needs no undo.
		 */
		list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
			if (!cnt--)
				break;
			ep = container_of(pos, struct trace_eprobe, tp);
			disable_eprobe(ep, file->tr);
		}

		if (file)
			trace_probe_remove_file(tp, file);
		else
			trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
	}

	return ret;
}
static int disable_trace_eprobe(struct trace_event_call *call,
struct trace_event_file *file)
{
struct trace_probe *pos, *tp;
struct trace_eprobe *ep;
tp = trace_probe_primary_from_call(call);
if (WARN_ON_ONCE(!tp))
return -ENODEV;
if (file) {
if (!trace_probe_get_file_link(tp, file))
return -ENOENT;
if (!trace_probe_has_single_file(tp))
goto out;
trace_probe_clear_flag(tp, TP_FLAG_TRACE);
} else
trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
if (!trace_probe_is_enabled(tp)) {
list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
ep = container_of(pos, struct trace_eprobe, tp);
disable_eprobe(ep, file->tr);
}
}
out:
if (file)
/*
* Synchronization is done in below function. For perf event,
* file == NULL and perf_trace_event_unreg() calls
* tracepoint_synchronize_unregister() to ensure synchronize
* event. We don't need to care about it.
*/
trace_probe_remove_file(tp, file);
return 0;
}
/*
 * Class ".reg" callback for eprobe events.
 *
 * TRACE_REG_REGISTER and TRACE_REG_UNREGISTER enable/disable the eprobe for
 * the trace_event_file passed in @data. Every other request, including all
 * perf requests, is accepted without doing anything.
 */
static int eprobe_register(struct trace_event_call *event,
			   enum trace_reg type, void *data)
{
	struct trace_event_file *file = data;

	if (type == TRACE_REG_REGISTER)
		return enable_trace_eprobe(event, file);

	if (type == TRACE_REG_UNREGISTER)
		return disable_trace_eprobe(event, file);

	/* TRACE_REG_PERF_* and anything else: nothing to do. */
	return 0;
}
/*
 * Fill in the trace_event_call of @ep's probe with the eprobe specific
 * flag, output functions, field table and register callback.
 */
static inline void init_trace_eprobe_call(struct trace_eprobe *ep)
{
	struct trace_event_call *call = trace_probe_event_call(&ep->tp);

	/* Class-level hooks. */
	call->class->reg = eprobe_register;
	call->class->fields_array = eprobe_fields_array;

	/* Per-call settings. */
	call->event.funcs = &eprobe_funcs;
	call->flags = TRACE_EVENT_FL_EPROBE;
}
static struct trace_event_call *
find_and_get_event(const char *system, const char *event_name)
{
struct trace_event_call *tp_event;
const char *name;
list_for_each_entry(tp_event, &ftrace_events, list) {
/* Skip other probes and ftrace events */
if (tp_event->flags &
(TRACE_EVENT_FL_IGNORE_ENABLE |
TRACE_EVENT_FL_KPROBE |
TRACE_EVENT_FL_UPROBE |
TRACE_EVENT_FL_EPROBE))
continue;
if (!tp_event->class->system ||
strcmp(system, tp_event->class->system))
continue;
name = trace_event_name(tp_event);
if (!name || strcmp(event_name, name))
continue;
if (!trace_event_try_get_ref(tp_event)) {
return NULL;
break;
}
return tp_event;
break;
}
return NULL;
}
/*
 * Parse fetch argument @argv[@i] into argument slot @i of @ep and, when the
 * parsed argument starts with a FETCH_OP_TP_ARG instruction, resolve the
 * referenced event field. Returns 0 or the error of the failing step.
 */
static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
{
	int err;

	err = traceprobe_parse_probe_arg(&ep->tp, i, argv[i],
					 TPARG_FL_KERNEL | TPARG_FL_TPOINT);
	if (err)
		return err;

	if (ep->tp.args[i].code->op != FETCH_OP_TP_ARG)
		return 0;

	return trace_eprobe_tp_arg_update(ep, i);
}
static int __trace_eprobe_create(int argc, const char *argv[])
{
/*
* Argument syntax:
* e[:[GRP/]ENAME] SYSTEM.EVENT [FETCHARGS]
* Fetch args:
* <name>=$<field>[:TYPE]
*/
const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
const char *sys_event = NULL, *sys_name = NULL;
struct trace_event_call *event_call;
struct trace_eprobe *ep = NULL;
char buf1[MAX_EVENT_NAME_LEN];
char buf2[MAX_EVENT_NAME_LEN];
int ret = 0;
int i;
if (argc < 2 || argv[0][0] != 'e')
return -ECANCELED;
trace_probe_log_init("event_probe", argc, argv);
event = strchr(&argv[0][1], ':');
if (event) {
event++;
ret = traceprobe_parse_event_name(&event, &group, buf1,
event - argv[0]);
if (ret)
goto parse_error;
} else {
strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN);
sanitize_event_name(buf1);
event = buf1;
}
if (!is_good_name(event) || !is_good_name(group))
goto parse_error;
sys_event = argv[1];
ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2,
sys_event - argv[1]);
if (ret || !sys_name)
goto parse_error;
if (!is_good_name(sys_event) || !is_good_name(sys_name))
goto parse_error;
mutex_lock(&event_mutex);
event_call = find_and_get_event(sys_name, sys_event);
ep = alloc_event_probe(group, event, event_call, argc - 2);
mutex_unlock(&event_mutex);
if (IS_ERR(ep)) {
ret = PTR_ERR(ep);
/* This must return -ENOMEM, else there is a bug */
WARN_ON_ONCE(ret != -ENOMEM);
goto error; /* We know ep is not allocated */
}
argc -= 2; argv += 2;
/* parse arguments */
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
trace_probe_log_set_index(i + 2);
ret = trace_eprobe_tp_update_arg(ep, argv, i);
if (ret)
goto error;
}
ret = traceprobe_set_print_fmt(&ep->tp, PROBE_PRINT_EVENT);
if (ret < 0)
goto error;
init_trace_eprobe_call(ep);
mutex_lock(&event_mutex);
ret = trace_probe_register_event_call(&ep->tp);
if (ret) {
if (ret == -EEXIST) {
trace_probe_log_set_index(0);
trace_probe_log_err(0, EVENT_EXIST);
}
mutex_unlock(&event_mutex);
goto error;
}
ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
mutex_unlock(&event_mutex);
return ret;
parse_error:
ret = -EINVAL;
error:
trace_event_probe_cleanup(ep);
return ret;
}
/*
 * Register dynevent at core_initcall. This allows kernel to setup eprobe
 * events in postcore_initcall without tracefs.
 *
 * Returns the result of dyn_event_register(); a warning is logged on failure.
 */
static __init int trace_events_eprobe_init_early(void)
{
	int err = dyn_event_register(&eprobe_dyn_event_ops);

	if (err)
		pr_warn("Could not register eprobe_dyn_event_ops\n");

	return err;
}
core_initcall(trace_events_eprobe_init_early);

View File

@@ -177,7 +177,7 @@ static void perf_trace_event_unreg(struct perf_event *p_event)
}
}
out:
module_put(tp_event->mod);
trace_event_put_ref(tp_event);
}
static int perf_trace_event_open(struct perf_event *p_event)
@@ -224,10 +224,10 @@ int perf_trace_init(struct perf_event *p_event)
list_for_each_entry(tp_event, &ftrace_events, list) {
if (tp_event->event.type == event_id &&
tp_event->class && tp_event->class->reg &&
try_module_get(tp_event->mod)) {
trace_event_try_get_ref(tp_event)) {
ret = perf_trace_event_init(tp_event, p_event);
if (ret)
module_put(tp_event->mod);
trace_event_put_ref(tp_event);
break;
}
}

View File

@@ -2525,7 +2525,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
return ret;
list_add(&call->list, &ftrace_events);
call->mod = mod;
if (call->flags & TRACE_EVENT_FL_DYNAMIC)
atomic_set(&call->refcnt, 0);
else
call->module = mod;
return 0;
}
@@ -2839,7 +2842,9 @@ static void trace_module_remove_events(struct module *mod)
down_write(&trace_event_sem);
list_for_each_entry_safe(call, p, &ftrace_events, list) {
if (call->mod == mod)
if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
continue;
if (call->module == mod)
__trace_remove_event_call(call);
}
up_write(&trace_event_sem);
@@ -2982,7 +2987,7 @@ struct trace_event_file *trace_get_event_file(const char *instance,
}
/* Don't let event modules unload while in use */
ret = try_module_get(file->event_call->mod);
ret = trace_event_try_get_ref(file->event_call);
if (!ret) {
trace_array_put(tr);
ret = -EBUSY;
@@ -3012,7 +3017,7 @@ EXPORT_SYMBOL_GPL(trace_get_event_file);
void trace_put_event_file(struct trace_event_file *file)
{
mutex_lock(&event_mutex);
module_put(file->event_call->mod);
trace_event_put_ref(file->event_call);
mutex_unlock(&event_mutex);
trace_array_put(file->tr);
@@ -3147,7 +3152,7 @@ static int free_probe_data(void *data)
if (!edata->ref) {
/* Remove the SOFT_MODE flag */
__ftrace_event_enable_disable(edata->file, 0, 1);
module_put(edata->file->event_call->mod);
trace_event_put_ref(edata->file->event_call);
kfree(edata);
}
return 0;
@@ -3280,7 +3285,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
out_reg:
/* Don't let event modules unload while probe registered */
ret = try_module_get(file->event_call->mod);
ret = trace_event_try_get_ref(file->event_call);
if (!ret) {
ret = -EBUSY;
goto out_free;
@@ -3310,7 +3315,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
out_disable:
__ftrace_event_enable_disable(file, 0, 1);
out_put:
module_put(file->event_call->mod);
trace_event_put_ref(file->event_call);
out_free:
kfree(data);
goto out;
@@ -3376,7 +3381,8 @@ void __trace_early_add_events(struct trace_array *tr)
list_for_each_entry(call, &ftrace_events, list) {
/* Early boot up should not have any modules loaded */
if (WARN_ON_ONCE(call->mod))
if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
WARN_ON_ONCE(call->module))
continue;
ret = __trace_early_add_new_event(call, tr);

Some files were not shown because too many files have changed in this diff Show More