blob: 26274d4c63f6c83559ecbc80fe8ba31b777e6cf6 [file] [log] [blame]
diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index d2c4c27e1702..d83c9ab49427 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -50,6 +50,7 @@ The following program demonstrates coverage collection from within a test
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
+ #include <linux/types.h>
#define KCOV_INIT_TRACE _IOR('c', 1, unsigned long)
#define KCOV_ENABLE _IO('c', 100)
@@ -177,6 +178,8 @@ Comparison operands collection
/* Read number of comparisons collected. */
n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
for (i = 0; i < n; i++) {
+ uint64_t ip;
+
type = cover[i * KCOV_WORDS_PER_CMP + 1];
/* arg1 and arg2 - operands of the comparison. */
arg1 = cover[i * KCOV_WORDS_PER_CMP + 2];
@@ -251,6 +254,8 @@ selectively from different subsystems.
.. code-block:: c
+ /* Same includes and defines as above. */
+
struct kcov_remote_arg {
__u32 trace_mode;
__u32 area_size;
diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h
index 1d5716bc060b..2526fd3be5fd 100644
--- a/arch/alpha/include/asm/spinlock_types.h
+++ b/arch/alpha/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef _ALPHA_SPINLOCK_TYPES_H
#define _ALPHA_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4ebd512043be..5ac2009727bd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -32,6 +32,7 @@ config ARM
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
@@ -68,7 +69,7 @@ config ARM
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
select HAVE_ARCH_MMAP_RND_BITS if MMU
@@ -109,6 +110,7 @@ config ARM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
@@ -124,6 +126,7 @@ config ARM
select OLD_SIGSUSPEND3
select PCI_SYSCALL if PCI
select PERF_USE_VMALLOC
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select RTC_LIB
select SYS_SUPPORTS_APM_EMULATION
select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index 5976958647fe..0c14b36ef101 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 9a18da3e10cc..2fa63d96a4f0 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -52,6 +52,7 @@ struct cpu_context_save {
struct thread_info {
unsigned long flags; /* low level flags */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
struct task_struct *task; /* main task structure */
__u32 cpu; /* cpu */
__u32 cpu_domain; /* cpu domain */
@@ -134,6 +135,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
#define TIF_SECCOMP 7 /* seccomp syscall filtering active */
#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 9
#define TIF_USING_IWMMXT 17
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
@@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
/* Checks for any syscall work in entry-common.S */
@@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
/*
* Change these and you break ASM code in entry-common.S
*/
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+ _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NOTIFY_SIGNAL)
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index a646a3f6440f..beb09d74684f 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -43,6 +43,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain));
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 68261a83b7ad..fa7d110ce555 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -206,11 +206,18 @@ ENDPROC(__dabt_svc)
#ifdef CONFIG_PREEMPTION
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
- ldr r0, [tsk, #TI_FLAGS] @ get flags
teq r8, #0 @ if preempt count != 0
+ bne 1f @ return from exeption
+ ldr r0, [tsk, #TI_FLAGS] @ get flags
+ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set
+ blne svc_preempt @ preempt!
+
+ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r8, #0 @ if preempt lazy count != 0
movne r0, #0 @ force flags to 0
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED_LAZY
blne svc_preempt
+1:
#endif
svc_exit r5, irq = 1 @ return from exception
@@ -225,8 +232,14 @@ ENDPROC(__irq_svc)
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
+ bne 1b
+ tst r0, #_TIF_NEED_RESCHED_LAZY
reteq r8 @ go again
- b 1b
+ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count
+ teq r0, #0 @ if preempt lazy count != 0
+ beq 1b
+ ret r8 @ go again
+
#endif
__und_fault:
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 539897ac2828..4655f04ccdcd 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
*/
trace_hardirqs_off();
do {
- if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+ if (likely(thread_flags & (_TIF_NEED_RESCHED |
+ _TIF_NEED_RESCHED_LAZY))) {
schedule();
} else {
if (unlikely(!user_mode(regs)))
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index efa402025031..59487ee9fd61 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;
@@ -470,6 +473,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9d80c783142f..fffa284eda00 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -88,6 +88,7 @@ config ARM64
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -191,6 +192,7 @@ config ARM64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_PREEMPT_LAZY
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUTEX_CMPXCHG if FUTEX
select MMU_GATHER_RCU_TABLE_FREE
@@ -212,6 +214,7 @@ config ARM64
select PCI_DOMAINS_GENERIC if PCI
select PCI_ECAM if (ACPI && PCI)
select PCI_SYSCALL if PCI
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select POWER_RESET
select POWER_SUPPLY
select SPARSE_IRQ
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ed57717cd004..63b39229890b 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1001,7 +1001,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
*/
static inline bool arch_faults_on_old_pte(void)
{
- WARN_ON(preemptible());
+ WARN_ON(is_migratable());
return !cpu_has_hw_af();
}
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
index e83f0982b99c..2545c17281e1 100644
--- a/arch/arm64/include/asm/preempt.h
+++ b/arch/arm64/include/asm/preempt.h
@@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_and_test(void)
* interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
* pair.
*/
- return !pc || !READ_ONCE(ti->preempt_count);
+ if (!pc || !READ_ONCE(ti->preempt_count))
+ return true;
+#ifdef CONFIG_PREEMPT_LAZY
+ if ((pc & ~PREEMPT_NEED_RESCHED))
+ return false;
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+ return false;
+#endif
}
static inline bool should_resched(int preempt_offset)
{
+#ifdef CONFIG_PREEMPT_LAZY
+ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+ if (pc == preempt_offset)
+ return true;
+
+ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset)
+ return false;
+
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
u64 pc = READ_ONCE(current_thread_info()->preempt_count);
return pc == preempt_offset;
+#endif
}
#ifdef CONFIG_PREEMPTION
diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h
index ef449f5f4ba8..5e535c3e4926 100644
--- a/arch/arm64/include/asm/signal.h
+++ b/arch/arm64/include/asm/signal.h
@@ -22,4 +22,8 @@ static inline void __user *arch_untagged_si_addr(void __user *addr,
}
#define arch_untagged_si_addr arch_untagged_si_addr
+#if defined(CONFIG_PREEMPT_RT)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index 18782f0c4721..11ab1c077697 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -5,7 +5,7 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
# error "please don't include this file directly"
#endif
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 6623c99f0984..c55ccec33a5a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -26,6 +26,7 @@ struct thread_info {
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
union {
u64 preempt_count; /* 0 => preemptible, <0 => bug */
struct {
@@ -67,6 +68,7 @@ int arch_dup_task_struct(struct task_struct *dst,
#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 7
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
@@ -97,8 +99,10 @@ int arch_dup_task_struct(struct task_struct *dst,
#define _TIF_SVE (1 << TIF_SVE)
#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+ _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
_TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
_TIF_NOTIFY_SIGNAL)
@@ -107,6 +111,8 @@ int arch_dup_task_struct(struct task_struct *dst,
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_SYSCALL_EMU)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+
#ifdef CONFIG_SHADOW_CALL_STACK
#define INIT_SCS \
.scs_base = init_shadow_call_stack, \
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 551427ae8cc5..96a4f6c9eb78 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -31,6 +31,7 @@ int main(void)
BLANK();
DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
+ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index ff4962750b3d..99484e8bbade 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -179,10 +179,19 @@ static void __get_cpu_fpsimd_context(void)
*
* The double-underscore version must only be called if you know the task
* can't be preempted.
+ *
+ * On RT kernels local_bh_disable() is not sufficient because it only
+ * serializes soft interrupt related sections via a local lock, but stays
+ * preemptible. Disabling preemption is the right choice here as bottom
+ * half processing is always in thread context on RT kernels so it
+ * implicitly prevents bottom half processing as well.
*/
static void get_cpu_fpsimd_context(void)
{
- local_bh_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_disable();
+ else
+ preempt_disable();
__get_cpu_fpsimd_context();
}
@@ -203,7 +212,10 @@ static void __put_cpu_fpsimd_context(void)
static void put_cpu_fpsimd_context(void)
{
__put_cpu_fpsimd_context();
- local_bh_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_bh_enable();
+ else
+ preempt_enable();
}
static bool have_cpu_fpsimd_context(void)
@@ -1033,6 +1045,7 @@ void fpsimd_thread_switch(struct task_struct *next)
void fpsimd_flush_thread(void)
{
int vl, supported_vl;
+ void *sve_state = NULL;
if (!system_supports_fpsimd())
return;
@@ -1045,7 +1058,10 @@ void fpsimd_flush_thread(void)
if (system_supports_sve()) {
clear_thread_flag(TIF_SVE);
- sve_free(current);
+
+ /* Defer kfree() while in atomic context */
+ sve_state = current->thread.sve_state;
+ current->thread.sve_state = NULL;
/*
* Reset the task vector length as required.
@@ -1079,6 +1095,7 @@ void fpsimd_flush_thread(void)
}
put_cpu_fpsimd_context();
+ kfree(sve_state);
}
/*
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index b3e1beccf458..03183563feb8 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -922,7 +922,7 @@ static void do_signal(struct pt_regs *regs)
void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
{
do {
- if (thread_flags & _TIF_NEED_RESCHED) {
+ if (thread_flags & _TIF_NEED_RESCHED_MASK) {
/* Unmask Debug and SError for the next task */
local_daif_restore(DAIF_PROCCTX_NOIRQ);
@@ -930,6 +930,14 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
} else {
local_daif_restore(DAIF_PROCCTX);
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+ if (unlikely(current->forced_info.si_signo)) {
+ struct task_struct *t = current;
+ force_sig_info(&t->forced_info);
+ t->forced_info.si_signo = 0;
+ }
+#endif
+
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 4cb265e15361..fd602025e913 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -828,7 +828,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* involves poking the GIC, which must be done in a
* non-preemptible context.
*/
- preempt_disable();
+ migrate_disable();
kvm_pmu_flush_hwstate(vcpu);
@@ -852,7 +852,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_timer_sync_user(vcpu);
kvm_vgic_sync_hwstate(vcpu);
local_irq_enable();
- preempt_enable();
+ migrate_enable();
continue;
}
@@ -921,7 +921,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Exit types that need handling before we can be preempted */
handle_exit_early(vcpu, ret);
- preempt_enable();
+ migrate_enable();
/*
* The ARMv8 architecture doesn't give the hypervisor
diff --git a/arch/csky/include/asm/spinlock_types.h b/arch/csky/include/asm/spinlock_types.h
index 8ff0f6ff3a00..db87a12c3827 100644
--- a/arch/csky/include/asm/spinlock_types.h
+++ b/arch/csky/include/asm/spinlock_types.h
@@ -3,7 +3,7 @@
#ifndef __ASM_CSKY_SPINLOCK_TYPES_H
#define __ASM_CSKY_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h
index 19d233497ba5..d5f66495b670 100644
--- a/arch/hexagon/include/asm/spinlock_types.h
+++ b/arch/hexagon/include/asm/spinlock_types.h
@@ -8,7 +8,7 @@
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
index 6e345fefcdca..14b8a161c165 100644
--- a/arch/ia64/include/asm/spinlock_types.h
+++ b/arch/ia64/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_IA64_SPINLOCK_TYPES_H
#define _ASM_IA64_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 27222b75d2a4..5495225807eb 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -151,6 +151,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x
+ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
@@ -218,6 +219,7 @@ config PPC
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IOREMAP_PROT
select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
select HAVE_KERNEL_LZO if DEFAULT_UIMAGE
@@ -234,6 +236,7 @@ config PPC
select HAVE_PERF_EVENTS_NMI if PPC64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
select HAVE_RSEQ
diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h
index 0f3cdd8faa95..08243338069d 100644
--- a/arch/powerpc/include/asm/simple_spinlock_types.h
+++ b/arch/powerpc/include/asm/simple_spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
#define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 7ef1cd8168a0..f9e63cacd220 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -62,6 +62,7 @@ struct smp_ops_t {
extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
+extern void smp_send_debugger_break_cpu(unsigned int cpu);
extern void smp_send_debugger_break(void);
extern void start_secondary_resume(void);
extern void smp_generic_give_timebase(void);
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index c5d742f18021..d5f8a74ed2e8 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
#define _ASM_POWERPC_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
index 1c8460e23583..b1653c160bab 100644
--- a/arch/powerpc/include/asm/stackprotector.h
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -24,7 +24,11 @@ static __always_inline void boot_init_stack_canary(void)
unsigned long canary;
/* Try to get a semi random initial value. */
+#ifdef CONFIG_PREEMPT_RT
+ canary = (unsigned long)&canary;
+#else
canary = get_random_canary();
+#endif
canary ^= mftb();
canary ^= LINUX_VERSION_CODE;
canary &= CANARY_MASK;
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 87013ac2a640..2920ed371188 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -53,6 +53,8 @@
struct thread_info {
int preempt_count; /* 0 => preemptable,
<0 => BUG */
+ int preempt_lazy_count; /* 0 => preemptable,
+ <0 => BUG */
unsigned long local_flags; /* private flags for thread */
#ifdef CONFIG_LIVEPATCH
unsigned long *livepatch_sp;
@@ -99,6 +101,7 @@ void arch_setup_new_exec(void);
#define TIF_PATCH_PENDING 6 /* pending live patching update */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SINGLESTEP 8 /* singlestepping active */
+#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
#define TIF_SECCOMP 10 /* secure computing */
#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
#define TIF_NOERROR 12 /* Force successful syscall return */
@@ -114,6 +117,7 @@ void arch_setup_new_exec(void);
#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_32BIT 20 /* 32 bit binary */
+
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
@@ -125,6 +129,7 @@ void arch_setup_new_exec(void);
#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
+#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_RESTOREALL (1<<TIF_RESTOREALL)
#define _TIF_NOERROR (1<<TIF_NOERROR)
@@ -138,10 +143,12 @@ void arch_setup_new_exec(void);
_TIF_SYSCALL_EMU)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
+ _TIF_NEED_RESCHED_LAZY | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
_TIF_NOTIFY_SIGNAL)
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
/* Bits in local_flags */
/* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index df048e331cbf..a81225f476a7 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -346,7 +346,7 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
ti_flags = READ_ONCE(current_thread_info()->flags);
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
local_irq_enable();
- if (ti_flags & _TIF_NEED_RESCHED) {
+ if (ti_flags & _TIF_NEED_RESCHED_MASK) {
schedule();
} else {
/*
@@ -552,11 +552,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
/* Returning to a kernel context with local irqs enabled. */
WARN_ON_ONCE(!(regs->msr & MSR_EE));
again:
- if (IS_ENABLED(CONFIG_PREEMPT)) {
+ if (IS_ENABLED(CONFIG_PREEMPTION)) {
/* Return to preemptible kernel context */
if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
if (preempt_count() == 0)
preempt_schedule_irq();
+ } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) {
+ if ((preempt_count() == 0) &&
+ (current_thread_info()->preempt_lazy_count == 0))
+ preempt_schedule_irq();
}
}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c4f1d6b7d992..02e17a57da83 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -690,6 +690,7 @@ static inline void check_stack_overflow(void)
}
}
+#ifndef CONFIG_PREEMPT_RT
static __always_inline void call_do_softirq(const void *sp)
{
/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
@@ -708,6 +709,7 @@ static __always_inline void call_do_softirq(const void *sp)
"r11", "r12"
);
}
+#endif
static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
{
@@ -820,10 +822,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly;
void *softirq_ctx[NR_CPUS] __read_mostly;
void *hardirq_ctx[NR_CPUS] __read_mostly;
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
call_do_softirq(softirq_ctx[smp_processor_id()]);
}
+#endif
irq_hw_number_t virq_to_hw(unsigned int virq)
{
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index bdee7262c080..d57d37497862 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -120,11 +120,19 @@ int kgdb_skipexception(int exception, struct pt_regs *regs)
static int kgdb_debugger_ipi(struct pt_regs *regs)
{
- kgdb_nmicallback(raw_smp_processor_id(), regs);
+ int cpu = raw_smp_processor_id();
+
+ if (!kgdb_roundup_delay(cpu))
+ kgdb_nmicallback(cpu, regs);
return 0;
}
#ifdef CONFIG_SMP
+void kgdb_roundup_cpu(unsigned int cpu)
+{
+ smp_send_debugger_break_cpu(cpu);
+}
+
void kgdb_roundup_cpus(void)
{
smp_send_debugger_break();
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index fb95f92dcfac..308765f2e7a0 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -590,6 +590,11 @@ static void debugger_ipi_callback(struct pt_regs *regs)
debugger_ipi(regs);
}
+void smp_send_debugger_break_cpu(unsigned int cpu)
+{
+ smp_send_nmi_ipi(cpu, debugger_ipi_callback, 1000000);
+}
+
void smp_send_debugger_break(void)
{
smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 11741703d26e..7e4e1f489f56 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -260,12 +260,17 @@ static char *get_mmu_str(void)
static int __die(const char *str, struct pt_regs *regs, long err)
{
+ const char *pr = "";
+
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
+ if (IS_ENABLED(CONFIG_PREEMPTION))
+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
PAGE_SIZE / 1024, get_mmu_str(),
- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ pr,
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index ff581d70f20c..e5c84d55bdfb 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -178,6 +178,7 @@ config KVM_E500MC
config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && E500
+ depends on !PREEMPT_RT
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 8322ca86d5ac..f524145d7dd3 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -24,6 +24,7 @@
#include <linux/of.h>
#include <linux/iommu.h>
#include <linux/rculist.h>
+#include <linux/local_lock.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
return ret;
}
-static DEFINE_PER_CPU(__be64 *, tce_page);
+struct tce_page {
+ __be64 * page;
+ local_lock_t lock;
+};
+static DEFINE_PER_CPU(struct tce_page, tce_page) = {
+ .lock = INIT_LOCAL_LOCK(lock),
+};
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
@@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
direction, attrs);
}
- local_irq_save(flags); /* to protect tcep and the page behind it */
+ /* to protect tcep and the page behind it */
+ local_lock_irqsave(&tce_page.lock, flags);
- tcep = __this_cpu_read(tce_page);
+ tcep = __this_cpu_read(tce_page.page);
/* This is safe to do since interrupts are off when we're called
* from iommu_alloc{,_sg}()
@@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
return tce_build_pSeriesLP(tbl->it_index, tcenum,
tceshift,
npages, uaddr, direction, attrs);
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
rpn = __pa(uaddr) >> tceshift;
@@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcenum += limit;
} while (npages > 0 && !rc);
- local_irq_restore(flags);
+ local_unlock_irqrestore(&tce_page.lock, flags);
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
@@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
DMA_BIDIRECTIONAL, 0);
}
- local_irq_disable(); /* to protect tcep and the page behind it */
- tcep = __this_cpu_read(tce_page);
+ /* to protect tcep and the page behind it */
+ local_lock_irq(&tce_page.lock);
+ tcep = __this_cpu_read(tce_page.page);
if (!tcep) {
tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
if (!tcep) {
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return -ENOMEM;
}
- __this_cpu_write(tce_page, tcep);
+ __this_cpu_write(tce_page.page, tcep);
}
proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
@@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
/* error cleanup: caller will clear whole range */
- local_irq_enable();
+ local_unlock_irq(&tce_page.lock);
return rc;
}
diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h
index f398e7638dd6..5a35a49505da 100644
--- a/arch/riscv/include/asm/spinlock_types.h
+++ b/arch/riscv/include/asm/spinlock_types.h
@@ -6,7 +6,7 @@
#ifndef _ASM_RISCV_SPINLOCK_TYPES_H
#define _ASM_RISCV_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index a2bbfd7df85f..b69695e39957 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h
index e82369f286a2..907bda4b1619 100644
--- a/arch/sh/include/asm/spinlock_types.h
+++ b/arch/sh/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SH_SPINLOCK_TYPES_H
#define __ASM_SH_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index ef0f0827cf57..2d3eca8fee01 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu)
hardirq_ctx[cpu] = NULL;
}
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
struct thread_info *curctx;
@@ -176,6 +177,7 @@ void do_softirq_own_stack(void)
"r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
);
}
+#endif
#else
static inline void handle_one_irq(unsigned int irq)
{
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index c8848bb681a1..41fa1be980a3 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
set_irq_regs(old_regs);
}
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
void *orig_sp, *sp = softirq_stack[smp_processor_id()];
@@ -869,6 +870,7 @@ void do_softirq_own_stack(void)
__asm__ __volatile__("mov %0, %%sp"
: : "r" (orig_sp));
}
+#endif
#ifdef CONFIG_HOTPLUG_CPU
void fixup_irqs(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 57f5e881791a..e8a4870f8686 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,6 +107,7 @@ config X86
select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_LTO_CLANG_THIN
+ select ARCH_SUPPORTS_RT
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
@@ -230,6 +231,7 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_LAZY
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index e087cd7837c3..96cc92f63b06 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -202,6 +202,7 @@
IRQ_CONSTRAINTS, regs, vector); \
}
+#ifndef CONFIG_PREEMPT_RT
/*
* Macro to invoke __do_softirq on the irq stack. This is only called from
* task context when bottom halves are about to be reenabled and soft
@@ -215,6 +216,8 @@
__this_cpu_write(hardirq_stack_inuse, false); \
}
+#endif
+
#else /* CONFIG_X86_64 */
/* System vector handlers always run on the stack they interrupted. */
#define run_sysvec_on_irqstack_cond(func, regs) \
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index fe5efbcba824..ab8cb5fc2329 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -90,17 +90,48 @@ static __always_inline void __preempt_count_sub(int val)
* a decrement which hits zero means we have no preempt_count and should
* reschedule.
*/
-static __always_inline bool __preempt_count_dec_and_test(void)
+static __always_inline bool ____preempt_count_dec_and_test(void)
{
return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
}
+static __always_inline bool __preempt_count_dec_and_test(void)
+{
+ if (____preempt_count_dec_and_test())
+ return true;
+#ifdef CONFIG_PREEMPT_LAZY
+ if (preempt_count())
+ return false;
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
+ return false;
+#endif
+}
+
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
static __always_inline bool should_resched(int preempt_offset)
{
+#ifdef CONFIG_PREEMPT_LAZY
+ u32 tmp;
+ tmp = raw_cpu_read_4(__preempt_count);
+ if (tmp == preempt_offset)
+ return true;
+
+ /* preempt count == 0 ? */
+ tmp &= ~PREEMPT_NEED_RESCHED;
+ if (tmp != preempt_offset)
+ return false;
+ /* XXX PREEMPT_LOCK_OFFSET */
+ if (current_thread_info()->preempt_lazy_count)
+ return false;
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+#else
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
+#endif
}
#ifdef CONFIG_PREEMPTION
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 2dfb5fea13af..fc03f4f7ed84 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -28,6 +28,19 @@ typedef struct {
#define SA_IA32_ABI 0x02000000u
#define SA_X32_ABI 0x01000000u
+/*
+ * Because some traps use the IST stack, we must keep preemption
+ * disabled while calling do_trap(), but do_trap() may call
+ * force_sig_info() which will grab the signal spin_locks for the
+ * task, which in PREEMPT_RT are mutexes. By defining
+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
+ * trap.
+ */
+#if defined(CONFIG_PREEMPT_RT)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
#ifndef CONFIG_COMPAT
#define compat_sigset_t compat_sigset_t
typedef sigset_t compat_sigset_t;
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 24a8d6c4fb18..2fc22c27df18 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -50,7 +50,7 @@
*/
static __always_inline void boot_init_stack_canary(void)
{
- u64 canary;
+ u64 canary = 0;
u64 tsc;
#ifdef CONFIG_X86_64
@@ -61,8 +61,14 @@ static __always_inline void boot_init_stack_canary(void)
* of randomness. The TSC only matters for very early init,
* there it already has some randomness on most systems. Later
* on during the bootup the random pool has true entropy too.
+ * For preempt-rt we need to weaken the randomness a bit, as
+ * we can't call into the random generator from atomic context
+ * due to locking constraints. We just leave canary
+ * uninitialized and use the TSC based randomness on top of it.
*/
+#ifndef CONFIG_PREEMPT_RT
get_random_bytes(&canary, sizeof(canary));
+#endif
tsc = rdtsc();
canary += tsc + (tsc << 32UL);
canary &= CANARY_MASK;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index cf132663c219..75dc786e6365 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -57,11 +57,14 @@ struct thread_info {
unsigned long flags; /* low level flags */
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
u32 status; /* thread synchronous flags */
+ int preempt_lazy_count; /* 0 => lazy preemptable
+ <0 => BUG */
};
#define INIT_THREAD_INFO(tsk) \
{ \
.flags = 0, \
+ .preempt_lazy_count = 0, \
}
#else /* !__ASSEMBLY__ */
@@ -90,6 +93,7 @@ struct thread_info {
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */
#define TIF_SLD 18 /* Restore split lock detection on context switch */
+#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -114,6 +118,7 @@ struct thread_info {
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_SLD (1 << TIF_SLD)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE)
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 044902d5a3c4..e5dd6da78713 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -132,6 +132,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
return 0;
}
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
struct irq_stack *irqstk;
@@ -148,6 +149,7 @@ void do_softirq_own_stack(void)
call_on_stack(__do_softirq, isp);
}
+#endif
void __handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 3a43a2dee658..37bd37cdf2b6 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -502,9 +502,12 @@ static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs)
if (atomic_read(&kgdb_active) != -1) {
/* KGDB CPU roundup */
cpu = raw_smp_processor_id();
- kgdb_nmicallback(cpu, regs);
- set_bit(cpu, was_in_debug_nmi);
- touch_nmi_watchdog();
+
+ if (!kgdb_roundup_delay(cpu)) {
+ kgdb_nmicallback(cpu, regs);
+ set_bit(cpu, was_in_debug_nmi);
+ touch_nmi_watchdog();
+ }
return NMI_HANDLED;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f5b7a05530eb..d401a2f42c44 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8571,6 +8571,14 @@ int kvm_arch_init(void *opaque)
goto out;
}
+#ifdef CONFIG_PREEMPT_RT
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n");
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+#endif
+
r = -ENOMEM;
x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
__alignof__(struct fpu), SLAB_ACCOUNT,
diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h
index 64c9389254f1..797aed7df3dd 100644
--- a/arch/xtensa/include/asm/spinlock_types.h
+++ b/arch/xtensa/include/asm/spinlock_types.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SPINLOCK_TYPES_H
#define __ASM_SPINLOCK_TYPES_H
-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
+#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
# error "please don't include this file directly"
#endif
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1a28ba9017ed..fa59e8650f54 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1566,14 +1566,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
return;
if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
- int cpu = get_cpu();
+ int cpu = get_cpu_light();
if (cpumask_test_cpu(cpu, hctx->cpumask)) {
__blk_mq_run_hw_queue(hctx);
- put_cpu();
+ put_cpu_light();
return;
}
- put_cpu();
+ put_cpu_light();
}
kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 163a1283a866..444183fe847d 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1061,14 +1061,14 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
static void crypto_disable_simd_for_test(void)
{
- preempt_disable();
+ migrate_disable();
__this_cpu_write(crypto_simd_disabled_for_test, true);
}
static void crypto_reenable_simd_for_test(void)
{
__this_cpu_write(crypto_simd_disabled_for_test, false);
- preempt_enable();
+ migrate_enable();
}
/*
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 6383c81ac5b3..abb695f5f5e4 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -59,6 +59,40 @@ static void zram_free_page(struct zram *zram, size_t index);
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
u32 index, int offset, struct bio *bio);
+#ifdef CONFIG_PREEMPT_RT
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
+{
+ size_t index;
+
+ for (index = 0; index < num_pages; index++)
+ spin_lock_init(&zram->table[index].lock);
+}
+
+static int zram_slot_trylock(struct zram *zram, u32 index)
+{
+ int ret;
+
+ ret = spin_trylock(&zram->table[index].lock);
+ if (ret)
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
+ return ret;
+}
+
+static void zram_slot_lock(struct zram *zram, u32 index)
+{
+ spin_lock(&zram->table[index].lock);
+ __set_bit(ZRAM_LOCK, &zram->table[index].flags);
+}
+
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+ __clear_bit(ZRAM_LOCK, &zram->table[index].flags);
+ spin_unlock(&zram->table[index].lock);
+}
+
+#else
+
+static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
static int zram_slot_trylock(struct zram *zram, u32 index)
{
@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index)
{
bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
}
+#endif
static inline bool init_done(struct zram *zram)
{
@@ -1169,6 +1204,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
if (!huge_class_size)
huge_class_size = zs_huge_class_size(zram->mem_pool);
+ zram_meta_init_table_locks(zram, num_pages);
return true;
}
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 80c3b43b4828..d8f6d880f915 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -63,6 +63,7 @@ struct zram_table_entry {
unsigned long element;
};
unsigned long flags;
+ spinlock_t lock;
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
ktime_t ac_time;
#endif
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index d3f2e5364c27..9c4a99757afd 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da
return container_of(data, struct tpm_tis_tcg_phy, priv);
}
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * Flushes previous write operations to chip so that a subsequent
+ * ioread*()s won't stall a cpu.
+ */
+static inline void tpm_tis_flush(void __iomem *iobase)
+{
+ ioread8(iobase + TPM_ACCESS(0));
+}
+#else
+#define tpm_tis_flush(iobase) do { } while (0)
+#endif
+
+static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr)
+{
+ iowrite8(b, iobase + addr);
+ tpm_tis_flush(iobase);
+}
+
+static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
+{
+ iowrite32(b, iobase + addr);
+ tpm_tis_flush(iobase);
+}
+
static int interrupts = -1;
module_param(interrupts, int, 0444);
MODULE_PARM_DESC(interrupts, "Enable interrupts");
@@ -169,7 +194,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
while (len--)
- iowrite8(*value++, phy->iobase + addr);
+ tpm_tis_iowrite8(*value++, phy->iobase, addr);
return 0;
}
@@ -196,7 +221,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value)
{
struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
- iowrite32(value, phy->iobase + addr);
+ tpm_tis_iowrite32(value, phy->iobase, addr);
return 0;
}
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index e3df82d5d37a..5502e176d51b 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -66,7 +66,7 @@ struct mm_struct efi_mm = {
struct workqueue_struct *efi_rts_wq;
-static bool disable_runtime;
+static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);
static int __init setup_noefi(char *arg)
{
disable_runtime = true;
@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char *str)
if (parse_option_str(str, "noruntime"))
disable_runtime = true;
+ if (parse_option_str(str, "runtime"))
+ disable_runtime = false;
+
if (parse_option_str(str, "nosoftreserve"))
set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
index 254e67141a77..7a39029b083f 100644
--- a/drivers/gpu/drm/i915/display/intel_crtc.c
+++ b/drivers/gpu/drm/i915/display/intel_crtc.c
@@ -425,7 +425,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
*/
intel_psr_wait_for_idle(new_crtc_state);
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
crtc->debug.min_vbl = min;
crtc->debug.max_vbl = max;
@@ -450,11 +451,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
break;
}
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
timeout = schedule_timeout(timeout);
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
}
finish_wait(wq, &wait);
@@ -487,7 +490,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
return;
irq_disable:
- local_irq_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_disable();
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
@@ -566,7 +570,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
new_crtc_state->uapi.event = NULL;
}
- local_irq_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ local_irq_enable();
/* Send VRR Push to terminate Vblank */
intel_vrr_send_push(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 209cf265bf74..6e1b9068d944 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -311,10 +311,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
/* Kick the work once more to drain the signalers, and disarm the irq */
irq_work_sync(&b->irq_work);
while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
- local_irq_disable();
- signal_irq_work(&b->irq_work);
- local_irq_enable();
+ irq_work_queue(&b->irq_work);
cond_resched();
+ irq_work_sync(&b->irq_work);
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index c41098950746..601274ba86e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -163,7 +163,8 @@ static inline void intel_context_enter(struct intel_context *ce)
static inline void intel_context_mark_active(struct intel_context *ce)
{
- lockdep_assert_held(&ce->timeline->mutex);
+ lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
+ test_bit(CONTEXT_IS_PARKED, &ce->flags));
++ce->active_count;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index a63631ea0ec4..314457fb9db5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -112,6 +112,7 @@ struct intel_context {
#define CONTEXT_FORCE_SINGLE_SUBMISSION 7
#define CONTEXT_NOPREEMPT 8
#define CONTEXT_LRCA_DIRTY 9
+#define CONTEXT_IS_PARKED 10
struct {
u64 timeout_us;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index dacd62773735..73e96ca024df 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf)
return 0;
}
-#if IS_ENABLED(CONFIG_LOCKDEP)
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
-
- return flags;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
- unsigned long flags)
-{
- mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
- local_irq_restore(flags);
-}
-
-#else
-
-static unsigned long __timeline_mark_lock(struct intel_context *ce)
-{
- return 0;
-}
-
-static void __timeline_mark_unlock(struct intel_context *ce,
- unsigned long flags)
-{
-}
-
-#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
-
static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct i915_request *rq = to_request(fence);
@@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
- unsigned long flags;
bool result = true;
/* GPU is pointing to the void, as good as in the kernel context. */
@@ -201,7 +167,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* engine->wakeref.count, we may see the request completion and retire
* it causing an underflow of the engine->wakeref.
*/
- flags = __timeline_mark_lock(ce);
+ set_bit(CONTEXT_IS_PARKED, &ce->flags);
GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
rq = __i915_request_create(ce, GFP_NOWAIT);
@@ -233,7 +199,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
result = false;
out_unlock:
- __timeline_mark_unlock(ce, flags);
+ clear_bit(CONTEXT_IS_PARKED, &ce->flags);
return result;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 416f5e0657f0..c5b0c99e60c9 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1283,7 +1283,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* and context switches) submission.
*/
- spin_lock(&sched_engine->lock);
+ spin_lock_irq(&sched_engine->lock);
/*
* If the queue is higher priority than the last
@@ -1383,7 +1383,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* Even if ELSP[1] is occupied and not worthy
* of timeslices, our queue might be.
*/
- spin_unlock(&sched_engine->lock);
+ spin_unlock_irq(&sched_engine->lock);
return;
}
}
@@ -1409,7 +1409,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.sched_engine->lock);
- spin_unlock(&engine->sched_engine->lock);
+ spin_unlock_irq(&engine->sched_engine->lock);
return; /* leave this for another sibling */
}
@@ -1571,7 +1571,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
*/
sched_engine->queue_priority_hint = queue_prio(sched_engine);
i915_sched_engine_reset_on_empty(sched_engine);
- spin_unlock(&sched_engine->lock);
+ spin_unlock_irq(&sched_engine->lock);
/*
* We can skip poking the HW if we ended up with exactly the same set
@@ -1597,13 +1597,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
}
-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
-{
- local_irq_disable(); /* Suspend interrupts across request submission */
- execlists_dequeue(engine);
- local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
-}
-
static void clear_ports(struct i915_request **ports, int count)
{
memset_p((void **)ports, NULL, count);
@@ -2423,7 +2416,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
}
if (!engine->execlists.pending[0]) {
- execlists_dequeue_irq(engine);
+ execlists_dequeue(engine);
start_timeslice(engine);
}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9bc4f4a8e12e..547347241a47 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -886,7 +886,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
*/
spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
/* Get optional system timestamp before query. */
if (stime)
@@ -950,7 +951,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
if (etime)
*etime = ktime_get();
- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 79da5eca60af..b9dd6100c6d1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -559,7 +559,6 @@ bool __i915_request_submit(struct i915_request *request)
RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->sched_engine->lock);
/*
@@ -668,7 +667,6 @@ void __i915_request_unsubmit(struct i915_request *request)
*/
RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->sched_engine->lock);
/*
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 1bc1349ba3c2..a2f713b4ac2f 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -609,7 +609,8 @@ i915_request_timeline(const struct i915_request *rq)
{
/* Valid only while the request is being constructed (or retired). */
return rcu_dereference_protected(rq->timeline,
- lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
+ lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) ||
+ test_bit(CONTEXT_IS_PARKED, &rq->context->flags));
}
static inline struct i915_gem_context *
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 63fec1c3c132..f345a0f12bf6 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -2,6 +2,10 @@
#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _I915_TRACE_H_
+#ifdef CONFIG_PREEMPT_RT
+#define NOTRACE
+#endif
+
#include <linux/stringify.h>
#include <linux/types.h>
#include <linux/tracepoint.h>
@@ -819,7 +823,7 @@ DEFINE_EVENT(i915_request, i915_request_add,
TP_ARGS(rq)
);
-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
+#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
DEFINE_EVENT(i915_request, i915_request_guc_submit,
TP_PROTO(struct i915_request *rq),
TP_ARGS(rq)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 5259edacde38..b36b27c09049 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -343,7 +343,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
#else
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c
index 1cf68f85b2e1..8ccf0c928bb4 100644
--- a/drivers/i2c/busses/i2c-cht-wc.c
+++ b/drivers/i2c/busses/i2c-cht-wc.c
@@ -99,15 +99,8 @@ static irqreturn_t cht_wc_i2c_adap_thread_handler(int id, void *data)
* interrupt handler as well, so running the client irq handler from
* this thread will cause things to lock up.
*/
- if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ) {
- /*
- * generic_handle_irq expects local IRQs to be disabled
- * as normally it is called from interrupt context.
- */
- local_irq_disable();
- generic_handle_irq(adap->client_irq);
- local_irq_enable();
- }
+ if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ)
+ generic_handle_irq_safe(adap->client_irq);
return IRQ_HANDLED;
}
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 8fb065caf30b..c232535ca8f4 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1422,7 +1422,7 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr)
if (irq <= 0)
return -ENXIO;
- generic_handle_irq(irq);
+ generic_handle_irq_safe(irq);
return 0;
}
diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
index 1f1d57288085..dc6816d36d06 100644
--- a/drivers/leds/trigger/Kconfig
+++ b/drivers/leds/trigger/Kconfig
@@ -64,6 +64,7 @@ config LEDS_TRIGGER_BACKLIGHT
config LEDS_TRIGGER_CPU
bool "LED CPU Trigger"
+ depends on !PREEMPT_RT
help
This allows LEDs to be controlled by active CPUs. This shows
the active CPUs across an array of LEDs so you can see which
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 19e497a7e747..55db7f2a65e2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2217,8 +2217,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
struct raid5_percpu *percpu;
unsigned long cpu;
- cpu = get_cpu();
+ cpu = get_cpu_light();
percpu = per_cpu_ptr(conf->percpu, cpu);
+ spin_lock(&percpu->lock);
if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
ops_run_biofill(sh);
overlap_clear++;
@@ -2277,7 +2278,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
if (test_and_clear_bit(R5_Overlap, &dev->flags))
wake_up(&sh->raid_conf->wait_for_overlap);
}
- put_cpu();
+ spin_unlock(&percpu->lock);
+ put_cpu_light();
}
static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
@@ -7099,6 +7101,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
__func__, cpu);
return -ENOMEM;
}
+ spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
return 0;
}
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 5c05acf20e1f..665fe138ab4f 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -635,6 +635,7 @@ struct r5conf {
int recovery_disabled;
/* per cpu variables */
struct raid5_percpu {
+ spinlock_t lock; /* Protection for -RT */
struct page *spare_page; /* Used when checking P/Q in raid6 */
void *scribble; /* space for constructing buffer
* lists and performing address
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index 70fa18b04ad2..b14d3f98e1eb 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -193,13 +193,11 @@ static void pcap_isr_work(struct work_struct *work)
ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr);
ezx_pcap_write(pcap, PCAP_REG_ISR, isr);
- local_irq_disable();
service = isr & ~msr;
for (irq = pcap->irq_base; service; service >>= 1, irq++) {
if (service & 1)
- generic_handle_irq(irq);
+ generic_handle_irq_safe(irq);
}
- local_irq_enable();
ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
} while (gpio_get_value(pdata->gpio));
}
diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c
index 08535e97ff43..0585a5821d05 100644
--- a/drivers/misc/hi6421v600-irq.c
+++ b/drivers/misc/hi6421v600-irq.c
@@ -118,8 +118,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
* If both powerkey down and up IRQs are received,
* handle them at the right order
*/
- generic_handle_irq(priv->irqs[POWERKEY_DOWN]);
- generic_handle_irq(priv->irqs[POWERKEY_UP]);
+ generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]);
+ generic_handle_irq_safe(priv->irqs[POWERKEY_UP]);
pending &= ~HISI_IRQ_POWERKEY_UP_DOWN;
}
@@ -127,7 +127,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
continue;
for_each_set_bit(offset, &pending, BITS_PER_BYTE) {
- generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]);
+ generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]);
}
}
diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
index 2473fb5f75e5..2a5cc64227e9 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
@@ -458,7 +458,7 @@ nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle,
static void
nfp_abm_stats_calculate(struct nfp_alink_stats *new,
struct nfp_alink_stats *old,
- struct gnet_stats_basic_packed *bstats,
+ struct gnet_stats_basic_sync *bstats,
struct gnet_stats_queue *qstats)
{
_bstats_update(bstats, new->tx_bytes - old->tx_bytes,
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 3e1a83a22fdd..bce0a6bd46a7 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1367,11 +1367,8 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
- if (dev->domain_data.phyirq > 0) {
- local_irq_disable();
- generic_handle_irq(dev->domain_data.phyirq);
- local_irq_enable();
- }
+ if (dev->domain_data.phyirq > 0)
+ generic_handle_irq_safe(dev->domain_data.phyirq);
} else {
netdev_warn(dev->net,
"unexpected interrupt: 0x%08x\n", intdata);
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 5ae6c207d3ac..660908027dc5 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -1450,11 +1450,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
{
struct fcoe_percpu_s *fps;
- int rc;
+ int rc, cpu = get_cpu_light();
- fps = &get_cpu_var(fcoe_percpu);
+ fps = &per_cpu(fcoe_percpu, cpu);
rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
- put_cpu_var(fcoe_percpu);
+ put_cpu_light();
return rc;
}
@@ -1639,11 +1639,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
return 0;
}
- stats = per_cpu_ptr(lport->stats, get_cpu());
+ stats = per_cpu_ptr(lport->stats, get_cpu_light());
stats->InvalidCRCCount++;
if (stats->InvalidCRCCount < 5)
printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
- put_cpu();
+ put_cpu_light();
return -EINVAL;
}
@@ -1684,7 +1684,7 @@ static void fcoe_recv_frame(struct sk_buff *skb)
*/
hp = (struct fcoe_hdr *) skb_network_header(skb);
- stats = per_cpu_ptr(lport->stats, get_cpu());
+ stats = per_cpu_ptr(lport->stats, get_cpu_light());
if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
if (stats->ErrorFrames < 5)
printk(KERN_WARNING "fcoe: FCoE version "
@@ -1716,13 +1716,13 @@ static void fcoe_recv_frame(struct sk_buff *skb)
goto drop;
if (!fcoe_filter_frames(lport, fp)) {
- put_cpu();
+ put_cpu_light();
fc_exch_recv(lport, fp);
return;
}
drop:
stats->ErrorFrames++;
- put_cpu();
+ put_cpu_light();
kfree_skb(skb);
}
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 558f3f4e1859..f08feaa4f398 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
INIT_LIST_HEAD(&del_list);
- stats = per_cpu_ptr(fip->lp->stats, get_cpu());
+ stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
@@ -864,7 +864,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
sel_time = fcf->time;
}
}
- put_cpu();
+ put_cpu_light();
list_for_each_entry_safe(fcf, next, &del_list, list) {
/* Removes fcf from current list */
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index aa223db4cf53..0ceb93800704 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport,
}
memset(ep, 0, sizeof(*ep));
- cpu = get_cpu();
+ cpu = get_cpu_light();
pool = per_cpu_ptr(mp->pool, cpu);
spin_lock_bh(&pool->lock);
- put_cpu();
+ put_cpu_light();
/* peek cache of free slot */
if (pool->left != FC_XID_UNKNOWN) {
diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c
index 7e6347fe93f9..8a7cf1d0e968 100644
--- a/drivers/staging/greybus/gpio.c
+++ b/drivers/staging/greybus/gpio.c
@@ -391,10 +391,7 @@ static int gb_gpio_request_handler(struct gb_operation *op)
return -EINVAL;
}
- local_irq_disable();
- ret = generic_handle_irq(irq);
- local_irq_enable();
-
+ ret = generic_handle_irq_safe(irq);
if (ret)
dev_err(dev, "failed to invoke irq handler\n");
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index b3abc29aa927..c4843be91417 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -156,12 +156,55 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value)
up->dl_write(up, value);
}
+static inline void serial8250_set_IER(struct uart_8250_port *up,
+ unsigned char ier)
+{
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ bool is_console;
+
+ is_console = uart_console(port);
+
+ if (is_console)
+ console_atomic_lock(flags);
+
+ serial_out(up, UART_IER, ier);
+
+ if (is_console)
+ console_atomic_unlock(flags);
+}
+
+static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up)
+{
+ struct uart_port *port = &up->port;
+ unsigned int clearval = 0;
+ unsigned long flags;
+ unsigned int prior;
+ bool is_console;
+
+ is_console = uart_console(port);
+
+ if (up->capabilities & UART_CAP_UUE)
+ clearval = UART_IER_UUE;
+
+ if (is_console)
+ console_atomic_lock(flags);
+
+ prior = serial_port_in(port, UART_IER);
+ serial_port_out(port, UART_IER, clearval);
+
+ if (is_console)
+ console_atomic_unlock(flags);
+
+ return prior;
+}
+
static inline bool serial8250_set_THRI(struct uart_8250_port *up)
{
if (up->ier & UART_IER_THRI)
return false;
up->ier |= UART_IER_THRI;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
return true;
}
@@ -170,7 +213,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up)
if (!(up->ier & UART_IER_THRI))
return false;
up->ier &= ~UART_IER_THRI;
- serial_out(up, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
return true;
}
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 30b7890645ac..b1883f906f7a 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -265,10 +265,8 @@ static void serial8250_backup_timeout(struct timer_list *t)
* Must disable interrupts or else we risk racing with the interrupt
* based handler.
*/
- if (up->port.irq) {
- ier = serial_in(up, UART_IER);
- serial_out(up, UART_IER, 0);
- }
+ if (up->port.irq)
+ ier = serial8250_clear_IER(up);
iir = serial_in(up, UART_IIR);
@@ -291,7 +289,7 @@ static void serial8250_backup_timeout(struct timer_list *t)
serial8250_tx_chars(up);
if (up->port.irq)
- serial_out(up, UART_IER, ier);
+ serial8250_set_IER(up, ier);
spin_unlock_irqrestore(&up->port.lock, flags);
@@ -572,6 +570,14 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
#ifdef CONFIG_SERIAL_8250_CONSOLE
+static void univ8250_console_write_atomic(struct console *co, const char *s,
+ unsigned int count)
+{
+ struct uart_8250_port *up = &serial8250_ports[co->index];
+
+ serial8250_console_write_atomic(up, s, count);
+}
+
static void univ8250_console_write(struct console *co, const char *s,
unsigned int count)
{
@@ -665,6 +671,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx,
static struct console univ8250_console = {
.name = "ttyS",
+ .write_atomic = univ8250_console_write_atomic,
.write = univ8250_console_write,
.device = uart_console_device,
.setup = univ8250_console_setup,
diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
index af74f82ad782..0bab91ea75c6 100644
--- a/drivers/tty/serial/8250/8250_fsl.c
+++ b/drivers/tty/serial/8250/8250_fsl.c
@@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port *port)
/* Stop processing interrupts on input overrun */
if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
+ unsigned long flags;
unsigned long delay;
+ bool is_console;
+ is_console = uart_console(port);
+
+ if (is_console)
+ console_atomic_lock(flags);
up->ier = port->serial_in(port, UART_IER);
+ if (is_console)
+ console_atomic_unlock(flags);
+
if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
port->ops->stop_rx(port);
} else {
diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c
index 65402d05eff9..8122645ab05c 100644
--- a/drivers/tty/serial/8250/8250_ingenic.c
+++ b/drivers/tty/serial/8250/8250_ingenic.c
@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart",
static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
{
+ unsigned long flags;
+ bool is_console;
int ier;
switch (offset) {
@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
* If we have enabled modem status IRQs we should enable
* modem mode.
*/
+ is_console = uart_console(p);
+ if (is_console)
+ console_atomic_lock(flags);
ier = p->serial_in(p, UART_IER);
+ if (is_console)
+ console_atomic_unlock(flags);
if (ier & UART_IER_MSI)
value |= UART_MCR_MDCE | UART_MCR_FCM;
diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index de48a58460f4..364ee950f21a 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -222,12 +222,37 @@ static void mtk8250_shutdown(struct uart_port *port)
static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask)
{
- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask));
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ unsigned int ier;
+ bool is_console;
+
+ is_console = uart_console(port);
+
+ if (is_console)
+ console_atomic_lock(flags);
+
+ ier = serial_in(up, UART_IER);
+ serial_out(up, UART_IER, ier & (~mask));
+
+ if (is_console)
+ console_atomic_unlock(flags);
}
static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
{
- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask);
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ unsigned int ier;
+
+ if (uart_console(port))
+ console_atomic_lock(flags);
+
+ ier = serial_in(up, UART_IER);
+ serial_out(up, UART_IER, ier | mask);
+
+ if (uart_console(port))
+ console_atomic_unlock(flags);
}
static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index a5496bd1b650..ff8f9c465d9e 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -749,7 +749,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
serial_out(p, UART_EFR, UART_EFR_ECB);
serial_out(p, UART_LCR, 0);
}
- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
+ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0);
if (p->capabilities & UART_CAP_EFR) {
serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
serial_out(p, UART_EFR, efr);
@@ -1423,7 +1423,7 @@ static void serial8250_stop_rx(struct uart_port *port)
up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
up->port.read_status_mask &= ~UART_LSR_DR;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
serial8250_rpm_put(up);
}
@@ -1453,7 +1453,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p)
serial8250_clear_and_reinit_fifos(p);
p->ier |= UART_IER_RLSI | UART_IER_RDI;
- serial_port_out(&p->port, UART_IER, p->ier);
+ serial8250_set_IER(p, p->ier);
}
}
EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx);
@@ -1689,7 +1689,7 @@ static void serial8250_disable_ms(struct uart_port *port)
mctrl_gpio_disable_ms(up->gpios);
up->ier &= ~UART_IER_MSI;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
}
static void serial8250_enable_ms(struct uart_port *port)
@@ -1705,7 +1705,7 @@ static void serial8250_enable_ms(struct uart_port *port)
up->ier |= UART_IER_MSI;
serial8250_rpm_get(up);
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
serial8250_rpm_put(up);
}
@@ -2124,14 +2124,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
struct uart_8250_port *up = up_to_u8250p(port);
serial8250_rpm_get(up);
- /*
- * First save the IER then disable the interrupts
- */
- ier = serial_port_in(port, UART_IER);
- if (up->capabilities & UART_CAP_UUE)
- serial_port_out(port, UART_IER, UART_IER_UUE);
- else
- serial_port_out(port, UART_IER, 0);
+ ier = serial8250_clear_IER(up);
wait_for_xmitr(up, BOTH_EMPTY);
/*
@@ -2144,7 +2137,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
* and restore the IER
*/
wait_for_xmitr(up, BOTH_EMPTY);
- serial_port_out(port, UART_IER, ier);
+ serial8250_set_IER(up, ier);
serial8250_rpm_put(up);
}
@@ -2447,7 +2440,7 @@ void serial8250_do_shutdown(struct uart_port *port)
*/
spin_lock_irqsave(&port->lock, flags);
up->ier = 0;
- serial_port_out(port, UART_IER, 0);
+ serial8250_set_IER(up, 0);
spin_unlock_irqrestore(&port->lock, flags);
synchronize_irq(port->irq);
@@ -2829,7 +2822,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
if (up->capabilities & UART_CAP_RTOIE)
up->ier |= UART_IER_RTOIE;
- serial_port_out(port, UART_IER, up->ier);
+ serial8250_set_IER(up, up->ier);
if (up->capabilities & UART_CAP_EFR) {
unsigned char efr = 0;
@@ -3297,7 +3290,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults);
#ifdef CONFIG_SERIAL_8250_CONSOLE
-static void serial8250_console_putchar(struct uart_port *port, int ch)
+static void serial8250_console_putchar_locked(struct uart_port *port, int ch)
{
struct uart_8250_port *up = up_to_u8250p(port);
@@ -3305,6 +3298,18 @@ static void serial8250_console_putchar(struct uart_port *port, int ch)
serial_port_out(port, UART_TX, ch);
}
+static void serial8250_console_putchar(struct uart_port *port, int ch)
+{
+ struct uart_8250_port *up = up_to_u8250p(port);
+ unsigned long flags;
+
+ wait_for_xmitr(up, UART_LSR_THRE);
+
+ console_atomic_lock(flags);
+ serial8250_console_putchar_locked(port, ch);
+ console_atomic_unlock(flags);
+}
+
/*
* Restore serial console when h/w power-off detected
*/
@@ -3326,6 +3331,32 @@ static void serial8250_console_restore(struct uart_8250_port *up)
serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
}
+void serial8250_console_write_atomic(struct uart_8250_port *up,
+ const char *s, unsigned int count)
+{
+ struct uart_port *port = &up->port;
+ unsigned long flags;
+ unsigned int ier;
+
+ console_atomic_lock(flags);
+
+ touch_nmi_watchdog();
+
+ ier = serial8250_clear_IER(up);
+
+ if (atomic_fetch_inc(&up->console_printing)) {
+ uart_console_write(port, "\n", 1,
+ serial8250_console_putchar_locked);
+ }
+ uart_console_write(port, s, count, serial8250_console_putchar_locked);
+ atomic_dec(&up->console_printing);
+
+ wait_for_xmitr(up, BOTH_EMPTY);
+ serial8250_set_IER(up, ier);
+
+ console_atomic_unlock(flags);
+}
+
/*
* Print a string to the serial port trying not to disturb
* any possible real use of the port...
@@ -3342,24 +3373,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
struct uart_port *port = &up->port;
unsigned long flags;
unsigned int ier;
- int locked = 1;
touch_nmi_watchdog();
- if (oops_in_progress)
- locked = spin_trylock_irqsave(&port->lock, flags);
- else
- spin_lock_irqsave(&port->lock, flags);
-
- /*
- * First save the IER then disable the interrupts
- */
- ier = serial_port_in(port, UART_IER);
+ spin_lock_irqsave(&port->lock, flags);
- if (up->capabilities & UART_CAP_UUE)
- serial_port_out(port, UART_IER, UART_IER_UUE);
- else
- serial_port_out(port, UART_IER, 0);
+ ier = serial8250_clear_IER(up);
/* check scratch reg to see if port powered off during system sleep */
if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
@@ -3373,7 +3392,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
mdelay(port->rs485.delay_rts_before_send);
}
+ atomic_inc(&up->console_printing);
uart_console_write(port, s, count, serial8250_console_putchar);
+ atomic_dec(&up->console_printing);
/*
* Finally, wait for transmitter to become empty
@@ -3386,8 +3407,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
if (em485->tx_stopped)
up->rs485_stop_tx(up);
}
-
- serial_port_out(port, UART_IER, ier);
+ serial8250_set_IER(up, ier);
/*
* The receive handling will happen properly because the
@@ -3399,8 +3419,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
if (up->msr_saved_flags)
serial8250_modem_status(up);
- if (locked)
- spin_unlock_irqrestore(&port->lock, flags);
+ spin_unlock_irqrestore(&port->lock, flags);
}
static unsigned int probe_baud(struct uart_port *port)
@@ -3420,6 +3439,7 @@ static unsigned int probe_baud(struct uart_port *port)
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
{
+ struct uart_8250_port *up = up_to_u8250p(port);
int baud = 9600;
int bits = 8;
int parity = 'n';
@@ -3429,6 +3449,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
if (!port->iobase && !port->membase)
return -ENODEV;
+ atomic_set(&up->console_printing, 0);
+
if (options)
uart_parse_options(options, &baud, &parity, &bits, &flow);
else if (probe)
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 300a8bbb4b80..c7c6558508b5 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2328,18 +2328,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
{
struct uart_amba_port *uap = amba_ports[co->index];
unsigned int old_cr = 0, new_cr;
- unsigned long flags;
+ unsigned long flags = 0;
int locked = 1;
clk_enable(uap->clk);
- local_irq_save(flags);
+ /*
+ * local_irq_save(flags);
+ *
+ * This local_irq_save() is nonsense. If we come in via sysrq
+ * handling then interrupts are already disabled. Aside of
+ * that the port.sysrq check is racy on SMP regardless.
+ */
if (uap->port.sysrq)
locked = 0;
else if (oops_in_progress)
- locked = spin_trylock(&uap->port.lock);
+ locked = spin_trylock_irqsave(&uap->port.lock, flags);
else
- spin_lock(&uap->port.lock);
+ spin_lock_irqsave(&uap->port.lock, flags);
/*
* First save the CR then disable the interrupts
@@ -2365,8 +2371,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
pl011_write(old_cr, uap, REG_CR);
if (locked)
- spin_unlock(&uap->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&uap->port.lock, flags);
clk_disable(uap->clk);
}
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 0862941862c8..10970632f0e4 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1255,13 +1255,10 @@ serial_omap_console_write(struct console *co, const char *s,
unsigned int ier;
int locked = 1;
- local_irq_save(flags);
- if (up->port.sysrq)
- locked = 0;
- else if (oops_in_progress)
- locked = spin_trylock(&up->port.lock);
+ if (up->port.sysrq || oops_in_progress)
+ locked = spin_trylock_irqsave(&up->port.lock, flags);
else
- spin_lock(&up->port.lock);
+ spin_lock_irqsave(&up->port.lock, flags);
/*
* First save the IER then disable the interrupts
@@ -1288,8 +1285,7 @@ serial_omap_console_write(struct console *co, const char *s,
check_modem_status(up);
if (locked)
- spin_unlock(&up->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&up->port.lock, flags);
}
static int __init
diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c
index df5184979b28..d4ad211dce7a 100644
--- a/drivers/virt/acrn/irqfd.c
+++ b/drivers/virt/acrn/irqfd.c
@@ -17,7 +17,6 @@
#include "acrn_drv.h"
static LIST_HEAD(acrn_irqfd_clients);
-static DEFINE_MUTEX(acrn_irqfds_mutex);
/**
* struct hsm_irqfd - Properties of HSM irqfd
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index 45cfd50a9521..502b56597f10 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -239,7 +239,7 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode)
struct dentry *alias;
int ret;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 1929e80c09ee..48eb8c30c6db 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -69,7 +69,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
struct inode *inode;
struct super_block *sb = parent->d_sb;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
diff --git a/fs/dcache.c b/fs/dcache.c
index cf871a81f4fd..02db80f2817f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2537,7 +2537,13 @@ EXPORT_SYMBOL(d_rehash);
static inline unsigned start_dir_add(struct inode *dir)
{
-
+ /*
+ * The caller has a spinlock_t (dentry::d_lock) acquired which disables
+ * preemption on !PREEMPT_RT. On PREEMPT_RT the lock does not disable
+ * preemption and it has be done explicitly.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
for (;;) {
unsigned n = dir->i_dir_seq;
if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2549,25 +2555,30 @@ static inline unsigned start_dir_add(struct inode *dir)
static inline void end_dir_add(struct inode *dir, unsigned n)
{
smp_store_release(&dir->i_dir_seq, n + 2);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
}
static void d_wait_lookup(struct dentry *dentry)
{
- if (d_in_lookup(dentry)) {
- DECLARE_WAITQUEUE(wait, current);
- add_wait_queue(dentry->d_wait, &wait);
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&dentry->d_lock);
- schedule();
- spin_lock(&dentry->d_lock);
- } while (d_in_lookup(dentry));
- }
+ struct swait_queue __wait;
+
+ if (!d_in_lookup(dentry))
+ return;
+
+ INIT_LIST_HEAD(&__wait.task_list);
+ do {
+ prepare_to_swait_exclusive(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&dentry->d_lock);
+ schedule();
+ spin_lock(&dentry->d_lock);
+ } while (d_in_lookup(dentry));
+ finish_swait(dentry->d_wait, &__wait);
}
struct dentry *d_alloc_parallel(struct dentry *parent,
const struct qstr *name,
- wait_queue_head_t *wq)
+ struct swait_queue_head *wq)
{
unsigned int hash = name->hash;
struct hlist_bl_head *b = in_lookup_hash(parent, hash);
@@ -2682,7 +2693,7 @@ void __d_lookup_done(struct dentry *dentry)
hlist_bl_lock(b);
dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
- wake_up_all(dentry->d_wait);
+ swake_up_all(dentry->d_wait);
dentry->d_wait = NULL;
hlist_bl_unlock(b);
INIT_HLIST_NODE(&dentry->d_u.d_alias);
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index c3e4804b8fcb..9edb87e11680 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -81,7 +81,6 @@ extern unsigned fscache_debug;
extern struct kobject *fscache_root;
extern struct workqueue_struct *fscache_object_wq;
extern struct workqueue_struct *fscache_op_wq;
-DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 4207f98e405f..85f8cf3a323d 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -41,8 +41,6 @@ struct kobject *fscache_root;
struct workqueue_struct *fscache_object_wq;
struct workqueue_struct *fscache_op_wq;
-DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
-
/* these values serve as lower bounds, will be adjusted in fscache_init() */
static unsigned fscache_object_max_active = 4;
static unsigned fscache_op_max_active = 2;
@@ -138,7 +136,6 @@ unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
static int __init fscache_init(void)
{
unsigned int nr_cpus = num_possible_cpus();
- unsigned int cpu;
int ret;
fscache_object_max_active =
@@ -161,9 +158,6 @@ static int __init fscache_init(void)
if (!fscache_op_wq)
goto error_op_wq;
- for_each_possible_cpu(cpu)
- init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
-
ret = fscache_proc_init();
if (ret < 0)
goto error_proc;
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 6a675652129b..7a972d144b54 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -798,6 +798,8 @@ void fscache_object_destroy(struct fscache_object *object)
}
EXPORT_SYMBOL(fscache_object_destroy);
+static DECLARE_WAIT_QUEUE_HEAD(fscache_object_cong_wait);
+
/*
* enqueue an object for metadata-type processing
*/
@@ -806,16 +808,12 @@ void fscache_enqueue_object(struct fscache_object *object)
_enter("{OBJ%x}", object->debug_id);
if (fscache_get_object(object, fscache_obj_get_queue) >= 0) {
- wait_queue_head_t *cong_wq =
- &get_cpu_var(fscache_object_cong_wait);
if (queue_work(fscache_object_wq, &object->work)) {
if (fscache_object_congested())
- wake_up(cong_wq);
+ wake_up(&fscache_object_cong_wait);
} else
fscache_put_object(object, fscache_obj_put_queue);
-
- put_cpu_var(fscache_object_cong_wait);
}
}
@@ -833,16 +831,15 @@ void fscache_enqueue_object(struct fscache_object *object)
*/
bool fscache_object_sleep_till_congested(signed long *timeoutp)
{
- wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait);
DEFINE_WAIT(wait);
if (fscache_object_congested())
return true;
- add_wait_queue_exclusive(cong_wq, &wait);
+ add_wait_queue_exclusive(&fscache_object_cong_wait, &wait);
if (!fscache_object_congested())
*timeoutp = schedule_timeout(*timeoutp);
- finish_wait(cong_wq, &wait);
+ finish_wait(&fscache_object_cong_wait, &wait);
return fscache_object_congested();
}
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index bc267832310c..3176913fae6c 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -158,7 +158,7 @@ static int fuse_direntplus_link(struct file *file,
struct inode *dir = d_inode(parent);
struct fuse_conn *fc;
struct inode *inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
if (!o->nodeid) {
/*
diff --git a/fs/namei.c b/fs/namei.c
index 1fd854d4cd2c..71c0b93a4800 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1635,7 +1635,7 @@ static struct dentry *__lookup_slow(const struct qstr *name,
{
struct dentry *dentry, *old;
struct inode *inode = dir->d_inode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
/* Don't go there if it's already dead */
if (unlikely(IS_DEADDIR(inode)))
@@ -3246,7 +3246,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
struct dentry *dentry;
int error, create_error = 0;
umode_t mode = op->mode;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
if (unlikely(IS_DEADDIR(dir_inode)))
return ERR_PTR(-ENOENT);
diff --git a/fs/namespace.c b/fs/namespace.c
index d946298691ed..9e6cbea0c879 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -344,8 +344,24 @@ int __mnt_want_write(struct vfsmount *m)
* incremented count after it has set MNT_WRITE_HOLD.
*/
smp_mb();
- while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
- cpu_relax();
+ might_lock(&mount_lock.lock);
+ while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ cpu_relax();
+ } else {
+ /*
+ * This prevents priority inversion, if the task
+ * setting MNT_WRITE_HOLD got preempted on a remote
+ * CPU, and it prevents life lock if the task setting
+ * MNT_WRITE_HOLD has a lower priority and is bound to
+ * the same CPU as the task that is spinning here.
+ */
+ preempt_enable();
+ lock_mount_hash();
+ unlock_mount_hash();
+ preempt_disable();
+ }
+ }
/*
* After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
* be set to match its requirements. So we must not load that until
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 78219396788b..06bde5728e2f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -636,7 +636,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
unsigned long dir_verifier)
{
struct qstr filename = QSTR_INIT(entry->name, entry->len);
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
struct dentry *dentry;
struct dentry *alias;
struct inode *inode;
@@ -1867,7 +1867,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned open_flags,
umode_t mode)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
struct nfs_open_context *ctx;
struct dentry *res;
struct iattr attr = { .ia_valid = ATTR_OPEN };
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index d5ccf095b2a7..0944c068f5cb 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -13,7 +13,7 @@
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/swait.h>
#include <linux/namei.h>
#include <linux/fsnotify.h>
@@ -184,7 +184,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
data->cred = get_current_cred();
data->res.dir_attr = &data->dir_attr;
- init_waitqueue_head(&data->wq);
+ init_swait_queue_head(&data->wq);
status = -EBUSY;
spin_lock(&dentry->d_lock);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 300d53ee7040..6ab25d4d4037 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -96,6 +96,7 @@
#include <linux/posix-timers.h>
#include <linux/time_namespace.h>
#include <linux/resctrl.h>
+#include <linux/swait.h>
#include <linux/cn_proc.h>
#include <trace/events/oom.h>
#include "internal.h"
@@ -2071,7 +2072,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
child = d_hash_and_lookup(dir, &qname);
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
child = d_alloc_parallel(dir, &qname, &wq);
if (IS_ERR(child))
goto end_instantiate;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 013fc5931bc3..279faa9b8ce3 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -678,7 +678,7 @@ static bool proc_sys_fill_cache(struct file *file,
child = d_lookup(dir, &qname);
if (!child) {
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
child = d_alloc_parallel(dir, &qname, &wq);
if (IS_ERR(child))
return false;
diff --git a/include/asm-generic/softirq_stack.h b/include/asm-generic/softirq_stack.h
index eceeecf6a5bd..d3e2d81656e0 100644
--- a/include/asm-generic/softirq_stack.h
+++ b/include/asm-generic/softirq_stack.h
@@ -2,7 +2,7 @@
#ifndef __ASM_GENERIC_SOFTIRQ_STACK_H
#define __ASM_GENERIC_SOFTIRQ_STACK_H
-#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
+#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT)
void do_softirq_own_stack(void);
#else
static inline void do_softirq_own_stack(void)
diff --git a/include/linux/console.h b/include/linux/console.h
index a97f277cfdfa..487a4266ab2c 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -16,6 +16,13 @@
#include <linux/atomic.h>
#include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/seqlock.h>
+
+struct latched_seq {
+ seqcount_latch_t latch;
+ u64 val[2];
+};
struct vc_data;
struct console_font_op;
@@ -136,10 +143,12 @@ static inline int con_debug_leave(void)
#define CON_ANYTIME (16) /* Safe to call when cpu is offline */
#define CON_BRL (32) /* Used for a braille device */
#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */
+#define CON_HANDOVER (128) /* Device was previously a boot console. */
struct console {
char name[16];
void (*write)(struct console *, const char *, unsigned);
+ void (*write_atomic)(struct console *co, const char *s, unsigned int count);
int (*read)(struct console *, char *, unsigned);
struct tty_driver *(*device)(struct console *, int *);
void (*unblank)(void);
@@ -149,6 +158,16 @@ struct console {
short flags;
short index;
int cflag;
+#ifdef CONFIG_PRINTK
+ char sync_buf[CONSOLE_LOG_MAX];
+ struct latched_seq printk_seq;
+ struct latched_seq printk_sync_seq;
+#ifdef CONFIG_HAVE_NMI
+ struct latched_seq printk_sync_nmi_seq;
+#endif
+#endif /* CONFIG_PRINTK */
+
+ struct task_struct *thread;
uint ispeed;
uint ospeed;
void *data;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 9e23d33bb6f1..9f89d4887e35 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -108,7 +108,7 @@ struct dentry {
union {
struct list_head d_lru; /* LRU list */
- wait_queue_head_t *d_wait; /* in-lookup ones only */
+ struct swait_queue_head *d_wait; /* in-lookup ones only */
};
struct list_head d_child; /* child of parent list */
struct list_head d_subdirs; /* our children */
@@ -240,7 +240,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
extern struct dentry * d_alloc_anon(struct super_block *);
extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
- wait_queue_head_t *);
+ struct swait_queue_head *);
extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 2e2b8d6140ed..71064a2c2caf 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -57,9 +57,15 @@
# define ARCH_EXIT_TO_USER_MODE_WORK (0)
#endif
+#ifdef CONFIG_PREEMPT_LAZY
+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+#else
+# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED)
+#endif
+
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
+ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index ec2a47a81e42..8cd11a223260 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,7 @@
#define _LINUX_IRQ_WORK_H
#include <linux/smp_types.h>
+#include <linux/rcuwait.h>
/*
* An entry can be in one of four states:
@@ -16,11 +17,13 @@
struct irq_work {
struct __call_single_node node;
void (*func)(struct irq_work *);
+ struct rcuwait irqwait;
};
#define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \
.node = { .u_flags = (_flags), }, \
.func = (_func), \
+ .irqwait = __RCUWAIT_INITIALIZER(irqwait), \
}
#define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
@@ -46,6 +49,11 @@ static inline bool irq_work_is_busy(struct irq_work *work)
return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
}
+static inline bool irq_work_is_hard(struct irq_work *work)
+{
+ return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ;
+}
+
bool irq_work_queue(struct irq_work *work);
bool irq_work_queue_on(struct irq_work *work, int cpu);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 59aea39785bf..d69b819b53e0 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
int handle_irq_desc(struct irq_desc *desc);
int generic_handle_irq(unsigned int irq);
+int generic_handle_irq_safe(unsigned int irq);
#ifdef CONFIG_IRQ_DOMAIN
/*
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 600c10da321a..4b140938b03e 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -71,14 +71,6 @@ do { \
do { \
__this_cpu_dec(hardirq_context); \
} while (0)
-# define lockdep_softirq_enter() \
-do { \
- current->softirq_context++; \
-} while (0)
-# define lockdep_softirq_exit() \
-do { \
- current->softirq_context--; \
-} while (0)
# define lockdep_hrtimer_enter(__hrtimer) \
({ \
@@ -140,6 +132,21 @@ do { \
# define lockdep_irq_work_exit(__work) do { } while (0)
#endif
+#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT)
+# define lockdep_softirq_enter() \
+do { \
+ current->softirq_context++; \
+} while (0)
+# define lockdep_softirq_exit() \
+do { \
+ current->softirq_context--; \
+} while (0)
+
+#else
+# define lockdep_softirq_enter() do { } while (0)
+# define lockdep_softirq_exit() do { } while (0)
+#endif
+
#if defined(CONFIG_IRQSOFF_TRACER) || \
defined(CONFIG_PREEMPT_TRACER)
extern void stop_critical_timings(void);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f56cd8879a59..49f1e924b6e6 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -111,8 +111,8 @@ static __always_inline void might_resched(void)
#endif /* CONFIG_PREEMPT_* */
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-extern void ___might_sleep(const char *file, int line, int preempt_offset);
-extern void __might_sleep(const char *file, int line, int preempt_offset);
+extern void __might_resched(const char *file, int line, unsigned int offsets);
+extern void __might_sleep(const char *file, int line);
extern void __cant_sleep(const char *file, int line, int preempt_offset);
extern void __cant_migrate(const char *file, int line);
@@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line);
* supposed to.
*/
# define might_sleep() \
- do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+ do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
/**
* cant_sleep - annotation for functions that cannot sleep
*
@@ -168,10 +168,9 @@ extern void __cant_migrate(const char *file, int line);
*/
# define non_block_end() WARN_ON(current->non_block_count-- == 0)
#else
- static inline void ___might_sleep(const char *file, int line,
- int preempt_offset) { }
- static inline void __might_sleep(const char *file, int line,
- int preempt_offset) { }
+ static inline void __might_resched(const char *file, int line,
+ unsigned int offsets) { }
+static inline void __might_sleep(const char *file, int line) { }
# define might_sleep() do { might_resched(); } while (0)
# define cant_sleep() do { } while (0)
# define cant_migrate() do { } while (0)
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 258cdde8d356..9bca0d98db5a 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -212,6 +212,8 @@ extern void kgdb_call_nmi_hook(void *ignored);
*/
extern void kgdb_roundup_cpus(void);
+extern void kgdb_roundup_cpu(unsigned int cpu);
+
/**
* kgdb_arch_set_pc - Generic call back to the program counter
* @regs: Current &struct pt_regs.
@@ -365,5 +367,6 @@ extern void kgdb_free_init_mem(void);
#define dbg_late_init()
static inline void kgdb_panic(const char *msg) {}
static inline void kgdb_free_init_mem(void) { }
+static inline void kgdb_roundup_cpu(unsigned int cpu) {}
#endif /* ! CONFIG_KGDB */
#endif /* _KGDB_H_ */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..e9672de22cf2 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/uprobes.h>
+#include <linux/rcupdate.h>
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <linux/seqlock.h>
@@ -572,6 +573,9 @@ struct mm_struct {
bool tlb_flush_batched;
#endif
struct uprobes_state uprobes_state;
+#ifdef CONFIG_PREEMPT_RT
+ struct rcu_head delayed_drop;
+#endif
#ifdef CONFIG_HUGETLB_PAGE
atomic_long_t hugetlb_usage;
#endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3b97438afe3e..e506a579e473 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1930,7 +1930,6 @@ enum netdev_ml_priv_type {
* @sfp_bus: attached &struct sfp_bus structure.
*
* @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
- * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
*
* @proto_down: protocol port state information can be sent to the
* switch driver and used to set the phys state of the
@@ -2264,7 +2263,6 @@ struct net_device {
struct phy_device *phydev;
struct sfp_bus *sfp_bus;
struct lock_class_key *qdisc_tx_busylock;
- struct lock_class_key *qdisc_running_key;
bool proto_down;
unsigned wol_enabled:1;
unsigned threaded:1;
@@ -2374,13 +2372,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
#define netdev_lockdep_set_classes(dev) \
{ \
static struct lock_class_key qdisc_tx_busylock_key; \
- static struct lock_class_key qdisc_running_key; \
static struct lock_class_key qdisc_xmit_lock_key; \
static struct lock_class_key dev_addr_list_lock_key; \
unsigned int i; \
\
(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
- (dev)->qdisc_running_key = &qdisc_running_key; \
lockdep_set_class(&(dev)->addr_list_lock, \
&dev_addr_list_lock_key); \
for (i = 0; i < (dev)->num_tx_queues; i++) \
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ecd74cc34797..6af28750625a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1692,7 +1692,7 @@ struct nfs_unlinkdata {
struct nfs_removeargs args;
struct nfs_removeres res;
struct dentry *dentry;
- wait_queue_head_t wq;
+ struct swait_queue_head wq;
const struct cred *cred;
struct nfs_fattr dir_attr;
long timeout;
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 4d244e295e85..3da73c968211 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -122,9 +122,10 @@
* The preempt_count offset after spin_lock()
*/
#if !defined(CONFIG_PREEMPT_RT)
-#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
+#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
#else
-#define PREEMPT_LOCK_OFFSET 0
+/* Locks on RT do not disable preemption */
+#define PREEMPT_LOCK_OFFSET 0
#endif
/*
@@ -174,6 +175,20 @@ extern void preempt_count_sub(int val);
#define preempt_count_inc() preempt_count_add(1)
#define preempt_count_dec() preempt_count_sub(1)
+#ifdef CONFIG_PREEMPT_LAZY
+#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
+#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
+#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
+#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
+#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
+#else
+#define add_preempt_lazy_count(val) do { } while (0)
+#define sub_preempt_lazy_count(val) do { } while (0)
+#define inc_preempt_lazy_count() do { } while (0)
+#define dec_preempt_lazy_count() do { } while (0)
+#define preempt_lazy_count() (0)
+#endif
+
#ifdef CONFIG_PREEMPT_COUNT
#define preempt_disable() \
@@ -182,13 +197,25 @@ do { \
barrier(); \
} while (0)
+#define preempt_lazy_disable() \
+do { \
+ inc_preempt_lazy_count(); \
+ barrier(); \
+} while (0)
+
#define sched_preempt_enable_no_resched() \
do { \
barrier(); \
preempt_count_dec(); \
} while (0)
-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+#ifndef CONFIG_PREEMPT_RT
+# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
+# define preempt_check_resched_rt() barrier();
+#else
+# define preempt_enable_no_resched() preempt_enable()
+# define preempt_check_resched_rt() preempt_check_resched()
+#endif
#define preemptible() (preempt_count() == 0 && !irqs_disabled())
@@ -213,6 +240,18 @@ do { \
__preempt_schedule(); \
} while (0)
+/*
+ * open code preempt_check_resched() because it is not exported to modules and
+ * used by local_unlock() or bpf_enable_instrumentation().
+ */
+#define preempt_lazy_enable() \
+do { \
+ dec_preempt_lazy_count(); \
+ barrier(); \
+ if (should_resched(0)) \
+ __preempt_schedule(); \
+} while (0)
+
#else /* !CONFIG_PREEMPTION */
#define preempt_enable() \
do { \
@@ -220,6 +259,12 @@ do { \
preempt_count_dec(); \
} while (0)
+#define preempt_lazy_enable() \
+do { \
+ dec_preempt_lazy_count(); \
+ barrier(); \
+} while (0)
+
#define preempt_enable_notrace() \
do { \
barrier(); \
@@ -258,8 +303,12 @@ do { \
#define preempt_disable_notrace() barrier()
#define preempt_enable_no_resched_notrace() barrier()
#define preempt_enable_notrace() barrier()
+#define preempt_check_resched_rt() barrier()
#define preemptible() 0
+#define preempt_lazy_disable() barrier()
+#define preempt_lazy_enable() barrier()
+
#endif /* CONFIG_PREEMPT_COUNT */
#ifdef MODULE
@@ -278,7 +327,7 @@ do { \
} while (0)
#define preempt_fold_need_resched() \
do { \
- if (tif_need_resched()) \
+ if (tif_need_resched_now()) \
set_preempt_need_resched(); \
} while (0)
@@ -394,8 +443,15 @@ extern void migrate_enable(void);
#else
-static inline void migrate_disable(void) { }
-static inline void migrate_enable(void) { }
+static inline void migrate_disable(void)
+{
+ preempt_lazy_disable();
+}
+
+static inline void migrate_enable(void)
+{
+ preempt_lazy_enable();
+}
#endif /* CONFIG_SMP */
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 9497f6b98339..eddfc5de6ee7 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -47,6 +47,12 @@ static inline const char *printk_skip_headers(const char *buffer)
#define CONSOLE_EXT_LOG_MAX 8192
+/*
+ * The maximum size of a record formatted for console printing
+ * (i.e. with the prefix prepended to every line).
+ */
+#define CONSOLE_LOG_MAX 1024
+
/* printk's without a loglevel use this.. */
#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
@@ -155,6 +161,8 @@ int vprintk(const char *fmt, va_list args);
asmlinkage __printf(1, 2) __cold
int _printk(const char *fmt, ...);
+bool pr_flush(int timeout_ms, bool reset_on_progress);
+
/*
* Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
*/
@@ -224,6 +232,11 @@ static inline void printk_deferred_exit(void)
{
}
+static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
+{
+ return true;
+}
+
static inline int printk_ratelimit(void)
{
return 0;
@@ -284,17 +297,30 @@ static inline void printk_trigger_flush(void)
extern int __printk_cpu_trylock(void);
extern void __printk_wait_on_cpu_lock(void);
extern void __printk_cpu_unlock(void);
+extern bool kgdb_roundup_delay(unsigned int cpu);
+
+#else
+
+#define __printk_cpu_trylock() 1
+#define __printk_wait_on_cpu_lock()
+#define __printk_cpu_unlock()
+
+static inline bool kgdb_roundup_delay(unsigned int cpu)
+{
+ return false;
+}
+#endif /* CONFIG_SMP */
/**
- * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
- * lock and disable interrupts.
+ * raw_printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
+ * lock and disable interrupts.
* @flags: Stack-allocated storage for saving local interrupt state,
- * to be passed to printk_cpu_unlock_irqrestore().
+ * to be passed to raw_printk_cpu_unlock_irqrestore().
*
* If the lock is owned by another CPU, spin until it becomes available.
* Interrupts are restored while spinning.
*/
-#define printk_cpu_lock_irqsave(flags) \
+#define raw_printk_cpu_lock_irqsave(flags) \
for (;;) { \
local_irq_save(flags); \
if (__printk_cpu_trylock()) \
@@ -304,22 +330,30 @@ extern void __printk_cpu_unlock(void);
}
/**
- * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning
- * lock and restore interrupts.
- * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave().
+ * raw_printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant
+ * spinning lock and restore interrupts.
+ * @flags: Caller's saved interrupt state from raw_printk_cpu_lock_irqsave().
*/
-#define printk_cpu_unlock_irqrestore(flags) \
+#define raw_printk_cpu_unlock_irqrestore(flags) \
do { \
__printk_cpu_unlock(); \
local_irq_restore(flags); \
- } while (0) \
-
-#else
+ } while (0)
-#define printk_cpu_lock_irqsave(flags) ((void)flags)
-#define printk_cpu_unlock_irqrestore(flags) ((void)flags)
+/*
+ * Used to synchronize atomic consoles.
+ *
+ * The same as raw_printk_cpu_lock_irqsave() except that hardware interrupts
+ * are _not_ restored while spinning.
+ */
+#define console_atomic_lock(flags) \
+ do { \
+ local_irq_save(flags); \
+ while (!__printk_cpu_trylock()) \
+ cpu_relax(); \
+ } while (0)
-#endif /* CONFIG_SMP */
+#define console_atomic_unlock raw_printk_cpu_unlock_irqrestore
extern int kptr_restrict;
diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
index f0e535f199be..002266693e50 100644
--- a/include/linux/ratelimit_types.h
+++ b/include/linux/ratelimit_types.h
@@ -4,7 +4,7 @@
#include <linux/bits.h>
#include <linux/param.h>
-#include <linux/spinlock_types.h>
+#include <linux/spinlock_types_raw.h>
#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
#define DEFAULT_RATELIMIT_BURST 10
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 434d12fe2d4f..de6d1a21f113 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -94,6 +94,13 @@ void rcu_init_tasks_generic(void);
static inline void rcu_init_tasks_generic(void) { }
#endif
+#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TASKS_RCU_GENERIC)
+void rcu_tasks_initiate_self_tests(void);
+#else
+static inline void rcu_tasks_initiate_self_tests(void) {}
+#endif
+
+
#ifdef CONFIG_RCU_STALL_COMMON
void rcu_sysrq_start(void);
void rcu_sysrq_end(void);
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 9deedfeec2b1..7d049883a08a 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -99,13 +99,22 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
+extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock);
#define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0)
+#define rt_mutex_lock_nest_lock(lock, nest_lock) \
+ do { \
+ typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
+ _rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
+ } while (0)
+
#else
extern void rt_mutex_lock(struct rt_mutex *lock);
#define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock)
+#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock)
#endif
extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
+extern int rt_mutex_lock_killable(struct rt_mutex *lock);
extern int rt_mutex_trylock(struct rt_mutex *lock);
extern void rt_mutex_unlock(struct rt_mutex *lock);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dcba347cbffa..23fb0ca4993b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,12 +118,8 @@ struct task_group;
#define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)
-#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-
#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
-#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
-
/*
* Special states are those that do not use the normal wait-loop pattern. See
* the comment with set_special_state().
@@ -1084,6 +1080,10 @@ struct task_struct {
/* Restored if set_restore_sigmask() was used: */
sigset_t saved_sigmask;
struct sigpending pending;
+#ifdef CONFIG_PREEMPT_RT
+ /* TODO: move me into ->restart_block ? */
+ struct kernel_siginfo forced_info;
+#endif
unsigned long sas_ss_sp;
size_t sas_ss_size;
unsigned int sas_ss_flags;
@@ -1738,6 +1738,16 @@ static __always_inline bool is_percpu_thread(void)
#endif
}
+/* Is the current task guaranteed to stay on its current CPU? */
+static inline bool is_migratable(void)
+{
+#ifdef CONFIG_SMP
+ return preemptible() && !current->migration_disabled;
+#else
+ return false;
+#endif
+}
+
/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
@@ -2013,6 +2023,118 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}
+#ifdef CONFIG_PREEMPT_LAZY
+static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
+{
+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
+}
+
+static inline int need_resched_lazy(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
+}
+
+static inline int need_resched_now(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#else
+static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
+static inline int need_resched_lazy(void) { return 0; }
+
+static inline int need_resched_now(void)
+{
+ return test_thread_flag(TIF_NEED_RESCHED);
+}
+
+#endif
+
+#ifdef CONFIG_PREEMPT_RT
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+ return p->saved_state == match_state;
+}
+
+static inline bool task_is_traced(struct task_struct *task)
+{
+ bool traced = false;
+
+ /* in case the task is sleeping on tasklist_lock */
+ raw_spin_lock_irq(&task->pi_lock);
+ if (READ_ONCE(task->__state) & __TASK_TRACED)
+ traced = true;
+ else if (task->saved_state & __TASK_TRACED)
+ traced = true;
+ raw_spin_unlock_irq(&task->pi_lock);
+ return traced;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+ bool traced_stopped = false;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ if (READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED))
+ traced_stopped = true;
+ else if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
+ traced_stopped = true;
+
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ return traced_stopped;
+}
+
+#else
+
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+ return false;
+}
+
+static inline bool task_is_traced(struct task_struct *task)
+{
+ return READ_ONCE(task->__state) & __TASK_TRACED;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+ return READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED);
+}
+#endif
+
+static inline bool task_match_state_or_saved(struct task_struct *p,
+ long match_state)
+{
+ if (READ_ONCE(p->__state) == match_state)
+ return true;
+
+ return task_match_saved_state(p, match_state);
+}
+
+static inline bool task_match_state_lock(struct task_struct *p,
+ long match_state)
+{
+ bool match;
+
+ raw_spin_lock_irq(&p->pi_lock);
+ match = task_match_state_or_saved(p, match_state);
+ raw_spin_unlock_irq(&p->pi_lock);
+
+ return match;
+}
+
/*
* cond_resched() and cond_resched_lock(): latency reduction via
* explicit rescheduling in places that are safe. The return
@@ -2047,7 +2169,7 @@ static inline int _cond_resched(void) { return 0; }
#endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
#define cond_resched() ({ \
- ___might_sleep(__FILE__, __LINE__, 0); \
+ __might_resched(__FILE__, __LINE__, 0); \
_cond_resched(); \
})
@@ -2055,19 +2177,38 @@ extern int __cond_resched_lock(spinlock_t *lock);
extern int __cond_resched_rwlock_read(rwlock_t *lock);
extern int __cond_resched_rwlock_write(rwlock_t *lock);
-#define cond_resched_lock(lock) ({ \
- ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
- __cond_resched_lock(lock); \
+#define MIGHT_RESCHED_RCU_SHIFT 8
+#define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
+
+#ifndef CONFIG_PREEMPT_RT
+/*
+ * Non RT kernels have an elevated preempt count due to the held lock,
+ * but are not allowed to be inside a RCU read side critical section
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET
+#else
+/*
+ * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
+ * cond_resched*lock() has to take that into account because it checks for
+ * preempt_count() and rcu_preempt_depth().
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS \
+ (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
+#endif
+
+#define cond_resched_lock(lock) ({ \
+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
+ __cond_resched_lock(lock); \
})
-#define cond_resched_rwlock_read(lock) ({ \
- __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
- __cond_resched_rwlock_read(lock); \
+#define cond_resched_rwlock_read(lock) ({ \
+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
+ __cond_resched_rwlock_read(lock); \
})
-#define cond_resched_rwlock_write(lock) ({ \
- __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
- __cond_resched_rwlock_write(lock); \
+#define cond_resched_rwlock_write(lock) ({ \
+ __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \
+ __cond_resched_rwlock_write(lock); \
})
static inline void cond_resched_rcu(void)
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 95fb7aaaec8d..28e9cc60f47e 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,26 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
+#ifdef CONFIG_PREEMPT_RT
+extern void __mmdrop_delayed(struct rcu_head *rhp);
+
+/*
+ * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
+ * kernels via RCU.
+ */
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+ /* Provides a full memory barrier. See mmdrop() */
+ if (atomic_dec_and_test(&mm->mm_count))
+ call_rcu(&mm->delayed_drop, __mmdrop_delayed);
+}
+#else
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+ mmdrop(mm);
+}
+#endif
+
/**
* mmget() - Pin the address space associated with a &struct mm_struct.
* @mm: The address space to pin.
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 5db211f43b29..aa011f668705 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -7,6 +7,7 @@
#ifndef _LINUX_SERIAL_8250_H
#define _LINUX_SERIAL_8250_H
+#include <linux/atomic.h>
#include <linux/serial_core.h>
#include <linux/serial_reg.h>
#include <linux/platform_device.h>
@@ -125,6 +126,8 @@ struct uart_8250_port {
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
unsigned char msr_saved_flags;
+ atomic_t console_printing;
+
struct uart_8250_dma *dma;
const struct uart_8250_ops *ops;
@@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_8250_port *up);
void serial8250_set_defaults(struct uart_8250_port *up);
void serial8250_console_write(struct uart_8250_port *up, const char *s,
unsigned int count);
+void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s,
+ unsigned int count);
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
int serial8250_console_exit(struct uart_port *port);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ae598ed86b50..922e1b6e5b05 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -300,6 +300,7 @@ struct sk_buff_head {
__u32 qlen;
spinlock_t lock;
+ raw_spinlock_t raw_lock;
};
struct sk_buff;
@@ -1990,6 +1991,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
__skb_queue_head_init(list);
}
+static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
+{
+ raw_spin_lock_init(&list->raw_lock);
+ __skb_queue_head_init(list);
+}
+
static inline void skb_queue_head_init_class(struct sk_buff_head *list,
struct lock_class_key *class)
{
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 510519e8a1eb..7ac9fdb5ad09 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -268,6 +268,9 @@ static inline int get_boot_cpu_id(void)
#define get_cpu() ({ preempt_disable(); __smp_processor_id(); })
#define put_cpu() preempt_enable()
+#define get_cpu_light() ({ migrate_disable(); __smp_processor_id(); })
+#define put_cpu_light() migrate_enable()
+
/*
* Callback to arch code if there's nosmp or maxcpus=0 on the
* boot command line:
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index c09b6407ae1b..7f86a2016ac5 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -1,7 +1,7 @@
#ifndef __LINUX_SPINLOCK_TYPES_UP_H
#define __LINUX_SPINLOCK_TYPES_UP_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 0999f6317978..7af834b7c114 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -163,7 +163,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
#endif /* !CONFIG_GENERIC_ENTRY */
-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#ifdef CONFIG_PREEMPT_LAZY
+#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
+ test_thread_flag(TIF_NEED_RESCHED_LAZY))
+#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
+#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)
+
+#else
+#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
+#define tif_need_resched_lazy() 0
+#endif
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
static inline int arch_within_stack_frames(const void * const stack,
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 57113190448c..827725f41149 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -69,6 +69,7 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
+ unsigned char preempt_lazy_count;
};
#define TRACE_EVENT_TYPE_MAX \
@@ -157,9 +158,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry,
unsigned int trace_ctx)
{
entry->preempt_count = trace_ctx & 0xff;
+ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff;
entry->pid = current->pid;
entry->type = type;
- entry->flags = trace_ctx >> 16;
+ entry->flags = trace_ctx >> 24;
}
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
@@ -172,6 +174,7 @@ enum trace_flag_type {
TRACE_FLAG_SOFTIRQ = 0x10,
TRACE_FLAG_PREEMPT_RESCHED = 0x20,
TRACE_FLAG_NMI = 0x40,
+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x80,
};
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index e81856c0ba13..81dc1f5e181a 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -66,7 +66,7 @@
#include <linux/seqlock.h>
struct u64_stats_sync {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG==32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
seqcount_t seq;
#endif
};
@@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
return local64_read(&p->v);
}
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+ local64_set(&p->v, val);
+}
+
static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
{
local64_add(val, &p->v);
@@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
return p->v;
}
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+ p->v = val;
+}
+
static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
{
p->v += val;
@@ -115,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p)
}
#endif
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq)
#else
static inline void u64_stats_init(struct u64_stats_sync *syncp)
@@ -125,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp)
static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
write_seqcount_begin(&syncp->seq);
#endif
}
static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
write_seqcount_end(&syncp->seq);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
#endif
}
@@ -142,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
{
unsigned long flags = 0;
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
- local_irq_save(flags);
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
+ else
+ local_irq_save(flags);
write_seqcount_begin(&syncp->seq);
#endif
return flags;
@@ -153,15 +170,18 @@ static inline void
u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
unsigned long flags)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
write_seqcount_end(&syncp->seq);
- local_irq_restore(flags);
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
+ else
+ local_irq_restore(flags);
#endif
}
static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
return read_seqcount_begin(&syncp->seq);
#else
return 0;
@@ -170,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *
static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
preempt_disable();
#endif
return __u64_stats_fetch_begin(syncp);
@@ -179,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy
static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
return read_seqcount_retry(&syncp->seq, start);
#else
return false;
@@ -189,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
preempt_enable();
#endif
return __u64_stats_fetch_retry(syncp, start);
@@ -203,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
*/
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
{
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+ preempt_disable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
local_irq_disable();
#endif
return __u64_stats_fetch_begin(syncp);
@@ -212,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync
static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
unsigned int start)
{
-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
+ preempt_enable();
+#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
local_irq_enable();
#endif
return __u64_stats_fetch_retry(syncp, start);
diff --git a/include/net/act_api.h b/include/net/act_api.h
index f19f7f4a463c..b5b624c7e488 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -30,13 +30,13 @@ struct tc_action {
atomic_t tcfa_bindcnt;
int tcfa_action;
struct tcf_t tcfa_tm;
- struct gnet_stats_basic_packed tcfa_bstats;
- struct gnet_stats_basic_packed tcfa_bstats_hw;
+ struct gnet_stats_basic_sync tcfa_bstats;
+ struct gnet_stats_basic_sync tcfa_bstats_hw;
struct gnet_stats_queue tcfa_qstats;
struct net_rate_estimator __rcu *tcfa_rate_est;
spinlock_t tcfa_lock;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats;
- struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
struct gnet_stats_queue __percpu *cpu_qstats;
struct tc_cookie __rcu *act_cookie;
struct tcf_chain __rcu *goto_chain;
@@ -206,7 +206,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a,
struct sk_buff *skb)
{
if (likely(a->cpu_bstats)) {
- bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
+ bstats_update(this_cpu_ptr(a->cpu_bstats), skb);
return;
}
spin_lock(&a->tcfa_lock);
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 1424e02cef90..7aa2b8e1fb29 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -7,14 +7,17 @@
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
-/* Note: this used to be in include/uapi/linux/gen_stats.h */
-struct gnet_stats_basic_packed {
- __u64 bytes;
- __u64 packets;
-};
-
-struct gnet_stats_basic_cpu {
- struct gnet_stats_basic_packed bstats;
+/* Throughput stats.
+ * Must be initialized beforehand with gnet_stats_basic_sync_init().
+ *
+ * If no reads can ever occur parallel to writes (e.g. stack-allocated
+ * bstats), then the internal stat values can be written to and read
+ * from directly. Otherwise, use _bstats_set/update() for writes and
+ * gnet_stats_add_basic() for reads.
+ */
+struct gnet_stats_basic_sync {
+ u64_stats_t bytes;
+ u64_stats_t packets;
struct u64_stats_sync syncp;
} __aligned(2 * sizeof(u64));
@@ -34,6 +37,7 @@ struct gnet_dump {
struct tc_stats tc_stats;
};
+void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b);
int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
struct gnet_dump *d, int padattr);
@@ -42,41 +46,38 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
spinlock_t *lock, struct gnet_dump *d,
int padattr);
-int gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
-void __gnet_stats_copy_basic(const seqcount_t *running,
- struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
-int gnet_stats_copy_basic_hw(const seqcount_t *running,
- struct gnet_dump *d,
- struct gnet_stats_basic_cpu __percpu *cpu,
- struct gnet_stats_basic_packed *b);
+int gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running);
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running);
+int gnet_stats_copy_basic_hw(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b, bool running);
int gnet_stats_copy_rate_est(struct gnet_dump *d,
struct net_rate_estimator __rcu **ptr);
int gnet_stats_copy_queue(struct gnet_dump *d,
struct gnet_stats_queue __percpu *cpu_q,
struct gnet_stats_queue *q, __u32 qlen);
-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
- const struct gnet_stats_queue __percpu *cpu_q,
- const struct gnet_stats_queue *q, __u32 qlen);
+void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
+ const struct gnet_stats_queue __percpu *cpu_q,
+ const struct gnet_stats_queue *q);
int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
int gnet_stats_finish_copy(struct gnet_dump *d);
-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **rate_est,
spinlock_t *lock,
- seqcount_t *running, struct nlattr *opt);
+ bool running, struct nlattr *opt);
void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
- struct gnet_stats_basic_cpu __percpu *cpu_bstats,
+int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
struct net_rate_estimator __rcu **ptr,
spinlock_t *lock,
- seqcount_t *running, struct nlattr *opt);
+ bool running, struct nlattr *opt);
bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
struct gnet_stats_rate_est64 *sample);
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index 832ab69efda5..4c3809e141f4 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -6,7 +6,7 @@
struct xt_rateest {
/* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
- struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_basic_sync bstats;
spinlock_t lock;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 83a6d0792180..4a5833108083 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -765,7 +765,7 @@ struct tc_cookie {
};
struct tc_qopt_offload_stats {
- struct gnet_stats_basic_packed *bstats;
+ struct gnet_stats_basic_sync *bstats;
struct gnet_stats_queue *qstats;
};
@@ -885,7 +885,7 @@ struct tc_gred_qopt_offload_params {
};
struct tc_gred_qopt_offload_stats {
- struct gnet_stats_basic_packed bstats[MAX_DPs];
+ struct gnet_stats_basic_sync bstats[MAX_DPs];
struct gnet_stats_queue qstats[MAX_DPs];
struct red_stats *xstats[MAX_DPs];
};
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1958d1260fe9..e43f3922faa7 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -40,6 +40,13 @@ enum qdisc_state_t {
__QDISC_STATE_DRAINING,