| diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst |
| index d2c4c27e1702..d83c9ab49427 100644 |
| --- a/Documentation/dev-tools/kcov.rst |
| +++ b/Documentation/dev-tools/kcov.rst |
| @@ -50,6 +50,7 @@ The following program demonstrates coverage collection from within a test |
| #include <sys/mman.h> |
| #include <unistd.h> |
| #include <fcntl.h> |
| + #include <linux/types.h> |
| |
| #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) |
| #define KCOV_ENABLE _IO('c', 100) |
| @@ -177,6 +178,8 @@ Comparison operands collection |
| /* Read number of comparisons collected. */ |
| n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); |
| for (i = 0; i < n; i++) { |
| + uint64_t ip; |
| + |
| type = cover[i * KCOV_WORDS_PER_CMP + 1]; |
| /* arg1 and arg2 - operands of the comparison. */ |
| arg1 = cover[i * KCOV_WORDS_PER_CMP + 2]; |
| @@ -251,6 +254,8 @@ selectively from different subsystems. |
| |
| .. code-block:: c |
| |
| + /* Same includes and defines as above. */ |
| + |
| struct kcov_remote_arg { |
| __u32 trace_mode; |
| __u32 area_size; |
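|  |
| Note: the new <linux/types.h> include above is what provides the |
| fixed-width __u32/__u64 kernel types used by the documentation's |
| examples. A minimal compile-check sketch (the struct prefix below is |
| illustrative, not the full UAPI definition): |
|  |
|     #include <linux/types.h> |
|  |
|     /* Without the include, __u32 is undefined in user code. */ |
|     struct kcov_remote_arg_prefix { |
|             __u32 trace_mode;       /* KCOV_TRACE_PC or KCOV_TRACE_CMP */ |
|             __u32 area_size;        /* length of coverage buffer in words */ |
|     }; |
|  |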
| diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h |
| index 1d5716bc060b..2526fd3be5fd 100644 |
| --- a/arch/alpha/include/asm/spinlock_types.h |
| +++ b/arch/alpha/include/asm/spinlock_types.h |
| @@ -2,7 +2,7 @@ |
| #ifndef _ALPHA_SPINLOCK_TYPES_H |
| #define _ALPHA_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig |
| index 4ebd512043be..5ac2009727bd 100644 |
| --- a/arch/arm/Kconfig |
| +++ b/arch/arm/Kconfig |
| @@ -32,6 +32,7 @@ config ARM |
| select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 |
| select ARCH_SUPPORTS_ATOMIC_RMW |
| select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE |
| + select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK |
| select ARCH_USE_BUILTIN_BSWAP |
| select ARCH_USE_CMPXCHG_LOCKREF |
| select ARCH_USE_MEMTEST |
| @@ -68,7 +69,7 @@ config ARM |
| select HARDIRQS_SW_RESEND |
| select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT |
| select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 |
| - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU |
| + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT |
| select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU |
| select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL |
| select HAVE_ARCH_MMAP_RND_BITS if MMU |
| @@ -109,6 +110,7 @@ config ARM |
| select HAVE_PERF_EVENTS |
| select HAVE_PERF_REGS |
| select HAVE_PERF_USER_STACK_DUMP |
| + select HAVE_PREEMPT_LAZY |
| select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE |
| select HAVE_REGS_AND_STACK_ACCESS_API |
| select HAVE_RSEQ |
| @@ -124,6 +126,7 @@ config ARM |
| select OLD_SIGSUSPEND3 |
| select PCI_SYSCALL if PCI |
| select PERF_USE_VMALLOC |
| + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM |
| select RTC_LIB |
| select SYS_SUPPORTS_APM_EMULATION |
| select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M |
| diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h |
| index 5976958647fe..0c14b36ef101 100644 |
| --- a/arch/arm/include/asm/spinlock_types.h |
| +++ b/arch/arm/include/asm/spinlock_types.h |
| @@ -2,7 +2,7 @@ |
| #ifndef __ASM_SPINLOCK_TYPES_H |
| #define __ASM_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h |
| index 9a18da3e10cc..2fa63d96a4f0 100644 |
| --- a/arch/arm/include/asm/thread_info.h |
| +++ b/arch/arm/include/asm/thread_info.h |
| @@ -52,6 +52,7 @@ struct cpu_context_save { |
| struct thread_info { |
| unsigned long flags; /* low level flags */ |
| int preempt_count; /* 0 => preemptable, <0 => bug */ |
| + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ |
| struct task_struct *task; /* main task structure */ |
| __u32 cpu; /* cpu */ |
| __u32 cpu_domain; /* cpu domain */ |
| @@ -134,6 +135,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, |
| #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ |
| #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ |
| #define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ |
| +#define TIF_NEED_RESCHED_LAZY 9 |
| |
| #define TIF_USING_IWMMXT 17 |
| #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ |
| @@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, |
| #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) |
| #define _TIF_SECCOMP (1 << TIF_SECCOMP) |
| #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) |
| +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) |
| #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) |
| |
| /* Checks for any syscall work in entry-common.S */ |
| @@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, |
| /* |
| * Change these and you break ASM code in entry-common.S |
| */ |
| -#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ |
| +#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ |
| + _TIF_SIGPENDING | \ |
| _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ |
| _TIF_NOTIFY_SIGNAL) |
| |
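|  |
| Note: each TIF_* bit number above pairs with a _TIF_* mask via a |
| 1 << shift, and the widened _TIF_WORK_MASK is what the assembly in |
| entry-common.S tests on the return-to-user path. A hedged C sketch of |
| that test (the helper name is illustrative only): |
|  |
|     static inline bool arm_return_work_pending(unsigned long ti_flags) |
|     { |
|             /* Both the eager and the lazy resched bits count as work. */ |
|             return ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | |
|                                _TIF_SIGPENDING | _TIF_NOTIFY_RESUME | |
|                                _TIF_UPROBE | _TIF_NOTIFY_SIGNAL); |
|     } |
|  |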
| diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c |
| index a646a3f6440f..beb09d74684f 100644 |
| --- a/arch/arm/kernel/asm-offsets.c |
| +++ b/arch/arm/kernel/asm-offsets.c |
| @@ -43,6 +43,7 @@ int main(void) |
| BLANK(); |
| DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); |
| DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); |
| + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); |
| DEFINE(TI_TASK, offsetof(struct thread_info, task)); |
| DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); |
| DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); |
| diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S |
| index 68261a83b7ad..fa7d110ce555 100644 |
| --- a/arch/arm/kernel/entry-armv.S |
| +++ b/arch/arm/kernel/entry-armv.S |
| @@ -206,11 +206,18 @@ ENDPROC(__dabt_svc) |
| |
| #ifdef CONFIG_PREEMPTION |
| ldr r8, [tsk, #TI_PREEMPT] @ get preempt count |
| - ldr r0, [tsk, #TI_FLAGS] @ get flags |
| teq r8, #0 @ if preempt count != 0 |
| + bne 1f @ return from exception |
| + ldr r0, [tsk, #TI_FLAGS] @ get flags |
| + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set |
| + blne svc_preempt @ preempt! |
| + |
| + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count |
| + teq r8, #0 @ if preempt lazy count != 0 |
| movne r0, #0 @ force flags to 0 |
| - tst r0, #_TIF_NEED_RESCHED |
| + tst r0, #_TIF_NEED_RESCHED_LAZY |
| blne svc_preempt |
| +1: |
| #endif |
| |
| svc_exit r5, irq = 1 @ return from exception |
| @@ -225,8 +232,14 @@ ENDPROC(__irq_svc) |
| 1: bl preempt_schedule_irq @ irq en/disable is done inside |
| ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS |
| tst r0, #_TIF_NEED_RESCHED |
| + bne 1b |
| + tst r0, #_TIF_NEED_RESCHED_LAZY |
| reteq r8 @ go again |
| - b 1b |
| + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count |
| + teq r0, #0 @ if preempt lazy count != 0 |
| + beq 1b |
| + ret r8 @ go again |
| + |
| #endif |
| |
| __und_fault: |
| diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c |
| index 539897ac2828..4655f04ccdcd 100644 |
| --- a/arch/arm/kernel/signal.c |
| +++ b/arch/arm/kernel/signal.c |
| @@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) |
| */ |
| trace_hardirqs_off(); |
| do { |
| - if (likely(thread_flags & _TIF_NEED_RESCHED)) { |
| + if (likely(thread_flags & (_TIF_NEED_RESCHED | |
| + _TIF_NEED_RESCHED_LAZY))) { |
| schedule(); |
| } else { |
| if (unlikely(!user_mode(regs))) |
| diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c |
| index 842427ff2b3c..b943e2df9540 100644 |
| --- a/arch/arm/kernel/smp.c |
| +++ b/arch/arm/kernel/smp.c |
| @@ -667,9 +667,7 @@ static void do_handle_IPI(int ipinr) |
| break; |
| |
| case IPI_CPU_BACKTRACE: |
| - printk_deferred_enter(); |
| nmi_cpu_backtrace(get_irq_regs()); |
| - printk_deferred_exit(); |
| break; |
| |
| default: |
| diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c |
| index efa402025031..59487ee9fd61 100644 |
| --- a/arch/arm/mm/fault.c |
| +++ b/arch/arm/mm/fault.c |
| @@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, |
| if (addr < TASK_SIZE) |
| return do_page_fault(addr, fsr, regs); |
| |
| + if (interrupts_enabled(regs)) |
| + local_irq_enable(); |
| + |
| if (user_mode(regs)) |
| goto bad_area; |
| |
| @@ -470,6 +473,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, |
| static int |
| do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) |
| { |
| + if (interrupts_enabled(regs)) |
| + local_irq_enable(); |
| + |
| do_bad_area(addr, fsr, regs); |
| return 0; |
| } |
| diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig |
| index 8b6f090e0364..784c90ba371e 100644 |
| --- a/arch/arm64/Kconfig |
| +++ b/arch/arm64/Kconfig |
| @@ -88,6 +88,7 @@ config ARM64 |
| select ARCH_SUPPORTS_ATOMIC_RMW |
| select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 |
| select ARCH_SUPPORTS_NUMA_BALANCING |
| + select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK |
| select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT |
| select ARCH_WANT_DEFAULT_BPF_JIT |
| select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT |
| @@ -191,6 +192,7 @@ config ARM64 |
| select HAVE_PERF_REGS |
| select HAVE_PERF_USER_STACK_DUMP |
| select HAVE_REGS_AND_STACK_ACCESS_API |
| + select HAVE_PREEMPT_LAZY |
| select HAVE_FUNCTION_ARG_ACCESS_API |
| select HAVE_FUTEX_CMPXCHG if FUTEX |
| select MMU_GATHER_RCU_TABLE_FREE |
| @@ -212,6 +214,7 @@ config ARM64 |
| select PCI_DOMAINS_GENERIC if PCI |
| select PCI_ECAM if (ACPI && PCI) |
| select PCI_SYSCALL if PCI |
| + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM |
| select POWER_RESET |
| select POWER_SUPPLY |
| select SPARSE_IRQ |
| diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h |
| index ed57717cd004..63b39229890b 100644 |
| --- a/arch/arm64/include/asm/pgtable.h |
| +++ b/arch/arm64/include/asm/pgtable.h |
| @@ -1001,7 +1001,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, |
| */ |
| static inline bool arch_faults_on_old_pte(void) |
| { |
| - WARN_ON(preemptible()); |
| + WARN_ON(is_migratable()); |
| |
| return !cpu_has_hw_af(); |
| } |
| diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h |
| index e83f0982b99c..2545c17281e1 100644 |
| --- a/arch/arm64/include/asm/preempt.h |
| +++ b/arch/arm64/include/asm/preempt.h |
| @@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_and_test(void) |
| * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE |
| * pair. |
| */ |
| - return !pc || !READ_ONCE(ti->preempt_count); |
| + if (!pc || !READ_ONCE(ti->preempt_count)) |
| + return true; |
| +#ifdef CONFIG_PREEMPT_LAZY |
| + if ((pc & ~PREEMPT_NEED_RESCHED)) |
| + return false; |
| + if (current_thread_info()->preempt_lazy_count) |
| + return false; |
| + return test_thread_flag(TIF_NEED_RESCHED_LAZY); |
| +#else |
| + return false; |
| +#endif |
| } |
| |
| static inline bool should_resched(int preempt_offset) |
| { |
| +#ifdef CONFIG_PREEMPT_LAZY |
| + u64 pc = READ_ONCE(current_thread_info()->preempt_count); |
| + if (pc == preempt_offset) |
| + return true; |
| + |
| + if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset) |
| + return false; |
| + |
| + if (current_thread_info()->preempt_lazy_count) |
| + return false; |
| + return test_thread_flag(TIF_NEED_RESCHED_LAZY); |
| +#else |
| u64 pc = READ_ONCE(current_thread_info()->preempt_count); |
| return pc == preempt_offset; |
| +#endif |
| } |
| |
| #ifdef CONFIG_PREEMPTION |
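|  |
| Note: both helpers above implement the same two-level policy. A hedged |
| distillation (the function name is illustrative; PREEMPT_NEED_RESCHED |
| is the flag bit folded into arm64's preempt_count): |
|  |
|     static inline bool lazy_resched_allowed(u64 pc) |
|     { |
|             if (pc & ~PREEMPT_NEED_RESCHED) |
|                     return false;   /* inside a preempt-disabled section */ |
|             if (current_thread_info()->preempt_lazy_count) |
|                     return false;   /* inside a lazy-disabled section */ |
|             return test_thread_flag(TIF_NEED_RESCHED_LAZY); |
|     } |
|  |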
| diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h |
| index ef449f5f4ba8..5e535c3e4926 100644 |
| --- a/arch/arm64/include/asm/signal.h |
| +++ b/arch/arm64/include/asm/signal.h |
| @@ -22,4 +22,8 @@ static inline void __user *arch_untagged_si_addr(void __user *addr, |
| } |
| #define arch_untagged_si_addr arch_untagged_si_addr |
| |
| +#if defined(CONFIG_PREEMPT_RT) |
| +#define ARCH_RT_DELAYS_SIGNAL_SEND |
| +#endif |
| + |
| #endif |
| diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h |
| index 18782f0c4721..11ab1c077697 100644 |
| --- a/arch/arm64/include/asm/spinlock_types.h |
| +++ b/arch/arm64/include/asm/spinlock_types.h |
| @@ -5,7 +5,7 @@ |
| #ifndef __ASM_SPINLOCK_TYPES_H |
| #define __ASM_SPINLOCK_TYPES_H |
| |
| -#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H) |
| +#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H) |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h |
| index 6623c99f0984..c55ccec33a5a 100644 |
| --- a/arch/arm64/include/asm/thread_info.h |
| +++ b/arch/arm64/include/asm/thread_info.h |
| @@ -26,6 +26,7 @@ struct thread_info { |
| #ifdef CONFIG_ARM64_SW_TTBR0_PAN |
| u64 ttbr0; /* saved TTBR0_EL1 */ |
| #endif |
| + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ |
| union { |
| u64 preempt_count; /* 0 => preemptible, <0 => bug */ |
| struct { |
| @@ -67,6 +68,7 @@ int arch_dup_task_struct(struct task_struct *dst, |
| #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ |
| #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ |
| #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ |
| +#define TIF_NEED_RESCHED_LAZY 7 |
| #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ |
| #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ |
| #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ |
| @@ -97,8 +99,10 @@ int arch_dup_task_struct(struct task_struct *dst, |
| #define _TIF_SVE (1 << TIF_SVE) |
| #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) |
| #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) |
| +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) |
| |
| -#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ |
| +#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ |
| + _TIF_SIGPENDING | \ |
| _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ |
| _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ |
| _TIF_NOTIFY_SIGNAL) |
| @@ -107,6 +111,8 @@ int arch_dup_task_struct(struct task_struct *dst, |
| _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ |
| _TIF_SYSCALL_EMU) |
| |
| +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) |
| + |
| #ifdef CONFIG_SHADOW_CALL_STACK |
| #define INIT_SCS \ |
| .scs_base = init_shadow_call_stack, \ |
| diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c |
| index 551427ae8cc5..96a4f6c9eb78 100644 |
| --- a/arch/arm64/kernel/asm-offsets.c |
| +++ b/arch/arm64/kernel/asm-offsets.c |
| @@ -31,6 +31,7 @@ int main(void) |
| BLANK(); |
| DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); |
| DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); |
| + DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); |
| #ifdef CONFIG_ARM64_SW_TTBR0_PAN |
| DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); |
| #endif |
| diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c |
| index ff4962750b3d..99484e8bbade 100644 |
| --- a/arch/arm64/kernel/fpsimd.c |
| +++ b/arch/arm64/kernel/fpsimd.c |
| @@ -179,10 +179,19 @@ static void __get_cpu_fpsimd_context(void) |
| * |
| * The double-underscore version must only be called if you know the task |
| * can't be preempted. |
| + * |
| + * On RT kernels local_bh_disable() is not sufficient because it only |
| + * serializes soft interrupt related sections via a local lock, but stays |
| + * preemptible. Disabling preemption is the right choice here as bottom |
| + * half processing is always in thread context on RT kernels so it |
| + * implicitly prevents bottom half processing as well. |
| */ |
| static void get_cpu_fpsimd_context(void) |
| { |
| - local_bh_disable(); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + local_bh_disable(); |
| + else |
| + preempt_disable(); |
| __get_cpu_fpsimd_context(); |
| } |
| |
| @@ -203,7 +212,10 @@ static void __put_cpu_fpsimd_context(void) |
| static void put_cpu_fpsimd_context(void) |
| { |
| __put_cpu_fpsimd_context(); |
| - local_bh_enable(); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + local_bh_enable(); |
| + else |
| + preempt_enable(); |
| } |
| |
| static bool have_cpu_fpsimd_context(void) |
| @@ -1033,6 +1045,7 @@ void fpsimd_thread_switch(struct task_struct *next) |
| void fpsimd_flush_thread(void) |
| { |
| int vl, supported_vl; |
| + void *sve_state = NULL; |
| |
| if (!system_supports_fpsimd()) |
| return; |
| @@ -1045,7 +1058,10 @@ void fpsimd_flush_thread(void) |
| |
| if (system_supports_sve()) { |
| clear_thread_flag(TIF_SVE); |
| - sve_free(current); |
| + |
| + /* Defer kfree() while in atomic context */ |
| + sve_state = current->thread.sve_state; |
| + current->thread.sve_state = NULL; |
| |
| /* |
| * Reset the task vector length as required. |
| @@ -1079,6 +1095,7 @@ void fpsimd_flush_thread(void) |
| } |
| |
| put_cpu_fpsimd_context(); |
| + kfree(sve_state); |
| } |
| |
| /* |
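|  |
| Note: the pattern above is worth naming. On !RT, local_bh_disable() |
| also disables preemption, so it covers both requirements; on RT it |
| only takes a per-CPU local lock and the task stays preemptible, so |
| preempt_disable() is the stronger guard (and, since softirqs run in |
| thread context on RT, it excludes bottom halves too). A hedged sketch |
| of the pattern as a reusable pair (names illustrative): |
|  |
|     static inline void cpu_fpsimd_guard_enter(void) |
|     { |
|             if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
|                     preempt_disable(); |
|             else |
|                     local_bh_disable(); |
|     } |
|  |
|     static inline void cpu_fpsimd_guard_exit(void) |
|     { |
|             if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
|                     preempt_enable(); |
|             else |
|                     local_bh_enable(); |
|     } |
|  |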
| diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c |
| index b3e1beccf458..03183563feb8 100644 |
| --- a/arch/arm64/kernel/signal.c |
| +++ b/arch/arm64/kernel/signal.c |
| @@ -922,7 +922,7 @@ static void do_signal(struct pt_regs *regs) |
| void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) |
| { |
| do { |
| - if (thread_flags & _TIF_NEED_RESCHED) { |
| + if (thread_flags & _TIF_NEED_RESCHED_MASK) { |
| /* Unmask Debug and SError for the next task */ |
| local_daif_restore(DAIF_PROCCTX_NOIRQ); |
| |
| @@ -930,6 +930,14 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) |
| } else { |
| local_daif_restore(DAIF_PROCCTX); |
| |
| +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND |
| + if (unlikely(current->forced_info.si_signo)) { |
| + struct task_struct *t = current; |
| + force_sig_info(&t->forced_info); |
| + t->forced_info.si_signo = 0; |
| + } |
| +#endif |
| + |
| if (thread_flags & _TIF_UPROBE) |
| uprobe_notify_resume(regs); |
| |
| diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c |
| index f181527f9d43..0a9a75c236da 100644 |
| --- a/arch/arm64/kvm/arm.c |
| +++ b/arch/arm64/kvm/arm.c |
| @@ -829,7 +829,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) |
| * involves poking the GIC, which must be done in a |
| * non-preemptible context. |
| */ |
| - preempt_disable(); |
| + migrate_disable(); |
| |
| kvm_pmu_flush_hwstate(vcpu); |
| |
| @@ -853,7 +853,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) |
| kvm_timer_sync_user(vcpu); |
| kvm_vgic_sync_hwstate(vcpu); |
| local_irq_enable(); |
| - preempt_enable(); |
| + migrate_enable(); |
| continue; |
| } |
| |
| @@ -922,7 +922,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) |
| /* Exit types that need handling before we can be preempted */ |
| handle_exit_early(vcpu, ret); |
| |
| - preempt_enable(); |
| + migrate_enable(); |
| |
| /* |
| * The ARMv8 architecture doesn't give the hypervisor |
| diff --git a/arch/csky/include/asm/spinlock_types.h b/arch/csky/include/asm/spinlock_types.h |
| index 8ff0f6ff3a00..db87a12c3827 100644 |
| --- a/arch/csky/include/asm/spinlock_types.h |
| +++ b/arch/csky/include/asm/spinlock_types.h |
| @@ -3,7 +3,7 @@ |
| #ifndef __ASM_CSKY_SPINLOCK_TYPES_H |
| #define __ASM_CSKY_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h |
| index 19d233497ba5..d5f66495b670 100644 |
| --- a/arch/hexagon/include/asm/spinlock_types.h |
| +++ b/arch/hexagon/include/asm/spinlock_types.h |
| @@ -8,7 +8,7 @@ |
| #ifndef _ASM_SPINLOCK_TYPES_H |
| #define _ASM_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h |
| index 6e345fefcdca..14b8a161c165 100644 |
| --- a/arch/ia64/include/asm/spinlock_types.h |
| +++ b/arch/ia64/include/asm/spinlock_types.h |
| @@ -2,7 +2,7 @@ |
| #ifndef _ASM_IA64_SPINLOCK_TYPES_H |
| #define _ASM_IA64_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig |
| index 27222b75d2a4..5495225807eb 100644 |
| --- a/arch/powerpc/Kconfig |
| +++ b/arch/powerpc/Kconfig |
| @@ -151,6 +151,7 @@ config PPC |
| select ARCH_STACKWALK |
| select ARCH_SUPPORTS_ATOMIC_RMW |
| select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x |
| + select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK |
| select ARCH_USE_BUILTIN_BSWAP |
| select ARCH_USE_CMPXCHG_LOCKREF if PPC64 |
| select ARCH_USE_MEMTEST |
| @@ -218,6 +219,7 @@ config PPC |
| select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) |
| select HAVE_IOREMAP_PROT |
| select HAVE_IRQ_TIME_ACCOUNTING |
| + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM |
| select HAVE_KERNEL_GZIP |
| select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE |
| select HAVE_KERNEL_LZO if DEFAULT_UIMAGE |
| @@ -234,6 +236,7 @@ config PPC |
| select HAVE_PERF_EVENTS_NMI if PPC64 |
| select HAVE_PERF_REGS |
| select HAVE_PERF_USER_STACK_DUMP |
| + select HAVE_PREEMPT_LAZY |
| select HAVE_REGS_AND_STACK_ACCESS_API |
| select HAVE_RELIABLE_STACKTRACE |
| select HAVE_RSEQ |
| diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h |
| index 0f3cdd8faa95..08243338069d 100644 |
| --- a/arch/powerpc/include/asm/simple_spinlock_types.h |
| +++ b/arch/powerpc/include/asm/simple_spinlock_types.h |
| @@ -2,7 +2,7 @@ |
| #ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H |
| #define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h |
| index 7ef1cd8168a0..f9e63cacd220 100644 |
| --- a/arch/powerpc/include/asm/smp.h |
| +++ b/arch/powerpc/include/asm/smp.h |
| @@ -62,6 +62,7 @@ struct smp_ops_t { |
| |
| extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); |
| extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); |
| +extern void smp_send_debugger_break_cpu(unsigned int cpu); |
| extern void smp_send_debugger_break(void); |
| extern void start_secondary_resume(void); |
| extern void smp_generic_give_timebase(void); |
| diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h |
| index c5d742f18021..d5f8a74ed2e8 100644 |
| --- a/arch/powerpc/include/asm/spinlock_types.h |
| +++ b/arch/powerpc/include/asm/spinlock_types.h |
| @@ -2,7 +2,7 @@ |
| #ifndef _ASM_POWERPC_SPINLOCK_TYPES_H |
| #define _ASM_POWERPC_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h |
| index 1c8460e23583..b1653c160bab 100644 |
| --- a/arch/powerpc/include/asm/stackprotector.h |
| +++ b/arch/powerpc/include/asm/stackprotector.h |
| @@ -24,7 +24,11 @@ static __always_inline void boot_init_stack_canary(void) |
| unsigned long canary; |
| |
| /* Try to get a semi random initial value. */ |
| +#ifdef CONFIG_PREEMPT_RT |
| + canary = (unsigned long)&canary; |
| +#else |
| canary = get_random_canary(); |
| +#endif |
| canary ^= mftb(); |
| canary ^= LINUX_VERSION_CODE; |
| canary &= CANARY_MASK; |
| diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h |
| index 87013ac2a640..2920ed371188 100644 |
| --- a/arch/powerpc/include/asm/thread_info.h |
| +++ b/arch/powerpc/include/asm/thread_info.h |
| @@ -53,6 +53,8 @@ |
| struct thread_info { |
| int preempt_count; /* 0 => preemptable, |
| <0 => BUG */ |
| + int preempt_lazy_count; /* 0 => preemptable, |
| + <0 => BUG */ |
| unsigned long local_flags; /* private flags for thread */ |
| #ifdef CONFIG_LIVEPATCH |
| unsigned long *livepatch_sp; |
| @@ -99,6 +101,7 @@ void arch_setup_new_exec(void); |
| #define TIF_PATCH_PENDING 6 /* pending live patching update */ |
| #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ |
| #define TIF_SINGLESTEP 8 /* singlestepping active */ |
| +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ |
| #define TIF_SECCOMP 10 /* secure computing */ |
| #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ |
| #define TIF_NOERROR 12 /* Force successful syscall return */ |
| @@ -114,6 +117,7 @@ void arch_setup_new_exec(void); |
| #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ |
| #define TIF_32BIT 20 /* 32 bit binary */ |
| |
| + |
| /* as above, but as bit values */ |
| #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) |
| #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) |
| @@ -125,6 +129,7 @@ void arch_setup_new_exec(void); |
| #define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING) |
| #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) |
| #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) |
| +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) |
| #define _TIF_SECCOMP (1<<TIF_SECCOMP) |
| #define _TIF_RESTOREALL (1<<TIF_RESTOREALL) |
| #define _TIF_NOERROR (1<<TIF_NOERROR) |
| @@ -138,10 +143,12 @@ void arch_setup_new_exec(void); |
| _TIF_SYSCALL_EMU) |
| |
| #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ |
| + _TIF_NEED_RESCHED_LAZY | \ |
| _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ |
| _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \ |
| _TIF_NOTIFY_SIGNAL) |
| #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) |
| +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) |
| |
| /* Bits in local_flags */ |
| /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ |
| diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c |
| index df048e331cbf..a81225f476a7 100644 |
| --- a/arch/powerpc/kernel/interrupt.c |
| +++ b/arch/powerpc/kernel/interrupt.c |
| @@ -346,7 +346,7 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) |
| ti_flags = READ_ONCE(current_thread_info()->flags); |
| while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { |
| local_irq_enable(); |
| - if (ti_flags & _TIF_NEED_RESCHED) { |
| + if (ti_flags & _TIF_NEED_RESCHED_MASK) { |
| schedule(); |
| } else { |
| /* |
| @@ -552,11 +552,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) |
| /* Returning to a kernel context with local irqs enabled. */ |
| WARN_ON_ONCE(!(regs->msr & MSR_EE)); |
| again: |
| - if (IS_ENABLED(CONFIG_PREEMPT)) { |
| + if (IS_ENABLED(CONFIG_PREEMPTION)) { |
| /* Return to preemptible kernel context */ |
| if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) { |
| if (preempt_count() == 0) |
| preempt_schedule_irq(); |
| + } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) { |
| + if ((preempt_count() == 0) && |
| + (current_thread_info()->preempt_lazy_count == 0)) |
| + preempt_schedule_irq(); |
| } |
| } |
| |
| diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c |
| index c4f1d6b7d992..02e17a57da83 100644 |
| --- a/arch/powerpc/kernel/irq.c |
| +++ b/arch/powerpc/kernel/irq.c |
| @@ -690,6 +690,7 @@ static inline void check_stack_overflow(void) |
| } |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| static __always_inline void call_do_softirq(const void *sp) |
| { |
| /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ |
| @@ -708,6 +709,7 @@ static __always_inline void call_do_softirq(const void *sp) |
| "r11", "r12" |
| ); |
| } |
| +#endif |
| |
| static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) |
| { |
| @@ -820,10 +822,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly; |
| void *softirq_ctx[NR_CPUS] __read_mostly; |
| void *hardirq_ctx[NR_CPUS] __read_mostly; |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| void do_softirq_own_stack(void) |
| { |
| call_do_softirq(softirq_ctx[smp_processor_id()]); |
| } |
| +#endif |
| |
| irq_hw_number_t virq_to_hw(unsigned int virq) |
| { |
| diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c |
| index bdee7262c080..d57d37497862 100644 |
| --- a/arch/powerpc/kernel/kgdb.c |
| +++ b/arch/powerpc/kernel/kgdb.c |
| @@ -120,11 +120,19 @@ int kgdb_skipexception(int exception, struct pt_regs *regs) |
| |
| static int kgdb_debugger_ipi(struct pt_regs *regs) |
| { |
| - kgdb_nmicallback(raw_smp_processor_id(), regs); |
| + int cpu = raw_smp_processor_id(); |
| + |
| + if (!kgdb_roundup_delay(cpu)) |
| + kgdb_nmicallback(cpu, regs); |
| return 0; |
| } |
| |
| #ifdef CONFIG_SMP |
| +void kgdb_roundup_cpu(unsigned int cpu) |
| +{ |
| + smp_send_debugger_break_cpu(cpu); |
| +} |
| + |
| void kgdb_roundup_cpus(void) |
| { |
| smp_send_debugger_break(); |
| diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c |
| index fb95f92dcfac..308765f2e7a0 100644 |
| --- a/arch/powerpc/kernel/smp.c |
| +++ b/arch/powerpc/kernel/smp.c |
| @@ -590,6 +590,11 @@ static void debugger_ipi_callback(struct pt_regs *regs) |
| debugger_ipi(regs); |
| } |
| |
| +void smp_send_debugger_break_cpu(unsigned int cpu) |
| +{ |
| + smp_send_nmi_ipi(cpu, debugger_ipi_callback, 1000000); |
| +} |
| + |
| void smp_send_debugger_break(void) |
| { |
| smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000); |
| diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c |
| index 11741703d26e..7e4e1f489f56 100644 |
| --- a/arch/powerpc/kernel/traps.c |
| +++ b/arch/powerpc/kernel/traps.c |
| @@ -260,12 +260,17 @@ static char *get_mmu_str(void) |
| |
| static int __die(const char *str, struct pt_regs *regs, long err) |
| { |
| + const char *pr = ""; |
| + |
| printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); |
| |
| + if (IS_ENABLED(CONFIG_PREEMPTION)) |
| + pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; |
| + |
| printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", |
| IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", |
| PAGE_SIZE / 1024, get_mmu_str(), |
| - IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", |
| + pr, |
| IS_ENABLED(CONFIG_SMP) ? " SMP" : "", |
| IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", |
| debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", |
| diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c |
| index 22ceeeb705ab..d5359701f787 100644 |
| --- a/arch/powerpc/kexec/crash.c |
| +++ b/arch/powerpc/kexec/crash.c |
| @@ -312,9 +312,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) |
| unsigned int i; |
| int (*old_handler)(struct pt_regs *regs); |
| |
| - /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ |
| - printk_deferred_enter(); |
| - |
| /* |
| * This function is only called after the system |
| * has panicked or is otherwise in a critical state. |
| diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig |
| index ff581d70f20c..e5c84d55bdfb 100644 |
| --- a/arch/powerpc/kvm/Kconfig |
| +++ b/arch/powerpc/kvm/Kconfig |
| @@ -178,6 +178,7 @@ config KVM_E500MC |
| config KVM_MPIC |
| bool "KVM in-kernel MPIC emulation" |
| depends on KVM && E500 |
| + depends on !PREEMPT_RT |
| select HAVE_KVM_IRQCHIP |
| select HAVE_KVM_IRQFD |
| select HAVE_KVM_IRQ_ROUTING |
| diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c |
| index 8322ca86d5ac..f524145d7dd3 100644 |
| --- a/arch/powerpc/platforms/pseries/iommu.c |
| +++ b/arch/powerpc/platforms/pseries/iommu.c |
| @@ -24,6 +24,7 @@ |
| #include <linux/of.h> |
| #include <linux/iommu.h> |
| #include <linux/rculist.h> |
| +#include <linux/local_lock.h> |
| #include <asm/io.h> |
| #include <asm/prom.h> |
| #include <asm/rtas.h> |
| @@ -195,7 +196,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, |
| return ret; |
| } |
| |
| -static DEFINE_PER_CPU(__be64 *, tce_page); |
| +struct tce_page { |
| + __be64 * page; |
| + local_lock_t lock; |
| +}; |
| +static DEFINE_PER_CPU(struct tce_page, tce_page) = { |
| + .lock = INIT_LOCAL_LOCK(lock), |
| +}; |
| |
| static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, |
| long npages, unsigned long uaddr, |
| @@ -218,9 +225,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, |
| direction, attrs); |
| } |
| |
| - local_irq_save(flags); /* to protect tcep and the page behind it */ |
| + /* to protect tcep and the page behind it */ |
| + local_lock_irqsave(&tce_page.lock, flags); |
| |
| - tcep = __this_cpu_read(tce_page); |
| + tcep = __this_cpu_read(tce_page.page); |
| |
| /* This is safe to do since interrupts are off when we're called |
| * from iommu_alloc{,_sg}() |
| @@ -229,12 +237,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, |
| tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
| /* If allocation fails, fall back to the loop implementation */ |
| if (!tcep) { |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&tce_page.lock, flags); |
| return tce_build_pSeriesLP(tbl->it_index, tcenum, |
| tceshift, |
| npages, uaddr, direction, attrs); |
| } |
| - __this_cpu_write(tce_page, tcep); |
| + __this_cpu_write(tce_page.page, tcep); |
| } |
| |
| rpn = __pa(uaddr) >> tceshift; |
| @@ -264,7 +272,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, |
| tcenum += limit; |
| } while (npages > 0 && !rc); |
| |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&tce_page.lock, flags); |
| |
| if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { |
| ret = (int)rc; |
| @@ -440,16 +448,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, |
| DMA_BIDIRECTIONAL, 0); |
| } |
| |
| - local_irq_disable(); /* to protect tcep and the page behind it */ |
| - tcep = __this_cpu_read(tce_page); |
| + /* to protect tcep and the page behind it */ |
| + local_lock_irq(&tce_page.lock); |
| + tcep = __this_cpu_read(tce_page.page); |
| |
| if (!tcep) { |
| tcep = (__be64 *)__get_free_page(GFP_ATOMIC); |
| if (!tcep) { |
| - local_irq_enable(); |
| + local_unlock_irq(&tce_page.lock); |
| return -ENOMEM; |
| } |
| - __this_cpu_write(tce_page, tcep); |
| + __this_cpu_write(tce_page.page, tcep); |
| } |
| |
| proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; |
| @@ -492,7 +501,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, |
| |
| /* error cleanup: caller will clear whole range */ |
| |
| - local_irq_enable(); |
| + local_unlock_irq(&tce_page.lock); |
| return rc; |
| } |
| |
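|  |
| Note: the conversion above is the standard local_lock recipe for |
| per-CPU data that used to rely on local_irq_save(): on !RT the lock |
| compiles down to the old IRQ-disable semantics, while on RT it becomes |
| a per-CPU spinlock that an rtmutex-based kernel can tolerate. A |
| hedged, generic sketch (type and helper names illustrative): |
|  |
|     #include <linux/local_lock.h> |
|     #include <linux/percpu.h> |
|  |
|     struct pcpu_scratch { |
|             void *page; |
|             local_lock_t lock; |
|     }; |
|     static DEFINE_PER_CPU(struct pcpu_scratch, pcpu_scratch) = { |
|             .lock = INIT_LOCAL_LOCK(lock), |
|     }; |
|  |
|     static void pcpu_scratch_use(void) |
|     { |
|             unsigned long flags; |
|  |
|             local_lock_irqsave(&pcpu_scratch.lock, flags); |
|             /* safe to touch __this_cpu_read(pcpu_scratch.page) here */ |
|             local_unlock_irqrestore(&pcpu_scratch.lock, flags); |
|     } |
|  |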
| diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h |
| index f398e7638dd6..5a35a49505da 100644 |
| --- a/arch/riscv/include/asm/spinlock_types.h |
| +++ b/arch/riscv/include/asm/spinlock_types.h |
| @@ -6,7 +6,7 @@ |
| #ifndef _ASM_RISCV_SPINLOCK_TYPES_H |
| #define _ASM_RISCV_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h |
| index a2bbfd7df85f..b69695e39957 100644 |
| --- a/arch/s390/include/asm/spinlock_types.h |
| +++ b/arch/s390/include/asm/spinlock_types.h |
| @@ -2,7 +2,7 @@ |
| #ifndef __ASM_SPINLOCK_TYPES_H |
| #define __ASM_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h |
| index e82369f286a2..907bda4b1619 100644 |
| --- a/arch/sh/include/asm/spinlock_types.h |
| +++ b/arch/sh/include/asm/spinlock_types.h |
| @@ -2,7 +2,7 @@ |
| #ifndef __ASM_SH_SPINLOCK_TYPES_H |
| #define __ASM_SH_SPINLOCK_TYPES_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c |
| index ef0f0827cf57..2d3eca8fee01 100644 |
| --- a/arch/sh/kernel/irq.c |
| +++ b/arch/sh/kernel/irq.c |
| @@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu) |
| hardirq_ctx[cpu] = NULL; |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| void do_softirq_own_stack(void) |
| { |
| struct thread_info *curctx; |
| @@ -176,6 +177,7 @@ void do_softirq_own_stack(void) |
| "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" |
| ); |
| } |
| +#endif |
| #else |
| static inline void handle_one_irq(unsigned int irq) |
| { |
| diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c |
| index c8848bb681a1..41fa1be980a3 100644 |
| --- a/arch/sparc/kernel/irq_64.c |
| +++ b/arch/sparc/kernel/irq_64.c |
| @@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) |
| set_irq_regs(old_regs); |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| void do_softirq_own_stack(void) |
| { |
| void *orig_sp, *sp = softirq_stack[smp_processor_id()]; |
| @@ -869,6 +870,7 @@ void do_softirq_own_stack(void) |
| __asm__ __volatile__("mov %0, %%sp" |
| : : "r" (orig_sp)); |
| } |
| +#endif |
| |
| #ifdef CONFIG_HOTPLUG_CPU |
| void fixup_irqs(void) |
| diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig |
| index 1d0f16b53393..ecf7aed3ba65 100644 |
| --- a/arch/x86/Kconfig |
| +++ b/arch/x86/Kconfig |
| @@ -107,6 +107,7 @@ config X86 |
| select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 |
| select ARCH_SUPPORTS_LTO_CLANG |
| select ARCH_SUPPORTS_LTO_CLANG_THIN |
| + select ARCH_SUPPORTS_RT |
| select ARCH_USE_BUILTIN_BSWAP |
| select ARCH_USE_MEMTEST |
| select ARCH_USE_QUEUED_RWLOCKS |
| @@ -230,6 +231,7 @@ config X86 |
| select HAVE_PCI |
| select HAVE_PERF_REGS |
| select HAVE_PERF_USER_STACK_DUMP |
| + select HAVE_PREEMPT_LAZY |
| select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT |
| select HAVE_POSIX_CPU_TIMERS_TASK_WORK |
| select HAVE_REGS_AND_STACK_ACCESS_API |
| diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h |
| index e087cd7837c3..96cc92f63b06 100644 |
| --- a/arch/x86/include/asm/irq_stack.h |
| +++ b/arch/x86/include/asm/irq_stack.h |
| @@ -202,6 +202,7 @@ |
| IRQ_CONSTRAINTS, regs, vector); \ |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| /* |
| * Macro to invoke __do_softirq on the irq stack. This is only called from |
| * task context when bottom halves are about to be reenabled and soft |
| @@ -215,6 +216,8 @@ |
| __this_cpu_write(hardirq_stack_inuse, false); \ |
| } |
| |
| +#endif |
| + |
| #else /* CONFIG_X86_64 */ |
| /* System vector handlers always run on the stack they interrupted. */ |
| #define run_sysvec_on_irqstack_cond(func, regs) \ |
| diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h |
| index fe5efbcba824..ab8cb5fc2329 100644 |
| --- a/arch/x86/include/asm/preempt.h |
| +++ b/arch/x86/include/asm/preempt.h |
| @@ -90,17 +90,48 @@ static __always_inline void __preempt_count_sub(int val) |
| * a decrement which hits zero means we have no preempt_count and should |
| * reschedule. |
| */ |
| -static __always_inline bool __preempt_count_dec_and_test(void) |
| +static __always_inline bool ____preempt_count_dec_and_test(void) |
| { |
| return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); |
| } |
| |
| +static __always_inline bool __preempt_count_dec_and_test(void) |
| +{ |
| + if (____preempt_count_dec_and_test()) |
| + return true; |
| +#ifdef CONFIG_PREEMPT_LAZY |
| + if (preempt_count()) |
| + return false; |
| + if (current_thread_info()->preempt_lazy_count) |
| + return false; |
| + return test_thread_flag(TIF_NEED_RESCHED_LAZY); |
| +#else |
| + return false; |
| +#endif |
| +} |
| + |
| /* |
| * Returns true when we need to resched and can (barring IRQ state). |
| */ |
| static __always_inline bool should_resched(int preempt_offset) |
| { |
| +#ifdef CONFIG_PREEMPT_LAZY |
| + u32 tmp; |
| + tmp = raw_cpu_read_4(__preempt_count); |
| + if (tmp == preempt_offset) |
| + return true; |
| + |
| + /* preempt count == 0 ? */ |
| + tmp &= ~PREEMPT_NEED_RESCHED; |
| + if (tmp != preempt_offset) |
| + return false; |
| + /* XXX PREEMPT_LOCK_OFFSET */ |
| + if (current_thread_info()->preempt_lazy_count) |
| + return false; |
| + return test_thread_flag(TIF_NEED_RESCHED_LAZY); |
| +#else |
| return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); |
| +#endif |
| } |
| |
| #ifdef CONFIG_PREEMPTION |
| diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h |
| index 2dfb5fea13af..fc03f4f7ed84 100644 |
| --- a/arch/x86/include/asm/signal.h |
| +++ b/arch/x86/include/asm/signal.h |
| @@ -28,6 +28,19 @@ typedef struct { |
| #define SA_IA32_ABI 0x02000000u |
| #define SA_X32_ABI 0x01000000u |
| |
| +/* |
| + * Because some traps use the IST stack, we must keep preemption |
| + * disabled while calling do_trap(), but do_trap() may call |
| + * force_sig_info() which will grab the signal spin_locks for the |
| + * task, which in PREEMPT_RT are mutexes. By defining |
| + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set |
| + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the |
| + * trap. |
| + */ |
| +#if defined(CONFIG_PREEMPT_RT) |
| +#define ARCH_RT_DELAYS_SIGNAL_SEND |
| +#endif |
| + |
| #ifndef CONFIG_COMPAT |
| #define compat_sigset_t compat_sigset_t |
| typedef sigset_t compat_sigset_t; |
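|  |
| Note: the consumer side of ARCH_RT_DELAYS_SIGNAL_SEND appears in the |
| arm64 signal.c hunk earlier in this series; a hedged sketch of the |
| same exit-to-user delivery step (mirroring that hunk, not new API): |
|  |
|     #ifdef ARCH_RT_DELAYS_SIGNAL_SEND |
|             /* Deliver a signal that a trap handler had to defer. */ |
|             if (unlikely(current->forced_info.si_signo)) { |
|                     force_sig_info(&current->forced_info); |
|                     current->forced_info.si_signo = 0; |
|             } |
|     #endif |
|  |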
| diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h |
| index 24a8d6c4fb18..2fc22c27df18 100644 |
| --- a/arch/x86/include/asm/stackprotector.h |
| +++ b/arch/x86/include/asm/stackprotector.h |
| @@ -50,7 +50,7 @@ |
| */ |
| static __always_inline void boot_init_stack_canary(void) |
| { |
| - u64 canary; |
| + u64 canary = 0; |
| u64 tsc; |
| |
| #ifdef CONFIG_X86_64 |
| @@ -61,8 +61,14 @@ static __always_inline void boot_init_stack_canary(void) |
| * of randomness. The TSC only matters for very early init, |
| * there it already has some randomness on most systems. Later |
| * on during the bootup the random pool has true entropy too. |
| + * For preempt-rt we need to weaken the randomness a bit, as |
| + * we can't call into the random generator from atomic context |
| + * due to locking constraints. We just initialize the canary |
| + * to zero and rely on the TSC-based randomness on top of it. |
| */ |
| +#ifndef CONFIG_PREEMPT_RT |
| get_random_bytes(&canary, sizeof(canary)); |
| +#endif |
| tsc = rdtsc(); |
| canary += tsc + (tsc << 32UL); |
| canary &= CANARY_MASK; |
| diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h |
| index cf132663c219..75dc786e6365 100644 |
| --- a/arch/x86/include/asm/thread_info.h |
| +++ b/arch/x86/include/asm/thread_info.h |
| @@ -57,11 +57,14 @@ struct thread_info { |
| unsigned long flags; /* low level flags */ |
| unsigned long syscall_work; /* SYSCALL_WORK_ flags */ |
| u32 status; /* thread synchronous flags */ |
| + int preempt_lazy_count; /* 0 => lazy preemptable |
| + <0 => BUG */ |
| }; |
| |
| #define INIT_THREAD_INFO(tsk) \ |
| { \ |
| .flags = 0, \ |
| + .preempt_lazy_count = 0, \ |
| } |
| |
| #else /* !__ASSEMBLY__ */ |
| @@ -90,6 +93,7 @@ struct thread_info { |
| #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
| #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ |
| #define TIF_SLD 18 /* Restore split lock detection on context switch */ |
| +#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */ |
| #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ |
| #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ |
| #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ |
| @@ -114,6 +118,7 @@ struct thread_info { |
| #define _TIF_NOTSC (1 << TIF_NOTSC) |
| #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) |
| #define _TIF_SLD (1 << TIF_SLD) |
| +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) |
| #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) |
| #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) |
| #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) |
| diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c |
| index 722fd712e1cf..82cc3a7be6bd 100644 |
| --- a/arch/x86/kernel/dumpstack_32.c |
| +++ b/arch/x86/kernel/dumpstack_32.c |
| @@ -141,7 +141,7 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, |
| */ |
| if (visit_mask) { |
| if (*visit_mask & (1UL << info->type)) { |
| - printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type); |
| + pr_warn_once("WARNING: stack recursion on stack type %d\n", info->type); |
| goto unknown; |
| } |
| *visit_mask |= 1UL << info->type; |
| diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c |
| index 6c5defd6569a..5f725b0ceb29 100644 |
| --- a/arch/x86/kernel/dumpstack_64.c |
| +++ b/arch/x86/kernel/dumpstack_64.c |
| @@ -207,7 +207,8 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, |
| if (visit_mask) { |
| if (*visit_mask & (1UL << info->type)) { |
| if (task == current) |
| - printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type); |
| + pr_warn_once("WARNING: stack recursion on stack type %d\n", |
| + info->type); |
| goto unknown; |
| } |
| *visit_mask |= 1UL << info->type; |
| diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c |
| index 15aefa3f3e18..52af9a89ad47 100644 |
| --- a/arch/x86/kernel/i8259.c |
| +++ b/arch/x86/kernel/i8259.c |
| @@ -207,8 +207,7 @@ static void mask_and_ack_8259A(struct irq_data *data) |
| * lets ACK and report it. [once per IRQ] |
| */ |
| if (!(spurious_irq_mask & irqmask)) { |
| - printk_deferred(KERN_DEBUG |
| - "spurious 8259A interrupt: IRQ%d.\n", irq); |
| + printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); |
| spurious_irq_mask |= irqmask; |
| } |
| atomic_inc(&irq_err_count); |
| diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c |
| index 044902d5a3c4..e5dd6da78713 100644 |
| --- a/arch/x86/kernel/irq_32.c |
| +++ b/arch/x86/kernel/irq_32.c |
| @@ -132,6 +132,7 @@ int irq_init_percpu_irqstack(unsigned int cpu) |
| return 0; |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| void do_softirq_own_stack(void) |
| { |
| struct irq_stack *irqstk; |
| @@ -148,6 +149,7 @@ void do_softirq_own_stack(void) |
| |
| call_on_stack(__do_softirq, isp); |
| } |
| +#endif |
| |
| void __handle_irq(struct irq_desc *desc, struct pt_regs *regs) |
| { |
| diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c |
| index 3a43a2dee658..37bd37cdf2b6 100644 |
| --- a/arch/x86/kernel/kgdb.c |
| +++ b/arch/x86/kernel/kgdb.c |
| @@ -502,9 +502,12 @@ static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) |
| if (atomic_read(&kgdb_active) != -1) { |
| /* KGDB CPU roundup */ |
| cpu = raw_smp_processor_id(); |
| - kgdb_nmicallback(cpu, regs); |
| - set_bit(cpu, was_in_debug_nmi); |
| - touch_nmi_watchdog(); |
| + |
| + if (!kgdb_roundup_delay(cpu)) { |
| + kgdb_nmicallback(cpu, regs); |
| + set_bit(cpu, was_in_debug_nmi); |
| + touch_nmi_watchdog(); |
| + } |
| |
| return NMI_HANDLED; |
| } |
| diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c |
| index d7c44b257f7f..2d0361cd304f 100644 |
| --- a/arch/x86/kernel/unwind_frame.c |
| +++ b/arch/x86/kernel/unwind_frame.c |
| @@ -41,9 +41,9 @@ static void unwind_dump(struct unwind_state *state) |
| |
| dumped_before = true; |
| |
| - printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n", |
| - state->stack_info.type, state->stack_info.next_sp, |
| - state->stack_mask, state->graph_idx); |
| + printk("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n", |
| + state->stack_info.type, state->stack_info.next_sp, |
| + state->stack_mask, state->graph_idx); |
| |
| for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp; |
| sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { |
| @@ -59,13 +59,11 @@ static void unwind_dump(struct unwind_state *state) |
| |
| if (zero) { |
| if (!prev_zero) |
| - printk_deferred("%p: %0*x ...\n", |
| - sp, BITS_PER_LONG/4, 0); |
| + printk("%p: %0*x ...\n", sp, BITS_PER_LONG/4, 0); |
| continue; |
| } |
| |
| - printk_deferred("%p: %0*lx (%pB)\n", |
| - sp, BITS_PER_LONG/4, word, (void *)word); |
| + printk("%p: %0*lx (%pB)\n", sp, BITS_PER_LONG/4, word, (void *)word); |
| } |
| } |
| } |
| @@ -342,13 +340,13 @@ bool unwind_next_frame(struct unwind_state *state) |
| goto the_end; |
| |
| if (state->regs) { |
| - printk_deferred_once(KERN_WARNING |
| + pr_warn_once( |
| "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", |
| state->regs, state->task->comm, |
| state->task->pid, next_bp); |
| unwind_dump(state); |
| } else { |
| - printk_deferred_once(KERN_WARNING |
| + pr_warn_once( |
| "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n", |
| state->bp, state->task->comm, |
| state->task->pid, next_bp); |
| diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c |
| index a1202536fc57..a26a7c3849f5 100644 |
| --- a/arch/x86/kernel/unwind_orc.c |
| +++ b/arch/x86/kernel/unwind_orc.c |
| @@ -9,7 +9,7 @@ |
| #include <asm/orc_lookup.h> |
| |
| #define orc_warn(fmt, ...) \ |
| - printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) |
| + pr_warn_once("WARNING: " fmt, ##__VA_ARGS__) |
| |
| #define orc_warn_current(args...) \ |
| ({ \ |
| diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c |
| index 8974884ef2ad..ae033707f278 100644 |
| --- a/arch/x86/kvm/x86.c |
| +++ b/arch/x86/kvm/x86.c |
| @@ -8550,6 +8550,14 @@ int kvm_arch_init(void *opaque) |
| goto out; |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { |
| + pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n"); |
| + r = -EOPNOTSUPP; |
| + goto out; |
| + } |
| +#endif |
| + |
| r = -ENOMEM; |
| x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu), |
| __alignof__(struct fpu), SLAB_ACCOUNT, |
| diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h |
| index 64c9389254f1..797aed7df3dd 100644 |
| --- a/arch/xtensa/include/asm/spinlock_types.h |
| +++ b/arch/xtensa/include/asm/spinlock_types.h |
| @@ -2,7 +2,7 @@ |
| #ifndef __ASM_SPINLOCK_TYPES_H |
| #define __ASM_SPINLOCK_TYPES_H |
| |
| -#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H) |
| +#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H) |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/block/blk-mq.c b/block/blk-mq.c |
| index 95993c4efa49..2f173fea818c 100644 |
| --- a/block/blk-mq.c |
| +++ b/block/blk-mq.c |
| @@ -1565,14 +1565,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, |
| return; |
| |
| if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { |
| - int cpu = get_cpu(); |
| + int cpu = get_cpu_light(); |
| if (cpumask_test_cpu(cpu, hctx->cpumask)) { |
| __blk_mq_run_hw_queue(hctx); |
| - put_cpu(); |
| + put_cpu_light(); |
| return; |
| } |
| |
| - put_cpu(); |
| + put_cpu_light(); |
| } |
| |
| kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, |
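|  |
| Note: get_cpu_light()/put_cpu_light() are RT-series primitives whose |
| definitions are not part of this diff. A hedged sketch of what they |
| are assumed to expand to (per the rest of the series): pin the task |
| to its current CPU without disabling preemption: |
|  |
|     static inline int get_cpu_light(void) |
|     { |
|             migrate_disable();  /* stay on this CPU, stay preemptible */ |
|             return smp_processor_id(); |
|     } |
|  |
|     static inline void put_cpu_light(void) |
|     { |
|             migrate_enable(); |
|     } |
|  |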
| diff --git a/crypto/testmgr.c b/crypto/testmgr.c |
| index 70f69f0910c9..58eee8eab4bf 100644 |
| --- a/crypto/testmgr.c |
| +++ b/crypto/testmgr.c |
| @@ -1061,14 +1061,14 @@ static void generate_random_testvec_config(struct testvec_config *cfg, |
| |
| static void crypto_disable_simd_for_test(void) |
| { |
| - preempt_disable(); |
| + migrate_disable(); |
| __this_cpu_write(crypto_simd_disabled_for_test, true); |
| } |
| |
| static void crypto_reenable_simd_for_test(void) |
| { |
| __this_cpu_write(crypto_simd_disabled_for_test, false); |
| - preempt_enable(); |
| + migrate_enable(); |
| } |
| |
| /* |
| diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c |
| index 6383c81ac5b3..abb695f5f5e4 100644 |
| --- a/drivers/block/zram/zram_drv.c |
| +++ b/drivers/block/zram/zram_drv.c |
| @@ -59,6 +59,40 @@ static void zram_free_page(struct zram *zram, size_t index); |
| static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
| u32 index, int offset, struct bio *bio); |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) |
| +{ |
| + size_t index; |
| + |
| + for (index = 0; index < num_pages; index++) |
| + spin_lock_init(&zram->table[index].lock); |
| +} |
| + |
| +static int zram_slot_trylock(struct zram *zram, u32 index) |
| +{ |
| + int ret; |
| + |
| + ret = spin_trylock(&zram->table[index].lock); |
| + if (ret) |
| + __set_bit(ZRAM_LOCK, &zram->table[index].flags); |
| + return ret; |
| +} |
| + |
| +static void zram_slot_lock(struct zram *zram, u32 index) |
| +{ |
| + spin_lock(&zram->table[index].lock); |
| + __set_bit(ZRAM_LOCK, &zram->table[index].flags); |
| +} |
| + |
| +static void zram_slot_unlock(struct zram *zram, u32 index) |
| +{ |
| + __clear_bit(ZRAM_LOCK, &zram->table[index].flags); |
| + spin_unlock(&zram->table[index].lock); |
| +} |
| + |
| +#else |
| + |
| +static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { } |
| |
| static int zram_slot_trylock(struct zram *zram, u32 index) |
| { |
| @@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) |
| { |
| bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); |
| } |
| +#endif |
| |
| static inline bool init_done(struct zram *zram) |
| { |
| @@ -1169,6 +1204,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) |
| |
| if (!huge_class_size) |
| huge_class_size = zs_huge_class_size(zram->mem_pool); |
| + zram_meta_init_table_locks(zram, num_pages); |
| return true; |
| } |
| |
| diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h |
| index 80c3b43b4828..d8f6d880f915 100644 |
| --- a/drivers/block/zram/zram_drv.h |
| +++ b/drivers/block/zram/zram_drv.h |
| @@ -63,6 +63,7 @@ struct zram_table_entry { |
| unsigned long element; |
| }; |
| unsigned long flags; |
| + spinlock_t lock; |
| #ifdef CONFIG_ZRAM_MEMORY_TRACKING |
| ktime_t ac_time; |
| #endif |
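|  |
| Note: the RT variant above replaces bit_spin_lock() because a bit |
| spinlock has no owner field, so it cannot sleep or take part in |
| priority inheritance; spinlock_t (an rtmutex on RT) can do both. The |
| ZRAM_LOCK bit is still set and cleared under the new lock, so |
| existing flag checks keep working. A hedged sketch of that invariant |
| (helper name illustrative): |
|  |
|     static bool zram_slot_is_locked(struct zram *zram, u32 index) |
|     { |
|             /* Valid on both lock flavours: the bit shadows the lock. */ |
|             return test_bit(ZRAM_LOCK, &zram->table[index].flags); |
|     } |
|  |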
| diff --git a/drivers/char/random.c b/drivers/char/random.c |
| index 7bd6eb15d432..68e94c265ada 100644 |
| --- a/drivers/char/random.c |
| +++ b/drivers/char/random.c |
| @@ -183,7 +183,7 @@ static void __cold process_random_ready_list(void) |
| |
| #define warn_unseeded_randomness() \ |
| if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ |
| - printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ |
| + pr_notice("random: %s called from %pS with crng_init=%d\n", \ |
| __func__, (void *)_RET_IP_, crng_init) |
| |
| |
| diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c |
| index d3f2e5364c27..9c4a99757afd 100644 |
| --- a/drivers/char/tpm/tpm_tis.c |
| +++ b/drivers/char/tpm/tpm_tis.c |
| @@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da |
| return container_of(data, struct tpm_tis_tcg_phy, priv); |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +/* |
| + * Flushes previous write operations to the chip so that subsequent |
| + * ioread*()s won't stall a CPU. |
| + */ |
| +static inline void tpm_tis_flush(void __iomem *iobase) |
| +{ |
| + ioread8(iobase + TPM_ACCESS(0)); |
| +} |
| +#else |
| +#define tpm_tis_flush(iobase) do { } while (0) |
| +#endif |
| + |
| +static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr) |
| +{ |
| + iowrite8(b, iobase + addr); |
| + tpm_tis_flush(iobase); |
| +} |
| + |
| +static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr) |
| +{ |
| + iowrite32(b, iobase + addr); |
| + tpm_tis_flush(iobase); |
| +} |
| + |
| static int interrupts = -1; |
| module_param(interrupts, int, 0444); |
| MODULE_PARM_DESC(interrupts, "Enable interrupts"); |
| @@ -169,7 +194,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, |
| struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); |
| |
| while (len--) |
| - iowrite8(*value++, phy->iobase + addr); |
| + tpm_tis_iowrite8(*value++, phy->iobase, addr); |
| |
| return 0; |
| } |
| @@ -196,7 +221,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value) |
| { |
| struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); |
| |
| - iowrite32(value, phy->iobase + addr); |
| + tpm_tis_iowrite32(value, phy->iobase, addr); |
| |
| return 0; |
| } |
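| |
| The tpm_tis_flush() read-back is the classic posted-write flush: LPC/MMIO |
| writes can sit in a write buffer, and on PREEMPT_RT a later ioread*() from a |
| higher-priority context could stall while that buffer drains. Reading any |
| harmless register on the same device forces earlier writes out. A hedged, |
| generic sketch of the idiom (REG_STATUS is a placeholder for a readable |
| register with no read side effects): |
| |
|     /* Illustrative posted-write flush under the stated assumption. */ |
|     static inline void dev_write8_flushed(u8 val, void __iomem *base, u32 reg) |
|     { |
|             iowrite8(val, base + reg); |
|             if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
|                     (void)ioread8(base + REG_STATUS); /* drain write buffer */ |
|     } |
| |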
| diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c |
| index e3df82d5d37a..5502e176d51b 100644 |
| --- a/drivers/firmware/efi/efi.c |
| +++ b/drivers/firmware/efi/efi.c |
| @@ -66,7 +66,7 @@ struct mm_struct efi_mm = { |
| |
| struct workqueue_struct *efi_rts_wq; |
| |
| -static bool disable_runtime; |
| +static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT); |
| static int __init setup_noefi(char *arg) |
| { |
| disable_runtime = true; |
| @@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char *str) |
| if (parse_option_str(str, "noruntime")) |
| disable_runtime = true; |
| |
| + if (parse_option_str(str, "runtime")) |
| + disable_runtime = false; |
| + |
| if (parse_option_str(str, "nosoftreserve")) |
| set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags); |
| |
| diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c |
| index 254e67141a77..7a39029b083f 100644 |
| --- a/drivers/gpu/drm/i915/display/intel_crtc.c |
| +++ b/drivers/gpu/drm/i915/display/intel_crtc.c |
| @@ -425,7 +425,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) |
| */ |
| intel_psr_wait_for_idle(new_crtc_state); |
| |
| - local_irq_disable(); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + local_irq_disable(); |
| |
| crtc->debug.min_vbl = min; |
| crtc->debug.max_vbl = max; |
| @@ -450,11 +451,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) |
| break; |
| } |
| |
| - local_irq_enable(); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + local_irq_enable(); |
| |
| timeout = schedule_timeout(timeout); |
| |
| - local_irq_disable(); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + local_irq_disable(); |
| } |
| |
| finish_wait(wq, &wait); |
| @@ -487,7 +490,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) |
| return; |
| |
| irq_disable: |
| - local_irq_disable(); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + local_irq_disable(); |
| } |
| |
| #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) |
| @@ -566,7 +570,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) |
| new_crtc_state->uapi.event = NULL; |
| } |
| |
| - local_irq_enable(); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + local_irq_enable(); |
| |
| /* Send VRR Push to terminate Vblank */ |
| intel_vrr_send_push(new_crtc_state); |
| diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c |
| index 209cf265bf74..6e1b9068d944 100644 |
| --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c |
| +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c |
| @@ -311,10 +311,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) |
| /* Kick the work once more to drain the signalers, and disarm the irq */ |
| irq_work_sync(&b->irq_work); |
| while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { |
| - local_irq_disable(); |
| - signal_irq_work(&b->irq_work); |
| - local_irq_enable(); |
| + irq_work_queue(&b->irq_work); |
| cond_resched(); |
| + irq_work_sync(&b->irq_work); |
| } |
| } |
| |
| diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h |
| index c41098950746..601274ba86e4 100644 |
| --- a/drivers/gpu/drm/i915/gt/intel_context.h |
| +++ b/drivers/gpu/drm/i915/gt/intel_context.h |
| @@ -163,7 +163,8 @@ static inline void intel_context_enter(struct intel_context *ce) |
| |
| static inline void intel_context_mark_active(struct intel_context *ce) |
| { |
| - lockdep_assert_held(&ce->timeline->mutex); |
| + lockdep_assert(lockdep_is_held(&ce->timeline->mutex) || |
| + test_bit(CONTEXT_IS_PARKED, &ce->flags)); |
| ++ce->active_count; |
| } |
| |
| diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h |
| index a63631ea0ec4..314457fb9db5 100644 |
| --- a/drivers/gpu/drm/i915/gt/intel_context_types.h |
| +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h |
| @@ -112,6 +112,7 @@ struct intel_context { |
| #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 |
| #define CONTEXT_NOPREEMPT 8 |
| #define CONTEXT_LRCA_DIRTY 9 |
| +#define CONTEXT_IS_PARKED 10 |
| |
| struct { |
| u64 timeout_us; |
| diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c |
| index dacd62773735..73e96ca024df 100644 |
| --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c |
| +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c |
| @@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf) |
| return 0; |
| } |
| |
| -#if IS_ENABLED(CONFIG_LOCKDEP) |
| - |
| -static unsigned long __timeline_mark_lock(struct intel_context *ce) |
| -{ |
| - unsigned long flags; |
| - |
| - local_irq_save(flags); |
| - mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); |
| - |
| - return flags; |
| -} |
| - |
| -static void __timeline_mark_unlock(struct intel_context *ce, |
| - unsigned long flags) |
| -{ |
| - mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); |
| - local_irq_restore(flags); |
| -} |
| - |
| -#else |
| - |
| -static unsigned long __timeline_mark_lock(struct intel_context *ce) |
| -{ |
| - return 0; |
| -} |
| - |
| -static void __timeline_mark_unlock(struct intel_context *ce, |
| - unsigned long flags) |
| -{ |
| -} |
| - |
| -#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ |
| - |
| static void duration(struct dma_fence *fence, struct dma_fence_cb *cb) |
| { |
| struct i915_request *rq = to_request(fence); |
| @@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) |
| { |
| struct intel_context *ce = engine->kernel_context; |
| struct i915_request *rq; |
| - unsigned long flags; |
| bool result = true; |
| |
| /* GPU is pointing to the void, as good as in the kernel context. */ |
| @@ -201,7 +167,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) |
| * engine->wakeref.count, we may see the request completion and retire |
| * it causing an underflow of the engine->wakeref. |
| */ |
| - flags = __timeline_mark_lock(ce); |
| + set_bit(CONTEXT_IS_PARKED, &ce->flags); |
| GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); |
| |
| rq = __i915_request_create(ce, GFP_NOWAIT); |
| @@ -233,7 +199,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) |
| |
| result = false; |
| out_unlock: |
| - __timeline_mark_unlock(ce, flags); |
| + clear_bit(CONTEXT_IS_PARKED, &ce->flags); |
| return result; |
| } |
| |
| diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c |
| index 416f5e0657f0..c5b0c99e60c9 100644 |
| --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c |
| +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c |
| @@ -1283,7 +1283,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) |
| * and context switches) submission. |
| */ |
| |
| - spin_lock(&sched_engine->lock); |
| + spin_lock_irq(&sched_engine->lock); |
| |
| /* |
| * If the queue is higher priority than the last |
| @@ -1383,7 +1383,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) |
| * Even if ELSP[1] is occupied and not worthy |
| * of timeslices, our queue might be. |
| */ |
| - spin_unlock(&sched_engine->lock); |
| + spin_unlock_irq(&sched_engine->lock); |
| return; |
| } |
| } |
| @@ -1409,7 +1409,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) |
| |
| if (last && !can_merge_rq(last, rq)) { |
| spin_unlock(&ve->base.sched_engine->lock); |
| - spin_unlock(&engine->sched_engine->lock); |
| + spin_unlock_irq(&engine->sched_engine->lock); |
| return; /* leave this for another sibling */ |
| } |
| |
| @@ -1571,7 +1571,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) |
| */ |
| sched_engine->queue_priority_hint = queue_prio(sched_engine); |
| i915_sched_engine_reset_on_empty(sched_engine); |
| - spin_unlock(&sched_engine->lock); |
| + spin_unlock_irq(&sched_engine->lock); |
| |
| /* |
| * We can skip poking the HW if we ended up with exactly the same set |
| @@ -1597,13 +1597,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) |
| } |
| } |
| |
| -static void execlists_dequeue_irq(struct intel_engine_cs *engine) |
| -{ |
| - local_irq_disable(); /* Suspend interrupts across request submission */ |
| - execlists_dequeue(engine); |
| - local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ |
| -} |
| - |
| static void clear_ports(struct i915_request **ports, int count) |
| { |
| memset_p((void **)ports, NULL, count); |
| @@ -2423,7 +2416,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) |
| } |
| |
| if (!engine->execlists.pending[0]) { |
| - execlists_dequeue_irq(engine); |
| + execlists_dequeue(engine); |
| start_timeslice(engine); |
| } |
| |
| diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c |
| index 9bc4f4a8e12e..547347241a47 100644 |
| --- a/drivers/gpu/drm/i915/i915_irq.c |
| +++ b/drivers/gpu/drm/i915/i915_irq.c |
| @@ -886,7 +886,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, |
| */ |
| spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); |
| |
| - /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + preempt_disable(); |
| |
| /* Get optional system timestamp before query. */ |
| if (stime) |
| @@ -950,7 +951,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, |
| if (etime) |
| *etime = ktime_get(); |
| |
| - /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + preempt_enable(); |
| |
| spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); |
| |
| diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c |
| index 79da5eca60af..b9dd6100c6d1 100644 |
| --- a/drivers/gpu/drm/i915/i915_request.c |
| +++ b/drivers/gpu/drm/i915/i915_request.c |
| @@ -559,7 +559,6 @@ bool __i915_request_submit(struct i915_request *request) |
| |
| RQ_TRACE(request, "\n"); |
| |
| - GEM_BUG_ON(!irqs_disabled()); |
| lockdep_assert_held(&engine->sched_engine->lock); |
| |
| /* |
| @@ -668,7 +667,6 @@ void __i915_request_unsubmit(struct i915_request *request) |
| */ |
| RQ_TRACE(request, "\n"); |
| |
| - GEM_BUG_ON(!irqs_disabled()); |
| lockdep_assert_held(&engine->sched_engine->lock); |
| |
| /* |
| diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h |
| index 1bc1349ba3c2..a2f713b4ac2f 100644 |
| --- a/drivers/gpu/drm/i915/i915_request.h |
| +++ b/drivers/gpu/drm/i915/i915_request.h |
| @@ -609,7 +609,8 @@ i915_request_timeline(const struct i915_request *rq) |
| { |
| /* Valid only while the request is being constructed (or retired). */ |
| return rcu_dereference_protected(rq->timeline, |
| - lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex)); |
| + lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) || |
| + test_bit(CONTEXT_IS_PARKED, &rq->context->flags)); |
| } |
| |
| static inline struct i915_gem_context * |
| diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h |
| index 63fec1c3c132..f345a0f12bf6 100644 |
| --- a/drivers/gpu/drm/i915/i915_trace.h |
| +++ b/drivers/gpu/drm/i915/i915_trace.h |
| @@ -2,6 +2,10 @@ |
| #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) |
| #define _I915_TRACE_H_ |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +#define NOTRACE |
| +#endif |
| + |
| #include <linux/stringify.h> |
| #include <linux/types.h> |
| #include <linux/tracepoint.h> |
| @@ -819,7 +823,7 @@ DEFINE_EVENT(i915_request, i915_request_add, |
| TP_ARGS(rq) |
| ); |
| |
| -#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) |
| +#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE) |
| DEFINE_EVENT(i915_request, i915_request_guc_submit, |
| TP_PROTO(struct i915_request *rq), |
| TP_ARGS(rq) |
| diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h |
| index 5259edacde38..b36b27c09049 100644 |
| --- a/drivers/gpu/drm/i915/i915_utils.h |
| +++ b/drivers/gpu/drm/i915/i915_utils.h |
| @@ -343,7 +343,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) |
| #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) |
| |
| /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ |
| -#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) |
| +#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) |
| # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) |
| #else |
| # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) |
| diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c |
| index 1cf68f85b2e1..8ccf0c928bb4 100644 |
| --- a/drivers/i2c/busses/i2c-cht-wc.c |
| +++ b/drivers/i2c/busses/i2c-cht-wc.c |
| @@ -99,15 +99,8 @@ static irqreturn_t cht_wc_i2c_adap_thread_handler(int id, void *data) |
| * interrupt handler as well, so running the client irq handler from |
| * this thread will cause things to lock up. |
| */ |
| - if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ) { |
| - /* |
| - * generic_handle_irq expects local IRQs to be disabled |
| - * as normally it is called from interrupt context. |
| - */ |
| - local_irq_disable(); |
| - generic_handle_irq(adap->client_irq); |
| - local_irq_enable(); |
| - } |
| + if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ) |
| + generic_handle_irq_safe(adap->client_irq); |
| |
| return IRQ_HANDLED; |
| } |
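| |
| Several drivers in this series switch from the local_irq_disable() + |
| generic_handle_irq() + local_irq_enable() dance to generic_handle_irq_safe(), |
| which may be called from any context. A plausible sketch of such a wrapper; |
| the body below is an assumption about its shape, not a quote of the actual |
| kernel implementation: |
| |
|     /* Sketch: run an IRQ handler from process or threaded-IRQ context by |
|      * temporarily establishing the IRQ-disabled state flow handlers |
|      * expect. */ |
|     int generic_handle_irq_safe(unsigned int irq) |
|     { |
|             unsigned long flags; |
|             int ret; |
| |
|             local_irq_save(flags); |
|             ret = generic_handle_irq(irq); |
|             local_irq_restore(flags); |
|             return ret; |
|     } |
| |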
| diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c |
| index cfbef70e8ba7..cded25be1f55 100644 |
| --- a/drivers/i2c/i2c-core-base.c |
| +++ b/drivers/i2c/i2c-core-base.c |
| @@ -1422,7 +1422,7 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr) |
| if (irq <= 0) |
| return -ENXIO; |
| |
| - generic_handle_irq(irq); |
| + generic_handle_irq_safe(irq); |
| |
| return 0; |
| } |
| diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig |
| index 1f1d57288085..dc6816d36d06 100644 |
| --- a/drivers/leds/trigger/Kconfig |
| +++ b/drivers/leds/trigger/Kconfig |
| @@ -64,6 +64,7 @@ config LEDS_TRIGGER_BACKLIGHT |
| |
| config LEDS_TRIGGER_CPU |
| bool "LED CPU Trigger" |
| + depends on !PREEMPT_RT |
| help |
| This allows LEDs to be controlled by active CPUs. This shows |
| the active CPUs across an array of LEDs so you can see which |
| diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c |
| index b58984ddca13..3f187b4e8f23 100644 |
| --- a/drivers/md/raid5.c |
| +++ b/drivers/md/raid5.c |
| @@ -2217,8 +2217,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) |
| struct raid5_percpu *percpu; |
| unsigned long cpu; |
| |
| - cpu = get_cpu(); |
| + cpu = get_cpu_light(); |
| percpu = per_cpu_ptr(conf->percpu, cpu); |
| + spin_lock(&percpu->lock); |
| if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { |
| ops_run_biofill(sh); |
| overlap_clear++; |
| @@ -2277,7 +2278,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) |
| if (test_and_clear_bit(R5_Overlap, &dev->flags)) |
| wake_up(&sh->raid_conf->wait_for_overlap); |
| } |
| - put_cpu(); |
| + spin_unlock(&percpu->lock); |
| + put_cpu_light(); |
| } |
| |
| static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) |
| @@ -7099,6 +7101,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) |
| __func__, cpu); |
| return -ENOMEM; |
| } |
| + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); |
| return 0; |
| } |
| |
| diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h |
| index 5c05acf20e1f..665fe138ab4f 100644 |
| --- a/drivers/md/raid5.h |
| +++ b/drivers/md/raid5.h |
| @@ -635,6 +635,7 @@ struct r5conf { |
| int recovery_disabled; |
| /* per cpu variables */ |
| struct raid5_percpu { |
| + spinlock_t lock; /* Serializes per-CPU users on PREEMPT_RT */ |
| struct page *spare_page; /* Used when checking P/Q in raid6 */ |
| void *scribble; /* space for constructing buffer |
| * lists and performing address |
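| |
| get_cpu() serializes per-CPU users only as a side effect of disabling |
| preemption, which is exactly what PREEMPT_RT wants to avoid. The raid5 change |
| makes the serialization explicit: get_cpu_light() (an RT-tree helper assumed |
| here to disable migration rather than preemption) pins the task to a CPU, and |
| the new per-CPU spinlock protects the scratch data: |
| |
|     /* Illustrative RT-safe per-CPU access pattern. */ |
|     static void use_percpu_scratch(struct r5conf *conf) |
|     { |
|             int cpu = get_cpu_light();      /* pin CPU, stay preemptible */ |
|             struct raid5_percpu *pc = per_cpu_ptr(conf->percpu, cpu); |
| |
|             spin_lock(&pc->lock);           /* explicit serialization */ |
|             /* ... use pc->spare_page / pc->scribble ... */ |
|             spin_unlock(&pc->lock); |
|             put_cpu_light(); |
|     } |
| |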
| diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c |
| index 70fa18b04ad2..b14d3f98e1eb 100644 |
| --- a/drivers/mfd/ezx-pcap.c |
| +++ b/drivers/mfd/ezx-pcap.c |
| @@ -193,13 +193,11 @@ static void pcap_isr_work(struct work_struct *work) |
| ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr); |
| ezx_pcap_write(pcap, PCAP_REG_ISR, isr); |
| |
| - local_irq_disable(); |
| service = isr & ~msr; |
| for (irq = pcap->irq_base; service; service >>= 1, irq++) { |
| if (service & 1) |
| - generic_handle_irq(irq); |
| + generic_handle_irq_safe(irq); |
| } |
| - local_irq_enable(); |
| ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr); |
| } while (gpio_get_value(pdata->gpio)); |
| } |
| diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c |
| index 08535e97ff43..0585a5821d05 100644 |
| --- a/drivers/misc/hi6421v600-irq.c |
| +++ b/drivers/misc/hi6421v600-irq.c |
| @@ -118,8 +118,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv) |
| * If both powerkey down and up IRQs are received, |
| * handle them at the right order |
| */ |
| - generic_handle_irq(priv->irqs[POWERKEY_DOWN]); |
| - generic_handle_irq(priv->irqs[POWERKEY_UP]); |
| + generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]); |
| + generic_handle_irq_safe(priv->irqs[POWERKEY_UP]); |
| pending &= ~HISI_IRQ_POWERKEY_UP_DOWN; |
| } |
| |
| @@ -127,7 +127,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv) |
| continue; |
| |
| for_each_set_bit(offset, &pending, BITS_PER_BYTE) { |
| - generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]); |
| + generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]); |
| } |
| } |
| |
| diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c |
| index 2473fb5f75e5..2a5cc64227e9 100644 |
| --- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c |
| +++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c |
| @@ -458,7 +458,7 @@ nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle, |
| static void |
| nfp_abm_stats_calculate(struct nfp_alink_stats *new, |
| struct nfp_alink_stats *old, |
| - struct gnet_stats_basic_packed *bstats, |
| + struct gnet_stats_basic_sync *bstats, |
| struct gnet_stats_queue *qstats) |
| { |
| _bstats_update(bstats, new->tx_bytes - old->tx_bytes, |
| diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c |
| index 3e1a83a22fdd..bce0a6bd46a7 100644 |
| --- a/drivers/net/usb/lan78xx.c |
| +++ b/drivers/net/usb/lan78xx.c |
| @@ -1367,11 +1367,8 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb) |
| netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata); |
| lan78xx_defer_kevent(dev, EVENT_LINK_RESET); |
| |
| - if (dev->domain_data.phyirq > 0) { |
| - local_irq_disable(); |
| - generic_handle_irq(dev->domain_data.phyirq); |
| - local_irq_enable(); |
| - } |
| + if (dev->domain_data.phyirq > 0) |
| + generic_handle_irq_safe(dev->domain_data.phyirq); |
| } else { |
| netdev_warn(dev->net, |
| "unexpected interrupt: 0x%08x\n", intdata); |
| diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c |
| index 5ae6c207d3ac..660908027dc5 100644 |
| --- a/drivers/scsi/fcoe/fcoe.c |
| +++ b/drivers/scsi/fcoe/fcoe.c |
| @@ -1450,11 +1450,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev, |
| static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) |
| { |
| struct fcoe_percpu_s *fps; |
| - int rc; |
| + int rc, cpu = get_cpu_light(); |
| |
| - fps = &get_cpu_var(fcoe_percpu); |
| + fps = &per_cpu(fcoe_percpu, cpu); |
| rc = fcoe_get_paged_crc_eof(skb, tlen, fps); |
| - put_cpu_var(fcoe_percpu); |
| + put_cpu_light(); |
| |
| return rc; |
| } |
| @@ -1639,11 +1639,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport, |
| return 0; |
| } |
| |
| - stats = per_cpu_ptr(lport->stats, get_cpu()); |
| + stats = per_cpu_ptr(lport->stats, get_cpu_light()); |
| stats->InvalidCRCCount++; |
| if (stats->InvalidCRCCount < 5) |
| printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); |
| - put_cpu(); |
| + put_cpu_light(); |
| return -EINVAL; |
| } |
| |
| @@ -1684,7 +1684,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) |
| */ |
| hp = (struct fcoe_hdr *) skb_network_header(skb); |
| |
| - stats = per_cpu_ptr(lport->stats, get_cpu()); |
| + stats = per_cpu_ptr(lport->stats, get_cpu_light()); |
| if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { |
| if (stats->ErrorFrames < 5) |
| printk(KERN_WARNING "fcoe: FCoE version " |
| @@ -1716,13 +1716,13 @@ static void fcoe_recv_frame(struct sk_buff *skb) |
| goto drop; |
| |
| if (!fcoe_filter_frames(lport, fp)) { |
| - put_cpu(); |
| + put_cpu_light(); |
| fc_exch_recv(lport, fp); |
| return; |
| } |
| drop: |
| stats->ErrorFrames++; |
| - put_cpu(); |
| + put_cpu_light(); |
| kfree_skb(skb); |
| } |
| |
| diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c |
| index 558f3f4e1859..f08feaa4f398 100644 |
| --- a/drivers/scsi/fcoe/fcoe_ctlr.c |
| +++ b/drivers/scsi/fcoe/fcoe_ctlr.c |
| @@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) |
| |
| INIT_LIST_HEAD(&del_list); |
| |
| - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); |
| + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light()); |
| |
| list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { |
| deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; |
| @@ -864,7 +864,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) |
| sel_time = fcf->time; |
| } |
| } |
| - put_cpu(); |
| + put_cpu_light(); |
| |
| list_for_each_entry_safe(fcf, next, &del_list, list) { |
| /* Removes fcf from current list */ |
| diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c |
| index aa223db4cf53..0ceb93800704 100644 |
| --- a/drivers/scsi/libfc/fc_exch.c |
| +++ b/drivers/scsi/libfc/fc_exch.c |
| @@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, |
| } |
| memset(ep, 0, sizeof(*ep)); |
| |
| - cpu = get_cpu(); |
| + cpu = get_cpu_light(); |
| pool = per_cpu_ptr(mp->pool, cpu); |
| spin_lock_bh(&pool->lock); |
| - put_cpu(); |
| + put_cpu_light(); |
| |
| /* peek cache of free slot */ |
| if (pool->left != FC_XID_UNKNOWN) { |
| diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c |
| index 7e6347fe93f9..8a7cf1d0e968 100644 |
| --- a/drivers/staging/greybus/gpio.c |
| +++ b/drivers/staging/greybus/gpio.c |
| @@ -391,10 +391,7 @@ static int gb_gpio_request_handler(struct gb_operation *op) |
| return -EINVAL; |
| } |
| |
| - local_irq_disable(); |
| - ret = generic_handle_irq(irq); |
| - local_irq_enable(); |
| - |
| + ret = generic_handle_irq_safe(irq); |
| if (ret) |
| dev_err(dev, "failed to invoke irq handler\n"); |
| |
| diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h |
| index 6473361525d1..2321d02e9b7a 100644 |
| --- a/drivers/tty/serial/8250/8250.h |
| +++ b/drivers/tty/serial/8250/8250.h |
| @@ -132,12 +132,55 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value) |
| up->dl_write(up, value); |
| } |
| |
| +static inline void serial8250_set_IER(struct uart_8250_port *up, |
| + unsigned char ier) |
| +{ |
| + struct uart_port *port = &up->port; |
| + unsigned long flags; |
| + bool is_console; |
| + |
| + is_console = uart_console(port); |
| + |
| + if (is_console) |
| + console_atomic_lock(flags); |
| + |
| + serial_out(up, UART_IER, ier); |
| + |
| + if (is_console) |
| + console_atomic_unlock(flags); |
| +} |
| + |
| +static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up) |
| +{ |
| + struct uart_port *port = &up->port; |
| + unsigned int clearval = 0; |
| + unsigned long flags; |
| + unsigned int prior; |
| + bool is_console; |
| + |
| + is_console = uart_console(port); |
| + |
| + if (up->capabilities & UART_CAP_UUE) |
| + clearval = UART_IER_UUE; |
| + |
| + if (is_console) |
| + console_atomic_lock(flags); |
| + |
| + prior = serial_port_in(port, UART_IER); |
| + serial_port_out(port, UART_IER, clearval); |
| + |
| + if (is_console) |
| + console_atomic_unlock(flags); |
| + |
| + return prior; |
| +} |
| + |
| static inline bool serial8250_set_THRI(struct uart_8250_port *up) |
| { |
| if (up->ier & UART_IER_THRI) |
| return false; |
| up->ier |= UART_IER_THRI; |
| - serial_out(up, UART_IER, up->ier); |
| + serial8250_set_IER(up, up->ier); |
| return true; |
| } |
| |
| @@ -146,7 +189,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up) |
| if (!(up->ier & UART_IER_THRI)) |
| return false; |
| up->ier &= ~UART_IER_THRI; |
| - serial_out(up, UART_IER, up->ier); |
| + serial8250_set_IER(up, up->ier); |
| return true; |
| } |
| |
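| |
| All direct UART_IER writes now funnel through serial8250_set_IER() and |
| serial8250_clear_IER() so that IER can also be touched safely from the new |
| atomic console path: console_atomic_lock() (provided elsewhere in this |
| series) makes the read-modify-write of IER atomic with respect to |
| write_atomic() callbacks that may fire from any context, including NMI. The |
| typical caller pattern, mirroring the helpers above: |
| |
|     /* Sketch: mask UART interrupts around a polled critical section and |
|      * restore the previously programmed IER afterwards. */ |
|     unsigned int ier = serial8250_clear_IER(up);    /* save + mask */ |
|     /* ... poll the port without interrupt interference ... */ |
|     serial8250_set_IER(up, ier);                    /* restore */ |
| |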
| diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c |
| index 1ce193daea7f..fad00c0414e3 100644 |
| --- a/drivers/tty/serial/8250/8250_core.c |
| +++ b/drivers/tty/serial/8250/8250_core.c |
| @@ -264,10 +264,8 @@ static void serial8250_backup_timeout(struct timer_list *t) |
| * Must disable interrupts or else we risk racing with the interrupt |
| * based handler. |
| */ |
| - if (up->port.irq) { |
| - ier = serial_in(up, UART_IER); |
| - serial_out(up, UART_IER, 0); |
| - } |
| + if (up->port.irq) |
| + ier = serial8250_clear_IER(up); |
| |
| iir = serial_in(up, UART_IIR); |
| |
| @@ -290,7 +288,7 @@ static void serial8250_backup_timeout(struct timer_list *t) |
| serial8250_tx_chars(up); |
| |
| if (up->port.irq) |
| - serial_out(up, UART_IER, ier); |
| + serial8250_set_IER(up, ier); |
| |
| spin_unlock_irqrestore(&up->port.lock, flags); |
| |
| @@ -568,6 +566,14 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) |
| |
| #ifdef CONFIG_SERIAL_8250_CONSOLE |
| |
| +static void univ8250_console_write_atomic(struct console *co, const char *s, |
| + unsigned int count) |
| +{ |
| + struct uart_8250_port *up = &serial8250_ports[co->index]; |
| + |
| + serial8250_console_write_atomic(up, s, count); |
| +} |
| + |
| static void univ8250_console_write(struct console *co, const char *s, |
| unsigned int count) |
| { |
| @@ -661,6 +667,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx, |
| |
| static struct console univ8250_console = { |
| .name = "ttyS", |
| + .write_atomic = univ8250_console_write_atomic, |
| .write = univ8250_console_write, |
| .device = uart_console_device, |
| .setup = univ8250_console_setup, |
| diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c |
| index fc65a2293ce9..19a92530040f 100644 |
| --- a/drivers/tty/serial/8250/8250_fsl.c |
| +++ b/drivers/tty/serial/8250/8250_fsl.c |
| @@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port *port) |
| |
| /* Stop processing interrupts on input overrun */ |
| if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { |
| + unsigned long flags; |
| unsigned long delay; |
| + bool is_console; |
| |
| + is_console = uart_console(port); |
| + |
| + if (is_console) |
| + console_atomic_lock(flags); |
| up->ier = port->serial_in(port, UART_IER); |
| + if (is_console) |
| + console_atomic_unlock(flags); |
| + |
| if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { |
| port->ops->stop_rx(port); |
| } else { |
| diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c |
| index 65402d05eff9..8122645ab05c 100644 |
| --- a/drivers/tty/serial/8250/8250_ingenic.c |
| +++ b/drivers/tty/serial/8250/8250_ingenic.c |
| @@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart", |
| |
| static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) |
| { |
| + unsigned long flags; |
| + bool is_console; |
| int ier; |
| |
| switch (offset) { |
| @@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) |
| * If we have enabled modem status IRQs we should enable |
| * modem mode. |
| */ |
| + is_console = uart_console(p); |
| + if (is_console) |
| + console_atomic_lock(flags); |
| ier = p->serial_in(p, UART_IER); |
| + if (is_console) |
| + console_atomic_unlock(flags); |
| |
| if (ier & UART_IER_MSI) |
| value |= UART_MCR_MDCE | UART_MCR_FCM; |
| diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c |
| index de48a58460f4..364ee950f21a 100644 |
| --- a/drivers/tty/serial/8250/8250_mtk.c |
| +++ b/drivers/tty/serial/8250/8250_mtk.c |
| @@ -222,12 +222,37 @@ static void mtk8250_shutdown(struct uart_port *port) |
| |
| static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) |
| { |
| - serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask)); |
| + struct uart_port *port = &up->port; |
| + unsigned long flags; |
| + unsigned int ier; |
| + bool is_console; |
| + |
| + is_console = uart_console(port); |
| + |
| + if (is_console) |
| + console_atomic_lock(flags); |
| + |
| + ier = serial_in(up, UART_IER); |
| + serial_out(up, UART_IER, ier & (~mask)); |
| + |
| + if (is_console) |
| + console_atomic_unlock(flags); |
| } |
| |
| static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask) |
| { |
| - serial_out(up, UART_IER, serial_in(up, UART_IER) | mask); |
| + struct uart_port *port = &up->port; |
| + unsigned long flags; |
| + unsigned int ier; |
| + |
| + if (uart_console(port)) |
| + console_atomic_lock(flags); |
| + |
| + ier = serial_in(up, UART_IER); |
| + serial_out(up, UART_IER, ier | mask); |
| + |
| + if (uart_console(port)) |
| + console_atomic_unlock(flags); |
| } |
| |
| static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) |
| diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c |
| index df9731f73746..363888c2678b 100644 |
| --- a/drivers/tty/serial/8250/8250_port.c |
| +++ b/drivers/tty/serial/8250/8250_port.c |
| @@ -770,7 +770,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) |
| serial_out(p, UART_EFR, UART_EFR_ECB); |
| serial_out(p, UART_LCR, 0); |
| } |
| - serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); |
| + serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0); |
| if (p->capabilities & UART_CAP_EFR) { |
| serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); |
| serial_out(p, UART_EFR, efr); |
| @@ -1444,7 +1444,7 @@ static void serial8250_stop_rx(struct uart_port *port) |
| |
| up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); |
| up->port.read_status_mask &= ~UART_LSR_DR; |
| - serial_port_out(port, UART_IER, up->ier); |
| + serial8250_set_IER(up, up->ier); |
| |
| serial8250_rpm_put(up); |
| } |
| @@ -1474,7 +1474,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p) |
| serial8250_clear_and_reinit_fifos(p); |
| |
| p->ier |= UART_IER_RLSI | UART_IER_RDI; |
| - serial_port_out(&p->port, UART_IER, p->ier); |
| + serial8250_set_IER(p, p->ier); |
| } |
| } |
| EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); |
| @@ -1710,7 +1710,7 @@ static void serial8250_disable_ms(struct uart_port *port) |
| mctrl_gpio_disable_ms(up->gpios); |
| |
| up->ier &= ~UART_IER_MSI; |
| - serial_port_out(port, UART_IER, up->ier); |
| + serial8250_set_IER(up, up->ier); |
| } |
| |
| static void serial8250_enable_ms(struct uart_port *port) |
| @@ -1726,7 +1726,7 @@ static void serial8250_enable_ms(struct uart_port *port) |
| up->ier |= UART_IER_MSI; |
| |
| serial8250_rpm_get(up); |
| - serial_port_out(port, UART_IER, up->ier); |
| + serial8250_set_IER(up, up->ier); |
| serial8250_rpm_put(up); |
| } |
| |
| @@ -2145,14 +2145,7 @@ static void serial8250_put_poll_char(struct uart_port *port, |
| struct uart_8250_port *up = up_to_u8250p(port); |
| |
| serial8250_rpm_get(up); |
| - /* |
| - * First save the IER then disable the interrupts |
| - */ |
| - ier = serial_port_in(port, UART_IER); |
| - if (up->capabilities & UART_CAP_UUE) |
| - serial_port_out(port, UART_IER, UART_IER_UUE); |
| - else |
| - serial_port_out(port, UART_IER, 0); |
| + ier = serial8250_clear_IER(up); |
| |
| wait_for_xmitr(up, BOTH_EMPTY); |
| /* |
| @@ -2165,7 +2158,7 @@ static void serial8250_put_poll_char(struct uart_port *port, |
| * and restore the IER |
| */ |
| wait_for_xmitr(up, BOTH_EMPTY); |
| - serial_port_out(port, UART_IER, ier); |
| + serial8250_set_IER(up, ier); |
| serial8250_rpm_put(up); |
| } |
| |
| @@ -2468,7 +2461,7 @@ void serial8250_do_shutdown(struct uart_port *port) |
| */ |
| spin_lock_irqsave(&port->lock, flags); |
| up->ier = 0; |
| - serial_port_out(port, UART_IER, 0); |
| + serial8250_set_IER(up, 0); |
| spin_unlock_irqrestore(&port->lock, flags); |
| |
| synchronize_irq(port->irq); |
| @@ -2850,7 +2843,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, |
| if (up->capabilities & UART_CAP_RTOIE) |
| up->ier |= UART_IER_RTOIE; |
| |
| - serial_port_out(port, UART_IER, up->ier); |
| + serial8250_set_IER(up, up->ier); |
| |
| if (up->capabilities & UART_CAP_EFR) { |
| unsigned char efr = 0; |
| @@ -3316,7 +3309,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); |
| |
| #ifdef CONFIG_SERIAL_8250_CONSOLE |
| |
| -static void serial8250_console_putchar(struct uart_port *port, int ch) |
| +static void serial8250_console_putchar_locked(struct uart_port *port, int ch) |
| { |
| struct uart_8250_port *up = up_to_u8250p(port); |
| |
| @@ -3324,6 +3317,18 @@ static void serial8250_console_putchar(struct uart_port *port, int ch) |
| serial_port_out(port, UART_TX, ch); |
| } |
| |
| +static void serial8250_console_putchar(struct uart_port *port, int ch) |
| +{ |
| + struct uart_8250_port *up = up_to_u8250p(port); |
| + unsigned long flags; |
| + |
| + wait_for_xmitr(up, UART_LSR_THRE); |
| + |
| + console_atomic_lock(flags); |
| + serial8250_console_putchar_locked(port, ch); |
| + console_atomic_unlock(flags); |
| +} |
| + |
| /* |
| * Restore serial console when h/w power-off detected |
| */ |
| @@ -3345,6 +3350,32 @@ static void serial8250_console_restore(struct uart_8250_port *up) |
| serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); |
| } |
| |
| +void serial8250_console_write_atomic(struct uart_8250_port *up, |
| + const char *s, unsigned int count) |
| +{ |
| + struct uart_port *port = &up->port; |
| + unsigned long flags; |
| + unsigned int ier; |
| + |
| + console_atomic_lock(flags); |
| + |
| + touch_nmi_watchdog(); |
| + |
| + ier = serial8250_clear_IER(up); |
| + |
| + if (atomic_fetch_inc(&up->console_printing)) { |
| + uart_console_write(port, "\n", 1, |
| + serial8250_console_putchar_locked); |
| + } |
| + uart_console_write(port, s, count, serial8250_console_putchar_locked); |
| + atomic_dec(&up->console_printing); |
| + |
| + wait_for_xmitr(up, BOTH_EMPTY); |
| + serial8250_set_IER(up, ier); |
| + |
| + console_atomic_unlock(flags); |
| +} |
| + |
| /* |
| * Print a string to the serial port trying not to disturb |
| * any possible real use of the port... |
| @@ -3361,24 +3392,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, |
| struct uart_port *port = &up->port; |
| unsigned long flags; |
| unsigned int ier; |
| - int locked = 1; |
| |
| touch_nmi_watchdog(); |
| |
| - if (oops_in_progress) |
| - locked = spin_trylock_irqsave(&port->lock, flags); |
| - else |
| - spin_lock_irqsave(&port->lock, flags); |
| - |
| - /* |
| - * First save the IER then disable the interrupts |
| - */ |
| - ier = serial_port_in(port, UART_IER); |
| + spin_lock_irqsave(&port->lock, flags); |
| |
| - if (up->capabilities & UART_CAP_UUE) |
| - serial_port_out(port, UART_IER, UART_IER_UUE); |
| - else |
| - serial_port_out(port, UART_IER, 0); |
| + ier = serial8250_clear_IER(up); |
| |
| /* check scratch reg to see if port powered off during system sleep */ |
| if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { |
| @@ -3392,7 +3411,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, |
| mdelay(port->rs485.delay_rts_before_send); |
| } |
| |
| + atomic_inc(&up->console_printing); |
| uart_console_write(port, s, count, serial8250_console_putchar); |
| + atomic_dec(&up->console_printing); |
| |
| /* |
| * Finally, wait for transmitter to become empty |
| @@ -3405,8 +3426,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, |
| if (em485->tx_stopped) |
| up->rs485_stop_tx(up); |
| } |
| - |
| - serial_port_out(port, UART_IER, ier); |
| + serial8250_set_IER(up, ier); |
| |
| /* |
| * The receive handling will happen properly because the |
| @@ -3418,8 +3438,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, |
| if (up->msr_saved_flags) |
| serial8250_modem_status(up); |
| |
| - if (locked) |
| - spin_unlock_irqrestore(&port->lock, flags); |
| + spin_unlock_irqrestore(&port->lock, flags); |
| } |
| |
| static unsigned int probe_baud(struct uart_port *port) |
| @@ -3439,6 +3458,7 @@ static unsigned int probe_baud(struct uart_port *port) |
| |
| int serial8250_console_setup(struct uart_port *port, char *options, bool probe) |
| { |
| + struct uart_8250_port *up = up_to_u8250p(port); |
| int baud = 9600; |
| int bits = 8; |
| int parity = 'n'; |
| @@ -3448,6 +3468,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) |
| if (!port->iobase && !port->membase) |
| return -ENODEV; |
| |
| + atomic_set(&up->console_printing, 0); |
| + |
| if (options) |
| uart_parse_options(options, &baud, &parity, &bits, &flow); |
| else if (probe) |
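| |
| serial8250_console_write_atomic() can interrupt serial8250_console_write() |
| mid-line (for example from an NMI), so the up->console_printing counter is |
| used to detect nesting and start the urgent message on a fresh line. The |
| logic reduces to the following sketch, where putchar_locked stands in for the |
| locked putchar helper: |
| |
|     /* Sketch: emit a leading newline only when another printer was |
|      * already active on this port. */ |
|     if (atomic_fetch_inc(&up->console_printing))        /* nested? */ |
|             uart_console_write(port, "\n", 1, putchar_locked); |
|     uart_console_write(port, s, count, putchar_locked); |
|     atomic_dec(&up->console_printing); |
| |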
| diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c |
| index 0e908061b5d7..6d04cbe736a0 100644 |
| --- a/drivers/tty/serial/amba-pl011.c |
| +++ b/drivers/tty/serial/amba-pl011.c |
| @@ -2309,18 +2309,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) |
| { |
| struct uart_amba_port *uap = amba_ports[co->index]; |
| unsigned int old_cr = 0, new_cr; |
| - unsigned long flags; |
| + unsigned long flags = 0; |
| int locked = 1; |
| |
| clk_enable(uap->clk); |
| |
| - local_irq_save(flags); |
| + /* |
| + * local_irq_save(flags); |
| + * |
| + * This local_irq_save() is nonsense. If we come in via sysrq |
| + * handling then interrupts are already disabled. Aside from |
| + * that, the port.sysrq check is racy on SMP regardless. |
| + */ |
| if (uap->port.sysrq) |
| locked = 0; |
| else if (oops_in_progress) |
| - locked = spin_trylock(&uap->port.lock); |
| + locked = spin_trylock_irqsave(&uap->port.lock, flags); |
| else |
| - spin_lock(&uap->port.lock); |
| + spin_lock_irqsave(&uap->port.lock, flags); |
| |
| /* |
| * First save the CR then disable the interrupts |
| @@ -2346,8 +2352,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) |
| pl011_write(old_cr, uap, REG_CR); |
| |
| if (locked) |
| - spin_unlock(&uap->port.lock); |
| - local_irq_restore(flags); |
| + spin_unlock_irqrestore(&uap->port.lock, flags); |
| |
| clk_disable(uap->clk); |
| } |
| diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c |
| index 0862941862c8..10970632f0e4 100644 |
| --- a/drivers/tty/serial/omap-serial.c |
| +++ b/drivers/tty/serial/omap-serial.c |
| @@ -1255,13 +1255,10 @@ serial_omap_console_write(struct console *co, const char *s, |
| unsigned int ier; |
| int locked = 1; |
| |
| - local_irq_save(flags); |
| - if (up->port.sysrq) |
| - locked = 0; |
| - else if (oops_in_progress) |
| - locked = spin_trylock(&up->port.lock); |
| + if (up->port.sysrq || oops_in_progress) |
| + locked = spin_trylock_irqsave(&up->port.lock, flags); |
| else |
| - spin_lock(&up->port.lock); |
| + spin_lock_irqsave(&up->port.lock, flags); |
| |
| /* |
| * First save the IER then disable the interrupts |
| @@ -1288,8 +1285,7 @@ serial_omap_console_write(struct console *co, const char *s, |
| check_modem_status(up); |
| |
| if (locked) |
| - spin_unlock(&up->port.lock); |
| - local_irq_restore(flags); |
| + spin_unlock_irqrestore(&up->port.lock, flags); |
| } |
| |
| static int __init |
| diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c |
| index df5184979b28..d4ad211dce7a 100644 |
| --- a/drivers/virt/acrn/irqfd.c |
| +++ b/drivers/virt/acrn/irqfd.c |
| @@ -17,7 +17,6 @@ |
| #include "acrn_drv.h" |
| |
| static LIST_HEAD(acrn_irqfd_clients); |
| -static DEFINE_MUTEX(acrn_irqfds_mutex); |
| |
| /** |
| * struct hsm_irqfd - Properties of HSM irqfd |
| diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c |
| index 45cfd50a9521..502b56597f10 100644 |
| --- a/fs/afs/dir_silly.c |
| +++ b/fs/afs/dir_silly.c |
| @@ -239,7 +239,7 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode) |
| struct dentry *alias; |
| int ret; |
| |
| - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
| |
| _enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode); |
| |
| diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c |
| index 1929e80c09ee..48eb8c30c6db 100644 |
| --- a/fs/cifs/readdir.c |
| +++ b/fs/cifs/readdir.c |
| @@ -69,7 +69,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, |
| struct inode *inode; |
| struct super_block *sb = parent->d_sb; |
| struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
| - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
| |
| cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); |
| |
| diff --git a/fs/dcache.c b/fs/dcache.c |
| index cf871a81f4fd..02db80f2817f 100644 |
| --- a/fs/dcache.c |
| +++ b/fs/dcache.c |
| @@ -2537,7 +2537,13 @@ EXPORT_SYMBOL(d_rehash); |
| |
| static inline unsigned start_dir_add(struct inode *dir) |
| { |
| - |
| + /* |
| + * The caller holds a spinlock_t (dentry::d_lock), which disables |
| + * preemption on !PREEMPT_RT. On PREEMPT_RT the lock does not disable |
| + * preemption, so it has to be done explicitly. |
| + */ |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + preempt_disable(); |
| for (;;) { |
| unsigned n = dir->i_dir_seq; |
| if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) |
| @@ -2549,25 +2555,30 @@ static inline unsigned start_dir_add(struct inode *dir) |
| static inline void end_dir_add(struct inode *dir, unsigned n) |
| { |
| smp_store_release(&dir->i_dir_seq, n + 2); |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + preempt_enable(); |
| } |
| |
| static void d_wait_lookup(struct dentry *dentry) |
| { |
| - if (d_in_lookup(dentry)) { |
| - DECLARE_WAITQUEUE(wait, current); |
| - add_wait_queue(dentry->d_wait, &wait); |
| - do { |
| - set_current_state(TASK_UNINTERRUPTIBLE); |
| - spin_unlock(&dentry->d_lock); |
| - schedule(); |
| - spin_lock(&dentry->d_lock); |
| - } while (d_in_lookup(dentry)); |
| - } |
| + struct swait_queue __wait; |
| + |
| + if (!d_in_lookup(dentry)) |
| + return; |
| + |
| + INIT_LIST_HEAD(&__wait.task_list); |
| + do { |
| + prepare_to_swait_exclusive(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE); |
| + spin_unlock(&dentry->d_lock); |
| + schedule(); |
| + spin_lock(&dentry->d_lock); |
| + } while (d_in_lookup(dentry)); |
| + finish_swait(dentry->d_wait, &__wait); |
| } |
| |
| struct dentry *d_alloc_parallel(struct dentry *parent, |
| const struct qstr *name, |
| - wait_queue_head_t *wq) |
| + struct swait_queue_head *wq) |
| { |
| unsigned int hash = name->hash; |
| struct hlist_bl_head *b = in_lookup_hash(parent, hash); |
| @@ -2682,7 +2693,7 @@ void __d_lookup_done(struct dentry *dentry) |
| hlist_bl_lock(b); |
| dentry->d_flags &= ~DCACHE_PAR_LOOKUP; |
| __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); |
| - wake_up_all(dentry->d_wait); |
| + swake_up_all(dentry->d_wait); |
| dentry->d_wait = NULL; |
| hlist_bl_unlock(b); |
| INIT_HLIST_NODE(&dentry->d_u.d_alias); |
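| |
| The i_dir_seq update follows the usual sequence-count protocol: the writer |
| moves the counter to an odd value, publishes its change, then releases the |
| counter back to even, while parallel lookups retry whenever they observe an |
| odd value. On PREEMPT_RT the surrounding spinlock no longer disables |
| preemption, so the explicit preempt_disable() keeps the odd-sequence window |
| bounded; otherwise a preempted writer could stall every parallel lookup in |
| that directory. The writer side in miniature: |
| |
|     /* Sketch of the writer protocol in start_dir_add()/end_dir_add(). */ |
|     unsigned n; |
| |
|     for (;;) { |
|             n = dir->i_dir_seq; |
|             if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) |
|                     break;               /* counter now odd: in update */ |
|             cpu_relax(); |
|     } |
|     /* ... insert the new in-lookup dentry ... */ |
|     smp_store_release(&dir->i_dir_seq, n + 2); /* even again: published */ |
| |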
| diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h |
| index c3e4804b8fcb..9edb87e11680 100644 |
| --- a/fs/fscache/internal.h |
| +++ b/fs/fscache/internal.h |
| @@ -81,7 +81,6 @@ extern unsigned fscache_debug; |
| extern struct kobject *fscache_root; |
| extern struct workqueue_struct *fscache_object_wq; |
| extern struct workqueue_struct *fscache_op_wq; |
| -DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); |
| |
| extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n); |
| |
| diff --git a/fs/fscache/main.c b/fs/fscache/main.c |
| index 4207f98e405f..85f8cf3a323d 100644 |
| --- a/fs/fscache/main.c |
| +++ b/fs/fscache/main.c |
| @@ -41,8 +41,6 @@ struct kobject *fscache_root; |
| struct workqueue_struct *fscache_object_wq; |
| struct workqueue_struct *fscache_op_wq; |
| |
| -DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait); |
| - |
| /* these values serve as lower bounds, will be adjusted in fscache_init() */ |
| static unsigned fscache_object_max_active = 4; |
| static unsigned fscache_op_max_active = 2; |
| @@ -138,7 +136,6 @@ unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n) |
| static int __init fscache_init(void) |
| { |
| unsigned int nr_cpus = num_possible_cpus(); |
| - unsigned int cpu; |
| int ret; |
| |
| fscache_object_max_active = |
| @@ -161,9 +158,6 @@ static int __init fscache_init(void) |
| if (!fscache_op_wq) |
| goto error_op_wq; |
| |
| - for_each_possible_cpu(cpu) |
| - init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu)); |
| - |
| ret = fscache_proc_init(); |
| if (ret < 0) |
| goto error_proc; |
| diff --git a/fs/fscache/object.c b/fs/fscache/object.c |
| index 6a675652129b..7a972d144b54 100644 |
| --- a/fs/fscache/object.c |
| +++ b/fs/fscache/object.c |
| @@ -798,6 +798,8 @@ void fscache_object_destroy(struct fscache_object *object) |
| } |
| EXPORT_SYMBOL(fscache_object_destroy); |
| |
| +static DECLARE_WAIT_QUEUE_HEAD(fscache_object_cong_wait); |
| + |
| /* |
| * enqueue an object for metadata-type processing |
| */ |
| @@ -806,16 +808,12 @@ void fscache_enqueue_object(struct fscache_object *object) |
| _enter("{OBJ%x}", object->debug_id); |
| |
| if (fscache_get_object(object, fscache_obj_get_queue) >= 0) { |
| - wait_queue_head_t *cong_wq = |
| - &get_cpu_var(fscache_object_cong_wait); |
| |
| if (queue_work(fscache_object_wq, &object->work)) { |
| if (fscache_object_congested()) |
| - wake_up(cong_wq); |
| + wake_up(&fscache_object_cong_wait); |
| } else |
| fscache_put_object(object, fscache_obj_put_queue); |
| - |
| - put_cpu_var(fscache_object_cong_wait); |
| } |
| } |
| |
| @@ -833,16 +831,15 @@ void fscache_enqueue_object(struct fscache_object *object) |
| */ |
| bool fscache_object_sleep_till_congested(signed long *timeoutp) |
| { |
| - wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait); |
| DEFINE_WAIT(wait); |
| |
| if (fscache_object_congested()) |
| return true; |
| |
| - add_wait_queue_exclusive(cong_wq, &wait); |
| + add_wait_queue_exclusive(&fscache_object_cong_wait, &wait); |
| if (!fscache_object_congested()) |
| *timeoutp = schedule_timeout(*timeoutp); |
| - finish_wait(cong_wq, &wait); |
| + finish_wait(&fscache_object_cong_wait, &wait); |
| |
| return fscache_object_congested(); |
| } |
| diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c |
| index bc267832310c..3176913fae6c 100644 |
| --- a/fs/fuse/readdir.c |
| +++ b/fs/fuse/readdir.c |
| @@ -158,7 +158,7 @@ static int fuse_direntplus_link(struct file *file, |
| struct inode *dir = d_inode(parent); |
| struct fuse_conn *fc; |
| struct inode *inode; |
| - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
| |
| if (!o->nodeid) { |
| /* |
| diff --git a/fs/namei.c b/fs/namei.c |
| index 2ea15d043412..383f9fd2daaa 100644 |
| --- a/fs/namei.c |
| +++ b/fs/namei.c |
| @@ -1633,7 +1633,7 @@ static struct dentry *__lookup_slow(const struct qstr *name, |
| { |
| struct dentry *dentry, *old; |
| struct inode *inode = dir->d_inode; |
| - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
| |
| /* Don't go there if it's already dead */ |
| if (unlikely(IS_DEADDIR(inode))) |
| @@ -3244,7 +3244,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, |
| struct dentry *dentry; |
| int error, create_error = 0; |
| umode_t mode = op->mode; |
| - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
| |
| if (unlikely(IS_DEADDIR(dir_inode))) |
| return ERR_PTR(-ENOENT); |
| diff --git a/fs/namespace.c b/fs/namespace.c |
| index dc31ad6b370f..41950313de7a 100644 |
| --- a/fs/namespace.c |
| +++ b/fs/namespace.c |
| @@ -344,8 +344,24 @@ int __mnt_want_write(struct vfsmount *m) |
| * incremented count after it has set MNT_WRITE_HOLD. |
| */ |
| smp_mb(); |
| - while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) |
| - cpu_relax(); |
| + might_lock(&mount_lock.lock); |
| + while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { |
| + cpu_relax(); |
| + } else { |
| + /* |
| + * This prevents priority inversion if the task |
| + * setting MNT_WRITE_HOLD was preempted on a remote |
| + * CPU, and it prevents livelock if the task setting |
| + * MNT_WRITE_HOLD has a lower priority and is bound to |
| + * the same CPU as the task spinning here. |
| + */ |
| + preempt_enable(); |
| + lock_mount_hash(); |
| + unlock_mount_hash(); |
| + preempt_disable(); |
| + } |
| + } |
| /* |
| * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will |
| * be set to match its requirements. So we must not load that until |
| diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c |
| index 78219396788b..06bde5728e2f 100644 |
| --- a/fs/nfs/dir.c |
| +++ b/fs/nfs/dir.c |
| @@ -636,7 +636,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry, |
| unsigned long dir_verifier) |
| { |
| struct qstr filename = QSTR_INIT(entry->name, entry->len); |
| - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
| struct dentry *dentry; |
| struct dentry *alias; |
| struct inode *inode; |
| @@ -1867,7 +1867,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, |
| struct file *file, unsigned open_flags, |
| umode_t mode) |
| { |
| - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
| struct nfs_open_context *ctx; |
| struct dentry *res; |
| struct iattr attr = { .ia_valid = ATTR_OPEN }; |
| diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c |
| index d5ccf095b2a7..0944c068f5cb 100644 |
| --- a/fs/nfs/unlink.c |
| +++ b/fs/nfs/unlink.c |
| @@ -13,7 +13,7 @@ |
| #include <linux/sunrpc/clnt.h> |
| #include <linux/nfs_fs.h> |
| #include <linux/sched.h> |
| -#include <linux/wait.h> |
| +#include <linux/swait.h> |
| #include <linux/namei.h> |
| #include <linux/fsnotify.h> |
| |
| @@ -184,7 +184,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name) |
| |
| data->cred = get_current_cred(); |
| data->res.dir_attr = &data->dir_attr; |
| - init_waitqueue_head(&data->wq); |
| + init_swait_queue_head(&data->wq); |
| |
| status = -EBUSY; |
| spin_lock(&dentry->d_lock); |
| diff --git a/fs/proc/base.c b/fs/proc/base.c |
| index 1f394095eb88..fade2c7c705b 100644 |
| --- a/fs/proc/base.c |
| +++ b/fs/proc/base.c |
| @@ -96,6 +96,7 @@ |
| #include <linux/posix-timers.h> |
| #include <linux/time_namespace.h> |
| #include <linux/resctrl.h> |
| +#include <linux/swait.h> |
| #include <linux/cn_proc.h> |
| #include <trace/events/oom.h> |
| #include "internal.h" |
| @@ -2043,7 +2044,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, |
| |
| child = d_hash_and_lookup(dir, &qname); |
| if (!child) { |
| - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
| child = d_alloc_parallel(dir, &qname, &wq); |
| if (IS_ERR(child)) |
| goto end_instantiate; |
| diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c |
| index 5d66faecd4ef..619d8e114646 100644 |
| --- a/fs/proc/proc_sysctl.c |
| +++ b/fs/proc/proc_sysctl.c |
| @@ -678,7 +678,7 @@ static bool proc_sys_fill_cache(struct file *file, |
| |
| child = d_lookup(dir, &qname); |
| if (!child) { |
| - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); |
| + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
| child = d_alloc_parallel(dir, &qname, &wq); |
| if (IS_ERR(child)) |
| return false; |
| diff --git a/include/asm-generic/softirq_stack.h b/include/asm-generic/softirq_stack.h |
| index eceeecf6a5bd..d3e2d81656e0 100644 |
| --- a/include/asm-generic/softirq_stack.h |
| +++ b/include/asm-generic/softirq_stack.h |
| @@ -2,7 +2,7 @@ |
| #ifndef __ASM_GENERIC_SOFTIRQ_STACK_H |
| #define __ASM_GENERIC_SOFTIRQ_STACK_H |
| |
| -#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK |
| +#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT) |
| void do_softirq_own_stack(void); |
| #else |
| static inline void do_softirq_own_stack(void) |
| diff --git a/include/linux/console.h b/include/linux/console.h |
| index a97f277cfdfa..487a4266ab2c 100644 |
| --- a/include/linux/console.h |
| +++ b/include/linux/console.h |
| @@ -16,6 +16,13 @@ |
| |
| #include <linux/atomic.h> |
| #include <linux/types.h> |
| +#include <linux/printk.h> |
| +#include <linux/seqlock.h> |
| + |
| +struct latched_seq { |
| + seqcount_latch_t latch; |
| + u64 val[2]; |
| +}; |
| |
| struct vc_data; |
| struct console_font_op; |
| @@ -136,10 +143,12 @@ static inline int con_debug_leave(void) |
| #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ |
| #define CON_BRL (32) /* Used for a braille device */ |
| #define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ |
| +#define CON_HANDOVER (128) /* Device was previously a boot console. */ |
| |
| struct console { |
| char name[16]; |
| void (*write)(struct console *, const char *, unsigned); |
| + void (*write_atomic)(struct console *co, const char *s, unsigned int count); |
| int (*read)(struct console *, char *, unsigned); |
| struct tty_driver *(*device)(struct console *, int *); |
| void (*unblank)(void); |
| @@ -149,6 +158,16 @@ struct console { |
| short flags; |
| short index; |
| int cflag; |
| +#ifdef CONFIG_PRINTK |
| + char sync_buf[CONSOLE_LOG_MAX]; |
| + struct latched_seq printk_seq; |
| + struct latched_seq printk_sync_seq; |
| +#ifdef CONFIG_HAVE_NMI |
| + struct latched_seq printk_sync_nmi_seq; |
| +#endif |
| +#endif /* CONFIG_PRINTK */ |
| + |
| + struct task_struct *thread; |
| uint ispeed; |
| uint ospeed; |
| void *data; |
| diff --git a/include/linux/dcache.h b/include/linux/dcache.h |
| index 9e23d33bb6f1..9f89d4887e35 100644 |
| --- a/include/linux/dcache.h |
| +++ b/include/linux/dcache.h |
| @@ -108,7 +108,7 @@ struct dentry { |
| |
| union { |
| struct list_head d_lru; /* LRU list */ |
| - wait_queue_head_t *d_wait; /* in-lookup ones only */ |
| + struct swait_queue_head *d_wait; /* in-lookup ones only */ |
| }; |
| struct list_head d_child; /* child of parent list */ |
| struct list_head d_subdirs; /* our children */ |
| @@ -240,7 +240,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op |
| extern struct dentry * d_alloc(struct dentry *, const struct qstr *); |
| extern struct dentry * d_alloc_anon(struct super_block *); |
| extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, |
| - wait_queue_head_t *); |
| + struct swait_queue_head *); |
| extern struct dentry * d_splice_alias(struct inode *, struct dentry *); |
| extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); |
| extern struct dentry * d_exact_alias(struct dentry *, struct inode *); |
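| |
| With d_wait converted to a simple wait queue, every d_alloc_parallel() caller |
| declares its on-stack queue with the swait variant, as the fs/ changes above |
| show. Usage stays a two-liner: |
| |
|     /* Sketch: parallel-lookup allocation with an on-stack swait queue. */ |
|     DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); |
|     struct dentry *dentry = d_alloc_parallel(parent, &qname, &wq); |
| |
|     if (IS_ERR(dentry)) |
|             return PTR_ERR(dentry); |
| |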
| diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h |
| index 2e2b8d6140ed..71064a2c2caf 100644 |
| --- a/include/linux/entry-common.h |
| +++ b/include/linux/entry-common.h |
| @@ -57,9 +57,15 @@ |
| # define ARCH_EXIT_TO_USER_MODE_WORK (0) |
| #endif |
| |
| +#ifdef CONFIG_PREEMPT_LAZY |
| +# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) |
| +#else |
| +# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED) |
| +#endif |
| + |
| #define EXIT_TO_USER_MODE_WORK \ |
| (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ |
| - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ |
| + _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ |
| ARCH_EXIT_TO_USER_MODE_WORK) |
| |
| /** |
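| |
| With lazy preemption, TIF_NEED_RESCHED_LAZY marks a reschedule that only has |
| to happen before returning to user space, while TIF_NEED_RESCHED keeps its |
| immediate meaning; folding both into _TIF_NEED_RESCHED_MASK makes the exit |
| path honor either flag. Conceptually the exit-to-user work loop does the |
| following (ti_work is a stand-in for the pending-work bits): |
| |
|     /* Sketch: exit-to-user work loop honoring both resched flags. */ |
|     if (ti_work & _TIF_NEED_RESCHED_MASK) |
|             schedule(); |
| |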
| diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h |
| index ec2a47a81e42..8cd11a223260 100644 |
| --- a/include/linux/irq_work.h |
| +++ b/include/linux/irq_work.h |
| @@ -3,6 +3,7 @@ |
| #define _LINUX_IRQ_WORK_H |
| |
| #include <linux/smp_types.h> |
| +#include <linux/rcuwait.h> |
| |
| /* |
| * An entry can be in one of four states: |
| @@ -16,11 +17,13 @@ |
| struct irq_work { |
| struct __call_single_node node; |
| void (*func)(struct irq_work *); |
| + struct rcuwait irqwait; |
| }; |
| |
| #define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \ |
| .node = { .u_flags = (_flags), }, \ |
| .func = (_func), \ |
| + .irqwait = __RCUWAIT_INITIALIZER(irqwait), \ |
| } |
| |
| #define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0) |
| @@ -46,6 +49,11 @@ static inline bool irq_work_is_busy(struct irq_work *work) |
| return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY; |
| } |
| |
| +static inline bool irq_work_is_hard(struct irq_work *work) |
| +{ |
| + return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ; |
| +} |
| + |
| bool irq_work_queue(struct irq_work *work); |
| bool irq_work_queue_on(struct irq_work *work, int cpu); |
| |
| diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h |
| index 59aea39785bf..d69b819b53e0 100644 |
| --- a/include/linux/irqdesc.h |
| +++ b/include/linux/irqdesc.h |
| @@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc) |
| |
| int handle_irq_desc(struct irq_desc *desc); |
| int generic_handle_irq(unsigned int irq); |
| +int generic_handle_irq_safe(unsigned int irq); |
| |
| #ifdef CONFIG_IRQ_DOMAIN |
| /* |
| diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h |
| index 600c10da321a..4b140938b03e 100644 |
| --- a/include/linux/irqflags.h |
| +++ b/include/linux/irqflags.h |
| @@ -71,14 +71,6 @@ do { \ |
| do { \ |
| __this_cpu_dec(hardirq_context); \ |
| } while (0) |
| -# define lockdep_softirq_enter() \ |
| -do { \ |
| - current->softirq_context++; \ |
| -} while (0) |
| -# define lockdep_softirq_exit() \ |
| -do { \ |
| - current->softirq_context--; \ |
| -} while (0) |
| |
| # define lockdep_hrtimer_enter(__hrtimer) \ |
| ({ \ |
| @@ -140,6 +132,21 @@ do { \ |
| # define lockdep_irq_work_exit(__work) do { } while (0) |
| #endif |
| |
| +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT) |
| +# define lockdep_softirq_enter() \ |
| +do { \ |
| + current->softirq_context++; \ |
| +} while (0) |
| +# define lockdep_softirq_exit() \ |
| +do { \ |
| + current->softirq_context--; \ |
| +} while (0) |
| + |
| +#else |
| +# define lockdep_softirq_enter() do { } while (0) |
| +# define lockdep_softirq_exit() do { } while (0) |
| +#endif |
| + |
| #if defined(CONFIG_IRQSOFF_TRACER) || \ |
| defined(CONFIG_PREEMPT_TRACER) |
| extern void stop_critical_timings(void); |
| diff --git a/include/linux/kernel.h b/include/linux/kernel.h |
| index f56cd8879a59..49f1e924b6e6 100644 |
| --- a/include/linux/kernel.h |
| +++ b/include/linux/kernel.h |
| @@ -111,8 +111,8 @@ static __always_inline void might_resched(void) |
| #endif /* CONFIG_PREEMPT_* */ |
| |
| #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
| -extern void ___might_sleep(const char *file, int line, int preempt_offset); |
| -extern void __might_sleep(const char *file, int line, int preempt_offset); |
| +extern void __might_resched(const char *file, int line, unsigned int offsets); |
| +extern void __might_sleep(const char *file, int line); |
| extern void __cant_sleep(const char *file, int line, int preempt_offset); |
| extern void __cant_migrate(const char *file, int line); |
| |
| @@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line); |
| * supposed to. |
| */ |
| # define might_sleep() \ |
| - do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) |
| + do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) |
| /** |
| * cant_sleep - annotation for functions that cannot sleep |
| * |
| @@ -168,10 +168,9 @@ extern void __cant_migrate(const char *file, int line); |
| */ |
| # define non_block_end() WARN_ON(current->non_block_count-- == 0) |
| #else |
| - static inline void ___might_sleep(const char *file, int line, |
| - int preempt_offset) { } |
| - static inline void __might_sleep(const char *file, int line, |
| - int preempt_offset) { } |
| + static inline void __might_resched(const char *file, int line, |
| + unsigned int offsets) { } |
| +static inline void __might_sleep(const char *file, int line) { } |
| # define might_sleep() do { might_resched(); } while (0) |
| # define cant_sleep() do { } while (0) |
| # define cant_migrate() do { } while (0) |
| diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h |
| index 258cdde8d356..9bca0d98db5a 100644 |
| --- a/include/linux/kgdb.h |
| +++ b/include/linux/kgdb.h |
| @@ -212,6 +212,8 @@ extern void kgdb_call_nmi_hook(void *ignored); |
| */ |
| extern void kgdb_roundup_cpus(void); |
| |
| +extern void kgdb_roundup_cpu(unsigned int cpu); |
| + |
| /** |
| * kgdb_arch_set_pc - Generic call back to the program counter |
| * @regs: Current &struct pt_regs. |
| @@ -365,5 +367,6 @@ extern void kgdb_free_init_mem(void); |
| #define dbg_late_init() |
| static inline void kgdb_panic(const char *msg) {} |
| static inline void kgdb_free_init_mem(void) { } |
| +static inline void kgdb_roundup_cpu(unsigned int cpu) {} |
| #endif /* ! CONFIG_KGDB */ |
| #endif /* _KGDB_H_ */ |
| diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h |
| index 7f8ee09c711f..e9672de22cf2 100644 |
| --- a/include/linux/mm_types.h |
| +++ b/include/linux/mm_types.h |
| @@ -12,6 +12,7 @@ |
| #include <linux/completion.h> |
| #include <linux/cpumask.h> |
| #include <linux/uprobes.h> |
| +#include <linux/rcupdate.h> |
| #include <linux/page-flags-layout.h> |
| #include <linux/workqueue.h> |
| #include <linux/seqlock.h> |
| @@ -572,6 +573,9 @@ struct mm_struct { |
| bool tlb_flush_batched; |
| #endif |
| struct uprobes_state uprobes_state; |
| +#ifdef CONFIG_PREEMPT_RT |
| + struct rcu_head delayed_drop; |
| +#endif |
| #ifdef CONFIG_HUGETLB_PAGE |
| atomic_long_t hugetlb_usage; |
| #endif |
| diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h |
| index f8d46dc62d65..07b45ebbcb8f 100644 |
| --- a/include/linux/netdevice.h |
| +++ b/include/linux/netdevice.h |
| @@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type { |
| * @sfp_bus: attached &struct sfp_bus structure. |
| * |
| * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock |
| - * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount |
| * |
| * @proto_down: protocol port state information can be sent to the |
| * switch driver and used to set the phys state of the |
| @@ -2250,7 +2249,6 @@ struct net_device { |
| struct phy_device *phydev; |
| struct sfp_bus *sfp_bus; |
| struct lock_class_key *qdisc_tx_busylock; |
| - struct lock_class_key *qdisc_running_key; |
| bool proto_down; |
| unsigned wol_enabled:1; |
| unsigned threaded:1; |
| @@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, |
| #define netdev_lockdep_set_classes(dev) \ |
| { \ |
| static struct lock_class_key qdisc_tx_busylock_key; \ |
| - static struct lock_class_key qdisc_running_key; \ |
| static struct lock_class_key qdisc_xmit_lock_key; \ |
| static struct lock_class_key dev_addr_list_lock_key; \ |
| unsigned int i; \ |
| \ |
| (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ |
| - (dev)->qdisc_running_key = &qdisc_running_key; \ |
| lockdep_set_class(&(dev)->addr_list_lock, \ |
| &dev_addr_list_lock_key); \ |
| for (i = 0; i < (dev)->num_tx_queues; i++) \ |
| diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h |
| index ecd74cc34797..6af28750625a 100644 |
| --- a/include/linux/nfs_xdr.h |
| +++ b/include/linux/nfs_xdr.h |
| @@ -1692,7 +1692,7 @@ struct nfs_unlinkdata { |
| struct nfs_removeargs args; |
| struct nfs_removeres res; |
| struct dentry *dentry; |
| - wait_queue_head_t wq; |
| + struct swait_queue_head wq; |
| const struct cred *cred; |
| struct nfs_fattr dir_attr; |
| long timeout; |
| diff --git a/include/linux/preempt.h b/include/linux/preempt.h |
| index 4d244e295e85..3da73c968211 100644 |
| --- a/include/linux/preempt.h |
| +++ b/include/linux/preempt.h |
| @@ -122,9 +122,10 @@ |
| * The preempt_count offset after spin_lock() |
| */ |
| #if !defined(CONFIG_PREEMPT_RT) |
| -#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET |
| +#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET |
| #else |
| -#define PREEMPT_LOCK_OFFSET 0 |
| +/* Locks on RT do not disable preemption */ |
| +#define PREEMPT_LOCK_OFFSET 0 |
| #endif |
| |
| /* |
| @@ -174,6 +175,20 @@ extern void preempt_count_sub(int val); |
| #define preempt_count_inc() preempt_count_add(1) |
| #define preempt_count_dec() preempt_count_sub(1) |
| |
| +#ifdef CONFIG_PREEMPT_LAZY |
| +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) |
| +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) |
| +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) |
| +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) |
| +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) |
| +#else |
| +#define add_preempt_lazy_count(val) do { } while (0) |
| +#define sub_preempt_lazy_count(val) do { } while (0) |
| +#define inc_preempt_lazy_count() do { } while (0) |
| +#define dec_preempt_lazy_count() do { } while (0) |
| +#define preempt_lazy_count() (0) |
| +#endif |
| + |
| #ifdef CONFIG_PREEMPT_COUNT |
| |
| #define preempt_disable() \ |
| @@ -182,13 +197,25 @@ do { \ |
| barrier(); \ |
| } while (0) |
| |
| +#define preempt_lazy_disable() \ |
| +do { \ |
| + inc_preempt_lazy_count(); \ |
| + barrier(); \ |
| +} while (0) |
| + |
| #define sched_preempt_enable_no_resched() \ |
| do { \ |
| barrier(); \ |
| preempt_count_dec(); \ |
| } while (0) |
| |
| -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() |
| +#ifndef CONFIG_PREEMPT_RT |
| +# define preempt_enable_no_resched() sched_preempt_enable_no_resched() |
| +# define preempt_check_resched_rt() barrier() |
| +#else |
| +# define preempt_enable_no_resched() preempt_enable() |
| +# define preempt_check_resched_rt() preempt_check_resched() |
| +#endif |
| |
| #define preemptible() (preempt_count() == 0 && !irqs_disabled()) |
| |
| @@ -213,6 +240,18 @@ do { \ |
| __preempt_schedule(); \ |
| } while (0) |
| |
| +/* |
| + * Open code preempt_check_resched() here: it is not exported to modules, |
| + * yet preempt_lazy_enable() is used by local_unlock() and bpf_enable_instrumentation(). |
| + */ |
| +#define preempt_lazy_enable() \ |
| +do { \ |
| + dec_preempt_lazy_count(); \ |
| + barrier(); \ |
| + if (should_resched(0)) \ |
| + __preempt_schedule(); \ |
| +} while (0) |
| + |
| #else /* !CONFIG_PREEMPTION */ |
| #define preempt_enable() \ |
| do { \ |
| @@ -220,6 +259,12 @@ do { \ |
| preempt_count_dec(); \ |
| } while (0) |
| |
| +#define preempt_lazy_enable() \ |
| +do { \ |
| + dec_preempt_lazy_count(); \ |
| + barrier(); \ |
| +} while (0) |
| + |
| #define preempt_enable_notrace() \ |
| do { \ |
| barrier(); \ |
| @@ -258,8 +303,12 @@ do { \ |
| #define preempt_disable_notrace() barrier() |
| #define preempt_enable_no_resched_notrace() barrier() |
| #define preempt_enable_notrace() barrier() |
| +#define preempt_check_resched_rt() barrier() |
| #define preemptible() 0 |
| |
| +#define preempt_lazy_disable() barrier() |
| +#define preempt_lazy_enable() barrier() |
| + |
| #endif /* CONFIG_PREEMPT_COUNT */ |
| |
| #ifdef MODULE |
| @@ -278,7 +327,7 @@ do { \ |
| } while (0) |
| #define preempt_fold_need_resched() \ |
| do { \ |
| - if (tif_need_resched()) \ |
| + if (tif_need_resched_now()) \ |
| set_preempt_need_resched(); \ |
| } while (0) |
| |
| @@ -394,8 +443,15 @@ extern void migrate_enable(void); |
| |
| #else |
| |
| -static inline void migrate_disable(void) { } |
| -static inline void migrate_enable(void) { } |
| +static inline void migrate_disable(void) |
| +{ |
| + preempt_lazy_disable(); |
| +} |
| + |
| +static inline void migrate_enable(void) |
| +{ |
| + preempt_lazy_enable(); |
| +} |
| |
| #endif /* CONFIG_SMP */ |
| |
| diff --git a/include/linux/printk.h b/include/linux/printk.h |
| index 9497f6b98339..f1b9cd8d11d6 100644 |
| --- a/include/linux/printk.h |
| +++ b/include/linux/printk.h |
| @@ -47,6 +47,12 @@ static inline const char *printk_skip_headers(const char *buffer) |
| |
| #define CONSOLE_EXT_LOG_MAX 8192 |
| |
| +/* |
| + * The maximum size of a record formatted for console printing |
| + * (i.e. with the prefix prepended to every line). |
| + */ |
| +#define CONSOLE_LOG_MAX 1024 |
| + |
| /* printk's without a loglevel use this.. */ |
| #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT |
| |
| @@ -155,20 +161,7 @@ int vprintk(const char *fmt, va_list args); |
| asmlinkage __printf(1, 2) __cold |
| int _printk(const char *fmt, ...); |
| |
| -/* |
| - * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! |
| - */ |
| -__printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); |
| - |
| -extern void __printk_safe_enter(void); |
| -extern void __printk_safe_exit(void); |
| -/* |
| - * The printk_deferred_enter/exit macros are available only as a hack for |
| - * some code paths that need to defer all printk console printing. Interrupts |
| - * must be disabled for the deferred duration. |
| - */ |
| -#define printk_deferred_enter __printk_safe_enter |
| -#define printk_deferred_exit __printk_safe_exit |
| +bool pr_flush(int timeout_ms, bool reset_on_progress); |
| |
| /* |
| * Please don't use printk_ratelimit(), because it shares ratelimiting state |
| @@ -210,18 +203,10 @@ int _printk(const char *s, ...) |
| { |
| return 0; |
| } |
| -static inline __printf(1, 2) __cold |
| -int _printk_deferred(const char *s, ...) |
| -{ |
| - return 0; |
| -} |
| - |
| -static inline void printk_deferred_enter(void) |
| -{ |
| -} |
| |
| -static inline void printk_deferred_exit(void) |
| +static inline bool pr_flush(int timeout_ms, bool reset_on_progress) |
| { |
| + return true; |
| } |
| |
| static inline int printk_ratelimit(void) |
| @@ -284,17 +269,30 @@ static inline void printk_trigger_flush(void) |
| extern int __printk_cpu_trylock(void); |
| extern void __printk_wait_on_cpu_lock(void); |
| extern void __printk_cpu_unlock(void); |
| +extern bool kgdb_roundup_delay(unsigned int cpu); |
| + |
| +#else |
| + |
| +#define __printk_cpu_trylock() 1 |
| +#define __printk_wait_on_cpu_lock() |
| +#define __printk_cpu_unlock() |
| + |
| +static inline bool kgdb_roundup_delay(unsigned int cpu) |
| +{ |
| + return false; |
| +} |
| +#endif /* CONFIG_SMP */ |
| |
| /** |
| - * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning |
| - * lock and disable interrupts. |
| + * raw_printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning |
| + * lock and disable interrupts. |
| * @flags: Stack-allocated storage for saving local interrupt state, |
| - * to be passed to printk_cpu_unlock_irqrestore(). |
| + * to be passed to raw_printk_cpu_unlock_irqrestore(). |
| * |
| * If the lock is owned by another CPU, spin until it becomes available. |
| * Interrupts are restored while spinning. |
| */ |
| -#define printk_cpu_lock_irqsave(flags) \ |
| +#define raw_printk_cpu_lock_irqsave(flags) \ |
| for (;;) { \ |
| local_irq_save(flags); \ |
| if (__printk_cpu_trylock()) \ |
| @@ -304,22 +302,30 @@ extern void __printk_cpu_unlock(void); |
| } |
| |
| /** |
| - * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning |
| - * lock and restore interrupts. |
| - * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave(). |
| + * raw_printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant |
| + * spinning lock and restore interrupts. |
| + * @flags: Caller's saved interrupt state from raw_printk_cpu_lock_irqsave(). |
| */ |
| -#define printk_cpu_unlock_irqrestore(flags) \ |
| +#define raw_printk_cpu_unlock_irqrestore(flags) \ |
| do { \ |
| __printk_cpu_unlock(); \ |
| local_irq_restore(flags); \ |
| - } while (0) \ |
| - |
| -#else |
| + } while (0) |
| |
| -#define printk_cpu_lock_irqsave(flags) ((void)flags) |
| -#define printk_cpu_unlock_irqrestore(flags) ((void)flags) |
| +/* |
| + * Used to synchronize atomic consoles. |
| + * |
| + * The same as raw_printk_cpu_lock_irqsave() except that hardware interrupts |
| + * are _not_ restored while spinning. |
| + */ |
| +#define console_atomic_lock(flags) \ |
| + do { \ |
| + local_irq_save(flags); \ |
| + while (!__printk_cpu_trylock()) \ |
| + cpu_relax(); \ |
| + } while (0) |
| |
| -#endif /* CONFIG_SMP */ |
| +#define console_atomic_unlock raw_printk_cpu_unlock_irqrestore |
| |
| extern int kptr_restrict; |
| |
| @@ -448,8 +454,6 @@ struct pi_entry { |
| * See the vsnprintf() documentation for format string extensions over C99. |
| */ |
| #define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__) |
| -#define printk_deferred(fmt, ...) \ |
| - printk_index_wrap(_printk_deferred, fmt, ##__VA_ARGS__) |
| |
| /** |
| * pr_emerg - Print an emergency-level message |
| @@ -587,13 +591,9 @@ struct pi_entry { |
| #ifdef CONFIG_PRINTK |
| #define printk_once(fmt, ...) \ |
| DO_ONCE_LITE(printk, fmt, ##__VA_ARGS__) |
| -#define printk_deferred_once(fmt, ...) \ |
| - DO_ONCE_LITE(printk_deferred, fmt, ##__VA_ARGS__) |
| #else |
| #define printk_once(fmt, ...) \ |
| no_printk(fmt, ##__VA_ARGS__) |
| -#define printk_deferred_once(fmt, ...) \ |
| - no_printk(fmt, ##__VA_ARGS__) |
| #endif |
| |
| #define pr_emerg_once(fmt, ...) \ |
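| |
| A sketch of an atomic-console critical section built on the new lock; unlike |
| raw_printk_cpu_lock_irqsave(), interrupts stay disabled while spinning: |
| |
| .. code-block:: c |
| |
|     static void emit_atomic(struct console *con, const char *buf, |
|                             unsigned int len) |
|     { |
|         unsigned long flags; |
| |
|         console_atomic_lock(flags); |
|         /* cross-CPU exclusion for the duration of the write */ |
|         con->write_atomic(con, buf, len); |
|         console_atomic_unlock(flags); |
|     } |
| |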
| diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h |
| index f0e535f199be..002266693e50 100644 |
| --- a/include/linux/ratelimit_types.h |
| +++ b/include/linux/ratelimit_types.h |
| @@ -4,7 +4,7 @@ |
| |
| #include <linux/bits.h> |
| #include <linux/param.h> |
| -#include <linux/spinlock_types.h> |
| +#include <linux/spinlock_types_raw.h> |
| |
| #define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) |
| #define DEFAULT_RATELIMIT_BURST 10 |
| diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h |
| index 434d12fe2d4f..de6d1a21f113 100644 |
| --- a/include/linux/rcupdate.h |
| +++ b/include/linux/rcupdate.h |
| @@ -94,6 +94,13 @@ void rcu_init_tasks_generic(void); |
| static inline void rcu_init_tasks_generic(void) { } |
| #endif |
| |
| +#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TASKS_RCU_GENERIC) |
| +void rcu_tasks_initiate_self_tests(void); |
| +#else |
| +static inline void rcu_tasks_initiate_self_tests(void) {} |
| +#endif |
| + |
| #ifdef CONFIG_RCU_STALL_COMMON |
| void rcu_sysrq_start(void); |
| void rcu_sysrq_end(void); |
| diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h |
| index 9deedfeec2b1..7d049883a08a 100644 |
| --- a/include/linux/rtmutex.h |
| +++ b/include/linux/rtmutex.h |
| @@ -99,13 +99,22 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock |
| |
| #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass); |
| +extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock); |
| #define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0) |
| +#define rt_mutex_lock_nest_lock(lock, nest_lock) \ |
| + do { \ |
| + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ |
| + _rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ |
| + } while (0) |
| + |
| #else |
| extern void rt_mutex_lock(struct rt_mutex *lock); |
| #define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock) |
| +#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock) |
| #endif |
| |
| extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); |
| +extern int rt_mutex_lock_killable(struct rt_mutex *lock); |
| extern int rt_mutex_trylock(struct rt_mutex *lock); |
| |
| extern void rt_mutex_unlock(struct rt_mutex *lock); |
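| |
| rt_mutex_lock_nest_lock() mirrors mutex_lock_nest_lock(): the second |
| argument is an already-held lock whose dep_map tells lockdep that the |
| nesting is intentional. A sketch with illustrative locks: |
| |
| .. code-block:: c |
| |
|     static DEFINE_MUTEX(outer_lock);    /* any lock carrying a dep_map */ |
|     static DEFINE_RT_MUTEX(inner_lock); |
| |
|     static void nested_op(void) |
|     { |
|         mutex_lock(&outer_lock); |
|         /* lockdep: inner_lock instances nest under outer_lock */ |
|         rt_mutex_lock_nest_lock(&inner_lock, &outer_lock); |
|         /* critical section */ |
|         rt_mutex_unlock(&inner_lock); |
|         mutex_unlock(&outer_lock); |
|     } |
| |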
| diff --git a/include/linux/sched.h b/include/linux/sched.h |
| index ad7ff332a0ac..20efdf15c2b9 100644 |
| --- a/include/linux/sched.h |
| +++ b/include/linux/sched.h |
| @@ -118,12 +118,8 @@ struct task_group; |
| |
| #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) |
| |
| -#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) |
| - |
| #define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) |
| |
| -#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) |
| - |
| /* |
| * Special states are those that do not use the normal wait-loop pattern. See |
| * the comment with set_special_state(). |
| @@ -1084,6 +1080,10 @@ struct task_struct { |
| /* Restored if set_restore_sigmask() was used: */ |
| sigset_t saved_sigmask; |
| struct sigpending pending; |
| +#ifdef CONFIG_PREEMPT_RT |
| + /* TODO: move me into ->restart_block ? */ |
| + struct kernel_siginfo forced_info; |
| +#endif |
| unsigned long sas_ss_sp; |
| size_t sas_ss_size; |
| unsigned int sas_ss_flags; |
| @@ -1738,6 +1738,16 @@ static __always_inline bool is_percpu_thread(void) |
| #endif |
| } |
| |
| +/* Can the current task be migrated to another CPU? */ |
| +static inline bool is_migratable(void) |
| +{ |
| +#ifdef CONFIG_SMP |
| + return preemptible() && !current->migration_disabled; |
| +#else |
| + return false; |
| +#endif |
| +} |
| + |
| /* Per-process atomic flags. */ |
| #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ |
| #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ |
| @@ -2013,6 +2023,118 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) |
| return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); |
| } |
| |
| +#ifdef CONFIG_PREEMPT_LAZY |
| +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) |
| +{ |
| + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); |
| +} |
| + |
| +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) |
| +{ |
| + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); |
| +} |
| + |
| +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) |
| +{ |
| + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); |
| +} |
| + |
| +static inline int need_resched_lazy(void) |
| +{ |
| + return test_thread_flag(TIF_NEED_RESCHED_LAZY); |
| +} |
| + |
| +static inline int need_resched_now(void) |
| +{ |
| + return test_thread_flag(TIF_NEED_RESCHED); |
| +} |
| + |
| +#else |
| +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } |
| +static inline int need_resched_lazy(void) { return 0; } |
| + |
| +static inline int need_resched_now(void) |
| +{ |
| + return test_thread_flag(TIF_NEED_RESCHED); |
| +} |
| + |
| +#endif |
| + |
| +#ifdef CONFIG_PREEMPT_RT |
| +static inline bool task_match_saved_state(struct task_struct *p, long match_state) |
| +{ |
| + return p->saved_state == match_state; |
| +} |
| + |
| +static inline bool task_is_traced(struct task_struct *task) |
| +{ |
| + bool traced = false; |
| + |
| + /* in case the task is sleeping on tasklist_lock */ |
| + raw_spin_lock_irq(&task->pi_lock); |
| + if (READ_ONCE(task->__state) & __TASK_TRACED) |
| + traced = true; |
| + else if (task->saved_state & __TASK_TRACED) |
| + traced = true; |
| + raw_spin_unlock_irq(&task->pi_lock); |
| + return traced; |
| +} |
| + |
| +static inline bool task_is_stopped_or_traced(struct task_struct *task) |
| +{ |
| + bool traced_stopped = false; |
| + unsigned long flags; |
| + |
| + raw_spin_lock_irqsave(&task->pi_lock, flags); |
| + |
| + if (READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) |
| + traced_stopped = true; |
| + else if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) |
| + traced_stopped = true; |
| + |
| + raw_spin_unlock_irqrestore(&task->pi_lock, flags); |
| + return traced_stopped; |
| +} |
| + |
| +#else |
| + |
| +static inline bool task_match_saved_state(struct task_struct *p, long match_state) |
| +{ |
| + return false; |
| +} |
| + |
| +static inline bool task_is_traced(struct task_struct *task) |
| +{ |
| + return READ_ONCE(task->__state) & __TASK_TRACED; |
| +} |
| + |
| +static inline bool task_is_stopped_or_traced(struct task_struct *task) |
| +{ |
| + return READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED); |
| +} |
| +#endif |
| + |
| +static inline bool task_match_state_or_saved(struct task_struct *p, |
| + long match_state) |
| +{ |
| + if (READ_ONCE(p->__state) == match_state) |
| + return true; |
| + |
| + return task_match_saved_state(p, match_state); |
| +} |
| + |
| +static inline bool task_match_state_lock(struct task_struct *p, |
| + long match_state) |
| +{ |
| + bool match; |
| + |
| + raw_spin_lock_irq(&p->pi_lock); |
| + match = task_match_state_or_saved(p, match_state); |
| + raw_spin_unlock_irq(&p->pi_lock); |
| + |
| + return match; |
| +} |
| + |
| /* |
| * cond_resched() and cond_resched_lock(): latency reduction via |
| * explicit rescheduling in places that are safe. The return |
| @@ -2047,7 +2169,7 @@ static inline int _cond_resched(void) { return 0; } |
| #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */ |
| |
| #define cond_resched() ({ \ |
| - ___might_sleep(__FILE__, __LINE__, 0); \ |
| + __might_resched(__FILE__, __LINE__, 0); \ |
| _cond_resched(); \ |
| }) |
| |
| @@ -2055,19 +2177,38 @@ extern int __cond_resched_lock(spinlock_t *lock); |
| extern int __cond_resched_rwlock_read(rwlock_t *lock); |
| extern int __cond_resched_rwlock_write(rwlock_t *lock); |
| |
| -#define cond_resched_lock(lock) ({ \ |
| - ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ |
| - __cond_resched_lock(lock); \ |
| +#define MIGHT_RESCHED_RCU_SHIFT 8 |
| +#define MIGHT_RESCHED_PREEMPT_MASK ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1) |
| + |
| +#ifndef CONFIG_PREEMPT_RT |
| +/* |
| + * Non-RT kernels have an elevated preempt count due to the held lock, |
| + * but are not allowed to be inside an RCU read-side critical section. |
| + */ |
| +# define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET |
| +#else |
| +/* |
| + * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in |
| + * cond_resched*lock() has to take that into account because it checks for |
| + * preempt_count() and rcu_preempt_depth(). |
| + */ |
| +# define PREEMPT_LOCK_RESCHED_OFFSETS \ |
| + (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT)) |
| +#endif |
| + |
| +#define cond_resched_lock(lock) ({ \ |
| + __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ |
| + __cond_resched_lock(lock); \ |
| }) |
| |
| -#define cond_resched_rwlock_read(lock) ({ \ |
| - __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ |
| - __cond_resched_rwlock_read(lock); \ |
| +#define cond_resched_rwlock_read(lock) ({ \ |
| + __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ |
| + __cond_resched_rwlock_read(lock); \ |
| }) |
| |
| -#define cond_resched_rwlock_write(lock) ({ \ |
| - __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ |
| - __cond_resched_rwlock_write(lock); \ |
| +#define cond_resched_rwlock_write(lock) ({ \ |
| + __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS); \ |
| + __cond_resched_rwlock_write(lock); \ |
| }) |
| |
| static inline void cond_resched_rcu(void) |
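| |
| Usage is unchanged for callers; a sketch with an illustrative queue type. |
| The combined offsets mean the held lock (and, on RT, the implied RCU read |
| side) does not trip the might-sleep check: |
| |
| .. code-block:: c |
| |
|     struct my_queue {                   /* illustrative */ |
|         spinlock_t lock; |
|         struct list_head items; |
|     }; |
|     extern void handle_one_item(struct my_queue *q); |
| |
|     static void drain_queue(struct my_queue *q) |
|     { |
|         spin_lock(&q->lock); |
|         while (!list_empty(&q->items)) { |
|             handle_one_item(q); |
|             /* may drop q->lock, schedule, and re-acquire it */ |
|             cond_resched_lock(&q->lock); |
|         } |
|         spin_unlock(&q->lock); |
|     } |
| |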
| diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h |
| index 95fb7aaaec8d..28e9cc60f47e 100644 |
| --- a/include/linux/sched/mm.h |
| +++ b/include/linux/sched/mm.h |
| @@ -49,6 +49,26 @@ static inline void mmdrop(struct mm_struct *mm) |
| __mmdrop(mm); |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +extern void __mmdrop_delayed(struct rcu_head *rhp); |
| + |
| +/* |
| + * Invoked from finish_task_switch(). On RT kernels the heavy lifting is |
| + * delegated to RCU. |
| + */ |
| +static inline void mmdrop_sched(struct mm_struct *mm) |
| +{ |
| + /* Provides a full memory barrier. See mmdrop() */ |
| + if (atomic_dec_and_test(&mm->mm_count)) |
| + call_rcu(&mm->delayed_drop, __mmdrop_delayed); |
| +} |
| +#else |
| +static inline void mmdrop_sched(struct mm_struct *mm) |
| +{ |
| + mmdrop(mm); |
| +} |
| +#endif |
| + |
| /** |
| * mmget() - Pin the address space associated with a &struct mm_struct. |
| * @mm: The address space to pin. |
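| |
| A sketch of the intended call site (finish_task_switch()-style teardown); |
| the wrapper is illustrative: |
| |
| .. code-block:: c |
| |
|     static void put_prev_mm(struct mm_struct *mm) |
|     { |
|         if (!mm) |
|             return; |
|         /* plain mmdrop() on !RT; on RT the final __mmdrop() runs from |
|          * an RCU callback instead of the scheduler's atomic context */ |
|         mmdrop_sched(mm); |
|     } |
| |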
| diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h |
| index e5af028c08b4..994c25640e15 100644 |
| --- a/include/linux/sched/rt.h |
| +++ b/include/linux/sched/rt.h |
| @@ -39,20 +39,12 @@ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) |
| } |
| extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); |
| extern void rt_mutex_adjust_pi(struct task_struct *p); |
| -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) |
| -{ |
| - return tsk->pi_blocked_on != NULL; |
| -} |
| #else |
| static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) |
| { |
| return NULL; |
| } |
| # define rt_mutex_adjust_pi(p) do { } while (0) |
| -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) |
| -{ |
| - return false; |
| -} |
| #endif |
| |
| extern void normalize_rt_tasks(void); |
| diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h |
| index 5db211f43b29..aa011f668705 100644 |
| --- a/include/linux/serial_8250.h |
| +++ b/include/linux/serial_8250.h |
| @@ -7,6 +7,7 @@ |
| #ifndef _LINUX_SERIAL_8250_H |
| #define _LINUX_SERIAL_8250_H |
| |
| +#include <linux/atomic.h> |
| #include <linux/serial_core.h> |
| #include <linux/serial_reg.h> |
| #include <linux/platform_device.h> |
| @@ -125,6 +126,8 @@ struct uart_8250_port { |
| #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA |
| unsigned char msr_saved_flags; |
| |
| + atomic_t console_printing; |
| + |
| struct uart_8250_dma *dma; |
| const struct uart_8250_ops *ops; |
| |
| @@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_8250_port *up); |
| void serial8250_set_defaults(struct uart_8250_port *up); |
| void serial8250_console_write(struct uart_8250_port *up, const char *s, |
| unsigned int count); |
| +void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s, |
| + unsigned int count); |
| int serial8250_console_setup(struct uart_port *port, char *options, bool probe); |
| int serial8250_console_exit(struct uart_port *port); |
| |
| diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h |
| index e213acaa91ec..d8bc89ee46e3 100644 |
| --- a/include/linux/skbuff.h |
| +++ b/include/linux/skbuff.h |
| @@ -300,6 +300,7 @@ struct sk_buff_head { |
| |
| __u32 qlen; |
| spinlock_t lock; |
| + raw_spinlock_t raw_lock; |
| }; |
| |
| struct sk_buff; |
| @@ -1945,6 +1946,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) |
| __skb_queue_head_init(list); |
| } |
| |
| +static inline void skb_queue_head_init_raw(struct sk_buff_head *list) |
| +{ |
| + raw_spin_lock_init(&list->raw_lock); |
| + __skb_queue_head_init(list); |
| +} |
| + |
| static inline void skb_queue_head_init_class(struct sk_buff_head *list, |
| struct lock_class_key *class) |
| { |
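| |
| An illustrative user of the raw-lock initializer: pairing raw_lock with the |
| lockless __skb_queue_*() helpers keeps the list usable from truly atomic |
| context on PREEMPT_RT: |
| |
| .. code-block:: c |
| |
|     /* initialized once with skb_queue_head_init_raw(&backlog) */ |
|     static struct sk_buff_head backlog; |
| |
|     static void backlog_add(struct sk_buff *skb) |
|     { |
|         unsigned long flags; |
| |
|         raw_spin_lock_irqsave(&backlog.raw_lock, flags); |
|         __skb_queue_tail(&backlog, skb); |
|         raw_spin_unlock_irqrestore(&backlog.raw_lock, flags); |
|     } |
| |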
| diff --git a/include/linux/smp.h b/include/linux/smp.h |
| index 510519e8a1eb..7ac9fdb5ad09 100644 |
| --- a/include/linux/smp.h |
| +++ b/include/linux/smp.h |
| @@ -268,6 +268,9 @@ static inline int get_boot_cpu_id(void) |
| #define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) |
| #define put_cpu() preempt_enable() |
| |
| +#define get_cpu_light() ({ migrate_disable(); __smp_processor_id(); }) |
| +#define put_cpu_light() migrate_enable() |
| + |
| /* |
| * Callback to arch code if there's nosmp or maxcpus=0 on the |
| * boot command line: |
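| |
| The _light variants trade preemption-disable for migration-disable, which is |
| what RT wants when per-CPU data is already serialized by other means (the |
| counter below is illustrative and assumes such serialization): |
| |
| .. code-block:: c |
| |
|     DEFINE_PER_CPU(unsigned long, my_event_count); |
| |
|     static void count_event(void) |
|     { |
|         /* stays on this CPU, but remains preemptible on RT */ |
|         int cpu = get_cpu_light(); |
| |
|         per_cpu(my_event_count, cpu)++; |
|         put_cpu_light(); |
|     } |
| |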
| diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h |
| index c09b6407ae1b..7f86a2016ac5 100644 |
| --- a/include/linux/spinlock_types_up.h |
| +++ b/include/linux/spinlock_types_up.h |
| @@ -1,7 +1,7 @@ |
| #ifndef __LINUX_SPINLOCK_TYPES_UP_H |
| #define __LINUX_SPINLOCK_TYPES_UP_H |
| |
| -#ifndef __LINUX_SPINLOCK_TYPES_H |
| +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H |
| # error "please don't include this file directly" |
| #endif |
| |
| diff --git a/include/linux/suspend.h b/include/linux/suspend.h |
| index 4bcd65679cee..4cd3bc5d3891 100644 |
| --- a/include/linux/suspend.h |
| +++ b/include/linux/suspend.h |
| @@ -541,23 +541,17 @@ static inline void unlock_system_sleep(void) {} |
| #ifdef CONFIG_PM_SLEEP_DEBUG |
| extern bool pm_print_times_enabled; |
| extern bool pm_debug_messages_on; |
| -extern __printf(2, 3) void __pm_pr_dbg(bool defer, const char *fmt, ...); |
| +extern __printf(1, 2) void pm_pr_dbg(const char *fmt, ...); |
| #else |
| #define pm_print_times_enabled (false) |
| #define pm_debug_messages_on (false) |
| |
| #include <linux/printk.h> |
| |
| -#define __pm_pr_dbg(defer, fmt, ...) \ |
| +#define pm_pr_dbg(fmt, ...) \ |
| no_printk(KERN_DEBUG fmt, ##__VA_ARGS__) |
| #endif |
| |
| -#define pm_pr_dbg(fmt, ...) \ |
| - __pm_pr_dbg(false, fmt, ##__VA_ARGS__) |
| - |
| -#define pm_deferred_pr_dbg(fmt, ...) \ |
| - __pm_pr_dbg(true, fmt, ##__VA_ARGS__) |
| - |
| #ifdef CONFIG_PM_AUTOSLEEP |
| |
| /* kernel/power/autosleep.c */ |
| diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h |
| index 0999f6317978..7af834b7c114 100644 |
| --- a/include/linux/thread_info.h |
| +++ b/include/linux/thread_info.h |
| @@ -163,7 +163,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) |
| clear_ti_thread_flag(task_thread_info(t), TIF_##fl) |
| #endif /* !CONFIG_GENERIC_ENTRY */ |
| |
| -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) |
| +#ifdef CONFIG_PREEMPT_LAZY |
| +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ |
| + test_thread_flag(TIF_NEED_RESCHED_LAZY)) |
| +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) |
| +#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY) |
| + |
| +#else |
| +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) |
| +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) |
| +#define tif_need_resched_lazy() 0 |
| +#endif |
| |
| #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES |
| static inline int arch_within_stack_frames(const void * const stack, |
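| |
| A minimal sketch of how callers consume the split flags: |
| |
| .. code-block:: c |
| |
|     static void exit_to_user_resched(void) |
|     { |
|         /* tif_need_resched() folds both flags together; the split lets |
|          * callers tell immediate requests from lazy SCHED_OTHER ones. */ |
|         if (tif_need_resched_now() || tif_need_resched_lazy()) |
|             schedule(); |
|     } |
| |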
| diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h |
| index 57113190448c..827725f41149 100644 |
| --- a/include/linux/trace_events.h |
| +++ b/include/linux/trace_events.h |
| @@ -69,6 +69,7 @@ struct trace_entry { |
| unsigned char flags; |
| unsigned char preempt_count; |
| int pid; |
| + unsigned char preempt_lazy_count; |
| }; |
| |
| #define TRACE_EVENT_TYPE_MAX \ |
| @@ -157,9 +158,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry, |
| unsigned int trace_ctx) |
| { |
| entry->preempt_count = trace_ctx & 0xff; |
| + entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff; |
| entry->pid = current->pid; |
| entry->type = type; |
| - entry->flags = trace_ctx >> 16; |
| + entry->flags = trace_ctx >> 24; |
| } |
| |
| unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); |
| @@ -172,6 +174,7 @@ enum trace_flag_type { |
| TRACE_FLAG_SOFTIRQ = 0x10, |
| TRACE_FLAG_PREEMPT_RESCHED = 0x20, |
| TRACE_FLAG_NMI = 0x40, |
| + TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, |
| }; |
| |
| #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT |
| diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h |
| index e81856c0ba13..81dc1f5e181a 100644 |
| --- a/include/linux/u64_stats_sync.h |
| +++ b/include/linux/u64_stats_sync.h |
| @@ -66,7 +66,7 @@ |
| #include <linux/seqlock.h> |
| |
| struct u64_stats_sync { |
| -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
| +#if BITS_PER_LONG==32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) |
| seqcount_t seq; |
| #endif |
| }; |
| @@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p) |
| return local64_read(&p->v); |
| } |
| |
| +static inline void u64_stats_set(u64_stats_t *p, u64 val) |
| +{ |
| + local64_set(&p->v, val); |
| +} |
| + |
| static inline void u64_stats_add(u64_stats_t *p, unsigned long val) |
| { |
| local64_add(val, &p->v); |
| @@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p) |
| return p->v; |
| } |
| |
| +static inline void u64_stats_set(u64_stats_t *p, u64 val) |
| +{ |
| + p->v = val; |
| +} |
| + |
| static inline void u64_stats_add(u64_stats_t *p, unsigned long val) |
| { |
| p->v += val; |
| @@ -115,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p) |
| } |
| #endif |
| |
| -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) |
| #define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) |
| #else |
| static inline void u64_stats_init(struct u64_stats_sync *syncp) |
| @@ -125,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp) |
| |
| static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) |
| { |
| -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + preempt_disable(); |
| write_seqcount_begin(&syncp->seq); |
| #endif |
| } |
| |
| static inline void u64_stats_update_end(struct u64_stats_sync *syncp) |
| { |
| -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) |
| write_seqcount_end(&syncp->seq); |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + preempt_enable(); |
| #endif |
| } |
| |
| @@ -142,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp) |
| { |
| unsigned long flags = 0; |
| |
| -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
| - local_irq_save(flags); |
| +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + preempt_disable(); |
| + else |
| + local_irq_save(flags); |
| write_seqcount_begin(&syncp->seq); |
| #endif |
| return flags; |
| @@ -153,15 +170,18 @@ static inline void |
| u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, |
| unsigned long flags) |
| { |
| -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) |
| write_seqcount_end(&syncp->seq); |
| - local_irq_restore(flags); |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + preempt_enable(); |
| + else |
| + local_irq_restore(flags); |
| #endif |
| } |
| |
| static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) |
| { |
| -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) |
| return read_seqcount_begin(&syncp->seq); |
| #else |
| return 0; |
| @@ -170,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync * |
| |
| static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) |
| { |
| -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) |
| preempt_disable(); |
| #endif |
| return __u64_stats_fetch_begin(syncp); |
| @@ -179,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy |
| static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, |
| unsigned int start) |
| { |
| -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) |
| return read_seqcount_retry(&syncp->seq, start); |
| #else |
| return false; |
| @@ -189,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, |
| static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, |
| unsigned int start) |
| { |
| -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) |
| preempt_enable(); |
| #endif |
| return __u64_stats_fetch_retry(syncp, start); |
| @@ -203,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, |
| */ |
| static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) |
| { |
| -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) |
| + preempt_disable(); |
| +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) |
| local_irq_disable(); |
| #endif |
| return __u64_stats_fetch_begin(syncp); |
| @@ -212,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync |
| static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, |
| unsigned int start) |
| { |
| -#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) |
| +#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) |
| + preempt_enable(); |
| +#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) |
| local_irq_enable(); |
| #endif |
| return __u64_stats_fetch_retry(syncp, start); |
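| |
| The canonical writer/reader pairing with the RT-aware helpers; the counter |
| structure is illustrative: |
| |
| .. code-block:: c |
| |
|     struct my_counters { |
|         u64_stats_t             bytes; |
|         struct u64_stats_sync   syncp; |
|     }; |
| |
|     static void add_bytes(struct my_counters *c, unsigned int len) |
|     { |
|         u64_stats_update_begin(&c->syncp); /* disables preemption on RT */ |
|         u64_stats_add(&c->bytes, len); |
|         u64_stats_update_end(&c->syncp); |
|     } |
| |
|     static u64 read_bytes(struct my_counters *c) |
|     { |
|         unsigned int start; |
|         u64 bytes; |
| |
|         do { |
|             start = u64_stats_fetch_begin(&c->syncp); |
|             bytes = u64_stats_read(&c->bytes); |
|         } while (u64_stats_fetch_retry(&c->syncp, start)); |
| |
|         return bytes; |
|     } |
| |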
| diff --git a/include/net/act_api.h b/include/net/act_api.h |
| index f19f7f4a463c..b5b624c7e488 100644 |
| --- a/include/net/act_api.h |
| +++ b/include/net/act_api.h |
| @@ -30,13 +30,13 @@ struct tc_action { |
| atomic_t tcfa_bindcnt; |
| int tcfa_action; |
| struct tcf_t tcfa_tm; |
| - struct gnet_stats_basic_packed tcfa_bstats; |
| - struct gnet_stats_basic_packed tcfa_bstats_hw; |
| + struct gnet_stats_basic_sync tcfa_bstats; |
| + struct gnet_stats_basic_sync tcfa_bstats_hw; |
| struct gnet_stats_queue tcfa_qstats; |
| struct net_rate_estimator __rcu *tcfa_rate_est; |
| spinlock_t tcfa_lock; |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats; |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw; |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats; |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats_hw; |
| struct gnet_stats_queue __percpu *cpu_qstats; |
| struct tc_cookie __rcu *act_cookie; |
| struct tcf_chain __rcu *goto_chain; |
| @@ -206,7 +206,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a, |
| struct sk_buff *skb) |
| { |
| if (likely(a->cpu_bstats)) { |
| - bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(a->cpu_bstats), skb); |
| return; |
| } |
| spin_lock(&a->tcfa_lock); |
| diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h |
| index 1424e02cef90..7aa2b8e1fb29 100644 |
| --- a/include/net/gen_stats.h |
| +++ b/include/net/gen_stats.h |
| @@ -7,14 +7,17 @@ |
| #include <linux/rtnetlink.h> |
| #include <linux/pkt_sched.h> |
| |
| -/* Note: this used to be in include/uapi/linux/gen_stats.h */ |
| -struct gnet_stats_basic_packed { |
| - __u64 bytes; |
| - __u64 packets; |
| -}; |
| - |
| -struct gnet_stats_basic_cpu { |
| - struct gnet_stats_basic_packed bstats; |
| +/* Throughput stats. |
| + * Must be initialized beforehand with gnet_stats_basic_sync_init(). |
| + * |
| + * If no reads can ever occur in parallel with writes (e.g. stack-allocated |
| + * bstats), then the internal stat values can be written to and read |
| + * from directly. Otherwise, use _bstats_set/update() for writes and |
| + * gnet_stats_add_basic() for reads. |
| + */ |
| +struct gnet_stats_basic_sync { |
| + u64_stats_t bytes; |
| + u64_stats_t packets; |
| struct u64_stats_sync syncp; |
| } __aligned(2 * sizeof(u64)); |
| |
| @@ -34,6 +37,7 @@ struct gnet_dump { |
| struct tc_stats tc_stats; |
| }; |
| |
| +void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b); |
| int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, |
| struct gnet_dump *d, int padattr); |
| |
| @@ -42,41 +46,38 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, |
| spinlock_t *lock, struct gnet_dump *d, |
| int padattr); |
| |
| -int gnet_stats_copy_basic(const seqcount_t *running, |
| - struct gnet_dump *d, |
| - struct gnet_stats_basic_cpu __percpu *cpu, |
| - struct gnet_stats_basic_packed *b); |
| -void __gnet_stats_copy_basic(const seqcount_t *running, |
| - struct gnet_stats_basic_packed *bstats, |
| - struct gnet_stats_basic_cpu __percpu *cpu, |
| - struct gnet_stats_basic_packed *b); |
| -int gnet_stats_copy_basic_hw(const seqcount_t *running, |
| - struct gnet_dump *d, |
| - struct gnet_stats_basic_cpu __percpu *cpu, |
| - struct gnet_stats_basic_packed *b); |
| +int gnet_stats_copy_basic(struct gnet_dump *d, |
| + struct gnet_stats_basic_sync __percpu *cpu, |
| + struct gnet_stats_basic_sync *b, bool running); |
| +void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, |
| + struct gnet_stats_basic_sync __percpu *cpu, |
| + struct gnet_stats_basic_sync *b, bool running); |
| +int gnet_stats_copy_basic_hw(struct gnet_dump *d, |
| + struct gnet_stats_basic_sync __percpu *cpu, |
| + struct gnet_stats_basic_sync *b, bool running); |
| int gnet_stats_copy_rate_est(struct gnet_dump *d, |
| struct net_rate_estimator __rcu **ptr); |
| int gnet_stats_copy_queue(struct gnet_dump *d, |
| struct gnet_stats_queue __percpu *cpu_q, |
| struct gnet_stats_queue *q, __u32 qlen); |
| -void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, |
| - const struct gnet_stats_queue __percpu *cpu_q, |
| - const struct gnet_stats_queue *q, __u32 qlen); |
| +void gnet_stats_add_queue(struct gnet_stats_queue *qstats, |
| + const struct gnet_stats_queue __percpu *cpu_q, |
| + const struct gnet_stats_queue *q); |
| int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); |
| |
| int gnet_stats_finish_copy(struct gnet_dump *d); |
| |
| -int gen_new_estimator(struct gnet_stats_basic_packed *bstats, |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats, |
| +int gen_new_estimator(struct gnet_stats_basic_sync *bstats, |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats, |
| struct net_rate_estimator __rcu **rate_est, |
| spinlock_t *lock, |
| - seqcount_t *running, struct nlattr *opt); |
| + bool running, struct nlattr *opt); |
| void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); |
| -int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats, |
| +int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats, |
| struct net_rate_estimator __rcu **ptr, |
| spinlock_t *lock, |
| - seqcount_t *running, struct nlattr *opt); |
| + bool running, struct nlattr *opt); |
| bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); |
| bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, |
| struct gnet_stats_rate_est64 *sample); |
| diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h |
| index 832ab69efda5..4c3809e141f4 100644 |
| --- a/include/net/netfilter/xt_rateest.h |
| +++ b/include/net/netfilter/xt_rateest.h |
| @@ -6,7 +6,7 @@ |
| |
| struct xt_rateest { |
| /* keep lock and bstats on same cache line to speedup xt_rateest_tg() */ |
| - struct gnet_stats_basic_packed bstats; |
| + struct gnet_stats_basic_sync bstats; |
| spinlock_t lock; |
| |
| |
| diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h |
| index 83a6d0792180..4a5833108083 100644 |
| --- a/include/net/pkt_cls.h |
| +++ b/include/net/pkt_cls.h |
| @@ -765,7 +765,7 @@ struct tc_cookie { |
| }; |
| |
| struct tc_qopt_offload_stats { |
| - struct gnet_stats_basic_packed *bstats; |
| + struct gnet_stats_basic_sync *bstats; |
| struct gnet_stats_queue *qstats; |
| }; |
| |
| @@ -885,7 +885,7 @@ struct tc_gred_qopt_offload_params { |
| }; |
| |
| struct tc_gred_qopt_offload_stats { |
| - struct gnet_stats_basic_packed bstats[MAX_DPs]; |
| + struct gnet_stats_basic_sync bstats[MAX_DPs]; |
| struct gnet_stats_queue qstats[MAX_DPs]; |
| struct red_stats *xstats[MAX_DPs]; |
| }; |
| diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h |
| index 1958d1260fe9..e43f3922faa7 100644 |
| --- a/include/net/sch_generic.h |
| +++ b/include/net/sch_generic.h |
| @@ -40,6 +40,13 @@ enum qdisc_state_t { |
| __QDISC_STATE_DRAINING, |
| }; |
| |
| +enum qdisc_state2_t { |
| + /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. |
| + * Use qdisc_run_begin/end() or qdisc_is_running() instead. |
| + */ |
| + __QDISC_STATE2_RUNNING, |
| +}; |
| + |
| #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) |
| #define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING) |
| |
| @@ -97,7 +104,7 @@ struct Qdisc { |
| struct netdev_queue *dev_queue; |
| |
| struct net_rate_estimator __rcu *rate_est; |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats; |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats; |
| struct gnet_stats_queue __percpu *cpu_qstats; |
| int pad; |
| refcount_t refcnt; |
| @@ -107,10 +114,10 @@ struct Qdisc { |
| */ |
| struct sk_buff_head gso_skb ____cacheline_aligned_in_smp; |
| struct qdisc_skb_head q; |
| - struct gnet_stats_basic_packed bstats; |
| - seqcount_t running; |
| + struct gnet_stats_basic_sync bstats; |
| struct gnet_stats_queue qstats; |
| unsigned long state; |
| + unsigned long state2; /* must be written under qdisc spinlock */ |
| struct Qdisc *next_sched; |
| struct sk_buff_head skb_bad_txq; |
| |
| @@ -143,11 +150,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) |
| return NULL; |
| } |
| |
| +/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc |
| + * root_lock section, or provide their own memory barriers to order |
| + * against the qdisc_run_begin()/qdisc_run_end() atomic bit operations. |
| + */ |
| static inline bool qdisc_is_running(struct Qdisc *qdisc) |
| { |
| if (qdisc->flags & TCQ_F_NOLOCK) |
| return spin_is_locked(&qdisc->seqlock); |
| - return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; |
| + return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); |
| } |
| |
| static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) |
| @@ -167,6 +178,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) |
| return !READ_ONCE(qdisc->q.qlen); |
| } |
| |
| +/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with |
| + * the qdisc root lock acquired. |
| + */ |
| static inline bool qdisc_run_begin(struct Qdisc *qdisc) |
| { |
| if (qdisc->flags & TCQ_F_NOLOCK) { |
| @@ -186,15 +200,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) |
| * when testing it in qdisc_run_end() |
| */ |
| return spin_trylock(&qdisc->seqlock); |
| - } else if (qdisc_is_running(qdisc)) { |
| - return false; |
| } |
| - /* Variant of write_seqcount_begin() telling lockdep a trylock |
| - * was attempted. |
| - */ |
| - raw_write_seqcount_begin(&qdisc->running); |
| - seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); |
| - return true; |
| + return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); |
| } |
| |
| static inline void qdisc_run_end(struct Qdisc *qdisc) |
| @@ -212,7 +219,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) |
| &qdisc->state))) |
| __netif_schedule(qdisc); |
| } else { |
| - write_seqcount_end(&qdisc->running); |
| + __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); |
| } |
| } |
| |
| @@ -576,14 +583,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) |
| return qdisc_lock(root); |
| } |
| |
| -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) |
| -{ |
| - struct Qdisc *root = qdisc_root_sleeping(qdisc); |
| - |
| - ASSERT_RTNL(); |
| - return &root->running; |
| -} |
| - |
| static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) |
| { |
| return qdisc->dev_queue->dev; |
| @@ -833,14 +832,16 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, |
| return sch->enqueue(skb, sch, to_free); |
| } |
| |
| -static inline void _bstats_update(struct gnet_stats_basic_packed *bstats, |
| +static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, |
| __u64 bytes, __u32 packets) |
| { |
| - bstats->bytes += bytes; |
| - bstats->packets += packets; |
| + u64_stats_update_begin(&bstats->syncp); |
| + u64_stats_add(&bstats->bytes, bytes); |
| + u64_stats_add(&bstats->packets, packets); |
| + u64_stats_update_end(&bstats->syncp); |
| } |
| |
| -static inline void bstats_update(struct gnet_stats_basic_packed *bstats, |
| +static inline void bstats_update(struct gnet_stats_basic_sync *bstats, |
| const struct sk_buff *skb) |
| { |
| _bstats_update(bstats, |
| @@ -848,26 +849,10 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats, |
| skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1); |
| } |
| |
| -static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, |
| - __u64 bytes, __u32 packets) |
| -{ |
| - u64_stats_update_begin(&bstats->syncp); |
| - _bstats_update(&bstats->bstats, bytes, packets); |
| - u64_stats_update_end(&bstats->syncp); |
| -} |
| - |
| -static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats, |
| - const struct sk_buff *skb) |
| -{ |
| - u64_stats_update_begin(&bstats->syncp); |
| - bstats_update(&bstats->bstats, skb); |
| - u64_stats_update_end(&bstats->syncp); |
| -} |
| - |
| static inline void qdisc_bstats_cpu_update(struct Qdisc *sch, |
| const struct sk_buff *skb) |
| { |
| - bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(sch->cpu_bstats), skb); |
| } |
| |
| static inline void qdisc_bstats_update(struct Qdisc *sch, |
| @@ -956,10 +941,9 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, |
| __u32 *backlog) |
| { |
| struct gnet_stats_queue qstats = { 0 }; |
| - __u32 len = qdisc_qlen_sum(sch); |
| |
| - __gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len); |
| - *qlen = qstats.qlen; |
| + gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats); |
| + *qlen = qstats.qlen + qdisc_qlen(sch); |
| *backlog = qstats.backlog; |
| } |
| |
| @@ -1305,7 +1289,7 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64); |
| struct mini_Qdisc { |
| struct tcf_proto *filter_list; |
| struct tcf_block *block; |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats; |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats; |
| struct gnet_stats_queue __percpu *cpu_qstats; |
| struct rcu_head rcu; |
| }; |
| @@ -1313,7 +1297,7 @@ struct mini_Qdisc { |
| static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq, |
| const struct sk_buff *skb) |
| { |
| - bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb); |
| } |
| |
| static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq) |
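| |
| After the conversion a single helper serves both the per-CPU and the |
| qdisc-global counters; a sketch of the resulting accounting path |
| (qdisc_is_percpu_stats() is the existing TCQ_F_CPUSTATS test): |
| |
| .. code-block:: c |
| |
|     static void account_skb(struct Qdisc *sch, const struct sk_buff *skb) |
|     { |
|         if (qdisc_is_percpu_stats(sch)) |
|             bstats_update(this_cpu_ptr(sch->cpu_bstats), skb); |
|         else |
|             bstats_update(&sch->bstats, skb);   /* under qdisc lock */ |
|     } |
| |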
| diff --git a/init/Kconfig b/init/Kconfig |
| index d19ed66aba3b..160f836f81c7 100644 |
| --- a/init/Kconfig |
| +++ b/init/Kconfig |
| @@ -906,7 +906,7 @@ config NUMA_BALANCING |
| bool "Memory placement aware NUMA scheduler" |
| depends on ARCH_SUPPORTS_NUMA_BALANCING |
| depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY |
| - depends on SMP && NUMA && MIGRATION |
| + depends on SMP && NUMA && MIGRATION && !PREEMPT_RT |
| help |
| This option adds support for automatic NUMA aware memory/task placement. |
| The mechanism is quite primitive and is based on migrating memory when |
| @@ -943,6 +943,7 @@ config PAGE_COUNTER |
| |
| config MEMCG |
| bool "Memory controller" |
| + depends on !PREEMPT_RT |
| select PAGE_COUNTER |
| select EVENTFD |
| help |
| @@ -1901,6 +1902,7 @@ choice |
| |
| config SLAB |
| bool "SLAB" |
| + depends on !PREEMPT_RT |
| select HAVE_HARDENED_USERCOPY_ALLOCATOR |
| help |
| The regular slab allocator that is established and known to work |
| @@ -1921,6 +1923,7 @@ config SLUB |
| config SLOB |
| depends on EXPERT |
| bool "SLOB (Simple Allocator)" |
| + depends on !PREEMPT_RT |
| help |
| SLOB replaces the stock allocator with a drastically simpler |
| allocator. SLOB is generally more space efficient but |
| diff --git a/init/main.c b/init/main.c |
| index cf79b5a766cb..500a40b705e9 100644 |
| --- a/init/main.c |
| +++ b/init/main.c |
| @@ -1605,6 +1605,7 @@ static noinline void __init kernel_init_freeable(void) |
| |
| rcu_init_tasks_generic(); |
| do_pre_smp_initcalls(); |
| + rcu_tasks_initiate_self_tests(); |
| lockup_detector_init(); |
| |
| smp_init(); |
| diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt |
| index 5876e30c5740..5df0776264c2 100644 |
| --- a/kernel/Kconfig.preempt |
| +++ b/kernel/Kconfig.preempt |
| @@ -1,5 +1,11 @@ |
| # SPDX-License-Identifier: GPL-2.0-only |
| |
| +config HAVE_PREEMPT_LAZY |
| + bool |
| + |
| +config PREEMPT_LAZY |
| + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT |
| + |
| choice |
| prompt "Preemption Model" |
| default PREEMPT_NONE |
| diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c |
| index 1486768f2318..bb3b805436c4 100644 |
| --- a/kernel/cgroup/rstat.c |
| +++ b/kernel/cgroup/rstat.c |
| @@ -156,8 +156,9 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) |
| raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, |
| cpu); |
| struct cgroup *pos = NULL; |
| + unsigned long flags; |
| |
| - raw_spin_lock(cpu_lock); |
| + raw_spin_lock_irqsave(cpu_lock, flags); |
| while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) { |
| struct cgroup_subsys_state *css; |
| |
| @@ -169,7 +170,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) |
| css->ss->css_rstat_flush(css, cpu); |
| rcu_read_unlock(); |
| } |
| - raw_spin_unlock(cpu_lock); |
| + raw_spin_unlock_irqrestore(cpu_lock, flags); |
| |
| /* if @may_sleep, play nice and yield if necessary */ |
| if (may_sleep && (need_resched() || |
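| 
| The irqsave conversion above follows a general PREEMPT_RT rule: a
| raw_spinlock_t that may now be taken with interrupts enabled must disable
| them itself, or an interrupt on the same CPU could re-enter the lock and
| deadlock. A minimal sketch of the pattern (demo_* names are hypothetical;
| the lock is assumed to be initialized with raw_spin_lock_init() at boot):
| 
|     static DEFINE_PER_CPU(raw_spinlock_t, demo_cpu_lock);
| 
|     static void demo_flush_one_cpu(int cpu)
|     {
|             raw_spinlock_t *cpu_lock = per_cpu_ptr(&demo_cpu_lock, cpu);
|             unsigned long flags;
| 
|             /* irqsave variant: safe whether or not IRQs are already off */
|             raw_spin_lock_irqsave(cpu_lock, flags);
|             /* ... walk and flush the per-CPU updated list ... */
|             raw_spin_unlock_irqrestore(cpu_lock, flags);
|     }
| 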
| diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c |
| index 7beceb447211..28497c00e63b 100644 |
| --- a/kernel/debug/debug_core.c |
| +++ b/kernel/debug/debug_core.c |
| @@ -239,35 +239,42 @@ NOKPROBE_SYMBOL(kgdb_call_nmi_hook); |
| static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd) = |
| CSD_INIT(kgdb_call_nmi_hook, NULL); |
| |
| -void __weak kgdb_roundup_cpus(void) |
| +void __weak kgdb_roundup_cpu(unsigned int cpu) |
| { |
| call_single_data_t *csd; |
| + int ret; |
| + |
| + csd = &per_cpu(kgdb_roundup_csd, cpu); |
| + |
| + /* |
| + * If it didn't round up last time, don't try again |
| + * since smp_call_function_single_async() will block. |
| + * |
| + * If rounding_up is false then we know that the |
| + * previous call must have at least started and that |
| + * means smp_call_function_single_async() won't block. |
| + */ |
| + if (kgdb_info[cpu].rounding_up) |
| + return; |
| + kgdb_info[cpu].rounding_up = true; |
| + |
| + ret = smp_call_function_single_async(cpu, csd); |
| + if (ret) |
| + kgdb_info[cpu].rounding_up = false; |
| +} |
| +NOKPROBE_SYMBOL(kgdb_roundup_cpu); |
| + |
| +void __weak kgdb_roundup_cpus(void) |
| +{ |
| int this_cpu = raw_smp_processor_id(); |
| int cpu; |
| - int ret; |
| |
| for_each_online_cpu(cpu) { |
| /* No need to roundup ourselves */ |
| if (cpu == this_cpu) |
| continue; |
| |
| - csd = &per_cpu(kgdb_roundup_csd, cpu); |
| - |
| - /* |
| - * If it didn't round up last time, don't try again |
| - * since smp_call_function_single_async() will block. |
| - * |
| - * If rounding_up is false then we know that the |
| - * previous call must have at least started and that |
| - * means smp_call_function_single_async() won't block. |
| - */ |
| - if (kgdb_info[cpu].rounding_up) |
| - continue; |
| - kgdb_info[cpu].rounding_up = true; |
| - |
| - ret = smp_call_function_single_async(cpu, csd); |
| - if (ret) |
| - kgdb_info[cpu].rounding_up = false; |
| + kgdb_roundup_cpu(cpu); |
| } |
| } |
| NOKPROBE_SYMBOL(kgdb_roundup_cpus); |
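| 
| The refactoring above splits out a per-CPU hook so architectures can
| override either level. The underlying idiom - at most one asynchronous
| call per call_single_data_t, guarded by a "busy" flag that the IPI
| handler clears - can be sketched as follows (demo_* names hypothetical):
| 
|     static DEFINE_PER_CPU(bool, demo_busy);
| 
|     static void demo_ipi_func(void *info)
|     {
|             /* runs on the target CPU in IPI context */
|             __this_cpu_write(demo_busy, false);
|     }
| 
|     static DEFINE_PER_CPU(call_single_data_t, demo_csd) =
|             CSD_INIT(demo_ipi_func, NULL);
| 
|     static void demo_kick_cpu(unsigned int cpu)
|     {
|             /* a csd may have only one async call in flight at a time */
|             if (per_cpu(demo_busy, cpu))
|                     return;
|             per_cpu(demo_busy, cpu) = true;
| 
|             if (smp_call_function_single_async(cpu, &per_cpu(demo_csd, cpu)))
|                     per_cpu(demo_busy, cpu) = false;
|     }
| 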
| diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c |
| index 6735ac36b718..539a2f0dc89d 100644 |
| --- a/kernel/debug/kdb/kdb_io.c |
| +++ b/kernel/debug/kdb/kdb_io.c |
| @@ -559,23 +559,17 @@ static void kdb_msg_write(const char *msg, int msg_len) |
| cp++; |
| } |
| |
| + /* mirror output on atomic consoles */ |
| for_each_console(c) { |
| if (!(c->flags & CON_ENABLED)) |
| continue; |
| if (c == dbg_io_ops->cons) |
| continue; |
| - /* |
| - * Set oops_in_progress to encourage the console drivers to |
| - * disregard their internal spin locks: in the current calling |
| - * context the risk of deadlock is a bigger problem than risks |
| - * due to re-entering the console driver. We operate directly on |
| - * oops_in_progress rather than using bust_spinlocks() because |
| - * the calls bust_spinlocks() makes on exit are not appropriate |
| - * for this calling context. |
| - */ |
| - ++oops_in_progress; |
| - c->write(c, msg, msg_len); |
| - --oops_in_progress; |
| + |
| + if (!c->write_atomic) |
| + continue; |
| + c->write_atomic(c, msg, msg_len); |
| + |
| touch_nmi_watchdog(); |
| } |
| } |
| diff --git a/kernel/entry/common.c b/kernel/entry/common.c |
| index d5a61d565ad5..a9579f8bf4f0 100644 |
| --- a/kernel/entry/common.c |
| +++ b/kernel/entry/common.c |
| @@ -159,9 +159,17 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, |
| |
| local_irq_enable_exit_to_user(ti_work); |
| |
| - if (ti_work & _TIF_NEED_RESCHED) |
| + if (ti_work & _TIF_NEED_RESCHED_MASK) |
| schedule(); |
| |
| +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND |
| + if (unlikely(current->forced_info.si_signo)) { |
| + struct task_struct *t = current; |
| + force_sig_info(&t->forced_info); |
| + t->forced_info.si_signo = 0; |
| + } |
| +#endif |
| + |
| if (ti_work & _TIF_UPROBE) |
| uprobe_notify_resume(regs); |
| |
| @@ -387,7 +395,7 @@ void irqentry_exit_cond_resched(void) |
| rcu_irq_exit_check_preempt(); |
| if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) |
| WARN_ON_ONCE(!on_thread_stack()); |
| - if (need_resched()) |
| + if (should_resched(0)) |
| preempt_schedule_irq(); |
| } |
| } |
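| 
| For reference, _TIF_NEED_RESCHED_MASK as tested above folds the lazy
| preemption bit into the check, so the exit path schedules for either
| flag. An illustrative definition only - the actual bit numbers are
| per-architecture and TIF_NEED_RESCHED_LAZY only exists with
| HAVE_PREEMPT_LAZY:
| 
|     #define _TIF_NEED_RESCHED       (1UL << TIF_NEED_RESCHED)
|     #define _TIF_NEED_RESCHED_LAZY  (1UL << TIF_NEED_RESCHED_LAZY)
|     #define _TIF_NEED_RESCHED_MASK  (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
| 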
| diff --git a/kernel/exit.c b/kernel/exit.c |
| index 91a43e57a32e..1d099609568d 100644 |
| --- a/kernel/exit.c |
| +++ b/kernel/exit.c |
| @@ -64,6 +64,7 @@ |
| #include <linux/rcuwait.h> |
| #include <linux/compat.h> |
| #include <linux/io_uring.h> |
| +#include <linux/kprobes.h> |
| |
| #include <linux/uaccess.h> |
| #include <asm/unistd.h> |
| @@ -168,8 +169,14 @@ static void delayed_put_task_struct(struct rcu_head *rhp) |
| { |
| struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); |
| |
| + kprobe_flush_task(tsk); |
| perf_event_delayed_put(tsk); |
| trace_sched_process_free(tsk); |
| + |
| +	/* RT-enabled kernels delay freeing the VMAP'ed task stack */
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + put_task_stack(tsk); |
| + |
| put_task_struct(tsk); |
| } |
| |
| diff --git a/kernel/fork.c b/kernel/fork.c |
| index 89475c994ca9..dc1aa0f71089 100644 |
| --- a/kernel/fork.c |
| +++ b/kernel/fork.c |
| @@ -289,7 +289,10 @@ static inline void free_thread_stack(struct task_struct *tsk) |
| return; |
| } |
| |
| - vfree_atomic(tsk->stack); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + vfree_atomic(tsk->stack); |
| + else |
| + vfree(tsk->stack); |
| return; |
| } |
| #endif |
| @@ -705,6 +708,19 @@ void __mmdrop(struct mm_struct *mm) |
| } |
| EXPORT_SYMBOL_GPL(__mmdrop); |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +/* |
| + * RCU callback for delayed mm drop. Not strictly an RCU callback, but
| + * call_rcu() is by far the least expensive way to defer the drop.
| + */ |
| +void __mmdrop_delayed(struct rcu_head *rhp) |
| +{ |
| + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); |
| + |
| + __mmdrop(mm); |
| +} |
| +#endif |
| + |
| static void mmdrop_async_fn(struct work_struct *work) |
| { |
| struct mm_struct *mm; |
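| 
| __mmdrop_delayed() above is an instance of the standard call_rcu()
| deferral idiom: embed a struct rcu_head in the object and recover the
| object with container_of() in the callback, which runs later in a
| schedulable context. Generic sketch (demo_* names hypothetical):
| 
|     struct demo_obj {
|             struct rcu_head rcu;
|             /* ... payload ... */
|     };
| 
|     static void demo_free_rcu(struct rcu_head *rhp)
|     {
|             struct demo_obj *obj = container_of(rhp, struct demo_obj, rcu);
| 
|             kfree(obj);
|     }
| 
|     static void demo_put(struct demo_obj *obj)
|     {
|             call_rcu(&obj->rcu, demo_free_rcu);
|     }
| 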
| diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c |
| index 21b3ac2a29d2..26f343228230 100644 |
| --- a/kernel/irq/irqdesc.c |
| +++ b/kernel/irq/irqdesc.c |
| @@ -661,6 +661,29 @@ int generic_handle_irq(unsigned int irq) |
| } |
| EXPORT_SYMBOL_GPL(generic_handle_irq); |
| |
| +/** |
| + * generic_handle_irq_safe - Invoke the handler for a particular irq from any |
| + * context. |
| + * @irq: The irq number to handle |
| + * |
| + * Returns: 0 on success, a negative value on error. |
| + * |
| + * This function can be called from any context (IRQ or process
| + * context). It will report an error if the interrupt has been marked to
| + * enforce IRQ-context only and it is not invoked from IRQ context.
| + */ |
| +int generic_handle_irq_safe(unsigned int irq) |
| +{ |
| + unsigned long flags; |
| + int ret; |
| + |
| + local_irq_save(flags); |
| + ret = handle_irq_desc(irq_to_desc(irq)); |
| + local_irq_restore(flags); |
| + return ret; |
| +} |
| +EXPORT_SYMBOL_GPL(generic_handle_irq_safe); |
| + |
| #ifdef CONFIG_IRQ_DOMAIN |
| /** |
| * generic_handle_domain_irq - Invoke the handler for a HW irq belonging |
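| 
| A typical caller of the new helper is a demultiplexing interrupt handler
| that may run in hardirq context or, with forced threading on PREEMPT_RT,
| in process context. Hedged sketch (demo_* types and fields hypothetical):
| 
|     struct demo_chip { unsigned int child_irq; };
| 
|     static irqreturn_t demo_demux_handler(int irq, void *dev_id)
|     {
|             struct demo_chip *chip = dev_id;
| 
|             /* valid in either context: the helper disables interrupts
|              * around the dispatch itself */
|             generic_handle_irq_safe(chip->child_irq);
|             return IRQ_HANDLED;
|     }
| 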
| diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c |
| index 0c3c26fb054f..16372116f393 100644 |
| --- a/kernel/irq/manage.c |
| +++ b/kernel/irq/manage.c |
| @@ -1286,6 +1286,8 @@ static int irq_thread(void *data) |
| |
| irq_thread_set_ready(desc, action); |
| |
| + sched_set_fifo(current); |
| + |
| if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD, |
| &action->thread_flags)) |
| handler_fn = irq_forced_thread_fn; |
| @@ -1451,8 +1453,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) |
| if (IS_ERR(t)) |
| return PTR_ERR(t); |
| |
| - sched_set_fifo(t); |
| - |
| /* |
| * We keep the reference to the task struct even if |
| * the thread dies to avoid that the interrupt code |
| @@ -2846,7 +2846,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); |
| * This call sets the internal irqchip state of an interrupt, |
| * depending on the value of @which. |
| * |
| - * This function should be called with preemption disabled if the |
| + * This function should be called with migration disabled if the |
| * interrupt controller has per-cpu registers. |
| */ |
| int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, |
| diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c |
| index c481d8458325..02b2daf07441 100644 |
| --- a/kernel/irq/spurious.c |
| +++ b/kernel/irq/spurious.c |
| @@ -447,6 +447,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); |
| |
| static int __init irqfixup_setup(char *str) |
| { |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { |
| + pr_warn("irqfixup boot option not supported with PREEMPT_RT\n"); |
| + return 1; |
| + } |
| irqfixup = 1; |
| printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); |
| printk(KERN_WARNING "This may impact system performance.\n"); |
| @@ -459,6 +463,10 @@ module_param(irqfixup, int, 0644); |
| |
| static int __init irqpoll_setup(char *str) |
| { |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { |
| + pr_warn("irqpoll boot option not supported with PREEMPT_RT\n"); |
| + return 1; |
| + } |
| irqfixup = 2; |
| printk(KERN_WARNING "Misrouted IRQ fixup and polling support " |
| "enabled\n"); |
| diff --git a/kernel/irq_work.c b/kernel/irq_work.c |
| index db8c248ebc8c..f7df715ec28e 100644 |
| --- a/kernel/irq_work.c |
| +++ b/kernel/irq_work.c |
| @@ -18,11 +18,36 @@ |
| #include <linux/cpu.h> |
| #include <linux/notifier.h> |
| #include <linux/smp.h> |
| +#include <linux/smpboot.h> |
| #include <asm/processor.h> |
| #include <linux/kasan.h> |
| |
| static DEFINE_PER_CPU(struct llist_head, raised_list); |
| static DEFINE_PER_CPU(struct llist_head, lazy_list); |
| +static DEFINE_PER_CPU(struct task_struct *, irq_workd); |
| + |
| +static void wake_irq_workd(void) |
| +{ |
| + struct task_struct *tsk = __this_cpu_read(irq_workd); |
| + |
| + if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk) |
| + wake_up_process(tsk); |
| +} |
| + |
| +#ifdef CONFIG_SMP |
| +static void irq_work_wake(struct irq_work *entry) |
| +{ |
| + wake_irq_workd(); |
| +} |
| + |
| +static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) = |
| + IRQ_WORK_INIT_HARD(irq_work_wake); |
| +#endif |
| + |
| +static int irq_workd_should_run(unsigned int cpu) |
| +{ |
| + return !llist_empty(this_cpu_ptr(&lazy_list)); |
| +} |
| |
| /* |
| * Claim the entry so that no one else will poke at it. |
| @@ -52,15 +77,29 @@ void __weak arch_irq_work_raise(void) |
| /* Enqueue on current CPU, work must already be claimed and preempt disabled */ |
| static void __irq_work_queue_local(struct irq_work *work) |
| { |
| + struct llist_head *list; |
| + bool rt_lazy_work = false; |
| + bool lazy_work = false; |
| + int work_flags; |
| + |
| + work_flags = atomic_read(&work->node.a_flags); |
| + if (work_flags & IRQ_WORK_LAZY) |
| + lazy_work = true; |
| + else if (IS_ENABLED(CONFIG_PREEMPT_RT) && |
| + !(work_flags & IRQ_WORK_HARD_IRQ)) |
| + rt_lazy_work = true; |
| + |
| + if (lazy_work || rt_lazy_work) |
| + list = this_cpu_ptr(&lazy_list); |
| + else |
| + list = this_cpu_ptr(&raised_list); |
| + |
| + if (!llist_add(&work->node.llist, list)) |
| + return; |
| + |
| /* If the work is "lazy", handle it from next tick if any */ |
| - if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) { |
| - if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) && |
| - tick_nohz_tick_stopped()) |
| - arch_irq_work_raise(); |
| - } else { |
| - if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list))) |
| - arch_irq_work_raise(); |
| - } |
| + if (!lazy_work || tick_nohz_tick_stopped()) |
| + arch_irq_work_raise(); |
| } |
| |
| /* Enqueue the irq work @work on the current CPU */ |
| @@ -104,17 +143,34 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) |
| if (cpu != smp_processor_id()) { |
| /* Arch remote IPI send/receive backend aren't NMI safe */ |
| WARN_ON_ONCE(in_nmi()); |
| + |
| + /* |
| + * On PREEMPT_RT the items which are not marked as |
| + * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work |
| + * item is used on the remote CPU to wake the thread. |
| + */ |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT) && |
| + !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) { |
| + |
| + if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu))) |
| + goto out; |
| + |
| + work = &per_cpu(irq_work_wakeup, cpu); |
| + if (!irq_work_claim(work)) |
| + goto out; |
| + } |
| + |
| __smp_call_single_queue(cpu, &work->node.llist); |
| } else { |
| __irq_work_queue_local(work); |
| } |
| +out: |
| preempt_enable(); |
| |
| return true; |
| #endif /* CONFIG_SMP */ |
| } |
| |
| - |
| bool irq_work_needs_cpu(void) |
| { |
| struct llist_head *raised, *lazy; |
| @@ -160,6 +216,10 @@ void irq_work_single(void *arg) |
| * else claimed it meanwhile. |
| */ |
| (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY); |
| + |
| + if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || |
| + !arch_irq_work_has_interrupt()) |
| + rcuwait_wake_up(&work->irqwait); |
| } |
| |
| static void irq_work_run_list(struct llist_head *list) |
| @@ -167,7 +227,12 @@ static void irq_work_run_list(struct llist_head *list) |
| struct irq_work *work, *tmp; |
| struct llist_node *llnode; |
| |
| - BUG_ON(!irqs_disabled()); |
| + /* |
| + * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed |
| + * in a per-CPU thread in preemptible context. Only the items which are |
| + * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context. |
| + */ |
| + BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT)); |
| |
| if (llist_empty(list)) |
| return; |
| @@ -184,7 +249,10 @@ static void irq_work_run_list(struct llist_head *list) |
| void irq_work_run(void) |
| { |
| irq_work_run_list(this_cpu_ptr(&raised_list)); |
| - irq_work_run_list(this_cpu_ptr(&lazy_list)); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + irq_work_run_list(this_cpu_ptr(&lazy_list)); |
| + else |
| + wake_irq_workd(); |
| } |
| EXPORT_SYMBOL_GPL(irq_work_run); |
| |
| @@ -194,7 +262,11 @@ void irq_work_tick(void) |
| |
| if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) |
| irq_work_run_list(raised); |
| - irq_work_run_list(this_cpu_ptr(&lazy_list)); |
| + |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + irq_work_run_list(this_cpu_ptr(&lazy_list)); |
| + else |
| + wake_irq_workd(); |
| } |
| |
| /* |
| @@ -204,8 +276,42 @@ void irq_work_tick(void) |
| void irq_work_sync(struct irq_work *work) |
| { |
| lockdep_assert_irqs_enabled(); |
| + might_sleep(); |
| + |
| + if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) || |
| + !arch_irq_work_has_interrupt()) { |
| + rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work), |
| + TASK_UNINTERRUPTIBLE); |
| + return; |
| + } |
| |
| while (irq_work_is_busy(work)) |
| cpu_relax(); |
| } |
| EXPORT_SYMBOL_GPL(irq_work_sync); |
| + |
| +static void run_irq_workd(unsigned int cpu) |
| +{ |
| + irq_work_run_list(this_cpu_ptr(&lazy_list)); |
| +} |
| + |
| +static void irq_workd_setup(unsigned int cpu) |
| +{ |
| + sched_set_fifo_low(current); |
| +} |
| + |
| +static struct smp_hotplug_thread irqwork_threads = { |
| + .store = &irq_workd, |
| + .setup = irq_workd_setup, |
| + .thread_should_run = irq_workd_should_run, |
| + .thread_fn = run_irq_workd, |
| + .thread_comm = "irq_work/%u", |
| +}; |
| + |
| +static __init int irq_work_init_threads(void) |
| +{ |
| + if (IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + BUG_ON(smpboot_register_percpu_thread(&irqwork_threads)); |
| + return 0; |
| +} |
| +early_initcall(irq_work_init_threads); |
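| 
| From the API side, the rework above makes the initializer choice matter
| on PREEMPT_RT: only work marked "hard" still runs from the raising
| interrupt, everything else is deferred to the new irq_work/%u thread.
| Sketch (demo_* names hypothetical):
| 
|     static void demo_cb(struct irq_work *work)
|     {
|             /* the deferred work */
|     }
| 
|     static struct irq_work demo_hard = IRQ_WORK_INIT_HARD(demo_cb);
|     static struct irq_work demo_soft = IRQ_WORK_INIT(demo_cb);
| 
|     static void demo_raise(void)
|     {
|             irq_work_queue(&demo_hard); /* always runs in hardirq context */
|             irq_work_queue(&demo_soft); /* on RT: runs from irq_work/%u */
|     }
| 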
| diff --git a/kernel/kcov.c b/kernel/kcov.c |
| index 80bfe71bbe13..36ca640c4f8e 100644 |
| --- a/kernel/kcov.c |
| +++ b/kernel/kcov.c |
| @@ -88,6 +88,7 @@ static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas); |
| |
| struct kcov_percpu_data { |
| void *irq_area; |
| + local_lock_t lock; |
| |
| unsigned int saved_mode; |
| unsigned int saved_size; |
| @@ -96,7 +97,9 @@ struct kcov_percpu_data { |
| int saved_sequence; |
| }; |
| |
| -static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data); |
| +static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = { |
| + .lock = INIT_LOCAL_LOCK(lock), |
| +}; |
| |
| /* Must be called with kcov_remote_lock locked. */ |
| static struct kcov_remote *kcov_remote_find(u64 handle) |
| @@ -824,7 +827,7 @@ void kcov_remote_start(u64 handle) |
| if (!in_task() && !in_serving_softirq()) |
| return; |
| |
| - local_irq_save(flags); |
| + local_lock_irqsave(&kcov_percpu_data.lock, flags); |
| |
| /* |
| * Check that kcov_remote_start() is not called twice in background |
| @@ -832,7 +835,7 @@ void kcov_remote_start(u64 handle) |
| */ |
| mode = READ_ONCE(t->kcov_mode); |
| if (WARN_ON(in_task() && kcov_mode_enabled(mode))) { |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); |
| return; |
| } |
| /* |
| @@ -841,14 +844,15 @@ void kcov_remote_start(u64 handle) |
| * happened while collecting coverage from a background thread. |
| */ |
| if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) { |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); |
| return; |
| } |
| |
| spin_lock(&kcov_remote_lock); |
| remote = kcov_remote_find(handle); |
| if (!remote) { |
| - spin_unlock_irqrestore(&kcov_remote_lock, flags); |
| + spin_unlock(&kcov_remote_lock); |
| + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); |
| return; |
| } |
| kcov_debug("handle = %llx, context: %s\n", handle, |
| @@ -869,19 +873,19 @@ void kcov_remote_start(u64 handle) |
| size = CONFIG_KCOV_IRQ_AREA_SIZE; |
| area = this_cpu_ptr(&kcov_percpu_data)->irq_area; |
| } |
| - spin_unlock_irqrestore(&kcov_remote_lock, flags); |
| + spin_unlock(&kcov_remote_lock); |
| |
| /* Can only happen when in_task(). */ |
| if (!area) { |
| + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); |
| area = vmalloc(size * sizeof(unsigned long)); |
| if (!area) { |
| kcov_put(kcov); |
| return; |
| } |
| + local_lock_irqsave(&kcov_percpu_data.lock, flags); |
| } |
| |
| - local_irq_save(flags); |
| - |
| /* Reset coverage size. */ |
| *(u64 *)area = 0; |
| |
| @@ -891,7 +895,7 @@ void kcov_remote_start(u64 handle) |
| } |
| kcov_start(t, kcov, size, area, mode, sequence); |
| |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); |
| |
| } |
| EXPORT_SYMBOL(kcov_remote_start); |
| @@ -965,12 +969,12 @@ void kcov_remote_stop(void) |
| if (!in_task() && !in_serving_softirq()) |
| return; |
| |
| - local_irq_save(flags); |
| + local_lock_irqsave(&kcov_percpu_data.lock, flags); |
| |
| mode = READ_ONCE(t->kcov_mode); |
| barrier(); |
| if (!kcov_mode_enabled(mode)) { |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); |
| return; |
| } |
| /* |
| @@ -978,12 +982,12 @@ void kcov_remote_stop(void) |
| * actually found the remote handle and started collecting coverage. |
| */ |
| if (in_serving_softirq() && !t->kcov_softirq) { |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); |
| return; |
| } |
| /* Make sure that kcov_softirq is only set when in softirq. */ |
| if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) { |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); |
| return; |
| } |
| |
| @@ -1013,7 +1017,7 @@ void kcov_remote_stop(void) |
| spin_unlock(&kcov_remote_lock); |
| } |
| |
| - local_irq_restore(flags); |
| + local_unlock_irqrestore(&kcov_percpu_data.lock, flags); |
| |
| /* Get in kcov_remote_start(). */ |
| kcov_put(kcov); |
| @@ -1034,8 +1038,8 @@ static int __init kcov_init(void) |
| int cpu; |
| |
| for_each_possible_cpu(cpu) { |
| - void *area = vmalloc(CONFIG_KCOV_IRQ_AREA_SIZE * |
| - sizeof(unsigned long)); |
| + void *area = vmalloc_node(CONFIG_KCOV_IRQ_AREA_SIZE * |
| + sizeof(unsigned long), cpu_to_node(cpu)); |
| if (!area) |
| return -ENOMEM; |
| per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area; |
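| 
| The kcov changes above are the canonical local_lock conversion: an
| anonymous local_irq_save() section becomes a named per-CPU lock, which
| still compiles to an IRQ-disable on !PREEMPT_RT but becomes a per-CPU
| sleeping lock (with lockdep coverage) on PREEMPT_RT. Minimal pattern:
| 
|     struct demo_pcpu {
|             local_lock_t lock;
|             int counter;
|     };
| 
|     static DEFINE_PER_CPU(struct demo_pcpu, demo_pcpu) = {
|             .lock = INIT_LOCAL_LOCK(lock),
|     };
| 
|     static void demo_inc(void)
|     {
|             unsigned long flags;
| 
|             local_lock_irqsave(&demo_pcpu.lock, flags);
|             this_cpu_inc(demo_pcpu.counter);
|             local_unlock_irqrestore(&demo_pcpu.lock, flags);
|     }
| 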
| diff --git a/kernel/kprobes.c b/kernel/kprobes.c |
| index 2ef90d15699f..2ab883d856b5 100644 |
| --- a/kernel/kprobes.c |
| +++ b/kernel/kprobes.c |
| @@ -1250,10 +1250,10 @@ void kprobe_busy_end(void) |
| } |
| |
| /* |
| - * This function is called from finish_task_switch when task tk becomes dead, |
| - * so that we can recycle any function-return probe instances associated |
| - * with this task. These left over instances represent probed functions |
| - * that have been called but will never return. |
| + * This function is called from delayed_put_task_struct() when a task is
| + * dead and cleaned up, so that we can recycle any function-return probe
| + * instances associated with this task. These leftover instances represent
| + * probed functions that have been called but will never return.
| */ |
| void kprobe_flush_task(struct task_struct *tk) |
| { |
| diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c |
| index 35859da8bd4f..dfff31ed644a 100644 |
| --- a/kernel/ksysfs.c |
| +++ b/kernel/ksysfs.c |
| @@ -138,6 +138,15 @@ KERNEL_ATTR_RO(vmcoreinfo); |
| |
| #endif /* CONFIG_CRASH_CORE */ |
| |
| +#if defined(CONFIG_PREEMPT_RT) |
| +static ssize_t realtime_show(struct kobject *kobj, |
| + struct kobj_attribute *attr, char *buf) |
| +{ |
| + return sprintf(buf, "%d\n", 1); |
| +} |
| +KERNEL_ATTR_RO(realtime); |
| +#endif |
| + |
| /* whether file capabilities are enabled */ |
| static ssize_t fscaps_show(struct kobject *kobj, |
| struct kobj_attribute *attr, char *buf) |
| @@ -228,6 +237,9 @@ static struct attribute * kernel_attrs[] = { |
| #ifndef CONFIG_TINY_RCU |
| &rcu_expedited_attr.attr, |
| &rcu_normal_attr.attr, |
| +#endif |
| +#ifdef CONFIG_PREEMPT_RT |
| + &realtime_attr.attr, |
| #endif |
| NULL |
| }; |
| diff --git a/kernel/kthread.c b/kernel/kthread.c |
| index 5b37a8567168..4a4d7092a2d8 100644 |
| --- a/kernel/kthread.c |
| +++ b/kernel/kthread.c |
| @@ -270,6 +270,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme); |
| |
| static int kthread(void *_create) |
| { |
| + static const struct sched_param param = { .sched_priority = 0 }; |
| /* Copy data: it's on kthread's stack */ |
| struct kthread_create_info *create = _create; |
| int (*threadfn)(void *data) = create->threadfn; |
| @@ -300,6 +301,13 @@ static int kthread(void *_create) |
| init_completion(&self->parked); |
| current->vfork_done = &self->exited; |
| |
| + /* |
| +	 * The new thread inherited kthreadd's priority and CPU mask. Reset
| +	 * them back to the defaults in case they have been changed.
| + */ |
| + sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m); |
| + set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD)); |
| + |
| /* OK, tell user we're spawned, wait for stop or wakeup */ |
| __set_current_state(TASK_UNINTERRUPTIBLE); |
| create->result = current; |
| @@ -397,7 +405,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), |
| } |
| task = create->result; |
| if (!IS_ERR(task)) { |
| - static const struct sched_param param = { .sched_priority = 0 }; |
| char name[TASK_COMM_LEN]; |
| |
| /* |
| @@ -406,13 +413,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), |
| */ |
| vsnprintf(name, sizeof(name), namefmt, args); |
| set_task_comm(task, name); |
| - /* |
| - * root may have changed our (kthreadd's) priority or CPU mask. |
| - * The kernel thread should not inherit these properties. |
| - */ |
| - sched_setscheduler_nocheck(task, SCHED_NORMAL, ¶m); |
| - set_cpus_allowed_ptr(task, |
| - housekeeping_cpumask(HK_FLAG_KTHREAD)); |
| } |
| kfree(create); |
| return task; |
| diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c |
| index a30702b847ba..3aa2a6df1f87 100644 |
| --- a/kernel/locking/lockdep.c |
| +++ b/kernel/locking/lockdep.c |
| @@ -5475,6 +5475,7 @@ static noinstr void check_flags(unsigned long flags) |
| } |
| } |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| /* |
| * We dont accurately track softirq state in e.g. |
| * hardirq contexts (such as on 4KSTACKS), so only |
| @@ -5489,6 +5490,7 @@ static noinstr void check_flags(unsigned long flags) |
| DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); |
| } |
| } |
| +#endif |
| |
| if (!debug_locks) |
| print_irqtrace_events(current); |
| diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c |
| index ea5a701ab240..547752d1e9c0 100644 |
| --- a/kernel/locking/rtmutex.c |
| +++ b/kernel/locking/rtmutex.c |
| @@ -1097,8 +1097,26 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, |
| * which is wrong, as the other waiter is not in a deadlock |
| * situation. |
| */ |
| - if (owner == task) |
| + if (owner == task) { |
| +#if defined(DEBUG_WW_MUTEXES) && defined(CONFIG_DEBUG_LOCKING_API_SELFTESTS) |
| + /* |
| +		 * In a few cases the lockdep selftest for ww-mutex expects the
| +		 * ww_ctx->contending_lock assignment via __ww_mutex_check_kill(),
| +		 * which does not happen if the rtmutex detects the deadlock early.
| + */ |
| + if (build_ww_mutex() && ww_ctx) { |
| + struct rt_mutex *rtm; |
| + |
| +			/* Check whether the waiter should back out immediately */
| + rtm = container_of(lock, struct rt_mutex, rtmutex); |
| + |
| + __ww_mutex_add_waiter(waiter, rtm, ww_ctx); |
| + __ww_mutex_check_kill(rtm, waiter, ww_ctx); |
| + } |
| +#endif |
| return -EDEADLK; |
| + } |
| |
| raw_spin_lock(&task->pi_lock); |
| waiter->task = task; |
| diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c |
| index 5c9299aaabae..900220941caa 100644 |
| --- a/kernel/locking/rtmutex_api.c |
| +++ b/kernel/locking/rtmutex_api.c |
| @@ -21,12 +21,13 @@ int max_lock_depth = 1024; |
| */ |
| static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock, |
| unsigned int state, |
| + struct lockdep_map *nest_lock, |
| unsigned int subclass) |
| { |
| int ret; |
| |
| might_sleep(); |
| - mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); |
| + mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_); |
| ret = __rt_mutex_lock(&lock->rtmutex, state); |
| if (ret) |
| mutex_release(&lock->dep_map, _RET_IP_); |
| @@ -48,10 +49,16 @@ EXPORT_SYMBOL(rt_mutex_base_init); |
| */ |
| void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) |
| { |
| - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); |
| + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass); |
| } |
| EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); |
| |
| +void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock) |
| +{ |
| + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0); |
| +} |
| +EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock); |
| + |
| #else /* !CONFIG_DEBUG_LOCK_ALLOC */ |
| |
| /** |
| @@ -61,7 +68,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); |
| */ |
| void __sched rt_mutex_lock(struct rt_mutex *lock) |
| { |
| - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); |
| + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0); |
| } |
| EXPORT_SYMBOL_GPL(rt_mutex_lock); |
| #endif |
| @@ -77,10 +84,25 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); |
| */ |
| int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) |
| { |
| - return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); |
| + return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0); |
| } |
| EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); |
| |
| +/** |
| + * rt_mutex_lock_killable - lock a rt_mutex killable |
| + * |
| + * @lock: the rt_mutex to be locked |
| + * |
| + * Returns: |
| + * 0 on success |
| + * -EINTR when interrupted by a signal |
| + */ |
| +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) |
| +{ |
| + return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0); |
| +} |
| +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); |
| + |
| /** |
| * rt_mutex_trylock - try to lock a rt_mutex |
| * |
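| 
| Intended use of the killable variant added above: only a fatal signal
| interrupts the wait, so callers need just one error path. Sketch:
| 
|     static int demo_do_locked_work(struct rt_mutex *lock)
|     {
|             int ret;
| 
|             ret = rt_mutex_lock_killable(lock);
|             if (ret)
|                     return ret;     /* -EINTR: fatal signal while blocked */
| 
|             /* ... critical section ... */
| 
|             rt_mutex_unlock(lock);
|             return 0;
|     }
| 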
| diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c |
| index d2912e44d61f..9e396a09fe0f 100644 |
| --- a/kernel/locking/spinlock_rt.c |
| +++ b/kernel/locking/spinlock_rt.c |
| @@ -24,6 +24,17 @@ |
| #define RT_MUTEX_BUILD_SPINLOCKS |
| #include "rtmutex.c" |
| |
| +/* |
| + * __might_resched() skips the state check as rtlocks are state |
| + * preserving. Take RCU nesting into account as spin/read/write_lock() can |
| + * legitimately nest into an RCU read side critical section. |
| + */ |
| +#define RTLOCK_RESCHED_OFFSETS \ |
| + (rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT) |
| + |
| +#define rtlock_might_resched() \ |
| + __might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS) |
| + |
| static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) |
| { |
| if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) |
| @@ -32,7 +43,7 @@ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) |
| |
| static __always_inline void __rt_spin_lock(spinlock_t *lock) |
| { |
| - ___might_sleep(__FILE__, __LINE__, 0); |
| + rtlock_might_resched(); |
| rtlock_lock(&lock->lock); |
| rcu_read_lock(); |
| migrate_disable(); |
| @@ -210,7 +221,7 @@ EXPORT_SYMBOL(rt_write_trylock); |
| |
| void __sched rt_read_lock(rwlock_t *rwlock) |
| { |
| - ___might_sleep(__FILE__, __LINE__, 0); |
| + rtlock_might_resched(); |
| rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); |
| rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); |
| rcu_read_lock(); |
| @@ -220,7 +231,7 @@ EXPORT_SYMBOL(rt_read_lock); |
| |
| void __sched rt_write_lock(rwlock_t *rwlock) |
| { |
| - ___might_sleep(__FILE__, __LINE__, 0); |
| + rtlock_might_resched(); |
| rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); |
| rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); |
| rcu_read_lock(); |
| @@ -246,12 +257,6 @@ void __sched rt_write_unlock(rwlock_t *rwlock) |
| } |
| EXPORT_SYMBOL(rt_write_unlock); |
| |
| -int __sched rt_rwlock_is_contended(rwlock_t *rwlock) |
| -{ |
| - return rw_base_is_contended(&rwlock->rwbase); |
| -} |
| -EXPORT_SYMBOL(rt_rwlock_is_contended); |
| - |
| #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| void __rt_rwlock_init(rwlock_t *rwlock, const char *name, |
| struct lock_class_key *key) |
| diff --git a/kernel/panic.c b/kernel/panic.c |
| index cefd7d82366f..d509c0694af9 100644 |
| --- a/kernel/panic.c |
| +++ b/kernel/panic.c |
| @@ -178,12 +178,28 @@ static void panic_print_sys_info(void) |
| void panic(const char *fmt, ...) |
| { |
| static char buf[1024]; |
| + va_list args2; |
| va_list args; |
| long i, i_next = 0, len; |
| int state = 0; |
| int old_cpu, this_cpu; |
| bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; |
| |
| + console_verbose(); |
| + pr_emerg("Kernel panic - not syncing:\n"); |
| + va_start(args2, fmt); |
| + va_copy(args, args2); |
| + vprintk(fmt, args2); |
| + va_end(args2); |
| +#ifdef CONFIG_DEBUG_BUGVERBOSE |
| + /* |
| + * Avoid nested stack-dumping if a panic occurs during oops processing |
| + */ |
| + if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) |
| + dump_stack(); |
| +#endif |
| + pr_flush(1000, true); |
| + |
| /* |
| * Disable local interrupts. This will prevent panic_smp_self_stop |
| * from deadlocking the first cpu that invokes the panic, since |
| @@ -214,24 +230,13 @@ void panic(const char *fmt, ...) |
| if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) |
| panic_smp_self_stop(); |
| |
| - console_verbose(); |
| bust_spinlocks(1); |
| - va_start(args, fmt); |
| len = vscnprintf(buf, sizeof(buf), fmt, args); |
| va_end(args); |
| |
| if (len && buf[len - 1] == '\n') |
| buf[len - 1] = '\0'; |
| |
| - pr_emerg("Kernel panic - not syncing: %s\n", buf); |
| -#ifdef CONFIG_DEBUG_BUGVERBOSE |
| - /* |
| - * Avoid nested stack-dumping if a panic occurs during oops processing |
| - */ |
| - if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) |
| - dump_stack(); |
| -#endif |
| - |
| /* |
| * If kgdb is enabled, give it a chance to run before we stop all |
| * the other CPUs or else we won't be able to debug processes left |
| @@ -540,9 +545,11 @@ static u64 oops_id; |
| |
| static int init_oops_id(void) |
| { |
| +#ifndef CONFIG_PREEMPT_RT |
| if (!oops_id) |
| get_random_bytes(&oops_id, sizeof(oops_id)); |
| else |
| +#endif |
| oops_id++; |
| |
| return 0; |
| @@ -553,6 +560,7 @@ static void print_oops_end_marker(void) |
| { |
| init_oops_id(); |
| pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); |
| + pr_flush(1000, true); |
| } |
| |
| /* |
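| 
| The reordering above makes panic() consume its argument list twice:
| vprintk() eats one copy immediately and vscnprintf() formats into buf
| later, which is only legal via va_copy(). The underlying C idiom as a
| self-contained userspace sketch:
| 
|     #include <stdarg.h>
|     #include <stdio.h>
| 
|     static void demo_emit_twice(const char *fmt, ...)
|     {
|             char buf[128];
|             va_list args, args2;
| 
|             va_start(args2, fmt);
|             va_copy(args, args2);   /* each consumer needs its own copy */
|             vprintf(fmt, args2);    /* first traversal */
|             va_end(args2);
|             vsnprintf(buf, sizeof(buf), fmt, args); /* second traversal */
|             va_end(args);
|     }
| 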
| diff --git a/kernel/power/main.c b/kernel/power/main.c |
| index 7e646079fbeb..8b153aa90ecc 100644 |
| --- a/kernel/power/main.c |
| +++ b/kernel/power/main.c |
| @@ -546,14 +546,13 @@ static int __init pm_debug_messages_setup(char *str) |
| __setup("pm_debug_messages", pm_debug_messages_setup); |
| |
| /** |
| - * __pm_pr_dbg - Print a suspend debug message to the kernel log. |
| - * @defer: Whether or not to use printk_deferred() to print the message. |
| + * pm_pr_dbg - Print a suspend debug message to the kernel log. |
| * @fmt: Message format. |
| * |
| * The message will be emitted if enabled through the pm_debug_messages |
| * sysfs attribute. |
| */ |
| -void __pm_pr_dbg(bool defer, const char *fmt, ...) |
| +void pm_pr_dbg(const char *fmt, ...) |
| { |
| struct va_format vaf; |
| va_list args; |
| @@ -566,10 +565,7 @@ void __pm_pr_dbg(bool defer, const char *fmt, ...) |
| vaf.fmt = fmt; |
| vaf.va = &args; |
| |
| - if (defer) |
| - printk_deferred(KERN_DEBUG "PM: %pV", &vaf); |
| - else |
| - printk(KERN_DEBUG "PM: %pV", &vaf); |
| + printk(KERN_DEBUG "PM: %pV", &vaf); |
| |
| va_end(args); |
| } |
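| 
| pm_pr_dbg() above leans on the kernel's %pV format specifier: a struct
| va_format carries the format string and va_list so a varargs wrapper can
| forward everything to printk() without an intermediate buffer. The idiom:
| 
|     static void demo_pr_dbg(const char *fmt, ...)
|     {
|             struct va_format vaf;
|             va_list args;
| 
|             va_start(args, fmt);
|             vaf.fmt = fmt;
|             vaf.va = &args;
|             printk(KERN_DEBUG "demo: %pV", &vaf);
|             va_end(args);
|     }
| 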
| diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile |
| index d118739874c0..bc6b856a0ff4 100644 |
| --- a/kernel/printk/Makefile |
| +++ b/kernel/printk/Makefile |
| @@ -1,6 +1,5 @@ |
| # SPDX-License-Identifier: GPL-2.0-only |
| obj-y = printk.o |
| -obj-$(CONFIG_PRINTK) += printk_safe.o |
| obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o |
| obj-$(CONFIG_PRINTK) += printk_ringbuffer.o |
| obj-$(CONFIG_PRINTK_INDEX) += index.o |
| diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h |
| index 9f3ed2fdb721..de8ab059dd96 100644 |
| --- a/kernel/printk/internal.h |
| +++ b/kernel/printk/internal.h |
| @@ -2,7 +2,6 @@ |
| /* |
| * internal.h - printk internal definitions |
| */ |
| -#include <linux/percpu.h> |
| |
| #ifdef CONFIG_PRINTK |
| |
| @@ -12,41 +11,6 @@ enum printk_info_flags { |
| LOG_CONT = 8, /* text is a fragment of a continuation line */ |
| }; |
| |
| -__printf(4, 0) |
| -int vprintk_store(int facility, int level, |
| - const struct dev_printk_info *dev_info, |
| - const char *fmt, va_list args); |
| - |
| -__printf(1, 0) int vprintk_default(const char *fmt, va_list args); |
| -__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); |
| - |
| -bool printk_percpu_data_ready(void); |
| - |
| -#define printk_safe_enter_irqsave(flags) \ |
| - do { \ |
| - local_irq_save(flags); \ |
| - __printk_safe_enter(); \ |
| - } while (0) |
| - |
| -#define printk_safe_exit_irqrestore(flags) \ |
| - do { \ |
| - __printk_safe_exit(); \ |
| - local_irq_restore(flags); \ |
| - } while (0) |
| - |
| -void defer_console_output(void); |
| - |
| u16 printk_parse_prefix(const char *text, int *level, |
| enum printk_info_flags *flags); |
| -#else |
| - |
| -/* |
| - * In !PRINTK builds we still export console_sem |
| - * semaphore and some of console functions (console_unlock()/etc.), so |
| - * printk-safe must preserve the existing local IRQ guarantees. |
| - */ |
| -#define printk_safe_enter_irqsave(flags) local_irq_save(flags) |
| -#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) |
| - |
| -static inline bool printk_percpu_data_ready(void) { return false; } |
| #endif /* CONFIG_PRINTK */ |
| diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c |
| index 8d856b7c2e5a..ac2c44792ec4 100644 |
| --- a/kernel/printk/printk.c |
| +++ b/kernel/printk/printk.c |
| @@ -44,6 +44,10 @@ |
| #include <linux/irq_work.h> |
| #include <linux/ctype.h> |
| #include <linux/uio.h> |
| +#include <linux/kdb.h> |
| +#include <linux/kgdb.h> |
| +#include <linux/kthread.h> |
| +#include <linux/clocksource.h> |
| #include <linux/sched/clock.h> |
| #include <linux/sched/debug.h> |
| #include <linux/sched/task_stack.h> |
| @@ -227,19 +231,7 @@ static int nr_ext_console_drivers; |
| |
| static int __down_trylock_console_sem(unsigned long ip) |
| { |
| - int lock_failed; |
| - unsigned long flags; |
| - |
| - /* |
| - * Here and in __up_console_sem() we need to be in safe mode, |
| - * because spindump/WARN/etc from under console ->lock will |
| - * deadlock in printk()->down_trylock_console_sem() otherwise. |
| - */ |
| - printk_safe_enter_irqsave(flags); |
| - lock_failed = down_trylock(&console_sem); |
| - printk_safe_exit_irqrestore(flags); |
| - |
| - if (lock_failed) |
| + if (down_trylock(&console_sem)) |
| return 1; |
| mutex_acquire(&console_lock_dep_map, 0, 1, ip); |
| return 0; |
| @@ -248,13 +240,9 @@ static int __down_trylock_console_sem(unsigned long ip) |
| |
| static void __up_console_sem(unsigned long ip) |
| { |
| - unsigned long flags; |
| - |
| mutex_release(&console_lock_dep_map, ip); |
| |
| - printk_safe_enter_irqsave(flags); |
| up(&console_sem); |
| - printk_safe_exit_irqrestore(flags); |
| } |
| #define up_console_sem() __up_console_sem(_RET_IP_) |
| |
| @@ -268,11 +256,6 @@ static void __up_console_sem(unsigned long ip) |
| */ |
| static int console_locked, console_suspended; |
| |
| -/* |
| - * If exclusive_console is non-NULL then only this console is to be printed to. |
| - */ |
| -static struct console *exclusive_console; |
| - |
| /* |
| * Array of consoles built from command line options (console=) |
| */ |
| @@ -352,10 +335,13 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; |
| * non-prinatable characters are escaped in the "\xff" notation. |
| */ |
| |
| +#ifdef CONFIG_PRINTK |
| /* syslog_lock protects syslog_* variables and write access to clear_seq. */ |
| static DEFINE_MUTEX(syslog_lock); |
| |
| -#ifdef CONFIG_PRINTK |
| +/* Set to enable sync mode. Once set, it is never cleared. */ |
| +static bool sync_mode; |
| + |
| DECLARE_WAIT_QUEUE_HEAD(log_wait); |
| /* All 3 protected by @syslog_lock. */ |
| /* the next printk record to read by syslog(READ) or /proc/kmsg */ |
| @@ -363,17 +349,6 @@ static u64 syslog_seq; |
| static size_t syslog_partial; |
| static bool syslog_time; |
| |
| -/* All 3 protected by @console_sem. */ |
| -/* the next printk record to write to the console */ |
| -static u64 console_seq; |
| -static u64 exclusive_console_stop_seq; |
| -static unsigned long console_dropped; |
| - |
| -struct latched_seq { |
| - seqcount_latch_t latch; |
| - u64 val[2]; |
| -}; |
| - |
| /* |
| * The next printk record to read after the last 'clear' command. There are |
| * two copies (updated with seqcount_latch) so that reads can locklessly |
| @@ -391,9 +366,6 @@ static struct latched_seq clear_seq = { |
| #define PREFIX_MAX 32 |
| #endif |
| |
| -/* the maximum size of a formatted record (i.e. with prefix added per line) */ |
| -#define CONSOLE_LOG_MAX 1024 |
| - |
| /* the maximum size allowed to be reserved for a record */ |
| #define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) |
| |
| @@ -432,12 +404,12 @@ static struct printk_ringbuffer *prb = &printk_rb_static; |
| */ |
| static bool __printk_percpu_data_ready __read_mostly; |
| |
| -bool printk_percpu_data_ready(void) |
| +static bool printk_percpu_data_ready(void) |
| { |
| return __printk_percpu_data_ready; |
| } |
| |
| -/* Must be called under syslog_lock. */ |
| +/* Must be called under associated write-protection lock. */ |
| static void latched_seq_write(struct latched_seq *ls, u64 val) |
| { |
| raw_write_seqcount_latch(&ls->latch); |
| @@ -1771,188 +1743,152 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) |
| return do_syslog(type, buf, len, SYSLOG_FROM_READER); |
| } |
| |
| -/* |
| - * Special console_lock variants that help to reduce the risk of soft-lockups. |
| - * They allow to pass console_lock to another printk() call using a busy wait. |
| - */ |
| +int printk_delay_msec __read_mostly; |
| |
| -#ifdef CONFIG_LOCKDEP |
| -static struct lockdep_map console_owner_dep_map = { |
| - .name = "console_owner" |
| -}; |
| -#endif |
| +static inline void printk_delay(int level) |
| +{ |
| + boot_delay_msec(level); |
| |
| -static DEFINE_RAW_SPINLOCK(console_owner_lock); |
| -static struct task_struct *console_owner; |
| -static bool console_waiter; |
| + if (unlikely(printk_delay_msec)) { |
| + int m = printk_delay_msec; |
| |
| -/** |
| - * console_lock_spinning_enable - mark beginning of code where another |
| - * thread might safely busy wait |
| - * |
| - * This basically converts console_lock into a spinlock. This marks |
| - * the section where the console_lock owner can not sleep, because |
| - * there may be a waiter spinning (like a spinlock). Also it must be |
| - * ready to hand over the lock at the end of the section. |
| - */ |
| -static void console_lock_spinning_enable(void) |
| + while (m--) { |
| + mdelay(1); |
| + touch_nmi_watchdog(); |
| + } |
| + } |
| +} |
| + |
| +static bool kernel_sync_mode(void) |
| { |
| - raw_spin_lock(&console_owner_lock); |
| - console_owner = current; |
| - raw_spin_unlock(&console_owner_lock); |
| + return (oops_in_progress || sync_mode); |
| +} |
| |
| - /* The waiter may spin on us after setting console_owner */ |
| - spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); |
| +static bool console_may_sync(struct console *con) |
| +{ |
| + if (!(con->flags & CON_ENABLED)) |
| + return false; |
| + if (con->write_atomic && kernel_sync_mode()) |
| + return true; |
| + if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) |
| + return true; |
| + if (con->write && (con->flags & CON_BOOT) && !con->thread) |
| + return true; |
| + return false; |
| } |
| |
| -/** |
| - * console_lock_spinning_disable_and_check - mark end of code where another |
| - * thread was able to busy wait and check if there is a waiter |
| - * |
| - * This is called at the end of the section where spinning is allowed. |
| - * It has two functions. First, it is a signal that it is no longer |
| - * safe to start busy waiting for the lock. Second, it checks if |
| - * there is a busy waiter and passes the lock rights to her. |
| - * |
| - * Important: Callers lose the lock if there was a busy waiter. |
| - * They must not touch items synchronized by console_lock |
| - * in this case. |
| - * |
| - * Return: 1 if the lock rights were passed, 0 otherwise. |
| - */ |
| -static int console_lock_spinning_disable_and_check(void) |
| +static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len) |
| { |
| - int waiter; |
| + if (!(con->flags & CON_ENABLED)) |
| + return false; |
| |
| - raw_spin_lock(&console_owner_lock); |
| - waiter = READ_ONCE(console_waiter); |
| - console_owner = NULL; |
| - raw_spin_unlock(&console_owner_lock); |
| + if (con->write_atomic && kernel_sync_mode()) { |
| + con->write_atomic(con, text, text_len); |
| + return true; |
| + } |
| |
| - if (!waiter) { |
| - spin_release(&console_owner_dep_map, _THIS_IP_); |
| - return 0; |
| + if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) { |
| + if (console_trylock()) { |
| + con->write_atomic(con, text, text_len); |
| + console_unlock(); |
| + return true; |
| + } |
| + |
| + } else if (con->write && (con->flags & CON_BOOT) && !con->thread) { |
| + if (console_trylock()) { |
| + con->write(con, text, text_len); |
| + console_unlock(); |
| + return true; |
| + } |
| } |
| |
| - /* The waiter is now free to continue */ |
| - WRITE_ONCE(console_waiter, false); |
| + return false; |
| +} |
| |
| - spin_release(&console_owner_dep_map, _THIS_IP_); |
| +static bool have_atomic_console(void) |
| +{ |
| + struct console *con; |
| |
| - /* |
| - * Hand off console_lock to waiter. The waiter will perform |
| - * the up(). After this, the waiter is the console_lock owner. |
| - */ |
| - mutex_release(&console_lock_dep_map, _THIS_IP_); |
| - return 1; |
| + for_each_console(con) { |
| + if (!(con->flags & CON_ENABLED)) |
| + continue; |
| + if (con->write_atomic) |
| + return true; |
| + } |
| + return false; |
| } |
| |
| -/** |
| - * console_trylock_spinning - try to get console_lock by busy waiting |
| - * |
| - * This allows to busy wait for the console_lock when the current |
| - * owner is running in specially marked sections. It means that |
| - * the current owner is running and cannot reschedule until it |
| - * is ready to lose the lock. |
| - * |
| - * Return: 1 if we got the lock, 0 othrewise |
| - */ |
| -static int console_trylock_spinning(void) |
| +static bool print_sync(struct console *con, u64 *seq) |
| { |
| - struct task_struct *owner = NULL; |
| - bool waiter; |
| - bool spin = false; |
| - unsigned long flags; |
| + struct printk_info info; |
| + struct printk_record r; |
| + size_t text_len; |
| |
| - if (console_trylock()) |
| - return 1; |
| + prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf)); |
| |
| - printk_safe_enter_irqsave(flags); |
| + if (!prb_read_valid(prb, *seq, &r)) |
| + return false; |
| |
| - raw_spin_lock(&console_owner_lock); |
| - owner = READ_ONCE(console_owner); |
| - waiter = READ_ONCE(console_waiter); |
| - if (!waiter && owner && owner != current) { |
| - WRITE_ONCE(console_waiter, true); |
| - spin = true; |
| - } |
| - raw_spin_unlock(&console_owner_lock); |
| + text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); |
| |
| - /* |
| - * If there is an active printk() writing to the |
| - * consoles, instead of having it write our data too, |
| - * see if we can offload that load from the active |
| - * printer, and do some printing ourselves. |
| - * Go into a spin only if there isn't already a waiter |
| - * spinning, and there is an active printer, and |
| - * that active printer isn't us (recursive printk?). |
| - */ |
| - if (!spin) { |
| - printk_safe_exit_irqrestore(flags); |
| - return 0; |
| - } |
| + if (!call_sync_console_driver(con, &con->sync_buf[0], text_len)) |
| + return false; |
| |
| - /* We spin waiting for the owner to release us */ |
| - spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); |
| - /* Owner will clear console_waiter on hand off */ |
| - while (READ_ONCE(console_waiter)) |
| - cpu_relax(); |
| - spin_release(&console_owner_dep_map, _THIS_IP_); |
| + *seq = r.info->seq; |
| |
| - printk_safe_exit_irqrestore(flags); |
| - /* |
| - * The owner passed the console lock to us. |
| - * Since we did not spin on console lock, annotate |
| - * this as a trylock. Otherwise lockdep will |
| - * complain. |
| - */ |
| - mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); |
| + touch_softlockup_watchdog_sync(); |
| + clocksource_touch_watchdog(); |
| + rcu_cpu_stall_reset(); |
| + touch_nmi_watchdog(); |
| |
| - return 1; |
| + if (text_len) |
| + printk_delay(r.info->level); |
| + |
| + return true; |
| } |
| |
| -/* |
| - * Call the console drivers, asking them to write out |
| - * log_buf[start] to log_buf[end - 1]. |
| - * The console_lock must be held. |
| - */ |
| -static void call_console_drivers(const char *ext_text, size_t ext_len, |
| - const char *text, size_t len) |
| +static u64 read_console_seq(struct console *con) |
| { |
| - static char dropped_text[64]; |
| - size_t dropped_len = 0; |
| - struct console *con; |
| + u64 seq2; |
| + u64 seq; |
| |
| - trace_console_rcuidle(text, len); |
| + seq = latched_seq_read_nolock(&con->printk_seq); |
| + seq2 = latched_seq_read_nolock(&con->printk_sync_seq); |
| + if (seq2 > seq) |
| + seq = seq2; |
| +#ifdef CONFIG_HAVE_NMI |
| + seq2 = latched_seq_read_nolock(&con->printk_sync_nmi_seq); |
| + if (seq2 > seq) |
| + seq = seq2; |
| +#endif |
| + return seq; |
| +} |
| |
| - if (!console_drivers) |
| - return; |
| +static void print_sync_until(struct console *con, u64 seq, bool is_locked) |
| +{ |
| + u64 printk_seq; |
| |
| - if (console_dropped) { |
| - dropped_len = snprintf(dropped_text, sizeof(dropped_text), |
| - "** %lu printk messages dropped **\n", |
| - console_dropped); |
| - console_dropped = 0; |
| - } |
| + while (!__printk_cpu_trylock()) |
| + cpu_relax(); |
| |
| - for_each_console(con) { |
| - if (exclusive_console && con != exclusive_console) |
| - continue; |
| - if (!(con->flags & CON_ENABLED)) |
| - continue; |
| - if (!con->write) |
| - continue; |
| - if (!cpu_online(smp_processor_id()) && |
| - !(con->flags & CON_ANYTIME)) |
| - continue; |
| - if (con->flags & CON_EXTENDED) |
| - con->write(con, ext_text, ext_len); |
| - else { |
| - if (dropped_len) |
| - con->write(con, dropped_text, dropped_len); |
| - con->write(con, text, len); |
| - } |
| + for (;;) { |
| + printk_seq = read_console_seq(con); |
| + if (printk_seq >= seq) |
| + break; |
| + if (!print_sync(con, &printk_seq)) |
| + break; |
| + |
| + if (is_locked) |
| + latched_seq_write(&con->printk_seq, printk_seq + 1); |
| +#ifdef CONFIG_PRINTK_NMI |
| + else if (in_nmi()) |
| + latched_seq_write(&con->printk_sync_nmi_seq, printk_seq + 1); |
| +#endif |
| + else |
| + latched_seq_write(&con->printk_sync_seq, printk_seq + 1); |
| } |
| + |
| + __printk_cpu_unlock(); |
| } |
| |
| /* |
| @@ -2025,20 +1961,6 @@ static u8 *__printk_recursion_counter(void) |
| local_irq_restore(flags); \ |
| } while (0) |
| |
| -int printk_delay_msec __read_mostly; |
| - |
| -static inline void printk_delay(void) |
| -{ |
| - if (unlikely(printk_delay_msec)) { |
| - int m = printk_delay_msec; |
| - |
| - while (m--) { |
| - mdelay(1); |
| - touch_nmi_watchdog(); |
| - } |
| - } |
| -} |
| - |
| static inline u32 printk_caller_id(void) |
| { |
| return in_task() ? task_pid_nr(current) : |
| @@ -2119,13 +2041,14 @@ static u16 printk_sprint(char *text, u16 size, int facility, |
| } |
| |
| __printf(4, 0) |
| -int vprintk_store(int facility, int level, |
| - const struct dev_printk_info *dev_info, |
| - const char *fmt, va_list args) |
| +static int vprintk_store(int facility, int level, |
| + const struct dev_printk_info *dev_info, |
| + const char *fmt, va_list args) |
| { |
| const u32 caller_id = printk_caller_id(); |
| struct prb_reserved_entry e; |
| enum printk_info_flags flags = 0; |
| + bool final_commit = false; |
| struct printk_record r; |
| unsigned long irqflags; |
| u16 trunc_msg_len = 0; |
| @@ -2136,6 +2059,7 @@ int vprintk_store(int facility, int level, |
| u16 text_len; |
| int ret = 0; |
| u64 ts_nsec; |
| + u64 seq; |
| |
| /* |
| * Since the duration of printk() can vary depending on the message |
| @@ -2174,6 +2098,7 @@ int vprintk_store(int facility, int level, |
| if (flags & LOG_CONT) { |
| prb_rec_init_wr(&r, reserve_size); |
| if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { |
| + seq = r.info->seq; |
| text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, |
| facility, &flags, fmt, args); |
| r.info->text_len += text_len; |
| @@ -2181,6 +2106,7 @@ int vprintk_store(int facility, int level, |
| if (flags & LOG_NEWLINE) { |
| r.info->flags |= LOG_NEWLINE; |
| prb_final_commit(&e); |
| + final_commit = true; |
| } else { |
| prb_commit(&e); |
| } |
| @@ -2204,6 +2130,7 @@ int vprintk_store(int facility, int level, |
| if (!prb_reserve(&e, prb, &r)) |
| goto out; |
| } |
| + seq = r.info->seq; |
| |
| /* fill message */ |
| text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args); |
| @@ -2219,13 +2146,25 @@ int vprintk_store(int facility, int level, |
| memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); |
| |
| /* A message without a trailing newline can be continued. */ |
| - if (!(flags & LOG_NEWLINE)) |
| + if (!(flags & LOG_NEWLINE)) { |
| prb_commit(&e); |
| - else |
| + } else { |
| prb_final_commit(&e); |
| + final_commit = true; |
| + } |
| |
| ret = text_len + trunc_msg_len; |
| out: |
| + /* only the kernel may perform synchronous printing */ |
| + if (facility == 0 && final_commit) { |
| + struct console *con; |
| + |
| + for_each_console(con) { |
| + if (console_may_sync(con)) |
| + print_sync_until(con, seq + 1, false); |
| + } |
| + } |
| + |
| printk_exit_irqrestore(recursion_ptr, irqflags); |
| return ret; |
| } |
| @@ -2235,50 +2174,43 @@ asmlinkage int vprintk_emit(int facility, int level, |
| const char *fmt, va_list args) |
| { |
| int printed_len; |
| - bool in_sched = false; |
| |
| /* Suppress unimportant messages after panic happens */ |
| if (unlikely(suppress_printk)) |
| return 0; |
| |
| - if (level == LOGLEVEL_SCHED) { |
| + if (level == LOGLEVEL_SCHED) |
| level = LOGLEVEL_DEFAULT; |
| - in_sched = true; |
| - } |
| - |
| - boot_delay_msec(level); |
| - printk_delay(); |
| |
| printed_len = vprintk_store(facility, level, dev_info, fmt, args); |
| |
| - /* If called from the scheduler, we can not call up(). */ |
| - if (!in_sched) { |
| - /* |
| - * Disable preemption to avoid being preempted while holding |
| - * console_sem which would prevent anyone from printing to |
| - * console |
| - */ |
| - preempt_disable(); |
| - /* |
| - * Try to acquire and then immediately release the console |
| - * semaphore. The release will print out buffers and wake up |
| - * /dev/kmsg and syslog() users. |
| - */ |
| - if (console_trylock_spinning()) |
| - console_unlock(); |
| - preempt_enable(); |
| - } |
| - |
| wake_up_klogd(); |
| return printed_len; |
| } |
| EXPORT_SYMBOL(vprintk_emit); |
| |
| -int vprintk_default(const char *fmt, va_list args) |
| +__printf(1, 0) |
| +static int vprintk_default(const char *fmt, va_list args) |
| { |
| return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); |
| } |
| -EXPORT_SYMBOL_GPL(vprintk_default); |
| + |
| +__printf(1, 0) |
| +static int vprintk_func(const char *fmt, va_list args) |
| +{ |
| +#ifdef CONFIG_KGDB_KDB |
| +	/* Allow printk() to be passed to kdb, but avoid recursion. */
| + if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) |
| + return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); |
| +#endif |
| + return vprintk_default(fmt, args); |
| +} |
| + |
| +asmlinkage int vprintk(const char *fmt, va_list args) |
| +{ |
| + return vprintk_func(fmt, args); |
| +} |
| +EXPORT_SYMBOL(vprintk); |
| |
| asmlinkage __visible int _printk(const char *fmt, ...) |
| { |
| @@ -2293,37 +2225,162 @@ asmlinkage __visible int _printk(const char *fmt, ...) |
| } |
| EXPORT_SYMBOL(_printk); |
| |
| -#else /* CONFIG_PRINTK */ |
| +static int printk_kthread_func(void *data) |
| +{ |
| + struct console *con = data; |
| + unsigned long dropped = 0; |
| + char *dropped_text = NULL; |
| + struct printk_info info; |
| + struct printk_record r; |
| + char *ext_text = NULL; |
| + size_t dropped_len; |
| + int ret = -ENOMEM; |
| + char *text = NULL; |
| + char *write_text; |
| + size_t len; |
| + int error; |
| + u64 seq; |
| + |
| + if (con->flags & CON_EXTENDED) { |
| + ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); |
| + if (!ext_text) |
| + goto out; |
| + } |
| + text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); |
| + dropped_text = kmalloc(64, GFP_KERNEL); |
| + if (!text || !dropped_text) |
| + goto out; |
| + if (con->flags & CON_EXTENDED) |
| + write_text = ext_text; |
| + else |
| + write_text = text; |
| + |
| + seq = read_console_seq(con); |
| |
| -#define CONSOLE_LOG_MAX 0 |
| -#define printk_time false |
| + prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); |
| |
| -#define prb_read_valid(rb, seq, r) false |
| -#define prb_first_valid_seq(rb) 0 |
| + for (;;) { |
| + error = wait_event_interruptible(log_wait, |
| + prb_read_valid(prb, seq, &r) || kthread_should_stop()); |
| |
| -static u64 syslog_seq; |
| -static u64 console_seq; |
| -static u64 exclusive_console_stop_seq; |
| -static unsigned long console_dropped; |
| + if (kthread_should_stop()) |
| + break; |
| + |
| + if (error) |
| + continue; |
| + |
| + if (seq != r.info->seq) { |
| + dropped += r.info->seq - seq; |
| + seq = r.info->seq; |
| + } |
| + |
| + seq++; |
| + |
| + if (!(con->flags & CON_ENABLED)) |
| + continue; |
| + |
| + if (suppress_message_printing(r.info->level)) |
| + continue; |
| + |
| + if (con->flags & CON_EXTENDED) { |
| + len = info_print_ext_header(ext_text, |
| + CONSOLE_EXT_LOG_MAX, |
| + r.info); |
| + len += msg_print_ext_body(ext_text + len, |
| + CONSOLE_EXT_LOG_MAX - len, |
| + &r.text_buf[0], r.info->text_len, |
| + &r.info->dev_info); |
| + } else { |
| + len = record_print_text(&r, |
| + console_msg_format & MSG_FORMAT_SYSLOG, |
| + printk_time); |
| + } |
| + |
| + console_lock(); |
| + |
| + /* |
| + * Even though the printk kthread is always preemptible, it is |
| + * still not allowed to call cond_resched() from within |
| + * console drivers. The task may become non-preemptible in the |
| + * console driver call chain. For example, vt_console_print() |
| + * takes a spinlock and then can call into fbcon_redraw(), |
| + * which can conditionally invoke cond_resched(). |
| + */ |
| + console_may_schedule = 0; |
| + |
| + if (kernel_sync_mode() && con->write_atomic) { |
| + console_unlock(); |
| + break; |
| + } |
| + |
| + if (!(con->flags & CON_EXTENDED) && dropped) { |
| + dropped_len = snprintf(dropped_text, 64, |
| + "** %lu printk messages dropped **\n", |
| + dropped); |
| + dropped = 0; |
| + |
| + con->write(con, dropped_text, dropped_len); |
| + printk_delay(r.info->level); |
| + } |
| + |
| + con->write(con, write_text, len); |
| + if (len) |
| + printk_delay(r.info->level); |
| + |
| + latched_seq_write(&con->printk_seq, seq); |
| + |
| + console_unlock(); |
| + } |
| + ret = 0; |
| +out: |
| + kfree(dropped_text); |
| + kfree(text); |
| + kfree(ext_text); |
| + pr_info("%sconsole [%s%d]: printing thread stopped\n", |
| + (con->flags & CON_BOOT) ? "boot" : "", |
| + con->name, con->index); |
| + return ret; |
| +} |
| |
| -static size_t record_print_text(const struct printk_record *r, |
| - bool syslog, bool time) |
| +/* Must be called within console_lock(). */ |
| +static void start_printk_kthread(struct console *con) |
| { |
| - return 0; |
| + con->thread = kthread_run(printk_kthread_func, con, |
| + "pr/%s%d", con->name, con->index); |
| + if (IS_ERR(con->thread)) { |
| + pr_err("%sconsole [%s%d]: unable to start printing thread\n", |
| + (con->flags & CON_BOOT) ? "boot" : "", |
| + con->name, con->index); |
| + return; |
| + } |
| + pr_info("%sconsole [%s%d]: printing thread started\n", |
| + (con->flags & CON_BOOT) ? "boot" : "", |
| + con->name, con->index); |
| } |
| -static ssize_t info_print_ext_header(char *buf, size_t size, |
| - struct printk_info *info) |
| + |
| +/* protected by console_lock */ |
| +static bool kthreads_started; |
| + |
| +/* Must be called within console_lock(). */ |
| +static void console_try_thread(struct console *con) |
| { |
| - return 0; |
| + if (kthreads_started) { |
| + start_printk_kthread(con); |
| + return; |
| + } |
| + |
| + /* |
| + * The printing threads have not been started yet. If this console |
| + * can print synchronously, print all unprinted messages. |
| + */ |
| + if (console_may_sync(con)) { |
| + unsigned long flags; |
| + |
| + local_irq_save(flags); |
| + print_sync_until(con, prb_next_seq(prb), true); |
| + local_irq_restore(flags); |
| + } |
| } |
| -static ssize_t msg_print_ext_body(char *buf, size_t size, |
| - char *text, size_t text_len, |
| - struct dev_printk_info *dev_info) { return 0; } |
| -static void console_lock_spinning_enable(void) { } |
| -static int console_lock_spinning_disable_and_check(void) { return 0; } |
| -static void call_console_drivers(const char *ext_text, size_t ext_len, |
| - const char *text, size_t len) {} |
| -static bool suppress_message_printing(int level) { return false; } |
| |
| #endif /* CONFIG_PRINTK */ |
| |
| @@ -2580,34 +2637,6 @@ int is_console_locked(void) |
| } |
| EXPORT_SYMBOL(is_console_locked); |
| |
| -/* |
| - * Check if we have any console that is capable of printing while cpu is |
| - * booting or shutting down. Requires console_sem. |
| - */ |
| -static int have_callable_console(void) |
| -{ |
| - struct console *con; |
| - |
| - for_each_console(con) |
| - if ((con->flags & CON_ENABLED) && |
| - (con->flags & CON_ANYTIME)) |
| - return 1; |
| - |
| - return 0; |
| -} |
| - |
| -/* |
| - * Can we actually use the console at this time on this cpu? |
| - * |
| - * Console drivers may assume that per-cpu resources have been allocated. So |
| - * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't |
| - * call them until this CPU is officially up. |
| - */ |
| -static inline int can_use_console(void) |
| -{ |
| - return cpu_online(raw_smp_processor_id()) || have_callable_console(); |
| -} |
| - |
| /** |
| * console_unlock - unlock the console system |
| * |
| @@ -2624,140 +2653,13 @@ static inline int can_use_console(void) |
| */ |
| void console_unlock(void) |
| { |
| - static char ext_text[CONSOLE_EXT_LOG_MAX]; |
| - static char text[CONSOLE_LOG_MAX]; |
| - unsigned long flags; |
| - bool do_cond_resched, retry; |
| - struct printk_info info; |
| - struct printk_record r; |
| - u64 __maybe_unused next_seq; |
| - |
| if (console_suspended) { |
| up_console_sem(); |
| return; |
| } |
| |
| - prb_rec_init_rd(&r, &info, text, sizeof(text)); |
| - |
| - /* |
| - * Console drivers are called with interrupts disabled, so |
| - * @console_may_schedule should be cleared before; however, we may |
| - * end up dumping a lot of lines, for example, if called from |
| - * console registration path, and should invoke cond_resched() |
| - * between lines if allowable. Not doing so can cause a very long |
| - * scheduling stall on a slow console leading to RCU stall and |
| - * softlockup warnings which exacerbate the issue with more |
| - * messages practically incapacitating the system. |
| - * |
| - * console_trylock() is not able to detect the preemptive |
| - * context reliably. Therefore the value must be stored before |
| - * and cleared after the "again" goto label. |
| - */ |
| - do_cond_resched = console_may_schedule; |
| -again: |
| - console_may_schedule = 0; |
| - |
| - /* |
| - * We released the console_sem lock, so we need to recheck if |
| - * cpu is online and (if not) is there at least one CON_ANYTIME |
| - * console. |
| - */ |
| - if (!can_use_console()) { |
| - console_locked = 0; |
| - up_console_sem(); |
| - return; |
| - } |
| - |
| - for (;;) { |
| - size_t ext_len = 0; |
| - int handover; |
| - size_t len; |
| - |
| -skip: |
| - if (!prb_read_valid(prb, console_seq, &r)) |
| - break; |
| - |
| - if (console_seq != r.info->seq) { |
| - console_dropped += r.info->seq - console_seq; |
| - console_seq = r.info->seq; |
| - } |
| - |
| - if (suppress_message_printing(r.info->level)) { |
| - /* |
| - * Skip record we have buffered and already printed |
| - * directly to the console when we received it, and |
| - * record that has level above the console loglevel. |
| - */ |
| - console_seq++; |
| - goto skip; |
| - } |
| - |
| - /* Output to all consoles once old messages replayed. */ |
| - if (unlikely(exclusive_console && |
| - console_seq >= exclusive_console_stop_seq)) { |
| - exclusive_console = NULL; |
| - } |
| - |
| - /* |
| - * Handle extended console text first because later |
| - * record_print_text() will modify the record buffer in-place. |
| - */ |
| - if (nr_ext_console_drivers) { |
| - ext_len = info_print_ext_header(ext_text, |
| - sizeof(ext_text), |
| - r.info); |
| - ext_len += msg_print_ext_body(ext_text + ext_len, |
| - sizeof(ext_text) - ext_len, |
| - &r.text_buf[0], |
| - r.info->text_len, |
| - &r.info->dev_info); |
| - } |
| - len = record_print_text(&r, |
| - console_msg_format & MSG_FORMAT_SYSLOG, |
| - printk_time); |
| - console_seq++; |
| - |
| - /* |
| - * While actively printing out messages, if another printk() |
| - * were to occur on another CPU, it may wait for this one to |
| - * finish. This task can not be preempted if there is a |
| - * waiter waiting to take over. |
| - * |
| - * Interrupts are disabled because the hand over to a waiter |
| - * must not be interrupted until the hand over is completed |
| - * (@console_waiter is cleared). |
| - */ |
| - printk_safe_enter_irqsave(flags); |
| - console_lock_spinning_enable(); |
| - |
| - stop_critical_timings(); /* don't trace print latency */ |
| - call_console_drivers(ext_text, ext_len, text, len); |
| - start_critical_timings(); |
| - |
| - handover = console_lock_spinning_disable_and_check(); |
| - printk_safe_exit_irqrestore(flags); |
| - if (handover) |
| - return; |
| - |
| - if (do_cond_resched) |
| - cond_resched(); |
| - } |
| - |
| - /* Get consistent value of the next-to-be-used sequence number. */ |
| - next_seq = console_seq; |
| - |
| console_locked = 0; |
| up_console_sem(); |
| - |
| - /* |
| - * Someone could have filled up the buffer again, so re-check if there's |
| - * something to flush. In case we cannot trylock the console_sem again, |
| - * there's a new owner and the console_unlock() from them will do the |
| - * flush, no worries. |
| - */ |
| - retry = prb_read_valid(prb, next_seq, NULL); |
| - if (retry && console_trylock()) |
| - goto again; |
| } |
| EXPORT_SYMBOL(console_unlock); |
| |
| @@ -2807,18 +2709,20 @@ void console_unblank(void) |
| */ |
| void console_flush_on_panic(enum con_flush_mode mode) |
| { |
| - /* |
| - * If someone else is holding the console lock, trylock will fail |
| - * and may_schedule may be set. Ignore and proceed to unlock so |
| - * that messages are flushed out. As this can be called from any |
| - * context and we don't want to get preempted while flushing, |
| - * ensure may_schedule is cleared. |
| - */ |
| - console_trylock(); |
| - console_may_schedule = 0; |
| + if (!console_trylock()) |
| + return; |
| + |
| +#ifdef CONFIG_PRINTK |
| + if (mode == CONSOLE_REPLAY_ALL) { |
| + struct console *c; |
| + u64 seq; |
| + |
| + seq = prb_first_valid_seq(prb); |
| + for_each_console(c) |
| + latched_seq_write(&c->printk_seq, seq); |
| + } |
| +#endif |
| |
| - if (mode == CONSOLE_REPLAY_ALL) |
| - console_seq = prb_first_valid_seq(prb); |
| console_unlock(); |
| } |
| |
| @@ -2954,6 +2858,7 @@ static int try_enable_new_console(struct console *newcon, bool user_specified) |
| void register_console(struct console *newcon) |
| { |
| struct console *bcon = NULL; |
| + u64 __maybe_unused seq = 0; |
| int err; |
| |
| for_each_console(bcon) { |
| @@ -2976,6 +2881,8 @@ void register_console(struct console *newcon) |
| } |
| } |
| |
| + newcon->thread = NULL; |
| + |
| if (console_drivers && console_drivers->flags & CON_BOOT) |
| bcon = console_drivers; |
| |
| @@ -3017,8 +2924,10 @@ void register_console(struct console *newcon) |
| * the real console are the same physical device, it's annoying to |
| * see the beginning boot messages twice |
| */ |
| - if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) |
| + if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { |
| newcon->flags &= ~CON_PRINTBUFFER; |
| + newcon->flags |= CON_HANDOVER; |
| + } |
| |
| /* |
| * Put this console in the list - keep the |
| @@ -3040,27 +2949,21 @@ void register_console(struct console *newcon) |
| if (newcon->flags & CON_EXTENDED) |
| nr_ext_console_drivers++; |
| |
| - if (newcon->flags & CON_PRINTBUFFER) { |
| - /* |
| - * console_unlock(); will print out the buffered messages |
| - * for us. |
| - * |
| - * We're about to replay the log buffer. Only do this to the |
| - * just-registered console to avoid excessive message spam to |
| - * the already-registered consoles. |
| - * |
| - * Set exclusive_console with disabled interrupts to reduce |
| - * race window with eventual console_flush_on_panic() that |
| - * ignores console_lock. |
| - */ |
| - exclusive_console = newcon; |
| - exclusive_console_stop_seq = console_seq; |
| +#ifdef CONFIG_PRINTK |
| + if (!(newcon->flags & CON_PRINTBUFFER)) |
| + seq = prb_next_seq(prb); |
| |
| - /* Get a consistent copy of @syslog_seq. */ |
| - mutex_lock(&syslog_lock); |
| - console_seq = syslog_seq; |
| - mutex_unlock(&syslog_lock); |
| - } |
| + seqcount_latch_init(&newcon->printk_seq.latch); |
| + latched_seq_write(&newcon->printk_seq, seq); |
| + seqcount_latch_init(&newcon->printk_sync_seq.latch); |
| + latched_seq_write(&newcon->printk_sync_seq, seq); |
| +#ifdef CONFIG_HAVE_NMI |
| + seqcount_latch_init(&newcon->printk_sync_nmi_seq.latch); |
| + latched_seq_write(&newcon->printk_sync_nmi_seq, seq); |
| +#endif |
| + |
| + console_try_thread(newcon); |
| +#endif /* CONFIG_PRINTK */ |
| console_unlock(); |
| console_sysfs_notify(); |
| |
| @@ -3134,6 +3037,9 @@ int unregister_console(struct console *console) |
| console_unlock(); |
| console_sysfs_notify(); |
| |
| + if (console->thread && !IS_ERR(console->thread)) |
| + kthread_stop(console->thread); |
| + |
| if (console->exit) |
| res = console->exit(console); |
| |
| @@ -3216,6 +3122,15 @@ static int __init printk_late_init(void) |
| unregister_console(con); |
| } |
| } |
| + |
| +#ifdef CONFIG_PRINTK |
| + console_lock(); |
| + for_each_console(con) |
| + start_printk_kthread(con); |
| + kthreads_started = true; |
| + console_unlock(); |
| +#endif |
| + |
| ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, |
| console_cpu_notify); |
| WARN_ON(ret < 0); |
| @@ -3239,14 +3154,8 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) |
| { |
| int pending = this_cpu_xchg(printk_pending, 0); |
| |
| - if (pending & PRINTK_PENDING_OUTPUT) { |
| - /* If trylock fails, someone else is doing the printing */ |
| - if (console_trylock()) |
| - console_unlock(); |
| - } |
| - |
| if (pending & PRINTK_PENDING_WAKEUP) |
| - wake_up_interruptible(&log_wait); |
| + wake_up_interruptible_all(&log_wait); |
| } |
| |
| static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = |
| @@ -3293,29 +3202,7 @@ void defer_console_output(void) |
| |
| void printk_trigger_flush(void) |
| { |
| - defer_console_output(); |
| -} |
| - |
| -int vprintk_deferred(const char *fmt, va_list args) |
| -{ |
| - int r; |
| - |
| - r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); |
| - defer_console_output(); |
| - |
| - return r; |
| -} |
| - |
| -int _printk_deferred(const char *fmt, ...) |
| -{ |
| - va_list args; |
| - int r; |
| - |
| - va_start(args, fmt); |
| - r = vprintk_deferred(fmt, args); |
| - va_end(args); |
| - |
| - return r; |
| + wake_up_klogd(); |
| } |
| |
| /* |
| @@ -3444,6 +3331,24 @@ void kmsg_dump(enum kmsg_dump_reason reason) |
| { |
| struct kmsg_dumper *dumper; |
| |
| + if (!oops_in_progress) { |
| + /* |
| + * If atomic consoles are available, activate kernel sync mode |
| + * to make sure any final messages are visible. The trailing |
| + * printk message is important to flush any pending messages. |
| + */ |
| + if (have_atomic_console()) { |
| + sync_mode = true; |
| + pr_info("enabled sync mode\n"); |
| + } |
| + |
| + /* |
| + * Give the printing threads time to flush, allowing up to |
| + * 1s of no printing forward progress before giving up. |
| + */ |
| + pr_flush(1000, true); |
| + } |
| + |
| rcu_read_lock(); |
| list_for_each_entry_rcu(dumper, &dump_list, list) { |
| enum kmsg_dump_reason max_reason = dumper->max_reason; |
| @@ -3626,6 +3531,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind); |
| #ifdef CONFIG_SMP |
| static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1); |
| static atomic_t printk_cpulock_nested = ATOMIC_INIT(0); |
| +static unsigned int kgdb_cpu = -1; |
| |
| /** |
| * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant |
| @@ -3705,6 +3611,9 @@ EXPORT_SYMBOL(__printk_cpu_trylock); |
| */ |
| void __printk_cpu_unlock(void) |
| { |
| + bool trigger_kgdb = false; |
| + unsigned int cpu; |
| + |
| if (atomic_read(&printk_cpulock_nested)) { |
| atomic_dec(&printk_cpulock_nested); |
| return; |
| @@ -3715,6 +3624,12 @@ void __printk_cpu_unlock(void) |
| * LMM(__printk_cpu_unlock:A) |
| */ |
| |
| + cpu = smp_processor_id(); |
| + if (kgdb_cpu == cpu) { |
| + trigger_kgdb = true; |
| + kgdb_cpu = -1; |
| + } |
| + |
| /* |
| * Guarantee loads and stores from this CPU when it was the |
| * lock owner are visible to the next lock owner. This pairs |
| @@ -3735,6 +3650,98 @@ void __printk_cpu_unlock(void) |
| */ |
| atomic_set_release(&printk_cpulock_owner, |
| -1); /* LMM(__printk_cpu_unlock:B) */ |
| + |
| + if (trigger_kgdb) { |
| + pr_warn("re-triggering kgdb roundup for CPU#%d\n", cpu); |
| + kgdb_roundup_cpu(cpu); |
| + } |
| } |
| EXPORT_SYMBOL(__printk_cpu_unlock); |
| + |
| +bool kgdb_roundup_delay(unsigned int cpu) |
| +{ |
| + if (cpu != atomic_read(&printk_cpulock_owner)) |
| + return false; |
| + |
| + kgdb_cpu = cpu; |
| + return true; |
| +} |
| +EXPORT_SYMBOL(kgdb_roundup_delay); |
| #endif /* CONFIG_SMP */ |
| + |
| +#ifdef CONFIG_PRINTK |
| +static void pr_msleep(bool may_sleep, int ms) |
| +{ |
| + if (may_sleep) { |
| + msleep(ms); |
| + } else { |
| + while (ms--) |
| + udelay(1000); |
| + } |
| +} |
| + |
| +/** |
| + * pr_flush() - Wait for printing threads to catch up. |
| + * |
| + * @timeout_ms: The maximum time (in ms) to wait. |
| + * @reset_on_progress: Reset the timeout if forward progress is seen. |
| + * |
| + * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 |
| + * represents infinite waiting. |
| + * |
| + * If @reset_on_progress is true, the timeout will be reset whenever any |
| + * printer has been seen to make some forward progress. |
| + * |
| + * Context: Any context. |
| + * Return: true if all enabled printers are caught up. |
| + */ |
| +bool pr_flush(int timeout_ms, bool reset_on_progress) |
| +{ |
| + int remaining = timeout_ms; |
| + struct console *con; |
| + u64 last_diff = 0; |
| + bool may_sleep; |
| + u64 printk_seq; |
| + u64 diff; |
| + u64 seq; |
| + |
| + may_sleep = (preemptible() && |
| + !in_softirq() && |
| + system_state >= SYSTEM_RUNNING); |
| + |
| + seq = prb_next_seq(prb); |
| + |
| + for (;;) { |
| + diff = 0; |
| + |
| + for_each_console(con) { |
| + if (!(con->flags & CON_ENABLED)) |
| + continue; |
| + printk_seq = read_console_seq(con); |
| + if (printk_seq < seq) |
| + diff += seq - printk_seq; |
| + } |
| + |
| + if (diff != last_diff && reset_on_progress) |
| + remaining = timeout_ms; |
| + |
| + if (diff == 0 || remaining == 0) |
| + break; |
| + |
| + if (remaining < 0) { |
| + pr_msleep(may_sleep, 100); |
| + } else if (remaining < 100) { |
| + pr_msleep(may_sleep, remaining); |
| + remaining = 0; |
| + } else { |
| + pr_msleep(may_sleep, 100); |
| + remaining -= 100; |
| + } |
| + |
| + last_diff = diff; |
| + } |
| + |
| + return (diff == 0); |
| +} |
| +EXPORT_SYMBOL(pr_flush); |
| +#endif /* CONFIG_PRINTK */ |
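| |
| The kernel-doc above describes pr_flush() but shows no caller beyond the |
| kmsg_dump() hunk, which uses pr_flush(1000, true). A second, hypothetical |
| sketch of how a shutdown path might use the new API; the function name and |
| the 2000 ms budget are illustrative assumptions, not part of this series: |
| |
| #include <linux/printk.h> |
| |
| /* Hypothetical caller: give the per-console printing threads up to |
|  * two seconds to catch up, restarting the budget whenever any |
|  * console makes forward progress. */ |
| static void example_halt_prepare(void) |
| { |
| 	pr_info("system halt requested\n"); |
| 	if (!pr_flush(2000, true)) |
| 		pr_emerg("consoles lagging; some output may be lost\n"); |
| } |
| |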
| diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c |
| deleted file mode 100644 |
| index ef0f9a2044da..000000000000 |
| --- a/kernel/printk/printk_safe.c |
| +++ /dev/null |
| @@ -1,52 +0,0 @@ |
| -// SPDX-License-Identifier: GPL-2.0-or-later |
| -/* |
| - * printk_safe.c - Safe printk for printk-deadlock-prone contexts |
| - */ |
| - |
| -#include <linux/preempt.h> |
| -#include <linux/kdb.h> |
| -#include <linux/smp.h> |
| -#include <linux/cpumask.h> |
| -#include <linux/printk.h> |
| -#include <linux/kprobes.h> |
| - |
| -#include "internal.h" |
| - |
| -static DEFINE_PER_CPU(int, printk_context); |
| - |
| -/* Can be preempted by NMI. */ |
| -void __printk_safe_enter(void) |
| -{ |
| - this_cpu_inc(printk_context); |
| -} |
| - |
| -/* Can be preempted by NMI. */ |
| -void __printk_safe_exit(void) |
| -{ |
| - this_cpu_dec(printk_context); |
| -} |
| - |
| -asmlinkage int vprintk(const char *fmt, va_list args) |
| -{ |
| -#ifdef CONFIG_KGDB_KDB |
| - /* Allow to pass printk() to kdb but avoid a recursion. */ |
| - if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) |
| - return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); |
| -#endif |
| - |
| - /* |
| - * Use the main logbuf even in NMI. But avoid calling console |
| - * drivers that might have their own locks. |
| - */ |
| - if (this_cpu_read(printk_context) || in_nmi()) { |
| - int len; |
| - |
| - len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); |
| - defer_console_output(); |
| - return len; |
| - } |
| - |
| - /* No obstacles. */ |
| - return vprintk_default(fmt, args); |
| -} |
| -EXPORT_SYMBOL(vprintk); |
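| |
| The kgdb_roundup_delay() helper added to printk.c above lets a CPU that |
| owns the printk cpu-lock defer its own kgdb roundup; __printk_cpu_unlock() |
| re-issues the roundup once the lock is released. A hedged sketch of the |
| consumer side (the NMI-hook shape and all names here are assumptions of |
| this illustration, not code from the series): |
| |
| #include <linux/kgdb.h> |
| #include <linux/smp.h> |
| |
| /* Hypothetical arch NMI hook: skip the roundup while this CPU holds |
|  * the printk cpu-lock; the unlock path re-triggers it. */ |
| static bool example_kgdb_nmi_enter(struct pt_regs *regs) |
| { |
| 	unsigned int cpu = raw_smp_processor_id(); |
| |
| 	if (kgdb_roundup_delay(cpu)) |
| 		return false;	/* deferred until cpu-lock release */ |
| |
| 	kgdb_nmicallback(cpu, regs); |
| 	return true; |
| } |
| |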
| diff --git a/kernel/ptrace.c b/kernel/ptrace.c |
| index 0cf547531ddf..0df2de214daa 100644 |
| --- a/kernel/ptrace.c |
| +++ b/kernel/ptrace.c |
| @@ -197,7 +197,18 @@ static bool ptrace_freeze_traced(struct task_struct *task) |
| spin_lock_irq(&task->sighand->siglock); |
| if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && |
| !__fatal_signal_pending(task)) { |
| +#ifdef CONFIG_PREEMPT_RT |
| + unsigned long flags; |
| + |
| + raw_spin_lock_irqsave(&task->pi_lock, flags); |
| + if (READ_ONCE(task->__state) & __TASK_TRACED) |
| + WRITE_ONCE(task->__state, __TASK_TRACED); |
| + else |
| + task->saved_state = __TASK_TRACED; |
| + raw_spin_unlock_irqrestore(&task->pi_lock, flags); |
| +#else |
| WRITE_ONCE(task->__state, __TASK_TRACED); |
| +#endif |
| ret = true; |
| } |
| spin_unlock_irq(&task->sighand->siglock); |
| @@ -207,7 +218,11 @@ static bool ptrace_freeze_traced(struct task_struct *task) |
| |
| static void ptrace_unfreeze_traced(struct task_struct *task) |
| { |
| - if (READ_ONCE(task->__state) != __TASK_TRACED) |
| + unsigned long flags; |
| + bool frozen = true; |
| + |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && |
| + READ_ONCE(task->__state) != __TASK_TRACED) |
| return; |
| |
| WARN_ON(!task->ptrace || task->parent != current); |
| @@ -217,12 +232,21 @@ static void ptrace_unfreeze_traced(struct task_struct *task) |
| * Recheck state under the lock to close this race. |
| */ |
| spin_lock_irq(&task->sighand->siglock); |
| - if (READ_ONCE(task->__state) == __TASK_TRACED) { |
| - if (__fatal_signal_pending(task)) |
| - wake_up_state(task, __TASK_TRACED); |
| - else |
| - WRITE_ONCE(task->__state, TASK_TRACED); |
| - } |
| + raw_spin_lock_irqsave(&task->pi_lock, flags); |
| + if (READ_ONCE(task->__state) == __TASK_TRACED) |
| + WRITE_ONCE(task->__state, TASK_TRACED); |
| + |
| +#ifdef CONFIG_PREEMPT_RT |
| + else if (task->saved_state == __TASK_TRACED) |
| + task->saved_state = TASK_TRACED; |
| +#endif |
| + else |
| + frozen = false; |
| + raw_spin_unlock_irqrestore(&task->pi_lock, flags); |
| + |
| + if (frozen && __fatal_signal_pending(task)) |
| + wake_up_state(task, __TASK_TRACED); |
| + |
| spin_unlock_irq(&task->sighand->siglock); |
| } |
| |
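| |
| On PREEMPT_RT a task blocked on a spinlock-turned-rtmutex parks its visible |
| state in ->saved_state while ->__state holds TASK_RTLOCK_WAIT, which is why |
| the hunks above must freeze and thaw whichever field actually carries |
| __TASK_TRACED. A toy user-space model of that bookkeeping (all names and |
| constant values here are illustrative, not the kernel's): |
| |
| #include <stdio.h> |
| |
| #define __TASK_TRACED    0x0008u |
| #define TASK_TRACED      (0x0100u | __TASK_TRACED)	/* toy value */ |
| #define TASK_RTLOCK_WAIT 0x1000u |
| |
| struct toy_task { |
| 	unsigned int state;		/* models p->__state     */ |
| 	unsigned int saved_state;	/* models p->saved_state */ |
| }; |
| |
| /* Freeze: record __TASK_TRACED in whichever field holds the state. */ |
| static void toy_freeze(struct toy_task *t) |
| { |
| 	if (t->state & __TASK_TRACED) |
| 		t->state = __TASK_TRACED; |
| 	else |
| 		t->saved_state = __TASK_TRACED; |
| } |
| |
| int main(void) |
| { |
| 	/* Tracee blocked on an RT "spinlock": real state is stashed. */ |
| 	struct toy_task t = { .state = TASK_RTLOCK_WAIT, |
| 			      .saved_state = TASK_TRACED }; |
| |
| 	toy_freeze(&t); |
| 	printf("state=%#x saved_state=%#x\n", t.state, t.saved_state); |
| 	return 0; |
| } |
| |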
| diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h |
| index 60c9eacac25b..2dbcd58383e3 100644 |
| --- a/kernel/rcu/tasks.h |
| +++ b/kernel/rcu/tasks.h |
| @@ -1350,7 +1350,7 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp) |
| rttd->notrun = true; |
| } |
| |
| -static void rcu_tasks_initiate_self_tests(void) |
| +void rcu_tasks_initiate_self_tests(void) |
| { |
| pr_info("Running RCU-tasks wait API self tests\n"); |
| #ifdef CONFIG_TASKS_RCU |
| @@ -1387,9 +1387,7 @@ static int rcu_tasks_verify_self_tests(void) |
| return ret; |
| } |
| late_initcall(rcu_tasks_verify_self_tests); |
| -#else /* #ifdef CONFIG_PROVE_RCU */ |
| -static void rcu_tasks_initiate_self_tests(void) { } |
| -#endif /* #else #ifdef CONFIG_PROVE_RCU */ |
| +#endif /* #ifdef CONFIG_PROVE_RCU */ |
| |
| void __init rcu_init_tasks_generic(void) |
| { |
| @@ -1404,9 +1402,6 @@ void __init rcu_init_tasks_generic(void) |
| #ifdef CONFIG_TASKS_TRACE_RCU |
| rcu_spawn_tasks_trace_kthread(); |
| #endif |
| - |
| - // Run the self-tests. |
| - rcu_tasks_initiate_self_tests(); |
| } |
| |
| #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ |
| diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c |
| index a4a9d68b1fdc..350b2bc83051 100644 |
| --- a/kernel/rcu/tree.c |
| +++ b/kernel/rcu/tree.c |
| @@ -2279,13 +2279,13 @@ rcu_report_qs_rdp(struct rcu_data *rdp) |
| { |
| unsigned long flags; |
| unsigned long mask; |
| - bool needwake = false; |
| - const bool offloaded = rcu_rdp_is_offloaded(rdp); |
| + bool offloaded, needwake = false; |
| struct rcu_node *rnp; |
| |
| WARN_ON_ONCE(rdp->cpu != smp_processor_id()); |
| rnp = rdp->mynode; |
| raw_spin_lock_irqsave_rcu_node(rnp, flags); |
| + offloaded = rcu_rdp_is_offloaded(rdp); |
| if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq || |
| rdp->gpwrap) { |
| |
| @@ -2447,7 +2447,7 @@ static void rcu_do_batch(struct rcu_data *rdp) |
| int div; |
| bool __maybe_unused empty; |
| unsigned long flags; |
| - const bool offloaded = rcu_rdp_is_offloaded(rdp); |
| + bool offloaded; |
| struct rcu_head *rhp; |
| struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl); |
| long bl, count = 0; |
| @@ -2473,6 +2473,7 @@ static void rcu_do_batch(struct rcu_data *rdp) |
| rcu_nocb_lock(rdp); |
| WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); |
| pending = rcu_segcblist_n_cbs(&rdp->cblist); |
| + offloaded = rcu_rdp_is_offloaded(rdp); |
| div = READ_ONCE(rcu_divisor); |
| div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div; |
| bl = max(rdp->blimit, pending >> div); |
| diff --git a/kernel/sched/core.c b/kernel/sched/core.c |
| index b89ca5c83143..73e82dae64cd 100644 |
| --- a/kernel/sched/core.c |
| +++ b/kernel/sched/core.c |
| @@ -75,7 +75,11 @@ __read_mostly int sysctl_resched_latency_warn_once = 1; |
| * Number of tasks to iterate in a single balance run. |
| * Limited because this is done with IRQs disabled. |
| */ |
| +#ifdef CONFIG_PREEMPT_RT |
| +const_debug unsigned int sysctl_sched_nr_migrate = 8; |
| +#else |
| const_debug unsigned int sysctl_sched_nr_migrate = 32; |
| +#endif |
| |
| /* |
| * period over which we measure -rt task CPU usage in us. |
| @@ -983,6 +987,46 @@ void resched_curr(struct rq *rq) |
| trace_sched_wake_idle_without_ipi(cpu); |
| } |
| |
| +#ifdef CONFIG_PREEMPT_LAZY |
| + |
| +static int tsk_is_polling(struct task_struct *p) |
| +{ |
| +#ifdef TIF_POLLING_NRFLAG |
| + return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); |
| +#else |
| + return 0; |
| +#endif |
| +} |
| + |
| +void resched_curr_lazy(struct rq *rq) |
| +{ |
| + struct task_struct *curr = rq->curr; |
| + int cpu; |
| + |
| + if (!sched_feat(PREEMPT_LAZY)) { |
| + resched_curr(rq); |
| + return; |
| + } |
| + |
| + if (test_tsk_need_resched(curr)) |
| + return; |
| + |
| + if (test_tsk_need_resched_lazy(curr)) |
| + return; |
| + |
| + set_tsk_need_resched_lazy(curr); |
| + |
| + cpu = cpu_of(rq); |
| + if (cpu == smp_processor_id()) |
| + return; |
| + |
| + /* NEED_RESCHED_LAZY must be visible before we test polling */ |
| + smp_mb(); |
| + if (!tsk_is_polling(curr)) |
| + smp_send_reschedule(cpu); |
| +} |
| +#endif |
| + |
| void resched_cpu(int cpu) |
| { |
| struct rq *rq = cpu_rq(cpu); |
| @@ -2138,6 +2182,7 @@ void migrate_disable(void) |
| preempt_disable(); |
| this_rq()->nr_pinned++; |
| p->migration_disabled = 1; |
| + preempt_lazy_disable(); |
| preempt_enable(); |
| } |
| EXPORT_SYMBOL_GPL(migrate_disable); |
| @@ -2149,6 +2194,8 @@ void migrate_enable(void) |
| if (p->migration_disabled > 1) { |
| p->migration_disabled--; |
| return; |
| + } else if (WARN_ON_ONCE(p->migration_disabled == 0)) { |
| + return; |
| } |
| |
| /* |
| @@ -2166,6 +2213,7 @@ void migrate_enable(void) |
| barrier(); |
| p->migration_disabled = 0; |
| this_rq()->nr_pinned--; |
| + preempt_lazy_enable(); |
| preempt_enable(); |
| } |
| EXPORT_SYMBOL_GPL(migrate_enable); |
| @@ -2945,9 +2993,8 @@ void force_compatible_cpus_allowed_ptr(struct task_struct *p) |
| |
| out_set_mask: |
| if (printk_ratelimit()) { |
| - printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", |
| - task_pid_nr(p), p->comm, |
| - cpumask_pr_args(override_mask)); |
| + printk("Overriding affinity for process %d (%s) to CPUs %*pbl\n", |
| + task_pid_nr(p), p->comm, cpumask_pr_args(override_mask)); |
| } |
| |
| WARN_ON(set_cpus_allowed_ptr(p, override_mask)); |
| @@ -3203,7 +3250,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state |
| * is actually now running somewhere else! |
| */ |
| while (task_running(rq, p)) { |
| - if (match_state && unlikely(READ_ONCE(p->__state) != match_state)) |
| + if (match_state && !task_match_state_lock(p, match_state)) |
| return 0; |
| cpu_relax(); |
| } |
| @@ -3218,7 +3265,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state |
| running = task_running(rq, p); |
| queued = task_on_rq_queued(p); |
| ncsw = 0; |
| - if (!match_state || READ_ONCE(p->__state) == match_state) |
| + if (!match_state || task_match_state_or_saved(p, match_state)) |
| ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ |
| task_rq_unlock(rq, p, &rf); |
| |
| @@ -3252,7 +3299,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state |
| ktime_t to = NSEC_PER_SEC / HZ; |
| |
| set_current_state(TASK_UNINTERRUPTIBLE); |
| - schedule_hrtimeout(&to, HRTIMER_MODE_REL); |
| + schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); |
| continue; |
| } |
| |
| @@ -3377,8 +3424,8 @@ static int select_fallback_rq(int cpu, struct task_struct *p) |
| * leave kernel. |
| */ |
| if (p->mm && printk_ratelimit()) { |
| - printk_deferred("process %d (%s) no longer affine to cpu%d\n", |
| - task_pid_nr(p), p->comm, cpu); |
| + printk("process %d (%s) no longer affine to cpu%d\n", |
| + task_pid_nr(p), p->comm, cpu); |
| } |
| } |
| |
| @@ -4386,6 +4433,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) |
| p->on_cpu = 0; |
| #endif |
| init_task_preempt_count(p); |
| +#ifdef CONFIG_HAVE_PREEMPT_LAZY |
| + task_thread_info(p)->preempt_lazy_count = 0; |
| +#endif |
| #ifdef CONFIG_SMP |
| plist_node_init(&p->pushable_tasks, MAX_PRIO); |
| RB_CLEAR_NODE(&p->pushable_dl_tasks); |
| @@ -4880,20 +4930,18 @@ static struct rq *finish_task_switch(struct task_struct *prev) |
| */ |
| if (mm) { |
| membarrier_mm_sync_core_before_usermode(mm); |
| - mmdrop(mm); |
| + mmdrop_sched(mm); |
| } |
| if (unlikely(prev_state == TASK_DEAD)) { |
| if (prev->sched_class->task_dead) |
| prev->sched_class->task_dead(prev); |
| |
| /* |
| - * Remove function-return probe instances associated with this |
| - * task and put them back on the free list. |
| + * Release VMAP'ed task stack immediately for reuse. On RT |
| + * enabled kernels this is delayed for latency reasons. |
| */ |
| - kprobe_flush_task(prev); |
| - |
| - /* Task is done with its stack. */ |
| - put_task_stack(prev); |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
| + put_task_stack(prev); |
| |
| put_task_struct_rcu_user(prev); |
| } |
| @@ -6294,6 +6342,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) |
| |
| next = pick_next_task(rq, prev, &rf); |
| clear_tsk_need_resched(prev); |
| + clear_tsk_need_resched_lazy(prev); |
| clear_preempt_need_resched(); |
| #ifdef CONFIG_SCHED_DEBUG |
| rq->last_seen_need_resched_ns = 0; |
| @@ -6379,8 +6428,12 @@ static inline void sched_submit_work(struct task_struct *tsk) |
| preempt_enable_no_resched(); |
| } |
| |
| - if (tsk_is_pi_blocked(tsk)) |
| - return; |
| + /* |
| + * spinlock and rwlock must not flush block requests. This will |
| + * deadlock if the callback attempts to acquire a lock which is |
| + * already acquired. |
| + */ |
| + SCHED_WARN_ON(current->__state & TASK_RTLOCK_WAIT); |
| |
| /* |
| * If we are going to sleep and we have plugged IO queued, |
| @@ -6511,6 +6564,30 @@ static void __sched notrace preempt_schedule_common(void) |
| } while (need_resched()); |
| } |
| |
| +#ifdef CONFIG_PREEMPT_LAZY |
| +/* |
| + * If TIF_NEED_RESCHED is set, we allow being scheduled away, since it is |
| + * set by an RT task. Otherwise we try to avoid being scheduled out for as |
| + * long as the preempt_lazy_count counter is > 0. |
| + */ |
| +static __always_inline int preemptible_lazy(void) |
| +{ |
| + if (test_thread_flag(TIF_NEED_RESCHED)) |
| + return 1; |
| + if (current_thread_info()->preempt_lazy_count) |
| + return 0; |
| + return 1; |
| +} |
| + |
| +#else |
| + |
| +static inline int preemptible_lazy(void) |
| +{ |
| + return 1; |
| +} |
| + |
| +#endif |
| + |
| #ifdef CONFIG_PREEMPTION |
| /* |
| * This is the entry point to schedule() from in-kernel preemption |
| @@ -6524,7 +6601,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) |
| */ |
| if (likely(!preemptible())) |
| return; |
| - |
| + if (!preemptible_lazy()) |
| + return; |
| preempt_schedule_common(); |
| } |
| NOKPROBE_SYMBOL(preempt_schedule); |
| @@ -6557,6 +6635,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) |
| if (likely(!preemptible())) |
| return; |
| |
| + if (!preemptible_lazy()) |
| + return; |
| + |
| do { |
| /* |
| * Because the function tracer can trace preempt_count_sub() |
| @@ -8709,7 +8790,9 @@ void __init init_idle(struct task_struct *idle, int cpu) |
| |
| /* Set the preempt count _outside_ the spinlocks! */ |
| init_idle_preempt_count(idle, cpu); |
| - |
| +#ifdef CONFIG_HAVE_PREEMPT_LAZY |
| + task_thread_info(idle)->preempt_lazy_count = 0; |
| +#endif |
| /* |
| * The idle tasks have their own, simple scheduling class: |
| */ |
| @@ -9503,14 +9586,8 @@ void __init sched_init(void) |
| } |
| |
| #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
| -static inline int preempt_count_equals(int preempt_offset) |
| -{ |
| - int nested = preempt_count() + rcu_preempt_depth(); |
| |
| - return (nested == preempt_offset); |
| -} |
| - |
| -void __might_sleep(const char *file, int line, int preempt_offset) |
| +void __might_sleep(const char *file, int line) |
| { |
| unsigned int state = get_current_state(); |
| /* |
| @@ -9524,11 +9601,32 @@ void __might_sleep(const char *file, int line, int preempt_offset) |
| (void *)current->task_state_change, |
| (void *)current->task_state_change); |
| |
| - ___might_sleep(file, line, preempt_offset); |
| + __might_resched(file, line, 0); |
| } |
| EXPORT_SYMBOL(__might_sleep); |
| |
| -void ___might_sleep(const char *file, int line, int preempt_offset) |
| +static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) |
| +{ |
| + if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT)) |
| + return; |
| + |
| + if (preempt_count() == preempt_offset) |
| + return; |
| + |
| + pr_err("Preemption disabled at:"); |
| + print_ip_sym(KERN_ERR, ip); |
| +} |
| + |
| +static inline bool resched_offsets_ok(unsigned int offsets) |
| +{ |
| + unsigned int nested = preempt_count(); |
| + |
| + nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT; |
| + |
| + return nested == offsets; |
| +} |
| + |
| +void __might_resched(const char *file, int line, unsigned int offsets) |
| { |
| /* Ratelimiting timestamp: */ |
| static unsigned long prev_jiffy; |
| @@ -9538,7 +9636,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset) |
| /* WARN_ON_ONCE() by default, no rate limit required: */ |
| rcu_sleep_check(); |
| |
| - if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && |
| + if ((resched_offsets_ok(offsets) && !irqs_disabled() && |
| !is_idle_task(current) && !current->non_block_count) || |
| system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || |
| oops_in_progress) |
| @@ -9551,29 +9649,33 @@ void ___might_sleep(const char *file, int line, int preempt_offset) |
| /* Save this before calling printk(), since that will clobber it: */ |
| preempt_disable_ip = get_preempt_disable_ip(current); |
| |
| - printk(KERN_ERR |
| - "BUG: sleeping function called from invalid context at %s:%d\n", |
| - file, line); |
| - printk(KERN_ERR |
| - "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", |
| - in_atomic(), irqs_disabled(), current->non_block_count, |
| - current->pid, current->comm); |
| + pr_err("BUG: sleeping function called from invalid context at %s:%d\n", |
| + file, line); |
| + pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", |
| + in_atomic(), irqs_disabled(), current->non_block_count, |
| + current->pid, current->comm); |
| + pr_err("preempt_count: %x, expected: %x\n", preempt_count(), |
| + offsets & MIGHT_RESCHED_PREEMPT_MASK); |
| + |
| + if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { |
| + pr_err("RCU nest depth: %d, expected: %u\n", |
| + rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT); |
| + } |
| |
| if (task_stack_end_corrupted(current)) |
| - printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); |
| + pr_emerg("Thread overran stack, or stack corrupted\n"); |
| |
| debug_show_held_locks(current); |
| if (irqs_disabled()) |
| print_irqtrace_events(current); |
| - if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) |
| - && !preempt_count_equals(preempt_offset)) { |
| - pr_err("Preemption disabled at:"); |
| - print_ip_sym(KERN_ERR, preempt_disable_ip); |
| - } |
| + |
| + print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK, |
| + preempt_disable_ip); |
| + |
| dump_stack(); |
| add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
| } |
| -EXPORT_SYMBOL(___might_sleep); |
| +EXPORT_SYMBOL(__might_resched); |
| |
| void __cant_sleep(const char *file, int line, int preempt_offset) |
| { |
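| |
| Since migrate_disable() now also takes a lazy-preemption reference, a pinned |
| section stays on its CPU and remains preemptible by RT tasks, while wakeup |
| preemption by fair-class tasks is deferred until migrate_enable(). A minimal |
| sketch of a pinned per-CPU update under these rules (the variable and |
| function names are assumptions): |
| |
| #include <linux/percpu.h> |
| #include <linux/preempt.h> |
| |
| static DEFINE_PER_CPU(unsigned long, example_hits); |
| |
| /* Pinned but preemptible: safe on PREEMPT_RT without disabling |
|  * preemption across the read-modify-write. */ |
| static void example_count_hit(void) |
| { |
| 	unsigned long *p; |
| |
| 	migrate_disable(); |
| 	p = this_cpu_ptr(&example_hits); |
| 	*p += 1;		/* stays on this CPU while pinned */ |
| 	migrate_enable(); |
| } |
| |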
| diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c |
| index fffcb1aa77b7..2799117917c7 100644 |
| --- a/kernel/sched/deadline.c |
| +++ b/kernel/sched/deadline.c |
| @@ -800,7 +800,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se) |
| * entity. |
| */ |
| if (dl_time_before(dl_se->deadline, rq_clock(rq))) { |
| - printk_deferred_once("sched: DL replenish lagged too much\n"); |
| + printk_once("sched: DL replenish lagged too much\n"); |
| dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline; |
| dl_se->runtime = pi_of(dl_se)->dl_runtime; |
| } |
| diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c |
| index fcbacc35d2b9..99e1ccd10439 100644 |
| --- a/kernel/sched/fair.c |
| +++ b/kernel/sched/fair.c |
| @@ -4247,10 +4247,7 @@ static inline void check_schedstat_required(void) |
| trace_sched_stat_iowait_enabled() || |
| trace_sched_stat_blocked_enabled() || |
| trace_sched_stat_runtime_enabled()) { |
| - printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, " |
| - "stat_blocked and stat_runtime require the " |
| - "kernel parameter schedstats=enable or " |
| - "kernel.sched_schedstats=1\n"); |
| + printk_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n"); |
| } |
| #endif |
| } |
| @@ -4458,7 +4455,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) |
| ideal_runtime = sched_slice(cfs_rq, curr); |
| delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; |
| if (delta_exec > ideal_runtime) { |
| - resched_curr(rq_of(cfs_rq)); |
| + resched_curr_lazy(rq_of(cfs_rq)); |
| /* |
| * The current task ran long enough, ensure it doesn't get |
| * re-elected due to buddy favours. |
| @@ -4482,7 +4479,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) |
| return; |
| |
| if (delta > ideal_runtime) |
| - resched_curr(rq_of(cfs_rq)); |
| + resched_curr_lazy(rq_of(cfs_rq)); |
| } |
| |
| static void |
| @@ -4625,7 +4622,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) |
| * validating it and just reschedule. |
| */ |
| if (queued) { |
| - resched_curr(rq_of(cfs_rq)); |
| + resched_curr_lazy(rq_of(cfs_rq)); |
| return; |
| } |
| /* |
| @@ -4765,7 +4762,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) |
| * hierarchy can be throttled |
| */ |
| if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) |
| - resched_curr(rq_of(cfs_rq)); |
| + resched_curr_lazy(rq_of(cfs_rq)); |
| } |
| |
| static __always_inline |
| @@ -5528,7 +5525,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) |
| |
| if (delta < 0) { |
| if (task_current(rq, p)) |
| - resched_curr(rq); |
| + resched_curr_lazy(rq); |
| return; |
| } |
| hrtick_start(rq, delta); |
| @@ -7220,7 +7217,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ |
| return; |
| |
| preempt: |
| - resched_curr(rq); |
| + resched_curr_lazy(rq); |
| /* |
| * Only set the backward buddy when the current task is still |
| * on the rq. This can happen when a wakeup gets interleaved |
| @@ -11123,7 +11120,7 @@ static void task_fork_fair(struct task_struct *p) |
| * 'current' within the tree based on its new key value. |
| */ |
| swap(curr->vruntime, se->vruntime); |
| - resched_curr(rq); |
| + resched_curr_lazy(rq); |
| } |
| |
| se->vruntime -= cfs_rq->min_vruntime; |
| @@ -11150,7 +11147,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) |
| */ |
| if (task_current(rq, p)) { |
| if (p->prio > oldprio) |
| - resched_curr(rq); |
| + resched_curr_lazy(rq); |
| } else |
| check_preempt_curr(rq, p, 0); |
| } |
| diff --git a/kernel/sched/features.h b/kernel/sched/features.h |
| index 7f8dace0964c..d5cee51819bf 100644 |
| --- a/kernel/sched/features.h |
| +++ b/kernel/sched/features.h |
| @@ -46,11 +46,19 @@ SCHED_FEAT(DOUBLE_TICK, false) |
| */ |
| SCHED_FEAT(NONTASK_CAPACITY, true) |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +SCHED_FEAT(TTWU_QUEUE, false) |
| +# ifdef CONFIG_PREEMPT_LAZY |
| +SCHED_FEAT(PREEMPT_LAZY, true) |
| +# endif |
| +#else |
| + |
| /* |
| * Queue remote wakeups on the target CPU and process them |
| * using the scheduler IPI. Reduces rq->lock contention/bounces. |
| */ |
| SCHED_FEAT(TTWU_QUEUE, true) |
| +#endif |
| |
| /* |
| * When doing wakeups, attempt to limit superfluous scans of the LLC domain. |
| diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c |
| index cad2a1b34ed0..fca1bfa2763f 100644 |
| --- a/kernel/sched/psi.c |
| +++ b/kernel/sched/psi.c |
| @@ -717,11 +717,10 @@ static void psi_group_change(struct psi_group *group, int cpu, |
| if (groupc->tasks[t]) { |
| groupc->tasks[t]--; |
| } else if (!psi_bug) { |
| - printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u %u] clear=%x set=%x\n", |
| - cpu, t, groupc->tasks[0], |
| - groupc->tasks[1], groupc->tasks[2], |
| - groupc->tasks[3], groupc->tasks[4], |
| - clear, set); |
| + pr_err("psi: task underflow! cpu=%d t=%d tasks=[%u %u %u %u] clear=%x set=%x\n", |
| + cpu, t, groupc->tasks[0], |
| + groupc->tasks[1], groupc->tasks[2], |
| + groupc->tasks[3], clear, set); |
| psi_bug = 1; |
| } |
| } |
| @@ -787,9 +786,9 @@ static void psi_flags_change(struct task_struct *task, int clear, int set) |
| if (((task->psi_flags & set) || |
| (task->psi_flags & clear) != clear) && |
| !psi_bug) { |
| - printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n", |
| - task->pid, task->comm, task_cpu(task), |
| - task->psi_flags, clear, set); |
| + pr_err("psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n", |
| + task->pid, task->comm, task_cpu(task), |
| + task->psi_flags, clear, set); |
| psi_bug = 1; |
| } |
| |
| diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c |
| index 8007d087a57f..6ba8c7bdcdae 100644 |
| --- a/kernel/sched/rt.c |
| +++ b/kernel/sched/rt.c |
| @@ -983,7 +983,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) |
| */ |
| if (likely(rt_b->rt_runtime)) { |
| rt_rq->rt_throttled = 1; |
| - printk_deferred_once("sched: RT throttling activated\n"); |
| + printk_once("sched: RT throttling activated\n"); |
| } else { |
| /* |
| * In case we did anyway, make it go away, |
| diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h |
| index fe8be2f8a47d..38f9078fcaaa 100644 |
| --- a/kernel/sched/sched.h |
| +++ b/kernel/sched/sched.h |
| @@ -2323,6 +2323,15 @@ extern void reweight_task(struct task_struct *p, int prio); |
| extern void resched_curr(struct rq *rq); |
| extern void resched_cpu(int cpu); |
| |
| +#ifdef CONFIG_PREEMPT_LAZY |
| +extern void resched_curr_lazy(struct rq *rq); |
| +#else |
| +static inline void resched_curr_lazy(struct rq *rq) |
| +{ |
| + resched_curr(rq); |
| +} |
| +#endif |
| + |
| extern struct rt_bandwidth def_rt_bandwidth; |
| extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); |
| |
| diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c |
| index e1c655f928c7..f230b1ac7f91 100644 |
| --- a/kernel/sched/swait.c |
| +++ b/kernel/sched/swait.c |
| @@ -64,6 +64,7 @@ void swake_up_all(struct swait_queue_head *q) |
| struct swait_queue *curr; |
| LIST_HEAD(tmp); |
| |
| + WARN_ON(irqs_disabled()); |
| raw_spin_lock_irq(&q->lock); |
| list_splice_init(&q->task_list, &tmp); |
| while (!list_empty(&tmp)) { |
| diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c |
| index 4e8698e62f07..3d0157bd4e14 100644 |
| --- a/kernel/sched/topology.c |
| +++ b/kernel/sched/topology.c |
| @@ -526,7 +526,7 @@ static int init_rootdomain(struct root_domain *rd) |
| #ifdef HAVE_RT_PUSH_IPI |
| rd->rto_cpu = -1; |
| raw_spin_lock_init(&rd->rto_lock); |
| - init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); |
| + rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func); |
| #endif |
| |
| rd->visit_gen = 0; |
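| |
| The topology.c hunk switches rto_push_work to IRQ_WORK_INIT_HARD() so its |
| callback keeps running from hard interrupt context on PREEMPT_RT instead of |
| being deferred to the irq_work kthread. A generic sketch of the same |
| pattern (all names are illustrative): |
| |
| #include <linux/irq_work.h> |
| |
| static void example_hard_work_fn(struct irq_work *work) |
| { |
| 	/* Runs in hard irq context even on PREEMPT_RT: keep it short |
| 	 * and never take sleeping locks here. */ |
| } |
| |
| static struct irq_work example_hard_work = |
| 	IRQ_WORK_INIT_HARD(example_hard_work_fn); |
| |
| static void example_poke(void) |
| { |
| 	irq_work_queue(&example_hard_work); |
| } |
| |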
| diff --git a/kernel/signal.c b/kernel/signal.c |
| index d831f0aec56e..24fee2a3788a 100644 |
| --- a/kernel/signal.c |
| +++ b/kernel/signal.c |
| @@ -1324,6 +1324,34 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, |
| struct k_sigaction *action; |
| int sig = info->si_signo; |
| |
| + /* |
| + * On some archs, PREEMPT_RT has to delay sending a signal from a trap |
| + * since it cannot enable preemption, and the signal code's spin_locks |
| + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME, which will |
| + * send the signal on exit of the trap. |
| + */ |
| +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND |
| + if (in_atomic()) { |
| + struct task_struct *t = current; |
| + |
| + if (WARN_ON_ONCE(t->forced_info.si_signo)) |
| + return 0; |
| + |
| + if (is_si_special(info)) { |
| + WARN_ON_ONCE(info != SEND_SIG_PRIV); |
| + t->forced_info.si_signo = info->si_signo; |
| + t->forced_info.si_errno = 0; |
| + t->forced_info.si_code = SI_KERNEL; |
| + t->forced_info.si_pid = 0; |
| + t->forced_info.si_uid = 0; |
| + } else { |
| + t->forced_info = *info; |
| + } |
| + |
| + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); |
| + return 0; |
| + } |
| +#endif |
| spin_lock_irqsave(&t->sighand->siglock, flags); |
| action = &t->sighand->action[sig-1]; |
| ignored = action->sa.sa_handler == SIG_IGN; |
| @@ -2308,16 +2336,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t |
| if (gstop_done && ptrace_reparented(current)) |
| do_notify_parent_cldstop(current, false, why); |
| |
| - /* |
| - * Don't want to allow preemption here, because |
| - * sys_ptrace() needs this task to be inactive. |
| - * |
| - * XXX: implement read_unlock_no_resched(). |
| - */ |
| - preempt_disable(); |
| read_unlock(&tasklist_lock); |
| cgroup_enter_frozen(); |
| - preempt_enable_no_resched(); |
| freezable_schedule(); |
| cgroup_leave_frozen(true); |
| } else { |
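| |
| The ARCH_RT_DELAYS_SIGNAL_SEND block above only stashes the siginfo and |
| sets TIF_NOTIFY_RESUME; an exit-to-user path must later replay it. A hedged |
| sketch of that consumer (its placement in an arch's resume-notify handler, |
| and the function name, are assumptions of this illustration): |
| |
| #include <linux/sched.h> |
| #include <linux/sched/signal.h> |
| |
| /* Hypothetical arch hook, run from TIF_NOTIFY_RESUME handling on the |
|  * way back to user space: deliver a signal a trap had to defer. */ |
| static void example_deliver_delayed_signal(void) |
| { |
| #ifdef ARCH_RT_DELAYS_SIGNAL_SEND |
| 	if (unlikely(current->forced_info.si_signo)) { |
| 		force_sig_info(&current->forced_info); |
| 		current->forced_info.si_signo = 0; |
| 	} |
| #endif |
| } |
| |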
| diff --git a/kernel/smp.c b/kernel/smp.c |
| index b68d63e965db..d00f1dda09c6 100644 |
| --- a/kernel/smp.c |
| +++ b/kernel/smp.c |
| @@ -690,10 +690,20 @@ void flush_smp_call_function_from_idle(void) |
| |
| cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU, |
| smp_processor_id(), CFD_SEQ_IDLE); |
| + |
| local_irq_save(flags); |
| flush_smp_call_function_queue(true); |
| - if (local_softirq_pending()) |
| - do_softirq(); |
| + |
| + if (local_softirq_pending()) { |
| + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { |
| + do_softirq(); |
| + } else { |
| + struct task_struct *ksoftirqd = this_cpu_ksoftirqd(); |
| + |
| + if (ksoftirqd && !task_is_running(ksoftirqd)) |
| + wake_up_process(ksoftirqd); |
| + } |
| + } |
| |
| local_irq_restore(flags); |
| } |
| diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c |
| index 003ccf338d20..00fc43605c6b 100644 |
| --- a/kernel/time/clockevents.c |
| +++ b/kernel/time/clockevents.c |
| @@ -203,8 +203,7 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev) |
| { |
| /* Nothing to do if we already reached the limit */ |
| if (dev->min_delta_ns >= MIN_DELTA_LIMIT) { |
| - printk_deferred(KERN_WARNING |
| - "CE: Reprogramming failure. Giving up\n"); |
| + pr_warn("CE: Reprogramming failure. Giving up\n"); |
| dev->next_event = KTIME_MAX; |
| return -ETIME; |
| } |
| @@ -217,10 +216,8 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev) |
| if (dev->min_delta_ns > MIN_DELTA_LIMIT) |
| dev->min_delta_ns = MIN_DELTA_LIMIT; |
| |
| - printk_deferred(KERN_WARNING |
| - "CE: %s increased min_delta_ns to %llu nsec\n", |
| - dev->name ? dev->name : "?", |
| - (unsigned long long) dev->min_delta_ns); |
| + pr_warn("CE: %s increased min_delta_ns to %llu nsec\n", |
| + dev->name ? dev->name : "?", (unsigned long long) dev->min_delta_ns); |
| return 0; |
| } |
| |
| diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c |
| index 406dccb79c2b..829d7797811f 100644 |
| --- a/kernel/time/ntp.c |
| +++ b/kernel/time/ntp.c |
| @@ -939,9 +939,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm) |
| time_status |= STA_PPSERROR; |
| pps_errcnt++; |
| pps_dec_freq_interval(); |
| - printk_deferred(KERN_ERR |
| - "hardpps: PPSERROR: interval too long - %lld s\n", |
| - freq_norm.sec); |
| + pr_err("hardpps: PPSERROR: interval too long - %lld s\n", freq_norm.sec); |
| return 0; |
| } |
| |
| @@ -954,8 +952,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm) |
| delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT); |
| pps_freq = ftemp; |
| if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) { |
| - printk_deferred(KERN_WARNING |
| - "hardpps: PPSWANDER: change=%ld\n", delta); |
| + pr_warn("hardpps: PPSWANDER: change=%ld\n", delta); |
| time_status |= STA_PPSWANDER; |
| pps_stbcnt++; |
| pps_dec_freq_interval(); |
| @@ -999,9 +996,8 @@ static void hardpps_update_phase(long error) |
| * the time offset is updated. |
| */ |
| if (jitter > (pps_jitter << PPS_POPCORN)) { |
| - printk_deferred(KERN_WARNING |
| - "hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", |
| - jitter, (pps_jitter << PPS_POPCORN)); |
| + pr_warn("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", |
| + jitter, (pps_jitter << PPS_POPCORN)); |
| time_status |= STA_PPSJITTER; |
| pps_jitcnt++; |
| } else if (time_status & STA_PPSTIME) { |
| @@ -1058,7 +1054,7 @@ void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_t |
| time_status |= STA_PPSJITTER; |
| /* restart the frequency calibration interval */ |
| pps_fbase = *raw_ts; |
| - printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n"); |
| + pr_err("hardpps: PPSJITTER: bad pulse\n"); |
| return; |
| } |
| |
| diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c |
| index 871c912860ed..f18cad9a14df 100644 |
| --- a/kernel/time/timekeeping.c |
| +++ b/kernel/time/timekeeping.c |
| @@ -204,22 +204,23 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset) |
| const char *name = tk->tkr_mono.clock->name; |
| |
| if (offset > max_cycles) { |
| - printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", |
| - offset, name, max_cycles); |
| - printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); |
| + printk("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", |
| + offset, name, max_cycles); |
| + printk(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); |
| } else { |
| if (offset > (max_cycles >> 1)) { |
| - printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n", |
| - offset, name, max_cycles >> 1); |
| - printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); |
| + printk("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n", |
| + offset, name, max_cycles >> 1); |
| + printk(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); |
| } |
| } |
| |
| if (tk->underflow_seen) { |
| if (jiffies - tk->last_warning > WARNING_FREQ) { |
| - printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); |
| - printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); |
| - printk_deferred(" Your kernel is probably still fine.\n"); |
| + printk("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", |
| + name); |
| + printk(" Please report this, consider using a different clocksource, if possible.\n"); |
| + printk(" Your kernel is probably still fine.\n"); |
| tk->last_warning = jiffies; |
| } |
| tk->underflow_seen = 0; |
| @@ -227,9 +228,10 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset) |
| |
| if (tk->overflow_seen) { |
| if (jiffies - tk->last_warning > WARNING_FREQ) { |
| - printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); |
| - printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); |
| - printk_deferred(" Your kernel is probably still fine.\n"); |
| + printk("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", |
| + name); |
| + printk(" Please report this, consider using a different clocksource, if possible.\n"); |
| + printk(" Your kernel is probably still fine.\n"); |
| tk->last_warning = jiffies; |
| } |
| tk->overflow_seen = 0; |
| @@ -1669,9 +1671,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, |
| const struct timespec64 *delta) |
| { |
| if (!timespec64_valid_strict(delta)) { |
| - printk_deferred(KERN_WARNING |
| - "__timekeeping_inject_sleeptime: Invalid " |
| - "sleep delta value!\n"); |
| + pr_warn("%s: Invalid sleep delta value!\n", __func__); |
| return; |
| } |
| tk_xtime_add(tk, delta); |
| diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c |
| index b73e8850e58d..149cc4b08d8e 100644 |
| --- a/kernel/time/timekeeping_debug.c |
| +++ b/kernel/time/timekeeping_debug.c |
| @@ -49,7 +49,7 @@ void tk_debug_account_sleep_time(const struct timespec64 *t) |
| int bin = min(fls(t->tv_sec), NUM_BINS-1); |
| |
| sleep_time_bin[bin]++; |
| - pm_deferred_pr_dbg("Timekeeping suspended for %lld.%03lu seconds\n", |
| + pm_pr_dbg("Timekeeping suspended for %lld.%03lu seconds\n", |
| (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); |
| } |
| |
| diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c |
| index 518ce39a878d..53ea832567d7 100644 |
| --- a/kernel/trace/trace.c |
| +++ b/kernel/trace/trace.c |
| @@ -2636,7 +2636,13 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) |
| trace_flags |= TRACE_FLAG_NEED_RESCHED; |
| if (test_preempt_need_resched()) |
| trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; |
| - return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | |
| +#ifdef CONFIG_PREEMPT_LAZY |
| + if (need_resched_lazy()) |
| + trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; |
| +#endif |
| + |
| + return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) | |
| + (preempt_lazy_count() & 0xff) << 16 | |
| (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; |
| } |
| |
| @@ -4217,15 +4223,17 @@ unsigned long trace_total_entries(struct trace_array *tr) |
| |
| static void print_lat_help_header(struct seq_file *m) |
| { |
| - seq_puts(m, "# _------=> CPU# \n" |
| - "# / _-----=> irqs-off \n" |
| - "# | / _----=> need-resched \n" |
| - "# || / _---=> hardirq/softirq \n" |
| - "# ||| / _--=> preempt-depth \n" |
| - "# |||| / _-=> migrate-disable \n" |
| - "# ||||| / delay \n" |
| - "# cmd pid |||||| time | caller \n" |
| - "# \\ / |||||| \\ | / \n"); |
| + seq_puts(m, "# _--------=> CPU# \n" |
| + "# / _-------=> irqs-off \n" |
| + "# | / _------=> need-resched \n" |
| + "# || / _-----=> need-resched-lazy\n" |
| + "# ||| / _----=> hardirq/softirq \n" |
| + "# |||| / _---=> preempt-depth \n" |
| + "# ||||| / _--=> preempt-lazy-depth\n" |
| + "# |||||| / _-=> migrate-disable \n" |
| + "# ||||||| / delay \n" |
| + "# cmd pid |||||||| time | caller \n" |
| + "# \\ / |||||||| \\ | / \n"); |
| } |
| |
| static void print_event_info(struct array_buffer *buf, struct seq_file *m) |
| @@ -4259,14 +4267,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file |
| |
| print_event_info(buf, m); |
| |
| - seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); |
| - seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); |
| - seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); |
| - seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); |
| - seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); |
| - seq_printf(m, "# %.*s|||| / delay\n", prec, space); |
| - seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); |
| - seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); |
| + seq_printf(m, "# %.*s _-------=> irqs-off\n", prec, space); |
| + seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space); |
| + seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space); |
| + seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space); |
| + seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space); |
| + seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space); |
| + seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space); |
| + seq_printf(m, "# %.*s|||||| / delay\n", prec, space); |
| + seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID "); |
| + seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | "); |
| } |
| |
| void |
| diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c |
| index c072e8b9849c..0098d7713f91 100644 |
| --- a/kernel/trace/trace_events.c |
| +++ b/kernel/trace/trace_events.c |
| @@ -192,6 +192,7 @@ static int trace_define_common_fields(void) |
| /* Holds both preempt_count and migrate_disable */ |
| __common_field(unsigned char, preempt_count); |
| __common_field(int, pid); |
| + __common_field(unsigned char, preempt_lazy_count); |
| |
| return ret; |
| } |
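| |
| [Note the placement: the new common field is appended after pid rather than next to preempt_count. The common-field block is visible to userspace through the tracefs "format" files, so appending keeps the offsets of the existing common fields stable for parsers that hard-code them.] |
| |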
| diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c |
| index c2ca40e8595b..be070d258c3b 100644 |
| --- a/kernel/trace/trace_output.c |
| +++ b/kernel/trace/trace_output.c |
| @@ -451,6 +451,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) |
| { |
| char hardsoft_irq; |
| char need_resched; |
| + char need_resched_lazy; |
| char irqs_off; |
| int hardirq; |
| int softirq; |
| @@ -481,6 +482,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) |
| break; |
| } |
| |
| + need_resched_lazy = |
| + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; |
| + |
| hardsoft_irq = |
| (nmi && hardirq) ? 'Z' : |
| nmi ? 'z' : |
| @@ -489,14 +493,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) |
| softirq ? 's' : |
| '.' ; |
| |
| - trace_seq_printf(s, "%c%c%c", |
| - irqs_off, need_resched, hardsoft_irq); |
| + trace_seq_printf(s, "%c%c%c%c", |
| + irqs_off, need_resched, need_resched_lazy, |
| + hardsoft_irq); |
| |
| if (entry->preempt_count & 0xf) |
| trace_seq_printf(s, "%x", entry->preempt_count & 0xf); |
| else |
| trace_seq_putc(s, '.'); |
| |
| + if (entry->preempt_lazy_count) |
| + trace_seq_printf(s, "%x", entry->preempt_lazy_count); |
| + else |
| + trace_seq_putc(s, '.'); |
| + |
| if (entry->preempt_count & 0xf0) |
| trace_seq_printf(s, "%x", entry->preempt_count >> 4); |
| else |
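| |
| [With the two extra columns the latency field reads, in order: irqs-off, need-resched, lazy need-resched, hardirq/softirq, preempt depth, lazy depth, migrate-disable depth. As a made-up example (ours, not output from the patch), a record tagged "dNL.21." means irqs off (d), need-resched set (N), lazy need-resched set (L), no hard/soft interrupt (.), preempt depth 2, lazy depth 1, migration not disabled (.).] |
| |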
| diff --git a/kernel/workqueue.c b/kernel/workqueue.c |
| index 3f4d27668576..fa66b2ac3198 100644 |
| --- a/kernel/workqueue.c |
| +++ b/kernel/workqueue.c |
| @@ -4845,9 +4845,7 @@ void show_workqueue_state(void) |
| * drivers that queue work while holding locks |
| * also taken in their write paths. |
| */ |
| - printk_deferred_enter(); |
| show_pwq(pwq); |
| - printk_deferred_exit(); |
| } |
| raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); |
| /* |
| @@ -4871,7 +4869,6 @@ void show_workqueue_state(void) |
| * queue work while holding locks also taken in their write |
| * paths. |
| */ |
| - printk_deferred_enter(); |
| pr_info("pool %d:", pool->id); |
| pr_cont_pool_info(pool); |
| pr_cont(" hung=%us workers=%d", |
| @@ -4886,7 +4883,6 @@ void show_workqueue_state(void) |
| first = false; |
| } |
| pr_cont("\n"); |
| - printk_deferred_exit(); |
| next_pool: |
| raw_spin_unlock_irqrestore(&pool->lock, flags); |
| /* |
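| |
| [This and the printk_deferred() conversions in kernel/time and lib/ratelimit share one rationale: with the printk rework carried by this series, printk() only appends to the lockless ringbuffer and consoles are written from dedicated kthreads, so the deferred-printing workarounds for lock recursion are no longer needed.] |
| |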
| diff --git a/lib/bug.c b/lib/bug.c |
| index 45a0584f6541..03a87df69ed2 100644 |
| --- a/lib/bug.c |
| +++ b/lib/bug.c |
| @@ -206,6 +206,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) |
| else |
| pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n", |
| (void *)bugaddr); |
| + pr_flush(1000, true); |
| |
| return BUG_TRAP_TYPE_BUG; |
| } |
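| |
| [pr_flush() is part of the same printk rework; since console output is now asynchronous, the BUG text could otherwise still be sitting in the ringbuffer when the trap is processed. As we read the rework's API, the call waits up to 1000 ms for consoles to catch up, with the second argument restarting the timeout whenever forward progress is made.] |
| |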
| diff --git a/lib/dump_stack.c b/lib/dump_stack.c |
| index 6b7f1bf6715d..6e8ae42c7e27 100644 |
| --- a/lib/dump_stack.c |
| +++ b/lib/dump_stack.c |
| @@ -102,9 +102,9 @@ asmlinkage __visible void dump_stack_lvl(const char *log_lvl) |
| * Permit this cpu to perform nested stack dumps while serialising |
| * against other CPUs |
| */ |
| - printk_cpu_lock_irqsave(flags); |
| + raw_printk_cpu_lock_irqsave(flags); |
| __dump_stack(log_lvl); |
| - printk_cpu_unlock_irqrestore(flags); |
| + raw_printk_cpu_unlock_irqrestore(flags); |
| } |
| EXPORT_SYMBOL(dump_stack_lvl); |
| |
| diff --git a/lib/irq_poll.c b/lib/irq_poll.c |
| index 2f17b488d58e..2b9f797642f6 100644 |
| --- a/lib/irq_poll.c |
| +++ b/lib/irq_poll.c |
| @@ -191,11 +191,13 @@ static int irq_poll_cpu_dead(unsigned int cpu) |
| * If a CPU goes away, splice its entries to the current CPU |
| * and trigger a run of the softirq |
| */ |
| + local_bh_disable(); |
| local_irq_disable(); |
| list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), |
| this_cpu_ptr(&blk_cpu_iopoll)); |
| __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); |
| local_irq_enable(); |
| + local_bh_enable(); |
| |
| return 0; |
| } |
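| |
| [__raise_softirq_irqoff() only marks the softirq pending, and this hotplug callback runs in task context where nothing would process it promptly. Bracketing the splice with local_bh_disable()/local_bh_enable() guarantees the pending IRQ_POLL_SOFTIRQ is executed at the enable, and it does so in a way that also holds on PREEMPT_RT, where softirqs run in preemptible task context.] |
| |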
| diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c |
| index 161108e5d2fe..1266ea3726d7 100644 |
| --- a/lib/locking-selftest.c |
| +++ b/lib/locking-selftest.c |
| @@ -26,6 +26,12 @@ |
| #include <linux/rtmutex.h> |
| #include <linux/local_lock.h> |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +# define NON_RT(...) |
| +#else |
| +# define NON_RT(...) __VA_ARGS__ |
| +#endif |
| + |
| /* |
| * Change this to 1 if you want to see the failure printouts: |
| */ |
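| |
| [NON_RT() compiles its argument out on PREEMPT_RT; it is used throughout the rest of this file to skip the softirq-safety testcases. With forced-threaded interrupts on RT, softirqs execute in preemptible task context, so "softirq-safe vs. softirq-unsafe" lock ordering is no longer a meaningful category to test.] |
| |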
| @@ -139,7 +145,7 @@ static DEFINE_RT_MUTEX(rtmutex_Z2); |
| |
| #endif |
| |
| -static local_lock_t local_A = INIT_LOCAL_LOCK(local_A); |
| +static DEFINE_PER_CPU(local_lock_t, local_A); |
| |
| /* |
| * non-inlined runtime initializers, to let separate locks share |
| @@ -712,12 +718,18 @@ GENERATE_TESTCASE(ABCDBCDA_rtmutex); |
| |
| #undef E |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +# define RT_PREPARE_DBL_UNLOCK() { migrate_disable(); rcu_read_lock(); } |
| +#else |
| +# define RT_PREPARE_DBL_UNLOCK() |
| +#endif |
| /* |
| * Double unlock: |
| */ |
| #define E() \ |
| \ |
| LOCK(A); \ |
| + RT_PREPARE_DBL_UNLOCK(); \ |
| UNLOCK(A); \ |
| UNLOCK(A); /* fail */ |
| |
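| |
| [On RT a spinlock_t is rtmutex-based: each LOCK(A) takes migrate_disable() and rcu_read_lock() internally and each UNLOCK drops them, so the deliberate second unlock in this testcase would underflow both counters. RT_PREPARE_DBL_UNLOCK() pre-charges them to keep the counts balanced; the dotest() changes further down unwind whatever is left over when a testcase bails out early.] |
| |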
| @@ -802,6 +814,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) |
| #include "locking-selftest-wlock-hardirq.h" |
| GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock) |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| #include "locking-selftest-spin-softirq.h" |
| GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin) |
| |
| @@ -810,10 +823,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) |
| |
| #include "locking-selftest-wlock-softirq.h" |
| GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) |
| +#endif |
| |
| #undef E1 |
| #undef E2 |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| /* |
| * Enabling hardirqs with a softirq-safe lock held: |
| */ |
| @@ -846,6 +861,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) |
| #undef E1 |
| #undef E2 |
| |
| +#endif |
| + |
| /* |
| * Enabling irqs with an irq-safe lock held: |
| */ |
| @@ -875,6 +892,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) |
| #include "locking-selftest-wlock-hardirq.h" |
| GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock) |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| #include "locking-selftest-spin-softirq.h" |
| GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin) |
| |
| @@ -883,6 +901,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) |
| |
| #include "locking-selftest-wlock-softirq.h" |
| GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) |
| +#endif |
| |
| #undef E1 |
| #undef E2 |
| @@ -921,6 +940,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) |
| #include "locking-selftest-wlock-hardirq.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock) |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| #include "locking-selftest-spin-softirq.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin) |
| |
| @@ -929,6 +949,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) |
| |
| #include "locking-selftest-wlock-softirq.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) |
| +#endif |
| |
| #undef E1 |
| #undef E2 |
| @@ -969,6 +990,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) |
| #include "locking-selftest-wlock-hardirq.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock) |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| #include "locking-selftest-spin-softirq.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin) |
| |
| @@ -977,6 +999,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) |
| |
| #include "locking-selftest-wlock-softirq.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) |
| +#endif |
| |
| #undef E1 |
| #undef E2 |
| @@ -1031,6 +1054,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock) |
| #include "locking-selftest-wlock-hardirq.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock) |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| #include "locking-selftest-spin-softirq.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin) |
| |
| @@ -1039,6 +1063,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock) |
| |
| #include "locking-selftest-wlock-softirq.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) |
| +#endif |
| |
| #undef E1 |
| #undef E2 |
| @@ -1206,12 +1231,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_rlock) |
| #include "locking-selftest-wlock.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_wlock) |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| #include "locking-selftest-softirq.h" |
| #include "locking-selftest-rlock.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_rlock) |
| |
| #include "locking-selftest-wlock.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock) |
| +#endif |
| |
| #undef E1 |
| #undef E2 |
| @@ -1252,12 +1279,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_rlock) |
| #include "locking-selftest-wlock.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_wlock) |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| #include "locking-selftest-softirq.h" |
| #include "locking-selftest-rlock.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_rlock) |
| |
| #include "locking-selftest-wlock.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_wlock) |
| +#endif |
| |
| #undef E1 |
| #undef E2 |
| @@ -1306,12 +1335,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_rlock) |
| #include "locking-selftest-wlock.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_wlock) |
| |
| +#ifndef CONFIG_PREEMPT_RT |
| #include "locking-selftest-softirq.h" |
| #include "locking-selftest-rlock.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_rlock) |
| |
| #include "locking-selftest-wlock.h" |
| GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock) |
| +#endif |
| |
| #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| # define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) |
| @@ -1320,7 +1351,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock) |
| # define I_MUTEX(x) lockdep_reset_lock(&mutex_##x.dep_map) |
| # define I_RWSEM(x) lockdep_reset_lock(&rwsem_##x.dep_map) |
| # define I_WW(x) lockdep_reset_lock(&x.dep_map) |
| -# define I_LOCAL_LOCK(x) lockdep_reset_lock(&local_##x.dep_map) |
| +# define I_LOCAL_LOCK(x) lockdep_reset_lock(this_cpu_ptr(&local_##x.dep_map)) |
| #ifdef CONFIG_RT_MUTEXES |
| # define I_RTMUTEX(x) lockdep_reset_lock(&rtmutex_##x.dep_map) |
| #endif |
| @@ -1380,7 +1411,7 @@ static void reset_locks(void) |
| init_shared_classes(); |
| raw_spin_lock_init(&raw_lock_A); |
| raw_spin_lock_init(&raw_lock_B); |
| - local_lock_init(&local_A); |
| + local_lock_init(this_cpu_ptr(&local_A)); |
| |
| ww_mutex_init(&o, &ww_lockdep); ww_mutex_init(&o2, &ww_lockdep); ww_mutex_init(&o3, &ww_lockdep); |
| memset(&t, 0, sizeof(t)); memset(&t2, 0, sizeof(t2)); |
| @@ -1398,7 +1429,13 @@ static int unexpected_testcase_failures; |
| |
| static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) |
| { |
| - unsigned long saved_preempt_count = preempt_count(); |
| + int saved_preempt_count = preempt_count(); |
| +#ifdef CONFIG_PREEMPT_RT |
| +#ifdef CONFIG_SMP |
| + int saved_mgd_count = current->migration_disabled; |
| +#endif |
| + int saved_rcu_count = current->rcu_read_lock_nesting; |
| +#endif |
| |
| WARN_ON(irqs_disabled()); |
| |
| @@ -1432,6 +1469,18 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) |
| * count, so restore it: |
| */ |
| preempt_count_set(saved_preempt_count); |
| + |
| +#ifdef CONFIG_PREEMPT_RT |
| +#ifdef CONFIG_SMP |
| + while (current->migration_disabled > saved_mgd_count) |
| + migrate_enable(); |
| +#endif |
| + |
| + while (current->rcu_read_lock_nesting > saved_rcu_count) |
| + rcu_read_unlock(); |
| + WARN_ON_ONCE(current->rcu_read_lock_nesting < saved_rcu_count); |
| +#endif |
| + |
| #ifdef CONFIG_TRACE_IRQFLAGS |
| if (softirq_count()) |
| current->softirqs_enabled = 0; |
| @@ -1499,7 +1548,7 @@ static inline void print_testname(const char *testname) |
| |
| #define DO_TESTCASE_2x2RW(desc, name, nr) \ |
| DO_TESTCASE_2RW("hard-"desc, name##_hard, nr) \ |
| - DO_TESTCASE_2RW("soft-"desc, name##_soft, nr) \ |
| + NON_RT(DO_TESTCASE_2RW("soft-"desc, name##_soft, nr)) \ |
| |
| #define DO_TESTCASE_6x2x2RW(desc, name) \ |
| DO_TESTCASE_2x2RW(desc, name, 123); \ |
| @@ -1547,19 +1596,19 @@ static inline void print_testname(const char *testname) |
| |
| #define DO_TESTCASE_2I(desc, name, nr) \ |
| DO_TESTCASE_1("hard-"desc, name##_hard, nr); \ |
| - DO_TESTCASE_1("soft-"desc, name##_soft, nr); |
| + NON_RT(DO_TESTCASE_1("soft-"desc, name##_soft, nr)); |
| |
| #define DO_TESTCASE_2IB(desc, name, nr) \ |
| DO_TESTCASE_1B("hard-"desc, name##_hard, nr); \ |
| - DO_TESTCASE_1B("soft-"desc, name##_soft, nr); |
| + NON_RT(DO_TESTCASE_1B("soft-"desc, name##_soft, nr)); |
| |
| #define DO_TESTCASE_6I(desc, name, nr) \ |
| DO_TESTCASE_3("hard-"desc, name##_hard, nr); \ |
| - DO_TESTCASE_3("soft-"desc, name##_soft, nr); |
| + NON_RT(DO_TESTCASE_3("soft-"desc, name##_soft, nr)); |
| |
| #define DO_TESTCASE_6IRW(desc, name, nr) \ |
| DO_TESTCASE_3RW("hard-"desc, name##_hard, nr); \ |
| - DO_TESTCASE_3RW("soft-"desc, name##_soft, nr); |
| + NON_RT(DO_TESTCASE_3RW("soft-"desc, name##_soft, nr)); |
| |
| #define DO_TESTCASE_2x3(desc, name) \ |
| DO_TESTCASE_3(desc, name, 12); \ |
| @@ -1651,6 +1700,22 @@ static void ww_test_fail_acquire(void) |
| #endif |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +#define ww_mutex_base_lock(b) rt_mutex_lock(b) |
| +#define ww_mutex_base_trylock(b) rt_mutex_trylock(b) |
| +#define ww_mutex_base_lock_nest_lock(b, b2) rt_mutex_lock_nest_lock(b, b2) |
| +#define ww_mutex_base_lock_interruptible(b) rt_mutex_lock_interruptible(b) |
| +#define ww_mutex_base_lock_killable(b) rt_mutex_lock_killable(b) |
| +#define ww_mutex_base_unlock(b) rt_mutex_unlock(b) |
| +#else |
| +#define ww_mutex_base_lock(b) mutex_lock(b) |
| +#define ww_mutex_base_trylock(b) mutex_trylock(b) |
| +#define ww_mutex_base_lock_nest_lock(b, b2) mutex_lock_nest_lock(b, b2) |
| +#define ww_mutex_base_lock_interruptible(b) mutex_lock_interruptible(b) |
| +#define ww_mutex_base_lock_killable(b) mutex_lock_killable(b) |
| +#define ww_mutex_base_unlock(b) mutex_unlock(b) |
| +#endif |
| + |
| static void ww_test_normal(void) |
| { |
| int ret; |
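| |
| [On PREEMPT_RT a ww_mutex is built on an rtmutex rather than a mutex, so wherever the selftests manipulate the base lock directly they must use the matching primitive; the ww_mutex_base_*() wrappers above let the test bodies below stay identical for both configurations.] |
| |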
| @@ -1665,50 +1728,50 @@ static void ww_test_normal(void) |
| |
| /* mutex_lock (and indirectly, mutex_lock_nested) */ |
| o.ctx = (void *)~0UL; |
| - mutex_lock(&o.base); |
| - mutex_unlock(&o.base); |
| + ww_mutex_base_lock(&o.base); |
| + ww_mutex_base_unlock(&o.base); |
| WARN_ON(o.ctx != (void *)~0UL); |
| |
| /* mutex_lock_interruptible (and *_nested) */ |
| o.ctx = (void *)~0UL; |
| - ret = mutex_lock_interruptible(&o.base); |
| + ret = ww_mutex_base_lock_interruptible(&o.base); |
| if (!ret) |
| - mutex_unlock(&o.base); |
| + ww_mutex_base_unlock(&o.base); |
| else |
| WARN_ON(1); |
| WARN_ON(o.ctx != (void *)~0UL); |
| |
| /* mutex_lock_killable (and *_nested) */ |
| o.ctx = (void *)~0UL; |
| - ret = mutex_lock_killable(&o.base); |
| + ret = ww_mutex_base_lock_killable(&o.base); |
| if (!ret) |
| - mutex_unlock(&o.base); |
| + ww_mutex_base_unlock(&o.base); |
| else |
| WARN_ON(1); |
| WARN_ON(o.ctx != (void *)~0UL); |
| |
| /* trylock, succeeding */ |
| o.ctx = (void *)~0UL; |
| - ret = mutex_trylock(&o.base); |
| + ret = ww_mutex_base_trylock(&o.base); |
| WARN_ON(!ret); |
| if (ret) |
| - mutex_unlock(&o.base); |
| + ww_mutex_base_unlock(&o.base); |
| else |
| WARN_ON(1); |
| WARN_ON(o.ctx != (void *)~0UL); |
| |
| /* trylock, failing */ |
| o.ctx = (void *)~0UL; |
| - mutex_lock(&o.base); |
| - ret = mutex_trylock(&o.base); |
| + ww_mutex_base_lock(&o.base); |
| + ret = ww_mutex_base_trylock(&o.base); |
| WARN_ON(ret); |
| - mutex_unlock(&o.base); |
| + ww_mutex_base_unlock(&o.base); |
| WARN_ON(o.ctx != (void *)~0UL); |
| |
| /* nest_lock */ |
| o.ctx = (void *)~0UL; |
| - mutex_lock_nest_lock(&o.base, &t); |
| - mutex_unlock(&o.base); |
| + ww_mutex_base_lock_nest_lock(&o.base, &t); |
| + ww_mutex_base_unlock(&o.base); |
| WARN_ON(o.ctx != (void *)~0UL); |
| } |
| |
| @@ -1721,7 +1784,7 @@ static void ww_test_two_contexts(void) |
| static void ww_test_diff_class(void) |
| { |
| WWAI(&t); |
| -#ifdef CONFIG_DEBUG_MUTEXES |
| +#ifdef DEBUG_WW_MUTEXES |
| t.ww_class = NULL; |
| #endif |
| WWL(&o, &t); |
| @@ -1785,7 +1848,7 @@ static void ww_test_edeadlk_normal(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| o2.ctx = &t2; |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| |
| @@ -1801,7 +1864,7 @@ static void ww_test_edeadlk_normal(void) |
| |
| o2.ctx = NULL; |
| mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); |
| - mutex_unlock(&o2.base); |
| + ww_mutex_base_unlock(&o2.base); |
| WWU(&o); |
| |
| WWL(&o2, &t); |
| @@ -1811,7 +1874,7 @@ static void ww_test_edeadlk_normal_slow(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| o2.ctx = &t2; |
| |
| @@ -1827,7 +1890,7 @@ static void ww_test_edeadlk_normal_slow(void) |
| |
| o2.ctx = NULL; |
| mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); |
| - mutex_unlock(&o2.base); |
| + ww_mutex_base_unlock(&o2.base); |
| WWU(&o); |
| |
| ww_mutex_lock_slow(&o2, &t); |
| @@ -1837,7 +1900,7 @@ static void ww_test_edeadlk_no_unlock(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| o2.ctx = &t2; |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| |
| @@ -1853,7 +1916,7 @@ static void ww_test_edeadlk_no_unlock(void) |
| |
| o2.ctx = NULL; |
| mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); |
| - mutex_unlock(&o2.base); |
| + ww_mutex_base_unlock(&o2.base); |
| |
| WWL(&o2, &t); |
| } |
| @@ -1862,7 +1925,7 @@ static void ww_test_edeadlk_no_unlock_slow(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| o2.ctx = &t2; |
| |
| @@ -1878,7 +1941,7 @@ static void ww_test_edeadlk_no_unlock_slow(void) |
| |
| o2.ctx = NULL; |
| mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_); |
| - mutex_unlock(&o2.base); |
| + ww_mutex_base_unlock(&o2.base); |
| |
| ww_mutex_lock_slow(&o2, &t); |
| } |
| @@ -1887,7 +1950,7 @@ static void ww_test_edeadlk_acquire_more(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| o2.ctx = &t2; |
| |
| @@ -1908,7 +1971,7 @@ static void ww_test_edeadlk_acquire_more_slow(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| o2.ctx = &t2; |
| |
| @@ -1929,11 +1992,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| o2.ctx = &t2; |
| |
| - mutex_lock(&o3.base); |
| + ww_mutex_base_lock(&o3.base); |
| mutex_release(&o3.base.dep_map, _THIS_IP_); |
| o3.ctx = &t2; |
| |
| @@ -1955,11 +2018,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk_slow(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| o2.ctx = &t2; |
| |
| - mutex_lock(&o3.base); |
| + ww_mutex_base_lock(&o3.base); |
| mutex_release(&o3.base.dep_map, _THIS_IP_); |
| o3.ctx = &t2; |
| |
| @@ -1980,7 +2043,7 @@ static void ww_test_edeadlk_acquire_wrong(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| o2.ctx = &t2; |
| |
| @@ -2005,7 +2068,7 @@ static void ww_test_edeadlk_acquire_wrong_slow(void) |
| { |
| int ret; |
| |
| - mutex_lock(&o2.base); |
| + ww_mutex_base_lock(&o2.base); |
| mutex_release(&o2.base.dep_map, _THIS_IP_); |
| o2.ctx = &t2; |
| |
| @@ -2646,8 +2709,8 @@ static void wait_context_tests(void) |
| |
| static void local_lock_2(void) |
| { |
| - local_lock_acquire(&local_A); /* IRQ-ON */ |
| - local_lock_release(&local_A); |
| + local_lock(&local_A); /* IRQ-ON */ |
| + local_unlock(&local_A); |
| |
| HARDIRQ_ENTER(); |
| spin_lock(&lock_A); /* IN-IRQ */ |
| @@ -2656,18 +2719,18 @@ static void local_lock_2(void) |
| |
| HARDIRQ_DISABLE(); |
| spin_lock(&lock_A); |
| - local_lock_acquire(&local_A); /* IN-IRQ <-> IRQ-ON cycle, false */ |
| - local_lock_release(&local_A); |
| + local_lock(&local_A); /* IN-IRQ <-> IRQ-ON cycle, false */ |
| + local_unlock(&local_A); |
| spin_unlock(&lock_A); |
| HARDIRQ_ENABLE(); |
| } |
| |
| static void local_lock_3A(void) |
| { |
| - local_lock_acquire(&local_A); /* IRQ-ON */ |
| + local_lock(&local_A); /* IRQ-ON */ |
| spin_lock(&lock_B); /* IRQ-ON */ |
| spin_unlock(&lock_B); |
| - local_lock_release(&local_A); |
| + local_unlock(&local_A); |
| |
| HARDIRQ_ENTER(); |
| spin_lock(&lock_A); /* IN-IRQ */ |
| @@ -2676,18 +2739,18 @@ static void local_lock_3A(void) |
| |
| HARDIRQ_DISABLE(); |
| spin_lock(&lock_A); |
| - local_lock_acquire(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */ |
| - local_lock_release(&local_A); |
| + local_lock(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */ |
| + local_unlock(&local_A); |
| spin_unlock(&lock_A); |
| HARDIRQ_ENABLE(); |
| } |
| |
| static void local_lock_3B(void) |
| { |
| - local_lock_acquire(&local_A); /* IRQ-ON */ |
| + local_lock(&local_A); /* IRQ-ON */ |
| spin_lock(&lock_B); /* IRQ-ON */ |
| spin_unlock(&lock_B); |
| - local_lock_release(&local_A); |
| + local_unlock(&local_A); |
| |
| HARDIRQ_ENTER(); |
| spin_lock(&lock_A); /* IN-IRQ */ |
| @@ -2696,8 +2759,8 @@ static void local_lock_3B(void) |
| |
| HARDIRQ_DISABLE(); |
| spin_lock(&lock_A); |
| - local_lock_acquire(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */ |
| - local_lock_release(&local_A); |
| + local_lock(&local_A); /* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */ |
| + local_unlock(&local_A); |
| spin_unlock(&lock_A); |
| HARDIRQ_ENABLE(); |
| |
| @@ -2812,7 +2875,7 @@ void locking_selftest(void) |
| printk("------------------------\n"); |
| printk("| Locking API testsuite:\n"); |
| printk("----------------------------------------------------------------------------\n"); |
| - printk(" | spin |wlock |rlock |mutex | wsem | rsem |\n"); |
| + printk(" | spin |wlock |rlock |mutex | wsem | rsem |rtmutex\n"); |
| printk(" --------------------------------------------------------------------------\n"); |
| |
| init_shared_classes(); |
| @@ -2885,12 +2948,11 @@ void locking_selftest(void) |
| DO_TESTCASE_6x1RR("rlock W1R2/R2R3/W3W1", W1R2_R2R3_W3W1); |
| |
| printk(" --------------------------------------------------------------------------\n"); |
| - |
| /* |
| * irq-context testcases: |
| */ |
| DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1); |
| - DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A); |
| + NON_RT(DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A)); |
| DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B); |
| DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3); |
| DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4); |
| diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c |
| index 199ab201d501..06410209197a 100644 |
| --- a/lib/nmi_backtrace.c |
| +++ b/lib/nmi_backtrace.c |
| @@ -99,7 +99,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) |
| * Allow nested NMI backtraces while serializing |
| * against other CPUs. |
| */ |
| - printk_cpu_lock_irqsave(flags); |
| + raw_printk_cpu_lock_irqsave(flags); |
| if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) { |
| pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", |
| cpu, (void *)instruction_pointer(regs)); |
| @@ -110,7 +110,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) |
| else |
| dump_stack(); |
| } |
| - printk_cpu_unlock_irqrestore(flags); |
| + raw_printk_cpu_unlock_irqrestore(flags); |
| cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); |
| return true; |
| } |
| diff --git a/lib/ratelimit.c b/lib/ratelimit.c |
| index e01a93f46f83..524cf65dce53 100644 |
| --- a/lib/ratelimit.c |
| +++ b/lib/ratelimit.c |
| @@ -47,9 +47,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) |
| if (time_is_before_jiffies(rs->begin + rs->interval)) { |
| if (rs->missed) { |
| if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { |
| - printk_deferred(KERN_WARNING |
| - "%s: %d callbacks suppressed\n", |
| - func, rs->missed); |
| + pr_warn("%s: %d callbacks suppressed\n", func, rs->missed); |
| rs->missed = 0; |
| } |
| } |
| diff --git a/lib/scatterlist.c b/lib/scatterlist.c |
| index abb3432ed744..d5e82e4a57ad 100644 |
| --- a/lib/scatterlist.c |
| +++ b/lib/scatterlist.c |
| @@ -828,8 +828,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter) |
| * stops @miter. |
| * |
| * Context: |
| - * Don't care if @miter is stopped, or not proceeded yet. |
| - * Otherwise, preemption disabled if the SG_MITER_ATOMIC is set. |
| + * Don't care. |
| * |
| * Returns: |
| * true if @miter contains the valid mapping. false if end of sg |
| @@ -865,8 +864,7 @@ EXPORT_SYMBOL(sg_miter_skip); |
| * @miter->addr and @miter->length point to the current mapping. |
| * |
| * Context: |
| - * Preemption disabled if SG_MITER_ATOMIC. Preemption must stay disabled |
| - * till @miter is stopped. May sleep if !SG_MITER_ATOMIC. |
| + * May sleep if !SG_MITER_ATOMIC. |
| * |
| * Returns: |
| * true if @miter contains the next mapping. false if end of sg |
| @@ -906,8 +904,7 @@ EXPORT_SYMBOL(sg_miter_next); |
| * need to be released during iteration. |
| * |
| * Context: |
| - * Preemption disabled if the SG_MITER_ATOMIC is set. Don't care |
| - * otherwise. |
| + * Don't care. |
| */ |
| void sg_miter_stop(struct sg_mapping_iter *miter) |
| { |
| @@ -922,7 +919,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) |
| flush_dcache_page(miter->page); |
| |
| if (miter->__flags & SG_MITER_ATOMIC) { |
| - WARN_ON_ONCE(preemptible()); |
| + WARN_ON_ONCE(!pagefault_disabled()); |
| kunmap_atomic(miter->addr); |
| } else |
| kunmap(miter->page); |
| diff --git a/localversion-rt b/localversion-rt |
| new file mode 100644 |
| index 000000000000..24707986c321 |
| --- /dev/null |
| +++ b/localversion-rt |
| @@ -0,0 +1 @@ |
| +-rt48 |
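| |
| [kbuild's setlocalversion appends the contents of every localversion* file at the top of the tree to the release string, so a kernel built from this series identifies itself with the -rt48 suffix, e.g. "uname -r" reporting something like 5.15.x-rt48.] |
| |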
| diff --git a/mm/Kconfig b/mm/Kconfig |
| index c048dea7e342..88778414465b 100644 |
| --- a/mm/Kconfig |
| +++ b/mm/Kconfig |
| @@ -371,7 +371,7 @@ config NOMMU_INITIAL_TRIM_EXCESS |
| |
| config TRANSPARENT_HUGEPAGE |
| bool "Transparent Hugepage Support" |
| - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE |
| + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT |
| select COMPACTION |
| select XARRAY_MULTI |
| help |
| diff --git a/mm/memory.c b/mm/memory.c |
| index 26d115ded4ab..307be06c9484 100644 |
| --- a/mm/memory.c |
| +++ b/mm/memory.c |
| @@ -5287,7 +5287,7 @@ void __might_fault(const char *file, int line) |
| return; |
| if (pagefault_disabled()) |
| return; |
| - __might_sleep(file, line, 0); |
| + __might_sleep(file, line); |
| #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) |
| if (current->mm) |
| might_lock_read(¤t->mm->mmap_lock); |
| diff --git a/mm/page_alloc.c b/mm/page_alloc.c |
| index a0b7afae59e9..ac537f5caa9d 100644 |
| --- a/mm/page_alloc.c |
| +++ b/mm/page_alloc.c |
| @@ -3149,9 +3149,9 @@ static void drain_local_pages_wq(struct work_struct *work) |
| * cpu which is alright but we also have to make sure to not move to |
| * a different one. |
| */ |
| - preempt_disable(); |
| + migrate_disable(); |
| drain_local_pages(drain->zone); |
| - preempt_enable(); |
| + migrate_enable(); |
| } |
| |
| /* |
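| |
| [The worker only needs CPU stability while draining that CPU's pages, not atomicity, and on RT drain_local_pages() ends up taking sleeping spinlocks, which preempt_disable() would forbid. The general shape of the pattern, as a sketch rather than patch text:] |
| |
| migrate_disable(); /* stay on this CPU, but remain preemptible */ |
| cpu = smp_processor_id(); /* stable until migrate_enable() */ |
| /* ... per-CPU work that may take sleeping locks on RT ... */ |
| migrate_enable(); |
| |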
| diff --git a/mm/vmalloc.c b/mm/vmalloc.c |
| index 8375eecc55de..f81f11bcce5a 100644 |
| --- a/mm/vmalloc.c |
| +++ b/mm/vmalloc.c |
| @@ -1918,11 +1918,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) |
| return ERR_PTR(err); |
| } |
| |
| - vbq = &get_cpu_var(vmap_block_queue); |
| + get_cpu_light(); |
| + vbq = this_cpu_ptr(&vmap_block_queue); |
| spin_lock(&vbq->lock); |
| list_add_tail_rcu(&vb->free_list, &vbq->free); |
| spin_unlock(&vbq->lock); |
| - put_cpu_var(vmap_block_queue); |
| + put_cpu_light(); |
| |
| return vaddr; |
| } |
| @@ -2001,7 +2002,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) |
| order = get_order(size); |
| |
| rcu_read_lock(); |
| - vbq = &get_cpu_var(vmap_block_queue); |
| + get_cpu_light(); |
| + vbq = this_cpu_ptr(&vmap_block_queue); |
| list_for_each_entry_rcu(vb, &vbq->free, free_list) { |
| unsigned long pages_off; |
| |
| @@ -2024,7 +2026,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) |
| break; |
| } |
| |
| - put_cpu_var(vmap_block_queue); |
| + put_cpu_light(); |
| rcu_read_unlock(); |
| |
| /* Allocate new block if nothing was found */ |
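| |
| [get_cpu_light()/put_cpu_light() are carried by the RT series and are not mainline helpers. Where get_cpu_var() implies preempt_disable(), the light variants only pin the task to its CPU, which keeps the subsequent spin_lock(&vbq->lock) legal on RT. If we recall the RT definition correctly, it is essentially:] |
| |
| #define get_cpu_light() ({ migrate_disable(); smp_processor_id(); }) |
| #define put_cpu_light() migrate_enable() |
| |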
| diff --git a/mm/workingset.c b/mm/workingset.c |
| index 880d882f3325..2a9ed5aeb6fa 100644 |
| --- a/mm/workingset.c |
| +++ b/mm/workingset.c |
| @@ -433,6 +433,8 @@ static struct list_lru shadow_nodes; |
| |
| void workingset_update_node(struct xa_node *node) |
| { |
| + struct address_space *mapping; |
| + |
| /* |
| * Track non-empty nodes that contain only shadow entries; |
| * unlink those that contain pages or are being freed. |
| @@ -441,7 +443,8 @@ void workingset_update_node(struct xa_node *node) |
| * already where they should be. The list_empty() test is safe |
| * as node->private_list is protected by the i_pages lock. |
| */ |
| - VM_WARN_ON_ONCE(!irqs_disabled()); /* For __inc_lruvec_page_state */ |
| + mapping = container_of(node->array, struct address_space, i_pages); |
| + lockdep_assert_held(&mapping->i_pages.xa_lock); |
| |
| if (node->count && node->count == node->nr_values) { |
| if (list_empty(&node->private_list)) { |
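| |
| [On RT, taking the i_pages xa_lock does not disable hard interrupts, so the old irqs_disabled() assertion would fire spuriously. Asserting lockdep ownership of the lock instead checks the invariant that actually protects this update, and it holds on both RT and !RT kernels.] |
| |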
| diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c |
| index 439deb8decbc..a66431853394 100644 |
| --- a/mm/zsmalloc.c |
| +++ b/mm/zsmalloc.c |
| @@ -57,6 +57,7 @@ |
| #include <linux/wait.h> |
| #include <linux/pagemap.h> |
| #include <linux/fs.h> |
| +#include <linux/local_lock.h> |
| |
| #define ZSPAGE_MAGIC 0x58 |
| |
| @@ -77,6 +78,20 @@ |
| |
| #define ZS_HANDLE_SIZE (sizeof(unsigned long)) |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| + |
| +struct zsmalloc_handle { |
| + unsigned long addr; |
| + spinlock_t lock; |
| +}; |
| + |
| +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle)) |
| + |
| +#else |
| + |
| +#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long)) |
| +#endif |
| + |
| /* |
| * Object location (<PFN>, <obj_idx>) is encoded as |
| * a single (unsigned long) handle value. |
| @@ -293,6 +308,7 @@ struct zspage { |
| }; |
| |
| struct mapping_area { |
| + local_lock_t lock; |
| char *vm_buf; /* copy buffer for objects that span pages */ |
| char *vm_addr; /* address of kmap_atomic()'ed pages */ |
| enum zs_mapmode vm_mm; /* mapping mode */ |
| @@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} |
| |
| static int create_cache(struct zs_pool *pool) |
| { |
| - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, |
| + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE, |
| 0, 0, NULL); |
| if (!pool->handle_cachep) |
| return 1; |
| @@ -346,10 +362,27 @@ static void destroy_cache(struct zs_pool *pool) |
| |
| static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) |
| { |
| - return (unsigned long)kmem_cache_alloc(pool->handle_cachep, |
| - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); |
| + void *p; |
| + |
| + p = kmem_cache_alloc(pool->handle_cachep, |
| + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); |
| +#ifdef CONFIG_PREEMPT_RT |
| + if (p) { |
| + struct zsmalloc_handle *zh = p; |
| + |
| + spin_lock_init(&zh->lock); |
| + } |
| +#endif |
| + return (unsigned long)p; |
| } |
| |
| +#ifdef CONFIG_PREEMPT_RT |
| +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle) |
| +{ |
| + return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1)); |
| +} |
| +#endif |
| + |
| static void cache_free_handle(struct zs_pool *pool, unsigned long handle) |
| { |
| kmem_cache_free(pool->handle_cachep, (void *)handle); |
| @@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) |
| |
| static void record_obj(unsigned long handle, unsigned long obj) |
| { |
| +#ifdef CONFIG_PREEMPT_RT |
| + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); |
| + |
| + WRITE_ONCE(zh->addr, obj); |
| +#else |
| /* |
| * lsb of @obj represents handle lock while other bits |
| * represent object value the handle is pointing so |
| * updating shouldn't do store tearing. |
| */ |
| WRITE_ONCE(*(unsigned long *)handle, obj); |
| +#endif |
| } |
| |
| /* zpool driver */ |
| @@ -455,7 +494,9 @@ MODULE_ALIAS("zpool-zsmalloc"); |
| #endif /* CONFIG_ZPOOL */ |
| |
| /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ |
| -static DEFINE_PER_CPU(struct mapping_area, zs_map_area); |
| +static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = { |
| + .lock = INIT_LOCAL_LOCK(lock), |
| +}; |
| |
| static bool is_zspage_isolated(struct zspage *zspage) |
| { |
| @@ -862,7 +903,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) |
| |
| static unsigned long handle_to_obj(unsigned long handle) |
| { |
| +#ifdef CONFIG_PREEMPT_RT |
| + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); |
| + |
| + return zh->addr; |
| +#else |
| return *(unsigned long *)handle; |
| +#endif |
| } |
| |
| static unsigned long obj_to_head(struct page *page, void *obj) |
| @@ -876,22 +923,46 @@ static unsigned long obj_to_head(struct page *page, void *obj) |
| |
| static inline int testpin_tag(unsigned long handle) |
| { |
| +#ifdef CONFIG_PREEMPT_RT |
| + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); |
| + |
| + return spin_is_locked(&zh->lock); |
| +#else |
| return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); |
| +#endif |
| } |
| |
| static inline int trypin_tag(unsigned long handle) |
| { |
| +#ifdef CONFIG_PREEMPT_RT |
| + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); |
| + |
| + return spin_trylock(&zh->lock); |
| +#else |
| return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); |
| +#endif |
| } |
| |
| static void pin_tag(unsigned long handle) __acquires(bitlock) |
| { |
| +#ifdef CONFIG_PREEMPT_RT |
| + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); |
| + |
| + return spin_lock(&zh->lock); |
| +#else |
| bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); |
| +#endif |
| } |
| |
| static void unpin_tag(unsigned long handle) __releases(bitlock) |
| { |
| +#ifdef CONFIG_PREEMPT_RT |
| + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); |
| + |
| + return spin_unlock(&zh->lock); |
| +#else |
| bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); |
| +#endif |
| } |
| |
| static void reset_page(struct page *page) |
| @@ -1274,7 +1345,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, |
| class = pool->size_class[class_idx]; |
| off = (class->size * obj_idx) & ~PAGE_MASK; |
| |
| - area = &get_cpu_var(zs_map_area); |
| + local_lock(&zs_map_area.lock); |
| + area = this_cpu_ptr(&zs_map_area); |
| area->vm_mm = mm; |
| if (off + class->size <= PAGE_SIZE) { |
| /* this object is contained entirely within a page */ |
| @@ -1328,7 +1400,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) |
| |
| __zs_unmap_object(area, pages, off, class->size); |
| } |
| - put_cpu_var(zs_map_area); |
| + local_unlock(&zs_map_area.lock); |
| |
| migrate_read_unlock(zspage); |
| unpin_tag(handle); |
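| |
| [Two conversions happen in this file. The handle's pin bit was a bit_spin_lock, which busy-waits and cannot sleep, so on RT the handle allocation grows to a struct zsmalloc_handle carrying a real spinlock_t (rtmutex-based on RT) next to the object address. Separately, zs_map_area moves from get_cpu_var(), i.e. preempt_disable(), to a local_lock_t embedded in the per-CPU mapping_area, which on RT is a per-CPU sleeping lock and on !RT still reduces to preempt_disable().] |
| |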
| diff --git a/net/Kconfig b/net/Kconfig |
| index fb13460c6dab..074472dfa94a 100644 |
| --- a/net/Kconfig |
| +++ b/net/Kconfig |
| @@ -294,7 +294,7 @@ config CGROUP_NET_CLASSID |
| |
| config NET_RX_BUSY_POLL |
| bool |
| - default y |
| + default y if !PREEMPT_RT |
| |
| config BQL |
| bool |
| diff --git a/net/core/dev.c b/net/core/dev.c |
| index 6111506a4105..38d6ffad4750 100644 |
| --- a/net/core/dev.c |
| +++ b/net/core/dev.c |
| @@ -225,14 +225,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) |
| static inline void rps_lock(struct softnet_data *sd) |
| { |
| #ifdef CONFIG_RPS |
| - spin_lock(&sd->input_pkt_queue.lock); |
| + raw_spin_lock(&sd->input_pkt_queue.raw_lock); |
| #endif |
| } |
| |
| static inline void rps_unlock(struct softnet_data *sd) |
| { |
| #ifdef CONFIG_RPS |
| - spin_unlock(&sd->input_pkt_queue.lock); |
| + raw_spin_unlock(&sd->input_pkt_queue.raw_lock); |
| #endif |
| } |
| |
| @@ -3044,6 +3044,7 @@ static void __netif_reschedule(struct Qdisc *q) |
| sd->output_queue_tailp = &q->next_sched; |
| raise_softirq_irqoff(NET_TX_SOFTIRQ); |
| local_irq_restore(flags); |
| + preempt_check_resched_rt(); |
| } |
| |
| void __netif_schedule(struct Qdisc *q) |
| @@ -3106,6 +3107,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) |
| __this_cpu_write(softnet_data.completion_queue, skb); |
| raise_softirq_irqoff(NET_TX_SOFTIRQ); |
| local_irq_restore(flags); |
| + preempt_check_resched_rt(); |
| } |
| EXPORT_SYMBOL(__dev_kfree_skb_irq); |
| |
| @@ -3837,7 +3839,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, |
| * This permits qdisc->running owner to get the lock more |
| * often and dequeue packets faster. |
| */ |
| +#ifdef CONFIG_PREEMPT_RT |
| + contended = true; |
| +#else |
| contended = qdisc_is_running(q); |
| +#endif |
| if (unlikely(contended)) |
| spin_lock(&q->busylock); |
| |
| @@ -4662,6 +4668,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, |
| rps_unlock(sd); |
| |
| local_irq_restore(flags); |
| + preempt_check_resched_rt(); |
| |
| atomic_long_inc(&skb->dev->rx_dropped); |
| kfree_skb(skb); |
| @@ -4902,7 +4909,7 @@ static int netif_rx_internal(struct sk_buff *skb) |
| struct rps_dev_flow voidflow, *rflow = &voidflow; |
| int cpu; |
| |
| - preempt_disable(); |
| + migrate_disable(); |
| rcu_read_lock(); |
| |
| cpu = get_rps_cpu(skb->dev, skb, &rflow); |
| @@ -4912,14 +4919,14 @@ static int netif_rx_internal(struct sk_buff *skb) |
| ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
| |
| rcu_read_unlock(); |
| - preempt_enable(); |
| + migrate_enable(); |
| } else |
| #endif |
| { |
| unsigned int qtail; |
| |
| - ret = enqueue_to_backlog(skb, get_cpu(), &qtail); |
| - put_cpu(); |
| + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); |
| + put_cpu_light(); |
| } |
| return ret; |
| } |
| @@ -4958,11 +4965,9 @@ int netif_rx_ni(struct sk_buff *skb) |
| |
| trace_netif_rx_ni_entry(skb); |
| |
| - preempt_disable(); |
| + local_bh_disable(); |
| err = netif_rx_internal(skb); |
| - if (local_softirq_pending()) |
| - do_softirq(); |
| - preempt_enable(); |
| + local_bh_enable(); |
| trace_netif_rx_ni_exit(err); |
| |
| return err; |
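| |
| [The open-coded preempt_disable() plus do_softirq() replay is replaced by a plain bottom-half critical section: local_bh_enable() runs any softirq that netif_rx_internal() raised, and unlike preempt_disable() it is valid on RT, where backlog processing may itself block on locks.] |
| |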
| @@ -6405,12 +6410,14 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) |
| sd->rps_ipi_list = NULL; |
| |
| local_irq_enable(); |
| + preempt_check_resched_rt(); |
| |
| /* Send pending IPI's to kick RPS processing on remote cpus. */ |
| net_rps_send_ipi(remsd); |
| } else |
| #endif |
| local_irq_enable(); |
| + preempt_check_resched_rt(); |
| } |
| |
| static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) |
| @@ -6488,6 +6495,7 @@ void __napi_schedule(struct napi_struct *n) |
| local_irq_save(flags); |
| ____napi_schedule(this_cpu_ptr(&softnet_data), n); |
| local_irq_restore(flags); |
| + preempt_check_resched_rt(); |
| } |
| EXPORT_SYMBOL(__napi_schedule); |
| |
| @@ -11318,6 +11326,7 @@ static int dev_cpu_dead(unsigned int oldcpu) |
| |
| raise_softirq_irqoff(NET_TX_SOFTIRQ); |
| local_irq_enable(); |
| + preempt_check_resched_rt(); |
| |
| #ifdef CONFIG_RPS |
| remsd = oldsd->rps_ipi_list; |
| @@ -11331,7 +11340,7 @@ static int dev_cpu_dead(unsigned int oldcpu) |
| netif_rx_ni(skb); |
| input_queue_head_incr(oldsd); |
| } |
| - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { |
| + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { |
| netif_rx_ni(skb); |
| input_queue_head_incr(oldsd); |
| } |
| @@ -11646,7 +11655,7 @@ static int __init net_dev_init(void) |
| |
| INIT_WORK(flush, flush_backlog); |
| |
| - skb_queue_head_init(&sd->input_pkt_queue); |
| + skb_queue_head_init_raw(&sd->input_pkt_queue); |
| skb_queue_head_init(&sd->process_queue); |
| #ifdef CONFIG_XFRM_OFFLOAD |
| skb_queue_head_init(&sd->xfrm_backlog); |
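| |
| [preempt_check_resched_rt() is another helper from this series, a no-op on !RT. On RT, raising the NET_TX/NET_RX softirqs from an irqs-off region may wake ksoftirqd and set need-resched, but no preemption check runs when interrupts are merely restored; placing the helper right after local_irq_restore()/local_irq_enable() folds that pending reschedule instead of deferring it to the next preemption point.] |
| |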
| diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c |
| index 8e582e29a41e..4fcbdd71c59f 100644 |
| --- a/net/core/gen_estimator.c |
| +++ b/net/core/gen_estimator.c |
| @@ -40,10 +40,10 @@ |
| */ |
| |
| struct net_rate_estimator { |
| - struct gnet_stats_basic_packed *bstats; |
| + struct gnet_stats_basic_sync *bstats; |
| spinlock_t *stats_lock; |
| - seqcount_t *running; |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats; |
| + bool running; |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats; |
| u8 ewma_log; |
| u8 intvl_log; /* period : (250ms << intvl_log) */ |
| |
| @@ -60,13 +60,13 @@ struct net_rate_estimator { |
| }; |
| |
| static void est_fetch_counters(struct net_rate_estimator *e, |
| - struct gnet_stats_basic_packed *b) |
| + struct gnet_stats_basic_sync *b) |
| { |
| - memset(b, 0, sizeof(*b)); |
| + gnet_stats_basic_sync_init(b); |
| if (e->stats_lock) |
| spin_lock(e->stats_lock); |
| |
| - __gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats); |
| + gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running); |
| |
| if (e->stats_lock) |
| spin_unlock(e->stats_lock); |
| @@ -76,14 +76,18 @@ static void est_fetch_counters(struct net_rate_estimator *e, |
| static void est_timer(struct timer_list *t) |
| { |
| struct net_rate_estimator *est = from_timer(est, t, timer); |
| - struct gnet_stats_basic_packed b; |
| + struct gnet_stats_basic_sync b; |
| + u64 b_bytes, b_packets; |
| u64 rate, brate; |
| |
| est_fetch_counters(est, &b); |
| - brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log); |
| + b_bytes = u64_stats_read(&b.bytes); |
| + b_packets = u64_stats_read(&b.packets); |
| + |
| + brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log); |
| brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log); |
| |
| - rate = (b.packets - est->last_packets) << (10 - est->intvl_log); |
| + rate = (b_packets - est->last_packets) << (10 - est->intvl_log); |
| rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log); |
| |
| write_seqcount_begin(&est->seq); |
| @@ -91,8 +95,8 @@ static void est_timer(struct timer_list *t) |
| est->avpps += rate; |
| write_seqcount_end(&est->seq); |
| |
| - est->last_bytes = b.bytes; |
| - est->last_packets = b.packets; |
| + est->last_bytes = b_bytes; |
| + est->last_packets = b_packets; |
| |
| est->next_jiffies += ((HZ/4) << est->intvl_log); |
| |
| @@ -109,7 +113,9 @@ static void est_timer(struct timer_list *t) |
| * @cpu_bstats: bstats per cpu |
| * @rate_est: rate estimator statistics |
| * @lock: lock for statistics and control path |
| - * @running: qdisc running seqcount |
| + * @running: true if @bstats represents a running qdisc, thus @bstats' |
| + * internal values might change during basic reads. Only used |
| + * if @cpu_bstats is NULL |
| * @opt: rate estimator configuration TLV |
| * |
| * Creates a new rate estimator with &bstats as source and &rate_est |
| @@ -121,16 +127,16 @@ static void est_timer(struct timer_list *t) |
| * Returns 0 on success or a negative error code. |
| * |
| */ |
| -int gen_new_estimator(struct gnet_stats_basic_packed *bstats, |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats, |
| +int gen_new_estimator(struct gnet_stats_basic_sync *bstats, |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats, |
| struct net_rate_estimator __rcu **rate_est, |
| spinlock_t *lock, |
| - seqcount_t *running, |
| + bool running, |
| struct nlattr *opt) |
| { |
| struct gnet_estimator *parm = nla_data(opt); |
| struct net_rate_estimator *old, *est; |
| - struct gnet_stats_basic_packed b; |
| + struct gnet_stats_basic_sync b; |
| int intvl_log; |
| |
| if (nla_len(opt) < sizeof(*parm)) |
| @@ -164,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, |
| est_fetch_counters(est, &b); |
| if (lock) |
| local_bh_enable(); |
| - est->last_bytes = b.bytes; |
| - est->last_packets = b.packets; |
| + est->last_bytes = u64_stats_read(&b.bytes); |
| + est->last_packets = u64_stats_read(&b.packets); |
| |
| if (lock) |
| spin_lock_bh(lock); |
| @@ -214,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator); |
| * @cpu_bstats: bstats per cpu |
| * @rate_est: rate estimator statistics |
| * @lock: lock for statistics and control path |
| - * @running: qdisc running seqcount (might be NULL) |
| + * @running: true if @bstats represents a running qdisc, thus @bstats' |
| + * internal values might change during basic reads. Only used |
| + * if @cpu_bstats is NULL |
| * @opt: rate estimator configuration TLV |
| * |
| * Replaces the configuration of a rate estimator by calling |
| @@ -222,11 +230,11 @@ EXPORT_SYMBOL(gen_kill_estimator); |
| * |
| * Returns 0 on success or a negative error code. |
| */ |
| -int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats, |
| +int gen_replace_estimator(struct gnet_stats_basic_sync *bstats, |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats, |
| struct net_rate_estimator __rcu **rate_est, |
| spinlock_t *lock, |
| - seqcount_t *running, struct nlattr *opt) |
| + bool running, struct nlattr *opt) |
| { |
| return gen_new_estimator(bstats, cpu_bstats, rate_est, |
| lock, running, opt); |
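| |
| [With the qdisc-running seqcount gone, all basic counters go through the u64_stats API; the new "running" flag only tells readers whether a writer may be updating concurrently, i.e. whether the seqlock-style retry loop is needed on 32-bit. The canonical pairing against the new gnet_stats_basic_sync layout, as a sketch:] |
| |
| /* writer, e.g. once per transmitted packet: */ |
| u64_stats_update_begin(&b->syncp); |
| u64_stats_add(&b->bytes, skb->len); |
| u64_stats_inc(&b->packets); |
| u64_stats_update_end(&b->syncp); |
| |
| /* reader: */ |
| unsigned int start; |
| u64 bytes, packets; |
| do { |
| start = u64_stats_fetch_begin_irq(&b->syncp); |
| bytes = u64_stats_read(&b->bytes); |
| packets = u64_stats_read(&b->packets); |
| } while (u64_stats_fetch_retry_irq(&b->syncp, start)); |
| |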
| diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c |
| index e491b083b348..a10335b4ba2d 100644 |
| --- a/net/core/gen_stats.c |
| +++ b/net/core/gen_stats.c |
| @@ -18,7 +18,7 @@ |
| #include <linux/gen_stats.h> |
| #include <net/netlink.h> |
| #include <net/gen_stats.h> |
| - |
| +#include <net/sch_generic.h> |
| |
| static inline int |
| gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr) |
| @@ -114,63 +114,112 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, |
| } |
| EXPORT_SYMBOL(gnet_stats_start_copy); |
| |
| -static void |
| -__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, |
| - struct gnet_stats_basic_cpu __percpu *cpu) |
| +/* Must not be inlined, due to u64_stats seqcount_t lockdep key */ |
| +void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b) |
| { |
| + u64_stats_set(&b->bytes, 0); |
| + u64_stats_set(&b->packets, 0); |
| + u64_stats_init(&b->syncp); |
| +} |
| +EXPORT_SYMBOL(gnet_stats_basic_sync_init); |
| + |
| +static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, |
| + struct gnet_stats_basic_sync __percpu *cpu) |
| +{ |
| + u64 t_bytes = 0, t_packets = 0; |
| int i; |
| |
| for_each_possible_cpu(i) { |
| - struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i); |
| + struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); |
| unsigned int start; |
| u64 bytes, packets; |
| |
| do { |
| start = u64_stats_fetch_begin_irq(&bcpu->syncp); |
| - bytes = bcpu->bstats.bytes; |
| - packets = bcpu->bstats.packets; |
| + bytes = u64_stats_read(&bcpu->bytes); |
| + packets = u64_stats_read(&bcpu->packets); |
| } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); |
| |
| - bstats->bytes += bytes; |
| - bstats->packets += packets; |
| + t_bytes += bytes; |
| + t_packets += packets; |
| + } |
| + _bstats_update(bstats, t_bytes, t_packets); |
| +} |
| + |
| +void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, |
| + struct gnet_stats_basic_sync __percpu *cpu, |
| + struct gnet_stats_basic_sync *b, bool running) |
| +{ |
| + unsigned int start; |
| + u64 bytes = 0; |
| + u64 packets = 0; |
| + |
| + WARN_ON_ONCE((cpu || running) && in_hardirq()); |
| + |
| + if (cpu) { |
| + gnet_stats_add_basic_cpu(bstats, cpu); |
| + return; |
| } |
| + do { |
| + if (running) |
| + start = u64_stats_fetch_begin_irq(&b->syncp); |
| + bytes = u64_stats_read(&b->bytes); |
| + packets = u64_stats_read(&b->packets); |
| + } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); |
| + |
| + _bstats_update(bstats, bytes, packets); |
| } |
| +EXPORT_SYMBOL(gnet_stats_add_basic); |
| |
| -void |
| -__gnet_stats_copy_basic(const seqcount_t *running, |
| - struct gnet_stats_basic_packed *bstats, |
| - struct gnet_stats_basic_cpu __percpu *cpu, |
| - struct gnet_stats_basic_packed *b) |
| +static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, |
| + struct gnet_stats_basic_sync __percpu *cpu, |
| + struct gnet_stats_basic_sync *b, bool running) |
| { |
| - unsigned int seq; |
| + unsigned int start; |
| |
| if (cpu) { |
| - __gnet_stats_copy_basic_cpu(bstats, cpu); |
| + u64 t_bytes = 0, t_packets = 0; |
| + int i; |
| + |
| + for_each_possible_cpu(i) { |
| + struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i); |
| + unsigned int start; |
| + u64 bytes, packets; |
| + |
| + do { |
| + start = u64_stats_fetch_begin_irq(&bcpu->syncp); |
| + bytes = u64_stats_read(&bcpu->bytes); |
| + packets = u64_stats_read(&bcpu->packets); |
| + } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); |
| + |
| + t_bytes += bytes; |
| + t_packets += packets; |
| + } |
| + *ret_bytes = t_bytes; |
| + *ret_packets = t_packets; |
| return; |
| } |
| do { |
| if (running) |
| - seq = read_seqcount_begin(running); |
| - bstats->bytes = b->bytes; |
| - bstats->packets = b->packets; |
| - } while (running && read_seqcount_retry(running, seq)); |
| + start = u64_stats_fetch_begin_irq(&b->syncp); |
| + *ret_bytes = u64_stats_read(&b->bytes); |
| + *ret_packets = u64_stats_read(&b->packets); |
| + } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); |
| } |
| -EXPORT_SYMBOL(__gnet_stats_copy_basic); |
| |
| static int |
| -___gnet_stats_copy_basic(const seqcount_t *running, |
| - struct gnet_dump *d, |
| - struct gnet_stats_basic_cpu __percpu *cpu, |
| - struct gnet_stats_basic_packed *b, |
| - int type) |
| +___gnet_stats_copy_basic(struct gnet_dump *d, |
| + struct gnet_stats_basic_sync __percpu *cpu, |
| + struct gnet_stats_basic_sync *b, |
| + int type, bool running) |
| { |
| - struct gnet_stats_basic_packed bstats = {0}; |
| + u64 bstats_bytes, bstats_packets; |
| |
| - __gnet_stats_copy_basic(running, &bstats, cpu, b); |
| + gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running); |
| |
| if (d->compat_tc_stats && type == TCA_STATS_BASIC) { |
| - d->tc_stats.bytes = bstats.bytes; |
| - d->tc_stats.packets = bstats.packets; |
| + d->tc_stats.bytes = bstats_bytes; |
| + d->tc_stats.packets = bstats_packets; |
| } |
| |
| if (d->tail) { |
| @@ -178,24 +227,28 @@ ___gnet_stats_copy_basic(const seqcount_t *running, |
| int res; |
| |
| memset(&sb, 0, sizeof(sb)); |
| - sb.bytes = bstats.bytes; |
| - sb.packets = bstats.packets; |
| + sb.bytes = bstats_bytes; |
| + sb.packets = bstats_packets; |
| res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD); |
| - if (res < 0 || sb.packets == bstats.packets) |
| + if (res < 0 || sb.packets == bstats_packets) |
| return res; |
| /* emit 64bit stats only if needed */ |
| - return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets, |
| - sizeof(bstats.packets), TCA_STATS_PAD); |
| + return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets, |
| + sizeof(bstats_packets), TCA_STATS_PAD); |
| } |
| return 0; |
| } |
| |
| /** |
| * gnet_stats_copy_basic - copy basic statistics into statistic TLV |
| - * @running: seqcount_t pointer |
| * @d: dumping handle |
| * @cpu: copy statistic per cpu |
| * @b: basic statistics |
| + * @running: true if @b represents a running qdisc, thus @b's |
| + * internal values might change during basic reads. |
| + * Only used if @cpu is NULL |
| + * |
| + * Context: task; must not be run from IRQ or BH contexts |
| * |
| * Appends the basic statistics to the top level TLV created by |
| * gnet_stats_start_copy(). |
| @@ -204,22 +257,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running, |
| * if the room in the socket buffer was not sufficient. |
| */ |
| int |
| -gnet_stats_copy_basic(const seqcount_t *running, |
| - struct gnet_dump *d, |
| - struct gnet_stats_basic_cpu __percpu *cpu, |
| - struct gnet_stats_basic_packed *b) |
| +gnet_stats_copy_basic(struct gnet_dump *d, |
| + struct gnet_stats_basic_sync __percpu *cpu, |
| + struct gnet_stats_basic_sync *b, |
| + bool running) |
| { |
| - return ___gnet_stats_copy_basic(running, d, cpu, b, |
| - TCA_STATS_BASIC); |
| + return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running); |
| } |
| EXPORT_SYMBOL(gnet_stats_copy_basic); |
| |
| /** |
| * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV |
| - * @running: seqcount_t pointer |
| * @d: dumping handle |
| * @cpu: copy statistic per cpu |
| * @b: basic statistics |
| + * @running: true if @b represents a running qdisc, thus @b's |
| + * internal values might change during basic reads. |
| + * Only used if @cpu is NULL |
| + * |
| + * Context: task; must not be run from IRQ or BH contexts |
| * |
| * Appends the basic statistics to the top level TLV created by |
| * gnet_stats_start_copy(). |
| @@ -228,13 +284,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); |
| * if the room in the socket buffer was not sufficient. |
| */ |
| int |
| -gnet_stats_copy_basic_hw(const seqcount_t *running, |
| - struct gnet_dump *d, |
| - struct gnet_stats_basic_cpu __percpu *cpu, |
| - struct gnet_stats_basic_packed *b) |
| +gnet_stats_copy_basic_hw(struct gnet_dump *d, |
| + struct gnet_stats_basic_sync __percpu *cpu, |
| + struct gnet_stats_basic_sync *b, |
| + bool running) |
| { |
| - return ___gnet_stats_copy_basic(running, d, cpu, b, |
| - TCA_STATS_BASIC_HW); |
| + return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running); |
| } |
| EXPORT_SYMBOL(gnet_stats_copy_basic_hw); |
| |
| @@ -282,16 +337,15 @@ gnet_stats_copy_rate_est(struct gnet_dump *d, |
| } |
| EXPORT_SYMBOL(gnet_stats_copy_rate_est); |
| |
| -static void |
| -__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, |
| - const struct gnet_stats_queue __percpu *q) |
| +static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats, |
| + const struct gnet_stats_queue __percpu *q) |
| { |
| int i; |
| |
| for_each_possible_cpu(i) { |
| const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i); |
| |
| - qstats->qlen = 0; |
| + qstats->qlen += qcpu->backlog; |
| qstats->backlog += qcpu->backlog; |
| qstats->drops += qcpu->drops; |
| qstats->requeues += qcpu->requeues; |
| @@ -299,24 +353,21 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats, |
| } |
| } |
| |
| -void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats, |
| - const struct gnet_stats_queue __percpu *cpu, |
| - const struct gnet_stats_queue *q, |
| - __u32 qlen) |
| +void gnet_stats_add_queue(struct gnet_stats_queue *qstats, |
| + const struct gnet_stats_queue __percpu *cpu, |
| + const struct gnet_stats_queue *q) |
| { |
| if (cpu) { |
| - __gnet_stats_copy_queue_cpu(qstats, cpu); |
| + gnet_stats_add_queue_cpu(qstats, cpu); |
| } else { |
| - qstats->qlen = q->qlen; |
| - qstats->backlog = q->backlog; |
| - qstats->drops = q->drops; |
| - qstats->requeues = q->requeues; |
| - qstats->overlimits = q->overlimits; |
| + qstats->qlen += q->qlen; |
| + qstats->backlog += q->backlog; |
| + qstats->drops += q->drops; |
| + qstats->requeues += q->requeues; |
| + qstats->overlimits += q->overlimits; |
| } |
| - |
| - qstats->qlen = qlen; |
| } |
| -EXPORT_SYMBOL(__gnet_stats_copy_queue); |
| +EXPORT_SYMBOL(gnet_stats_add_queue); |
| |
| /** |
| * gnet_stats_copy_queue - copy queue statistics into statistics TLV |
| @@ -339,7 +390,8 @@ gnet_stats_copy_queue(struct gnet_dump *d, |
| { |
| struct gnet_stats_queue qstats = {0}; |
| |
| - __gnet_stats_copy_queue(&qstats, cpu_q, q, qlen); |
| + gnet_stats_add_queue(&qstats, cpu_q, q); |
| + qstats.qlen = qlen; |
| |
| if (d->compat_tc_stats) { |
| d->tc_stats.drops = qstats.drops; |
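| |
| With the rework above, queue counters are accumulated by |
| gnet_stats_add_queue() and the queue length is assigned by the caller |
| afterwards, as gnet_stats_copy_queue() now does. A minimal sketch of |
| the new calling convention (the example_* name is illustrative, not |
| part of the kernel API): |
| |
| .. code-block:: c |
| |
|     /* Same includes as net/core/gen_stats.c above. */ |
| |
|     /* Fold two sets of queue counters together and report them. |
|      * qlen is no longer passed to the add helper; the caller sets |
|      * it once at the end. |
|      */ |
|     static int example_dump_queue(struct gnet_dump *d, |
|                                   const struct gnet_stats_queue *q1, |
|                                   const struct gnet_stats_queue *q2, |
|                                   __u32 qlen) |
|     { |
|         struct gnet_stats_queue qstats = {0}; |
| |
|         gnet_stats_add_queue(&qstats, NULL, q1); |
|         gnet_stats_add_queue(&qstats, NULL, q2); |
| |
|         return gnet_stats_copy_queue(d, NULL, &qstats, qlen); |
|     } |
| |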
| diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c |
| index 0d5c422f8745..8aec1b529364 100644 |
| --- a/net/netfilter/xt_RATEEST.c |
| +++ b/net/netfilter/xt_RATEEST.c |
| @@ -94,11 +94,11 @@ static unsigned int |
| xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) |
| { |
| const struct xt_rateest_target_info *info = par->targinfo; |
| - struct gnet_stats_basic_packed *stats = &info->est->bstats; |
| + struct gnet_stats_basic_sync *stats = &info->est->bstats; |
| |
| spin_lock_bh(&info->est->lock); |
| - stats->bytes += skb->len; |
| - stats->packets++; |
| + u64_stats_add(&stats->bytes, skb->len); |
| + u64_stats_inc(&stats->packets); |
| spin_unlock_bh(&info->est->lock); |
| |
| return XT_CONTINUE; |
| @@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) |
| if (!est) |
| goto err1; |
| |
| + gnet_stats_basic_sync_init(&est->bstats); |
| strlcpy(est->name, info->name, sizeof(est->name)); |
| spin_lock_init(&est->lock); |
| est->refcnt = 1; |
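| |
| The xt_RATEEST conversion shows the writer side of the new |
| gnet_stats_basic_sync counters: the plain u64 fields become |
| u64_stats_t and are updated with u64_stats_add()/u64_stats_inc() |
| under the existing lock. A reduced sketch of both sides of that |
| pattern (the example_* names are illustrative): |
| |
| .. code-block:: c |
| |
|     /* Same includes as net/netfilter/xt_RATEEST.c above. */ |
| |
|     /* Writer: serialized by a BH-disabling lock, as in |
|      * xt_rateest_tg() above. |
|      */ |
|     static void example_account(struct gnet_stats_basic_sync *b, |
|                                 unsigned int len, spinlock_t *lock) |
|     { |
|         spin_lock_bh(lock); |
|         u64_stats_add(&b->bytes, len); |
|         u64_stats_inc(&b->packets); |
|         spin_unlock_bh(lock); |
|     } |
| |
|     /* Reader: taking the same lock makes a plain u64_stats_read() |
|      * safe; lockless readers use the syncp fetch loop instead (see |
|      * the sketch after the sch_generic.c hunks below). |
|      */ |
|     static u64 example_read_packets(struct gnet_stats_basic_sync *b, |
|                                     spinlock_t *lock) |
|     { |
|         u64 packets; |
| |
|         spin_lock_bh(lock); |
|         packets = u64_stats_read(&b->packets); |
|         spin_unlock_bh(lock); |
|         return packets; |
|     } |
| |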
| diff --git a/net/sched/act_api.c b/net/sched/act_api.c |
| index d775676956bf..94c05713ecf8 100644 |
| --- a/net/sched/act_api.c |
| +++ b/net/sched/act_api.c |
| @@ -486,16 +486,18 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, |
| atomic_set(&p->tcfa_bindcnt, 1); |
| |
| if (cpustats) { |
| - p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); |
| + p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); |
| if (!p->cpu_bstats) |
| goto err1; |
| - p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); |
| + p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); |
| if (!p->cpu_bstats_hw) |
| goto err2; |
| p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); |
| if (!p->cpu_qstats) |
| goto err3; |
| } |
| + gnet_stats_basic_sync_init(&p->tcfa_bstats); |
| + gnet_stats_basic_sync_init(&p->tcfa_bstats_hw); |
| spin_lock_init(&p->tcfa_lock); |
| p->tcfa_index = index; |
| p->tcfa_tm.install = jiffies; |
| @@ -505,7 +507,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, |
| if (est) { |
| err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, |
| &p->tcfa_rate_est, |
| - &p->tcfa_lock, NULL, est); |
| + &p->tcfa_lock, false, est); |
| if (err) |
| goto err4; |
| } |
| @@ -1141,13 +1143,13 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, |
| u64 drops, bool hw) |
| { |
| if (a->cpu_bstats) { |
| - _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); |
| + _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); |
| |
| this_cpu_ptr(a->cpu_qstats)->drops += drops; |
| |
| if (hw) |
| - _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), |
| - bytes, packets); |
| + _bstats_update(this_cpu_ptr(a->cpu_bstats_hw), |
| + bytes, packets); |
| return; |
| } |
| |
| @@ -1186,9 +1188,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, |
| if (err < 0) |
| goto errout; |
| |
| - if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 || |
| - gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw, |
| - &p->tcfa_bstats_hw) < 0 || |
| + if (gnet_stats_copy_basic(&d, p->cpu_bstats, |
| + &p->tcfa_bstats, false) < 0 || |
| + gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw, |
| + &p->tcfa_bstats_hw, false) < 0 || |
| gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || |
| gnet_stats_copy_queue(&d, p->cpu_qstats, |
| &p->tcfa_qstats, |
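| |
| For the per-CPU case used by tc actions, allocation and the fast-path |
| update reduce to the following sketch (illustrative names; error |
| handling trimmed): |
| |
| .. code-block:: c |
| |
|     /* Same includes as net/sched/act_api.c above. */ |
| |
|     static struct gnet_stats_basic_sync __percpu *example_alloc(void) |
|     { |
|         /* netdev_alloc_pcpu_stats() zeroes the counters and runs |
|          * u64_stats_init() for each CPU, so only the non-per-CPU |
|          * fallback counters need gnet_stats_basic_sync_init(), as |
|          * done in tcf_idr_create() above. |
|          */ |
|         return netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); |
|     } |
| |
|     static void example_hit(struct gnet_stats_basic_sync __percpu *b, |
|                             const struct sk_buff *skb) |
|     { |
|         /* bstats_update() replaces bstats_cpu_update(): both counter |
|          * flavours are now the same gnet_stats_basic_sync struct. |
|          */ |
|         bstats_update(this_cpu_ptr(b), skb); |
|     } |
| |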
| diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c |
| index 5c36013339e1..f2bf896331a5 100644 |
| --- a/net/sched/act_bpf.c |
| +++ b/net/sched/act_bpf.c |
| @@ -41,7 +41,7 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, |
| int action, filter_res; |
| |
| tcf_lastuse_update(&prog->tcf_tm); |
| - bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb); |
| |
| filter = rcu_dereference(prog->filter); |
| if (at_ingress) { |
| diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c |
| index 7064a365a1a9..b757f90a2d58 100644 |
| --- a/net/sched/act_ife.c |
| +++ b/net/sched/act_ife.c |
| @@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, |
| u8 *tlv_data; |
| u16 metalen; |
| |
| - bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); |
| tcf_lastuse_update(&ife->tcf_tm); |
| |
| if (skb_at_tc_ingress(skb)) |
| @@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, |
| exceed_mtu = true; |
| } |
| |
| - bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb); |
| tcf_lastuse_update(&ife->tcf_tm); |
| |
| if (!metalen) { /* no metadata to send */ |
| diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c |
| index e4529b428cf4..8faa4c58305e 100644 |
| --- a/net/sched/act_mpls.c |
| +++ b/net/sched/act_mpls.c |
| @@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, |
| int ret, mac_len; |
| |
| tcf_lastuse_update(&m->tcf_tm); |
| - bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb); |
| |
| /* Ensure 'data' points at mac_header prior calling mpls manipulating |
| * functions. |
| diff --git a/net/sched/act_police.c b/net/sched/act_police.c |
| index 5c0a3ea9fe12..cbeb9995df37 100644 |
| --- a/net/sched/act_police.c |
| +++ b/net/sched/act_police.c |
| @@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, |
| police->common.cpu_bstats, |
| &police->tcf_rate_est, |
| &police->tcf_lock, |
| - NULL, est); |
| + false, est); |
| if (err) |
| goto failure; |
| } else if (tb[TCA_POLICE_AVRATE] && |
| @@ -262,7 +262,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, |
| int ret; |
| |
| tcf_lastuse_update(&police->tcf_tm); |
| - bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb); |
| |
| ret = READ_ONCE(police->tcf_action); |
| p = rcu_dereference_bh(police->params); |
| diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c |
| index 230501eb9e06..ce859b0e0deb 100644 |
| --- a/net/sched/act_sample.c |
| +++ b/net/sched/act_sample.c |
| @@ -163,7 +163,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, |
| int retval; |
| |
| tcf_lastuse_update(&s->tcf_tm); |
| - bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb); |
| retval = READ_ONCE(s->tcf_action); |
| |
| psample_group = rcu_dereference_bh(s->psample_group); |
| diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c |
| index cbbe1861d3a2..e617ab4505ca 100644 |
| --- a/net/sched/act_simple.c |
| +++ b/net/sched/act_simple.c |
| @@ -36,7 +36,8 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a, |
| * then it would look like "hello_3" (without quotes) |
| */ |
| pr_info("simple: %s_%llu\n", |
| - (char *)d->tcfd_defdata, d->tcf_bstats.packets); |
| + (char *)d->tcfd_defdata, |
| + u64_stats_read(&d->tcf_bstats.packets)); |
| spin_unlock(&d->tcf_lock); |
| return d->tcf_action; |
| } |
| diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c |
| index 605418538347..d30ecbfc8f84 100644 |
| --- a/net/sched/act_skbedit.c |
| +++ b/net/sched/act_skbedit.c |
| @@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, |
| int action; |
| |
| tcf_lastuse_update(&d->tcf_tm); |
| - bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); |
| |
| params = rcu_dereference_bh(d->params); |
| action = READ_ONCE(d->tcf_action); |
| diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c |
| index ecb9ee666095..9b6b52c5e24e 100644 |
| --- a/net/sched/act_skbmod.c |
| +++ b/net/sched/act_skbmod.c |
| @@ -31,7 +31,7 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, |
| u64 flags; |
| |
| tcf_lastuse_update(&d->tcf_tm); |
| - bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); |
| + bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb); |
| |
| action = READ_ONCE(d->tcf_action); |
| if (unlikely(action == TC_ACT_SHOT)) |
| diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c |
| index 0fb387c9d706..eb2c5c8fcd32 100644 |
| --- a/net/sched/sch_api.c |
| +++ b/net/sched/sch_api.c |
| @@ -884,7 +884,7 @@ static void qdisc_offload_graft_root(struct net_device *dev, |
| static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, |
| u32 portid, u32 seq, u16 flags, int event) |
| { |
| - struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; |
| + struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL; |
| struct gnet_stats_queue __percpu *cpu_qstats = NULL; |
| struct tcmsg *tcm; |
| struct nlmsghdr *nlh; |
| @@ -942,8 +942,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, |
| cpu_qstats = q->cpu_qstats; |
| } |
| |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), |
| - &d, cpu_bstats, &q->bstats) < 0 || |
| + if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 || |
| gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || |
| gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) |
| goto nla_put_failure; |
| @@ -1264,26 +1263,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev, |
| rcu_assign_pointer(sch->stab, stab); |
| } |
| if (tca[TCA_RATE]) { |
| - seqcount_t *running; |
| - |
| err = -EOPNOTSUPP; |
| if (sch->flags & TCQ_F_MQROOT) { |
| NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); |
| goto err_out4; |
| } |
| |
| - if (sch->parent != TC_H_ROOT && |
| - !(sch->flags & TCQ_F_INGRESS) && |
| - (!p || !(p->flags & TCQ_F_MQROOT))) |
| - running = qdisc_root_sleeping_running(sch); |
| - else |
| - running = &sch->running; |
| - |
| err = gen_new_estimator(&sch->bstats, |
| sch->cpu_bstats, |
| &sch->rate_est, |
| NULL, |
| - running, |
| + true, |
| tca[TCA_RATE]); |
| if (err) { |
| NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); |
| @@ -1359,7 +1349,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, |
| sch->cpu_bstats, |
| &sch->rate_est, |
| NULL, |
| - qdisc_root_sleeping_running(sch), |
| + true, |
| tca[TCA_RATE]); |
| } |
| out: |
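| |
| After this change the estimator core no longer takes a seqcount |
| pointer; the bool tells it whether the counters may be written |
| concurrently. A minimal sketch of attaching an estimator under the |
| new signature (the wrapper name is illustrative): |
| |
| .. code-block:: c |
| |
|     /* Same includes as net/sched/sch_api.c above. */ |
| |
|     static int example_attach_est(struct Qdisc *sch, |
|                                   struct nlattr *rate_nla) |
|     { |
|         /* running == true: the qdisc is live, so the estimator must |
|          * read bstats via the u64_stats syncp rather than assuming |
|          * the values are stable. |
|          */ |
|         return gen_new_estimator(&sch->bstats, sch->cpu_bstats, |
|                                  &sch->rate_est, NULL, true, |
|                                  rate_nla); |
|     } |
| |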
| diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c |
| index 7d8518176b45..4c8e994cf0a5 100644 |
| --- a/net/sched/sch_atm.c |
| +++ b/net/sched/sch_atm.c |
| @@ -52,7 +52,7 @@ struct atm_flow_data { |
| struct atm_qdisc_data *parent; /* parent qdisc */ |
| struct socket *sock; /* for closing */ |
| int ref; /* reference count */ |
| - struct gnet_stats_basic_packed bstats; |
| + struct gnet_stats_basic_sync bstats; |
| struct gnet_stats_queue qstats; |
| struct list_head list; |
| struct atm_flow_data *excess; /* flow for excess traffic; |
| @@ -548,6 +548,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, |
| pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); |
| INIT_LIST_HEAD(&p->flows); |
| INIT_LIST_HEAD(&p->link.list); |
| + gnet_stats_basic_sync_init(&p->link.bstats); |
| list_add(&p->link.list, &p->flows); |
| p->link.q = qdisc_create_dflt(sch->dev_queue, |
| &pfifo_qdisc_ops, sch->handle, extack); |
| @@ -652,8 +653,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, |
| { |
| struct atm_flow_data *flow = (struct atm_flow_data *)arg; |
| |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), |
| - d, NULL, &flow->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || |
| gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) |
| return -1; |
| |
| diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c |
| index e0da15530f0e..02d9f0dfe356 100644 |
| --- a/net/sched/sch_cbq.c |
| +++ b/net/sched/sch_cbq.c |
| @@ -116,7 +116,7 @@ struct cbq_class { |
| long avgidle; |
| long deficit; /* Saved deficit for WRR */ |
| psched_time_t penalized; |
| - struct gnet_stats_basic_packed bstats; |
| + struct gnet_stats_basic_sync bstats; |
| struct gnet_stats_queue qstats; |
| struct net_rate_estimator __rcu *rate_est; |
| struct tc_cbq_xstats xstats; |
| @@ -565,8 +565,7 @@ cbq_update(struct cbq_sched_data *q) |
| long avgidle = cl->avgidle; |
| long idle; |
| |
| - cl->bstats.packets++; |
| - cl->bstats.bytes += len; |
| + _bstats_update(&cl->bstats, len, 1); |
| |
| /* |
| * (now - last) is total time between packet right edges. |
| @@ -1384,8 +1383,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, |
| if (cl->undertime != PSCHED_PASTPERFECT) |
| cl->xstats.undertime = cl->undertime - q->now; |
| |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), |
| - d, NULL, &cl->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || |
| gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || |
| gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) |
| return -1; |
| @@ -1519,7 +1517,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t |
| err = gen_replace_estimator(&cl->bstats, NULL, |
| &cl->rate_est, |
| NULL, |
| - qdisc_root_sleeping_running(sch), |
| + true, |
| tca[TCA_RATE]); |
| if (err) { |
| NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); |
| @@ -1611,6 +1609,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t |
| if (cl == NULL) |
| goto failure; |
| |
| + gnet_stats_basic_sync_init(&cl->bstats); |
| err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); |
| if (err) { |
| kfree(cl); |
| @@ -1619,9 +1618,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t |
| |
| if (tca[TCA_RATE]) { |
| err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, |
| - NULL, |
| - qdisc_root_sleeping_running(sch), |
| - tca[TCA_RATE]); |
| + NULL, true, tca[TCA_RATE]); |
| if (err) { |
| NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); |
| tcf_block_put(cl->block); |
| diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c |
| index 642cd179b7a7..18e4f7a0b291 100644 |
| --- a/net/sched/sch_drr.c |
| +++ b/net/sched/sch_drr.c |
| @@ -19,7 +19,7 @@ struct drr_class { |
| struct Qdisc_class_common common; |
| unsigned int filter_cnt; |
| |
| - struct gnet_stats_basic_packed bstats; |
| + struct gnet_stats_basic_sync bstats; |
| struct gnet_stats_queue qstats; |
| struct net_rate_estimator __rcu *rate_est; |
| struct list_head alist; |
| @@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, |
| if (tca[TCA_RATE]) { |
| err = gen_replace_estimator(&cl->bstats, NULL, |
| &cl->rate_est, |
| - NULL, |
| - qdisc_root_sleeping_running(sch), |
| + NULL, true, |
| tca[TCA_RATE]); |
| if (err) { |
| NL_SET_ERR_MSG(extack, "Failed to replace estimator"); |
| @@ -106,6 +105,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, |
| if (cl == NULL) |
| return -ENOBUFS; |
| |
| + gnet_stats_basic_sync_init(&cl->bstats); |
| cl->common.classid = classid; |
| cl->quantum = quantum; |
| cl->qdisc = qdisc_create_dflt(sch->dev_queue, |
| @@ -118,9 +118,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, |
| |
| if (tca[TCA_RATE]) { |
| err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, |
| - NULL, |
| - qdisc_root_sleeping_running(sch), |
| - tca[TCA_RATE]); |
| + NULL, true, tca[TCA_RATE]); |
| if (err) { |
| NL_SET_ERR_MSG(extack, "Failed to replace estimator"); |
| qdisc_put(cl->qdisc); |
| @@ -267,8 +265,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, |
| if (qlen) |
| xstats.deficit = cl->deficit; |
| |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), |
| - d, NULL, &cl->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || |
| gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || |
| gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0) |
| return -1; |
| diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c |
| index 44fa2532a87c..d73393493553 100644 |
| --- a/net/sched/sch_ets.c |
| +++ b/net/sched/sch_ets.c |
| @@ -41,7 +41,7 @@ struct ets_class { |
| struct Qdisc *qdisc; |
| u32 quantum; |
| u32 deficit; |
| - struct gnet_stats_basic_packed bstats; |
| + struct gnet_stats_basic_sync bstats; |
| struct gnet_stats_queue qstats; |
| }; |
| |
| @@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg, |
| struct ets_class *cl = ets_class_from_arg(sch, arg); |
| struct Qdisc *cl_q = cl->qdisc; |
| |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), |
| - d, NULL, &cl_q->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 || |
| qdisc_qstats_copy(d, cl_q) < 0) |
| return -1; |
| |
| @@ -661,7 +660,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, |
| |
| q->nbands = nbands; |
| for (i = nstrict; i < q->nstrict; i++) { |
| - INIT_LIST_HEAD(&q->classes[i].alist); |
| if (q->classes[i].qdisc->q.qlen) { |
| list_add_tail(&q->classes[i].alist, &q->active); |
| q->classes[i].deficit = quanta[i]; |
| @@ -689,7 +687,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, |
| ets_offload_change(sch); |
| for (i = q->nbands; i < oldbands; i++) { |
| qdisc_put(q->classes[i].qdisc); |
| - memset(&q->classes[i], 0, sizeof(q->classes[i])); |
| + q->classes[i].qdisc = NULL; |
| + q->classes[i].quantum = 0; |
| + q->classes[i].deficit = 0; |
| + gnet_stats_basic_sync_init(&q->classes[i].bstats); |
| + memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats)); |
| } |
| return 0; |
| } |
| @@ -698,7 +700,7 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt, |
| struct netlink_ext_ack *extack) |
| { |
| struct ets_sched *q = qdisc_priv(sch); |
| - int err; |
| + int err, i; |
| |
| if (!opt) |
| return -EINVAL; |
| @@ -708,6 +710,9 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt, |
| return err; |
| |
| INIT_LIST_HEAD(&q->active); |
| + for (i = 0; i < TCQ_ETS_MAX_BANDS; i++) |
| + INIT_LIST_HEAD(&q->classes[i].alist); |
| + |
| return ets_qdisc_change(sch, opt, extack); |
| } |
| |
| diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c |
| index 30c29a9a2efd..dd27a062e913 100644 |
| --- a/net/sched/sch_generic.c |
| +++ b/net/sched/sch_generic.c |
| @@ -304,8 +304,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, |
| |
| /* |
| * Transmit possibly several skbs, and handle the return status as |
| - * required. Owning running seqcount bit guarantees that |
| - * only one CPU can execute this function. |
| + * required. Owning qdisc running bit guarantees that only one CPU |
| + * can execute this function. |
| * |
| * Returns to the caller: |
| * false - hardware queue frozen backoff |
| @@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = { |
| .ops = &noop_qdisc_ops, |
| .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), |
| .dev_queue = &noop_netdev_queue, |
| - .running = SEQCNT_ZERO(noop_qdisc.running), |
| .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), |
| .gso_skb = { |
| .next = (struct sk_buff *)&noop_qdisc.gso_skb, |
| @@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { |
| EXPORT_SYMBOL(pfifo_fast_ops); |
| |
| static struct lock_class_key qdisc_tx_busylock; |
| -static struct lock_class_key qdisc_running_key; |
| |
| struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
| const struct Qdisc_ops *ops, |
| @@ -892,11 +890,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
| __skb_queue_head_init(&sch->gso_skb); |
| __skb_queue_head_init(&sch->skb_bad_txq); |
| qdisc_skb_head_init(&sch->q); |
| + gnet_stats_basic_sync_init(&sch->bstats); |
| spin_lock_init(&sch->q.lock); |
| |
| if (ops->static_flags & TCQ_F_CPUSTATS) { |
| sch->cpu_bstats = |
| - netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); |
| + netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); |
| if (!sch->cpu_bstats) |
| goto errout1; |
| |
| @@ -916,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
| lockdep_set_class(&sch->seqlock, |
| dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); |
| |
| - seqcount_init(&sch->running); |
| - lockdep_set_class(&sch->running, |
| - dev->qdisc_running_key ?: &qdisc_running_key); |
| - |
| sch->ops = ops; |
| sch->flags = ops->static_flags; |
| sch->enqueue = ops->enqueue; |
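| |
| With Qdisc::running gone, consistent reads of a live qdisc's counters |
| rely on the u64_stats sequence counter embedded in |
| gnet_stats_basic_sync. A sketch of the retry loop that the gnet_stats |
| helpers perform internally when @running is true (illustrative |
| function name): |
| |
| .. code-block:: c |
| |
|     /* Same includes as net/sched/sch_generic.c above. */ |
| |
|     static void example_snapshot(const struct gnet_stats_basic_sync *b, |
|                                  u64 *bytes, u64 *packets) |
|     { |
|         unsigned int start; |
| |
|         do { |
|             start = u64_stats_fetch_begin(&b->syncp); |
|             *bytes = u64_stats_read(&b->bytes); |
|             *packets = u64_stats_read(&b->packets); |
|         } while (u64_stats_fetch_retry(&b->syncp, start)); |
|     } |
| |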
| diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c |
| index 621dc6afde8f..1073c76d05c4 100644 |
| --- a/net/sched/sch_gred.c |
| +++ b/net/sched/sch_gred.c |
| @@ -56,6 +56,7 @@ struct gred_sched { |
| u32 DPs; |
| u32 def; |
| struct red_vars wred_set; |
| + struct tc_gred_qopt_offload *opt; |
| }; |
| |
| static inline int gred_wred_mode(struct gred_sched *table) |
| @@ -311,48 +312,50 @@ static void gred_offload(struct Qdisc *sch, enum tc_gred_command command) |
| { |
| struct gred_sched *table = qdisc_priv(sch); |
| struct net_device *dev = qdisc_dev(sch); |
| - struct tc_gred_qopt_offload opt = { |
| - .command = command, |
| - .handle = sch->handle, |
| - .parent = sch->parent, |
| - }; |
| + struct tc_gred_qopt_offload *opt = table->opt; |
| |
| if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) |
| return; |
| |
| + memset(opt, 0, sizeof(*opt)); |
| + opt->command = command; |
| + opt->handle = sch->handle; |
| + opt->parent = sch->parent; |
| + |
| if (command == TC_GRED_REPLACE) { |
| unsigned int i; |
| |
| - opt.set.grio_on = gred_rio_mode(table); |
| - opt.set.wred_on = gred_wred_mode(table); |
| - opt.set.dp_cnt = table->DPs; |
| - opt.set.dp_def = table->def; |
| + opt->set.grio_on = gred_rio_mode(table); |
| + opt->set.wred_on = gred_wred_mode(table); |
| + opt->set.dp_cnt = table->DPs; |
| + opt->set.dp_def = table->def; |
| |
| for (i = 0; i < table->DPs; i++) { |
| struct gred_sched_data *q = table->tab[i]; |
| |
| if (!q) |
| continue; |
| - opt.set.tab[i].present = true; |
| - opt.set.tab[i].limit = q->limit; |
| - opt.set.tab[i].prio = q->prio; |
| - opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog; |
| - opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog; |
| - opt.set.tab[i].is_ecn = gred_use_ecn(q); |
| - opt.set.tab[i].is_harddrop = gred_use_harddrop(q); |
| - opt.set.tab[i].probability = q->parms.max_P; |
| - opt.set.tab[i].backlog = &q->backlog; |
| + opt->set.tab[i].present = true; |
| + opt->set.tab[i].limit = q->limit; |
| + opt->set.tab[i].prio = q->prio; |
| + opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog; |
| + opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog; |
| + opt->set.tab[i].is_ecn = gred_use_ecn(q); |
| + opt->set.tab[i].is_harddrop = gred_use_harddrop(q); |
| + opt->set.tab[i].probability = q->parms.max_P; |
| + opt->set.tab[i].backlog = &q->backlog; |
| } |
| - opt.set.qstats = &sch->qstats; |
| + opt->set.qstats = &sch->qstats; |
| } |
| |
| - dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt); |
| + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt); |
| } |
| |
| static int gred_offload_dump_stats(struct Qdisc *sch) |
| { |
| struct gred_sched *table = qdisc_priv(sch); |
| struct tc_gred_qopt_offload *hw_stats; |
| + u64 bytes = 0, packets = 0; |
| unsigned int i; |
| int ret; |
| |
| @@ -364,9 +367,11 @@ static int gred_offload_dump_stats(struct Qdisc *sch) |
| hw_stats->handle = sch->handle; |
| hw_stats->parent = sch->parent; |
| |
| - for (i = 0; i < MAX_DPs; i++) |
| + for (i = 0; i < MAX_DPs; i++) { |
| + gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]); |
| if (table->tab[i]) |
| hw_stats->stats.xstats[i] = &table->tab[i]->stats; |
| + } |
| |
| ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats); |
| /* Even if driver returns failure adjust the stats - in case offload |
| @@ -375,19 +380,19 @@ static int gred_offload_dump_stats(struct Qdisc *sch) |
| for (i = 0; i < MAX_DPs; i++) { |
| if (!table->tab[i]) |
| continue; |
| - table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets; |
| - table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes; |
| + table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets); |
| + table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes); |
| table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog; |
| |
| - _bstats_update(&sch->bstats, |
| - hw_stats->stats.bstats[i].bytes, |
| - hw_stats->stats.bstats[i].packets); |
| + bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes); |
| + packets += u64_stats_read(&hw_stats->stats.bstats[i].packets); |
| sch->qstats.qlen += hw_stats->stats.qstats[i].qlen; |
| sch->qstats.backlog += hw_stats->stats.qstats[i].backlog; |
| sch->qstats.drops += hw_stats->stats.qstats[i].drops; |
| sch->qstats.requeues += hw_stats->stats.qstats[i].requeues; |
| sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits; |
| } |
| + _bstats_update(&sch->bstats, bytes, packets); |
| |
| kfree(hw_stats); |
| return ret; |
| @@ -728,6 +733,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt, |
| static int gred_init(struct Qdisc *sch, struct nlattr *opt, |
| struct netlink_ext_ack *extack) |
| { |
| + struct gred_sched *table = qdisc_priv(sch); |
| struct nlattr *tb[TCA_GRED_MAX + 1]; |
| int err; |
| |
| @@ -751,6 +757,12 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt, |
| sch->limit = qdisc_dev(sch)->tx_queue_len |
| * psched_mtu(qdisc_dev(sch)); |
| |
| + if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) { |
| + table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL); |
| + if (!table->opt) |
| + return -ENOMEM; |
| + } |
| + |
| return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack); |
| } |
| |
| @@ -907,6 +919,7 @@ static void gred_destroy(struct Qdisc *sch) |
| gred_destroy_vq(table->tab[i]); |
| } |
| gred_offload(sch, TC_GRED_DESTROY); |
| + kfree(table->opt); |
| } |
| |
| static struct Qdisc_ops gred_qdisc_ops __read_mostly = { |
| diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c |
| index b7ac30cca035..d3979a6000e7 100644 |
| --- a/net/sched/sch_hfsc.c |
| +++ b/net/sched/sch_hfsc.c |
| @@ -111,7 +111,7 @@ enum hfsc_class_flags { |
| struct hfsc_class { |
| struct Qdisc_class_common cl_common; |
| |
| - struct gnet_stats_basic_packed bstats; |
| + struct gnet_stats_basic_sync bstats; |
| struct gnet_stats_queue qstats; |
| struct net_rate_estimator __rcu *rate_est; |
| struct tcf_proto __rcu *filter_list; /* filter list */ |
| @@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, |
| err = gen_replace_estimator(&cl->bstats, NULL, |
| &cl->rate_est, |
| NULL, |
| - qdisc_root_sleeping_running(sch), |
| + true, |
| tca[TCA_RATE]); |
| if (err) |
| return err; |
| @@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, |
| |
| if (tca[TCA_RATE]) { |
| err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, |
| - NULL, |
| - qdisc_root_sleeping_running(sch), |
| - tca[TCA_RATE]); |
| + NULL, true, tca[TCA_RATE]); |
| if (err) { |
| tcf_block_put(cl->block); |
| kfree(cl); |
| @@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, |
| xstats.work = cl->cl_total; |
| xstats.rtwork = cl->cl_cumul; |
| |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || |
| gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || |
| gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) |
| return -1; |
| @@ -1406,6 +1404,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt, |
| if (err) |
| return err; |
| |
| + gnet_stats_basic_sync_init(&q->root.bstats); |
| q->root.cl_common.classid = sch->handle; |
| q->root.sched = q; |
| q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, |
| diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c |
| index 5cbc32fee867..8fd419337d3f 100644 |
| --- a/net/sched/sch_htb.c |
| +++ b/net/sched/sch_htb.c |
| @@ -113,8 +113,8 @@ struct htb_class { |
| /* |
| * Written often fields |
| */ |
| - struct gnet_stats_basic_packed bstats; |
| - struct gnet_stats_basic_packed bstats_bias; |
| + struct gnet_stats_basic_sync bstats; |
| + struct gnet_stats_basic_sync bstats_bias; |
| struct tc_htb_xstats xstats; /* our special stats */ |
| |
| /* token bucket parameters */ |
| @@ -1308,10 +1308,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, |
| static void htb_offload_aggregate_stats(struct htb_sched *q, |
| struct htb_class *cl) |
| { |
| + u64 bytes = 0, packets = 0; |
| struct htb_class *c; |
| unsigned int i; |
| |
| - memset(&cl->bstats, 0, sizeof(cl->bstats)); |
| + gnet_stats_basic_sync_init(&cl->bstats); |
| |
| for (i = 0; i < q->clhash.hashsize; i++) { |
| hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { |
| @@ -1323,14 +1324,15 @@ static void htb_offload_aggregate_stats(struct htb_sched *q, |
| if (p != cl) |
| continue; |
| |
| - cl->bstats.bytes += c->bstats_bias.bytes; |
| - cl->bstats.packets += c->bstats_bias.packets; |
| + bytes += u64_stats_read(&c->bstats_bias.bytes); |
| + packets += u64_stats_read(&c->bstats_bias.packets); |
| if (c->level == 0) { |
| - cl->bstats.bytes += c->leaf.q->bstats.bytes; |
| - cl->bstats.packets += c->leaf.q->bstats.packets; |
| + bytes += u64_stats_read(&c->leaf.q->bstats.bytes); |
| + packets += u64_stats_read(&c->leaf.q->bstats.packets); |
| } |
| } |
| } |
| + _bstats_update(&cl->bstats, bytes, packets); |
| } |
| |
| static int |
| @@ -1357,16 +1359,16 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) |
| if (cl->leaf.q) |
| cl->bstats = cl->leaf.q->bstats; |
| else |
| - memset(&cl->bstats, 0, sizeof(cl->bstats)); |
| - cl->bstats.bytes += cl->bstats_bias.bytes; |
| - cl->bstats.packets += cl->bstats_bias.packets; |
| + gnet_stats_basic_sync_init(&cl->bstats); |
| + _bstats_update(&cl->bstats, |
| + u64_stats_read(&cl->bstats_bias.bytes), |
| + u64_stats_read(&cl->bstats_bias.packets)); |
| } else { |
| htb_offload_aggregate_stats(q, cl); |
| } |
| } |
| |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), |
| - d, NULL, &cl->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || |
| gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || |
| gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) |
| return -1; |
| @@ -1578,8 +1580,9 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, |
| WARN_ON(old != q); |
| |
| if (cl->parent) { |
| - cl->parent->bstats_bias.bytes += q->bstats.bytes; |
| - cl->parent->bstats_bias.packets += q->bstats.packets; |
| + _bstats_update(&cl->parent->bstats_bias, |
| + u64_stats_read(&q->bstats.bytes), |
| + u64_stats_read(&q->bstats.packets)); |
| } |
| |
| offload_opt = (struct tc_htb_qopt_offload) { |
| @@ -1869,6 +1872,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, |
| if (!cl) |
| goto failure; |
| |
| + gnet_stats_basic_sync_init(&cl->bstats); |
| + gnet_stats_basic_sync_init(&cl->bstats_bias); |
| + |
| err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); |
| if (err) { |
| kfree(cl); |
| @@ -1878,7 +1884,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, |
| err = gen_new_estimator(&cl->bstats, NULL, |
| &cl->rate_est, |
| NULL, |
| - qdisc_root_sleeping_running(sch), |
| + true, |
| tca[TCA_RATE] ? : &est.nla); |
| if (err) |
| goto err_block_put; |
| @@ -1942,8 +1948,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, |
| htb_graft_helper(dev_queue, old_q); |
| goto err_kill_estimator; |
| } |
| - parent->bstats_bias.bytes += old_q->bstats.bytes; |
| - parent->bstats_bias.packets += old_q->bstats.packets; |
| + _bstats_update(&parent->bstats_bias, |
| + u64_stats_read(&old_q->bstats.bytes), |
| + u64_stats_read(&old_q->bstats.packets)); |
| qdisc_put(old_q); |
| } |
| new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, |
| @@ -2003,7 +2010,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, |
| err = gen_replace_estimator(&cl->bstats, NULL, |
| &cl->rate_est, |
| NULL, |
| - qdisc_root_sleeping_running(sch), |
| + true, |
| tca[TCA_RATE]); |
| if (err) |
| return err; |
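| |
| Note the pattern in the HTB hunks above: totals are gathered in plain |
| u64 locals and folded into the shared counters with a single |
| _bstats_update() call, instead of bumping the u64_stats fields on |
| every loop iteration. Reduced to its core (illustrative names): |
| |
| .. code-block:: c |
| |
|     /* Same includes as net/sched/sch_htb.c above. */ |
| |
|     static void example_aggregate(struct gnet_stats_basic_sync *dst, |
|                                   const struct gnet_stats_basic_sync *src, |
|                                   unsigned int n) |
|     { |
|         u64 bytes = 0, packets = 0; |
|         unsigned int i; |
| |
|         for (i = 0; i < n; i++) { |
|             bytes += u64_stats_read(&src[i].bytes); |
|             packets += u64_stats_read(&src[i].packets); |
|         } |
|         /* One writer-side update keeps the syncp critical section |
|          * short. |
|          */ |
|         _bstats_update(dst, bytes, packets); |
|     } |
| |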
| diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c |
| index db18d8a860f9..24c5d97d88dd 100644 |
| --- a/net/sched/sch_mq.c |
| +++ b/net/sched/sch_mq.c |
| @@ -153,10 +153,9 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) |
| struct net_device *dev = qdisc_dev(sch); |
| struct Qdisc *qdisc; |
| unsigned int ntx; |
| - __u32 qlen = 0; |
| |
| sch->q.qlen = 0; |
| - memset(&sch->bstats, 0, sizeof(sch->bstats)); |
| + gnet_stats_basic_sync_init(&sch->bstats); |
| memset(&sch->qstats, 0, sizeof(sch->qstats)); |
| |
| /* MQ supports lockless qdiscs. However, statistics accounting needs |
| @@ -168,25 +167,11 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) |
| qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; |
| spin_lock_bh(qdisc_lock(qdisc)); |
| |
| - if (qdisc_is_percpu_stats(qdisc)) { |
| - qlen = qdisc_qlen_sum(qdisc); |
| - __gnet_stats_copy_basic(NULL, &sch->bstats, |
| - qdisc->cpu_bstats, |
| - &qdisc->bstats); |
| - __gnet_stats_copy_queue(&sch->qstats, |
| - qdisc->cpu_qstats, |
| - &qdisc->qstats, qlen); |
| - sch->q.qlen += qlen; |
| - } else { |
| - sch->q.qlen += qdisc->q.qlen; |
| - sch->bstats.bytes += qdisc->bstats.bytes; |
| - sch->bstats.packets += qdisc->bstats.packets; |
| - sch->qstats.qlen += qdisc->qstats.qlen; |
| - sch->qstats.backlog += qdisc->qstats.backlog; |
| - sch->qstats.drops += qdisc->qstats.drops; |
| - sch->qstats.requeues += qdisc->qstats.requeues; |
| - sch->qstats.overlimits += qdisc->qstats.overlimits; |
| - } |
| + gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, |
| + &qdisc->bstats, false); |
| + gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, |
| + &qdisc->qstats); |
| + sch->q.qlen += qdisc_qlen(qdisc); |
| |
| spin_unlock_bh(qdisc_lock(qdisc)); |
| } |
| @@ -269,8 +254,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, |
| struct netdev_queue *dev_queue = mq_queue_get(sch, cl); |
| |
| sch = dev_queue->qdisc_sleeping; |
| - if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats, |
| - &sch->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || |
| qdisc_qstats_copy(d, sch) < 0) |
| return -1; |
| return 0; |
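| |
| The mq conversion above is the template for the mqprio one that |
| follows: gnet_stats_add_basic()/gnet_stats_add_queue() absorb the old |
| open-coded percpu vs. non-percpu branches. The per-child step reduces |
| to (illustrative wrapper name): |
| |
| .. code-block:: c |
| |
|     /* Same includes as net/sched/sch_mq.c above. */ |
| |
|     static void example_fold_child(struct Qdisc *parent, |
|                                    struct Qdisc *child) |
|     { |
|         spin_lock_bh(qdisc_lock(child)); |
|         /* running == false: the child is locked, so plain reads |
|          * are sufficient. |
|          */ |
|         gnet_stats_add_basic(&parent->bstats, child->cpu_bstats, |
|                              &child->bstats, false); |
|         gnet_stats_add_queue(&parent->qstats, child->cpu_qstats, |
|                              &child->qstats); |
|         parent->q.qlen += qdisc_qlen(child); |
|         spin_unlock_bh(qdisc_lock(child)); |
|     } |
| |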
| diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c |
| index 50e15add6068..42d4101e4f3d 100644 |
| --- a/net/sched/sch_mqprio.c |
| +++ b/net/sched/sch_mqprio.c |
| @@ -412,7 +412,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) |
| unsigned int ntx, tc; |
| |
| sch->q.qlen = 0; |
| - memset(&sch->bstats, 0, sizeof(sch->bstats)); |
| + gnet_stats_basic_sync_init(&sch->bstats); |
| memset(&sch->qstats, 0, sizeof(sch->qstats)); |
| |
| /* MQ supports lockless qdiscs. However, statistics accounting needs |
| @@ -424,25 +424,11 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) |
| qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; |
| spin_lock_bh(qdisc_lock(qdisc)); |
| |
| - if (qdisc_is_percpu_stats(qdisc)) { |
| - __u32 qlen = qdisc_qlen_sum(qdisc); |
| - |
| - __gnet_stats_copy_basic(NULL, &sch->bstats, |
| - qdisc->cpu_bstats, |
| - &qdisc->bstats); |
| - __gnet_stats_copy_queue(&sch->qstats, |
| - qdisc->cpu_qstats, |
| - &qdisc->qstats, qlen); |
| - sch->q.qlen += qlen; |
| - } else { |
| - sch->q.qlen += qdisc->q.qlen; |
| - sch->bstats.bytes += qdisc->bstats.bytes; |
| - sch->bstats.packets += qdisc->bstats.packets; |
| - sch->qstats.backlog += qdisc->qstats.backlog; |
| - sch->qstats.drops += qdisc->qstats.drops; |
| - sch->qstats.requeues += qdisc->qstats.requeues; |
| - sch->qstats.overlimits += qdisc->qstats.overlimits; |
| - } |
| + gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, |
| + &qdisc->bstats, false); |
| + gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats, |
| + &qdisc->qstats); |
| + sch->q.qlen += qdisc_qlen(qdisc); |
| |
| spin_unlock_bh(qdisc_lock(qdisc)); |
| } |
| @@ -534,12 +520,13 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, |
| { |
| if (cl >= TC_H_MIN_PRIORITY) { |
| int i; |
| __u32 qlen = 0; |
| struct gnet_stats_queue qstats = {0}; |
| - struct gnet_stats_basic_packed bstats = {0}; |
| + struct gnet_stats_basic_sync bstats; |
| struct net_device *dev = qdisc_dev(sch); |
| struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; |
| |
| + gnet_stats_basic_sync_init(&bstats); |
| /* Drop lock here it will be reclaimed before touching |
| * statistics this is required because the d->lock we |
| * hold here is the look on dev_queue->qdisc_sleeping |
| @@ -554,40 +541,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, |
| |
| spin_lock_bh(qdisc_lock(qdisc)); |
| |
| - if (qdisc_is_percpu_stats(qdisc)) { |
| - qlen = qdisc_qlen_sum(qdisc); |
| - |
| - __gnet_stats_copy_basic(NULL, &bstats, |
| - qdisc->cpu_bstats, |
| - &qdisc->bstats); |
| - __gnet_stats_copy_queue(&qstats, |
| - qdisc->cpu_qstats, |
| - &qdisc->qstats, |
| - qlen); |
| - } else { |
| - qlen += qdisc->q.qlen; |
| - bstats.bytes += qdisc->bstats.bytes; |
| - bstats.packets += qdisc->bstats.packets; |
| - qstats.backlog += qdisc->qstats.backlog; |
| - qstats.drops += qdisc->qstats.drops; |
| - qstats.requeues += qdisc->qstats.requeues; |
| - qstats.overlimits += qdisc->qstats.overlimits; |
| - } |
| + gnet_stats_add_basic(&bstats, qdisc->cpu_bstats, |
| + &qdisc->bstats, false); |
| + gnet_stats_add_queue(&qstats, qdisc->cpu_qstats, |
| + &qdisc->qstats); |
| + qlen += qdisc_qlen(qdisc); |
| + |
| spin_unlock_bh(qdisc_lock(qdisc)); |
| } |
| + qlen += qstats.qlen; |
| |
| /* Reclaim root sleeping lock before completing stats */ |
| if (d->lock) |
| spin_lock_bh(d->lock); |
| - if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 || |
| gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0) |
| return -1; |
| } else { |
| struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); |
| |
| sch = dev_queue->qdisc_sleeping; |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, |
| - sch->cpu_bstats, &sch->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, sch->cpu_bstats, |
| + &sch->bstats, true) < 0 || |
| qdisc_qstats_copy(d, sch) < 0) |
| return -1; |
| } |
| diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c |
| index e282e7382117..cd8ab90c4765 100644 |
| --- a/net/sched/sch_multiq.c |
| +++ b/net/sched/sch_multiq.c |
| @@ -338,8 +338,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, |
| struct Qdisc *cl_q; |
| |
| cl_q = q->queues[cl - 1]; |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), |
| - d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 || |
| qdisc_qstats_copy(d, cl_q) < 0) |
| return -1; |
| |
| diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c |
| index 03fdf31ccb6a..3b8d7197c06b 100644 |
| --- a/net/sched/sch_prio.c |
| +++ b/net/sched/sch_prio.c |
| @@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, |
| struct Qdisc *cl_q; |
| |
| cl_q = q->queues[cl - 1]; |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), |
| - d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, |
| + &cl_q->bstats, true) < 0 || |
| qdisc_qstats_copy(d, cl_q) < 0) |
| return -1; |
| |
| diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c |
| index aea435b0aeb3..d4ce58c90f9f 100644 |
| --- a/net/sched/sch_qfq.c |
| +++ b/net/sched/sch_qfq.c |
| @@ -131,7 +131,7 @@ struct qfq_class { |
| |
| unsigned int filter_cnt; |
| |
| - struct gnet_stats_basic_packed bstats; |
| + struct gnet_stats_basic_sync bstats; |
| struct gnet_stats_queue qstats; |
| struct net_rate_estimator __rcu *rate_est; |
| struct Qdisc *qdisc; |
| @@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, |
| err = gen_replace_estimator(&cl->bstats, NULL, |
| &cl->rate_est, |
| NULL, |
| - qdisc_root_sleeping_running(sch), |
| + true, |
| tca[TCA_RATE]); |
| if (err) |
| return err; |
| @@ -465,6 +465,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, |
| if (cl == NULL) |
| return -ENOBUFS; |
| |
| + gnet_stats_basic_sync_init(&cl->bstats); |
| cl->common.classid = classid; |
| cl->deficit = lmax; |
| |
| @@ -477,7 +478,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, |
| err = gen_new_estimator(&cl->bstats, NULL, |
| &cl->rate_est, |
| NULL, |
| - qdisc_root_sleeping_running(sch), |
| + true, |
| tca[TCA_RATE]); |
| if (err) |
| goto destroy_class; |
| @@ -639,8 +640,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, |
| xstats.weight = cl->agg->class_weight; |
| xstats.lmax = cl->agg->lmax; |
| |
| - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), |
| - d, NULL, &cl->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || |
| gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || |
| qdisc_qstats_copy(d, cl->qdisc) < 0) |
| return -1; |
| @@ -1234,8 +1234,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, |
| return err; |
| } |
| |
| - cl->bstats.bytes += len; |
| - cl->bstats.packets += gso_segs; |
| + _bstats_update(&cl->bstats, len, gso_segs); |
| sch->qstats.backlog += len; |
| ++sch->q.qlen; |
| |
| diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c |
| index 474ba4db5de2..b9c71a304d39 100644 |
| --- a/net/sched/sch_taprio.c |
| +++ b/net/sched/sch_taprio.c |
| @@ -1985,7 +1985,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, |
| struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); |
| |
| sch = dev_queue->qdisc_sleeping; |
| - if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || |
| + if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 || |
| qdisc_qstats_copy(d, sch) < 0) |
| return -1; |
| return 0; |
| diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c |
| index 935bba065636..962e79730eb5 100644 |
| --- a/net/sunrpc/svc_xprt.c |
| +++ b/net/sunrpc/svc_xprt.c |
| @@ -441,7 +441,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) |
| if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) |
| return; |
| |
| - cpu = get_cpu(); |
| + cpu = get_cpu_light(); |
| pool = svc_pool_for_cpu(xprt->xpt_server, cpu); |
| |
| atomic_long_inc(&pool->sp_stats.packets); |
| @@ -465,7 +465,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) |
| rqstp = NULL; |
| out_unlock: |
| rcu_read_unlock(); |
| - put_cpu(); |
| + put_cpu_light(); |
| trace_svc_xprt_do_enqueue(xprt, rqstp); |
| } |
| EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); |
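| |
| get_cpu_light()/put_cpu_light() are provided by the RT series itself: |
| unlike get_cpu(), which disables preemption, they only pin the task |
| to its current CPU, so the section in between may take sleeping locks |
| on PREEMPT_RT. The usage pattern, as in svc_xprt_do_enqueue() above: |
| |
| .. code-block:: c |
| |
|     /* Sketch only; get_cpu_light() exists only with the RT patches |
|      * applied. |
|      */ |
|     static void example_stable_cpu(void) |
|     { |
|         int cpu; |
| |
|         cpu = get_cpu_light(); |
|         /* 'cpu' stays a valid index for per-CPU lookups here, even |
|          * if the task blocks on a sleeping lock. |
|          */ |
|         put_cpu_light(); |
|     } |
| |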
| diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c |
| index 5a90aa527877..642d0748c169 100644 |
| --- a/samples/kfifo/bytestream-example.c |
| +++ b/samples/kfifo/bytestream-example.c |
| @@ -22,10 +22,10 @@ |
| #define PROC_FIFO "bytestream-fifo" |
| |
| /* lock for procfs read access */ |
| -static DEFINE_MUTEX(read_lock); |
| +static DEFINE_MUTEX(read_access); |
| |
| /* lock for procfs write access */ |
| -static DEFINE_MUTEX(write_lock); |
| +static DEFINE_MUTEX(write_access); |
| |
| /* |
| * define DYNAMIC in this example for a dynamically allocated fifo. |
| @@ -116,12 +116,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf, |
| int ret; |
| unsigned int copied; |
| |
| - if (mutex_lock_interruptible(&write_lock)) |
| + if (mutex_lock_interruptible(&write_access)) |
| return -ERESTARTSYS; |
| |
| ret = kfifo_from_user(&test, buf, count, &copied); |
| |
| - mutex_unlock(&write_lock); |
| + mutex_unlock(&write_access); |
| if (ret) |
| return ret; |
| |
| @@ -134,12 +134,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf, |
| int ret; |
| unsigned int copied; |
| |
| - if (mutex_lock_interruptible(&read_lock)) |
| + if (mutex_lock_interruptible(&read_access)) |
| return -ERESTARTSYS; |
| |
| ret = kfifo_to_user(&test, buf, count, &copied); |
| |
| - mutex_unlock(&read_lock); |
| + mutex_unlock(&read_access); |
| if (ret) |
| return ret; |
| |
| diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c |
| index e5403d8c971a..c61482ba94f4 100644 |
| --- a/samples/kfifo/inttype-example.c |
| +++ b/samples/kfifo/inttype-example.c |
| @@ -22,10 +22,10 @@ |
| #define PROC_FIFO "int-fifo" |
| |
| /* lock for procfs read access */ |
| -static DEFINE_MUTEX(read_lock); |
| +static DEFINE_MUTEX(read_access); |
| |
| /* lock for procfs write access */ |
| -static DEFINE_MUTEX(write_lock); |
| +static DEFINE_MUTEX(write_access); |
| |
| /* |
| * define DYNAMIC in this example for a dynamically allocated fifo. |
| @@ -109,12 +109,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf, |
| int ret; |
| unsigned int copied; |
| |
| - if (mutex_lock_interruptible(&write_lock)) |
| + if (mutex_lock_interruptible(&write_access)) |
| return -ERESTARTSYS; |
| |
| ret = kfifo_from_user(&test, buf, count, &copied); |
| |
| - mutex_unlock(&write_lock); |
| + mutex_unlock(&write_access); |
| if (ret) |
| return ret; |
| |
| @@ -127,12 +127,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf, |
| int ret; |
| unsigned int copied; |
| |
| - if (mutex_lock_interruptible(&read_lock)) |
| + if (mutex_lock_interruptible(&read_access)) |
| return -ERESTARTSYS; |
| |
| ret = kfifo_to_user(&test, buf, count, &copied); |
| |
| - mutex_unlock(&read_lock); |
| + mutex_unlock(&read_access); |
| if (ret) |
| return ret; |
| |
| diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c |
| index f64f3d62d6c2..e4087b2d3fc4 100644 |
| --- a/samples/kfifo/record-example.c |
| +++ b/samples/kfifo/record-example.c |
| @@ -22,10 +22,10 @@ |
| #define PROC_FIFO "record-fifo" |
| |
| /* lock for procfs read access */ |
| -static DEFINE_MUTEX(read_lock); |
| +static DEFINE_MUTEX(read_access); |
| |
| /* lock for procfs write access */ |
| -static DEFINE_MUTEX(write_lock); |
| +static DEFINE_MUTEX(write_access); |
| |
| /* |
| * define DYNAMIC in this example for a dynamically allocated fifo. |
| @@ -123,12 +123,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf, |
| int ret; |
| unsigned int copied; |
| |
| - if (mutex_lock_interruptible(&write_lock)) |
| + if (mutex_lock_interruptible(&write_access)) |
| return -ERESTARTSYS; |
| |
| ret = kfifo_from_user(&test, buf, count, &copied); |
| |
| - mutex_unlock(&write_lock); |
| + mutex_unlock(&write_access); |
| if (ret) |
| return ret; |
| |
| @@ -141,12 +141,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf, |
| int ret; |
| unsigned int copied; |
| |
| - if (mutex_lock_interruptible(&read_lock)) |
| + if (mutex_lock_interruptible(&read_access)) |
| return -ERESTARTSYS; |
| |
| ret = kfifo_to_user(&test, buf, count, &copied); |
| |
| - mutex_unlock(&read_lock); |
| + mutex_unlock(&read_access); |
| if (ret) |
| return ret; |
| |
| diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c |
| index 3f3f56f6be4d..5dbcdc5b22b5 100644 |
| --- a/security/smack/smack_lsm.c |
| +++ b/security/smack/smack_lsm.c |
| @@ -51,8 +51,10 @@ |
| #define SMK_RECEIVING 1 |
| #define SMK_SENDING 2 |
| |
| +#ifdef SMACK_IPV6_PORT_LABELING |
| static DEFINE_MUTEX(smack_ipv6_lock); |
| static LIST_HEAD(smk_ipv6_port_list); |
| +#endif |
| struct kmem_cache *smack_rule_cache; |
| int smack_enabled __initdata; |
| |
| @@ -2603,7 +2605,6 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address) |
| mutex_unlock(&smack_ipv6_lock); |
| return; |
| } |
| -#endif |
| |
| /** |
| * smk_ipv6_port_check - check Smack port access |
| @@ -2666,6 +2667,7 @@ static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address, |
| |
| return smk_ipv6_check(skp, object, address, act); |
| } |
| +#endif |
| |
| /** |
| * smack_inode_setsecurity - set smack xattrs |
| @@ -2852,8 +2854,9 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, |
| rc = smk_ipv6_check(ssp->smk_out, rsp, sip, |
| SMK_CONNECTING); |
| } |
| - if (__is_defined(SMACK_IPV6_PORT_LABELING)) |
| - rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); |
| +#ifdef SMACK_IPV6_PORT_LABELING |
| + rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); |
| +#endif |
| |
| return rc; |
| } |
| diff --git a/sound/soc/mediatek/common/mtk-afe-fe-dai.c b/sound/soc/mediatek/common/mtk-afe-fe-dai.c |
| index e95c7c018e7d..4f2c2379531b 100644 |
| --- a/sound/soc/mediatek/common/mtk-afe-fe-dai.c |
| +++ b/sound/soc/mediatek/common/mtk-afe-fe-dai.c |
| @@ -288,7 +288,6 @@ const struct snd_soc_dai_ops mtk_afe_fe_ops = { |
| }; |
| EXPORT_SYMBOL_GPL(mtk_afe_fe_ops); |
| |
| -static DEFINE_MUTEX(irqs_lock); |
| int mtk_dynamic_irq_acquire(struct mtk_base_afe *afe) |
| { |
| int i; |