Message ID | 20170518155026.23799-1-npiggin@gmail.com |
---|---|
State | RFC |
Delegated to: | David Miller |
Headers | show |
(adding Uli) On Fri, May 19, 2017 at 01:50:26AM +1000, Nicholas Piggin wrote: > I'd like to make it easier for architectures that have their own NMI / > hard lockup detector to reuse various configuration interfaces that are > provided by generic detectors (cmdline, sysctl, suspend/resume calls). > > I'd also like to remove the dependency of arch hard lockup detectors > on the softlockup detector. The reason being these watchdogs can be > very small (sparc's is like a page of core code that does not use any > big subsystem like kthreads or timers). > > So I do this by adding a separate CONFIG_SOFTLOCKUP_DETECTOR, and > juggling around what goes under config options. HAVE_NMI_WATCHDOG > continues to be the config for arch to override the hard lockup > detector, which is expanded to cover a few more cases. Basically you are trying to remove the heavy HARDLOCKUP pieces to minimize the SOFTLOCKUP piece and use your own NMI detector, right? I am guessing you would then disable SOFTLOCKUP to remove all the kthread and timer stuff but continue to use the generic infrastructure to help manager your own NMI detector? A lot of the code is just re-organizing things and adding an explicit ifdef on SOFTLOCKUP, which seems fine to me. I just need to spend some time on some of your #else clauses to see what functionality is dropped when you use your approach. Cheers, Don > > These config options are pretty ugly, but they already were. I don't > think they've become a lot worse after this. > > Architectures can then use watchdog_nmi_reconfigure() which gets > called every time something changes (sysctls, suspend/resume, etc) and > adjust according to relevant parameters (watchdog_enabled, > watchdog_thresh, watchdog_cpumask, watchdog_suspended). > > I have something for powerpc but I want to make sure this generic code > change is reasonable and could work for other archs. 
> > Thanks, > Nick > > diff --git a/include/linux/nmi.h b/include/linux/nmi.h > index aa3cd0878270..5c2265f84fa3 100644 > --- a/include/linux/nmi.h > +++ b/include/linux/nmi.h > @@ -8,16 +8,25 @@ > #include <asm/irq.h> > > #ifdef CONFIG_LOCKUP_DETECTOR > +void lockup_detector_init(void); > +#else > +static inline void lockup_detector_init(void) > +{ > +} > +#endif > + > +#ifdef CONFIG_SOFTLOCKUP_DETECTOR > +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, > + void __user *buffer, > + size_t *lenp, loff_t *ppos); > + > extern void touch_softlockup_watchdog_sched(void); > extern void touch_softlockup_watchdog(void); > extern void touch_softlockup_watchdog_sync(void); > extern void touch_all_softlockup_watchdogs(void); > -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, > - void __user *buffer, > - size_t *lenp, loff_t *ppos); > extern unsigned int softlockup_panic; > -extern unsigned int hardlockup_panic; > -void lockup_detector_init(void); > +extern int soft_watchdog_enabled; > +extern atomic_t watchdog_park_in_progress; > #else > static inline void touch_softlockup_watchdog_sched(void) > { > @@ -31,9 +40,6 @@ static inline void touch_softlockup_watchdog_sync(void) > static inline void touch_all_softlockup_watchdogs(void) > { > } > -static inline void lockup_detector_init(void) > -{ > -} > #endif > > #ifdef CONFIG_DETECT_HUNG_TASK > @@ -70,17 +76,14 @@ static inline void reset_hung_task_detector(void) > */ > #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) > #include <asm/nmi.h> > +extern unsigned int hardlockup_panic; > extern void touch_nmi_watchdog(void); > +extern void hardlockup_detector_disable(void); > #else > static inline void touch_nmi_watchdog(void) > { > touch_softlockup_watchdog(); > } > -#endif > - > -#if defined(CONFIG_HARDLOCKUP_DETECTOR) > -extern void hardlockup_detector_disable(void); > -#else > static inline void hardlockup_detector_disable(void) {} > #endif > > @@ -139,15 
+142,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu) > } > #endif > > -#ifdef CONFIG_LOCKUP_DETECTOR > +#ifdef CONFIG_HARDLOCKUP_DETECTOR > u64 hw_nmi_get_sample_period(int watchdog_thresh); > +#endif > + > +#ifdef CONFIG_LOCKUP_DETECTOR > extern int nmi_watchdog_enabled; > -extern int soft_watchdog_enabled; > extern int watchdog_user_enabled; > extern int watchdog_thresh; > extern unsigned long watchdog_enabled; > +extern struct cpumask watchdog_cpumask; > extern unsigned long *watchdog_cpumask_bits; > -extern atomic_t watchdog_park_in_progress; > +extern int __read_mostly watchdog_suspended; > #ifdef CONFIG_SMP > extern int sysctl_softlockup_all_cpu_backtrace; > extern int sysctl_hardlockup_all_cpu_backtrace; > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index 4dfba1a76cc3..bb747d8f8521 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > @@ -880,6 +880,14 @@ static struct ctl_table kern_table[] = { > #endif > }, > { > + .procname = "watchdog_cpumask", > + .data = &watchdog_cpumask_bits, > + .maxlen = NR_CPUS, > + .mode = 0644, > + .proc_handler = proc_watchdog_cpumask, > + }, > +#ifdef CONFIG_SOFTLOCKUP_DETECTOR > + { > .procname = "soft_watchdog", > .data = &soft_watchdog_enabled, > .maxlen = sizeof (int), > @@ -889,13 +897,6 @@ static struct ctl_table kern_table[] = { > .extra2 = &one, > }, > { > - .procname = "watchdog_cpumask", > - .data = &watchdog_cpumask_bits, > - .maxlen = NR_CPUS, > - .mode = 0644, > - .proc_handler = proc_watchdog_cpumask, > - }, > - { > .procname = "softlockup_panic", > .data = &softlockup_panic, > .maxlen = sizeof(int), > @@ -904,7 +905,8 @@ static struct ctl_table kern_table[] = { > .extra1 = &zero, > .extra2 = &one, > }, > -#ifdef CONFIG_HARDLOCKUP_DETECTOR > +#endif > +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) > { > .procname = "hardlockup_panic", > .data = &hardlockup_panic, > diff --git a/kernel/watchdog.c b/kernel/watchdog.c > index 03e0b69bb5bf..192125f3f8aa 100644 > 
--- a/kernel/watchdog.c > +++ b/kernel/watchdog.c > @@ -29,15 +29,54 @@ > #include <linux/kvm_para.h> > #include <linux/kthread.h> > > +/* Watchdog configuration */ > static DEFINE_MUTEX(watchdog_proc_mutex); > > #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) > unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; > +int __read_mostly nmi_watchdog_enabled; > + > +/* boot commands */ > +/* > + * Should we panic when a soft-lockup or hard-lockup occurs: > + */ > +unsigned int __read_mostly hardlockup_panic = > + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; > +/* > + * We may not want to enable hard lockup detection by default in all cases, > + * for example when running the kernel as a guest on a hypervisor. In these > + * cases this function can be called to disable hard lockup detection. This > + * function should only be executed once by the boot processor before the > + * kernel command line parameters are parsed, because otherwise it is not > + * possible to override this in hardlockup_panic_setup(). 
> + */ > +void hardlockup_detector_disable(void) > +{ > + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; > +} > + > +static int __init hardlockup_panic_setup(char *str) > +{ > + if (!strncmp(str, "panic", 5)) > + hardlockup_panic = 1; > + else if (!strncmp(str, "nopanic", 7)) > + hardlockup_panic = 0; > + else if (!strncmp(str, "0", 1)) > + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; > + else if (!strncmp(str, "1", 1)) > + watchdog_enabled |= NMI_WATCHDOG_ENABLED; > + return 1; > +} > +__setup("nmi_watchdog=", hardlockup_panic_setup); > + > #else > unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; > #endif > -int __read_mostly nmi_watchdog_enabled; > + > +#ifdef CONFIG_SOFTLOCKUP_DETECTOR > int __read_mostly soft_watchdog_enabled; > +#endif > + > int __read_mostly watchdog_user_enabled; > int __read_mostly watchdog_thresh = 10; > > @@ -45,15 +84,9 @@ int __read_mostly watchdog_thresh = 10; > int __read_mostly sysctl_softlockup_all_cpu_backtrace; > int __read_mostly sysctl_hardlockup_all_cpu_backtrace; > #endif > -static struct cpumask watchdog_cpumask __read_mostly; > +struct cpumask watchdog_cpumask __read_mostly; > unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); > > -/* Helper for online, unparked cpus. */ > -#define for_each_watchdog_cpu(cpu) \ > - for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) > - > -atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); > - > /* > * The 'watchdog_running' variable is set to 1 when the watchdog threads > * are registered/started and is set to 0 when the watchdog threads are > @@ -72,7 +105,32 @@ static int __read_mostly watchdog_running; > * of 'watchdog_running' cannot change while the watchdog is deactivated > * temporarily (see related code in 'proc' handlers). > */ > -static int __read_mostly watchdog_suspended; > +int __read_mostly watchdog_suspended; > + > +/* > + * These functions can be overridden if an architecture implements its > + * own hardlockup detector. 
> + */ > +int __weak watchdog_nmi_enable(unsigned int cpu) > +{ > + return 0; > +} > +void __weak watchdog_nmi_disable(unsigned int cpu) > +{ > +} > + > +void __weak watchdog_nmi_reconfigure(void) > +{ > +} > + > + > +#ifdef CONFIG_SOFTLOCKUP_DETECTOR > + > +/* Helper for online, unparked cpus. */ > +#define for_each_watchdog_cpu(cpu) \ > + for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) > + > +atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); > > static u64 __read_mostly sample_period; > > @@ -120,6 +178,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str) > return 1; > } > __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); > +#ifdef CONFIG_HARDLOCKUP_DETECTOR > static int __init hardlockup_all_cpu_backtrace_setup(char *str) > { > sysctl_hardlockup_all_cpu_backtrace = > @@ -128,6 +187,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str) > } > __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); > #endif > +#endif > > /* > * Hard-lockup warnings should be triggered after just a few seconds. 
Soft- > @@ -213,18 +273,6 @@ void touch_softlockup_watchdog_sync(void) > __this_cpu_write(watchdog_touch_ts, 0); > } > > -/* watchdog detector functions */ > -bool is_hardlockup(void) > -{ > - unsigned long hrint = __this_cpu_read(hrtimer_interrupts); > - > - if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) > - return true; > - > - __this_cpu_write(hrtimer_interrupts_saved, hrint); > - return false; > -} > - > static int is_softlockup(unsigned long touch_ts) > { > unsigned long now = get_timestamp(); > @@ -237,21 +285,21 @@ static int is_softlockup(unsigned long touch_ts) > return 0; > } > > -static void watchdog_interrupt_count(void) > +/* watchdog detector functions */ > +bool is_hardlockup(void) > { > - __this_cpu_inc(hrtimer_interrupts); > -} > + unsigned long hrint = __this_cpu_read(hrtimer_interrupts); > > -/* > - * These two functions are mostly architecture specific > - * defining them as weak here. > - */ > -int __weak watchdog_nmi_enable(unsigned int cpu) > -{ > - return 0; > + if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) > + return true; > + > + __this_cpu_write(hrtimer_interrupts_saved, hrint); > + return false; > } > -void __weak watchdog_nmi_disable(unsigned int cpu) > + > +static void watchdog_interrupt_count(void) > { > + __this_cpu_inc(hrtimer_interrupts); > } > > static int watchdog_enable_all_cpus(void); > @@ -502,57 +550,6 @@ static void watchdog_unpark_threads(void) > kthread_unpark(per_cpu(softlockup_watchdog, cpu)); > } > > -/* > - * Suspend the hard and soft lockup detector by parking the watchdog threads. > - */ > -int lockup_detector_suspend(void) > -{ > - int ret = 0; > - > - get_online_cpus(); > - mutex_lock(&watchdog_proc_mutex); > - /* > - * Multiple suspend requests can be active in parallel (counted by > - * the 'watchdog_suspended' variable). If the watchdog threads are > - * running, the first caller takes care that they will be parked. 
> - * The state of 'watchdog_running' cannot change while a suspend > - * request is active (see related code in 'proc' handlers). > - */ > - if (watchdog_running && !watchdog_suspended) > - ret = watchdog_park_threads(); > - > - if (ret == 0) > - watchdog_suspended++; > - else { > - watchdog_disable_all_cpus(); > - pr_err("Failed to suspend lockup detectors, disabled\n"); > - watchdog_enabled = 0; > - } > - > - mutex_unlock(&watchdog_proc_mutex); > - > - return ret; > -} > - > -/* > - * Resume the hard and soft lockup detector by unparking the watchdog threads. > - */ > -void lockup_detector_resume(void) > -{ > - mutex_lock(&watchdog_proc_mutex); > - > - watchdog_suspended--; > - /* > - * The watchdog threads are unparked if they were previously running > - * and if there is no more active suspend request. > - */ > - if (watchdog_running && !watchdog_suspended) > - watchdog_unpark_threads(); > - > - mutex_unlock(&watchdog_proc_mutex); > - put_online_cpus(); > -} > - > static int update_watchdog_all_cpus(void) > { > int ret; > @@ -604,6 +601,96 @@ static void watchdog_disable_all_cpus(void) > } > } > > +static int watchdog_update_cpus(void) > +{ > + return smpboot_update_cpumask_percpu_thread( > + &watchdog_threads, &watchdog_cpumask); > +} > + > +#else /* SOFTLOCKUP */ > +static int watchdog_park_threads(void) > +{ > + return 0; > +} > + > +static void watchdog_unpark_threads(void) > +{ > +} > + > +static int watchdog_enable_all_cpus(void) > +{ > + return 0; > +} > + > +static void watchdog_disable_all_cpus(void) > +{ > +} > + > +static int watchdog_update_cpus(void) > +{ > + return 0; > +} > + > +static void set_sample_period(void) > +{ > +} > +#endif /* SOFTLOCKUP */ > + > +/* > + * Suspend the hard and soft lockup detector by parking the watchdog threads. 
> + */ > +int lockup_detector_suspend(void) > +{ > + int ret = 0; > + > + get_online_cpus(); > + mutex_lock(&watchdog_proc_mutex); > + /* > + * Multiple suspend requests can be active in parallel (counted by > + * the 'watchdog_suspended' variable). If the watchdog threads are > + * running, the first caller takes care that they will be parked. > + * The state of 'watchdog_running' cannot change while a suspend > + * request is active (see related code in 'proc' handlers). > + */ > + if (watchdog_running && !watchdog_suspended) > + ret = watchdog_park_threads(); > + > + if (ret == 0) > + watchdog_suspended++; > + else { > + watchdog_disable_all_cpus(); > + pr_err("Failed to suspend lockup detectors, disabled\n"); > + watchdog_enabled = 0; > + } > + > + watchdog_nmi_reconfigure(); > + > + mutex_unlock(&watchdog_proc_mutex); > + > + return ret; > +} > + > +/* > + * Resume the hard and soft lockup detector by unparking the watchdog threads. > + */ > +void lockup_detector_resume(void) > +{ > + mutex_lock(&watchdog_proc_mutex); > + > + watchdog_suspended--; > + /* > + * The watchdog threads are unparked if they were previously running > + * and if there is no more active suspend request. > + */ > + if (watchdog_running && !watchdog_suspended) > + watchdog_unpark_threads(); > + > + watchdog_nmi_reconfigure(); > + > + mutex_unlock(&watchdog_proc_mutex); > + put_online_cpus(); > +} > + > #ifdef CONFIG_SYSCTL > > /* > @@ -625,6 +712,8 @@ static int proc_watchdog_update(void) > else > watchdog_disable_all_cpus(); > > + watchdog_nmi_reconfigure(); > + > return err; > > } > @@ -810,10 +899,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, > * a temporary cpumask, so we are likely not in a > * position to do much else to make things better. 
> */ > - if (smpboot_update_cpumask_percpu_thread( > - &watchdog_threads, &watchdog_cpumask) != 0) > + if (watchdog_update_cpus() != 0) > pr_err("cpumask update failed\n"); > } > + > + watchdog_nmi_reconfigure(); > } > out: > mutex_unlock(&watchdog_proc_mutex); > diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c > index 54a427d1f344..2b328d6c0bc8 100644 > --- a/kernel/watchdog_hld.c > +++ b/kernel/watchdog_hld.c > @@ -22,39 +22,7 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); > static DEFINE_PER_CPU(bool, watchdog_nmi_touch); > static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); > > -/* boot commands */ > -/* > - * Should we panic when a soft-lockup or hard-lockup occurs: > - */ > -unsigned int __read_mostly hardlockup_panic = > - CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; > static unsigned long hardlockup_allcpu_dumped; > -/* > - * We may not want to enable hard lockup detection by default in all cases, > - * for example when running the kernel as a guest on a hypervisor. In these > - * cases this function can be called to disable hard lockup detection. This > - * function should only be executed once by the boot processor before the > - * kernel command line parameters are parsed, because otherwise it is not > - * possible to override this in hardlockup_panic_setup(). 
> - */ > -void hardlockup_detector_disable(void) > -{ > - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; > -} > - > -static int __init hardlockup_panic_setup(char *str) > -{ > - if (!strncmp(str, "panic", 5)) > - hardlockup_panic = 1; > - else if (!strncmp(str, "nopanic", 7)) > - hardlockup_panic = 0; > - else if (!strncmp(str, "0", 1)) > - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; > - else if (!strncmp(str, "1", 1)) > - watchdog_enabled |= NMI_WATCHDOG_ENABLED; > - return 1; > -} > -__setup("nmi_watchdog=", hardlockup_panic_setup); > > void touch_nmi_watchdog(void) > { > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > index e4587ebe52c7..68685d3ddac0 100644 > --- a/lib/Kconfig.debug > +++ b/lib/Kconfig.debug > @@ -801,14 +801,18 @@ config LOCKUP_DETECTOR > The frequency of hrtimer and NMI events and the soft and hard lockup > thresholds can be controlled through the sysctl watchdog_thresh. > > +config SOFTLOCKUP_DETECTOR > + bool "Detect Soft Lockups" > + depends on LOCKUP_DETECTOR > + > config HARDLOCKUP_DETECTOR > - def_bool y > - depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG > + bool "Detect Hard Lockups" > + depends on SOFTLOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG > depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI > > config BOOTPARAM_HARDLOCKUP_PANIC > bool "Panic (Reboot) On Hard Lockups" > - depends on HARDLOCKUP_DETECTOR > + depends on HARDLOCKUP_DETECTOR || HAVE_NMI_WATCHDOG > help > Say Y here to enable the kernel to panic on "hard lockups", > which are bugs that cause the kernel to loop in kernel > @@ -819,14 +823,14 @@ config BOOTPARAM_HARDLOCKUP_PANIC > > config BOOTPARAM_HARDLOCKUP_PANIC_VALUE > int > - depends on HARDLOCKUP_DETECTOR > + depends on HARDLOCKUP_DETECTOR || HAVE_NMI_WATCHDOG > range 0 1 > default 0 if !BOOTPARAM_HARDLOCKUP_PANIC > default 1 if BOOTPARAM_HARDLOCKUP_PANIC > > config BOOTPARAM_SOFTLOCKUP_PANIC > bool "Panic (Reboot) On Soft Lockups" > - depends on LOCKUP_DETECTOR > + depends on SOFTLOCKUP_DETECTOR > help > Say Y here to 
enable the kernel to panic on "soft lockups", > which are bugs that cause the kernel to loop in kernel > @@ -843,7 +847,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC > > config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE > int > - depends on LOCKUP_DETECTOR > + depends on SOFTLOCKUP_DETECTOR > range 0 1 > default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC > default 1 if BOOTPARAM_SOFTLOCKUP_PANIC > @@ -851,7 +855,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE > config DETECT_HUNG_TASK > bool "Detect Hung Tasks" > depends on DEBUG_KERNEL > - default LOCKUP_DETECTOR > + default SOFTLOCKUP_DETECTOR > help > Say Y here to enable the kernel to detect "hung tasks", > which are bugs that cause the task to be stuck in > -- > 2.11.0 > -- To unsubscribe from this list: send the line "unsubscribe sparclinux" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 18 May 2017 12:30:28 -0400 Don Zickus <dzickus@redhat.com> wrote: > (adding Uli) > > On Fri, May 19, 2017 at 01:50:26AM +1000, Nicholas Piggin wrote: > > I'd like to make it easier for architectures that have their own NMI / > > hard lockup detector to reuse various configuration interfaces that are > > provided by generic detectors (cmdline, sysctl, suspend/resume calls). > > > > I'd also like to remove the dependency of arch hard lockup detectors > > on the softlockup detector. The reason being these watchdogs can be > > very small (sparc's is like a page of core code that does not use any > > big subsystem like kthreads or timers). > > > > So I do this by adding a separate CONFIG_SOFTLOCKUP_DETECTOR, and > > juggling around what goes under config options. HAVE_NMI_WATCHDOG > > continues to be the config for arch to override the hard lockup > > detector, which is expanded to cover a few more cases. > > Basically you are trying to remove the heavy HARDLOCKUP pieces to minimize > the SOFTLOCKUP piece and use your own NMI detector, right? > > I am guessing you would then disable SOFTLOCKUP to remove all the kthread > and timer stuff but continue to use the generic infrastructure to help > manager your own NMI detector? Yes that's right. > A lot of the code is just re-organizing things and adding an explicit > ifdef on SOFTLOCKUP, which seems fine to me. > > I just need to spend some time on some of your #else clauses to see what > functionality is dropped when you use your approach. Okay, appreciated. I can trim down cc lists and send you my powerpc WIP if you'd like to have a look. Thanks, Nick -- To unsubscribe from this list: send the line "unsubscribe sparclinux" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 19, 2017 at 09:07:31AM +1000, Nicholas Piggin wrote: > On Thu, 18 May 2017 12:30:28 -0400 > Don Zickus <dzickus@redhat.com> wrote: > > > (adding Uli) > > > > On Fri, May 19, 2017 at 01:50:26AM +1000, Nicholas Piggin wrote: > > > I'd like to make it easier for architectures that have their own NMI / > > > hard lockup detector to reuse various configuration interfaces that are > > > provided by generic detectors (cmdline, sysctl, suspend/resume calls). > > > > > > I'd also like to remove the dependency of arch hard lockup detectors > > > on the softlockup detector. The reason being these watchdogs can be > > > very small (sparc's is like a page of core code that does not use any > > > big subsystem like kthreads or timers). > > > > > > So I do this by adding a separate CONFIG_SOFTLOCKUP_DETECTOR, and > > > juggling around what goes under config options. HAVE_NMI_WATCHDOG > > > continues to be the config for arch to override the hard lockup > > > detector, which is expanded to cover a few more cases. > > > > Basically you are trying to remove the heavy HARDLOCKUP pieces to minimize > > the SOFTLOCKUP piece and use your own NMI detector, right? > > > > I am guessing you would then disable SOFTLOCKUP to remove all the kthread > > and timer stuff but continue to use the generic infrastructure to help > > manager your own NMI detector? > > Yes that's right. > > > A lot of the code is just re-organizing things and adding an explicit > > ifdef on SOFTLOCKUP, which seems fine to me. > > > > I just need to spend some time on some of your #else clauses to see what > > functionality is dropped when you use your approach. > > Okay, appreciated. I can trim down cc lists and send you my powerpc > WIP if you'd like to have a look. I am curious to know what IBM thinks there. Currently the HARDLOCKUP detector sits on top of perf. I get the impression, you are removing that dependency. 
Is that a permanent thing or are you thinking of switching back and forth depending on if SOFTLOCKUP is enabled or not? Cheers, Don -- To unsubscribe from this list: send the line "unsubscribe sparclinux" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 19 May 2017 09:17:53 -0400 Don Zickus <dzickus@redhat.com> wrote: > On Fri, May 19, 2017 at 09:07:31AM +1000, Nicholas Piggin wrote: > > On Thu, 18 May 2017 12:30:28 -0400 > > Don Zickus <dzickus@redhat.com> wrote: > > > > > (adding Uli) > > > > > > On Fri, May 19, 2017 at 01:50:26AM +1000, Nicholas Piggin wrote: > > > > I'd like to make it easier for architectures that have their own NMI / > > > > hard lockup detector to reuse various configuration interfaces that are > > > > provided by generic detectors (cmdline, sysctl, suspend/resume calls). > > > > > > > > I'd also like to remove the dependency of arch hard lockup detectors > > > > on the softlockup detector. The reason being these watchdogs can be > > > > very small (sparc's is like a page of core code that does not use any > > > > big subsystem like kthreads or timers). > > > > > > > > So I do this by adding a separate CONFIG_SOFTLOCKUP_DETECTOR, and > > > > juggling around what goes under config options. HAVE_NMI_WATCHDOG > > > > continues to be the config for arch to override the hard lockup > > > > detector, which is expanded to cover a few more cases. > > > > > > Basically you are trying to remove the heavy HARDLOCKUP pieces to minimize > > > the SOFTLOCKUP piece and use your own NMI detector, right? > > > > > > I am guessing you would then disable SOFTLOCKUP to remove all the kthread > > > and timer stuff but continue to use the generic infrastructure to help > > > manager your own NMI detector? > > > > Yes that's right. > > > > > A lot of the code is just re-organizing things and adding an explicit > > > ifdef on SOFTLOCKUP, which seems fine to me. > > > > > > I just need to spend some time on some of your #else clauses to see what > > > functionality is dropped when you use your approach. > > > > Okay, appreciated. I can trim down cc lists and send you my powerpc > > WIP if you'd like to have a look. > > I am curious to know what IBM thinks there. 
Currently the HARDLOCKUP > detector sits on top of perf. I get the impression, you are removing that > dependency. Is that a permanent thing or are you thinking of switching back > and forth depending on if SOFTLOCKUP is enabled or not? We want to get away from perf permanently. The PMU interrupts are not specially non-maskable from a hardware POV, everything gets masked when you turn off interrupts in hardware. powerpc arch code implements a software disable layer, and PMU interrupts are differentiated there by being allowed to run even under local_irq_disable(); We have a few issues with using perf for it. We disable it by default because using it for that breaks another PMU feature. But PMU interrupts are not special, so it would be possible to e.g., take the timer interrupt before soft disable and have it touch the watchdog if it fires while under local_irq_disable(). That gives the exact same kind of pseudo-NMI as perf interrupts, without using PMU. Further, we now want to introduce a local_pmu_disable() type of interface that extends this soft disable layer to perf interrupts as well for some cases. Once we start doing that, more code will be exempt from the hardlockup watchdog, whereas a watchdog specific hook from the timer interrupt would still cover it. Thanks, Nick -- To unsubscribe from this list: send the line "unsubscribe sparclinux" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, May 20, 2017 at 12:53:06AM +1000, Nicholas Piggin wrote: > > I am curious to know what IBM thinks there. Currently the HARDLOCKUP > > detector sits on top of perf. I get the impression, you are removing that > > dependency. Is that a permanent thing or are you thinking of switching back > > and forth depending on if SOFTLOCKUP is enabled or not? > > We want to get away from perf permanently. > > The PMU interrupts are not specially non-maskable from a hardware > POV, everything gets masked when you turn off interrupts in hardware. > powerpc arch code implements a software disable layer, and PMU > interrupts are differentiated there by being allowed to run even > under local_irq_disable(); > > We have a few issues with using perf for it. We disable it by > default because using it for that breaks another PMU feature. > > But PMU interupts are not special, so it would be possible to e.g., > take the timer interrupt before soft disable and have it touch the > watchdog if it fires while under local_irq_disable(). That give > exact same kind of pseudo-NMI as perf interrupts, without using PMU. > > Further, we now want to introduce a local_pmu_disable() type of > interface that extends this soft disable layer to perf interrupts > as well for some cases. Once we start doing that, more code will > be exempt from the hardlockup watchdog, whereas a watchdog specific > hook from the timer interrupt would still cover it. Interesting. Thanks for that info. Do you think you can split the patch in half for me? You are shuffling code around and making subtle changes in the shuffled code. It is hard to double check everything. Namely watchdog_nmi_reconfigure and watchdog_update_cpus suddenly appear. The first patch would be straight code movement, the second the real changes. I almost don't care if you throw in the LOCKUP->SOFTLOCKUP changes in the first patch either. 
I am really interested in the subtle changes to make sure you covered the various race conditions that we have uncovered over the years. I also would like to see a sample of the watchdog_nmi_reconfigure() you had in mind. Usually we hide all the nmi stuff underneath the watchdog functions. But those are threaded, which is what you are trying to avoid, so the placement of the function makes sense but looks odd. Thanks! Cheers, Don -- To unsubscribe from this list: send the line "unsubscribe sparclinux" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 19 May 2017 15:43:45 -0400 Don Zickus <dzickus@redhat.com> wrote: > On Sat, May 20, 2017 at 12:53:06AM +1000, Nicholas Piggin wrote: > > > I am curious to know what IBM thinks there. Currently the HARDLOCKUP > > > detector sits on top of perf. I get the impression, you are removing that > > > dependency. Is that a permanent thing or are you thinking of switching back > > > and forth depending on if SOFTLOCKUP is enabled or not? > > > > We want to get away from perf permanently. > > > > The PMU interrupts are not specially non-maskable from a hardware > > POV, everything gets masked when you turn off interrupts in hardware. > > powerpc arch code implements a software disable layer, and PMU > > interrupts are differentiated there by being allowed to run even > > under local_irq_disable(); > > > > We have a few issues with using perf for it. We disable it by > > default because using it for that breaks another PMU feature. > > > > But PMU interupts are not special, so it would be possible to e.g., > > take the timer interrupt before soft disable and have it touch the > > watchdog if it fires while under local_irq_disable(). That give > > exact same kind of pseudo-NMI as perf interrupts, without using PMU. > > > > Further, we now want to introduce a local_pmu_disable() type of > > interface that extends this soft disable layer to perf interrupts > > as well for some cases. Once we start doing that, more code will > > be exempt from the hardlockup watchdog, whereas a watchdog specific > > hook from the timer interrupt would still cover it. > > Interesting. Thanks for that info. > > Do you think you can split the patch in half for me? You are shuffling code > around and making subtle changes in the shuffled code. It is hard to double > check everything. Namely watchdog_nmi_reconfigure and watchdog_update_cpus > suddenly appear. > > The first patch would be straight code movement, the second the real > changes. 
I almost don't care if you throw in the LOCKUP->SOFTLOCKUP changes > in the first patch either. > > I am really interested in the subtle changes to make sure you covered the > various race conditions that we have uncovered over the years. Yes that makes sense, I'll do that. > I also would like to see a sample of the watchdog_nmi_reconfigure() you had > in mind. Usually we hide all the nmi stuff underneath the watchdog > functions. But those are threaded, which is what you are trying to avoid, > so the placement of the function makes sense but looks odd. I'm just calling it after any sysctl changes or suspend/resume etc, and the lockup detector I have basically just shuts down everything and then re-starts with the new parameters (could be made much fancier but I didn't see a need). I will split out this patch and re-post to you with the powerpc patch in the series that you can take a look at. Thanks, Nick -- To unsubscribe from this list: send the line "unsubscribe sparclinux" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/linux/nmi.h b/include/linux/nmi.h index aa3cd0878270..5c2265f84fa3 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -8,16 +8,25 @@ #include <asm/irq.h> #ifdef CONFIG_LOCKUP_DETECTOR +void lockup_detector_init(void); +#else +static inline void lockup_detector_init(void) +{ +} +#endif + +#ifdef CONFIG_SOFTLOCKUP_DETECTOR +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); + extern void touch_softlockup_watchdog_sched(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); +extern int soft_watchdog_enabled; +extern atomic_t watchdog_park_in_progress; #else static inline void touch_softlockup_watchdog_sched(void) { @@ -31,9 +40,6 @@ static inline void touch_softlockup_watchdog_sync(void) static inline void touch_all_softlockup_watchdogs(void) { } -static inline void lockup_detector_init(void) -{ -} #endif #ifdef CONFIG_DETECT_HUNG_TASK @@ -70,17 +76,14 @@ static inline void reset_hung_task_detector(void) */ #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) #include <asm/nmi.h> +extern unsigned int hardlockup_panic; extern void touch_nmi_watchdog(void); +extern void hardlockup_detector_disable(void); #else static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); } -#endif - -#if defined(CONFIG_HARDLOCKUP_DETECTOR) -extern void hardlockup_detector_disable(void); -#else static inline void hardlockup_detector_disable(void) {} #endif @@ -139,15 +142,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu) } #endif -#ifdef CONFIG_LOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR u64 
hw_nmi_get_sample_period(int watchdog_thresh); +#endif + +#ifdef CONFIG_LOCKUP_DETECTOR extern int nmi_watchdog_enabled; -extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; +extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; -extern atomic_t watchdog_park_in_progress; +extern int __read_mostly watchdog_suspended; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4dfba1a76cc3..bb747d8f8521 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -880,6 +880,14 @@ static struct ctl_table kern_table[] = { #endif }, { + .procname = "watchdog_cpumask", + .data = &watchdog_cpumask_bits, + .maxlen = NR_CPUS, + .mode = 0644, + .proc_handler = proc_watchdog_cpumask, + }, +#ifdef CONFIG_SOFTLOCKUP_DETECTOR + { .procname = "soft_watchdog", .data = &soft_watchdog_enabled, .maxlen = sizeof (int), @@ -889,13 +897,6 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, { - .procname = "watchdog_cpumask", - .data = &watchdog_cpumask_bits, - .maxlen = NR_CPUS, - .mode = 0644, - .proc_handler = proc_watchdog_cpumask, - }, - { .procname = "softlockup_panic", .data = &softlockup_panic, .maxlen = sizeof(int), @@ -904,7 +905,8 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#endif +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) { .procname = "hardlockup_panic", .data = &hardlockup_panic, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 03e0b69bb5bf..192125f3f8aa 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -29,15 +29,54 @@ #include <linux/kvm_para.h> #include <linux/kthread.h> +/* Watchdog configuration */ static DEFINE_MUTEX(watchdog_proc_mutex); #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) unsigned 
long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; +int __read_mostly nmi_watchdog_enabled; + +/* boot commands */ +/* + * Should we panic when a soft-lockup or hard-lockup occurs: + */ +unsigned int __read_mostly hardlockup_panic = + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; +/* + * We may not want to enable hard lockup detection by default in all cases, + * for example when running the kernel as a guest on a hypervisor. In these + * cases this function can be called to disable hard lockup detection. This + * function should only be executed once by the boot processor before the + * kernel command line parameters are parsed, because otherwise it is not + * possible to override this in hardlockup_panic_setup(). + */ +void hardlockup_detector_disable(void) +{ + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; +} + +static int __init hardlockup_panic_setup(char *str) +{ + if (!strncmp(str, "panic", 5)) + hardlockup_panic = 1; + else if (!strncmp(str, "nopanic", 7)) + hardlockup_panic = 0; + else if (!strncmp(str, "0", 1)) + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; + else if (!strncmp(str, "1", 1)) + watchdog_enabled |= NMI_WATCHDOG_ENABLED; + return 1; +} +__setup("nmi_watchdog=", hardlockup_panic_setup); + #else unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif -int __read_mostly nmi_watchdog_enabled; + +#ifdef CONFIG_SOFTLOCKUP_DETECTOR int __read_mostly soft_watchdog_enabled; +#endif + int __read_mostly watchdog_user_enabled; int __read_mostly watchdog_thresh = 10; @@ -45,15 +84,9 @@ int __read_mostly watchdog_thresh = 10; int __read_mostly sysctl_softlockup_all_cpu_backtrace; int __read_mostly sysctl_hardlockup_all_cpu_backtrace; #endif -static struct cpumask watchdog_cpumask __read_mostly; +struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); -/* Helper for online, unparked cpus. 
*/ -#define for_each_watchdog_cpu(cpu) \ - for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) - -atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); - /* * The 'watchdog_running' variable is set to 1 when the watchdog threads * are registered/started and is set to 0 when the watchdog threads are @@ -72,7 +105,32 @@ static int __read_mostly watchdog_running; * of 'watchdog_running' cannot change while the watchdog is deactivated * temporarily (see related code in 'proc' handlers). */ -static int __read_mostly watchdog_suspended; +int __read_mostly watchdog_suspended; + +/* + * These functions can be overridden if an architecture implements its + * own hardlockup detector. + */ +int __weak watchdog_nmi_enable(unsigned int cpu) +{ + return 0; +} +void __weak watchdog_nmi_disable(unsigned int cpu) +{ +} + +void __weak watchdog_nmi_reconfigure(void) +{ +} + + +#ifdef CONFIG_SOFTLOCKUP_DETECTOR + +/* Helper for online, unparked cpus. */ +#define for_each_watchdog_cpu(cpu) \ + for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) + +atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); static u64 __read_mostly sample_period; @@ -120,6 +178,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str) return 1; } __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); +#ifdef CONFIG_HARDLOCKUP_DETECTOR static int __init hardlockup_all_cpu_backtrace_setup(char *str) { sysctl_hardlockup_all_cpu_backtrace = @@ -128,6 +187,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str) } __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); #endif +#endif /* * Hard-lockup warnings should be triggered after just a few seconds. 
Soft- @@ -213,18 +273,6 @@ void touch_softlockup_watchdog_sync(void) __this_cpu_write(watchdog_touch_ts, 0); } -/* watchdog detector functions */ -bool is_hardlockup(void) -{ - unsigned long hrint = __this_cpu_read(hrtimer_interrupts); - - if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) - return true; - - __this_cpu_write(hrtimer_interrupts_saved, hrint); - return false; -} - static int is_softlockup(unsigned long touch_ts) { unsigned long now = get_timestamp(); @@ -237,21 +285,21 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -static void watchdog_interrupt_count(void) +/* watchdog detector functions */ +bool is_hardlockup(void) { - __this_cpu_inc(hrtimer_interrupts); -} + unsigned long hrint = __this_cpu_read(hrtimer_interrupts); -/* - * These two functions are mostly architecture specific - * defining them as weak here. - */ -int __weak watchdog_nmi_enable(unsigned int cpu) -{ - return 0; + if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) + return true; + + __this_cpu_write(hrtimer_interrupts_saved, hrint); + return false; } -void __weak watchdog_nmi_disable(unsigned int cpu) + +static void watchdog_interrupt_count(void) { + __this_cpu_inc(hrtimer_interrupts); } static int watchdog_enable_all_cpus(void); @@ -502,57 +550,6 @@ static void watchdog_unpark_threads(void) kthread_unpark(per_cpu(softlockup_watchdog, cpu)); } -/* - * Suspend the hard and soft lockup detector by parking the watchdog threads. - */ -int lockup_detector_suspend(void) -{ - int ret = 0; - - get_online_cpus(); - mutex_lock(&watchdog_proc_mutex); - /* - * Multiple suspend requests can be active in parallel (counted by - * the 'watchdog_suspended' variable). If the watchdog threads are - * running, the first caller takes care that they will be parked. - * The state of 'watchdog_running' cannot change while a suspend - * request is active (see related code in 'proc' handlers). 
- */ - if (watchdog_running && !watchdog_suspended) - ret = watchdog_park_threads(); - - if (ret == 0) - watchdog_suspended++; - else { - watchdog_disable_all_cpus(); - pr_err("Failed to suspend lockup detectors, disabled\n"); - watchdog_enabled = 0; - } - - mutex_unlock(&watchdog_proc_mutex); - - return ret; -} - -/* - * Resume the hard and soft lockup detector by unparking the watchdog threads. - */ -void lockup_detector_resume(void) -{ - mutex_lock(&watchdog_proc_mutex); - - watchdog_suspended--; - /* - * The watchdog threads are unparked if they were previously running - * and if there is no more active suspend request. - */ - if (watchdog_running && !watchdog_suspended) - watchdog_unpark_threads(); - - mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); -} - static int update_watchdog_all_cpus(void) { int ret; @@ -604,6 +601,96 @@ static void watchdog_disable_all_cpus(void) } } +static int watchdog_update_cpus(void) +{ + return smpboot_update_cpumask_percpu_thread( + &watchdog_threads, &watchdog_cpumask); +} + +#else /* SOFTLOCKUP */ +static int watchdog_park_threads(void) +{ + return 0; +} + +static void watchdog_unpark_threads(void) +{ +} + +static int watchdog_enable_all_cpus(void) +{ + return 0; +} + +static void watchdog_disable_all_cpus(void) +{ +} + +static int watchdog_update_cpus(void) +{ + return 0; +} + +static void set_sample_period(void) +{ +} +#endif /* SOFTLOCKUP */ + +/* + * Suspend the hard and soft lockup detector by parking the watchdog threads. + */ +int lockup_detector_suspend(void) +{ + int ret = 0; + + get_online_cpus(); + mutex_lock(&watchdog_proc_mutex); + /* + * Multiple suspend requests can be active in parallel (counted by + * the 'watchdog_suspended' variable). If the watchdog threads are + * running, the first caller takes care that they will be parked. + * The state of 'watchdog_running' cannot change while a suspend + * request is active (see related code in 'proc' handlers). 
+ */ + if (watchdog_running && !watchdog_suspended) + ret = watchdog_park_threads(); + + if (ret == 0) + watchdog_suspended++; + else { + watchdog_disable_all_cpus(); + pr_err("Failed to suspend lockup detectors, disabled\n"); + watchdog_enabled = 0; + } + + watchdog_nmi_reconfigure(); + + mutex_unlock(&watchdog_proc_mutex); + + return ret; +} + +/* + * Resume the hard and soft lockup detector by unparking the watchdog threads. + */ +void lockup_detector_resume(void) +{ + mutex_lock(&watchdog_proc_mutex); + + watchdog_suspended--; + /* + * The watchdog threads are unparked if they were previously running + * and if there is no more active suspend request. + */ + if (watchdog_running && !watchdog_suspended) + watchdog_unpark_threads(); + + watchdog_nmi_reconfigure(); + + mutex_unlock(&watchdog_proc_mutex); + put_online_cpus(); +} + #ifdef CONFIG_SYSCTL /* @@ -625,6 +712,8 @@ static int proc_watchdog_update(void) else watchdog_disable_all_cpus(); + watchdog_nmi_reconfigure(); + return err; } @@ -810,10 +899,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, * a temporary cpumask, so we are likely not in a * position to do much else to make things better. 
*/ - if (smpboot_update_cpumask_percpu_thread( - &watchdog_threads, &watchdog_cpumask) != 0) + if (watchdog_update_cpus() != 0) pr_err("cpumask update failed\n"); } + + watchdog_nmi_reconfigure(); } out: mutex_unlock(&watchdog_proc_mutex); diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 54a427d1f344..2b328d6c0bc8 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -22,39 +22,7 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); -/* boot commands */ -/* - * Should we panic when a soft-lockup or hard-lockup occurs: - */ -unsigned int __read_mostly hardlockup_panic = - CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; static unsigned long hardlockup_allcpu_dumped; -/* - * We may not want to enable hard lockup detection by default in all cases, - * for example when running the kernel as a guest on a hypervisor. In these - * cases this function can be called to disable hard lockup detection. This - * function should only be executed once by the boot processor before the - * kernel command line parameters are parsed, because otherwise it is not - * possible to override this in hardlockup_panic_setup(). 
- */ -void hardlockup_detector_disable(void) -{ - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; -} - -static int __init hardlockup_panic_setup(char *str) -{ - if (!strncmp(str, "panic", 5)) - hardlockup_panic = 1; - else if (!strncmp(str, "nopanic", 7)) - hardlockup_panic = 0; - else if (!strncmp(str, "0", 1)) - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; - else if (!strncmp(str, "1", 1)) - watchdog_enabled |= NMI_WATCHDOG_ENABLED; - return 1; -} -__setup("nmi_watchdog=", hardlockup_panic_setup); void touch_nmi_watchdog(void) { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e4587ebe52c7..68685d3ddac0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -801,14 +801,18 @@ config LOCKUP_DETECTOR The frequency of hrtimer and NMI events and the soft and hard lockup thresholds can be controlled through the sysctl watchdog_thresh. +config SOFTLOCKUP_DETECTOR + bool "Detect Soft Lockups" + depends on LOCKUP_DETECTOR + config HARDLOCKUP_DETECTOR - def_bool y - depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG + bool "Detect Hard Lockups" + depends on SOFTLOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" - depends on HARDLOCKUP_DETECTOR + depends on HARDLOCKUP_DETECTOR || HAVE_NMI_WATCHDOG help Say Y here to enable the kernel to panic on "hard lockups", which are bugs that cause the kernel to loop in kernel @@ -819,14 +823,14 @@ config BOOTPARAM_HARDLOCKUP_PANIC config BOOTPARAM_HARDLOCKUP_PANIC_VALUE int - depends on HARDLOCKUP_DETECTOR + depends on HARDLOCKUP_DETECTOR || HAVE_NMI_WATCHDOG range 0 1 default 0 if !BOOTPARAM_HARDLOCKUP_PANIC default 1 if BOOTPARAM_HARDLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" - depends on LOCKUP_DETECTOR + depends on SOFTLOCKUP_DETECTOR help Say Y here to enable the kernel to panic on "soft lockups", which are bugs that cause the kernel to loop in kernel @@ -843,7 +847,7 @@ config 
BOOTPARAM_SOFTLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE int - depends on LOCKUP_DETECTOR + depends on SOFTLOCKUP_DETECTOR range 0 1 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC @@ -851,7 +855,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE config DETECT_HUNG_TASK bool "Detect Hung Tasks" depends on DEBUG_KERNEL - default LOCKUP_DETECTOR + default SOFTLOCKUP_DETECTOR help Say Y here to enable the kernel to detect "hung tasks", which are bugs that cause the task to be stuck in