Message ID | 20200509043552.8745-2-mcgrof@kernel.org |
---|---|
State | Changes Requested |
Delegated to: | David Miller |
Headers | show |
Series | net: taint when the device driver firmware crashes | expand |
On Sat, May 09, 2020 at 04:35:38AM +0000, Luis Chamberlain wrote: > Device driver firmware can crash, and sometimes, this can leave your > system in a state which makes the device or subsystem completely > useless. Detecting this by inspecting /proc/sys/kernel/tainted instead > of scraping some magical words from the kernel log, which is driver > specific, is much easier. So instead provide a helper which lets drivers > annotate this. > > Once this happens, scrapers can easily look for modules taint flags > for a firmware crash. This will taint both the kernel and respective > calling module. > > The new helper module_firmware_crashed() uses LOCKDEP_STILL_OK as this > fact should in no way shape or form affect lockdep. This taint is device > driver specific. > > Signed-off-by: Luis Chamberlain <mcgrof@kernel.org> > --- > include/linux/kernel.h | 3 ++- > include/linux/module.h | 13 +++++++++++++ > include/trace/events/module.h | 3 ++- > kernel/module.c | 5 +++-- > kernel/panic.c | 1 + > 5 files changed, 21 insertions(+), 4 deletions(-) > > diff --git a/include/linux/kernel.h b/include/linux/kernel.h > index 04a5885cec1b..19e1541c82c7 100644 > --- a/include/linux/kernel.h > +++ b/include/linux/kernel.h > @@ -601,7 +601,8 @@ extern enum system_states { > #define TAINT_LIVEPATCH 15 > #define TAINT_AUX 16 > #define TAINT_RANDSTRUCT 17 > -#define TAINT_FLAGS_COUNT 18 > +#define TAINT_FIRMWARE_CRASH 18 > +#define TAINT_FLAGS_COUNT 19 > We are still missing the documentation bits for this new flag, though. 
How about having a blurb similar to: diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst index 71e9184a9079..5c6a9e2478b0 100644 --- a/Documentation/admin-guide/tainted-kernels.rst +++ b/Documentation/admin-guide/tainted-kernels.rst @@ -100,6 +100,7 @@ Bit Log Number Reason that got the kernel tainted 15 _/K 32768 kernel has been live patched 16 _/X 65536 auxiliary taint, defined for and used by distros 17 _/T 131072 kernel was built with the struct randomization plugin + 18 _/Q 262144 driver firmware crash annotation === === ====== ======================================================== Note: The character ``_`` is representing a blank in this table to make reading @@ -162,3 +163,7 @@ More detailed explanation for tainting produce extremely unusual kernel structure layouts (even performance pathological ones), which is important to know when debugging. Set at build time. + + 18) ``Q`` Device drivers might annotate the kernel with this taint, in cases + their firmware might have crashed leaving the driver in a crippled and + potentially useless state. > struct taint_flag { > char c_true; /* character printed when tainted */ > diff --git a/include/linux/module.h b/include/linux/module.h > index 2c2e988bcf10..221200078180 100644 > --- a/include/linux/module.h > +++ b/include/linux/module.h > @@ -697,6 +697,14 @@ static inline bool is_livepatch_module(struct module *mod) > bool is_module_sig_enforced(void); > void set_module_sig_enforced(void); > > +void add_taint_module(struct module *mod, unsigned flag, > + enum lockdep_ok lockdep_ok); > + > +static inline void module_firmware_crashed(void) > +{ > + add_taint_module(THIS_MODULE, TAINT_FIRMWARE_CRASH, LOCKDEP_STILL_OK); > +} > + > #else /* !CONFIG_MODULES... 
*/ > > static inline struct module *__module_address(unsigned long addr) > @@ -844,6 +852,11 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr) > return ptr; > } > > +static inline void module_firmware_crashed(void) > +{ > + add_taint(TAINT_FIRMWARE_CRASH, LOCKDEP_STILL_OK); > +} > + > #endif /* CONFIG_MODULES */ > > #ifdef CONFIG_SYSFS > diff --git a/include/trace/events/module.h b/include/trace/events/module.h > index 097485c73c01..b749ea25affd 100644 > --- a/include/trace/events/module.h > +++ b/include/trace/events/module.h > @@ -26,7 +26,8 @@ struct module; > { (1UL << TAINT_OOT_MODULE), "O" }, \ > { (1UL << TAINT_FORCED_MODULE), "F" }, \ > { (1UL << TAINT_CRAP), "C" }, \ > - { (1UL << TAINT_UNSIGNED_MODULE), "E" }) > + { (1UL << TAINT_UNSIGNED_MODULE), "E" }, \ > + { (1UL << TAINT_FIRMWARE_CRASH), "Q" }) > > TRACE_EVENT(module_load, > > diff --git a/kernel/module.c b/kernel/module.c > index 80faaf2116dd..f98e8c25c6b4 100644 > --- a/kernel/module.c > +++ b/kernel/module.c > @@ -325,12 +325,13 @@ static inline int strong_try_module_get(struct module *mod) > return -ENOENT; > } > > -static inline void add_taint_module(struct module *mod, unsigned flag, > - enum lockdep_ok lockdep_ok) > +void add_taint_module(struct module *mod, unsigned flag, > + enum lockdep_ok lockdep_ok) > { > add_taint(flag, lockdep_ok); > set_bit(flag, &mod->taints); > } > +EXPORT_SYMBOL_GPL(add_taint_module); > > /* > * A thread that wants to hold a reference to a module only while it > diff --git a/kernel/panic.c b/kernel/panic.c > index ec6d7d788ce7..504fb926947e 100644 > --- a/kernel/panic.c > +++ b/kernel/panic.c > @@ -384,6 +384,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { > [ TAINT_LIVEPATCH ] = { 'K', ' ', true }, > [ TAINT_AUX ] = { 'X', ' ', true }, > [ TAINT_RANDSTRUCT ] = { 'T', ' ', true }, > + [ TAINT_FIRMWARE_CRASH ] = { 'Q', ' ', true }, > }; > > /** > -- > 2.25.1 >
On Sat, May 09, 2020 at 11:18:29AM -0400, Rafael Aquini wrote: > We are still missing the documentation bits for this > new flag, though. Ah yeah sorry about that. > How about having a blurb similar to: > > diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst > index 71e9184a9079..5c6a9e2478b0 100644 > --- a/Documentation/admin-guide/tainted-kernels.rst > +++ b/Documentation/admin-guide/tainted-kernels.rst > @@ -100,6 +100,7 @@ Bit Log Number Reason that got the kernel tainted > 15 _/K 32768 kernel has been live patched > 16 _/X 65536 auxiliary taint, defined for and used by distros > 17 _/T 131072 kernel was built with the struct randomization plugin > + 18 _/Q 262144 driver firmware crash annotation > === === ====== ======================================================== > > Note: The character ``_`` is representing a blank in this table to make reading > @@ -162,3 +163,7 @@ More detailed explanation for tainting > produce extremely unusual kernel structure layouts (even performance > pathological ones), which is important to know when debugging. Set at > build time. > + > + 18) ``Q`` Device drivers might annotate the kernel with this taint, in cases > + their firmware might have crashed leaving the driver in a crippled and > + potentially useless state. Sure, I'll modify it a bit to add the use case to help with support issues, i.e., to help rule out firmware issues. I'm starting to think that to make this even more useful later we may want to add a uevent to add_taint() so that userspace can decide to look into this, ignore it, or report something to the user, say on their desktop. Luis
On 5/9/20 9:46 AM, Luis Chamberlain wrote: > On Sat, May 09, 2020 at 11:18:29AM -0400, Rafael Aquini wrote: >> We are still missing the documentation bits for this >> new flag, though. > > Ah yeah sorry about that. > >> How about having a blurb similar to: >> >> diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst >> index 71e9184a9079..5c6a9e2478b0 100644 >> --- a/Documentation/admin-guide/tainted-kernels.rst >> +++ b/Documentation/admin-guide/tainted-kernels.rst >> @@ -100,6 +100,7 @@ Bit Log Number Reason that got the kernel tainted >> 15 _/K 32768 kernel has been live patched >> 16 _/X 65536 auxiliary taint, defined for and used by distros >> 17 _/T 131072 kernel was built with the struct randomization plugin >> + 18 _/Q 262144 driver firmware crash annotation >> === === ====== ======================================================== >> >> Note: The character ``_`` is representing a blank in this table to make reading >> @@ -162,3 +163,7 @@ More detailed explanation for tainting >> produce extremely unusual kernel structure layouts (even performance >> pathological ones), which is important to know when debugging. Set at >> build time. >> + >> + 18) ``Q`` Device drivers might annotate the kernel with this taint, in cases >> + their firmware might have crashed leaving the driver in a crippled and >> + potentially useless state. > > Sure, I'll modify it a bit to add the use case to help with support > issues, i.e., to help rule out firmware issues. Please also update tools/debugging/kernel-chktaint. > I'm starting to think that to make this even more useful later we may > want to add a uevent to add_taint() so that userspace can decide to look > into this, ignore it, or report something to the user, say on their > desktop. thanks.
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 04a5885cec1b..19e1541c82c7 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -601,7 +601,8 @@ extern enum system_states { #define TAINT_LIVEPATCH 15 #define TAINT_AUX 16 #define TAINT_RANDSTRUCT 17 -#define TAINT_FLAGS_COUNT 18 +#define TAINT_FIRMWARE_CRASH 18 +#define TAINT_FLAGS_COUNT 19 struct taint_flag { char c_true; /* character printed when tainted */ diff --git a/include/linux/module.h b/include/linux/module.h index 2c2e988bcf10..221200078180 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -697,6 +697,14 @@ static inline bool is_livepatch_module(struct module *mod) bool is_module_sig_enforced(void); void set_module_sig_enforced(void); +void add_taint_module(struct module *mod, unsigned flag, + enum lockdep_ok lockdep_ok); + +static inline void module_firmware_crashed(void) +{ + add_taint_module(THIS_MODULE, TAINT_FIRMWARE_CRASH, LOCKDEP_STILL_OK); +} + #else /* !CONFIG_MODULES... 
*/ static inline struct module *__module_address(unsigned long addr) @@ -844,6 +852,11 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr) return ptr; } +static inline void module_firmware_crashed(void) +{ + add_taint(TAINT_FIRMWARE_CRASH, LOCKDEP_STILL_OK); +} + #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 097485c73c01..b749ea25affd 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -26,7 +26,8 @@ struct module; { (1UL << TAINT_OOT_MODULE), "O" }, \ { (1UL << TAINT_FORCED_MODULE), "F" }, \ { (1UL << TAINT_CRAP), "C" }, \ - { (1UL << TAINT_UNSIGNED_MODULE), "E" }) + { (1UL << TAINT_UNSIGNED_MODULE), "E" }, \ + { (1UL << TAINT_FIRMWARE_CRASH), "Q" }) TRACE_EVENT(module_load, diff --git a/kernel/module.c b/kernel/module.c index 80faaf2116dd..f98e8c25c6b4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -325,12 +325,13 @@ static inline int strong_try_module_get(struct module *mod) return -ENOENT; } -static inline void add_taint_module(struct module *mod, unsigned flag, - enum lockdep_ok lockdep_ok) +void add_taint_module(struct module *mod, unsigned flag, + enum lockdep_ok lockdep_ok) { add_taint(flag, lockdep_ok); set_bit(flag, &mod->taints); } +EXPORT_SYMBOL_GPL(add_taint_module); /* * A thread that wants to hold a reference to a module only while it diff --git a/kernel/panic.c b/kernel/panic.c index ec6d7d788ce7..504fb926947e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -384,6 +384,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { [ TAINT_LIVEPATCH ] = { 'K', ' ', true }, [ TAINT_AUX ] = { 'X', ' ', true }, [ TAINT_RANDSTRUCT ] = { 'T', ' ', true }, + [ TAINT_FIRMWARE_CRASH ] = { 'Q', ' ', true }, }; /**
Device driver firmware can crash, and sometimes, this can leave your system in a state which makes the device or subsystem completely useless. Detecting this by inspecting /proc/sys/kernel/tainted instead of scraping some magical words from the kernel log, which is driver-specific, is much easier. So instead provide a helper which lets drivers annotate this. Once this happens, scrapers can easily look for module taint flags for a firmware crash. This will taint both the kernel and the respective calling module. The new helper module_firmware_crashed() uses LOCKDEP_STILL_OK as this fact should in no way, shape, or form affect lockdep. This taint is device driver specific. Signed-off-by: Luis Chamberlain <mcgrof@kernel.org> --- include/linux/kernel.h | 3 ++- include/linux/module.h | 13 +++++++++++++ include/trace/events/module.h | 3 ++- kernel/module.c | 5 +++-- kernel/panic.c | 1 + 5 files changed, 21 insertions(+), 4 deletions(-)