diff mbox series

ubi: wl: Close down wear-leveling before nand is suspended

Message ID 20240907-ubi-wl-race-v1-1-6f7f5e0aea7b@axis.com
State New
Headers show
Series ubi: wl: Close down wear-leveling before nand is suspended | expand

Commit Message

Mårten Lindahl Sept. 7, 2024, 7:28 p.m. UTC
If a reboot/shutdown signal with double force (-ff) is triggered when
the erase worker or wear-leveling worker function runs we may end up in
a race condition since the MTD device gets a reboot notification and
suspends the nand flash before the erase or wear-leveling is done. This
will reject all accesses to the flash with -EBUSY.

Sequence for the erase worker function:

   systemctl reboot -ff           ubi_thread

                                do_work
 __do_sys_reboot
   blocking_notifier_call_chain
     mtd_reboot_notifier
       nand_shutdown
         nand_suspend
                                  __erase_worker
                                    ubi_sync_erase
                                      mtd_erase
                                        nand_erase_nand

                                          # Blocked by suspended chip
                                          nand_get_device
                                            => EBUSY

Similar sequence for the wear-leveling function:

   systemctl reboot -ff           ubi_thread

                                do_work
 __do_sys_reboot
   blocking_notifier_call_chain
     mtd_reboot_notifier
       nand_shutdown
         nand_suspend
                                  wear_leveling_worker
                                    ubi_eba_copy_leb
                                      ubi_io_write
                                        mtd_write
                                          nand_write_oob

                                            # Blocked by suspended chip
                                            nand_get_device
                                              => EBUSY

 systemd-shutdown[1]: Rebooting.
 ubi0 error: ubi_io_write: error -16 while writing 2048 bytes to PEB
 CPU: 1 PID: 82 Comm: ubi_bgt0d Kdump: loaded Tainted: G           O
 (unwind_backtrace) from [<80107b9f>] (show_stack+0xb/0xc)
 (show_stack) from [<8033641f>] (dump_stack_lvl+0x2b/0x34)
 (dump_stack_lvl) from [<803b7f3f>] (ubi_io_write+0x3ab/0x4a8)
 (ubi_io_write) from [<803b817d>] (ubi_io_write_vid_hdr+0x71/0xb4)
 (ubi_io_write_vid_hdr) from [<803b6971>] (ubi_eba_copy_leb+0x195/0x2f0)
 (ubi_eba_copy_leb) from [<803b939b>] (wear_leveling_worker+0x2ff/0x738)
 (wear_leveling_worker) from [<803b86ef>] (do_work+0x5b/0xb0)
 (do_work) from [<803b9ee1>] (ubi_thread+0xb1/0x11c)
 (ubi_thread) from [<8012c113>] (kthread+0x11b/0x134)
 (kthread) from [<80100139>] (ret_from_fork+0x11/0x38)
 Exception stack(0x80c43fb0 to 0x80c43ff8)
 ...
 ubi0 error: ubi_dump_flash: err -16 while reading 2048 bytes from PEB
 ubi0 error: wear_leveling_worker: error -16 while moving PEB 246 to PEB
 ubi0 warning: ubi_ro_mode.part.0: switch to read-only mode
 ...
 ubi0 error: do_work: work failed with error code -16
 ubi0 error: ubi_thread: ubi_bgt0d: work failed with error code -16
 ...
 Kernel panic - not syncing: Software Watchdog Timer expired

Add a reboot notification for the ubi/wear-leveling to shutdown any
potential flash work actions before the nand is suspended.

Signed-off-by: Mårten Lindahl <marten.lindahl@axis.com>
---
 drivers/mtd/ubi/ubi.h |  2 ++
 drivers/mtd/ubi/wl.c  | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+)


---
base-commit: 431c1646e1f86b949fa3685efc50b660a364c2b6
change-id: 20240903-ubi-wl-race-e039db89122f

Best regards,

Comments

Zhihao Cheng Sept. 9, 2024, 3:20 a.m. UTC | #1
在 2024/9/8 3:28, Mårten Lindahl 写道:
> If a reboot/shutdown signal with double force (-ff) is triggered when
> the erase worker or wear-leveling worker function runs we may end up in
> a race condition since the MTD device gets a reboot notification and
> suspends the nand flash before the erase or wear-leveling is done. This
> will reject all accesses to the flash with -EBUSY.
> 
> Sequence for the erase worker function:
> 
>     systemctl reboot -ff           ubi_thread
> 
>                                  do_work
>   __do_sys_reboot
>     blocking_notifier_call_chain
>       mtd_reboot_notifier
>         nand_shutdown
>           nand_suspend
>                                    __erase_worker
>                                      ubi_sync_erase
>                                        mtd_erase
>                                          nand_erase_nand
> 
>                                            # Blocked by suspended chip
>                                            nand_get_device
>                                              => EBUSY
> 
> Similar sequence for the wear-leveling function:
> 
>     systemctl reboot -ff           ubi_thread
> 
>                                  do_work
>   __do_sys_reboot
>     blocking_notifier_call_chain
>       mtd_reboot_notifier
>         nand_shutdown
>           nand_suspend
>                                    wear_leveling_worker
>                                      ubi_eba_copy_leb
>                                        ubi_io_write
>                                          mtd_write
>                                            nand_write_oob
> 
>                                              # Blocked by suspended chip
>                                              nand_get_device
>                                                => EBUSY
> 
>   systemd-shutdown[1]: Rebooting.
>   ubi0 error: ubi_io_write: error -16 while writing 2048 bytes to PEB
>   CPU: 1 PID: 82 Comm: ubi_bgt0d Kdump: loaded Tainted: G           O
>   (unwind_backtrace) from [<80107b9f>] (show_stack+0xb/0xc)
>   (show_stack) from [<8033641f>] (dump_stack_lvl+0x2b/0x34)
>   (dump_stack_lvl) from [<803b7f3f>] (ubi_io_write+0x3ab/0x4a8)
>   (ubi_io_write) from [<803b817d>] (ubi_io_write_vid_hdr+0x71/0xb4)
>   (ubi_io_write_vid_hdr) from [<803b6971>] (ubi_eba_copy_leb+0x195/0x2f0)
>   (ubi_eba_copy_leb) from [<803b939b>] (wear_leveling_worker+0x2ff/0x738)
>   (wear_leveling_worker) from [<803b86ef>] (do_work+0x5b/0xb0)
>   (do_work) from [<803b9ee1>] (ubi_thread+0xb1/0x11c)
>   (ubi_thread) from [<8012c113>] (kthread+0x11b/0x134)
>   (kthread) from [<80100139>] (ret_from_fork+0x11/0x38)
>   Exception stack(0x80c43fb0 to 0x80c43ff8)
>   ...
>   ubi0 error: ubi_dump_flash: err -16 while reading 2048 bytes from PEB
>   ubi0 error: wear_leveling_worker: error -16 while moving PEB 246 to PEB
>   ubi0 warning: ubi_ro_mode.part.0: switch to read-only mode
>   ...
>   ubi0 error: do_work: work failed with error code -16
>   ubi0 error: ubi_thread: ubi_bgt0d: work failed with error code -16

Yes, I noticed these types of messages too before kernel v5.18. Since 
commit 013e6292aaf5e4b0("mtd: rawnand: Simplify the locking"), the 
behavior of nand_get_device() is changed. A process who is invoking 
nand_get_device() during rebooting won't be stucked, it will get an 
EBUSY error code, that's why we see the above messages from UBI module.
After commit 8cba323437a49a4("mtd: rawnand: protect access to rawnand 
devices while in suspend"), the behavior of nand_get_device() is changed 
back. A process who is invoking nand_get_device() during rebooting will 
be stucked again, so there should be no error messages in UBI layer.
So, is your kernel version lower than v5.18?
Mårten Lindahl Sept. 9, 2024, 11:31 a.m. UTC | #2
On 9/9/24 05:20, Zhihao Cheng wrote:
> 在 2024/9/8 3:28, Mårten Lindahl 写道:
>> If a reboot/shutdown signal with double force (-ff) is triggered when
>> the erase worker or wear-leveling worker function runs we may end up in
>> a race condition since the MTD device gets a reboot notification and
>> suspends the nand flash before the erase or wear-leveling is done. This
>> will reject all accesses to the flash with -EBUSY.
>>
>> Sequence for the erase worker function:
>>
>>     systemctl reboot -ff           ubi_thread
>>
>>                                  do_work
>>   __do_sys_reboot
>>     blocking_notifier_call_chain
>>       mtd_reboot_notifier
>>         nand_shutdown
>>           nand_suspend
>>                                    __erase_worker
>>                                      ubi_sync_erase
>>                                        mtd_erase
>>                                          nand_erase_nand
>>
>>                                            # Blocked by suspended chip
>>                                            nand_get_device
>>                                              => EBUSY
>>
>> Similar sequence for the wear-leveling function:
>>
>>     systemctl reboot -ff           ubi_thread
>>
>>                                  do_work
>>   __do_sys_reboot
>>     blocking_notifier_call_chain
>>       mtd_reboot_notifier
>>         nand_shutdown
>>           nand_suspend
>>                                    wear_leveling_worker
>>                                      ubi_eba_copy_leb
>>                                        ubi_io_write
>>                                          mtd_write
>>                                            nand_write_oob
>>
>>                                              # Blocked by suspended chip
>>                                              nand_get_device
>>                                                => EBUSY
>>
>>   systemd-shutdown[1]: Rebooting.
>>   ubi0 error: ubi_io_write: error -16 while writing 2048 bytes to PEB
>>   CPU: 1 PID: 82 Comm: ubi_bgt0d Kdump: loaded Tainted: G           O
>>   (unwind_backtrace) from [<80107b9f>] (show_stack+0xb/0xc)
>>   (show_stack) from [<8033641f>] (dump_stack_lvl+0x2b/0x34)
>>   (dump_stack_lvl) from [<803b7f3f>] (ubi_io_write+0x3ab/0x4a8)
>>   (ubi_io_write) from [<803b817d>] (ubi_io_write_vid_hdr+0x71/0xb4)
>>   (ubi_io_write_vid_hdr) from [<803b6971>] 
>> (ubi_eba_copy_leb+0x195/0x2f0)
>>   (ubi_eba_copy_leb) from [<803b939b>] 
>> (wear_leveling_worker+0x2ff/0x738)
>>   (wear_leveling_worker) from [<803b86ef>] (do_work+0x5b/0xb0)
>>   (do_work) from [<803b9ee1>] (ubi_thread+0xb1/0x11c)
>>   (ubi_thread) from [<8012c113>] (kthread+0x11b/0x134)
>>   (kthread) from [<80100139>] (ret_from_fork+0x11/0x38)
>>   Exception stack(0x80c43fb0 to 0x80c43ff8)
>>   ...
>>   ubi0 error: ubi_dump_flash: err -16 while reading 2048 bytes from PEB
>>   ubi0 error: wear_leveling_worker: error -16 while moving PEB 246 to 
>> PEB
>>   ubi0 warning: ubi_ro_mode.part.0: switch to read-only mode
>>   ...
>>   ubi0 error: do_work: work failed with error code -16
>>   ubi0 error: ubi_thread: ubi_bgt0d: work failed with error code -16
>
Hi Zhihao Cheng!
> Yes, I noticed these types of messages too before kernel v5.18. Since 
> commit 013e6292aaf5e4b0("mtd: rawnand: Simplify the locking"), the 
> behavior of nand_get_device() is changed. A process who is invoking 
> nand_get_device() during rebooting won't be stucked, it will get an 
> EBUSY error code, that's why we see the above messages from UBI module.
> After commit 8cba323437a49a4("mtd: rawnand: protect access to rawnand 
> devices while in suspend"), the behavior of nand_get_device() is 
> changed back. A process who is invoking nand_get_device() during 
> rebooting will be stucked again, so there should be no error messages 
> in UBI layer.
> So, is your kernel version lower than v5.18?
>
Thanks for identifying this! Yes, the device I'm testing runs v5.15. I 
can't upgrade it to newer kernels so I backported a lot of ubi patches 
from mainline, but it seems I missed commit 8cba323437a49a4 which indeed 
seems to solve the problem.

Again, thank you very much!

Kind regards

Mårten
diff mbox series

Patch

diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 1c9e874e8ede..4a48ed0ecdb6 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -549,6 +549,7 @@  struct ubi_debug_info {
  * @peb_buf: a buffer of PEB size used for different purposes
  * @buf_mutex: protects @peb_buf
  * @ckvol_mutex: serializes static volume checking when opening
+ * @wl_reboot_notifier: close all wear-leveling work before reboot
  *
  * @dbg: debugging information for this UBI device
  */
@@ -651,6 +652,7 @@  struct ubi_device {
 	void *peb_buf;
 	struct mutex buf_mutex;
 	struct mutex ckvol_mutex;
+	struct notifier_block wl_reboot_notifier;
 
 	struct ubi_debug_info dbg;
 };
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index a357f3d27f2f..b3484e9e969c 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -89,6 +89,7 @@ 
 #include <linux/crc32.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
+#include <linux/reboot.h>
 #include "ubi.h"
 #include "wl.h"
 
@@ -127,6 +128,8 @@  static int self_check_in_wl_tree(const struct ubi_device *ubi,
 				 struct ubi_wl_entry *e, struct rb_root *root);
 static int self_check_in_pq(const struct ubi_device *ubi,
 			    struct ubi_wl_entry *e);
+static int ubi_wl_reboot_notifier(struct notifier_block *n,
+				  unsigned long state, void *cmd);
 
 /**
  * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
@@ -1943,6 +1946,13 @@  int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai)
 	if (!ubi->ro_mode && !ubi->fm_disabled)
 		ubi_ensure_anchor_pebs(ubi);
 #endif
+
+	if (!ubi->wl_reboot_notifier.notifier_call) {
+		ubi->wl_reboot_notifier.notifier_call = ubi_wl_reboot_notifier;
+		ubi->wl_reboot_notifier.priority = 1; /* Higher than MTD */
+		register_reboot_notifier(&ubi->wl_reboot_notifier);
+	}
+
 	return 0;
 
 out_free:
@@ -1988,6 +1998,17 @@  void ubi_wl_close(struct ubi_device *ubi)
 	kfree(ubi->lookuptbl);
 }
 
+static int ubi_wl_reboot_notifier(struct notifier_block *n,
+				  unsigned long state, void *cmd)
+{
+	struct ubi_device *ubi;
+
+	ubi = container_of(n, struct ubi_device, wl_reboot_notifier);
+	ubi_wl_close(ubi);
+
+	return NOTIFY_DONE;
+}
+
 /**
  * self_check_ec - make sure that the erase counter of a PEB is correct.
  * @ubi: UBI device description object