mirror of
https://github.com/torvalds/linux.git
synced 2025-11-30 23:16:01 +07:00
md/md-llbitmap: introduce new lockless bitmap
Redundant data is used to enhance data fault tolerance, and the storage method for redundant data varies depending on the RAID level. It is important to maintain the consistency of redundant data. A bitmap is used to record which data blocks have been synchronized and which ones need to be resynchronized or recovered. Each bit in the bitmap represents a segment of data in the array. When a bit is set, it indicates that the multiple redundant copies of that data segment may not be consistent. Data synchronization can be performed based on the bitmap after a power failure or after re-adding a disk. If there is no bitmap, a full disk synchronization is required. Due to known performance issues with md-bitmap and its unreasonable implementation choices: - self-managed IO submission like filemap_write_page(); - a global spin_lock; I have decided not to continue optimizing the current bitmap implementation. This new bitmap is designed without locking in the IO fast path and can be used with fast disks. For designs and details, see the comments in drivers/md/md-llbitmap.c. Link: https://lore.kernel.org/linux-raid/20250829080426.1441678-12-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai <yukuai3@huawei.com> Reviewed-by: Li Nan <linan122@huawei.com>
This commit is contained in:
@@ -387,6 +387,8 @@ All md devices contain:
|
||||
No bitmap
|
||||
bitmap
|
||||
The default internal bitmap
|
||||
llbitmap
|
||||
The lockless internal bitmap
|
||||
|
||||
If bitmap_type is not none, then additional bitmap attributes bitmap/xxx or
|
||||
llbitmap/xxx will be created after md device KOBJ_CHANGE event.
|
||||
@@ -447,6 +449,24 @@ If bitmap_type is bitmap, then the md device will also contain:
|
||||
once the array becomes non-degraded, and this fact has been
|
||||
recorded in the metadata.
|
||||
|
||||
If bitmap_type is llbitmap, then the md device will also contain:
|
||||
|
||||
llbitmap/bits
|
||||
This is read-only and shows the status of the bitmap bits, i.e. the number of
|
||||
bits holding each value.
|
||||
|
||||
llbitmap/metadata
|
||||
This is read-only and shows the bitmap metadata, including chunksize, chunkshift,
|
||||
chunks, offset and daemon_sleep.
|
||||
|
||||
llbitmap/daemon_sleep
|
||||
This is read-write: the time in seconds after which the daemon function will be
|
||||
triggered to clear dirty bits.
|
||||
|
||||
llbitmap/barrier_idle
|
||||
This is read-write: the time in seconds after which the page barrier will be idled,
|
||||
which means dirty bits in the page will be cleared.
|
||||
|
||||
As component devices are added to an md array, they appear in the ``md``
|
||||
directory as new directories named::
|
||||
|
||||
|
||||
@@ -52,6 +52,17 @@ config MD_BITMAP
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config MD_LLBITMAP
|
||||
bool "MD RAID lockless bitmap support"
|
||||
depends on BLK_DEV_MD
|
||||
help
|
||||
If you say Y here, support for the lockless write intent bitmap will
|
||||
be enabled.
|
||||
|
||||
Note, this is an experimental feature.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config MD_AUTODETECT
|
||||
bool "Autodetect RAID arrays during kernel boot"
|
||||
depends on BLK_DEV_MD=y
|
||||
|
||||
@@ -29,6 +29,7 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
|
||||
|
||||
md-mod-y += md.o
|
||||
md-mod-$(CONFIG_MD_BITMAP) += md-bitmap.o
|
||||
md-mod-$(CONFIG_MD_LLBITMAP) += md-llbitmap.o
|
||||
raid456-y += raid5.o raid5-cache.o raid5-ppl.o
|
||||
linear-y += md-linear.o
|
||||
|
||||
|
||||
@@ -34,15 +34,6 @@
|
||||
#include "md-bitmap.h"
|
||||
#include "md-cluster.h"
|
||||
|
||||
#define BITMAP_MAJOR_LO 3
|
||||
/* version 4 insists the bitmap is in little-endian order
|
||||
* with version 3, it is host-endian which is non-portable
|
||||
* Version 5 is currently set only for clustered devices
|
||||
*/
|
||||
#define BITMAP_MAJOR_HI 4
|
||||
#define BITMAP_MAJOR_CLUSTERED 5
|
||||
#define BITMAP_MAJOR_HOSTENDIAN 3
|
||||
|
||||
/*
|
||||
* in-memory bitmap:
|
||||
*
|
||||
|
||||
@@ -9,10 +9,26 @@
|
||||
|
||||
#define BITMAP_MAGIC 0x6d746962
|
||||
|
||||
/*
|
||||
* version 3 is host-endian order, this is deprecated and not used for new
|
||||
* arrays
|
||||
*/
|
||||
#define BITMAP_MAJOR_LO 3
|
||||
#define BITMAP_MAJOR_HOSTENDIAN 3
|
||||
/* version 4 is little-endian order, the default value */
|
||||
#define BITMAP_MAJOR_HI 4
|
||||
/* version 5 is only used for cluster */
|
||||
#define BITMAP_MAJOR_CLUSTERED 5
|
||||
/* version 6 is only used for lockless bitmap */
|
||||
#define BITMAP_MAJOR_LOCKLESS 6
|
||||
|
||||
/* use these for bitmap->flags and bitmap->sb->state bit-fields */
|
||||
enum bitmap_state {
|
||||
BITMAP_STALE = 1, /* the bitmap file is out of date or had -EIO */
|
||||
BITMAP_STALE = 1, /* the bitmap file is out of date or had -EIO */
|
||||
BITMAP_WRITE_ERROR = 2, /* A write error has occurred */
|
||||
BITMAP_FIRST_USE = 3, /* llbitmap is just created */
|
||||
BITMAP_CLEAN = 4, /* llbitmap is created with assume_clean */
|
||||
BITMAP_DAEMON_BUSY = 5, /* llbitmap daemon is not finished after daemon_sleep */
|
||||
BITMAP_HOSTENDIAN =15,
|
||||
};
|
||||
|
||||
@@ -166,4 +182,17 @@ static inline void md_bitmap_exit(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MD_LLBITMAP
|
||||
int md_llbitmap_init(void);
|
||||
void md_llbitmap_exit(void);
|
||||
#else
|
||||
static inline int md_llbitmap_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void md_llbitmap_exit(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
1626
drivers/md/md-llbitmap.c
Normal file
1626
drivers/md/md-llbitmap.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -10328,6 +10328,10 @@ static int __init md_init(void)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = md_llbitmap_init();
|
||||
if (ret)
|
||||
goto err_bitmap;
|
||||
|
||||
ret = -ENOMEM;
|
||||
md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
|
||||
if (!md_wq)
|
||||
@@ -10359,6 +10363,8 @@ err_md:
|
||||
err_misc_wq:
|
||||
destroy_workqueue(md_wq);
|
||||
err_wq:
|
||||
md_llbitmap_exit();
|
||||
err_bitmap:
|
||||
md_bitmap_exit();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
enum md_submodule_type {
|
||||
MD_PERSONALITY = 0,
|
||||
MD_CLUSTER,
|
||||
MD_BITMAP, /* TODO */
|
||||
MD_BITMAP,
|
||||
};
|
||||
|
||||
enum md_submodule_id {
|
||||
@@ -39,7 +39,7 @@ enum md_submodule_id {
|
||||
ID_RAID10 = 10,
|
||||
ID_CLUSTER,
|
||||
ID_BITMAP,
|
||||
ID_LLBITMAP, /* TODO */
|
||||
ID_LLBITMAP,
|
||||
ID_BITMAP_NONE,
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user