Merge tag 'dlm-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm

Pull dlm updates from David Teigland:
 "This adds a dlm_release_lockspace() flag to request that node-failure
  recovery be performed for the node leaving the lockspace.

  The implementation of this flag requires coordination with userland
  clustering components. It's been requested for use by GFS2"

* tag 'dlm-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
  dlm: check for undefined release_option values
  dlm: handle release_option as unsigned
  dlm: move to rinfo for all middle conversion cases
  dlm: handle invalid lockspace member remove
  dlm: add new flag DLM_RELEASE_RECOVER for dlm_lockspace_release
  dlm: add new configfs entry release_recover for lockspace members
  dlm: add new RELEASE_RECOVER uevent attribute for release_lockspace
  dlm: use defines for force values in dlm_release_lockspace
  dlm: check for defined force value in dlm_lockspace_release
This commit is contained in:
Linus Torvalds
2025-09-29 15:24:58 -07:00
11 changed files with 153 additions and 39 deletions

View File

@@ -979,7 +979,7 @@ err:
lockres_free(cinfo->resync_lockres);
lockres_free(cinfo->bitmap_lockres);
if (cinfo->lockspace)
dlm_release_lockspace(cinfo->lockspace, 2);
dlm_release_lockspace(cinfo->lockspace, DLM_RELEASE_NORMAL);
mddev->cluster_info = NULL;
kfree(cinfo);
return ret;
@@ -1042,7 +1042,7 @@ static int leave(struct mddev *mddev)
lockres_free(cinfo->resync_lockres);
lockres_free(cinfo->bitmap_lockres);
unlock_all_bitmaps(mddev);
dlm_release_lockspace(cinfo->lockspace, 2);
dlm_release_lockspace(cinfo->lockspace, DLM_RELEASE_NORMAL);
kfree(cinfo);
return 0;
}

View File

@@ -26,6 +26,7 @@
/*
* /config/dlm/<cluster>/spaces/<space>/nodes/<node>/nodeid (refers to <node>)
* /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight
* /config/dlm/<cluster>/spaces/<space>/nodes/<node>/release_recover
* /config/dlm/<cluster>/comms/<comm>/nodeid (refers to <comm>)
* /config/dlm/<cluster>/comms/<comm>/local
* /config/dlm/<cluster>/comms/<comm>/addr (write only)
@@ -267,6 +268,7 @@ enum {
/* Index of each per-node configfs attribute; these values are the
 * designated-initializer slots used in node_attrs[].
 */
enum {
NODE_ATTR_NODEID = 0,
NODE_ATTR_WEIGHT,
NODE_ATTR_RELEASE_RECOVER, /* nodes/<node>/release_recover */
};
struct dlm_clusters {
@@ -280,6 +282,8 @@ struct dlm_spaces {
struct dlm_space {
struct config_group group;
struct list_head members;
struct list_head members_gone;
int members_gone_count;
struct mutex members_lock;
int members_count;
struct dlm_nodes *nds;
@@ -310,6 +314,14 @@ struct dlm_node {
int weight;
int new;
int comm_seq; /* copy of cm->seq when nd->nodeid is set */
unsigned int release_recover;
};
/* Record of a node removed by drop_node(). It is kept on the space's
 * members_gone list until dlm_config_nodes() reports it as a "gone"
 * entry and frees it.
 */
struct dlm_member_gone {
int nodeid; /* copied from the dropped node's nodeid */
unsigned int release_recover; /* copied from the dropped node's release_recover */
struct list_head list; /* space->members_gone */
};
static struct configfs_group_operations clusters_ops = {
@@ -480,6 +492,7 @@ static struct config_group *make_space(struct config_group *g, const char *name)
configfs_add_default_group(&nds->ns_group, &sp->group);
INIT_LIST_HEAD(&sp->members);
INIT_LIST_HEAD(&sp->members_gone);
mutex_init(&sp->members_lock);
sp->members_count = 0;
sp->nds = nds;
@@ -587,10 +600,20 @@ static void drop_node(struct config_group *g, struct config_item *i)
{
struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent);
struct dlm_node *nd = config_item_to_node(i);
struct dlm_member_gone *mb_gone;
mb_gone = kzalloc(sizeof(*mb_gone), GFP_KERNEL);
if (!mb_gone)
return;
mutex_lock(&sp->members_lock);
list_del(&nd->list);
sp->members_count--;
mb_gone->nodeid = nd->nodeid;
mb_gone->release_recover = nd->release_recover;
list_add(&mb_gone->list, &sp->members_gone);
sp->members_gone_count++;
mutex_unlock(&sp->members_lock);
config_item_put(i);
@@ -815,12 +838,34 @@ static ssize_t node_weight_store(struct config_item *item, const char *buf,
return len;
}
/* configfs show handler for nodes/<node>/release_recover.
 *
 * Formats the node's release_recover value into buf as an unsigned
 * decimal followed by a newline and returns the sprintf() result.
 */
static ssize_t node_release_recover_show(struct config_item *item, char *buf)
{
struct dlm_node *n = config_item_to_node(item);
return sprintf(buf, "%u\n", n->release_recover);
}
/* configfs store handler for nodes/<node>/release_recover.
 *
 * Parses buf as an unsigned int directly into the node's
 * release_recover field.
 *
 * Returns len on success, or the kstrtouint() error code if parsing
 * fails.
 */
static ssize_t node_release_recover_store(struct config_item *item,
const char *buf, size_t len)
{
struct dlm_node *n = config_item_to_node(item);
int rc;
/* base argument 0: kstrtouint() picks the radix from the input prefix */
rc = kstrtouint(buf, 0, &n->release_recover);
if (rc)
return rc;
return len;
}
CONFIGFS_ATTR(node_, nodeid);
CONFIGFS_ATTR(node_, weight);
CONFIGFS_ATTR(node_, release_recover);
/* NULL-terminated table of the per-node configfs attributes, indexed by
 * the NODE_ATTR_* enum values.
 */
static struct configfs_attribute *node_attrs[] = {
[NODE_ATTR_NODEID] = &node_attr_nodeid,
[NODE_ATTR_WEIGHT] = &node_attr_weight,
[NODE_ATTR_RELEASE_RECOVER] = &node_attr_release_recover,
NULL,
};
@@ -882,9 +927,10 @@ static void put_comm(struct dlm_comm *cm)
int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
int *count_out)
{
struct dlm_member_gone *mb_gone, *mb_safe;
struct dlm_config_node *nodes, *node;
struct dlm_space *sp;
struct dlm_node *nd;
struct dlm_config_node *nodes, *node;
int rv, count;
sp = get_space(lsname);
@@ -898,7 +944,7 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
goto out;
}
count = sp->members_count;
count = sp->members_count + sp->members_gone_count;
nodes = kcalloc(count, sizeof(struct dlm_config_node), GFP_NOFS);
if (!nodes) {
@@ -917,6 +963,20 @@ int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
nd->new = 0;
}
/* we delay the remove on nodes until here as configfs does
* not support additional attributes for rmdir().
*/
list_for_each_entry_safe(mb_gone, mb_safe, &sp->members_gone, list) {
node->nodeid = mb_gone->nodeid;
node->release_recover = mb_gone->release_recover;
node->gone = true;
node++;
list_del(&mb_gone->list);
sp->members_gone_count--;
kfree(mb_gone);
}
*count_out = count;
*nodes_out = nodes;
rv = 0;

View File

@@ -17,8 +17,10 @@
/* One lockspace member entry in the array returned by dlm_config_nodes(). */
struct dlm_config_node {
int nodeid;
int weight;
bool gone; /* set to true for entries built from the space's members_gone list */
int new;
uint32_t comm_seq;
unsigned int release_recover; /* for gone entries: copied from the removed member's release_recover */
};
extern const struct rhashtable_params dlm_rhash_rsb_params;

View File

@@ -5576,7 +5576,7 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
if (rl->rl_status == DLM_LKSTS_CONVERT && middle_conversion(lkb)) {
/* We may need to adjust grmode depending on other granted locks. */
log_limit(ls, "%s %x middle convert gr %d rq %d remote %d %x",
log_rinfo(ls, "%s %x middle convert gr %d rq %d remote %d %x",
__func__, lkb->lkb_id, lkb->lkb_grmode,
lkb->lkb_rqmode, lkb->lkb_nodeid, lkb->lkb_remid);
rsb_set_flag(r, RSB_RECOVER_CONVERT);

View File

@@ -186,12 +186,17 @@ static struct kobj_type dlm_ktype = {
static struct kset *dlm_kset;
static int do_uevent(struct dlm_ls *ls, int in)
static int do_uevent(struct dlm_ls *ls, int in, unsigned int release_recover)
{
if (in)
char message[512] = {};
char *envp[] = { message, NULL };
if (in) {
kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
else
kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
} else {
snprintf(message, 511, "RELEASE_RECOVER=%u", release_recover);
kobject_uevent_env(&ls->ls_kobj, KOBJ_OFFLINE, envp);
}
log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving");
@@ -575,7 +580,7 @@ static int new_lockspace(const char *name, const char *cluster,
current lockspace members are (via configfs) and then tells the
lockspace to start running (via sysfs) in dlm_ls_start(). */
error = do_uevent(ls, 1);
error = do_uevent(ls, 1, 0);
if (error < 0)
goto out_recoverd;
@@ -592,7 +597,7 @@ static int new_lockspace(const char *name, const char *cluster,
return 0;
out_members:
do_uevent(ls, 0);
do_uevent(ls, 0, 0);
dlm_clear_members(ls);
kfree(ls->ls_node_array);
out_recoverd:
@@ -671,19 +676,20 @@ int dlm_new_user_lockspace(const char *name, const char *cluster,
This is because there may be LKBs queued as ASTs that have been unlinked
from their RSBs and are pending deletion once the AST has been delivered */
static int lockspace_busy(struct dlm_ls *ls, int force)
static int lockspace_busy(struct dlm_ls *ls, unsigned int release_option)
{
struct dlm_lkb *lkb;
unsigned long id;
int rv = 0;
read_lock_bh(&ls->ls_lkbxa_lock);
if (force == 0) {
if (release_option == DLM_RELEASE_NO_LOCKS) {
xa_for_each(&ls->ls_lkbxa, id, lkb) {
rv = 1;
break;
}
} else if (force == 1) {
} else if (release_option == DLM_RELEASE_UNUSED) {
/* TODO: handle this UNUSED option as NO_LOCKS in later patch */
xa_for_each(&ls->ls_lkbxa, id, lkb) {
if (lkb->lkb_nodeid == 0 &&
lkb->lkb_grmode != DLM_LOCK_IV) {
@@ -698,11 +704,11 @@ static int lockspace_busy(struct dlm_ls *ls, int force)
return rv;
}
static int release_lockspace(struct dlm_ls *ls, int force)
static int release_lockspace(struct dlm_ls *ls, unsigned int release_option)
{
int busy, rv;
busy = lockspace_busy(ls, force);
busy = lockspace_busy(ls, release_option);
spin_lock_bh(&lslist_lock);
if (ls->ls_create_count == 1) {
@@ -730,8 +736,9 @@ static int release_lockspace(struct dlm_ls *ls, int force)
dlm_device_deregister(ls);
if (force < 3 && dlm_user_daemon_available())
do_uevent(ls, 0);
if (release_option != DLM_RELEASE_NO_EVENT &&
dlm_user_daemon_available())
do_uevent(ls, 0, (release_option == DLM_RELEASE_RECOVER));
dlm_recoverd_stop(ls);
@@ -782,25 +789,24 @@ static int release_lockspace(struct dlm_ls *ls, int force)
* lockspace must continue to function as usual, participating in recoveries,
* until this returns.
*
* Force has 4 possible values:
* 0 - don't destroy lockspace if it has any LKBs
* 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
* 2 - destroy lockspace regardless of LKBs
* 3 - destroy lockspace as part of a forced shutdown
* See DLM_RELEASE defines for release_option values and their meaning.
*/
int dlm_release_lockspace(void *lockspace, int force)
int dlm_release_lockspace(void *lockspace, unsigned int release_option)
{
struct dlm_ls *ls;
int error;
if (release_option > __DLM_RELEASE_MAX)
return -EINVAL;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
dlm_put_lockspace(ls);
mutex_lock(&ls_lock);
error = release_lockspace(ls, force);
error = release_lockspace(ls, release_option);
if (!error)
ls_count--;
if (!ls_count)

View File

@@ -478,7 +478,8 @@ static void dlm_lsop_recover_prep(struct dlm_ls *ls)
ls->ls_ops->recover_prep(ls->ls_ops_arg);
}
static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb,
unsigned int release_recover)
{
struct dlm_slot slot;
uint32_t seq;
@@ -495,7 +496,7 @@ static void dlm_lsop_recover_slot(struct dlm_ls *ls, struct dlm_member *memb)
error = dlm_comm_seq(memb->nodeid, &seq, false);
if (!error && seq == memb->comm_seq)
if (!release_recover && !error && seq == memb->comm_seq)
return;
slot.nodeid = memb->nodeid;
@@ -552,6 +553,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
struct dlm_member *memb, *safe;
struct dlm_config_node *node;
int i, error, neg = 0, low = -1;
unsigned int release_recover;
/* previously removed members that we've not finished removing need to
* count as a negative change so the "neg" recovery steps will happen
@@ -569,11 +571,21 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
node = find_config_node(rv, memb->nodeid);
if (node && !node->new)
if (!node) {
log_error(ls, "remove member %d invalid",
memb->nodeid);
return -EFAULT;
}
if (!node->new && !node->gone)
continue;
if (!node) {
log_rinfo(ls, "remove member %d", memb->nodeid);
release_recover = 0;
if (node->gone) {
release_recover = node->release_recover;
log_rinfo(ls, "remove member %d%s", memb->nodeid,
release_recover ? " (release_recover)" : "");
} else {
/* removed and re-added */
log_rinfo(ls, "remove member %d comm_seq %u %u",
@@ -584,13 +596,16 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
list_move(&memb->list, &ls->ls_nodes_gone);
remove_remote_member(memb->nodeid);
ls->ls_num_nodes--;
dlm_lsop_recover_slot(ls, memb);
dlm_lsop_recover_slot(ls, memb, release_recover);
}
/* add new members to ls_nodes */
for (i = 0; i < rv->nodes_count; i++) {
node = &rv->nodes[i];
if (node->gone)
continue;
if (dlm_is_member(ls, node->nodeid))
continue;
error = dlm_add_member(ls, node);

View File

@@ -842,7 +842,7 @@ static void recover_conversion(struct dlm_rsb *r)
*/
if (((lkb->lkb_grmode == DLM_LOCK_PR) && (other_grmode == DLM_LOCK_CW)) ||
((lkb->lkb_grmode == DLM_LOCK_CW) && (other_grmode == DLM_LOCK_PR))) {
log_limit(ls, "%s %x gr %d rq %d, remote %d %x, other_lkid %u, other gr %d, set gr=NL",
log_rinfo(ls, "%s %x gr %d rq %d, remote %d %x, other_lkid %u, other gr %d, set gr=NL",
__func__, lkb->lkb_id, lkb->lkb_grmode,
lkb->lkb_rqmode, lkb->lkb_nodeid,
lkb->lkb_remid, other_lkid, other_grmode);

View File

@@ -425,7 +425,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
dlm_put_lockspace(ls);
if (error)
dlm_release_lockspace(lockspace, 0);
dlm_release_lockspace(lockspace, DLM_RELEASE_NO_LOCKS);
else
error = ls->ls_device.minor;
@@ -436,7 +436,7 @@ static int device_remove_lockspace(struct dlm_lspace_params *params)
{
dlm_lockspace_t *lockspace;
struct dlm_ls *ls;
int error, force = 0;
int error, force = DLM_RELEASE_NO_LOCKS;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -446,7 +446,7 @@ static int device_remove_lockspace(struct dlm_lspace_params *params)
return -ENOENT;
if (params->flags & DLM_USER_LSFLG_FORCEFREE)
force = 2;
force = DLM_RELEASE_NORMAL;
lockspace = ls;
dlm_put_lockspace(ls);

View File

@@ -1418,7 +1418,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
return 0;
fail_release:
dlm_release_lockspace(ls->ls_dlm, 2);
dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL);
fail_free:
free_recover_size(ls);
fail:
@@ -1456,7 +1456,7 @@ static void gdlm_unmount(struct gfs2_sbd *sdp)
release:
down_write(&ls->ls_sem);
if (ls->ls_dlm) {
dlm_release_lockspace(ls->ls_dlm, 2);
dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL);
ls->ls_dlm = NULL;
}
up_write(&ls->ls_sem);

View File

@@ -952,7 +952,7 @@ static const struct dlm_lockspace_ops ocfs2_ls_ops = {
static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
{
version_unlock(conn);
dlm_release_lockspace(conn->cc_lockspace, 2);
dlm_release_lockspace(conn->cc_lockspace, DLM_RELEASE_NORMAL);
conn->cc_lockspace = NULL;
ocfs2_live_connection_drop(conn->cc_private);
conn->cc_private = NULL;

View File

@@ -87,13 +87,44 @@ int dlm_new_lockspace(const char *name, const char *cluster,
const struct dlm_lockspace_ops *ops, void *ops_arg,
int *ops_result, dlm_lockspace_t **lockspace);
/*
* dlm_release_lockspace() release_option values:
*
* DLM_RELEASE_NO_LOCKS returns -EBUSY if any locks (lkb's)
* exist in the local lockspace.
*
* DLM_RELEASE_UNUSED previous value that is no longer used.
*
* DLM_RELEASE_NORMAL releases the lockspace regardless of any
* locks managed in the local lockspace.
*
* DLM_RELEASE_NO_EVENT releases the lockspace regardless of any
* locks managed in the local lockspace, and does not submit
* a leave event to the cluster manager, so other nodes will
* not be notified that the node should be removed from the
* list of lockspace members.
*
* DLM_RELEASE_RECOVER like DLM_RELEASE_NORMAL, but the remaining
* nodes will handle the removal of the node as if the node
* had failed, e.g. the recover_slot() callback would be used.
*/
#define DLM_RELEASE_NO_LOCKS 0
#define DLM_RELEASE_UNUSED 1
#define DLM_RELEASE_NORMAL 2
#define DLM_RELEASE_NO_EVENT 3
#define DLM_RELEASE_RECOVER 4
#define __DLM_RELEASE_MAX DLM_RELEASE_RECOVER
/*
* dlm_release_lockspace
*
* Stop a lockspace.
*
* release_option: see DLM_RELEASE values above.
*/
int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force);
int dlm_release_lockspace(dlm_lockspace_t *lockspace,
unsigned int release_option);
/*
* dlm_lock