Discussion:
[PATCH 0/2] xfs log recovery delay instrumentation
Brian Foster
2014-08-20 16:55:53 UTC
Permalink
Hi all,

Here's a couple patches to add the log recovery delay instrumentation
required for reproducing the log recovery buf race problem uncovered by
Alex:

http://oss.sgi.com/archives/xfs/2014-08/msg00155.html

It looked easier to dump this tunable in /proc, but it seems like we
want to move away from adding more things there. This series defines a
generic sys/fs/xfs/debug location to serve a similar purpose and exports
the log recovery delay tunable therein.

The original problem is now easily reproduced with a 10s or so log
recovery delay and the xfstests test I posted the other day:

http://oss.sgi.com/archives/xfs/2014-08/msg00261.html

One thing that comes to mind as I write this is whether it might be a
good idea to only export this debug subdirectory for DEBUG enabled
kernels. Thoughts? Any other thoughts, reviews or flames are appreciated
as well.

Brian

Brian Foster (2):
xfs: add debug sysfs attribute set
xfs: export log_recovery_delay to delay mount time log recovery

fs/xfs/xfs_globals.c | 4 +++
fs/xfs/xfs_log_recover.c | 12 ++++++++
fs/xfs/xfs_super.c | 15 ++++++++--
fs/xfs/xfs_sysctl.h | 5 ++++
fs/xfs/xfs_sysfs.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_sysfs.h | 1 +
6 files changed, 106 insertions(+), 2 deletions(-)
--
1.8.3.1
Brian Foster
2014-08-20 16:55:54 UTC
Permalink
Create a top-level debug directory for global debug sysfs attributes.
This directory is added and removed on XFS module initialization and
removal respectively. It typically resides at /sys/fs/xfs/debug. It is
located at the top level of the xfs sysfs hierarchy as attributes might
define global behavior or behavior that must be configured before an xfs
mount is available (e.g., log recovery behavior).

Define the global debug kobject that represents the debug sysfs
directory and add generic attribute show/store helpers to support future
attributes. No debug attributes are exported as of yet.

Signed-off-by: Brian Foster <***@redhat.com>
---
fs/xfs/xfs_super.c | 15 +++++++++++++--
fs/xfs/xfs_sysfs.c | 40 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_sysfs.h | 1 +
3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index b194652..9b5eb02 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -47,6 +47,7 @@
#include "xfs_dinode.h"
#include "xfs_filestream.h"
#include "xfs_quota.h"
+#include "xfs_sysfs.h"

#include <linux/namei.h>
#include <linux/init.h>
@@ -61,7 +62,9 @@
static const struct super_operations xfs_super_operations;
static kmem_zone_t *xfs_ioend_zone;
mempool_t *xfs_ioend_pool;
-struct kset *xfs_kset;
+
+struct kset *xfs_kset; /* top-level xfs sysfs dir */
+static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */

#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
@@ -1768,10 +1771,15 @@ init_xfs_fs(void)
goto out_sysctl_unregister;;
}

- error = xfs_qm_init();
+ xfs_dbg_kobj.kobject.kset = xfs_kset;
+ error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
if (error)
goto out_kset_unregister;

+ error = xfs_qm_init();
+ if (error)
+ goto out_remove_kobj;
+
error = register_filesystem(&xfs_fs_type);
if (error)
goto out_qm_exit;
@@ -1779,6 +1787,8 @@ init_xfs_fs(void)

out_qm_exit:
xfs_qm_exit();
+ out_remove_kobj:
+ xfs_sysfs_del(&xfs_dbg_kobj);
out_kset_unregister:
kset_unregister(xfs_kset);
out_sysctl_unregister:
@@ -1802,6 +1812,7 @@ exit_xfs_fs(void)
{
xfs_qm_exit();
unregister_filesystem(&xfs_fs_type);
+ xfs_sysfs_del(&xfs_dbg_kobj);
kset_unregister(xfs_kset);
xfs_sysctl_unregister();
xfs_cleanup_procfs();
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 9835139..dd31511 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -51,6 +51,46 @@ struct kobj_type xfs_mp_ktype = {
.release = xfs_sysfs_release,
};

+/* debug */
+
+static struct attribute *xfs_dbg_attrs[] = {
+ NULL,
+};
+
+STATIC ssize_t
+xfs_dbg_show(
+ struct kobject *kobject,
+ struct attribute *attr,
+ char *buf)
+{
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0;
+}
+
+STATIC ssize_t
+xfs_dbg_store(
+ struct kobject *kobject,
+ struct attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
+
+ return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0;
+}
+
+static struct sysfs_ops xfs_dbg_ops = {
+ .show = xfs_dbg_show,
+ .store = xfs_dbg_store,
+};
+
+struct kobj_type xfs_dbg_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_dbg_ops,
+ .default_attrs = xfs_dbg_attrs,
+};
+
/* xlog */

STATIC ssize_t
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index 54a2091..240eee3 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -20,6 +20,7 @@
#define __XFS_SYSFS_H__

extern struct kobj_type xfs_mp_ktype; /* xfs_mount */
+extern struct kobj_type xfs_dbg_ktype; /* debug */
extern struct kobj_type xfs_log_ktype; /* xlog */

static inline struct xfs_kobj *
--
1.8.3.1
Brian Foster
2014-08-20 16:55:55 UTC
Permalink
XFS log recovery has been discovered to have race conditions with
buffers when I/O errors occur. External tools are available to simulate
I/O errors to XFS, but this alone is not sufficient for testing log
recovery. XFS unconditionally resets the inactive region of the log
prior to log recovery to avoid confusion over processing any partially
written log records that might have been written before an unclean
shutdown. Therefore, unconditional write I/O failures at mount time are
caught by the reset sequence rather than log recovery and hinder the
ability to test the latter.

The device-mapper dm-flakey module uses an up/down timer to define a
cycle for when to fail I/Os. Create a pre log recovery delay tunable
that can be used to coordinate XFS log recovery with I/O errors
simulated by dm-flakey. This facilitates coordination in userspace that
allows the reset of stale log blocks to succeed and writes due to log
recovery to fail. For example, define a dm-flakey instance with an
uptime long enough to allow log reset to succeed and a log recovery
delay long enough to allow the dm-flakey uptime to expire.

The 'log_recovery_delay' sysfs tunable is exported under
/sys/fs/xfs/debug. The value is exported in units of seconds and allows
for a delay of up to 60 seconds. Note that this is for XFS debug and
test instrumentation purposes only and should not be used by
applications. No delay is enabled by default.

Signed-off-by: Brian Foster <***@redhat.com>
---
fs/xfs/xfs_globals.c | 4 ++++
fs/xfs/xfs_log_recover.c | 12 ++++++++++++
fs/xfs/xfs_sysctl.h | 5 +++++
fs/xfs/xfs_sysfs.c | 31 +++++++++++++++++++++++++++++++
4 files changed, 52 insertions(+)

diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 5399ef2..4d41b24 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -43,3 +43,7 @@ xfs_param_t xfs_params = {
.fstrm_timer = { 1, 30*100, 3600*100},
.eofb_timer = { 1, 300, 3600*24},
};
+
+struct xfs_globals xfs_globals = {
+ .log_recovery_delay = 0, /* no delay by default */
+};
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 1fd5787..176c4b3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4509,6 +4509,18 @@ xlog_recover(
return -EINVAL;
}

+ /*
+ * Delay log recovery if the debug hook is set. This is debug
+ * instrumention to coordinate simulation of I/O failures with
+ * log recovery.
+ */
+ if (xfs_globals.log_recovery_delay) {
+ xfs_notice(log->l_mp,
+ "Delaying log recovery for %d seconds.",
+ xfs_globals.log_recovery_delay);
+ msleep(xfs_globals.log_recovery_delay * 1000);
+ }
+
xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
log->l_mp->m_logname ? log->l_mp->m_logname
: "internal");
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index bd8e157..ffef453 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -92,6 +92,11 @@ enum {

extern xfs_param_t xfs_params;

+struct xfs_globals {
+ int log_recovery_delay; /* log recovery delay (secs) */
+};
+extern struct xfs_globals xfs_globals;
+
#ifdef CONFIG_SYSCTL
extern int xfs_sysctl_register(void);
extern void xfs_sysctl_unregister(void);
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index dd31511..f78ab31 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -53,7 +53,38 @@ struct kobj_type xfs_mp_ktype = {

/* debug */

+STATIC ssize_t
+log_recovery_delay_store(
+ const char *buf,
+ size_t count,
+ void *data)
+{
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val < 0 || val > 60)
+ return -EINVAL;
+
+ xfs_globals.log_recovery_delay = val;
+
+ return count;
+}
+
+STATIC ssize_t
+log_recovery_delay_show(
+ char *buf,
+ void *data)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay);
+}
+XFS_SYSFS_ATTR_RW(log_recovery_delay);
+
static struct attribute *xfs_dbg_attrs[] = {
+ ATTR_LIST(log_recovery_delay),
NULL,
};
--
1.8.3.1
Loading...