From 3cf6af50e0bc02b294bf974a8e653e12c40b5263 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Sun, 12 Aug 2018 18:22:03 -0400 Subject: [PATCH] Linux 4.18 compat: inode timespec -> timespec64 Commit torvalds/linux@95582b0 changes the inode i_atime, i_mtime, and i_ctime members from timespecs to timespec64s to make them 2038 safe. As part of this change the current_time() function was also updated to return the timespec64 type. Resolve this issue by introducing a new inode_timespec_t type which is defined to match the timespec type used by the inode. It should be used when working with inode timestamps to ensure matching types. The timestruc_t type under Illumos was used in a similar fashion but was specified to always be a timespec_t. Rather than incorrectly define this type, all timespec_t types have been replaced by the new inode_timespec_t type. Finally, the kernel and user space 'sys/time.h' headers were aligned with each other. They define, as appropriate for the context, several constants as macros and include static inline implementations of gethrestime(), gethrestime_sec(), and gethrtime(). 
Reviewed-by: Chunwei Chen Signed-off-by: Brian Behlendorf Closes #7643 Backported-by: Richard Yao --- config/kernel-current-time.m4 | 7 +++--- include/sys/dmu.h | 2 +- include/sys/dmu_objset.h | 2 +- include/sys/dsl_dir.h | 4 ++-- include/sys/spa_impl.h | 2 +- include/sys/xvattr.h | 2 +- include/sys/zfs_context.h | 9 +------ include/sys/zfs_znode.h | 33 ++++++++++++++++++------- include/sys/zpl.h | 9 +++++++ lib/libspl/Makefile.am | 2 -- lib/libspl/gethrestime.c | 38 ----------------------------- lib/libspl/gethrtime.c | 45 ----------------------------------- lib/libspl/include/sys/time.h | 37 +++++++++++++++++++++------- lib/libzpool/kernel.c | 4 ++-- module/zfs/dmu_objset.c | 2 +- module/zfs/dsl_dir.c | 6 ++--- module/zfs/fm.c | 2 +- module/zfs/zfs_ctldir.c | 2 +- module/zfs/zfs_vnops.c | 4 ++-- module/zfs/zfs_znode.c | 4 ++-- module/zfs/zpl_inode.c | 5 ++-- 21 files changed, 88 insertions(+), 133 deletions(-) delete mode 100644 lib/libspl/gethrestime.c delete mode 100644 lib/libspl/gethrtime.c diff --git a/config/kernel-current-time.m4 b/config/kernel-current-time.m4 index 2ede9ff38c4..c7d5c9b5200 100644 --- a/config/kernel-current-time.m4 +++ b/config/kernel-current-time.m4 @@ -1,15 +1,14 @@ dnl # dnl # 4.9, current_time() added +dnl # 4.18, return type changed from timespec to timespec64 dnl # AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME], [AC_MSG_CHECKING([whether current_time() exists]) ZFS_LINUX_TRY_COMPILE_SYMBOL([ #include ], [ - struct inode ip; - struct timespec now __attribute__ ((unused)); - - now = current_time(&ip); + struct inode ip __attribute__ ((unused)); + ip.i_atime = current_time(&ip); ], [current_time], [fs/inode.c], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_CURRENT_TIME, 1, [current_time() exists]) diff --git a/include/sys/dmu.h b/include/sys/dmu.h index bcdf7d646fb..755a90561ae 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -891,7 +891,7 @@ uint64_t dmu_objset_fsid_guid(objset_t *os); /* * Get the [cm]time for an objset's snapshot dir */ 
-timestruc_t dmu_objset_snap_cmtime(objset_t *os); +inode_timespec_t dmu_objset_snap_cmtime(objset_t *os); int dmu_objset_is_snapshot(objset_t *os); diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index a836e03722c..531e81d4d88 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -179,7 +179,7 @@ int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj, int func(struct dsl_pool *, struct dsl_dataset *, void *), void *arg, int flags); void dmu_objset_evict_dbufs(objset_t *os); -timestruc_t dmu_objset_snap_cmtime(objset_t *os); +inode_timespec_t dmu_objset_snap_cmtime(objset_t *os); /* called from dsl */ void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h index 69b0b6a5355..80e83fdc4d3 100644 --- a/include/sys/dsl_dir.h +++ b/include/sys/dsl_dir.h @@ -103,7 +103,7 @@ struct dsl_dir { /* Protected by dd_lock */ kmutex_t dd_lock; list_t dd_props; /* list of dsl_prop_record_t's */ - timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */ + inode_timespec_t dd_snap_cmtime; /* last snapshot namespace change */ uint64_t dd_origin_txg; /* gross estimate of space used by in-flight tx's */ @@ -159,7 +159,7 @@ boolean_t dsl_dir_is_clone(dsl_dir_t *dd); void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds, uint64_t reservation, cred_t *cr, dmu_tx_t *tx); void dsl_dir_snap_cmtime_update(dsl_dir_t *dd); -timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd); +inode_timespec_t dsl_dir_snap_cmtime(dsl_dir_t *dd); void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx); void dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index b1e78c1d592..fa7490ace67 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -153,7 +153,7 @@ struct spa { uint64_t spa_freeze_txg; /* freeze pool at this txg */ uint64_t spa_load_max_txg; /* best initial ub_txg */ uint64_t 
spa_claim_max_txg; /* highest claimed birth txg */ - timespec_t spa_loaded_ts; /* 1st successful open time */ + inode_timespec_t spa_loaded_ts; /* 1st successful open time */ objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */ kmutex_t spa_evicting_os_lock; /* Evicting objset list lock */ list_t spa_evicting_os_list; /* Objsets being evicted. */ diff --git a/include/sys/xvattr.h b/include/sys/xvattr.h index 4779b632163..5d38927cd4b 100644 --- a/include/sys/xvattr.h +++ b/include/sys/xvattr.h @@ -47,7 +47,7 @@ * Structure of all optional attributes. */ typedef struct xoptattr { - timestruc_t xoa_createtime; /* Create time of file */ + inode_timespec_t xoa_createtime; /* Create time of file */ uint8_t xoa_archive; uint8_t xoa_system; uint8_t xoa_readonly; diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 4fe35342dee..68c58f95531 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -527,7 +527,7 @@ extern char *vn_dumpdir; #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ typedef struct xoptattr { - timestruc_t xoa_createtime; /* Create time of file */ + inode_timespec_t xoa_createtime; /* Create time of file */ uint8_t xoa_archive; uint8_t xoa_system; uint8_t xoa_readonly; @@ -640,13 +640,6 @@ extern void delay(clock_t ticks); #define USEC_TO_TICK(usec) ((usec) / (MICROSEC / hz)) #define NSEC_TO_TICK(usec) ((usec) / (NANOSEC / hz)) -#define gethrestime_sec() time(NULL) -#define gethrestime(t) \ - do {\ - (t)->tv_sec = gethrestime_sec();\ - (t)->tv_nsec = 0;\ - } while (0); - #define max_ncpus 64 #define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN)) diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index c292f03739e..26d1eb37557 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -270,19 +270,36 @@ typedef struct znode_hold { extern unsigned int zfs_object_mutex_size; -/* Encode ZFS stored time values from a struct timespec */ +/* + * Encode ZFS stored time values from a 
struct timespec / struct timespec64. + */ #define ZFS_TIME_ENCODE(tp, stmp) \ -{ \ +do { \ (stmp)[0] = (uint64_t)(tp)->tv_sec; \ (stmp)[1] = (uint64_t)(tp)->tv_nsec; \ -} +} while (0) -/* Decode ZFS stored time values to a struct timespec */ +#if defined(HAVE_INODE_TIMESPEC64_TIMES) +/* + * Decode ZFS stored time values to a struct timespec64 + * 4.18 and newer kernels. + */ #define ZFS_TIME_DECODE(tp, stmp) \ -{ \ - (tp)->tv_sec = (time_t)(stmp)[0]; \ - (tp)->tv_nsec = (long)(stmp)[1]; \ -} +do { \ + (tp)->tv_sec = (time64_t)(stmp)[0]; \ + (tp)->tv_nsec = (long)(stmp)[1]; \ +} while (0) +#else +/* + * Decode ZFS stored time values to a struct timespec + * 4.17 and older kernels. + */ +#define ZFS_TIME_DECODE(tp, stmp) \ +do { \ + (tp)->tv_sec = (time_t)(stmp)[0]; \ + (tp)->tv_nsec = (long)(stmp)[1]; \ +} while (0) +#endif /* HAVE_INODE_TIMESPEC64_TIMES */ /* * Timestamp defines diff --git a/include/sys/zpl.h b/include/sys/zpl.h index 65ed4313603..e433fbc64ea 100644 --- a/include/sys/zpl.h +++ b/include/sys/zpl.h @@ -189,4 +189,13 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx) } #endif /* HAVE_VFS_ITERATE */ +/* + * Linux 4.18, inode times converted from timespec to timespec64. 
+ */ +#if defined(HAVE_INODE_TIMESPEC64_TIMES) +#define zpl_inode_timespec_trunc(ts, gran) timespec64_trunc(ts, gran) +#else +#define zpl_inode_timespec_trunc(ts, gran) timespec_trunc(ts, gran) +#endif + #endif /* _SYS_ZPL_H */ diff --git a/lib/libspl/Makefile.am b/lib/libspl/Makefile.am index 59bc8ffb42f..a6e63cb8868 100644 --- a/lib/libspl/Makefile.am +++ b/lib/libspl/Makefile.am @@ -19,8 +19,6 @@ noinst_LTLIBRARIES = libspl.la USER_C = \ getexecname.c \ - gethrtime.c \ - gethrestime.c \ getmntany.c \ list.c \ mkdirp.c \ diff --git a/lib/libspl/gethrestime.c b/lib/libspl/gethrestime.c deleted file mode 100644 index d37cc2d5994..00000000000 --- a/lib/libspl/gethrestime.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include -#include - -void -gethrestime(timestruc_t *ts) -{ - struct timeval tv; - - gettimeofday(&tv, NULL); - ts->tv_sec = tv.tv_sec; - ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC; -} diff --git a/lib/libspl/gethrtime.c b/lib/libspl/gethrtime.c deleted file mode 100644 index 95ceb18e119..00000000000 --- a/lib/libspl/gethrtime.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#include -#include -#include -#include - -hrtime_t -gethrtime(void) -{ - struct timespec ts; - int rc; - - rc = clock_gettime(CLOCK_MONOTONIC, &ts); - if (rc) { - fprintf(stderr, "Error: clock_gettime() = %d\n", rc); - abort(); - } - - return ((((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec); -} diff --git a/lib/libspl/include/sys/time.h b/lib/libspl/include/sys/time.h index dc645fa5c37..04b3ba87bd3 100644 --- a/lib/libspl/include/sys/time.h +++ b/lib/libspl/include/sys/time.h @@ -27,8 +27,9 @@ #ifndef _LIBSPL_SYS_TIME_H #define _LIBSPL_SYS_TIME_H -#include_next +#include #include +#include_next #ifndef SEC #define SEC 1 @@ -70,13 +71,33 @@ #define SEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / SEC)) #endif - typedef long long hrtime_t; -typedef struct timespec timestruc_t; -typedef struct timespec timespec_t; - - -extern hrtime_t gethrtime(void); -extern void gethrestime(timestruc_t *); +typedef struct timespec timespec_t; +typedef struct timespec inode_timespec_t; + +static inline void +gethrestime(inode_timespec_t *ts) +{ + struct timeval tv; + (void) gettimeofday(&tv, NULL); + ts->tv_sec = tv.tv_sec; + ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC; +} + +static inline time_t +gethrestime_sec(void) +{ + struct timeval tv; + (void) gettimeofday(&tv, NULL); + return (tv.tv_sec); +} + +static inline hrtime_t +gethrtime(void) +{ + struct timespec ts; + (void) clock_gettime(CLOCK_MONOTONIC, &ts); + return ((((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec); +} #endif /* _LIBSPL_SYS_TIME_H */ diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index e67d13c9265..3ea8778b1dc 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -498,7 +498,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) { int error; struct timeval tv; - timestruc_t ts; + struct timespec ts; clock_t delta; ASSERT3U(cv->cv_magic, ==, CV_MAGIC); @@ -536,7 +536,7 @@ cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res, { int error; struct timeval tv; - 
timestruc_t ts; + struct timespec ts; hrtime_t delta; ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE); diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 3425d542f98..449ebedfa16 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -860,7 +860,7 @@ dmu_objset_evict_done(objset_t *os) kmem_free(os, sizeof (objset_t)); } -timestruc_t +inode_timespec_t dmu_objset_snap_cmtime(objset_t *os) { return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir)); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index a3ef5896a3f..deecf6bc53e 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -1975,10 +1975,10 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, return (0); } -timestruc_t +inode_timespec_t dsl_dir_snap_cmtime(dsl_dir_t *dd) { - timestruc_t t; + inode_timespec_t t; mutex_enter(&dd->dd_lock); t = dd->dd_snap_cmtime; @@ -1990,7 +1990,7 @@ dsl_dir_snap_cmtime(dsl_dir_t *dd) void dsl_dir_snap_cmtime_update(dsl_dir_t *dd) { - timestruc_t t; + inode_timespec_t t; gethrestime(&t); mutex_enter(&dd->dd_lock); diff --git a/module/zfs/fm.c b/module/zfs/fm.c index cb148149376..9d26cc99eeb 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -508,8 +508,8 @@ zfs_zevent_insert(zevent_t *ev) int zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) { + inode_timespec_t tv; int64_t tv_array[2]; - timestruc_t tv; uint64_t eid; size_t nvl_size = 0; zevent_t *ev; diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index 3b5fb196f1d..3ff2c101b67 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -451,7 +451,7 @@ static struct inode * zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, const struct file_operations *fops, const struct inode_operations *ops) { - struct timespec now; + inode_timespec_t now; struct inode *ip; znode_t *zp; diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 0d2b61a1c31..34ea751c320 100644 --- a/module/zfs/zfs_vnops.c +++ 
b/module/zfs/zfs_vnops.c @@ -3158,7 +3158,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) if (mask & (ATTR_MTIME | ATTR_SIZE)) { ZFS_TIME_ENCODE(&vap->va_mtime, mtime); - ZTOI(zp)->i_mtime = timespec_trunc(vap->va_mtime, + ZTOI(zp)->i_mtime = zpl_inode_timespec_trunc(vap->va_mtime, ZTOI(zp)->i_sb->s_time_gran); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, @@ -3167,7 +3167,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) if (mask & (ATTR_CTIME | ATTR_SIZE)) { ZFS_TIME_ENCODE(&vap->va_ctime, ctime); - ZTOI(zp)->i_ctime = timespec_trunc(vap->va_ctime, + ZTOI(zp)->i_ctime = zpl_inode_timespec_trunc(vap->va_ctime, ZTOI(zp)->i_sb->s_time_gran); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, sizeof (ctime)); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index f508a248f0f..e222c791183 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -700,7 +700,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, uint64_t rdev = 0; zfsvfs_t *zfsvfs = ZTOZSB(dzp); dmu_buf_t *db; - timestruc_t now; + inode_timespec_t now; uint64_t gen, obj; int bonuslen; int dnodesize; @@ -1349,7 +1349,7 @@ void zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], uint64_t ctime[2]) { - timestruc_t now; + inode_timespec_t now; gethrestime(&now); diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c index 3b5643d0917..41b91cabcb9 100644 --- a/module/zfs/zpl_inode.c +++ b/module/zfs/zpl_inode.c @@ -384,9 +384,10 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia) vap->va_mtime = ia->ia_mtime; vap->va_ctime = ia->ia_ctime; - if (vap->va_mask & ATTR_ATIME) - ip->i_atime = timespec_trunc(ia->ia_atime, + if (vap->va_mask & ATTR_ATIME) { + ip->i_atime = zpl_inode_timespec_trunc(ia->ia_atime, ip->i_sb->s_time_gran); + } cookie = spl_fstrans_mark(); error = -zfs_setattr(ip, vap, 0, cr); From ecaa72315658207608c9e43df870de77af36c1ff Mon Sep 17 00:00:00 2001 From: 
Brian Behlendorf Date: Fri, 15 Jun 2018 15:05:21 -0700 Subject: [PATCH] Linux compat 4.18: check_disk_size_change() Added support for the bops->check_events() interface which was added in the 2.6.38 kernel to replace bops->media_changed(). Fully implementing this functionality allows the volume resize code to rely on revalidate_disk(), which is the preferred mechanism, and removes the need to use check_disk_size_change(). In order for bops->check_events() to look up the zvol_state_t stored in the disk->private_data, the zvol_state_lock needs to be held. Since the check events interface may poll, the mutex has been converted to a rwlock for better concurrency. The rwlock need only be taken as a writer in the zvol_free() path when disk->private_data is set to NULL. The configure checks for the block_device_operations structure were consolidated in a single kernel-block-device-operations.m4 file. The ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS configure checks and associated dead code were removed. This interface was added to the 2.6.28 kernel, which predates the oldest supported 2.6.32 kernel, and will therefore always be available. Updated maximum Linux version in META file. The 4.17 kernel was released on 2018-06-03 and ZoL is compatible with the finalized kernel. 
Reviewed-by: Boris Protopopov Reviewed-by: Sara Hartse Signed-off-by: Brian Behlendorf Closes #7611 Backported-by: Richard Yao --- config/kernel-bdev-block-device-operations.m4 | 34 --- ...el-block-device-operations-release-void.m4 | 29 -- config/kernel-block-device-operations.m4 | 57 ++++ config/kernel.m4 | 2 +- include/linux/blkdev_compat.h | 1 + module/zfs/zvol.c | 286 ++++++++---------- 6 files changed, 192 insertions(+), 217 deletions(-) delete mode 100644 config/kernel-bdev-block-device-operations.m4 delete mode 100644 config/kernel-block-device-operations-release-void.m4 create mode 100644 config/kernel-block-device-operations.m4 diff --git a/config/kernel-bdev-block-device-operations.m4 b/config/kernel-bdev-block-device-operations.m4 deleted file mode 100644 index faacc195da9..00000000000 --- a/config/kernel-bdev-block-device-operations.m4 +++ /dev/null @@ -1,34 +0,0 @@ -dnl # -dnl # 2.6.x API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS], [ - AC_MSG_CHECKING([block device operation prototypes]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include - - int blk_open(struct block_device *bdev, fmode_t mode) - { return 0; } - int blk_ioctl(struct block_device *bdev, fmode_t mode, - unsigned x, unsigned long y) { return 0; } - int blk_compat_ioctl(struct block_device * bdev, fmode_t mode, - unsigned x, unsigned long y) { return 0; } - - static const struct block_device_operations - bops __attribute__ ((unused)) = { - .open = blk_open, - .release = NULL, - .ioctl = blk_ioctl, - .compat_ioctl = blk_compat_ioctl, - }; - ],[ - ],[ - AC_MSG_RESULT(struct block_device) - AC_DEFINE(HAVE_BDEV_BLOCK_DEVICE_OPERATIONS, 1, - [struct block_device_operations use bdevs]) - ],[ - AC_MSG_RESULT(struct inode) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-block-device-operations-release-void.m4 b/config/kernel-block-device-operations-release-void.m4 deleted file mode 100644 
index a73f858722a..00000000000 --- a/config/kernel-block-device-operations-release-void.m4 +++ /dev/null @@ -1,29 +0,0 @@ -dnl # -dnl # 3.10.x API change -dnl # -AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ - AC_MSG_CHECKING([whether block_device_operations.release is void]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" - ZFS_LINUX_TRY_COMPILE([ - #include - - void blk_release(struct gendisk *g, fmode_t mode) { return; } - - static const struct block_device_operations - bops __attribute__ ((unused)) = { - .open = NULL, - .release = blk_release, - .ioctl = NULL, - .compat_ioctl = NULL, - }; - ],[ - ],[ - AC_MSG_RESULT(void) - AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1, - [struct block_device_operations.release returns void]) - ],[ - AC_MSG_RESULT(int) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-block-device-operations.m4 b/config/kernel-block-device-operations.m4 new file mode 100644 index 00000000000..5f2811c1534 --- /dev/null +++ b/config/kernel-block-device-operations.m4 @@ -0,0 +1,57 @@ +dnl # +dnl # 2.6.38 API change +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [ + AC_MSG_CHECKING([whether bops->check_events() exists]) + tmp_flags="$EXTRA_KCFLAGS" + EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" + ZFS_LINUX_TRY_COMPILE([ + #include + + unsigned int blk_check_events(struct gendisk *disk, + unsigned int clearing) { return (0); } + + static const struct block_device_operations + bops __attribute__ ((unused)) = { + .check_events = blk_check_events, + }; + ],[ + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS, 1, + [bops->check_events() exists]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) + +dnl # +dnl # 3.10.x API change +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ + AC_MSG_CHECKING([whether bops->release() is void]) + tmp_flags="$EXTRA_KCFLAGS" + 
EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" + ZFS_LINUX_TRY_COMPILE([ + #include + + void blk_release(struct gendisk *g, fmode_t mode) { return; } + + static const struct block_device_operations + bops __attribute__ ((unused)) = { + .open = NULL, + .release = blk_release, + .ioctl = NULL, + .compat_ioctl = NULL, + }; + ],[ + ],[ + AC_MSG_RESULT(void) + AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1, + [bops->release() returns void]) + ],[ + AC_MSG_RESULT(int) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 375e4b79ae6..c7ca260c5f0 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -12,7 +12,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_CURRENT_BIO_TAIL ZFS_AC_KERNEL_SUPER_USER_NS ZFS_AC_KERNEL_SUBMIT_BIO - ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS + ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID ZFS_AC_KERNEL_TYPE_FMODE_T ZFS_AC_KERNEL_3ARG_BLKDEV_GET diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h index f99980ab3e0..27f05662662 100644 --- a/include/linux/blkdev_compat.h +++ b/include/linux/blkdev_compat.h @@ -32,6 +32,7 @@ #include #include #include +#include /* for SECTOR_* */ #ifndef HAVE_FMODE_T typedef unsigned __bitwise__ fmode_t; diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 3e7059b340b..e57d2e70332 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -99,7 +99,7 @@ unsigned long zvol_max_discard_blocks = 16384; unsigned int zvol_volmode = ZFS_VOLMODE_GEOM; static taskq_t *zvol_taskq; -static kmutex_t zvol_state_lock; +static krwlock_t zvol_state_lock; static list_t zvol_state_list; #define ZVOL_HT_SIZE 1024 @@ -176,17 +176,17 @@ zvol_find_by_dev(dev_t dev) { zvol_state_t *zv; - mutex_enter(&zvol_state_lock); + rw_enter(&zvol_state_lock, RW_READER); for (zv = list_head(&zvol_state_list); zv != NULL; zv = list_next(&zvol_state_list, zv)) { mutex_enter(&zv->zv_state_lock); if (zv->zv_dev == dev) 
{ - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); return (zv); } mutex_exit(&zv->zv_state_lock); } - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); return (NULL); } @@ -204,7 +204,7 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode) zvol_state_t *zv; struct hlist_node *p = NULL; - mutex_enter(&zvol_state_lock); + rw_enter(&zvol_state_lock, RW_READER); hlist_for_each(p, ZVOL_HT_HEAD(hash)) { zv = hlist_entry(p, zvol_state_t, zv_hlink); mutex_enter(&zv->zv_state_lock); @@ -227,12 +227,12 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode) strncmp(zv->zv_name, name, MAXNAMELEN) == 0); } - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); return (zv); } mutex_exit(&zv->zv_state_lock); } - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); return (NULL); } @@ -339,24 +339,6 @@ zvol_get_stats(objset_t *os, nvlist_t *nv) return (SET_ERROR(error)); } -static void -zvol_size_changed(zvol_state_t *zv, uint64_t volsize) -{ - struct block_device *bdev; - - ASSERT(MUTEX_HELD(&zv->zv_state_lock)); - - bdev = bdget_disk(zv->zv_disk, 0); - if (bdev == NULL) - return; - - set_capacity(zv->zv_disk, volsize >> 9); - zv->zv_volsize = volsize; - check_disk_size_change(zv->zv_disk, bdev); - - bdput(bdev); -} - /* * Sanity check volume size. */ @@ -409,31 +391,17 @@ zvol_update_volsize(uint64_t volsize, objset_t *os) return (error); } -static int -zvol_update_live_volsize(zvol_state_t *zv, uint64_t volsize) -{ - zvol_size_changed(zv, volsize); - - /* - * We should post a event here describing the expansion. However, - * the zfs_ereport_post() interface doesn't nicely support posting - * events for zvols, it assumes events relate to vdevs or zios. - */ - - return (0); -} - /* - * Set ZFS_PROP_VOLSIZE set entry point. + * Set ZFS_PROP_VOLSIZE set entry point. Note that modifying the volume + * size will result in a udev "change" event being generated. 
*/ int zvol_set_volsize(const char *name, uint64_t volsize) { - zvol_state_t *zv = NULL; objset_t *os = NULL; - int error; - dmu_object_info_t *doi; + struct gendisk *disk = NULL; uint64_t readonly; + int error; boolean_t owned = B_FALSE; error = dsl_prop_get_integer(name, @@ -443,7 +411,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) if (readonly) return (SET_ERROR(EROFS)); - zv = zvol_find_by_name(name, RW_READER); + zvol_state_t *zv = zvol_find_by_name(name, RW_READER); ASSERT(zv == NULL || (MUTEX_HELD(&zv->zv_state_lock) && RW_READ_HELD(&zv->zv_suspend_lock))); @@ -464,16 +432,18 @@ zvol_set_volsize(const char *name, uint64_t volsize) os = zv->zv_objset; } - doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP); + dmu_object_info_t *doi = kmem_alloc(sizeof (*doi), KM_SLEEP); if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) || (error = zvol_check_volsize(volsize, doi->doi_data_block_size))) goto out; error = zvol_update_volsize(volsize, os); - - if (error == 0 && zv != NULL) - error = zvol_update_live_volsize(zv, volsize); + if (error == 0 && zv != NULL) { + zv->zv_volsize = volsize; + zv->zv_changed = 1; + disk = zv->zv_disk; + } out: kmem_free(doi, sizeof (dmu_object_info_t)); @@ -488,6 +458,9 @@ zvol_set_volsize(const char *name, uint64_t volsize) if (zv != NULL) mutex_exit(&zv->zv_state_lock); + if (disk != NULL) + revalidate_disk(disk); + return (SET_ERROR(error)); } @@ -543,8 +516,8 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize) if (zv == NULL) return (SET_ERROR(ENXIO)); - ASSERT(MUTEX_HELD(&zv->zv_state_lock) && - RW_READ_HELD(&zv->zv_suspend_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(RW_READ_HELD(&zv->zv_suspend_lock)); if (zv->zv_flags & ZVOL_RDONLY) { mutex_exit(&zv->zv_state_lock); @@ -1120,7 +1093,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) static void zvol_insert(zvol_state_t *zv) { - ASSERT(MUTEX_HELD(&zvol_state_lock)); + ASSERT(RW_WRITE_HELD(&zvol_state_lock)); 
ASSERT3U(MINOR(zv->zv_dev) & ZVOL_MINOR_MASK, ==, 0); list_insert_head(&zvol_state_list, zv); hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash)); @@ -1132,7 +1105,7 @@ zvol_insert(zvol_state_t *zv) static void zvol_remove(zvol_state_t *zv) { - ASSERT(MUTEX_HELD(&zvol_state_lock)); + ASSERT(RW_WRITE_HELD(&zvol_state_lock)); list_remove(&zvol_state_list, zv); hlist_del(&zv->zv_hlink); } @@ -1148,8 +1121,8 @@ zvol_setup_zv(zvol_state_t *zv) uint64_t ro; objset_t *os = zv->zv_objset; - ASSERT(MUTEX_HELD(&zv->zv_state_lock) && - RW_LOCK_HELD(&zv->zv_suspend_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(RW_LOCK_HELD(&zv->zv_suspend_lock)); error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL); if (error) @@ -1227,8 +1200,8 @@ zvol_suspend(const char *name) return (NULL); /* block all I/O, release in zvol_resume. */ - ASSERT(MUTEX_HELD(&zv->zv_state_lock) && - RW_WRITE_HELD(&zv->zv_suspend_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock)); atomic_inc(&zv->zv_suspend_ref); @@ -1349,9 +1322,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) int error = 0; boolean_t drop_suspend = B_FALSE; - ASSERT(!mutex_owned(&zvol_state_lock)); - - mutex_enter(&zvol_state_lock); + rw_enter(&zvol_state_lock, RW_READER); /* * Obtain a copy of private_data under the zvol_state_lock to make * sure that either the result of zvol free code path setting @@ -1360,7 +1331,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) */ zv = bdev->bd_disk->private_data; if (zv == NULL) { - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); return (SET_ERROR(-ENXIO)); } @@ -1378,8 +1349,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) } else { rw_exit(&zv->zv_suspend_lock); } - - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); if (zv->zv_open_count == 0) { error = zvol_first_open(zv); @@ -1394,11 +1364,18 @@ zvol_open(struct block_device *bdev, fmode_t flag) zv->zv_open_count++; + 
mutex_exit(&zv->zv_state_lock); + if (drop_suspend) + rw_exit(&zv->zv_suspend_lock); + check_disk_change(bdev); + return (0); + out_open_count: if (zv->zv_open_count == 0) zvol_last_close(zv); + out_mutex: mutex_exit(&zv->zv_state_lock); if (drop_suspend) @@ -1419,26 +1396,34 @@ zvol_release(struct gendisk *disk, fmode_t mode) zvol_state_t *zv; boolean_t drop_suspend = B_FALSE; - ASSERT(!mutex_owned(&zvol_state_lock)); - - mutex_enter(&zvol_state_lock); + rw_enter(&zvol_state_lock, RW_READER); zv = disk->private_data; - ASSERT(zv && zv->zv_open_count > 0); - - /* take zv_suspend_lock before zv_state_lock */ - rw_enter(&zv->zv_suspend_lock, RW_READER); mutex_enter(&zv->zv_state_lock); - mutex_exit(&zvol_state_lock); + ASSERT(zv && zv->zv_open_count > 0); /* * make sure zvol is not suspended during last close * (hold zv_suspend_lock), otherwise, drop the lock */ - if (zv->zv_open_count == 1) - drop_suspend = B_TRUE; - else - rw_exit(&zv->zv_suspend_lock); + if (zv->zv_open_count == 1) { + if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) { + mutex_exit(&zv->zv_state_lock); + rw_enter(&zv->zv_suspend_lock, RW_READER); + mutex_enter(&zv->zv_state_lock); + /* check to see if zv_suspend_lock is needed */ + if (zv->zv_open_count != 1) { + rw_exit(&zv->zv_suspend_lock); + drop_suspend = B_FALSE; + } + } + } else { + drop_suspend = B_FALSE; + } + rw_exit(&zvol_state_lock); + + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock)); zv->zv_open_count--; if (zv->zv_open_count == 0) @@ -1461,7 +1446,7 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode, zvol_state_t *zv = bdev->bd_disk->private_data; int error = 0; - ASSERT(zv && zv->zv_open_count > 0); + ASSERT3U(zv->zv_open_count, >, 0); switch (cmd) { case BLKFLSBUF: @@ -1501,23 +1486,62 @@ zvol_compat_ioctl(struct block_device *bdev, fmode_t mode, #define zvol_compat_ioctl NULL #endif +/* + * Linux 2.6.38 preferred interface. 
+ */ +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS +static unsigned int +zvol_check_events(struct gendisk *disk, unsigned int clearing) +{ + unsigned int mask = 0; + + rw_enter(&zvol_state_lock, RW_READER); + + zvol_state_t *zv = disk->private_data; + if (zv != NULL) { + mutex_enter(&zv->zv_state_lock); + mask = zv->zv_changed ? DISK_EVENT_MEDIA_CHANGE : 0; + zv->zv_changed = 0; + mutex_exit(&zv->zv_state_lock); + } + + rw_exit(&zvol_state_lock); + + return (mask); +} +#else static int zvol_media_changed(struct gendisk *disk) { + int changed = 0; + + rw_enter(&zvol_state_lock, RW_READER); + zvol_state_t *zv = disk->private_data; + if (zv != NULL) { + mutex_enter(&zv->zv_state_lock); + changed = zv->zv_changed; + zv->zv_changed = 0; + mutex_exit(&zv->zv_state_lock); + } - ASSERT(zv && zv->zv_open_count > 0); + rw_exit(&zvol_state_lock); - return (zv->zv_changed); + return (changed); } +#endif static int zvol_revalidate_disk(struct gendisk *disk) { - zvol_state_t *zv = disk->private_data; + rw_enter(&zvol_state_lock, RW_READER); - ASSERT(zv && zv->zv_open_count > 0); + zvol_state_t *zv = disk->private_data; + if (zv != NULL) { + mutex_enter(&zv->zv_state_lock); + set_capacity(zv->zv_disk, zv->zv_volsize >> SECTOR_BITS); + mutex_exit(&zv->zv_state_lock); + } - zv->zv_changed = 0; - set_capacity(zv->zv_disk, zv->zv_volsize >> 9); + rw_exit(&zvol_state_lock); return (0); } @@ -1534,7 +1558,7 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) zvol_state_t *zv = bdev->bd_disk->private_data; sector_t sectors; - ASSERT(zv && zv->zv_open_count > 0); + ASSERT3U(zv->zv_open_count, >, 0); sectors = get_capacity(zv->zv_disk); @@ -1567,68 +1591,20 @@ zvol_probe(dev_t dev, int *part, void *arg) return (kobj); } -#ifdef HAVE_BDEV_BLOCK_DEVICE_OPERATIONS static struct block_device_operations zvol_ops = { .open = zvol_open, .release = zvol_release, .ioctl = zvol_ioctl, .compat_ioctl = zvol_compat_ioctl, - .media_changed = zvol_media_changed, - .revalidate_disk = 
zvol_revalidate_disk, - .getgeo = zvol_getgeo, - .owner = THIS_MODULE, -}; - -#else /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */ - -static int -zvol_open_by_inode(struct inode *inode, struct file *file) -{ - return (zvol_open(inode->i_bdev, file->f_mode)); -} - -static int -zvol_release_by_inode(struct inode *inode, struct file *file) -{ - return (zvol_release(inode->i_bdev->bd_disk, file->f_mode)); -} - -static int -zvol_ioctl_by_inode(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - if (file == NULL || inode == NULL) - return (SET_ERROR(-EINVAL)); - - return (zvol_ioctl(inode->i_bdev, file->f_mode, cmd, arg)); -} - -#ifdef CONFIG_COMPAT -static long -zvol_compat_ioctl_by_inode(struct file *file, - unsigned int cmd, unsigned long arg) -{ - if (file == NULL) - return (SET_ERROR(-EINVAL)); - - return (zvol_compat_ioctl(file->f_dentry->d_inode->i_bdev, - file->f_mode, cmd, arg)); -} +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS + .check_events = zvol_check_events, #else -#define zvol_compat_ioctl_by_inode NULL -#endif - -static struct block_device_operations zvol_ops = { - .open = zvol_open_by_inode, - .release = zvol_release_by_inode, - .ioctl = zvol_ioctl_by_inode, - .compat_ioctl = zvol_compat_ioctl_by_inode, .media_changed = zvol_media_changed, +#endif .revalidate_disk = zvol_revalidate_disk, .getgeo = zvol_getgeo, .owner = THIS_MODULE, }; -#endif /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */ /* * Allocate memory for a new zvol_state_t and setup the required @@ -1681,6 +1657,10 @@ zvol_alloc(dev_t dev, const char *name) rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL); zv->zv_disk->major = zvol_major; +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS + zv->zv_disk->events = DISK_EVENT_MEDIA_CHANGE; +#endif + if (volmode == ZFS_VOLMODE_DEV) { /* * ZFS_VOLMODE_DEV disable partitioning on ZVOL devices: set @@ -1725,7 +1705,6 @@ zvol_free(void *arg) { zvol_state_t *zv = arg; - ASSERT(!MUTEX_HELD(&zvol_state_lock)); 
ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); ASSERT(zv->zv_open_count == 0); @@ -1852,9 +1831,9 @@ zvol_create_minor_impl(const char *name) kmem_free(doi, sizeof (dmu_object_info_t)); if (error == 0) { - mutex_enter(&zvol_state_lock); + rw_enter(&zvol_state_lock, RW_WRITER); zvol_insert(zv); - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); add_disk(zv->zv_disk); } else { ida_simple_remove(&zvol_ida, idx); @@ -1871,7 +1850,7 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname) { int readonly = get_disk_ro(zv->zv_disk); - ASSERT(MUTEX_HELD(&zvol_state_lock)); + ASSERT(RW_LOCK_HELD(&zvol_state_lock)); ASSERT(MUTEX_HELD(&zv->zv_state_lock)); strlcpy(zv->zv_name, newname, sizeof (zv->zv_name)); @@ -2111,7 +2090,7 @@ zvol_remove_minors_impl(const char *name) list_create(&free_list, sizeof (zvol_state_t), offsetof(zvol_state_t, zv_next)); - mutex_enter(&zvol_state_lock); + rw_enter(&zvol_state_lock, RW_WRITER); for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { zv_next = list_next(&zvol_state_list, zv); @@ -2136,15 +2115,15 @@ zvol_remove_minors_impl(const char *name) zvol_remove(zv); /* - * clear this while holding zvol_state_lock so - * zvol_open won't open it + * Cleared while holding zvol_state_lock as a writer + * which will prevent zvol_open() from opening it. 
*/ zv->zv_disk->private_data = NULL; /* Drop zv_state_lock before zvol_free() */ mutex_exit(&zv->zv_state_lock); - /* try parallel zv_free, if failed do it in place */ + /* Try parallel zv_free, if failed do it in place */ t = taskq_dispatch(system_taskq, zvol_free, zv, TQ_SLEEP); if (t == TASKQID_INVALID) @@ -2155,11 +2134,9 @@ zvol_remove_minors_impl(const char *name) mutex_exit(&zv->zv_state_lock); } } - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); - /* - * Drop zvol_state_lock before calling zvol_free() - */ + /* Drop zvol_state_lock before calling zvol_free() */ while ((zv = list_head(&free_list)) != NULL) { list_remove(&free_list, zv); zvol_free(zv); @@ -2178,7 +2155,7 @@ zvol_remove_minor_impl(const char *name) if (zvol_inhibit_dev) return; - mutex_enter(&zvol_state_lock); + rw_enter(&zvol_state_lock, RW_WRITER); for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { zv_next = list_next(&zvol_state_list, zv); @@ -2198,7 +2175,10 @@ zvol_remove_minor_impl(const char *name) } zvol_remove(zv); - /* clear this so zvol_open won't open it */ + /* + * Cleared while holding zvol_state_lock as a writer + * which will prevent zvol_open() from opening it. 
+ */ zv->zv_disk->private_data = NULL; mutex_exit(&zv->zv_state_lock); @@ -2209,7 +2189,7 @@ zvol_remove_minor_impl(const char *name) } /* Drop zvol_state_lock before calling zvol_free() */ - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); if (zv != NULL) zvol_free(zv); @@ -2230,7 +2210,7 @@ zvol_rename_minors_impl(const char *oldname, const char *newname) oldnamelen = strlen(oldname); newnamelen = strlen(newname); - mutex_enter(&zvol_state_lock); + rw_enter(&zvol_state_lock, RW_READER); for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { zv_next = list_next(&zvol_state_list, zv); @@ -2258,7 +2238,7 @@ zvol_rename_minors_impl(const char *oldname, const char *newname) mutex_exit(&zv->zv_state_lock); } - mutex_exit(&zvol_state_lock); + rw_exit(&zvol_state_lock); } typedef struct zvol_snapdev_cb_arg { @@ -2635,7 +2615,7 @@ zvol_init(void) list_create(&zvol_state_list, sizeof (zvol_state_t), offsetof(zvol_state_t, zv_next)); - mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&zvol_state_lock, NULL, RW_DEFAULT, NULL); ida_init(&zvol_ida); zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri, @@ -2672,7 +2652,7 @@ zvol_init(void) taskq_destroy(zvol_taskq); out: ida_destroy(&zvol_ida); - mutex_destroy(&zvol_state_lock); + rw_destroy(&zvol_state_lock); list_destroy(&zvol_state_list); return (SET_ERROR(error)); @@ -2689,7 +2669,7 @@ zvol_fini(void) taskq_destroy(zvol_taskq); list_destroy(&zvol_state_list); - mutex_destroy(&zvol_state_lock); + rw_destroy(&zvol_state_lock); ida_destroy(&zvol_ida); }