diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 006e657dfcb9..12943d19bfc4 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -499,6 +499,7 @@ static int dax_open(struct inode *inode, struct file *filp) inode->i_mapping = __dax_inode->i_mapping; inode->i_mapping->host = __dax_inode; filp->f_mapping = inode->i_mapping; + filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); filp->private_data = dev_dax; inode->i_flags = S_DAX; diff --git a/fs/block_dev.c b/fs/block_dev.c index 519599dddd36..4d62fe771587 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1743,6 +1743,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) return -ENOMEM; filp->f_mapping = bdev->bd_inode->i_mapping; + filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); return blkdev_get(bdev, filp->f_mode, filp); } diff --git a/fs/file_table.c b/fs/file_table.c index 954d510b765a..72e861a35a7f 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -168,6 +168,7 @@ struct file *alloc_file(const struct path *path, fmode_t mode, file->f_path = *path; file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; + file->f_wb_err = filemap_sample_wb_err(file->f_mapping); if ((mode & FMODE_READ) && likely(fop->read || fop->read_iter)) mode |= FMODE_CAN_READ; diff --git a/fs/open.c b/fs/open.c index cd0c5be8d012..280d4a963791 100644 --- a/fs/open.c +++ b/fs/open.c @@ -707,6 +707,9 @@ static int do_dentry_open(struct file *f, f->f_inode = inode; f->f_mapping = inode->i_mapping; + /* Ensure that we skip any errors that predate opening of the file */ + f->f_wb_err = filemap_sample_wb_err(f->f_mapping); + if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH; f->f_op = &empty_fops; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8ac8df1b3550..78b5c2901712 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -30,7 +30,7 @@ #include #include #include - +#include #include #include @@ -392,6 +392,7 @@ struct address_space { gfp_t gfp_mask; /* implicit gfp mask for allocations */ struct list_head private_list; /* ditto */ void *private_data; /* ditto */ + errseq_t wb_err; } __attribute__((aligned(sizeof(long)))); /* * On most architectures that alignment is already the case; but @@ -868,6 +869,7 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; + errseq_t f_wb_err; } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { @@ -2526,6 +2528,62 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); extern int filemap_check_errors(struct address_space *mapping); +extern void __filemap_set_wb_err(struct address_space *mapping, int err); +extern int __must_check file_check_and_advance_wb_err(struct file *file); +extern int __must_check file_write_and_wait_range(struct file *file, + loff_t start, loff_t end); + +/** + * filemap_set_wb_err - set a writeback error on an address_space + * @mapping: mapping in which to set writeback error + * @err: error to be set in mapping + * + * When writeback fails in some way, we must record that error so that + * userspace can be informed when fsync and the like are called. We endeavor + * to report errors on any file that was open at the time of the error. Some + * internal callers also need to know when writeback errors have occurred. + * + * When a writeback error occurs, most filesystems will want to call + * filemap_set_wb_err to record the error in the mapping so that it will be + * automatically reported whenever fsync is called on the file. + * + * FIXME: mention FS_* flag here? + */ +static inline void filemap_set_wb_err(struct address_space *mapping, int err) +{ + /* Fastpath for common case of no error */ + if (unlikely(err)) + __filemap_set_wb_err(mapping, err); +} + +/** + * filemap_check_wb_error - has an error occurred since the mark was sampled? + * @mapping: mapping to check for writeback errors + * @since: previously-sampled errseq_t + * + * Grab the errseq_t value from the mapping, and see if it has changed "since" + * the given value was sampled. + * + * If it has then report the latest error set, otherwise return 0. + */ +static inline int filemap_check_wb_err(struct address_space *mapping, + errseq_t since) +{ + return errseq_check(&mapping->wb_err, since); +} + +/** + * filemap_sample_wb_err - sample the current errseq_t to test for later errors + * @mapping: mapping to be sampled + * + * Writeback errors are always reported relative to a particular sample point + * in the past. This function provides those sample points. + */ +static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) +{ + return errseq_sample(&mapping->wb_err); +} + extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h index 42febb6bc1d5..ff91325b8123 100644 --- a/include/trace/events/filemap.h +++ b/include/trace/events/filemap.h @@ -10,6 +10,7 @@ #include #include #include +#include DECLARE_EVENT_CLASS(mm_filemap_op_page_cache, @@ -52,6 +53,62 @@ DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache, TP_ARGS(page) ); +TRACE_EVENT(filemap_set_wb_err, + TP_PROTO(struct address_space *mapping, errseq_t eseq), + + TP_ARGS(mapping, eseq), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(errseq_t, errseq) + ), + + TP_fast_assign( + __entry->i_ino = mapping->host->i_ino; + __entry->errseq = eseq; + if (mapping->host->i_sb) + __entry->s_dev = mapping->host->i_sb->s_dev; + else + __entry->s_dev = mapping->host->i_rdev; + ), + + TP_printk("dev=%d:%d ino=0x%lx errseq=0x%x", + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), + __entry->i_ino, __entry->errseq) +); + +TRACE_EVENT(file_check_and_advance_wb_err, + TP_PROTO(struct file *file, errseq_t old), + + TP_ARGS(file, old), + + TP_STRUCT__entry( + __field(struct file *, file); + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(errseq_t, old) + __field(errseq_t, new) + ), + + TP_fast_assign( + __entry->file = file; + __entry->i_ino = file->f_mapping->host->i_ino; + if (file->f_mapping->host->i_sb) + __entry->s_dev = + file->f_mapping->host->i_sb->s_dev; + else + __entry->s_dev = + file->f_mapping->host->i_rdev; + __entry->old = old; + __entry->new = file->f_wb_err; + ), + + TP_printk("file=%p dev=%d:%d ino=0x%lx old=0x%x new=0x%x", + __entry->file, MAJOR(__entry->s_dev), + MINOR(__entry->s_dev), __entry->i_ino, __entry->old, + __entry->new) +); #endif /* _TRACE_FILEMAP_H */ /* This part must be outside protection */ diff --git a/mm/filemap.c b/mm/filemap.c index eb99b5f23c61..d7a30aefee0d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -553,6 +553,90 @@ int filemap_write_and_wait_range(struct address_space *mapping, } EXPORT_SYMBOL(filemap_write_and_wait_range); +void __filemap_set_wb_err(struct address_space *mapping, int err) +{ + errseq_t eseq = __errseq_set(&mapping->wb_err, err); + + trace_filemap_set_wb_err(mapping, eseq); +} +EXPORT_SYMBOL(__filemap_set_wb_err); + +/** + * file_check_and_advance_wb_err - report wb error (if any) that was previously + * and advance wb_err to current one + * @file: struct file on which the error is being reported + * + * When userland calls fsync (or something like nfsd does the equivalent), we + * want to report any writeback errors that occurred since the last fsync (or + * since the file was opened if there haven't been any). + * + * Grab the wb_err from the mapping. If it matches what we have in the file, + * then just quickly return 0. The file is all caught up. + * + * If it doesn't match, then take the mapping value, set the "seen" flag in + * it and try to swap it into place. If it works, or another task beat us + * to it with the new value, then update the f_wb_err and return the error + * portion. The error at this point must be reported via proper channels + * (a'la fsync, or NFS COMMIT operation, etc.). + * + * While we handle mapping->wb_err with atomic operations, the f_wb_err + * value is protected by the f_lock since we must ensure that it reflects + * the latest value swapped in for this file descriptor. + */ +int file_check_and_advance_wb_err(struct file *file) +{ + int err = 0; + errseq_t old = READ_ONCE(file->f_wb_err); + struct address_space *mapping = file->f_mapping; + + /* Locklessly handle the common case where nothing has changed */ + if (errseq_check(&mapping->wb_err, old)) { + /* Something changed, must use slow path */ + spin_lock(&file->f_lock); + old = file->f_wb_err; + err = errseq_check_and_advance(&mapping->wb_err, + &file->f_wb_err); + trace_file_check_and_advance_wb_err(file, old); + spin_unlock(&file->f_lock); + } + return err; +} +EXPORT_SYMBOL(file_check_and_advance_wb_err); + +/** + * file_write_and_wait_range - write out & wait on a file range + * @file: file pointing to address_space with pages + * @lstart: offset in bytes where the range starts + * @lend: offset in bytes where the range ends (inclusive) + * + * Write out and wait upon file offsets lstart->lend, inclusive. + * + * Note that @lend is inclusive (describes the last byte to be written) so + * that this function can be used to write to the very end-of-file (end = -1). + * + * After writing out and waiting on the data, we check and advance the + * f_wb_err cursor to the latest value, and return any errors detected there. + */ +int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend) +{ + int err = 0, err2; + struct address_space *mapping = file->f_mapping; + + if ((!dax_mapping(mapping) && mapping->nrpages) || + (dax_mapping(mapping) && mapping->nrexceptional)) { + err = __filemap_fdatawrite_range(mapping, lstart, lend, + WB_SYNC_ALL); + /* See comment of filemap_write_and_wait() */ + if (err != -EIO) + __filemap_fdatawait_range(mapping, lstart, lend); + } + err2 = file_check_and_advance_wb_err(file); + if (!err) + err = err2; + return err; +} +EXPORT_SYMBOL(file_write_and_wait_range); + /** * replace_page_cache_page - replace a pagecache page with a new one * @old: page to be replaced