mm/swapfile.c: move inode_lock out of claim_swapfile

claim_swapfile() currently keeps the inode locked when it is successful,
or the file is already swapfile (with -ebusy).  and, on the other error
cases, it does not lock the inode.

this inconsistency of the lock state and return value is quite confusing
and actually causing a bad unlock balance as below in the "bad_swap"
section of __do_sys_swapon().

this commit fixes this issue by moving the inode_lock() and is_swapfile
check out of claim_swapfile().  the inode is unlocked in
"bad_swap_unlock_inode" section, so that the inode is ensured to be
unlocked at "bad_swap".  thus, error handling codes after the locking now
jumps to "bad_swap_unlock_inode" instead of "bad_swap".

    =====================================
    warning: bad unlock balance detected!
    5.5.0-rc7+ #176 not tainted
    -------------------------------------
    swapon/4294 is trying to release lock (&sb->s_type->i_mutex_key) at:
    [<ffffffff8173a6eb>] __do_sys_swapon+0x94b/0x3550
    but there are no more locks to release!

    other info that might help us debug this:
    no locks held by swapon/4294.

    stack backtrace:
    cpu: 5 pid: 4294 comm: swapon not tainted 5.5.0-rc7-btrfs-zns+ #176
    hardware name: asus all series/h87-pro, bios 2102 07/29/2014
    call trace:
     dump_stack+0xa1/0xea
     ? __do_sys_swapon+0x94b/0x3550
     print_unlock_imbalance_bug.cold+0x114/0x123
     ? __do_sys_swapon+0x94b/0x3550
     lock_release+0x562/0xed0
     ? kvfree+0x31/0x40
     ? lock_downgrade+0x770/0x770
     ? kvfree+0x31/0x40
     ? rcu_read_lock_sched_held+0xa1/0xd0
     ? rcu_read_lock_bh_held+0xb0/0xb0
     up_write+0x2d/0x490
     ? kfree+0x293/0x2f0
     __do_sys_swapon+0x94b/0x3550
     ? putname+0xb0/0xf0
     ? kmem_cache_free+0x2e7/0x370
     ? do_sys_open+0x184/0x3e0
     ? generic_max_swapfile_size+0x40/0x40
     ? do_syscall_64+0x27/0x4b0
     ? entry_syscall_64_after_hwframe+0x49/0xbe
     ? lockdep_hardirqs_on+0x38c/0x590
     __x64_sys_swapon+0x54/0x80
     do_syscall_64+0xa4/0x4b0
     entry_syscall_64_after_hwframe+0x49/0xbe
    rip: 0033:0x7f15da0a0dc7

link: http://lkml.kernel.org/r/20200206090132.154869-1-naohiro.aota@wdc.com
fixes: 1638045c3677 ("mm: set s_swapfile on blockdev swap devices")
signed-off-by: naohiro aota <naohiro.aota@wdc.com>
reviewed-by: andrew morton <akpm@linux-foundation.org>
reviewed-by: darrick j. wong <darrick.wong@oracle.com>
tested-by: qais youef <qais.yousef@arm.com>
cc: christoph hellwig <hch@infradead.org>
cc: <stable@vger.kernel.org>
tirimbino
Naohiro Aota 5 years ago committed by Jaegeuk Kim
parent 53f2446e64
commit e771685bad
  1. 40
      mm/swapfile.c

@ -2896,10 +2896,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
p->bdev = inode->i_sb->s_bdev; p->bdev = inode->i_sb->s_bdev;
} }
inode_lock(inode);
if (IS_SWAPFILE(inode))
return -EBUSY;
return 0; return 0;
} }
@ -3135,36 +3131,40 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
mapping = swap_file->f_mapping; mapping = swap_file->f_mapping;
inode = mapping->host; inode = mapping->host;
/* If S_ISREG(inode->i_mode) will do inode_lock(inode); */
error = claim_swapfile(p, inode); error = claim_swapfile(p, inode);
if (unlikely(error)) if (unlikely(error))
goto bad_swap; goto bad_swap;
inode_lock(inode);
if (IS_SWAPFILE(inode)) {
error = -EBUSY;
goto bad_swap_unlock_inode;
}
/* /*
* Read the swap header. * Read the swap header.
*/ */
if (!mapping->a_ops->readpage) { if (!mapping->a_ops->readpage) {
error = -EINVAL; error = -EINVAL;
goto bad_swap; goto bad_swap_unlock_inode;
} }
page = read_mapping_page(mapping, 0, swap_file); page = read_mapping_page(mapping, 0, swap_file);
if (IS_ERR(page)) { if (IS_ERR(page)) {
error = PTR_ERR(page); error = PTR_ERR(page);
goto bad_swap; goto bad_swap_unlock_inode;
} }
swap_header = kmap(page); swap_header = kmap(page);
maxpages = read_swap_header(p, swap_header, inode); maxpages = read_swap_header(p, swap_header, inode);
if (unlikely(!maxpages)) { if (unlikely(!maxpages)) {
error = -EINVAL; error = -EINVAL;
goto bad_swap; goto bad_swap_unlock_inode;
} }
/* OK, set up the swap map and apply the bad block list */ /* OK, set up the swap map and apply the bad block list */
swap_map = vzalloc(maxpages); swap_map = vzalloc(maxpages);
if (!swap_map) { if (!swap_map) {
error = -ENOMEM; error = -ENOMEM;
goto bad_swap; goto bad_swap_unlock_inode;
} }
if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
@ -3186,7 +3186,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
GFP_KERNEL); GFP_KERNEL);
if (!cluster_info) { if (!cluster_info) {
error = -ENOMEM; error = -ENOMEM;
goto bad_swap; goto bad_swap_unlock_inode;
} }
for (ci = 0; ci < nr_cluster; ci++) for (ci = 0; ci < nr_cluster; ci++)
@ -3195,7 +3195,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->percpu_cluster = alloc_percpu(struct percpu_cluster); p->percpu_cluster = alloc_percpu(struct percpu_cluster);
if (!p->percpu_cluster) { if (!p->percpu_cluster) {
error = -ENOMEM; error = -ENOMEM;
goto bad_swap; goto bad_swap_unlock_inode;
} }
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
struct percpu_cluster *cluster; struct percpu_cluster *cluster;
@ -3207,13 +3207,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = swap_cgroup_swapon(p->type, maxpages); error = swap_cgroup_swapon(p->type, maxpages);
if (error) if (error)
goto bad_swap; goto bad_swap_unlock_inode;
nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
cluster_info, maxpages, &span); cluster_info, maxpages, &span);
if (unlikely(nr_extents < 0)) { if (unlikely(nr_extents < 0)) {
error = nr_extents; error = nr_extents;
goto bad_swap; goto bad_swap_unlock_inode;
} }
/* frontswap enabled? set up bit-per-page map for frontswap */ /* frontswap enabled? set up bit-per-page map for frontswap */
if (IS_ENABLED(CONFIG_FRONTSWAP)) if (IS_ENABLED(CONFIG_FRONTSWAP))
@ -3252,7 +3252,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = init_swap_address_space(p->type, maxpages); error = init_swap_address_space(p->type, maxpages);
if (error) if (error)
goto bad_swap; goto bad_swap_unlock_inode;
/* /*
* Flush any pending IO and dirty mappings before we start using this * Flush any pending IO and dirty mappings before we start using this
@ -3262,7 +3262,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = inode_drain_writes(inode); error = inode_drain_writes(inode);
if (error) { if (error) {
inode->i_flags &= ~S_SWAPFILE; inode->i_flags &= ~S_SWAPFILE;
goto bad_swap; goto bad_swap_unlock_inode;
} }
mutex_lock(&swapon_mutex); mutex_lock(&swapon_mutex);
@ -3287,6 +3287,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = 0; error = 0;
goto out; goto out;
bad_swap_unlock_inode:
inode_unlock(inode);
bad_swap: bad_swap:
free_percpu(p->percpu_cluster); free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL; p->percpu_cluster = NULL;
@ -3294,6 +3296,7 @@ bad_swap:
set_blocksize(p->bdev, p->old_block_size); set_blocksize(p->bdev, p->old_block_size);
blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
} }
inode = NULL;
destroy_swap_extents(p); destroy_swap_extents(p);
swap_cgroup_swapoff(p->type); swap_cgroup_swapoff(p->type);
spin_lock(&swap_lock); spin_lock(&swap_lock);
@ -3303,13 +3306,8 @@ bad_swap:
vfree(swap_map); vfree(swap_map);
kvfree(cluster_info); kvfree(cluster_info);
kvfree(frontswap_map); kvfree(frontswap_map);
if (swap_file) { if (swap_file)
if (inode) {
inode_unlock(inode);
inode = NULL;
}
filp_close(swap_file, NULL); filp_close(swap_file, NULL);
}
out: out:
if (page && !IS_ERR(page)) { if (page && !IS_ERR(page)) {
kunmap(page); kunmap(page);

Loading…
Cancel
Save