|
|
|
@@ -35,7 +35,9 @@
struct wb_writeback_work {
        long nr_pages;
        struct super_block *sb;
        unsigned long *older_than_this;
        enum writeback_sync_modes sync_mode;
        unsigned int tagged_writepages:1;
        unsigned int for_kupdate:1;
        unsigned int range_cyclic:1;
        unsigned int for_background:1;
@@ -180,12 +182,13 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
 */
void inode_wb_list_del(struct inode *inode)
{
        spin_lock(&inode_wb_list_lock);
        struct backing_dev_info *bdi = inode_to_bdi(inode);

        spin_lock(&bdi->wb.list_lock);
        list_del_init(&inode->i_wb_list);
        spin_unlock(&inode_wb_list_lock);
        spin_unlock(&bdi->wb.list_lock);
}

/*
 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
 * furthest end of its superblock's dirty-inode list.
@@ -195,11 +198,9 @@ void inode_wb_list_del(struct inode *inode)
 * the case then the inode must have been redirtied while it was being written
 * out and we don't reset its dirtied_when.
 */
static void redirty_tail(struct inode *inode)
static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
{
        struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;

        assert_spin_locked(&inode_wb_list_lock);
        assert_spin_locked(&wb->list_lock);
        if (!list_empty(&wb->b_dirty)) {
                struct inode *tail;
@@ -213,11 +214,9 @@ static void redirty_tail(struct inode *inode)
/*
 * requeue inode for re-scanning after bdi->b_io list is exhausted.
 */
static void requeue_io(struct inode *inode)
static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
        struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;

        assert_spin_locked(&inode_wb_list_lock);
        assert_spin_locked(&wb->list_lock);
        list_move(&inode->i_wb_list, &wb->b_more_io);
}
@@ -225,7 +224,7 @@ static void inode_sync_complete(struct inode *inode)
{
        /*
         * Prevent speculative execution through
         * spin_unlock(&inode_wb_list_lock);
         * spin_unlock(&wb->list_lock);
         */

        smp_mb();
@@ -250,15 +249,16 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
/*
 * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
 */
static void move_expired_inodes(struct list_head *delaying_queue,
static int move_expired_inodes(struct list_head *delaying_queue,
                                struct list_head *dispatch_queue,
                                unsigned long *older_than_this)
                               unsigned long *older_than_this)
{
        LIST_HEAD(tmp);
        struct list_head *pos, *node;
        struct super_block *sb = NULL;
        struct inode *inode;
        int do_sb_sort = 0;
        int moved = 0;

        while (!list_empty(delaying_queue)) {
                inode = wb_inode(delaying_queue->prev);
@@ -269,12 +269,13 @@ static void move_expired_inodes(struct list_head *delaying_queue,
                        do_sb_sort = 1;
                sb = inode->i_sb;
                list_move(&inode->i_wb_list, &tmp);
                moved++;
        }

        /* just one sb in list, splice to dispatch_queue and we're done */
        if (!do_sb_sort) {
                list_splice(&tmp, dispatch_queue);
                return;
                goto out;
        }

        /* Move inodes from one superblock together */
@@ -286,6 +287,8 @@ static void move_expired_inodes(struct list_head *delaying_queue,
                                list_move(&inode->i_wb_list, dispatch_queue);
                }
        }
out:
        return moved;
}

/*
@@ -301,9 +304,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 */
static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
{
        assert_spin_locked(&inode_wb_list_lock);
        int moved;
        assert_spin_locked(&wb->list_lock);
        list_splice_init(&wb->b_more_io, &wb->b_io);
        move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
        moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
        trace_writeback_queue_io(wb, older_than_this, moved);
}

static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -316,7 +321,8 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
/*
 * Wait for writeback on an inode to complete.
 */
static void inode_wait_for_writeback(struct inode *inode)
static void inode_wait_for_writeback(struct inode *inode,
                                     struct bdi_writeback *wb)
{
        DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
        wait_queue_head_t *wqh;
@@ -324,15 +330,15 @@ static void inode_wait_for_writeback(struct inode *inode)
        wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
        while (inode->i_state & I_SYNC) {
                spin_unlock(&inode->i_lock);
                spin_unlock(&inode_wb_list_lock);
                spin_unlock(&wb->list_lock);
                __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
                spin_lock(&inode_wb_list_lock);
                spin_lock(&wb->list_lock);
                spin_lock(&inode->i_lock);
        }
}

/*
 * Write out an inode's dirty pages. Called under inode_wb_list_lock and
 * Write out an inode's dirty pages. Called under wb->list_lock and
 * inode->i_lock. Either the caller has an active reference on the inode or
 * the inode has I_WILL_FREE set.
 *
@@ -343,13 +349,15 @@ static void inode_wait_for_writeback(struct inode *inode)
 * livelocks, etc.
 */
static int
writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
                       struct writeback_control *wbc)
{
        struct address_space *mapping = inode->i_mapping;
        long nr_to_write = wbc->nr_to_write;
        unsigned dirty;
        int ret;

        assert_spin_locked(&inode_wb_list_lock);
        assert_spin_locked(&wb->list_lock);
        assert_spin_locked(&inode->i_lock);

        if (!atomic_read(&inode->i_count))
@@ -367,14 +375,16 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                 * completed a full scan of b_io.
                 */
                if (wbc->sync_mode != WB_SYNC_ALL) {
                        requeue_io(inode);
                        requeue_io(inode, wb);
                        trace_writeback_single_inode_requeue(inode, wbc,
                                                             nr_to_write);
                        return 0;
                }

                /*
                 * It's a data-integrity sync. We must wait.
                 */
                inode_wait_for_writeback(inode);
                inode_wait_for_writeback(inode, wb);
        }

        BUG_ON(inode->i_state & I_SYNC);
@@ -383,7 +393,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
        inode->i_state |= I_SYNC;
        inode->i_state &= ~I_DIRTY_PAGES;
        spin_unlock(&inode->i_lock);
        spin_unlock(&inode_wb_list_lock);
        spin_unlock(&wb->list_lock);

        ret = do_writepages(mapping, wbc);
@@ -414,10 +424,19 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                        ret = err;
        }

        spin_lock(&inode_wb_list_lock);
        spin_lock(&wb->list_lock);
        spin_lock(&inode->i_lock);
        inode->i_state &= ~I_SYNC;
        if (!(inode->i_state & I_FREEING)) {
                /*
                 * Sync livelock prevention. Each inode is tagged and synced in
                 * one shot. If still dirty, it will be redirty_tail()'ed below.
                 * Update the dirty time to prevent enqueue and sync it again.
                 */
                if ((inode->i_state & I_DIRTY) &&
                    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
                        inode->dirtied_when = jiffies;

                if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                        /*
                         * We didn't write back all the pages. nfs_writepages()
@@ -428,7 +447,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                                /*
                                 * slice used up: queue for next turn
                                 */
                                requeue_io(inode);
                                requeue_io(inode, wb);
                        } else {
                                /*
                                 * Writeback blocked by something other than
@@ -437,7 +456,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                                 * retrying writeback of the dirty page/inode
                                 * that cannot be performed immediately.
                                 */
                                redirty_tail(inode);
                                redirty_tail(inode, wb);
                        }
                } else if (inode->i_state & I_DIRTY) {
                        /*
@@ -446,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                         * submission or metadata updates after data IO
                         * completion.
                         */
                        redirty_tail(inode);
                        redirty_tail(inode, wb);
                } else {
                        /*
                         * The inode is clean. At this point we either have
@@ -457,9 +476,41 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                }
        }
        inode_sync_complete(inode);
        trace_writeback_single_inode(inode, wbc, nr_to_write);
        return ret;
}

static long writeback_chunk_size(struct backing_dev_info *bdi,
                                 struct wb_writeback_work *work)
{
        long pages;

        /*
         * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
         * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
         * here avoids calling into writeback_inodes_wb() more than once.
         *
         * The intended call sequence for WB_SYNC_ALL writeback is:
         *
         *      wb_writeback()
         *          writeback_sb_inodes()       <== called only once
         *              write_cache_pages()     <== called once for each inode
         *                  (quickly) tag currently dirty pages
         *                  (maybe slowly) sync all tagged pages
         */
        if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
                pages = LONG_MAX;
        else {
                pages = min(bdi->avg_write_bandwidth / 2,
                            global_dirty_limit / DIRTY_SCOPE);
                pages = min(pages, work->nr_pages);
                pages = round_down(pages + MIN_WRITEBACK_PAGES,
                                   MIN_WRITEBACK_PAGES);
        }

        return pages;
}
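For reference, a minimal user-space sketch of the chunk sizing introduced by writeback_chunk_size() above. It is not part of the patch: MIN_WRITEBACK_PAGES and DIRTY_SCOPE get placeholder values here (the real definitions live elsewhere in the tree and may differ), and round_down() is written out as plain integer division.

#include <stdio.h>
#include <limits.h>

/* Placeholder values for illustration only. */
#define MIN_WRITEBACK_PAGES_GUESS 2048
#define DIRTY_SCOPE_GUESS 8

static long min_long(long a, long b)
{
        return a < b ? a : b;
}

/* Mirrors the sizing logic of writeback_chunk_size() above. */
static long chunk_size(long avg_write_bandwidth, long global_dirty_limit,
                       long nr_pages, int sync_all_or_tagged)
{
        long pages;

        if (sync_all_or_tagged)
                return LONG_MAX;

        pages = min_long(avg_write_bandwidth / 2,
                         global_dirty_limit / DIRTY_SCOPE_GUESS);
        pages = min_long(pages, nr_pages);
        /* round_down(pages + MIN, MIN), written with plain integer division */
        pages = (pages + MIN_WRITEBACK_PAGES_GUESS) /
                MIN_WRITEBACK_PAGES_GUESS * MIN_WRITEBACK_PAGES_GUESS;
        return pages;
}

int main(void)
{
        /* e.g. 100000 pages/s of measured bandwidth, a 64000-page dirty
         * limit, 30000 pages left in the work item: prints 8192 */
        printf("chunk = %ld pages\n", chunk_size(100000, 64000, 30000, 0));
        return 0;
}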
|
|
|
|
|
|
|
|
|
/*
 * Write a portion of b_io inodes which belong to @sb.
 *
@@ -467,24 +518,36 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 * inodes. Otherwise write only ones which go sequentially
 * in reverse order.
 *
 * Return 1, if the caller writeback routine should be
 * interrupted. Otherwise return 0.
 * Return the number of pages and/or inodes written.
 */
static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
                struct writeback_control *wbc, bool only_this_sb)
static long writeback_sb_inodes(struct super_block *sb,
                                struct bdi_writeback *wb,
                                struct wb_writeback_work *work)
{
        struct writeback_control wbc = {
                .sync_mode = work->sync_mode,
                .tagged_writepages = work->tagged_writepages,
                .for_kupdate = work->for_kupdate,
                .for_background = work->for_background,
                .range_cyclic = work->range_cyclic,
                .range_start = 0,
                .range_end = LLONG_MAX,
        };
        unsigned long start_time = jiffies;
        long write_chunk;
        long wrote = 0; /* count both pages and inodes */

        while (!list_empty(&wb->b_io)) {
                long pages_skipped;
                struct inode *inode = wb_inode(wb->b_io.prev);

                if (inode->i_sb != sb) {
                        if (only_this_sb) {
                        if (work->sb) {
                                /*
                                 * We only want to write back data for this
                                 * superblock, move all inodes not belonging
                                 * to it back onto the dirty list.
                                 */
                                redirty_tail(inode);
                                redirty_tail(inode, wb);
                                continue;
                        }
@@ -493,7 +556,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
                         * Bounce back to the caller to unpin this and
                         * pin the next superblock.
                         */
                        return 0;
                        break;
                }

                /*
@@ -504,95 +567,91 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
                spin_lock(&inode->i_lock);
                if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
                        spin_unlock(&inode->i_lock);
                        requeue_io(inode);
                        redirty_tail(inode, wb);
                        continue;
                }

                /*
                 * Was this inode dirtied after sync_sb_inodes was called?
                 * This keeps sync from extra jobs and livelock.
                 */
                if (inode_dirtied_after(inode, wbc->wb_start)) {
                        spin_unlock(&inode->i_lock);
                        return 1;
                }

                __iget(inode);
                write_chunk = writeback_chunk_size(wb->bdi, work);
                wbc.nr_to_write = write_chunk;
                wbc.pages_skipped = 0;

                pages_skipped = wbc->pages_skipped;
                writeback_single_inode(inode, wbc);
                if (wbc->pages_skipped != pages_skipped) {
                writeback_single_inode(inode, wb, &wbc);

                work->nr_pages -= write_chunk - wbc.nr_to_write;
                wrote += write_chunk - wbc.nr_to_write;
                if (!(inode->i_state & I_DIRTY))
                        wrote++;
                if (wbc.pages_skipped) {
                        /*
                         * writeback is not making progress due to locked
                         * buffers. Skip this inode for now.
                         */
                        redirty_tail(inode);
                        redirty_tail(inode, wb);
                }
                spin_unlock(&inode->i_lock);
                spin_unlock(&inode_wb_list_lock);
                spin_unlock(&wb->list_lock);
                iput(inode);
                cond_resched();
                spin_lock(&inode_wb_list_lock);
                if (wbc->nr_to_write <= 0) {
                        wbc->more_io = 1;
                        return 1;
                spin_lock(&wb->list_lock);
                /*
                 * bail out to wb_writeback() often enough to check
                 * background threshold and other termination conditions.
                 */
                if (wrote) {
                        if (time_is_before_jiffies(start_time + HZ / 10UL))
                                break;
                        if (work->nr_pages <= 0)
                                break;
                }
                if (!list_empty(&wb->b_more_io))
                        wbc->more_io = 1;
        }
        /* b_io is empty */
        return 1;
        return wrote;
}

void writeback_inodes_wb(struct bdi_writeback *wb,
                struct writeback_control *wbc)
static long __writeback_inodes_wb(struct bdi_writeback *wb,
                                  struct wb_writeback_work *work)
{
        int ret = 0;

        if (!wbc->wb_start)
                wbc->wb_start = jiffies; /* livelock avoidance */
        spin_lock(&inode_wb_list_lock);
        if (!wbc->for_kupdate || list_empty(&wb->b_io))
                queue_io(wb, wbc->older_than_this);
        unsigned long start_time = jiffies;
        long wrote = 0;

        while (!list_empty(&wb->b_io)) {
                struct inode *inode = wb_inode(wb->b_io.prev);
                struct super_block *sb = inode->i_sb;

                if (!grab_super_passive(sb)) {
                        requeue_io(inode);
                        requeue_io(inode, wb);
                        continue;
                }
                ret = writeback_sb_inodes(sb, wb, wbc, false);
                wrote += writeback_sb_inodes(sb, wb, work);
                drop_super(sb);

                if (ret)
                        break;
                /* refer to the same tests at the end of writeback_sb_inodes */
                if (wrote) {
                        if (time_is_before_jiffies(start_time + HZ / 10UL))
                                break;
                        if (work->nr_pages <= 0)
                                break;
                }
        }
        spin_unlock(&inode_wb_list_lock);
        /* Leave any unwritten inodes on b_io */
        return wrote;
}
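A small user-space model of the loop-termination policy that writeback_sb_inodes() and __writeback_inodes_wb() now share: only bail out once some progress has been made, and then either because the time slice is up or because the page budget ran out. Illustration only, not kernel code; HZ/10 is approximated as 100 milliseconds and the helper names are invented for the sketch.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Stand-in for time_is_before_jiffies(start_time + HZ / 10). */
static bool tenth_of_second_elapsed(struct timespec start)
{
        struct timespec now;
        long ms;

        clock_gettime(CLOCK_MONOTONIC, &now);
        ms = (now.tv_sec - start.tv_sec) * 1000 +
             (now.tv_nsec - start.tv_nsec) / 1000000;
        return ms > 100;
}

/* The termination test used at the bottom of both loops above. */
static bool should_bail(long wrote, long nr_pages_left, struct timespec start)
{
        if (!wrote)
                return false;           /* no progress yet: keep scanning b_io */
        if (tenth_of_second_elapsed(start))
                return true;            /* let the caller recheck thresholds */
        if (nr_pages_left <= 0)
                return true;            /* page budget consumed */
        return false;
}

int main(void)
{
        struct timespec start;

        clock_gettime(CLOCK_MONOTONIC, &start);
        printf("%d\n", should_bail(4096, 0, start));    /* budget gone -> 1 */
        printf("%d\n", should_bail(0, 0, start));       /* no progress -> 0 */
        return 0;
}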
|
|
|
|
|
|
|
|
|
static void __writeback_inodes_sb(struct super_block *sb,
                struct bdi_writeback *wb, struct writeback_control *wbc)
long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
{
        WARN_ON(!rwsem_is_locked(&sb->s_umount));
        struct wb_writeback_work work = {
                .nr_pages = nr_pages,
                .sync_mode = WB_SYNC_NONE,
                .range_cyclic = 1,
        };

        spin_lock(&inode_wb_list_lock);
        if (!wbc->for_kupdate || list_empty(&wb->b_io))
                queue_io(wb, wbc->older_than_this);
        writeback_sb_inodes(sb, wb, wbc, true);
        spin_unlock(&inode_wb_list_lock);
}
        spin_lock(&wb->list_lock);
        if (list_empty(&wb->b_io))
                queue_io(wb, NULL);
        __writeback_inodes_wb(wb, &work);
        spin_unlock(&wb->list_lock);

/*
 * The maximum number of pages to writeout in a single bdi flush/kupdate
 * operation. We do this so we don't hold I_SYNC against an inode for
 * enormous amounts of time, which would block a userspace task which has
 * been forced to throttle against that inode. Also, the code reevaluates
 * the dirty each time it has written this many pages.
 */
#define MAX_WRITEBACK_PAGES 1024
        return nr_pages - work.nr_pages;
}

static inline bool over_bground_thresh(void)
{
@@ -604,6 +663,16 @@ static inline bool over_bground_thresh(void)
                global_page_state(NR_UNSTABLE_NFS) > background_thresh);
}

/*
 * Called under wb->list_lock. If there are multiple wb per bdi,
 * only the flusher working on the first wb should do it.
 */
static void wb_update_bandwidth(struct bdi_writeback *wb,
                                unsigned long start_time)
{
        __bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, start_time);
}

/*
 * Explicit flushing or periodic writeback of "old" data.
 *
@@ -622,47 +691,16 @@ static inline bool over_bground_thresh(void)
static long wb_writeback(struct bdi_writeback *wb,
                         struct wb_writeback_work *work)
{
        struct writeback_control wbc = {
                .sync_mode = work->sync_mode,
                .older_than_this = NULL,
                .for_kupdate = work->for_kupdate,
                .for_background = work->for_background,
                .range_cyclic = work->range_cyclic,
        };
        unsigned long wb_start = jiffies;
        long nr_pages = work->nr_pages;
        unsigned long oldest_jif;
        long wrote = 0;
        long write_chunk;
        struct inode *inode;
        long progress;

        if (wbc.for_kupdate) {
                wbc.older_than_this = &oldest_jif;
                oldest_jif = jiffies -
                                msecs_to_jiffies(dirty_expire_interval * 10);
        }
        if (!wbc.range_cyclic) {
                wbc.range_start = 0;
                wbc.range_end = LLONG_MAX;
        }
        oldest_jif = jiffies;
        work->older_than_this = &oldest_jif;

        /*
         * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
         * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
         * here avoids calling into writeback_inodes_wb() more than once.
         *
         * The intended call sequence for WB_SYNC_ALL writeback is:
         *
         *      wb_writeback()
         *          __writeback_inodes_sb()     <== called only once
         *              write_cache_pages()     <== called once for each inode
         *                  (quickly) tag currently dirty pages
         *                  (maybe slowly) sync all tagged pages
         */
        if (wbc.sync_mode == WB_SYNC_NONE)
                write_chunk = MAX_WRITEBACK_PAGES;
        else
                write_chunk = LONG_MAX;

        wbc.wb_start = jiffies; /* livelock avoidance */
        spin_lock(&wb->list_lock);
        for (;;) {
                /*
                 * Stop writeback when nr_pages has been consumed
@@ -687,52 +725,54 @@ static long wb_writeback(struct bdi_writeback *wb,
                if (work->for_background && !over_bground_thresh())
                        break;

                wbc.more_io = 0;
                wbc.nr_to_write = write_chunk;
                wbc.pages_skipped = 0;
                if (work->for_kupdate) {
                        oldest_jif = jiffies -
                                msecs_to_jiffies(dirty_expire_interval * 10);
                        work->older_than_this = &oldest_jif;
                }

                trace_wbc_writeback_start(&wbc, wb->bdi);
                trace_writeback_start(wb->bdi, work);
                if (list_empty(&wb->b_io))
                        queue_io(wb, work->older_than_this);
                if (work->sb)
                        __writeback_inodes_sb(work->sb, wb, &wbc);
                        progress = writeback_sb_inodes(work->sb, wb, work);
                else
                        writeback_inodes_wb(wb, &wbc);
                trace_wbc_writeback_written(&wbc, wb->bdi);
                        progress = __writeback_inodes_wb(wb, work);
                trace_writeback_written(wb->bdi, work);

                work->nr_pages -= write_chunk - wbc.nr_to_write;
                wrote += write_chunk - wbc.nr_to_write;
                wb_update_bandwidth(wb, wb_start);

                /*
                 * If we consumed everything, see if we have more
                 * Did we write something? Try for more
                 *
                 * Dirty inodes are moved to b_io for writeback in batches.
                 * The completion of the current batch does not necessarily
                 * mean the overall work is done. So we keep looping as long
                 * as made some progress on cleaning pages or inodes.
                 */
                if (wbc.nr_to_write <= 0)
                if (progress)
                        continue;
                /*
                 * Didn't write everything and we don't have more IO, bail
                 * No more inodes for IO, bail
                 */
                if (!wbc.more_io)
                if (list_empty(&wb->b_more_io))
                        break;
                /*
                 * Did we write something? Try for more
                 */
                if (wbc.nr_to_write < write_chunk)
                        continue;
                /*
                 * Nothing written. Wait for some inode to
                 * become available for writeback. Otherwise
                 * we'll just busyloop.
                 */
                spin_lock(&inode_wb_list_lock);
                if (!list_empty(&wb->b_more_io)) {
                        trace_writeback_wait(wb->bdi, work);
                        inode = wb_inode(wb->b_more_io.prev);
                        trace_wbc_writeback_wait(&wbc, wb->bdi);
                        spin_lock(&inode->i_lock);
                        inode_wait_for_writeback(inode);
                        inode_wait_for_writeback(inode, wb);
                        spin_unlock(&inode->i_lock);
                }
                spin_unlock(&inode_wb_list_lock);
        }
        spin_unlock(&wb->list_lock);

        return wrote;
        return nr_pages - work->nr_pages;
}
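A compact sketch of the retry ladder wb_writeback() ends up with after this change: keep looping while the last batch made progress, stop once b_more_io is empty, and otherwise wait on an inode under writeback rather than busy-loop. Illustration only; the enum and function names below are made up and do not exist in the kernel.

#include <stdio.h>

enum wb_next_step {
        WB_LOOP_AGAIN,          /* progress was made: keep writing */
        WB_DONE,                /* b_more_io empty: nothing left to retry */
        WB_WAIT_FOR_INODE,      /* block on an inode under I_SYNC */
};

/* Models the decisions at the tail of the wb_writeback() loop above. */
static enum wb_next_step next_step(long progress, int b_more_io_empty)
{
        if (progress)
                return WB_LOOP_AGAIN;
        if (b_more_io_empty)
                return WB_DONE;
        return WB_WAIT_FOR_INODE;
}

int main(void)
{
        printf("%d %d %d\n",
               next_step(8192, 0),      /* 0: loop again */
               next_step(0, 1),         /* 1: done */
               next_step(0, 0));        /* 2: wait for an inode */
        return 0;
}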
|
|
|
|
|
|
|
|
|
/*
@@ -1063,10 +1103,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        }

                        spin_unlock(&inode->i_lock);
                        spin_lock(&inode_wb_list_lock);
                        spin_lock(&bdi->wb.list_lock);
                        inode->dirtied_when = jiffies;
                        list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
                        spin_unlock(&inode_wb_list_lock);
                        spin_unlock(&bdi->wb.list_lock);

                        if (wakeup_bdi)
                                bdi_wakeup_thread_delayed(bdi);
@@ -1162,10 +1202,11 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
{
        DECLARE_COMPLETION_ONSTACK(done);
        struct wb_writeback_work work = {
                .sb = sb,
                .sync_mode = WB_SYNC_NONE,
                .done = &done,
                .nr_pages = nr,
                .sb = sb,
                .sync_mode = WB_SYNC_NONE,
                .tagged_writepages = 1,
                .done = &done,
                .nr_pages = nr,
        };

        WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1267,6 +1308,7 @@ EXPORT_SYMBOL(sync_inodes_sb);
 */
int write_inode_now(struct inode *inode, int sync)
{
        struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
        int ret;
        struct writeback_control wbc = {
                .nr_to_write = LONG_MAX,
@@ -1279,11 +1321,11 @@ int write_inode_now(struct inode *inode, int sync)
                wbc.nr_to_write = 0;

        might_sleep();
        spin_lock(&inode_wb_list_lock);
        spin_lock(&wb->list_lock);
        spin_lock(&inode->i_lock);
        ret = writeback_single_inode(inode, &wbc);
        ret = writeback_single_inode(inode, wb, &wbc);
        spin_unlock(&inode->i_lock);
        spin_unlock(&inode_wb_list_lock);
        spin_unlock(&wb->list_lock);
        if (sync)
                inode_sync_wait(inode);
        return ret;
@@ -1303,13 +1345,14 @@ EXPORT_SYMBOL(write_inode_now);
 */
int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
        struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
        int ret;

        spin_lock(&inode_wb_list_lock);
        spin_lock(&wb->list_lock);
        spin_lock(&inode->i_lock);
        ret = writeback_single_inode(inode, wbc);
        ret = writeback_single_inode(inode, wb, wbc);
        spin_unlock(&inode->i_lock);
        spin_unlock(&inode_wb_list_lock);
        spin_unlock(&wb->list_lock);
        return ret;
}
EXPORT_SYMBOL(sync_inode);