@ -22,6 +22,7 @@
# include "volumes.h"
# include "disk-io.h"
# include "ordered-data.h"
# include "transaction.h"
# include "backref.h"
/*
@ -89,6 +90,7 @@ struct scrub_dev {
int first_free ;
int curr ;
atomic_t in_flight ;
atomic_t fixup_cnt ;
spinlock_t list_lock ;
wait_queue_head_t list_wait ;
u16 csum_size ;
@ -102,6 +104,14 @@ struct scrub_dev {
spinlock_t stat_lock ;
} ;
struct scrub_fixup_nodatasum {
struct scrub_dev * sdev ;
u64 logical ;
struct btrfs_root * root ;
struct btrfs_work work ;
int mirror_num ;
} ;
struct scrub_warning {
struct btrfs_path * path ;
u64 extent_item_size ;
@ -190,12 +200,13 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
if ( i ! = SCRUB_BIOS_PER_DEV - 1 )
sdev - > bios [ i ] - > next_free = i + 1 ;
else
else
sdev - > bios [ i ] - > next_free = - 1 ;
}
sdev - > first_free = 0 ;
sdev - > curr = - 1 ;
atomic_set ( & sdev - > in_flight , 0 ) ;
atomic_set ( & sdev - > fixup_cnt , 0 ) ;
atomic_set ( & sdev - > cancel_req , 0 ) ;
sdev - > csum_size = btrfs_super_csum_size ( & fs_info - > super_copy ) ;
INIT_LIST_HEAD ( & sdev - > csum_list ) ;
@ -347,6 +358,151 @@ out:
kfree ( swarn . msg_buf ) ;
}
static int scrub_fixup_readpage ( u64 inum , u64 offset , u64 root , void * ctx )
{
struct page * page ;
unsigned long index ;
struct scrub_fixup_nodatasum * fixup = ctx ;
int ret ;
int corrected ;
struct btrfs_key key ;
struct inode * inode ;
u64 end = offset + PAGE_SIZE - 1 ;
struct btrfs_root * local_root ;
key . objectid = root ;
key . type = BTRFS_ROOT_ITEM_KEY ;
key . offset = ( u64 ) - 1 ;
local_root = btrfs_read_fs_root_no_name ( fixup - > root - > fs_info , & key ) ;
if ( IS_ERR ( local_root ) )
return PTR_ERR ( local_root ) ;
key . type = BTRFS_INODE_ITEM_KEY ;
key . objectid = inum ;
key . offset = 0 ;
inode = btrfs_iget ( fixup - > root - > fs_info - > sb , & key , local_root , NULL ) ;
if ( IS_ERR ( inode ) )
return PTR_ERR ( inode ) ;
ret = set_extent_bit ( & BTRFS_I ( inode ) - > io_tree , offset , end ,
EXTENT_DAMAGED , 0 , NULL , NULL , GFP_NOFS ) ;
/* set_extent_bit should either succeed or give proper error */
WARN_ON ( ret > 0 ) ;
if ( ret )
return ret < 0 ? ret : - EFAULT ;
index = offset > > PAGE_CACHE_SHIFT ;
page = find_or_create_page ( inode - > i_mapping , index , GFP_NOFS ) ;
if ( ! page )
return - ENOMEM ;
ret = extent_read_full_page ( & BTRFS_I ( inode ) - > io_tree , page ,
btrfs_get_extent , fixup - > mirror_num ) ;
wait_on_page_locked ( page ) ;
corrected = ! test_range_bit ( & BTRFS_I ( inode ) - > io_tree , offset , end ,
EXTENT_DAMAGED , 0 , NULL ) ;
if ( corrected )
WARN_ON ( ! PageUptodate ( page ) ) ;
else
clear_extent_bit ( & BTRFS_I ( inode ) - > io_tree , offset , end ,
EXTENT_DAMAGED , 0 , 0 , NULL , GFP_NOFS ) ;
put_page ( page ) ;
iput ( inode ) ;
if ( ret < 0 )
return ret ;
if ( ret = = 0 & & corrected ) {
/*
* we only need to call readpage for one of the inodes belonging
* to this extent . so make iterate_extent_inodes stop
*/
return 1 ;
}
return - EIO ;
}
static void scrub_fixup_nodatasum ( struct btrfs_work * work )
{
int ret ;
struct scrub_fixup_nodatasum * fixup ;
struct scrub_dev * sdev ;
struct btrfs_trans_handle * trans = NULL ;
struct btrfs_fs_info * fs_info ;
struct btrfs_path * path ;
int uncorrectable = 0 ;
fixup = container_of ( work , struct scrub_fixup_nodatasum , work ) ;
sdev = fixup - > sdev ;
fs_info = fixup - > root - > fs_info ;
path = btrfs_alloc_path ( ) ;
if ( ! path ) {
spin_lock ( & sdev - > stat_lock ) ;
+ + sdev - > stat . malloc_errors ;
spin_unlock ( & sdev - > stat_lock ) ;
uncorrectable = 1 ;
goto out ;
}
trans = btrfs_join_transaction ( fixup - > root ) ;
if ( IS_ERR ( trans ) ) {
uncorrectable = 1 ;
goto out ;
}
/*
* the idea is to trigger a regular read through the standard path . we
* read a page from the ( failed ) logical address by specifying the
* corresponding copynum of the failed sector . thus , that readpage is
* expected to fail .
* that is the point where on - the - fly error correction will kick in
* ( once it ' s finished ) and rewrite the failed sector if a good copy
* can be found .
*/
ret = iterate_inodes_from_logical ( fixup - > logical , fixup - > root - > fs_info ,
path , scrub_fixup_readpage ,
fixup ) ;
if ( ret < 0 ) {
uncorrectable = 1 ;
goto out ;
}
WARN_ON ( ret ! = 1 ) ;
spin_lock ( & sdev - > stat_lock ) ;
+ + sdev - > stat . corrected_errors ;
spin_unlock ( & sdev - > stat_lock ) ;
out :
if ( trans & & ! IS_ERR ( trans ) )
btrfs_end_transaction ( trans , fixup - > root ) ;
if ( uncorrectable ) {
spin_lock ( & sdev - > stat_lock ) ;
+ + sdev - > stat . uncorrectable_errors ;
spin_unlock ( & sdev - > stat_lock ) ;
printk_ratelimited ( KERN_ERR " btrfs: unable to fixup "
" (nodatasum) error at logical %llu \n " ,
fixup - > logical ) ;
}
btrfs_free_path ( path ) ;
kfree ( fixup ) ;
/* see caller why we're pretending to be paused in the scrub counters */
mutex_lock ( & fs_info - > scrub_lock ) ;
atomic_dec ( & fs_info - > scrubs_running ) ;
atomic_dec ( & fs_info - > scrubs_paused ) ;
mutex_unlock ( & fs_info - > scrub_lock ) ;
atomic_dec ( & sdev - > fixup_cnt ) ;
wake_up ( & fs_info - > scrub_pause_wait ) ;
wake_up ( & sdev - > list_wait ) ;
}
/*
* scrub_recheck_error gets called when either verification of the page
* failed or the bio failed to read , e . g . with EIO . In the latter case ,
@ -417,6 +573,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
struct btrfs_fs_info * fs_info = sdev - > dev - > dev_root - > fs_info ;
struct btrfs_mapping_tree * map_tree = & fs_info - > mapping_tree ;
struct btrfs_multi_bio * multi = NULL ;
struct scrub_fixup_nodatasum * fixup ;
u64 logical = sbio - > logical + ix * PAGE_SIZE ;
u64 length ;
int i ;
@ -425,12 +582,30 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
if ( ( sbio - > spag [ ix ] . flags & BTRFS_EXTENT_FLAG_DATA ) & &
( sbio - > spag [ ix ] . have_csum = = 0 ) ) {
fixup = kzalloc ( sizeof ( * fixup ) , GFP_NOFS ) ;
if ( ! fixup )
goto uncorrectable ;
fixup - > sdev = sdev ;
fixup - > logical = logical ;
fixup - > root = fs_info - > extent_root ;
fixup - > mirror_num = sbio - > spag [ ix ] . mirror_num ;
/*
* nodatasum , don ' t try to fix anything
* FIXME : we can do better , open the inode and trigger a
* writeback
* increment scrubs_running to prevent cancel requests from
* completing as long as a fixup worker is running . we must also
* increment scrubs_paused to prevent deadlocking on pause
* requests used for transactions commits ( as the worker uses a
* transaction context ) . it is safe to regard the fixup worker
* as paused for all matters practical . effectively , we only
* avoid cancellation requests from completing .
*/
goto uncorrectable ;
mutex_lock ( & fs_info - > scrub_lock ) ;
atomic_inc ( & fs_info - > scrubs_running ) ;
atomic_inc ( & fs_info - > scrubs_paused ) ;
mutex_unlock ( & fs_info - > scrub_lock ) ;
atomic_inc ( & sdev - > fixup_cnt ) ;
fixup - > work . func = scrub_fixup_nodatasum ;
btrfs_queue_worker ( & fs_info - > scrub_workers , & fixup - > work ) ;
return ;
}
length = PAGE_SIZE ;
@ -1425,10 +1600,11 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
ret = scrub_enumerate_chunks ( sdev , start , end ) ;
wait_event ( sdev - > list_wait , atomic_read ( & sdev - > in_flight ) = = 0 ) ;
atomic_dec ( & fs_info - > scrubs_running ) ;
wake_up ( & fs_info - > scrub_pause_wait ) ;
wait_event ( sdev - > list_wait , atomic_read ( & sdev - > fixup_cnt ) = = 0 ) ;
if ( progress )
memcpy ( progress , & sdev - > stat , sizeof ( * progress ) ) ;