@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
{
int i ;
for ( i = 0 ; i < BTRFS_MAX_LEVEL ; i + + ) {
if ( p - > nodes [ i ] & & p - > locks [ i ] )
btrfs_set_lock_blocking ( p - > nodes [ i ] ) ;
if ( ! p - > nodes [ i ] | | ! p - > locks [ i ] )
continue ;
btrfs_set_lock_blocking_rw ( p - > nodes [ i ] , p - > locks [ i ] ) ;
if ( p - > locks [ i ] = = BTRFS_READ_LOCK )
p - > locks [ i ] = BTRFS_READ_LOCK_BLOCKING ;
else if ( p - > locks [ i ] = = BTRFS_WRITE_LOCK )
p - > locks [ i ] = BTRFS_WRITE_LOCK_BLOCKING ;
}
}
@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
* for held
*/
noinline void btrfs_clear_path_blocking ( struct btrfs_path * p ,
struct extent_buffer * held )
struct extent_buffer * held , int held_rw )
{
int i ;
@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
* really sure by forcing the path to blocking before we clear
* the path blocking .
*/
if ( held )
btrfs_set_lock_blocking ( held ) ;
if ( held ) {
btrfs_set_lock_blocking_rw ( held , held_rw ) ;
if ( held_rw = = BTRFS_WRITE_LOCK )
held_rw = BTRFS_WRITE_LOCK_BLOCKING ;
else if ( held_rw = = BTRFS_READ_LOCK )
held_rw = BTRFS_READ_LOCK_BLOCKING ;
}
btrfs_set_path_blocking ( p ) ;
# endif
for ( i = BTRFS_MAX_LEVEL - 1 ; i > = 0 ; i - - ) {
if ( p - > nodes [ i ] & & p - > locks [ i ] )
btrfs_clear_lock_blocking ( p - > nodes [ i ] ) ;
if ( p - > nodes [ i ] & & p - > locks [ i ] ) {
btrfs_clear_lock_blocking_rw ( p - > nodes [ i ] , p - > locks [ i ] ) ;
if ( p - > locks [ i ] = = BTRFS_WRITE_LOCK_BLOCKING )
p - > locks [ i ] = BTRFS_WRITE_LOCK ;
else if ( p - > locks [ i ] = = BTRFS_READ_LOCK_BLOCKING )
p - > locks [ i ] = BTRFS_READ_LOCK ;
}
}
# ifdef CONFIG_DEBUG_LOCK_ALLOC
if ( held )
btrfs_clear_lock_blocking ( held ) ;
btrfs_clear_lock_blocking_rw ( held , held_rw ) ;
# endif
}
@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)
if ( ! p - > nodes [ i ] )
continue ;
if ( p - > locks [ i ] ) {
btrfs_tree_unlock ( p - > nodes [ i ] ) ;
btrfs_tree_unlock_rw ( p - > nodes [ i ] , p - > lock s [ i ] ) ;
p - > locks [ i ] = 0 ;
}
free_extent_buffer ( p - > nodes [ i ] ) ;
@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
return eb ;
}
/* loop around taking references on and locking the root node of the
* tree until you end up with a lock on the root . A locked buffer
* is returned , with a reference held .
*/
struct extent_buffer * btrfs_read_lock_root_node ( struct btrfs_root * root )
{
struct extent_buffer * eb ;
while ( 1 ) {
eb = btrfs_root_node ( root ) ;
btrfs_tree_read_lock ( eb ) ;
if ( eb = = root - > node )
break ;
btrfs_tree_read_unlock ( eb ) ;
free_extent_buffer ( eb ) ;
}
return eb ;
}
/* cowonly root (everything not a reference counted cow subvolume), just get
* put onto a simple dirty list . transaction . c walks this to make sure they
* get properly updated on disk .
@ -862,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
mid = path - > nodes [ level ] ;
WARN_ON ( ! path - > locks [ level ] ) ;
WARN_ON ( path - > locks [ level ] ! = BTRFS_WRITE_LOCK & &
path - > locks [ level ] ! = BTRFS_WRITE_LOCK_BLOCKING ) ;
WARN_ON ( btrfs_header_generation ( mid ) ! = trans - > transid ) ;
orig_ptr = btrfs_node_blockptr ( mid , orig_slot ) ;
@ -1360,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
t = path - > nodes [ i ] ;
if ( i > = lowest_unlock & & i > skip_level & & path - > locks [ i ] ) {
btrfs_tree_unlock ( t ) ;
btrfs_tree_unlock_rw ( t , path - > locks [ i ] ) ;
path - > locks [ i ] = 0 ;
}
}
@ -1387,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
continue ;
if ( ! path - > locks [ i ] )
continue ;
btrfs_tree_unlock ( path - > nodes [ i ] ) ;
btrfs_tree_unlock_rw ( path - > nodes [ i ] , path - > lock s [ i ] ) ;
path - > locks [ i ] = 0 ;
}
}
@ -1436,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
* we can trust our generation number
*/
free_extent_buffer ( tmp ) ;
btrfs_set_path_blocking ( p ) ;
tmp = read_tree_block ( root , blocknr , blocksize , gen ) ;
if ( tmp & & btrfs_buffer_uptodate ( tmp , gen ) ) {
* eb_ret = tmp ;
@ -1491,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,
static int
setup_nodes_for_search ( struct btrfs_trans_handle * trans ,
struct btrfs_root * root , struct btrfs_path * p ,
struct extent_buffer * b , int level , int ins_len )
struct extent_buffer * b , int level , int ins_len ,
int * write_lock_level )
{
int ret ;
if ( ( p - > search_for_split | | ins_len > 0 ) & & btrfs_header_nritems ( b ) > =
BTRFS_NODEPTRS_PER_BLOCK ( root ) - 3 ) {
int sret ;
if ( * write_lock_level < level + 1 ) {
* write_lock_level = level + 1 ;
btrfs_release_path ( p ) ;
goto again ;
}
sret = reada_for_balance ( root , p , level ) ;
if ( sret )
goto again ;
btrfs_set_path_blocking ( p ) ;
sret = split_node ( trans , root , p , level ) ;
btrfs_clear_path_blocking ( p , NULL ) ;
btrfs_clear_path_blocking ( p , NULL , 0 ) ;
BUG_ON ( sret > 0 ) ;
if ( sret ) {
@ -1516,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
BTRFS_NODEPTRS_PER_BLOCK ( root ) / 2 ) {
int sret ;
if ( * write_lock_level < level + 1 ) {
* write_lock_level = level + 1 ;
btrfs_release_path ( p ) ;
goto again ;
}
sret = reada_for_balance ( root , p , level ) ;
if ( sret )
goto again ;
btrfs_set_path_blocking ( p ) ;
sret = balance_level ( trans , root , p , level ) ;
btrfs_clear_path_blocking ( p , NULL ) ;
btrfs_clear_path_blocking ( p , NULL , 0 ) ;
if ( sret ) {
ret = sret ;
@ -1566,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
int err ;
int level ;
int lowest_unlock = 1 ;
int root_lock ;
/* everything at write_lock_level or lower must be write locked */
int write_lock_level = 0 ;
u8 lowest_level = 0 ;
lowest_level = p - > lowest_level ;
WARN_ON ( lowest_level & & ins_len > 0 ) ;
WARN_ON ( p - > nodes [ 0 ] ! = NULL ) ;
if ( ins_len < 0 )
if ( ins_len < 0 ) {
lowest_unlock = 2 ;
/* when we are removing items, we might have to go up to level
* two as we update tree pointers Make sure we keep write
* for those levels as well
*/
write_lock_level = 2 ;
} else if ( ins_len > 0 ) {
/*
* for inserting items , make sure we have a write lock on
* level 1 so we can update keys
*/
write_lock_level = 1 ;
}
if ( ! cow )
write_lock_level = - 1 ;
if ( cow & & ( p - > keep_locks | | p - > lowest_level ) )
write_lock_level = BTRFS_MAX_LEVEL ;
again :
/*
* we try very hard to do read locks on the root
*/
root_lock = BTRFS_READ_LOCK ;
level = 0 ;
if ( p - > search_commit_root ) {
/*
* the commit roots are read only
* so we always do read locks
*/
b = root - > commit_root ;
extent_buffer_get ( b ) ;
level = btrfs_header_level ( b ) ;
if ( ! p - > skip_locking )
btrfs_tree_lock ( b ) ;
btrfs_tree_read_ lock ( b ) ;
} else {
if ( p - > skip_locking )
if ( p - > skip_locking ) {
b = btrfs_root_node ( root ) ;
else
b = btrfs_lock_root_node ( root ) ;
level = btrfs_header_level ( b ) ;
} else {
/* we don't know the level of the root node
* until we actually have it read locked
*/
b = btrfs_read_lock_root_node ( root ) ;
level = btrfs_header_level ( b ) ;
if ( level < = write_lock_level ) {
/* whoops, must trade for write lock */
btrfs_tree_read_unlock ( b ) ;
free_extent_buffer ( b ) ;
b = btrfs_lock_root_node ( root ) ;
root_lock = BTRFS_WRITE_LOCK ;
/* the level might have changed, check again */
level = btrfs_header_level ( b ) ;
}
}
}
p - > nodes [ level ] = b ;
if ( ! p - > skip_locking )
p - > locks [ level ] = root_lock ;
while ( b ) {
level = btrfs_header_level ( b ) ;
@ -1595,10 +1696,6 @@ again:
* setup the path here so we can release it under lock
* contention with the cow code
*/
p - > nodes [ level ] = b ;
if ( ! p - > skip_locking )
p - > locks [ level ] = 1 ;
if ( cow ) {
/*
* if we don ' t really need to cow this block
@ -1610,6 +1707,16 @@ again:
btrfs_set_path_blocking ( p ) ;
/*
* must have write locks on this node and the
* parent
*/
if ( level + 1 > write_lock_level ) {
write_lock_level = level + 1 ;
btrfs_release_path ( p ) ;
goto again ;
}
err = btrfs_cow_block ( trans , root , b ,
p - > nodes [ level + 1 ] ,
p - > slots [ level + 1 ] , & b ) ;
@ -1622,10 +1729,7 @@ cow_done:
BUG_ON ( ! cow & & ins_len ) ;
p - > nodes [ level ] = b ;
if ( ! p - > skip_locking )
p - > locks [ level ] = 1 ;
btrfs_clear_path_blocking ( p , NULL ) ;
btrfs_clear_path_blocking ( p , NULL , 0 ) ;
/*
* we have a lock on b and as long as we aren ' t changing
@ -1651,7 +1755,7 @@ cow_done:
}
p - > slots [ level ] = slot ;
err = setup_nodes_for_search ( trans , root , p , b , level ,
ins_len ) ;
ins_len , & write_lock_level ) ;
if ( err = = - EAGAIN )
goto again ;
if ( err ) {
@ -1661,6 +1765,19 @@ cow_done:
b = p - > nodes [ level ] ;
slot = p - > slots [ level ] ;
/*
* slot 0 is special , if we change the key
* we have to update the parent pointer
* which means we must have a write lock
* on the parent
*/
if ( slot = = 0 & & cow & &
write_lock_level < level + 1 ) {
write_lock_level = level + 1 ;
btrfs_release_path ( p ) ;
goto again ;
}
unlock_up ( p , level , lowest_unlock ) ;
if ( level = = lowest_level ) {
@ -1679,23 +1796,42 @@ cow_done:
}
if ( ! p - > skip_locking ) {
btrfs_clear_path_blocking ( p , NULL ) ;
err = btrfs_try_spin_lock ( b ) ;
if ( ! err ) {
btrfs_set_path_blocking ( p ) ;
btrfs_tree_lock ( b ) ;
btrfs_clear_path_blocking ( p , b ) ;
level = btrfs_header_level ( b ) ;
if ( level < = write_lock_level ) {
err = btrfs_try_tree_write_lock ( b ) ;
if ( ! err ) {
btrfs_set_path_blocking ( p ) ;
btrfs_tree_lock ( b ) ;
btrfs_clear_path_blocking ( p , b ,
BTRFS_WRITE_LOCK ) ;
}
p - > locks [ level ] = BTRFS_WRITE_LOCK ;
} else {
err = btrfs_try_tree_read_lock ( b ) ;
if ( ! err ) {
btrfs_set_path_blocking ( p ) ;
btrfs_tree_read_lock ( b ) ;
btrfs_clear_path_blocking ( p , b ,
BTRFS_READ_LOCK ) ;
}
p - > locks [ level ] = BTRFS_READ_LOCK ;
}
p - > nodes [ level ] = b ;
}
} else {
p - > slots [ level ] = slot ;
if ( ins_len > 0 & &
btrfs_leaf_free_space ( root , b ) < ins_len ) {
if ( write_lock_level < 1 ) {
write_lock_level = 1 ;
btrfs_release_path ( p ) ;
goto again ;
}
btrfs_set_path_blocking ( p ) ;
err = split_leaf ( trans , root , key ,
p , ins_len , ret = = 0 ) ;
btrfs_clear_path_blocking ( p , NULL ) ;
btrfs_clear_path_blocking ( p , NULL , 0 ) ;
BUG_ON ( err > 0 ) ;
if ( err ) {
@ -1976,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
add_root_to_dirty_list ( root ) ;
extent_buffer_get ( c ) ;
path - > nodes [ level ] = c ;
path - > locks [ level ] = 1 ;
path - > locks [ level ] = BTRFS_WRITE_LOCK ;
path - > slots [ level ] = 0 ;
return 0 ;
}
@ -3819,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
WARN_ON ( ! path - > keep_locks ) ;
again :
cur = btrfs_lock_root_node ( root ) ;
cur = btrfs_read_ lock_root_node ( root ) ;
level = btrfs_header_level ( cur ) ;
WARN_ON ( path - > nodes [ level ] ) ;
path - > nodes [ level ] = cur ;
path - > locks [ level ] = 1 ;
path - > locks [ level ] = BTRFS_READ_LOCK ;
if ( btrfs_header_generation ( cur ) < min_trans ) {
ret = 1 ;
@ -3913,12 +4049,12 @@ find_next_key:
cur = read_node_slot ( root , cur , slot ) ;
BUG_ON ( ! cur ) ;
btrfs_tree_lock ( cur ) ;
btrfs_tree_read_ lock ( cur ) ;
path - > locks [ level - 1 ] = 1 ;
path - > locks [ level - 1 ] = BTRFS_READ_LOCK ;
path - > nodes [ level - 1 ] = cur ;
unlock_up ( path , level , 1 ) ;
btrfs_clear_path_blocking ( path , NULL ) ;
btrfs_clear_path_blocking ( path , NULL , 0 ) ;
}
out :
if ( ret = = 0 )
@ -4034,6 +4170,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
int ret ;
int old_spinning = path - > leave_spinning ;
int force_blocking = 0 ;
int next_rw_lock = 0 ;
nritems = btrfs_header_nritems ( path - > nodes [ 0 ] ) ;
if ( nritems = = 0 )
@ -4051,6 +4188,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
again :
level = 1 ;
next = NULL ;
next_rw_lock = 0 ;
btrfs_release_path ( path ) ;
path - > keep_locks = 1 ;
@ -4096,11 +4234,12 @@ again:
}
if ( next ) {
btrfs_tree_unlock ( next ) ;
btrfs_tree_unlock_rw ( next , next_rw_lock ) ;
free_extent_buffer ( next ) ;
}
next = c ;
next_rw_lock = path - > locks [ level ] ;
ret = read_block_for_search ( NULL , root , path , & next , level ,
slot , & key ) ;
if ( ret = = - EAGAIN )
@ -4112,15 +4251,22 @@ again:
}
if ( ! path - > skip_locking ) {
ret = btrfs_try_spin _lock ( next ) ;
ret = btrfs_try_tree_read _lock ( next ) ;
if ( ! ret ) {
btrfs_set_path_blocking ( path ) ;
btrfs_tree_lock ( next ) ;
if ( ! force_blocking )
btrfs_clear_path_blocking ( path , next ) ;
btrfs_tree_read_lock ( next ) ;
if ( ! force_blocking ) {
btrfs_clear_path_blocking ( path , next ,
BTRFS_READ_LOCK ) ;
}
}
if ( force_blocking ) {
btrfs_set_lock_blocking_rw ( next ,
BTRFS_READ_LOCK ) ;
next_rw_lock = BTRFS_READ_LOCK_BLOCKING ;
} else {
next_rw_lock = BTRFS_READ_LOCK ;
}
if ( force_blocking )
btrfs_set_lock_blocking ( next ) ;
}
break ;
}
@ -4129,14 +4275,13 @@ again:
level - - ;
c = path - > nodes [ level ] ;
if ( path - > locks [ level ] )
btrfs_tree_unlock ( c ) ;
btrfs_tree_unlock_rw ( c , path - > locks [ level ] ) ;
free_extent_buffer ( c ) ;
path - > nodes [ level ] = next ;
path - > slots [ level ] = 0 ;
if ( ! path - > skip_locking )
path - > locks [ level ] = 1 ;
path - > locks [ level ] = next_rw_lock ;
if ( ! level )
break ;
@ -4151,16 +4296,21 @@ again:
}
if ( ! path - > skip_locking ) {
btrfs_assert_tree_locked ( path - > nodes [ level ] ) ;
ret = btrfs_try_spin_lock ( next ) ;
ret = btrfs_try_tree_read_lock ( next ) ;
if ( ! ret ) {
btrfs_set_path_blocking ( path ) ;
btrfs_tree_lock ( next ) ;
btrfs_tree_read_ lock ( next ) ;
if ( ! force_blocking )
btrfs_clear_path_blocking ( path , next ) ;
btrfs_clear_path_blocking ( path , next ,
BTRFS_READ_LOCK ) ;
}
if ( force_blocking ) {
btrfs_set_lock_blocking_rw ( next ,
BTRFS_READ_LOCK ) ;
next_rw_lock = BTRFS_READ_LOCK_BLOCKING ;
} else {
next_rw_lock = BTRFS_READ_LOCK ;
}
if ( force_blocking )
btrfs_set_lock_blocking ( next ) ;
}
}
ret = 0 ;