@ -41,19 +41,13 @@
# define XFSA_FIXUP_BNO_OK 1
# define XFSA_FIXUP_CNT_OK 2
/*
* Prototypes for per - ag allocation routines
*/
STATIC int xfs_alloc_ag_vextent_exact ( xfs_alloc_arg_t * ) ;
STATIC int xfs_alloc_ag_vextent_near ( xfs_alloc_arg_t * ) ;
STATIC int xfs_alloc_ag_vextent_size ( xfs_alloc_arg_t * ) ;
STATIC int xfs_alloc_ag_vextent_small ( xfs_alloc_arg_t * ,
xfs_btree_cur_t * , xfs_agblock_t * , xfs_extlen_t * , int * ) ;
/*
* Internal functions .
*/
xfs_btree_cur_t * , xfs_agblock_t * , xfs_extlen_t * , int * ) ;
STATIC void xfs_alloc_busy_trim ( struct xfs_alloc_arg * ,
xfs_agblock_t , xfs_extlen_t , xfs_agblock_t * , xfs_extlen_t * ) ;
/*
* Lookup the record equal to [ bno , len ] in the btree given by cur .
@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
xfs_extlen_t * reslen ) /* result length */
{
xfs_agblock_t bno ;
xfs_extlen_t diff ;
xfs_extlen_t len ;
if ( args - > alignment > 1 & & foundlen > = args - > minlen ) {
bno = roundup ( foundbno , args - > alignment ) ;
diff = bno - foundbno ;
len = diff > = foundlen ? 0 : foundlen - diff ;
/* Trim busy sections out of found extent */
xfs_alloc_busy_trim ( args , foundbno , foundlen , & bno , & len ) ;
if ( args - > alignment > 1 & & len > = args - > minlen ) {
xfs_agblock_t aligned_bno = roundup ( bno , args - > alignment ) ;
xfs_extlen_t diff = aligned_bno - bno ;
* resbno = aligned_bno ;
* reslen = diff > = len ? 0 : len - diff ;
} else {
bno = foundbno ;
len = foundlen ;
* res bno = bno ;
* res len = len ;
}
* resbno = bno ;
* reslen = len ;
}
/*
@ -541,16 +537,8 @@ xfs_alloc_ag_vextent(
if ( error )
return error ;
/*
* Search the busylist for these blocks and mark the
* transaction as synchronous if blocks are found . This
* avoids the need to block due to a synchronous log
* force to ensure correct ordering as the synchronous
* transaction will guarantee that for us .
*/
if ( xfs_alloc_busy_search ( args - > mp , args - > agno ,
args - > agbno , args - > len ) )
xfs_trans_set_sync ( args - > tp ) ;
ASSERT ( ! xfs_alloc_busy_search ( args - > mp , args - > agno ,
args - > agbno , args - > len ) ) ;
}
if ( ! args - > isfl ) {
@ -577,14 +565,14 @@ xfs_alloc_ag_vextent_exact(
{
xfs_btree_cur_t * bno_cur ; /* by block-number btree cursor */
xfs_btree_cur_t * cnt_cur ; /* by count btree cursor */
xfs_agblock_t end ; /* end of allocated extent */
int error ;
xfs_agblock_t fbno ; /* start block of found extent */
xfs_agblock_t fend ; /* end block of found extent */
xfs_extlen_t flen ; /* length of found extent */
xfs_agblock_t tbno ; /* start block of trimmed extent */
xfs_extlen_t tlen ; /* length of trimmed extent */
xfs_agblock_t tend ; /* end block of trimmed extent */
xfs_agblock_t end ; /* end of allocated extent */
int i ; /* success/failure of operation */
xfs_agblock_t maxend ; /* end of maximal extent */
xfs_agblock_t minend ; /* end of minimal extent */
xfs_extlen_t rlen ; /* length of returned extent */
ASSERT ( args - > alignment = = 1 ) ;
@ -614,14 +602,22 @@ xfs_alloc_ag_vextent_exact(
goto error0 ;
XFS_WANT_CORRUPTED_GOTO ( i = = 1 , error0 ) ;
ASSERT ( fbno < = args - > agbno ) ;
minend = args - > agbno + args - > minlen ;
maxend = args - > agbno + args - > maxlen ;
fend = fbno + flen ;
/*
* Give up if the freespace isn ' t long enough for the minimum request .
* Check for overlapping busy extents .
*/
if ( fend < minend )
xfs_alloc_busy_trim ( args , fbno , flen , & tbno , & tlen ) ;
/*
* Give up if the start of the extent is busy , or the freespace isn ' t
* long enough for the minimum request .
*/
if ( tbno > args - > agbno )
goto not_found ;
if ( tlen < args - > minlen )
goto not_found ;
tend = tbno + tlen ;
if ( tend < args - > agbno + args - > minlen )
goto not_found ;
/*
@ -630,14 +626,14 @@ xfs_alloc_ag_vextent_exact(
*
* Fix the length according to mod and prod if given .
*/
end = XFS_AGBLOCK_MIN ( f end, maxend ) ;
end = XFS_AGBLOCK_MIN ( t end, args - > agbno + args - > maxl en ) ;
args - > len = end - args - > agbno ;
xfs_alloc_fix_len ( args ) ;
if ( ! xfs_alloc_fix_minleft ( args ) )
goto not_found ;
rlen = args - > len ;
ASSERT ( args - > agbno + rlen < = f end) ;
ASSERT ( args - > agbno + rlen < = t end) ;
end = args - > agbno + rlen ;
/*
@ -686,11 +682,11 @@ xfs_alloc_find_best_extent(
struct xfs_btree_cur * * scur , /* searching cursor */
xfs_agblock_t gdiff , /* difference for search comparison */
xfs_agblock_t * sbno , /* extent found by search */
xfs_extlen_t * slen ,
xfs_extlen_t * slena , /* aligned length */
xfs_extlen_t * slen , /* extent length */
xfs_agblock_t * sbnoa , /* aligned extent found by search */
xfs_extlen_t * slena , /* aligned extent length */
int dir ) /* 0 = search right, 1 = search left */
{
xfs_agblock_t bno ;
xfs_agblock_t new ;
xfs_agblock_t sdiff ;
int error ;
@ -708,16 +704,16 @@ xfs_alloc_find_best_extent(
if ( error )
goto error0 ;
XFS_WANT_CORRUPTED_GOTO ( i = = 1 , error0 ) ;
xfs_alloc_compute_aligned ( args , * sbno , * slen , & bno , slena ) ;
xfs_alloc_compute_aligned ( args , * sbno , * slen , s bnoa , slena ) ;
/*
* The good extent is closer than this one .
*/
if ( ! dir ) {
if ( bno > = args - > agbno + gdiff )
if ( * s bnoa > = args - > agbno + gdiff )
goto out_use_good ;
} else {
if ( bno < = args - > agbno - gdiff )
if ( * s bnoa < = args - > agbno - gdiff )
goto out_use_good ;
}
@ -729,8 +725,8 @@ xfs_alloc_find_best_extent(
xfs_alloc_fix_len ( args ) ;
sdiff = xfs_alloc_compute_diff ( args - > agbno , args - > len ,
args - > alignment , * sbno ,
* slen , & new ) ;
args - > alignment , * sbnoa ,
* slena , & new ) ;
/*
* Choose closer size and invalidate other cursor .
@ -780,7 +776,7 @@ xfs_alloc_ag_vextent_near(
xfs_agblock_t gtbnoa ; /* aligned ... */
xfs_extlen_t gtdiff ; /* difference to right side entry */
xfs_extlen_t gtlen ; /* length of right side entry */
xfs_extlen_t gtlena = 0 ; /* aligned ... */
xfs_extlen_t gtlena ; /* aligned ... */
xfs_agblock_t gtnew ; /* useful start bno of right side */
int error ; /* error code */
int i ; /* result code, temporary */
@ -789,9 +785,10 @@ xfs_alloc_ag_vextent_near(
xfs_agblock_t ltbnoa ; /* aligned ... */
xfs_extlen_t ltdiff ; /* difference to left side entry */
xfs_extlen_t ltlen ; /* length of left side entry */
xfs_extlen_t ltlena = 0 ; /* aligned ... */
xfs_extlen_t ltlena ; /* aligned ... */
xfs_agblock_t ltnew ; /* useful start bno of left side */
xfs_extlen_t rlen ; /* length of returned extent */
int forced = 0 ;
# if defined(DEBUG) && defined(__KERNEL__)
/*
* Randomly don ' t execute the first algorithm .
@ -800,13 +797,20 @@ xfs_alloc_ag_vextent_near(
dofirst = random32 ( ) & 1 ;
# endif
restart :
bno_cur_lt = NULL ;
bno_cur_gt = NULL ;
ltlen = 0 ;
gtlena = 0 ;
ltlena = 0 ;
/*
* Get a cursor for the by - size btree .
*/
cnt_cur = xfs_allocbt_init_cursor ( args - > mp , args - > tp , args - > agbp ,
args - > agno , XFS_BTNUM_CNT ) ;
ltlen = 0 ;
bno_cur_lt = bno_cur_gt = NULL ;
/*
* See if there are any free extents as big as maxlen .
*/
@ -822,11 +826,13 @@ xfs_alloc_ag_vextent_near(
goto error0 ;
if ( i = = 0 | | ltlen = = 0 ) {
xfs_btree_del_cursor ( cnt_cur , XFS_BTREE_NOERROR ) ;
trace_xfs_alloc_near_noentry ( args ) ;
return 0 ;
}
ASSERT ( i = = 1 ) ;
}
args - > wasfromfl = 0 ;
/*
* First algorithm .
* If the requested extent is large wrt the freespaces available
@ -890,7 +896,7 @@ xfs_alloc_ag_vextent_near(
if ( args - > len < blen )
continue ;
ltdiff = xfs_alloc_compute_diff ( args - > agbno , args - > len ,
args - > alignment , ltbno , ltlen , & ltnew ) ;
args - > alignment , ltbnoa , ltlena , & ltnew ) ;
if ( ltnew ! = NULLAGBLOCK & &
( args - > len > blen | | ltdiff < bdiff ) ) {
bdiff = ltdiff ;
@ -1042,11 +1048,12 @@ xfs_alloc_ag_vextent_near(
args - > len = XFS_EXTLEN_MIN ( ltlena , args - > maxlen ) ;
xfs_alloc_fix_len ( args ) ;
ltdiff = xfs_alloc_compute_diff ( args - > agbno , args - > len ,
args - > alignment , ltbno , ltlen , & ltnew ) ;
args - > alignment , ltbnoa , ltlena , & ltnew ) ;
error = xfs_alloc_find_best_extent ( args ,
& bno_cur_lt , & bno_cur_gt ,
ltdiff , & gtbno , & gtlen , & gtlena ,
ltdiff , & gtbno , & gtlen ,
& gtbnoa , & gtlena ,
0 /* search right */ ) ;
} else {
ASSERT ( gtlena > = args - > minlen ) ;
@ -1057,11 +1064,12 @@ xfs_alloc_ag_vextent_near(
args - > len = XFS_EXTLEN_MIN ( gtlena , args - > maxlen ) ;
xfs_alloc_fix_len ( args ) ;
gtdiff = xfs_alloc_compute_diff ( args - > agbno , args - > len ,
args - > alignment , gtbno , gtlen , & gtnew ) ;
args - > alignment , gtbnoa , gtlena , & gtnew ) ;
error = xfs_alloc_find_best_extent ( args ,
& bno_cur_gt , & bno_cur_lt ,
gtdiff , & ltbno , & ltlen , & ltlena ,
gtdiff , & ltbno , & ltlen ,
& ltbnoa , & ltlena ,
1 /* search left */ ) ;
}
@ -1073,6 +1081,12 @@ xfs_alloc_ag_vextent_near(
* If we couldn ' t get anything , give up .
*/
if ( bno_cur_lt = = NULL & & bno_cur_gt = = NULL ) {
if ( ! forced + + ) {
trace_xfs_alloc_near_busy ( args ) ;
xfs_log_force ( args - > mp , XFS_LOG_SYNC ) ;
goto restart ;
}
trace_xfs_alloc_size_neither ( args ) ;
args - > agbno = NULLAGBLOCK ;
return 0 ;
@ -1107,12 +1121,13 @@ xfs_alloc_ag_vextent_near(
return 0 ;
}
rlen = args - > len ;
( void ) xfs_alloc_compute_diff ( args - > agbno , rlen , args - > alignment , ltbno ,
ltlen , & ltnew ) ;
( void ) xfs_alloc_compute_diff ( args - > agbno , rlen , args - > alignment ,
ltbnoa , ltlena , & ltnew ) ;
ASSERT ( ltnew > = ltbno ) ;
ASSERT ( ltnew + rlen < = ltbno + ltlen ) ;
ASSERT ( ltnew + rlen < = ltbnoa + ltlena ) ;
ASSERT ( ltnew + rlen < = be32_to_cpu ( XFS_BUF_TO_AGF ( args - > agbp ) - > agf_length ) ) ;
args - > agbno = ltnew ;
if ( ( error = xfs_alloc_fixup_trees ( cnt_cur , bno_cur_lt , ltbno , ltlen ,
ltnew , rlen , XFSA_FIXUP_BNO_OK ) ) )
goto error0 ;
@ -1155,26 +1170,35 @@ xfs_alloc_ag_vextent_size(
int i ; /* temp status variable */
xfs_agblock_t rbno ; /* returned block number */
xfs_extlen_t rlen ; /* length of returned extent */
int forced = 0 ;
restart :
/*
* Allocate and initialize a cursor for the by - size btree .
*/
cnt_cur = xfs_allocbt_init_cursor ( args - > mp , args - > tp , args - > agbp ,
args - > agno , XFS_BTNUM_CNT ) ;
bno_cur = NULL ;
/*
* Look for an entry > = maxlen + alignment - 1 blocks .
*/
if ( ( error = xfs_alloc_lookup_ge ( cnt_cur , 0 ,
args - > maxlen + args - > alignment - 1 , & i ) ) )
goto error0 ;
/*
* If none , then pick up the last entry in the tree unless the
* tree is empty .
* If none or we have busy extents that we cannot allocate from , then
* we have to settle for a smaller extent . In the case that there are
* no large extents , this will return the last entry in the tree unless
* the tree is empty . In the case that there are only busy large
* extents , this will return the largest small extent unless there
* are no smaller extents available .
*/
if ( ! i ) {
if ( ( error = xfs_alloc_ag_vextent_small ( args , cnt_cur , & fbno ,
& flen , & i ) ) )
if ( ! i | | forced > 1 ) {
error = xfs_alloc_ag_vextent_small ( args , cnt_cur ,
& fbno , & flen , & i ) ;
if ( error )
goto error0 ;
if ( i = = 0 | | flen = = 0 ) {
xfs_btree_del_cursor ( cnt_cur , XFS_BTREE_NOERROR ) ;
@ -1182,22 +1206,56 @@ xfs_alloc_ag_vextent_size(
return 0 ;
}
ASSERT ( i = = 1 ) ;
xfs_alloc_compute_aligned ( args , fbno , flen , & rbno , & rlen ) ;
} else {
/*
* Search for a non - busy extent that is large enough .
* If we are at low space , don ' t check , or if we fall off
* the end of the btree , turn off the busy check and
* restart .
*/
for ( ; ; ) {
error = xfs_alloc_get_rec ( cnt_cur , & fbno , & flen , & i ) ;
if ( error )
goto error0 ;
XFS_WANT_CORRUPTED_GOTO ( i = = 1 , error0 ) ;
xfs_alloc_compute_aligned ( args , fbno , flen ,
& rbno , & rlen ) ;
if ( rlen > = args - > maxlen )
break ;
error = xfs_btree_increment ( cnt_cur , 0 , & i ) ;
if ( error )
goto error0 ;
if ( i = = 0 ) {
/*
* Our only valid extents must have been busy .
* Make it unbusy by forcing the log out and
* retrying . If we ' ve been here before , forcing
* the log isn ' t making the extents available ,
* which means they have probably been freed in
* this transaction . In that case , we have to
* give up on them and we ' ll attempt a minlen
* allocation the next time around .
*/
xfs_btree_del_cursor ( cnt_cur ,
XFS_BTREE_NOERROR ) ;
trace_xfs_alloc_size_busy ( args ) ;
if ( ! forced + + )
xfs_log_force ( args - > mp , XFS_LOG_SYNC ) ;
goto restart ;
}
}
}
/*
* There ' s a freespace as big as maxlen + alignment - 1 , get it .
*/
else {
if ( ( error = xfs_alloc_get_rec ( cnt_cur , & fbno , & flen , & i ) ) )
goto error0 ;
XFS_WANT_CORRUPTED_GOTO ( i = = 1 , error0 ) ;
}
/*
* In the first case above , we got the last entry in the
* by - size btree . Now we check to see if the space hits maxlen
* once aligned ; if not , we search left for something better .
* This can ' t happen in the second case above .
*/
xfs_alloc_compute_aligned ( args , fbno , flen , & rbno , & rlen ) ;
rlen = XFS_EXTLEN_MIN ( args - > maxlen , rlen ) ;
XFS_WANT_CORRUPTED_GOTO ( rlen = = 0 | |
( rlen < = flen & & rbno + rlen < = fbno + flen ) , error0 ) ;
@ -1251,13 +1309,19 @@ xfs_alloc_ag_vextent_size(
* Fix up the length .
*/
args - > len = rlen ;
xfs_alloc_fix_len ( args ) ;
if ( rlen < args - > minlen | | ! xfs_alloc_fix_minleft ( args ) ) {
xfs_btree_del_cursor ( cnt_cur , XFS_BTREE_NOERROR ) ;
trace_xfs_alloc_size_nominleft ( args ) ;
args - > agbno = NULLAGBLOCK ;
return 0 ;
if ( rlen < args - > minlen ) {
if ( ! forced + + ) {
xfs_btree_del_cursor ( cnt_cur , XFS_BTREE_NOERROR ) ;
trace_xfs_alloc_size_busy ( args ) ;
xfs_log_force ( args - > mp , XFS_LOG_SYNC ) ;
goto restart ;
}
goto out_nominleft ;
}
xfs_alloc_fix_len ( args ) ;
if ( ! xfs_alloc_fix_minleft ( args ) )
goto out_nominleft ;
rlen = args - > len ;
XFS_WANT_CORRUPTED_GOTO ( rlen < = flen , error0 ) ;
/*
@ -1287,6 +1351,12 @@ error0:
if ( bno_cur )
xfs_btree_del_cursor ( bno_cur , XFS_BTREE_ERROR ) ;
return error ;
out_nominleft :
xfs_btree_del_cursor ( cnt_cur , XFS_BTREE_NOERROR ) ;
trace_xfs_alloc_size_nominleft ( args ) ;
args - > agbno = NULLAGBLOCK ;
return 0 ;
}
/*
@ -2650,6 +2720,185 @@ xfs_alloc_busy_search(
return match ;
}
/*
 * For a given extent [bno, len], search the busy extent tree to find a
 * subset of the extent that is not busy.
 *
 * args - allocation arguments; supplies the busy extent rb-tree and the
 *        spinlock protecting it (args->pag->pagb_tree, pagb_lock), the
 *        minlen/maxlen limits used to pick a candidate, and mp/agno for
 *        the tracepoint
 * bno  - start block of the extent to examine
 * len  - length of the extent to examine; asserted to be non-zero
 * rbno - out: start block of the trimmed extent
 * rlen - out: length of the trimmed extent; 0 when the walk gave up
 *
 * If *rlen is smaller than args->minlen no suitable extent could be found,
 * and the higher level code needs to force out the log and retry the
 * allocation.
 */
STATIC void
xfs_alloc_busy_trim (
struct xfs_alloc_arg * args ,
xfs_agblock_t bno ,
xfs_extlen_t len ,
xfs_agblock_t * rbno ,
xfs_extlen_t * rlen )
{
/* Working copy of the candidate extent; narrowed as overlaps are found. */
xfs_agblock_t fbno = bno ;
xfs_extlen_t flen = len ;
struct rb_node * rbp ;
ASSERT ( flen > 0 ) ;
/* The busy extent tree is walked with pagb_lock held throughout. */
spin_lock ( & args - > pag - > pagb_lock ) ;
rbp = args - > pag - > pagb_tree . rb_node ;
/*
 * Descend from the root. Stop when we run off the tree or as soon as
 * the remaining candidate has become shorter than minlen.
 */
while ( rbp & & flen > = args - > minlen ) {
struct xfs_busy_extent * busyp =
rb_entry ( rbp , struct xfs_busy_extent , rb_node ) ;
/* fend and bend are exclusive end blocks (start + length). */
xfs_agblock_t fend = fbno + flen ;
xfs_agblock_t bbno = busyp - > bno ;
xfs_agblock_t bend = bbno + busyp - > length ;
/*
 * No overlap with this busy extent: the candidate lies entirely below
 * it (go left) or entirely at/above its end (go right).
 */
if ( fend < = bbno ) {
rbp = rbp - > rb_left ;
continue ;
} else if ( fbno > = bend ) {
rbp = rbp - > rb_right ;
continue ;
}
if ( bbno < = fbno ) {
/* start overlap */
/*
 * Case 1:
 *    bbno           bend
 *    +BBBBBBBBBBBBBBBBB+
 *        +---------+
 *        fbno   fend
 *
 * Case 2:
 *    bbno           bend
 *    +BBBBBBBBBBBBBBBBB+
 *    +-------------+
 *    fbno       fend
 *
 * Case 3:
 *    bbno           bend
 *    +BBBBBBBBBBBBBBBBB+
 *        +-------------+
 *        fbno       fend
 *
 * Case 4:
 *    bbno           bend
 *    +BBBBBBBBBBBBBBBBB+
 *    +-----------------+
 *    fbno           fend
 *
 * No unbusy region in extent, return failure.
 */
if ( fend < = bend )
goto fail ;
/*
 * Case 5:
 *    bbno           bend
 *    +BBBBBBBBBBBBBBBBB+
 *        +----------------------+
 *        fbno                fend
 *
 * Case 6:
 *    bbno           bend
 *    +BBBBBBBBBBBBBBBBB+
 *    +--------------------------+
 *    fbno                    fend
 *
 * Needs to be trimmed to:
 *                       +-------+
 *                       fbno fend
 */
fbno = bend ;
} else if ( bend > = fend ) {
/* end overlap */
/*
 * Case 7:
 *             bbno           bend
 *             +BBBBBBBBBBBBBBBBB+
 *    +------------------+
 *    fbno            fend
 *
 * Case 8:
 *             bbno           bend
 *             +BBBBBBBBBBBBBBBBB+
 *    +--------------------------+
 *    fbno                    fend
 *
 * Needs to be trimmed to:
 *    +-------+
 *    fbno fend
 */
fend = bbno ;
} else {
/* middle overlap */
/*
 * Case 9:
 *             bbno           bend
 *             +BBBBBBBBBBBBBBBBB+
 *    +-----------------------------------+
 *    fbno                             fend
 *
 * Can be trimmed to:
 *    +-------+        OR         +-------+
 *    fbno fend                   fbno fend
 *
 * Backward allocation leads to significant fragmentation of
 * directories, which degrades directory performance, therefore we
 * always want to choose the option that produces forward allocation
 * patterns.
 * Preferring the lower bno extent will make the next request use
 * "fend" as the start of the next allocation; if the segment is no
 * longer busy at that point, we'll get a contiguous allocation, but
 * even if it is still busy, we will get a forward allocation.
 * We try to avoid choosing the segment at "bend", because that can
 * lead to the next allocation taking the segment at "fbno", which
 * would be a backward allocation. We only use the segment at "bend"
 * if it is much larger than the current requested size (at least
 * four times maxlen, per the test below), because in that case
 * there's a good chance subsequent allocations will be contiguous.
 */
if ( bbno - fbno > = args - > maxlen ) {
/* left candidate can hold a full maxlen allocation */
fend = bbno ;
} else if ( fend - bend > = args - > maxlen * 4 ) {
/* right candidate has enough free space */
fbno = bend ;
} else if ( bbno - fbno > = args - > minlen ) {
/* left candidate fits minimum requirement */
fend = bbno ;
} else {
goto fail ;
}
}
/* Recompute the length of the narrowed candidate before continuing. */
flen = fend - fbno ;
}
spin_unlock ( & args - > pag - > pagb_lock ) ;
/* Only emit the tracepoint when the extent was actually trimmed. */
if ( fbno ! = bno | | flen ! = len ) {
trace_xfs_alloc_busy_trim ( args - > mp , args - > agno , bno , len ,
fbno , flen ) ;
}
* rbno = fbno ;
* rlen = flen ;
return ;
fail :
/*
 * Return a zero extent length as the failure indication; *rbno is set
 * to the start block reached so far. All callers re-check whether the
 * trimmed extent satisfies the minlen requirement.
 */
spin_unlock ( & args - > pag - > pagb_lock ) ;
trace_xfs_alloc_busy_trim ( args - > mp , args - > agno , bno , len , fbno , 0 ) ;
* rbno = fbno ;
* rlen = 0 ;
}
void
xfs_alloc_busy_clear (
struct xfs_mount * mp ,