@ -366,19 +366,33 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
/*
* Insert a new pg_temp mapping
*/
static int pgid_cmp ( struct ceph_pg l , struct ceph_pg r )
{
u64 a = * ( u64 * ) & l ;
u64 b = * ( u64 * ) & r ;
if ( a < b )
return - 1 ;
if ( a > b )
return 1 ;
return 0 ;
}
static int __insert_pg_mapping ( struct ceph_pg_mapping * new ,
struct rb_root * root )
{
struct rb_node * * p = & root - > rb_node ;
struct rb_node * parent = NULL ;
struct ceph_pg_mapping * pg = NULL ;
int c ;
while ( * p ) {
parent = * p ;
pg = rb_entry ( parent , struct ceph_pg_mapping , node ) ;
if ( new - > pgid < pg - > pgid )
c = pgid_cmp ( new - > pgid , pg - > pgid ) ;
if ( c < 0 )
p = & ( * p ) - > rb_left ;
else if ( new - > pgid > pg - > pgid )
else if ( c > 0 )
p = & ( * p ) - > rb_right ;
else
return - EEXIST ;
@ -467,11 +481,11 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
ceph_decode_32_safe ( p , end , len , bad ) ;
for ( i = 0 ; i < len ; i + + ) {
int n , j ;
u64 pgid ;
struct ceph_pg pgid ;
struct ceph_pg_mapping * pg ;
ceph_decode_need ( p , end , sizeof ( u32 ) + sizeof ( u64 ) , bad ) ;
pgid = ceph_decode_64 ( p ) ;
ceph_decode_copy ( p , & pgid , sizeof ( pgid ) ) ;
n = ceph_decode_32 ( p ) ;
ceph_decode_need ( p , end , n * sizeof ( u32 ) , bad ) ;
pg = kmalloc ( sizeof ( * pg ) + n * sizeof ( u32 ) , GFP_NOFS ) ;
@ -487,7 +501,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
err = __insert_pg_mapping ( pg , & map - > pg_temp ) ;
if ( err )
goto bad ;
dout ( " added pg_temp %llx len %d \n " , pgid , len ) ;
dout ( " added pg_temp %llx len %d \n " , * ( u64 * ) & pgid , len ) ;
}
/* crush */
@ -659,19 +673,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
while ( len - - ) {
struct ceph_pg_mapping * pg ;
int j ;
u64 pgid ;
struct ceph_pg pgid ;
u32 pglen ;
ceph_decode_need ( p , end , sizeof ( u64 ) + sizeof ( u32 ) , bad ) ;
pgid = ceph_decode_64 ( p ) ;
ceph_decode_copy ( p , & pgid , sizeof ( pgid ) ) ;
pglen = ceph_decode_32 ( p ) ;
/* remove any? */
while ( rbp & & rb_entry ( rbp , struct ceph_pg_mapping ,
node ) - > pgid < = pgid ) {
while ( rbp & & pgid_cmp ( rb_entry ( rbp , struct ceph_pg_mapping ,
node ) - > pgid , pgid ) < = 0 ) {
struct rb_node * cur = rbp ;
rbp = rb_next ( rbp ) ;
dout ( " removed pg_temp %llx \n " ,
rb_entry ( cur , struct ceph_pg_mapping , node ) - > pgid ) ;
* ( u64 * ) & rb_entry ( cur , struct ceph_pg_mapping ,
node ) - > pgid ) ;
rb_erase ( cur , & map - > pg_temp ) ;
}
@ -690,14 +705,16 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
err = __insert_pg_mapping ( pg , & map - > pg_temp ) ;
if ( err )
goto bad ;
dout ( " added pg_temp %llx len %d \n " , pgid , pglen ) ;
dout ( " added pg_temp %llx len %d \n " , * ( u64 * ) & pgid ,
pglen ) ;
}
}
while ( rbp ) {
struct rb_node * cur = rbp ;
rbp = rb_next ( rbp ) ;
dout ( " removed pg_temp %llx \n " ,
rb_entry ( cur , struct ceph_pg_mapping , node ) - > pgid ) ;
* ( u64 * ) & rb_entry ( cur , struct ceph_pg_mapping ,
node ) - > pgid ) ;
rb_erase ( cur , & map - > pg_temp ) ;
}
@ -782,16 +799,19 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
struct ceph_osdmap * osdmap )
{
unsigned num , num_mask ;
union ceph_pg pgid ;
struct ceph_pg pgid ;
s32 preferred = ( s32 ) le32_to_cpu ( fl - > fl_pg_preferred ) ;
int poolid = le32_to_cpu ( fl - > fl_pg_pool ) ;
struct ceph_pg_pool_info * pool ;
unsigned ps ;
if ( poolid > = osdmap - > num_pools )
return - EIO ;
pool = & osdmap - > pg_pool [ poolid ] ;
pool = & osdmap - > pg_pool [ poolid ] ;
ps = ceph_full_name_hash ( oid , strlen ( oid ) ) ;
if ( preferred > = 0 ) {
ps + = preferred ;
num = le32_to_cpu ( pool - > v . lpg_num ) ;
num_mask = pool - > lpg_num_mask ;
} else {
@ -799,22 +819,17 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
num_mask = pool - > pg_num_mask ;
}
pgid . pg64 = 0 ; /* start with it zeroed out */
pgid . pg . ps = ceph_full_name_hash ( oid , strlen ( oid ) ) ;
pgid . pg . preferred = preferred ;
if ( preferred > = 0 )
pgid . pg . ps + = preferred ;
pgid . pg . pool = le32_to_cpu ( fl - > fl_pg_pool ) ;
pgid . ps = cpu_to_le16 ( ps ) ;
pgid . preferred = cpu_to_le16 ( preferred ) ;
pgid . pool = fl - > fl_pg_pool ;
if ( preferred > = 0 )
dout ( " calc_object_layout '%s' pgid %d.%xp%d (%llx) \n " , oid ,
pgid . pg . pool , pgid . pg . ps , ( int ) preferred , pgid . pg64 ) ;
dout ( " calc_object_layout '%s' pgid %d.%xp%d \n " , oid , poolid , ps ,
( int ) preferred ) ;
else
dout ( " calc_object_layout '%s' pgid %d.%x (%llx) \n " , oid ,
pgid . pg . pool , pgid . pg . ps , pgid . pg64 ) ;
dout ( " calc_object_layout '%s' pgid %d.%x \n " , oid , poolid , ps ) ;
ol - > ol_pgid = cpu_to_le64 ( pgid . pg64 ) ;
ol - > ol_pgid = pgid ;
ol - > ol_stripe_unit = fl - > fl_object_stripe_unit ;
return 0 ;
}
@ -822,21 +837,24 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
* Calculate raw osd vector for the given pgid . Return pointer to osd
* array , or NULL on failure .
*/
static int * calc_pg_raw ( struct ceph_osdmap * osdmap , union ceph_pg pgid ,
static int * calc_pg_raw ( struct ceph_osdmap * osdmap , struct ceph_pg pgid ,
int * osds , int * num )
{
struct rb_node * n = osdmap - > pg_temp . rb_node ;
struct ceph_pg_mapping * pg ;
struct ceph_pg_pool_info * pool ;
int ruleno ;
unsigned pps ; /* placement ps */
unsigned poolid , ps , pps ;
int preferred ;
int c ;
/* pg_temp? */
while ( n ) {
pg = rb_entry ( n , struct ceph_pg_mapping , node ) ;
if ( pgid . pg64 < pg - > pgid )
c = pgid_cmp ( pgid , pg - > pgid ) ;
if ( c < 0 )
n = n - > rb_left ;
else if ( pgid . pg64 > pg - > pgid )
else if ( c > 0 )
n = n - > rb_right ;
else {
* num = pg - > len ;
@ -845,36 +863,40 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, union ceph_pg pgid,
}
/* crush */
if ( pgid . pg . pool > = osdmap - > num_pools )
poolid = le32_to_cpu ( pgid . pool ) ;
ps = le16_to_cpu ( pgid . ps ) ;
preferred = ( s16 ) le16_to_cpu ( pgid . preferred ) ;
if ( poolid > = osdmap - > num_pools )
return NULL ;
pool = & osdmap - > pg_pool [ pgid . pg . pool ] ;
pool = & osdmap - > pg_pool [ poolid ] ;
ruleno = crush_find_rule ( osdmap - > crush , pool - > v . crush_ruleset ,
pool - > v . type , pool - > v . size ) ;
if ( ruleno < 0 ) {
pr_err ( " no crush rule pool %d type %d size %d \n " ,
pgid . pg . p ool , pool - > v . type , pool - > v . size ) ;
poolid , pool - > v . type , pool - > v . size ) ;
return NULL ;
}
if ( pgid . pg . p referred > = 0 )
pps = ceph_stable_mod ( pgid . pg . p s ,
if ( preferred > = 0 )
pps = ceph_stable_mod ( ps ,
le32_to_cpu ( pool - > v . lpgp_num ) ,
pool - > lpgp_num_mask ) ;
else
pps = ceph_stable_mod ( pgid . pg . p s ,
pps = ceph_stable_mod ( ps ,
le32_to_cpu ( pool - > v . pgp_num ) ,
pool - > pgp_num_mask ) ;
pps + = pgid . pg . p ool ;
pps + = poolid ;
* num = crush_do_rule ( osdmap - > crush , ruleno , pps , osds ,
min_t ( int , pool - > v . size , * num ) ,
pgid . pg . p referred , osdmap - > osd_weight ) ;
preferred , osdmap - > osd_weight ) ;
return osds ;
}
/*
* Return primary osd for given pgid , or - 1 if none .
*/
int ceph_calc_pg_primary ( struct ceph_osdmap * osdmap , union ceph_pg pgid )
int ceph_calc_pg_primary ( struct ceph_osdmap * osdmap , struct ceph_pg pgid )
{
int rawosds [ 10 ] , * osds ;
int i , num = ARRAY_SIZE ( rawosds ) ;