@@ -157,13 +157,14 @@ static int __init uvhub_to_first_apicid(int uvhub)
  * clear of the Timeout bit (as well) will free the resource. No reply will
  * be sent (the hardware will only do one reply per message).
  */
-static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp)
+static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
+						int do_acknowledge)
 {
 	unsigned long dw;
 	struct bau_pq_entry *msg;
 
 	msg = mdp->msg;
-	if (!msg->canceled) {
+	if (!msg->canceled && do_acknowledge) {
 		dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
 		write_mmr_sw_ack(dw);
 	}
@@ -212,8 +213,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
 			if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
 				unsigned long mr;
 				/*
-				 * is the resource timed out?
-				 * make everyone ignore the cancelled message.
+				 * Is the resource timed out?
+				 * Make everyone ignore the cancelled message.
 				 */
 				msg2->canceled = 1;
 				stat->d_canceled++;
@@ -231,8 +232,8 @@ static void bau_process_retry_msg(struct msg_desc *mdp,
  * Do all the things a cpu should do for a TLB shootdown message.
  * Other cpu's may come here at the same time for this message.
  */
-static void bau_process_message(struct msg_desc *mdp,
-					struct bau_control *bcp)
+static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
+					int do_acknowledge)
 {
 	short socket_ack_count = 0;
 	short *sp;
@@ -284,8 +285,9 @@ static void bau_process_message(struct msg_desc *mdp,
 		if (msg_ack_count == bcp->cpus_in_uvhub) {
 			/*
 			 * All cpus in uvhub saw it; reply
+			 * (unless we are in the UV2 workaround)
 			 */
-			reply_to_message(mdp, bcp);
+			reply_to_message(mdp, bcp, do_acknowledge);
 		}
 	}
@@ -491,27 +493,138 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
 /*
  * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
  */
-static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu)
+static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
 {
 	unsigned long descriptor_status;
 	unsigned long descriptor_status2;
 
 	descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
-	descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL;
+	descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
 	descriptor_status = (descriptor_status << 1) | descriptor_status2;
 	return descriptor_status;
 }
 
+/*
+ * Return whether the status of the descriptor that is normally used for this
+ * cpu (the one indexed by its hub-relative cpu number) is busy.
+ * The status of the original 32 descriptors is always reflected in the 64
+ * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
+ * The bit provided by the activation_status_2 register is irrelevant to
+ * the status if it is only being tested for busy or not busy.
+ */
+int normal_busy(struct bau_control *bcp)
+{
+	int cpu = bcp->uvhub_cpu;
+	int mmr_offset;
+	int right_shift;
+
+	mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+	right_shift = cpu * UV_ACT_STATUS_SIZE;
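+	/*
+	 * The '<< 1' aligns the 2-bit status field with the 3-bit encoding
+	 * built by uv2_read_status(); the low (status_2) bit is not needed
+	 * for a busy/not-busy test.
+	 */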
+	return ((((read_lmmr(mmr_offset) >> right_shift) &
+				UV_ACT_STATUS_MASK) << 1) == UV2H_DESC_BUSY);
+}
+
+/*
+ * Entered when a bau descriptor has gone into a permanent busy wait because
+ * of a hardware bug.
+ * Work around the bug.
+ */
+int handle_uv2_busy(struct bau_control *bcp)
+{
+	int busy_one = bcp->using_desc;
+	int normal = bcp->uvhub_cpu;
+	int selected = -1;
+	int i;
+	unsigned long descriptor_status;
+	unsigned long status;
+	int mmr_offset;
+	struct bau_desc *bau_desc_old;
+	struct bau_desc *bau_desc_new;
+	struct bau_control *hmaster = bcp->uvhub_master;
+	struct ptc_stats *stat = bcp->statp;
+	cycles_t ttm;
+
+	stat->s_uv2_wars++;
+	spin_lock(&hmaster->uvhub_lock);
+	/* try for the original first */
+	if (busy_one != normal) {
+		if (!normal_busy(bcp))
+			selected = normal;
+	}
+	if (selected < 0) {
+		/* can't use the normal, select an alternate */
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+		descriptor_status = read_lmmr(mmr_offset);
+
+		/* scan available descriptors 32-63 */
+		for (i = 0; i < UV_CPUS_PER_AS; i++) {
+			if ((hmaster->inuse_map & (1 << i)) == 0) {
+				status = ((descriptor_status >>
+						(i * UV_ACT_STATUS_SIZE)) &
+						UV_ACT_STATUS_MASK) << 1;
+				if (status != UV2H_DESC_BUSY) {
+					selected = i + UV_CPUS_PER_AS;
+					break;
+				}
+			}
+		}
+	}
+	if (busy_one != normal)
+		/* mark the busy alternate as not in-use */
+		hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
+	if (selected >= 0) {
+		/* switch to the selected descriptor */
+		if (selected != normal) {
+			/* set the selected alternate as in-use */
+			hmaster->inuse_map |=
+					(1 << (selected - UV_CPUS_PER_AS));
+			if (selected > stat->s_uv2_wars_hw)
+				stat->s_uv2_wars_hw = selected;
+		}
+		bau_desc_old = bcp->descriptor_base;
+		bau_desc_old += (ITEMS_PER_DESC * busy_one);
+		bcp->using_desc = selected;
+		bau_desc_new = bcp->descriptor_base;
+		bau_desc_new += (ITEMS_PER_DESC * selected);
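+		/*
+		 * Carry the stuck descriptor's contents (header and
+		 * payload) over to the newly selected slot before it
+		 * is activated.
+		 */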
+		*bau_desc_new = *bau_desc_old;
+	} else {
+		/*
+		 * All are busy. Wait for the normal one for this cpu to
+		 * free up.
+		 */
+		stat->s_uv2_war_waits++;
+		spin_unlock(&hmaster->uvhub_lock);
+		ttm = get_cycles();
+		do {
+			cpu_relax();
+		} while (normal_busy(bcp));
+		spin_lock(&hmaster->uvhub_lock);
+		/* switch back to the original descriptor */
+		bau_desc_old = bcp->descriptor_base;
+		bau_desc_old += (ITEMS_PER_DESC * busy_one);
+		bcp->using_desc = normal;
+		bau_desc_new = bcp->descriptor_base;
+		bau_desc_new += (ITEMS_PER_DESC * normal);
+		*bau_desc_new = *bau_desc_old;	/* copy the entire descriptor */
+	}
+	spin_unlock(&hmaster->uvhub_lock);
+	return FLUSH_RETRY_BUSYBUG;
+}
+
 static int uv2_wait_completion(struct bau_desc *bau_desc,
 				unsigned long mmr_offset, int right_shift,
 				struct bau_control *bcp, long try)
 {
 	unsigned long descriptor_stat;
 	cycles_t ttm;
-	int cpu = bcp->uvhub_cpu;
+	int desc = bcp->using_desc;
+	long busy_reps = 0;
 	struct ptc_stats *stat = bcp->statp;
 
-	descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
+	descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
 
 	/* spin on the status MMR, waiting for it to go idle */
 	while (descriptor_stat != UV2H_DESC_IDLE) {
@@ -542,12 +655,23 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 			bcp->conseccompletes = 0;
 			return FLUSH_RETRY_TIMEOUT;
 		} else {
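+			/*
+			 * Still BUSY with no timeout: a descriptor busy
+			 * for more than ~100 usec is assumed stuck by the
+			 * UV2 hardware bug; switch to another descriptor.
+			 */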
+			busy_reps++;
+			if (busy_reps > 1000000) {
+				/* don't hammer on the clock */
+				busy_reps = 0;
+				ttm = get_cycles();
+				if ((ttm - bcp->send_message) >
+						(bcp->clocks_per_100_usec)) {
+					return handle_uv2_busy(bcp);
+				}
+			}
 			/*
 			 * descriptor_stat is still BUSY
 			 */
 			cpu_relax();
 		}
-		descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu);
+		descriptor_stat = uv2_read_status(mmr_offset, right_shift,
+								desc);
 	}
 	bcp->conseccompletes++;
 	return FLUSH_COMPLETE;
@@ -563,14 +687,14 @@ static int wait_completion(struct bau_desc *bau_desc,
 {
 	int right_shift;
 	unsigned long mmr_offset;
-	int cpu = bcp->uvhub_cpu;
+	int desc = bcp->using_desc;
 
-	if (cpu < UV_CPUS_PER_AS) {
+	if (desc < UV_CPUS_PER_AS) {
 		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
-		right_shift = cpu * UV_ACT_STATUS_SIZE;
+		right_shift = desc * UV_ACT_STATUS_SIZE;
 	} else {
 		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-		right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
+		right_shift = ((desc - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
 	}
 
 	if (bcp->uvhub_version == 1)
@@ -752,8 +876,7 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct bau_desc *bau_desc,
-			struct cpumask *flush_mask, struct bau_control *bcp)
+int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 {
 	int seq_number = 0;
 	int completion_stat = 0;
@@ -766,20 +889,24 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
 	struct bau_control *hmaster = bcp->uvhub_master;
 	struct uv1_bau_msg_header *uv1_hdr = NULL;
 	struct uv2_bau_msg_header *uv2_hdr = NULL;
+	struct bau_desc *bau_desc;
 
-	if (bcp->uvhub_version == 1) {
-		uv1 = 1;
+	if (bcp->uvhub_version == 1)
 		uv1_throttle(hmaster, stat);
-		uv1_hdr = &bau_desc->header.uv1_hdr;
-	} else
-		uv2_hdr = &bau_desc->header.uv2_hdr;
 
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 
 	time1 = get_cycles();
 	do {
-		if (try == 0) {
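+		/*
+		 * Re-derive bau_desc on every pass: the UV2 busy
+		 * workaround (handle_uv2_busy) may have switched
+		 * bcp->using_desc on the previous iteration.
+		 */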
+		bau_desc = bcp->descriptor_base;
+		bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+		if (bcp->uvhub_version == 1) {
+			uv1 = 1;
+			uv1_hdr = &bau_desc->header.uv1_hdr;
+		} else
+			uv2_hdr = &bau_desc->header.uv2_hdr;
+		if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
 			if (uv1)
 				uv1_hdr->msg_type = MSG_REGULAR;
 			else
@@ -797,13 +924,14 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
 			uv1_hdr->sequence = seq_number;
 		else
 			uv2_hdr->sequence = seq_number;
-		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
+		index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
 		bcp->send_message = get_cycles();
 
 		write_mmr_activation(index);
 
 		try++;
 		completion_stat = wait_completion(bau_desc, bcp, try);
+		/* UV2: wait_completion() may change the bcp->using_desc */
 
 		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
@@ -814,6 +942,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
 		}
 		cpu_relax();
 	} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
+		 (completion_stat == FLUSH_RETRY_BUSYBUG) ||
 		 (completion_stat == FLUSH_RETRY_TIMEOUT));
 
 	time2 = get_cycles();
@@ -828,6 +957,7 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
 	record_send_stats(time1, time2, bcp, stat, completion_stat, try);
 
 	if (completion_stat == FLUSH_GIVEUP)
+		/* FLUSH_GIVEUP will fall back to using IPIs for the tlb flush */
 		return 1;
 	return 0;
 }
@@ -983,7 +1113,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 		stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
-	bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
+	bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
 		return NULL;
@@ -996,12 +1126,85 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
 	 * or 1 if it gave up and the original cpumask should be returned.
 	 */
-	if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
+	if (!uv_flush_send_and_wait(flush_mask, bcp))
 		return NULL;
 	else
 		return cpumask;
 }
 
+/*
+ * Search the message queue for any 'other' message with the same software
+ * acknowledge resource bit vector.
+ */
+struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
+			struct bau_control *bcp, unsigned char swack_vec)
+{
+	struct bau_pq_entry *msg_next = msg + 1;
+
+	if (msg_next > bcp->queue_last)
+		msg_next = bcp->queue_first;
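+	/* walk the circular queue; wrap from queue_last back to queue_first */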
+	while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
+		if (msg_next->swack_vec == swack_vec)
+			return msg_next;
+		msg_next++;
+		if (msg_next > bcp->queue_last)
+			msg_next = bcp->queue_first;
+	}
+	return NULL;
+}
+
+/*
+ * UV2 needs to work around a bug in which an arriving message has not
+ * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
+ * Such a message must be ignored.
+ */
+void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
+{
+	unsigned long mmr_image;
+	unsigned char swack_vec;
+	struct bau_pq_entry *msg = mdp->msg;
+	struct bau_pq_entry *other_msg;
+
+	mmr_image = read_mmr_sw_ack();
+	swack_vec = msg->swack_vec;
+
+	if ((swack_vec & mmr_image) == 0) {
+		/*
+		 * This message was assigned a swack resource, but no
+		 * reserved acknowledgment is pending.
+		 * The bug has prevented this message from setting the MMR.
+		 * And no other message has used the same sw_ack resource.
+		 * Do the requested shootdown but do not reply to the msg.
+		 * (the 0 means make no acknowledge)
+		 */
+		bau_process_message(mdp, bcp, 0);
+		return;
+	}
+
+	/*
+	 * Some message has set the MMR 'pending' bit; it might have been
+	 * another message.  Look for that message.
+	 */
+	other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
+	if (other_msg) {
+		/* There is another.  Do not ack the current one. */
+		bau_process_message(mdp, bcp, 0);
+		/*
+		 * Let the natural processing of that message acknowledge
+		 * it. Don't get the processing of sw_ack's out of order.
+		 */
+		return;
+	}
+
+	/*
+	 * There is no other message using this sw_ack, so it is safe to
+	 * acknowledge it.
+	 */
+	bau_process_message(mdp, bcp, 1);
+	return;
+}
+
 /*
  * The BAU message interrupt comes here. (registered by set_intr_gate)
  * See entry_64.S
@@ -1038,9 +1241,11 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 		count++;
 
 		msgdesc.msg_slot = msg - msgdesc.queue_first;
-		msgdesc.swack_slot = ffs(msg->swack_vec) - 1;
 		msgdesc.msg = msg;
-		bau_process_message(&msgdesc, bcp);
+		if (bcp->uvhub_version == 2)
+			process_uv2_message(&msgdesc, bcp);
+		else
+			bau_process_message(&msgdesc, bcp, 1);
 
 		msg++;
 		if (msg > msgdesc.queue_last)
@@ -1158,7 +1363,7 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 		seq_printf(file,
 			"all one mult none retry canc nocan reset rcan ");
 		seq_printf(file,
-			"disable enable\n");
+			"disable enable wars warshw warwaits\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
 		stat = &per_cpu(ptcstats, cpu);
@@ -1189,8 +1394,10 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 			stat->d_nomsg, stat->d_retries, stat->d_canceled,
 			stat->d_nocanceled, stat->d_resets,
 			stat->d_rcanceled);
-		seq_printf(file, "%ld %ld\n",
-			stat->s_bau_disabled, stat->s_bau_reenabled);
+		seq_printf(file, "%ld %ld %ld %ld %ld\n",
+			stat->s_bau_disabled, stat->s_bau_reenabled,
+			stat->s_uv2_wars, stat->s_uv2_wars_hw,
+			stat->s_uv2_war_waits);
 	}
 	return 0;
 }
@@ -1564,6 +1771,7 @@ static void pq_init(int node, int pnode)
 	write_mmr_payload_first(pnode, pn_first);
 	write_mmr_payload_tail(pnode, first);
 	write_mmr_payload_last(pnode, last);
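+	/*
+	 * Writing a bit vector clears the corresponding software ack
+	 * resources (as in reply_to_message()); 0xffff frees anything
+	 * left pending or timed out, e.g. by a previous kernel.
+	 */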
+	write_gmmr_sw_ack(pnode, 0xffffUL);
 
 	/* in effect, all msg_type's are set to MSG_NOOP */
 	memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
@@ -1651,6 +1859,7 @@ static void __init init_per_cpu_tunables(void)
 		bcp->cong_response_us = congested_respns_us;
 		bcp->cong_reps = congested_reps;
 		bcp->cong_period = congested_period;
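+		/* used by uv2_wait_completion() to detect a stuck descriptor */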
+		bcp->clocks_per_100_usec = usec_2_cycles(100);
 	}
 }
@@ -1771,6 +1980,7 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
 		}
 		bcp->uvhub_master = *hmasterp;
 		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
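+		/*
+		 * Initially use the descriptor slot indexed by this cpu's
+		 * hub-relative number; handle_uv2_busy() may switch it to
+		 * an alternate slot (32-63) later.
+		 */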
+		bcp->using_desc = bcp->uvhub_cpu;
 		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
 			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
 				bcp->uvhub_cpu);