@@ -52,7 +52,8 @@
struct aio_ring {
	unsigned	id;	/* kernel internal index number */
	unsigned	nr;	/* number of io_events */
	unsigned	head;
	unsigned	head;	/* Written to by userland or under ring_lock
				 * mutex by aio_read_events_ring(). */
	unsigned	tail;

	unsigned	magic;
@@ -243,6 +244,11 @@ static void aio_free_ring(struct kioctx *ctx)
{
	int i;

	/* Disconnect the kioctx from the ring file. This prevents future
	 * accesses to the kioctx from page migration.
	 */
	put_aio_ring_file(ctx);
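	/* put_aio_ring_file() clears mapping->private_data under
	 * mapping->private_lock, so from here on aio_migratepage() finds no
	 * kioctx and bails out instead of touching a kioctx that is being
	 * torn down. */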
	for (i = 0; i < ctx->nr_pages; i++) {
		struct page *page;
		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
@@ -254,8 +260,6 @@ static void aio_free_ring(struct kioctx *ctx)
		put_page(page);
	}

	put_aio_ring_file(ctx);

	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
		kfree(ctx->ring_pages);
		ctx->ring_pages = NULL;
@@ -283,29 +287,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
{
	struct kioctx *ctx;
	unsigned long flags;
	pgoff_t idx;
	int rc;

	rc = 0;

	/* Make sure the old page hasn't already been changed */
	/* mapping->private_lock here protects against the kioctx teardown. */
	spin_lock(&mapping->private_lock);
	ctx = mapping->private_data;
	if (ctx) {
		pgoff_t idx;
		spin_lock_irqsave(&ctx->completion_lock, flags);
		idx = old->index;
		if (idx < (pgoff_t)ctx->nr_pages) {
			if (ctx->ring_pages[idx] != old)
				rc = -EAGAIN;
		} else
			rc = -EINVAL;
		spin_unlock_irqrestore(&ctx->completion_lock, flags);
	if (!ctx) {
		rc = -EINVAL;
		goto out;
	}

	/* The ring_lock mutex. This prevents aio_read_events() from writing
	 * to the ring's head, and prevents page migration from mucking in
	 * a partially initialized kioctx.
	 */
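	/* ring_lock is taken with trylock here, so page migration backs off
	 * with -EAGAIN (and is retried later by the migration core) instead
	 * of sleeping while kioctx setup, which holds ring_lock for its
	 * whole duration, is still in progress. */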
	if (!mutex_trylock(&ctx->ring_lock)) {
		rc = -EAGAIN;
		goto out;
	}

	idx = old->index;
	if (idx < (pgoff_t)ctx->nr_pages) {
		/* Make sure the old page hasn't already been changed */
		if (ctx->ring_pages[idx] != old)
			rc = -EAGAIN;
	} else
		rc = -EINVAL;
	spin_unlock(&mapping->private_lock);

	if (rc != 0)
		return rc;
		goto out_unlock;

	/* Writeback must be complete */
	BUG_ON(PageWriteback(old));

@@ -314,38 +327,26 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
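	/* migrate_page_move_mapping() atomically swaps old for new in the
	 * page cache; the trailing extra_count of 1 accounts for the extra
	 * reference held via ctx->ring_pages, and the call fails with
	 * -EAGAIN if the page's reference count does not match. */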
	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
	if (rc != MIGRATEPAGE_SUCCESS) {
		put_page(new);
		return rc;
		goto out_unlock;
	}

	/* We can potentially race against kioctx teardown here. Use the
	 * address_space's private data lock to protect the mapping's
	 * private_data.
	/* Take completion_lock to prevent other writes to the ring buffer
	 * while the old page is copied to the new. This prevents new
	 * events from being lost.
	 */
	spin_lock(&mapping->private_lock);
	ctx = mapping->private_data;
	if (ctx) {
		pgoff_t idx;
		spin_lock_irqsave(&ctx->completion_lock, flags);
		migrate_page_copy(new, old);
		idx = old->index;
		if (idx < (pgoff_t)ctx->nr_pages) {
			/* And only do the move if things haven't changed */
			if (ctx->ring_pages[idx] == old)
				ctx->ring_pages[idx] = new;
			else
				rc = -EAGAIN;
		} else
			rc = -EINVAL;
		spin_unlock_irqrestore(&ctx->completion_lock, flags);
	} else
		rc = -EBUSY;
	spin_unlock(&mapping->private_lock);
	spin_lock_irqsave(&ctx->completion_lock, flags);
	migrate_page_copy(new, old);
	BUG_ON(ctx->ring_pages[idx] != old);
	ctx->ring_pages[idx] = new;
	spin_unlock_irqrestore(&ctx->completion_lock, flags);
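	/* From this point on aio_complete(), which also takes completion_lock
	 * before writing an event, will reach the new page through
	 * ctx->ring_pages[idx]. */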
	if (rc == MIGRATEPAGE_SUCCESS)
		put_page(old);
	else
		put_page(new);

	/* The old page is no longer accessible. */
	put_page(old);

out_unlock:
	mutex_unlock(&ctx->ring_lock);
out:
	spin_unlock(&mapping->private_lock);
	return rc;
}
#endif
@@ -380,7 +381,7 @@ static int aio_setup_ring(struct kioctx *ctx)
	file = aio_private_file(ctx, nr_pages);
	if (IS_ERR(file)) {
		ctx->aio_ring_file = NULL;
		return -EAGAIN;
		return -ENOMEM;
	}

	ctx->aio_ring_file = file;

@@ -415,7 +416,7 @@ static int aio_setup_ring(struct kioctx *ctx)
	if (unlikely(i != nr_pages)) {
		aio_free_ring(ctx);
		return -EAGAIN;
		return -ENOMEM;
	}

	ctx->mmap_size = nr_pages * PAGE_SIZE;

@@ -429,7 +430,7 @@ static int aio_setup_ring(struct kioctx *ctx)
	if (IS_ERR((void *)ctx->mmap_base)) {
		ctx->mmap_size = 0;
		aio_free_ring(ctx);
		return -EAGAIN;
		return -ENOMEM;
	}

	pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
@@ -556,6 +557,10 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
					rcu_read_unlock();
					spin_unlock(&mm->ioctx_lock);

					/* While kioctx setup is in progress,
					 * we are protected from page migration
					 * changing ring_pages by ->ring_lock.
					 */
					ring = kmap_atomic(ctx->ring_pages[0]);
					ring->id = ctx->id;
					kunmap_atomic(ring);
@@ -640,24 +645,28 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
	ctx->max_reqs = nr_events;

	if (percpu_ref_init(&ctx->users, free_ioctx_users))
		goto err;

	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
		goto err;

	spin_lock_init(&ctx->ctx_lock);
	spin_lock_init(&ctx->completion_lock);
	mutex_init(&ctx->ring_lock);
	/* Protect against page migration throughout kioctx setup by keeping
	 * the ring_lock mutex held until setup is complete. */
	mutex_lock(&ctx->ring_lock);
	init_waitqueue_head(&ctx->wait);

	INIT_LIST_HEAD(&ctx->active_reqs);

	if (percpu_ref_init(&ctx->users, free_ioctx_users))
		goto err;

	if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs))
		goto err;

	ctx->cpu = alloc_percpu(struct kioctx_cpu);
	if (!ctx->cpu)
		goto err;

	if (aio_setup_ring(ctx) < 0)
	err = aio_setup_ring(ctx);
	if (err < 0)
		goto err;

	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
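	/* One event slot is deliberately left unused so that head == tail in
	 * the ring always means "empty" and a completely full ring cannot be
	 * mistaken for an empty one. */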
@@ -683,6 +692,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
	if (err)
		goto err_cleanup;

	/* Release the ring_lock mutex now that all setup is complete. */
	mutex_unlock(&ctx->ring_lock);

	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
		 ctx, ctx->user_id, mm, ctx->nr_events);
	return ctx;

@@ -692,6 +704,7 @@ err_cleanup:
err_ctx:
	aio_free_ring(ctx);
err:
	mutex_unlock(&ctx->ring_lock);
	free_percpu(ctx->cpu);
	free_percpu(ctx->reqs.pcpu_count);
	free_percpu(ctx->users.pcpu_count);
@@ -1024,6 +1037,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
	mutex_lock(&ctx->ring_lock);

	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
	ring = kmap_atomic(ctx->ring_pages[0]);
	head = ring->head;
	tail = ring->tail;
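	/* head may have been advanced by userland consuming events straight
	 * from the mapped ring, or by an earlier aio_read_events_ring() call
	 * under this same ring_lock; tail is only advanced by aio_complete(). */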