@@ -131,10 +131,20 @@ struct vfsmount *alloc_vfsmnt(const char *name)
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		atomic_set(&mnt->__mnt_writers, 0);
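		/*
		 * On SMP the writer count is kept in a per-cpu counter so that
		 * mnt_want_write()/mnt_drop_write() stay cpu-local on the fast
		 * path; on UP a plain int is enough.
		 */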
#ifdef CONFIG_SMP
		mnt->mnt_writers = alloc_percpu(int);
		if (!mnt->mnt_writers)
			goto out_free_devname;
#else
		mnt->mnt_writers = 0;
#endif
	}
	return mnt;
#ifdef CONFIG_SMP
out_free_devname:
	kfree(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
@@ -171,65 +181,38 @@ int __mnt_is_readonly(struct vfsmount *mnt)
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);
struct mnt_writer {
	/*
	 * If holding multiple instances of this lock, they
	 * must be ordered by cpu number.
	 */
	spinlock_t lock;
	struct lock_class_key lock_class; /* compiles out with !lockdep */
	unsigned long count;
	struct vfsmount *mnt;
} ____cacheline_aligned_in_smp;
static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
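/*
 * inc_mnt_writers()/dec_mnt_writers() adjust the current cpu's counter and
 * must run with preemption disabled; mnt_want_write()/mnt_drop_write() take
 * care of that.
 */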
static inline void inc_mnt_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
#else
	mnt->mnt_writers++;
#endif
}
static int __init init_mnt_writers(void)
static inline void dec_mnt_writers(struct vfsmount *mnt)
{
	int cpu;
	for_each_possible_cpu(cpu) {
		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
		spin_lock_init(&writer->lock);
		lockdep_set_class(&writer->lock, &writer->lock_class);
		writer->count = 0;
	}
	return 0;
#ifdef CONFIG_SMP
	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
#else
	mnt->mnt_writers--;
#endif
}
fs_initcall(init_mnt_writers);
static void unlock_mnt_writers(void)
static unsigned int count_mnt_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;
	struct mnt_writer *cpu_writer;
	for_each_possible_cpu(cpu) {
		cpu_writer = &per_cpu(mnt_writers, cpu);
		spin_unlock(&cpu_writer->lock);
		count += *per_cpu_ptr(mnt->mnt_writers, cpu);
	}
}
static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
{
	if (!cpu_writer->mnt)
		return;
	/*
	 * This is in case anyone ever leaves an invalid,
	 * old ->mnt and a count of 0.
	 */
	if (!cpu_writer->count)
		return;
	atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
	cpu_writer->count = 0;
}
/*
 * must hold cpu_writer->lock
 */
static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
					    struct vfsmount *mnt)
{
	if (cpu_writer->mnt == mnt)
		return;
	__clear_mnt_count(cpu_writer);
	cpu_writer->mnt = mnt;
	return count;
#else
	return mnt->mnt_writers;
#endif
}
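/*
 * count_mnt_writers() sums the per-cpu counts; the total is only stable while
 * new writers are excluded, i.e. with MNT_WRITE_HOLD set or once no further
 * references to the mount remain.
 */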
/*
@@ -253,75 +236,34 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
int mnt_want_write(struct vfsmount *mnt)
{
	int ret = 0;
	struct mnt_writer *cpu_writer;
	cpu_writer = &get_cpu_var(mnt_writers);
	spin_lock(&cpu_writer->lock);
	preempt_disable();
	inc_mnt_writers(mnt);
	/*
	 * The store to inc_mnt_writers must be visible before we pass
	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
	while (mnt->mnt_flags & MNT_WRITE_HOLD)
		cpu_relax();
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
	smp_rmb();
	if (__mnt_is_readonly(mnt)) {
		dec_mnt_writers(mnt);
		ret = -EROFS;
		goto out;
	}
	use_cpu_writer_for_mount(cpu_writer, mnt);
	cpu_writer->count++;
out:
	spin_unlock(&cpu_writer->lock);
	put_cpu_var(mnt_writers);
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
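/*
 * The mnt_want_write() fast path above is a preempt_disable(), a cpu-local
 * increment and a flag test: no shared cacheline is written unless a remount
 * to read-only has set MNT_WRITE_HOLD, in which case writers spin until the
 * hold is released.
 */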
static void lock_mnt_writers(void)
{
	int cpu;
	struct mnt_writer *cpu_writer;
	for_each_possible_cpu(cpu) {
		cpu_writer = &per_cpu(mnt_writers, cpu);
		spin_lock(&cpu_writer->lock);
		__clear_mnt_count(cpu_writer);
		cpu_writer->mnt = NULL;
	}
}
/*
 * These per-cpu write counts are not guaranteed to have
 * matched increments and decrements on any given cpu.
 * A file open()ed for write on one cpu and close()d on
 * another cpu will imbalance this count.  Make sure it
 * does not get too far out of whack.
 */
static void handle_write_count_underflow(struct vfsmount *mnt)
{
	if (atomic_read(&mnt->__mnt_writers) >=
	    MNT_WRITER_UNDERFLOW_LIMIT)
		return;
	/*
	 * It isn't necessary to hold all of the locks
	 * at the same time, but doing it this way makes
	 * us share a lot more code.
	 */
	lock_mnt_writers();
	/*
	 * vfsmount_lock is for mnt_flags.
	 */
	spin_lock(&vfsmount_lock);
	/*
	 * If coalescing the per-cpu writer counts did not
	 * get us back to a positive writer count, we have
	 * a bug.
	 */
	if ((atomic_read(&mnt->__mnt_writers) < 0) &&
	    !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
		WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
				"count: %d\n",
			mnt, atomic_read(&mnt->__mnt_writers));
		/* use the flag to keep the dmesg spam down */
		mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
	}
	spin_unlock(&vfsmount_lock);
	unlock_mnt_writers();
}
/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
@@ -332,37 +274,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	int must_check_underflow = 0;
	struct mnt_writer *cpu_writer;
	cpu_writer = &get_cpu_var(mnt_writers);
	spin_lock(&cpu_writer->lock);
	use_cpu_writer_for_mount(cpu_writer, mnt);
	if (cpu_writer->count > 0) {
		cpu_writer->count--;
	} else {
		must_check_underflow = 1;
		atomic_dec(&mnt->__mnt_writers);
	}
	spin_unlock(&cpu_writer->lock);
	/*
	 * Logically, we could call this each time,
	 * but the __mnt_writers cacheline tends to
	 * be cold, and makes this expensive.
	 */
	if (must_check_underflow)
		handle_write_count_underflow(mnt);
	/*
	 * This could be done right after the spinlock
	 * is taken because the spinlock keeps us on
	 * the cpu, and disables preemption.  However,
	 * putting it here bounds the amount that
	 * __mnt_writers can underflow.  Without it,
	 * we could theoretically wrap __mnt_writers.
	 */
	put_cpu_var(mnt_writers);
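	/*
	 * The new mnt_drop_write() body is just a cpu-local decrement with
	 * preemption disabled; mnt_make_readonly()'s MNT_WRITE_HOLD ordering
	 * keeps the summed count correct even when the increment and the
	 * decrement land on different cpus.
	 */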
	preempt_disable();
	dec_mnt_writers(mnt);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(mnt_drop_write);
@@ -370,24 +284,41 @@ static int mnt_make_readonly(struct vfsmount *mnt)
{
	int ret = 0;
	lock_mnt_writers();
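	/*
	 * MNT_WRITE_HOLD under vfsmount_lock takes over the role of
	 * lock_mnt_writers(): it makes would-be writers spin in
	 * mnt_want_write() while the per-cpu counts are summed.
	 */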
	spin_lock(&vfsmount_lock);
	mnt->mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * With all the locks held, this value is stable
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */
	if (atomic_read(&mnt->__mnt_writers) > 0) {
		ret = -EBUSY;
		goto out;
	}
	smp_mb();
	/*
	 * nobody can do a successful mnt_want_write() with all
	 * of the counts in MNT_DENIED_WRITE and the locks held.
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may subsequently
	 * increment the count, they'll have to wait, and decrement it after
	 * seeing MNT_READONLY).
	 *
	 * It is OK to have counter incremented on one CPU and decremented on
	 * another: the sum will add up correctly. The danger would be when we
	 * sum up each counter, if we read a counter before it is incremented,
	 * but then read another CPU's count which it has been subsequently
	 * decremented from -- we would see more decrements than we should.
	 * MNT_WRITE_HOLD protects against this scenario, because
	 * mnt_want_write first increments count, then smp_mb, then spins on
	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
	 * we're counting up here.
	 */
	spin_lock(&vfsmount_lock);
	if (!ret)
	if (count_mnt_writers(mnt) > 0)
		ret = -EBUSY;
	else
		mnt->mnt_flags |= MNT_READONLY;
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt_flags &= ~MNT_WRITE_HOLD;
	spin_unlock(&vfsmount_lock);
out:
	unlock_mnt_writers();
	return ret;
}
@@ -410,6 +341,9 @@ void free_vfsmnt(struct vfsmount *mnt)
{
	kfree(mnt->mnt_devname);
	mnt_free_id(mnt);
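	/* release the per-cpu writer counts allocated in alloc_vfsmnt() */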
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_writers);
#endif
	kmem_cache_free(mnt_cache, mnt);
}
@@ -604,38 +538,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
static inline void __mntput(struct vfsmount *mnt)
{
	int cpu;
	struct super_block *sb = mnt->mnt_sb;
	/*
	 * We don't have to hold all of the locks at the
	 * same time here because we know that we're the
	 * last reference to mnt and that no new writers
	 * can come in.
	 */
	for_each_possible_cpu(cpu) {
		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
		spin_lock(&cpu_writer->lock);
		if (cpu_writer->mnt != mnt) {
			spin_unlock(&cpu_writer->lock);
			continue;
		}
		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
		cpu_writer->count = 0;
		/*
		 * Might as well do this so that no one
		 * ever sees the pointer and expects
		 * it to be valid.
		 */
		cpu_writer->mnt = NULL;
		spin_unlock(&cpu_writer->lock);
	}
	/*
	 * This probably indicates that somebody messed
	 * up a mnt_want/drop_write() pair.  If this
	 * happens, the filesystem was probably unable
	 * to make r/w->r/o transitions.
	 */
	WARN_ON(atomic_read(&mnt->__mnt_writers));
	/*
	 * atomic_dec_and_lock() used to deal with ->mnt_count decrements
	 * provides barriers, so count_mnt_writers() below is safe.  AV
	 */
	WARN_ON(count_mnt_writers(mnt));
	dput(mnt->mnt_root);
	free_vfsmnt(mnt);
	deactivate_super(sb);