|
|
|
/*
|
|
|
|
* linux/fs/nfs/dir.c
|
|
|
|
*
|
|
|
|
* Copyright (C) 1992 Rick Sladkey
|
|
|
|
*
|
|
|
|
* nfs directory handling functions
|
|
|
|
*
|
|
|
|
* 10 Apr 1996 Added silly rename for unlink --okir
|
|
|
|
* 28 Sep 1996 Improved directory cache --okir
|
|
|
|
* 23 Aug 1997 Claus Heine claus@momo.math.rwth-aachen.de
|
|
|
|
* Re-implemented silly rename for unlink, newly implemented
|
|
|
|
* silly rename for nfs_rename() following the suggestions
|
|
|
|
* of Olaf Kirch (okir) found in this file.
|
|
|
|
* Following Linus comments on my original hack, this version
|
|
|
|
* depends only on the dcache stuff and doesn't touch the inode
|
|
|
|
* layer (iput() and friends).
|
|
|
|
* 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/time.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/stat.h>
|
|
|
|
#include <linux/fcntl.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/sunrpc/clnt.h>
|
|
|
|
#include <linux/nfs_fs.h>
|
|
|
|
#include <linux/nfs_mount.h>
|
|
|
|
#include <linux/pagemap.h>
|
|
|
|
#include <linux/pagevec.h>
|
|
|
|
#include <linux/namei.h>
|
NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of the anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
(1) The root and intervening nodes may not be accessible to the client.
With NFS2 and NFS3, for instance, mountd is called on the server to get
the filehandle for the tip of a path. mountd won't give us handles for
anything we don't have permission to access, and so we can't set up NFS
inodes for such nodes, and so can't easily set up dentries (we'd have to
have ghost inodes or something).
With this patch we don't actually create dentries until we get handles
from the server that we can use to set up their inodes, and we don't
actually bind them into the tree until we know for sure where they go.
(2) Inaccessible symbolic links.
If we're asked to mount two exports from the server, eg:
mount warthog:/warthog/aaa/xxx /mmm
mount warthog:/warthog/bbb/yyy /nnn
We may not be able to access anything nearer the root than xxx and yyy,
but we may find out later that /mmm/www/yyy, say, is actually the same
directory as the one mounted on /nnn. What we might then find out, for
example, is that /warthog/bbb was actually a symbolic link to
/warthog/aaa/xxx/www, but we can't actually determine that by talking to
the server until /warthog is made available by NFS.
This would lead to having constructed an erroneous dentry tree which we
can't easily fix. We can end up with a dentry marked as a directory when
it should actually be a symlink, or we could end up with an apparently
hardlinked directory.
With this patch we need not make assumptions about the type of a dentry
for which we can't retrieve information, nor need we assume we know its
place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
(1) The server record construction/destruction has been abstracted out into
its own set of functions to make things easier to get right. These have
been moved into fs/nfs/client.c.
All the code in fs/nfs/client.c has to do with the management of
connections to servers, and doesn't touch superblocks in any way; the
remaining code in fs/nfs/super.c has to do with VFS superblock management.
(2) The sequence of events undertaken by NFS mount is now reordered:
(a) A volume representation (struct nfs_server) is allocated.
(b) A server representation (struct nfs_client) is acquired. This may be
allocated or shared, and is keyed on server address, port and NFS
version.
(c) If allocated, the client representation is initialised. The state
member variable of nfs_client is used to prevent a race during
initialisation from two mounts.
(d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we
are given the root FH in advance.
(e) The volume FSID is probed for on the root FH.
(f) The volume representation is initialised from the FSINFO record
retrieved on the root FH.
(g) sget() is called to acquire a superblock. This may be allocated or
shared, keyed on client pointer and FSID.
(h) If allocated, the superblock is initialised.
(i) If the superblock is shared, then the new nfs_server record is
discarded.
(j) The root dentry for this mount is looked up from the root FH.
(k) The root dentry for this mount is assigned to the vfsmount.
(3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
returns; this function now attaches disconnected trees from alternate
roots that happen to be discovered attached to a directory being read (in
the same way nfs_lookup() is made to do for lookup ops).
The new d_materialise_unique() function is now used to do this, thus
permitting the whole thing to be done under one set of locks, and thus
avoiding any race between mount and lookup operations on the same
directory.
(4) The client management code uses a new debug facility: NFSDBG_CLIENT which
is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
(5) Clone mounts are now called xdev mounts.
(6) Use the dentry passed to the statfs() op as the handle for retrieving fs
statistics rather than the root dentry of the superblock (which is now a
dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
19 years ago
|
|
|
#include <linux/mount.h>
|
Detach sched.h from mm.h
First thing mm.h does is including sched.h solely for can_do_mlock() inline
function which has "current" dereference inside. By dealing with can_do_mlock()
mm.h can be detached from sched.h which is good. See below, why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
18 years ago
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/vmalloc.h>
|
|
|
|
#include <linux/kmemleak.h>
|
|
|
|
|
|
|
|
#include "delegation.h"
|
|
|
|
#include "iostat.h"
|
|
|
|
#include "internal.h"
|
|
|
|
#include "fscache.h"
|
|
|
|
|
|
|
|
/* #define NFS_DEBUG_VERBOSE 1 */
|
|
|
|
|
|
|
|
static int nfs_opendir(struct inode *, struct file *);
|
|
|
|
static int nfs_readdir(struct file *, void *, filldir_t);
|
|
|
|
static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
|
|
|
|
static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
|
|
|
|
static int nfs_mkdir(struct inode *, struct dentry *, int);
|
|
|
|
static int nfs_rmdir(struct inode *, struct dentry *);
|
|
|
|
static int nfs_unlink(struct inode *, struct dentry *);
|
|
|
|
static int nfs_symlink(struct inode *, struct dentry *, const char *);
|
|
|
|
static int nfs_link(struct dentry *, struct inode *, struct dentry *);
|
|
|
|
static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
|
|
|
|
static int nfs_rename(struct inode *, struct dentry *,
|
|
|
|
struct inode *, struct dentry *);
|
|
|
|
static int nfs_fsync_dir(struct file *, int);
|
|
|
|
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
|
|
|
|
static int nfs_readdir_clear_array(struct page*, gfp_t);
|
|
|
|
|
|
|
|
const struct file_operations nfs_dir_operations = {
|
|
|
|
.llseek = nfs_llseek_dir,
|
|
|
|
.read = generic_read_dir,
|
|
|
|
.readdir = nfs_readdir,
|
|
|
|
.open = nfs_opendir,
|
|
|
|
.release = nfs_release,
|
|
|
|
.fsync = nfs_fsync_dir,
|
|
|
|
};
|
|
|
|
|
|
|
|
const struct inode_operations nfs_dir_inode_operations = {
|
|
|
|
.create = nfs_create,
|
|
|
|
.lookup = nfs_lookup,
|
|
|
|
.link = nfs_link,
|
|
|
|
.unlink = nfs_unlink,
|
|
|
|
.symlink = nfs_symlink,
|
|
|
|
.mkdir = nfs_mkdir,
|
|
|
|
.rmdir = nfs_rmdir,
|
|
|
|
.mknod = nfs_mknod,
|
|
|
|
.rename = nfs_rename,
|
|
|
|
.permission = nfs_permission,
|
|
|
|
.getattr = nfs_getattr,
|
|
|
|
.setattr = nfs_setattr,
|
|
|
|
};
|
|
|
|
|
|
|
|
const struct address_space_operations nfs_dir_addr_space_ops = {
|
|
|
|
.releasepage = nfs_readdir_clear_array,
|
|
|
|
};
|
|
|
|
|
|
|
|
#ifdef CONFIG_NFS_V3
/*
 * NFSv3 directory inode operation table: the same callbacks as
 * nfs_dir_inode_operations plus the nfs3_* extended attribute hooks.
 */
const struct inode_operations nfs3_dir_inode_operations = {
	/* name space manipulation */
	.lookup		= nfs_lookup,
	.create		= nfs_create,
	.mknod		= nfs_mknod,
	.mkdir		= nfs_mkdir,
	.rmdir		= nfs_rmdir,
	.symlink	= nfs_symlink,
	.link		= nfs_link,
	.unlink		= nfs_unlink,
	.rename		= nfs_rename,
	/* attributes and access control */
	.permission	= nfs_permission,
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
	/* extended attributes */
	.listxattr	= nfs3_listxattr,
	.getxattr	= nfs3_getxattr,
	.setxattr	= nfs3_setxattr,
	.removexattr	= nfs3_removexattr,
};
#endif  /* CONFIG_NFS_V3 */
|
|
|
|
|
|
|
|
#ifdef CONFIG_NFS_V4

static struct dentry *nfs_atomic_lookup(struct inode *dir,
					struct dentry *dentry,
					struct nameidata *nd);
static int nfs_open_create(struct inode *dir, struct dentry *dentry,
			   int mode, struct nameidata *nd);

/*
 * NFSv4 directory inode operation table.  It differs from the other
 * tables in this file by using nfs_open_create() for ->create,
 * nfs_atomic_lookup() for ->lookup, and the nfs4_* xattr hooks.
 */
const struct inode_operations nfs4_dir_inode_operations = {
	/* name space manipulation */
	.lookup		= nfs_atomic_lookup,
	.create		= nfs_open_create,
	.mknod		= nfs_mknod,
	.mkdir		= nfs_mkdir,
	.rmdir		= nfs_rmdir,
	.symlink	= nfs_symlink,
	.link		= nfs_link,
	.unlink		= nfs_unlink,
	.rename		= nfs_rename,
	/* attributes and access control */
	.permission	= nfs_permission,
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
	/* extended attributes */
	.getxattr	= nfs4_getxattr,
	.setxattr	= nfs4_setxattr,
	.listxattr	= nfs4_listxattr,
};

#endif /* CONFIG_NFS_V4 */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Open file
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
nfs_opendir(struct inode *inode, struct file *filp)
|
|
|
|
{
|
|
|
|
int res;
|
|
|
|
|
|
|
|
dfprintk(FILE, "NFS: open dir(%s/%s)\n",
|
|
|
|
filp->f_path.dentry->d_parent->d_name.name,
|
|
|
|
filp->f_path.dentry->d_name.name);
|
|
|
|
|
|
|
|
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
|
|
|
|
|
|
|
|
/* Call generic open code in order to cache credentials */
|
|
|
|
res = nfs_open(inode, filp);
|
|
|
|
if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
|
|
|
|
/* This is a mountpoint, so d_revalidate will never
|
|
|
|
* have been called, so we need to refresh the
|
|
|
|
* inode (for close-open consistency) ourselves.
|
|
|
|
*/
|
|
|
|
__nfs_revalidate_inode(NFS_SERVER(inode), inode);
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct nfs_cache_array_entry {
|
|
|
|
u64 cookie;
|
|
|
|
u64 ino;
|
|
|
|
struct qstr string;
|
|
|
|
unsigned char d_type;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct nfs_cache_array {
|
|
|
|
unsigned int size;
|
|
|
|
int eof_index;
|
|
|
|
u64 last_cookie;
|
|
|
|
struct nfs_cache_array_entry array[0];
|
|
|
|
};
|
|
|
|
|
NFS: Readdir plus in v4
By requesting more attributes during a readdir, we can mimic the readdir plus
operation that was in NFSv3.
To test, I ran the command `ls -lU --color=none` on directories with various
numbers of files. Without readdir plus, I see this:
n files | 100 | 1,000 | 10,000 | 100,000 | 1,000,000
--------+-----------+-----------+-----------+-----------+----------
real | 0m00.153s | 0m00.589s | 0m05.601s | 0m56.691s | 9m59.128s
user | 0m00.007s | 0m00.007s | 0m00.077s | 0m00.703s | 0m06.800s
sys | 0m00.010s | 0m00.070s | 0m00.633s | 0m06.423s | 1m10.005s
access | 3 | 1 | 1 | 4 | 31
getattr | 2 | 1 | 1 | 1 | 1
lookup | 104 | 1,003 | 10,003 | 100,003 | 1,000,003
readdir | 2 | 16 | 158 | 1,575 | 15,749
total | 111 | 1,021 | 10,163 | 101,583 | 1,015,784
With readdir plus enabled, I see this:
n files | 100 | 1,000 | 10,000 | 100,000 | 1,000,000
--------+-----------+-----------+-----------+-----------+----------
real | 0m00.115s | 0m00.206s | 0m01.079s | 0m12.521s | 2m07.528s
user | 0m00.003s | 0m00.003s | 0m00.040s | 0m00.290s | 0m03.296s
sys | 0m00.007s | 0m00.020s | 0m00.120s | 0m01.357s | 0m17.556s
access | 3 | 1 | 1 | 1 | 7
getattr | 2 | 1 | 1 | 1 | 1
lookup | 4 | 3 | 3 | 3 | 3
readdir | 6 | 62 | 630 | 6,300 | 62,993
total | 15 | 67 | 635 | 6,305 | 63,004
Readdir plus disabled has about a 16x increase in the number of rpc calls and
is 4 - 5 times slower on large directories.
Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
14 years ago
|
|
|
/*
 * Per-protocol directory entry decoder.  It returns a pointer into the
 * XDR data or an ERR_PTR() value; the final int carries the descriptor's
 * "plus" flag.
 */
typedef __be32 *(*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *,
				   struct nfs_server *, int);
|
|
|
|
typedef struct {
|
|
|
|
struct file *file;
|
|
|
|
struct page *page;
|
|
|
|
unsigned long page_index;
|
|
|
|
u64 *dir_cookie;
|
|
|
|
loff_t current_index;
|
|
|
|
decode_dirent_t decode;
|
|
|
|
|
|
|
|
unsigned long timestamp;
|
|
|
|
unsigned long gencount;
|
|
|
|
unsigned int cache_entry_index;
|
|
|
|
unsigned int plus:1;
|
|
|
|
unsigned int eof:1;
|
|
|
|
} nfs_readdir_descriptor_t;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The caller is responsible for calling nfs_readdir_release_array(page)
|
|
|
|
*/
|
|
|
|
static
|
|
|
|
struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
|
|
|
|
{
|
|
|
|
void *ptr;
|
|
|
|
if (page == NULL)
|
|
|
|
return ERR_PTR(-EIO);
|
|
|
|
ptr = kmap(page);
|
|
|
|
if (ptr == NULL)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Undo the kmap() done by nfs_readdir_get_array() on @page. */
static
void nfs_readdir_release_array(struct page *page)
{
	kunmap(page);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* we are freeing strings created by nfs_add_to_readdir_array()
|
|
|
|
*/
|
|
|
|
static
|
|
|
|
int nfs_readdir_clear_array(struct page *page, gfp_t mask)
|
|
|
|
{
|
|
|
|
struct nfs_cache_array *array = nfs_readdir_get_array(page);
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (IS_ERR(array))
|
|
|
|
return PTR_ERR(array);
|
|
|
|
for (i = 0; i < array->size; i++)
|
|
|
|
kfree(array->array[i].string.name);
|
|
|
|
nfs_readdir_release_array(page);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* the caller is responsible for freeing qstr.name
|
|
|
|
* when called by nfs_readdir_add_to_array, the strings will be freed in
|
|
|
|
* nfs_clear_readdir_array()
|
|
|
|
*/
|
|
|
|
static
|
|
|
|
int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
|
|
|
|
{
|
|
|
|
string->len = len;
|
|
|
|
string->name = kmemdup(name, len, GFP_KERNEL);
|
|
|
|
if (string->name == NULL)
|
|
|
|
return -ENOMEM;
|
|
|
|
/*
|
|
|
|
* Avoid a kmemleak false positive. The pointer to the name is stored
|
|
|
|
* in a page cache page which kmemleak does not scan.
|
|
|
|
*/
|
|
|
|
kmemleak_not_leak(string->name);
|
|
|
|
string->hash = full_name_hash(name, len);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
|
|
|
|
{
|
|
|
|
struct nfs_cache_array *array = nfs_readdir_get_array(page);
|
|
|
|
struct nfs_cache_array_entry *cache_entry;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (IS_ERR(array))
|
|
|
|
return PTR_ERR(array);
|
|
|
|
|
|
|
|
cache_entry = &array->array[array->size];
|
|
|
|
|
|
|
|
/* Check that this entry lies within the page bounds */
|
|
|
|
ret = -ENOSPC;
|
|
|
|
if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
cache_entry->cookie = entry->prev_cookie;
|
|
|
|
cache_entry->ino = entry->ino;
|
|
|
|
cache_entry->d_type = entry->d_type;
|
|
|
|
ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
array->last_cookie = entry->cookie;
|
|
|
|
array->size++;
|
|
|
|
if (entry->eof == 1)
|
|
|
|
array->eof_index = array->size;
|
|
|
|
out:
|
|
|
|
nfs_readdir_release_array(page);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
|
|
|
|
{
|
|
|
|
loff_t diff = desc->file->f_pos - desc->current_index;
|
|
|
|
unsigned int index;
|
|
|
|
|
|
|
|
if (diff < 0)
|
|
|
|
goto out_eof;
|
|
|
|
if (diff >= array->size) {
|
|
|
|
if (array->eof_index >= 0)
|
|
|
|
goto out_eof;
|
|
|
|
desc->current_index += array->size;
|
|
|
|
return -EAGAIN;
|
|
|
|
}
|
|
|
|
|
|
|
|
index = (unsigned int)diff;
|
|
|
|
*desc->dir_cookie = array->array[index].cookie;
|
|
|
|
desc->cache_entry_index = index;
|
|
|
|
return 0;
|
|
|
|
out_eof:
|
|
|
|
desc->eof = 1;
|
|
|
|
return -EBADCOOKIE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int status = -EAGAIN;
|
|
|
|
|
|
|
|
for (i = 0; i < array->size; i++) {
|
|
|
|
if (array->array[i].cookie == *desc->dir_cookie) {
|
|
|
|
desc->cache_entry_index = i;
|
|
|
|
status = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i == array->eof_index) {
|
|
|
|
desc->eof = 1;
|
|
|
|
status = -EBADCOOKIE;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
|
|
|
|
{
|
|
|
|
struct nfs_cache_array *array;
|
|
|
|
int status = -EBADCOOKIE;
|
|
|
|
|
|
|
|
if (desc->dir_cookie == NULL)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
array = nfs_readdir_get_array(desc->page);
|
|
|
|
if (IS_ERR(array)) {
|
|
|
|
status = PTR_ERR(array);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*desc->dir_cookie == 0)
|
|
|
|
status = nfs_readdir_search_for_pos(array, desc);
|
|
|
|
else
|
|
|
|
status = nfs_readdir_search_for_cookie(array, desc);
|
|
|
|
|
|
|
|
nfs_readdir_release_array(desc->page);
|
|
|
|
out:
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Fill a page with xdr information before transferring to the cache page */
|
|
|
|
static
|
|
|
|
int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
|
|
|
|
struct nfs_entry *entry, struct file *file, struct inode *inode)
|
|
|
|
{
|
|
|
|
struct rpc_cred *cred = nfs_file_cred(file);
|
|
|
|
unsigned long timestamp, gencount;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
again:
|
|
|
|
timestamp = jiffies;
|
|
|
|
gencount = nfs_inc_attr_generation_counter();
|
|
|
|
error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages,
|
|
|
|
NFS_SERVER(inode)->dtsize, desc->plus);
|
|
|
|
if (error < 0) {
|
|
|
|
/* We requested READDIRPLUS, but the server doesn't grok it */
|
|
|
|
if (error == -ENOTSUPP && desc->plus) {
|
|
|
|
NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
|
|
|
|
clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
|
|
|
|
desc->plus = 0;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
desc->timestamp = timestamp;
|
|
|
|
desc->gencount = gencount;
|
|
|
|
error:
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Fill in an entry based on the xdr code stored in desc->page */
|
|
|
|
static
|
|
|
|
int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
|
|
|
|
{
|
NFS: Readdir plus in v4
By requsting more attributes during a readdir, we can mimic the readdir plus
operation that was in NFSv3.
To test, I ran the command `ls -lU --color=none` on directories with various
numbers of files. Without readdir plus, I see this:
n files | 100 | 1,000 | 10,000 | 100,000 | 1,000,000
--------+-----------+-----------+-----------+-----------+----------
real | 0m00.153s | 0m00.589s | 0m05.601s | 0m56.691s | 9m59.128s
user | 0m00.007s | 0m00.007s | 0m00.077s | 0m00.703s | 0m06.800s
sys | 0m00.010s | 0m00.070s | 0m00.633s | 0m06.423s | 1m10.005s
access | 3 | 1 | 1 | 4 | 31
getattr | 2 | 1 | 1 | 1 | 1
lookup | 104 | 1,003 | 10,003 | 100,003 | 1,000,003
readdir | 2 | 16 | 158 | 1,575 | 15,749
total | 111 | 1,021 | 10,163 | 101,583 | 1,015,784
With readdir plus enabled, I see this:
n files | 100 | 1,000 | 10,000 | 100,000 | 1,000,000
--------+-----------+-----------+-----------+-----------+----------
real | 0m00.115s | 0m00.206s | 0m01.079s | 0m12.521s | 2m07.528s
user | 0m00.003s | 0m00.003s | 0m00.040s | 0m00.290s | 0m03.296s
sys | 0m00.007s | 0m00.020s | 0m00.120s | 0m01.357s | 0m17.556s
access | 3 | 1 | 1 | 1 | 7
getattr | 2 | 1 | 1 | 1 | 1
lookup | 4 | 3 | 3 | 3 | 3
readdir | 6 | 62 | 630 | 6,300 | 62,993
total | 15 | 67 | 635 | 6,305 | 63,004
Readdir plus disabled has about a 16x increase in the number of rpc calls and
is 4 - 5 times slower on large directories.
Signed-off-by: Bryan Schumaker <bjschuma@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
14 years ago
|
|
|
__be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
|
|
|
|
if (IS_ERR(p))
|
|
|
|
return PTR_ERR(p);
|
|
|
|
|
|
|
|
entry->fattr->time_start = desc->timestamp;
|
|
|
|
entry->fattr->gencount = desc->gencount;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
|
|
|
|
{
|
|
|
|
struct nfs_inode *node;
|
|
|
|
if (dentry->d_inode == NULL)
|
|
|
|
goto different;
|
|
|
|
node = NFS_I(dentry->d_inode);
|
|
|
|
if (node->fh.size != entry->fh->size)
|
|
|
|
goto different;
|
|
|
|
if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
|
|
|
|
goto different;
|
|
|
|
return 1;
|
|
|
|
different:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
|
|
|
|
{
|
|
|
|
struct qstr filename = {
|
|
|
|
.len = entry->len,
|
|
|
|
.name = entry->name,
|
|
|
|
};
|
|
|
|
struct dentry *dentry;
|
|
|
|
struct dentry *alias;
|
|
|
|
struct inode *dir = parent->d_inode;
|
|
|
|
struct inode *inode;
|
|
|
|
|
|
|
|
if (filename.name[0] == '.') {
|
|
|
|
if (filename.len == 1)
|
|
|
|
return;
|
|
|
|
if (filename.len == 2 && filename.name[1] == '.')
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
filename.hash = full_name_hash(filename.name, filename.len);
|
|
|
|
|
|
|
|
dentry = d_lookup(parent, &filename);
|
|
|
|
if (dentry != NULL) {
|
|
|
|
if (nfs_same_file(dentry, entry)) {
|
|
|
|
nfs_refresh_inode(dentry->d_inode, entry->fattr);
|
|
|
|
goto out;
|
|
|
|
} else {
|
|
|
|
d_drop(dentry);
|
|
|
|
dput(dentry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
dentry = d_alloc(parent, &filename);
|
|
|
|
if (dentry == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
|
|
|
|
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
|
|
|
|
if (IS_ERR(inode))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
alias = d_materialise_unique(dentry, inode);
|
|
|
|
if (IS_ERR(alias))
|
|
|
|
goto out;
|
|
|
|
else if (alias) {
|
|
|
|
nfs_set_verifier(alias, nfs_save_change_attribute(dir));
|
|
|
|
dput(alias);
|
|
|
|
} else
|
|
|
|
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
|
|
|
|
|
|
|
out:
|
|
|
|
dput(dentry);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Perform conversion from xdr to cache array */
|
|
|
|
static
|
|
|
|
int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
|
|
|
|
void *xdr_page, struct page *page, unsigned int buflen)
|
|
|
|
{
|
|
|
|
struct xdr_stream stream;
|
|
|
|
struct xdr_buf buf;
|
|
|
|
__be32 *ptr = xdr_page;
|
|
|
|
struct nfs_cache_array *array;
|
|
|
|
unsigned int count = 0;
|
|
|
|
int status;
|
|
|
|
|
|
|
|
buf.head->iov_base = xdr_page;
|
|
|
|
buf.head->iov_len = buflen;
|
|
|
|
buf.tail->iov_len = 0;
|
|
|
|
buf.page_base = 0;
|
|
|
|
buf.page_len = 0;
|
|
|
|
buf.buflen = buf.head->iov_len;
|
|
|
|
buf.len = buf.head->iov_len;
|
|
|
|
|
|
|
|
xdr_init_decode(&stream, &buf, ptr);
|
|
|
|
|
|
|
|
|
|
|
|
do {
|
|
|
|
status = xdr_decode(desc, entry, &stream);
|
|
|
|
if (status != 0) {
|
|
|
|
if (status == -EAGAIN)
|
|
|
|
status = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
count++;
|
|
|
|
|
|
|
|
if (desc->plus == 1)
|
|
|
|
nfs_prime_dcache(desc->file->f_path.dentry, entry);
|
|
|
|
|
|
|
|
status = nfs_readdir_add_to_array(entry, page);
|
|
|
|
if (status != 0)
|
|
|
|
break;
|
|
|
|
} while (!entry->eof);
|
|
|
|
|
|
|
|
if (count == 0 || (status == -EBADCOOKIE && entry->eof == 1)) {
|
|
|
|
array = nfs_readdir_get_array(page);
|
|
|
|
if (!IS_ERR(array)) {
|
|
|
|
array->eof_index = array->size;
|
|
|
|
status = 0;
|
|
|
|
nfs_readdir_release_array(page);
|
|
|
|
} else
|
|
|
|
status = PTR_ERR(array);
|
|
|
|
}
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Drop one reference on each of the first @npages pages in @pages. */
static
void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages)
{
	struct page **end = pages + npages;

	for (; pages != end; pages++)
		put_page(*pages);
}
|
|
|
|
|
|
|
|
/*
 * Tear down a buffer obtained from nfs_readdir_large_page(): remove the
 * mapping at @ptr first, then release the @npages pages behind it.
 */
static
void nfs_readdir_free_large_page(void *ptr, struct page **pages,
		unsigned int npages)
{
	vm_unmap_ram(ptr, npages);
	nfs_readdir_free_pagearray(pages, npages);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* nfs_readdir_large_page will allocate pages that must be freed with a call
|
|
|
|
* to nfs_readdir_free_large_page
|
|
|
|
*/
|
|
|
|
static
|
|
|
|
void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
|
|
|
|
{
|
|
|
|
void *ptr;
|
|
|
|
unsigned int i;
|
|
|
|
|
|
|
|
for (i = 0; i < npages; i++) {
|
|
|
|
struct page *page = alloc_page(GFP_KERNEL);
|
|
|
|
if (page == NULL)
|
|
|
|
goto out_freepages;
|
|
|
|
pages[i] = page;
|
|
|
|
}
|
|
|
|
|
|
|
|
ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
|
|
|
|
if (!IS_ERR_OR_NULL(ptr))
|
|
|
|
return ptr;
|
|
|
|
out_freepages:
|
|
|
|
nfs_readdir_free_pagearray(pages, i);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
|
|
|
|
{
|
|
|
|
struct page *pages[NFS_MAX_READDIR_PAGES];
|
|
|
|
void *pages_ptr = NULL;
|
|
|
|
struct nfs_entry entry;
|
|
|
|
struct file *file = desc->file;
|
|
|
|
struct nfs_cache_array *array;
|
|
|
|
int status = -ENOMEM;
|
|
|
|
unsigned int array_size = ARRAY_SIZE(pages);
|
|
|
|
|
|
|
|
entry.prev_cookie = 0;
|
|
|
|
entry.cookie = *desc->dir_cookie;
|
|
|
|
entry.eof = 0;
|
|
|
|
entry.fh = nfs_alloc_fhandle();
|
|
|
|
entry.fattr = nfs_alloc_fattr();
|
|
|
|
if (entry.fh == NULL || entry.fattr == NULL)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
array = nfs_readdir_get_array(page);
|
|
|
|
if (IS_ERR(array)) {
|
|
|
|
status = PTR_ERR(array);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
memset(array, 0, sizeof(struct nfs_cache_array));
|
|
|
|
array->eof_index = -1;
|
|
|
|
|
|
|
|
pages_ptr = nfs_readdir_large_page(pages, array_size);
|
|
|
|
if (!pages_ptr)
|
|
|
|
goto out_release_array;
|
|
|
|
do {
|
|
|
|
unsigned int pglen;
|
|
|
|
status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
|
|
|
|
|
|
|
|
if (status < 0)
|
|
|
|
break;
|
|
|
|
pglen = status;
|
|
|
|
status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
|
|
|
|
if (status < 0) {
|
|
|
|
if (status == -ENOSPC)
|
|
|
|
status = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} while (array->eof_index < 0);
|
|
|
|
|
|
|
|
nfs_readdir_free_large_page(pages_ptr, pages, array_size);
|
|
|
|
out_release_array:
|
|
|
|
nfs_readdir_release_array(page);
|
|
|
|
out:
|
|
|
|
nfs_free_fattr(entry.fattr);
|
|
|
|
nfs_free_fhandle(entry.fh);
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Now we cache directories properly, by converting xdr information
|
|
|
|
* to an array that can be used for lookups later. This results in
|
|
|
|
* fewer cache pages, since we can store more information on each page.
|
|
|
|
* We only need to convert from xdr once so future lookups are much simpler
|
|
|
|
*/
|
|
|
|
static
|
|
|
|
int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
|
|
|
|
{
|
|
|
|
struct inode *inode = desc->file->f_path.dentry->d_inode;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = nfs_readdir_xdr_to_array(desc, page, inode);
|
|
|
|
if (ret < 0)
|
|
|
|
goto error;
|
|
|
|
SetPageUptodate(page);
|
|
|
|
|
|
|
|
if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
|
|
|
|
/* Should never happen */
|
|
|
|
nfs_zap_mapping(inode, inode->i_mapping);
|
|
|
|
}
|
|
|
|
unlock_page(page);
|
|
|
|
return 0;
|
|
|
|
error:
|
|
|
|
unlock_page(page);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
void cache_page_release(nfs_readdir_descriptor_t *desc)
|
|
|
|
{
|
|
|
|
page_cache_release(desc->page);
|
|
|
|
desc->page = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
|
|
|
|
{
|
|
|
|
return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
|
|
|
|
desc->page_index, (filler_t *)nfs_readdir_filler, desc);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns 0 if desc->dir_cookie was found on page desc->page_index
|
|
|
|
*/
|
|
|
|
static
|
|
|
|
int find_cache_page(nfs_readdir_descriptor_t *desc)
|
|
|
|
{
|
|
|
|
int res;
|
|
|
|
|
|
|
|
desc->page = get_cache_page(desc);
|
|
|
|
if (IS_ERR(desc->page))
|
|
|
|
return PTR_ERR(desc->page);
|
|
|
|
|
|
|
|
res = nfs_readdir_search_array(desc);
|
|
|
|
if (res == 0)
|
|
|
|
return 0;
|
|
|
|
cache_page_release(desc);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Search for desc->dir_cookie from the beginning of the page cache */
|
|
|
|
static inline
|
|
|
|
int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
|
|
|
|
{
|
|
|
|
int res;
|
|
|
|
|
|
|
|
if (desc->page_index == 0)
|
|
|
|
desc->current_index = 0;
|
|
|
|
while (1) {
|
|
|
|
res = find_cache_page(desc);
|
|
|
|
if (res != -EAGAIN)
|
|
|
|
break;
|
|
|
|
desc->page_index++;
|
|
|
|
}
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned int dt_type(struct inode *inode)
|
|
|
|
{
|
|
|
|
return (inode->i_mode >> 12) & 15;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Once we've found the start of the dirent within a page: fill 'er up...
|
|
|
|
*/
|
|
|
|
static
|
|
|
|
int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
|
|
|
|
filldir_t filldir)
|
|
|
|
{
|
|
|
|
struct file *file = desc->file;
|
|
|
|
int i = 0;
|
|
|
|
int res = 0;
|
|
|
|
struct nfs_cache_array *array = NULL;
|
|
|
|
|
|
|
|
array = nfs_readdir_get_array(desc->page);
|
|
|
|
if (IS_ERR(array)) {
|
|
|
|
res = PTR_ERR(array);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = desc->cache_entry_index; i < array->size; i++) {
|
|
|
|
struct nfs_cache_array_entry *ent;
|
|
|
|
|
|
|
|
ent = &array->array[i];
|
|
|
|
if (filldir(dirent, ent->string.name, ent->string.len,
|
|
|
|
file->f_pos, nfs_compat_user_ino64(ent->ino),
|
|
|
|
ent->d_type) < 0) {
|
|
|
|
desc->eof = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
file->f_pos++;
|
|
|
|
desc->cache_entry_index = i;
|
|
|
|
if (i < (array->size-1))
|
|
|
|
*desc->dir_cookie = array->array[i+1].cookie;
|
|
|
|
else
|
|
|
|
*desc->dir_cookie = array->last_cookie;
|
|
|
|
}
|
|
|
|
if (i == array->eof_index)
|
|
|
|
desc->eof = 1;
|
|
|
|
|
|
|
|
nfs_readdir_release_array(desc->page);
|
|
|
|
out:
|
|
|
|
cache_page_release(desc);
|
|
|
|
dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
|
|
|
|
(unsigned long long)*desc->dir_cookie, res);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we cannot find a cookie in our cache, we suspect that this is
|
|
|
|
* because it points to a deleted file, so we ask the server to return
|
|
|
|
* whatever it thinks is the next entry. We then feed this to filldir.
|
|
|
|
* If all goes well, we should then be able to find our way round the
|
|
|
|
* cache on the next call to readdir_search_pagecache();
|
|
|
|
*
|
|
|
|
* NOTE: we cannot add the anonymous page to the pagecache because
|
|
|
|
* the data it contains might not be page aligned. Besides,
|
|
|
|
* we should already have a complete representation of the
|
|
|
|
* directory in the page cache by the time we get here.
|
|
|
|
*/
|
|
|
|
static inline
|
|
|
|
int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
|
|
|
|
filldir_t filldir)
|
|
|
|
{
|
|
|
|
struct page *page = NULL;
|
|
|
|
int status;
|
|
|
|
struct inode *inode = desc->file->f_path.dentry->d_inode;
|
|
|
|
|
|
|
|
dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
|
|
|
|
(unsigned long long)*desc->dir_cookie);
|
|
|
|
|
|
|
|
page = alloc_page(GFP_HIGHUSER);
|
|
|
|
if (!page) {
|
|
|
|
status = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
desc->page_index = 0;
|
|
|
|
desc->page = page;
|
|
|
|
|
|
|
|
status = nfs_readdir_xdr_to_array(desc, page, inode);
|
|
|
|
if (status < 0)
|
|
|
|
goto out_release;
|
|
|
|
|
|
|
|
status = nfs_do_filldir(desc, dirent, filldir);
|
|
|
|
|
|
|
|
out:
|
|
|
|
dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
|
|
|
|
__func__, status);
|
|
|
|
return status;
|
|
|
|
out_release:
|
|
|
|
cache_page_release(desc);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The file offset position represents the dirent entry number. A
|
|
|
|
last cookie cache takes care of the common case of reading the
|
|
|
|
whole directory.
|
|
|
|
*/
|
|
|
|
static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
|
|
|
{
|
|
|
|
struct dentry *dentry = filp->f_path.dentry;
|
|
|
|
struct inode *inode = dentry->d_inode;
|
|
|
|
nfs_readdir_descriptor_t my_desc,
|
|
|
|
*desc = &my_desc;
|
|
|
|
int res = -ENOMEM;
|
|
|
|
|
|
|
|
dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
|
|
|
|
dentry->d_parent->d_name.name, dentry->d_name.name,
|
|
|
|
(long long)filp->f_pos);
|
|
|
|
nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* filp->f_pos points to the dirent entry number.
|
|
|
|
* *desc->dir_cookie has the cookie for the next entry. We have
|
|
|
|
* to either find the entry with the appropriate number or
|
|
|
|
* revalidate the cookie.
|
|
|
|
*/
|
|
|
|
memset(desc, 0, sizeof(*desc));
|
|
|
|
|
|
|
|
desc->file = filp;
|
|
|
|
desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie;
|
|
|
|
desc->decode = NFS_PROTO(inode)->decode_dirent;
|
|
|
|
desc->plus = NFS_USE_READDIRPLUS(inode);
|
|
|
|
|
|
|
|
nfs_block_sillyrename(dentry);
|
|
|
|
res = nfs_revalidate_mapping(inode, filp->f_mapping);
|
|
|
|
if (res < 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
while (desc->eof != 1) {
|
|
|
|
res = readdir_search_pagecache(desc);
|
|
|
|
|
|
|
|
if (res == -EBADCOOKIE) {
|
|
|
|
res = 0;
|
|
|
|
/* This means either end of directory */
|
|
|
|
if (*desc->dir_cookie && desc->eof == 0) {
|
|
|
|
/* Or that the server has 'lost' a cookie */
|
|
|
|
res = uncached_readdir(desc, dirent, filldir);
|
|
|
|
if (res == 0)
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (res == -ETOOSMALL && desc->plus) {
|
|
|
|
clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
|
|
|
|
nfs_zap_caches(inode);
|
|
|
|
desc->page_index = 0;
|
|
|
|
desc->plus = 0;
|
|
|
|
desc->eof = 0;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (res < 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
res = nfs_do_filldir(desc, dirent, filldir);
|
|
|
|
if (res < 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
nfs_unblock_sillyrename(dentry);
|
|
|
|
if (res > 0)
|
|
|
|
res = 0;
|
|
|
|
dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
|
|
|
|
dentry->d_parent->d_name.name, dentry->d_name.name,
|
|
|
|
res);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Seek within a directory.
 *
 * Only origin 0 (absolute @offset) and origin 1 (@offset relative to the
 * current filp->f_pos) are accepted; any other origin, or a negative
 * resulting position, yields -EINVAL.  When the position actually changes,
 * the cached dir_cookie of the open context is reset to 0.  The whole
 * update runs under the inode's i_mutex.
 *
 * Returns the new position or -EINVAL.
 */
static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;

	dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
			dentry->d_parent->d_name.name,
			dentry->d_name.name,
			offset, origin);

	mutex_lock(&inode->i_mutex);

	/* A relative seek is converted to an absolute one first. */
	if (origin == 1)
		offset += filp->f_pos;

	if ((origin != 0 && origin != 1) || offset < 0) {
		offset = -EINVAL;
	} else if (offset != filp->f_pos) {
		filp->f_pos = offset;
		nfs_file_open_context(filp)->dir_cookie = 0;
	}

	mutex_unlock(&inode->i_mutex);
	return offset;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* All directory operations under NFS are synchronous, so fsync()
|
|
|
|
* is a dummy operation.
|
|
|
|
*/
|
|
|
|
static int nfs_fsync_dir(struct file *filp, int datasync)
|
|
|
|
{
|
|
|
|
struct dentry *dentry = filp->f_path.dentry;
|
|
|
|
|
|
|
|
dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
|
|
|
|
dentry->d_parent->d_name.name, dentry->d_name.name,
|
|
|
|
datasync);
|
|
|
|
|
|
|
|
nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* nfs_force_lookup_revalidate - Mark the directory as having changed
|
|
|
|
* @dir - pointer to directory inode
|
|
|
|
*
|
|
|
|
* This forces the revalidation code in nfs_lookup_revalidate() to do a
|
|
|
|
* full lookup on all child dentries of 'dir' whenever a change occurs
|
|
|
|
* on the server that might have invalidated our dcache.
|
|
|
|
*
|
|
|
|
* The caller should be holding dir->i_lock
|
|
|
|
*/
|
|
|
|
void nfs_force_lookup_revalidate(struct inode *dir)
|
|
|
|
{
|
|
|
|
NFS_I(dir)->cache_change_attribute++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A check for whether or not the parent directory has changed.
|
|
|
|
* In the case it has, we assume that the dentries are untrustworthy
|
|
|
|
* and may need to be looked up again.
|
|
|
|
*/
|
|
|
|
static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
|
|
|
|
{
|
|
|
|
if (IS_ROOT(dentry))
|
|
|
|
return 1;
|
|
|
|
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
|
|
|
|
return 0;
|
|
|
|
if (!nfs_verify_change_attribute(dir, dentry->d_time))
|
|
|
|
return 0;
|
|
|
|
/* Revalidate nfsi->cache_change_attribute before we declare a match */
|
|
|
|
if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
|
|
|
|
return 0;
|
|
|
|
if (!nfs_verify_change_attribute(dir, dentry->d_time))
|
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return the intent data that applies to this particular path component
|
|
|
|
*
|
|
|
|
* Note that the current set of intents only apply to the very last
|
|
|
|
* component of the path.
|
|
|
|
* We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT.
|
|
|
|
*/
|
|
|
|
static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask)
|
|
|
|
{
|
|
|
|
if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
|
|
|
|
return 0;
|
|
|
|
return nd->flags & mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use intent information to check whether or not we're going to do
|
|
|
|
* an O_EXCL create using this path component.
|
|
|
|
*/
|
|
|
|
static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
|
|
|
|
{
|
|
|
|
if (NFS_PROTO(dir)->version == 2)
|
|
|
|
return 0;
|
|
|
|
return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Inode and filehandle revalidation for lookups.
|
|
|
|
*
|
|
|
|
* We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
|
|
|
|
* or if the intent information indicates that we're about to open this
|
|
|
|
* particular file and the "nocto" mount flag is not set.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static inline
|
|
|
|
int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd)
|
|
|
|
{
|
|
|
|
struct nfs_server *server = NFS_SERVER(inode);
|
|
|
|
|
|
|
|
if (test_bit(NFS_INO_MOUNTPOINT, &NFS_I(inode)->flags))
|
|
|
|
return 0;
|
|
|
|
if (nd != NULL) {
|
|
|
|
/* VFS wants an on-the-wire revalidation */
|
|
|
|
if (nd->flags & LOOKUP_REVAL)
|
|
|
|
goto out_force;
|
|
|
|
/* This is an open(2) */
|
|
|
|
if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 &&
|
|
|
|
!(server->flags & NFS_MOUNT_NOCTO) &&
|
|
|
|
(S_ISREG(inode->i_mode) ||
|
|
|
|
S_ISDIR(inode->i_mode)))
|
|
|
|
goto out_force;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return nfs_revalidate_inode(server, inode);
|
|
|
|
out_force:
|
|
|
|
return __nfs_revalidate_inode(server, inode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We judge how long we want to trust negative
|
|
|
|
* dentries by looking at the parent inode mtime.
|
|
|
|
*
|
|
|
|
* If parent mtime has changed, we revalidate, else we wait for a
|
|
|
|
* period corresponding to the parent's attribute cache timeout value.
|
|
|
|
*/
|
|
|
|
static inline
|
|
|
|
int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
|
|
|
|
struct nameidata *nd)
|
|
|
|
{
|
|
|
|
/* Don't revalidate a negative dentry if we're creating a new file */
|
|
|
|
if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0)
|
|
|
|
return 0;
|
|
|
|
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
|
|
|
|
return 1;
|
|
|
|
return !nfs_check_verifier(dir, dentry);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is called every time the dcache has a lookup hit,
|
|
|
|
* and we should check whether we can really trust that
|
|
|
|
* lookup.
|
|
|
|
*
|
|
|
|
* NOTE! The hit can be a negative hit too, don't assume
|
|
|
|
* we have an inode!
|
|
|
|
*
|
|
|
|
* If the parent directory is seen to have changed, we throw out the
|
|
|
|
* cached dentry and do a new lookup.
|
|
|
|
*/
|
|
|
|
static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
|
|
|
|
{
|
|
|
|
struct inode *dir;
|
|
|
|
struct inode *inode;
|
|
|
|
struct dentry *parent;
|
|
|
|
struct nfs_fh *fhandle = NULL;
|
|
|
|
struct nfs_fattr *fattr = NULL;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
parent = dget_parent(dentry);
|
|
|
|
dir = parent->d_inode;
|
|
|
|
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
|
|
|
|
inode = dentry->d_inode;
|
|
|
|
|
|
|
|
if (!inode) {
|
|
|
|
if (nfs_neg_need_reval(dir, dentry, nd))
|
|
|
|
goto out_bad;
|
|
|
|
goto out_valid;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_bad_inode(inode)) {
|
|
|
|
dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
|
|
|
|
__func__, dentry->d_parent->d_name.name,
|
|
|
|
dentry->d_name.name);
|
|
|
|
goto out_bad;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nfs_have_delegation(inode, FMODE_READ))
|
|
|
|
goto out_set_verifier;
|
|
|
|
|
|
|
|
/* Force a full look up iff the parent directory has changed */
|
|
|
|
if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) {
|
|
|
|
if (nfs_lookup_verify_inode(inode, nd))
|
|
|
|
goto out_zap_parent;
|
|
|
|
goto out_valid;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NFS_STALE(inode))
|
|
|
|
goto out_bad;
|
|
|
|
|
|
|
|
error = -ENOMEM;
|
|
|
|
fhandle = nfs_alloc_fhandle();
|
|
|
|
fattr = nfs_alloc_fattr();
|
|
|
|
if (fhandle == NULL || fattr == NULL)
|
|
|
|
goto out_error;
|
|
|
|
|
|
|
|
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
|
|
|
|
if (error)
|
|
|
|
goto out_bad;
|
|
|
|
if (nfs_compare_fh(NFS_FH(inode), fhandle))
|
|
|
|
goto out_bad;
|
|
|
|
if ((error = nfs_refresh_inode(inode, fattr)) != 0)
|
|
|
|
goto out_bad;
|
|
|
|
|
|
|
|
nfs_free_fattr(fattr);
|
|
|
|
nfs_free_fhandle(fhandle);
|
|
|
|
out_set_verifier:
|
|
|
|
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
|
|
|
out_valid:
|
|
|
|
dput(parent);
|
|
|
|
dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
|
|
|
|
__func__, dentry->d_parent->d_name.name,
|
|
|
|
dentry->d_name.name);
|
|
|
|
return 1;
|
|
|
|
out_zap_parent:
|
|
|
|
nfs_zap_caches(dir);
|
|
|
|
out_bad:
|
|
|
|
nfs_mark_for_revalidate(dir);
|
|
|
|
if (inode && S_ISDIR(inode->i_mode)) {
|
|
|
|
/* Purge readdir caches. */
|
|
|
|
nfs_zap_caches(inode);
|
|
|
|
/* If we have submounts, don't unhash ! */
|
|
|
|
if (have_submounts(dentry))
|
|
|
|
goto out_valid;
|
|
|
|
if (dentry->d_flags & DCACHE_DISCONNECTED)
|
|
|
|
goto out_valid;
|
|
|
|
shrink_dcache_parent(dentry);
|
|
|
|
}
|
|
|
|
d_drop(dentry);
|
|
|
|
nfs_free_fattr(fattr);
|
|
|
|
nfs_free_fhandle(fhandle);
|
|
|
|
dput(parent);
|
|
|
|
dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
|
|
|
|
__func__, dentry->d_parent->d_name.name,
|
|
|
|
dentry->d_name.name);
|
|
|
|
return 0;
|
|
|
|
out_error:
|
|
|
|
nfs_free_fattr(fattr);
|
|
|
|
nfs_free_fhandle(fhandle);
|
|
|
|
dput(parent);
|
|
|
|
dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
|
|
|
|
__func__, dentry->d_parent->d_name.name,
|
|
|
|
dentry->d_name.name, error);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is called from dput() when d_count is going to 0.
|
|
|
|
*/
|
|
|
|
static int nfs_dentry_delete(struct dentry *dentry)
|
|
|
|
{
|
|
|
|
dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
|
|
|
|
dentry->d_parent->d_name.name, dentry->d_name.name,
|
|
|
|
dentry->d_flags);
|
|
|
|
|
|
|
|
/* Unhash any dentry with a stale inode */
|
|
|
|
if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
|
|
|
|
/* Unhash it, so that ->d_iput() would be called */
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if (!(dentry->d_sb->s_flags & MS_ACTIVE)) {
|
|
|
|
/* Unhash it, so that ancestors of killed async unlink
|
|
|
|
* files will be cleaned up during umount */
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nfs_drop_nlink(struct inode *inode)
|
|
|
|
{
|
|
|
|
spin_lock(&inode->i_lock);
|
|
|
|
if (inode->i_nlink > 0)
|
|
|
|
drop_nlink(inode);
|
|
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Called when the dentry loses inode.
|
|
|
|
* We use it to clean up silly-renamed files.
|
|
|
|
*/
|
|
|
|
static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
|
|
|
|
{
|
|
|
|
if (S_ISDIR(inode->i_mode))
|
|
|
|
/* drop any readdir cache as it could easily be old */
|
|
|
|
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
|
|
|
|
|
|
|
|
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
|
|
|
|
drop_nlink(inode);
|
|
|
|
nfs_complete_unlink(dentry, inode);
|
|
|
|
}
|
|
|
|
iput(inode);
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct dentry_operations nfs_dentry_operations = {
|
|
|
|
.d_revalidate = nfs_lookup_revalidate,
|
|
|
|
.d_delete = nfs_dentry_delete,
|
|
|
|
.d_iput = nfs_dentry_iput,
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
|
|
|
|
{
|
|
|
|
struct dentry *res;
|
|
|
|
struct dentry *parent;
|
|
|
|
struct inode *inode = NULL;
|
|
|
|
struct nfs_fh *fhandle = NULL;
|
|
|
|
struct nfs_fattr *fattr = NULL;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: lookup(%s/%s)\n",
|
|
|
|
dentry->d_parent->d_name.name, dentry->d_name.name);
|
|
|
|
nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
|
|
|
|
|
|
|
|
res = ERR_PTR(-ENAMETOOLONG);
|
|
|
|
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we're doing an exclusive create, optimize away the lookup
|
|
|
|
* but don't hash the dentry.
|
|
|
|
*/
|
|
|
|
if (nfs_is_exclusive_create(dir, nd)) {
|
|
|
|
d_instantiate(dentry, NULL);
|
|
|
|
res = NULL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
res = ERR_PTR(-ENOMEM);
|
|
|
|
fhandle = nfs_alloc_fhandle();
|
|
|
|
fattr = nfs_alloc_fattr();
|
|
|
|
if (fhandle == NULL || fattr == NULL)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
parent = dentry->d_parent;
|
|
|
|
/* Protect against concurrent sillydeletes */
|
|
|
|
nfs_block_sillyrename(parent);
|
|
|
|
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
|
|
|
|
if (error == -ENOENT)
|
|
|
|
goto no_entry;
|
|
|
|
if (error < 0) {
|
|
|
|
res = ERR_PTR(error);
|
|
|
|
goto out_unblock_sillyrename;
|
|
|
|
}
|
|
|
|
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
|
|
|
|
res = (struct dentry *)inode;
|
|
|
|
if (IS_ERR(res))
|
|
|
|
goto out_unblock_sillyrename;
|
NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
(1) The root and intervening nodes may not be accessible to the client.
With NFS2 and NFS3, for instance, mountd is called on the server to get
the filehandle for the tip of a path. mountd won't give us handles for
anything we don't have permission to access, and so we can't set up NFS
inodes for such nodes, and so can't easily set up dentries (we'd have to
have ghost inodes or something).
With this patch we don't actually create dentries until we get handles
from the server that we can use to set up their inodes, and we don't
actually bind them into the tree until we know for sure where they go.
(2) Inaccessible symbolic links.
If we're asked to mount two exports from the server, eg:
mount warthog:/warthog/aaa/xxx /mmm
mount warthog:/warthog/bbb/yyy /nnn
We may not be able to access anything nearer the root than xxx and yyy,
but we may find out later that /mmm/www/yyy, say, is actually the same
directory as the one mounted on /nnn. What we might then find out, for
example, is that /warthog/bbb was actually a symbolic link to
/warthog/aaa/xxx/www, but we can't actually determine that by talking to
the server until /warthog is made available by NFS.
This would lead to having constructed an errneous dentry tree which we
can't easily fix. We can end up with a dentry marked as a directory when
it should actually be a symlink, or we could end up with an apparently
hardlinked directory.
With this patch we need not make assumptions about the type of a dentry
for which we can't retrieve information, nor need we assume we know its
place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
(1) The server record construction/destruction has been abstracted out into
its own set of functions to make things easier to get right. These have
been moved into fs/nfs/client.c.
All the code in fs/nfs/client.c has to do with the management of
connections to servers, and doesn't touch superblocks in any way; the
remaining code in fs/nfs/super.c has to do with VFS superblock management.
(2) The sequence of events undertaken by NFS mount is now reordered:
(a) A volume representation (struct nfs_server) is allocated.
(b) A server representation (struct nfs_client) is acquired. This may be
allocated or shared, and is keyed on server address, port and NFS
version.
(c) If allocated, the client representation is initialised. The state
member variable of nfs_client is used to prevent a race during
initialisation from two mounts.
(d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we
are given the root FH in advance.
(e) The volume FSID is probed for on the root FH.
(f) The volume representation is initialised from the FSINFO record
retrieved on the root FH.
(g) sget() is called to acquire a superblock. This may be allocated or
shared, keyed on client pointer and FSID.
(h) If allocated, the superblock is initialised.
(i) If the superblock is shared, then the new nfs_server record is
discarded.
(j) The root dentry for this mount is looked up from the root FH.
(k) The root dentry for this mount is assigned to the vfsmount.
(3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
returns; this function now attaches disconnected trees from alternate
roots that happen to be discovered attached to a directory being read (in
the same way nfs_lookup() is made to do for lookup ops).
The new d_materialise_unique() function is now used to do this, thus
permitting the whole thing to be done under one set of locks, and thus
avoiding any race between mount and lookup operations on the same
directory.
(4) The client management code uses a new debug facility: NFSDBG_CLIENT which
is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
(5) Clone mounts are now called xdev mounts.
(6) Use the dentry passed to the statfs() op as the handle for retrieving fs
statistics rather than the root dentry of the superblock (which is now a
dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
19 years ago
|
|
|
|
|
|
|
no_entry:
|
NFS: Share NFS superblocks per-protocol per-server per-FSID
The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.
It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.
We may thus end up with several trees of dentries in the superblock, and if at
some later point one of anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.
Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:
(1) The root and intervening nodes may not be accessible to the client.
With NFS2 and NFS3, for instance, mountd is called on the server to get
the filehandle for the tip of a path. mountd won't give us handles for
anything we don't have permission to access, and so we can't set up NFS
inodes for such nodes, and so can't easily set up dentries (we'd have to
have ghost inodes or something).
With this patch we don't actually create dentries until we get handles
from the server that we can use to set up their inodes, and we don't
actually bind them into the tree until we know for sure where they go.
(2) Inaccessible symbolic links.
If we're asked to mount two exports from the server, eg:
mount warthog:/warthog/aaa/xxx /mmm
mount warthog:/warthog/bbb/yyy /nnn
We may not be able to access anything nearer the root than xxx and yyy,
but we may find out later that /mmm/www/yyy, say, is actually the same
directory as the one mounted on /nnn. What we might then find out, for
example, is that /warthog/bbb was actually a symbolic link to
/warthog/aaa/xxx/www, but we can't actually determine that by talking to
the server until /warthog is made available by NFS.
This would lead to having constructed an errneous dentry tree which we
can't easily fix. We can end up with a dentry marked as a directory when
it should actually be a symlink, or we could end up with an apparently
hardlinked directory.
With this patch we need not make assumptions about the type of a dentry
for which we can't retrieve information, nor need we assume we know its
place in the grand scheme of things until we actually see that place.
This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).
This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.
Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.
This patch makes the following changes:
(1) The server record construction/destruction has been abstracted out into
its own set of functions to make things easier to get right. These have
been moved into fs/nfs/client.c.
All the code in fs/nfs/client.c has to do with the management of
connections to servers, and doesn't touch superblocks in any way; the
remaining code in fs/nfs/super.c has to do with VFS superblock management.
(2) The sequence of events undertaken by NFS mount is now reordered:
(a) A volume representation (struct nfs_server) is allocated.
(b) A server representation (struct nfs_client) is acquired. This may be
allocated or shared, and is keyed on server address, port and NFS
version.
(c) If allocated, the client representation is initialised. The state
member variable of nfs_client is used to prevent a race during
initialisation from two mounts.
(d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
the root filehandle for the mount (fs/nfs/getroot.c). For NFS2/3 we
are given the root FH in advance.
(e) The volume FSID is probed for on the root FH.
(f) The volume representation is initialised from the FSINFO record
retrieved on the root FH.
(g) sget() is called to acquire a superblock. This may be allocated or
shared, keyed on client pointer and FSID.
(h) If allocated, the superblock is initialised.
(i) If the superblock is shared, then the new nfs_server record is
discarded.
(j) The root dentry for this mount is looked up from the root FH.
(k) The root dentry for this mount is assigned to the vfsmount.
(3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
returns; this function now attaches disconnected trees from alternate
roots that happen to be discovered attached to a directory being read (in
the same way nfs_lookup() is made to do for lookup ops).
The new d_materialise_unique() function is now used to do this, thus
permitting the whole thing to be done under one set of locks, and thus
avoiding any race between mount and lookup operations on the same
directory.
(4) The client management code uses a new debug facility: NFSDBG_CLIENT which
is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.
(5) Clone mounts are now called xdev mounts.
(6) Use the dentry passed to the statfs() op as the handle for retrieving fs
statistics rather than the root dentry of the superblock (which is now a
dummy).
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
19 years ago
|
|
|
res = d_materialise_unique(dentry, inode);
|
|
|
|
if (res != NULL) {
|
|
|
|
if (IS_ERR(res))
|
|
|
|
goto out_unblock_sillyrename;
|
|
|
|
dentry = res;
|
|
|
|
}
|
|
|
|
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
|
|
|
out_unblock_sillyrename:
|
|
|
|
nfs_unblock_sillyrename(parent);
|
|
|
|
out:
|
|
|
|
nfs_free_fattr(fattr);
|
|
|
|
nfs_free_fhandle(fhandle);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_NFS_V4
|
|
|
|
static int nfs_open_revalidate(struct dentry *, struct nameidata *);
|
|
|
|
|
|
|
|
const struct dentry_operations nfs4_dentry_operations = {
|
|
|
|
.d_revalidate = nfs_open_revalidate,
|
|
|
|
.d_delete = nfs_dentry_delete,
|
|
|
|
.d_iput = nfs_dentry_iput,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Use intent information to determine whether we need to substitute
|
|
|
|
* the NFSv4-style stateful OPEN for the LOOKUP call
|
|
|
|
*/
|
|
|
|
static int is_atomic_open(struct nameidata *nd)
|
|
|
|
{
|
|
|
|
if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0)
|
|
|
|
return 0;
|
|
|
|
/* NFS does not (yet) have a stateful open for directories */
|
|
|
|
if (nd->flags & LOOKUP_DIRECTORY)
|
|
|
|
return 0;
|
|
|
|
/* Are we trying to write to a read only partition? */
|
|
|
|
if (__mnt_is_readonly(nd->path.mnt) &&
|
|
|
|
(nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
|
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd)
|
|
|
|
{
|
|
|
|
struct path path = {
|
|
|
|
.mnt = nd->path.mnt,
|
|
|
|
.dentry = dentry,
|
|
|
|
};
|
|
|
|
struct nfs_open_context *ctx;
|
|
|
|
struct rpc_cred *cred;
|
|
|
|
fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
|
|
|
|
|
|
|
|
cred = rpc_lookup_cred();
|
|
|
|
if (IS_ERR(cred))
|
|
|
|
return ERR_CAST(cred);
|
|
|
|
ctx = alloc_nfs_open_context(&path, cred, fmode);
|
|
|
|
put_rpccred(cred);
|
|
|
|
if (ctx == NULL)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
return ctx;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Open callback passed to lookup_instantiate_filp() by
 * nfs_intent_set_file(): hands @inode and @filp to
 * nfs_fscache_set_inode_cookie() and always reports success.
 */
static int do_open(struct inode *inode, struct file *filp)
{
	nfs_fscache_set_inode_cookie(inode, filp);

	return 0;
}
|
|
|
|
|
|
|
|
static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
|
|
|
|
{
|
|
|
|
struct file *filp;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
/* If the open_intent is for execute, we have an extra check to make */
|
|
|
|
if (ctx->mode & FMODE_EXEC) {
|
|
|
|
ret = nfs_may_open(ctx->path.dentry->d_inode,
|
|
|
|
ctx->cred,
|
|
|
|
nd->intent.open.flags);
|
|
|
|
if (ret < 0)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open);
|
|
|
|
if (IS_ERR(filp))
|
|
|
|
ret = PTR_ERR(filp);
|
|
|
|
else
|
|
|
|
nfs_file_set_open_context(filp, ctx);
|
|
|
|
out:
|
|
|
|
put_nfs_open_context(ctx);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
|
|
|
|
{
|
|
|
|
struct nfs_open_context *ctx;
|
|
|
|
struct iattr attr;
|
|
|
|
struct dentry *res = NULL;
|
|
|
|
struct inode *inode;
|
|
|
|
int open_flags;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
|
|
|
|
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
|
|
|
|
|
|
|
|
/* Check that we are indeed trying to open this file */
|
|
|
|
if (!is_atomic_open(nd))
|
|
|
|
goto no_open;
|
|
|
|
|
|
|
|
if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
|
|
|
|
res = ERR_PTR(-ENAMETOOLONG);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
|
|
|
|
|
|
|
|
/* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
|
|
|
|
* the dentry. */
|
|
|
|
if (nd->flags & LOOKUP_EXCL) {
|
|
|
|
d_instantiate(dentry, NULL);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx = nameidata_to_nfs_open_context(dentry, nd);
|
|
|
|
res = ERR_CAST(ctx);
|
|
|
|
if (IS_ERR(ctx))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
open_flags = nd->intent.open.flags;
|
|
|
|
if (nd->flags & LOOKUP_CREATE) {
|
|
|
|
attr.ia_mode = nd->intent.open.create_mode;
|
|
|
|
attr.ia_valid = ATTR_MODE;
|
|
|
|
if (!IS_POSIXACL(dir))
|
|
|
|
attr.ia_mode &= ~current_umask();
|
|
|
|
} else {
|
|
|
|
open_flags &= ~(O_EXCL | O_CREAT);
|
|
|
|
attr.ia_valid = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Open the file on the server */
|
|
|
|
nfs_block_sillyrename(dentry->d_parent);
|
|
|
|
inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
|
|
|
|
if (IS_ERR(inode)) {
|
|
|
|
nfs_unblock_sillyrename(dentry->d_parent);
|
|
|
|
put_nfs_open_context(ctx);
|
|
|
|
switch (PTR_ERR(inode)) {
|
|
|
|
/* Make a negative dentry */
|
|
|
|
case -ENOENT:
|
|
|
|
d_add(dentry, NULL);
|
|
|
|
res = NULL;
|
|
|
|
goto out;
|
|
|
|
/* This turned out not to be a regular file */
|
|
|
|
case -ENOTDIR:
|
|
|
|
goto no_open;
|
|
|
|
case -ELOOP:
|
|
|
|
if (!(nd->intent.open.flags & O_NOFOLLOW))
|
|
|
|
goto no_open;
|
|
|
|
/* case -EISDIR: */
|
|
|
|
/* case -EINVAL: */
|
|
|
|
default:
|
|
|
|
res = ERR_CAST(inode);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
res = d_add_unique(dentry, inode);
|
|
|
|
nfs_unblock_sillyrename(dentry->d_parent);
|
|
|
|
if (res != NULL) {
|
|
|
|
dput(ctx->path.dentry);
|
|
|
|
ctx->path.dentry = dget(res);
|
|
|
|
dentry = res;
|
|
|
|
}
|
|
|
|
err = nfs_intent_set_file(nd, ctx);
|
|
|
|
if (err < 0) {
|
|
|
|
if (res != NULL)
|
|
|
|
dput(res);
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
|
|
|
return res;
|
|
|
|
no_open:
|
|
|
|
return nfs_lookup(dir, dentry, nd);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
|
|
|
|
{
|
|
|
|
struct dentry *parent = NULL;
|
|
|
|
struct inode *inode = dentry->d_inode;
|
|
|
|
struct inode *dir;
|
|
|
|
struct nfs_open_context *ctx;
|
|
|
|
int openflags, ret = 0;
|
|
|
|
|
|
|
|
if (!is_atomic_open(nd) || d_mountpoint(dentry))
|
|
|
|
goto no_open;
|
|
|
|
|
|
|
|
parent = dget_parent(dentry);
|
|
|
|
dir = parent->d_inode;
|
|
|
|
|
|
|
|
/* We can't create new files in nfs_open_revalidate(), so we
|
|
|
|
* optimize away revalidation of negative dentries.
|
|
|
|
*/
|
|
|
|
if (inode == NULL) {
|
|
|
|
if (!nfs_neg_need_reval(dir, dentry, nd))
|
|
|
|
ret = 1;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* NFS only supports OPEN on regular files */
|
|
|
|
if (!S_ISREG(inode->i_mode))
|
|
|
|
goto no_open_dput;
|
|
|
|
openflags = nd->intent.open.flags;
|
|
|
|
/* We cannot do exclusive creation on a positive dentry */
|
|
|
|
if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
|
|
|
|
goto no_open_dput;
|
|
|
|
/* We can't create new files, or truncate existing ones here */
|
|
|
|
openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
|
|
|
|
|
|
|
|
ctx = nameidata_to_nfs_open_context(dentry, nd);
|
|
|
|
ret = PTR_ERR(ctx);
|
|
|
|
if (IS_ERR(ctx))
|
|
|
|
goto out;
|
|
|
|
/*
|
|
|
|
* Note: we're not holding inode->i_mutex and so may be racing with
|
|
|
|
* operations that change the directory. We therefore save the
|
|
|
|
* change attribute *before* we do the RPC call.
|
|
|
|
*/
|
|
|
|
inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL);
|
|
|
|
if (IS_ERR(inode)) {
|
|
|
|
ret = PTR_ERR(inode);
|
|
|
|
switch (ret) {
|
|
|
|
case -EPERM:
|
|
|
|
case -EACCES:
|
|
|
|
case -EDQUOT:
|
|
|
|
case -ENOSPC:
|
|
|
|
case -EROFS:
|
|
|
|
goto out_put_ctx;
|
|
|
|
default:
|
|
|
|
goto out_drop;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
iput(inode);
|
|
|
|
if (inode != dentry->d_inode)
|
|
|
|
goto out_drop;
|
|
|
|
|
|
|
|
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
|
|
|
ret = nfs_intent_set_file(nd, ctx);
|
|
|
|
if (ret >= 0)
|
|
|
|
ret = 1;
|
|
|
|
out:
|
|
|
|
dput(parent);
|
|
|
|
return ret;
|
|
|
|
out_drop:
|
|
|
|
d_drop(dentry);
|
|
|
|
ret = 0;
|
|
|
|
out_put_ctx:
|
|
|
|
put_nfs_open_context(ctx);
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
no_open_dput:
|
|
|
|
dput(parent);
|
|
|
|
no_open:
|
|
|
|
return nfs_lookup_revalidate(dentry, nd);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
|
|
|
|
struct nameidata *nd)
|
|
|
|
{
|
|
|
|
struct nfs_open_context *ctx = NULL;
|
|
|
|
struct iattr attr;
|
|
|
|
int error;
|
|
|
|
int open_flags = 0;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
|
|
|
|
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
|
|
|
|
|
|
|
|
attr.ia_mode = mode;
|
|
|
|
attr.ia_valid = ATTR_MODE;
|
|
|
|
|
|
|
|
if ((nd->flags & LOOKUP_CREATE) != 0) {
|
|
|
|
open_flags = nd->intent.open.flags;
|
|
|
|
|
|
|
|
ctx = nameidata_to_nfs_open_context(dentry, nd);
|
|
|
|
error = PTR_ERR(ctx);
|
|
|
|
if (IS_ERR(ctx))
|
|
|
|
goto out_err_drop;
|
|
|
|
}
|
|
|
|
|
|
|
|
error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
|
|
|
|
if (error != 0)
|
|
|
|
goto out_put_ctx;
|
|
|
|
if (ctx != NULL) {
|
|
|
|
error = nfs_intent_set_file(nd, ctx);
|
|
|
|
if (error < 0)
|
|
|
|
goto out_err;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_put_ctx:
|
|
|
|
if (ctx != NULL)
|
|
|
|
put_nfs_open_context(ctx);
|
|
|
|
out_err_drop:
|
|
|
|
d_drop(dentry);
|
|
|
|
out_err:
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* CONFIG_NFS_V4 */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Code common to create, mkdir, and mknod.
|
|
|
|
*/
|
|
|
|
int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
|
|
|
|
struct nfs_fattr *fattr)
|
|
|
|
{
|
|
|
|
struct dentry *parent = dget_parent(dentry);
|
|
|
|
struct inode *dir = parent->d_inode;
|
|
|
|
struct inode *inode;
|
|
|
|
int error = -EACCES;
|
|
|
|
|
|
|
|
d_drop(dentry);
|
|
|
|
|
|
|
|
/* We may have been initialized further down */
|
|
|
|
if (dentry->d_inode)
|
|
|
|
goto out;
|
|
|
|
if (fhandle->size == 0) {
|
|
|
|
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
|
|
|
|
if (error)
|
|
|
|
goto out_error;
|
|
|
|
}
|
|
|
|
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
|
|
|
if (!(fattr->valid & NFS_ATTR_FATTR)) {
|
|
|
|
struct nfs_server *server = NFS_SB(dentry->d_sb);
|
|
|
|
error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
|
|
|
|
if (error < 0)
|
|
|
|
goto out_error;
|
|
|
|
}
|
|
|
|
inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
|
|
|
|
error = PTR_ERR(inode);
|
|
|
|
if (IS_ERR(inode))
|
|
|
|
goto out_error;
|
|
|
|
d_add(dentry, inode);
|
|
|
|
out:
|
|
|
|
dput(parent);
|
|
|
|
return 0;
|
|
|
|
out_error:
|
|
|
|
nfs_mark_for_revalidate(dir);
|
|
|
|
dput(parent);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Following a failed create operation, we drop the dentry rather
|
|
|
|
* than retain a negative dentry. This avoids a problem in the event
|
|
|
|
* that the operation succeeded on the server, but an error in the
|
|
|
|
* reply path made it appear to have failed.
|
|
|
|
*/
|
|
|
|
static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
|
|
|
|
struct nameidata *nd)
|
|
|
|
{
|
|
|
|
struct iattr attr;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
|
|
|
|
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
|
|
|
|
|
|
|
|
attr.ia_mode = mode;
|
|
|
|
attr.ia_valid = ATTR_MODE;
|
|
|
|
|
|
|
|
error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL);
|
|
|
|
if (error != 0)
|
|
|
|
goto out_err;
|
|
|
|
return 0;
|
|
|
|
out_err:
|
|
|
|
d_drop(dentry);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* See comments for nfs_proc_create regarding failed operations.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
|
|
|
|
{
|
|
|
|
struct iattr attr;
|
|
|
|
int status;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
|
|
|
|
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
|
|
|
|
|
|
|
|
if (!new_valid_dev(rdev))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
attr.ia_mode = mode;
|
|
|
|
attr.ia_valid = ATTR_MODE;
|
|
|
|
|
|
|
|
status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
|
|
|
|
if (status != 0)
|
|
|
|
goto out_err;
|
|
|
|
return 0;
|
|
|
|
out_err:
|
|
|
|
d_drop(dentry);
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* See comments for nfs_proc_create regarding failed operations.
|
|
|
|
*/
|
|
|
|
static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
|
|
|
{
|
|
|
|
struct iattr attr;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
|
|
|
|
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
|
|
|
|
|
|
|
|
attr.ia_valid = ATTR_MODE;
|
|
|
|
attr.ia_mode = mode | S_IFDIR;
|
|
|
|
|
|
|
|
error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
|
|
|
|
if (error != 0)
|
|
|
|
goto out_err;
|
|
|
|
return 0;
|
|
|
|
out_err:
|
|
|
|
d_drop(dentry);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nfs_dentry_handle_enoent(struct dentry *dentry)
|
|
|
|
{
|
|
|
|
if (dentry->d_inode != NULL && !d_unhashed(dentry))
|
|
|
|
d_delete(dentry);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
|
|
|
|
dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
|
|
|
|
|
|
|
|
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
|
|
|
|
/* Ensure the VFS deletes this inode */
|
|
|
|
if (error == 0 && dentry->d_inode != NULL)
|
|
|
|
clear_nlink(dentry->d_inode);
|
|
|
|
else if (error == -ENOENT)
|
|
|
|
nfs_dentry_handle_enoent(dentry);
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Remove a file after making sure there are no pending writes,
|
|
|
|
* and after checking that the file has only one user.
|
|
|
|
*
|
|
|
|
* We invalidate the attribute cache and free the inode prior to the operation
|
|
|
|
* to avoid possible races if the server reuses the inode.
|
|
|
|
*/
|
|
|
|
static int nfs_safe_remove(struct dentry *dentry)
|
|
|
|
{
|
|
|
|
struct inode *dir = dentry->d_parent->d_inode;
|
|
|
|
struct inode *inode = dentry->d_inode;
|
|
|
|
int error = -EBUSY;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: safe_remove(%s/%s)\n",
|
|
|
|
dentry->d_parent->d_name.name, dentry->d_name.name);
|
|
|
|
|
|
|
|
/* If the dentry was sillyrenamed, we simply call d_delete() */
|
|
|
|
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
|
|
|
|
error = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (inode != NULL) {
|
|
|
|
nfs_inode_return_delegation(inode);
|
|
|
|
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
|
|
|
|
/* The VFS may want to delete this inode */
|
|
|
|
if (error == 0)
|
|
|
|
nfs_drop_nlink(inode);
|
|
|
|
nfs_mark_for_revalidate(inode);
|
|
|
|
} else
|
|
|
|
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
|
|
|
|
if (error == -ENOENT)
|
|
|
|
nfs_dentry_handle_enoent(dentry);
|
|
|
|
out:
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We do silly rename. In case sillyrename() returns -EBUSY, the inode
|
|
|
|
* belongs to an active ".nfs..." file and we return -EBUSY.
|
|
|
|
*
|
|
|
|
* If sillyrename() returns 0, we do nothing, otherwise we unlink.
|
|
|
|
*/
|
|
|
|
static int nfs_unlink(struct inode *dir, struct dentry *dentry)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
int need_rehash = 0;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
|
|
|
|
dir->i_ino, dentry->d_name.name);
|
|
|
|
|
|
|
|
spin_lock(&dcache_lock);
|
|
|
|
spin_lock(&dentry->d_lock);
|
|
|
|
if (atomic_read(&dentry->d_count) > 1) {
|
|
|
|
spin_unlock(&dentry->d_lock);
|
|
|
|
spin_unlock(&dcache_lock);
|
|
|
|
/* Start asynchronous writeout of the inode */
|
|
|
|
write_inode_now(dentry->d_inode, 0);
|
|
|
|
error = nfs_sillyrename(dir, dentry);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
if (!d_unhashed(dentry)) {
|
|
|
|
__d_drop(dentry);
|
|
|
|
need_rehash = 1;
|
|
|
|
}
|
|
|
|
spin_unlock(&dentry->d_lock);
|
|
|
|
spin_unlock(&dcache_lock);
|
|
|
|
error = nfs_safe_remove(dentry);
|
|
|
|
if (!error || error == -ENOENT) {
|
|
|
|
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
|
|
|
|
} else if (need_rehash)
|
|
|
|
d_rehash(dentry);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* To create a symbolic link, most file systems instantiate a new inode,
|
|
|
|
* add a page to it containing the path, then write it out to the disk
|
|
|
|
* using prepare_write/commit_write.
|
|
|
|
*
|
|
|
|
* Unfortunately the NFS client can't create the in-core inode first
|
|
|
|
* because it needs a file handle to create an in-core inode (see
|
|
|
|
* fs/nfs/inode.c:nfs_fhget). We only have a file handle *after* the
|
|
|
|
* symlink request has completed on the server.
|
|
|
|
*
|
|
|
|
* So instead we allocate a raw page, copy the symname into it, then do
|
|
|
|
* the SYMLINK request with the page as the buffer. If it succeeds, we
|
|
|
|
* now have a new file handle and can instantiate an in-core NFS inode
|
|
|
|
* and move the raw page into its mapping.
|
|
|
|
*/
|
|
|
|
static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
|
|
|
|
{
|
|
|
|
struct pagevec lru_pvec;
|
|
|
|
struct page *page;
|
|
|
|
char *kaddr;
|
|
|
|
struct iattr attr;
|
|
|
|
unsigned int pathlen = strlen(symname);
|
|
|
|
int error;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
|
|
|
|
dir->i_ino, dentry->d_name.name, symname);
|
|
|
|
|
|
|
|
if (pathlen > PAGE_SIZE)
|
|
|
|
return -ENAMETOOLONG;
|
|
|
|
|
|
|
|
attr.ia_mode = S_IFLNK | S_IRWXUGO;
|
|
|
|
attr.ia_valid = ATTR_MODE;
|
|
|
|
|
|
|
|
page = alloc_page(GFP_HIGHUSER);
|
|
|
|
if (!page)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
kaddr = kmap_atomic(page, KM_USER0);
|
|
|
|
memcpy(kaddr, symname, pathlen);
|
|
|
|
if (pathlen < PAGE_SIZE)
|
|
|
|
memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
|
|
|
|
kunmap_atomic(kaddr, KM_USER0);
|
|
|
|
|
|
|
|
error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
|
|
|
|
if (error != 0) {
|
|
|
|
dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
|
|
|
|
dir->i_sb->s_id, dir->i_ino,
|
|
|
|
dentry->d_name.name, symname, error);
|
|
|
|
d_drop(dentry);
|
|
|
|
__free_page(page);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* No big deal if we can't add this page to the page cache here.
|
|
|
|
* READLINK will get the missing page from the server if needed.
|
|
|
|
*/
|
|
|
|
pagevec_init(&lru_pvec, 0);
|
|
|
|
if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
|
|
|
|
GFP_KERNEL)) {
|
|
|
|
pagevec_add(&lru_pvec, page);
|
|
|
|
pagevec_lru_add_file(&lru_pvec);
|
|
|
|
SetPageUptodate(page);
|
|
|
|
unlock_page(page);
|
|
|
|
} else
|
|
|
|
__free_page(page);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
|
|
|
|
{
|
|
|
|
struct inode *inode = old_dentry->d_inode;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n",
|
|
|
|
old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
|
|
|
|
dentry->d_parent->d_name.name, dentry->d_name.name);
|
|
|
|
|
|
|
|
nfs_inode_return_delegation(inode);
|
|
|
|
|
|
|
|
d_drop(dentry);
|
|
|
|
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
|
|
|
|
if (error == 0) {
|
|
|
|
ihold(inode);
|
|
|
|
d_add(dentry, inode);
|
|
|
|
}
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* RENAME
|
|
|
|
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
|
|
|
|
* different file handle for the same inode after a rename (e.g. when
|
|
|
|
* moving to a different directory). A fail-safe method to do so would
|
|
|
|
* be to look up old_dir/old_name, create a link to new_dir/new_name and
|
|
|
|
* rename the old file using the sillyrename stuff. This way, the original
|
|
|
|
* file in old_dir will go away when the last process iput()s the inode.
|
|
|
|
*
|
|
|
|
* FIXED.
|
|
|
|
*
|
|
|
|
* It actually works quite well. One needs to have the possibility for
|
|
|
|
* at least one ".nfs..." file in each directory the file ever gets
|
|
|
|
* moved or linked to which happens automagically with the new
|
|
|
|
* implementation that only depends on the dcache stuff instead of
|
|
|
|
* using the inode layer
|
|
|
|
*
|
|
|
|
* Unfortunately, things are a little more complicated than indicated
|
|
|
|
* above. For a cross-directory move, we want to make sure we can get
|
|
|
|
* rid of the old inode after the operation. This means there must be
|
|
|
|
* no pending writes (if it's a file), and the use count must be 1.
|
|
|
|
* If these conditions are met, we can drop the dentries before doing
|
|
|
|
* the rename.
|
|
|
|
*/
|
|
|
|
static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|
|
|
struct inode *new_dir, struct dentry *new_dentry)
|
|
|
|
{
|
|
|
|
struct inode *old_inode = old_dentry->d_inode;
|
|
|
|
struct inode *new_inode = new_dentry->d_inode;
|
|
|
|
struct dentry *dentry = NULL, *rehash = NULL;
|
|
|
|
int error = -EBUSY;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
|
|
|
|
old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
|
|
|
|
new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
|
|
|
|
atomic_read(&new_dentry->d_count));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For non-directories, check whether the target is busy and if so,
|
|
|
|
* make a copy of the dentry and then do a silly-rename. If the
|
|
|
|
* silly-rename succeeds, the copied dentry is hashed and becomes
|
|
|
|
* the new target.
|
|
|
|
*/
|
|
|
|
if (new_inode && !S_ISDIR(new_inode->i_mode)) {
|
|
|
|
/*
|
|
|
|
* To prevent any new references to the target during the
|
|
|
|
* rename, we unhash the dentry in advance.
|
|
|
|
*/
|
|
|
|
if (!d_unhashed(new_dentry)) {
|
|
|
|
d_drop(new_dentry);
|
|
|
|
rehash = new_dentry;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (atomic_read(&new_dentry->d_count) > 2) {
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/* copy the target dentry's name */
|
|
|
|
dentry = d_alloc(new_dentry->d_parent,
|
|
|
|
&new_dentry->d_name);
|
|
|
|
if (!dentry)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* silly-rename the existing target ... */
|
|
|
|
err = nfs_sillyrename(new_dir, new_dentry);
|
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
new_dentry = dentry;
|
|
|
|
rehash = NULL;
|
|
|
|
new_inode = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
nfs_inode_return_delegation(old_inode);
|
|
|
|
if (new_inode != NULL)
|
|
|
|
nfs_inode_return_delegation(new_inode);
|
|
|
|
|
|
|
|
error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
|
|
|
|
new_dir, &new_dentry->d_name);
|
|
|
|
nfs_mark_for_revalidate(old_inode);
|
|
|
|
out:
|
|
|
|
if (rehash)
|
|
|
|
d_rehash(rehash);
|
|
|
|
if (!error) {
|
|
|
|
if (new_inode != NULL)
|
|
|
|
nfs_drop_nlink(new_inode);
|
|
|
|
d_move(old_dentry, new_dentry);
|
|
|
|
nfs_set_verifier(new_dentry,
|
|
|
|
nfs_save_change_attribute(new_dir));
|
|
|
|
} else if (error == -ENOENT)
|
|
|
|
nfs_dentry_handle_enoent(old_dentry);
|
|
|
|
|
|
|
|
/* new dentry created? */
|
|
|
|
if (dentry)
|
|
|
|
dput(dentry);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
static DEFINE_SPINLOCK(nfs_access_lru_lock);
|
|
|
|
static LIST_HEAD(nfs_access_lru_list);
|
|
|
|
static atomic_long_t nfs_access_nr_entries;
|
|
|
|
|
|
|
|
static void nfs_access_free_entry(struct nfs_access_entry *entry)
|
|
|
|
{
|
|
|
|
put_rpccred(entry->cred);
|
|
|
|
kfree(entry);
|
|
|
|
smp_mb__before_atomic_dec();
|
|
|
|
atomic_long_dec(&nfs_access_nr_entries);
|
|
|
|
smp_mb__after_atomic_dec();
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nfs_access_free_list(struct list_head *head)
|
|
|
|
{
|
|
|
|
struct nfs_access_entry *cache;
|
|
|
|
|
|
|
|
while (!list_empty(head)) {
|
|
|
|
cache = list_entry(head->next, struct nfs_access_entry, lru);
|
|
|
|
list_del(&cache->lru);
|
|
|
|
nfs_access_free_entry(cache);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Shrinker callback for the access cache.
 *
 * Visits up to @nr_to_scan inodes on the global LRU and detaches the
 * first access entry of each one. An inode that still has entries is
 * moved to the tail of the global LRU; an inode with none left is taken
 * off it and its NFS_INO_ACL_LRU_SET flag is cleared. The detached
 * entries are freed after the locks have been dropped.
 *
 * When @gfp_mask does not include all of GFP_KERNEL, nothing is scanned
 * and the result is 0 for a zero @nr_to_scan and -1 otherwise.
 * Otherwise the result is the entry count divided by 100 and scaled by
 * sysctl_vfs_cache_pressure.
 */
int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
	LIST_HEAD(doomed);
	struct nfs_inode *nfsi, *next;
	struct nfs_access_entry *cache;

	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
		return (nr_to_scan == 0) ? 0 : -1;

	spin_lock(&nfs_access_lru_lock);
	list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
		struct inode *inode;

		if (nr_to_scan-- == 0)
			break;
		inode = &nfsi->vfs_inode;
		spin_lock(&inode->i_lock);

		/* Detach the first entry, if this inode has any. */
		if (!list_empty(&nfsi->access_cache_entry_lru)) {
			cache = list_entry(nfsi->access_cache_entry_lru.next,
					struct nfs_access_entry, lru);
			list_move(&cache->lru, &doomed);
			rb_erase(&cache->rb_node, &nfsi->access_cache);
		}

		if (!list_empty(&nfsi->access_cache_entry_lru)) {
			/* More entries remain: requeue the inode at the tail. */
			list_move_tail(&nfsi->access_cache_inode_lru,
					&nfs_access_lru_list);
		} else {
			/* Nothing left: take the inode off the global LRU. */
			list_del_init(&nfsi->access_cache_inode_lru);
			smp_mb__before_clear_bit();
			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
			smp_mb__after_clear_bit();
		}
		spin_unlock(&inode->i_lock);
	}
	spin_unlock(&nfs_access_lru_lock);

	nfs_access_free_list(&doomed);
	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
}
|
|
|
|
|
|
|
|
static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
|
|
|
|
{
|
|
|
|
struct rb_root *root_node = &nfsi->access_cache;
|
|
|
|
struct rb_node *n;
|
|
|
|
struct nfs_access_entry *entry;
|
|
|
|
|
|
|
|
/* Unhook entries from the cache */
|
|
|
|
while ((n = rb_first(root_node)) != NULL) {
|
|
|
|
entry = rb_entry(n, struct nfs_access_entry, rb_node);
|
|
|
|
rb_erase(n, root_node);
|
|
|
|
list_move(&entry->lru, head);
|
|
|
|
}
|
|
|
|
nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
void nfs_access_zap_cache(struct inode *inode)
|
|
|
|
{
|
|
|
|
LIST_HEAD(head);
|
|
|
|
|
|
|
|
if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
|
|
|
|
return;
|
|
|
|
/* Remove from global LRU init */
|
|
|
|
spin_lock(&nfs_access_lru_lock);
|
|
|
|
if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
|
|
|
|
list_del_init(&NFS_I(inode)->access_cache_inode_lru);
|
|
|
|
|
|
|
|
spin_lock(&inode->i_lock);
|
|
|
|
__nfs_access_zap_cache(NFS_I(inode), &head);
|
|
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
spin_unlock(&nfs_access_lru_lock);
|
|
|
|
nfs_access_free_list(&head);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
|
|
|
|
{
|
|
|
|
struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
|
|
|
|
struct nfs_access_entry *entry;
|
|
|
|
|
|
|
|
while (n != NULL) {
|
|
|
|
entry = rb_entry(n, struct nfs_access_entry, rb_node);
|
|
|
|
|
|
|
|
if (cred < entry->cred)
|
|
|
|
n = n->rb_left;
|
|
|
|
else if (cred > entry->cred)
|
|
|
|
n = n->rb_right;
|
|
|
|
else
|
|
|
|
return entry;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
|
|
|
|
{
|
|
|
|
struct nfs_inode *nfsi = NFS_I(inode);
|
|
|
|
struct nfs_access_entry *cache;
|
|
|
|
int err = -ENOENT;
|
|
|
|
|
|
|
|
spin_lock(&inode->i_lock);
|
|
|
|
if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
|
|
|
|
goto out_zap;
|
|
|
|
cache = nfs_access_search_rbtree(inode, cred);
|
|
|
|
if (cache == NULL)
|
|
|
|
goto out;
|
|
|
|
if (!nfs_have_delegated_attributes(inode) &&
|
optimize attribute timeouts for "noac" and "actimeo=0"
Hi.
I've been looking at a bugzilla which describes a problem where
a customer was advised to use either the "noac" or "actimeo=0"
mount options to solve a consistency problem that they were
seeing in the file attributes. It turned out that this solution
did not work reliably for them because sometimes, the local
attribute cache was believed to be valid and not timed out.
(With an attribute cache timeout of 0, the cache should always
appear to be timed out.)
In looking at this situation, it appears to me that the problem
is that the attribute cache timeout code has an off-by-one
error in it. It is assuming that the cache is valid in the
region, [read_cache_jiffies, read_cache_jiffies + attrtimeo]. The
cache should be considered valid only in the region,
[read_cache_jiffies, read_cache_jiffies + attrtimeo). With this
change, the options, "noac" and "actimeo=0", work as originally
expected.
This problem was previously addressed by special casing the
attrtimeo == 0 case. However, since the problem is only an off-
by-one error, the cleaner solution is address the off-by-one
error and thus, not require the special case.
Thanx...
ps
Signed-off-by: Peter Staubach <staubach@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
16 years ago
|
|
|
!time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
|
|
|
|
goto out_stale;
|
|
|
|
res->jiffies = cache->jiffies;
|
|
|
|
res->cred = cache->cred;
|
|
|
|
res->mask = cache->mask;
|
|
|
|
list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
|
|
|
|
err = 0;
|
|
|
|
out:
|
|
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
return err;
|
|
|
|
out_stale:
|
|
|
|
rb_erase(&cache->rb_node, &nfsi->access_cache);
|
|
|
|
list_del(&cache->lru);
|
|
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
nfs_access_free_entry(cache);
|
|
|
|
return -ENOENT;
|
|
|
|
out_zap:
|
|
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
nfs_access_zap_cache(inode);
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
|
|
|
|
{
|
|
|
|
struct nfs_inode *nfsi = NFS_I(inode);
|
|
|
|
struct rb_root *root_node = &nfsi->access_cache;
|
|
|
|
struct rb_node **p = &root_node->rb_node;
|
|
|
|
struct rb_node *parent = NULL;
|
|
|
|
struct nfs_access_entry *entry;
|
|
|
|
|
|
|
|
spin_lock(&inode->i_lock);
|
|
|
|
while (*p != NULL) {
|
|
|
|
parent = *p;
|
|
|
|
entry = rb_entry(parent, struct nfs_access_entry, rb_node);
|
|
|
|
|
|
|
|
if (set->cred < entry->cred)
|
|
|
|
p = &parent->rb_left;
|
|
|
|
else if (set->cred > entry->cred)
|
|
|
|
p = &parent->rb_right;
|
|
|
|
else
|
|
|
|
goto found;
|
|
|
|
}
|
|
|
|
rb_link_node(&set->rb_node, parent, p);
|
|
|
|
rb_insert_color(&set->rb_node, root_node);
|
|
|
|
list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
|
|
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
return;
|
|
|
|
found:
|
|
|
|
rb_replace_node(parent, &set->rb_node, root_node);
|
|
|
|
list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
|
|
|
|
list_del(&entry->lru);
|
|
|
|
spin_unlock(&inode->i_lock);
|
|
|
|
nfs_access_free_entry(entry);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
|
|
|
|
{
|
|
|
|
struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
|
|
|
|
if (cache == NULL)
|
|
|
|
return;
|
|
|
|
RB_CLEAR_NODE(&cache->rb_node);
|
|
|
|
cache->jiffies = set->jiffies;
|
|
|
|
cache->cred = get_rpccred(set->cred);
|
|
|
|
cache->mask = set->mask;
|
|
|
|
|
|
|
|
nfs_access_add_rbtree(inode, cache);
|
|
|
|
|
|
|
|
/* Update accounting */
|
|
|
|
smp_mb__before_atomic_inc();
|
|
|
|
atomic_long_inc(&nfs_access_nr_entries);
|
|
|
|
smp_mb__after_atomic_inc();
|
|
|
|
|
|
|
|
/* Add inode to global LRU list */
|
|
|
|
if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
|
|
|
|
spin_lock(&nfs_access_lru_lock);
|
|
|
|
if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
|
|
|
|
list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
|
|
|
|
&nfs_access_lru_list);
|
|
|
|
spin_unlock(&nfs_access_lru_lock);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
|
|
|
|
{
|
|
|
|
struct nfs_access_entry cache;
|
|
|
|
int status;
|
|
|
|
|
|
|
|
status = nfs_access_get_cached(inode, cred, &cache);
|
|
|
|
if (status == 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* Be clever: ask server to check for all possible rights */
|
|
|
|
cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
|
|
|
|
cache.cred = cred;
|
|
|
|
cache.jiffies = jiffies;
|
|
|
|
status = NFS_PROTO(inode)->access(inode, &cache);
|
|
|
|
if (status != 0) {
|
|
|
|
if (status == -ESTALE) {
|
|
|
|
nfs_zap_caches(inode);
|
|
|
|
if (!S_ISDIR(inode->i_mode))
|
|
|
|
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
|
|
|
|
}
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
nfs_access_add_cache(inode, &cache);
|
|
|
|
out:
|
|
|
|
if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
|
|
|
|
return 0;
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nfs_open_permission_mask(int openflags)
|
|
|
|
{
|
|
|
|
int mask = 0;
|
|
|
|
|
|
|
|
if (openflags & FMODE_READ)
|
|
|
|
mask |= MAY_READ;
|
|
|
|
if (openflags & FMODE_WRITE)
|
|
|
|
mask |= MAY_WRITE;
|
|
|
|
if (openflags & FMODE_EXEC)
|
|
|
|
mask |= MAY_EXEC;
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Check whether @cred may open @inode with @openflags: the flags are
 * converted to a permission mask and passed to nfs_do_access(), whose
 * result is returned.
 */
int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
{
	int mask = nfs_open_permission_mask(openflags);

	return nfs_do_access(inode, cred, mask);
}
|
|
|
|
|
|
|
|
int nfs_permission(struct inode *inode, int mask)
|
|
|
|
{
|
|
|
|
struct rpc_cred *cred;
|
|
|
|
int res = 0;
|
|
|
|
|
|
|
|
nfs_inc_stats(inode, NFSIOS_VFSACCESS);
|
|
|
|
|
|
|
|
if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
|
|
|
|
goto out;
|
|
|
|
/* Is this sys_access() ? */
|
|
|
|
if (mask & (MAY_ACCESS | MAY_CHDIR))
|
|
|
|
goto force_lookup;
|
|
|
|
|
|
|
|
switch (inode->i_mode & S_IFMT) {
|
|
|
|
case S_IFLNK:
|
|
|
|
goto out;
|
|
|
|
case S_IFREG:
|
|
|
|
/* NFSv4 has atomic_open... */
|
|
|
|
if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
|
|
|
|
&& (mask & MAY_OPEN)
|
|
|
|
&& !(mask & MAY_EXEC))
|
|
|
|
goto out;
|
|
|
|
break;
|
|
|
|
case S_IFDIR:
|
|
|
|
/*
|
|
|
|
* Optimize away all write operations, since the server
|
|
|
|
* will check permissions when we perform the op.
|
|
|
|
*/
|
|
|
|
if ((mask & MAY_WRITE) && !(mask & MAY_READ))
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
force_lookup:
|
|
|
|
if (!NFS_PROTO(inode)->access)
|
|
|
|
goto out_notsup;
|
|
|
|
|
|
|
|
cred = rpc_lookup_cred();
|
|
|
|
if (!IS_ERR(cred)) {
|
|
|
|
res = nfs_do_access(inode, cred, mask);
|
|
|
|
put_rpccred(cred);
|
|
|
|
} else
|
|
|
|
res = PTR_ERR(cred);
|
|
|
|
out:
|
|
|
|
if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
|
|
|
|
res = -EACCES;
|
|
|
|
|
|
|
|
dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
|
|
|
|
inode->i_sb->s_id, inode->i_ino, mask, res);
|
|
|
|
return res;
|
|
|
|
out_notsup:
|
|
|
|
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
|
|
|
|
if (res == 0)
|
|
|
|
res = generic_permission(inode, mask, NULL);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Local variables:
|
|
|
|
* version-control: t
|
|
|
|
* kept-new-versions: 5
|
|
|
|
* End:
|
|
|
|
*/
|