|
|
|
/* AFS volume management
|
|
|
|
*
|
|
|
|
* Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
|
|
|
|
* Written by David Howells (dhowells@redhat.com)
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/pagemap.h>
|
Detach sched.h from mm.h
First thing mm.h does is including sched.h solely for can_do_mlock() inline
function which has "current" dereference inside. By dealing with can_do_mlock()
mm.h can be detached from sched.h which is good. See below, why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
18 years ago
|
|
|
#include <linux/sched.h>
|
|
|
|
#include "internal.h"
|
|
|
|
|
|
|
|
/*
 * Printable names for the volume types, indexed by the volume type value
 * (used as afs_voltypes[volume->type] when tracing the selected volume).
 * Both the strings and the table of pointers are read-only.
 */
static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
|
|
|
|
|
|
|
|
/*
|
|
|
|
* lookup a volume by name
|
|
|
|
* - this can be one of the following:
|
|
|
|
* "%[cell:]volume[.]" R/W volume
|
|
|
|
* "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
|
|
|
|
* or R/W (rwparent=1) volume
|
|
|
|
* "%[cell:]volume.readonly" R/O volume
|
|
|
|
* "#[cell:]volume.readonly" R/O volume
|
|
|
|
* "%[cell:]volume.backup" Backup volume
|
|
|
|
* "#[cell:]volume.backup" Backup volume
|
|
|
|
*
|
|
|
|
* The cell name is optional, and defaults to the current cell.
|
|
|
|
*
|
|
|
|
* See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
|
|
|
|
* Guide
|
|
|
|
* - Rule 1: Explicit type suffix forces access of that type or nothing
|
|
|
|
* (no suffix, then use Rule 2 & 3)
|
|
|
|
* - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
|
|
|
|
* if not available
|
|
|
|
* - Rule 3: If parent volume is R/W, then only mount R/W volume unless
|
|
|
|
* explicitly told otherwise
|
|
|
|
*/
|
|
|
|
struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
|
|
|
|
{
|
|
|
|
struct afs_vlocation *vlocation = NULL;
|
|
|
|
struct afs_volume *volume = NULL;
|
|
|
|
struct afs_server *server = NULL;
|
|
|
|
char srvtmask;
|
|
|
|
int ret, loop;
|
|
|
|
|
|
|
|
_enter("{%*.*s,%d}",
|
|
|
|
params->volnamesz, params->volnamesz, params->volname, params->rwpath);
|
|
|
|
|
|
|
|
/* lookup the volume location record */
|
|
|
|
vlocation = afs_vlocation_lookup(params->cell, params->key,
|
|
|
|
params->volname, params->volnamesz);
|
|
|
|
if (IS_ERR(vlocation)) {
|
|
|
|
ret = PTR_ERR(vlocation);
|
|
|
|
vlocation = NULL;
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* make the final decision on the type we want */
|
|
|
|
ret = -ENOMEDIUM;
|
|
|
|
if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
srvtmask = 0;
|
|
|
|
for (loop = 0; loop < vlocation->vldb.nservers; loop++)
|
|
|
|
srvtmask |= vlocation->vldb.srvtmask[loop];
|
|
|
|
|
|
|
|
if (params->force) {
|
|
|
|
if (!(srvtmask & (1 << params->type)))
|
|
|
|
goto error;
|
|
|
|
} else if (srvtmask & AFS_VOL_VTM_RO) {
|
|
|
|
params->type = AFSVL_ROVOL;
|
|
|
|
} else if (srvtmask & AFS_VOL_VTM_RW) {
|
|
|
|
params->type = AFSVL_RWVOL;
|
|
|
|
} else {
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
down_write(¶ms->cell->vl_sem);
|
|
|
|
|
|
|
|
/* is the volume already active? */
|
|
|
|
if (vlocation->vols[params->type]) {
|
|
|
|
/* yes - re-use it */
|
|
|
|
volume = vlocation->vols[params->type];
|
|
|
|
afs_get_volume(volume);
|
|
|
|
goto success;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* create a new volume record */
|
|
|
|
_debug("creating new volume record");
|
|
|
|
|
|
|
|
ret = -ENOMEM;
|
|
|
|
volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
|
|
|
|
if (!volume)
|
|
|
|
goto error_up;
|
|
|
|
|
|
|
|
atomic_set(&volume->usage, 1);
|
|
|
|
volume->type = params->type;
|
|
|
|
volume->type_force = params->force;
|
|
|
|
volume->cell = params->cell;
|
|
|
|
volume->vid = vlocation->vldb.vid[params->type];
|
|
|
|
|
|
|
|
init_rwsem(&volume->server_sem);
|
|
|
|
|
|
|
|
/* look up all the applicable server records */
|
|
|
|
for (loop = 0; loop < 8; loop++) {
|
|
|
|
if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
|
|
|
|
server = afs_lookup_server(
|
|
|
|
volume->cell, &vlocation->vldb.servers[loop]);
|
|
|
|
if (IS_ERR(server)) {
|
|
|
|
ret = PTR_ERR(server);
|
|
|
|
goto error_discard;
|
|
|
|
}
|
|
|
|
|
|
|
|
volume->servers[volume->nservers] = server;
|
|
|
|
volume->nservers++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* attach the cache and volume location */
|
|
|
|
#ifdef CONFIG_AFS_FSCACHE
|
|
|
|
volume->cache = fscache_acquire_cookie(vlocation->cache,
|
|
|
|
&afs_volume_cache_index_def,
|
FS-Cache: Provide the ability to enable/disable cookies
Provide the ability to enable and disable fscache cookies. A disabled cookie
will reject or ignore further requests to:
Acquire a child cookie
Invalidate and update backing objects
Check the consistency of a backing object
Allocate storage for backing page
Read backing pages
Write to backing pages
but still allows:
Checks/waits on the completion of already in-progress objects
Uncaching of pages
Relinquishment of cookies
Two new operations are provided:
(1) Disable a cookie:
void fscache_disable_cookie(struct fscache_cookie *cookie,
bool invalidate);
If the cookie is not already disabled, this locks the cookie against other
dis/enablement ops, marks the cookie as being disabled, discards or
invalidates any backing objects and waits for cessation of activity on any
associated object.
This is a wrapper around a chunk split out of fscache_relinquish_cookie(),
but it reinitialises the cookie such that it can be reenabled.
All possible failures are handled internally. The caller should consider
calling fscache_uncache_all_inode_pages() afterwards to make sure all page
markings are cleared up.
(2) Enable a cookie:
void fscache_enable_cookie(struct fscache_cookie *cookie,
bool (*can_enable)(void *data),
void *data)
If the cookie is not already enabled, this locks the cookie against other
dis/enablement ops, invokes can_enable() and, if the cookie is not an
index cookie, will begin the procedure of acquiring backing objects.
The optional can_enable() function is passed the data argument and returns
a ruling as to whether or not enablement should actually be permitted to
begin.
All possible failures are handled internally. The cookie will only be
marked as enabled if provisional backing objects are allocated.
A later patch will introduce these to NFS. Cookie enablement during nfs_open()
is then contingent on i_writecount <= 0. can_enable() checks for a race
between open(O_RDONLY) and open(O_WRONLY/O_RDWR). This simplifies NFS's cookie
handling and allows us to get rid of open(O_RDONLY) accidentally introducing
caching to an inode that's open for writing already.
One operation has its API modified:
(3) Acquire a cookie.
struct fscache_cookie *fscache_acquire_cookie(
struct fscache_cookie *parent,
const struct fscache_cookie_def *def,
void *netfs_data,
bool enable);
This now has an additional argument that indicates whether the requested
cookie should be enabled by default. It doesn't need the can_enable()
function because the caller must prevent multiple calls for the same netfs
object and it doesn't need to take the enablement lock because no one else
can get at the cookie before this returns.
Signed-off-by: David Howells <dhowells@redhat.com
11 years ago
|
|
|
volume, true);
|
|
|
|
#endif
|
|
|
|
afs_get_vlocation(vlocation);
|
|
|
|
volume->vlocation = vlocation;
|
|
|
|
|
|
|
|
vlocation->vols[volume->type] = volume;
|
|
|
|
|
|
|
|
success:
|
|
|
|
_debug("kAFS selected %s volume %08x",
|
|
|
|
afs_voltypes[volume->type], volume->vid);
|
|
|
|
up_write(¶ms->cell->vl_sem);
|
|
|
|
afs_put_vlocation(vlocation);
|
|
|
|
_leave(" = %p", volume);
|
|
|
|
return volume;
|
|
|
|
|
|
|
|
/* clean up */
|
|
|
|
error_up:
|
|
|
|
up_write(¶ms->cell->vl_sem);
|
|
|
|
error:
|
|
|
|
afs_put_vlocation(vlocation);
|
|
|
|
_leave(" = %d", ret);
|
|
|
|
return ERR_PTR(ret);
|
|
|
|
|
|
|
|
error_discard:
|
|
|
|
up_write(¶ms->cell->vl_sem);
|
|
|
|
|
|
|
|
for (loop = volume->nservers - 1; loop >= 0; loop--)
|
|
|
|
afs_put_server(volume->servers[loop]);
|
|
|
|
|
|
|
|
kfree(volume);
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* destroy a volume record
|
|
|
|
*/
|
|
|
|
void afs_put_volume(struct afs_volume *volume)
|
|
|
|
{
|
|
|
|
struct afs_vlocation *vlocation;
|
|
|
|
int loop;
|
|
|
|
|
|
|
|
if (!volume)
|
|
|
|
return;
|
|
|
|
|
|
|
|
_enter("%p", volume);
|
|
|
|
|
|
|
|
ASSERTCMP(atomic_read(&volume->usage), >, 0);
|
|
|
|
|
|
|
|
vlocation = volume->vlocation;
|
|
|
|
|
|
|
|
/* to prevent a race, the decrement and the dequeue must be effectively
|
|
|
|
* atomic */
|
|
|
|
down_write(&vlocation->cell->vl_sem);
|
|
|
|
|
|
|
|
if (likely(!atomic_dec_and_test(&volume->usage))) {
|
|
|
|
up_write(&vlocation->cell->vl_sem);
|
|
|
|
_leave("");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
vlocation->vols[volume->type] = NULL;
|
|
|
|
|
|
|
|
up_write(&vlocation->cell->vl_sem);
|
|
|
|
|
|
|
|
/* finish cleaning up the volume */
|
|
|
|
#ifdef CONFIG_AFS_FSCACHE
|
|
|
|
fscache_relinquish_cookie(volume->cache, 0);
|
|
|
|
#endif
|
|
|
|
afs_put_vlocation(vlocation);
|
|
|
|
|
|
|
|
for (loop = volume->nservers - 1; loop >= 0; loop--)
|
|
|
|
afs_put_server(volume->servers[loop]);
|
|
|
|
|
|
|
|
kfree(volume);
|
|
|
|
|
|
|
|
_leave(" [destroyed]");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Choose a fileserver through which to access the volume a vnode lives on.
 * - the server returned has had its usage count raised for the caller
 * - if no usable server is found, an ERR_PTR() is returned instead
 */
struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
{
	struct afs_volume *volume = vnode->volume;
	struct afs_server *candidate;
	int ret, state, i, replace;

	_enter("%s", volume->vlocation->vldb.name);

	/* keep using the vnode's current server as long as it looks healthy */
	if (vnode->server && vnode->server->fs_state == 0) {
		afs_get_server(vnode->server);
		_leave(" = %p [current]", vnode->server);
		return vnode->server;
	}

	down_read(&volume->server_sem);

	/* nothing to choose from: report whether that is because servers
	 * rejected the volume or because there never were any */
	if (volume->nservers == 0) {
		if (volume->rjservers)
			ret = -ENOMEDIUM;
		else
			ret = -ESTALE;
		up_read(&volume->server_sem);
		_leave(" = %d [no servers]", ret);
		return ERR_PTR(ret);
	}

	/* walk the list and take the first server that is marked live; along
	 * the way remember the highest-ranked failure state seen, where the
	 * ranking is ENETUNREACH < EHOSTUNREACH < ECONNREFUSED < anything
	 * else (including EREMOTEIO) */
	ret = 0;
	for (i = 0; i < volume->nservers; i++) {
		candidate = volume->servers[i];
		state = candidate->fs_state;

		_debug("consider %d [%d]", i, state);

		if (state == 0) {
			/* found an apparently healthy server */
			afs_get_server(candidate);
			up_read(&volume->server_sem);
			_leave(" = %p (picked %08x)",
			       candidate, ntohl(candidate->addr.s_addr));
			return candidate;
		}

		/* does this state outrank what has been recorded so far? */
		if (state == -ENETUNREACH)
			replace = (ret == 0);
		else if (state == -EHOSTUNREACH)
			replace = (ret == 0 ||
				   ret == -ENETUNREACH);
		else if (state == -ECONNREFUSED)
			replace = (ret == 0 ||
				   ret == -ENETUNREACH ||
				   ret == -EHOSTUNREACH);
		else
			replace = (ret == 0 ||
				   ret == -ENETUNREACH ||
				   ret == -EHOSTUNREACH ||
				   ret == -ECONNREFUSED);

		if (replace)
			ret = state;
	}

	/* no available servers
	 * - TODO: handle the no active servers case better
	 */
	up_read(&volume->server_sem);
	_leave(" = %d", ret);
	return ERR_PTR(ret);
}
|
|
|
|
|
|
|
|
/*
 * release a server after use
 * - records the result of using a particular server to access a volume
 * - returns 0 to tell the caller to try again with another server, or 1 if
 *   the operation succeeded or the error should be passed on
 * - for every non-zero @result, the ref on the server struct that was
 *   acquired by picking is released here
 * - if @result was 0 (success), the ref is NOT released here: the caller
 *   must release the server struct itself
 */
int afs_volume_release_fileserver(struct afs_vnode *vnode,
				  struct afs_server *server,
				  int result)
{
	struct afs_volume *volume = vnode->volume;
	unsigned int loop;

	_enter("%s,%08x,%d",
	       volume->vlocation->vldb.name, ntohl(server->addr.s_addr),
	       result);

	switch (result) {
		/* success */
	case 0:
		server->fs_act_jif = jiffies;
		server->fs_state = 0;
		_leave("");
		return 1;

		/* the fileserver denied all knowledge of the volume */
	case -ENOMEDIUM:
		server->fs_act_jif = jiffies;
		down_write(&volume->server_sem);

		/* firstly, find where the server is in the active list (if it
		 * is) */
		for (loop = 0; loop < volume->nservers; loop++)
			if (volume->servers[loop] == server)
				goto present;

		/* no longer there - may have been discarded by another op */
		goto try_next_server_upw;

	present:
		/* close the gap left in the active list and clear the slot
		 * that has become spare at the end */
		volume->nservers--;
		memmove(&volume->servers[loop],
			&volume->servers[loop + 1],
			sizeof(volume->servers[loop]) *
			(volume->nservers - loop));
		volume->servers[volume->nservers] = NULL;
		/* drop the ref that went with the active list entry; the
		 * caller's ref is dropped separately below */
		afs_put_server(server);
		volume->rjservers++;

		if (volume->nservers > 0)
			/* another server might acknowledge its existence */
			goto try_next_server_upw;

		/* handle the case where all the fileservers have rejected the
		 * volume
		 * - TODO: try asking the fileservers for volume information
		 * - TODO: contact the VL server again to see if the volume is
		 *         no longer registered
		 */
		up_write(&volume->server_sem);
		afs_put_server(server);
		_leave(" [completely rejected]");
		return 1;

		/* problem reaching the server */
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -ECONNREFUSED:
	case -ETIME:
	case -ETIMEDOUT:
	case -EREMOTEIO:
		/* mark the server as dead
		 * TODO: vary dead timeout depending on error
		 */
		spin_lock(&server->fs_lock);
		if (!server->fs_state) {
			server->fs_dead_jif = jiffies + HZ * 10;
			server->fs_state = result;
			printk(KERN_WARNING "kAFS: SERVER DEAD state=%d\n",
			       result);
		}
		spin_unlock(&server->fs_lock);
		goto try_next_server;

		/* miscellaneous error */
	default:
		server->fs_act_jif = jiffies;
		/* fall through */
	case -ENOMEM:
	case -ENONET:
		/* tell the caller to accept the result */
		afs_put_server(server);
		_leave(" [local failure]");
		return 1;
	}

	/* tell the caller to loop around and try the next server */
try_next_server_upw:
	up_write(&volume->server_sem);
try_next_server:
	afs_put_server(server);
	_leave(" [try next server]");
	return 0;
}
|