ANDROID: Incremental fs: make remount log buffer change atomic

The read log buffer can have multiple threads performing any of these
operations simultaneously:
- Polling for changes
- Reading log records
- Adding new log records
- Updating log buffer size, or enabling/disabling it completely

As we don't control userspace, and it turns out these operations all
currently originate from different processes, the code needs to be
safe against parallel access to the read buffer while a request to
reallocate it is in flight.

This CL adds an r/w spinlock to protect the buffer and its size.
Each remount takes the write lock, while everything else takes
a read lock. Remount keeps its critical section short by
preallocating the new buffer and precomputing all updates up front,
while the other operations aren't sensitive to critical section
size - under the read lock they can all still run concurrently.

Bug: 152633648
Test: manual remount + reading
Signed-off-by: Yurii Zubrytskyi <zyy@google.com>
Signed-off-by: Paul Lawrence <paullawrence@google.com>
Change-Id: I7271b4cb89f1ae2cbee6e5b073758f344c4ba66a
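
The scheme in miniature (a sketch with made-up names, not the actual incfs symbols - those are in the hunks below): appends take the shared lock plus a small inner spinlock, while the rare resize allocates first and holds the exclusive lock only for the pointer swap.

#include <linux/slab.h>
#include <linux/spinlock.h>

/* Illustrative only: a ring whose storage can be swapped at runtime. */
struct ring {
	rwlock_t access_lock;	/* write-held only while swapping the buffer */
	spinlock_t log_lock;	/* serializes appends; nests inside access_lock */
	int *buf;
	int size;
	int next;
};

/* Fast path: shared lock, so many appenders and readers run in parallel. */
static void ring_append(struct ring *r, int v)
{
	read_lock(&r->access_lock);
	if (r->size) {
		spin_lock(&r->log_lock);
		r->buf[r->next] = v;
		if (++r->next == r->size)
			r->next = 0;
		spin_unlock(&r->log_lock);
	}
	read_unlock(&r->access_lock);
}

/* Slow path (remount): allocate outside the lock, swap inside it. */
static int ring_resize(struct ring *r, int new_size)
{
	int *nb = new_size ? kcalloc(new_size, sizeof(*nb), GFP_NOFS) : NULL;

	if (new_size && !nb)
		return -ENOMEM;
	write_lock(&r->access_lock);	/* short: nothing blocking in here */
	kfree(r->buf);
	r->buf = nb;
	r->size = new_size;
	r->next = 0;
	write_unlock(&r->access_lock);
	return 0;
}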
 fs/incfs/data_mgmt.c | 145
 fs/incfs/data_mgmt.h |  23
 fs/incfs/vfs.c       |  17

@@ -34,7 +34,8 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb,
 	mutex_init(&mi->mi_pending_reads_mutex);
 	init_waitqueue_head(&mi->mi_pending_reads_notif_wq);
 	init_waitqueue_head(&mi->mi_log.ml_notif_wq);
-	spin_lock_init(&mi->mi_log.rl_writer_lock);
+	rwlock_init(&mi->mi_log.rl_access_lock);
+	spin_lock_init(&mi->mi_log.rl_logging_lock);
 	INIT_LIST_HEAD(&mi->mi_reads_list_head);
 	error = incfs_realloc_mount_info(mi, options);
@@ -51,20 +52,38 @@ err:
 int incfs_realloc_mount_info(struct mount_info *mi,
 			     struct mount_options *options)
 {
-	kfree(mi->mi_log.rl_ring_buf);
-	mi->mi_log.rl_ring_buf = NULL;
-	mi->mi_log.rl_size = 0;
+	void *new_buffer = NULL;
+	size_t new_buffer_size = 0;
-	mi->mi_options = *options;
-	if (options->read_log_pages != 0) {
-		size_t buf_size = PAGE_SIZE * options->read_log_pages;
+	if (options->read_log_pages != mi->mi_options.read_log_pages) {
+		struct read_log_state log_state;
+		/*
+		 * Even though having two buffers allocated at once isn't
+		 * usually good, allocating a multipage buffer under a spinlock
+		 * is even worse, so let's optimize for the shorter lock
+		 * duration. It's not end of the world if we fail to increase
+		 * the buffer size anyway.
+		 */
+		if (options->read_log_pages > 0) {
+			new_buffer_size = PAGE_SIZE * options->read_log_pages;
+			new_buffer = kzalloc(new_buffer_size, GFP_NOFS);
+			if (!new_buffer)
+				return -ENOMEM;
+		}
-		mi->mi_log.rl_size = buf_size / sizeof(*mi->mi_log.rl_ring_buf);
-		mi->mi_log.rl_ring_buf = kzalloc(buf_size, GFP_NOFS);
-		if (!mi->mi_log.rl_ring_buf)
-			return -ENOMEM;
+		write_lock(&mi->mi_log.rl_access_lock);
+		kfree(mi->mi_log.rl_ring_buf);
+		WRITE_ONCE(mi->mi_log.rl_ring_buf, new_buffer);
+		WRITE_ONCE(mi->mi_log.rl_size,
+			   new_buffer_size / sizeof(*mi->mi_log.rl_ring_buf));
+		log_state = READ_ONCE(mi->mi_log.rl_state);
+		log_state.generation_id++;
+		log_state.next_index = log_state.current_pass_no = 0;
+		WRITE_ONCE(mi->mi_log.rl_state, log_state);
+		write_unlock(&mi->mi_log.rl_access_lock);
 	}
+	mi->mi_options = *options;
 	return 0;
 }
@@ -233,6 +252,7 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id,
 	struct read_log *log = &mi->mi_log;
 	struct read_log_state state;
 	s64 now_us = ktime_to_us(ktime_get());
+	int rl_size;
 	struct read_log_record record = {
 		.file_id = *id,
 		.block_index = block_index,
@@ -240,20 +260,23 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id,
 		.timestamp_us = now_us
 	};
-	if (log->rl_size == 0)
-		return;
-	spin_lock(&log->rl_writer_lock);
-	state = READ_ONCE(log->rl_state);
-	log->rl_ring_buf[state.next_index] = record;
-	if (++state.next_index == log->rl_size) {
-		state.next_index = 0;
-		++state.current_pass_no;
+	read_lock(&log->rl_access_lock);
+	rl_size = READ_ONCE(log->rl_size);
+	if (rl_size != 0) {
+		spin_lock(&log->rl_logging_lock);
+		state = READ_ONCE(log->rl_state);
+		log->rl_ring_buf[state.next_index] = record;
+		if (++state.next_index == rl_size) {
+			state.next_index = 0;
+			++state.current_pass_no;
+		}
+		WRITE_ONCE(log->rl_state, state);
+		spin_unlock(&log->rl_logging_lock);
 	}
-	WRITE_ONCE(log->rl_state, state);
-	spin_unlock(&log->rl_writer_lock);
+	read_unlock(&log->rl_access_lock);
-	wake_up_all(&log->ml_notif_wq);
+	if (rl_size != 0)
+		wake_up_all(&log->ml_notif_wq);
 }

 static int validate_hash_tree(struct file *bf, struct data_file *df,
@@ -1171,9 +1194,11 @@ struct read_log_state incfs_get_log_state(struct mount_info *mi)
 	struct read_log *log = &mi->mi_log;
 	struct read_log_state result;
-	spin_lock(&log->rl_writer_lock);
+	read_lock(&log->rl_access_lock);
+	spin_lock(&log->rl_logging_lock);
 	result = READ_ONCE(log->rl_state);
-	spin_unlock(&log->rl_writer_lock);
+	spin_unlock(&log->rl_logging_lock);
+	read_unlock(&log->rl_access_lock);
 	return result;
 }
@@ -1186,10 +1211,21 @@ int incfs_get_uncollected_logs_count(struct mount_info *mi,
 				     struct read_log_state state)
 {
 	struct read_log *log = &mi->mi_log;
-	u64 count = calc_record_count(&log->rl_state, log->rl_size) -
-		    calc_record_count(&state, log->rl_size);
-	return min_t(int, count, log->rl_size);
+	struct read_log_state rl_state;
+	int rl_size;
+	u64 count;
+	read_lock(&log->rl_access_lock);
+	rl_size = READ_ONCE(log->rl_size);
+	spin_lock(&log->rl_logging_lock);
+	rl_state = READ_ONCE(log->rl_state);
+	spin_unlock(&log->rl_logging_lock);
+	read_unlock(&log->rl_access_lock);
+	count = calc_record_count(&rl_state, rl_size);
+	if (rl_state.generation_id == state.generation_id)
+		count -= calc_record_count(&state, rl_size);
+	return min_t(int, count, rl_size);
 }

 static void fill_pending_read_from_log_record(
@@ -1209,17 +1245,35 @@ int incfs_collect_logged_reads(struct mount_info *mi,
 			       int reads_size)
 {
 	struct read_log *log = &mi->mi_log;
-	struct read_log_state live_state = incfs_get_log_state(mi);
-	u64 read_count = calc_record_count(reader_state, log->rl_size);
-	u64 written_count = calc_record_count(&live_state, log->rl_size);
+	struct read_log_state live_state;
 	int dst_idx;
+	int rl_size;
+	int result = 0;
+	u64 read_count;
+	u64 written_count;
-	if (reader_state->next_index >= log->rl_size ||
-	    read_count > written_count)
-		return -ERANGE;
+	read_lock(&log->rl_access_lock);
-	if (read_count == written_count)
-		return 0;
+	rl_size = READ_ONCE(log->rl_size);
+	spin_lock(&log->rl_logging_lock);
+	live_state = READ_ONCE(log->rl_state);
+	spin_unlock(&log->rl_logging_lock);
+	if (reader_state->generation_id != live_state.generation_id) {
+		reader_state->generation_id = live_state.generation_id;
+		reader_state->current_pass_no = reader_state->next_index = 0;
+	}
+	read_count = calc_record_count(reader_state, rl_size);
+	written_count = calc_record_count(&live_state, rl_size);
+	if (read_count == written_count) {
+		result = 0;
+		goto out;
+	}
+	if (reader_state->next_index >= rl_size) {
+		result = -ERANGE;
+		goto out;
+	}
 	if (read_count > written_count) {
 		/* This reader is somehow ahead of the writer. */
@@ -1227,16 +1281,17 @@ int incfs_collect_logged_reads(struct mount_info *mi,
 		*reader_state = live_state;
 	}
-	if (written_count - read_count > log->rl_size) {
+	if (written_count - read_count > rl_size) {
 		/*
 		 * Reading pointer is too far behind,
 		 * start from the record following the write pointer.
 		 */
-		pr_debug("incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n",
+		pr_debug(
+			"incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n",
 			(u32)reader_state->next_index,
 			(u32)reader_state->current_pass_no,
 			(u32)live_state.next_index,
-			(u32)live_state.current_pass_no - 1, (u32)log->rl_size);
+			(u32)live_state.current_pass_no - 1, (u32)rl_size);
 		*reader_state = (struct read_log_state){
 			.next_index = live_state.next_index,
@@ -1252,15 +1307,19 @@ int incfs_collect_logged_reads(struct mount_info *mi,
 		fill_pending_read_from_log_record(
 			&reads[dst_idx],
 			&log->rl_ring_buf[reader_state->next_index],
-			reader_state, log->rl_size);
+			reader_state, rl_size);
 		reader_state->next_index++;
-		if (reader_state->next_index == log->rl_size) {
+		if (reader_state->next_index == rl_size) {
 			reader_state->next_index = 0;
 			reader_state->current_pass_no++;
 		}
 	}
-	return dst_idx;
+	result = dst_idx;
+out:
+	read_unlock(&log->rl_access_lock);
+	return result;
 }

 bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs)
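
A note on the arithmetic: calc_record_count() is untouched by this change, so it never shows up in a hunk. Judging from its callers, it linearizes a log position into the total number of records ever written, roughly like this sketch:

/* Sketch, not from this diff: records written by the time the log
 * reached |state|, with rl_size slots consumed per pass. */
static u64 calc_record_count(const struct read_log_state *state, int rl_size)
{
	return (u64)state->current_pass_no * rl_size + state->next_index;
}

This is also why incfs_get_uncollected_logs_count() above only subtracts the caller's position when the generation ids match: positions from different generations don't lie on the same number line.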

@@ -31,10 +31,13 @@ struct read_log_record {
 } __packed;
 struct read_log_state {
-	/* Next slot in rl_ring_buf to write to. */
-	u32 next_index;
+	/* Log buffer generation id, incremented on configuration changes */
+	u32 generation_id : 8;
-	/* Current number of writer pass over rl_ring_buf */
+	/* Next slot in rl_ring_buf to write into. */
+	u32 next_index : 24;
+	/* Current number of writer passes over rl_ring_buf */
 	u32 current_pass_no;
 };
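
The 8/24 split trades range for compactness: the state still fits in two u32 words, the ring is capped at 2^24 records, and the generation counter wraps after 256 reconfigurations. A tiny userspace illustration of the wrap behavior (layout copied by hand for the example, not the kernel struct):

#include <stdint.h>
#include <stdio.h>

struct state {
	uint32_t generation_id : 8;
	uint32_t next_index : 24;
	uint32_t current_pass_no;
};

int main(void)
{
	struct state s = {
		.generation_id = 255,		/* about to wrap */
		.next_index = (1u << 24) - 1,	/* last slot of the ring */
	};

	s.generation_id++;
	s.next_index++;
	/* Both wrap to 0: bitfield stores are truncated modulo 2^width. */
	printf("gen=%u idx=%u\n", s.generation_id, s.next_index);
	return 0;
}

A reader that sleeps through exactly 256 remounts would see the same generation id again and miss the reset, but that pathological pattern is not a practical concern here.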
@@ -42,11 +45,21 @@ struct read_log_state {
 struct read_log {
 	struct read_log_record *rl_ring_buf;
-	int rl_size;
 	struct read_log_state rl_state;
-	spinlock_t rl_writer_lock;
+	/*
+	 * A lock for _all_ accesses to the struct, to protect against remounts.
+	 * Taken for writing when resizing the buffer.
+	 */
+	rwlock_t rl_access_lock;
+	int rl_size;
+	/*
+	 * A lock to protect the actual logging - adding a new record.
+	 * Note: ALWAYS taken after and under the |rl_access_lock|.
+	 */
+	spinlock_t rl_logging_lock;
 	/*
 	 * A queue of waiters who want to be notified about reads.
@@ -584,22 +584,27 @@ static ssize_t log_read(struct file *f, char __user *buf, size_t len,
 {
 	struct log_file_state *log_state = f->private_data;
 	struct mount_info *mi = get_mount_info(file_superblock(f));
-	struct incfs_pending_read_info *reads_buf =
-		(struct incfs_pending_read_info *)__get_free_page(GFP_NOFS);
-	size_t reads_to_collect = len / sizeof(*reads_buf);
-	size_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf);
 	int total_reads_collected = 0;
+	int rl_size;
 	ssize_t result = 0;
+	struct incfs_pending_read_info *reads_buf;
+	ssize_t reads_to_collect = len / sizeof(*reads_buf);
+	ssize_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf);
+	rl_size = READ_ONCE(mi->mi_log.rl_size);
+	if (rl_size == 0)
+		return 0;
+	reads_buf = (struct incfs_pending_read_info *)__get_free_page(GFP_NOFS);
 	if (!reads_buf)
 		return -ENOMEM;
-	reads_to_collect = min_t(size_t, mi->mi_log.rl_size, reads_to_collect);
+	reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect);
 	while (reads_to_collect > 0) {
 		struct read_log_state next_state = READ_ONCE(log_state->state);
 		int reads_collected = incfs_collect_logged_reads(
 			mi, &next_state, reads_buf,
-			min_t(size_t, reads_to_collect, reads_per_page));
+			min_t(ssize_t, reads_to_collect, reads_per_page));
 		if (reads_collected <= 0) {
 			result = total_reads_collected ?
 					total_reads_collected *
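
End to end, a hypothetical userspace consumer of this log_read() handler could look like the sketch below. The mount path is invented, and the .log pseudo-file name and the incfs_pending_read_info layout are assumed from this kernel tree's incfs uapi header:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/incrementalfs.h>

int main(void)
{
	struct incfs_pending_read_info recs[64];
	struct pollfd pfd;
	int fd = open("/mnt/incfs/.log", O_RDONLY | O_CLOEXEC);	/* path invented */

	if (fd < 0)
		return 1;
	pfd.fd = fd;
	pfd.events = POLLIN;
	/* Each wakeup drains whatever log_block_read() appended since last time. */
	while (poll(&pfd, 1, -1) > 0) {
		ssize_t n = read(fd, recs, sizeof(recs));

		if (n <= 0)
			break;
		for (ssize_t i = 0; i < n / (ssize_t)sizeof(recs[0]); i++)
			printf("block %u read at %llu us\n",
			       recs[i].block_index,
			       (unsigned long long)recs[i].timestamp_us);
	}
	close(fd);
	return 0;
}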
