ANDROID: Incremental fs: Use simple compression in log buffer

Bug: 154342202
Test: incfs_test passes
Signed-off-by: Paul Lawrence <paullawrence@google.com>
Change-Id: Ibcc641dd92596018c9f10b5bc7bd0db2642a80c7
(cherry picked from commit b6b4a3a404ccd9c62347e27c4fc7883d776c2cbb)
Git-commit: fdd560ec46
Git-repo: https://android.googlesource.com/kernel/common/
Signed-off-by: Sayali Lokhande <sayalil@codeaurora.org>
tirimbino
Paul Lawrence 5 years ago committed by Blagovest Kolenichev
parent 8e0360b285
commit 4bdcf62a54
  1. 310
      fs/incfs/data_mgmt.c
  2. 76
      fs/incfs/data_mgmt.h
  3. 2
      fs/incfs/vfs.c
  4. 89
      tools/testing/selftests/filesystems/incfs/incfs_test.c

@ -34,8 +34,7 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb,
mutex_init(&mi->mi_pending_reads_mutex);
init_waitqueue_head(&mi->mi_pending_reads_notif_wq);
init_waitqueue_head(&mi->mi_log.ml_notif_wq);
rwlock_init(&mi->mi_log.rl_access_lock);
spin_lock_init(&mi->mi_log.rl_logging_lock);
spin_lock_init(&mi->mi_log.rl_lock);
INIT_LIST_HEAD(&mi->mi_reads_list_head);
error = incfs_realloc_mount_info(mi, options);
@ -53,6 +52,7 @@ int incfs_realloc_mount_info(struct mount_info *mi,
struct mount_options *options)
{
void *new_buffer = NULL;
void *old_buffer;
size_t new_buffer_size = 0;
if (options->read_log_pages != mi->mi_options.read_log_pages) {
@ -71,16 +71,18 @@ int incfs_realloc_mount_info(struct mount_info *mi,
return -ENOMEM;
}
write_lock(&mi->mi_log.rl_access_lock);
kfree(mi->mi_log.rl_ring_buf);
WRITE_ONCE(mi->mi_log.rl_ring_buf, new_buffer);
WRITE_ONCE(mi->mi_log.rl_size,
new_buffer_size / sizeof(*mi->mi_log.rl_ring_buf));
log_state = READ_ONCE(mi->mi_log.rl_state);
log_state.generation_id++;
log_state.next_index = log_state.current_pass_no = 0;
WRITE_ONCE(mi->mi_log.rl_state, log_state);
write_unlock(&mi->mi_log.rl_access_lock);
spin_lock(&mi->mi_log.rl_lock);
old_buffer = mi->mi_log.rl_ring_buf;
mi->mi_log.rl_ring_buf = new_buffer;
mi->mi_log.rl_size = new_buffer_size;
log_state = (struct read_log_state){
.generation_id = mi->mi_log.rl_head.generation_id + 1,
};
mi->mi_log.rl_head = log_state;
mi->mi_log.rl_tail = log_state;
spin_unlock(&mi->mi_log.rl_lock);
kfree(old_buffer);
}
mi->mi_options = *options;
@ -246,37 +248,121 @@ static ssize_t decompress(struct mem_range src, struct mem_range dst)
return result;
}
static void log_read_one_record(struct read_log *rl, struct read_log_state *rs)
{
union log_record *record =
(union log_record *)((u8 *)rl->rl_ring_buf + rs->next_offset);
size_t record_size;
switch (record->full_record.type) {
case FULL:
rs->base_record = record->full_record;
record_size = sizeof(record->full_record);
break;
case SAME_FILE:
rs->base_record.block_index =
record->same_file_record.block_index;
rs->base_record.absolute_ts_us +=
record->same_file_record.relative_ts_us;
record_size = sizeof(record->same_file_record);
break;
case SAME_FILE_NEXT_BLOCK:
++rs->base_record.block_index;
rs->base_record.absolute_ts_us +=
record->same_file_next_block.relative_ts_us;
record_size = sizeof(record->same_file_next_block);
break;
case SAME_FILE_NEXT_BLOCK_SHORT:
++rs->base_record.block_index;
rs->base_record.absolute_ts_us +=
record->same_file_next_block_short.relative_ts_us;
record_size = sizeof(record->same_file_next_block_short);
break;
}
rs->next_offset += record_size;
if (rs->next_offset > rl->rl_size - sizeof(*record)) {
rs->next_offset = 0;
++rs->current_pass_no;
}
++rs->current_record_no;
}
static void log_block_read(struct mount_info *mi, incfs_uuid_t *id,
int block_index, bool timed_out)
int block_index)
{
struct read_log *log = &mi->mi_log;
struct read_log_state state;
struct read_log_state *head, *tail;
s64 now_us = ktime_to_us(ktime_get());
int rl_size;
struct read_log_record record = {
.file_id = *id,
.block_index = block_index,
.timed_out = timed_out,
.timestamp_us = now_us
};
read_lock(&log->rl_access_lock);
rl_size = READ_ONCE(log->rl_size);
if (rl_size != 0) {
spin_lock(&log->rl_logging_lock);
state = READ_ONCE(log->rl_state);
log->rl_ring_buf[state.next_index] = record;
if (++state.next_index == rl_size) {
state.next_index = 0;
++state.current_pass_no;
}
WRITE_ONCE(log->rl_state, state);
spin_unlock(&log->rl_logging_lock);
s64 relative_us;
union log_record record;
size_t record_size;
spin_lock(&log->rl_lock);
if (log->rl_size == 0) {
spin_unlock(&log->rl_lock);
return;
}
read_unlock(&log->rl_access_lock);
if (rl_size != 0)
wake_up_all(&log->ml_notif_wq);
head = &log->rl_head;
tail = &log->rl_tail;
relative_us = now_us - head->base_record.absolute_ts_us;
if (memcmp(id, &head->base_record.file_id, sizeof(incfs_uuid_t)) ||
relative_us >= 1ll << 32) {
record.full_record = (struct full_record){
.type = FULL,
.block_index = block_index,
.file_id = *id,
.absolute_ts_us = now_us,
};
record_size = sizeof(struct full_record);
} else if (block_index != head->base_record.block_index + 1 ||
relative_us >= 1 << 30) {
record.same_file_record = (struct same_file_record){
.type = SAME_FILE,
.block_index = block_index,
.relative_ts_us = relative_us,
};
record_size = sizeof(struct same_file_record);
} else if (relative_us >= 1 << 14) {
record.same_file_next_block = (struct same_file_next_block){
.type = SAME_FILE_NEXT_BLOCK,
.relative_ts_us = relative_us,
};
record_size = sizeof(struct same_file_next_block);
} else {
record.same_file_next_block_short =
(struct same_file_next_block_short){
.type = SAME_FILE_NEXT_BLOCK_SHORT,
.relative_ts_us = relative_us,
};
record_size = sizeof(struct same_file_next_block_short);
}
head->base_record.file_id = *id;
head->base_record.block_index = block_index;
head->base_record.absolute_ts_us = now_us;
/* Advance tail beyond area we are going to overwrite */
while (tail->current_pass_no < head->current_pass_no &&
tail->next_offset < head->next_offset + record_size)
log_read_one_record(log, tail);
memcpy(((u8 *)log->rl_ring_buf) + head->next_offset, &record,
record_size);
head->next_offset += record_size;
if (head->next_offset > log->rl_size - sizeof(record)) {
head->next_offset = 0;
++head->current_pass_no;
}
++head->current_record_no;
spin_unlock(&log->rl_lock);
wake_up_all(&log->ml_notif_wq);
}
static int validate_hash_tree(struct file *bf, struct data_file *df,
@ -708,8 +794,7 @@ static int wait_for_data_block(struct data_file *df, int block_index,
mi = df->df_mount_info;
if (timeout_ms == 0) {
log_block_read(mi, &df->df_id, block_index,
true /*timed out*/);
log_block_read(mi, &df->df_id, block_index);
return -ETIME;
}
@ -728,8 +813,7 @@ static int wait_for_data_block(struct data_file *df, int block_index,
if (wait_res == 0) {
/* Wait has timed out */
log_block_read(mi, &df->df_id, block_index,
true /*timed out*/);
log_block_read(mi, &df->df_id, block_index);
return -ETIME;
}
if (wait_res < 0) {
@ -825,7 +909,7 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df,
}
if (result >= 0)
log_block_read(mi, &df->df_id, index, false /*timed out*/);
log_block_read(mi, &df->df_id, index);
out:
return result;
@ -1195,49 +1279,29 @@ struct read_log_state incfs_get_log_state(struct mount_info *mi)
struct read_log *log = &mi->mi_log;
struct read_log_state result;
read_lock(&log->rl_access_lock);
spin_lock(&log->rl_logging_lock);
result = READ_ONCE(log->rl_state);
spin_unlock(&log->rl_logging_lock);
read_unlock(&log->rl_access_lock);
spin_lock(&log->rl_lock);
result = log->rl_head;
spin_unlock(&log->rl_lock);
return result;
}
static u64 calc_record_count(const struct read_log_state *state, int rl_size)
{
return state->current_pass_no * (u64)rl_size + state->next_index;
}
int incfs_get_uncollected_logs_count(struct mount_info *mi,
struct read_log_state state)
const struct read_log_state *state)
{
struct read_log *log = &mi->mi_log;
struct read_log_state rl_state;
int rl_size;
u64 count;
read_lock(&log->rl_access_lock);
rl_size = READ_ONCE(log->rl_size);
spin_lock(&log->rl_logging_lock);
rl_state = READ_ONCE(log->rl_state);
spin_unlock(&log->rl_logging_lock);
read_unlock(&log->rl_access_lock);
count = calc_record_count(&rl_state, rl_size);
if (rl_state.generation_id == state.generation_id)
count -= calc_record_count(&state, rl_size);
return min_t(int, count, rl_size);
}
static void fill_pending_read_from_log_record(
struct incfs_pending_read_info *dest, const struct read_log_record *src,
struct read_log_state *state, u64 log_size)
{
dest->file_id = src->file_id;
dest->block_index = src->block_index;
dest->serial_number =
state->current_pass_no * log_size + state->next_index;
dest->timestamp_us = src->timestamp_us;
u32 generation;
u64 head_no, tail_no;
spin_lock(&log->rl_lock);
tail_no = log->rl_tail.current_record_no;
head_no = log->rl_head.current_record_no;
generation = log->rl_head.generation_id;
spin_unlock(&log->rl_lock);
if (generation != state->generation_id)
return head_no - tail_no;
else
return head_no - max_t(u64, tail_no, state->current_record_no);
}
int incfs_collect_logged_reads(struct mount_info *mi,
@ -1245,82 +1309,48 @@ int incfs_collect_logged_reads(struct mount_info *mi,
struct incfs_pending_read_info *reads,
int reads_size)
{
struct read_log *log = &mi->mi_log;
struct read_log_state live_state;
int dst_idx;
int rl_size;
int result = 0;
u64 read_count;
u64 written_count;
read_lock(&log->rl_access_lock);
rl_size = READ_ONCE(log->rl_size);
spin_lock(&log->rl_logging_lock);
live_state = READ_ONCE(log->rl_state);
spin_unlock(&log->rl_logging_lock);
struct read_log *log = &mi->mi_log;
struct read_log_state *head, *tail;
if (reader_state->generation_id != live_state.generation_id) {
reader_state->generation_id = live_state.generation_id;
reader_state->current_pass_no = reader_state->next_index = 0;
}
spin_lock(&log->rl_lock);
head = &log->rl_head;
tail = &log->rl_tail;
read_count = calc_record_count(reader_state, rl_size);
written_count = calc_record_count(&live_state, rl_size);
if (read_count == written_count) {
result = 0;
goto out;
}
if (reader_state->next_index >= rl_size) {
result = -ERANGE;
goto out;
}
if (reader_state->generation_id != head->generation_id) {
pr_debug("read ptr is wrong generation: %u/%u",
reader_state->generation_id, head->generation_id);
if (read_count > written_count) {
/* This reader is somehow ahead of the writer. */
pr_debug("incfs: Log reader is ahead of writer\n");
*reader_state = live_state;
*reader_state = (struct read_log_state){
.generation_id = head->generation_id,
};
}
if (written_count - read_count > rl_size) {
/*
* Reading pointer is too far behind,
* start from the record following the write pointer.
*/
pr_debug(
"incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n",
(u32)reader_state->next_index,
(u32)reader_state->current_pass_no,
(u32)live_state.next_index,
(u32)live_state.current_pass_no - 1, (u32)rl_size);
if (reader_state->current_record_no < tail->current_record_no) {
pr_debug("read ptr is behind, moving: %u/%u -> %u/%u\n",
(u32)reader_state->next_offset,
(u32)reader_state->current_pass_no,
(u32)tail->next_offset, (u32)tail->current_pass_no);
*reader_state = (struct read_log_state){
.next_index = live_state.next_index,
.current_pass_no = live_state.current_pass_no - 1,
};
*reader_state = *tail;
}
for (dst_idx = 0; dst_idx < reads_size; dst_idx++) {
if (reader_state->next_index == live_state.next_index &&
reader_state->current_pass_no == live_state.current_pass_no)
if (reader_state->current_record_no == head->current_record_no)
break;
fill_pending_read_from_log_record(
&reads[dst_idx],
&log->rl_ring_buf[reader_state->next_index],
reader_state, rl_size);
log_read_one_record(log, reader_state);
reader_state->next_index++;
if (reader_state->next_index == rl_size) {
reader_state->next_index = 0;
reader_state->current_pass_no++;
}
reads[dst_idx] = (struct incfs_pending_read_info){
.file_id = reader_state->base_record.file_id,
.block_index = reader_state->base_record.block_index,
.serial_number = reader_state->current_record_no,
.timestamp_us = reader_state->base_record.absolute_ts_us
};
}
result = dst_idx;
out:
read_unlock(&log->rl_access_lock);
return result;
spin_unlock(&log->rl_lock);
return dst_idx;
}
bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs)

@ -20,50 +20,74 @@
#define SEGMENTS_PER_FILE 3
struct read_log_record {
u32 block_index : 31;
u32 timed_out : 1;
u64 timestamp_us;
enum LOG_RECORD_TYPE {
FULL,
SAME_FILE,
SAME_FILE_NEXT_BLOCK,
SAME_FILE_NEXT_BLOCK_SHORT,
};
struct full_record {
enum LOG_RECORD_TYPE type : 2; /* FULL */
u32 block_index : 30;
incfs_uuid_t file_id;
} __packed;
u64 absolute_ts_us;
} __packed; /* 28 bytes */
struct same_file_record {
enum LOG_RECORD_TYPE type : 2; /* SAME_FILE */
u32 block_index : 30;
u32 relative_ts_us; /* max 2^32 us ~= 1 hour (1:11:30) */
} __packed; /* 12 bytes */
struct same_file_next_block {
enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK */
u32 relative_ts_us : 30; /* max 2^30 us ~= 15 min (17:50) */
} __packed; /* 4 bytes */
struct same_file_next_block_short {
enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK_SHORT */
u16 relative_ts_us : 14; /* max 2^14 us ~= 16 ms */
} __packed; /* 2 bytes */
union log_record {
struct full_record full_record;
struct same_file_record same_file_record;
struct same_file_next_block same_file_next_block;
struct same_file_next_block_short same_file_next_block_short;
};
struct read_log_state {
/* Log buffer generation id, incremented on configuration changes */
u32 generation_id : 8;
u32 generation_id;
/* Next slot in rl_ring_buf to write into. */
u32 next_index : 24;
/* Offset in rl_ring_buf to write into. */
u32 next_offset;
/* Current number of writer passes over rl_ring_buf */
u32 current_pass_no;
/* Current full_record to diff against */
struct full_record base_record;
/* Current record number counting from configuration change */
u64 current_record_no;
};
/* A ring buffer to save records about data blocks which were recently read. */
struct read_log {
struct read_log_record *rl_ring_buf;
void *rl_ring_buf;
int rl_size;
struct read_log_state rl_state;
struct read_log_state rl_head;
/*
* A lock for _all_ accesses to the struct, to protect against remounts.
* Taken for writing when resizing the buffer.
*/
rwlock_t rl_access_lock;
struct read_log_state rl_tail;
/*
* A lock to protect the actual logging - adding a new record.
* Note: ALWAYS taken after and under the |rl_access_lock|.
*/
spinlock_t rl_logging_lock;
/* A lock to protect the above fields */
spinlock_t rl_lock;
/*
* A queue of waiters who want to be notified about reads.
*/
/* A queue of waiters who want to be notified about reads */
wait_queue_head_t ml_notif_wq;
};
@ -281,7 +305,7 @@ int incfs_collect_logged_reads(struct mount_info *mi,
int reads_size);
struct read_log_state incfs_get_log_state(struct mount_info *mi);
int incfs_get_uncollected_logs_count(struct mount_info *mi,
struct read_log_state state);
const struct read_log_state *state);
static inline struct inode_info *get_incfs_node(struct inode *inode)
{

@ -643,7 +643,7 @@ static __poll_t log_poll(struct file *file, poll_table *wait)
__poll_t ret = 0;
poll_wait(file, &mi->mi_log.ml_notif_wq, wait);
count = incfs_get_uncollected_logs_count(mi, log_state->state);
count = incfs_get_uncollected_logs_count(mi, &log_state->state);
if (count >= mi->mi_options.read_log_wakeup_count)
ret = EPOLLIN | EPOLLRDNORM;

@ -1930,16 +1930,19 @@ failure:
return TEST_FAILURE;
}
enum expected_log { FULL_LOG, NO_LOG, PARTIAL_LOG };
static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,
bool no_rlog)
enum expected_log expected_log)
{
uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE];
struct incfs_pending_read_info prs[100] = {};
struct incfs_pending_read_info prs[2048] = {};
int prs_size = ARRAY_SIZE(prs);
int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE;
int expected_read_block_cnt;
int res;
int read_count;
int i;
int i, j;
char *filename = concat_file_name(mount_dir, file->name);
int fd;
@ -1950,17 +1953,29 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,
if (block_cnt > prs_size)
block_cnt = prs_size;
expected_read_block_cnt = block_cnt;
for (i = 0; i < block_cnt; i++) {
res = pread(fd, data, sizeof(data),
INCFS_DATA_FILE_BLOCK_SIZE * i);
/* Make some read logs of type SAME_FILE_NEXT_BLOCK */
if (i % 10 == 0)
usleep(20000);
/* Skip some blocks to make logs of type SAME_FILE */
if (i % 10 == 5) {
++i;
--expected_read_block_cnt;
}
if (res <= 0)
goto failure;
}
read_count =
wait_for_pending_reads(log_fd, no_rlog ? 10 : 0, prs, prs_size);
if (no_rlog) {
read_count = wait_for_pending_reads(
log_fd, expected_log == NO_LOG ? 10 : 0, prs, prs_size);
if (expected_log == NO_LOG) {
if (read_count == 0)
goto success;
if (read_count < 0)
@ -1977,14 +1992,26 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,
goto failure;
}
if (read_count != block_cnt) {
i = 0;
if (expected_log == PARTIAL_LOG) {
if (read_count == 0) {
ksft_print_msg("No logs %s.\n", file->name);
goto failure;
}
for (i = 0, j = 0; j < expected_read_block_cnt - read_count;
i++, j++)
if (i % 10 == 5)
++i;
} else if (read_count != expected_read_block_cnt) {
ksft_print_msg("Bad log read count %s %d %d.\n", file->name,
read_count, block_cnt);
read_count, expected_read_block_cnt);
goto failure;
}
for (i = 0; i < read_count; i++) {
struct incfs_pending_read_info *read = &prs[i];
for (j = 0; j < read_count; i++, j++) {
struct incfs_pending_read_info *read = &prs[j];
if (!same_id(&read->file_id, &file->id)) {
ksft_print_msg("Bad log read ino %s\n", file->name);
@ -1997,8 +2024,8 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,
goto failure;
}
if (i != 0) {
unsigned long psn = prs[i - 1].serial_number;
if (j != 0) {
unsigned long psn = prs[j - 1].serial_number;
if (read->serial_number != psn + 1) {
ksft_print_msg("Bad log read sn %s %d %d.\n",
@ -2013,6 +2040,9 @@ static int validate_logs(char *mount_dir, int log_fd, struct test_file *file,
file->name);
goto failure;
}
if (i % 10 == 5)
++i;
}
success:
@ -2063,7 +2093,7 @@ static int read_log_test(char *mount_dir)
for (i = 0; i < file_num; i++) {
struct test_file *file = &test.files[i];
if (validate_logs(mount_dir, log_fd, file, false))
if (validate_logs(mount_dir, log_fd, file, FULL_LOG))
goto failure;
}
@ -2091,7 +2121,7 @@ static int read_log_test(char *mount_dir)
for (i = 0; i < file_num; i++) {
struct test_file *file = &test.files[i];
if (validate_logs(mount_dir, log_fd, file, false))
if (validate_logs(mount_dir, log_fd, file, FULL_LOG))
goto failure;
}
@ -2118,7 +2148,30 @@ static int read_log_test(char *mount_dir)
for (i = 0; i < file_num; i++) {
struct test_file *file = &test.files[i];
if (validate_logs(mount_dir, log_fd, file, true))
if (validate_logs(mount_dir, log_fd, file, NO_LOG))
goto failure;
}
/*
* Remount and check that logs start working again
*/
drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC);
if (drop_caches == -1)
goto failure;
i = write(drop_caches, "3", 1);
close(drop_caches);
if (i != 1)
goto failure;
if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=1",
true) != 0)
goto failure;
/* Validate data again */
for (i = 0; i < file_num; i++) {
struct test_file *file = &test.files[i];
if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG))
goto failure;
}
@ -2141,7 +2194,7 @@ static int read_log_test(char *mount_dir)
for (i = 0; i < file_num; i++) {
struct test_file *file = &test.files[i];
if (validate_logs(mount_dir, log_fd, file, false))
if (validate_logs(mount_dir, log_fd, file, FULL_LOG))
goto failure;
}
@ -2292,8 +2345,6 @@ static int validate_ranges(const char *mount_dir, struct test_file *file)
if (fba.start_index >= block_cnt) {
if (fba.index_out != fba.start_index) {
printf("Paul: %d, %d\n", (int)fba.index_out,
(int)fba.start_index);
error = -EINVAL;
goto out;
}
@ -2645,7 +2696,7 @@ int main(int argc, char *argv[])
rmdir(mount_dir);
if (fails > 0)
ksft_exit_pass();
ksft_exit_fail();
else
ksft_exit_pass();
return 0;

Loading…
Cancel
Save