diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index f02f31cea0e6..34cb9799e324 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -29,6 +29,8 @@ struct pcpu_chunk { int end_offset; /* additional area required to have the region end page aligned */ + + int nr_pages; /* # of pages served by this chunk */ int nr_populated; /* # of populated pages */ unsigned long populated[]; /* populated bitmap */ }; diff --git a/mm/percpu.c b/mm/percpu.c index e08ed61ea70a..7c9f0d3ad1b5 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -181,19 +181,55 @@ static void pcpu_schedule_balance_work(void) schedule_work(&pcpu_balance_work); } +/** + * pcpu_addr_in_first_chunk - address check for first chunk's dynamic region + * @addr: percpu address of interest + * + * The first chunk is considered to be the dynamic region of the first chunk. + * While the true first chunk is composed of the static, dynamic, and + * reserved regions, it is the chunk that serves the dynamic region that is + * circulated in the chunk slots. + * + * The reserved chunk has a separate check and the static region addresses + * should never be passed into the percpu allocator. + * + * RETURNS: + * True if the address is in the dynamic region of the first chunk. + */ static bool pcpu_addr_in_first_chunk(void *addr) { - void *first_start = pcpu_first_chunk->base_addr; + void *start_addr = pcpu_first_chunk->base_addr + + pcpu_first_chunk->start_offset; + void *end_addr = pcpu_first_chunk->base_addr + + pcpu_first_chunk->nr_pages * PAGE_SIZE - + pcpu_first_chunk->end_offset; - return addr >= first_start && addr < first_start + pcpu_unit_size; + return addr >= start_addr && addr < end_addr; } +/** + * pcpu_addr_in_reserved_chunk - address check for reserved region + * + * The reserved region is a part of the first chunk and primarily serves + * static percpu variables from kernel modules. + * + * RETURNS: + * True if the address is in the reserved region. 
+ */ static bool pcpu_addr_in_reserved_chunk(void *addr) { - void *first_start = pcpu_first_chunk->base_addr; + void *start_addr, *end_addr; + + if (!pcpu_reserved_chunk) + return false; - return addr >= first_start && - addr < first_start + pcpu_first_chunk->start_offset; + start_addr = pcpu_reserved_chunk->base_addr + + pcpu_reserved_chunk->start_offset; + end_addr = pcpu_reserved_chunk->base_addr + + pcpu_reserved_chunk->nr_pages * PAGE_SIZE - + pcpu_reserved_chunk->end_offset; + + return addr >= start_addr && addr < end_addr; } static int __pcpu_size_to_slot(int size) @@ -234,11 +270,16 @@ static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx) return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; } +static unsigned long pcpu_unit_page_offset(unsigned int cpu, int page_idx) +{ + return pcpu_unit_offsets[cpu] + (page_idx << PAGE_SHIFT); +} + static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, unsigned int cpu, int page_idx) { - return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] + - (page_idx << PAGE_SHIFT); + return (unsigned long)chunk->base_addr + + pcpu_unit_page_offset(cpu, page_idx); } static void __maybe_unused pcpu_next_unpop(struct pcpu_chunk *chunk, @@ -708,23 +749,34 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme, pcpu_chunk_relocate(chunk, oslot); } -static struct pcpu_chunk * __init pcpu_alloc_first_chunk(void *base_addr, - int start_offset, +static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, int map_size, int *map, int init_map_size) { struct pcpu_chunk *chunk; - int region_size; + unsigned long aligned_addr; + int start_offset, region_size; + + /* region calculations */ + aligned_addr = tmp_addr & PAGE_MASK; + + start_offset = tmp_addr - aligned_addr; region_size = PFN_ALIGN(start_offset + map_size); + /* allocate chunk */ chunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0); + INIT_LIST_HEAD(&chunk->list); INIT_LIST_HEAD(&chunk->map_extend_list); - 
chunk->base_addr = base_addr; + + chunk->base_addr = (void *)aligned_addr; chunk->start_offset = start_offset; chunk->end_offset = region_size - chunk->start_offset - map_size; + + chunk->nr_pages = pcpu_unit_pages; + chunk->map = map; chunk->map_alloc = init_map_size; @@ -734,10 +786,17 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(void *base_addr, chunk->nr_populated = pcpu_unit_pages; chunk->contig_hint = chunk->free_size = map_size; - chunk->map[0] = 1; - chunk->map[1] = chunk->start_offset; - chunk->map[2] = (chunk->start_offset + chunk->free_size) | 1; - chunk->map_used = 2; + + if (chunk->start_offset) { + /* hide the beginning of the bitmap */ + chunk->map[0] = 1; + chunk->map[1] = chunk->start_offset; + chunk->map_used = 1; + } + + /* set chunk's free region */ + chunk->map[++chunk->map_used] = + (chunk->start_offset + chunk->free_size) | 1; if (chunk->end_offset) { /* hide the end of the bitmap */ @@ -772,6 +831,8 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) chunk->free_size = pcpu_unit_size; chunk->contig_hint = pcpu_unit_size; + chunk->nr_pages = pcpu_unit_pages; + return chunk; } @@ -859,18 +920,21 @@ static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai); * pcpu_chunk_addr_search - determine chunk containing specified address * @addr: address for which the chunk needs to be determined. * + * This is an internal function that handles all but static allocations. + * Static percpu address values should never be passed into the allocator. + * * RETURNS: * The address of the found chunk. */ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) { - /* is it in the first chunk? */ - if (pcpu_addr_in_first_chunk(addr)) { - /* is it in the reserved area? */ - if (pcpu_addr_in_reserved_chunk(addr)) - return pcpu_reserved_chunk; + /* is it in the dynamic region (first chunk)? */ + if (pcpu_addr_in_first_chunk(addr)) return pcpu_first_chunk; - } + + /* is it in the reserved region? 
*/ + if (pcpu_addr_in_reserved_chunk(addr)) + return pcpu_reserved_chunk; /* * The address is relative to unit0 which might be unused and @@ -1401,10 +1465,16 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr) * The following test on unit_low/high isn't strictly * necessary but will speed up lookups of addresses which * aren't in the first chunk. + * + * The address check is against full chunk sizes. pcpu_base_addr + * points to the beginning of the first chunk including the + * static region. Assumes good intent as the first chunk may + * not be full (i.e. < pcpu_unit_pages in size). */ - first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0); - first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu, - pcpu_unit_pages); + first_low = (unsigned long)pcpu_base_addr + + pcpu_unit_page_offset(pcpu_low_unit_cpu, 0); + first_high = (unsigned long)pcpu_base_addr + + pcpu_unit_page_offset(pcpu_high_unit_cpu, pcpu_unit_pages); if ((unsigned long)addr >= first_low && (unsigned long)addr < first_high) { for_each_possible_cpu(cpu) { @@ -1586,12 +1656,13 @@ static void pcpu_dump_alloc_info(const char *lvl, * The caller should have mapped the first chunk at @base_addr and * copied static data to each unit. * - * If the first chunk ends up with both reserved and dynamic areas, it - * is served by two chunks - one to serve the core static and reserved - * areas and the other for the dynamic area. They share the same vm - * and page map but uses different area allocation map to stay away - * from each other. The latter chunk is circulated in the chunk slots - * and available for dynamic allocation like any other chunks. + * The first chunk will always contain a static and a dynamic region. + * However, the static region is not managed by any chunk. If the first + * chunk also contains a reserved region, it is served by two chunks - + * one for the reserved region and one for the dynamic region.
They + * share the same vm, but use offset regions in the area allocation map. + * The chunk serving the dynamic region is circulated in the chunk slots + * and available for dynamic allocation like any other chunk. * * RETURNS: * 0 on success, -errno on failure. @@ -1609,7 +1680,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, unsigned int cpu; int *unit_map; int group, unit, i; - int map_size, start_offset; + int map_size; + unsigned long tmp_addr; #define PCPU_SETUP_BUG_ON(cond) do { \ if (unlikely(cond)) { \ @@ -1712,25 +1784,26 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, INIT_LIST_HEAD(&pcpu_slot[i]); /* - * Initialize static chunk. If reserved_size is zero, the - * static chunk covers static area + dynamic allocation area - * in the first chunk. If reserved_size is not zero, it - * covers static area + reserved area (mostly used for module - * static percpu allocation). + * Initialize first chunk. + * If the reserved_size is non-zero, this initializes the reserved + * chunk. If the reserved_size is zero, the reserved chunk is NULL + * and the dynamic region is initialized here. The first chunk, + * pcpu_first_chunk, will always point to the chunk that serves + * the dynamic region. */ - start_offset = ai->static_size; + tmp_addr = (unsigned long)base_addr + ai->static_size; map_size = ai->reserved_size ?: ai->dyn_size; - chunk = pcpu_alloc_first_chunk(base_addr, start_offset, map_size, smap, + chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, smap, ARRAY_SIZE(smap)); /* init dynamic chunk if necessary */ if (ai->reserved_size) { pcpu_reserved_chunk = chunk; - start_offset = ai->static_size + ai->reserved_size; + tmp_addr = (unsigned long)base_addr + ai->static_size + + ai->reserved_size; map_size = ai->dyn_size; - chunk = pcpu_alloc_first_chunk(base_addr, start_offset, - map_size, dmap, + chunk = pcpu_alloc_first_chunk(tmp_addr, map_size, dmap, ARRAY_SIZE(dmap)); }