From c3b609bb58a30b247b6d7761d53bb8cf927a8f89 Mon Sep 17 00:00:00 2001
From: Erwei Deng
Date: Mon, 9 May 2022 21:08:54 +0800
Subject: [PATCH] src: support memory pool for percpu variables

There are two ways to implement percpu memory pools: design a percpu
memory pool ourselves, or reuse the kernel's original percpu mechanism.

For the first way, we need to allocate a block of memory (such as an
array) on each NUMA node to store the variables of that node's CPUs,
and record the base address of the memory allocated on each node in an
array. The CPUs on each node then need to be renumbered, because CPU
numbers within a node are not consecutive. Finally, a CPU accesses its
own variable as follows:

    base_addr[cpu_to_node(cpu)][cpu_id_in_numa(cpu)];

This way has two drawbacks. First, we need to create a new set of
percpu APIs instead of using the kernel's. Second, accessing a percpu
variable requires two memory references.

For the second way, the maximum allocation size the kernel supports is
PCPU_MIN_UNIT_SIZE, defined in include/linux/percpu.h, which is 32KB in
anck-v4.19 for x86_64. I allocate one large variable of that maximum
size and make it part of the memory pool. Each CPU's copy of a variable
sits at the area's base address plus a fixed offset, so multiple small
variables can be assigned out of one large variable, and when one large
variable is not enough, further large variables are allocated. The
per-CPU offset is kept in the gs register (x86) or the tpidr register
(arm), so a CPU accessing its own variable needs only one memory
reference.

In conclusion, I choose the second way.

Signed-off-by: Erwei Deng
---
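A sketch to illustrate the mechanism (not part of the patch): one large
__alloc_percpu() area is carved into fixed-size objects at base plus a
fixed offset, exactly as simple_percpu_mempool_alloc() does below, and
each carved pointer is still a valid percpu pointer that per_cpu_ptr()
resolves for every CPU. __alloc_percpu(), per_cpu_ptr() and
free_percpu() are the kernel's real APIs; the demo function name and
the two-object layout are invented for illustration.

    #include <linux/percpu.h>
    #include <linux/cpumask.h>

    /* Illustration only: carve two u64 objects out of one percpu area. */
    static int percpu_pool_demo(void)
    {
    	/* One small area; the pool uses PCPU_MIN_UNIT_SIZE instead. */
    	void __percpu *area = __alloc_percpu(2 * sizeof(u64), sizeof(u64));
    	u64 __percpu *obj0, *obj1;
    	int cpu;

    	if (!area)
    		return -ENOMEM;

    	/* Objects sit at base + 0 and base + sizeof(u64); the cast
    	 * through unsigned long mirrors the pool's own arithmetic. */
    	obj0 = (u64 __percpu *)area;
    	obj1 = (u64 __percpu *)((unsigned long)area + sizeof(u64));

    	/* per_cpu_ptr() shifts the base by each CPU's percpu offset;
    	 * for the running CPU that offset lives in gs (x86) or
    	 * tpidr (arm), so a local access is one memory reference. */
    	for_each_possible_cpu(cpu) {
    		*per_cpu_ptr(obj0, cpu) = 0;
    		*per_cpu_ptr(obj1, cpu) = cpu;
    	}

    	free_percpu(area);	/* frees both carved objects at once */
    	return 0;
    }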
 src/mempool.h | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

diff --git a/src/mempool.h b/src/mempool.h
index 24d18d3..55c8447 100644
--- a/src/mempool.h
+++ b/src/mempool.h
@@ -5,6 +5,8 @@
 
 #ifdef SCHEDMOD_MEMPOOL
 
+#include <linux/percpu.h>
+
 #define is_simple_mempool_addr(smpool, addr) \
 	((unsigned long)(addr) >= (smpool)->vstart && \
 	 (unsigned long)(addr) <= (smpool)->vend)
@@ -17,6 +19,20 @@ struct simple_mempool {
 	unsigned int obj_num;
 };
 
+struct simple_percpu_mempool {
+	/* The base address of each percpu memory area. */
+	unsigned long *percpu_ptr;
+	/* Bytes already allocated from the current area. */
+	unsigned long allocated_size;
+	unsigned int obj_size;
+	/* How many areas are required for the mempool. */
+	unsigned int areas;
+	/* How many objs can be assigned from each area. */
+	unsigned int objs_per_area;
+	/* Index of the area currently being allocated from. */
+	unsigned int area_id;
+};
+
 static inline void *simple_mempool_alloc(struct simple_mempool *smpool)
 {
 	void *ret;
@@ -55,6 +71,96 @@ static inline void simple_mempool_destory(struct simple_mempool *smpool)
 	kfree(smpool);
 }
 
+static struct simple_percpu_mempool *simple_percpu_mempool_create(int obj_num,
+								   int obj_size)
+{
+	unsigned int areas, objs_per_area, cnt = 0;
+	struct simple_percpu_mempool *psmpool;
+	void *ptr;
+
+	psmpool = kzalloc_node(sizeof(*psmpool), GFP_ATOMIC, 0);
+	if (!psmpool)
+		return NULL;
+
+	/* Calculate how many percpu areas are required. */
+	objs_per_area = PCPU_MIN_UNIT_SIZE / obj_size;
+	areas = (obj_num + objs_per_area - 1) / objs_per_area;
+
+	psmpool->percpu_ptr =
+		kzalloc_node(sizeof(unsigned long) * areas, GFP_ATOMIC, 0);
+	if (!psmpool->percpu_ptr)
+		goto error;
+
+	for (cnt = 0; cnt < areas; cnt++) {
+		ptr = __alloc_percpu(PCPU_MIN_UNIT_SIZE, obj_size);
+		if (!ptr)
+			goto error;
+
+		psmpool->percpu_ptr[cnt] = (unsigned long)ptr;
+	}
+
+	psmpool->obj_size = obj_size;
+	psmpool->objs_per_area = objs_per_area;
+	psmpool->areas = areas;
+
+	return psmpool;
+
+error:
+	while (cnt > 0)
+		free_percpu((void *)psmpool->percpu_ptr[--cnt]);
+
+	kfree(psmpool->percpu_ptr);
+	kfree(psmpool);
+
+	return NULL;
+}
+
+static void *simple_percpu_mempool_alloc(struct simple_percpu_mempool *psmpool)
+{
+	unsigned long area_size, ret;
+
+	area_size = psmpool->obj_size * psmpool->objs_per_area;
+
+	/* Move on to the next area once the current one is exhausted. */
+	if ((psmpool->allocated_size + psmpool->obj_size) > area_size) {
+		psmpool->area_id++;
+		psmpool->allocated_size = 0;
+	}
+
+	ret = psmpool->percpu_ptr[psmpool->area_id] + psmpool->allocated_size;
+	psmpool->allocated_size += psmpool->obj_size;
+
+	return (void *)ret;
+}
+
+static void simple_percpu_mempool_destory(struct simple_percpu_mempool *psmpool)
+{
+	int i;
+
+	for (i = 0; i < psmpool->areas; i++)
+		free_percpu((void *)psmpool->percpu_ptr[i]);
+
+	kfree(psmpool->percpu_ptr);
+	kfree(psmpool);
+}
+
+static inline bool is_simple_percpu_mempool_addr(
+		struct simple_percpu_mempool *psmpool, void *_addr)
+{
+	int i;
+	unsigned long addr, area_size, base;
+
+	addr = (unsigned long)_addr;
+	area_size = psmpool->obj_size * psmpool->objs_per_area;
+
+	for (i = 0; i < psmpool->areas; i++) {
+		base = psmpool->percpu_ptr[i];
+		if (addr >= base && addr < (base + area_size))
+			return true;
+	}
+
+	return false;
+}
+
 #define FIELD_TYPE(t, f) typeof(((struct t*)0)->f)
 #define FIELD_INDIRECT_TYPE(t, f) typeof(*((struct t*)0)->f)
 
@@ -85,6 +191,33 @@ static int recheck_mempool_##name(void) \
 	return 0; \
 }
 
+#define DEFINE_RESERVE_PERCPU(type, field, name, require, max) \
+struct simple_percpu_mempool *name##_smp = NULL; \
+void release_##name##_reserve(struct type *x) \
+{ \
+	if (!is_simple_percpu_mempool_addr(name##_smp, x->field)) \
+		free_percpu((void *)x->field); \
+	x->field = NULL; \
+} \
+FIELD_TYPE(type, field) alloc_##name##_reserve(void) \
+{ \
+	return simple_percpu_mempool_alloc(name##_smp); \
+} \
+static int create_mempool_##name(void) \
+{ \
+	name##_smp = simple_percpu_mempool_create(max, \
+		sizeof(FIELD_INDIRECT_TYPE(type, field))); \
+	if (!name##_smp) \
+		return -ENOMEM; \
+	return 0; \
+} \
+static int recheck_mempool_##name(void) \
+{ \
+	if (require > (name##_smp->areas * name##_smp->objs_per_area)) \
+		return -ENOMEM; \
+	return 0; \
+}
+
 /*
  * Examples of simple mempool usage
@@ -101,6 +234,12 @@ static int recheck_mempool_##name(void) \
  *	rq,		// name the mempool as rq_smp
  *	nr_cpu_ids,	// we need exactly nr_cpu_ids objects
  *	nr_cpu_ids);	// we alloc nr_cpu_ids objects before stop_machine
+ *
+ * DEFINE_RESERVE_PERCPU(task_struct,	// struct task_struct
+ *	percpu_var,			// task_struct's new percpu_var field
+ *	percpu_var,			// name the percpu mempool as percpu_var_smp
+ *	nr_threads + nr_cpu_ids,	// we need exactly nr_threads + nr_cpu_ids objects
+ *	nr_threads + nr_cpu_ids)	// we alloc nr_threads + nr_cpu_ids objects before stop_machine
 */
 
 static int sched_mempools_create(void)
@@ -114,6 +253,9 @@ static int sched_mempools_create(void)
 	 * if ((err = create_mempool_rq()))
 	 *	return err;
+
+	 * if ((err = create_mempool_percpu_var()))
+	 *	return err;
 	 */
 
 	return 0;
@@ -125,6 +267,7 @@ static void sched_mempools_destroy(void)
 	/*
 	 * Examples of mempools destroy
 	 * simple_mempool_destory(se_smp);
 	 * simple_mempool_destory(rq_smp);
+	 * simple_percpu_mempool_destory(percpu_var_smp);
 	 */
 }
@@ -139,6 +282,9 @@ static int recheck_smps(void)
 	 * if ((err = recheck_mempool_se()))
 	 *	return err;
+
+	 * if ((err = recheck_mempool_percpu_var()))
+	 *	return err;
 	 */
 
 	return 0;
@@ -168,6 +314,9 @@ static void sched_alloc_extrapad(void)
 	 * for_each_possible_cpu(cpu)
 	 *	tg->se[cpu]->statistics.bvt = alloc_se_reserve();
 	 * }
+
+	 * for_each_process_thread(p, t)
+	 *	t->percpu_var = alloc_percpu_var_reserve();
 	 */
 }
@@ -194,6 +343,9 @@ static void sched_free_extrapad(void)
 	 * for_each_possible_cpu(cpu)
 	 *	release_se_reserve(&tg->se[cpu]->statistics);
 	 * }
+
+	 * for_each_process_thread(p, t)
+	 *	release_percpu_var_reserve(t);
 	 */
 }
-- 
Gitee