#ifdef ABT_CONFIG_USE_MEM_POOL

#include <sys/types.h>
#include <sys/mman.h>

#define PROTS (PROT_READ | PROT_WRITE)

#if defined(HAVE_MAP_ANONYMOUS)
#define FLAGS_RP (MAP_PRIVATE | MAP_ANONYMOUS)
#elif defined(HAVE_MAP_ANON)
#define FLAGS_RP (MAP_PRIVATE | MAP_ANON)
#else
/* Neither MAP_ANONYMOUS nor MAP_ANON is available. */
#define FLAGS_RP (MAP_PRIVATE)
#endif

#if defined(HAVE_MAP_HUGETLB)
#define FLAGS_HP (FLAGS_RP | MAP_HUGETLB)
#define MMAP_DBG_MSG "mmap a hugepage"
#else
#define FLAGS_HP FLAGS_RP
#define MMAP_DBG_MSG "mmap regular pages"
#endif

static inline void ABTI_mem_free_stack_list(ABTI_stack_header *p_stack);
static inline void ABTI_mem_free_page_list(ABTI_page_header *p_ph);
static inline void ABTI_mem_add_page(ABTI_local *p_local,
                                     ABTI_page_header *p_ph);
static inline void ABTI_mem_add_pages_to_global(ABTI_page_header *p_head,
                                                ABTI_page_header *p_tail);
static inline void ABTI_mem_free_sph_list(ABTI_sp_header *p_sph);

static ABTD_atomic_uint64 g_sp_id = ABTD_ATOMIC_UINT64_STATIC_INITIALIZER(0);
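
/* Memory pool internals: each execution stream keeps local pools of ULT
 * stacks and tasklet block pages (in ABTI_local), backed by global pools in
 * ABTI_global.  The global stack list and stack-page list are updated
 * lock-free with compare-and-swap loops; the global task page list is
 * protected by mem_task_lock. */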
void ABTI_mem_init(ABTI_global *p_global)
{
    p_global->p_mem_stack = NULL;
    ABTI_spinlock_clear(&p_global->mem_task_lock);
    p_global->p_mem_task = NULL;
    p_global->p_mem_sph = NULL;

    ABTD_atomic_relaxed_store_uint64(&g_sp_id, 0);
}
void ABTI_mem_init_local(ABTI_local *p_local)
{
    p_local->num_stacks = 0;
    p_local->p_mem_stack = NULL;

    p_local->p_mem_task_head = NULL;
    p_local->p_mem_task_tail = NULL;
}
void ABTI_mem_finalize(ABTI_global *p_global)
{
    /* Return all pooled stacks to their stack pages */
    ABTI_mem_free_stack_list(p_global->p_mem_stack);
    p_global->p_mem_stack = NULL;

    /* Free all task block pages */
    ABTI_mem_free_page_list(p_global->p_mem_task);
    p_global->p_mem_task = NULL;

    /* Free all stack pages */
    ABTI_mem_free_sph_list(p_global->p_mem_sph);
    p_global->p_mem_sph = NULL;
}
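
/* ABTI_mem_finalize_local() releases an execution stream's local pools:
 * fully free task pages are freed right away, while pages that still hold
 * live blocks are handed back to the global page list for reuse. */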
void ABTI_mem_finalize_local(ABTI_local *p_local)
{
    /* Free all ULT stacks in the local pool */
    ABTI_mem_free_stack_list(p_local->p_mem_stack);
    p_local->num_stacks = 0;
    p_local->p_mem_stack = NULL;

    /* Free all task block pages */
    ABTI_page_header *p_rem_head = NULL;
    ABTI_page_header *p_rem_tail = NULL;
    ABTI_page_header *p_cur = p_local->p_mem_task_head;
    while (p_cur) {
        ABTI_page_header *p_tmp = p_cur;
        p_cur = p_cur->p_next;

        size_t num_free_blks =
            p_tmp->num_empty_blks +
            ABTD_atomic_acquire_load_uint32(&p_tmp->num_remote_free);
        if (num_free_blks == p_tmp->num_total_blks) {
            /* All blocks are free: release the page. */
            if (p_tmp->is_mmapped == ABT_TRUE) {
                munmap(p_tmp, gp_ABTI_global->mem_page_size);
            } else {
                ABTU_free(p_tmp);
            }
        } else {
            /* Some blocks are still in use: reclaim remotely freed blocks
             * and keep the page on a list for the global pool. */
            if (p_tmp->p_free) {
                ABTI_mem_take_free(p_tmp);
            }
            p_tmp->p_prev = NULL;
            p_tmp->p_next = p_rem_head;
            p_rem_head = p_tmp;
            if (p_rem_tail == NULL) {
                p_rem_tail = p_tmp;
            }
        }

        /* The local task page list is circular. */
        if (p_cur == p_local->p_mem_task_head)
            break;
    }
    p_local->p_mem_task_head = NULL;
    p_local->p_mem_task_tail = NULL;

    /* Pages that could not be freed go to the global page list. */
    if (p_rem_head) {
        ABTI_mem_add_pages_to_global(p_rem_head, p_rem_tail);
    }
}
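
/* Probe whether the configured large-page allocation method works on this
 * system by performing one throwaway allocation, and fall back to a cheaper
 * method (regular mmap, THP via memalign, or plain malloc) if it does not. */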
int ABTI_mem_check_lp_alloc(int lp_alloc)
{
    size_t sp_size = gp_ABTI_global->mem_sp_size;
    size_t pg_size = gp_ABTI_global->mem_page_size;
    void *p_page = NULL;

    switch (lp_alloc) {
        case ABTI_MEM_LP_MMAP_RP:
            p_page = mmap(NULL, pg_size, PROTS, FLAGS_RP, 0, 0);
            if (p_page != MAP_FAILED) {
                munmap(p_page, pg_size);
            } else {
                lp_alloc = ABTI_MEM_LP_MALLOC;
            }
            break;
        case ABTI_MEM_LP_MMAP_HP_RP:
            p_page = mmap(NULL, sp_size, PROTS, FLAGS_HP, 0, 0);
            if (p_page != MAP_FAILED) {
                munmap(p_page, sp_size);
            } else {
                p_page = mmap(NULL, pg_size, PROTS, FLAGS_RP, 0, 0);
                if (p_page != MAP_FAILED) {
                    munmap(p_page, pg_size);
                    lp_alloc = ABTI_MEM_LP_MMAP_RP;
                } else {
                    lp_alloc = ABTI_MEM_LP_MALLOC;
                }
            }
            break;
        case ABTI_MEM_LP_MMAP_HP_THP:
            p_page = mmap(NULL, sp_size, PROTS, FLAGS_HP, 0, 0);
            if (p_page != MAP_FAILED) {
                munmap(p_page, sp_size);
            } else {
                p_page = ABTU_memalign(gp_ABTI_global->huge_page_size,
                                       pg_size);
                if (p_page) {
                    ABTU_free(p_page);
                    lp_alloc = ABTI_MEM_LP_THP;
                } else {
                    lp_alloc = ABTI_MEM_LP_MALLOC;
                }
            }
            break;
        case ABTI_MEM_LP_THP:
            p_page = ABTU_memalign(gp_ABTI_global->huge_page_size, pg_size);
            if (p_page) {
                ABTU_free(p_page);
                lp_alloc = ABTI_MEM_LP_THP;
            } else {
                lp_alloc = ABTI_MEM_LP_MALLOC;
            }
            break;
        default:
            break;
    }

    return lp_alloc;
}
static inline void ABTI_mem_free_stack_list(ABTI_stack_header *p_stack)
{
    ABTI_stack_header *p_cur = p_stack, *p_tmp;
    while (p_cur) {
        p_tmp = p_cur;
        p_cur = p_cur->p_next;
        ABTD_atomic_fetch_add_uint32(&p_tmp->p_sph->num_empty_stacks, 1);
    }
}
static inline void ABTI_mem_free_page_list(ABTI_page_header *p_ph)
{
    ABTI_page_header *p_cur = p_ph, *p_tmp;
    while (p_cur) {
        p_tmp = p_cur;
        p_cur = p_cur->p_next;
        if (p_tmp->is_mmapped == ABT_TRUE) {
            munmap(p_tmp, gp_ABTI_global->mem_page_size);
        } else {
            ABTU_free(p_tmp);
        }
    }
}
static inline void ABTI_mem_add_page(ABTI_local *p_local,
                                     ABTI_page_header *p_ph)
{
    p_ph->owner_id = ABTI_self_get_native_thread_id(p_local);

    /* Add the page to the head of the circular list. */
    if (p_local->p_mem_task_head != NULL) {
        p_ph->p_prev = p_local->p_mem_task_tail;
        p_ph->p_next = p_local->p_mem_task_head;
        p_local->p_mem_task_head->p_prev = p_ph;
        p_local->p_mem_task_tail->p_next = p_ph;
        p_local->p_mem_task_head = p_ph;
    } else {
        p_ph->p_prev = p_ph;
        p_ph->p_next = p_ph;
        p_local->p_mem_task_head = p_ph;
        p_local->p_mem_task_tail = p_ph;
    }
}
static inline void ABTI_mem_add_pages_to_global(ABTI_page_header *p_head,
                                                ABTI_page_header *p_tail)
{
    ABTI_global *p_global = gp_ABTI_global;

    /* Prepend the page list to the global list. */
    ABTI_spinlock_acquire(&p_global->mem_task_lock);
    p_tail->p_next = p_global->p_mem_task;
    p_global->p_mem_task = p_head;
    ABTI_spinlock_release(&p_global->mem_task_lock);
}
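
/* The global stack pool is a lock-free LIFO list: a taker swaps the whole
 * list out with NULL and keeps everything but the first stack in its local
 * pool, while ABTI_mem_add_stack_to_global() pushes one stack at a time with
 * a CAS loop. */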
char *ABTI_mem_take_global_stack(ABTI_local *p_local)
{
    ABTI_global *p_global = gp_ABTI_global;
    ABTI_stack_header *p_sh, *p_cur;
    uint32_t cnt_stacks = 0;

    ABTD_atomic_ptr *ptr;
    void *old;
    do {
        p_sh = (ABTI_stack_header *)ABTD_atomic_acquire_load_ptr(
            (ABTD_atomic_ptr *)&p_global->p_mem_stack);
        ptr = (ABTD_atomic_ptr *)&p_global->p_mem_stack;
        old = (void *)p_sh;
    } while (!ABTD_atomic_bool_cas_weak_ptr(ptr, old, NULL));

    if (p_sh == NULL)
        return NULL;

    /* Count the stacks that will stay in the local pool. */
    p_cur = p_sh;
    while (p_cur->p_next) {
        p_cur = p_cur->p_next;
        cnt_stacks++;
    }

    /* Keep the rest locally and return the first one. */
    p_local->num_stacks = cnt_stacks;
    p_local->p_mem_stack = p_sh->p_next;

    return (char *)p_sh - sizeof(ABTI_thread);
}
void ABTI_mem_add_stack_to_global(ABTI_stack_header *p_sh)
{
    ABTI_global *p_global = gp_ABTI_global;
    ABTD_atomic_ptr *ptr;
    void *old, *new;

    do {
        ABTI_stack_header *p_mem_stack =
            (ABTI_stack_header *)ABTD_atomic_acquire_load_ptr(
                (ABTD_atomic_ptr *)&p_global->p_mem_stack);
        p_sh->p_next = p_mem_stack;
        ptr = (ABTD_atomic_ptr *)&p_global->p_mem_stack;
        old = (void *)p_mem_stack;
        new = (void *)p_sh;
    } while (!ABTD_atomic_bool_cas_weak_ptr(ptr, old, new));
}
static char *ABTI_mem_alloc_large_page(int pgsize, ABT_bool *p_is_mmapped)
{
    char *p_page = NULL;

    switch (gp_ABTI_global->mem_lp_alloc) {
        case ABTI_MEM_LP_MALLOC:
            *p_is_mmapped = ABT_FALSE;
            p_page = (char *)ABTU_malloc(pgsize);
            LOG_DEBUG("malloc a regular page (%d): %p\n", pgsize, p_page);
            break;

        case ABTI_MEM_LP_MMAP_RP:
            p_page = (char *)mmap(NULL, pgsize, PROTS, FLAGS_RP, 0, 0);
            if ((void *)p_page != MAP_FAILED) {
                *p_is_mmapped = ABT_TRUE;
                LOG_DEBUG("mmap a regular page (%d): %p\n", pgsize, p_page);
            } else {
                /* mmap failed, so fall back to malloc. */
                p_page = (char *)ABTU_malloc(pgsize);
                *p_is_mmapped = ABT_FALSE;
                LOG_DEBUG("fall back to malloc a regular page (%d): %p\n",
                          pgsize, p_page);
            }
            break;

        case ABTI_MEM_LP_MMAP_HP_RP:
            /* Try a huge page first, then a regular mmap, then malloc. */
            p_page = (char *)mmap(NULL, pgsize, PROTS, FLAGS_HP, 0, 0);
            if ((void *)p_page != MAP_FAILED) {
                *p_is_mmapped = ABT_TRUE;
                LOG_DEBUG(MMAP_DBG_MSG " (%d): %p\n", pgsize, p_page);
            } else {
                p_page = (char *)mmap(NULL, pgsize, PROTS, FLAGS_RP, 0, 0);
                if ((void *)p_page != MAP_FAILED) {
                    *p_is_mmapped = ABT_TRUE;
                    LOG_DEBUG("fall back to mmap regular pages (%d): %p\n",
                              pgsize, p_page);
                } else {
                    p_page = (char *)ABTU_malloc(pgsize);
                    *p_is_mmapped = ABT_FALSE;
                    LOG_DEBUG("fall back to malloc a regular page (%d): %p\n",
                              pgsize, p_page);
                }
            }
            break;

        case ABTI_MEM_LP_MMAP_HP_THP:
            /* Try a huge page first, then a THP via memalign. */
            p_page = (char *)mmap(NULL, pgsize, PROTS, FLAGS_HP, 0, 0);
            if ((void *)p_page != MAP_FAILED) {
                *p_is_mmapped = ABT_TRUE;
                LOG_DEBUG(MMAP_DBG_MSG " (%d): %p\n", pgsize, p_page);
            } else {
                *p_is_mmapped = ABT_FALSE;
                p_page = (char *)ABTU_memalign(gp_ABTI_global->huge_page_size,
                                               pgsize);
                LOG_DEBUG("memalign a THP (%d): %p\n", pgsize, p_page);
            }
            break;

        case ABTI_MEM_LP_THP:
            *p_is_mmapped = ABT_FALSE;
            p_page = (char *)ABTU_memalign(gp_ABTI_global->huge_page_size,
                                           pgsize);
            LOG_DEBUG("memalign a THP (%d): %p\n", pgsize, p_page);
            break;

        default:
            ABTI_ASSERT(0);
            break;
    }

    return p_page;
}
ABTI_page_header *ABTI_mem_alloc_page(ABTI_local *p_local, size_t blk_size)
{
    int i;
    ABTI_page_header *p_ph;
    ABTI_blk_header *p_cur;
    ABTI_global *p_global = gp_ABTI_global;
    const uint32_t clsize = ABT_CONFIG_STATIC_CACHELINE_SIZE;
    size_t pgsize = p_global->mem_page_size;
    ABT_bool is_mmapped;

    /* Make the page header size a multiple of the cache line size. */
    const size_t ph_size =
        (sizeof(ABTI_page_header) + clsize) / clsize * clsize;

    uint32_t num_blks = (pgsize - ph_size) / blk_size;
    char *p_page = ABTI_mem_alloc_large_page(pgsize, &is_mmapped);

    /* Set up the page header. */
    p_ph = (ABTI_page_header *)p_page;
    p_ph->blk_size = blk_size;
    p_ph->num_total_blks = num_blks;
    p_ph->num_empty_blks = num_blks;
    ABTD_atomic_relaxed_store_uint32(&p_ph->num_remote_free, 0);
    p_ph->p_head = (ABTI_blk_header *)(p_page + ph_size);
    p_ph->p_free = NULL;
    ABTI_mem_add_page(p_local, p_ph);
    p_ph->is_mmapped = is_mmapped;

    /* Link all blocks into the free list. */
    p_cur = p_ph->p_head;
    for (i = 0; i < num_blks - 1; i++) {
        p_cur->p_ph = p_ph;
        p_cur->p_next = (ABTI_blk_header *)((char *)p_cur + blk_size);
        p_cur = p_cur->p_next;
    }
    p_cur->p_ph = p_ph;
    p_cur->p_next = NULL;

    return p_ph;
}
void ABTI_mem_free_page(ABTI_local *p_local, ABTI_page_header *p_ph)
{
    /* Keep one page for future use. */
    if (p_local->p_mem_task_head == p_local->p_mem_task_tail)
        return;

    uint32_t num_free_blks =
        p_ph->num_empty_blks +
        ABTD_atomic_acquire_load_uint32(&p_ph->num_remote_free);
    if (num_free_blks == p_ph->num_total_blks) {
        /* All blocks have been freed: unlink and release the page. */
        p_ph->p_prev->p_next = p_ph->p_next;
        p_ph->p_next->p_prev = p_ph->p_prev;
        if (p_ph == p_local->p_mem_task_head) {
            p_local->p_mem_task_head = p_ph->p_next;
        } else if (p_ph == p_local->p_mem_task_tail) {
            p_local->p_mem_task_tail = p_ph->p_prev;
        }
        if (p_ph->is_mmapped == ABT_TRUE) {
            munmap(p_ph, gp_ABTI_global->mem_page_size);
        } else {
            ABTU_free(p_ph);
        }
    }
}
void ABTI_mem_take_free(ABTI_page_header *p_ph)
{
    /* num_empty_blks and num_remote_free need not be individually exact as
     * long as their sum equals the actual number of free blocks. */
    uint32_t num_remote_free =
        ABTD_atomic_acquire_load_uint32(&p_ph->num_remote_free);
    ABTD_atomic_ptr *ptr;
    void *old;

    ABTD_atomic_fetch_sub_uint32(&p_ph->num_remote_free, num_remote_free);
    p_ph->num_empty_blks += num_remote_free;

    /* Take over the remote free list. */
    do {
        ABTI_blk_header *p_free =
            (ABTI_blk_header *)ABTD_atomic_acquire_load_ptr(
                (ABTD_atomic_ptr *)&p_ph->p_free);
        p_ph->p_head = p_free;
        ptr = (ABTD_atomic_ptr *)&p_ph->p_free;
        old = (void *)p_free;
    } while (!ABTD_atomic_bool_cas_weak_ptr(ptr, old, NULL));
}
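
/* Blocks freed by an execution stream that does not own the page are pushed
 * onto the page's p_free list and counted in num_remote_free; the owning
 * stream reclaims them later through ABTI_mem_take_free(). */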
void ABTI_mem_free_remote(ABTI_page_header *p_ph, ABTI_blk_header *p_bh)
{
    ABTD_atomic_ptr *ptr;
    void *old, *new;
    do {
        ABTI_blk_header *p_free =
            (ABTI_blk_header *)ABTD_atomic_acquire_load_ptr(
                (ABTD_atomic_ptr *)&p_ph->p_free);
        p_bh->p_next = p_free;
        ptr = (ABTD_atomic_ptr *)&p_ph->p_free;
        old = (void *)p_free;
        new = (void *)p_bh;
    } while (!ABTD_atomic_bool_cas_weak_ptr(ptr, old, new));

    /* Increase the number of remote free blocks. */
    ABTD_atomic_fetch_add_uint32(&p_ph->num_remote_free, 1);
}
ABTI_page_header *ABTI_mem_take_global_page(ABTI_local *p_local)
{
    ABTI_global *p_global = gp_ABTI_global;
    ABTI_page_header *p_ph = NULL;

    /* Take the first page out of the global list. */
    ABTI_spinlock_acquire(&p_global->mem_task_lock);
    if (p_global->p_mem_task) {
        p_ph = p_global->p_mem_task;
        p_global->p_mem_task = p_ph->p_next;
    }
    ABTI_spinlock_release(&p_global->mem_task_lock);

    if (p_ph) {
        ABTI_mem_add_page(p_local, p_ph);
        if (p_ph->p_free)
            ABTI_mem_take_free(p_ph);
        if (p_ph->p_head == NULL)
            p_ph = NULL;
    }
    return p_ph;
}
static inline void ABTI_mem_free_sph_list(ABTI_sp_header *p_sph)
{
    ABTI_sp_header *p_cur = p_sph, *p_tmp;
    while (p_cur) {
        p_tmp = p_cur;
        p_cur = p_cur->p_next;

        if (p_tmp->num_total_stacks !=
            ABTD_atomic_acquire_load_uint32(&p_tmp->num_empty_stacks)) {
            LOG_DEBUG("%u ULT stacks are not freed\n",
                      p_tmp->num_total_stacks -
                          ABTD_atomic_acquire_load_uint32(
                              &p_tmp->num_empty_stacks));
        }

        if (p_tmp->is_mmapped == ABT_TRUE) {
            munmap(p_tmp->p_sp, gp_ABTI_global->mem_sp_size);
        } else {
            ABTU_free(p_tmp->p_sp);
        }
        ABTU_free(p_tmp);
    }
}
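
/* Allocate one large stack page and slice it into num_stacks ULT stacks.
 * Each stack reserves header_size (ABTI_MEM_SH_SIZE) bytes at its front,
 * which appears to cover the ABTI_thread object and the ABTI_stack_header;
 * first_pos, derived from the page id, rotates which slot is handed out
 * first.  The first stack is returned and the rest go to the local pool. */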
char *ABTI_mem_alloc_sp(ABTI_local *p_local, size_t stacksize)
{
    char *p_sp, *p_first;
    ABTI_sp_header *p_sph;
    ABTI_stack_header *p_sh, *p_next;
    uint32_t num_stacks;
    int i;

    uint32_t header_size = ABTI_MEM_SH_SIZE;
    uint32_t sp_size = gp_ABTI_global->mem_sp_size;
    size_t actual_stacksize = stacksize - header_size;
    void *p_stack = NULL;

    /* Allocate a stack page header. */
    p_sph = (ABTI_sp_header *)ABTU_malloc(sizeof(ABTI_sp_header));
    num_stacks = sp_size / stacksize;
    p_sph->num_total_stacks = num_stacks;
    ABTD_atomic_relaxed_store_uint32(&p_sph->num_empty_stacks, 0);
    p_sph->stacksize = stacksize;
    p_sph->id = ABTD_atomic_fetch_add_uint64(&g_sp_id, 1);

    /* Allocate the stack page. */
    p_sp = ABTI_mem_alloc_large_page(sp_size, &p_sph->is_mmapped);
    p_sph->p_sp = p_sp;

    /* Set up the first stack. */
    int first_pos = p_sph->id % num_stacks;
    p_first = p_sp + actual_stacksize * first_pos;
    p_sh = (ABTI_stack_header *)(p_first + sizeof(ABTI_thread));
    p_sh->p_sph = p_sph;
    p_stack = (first_pos == 0) ? (void *)(p_first + header_size * num_stacks)
                               : (void *)p_sp;
    p_sh->p_stack = p_stack;

    if (num_stacks > 1) {
        /* Link the remaining stacks into the local stack pool. */
        p_sh = (ABTI_stack_header *)((char *)p_sh + header_size);

        p_local->num_stacks = num_stacks - 1;
        p_local->p_mem_stack = p_sh;

        for (i = 1; i < num_stacks; i++) {
            p_next = (i + 1) < num_stacks
                         ? (ABTI_stack_header *)((char *)p_sh + header_size)
                         : NULL;
            p_sh->p_next = p_next;
            p_sh->p_sph = p_sph;
            if (first_pos == 0) {
                p_sh->p_stack =
                    (void *)((char *)p_stack + i * actual_stacksize);
            } else if (i < first_pos) {
                p_sh->p_stack = (void *)(p_sp + i * actual_stacksize);
            } else {
                p_sh->p_stack =
                    (void *)(p_first + header_size * num_stacks +
                             (i - first_pos) * actual_stacksize);
            }

            p_sh = p_next;
        }
    }

    /* Add this stack page to the global stack page list. */
    ABTD_atomic_ptr *ptr = (ABTD_atomic_ptr *)&gp_ABTI_global->p_mem_sph;
    void *old;
    do {
        p_sph->p_next = (ABTI_sp_header *)ABTD_atomic_acquire_load_ptr(ptr);
        old = (void *)p_sph->p_next;
    } while (!ABTD_atomic_bool_cas_weak_ptr(ptr, old, (void *)p_sph));

    return p_first;
}

#endif /* ABT_CONFIG_USE_MEM_POOL */