From bdd58b802c17e85fea9069c3030ff1cdc3ba3c09 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Mon, 16 Mar 2026 20:45:34 +0300 Subject: [PATCH] Update IR IR commit: 7fed7999743ba6a5ffc5535e786725d5577f6f34 --- ext/opcache/jit/ir/ir_cfg.c | 125 +++++++++++++++++--------------- ext/opcache/jit/ir/ir_gdb.c | 13 ++++ ext/opcache/jit/ir/ir_perf.c | 4 +- ext/opcache/jit/ir/ir_private.h | 7 +- 4 files changed, 87 insertions(+), 62 deletions(-) diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c index ca57845a0c7a7..40041004c5615 100644 --- a/ext/opcache/jit/ir/ir_cfg.c +++ b/ext/opcache/jit/ir/ir_cfg.c @@ -211,6 +211,7 @@ static uint32_t IR_NEVER_INLINE ir_cfg_remove_dead_inputs(ir_ctx *ctx, uint32_t if (life_inputs) { ir_remove_phis_inputs(ctx, &ctx->use_lists[bb->start], insn->inputs_count, life_inputs); ir_mem_free(life_inputs); + life_inputs = NULL; } } } @@ -613,59 +614,64 @@ static int ir_remove_unreachable_blocks(ir_ctx *ctx) return 1; } -static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b) -{ - uint32_t i, *p; - ir_block *bb = &ctx->cfg_blocks[b]; - - if (bb->postnum != 0) { - return; - } - - if (bb->successors_count) { - bb->postnum = -1; /* Marker for "currently visiting" */ - p = ctx->cfg_edges + bb->successors; - i = bb->successors_count; - do { - compute_postnum(ctx, cur, *p); - p++; - } while (--i); - } - bb->postnum = (*cur)++; -} - /* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by * Cooper, Harvey and Kennedy. */ static IR_NEVER_INLINE int ir_build_dominators_tree_slow(ir_ctx *ctx) { - uint32_t blocks_count, b, postnum; + uint32_t blocks_count, b, postnum, i; ir_block *blocks, *bb; uint32_t *edges; + uint32_t *rpo = ir_mem_malloc((ctx->cfg_blocks_count + 1) * sizeof(uint32_t)); bool changed; blocks = ctx->cfg_blocks; edges = ctx->cfg_edges; blocks_count = ctx->cfg_blocks_count; - /* Clear the dominators tree */ - for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) { - bb->idom = 0; - bb->dom_depth = 0; - bb->dom_child = 0; - bb->dom_next_child = 0; - } - ctx->flags2 &= ~IR_NO_LOOPS; postnum = 1; - compute_postnum(ctx, &postnum, 1); + ir_worklist work; + ir_worklist_init(&work, ctx->cfg_blocks_count + 1); + ir_worklist_push(&work, 1); + IR_ASSERT(blocks[1].next_succ == 0); + while (ir_worklist_len(&work)) { +next: + b = ir_worklist_peek(&work); + bb = &blocks[b]; + uint32_t n = bb->successors_count - bb->next_succ; + if (n) { + uint32_t *p = edges + bb->successors + bb->next_succ; + for (; n > 0; p++, n--) { + uint32_t succ = *p; + if (ir_worklist_push(&work, succ)) { + bb->next_succ = bb->successors_count - n + 1; + IR_ASSERT(blocks[succ].next_succ == 0); + goto next; + } + } + } + + /* Start from bb->idom calculated by the fast dominators algorithm */ + // bb->idom = 0; + bb->next_succ = 0; + rpo[postnum] = b; + bb->postnum = postnum++; + ir_worklist_pop(&work); + } + ir_worklist_free(&work); + + IR_ASSERT(rpo[blocks_count] == 1); /* Find immediate dominators by iterative fixed-point algorithm */ blocks[1].idom = 1; do { changed = 0; + /* Iterating in Reverse Post Order */ - for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { + for (i = blocks_count - 1; i > 0; i--) { + b = rpo[i]; + bb = &blocks[b]; IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(bb->predecessors_count > 0); if (bb->predecessors_count == 1) { @@ -718,6 +724,8 @@ static IR_NEVER_INLINE int ir_build_dominators_tree_slow(ir_ctx *ctx) } } while (changed); + ir_mem_free(rpo); + /* Build dominators tree */ blocks[1].idom = 0; blocks[1].dom_depth = 0; @@ -771,7 +779,7 @@ int ir_build_dominators_tree(ir_ctx *ctx) blocks[1].idom = 1; blocks[1].dom_depth = 0; - /* Iterating in Reverse Post Order */ + /* Iterating in Reverse Post Order (relay on existing BB order and fall-back to slow algorithm) */ for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) { IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); IR_ASSERT(bb->predecessors_count > 0); @@ -783,8 +791,8 @@ int ir_build_dominators_tree(ir_ctx *ctx) if (UNEXPECTED(idom >= b)) { /* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */ ctx->flags2 &= ~IR_NO_LOOPS; -// IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor"); if (UNEXPECTED(k <= 1)) { + // IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor"); slow_case: ir_list_free(&worklist); return ir_build_dominators_tree_slow(ctx); @@ -798,6 +806,7 @@ int ir_build_dominators_tree(ir_ctx *ctx) break; } if (UNEXPECTED(k == 0)) { + // IR_ASSERT(0 && "Wrong blocks order: BB is before all its predecessors"); goto slow_case; } ir_list_push(&worklist, idom); @@ -830,13 +839,6 @@ int ir_build_dominators_tree(ir_ctx *ctx) bb->dom_depth = idom_bb->dom_depth + 1; } - /* Construct children lists sorted by block number */ - for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) { - ir_block *idom_bb = &blocks[bb->idom]; - bb->dom_next_child = idom_bb->dom_child; - idom_bb->dom_child = b; - } - blocks[1].idom = 0; if (ir_list_len(&worklist) != 0) { @@ -874,10 +876,18 @@ int ir_build_dominators_tree(ir_ctx *ctx) if (UNEXPECTED(!complete)) { ir_list_free(&worklist); + // TODO: this algorithm may be incorrect. Prove and/or switch to ir_build_dominators_tree_slow() ??? return ir_build_dominators_tree_iterative(ctx); } } + /* Construct children lists sorted by block number */ + for (b = blocks_count, bb = &blocks[b]; b >= 2; b--, bb--) { + ir_block *idom_bb = &blocks[bb->idom]; + bb->dom_next_child = idom_bb->dom_child; + idom_bb->dom_child = b; + } + ir_list_free(&worklist); return 1; @@ -898,8 +908,6 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx) /* Clear the dominators tree, but keep already found dominators */ for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) { bb->dom_depth = 0; - bb->dom_child = 0; - bb->dom_next_child = 0; } /* Find immediate dominators by iterative fixed-point algorithm */ @@ -917,20 +925,20 @@ static int ir_build_dominators_tree_iterative(ir_ctx *ctx) if (blocks[idom].idom == 0) { while (1) { k--; + if (UNEXPECTED(k == 0)) break; p++; idom = *p; if (blocks[idom].idom > 0) { break; } - IR_ASSERT(k > 0); } + if (UNEXPECTED(k == 0)) continue; } IR_ASSERT(k != 0); while (--k > 0) { uint32_t pred_b = *(++p); if (blocks[pred_b].idom > 0) { - IR_ASSERT(blocks[pred_b].idom > 0); while (idom != pred_b) { while (pred_b > idom) { pred_b = blocks[pred_b].idom; @@ -1094,35 +1102,36 @@ int ir_find_loops(ir_ctx *ctx) times = ir_mem_malloc((ctx->cfg_blocks_count + 1) * 3 * sizeof(uint32_t)); sorted_blocks = times + (ctx->cfg_blocks_count + 1) * 2; + ir_bitset visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1); ir_worklist_push(&work, 1); - ENTRY_TIME(1) = time++; - while (ir_worklist_len(&work)) { - ir_block *bb; - +next: b = ir_worklist_peek(&work); + ir_block *bb = &blocks[b]; - /* Visit successors of "b". */ -next: - bb = &blocks[b]; - n = bb->successors_count; - if (n) { - uint32_t *p = edges + bb->successors; + if (!ir_bitset_in(visited, b)) { + ir_bitset_incl(visited, b); + ENTRY_TIME(b) = time++; + } + uint32_t n = bb->successors_count - bb->next_succ; + if (n) { + uint32_t *p = edges + bb->successors + bb->next_succ; for (; n > 0; p++, n--) { uint32_t succ = *p; - if (ir_worklist_push(&work, succ)) { - b = succ; - ENTRY_TIME(b) = time++; + bb->next_succ = bb->successors_count - n + 1; + IR_ASSERT(blocks[succ].next_succ == 0); goto next; } } } + bb->next_succ = 0; EXIT_TIME(b) = time++; ir_worklist_pop(&work); } + ir_mem_free(visited); /* Sort blocks by level, which is the opposite order in which we want to process them */ /* (Breadth First Search using "sorted_blocks" as a queue) */ diff --git a/ext/opcache/jit/ir/ir_gdb.c b/ext/opcache/jit/ir/ir_gdb.c index 8b5fba6b1533a..41141bd287157 100644 --- a/ext/opcache/jit/ir/ir_gdb.c +++ b/ext/opcache/jit/ir/ir_gdb.c @@ -521,6 +521,8 @@ IR_NEVER_INLINE void __jit_debug_register_code(void) static bool ir_gdb_register_code(const void *object, size_t size) { ir_gdbjit_code_entry *entry; + ir_elf_header *elf_header; + ir_elf_sectheader *elf_section, *elf_section_end; entry = malloc(sizeof(ir_gdbjit_code_entry) + size); if (entry == NULL) { @@ -532,6 +534,17 @@ static bool ir_gdb_register_code(const void *object, size_t size) memcpy((char *)entry->symfile_addr, object, size); + elf_header = (ir_elf_header*)entry->symfile_addr; + elf_section = (ir_elf_sectheader*)(entry->symfile_addr + elf_header->shofs); + elf_section_end = (ir_elf_sectheader*)((char*)elf_section + (elf_header->shentsize * elf_header->shnum)); + + while (elf_section < elf_section_end) { + if ((elf_section->flags & ELFSECT_FLAGS_ALLOC) && elf_section->addr == 0) { + elf_section->addr = (uintptr_t)(entry->symfile_addr + elf_section->ofs); + } + elf_section = (ir_elf_sectheader*)((char*)elf_section + elf_header->shentsize); + } + entry->prev_entry = NULL; entry->next_entry = __jit_debug_descriptor.first_entry; diff --git a/ext/opcache/jit/ir/ir_perf.c b/ext/opcache/jit/ir/ir_perf.c index e5a5e59374076..c0561ff86ac1f 100644 --- a/ext/opcache/jit/ir/ir_perf.c +++ b/ext/opcache/jit/ir/ir_perf.c @@ -30,7 +30,7 @@ #if defined(__linux__) #include -#elif defined(__darwin__) +#elif defined(__APPLE__) # include #elif defined(__FreeBSD__) # include @@ -215,7 +215,7 @@ int ir_perf_jitdump_register(const char *name, const void *start, size_t size) uint32_t thread_id = 0; #if defined(__linux__) thread_id = syscall(SYS_gettid); -#elif defined(__darwin__) +#elif defined(__APPLE__) uint64_t thread_id_u64; pthread_threadid_np(NULL, &thread_id_u64); thread_id = (uint32_t) thread_id_u64; diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index 115c5121d7551..96b81a0fcd721 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -1145,12 +1145,15 @@ struct _ir_block { }; union { uint32_t dom_depth; /* depth from the root of the dominators tree */ - uint32_t postnum; /* used temporary during tree constructon */ + uint32_t postnum; /* used temporary for iterative Post Ordering */ }; uint32_t dom_child; /* first dominated blocks */ uint32_t dom_next_child; /* next dominated block (linked list) */ uint32_t loop_header; - uint32_t loop_depth; + union { + uint32_t loop_depth; + uint32_t next_succ; /* used temporary for iterative Post Ordering */ + }; }; void ir_build_prev_refs(ir_ctx *ctx);