Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ext/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"PEANO_BATCH": f"{CMD} -a ext/tests/peano -b ext/tests/peano/buffer_info.json -f l2_ifm_dump --peano",
"PEANO_L2_DUMP": f"{CMD} -a ext/tests/peano -b ext/tests/peano/buffer_info.json -f l2_ifm_dump --peano -e 15",
"WTS_ITER_FLAGS": f"{CMD} -a ext/tests/wts_iter -b ext/tests/wts_iter/buffer_info.json"
" -e 2 -f layer_status text_dump l1_ofm_dump",
" -e 2 -f layer_status text_dump",
"VAIML": f"{CMD} -v ext/tests/vaiml -f skip_dump",
# "X2": f"{CMD} -a ext/tests/x2 -b ext/tests/x2/buffer_info.json -f skip_dump",
}
Expand Down
86 changes: 85 additions & 1 deletion src/mldebug/aie_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from mldebug.utils import LOGGER


class AIEUtil:
"""
AIE Utility class
Expand Down Expand Up @@ -162,6 +163,7 @@ def skip_iterations(self, count, sid):
start_time = time.time()
perf_cntr_1 = reg_map["PERF_CNTR_1"]
while True:
time.sleep(0.1)
values = self.read_aie_regs(perf_cntr_1)
if all(v == count for v in values.values()):
break
Expand All @@ -171,13 +173,42 @@ def skip_iterations(self, count, sid):
f"Design might be hung. Values={values}"
)
return False
time.sleep(0.1)

# Step6: Reset debug control to stop at program counter event
pc_event = self._get_eventid("PC_0_CORE")
write(reg_map["DEBUG_CONTROL1"], pc_event << 16)
return True

def skip_iterations_to_lock_acq(self, lock_acq_pc, count, sid):
    """
    Skip iterations without using the performance counter.

    Sets a PC breakpoint at ``lock_acq_pc``, resumes the cores and polls
    until they report halted (or a 10 second timeout expires), then
    verifies the halted PCs against ``lock_acq_pc``.

    Args:
        lock_acq_pc: Program counter of the lock-acquire instruction to stop at.
        count: Number of iterations left to skip; 0 means nothing to do.
        sid: Stamp id, used only to prefix log messages.

    Returns:
        True when there is nothing to skip (test mode or ``count == 0``) or
        when the cores halted at the target PC; False on timeout or when the
        halted PCs do not match the target.
    """
    if self._is_test_mode() or count == 0:
        return True

    self.impl.set_pc_breakpoint(lock_acq_pc)
    self.impl.continue_aie()

    timeout = 10
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    halted = False
    while time.monotonic() < deadline:
        time.sleep(0.1)
        if self.impl.poll_core_status():
            halted = True
            break

    if not halted:
        # PCs read from cores that are still running are meaningless, so
        # report the timeout explicitly instead of comparing them.
        LOGGER.log(
            f"{sid}: Timed out after {timeout}s waiting for cores to halt "
            f"in skip_iterations_to_lock_acq. target_pc={lock_acq_pc}"
        )
        return False

    pcs = self.impl.read_core_pc(True)
    is_valid = self.pcs_match_target(pcs, lock_acq_pc)
    if not is_valid:
        LOGGER.log(
            f"{sid}: Invalid result in skip_iterations_to_lock_acq. "
            f"target_pc={lock_acq_pc} pcs={pcs} "
        )
    return is_valid

def read_performance_counters(self, c, r):
"""
Read and display the values and configuration registers of all performance counters
Expand Down Expand Up @@ -398,6 +429,25 @@ def read_core_pc(self):
"""
return self.read_aie_regs(self.aie_iface.Core_registers["CORE_PC"])

def read_core_pc_dict(self):
"""
Read the core program counter (CORE_PC) from all AIE tiles.

Returns whatever read_aie_regs returns for the CORE_PC register.
pcs_match_target iterates that result with .items() and unpacks each
key as (col, row), so it is presumably a {(col, row): pc} mapping --
confirm against read_aie_regs.
"""
return self.read_aie_regs(self.aie_iface.Core_registers["CORE_PC"])

def read_core_pc_tile(self, c, r):
"""
Read the core program counter (CORE_PC) of a single AIE tile.

c, r are passed straight to impl.read_register as the tile coordinates
(column and row, judging by how pcs_match_target calls this method).
Returns the value read from that tile's CORE_PC register.
"""
return self.impl.read_register(c, r, self.aie_iface.Core_registers["CORE_PC"])

def single_step_core(self, c, r):
"""
Single step the AIE core of tile (c, r).

Writes the value (1 << 2) to the tile's DEBUG_CONTROL0 register.
"""
offset = self.aie_iface.Core_registers["DEBUG_CONTROL0"]
# NOTE(review): bit 2 is presumably the single-step field of DEBUG_CONTROL0
# -- confirm against the AIE register reference.
self.impl.write_register(c, r, offset, (1<<2))

def disable_ecc_event(self):
"""
Disable ECC Event for this stamp
Expand All @@ -406,3 +456,37 @@ def disable_ecc_event(self):
return
for c, r in self._filter_tiles(self.aie_iface.AIE_TILE_T):
self.impl.write_register(c, r, self.aie_iface.Core_registers["ECC_SCRUB_EVENT"], 0)

def pcs_match_target(self, pcs, target_pc, allow_combo_delay=False):
    """
    Check whether the cores are stopped at ``target_pc``.

    First compares the supplied ``pcs`` against ``target_pc``. If they do not
    all match and the cores are halted, every tile whose PC differs from the
    target is single-stepped (at most ``num_pipeline_stages`` times) to try
    to bring it onto, or slightly past, the target.

    Args:
        pcs: Iterable of program counter values already read from the cores.
        target_pc: Program counter the cores are expected to be halted at.
        allow_combo_delay: When True, the initial comparison accepts PCs
            within ``max_pc_tolerance`` of the target instead of requiring
            an exact match.

    Returns:
        True if all PCs match (directly or after reconciliation), False if
        the cores are not halted or some tile cannot be brought to within
        ``[target_pc, target_pc + max_pc_tolerance)``.
    """
    # AIE PC can lag the breakpoint by 1-2 cycles; combo events add more delay.
    # max_pc_tolerance covers a 2 cycle delay with 16 byte program memory words.
    num_pipeline_stages = 5
    max_pc_tolerance = 32

    delay_allowed = max_pc_tolerance if allow_combo_delay else 1
    if all(abs(pc - target_pc) < delay_allowed for pc in pcs):
        return True

    # Some tiles aren't halted: stepping them makes no sense.
    if not self.impl.poll_core_status():
        return False

    for (col, row), val in self.read_core_pc_dict().items():
        if val == target_pc:
            continue
        newpc = val
        for _ in range(num_pipeline_stages):
            self.single_step_core(col, row)
            newpc = self.read_core_pc_tile(col, row)
            if 0 <= newpc - target_pc < max_pc_tolerance:
                break
        # A core that is slightly ahead of the target is acceptable. A core
        # that is still behind, or far ahead (i.e. halted somewhere unrelated
        # to the target), is not a match and would cause trouble later.
        if not 0 <= newpc - target_pc < max_pc_tolerance:
            return False
    return True
18 changes: 9 additions & 9 deletions src/mldebug/batch_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@

from mldebug.utils import LOGGER, cleanup_and_exit, timeit

# 16 byte pm, we assume 2 clock cycle delay
COMBO_EVENT_MAX_DELAY_CYCLES = 32


class BatchRunner:
"""
Expand Down Expand Up @@ -265,9 +262,7 @@ def schedule_layer_start(self, next_layer):
pcs = self.impls[sid].read_core_pc(True)

# combo event trigger has one cycle delay
is_correct_pc = all(stamp.start_pc == pc for pc in pcs)
if not is_correct_pc and pml:
is_correct_pc = all(pc - stamp.start_pc < COMBO_EVENT_MAX_DELAY_CYCLES for pc in pcs)
is_correct_pc = utl.pcs_match_target(pcs, stamp.start_pc, allow_combo_delay=pml)

if is_correct_pc:
self._process_start_breakpoint(next_layer, 1, sid=sid)
Expand Down Expand Up @@ -400,21 +395,26 @@ def _run_stamp(self, layer, sid, target_itr, cur_it=1):
True on success, False on error.
"""
stamp = layer.stamps[sid]
utl = self.aie_utls[sid]

skip_end_pc = not (self.args.run_flags.l1_ofm_dump and stamp.end_pc)
if not target_itr:
target_itr = layer.lcp.num_iter

if self.args.run_flags.skip_iter:
self.state.error = not self.aie_utls[sid].skip_iterations(target_itr - cur_it, sid)
self.state.error = not utl.skip_iterations(target_itr - cur_it, sid)
elif self.args.run_flags.skip_iter2:
self.state.error = not utl.skip_iterations_to_lock_acq(
self.design_info.work_dir.post_layer_lock_acq_pcs[sid], target_itr - cur_it, sid)
else:
while cur_it < target_itr:
self.hit_next_breakpoint(sid)
all_pc = self.impls[sid].read_core_pc(True)
if all(stamp.start_pc == pc for pc in all_pc):
if utl.pcs_match_target(all_pc, stamp.start_pc):
if cur_it % layer.lcp.depth_iter != 0 or skip_end_pc:
cur_it += 1
self._process_start_breakpoint(layer, cur_it, sid=sid)
elif all(stamp.end_pc == pc for pc in all_pc):
elif utl.pcs_match_target(all_pc, stamp.end_pc):
cur_it += 1
self._process_end_breakpoint(layer, cur_it, sid)
else:
Expand Down
2 changes: 2 additions & 0 deletions src/mldebug/input_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class RunFlags:
l2_ifm_dump: bool
text_dump: bool
skip_iter: bool
skip_iter2: bool
# Test Flags
mock_hang: bool
dump_temps: bool
Expand Down Expand Up @@ -121,6 +122,7 @@ def get_flag(s, default=False):
get_flag("l2_ifm_dump"),
get_flag("text_dump"),
get_flag("skip_iter"),
get_flag("skip_iter2"),
get_flag("mock_hang"),
get_flag("dump_temps"),
get_flag("multistamp"),
Expand Down
4 changes: 3 additions & 1 deletion src/mldebug/mldebug_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,8 @@ def app():
"skip_iter",
"dump_temps",
"multistamp",
"disable_tg"
"disable_tg",
"skip_iter2"
],
help="Specify one or more runtime flags:\n"
"skip_dump : Do not dump memory\n"
Expand All @@ -342,6 +343,7 @@ def app():
"l1_ofm_dump : Dump L1 ofm buffers in addition to others\n"
"text_dump : Dump in text format\n"
"skip_iter : Skip iterations in batch mode when possible\n"
"skip_iter2 : skip_iter using lcp lock.(Telluride only)\n"
#"dump_temps : Write intermediate (.lst) files to disk\n"
"multistamp : Enable N Stamp/Batch mode\n",
#"disable_tg : Disable Step to TG layers\n",
Expand Down
20 changes: 20 additions & 0 deletions src/mldebug/work_dir.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,23 @@ def __init__(self, aie_dir, peano, overlay, dump_lst=False):
self.peano = peano
self.aie_dir = aie_dir
self.dump_lst = dump_lst
# Lock acquire instruction PC after layer execution
# This pc can be used for skip_iter
self.post_layer_lock_acq_pcs = [0] * num_stamps

self._stamp_lst_map = {}
for sid in range(num_stamps):
self._stamp_lst_map[sid] = []

self._initialize_functions(aie_dir, overlay)

def _check_for_lock_acq(self, line, sid, llvm):
"""
Record the PC of a lock-acquire line found in the base lst.

If line contains "acq" (case-insensitive), its PC, as parsed by
_get_pc(line, llvm), is stored in post_layer_lock_acq_pcs[sid].
Every matching line overwrites the previous value, so after a full
parse the entry holds the PC of the last matching line seen.
"""
if "acq" in line.lower():
self.post_layer_lock_acq_pcs[sid] = self._get_pc(line, llvm)

def _demangle(self, fstring):
"""
Demangle a C++ mangled function name using c++filt.
Expand Down Expand Up @@ -315,6 +325,8 @@ def _parse_lst_chess(self, elf, stampid):
if not Path(lst_file).is_file():
return False

is_base = "reloadable" not in elf_name

self.aie_functions[stampid][elf_name] = []
with open(lst_file, encoding="utf-8") as fd:
lines = fd.read().split("\n")
Expand All @@ -339,6 +351,9 @@ def _parse_lst_chess(self, elf, stampid):
while i < count:
line = lines[i]
pc_val = self._get_pc(line)
if demangled == "_main" and is_base:
# Find LCP Lock Acquire (Last lock acquire in base lst)
self._check_for_lock_acq(lines[i], stampid, False)
if pc_val:
last_valid_pc = pc_val
if "REL" in line and self._breakpoint_allowed(lines, i):
Expand Down Expand Up @@ -511,6 +526,8 @@ def _parse_lst_llvm(self, elf, stampid):
self._stamp_lst_map[stampid].append((elf_name, self._get_lst(elf_path, elf_name)))
lines = data.split("\n")

is_base = "reloadable" not in elf_name

self.aie_functions[stampid][elf_name] = []
flist = self.aie_functions[stampid][elf_name]
in_func = None
Expand Down Expand Up @@ -545,6 +562,9 @@ def _parse_lst_llvm(self, elf, stampid):
if not in_func:
continue
in_func.final_lock_release_pc = self._get_pc(line, llvm=True)
# lock acq
elif is_base and in_func and in_func.name == "main":
self._check_for_lock_acq(line, stampid, True)

def find_functions_by_pc(self, pc):
"""
Expand Down
Loading