From 54f3fc2ca3819e5f0ab094998da003612c561a9c Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 16:56:53 +0800 Subject: [PATCH 01/10] test: extend ui-smoke with g-code execution and endpoint check Each per-GUI test now also drives estop reset, machine on, home all, mode auto, program_open + auto(RUN) on a tiny shared smoke.ngc, waits for sustained INTERP_IDLE, and asserts stat.position delta against --expect-delta-mm 1,1,0 converted via stat.linear_units so the same arg works on inch (axis, touchy) and mm (gmoccapy, qtdragon) sims. State/mode commands use ensure_state/ensure_mode helpers with a retry-and-stability pattern: gmoccapy and qtdragon re-issue their own mode commands during startup and can revert task_mode AUTO -> MANUAL right after we set it. The helpers wait for the desired state, then re-check after STATE_STABILITY_S; on revert they retry up to STATE_RETRY_BUDGET times. Intermediate timeouts use a quiet variant so spurious UI_SMOKE_FAIL lines do not pollute the log during retries (checkresult.sh greps for ^UI_SMOKE_FAIL on any line). smoke.ngc is G21 G91 G0 X1 Y1 G90 M2 - relative move in mm, sim- agnostic. The driver snapshots stat.position[:3] after homing and checks (final - start) against the converted delta, sidestepping each sim's HOME offset. Adds python3-zmq and python3-opencv to debian/control.top.in under !nocheck: qtdragon's hal_bridge and the camview widget segfault on startup without them, which is invisible to the connect-only Phase 1 smoke but breaks the run-program path before the program can start. 5 consecutive local runs all green at 2m43s wall each. 
--- debian/control.top.in | 2 + tests/ui-smoke/_lib/drive.py | 293 +++++++++++++++++++++++++++++++- tests/ui-smoke/_lib/launch.sh | 15 +- tests/ui-smoke/_lib/run-gui.sh | 8 +- tests/ui-smoke/_lib/smoke.ngc | 13 ++ tests/ui-smoke/axis/test.sh | 4 +- tests/ui-smoke/gmoccapy/test.sh | 4 +- tests/ui-smoke/qtdragon/test.sh | 4 +- tests/ui-smoke/touchy/test.sh | 4 +- 9 files changed, 330 insertions(+), 17 deletions(-) create mode 100644 tests/ui-smoke/_lib/smoke.ngc diff --git a/debian/control.top.in b/debian/control.top.in index 0adf9121103..0403315cb51 100644 --- a/debian/control.top.in +++ b/debian/control.top.in @@ -57,6 +57,8 @@ Build-Depends: python3-dbus , python3-dbus.mainloop.pyqt5 , python3-qtpy , + python3-zmq , + python3-opencv , python3-cairo , python3-gi , python3-gi-cairo , diff --git a/tests/ui-smoke/_lib/drive.py b/tests/ui-smoke/_lib/drive.py index 1ee90a42234..06becabff45 100755 --- a/tests/ui-smoke/_lib/drive.py +++ b/tests/ui-smoke/_lib/drive.py @@ -1,15 +1,36 @@ #!/usr/bin/env python3 -# Minimal UI smoke driver: confirm linuxcnc task came up and the GUI -# did not crash. The smoke layer answers Bertho's "does it start" -# question only; functional behaviour (home, run a file, verify -# position) belongs in tests/ui-functional/ (Phase 2). +# UI smoke driver. +# +# Default mode (Phase 1): confirm linuxcnc task came up and the GUI did +# not crash. The driver only proves the GUI started and NML is reachable. +# +# --run-program mode (Phase 2): also estop-reset, machine-on, home, +# program_open + auto(RUN), wait for sustained INTERP_IDLE, and assert +# (stat.position_after - stat.position_after_home) equals --expect-delta-mm +# converted to machine units via stat.linear_units. Snapshot-and-delta +# sidesteps per-sim HOME offsets; mm-input + linear_units conversion +# sidesteps per-sim LINEAR_UNITS (axis and touchy sims are inch). 
+import argparse import linuxcnc import sys import time CONNECT_TIMEOUT_S = 60.0 SETTLE_S = 3.0 +SETTLE_POLLS = 5 +POLL_INTERVAL_S = 0.01 +# Per-attempt wait timeout for ensure_state / ensure_mode. The state +# normally lands well under 1s; profiling showed nothing benefits from +# more than 3s here, and shorter timeouts trim wall time when a retry +# is needed (notably gmoccapy reverting task_mode AUTO -> MANUAL). +ENSURE_ATTEMPT_TIMEOUT_S = 3.0 +# After the desired task_state / task_mode is reached, re-check after +# this long. Some GUIs (notably gmoccapy and qtdragon) run their own +# startup commands that can revert a state we just set; the post-reach +# stability check catches that. +STATE_STABILITY_S = 0.5 +STATE_RETRY_BUDGET = 6 def connect_and_wait_ready(timeout): @@ -17,7 +38,12 @@ def connect_and_wait_ready(timeout): reports a non-negative echo_serial_number. The NML status buffer can be 'invalid err=3' for the first ~30s while linuxcncsvr is still initialising; recreate the stat object on every iteration so - a stale invalid buffer does not stick after linuxcncsvr is ready.""" + a stale invalid buffer does not stick after linuxcncsvr is ready. + + Catch the full Exception hierarchy: in early startup stat.poll() + can raise SystemError ('error return without exception set') when + the underlying C function reports failure without setting a Python + exception. Treat that the same as linuxcnc.error and retry.""" deadline = time.monotonic() + timeout last_err = None while time.monotonic() < deadline: @@ -26,7 +52,7 @@ def connect_and_wait_ready(timeout): stat.poll() if stat.echo_serial_number >= 0: return linuxcnc.command(), stat - except linuxcnc.error as e: + except Exception as e: last_err = e time.sleep(0.5) sys.stderr.write( @@ -35,7 +61,256 @@ def connect_and_wait_ready(timeout): return None, None +def wait_until_quiet(stat, predicate, timeout): + """Poll stat until predicate(stat) is true. Returns True on success, + False on timeout. 
Never writes UI_SMOKE_FAIL: caller decides whether + a timeout here is fatal (and writes its own UI_SMOKE_FAIL line) or + is part of a retry that may still succeed. checkresult.sh greps for + any '^UI_SMOKE_FAIL' line, so spurious emissions during retries + must not happen.""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + stat.poll() + if predicate(stat): + return True + time.sleep(POLL_INTERVAL_S) + return False + + +def wait_until(stat, predicate, timeout, label): + """Like wait_until_quiet but emits UI_SMOKE_FAIL on timeout. Use + only when timeout is fatal at the call site (no retry above).""" + if wait_until_quiet(stat, predicate, timeout): + return True + sys.stderr.write(f"UI_SMOKE_FAIL: timeout waiting for {label} after {timeout}s\n") + return False + + +def home_all(cmd, stat, timeout): + """Home every joint. Uses c.home(-1) which respects HOME_SEQUENCE + if configured. Caller must have already ensured task_state is ON + via ensure_state; otherwise the home command is rejected with + 'cannot be executed until the machine is out of E-stop and turned + on'. Mode change uses ensure_mode so a GUI that reverts mode mid- + sequence (gmoccapy) is detected and retried.""" + if not ensure_mode(cmd, stat, linuxcnc.MODE_MANUAL, "MODE_MANUAL"): + return False + cmd.teleop_enable(0) + cmd.wait_complete() + stat.poll() + njoints = stat.joints + cmd.home(-1) + if not wait_until( + stat, + lambda s: all(s.homed[i] for i in range(njoints)), + timeout, "all joints homed"): + return False + cmd.teleop_enable(1) + cmd.wait_complete() + return True + + +def wait_state(stat, target_state, timeout, label): + """Poll until stat.task_state == target_state. wait_complete on a + state-change command only proves task ack'd the NML message, not + that the underlying state machine has transitioned. 
Polling + task_state is the only deterministic signal.""" + return wait_until( + stat, + lambda s: s.task_state == target_state, + timeout, label) + + +def ensure_state(cmd, stat, target_state, label): + """Issue c.state(target_state), wait for stat.task_state to reach + target_state, then verify it stays there across STATE_STABILITY_S. + If the GUI reverts (e.g. gmoccapy re-issues its own ESTOP on + startup), retry up to STATE_RETRY_BUDGET times. Returns True on + stable success, False on exhausted budget.""" + for attempt in range(1, STATE_RETRY_BUDGET + 1): + cmd.state(target_state) + cmd.wait_complete() + if not wait_until_quiet( + stat, lambda s: s.task_state == target_state, + ENSURE_ATTEMPT_TIMEOUT_S): + sys.stderr.write( + f"WARN: {label} not reached on attempt {attempt}, retrying\n") + continue + time.sleep(STATE_STABILITY_S) + stat.poll() + if stat.task_state == target_state: + return True + sys.stderr.write( + f"WARN: {label} reverted to task_state={stat.task_state} " + f"after attempt {attempt}, retrying\n") + sys.stderr.write( + f"UI_SMOKE_FAIL: {label} did not hold stable across " + f"{STATE_RETRY_BUDGET} attempts\n") + return False + + +def ensure_mode(cmd, stat, target_mode, label): + """Same retry+stability pattern as ensure_state, for task_mode.""" + for attempt in range(1, STATE_RETRY_BUDGET + 1): + cmd.mode(target_mode) + cmd.wait_complete() + if not wait_until_quiet( + stat, lambda s: s.task_mode == target_mode, + ENSURE_ATTEMPT_TIMEOUT_S): + sys.stderr.write( + f"WARN: {label} not reached on attempt {attempt}, retrying\n") + continue + time.sleep(STATE_STABILITY_S) + stat.poll() + if stat.task_mode == target_mode: + return True + sys.stderr.write( + f"WARN: {label} reverted to task_mode={stat.task_mode} " + f"after attempt {attempt}, retrying\n") + sys.stderr.write( + f"UI_SMOKE_FAIL: {label} did not hold stable across " + f"{STATE_RETRY_BUDGET} attempts\n") + return False + + +PROGRAM_START_TIMEOUT_S = 5.0 + + +def snapshot(stat): + 
"""Best-effort one-line summary of state fields relevant to Phase 2 + debugging. Caller is expected to have just polled.""" + return ( + f"task_state={stat.task_state} task_mode={stat.task_mode} " + f"interp_state={stat.interp_state} exec_state={stat.exec_state} " + f"motion_type={stat.motion_type} queue={stat.queue} " + f"queued_mdi_commands={stat.queued_mdi_commands} " + f"file={stat.file!r}") + + +def wait_program_started(stat, timeout): + """Wait until interp_state leaves INTERP_IDLE, i.e. the program + has actually begun executing. Without this guard, a short program + can finish before wait_program_idle gets its first poll, and the + settle-window then mistakes the pre-start IDLE for the post-end + IDLE; we then read stat.position at (0,0,0).""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + stat.poll() + if stat.interp_state != linuxcnc.INTERP_IDLE: + return True + time.sleep(POLL_INTERVAL_S) + stat.poll() + sys.stderr.write( + f"UI_SMOKE_FAIL: program did not start within {timeout}s " + f"(interp_state stayed INTERP_IDLE) state: {snapshot(stat)}\n") + return False + + +def wait_program_idle(stat, timeout): + """Wait until interp_state returns to INTERP_IDLE and the motion + queue is drained for SETTLE_POLLS consecutive polls. 
Caller must + have already proven the program started via wait_program_started; + otherwise this returns immediately on the pre-start IDLE.""" + deadline = time.monotonic() + timeout + consecutive = 0 + while time.monotonic() < deadline: + stat.poll() + idle = ( + stat.interp_state == linuxcnc.INTERP_IDLE + and stat.queue == 0 + ) + if idle: + consecutive += 1 + if consecutive >= SETTLE_POLLS: + return True + else: + consecutive = 0 + time.sleep(POLL_INTERVAL_S) + sys.stderr.write(f"UI_SMOKE_FAIL: program did not reach idle within {timeout}s\n") + return False + + +def run_program(cmd, stat, ngc_path, expect_delta_mm, tol, run_timeout): + """Estop reset, machine on, home, snapshot position, load + run ngc, + verify (final - start) delta matches expect_delta_mm converted to + machine units.""" + if not ensure_state(cmd, stat, linuxcnc.STATE_ESTOP_RESET, + "STATE_ESTOP_RESET"): + return False + if not ensure_state(cmd, stat, linuxcnc.STATE_ON, "STATE_ON"): + return False + + if not home_all(cmd, stat, timeout=60.0): + return False + + if not ensure_mode(cmd, stat, linuxcnc.MODE_AUTO, "MODE_AUTO"): + return False + + # Snapshot start position AFTER homing + AFTER mode transition. The + # GUI might re-issue mode commands during its own startup; doing the + # snapshot last means we record the position right before AUTO_RUN. + stat.poll() + start_pos = stat.position[:3] + + cmd.program_open(ngc_path) + cmd.wait_complete() + # No wait_complete after auto(AUTO_RUN, 0): wait_complete blocks + # until the operation finishes, which for AUTO_RUN means the whole + # program completes. That would race wait_program_started; by the + # time we polled, interp would already be back at INTERP_IDLE. + cmd.auto(linuxcnc.AUTO_RUN, 0) + + if not wait_program_started(stat, PROGRAM_START_TIMEOUT_S): + return False + if not wait_program_idle(stat, run_timeout): + return False + + # stat.linear_units: machine units per mm. mm machine -> 1.0; + # inch machine -> 1/25.4 = 0.03937. 
Multiplying the expected mm + # delta by linear_units gives the expected delta in machine units, + # which is what stat.position reports. + units_per_mm = stat.linear_units + expect_machine = [d * units_per_mm for d in expect_delta_mm] + final_pos = stat.position[:3] + actual_delta = [final_pos[i] - start_pos[i] for i in range(3)] + err = [abs(actual_delta[i] - expect_machine[i]) for i in range(3)] + if any(e > tol for e in err): + sys.stderr.write( + f"UI_SMOKE_FAIL: delta mismatch " + f"expect_mm={expect_delta_mm} units_per_mm={units_per_mm} " + f"expect_machine={expect_machine} " + f"start={start_pos} final={final_pos} " + f"actual_delta={actual_delta} err={err} tol={tol}\n") + return False + return True + + +def parse_xyz(s): + parts = [float(p) for p in s.split(",")] + if len(parts) != 3: + raise argparse.ArgumentTypeError("expected x,y,z (three comma-separated floats)") + return parts + + def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--run-program", metavar="NGC", + help="g-code file to load and run (enables Phase 2 mode)") + ap.add_argument("--expect-delta-mm", type=parse_xyz, metavar="DX,DY,DZ", + help="expected XYZ delta in mm from post-home position " + "(required with --run-program). 
Driver converts to " + "machine units via stat.linear_units so the same " + "value works on inch and mm sims.") + ap.add_argument("--tol", type=float, default=1e-4, + help="position tolerance per axis in machine units " + "(default: 1e-4)") + ap.add_argument("--run-timeout", type=float, default=60.0, + help="program-completion timeout in seconds (default: 60)") + args = ap.parse_args() + + if args.run_program and args.expect_delta_mm is None: + ap.error("--run-program requires --expect-delta-mm DX,DY,DZ") + cmd, stat = connect_and_wait_ready(CONNECT_TIMEOUT_S) if cmd is None: return 1 @@ -54,6 +329,12 @@ def main(): sys.stderr.write(f"UI_SMOKE_FAIL: task disappeared after GUI startup: {e}\n") return 1 + if args.run_program: + if not run_program(cmd, stat, + args.run_program, args.expect_delta_mm, + args.tol, args.run_timeout): + return 1 + print("UI_SMOKE_OK") return 0 diff --git a/tests/ui-smoke/_lib/launch.sh b/tests/ui-smoke/_lib/launch.sh index 5f3672e786c..1db06423dd1 100755 --- a/tests/ui-smoke/_lib/launch.sh +++ b/tests/ui-smoke/_lib/launch.sh @@ -16,6 +16,8 @@ set -u CONFIG_INI="$1" +shift +DRIVER_ARGS=("$@") TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -28,9 +30,10 @@ rm -f ui-smoke.out ui-smoke.err linuxcnc.pid bash "$LIB_DIR/cleanup-runtime.sh" # Launch linuxcnc inside xvfb-run. The outer timeout is a safety net -# so a wedged GUI cannot hang CI. -LINUXCNC_TIMEOUT=240 -DRIVER_TIMEOUT=90 +# so a wedged GUI cannot hang CI. Driver timeout covers connect (60s) +# + GUI settle (3s) + optional Phase 2 run (estop/home/program ~90s). +LINUXCNC_TIMEOUT=300 +DRIVER_TIMEOUT=180 # Force software OpenGL (Mesa llvmpipe). CI runners have no GPU and # Qt/GL widgets segfault under hardware GL with no display. The Qt- @@ -71,7 +74,9 @@ xvfb-run -a --server-args="-screen 0 1024x768x24" \ # The driver polls NML readiness itself (BsAtHome review: # avoid real-clock waits where status polling will do). 
- timeout "$DRIVER_TIMEOUT" python3 "$LIB_DIR/drive.py" >ui-smoke.out 2>ui-smoke.err + # Driver args (Phase 2: --run-program/--expect-delta-mm) come through + # as positional $@ from the inner bash -c. + timeout "$DRIVER_TIMEOUT" python3 "$LIB_DIR/drive.py" "$@" >ui-smoke.out 2>ui-smoke.err DRIVE_RC=$? # Clean shutdown: GUI-specific quit first (lets linuxcnc end @@ -97,7 +102,7 @@ xvfb-run -a --server-args="-screen 0 1024x768x24" \ fi exit "$DRIVE_RC" - ' + ' _launch "${DRIVER_ARGS[@]}" RC=$? # Surface logs so checkresult and CI artifact upload can see them. diff --git a/tests/ui-smoke/_lib/run-gui.sh b/tests/ui-smoke/_lib/run-gui.sh index 01840944bc0..9f4f596afe0 100755 --- a/tests/ui-smoke/_lib/run-gui.sh +++ b/tests/ui-smoke/_lib/run-gui.sh @@ -1,9 +1,10 @@ #!/bin/bash # Dispatcher invoked from each per-GUI test.sh. Resolves an INI path # under configs/sim/ and execs launch.sh in the caller's test dir. -# Usage: run-gui.sh +# Usage: run-gui.sh [driver-args...] # e.g. run-gui.sh axis/axis.ini # run-gui.sh qtdragon/qtdragon_xyz/qtdragon_metric.ini +# run-gui.sh axis/axis.ini --run-program /abs/smoke.ngc --expect-delta-mm 1,1,0 set -u @@ -11,5 +12,8 @@ LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" CONFIGS_DIR="$(cd "$LIB_DIR/../../../configs/sim" && pwd)" +INI_REL="$1" +shift + export TEST_DIR -exec "$LIB_DIR/launch.sh" "$CONFIGS_DIR/$1" +exec "$LIB_DIR/launch.sh" "$CONFIGS_DIR/$INI_REL" "$@" diff --git a/tests/ui-smoke/_lib/smoke.ngc b/tests/ui-smoke/_lib/smoke.ngc new file mode 100644 index 00000000000..036bff7bd09 --- /dev/null +++ b/tests/ui-smoke/_lib/smoke.ngc @@ -0,0 +1,13 @@ +(Phase 2 UI smoke program. Force mm input units with G21 so the move) +(commanded here is the same physical distance regardless of the sim) +(config's LINEAR_UNITS. Use G91 relative so the move is independent of) +(each sim's HOME position, then return to G90 absolute and end with M2.) 
+(stat.position is still reported in the machine's LINEAR_UNITS; the) +(driver converts the expected mm delta to machine units via) +(stat.linear_units before comparing. Note: axis and touchy sims are) +(inch machines, so a 1mm move shows as ~0.03937 in stat.position.) +G21 +G91 +G0 X1 Y1 +G90 +M2 diff --git a/tests/ui-smoke/axis/test.sh b/tests/ui-smoke/axis/test.sh index efa45dd9590..ba4fddfc6d0 100755 --- a/tests/ui-smoke/axis/test.sh +++ b/tests/ui-smoke/axis/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" axis/axis.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +exec "$LIB_DIR/run-gui.sh" axis/axis.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/gmoccapy/test.sh b/tests/ui-smoke/gmoccapy/test.sh index 29adc2b9397..de93beaed99 100755 --- a/tests/ui-smoke/gmoccapy/test.sh +++ b/tests/ui-smoke/gmoccapy/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" gmoccapy/gmoccapy.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +exec "$LIB_DIR/run-gui.sh" gmoccapy/gmoccapy.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index 7df11989920..a6a72e01c3b 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" qtdragon/qtdragon_xyz/qtdragon_metric.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +exec "$LIB_DIR/run-gui.sh" qtdragon/qtdragon_xyz/qtdragon_metric.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/touchy/test.sh b/tests/ui-smoke/touchy/test.sh index 4b9c904d700..831fe81b346 100755 --- a/tests/ui-smoke/touchy/test.sh +++ b/tests/ui-smoke/touchy/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" touchy/touchy.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +exec 
"$LIB_DIR/run-gui.sh" touchy/touchy.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 From 20fbc52ae38e18a680bf28a64bf75104561ece52 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 17:44:06 +0800 Subject: [PATCH 02/10] test: qtdragon ui-smoke runs against writable config mirror CI run hit a PermissionError in qtvcp's logger when it tried to open configs/sim/qtdragon/qtdragon_xyz/qtdragon.log for write: the GitHub Actions workspace is mounted read-only for the docker build user, and qtvcp resolves LOG_FILE = qtdragon.log into the config dir. hal_bridge then exits, linuxcnc tears down, and the driver retries ESTOP_RESET until the budget is exhausted. qtdragon test.sh now mirrors the qtdragon_xyz config dir to a mktemp directory, seds LOG_FILE to ~/qtdragon.log, and passes the absolute INI path to run-gui.sh. run-gui.sh treats any path starting with / as absolute; everything else still resolves under configs/sim. Trap cleans the tmp dir on exit so the working tree stays clean. Does not touch the shipped qtdragon config to avoid changing default behaviour for real users. The same fix would work for any other config that turns out to write into its own dir on CI. --- tests/ui-smoke/_lib/run-gui.sh | 13 +++++++++++-- tests/ui-smoke/qtdragon/test.sh | 19 ++++++++++++++++++- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tests/ui-smoke/_lib/run-gui.sh b/tests/ui-smoke/_lib/run-gui.sh index 9f4f596afe0..980dff7f51b 100755 --- a/tests/ui-smoke/_lib/run-gui.sh +++ b/tests/ui-smoke/_lib/run-gui.sh @@ -12,8 +12,17 @@ LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" CONFIGS_DIR="$(cd "$LIB_DIR/../../../configs/sim" && pwd)" -INI_REL="$1" +INI_ARG="$1" shift +# Accept either a relative path under configs/sim/ or an absolute path. 
+# Absolute paths are used by tests that need to point at a writable +# mirror of a shipped config (qtdragon writes a log file inside the +# config dir, which is read-only on CI). +case "$INI_ARG" in + /*) INI_PATH="$INI_ARG" ;; + *) INI_PATH="$CONFIGS_DIR/$INI_ARG" ;; +esac + export TEST_DIR -exec "$LIB_DIR/launch.sh" "$CONFIGS_DIR/$INI_REL" "$@" +exec "$LIB_DIR/launch.sh" "$INI_PATH" "$@" diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index a6a72e01c3b..d2cedd8d5c0 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -1,4 +1,21 @@ #!/bin/bash +# qtdragon's qtvcp logger writes its log file (path from INI [DISPLAY] +# LOG_FILE) into the config directory. CI mounts the workspace read- +# only for the runtime user, so a relative LOG_FILE like 'qtdragon.log' +# resolves to a path qtvcp cannot create, hal_bridge then exits, and +# linuxcnc tears down before our driver can do anything. Mirror the +# config dir to a writable tmp location and patch LOG_FILE to be +# rooted at $HOME so the log lands in a directory we can write to. +set -u + LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" -exec "$LIB_DIR/run-gui.sh" qtdragon/qtdragon_xyz/qtdragon_metric.ini \ +SRC_DIR="$(cd "$LIB_DIR/../../../configs/sim/qtdragon/qtdragon_xyz" && pwd)" + +WORK_DIR="$(mktemp -d -t ui-smoke-qtdragon.XXXXXX)" +trap 'rm -rf "$WORK_DIR"' EXIT +cp -r "$SRC_DIR/." 
"$WORK_DIR/" +sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ + "$WORK_DIR/qtdragon_metric.ini" + +exec "$LIB_DIR/run-gui.sh" "$WORK_DIR/qtdragon_metric.ini" \ --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 From cc1476b65f3bf738ea6bff5333e99de68359f30e Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 18:18:57 +0800 Subject: [PATCH 03/10] test: drop python3-opencv dep and set QT_XCB_GL_INTEGRATION=xcb_egl Ubuntu 24.04 rip-and-test runs hit a qtvcp segfault after the log- permission fix let qtvcp get further than Phase 1 had. Debian package-arch passes the same code. Two known asymmetries match: - python3-opencv on Ubuntu pulls Qt5 GUI bits whose cv2/qt/plugins directory overrides the system PyQt5 platform plugin path under xvfb (opencv-python issue #572, Qt Forum 119109). qtvcp's camview_widget tolerates ImportError on cv2 and just logs a warning, so dropping the dep restores the harmless fallback path Phase 1 was already exercising. - xcb_glx is the historical fragile integration under xvfb (Launchpad #1761708, QTBUG-67537); xcb_egl is what software-GL stacks expect anyway. Set as defense in depth. Local 4/4 still green with both changes. 
--- debian/control.top.in | 1 - tests/ui-smoke/_lib/launch.sh | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/debian/control.top.in b/debian/control.top.in index 0403315cb51..596063ea377 100644 --- a/debian/control.top.in +++ b/debian/control.top.in @@ -58,7 +58,6 @@ Build-Depends: python3-dbus.mainloop.pyqt5 , python3-qtpy , python3-zmq , - python3-opencv , python3-cairo , python3-gi , python3-gi-cairo , diff --git a/tests/ui-smoke/_lib/launch.sh b/tests/ui-smoke/_lib/launch.sh index 1db06423dd1..2686c13fa37 100755 --- a/tests/ui-smoke/_lib/launch.sh +++ b/tests/ui-smoke/_lib/launch.sh @@ -43,6 +43,10 @@ export GALLIUM_DRIVER=llvmpipe export QT_QUICK_BACKEND=software export QSG_RHI_BACKEND=software export QT_OPENGL=software +# Dodge a long-known xcb_glx integration crash that hits QtWebEngine +# and related Qt5 widgets under xvfb (Launchpad #1761708, QTBUG-67537). +# Forces the egl path which is what software-GL stacks expect anyway. +export QT_XCB_GL_INTEGRATION=xcb_egl # Silence audio: xvfb covers X but not sound. Demote every Gst # Audio/Sink and disable canberra/SDL/pulse/ALSA-default paths. From ed2c5effd4c6b41be9ce79d68b937a73bfeb3793 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 18:45:02 +0800 Subject: [PATCH 04/10] test: force QT_QPA_PLATFORM=offscreen for qtdragon xvfb + xcb + xcb_egl was not enough for Ubuntu 24.04 rip-and-test: qtvcp still segfaults during widget construction even with opencv and qtwebengine paths quiet, and the same code passes on Debian package-arch. Offscreen renders entirely in memory and exercises a different Qt plugin entirely, dodging the xcb-stack instability. scripts/linuxcnc itself forces QT_QPA_PLATFORM=xcb unless LINUXCNC_OPENGL_PLATFORM is set to a non-glx value, so pin both. Only qtdragon needs this; axis (Tk), touchy and gmoccapy (GTK) are unaffected. 
Trade-off: no Phase 3 screenshot from qtdragon under this config; Phase 3 would need an opt-out for offscreen tests. --- tests/ui-smoke/qtdragon/test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index d2cedd8d5c0..d2b9224f59c 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -6,6 +6,15 @@ # linuxcnc tears down before our driver can do anything. Mirror the # config dir to a writable tmp location and patch LOG_FILE to be # rooted at $HOME so the log lands in a directory we can write to. +# +# Force the Qt offscreen platform plugin. qtvcp under xvfb + xcb on +# Ubuntu 24.04 segfaults during widget construction (no backtrace); +# Debian containers in the same CI matrix do not. Offscreen renders +# entirely in memory, no X server needed (xvfb-run still wraps the +# call so the rest of scripts/linuxcnc's X-display assumptions hold). +# scripts/linuxcnc itself forces QT_QPA_PLATFORM=xcb unless +# LINUXCNC_OPENGL_PLATFORM is set to something other than glx, so we +# pin both env vars. set -u LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" @@ -17,5 +26,8 @@ cp -r "$SRC_DIR/." "$WORK_DIR/" sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ "$WORK_DIR/qtdragon_metric.ini" +export LINUXCNC_OPENGL_PLATFORM=offscreen +export QT_QPA_PLATFORM=offscreen + exec "$LIB_DIR/run-gui.sh" "$WORK_DIR/qtdragon_metric.ini" \ --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 From 218b74e851ab6cc9f41d11c52663357ced4a0b93 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 20:04:14 +0800 Subject: [PATCH 05/10] test: disable QtWebEngine sandbox+GPU for qtdragon ui-smoke qtdragon embeds QWebEngineView. On rip-and-test (gcc) CI it racy-crashed during Chromium browser-process spawn under offscreen + xvfb, no GPU, no user namespaces. rip-and-test-clang got past it by luck. 
Force --no-sandbox --single-process --no-zygote --disable-gpu so the renderer runs in-process with software rendering. --- tests/ui-smoke/qtdragon/test.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index d2b9224f59c..faad7f33ab5 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -15,6 +15,12 @@ # scripts/linuxcnc itself forces QT_QPA_PLATFORM=xcb unless # LINUXCNC_OPENGL_PLATFORM is set to something other than glx, so we # pin both env vars. +# +# qtdragon embeds a QWebEngineView (Chromium). Under offscreen + xvfb +# with no GPU and no user namespaces in the CI runner sandbox, +# QtWebEngine racy-crashes during browser-process spawn. Disable the +# Chromium sandbox and force single-process + software rendering so +# the renderer runs in the same process as Qt with no GPU thread. set -u LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" @@ -28,6 +34,8 @@ sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ export LINUXCNC_OPENGL_PLATFORM=offscreen export QT_QPA_PLATFORM=offscreen +export QTWEBENGINE_DISABLE_SANDBOX=1 +export QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer --single-process --no-zygote" exec "$LIB_DIR/run-gui.sh" "$WORK_DIR/qtdragon_metric.ini" \ --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 From 98a028804cc90d06064587d8d4cb8822e5c3ee22 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sun, 24 May 2026 20:55:35 +0800 Subject: [PATCH 06/10] test: block QtWebEngine import in qtdragon ui-smoke via meta_path shim QtWebEngine browser-process init segfaults inside the qtvcp process on Ubuntu 24.04 CI even with --no-sandbox --single-process --disable-gpu. 
The smoke test never touches the WebWidget, so block the qtpy.QtWebEngineWidgets import via a sitecustomize meta_path finder; WebWidget already has a fallback that swaps in a plain QWidget when that import fails. No Chromium spawn, no segfault. The previous chromium-flags attempt was retracted: 'Sandboxing disabled by user.' confirmed Chromium got the flags but still crashed during init, so we are not going to win that race. --- tests/ui-smoke/qtdragon/test.sh | 41 +++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index faad7f33ab5..f08c5a1491a 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -18,9 +18,14 @@ # # qtdragon embeds a QWebEngineView (Chromium). Under offscreen + xvfb # with no GPU and no user namespaces in the CI runner sandbox, -# QtWebEngine racy-crashes during browser-process spawn. Disable the -# Chromium sandbox and force single-process + software rendering so -# the renderer runs in the same process as Qt with no GPU thread. +# QtWebEngine browser-process init segfaults even with --no-sandbox +# --single-process --disable-gpu (Chromium logs "Sandboxing disabled +# by user." then crashes inside the same qtvcp PID). Rather than keep +# tuning Chromium flags for a widget the smoke test never touches, +# we shim qtpy.QtWebEngineWidgets to raise ImportError; web_widget.py +# already has a fallback path that swaps the QWebEngineView for a +# plain QWidget when the import fails (its "fail safe - mostly for +# designer" branch). No Chromium spawn = no crash. 
set -u LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" @@ -34,8 +39,34 @@ sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ export LINUXCNC_OPENGL_PLATFORM=offscreen export QT_QPA_PLATFORM=offscreen -export QTWEBENGINE_DISABLE_SANDBOX=1 -export QTWEBENGINE_CHROMIUM_FLAGS="--no-sandbox --disable-gpu --disable-software-rasterizer --single-process --no-zygote" + +# sitecustomize.py is auto-imported by Python from any sys.path entry +# at interpreter startup. Drop a meta_path finder that blocks the +# qtpy.QtWebEngineWidgets import so WebWidget falls back to QWidget. +SHIM_DIR="$WORK_DIR/_pyshim" +mkdir -p "$SHIM_DIR" +cat >"$SHIM_DIR/sitecustomize.py" <<'PY' +import sys +from importlib.abc import MetaPathFinder, Loader +from importlib.util import spec_from_loader + +_BLOCK = {'qtpy.QtWebEngineWidgets', 'PyQt5.QtWebEngineWidgets'} + +class _BlockLoader(Loader): + def create_module(self, spec): + raise ImportError('QtWebEngineWidgets blocked for ui-smoke CI') + def exec_module(self, module): + pass + +class _BlockFinder(MetaPathFinder): + def find_spec(self, name, path, target=None): + if name in _BLOCK: + return spec_from_loader(name, _BlockLoader()) + return None + +sys.meta_path.insert(0, _BlockFinder()) +PY +export PYTHONPATH="$SHIM_DIR${PYTHONPATH:+:$PYTHONPATH}" exec "$LIB_DIR/run-gui.sh" "$WORK_DIR/qtdragon_metric.ini" \ --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 From 939cdd5064d1358249afd0a1d55f36a063c49b51 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Sat, 30 May 2026 14:41:11 +0800 Subject: [PATCH 07/10] tests: add ui-smoke quit-path coverage (SIGTERM) Adds a quit-path smoke test per GUI that boots the GUI, waits for the NML task to come up, sends SIGTERM to the GUI process alone, and asserts the GUI exits on its own within a short grace. This guards the clean-shutdown handlers: a GUI that absorbs SIGTERM and has to be SIGKILLed fails the test. 
The new _lib/quit-launch.sh shares the headless environment (software GL + audio silencing) with launch.sh by sourcing a new _lib/launch-env.sh rather than copying it, so the two launchers cannot drift apart. Results go through _lib/checkresult-quit.sh (pass on UI_SMOKE_QUIT_OK). The GUI process is identified by matching a python argv[0], so the linuxcnc launcher and xvfb-run wrappers that also carry the GUI name on their command line are not mistaken for it. Per-GUI dirs: touchy-quit, gmoccapy-quit, qtdragon-quit. The qtdragon quit test needs the same CI workarounds the qtdragon smoke test already carries (writable config mirror with a patched LOG_FILE, the offscreen Qt platform, and the QtWebEngine import shim). Those move out of qtdragon/test.sh into _lib/qtdragon-prepare.sh, sourced by both qtdragon test.sh files, so the quit test reuses them instead of leaving qtvcp to crash on startup. Requires the SIGTERM handlers in #4076 (gmoccapy), #4077 (touchy) and #4078 (qtvcp); without them the GUIs ignore SIGTERM and these tests fail by design. 
(cherry picked from commit aff5991dc3189b48a747c7c7e8962d4d16c5cbe3) --- tests/ui-smoke/_lib/checkresult-quit.sh | 25 +++++ tests/ui-smoke/_lib/launch-env.sh | 26 +++++ tests/ui-smoke/_lib/launch.sh | 23 +---- tests/ui-smoke/_lib/qtdragon-prepare.sh | 79 +++++++++++++++ tests/ui-smoke/_lib/quit-launch.sh | 120 +++++++++++++++++++++++ tests/ui-smoke/gmoccapy-quit/checkresult | 2 + tests/ui-smoke/gmoccapy-quit/skip | 2 + tests/ui-smoke/gmoccapy-quit/test.sh | 4 + tests/ui-smoke/qtdragon-quit/checkresult | 2 + tests/ui-smoke/qtdragon-quit/skip | 2 + tests/ui-smoke/qtdragon-quit/test.sh | 7 ++ tests/ui-smoke/qtdragon/test.sh | 68 +------------ tests/ui-smoke/touchy-quit/checkresult | 2 + tests/ui-smoke/touchy-quit/skip | 2 + tests/ui-smoke/touchy-quit/test.sh | 4 + 15 files changed, 282 insertions(+), 86 deletions(-) create mode 100755 tests/ui-smoke/_lib/checkresult-quit.sh create mode 100644 tests/ui-smoke/_lib/launch-env.sh create mode 100644 tests/ui-smoke/_lib/qtdragon-prepare.sh create mode 100755 tests/ui-smoke/_lib/quit-launch.sh create mode 100755 tests/ui-smoke/gmoccapy-quit/checkresult create mode 100755 tests/ui-smoke/gmoccapy-quit/skip create mode 100755 tests/ui-smoke/gmoccapy-quit/test.sh create mode 100755 tests/ui-smoke/qtdragon-quit/checkresult create mode 100755 tests/ui-smoke/qtdragon-quit/skip create mode 100755 tests/ui-smoke/qtdragon-quit/test.sh create mode 100755 tests/ui-smoke/touchy-quit/checkresult create mode 100755 tests/ui-smoke/touchy-quit/skip create mode 100755 tests/ui-smoke/touchy-quit/test.sh diff --git a/tests/ui-smoke/_lib/checkresult-quit.sh b/tests/ui-smoke/_lib/checkresult-quit.sh new file mode 100755 index 00000000000..c746e419269 --- /dev/null +++ b/tests/ui-smoke/_lib/checkresult-quit.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Shared result check for UI smoke quit-path tests. +# +# Pass if the quit launcher printed UI_SMOKE_QUIT_OK (the GUI exited on +# its own SIGTERM within the grace) and did not print UI_SMOKE_QUIT_FAIL. 
+set -u + +if [ $# -lt 1 ]; then + echo "FAIL: checkresult-quit requires the result-log path as argument" >&2 + exit 1 +fi + +LOG="$1" + +if grep -q '^UI_SMOKE_QUIT_FAIL' "$LOG"; then + echo "FAIL: $(grep -m1 '^UI_SMOKE_QUIT_FAIL' "$LOG")" >&2 + exit 1 +fi + +if ! grep -q '^UI_SMOKE_QUIT_OK' "$LOG"; then + echo "FAIL: GUI did not report a clean SIGTERM exit (no UI_SMOKE_QUIT_OK)" >&2 + exit 1 +fi + +exit 0 diff --git a/tests/ui-smoke/_lib/launch-env.sh b/tests/ui-smoke/_lib/launch-env.sh new file mode 100644 index 00000000000..909fff85842 --- /dev/null +++ b/tests/ui-smoke/_lib/launch-env.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Shared headless environment for the UI smoke launchers. Sourced by +# launch.sh and quit-launch.sh so the two stay in lockstep; a knob added +# here reaches both. The caller must set LIB_DIR before sourcing (it +# locates asound.conf). This file only exports; it runs no commands. + +# Force software OpenGL (Mesa llvmpipe). CI runners have no GPU and +# Qt/GL widgets segfault under hardware GL with no display. The Qt- +# specific knobs cover qtdragon's QtQuick + RHI paths. +export LIBGL_ALWAYS_SOFTWARE=1 +export GALLIUM_DRIVER=llvmpipe +export QT_QUICK_BACKEND=software +export QSG_RHI_BACKEND=software +export QT_OPENGL=software +# Dodge a long-known xcb_glx integration crash that hits QtWebEngine +# and related Qt5 widgets under xvfb (Launchpad #1761708, QTBUG-67537). +# Forces the egl path which is what software-GL stacks expect anyway. +export QT_XCB_GL_INTEGRATION=xcb_egl + +# Silence audio: xvfb covers X but not sound. Demote every Gst +# Audio/Sink and disable canberra/SDL/pulse/ALSA-default paths. 
+export ALSA_CONFIG_PATH="$LIB_DIR/asound.conf" +export CANBERRA_DRIVER=null +export GST_PLUGIN_FEATURE_RANK="pulsesink:NONE,alsasink:NONE,osssink:NONE,oss4sink:NONE,jackaudiosink:NONE,pipewiresink:NONE,openalsink:NONE" +export PULSE_SERVER=/dev/null +export SDL_AUDIODRIVER=dummy diff --git a/tests/ui-smoke/_lib/launch.sh b/tests/ui-smoke/_lib/launch.sh index 2686c13fa37..5bc90821a0d 100755 --- a/tests/ui-smoke/_lib/launch.sh +++ b/tests/ui-smoke/_lib/launch.sh @@ -35,26 +35,9 @@ bash "$LIB_DIR/cleanup-runtime.sh" LINUXCNC_TIMEOUT=300 DRIVER_TIMEOUT=180 -# Force software OpenGL (Mesa llvmpipe). CI runners have no GPU and -# Qt/GL widgets segfault under hardware GL with no display. The Qt- -# specific knobs cover qtdragon's QtQuick + RHI paths. -export LIBGL_ALWAYS_SOFTWARE=1 -export GALLIUM_DRIVER=llvmpipe -export QT_QUICK_BACKEND=software -export QSG_RHI_BACKEND=software -export QT_OPENGL=software -# Dodge a long-known xcb_glx integration crash that hits QtWebEngine -# and related Qt5 widgets under xvfb (Launchpad #1761708, QTBUG-67537). -# Forces the egl path which is what software-GL stacks expect anyway. -export QT_XCB_GL_INTEGRATION=xcb_egl - -# Silence audio: xvfb covers X but not sound. Demote every Gst -# Audio/Sink and disable canberra/SDL/pulse/ALSA-default paths. -export ALSA_CONFIG_PATH="$LIB_DIR/asound.conf" -export CANBERRA_DRIVER=null -export GST_PLUGIN_FEATURE_RANK="pulsesink:NONE,alsasink:NONE,osssink:NONE,oss4sink:NONE,jackaudiosink:NONE,pipewiresink:NONE,openalsink:NONE" -export PULSE_SERVER=/dev/null -export SDL_AUDIODRIVER=dummy +# Shared headless environment (software GL + audio silencing), kept in +# launch-env.sh so launch.sh and quit-launch.sh cannot drift apart. +. 
"$LIB_DIR/launch-env.sh" # Export the per-invocation values so the inner bash -c receives them # as proper env vars (avoids embedding paths into the inner script diff --git a/tests/ui-smoke/_lib/qtdragon-prepare.sh b/tests/ui-smoke/_lib/qtdragon-prepare.sh new file mode 100644 index 00000000000..19741978d73 --- /dev/null +++ b/tests/ui-smoke/_lib/qtdragon-prepare.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# Sourced by the qtdragon ui-smoke tests (smoke and quit) to build a +# config qtvcp can actually run under CI. Sets QTDRAGON_INI to the +# patched ini path and exports the headless env; the caller then execs +# run-gui.sh or quit-launch.sh with "$QTDRAGON_INI". Must be sourced +# with LIB_DIR already set. +# +# qtdragon's qtvcp logger writes its log file (path from INI [DISPLAY] +# LOG_FILE) into the config directory. CI mounts the workspace read- +# only for the runtime user, so a relative LOG_FILE like 'qtdragon.log' +# resolves to a path qtvcp cannot create, hal_bridge then exits, and +# linuxcnc tears down before our driver can do anything. Mirror the +# config dir to a writable tmp location and patch LOG_FILE to be +# rooted at $HOME so the log lands in a directory we can write to. +# +# Force the Qt offscreen platform plugin. qtvcp under xvfb + xcb on +# Ubuntu 24.04 segfaults during widget construction (no backtrace); +# Debian containers in the same CI matrix do not. Offscreen renders +# entirely in memory, no X server needed (xvfb-run still wraps the +# call so the rest of scripts/linuxcnc's X-display assumptions hold). +# scripts/linuxcnc itself forces QT_QPA_PLATFORM=xcb unless +# LINUXCNC_OPENGL_PLATFORM is set to something other than glx, so we +# pin both env vars. +# +# qtdragon embeds a QWebEngineView (Chromium). Under offscreen + xvfb +# with no GPU and no user namespaces in the CI runner sandbox, +# QtWebEngine browser-process init segfaults even with --no-sandbox +# --single-process --disable-gpu (Chromium logs "Sandboxing disabled +# by user." 
then crashes inside the same qtvcp PID). Rather than keep +# tuning Chromium flags for a widget the smoke test never touches, +# we shim qtpy.QtWebEngineWidgets to raise ImportError; web_widget.py +# already has a fallback path that swaps the QWebEngineView for a +# plain QWidget when the import fails (its "fail safe - mostly for +# designer" branch). No Chromium spawn = no crash. + +: "${LIB_DIR:?qtdragon-prepare.sh must be sourced with LIB_DIR set}" + +SRC_DIR="$(cd "$LIB_DIR/../../../configs/sim/qtdragon/qtdragon_xyz" && pwd)" + +WORK_DIR="$(mktemp -d -t ui-smoke-qtdragon.XXXXXX)" +trap 'rm -rf "$WORK_DIR"' EXIT +cp -r "$SRC_DIR/." "$WORK_DIR/" +sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ + "$WORK_DIR/qtdragon_metric.ini" + +export LINUXCNC_OPENGL_PLATFORM=offscreen +export QT_QPA_PLATFORM=offscreen + +# sitecustomize.py is auto-imported by Python from any sys.path entry +# at interpreter startup. Drop a meta_path finder that blocks the +# qtpy.QtWebEngineWidgets import so WebWidget falls back to QWidget. +SHIM_DIR="$WORK_DIR/_pyshim" +mkdir -p "$SHIM_DIR" +cat >"$SHIM_DIR/sitecustomize.py" <<'PY' +import sys +from importlib.abc import MetaPathFinder, Loader +from importlib.util import spec_from_loader + +_BLOCK = {'qtpy.QtWebEngineWidgets', 'PyQt5.QtWebEngineWidgets'} + +class _BlockLoader(Loader): + def create_module(self, spec): + raise ImportError('QtWebEngineWidgets blocked for ui-smoke CI') + def exec_module(self, module): + pass + +class _BlockFinder(MetaPathFinder): + def find_spec(self, name, path, target=None): + if name in _BLOCK: + return spec_from_loader(name, _BlockLoader()) + return None + +sys.meta_path.insert(0, _BlockFinder()) +PY +export PYTHONPATH="$SHIM_DIR${PYTHONPATH:+:$PYTHONPATH}" + +# Consumed by the sourcing test.sh, which execs the launcher with it. 
+# shellcheck disable=SC2034 +QTDRAGON_INI="$WORK_DIR/qtdragon_metric.ini" diff --git a/tests/ui-smoke/_lib/quit-launch.sh b/tests/ui-smoke/_lib/quit-launch.sh new file mode 100755 index 00000000000..b25b00584e3 --- /dev/null +++ b/tests/ui-smoke/_lib/quit-launch.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# Quit-path launcher for UI smoke tests. +# Usage: quit-launch.sh <config.ini> <gui-match> +# +# Boots linuxcnc + GUI under xvfb-run exactly like launch.sh, waits for +# the NML task to come up (via drive.py), then sends SIGTERM to the GUI +# process *alone* and asserts the GUI exits on its own within a short +# grace. This is the regression guard for the SIGTERM clean-shutdown +# handlers: a GUI that absorbs SIGTERM and has to be SIGKILLed fails. +# +# <gui-match> is a pgrep -f pattern identifying the GUI process +# (e.g. "bin/touchy", "bin/gmoccapy"). It must not match the linuxcnc +# launcher or task/motion helpers. +# +# Markers (consumed by checkresult-quit.sh): +# UI_SMOKE_QUIT_OK GUI exited on SIGTERM within QUIT_GRACE +# UI_SMOKE_QUIT_FAIL GUI never started, was not found, or ignored TERM + +set -u + +CONFIG_INI="$1" +GUI_MATCH="$2" +TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" +LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +cd "$TEST_DIR" || exit 1 +rm -f ui-smoke.out ui-smoke.err linuxcnc.pid + +bash "$LIB_DIR/cleanup-runtime.sh" + +LINUXCNC_TIMEOUT=240 +DRIVER_TIMEOUT=90 +# Seconds to wait for the GUI to exit after SIGTERM before declaring it +# stuck. A GUI honouring SIGTERM exits in well under a second; the +# margin covers Cleanup of task/motion on slow CI. +QUIT_GRACE=15 + +# Shared headless environment (software GL + audio silencing), kept in +# launch-env.sh so launch.sh and quit-launch.sh cannot drift apart. +. 
"$LIB_DIR/launch-env.sh" + +export CONFIG_INI LIB_DIR DRIVER_TIMEOUT GUI_MATCH QUIT_GRACE + +# shellcheck disable=SC2016 +xvfb-run -a --server-args="-screen 0 1024x768x24" \ + timeout "$LINUXCNC_TIMEOUT" \ + bash -c ' + setsid linuxcnc -r "$CONFIG_INI" >linuxcnc.out 2>linuxcnc.err & + LINUXCNC_PID=$! + echo "$LINUXCNC_PID" >linuxcnc.pid + + # Wait until the task is reachable (GUI has constructed and the + # NML round-trip works). Reuse the phase-1 driver for readiness. + timeout "$DRIVER_TIMEOUT" python3 "$LIB_DIR/drive.py" >ui-smoke.out 2>ui-smoke.err + if ! grep -q "^UI_SMOKE_OK$" ui-smoke.out; then + echo "UI_SMOKE_QUIT_FAIL: GUI did not come up; cannot test quit" + kill -KILL -- -"$LINUXCNC_PID" 2>/dev/null || true + bash "$LIB_DIR/cleanup-runtime.sh" + exit 1 + fi + + # Identify the GUI process. pgrep -f matches against the whole + # command line, so wrapper processes (the linuxcnc launcher, the + # xvfb-run shell, this bash -c) also match because the GUI name + # appears in the config path or the embedded script text. Every + # such wrapper has a shell or xvfb-run as argv[0]; the real GUI + # is a python interpreter. Pick the first match whose argv[0] + # basename is a python binary. + GUI_PID="" + for p in $(pgrep -f "$GUI_MATCH"); do + arg0=$(tr "\0" "\n" <"/proc/$p/cmdline" 2>/dev/null | head -1) + case "$(basename "$arg0" 2>/dev/null)" in + python*) GUI_PID="$p"; break ;; + esac + done + if [ -z "$GUI_PID" ]; then + echo "UI_SMOKE_QUIT_FAIL: GUI process matching \"$GUI_MATCH\" not found" + kill -KILL -- -"$LINUXCNC_PID" 2>/dev/null || true + bash "$LIB_DIR/cleanup-runtime.sh" + exit 1 + fi + + # Send SIGTERM to the GUI alone and time how long it takes to go. 
+ kill -TERM "$GUI_PID" 2>/dev/null || true + waited=0 + while [ "$waited" -lt "$QUIT_GRACE" ]; do + kill -0 "$GUI_PID" 2>/dev/null || break + sleep 1 + waited=$((waited + 1)) + done + + if kill -0 "$GUI_PID" 2>/dev/null; then + echo "UI_SMOKE_QUIT_FAIL: GUI (pid $GUI_PID) still alive ${QUIT_GRACE}s after SIGTERM" + RC=1 + else + echo "UI_SMOKE_QUIT_OK: GUI exited ${waited}s after SIGTERM" + RC=0 + fi + + # Tear down whatever is left (task/motion, or the GUI on failure). + kill -TERM -- -"$LINUXCNC_PID" 2>/dev/null || true + for _ in $(seq 30); do + kill -0 "$LINUXCNC_PID" 2>/dev/null || break + sleep 1 + done + if kill -0 "$LINUXCNC_PID" 2>/dev/null; then + kill -KILL -- -"$LINUXCNC_PID" 2>/dev/null || true + sleep 2 + bash "$LIB_DIR/cleanup-runtime.sh" + fi + exit "$RC" + ' +RC=$? + +echo "=== linuxcnc.err ===" +[ -f linuxcnc.err ] && cat linuxcnc.err +echo "=== ui-smoke.out ===" +[ -f ui-smoke.out ] && cat ui-smoke.out + +exit "$RC" diff --git a/tests/ui-smoke/gmoccapy-quit/checkresult b/tests/ui-smoke/gmoccapy-quit/checkresult new file mode 100755 index 00000000000..bb61550b304 --- /dev/null +++ b/tests/ui-smoke/gmoccapy-quit/checkresult @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/checkresult-quit.sh" "$@" diff --git a/tests/ui-smoke/gmoccapy-quit/skip b/tests/ui-smoke/gmoccapy-quit/skip new file mode 100755 index 00000000000..c1c260edf05 --- /dev/null +++ b/tests/ui-smoke/gmoccapy-quit/skip @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/skip-if-missing.sh" diff --git a/tests/ui-smoke/gmoccapy-quit/test.sh b/tests/ui-smoke/gmoccapy-quit/test.sh new file mode 100755 index 00000000000..116481f3872 --- /dev/null +++ b/tests/ui-smoke/gmoccapy-quit/test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/quit-launch.sh" \ + "$(cd "$(dirname "$0")/../../../configs/sim" && pwd)/gmoccapy/gmoccapy.ini" \ + "bin/gmoccapy" diff --git a/tests/ui-smoke/qtdragon-quit/checkresult b/tests/ui-smoke/qtdragon-quit/checkresult new file mode 
100755 index 00000000000..bb61550b304 --- /dev/null +++ b/tests/ui-smoke/qtdragon-quit/checkresult @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/checkresult-quit.sh" "$@" diff --git a/tests/ui-smoke/qtdragon-quit/skip b/tests/ui-smoke/qtdragon-quit/skip new file mode 100755 index 00000000000..c1c260edf05 --- /dev/null +++ b/tests/ui-smoke/qtdragon-quit/skip @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/skip-if-missing.sh" diff --git a/tests/ui-smoke/qtdragon-quit/test.sh b/tests/ui-smoke/qtdragon-quit/test.sh new file mode 100755 index 00000000000..cdd427223fc --- /dev/null +++ b/tests/ui-smoke/qtdragon-quit/test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -u + +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +. "$LIB_DIR/qtdragon-prepare.sh" + +exec "$LIB_DIR/quit-launch.sh" "$QTDRAGON_INI" "bin/qtvcp" diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index f08c5a1491a..812e99c7e89 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -1,72 +1,8 @@ #!/bin/bash -# qtdragon's qtvcp logger writes its log file (path from INI [DISPLAY] -# LOG_FILE) into the config directory. CI mounts the workspace read- -# only for the runtime user, so a relative LOG_FILE like 'qtdragon.log' -# resolves to a path qtvcp cannot create, hal_bridge then exits, and -# linuxcnc tears down before our driver can do anything. Mirror the -# config dir to a writable tmp location and patch LOG_FILE to be -# rooted at $HOME so the log lands in a directory we can write to. -# -# Force the Qt offscreen platform plugin. qtvcp under xvfb + xcb on -# Ubuntu 24.04 segfaults during widget construction (no backtrace); -# Debian containers in the same CI matrix do not. Offscreen renders -# entirely in memory, no X server needed (xvfb-run still wraps the -# call so the rest of scripts/linuxcnc's X-display assumptions hold). 
-# scripts/linuxcnc itself forces QT_QPA_PLATFORM=xcb unless -# LINUXCNC_OPENGL_PLATFORM is set to something other than glx, so we -# pin both env vars. -# -# qtdragon embeds a QWebEngineView (Chromium). Under offscreen + xvfb -# with no GPU and no user namespaces in the CI runner sandbox, -# QtWebEngine browser-process init segfaults even with --no-sandbox -# --single-process --disable-gpu (Chromium logs "Sandboxing disabled -# by user." then crashes inside the same qtvcp PID). Rather than keep -# tuning Chromium flags for a widget the smoke test never touches, -# we shim qtpy.QtWebEngineWidgets to raise ImportError; web_widget.py -# already has a fallback path that swaps the QWebEngineView for a -# plain QWidget when the import fails (its "fail safe - mostly for -# designer" branch). No Chromium spawn = no crash. set -u LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" -SRC_DIR="$(cd "$LIB_DIR/../../../configs/sim/qtdragon/qtdragon_xyz" && pwd)" +. "$LIB_DIR/qtdragon-prepare.sh" -WORK_DIR="$(mktemp -d -t ui-smoke-qtdragon.XXXXXX)" -trap 'rm -rf "$WORK_DIR"' EXIT -cp -r "$SRC_DIR/." "$WORK_DIR/" -sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ - "$WORK_DIR/qtdragon_metric.ini" - -export LINUXCNC_OPENGL_PLATFORM=offscreen -export QT_QPA_PLATFORM=offscreen - -# sitecustomize.py is auto-imported by Python from any sys.path entry -# at interpreter startup. Drop a meta_path finder that blocks the -# qtpy.QtWebEngineWidgets import so WebWidget falls back to QWidget. 
-SHIM_DIR="$WORK_DIR/_pyshim" -mkdir -p "$SHIM_DIR" -cat >"$SHIM_DIR/sitecustomize.py" <<'PY' -import sys -from importlib.abc import MetaPathFinder, Loader -from importlib.util import spec_from_loader - -_BLOCK = {'qtpy.QtWebEngineWidgets', 'PyQt5.QtWebEngineWidgets'} - -class _BlockLoader(Loader): - def create_module(self, spec): - raise ImportError('QtWebEngineWidgets blocked for ui-smoke CI') - def exec_module(self, module): - pass - -class _BlockFinder(MetaPathFinder): - def find_spec(self, name, path, target=None): - if name in _BLOCK: - return spec_from_loader(name, _BlockLoader()) - return None - -sys.meta_path.insert(0, _BlockFinder()) -PY -export PYTHONPATH="$SHIM_DIR${PYTHONPATH:+:$PYTHONPATH}" - -exec "$LIB_DIR/run-gui.sh" "$WORK_DIR/qtdragon_metric.ini" \ +exec "$LIB_DIR/run-gui.sh" "$QTDRAGON_INI" \ --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/touchy-quit/checkresult b/tests/ui-smoke/touchy-quit/checkresult new file mode 100755 index 00000000000..bb61550b304 --- /dev/null +++ b/tests/ui-smoke/touchy-quit/checkresult @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/checkresult-quit.sh" "$@" diff --git a/tests/ui-smoke/touchy-quit/skip b/tests/ui-smoke/touchy-quit/skip new file mode 100755 index 00000000000..c1c260edf05 --- /dev/null +++ b/tests/ui-smoke/touchy-quit/skip @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/skip-if-missing.sh" diff --git a/tests/ui-smoke/touchy-quit/test.sh b/tests/ui-smoke/touchy-quit/test.sh new file mode 100755 index 00000000000..d5a7851ea8c --- /dev/null +++ b/tests/ui-smoke/touchy-quit/test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/quit-launch.sh" \ + "$(cd "$(dirname "$0")/../../../configs/sim" && pwd)/touchy/touchy.ini" \ + "bin/touchy" From 5098eeb4ffc3be6a83e198b179835ed14aeb19b2 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:18:35 +0800 Subject: [PATCH 08/10] 
ui-smoke: fail fast and explain when the GUI process dies The driver polled NML for up to 60s after a GUI crash, then blamed whatever stage timed out (e.g. homing); a dead task keeps serving its last stat buffer. Watch the launcher PID and fail in ~1s pointing at the crash. Enable PYTHONFAULTHANDLER for a Python traceback on fatal signals. --- tests/ui-smoke/_lib/drive.py | 103 ++++++++++++++++++++++++------ tests/ui-smoke/_lib/launch-env.sh | 5 ++ 2 files changed, 89 insertions(+), 19 deletions(-) diff --git a/tests/ui-smoke/_lib/drive.py b/tests/ui-smoke/_lib/drive.py index 06becabff45..ad7e89a01fe 100755 --- a/tests/ui-smoke/_lib/drive.py +++ b/tests/ui-smoke/_lib/drive.py @@ -13,6 +13,7 @@ import argparse import linuxcnc +import os import sys import time @@ -32,6 +33,55 @@ STATE_STABILITY_S = 0.5 STATE_RETRY_BUDGET = 6 +# linuxcnc launcher PID, written to linuxcnc.pid by the launcher and read +# once at startup. The driver watches it so a GUI crash, which tears +# linuxcnc down, fails the test in ~1s with a clear message instead of +# waiting out a long NML poll. A dead task keeps serving its last stat +# buffer, so process liveness is the only reliable crash signal. +_WATCH_PID = None + + +class LauncherGone(Exception): + """linuxcnc process group exited (GUI crashed or task died).""" + + +def _read_pid(path): + try: + with open(path) as f: + return int(f.read().strip()) + except (OSError, ValueError): + return None + + +# Crash markers faulthandler and scripts/linuxcnc write to linuxcnc.err +# the instant the GUI dies. The launcher PID can linger in Cleanup, so +# scanning these catches the crash sooner and regardless of which GUI. 
+_CRASH_MARKERS = ("Fatal Python error", "Segmentation fault", "Aborted") + + +def _crash_marker_seen(): + try: + with open("linuxcnc.err") as f: + return any(m in f.read() for m in _CRASH_MARKERS) + except OSError: + return False + + +def _watchdog(): + """Raise LauncherGone if the GUI has crashed: either the launcher PID + is gone, or a crash marker appeared in linuxcnc.err. Unknown PID and + a missing log count as alive, so a not-yet-written file never + false-fails the test.""" + if _WATCH_PID is not None: + try: + os.kill(_WATCH_PID, 0) + except ProcessLookupError: + raise LauncherGone() + except PermissionError: + pass + if _crash_marker_seen(): + raise LauncherGone() + def connect_and_wait_ready(timeout): """Wait until linuxcnc.stat().poll() returns without error and @@ -47,6 +97,7 @@ def connect_and_wait_ready(timeout): deadline = time.monotonic() + timeout last_err = None while time.monotonic() < deadline: + _watchdog() try: stat = linuxcnc.stat() stat.poll() @@ -70,6 +121,7 @@ def wait_until_quiet(stat, predicate, timeout): must not happen.""" deadline = time.monotonic() + timeout while time.monotonic() < deadline: + _watchdog() stat.poll() if predicate(stat): return True @@ -195,6 +247,7 @@ def wait_program_started(stat, timeout): IDLE; we then read stat.position at (0,0,0).""" deadline = time.monotonic() + timeout while time.monotonic() < deadline: + _watchdog() stat.poll() if stat.interp_state != linuxcnc.INTERP_IDLE: return True @@ -214,6 +267,7 @@ def wait_program_idle(stat, timeout): deadline = time.monotonic() + timeout consecutive = 0 while time.monotonic() < deadline: + _watchdog() stat.poll() idle = ( stat.interp_state == linuxcnc.INTERP_IDLE @@ -311,30 +365,41 @@ def main(): if args.run_program and args.expect_delta_mm is None: ap.error("--run-program requires --expect-delta-mm DX,DY,DZ") - cmd, stat = connect_and_wait_ready(CONNECT_TIMEOUT_S) - if cmd is None: - return 1 - - # Give the GUI process enough time to finish constructing itself - # 
(load .ui files, compile resources.py if needed, etc.) and - # settle. If the GUI was going to crash on startup it has crashed - # by now. - time.sleep(SETTLE_S) + global _WATCH_PID + _WATCH_PID = _read_pid("linuxcnc.pid") - # Re-check task is still alive; a GUI crash may have torn linuxcnc - # down via Cleanup. try: - stat.poll() - except linuxcnc.error as e: - sys.stderr.write(f"UI_SMOKE_FAIL: task disappeared after GUI startup: {e}\n") - return 1 + cmd, stat = connect_and_wait_ready(CONNECT_TIMEOUT_S) + if cmd is None: + return 1 - if args.run_program: - if not run_program(cmd, stat, - args.run_program, args.expect_delta_mm, - args.tol, args.run_timeout): + # Give the GUI process enough time to finish constructing itself + # (load .ui files, compile resources.py if needed, etc.) and + # settle. If the GUI was going to crash on startup it has crashed + # by now. + time.sleep(SETTLE_S) + _watchdog() + + # Re-check task is still alive; a GUI crash may have torn linuxcnc + # down via Cleanup. + try: + stat.poll() + except linuxcnc.error as e: + sys.stderr.write(f"UI_SMOKE_FAIL: task disappeared after GUI startup: {e}\n") return 1 + if args.run_program: + if not run_program(cmd, stat, + args.run_program, args.expect_delta_mm, + args.tol, args.run_timeout): + return 1 + except LauncherGone: + sys.stderr.write( + "UI_SMOKE_FAIL: linuxcnc exited before the driver finished; " + "the GUI crashed or task died. 
See linuxcnc.out / linuxcnc.err " + "above for the backtrace.\n") + return 1 + print("UI_SMOKE_OK") return 0 diff --git a/tests/ui-smoke/_lib/launch-env.sh b/tests/ui-smoke/_lib/launch-env.sh index 909fff85842..110319195be 100644 --- a/tests/ui-smoke/_lib/launch-env.sh +++ b/tests/ui-smoke/_lib/launch-env.sh @@ -24,3 +24,8 @@ export CANBERRA_DRIVER=null export GST_PLUGIN_FEATURE_RANK="pulsesink:NONE,alsasink:NONE,osssink:NONE,oss4sink:NONE,jackaudiosink:NONE,pipewiresink:NONE,openalsink:NONE" export PULSE_SERVER=/dev/null export SDL_AUDIODRIVER=dummy + +# Dump a Python traceback on a fatal signal. For a pure-Python crash this +# names the line; for a C/C++ crash (Qt, dbus, GL) it shows the Python +# frame that called in. The native side is captured by crashdump.sh. +export PYTHONFAULTHANDLER=1 From 93ad82040d521e3532b7a8d89e72d04d16faea09 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:43:15 +0800 Subject: [PATCH 09/10] ui-smoke: capture a native backtrace on GUI crash PYTHONFAULTHANDLER stops at the Python frame; a Qt/dbus/GL segfault needs the C stack. Arm a core dump in the launchers and, if the GUI leaves a core, gdb its backtrace into the log. Failure-path only, so green runs pay nothing. --- tests/ui-smoke/_lib/crashdump.sh | 41 ++++++++++++++++++++++++++++++ tests/ui-smoke/_lib/launch.sh | 7 +++++ tests/ui-smoke/_lib/quit-launch.sh | 7 +++++ 3 files changed, 55 insertions(+) create mode 100644 tests/ui-smoke/_lib/crashdump.sh diff --git a/tests/ui-smoke/_lib/crashdump.sh b/tests/ui-smoke/_lib/crashdump.sh new file mode 100644 index 00000000000..eba9b09e7cf --- /dev/null +++ b/tests/ui-smoke/_lib/crashdump.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Native crash capture for the UI smoke launchers. A GUI segfault is the +# failure these tests most need to explain, and it lands in C/C++ (Qt, +# dbus, GL) where PYTHONFAULTHANDLER stops at the event-loop frame. 
Arm a +# core dump before launch; after the run, if the GUI left a core, print a +# native backtrace into the log so CI shows the faulting frame directly. +# Source with LIB_DIR set; runs only on the failure path, so green runs +# pay nothing. + +crashdump_arm() { + CORE_DIR="$(mktemp -d -t ui-smoke-cores.XXXXXX)" + export CORE_DIR + ulimit -c unlimited 2>/dev/null || true + # core_pattern is global and needs root; best-effort (CI has sudo). + # If it does not take, crashdump_report still finds a cwd "core". + sudo sysctl -w "kernel.core_pattern=$CORE_DIR/core.%e.%p" >/dev/null 2>&1 || true +} + +crashdump_report() { + [ -n "${CORE_DIR:-}" ] || return 0 + local core + # shellcheck disable=SC2012 # mktemp dir, no odd filenames + core=$(ls -t "$CORE_DIR"/core* ./core* /tmp/core* 2>/dev/null | head -1) + if [ -n "$core" ]; then + echo "=== crash: native backtrace ($core) ===" + # gdb is not on the CI runner by default; pull it in to read the core. + command -v gdb >/dev/null 2>&1 || sudo apt-get install -y -q gdb >/dev/null 2>&1 || true + if command -v gdb >/dev/null 2>&1; then + # "bt" first: gdb auto-selects the faulting thread on a SIGSEGV + # core. "thread apply all bt" after gives the rest. + gdb -batch -nx \ + -ex "bt" \ + -ex "echo \n=== all threads ===\n" \ + -ex "thread apply all bt" \ + "$(command -v python3)" "$core" 2>&1 | head -400 + else + echo "(gdb unavailable; core left at $core)" + fi + fi + rm -rf "$CORE_DIR" +} diff --git a/tests/ui-smoke/_lib/launch.sh b/tests/ui-smoke/_lib/launch.sh index 5bc90821a0d..b66b4af1d00 100755 --- a/tests/ui-smoke/_lib/launch.sh +++ b/tests/ui-smoke/_lib/launch.sh @@ -39,6 +39,10 @@ DRIVER_TIMEOUT=180 # launch-env.sh so launch.sh and quit-launch.sh cannot drift apart. . "$LIB_DIR/launch-env.sh" +# Arm a core dump so a GUI segfault can be backtraced after the run. +. 
"$LIB_DIR/crashdump.sh" +crashdump_arm + # Export the per-invocation values so the inner bash -c receives them # as proper env vars (avoids embedding paths into the inner script # via quoting, which breaks on apostrophes / spaces). @@ -102,4 +106,7 @@ echo "=== ui-smoke.out ===" echo "=== ui-smoke.err ===" [ -f ui-smoke.err ] && cat ui-smoke.err +# If the GUI dumped a core, print its native backtrace. +crashdump_report + exit "$RC" diff --git a/tests/ui-smoke/_lib/quit-launch.sh b/tests/ui-smoke/_lib/quit-launch.sh index b25b00584e3..1d437994a7b 100755 --- a/tests/ui-smoke/_lib/quit-launch.sh +++ b/tests/ui-smoke/_lib/quit-launch.sh @@ -39,6 +39,10 @@ QUIT_GRACE=15 # launch-env.sh so launch.sh and quit-launch.sh cannot drift apart. . "$LIB_DIR/launch-env.sh" +# Arm a core dump so a GUI segfault can be backtraced after the run. +. "$LIB_DIR/crashdump.sh" +crashdump_arm + export CONFIG_INI LIB_DIR DRIVER_TIMEOUT GUI_MATCH QUIT_GRACE # shellcheck disable=SC2016 @@ -117,4 +121,7 @@ echo "=== linuxcnc.err ===" echo "=== ui-smoke.out ===" [ -f ui-smoke.out ] && cat ui-smoke.out +# If the GUI dumped a core, print its native backtrace. +crashdump_report + exit "$RC" From a9ff6a4b4895ccd814b01db8e90862fbc33e6cf5 Mon Sep 17 00:00:00 2001 From: Luca Toniolo <10792599+grandixximo@users.noreply.github.com> Date: Wed, 3 Jun 2026 14:23:35 +0800 Subject: [PATCH 10/10] ui-smoke: show the driver's stderr in the quit path quit-launch.sh catted ui-smoke.out but not ui-smoke.err, hiding the driver's failure reason (e.g. the GUI-crashed message) on a quit-path failure. Cat it like launch.sh already does. 
--- tests/ui-smoke/_lib/quit-launch.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ui-smoke/_lib/quit-launch.sh b/tests/ui-smoke/_lib/quit-launch.sh index 1d437994a7b..ce6a172f8f7 100755 --- a/tests/ui-smoke/_lib/quit-launch.sh +++ b/tests/ui-smoke/_lib/quit-launch.sh @@ -120,6 +120,8 @@ echo "=== linuxcnc.err ===" [ -f linuxcnc.err ] && cat linuxcnc.err echo "=== ui-smoke.out ===" [ -f ui-smoke.out ] && cat ui-smoke.out +echo "=== ui-smoke.err ===" +[ -f ui-smoke.err ] && cat ui-smoke.err # If the GUI dumped a core, print its native backtrace. crashdump_report