diff --git a/debian/control.top.in b/debian/control.top.in index 0adf9121103..596063ea377 100644 --- a/debian/control.top.in +++ b/debian/control.top.in @@ -57,6 +57,7 @@ Build-Depends: python3-dbus , python3-dbus.mainloop.pyqt5 , python3-qtpy , + python3-zmq , python3-cairo , python3-gi , python3-gi-cairo , diff --git a/tests/ui-smoke/_lib/checkresult-quit.sh b/tests/ui-smoke/_lib/checkresult-quit.sh new file mode 100755 index 00000000000..c746e419269 --- /dev/null +++ b/tests/ui-smoke/_lib/checkresult-quit.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Shared result check for UI smoke quit-path tests. +# +# Pass if the quit launcher printed UI_SMOKE_QUIT_OK (the GUI exited on +# its own SIGTERM within the grace) and did not print UI_SMOKE_QUIT_FAIL. +set -u + +if [ $# -lt 1 ]; then + echo "FAIL: checkresult-quit requires the result-log path as argument" >&2 + exit 1 +fi + +LOG="$1" + +if grep -q '^UI_SMOKE_QUIT_FAIL' "$LOG"; then + echo "FAIL: $(grep -m1 '^UI_SMOKE_QUIT_FAIL' "$LOG")" >&2 + exit 1 +fi + +if ! grep -q '^UI_SMOKE_QUIT_OK' "$LOG"; then + echo "FAIL: GUI did not report a clean SIGTERM exit (no UI_SMOKE_QUIT_OK)" >&2 + exit 1 +fi + +exit 0 diff --git a/tests/ui-smoke/_lib/crashdump.sh b/tests/ui-smoke/_lib/crashdump.sh new file mode 100644 index 00000000000..eba9b09e7cf --- /dev/null +++ b/tests/ui-smoke/_lib/crashdump.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Native crash capture for the UI smoke launchers. A GUI segfault is the +# failure these tests most need to explain, and it lands in C/C++ (Qt, +# dbus, GL) where PYTHONFAULTHANDLER stops at the event-loop frame. Arm a +# core dump before launch; after the run, if the GUI left a core, print a +# native backtrace into the log so CI shows the faulting frame directly. +# Source with LIB_DIR set; runs only on the failure path, so green runs +# pay nothing. 
+ +crashdump_arm() { + CORE_DIR="$(mktemp -d -t ui-smoke-cores.XXXXXX)" + export CORE_DIR + ulimit -c unlimited 2>/dev/null || true + # core_pattern is global and needs root; best-effort (CI has sudo). + # If it does not take, crashdump_report still finds a cwd "core". + sudo sysctl -w "kernel.core_pattern=$CORE_DIR/core.%e.%p" >/dev/null 2>&1 || true +} + +crashdump_report() { + [ -n "${CORE_DIR:-}" ] || return 0 + local core + # shellcheck disable=SC2012 # mktemp dir, no odd filenames + core=$(ls -t "$CORE_DIR"/core* ./core* /tmp/core* 2>/dev/null | head -1) + if [ -n "$core" ]; then + echo "=== crash: native backtrace ($core) ===" + # gdb is not on the CI runner by default; pull it in to read the core. + command -v gdb >/dev/null 2>&1 || sudo apt-get install -y -q gdb >/dev/null 2>&1 || true + if command -v gdb >/dev/null 2>&1; then + # "bt" first: gdb auto-selects the faulting thread on a SIGSEGV + # core. "thread apply all bt" after gives the rest. + gdb -batch -nx \ + -ex "bt" \ + -ex "echo \n=== all threads ===\n" \ + -ex "thread apply all bt" \ + "$(command -v python3)" "$core" 2>&1 | head -400 + else + echo "(gdb unavailable; core left at $core)" + fi + fi + rm -rf "$CORE_DIR" +} diff --git a/tests/ui-smoke/_lib/drive.py b/tests/ui-smoke/_lib/drive.py index 1ee90a42234..ad7e89a01fe 100755 --- a/tests/ui-smoke/_lib/drive.py +++ b/tests/ui-smoke/_lib/drive.py @@ -1,15 +1,86 @@ #!/usr/bin/env python3 -# Minimal UI smoke driver: confirm linuxcnc task came up and the GUI -# did not crash. The smoke layer answers Bertho's "does it start" -# question only; functional behaviour (home, run a file, verify -# position) belongs in tests/ui-functional/ (Phase 2). +# UI smoke driver. +# +# Default mode (Phase 1): confirm linuxcnc task came up and the GUI did +# not crash. The driver only proves the GUI started and NML is reachable. 
+# +# --run-program mode (Phase 2): also estop-reset, machine-on, home, +# program_open + auto(RUN), wait for sustained INTERP_IDLE, and assert +# (stat.position_after - stat.position_after_home) equals --expect-delta-mm +# converted to machine units via stat.linear_units. Snapshot-and-delta +# sidesteps per-sim HOME offsets; mm-input + linear_units conversion +# sidesteps per-sim LINEAR_UNITS (axis and touchy sims are inch). +import argparse import linuxcnc +import os import sys import time CONNECT_TIMEOUT_S = 60.0 SETTLE_S = 3.0 +SETTLE_POLLS = 5 +POLL_INTERVAL_S = 0.01 +# Per-attempt wait timeout for ensure_state / ensure_mode. The state +# normally lands well under 1s; profiling showed nothing benefits from +# more than 3s here, and shorter timeouts trim wall time when a retry +# is needed (notably gmoccapy reverting task_mode AUTO -> MANUAL). +ENSURE_ATTEMPT_TIMEOUT_S = 3.0 +# After the desired task_state / task_mode is reached, re-check after +# this long. Some GUIs (notably gmoccapy and qtdragon) run their own +# startup commands that can revert a state we just set; the post-reach +# stability check catches that. +STATE_STABILITY_S = 0.5 +STATE_RETRY_BUDGET = 6 + +# linuxcnc launcher PID, written to linuxcnc.pid by the launcher and read +# once at startup. The driver watches it so a GUI crash, which tears +# linuxcnc down, fails the test in ~1s with a clear message instead of +# waiting out a long NML poll. A dead task keeps serving its last stat +# buffer, so process liveness is the only reliable crash signal. +_WATCH_PID = None + + +class LauncherGone(Exception): + """linuxcnc process group exited (GUI crashed or task died).""" + + +def _read_pid(path): + try: + with open(path) as f: + return int(f.read().strip()) + except (OSError, ValueError): + return None + + +# Crash markers faulthandler and scripts/linuxcnc write to linuxcnc.err +# the instant the GUI dies. 
The launcher PID can linger in Cleanup, so +# scanning these catches the crash sooner and regardless of which GUI. +_CRASH_MARKERS = ("Fatal Python error", "Segmentation fault", "Aborted") + + +def _crash_marker_seen(): + try: + with open("linuxcnc.err") as f: + return any(m in f.read() for m in _CRASH_MARKERS) + except OSError: + return False + + +def _watchdog(): + """Raise LauncherGone if the GUI has crashed: either the launcher PID + is gone, or a crash marker appeared in linuxcnc.err. Unknown PID and + a missing log count as alive, so a not-yet-written file never + false-fails the test.""" + if _WATCH_PID is not None: + try: + os.kill(_WATCH_PID, 0) + except ProcessLookupError: + raise LauncherGone() + except PermissionError: + pass + if _crash_marker_seen(): + raise LauncherGone() def connect_and_wait_ready(timeout): @@ -17,16 +88,22 @@ def connect_and_wait_ready(timeout): reports a non-negative echo_serial_number. The NML status buffer can be 'invalid err=3' for the first ~30s while linuxcncsvr is still initialising; recreate the stat object on every iteration so - a stale invalid buffer does not stick after linuxcncsvr is ready.""" + a stale invalid buffer does not stick after linuxcncsvr is ready. + + Catch the full Exception hierarchy: in early startup stat.poll() + can raise SystemError ('error return without exception set') when + the underlying C function reports failure without setting a Python + exception. Treat that the same as linuxcnc.error and retry.""" deadline = time.monotonic() + timeout last_err = None while time.monotonic() < deadline: + _watchdog() try: stat = linuxcnc.stat() stat.poll() if stat.echo_serial_number >= 0: return linuxcnc.command(), stat - except linuxcnc.error as e: + except Exception as e: last_err = e time.sleep(0.5) sys.stderr.write( @@ -35,23 +112,292 @@ def connect_and_wait_ready(timeout): return None, None +def wait_until_quiet(stat, predicate, timeout): + """Poll stat until predicate(stat) is true. 
Returns True on success, + False on timeout. Never writes UI_SMOKE_FAIL: caller decides whether + a timeout here is fatal (and writes its own UI_SMOKE_FAIL line) or + is part of a retry that may still succeed. checkresult.sh greps for + any '^UI_SMOKE_FAIL' line, so spurious emissions during retries + must not happen.""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + _watchdog() + stat.poll() + if predicate(stat): + return True + time.sleep(POLL_INTERVAL_S) + return False + + +def wait_until(stat, predicate, timeout, label): + """Like wait_until_quiet but emits UI_SMOKE_FAIL on timeout. Use + only when timeout is fatal at the call site (no retry above).""" + if wait_until_quiet(stat, predicate, timeout): + return True + sys.stderr.write(f"UI_SMOKE_FAIL: timeout waiting for {label} after {timeout}s\n") + return False + + +def home_all(cmd, stat, timeout): + """Home every joint. Uses c.home(-1) which respects HOME_SEQUENCE + if configured. Caller must have already ensured task_state is ON + via ensure_state; otherwise the home command is rejected with + 'cannot be executed until the machine is out of E-stop and turned + on'. Mode change uses ensure_mode so a GUI that reverts mode mid- + sequence (gmoccapy) is detected and retried.""" + if not ensure_mode(cmd, stat, linuxcnc.MODE_MANUAL, "MODE_MANUAL"): + return False + cmd.teleop_enable(0) + cmd.wait_complete() + stat.poll() + njoints = stat.joints + cmd.home(-1) + if not wait_until( + stat, + lambda s: all(s.homed[i] for i in range(njoints)), + timeout, "all joints homed"): + return False + cmd.teleop_enable(1) + cmd.wait_complete() + return True + + +def wait_state(stat, target_state, timeout, label): + """Poll until stat.task_state == target_state. wait_complete on a + state-change command only proves task ack'd the NML message, not + that the underlying state machine has transitioned. 
Polling + task_state is the only deterministic signal.""" + return wait_until( + stat, + lambda s: s.task_state == target_state, + timeout, label) + + +def ensure_state(cmd, stat, target_state, label): + """Issue c.state(target_state), wait for stat.task_state to reach + target_state, then verify it stays there across STATE_STABILITY_S. + If the GUI reverts (e.g. gmoccapy re-issues its own ESTOP on + startup), retry up to STATE_RETRY_BUDGET times. Returns True on + stable success, False on exhausted budget.""" + for attempt in range(1, STATE_RETRY_BUDGET + 1): + cmd.state(target_state) + cmd.wait_complete() + if not wait_until_quiet( + stat, lambda s: s.task_state == target_state, + ENSURE_ATTEMPT_TIMEOUT_S): + sys.stderr.write( + f"WARN: {label} not reached on attempt {attempt}, retrying\n") + continue + time.sleep(STATE_STABILITY_S) + stat.poll() + if stat.task_state == target_state: + return True + sys.stderr.write( + f"WARN: {label} reverted to task_state={stat.task_state} " + f"after attempt {attempt}, retrying\n") + sys.stderr.write( + f"UI_SMOKE_FAIL: {label} did not hold stable across " + f"{STATE_RETRY_BUDGET} attempts\n") + return False + + +def ensure_mode(cmd, stat, target_mode, label): + """Same retry+stability pattern as ensure_state, for task_mode.""" + for attempt in range(1, STATE_RETRY_BUDGET + 1): + cmd.mode(target_mode) + cmd.wait_complete() + if not wait_until_quiet( + stat, lambda s: s.task_mode == target_mode, + ENSURE_ATTEMPT_TIMEOUT_S): + sys.stderr.write( + f"WARN: {label} not reached on attempt {attempt}, retrying\n") + continue + time.sleep(STATE_STABILITY_S) + stat.poll() + if stat.task_mode == target_mode: + return True + sys.stderr.write( + f"WARN: {label} reverted to task_mode={stat.task_mode} " + f"after attempt {attempt}, retrying\n") + sys.stderr.write( + f"UI_SMOKE_FAIL: {label} did not hold stable across " + f"{STATE_RETRY_BUDGET} attempts\n") + return False + + +PROGRAM_START_TIMEOUT_S = 5.0 + + +def snapshot(stat): + 
"""Best-effort one-line summary of state fields relevant to Phase 2 + debugging. Caller is expected to have just polled.""" + return ( + f"task_state={stat.task_state} task_mode={stat.task_mode} " + f"interp_state={stat.interp_state} exec_state={stat.exec_state} " + f"motion_type={stat.motion_type} queue={stat.queue} " + f"queued_mdi_commands={stat.queued_mdi_commands} " + f"file={stat.file!r}") + + +def wait_program_started(stat, timeout): + """Wait until interp_state leaves INTERP_IDLE, i.e. the program + has actually begun executing. Without this guard, a short program + can finish before wait_program_idle gets its first poll, and the + settle-window then mistakes the pre-start IDLE for the post-end + IDLE; we then read stat.position at (0,0,0).""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + _watchdog() + stat.poll() + if stat.interp_state != linuxcnc.INTERP_IDLE: + return True + time.sleep(POLL_INTERVAL_S) + stat.poll() + sys.stderr.write( + f"UI_SMOKE_FAIL: program did not start within {timeout}s " + f"(interp_state stayed INTERP_IDLE) state: {snapshot(stat)}\n") + return False + + +def wait_program_idle(stat, timeout): + """Wait until interp_state returns to INTERP_IDLE and the motion + queue is drained for SETTLE_POLLS consecutive polls. 
Caller must + have already proven the program started via wait_program_started; + otherwise this returns immediately on the pre-start IDLE.""" + deadline = time.monotonic() + timeout + consecutive = 0 + while time.monotonic() < deadline: + _watchdog() + stat.poll() + idle = ( + stat.interp_state == linuxcnc.INTERP_IDLE + and stat.queue == 0 + ) + if idle: + consecutive += 1 + if consecutive >= SETTLE_POLLS: + return True + else: + consecutive = 0 + time.sleep(POLL_INTERVAL_S) + sys.stderr.write(f"UI_SMOKE_FAIL: program did not reach idle within {timeout}s\n") + return False + + +def run_program(cmd, stat, ngc_path, expect_delta_mm, tol, run_timeout): + """Estop reset, machine on, home, snapshot position, load + run ngc, + verify (final - start) delta matches expect_delta_mm converted to + machine units.""" + if not ensure_state(cmd, stat, linuxcnc.STATE_ESTOP_RESET, + "STATE_ESTOP_RESET"): + return False + if not ensure_state(cmd, stat, linuxcnc.STATE_ON, "STATE_ON"): + return False + + if not home_all(cmd, stat, timeout=60.0): + return False + + if not ensure_mode(cmd, stat, linuxcnc.MODE_AUTO, "MODE_AUTO"): + return False + + # Snapshot start position AFTER homing + AFTER mode transition. The + # GUI might re-issue mode commands during its own startup; doing the + # snapshot last means we record the position right before AUTO_RUN. + stat.poll() + start_pos = stat.position[:3] + + cmd.program_open(ngc_path) + cmd.wait_complete() + # No wait_complete after auto(AUTO_RUN, 0): wait_complete blocks + # until the operation finishes, which for AUTO_RUN means the whole + # program completes. That would race wait_program_started; by the + # time we polled, interp would already be back at INTERP_IDLE. + cmd.auto(linuxcnc.AUTO_RUN, 0) + + if not wait_program_started(stat, PROGRAM_START_TIMEOUT_S): + return False + if not wait_program_idle(stat, run_timeout): + return False + + # stat.linear_units: machine units per mm. 
mm machine -> 1.0; + # inch machine -> 1/25.4 = 0.03937. Multiplying the expected mm + # delta by linear_units gives the expected delta in machine units, + # which is what stat.position reports. + units_per_mm = stat.linear_units + expect_machine = [d * units_per_mm for d in expect_delta_mm] + final_pos = stat.position[:3] + actual_delta = [final_pos[i] - start_pos[i] for i in range(3)] + err = [abs(actual_delta[i] - expect_machine[i]) for i in range(3)] + if any(e > tol for e in err): + sys.stderr.write( + f"UI_SMOKE_FAIL: delta mismatch " + f"expect_mm={expect_delta_mm} units_per_mm={units_per_mm} " + f"expect_machine={expect_machine} " + f"start={start_pos} final={final_pos} " + f"actual_delta={actual_delta} err={err} tol={tol}\n") + return False + return True + + +def parse_xyz(s): + parts = [float(p) for p in s.split(",")] + if len(parts) != 3: + raise argparse.ArgumentTypeError("expected x,y,z (three comma-separated floats)") + return parts + + def main(): - cmd, stat = connect_and_wait_ready(CONNECT_TIMEOUT_S) - if cmd is None: - return 1 + ap = argparse.ArgumentParser() + ap.add_argument("--run-program", metavar="NGC", + help="g-code file to load and run (enables Phase 2 mode)") + ap.add_argument("--expect-delta-mm", type=parse_xyz, metavar="DX,DY,DZ", + help="expected XYZ delta in mm from post-home position " + "(required with --run-program). Driver converts to " + "machine units via stat.linear_units so the same " + "value works on inch and mm sims.") + ap.add_argument("--tol", type=float, default=1e-4, + help="position tolerance per axis in machine units " + "(default: 1e-4)") + ap.add_argument("--run-timeout", type=float, default=60.0, + help="program-completion timeout in seconds (default: 60)") + args = ap.parse_args() - # Give the GUI process enough time to finish constructing itself - # (load .ui files, compile resources.py if needed, etc.) and - # settle. If the GUI was going to crash on startup it has crashed - # by now. 
- time.sleep(SETTLE_S) + if args.run_program and args.expect_delta_mm is None: + ap.error("--run-program requires --expect-delta-mm DX,DY,DZ") + + global _WATCH_PID + _WATCH_PID = _read_pid("linuxcnc.pid") - # Re-check task is still alive; a GUI crash may have torn linuxcnc - # down via Cleanup. try: - stat.poll() - except linuxcnc.error as e: - sys.stderr.write(f"UI_SMOKE_FAIL: task disappeared after GUI startup: {e}\n") + cmd, stat = connect_and_wait_ready(CONNECT_TIMEOUT_S) + if cmd is None: + return 1 + + # Give the GUI process enough time to finish constructing itself + # (load .ui files, compile resources.py if needed, etc.) and + # settle. If the GUI was going to crash on startup it has crashed + # by now. + time.sleep(SETTLE_S) + _watchdog() + + # Re-check task is still alive; a GUI crash may have torn linuxcnc + # down via Cleanup. + try: + stat.poll() + except linuxcnc.error as e: + sys.stderr.write(f"UI_SMOKE_FAIL: task disappeared after GUI startup: {e}\n") + return 1 + + if args.run_program: + if not run_program(cmd, stat, + args.run_program, args.expect_delta_mm, + args.tol, args.run_timeout): + return 1 + except LauncherGone: + sys.stderr.write( + "UI_SMOKE_FAIL: linuxcnc exited before the driver finished; " + "the GUI crashed or task died. See linuxcnc.out / linuxcnc.err " + "above for the backtrace.\n") return 1 print("UI_SMOKE_OK") diff --git a/tests/ui-smoke/_lib/launch-env.sh b/tests/ui-smoke/_lib/launch-env.sh new file mode 100644 index 00000000000..110319195be --- /dev/null +++ b/tests/ui-smoke/_lib/launch-env.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Shared headless environment for the UI smoke launchers. Sourced by +# launch.sh and quit-launch.sh so the two stay in lockstep; a knob added +# here reaches both. The caller must set LIB_DIR before sourcing (it +# locates asound.conf). This file only exports; it runs no commands. + +# Force software OpenGL (Mesa llvmpipe). 
CI runners have no GPU and +# Qt/GL widgets segfault under hardware GL with no display. The Qt- +# specific knobs cover qtdragon's QtQuick + RHI paths. +export LIBGL_ALWAYS_SOFTWARE=1 +export GALLIUM_DRIVER=llvmpipe +export QT_QUICK_BACKEND=software +export QSG_RHI_BACKEND=software +export QT_OPENGL=software +# Dodge a long-known xcb_glx integration crash that hits QtWebEngine +# and related Qt5 widgets under xvfb (Launchpad #1761708, QTBUG-67537). +# Forces the egl path which is what software-GL stacks expect anyway. +export QT_XCB_GL_INTEGRATION=xcb_egl + +# Silence audio: xvfb covers X but not sound. Demote every Gst +# Audio/Sink and disable canberra/SDL/pulse/ALSA-default paths. +export ALSA_CONFIG_PATH="$LIB_DIR/asound.conf" +export CANBERRA_DRIVER=null +export GST_PLUGIN_FEATURE_RANK="pulsesink:NONE,alsasink:NONE,osssink:NONE,oss4sink:NONE,jackaudiosink:NONE,pipewiresink:NONE,openalsink:NONE" +export PULSE_SERVER=/dev/null +export SDL_AUDIODRIVER=dummy + +# Dump a Python traceback on a fatal signal. For a pure-Python crash this +# names the line; for a C/C++ crash (Qt, dbus, GL) it shows the Python +# frame that called in. The native side is captured by crashdump.sh. +export PYTHONFAULTHANDLER=1 diff --git a/tests/ui-smoke/_lib/launch.sh b/tests/ui-smoke/_lib/launch.sh index 5f3672e786c..b66b4af1d00 100755 --- a/tests/ui-smoke/_lib/launch.sh +++ b/tests/ui-smoke/_lib/launch.sh @@ -16,6 +16,8 @@ set -u CONFIG_INI="$1" +shift +DRIVER_ARGS=("$@") TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -28,26 +30,18 @@ rm -f ui-smoke.out ui-smoke.err linuxcnc.pid bash "$LIB_DIR/cleanup-runtime.sh" # Launch linuxcnc inside xvfb-run. The outer timeout is a safety net -# so a wedged GUI cannot hang CI. -LINUXCNC_TIMEOUT=240 -DRIVER_TIMEOUT=90 - -# Force software OpenGL (Mesa llvmpipe). CI runners have no GPU and -# Qt/GL widgets segfault under hardware GL with no display. 
The Qt- -# specific knobs cover qtdragon's QtQuick + RHI paths. -export LIBGL_ALWAYS_SOFTWARE=1 -export GALLIUM_DRIVER=llvmpipe -export QT_QUICK_BACKEND=software -export QSG_RHI_BACKEND=software -export QT_OPENGL=software - -# Silence audio: xvfb covers X but not sound. Demote every Gst -# Audio/Sink and disable canberra/SDL/pulse/ALSA-default paths. -export ALSA_CONFIG_PATH="$LIB_DIR/asound.conf" -export CANBERRA_DRIVER=null -export GST_PLUGIN_FEATURE_RANK="pulsesink:NONE,alsasink:NONE,osssink:NONE,oss4sink:NONE,jackaudiosink:NONE,pipewiresink:NONE,openalsink:NONE" -export PULSE_SERVER=/dev/null -export SDL_AUDIODRIVER=dummy +# so a wedged GUI cannot hang CI. Driver timeout covers connect (60s) +# + GUI settle (3s) + optional Phase 2 run (estop/home/program ~90s). +LINUXCNC_TIMEOUT=300 +DRIVER_TIMEOUT=180 + +# Shared headless environment (software GL + audio silencing), kept in +# launch-env.sh so launch.sh and quit-launch.sh cannot drift apart. +. "$LIB_DIR/launch-env.sh" + +# Arm a core dump so a GUI segfault can be backtraced after the run. +. "$LIB_DIR/crashdump.sh" +crashdump_arm # Export the per-invocation values so the inner bash -c receives them # as proper env vars (avoids embedding paths into the inner script @@ -71,7 +65,9 @@ xvfb-run -a --server-args="-screen 0 1024x768x24" \ # The driver polls NML readiness itself (BsAtHome review: # avoid real-clock waits where status polling will do). - timeout "$DRIVER_TIMEOUT" python3 "$LIB_DIR/drive.py" >ui-smoke.out 2>ui-smoke.err + # Driver args (Phase 2: --run-program/--expect-delta-mm) come through + # as positional $@ from the inner bash -c. + timeout "$DRIVER_TIMEOUT" python3 "$LIB_DIR/drive.py" "$@" >ui-smoke.out 2>ui-smoke.err DRIVE_RC=$? # Clean shutdown: GUI-specific quit first (lets linuxcnc end @@ -97,7 +93,7 @@ xvfb-run -a --server-args="-screen 0 1024x768x24" \ fi exit "$DRIVE_RC" - ' + ' _launch "${DRIVER_ARGS[@]}" RC=$? # Surface logs so checkresult and CI artifact upload can see them. 
@@ -110,4 +106,7 @@ echo "=== ui-smoke.out ===" echo "=== ui-smoke.err ===" [ -f ui-smoke.err ] && cat ui-smoke.err +# If the GUI dumped a core, print its native backtrace. +crashdump_report + exit "$RC" diff --git a/tests/ui-smoke/_lib/qtdragon-prepare.sh b/tests/ui-smoke/_lib/qtdragon-prepare.sh new file mode 100644 index 00000000000..19741978d73 --- /dev/null +++ b/tests/ui-smoke/_lib/qtdragon-prepare.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# Sourced by the qtdragon ui-smoke tests (smoke and quit) to build a +# config qtvcp can actually run under CI. Sets QTDRAGON_INI to the +# patched ini path and exports the headless env; the caller then execs +# run-gui.sh or quit-launch.sh with "$QTDRAGON_INI". Must be sourced +# with LIB_DIR already set. +# +# qtdragon's qtvcp logger writes its log file (path from INI [DISPLAY] +# LOG_FILE) into the config directory. CI mounts the workspace read- +# only for the runtime user, so a relative LOG_FILE like 'qtdragon.log' +# resolves to a path qtvcp cannot create, hal_bridge then exits, and +# linuxcnc tears down before our driver can do anything. Mirror the +# config dir to a writable tmp location and patch LOG_FILE to be +# rooted at $HOME so the log lands in a directory we can write to. +# +# Force the Qt offscreen platform plugin. qtvcp under xvfb + xcb on +# Ubuntu 24.04 segfaults during widget construction (no backtrace); +# Debian containers in the same CI matrix do not. Offscreen renders +# entirely in memory, no X server needed (xvfb-run still wraps the +# call so the rest of scripts/linuxcnc's X-display assumptions hold). +# scripts/linuxcnc itself forces QT_QPA_PLATFORM=xcb unless +# LINUXCNC_OPENGL_PLATFORM is set to something other than glx, so we +# pin both env vars. +# +# qtdragon embeds a QWebEngineView (Chromium). 
Under offscreen + xvfb +# with no GPU and no user namespaces in the CI runner sandbox, +# QtWebEngine browser-process init segfaults even with --no-sandbox +# --single-process --disable-gpu (Chromium logs "Sandboxing disabled +# by user." then crashes inside the same qtvcp PID). Rather than keep +# tuning Chromium flags for a widget the smoke test never touches, +# we shim qtpy.QtWebEngineWidgets to raise ImportError; web_widget.py +# already has a fallback path that swaps the QWebEngineView for a +# plain QWidget when the import fails (its "fail safe - mostly for +# designer" branch). No Chromium spawn = no crash. + +: "${LIB_DIR:?qtdragon-prepare.sh must be sourced with LIB_DIR set}" + +SRC_DIR="$(cd "$LIB_DIR/../../../configs/sim/qtdragon/qtdragon_xyz" && pwd)" + +WORK_DIR="$(mktemp -d -t ui-smoke-qtdragon.XXXXXX)" +trap 'rm -rf "$WORK_DIR"' EXIT +cp -r "$SRC_DIR/." "$WORK_DIR/" +sed -i 's|^LOG_FILE = qtdragon\.log$|LOG_FILE = ~/qtdragon.log|' \ + "$WORK_DIR/qtdragon_metric.ini" + +export LINUXCNC_OPENGL_PLATFORM=offscreen +export QT_QPA_PLATFORM=offscreen + +# sitecustomize.py is auto-imported by Python from any sys.path entry +# at interpreter startup. Drop a meta_path finder that blocks the +# qtpy.QtWebEngineWidgets import so WebWidget falls back to QWidget. 
+SHIM_DIR="$WORK_DIR/_pyshim" +mkdir -p "$SHIM_DIR" +cat >"$SHIM_DIR/sitecustomize.py" <<'PY' +import sys +from importlib.abc import MetaPathFinder, Loader +from importlib.util import spec_from_loader + +_BLOCK = {'qtpy.QtWebEngineWidgets', 'PyQt5.QtWebEngineWidgets'} + +class _BlockLoader(Loader): + def create_module(self, spec): + raise ImportError('QtWebEngineWidgets blocked for ui-smoke CI') + def exec_module(self, module): + pass + +class _BlockFinder(MetaPathFinder): + def find_spec(self, name, path, target=None): + if name in _BLOCK: + return spec_from_loader(name, _BlockLoader()) + return None + +sys.meta_path.insert(0, _BlockFinder()) +PY +export PYTHONPATH="$SHIM_DIR${PYTHONPATH:+:$PYTHONPATH}" + +# Consumed by the sourcing test.sh, which execs the launcher with it. +# shellcheck disable=SC2034 +QTDRAGON_INI="$WORK_DIR/qtdragon_metric.ini" diff --git a/tests/ui-smoke/_lib/quit-launch.sh b/tests/ui-smoke/_lib/quit-launch.sh new file mode 100755 index 00000000000..ce6a172f8f7 --- /dev/null +++ b/tests/ui-smoke/_lib/quit-launch.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# Quit-path launcher for UI smoke tests. +# Usage: quit-launch.sh <config.ini> <gui-match> +# +# Boots linuxcnc + GUI under xvfb-run exactly like launch.sh, waits for +# the NML task to come up (via drive.py), then sends SIGTERM to the GUI +# process *alone* and asserts the GUI exits on its own within a short +# grace. This is the regression guard for the SIGTERM clean-shutdown +# handlers: a GUI that absorbs SIGTERM and has to be SIGKILLed fails. +# +# <gui-match> is a pgrep -f pattern identifying the GUI process +# (e.g. "bin/touchy", "bin/gmoccapy"). It must not match the linuxcnc +# launcher or task/motion helpers. 
+# +# Markers (consumed by checkresult-quit.sh): +# UI_SMOKE_QUIT_OK GUI exited on SIGTERM within QUIT_GRACE +# UI_SMOKE_QUIT_FAIL GUI never started, was not found, or ignored TERM + +set -u + +CONFIG_INI="$1" +GUI_MATCH="$2" +TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" +LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +cd "$TEST_DIR" || exit 1 +rm -f ui-smoke.out ui-smoke.err linuxcnc.pid + +bash "$LIB_DIR/cleanup-runtime.sh" + +LINUXCNC_TIMEOUT=240 +DRIVER_TIMEOUT=90 +# Seconds to wait for the GUI to exit after SIGTERM before declaring it +# stuck. A GUI honouring SIGTERM exits in well under a second; the +# margin covers Cleanup of task/motion on slow CI. +QUIT_GRACE=15 + +# Shared headless environment (software GL + audio silencing), kept in +# launch-env.sh so launch.sh and quit-launch.sh cannot drift apart. +. "$LIB_DIR/launch-env.sh" + +# Arm a core dump so a GUI segfault can be backtraced after the run. +. "$LIB_DIR/crashdump.sh" +crashdump_arm + +export CONFIG_INI LIB_DIR DRIVER_TIMEOUT GUI_MATCH QUIT_GRACE + +# shellcheck disable=SC2016 +xvfb-run -a --server-args="-screen 0 1024x768x24" \ + timeout "$LINUXCNC_TIMEOUT" \ + bash -c ' + setsid linuxcnc -r "$CONFIG_INI" >linuxcnc.out 2>linuxcnc.err & + LINUXCNC_PID=$! + echo "$LINUXCNC_PID" >linuxcnc.pid + + # Wait until the task is reachable (GUI has constructed and the + # NML round-trip works). Reuse the phase-1 driver for readiness. + timeout "$DRIVER_TIMEOUT" python3 "$LIB_DIR/drive.py" >ui-smoke.out 2>ui-smoke.err + if ! grep -q "^UI_SMOKE_OK$" ui-smoke.out; then + echo "UI_SMOKE_QUIT_FAIL: GUI did not come up; cannot test quit" + kill -KILL -- -"$LINUXCNC_PID" 2>/dev/null || true + bash "$LIB_DIR/cleanup-runtime.sh" + exit 1 + fi + + # Identify the GUI process. 
pgrep -f matches against the whole + # command line, so wrapper processes (the linuxcnc launcher, the + # xvfb-run shell, this bash -c) also match because the GUI name + # appears in the config path or the embedded script text. Every + # such wrapper has a shell or xvfb-run as argv[0]; the real GUI + # is a python interpreter. Pick the first match whose argv[0] + # basename is a python binary. + GUI_PID="" + for p in $(pgrep -f "$GUI_MATCH"); do + arg0=$(tr "\0" "\n" <"/proc/$p/cmdline" 2>/dev/null | head -1) + case "$(basename "$arg0" 2>/dev/null)" in + python*) GUI_PID="$p"; break ;; + esac + done + if [ -z "$GUI_PID" ]; then + echo "UI_SMOKE_QUIT_FAIL: GUI process matching \"$GUI_MATCH\" not found" + kill -KILL -- -"$LINUXCNC_PID" 2>/dev/null || true + bash "$LIB_DIR/cleanup-runtime.sh" + exit 1 + fi + + # Send SIGTERM to the GUI alone and time how long it takes to go. + kill -TERM "$GUI_PID" 2>/dev/null || true + waited=0 + while [ "$waited" -lt "$QUIT_GRACE" ]; do + kill -0 "$GUI_PID" 2>/dev/null || break + sleep 1 + waited=$((waited + 1)) + done + + if kill -0 "$GUI_PID" 2>/dev/null; then + echo "UI_SMOKE_QUIT_FAIL: GUI (pid $GUI_PID) still alive ${QUIT_GRACE}s after SIGTERM" + RC=1 + else + echo "UI_SMOKE_QUIT_OK: GUI exited ${waited}s after SIGTERM" + RC=0 + fi + + # Tear down whatever is left (task/motion, or the GUI on failure). + kill -TERM -- -"$LINUXCNC_PID" 2>/dev/null || true + for _ in $(seq 30); do + kill -0 "$LINUXCNC_PID" 2>/dev/null || break + sleep 1 + done + if kill -0 "$LINUXCNC_PID" 2>/dev/null; then + kill -KILL -- -"$LINUXCNC_PID" 2>/dev/null || true + sleep 2 + bash "$LIB_DIR/cleanup-runtime.sh" + fi + exit "$RC" + ' +RC=$? + +echo "=== linuxcnc.err ===" +[ -f linuxcnc.err ] && cat linuxcnc.err +echo "=== ui-smoke.out ===" +[ -f ui-smoke.out ] && cat ui-smoke.out +echo "=== ui-smoke.err ===" +[ -f ui-smoke.err ] && cat ui-smoke.err + +# If the GUI dumped a core, print its native backtrace. 
+crashdump_report + +exit "$RC" diff --git a/tests/ui-smoke/_lib/run-gui.sh b/tests/ui-smoke/_lib/run-gui.sh index 01840944bc0..980dff7f51b 100755 --- a/tests/ui-smoke/_lib/run-gui.sh +++ b/tests/ui-smoke/_lib/run-gui.sh @@ -1,9 +1,10 @@ #!/bin/bash # Dispatcher invoked from each per-GUI test.sh. Resolves an INI path # under configs/sim/ and execs launch.sh in the caller's test dir. -# Usage: run-gui.sh +# Usage: run-gui.sh <ini-path> [driver-args...] # e.g. run-gui.sh axis/axis.ini # run-gui.sh qtdragon/qtdragon_xyz/qtdragon_metric.ini +# run-gui.sh axis/axis.ini --run-program /abs/smoke.ngc --expect-delta-mm 1,1,0 set -u @@ -11,5 +12,17 @@ LIB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" TEST_DIR="${TEST_DIR:-$(cd "$(dirname "$0")" && pwd)}" CONFIGS_DIR="$(cd "$LIB_DIR/../../../configs/sim" && pwd)" +INI_ARG="$1" +shift + +# Accept either a relative path under configs/sim/ or an absolute path. +# Absolute paths are used by tests that need to point at a writable +# mirror of a shipped config (qtdragon writes a log file inside the +# config dir, which is read-only on CI). +case "$INI_ARG" in + /*) INI_PATH="$INI_ARG" ;; + *) INI_PATH="$CONFIGS_DIR/$INI_ARG" ;; +esac + export TEST_DIR -exec "$LIB_DIR/launch.sh" "$CONFIGS_DIR/$1" +exec "$LIB_DIR/launch.sh" "$INI_PATH" "$@" diff --git a/tests/ui-smoke/_lib/smoke.ngc b/tests/ui-smoke/_lib/smoke.ngc new file mode 100644 index 00000000000..036bff7bd09 --- /dev/null +++ b/tests/ui-smoke/_lib/smoke.ngc @@ -0,0 +1,13 @@ +(Phase 2 UI smoke program. Force mm input units with G21 so the move) +(commanded here is the same physical distance regardless of the sim) +(config's LINEAR_UNITS. Use G91 relative so the move is independent of) +(each sim's HOME position, then return to G90 absolute and end with M2.) +(stat.position is still reported in the machine's LINEAR_UNITS; the) +(driver converts the expected mm delta to machine units via) +(stat.linear_units before comparing. 
Note: axis and touchy sims are) +(inch machines, so a 1 mm move shows as ~0.03937 inch in stat.position.) +G21 +G91 +G0 X1 Y1 +G90 +M2 diff --git a/tests/ui-smoke/axis/test.sh b/tests/ui-smoke/axis/test.sh index efa45dd9590..ba4fddfc6d0 100755 --- a/tests/ui-smoke/axis/test.sh +++ b/tests/ui-smoke/axis/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" axis/axis.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" # absolute, so --run-program gets an absolute smoke.ngc path +exec "$LIB_DIR/run-gui.sh" axis/axis.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/gmoccapy-quit/checkresult b/tests/ui-smoke/gmoccapy-quit/checkresult new file mode 100755 index 00000000000..bb61550b304 --- /dev/null +++ b/tests/ui-smoke/gmoccapy-quit/checkresult @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/checkresult-quit.sh" "$@" diff --git a/tests/ui-smoke/gmoccapy-quit/skip b/tests/ui-smoke/gmoccapy-quit/skip new file mode 100755 index 00000000000..c1c260edf05 --- /dev/null +++ b/tests/ui-smoke/gmoccapy-quit/skip @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/skip-if-missing.sh" diff --git a/tests/ui-smoke/gmoccapy-quit/test.sh b/tests/ui-smoke/gmoccapy-quit/test.sh new file mode 100755 index 00000000000..116481f3872 --- /dev/null +++ b/tests/ui-smoke/gmoccapy-quit/test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/quit-launch.sh" \ + "$(cd "$(dirname "$0")/../../../configs/sim" && pwd)/gmoccapy/gmoccapy.ini" \ + "bin/gmoccapy" diff --git a/tests/ui-smoke/gmoccapy/test.sh b/tests/ui-smoke/gmoccapy/test.sh index 29adc2b9397..de93beaed99 100755 --- a/tests/ui-smoke/gmoccapy/test.sh +++ b/tests/ui-smoke/gmoccapy/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" gmoccapy/gmoccapy.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" # absolute, so --run-program gets an absolute smoke.ngc path +exec "$LIB_DIR/run-gui.sh" gmoccapy/gmoccapy.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/qtdragon-quit/checkresult
b/tests/ui-smoke/qtdragon-quit/checkresult new file mode 100755 index 00000000000..bb61550b304 --- /dev/null +++ b/tests/ui-smoke/qtdragon-quit/checkresult @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/checkresult-quit.sh" "$@" diff --git a/tests/ui-smoke/qtdragon-quit/skip b/tests/ui-smoke/qtdragon-quit/skip new file mode 100755 index 00000000000..c1c260edf05 --- /dev/null +++ b/tests/ui-smoke/qtdragon-quit/skip @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/skip-if-missing.sh" diff --git a/tests/ui-smoke/qtdragon-quit/test.sh b/tests/ui-smoke/qtdragon-quit/test.sh new file mode 100755 index 00000000000..cdd427223fc --- /dev/null +++ b/tests/ui-smoke/qtdragon-quit/test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -u + +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +. "$LIB_DIR/qtdragon-prepare.sh" # expected to set QTDRAGON_INI, used below; set -u aborts if it does not (confirm in qtdragon-prepare.sh) + +exec "$LIB_DIR/quit-launch.sh" "$QTDRAGON_INI" "bin/qtvcp" diff --git a/tests/ui-smoke/qtdragon/test.sh b/tests/ui-smoke/qtdragon/test.sh index 7df11989920..812e99c7e89 100755 --- a/tests/ui-smoke/qtdragon/test.sh +++ b/tests/ui-smoke/qtdragon/test.sh @@ -1,2 +1,8 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" qtdragon/qtdragon_xyz/qtdragon_metric.ini +set -u + +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" +.
"$LIB_DIR/qtdragon-prepare.sh" # expected to set QTDRAGON_INI, used below; set -u aborts if it does not (confirm in qtdragon-prepare.sh) + +exec "$LIB_DIR/run-gui.sh" "$QTDRAGON_INI" \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0 diff --git a/tests/ui-smoke/touchy-quit/checkresult b/tests/ui-smoke/touchy-quit/checkresult new file mode 100755 index 00000000000..bb61550b304 --- /dev/null +++ b/tests/ui-smoke/touchy-quit/checkresult @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/checkresult-quit.sh" "$@" diff --git a/tests/ui-smoke/touchy-quit/skip b/tests/ui-smoke/touchy-quit/skip new file mode 100755 index 00000000000..c1c260edf05 --- /dev/null +++ b/tests/ui-smoke/touchy-quit/skip @@ -0,0 +1,2 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/skip-if-missing.sh" diff --git a/tests/ui-smoke/touchy-quit/test.sh b/tests/ui-smoke/touchy-quit/test.sh new file mode 100755 index 00000000000..d5a7851ea8c --- /dev/null +++ b/tests/ui-smoke/touchy-quit/test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +exec "$(dirname "$0")/../_lib/quit-launch.sh" \ + "$(cd "$(dirname "$0")/../../../configs/sim" && pwd)/touchy/touchy.ini" \ + "bin/touchy" diff --git a/tests/ui-smoke/touchy/test.sh b/tests/ui-smoke/touchy/test.sh index 4b9c904d700..831fe81b346 100755 --- a/tests/ui-smoke/touchy/test.sh +++ b/tests/ui-smoke/touchy/test.sh @@ -1,2 +1,4 @@ #!/bin/bash -exec "$(dirname "$0")/../_lib/run-gui.sh" touchy/touchy.ini +LIB_DIR="$(cd "$(dirname "$0")/../_lib" && pwd)" # absolute, so --run-program gets an absolute smoke.ngc path +exec "$LIB_DIR/run-gui.sh" touchy/touchy.ini \ + --run-program "$LIB_DIR/smoke.ngc" --expect-delta-mm 1,1,0