From 57758f4aaa78027d3b55cf98b451b57a4f5f3370 Mon Sep 17 00:00:00 2001 From: John McLear Date: Mon, 4 May 2026 00:56:55 +0800 Subject: [PATCH] test(ci): stronger diagnostics for silent backend-test exit (follow-up to #7663) (#7665) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test(ci): stronger diagnostics for silent backend-test exit PR #7663 added unhandledRejection / uncaughtException handlers in common.ts. The next failure after merge (run 25279692065 - Windows without plugins, Node 24) showed mocha exiting with code 1 mid-suite 261ms after the last passing test, with NEITHER handler firing. So something more drastic is killing the process - SIGKILL, OOM, fatal native error - or mocha itself called process.exit before the JS handlers in common.ts could run. Two issues with the previous attempt: 1. Handlers in common.ts only register when a spec imports common.ts. Only 27 of 47 specs do. If a non-common spec triggers the death, handlers may never have been registered. 2. process.stderr.write is asynchronous on Windows when stderr is piped (which it is under GitHub Actions). On a hard kill the buffered line never reaches the runner log. This patch: - Moves diagnostic handlers to a dedicated tests/backend/diagnostics.ts loaded via mocha --require, so they register at startup before any spec runs. - Uses fs.writeSync(2, ...) for synchronous stderr writes that the kernel completes before returning - the line lands in the log even if the process is killed milliseconds later. - Adds beforeExit / exit / signal handlers so we can discriminate the exit mechanism: clean drain vs process.exit vs SIGKILL vs signal. - Tracks last-seen test via mocha root afterEach hook so the death point is visible in the log. The next CI failure should print enough context to identify the cause, after which we can fix the real bug and drop this file. Co-Authored-By: Claude Opus 4.7 (1M context) * fix(diagnostics): exit(1) on uncaughtException so fatal errors fail fast Qodo flagged on PR #7665: the uncaughtException handler in tests/backend/diagnostics.ts only logged and returned. Once a handler is registered, Node no longer exits on its own. Specs that don't import tests/backend/common.ts (20 of 47) have only this handler — so a fatal error would have been swallowed and tests would limp along instead of failing fast. Mirror common.ts and call process.exit(1) after logging. --------- Co-authored-by: Claude Opus 4.7 (1M context) --- src/package.json | 2 +- src/tests/backend/diagnostics.ts | 96 ++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 src/tests/backend/diagnostics.ts diff --git a/src/package.json b/src/package.json index e60c37065..13b969d25 100644 --- a/src/package.json +++ b/src/package.json @@ -143,7 +143,7 @@ }, "scripts": { "lint": "eslint .", - "test": "cross-env NODE_ENV=production mocha --import=tsx --timeout 120000 --recursive tests/backend/specs/**.ts ../node_modules/ep_*/static/tests/backend/specs/**", + "test": "cross-env NODE_ENV=production mocha --import=tsx --require ./tests/backend/diagnostics.ts --timeout 120000 --recursive tests/backend/specs/**.ts ../node_modules/ep_*/static/tests/backend/specs/**", "test-utils": "cross-env NODE_ENV=production mocha --import=tsx --timeout 5000 --recursive tests/backend/specs/*utils.ts", "test-container": "mocha --import=tsx --timeout 5000 tests/container/specs/api", "dev": "cross-env NODE_ENV=development node --require tsx/cjs node/server.ts", diff --git a/src/tests/backend/diagnostics.ts b/src/tests/backend/diagnostics.ts new file mode 100644 index 000000000..ac29ab198 --- /dev/null +++ b/src/tests/backend/diagnostics.ts @@ -0,0 +1,96 @@ +'use strict'; + +// Diagnostic-only mocha bootstrap, loaded via `mocha --require ./tests/backend/diagnostics.ts`. +// +// PR #7663 added unhandledRejection / uncaughtException handlers in +// tests/backend/common.ts to surface the silent ~22% backend-test flake. +// The next failure (run 25279692065, Windows without plugins, Node 24) +// showed mocha exit with code 1 mid-suite, 261ms after the last passing +// test, with NEITHER handler firing. This means the process was killed +// in a way that bypassed JS handlers — SIGKILL, OOM, or a fatal native +// error — OR mocha itself called process.exit before the handlers ran. +// +// This file: +// 1. Registers handlers UNCONDITIONALLY at mocha startup (common.ts is +// only imported by ~27 of 47 specs, so its handlers may register +// late or after a death-causing event). +// 2. Writes via fs.writeSync(2, ...) — synchronous stderr writes that +// complete before the kernel returns from the syscall, so the line +// lands in the runner log even if the process is killed +// milliseconds later. +// 3. Tracks the last-seen test via a mocha root afterEach hook so the +// death point is identified. +// 4. Logs exit-related events so we can discriminate: +// beforeExit + exit -> clean event-loop drain +// only exit -> process.exit() called somewhere +// neither -> hard kill (SIGKILL/OOM/runner) +// signal lines -> SIGTERM / SIGINT / SIGBREAK received +// +// Drop this file once the flake's root cause is identified and fixed. + +import {writeSync} from 'node:fs'; + +const t0 = Date.now(); +let lastSeenTest = ''; + +const diag = (msg: string): void => { + const line = `[diag +${Date.now() - t0}ms] ${msg}\n`; + try { + writeSync(2, line); + } catch (_) { + // Best-effort: if stderr is closed there is nothing we can do. + } +}; + +diag('diagnostics loaded'); + +process.on('unhandledRejection', (reason: any) => { + diag(`unhandledRejection: ${ + reason && reason.stack ? reason.stack : String(reason) + } (lastTest="${lastSeenTest}")`); + // Re-throw so existing common.ts handlers / mocha behavior is preserved. + throw reason; +}); + +process.on('uncaughtException', (err: any) => { + diag(`uncaughtException: ${ + err && err.stack ? err.stack : String(err) + } (lastTest="${lastSeenTest}")`); + // Force fail-fast. Specs that don't import common.ts only have THIS handler, + // and Node won't exit on its own once an uncaughtException listener is + // registered. Without the explicit exit a fatal error would be swallowed. + // common.ts has the same process.exit(1); whichever handler runs first wins. + process.exit(1); +}); + +process.on('beforeExit', (code: number) => { + diag(`beforeExit code=${code} exitCode=${process.exitCode} ` + + `lastTest="${lastSeenTest}"`); +}); + +process.on('exit', (code: number) => { + diag(`exit code=${code} lastTest="${lastSeenTest}"`); +}); + +for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP', 'SIGBREAK'] as const) { + // SIGHUP / SIGBREAK don't exist on every platform; ignore registration errors. + try { + process.on(sig as any, () => { + diag(`received ${sig} (lastTest="${lastSeenTest}")`); + // Let the default behavior (exit) happen. + process.exit(128); + }); + } catch (_) { + // ignore + } +} + +// Mocha root hook — only registered if mocha picks up this file via --require. +// We track the most recently-finished test so the death point is visible. +export const mochaHooks = { + afterEach(this: any) { + if (this.currentTest) { + lastSeenTest = this.currentTest.fullTitle(); + } + }, +};