feat(test): feature tags + declared-disables contract for opt-out plugins (#7648)

Plugins that intentionally remove a baseline Etherpad feature (ep_disable_chat, ep_disable_change_author_name, ep_disable_error_messages, ep_disable_reset_authorship_colours) currently break core tests for the removed feature. Their main branches are red, their auto-publish gates never fire, and Dependabot PRs pile up. The temptation is to give these plugins an "opt-out of these tests" flag — but that's a self-serving attestation: a plugin can claim "I just disable chat, ignore those tests" and quietly break unrelated functionality on the user's install. etherpad.org/plugins would still show it green. This commit introduces a small declared-disables contract that closes that gap: 1. Core specs grow @feature:* Playwright tags. Initial set: @feature:chat, @feature:username, @feature:clear-authorship, @feature:error-gritter. Tags are added test-by-test where the test exercises a single feature, so the contract stays precise. 2. Plugins declare which feature tags they disable in their ep.json: { "name": "ep_disable_chat", "disables": ["@feature:chat"], ... } 3. bin/run-frontend-tests-with-disables.sh enforces the contract via two passes: - Pass 1 (regression): every test NOT in the disabled list must pass. Catches plugins that break things they don't claim to. - Pass 2 (honesty): every test that IS in the disabled list must FAIL. Catches plugins that lie about disabling features they don't actually disable, and stops them from grep-inverting arbitrary unrelated tests. 4. doc/PLUGIN_FEATURE_DISABLES.md walks the design and migration. The disables list is in ep.json (publicly visible), so etherpad.org/plugins can surface "this plugin disables: chat" alongside the green CI badge — users see what they're losing before they install. Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 20:26:49 +02:00 · 2026-05-02 17:00:28 +08:00 · 2026-05-02 17:00:28 +08:00 · b769ab6e54
commit b769ab6e54
parent 90fd9b15b1
9 changed files with 279 additions and 19 deletions
--- a/bin/run-frontend-tests-with-disables.sh
+++ b/bin/run-frontend-tests-with-disables.sh
@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+#
+# Run the frontend test suite with awareness of a plugin's declared
+# `disables` list (see doc/PLUGIN_FEATURE_DISABLES.md).
+#
+# A plugin that intentionally removes a baseline Etherpad feature MUST
+# declare which feature tags it disables in its ep.json:
+#
+#   { "name": "ep_disable_chat", "disables": ["@feature:chat"], ... }
+#
+# This script enforces that contract with two passes:
+#
+#   1. Regression pass — every test NOT tagged with a disabled feature
+#      must pass. Catches the case where the plugin breaks something
+#      unrelated to the feature it claims to disable.
+#
+#   2. Honesty pass — every test that IS tagged with a disabled feature
+#      must FAIL. If those tests pass, the plugin's `disables` claim is
+#      wrong; the feature it says it disables actually still works.
+#      Catches the case where a plugin opts out of tests it has no
+#      right to skip.
+#
+# Both passes have to pass for CI to be green. A plugin can't quietly
+# disable functionality without declaring it (pass 1 catches that), and
+# can't quietly opt out of test coverage by declaring features it
+# doesn't actually disable (pass 2 catches that).
+#
+# Usage:
+#   bin/run-frontend-tests-with-disables.sh \
+#     [--plugin-ep-json PATH] [-- <playwright args...>]
+#
+# Resolution order for the disables list:
+#   1. EP_PLUGIN_DISABLES env var (comma- or space-separated)
+#   2. --plugin-ep-json PATH (reads `.disables` JSON array)
+#   3. Auto-detect: if exactly one ep_*/ep.json under plugin_packages/
+#      declares disables, use it. Multiple disabling plugins → error.
+#
+# Run from src/ (where playwright.config.ts and node_modules live).
+
+set -euo pipefail
+
+EP_JSON=""
+PLAYWRIGHT_ARGS=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --plugin-ep-json)
+      EP_JSON="$2"; shift 2;;
+    --) shift; PLAYWRIGHT_ARGS+=("$@"); break;;
+    *) PLAYWRIGHT_ARGS+=("$1"); shift;;
+  esac
+done
+
+read_disables_from_json() {
+  # Echo space-separated list of @feature:* tags from a JSON file's
+  # top-level `disables` array. Empty if the file or field is missing.
+  local file="$1"
+  [[ -f "$file" ]] || return 0
+  node -e "
+    try {
+      const j = JSON.parse(require('fs').readFileSync(process.argv[1], 'utf8'));
+      const d = Array.isArray(j.disables) ? j.disables : [];
+      process.stdout.write(d.join(' '));
+    } catch (_) {}
+  " "$file"
+}
+
+DISABLES=""
+if [[ -n "${EP_PLUGIN_DISABLES:-}" ]]; then
+  DISABLES="$(echo "$EP_PLUGIN_DISABLES" | tr ',' ' ')"
+elif [[ -n "$EP_JSON" ]]; then
+  DISABLES="$(read_disables_from_json "$EP_JSON")"
+else
+  # Auto-detect from plugin_packages/. Skip if 0 or >1 disabling plugins.
+  declare -a CANDIDATES=()
+  if [[ -d plugin_packages ]]; then
+    while IFS= read -r f; do
+      d="$(read_disables_from_json "$f")"
+      [[ -n "$d" ]] && CANDIDATES+=("$d")
+    done < <(find plugin_packages -maxdepth 3 -name ep.json -not -path '*/.versions/*' 2>/dev/null)
+  fi
+  if [[ ${#CANDIDATES[@]} -eq 1 ]]; then
+    DISABLES="${CANDIDATES[0]}"
+  elif [[ ${#CANDIDATES[@]} -gt 1 ]]; then
+    echo "ERROR: multiple plugins declare disables, pass --plugin-ep-json explicitly:" >&2
+    printf '  %s\n' "${CANDIDATES[@]}" >&2
+    exit 2
+  fi
+fi
+
+DISABLES="$(echo "$DISABLES" | xargs)" # trim
+if [[ -z "$DISABLES" ]]; then
+  echo "No 'disables' declared — running standard test suite."
+  exec pnpm exec playwright test "${PLAYWRIGHT_ARGS[@]}"
+fi
+
+# Build the regex Playwright wants for --grep / --grep-invert.
+# Tags are matched as substrings of the test title; @feature:chat is
+# distinct enough that we don't need to anchor.
+GREP_PATTERN="$(echo "$DISABLES" | tr ' ' '|')"
+
+echo "Plugin disables: $DISABLES"
+echo
+
+echo "=== Pass 1: regression — tests NOT tagged with disabled features must pass ==="
+pnpm exec playwright test --grep-invert "($GREP_PATTERN)" "${PLAYWRIGHT_ARGS[@]}"
+
+echo
+echo "=== Pass 2: honesty — tests tagged with $DISABLES must FAIL (feature is disabled) ==="
+# Run with --reporter=list so we get test names but suppress the noisy
+# default reporter output for expected failures. Capture the exit code
+# without `set -e` aborting.
+set +e
+pnpm exec playwright test --grep "($GREP_PATTERN)" --reporter=list --retries=0 "${PLAYWRIGHT_ARGS[@]}"
+PASS2_EXIT=$?
+set -e
+
+# Pass 2 SUCCEEDED (tests passed) is BAD: the plugin says it disables
+# the feature but the feature works. Pass 2 FAILED (tests failed) is
+# GOOD: the feature is genuinely disabled.
+if [[ $PASS2_EXIT -eq 0 ]]; then
+  echo
+  echo "ERROR: plugin declares disables=[$DISABLES] but tests with those tags PASSED." >&2
+  echo "       The plugin is opting out of tests it has no right to skip:" >&2
+  echo "         - either the plugin isn't actually disabling those features," >&2
+  echo "         - or ep.json's disables list is wrong." >&2
+  exit 1
+fi
+
+echo
+echo "Both passes succeeded — plugin's disables contract is honoured."
--- a/doc/PLUGIN_FEATURE_DISABLES.md
+++ b/doc/PLUGIN_FEATURE_DISABLES.md
@ -0,0 +1,92 @@
+# Feature-disabling plugins
+
+Some Etherpad plugins exist specifically to **remove** a baseline feature — `ep_disable_chat`, `ep_disable_change_author_name`, `ep_disable_error_messages`, and so on. When the plugin is installed, the feature it disables is intentionally absent.
+
+This is awkward for CI: the core test suite asserts the disabled feature works. Without coordination, every disable plugin's CI is permanently red, every dependency bump is permanently stuck, and the green/red signal on etherpad.org/plugins becomes meaningless.
+
+To fix that — without giving plugins a free pass to opt out of arbitrary tests — Etherpad uses a small declared-disables contract.
+
+## How it works
+
+### 1. Core specs are tagged by feature
+
+Tests that exercise a single feature carry a Playwright tag like `@feature:chat`:
+
+```ts
+test('opens chat, sends a message, makes sure it exists on the page and hides chat', {
+  tag: '@feature:chat',
+}, async ({page}) => { ... });
+
+test.describe('error sanitization', { tag: '@feature:error-gritter' }, () => { ... });
+```
+
+Tags currently in use:
+
+- `@feature:chat`
+- `@feature:username`
+- `@feature:clear-authorship`
+- `@feature:error-gritter`
+
+### 2. A plugin declares the features it disables in its `ep.json`
+
+```json
+{
+  "name": "ep_disable_chat",
+  "description": "Disable chat",
+  "disables": ["@feature:chat"],
+  "parts": [...]
+}
+```
+
+The `disables` field is publicly visible in the plugin's metadata and surfaces on etherpad.org/plugins, so users see the contract before installing.
+
+### 3. The plugin's CI runs the two-pass test script
+
+`bin/run-frontend-tests-with-disables.sh` enforces the contract:
+
+```yaml
+# .github/workflows/frontend-tests.yml
+- name: Run the frontend tests
+  working-directory: ./etherpad-lite/src
+  run: ../bin/run-frontend-tests-with-disables.sh -- --project=chromium
+```
+
+The script reads `disables` (from `EP_PLUGIN_DISABLES`, an explicit `--plugin-ep-json PATH`, or auto-detection in `plugin_packages/`) and runs two passes:
+
+| Pass | What it runs | Must |
+|---|---|---|
+| **1. Regression** | Every spec **not** tagged with a disabled feature | Pass — proves the plugin doesn't break anything beyond what it claims to disable. |
+| **2. Honesty** | Every spec **that is** tagged with a disabled feature | **Fail** — proves the plugin is genuinely disabling the feature it declares. If those tests pass, the plugin's `disables` claim is wrong. |
+
+Both passes have to succeed for CI to be green.
+
+## What this catches
+
+| Failure mode | Caught by |
+|---|---|
+| Plugin breaks an unrelated feature | Pass 1 — its tests aren't excluded, they fail, CI red. |
+| Plugin claims to disable a feature but the feature still works | Pass 2 — tagged tests pass when they should fail, script exits non-zero. |
+| Plugin breaks a feature without declaring it (so etherpad.org/plugins shows it as harmless) | Pass 1 — the feature's tests aren't excluded, they fail, CI red. |
+| Plugin lists a feature in `disables` it doesn't actually disable | Pass 2. |
+
+A plugin **cannot** ship green with broken functionality the user can't see ahead of time.
+
+## Adding a new feature tag
+
+When a core spec needs a new feature tag (because a new disable plugin needs to opt out of it):
+
+1. Pick a stable name: `@feature:<area>` — short, lowercase, kebab-case, plural where appropriate.
+2. Tag the relevant `test()` or `test.describe()` blocks. Multiple tags are fine: `tag: ['@feature:chat', '@feature:username']`.
+3. Update the list above.
+4. Submit the tag PR before the plugin's PR — the plugin can then declare `disables` and pass CI.
+
+## Adding a new disable plugin
+
+1. Confirm the feature you're disabling is tagged in core. If not, propose a tag upstream first.
+2. Add `"disables": ["@feature:thing"]` to your `ep.json`.
+3. Replace the test invocation in `.github/workflows/frontend-tests.yml` with a call to `bin/run-frontend-tests-with-disables.sh` (see `ep_disable_chat` for a worked example).
+4. Confirm both passes go green locally before opening the PR.
+
+## Why not just `--grep-invert`?
+
+The earlier draft of this design just told plugin maintainers to add `--grep-invert "<pattern>"` in CI. That works for the regression case (pass 1 above), but it lets a careless or malicious plugin silently skip arbitrary tests and present green CI on etherpad.org/plugins despite breaking unrelated functionality. Pass 2 — and the requirement that disables be declared in `ep.json` rather than inferred from a CI argument — closes that gap.
--- a/src/tests/frontend-new/specs/a11y_dialogs.spec.ts
+++ b/src/tests/frontend-new/specs/a11y_dialogs.spec.ts
@ -90,7 +90,9 @@ test('export links have an accessible name from their localized content', async
  }
 });

-test('chaticon is a button with an accessible name', async ({page}) => {
+test('chaticon is a button with an accessible name', {
+  tag: '@feature:chat',
+}, async ({page}) => {
  const chatIcon = page.locator('#chaticon');
  const tagName = await chatIcon.evaluate((el) => el.tagName.toLowerCase());
  expect(tagName).toBe('button');
@ -100,7 +102,9 @@ test('chaticon is a button with an accessible name', async ({page}) => {
  expect(label && label.length > 0).toBe(true);
 });

-test('chat header close/pin controls are buttons with accessible names', async ({page}) => {
+test('chat header close/pin controls are buttons with accessible names', {
+  tag: '@feature:chat',
+}, async ({page}) => {
  await page.locator('#chaticon').click();
  // #titlecross has no data-l10n-id so its aria-label stays static English.
  // #titlesticky has data-l10n-id, so html10n fills aria-label from the
--- a/src/tests/frontend-new/specs/change_user_color.spec.ts
+++ b/src/tests/frontend-new/specs/change_user_color.spec.ts
@ -56,7 +56,9 @@ test.describe('change user color', function () {
      expect(await $colorPickerPreview.getAttribute('style')).toContain(await $userSwatch.getAttribute('style'));
    });

-  test('Own user color is shown when you enter a chat', async function ({page}) {
+  test('Own user color is shown when you enter a chat', {
+    tag: '@feature:chat',
+  }, async function ({page}) {

    const colorOption = page.locator('#options-colorscheck');
    if (!(await colorOption.isChecked())) {
--- a/src/tests/frontend-new/specs/change_user_name.spec.ts
+++ b/src/tests/frontend-new/specs/change_user_name.spec.ts
@ -8,7 +8,9 @@ test.beforeEach(async ({ page })=>{
 })


-test("Remembers the username after a refresh", async ({page}) => {
+test("Remembers the username after a refresh", {
+  tag: '@feature:username',
+}, async ({page}) => {
  await toggleUserList(page);
  await setUserName(page,'😃')
  await toggleUserList(page)
@ -20,7 +22,9 @@ test("Remembers the username after a refresh", async ({page}) => {
 })


-test('Own user name is shown when you enter a chat', async ({page})=> {
+test('Own user name is shown when you enter a chat', {
+  tag: ['@feature:chat', '@feature:username'],
+}, async ({page})=> {
  const chatMessage = 'O hi';

  await toggleUserList(page);
@ -39,7 +43,9 @@ test('Own user name is shown when you enter a chat', async ({page})=> {
 // no such button and the cap just made the username field too small. The
 // colibris skin also pre-existing override of margin-left:35px (chosen for
 // the chatAndUsers sticky layout) has been aligned with the base 10px.
-test('#myusernameform has 10px left margin and is not width-capped', async ({page}) => {
+test('#myusernameform has 10px left margin and is not width-capped', {
+  tag: '@feature:username',
+}, async ({page}) => {
  await toggleUserList(page);

  const styles = await page.evaluate(() => {
--- a/src/tests/frontend-new/specs/chat.spec.ts
+++ b/src/tests/frontend-new/specs/chat.spec.ts
@ -20,7 +20,9 @@ test.beforeEach(async ({ page, context })=>{
 })


-test('opens chat, sends a message, makes sure it exists on the page and hides chat', async ({page}) => {
+test('opens chat, sends a message, makes sure it exists on the page and hides chat', {
+  tag: '@feature:chat',
+}, async ({page}) => {
  const chatValue = "JohnMcLear"

  // Open chat
@ -38,7 +40,9 @@ test('opens chat, sends a message, makes sure it exists on the page and hides ch
  expect(chatMessage).toBe(" "+chatValue);
 })

-test("makes sure that an empty message can't be sent", async function ({page}) {
+test("makes sure that an empty message can't be sent", {
+  tag: '@feature:chat',
+}, async function ({page}) {
  const chatValue = 'mluto';

  await showChat(page);
@ -60,7 +64,9 @@ test("makes sure that an empty message can't be sent", async function ({page}) {
  expect(chatMessage).toBe(" "+chatValue);
 });

-test('makes chat stick to right side of the screen via settings, remove sticky via settings, close it', async ({page}) =>{
+test('makes chat stick to right side of the screen via settings, remove sticky via settings, close it', {
+  tag: '@feature:chat',
+}, async ({page}) =>{
  await showSettings(page);

  await enableStickyChatviaSettings(page);
@ -77,7 +83,9 @@ test('makes chat stick to right side of the screen via settings, remove sticky v
 });

 test('makes chat stick to right side of the screen via icon on the top right, ' +
-  'remove sticky via icon, close it', async function ({page}) {
+  'remove sticky via icon, close it', {
+  tag: '@feature:chat',
+}, async function ({page}) {
  await showChat(page);

  await enableStickyChatviaIcon(page);
@ -95,7 +103,9 @@ test('makes chat stick to right side of the screen via icon on the top right, '


 test('Checks showChat=false URL Parameter hides chat then' +
-  ' when removed it shows chat', async function ({page}) {
+  ' when removed it shows chat', {
+  tag: '@feature:chat',
+}, async function ({page}) {

  // get a new pad, but don't clear the cookies
  await appendQueryParams(page, {
@ -120,7 +130,9 @@ test('Checks showChat=false URL Parameter hides chat then' +
 // jQuery .hide(); re-enabling chat doesn't undo it, and chat.show() only flips
 // visibility via the .visible class — so without an explicit display reset the
 // box stays hidden by the lingering inline style. (PR #7597)
-test('chat icon click reveals chatbox after a disable → enable cycle', async ({page}) => {
+test('chat icon click reveals chatbox after a disable → enable cycle', {
+  tag: '@feature:chat',
+}, async ({page}) => {
  await showSettings(page);
  await page.locator('label[for="options-disablechat"]').click();
  await expect(page.locator('#options-disablechat')).toBeChecked();
@ -137,7 +149,9 @@ test('chat icon click reveals chatbox after a disable → enable cycle', async (
 });

 // Title-bar layout / glyph regressions from #7590 review.
-test('chat title bar lays out as a centred flex row with underscore minimize', async ({page}) => {
+test('chat title bar lays out as a centred flex row with underscore minimize', {
+  tag: '@feature:chat',
+}, async ({page}) => {
  await showChat(page);

  // Minimize button uses an underscore (sits at the bottom of its em-box and
@ -185,7 +199,9 @@ test('chat title bar lays out as a centred flex row with underscore minimize', a
 // became a <button>, the inner <span class="buttonicon"> being `display: flex`
 // (from the global icons.css rule) could intercept clicks and the chat icon
 // stopped opening the panel. The fix scopes a reset on `#chaticon .buttonicon`.
-test('chat icon click reliably opens the chat box', async ({page}) => {
+test('chat icon click reliably opens the chat box', {
+  tag: '@feature:chat',
+}, async ({page}) => {
  await expect(page.locator('#chaticon')).toBeVisible();
  await page.locator('#chaticon').click();
  await expect(page.locator('#chatbox')).toHaveClass(/visible/);
--- a/src/tests/frontend-new/specs/clear_authorship_color.spec.ts
+++ b/src/tests/frontend-new/specs/clear_authorship_color.spec.ts
@ -14,7 +14,9 @@ test.beforeEach(async ({ page })=>{
  await goToNewPad(page);
 })

-test('clear authorship color', async ({page}) => {
+test('clear authorship color', {
+  tag: '@feature:clear-authorship',
+}, async ({page}) => {
  const padBody = await getPadBody(page);

  // type some text
@ -35,7 +37,9 @@ test('clear authorship color', async ({page}) => {
 })


-test("clear authorship colors can be undone to restore author colors", async function ({page}) {
+test("clear authorship colors can be undone to restore author colors", {
+  tag: '@feature:clear-authorship',
+}, async function ({page}) {
  // Fix for https://github.com/ether/etherpad-lite/issues/2802
  // Previously, undo of clear authorship was blocked as a workaround.
  // Now the server properly allows it, so undo should restore author colors.
@ -70,7 +74,9 @@ test("clear authorship colors can be undone to restore author colors", async fun


 // Test for https://github.com/ether/etherpad-lite/issues/5128
-test('clears authorship when first line has line attributes', async function ({page}) {
+test('clears authorship when first line has line attributes', {
+  tag: '@feature:clear-authorship',
+}, async function ({page}) {
  // Make sure there is text with author info. The first line must have a line attribute.
  const padBody = await getPadBody(page);
  // Accept confirm dialogs before any action that might trigger one
--- a/src/tests/frontend-new/specs/error_sanitization.spec.ts
+++ b/src/tests/frontend-new/specs/error_sanitization.spec.ts
@ -5,7 +5,9 @@ test.beforeEach(async ({page}) => {
  await goToNewPad(page);
 });

-test.describe('error sanitization', () => {
+test.describe('error sanitization', {
+  tag: '@feature:error-gritter',
+}, () => {

  test('production mode hides error details from gritter popup', async ({page}) => {
    // The test server runs without NODE_ENV=development, so clientVars.mode
--- a/src/tests/frontend-new/specs/undo_clear_authorship.spec.ts
+++ b/src/tests/frontend-new/specs/undo_clear_authorship.spec.ts
@ -24,7 +24,9 @@ import {
 * but the server rejects it because User B is submitting changes containing
 * User A's author ID.
 */
-test.describe('undo clear authorship colors with multiple authors (bug #2802)', function () {
+test.describe('undo clear authorship colors with multiple authors (bug #2802)', {
+  tag: '@feature:clear-authorship',
+}, function () {
  test.describe.configure({ retries: 2 });
  let padId: string;