fix(export): /export/etherpad honors the :rev URL segment (#7566)

Fixes #5071. `/p/:pad/:rev/export/etherpad` has always ignored the rev
parameter and returned the full pad history, unlike the txt/html
export endpoints which use the same route but do respect rev. Users
wanting to back up or inspect a snapshot of a pad at a specific rev
got every later revision in the payload instead — both wasteful and
a surprise when the downloaded .etherpad blob contained content that
had supposedly been reverted.

Change:
  - `exportEtherpad.getPadRaw(padId, readOnlyId, revNum?)` now takes an
    optional revNum. When supplied, it clamps to `min(revNum, pad.head)`,
    iterates only revs 0..effectiveHead, and ships a shallow-cloned pad
    object whose `head` and `atext` reflect the requested snapshot. The
    original live Pad is still passed to the `exportEtherpad` hook so
    plugin callbacks see the real document.
  - `ExportHandler` passes `req.params.rev` through on the `etherpad`
    type, matching the existing behavior of `txt` and `html`.
  - Chat history is intentionally left full (it is not rev-anchored).

Adds three backend regression tests under `ExportEtherpad.ts`:
  - default (no revNum) still exports the full history
  - explicit revNum limits exported revs and rewrites the serialized
    head so re-import reconstructs the pad at that rev
  - revNum above head is treated as full history, preventing accidental
    truncation of short pads

Out of scope: `getHTML(padID, rev)` on the API side is already honoring
rev in current code (exportHtml.getPadHTML threads the parameter
through), so the earlier report on that API call appears to be
resolved. This PR does not touch it.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
John McLear 2026-04-26 16:29:02 +08:00 committed by GitHub
parent 51356b9a13
commit 83a42afbae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 71 additions and 4 deletions

View File

@ -67,7 +67,10 @@ exports.doExport = async (req: any, res: any, padId: string, readOnlyId: string,
// if this is a plain text export, we can do this directly
// We have to over engineer this because tabs are stored as attributes and not plain text
if (type === 'etherpad') {
const pad = await exportEtherpad.getPadRaw(padId, readOnlyId);
// Honor the :rev URL segment on `.etherpad` exports the same way the
// other formats already do — revNum limits the serialized pad to revs
// 0..rev (issue #5071).
const pad = await exportEtherpad.getPadRaw(padId, readOnlyId, req.params.rev);
res.send(pad);
} else if (type === 'txt') {
const txt = await exporttxt.getPadTXTDocument(padId, req.params.rev);

View File

@ -21,12 +21,21 @@ const authorManager = require('../db/AuthorManager');
const hooks = require('../../static/js/pluginfw/hooks');
const padManager = require('../db/PadManager');
exports.getPadRaw = async (padId:string, readOnlyId:string) => {
exports.getPadRaw = async (padId:string, readOnlyId:string, revNum?: number) => {
const dstPfx = `pad:${readOnlyId || padId}`;
const [pad, customPrefixes] = await Promise.all([
padManager.getPad(padId),
hooks.aCallAll('exportEtherpadAdditionalContent'),
]);
// If a rev limit was supplied, clamp to it and also clamp chat to the
// timestamp-ordered window that ended at that rev. Without this, a rev=5
// export on a pad with head=100 would still ship all 95 later revisions
// (and leak their content via the exported .etherpad file) — which is
// precisely what issue #5071 reported.
const padHead: number = pad.head;
const effectiveHead: number = (revNum == null || revNum > padHead) ? padHead : revNum;
const isRevBound = revNum != null && revNum < padHead;
const boundAtext = isRevBound ? await pad.getInternalRevisionAText(effectiveHead) : null;
const pluginRecords = await Promise.all(customPrefixes.map(async (customPrefix:string) => {
const srcPfx = `${customPrefix}:${padId}`;
const dstPfx = `${customPrefix}:${readOnlyId || padId}`;
@ -49,11 +58,18 @@ exports.getPadRaw = async (padId:string, readOnlyId:string) => {
return authorEntry;
})()];
}
for (let i = 0; i <= pad.head; ++i) yield [`${dstPfx}:revs:${i}`, pad.getRevision(i)];
for (let i = 0; i <= effectiveHead; ++i) yield [`${dstPfx}:revs:${i}`, pad.getRevision(i)];
for (let i = 0; i <= pad.chatHead; ++i) yield [`${dstPfx}:chat:${i}`, pad.getChatMessage(i)];
for (const gen of pluginRecords) yield* gen;
})();
const data = {[dstPfx]: pad};
// When rev-bound, serialize a shallow-cloned pad object with head/atext
// rewritten so the import side reconstructs the pad at the requested rev.
// toJSON() returns a plain object suitable for spreading; the live Pad
// instance is kept for the exportEtherpad hook below.
const serializedPad = isRevBound
? {...(pad.toJSON()), head: effectiveHead, atext: boundAtext}
: pad;
const data = {[dstPfx]: serializedPad};
for (const [dstKey, p] of new Stream(records).batch(100).buffer(99)) data[dstKey] = await p;
await hooks.aCallAll('exportEtherpad', {
pad,

View File

@ -64,4 +64,52 @@ describe(__filename, function () {
assert(!(`custom:${padId}x:foo` in data));
});
});
// Regression test for https://github.com/ether/etherpad/issues/5071.
// `/p/:pad/:rev/export/etherpad` and getPadRaw() historically ignored the
// rev parameter and always exported the full history, surprising users
// who wanted to back up or inspect an earlier snapshot.
describe('revNum bounding (issue #5071)', function () {
const addRevs = async (pad: any, n: number) => {
// Each call to .appendRevision bumps head by one, producing a
// distinct revision we can count in the exported payload.
for (let i = 0; i < n; i++) {
await pad.appendText(`line ${i}\n`);
}
};
it('defaults to full history when revNum is omitted', async function () {
const pad = await padManager.getPad(padId);
await addRevs(pad, 3);
const data = await exportEtherpad.getPadRaw(padId, null);
// revs 0 (pad-create) through pad.head inclusive.
const revKeys =
Object.keys(data).filter((k) => k.startsWith(`pad:${padId}:revs:`));
assert.equal(revKeys.length, pad.head + 1);
assert.equal(data[`pad:${padId}`].head, pad.head);
});
it('limits exported revisions to 0..revNum when supplied', async function () {
const pad = await padManager.getPad(padId);
await addRevs(pad, 5);
const bound = 2;
const data = await exportEtherpad.getPadRaw(padId, null, bound);
const revKeys =
Object.keys(data).filter((k) => k.startsWith(`pad:${padId}:revs:`));
assert.equal(revKeys.length, bound + 1,
`expected ${bound + 1} revisions, got ${revKeys.length}`);
assert(!(`pad:${padId}:revs:${bound + 1}` in data),
'rev after bound must not be exported');
// The serialized pad must also reflect the bounded head so that
// re-importing reconstructs the pad at the requested rev.
assert.equal(data[`pad:${padId}`].head, bound);
});
it('treats a revNum above head as equivalent to full history', async function () {
const pad = await padManager.getPad(padId);
await addRevs(pad, 3);
const data = await exportEtherpad.getPadRaw(padId, null, pad.head + 100);
assert.equal(data[`pad:${padId}`].head, pad.head);
});
});
});