Files
fpc-msgbase/tests/adversarial/test_fuzz_kludge.pas
Ken Johnson f55576218b Bump to 0.6.3 -- SplitKludgeBlob leading-CR preservation
NR Message 23 (v0.6.2 verification) flagged 21% of year-09
archive messages mismatching HPT by exactly N leading #13 bytes.
Root cause in mb.kludge.SplitKludgeBlob: the rejoin loop used
`bodyOut = ''` as a proxy for "haven't emitted yet", conflating
"empty string so far" with "no line committed yet".  A message
whose raw body started with blank lines (leading CRs) lost
those CRs because the bodyOut-stays-empty branch fired and
suppressed the separator.

Fix: track emission with a dedicated `emitted: boolean` flag.
Once any line has been committed (empty or not), subsequent
lines always use CR as separator.  Leading empty lines now
contribute their CRs to the output.

Regression test: test_fuzz_kludge.TestLeadingCrPreserved covers
leading-CR-x2, leading-CR-x1, and kludge-prefix + blank-line +
user-text mixed case.

Expected outcome on NR's re-run: the 2055 messages previously
mismatching by one leading CR and the 15 by 2-4 leading CRs
should all match HPT byte-for-byte post-fix.  That pushes the
66% body-parity to ~99.5%.
2026-04-20 11:08:47 -07:00

200 lines
6.5 KiB
ObjectPascal

{
test_fuzz_kludge.pas - mb.kludge corruption-resilience.
mb.kludge.SplitKludgeBlob takes untrusted body text from the
consumer. Before hardening it used SetLength(lines, Length(lines)+1)
in a loop, which made the function O(n^2) in the number of #13
separators. A million-CR body could hang the process for minutes.
Test IDs:
F-KL-1 body with 100 K #13 separators finishes quickly
F-KL-2 pathological all-CR body respected the 10 K line cap
F-KL-3 a legitimate kludge-rich body still round-trips
}
program test_fuzz_kludge;
{$mode objfpc}{$H+}
uses
SysUtils, DateUtils,
testutil,
mb.types, mb.kludge;
procedure TestManyCrs;
var
body, plain: AnsiString;
attrs: TMsgAttributes;
i: longint;
t0, t1: TDateTime;
elapsed: longint;
begin
TestBegin('F-KL-1: 100 K CR-separated lines in under 1 s');
attrs.Clear;
begin
SetLength(body, 200000);
for i := 1 to 100000 do begin
body[2*i - 1] := 'x';
body[2*i ] := #13;
end;
t0 := Now;
SplitKludgeBlob(body, plain, attrs);
t1 := Now;
elapsed := MilliSecondsBetween(t1, t0);
AssertTrue('under 1000 ms, got ' + IntToStr(elapsed) + ' ms',
elapsed < 1000);
end;
TestOK;
end;
procedure TestAllCr;
var
body, plain: AnsiString;
attrs: TMsgAttributes;
i: longint;
begin
TestBegin('F-KL-2: 1 M CRs handled via line cap, doesn''t hang');
attrs.Clear;
begin
SetLength(body, 1000000);
for i := 1 to Length(body) do body[i] := #13;
SplitKludgeBlob(body, plain, attrs);
AssertTrue('result bounded', Length(plain) <= Length(body));
end;
TestOK;
end;
procedure TestLegitKludge;
var
body, plain: AnsiString;
attrs: TMsgAttributes;
begin
TestBegin('F-KL-3: legit kludge-rich body still parses');
attrs.Clear;
begin
body := #1 + 'MSGID: 1:1/1 12345678' + #13 +
#1 + 'PID: TestPID' + #13 +
'Hello world' + #13 +
'Second line';
SplitKludgeBlob(body, plain, attrs);
AssertTrue('msgid present', attrs.Get('msgid') <> '');
AssertTrue('pid present', attrs.Get('pid') <> '');
AssertTrue('plain has Hello', Pos('Hello world', plain) > 0);
end;
TestOK;
end;
{ NR Message 19: a reply that quotes an earlier message's kludge
block must not overwrite the current message's MSGID when the
quoting preserves SOH at line position 0. First-wins semantics
on the singleton kludges (msgid / reply / pid / tid / flags /
chrs / tzutc / intl / fmpt / topt / area) protects against this.
Without the fix, the tosser sees a false dupe hit on the quoted
msgid's CRC and drops the message. }
procedure TestQuotedMsgidFirstWins;
var
body, plain: AnsiString;
attrs: TMsgAttributes;
begin
TestBegin('F-KL-4: quoted ^AMSGID: in body does not overwrite prefix MSGID');
attrs.Clear;
{ Prefix block first, then user text, then a quoted block with
SOH at position 0 simulating a preserve-kludges quote style,
then trailing SEEN-BY/PATH. }
body := #1 + 'MSGID: 1:1/1 REAL-MSGID' + #13 +
#1 + 'REPLY: 1:1/1 PARENT-ID' + #13 +
#1 + 'PID: Real/Editor' + #13 +
'Hello, you wrote:' + #13 +
#1 + 'MSGID: 2:2/2 QUOTED-OLD' + #13 + { same form, SOH at pos 0 }
#1 + 'PID: Quoted/Editor' + #13 +
'Your message.' + #13 +
'My reply.' + #13 +
'SEEN-BY: 1/1' + #13 +
#1 + 'PATH: 1/1';
SplitKludgeBlob(body, plain, attrs);
AssertEquals('msgid is the REAL one, not the quoted one',
'1:1/1 REAL-MSGID', attrs.Get('msgid'));
AssertEquals('replyid survives',
'1:1/1 PARENT-ID', attrs.Get('replyid'));
AssertEquals('pid is the real one',
'Real/Editor', attrs.Get('pid'));
AssertEquals('seen-by populated', '1/1', attrs.Get('seen-by'));
AssertEquals('path populated', '1/1', attrs.Get('path'));
TestOK;
end;
{ Position-0 rule: SOH appearing mid-line in body text is NOT a
kludge marker; the line stays body text. }
procedure TestSohMidLineIsBody;
var
body, plain: AnsiString;
attrs: TMsgAttributes;
begin
TestBegin('F-KL-5: SOH mid-line is body text, not a kludge');
attrs.Clear;
body := #1 + 'MSGID: 1:1/1 REAL' + #13 +
'Some text ' + #1 + 'MSGID: fake embedded' + #13 + { mid-line SOH }
' ' + #1 + 'MSGID: leading-space' + #13 + { leading space }
'> ' + #1 + 'MSGID: quoted' + #13 + { quote prefix }
'End.';
SplitKludgeBlob(body, plain, attrs);
AssertEquals('msgid stays REAL through all mid-line noise',
'1:1/1 REAL', attrs.Get('msgid'));
AssertTrue('body retains "Some text" with embedded SOH',
Pos('Some text ', plain) > 0);
AssertTrue('body retains quote line',
Pos('> ', plain) > 0);
AssertTrue('body retains End.',
Pos('End.', plain) > 0);
TestOK;
end;
{ NR Message 23: a message whose body starts with one or more
empty lines (CR-only at the start, common in replies, signature
blocks, news clippings) used to lose those leading CRs because
the rejoin loop tested "bodyOut = ''" as a proxy for "haven't
emitted yet", conflating "empty string so far" with "no line
committed yet". Ship v0.6.3 fixes this by tracking emission
via a separate boolean. }
procedure TestLeadingCrPreserved;
var
body, plain: AnsiString;
attrs: TMsgAttributes;
begin
TestBegin('F-KL-6: leading CRs in body preserved (NR Message 23)');
attrs.Clear;
{ Two leading empty lines, then real content. }
body := #13#13'--- D''Bridge 3.x';
SplitKludgeBlob(body, plain, attrs);
AssertEquals('two leading CRs kept', #13#13'--- D''Bridge 3.x', plain);
{ One leading CR. }
attrs.Clear;
body := #13'Reply text.';
SplitKludgeBlob(body, plain, attrs);
AssertEquals('single leading CR kept', #13'Reply text.', plain);
{ Mixed: kludge prefix + empty line before user text -- the
kludge still gets extracted, the empty line between prefix
and text is preserved. }
attrs.Clear;
body := #1'MSGID: 1:1/1 abc'#13#13'Hello, you wrote:'#13'> original';
SplitKludgeBlob(body, plain, attrs);
AssertEquals('msgid extracted', '1:1/1 abc', attrs.Get('msgid'));
AssertEquals('blank line after kludge preserved',
#13'Hello, you wrote:'#13'> original', plain);
TestOK;
end;
begin
WriteLn('fpc-msgbase: mb.kludge corruption-resilience fuzz');
WriteLn;
TestManyCrs;
TestAllCr;
TestLegitKludge;
TestQuotedMsgidFirstWins;
TestSohMidLineIsBody;
TestLeadingCrPreserved;
Halt(TestsSummary);
end.