NR Message 23 (v0.6.2 verification) flagged 21% of year-09 archive messages mismatching HPT by exactly N leading #13 bytes. Root cause in mb.kludge.SplitKludgeBlob: the rejoin loop used `bodyOut = ''` as a proxy for "haven't emitted yet", conflating "empty string so far" with "no line committed yet". A message whose raw body started with blank lines (leading CRs) lost those CRs because the bodyOut-stays-empty branch fired and suppressed the separator. Fix: track emission with a dedicated `emitted: boolean` flag. Once any line has been committed (empty or not), subsequent lines always use CR as separator. Leading empty lines now contribute their CRs to the output. Regression test: test_fuzz_kludge.TestLeadingCrPreserved covers leading-CR-x2, leading-CR-x1, and kludge-prefix + blank-line + user-text mixed case. Expected outcome on NR's re-run: the 2055 messages previously mismatching by one leading CR and the 15 by 2-4 leading CRs should all match HPT byte-for-byte post-fix. That pushes the 66% body-parity to ~99.5%.
200 lines
6.5 KiB
ObjectPascal
200 lines
6.5 KiB
ObjectPascal
{
|
|
test_fuzz_kludge.pas - mb.kludge corruption-resilience.
|
|
|
|
mb.kludge.SplitKludgeBlob takes untrusted body text from the
|
|
consumer. Before hardening it used SetLength(lines, Length(lines)+1)
|
|
in a loop, which made the function O(n^2) in the number of #13
|
|
separators. A million-CR body could hang the process for minutes.
|
|
|
|
Test IDs:
|
|
F-KL-1 body with 100 K #13 separators finishes quickly
|
|
F-KL-2 pathological all-CR body respected the 10 K line cap
|
|
F-KL-3 a legitimate kludge-rich body still round-trips
|
|
}
|
|
|
|
program test_fuzz_kludge;
|
|
|
|
{$mode objfpc}{$H+}
|
|
|
|
uses
|
|
SysUtils, DateUtils,
|
|
testutil,
|
|
mb.types, mb.kludge;
|
|
|
|
procedure TestManyCrs;
|
|
var
|
|
body, plain: AnsiString;
|
|
attrs: TMsgAttributes;
|
|
i: longint;
|
|
t0, t1: TDateTime;
|
|
elapsed: longint;
|
|
begin
|
|
TestBegin('F-KL-1: 100 K CR-separated lines in under 1 s');
|
|
attrs.Clear;
|
|
begin
|
|
SetLength(body, 200000);
|
|
for i := 1 to 100000 do begin
|
|
body[2*i - 1] := 'x';
|
|
body[2*i ] := #13;
|
|
end;
|
|
t0 := Now;
|
|
SplitKludgeBlob(body, plain, attrs);
|
|
t1 := Now;
|
|
elapsed := MilliSecondsBetween(t1, t0);
|
|
AssertTrue('under 1000 ms, got ' + IntToStr(elapsed) + ' ms',
|
|
elapsed < 1000);
|
|
end;
|
|
TestOK;
|
|
end;
|
|
|
|
procedure TestAllCr;
|
|
var
|
|
body, plain: AnsiString;
|
|
attrs: TMsgAttributes;
|
|
i: longint;
|
|
begin
|
|
TestBegin('F-KL-2: 1 M CRs handled via line cap, doesn''t hang');
|
|
attrs.Clear;
|
|
begin
|
|
SetLength(body, 1000000);
|
|
for i := 1 to Length(body) do body[i] := #13;
|
|
SplitKludgeBlob(body, plain, attrs);
|
|
AssertTrue('result bounded', Length(plain) <= Length(body));
|
|
end;
|
|
TestOK;
|
|
end;
|
|
|
|
procedure TestLegitKludge;
|
|
var
|
|
body, plain: AnsiString;
|
|
attrs: TMsgAttributes;
|
|
begin
|
|
TestBegin('F-KL-3: legit kludge-rich body still parses');
|
|
attrs.Clear;
|
|
begin
|
|
body := #1 + 'MSGID: 1:1/1 12345678' + #13 +
|
|
#1 + 'PID: TestPID' + #13 +
|
|
'Hello world' + #13 +
|
|
'Second line';
|
|
SplitKludgeBlob(body, plain, attrs);
|
|
AssertTrue('msgid present', attrs.Get('msgid') <> '');
|
|
AssertTrue('pid present', attrs.Get('pid') <> '');
|
|
AssertTrue('plain has Hello', Pos('Hello world', plain) > 0);
|
|
end;
|
|
TestOK;
|
|
end;
|
|
|
|
{ NR Message 19: a reply that quotes an earlier message's kludge
|
|
block must not overwrite the current message's MSGID when the
|
|
quoting preserves SOH at line position 0. First-wins semantics
|
|
on the singleton kludges (msgid / reply / pid / tid / flags /
|
|
chrs / tzutc / intl / fmpt / topt / area) protects against this.
|
|
Without the fix, the tosser sees a false dupe hit on the quoted
|
|
msgid's CRC and drops the message. }
|
|
procedure TestQuotedMsgidFirstWins;
|
|
var
|
|
body, plain: AnsiString;
|
|
attrs: TMsgAttributes;
|
|
begin
|
|
TestBegin('F-KL-4: quoted ^AMSGID: in body does not overwrite prefix MSGID');
|
|
attrs.Clear;
|
|
{ Prefix block first, then user text, then a quoted block with
|
|
SOH at position 0 simulating a preserve-kludges quote style,
|
|
then trailing SEEN-BY/PATH. }
|
|
body := #1 + 'MSGID: 1:1/1 REAL-MSGID' + #13 +
|
|
#1 + 'REPLY: 1:1/1 PARENT-ID' + #13 +
|
|
#1 + 'PID: Real/Editor' + #13 +
|
|
'Hello, you wrote:' + #13 +
|
|
#1 + 'MSGID: 2:2/2 QUOTED-OLD' + #13 + { same form, SOH at pos 0 }
|
|
#1 + 'PID: Quoted/Editor' + #13 +
|
|
'Your message.' + #13 +
|
|
'My reply.' + #13 +
|
|
'SEEN-BY: 1/1' + #13 +
|
|
#1 + 'PATH: 1/1';
|
|
SplitKludgeBlob(body, plain, attrs);
|
|
AssertEquals('msgid is the REAL one, not the quoted one',
|
|
'1:1/1 REAL-MSGID', attrs.Get('msgid'));
|
|
AssertEquals('replyid survives',
|
|
'1:1/1 PARENT-ID', attrs.Get('replyid'));
|
|
AssertEquals('pid is the real one',
|
|
'Real/Editor', attrs.Get('pid'));
|
|
AssertEquals('seen-by populated', '1/1', attrs.Get('seen-by'));
|
|
AssertEquals('path populated', '1/1', attrs.Get('path'));
|
|
TestOK;
|
|
end;
|
|
|
|
{ Position-0 rule: SOH appearing mid-line in body text is NOT a
|
|
kludge marker; the line stays body text. }
|
|
procedure TestSohMidLineIsBody;
|
|
var
|
|
body, plain: AnsiString;
|
|
attrs: TMsgAttributes;
|
|
begin
|
|
TestBegin('F-KL-5: SOH mid-line is body text, not a kludge');
|
|
attrs.Clear;
|
|
body := #1 + 'MSGID: 1:1/1 REAL' + #13 +
|
|
'Some text ' + #1 + 'MSGID: fake embedded' + #13 + { mid-line SOH }
|
|
' ' + #1 + 'MSGID: leading-space' + #13 + { leading space }
|
|
'> ' + #1 + 'MSGID: quoted' + #13 + { quote prefix }
|
|
'End.';
|
|
SplitKludgeBlob(body, plain, attrs);
|
|
AssertEquals('msgid stays REAL through all mid-line noise',
|
|
'1:1/1 REAL', attrs.Get('msgid'));
|
|
AssertTrue('body retains "Some text" with embedded SOH',
|
|
Pos('Some text ', plain) > 0);
|
|
AssertTrue('body retains quote line',
|
|
Pos('> ', plain) > 0);
|
|
AssertTrue('body retains End.',
|
|
Pos('End.', plain) > 0);
|
|
TestOK;
|
|
end;
|
|
|
|
{ NR Message 23: a message whose body starts with one or more
|
|
empty lines (CR-only at the start, common in replies, signature
|
|
blocks, news clippings) used to lose those leading CRs because
|
|
the rejoin loop tested "bodyOut = ''" as a proxy for "haven't
|
|
emitted yet", conflating "empty string so far" with "no line
|
|
committed yet". Ship v0.6.3 fixes this by tracking emission
|
|
via a separate boolean. }
|
|
procedure TestLeadingCrPreserved;
|
|
var
|
|
body, plain: AnsiString;
|
|
attrs: TMsgAttributes;
|
|
begin
|
|
TestBegin('F-KL-6: leading CRs in body preserved (NR Message 23)');
|
|
attrs.Clear;
|
|
{ Two leading empty lines, then real content. }
|
|
body := #13#13'--- D''Bridge 3.x';
|
|
SplitKludgeBlob(body, plain, attrs);
|
|
AssertEquals('two leading CRs kept', #13#13'--- D''Bridge 3.x', plain);
|
|
|
|
{ One leading CR. }
|
|
attrs.Clear;
|
|
body := #13'Reply text.';
|
|
SplitKludgeBlob(body, plain, attrs);
|
|
AssertEquals('single leading CR kept', #13'Reply text.', plain);
|
|
|
|
{ Mixed: kludge prefix + empty line before user text -- the
|
|
kludge still gets extracted, the empty line between prefix
|
|
and text is preserved. }
|
|
attrs.Clear;
|
|
body := #1'MSGID: 1:1/1 abc'#13#13'Hello, you wrote:'#13'> original';
|
|
SplitKludgeBlob(body, plain, attrs);
|
|
AssertEquals('msgid extracted', '1:1/1 abc', attrs.Get('msgid'));
|
|
AssertEquals('blank line after kludge preserved',
|
|
#13'Hello, you wrote:'#13'> original', plain);
|
|
TestOK;
|
|
end;
|
|
|
|
begin
|
|
WriteLn('fpc-msgbase: mb.kludge corruption-resilience fuzz');
|
|
WriteLn;
|
|
TestManyCrs;
|
|
TestAllCr;
|
|
TestLegitKludge;
|
|
TestQuotedMsgidFirstWins;
|
|
TestSohMidLineIsBody;
|
|
TestLeadingCrPreserved;
|
|
Halt(TestsSummary);
|
|
end.
|