Skip to content

Commit

Permalink
Optimization: combine checking for final & non-final boundaries
Browse files Browse the repository at this point in the history
This could reduce "string" comparisons by half when checking which
boundary we just encountered.

In practice, this change doesn't seem to have a noticeable impact, but
it's worth doing anyway.
  • Loading branch information
jstedfast committed Feb 2, 2025
1 parent d959a06 commit 9d9a3ea
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 48 deletions.
14 changes: 8 additions & 6 deletions MimeKit/AsyncMimeParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -502,12 +502,14 @@ async Task ConstructMultipartAsync (Multipart multipart, MimeEntityEndEventArgs
// We either found the end of the stream or we found a parent's boundary
PopBoundary ();

unsafe {
fixed (byte* inbuf = input) {
if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (inbuf, true))
boundary = BoundaryType.ImmediateEndBoundary;
else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (inbuf, false))
boundary = BoundaryType.ImmediateBoundary;
// If the last boundary we found (before popping one off the stack) was a parent's boundary, we need to check
// to see if that boundary is now an immediate boundary and update our state.
if (boundary == BoundaryType.ParentEndBoundary || boundary == BoundaryType.ParentBoundary) {
unsafe {
fixed (byte* inbuf = input) {
if (FoundImmediateBoundary (inbuf, out var final))
boundary = final ? BoundaryType.ImmediateEndBoundary : BoundaryType.ImmediateBoundary;
}
}
}
}
Expand Down
14 changes: 8 additions & 6 deletions MimeKit/AsyncMimeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -626,12 +626,14 @@ async Task<int> ConstructMultipartAsync (ContentType contentType, int depth, Can
// We either found the end of the stream or we found a parent's boundary
PopBoundary ();

unsafe {
fixed (byte* inbuf = input) {
if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (inbuf, true))
boundary = BoundaryType.ImmediateEndBoundary;
else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (inbuf, false))
boundary = BoundaryType.ImmediateBoundary;
// If the last boundary we found (before popping one off the stack) was a parent's boundary, we need to check
// to see if that boundary is now an immediate boundary and update our state.
if (boundary == BoundaryType.ParentEndBoundary || boundary == BoundaryType.ParentBoundary) {
unsafe {
fixed (byte* inbuf = input) {
if (FoundImmediateBoundary (inbuf, out var final))
boundary = final ? BoundaryType.ImmediateEndBoundary : BoundaryType.ImmediateBoundary;
}
}
}

Expand Down
51 changes: 33 additions & 18 deletions MimeKit/MimeParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class Boundary
public int FinalLength { get { return Marker.Length; } }
public int Length { get; private set; }
public int MaxLength { get; private set; }
public bool IsMboxMarker { get { return Marker == MboxFrom; } }

public Boundary (string boundary, int currentMaxLength)
{
Expand Down Expand Up @@ -1222,24 +1223,33 @@ unsafe bool IsPossibleBoundary (byte* text, int length)
return false;
}

static unsafe bool IsBoundary (byte* text, int length, byte[] boundary, int boundaryLength)
static unsafe bool IsBoundary (byte* text, int length, Boundary boundary, out bool final)
{
if (boundaryLength > length)
final = false;

if (boundary.Length > length)
return false;

fixed (byte* boundaryptr = boundary) {
fixed (byte* boundaryptr = boundary.Marker) {
// make sure that the text matches the boundary
if (!CStringsEqual (text, boundaryptr, boundaryLength))
if (!CStringsEqual (text, boundaryptr, boundary.Length))
return false;

// if this is an mbox marker, we're done
if (IsMboxMarker (text))
if (boundary.IsMboxMarker) {
final = true;
return true;
}

// the boundary may optionally be followed by lwsp
byte* inptr = text + boundaryLength;
byte* inptr = text + boundary.Length;
byte* inend = text + length;

if (length >= boundary.FinalLength && inptr[0] == (byte) '-' && inptr[1] == (byte) '-') {
final = true;
inptr += 2;
}

// the boundary may optionally be followed by lwsp
while (inptr < inend) {
if (!(*inptr).IsWhitespace ())
return false;
Expand All @@ -1266,28 +1276,30 @@ unsafe BoundaryType CheckBoundary (int startIndex, byte* start, int length)
for (int i = 0; i < count; i++) {
var boundary = bounds[i];

if (IsBoundary (start, length, boundary.Marker, boundary.FinalLength))
return i == 0 ? BoundaryType.ImmediateEndBoundary : BoundaryType.ParentEndBoundary;
if (IsBoundary (start, length, boundary, out var final)) {
if (final)
return i == 0 ? BoundaryType.ImmediateEndBoundary : BoundaryType.ParentEndBoundary;

if (IsBoundary (start, length, boundary.Marker, boundary.Length))
return i == 0 ? BoundaryType.ImmediateBoundary : BoundaryType.ParentBoundary;
}
}

if (contentEnd > 0) {
// now it is time to check the mbox From-marker for the Content-Length case
long curOffset = GetOffset (startIndex);
var boundary = bounds[count];

if (curOffset >= contentEnd && IsBoundary (start, length, boundary.Marker, boundary.Length))
if (curOffset >= contentEnd && IsBoundary (start, length, boundary, out _))
return BoundaryType.ImmediateEndBoundary;
}

return BoundaryType.None;
}

unsafe bool FoundImmediateBoundary (byte* inbuf, bool final)
unsafe bool FoundImmediateBoundary (byte* inbuf, out bool final)
{
int boundaryLength = final ? bounds[0].FinalLength : bounds[0].Length;
// TODO: If the MimeReader recorded which boundary marker it found, we wouldn't need to re-scan the input buffer for eoln,
// we could just check if boundary[0] == MimeReader.lastFoundBoundary (or whatever we call it).
byte* start = inbuf + inputIndex;
byte* inend = inbuf + inputEnd;
byte* inptr = start;
Expand All @@ -1297,9 +1309,10 @@ unsafe bool FoundImmediateBoundary (byte* inbuf, bool final)
while (*inptr != (byte) '\n')
inptr++;

return IsBoundary (start, (int) (inptr - start), bounds[0].Marker, boundaryLength);
return IsBoundary (start, (int) (inptr - start), bounds[0], out final);
}

[MethodImpl (MethodImplOptions.AggressiveInlining)]
int GetMaxBoundaryLength ()
{
return bounds.Count > 0 ? bounds[0].MaxLength + 2 : 0;
Expand Down Expand Up @@ -1716,10 +1729,12 @@ unsafe void ConstructMultipart (Multipart multipart, MimeEntityEndEventArgs args
// We either found the end of the stream or we found a parent's boundary
PopBoundary ();

if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (inbuf, true))
boundary = BoundaryType.ImmediateEndBoundary;
else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (inbuf, false))
boundary = BoundaryType.ImmediateBoundary;
// If the last boundary we found (before popping one off the stack) was a parent's boundary, we need to check
// to see if that boundary is now an immediate boundary and update our state.
if (boundary == BoundaryType.ParentEndBoundary || boundary == BoundaryType.ParentBoundary) {
if (FoundImmediateBoundary (inbuf, out var final))
boundary = final ? BoundaryType.ImmediateEndBoundary : BoundaryType.ImmediateBoundary;
}
}

/// <summary>
Expand Down
53 changes: 35 additions & 18 deletions MimeKit/MimeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1839,24 +1839,33 @@ unsafe bool IsPossibleBoundary (byte* text, int length)
return false;
}

static unsafe bool IsBoundary (byte* text, int length, byte[] boundary, int boundaryLength)
static unsafe bool IsBoundary (byte* text, int length, Boundary boundary, out bool final)
{
if (boundaryLength > length)
final = false;

if (boundary.Length > length)
return false;

fixed (byte* boundaryptr = boundary) {
fixed (byte* boundaryptr = boundary.Marker) {
// make sure that the text matches the boundary
if (!CStringsEqual (text, boundaryptr, boundaryLength))
if (!CStringsEqual (text, boundaryptr, boundary.Length))
return false;

// if this is an mbox marker, we're done
if (IsMboxMarker (text))
if (boundary.IsMboxMarker) {
final = true;
return true;
}

// the boundary may optionally be followed by lwsp
byte* inptr = text + boundaryLength;
byte* inptr = text + boundary.Length;
byte* inend = text + length;

if (length >= boundary.FinalLength && inptr[0] == (byte) '-' && inptr[1] == (byte) '-') {
final = true;
inptr += 2;
}

// the boundary may optionally be followed by lwsp
while (inptr < inend) {
if (!(*inptr).IsWhitespace ())
return false;
Expand All @@ -1883,28 +1892,33 @@ unsafe BoundaryType CheckBoundary (int startIndex, byte* start, int length)
for (int i = 0; i < count; i++) {
var boundary = bounds[i];

if (IsBoundary (start, length, boundary.Marker, boundary.FinalLength))
return i == 0 ? BoundaryType.ImmediateEndBoundary : BoundaryType.ParentEndBoundary;
if (IsBoundary (start, length, boundary, out var final)) {
if (final)
return i == 0 ? BoundaryType.ImmediateEndBoundary : BoundaryType.ParentEndBoundary;

if (IsBoundary (start, length, boundary.Marker, boundary.Length))
return i == 0 ? BoundaryType.ImmediateBoundary : BoundaryType.ParentBoundary;
}
}

if (contentEnd > 0) {
// now it is time to check the mbox From-marker for the Content-Length case
long curOffset = GetOffset (startIndex);
var boundary = bounds[count];

if (curOffset >= contentEnd && IsBoundary (start, length, boundary.Marker, boundary.Length))
if (curOffset >= contentEnd && IsBoundary (start, length, boundary, out _))
return BoundaryType.ImmediateEndBoundary;
}

return BoundaryType.None;
}

unsafe bool FoundImmediateBoundary (byte* inbuf, bool final)
#if NET5_0_OR_GREATER
[SkipLocalsInit]
#endif
unsafe bool FoundImmediateBoundary (byte* inbuf, out bool final)
{
int boundaryLength = final ? bounds[0].FinalLength : bounds[0].Length;
// TODO: If the MimeReader recorded which boundary marker it found, we wouldn't need to re-scan the input buffer for eoln,
// we could just check if boundary[0] == MimeReader.lastFoundBoundary (or whatever we call it).
byte* start = inbuf + inputIndex;
byte* inend = inbuf + inputEnd;
byte* inptr = start;
Expand All @@ -1915,9 +1929,10 @@ unsafe bool FoundImmediateBoundary (byte* inbuf, bool final)
while (*inptr != (byte) '\n')
inptr++;

return IsBoundary (start, (int) (inptr - start), bounds[0].Marker, boundaryLength);
return IsBoundary (start, (int) (inptr - start), bounds[0], out final);
}

[MethodImpl (MethodImplOptions.AggressiveInlining)]
int GetMaxBoundaryLength ()
{
return bounds.Count > 0 ? bounds[0].MaxLength + 2 : 0;
Expand Down Expand Up @@ -2358,10 +2373,12 @@ unsafe int ConstructMultipart (ContentType contentType, byte* inbuf, int depth,
// We either found the end of the stream or we found a parent's boundary
PopBoundary ();

if (boundary == BoundaryType.ParentEndBoundary && FoundImmediateBoundary (inbuf, true))
boundary = BoundaryType.ImmediateEndBoundary;
else if (boundary == BoundaryType.ParentBoundary && FoundImmediateBoundary (inbuf, false))
boundary = BoundaryType.ImmediateBoundary;
// If the last boundary we found (before popping one off the stack) was a parent's boundary, we need to check
// to see if that boundary is now an immediate boundary and update our state.
if (boundary == BoundaryType.ParentEndBoundary || boundary == BoundaryType.ParentBoundary) {
if (FoundImmediateBoundary (inbuf, out var final))
boundary = final ? BoundaryType.ImmediateEndBoundary : BoundaryType.ImmediateBoundary;
}

endOffset = GetEndOffset (inputIndex);

Expand Down

0 comments on commit 9d9a3ea

Please sign in to comment.