Skip to content

Commit

Permalink
Try faster double-to-string algorithm; not good enough
Browse files Browse the repository at this point in the history
grisu.net is the best third-party alternative I found for converting doubles to strings,
	but as "most recent errors.txt" shows, there are some numbers that do not round-trip to the same value,
	like how 7.2600388277351625E-72 is dumped as 7.260038827735163e-72,
	which is not quite the same.
	Interestingly, Python says they're the same, but C# disagrees, and that's disqualifying enough for me.
I will create this branch to reflect my current best effort to improve double-to-string performance,
	but I will not be incorporating this into my main branch.
  • Loading branch information
molsonkiko committed Nov 9, 2024
1 parent a7fef1c commit 4c6a9b5
Show file tree
Hide file tree
Showing 13 changed files with 1,423 additions and 115 deletions.
46 changes: 8 additions & 38 deletions JsonToolsNppPlugin/JSONTools/JNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System;
using System.Collections.Generic; // for dictionary, list
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
Expand Down Expand Up @@ -402,39 +403,14 @@ public static void StrToSb(StringBuilder sb, string s)
}
}

/// <summary>
/// Let <c>d17</c> = d.ToString("G17", <see cref="JNode.DOT_DECIMAL_SEP"/>) (which can always be parsed to regenerate a double equal to <c>d</c>)<br></br>
/// and let <c>d15</c> = d.ToString(<see cref="JNode.DOT_DECIMAL_SEP"/>) (which only keeps 15 digits of precision)<br></br>
/// Returns <c>d</c> formatted with up to 17 digits of precision, using '.' as the decimal separator.<br></br>
/// If <c>d17</c> includes 17 digits of precision, we will generate <c>d15</c>.<br></br>
/// If <c>d15</c> is shorter than <c>d17</c>, and if <c>double.Parse(d15) == d</c>, we will prefer <c>d15</c> because <c>d17</c> was an unnecessarily verbose representation of <c>d</c>.
/// </summary>
/// <param name="d"></param>
/// <returns></returns>
public static string DoubleToString(double d)
{
string dubstring = d.ToString(DOT_DECIMAL_SEP);
int indexOfE = dubstring.IndexOf('E');
bool isValidLong = d == Math.Round(d) && !(d > long.MaxValue || d < long.MinValue);
if (isValidLong && indexOfE < 0)
{
// add ending ".0" to distinguish doubles equal to integers from actual integers
// unless they use exponential notation, in which case you mess things up
// by turning something like 3.123E+15 into 3.123E+15.0 (a non-JSON number representation)
return dubstring + ".0";
}
// the default d.ToString(DOT_DECIMAL_SEP) might lose precision in some cases.
// We will nonetheless prefer this representation because the G17 representation
// has stupid unnecessarily verbose representations like representing 2317.24 as 2317.2399999999998
// We need to parse dubstring to make sure no precision has been lost.
try
{
if (double.Parse(dubstring) == d)
return dubstring; // default string representation has all necessary precision
}
catch { }
string d17 = d.ToString("G17", DOT_DECIMAL_SEP); // we need to use a string representation that retains as much precision as possible
return (isValidLong && d17.IndexOf('E') < 0) ? d17 + ".0" : d17;
var sb = new StringBuilder();
GrisuDotNet.Grisu.DoubleToString(d, sb);
var dstr = sb.ToString();
if (d == Math.Round(d) && !(d > long.MaxValue || d < long.MinValue) && dstr.IndexOf('e') < 0 && dstr.IndexOf('E') < 0)
return dstr + ".0";
return dstr;
}

/// <summary>
Expand All @@ -454,13 +430,7 @@ public virtual string ToString(bool sortKeys = true, string keyValueSep = ": ",
}
case Dtype.FLOAT:
{
double v = (double)value;
if (double.IsInfinity(v))
{
return (v < 0) ? "-Infinity" : "Infinity";
}
if (double.IsNaN(v)) { return "NaN"; }
return DoubleToString(v);
return DoubleToString((double)value);
}
case Dtype.INT: return Convert.ToInt64(value).ToString();
case Dtype.NULL: return "null";
Expand Down
4 changes: 4 additions & 0 deletions JsonToolsNppPlugin/JsonToolsNppPlugin.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@
<Compile Include="Utils\ArrayExtensions.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Utils\Translator.cs" />
<Compile Include="grisu.net\DiyFp.cs" />
<Compile Include="grisu.net\Grisu.cs" />
<Compile Include="grisu.net\GrisuDouble.cs" />
<Compile Include="grisu.net\PowersOfTenCache.cs" />
</ItemGroup>
<ItemGroup>
<!-- references -->
Expand Down
4 changes: 2 additions & 2 deletions JsonToolsNppPlugin/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@
// Build Number
// Revision
//
[assembly: AssemblyVersion("8.1.0.16")]
[assembly: AssemblyFileVersion("8.1.0.16")]
[assembly: AssemblyVersion("8.1.0.17")]
[assembly: AssemblyFileVersion("8.1.0.17")]
22 changes: 11 additions & 11 deletions JsonToolsNppPlugin/Tests/Benchmarker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
string numArrPreview = "";
string numArrayStr = "";
string numArrayDumped = "";
var noRoundTripValues = new List<double>();
for (int ii = 0; ii < numTrials; ii++)
{
try
Expand All @@ -200,7 +201,7 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
}
catch (Exception ex)
{
Npp.AddLine($"While generating the string representation of a random array of doubles, got exception {ex}");
Npp.AddLine($"FAIL: While generating the string representation of a random array of doubles, got exception {ex}");
return true;
}
numArrPreview = numArrayStr.Length <= 200 ? numArrayStr : numArrayStr.Substring(0, 200) + "...";
Expand All @@ -215,7 +216,7 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
}
catch (Exception ex)
{
Npp.AddLine($"While parsing the string representation of a random array of doubles (preview: \"{numArrPreview}\"), got exception {ex}");
Npp.AddLine($"FAIL: While parsing the string representation of a random array of doubles (preview: \"{numArrPreview}\"), got exception {ex}");
return true;
}
try
Expand All @@ -228,30 +229,24 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
}
catch (Exception ex)
{
Npp.AddLine($"While compressing the JSON array made by parsing \"{numArrPreview}\", got exception {ex}");
Npp.AddLine($"FAIL: While compressing the JSON array made by parsing \"{numArrPreview}\", got exception {ex}");
return true;
}
try
{
// verify that all doubles in numArray round-trip to the same value when parsing numArrayDumped
JArray numArrayFromDumped = (JArray)parser.Parse(numArrayDumped);
var badValues = new List<double>();
for (int jj = 0; jj < numArray.Length; jj++)
{
double val = (double)numArray[jj].value;
double reloaded = (double)numArrayFromDumped[jj].value;
if (val != reloaded)
badValues.Add(val);
}
if (badValues.Count > 0)
{
Npp.AddLine($"The following doubles did not round-trip:\r\n" + string.Join(", ", badValues.Select(x => x.ToString(JNode.DOT_DECIMAL_SEP))));
return true;
noRoundTripValues.Add(val);
}
}
catch (Exception ex)
{
Npp.AddLine($"While parsing the JSON array made by dumping numArray, and comparing the re-parsed array to numArray, got exception {ex}");
Npp.AddLine($"FAIL: While parsing the JSON array made by dumping numArray, and comparing the re-parsed array to numArray, got exception {ex}");
return true;
}
}
Expand All @@ -267,6 +262,11 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
Npp.AddLine($"Times to re-compress (ms): {string.Join(", ", dumpTimesStr)}");
string numArrayDumpedPreview = numArrayDumped.Length <= 200 ? numArrayDumped : numArrayDumped.Substring(0, 200) + "...";
Npp.AddLine($"Representative example of result of re-compression = \"{numArrayDumpedPreview}\"");
if (noRoundTripValues.Count > 0)
{
Npp.AddLine($"FAIL: The following doubles did not round-trip:\r\n" + string.Join(", ", noRoundTripValues.Select(x => x.ToString("G17", JNode.DOT_DECIMAL_SEP))));
return true;
}
return false;
}

Expand Down
18 changes: 9 additions & 9 deletions JsonToolsNppPlugin/Tests/JsonParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ public static bool Test()
NL + " }" +
NL + "]",
"open issue in Kapilratnani's JSON-Viewer regarding forward slashes having '/' stripped" ),
("111111111111111111111111111111", $"1.1111111111111111E+29", $"1.1111111111111111E+29",
("111111111111111111111111111111", $"1.111111111111111e29", $"1.111111111111111e29",
"auto-conversion of int64 overflow to double" ),
("{ \"a\"\r\n:1, \"b\" : 1, \"c\" :1}", "{\"a\": 1, \"b\": 1, \"c\": 1}",
"{"+ NL + "\"a\": 1,"+ NL + "\"b\": 1,"+ NL + "\"c\": 1" + NL + "}",
Expand All @@ -203,8 +203,8 @@ public static bool Test()
+ NL + "}",
"culture-sensitive sorting of keys (e.g., 'baßk' should sort before 'basst')"),
("[3.1234e15, -2.178e15, 7.59e15, 5.71138315710726E+18]",
"[3.1234E+15, -2.178E+15, 7.59E+15, 5.71138315710726E+18]",
"["+NL+"3.1234E+15,"+NL+"-2.178E+15,"+NL+"7.59E+15,"+NL+"5.71138315710726E+18"+NL+"]",
"[3.1234e15, -2.178e15, 7.59e15, 5.71138315710726e18]",
"["+NL+"3.1234e15,"+NL+"-2.178e15,"+NL+"7.59e15,"+NL+"5.71138315710726e18"+NL+"]",
"floating point numbers using 'E' notation that can exactly represent integers"
),
(
Expand All @@ -221,8 +221,8 @@ public static bool Test()
"dates and datetimes (both valid and invalid)"
),
("{\"super high precision negative\": -1.7976931348623157e308, \"super high precision positive\": [1.7976931348621957e+308, 2.2250738585072014e-308]}",
"{\"super high precision negative\": -1.7976931348623157E+308, \"super high precision positive\": [1.7976931348621958E+308, 2.2250738585072014E-308]}",
"{\r\n\"super high precision negative\": -1.7976931348623157E+308,\r\n\"super high precision positive\":\r\n [\r\n 1.7976931348621958E+308,\r\n 2.2250738585072014E-308\r\n ]\r\n}",
"{\"super high precision negative\": -1.7976931348623157e308, \"super high precision positive\": [1.7976931348621958e308, 2.2250738585072014e-308]}",
"{\r\n\"super high precision negative\": -1.7976931348623157e308,\r\n\"super high precision positive\":\r\n [\r\n 1.7976931348621958e308,\r\n 2.2250738585072014e-308\r\n ]\r\n}",
"very high-precision large numbers"),
};
int testsFailed = 0;
Expand Down Expand Up @@ -1281,12 +1281,12 @@ public static bool TestTryParseNumber()
(".5,boo", 0, 3, "\".5,\""),
("1,15.5e3E7", 2, 8, "15500.0"),
("1,15.5e3E70", 2, 10, "\"15.5e3E7\""),
("1,2.8e-7,7", 2, 8, "2.8E-07"),
(";17.4e+11,7", 1, 9, "1740000000000.0"),
("1,2.8e-7,7", 2, 8, "2.8e-7"),
(";17.4e+11,7", 1, 9, "1.74e12"),
("1,15.5e3e7", 2, 8, "15500.0"),
("1,15.5e3e70", 2, 10, "\"15.5e3e7\""),
("1,2.8E-7,7", 2, 8, "2.8E-07"),
(";17.4E+11,7", 1, 9, "1740000000000.0"),
("1,2.8E-7,7", 2, 8, "2.8e-7"),
(";17.4E+11,7", 1, 9, "1.74e12"),
("1,15.5Eb,ekr", 2, 8, "\"15.5Eb\""),
("a,0x123456789abc,3", 2, 16, "20015998343868"),
("a,0xABCDEFabcdef123,3", 2, 19, "773738404492800291"),
Expand Down
2 changes: 1 addition & 1 deletion JsonToolsNppPlugin/Tests/RemesPathTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ public static bool Test()
new Query_DesiredResult("keys(@)", "[\"foo\", \"bar\", \"baz\", \"quz\", \"jub\", \"guzo\", \"7\", \"_\"]"),
new Query_DesiredResult("values(@.bar)[:]", "[false, [\"a`g\", \"bah\"]]"),
new Query_DesiredResult("s_cat(@.bar.a, ``, 1.5, foo, 1, @.foo[0], null, @._)", "\"false1.5foo1[0, 1, 2]null{\\\"0\\\": 0}\""),
new Query_DesiredResult("s_cat(1e3)", "\"1000.0\""),
new Query_DesiredResult("s_cat(1e3)", "\"1e3\""),
new Query_DesiredResult("s_join(`\t`, @.bar.b)", "\"a`g\\tbah\""),
new Query_DesiredResult("sorted(unique(@.foo[1]), true)", "[5.0, 4.0, 3.0]"), // have to sort because this function involves a HashSet so order is random
new Query_DesiredResult("unique(@.foo[0], true)", "[0, 1, 2]"),
Expand Down
2 changes: 1 addition & 1 deletion JsonToolsNppPlugin/Tests/UserInterfaceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ public static bool Test()
("compare_text", new object[]{"[1,2,3]"}),
("overwrite", new object[]{"[1,2,-9e15]\r\n//foo"}),
("compress", new object[]{}),
("compare_text", new object[]{"[1,2,-9E+15]"}),
("compare_text", new object[]{"[1,2,-9e15]"}),
// TEST PARSE JSON LINES
("overwrite", new object[]{"[1,2,3]\r\n{\"a\": 1, \"b\": [-3,-4]}\r\n-7\r\nfalse"}),
("tree_open", new object[]{}), // to close the tree so it can be reopened
Expand Down
150 changes: 150 additions & 0 deletions JsonToolsNppPlugin/grisu.net/DiyFp.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Copyright 2010 the V8 project authors. All rights reserved.
// Copyright 2011-2012, Kevin Ring. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

using System.Diagnostics;

namespace GrisuDotNet
{
    // "Do It Yourself Floating Point": a floating-point value made of a
    // 64-bit unsigned significand and an int exponent.
    // A DiyFp is normalized when the most significant bit of its significand
    // is set. Multiply and Subtract leave their results un-normalized.
    // Special doubles (NaN and Infinity) are not meant to be stored here.
    internal struct DiyFp
    {
        // Width of the significand in bits.
        public const int kSignificandSize = 64;

        // Mask with only the top bit of a 64-bit significand set.
        private const ulong kUint64MSB = 0x8000000000000000;

        private ulong f_;
        private int e_;

        // Builds a DiyFp from significand f and exponent e, stored as given.
        public DiyFp(ulong f, int e)
        {
            e_ = e;
            f_ = f;
        }

        // The significand.
        public ulong F
        {
            get { return f_; }
            set { f_ = value; }
        }

        // The exponent.
        public int E
        {
            get { return e_; }
            set { e_ = value; }
        }

        // In-place subtraction: this = this - other.
        // Preconditions (checked with Debug.Assert only): both exponents are
        // equal and this significand is not smaller than other's.
        // The result is not normalized.
        public void Subtract(ref DiyFp other)
        {
            Debug.Assert(e_ == other.e_);
            Debug.Assert(f_ >= other.f_);
            f_ = f_ - other.f_;
        }

        // Returns a - b without modifying either operand.
        // Same preconditions as Subtract; the result is not normalized.
        public static DiyFp Minus(ref DiyFp a, ref DiyFp b)
        {
            DiyFp difference = a; // struct copy, so a itself is untouched
            difference.Subtract(ref b);
            return difference;
        }

        // In-place multiplication: this = this * other.
        // Emulates a 64x64 -> 128 bit product from 32-bit halves, keeping only
        // the upper 64 bits; the lower 64 bits are used solely for rounding.
        public void Multiply(ref DiyFp other)
        {
            const ulong kLow32Mask = 0xFFFFFFFFU;
            // Adding half of 2^32 to the middle sum rounds the kept bits;
            // halfway cases are rounded up.
            const ulong kRoundingBit = 1UL << 31;

            ulong thisHigh = f_ >> 32;
            ulong thisLow = f_ & kLow32Mask;
            ulong otherHigh = other.f_ >> 32;
            ulong otherLow = other.f_ & kLow32Mask;

            ulong highHigh = thisHigh * otherHigh;
            ulong lowHigh = thisLow * otherHigh;
            ulong highLow = thisHigh * otherLow;
            ulong lowLow = thisLow * otherLow;

            // Middle 32-bit column of the product plus the rounding bit.
            ulong middle = (lowLow >> 32) + (highLow & kLow32Mask) + (lowHigh & kLow32Mask) + kRoundingBit;
            ulong upper64 = highHigh + (highLow >> 32) + (lowHigh >> 32) + (middle >> 32);

            // Dropping the low 64 bits of the product is compensated by
            // raising the exponent by the significand width.
            e_ += other.e_ + kSignificandSize;
            f_ = upper64;
        }

        // Returns a * b without modifying either operand.
        public static DiyFp Times(ref DiyFp a, ref DiyFp b)
        {
            DiyFp product = a; // struct copy, so a itself is untouched
            product.Multiply(ref b);
            return product;
        }

        // Shifts the significand left until its most significant bit is set,
        // decreasing the exponent by the number of bits shifted.
        // Precondition (checked with Debug.Assert only): the significand is non-zero.
        public void Normalize()
        {
            Debug.Assert(f_ != 0);

            // Right-shifting by this amount leaves only the ten most significant bits.
            const int kTopTenBitsShift = kSignificandSize - 10;

            ulong significand = f_;
            int exponent = e_;

            // Fast path: move ten bits at a time while the top ten bits are all clear.
            while ((significand >> kTopTenBitsShift) == 0)
            {
                significand <<= 10;
                exponent -= 10;
            }
            // Finish one bit at a time until the top bit is set.
            while (significand < kUint64MSB)
            {
                significand <<= 1;
                exponent--;
            }

            f_ = significand;
            e_ = exponent;
        }

        // Returns a normalized copy of a without modifying a.
        public static DiyFp Normalize(ref DiyFp a)
        {
            DiyFp normalized = a; // struct copy, so a itself is untouched
            normalized.Normalize();
            return normalized;
        }
    }
}
Loading

0 comments on commit 4c6a9b5

Please sign in to comment.