Try faster double-to-string algorithm; not good enough

grisu.net is the best third-party alternative I found for converting doubles to strings, but as "most recent errors.txt" shows, some numbers do not round-trip to the same value. For example, 7.2600388277351625E-72 is dumped as 7.260038827735163e-72, which does not parse back to quite the same double. Interestingly, Python says the two values are the same, but C# disagrees, and that's disqualifying enough for me. I am creating this branch to record my current best effort to improve double-to-string performance, but I will not be incorporating it into my main branch.
molsonkiko · Nov 9, 2024 · 4c6a9b5 · 4c6a9b5
1 parent a7fef1c
commit 4c6a9b5
Show file tree

Hide file tree

Showing 13 changed files with 1,423 additions and 115 deletions.
diff --git a/JsonToolsNppPlugin/JSONTools/JNode.cs b/JsonToolsNppPlugin/JSONTools/JNode.cs
@@ -5,6 +5,7 @@
 using System;
 using System.Collections.Generic; // for dictionary, list
 using System.Globalization;
+using System.IO;
 using System.Linq;
 using System.Text;
 using System.Text.RegularExpressions;
@@ -402,39 +403,14 @@ public static void StrToSb(StringBuilder sb, string s)
             }
         }
 
-        /// <summary>
-        /// Let <c>d17</c> = d.ToString("G17", <see cref="JNode.DOT_DECIMAL_SEP"/>) (which can always be parsed to regenerate a double equal to <c>d</c>)<br></br>
-        /// and let <c>d15</c> = d.ToString(<see cref="JNode.DOT_DECIMAL_SEP"/>) (which only keeps 15 digits of precision)<br></br>
-        /// Returns <c>d</c> formatted with up to 17 digits of precision, using '.' as the decimal separator.<br></br>
-        /// If <c>d17</c> includes 17 digits of precision, we will generate <c>d15</c>.<br></br>
-        /// If <c>d15</c> is shorter than <c>d17</c>, and if (<c>double.Parse(d15) == d</c>, we will prefer <c>d15</c> because <c>d17</c> was an unncessarily verbose representation of <c>d</c>.
-        /// </summary>
-        /// <param name="d"></param>
-        /// <returns></returns>
         public static string DoubleToString(double d)
         {
-            string dubstring = d.ToString(DOT_DECIMAL_SEP);
-            int indexOfE = dubstring.IndexOf('E');
-            bool isValidLong = d == Math.Round(d) && !(d > long.MaxValue || d < long.MinValue);
-            if (isValidLong && indexOfE < 0)
-            {
-                // add ending ".0" to distinguish doubles equal to integers from actual integers
-                // unless they use exponential notation, in which case you mess things up
-                // by turning something like 3.123E+15 into 3.123E+15.0 (a non-JSON number representation)
-                return dubstring + ".0";
-            }
-            // the default d.ToString(DOT_DECIMAL_SEP) might lose precision in some cases.
-            // We will nonetheless prefer this representation because the G17 representation
-            //     has stupid unnecessarily verbose representations like representing 2317.24 as 2317.2399999999998
-            // We need to parse dubstring to make sure no precision has been lost.
-            try
-            {
-                if (double.Parse(dubstring) == d)
-                    return dubstring; // default string representation has all necessary precision
-            }
-            catch { }
-            string d17 = d.ToString("G17", DOT_DECIMAL_SEP); // we need to use a string representation that retains as much precision as possible
-            return (isValidLong && d17.IndexOf('E') < 0) ? d17 + ".0" : d17;
+            var sb = new StringBuilder();
+            GrisuDotNet.Grisu.DoubleToString(d, sb);
+            var dstr = sb.ToString();
+            if (d == Math.Round(d) && !(d > long.MaxValue || d < long.MinValue) && dstr.IndexOf('e') < 0 && dstr.IndexOf('E') < 0)
+                return dstr + ".0";
+            return dstr;
         }
 
         /// <summary>
@@ -454,13 +430,7 @@ public virtual string ToString(bool sortKeys = true, string keyValueSep = ": ",
                 }
                 case Dtype.FLOAT:
                 {
-                    double v = (double)value;
-                    if (double.IsInfinity(v))
-                    {
-                        return (v < 0) ? "-Infinity" : "Infinity";
-                    }
-                    if (double.IsNaN(v)) { return "NaN"; }
-                    return DoubleToString(v);
+                    return DoubleToString((double)value);
                 }
                 case Dtype.INT: return Convert.ToInt64(value).ToString();
                 case Dtype.NULL: return "null";

diff --git a/JsonToolsNppPlugin/JsonToolsNppPlugin.csproj b/JsonToolsNppPlugin/JsonToolsNppPlugin.csproj
@@ -190,6 +190,10 @@
     <Compile Include="Utils\ArrayExtensions.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Utils\Translator.cs" />
+    <Compile Include="grisu.net\DiyFp.cs" />
+    <Compile Include="grisu.net\Grisu.cs" />
+    <Compile Include="grisu.net\GrisuDouble.cs" />
+    <Compile Include="grisu.net\PowersOfTenCache.cs" />
   </ItemGroup>
   <ItemGroup>
     <!-- references -->

diff --git a/JsonToolsNppPlugin/Properties/AssemblyInfo.cs b/JsonToolsNppPlugin/Properties/AssemblyInfo.cs
@@ -28,5 +28,5 @@
 //      Build Number
 //      Revision
 //
-[assembly: AssemblyVersion("8.1.0.16")]
-[assembly: AssemblyFileVersion("8.1.0.16")]
+[assembly: AssemblyVersion("8.1.0.17")]
+[assembly: AssemblyFileVersion("8.1.0.17")]
diff --git a/JsonToolsNppPlugin/Tests/Benchmarker.cs b/JsonToolsNppPlugin/Tests/Benchmarker.cs
@@ -192,6 +192,7 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
             string numArrPreview = "";
             string numArrayStr = "";
             string numArrayDumped = "";
+            var noRoundTripValues = new List<double>();
             for (int ii = 0; ii < numTrials; ii++)
             {
                 try
@@ -200,7 +201,7 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
                 }
                 catch (Exception ex)
                 {
-                    Npp.AddLine($"While generating the string representation of a random array of doubles, got exception {ex}");
+                    Npp.AddLine($"FAIL: While generating the string representation of a random array of doubles, got exception {ex}");
                     return true;
                 }
                 numArrPreview = numArrayStr.Length <= 200 ? numArrayStr : numArrayStr.Substring(0, 200) + "...";
@@ -215,7 +216,7 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
                 }
                 catch (Exception ex)
                 {
-                    Npp.AddLine($"While parsing the string representation of a random array of doubles (preview: \"{numArrPreview}\"), got exception {ex}");
+                    Npp.AddLine($"FAIL: While parsing the string representation of a random array of doubles (preview: \"{numArrPreview}\"), got exception {ex}");
                     return true;
                 }
                 try
@@ -228,30 +229,24 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
                 }
                 catch (Exception ex)
                 {
-                    Npp.AddLine($"While compressing the JSON array made by parsing \"{numArrPreview}\", got exception {ex}");
+                    Npp.AddLine($"FAIL: While compressing the JSON array made by parsing \"{numArrPreview}\", got exception {ex}");
                     return true;
                 }
                 try
                 {
                     // verify that all doubles in numArray round-trip to the same value when parsing numArrayDumped
                     JArray numArrayFromDumped = (JArray)parser.Parse(numArrayDumped);
-                    var badValues = new List<double>();
                     for (int jj = 0; jj < numArray.Length; jj++)
                     {
                         double val = (double)numArray[jj].value;
                         double reloaded = (double)numArrayFromDumped[jj].value;
                         if (val != reloaded)
-                            badValues.Add(val);
-                    }
-                    if (badValues.Count > 0)
-                    {
-                        Npp.AddLine($"The following doubles did not round-trip:\r\n" + string.Join(", ", badValues.Select(x => x.ToString(JNode.DOT_DECIMAL_SEP))));
-                        return true;
+                            noRoundTripValues.Add(val);
                     }
                 }
                 catch (Exception ex)
                 {
-                    Npp.AddLine($"While parsing the JSON array made by dumping numArray, and comparing the re-parsed array to numArray, got exception {ex}");
+                    Npp.AddLine($"FAIL: While parsing the JSON array made by dumping numArray, and comparing the re-parsed array to numArray, got exception {ex}");
                     return true;
                 }
             }
@@ -267,6 +262,11 @@ public static bool BenchmarkAndFuzzParseAndFormatDoubles(int numTrials, int arra
             Npp.AddLine($"Times to re-compress (ms): {string.Join(", ", dumpTimesStr)}");
             string numArrayDumpedPreview = numArrayDumped.Length <= 200 ? numArrayDumped : numArrayDumped.Substring(0, 200) + "...";
             Npp.AddLine($"Representative example of result of re-compression = \"{numArrayDumpedPreview}\"");
+            if (noRoundTripValues.Count > 0)
+            {
+                Npp.AddLine($"FAIL: The following doubles did not round-trip:\r\n" + string.Join(", ", noRoundTripValues.Select(x => x.ToString("G17", JNode.DOT_DECIMAL_SEP))));
+                return true;
+            }
             return false;
         }
 

diff --git a/JsonToolsNppPlugin/Tests/JsonParserTests.cs b/JsonToolsNppPlugin/Tests/JsonParserTests.cs
@@ -181,7 +181,7 @@ public static bool Test()
                              NL + "    }" +
                              NL + "]",
                              "open issue in Kapilratnani's JSON-Viewer regarding forward slashes having '/' stripped" ),
-                ("111111111111111111111111111111", $"1.1111111111111111E+29", $"1.1111111111111111E+29",
+                ("111111111111111111111111111111", $"1.111111111111111e29", $"1.111111111111111e29",
                     "auto-conversion of int64 overflow to double" ),
                 ("{ \"a\"\r\n:1, \"b\" : 1, \"c\"       :1}", "{\"a\": 1, \"b\": 1, \"c\": 1}",
                 "{"+ NL + "\"a\": 1,"+ NL + "\"b\": 1,"+ NL + "\"c\": 1" + NL + "}",
@@ -203,8 +203,8 @@ public static bool Test()
                     + NL + "}",
                     "culture-sensitive sorting of keys (e.g., 'baßk' should sort before 'basst')"),
                 ("[3.1234e15, -2.178e15, 7.59e15, 5.71138315710726E+18]",
-                    "[3.1234E+15, -2.178E+15, 7.59E+15, 5.71138315710726E+18]",
-                    "["+NL+"3.1234E+15,"+NL+"-2.178E+15,"+NL+"7.59E+15,"+NL+"5.71138315710726E+18"+NL+"]",
+                    "[3.1234e15, -2.178e15, 7.59e15, 5.71138315710726e18]",
+                    "["+NL+"3.1234e15,"+NL+"-2.178e15,"+NL+"7.59e15,"+NL+"5.71138315710726e18"+NL+"]",
                     "floating point numbers using 'E' notation that can exactly represent integers"
                 ),
                 (
@@ -221,8 +221,8 @@ public static bool Test()
                     "dates and datetimes (both valid and invalid)"
                 ),
                 ("{\"super high precision negative\": -1.7976931348623157e308, \"super high precision positive\": [1.7976931348621957e+308, 2.2250738585072014e-308]}",
-                 "{\"super high precision negative\": -1.7976931348623157E+308, \"super high precision positive\": [1.7976931348621958E+308, 2.2250738585072014E-308]}",
-                 "{\r\n\"super high precision negative\": -1.7976931348623157E+308,\r\n\"super high precision positive\":\r\n    [\r\n    1.7976931348621958E+308,\r\n    2.2250738585072014E-308\r\n    ]\r\n}",
+                 "{\"super high precision negative\": -1.7976931348623157e308, \"super high precision positive\": [1.7976931348621958e308, 2.2250738585072014e-308]}",
+                 "{\r\n\"super high precision negative\": -1.7976931348623157e308,\r\n\"super high precision positive\":\r\n    [\r\n    1.7976931348621958e308,\r\n    2.2250738585072014e-308\r\n    ]\r\n}",
                  "very high-precision large numbers"),
             };
             int testsFailed = 0;
@@ -1281,12 +1281,12 @@ public static bool TestTryParseNumber()
                 (".5,boo", 0, 3, "\".5,\""),
                 ("1,15.5e3E7", 2, 8, "15500.0"),
                 ("1,15.5e3E70", 2, 10, "\"15.5e3E7\""),
-                ("1,2.8e-7,7", 2, 8, "2.8E-07"),
-                (";17.4e+11,7", 1, 9, "1740000000000.0"),
+                ("1,2.8e-7,7", 2, 8, "2.8e-7"),
+                (";17.4e+11,7", 1, 9, "1.74e12"),
                 ("1,15.5e3e7", 2, 8, "15500.0"),
                 ("1,15.5e3e70", 2, 10, "\"15.5e3e7\""),
-                ("1,2.8E-7,7", 2, 8, "2.8E-07"),
-                (";17.4E+11,7", 1, 9, "1740000000000.0"),
+                ("1,2.8E-7,7", 2, 8, "2.8e-7"),
+                (";17.4E+11,7", 1, 9, "1.74e12"),
                 ("1,15.5Eb,ekr", 2, 8, "\"15.5Eb\""),
                 ("a,0x123456789abc,3", 2, 16, "20015998343868"),
                 ("a,0xABCDEFabcdef123,3", 2, 19, "773738404492800291"),

diff --git a/JsonToolsNppPlugin/Tests/RemesPathTests.cs b/JsonToolsNppPlugin/Tests/RemesPathTests.cs
@@ -323,7 +323,7 @@ public static bool Test()
                 new Query_DesiredResult("keys(@)", "[\"foo\", \"bar\", \"baz\", \"quz\", \"jub\", \"guzo\", \"7\", \"_\"]"),
                 new Query_DesiredResult("values(@.bar)[:]", "[false, [\"a`g\", \"bah\"]]"),
                 new Query_DesiredResult("s_cat(@.bar.a, ``, 1.5, foo, 1, @.foo[0], null, @._)", "\"false1.5foo1[0, 1, 2]null{\\\"0\\\": 0}\""),
-                new Query_DesiredResult("s_cat(1e3)", "\"1000.0\""),
+                new Query_DesiredResult("s_cat(1e3)", "\"1e3\""),
                 new Query_DesiredResult("s_join(`\t`, @.bar.b)", "\"a`g\\tbah\""),
                 new Query_DesiredResult("sorted(unique(@.foo[1]), true)", "[5.0, 4.0, 3.0]"), // have to sort because this function involves a HashSet so order is random
                 new Query_DesiredResult("unique(@.foo[0], true)", "[0, 1, 2]"),

diff --git a/JsonToolsNppPlugin/Tests/UserInterfaceTests.cs b/JsonToolsNppPlugin/Tests/UserInterfaceTests.cs
@@ -521,7 +521,7 @@ public static bool Test()
                 ("compare_text", new object[]{"[1,2,3]"}),
                 ("overwrite", new object[]{"[1,2,-9e15]\r\n//foo"}),
                 ("compress", new object[]{}),
-                ("compare_text", new object[]{"[1,2,-9E+15]"}),
+                ("compare_text", new object[]{"[1,2,-9e15]"}),
                 // TEST PARSE JSON LINES
                 ("overwrite", new object[]{"[1,2,3]\r\n{\"a\": 1, \"b\": [-3,-4]}\r\n-7\r\nfalse"}),
                 ("tree_open", new object[]{}), // to close the tree so it can be reopened

diff --git a/JsonToolsNppPlugin/grisu.net/DiyFp.cs b/JsonToolsNppPlugin/grisu.net/DiyFp.cs
@@ -0,0 +1,150 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Copyright 2011-2012, Kevin Ring. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+using System.Diagnostics;
+
+namespace GrisuDotNet
+{
+    // This "Do It Yourself Floating Point" struct implements a floating-point number
+    // with a uint64 significand and an int exponent (value = F * 2^E). Normalized DiyFp
+    // numbers will have the most significant bit of the significand set.
+    // Multiplication and Subtraction do not normalize their results.
+    // DiyFp are not designed to contain special doubles (NaN and Infinity).
+    internal struct DiyFp
+    {
+        public const int kSignificandSize = 64; // width of the significand, in bits
+
+        public DiyFp(ulong f, int e) // f: significand, e: binary exponent
+        {
+            f_ = f;
+            e_ = e;
+        }
+
+        // this = this - other (the significands are subtracted in place; the exponent is unchanged).
+        // The exponents of both numbers must be the same and the significand of this
+        // must be greater than or equal to the significand of other (both are Debug.Assert-ed).
+        // The result will not be normalized.
+        public void Subtract(ref DiyFp other)
+        {
+            Debug.Assert(e_ == other.e_);
+            Debug.Assert(f_ >= other.f_);
+            f_ -= other.f_;
+        }
+
+        // Returns a - b as a new DiyFp; neither a nor b is modified.
+        // The exponents of both numbers must be the same and the significand of a must be
+        // greater than or equal to that of b. The result will not be normalized.
+        public static DiyFp Minus(ref DiyFp a, ref DiyFp b)
+        {
+            DiyFp result = a;
+            result.Subtract(ref b);
+            return result;
+        }
+
+
+        // this = this * other, rounded to a 64-bit significand. The result is not normalized.
+        public void Multiply(ref DiyFp other)
+        {
+            // Simply "emulates" a 128 bit multiplication by splitting each significand
+            // into 32-bit halves and summing the four partial products.
+            // However: the resulting number only contains 64 bits. The least significant
+            // 64 bits are only used for rounding the most significant 64 bits.
+            const ulong kM32 = 0xFFFFFFFFU;
+            ulong a = f_ >> 32;
+            ulong b = f_ & kM32;
+            ulong c = other.f_ >> 32;
+            ulong d = other.f_ & kM32;
+            ulong ac = a * c;
+            ulong bc = b * c;
+            ulong ad = a * d;
+            ulong bd = b * d;
+            ulong tmp = (bd >> 32) + (ad & kM32) + (bc & kM32);
+            // By adding 1U << 31 to tmp we round the final result.
+            // Halfway cases will be rounded up.
+            tmp += 1U << 31;
+            ulong result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
+            e_ += other.e_ + 64; // + 64 because only the upper 64 bits of the 128-bit product are kept
+            f_ = result_f;
+        }
+
+        // Returns a * b as a new DiyFp; neither a nor b is modified. The result is not normalized.
+        public static DiyFp Times(ref DiyFp a, ref DiyFp b)
+        {
+            DiyFp result = a;
+            result.Multiply(ref b);
+            return result;
+        }
+
+        public void Normalize() // shifts f_ left until its top bit is set, lowering e_ by one per bit shifted
+        {
+            Debug.Assert(f_ != 0); // with a zero significand the loops below would never terminate
+            ulong f = f_;
+            int e = e_;
+
+            // This method is mainly called for normalizing boundaries. In general
+            // boundaries need to be shifted by 10 bits. We thus optimize for this case.
+            const ulong k10MSBits = 0xFFC0000000000000; // mask of the 10 most significant bits
+            while ((f & k10MSBits) == 0)
+            {
+                f <<= 10;
+                e -= 10;
+            }
+            while ((f & kUint64MSB) == 0)
+            {
+                f <<= 1;
+                e--;
+            }
+            f_ = f;
+            e_ = e;
+        }
+
+        public static DiyFp Normalize(ref DiyFp a) // returns a normalized copy of a; a itself is not modified
+        {
+            DiyFp result = a;
+            result.Normalize();
+            return result;
+        }
+
+        public ulong F // the significand
+        {
+            get { return f_; }
+            set { f_ = value; }
+        }
+
+        public int E // the binary exponent
+        {
+            get { return e_; }
+            set { e_ = value; }
+        }
+
+        private const ulong kUint64MSB = 0x8000000000000000; // only the most significant bit is set
+
+        private ulong f_; // significand
+        private int e_; // binary exponent
+    }
+}