diff --git a/CHANGELOG.md b/CHANGELOG.md index 417791e6..a0304ff4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 1.3.0 - 2019-01-02 + +### Changed + +- Performance improvements of 15-25% on serialization, 10% on deserialization. + ## 1.2.1 - 2018-12-31 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 4bcfd4ff..64301cba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,10 +61,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "orjson" -version = "1.2.1" +version = "1.3.0" dependencies = [ "pyo3 0.5.0", - "serde 1.0.83 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.34 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -157,7 +157,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "serde" -version = "1.0.83" +version = "1.0.84" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -167,7 +167,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", "ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.83 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -251,7 +251,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" "checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" "checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = 
"eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" -"checksum serde 1.0.83 (registry+https://github.com/rust-lang/crates.io-index)" = "157e12af46859e968da75dea9845530e13d03bcab2009a41b9b7bb3cf4eb3ec2" +"checksum serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "0e732ed5a5592c17d961555e3b552985baf98d50ce418b7b655f31f6ba7eb1b7" "checksum serde_json 1.0.34 (registry+https://github.com/rust-lang/crates.io-index)" = "bdf540260cfee6da923831f4776ddc495ada940c30117977c70f1313a6130545" "checksum smallvec 0.6.7 (registry+https://github.com/rust-lang/crates.io-index)" = "b73ea3738b47563803ef814925e69be00799a8c07420be8b996f8e98fb2336db" "checksum spin 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)" = "ceac490aa12c567115b40b7b7fceca03a6c9d53d5defea066123debc83c5dc1f" diff --git a/Cargo.toml b/Cargo.toml index b68734c0..11748777 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "orjson" -version = "1.2.1" +version = "1.3.0" authors = [] description = "Fast Python JSON library" edition = '2018' diff --git a/README.md b/README.md index 62aa9909..0c4ca666 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ # orjson orjson is a fast JSON library for Python. It benchmarks as the fastest Python -library for JSON serialization, with 1.6x to 2.6x the performance of the nearest -other library, and deserialization performance of 0.95x to 1.2x -the nearest other library. +library for JSON. Its serialization performance is 2x to 3x the nearest +other library and 4.5x to 11.5x the standard library. Its deserialization +performance is 1.05x to 1.2x the nearest other library and 1.2x to 4x +the standard library. -It supports CPython 3.5, 3.6, and 3.7. It is not intended -as a drop-in replacement for the standard library's json module. +It supports CPython 3.5, 3.6, and 3.7. Its API is a subset of the +API of the standard library's json module. 
## Usage @@ -23,7 +24,6 @@ and Python environment: ```sh git clone --recurse-submodules https://github.com/ijl/orjson.git && cd orjson -virtualenv .venv && source .venv/bin/activate pip install --upgrade pyo3-pack pyo3-pack build --release --strip --interpreter python3.7 ``` @@ -40,13 +40,13 @@ def dumps(obj: Union[str, bytes, dict, list, tuple, int, float, None]) -> bytes: `dumps()` serializes Python objects to JSON. It has no options, does not support hooks for custom objects, and does not -support subclasses. It supports 64-bit integers and 64-bit floats, which -is the same as the standard library `json` module. +support subclasses. It raises `TypeError` on an unsupported type. This exception message describes the invalid object. -It raises `TypeError` on an integer that is too large. +It raises `TypeError` on an integer that exceeds 64 bits. This is the same +as the standard library's `json` module. It raises `TypeError` if a `dict` has a key of a type other than `str`. @@ -107,102 +107,118 @@ roundtrip, jsonchecker, and fixtures files of the [nativejson-benchmark](https://github.com/miloyip/nativejson-benchmark) repository. It is tested to not crash against the [Big List of Naughty Strings](https://github.com/minimaxir/big-list-of-naughty-strings). -There are integration tests exercising the library's use in web -servers (uwsgi and gunicorn, using multiprocess/forked workers) and when +It is tested to not leak memory. It is tested to be correct against +input from the PyJFuzz JSON fuzzer. There are integration tests +exercising the library's use in web servers (uwsgi and gunicorn, +using multiprocess/forked workers) and when multithreaded. It also uses some tests from the ultrajson library. ## Performance -Serialization performance of orjson is better than ultrajson, rapidjson, or -json. Deserialization performance is better to about the same as ultrajson. +Serialization and deserialization performance of orjson is better than +ultrajson, rapidjson, or json. 
The benchmarks are done on fixtures of real data: + +* twitter.json, 631.5KiB, results of a search on Twitter for "一", containing +CJK strings, dictionaries of strings and arrays of dictionaries, indented. + +* github.json, 55.8KiB, a GitHub activity feed, containing dictionaries of +strings and arrays of dictionaries, not indented. + +* citm_catalog.json, 1.7MiB, concert data, containing nested dictionaries of +strings and arrays of integers, indented. + +* canada.json, 2.2MiB, coordinates of the Canadian border in GeoJSON +format, containing floats and arrays, indented. ![alt text](doc/twitter_serialization.png "twitter.json serialization") ![alt text](doc/twitter_deserialization.png "twitter.json deserialization") -![alt text](doc/citm_catalog_serialization.png "citm_catalog.json serialization") -![alt text](doc/citm_catalog_deserialization.png "citm_catalog.json deserialization") ![alt text](doc/github_serialization.png "github.json serialization") ![alt text](doc/github_deserialization.png "github.json deserialization") +![alt text](doc/citm_catalog_serialization.png "citm_catalog.json serialization") +![alt text](doc/citm_catalog_deserialization.png "citm_catalog.json deserialization") ![alt text](doc/canada_serialization.png "canada.json serialization") ![alt text](doc/canada_deserialization.png "canada.json deserialization") -#### canada.json deserialization +#### twitter.json serialization -| Library | Median (milliseconds) | Operations per second | Relative (latency) | -|-----------|-------------------------|-------------------------|----------------------| -| orjson | 7.59 | 131.8 | 1 | -| ujson | 7.26 | 133.5 | 0.96 | -| rapidjson | 26.72 | 37.4 | 3.52 | -| json | 26.78 | 37.3 | 3.53 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 0.48 | 2077.6 | 1 | +| ujson | 1.48 | 664.6 | 3.09 | +| rapidjson |
1.59 | 626.5 | 3.32 | +| json | 2.24 | 443.9 | 4.68 | -#### canada.json serialization +#### twitter.json deserialization -| Library | Median (milliseconds) | Operations per second | Relative (latency) | -|-----------|-------------------------|-------------------------|----------------------| -| orjson | 4.99 | 200.3 | 1 | -| ujson | 8.16 | 122.5 | 1.64 | -| rapidjson | 43.27 | 23.1 | 8.67 | -| json | 48.15 | 20.8 | 9.65 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 2.38 | 418.8 | 1 | +| ujson | 2.67 | 373 | 1.12 | +| rapidjson | 2.78 | 359.5 | 1.16 | +| json | 2.77 | 359.7 | 1.16 | -#### citm_catalog.json deserialization +#### github.json serialization + +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 0.06 | 17745 | 1 | +| ujson | 0.14 | 7107.1 | 2.49 | +| rapidjson | 0.16 | 6253.9 | 2.86 | +| json | 0.25 | 3972.5 | 4.49 | -| Library | Median (milliseconds) | Operations per second | Relative (latency) | -|-----------|-------------------------|-------------------------|----------------------| -| orjson | 5.05 | 198.2 | 1 | -| ujson | 6.2 | 161.2 | 1.23 | -| rapidjson | 6.57 | 152.2 | 1.3 | -| json | 6.62 | 151.1 | 1.31 | +#### github.json deserialization + +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 0.2 | 4929.7 | 1 | +| ujson | 0.22 | 4605.2 | 1.08 | +| rapidjson | 0.24 | 4166.5 | 1.19 | +| json | 0.24 | 4150.8 | 1.19 | #### citm_catalog.json serialization -| Library | Median (milliseconds) | Operations per second | Relative (latency) | 
-|-----------|-------------------------|-------------------------|----------------------| -| orjson | 1 | 997.4 | 1 | -| ujson | 2.54 | 394.1 | 2.53 | -| rapidjson | 2.38 | 419.5 | 2.38 | -| json | 5.26 | 190 | 5.25 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 0.76 | 1302 | 1 | +| ujson | 2.58 | 387.2 | 3.38 | +| rapidjson | 2.37 | 421.1 | 3.11 | +| json | 5.41 | 184.4 | 7.09 | -#### github.json deserialization +#### citm_catalog.json deserialization -| Library | Median (milliseconds) | Operations per second | Relative (latency) | -|-----------|-------------------------|-------------------------|----------------------| -| orjson | 0.23 | 4310.6 | 1 | -| ujson | 0.23 | 4414.3 | 0.98 | -| rapidjson | 0.23 | 4229.4 | 1 | -| json | 0.23 | 4176.3 | 1 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 4.28 | 233.1 | 1 | +| ujson | 5.06 | 197.2 | 1.18 | +| rapidjson | 5.82 | 171.7 | 1.36 | +| json | 5.81 | 171.8 | 1.36 | -#### github.json serialization +#### canada.json serialization -| Library | Median (milliseconds) | Operations per second | Relative (latency) | -|-----------|-------------------------|-------------------------|----------------------| -| orjson | 0.06 | 16357.9 | 1 | -| ujson | 0.13 | 7531.2 | 2.17 | -| rapidjson | 0.16 | 6362.9 | 2.57 | -| json | 0.23 | 4242.5 | 3.8 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 4.04 | 247.7 | 1 | +| ujson | 8.43 | 118.6 | 2.09 | +| rapidjson | 43.93 | 22.7 | 10.88 | +| json | 47.23 | 21.1 | 11.7 | -#### twitter.json deserialization +#### canada.json deserialization 
-| Library | Median (milliseconds) | Operations per second | Relative (latency) | -|-----------|-------------------------|-------------------------|----------------------| -| orjson | 2.6 | 385.5 | 1 | -| ujson | 2.98 | 336.5 | 1.15 | -| rapidjson | 2.84 | 339.1 | 1.09 | -| json | 2.84 | 345.9 | 1.09 | +| Library | Median latency (milliseconds) | Operations per second | Relative (latency) | +|-----------|---------------------------------|-------------------------|----------------------| +| orjson | 6.69 | 147.6 | 1 | +| ujson | 7.17 | 139.4 | 1.07 | +| rapidjson | 26.77 | 37.4 | 4 | +| json | 26.59 | 37.6 | 3.97 | -#### twitter.json serialization -| Library | Median (milliseconds) | Operations per second | Relative (latency) | -|-----------|-------------------------|-------------------------|----------------------| -| orjson | 0.56 | 1790 | 1 | -| ujson | 1.44 | 693.9 | 2.58 | -| rapidjson | 1.57 | 636.1 | 2.82 | -| json | 2.21 | 452 | 3.96 | +This was measured using orjson 1.3.0 on Python 3.7.2 and Linux. -This was measured using orjson 1.2.0 on Python 3.7.1 and Linux. The above can be -reproduced using the `pybench` and `graph` scripts. +The results can be reproduced using the `pybench` and `graph` scripts. ## License orjson is dual licensed under the Apache 2.0 and MIT licenses. It contains -code from the hyperjson and ultrajson libraries. It is implemented using +tests from the hyperjson and ultrajson libraries. It is implemented using the [serde_json](https://github.com/serde-rs/json) and [pyo3](https://github.com/PyO3/pyo3) libraries. 
diff --git a/doc/canada_deserialization.png b/doc/canada_deserialization.png index e23748f0..bb603eb4 100644 Binary files a/doc/canada_deserialization.png and b/doc/canada_deserialization.png differ diff --git a/doc/canada_serialization.png b/doc/canada_serialization.png index 2a54f504..8bb01b04 100644 Binary files a/doc/canada_serialization.png and b/doc/canada_serialization.png differ diff --git a/doc/citm_catalog_deserialization.png b/doc/citm_catalog_deserialization.png index 90076f2e..3dd82518 100644 Binary files a/doc/citm_catalog_deserialization.png and b/doc/citm_catalog_deserialization.png differ diff --git a/doc/citm_catalog_serialization.png b/doc/citm_catalog_serialization.png index 1e6368c0..57ca622f 100644 Binary files a/doc/citm_catalog_serialization.png and b/doc/citm_catalog_serialization.png differ diff --git a/doc/github_deserialization.png b/doc/github_deserialization.png index 359a7e0e..36f0da04 100644 Binary files a/doc/github_deserialization.png and b/doc/github_deserialization.png differ diff --git a/doc/github_serialization.png b/doc/github_serialization.png index 9c52d776..7eb076b5 100644 Binary files a/doc/github_serialization.png and b/doc/github_serialization.png differ diff --git a/doc/twitter_deserialization.png b/doc/twitter_deserialization.png index b124789a..5837fc5f 100644 Binary files a/doc/twitter_deserialization.png and b/doc/twitter_deserialization.png differ diff --git a/doc/twitter_serialization.png b/doc/twitter_serialization.png index a914ba39..ed51fd6d 100644 Binary files a/doc/twitter_serialization.png and b/doc/twitter_serialization.png differ diff --git a/graph b/graph index 394f4ac6..d144870a 100755 --- a/graph +++ b/graph @@ -12,8 +12,6 @@ import matplotlib.pyplot as plt LIBRARIES = ('orjson', 'ujson', 'rapidjson', 'json') -COLOR = ('blue', 'green', 'red', 'blue') - def aggregate(): benchmarks_dir = os.path.join('.benchmarks', os.listdir('.benchmarks')[0]) res = collections.defaultdict(dict) @@ -47,8 +45,8 @@ def 
box(obj): def tab(obj): buf = io.StringIO() - headers = ('Library', 'Median (milliseconds)', 'Operations per second', 'Relative (latency)') - for group, val in sorted(obj.items()): + headers = ('Library', 'Median latency (milliseconds)', 'Operations per second', 'Relative (latency)') + for group, val in sorted(obj.items(), reverse=True): buf.write('\n' + '#### ' + group + '\n\n') table = [] for lib in LIBRARIES: