diff --git a/doc/decimal.adoc b/doc/decimal.adoc
index f980ce00d..69db874ff 100644
--- a/doc/decimal.adoc
+++ b/doc/decimal.adoc
@@ -32,6 +32,7 @@ include::decimal/limits.adoc[]
 include::decimal/config.adoc[]
 include::decimal/type_traits.adoc[]
 include::decimal/examples.adoc[]
+include::decimal/benchmarks.adoc[]
 //include::decimal/reference.adoc[]
 include::decimal/design.adoc[]
 include::decimal/copyright.adoc[]
diff --git a/doc/decimal/benchmarks.adoc b/doc/decimal/benchmarks.adoc
new file mode 100644
index 000000000..310e6894f
--- /dev/null
+++ b/doc/decimal/benchmarks.adoc
@@ -0,0 +1,140 @@
+////
+Copyright 2024 Matt Borland
+Distributed under the Boost Software License, Version 1.0.
+https://www.boost.org/LICENSE_1_0.txt
+////
+
+[#Benchmarks]
+= Benchmarks
+:idprefix: benchmarks_
+
+This section describes a range of performance benchmarks that have been run comparing this library with the standard library, and how to run your own benchmarks if required.
+
+The values in the ratio column are how many times longer running a specific operation takes in comparison to the same operation with a `double`.
+
+IMPORTANT: On nearly all platforms there is hardware support for binary floating-point math, so we are comparing hardware to software runtimes; *Decimal will be slower*.
+
+[#run_benchmarks_]
+== How to run the Benchmarks
+
+To run the benchmarks yourself, navigate to the test folder and define `BOOST_DECIMAL_RUN_BENCHMARKS` when running the tests.
+An example on Linux with b2: `../../../b2 cxxstd=20 toolset=gcc-13 define=BOOST_DECIMAL_RUN_BENCHMARKS benchmarks -a release`.
+
+== Comparisons
+
+The benchmark for comparisons generates a random vector containing 10,000,000 elements and does operations `>`, `>=`, `<`, `<=`, `==`, and `!=` between `vec[i]` and `vec[i + 1]`.
+
+=== M1 macOS Results
+
+Run using a MacBook Pro with an M1 Pro chipset running macOS Sonoma 14.4.1 and Homebrew Clang 18.1.4.
+
+|===
+| Type | Runtime (ms) | Ratio to `double`
+| `float`
+| 8
+| 1.333
+| `double`
+| 6
+| 1.000
+| `decimal32`
+| 380
+| 63.333
+| `decimal64`
+| 408
+| 68.000
+| `decimal128`
+| 14641
+| 2440.170
+|===
+
+== Basic Operations
+
+The benchmark for these operations generates a random vector containing 10,000,000 elements and does operations `+`, `-`, `*`, `/` between `vec[i]` and `vec[i + 1]`.
+
+=== M1 macOS Results
+
+Run using a MacBook Pro with an M1 Pro chipset running macOS Sonoma 14.4.1 and Homebrew Clang 18.1.4.
+
+==== Addition
+
+|===
+| Type | Runtime (ms) | Ratio to `double`
+| `float`
+| 6
+| 1.500
+| `double`
+| 4
+| 1.000
+| `decimal32`
+| 361
+| 90.250
+| `decimal64`
+| 568
+| 142.000
+| `decimal128`
+| 13428
+| 3357.000
+|===
+
+==== Subtraction
+
+|===
+| Type | Runtime (ms) | Ratio to `double`
+| `float`
+| 3
+| 3.000
+| `double`
+| 1
+| 1.000
+| `decimal32`
+| 307
+| 307.000
+| `decimal64`
+| 465
+| 465.000
+| `decimal128`
+| 11444
+| 11444.000
+|===
+
+==== Multiplication
+
+|===
+| Type | Runtime (ms) | Ratio to `double`
+| `float`
+| 1
+| 0.333
+| `double`
+| 3
+| 1.000
+| `decimal32`
+| 311
+| 103.667
+| `decimal64`
+| 569
+| 189.667
+| `decimal128`
+| 9430
+| 3143.330
+|===
+
+==== Division
+
+|===
+| Type | Runtime (ms) | Ratio to `double`
+| `float`
+| 2
+| 0.667
+| `double`
+| 3
+| 1.000
+| `decimal32`
+| 319
+| 106.333
+| `decimal64`
+| 395
+| 131.667
+| `decimal128`
+| 14781
+| 4927.000
+|===
diff --git a/test/Jamfile b/test/Jamfile
index bdb4a781c..ad5b49e57 100644
--- a/test/Jamfile
+++ b/test/Jamfile
@@ -41,6 +41,7 @@ project : requirements
   [ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
   ;
 
+run-fail benchmarks.cpp ;
 compile-fail concepts_test.cpp ;
 run github_issue_426.cpp ;
 run github_issue_448.cpp ;
diff --git a/test/benchmarks.cpp b/test/benchmarks.cpp
new file mode 100644
index 000000000..41b6538e0
--- /dev/null
+++ b/test/benchmarks.cpp
@@ -0,0 +1,168 @@
+// Copyright 2024 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+#include <boost/decimal.hpp>
+#include <chrono>
+#include <random>
+#include <vector>
+#include <type_traits>
+#include <iostream>
+#include <iomanip>
+
+#ifdef BOOST_DECIMAL_RUN_BENCHMARKS
+
+using namespace boost::decimal;
+using namespace std::chrono_literals;
+
+// No-inline marker for the timed helpers; covers GCC as well as Clang (Clang honours `#pragma GCC diagnostic`).
+#if defined(__clang__) || defined(__GNUC__)
+#  pragma GCC diagnostic ignored "-Wfloat-equal"
+#  define BOOST_DECIMAL_NO_INLINE __attribute__ ((__noinline__))
+#elif defined(_MSC_VER)
+#  define BOOST_DECIMAL_NO_INLINE __declspec(noinline)
+#endif
+
+// Floating-point overload: `size` values drawn uniformly from [0, 1) in type T itself.
+// Passing seed == 0 swaps in a seed obtained from std::random_device.
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+std::vector<T> generate_random_vector(std::size_t size = 10'000'000U, unsigned seed = 42U)
+{
+    if (seed == 0)
+    {
+        seed = std::random_device{}();
+    }
+    std::mt19937_64 engine(seed);
+    std::uniform_real_distribution<T> unit_interval(0, 1);
+
+    std::vector<T> values(size);
+    for (auto& value : values)
+    {
+        value = unit_interval(engine);
+    }
+    return values;
+}
+
+// Overload for non-built-in types (main uses it for the decimal types): each element is
+// brace-constructed from a double drawn uniformly from [0, 1). seed == 0 uses std::random_device.
+template <typename T, std::enable_if_t<!std::is_floating_point<T>::value, bool> = true>
+std::vector<T> generate_random_vector(std::size_t size = 10'000'000U, unsigned seed = 42U)
+{
+    if (seed == 0)
+    {
+        seed = std::random_device{}();
+    }
+    std::mt19937_64 engine(seed);
+    std::uniform_real_distribution<double> unit_interval(0, 1);
+
+    std::vector<T> values(size);
+    for (auto& value : values)
+    {
+        value = T{unit_interval(engine)};
+    }
+    return values;
+}
+
+// Runs >, >=, <, <=, == and != on each adjacent pair of data_vec, then prints the elapsed
+// steady_clock time in whole milliseconds together with the accumulated count, tagged with `label`.
+template <typename T>
+BOOST_DECIMAL_NO_INLINE void test_comparisons(const std::vector<T>& data_vec, const char* label)
+{
+    const auto t1 = std::chrono::steady_clock::now();
+    std::size_t s = 0; // sum of all comparison results; printed below so the work has a visible use
+    // `i + 1U < size()` instead of `i < size() - 1U`: the subtraction wraps around for an empty vector.
+    for (std::size_t i {}; i + 1U < data_vec.size(); ++i)
+    {
+        const auto val1 = data_vec[i];
+        const auto val2 = data_vec[i + 1];
+        s += static_cast<std::size_t>(val1 > val2);
+        s += static_cast<std::size_t>(val1 >= val2);
+        s += static_cast<std::size_t>(val1 < val2);
+        s += static_cast<std::size_t>(val1 <= val2);
+        s += static_cast<std::size_t>(val1 == val2);
+        s += static_cast<std::size_t>(val1 != val2);
+    }
+    const auto t2 = std::chrono::steady_clock::now();
+    std::cout << "comparisons<" << std::left << std::setw(10) << label << ">: " << std::setw( 6 ) << ( t2 - t1 ) / 1ms << " ms (s=" << s << ")\n";
+}
+
+// Applies `op` to each adjacent pair of data_vec, adds every result (converted to std::size_t)
+// to a running sum, and prints the elapsed whole milliseconds tagged with `operation` and `type`.
+template <typename T, typename Func>
+BOOST_DECIMAL_NO_INLINE void test_operation(const std::vector<T>& data_vec, Func op, const char* operation, const char* type)
+{
+    const auto t1 = std::chrono::steady_clock::now();
+    std::size_t s = 0; // running sum; printed below so the work has a visible use
+    // `i + 1U < size()` instead of `i < size() - 1U`: the subtraction wraps around for an empty vector.
+    for (std::size_t i {}; i + 1U < data_vec.size(); ++i)
+    {
+        const auto val1 = data_vec[i];
+        const auto val2 = data_vec[i + 1];
+        s += static_cast<std::size_t>(op(val1, val2));
+    }
+    const auto t2 = std::chrono::steady_clock::now();
+    std::cout << operation << "<" << std::left << std::setw(10) << type << ">: " << std::setw( 6 ) << ( t2 - t1 ) / 1ms << " ms (s=" << s << ")\n";
+}
+
+// Benchmark driver: builds one input vector per type (default size and seed), then times each operation.
+int main()
+{
+    const auto float_vector = generate_random_vector<float>();
+    const auto double_vector = generate_random_vector<double>();
+    const auto dec32_vector = generate_random_vector<decimal32>();
+    const auto dec64_vector = generate_random_vector<decimal64>();
+    const auto dec128_vector = generate_random_vector<decimal128>();
+
+    std::cout << "===== Comparisons =====\n";
+
+    test_comparisons(float_vector, "float");
+    test_comparisons(double_vector, "double");
+    test_comparisons(dec32_vector, "decimal32");
+    test_comparisons(dec64_vector, "decimal64");
+    test_comparisons(dec128_vector, "decimal128");
+
+    std::cout << "\n===== Addition =====\n";
+
+    test_operation(float_vector, std::plus<>(), "Addition", "float");
+    test_operation(double_vector, std::plus<>(), "Addition", "double");
+    test_operation(dec32_vector, std::plus<>(), "Addition", "decimal32");
+    test_operation(dec64_vector, std::plus<>(), "Addition", "decimal64");
+    test_operation(dec128_vector, std::plus<>(), "Addition", "decimal128");
+
+    std::cout << "\n===== Subtraction =====\n";
+
+    test_operation(float_vector, std::minus<>(), "Subtraction", "float");
+    test_operation(double_vector, std::minus<>(), "Subtraction", "double");
+    test_operation(dec32_vector, std::minus<>(), "Subtraction", "decimal32");
+    test_operation(dec64_vector, std::minus<>(), "Subtraction", "decimal64");
+    test_operation(dec128_vector, std::minus<>(), "Subtraction", "decimal128");
+
+    std::cout << "\n===== Multiplication =====\n";
+
+    test_operation(float_vector, std::multiplies<>(), "Multiplication", "float");
+    test_operation(double_vector, std::multiplies<>(), "Multiplication", "double");
+    test_operation(dec32_vector, std::multiplies<>(), "Multiplication", "decimal32");
+    test_operation(dec64_vector, std::multiplies<>(), "Multiplication", "decimal64");
+    test_operation(dec128_vector, std::multiplies<>(), "Multiplication", "decimal128");
+
+    std::cout << "\n===== Division =====\n";
+
+    test_operation(float_vector, std::divides<>(), "Division", "float");
+    test_operation(double_vector, std::divides<>(), "Division", "double");
+    test_operation(dec32_vector, std::divides<>(), "Division", "decimal32");
+    test_operation(dec64_vector, std::divides<>(), "Division", "decimal64");
+    test_operation(dec128_vector, std::divides<>(), "Division", "decimal128");
+    std::cout << std::endl;
+    // Exit status 1 lines up with the `run-fail` registration of this file in test/Jamfile.
+    return 1;
+}
+
+#else
+
+int main() // stub compiled when BOOST_DECIMAL_RUN_BENCHMARKS is not defined
+{
+    std::cout << "Benchmarks not run" << std::endl;
+    return 1; // non-zero, in line with the `run-fail` rule for this file in test/Jamfile
+}
+
+#endif