Skip to content

Commit

Permalink
Enable fast floating point model and fast debug linking (#11466)
Browse files Browse the repository at this point in the history
This commit enables /fp:fast. This doubles the performance of the Delta E
computation in #11095 for instance. Additionally it re-enables two options for
debug builds which are normally enabled by default by Visual Studio.

## PR Checklist
* [x] I work here
* [x] Tests added/passed

## Validation Steps Performed
* No change in binary size
* No obvious change in behavior
  • Loading branch information
lhecker authored and PankajBhojwani committed Oct 13, 2021
1 parent 2391aee commit 385c6ee
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 3 deletions.
1 change: 1 addition & 0 deletions .github/actions/spelling/allow/math.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ LStep
powf
RSub
sqrtf
ULP
6 changes: 4 additions & 2 deletions src/buffer/out/TextColor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "precomp.h"
#include "TextColor.h"

#include <til/bit.h>

// clang-format off

// A table mapping 8-bit RGB colors, in the form RRRGGGBB,
Expand Down Expand Up @@ -186,7 +188,7 @@ COLORREF TextColor::GetColor(const std::array<COLORREF, 256>& colorTable, const
// the result will be something like 0b00100000.
// 5. Use BitScanForward (bsf) to find the index of the least significant 1 bit.
const auto haystack = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(colorTable.data())); // 1.
const auto needle = _mm256_set1_epi32(__builtin_bit_cast(int, defaultColor)); // 2.
const auto needle = _mm256_set1_epi32(til::bit_cast<int>(defaultColor)); // 2.
const auto result = _mm256_cmpeq_epi32(haystack, needle); // 3.
const auto mask = _mm256_movemask_ps(_mm256_castsi256_ps(result)); // 4.
unsigned long index;
Expand All @@ -203,7 +205,7 @@ COLORREF TextColor::GetColor(const std::array<COLORREF, 256>& colorTable, const
// --> the index returned by _BitScanForward must be divided by 2.
const auto haystack1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(colorTable.data() + 0));
const auto haystack2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(colorTable.data() + 4));
const auto needle = _mm_set1_epi32(__builtin_bit_cast(int, defaultColor));
const auto needle = _mm_set1_epi32(til::bit_cast<int>(defaultColor));
const auto result1 = _mm_cmpeq_epi32(haystack1, needle);
const auto result2 = _mm_cmpeq_epi32(haystack2, needle);
const auto result = _mm_packs_epi32(result1, result2); // 3.5
Expand Down
3 changes: 3 additions & 0 deletions src/cascadia/ut_app/ColorHelperTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
#include "../TerminalSettingsModel/Profile.h"
#include "../TerminalApp/ColorHelper.h"

// Import some templates to compare floats using approximate matching.
#include <consoletaeftemplates.hpp>

using namespace Microsoft::Console;
using namespace winrt::TerminalApp;
using namespace WEX::Logging;
Expand Down
5 changes: 5 additions & 0 deletions src/common.build.pre.props
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
<LanguageStandard>stdcpp17</LanguageStandard>
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<ControlFlowGuard>Guard</ControlFlowGuard>
<FloatingPointModel>Fast</FloatingPointModel>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>EXTERNAL_BUILD;_UNICODE;UNICODE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
Expand All @@ -145,6 +146,10 @@
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;DBG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<SetChecksum>false</SetChecksum>
<GenerateDebugInformation>DebugFastLink</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>

<!-- For Release ONLY -->
Expand Down
41 changes: 41 additions & 0 deletions src/inc/consoletaeftemplates.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ Revision History:

#pragma once

#include <til/bit.h>

// Helper for declaring a variable to store a TEST_METHOD_PROPERTY and get its value from the test metadata
#define INIT_TEST_PROPERTY(type, identifer, description) \
type identifer; \
Expand All @@ -43,6 +45,45 @@ Revision History:

namespace WEX::TestExecution
{
// Compares two floating point values using a tolerance of up to 4 ULP
// (units in the last place), so that values which are almost equal match.
// Think of: 0.200000000000000 vs. 0.200000000000001.
//
// Template parameters:
// - T: The floating point type to compare (e.g. float or double).
// - U: An integer type of the same size as T which is used to inspect
//      the bits of T (e.g. uint32_t for float, uint64_t for double).
// Return value:
// - true if both values are NaN, if they compare equal with == (this
//   includes +0.0 vs. -0.0), or if they have the same sign and their bit
//   representations are at most 4 apart. false otherwise.
// NOTE: Adjacent bit patterns are treated as adjacent values, which assumes
// an IEEE 754 style layout for T. Under that layout the largest finite
// value and infinity are also considered "almost equal".
template<typename T, typename U>
bool CompareFloats(T a, T b) noexcept
{
    // NaN only ever matches NaN. Both operands have to be checked: in an
    // IEEE 754 layout infinity sits directly below the first NaN bit pattern,
    // so the distance check at the bottom would otherwise accept such a pair.
    const auto aIsNan = std::isnan(a);
    const auto bIsNan = std::isnan(b);
    if (aIsNan || bIsNan)
    {
        return aIsNan && bIsNan;
    }

    if (a == b)
    {
        return true;
    }

    // Values of opposite sign that are not equal are never "almost equal".
    // Rejecting them here also guarantees that the subtraction below cannot
    // reach into the sign bit, which previously caused a signed overflow
    // when negating the difference (e.g. when comparing x with -x).
    if (std::signbit(a) != std::signbit(b))
    {
        return false;
    }

    const auto aBits = til::bit_cast<U>(a);
    const auto bBits = til::bit_cast<U>(b);
    // Always subtract the smaller from the larger so no negation is needed.
    const auto distance = aBits > bBits ? aBits - bBits : bBits - aBits;
    return distance <= 4;
}

// Specialization of VerifyCompareTraits for float, so that equality checks
// on floats use the approximate (ULP based) matching of CompareFloats
// instead of an exact comparison.
// NOTE(review): presumably picked up by the test framework's verify macros;
// confirm against the framework's documentation.
template<>
struct VerifyCompareTraits<float, float>
{
    // Returns true if a and b are equal within the tolerance of CompareFloats.
    static bool AreEqual(float a, float b) noexcept
    {
        // The integer type through which CompareFloats views the bits of a float.
        using Bits = uint32_t;
        const bool nearlyEqual = CompareFloats<float, Bits>(a, b);
        return nearlyEqual;
    }
};

// Specialization of VerifyCompareTraits for double, so that equality checks
// on doubles use the approximate (ULP based) matching of CompareFloats
// instead of an exact comparison.
// NOTE(review): presumably picked up by the test framework's verify macros;
// confirm against the framework's documentation.
template<>
struct VerifyCompareTraits<double, double>
{
    // Returns true if a and b are equal within the tolerance of CompareFloats.
    static bool AreEqual(double a, double b) noexcept
    {
        // The integer type through which CompareFloats views the bits of a double.
        using Bits = uint64_t;
        const bool nearlyEqual = CompareFloats<double, Bits>(a, b);
        return nearlyEqual;
    }
};

template<>
class VerifyOutputTraits<SMALL_RECT>
{
Expand Down
16 changes: 16 additions & 0 deletions src/inc/til/bit.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#pragma once

namespace til
{
    // Reinterprets the object representation of `value` as an object of type
    // `To` by forwarding to the compiler's __builtin_bit_cast intrinsic.
    //
    // Template parameters:
    // - To: The type to produce.
    // - From: The type of the source object (deduced).
    // This overload only participates in overload resolution if To and From
    // have the same size and are both trivially copyable.
    // Arguments:
    // - value: The object whose bits are reinterpreted.
    // Return value:
    // - An object of type To as produced by __builtin_bit_cast(To, value).
    template<class To, class From, std::enable_if_t<std::conjunction_v<std::bool_constant<sizeof(To) == sizeof(From)>, std::is_trivially_copyable<To>, std::is_trivially_copyable<From>>, int> = 0>
    [[nodiscard]] constexpr To bit_cast(const From& value) noexcept
    {
        // Once the standard library advertises std::bit_cast, this helper is
        // redundant. The diagnostic below is a reminder to switch over.
#ifdef __cpp_lib_bit_cast
#warning "Replace til::bit_cast and __builtin_bit_cast with std::bit_cast"
#endif
        return __builtin_bit_cast(To, value);
    }
}
2 changes: 1 addition & 1 deletion src/project.inc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ USE_NATIVE_EH = 1
# -------------------------------------

MSC_WARNING_LEVEL = /W4 /WX
USER_C_FLAGS = $(USER_C_FLAGS) /utf-8
USER_C_FLAGS = $(USER_C_FLAGS) /fp:fast /utf-8

# -------------------------------------
# Common Console Includes and Libraries
Expand Down

0 comments on commit 385c6ee

Please sign in to comment.