-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Canonicalize away bit width and embed small integers into IntIds
#4487
Changes from 4 commits
6d73339
5b79952
ebc1f06
b827f5b
2e1d1f6
f17a1ca
cdc6020
136d10d
0ae1295
ff3c7dd
537ae2e
a206dfe
ddf3fa1
657d212
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#include "toolchain/base/int_store.h" | ||
|
||
#include <algorithm> | ||
#include <string> | ||
#include <utility> | ||
|
||
namespace Carbon { | ||
|
||
auto IntStore::CanonicalBitWidth(int significant_bits) -> int { | ||
// For larger integers, we store them in as a signed APInt with a canonical | ||
// width that is the smallest multiple of the word type's bits, but no | ||
// smaller than a minimum of 64 bits to avoid spurious resizing of the most | ||
// common cases (<= 64 bits). | ||
static constexpr int WordWidth = llvm::APInt::APINT_BITS_PER_WORD; | ||
|
||
return std::max<int>( | ||
MinAPWidth, ((significant_bits + WordWidth - 1) / WordWidth) * WordWidth); | ||
} | ||
|
||
auto IntStore::CanonicalizeSigned(llvm::APInt value) -> llvm::APInt { | ||
  // Sign extend (or truncate) to the canonical width for the number of bits | ||
  // needed to represent the value as a signed integer. | ||
  const int canonical_width = CanonicalBitWidth(value.getSignificantBits()); | ||
  return value.sextOrTrunc(canonical_width); | ||
} | ||
|
||
auto IntStore::CanonicalizeUnsigned(llvm::APInt value) -> llvm::APInt { | ||
  // The canonical form is signed, so reserve one extra bit above the active | ||
  // bits to hold a zero sign bit before zero extending (or truncating). | ||
  const int canonical_width = CanonicalBitWidth(value.getActiveBits() + 1); | ||
  return value.zextOrTrunc(canonical_width); | ||
} | ||
|
||
auto IntStore::AddLarge(int64_t value) -> IntId { | ||
  // Build the signed `APInt` directly at the canonical width for 64 | ||
  // significant bits, then wrap the store's index into an `IntId`. | ||
  const int width = CanonicalBitWidth(64); | ||
  auto stored_id = values_.Add(llvm::APInt(width, value, /*isSigned=*/true)); | ||
  return IntId::MakeIndexOrInvalid(stored_id.index); | ||
} | ||
|
||
auto IntStore::AddSignedLarge(llvm::APInt value) -> IntId { | ||
  // `value` is a by-value sink parameter that is not used again, so move it | ||
  // into the canonicalization step rather than making another `APInt` copy. | ||
  auto ap_id = values_.Add(CanonicalizeSigned(std::move(value))); | ||
  // Wrap the store's index into an `IntId`. | ||
  return IntId::MakeIndexOrInvalid(ap_id.index); | ||
} | ||
|
||
auto IntStore::AddUnsignedLarge(llvm::APInt value) -> IntId { | ||
  // `value` is a by-value sink parameter that is not used again, so move it | ||
  // into the canonicalization step rather than making another `APInt` copy. | ||
  auto ap_id = values_.Add(CanonicalizeUnsigned(std::move(value))); | ||
  // Wrap the store's index into an `IntId`. | ||
  return IntId::MakeIndexOrInvalid(ap_id.index); | ||
} | ||
|
||
auto IntStore::LookupSignedLarge(llvm::APInt value) const -> IntId { | ||
  // `value` is a by-value parameter that is not used again, so move it into | ||
  // the canonicalization step rather than making another `APInt` copy. | ||
  auto ap_id = values_.Lookup(CanonicalizeSigned(std::move(value))); | ||
  // Wrap the looked-up index into an `IntId`. | ||
  return IntId::MakeIndexOrInvalid(ap_id.index); | ||
} | ||
|
||
auto IntStore::OutputYaml() const -> Yaml::OutputMapping { | ||
// Forwards to the underlying value store, so the output describes only the | ||
// values held in `values_`. | ||
return values_.OutputYaml(); | ||
} | ||
|
||
auto IntStore::CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const | ||
    -> void { | ||
  // Report the separately stored values under an owned copy of `label`. | ||
  mem_usage.Collect(label.str(), values_); | ||
} | ||
|
||
} // namespace Carbon |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#ifndef CARBON_TOOLCHAIN_BASE_INT_STORE_H_ | ||
#define CARBON_TOOLCHAIN_BASE_INT_STORE_H_ | ||
|
||
#include "common/check.h" | ||
#include "llvm/ADT/APFloat.h" | ||
#include "llvm/ADT/APInt.h" | ||
#include "llvm/ADT/SmallVector.h" | ||
#include "toolchain/base/mem_usage.h" | ||
#include "toolchain/base/value_ids.h" | ||
#include "toolchain/base/value_store.h" | ||
#include "toolchain/base/yaml.h" | ||
|
||
namespace Carbon { | ||
|
||
// Forward declare a testing peer so we can friend it. | ||
namespace Testing { | ||
struct IntStoreTestPeer; | ||
} // namespace Testing | ||
|
||
// A canonicalizing value store with deep optimizations for integers. | ||
// | ||
// This stores integers as abstract, signed mathematical integers. The bit width | ||
// of specific `APInt` values, either as inputs or outputs, is disregarded for | ||
// the purpose of canonicalization and the returned integer may use a very | ||
// different bit width `APInt` than was used when adding. There are also | ||
// optimized paths for adding integer values representable using native integer | ||
// types. | ||
// | ||
// Because the integers in the store are canonicalized without a specific bit | ||
chandlerc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// width there are helper functions to coerce them to a specific desired bit | ||
chandlerc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// width for use. | ||
// | ||
// This leverages a significant optimization for small integer values -- rather | ||
// than canonicalizing and making them unique in a `ValueStore`, they are | ||
// directly embedded in the `IntId` itself. Only larger integers are stored in | ||
// an array of `APInt` values and represented as an index in the ID. | ||
class IntStore { | ||
public: | ||
// Adds an integer value representable in a host `int64_t` to the store. | ||
jonmeow marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Especially useful when the integer is computed without an `APInt` in the | ||
// first place. | ||
// | ||
// This only accepts a signed `int64_t` and uses the mathematical signed | ||
// integer value of it as the added integer value. | ||
// | ||
// Returns the ID corresponding to this integer value, storing an `APInt` if | ||
// necessary to represent it. | ||
auto Add(int64_t value) -> IntId { | ||
// First try directly making this into an ID; if that yields a valid ID, | ||
// nothing needs to be added to the store. | ||
if (IntId id = IntId::TryMakeValue(value); id.is_valid()) [[likely]] { | ||
return id; | ||
} | ||
|
||
// Fallback for larger values, handled out of line by `AddLarge`. | ||
return AddLarge(value); | ||
jonmeow marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// Stores a canonical copy of a signed value and returns its ID. | ||
chandlerc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
auto AddSigned(llvm::APInt value) -> IntId { | ||
  // Fast path: try to turn the value directly into an ID. | ||
  IntId embedded_id = IntId::TryMakeSignedValue(value); | ||
  if (embedded_id.is_valid()) [[likely]] { | ||
    return embedded_id; | ||
  } | ||
|
||
  // Slow path for larger values: hand the value over to the out-of-line add. | ||
  return AddSignedLarge(std::move(value)); | ||
} | ||
|
||
// Stores a canonical copy of an unsigned value and returns its ID. | ||
chandlerc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
auto AddUnsigned(llvm::APInt value) -> IntId { | ||
  // Fast path: try to turn the value directly into an ID. | ||
  IntId embedded_id = IntId::TryMakeUnsignedValue(value); | ||
  if (embedded_id.is_valid()) [[likely]] { | ||
    return embedded_id; | ||
  } | ||
|
||
  // Slow path for larger values: hand the value over to the out-of-line add. | ||
  return AddUnsignedLarge(std::move(value)); | ||
} | ||
|
||
// Returns the value for an ID. | ||
// | ||
// This will always be a signed `APInt` with a canonical bit width for the | ||
// specific integer value in question. | ||
auto Get(IntId id) const -> llvm::APInt { | ||
if (id.is_value()) [[likely]] { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We're currently using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just noticed that we have standard attributes now. Happy to either switch to LLVM ones until we can move the rest of the code, or move the rest of the code in a follow-up. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My thought is we've generally agreed to use C++ attribute forms so that seems the better choice. I don't think it makes sense to switch this code if the rest changes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do. |
||
// The ID carries the value itself: build a signed `APInt` from it at the | ||
// minimum width. | ||
return llvm::APInt(MinAPWidth, id.AsValue(), /*isSigned=*/true); | ||
} | ||
// Otherwise the ID carries an index into the stored `APInt` values. | ||
return values_.Get(APIntId(id.AsIndex())); | ||
} | ||
|
||
// Returns the value for an ID adjusted to a specific bit width. | ||
// | ||
// Note that because we store canonical mathematical integers as signed | ||
// integers, this always sign extends or truncates to the target width. The | ||
// caller can then use that as a signed or unsigned integer as needed. | ||
auto GetAtWidth(IntId id, int bit_width) const -> llvm::APInt { | ||
  llvm::APInt canonical = Get(id); | ||
  // Already at the requested width: return the canonical value untouched. | ||
  if (static_cast<int>(canonical.getBitWidth()) == bit_width) { | ||
    return canonical; | ||
  } | ||
  // The canonical form is signed, so sign extend or truncate to the target. | ||
  return canonical.sextOrTrunc(bit_width); | ||
} | ||
|
||
// Returns the value for an ID adjusted to the bit width specified with | ||
// another integer ID. | ||
// | ||
// This simply looks up the width integer ID, and then calls the above | ||
// `GetAtWidth` overload using the value found for it. See that overload for | ||
// more details. | ||
auto GetAtWidth(IntId id, IntId bit_width_id) const -> llvm::APInt { | ||
// `Get` returns by value; binding to a const reference keeps that temporary | ||
// alive for the rest of this function. | ||
const llvm::APInt& bit_width = Get(bit_width_id); | ||
chandlerc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Require a strictly positive width that fits in an `int` before it is | ||
// narrowed to `int` in the call below. | ||
CARBON_CHECK(bit_width.isStrictlyPositive() && | ||
bit_width.isSignedIntN(sizeof(int) * 8), | ||
chandlerc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"Invalid bit width value: {0}", bit_width); | ||
return GetAtWidth(id, bit_width.getSExtValue()); | ||
} | ||
|
||
// Looks up the canonical ID for a value, or returns invalid if not in the | ||
// store. | ||
auto LookupSigned(llvm::APInt value) const -> IntId { | ||
  // Fast path: try to turn the value directly into an ID. | ||
  IntId embedded_id = IntId::TryMakeSignedValue(value); | ||
  if (embedded_id.is_valid()) [[likely]] { | ||
    return embedded_id; | ||
  } | ||
|
||
  // Slow path for larger values: hand the value to the out-of-line lookup. | ||
  return LookupSignedLarge(std::move(value)); | ||
} | ||
|
||
// Output a YAML description of this data structure. Note that this will only | ||
// include the integers that required storing, not those successfully embedded | ||
// into the ID space. | ||
auto OutputYaml() const -> Yaml::OutputMapping; | ||
|
||
// Returns a view of the `APInt` values held in the underlying value store. | ||
auto array_ref() const -> llvm::ArrayRef<llvm::APInt> { | ||
return values_.array_ref(); | ||
} | ||
// Returns the size reported by the underlying value store. | ||
auto size() const -> size_t { return values_.size(); } | ||
|
||
// Collects the memory usage of the separately stored integers. | ||
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const | ||
-> void; | ||
|
||
private: | ||
friend struct Testing::IntStoreTestPeer; | ||
|
||
// ID type used to index the `APInt` values held in `values_`. | ||
struct APIntId : IdBase, Printable<APIntId> { | ||
chandlerc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
using ValueType = llvm::APInt; | ||
static const APIntId Invalid; | ||
using IdBase::IdBase; | ||
// Prints the `ap-int` prefix followed by the base ID's own output. | ||
auto Print(llvm::raw_ostream& out) const -> void { | ||
out << "ap-int"; | ||
chandlerc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
IdBase::Print(out); | ||
} | ||
}; | ||
|
||
// Minimum bit width of the `APInt` values produced by the store. | ||
static constexpr int MinAPWidth = 64; | ||
|
||
// Pick a canonical bit width for the provided number of significant bits. | ||
static auto CanonicalBitWidth(int significant_bits) -> int; | ||
|
||
// Canonicalize an incoming signed APInt to the correct bit width. | ||
static auto CanonicalizeSigned(llvm::APInt value) -> llvm::APInt; | ||
|
||
// Canonicalize an incoming unsigned APInt to the correct bit width. | ||
static auto CanonicalizeUnsigned(llvm::APInt value) -> llvm::APInt; | ||
|
||
// Out-of-line fallbacks for the `Add*` entry points: each adds an `APInt` to | ||
// `values_` and returns an `IntId` built from the resulting index. | ||
auto AddLarge(int64_t value) -> IntId; | ||
auto AddSignedLarge(llvm::APInt value) -> IntId; | ||
auto AddUnsignedLarge(llvm::APInt value) -> IntId; | ||
|
||
// Out-of-line fallback for `LookupSigned`. | ||
auto LookupSignedLarge(llvm::APInt value) const -> IntId; | ||
|
||
// Canonicalizing storage for the integers that are stored as `APInt` values. | ||
CanonicalValueStore<APIntId> values_; | ||
chandlerc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}; | ||
|
||
// The invalid `APIntId` uses the same index as the invalid `IntId`. | ||
constexpr IntStore::APIntId IntStore::APIntId::Invalid( | ||
IntId::Invalid.AsIndex()); | ||
|
||
} // namespace Carbon | ||
|
||
#endif // CARBON_TOOLCHAIN_BASE_INT_STORE_H_ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Organizationally, do you think it'd help if we had toolchain/base/int.h with both IntStore and IntId? We do something similar in sem_ir, with things like type.h (just noting that one since it's in the PR).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SGTM. I'll do the rename from int_store.h to int.h last to preserve review threads as much as I can.