Skip to content

Commit

Permalink
PR #21960: Updated nanobind commit
Browse files Browse the repository at this point in the history
Imported from GitHub PR #21960

Point nanobind to the commit that fixes concurrent access to Python/C++ objects: wjakob/nanobind#867

cc @hawkinsp
Copybara import of the project:

--
77e693f by vfdev-5 <vfdev.5@gmail.com>:

Updated nanobind commit

Merging this change closes #21960

FUTURE_COPYBARA_INTEGRATE_REVIEW=#21960 from vfdev-5:update-nanobind 77e693f
PiperOrigin-RevId: 720567148
  • Loading branch information
vfdev-5 authored and Google-ML-Automation committed Jan 28, 2025
1 parent 109a8ff commit 9e88644
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 32 deletions.
6 changes: 3 additions & 3 deletions third_party/nanobind/workspace.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls")
def repo():
tf_http_archive(
name = "nanobind",
strip_prefix = "nanobind-cee104db8606797a63752d2904b2f2795014a125",
sha256 = "d5dec3690c0a11b1ca48021ff34238886da7938b7bbbd5c0e946dcef6e6b7e25",
urls = tf_mirror_urls("https://github.com/wjakob/nanobind/archive/cee104db8606797a63752d2904b2f2795014a125.tar.gz"),
strip_prefix = "nanobind-d79309197caaad83cda05df533136865d294f01e",
sha256 = "598b116f36dbdf9738bb269cc1551ae073715fb3d69f07ca0dd01e6de0ddf4b0",
urls = tf_mirror_urls("https://github.com/wjakob/nanobind/archive/d79309197caaad83cda05df533136865d294f01e.tar.gz"),
build_file = "//third_party/nanobind:nanobind.BUILD",
)
2 changes: 2 additions & 0 deletions xla/tools/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,7 @@ cc_library(
"//xla/service:executable",
"//xla/service:gpu_plugin",
"//xla/service:hlo_module_config",
"//xla/service:hlo_proto_cc",
"//xla/service:hlo_runner",
"//xla/service:platform_util",
"//xla/service/gpu/model:hlo_op_profile_proto_cc",
Expand All @@ -647,6 +648,7 @@ cc_library(
"//xla/tsl/platform:errors",
"//xla/tsl/platform:logging",
"@com_google_absl//absl/container:flat_hash_set",
"@com_google_absl//absl/hash",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/status",
Expand Down
125 changes: 103 additions & 22 deletions xla/tools/matmul_perf_table_gen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ limitations under the License.
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "absl/hash/hash.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
Expand All @@ -49,6 +50,7 @@ limitations under the License.
#include "xla/service/gpu/model/hlo_op_profile.pb.h"
#include "xla/service/gpu/model/hlo_op_profiler.h"
#include "xla/service/gpu/model/hlo_op_profiles.h"
#include "xla/service/hlo.pb.h"
#include "xla/service/hlo_module_config.h"
#include "xla/service/hlo_runner.h"
#include "xla/tests/test_utils.h"
Expand Down Expand Up @@ -80,6 +82,28 @@ struct StaticSpec {
std::string dtype_out;
};

// Bundles a device description string, an instruction proto, a fingerprint
// string and a clock-cycle count.
//
// The nested `Hash` and `Eq` functors identify a result by the pair
// (device_info, fingerprint) only: `hlo_proto` and `clock_cycles` take no part
// in hashing or equality. Two results with the same device and fingerprint are
// therefore duplicates in a hashed container using these functors, even when
// their clock cycles differ.
struct ProfilingResult {
  std::string device_info;
  HloInstructionProto hlo_proto;
  std::string fingerprint;
  int64_t clock_cycles;

  // Hashes the identifying (device_info, fingerprint) pair.
  struct Hash {
    size_t operator()(const ProfilingResult& value) const {
      const std::string& device = value.device_info;
      const std::string& print = value.fingerprint;
      return absl::HashOf(device, print);
    }
  };

  // Equality over the same (device_info, fingerprint) pair used by `Hash`.
  struct Eq {
    bool operator()(const ProfilingResult& a, const ProfilingResult& b) const {
      if (a.device_info != b.device_info) {
        return false;
      }
      return a.fingerprint == b.fingerprint;
    }
  };
};

// Spec that owns a single `HloModule` through a `std::unique_ptr`; because of
// that member the struct is move-only.
struct ExplicitSpec {
std::unique_ptr<HloModule> module;
};
Expand Down Expand Up @@ -175,12 +199,8 @@ void AddDotsFromHlos(const std::string& hlo_scan_path,
std::vector<std::string> filenames;
CHECK_OK(tsl::Env::Default()->GetChildren(hlo_scan_path, &filenames));
for (const std::string& filename : filenames) {
// Read file.
std::string hlo_data;
std::string hlo_path = absl::StrJoin({hlo_scan_path, filename}, "/");

PathSpec spec;
spec.filepath = hlo_path;
spec.filepath = absl::StrCat(hlo_scan_path, "/", filename);
specs.push_back(spec);
}
}
Expand Down Expand Up @@ -311,7 +331,64 @@ absl::Duration MatmulPerfTableGen::Profile(std::unique_ptr<HloModule> module) {
return absl::Nanoseconds(std::move(*tracer).getMedianKernelTimeNs());
}

gpu::DeviceHloInstructionProfiles MatmulPerfTableGen::ComputeTable() {
// Reads every file listed directly under the directory `filepath` as a
// `DeviceHloInstructionProfiles` proto (text or binary), deduplicates the
// entries by (device, fingerprint) and returns them as one merged table.
//
// Errors: failing to list `filepath`, or a listed file that does not exist, is
// returned as a non-OK status. A file that cannot be parsed is skipped with a
// warning. An entry without a fingerprint is a CHECK failure, because it
// cannot be deduplicated.
//
// When several entries share a (device, fingerprint) key only the first one
// inserted is kept, even if their clock cycles differ. Entries are emitted in
// the iteration order of the hash set, so their order in the result carries no
// meaning.
absl::StatusOr<DeviceHloInstructionProfiles> MatmulPerfTableGen::Merge(
    absl::string_view filepath) {
  tsl::Env* env = tsl::Env::Default();

  std::vector<std::string> filenames;
  TF_RETURN_IF_ERROR(env->GetChildren(std::string(filepath), &filenames));

  absl::flat_hash_set<ProfilingResult, ProfilingResult::Hash,
                      ProfilingResult::Eq>
      profiling_results;
  // Number of entries seen before deduplication; only used for logging.
  uint64_t profiling_results_counter = 0;
  for (const std::string& filename : filenames) {
    const std::string profile_path = absl::StrCat(filepath, "/", filename);
    TF_RETURN_IF_ERROR(env->FileExists(profile_path));

    DeviceHloInstructionProfiles partial_profile;
    if (!tsl::ReadTextOrBinaryProto(env, profile_path, &partial_profile)
             .ok()) {
      LOG(WARNING) << "Cannot read :" << profile_path;
      continue;
    }

    for (const auto& [device_descriptor, data] : partial_profile.entries()) {
      for (const HloInstructionProfile& profile : data.entries()) {
        CHECK(!profile.fingerprint().empty())
            << "Expected fingerprint to deduplicate: " << profile.DebugString();
        // The proto accessors return const references, so the fields are
        // copied here; the assembled result is then moved into the set.
        ProfilingResult profiling_result{
            device_descriptor, profile.instruction(), profile.fingerprint(),
            profile.clock_cycles()};
        // `insert` leaves the set untouched when an equal key is already
        // present, so the first entry seen for a key wins.
        profiling_results.insert(std::move(profiling_result));
        ++profiling_results_counter;
      }
    }
  }
  LOG(INFO) << "Merging and deduplication entries count. Before "
            << profiling_results_counter << ", after "
            << profiling_results.size() << ".";

  DeviceHloInstructionProfiles result;
  auto& merged_entries = *result.mutable_entries();
  for (const ProfilingResult& profiling_result : profiling_results) {
    // `operator[]` creates the per-device list on first use. Set elements are
    // const, so the stored proto is copied into the output.
    HloInstructionProfile* profile_proto =
        merged_entries[profiling_result.device_info].add_entries();
    *profile_proto->mutable_instruction() = profiling_result.hlo_proto;
    profile_proto->set_clock_cycles(profiling_result.clock_cycles);
    profile_proto->set_fingerprint(profiling_result.fingerprint);
  }

  return result;
}

DeviceHloInstructionProfiles MatmulPerfTableGen::ComputeTable() {
gpu::DeviceHloInstructionProfiles device_profiles;
gpu::HloInstructionProfileList profile_list;

Expand Down Expand Up @@ -378,24 +455,28 @@ absl::Status MatmulPerfTableGen::Dump(
tsl::ReadTextOrBinaryProto(tsl::Env::Default(), config_.output, &file));
}

CHECK_EQ(table.entries_size(), 1)
<< "Expecting one program run, for one device config";
std::string sm_ver = table.entries().begin()->first;
if (file.entries().contains(sm_ver)) {
file.mutable_entries()->at(sm_ver).MergeFrom(table.entries().at(sm_ver));
} else {
file.MergeFrom(table);
}
for (const auto& [sm_ver, entries] : table.entries()) {
if (file.entries().contains(sm_ver)) {
file.mutable_entries()->at(sm_ver).MergeFrom(entries);
} else {
file.MergeFrom(table);
}

if (absl::StrContains(config_.output, ".pbtxt")) {
return tsl::WriteTextProto(tsl::Env::Default(), config_.output, file);
}
if (absl::StrContains(config_.output, ".pb")) {
return tsl::WriteBinaryProto(tsl::Env::Default(), config_.output, file);
if (absl::StrContains(config_.output, ".pbtxt")) {
TF_RETURN_IF_ERROR(
tsl::WriteTextProto(tsl::Env::Default(), config_.output, file));
continue;
}
if (absl::StrContains(config_.output, ".pb")) {
TF_RETURN_IF_ERROR(
tsl::WriteBinaryProto(tsl::Env::Default(), config_.output, file));
continue;
}
return absl::InvalidArgumentError(
absl::StrCat("Unsupported file: ", config_.output,
". Expecting .pb or .pbtxt suffix."));
}
return absl::InvalidArgumentError(
absl::StrCat("Unsupported file: ", config_.output,
". Expecting .pb or .pbtxt suffix."));
return absl::OkStatus();
}

} // namespace xla::gpu
5 changes: 5 additions & 0 deletions xla/tools/matmul_perf_table_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ class MatmulPerfTableGen {
// Dumps a performance `table` to a given `output_file` from `Config`.
absl::Status Dump(const DeviceHloInstructionProfiles& table);

// Reads, deduplicates and merges multiple `xla.gpu.DeviceHloInstructionProfiles`
// files residing in the directory `filepath`.
absl::StatusOr<DeviceHloInstructionProfiles> Merge(
absl::string_view filepath);

private:
std::unique_ptr<Executable> Compile(std::unique_ptr<HloModule> module);

Expand Down
20 changes: 13 additions & 7 deletions xla/tools/matmul_perf_table_gen_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -242,12 +242,6 @@ std::vector<MatmulPerfTableGen::DataTypeSpec> ParseDataTypes(
return result;
}

// Returns `filepath` as an owned string after CHECK-ing that
// `tsl::Env::Default()->FileExists` reports OK for it; the process aborts
// otherwise.
std::string ValidateFilepath(absl::string_view filepath) {
  std::string validated(filepath);
  CHECK_OK(tsl::Env::Default()->FileExists(validated));
  return validated;
}

MatmulPerfTableGen::Config CreateConfig(
absl::string_view m_spec, absl::string_view n_spec,
absl::string_view k_spec, absl::string_view dtypes,
Expand All @@ -259,7 +253,7 @@ MatmulPerfTableGen::Config CreateConfig(
cfg.n_spec = ParseSpec(n_spec);
cfg.k_spec = ParseSpec(k_spec);
cfg.dtypes = ParseDataTypes(dtypes);
cfg.hlo_scan_path = ValidateFilepath(hlo_scan_path);
cfg.hlo_scan_path = hlo_scan_path;

// Execution opts.
cfg.dry_run = dry_run;
Expand All @@ -275,6 +269,7 @@ int main(int argc, char* argv[]) {
std::string dtypes;
std::string out;
std::string hlo_scan_path;
std::string merge_path;
bool dry_run = false;

std::vector<tsl::Flag> flag_list = {
Expand All @@ -298,6 +293,8 @@ int main(int argc, char* argv[]) {
tsl::Flag("hlo_scan_path", &hlo_scan_path,
"Path to HLO files. Tool will scan provided HLOs for dot "
"ops and use those for gathering profiling data."),
tsl::Flag("merge_path", &merge_path,
"Path to DeviceHloInstructionProfiles files."),
tsl::Flag("dry_run", &dry_run,
"For a defined search space does not perform measurements but "
"runs everything else."),
Expand All @@ -313,6 +310,15 @@ int main(int argc, char* argv[]) {
MatmulPerfTableGen::Config cfg =
CreateConfig(m_spec, n_spec, k_spec, dtypes, out, hlo_scan_path, dry_run);
MatmulPerfTableGen table_gen(std::move(cfg));

if (!merge_path.empty()) {
LOG(INFO) << "Merging profiling data from: " << merge_path;
auto profile_data = table_gen.Merge(merge_path);
CHECK_OK(profile_data);
CHECK_OK(table_gen.Dump(*profile_data));
return 0;
}

xla::gpu::DeviceHloInstructionProfiles result = table_gen.ComputeTable();
CHECK_OK(table_gen.Dump(result));

Expand Down

0 comments on commit 9e88644

Please sign in to comment.