diff --git a/CMakeLists.txt b/CMakeLists.txt index 419ad66..1a78a3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,6 +117,30 @@ function (build_gcov) quipper_perf ) + # Same sources as create_gcov_lib except create_gcov.cc (which defines main), + # so we can link gtest_main without duplicate main(). + add_executable(gcc_hierarchical_discriminator_multiplicity_test + gcc_hierarchical_discriminator_multiplicity_test.cc + gcov.cc + instruction_map.cc + profile.cc + profile_creator.cc + profile_writer.cc + sample_reader.cc + simple_spe_sample_reader.cc + symbol_map.cc) + target_link_libraries(gcc_hierarchical_discriminator_multiplicity_test + absl::flags + absl::flags_parse + gtest + gtest_main + perf_proto + addr2line_lib + glog + quipper_perf) + add_test(NAME gcc_hierarchical_discriminator_multiplicity_test + COMMAND gcc_hierarchical_discriminator_multiplicity_test) + add_library(profile_merger_lib OBJECT gcov.cc instruction_map.cc diff --git a/create_gcov.cc b/create_gcov.cc index cce528d..0b71425 100644 --- a/create_gcov.cc +++ b/create_gcov.cc @@ -41,6 +41,13 @@ int main(int argc, char **argv) { absl::GetFlag(FLAGS_gcov_version)); devtools_crosstool_autofdo::ProfileCreator creator( absl::GetFlag(FLAGS_binary)); + + // Enable discriminator encoding and two-pass aggregation only for gcov version 3 + if (absl::GetFlag(FLAGS_gcov_version) >= 3) { + absl::SetFlag(&FLAGS_use_discriminator_encoding, true); + absl::SetFlag(&FLAGS_use_two_pass_aggregation, true); + } + if (creator.CreateProfile(absl::GetFlag(FLAGS_profile), absl::GetFlag(FLAGS_profiler), &writer, absl::GetFlag(FLAGS_gcov))) { diff --git a/gcc_hierarchical_discriminator_multiplicity_test.cc b/gcc_hierarchical_discriminator_multiplicity_test.cc new file mode 100644 index 0000000..71ea452 --- /dev/null +++ b/gcc_hierarchical_discriminator_multiplicity_test.cc @@ -0,0 +1,326 @@ + +// hierarchical_discriminator_test +// +// Targeted checks on the checked-in binary + perf.data: +// +// 1) test_unroll: a pos_counts sample whose packed discriminator has non-zero +// copy_id (HasCopyID). Full-profile keys use the raw word; two-pass pass 1 +// uses StripMultiplicity (before CollapseCopyIDs). Assert the merged bucket +// exists on test_unroll and PERFDATA MAX count matches manual MAX over full +// keys stripping to that bucket. +// +// 2) test_vector_loop: a pos_counts sample with hierarchical multiplicity +// (HasMultiplicity). Same two-pass strip + MAX count check on test_vector_loop. +// +// 3) Pass 2: SymbolMap::CollapseCopyIDs() (as at gcov write) strips copy_id to +// base-only keys and SUMs ProfileInfo. For the same test_unroll copy_id +// sample line+base, assert the collapsed bucket count equals the SUM of +// pass-1 counts for all buckets sharing that (line, base). +#include "profile_creator.h" +#include "gcov_discriminator_encoding.h" +#include "source_info.h" +#include "symbol_map.h" + +#include +#include +#include + +#include "gtest/gtest.h" +#include "third_party/abseil/absl/flags/flag.h" +#include "third_party/abseil/absl/strings/match.h" +#include "third_party/abseil/absl/strings/str_cat.h" +#include "third_party/abseil/absl/strings/string_view.h" + +ABSL_DECLARE_FLAG(bool, use_lbr); +ABSL_DECLARE_FLAG(bool, use_discriminator_encoding); +ABSL_DECLARE_FLAG(bool, use_two_pass_aggregation); + +namespace { + +using ::devtools_crosstool_autofdo::GetBaseDiscriminator; +using ::devtools_crosstool_autofdo::GetCopyID; +using ::devtools_crosstool_autofdo::GetMultiplicity; +using ::devtools_crosstool_autofdo::HasCopyID; +using ::devtools_crosstool_autofdo::HasMultiplicity; +using ::devtools_crosstool_autofdo::ProfileCreator; +using ::devtools_crosstool_autofdo::SourceInfo; +using ::devtools_crosstool_autofdo::StripMultiplicity; +using ::devtools_crosstool_autofdo::PositionCountMap; +using ::devtools_crosstool_autofdo::Symbol; +using ::devtools_crosstool_autofdo::SymbolMap; + +const char kTestDataDir[] = "/testdata/"; + +constexpr char kFixtureElf[] = "hierarchical_discriminator_test.x86_64"; +constexpr char kFixturePerf[] = + "hierarchical_discriminator_test.x86_64.perf.data"; + +std::string TestBinaryPath() { + return absl::StrCat(::testing::SrcDir(), kTestDataDir, kFixtureElf); +} + +std::string TestPerfDataPath() { + return absl::StrCat(::testing::SrcDir(), kTestDataDir, kFixturePerf); +} + +const Symbol *FindSymbolByStem(const SymbolMap &symbol_map, + absl::string_view name_stem) { + const auto &name_to_symbol = symbol_map.map(); + const std::string exact(name_stem); + auto direct = name_to_symbol.find(exact); + if (direct != name_to_symbol.end()) return direct->second; + for (const auto &entry : name_to_symbol) { + if (absl::StrContains(entry.first, name_stem)) return entry.second; + } + return nullptr; +} + +struct PosCountSample { + uint32_t raw_packed_discriminator = 0; + uint64_t pos_count_offset_key = 0; +}; + +uint64_t SampleCountAtBucket(const Symbol *symbol, uint32_t line_relative, + uint32_t packed_discriminator) { + if (symbol == nullptr) return 0; + const uint64_t key = + SourceInfo::GenerateOffset(line_relative, packed_discriminator); + auto it = symbol->pos_counts.find(key); + if (it == symbol->pos_counts.end()) return 0; + return it->second.count; +} + +uint64_t MaxFullProfileCountMergingToStripBucket(const Symbol *full_symbol, + uint32_t line_relative, + uint32_t stripped_discriminator) { + if (full_symbol == nullptr) return 0; + uint64_t max_count = 0; + for (const auto &row : full_symbol->pos_counts) { + if (SourceInfo::GetLineNumberFromOffset(row.first) != line_relative) + continue; + const uint32_t full_packed = + SourceInfo::GetDiscriminatorFromOffset(row.first); + if (StripMultiplicity(full_packed) != stripped_discriminator) continue; + max_count = std::max(max_count, row.second.count); + } + return max_count; +} + +bool TwoPassHasBucket(const Symbol *two_pass_symbol, uint32_t line_relative, + uint32_t stripped_discriminator) { + if (two_pass_symbol == nullptr) return false; + for (const auto &row : two_pass_symbol->pos_counts) { + if (SourceInfo::GetLineNumberFromOffset(row.first) == line_relative && + SourceInfo::GetDiscriminatorFromOffset(row.first) == + stripped_discriminator) { + return true; + } + } + return false; +} + +// Pass 1 two-pass: bucket (line, StripMultiplicity(raw)) exists and .count +// equals MAX of full-profile .count over full keys on that line stripping there. +void VerifyPass1TwoPassBucketMaxMatchesFullProfile( + const Symbol *full_symbol, const Symbol *two_pass_symbol, + const PosCountSample &full_sample, absl::string_view label) { + const uint32_t line_relative = + SourceInfo::GetLineNumberFromOffset(full_sample.pos_count_offset_key); + const uint32_t stripped = + StripMultiplicity(full_sample.raw_packed_discriminator); + ASSERT_TRUE(TwoPassHasBucket(two_pass_symbol, line_relative, stripped)) + << label << ": missing two-pass bucket (line_rel=" << line_relative + << ", StripMultiplicity(raw)=" << stripped << ")"; + const uint64_t expected_max = MaxFullProfileCountMergingToStripBucket( + full_symbol, line_relative, stripped); + const uint64_t actual = + SampleCountAtBucket(two_pass_symbol, line_relative, stripped); + ASSERT_EQ(actual, expected_max) + << label << ": two-pass count must equal MAX of full-profile counts for " + "full keys on this line stripping to this bucket"; +} + +// Pass 1 pos_counts: SUM counts for rows on line_relative whose packed +// discriminator has the same 8-bit base (these rows merge in CollapseCopyIDs). +uint64_t SumPass1CountsMergingToBaseOnLine( + const PositionCountMap &pass1_pos_counts, uint32_t line_relative, + uint32_t base_discriminator) { + uint64_t sum = 0; + for (const auto &row : pass1_pos_counts) { + if (SourceInfo::GetLineNumberFromOffset(row.first) != line_relative) continue; + const uint32_t packed = + SourceInfo::GetDiscriminatorFromOffset(row.first); + if (GetBaseDiscriminator(packed) != base_discriminator) continue; + sum += row.second.count; + } + return sum; +} + +// Snapshot pass-1 test_unroll pos_counts, run SymbolMap::CollapseCopyIDs(), then +// compare the collapsed (line, base-only) bucket to the manual SUM above. +// After CollapseCopyIDs: base-only bucket count equals SUM of pass-1 counts +// merged into that (line, base) on test_unroll for the captured copy_id row. +void VerifyCollapseCopyIdsSumsPass1IntoBaseOnlyUnrollBucket( + SymbolMap *two_pass_map, const Symbol *unroll_two_pass_symbol, + const PositionCountMap &unroll_pass1_pos_counts_snapshot, + const PosCountSample &unroll_copy_id_sample) { + const uint32_t line_relative = SourceInfo::GetLineNumberFromOffset( + unroll_copy_id_sample.pos_count_offset_key); + const uint32_t base_only = GetBaseDiscriminator( + unroll_copy_id_sample.raw_packed_discriminator); + const uint64_t expected_sum_after_collapse = + SumPass1CountsMergingToBaseOnLine(unroll_pass1_pos_counts_snapshot, + line_relative, base_only); + + two_pass_map->CollapseCopyIDs(); + + ASSERT_TRUE(TwoPassHasBucket(unroll_two_pass_symbol, line_relative, base_only)) + << "CollapseCopyIDs: expected base-only bucket on test_unroll"; + const uint64_t actual_count = SampleCountAtBucket( + unroll_two_pass_symbol, line_relative, base_only); + ASSERT_EQ(actual_count, expected_sum_after_collapse) + << "CollapseCopyIDs: pos_counts.count at (line_rel=" << line_relative + << ", base=" << base_only + << ") should equal SUM of pass-1 counts for that line and base"; + + EXPECT_EQ(GetCopyID(base_only), 0u) + << "after CollapseCopyIDs discriminator field should be base-only (no copy_id)"; +} + +// First symbol keyed like `stem` (any map entry whose name contains `stem`) +// with a pos_counts row satisfying `predicate`; pairs full + two-pass by exact +// profile name. +bool FindFirstMatchingSymbolPair( + const SymbolMap &full_map, const SymbolMap &two_pass_map, + absl::string_view name_substring, + const std::function &predicate, + const Symbol **full_symbol_out, const Symbol **two_pass_symbol_out, + std::string *matched_name_out) { + for (const auto &name_and_symbol : full_map.map()) { + if (!absl::StrContains(name_and_symbol.first, name_substring)) continue; + const Symbol *full_sym = name_and_symbol.second; + if (full_sym == nullptr) continue; + auto two_pass_it = two_pass_map.map().find(name_and_symbol.first); + if (two_pass_it == two_pass_map.map().end() || + two_pass_it->second == nullptr) { + continue; + } + for (const auto &row : full_sym->pos_counts) { + const uint32_t packed = + SourceInfo::GetDiscriminatorFromOffset(row.first); + if (!predicate(packed)) continue; + *full_symbol_out = full_sym; + *two_pass_symbol_out = two_pass_it->second; + *matched_name_out = name_and_symbol.first; + return true; + } + } + return false; +} + +// test_unroll*: first pos_counts hit with copy_id — pass-1 strip bucket .count +// vs full-profile MAX for that strip bucket. +void VerifyUnrollFirstCopyIdHitPass1StripAgainstFullProfile( + const SymbolMap &full_map, const SymbolMap &two_pass_map, + PosCountSample *copy_id_sample_out, const Symbol **two_pass_symbol_out) { + const Symbol *full_sym = nullptr; + const Symbol *tp_sym = nullptr; + std::string matched_name; + ASSERT_TRUE(FindFirstMatchingSymbolPair( + full_map, two_pass_map, "test_unroll", + [](uint32_t packed) { return HasCopyID(packed); }, &full_sym, &tp_sym, + &matched_name)) + << "fixture must provide a pos_counts row on a symbol keyed like " + "test_unroll* with HasCopyID (try rebuilding testdata if compiler " + "moved discriminators to a clone symbol)"; + *two_pass_symbol_out = tp_sym; + for (const auto &row : full_sym->pos_counts) { + const uint32_t packed = + SourceInfo::GetDiscriminatorFromOffset(row.first); + if (!HasCopyID(packed)) continue; + *copy_id_sample_out = {packed, row.first}; + ASSERT_GT(GetCopyID(packed), 0u); + VerifyPass1TwoPassBucketMaxMatchesFullProfile( + full_sym, tp_sym, *copy_id_sample_out, + absl::StrCat("copy_id ", matched_name)); + return; + } + FAIL() << "internal error: predicate matched but scan did not"; +} + +// test_vector_loop*: first pos_counts hit with multiplicity — pass-1 strip +// bucket .count vs full-profile MAX. +void VerifyVectorLoopFirstMultiplicityHitPass1StripAgainstFullProfile( + const SymbolMap &full_map, const SymbolMap &two_pass_map) { + const Symbol *full_sym = nullptr; + const Symbol *tp_sym = nullptr; + std::string matched_name; + ASSERT_TRUE(FindFirstMatchingSymbolPair( + full_map, two_pass_map, "test_vector_loop", + [](uint32_t packed) { return HasMultiplicity(packed); }, &full_sym, + &tp_sym, &matched_name)) + << "fixture must provide a pos_counts row on a symbol keyed like " + "test_vector_loop* with HasMultiplicity"; + + for (const auto &row : full_sym->pos_counts) { + const uint32_t packed = + SourceInfo::GetDiscriminatorFromOffset(row.first); + if (!HasMultiplicity(packed)) continue; + const PosCountSample sample{packed, row.first}; + ASSERT_GT(GetMultiplicity(packed), 1u); + VerifyPass1TwoPassBucketMaxMatchesFullProfile( + full_sym, tp_sym, sample, + absl::StrCat("multiplicity ", matched_name)); + return; + } + FAIL() << "internal error: predicate matched but scan did not"; +} + +TEST(GccHierarchicalDiscriminatorMultiplicityTest, + UnrollCopyIdAndVectorMultiplicityTwoPass) { + ProfileCreator profile_creator(TestBinaryPath()); + ASSERT_TRUE(profile_creator.ReadSample(TestPerfDataPath(), "perf")); + + absl::SetFlag(&FLAGS_use_lbr, false); + absl::SetFlag(&FLAGS_use_discriminator_encoding, false); + absl::SetFlag(&FLAGS_use_two_pass_aggregation, false); + + SymbolMap full_map(TestBinaryPath()); + full_map.ReadLoadableExecSegmentInfo(/*is_kernel=*/false); + ASSERT_TRUE(profile_creator.ComputeProfile(&full_map, + /*check_lbr_entry=*/false)); + + ASSERT_NE(FindSymbolByStem(full_map, "test_unroll"), nullptr) + << "expected test_unroll (or clone) in profile map"; + ASSERT_NE(FindSymbolByStem(full_map, "test_vector_loop"), nullptr) + << "expected test_vector_loop (or clone) in profile map"; + + // Two-pass pass 1 (ComputeProfile only). + absl::SetFlag(&FLAGS_use_discriminator_encoding, true); + absl::SetFlag(&FLAGS_use_two_pass_aggregation, true); + + SymbolMap two_pass_map(TestBinaryPath()); + two_pass_map.ReadLoadableExecSegmentInfo(/*is_kernel=*/false); + ASSERT_TRUE(profile_creator.ComputeProfile(&two_pass_map, + /*check_lbr_entry=*/false)); + + ASSERT_NE(FindSymbolByStem(two_pass_map, "test_unroll"), nullptr); + ASSERT_NE(FindSymbolByStem(two_pass_map, "test_vector_loop"), nullptr); + + PosCountSample unroll_copy_id_sample{}; + const Symbol *unroll_two_pass_for_copy_id = nullptr; + VerifyUnrollFirstCopyIdHitPass1StripAgainstFullProfile( + full_map, two_pass_map, &unroll_copy_id_sample, &unroll_two_pass_for_copy_id); + VerifyVectorLoopFirstMultiplicityHitPass1StripAgainstFullProfile(full_map, + two_pass_map); + + // Pass 2 (whole map): compare CollapseCopyIDs output for the same profile + // symbol that held the copy_id row — base-only bucket vs SUM of pass-1 rows. + const PositionCountMap pass1_pos_counts_before_collapse = + unroll_two_pass_for_copy_id->pos_counts; + VerifyCollapseCopyIdsSumsPass1IntoBaseOnlyUnrollBucket( + &two_pass_map, unroll_two_pass_for_copy_id, pass1_pos_counts_before_collapse, + unroll_copy_id_sample); +} + +} // namespace diff --git a/gcov_discriminator_encoding.h b/gcov_discriminator_encoding.h new file mode 100644 index 0000000..63b645d --- /dev/null +++ b/gcov_discriminator_encoding.h @@ -0,0 +1,66 @@ +// Discriminator encoding utilities for AutoFDO +// Similar to LLVM's discriminator encoding scheme +// +// Discriminator format: [Base:8][Multiplicity:7][CopyID:11][Unused:6] +// - Base discriminator (bits 0-7): Distinguishes instructions at same line +// - Multiplicity (bits 8-14): Duplication factor for unrolling/vectorization +// - CopyID (bits 15-25): Unique identifier for code copies +// - Unused (bits 26-31): Reserved + +#ifndef AUTOFDO_DISCRIMINATOR_ENCODING_H_ +#define AUTOFDO_DISCRIMINATOR_ENCODING_H_ + +#include + +namespace devtools_crosstool_autofdo { + +// Extract base discriminator (bits 0-7) +inline uint32_t GetBaseDiscriminator(uint32_t discriminator) { + return discriminator & 0xFF; +} + +// Extract multiplicity/duplication factor (bits 8-14) +// Returns 1 if multiplicity bits are 0 (no duplication) +inline uint32_t GetMultiplicity(uint32_t discriminator) { + uint32_t mult = (discriminator >> 8) & 0x7F; + return (mult == 0) ? 1 : mult; +} + +// Extract copy ID (bits 15-25) +inline uint32_t GetCopyID(uint32_t discriminator) { + return (discriminator >> 15) & 0x7FF; +} + +// Encode discriminator from components +inline uint32_t EncodeDiscriminator(uint32_t base, uint32_t multiplicity, + uint32_t copy_id) { + // Validate ranges + if (base > 0xFF || multiplicity > 127 || copy_id > 0x7FF) { + return base; // Fallback to just base if encoding fails + } + return base | (multiplicity << 8) | (copy_id << 15); +} + +// Check if discriminator has multiplicity encoded +inline bool HasMultiplicity(uint32_t discriminator) { + return GetMultiplicity(discriminator) > 1; +} + +// Check if discriminator has copy ID encoded +inline bool HasCopyID(uint32_t discriminator) { + return GetCopyID(discriminator) != 0; +} + +// Strip only multiplicity bits, keeping base and copy_id +// Used for MAX aggregation where we want to aggregate per (line, base, copy_id) +// but not create separate entries for different multiplicities +inline uint32_t StripMultiplicity(uint32_t discriminator) { + uint32_t base = GetBaseDiscriminator(discriminator); + uint32_t copy_id = GetCopyID(discriminator); + return base | (copy_id << 15); +} + +} // namespace devtools_crosstool_autofdo + +#endif // AUTOFDO_DISCRIMINATOR_ENCODING_H_ + diff --git a/profile_creator.cc b/profile_creator.cc index 09d68b6..4b17888 100644 --- a/profile_creator.cc +++ b/profile_creator.cc @@ -133,6 +133,14 @@ bool ProfileCreator::CreateProfile(const std::string &input_profile_name, if (!ComputeProfile(&symbol_map, check_lbr_entry)) return false; } +#if !defined(HAVE_LLVM) + // GCC GCOV only (gcov_version >= 3): pass 2 SUM after stripping copy_id. + // Done here (not in ProfileWriter) so writers keep a const SymbolMap*. + if (absl::GetFlag(FLAGS_use_two_pass_aggregation)) { + symbol_map.CollapseCopyIDs(); + } +#endif + #if defined(HAVE_LLVM) // Create prof_sym_list after symbol_map is populated because prof_sym_list // is expected not to contain any symbol showing up in the profile in diff --git a/source_info.h b/source_info.h index a8f4a8f..e431719 100644 --- a/source_info.h +++ b/source_info.h @@ -8,9 +8,13 @@ #include #include "base/integral_types.h" +#include "base/logging.h" #include "base/macros.h" #if defined(HAVE_LLVM) #include "llvm/IR/DebugInfoMetadata.h" +#else +// Include discriminator encoding utilities only when LLVM is not available +#include "gcov_discriminator_encoding.h" #endif namespace devtools_crosstool_autofdo { @@ -49,7 +53,25 @@ struct SourceInfo { discriminator) : discriminator)); #else - return (static_cast(line - start_line) << 32) | discriminator; + // Profile stores only base discriminator (bits 0-7). + uint32_t disc = use_discriminator_encoding + ? GetBaseDiscriminator(discriminator) + : discriminator; + return (static_cast(line - start_line) << 32) | disc; +#endif + } + + // Offset that keeps copy_id but strips multiplicity + // Used for MAX aggregation where we want to aggregate per (line, base, copy_id) + uint64_t OffsetWithCopyID() const { +#if defined(HAVE_LLVM) + // LLVM doesn't use two-pass aggregation, so just return regular offset. + // This should never be called in practice (use_two_pass_aggregation=false for LLVM). + return Offset(false); // Use full discriminator +#else + // Strip only multiplicity bits, keep base and copy_id + uint32_t disc = StripMultiplicity(discriminator); + return (static_cast(line - start_line) << 32) | disc; #endif } @@ -59,7 +81,9 @@ struct SourceInfo { return llvm::DILocation::getDuplicationFactorFromDiscriminator( discriminator); #else - return 1; + // Only extract multiplicity if discriminator is non-zero + if (discriminator == 0) return 1; + return GetMultiplicity(discriminator); #endif } diff --git a/symbol_map.cc b/symbol_map.cc index 2a8d949..b6b756f 100644 --- a/symbol_map.cc +++ b/symbol_map.cc @@ -56,6 +56,10 @@ ABSL_FLAG(bool, demangle_symbol_names, false, ABSL_FLAG(bool, use_discriminator_encoding, false, "Tell the symbol map that the discriminator encoding is enabled in " "the profile."); +ABSL_FLAG(bool, use_two_pass_aggregation, false, + "GCOV-only (gcov_version>=3, non-LLVM): pass 1 MAX per " + "(line, base, copy_id); pass 2 SUM after stripping copy_id in " + "ProfileCreator before write."); ABSL_FLAG(bool, use_discriminator_multiply_factor, true, "Tell the symbol map whether to use discriminator multiply factors."); #if defined(HAVE_LLVM) @@ -162,6 +166,43 @@ void Symbol::Merge(const Symbol *other) { } } +void Symbol::CollapseCopyIDs() { + // Pass 2: Strip copy_id from discriminators and SUM counts + // Aggregates across different copy_ids to get total execution count + + PositionCountMap new_pos_counts; + + for (const auto &pos_count : pos_counts) { + uint64_t old_offset = pos_count.first; + + // Extract line and discriminator using helpers + uint32_t line = SourceInfo::GetLineNumberFromOffset(old_offset); + uint32_t discriminator = SourceInfo::GetDiscriminatorFromOffset(old_offset); + + // Strip copy_id and multiplicity, keep only base discriminator +#if defined(HAVE_LLVM) + uint32_t base = llvm::DILocation::getBaseDiscriminatorFromDiscriminator( + discriminator); +#else + uint32_t base = GetBaseDiscriminator(discriminator); +#endif + + // Create new offset with only base discriminator + uint64_t new_offset = SourceInfo::GenerateOffset(line, base); + + // SUM counts from different copy_ids + new_pos_counts[new_offset] += pos_count.second; + } + + pos_counts = std::move(new_pos_counts); + + // Recursively collapse copy_ids in all callsites + for (auto &callsite_symbol : callsites) { + if (callsite_symbol.second) { + callsite_symbol.second->CollapseCopyIDs(); + } + } +} void Symbol::EstimateHeadCount() { if (head_count != 0) return; @@ -340,6 +381,15 @@ void SymbolMap::ElideSuffixesAndMerge() { } } +void SymbolMap::CollapseCopyIDs() { + // Pass 2: Collapse copy_ids for all symbols + for (auto &name_symbol : map_) { + if (name_symbol.second) { + name_symbol.second->CollapseCopyIDs(); + } + } +} + void SymbolMap::AddSymbol(absl::string_view name) { std::pair ret = map_.insert(NameSymbolMap::value_type(name, nullptr)); @@ -564,10 +614,26 @@ void SymbolMap::AddSourceCount(absl::string_view symbol_name, if (!symbol) return; bool need_conversion = (data_source == PERFDATA || data_source == AFDOPROTO); if (need_conversion && src[0].HasInvalidInfo()) return; - uint64_t offset = src[0].Offset(use_discriminator_encoding); // If it is to convert perf data or afdoproto to afdo profile, select the // MAX count if there are multiple records mapping to the same offset. // If it is just to read afdo profile, merge those counts. + + // Two-pass aggregation for discriminator encoding: + // Pass 1 (here): MAX per (line, base, copy_id) - keep copy_id in offset + // Pass 2 (before write): SUM across copy_ids - strip copy_id + bool use_two_pass = absl::GetFlag(FLAGS_use_two_pass_aggregation); + + // Offset calculation: keep copy_id only for two-pass aggregation + uint64_t offset; + if (use_two_pass) { + // Pass 1: Keep copy_id for MAX aggregation per copy + offset = src[0].OffsetWithCopyID(); + } else { + // Profile reading: strip copy_id + offset = src[0].Offset(use_discriminator_encoding); + } + + // Aggregation method: MAX for raw data conversion, SUM for profile merging if (need_conversion) { if (count > symbol->pos_counts[offset].count) { symbol->pos_counts[offset].count = count; @@ -584,13 +650,21 @@ bool SymbolMap::AddIndirectCallTarget(absl::string_view symbol_name, DataSource data_source) { bool use_discriminator_encoding = absl::GetFlag(FLAGS_use_discriminator_encoding); + bool use_two_pass = absl::GetFlag(FLAGS_use_two_pass_aggregation); Symbol *symbol = TraverseInlineStack(symbol_name, src, 0, data_source); if (!symbol) return false; if ((data_source == PERFDATA || data_source == AFDOPROTO) && src[0].HasInvalidInfo()) return false; - symbol->pos_counts[src[0].Offset(use_discriminator_encoding)] - .target_map[GetOriginalName(target)] = count; + + uint64_t offset; + if (use_two_pass) { + offset = src[0].OffsetWithCopyID(); // Keep copy_id during Pass 1 + } else { + offset = src[0].Offset(use_discriminator_encoding); // Strip copy_id + } + + symbol->pos_counts[offset].target_map[GetOriginalName(target)] = count; return true; } diff --git a/symbol_map.h b/symbol_map.h index 6613f9e..043442c 100644 --- a/symbol_map.h +++ b/symbol_map.h @@ -36,6 +36,12 @@ // Whether to use discriminator encoding. ABSL_DECLARE_FLAG(bool, use_discriminator_encoding); +// GCOV-only (gcov_version >= 3, non-LLVM build): two-pass copy_id aggregation. +// Pass 1 (AddSourceCount): MAX per (line, base, copy_id). +// Pass 2 (CollapseCopyIDs before write): SUM after stripping copy_id. +// LLVM profiles use a different discriminator path and do not enable this. +ABSL_DECLARE_FLAG(bool, use_two_pass_aggregation); + #if defined(HAVE_LLVM) // Whether to use FS discriminator. ABSL_DECLARE_FLAG(bool, use_fs_discriminator); @@ -191,6 +197,10 @@ class Symbol { // Merges profile stored in src symbol with this symbol. void Merge(const Symbol *src); + // Pass 2: Strip copy_id and SUM across different copies. + // Called before writing profile to collapse discriminators. + void CollapseCopyIDs(); + // Get an estimation of head count from the starting source or callsite // locations. void EstimateHeadCount(); @@ -420,6 +430,10 @@ class SymbolMap { // profile data. void ElideSuffixesAndMerge(); + // Pass 2: Strip copy_id and SUM across all copies. + // Called before writing profile to get final aggregated counts. + void CollapseCopyIDs(); + // Increments symbol's entry count. void AddSymbolEntryCount(absl::string_view symbol, uint64_t head_count, uint64_t total_count = 0); diff --git a/testdata/hierarchical_discriminator_test.c b/testdata/hierarchical_discriminator_test.c new file mode 100644 index 0000000..2c27975 --- /dev/null +++ b/testdata/hierarchical_discriminator_test.c @@ -0,0 +1,42 @@ +#include + +volatile int a[1024]; + +__attribute__((noinline, noclone)) +int test_unroll(void) { + int sum = 0; + int i, j; + + for (i = 0; i < 32; i++) { + for (j = 0; j < 4; j++) { + sum += a[i + j * 4] * i; + } + } + + return sum; +} + +float vx[2048] = {1.0f}; +float vy[2048] = {2.0f}; +float vz[2048] = {0.0f}; + +/* SIMD-friendly loop */ +__attribute__((noinline, noclone)) +float test_vector_loop(void) { + int j; + for (j = 0; j < 2048; j++) { + vz[j] = vx[j] + vy[j]; + } + return vz[100]; +} + +int main(void) { + volatile int sink_i = 0; + volatile float sink_f = 0.f; + for (int j = 0; j < 100000; j++) { + sink_i += test_unroll(); + sink_f += test_vector_loop(); + } + printf("%d %g\n", sink_i, (double)sink_f); + return 0; +} diff --git a/testdata/hierarchical_discriminator_test.x86_64 b/testdata/hierarchical_discriminator_test.x86_64 new file mode 100755 index 0000000..366682c Binary files /dev/null and b/testdata/hierarchical_discriminator_test.x86_64 differ diff --git a/testdata/hierarchical_discriminator_test.x86_64.perf.data b/testdata/hierarchical_discriminator_test.x86_64.perf.data new file mode 100644 index 0000000..27cba94 Binary files /dev/null and b/testdata/hierarchical_discriminator_test.x86_64.perf.data differ