Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions bin/Index/ArgumentFilter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright (c) 2022-present, Trail of Bits, Inc.
//
// This source code is licensed in accordance with the terms specified in
// the LICENSE file found in the root directory of this source tree.

#include "ArgumentFilter.h"

#include <charconv>
#include <fstream>
#include <sstream>

namespace indexer {

std::optional<std::string>
ArgumentFilter::LoadFromFile(const std::filesystem::path &path) {
std::ifstream file(path);
if (!file.is_open()) {
return "Could not open argument filter config: " + path.string();
}

std::string line;
unsigned line_num = 0;

while (std::getline(file, line)) {
++line_num;

// Strip leading whitespace.
auto start = line.find_first_not_of(" \t");
if (start == std::string::npos) {
continue; // Empty line.
}

// Skip comments.
if (line[start] == '#') {
continue;
}

// Parse: <match_type> <pattern> [<num_values>]
std::istringstream ss(line.substr(start));
std::string match_str;
std::string pattern;
std::string num_str;

ss >> match_str >> pattern;
if (match_str.empty() || pattern.empty()) {
return path.string() + ":" + std::to_string(line_num) +
": expected '<match_type> <pattern> [<num_values>]'";
}

MatchType match;
if (match_str == "exact") {
match = MatchType::kExact;
} else if (match_str == "prefix") {
match = MatchType::kPrefix;
} else if (match_str == "contains") {
match = MatchType::kContains;
} else {
return path.string() + ":" + std::to_string(line_num) +
": unknown match type '" + match_str +
"'; expected 'exact', 'prefix', or 'contains'";
}

int num_following = 0;
ss >> num_str;
if (!num_str.empty()) {
bool lenient = false;
std::string_view digits = num_str;

// ~N means lenient: skip following args only if they don't look like
// flags (don't start with '-').
if (digits.front() == '~') {
lenient = true;
digits.remove_prefix(1);
}

auto [ptr, ec] = std::from_chars(
digits.data(), digits.data() + digits.size(), num_following);
if (ec != std::errc{} || ptr != digits.data() + digits.size()) {
return path.string() + ":" + std::to_string(line_num) +
": invalid num_values '" + num_str + "'";
}

if (lenient) {
num_following = -num_following;
}
}

rules.push_back({match, std::move(pattern), num_following});
}

return std::nullopt;
}

std::optional<int>
ArgumentFilter::ShouldSkip(std::string_view arg) const {
for (const Rule &rule : rules) {
bool matched = false;
switch (rule.match) {
case MatchType::kExact:
matched = (arg == rule.pattern);
break;
case MatchType::kPrefix:
matched = arg.starts_with(rule.pattern);
break;
case MatchType::kContains:
matched = (arg.find(rule.pattern) != std::string_view::npos);
break;
}
if (matched) {
return rule.num_following;
}
}
return std::nullopt;
}

} // namespace indexer
63 changes: 63 additions & 0 deletions bin/Index/ArgumentFilter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) 2022-present, Trail of Bits, Inc.
//
// This source code is licensed in accordance with the terms specified in
// the LICENSE file found in the root directory of this source tree.

#pragma once

#include <cstdint>
#include <filesystem>
#include <optional>
#include <string>
#include <string_view>
#include <vector>

namespace indexer {

// Filters compiler arguments based on pattern rules loaded from a config file.
//
// Each rule specifies a match type (exact, prefix, or contains), a pattern
// string, and how many following arguments to also skip. Rules are checked in
// order; the first match wins.
class ArgumentFilter {
 public:
  // Creates a filter with no rules.
  ArgumentFilter() = default;

  // Reads rules from the config file at `path` and adds them after the rules
  // that are already loaded.
  //
  // Yields std::nullopt when loading succeeded, otherwise a message describing
  // what went wrong.
  std::optional<std::string> LoadFromFile(const std::filesystem::path &path);

  // Decides whether `arg` should be dropped.
  //
  //   * std::nullopt: keep the argument.
  //   * 0:            drop only this argument.
  //   * N > 0:        drop this argument and the N arguments after it.
  //   * N < 0:        drop this argument and up to |N| arguments after it, but
  //                   only those that don't start with '-'. This copes with
  //                   malformed compilation databases in which the slot that
  //                   should hold a value actually holds the next flag.
  std::optional<int> ShouldSkip(std::string_view arg) const;

  // True while the filter holds no rules.
  bool Empty() const { return rules.empty(); }

 private:
  // How a rule's pattern is compared against an argument.
  enum class MatchType : std::uint8_t {
    kExact,
    kPrefix,
    kContains,
  };

  // One filtering rule.
  struct Rule {
    MatchType match;
    std::string pattern;

    // > 0: always skip this many arguments after the matched one.
    // < 0: skip |n| arguments after the matched one, unless they start
    //      with '-'.
    // = 0: skip nothing beyond the matched argument.
    int num_following;
  };

  // Rules in load order.
  std::vector<Rule> rules;
};

} // namespace indexer
35 changes: 35 additions & 0 deletions bin/Index/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ set(exe_name "mx-index")
add_executable("${exe_name}"
"Action.cpp"
"Action.h"
"ArgumentFilter.cpp"
"ArgumentFilter.h"
"BuildPendingFragment.cpp"
"Context.cpp"
"Context.h"
Expand Down Expand Up @@ -96,6 +98,11 @@ target_include_directories("${exe_name}"
"$<BUILD_INTERFACE:${LLVM_INCLUDE_DIRS};${LLVM_INCLUDE_DIR};${CLANG_INCLUDE_DIRS}>"
)

# Define MX_SHARE_DIR for this target's sources as a quoted string holding the
# value of MX_INSTALL_SHARE_DIR.
# NOTE(review): presumably used at run time to locate the share directory
# (e.g. unsupported_args.cfg) -- confirm against the C++ side.
target_compile_definitions("${exe_name}"
PRIVATE
"MX_SHARE_DIR=\"${MX_INSTALL_SHARE_DIR}\""
)

target_compile_options("${exe_name}"
PRIVATE
"-Wno-unknown-warning-option"
Expand All @@ -122,6 +129,27 @@ set_target_properties("${exe_name}"

find_and_link_llvm_dependencies("${exe_name}")

# Copy the default argument filter config into the build tree so that mx-index
# can find it when run from the build directory (the binary sits in
# <build>/<bin_dir>/, and it looks for ../<share_dir>/multiplier/).
set(MX_ARG_FILTER_SRC "${PROJECT_SOURCE_DIR}/share/multiplier/unsupported_args.cfg")
set(MX_ARG_FILTER_DST "${PROJECT_BINARY_DIR}/${MX_INSTALL_SHARE_DIR}/multiplier/unsupported_args.cfg")

# Skip the copy when source and build trees overlap (avoids a dependency cycle).
# Both paths are made absolute and lexically normalized first so that the
# string comparison below compares like with like.
cmake_path(ABSOLUTE_PATH MX_ARG_FILTER_SRC NORMALIZE OUTPUT_VARIABLE _src_abs)
cmake_path(ABSOLUTE_PATH MX_ARG_FILTER_DST NORMALIZE OUTPUT_VARIABLE _dst_abs)
if(NOT _src_abs STREQUAL _dst_abs)
# The copy is re-run whenever the source config changes (DEPENDS).
add_custom_command(
OUTPUT "${MX_ARG_FILTER_DST}"
COMMAND "${CMAKE_COMMAND}" -E copy_if_different
"${MX_ARG_FILTER_SRC}" "${MX_ARG_FILTER_DST}"
DEPENDS "${MX_ARG_FILTER_SRC}"
COMMENT "Copying unsupported_args.cfg to build tree"
)
# The custom target is part of the default build (ALL) and drives the copy
# command above; the executable depends on it so that the config is in place
# whenever the executable is built.
add_custom_target("${exe_name}-arg-filter" ALL DEPENDS "${MX_ARG_FILTER_DST}")
add_dependencies("${exe_name}" "${exe_name}-arg-filter")
endif()

if(MX_ENABLE_INSTALL AND NOT MX_ENABLE_BOOTSTRAP)
install(
TARGETS
Expand All @@ -132,4 +160,11 @@ if(MX_ENABLE_INSTALL AND NOT MX_ENABLE_BOOTSTRAP)
DESTINATION
"${CMAKE_INSTALL_BINDIR}"
)

install(
FILES
"${MX_ARG_FILTER_SRC}"
DESTINATION
"${MX_INSTALL_SHARE_DIR}/multiplier"
)
endif()
14 changes: 8 additions & 6 deletions bin/Index/Hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,8 +379,14 @@ void HashVisitor::VisitDecl(const pasta::Decl &decl) {
AccumulateTokenData(ss, pasta::PrintedTokenRange::Create(vd->Type(), pp));

} else if (auto td = pasta::TypeDecl::From(decl)) {
if (auto ty = td->TypeForDeclaration()) {
AccumulateTokenData(ss, pasta::PrintedTokenRange::Create(ty.value(), pp));
// Skip TypeForDeclaration() for TypedefNameDecls: the typedef body tokens
// are already in the fragment's token range, and the type printer can
// produce different output across TUs depending on how much type sugar
// Clang preserved, causing hash instability.
if (!pasta::TypedefNameDecl::From(decl)) {
if (auto ty = td->TypeForDeclaration()) {
AccumulateTokenData(ss, pasta::PrintedTokenRange::Create(ty.value(), pp));
}
}
}

Expand All @@ -403,10 +409,6 @@ static std::string HashNestedFragment(
HashVisitor visitor(ss, em, true);
visitor.Accept(decl);

// std::cerr << "\n-----------------\n";
// std::cerr << ss.str() << '\n';
// Dump(decl);

return ss.str();
}

Expand Down
Loading
Loading