From 959538bbb425e7b1577316a21a23b780593e51a1 Mon Sep 17 00:00:00 2001 From: Lucian Popescu Date: Wed, 13 May 2026 13:58:19 +0100 Subject: [PATCH 1/6] Allow loading rules from src.c --- CMakeLists.txt | 4 +++- cpp2rust/converter/mapper.cpp | 3 ++- cpp2rust/cpp_rule_preprocessor.cpp | 6 +++--- rules/cstring/{src.cpp => src.c} | 0 4 files changed, 8 insertions(+), 5 deletions(-) rename rules/cstring/{src.cpp => src.c} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49e48ac0..8cffef11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,7 +146,9 @@ add_custom_target("check" DEPENDS check-libcc2rs check-unit ) -file(GLOB rule_src_files ${PROJECT_SOURCE_DIR}/rules/*/src.cpp) +file(GLOB rule_src_files + ${PROJECT_SOURCE_DIR}/rules/*/src.cpp + ${PROJECT_SOURCE_DIR}/rules/*/src.c) set(cpp_rules_ir_outputs) set(rust_rules_ir_outputs) set(rust_rules_inputs) diff --git a/cpp2rust/converter/mapper.cpp b/cpp2rust/converter/mapper.cpp index 457379f1..a0c5ae8c 100644 --- a/cpp2rust/converter/mapper.cpp +++ b/cpp2rust/converter/mapper.cpp @@ -405,7 +405,8 @@ TranslationRule::TypeRule *search(clang::QualType qual_type) { void addRulesFromDirectory(const std::filesystem::path &dir, Model model) { for (const auto &entry : std::filesystem::recursive_directory_iterator(dir)) { auto &path = entry.path(); - if (entry.is_regular_file() && path.extension() == ".cpp") { + if (entry.is_regular_file() && + (path.extension() == ".cpp" || path.extension() == ".c")) { auto [expr_rules, type_rules] = TranslationRule::Load(path, model); if (expr_rules.empty() && type_rules.empty()) { log() << "No rules found in " << path << '\n'; diff --git a/cpp2rust/cpp_rule_preprocessor.cpp b/cpp2rust/cpp_rule_preprocessor.cpp index 22329aee..09887abe 100644 --- a/cpp2rust/cpp_rule_preprocessor.cpp +++ b/cpp2rust/cpp_rule_preprocessor.cpp @@ -752,9 +752,9 @@ llvm::cl::OptionCategory cat("cpp-rule-preprocessor options"); llvm::cl::opt SrcFile("file", - llvm::cl::desc("Path to a rule's src.cpp. ir_src.json is written " - "next to it"), - llvm::cl::value_desc("src.cpp"), llvm::cl::Required, + llvm::cl::desc("Path to a rule's src.c or src.cpp. ir_src.json is " + "written next to it"), + llvm::cl::value_desc("src.c|src.cpp"), llvm::cl::Required, llvm::cl::cat(cat)); } // namespace diff --git a/rules/cstring/src.cpp b/rules/cstring/src.c similarity index 100% rename from rules/cstring/src.cpp rename to rules/cstring/src.c From 95cc408d6674f95071b89ed7eebbe51215d85188 Mon Sep 17 00:00:00 2001 From: Lucian Popescu Date: Wed, 13 May 2026 14:31:13 +0100 Subject: [PATCH 2/6] Support src.cpp and src.c in the same rules dir --- CMakeLists.txt | 15 +++--- cpp2rust/cpp_rule_preprocessor.cpp | 34 +++++++++---- rules/cstring/ir_unsafe.json | 76 ++++++++++++++++++++++++++++++ rules/cstring/src.c | 2 + rules/cstring/src.cpp | 6 +++ rules/cstring/tgt_unsafe.rs | 8 ++++ 6 files changed, 125 insertions(+), 16 deletions(-) create mode 100644 rules/cstring/src.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cffef11..0ff8cf48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,19 +146,20 @@ add_custom_target("check" DEPENDS check-libcc2rs check-unit ) -file(GLOB rule_src_files - ${PROJECT_SOURCE_DIR}/rules/*/src.cpp - ${PROJECT_SOURCE_DIR}/rules/*/src.c) +file(GLOB rule_subdirs ${PROJECT_SOURCE_DIR}/rules/*) set(cpp_rules_ir_outputs) set(rust_rules_ir_outputs) set(rust_rules_inputs) -foreach(_src IN LISTS rule_src_files) - get_filename_component(_rule_dir ${_src} DIRECTORY) +foreach(_rule_dir IN LISTS rule_subdirs) + file(GLOB _srcs ${_rule_dir}/src.c ${_rule_dir}/src.cpp) + if(NOT _srcs) + continue() + endif() set(_out ${_rule_dir}/ir_src.json) add_custom_command( OUTPUT ${_out} - COMMAND $ --file ${_src} - DEPENDS ${_src} ${PROJECT_SOURCE_DIR}/cpp2rust/cpp_rule_preprocessor.cpp + COMMAND $ --dir ${_rule_dir} + DEPENDS ${_srcs} ${PROJECT_SOURCE_DIR}/cpp2rust/cpp_rule_preprocessor.cpp VERBATIM ) list(APPEND cpp_rules_ir_outputs ${_out}) diff --git a/cpp2rust/cpp_rule_preprocessor.cpp b/cpp2rust/cpp_rule_preprocessor.cpp index 09887abe..ccf0fb46 100644 --- a/cpp2rust/cpp_rule_preprocessor.cpp +++ b/cpp2rust/cpp_rule_preprocessor.cpp @@ -751,11 +751,11 @@ namespace { llvm::cl::OptionCategory cat("cpp-rule-preprocessor options"); llvm::cl::opt - SrcFile("file", - llvm::cl::desc("Path to a rule's src.c or src.cpp. ir_src.json is " - "written next to it"), - llvm::cl::value_desc("src.c|src.cpp"), llvm::cl::Required, - llvm::cl::cat(cat)); + SrcDir("dir", + llvm::cl::desc("Path to a rule directory containing src.c and/or " + "src.cpp. ir_src.json is written into this dir."), + llvm::cl::value_desc("rule-dir"), llvm::cl::Required, + llvm::cl::cat(cat)); } // namespace @@ -763,12 +763,28 @@ int main(int argc, char *argv[]) { llvm::cl::HideUnrelatedOptions(cat); llvm::cl::ParseCommandLineOptions(argc, argv); - fs::path src = SrcFile.getValue(); - llvm::errs() << "Preprocessing " << src.string() << '\n'; + fs::path dir = SrcDir.getValue(); llvm::json::Object root; - cpp2rust::Extract(src, root); + for (const char *name : {"src.c", "src.cpp"}) { + auto path = dir / name; + if (!fs::exists(path)) { + continue; + } + llvm::errs() << "Preprocessing " << path.string() << '\n'; + llvm::json::Object file_root; + cpp2rust::Extract(path, file_root); + for (auto &[k, v] : file_root) { + if (root.find(k) != root.end()) { + llvm::errs() << "ERROR: rule name " << k.str() + << " defined in multiple files in " << dir.string() + << '\n'; + return EXIT_FAILURE; + } + root[k] = std::move(v); + } + } - auto out_path = src.parent_path() / "ir_src.json"; + auto out_path = dir / "ir_src.json"; std::error_code ec; llvm::raw_fd_ostream out(out_path.string(), ec); if (ec) { diff --git a/rules/cstring/ir_unsafe.json b/rules/cstring/ir_unsafe.json index 4ddb64c0..91b7b08f 100644 --- a/rules/cstring/ir_unsafe.json +++ b/rules/cstring/ir_unsafe.json @@ -312,5 +312,81 @@ "type": "*mut u8", "is_unsafe_pointer": true } + }, + "f5": { + "body": [ + { + "text": "libc::strchr(" + }, + { + "placeholder": { + "arg": 0, + "access": "read" + } + }, + { + "text": " as *const i8, " + }, + { + "placeholder": { + "arg": 1, + "access": "read" + } + }, + { + "text": ") as *mut u8" + } + ], + "params": { + "a0": { + "type": "*const u8", + "is_unsafe_pointer": true + }, + "a1": { + "type": "i32" + } + }, + "return_type": { + "type": "*mut u8", + "is_unsafe_pointer": true + } + }, + "f6": { + "body": [ + { + "text": "libc::strchr(" + }, + { + "placeholder": { + "arg": 0, + "access": "read" + } + }, + { + "text": " as *const i8, " + }, + { + "placeholder": { + "arg": 1, + "access": "read" + } + }, + { + "text": ") as *const u8" + } + ], + "params": { + "a0": { + "type": "*const u8", + "is_unsafe_pointer": true + }, + "a1": { + "type": "i32" + } + }, + "return_type": { + "type": "*const u8", + "is_unsafe_pointer": true + } } } diff --git a/rules/cstring/src.c b/rules/cstring/src.c index d6397c77..24cbd0e5 100644 --- a/rules/cstring/src.c +++ b/rules/cstring/src.c @@ -10,3 +10,5 @@ void *f2(void *dst, int c, size_t n) { return memset(dst, c, n); } int f3(const void *s1, const void *s2, size_t n) { return memcmp(s1, s2, n); } void *f4(void *dst, const void *src, size_t n) { return memmove(dst, src, n); } + +char *f5(const char *a0, int a1) { return strchr(a0, a1); } diff --git a/rules/cstring/src.cpp b/rules/cstring/src.cpp new file mode 100644 index 00000000..644c3ded --- /dev/null +++ b/rules/cstring/src.cpp @@ -0,0 +1,6 @@ +// Copyright (c) 2022-present INESC-ID. +// Distributed under the MIT license that can be found in the LICENSE file. + +#include + +const char *f6(const char *a0, int a1) { return strchr(a0, a1); } diff --git a/rules/cstring/tgt_unsafe.rs b/rules/cstring/tgt_unsafe.rs index b399dab5..8e0ab804 100644 --- a/rules/cstring/tgt_unsafe.rs +++ b/rules/cstring/tgt_unsafe.rs @@ -35,3 +35,11 @@ unsafe fn f4(a0: *mut u8, a1: *const u8, a2: usize) -> *mut u8 { } a0 } + +unsafe fn f5(a0: *const u8, a1: i32) -> *mut u8 { + libc::strchr(a0 as *const i8, a1) as *mut u8 +} + +unsafe fn f6(a0: *const u8, a1: i32) -> *const u8 { + libc::strchr(a0 as *const i8, a1) as *const u8 +} From 2e065e10569928394bd3df8d4c752f216e1500f1 Mon Sep 17 00:00:00 2001 From: Lucian Popescu Date: Wed, 13 May 2026 14:33:02 +0100 Subject: [PATCH 3/6] Use string.h instead of string --- rules/cstring/src.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/cstring/src.cpp b/rules/cstring/src.cpp index 644c3ded..3e1c77cc 100644 --- a/rules/cstring/src.cpp +++ b/rules/cstring/src.cpp @@ -1,6 +1,6 @@ // Copyright (c) 2022-present INESC-ID. // Distributed under the MIT license that can be found in the LICENSE file. -#include +#include const char *f6(const char *a0, int a1) { return strchr(a0, a1); } From 2e4b246b135f8dc9612db02a5c4f9506d7ecbbef Mon Sep 17 00:00:00 2001 From: Lucian Popescu Date: Wed, 13 May 2026 15:30:11 +0100 Subject: [PATCH 4/6] Add strchr C and C++ tests --- tests/unit/out/unsafe/strchr_c.rs | 21 +++++++++++++++++++++ tests/unit/out/unsafe/strchr_cpp.rs | 21 +++++++++++++++++++++ tests/unit/strchr_c.c | 12 ++++++++++++ tests/unit/strchr_cpp.cpp | 12 ++++++++++++ 4 files changed, 66 insertions(+) create mode 100644 tests/unit/out/unsafe/strchr_c.rs create mode 100644 tests/unit/out/unsafe/strchr_cpp.rs create mode 100644 tests/unit/strchr_c.c create mode 100644 tests/unit/strchr_cpp.cpp diff --git a/tests/unit/out/unsafe/strchr_c.rs b/tests/unit/out/unsafe/strchr_c.rs new file mode 100644 index 00000000..186de27e --- /dev/null +++ b/tests/unit/out/unsafe/strchr_c.rs @@ -0,0 +1,21 @@ +extern crate libc; +use libc::*; +extern crate libcc2rs; +use libcc2rs::*; +use std::collections::BTreeMap; +use std::io::{Read, Seek, Write}; +use std::os::fd::{AsFd, FromRawFd, IntoRawFd}; +use std::rc::Rc; +pub fn main() { + unsafe { + std::process::exit(main_0() as i32); + } +} +unsafe fn main_0() -> i32 { + let mut s: *const u8 = (b"hello world\0".as_ptr().cast_mut()).cast_const(); + let mut r: *mut u8 = libc::strchr(s as *const i8, ('w' as i32)) as *mut u8; + assert!((((!((r).is_null())) as i32) != 0)); + assert!((((((*r) as i32) == ('w' as i32)) as i32) != 0)); + assert!(((((libc::strchr(s as *const i8, ('z' as i32)) as *mut u8).is_null()) as i32) != 0)); + return 0; +} diff --git a/tests/unit/out/unsafe/strchr_cpp.rs b/tests/unit/out/unsafe/strchr_cpp.rs new file mode 100644 index 00000000..309f23e6 --- /dev/null +++ b/tests/unit/out/unsafe/strchr_cpp.rs @@ -0,0 +1,21 @@ +extern crate libc; +use libc::*; +extern crate libcc2rs; +use libcc2rs::*; +use std::collections::BTreeMap; +use std::io::{Read, Seek, Write}; +use std::os::fd::{AsFd, FromRawFd, IntoRawFd}; +use std::rc::Rc; +pub fn main() { + unsafe { + std::process::exit(main_0() as i32); + } +} +unsafe fn main_0() -> i32 { + let mut s: *const u8 = b"hello world\0".as_ptr(); + let mut r: *const u8 = libc::strchr(s as *const i8, (('w' as u8) as i32)) as *const u8; + assert!(!((r).is_null())); + assert!((((*r) as i32) == (('w' as u8) as i32))); + assert!((libc::strchr(s as *const i8, (('z' as u8) as i32)) as *const u8).is_null()); + return 0; +} diff --git a/tests/unit/strchr_c.c b/tests/unit/strchr_c.c new file mode 100644 index 00000000..eb23549b --- /dev/null +++ b/tests/unit/strchr_c.c @@ -0,0 +1,12 @@ +// no-compile: refcount +#include +#include + +int main() { + const char *s = "hello world"; + char *r = strchr(s, 'w'); + assert(r != NULL); + assert(*r == 'w'); + assert(strchr(s, 'z') == NULL); + return 0; +} diff --git a/tests/unit/strchr_cpp.cpp b/tests/unit/strchr_cpp.cpp new file mode 100644 index 00000000..4a9e5be5 --- /dev/null +++ b/tests/unit/strchr_cpp.cpp @@ -0,0 +1,12 @@ +// no-compile: refcount +#include +#include + +int main() { + const char *s = "hello world"; + const char *r = strchr(s, 'w'); + assert(r != NULL); + assert(*r == 'w'); + assert(strchr(s, 'z') == NULL); + return 0; +} From 89c325c1e88b1417fdd467ac4662f9550f7829fb Mon Sep 17 00:00:00 2001 From: Lucian Popescu Date: Wed, 13 May 2026 15:34:44 +0100 Subject: [PATCH 5/6] Update tests --- tests/unit/out/unsafe/strchr_c.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/out/unsafe/strchr_c.rs b/tests/unit/out/unsafe/strchr_c.rs index 186de27e..efce259d 100644 --- a/tests/unit/out/unsafe/strchr_c.rs +++ b/tests/unit/out/unsafe/strchr_c.rs @@ -12,7 +12,7 @@ pub fn main() { } } unsafe fn main_0() -> i32 { - let mut s: *const u8 = (b"hello world\0".as_ptr().cast_mut()).cast_const(); + let mut s: *const u8 = b"hello world\0".as_ptr().cast_mut().cast_const(); let mut r: *mut u8 = libc::strchr(s as *const i8, ('w' as i32)) as *mut u8; assert!((((!((r).is_null())) as i32) != 0)); assert!((((((*r) as i32) == ('w' as i32)) as i32) != 0)); From 5cce31e49f187beed68e4ce213408a4648332968 Mon Sep 17 00:00:00 2001 From: Lucian Popescu Date: Wed, 13 May 2026 20:07:56 +0100 Subject: [PATCH 6/6] Replace find + assign with try_emplace --- cpp2rust/cpp_rule_preprocessor.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp2rust/cpp_rule_preprocessor.cpp b/cpp2rust/cpp_rule_preprocessor.cpp index ccf0fb46..957a6c83 100644 --- a/cpp2rust/cpp_rule_preprocessor.cpp +++ b/cpp2rust/cpp_rule_preprocessor.cpp @@ -774,13 +774,12 @@ int main(int argc, char *argv[]) { llvm::json::Object file_root; cpp2rust::Extract(path, file_root); for (auto &[k, v] : file_root) { - if (root.find(k) != root.end()) { + if (!root.try_emplace(k, std::move(v)).second) { llvm::errs() << "ERROR: rule name " << k.str() << " defined in multiple files in " << dir.string() << '\n'; return EXIT_FAILURE; } - root[k] = std::move(v); } }