diff --git a/.gitmodules b/.gitmodules index 6b4835a..90b49f5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,7 @@ path = external/CommonLibSSE-NG url = https://github.com/alandtse/CommonLibVR.git branch = ng +[submodule "external/rpmalloc"] + path = external/rpmalloc + url = https://github.com/mjansson/rpmalloc.git + branch = main diff --git a/CMakeLists.txt b/CMakeLists.txt index e11b8a3..d9212c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,8 @@ -cmake_minimum_required(VERSION 3.21) +cmake_minimum_required(VERSION 3.30) + +if(POLICY CMP0167) + cmake_policy(SET CMP0167 NEW) +endif() macro(set_from_environment VARIABLE) if (NOT DEFINED ${VARIABLE} AND DEFINED ENV{${VARIABLE}}) @@ -11,7 +15,7 @@ include(cmake/version.cmake) project( EngineFixes VERSION ${VERSION} - LANGUAGES CXX + LANGUAGES C CXX ) add_compile_definitions(SKYRIM) @@ -45,12 +49,7 @@ add_subdirectory(${CommonLibPath} ${CommonLibName} EXCLUDE_FROM_ALL) find_package(spdlog CONFIG REQUIRED) find_package(AutoTOML REQUIRED CONFIG) -find_package(Boost - MODULE - REQUIRED - COMPONENTS - regex -) +find_package(boost_regex CONFIG REQUIRED) find_package(TBB REQUIRED CONFIG @@ -77,6 +76,7 @@ add_library( ${PROJECT_NAME} SHARED ${SOURCES} + external/rpmalloc/rpmalloc/rpmalloc.c ${CMAKE_CURRENT_BINARY_DIR}/include/Version.h ${CMAKE_CURRENT_BINARY_DIR}/version.rc ${PROJECT_NAME}.toml @@ -84,6 +84,9 @@ add_library( .editorconfig ) +# TODO: flags +set_source_files_properties( external/rpmalloc/rpmalloc/rpmalloc.c PROPERTIES COMPILE_FLAGS "/D \"_UNICODE\" /D \"UNICODE\" /std:c17 /Zi /Oi /Oy- /GS- /Qpar- /fp:fast /fp:except- /Zc:forScope /Zc:wchar_t /GR- /openmp- /W4 /WX /wd4201 /wd4100 /Gm- /Ob2 /Ot /GT /GL /GF /O2 /D\"BUILD_RELEASE=1\"") + target_compile_features( ${PROJECT_NAME} PRIVATE @@ -95,6 +98,7 @@ target_include_directories( PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/src + "external/rpmalloc/rpmalloc" ) target_link_libraries( @@ -106,6 +110,16 @@ target_link_libraries( TBB::tbb TBB::tbbmalloc xbyak::xbyak +# ${CMAKE_CURRENT_SOURCE_DIR}/external/rpmalloc/lib/windows/release/x86-64/rpmalloc.lib +) +target_compile_definitions( + ${PROJECT_NAME} + PRIVATE + ENABLE_PRELOAD + ENABLE_VALIDATE_ARGS + ENABLE_ASSERTS + ENABLE_THREAD_CACHE=1 +# ENABLE_STATISTICS ) if (MSVC) @@ -120,7 +134,7 @@ endif() target_precompile_headers( ${PROJECT_NAME} PRIVATE - src/PCH.h + "$<$:${CMAKE_CURRENT_SOURCE_DIR}/src/PCH.h>" ) option(COPY_BUILD "whether we should copy the outputs to the skyrim dir" OFF) diff --git a/CMakePresets.json b/CMakePresets.json index f3f81f0..116dec0 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -23,9 +23,9 @@ } }, { - "environment": { - "CXXFLAGS": "/MP /W4 /WX /external:anglebrackets /external:W0 $penv{CXXFLAGS}" - }, + "environment": { + "CXXFLAGS": "/MP /W4 /WX /external:anglebrackets /external:W0 $penv{CXXFLAGS}" + }, "generator": "Visual Studio 17 2022", "hidden": true, "name": "vs2022" diff --git a/external/rpmalloc b/external/rpmalloc new file mode 160000 index 0000000..9351765 --- /dev/null +++ b/external/rpmalloc @@ -0,0 +1 @@ +Subproject commit 9351765a98dfe8ed341df0a7f00dc756379c7141 diff --git a/src/PCH.h b/src/PCH.h index 0e1e673..3207588 100644 --- a/src/PCH.h +++ b/src/PCH.h @@ -72,7 +72,6 @@ #include #include -#include #include #ifndef NDEBUG diff --git a/src/main.cpp b/src/main.cpp index 2792aa4..5a53549 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,6 +4,7 @@ #include "utils.h" #include "version.h" #include "warnings.h" +#include inline constexpr REL::Version RUNTIME_1_6_1170(1, 6, 1170, 0); @@ -39,6 +40,25 @@ void MessageHandler(SKSE::MessagingInterface::Message* a_msg) if (*config::warnRefHandleLimit) warnings::WarnActiveRefrHandleCount(static_cast(*config::warnRefrLoadedGameLimit)); + break; + case SKSE::MessagingInterface::kSaveGame: + if (*config::patchMemoryManager) { + /* + FILE* fout; + auto path = logger::log_directory(); + if (!path) + stl::report_and_fail("failed to get standard log path"sv); + + *path /= "EngineFixes_rpmalloc_stats.log"sv; + if ((fout = fopen(path->string().c_str(), "w")) != NULL) + { + rpmalloc_dump_statistics(fout); + fclose(fout); + } + */ + patches::WriteMemoryManagerStats(); + } + break; default: break; diff --git a/src/patches.h b/src/patches.h index 1145542..c731ca5 100644 --- a/src/patches.h +++ b/src/patches.h @@ -17,6 +17,7 @@ namespace patches bool PatchWaterflowAnimation(); bool PatchMemoryManager(); + void WriteMemoryManagerStats(); bool PatchSafeExit(); bool PatchSaveGameMaxSize(); bool PatchScaleformAllocator(); diff --git a/src/patches/formcaching.cpp b/src/patches/formcaching.cpp index 6fb4731..e2f7237 100644 --- a/src/patches/formcaching.cpp +++ b/src/patches/formcaching.cpp @@ -48,6 +48,26 @@ namespace patches if (globalFormCacheMap[masterId].find(accessor, baseId)) { formPointer = accessor->second; + if (masterId == 0xFF) + { + RE::TESForm* formPointerGlobalTable = nullptr; + GlobalFormTableLock->LockForRead(); + + if (*GlobalFormTable) + { + auto iter = (*GlobalFormTable)->find(FormId); + formPointerGlobalTable = (iter != (*GlobalFormTable)->end()) ? iter->second : nullptr; + } + + GlobalFormTableLock->UnlockForRead(); + + if (formPointerGlobalTable == nullptr) + { + logger::trace("debug hk_GetFormByID from cache for {:08X} is {}, but in globalTable is {}"sv, FormId, (formPointer == nullptr) ? "nullptr" : "Form", (formPointerGlobalTable == nullptr) ? "nullptr" : "Form"); + //UpdateFormCache(FormId, formPointerGlobalTable, true); + return formPointerGlobalTable; + } + } return formPointer; } } @@ -201,8 +221,8 @@ namespace patches origFunc2HookAddr.address(), reinterpret_cast(UnknownFormFunction2)); } - - logger::trace("done"sv); + bool isTbbScalableAllocator = (globalFormCacheMap[0].get_allocator().allocator_type() == globalFormCacheMap[0].get_allocator().scalable) ? true : false; + logger::trace("done. isTbbScalableAllocator: {}"sv, isTbbScalableAllocator); logger::trace("success"sv); diff --git a/src/patches/memorymanager.cpp b/src/patches/memorymanager.cpp index 035c7e4..b083a8e 100644 --- a/src/patches/memorymanager.cpp +++ b/src/patches/memorymanager.cpp @@ -1,6 +1,8 @@ #include "version.h" #include "offsets.h" +#include +#include namespace { @@ -78,34 +80,119 @@ namespace } } +#if ENABLE_STATISTICS + namespace MemoryManagerStats + { + struct + { + struct + { + std::chrono::nanoseconds total = 0ns; + unsigned long count = 0; + std::mutex mutex; + } Allocate; + struct + { + std::chrono::nanoseconds total = 0ns; + unsigned long count = 0; + std::mutex mutex; + } Deallocate; + struct + { + std::chrono::nanoseconds total = 0ns; + unsigned long count = 0; + std::mutex mutex; + } Reallocate; + } Stats; + } +#endif + namespace MemoryManager { void* Allocate(RE::MemoryManager*, std::size_t a_size, std::uint32_t a_alignment, bool a_alignmentRequired) { + void* ret = g_trash; +#if ENABLE_STATISTICS + auto start = std::chrono::steady_clock::now(); +#endif + //logger::info("MemoryManager::Allocate START"); if (a_size > 0) - return a_alignmentRequired ? - scalable_aligned_malloc(a_size, a_alignment) : - scalable_malloc(a_size); - else - return g_trash; + { + //rpmalloc_thread_initialize(); + ret = a_alignmentRequired ? + rpaligned_alloc(a_alignment, a_size) : // rpaligned_alloc(a_alignment, a_size) : // scalable_aligned_malloc(a_size, a_alignment) + rpmalloc(a_size); // rpmalloc(a_size); // scalable_malloc(a_size) + } + //logger::info("MemoryManager::Allocate END"); + if (ret == 0) { + logger::error("MemoryManager::Allocate ret: 0; a_size: {}, a_alignment: {}, a_alignmentRequired: {}"sv, a_size, a_alignment, a_alignmentRequired); + } +#if ENABLE_STATISTICS + std::lock_guard guard(MemoryManagerStats::Stats.Allocate.mutex); + auto end = std::chrono::steady_clock::now(); + if (MemoryManagerStats::Stats.Allocate.count < (ULONG_MAX - 10)) { + MemoryManagerStats::Stats.Allocate.count++; + MemoryManagerStats::Stats.Allocate.total += end - start; + } +#endif + return ret; } void Deallocate(RE::MemoryManager*, void* a_mem, bool a_alignmentRequired) { +#if ENABLE_STATISTICS + auto start = std::chrono::steady_clock::now(); +#endif + //logger::info("MemoryManager::Deallocate START"); if (a_mem != g_trash) + //rpmalloc_thread_initialize(); a_alignmentRequired ? - scalable_aligned_free(a_mem) : - scalable_free(a_mem); + rpfree(a_mem) : //rpfree(a_mem) : // scalable_aligned_free(a_mem) + rpfree(a_mem); // rpfree(a_mem); // scalable_free(a_mem) + //logger::info("MemoryManager::Deallocate END"); +#if ENABLE_STATISTICS + std::lock_guard guard(MemoryManagerStats::Stats.Deallocate.mutex); + auto end = std::chrono::steady_clock::now(); + if (MemoryManagerStats::Stats.Deallocate.count < (ULONG_MAX - 10)) + { + MemoryManagerStats::Stats.Deallocate.count++; + MemoryManagerStats::Stats.Deallocate.total += end - start; + } +#endif } void* Reallocate(RE::MemoryManager* a_self, void* a_oldMem, std::size_t a_newSize, std::uint32_t a_alignment, bool a_alignmentRequired) { + void* ret = g_trash; +#if ENABLE_STATISTICS + auto start = std::chrono::steady_clock::now(); +#endif + //logger::info("MemoryManager::Reallocate START"); if (a_oldMem == g_trash) - return Allocate(a_self, a_newSize, a_alignment, a_alignmentRequired); + ret = Allocate(a_self, a_newSize, a_alignment, a_alignmentRequired); else - return a_alignmentRequired ? - scalable_aligned_realloc(a_oldMem, a_newSize, a_alignment) : - scalable_realloc(a_oldMem, a_newSize); + { + //rpmalloc_thread_initialize(); + ret = a_alignmentRequired ? + rpaligned_realloc(a_oldMem, a_alignment, a_newSize, rpmalloc_usable_size(a_oldMem), 0) : //rpaligned_realloc(a_oldMem, a_alignment, a_newSize, rpmalloc_usable_size(a_oldMem), 0) : //scalable_aligned_realloc(a_oldMem, a_newSize, a_alignment) : + rprealloc(a_oldMem, a_newSize); //rprealloc(a_oldMem, a_newSize); // scalable_realloc(a_oldMem, a_newSize); + } + + //logger::info("MemoryManager::Reallocate END"); + if (ret == 0) + { + logger::error("MemoryManager::Reallocate ret: 0; a_newSize: {}, a_alignment: {}, a_alignmentRequired: {}"sv, a_newSize, a_alignment, a_alignmentRequired); + } +#if ENABLE_STATISTICS + std::lock_guard guard(MemoryManagerStats::Stats.Reallocate.mutex); + auto end = std::chrono::steady_clock::now(); + if (MemoryManagerStats::Stats.Reallocate.count < (ULONG_MAX - 10)) + { + MemoryManagerStats::Stats.Reallocate.count++; + MemoryManagerStats::Stats.Reallocate.total += end - start; + } +#endif + return ret; } void ReplaceAllocRoutines() @@ -145,7 +232,12 @@ namespace { std::size_t hk_msize(void* a_ptr) { - return scalable_msize(a_ptr); + logger::info("msize::hk_msize START"); + //rpmalloc_thread_initialize(); + //return scalable_msize(a_ptr); + auto a = rpmalloc_usable_size(a_ptr); + logger::info("msize::hk_msize END"); + return a; } void Install() @@ -158,9 +250,18 @@ namespace { void* Allocate(RE::ScrapHeap*, std::size_t a_size, std::size_t a_alignment) { - return a_size > 0 ? - scalable_aligned_malloc(a_size, a_alignment) : - g_trash; + //logger::info("ScrapHeap::Allocate START"); + //rpmalloc_thread_initialize(); + void* ret = a_size > 0 ? + rpaligned_alloc(a_alignment, a_size) : // scalable_aligned_malloc(a_size, a_alignment) : + g_trash; + //logger::info("ScrapHeap::Allocate END"); + + if (ret == 0) + { + logger::error("ScrapHeap::Allocate ret: 0; a_size: {}, a_alignment: {}"sv, a_size, a_alignment); + } + return ret; } RE::ScrapHeap* Ctor(RE::ScrapHeap* a_this) @@ -172,8 +273,12 @@ namespace void Deallocate(RE::ScrapHeap*, void* a_mem) { + //rpmalloc_thread_initialize(); + //logger::info("ScrapHeap::Deallocate START"); if (a_mem != g_trash) - scalable_aligned_free(a_mem); + rpfree(a_mem); // scalable_aligned_free(a_mem); + //logger::info("ScrapHeap::Deallocate END"); + } void WriteHooks() @@ -223,12 +328,24 @@ namespace namespace patches { + + void ErrorCallback(const char* message) + { + logger::error("MemoryManager::Error {}"sv, message); + throw message; + } + bool PatchMemoryManager() { logger::trace("- memory manager patch -"sv); g_trash = new std::byte[1u << 10]{ static_cast(0) }; + //rpmalloc_config_t config{}; + //config.error_callback = &ErrorCallback; + + //rpmalloc_initialize_config(&config); + AutoScrapBuffer::Install(); MemoryManager::Install(); msize::Install(); @@ -237,4 +354,17 @@ namespace patches logger::trace("success"sv); return true; } + + void WriteMemoryManagerStats() + { +#if ENABLE_STATISTICS + std::lock_guard guard(MemoryManagerStats::Stats.Allocate.mutex); + std::lock_guard guard1(MemoryManagerStats::Stats.Deallocate.mutex); + std::lock_guard guard2(MemoryManagerStats::Stats.Reallocate.mutex); + + logger::trace("Allocate: {} ns. Total: {} ns, Count: {}"sv, (MemoryManagerStats::Stats.Allocate.total / MemoryManagerStats::Stats.Allocate.count).count(), MemoryManagerStats::Stats.Allocate.total.count(), MemoryManagerStats::Stats.Allocate.count); + logger::trace("Deallocate: {} ns. Total: {} ns, Count: {}"sv, (MemoryManagerStats::Stats.Deallocate.total / MemoryManagerStats::Stats.Deallocate.count).count(), MemoryManagerStats::Stats.Deallocate.total.count(), MemoryManagerStats::Stats.Deallocate.count); + logger::trace("Reallocate: {} ns. Total: {} ns, Count: {}"sv, (MemoryManagerStats::Stats.Reallocate.total / MemoryManagerStats::Stats.Reallocate.count).count(), MemoryManagerStats::Stats.Reallocate.total.count(), MemoryManagerStats::Stats.Reallocate.count); +#endif + } }