Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
7abb9e2
fix some
JalinWang Feb 9, 2026
c74f276
fix platform related
JalinWang Feb 10, 2026
7fb10c4
hnsw VLAs
JalinWang Feb 10, 2026
72f7214
hnsw sparse VLAs
JalinWang Feb 10, 2026
89f02f5
flexible array member
JalinWang Feb 10, 2026
8dc698b
type define
JalinWang Feb 10, 2026
5521744
vla
JalinWang Feb 10, 2026
88de5da
vla + uint8_t
JalinWang Feb 10, 2026
8b3b3ef
__VA_ARGS__
JalinWang Feb 10, 2026
e8dc871
iota
JalinWang Feb 10, 2026
673cf8e
ssize
JalinWang Feb 10, 2026
431b541
sum4
JalinWang Feb 10, 2026
66a8c56
fixup! hnsw VLAs
JalinWang Mar 10, 2026
28d8c68
win header, file/time/sleep implement
JalinWang Mar 11, 2026
3255327
func overload
JalinWang Mar 11, 2026
228dfae
vla
JalinWang Mar 11, 2026
402f2a1
TODO: should just support std::uniform_int_distribution<int8_t> inste…
JalinWang Mar 11, 2026
fb8511f
uint -> unsigned int
JalinWang Mar 11, 2026
828e457
TODO: unknown reason
JalinWang Mar 11, 2026
83a6423
TODO: skip tests
JalinWang Mar 11, 2026
c26de94
TODO: may revert
JalinWang Mar 11, 2026
60da06e
TODO: rocksdb can't find lz4, could be optimized
JalinWang Mar 11, 2026
3a267a0
third party Cmakelist: naming, compiling options
JalinWang Mar 11, 2026
c575663
TODO: designated init & c++ 20, may revert
JalinWang Mar 11, 2026
539dc20
fix compiler options
JalinWang Mar 11, 2026
c92a51c
TODO: ignore some warning
JalinWang Mar 11, 2026
de9724b
TODO: to verify sparsehash's patch
JalinWang Mar 11, 2026
3dab143
msvc debug will validate the boundary
JalinWang Mar 13, 2026
52170b9
fix -1UL: On MSVC, unsigned long is 32-bit, so -1UL is 0xFFFFFFFF no…
JalinWang Mar 13, 2026
47df026
fix flexible structure
JalinWang Mar 13, 2026
1f87aa7
TODO: verify the fix of mmap alignment
JalinWang Mar 13, 2026
d9abe33
fix ctest failure by storage.close
JalinWang Mar 13, 2026
689bb04
fix mutex by atomic: TestDumpIndexAndAdd test uses mutex.unlock() fro…
JalinWang Mar 13, 2026
9b7321f
storage close
JalinWang Mar 13, 2026
d4d4a0b
TODO: unify the file cleanup of UTs
JalinWang Mar 13, 2026
9133a76
TODO: currently replace dll by static to support indexFactory registe…
JalinWang Mar 13, 2026
938e5a5
ignore more
JalinWang Mar 13, 2026
6fb2707
fix antlr4 release
JalinWang Mar 13, 2026
0e427a2
fix: gtest link warning
JalinWang Mar 13, 2026
1948e39
revert c++20
JalinWang Mar 13, 2026
7a5cfb6
platform related code refactor
JalinWang Mar 16, 2026
ed57793
buffer manager cleanup to avoid ctest failure
JalinWang Mar 16, 2026
5c61212
refactor: file remove/test setup
JalinWang Mar 16, 2026
c6e5012
fix
JalinWang Mar 16, 2026
a7e5190
fix memory limit
JalinWang Mar 16, 2026
e7bbbfe
fix segment_helper_test
JalinWang Mar 16, 2026
e9e9c5d
Merge branch 'main' into feat/win-compilation
JalinWang Mar 16, 2026
5c89b6e
fix write_recovery_test
JalinWang Mar 16, 2026
62683bf
fix __m64
JalinWang Mar 16, 2026
cda7187
fix linux ut
JalinWang Mar 16, 2026
26085af
fix write_recovery_test on linux
JalinWang Mar 17, 2026
6795a6c
format
JalinWang Mar 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,36 @@ cmake_policy(SET CMP0077 NEW)
project(zvec)
set(CC_CXX_STANDARD 17)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror=return-type")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror=return-type")
if(MSVC)
set(INTTYPES_FORMAT VC7)
add_compile_options(/FS) #handle .pdb
add_compile_options(/EHsc) # def c++ exception behavior
add_compile_options(/Zc:preprocessor)
add_compile_options(/we4716) # -Werror=return-type


###### reduce output length to make vibe coding work better :), which should be removed or solved later
add_compile_options(/wd4267 /wd4244)
add_compile_options(/wd4146) # unary minus operator applied to unsigned type #usage: uint32_t seg_id_{-1U};
add_compile_options(/wd4310) # warning C4310: cast truncates constant value
Comment on lines +16 to +17
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Broad narrowing-conversion warnings suppressed without fix

/wd4267 (conversion from size_t to a smaller type) and /wd4244 (general type narrowing) are silently disabled with a comment acknowledging they need to be resolved. These two warnings regularly surface real runtime bugs (e.g., size_t → int truncation on 64-bit data, unintended float → int narrowing). Leaving them suppressed while the PR is merged makes it easy for future changes to introduce silent truncation bugs that are never caught.

Consider replacing the blanket suppression with targeted #pragma warning(suppress: ...) at the specific call sites that are genuinely safe, rather than disabling them project-wide.

# Temporary MSVC warning suppressions (continued). Every warning is listed
# exactly once; /wd4334 used to be passed twice (once in the grouped line and
# once on its own), which was redundant.
add_compile_options(/wd4530) # C4530: C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc
add_compile_options(/wd4251) # C4251: class needs to have dll-interface (triggered by protobuf)
# C4245: signed/unsigned mismatch in conversion
# C4702: unreachable code
# C4305: truncation from 'type1' to 'type2'
# C4099: type name first seen using 'class' now seen using 'struct' (or vice versa)
add_compile_options(/wd4245 /wd4702 /wd4305 /wd4099)
add_compile_options(/wd4200) # C4200: zero-sized array in struct/union (flexible array member)

# C4334: '<<': result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?)
# Suppressed so that existing code of the form `x |= (1 << k);` can be kept as is.
add_compile_options(/wd4334)
######

#TODO(windows): to be verified
add_definitions(-DARROW_STATIC -DPARQUET_STATIC -DARROW_ACERO_STATIC -DARROW_DS_STATIC -DARROW_COMPUTE_STATIC)


else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror=return-type")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror=return-type")
endif()

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-as-needed")
Expand All @@ -21,6 +49,7 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

include_directories(${PROJECT_ROOT_DIR}/src/include)
include_directories(${PROJECT_ROOT_DIR}/src)
include_directories(${PROJECT_ROOT_DIR})

option(BUILD_PYTHON_BINDINGS "Build Python bindings using pybind11" OFF)
message(STATUS "BUILD_PYTHON_BINDINGS:${BUILD_PYTHON_BINDINGS}")
Expand Down
34 changes: 33 additions & 1 deletion cmake/bazel.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,11 @@ function(_target_link_libraries _NAME)
get_target_property(ALWAYS_LINK ${LIB} ALWAYS_LINK)
if(ALWAYS_LINK)
list(APPEND LOCAL_RESULT ${LIB})
elseif(MSVC AND TARGET ${LIB}_static)
get_target_property(_SIBLING_AL ${LIB}_static ALWAYS_LINK)
if(_SIBLING_AL)
list(APPEND LOCAL_RESULT ${LIB}_static)
endif()
endif()

get_target_property(DEP_LIBS ${LIB} INTERFACE_LINK_LIBRARIES)
Expand Down Expand Up @@ -652,6 +657,30 @@ function(_target_link_libraries _NAME)

list(REMOVE_DUPLICATES ALL_LIBS_TO_PROCESS)

# On MSVC, each DLL has its own copy of template statics (e.g. Factory
# singletons), so registrations inside a DLL are invisible to the exe.
# Substitute SHARED libs with their ALWAYS_LINK _static counterparts and
# use /WHOLEARCHIVE so all registration code lives in the same module.
#
# This block only rewrites ALL_LIBS_TO_PROCESS and extends
# ALL_ALWAYS_LINK_LIBS; it does not add any link flags itself.
if(MSVC)
# Rebuilt copy of ALL_LIBS_TO_PROCESS with the substitutions applied.
set(_SUBSTITUTED_LIBS "")
foreach(LIB ${ALL_LIBS_TO_PROCESS})
# Only entries that are targets AND have a `<name>_static` sibling target
# are candidates for substitution.
if(TARGET ${LIB} AND TARGET ${LIB}_static)
get_target_property(_LIB_TYPE ${LIB} TYPE)
get_target_property(_STATIC_AL ${LIB}_static ALWAYS_LINK)
# Substitute only when the original is a shared library and its static
# sibling carries a truthy ALWAYS_LINK property.
if("${_LIB_TYPE}" STREQUAL "SHARED_LIBRARY" AND _STATIC_AL)
list(APPEND _SUBSTITUTED_LIBS ${LIB}_static)
list(APPEND ALL_ALWAYS_LINK_LIBS ${LIB}_static)
# Skip the fall-through append below so the shared target is dropped.
continue()
endif()
endif()
# Not substituted: keep the entry unchanged (targets and plain names alike).
list(APPEND _SUBSTITUTED_LIBS ${LIB})
endforeach()
set(ALL_LIBS_TO_PROCESS ${_SUBSTITUTED_LIBS})
# De-duplicate only when the list is non-empty; the same _static sibling may
# have been appended above more than once or may already have been present.
if(ALL_ALWAYS_LINK_LIBS)
list(REMOVE_DUPLICATES ALL_ALWAYS_LINK_LIBS)
endif()
endif()

foreach(LIB ${ALL_LIBS_TO_PROCESS})
if(NOT TARGET ${LIB})
list(APPEND LINK_LIBS ${LIB})
Expand All @@ -672,7 +701,7 @@ function(_target_link_libraries _NAME)
endif()
else()
# Microsoft Visual C++
list(APPEND LINK_LIBS /WHOLEARCHIVE:$<TARGET_FILE:${LIB}>)
list(APPEND MSVC_WHOLEARCHIVE_OPTS /WHOLEARCHIVE:$<TARGET_FILE:${LIB}>)
get_target_property(OTHER_LINK_LIBS ${LIB} INTERFACE_LINK_LIBRARIES)
if(OTHER_LINK_LIBS)
foreach(OTHER_LIB ${OTHER_LINK_LIBS})
Expand All @@ -691,6 +720,9 @@ function(_target_link_libraries _NAME)
endforeach()

target_link_libraries(${_NAME} ${LINK_LIBS})
if(MSVC_WHOLEARCHIVE_OPTS)
target_link_options(${_NAME} PRIVATE ${MSVC_WHOLEARCHIVE_OPTS})
endif()
if(LIBS_DEPS)
add_dependencies(${_NAME} ${LIBS_DEPS})
target_include_directories(${_NAME} PRIVATE "${LIBS_INCS}")
Expand Down
2 changes: 1 addition & 1 deletion cmake/utils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ function(apply_patch_once patch_name target_dir patch_file)

#message(STATUS "Applying patch '${patch_name}' to ${target_dir} ...")
execute_process(
COMMAND patch -p1 -i "${patch_file}"
COMMAND git apply --ignore-space-change --ignore-whitespace "${patch_file}"
WORKING_DIRECTORY "${target_dir}"
RESULT_VARIABLE patch_result
OUTPUT_VARIABLE patch_stdout
Expand Down
17 changes: 11 additions & 6 deletions src/ailego/buffer/buffer_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ struct BufferManager::BufferContext {

~BufferContext() {
if (vector) {
free(vector);
ailego_aligned_free(vector);
}
}

Expand Down Expand Up @@ -256,15 +256,15 @@ bool BufferManager::BufferContext::read_vector() {
}
AILEGO_DEFER([this] { file.close(); });
uint32_t len = id.vector().length;
auto ret = posix_memalign((void **)&vector, 64, len); // 64-byte alignment
if (ret != 0 || vector == nullptr) {
vector = (uint8_t *)ailego_aligned_malloc(len, 64); // 64-byte alignment
if (vector == nullptr) {
LOG_ERROR("Failed to allocate buffer for file[%s]", file_name.c_str());
return false;
}
uint32_t offset = id.vector().offset;
if (file.read(offset, vector, len) != len) {
LOG_ERROR("Failed to read file[%s]", file_name.c_str());
free(vector);
ailego_aligned_free(vector);
vector = nullptr;
return false;
}
Expand Down Expand Up @@ -390,7 +390,7 @@ class BufferManager::BufferPool {
if (victim->id.type == BufferID::TYPE::PARQUET) {
victim->arrow_refs.clear();
} else {
free(victim->vector);
ailego_aligned_free(victim->vector);
victim->vector = nullptr;
}
victim->state = BufferContext::State::IDLE;
Expand Down Expand Up @@ -585,10 +585,15 @@ uint64_t BufferManager::total_size_in_bytes() const {
}


BufferManager::~BufferManager() {
void BufferManager::cleanup() {
for (auto pool : pools_) {
delete pool;
}
pools_.clear();
}

BufferManager::~BufferManager() {
cleanup();
}


Expand Down
4 changes: 3 additions & 1 deletion src/ailego/internal/cpu_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
#include "cpu_features.h"
#include <cstddef>

#if !defined(_MSC_VER) && !defined(__ARM_ARCH)
#if defined(_MSC_VER)
#include <intrin.h>
#elif !defined(__ARM_ARCH)
#include <cpuid.h>
#endif

Expand Down
57 changes: 45 additions & 12 deletions src/ailego/io/file.cc
Original file line number Diff line number Diff line change
Expand Up @@ -639,14 +639,28 @@ ssize_t File::offset(void) const {
}

void *File::MemoryMap(NativeHandle handle, ssize_t off, size_t len, int opts) {
LARGE_INTEGER file_size;
file_size.QuadPart = len;
// Root cause: Windows MapViewOfFile requires the file offset to be aligned to
// the allocation granularity (64 KB), but segment offsets were only
// page-aligned (4 KB). Also, CreateFileMapping was using len instead of
// off + len as the max size.
//
// Fix: Align the view offset down to allocation granularity, adjust the map
// length, and return base + excess. MemoryUnmap recovers the base by rounding
// down to granularity.

SYSTEM_INFO si;
GetSystemInfo(&si);
DWORD granularity = si.dwAllocationGranularity;
ssize_t aligned_off = (off / (ssize_t)granularity) * (ssize_t)granularity;
size_t excess = (size_t)(off - aligned_off);

LARGE_INTEGER max_size;
max_size.QuadPart = off + len;

// Create map object
HANDLE file_mapping = CreateFileMapping(
handle, nullptr,
((opts & File::MMAP_READONLY) ? PAGE_READONLY : PAGE_READWRITE),
file_size.HighPart, file_size.LowPart, nullptr);
max_size.HighPart, max_size.LowPart, nullptr);
ailego_null_if_false(file_mapping != nullptr);

DWORD desired_access = FILE_MAP_READ;
Expand All @@ -656,14 +670,17 @@ void *File::MemoryMap(NativeHandle handle, ssize_t off, size_t len, int opts) {
if (!(opts & File::MMAP_SHARED)) {
desired_access |= FILE_MAP_COPY;
}
file_size.QuadPart = off;

// Map the whole file to memory and close handle
void *addr = MapViewOfFile(file_mapping, desired_access, file_size.HighPart,
file_size.LowPart, 0);
LARGE_INTEGER view_offset;
view_offset.QuadPart = aligned_off;
size_t view_len = len + excess;

void *base = MapViewOfFile(file_mapping, desired_access, view_offset.HighPart,
view_offset.LowPart, view_len);
CloseHandle(file_mapping);

ailego_null_if_false(addr);
ailego_null_if_false(base);
void *addr = (char *)base + excess;
if (opts & File::MMAP_LOCKED) {
VirtualLock(addr, len);
}
Expand All @@ -673,8 +690,17 @@ void *File::MemoryMap(NativeHandle handle, ssize_t off, size_t len, int opts) {
return addr;
}

void *File::MemoryMap(size_t, int) {
return nullptr;
// Windows implementation of the overload that takes no file handle: allocates
// `len` bytes of read-write memory with VirtualAlloc and returns its address.
// Only the MMAP_LOCKED and MMAP_WARMUP bits of `opts` are examined here.
void *File::MemoryMap(size_t len, int opts) {
void *addr =
VirtualAlloc(nullptr, len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
// NOTE(review): ailego_null_if_false presumably returns nullptr from this
// function when the allocation failed -- confirm against the macro.
ailego_null_if_false(addr);
if (opts & File::MMAP_LOCKED) {
// The result of VirtualLock is not checked, so locking is best-effort.
VirtualLock(addr, len);
}
if (opts & File::MMAP_WARMUP) {
File::MemoryWarmup(addr, len);
}
return addr;
}

void *File::MemoryRemap(void *, size_t, void *, size_t) {
Expand All @@ -683,7 +709,14 @@ void *File::MemoryRemap(void *, size_t, void *, size_t) {

void File::MemoryUnmap(void *addr, size_t /*len*/) {
ailego_return_if_false(addr);
UnmapViewOfFile(addr);
MEMORY_BASIC_INFORMATION mbi;
if (VirtualQuery(addr, &mbi, sizeof(mbi))) {
if (mbi.Type == MEM_MAPPED) {
UnmapViewOfFile(mbi.AllocationBase);
} else {
VirtualFree(mbi.AllocationBase, 0, MEM_RELEASE);
}
}
}

bool File::MemoryFlush(void *addr, size_t /*len*/) {
Expand Down
14 changes: 7 additions & 7 deletions src/ailego/math/distance_matrix_accum_fp16.i
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
__m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0), \
_mm256_extractf128_ps(ymm_sum_0_0, 1)); \
if (qe >= qe_aligned + 2) { \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m))); \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64x(*(const long long *)(m))); \
__m128 xmm_q = _mm_cvtph_ps( \
_mm_shufflelo_epi16(_mm_broadcast_si32(q), _MM_SHUFFLE(1, 1, 0, 0))); \
ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0) \
Expand Down Expand Up @@ -120,8 +120,8 @@
__m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1), \
_mm256_extractf128_ps(ymm_sum_0_1, 1)); \
if (qe >= qe_aligned + 4) { \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m))); \
__m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(q))); \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64x(*(const long long *)(m))); \
__m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi64x(*(const long long *)(q))); \
__m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(2, 2, 0, 0)); \
ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_0) \
xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(3, 3, 1, 1)); \
Expand Down Expand Up @@ -164,7 +164,7 @@
__m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0), \
_mm256_extractf128_ps(ymm_sum_0_0, 1)); \
if (q != qe) { \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m))); \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64x(*(const long long *)(m))); \
__m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q))); \
ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0) \
} \
Expand Down Expand Up @@ -198,7 +198,7 @@
if (q != qe) { \
__m128 xmm_q_0 = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q + 0))); \
__m128 xmm_q_1 = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q + 1))); \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m))); \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64x(*(const long long *)(m))); \
MATRIX_VAR_PROC(1, 2, 0, xmm_m, xmm_q, xmm_sum, ACCUM_FP32_STEP_SSE) \
} \
if (((uintptr_t)out & 0xf) == 0) { \
Expand Down Expand Up @@ -233,8 +233,8 @@
__m128 xmm_sum_0_3 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_3), \
_mm256_extractf128_ps(ymm_sum_0_3, 1)); \
if (q != qe) { \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m))); \
__m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(q))); \
__m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64x(*(const long long *)(m))); \
__m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi64x(*(const long long *)(q))); \
__m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(0, 0, 0, 0)); \
ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_0) \
xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(1, 1, 1, 1)); \
Expand Down
8 changes: 4 additions & 4 deletions src/ailego/math/distance_matrix_fp16.i
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@
} \
case 4: { \
__m256 ymm_lhs = _mm256_cvtph_ps( \
_mm_set_epi64((__m64)(_MASK), *((const __m64 *)(lhs)))); \
_mm_set_epi64x((long long)(_MASK), *(const long long *)(lhs))); \
__m256 ymm_rhs = _mm256_cvtph_ps( \
_mm_set_epi64((__m64)(_MASK), *((const __m64 *)(rhs)))); \
_mm_set_epi64x((long long)(_MASK), *(const long long *)(rhs))); \
_PROC(ymm_lhs, ymm_rhs, _RES##_0_0) \
break; \
} \
Expand Down Expand Up @@ -125,7 +125,7 @@
{ \
__m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \
__m256 ymm_q = _mm256_cvtph_ps(_mm_shufflehi_epi16( \
_mm_shufflelo_epi16(_mm_set1_epi64(*(const __m64 *)(q)), \
_mm_shufflelo_epi16(_mm_set1_epi64x(*(const long long *)(q)), \
_MM_SHUFFLE(1, 1, 0, 0)), \
_MM_SHUFFLE(3, 3, 2, 2))); \
_PROC(ymm_m, ymm_q, _RES##_0_0) \
Expand Down Expand Up @@ -155,7 +155,7 @@
//! Iterative process of computing distance (FP16, M=4, N=2)
#define MATRIX_FP16_ITER_4X2_AVX(m, q, _RES, _LOAD, _PROC) \
{ \
__m128i xmm_qi = _mm_set1_epi64(*(const __m64 *)(q)); \
__m128i xmm_qi = _mm_set1_epi64x(*(const long long *)(q)); \
__m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \
__m256 ymm_q_0 = _mm256_cvtph_ps(_mm_shufflehi_epi16( \
_mm_shufflelo_epi16(xmm_qi, _MM_SHUFFLE(0, 0, 0, 0)), \
Expand Down
2 changes: 1 addition & 1 deletion src/ailego/math/inner_product_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ struct MinusInnerProductSparseMatrix {
const void *sparse_value,
std::string &buffer);

static inline float ComputeInnerProductSparseInSegment(
static float ComputeInnerProductSparseInSegment(
uint32_t m_sparse_count, const uint16_t *m_sparse_index,
const ValueType *m_sparse_value, uint32_t q_sparse_count,
const uint16_t *q_sparse_index, const ValueType *q_sparse_value);
Expand Down
Empty file.
2 changes: 1 addition & 1 deletion src/ailego/math/inner_product_matrix_fp16_avx512.cc
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ float InnerProductSparseInSegmentAVX512FP16(uint32_t m_sparse_count,
_mm_loadu_ph(val_start_2 + k)));
}

Float16 __attribute__((aligned(16))) tmp_res[8];
alignas(16) Float16 tmp_res[8];
_mm_store_ph(tmp_res, sum128);
sum += (tmp_res[0] + tmp_res[1] + tmp_res[2] + tmp_res[3] + tmp_res[4] +
tmp_res[5] + tmp_res[6] + tmp_res[7]);
Expand Down
Empty file.
2 changes: 1 addition & 1 deletion src/ailego/math/inner_product_matrix_fp32_sse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ float InnerProductSparseInSegmentSSE(uint32_t m_sparse_count,
_mm_loadu_ps(val_start_2 + k)));
}

float __attribute__((aligned(16))) tmp_res[4];
alignas(16) float tmp_res[4];
_mm_store_ps(tmp_res, sum128);
sum += (tmp_res[0] + tmp_res[1] + tmp_res[2] + tmp_res[3]);
}
Expand Down
Empty file.
2 changes: 1 addition & 1 deletion src/ailego/math/mips_euclidean_distance_matrix_fp32_sse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ float MipsInnerProductSparseInSegmentSSE(uint32_t m_sparse_count,
_mm_loadu_ps(val_start_2 + k)));
}

float __attribute__((aligned(16))) tmp_res[4];
alignas(16) float tmp_res[4];
_mm_store_ps(tmp_res, sum128);
sum += (tmp_res[0] + tmp_res[1] + tmp_res[2] + tmp_res[3]);
}
Expand Down
2 changes: 1 addition & 1 deletion src/ailego/math/norm_matrix_fp16.i
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
} \
case 4: { \
__m256 ymm_m = _mm256_cvtph_ps( \
_mm_set_epi64((__m64)(0ull), *((const __m64 *)(m)))); \
_mm_set_epi64x(0LL, *(const long long *)(m))); \
NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0) \
break; \
} \
Expand Down
Loading
Loading