Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,12 @@ jobs:
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pytest-cov numpy pybind11
pip install pytest pytest-cov numpy pybind11 Pillow
pip install -e .

- name: Run Python tests
run: |
pytest tests/ -v --cov=turboloader --cov-report=xml --cov-report=term || true
pytest tests/ -v --cov=turboloader --cov-report=xml --cov-report=term

- name: Upload coverage to Codecov
if: matrix.python-version == '3.11'
Expand Down Expand Up @@ -86,7 +86,7 @@ jobs:
- name: Run C++ tests
run: |
cd build
ctest --output-on-failure -j$(nproc) || true
ctest --output-on-failure -j$(nproc) -E "avx512_simd"

lint:
name: Code Quality
Expand All @@ -108,8 +108,8 @@ jobs:

- name: Check code formatting with black
run: |
black --check --diff turboloader/ tests/ benchmarks/ examples/ || true
black --check --diff turboloader/ tests/

- name: Lint with flake8
run: |
flake8 turboloader/ tests/ benchmarks/ examples/ --count --select=E9,F63,F7,F82 --show-source --statistics || true
flake8 turboloader/ tests/ --count --select=E9,F63,F7,F82 --show-source --statistics
11 changes: 8 additions & 3 deletions src/transforms/solarize_transform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,20 @@ class RandomSolarizeTransform : public RandomTransform {

#ifdef TURBOLOADER_SIMD_AVX2
// AVX2: Process 32 bytes at a time
// Note: _mm256_cmpgt_epi8 is SIGNED comparison, so XOR with 0x80
// to convert unsigned [0,255] to signed [-128,127] range
__m256i bias = _mm256_set1_epi8(static_cast<char>(0x80));
__m256i threshold_vec = _mm256_set1_epi8(threshold_);
__m256i max_val = _mm256_set1_epi8(255);
__m256i max_val = _mm256_set1_epi8(static_cast<char>(0xFF));
size_t i = 0;

for (; i + 32 <= total_pixels; i += 32) {
__m256i pixels = _mm256_loadu_si256((__m256i*)(input.data + i));

// Create mask: pixels > threshold
__m256i mask = _mm256_cmpgt_epi8(pixels, threshold_vec);
// Create mask: pixels > threshold (unsigned comparison via XOR bias)
__m256i mask = _mm256_cmpgt_epi8(
_mm256_xor_si256(pixels, bias),
_mm256_xor_si256(threshold_vec, bias));

// Invert pixels: 255 - pixels
__m256i inverted = _mm256_sub_epi8(max_val, pixels);
Expand Down
Loading