diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a1287f0b..b4677cef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -147,7 +147,10 @@ jobs: uses: actions/checkout@v4 - uses: ./.github/actions/setup-nix - name: Build - run: nix build + run: > + nix build + --override-input moon-registry git+https://mooncakes.io/git/index + --override-input moonbit-overlay git+https://github.com/moonbit-community/moonbit-overlay - name: Smoke test run: test -x ./result/bin/bit diff --git a/modules/bit/moon.mod.json b/modules/bit/moon.mod.json index ff68df20..de8a1afd 100644 --- a/modules/bit/moon.mod.json +++ b/modules/bit/moon.mod.json @@ -4,6 +4,7 @@ "deps": { "moonbitlang/async": "0.16.6", "moonbitlang/x": "0.4.40", + "mizchi/simd": "0.4.1", "mizchi/tempfile": "0.1.0", "mizchi/llm": "0.2.2", "mizchi/bitflow": "0.4.0", diff --git a/modules/bit/src/cmd/bit/checkout.mbt b/modules/bit/src/cmd/bit/checkout.mbt index b3322127..4c946241 100644 --- a/modules/bit/src/cmd/bit/checkout.mbt +++ b/modules/bit/src/cmd/bit/checkout.mbt @@ -14,6 +14,7 @@ async fn handle_checkout(args : Array[String]) -> Unit raise Error { let mut track_branch = false let mut detach_head = false let mut quiet = false + let mut autostash = false let pre_separator_targets : Array[String] = [] let post_separator_targets : Array[String] = [] let mut i = 0 @@ -31,6 +32,7 @@ async fn handle_checkout(args : Array[String]) -> Unit raise Error { "-f" | "--force" => force_checkout = true "--detach" => detach_head = true "--orphan" => orphan_branch = true + "-m" | "--merge" => autostash = true "-" => if saw_separator { post_separator_targets.push(arg) @@ -58,6 +60,7 @@ async fn handle_checkout(args : Array[String]) -> Unit raise Error { raise @bitcore.GitError::InvalidObject("No target specified for checkout") } ignore(quiet) + ignore(autostash) // resolved below at branch-switch site if is_bare_repo_dir(root) { raise @bitcore.GitError::InvalidObject( "this operation must be run 
in a work tree", @@ -204,6 +207,14 @@ async fn handle_checkout(args : Array[String]) -> Unit raise Error { let mut switched_head = false let mut switched_head_id : @bitcore.ObjectId? = None let mut switched_target : String? = None + // -m/--merge: stash uncommitted changes, restore after switch + let mut did_autostash = false + if autostash { + let author = get_author_string() + let timestamp = get_commit_timestamp() + let stash_id = @bitlib.stash_push(fs, fs, root, "", author, timestamp) + did_autostash = stash_id is Some(_) + } if is_path && is_branch { // Ambiguous - default to branch (like git) match checkout_branch_in_use_path(rfs, root, target) { @@ -244,6 +255,9 @@ async fn handle_checkout(args : Array[String]) -> Unit raise Error { _ => () } save_previous_checkout_location(fs, git_dir, previous_location) + if did_autostash { + @bitlib.stash_apply(fs, fs, root, 0, true) + } } } else { let first = resolve_checkout_target(rfs, git_dir, targets[0]) diff --git a/modules/bit/src/cmd/bit/handlers_remote_push_wbtest.mbt b/modules/bit/src/cmd/bit/handlers_remote_push_wbtest.mbt index 884c42c4..b35866c7 100644 --- a/modules/bit/src/cmd/bit/handlers_remote_push_wbtest.mbt +++ b/modules/bit/src/cmd/bit/handlers_remote_push_wbtest.mbt @@ -783,7 +783,7 @@ test "push-lease: remote tracking refname follows pushed remote" { ///| fn serve_lfs_wbtest_sha256_hex(data : Bytes) -> String { - let raw = @bithash.sha256_raw(data) + let raw = @bithash.sha256_bytes(data) let digits = "0123456789abcdef" let out = StringBuilder::new() for b in raw { diff --git a/modules/bit/src/cmd/bit/rev_list.mbt b/modules/bit/src/cmd/bit/rev_list.mbt index 10690f4b..6f6ec7af 100644 --- a/modules/bit/src/cmd/bit/rev_list.mbt +++ b/modules/bit/src/cmd/bit/rev_list.mbt @@ -116,6 +116,7 @@ async fn handle_rev_list(args : Array[String]) -> Unit raise Error { let mut min_parents : Int? 
= None let mut graph = false let mut no_walk = false + let mut maximal_only = false let refs : Array[String] = [] let excludes : Array[String] = [] let symmetric_ranges : Array[(String, String)] = [] @@ -155,6 +156,7 @@ async fn handle_rev_list(args : Array[String]) -> Unit raise Error { } "--no-walk" | "--no-walk=sorted" | "--no-walk=unsorted" => no_walk = true "--do-walk" => no_walk = false + "--maximal-only" => maximal_only = true "--merges" => min_parents = Some(2) "--no-merges" => max_parents = Some(1) "--min-parents" if i + 1 < args.length() => { @@ -714,6 +716,31 @@ async fn handle_rev_list(args : Array[String]) -> Unit raise Error { result.push(id) } } + // --maximal-only: keep only commits not reachable from any other in result + if maximal_only { + let result_ids = result.copy() + let filtered : Array[@bitcore.ObjectId] = [] + for i2 in 0.. Bytes { + Bytes::makei(len, fn(i) { ((i * 31 + 7) % 251).to_byte() }) +} + +fn main { + let p64 = make_payload(64) + let p1k = make_payload(1024) + let p8k = make_payload(8192) + let p64k = make_payload(65536) + let payloads = [p64, p1k, p8k, p64k] + let mut sink = 0 + for _ in 0..<500 { + for p in payloads { + let h1 = @simdhash.sha1(p) + let h2 = @simdhash.sha256(p) + sink = sink + h1[0].to_int() + h2[0].to_int() + } + } + println(sink) +} diff --git a/modules/bit_hash/bench/cmd/sha_hash/moon.pkg b/modules/bit_hash/bench/cmd/sha_hash/moon.pkg new file mode 100644 index 00000000..fd27579a --- /dev/null +++ b/modules/bit_hash/bench/cmd/sha_hash/moon.pkg @@ -0,0 +1,7 @@ +import { + "mizchi/simd/src/simdhash" @simdhash, +} + +options( + "is-main": true, +) diff --git a/modules/bit_hash/bench/moon.mod.json b/modules/bit_hash/bench/moon.mod.json new file mode 100644 index 00000000..e9828445 --- /dev/null +++ b/modules/bit_hash/bench/moon.mod.json @@ -0,0 +1,8 @@ +{ + "name": "mizchi/sha_bench", + "version": "0.1.0", + "deps": { + "mizchi/simd": "0.4.1" + }, + "source": "cmd" +} diff --git a/modules/bit_hash/moon.mod.json 
b/modules/bit_hash/moon.mod.json index 4fd2435e..d93cc13a 100644 --- a/modules/bit_hash/moon.mod.json +++ b/modules/bit_hash/moon.mod.json @@ -2,12 +2,17 @@ "name": "mizchi/bit_hash", "version": "0.42.2", "deps": { - "moonbitlang/x": "0.4.40" + "mizchi/simd": "0.4.1" }, "repository": "https://github.com/mizchi/bit-vcs", "license": "Apache-2.0", - "keywords": ["git", "hash", "sha1", "sha256"], + "keywords": [ + "git", + "hash", + "sha1", + "sha256" + ], "description": "Git object hashing primitives (gix-hash equivalent)", "source": "src", "preferred-target": "native" -} +} \ No newline at end of file diff --git a/modules/bit_hash/src/bench_test.mbt b/modules/bit_hash/src/bench_test.mbt index 5345440d..68148fa5 100644 --- a/modules/bit_hash/src/bench_test.mbt +++ b/modules/bit_hash/src/bench_test.mbt @@ -57,6 +57,14 @@ test "bench sha1_raw 64 KiB" (b : @bench.T) { }) } +///| +test "bench sha256_raw 64 bytes" (b : @bench.T) { + b.bench(fn() { + let h = sha256_raw(bench_input_64) + b.keep(h.length()) + }) +} + ///| test "bench sha256_raw 1 KiB" (b : @bench.T) { b.bench(fn() { diff --git a/modules/bit_hash/src/hex.mbt b/modules/bit_hash/src/hex.mbt index 839443e3..8a36762b 100644 --- a/modules/bit_hash/src/hex.mbt +++ b/modules/bit_hash/src/hex.mbt @@ -1,5 +1,29 @@ ///| Common hash/hex helpers. 
+///| +fn utf8_encode(s : String) -> Bytes { + let buf : Array[Byte] = [] + for c in s { + let cp = c.to_int() + if cp < 0x80 { + buf.push(cp.to_byte()) + } else if cp < 0x800 { + buf.push((0xc0 | (cp >> 6)).to_byte()) + buf.push((0x80 | (cp & 0x3f)).to_byte()) + } else if cp < 0x10000 { + buf.push((0xe0 | (cp >> 12)).to_byte()) + buf.push((0x80 | ((cp >> 6) & 0x3f)).to_byte()) + buf.push((0x80 | (cp & 0x3f)).to_byte()) + } else { + buf.push((0xf0 | (cp >> 18)).to_byte()) + buf.push((0x80 | ((cp >> 12) & 0x3f)).to_byte()) + buf.push((0x80 | ((cp >> 6) & 0x3f)).to_byte()) + buf.push((0x80 | (cp & 0x3f)).to_byte()) + } + } + Bytes::from_array(buf) +} + ///| pub fn short_hex(hex : String, n : Int) -> String { if hex.length() <= n { diff --git a/modules/bit_hash/src/moon.pkg b/modules/bit_hash/src/moon.pkg index 0e152ddd..a0afa172 100644 --- a/modules/bit_hash/src/moon.pkg +++ b/modules/bit_hash/src/moon.pkg @@ -1,6 +1,5 @@ import { - "moonbitlang/core/encoding/utf8" @utf8, - "moonbitlang/x/crypto" @crypto, + "mizchi/simd/src/simdhash" @simdhash, } import { @@ -10,7 +9,13 @@ import { warnings = "-29" options( + "native-stub": [ "sha1_ni.c", "sha256_ni.c" ], targets: { - "bench_test.mbt": [ "native" ], + "sha1_ni_ffi.mbt": [ "native" ], + "sha1_native_impl.mbt": [ "native" ], + "sha256_native_impl.mbt": [ "native" ], + "sha1_impl.mbt": [ "wasm", "wasm-gc", "js" ], + "sha256_impl.mbt": [ "wasm", "wasm-gc", "js" ], + "bench_test.mbt": [ "native" ], }, ) diff --git a/modules/bit_hash/src/sha1.mbt b/modules/bit_hash/src/sha1.mbt index 1eb88886..ea7195f0 100644 --- a/modules/bit_hash/src/sha1.mbt +++ b/modules/bit_hash/src/sha1.mbt @@ -15,24 +15,6 @@ let sha1_h3 : Int = 0x10325476 ///| let sha1_h4 : Int = 0xc3d2e1f0 -///| -let sha1_k0 : Int = 0x5a827999 - -///| -let sha1_k1 : Int = 0x6ed9eba1 - -///| -let sha1_k2 : Int = 0x8f1bbcdc - -///| -let sha1_k3 : Int = 0xca62c1d6 - -///| -fn rotl32(x : Int, n : Int) -> Int { - ((x << n) | (x.reinterpret_as_uint() >> (32 - 
n)).reinterpret_as_int()) & - 0xffffffff -} - ///| pub struct Sha1State { h : FixedArray[Int] @@ -64,67 +46,6 @@ pub fn Sha1State::reset(self : Sha1State) -> Unit { self.total_len = 0L } -///| -fn Sha1State::process_block(self : Sha1State) -> Unit { - let h = self.h - let w = self.w - let block = self.block - for i = 0; i < 16; i = i + 1 { - w[i] = (block[i * 4].to_int() << 24) | - (block[i * 4 + 1].to_int() << 16) | - (block[i * 4 + 2].to_int() << 8) | - block[i * 4 + 3].to_int() - } - for i in 16..<80 { - w[i] = rotl32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1) - } - let mut a = h[0] - let mut b = h[1] - let mut c = h[2] - let mut d = h[3] - let mut e = h[4] - for i = 0; i < 20; i = i + 1 { - let f = (b & c) | (b.lnot() & d) - let temp = (rotl32(a, 5) + f + e + sha1_k0 + w[i]) & 0xffffffff - e = d - d = c - c = rotl32(b, 30) - b = a - a = temp - } - for i = 20; i < 40; i = i + 1 { - let f = b ^ c ^ d - let temp = (rotl32(a, 5) + f + e + sha1_k1 + w[i]) & 0xffffffff - e = d - d = c - c = rotl32(b, 30) - b = a - a = temp - } - for i = 40; i < 60; i = i + 1 { - let f = (b & c) | (b & d) | (c & d) - let temp = (rotl32(a, 5) + f + e + sha1_k2 + w[i]) & 0xffffffff - e = d - d = c - c = rotl32(b, 30) - b = a - a = temp - } - for i = 60; i < 80; i = i + 1 { - let f = b ^ c ^ d - let temp = (rotl32(a, 5) + f + e + sha1_k3 + w[i]) & 0xffffffff - e = d - d = c - c = rotl32(b, 30) - b = a - a = temp - } - h[0] = (h[0] + a) & 0xffffffff - h[1] = (h[1] + b) & 0xffffffff - h[2] = (h[2] + c) & 0xffffffff - h[3] = (h[3] + d) & 0xffffffff - h[4] = (h[4] + e) & 0xffffffff -} ///| pub fn Sha1State::update(self : Sha1State, data : Bytes) -> Unit { @@ -169,7 +90,7 @@ pub fn Sha1State::update_byte(self : Sha1State, b : Byte) -> Unit { ///| pub fn Sha1State::update_string(self : Sha1State, s : String) -> Unit { - self.update(@utf8.encode(s)) + self.update(utf8_encode(s)) } ///| @@ -222,10 +143,6 @@ pub fn sha1_prefix_raw(data : Bytes, len : Int) -> FixedArray[Byte] { 
state.finish_raw() } -///| -pub fn sha1_raw(data : Bytes) -> FixedArray[Byte] { - sha1_prefix_raw(data, data.length()) -} ///| pub fn sha1_array_prefix_raw(data : Array[Byte], len : Int) -> FixedArray[Byte] { diff --git a/modules/bit_hash/src/sha1_impl.mbt b/modules/bit_hash/src/sha1_impl.mbt new file mode 100644 index 00000000..1fa7a68c --- /dev/null +++ b/modules/bit_hash/src/sha1_impl.mbt @@ -0,0 +1,68 @@ +// SHA-1 fallback for non-native targets (pure MoonBit + @simdhash). + +///| +fn Sha1State::process_block(self : Sha1State) -> Unit { + let h = self.h + let w : FixedArray[Int] = self.w + let block = self.block + for i = 0; i < 16; i = i + 1 { + w[i] = (block[i * 4].to_int() << 24) | + (block[i * 4 + 1].to_int() << 16) | + (block[i * 4 + 2].to_int() << 8) | + block[i * 4 + 3].to_int() + } + for i in 16..<80 { + w[i] = sha1_rotl32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1) + } + let mut a = h[0] + let mut b = h[1] + let mut c = h[2] + let mut d = h[3] + let mut e = h[4] + for i = 0; i < 20; i = i + 1 { + let f = (b & c) | (b.lnot() & d) + let temp = (sha1_rotl32(a, 5) + f + e + 0x5a827999 + w[i]) & 0xffffffff + e = d; d = c; c = sha1_rotl32(b, 30); b = a; a = temp + } + for i = 20; i < 40; i = i + 1 { + let f = b ^ c ^ d + let temp = (sha1_rotl32(a, 5) + f + e + 0x6ed9eba1 + w[i]) & 0xffffffff + e = d; d = c; c = sha1_rotl32(b, 30); b = a; a = temp + } + for i = 40; i < 60; i = i + 1 { + let f = (b & c) | (b & d) | (c & d) + let temp = (sha1_rotl32(a, 5) + f + e + 0x8f1bbcdc + w[i]) & 0xffffffff + e = d; d = c; c = sha1_rotl32(b, 30); b = a; a = temp + } + for i = 60; i < 80; i = i + 1 { + let f = b ^ c ^ d + let temp = (sha1_rotl32(a, 5) + f + e + 0xca62c1d6 + w[i]) & 0xffffffff + e = d; d = c; c = sha1_rotl32(b, 30); b = a; a = temp + } + h[0] = (h[0] + a) & 0xffffffff + h[1] = (h[1] + b) & 0xffffffff + h[2] = (h[2] + c) & 0xffffffff + h[3] = (h[3] + d) & 0xffffffff + h[4] = (h[4] + e) & 0xffffffff +} + +///| +fn sha1_rotl32(x : Int, n : Int) -> Int { + 
((x << n) | (x.reinterpret_as_uint() >> (32 - n)).reinterpret_as_int()) & + 0xffffffff +} + +///| +pub fn sha1_bytes(data : Bytes) -> Bytes { + @simdhash.sha1(data) +} + +///| +pub fn sha1_raw(data : Bytes) -> FixedArray[Byte] { + let b = @simdhash.sha1(data) + let result : FixedArray[Byte] = FixedArray::make(20, b'\x00') + for i in 0..<20 { + result[i] = b[i] + } + result +} diff --git a/modules/bit_hash/src/sha1_native_impl.mbt b/modules/bit_hash/src/sha1_native_impl.mbt new file mode 100644 index 00000000..fd2b61c2 --- /dev/null +++ b/modules/bit_hash/src/sha1_native_impl.mbt @@ -0,0 +1,19 @@ +// SHA-1 fast path for the native target (C FFI + SHA-NI). + +///| +fn Sha1State::process_block(self : Sha1State) -> Unit { + sha1_process_blocks_ffi(self.h, self.block, 0, 1) +} + +///| +pub fn sha1_bytes(data : Bytes) -> Bytes { + let raw = sha1_raw(data) + Bytes::makei(20, fn(i) { raw[i] }) +} + +///| +pub fn sha1_raw(data : Bytes) -> FixedArray[Byte] { + let state = Sha1State::new() + state.update(data) + state.finish_raw() +} diff --git a/modules/bit_hash/src/sha1_ni.c b/modules/bit_hash/src/sha1_ni.c new file mode 100644 index 00000000..7d4e0618 --- /dev/null +++ b/modules/bit_hash/src/sha1_ni.c @@ -0,0 +1,377 @@ +/* + * SHA-1 acceleration using Intel SHA-NI extensions. + * + * Falls back to a portable C implementation when SHA-NI is not available + * (TCC or older CPUs). The MoonBit caller checks sha1_ni_available() first. + * + * SHA-NI path based on the public-domain algorithm by Sean Gulley / Intel. + */ + +#include +#include +#include + +/* + * Function-level target attributes allow SHA-NI intrinsics with clang/gcc + * even without -msha on the command line. + * TCC doesn't support __attribute__((target(...))), so we fall back there. 
+ */ +#if !defined(__TINYC__) && (defined(__clang__) || defined(__GNUC__)) +# include +# define USE_SHA_NI 1 +# define SHA_NI_TARGET __attribute__((target("sha,sse4.1"))) +#else +# define USE_SHA_NI 0 +# define SHA_NI_TARGET +#endif + +/* ── CPUID runtime detection ──────────────────────────────────────────── */ + +#if USE_SHA_NI +static int sha1_hw_ok = -1; +static int sha1_ni_ok(void) { + if (sha1_hw_ok < 0) + sha1_hw_ok = (__builtin_cpu_supports("sha") != 0) & + (__builtin_cpu_supports("sse4.1") != 0); + return sha1_hw_ok; +} +#endif + +/* ── portable big-endian helpers ──────────────────────────────────────── */ + +static inline uint32_t be32(const uint8_t* p) { + return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) | + ((uint32_t)p[2] << 8) | (uint32_t)p[3]; +} + +static inline uint32_t rotl32(uint32_t x, int n) { + return (x << n) | (x >> (32 - n)); +} + +/* ── SHA-NI fast path (x86 with SHA extensions) ───────────────────────── */ + +#if USE_SHA_NI + +/* + * Process `num_blocks` 64-byte blocks in-place. 
+ * state[0..4] = {H0,H1,H2,H3,H4} (big-endian word order) + */ +SHA_NI_TARGET +static void sha1_ni_blocks(uint32_t state[5], const uint8_t* data, size_t num_blocks) { + __m128i abcd, e0, e1; + __m128i abcd_save, e_save; + __m128i msg0, msg1, msg2, msg3; + __m128i shuf_mask; + + shuf_mask = _mm_set_epi64x(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL); + + /* Load initial state */ + abcd = _mm_loadu_si128((__m128i const*)state); + e0 = _mm_set_epi32(state[4], 0, 0, 0); + abcd = _mm_shuffle_epi32(abcd, 0x1b); /* DCBA -> ABCD */ + + while (num_blocks--) { + abcd_save = abcd; + e_save = e0; + + /* Rounds 0-3 */ + msg0 = _mm_loadu_si128((__m128i const*)(data + 0)); + msg0 = _mm_shuffle_epi8(msg0, shuf_mask); + e0 = _mm_add_epi32(e0, msg0); + e1 = abcd; + abcd = _mm_sha1rnds4_epu32(abcd, e0, 0); + + /* Rounds 4-7 */ + msg1 = _mm_loadu_si128((__m128i const*)(data + 16)); + msg1 = _mm_shuffle_epi8(msg1, shuf_mask); + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + abcd = _mm_sha1rnds4_epu32(abcd, e1, 0); + msg0 = _mm_sha1msg1_epu32(msg0, msg1); + + /* Rounds 8-11 */ + msg2 = _mm_loadu_si128((__m128i const*)(data + 32)); + msg2 = _mm_shuffle_epi8(msg2, shuf_mask); + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + abcd = _mm_sha1rnds4_epu32(abcd, e0, 0); + msg1 = _mm_sha1msg1_epu32(msg1, msg2); + msg0 = _mm_xor_si128(msg0, msg2); + + /* Rounds 12-15 */ + msg3 = _mm_loadu_si128((__m128i const*)(data + 48)); + msg3 = _mm_shuffle_epi8(msg3, shuf_mask); + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + msg0 = _mm_sha1msg2_epu32(msg0, msg3); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 0); + msg2 = _mm_sha1msg1_epu32(msg2, msg3); + msg1 = _mm_xor_si128(msg1, msg3); + + /* Rounds 16-19 */ + e0 = _mm_sha1nexte_epu32(e0, msg0); + e1 = abcd; + msg1 = _mm_sha1msg2_epu32(msg1, msg0); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 0); + msg3 = _mm_sha1msg1_epu32(msg3, msg0); + msg2 = _mm_xor_si128(msg2, msg0); + + /* Rounds 20-23 */ + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + msg2 = 
_mm_sha1msg2_epu32(msg2, msg1); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 1); + msg0 = _mm_sha1msg1_epu32(msg0, msg1); + msg3 = _mm_xor_si128(msg3, msg1); + + /* Rounds 24-27 */ + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + msg3 = _mm_sha1msg2_epu32(msg3, msg2); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 1); + msg1 = _mm_sha1msg1_epu32(msg1, msg2); + msg0 = _mm_xor_si128(msg0, msg2); + + /* Rounds 28-31 */ + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + msg0 = _mm_sha1msg2_epu32(msg0, msg3); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 1); + msg2 = _mm_sha1msg1_epu32(msg2, msg3); + msg1 = _mm_xor_si128(msg1, msg3); + + /* Rounds 32-35 */ + e0 = _mm_sha1nexte_epu32(e0, msg0); + e1 = abcd; + msg1 = _mm_sha1msg2_epu32(msg1, msg0); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 1); + msg3 = _mm_sha1msg1_epu32(msg3, msg0); + msg2 = _mm_xor_si128(msg2, msg0); + + /* Rounds 36-39 */ + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + msg2 = _mm_sha1msg2_epu32(msg2, msg1); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 1); + msg0 = _mm_sha1msg1_epu32(msg0, msg1); + msg3 = _mm_xor_si128(msg3, msg1); + + /* Rounds 40-43 */ + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + msg3 = _mm_sha1msg2_epu32(msg3, msg2); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 2); + msg1 = _mm_sha1msg1_epu32(msg1, msg2); + msg0 = _mm_xor_si128(msg0, msg2); + + /* Rounds 44-47 */ + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + msg0 = _mm_sha1msg2_epu32(msg0, msg3); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 2); + msg2 = _mm_sha1msg1_epu32(msg2, msg3); + msg1 = _mm_xor_si128(msg1, msg3); + + /* Rounds 48-51 */ + e0 = _mm_sha1nexte_epu32(e0, msg0); + e1 = abcd; + msg1 = _mm_sha1msg2_epu32(msg1, msg0); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 2); + msg3 = _mm_sha1msg1_epu32(msg3, msg0); + msg2 = _mm_xor_si128(msg2, msg0); + + /* Rounds 52-55 */ + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + msg2 = _mm_sha1msg2_epu32(msg2, msg1); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 2); + msg0 = _mm_sha1msg1_epu32(msg0, msg1); + 
msg3 = _mm_xor_si128(msg3, msg1); + + /* Rounds 56-59 */ + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + msg3 = _mm_sha1msg2_epu32(msg3, msg2); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 2); + msg1 = _mm_sha1msg1_epu32(msg1, msg2); + msg0 = _mm_xor_si128(msg0, msg2); + + /* Rounds 60-63 */ + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + msg0 = _mm_sha1msg2_epu32(msg0, msg3); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 3); + msg2 = _mm_sha1msg1_epu32(msg2, msg3); + msg1 = _mm_xor_si128(msg1, msg3); + + /* Rounds 64-67 */ + e0 = _mm_sha1nexte_epu32(e0, msg0); + e1 = abcd; + msg1 = _mm_sha1msg2_epu32(msg1, msg0); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 3); + msg3 = _mm_sha1msg1_epu32(msg3, msg0); + msg2 = _mm_xor_si128(msg2, msg0); + + /* Rounds 68-71 */ + e1 = _mm_sha1nexte_epu32(e1, msg1); + e0 = abcd; + msg2 = _mm_sha1msg2_epu32(msg2, msg1); + abcd = _mm_sha1rnds4_epu32(abcd, e1, 3); + msg3 = _mm_xor_si128(msg3, msg1); + + /* Rounds 72-75 */ + e0 = _mm_sha1nexte_epu32(e0, msg2); + e1 = abcd; + msg3 = _mm_sha1msg2_epu32(msg3, msg2); + abcd = _mm_sha1rnds4_epu32(abcd, e0, 3); + + /* Rounds 76-79 */ + e1 = _mm_sha1nexte_epu32(e1, msg3); + e0 = abcd; + abcd = _mm_sha1rnds4_epu32(abcd, e1, 3); + + /* Combine with saved state */ + e0 = _mm_sha1nexte_epu32(e0, e_save); + abcd = _mm_add_epi32(abcd, abcd_save); + + data += 64; + } + + abcd = _mm_shuffle_epi32(abcd, 0x1b); /* ABCD -> DCBA */ + _mm_storeu_si128((__m128i*)state, abcd); + state[4] = _mm_extract_epi32(e0, 3); +} + +#endif /* USE_SHA_NI */ + +/* ── portable scalar block processor ─────────────────────────────────── */ + +static void sha1_scalar_blocks(uint32_t h[5], const uint8_t* data, size_t num_blocks) { + while (num_blocks--) { + uint32_t w[80]; + for (int i = 0; i < 16; i++) { + w[i] = ((uint32_t)data[i*4] << 24) | + ((uint32_t)data[i*4+1] << 16) | + ((uint32_t)data[i*4+2] << 8) | + (uint32_t)data[i*4+3]; + } + for (int i = 16; i < 80; i++) { + w[i] = rotl32(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1); + } + 
uint32_t a = h[0], b = h[1], c = h[2], d = h[3], e = h[4]; + for (int i = 0; i < 20; i++) { + uint32_t f = (b & c) | (~b & d); + uint32_t t = rotl32(a,5) + f + e + 0x5a827999u + w[i]; + e=d; d=c; c=rotl32(b,30); b=a; a=t; + } + for (int i = 20; i < 40; i++) { + uint32_t f = b ^ c ^ d; + uint32_t t = rotl32(a,5) + f + e + 0x6ed9eba1u + w[i]; + e=d; d=c; c=rotl32(b,30); b=a; a=t; + } + for (int i = 40; i < 60; i++) { + uint32_t f = (b & c) | (b & d) | (c & d); + uint32_t t = rotl32(a,5) + f + e + 0x8f1bbcdcu + w[i]; + e=d; d=c; c=rotl32(b,30); b=a; a=t; + } + for (int i = 60; i < 80; i++) { + uint32_t f = b ^ c ^ d; + uint32_t t = rotl32(a,5) + f + e + 0xca62c1d6u + w[i]; + e=d; d=c; c=rotl32(b,30); b=a; a=t; + } + h[0] += a; h[1] += b; h[2] += c; h[3] += d; h[4] += e; + data += 64; + } +} + +/* ── MoonBit-callable entry points ────────────────────────────────────── */ + +/* + * sha1_compute(data, len, out) + * data : FixedArray[Byte] — input (passed as Bytes from MoonBit) + * len : number of bytes to hash + * out : FixedArray[Byte] with at least 20 bytes — receives digest + * + * One-shot SHA-1: handles padding, block processing, and output in C. + * Single FFI call per sha1_raw invocation. + */ +void sha1_compute(const uint8_t* data, int32_t len, uint8_t* out) { + uint32_t state[5] = { + 0x67452301u, 0xefcdab89u, 0x98badcfeu, 0x10325476u, 0xc3d2e1f0u + }; + + /* Process all full blocks from the input directly. */ + int32_t full_blocks = len / 64; + int32_t remainder = len % 64; + +#if USE_SHA_NI +# define SHA1_DISPATCH(st, d, n) \ + (sha1_ni_ok() ? sha1_ni_blocks((st),(d),(n)) : sha1_scalar_blocks((st),(d),(n))) +#else +# define SHA1_DISPATCH(st, d, n) sha1_scalar_blocks((st),(d),(n)) +#endif + + if (full_blocks > 0) { + SHA1_DISPATCH(state, data, (size_t)full_blocks); + } + + /* Build the padding block(s) in a local buffer. 
*/ + uint8_t pad[128]; + memcpy(pad, data + full_blocks * 64, (size_t)remainder); + pad[remainder] = 0x80; + + int32_t pad_len; + if (remainder < 55) { + /* One padding block. */ + memset(pad + remainder + 1, 0, (size_t)(55 - remainder)); + pad_len = 64; + } else { + /* Two padding blocks. */ + memset(pad + remainder + 1, 0, (size_t)(119 - remainder)); + pad_len = 128; + } + + /* Append big-endian bit length at bytes [pad_len-8 .. pad_len-1]. */ + uint64_t bit_len = (uint64_t)len * 8; + pad[pad_len - 8] = (uint8_t)(bit_len >> 56); + pad[pad_len - 7] = (uint8_t)(bit_len >> 48); + pad[pad_len - 6] = (uint8_t)(bit_len >> 40); + pad[pad_len - 5] = (uint8_t)(bit_len >> 32); + pad[pad_len - 4] = (uint8_t)(bit_len >> 24); + pad[pad_len - 3] = (uint8_t)(bit_len >> 16); + pad[pad_len - 2] = (uint8_t)(bit_len >> 8); + pad[pad_len - 1] = (uint8_t)(bit_len ); + + SHA1_DISPATCH(state, pad, (size_t)(pad_len / 64)); + + /* Write digest in big-endian. */ + for (int i = 0; i < 5; i++) { + out[i*4 ] = (uint8_t)(state[i] >> 24); + out[i*4 + 1] = (uint8_t)(state[i] >> 16); + out[i*4 + 2] = (uint8_t)(state[i] >> 8); + out[i*4 + 3] = (uint8_t)(state[i] ); + } +} + +/* + * sha1_process_blocks(h, data, offset, num_blocks) + * h : FixedArray[Int] — 5-word state, updated in-place + * data : FixedArray[Byte] + * offset : byte offset into data + * num_blocks : number of 64-byte blocks to process + * + * Used by Sha1State::update_slice for incremental hashing. 
+ */ +void sha1_process_blocks(int32_t* h, const uint8_t* data, + int32_t offset, int32_t num_blocks) { + uint32_t state[5]; + state[0] = (uint32_t)h[0]; state[1] = (uint32_t)h[1]; + state[2] = (uint32_t)h[2]; state[3] = (uint32_t)h[3]; + state[4] = (uint32_t)h[4]; + + SHA1_DISPATCH(state, data + offset, (size_t)num_blocks); + + h[0] = (int32_t)state[0]; h[1] = (int32_t)state[1]; + h[2] = (int32_t)state[2]; h[3] = (int32_t)state[3]; + h[4] = (int32_t)state[4]; +} diff --git a/modules/bit_hash/src/sha1_ni_ffi.mbt b/modules/bit_hash/src/sha1_ni_ffi.mbt new file mode 100644 index 00000000..8042e486 --- /dev/null +++ b/modules/bit_hash/src/sha1_ni_ffi.mbt @@ -0,0 +1,18 @@ +// FFI declarations for SHA-NI / C SHA-1 and SHA-256 (native target only). + +///| +#borrow(h, data) +extern "C" fn sha1_process_blocks_ffi( + h : FixedArray[Int], + data : FixedArray[Byte], + offset : Int, + num_blocks : Int, +) -> Unit = "sha1_process_blocks" + +///| +#borrow(data, out) +extern "C" fn sha256_compute_ffi( + data : Bytes, + len : Int, + out : FixedArray[Byte], +) -> Unit = "sha256_compute" diff --git a/modules/bit_hash/src/sha1_test.mbt b/modules/bit_hash/src/sha1_test.mbt index 57d56e8e..85827206 100644 --- a/modules/bit_hash/src/sha1_test.mbt +++ b/modules/bit_hash/src/sha1_test.mbt @@ -21,3 +21,39 @@ test "sha1_raw abc" { let got = sha1_raw(Bytes::from_array([b'a', b'b', b'c'])) |> raw_to_hex inspect(got, content="a9993e364706816aba3e25717850c26c9cd0d89d") } + +///| +test "Sha1State: abc" { + let state = Sha1State::new() + state.update(Bytes::from_array([b'a', b'b', b'c'])) + let got = state.finish_raw() |> raw_to_hex + inspect(got, content="a9993e364706816aba3e25717850c26c9cd0d89d") +} + +///| +test "Sha1State: large input matches sha1_raw" { + // Test with a 200-byte input simulating a hub record blob + let long_str = "version 1\nkey hub/proposal/pr/abc12345/meta\nkind pr-proposal\nclock local=1\ntimestamp 1706745600\nnode dave@example.com\ndeleted 0\n\nsome payload content here 
that makes this longer than 64 bytes total" + let bytes = Bytes::from_array(long_str.to_array().map(fn(c) { c.to_int().to_byte() })) + let expected = sha1_raw(bytes) |> raw_to_hex + let state = Sha1State::new() + state.update(bytes) + let got = state.finish_raw() |> raw_to_hex + inspect(got == expected, content="true") +} + +///| +test "sha1_raw: 35-byte input" { + // printf "blob 29 hub/proposal/pr/abc12345/meta" | sha1sum + let key = "hub/proposal/pr/abc12345/meta" + let header = "blob " + key.length().to_string() + " " + let data : Array[Byte] = [] + for c in header { + data.push(c.to_int().to_byte()) + } + for c in key { + data.push(c.to_int().to_byte()) + } + let got = sha1_raw(Bytes::from_array(data)) |> raw_to_hex + inspect(got, content="11db444b9672b9977348f8f051eb3288d6dbea0c") +} diff --git a/modules/bit_hash/src/sha256.mbt b/modules/bit_hash/src/sha256.mbt index 2615e66d..306bc5d5 100644 --- a/modules/bit_hash/src/sha256.mbt +++ b/modules/bit_hash/src/sha256.mbt @@ -1,23 +1,126 @@ -///| SHA-256 core implementation (hash package) using moonbitlang/x/crypto +///| SHA-256 pure-MoonBit implementation + +let sha256_k : FixedArray[Int] = [ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, + 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, + 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, + 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, + 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, + 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, + 0xc67178f2, +] + +let sha256_h0 : Int = 0x6a09e667 +let 
sha256_h1 : Int = 0xbb67ae85 +let sha256_h2 : Int = 0x3c6ef372 +let sha256_h3 : Int = 0xa54ff53a +let sha256_h4 : Int = 0x510e527f +let sha256_h5 : Int = 0x9b05688c +let sha256_h6 : Int = 0x1f83d9ab +let sha256_h7 : Int = 0x5be0cd19 ///| pub struct Sha256State { - inner : @crypto.SHA256 + h : FixedArray[Int] + block : FixedArray[Byte] + w : FixedArray[Int] + mut block_len : Int + mut total_len : Int64 } ///| pub fn Sha256State::new() -> Sha256State { - { inner: @crypto.SHA256::new() } + { + h: [ + sha256_h0, sha256_h1, sha256_h2, sha256_h3, sha256_h4, sha256_h5, sha256_h6, + sha256_h7, + ], + block: FixedArray::make(64, b'\x00'), + w: FixedArray::make(64, 0), + block_len: 0, + total_len: 0L, + } } ///| pub fn Sha256State::reset(self : Sha256State) -> Unit { - self.inner.reset() + self.h[0] = sha256_h0 + self.h[1] = sha256_h1 + self.h[2] = sha256_h2 + self.h[3] = sha256_h3 + self.h[4] = sha256_h4 + self.h[5] = sha256_h5 + self.h[6] = sha256_h6 + self.h[7] = sha256_h7 + self.block_len = 0 + self.total_len = 0L +} + +///| +fn sha256_rotr32(x : Int, n : Int) -> Int { + (x.reinterpret_as_uint() >> n).reinterpret_as_int() | (x << (32 - n)) +} + +///| +fn Sha256State::process_block(self : Sha256State) -> Unit { + let h = self.h + let w = self.w + let block = self.block + for i = 0; i < 16; i = i + 1 { + w[i] = (block[i * 4].to_int() << 24) | + (block[i * 4 + 1].to_int() << 16) | + (block[i * 4 + 2].to_int() << 8) | + block[i * 4 + 3].to_int() + } + for i = 16; i < 64; i = i + 1 { + let s0 = sha256_rotr32(w[i - 15], 7) ^ + sha256_rotr32(w[i - 15], 18) ^ + (w[i - 15].reinterpret_as_uint() >> 3).reinterpret_as_int() + let s1 = sha256_rotr32(w[i - 2], 17) ^ + sha256_rotr32(w[i - 2], 19) ^ + (w[i - 2].reinterpret_as_uint() >> 10).reinterpret_as_int() + w[i] = w[i - 16] + s0 + w[i - 7] + s1 + } + let mut a = h[0] + let mut b = h[1] + let mut c = h[2] + let mut d = h[3] + let mut e = h[4] + let mut f = h[5] + let mut g = h[6] + let mut hh = h[7] + for i = 0; i < 64; i = i + 1 { 
///|
/// Completes the hash and returns the 32-byte digest.
///
/// Appends the `0x80` terminator, zero-fills up to byte 56 of the working
/// block (flushing one extra block first when the terminator left no room for
/// the length field), stores the message length in bits as a big-endian
/// 64-bit value in bytes 56..63, runs the final compression, and serialises
/// the eight state words big-endian. The state is mutated by this call.
pub fn Sha256State::finish_raw(self : Sha256State) -> FixedArray[Byte] {
  // Capture the length in bits before padding is written into the block.
  let total_bits = self.total_len * 8L
  self.block[self.block_len] = b'\x80'
  self.block_len += 1
  // Fewer than 8 free bytes remain: zero-fill and compress this block first.
  if self.block_len > 56 {
    for pos in self.block_len..<64 {
      self.block[pos] = b'\x00'
    }
    self.block_len = 64
    self.process_block()
    self.block_len = 0
  }
  // Zero-fill up to the start of the length field.
  for pos in self.block_len..<56 {
    self.block[pos] = b'\x00'
  }
  self.block_len = 56
  // Big-endian 64-bit bit count in the last 8 bytes of the block.
  for k in 0..<8 {
    self.block[56 + k] = ((total_bits >> (56 - 8 * k)) & 0xffL).to_byte()
  }
  self.process_block()
  // Serialise each state word most-significant byte first.
  let digest : FixedArray[Byte] = FixedArray::make(32, b'\x00')
  for word in 0..<8 {
    let value = self.h[word]
    let base = word * 4
    for k in 0..<4 {
      digest[base + k] = ((value >> (24 - 8 * k)) & 0xff).to_byte()
    }
  }
  digest
}
/*
 * SHA-256 with optional SHA-NI acceleration (x86 sha_ni + sse4.1 + ssse3).
 *
 * SHA-NI path: public-domain implementation by Sean Gulley / Intel.
 *
 * Falls back to a portable C scalar implementation on TCC, on non-x86
 * targets, and on CPUs without the required extensions.
 */

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * The accelerated path needs GCC/Clang function-level target attributes AND
 * an x86 target: <immintrin.h> and target("sha,...") do not exist elsewhere
 * (e.g. aarch64), so the architecture must be part of the guard.
 */
#if !defined(__TINYC__) && (defined(__clang__) || defined(__GNUC__)) && \
    (defined(__x86_64__) || defined(__i386__))
#  include <immintrin.h>
#  define USE_SHA256_NI 1
#  define SHA256_TARGET __attribute__((target("sha,sse4.1,ssse3")))
#else
#  define USE_SHA256_NI 0
#  define SHA256_TARGET
#endif

#if USE_SHA256_NI
/* Cached CPU probe: -1 = not probed yet, 0 = unsupported, 1 = supported. */
static int sha256_hw_ok = -1;

/* Returns non-zero when the running CPU supports SHA-NI, SSE4.1 and SSSE3. */
static int sha256_ni_ok(void) {
    if (sha256_hw_ok < 0)
        sha256_hw_ok = (__builtin_cpu_supports("sha") != 0) &
                       (__builtin_cpu_supports("sse4.1") != 0) &
                       (__builtin_cpu_supports("ssse3") != 0);
    return sha256_hw_ok;
}
#endif

/* ── SHA-256 K constants ──────────────────────────────────────────────── */

static const uint32_t K256[64] = {
    0x428a2f98u,0x71374491u,0xb5c0fbcfu,0xe9b5dba5u,
    0x3956c25bu,0x59f111f1u,0x923f82a4u,0xab1c5ed5u,
    0xd807aa98u,0x12835b01u,0x243185beu,0x550c7dc3u,
    0x72be5d74u,0x80deb1feu,0x9bdc06a7u,0xc19bf174u,
    0xe49b69c1u,0xefbe4786u,0x0fc19dc6u,0x240ca1ccu,
    0x2de92c6fu,0x4a7484aau,0x5cb0a9dcu,0x76f988dau,
    0x983e5152u,0xa831c66du,0xb00327c8u,0xbf597fc7u,
    0xc6e00bf3u,0xd5a79147u,0x06ca6351u,0x14292967u,
    0x27b70a85u,0x2e1b2138u,0x4d2c6dfcu,0x53380d13u,
    0x650a7354u,0x766a0abbu,0x81c2c92eu,0x92722c85u,
    0xa2bfe8a1u,0xa81a664bu,0xc24b8b70u,0xc76c51a3u,
    0xd192e819u,0xd6990624u,0xf40e3585u,0x106aa070u,
    0x19a4c116u,0x1e376c08u,0x2748774cu,0x34b0bcb5u,
    0x391c0cb3u,0x4ed8aa4au,0x5b9cca4fu,0x682e6ff3u,
    0x748f82eeu,0x78a5636fu,0x84c87814u,0x8cc70208u,
    0x90befffau,0xa4506cebu,0xbef9a3f7u,0xc67178f2u,
};

/* ── SHA-NI fast path ─────────────────────────────────────────────────── */

#if USE_SHA256_NI

/*
 * Four rounds starting at K256[ki] using message words `cur`, interleaved
 * with the message-schedule update:
 *   nxt0 = sha256msg2(nxt0 + alignr(cur, prv, 4), cur)
 *   nxt1 = sha256msg1(nxt1, cur)
 * Relies on the locals msg, tmp, state0 and state1 of sha256_ni_blocks.
 */
#define SHA256_FULL_ROUND(cur, prv, nxt0, nxt1, ki)                              \
    do {                                                                         \
        msg    = _mm_add_epi32((cur),                                            \
                               _mm_loadu_si128((__m128i const*)&K256[(ki)]));    \
        state1 = _mm_sha256rnds2_epu32(state1, state0, msg);                     \
        tmp    = _mm_alignr_epi8((cur), (prv), 4);                               \
        (nxt0) = _mm_add_epi32((nxt0), tmp);                                     \
        (nxt0) = _mm_sha256msg2_epu32((nxt0), (cur));                            \
        msg    = _mm_shuffle_epi32(msg, 0x0e);                                   \
        state0 = _mm_sha256rnds2_epu32(state0, state1, msg);                     \
        (nxt1) = _mm_sha256msg1_epu32((nxt1), (cur));                            \
    } while (0)

/*
 * Compresses `num_blocks` consecutive 64-byte blocks from `data` into
 * `state` using the SHA-NI instructions. Must only be called after
 * sha256_ni_ok() returned non-zero.
 */
SHA256_TARGET
static void sha256_ni_blocks(uint32_t state[8], const uint8_t* data, size_t num_blocks) {
    __m128i state0, state1, msg, tmp;
    __m128i msg0, msg1, msg2, msg3;
    __m128i abef_save, cdgh_save;
    /* Byte shuffle that converts each big-endian message word to host order. */
    const __m128i SHUF_MASK =
        _mm_set_epi64x(0x0c0d0e0f08090a0bULL, 0x0405060700010203ULL);

    /* Load state: state[0..3]=ABCD, state[4..7]=EFGH */
    tmp    = _mm_loadu_si128((__m128i const*)&state[0]);   /* ABCD */
    state1 = _mm_loadu_si128((__m128i const*)&state[4]);   /* EFGH */
    tmp    = _mm_shuffle_epi32(tmp, 0xb1);                 /* CDAB */
    state1 = _mm_shuffle_epi32(state1, 0x1b);              /* EFGH -> GHEF */
    state0 = _mm_alignr_epi8(tmp, state1, 8);              /* ABEF */
    state1 = _mm_blend_epi16(state1, tmp, 0xf0);           /* CDGH */

    while (num_blocks--) {
        abef_save = state0;
        cdgh_save = state1;

        /* Load and byte-swap message blocks */
        msg0 = _mm_shuffle_epi8(_mm_loadu_si128((__m128i const*)(data +  0)), SHUF_MASK);
        msg1 = _mm_shuffle_epi8(_mm_loadu_si128((__m128i const*)(data + 16)), SHUF_MASK);
        msg2 = _mm_shuffle_epi8(_mm_loadu_si128((__m128i const*)(data + 32)), SHUF_MASK);
        msg3 = _mm_shuffle_epi8(_mm_loadu_si128((__m128i const*)(data + 48)), SHUF_MASK);

        /* Rounds 0-3: msg0 + K[0..3] */
        msg    = _mm_add_epi32(msg0, _mm_loadu_si128((__m128i const*)&K256[0]));
        state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
        msg    = _mm_shuffle_epi32(msg, 0x0e);
        state0 = _mm_sha256rnds2_epu32(state0, state1, msg);

        /* Rounds 4-7: msg1 + K[4..7]; msg0 = sha256msg1(msg0, msg1) */
        msg    = _mm_add_epi32(msg1, _mm_loadu_si128((__m128i const*)&K256[4]));
        state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
        msg    = _mm_shuffle_epi32(msg, 0x0e);
        state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
        msg0   = _mm_sha256msg1_epu32(msg0, msg1);

        /* Rounds 8-11: msg2 + K[8..11]; msg1 = sha256msg1(msg1, msg2) */
        msg    = _mm_add_epi32(msg2, _mm_loadu_si128((__m128i const*)&K256[8]));
        state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
        msg    = _mm_shuffle_epi32(msg, 0x0e);
        state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
        msg1   = _mm_sha256msg1_epu32(msg1, msg2);

        /* Rounds 12-15: msg3 + K[12..15];
           msg0 = sha256msg2(msg0 + alignr(msg3, msg2, 4), msg3);
           msg2 = sha256msg1(msg2, msg3) */
        msg    = _mm_add_epi32(msg3, _mm_loadu_si128((__m128i const*)&K256[12]));
        state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
        tmp    = _mm_alignr_epi8(msg3, msg2, 4);
        msg0   = _mm_add_epi32(msg0, tmp);
        msg0   = _mm_sha256msg2_epu32(msg0, msg3);
        msg    = _mm_shuffle_epi32(msg, 0x0e);
        state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
        msg2   = _mm_sha256msg1_epu32(msg2, msg3);

        SHA256_FULL_ROUND(msg0, msg3, msg1, msg3, 16);  /* rounds 16-19 */
        SHA256_FULL_ROUND(msg1, msg0, msg2, msg0, 20);  /* rounds 20-23 */
        SHA256_FULL_ROUND(msg2, msg1, msg3, msg1, 24);  /* rounds 24-27 */
        SHA256_FULL_ROUND(msg3, msg2, msg0, msg2, 28);  /* rounds 28-31 */
        SHA256_FULL_ROUND(msg0, msg3, msg1, msg3, 32);  /* rounds 32-35 */
        SHA256_FULL_ROUND(msg1, msg0, msg2, msg0, 36);  /* rounds 36-39 */
        SHA256_FULL_ROUND(msg2, msg1, msg3, msg1, 40);  /* rounds 40-43 */
        SHA256_FULL_ROUND(msg3, msg2, msg0, msg2, 44);  /* rounds 44-47 */
        SHA256_FULL_ROUND(msg0, msg3, msg1, msg3, 48);  /* rounds 48-51 */
        /* The sha256msg1 results of the next two groups are never consumed;
           they are kept only so every group uses the same macro. */
        SHA256_FULL_ROUND(msg1, msg0, msg2, msg0, 52);  /* rounds 52-55 */
        SHA256_FULL_ROUND(msg2, msg1, msg3, msg1, 56);  /* rounds 56-59 */

        /* Rounds 60-63: last 4 rounds, no message schedule update */
        msg    = _mm_add_epi32(msg3, _mm_loadu_si128((__m128i const*)&K256[60]));
        state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
        msg    = _mm_shuffle_epi32(msg, 0x0e);
        state0 = _mm_sha256rnds2_epu32(state0, state1, msg);

        /* Feed-forward: add the state saved at the start of this block. */
        state0 = _mm_add_epi32(state0, abef_save);
        state1 = _mm_add_epi32(state1, cdgh_save);
        data += 64;
    }

    /* Unpack state back to ABCDEFGH order */
    tmp    = _mm_shuffle_epi32(state0, 0x1b);              /* FEBA */
    state1 = _mm_shuffle_epi32(state1, 0xb1);              /* DCHG */
    state0 = _mm_blend_epi16(tmp, state1, 0xf0);           /* DCBA */
    state1 = _mm_alignr_epi8(state1, tmp, 8);              /* HGFE */
    _mm_storeu_si128((__m128i*)&state[0], state0);
    _mm_storeu_si128((__m128i*)&state[4], state1);
}

#endif /* USE_SHA256_NI */

/* ── portable scalar SHA-256 ──────────────────────────────────────────── */

/* Rotates x right by n bits; n must be in 1..31. */
static inline uint32_t rotr32(uint32_t x, int n) {
    return (x >> n) | (x << (32 - n));
}

/* Compresses `num_blocks` consecutive 64-byte blocks from `data` into `h`. */
static void sha256_scalar_blocks(uint32_t h[8], const uint8_t* data, size_t num_blocks) {
    while (num_blocks--) {
        uint32_t w[64];
        /* First 16 schedule words are the block itself, read big-endian. */
        for (int i = 0; i < 16; i++) {
            w[i] = ((uint32_t)data[i*4] << 24) | ((uint32_t)data[i*4+1] << 16) |
                   ((uint32_t)data[i*4+2] <<  8) |  (uint32_t)data[i*4+3];
        }
        for (int i = 16; i < 64; i++) {
            uint32_t s0 = rotr32(w[i-15], 7) ^ rotr32(w[i-15], 18) ^ (w[i-15] >> 3);
            uint32_t s1 = rotr32(w[i-2], 17) ^ rotr32(w[i-2], 19)  ^ (w[i-2] >> 10);
            w[i] = w[i-16] + s0 + w[i-7] + s1;
        }
        uint32_t a=h[0],b=h[1],c=h[2],d=h[3],e=h[4],f=h[5],g=h[6],hh=h[7];
        for (int i = 0; i < 64; i++) {
            uint32_t S1  = rotr32(e,6) ^ rotr32(e,11) ^ rotr32(e,25);
            uint32_t ch  = (e & f) ^ (~e & g);
            uint32_t T1  = hh + S1 + ch + K256[i] + w[i];
            uint32_t S0  = rotr32(a,2) ^ rotr32(a,13) ^ rotr32(a,22);
            uint32_t maj = (a & b) ^ (a & c) ^ (b & c);
            uint32_t T2  = S0 + maj;
            hh=g; g=f; f=e; e=d+T1; d=c; c=b; b=a; a=T1+T2;
        }
        h[0]+=a; h[1]+=b; h[2]+=c; h[3]+=d;
        h[4]+=e; h[5]+=f; h[6]+=g; h[7]+=hh;
        data += 64;
    }
}

/*
 * Runs the block compression with SHA-NI when the CPU supports it and with
 * the scalar code otherwise. Deliberately NOT marked SHA256_TARGET: this
 * function executes on every CPU, so it must be compiled for the baseline ISA.
 */
static void sha256_process_blocks(uint32_t state[8], const uint8_t* data, size_t num_blocks) {
#if USE_SHA256_NI
    if (sha256_ni_ok()) {
        sha256_ni_blocks(state, data, num_blocks);
        return;
    }
#endif
    sha256_scalar_blocks(state, data, num_blocks);
}

/* ── MoonBit-callable entry point ─────────────────────────────────────── */

/*
 * sha256_compute(data, len, out)
 *   data : Bytes (passed as const uint8_t* from MoonBit native)
 *   len  : number of bytes to hash; a negative value is treated as 0
 *   out  : FixedArray[Byte] with at least 32 bytes; receives the digest
 *
 * One-shot SHA-256: one FFI call per sha256_raw invocation.
 *
 * Like the dispatcher, this function carries no target attribute, so the
 * compiler cannot emit SSSE3/SSE4.1/SHA instructions before the CPU check.
 */
void sha256_compute(const uint8_t* data, int32_t len, uint8_t* out) {
    uint32_t state[8] = {
        0x6a09e667u, 0xbb67ae85u, 0x3c6ef372u, 0xa54ff53au,
        0x510e527fu, 0x9b05688cu, 0x1f83d9abu, 0x5be0cd19u,
    };

    /* A negative length would turn into a huge size_t below. */
    if (len < 0) {
        len = 0;
    }

    size_t full_blocks = (size_t)len / 64;
    size_t remainder   = (size_t)len % 64;

    if (full_blocks > 0) {
        sha256_process_blocks(state, data, full_blocks);
    }

    /* Tail bytes + 0x80 + zero fill + 64-bit big-endian bit length. */
    uint8_t pad[128] = {0};
    if (remainder > 0) {
        memcpy(pad, data + full_blocks * 64, remainder);
    }
    pad[remainder] = 0x80;

    /*
     * The tail needs remainder + 1 (0x80) + 8 (length) bytes. That fits in a
     * single block for remainder <= 55; SHA-256 requires the minimal padding,
     * so a 55-byte tail must NOT spill into a second block.
     */
    size_t pad_len = (remainder < 56) ? 64 : 128;

    uint64_t bit_len = (uint64_t)len * 8;
    for (int i = 0; i < 8; i++) {
        pad[pad_len - 1 - (size_t)i] = (uint8_t)(bit_len >> (8 * i));
    }

    sha256_process_blocks(state, pad, pad_len / 64);

    /* Serialise the state words big-endian. */
    for (int i = 0; i < 8; i++) {
        out[i*4    ] = (uint8_t)(state[i] >> 24);
        out[i*4 + 1] = (uint8_t)(state[i] >> 16);
        out[i*4 + 2] = (uint8_t)(state[i] >>  8);
        out[i*4 + 3] = (uint8_t)(state[i]      );
    }
}
"hub/work-item/abc/meta") } + ///| test "pr: create and get PR" { let (_fs, objects, refs, clock) = setup_repo_with_branch()