diff --git a/justfile b/justfile
index 7a23ee9d..caa3f706 100644
--- a/justfile
+++ b/justfile
@@ -261,6 +261,7 @@ lint-plugins:
     @cd plugins/native/matcha && cargo fmt -- --check && cargo clippy -- -D warnings
     @cd plugins/native/pocket-tts && cargo fmt -- --check && cargo clippy -- -D warnings
     @cd plugins/native/nllb && cargo fmt -- --check && CMAKE_ARGS="-DCMAKE_INSTALL_PREFIX=$$(pwd)/target/cmake-install" cargo clippy -- -D warnings
+    @cd plugins/native/supertonic && cargo fmt -- --check && cargo clippy -- -D warnings
     @echo "✓ All native plugins passed linting"
 
 # Auto-fix formatting and linting issues in native plugins
@@ -274,6 +275,7 @@ fix-plugins:
     @cd plugins/native/matcha && cargo fmt && cargo clippy --fix --allow-dirty --allow-staged -- -D warnings
     @cd plugins/native/pocket-tts && cargo fmt && cargo clippy --fix --allow-dirty --allow-staged -- -D warnings
     @cd plugins/native/nllb && cargo fmt && CMAKE_ARGS="-DCMAKE_INSTALL_PREFIX=$$(pwd)/target/cmake-install" cargo clippy --fix --allow-dirty --allow-staged -- -D warnings
+    @cd plugins/native/supertonic && cargo fmt && cargo clippy --fix --allow-dirty --allow-staged -- -D warnings
     @echo "✓ All native plugins fixed"
 
 # --- Profiling ---
@@ -801,12 +803,49 @@ upload-helsinki-plugin: build-plugin-native-helsinki
     @curl -X POST -F plugin=@target/release/libhelsinki.so \
         http://127.0.0.1:4545/api/v1/plugins
 
+# Download and extract Supertonic TTS models (archive is fetched to a .part file and renamed only on success, so a failed download is retried on the next run)
+download-supertonic-models:
+    @echo "Downloading Supertonic TTS models..."
+    @mkdir -p models
+    @if [ -f models/supertonic-v2-onnx.tar.bz2 ]; then \
+        echo "✓ Supertonic archive already exists at models/supertonic-v2-onnx.tar.bz2"; \
+    else \
+        echo "Downloading supertonic-v2-onnx.tar.bz2..." && \
+        curl --fail -L -o models/supertonic-v2-onnx.tar.bz2.part \
+            https://huggingface.co/streamkit/supertonic-models/resolve/main/supertonic-v2-onnx.tar.bz2 && \
+        mv models/supertonic-v2-onnx.tar.bz2.part models/supertonic-v2-onnx.tar.bz2 && echo "✓ Supertonic archive downloaded"; \
+    fi
+    @if [ -d models/supertonic-v2-onnx ]; then \
+        echo "✓ Supertonic models already extracted at models/supertonic-v2-onnx"; \
+    else \
+        echo "Extracting models..." && \
+        cd models && tar xf supertonic-v2-onnx.tar.bz2 && \
+        echo "✓ Supertonic v2 models ready at models/supertonic-v2-onnx (5 languages, 10 voices)"; \
+    fi
+
+# Setup Supertonic TTS (download models)
+setup-supertonic: download-supertonic-models
+    @echo "✓ Supertonic TTS setup complete!"
+
+# Build native Supertonic TTS plugin
+[working-directory: 'plugins/native/supertonic']
+build-plugin-native-supertonic:
+    @echo "Building native Supertonic TTS plugin..."
+    @cargo build --release
+
+# Upload Supertonic plugin to running server
+[working-directory: 'plugins/native/supertonic']
+upload-supertonic-plugin: build-plugin-native-supertonic
+    @echo "Uploading Supertonic plugin to server..."
+ @curl -X POST -F plugin=@target/release/libsupertonic.so \ + http://127.0.0.1:4545/api/v1/plugins + # Build specific native plugin by name build-plugin-native name: @just build-plugin-native-{{name}} # Build all native plugin examples -build-plugins-native: build-plugin-native-gain build-plugin-native-whisper build-plugin-native-kokoro build-plugin-native-piper build-plugin-native-matcha build-plugin-native-pocket-tts build-plugin-native-sensevoice build-plugin-native-nllb build-plugin-native-vad build-plugin-native-helsinki +build-plugins-native: build-plugin-native-gain build-plugin-native-whisper build-plugin-native-kokoro build-plugin-native-piper build-plugin-native-matcha build-plugin-native-pocket-tts build-plugin-native-sensevoice build-plugin-native-nllb build-plugin-native-vad build-plugin-native-helsinki build-plugin-native-supertonic ## Combined @@ -840,7 +879,7 @@ copy-plugins-native: cp examples/plugins/gain-native/target/release/libgain_plugin_native.* .plugins/native/ 2>/dev/null || true # Official native plugins (repo-local) - for name in whisper kokoro piper matcha vad sensevoice nllb helsinki; do + for name in whisper kokoro piper matcha vad sensevoice nllb helsinki supertonic; do for f in \ plugins/native/"$name"/target/release/lib"$name".so \ plugins/native/"$name"/target/release/lib"$name".so.* \ diff --git a/marketplace/official-plugins.json b/marketplace/official-plugins.json index ff58a3b2..3db42b4c 100644 --- a/marketplace/official-plugins.json +++ b/marketplace/official-plugins.json @@ -238,6 +238,34 @@ } ] }, + { + "id": "supertonic", + "name": "Supertonic", + "version": "0.1.0", + "node_kind": "supertonic", + "kind": "native", + "entrypoint": "libsupertonic.so", + "artifact": "plugins/native/supertonic/target/release/libsupertonic.so", + "description": "Multilingual TTS using Supertonic (66M params, 5 languages, up to 167x real-time)", + "license": "MPL-2.0", + "models": [ + { + "id": "supertonic-v2-onnx", + "name": "Supertonic v2 
ONNX models", + "default": true, + "source": "huggingface", + "repo_id": "streamkit/supertonic-models", + "revision": "main", + "files": [ + "supertonic-v2-onnx.tar.bz2" + ], + "expected_size_bytes": 244451376, + "license": "MIT", + "license_url": "https://github.com/supertone-inc/supertonic/blob/main/LICENSE", + "sha256": "29e18bfdcbfbdd8bef25204b19be21d13fda36d4e66fe31c74e2a01dad457cec" + } + ] + }, { "id": "vad", "name": "VAD", diff --git a/plugins/native/supertonic/Cargo.lock b/plugins/native/supertonic/Cargo.lock new file mode 100644 index 00000000..91273518 --- /dev/null +++ b/plugins/native/supertonic/Cargo.lock @@ -0,0 +1,1356 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + +[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" + +[[package]] +name = "cc" +version = "1.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + 
"crypto-common", +] + +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "filetime" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + +[[package]] +name = "flate2" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "hound" +version = "3.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62adaabb884c94955b19907d60019f4e145d091c75345379e70d1ee696f7854f" + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libm" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "libredox" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "matrixmultiply" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "native-tls" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "ndarray" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "portable-atomic", + "portable-atomic-util", + "rawpointer", + "rayon", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "openssl-sys" +version = "0.9.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "ort" +version = "2.0.0-rc.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa7e49bd669d32d7bc2a15ec540a527e7764aec722a45467814005725bcd721" +dependencies = [ + "ndarray", + "ort-sys", + "smallvec 2.0.0-alpha.10", + "tracing", +] + +[[package]] +name = "ort-sys" +version = "2.0.0-rc.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2aba9f5c7c479925205799216e7e5d07cc1d4fa76ea8058c60a9a30f6a4e890" +dependencies = [ + "flate2", + "pkg-config", + "sha2", + "tar", + "ureq", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "primal-check" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0d895b311e3af9902528fbb8f928688abbd95872819320517cc24ca6b2bd08" +dependencies = [ + "num-integer", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" +dependencies = [ + "bitflags", +] + +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustfft" +version = "6.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21db5f9893e91f41798c88680037dba611ca6674703c1a18601b01a72c8adb89" +dependencies = [ + "num-complex", + "num-integer", + "num-traits", + "primal-check", + "strength_reduce", + "transpose", +] + +[[package]] +name = "rustix" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +dependencies = [ + "zeroize", +] + +[[package]] +name = "schannel" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +dependencies = [ + "windows-sys", +] + +[[package]] +name = 
"schemars" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2" +dependencies = [ + "dyn-clone", + "ref-cast", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4908ad288c5035a8eb12cfdf0d49270def0a268ee162b75eeee0f85d155a7c45" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "smallvec" +version = "2.0.0-alpha.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d44cfb396c3caf6fbfd0ab422af02631b69ddd96d2eff0b0f0724f9024051b" + +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + +[[package]] +name = "streamkit-core" +version = "0.1.0" +dependencies = [ + "async-trait", + "base64", + "bytes", + "schemars", + "serde", + "serde_json", + "smallvec 1.15.1", + "thiserror", + "tokio", + 
"tokio-util", + "tracing", + "ts-rs", +] + +[[package]] +name = "streamkit-plugin-sdk-native" +version = "0.1.0" +dependencies = [ + "async-trait", + "bytes", + "serde", + "serde_json", + "streamkit-core", + "tracing", +] + +[[package]] +name = "strength_reduce" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" + +[[package]] +name = "supertonic-plugin-native" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", + "streamkit-plugin-sdk-native", + "supertonic-vendor", + "tracing", +] + +[[package]] +name = "supertonic-vendor" +version = "0.1.0" +dependencies = [ + "anyhow", + "hound", + "libc", + "ndarray", + "ort", + "rand", + "rand_distr", + "rayon", + "regex", + "rustfft", + "serde", + "serde_json", + "unicode-normalization", +] + +[[package]] +name = "syn" +version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +dependencies = [ + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinyvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +dependencies = [ + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] 
+name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + +[[package]] +name = "transpose" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad61aed86bc3faea4300c7aee358b4c6d0c8d6ccc36524c96e4c92ccf26e77e" +dependencies = [ + "num-integer", + "strength_reduce", +] + +[[package]] +name = "ts-rs" +version = "11.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4994acea2522cd2b3b85c1d9529a55991e3ad5e25cdcd3de9d505972c4379424" +dependencies = [ + "serde_json", + "thiserror", + "ts-rs-macros", +] + +[[package]] +name = "ts-rs-macros" +version = "11.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6ff59666c9cbaec3533964505d39154dc4e0a56151fdea30a09ed0301f62e2" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "termcolor", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "ureq" +version = "3.1.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d39cb1dbab692d82a977c0392ffac19e188bd9186a9f32806f0aaa859d75585a" +dependencies = [ + "base64", + "der", + "log", + "native-tls", + "percent-encoding", + "rustls-pki-types", + "socks", + "ureq-proto", + "utf-8", + "webpki-root-certs", +] + +[[package]] +name = "ureq-proto" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f" +dependencies = [ + "base64", + "http", + "httparse", + "log", +] + +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "webpki-root-certs" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36a29fc0408b113f68cf32637857ab740edfafdf460c326cd2afaa2d84cc05dc" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + +[[package]] +name = "zerocopy" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdea86ddd5568519879b8187e1cf04e24fce28f7fe046ceecbce472ff19a2572" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = 
"zerocopy-derive" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c15e1b46eff7c6c91195752e0eeed8ef040e391cdece7c25376957d5f15df22" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zmij" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439" diff --git a/plugins/native/supertonic/Cargo.toml b/plugins/native/supertonic/Cargo.toml new file mode 100644 index 00000000..71574741 --- /dev/null +++ b/plugins/native/supertonic/Cargo.toml @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: © 2025 StreamKit Contributors +# +# SPDX-License-Identifier: MPL-2.0 + +[package] +name = "supertonic-plugin-native" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lib] +name = "supertonic" +crate-type = ["cdylib"] + +[dependencies] +streamkit-plugin-sdk-native = { path = "../../../sdks/plugin-sdk/native" } +supertonic-vendor = { path = "vendor/supertonic" } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tracing = "0.1" + +[lints.clippy] +# Categories +pedantic = { level = "warn", priority = -1 } +nursery = { level = "warn", priority = -1 } +# Safety +unwrap_used = "warn" +expect_used = "warn" +# Complexity +cognitive_complexity = "warn" +# Math +cast_possible_truncation = "warn" +cast_precision_loss = "warn" +cast_sign_loss = "warn" +# Allow-list (Noise reduction) +module_name_repetitions = "allow" +must_use_candidate = "allow" +doc_markdown = "allow" diff --git a/plugins/native/supertonic/README.md b/plugins/native/supertonic/README.md new file mode 100644 index 00000000..f94a7b3e --- /dev/null +++ b/plugins/native/supertonic/README.md @@ -0,0 +1,46 @@ + + +# Supertonic 
TTS Plugin + +Multilingual text-to-speech plugin for StreamKit using the [Supertonic](https://github.com/supertone-inc/supertonic) TTS engine. + +## Features + +- 66M parameter model, up to 167x faster than real-time +- 5 languages: English, Korean, Spanish, Portuguese, French +- 10 voice styles: M1-M5 (male), F1-F5 (female) +- ONNX Runtime-based inference (4 models: duration predictor, text encoder, vector estimator, vocoder) +- Global model caching across pipeline nodes + +## Setup + +```bash +# Download models +just download-supertonic-models + +# Build plugin +just build-plugin-native-supertonic +``` + +## Configuration + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `model_dir` | string | `./models/supertonic-v2-onnx` | Path to ONNX model directory | +| `lang` | string | `en` | Language: `en`, `ko`, `es`, `pt`, `fr` | +| `voice_style` | string | `M1` | Style name (M1-M5, F1-F5) or `.json` path | +| `voice_styles_dir` | string | - | Directory for named voice style files | +| `total_step` | integer | `5` | Denoising steps (1-20, higher = better quality) | +| `speed` | number | `1.05` | Speech speed multiplier (0.5-2.0) | +| `silence_duration` | number | `0.3` | Silence between chunks in seconds | +| `min_sentence_length` | integer | `10` | Minimum chars before triggering TTS | +| `emit_telemetry` | boolean | `false` | Emit tts.start/tts.done telemetry events | + +## License + +Plugin code: MPL-2.0 +Supertonic engine: MIT diff --git a/plugins/native/supertonic/plugin.yml b/plugins/native/supertonic/plugin.yml new file mode 100644 index 00000000..3bd440b9 --- /dev/null +++ b/plugins/native/supertonic/plugin.yml @@ -0,0 +1,22 @@ +id: supertonic +name: Supertonic +version: 0.1.0 +node_kind: supertonic +kind: native +entrypoint: libsupertonic.so +artifact: plugins/native/supertonic/target/release/libsupertonic.so +description: Multilingual TTS using Supertonic (66M params, 5 languages, up to 167x real-time) +license: 
MPL-2.0 +models: +- id: supertonic-v2-onnx + name: Supertonic v2 ONNX models + default: true + source: huggingface + repo_id: streamkit/supertonic-models + revision: main + files: + - supertonic-v2-onnx.tar.bz2 + expected_size_bytes: 244451376 + license: MIT + license_url: https://github.com/supertone-inc/supertonic/blob/main/LICENSE + sha256: 29e18bfdcbfbdd8bef25204b19be21d13fda36d4e66fe31c74e2a01dad457cec diff --git a/plugins/native/supertonic/src/config.rs b/plugins/native/supertonic/src/config.rs new file mode 100644 index 00000000..c446af7e --- /dev/null +++ b/plugins/native/supertonic/src/config.rs @@ -0,0 +1,92 @@ +// SPDX-FileCopyrightText: © 2025 StreamKit Contributors +// +// SPDX-License-Identifier: MPL-2.0 + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct SupertonicConfig { + /// Path to ONNX model directory (contains duration_predictor.onnx, etc.) + pub model_dir: String, + + /// Language code: "en", "ko", "es", "pt", "fr" + #[serde(default = "default_lang")] + pub lang: String, + + /// Voice style name (e.g. 
"M1", "F1") or path to .json file + #[serde(default = "default_voice_style")] + pub voice_style: String, + + /// Directory containing named voice style .json files + #[serde(default)] + pub voice_styles_dir: Option, + + /// Denoising steps (1-20) + #[serde(default = "default_total_step")] + pub total_step: usize, + + /// Speed multiplier (0.5-2.0) + #[serde(default = "default_speed")] + pub speed: f32, + + /// Silence between chunks in seconds + #[serde(default = "default_silence_duration")] + pub silence_duration: f32, + + /// Minimum characters before triggering TTS + #[serde(default = "default_min_sentence_length")] + pub min_sentence_length: usize, + + /// Emit out-of-band telemetry events (tts.start/tts.done) + #[serde(default)] + pub emit_telemetry: bool, + + /// Maximum characters of text preview to include in telemetry events (0 = omit preview) + #[serde(default = "default_telemetry_preview_chars")] + pub telemetry_preview_chars: usize, +} + +fn default_lang() -> String { + "en".to_string() +} + +fn default_voice_style() -> String { + "M1".to_string() +} + +const fn default_total_step() -> usize { + 5 +} + +const fn default_speed() -> f32 { + 1.05 +} + +const fn default_silence_duration() -> f32 { + 0.3 +} + +const fn default_min_sentence_length() -> usize { + 10 +} + +const fn default_telemetry_preview_chars() -> usize { + 80 +} + +impl Default for SupertonicConfig { + fn default() -> Self { + Self { + model_dir: "models/supertonic-v2-onnx".to_string(), + lang: default_lang(), + voice_style: default_voice_style(), + voice_styles_dir: None, + total_step: default_total_step(), + speed: default_speed(), + silence_duration: default_silence_duration(), + min_sentence_length: default_min_sentence_length(), + emit_telemetry: false, + telemetry_preview_chars: default_telemetry_preview_chars(), + } + } +} diff --git a/plugins/native/supertonic/src/lib.rs b/plugins/native/supertonic/src/lib.rs new file mode 100644 index 00000000..7377c41f --- /dev/null +++ 
b/plugins/native/supertonic/src/lib.rs @@ -0,0 +1,14 @@ +// SPDX-FileCopyrightText: © 2025 StreamKit Contributors +// +// SPDX-License-Identifier: MPL-2.0 + +mod config; +mod model; +mod sentence_splitter; +mod supertonic_node; +mod voice; + +use streamkit_plugin_sdk_native::{native_plugin_entry, NativeProcessorNode}; +use supertonic_node::SupertonicNode; + +native_plugin_entry!(SupertonicNode); diff --git a/plugins/native/supertonic/src/model.rs b/plugins/native/supertonic/src/model.rs new file mode 100644 index 00000000..05080b22 --- /dev/null +++ b/plugins/native/supertonic/src/model.rs @@ -0,0 +1,101 @@ +// SPDX-FileCopyrightText: © 2025 StreamKit Contributors +// +// SPDX-License-Identifier: MPL-2.0 + +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; +use streamkit_plugin_sdk_native::prelude::*; +use supertonic_vendor::TextToSpeech; + +/// Wrapper around `TextToSpeech` to mark it as thread-safe. +/// +/// # Safety +/// Access to the inner `TextToSpeech` is serialised through the `Mutex` in +/// `CachedModel`, so concurrent mutation cannot occur. +pub struct TtsModelWrapper { + inner: Mutex, +} + +impl TtsModelWrapper { + pub const fn new(tts: TextToSpeech) -> Self { + Self { inner: Mutex::new(tts) } + } + + pub fn lock(&self) -> Result, String> { + self.inner.lock().map_err(|e| format!("Failed to lock TTS model: {e}")) + } +} + +// SAFETY: The inner TextToSpeech is protected by a Mutex. +unsafe impl Send for TtsModelWrapper {} +unsafe impl Sync for TtsModelWrapper {} + +struct CachedModel { + model: Arc, + sample_rate: i32, +} + +/// Global cache of loaded TTS models, keyed by canonicalized model_dir. +static MODEL_CACHE: std::sync::LazyLock>> = + std::sync::LazyLock::new(|| Mutex::new(HashMap::new())); + +/// Load or retrieve a cached TTS model. +/// +/// Returns `(Arc, sample_rate)`. 
/// Incrementally splits a growing text buffer into sentences for TTS.
///
/// Nothing is released until the buffer holds at least `min_length`
/// characters, so very short fragments are held back and merged with the
/// text that follows them.
pub struct SentenceSplitter {
    min_length: usize,
}

impl SentenceSplitter {
    /// Mid-buffer sentence boundaries.
    ///
    /// ASCII terminators only count when followed by whitespace (so "3.14"
    /// or "e.g." inside a word are not split). The CJK terminators
    /// (ideographic full stop, full-width `!` and `?`) need no trailing
    /// space. They are written as escapes so they cannot be confused with
    /// their ASCII look-alikes.
    const BOUNDARIES: [&'static str; 9] = [
        ". ", ".\n", "! ", "!\n", "? ", "?\n", // Latin
        "\u{3002}", "\u{FF01}", "\u{FF1F}", // CJK (no space needed)
    ];

    /// Characters that end a sentence when they are the last character of
    /// the buffer.
    const TERMINATORS: [char; 6] = ['.', '!', '?', '\u{3002}', '\u{FF01}', '\u{FF1F}'];

    /// Creates a splitter that only releases text once the buffer contains
    /// at least `min_length` characters.
    pub const fn new(min_length: usize) -> Self {
        Self { min_length }
    }

    /// Removes and returns the first complete sentence in `buffer`, if any.
    ///
    /// The returned sentence is trimmed and includes its terminating
    /// punctuation. `buffer` keeps whatever follows the sentence. Returns
    /// `None` (leaving `buffer` untouched) when the buffer is shorter than
    /// `min_length` characters or contains no sentence terminator.
    pub fn extract_sentence(&self, buffer: &mut String) -> Option<String> {
        // Count characters rather than bytes so the threshold means the same
        // thing for Hangul/CJK text (3 bytes per character in UTF-8) as for
        // ASCII. `take` bounds the scan to `min_length` characters.
        if buffer.chars().take(self.min_length).count() < self.min_length {
            return None;
        }

        // Pick the boundary that occurs EARLIEST in the buffer. Scanning the
        // boundary list in priority order would cut "Hi! How are you. Ok" at
        // ". " and glue the first two sentences together.
        let earliest_end = Self::BOUNDARIES
            .iter()
            .filter_map(|boundary| buffer.find(*boundary).map(|pos| (pos, pos + boundary.len())))
            .min_by_key(|&(pos, _)| pos)
            .map(|(_, end)| end);

        if let Some(end) = earliest_end {
            let sentence: String = buffer.drain(..end).collect();
            return Some(sentence.trim().to_string());
        }

        // No interior boundary: the whole buffer is one sentence if it ends
        // with a terminator. Trim it like the interior case so callers always
        // get the same shape of output.
        let ends_sentence =
            buffer.chars().next_back().is_some_and(|c| Self::TERMINATORS.contains(&c));
        if ends_sentence {
            return Some(std::mem::take(buffer).trim().to_string());
        }

        None
    }

    /// Takes whatever is left in `buffer`, complete sentence or not.
    ///
    /// Returns `None` when the buffer is empty; otherwise the buffer is left
    /// empty and its previous contents are returned unchanged.
    // Kept for callers that flush on shutdown; not referenced by every build.
    #[allow(dead_code)]
    pub fn flush(buffer: &mut String) -> Option<String> {
        if buffer.is_empty() {
            None
        } else {
            Some(std::mem::take(buffer))
        }
    }
}
How are you?".to_string(); + + assert_eq!(splitter.extract_sentence(&mut buffer), Some("Hello world.".to_string())); + assert_eq!(buffer, "How are you?"); + + assert_eq!(splitter.extract_sentence(&mut buffer), Some("How are you?".to_string())); + assert_eq!(buffer, ""); + } + + #[test] + fn test_min_length() { + let splitter = SentenceSplitter::new(20); + let mut buffer = "Hi.".to_string(); + + // Too short, should not extract + assert_eq!(splitter.extract_sentence(&mut buffer), None); + assert_eq!(buffer, "Hi."); + } + + #[test] + fn test_flush() { + let _splitter = SentenceSplitter::new(10); + let mut buffer = "Incomplete sentence".to_string(); + + assert_eq!(SentenceSplitter::flush(&mut buffer), Some("Incomplete sentence".to_string())); + assert_eq!(buffer, ""); + } +} diff --git a/plugins/native/supertonic/src/supertonic_node.rs b/plugins/native/supertonic/src/supertonic_node.rs new file mode 100644 index 00000000..e42dd610 --- /dev/null +++ b/plugins/native/supertonic/src/supertonic_node.rs @@ -0,0 +1,434 @@ +// SPDX-FileCopyrightText: © 2025 StreamKit Contributors +// +// SPDX-License-Identifier: MPL-2.0 + +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Instant; +use streamkit_plugin_sdk_native::prelude::*; +use streamkit_plugin_sdk_native::streamkit_core::types::{AudioFormat, SampleFormat}; + +use crate::config::SupertonicConfig; +use crate::model::{self, TtsModelWrapper}; +use crate::sentence_splitter::SentenceSplitter; +use crate::voice::{self, StyleWrapper}; + +pub struct SupertonicNode { + tts_model: Arc, + voice_style: Arc, + config: SupertonicConfig, + model_dir: String, + sample_rate: i32, + text_buffer: String, + sentence_splitter: SentenceSplitter, + logger: Logger, +} + +// SAFETY: Thread-safety is ensured through Arc and Mutex on shared resources. 
+unsafe impl Send for SupertonicNode {} +unsafe impl Sync for SupertonicNode {} + +impl NativeProcessorNode for SupertonicNode { + fn metadata() -> NodeMetadata { + NodeMetadata::builder("supertonic") + .description( + "Multilingual text-to-speech using the Supertonic TTS engine. \ + Supports 5 languages (en, ko, es, pt, fr) with 10 voice styles. \ + 66M parameters, up to 167x faster than real-time.", + ) + .input("in", &[PacketType::Text]) + .output( + "out", + PacketType::RawAudio(AudioFormat { + sample_rate: 22050, + channels: 1, + sample_format: SampleFormat::F32, + }), + ) + .param_schema(serde_json::json!({ + "type": "object", + "properties": { + "model_dir": { + "type": "string", + "description": "Path to Supertonic ONNX model directory", + "default": "./models/supertonic-v2-onnx" + }, + "lang": { + "type": "string", + "description": "Language code", + "default": "en", + "enum": ["en", "ko", "es", "pt", "fr"] + }, + "voice_style": { + "type": "string", + "description": "Voice style name (M1-M5, F1-F5) or path to .json file", + "default": "M1" + }, + "voice_styles_dir": { + "type": "string", + "description": "Directory containing named voice style .json files" + }, + "total_step": { + "type": "integer", + "description": "Denoising steps (higher = better quality, slower)", + "default": 5, + "minimum": 1, + "maximum": 20 + }, + "speed": { + "type": "number", + "description": "Speech speed multiplier", + "default": 1.05, + "minimum": 0.5, + "maximum": 2.0 + }, + "silence_duration": { + "type": "number", + "description": "Silence between chunks in seconds", + "default": 0.3, + "minimum": 0.0, + "maximum": 2.0 + }, + "min_sentence_length": { + "type": "integer", + "description": "Minimum chars before TTS generation", + "default": 10, + "minimum": 1 + }, + "emit_telemetry": { + "type": "boolean", + "description": "Emit out-of-band telemetry events (tts.start/tts.done)", + "default": false + }, + "telemetry_preview_chars": { + "type": "integer", + "description": 
"Maximum characters of text preview in telemetry (0 = omit)", + "default": 80, + "minimum": 0, + "maximum": 1000 + } + }, + "required": ["model_dir"] + })) + .category("audio") + .category("tts") + .category("ml") + .build() + } + + fn new(params: Option, logger: Logger) -> Result { + plugin_info!(logger, "SupertonicNode::new() called with params: {:?}", params); + + let config: SupertonicConfig = if let Some(p) = params { + serde_json::from_value(p).map_err(|e| format!("Config parse error: {e}"))? + } else { + SupertonicConfig::default() + }; + + plugin_info!( + logger, + "Config: model_dir={}, lang={}, voice_style={}, total_step={}, speed={}", + config.model_dir, + config.lang, + config.voice_style, + config.total_step, + config.speed + ); + + // Canonicalize model path + let model_dir = PathBuf::from(&config.model_dir); + let model_dir = if model_dir.is_absolute() { + model_dir + } else { + std::env::current_dir() + .map_err(|e| format!("Failed to get current dir: {e}"))? + .join(model_dir) + }; + let model_dir = model_dir.canonicalize().map_err(|e| { + format!("Failed to canonicalize model dir '{}': {e}", model_dir.display()) + })?; + let model_dir_str = model_dir.to_string_lossy().to_string(); + + plugin_info!(logger, "Canonicalized model_dir: {}", model_dir_str); + + // Load/cache model + let (tts_model, sample_rate) = model::get_or_load_model(&model_dir_str, &logger)?; + + plugin_info!(logger, "Model loaded, sample_rate={}", sample_rate); + + // Load/cache voice style + let voice_style = voice::resolve_voice_style( + &config.voice_style, + config.voice_styles_dir.as_deref(), + &model_dir_str, + &logger, + )?; + + let min_sentence_length = config.min_sentence_length; + + Ok(Self { + tts_model, + voice_style, + config, + model_dir: model_dir_str, + sample_rate, + text_buffer: String::new(), + sentence_splitter: SentenceSplitter::new(min_sentence_length), + logger, + }) + } + + fn process(&mut self, _pin: &str, packet: Packet, output: &OutputSender) -> 
Result<(), String> { + let text: std::borrow::Cow<'_, str> = match &packet { + Packet::Text(text) => std::borrow::Cow::Borrowed(text.as_ref()), + Packet::Binary { data, .. } => std::borrow::Cow::Owned( + String::from_utf8(data.to_vec()) + .map_err(|e| format!("Failed to decode binary data as UTF-8: {e}"))?, + ), + _ => return Err("Only accepts Text or Binary packets".to_string()), + }; + + plugin_debug!(self.logger, text = %text, "Received text input"); + + let mut sanitized = Self::sanitize_text(text.as_ref()); + + if sanitized.is_empty() { + plugin_debug!(self.logger, "Text empty after sanitization, skipping"); + return Ok(()); + } + + // Add sentence-ending punctuation if missing + if !sanitized.ends_with('.') + && !sanitized.ends_with('!') + && !sanitized.ends_with('?') + && !sanitized.ends_with('。') + && !sanitized.ends_with('!') + && !sanitized.ends_with('?') + { + sanitized.push('.'); + } + + self.text_buffer.push_str(&sanitized); + + while let Some(sentence) = self.sentence_splitter.extract_sentence(&mut self.text_buffer) { + plugin_info!(self.logger, sentence_len = sentence.len(), "Generating TTS for sentence"); + self.generate_and_send(&sentence, output)?; + } + + Ok(()) + } + + fn update_params(&mut self, params: Option) -> Result<(), String> { + if let Some(p) = params { + let new_config: SupertonicConfig = + serde_json::from_value(p).map_err(|e| format!("Config parse error: {e}"))?; + + // Hot-update runtime parameters + self.config.lang = new_config.lang; + self.config.total_step = new_config.total_step; + self.config.speed = new_config.speed; + self.config.silence_duration = new_config.silence_duration; + self.config.min_sentence_length = new_config.min_sentence_length; + self.config.emit_telemetry = new_config.emit_telemetry; + self.config.telemetry_preview_chars = new_config.telemetry_preview_chars; + + // Reload voice style if changed + if new_config.voice_style != self.config.voice_style + || new_config.voice_styles_dir != 
self.config.voice_styles_dir + { + plugin_info!( + self.logger, + old = %self.config.voice_style, + new = %new_config.voice_style, + "Voice style changed, reloading" + ); + self.voice_style = voice::resolve_voice_style( + &new_config.voice_style, + new_config.voice_styles_dir.as_deref(), + &self.model_dir, + &self.logger, + )?; + self.config.voice_style = new_config.voice_style; + self.config.voice_styles_dir = new_config.voice_styles_dir; + } + + // Warn if model_dir changed (requires node recreation) + if new_config.model_dir != self.config.model_dir { + plugin_warn!( + self.logger, + "model_dir changed but requires node recreation to take effect" + ); + } + + self.sentence_splitter = SentenceSplitter::new(self.config.min_sentence_length); + } + + Ok(()) + } + + fn flush(&mut self, output: &OutputSender) -> Result<(), String> { + plugin_info!( + self.logger, + buffer_len = self.text_buffer.len(), + "Flush called on Supertonic TTS" + ); + + if self.text_buffer.is_empty() { + plugin_info!(self.logger, "Text buffer was empty during flush"); + } else { + let text = self.text_buffer.clone(); + plugin_info!(self.logger, len = text.len(), "Flushing remaining text buffer"); + self.generate_and_send(&text, output)?; + self.text_buffer.clear(); + } + + Ok(()) + } + + fn cleanup(&mut self) { + if !self.text_buffer.is_empty() { + plugin_warn!( + self.logger, + len = self.text_buffer.len(), + "Text buffer not empty at cleanup" + ); + } + } +} + +impl SupertonicNode { + fn text_preview(&self, text: &str) -> Option { + let max_chars = self.config.telemetry_preview_chars; + if max_chars == 0 { + return None; + } + + let mut chars = text.chars(); + let prefix: String = chars.by_ref().take(max_chars).collect(); + if chars.next().is_some() { + Some(format!("{prefix}...")) + } else { + Some(prefix) + } + } + + fn generate_and_send(&self, text: &str, output: &OutputSender) -> Result<(), String> { + plugin_debug!(self.logger, text_len = text.len(), "Starting TTS generation"); + + let 
/// Reduces `text` to the characters the TTS front-end is fed and normalizes
/// its whitespace.
///
/// Kept: ASCII letters and digits, the punctuation `. , ! ? - ' " : ;`,
/// accented Latin letters (U+00C0..=U+0178), CJK unified ideographs, Hangul
/// syllables and Jamo, and CJK/full-width punctuation. Every other
/// non-whitespace character is dropped without leaving a gap (`"a@b"`
/// becomes `"ab"`). Runs of whitespace collapse to a single space and
/// leading/trailing whitespace is removed.
fn sanitize_text(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    // Set when whitespace has been seen since the last kept character. The
    // space is only written once another kept character follows, which gives
    // collapsing and trimming in a single pass.
    let mut pending_space = false;

    for c in text.chars() {
        if c.is_whitespace() {
            pending_space = true;
            continue;
        }

        let allowed = matches!(
            c,
            'a'..='z'
                | 'A'..='Z'
                | '0'..='9'
                | '.'
                | ','
                | '!'
                | '?'
                | '-'
                | '\''
                | '"'
                | ':'
                | ';'
                // Accented Latin: U+00C0 (A grave) through U+0178 (Y diaeresis)
                | '\u{00C0}'..='\u{0178}'
                // CJK unified ideographs
                | '\u{4E00}'..='\u{9FFF}'
                // Korean Hangul syllables
                | '\u{AC00}'..='\u{D7AF}'
                // Korean Hangul Jamo
                | '\u{1100}'..='\u{11FF}'
                // Korean Hangul Compatibility Jamo
                | '\u{3130}'..='\u{318F}'
                // CJK punctuation: ideographic full stop / comma, and the
                // full-width forms of , ! ? ; : ( ). Escaped so they cannot
                // be mistaken for (or normalized into) their ASCII twins.
                | '\u{3002}'
                | '\u{3001}'
                | '\u{FF0C}'
                | '\u{FF01}'
                | '\u{FF1F}'
                | '\u{FF1B}'
                | '\u{FF1A}'
                | '\u{FF08}'
                | '\u{FF09}'
        );
        if !allowed {
            continue;
        }

        if pending_space && !out.is_empty() {
            out.push(' ');
        }
        pending_space = false;
        out.push(c);
    }

    out
}
If `voice_styles_dir` is set, look for `{dir}/{voice_style}.json` +/// 3. Fallback to `{model_dir}/voice_styles/{voice_style}.json` +pub fn resolve_voice_style( + voice_style: &str, + voice_styles_dir: Option<&str>, + model_dir: &str, + logger: &Logger, +) -> Result, String> { + let resolved_path = resolve_path(voice_style, voice_styles_dir, model_dir)?; + + let resolved_str = resolved_path.to_string_lossy().to_string(); + + { + let cache = VOICE_CACHE.lock().map_err(|e| format!("Failed to lock voice cache: {e}"))?; + + if let Some(cached) = cache.get(&resolved_str) { + plugin_info!( + logger, + path = %resolved_str, + "CACHE HIT: Reusing cached voice style" + ); + return Ok(cached.clone()); + } + } + + plugin_info!( + logger, + path = %resolved_str, + "CACHE MISS: Loading voice style" + ); + + let style = supertonic_vendor::load_voice_style(std::slice::from_ref(&resolved_str)) + .map_err(|e| format!("Failed to load voice style '{resolved_str}': {e}"))?; + + let wrapper = Arc::new(StyleWrapper(style)); + VOICE_CACHE + .lock() + .map_err(|e| format!("Failed to lock voice cache: {e}"))? + .insert(resolved_str, wrapper.clone()); + + Ok(wrapper) +} + +fn resolve_path( + voice_style: &str, + voice_styles_dir: Option<&str>, + model_dir: &str, +) -> Result { + // 1. Direct .json path + if Path::new(voice_style).extension().is_some_and(|ext| ext.eq_ignore_ascii_case("json")) { + let p = Path::new(voice_style); + if p.exists() { + return p + .canonicalize() + .map_err(|e| format!("Failed to canonicalize voice style path: {e}")); + } + return Err(format!("Voice style file not found: {voice_style}")); + } + + // 2. Named style in voice_styles_dir + if let Some(dir) = voice_styles_dir { + let p = Path::new(dir).join(format!("{voice_style}.json")); + if p.exists() { + return p + .canonicalize() + .map_err(|e| format!("Failed to canonicalize voice style path: {e}")); + } + } + + // 3. 
Fallback to model_dir/voice_styles/ + let p = Path::new(model_dir).join("voice_styles").join(format!("{voice_style}.json")); + if p.exists() { + return p + .canonicalize() + .map_err(|e| format!("Failed to canonicalize voice style path: {e}")); + } + + Err(format!( + "Voice style '{voice_style}' not found. Searched: voice_styles_dir={voice_styles_dir:?}, model_dir={model_dir}/voice_styles/" + )) +} diff --git a/plugins/native/supertonic/vendor/supertonic/Cargo.toml b/plugins/native/supertonic/vendor/supertonic/Cargo.toml new file mode 100644 index 00000000..f062f878 --- /dev/null +++ b/plugins/native/supertonic/vendor/supertonic/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "supertonic-vendor" +version = "0.1.0" +edition = "2021" +license = "MIT" + +[lib] +name = "supertonic_vendor" + +[dependencies] +ort = { version = "=2.0.0-rc.10", features = ["ndarray"] } +ndarray = { version = "0.16", features = ["rayon"] } +rand = "0.8" +rand_distr = "0.4" +rayon = "1.10" +hound = "3.5" +rustfft = "6.2" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +anyhow = "1.0" +unicode-normalization = "0.1" +regex = "1.10" +libc = "0.2" diff --git a/plugins/native/supertonic/vendor/supertonic/src/helper.rs b/plugins/native/supertonic/vendor/supertonic/src/helper.rs new file mode 100644 index 00000000..b2893dd7 --- /dev/null +++ b/plugins/native/supertonic/vendor/supertonic/src/helper.rs @@ -0,0 +1,832 @@ +// SPDX-FileCopyrightText: © 2025 Supertone, Inc. +// +// SPDX-License-Identifier: MIT +// +// Vendored from https://github.com/supertone-inc/supertonic +// with CLI-specific code removed for library use. 
+ +use anyhow::{bail, Context, Result}; +use hound::{SampleFormat, WavSpec, WavWriter}; +use ndarray::{Array, Array3}; +use ort::session::Session; +use ort::value::Value; +use rand::thread_rng; +use rand_distr::{Distribution, Normal}; +use regex::Regex; +use serde::{Deserialize, Serialize}; +use std::fs::File; +use std::io::BufReader; +use std::path::Path; +use unicode_normalization::UnicodeNormalization; + +// Available languages for multilingual TTS +pub const AVAILABLE_LANGS: &[&str] = &["en", "ko", "es", "pt", "fr"]; + +pub fn is_valid_lang(lang: &str) -> bool { + AVAILABLE_LANGS.contains(&lang) +} + +// ============================================================================ +// Configuration Structures +// ============================================================================ + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Config { + pub ae: AEConfig, + pub ttl: TTLConfig, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AEConfig { + pub sample_rate: i32, + pub base_chunk_size: i32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TTLConfig { + pub chunk_compress_factor: i32, + pub latent_dim: i32, +} + +/// Load configuration from JSON file +pub fn load_cfgs>(onnx_dir: P) -> Result { + let cfg_path = onnx_dir.as_ref().join("tts.json"); + let file = File::open(cfg_path)?; + let reader = BufReader::new(file); + let cfgs: Config = serde_json::from_reader(reader)?; + Ok(cfgs) +} + +// ============================================================================ +// Voice Style Data Structure +// ============================================================================ + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VoiceStyleData { + pub style_ttl: StyleComponent, + pub style_dp: StyleComponent, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StyleComponent { + pub data: Vec>>, + pub dims: Vec, + #[serde(rename = "type")] + pub dtype: String, +} + +// 
============================================================================ +// Unicode Text Processor +// ============================================================================ + +pub struct UnicodeProcessor { + indexer: Vec, +} + +impl UnicodeProcessor { + pub fn new>(unicode_indexer_json_path: P) -> Result { + let file = File::open(unicode_indexer_json_path)?; + let reader = BufReader::new(file); + let indexer: Vec = serde_json::from_reader(reader)?; + Ok(UnicodeProcessor { indexer }) + } + + pub fn call( + &self, + text_list: &[String], + lang_list: &[String], + ) -> Result<(Vec>, Array3)> { + let mut processed_texts: Vec = Vec::new(); + for (text, lang) in text_list.iter().zip(lang_list.iter()) { + processed_texts.push(preprocess_text(text, lang)?); + } + + let text_ids_lengths: Vec = + processed_texts.iter().map(|t| t.chars().count()).collect(); + + let max_len = *text_ids_lengths.iter().max().unwrap_or(&0); + + let mut text_ids = Vec::new(); + for text in &processed_texts { + let mut row = vec![0i64; max_len]; + let unicode_vals = text_to_unicode_values(text); + for (j, &val) in unicode_vals.iter().enumerate() { + if val < self.indexer.len() { + row[j] = self.indexer[val]; + } else { + row[j] = -1; + } + } + text_ids.push(row); + } + + let text_mask = get_text_mask(&text_ids_lengths); + + Ok((text_ids, text_mask)) + } +} + +pub fn preprocess_text(text: &str, lang: &str) -> Result { + let mut text: String = text.nfkd().collect(); + + // Remove emojis (wide Unicode range) + let emoji_pattern = Regex::new( + r"[\x{1F600}-\x{1F64F}\x{1F300}-\x{1F5FF}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F800}-\x{1F8FF}\x{1F900}-\x{1F9FF}\x{1FA00}-\x{1FA6F}\x{1FA70}-\x{1FAFF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{1F1E6}-\x{1F1FF}]+" + ).unwrap(); + text = emoji_pattern.replace_all(&text, "").to_string(); + + // Replace various dashes and symbols + let replacements = [ + ("\u{2013}", "-"), // en dash + ("\u{2011}", "-"), // non-breaking hyphen + 
("\u{2014}", "-"), // em dash + ("_", " "), + ("\u{201C}", "\""), // left double quote + ("\u{201D}", "\""), // right double quote + ("\u{2018}", "'"), // left single quote + ("\u{2019}", "'"), // right single quote + ("\u{00B4}", "'"), // acute accent + ("`", "'"), + ("[", " "), + ("]", " "), + ("|", " "), + ("/", " "), + ("#", " "), + ("\u{2192}", " "), // right arrow + ("\u{2190}", " "), // left arrow + ]; + + for (from, to) in &replacements { + text = text.replace(from, to); + } + + // Remove special symbols + let special_symbols = ["\u{2665}", "\u{2606}", "\u{2661}", "\u{00A9}", "\\"]; + for symbol in &special_symbols { + text = text.replace(symbol, ""); + } + + // Replace known expressions + let expr_replacements = [ + ("@", " at "), + ("e.g.,", "for example, "), + ("i.e.,", "that is, "), + ]; + + for (from, to) in &expr_replacements { + text = text.replace(from, to); + } + + // Fix spacing around punctuation + text = Regex::new(r" ,") + .unwrap() + .replace_all(&text, ",") + .to_string(); + text = Regex::new(r" \.") + .unwrap() + .replace_all(&text, ".") + .to_string(); + text = Regex::new(r" !") + .unwrap() + .replace_all(&text, "!") + .to_string(); + text = Regex::new(r" \?") + .unwrap() + .replace_all(&text, "?") + .to_string(); + text = Regex::new(r" ;") + .unwrap() + .replace_all(&text, ";") + .to_string(); + text = Regex::new(r" :") + .unwrap() + .replace_all(&text, ":") + .to_string(); + text = Regex::new(r" '") + .unwrap() + .replace_all(&text, "'") + .to_string(); + + // Remove duplicate quotes + while text.contains("\"\"") { + text = text.replace("\"\"", "\""); + } + while text.contains("''") { + text = text.replace("''", "'"); + } + while text.contains("``") { + text = text.replace("``", "`"); + } + + // Remove extra spaces + text = Regex::new(r"\s+") + .unwrap() + .replace_all(&text, " ") + .to_string(); + text = text.trim().to_string(); + + // If text doesn't end with punctuation, add a period + if !text.is_empty() { + let ends_with_punct = 
Regex::new( + r#"[.!?;:,'"\u{201C}\u{201D}\u{2018}\u{2019})\]}\u{2026}\u{3002}\u{300D}\u{300F}\u{3011}\u{3009}\u{300B}\u{203A}\u{00BB}]$"#, + ) + .unwrap(); + if !ends_with_punct.is_match(&text) { + text.push('.'); + } + } + + // Validate language + if !is_valid_lang(lang) { + bail!( + "Invalid language: {}. Available: {:?}", + lang, + AVAILABLE_LANGS + ); + } + + // Wrap text with language tags + text = format!("<{lang}>{text}"); + + Ok(text) +} + +pub fn text_to_unicode_values(text: &str) -> Vec { + text.chars().map(|c| c as usize).collect() +} + +pub fn length_to_mask(lengths: &[usize], max_len: Option) -> Array3 { + let bsz = lengths.len(); + let max_len = max_len.unwrap_or_else(|| *lengths.iter().max().unwrap_or(&0)); + + let mut mask = Array3::::zeros((bsz, 1, max_len)); + for (i, &len) in lengths.iter().enumerate() { + for j in 0..len.min(max_len) { + mask[[i, 0, j]] = 1.0; + } + } + mask +} + +pub fn get_text_mask(text_ids_lengths: &[usize]) -> Array3 { + let max_len = *text_ids_lengths.iter().max().unwrap_or(&0); + length_to_mask(text_ids_lengths, Some(max_len)) +} + +/// Sample noisy latent from normal distribution and apply mask +pub fn sample_noisy_latent( + duration: &[f32], + sample_rate: i32, + base_chunk_size: i32, + chunk_compress: i32, + latent_dim: i32, +) -> (Array3, Array3) { + let bsz = duration.len(); + let max_dur = duration.iter().fold(0.0f32, |a, &b| a.max(b)); + + let wav_len_max = (max_dur * sample_rate as f32) as usize; + let wav_lengths: Vec = duration + .iter() + .map(|&d| (d * sample_rate as f32) as usize) + .collect(); + + let chunk_size = (base_chunk_size * chunk_compress) as usize; + let latent_len = (wav_len_max + chunk_size - 1) / chunk_size; + let latent_dim_val = (latent_dim * chunk_compress) as usize; + + let mut noisy_latent = Array3::::zeros((bsz, latent_dim_val, latent_len)); + + let normal = Normal::new(0.0, 1.0).unwrap(); + let mut rng = thread_rng(); + + for b in 0..bsz { + for d in 0..latent_dim_val { + for t in 
0..latent_len { + noisy_latent[[b, d, t]] = normal.sample(&mut rng); + } + } + } + + let latent_lengths: Vec = wav_lengths + .iter() + .map(|&len| (len + chunk_size - 1) / chunk_size) + .collect(); + + let latent_mask = length_to_mask(&latent_lengths, Some(latent_len)); + + // Apply mask + for b in 0..bsz { + for d in 0..latent_dim_val { + for t in 0..latent_len { + noisy_latent[[b, d, t]] *= latent_mask[[b, 0, t]]; + } + } + } + + (noisy_latent, latent_mask) +} + +// ============================================================================ +// WAV File I/O +// ============================================================================ + +#[allow(dead_code)] +pub fn write_wav_file>( + filename: P, + audio_data: &[f32], + sample_rate: i32, +) -> Result<()> { + let spec = WavSpec { + channels: 1, + sample_rate: sample_rate as u32, + bits_per_sample: 16, + sample_format: SampleFormat::Int, + }; + + let mut writer = WavWriter::create(filename, spec)?; + + for &sample in audio_data { + let clamped = sample.max(-1.0).min(1.0); + #[allow(clippy::cast_possible_truncation)] + let val = (clamped * 32767.0) as i16; + writer.write_sample(val)?; + } + + writer.finalize()?; + Ok(()) +} + +// ============================================================================ +// Text Chunking +// ============================================================================ + +const MAX_CHUNK_LENGTH: usize = 300; + +const ABBREVIATIONS: &[&str] = &[ + "Dr.", "Mr.", "Mrs.", "Ms.", "Prof.", "Sr.", "Jr.", "St.", "Ave.", "Rd.", "Blvd.", "Dept.", + "Inc.", "Ltd.", "Co.", "Corp.", "etc.", "vs.", "i.e.", "e.g.", "Ph.D.", +]; + +pub fn chunk_text(text: &str, max_len: Option) -> Vec { + let max_len = max_len.unwrap_or(MAX_CHUNK_LENGTH); + let text = text.trim(); + + if text.is_empty() { + return vec![String::new()]; + } + + // Split by paragraphs + let para_re = Regex::new(r"\n\s*\n").unwrap(); + let paragraphs: Vec<&str> = para_re.split(text).collect(); + let mut chunks = Vec::new(); + + for 
para in paragraphs { + let para = para.trim(); + if para.is_empty() { + continue; + } + + if para.len() <= max_len { + chunks.push(para.to_string()); + continue; + } + + // Split by sentences + let sentences = split_sentences(para); + let mut current = String::new(); + let mut current_len = 0; + + for sentence in sentences { + let sentence = sentence.trim(); + if sentence.is_empty() { + continue; + } + + let sentence_len = sentence.len(); + if sentence_len > max_len { + if !current.is_empty() { + chunks.push(current.trim().to_string()); + current.clear(); + current_len = 0; + } + + // Try splitting by comma + let parts: Vec<&str> = sentence.split(',').collect(); + for part in parts { + let part = part.trim(); + if part.is_empty() { + continue; + } + + let part_len = part.len(); + if part_len > max_len { + // Split by space as last resort + let words: Vec<&str> = part.split_whitespace().collect(); + let mut word_chunk = String::new(); + let mut word_chunk_len = 0; + + for word in words { + let word_len = word.len(); + if word_chunk_len + word_len + 1 > max_len && !word_chunk.is_empty() { + chunks.push(word_chunk.trim().to_string()); + word_chunk.clear(); + word_chunk_len = 0; + } + + if !word_chunk.is_empty() { + word_chunk.push(' '); + word_chunk_len += 1; + } + word_chunk.push_str(word); + word_chunk_len += word_len; + } + + if !word_chunk.is_empty() { + chunks.push(word_chunk.trim().to_string()); + } + } else { + if current_len + part_len + 1 > max_len && !current.is_empty() { + chunks.push(current.trim().to_string()); + current.clear(); + current_len = 0; + } + + if !current.is_empty() { + current.push_str(", "); + current_len += 2; + } + current.push_str(part); + current_len += part_len; + } + } + continue; + } + + if current_len + sentence_len + 1 > max_len && !current.is_empty() { + chunks.push(current.trim().to_string()); + current.clear(); + current_len = 0; + } + + if !current.is_empty() { + current.push(' '); + current_len += 1; + } + 
current.push_str(sentence); + current_len += sentence_len; + } + + if !current.is_empty() { + chunks.push(current.trim().to_string()); + } + } + + if chunks.is_empty() { + vec![String::new()] + } else { + chunks + } +} + +fn split_sentences(text: &str) -> Vec { + let re = Regex::new(r"([.!?])\s+").unwrap(); + + let matches: Vec<_> = re.find_iter(text).collect(); + if matches.is_empty() { + return vec![text.to_string()]; + } + + let mut sentences = Vec::new(); + let mut last_end = 0; + + for m in matches { + let before_punc = &text[last_end..m.start()]; + + // Check if this ends with an abbreviation + let mut is_abbrev = false; + for abbrev in ABBREVIATIONS { + let combined = format!( + "{}{}", + before_punc.trim(), + &text[m.start()..m.start() + 1] + ); + if combined.ends_with(abbrev) { + is_abbrev = true; + break; + } + } + + if !is_abbrev { + sentences.push(text[last_end..m.end()].to_string()); + last_end = m.end(); + } + } + + if last_end < text.len() { + sentences.push(text[last_end..].to_string()); + } + + if sentences.is_empty() { + vec![text.to_string()] + } else { + sentences + } +} + +// ============================================================================ +// ONNX Runtime Integration +// ============================================================================ + +pub struct Style { + pub ttl: Array3, + pub dp: Array3, +} + +pub struct TextToSpeech { + cfgs: Config, + text_processor: UnicodeProcessor, + dp_ort: Session, + text_enc_ort: Session, + vector_est_ort: Session, + vocoder_ort: Session, + pub sample_rate: i32, +} + +impl TextToSpeech { + pub fn new( + cfgs: Config, + text_processor: UnicodeProcessor, + dp_ort: Session, + text_enc_ort: Session, + vector_est_ort: Session, + vocoder_ort: Session, + ) -> Self { + let sample_rate = cfgs.ae.sample_rate; + TextToSpeech { + cfgs, + text_processor, + dp_ort, + text_enc_ort, + vector_est_ort, + vocoder_ort, + sample_rate, + } + } + + fn _infer( + &mut self, + text_list: &[String], + lang_list: 
&[String], + style: &Style, + total_step: usize, + speed: f32, + ) -> Result<(Vec, Vec)> { + let bsz = text_list.len(); + + // Process text + let (text_ids, text_mask) = self.text_processor.call(text_list, lang_list)?; + + let text_ids_array = { + let text_ids_shape = (bsz, text_ids[0].len()); + let mut flat = Vec::new(); + for row in &text_ids { + flat.extend_from_slice(row); + } + Array::from_shape_vec(text_ids_shape, flat)? + }; + + let text_ids_value = Value::from_array(text_ids_array)?; + let text_mask_value = Value::from_array(text_mask.clone())?; + let style_dp_value = Value::from_array(style.dp.clone())?; + + // Predict duration + let dp_outputs = self.dp_ort.run(ort::inputs! { + "text_ids" => &text_ids_value, + "style_dp" => &style_dp_value, + "text_mask" => &text_mask_value + })?; + + let (_, duration_data) = dp_outputs["duration"].try_extract_tensor::()?; + let mut duration: Vec = duration_data.to_vec(); + + // Apply speed factor to duration + for dur in duration.iter_mut() { + *dur /= speed; + } + + // Encode text + let style_ttl_value = Value::from_array(style.ttl.clone())?; + let text_enc_outputs = self.text_enc_ort.run(ort::inputs! 
{ + "text_ids" => &text_ids_value, + "style_ttl" => &style_ttl_value, + "text_mask" => &text_mask_value + })?; + + let (text_emb_shape, text_emb_data) = + text_enc_outputs["text_emb"].try_extract_tensor::()?; + let text_emb = Array3::from_shape_vec( + ( + text_emb_shape[0] as usize, + text_emb_shape[1] as usize, + text_emb_shape[2] as usize, + ), + text_emb_data.to_vec(), + )?; + + // Sample noisy latent + let (mut xt, latent_mask) = sample_noisy_latent( + &duration, + self.sample_rate, + self.cfgs.ae.base_chunk_size, + self.cfgs.ttl.chunk_compress_factor, + self.cfgs.ttl.latent_dim, + ); + + // Prepare constant arrays + let total_step_array = Array::from_elem(bsz, total_step as f32); + + // Denoising loop + for step in 0..total_step { + let current_step_array = Array::from_elem(bsz, step as f32); + + let xt_value = Value::from_array(xt.clone())?; + let text_emb_value = Value::from_array(text_emb.clone())?; + let latent_mask_value = Value::from_array(latent_mask.clone())?; + let text_mask_value2 = Value::from_array(text_mask.clone())?; + let current_step_value = Value::from_array(current_step_array)?; + let total_step_value = Value::from_array(total_step_array.clone())?; + + let vector_est_outputs = self.vector_est_ort.run(ort::inputs! { + "noisy_latent" => &xt_value, + "text_emb" => &text_emb_value, + "style_ttl" => &style_ttl_value, + "latent_mask" => &latent_mask_value, + "text_mask" => &text_mask_value2, + "current_step" => ¤t_step_value, + "total_step" => &total_step_value + })?; + + let (denoised_shape, denoised_data) = + vector_est_outputs["denoised_latent"].try_extract_tensor::()?; + xt = Array3::from_shape_vec( + ( + denoised_shape[0] as usize, + denoised_shape[1] as usize, + denoised_shape[2] as usize, + ), + denoised_data.to_vec(), + )?; + } + + // Generate waveform + let final_latent_value = Value::from_array(xt)?; + let vocoder_outputs = self.vocoder_ort.run(ort::inputs! 
{ + "latent" => &final_latent_value + })?; + + let (_, wav_data) = vocoder_outputs["wav_tts"].try_extract_tensor::()?; + let wav: Vec = wav_data.to_vec(); + + Ok((wav, duration)) + } + + pub fn call( + &mut self, + text: &str, + lang: &str, + style: &Style, + total_step: usize, + speed: f32, + silence_duration: f32, + ) -> Result<(Vec, f32)> { + let max_len = if lang == "ko" { 120 } else { 300 }; + let chunks = chunk_text(text, Some(max_len)); + + let mut wav_cat: Vec = Vec::new(); + let mut dur_cat: f32 = 0.0; + + for (i, chunk) in chunks.iter().enumerate() { + let (wav, duration) = + self._infer(&[chunk.clone()], &[lang.to_string()], style, total_step, speed)?; + + let dur = duration[0]; + #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] + let wav_len = (self.sample_rate as f32 * dur) as usize; + let wav_chunk = &wav[..wav_len.min(wav.len())]; + + if i == 0 { + wav_cat.extend_from_slice(wav_chunk); + dur_cat = dur; + } else { + #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] + let silence_len = (silence_duration * self.sample_rate as f32) as usize; + let silence = vec![0.0f32; silence_len]; + + wav_cat.extend_from_slice(&silence); + wav_cat.extend_from_slice(wav_chunk); + dur_cat += silence_duration + dur; + } + } + + Ok((wav_cat, dur_cat)) + } +} + +// ============================================================================ +// Component Loading Functions +// ============================================================================ + +/// Load voice style from JSON files +pub fn load_voice_style(voice_style_paths: &[String]) -> Result