diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 00cb14b..6eb7975 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,3 +25,15 @@ jobs: - uses: actions/checkout@v4 - run: cargo test - run: cargo test --features ffi + + wasm: + name: WASM + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 22 + - uses: jetli/wasm-pack-action@v0.4.0 + - run: rustup target add wasm32-unknown-unknown + - run: npm run wasm:ci diff --git a/.github/workflows/npm-publish.yml b/.github/workflows/npm-publish.yml new file mode 100644 index 0000000..ae4ebcc --- /dev/null +++ b/.github/workflows/npm-publish.yml @@ -0,0 +1,27 @@ +name: Publish WASM Package + +on: + push: + tags: + - "v*" + +jobs: + publish: + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + registry-url: "https://registry.npmjs.org" + + - uses: jetli/wasm-pack-action@v0.4.0 + + - run: rustup target add wasm32-unknown-unknown + - run: npm run wasm:ci + - run: npm run wasm:publish + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} diff --git a/.gitignore b/.gitignore index ea8c4bf..30fa6c4 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /target +/pkg +/pkg-node +/pkg-web diff --git a/Cargo.lock b/Cargo.lock index 9bf9c1a..dcdc0ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -35,12 +35,28 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -321,6 +337,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "rusty-fork" version = "0.3.1" @@ -409,8 +431,10 @@ dependencies = [ name = "text-processing-rs" version = "0.1.0" dependencies = [ + "console_error_panic_hook", "lazy_static", "proptest", + "wasm-bindgen", ] [[package]] @@ -458,6 +482,51 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + [[package]] name = "wasm-encoder" version = "0.244.0" diff --git a/Cargo.toml b/Cargo.toml index f918537..cde6772 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,8 @@ crate-type = ["lib", "staticlib", "cdylib"] [dependencies] lazy_static = "1" +wasm-bindgen = { version = "0.2", optional = true } +console_error_panic_hook = { version = "0.1", optional = true } [dev-dependencies] proptest = "1" @@ -20,3 +22,4 @@ proptest = "1" [features] default = [] ffi = [] # Enable C FFI bindings +wasm = ["dep:wasm-bindgen", "dep:console_error_panic_hook"] diff --git a/README.md b/README.md index 130372e..dfd23e0 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,62 @@ let result = tn_normalize("123"); assert_eq!(result, "one hundred twenty three"); ``` +### JavaScript (WASM) + +Build wasm artifacts: + +```bash +npm run wasm:build:node +npm run wasm:build:web +``` + +Node usage: + +```javascript +import * as wasm from "./pkg-node/text_processing_rs.js"; + +console.log(wasm.normalize("two hundred")); // "200" +console.log(wasm.tnNormalize("$5.50")); // "five dollars and fifty cents" + +wasm.addRule("gee pee tee", "GPT"); +console.log(wasm.normalize("gee pee tee")); // "GPT" +``` + +The generated npm package name is `@fluidinference/text-processing-rs`. 
+ +Web project usage (Vite / Next.js / webpack): + +```bash +npm install @fluidinference/text-processing-rs +``` + +```javascript +import init, * as wasm from "@fluidinference/text-processing-rs"; + +async function run() { + // Loads and initializes the .wasm module (required once at startup) + await init(); + + const itn = wasm.normalize("two hundred"); + const tn = wasm.tnNormalize("$5.50"); + + console.log(itn); // "200" + console.log(tn); // "five dollars and fifty cents" + + wasm.addRule("gee pee tee", "GPT"); + console.log(wasm.normalize("gee pee tee")); // "GPT" +} + +run(); +``` + +If your framework supports top-level `await`, you can initialize at module load time: + +```javascript +import init, * as wasm from "@fluidinference/text-processing-rs"; +await init(); +``` + Sentence-level normalization scans for normalizable spans within a larger sentence: ```rust @@ -163,6 +219,19 @@ cargo build cargo test ``` +### WASM + JavaScript + +```bash +# Build + smoke test (Node) + build browser artifact +npm run wasm:ci + +# Create a tarball from the browser package +npm run wasm:pack + +# Publish browser package to npm (requires npm auth) +npm run wasm:publish +``` + ### CLI Tools ```bash diff --git a/package.json b/package.json new file mode 100644 index 0000000..4ac5a1a --- /dev/null +++ b/package.json @@ -0,0 +1,22 @@ +{ + "name": "@fluidinference/text-processing-rs", + "version": "0.1.0", + "description": "Inverse Text Normalization (ITN) — convert spoken-form ASR output to written form", + "type": "module", + "main": "pkg-web/text_processing_rs.js", + "types": "pkg-web/text_processing_rs.d.ts", + "files": [ + "pkg-web/text_processing_rs.js", + "pkg-web/text_processing_rs.d.ts", + "pkg-web/text_processing_rs_bg.wasm", + "pkg-web/text_processing_rs_bg.wasm.d.ts" + ], + "scripts": { + "wasm:build:node": "wasm-pack build --release --target nodejs --features wasm && mkdir -p pkg-node && cp -f pkg/* pkg-node/ && node scripts/set-wasm-package-name.mjs pkg-node", + 
"wasm:build:web": "wasm-pack build --release --target web --features wasm && mkdir -p pkg-web && cp -f pkg/* pkg-web/ && node scripts/set-wasm-package-name.mjs pkg-web", + "wasm:test:node": "node wasm-tests/node-smoke.mjs", + "wasm:ci": "npm run wasm:build:node && npm run wasm:test:node && npm run wasm:build:web", + "wasm:pack": "npm pack ./pkg-web", + "wasm:publish": "npm publish ./pkg-web --access public" + } +} diff --git a/scripts/set-wasm-package-name.mjs b/scripts/set-wasm-package-name.mjs new file mode 100644 index 0000000..fa1b6ab --- /dev/null +++ b/scripts/set-wasm-package-name.mjs @@ -0,0 +1,13 @@ +import fs from 'node:fs'; +import path from 'node:path'; + +const pkgDir = process.argv[2]; +if (!pkgDir) { + throw new Error('Usage: node scripts/set-wasm-package-name.mjs PKG_DIR'); +} + +const packageJsonPath = path.join(pkgDir, 'package.json'); +const pkg = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8')); +pkg.name = '@fluidinference/text-processing-rs'; +pkg.keywords = ['asr', 'speech', 'normalization', 'nlp', 'itn', 'tts', 'wasm']; +fs.writeFileSync(packageJsonPath, `${JSON.stringify(pkg, null, 2)}\n`); diff --git a/src/lib.rs b/src/lib.rs index 493997a..5ad114d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,6 +22,8 @@ pub mod tn; #[cfg(feature = "ffi")] pub mod ffi; +#[cfg(all(target_arch = "wasm32", feature = "wasm"))] +pub mod wasm; use itn::en::{ cardinal, date, decimal, electronic, measure, money, ordinal, punctuation, telephone, time, diff --git a/src/wasm.rs b/src/wasm.rs new file mode 100644 index 0000000..0ccfd04 --- /dev/null +++ b/src/wasm.rs @@ -0,0 +1,90 @@ +//! WebAssembly exports for JavaScript interop. 
+ +use wasm_bindgen::prelude::*; + +use crate::{ + custom_rules, normalize, normalize_sentence, normalize_sentence_with_max_span, + normalize_with_lang, tn_normalize, tn_normalize_lang, tn_normalize_sentence, + tn_normalize_sentence_lang, tn_normalize_sentence_with_max_span, + tn_normalize_sentence_with_max_span_lang, +}; + +/// Log Rust panics to the JS console. Opt-in: call once from JS (repeat calls are no-ops). +#[wasm_bindgen] +pub fn set_panic_hook() { + console_error_panic_hook::set_once(); +} + +#[wasm_bindgen(js_name = normalize)] +pub fn normalize_js(input: &str) -> String { + normalize(input) +} + +#[wasm_bindgen(js_name = normalizeWithLang)] +pub fn normalize_with_lang_js(input: &str, lang: &str) -> String { + normalize_with_lang(input, lang) +} + +#[wasm_bindgen(js_name = normalizeSentence)] +pub fn normalize_sentence_js(input: &str) -> String { + normalize_sentence(input) +} + +#[wasm_bindgen(js_name = normalizeSentenceWithMaxSpan)] +pub fn normalize_sentence_with_max_span_js(input: &str, max_span_tokens: u32) -> String { + normalize_sentence_with_max_span(input, max_span_tokens as usize) +} + +#[wasm_bindgen(js_name = tnNormalize)] +pub fn tn_normalize_js(input: &str) -> String { + tn_normalize(input) +} + +#[wasm_bindgen(js_name = tnNormalizeLang)] +pub fn tn_normalize_lang_js(input: &str, lang: &str) -> String { + tn_normalize_lang(input, lang) +} + +#[wasm_bindgen(js_name = tnNormalizeSentence)] +pub fn tn_normalize_sentence_js(input: &str) -> String { + tn_normalize_sentence(input) +} + +#[wasm_bindgen(js_name = tnNormalizeSentenceLang)] +pub fn tn_normalize_sentence_lang_js(input: &str, lang: &str) -> String { + tn_normalize_sentence_lang(input, lang) +} + +#[wasm_bindgen(js_name = tnNormalizeSentenceWithMaxSpan)] +pub fn tn_normalize_sentence_with_max_span_js(input: &str, max_span_tokens: u32) -> String { + tn_normalize_sentence_with_max_span(input, max_span_tokens as usize) +} + +#[wasm_bindgen(js_name = tnNormalizeSentenceWithMaxSpanLang)] +pub fn 
tn_normalize_sentence_with_max_span_lang_js( + input: &str, + lang: &str, + max_span_tokens: u32, +) -> String { + tn_normalize_sentence_with_max_span_lang(input, lang, max_span_tokens as usize) +} + +#[wasm_bindgen(js_name = addRule)] +pub fn add_rule_js(spoken: &str, written: &str) { + custom_rules::add_rule(spoken, written); +} + +#[wasm_bindgen(js_name = removeRule)] +pub fn remove_rule_js(spoken: &str) -> bool { + custom_rules::remove_rule(spoken) +} + +#[wasm_bindgen(js_name = clearRules)] +pub fn clear_rules_js() { + custom_rules::clear_rules(); +} + +#[wasm_bindgen(js_name = ruleCount)] +pub fn rule_count_js() -> u32 { + custom_rules::rule_count() as u32 +} diff --git a/wasm-tests/node-smoke.mjs b/wasm-tests/node-smoke.mjs new file mode 100644 index 0000000..d59a2dc --- /dev/null +++ b/wasm-tests/node-smoke.mjs @@ -0,0 +1,42 @@ +import * as wasm from '../pkg-node/text_processing_rs.js'; + +function assertEqual(actual, expected, message) { + if (actual !== expected) { + throw new Error(`${message}: expected "${expected}", got "${actual}"`); + } +} + +function assertTrue(condition, message) { + if (!condition) { + throw new Error(message); + } +} + +assertEqual(wasm.normalize('two hundred'), '200', 'normalize should convert spoken numbers'); +assertEqual( + wasm.normalizeWithLang('two hundred', 'en'), + '200', + 'normalizeWithLang should work' +); +assertEqual( + wasm.normalizeSentence('I have twenty one apples'), + 'I have 21 apples', + 'normalizeSentence should convert spans' +); +assertEqual(wasm.tnNormalize('$5.50'), 'five dollars and fifty cents', 'tnNormalize should work'); +assertEqual( + wasm.tnNormalizeSentence('I paid $5 for 23 items'), + 'I paid five dollars for twenty three items', + 'tnNormalizeSentence should convert spans' +); + +wasm.clearRules(); +assertEqual(wasm.ruleCount(), 0, 'ruleCount starts at 0'); +wasm.addRule('gee pee tee', 'GPT'); +assertEqual(wasm.ruleCount(), 1, 'ruleCount increments'); +assertEqual(wasm.normalize('gee pee tee'), 
'GPT', 'custom rules should apply'); +assertTrue(wasm.removeRule('gee pee tee'), 'removeRule should return true when found'); +assertEqual(wasm.ruleCount(), 0, 'rule removed'); +assertTrue(!wasm.removeRule('gee pee tee'), 'removeRule should return false when missing'); + +console.log('WASM node smoke test passed');