Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,15 @@ jobs:
- uses: actions/checkout@v4
- run: cargo test
- run: cargo test --features ffi

# WASM job: checks out the repo, sets up Node 22 and the wasm-pack action,
# adds the wasm32-unknown-unknown Rust target, then runs the `wasm:ci` npm script.
wasm:
name: WASM
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 22
- uses: jetli/wasm-pack-action@v0.4.0
- run: rustup target add wasm32-unknown-unknown
- run: npm run wasm:ci
27 changes: 27 additions & 0 deletions .github/workflows/npm-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Publishes the WASM package to npm whenever a tag matching "v*" is pushed.
name: Publish WASM Package

on:
push:
tags:
- "v*"

jobs:
publish:
runs-on: ubuntu-latest
# The job only requests read access to repository contents.
permissions:
contents: read
steps:
- uses: actions/checkout@v4

# Node 22 configured against the public npm registry.
- uses: actions/setup-node@v4
with:
node-version: 22
registry-url: "https://registry.npmjs.org"

- uses: jetli/wasm-pack-action@v0.4.0

# `wasm:ci` runs before `wasm:publish`, so a failing CI script stops the job
# before anything is published.
- run: rustup target add wasm32-unknown-unknown
- run: npm run wasm:ci
- run: npm run wasm:publish
env:
# npm authentication token, read from the NPM_TOKEN repository secret.
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
/target
/pkg
/pkg-node
/pkg-web
69 changes: 69 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@ crate-type = ["lib", "staticlib", "cdylib"]

[dependencies]
lazy_static = "1"
wasm-bindgen = { version = "0.2", optional = true }
console_error_panic_hook = { version = "0.1", optional = true }

[dev-dependencies]
proptest = "1"

[features]
default = []
ffi = [] # Enable C FFI bindings
wasm = ["dep:wasm-bindgen", "dep:console_error_panic_hook"] # Enable wasm-bindgen JavaScript bindings (src/wasm.rs, wasm32 targets only)
69 changes: 69 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,62 @@ let result = tn_normalize("123");
assert_eq!(result, "one hundred twenty three");
```

### JavaScript (WASM)

Build wasm artifacts:

```bash
npm run wasm:build:node
npm run wasm:build:web
```

Node usage:

```javascript
import * as wasm from "./pkg-node/text_processing_rs.js";

console.log(wasm.normalize("two hundred")); // "200"
console.log(wasm.tnNormalize("$5.50")); // "five dollars and fifty cents"

wasm.addRule("gee pee tee", "GPT");
console.log(wasm.normalize("gee pee tee")); // "GPT"
```

The generated npm package name is `@fluidinference/text-processing-rs`.

Web project usage (Vite / Next.js / webpack):

```bash
npm install @fluidinference/text-processing-rs
```

```javascript
import init, * as wasm from "@fluidinference/text-processing-rs";

async function run() {
// Loads and initializes the .wasm module (required once at startup)
await init();

const itn = wasm.normalize("two hundred");
const tn = wasm.tnNormalize("$5.50");

console.log(itn); // "200"
console.log(tn); // "five dollars and fifty cents"

wasm.addRule("gee pee tee", "GPT");
console.log(wasm.normalize("gee pee tee")); // "GPT"
}

run();
```

If your framework supports top-level `await`, you can initialize at module load time:

```javascript
import init, * as wasm from "@fluidinference/text-processing-rs";
await init();
```

Sentence-level normalization scans for normalizable spans within a larger sentence:

```rust
Expand Down Expand Up @@ -163,6 +219,19 @@ cargo build
cargo test
```

### WASM + JavaScript

```bash
# Build + smoke test (Node) + build browser artifact
npm run wasm:ci

# Create a tarball from the browser package
npm run wasm:pack

# Publish browser package to npm (requires npm auth)
npm run wasm:publish
```

### CLI Tools

```bash
Expand Down
22 changes: 22 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"name": "@fluidinference/text-processing-rs",
"version": "0.1.0",
"description": "Inverse Text Normalization (ITN) — convert spoken-form ASR output to written form",
"type": "module",
"main": "pkg-web/text_processing_rs.js",
"types": "pkg-web/text_processing_rs.d.ts",
"files": [
"pkg-web/text_processing_rs.js",
"pkg-web/text_processing_rs.d.ts",
"pkg-web/text_processing_rs_bg.wasm",
"pkg-web/text_processing_rs_bg.wasm.d.ts"
],
"scripts": {
"wasm:build:node": "wasm-pack build --release --target nodejs --features wasm && mkdir -p pkg-node && cp -f pkg/* pkg-node/ && node scripts/set-wasm-package-name.mjs pkg-node",
"wasm:build:web": "wasm-pack build --release --target web --features wasm && mkdir -p pkg-web && cp -f pkg/* pkg-web/ && node scripts/set-wasm-package-name.mjs pkg-web",
"wasm:test:node": "node wasm-tests/node-smoke.mjs",
"wasm:ci": "npm run wasm:build:node && npm run wasm:test:node && npm run wasm:build:web",
"wasm:pack": "npm pack ./pkg-web",
"wasm:publish": "npm publish ./pkg-web --access public"
}
}
13 changes: 13 additions & 0 deletions scripts/set-wasm-package-name.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import fs from 'node:fs';
import path from 'node:path';

// Patches the package.json inside <pkg-dir> (the wasm-pack output copied by the
// npm build scripts) so it carries the scoped npm name and the keyword list.

// First CLI argument: directory that holds the package.json to patch.
const [, , targetDir] = process.argv;
if (!targetDir) {
  throw new Error('Usage: node scripts/set-wasm-package-name.mjs <pkg-dir>');
}

const manifestPath = path.join(targetDir, 'package.json');
const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));

// Only these fields are overwritten (or added); everything else is preserved.
const overrides = {
  name: '@fluidinference/text-processing-rs',
  keywords: ['asr', 'speech', 'normalization', 'nlp', 'itn', 'tts', 'wasm'],
};
for (const [field, value] of Object.entries(overrides)) {
  manifest[field] = value;
}

// Serialize with two-space indentation and a trailing newline.
const serialized = JSON.stringify(manifest, null, 2) + '\n';
fs.writeFileSync(manifestPath, serialized);
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ pub mod tn;

#[cfg(feature = "ffi")]
pub mod ffi;
#[cfg(all(target_arch = "wasm32", feature = "wasm"))]
pub mod wasm;

use itn::en::{
cardinal, date, decimal, electronic, measure, money, ordinal, punctuation, telephone, time,
Expand Down
90 changes: 90 additions & 0 deletions src/wasm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
//! WebAssembly exports for JavaScript interop.

use wasm_bindgen::prelude::*;

use crate::{
custom_rules, normalize, normalize_sentence, normalize_sentence_with_max_span,
normalize_with_lang, tn_normalize, tn_normalize_lang, tn_normalize_sentence,
tn_normalize_sentence_lang, tn_normalize_sentence_with_max_span,
tn_normalize_sentence_with_max_span_lang,
};

/// Initialize panic hook for better error messages in browser devtools.
///
/// Delegates to `console_error_panic_hook::set_once()`. Nothing visible in this
/// module calls it automatically, so JavaScript callers opt in by invoking the
/// exported `set_panic_hook` function themselves.
#[wasm_bindgen]
pub fn set_panic_hook() {
console_error_panic_hook::set_once();
}

/// Exported to JavaScript as `normalize`.
///
/// Forwards `input` unchanged to `crate::normalize` and returns the string it
/// produces (the README example shows `normalize("two hundred")` giving `"200"`).
#[wasm_bindgen(js_name = normalize)]
pub fn normalize_js(input: &str) -> String {
normalize(input)
}

/// Exported to JavaScript as `normalizeWithLang`.
///
/// Forwards `input` and `lang` unchanged to `crate::normalize_with_lang` and
/// returns its result. NOTE(review): `lang` is presumably a language code; the
/// accepted values are defined by `normalize_with_lang` — confirm there.
#[wasm_bindgen(js_name = normalizeWithLang)]
pub fn normalize_with_lang_js(input: &str, lang: &str) -> String {
normalize_with_lang(input, lang)
}

/// Exported to JavaScript as `normalizeSentence`.
///
/// Forwards `input` unchanged to `crate::normalize_sentence` and returns the
/// string it produces.
#[wasm_bindgen(js_name = normalizeSentence)]
pub fn normalize_sentence_js(input: &str) -> String {
normalize_sentence(input)
}

/// Exported to JavaScript as `normalizeSentenceWithMaxSpan`.
///
/// Forwards to `crate::normalize_sentence_with_max_span`. `max_span_tokens` is
/// received as a `u32` and cast to `usize` before the call.
/// NOTE(review): presumably the maximum number of tokens considered per span —
/// confirm against the core function.
#[wasm_bindgen(js_name = normalizeSentenceWithMaxSpan)]
pub fn normalize_sentence_with_max_span_js(input: &str, max_span_tokens: u32) -> String {
normalize_sentence_with_max_span(input, max_span_tokens as usize)
}

/// Exported to JavaScript as `tnNormalize`.
///
/// Forwards `input` unchanged to `crate::tn_normalize` and returns the string it
/// produces (the README example shows `tnNormalize("$5.50")` giving
/// `"five dollars and fifty cents"`).
#[wasm_bindgen(js_name = tnNormalize)]
pub fn tn_normalize_js(input: &str) -> String {
tn_normalize(input)
}

/// Exported to JavaScript as `tnNormalizeLang`.
///
/// Forwards `input` and `lang` unchanged to `crate::tn_normalize_lang` and
/// returns its result. NOTE(review): `lang` is presumably a language code; the
/// accepted values are defined by `tn_normalize_lang` — confirm there.
#[wasm_bindgen(js_name = tnNormalizeLang)]
pub fn tn_normalize_lang_js(input: &str, lang: &str) -> String {
tn_normalize_lang(input, lang)
}

/// Exported to JavaScript as `tnNormalizeSentence`.
///
/// Forwards `input` unchanged to `crate::tn_normalize_sentence` and returns the
/// string it produces.
#[wasm_bindgen(js_name = tnNormalizeSentence)]
pub fn tn_normalize_sentence_js(input: &str) -> String {
tn_normalize_sentence(input)
}

/// Exported to JavaScript as `tnNormalizeSentenceLang`.
///
/// Forwards `input` and `lang` unchanged to `crate::tn_normalize_sentence_lang`
/// and returns its result. NOTE(review): `lang` is presumably a language code;
/// the accepted values are defined by the core function — confirm there.
#[wasm_bindgen(js_name = tnNormalizeSentenceLang)]
pub fn tn_normalize_sentence_lang_js(input: &str, lang: &str) -> String {
tn_normalize_sentence_lang(input, lang)
}

/// Exported to JavaScript as `tnNormalizeSentenceWithMaxSpan`.
///
/// Forwards to `crate::tn_normalize_sentence_with_max_span`. `max_span_tokens`
/// is received as a `u32` and cast to `usize` before the call.
/// NOTE(review): presumably the maximum number of tokens considered per span —
/// confirm against the core function.
#[wasm_bindgen(js_name = tnNormalizeSentenceWithMaxSpan)]
pub fn tn_normalize_sentence_with_max_span_js(input: &str, max_span_tokens: u32) -> String {
tn_normalize_sentence_with_max_span(input, max_span_tokens as usize)
}

/// Exported to JavaScript as `tnNormalizeSentenceWithMaxSpanLang`.
///
/// Forwards to `crate::tn_normalize_sentence_with_max_span_lang`, passing
/// `input` and `lang` unchanged and casting `max_span_tokens` from `u32` to
/// `usize`. NOTE(review): `lang` is presumably a language code and
/// `max_span_tokens` presumably a per-span token limit — confirm both against
/// the core function.
#[wasm_bindgen(js_name = tnNormalizeSentenceWithMaxSpanLang)]
pub fn tn_normalize_sentence_with_max_span_lang_js(
input: &str,
lang: &str,
max_span_tokens: u32,
) -> String {
tn_normalize_sentence_with_max_span_lang(input, lang, max_span_tokens as usize)
}

/// Exported to JavaScript as `addRule`.
///
/// Forwards `spoken` and `written` unchanged to `custom_rules::add_rule` and
/// returns nothing. The README example registers `("gee pee tee", "GPT")` and
/// then shows `normalize("gee pee tee")` giving `"GPT"`.
#[wasm_bindgen(js_name = addRule)]
pub fn add_rule_js(spoken: &str, written: &str) {
custom_rules::add_rule(spoken, written);
}

/// Exported to JavaScript as `removeRule`.
///
/// Forwards `spoken` unchanged to `custom_rules::remove_rule` and returns its
/// `bool` result. NOTE(review): presumably `true` means a rule was removed —
/// confirm against `custom_rules::remove_rule`.
#[wasm_bindgen(js_name = removeRule)]
pub fn remove_rule_js(spoken: &str) -> bool {
custom_rules::remove_rule(spoken)
}

/// Exported to JavaScript as `clearRules`.
///
/// Calls `custom_rules::clear_rules()`; takes no arguments and returns nothing.
#[wasm_bindgen(js_name = clearRules)]
pub fn clear_rules_js() {
custom_rules::clear_rules();
}

/// Exported to JavaScript as `ruleCount`.
///
/// Returns the value of `custom_rules::rule_count()` cast to `u32`.
/// NOTE(review): the count is presumably a `usize`; this module is only compiled
/// for `wasm32`, where `usize` is 32 bits wide, so the cast should be lossless —
/// confirm the return type of `rule_count`.
#[wasm_bindgen(js_name = ruleCount)]
pub fn rule_count_js() -> u32 {
custom_rules::rule_count() as u32
}
Loading
Loading