diff --git a/README.md b/README.md index 07bc9b4d..0eef298b 100644 --- a/README.md +++ b/README.md @@ -215,27 +215,111 @@ Explore complete working examples in the [`samples/`](samples/) folder: The SDK also supports audio transcription via Whisper models. Use `model.createAudioClient()` to transcribe audio files on-device: +> [!TIP] +> The JavaScript SDK does not require end users to have the Foundry Local CLI installed. It is a completely self-contained SDK that includes native in-process Chat Completions and Audio Transcription APIs — no HTTP calls or external services needed. + +#### Chat Completions + ```javascript -import { FoundryLocalManager } from 'foundry-local-sdk'; +import { FoundryLocalManager } from "foundry-local-sdk"; + +// Initialize the SDK +const manager = FoundryLocalManager.create({ appName: "MyApp" }); + +// Get and load a chat model +const model = await manager.catalog.getModel("phi-3.5-mini"); +await model.download(); +await model.load(); + +// Create a chat client and generate a response +const chatClient = model.createChatClient(); +chatClient.settings.temperature = 0.7; +chatClient.settings.maxTokens = 800; + +const response = await chatClient.completeChat([ + { role: "user", content: "What is the golden ratio?" }, +]); +console.log(response.choices[0].message.content); + +// Stream responses in real-time +for await (const chunk of chatClient.completeStreamingChat([ + { role: "user", content: "Explain quantum computing simply." 
}, +])) { + const content = chunk.choices?.[0]?.message?.content; + if (content) process.stdout.write(content); +} -const manager = FoundryLocalManager.create({ appName: 'MyApp' }); +// Clean up +await model.unload(); +``` + +#### Audio Transcription (Speech-to-Text) -// Download and load the Whisper model -const whisperModel = await manager.catalog.getModel('whisper-tiny'); +```javascript +import { FoundryLocalManager } from "foundry-local-sdk"; + +// Initialize the SDK (reuses the same singleton if already created) +const manager = FoundryLocalManager.create({ appName: "MyApp" }); + +// Get and load the Whisper model for audio transcription +const whisperModel = await manager.catalog.getModel("whisper-tiny"); await whisperModel.download(); await whisperModel.load(); -// Transcribe an audio file +// Create an audio client and transcribe const audioClient = whisperModel.createAudioClient(); -audioClient.settings.language = 'en'; -const result = await audioClient.transcribe('recording.wav'); -console.log('Transcription:', result.text); +audioClient.settings.language = "en"; + +// Transcribe an audio file +const result = await audioClient.transcribe("recording.wav"); +console.log("Transcription:", result.text); -// Or stream in real-time -for await (const chunk of audioClient.transcribeStreaming('recording.wav')) { - process.stdout.write(chunk.text); +// Or stream the transcription in real-time +for await (const chunk of audioClient.transcribeStreaming("recording.wav")) { + process.stdout.write(chunk.text); } +// Clean up +await whisperModel.unload(); +``` + +#### Chat + Audio Together + +A single `FoundryLocalManager` can manage both chat and audio models simultaneously — no need for separate runtimes: + +```javascript +import { FoundryLocalManager } from "foundry-local-sdk"; + +const manager = FoundryLocalManager.create({ appName: "VoiceJournal" }); + +// Load both models +const chatModel = await manager.catalog.getModel("phi-3.5-mini"); +await 
chatModel.download(); +await chatModel.load(); + +const whisperModel = await manager.catalog.getModel("whisper-tiny"); +await whisperModel.download(); +await whisperModel.load(); + +// Step 1: Transcribe audio +const audioClient = whisperModel.createAudioClient(); +audioClient.settings.language = "en"; +const transcription = await audioClient.transcribe("journal-entry.wav"); +console.log("You said:", transcription.text); + +// Step 2: Analyze the transcription with the chat model +const chatClient = chatModel.createChatClient(); +const analysis = await chatClient.completeChat([ + { + role: "system", + content: "Summarize this journal entry and extract key themes.", + }, + { role: "user", content: transcription.text }, +]); +console.log("Summary:", analysis.choices[0].message.content); + +// Clean up +await chatModel.unload(); await whisperModel.unload(); ``` diff --git a/samples/js/audio-transcription-foundry-local/README.md b/samples/js/audio-transcription-foundry-local/README.md new file mode 100644 index 00000000..09b5f839 --- /dev/null +++ b/samples/js/audio-transcription-foundry-local/README.md @@ -0,0 +1,39 @@ +# Sample: Audio Transcription with Foundry Local + +This sample demonstrates how to use Foundry Local for **speech-to-text (audio transcription)** using the Whisper model — entirely on-device, with no cloud services required. + +## What This Shows + +- Loading the `whisper-tiny` model via the Foundry Local SDK +- Transcribing an audio file (`.wav`, `.mp3`, etc.) 
to text +- Both standard and streaming transcription modes +- Automatic hardware acceleration (NPU > GPU > CPU) + +## Prerequisites + +- [Foundry Local](https://github.com/microsoft/Foundry-Local) installed on your machine +- Node.js 18+ + +## Getting Started + +From this sample's directory, install dependencies: + +```bash +npm install +``` + +Place an audio file (e.g., `recording.wav` or `recording.mp3`) in the project directory, then run: + +```bash +node src/app.js +``` + +## How It Works + +The Foundry Local SDK handles everything: +1. **Model discovery** — finds the best `whisper-tiny` variant for your hardware +2. **Model download** — downloads the model if not already cached +3. **Model loading** — loads the model into memory with optimized hardware acceleration +4. **Transcription** — runs Whisper inference entirely on-device + +No need for `whisper.cpp`, `@huggingface/transformers`, or any other separate STT tool. diff --git a/samples/js/audio-transcription-foundry-local/package.json b/samples/js/audio-transcription-foundry-local/package.json new file mode 100644 index 00000000..50de32c8 --- /dev/null +++ b/samples/js/audio-transcription-foundry-local/package.json @@ -0,0 +1,11 @@ +{ + "name": "audio-transcription-foundry-local", + "type": "module", + "description": "Audio transcription (speech-to-text) sample using Foundry Local", + "scripts": { + "start": "node src/app.js" + }, + "dependencies": { + "foundry-local-sdk": "^0.9.0" + } +} diff --git a/samples/js/audio-transcription-foundry-local/src/app.js b/samples/js/audio-transcription-foundry-local/src/app.js new file mode 100644 index 00000000..aa382ff8 --- /dev/null +++ b/samples/js/audio-transcription-foundry-local/src/app.js @@ -0,0 +1,64 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
import { FoundryLocalManager } from "foundry-local-sdk";
import path from "path";

// The Whisper model alias for audio transcription.
const alias = "whisper-tiny";

/**
 * Demonstrates on-device speech-to-text with Foundry Local:
 * downloads (if not cached) and loads the whisper-tiny model, then runs
 * both standard and streaming transcription against a local audio file.
 *
 * @throws {Error} if the model alias is not found in the local catalog.
 */
async function main() {
  console.log("Initializing Foundry Local SDK...");
  const manager = FoundryLocalManager.create({
    appName: "AudioTranscriptionSample",
    logLevel: "info",
  });

  // Look up the Whisper model in the catalog.
  const model = await manager.catalog.getModel(alias);
  if (!model) {
    throw new Error(
      `Model "${alias}" not found. Run "foundry model list" to see available models.`
    );
  }

  // Download the model only when it is not already cached locally.
  // The callback receives a percentage value (0-100) for progress display.
  if (!model.isCached) {
    console.log(`Downloading model "${alias}"...`);
    await model.download((progress) => {
      process.stdout.write(`\rDownload progress: ${progress.toFixed(1)}%`);
    });
    console.log("\nDownload complete.");
  }

  // Load the model into memory.
  console.log(`Loading model "${model.id}"...`);
  await model.load();
  console.log("Model loaded.\n");

  // Guarantee the model is unloaded even if transcription throws below —
  // otherwise a failed run would leave the model resident in memory.
  try {
    // Create an audio client for transcription.
    const audioClient = model.createAudioClient();
    audioClient.settings.language = "en";

    // Update this path to point to your audio file.
    const audioFilePath = path.resolve("recording.mp3");

    // --- Standard transcription ---
    console.log("=== Standard Transcription ===");
    const result = await audioClient.transcribe(audioFilePath);
    console.log("Transcription:", result.text);

    // --- Streaming transcription ---
    console.log("\n=== Streaming Transcription ===");
    for await (const chunk of audioClient.transcribeStreaming(audioFilePath)) {
      process.stdout.write(chunk.text);
    }
    console.log("\n");
  } finally {
    // Clean up
    await model.unload();
  }

  console.log("Done.");
}

main().catch((err) => {
  // Log the failure and exit non-zero so shell scripts and CI detect it;
  // a bare `.catch(console.error)` would exit with status 0 on error.
  console.error(err);
  process.exitCode = 1;
});