From fafa8ed2bcceb680a985874bc1ae53aa38391ce2 Mon Sep 17 00:00:00 2001 From: pswierad-src Date: Mon, 25 Aug 2025 19:24:52 +0200 Subject: [PATCH 1/2] Added simple TTS docs --- src/assets/docs/docs-index.json | 462 +++++++++--------- .../docs/examples/example-text-to-speech.md | 55 +++ 2 files changed, 294 insertions(+), 223 deletions(-) create mode 100644 src/assets/docs/examples/example-text-to-speech.md diff --git a/src/assets/docs/docs-index.json b/src/assets/docs/docs-index.json index 286a9e2..bdf814e 100644 --- a/src/assets/docs/docs-index.json +++ b/src/assets/docs/docs-index.json @@ -1,224 +1,240 @@ [ - { - "title": "Overview", - "path": "overview" - }, - { - "title": "How to start", - "path": "how-to" - }, - { - "title": "MaIN CLI", - "path": "cli" - }, - { - "title": "Configuration", - "path": "settings" - }, - { - "title": "Tutorial", - "path": "tutorial" - }, - { - "title": "Examples", - "children": [ - { - "title": "Example runner", - "path": "examples/example-runner" - }, - { - "title": "Mcp", - "children": [ - { - "title": "Basic mcp", - "path": "examples/example-mcp-basic" - }, - { - "title": "Mcp agent", - "path": "examples/example-agent-mcp" - } - ]}, - { - "title": "Chat", - "children": [ - { - "title": "Basic chat", - "path": "examples/example-chat-basic" - }, - { - "title": "Chat existing", - "path": "examples/example-chat-existing" - }, - { - "title": "Chat with reasoning", - "path": "examples/example-chat-reasoning" - }, - { - "title": "Chat with files", - "path": "examples/example-chat-files" - }, - { - "title": "Chat with image-gen", - "path": "examples/example-chat-image" - }, - { - "title": "Chat with vision", - "path": "examples/example-chat-vision" - }]}, - { - "title": "Agent", - "children": [ - { - "title": "Basic agent", - "path": "examples/example-agent-basic" - }, - { - "title": "Agent conversation", - "path": "examples/example-agent-conversation" - }, - { - "title": "Agent redirect", - "path": "examples/example-agent-redirect" - }, 
- { - "title": "Agent redirect img", - "path": "examples/example-agent-redirect-img" - }, - { - "title": "Multi Backend Agent", - "path": "examples/multi-backend-agent-redirect" - }, - { - "title": "Agent data source", - "path": "examples/example-agent-source" - }, - { - "title": "Agent become", - "path": "examples/example-agent-become" - }, - { - "title": "Two agents talking", - "path": "examples/example-agent-talking" - } - ]}, - { - "title": "Flow", - "children": [ - { - "title": "Agents as flow", - "path": "examples/example-agent-flow" - }, - { - "title": "Flow loaded", - "path": "examples/example-agent-load-flow" - } - ]}, - { - "title": "OpenAi Integration", - "children": [ - { - "title": "Basic chat", - "path": "examples/example-chat-basic-openai" - }, - { - "title": "Chat with image", - "path": "examples/example-chat-image-openai" - }, - { - "title": "Agent with Web source", - "path": "examples/example-agent-source-openai" - } - ]}, - { - "title": "Gemini Integration", - "children": [ - { - "title": "Basic chat", - "path": "examples/example-chat-basic-gemini" - }, - { - "title": "Chat with image", - "path": "examples/example-chat-image-gemini" - }, - { - "title": "Chat with files", - "path": "examples/example-chat-files-gemini" - } - ] - }, - { - "title": "DeepSeek Integration", - "children": [ - { - "title": "Chat with Reasoning", - "path": "examples/example-chat-reasoning-deepseek" - } - ] - }, - { - "title": "GroqCloud Integration", - "children": [ - { - "title": "Basic chat", - "path": "examples/example-chat-basic-groqcloud" - } - ] - } - ] - }, - { - "title": "Framework", - "children": [ - { - "title": "Contracts", - "children": [ - { - "title": "McpContext", - "path": "contracts/mcp-context" - }, - { - "title": "ModelContext", - "path": "contracts/model-context" - }, - { - "title": "ChatContext", - "path": "contracts/chat-context" - }, - { - "title": "AgentContext", - "path": "contracts/agent-context" - }, - { - "title": "FlowContext", - "path": 
"contracts/flow-context" - }, - { - "title": "Extensions", - "path": "contracts/extensions" - } - ] - } - ] - }, - { - "title": "Integrations", - "children": [ - { - "title": "OpenAi", - "path": "integrations/openai" - }, - { - "title": "Gemini", - "path": "integrations/gemini" - }, - { - "title": "DeepSeek", - "path": "integrations/deepseek" - }, - { - "title": "GroqCloud", - "path": "integrations/groqcloud" - } - ] - } -] + { + "title":"Overview", + "path":"overview" + }, + { + "title":"How to start", + "path":"how-to" + }, + { + "title":"MaIN CLI", + "path":"cli" + }, + { + "title":"Configuration", + "path":"settings" + }, + { + "title":"Tutorial", + "path":"tutorial" + }, + { + "title":"Examples", + "children":[ + { + "title":"Example runner", + "path":"examples/example-runner" + }, + { + "title":"Mcp", + "children":[ + { + "title":"Basic mcp", + "path":"examples/example-mcp-basic" + }, + { + "title":"Mcp agent", + "path":"examples/example-agent-mcp" + } + ] + }, + { + "title":"Chat", + "children":[ + { + "title":"Basic chat", + "path":"examples/example-chat-basic" + }, + { + "title":"Chat existing", + "path":"examples/example-chat-existing" + }, + { + "title":"Chat with reasoning", + "path":"examples/example-chat-reasoning" + }, + { + "title":"Chat with files", + "path":"examples/example-chat-files" + }, + { + "title":"Chat with image-gen", + "path":"examples/example-chat-image" + }, + { + "title":"Chat with vision", + "path":"examples/example-chat-vision" + } + ] + }, + { + "title":"Agent", + "children":[ + { + "title":"Basic agent", + "path":"examples/example-agent-basic" + }, + { + "title":"Agent conversation", + "path":"examples/example-agent-conversation" + }, + { + "title":"Agent redirect", + "path":"examples/example-agent-redirect" + }, + { + "title":"Agent redirect img", + "path":"examples/example-agent-redirect-img" + }, + { + "title":"Multi Backend Agent", + "path":"examples/multi-backend-agent-redirect" + }, + { + "title":"Agent data source", + 
"path":"examples/example-agent-source" + }, + { + "title":"Agent become", + "path":"examples/example-agent-become" + }, + { + "title":"Two agents talking", + "path":"examples/example-agent-talking" + } + ] + }, + { + "title":"Flow", + "children":[ + { + "title":"Agents as flow", + "path":"examples/example-agent-flow" + }, + { + "title":"Flow loaded", + "path":"examples/example-agent-load-flow" + } + ] + }, + { + "title":"TTS", + "children":[ + { + "title":"Text to Speech example", + "path":"examples/example-text-to-speech" + } + ] + + }, + { + "title":"OpenAi Integration", + "children":[ + { + "title":"Basic chat", + "path":"examples/example-chat-basic-openai" + }, + { + "title":"Chat with image", + "path":"examples/example-chat-image-openai" + }, + { + "title":"Agent with Web source", + "path":"examples/example-agent-source-openai" + } + ] + }, + { + "title":"Gemini Integration", + "children":[ + { + "title":"Basic chat", + "path":"examples/example-chat-basic-gemini" + }, + { + "title":"Chat with image", + "path":"examples/example-chat-image-gemini" + }, + { + "title":"Chat with files", + "path":"examples/example-chat-files-gemini" + } + ] + }, + { + "title":"DeepSeek Integration", + "children":[ + { + "title":"Chat with Reasoning", + "path":"examples/example-chat-reasoning-deepseek" + } + ] + }, + { + "title":"GroqCloud Integration", + "children":[ + { + "title":"Basic chat", + "path":"examples/example-chat-basic-groqcloud" + } + ] + } + ] + }, + { + "title":"Framework", + "children":[ + { + "title":"Contracts", + "children":[ + { + "title":"McpContext", + "path":"contracts/mcp-context" + }, + { + "title":"ModelContext", + "path":"contracts/model-context" + }, + { + "title":"ChatContext", + "path":"contracts/chat-context" + }, + { + "title":"AgentContext", + "path":"contracts/agent-context" + }, + { + "title":"FlowContext", + "path":"contracts/flow-context" + }, + { + "title":"Extensions", + "path":"contracts/extensions" + } + ] + } + ] + }, + { + 
"title":"Integrations", + "children":[ + { + "title":"OpenAi", + "path":"integrations/openai" + }, + { + "title":"Gemini", + "path":"integrations/gemini" + }, + { + "title":"DeepSeek", + "path":"integrations/deepseek" + }, + { + "title":"GroqCloud", + "path":"integrations/groqcloud" + } + ] + } +] \ No newline at end of file diff --git a/src/assets/docs/examples/example-text-to-speech.md b/src/assets/docs/examples/example-text-to-speech.md new file mode 100644 index 0000000..7406447 --- /dev/null +++ b/src/assets/docs/examples/example-text-to-speech.md @@ -0,0 +1,55 @@ +# 📢 Text To Speech Example + +This example demonstrates how to use the TextToSpeech feature of MaIN.NET. In this case the example shows how to setup a simple speech generation using Kokoro model with preset voices. + +## 🚀 Quick Start + +To run the example you need the Kokoro TTS model downloaded. The model must be in ONNX format. +
It can be acquired here: GET KOKORO.
Voices can be downloaded from the original Kokoro repository on HuggingFace: GET VOICES. +
The model needs to be placed in the configured Models directory. Voices can be stored anywhere, but their path MUST be set in the example code. + +⚠️⚠️⚠️ IMPORTANT ⚠️⚠️⚠️ +
The TTS feature is under ongoing development. Some parts will be refactored in the near future. +
Some approaches will be changed completely to match the desired MaIN approach (VoiceService is a great example). + +### 📝 Code Example +```csharp +public class ChatWithTextToSpeechExample : IExample +{ + private const string VoicePath = ""; + + public async Task Start() + { + Console.WriteLine("ChatWithTextToSpeech is running! Put on your headphones and press any key."); + Console.ReadKey(); + + VoiceService.SetVoicesPath(VoicePath); + var voice = VoiceService.GetVoice("af_heart") + .MixWith(VoiceService.GetVoice("bf_emma")); + + await AIHub.Chat().WithModel("gemma2:2b") + .WithMessage("Generate a 4 sentence poem.") + .Speak(new TextToSpeechParams("kokoro:82m", voice, true)) + .CompleteAsync(interactive: true); + + Console.WriteLine("Done!"); + Console.ReadKey(); + } +} +``` + +## 🔹 How It Works + +1. **Set Voices Path** → required for the time being. Manually sets the directory where voice files are stored +2. **Voice Service** → static utility class. Works as a temporary bridge to make certain features possible. It is mainly used for `GetVoice()` voice loading and the `MixWith()` extension method that allows for voice mixing* +3. **Speak** → core of the TTS functionality. Vocalizes each message returned by the model. In this case, a 4 sentence poem. Requires `TextToSpeechParams` parameters which are essentially all "moving parts" of TTS. It consists of 3 parameters: +- `model` - model name. Similar to the `WithModel()` parameter +- `voice` - `Voice` class loaded in the previous step +- `playback` - a boolean that specifies whether generated audio should be played back via the system audio driver. This parameter is optional and defaults to `false`. +
Generated TTS audio (apart from the optional playback) will be stored in the `Message` class, in the `Speech` byte array property. + +## 📋 Prerequisites + +- Kokoro model and voices downloaded +- Any audio device present +- MaIN.NET framework properly configured \ No newline at end of file From 597fe489ab86ab80a349bc0edd51e5272a95bc42 Mon Sep 17 00:00:00 2001 From: pswierad-src Date: Mon, 25 Aug 2025 19:27:16 +0200 Subject: [PATCH 2/2] Mention --- src/assets/docs/examples/example-text-to-speech.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/assets/docs/examples/example-text-to-speech.md b/src/assets/docs/examples/example-text-to-speech.md index 7406447..a3c4431 100644 --- a/src/assets/docs/examples/example-text-to-speech.md +++ b/src/assets/docs/examples/example-text-to-speech.md @@ -52,4 +52,6 @@ public class ChatWithTextToSpeechExample : IExample - Kokoro model and voices downloaded - Any audio device present -- MaIN.NET framework properly configured \ No newline at end of file +- MaIN.NET framework properly configured + +*This feature, as well as parts of the TTS code, was heavily inspired by the KokoroSharp project by Lyrcaxis. Please check out their work and give them a ⭐ \ No newline at end of file