From 9d6925c0e264a434dd510bc4848e0d805b0be7ab Mon Sep 17 00:00:00 2001 From: Anmol Garg Date: Fri, 12 Jun 2026 15:03:01 +0530 Subject: [PATCH 1/8] [ADD] Enable/Disable Button. UI Enhancements, Info Icon, Fixed Text Live Updation in Read More Page, Dynamic Timeout, Stagging --- qml/components/richtext/HtmlEditorToolbar.qml | 1 + qml/components/richtext/RichTextEditor.qml | 63 ++- qml/components/richtext/RichTextPreview.qml | 36 +- qml/features/settings/pages/Settings_Page.qml | 2 + .../settings/pages/Settings_VoiceModel.qml | 363 ++++++++++++++++-- src/backend.py | 248 ++++++++++-- voice_to_text/voice2text.py | 36 +- 7 files changed, 660 insertions(+), 89 deletions(-) diff --git a/qml/components/richtext/HtmlEditorToolbar.qml b/qml/components/richtext/HtmlEditorToolbar.qml index 9599c4f..0cc2731 100644 --- a/qml/components/richtext/HtmlEditorToolbar.qml +++ b/qml/components/richtext/HtmlEditorToolbar.qml @@ -68,6 +68,7 @@ Rectangle { width: units.gu(5) height: units.gu(4) color: (editor && editor.listening) ? LomiriColors.red : (darkMode ? "#555555" : "#F0F0F0") + visible: editor ? editor.isVoiceInputEnabled : false Icon { anchors.centerIn: parent diff --git a/qml/components/richtext/RichTextEditor.qml b/qml/components/richtext/RichTextEditor.qml index f7cc193..5aabfde 100644 --- a/qml/components/richtext/RichTextEditor.qml +++ b/qml/components/richtext/RichTextEditor.qml @@ -19,6 +19,7 @@ import QtQuick 2.7 import QtQuick.Window 2.2 import Lomiri.Components 1.3 import QtWebEngine 1.5 +import QtQuick.LocalStorage 2.7 as Sql import "js/html-sanitizer.js" as HtmlSanitizer Item { @@ -75,6 +76,29 @@ Item { /** Partial voice recognition text (for UI feedback) */ property string _partialVoiceText: "" + + /** Whether voice input is enabled globally */ + property bool isVoiceInputEnabled: true + + function checkVoiceInputEnabled() { + try { + var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var result = true; + db.transaction(function (tx) { + var rs = tx.executeSql('SELECT value FROM app_settings WHERE key = "voice_input_enabled"'); + if (rs.rows.length > 0) { + result = rs.rows.item(0).value === "true"; + } + }); + isVoiceInputEnabled = result; + } catch (e) { + console.warn("Error reading voice_input_enabled:", e); + } + } + + Component.onCompleted: { + checkVoiceInputEnabled() + } Connections { target: mainView.backend_bridge @@ -85,12 +109,16 @@ Item { var partialText = data.payload if (partialText) { - var jsCode = "var marker = document.getElementById('voice-partial-marker'); " + + var jsCode = "var spans = document.getElementsByTagName('span'); " + + "var marker = null; " + + "for(var i=spans.length-1; i>=0; i--) { if (spans[i].innerText && spans[i].innerText.indexOf('\\u200B') !== -1) { marker = spans[i]; break; } } " + "if (marker) { " + - " marker.innerText = " + JSON.stringify(partialText + " (Listening...)") + "; " + + " marker.innerText = '\\u200B ' + " + JSON.stringify(partialText + " (Listening...)") + "; " + " window.editor.moveCursorToEnd(); " + "}"; - wv.runJavaScript(jsCode); + wv.runJavaScript(jsCode, function(res) { + editor.syncContent(); + }); } } else if (data.event === "voice_recognition_result") { editor.listening = false @@ -99,7 +127,9 @@ Item { console.log("[RichTextEditor] Received recognition result: " + recognizedText) // Replace the partial span with the final text - var jsCode = "var marker = document.getElementById('voice-partial-marker'); " + + var jsCode = "var spans = document.getElementsByTagName('span'); " + + "var marker = null; " + + "for(var i=spans.length-1; i>=0; i--) { if (spans[i].innerText && spans[i].innerText.indexOf('\\u200B') !== -1) { marker = spans[i]; break; } } " + "if (marker) { " + " marker.outerHTML = " + JSON.stringify(recognizedText ? (recognizedText + " ") : "") + "; " + "} else if (" + JSON.stringify(recognizedText) + ") { " + @@ -109,13 +139,30 @@ Item { // Force a sync to update the 'text' property and emit contentChanged editor.syncContent(); + } else if (data.event === "voice_recognition_status") { + if (!listening && !processing) return; + var statusText = data.payload; + if (statusText) { + var jsCode = "var spans = document.getElementsByTagName('span'); " + + "var marker = null; " + + "for(var i=spans.length-1; i>=0; i--) { if (spans[i].innerText && spans[i].innerText.indexOf('\\u200B') !== -1) { marker = spans[i]; break; } } " + + "if (marker) { " + + " marker.innerText = '\\u200B (' + " + JSON.stringify(statusText) + " + ')'; " + + " window.editor.moveCursorToEnd(); " + + "}"; + wv.runJavaScript(jsCode, function(res) { + editor.syncContent(); + }); + } } else if (data.event === "voice_recognition_error") { editor.listening = false editor.processing = false console.log("[RichTextEditor] Voice recognition error: " + data.payload) // Remove the partial span on error - var jsCode = "var marker = document.getElementById('voice-partial-marker'); " + + var jsCode = "var spans = document.getElementsByTagName('span'); " + + "var marker = null; " + + "for(var i=spans.length-1; i>=0; i--) { if (spans[i].innerText && spans[i].innerText.indexOf('\\u200B') !== -1) { marker = spans[i]; break; } } " + "if (marker) { " + " marker.remove(); " + "}"; @@ -146,7 +193,7 @@ Item { " var currentHTML = window.editor.getHTML(); " + " var hasContent = currentHTML.replace(/<[^>]*>/g, '').trim().length > 0; " + - " var marker = 'Listening...'; " + + " var marker = '\\u200B (Starting...)'; " + " if (hasContent) { " + " window.editor.insertHTML('

' + marker); " + @@ -155,7 +202,9 @@ Item { " } " + " window.editor.moveCursorToEnd(); " + "} catch(e) { console.error('Error inserting voice marker: ', e); }"; - wv.runJavaScript(jsCode); + wv.runJavaScript(jsCode, function(res) { + editor.syncContent(); + }); mainView.backend_bridge.call("backend.run_voice_recognition", []) } diff --git a/qml/components/richtext/RichTextPreview.qml b/qml/components/richtext/RichTextPreview.qml index 7709998..d2d881e 100644 --- a/qml/components/richtext/RichTextPreview.qml +++ b/qml/components/richtext/RichTextPreview.qml @@ -1,5 +1,6 @@ import QtQuick 2.7 import Lomiri.Components 1.3 +import QtQuick.LocalStorage 2.7 as Sql import "js/html-sanitizer.js" as HtmlSanitizer import "../../../models/global.js" as Global @@ -23,6 +24,27 @@ Rectangle { property bool listening: false property bool processing: false property string textBeforeRecording: "" + property bool isVoiceInputEnabled: true + + function checkVoiceInputEnabled() { + try { + var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var result = true; + db.transaction(function (tx) { + var rs = tx.executeSql('SELECT value FROM app_settings WHERE key = "voice_input_enabled"'); + if (rs.rows.length > 0) { + result = rs.rows.item(0).value === "true"; + } + }); + isVoiceInputEnabled = result; + } catch (e) { + console.warn("Error reading voice_input_enabled:", e); + } + } + + Component.onCompleted: { + checkVoiceInputEnabled() + } property int _liveStartPos: 0 property int _liveLength: 0 @@ -41,6 +63,15 @@ Rectangle { previewText.insert(root._liveStartPos, str); root._liveLength = str.length; + cursorTimer.start(); + } + } else if (data.event === "voice_recognition_status") { + var statusText = data.payload; + if (statusText) { + var str = " (" + statusText + ")"; + previewText.remove(root._liveStartPos, root._liveStartPos + root._liveLength); + previewText.insert(root._liveStartPos, str); + root._liveLength = str.length; cursorTimer.start(); } } else if (data.event === "voice_recognition_result") { @@ -346,6 +377,7 @@ Rectangle { Rectangle { id: voiceButton + visible: root.isVoiceInputEnabled width: units.gu(3) height: units.gu(3) radius: units.gu(.5) @@ -387,8 +419,8 @@ Rectangle { // Initialize the live insertion point at the very end root._liveStartPos = previewText.length; - root._liveLength = " (Listening...)".length; - previewText.insert(root._liveStartPos, " (Listening...)"); + root._liveLength = " (Starting...)".length; + previewText.insert(root._liveStartPos, " (Starting...)"); cursorTimer.start() root.listening = true diff --git a/qml/features/settings/pages/Settings_Page.qml b/qml/features/settings/pages/Settings_Page.qml index 3e7ebf2..b887d34 100644 --- a/qml/features/settings/pages/Settings_Page.qml +++ b/qml/features/settings/pages/Settings_Page.qml @@ -112,8 +112,10 @@ Page { iconName: "audio-input-microphone-symbolic" iconColor: "#8e44ad" text: i18n.dtr("ubtms", "Voice Model Settings") + active: settings.selectedSettingsPageUrl === "Settings_VoiceModel.qml" showDivider: false onClicked: { + settings.selectedSettingsPageUrl = "Settings_VoiceModel.qml"; apLayout.addPageToNextColumn(settings, Qt.resolvedUrl('Settings_VoiceModel.qml')); } } diff --git a/qml/features/settings/pages/Settings_VoiceModel.qml b/qml/features/settings/pages/Settings_VoiceModel.qml index 65a3585..6c2d802 100644 --- a/qml/features/settings/pages/Settings_VoiceModel.qml +++ b/qml/features/settings/pages/Settings_VoiceModel.qml @@ -38,6 +38,10 @@ Page { id: pageHeader title: voiceModelSettingsPage.title trailingActions: [ + Action { + iconName: "info" + onTriggered: PopupUtils.open(infoDialogComponent, voiceModelSettingsPage) + }, Action { iconName: "reload" onTriggered: refreshModels() @@ -51,7 +55,52 @@ Page { property bool isDownloading: false property string downloadingModelId: "" property string downloadingModelName: "" + property string failedModelId: "" property var downloadStatus: { "in_progress": false, "progress": 0, "message": "", "error": "" } + property int deviceRamMB: 2048 + property bool isVoiceInputEnabled: true + + function getVoiceInputEnabledSetting() { + try { + var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var result = true; + db.transaction(function (tx) { + var rs = tx.executeSql('SELECT value FROM app_settings WHERE key = "voice_input_enabled"'); + if (rs.rows.length > 0) { + result = rs.rows.item(0).value === "true"; + } + }); + return result; + } catch (e) { + console.warn("Error reading voice_input_enabled setting:", e); + return true; + } + } + + function saveVoiceInputEnabledSetting(value) { + try { + var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + db.transaction(function (tx) { + tx.executeSql('CREATE TABLE IF NOT EXISTS app_settings (key TEXT PRIMARY KEY, value TEXT)'); + tx.executeSql('INSERT OR REPLACE INTO app_settings (key, value) VALUES ("voice_input_enabled", ?)', [value ? "true" : "false"]); + }); + isVoiceInputEnabled = value; + } catch (e) { + console.warn("Error saving voice_input_enabled setting:", e); + } + } + + function isModelCompatible(sizeStr) { + if (!sizeStr) return true; + var isG = sizeStr.indexOf("G") !== -1; + if (isG) { + var val = parseFloat(sizeStr.replace("G", "")); + // Assume 1G requires ~2500MB RAM, 1.8G requires ~4000MB RAM. + var reqRam = val * 2500; + return deviceRamMB >= reqRam; + } + return true; // MB sizes are usually compatible with any device + } function getActiveModelSetting() { try { @@ -172,14 +221,56 @@ Page { }); } + function cancelDownload() { + if (!mainView.backend_bridge.ready) return; + mainView.backend_bridge.call("backend.cancel_voice_model_download", [], function(res) { + isDownloading = false; + failedModelId = ""; + downloadStatusTimer.stop(); + if (mainView.modelDownloadTimerWidget) { + mainView.modelDownloadTimerWidget.failSync(i18n.dtr("ubtms", "Download cancelled")); + } + notifPopup.open(i18n.dtr("ubtms", "Download Cancelled"), i18n.dtr("ubtms", "Download of %1 was cancelled and partial data deleted.").arg(downloadingModelName), "info"); + refreshModels(); + }); + } + + function pauseDownload() { + if (!mainView.backend_bridge.ready) return; + mainView.backend_bridge.call("backend.pause_voice_model_download", [], function(res) { + // Timer will catch the state change and handle UI updates + }); + } + Component.onCompleted: { activeModelPath = getActiveModelSetting(); + isVoiceInputEnabled = getVoiceInputEnabledSetting(); if (mainView.backend_bridge.ready) { refreshModels(); checkInProgressDownload(); + checkPausedDownloads(); + fetchDeviceRam(); } } + function fetchDeviceRam() { + mainView.backend_bridge.call("backend.get_device_total_ram_mb", [], function(ram) { + if (ram) { + deviceRamMB = ram; + console.log("Device RAM detected:", deviceRamMB, "MB"); + } + }); + } + + function checkPausedDownloads() { + mainView.backend_bridge.call("backend.get_paused_voice_models", [], function(paused) { + if (paused && paused.length > 0) { + // If there are partial downloads, mark the first one as paused (failedModelId handles UI for paused state) + failedModelId = paused[0]; + } + }); + } + function checkInProgressDownload() { mainView.backend_bridge.call("backend.get_model_download_status", [], function(status) { if (status.in_progress) { @@ -200,6 +291,32 @@ Page { }); } + Component { + id: infoDialogComponent + Dialog { + id: infoDialog + title: i18n.dtr("ubtms", "About Voice Models") + + Column { + spacing: units.gu(2) + width: parent.width + + Text { + text: i18n.dtr("ubtms", "Voice models allow you to dictate text using your microphone directly into the app. Because processing happens locally on your device, your voice data remains completely private and no internet connection is required after the initial model download.\n\nLarger models provide higher accuracy but require more device memory and space. Smaller models are faster and use fewer resources but may be less accurate.") + width: parent.width + wrapMode: Text.WordWrap + color: theme.palette.normal.backgroundText + } + + Button { + text: i18n.dtr("ubtms", "Close") + anchors.horizontalCenter: parent.horizontalCenter + onClicked: PopupUtils.close(infoDialog) + } + } + } + } + Component { id: confirmDeleteComponent Dialog { @@ -249,12 +366,57 @@ Page { } } + Component { + id: warningComponent + Dialog { + id: warningDialog + property string modelId + property string modelUrl + property string modelName + title: i18n.dtr("ubtms", "Warning") + + Column { + spacing: units.gu(2) + width: parent.width + + Text { + text: i18n.dtr("ubtms", "This model is incompatible with your device because it requires more RAM than available. It may cause the app to crash.\n\nBut if you want to download it, you can.") + width: parent.width + wrapMode: Text.WordWrap + color: theme.palette.normal.backgroundText + horizontalAlignment: Text.AlignHCenter + } + + Row { + spacing: units.gu(2) + anchors.horizontalCenter: parent.horizontalCenter + + Button { + text: i18n.dtr("ubtms", "Cancel") + onClicked: PopupUtils.close(warningDialog) + } + + Button { + text: i18n.dtr("ubtms", "Download Anyway") + color: LomiriColors.orange + onClicked: { + PopupUtils.close(warningDialog); + downloadModel(warningDialog.modelId, warningDialog.modelUrl, warningDialog.modelName); + } + } + } + } + } + } + Connections { target: mainView.backend_bridge onReadyChanged: { if (mainView.backend_bridge.ready) { refreshModels(); checkInProgressDownload(); + checkPausedDownloads(); + fetchDeviceRam(); } } } @@ -285,14 +447,24 @@ Page { downloadStatusTimer.stop(); isDownloading = false; refreshModels(); // Refresh both lists - if (status.error) { - // Show error - console.error("Download error:", status.error); - if (mainView.modelDownloadTimerWidget) { - mainView.modelDownloadTimerWidget.failSync(status.error); + + if (status.is_paused) { + failedModelId = downloadingModelId; + if (status.error) { + console.error("Download error:", status.error); + if (mainView.modelDownloadTimerWidget) mainView.modelDownloadTimerWidget.failSync(status.error); + notifPopup.open(i18n.dtr("ubtms", "Download Interrupted"), i18n.dtr("ubtms", "Failed to download %1: %2. You can resume it.").arg(downloadingModelName).arg(status.error), "warning"); + } else { + notifPopup.open(i18n.dtr("ubtms", "Download Paused"), i18n.dtr("ubtms", "Download of %1 is paused.").arg(downloadingModelName), "info"); } - notifPopup.open(i18n.dtr("ubtms", "Download Failed"), i18n.dtr("ubtms", "Failed to download %1: %2").arg(downloadingModelName).arg(status.error), "error"); + } else if (status.message === "Cancelled" || status.message === "Download cancelled") { + failedModelId = ""; + // Already handled by cancelDownload() or just silently reset + } else if (status.error) { + failedModelId = ""; // fallback + notifPopup.open(i18n.dtr("ubtms", "Download Failed"), status.error, "error"); } else { + failedModelId = ""; if (mainView.modelDownloadTimerWidget) { mainView.modelDownloadTimerWidget.completeSyncSuccessfully(); } @@ -315,7 +487,33 @@ Page { Column { id: contentColumn width: parent.width - // spacing: units.gu(1) + + ListItem { + width: parent.width + height: units.gu(7) + divider.visible: true + + Label { + anchors.left: parent.left + anchors.leftMargin: units.gu(2) + anchors.verticalCenter: parent.verticalCenter + text: i18n.dtr("ubtms", "Enable Voice Input") + font.pixelSize: units.gu(2) + color: theme.name === "Ubuntu.Components.Themes.SuruDark" ? "#f5f5f5" : "#111" + } + + Switch { + anchors.right: parent.right + anchors.rightMargin: units.gu(2) + anchors.verticalCenter: parent.verticalCenter + checked: isVoiceInputEnabled + onCheckedChanged: { + if (checked !== isVoiceInputEnabled) { + saveVoiceInputEnabledSetting(checked); + } + } + } + } ListItem { width: parent.width @@ -358,9 +556,19 @@ Page { Rectangle { anchors.fill: parent color: activeModelPath === model.path ? - (theme.name === "Ubuntu.Components.Themes.SuruDark" ? "#222" : "#f0f0f0") : + (theme.name === "Ubuntu.Components.Themes.SuruDark" ? "#2b241b" : "#fff1de") : "transparent" visible: activeModelPath === model.path + + Rectangle { + anchors.left: parent.left + anchors.verticalCenter: parent.verticalCenter + width: units.dp(3) + height: parent.height - units.gu(1.6) + radius: units.dp(2) + color: LomiriColors.orange + visible: activeModelPath === model.path + } } Column { @@ -377,7 +585,7 @@ Page { text: model.name font.pixelSize: units.gu(2) font.bold: activeModelPath === model.path - color: theme.name === "Ubuntu.Components.Themes.SuruDark" ? "#f5f5f5" : "#111" + color: activeModelPath === model.path ? LomiriColors.orange : (theme.name === "Ubuntu.Components.Themes.SuruDark" ? "#f5f5f5" : "#111") elide: Text.ElideRight width: parent.width } @@ -444,7 +652,20 @@ Page { opacity: isDownloading ? (downloadingModelId === model.id ? 1.0 : 0.5) : 1.0 onClicked: { - if (!isDownloading) { + if (isDownloading && downloadingModelId === model.id) { + pauseDownload(); + } else if (!isDownloading) { + if (!isModelCompatible(model.size)) { + PopupUtils.open(warningComponent, voiceModelSettingsPage, { + "modelId": model.id, + "modelUrl": model.url, + "modelName": model.name + }); + return; + } + if (failedModelId === model.id) { + failedModelId = ""; + } downloadModel(model.id, model.url, model.name); } } @@ -480,33 +701,111 @@ Page { anchors.right: parent.right anchors.verticalCenter: parent.verticalCenter anchors.rightMargin: units.gu(2) - width: units.gu(2.5) + width: units.gu(9) height: units.gu(2.5) - visible: !isDownloading || downloadingModelId !== model.id - Image { - id: downloadImg + Row { anchors.fill: parent - source: Qt.resolvedUrl("../../../images/download.svg") - sourceSize: Qt.size(parent.width, parent.height) - visible: false - } + spacing: units.gu(1) + layoutDirection: Qt.RightToLeft + + // Warning icon + Icon { + name: "dialog-warning" + width: units.gu(2.5) + height: units.gu(2.5) + color: LomiriColors.red + visible: !isModelCompatible(model.size) && (!isDownloading || downloadingModelId !== model.id) && failedModelId !== model.id + + MouseArea { + anchors.fill: parent + onClicked: { + PopupUtils.open(warningComponent, voiceModelSettingsPage, { + "modelId": model.id, + "modelUrl": model.url, + "modelName": model.name + }); + } + } + } - ColorOverlay { - anchors.fill: downloadImg - source: downloadImg - color: LomiriColors.orange - } - } + // Default Download icon + Item { + width: units.gu(2.5) + height: units.gu(2.5) + visible: isModelCompatible(model.size) && (!isDownloading || downloadingModelId !== model.id) && failedModelId !== model.id + + Image { + id: downloadImg + anchors.fill: parent + source: Qt.resolvedUrl("../../../images/download.svg") + sourceSize: Qt.size(parent.width, parent.height) + visible: false + } - BusyIndicator { - anchors.right: parent.right - anchors.verticalCenter: parent.verticalCenter - anchors.rightMargin: units.gu(2) - running: isDownloading && downloadingModelId === model.id - visible: isDownloading && downloadingModelId === model.id - width: units.gu(2.5) - height: units.gu(2.5) + ColorOverlay { + anchors.fill: downloadImg + source: downloadImg + color: LomiriColors.orange + } + } + + // Cancel icon (when paused or downloading) + Icon { + name: "close" + width: units.gu(2.5) + height: units.gu(2.5) + color: LomiriColors.red + visible: ((!isDownloading || downloadingModelId !== model.id) && failedModelId === model.id) || (isDownloading && downloadingModelId === model.id) + + MouseArea { + anchors.fill: parent + onClicked: { + cancelDownload(); + } + } + } + + // Play/Resume icon (when paused) + Icon { + name: "media-playback-start" + width: units.gu(2.5) + height: units.gu(2.5) + color: LomiriColors.green + visible: (!isDownloading || downloadingModelId !== model.id) && failedModelId === model.id + + MouseArea { + anchors.fill: parent + onClicked: { + failedModelId = ""; + downloadModel(model.id, model.url, model.name); + } + } + } + + // Pause icon (when downloading) + Icon { + name: "media-playback-pause" + width: units.gu(2.5) + height: units.gu(2.5) + color: LomiriColors.orange + visible: isDownloading && downloadingModelId === model.id + + MouseArea { + anchors.fill: parent + onClicked: { + pauseDownload(); + } + } + } + + BusyIndicator { + width: units.gu(2.5) + height: units.gu(2.5) + running: isDownloading && downloadingModelId === model.id + visible: isDownloading && downloadingModelId === model.id + } + } } } } diff --git a/src/backend.py b/src/backend.py index 67329d4..a9e5711 100755 --- a/src/backend.py +++ b/src/backend.py @@ -671,16 +671,58 @@ def list_installed_models(): }) seen_paths.add(item) + models.sort(key=lambda x: x["m_name"].lower()) log.info(f"[VOICE] Found {len(models)} installed models") return models +def get_installed_voice_models(): + """Returns a list of installed voice model IDs.""" + models_dir = get_voice_models_dir() + if not models_dir.exists(): + return [] + # Any directory in models_dir is considered an installed model + installed = [d.name for d in models_dir.iterdir() if d.is_dir()] + log.info(f"[VOICE] Found {len(installed)} installed models") + return installed + +def get_device_total_ram_mb(): + """Returns total device RAM in MB to check model compatibility.""" + try: + with open('/proc/meminfo', 'r') as f: + for line in f: + if line.startswith('MemTotal:'): + parts = line.split() + return int(parts[1]) // 1024 + except Exception: + pass + # Fallback using sysconf if /proc/meminfo is not available + try: + import os + pagesize = os.sysconf('SC_PAGE_SIZE') + pages = os.sysconf('SC_PHYS_PAGES') + return (pagesize * pages) // (1024 * 1024) + except Exception: + pass + return 2048 # Fallback to 2048MB + + +def get_paused_voice_models(): + """Returns a list of model IDs that have a partial download file.""" + models_dir = get_voice_models_dir() + if not models_dir.exists(): + return [] + paused = [] + for f in models_dir.glob("*.zip.tmp"): + paused.append(f.name.replace(".zip.tmp", "")) + return paused + def list_available_models(): """ Returns a list of models available for download. """ - return [ - {"id": "vosk-model-small-en-us-0.15", "name": "English (US, Small)", "size": "40M", "url": "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"}, + models = [ + {"id": "vosk-model-small-en-us-0.15", "name": "English (US, Small)", "size": "40M", "url": "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"}, {"id": "vosk-model-en-us-0.22", "name": "English (US)", "size": "1.8G", "url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip"}, {"id": "vosk-model-en-us-0.22-lgraph", "name": "English (US, LGraph)", "size": "128M", "url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22-lgraph.zip"}, {"id": "vosk-model-en-us-0.42-gigaspeech", "name": "English (US, Gigaspeech)", "size": "2.3G", "url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.42-gigaspeech.zip"}, @@ -757,6 +799,8 @@ def list_available_models(): {"id": "vosk-model-small-ka-0.42", "name": "Georgian (Small)", "size": "45M", "url": "https://alphacephei.com/vosk/models/vosk-model-small-ka-0.42.zip"}, {"id": "vosk-model-ka-0.42", "name": "Georgian", "size": "700M", "url": "https://alphacephei.com/vosk/models/vosk-model-ka-0.42.zip"} ] + models.sort(key=lambda x: x["name"].lower()) + return models def get_model_download_status(): @@ -764,20 +808,55 @@ def get_model_download_status(): return download_status +_voice_download_cancel_event = threading.Event() +_voice_download_action = None + +def cancel_voice_model_download(): + """Cancels an ongoing voice model download and deletes partial data.""" + global download_status, _voice_download_action + _voice_download_action = "cancel" + _voice_download_cancel_event.set() + download_status["in_progress"] = False + download_status["is_paused"] = False + download_status["message"] = "Download cancelled" + + # Clean up any partial download files immediately to prevent them from showing up on restart + models_dir = get_voice_models_dir() + if models_dir.exists(): + for f in models_dir.glob("*.zip.tmp"): + try: + f.unlink() + except Exception: + pass + + return {"status": "cancelled"} + +def pause_voice_model_download(): + """Pauses an ongoing voice model download, keeping partial data.""" + global download_status, _voice_download_action + _voice_download_action = "pause" + _voice_download_cancel_event.set() + download_status["in_progress"] = False + download_status["is_paused"] = True + download_status["message"] = "Download paused" + return {"status": "paused"} + def download_voice_model(model_id, url): """ Initiates a background thread to download and extract a voice model. """ global download_status + _voice_download_cancel_event.clear() if download_status["in_progress"]: return {"status": "error", "message": "Download already in progress"} download_status = { "in_progress": True, - "progress": 0, + "progress": download_status.get("progress", 0), "message": "Starting download...", "error": "", - "model_id": model_id + "model_id": model_id, + "is_paused": False } def do_download(): @@ -795,42 +874,57 @@ def do_download(): download_status["progress"] = 100 return - log.info(f"[VOICE] Downloading model from {url}") - download_status["message"] = "Downloading..." - - # Use a temporary file to avoid memory exhaustion for large models temp_zip = models_dir / f"{model_id}.zip.tmp" + existing_size = 0 if temp_zip.exists(): - temp_zip.unlink() + existing_size = temp_zip.stat().st_size - # Using urllib3 for download - response = http.request('GET', url, preload_content=False) - if response.status != 200: + headers = {} + if existing_size > 0: + headers['Range'] = f'bytes={existing_size}-' + log.info(f"[VOICE] Resuming {model_id} from {existing_size} bytes") + download_status["message"] = "Resuming..." + else: + log.info(f"[VOICE] Downloading model from {url}") + download_status["message"] = "Downloading..." + + # Using urllib3 for download with a timeout to handle sudden internet disconnection + response = http.request('GET', url, headers=headers, preload_content=False, timeout=urllib3.Timeout(connect=5.0, read=10.0)) + if response.status not in (200, 206, 416): raise Exception(f"Server returned status {response.status}") + if response.status == 200 and existing_size > 0: + # Server ignored Range header, restart from scratch + log.info(f"[VOICE] Server ignored Range, restarting download") + existing_size = 0 + temp_zip.unlink() + content_length = response.getheader('Content-Length') - total_size = int(content_length) if content_length else None + total_size = (int(content_length) + existing_size) if content_length else None - if total_size: - # Check available disk space (need roughly 2.5x the zip size for download + extraction + safety) + if total_size and existing_size == 0: + # Check available disk space only on new downloads try: _, _, free = shutil.disk_usage(str(models_dir)) free_mb = free / (1024 * 1024) required_mb = (total_size * 2.5) / (1024 * 1024) - - log.info(f"[VOICE] Space check for {model_id}: Required: {required_mb:.1f} MB, Available: {free_mb:.1f} MB") - if free < total_size * 2.5: raise Exception(f"Insufficient storage. Have {free_mb:.1f} MB, need at least {required_mb:.1f} MB available.") except Exception as disk_err: if isinstance(disk_err, Exception) and "Insufficient storage" in str(disk_err): raise disk_err log.warning(f"[VOICE] Could not check disk space: {disk_err}") - # Continue anyway if check fails, as disk_usage might not work on all filesystems - downloaded = 0 - with open(temp_zip, 'wb') as f: + downloaded = existing_size + mode = 'ab' if existing_size > 0 else 'wb' + with open(temp_zip, mode) as f: for chunk in response.stream(1024 * 64): + if _voice_download_cancel_event.is_set(): + action = _voice_download_action + if action == "pause": + raise Exception("PAUSED_BY_USER") + else: + raise Exception("CANCELLED_BY_USER") f.write(chunk) downloaded += len(chunk) if total_size: @@ -881,18 +975,30 @@ def do_download(): log.info(f"[VOICE] Successfully installed model {model_id}") except Exception as e: - log.exception(f"[VOICE] Download failed: {e}") + err_str = str(e) + log.exception(f"[VOICE] Download interrupted: {err_str}") download_status["in_progress"] = False - download_status["error"] = str(e) - download_status["message"] = "Failed" - send("download_error", str(e)) + + if err_str == "PAUSED_BY_USER": + download_status["is_paused"] = True + download_status["message"] = "Paused" + # Do not unlink temp_zip + elif err_str == "CANCELLED_BY_USER": + download_status["is_paused"] = False + download_status["message"] = "Cancelled" + if temp_zip and temp_zip.exists(): + try: + temp_zip.unlink() + except: + pass + else: + # Network error or other failure + download_status["error"] = err_str + download_status["message"] = "Failed" + download_status["is_paused"] = True # Keep partial file for resume + send("download_error", err_str) + finally: - # Cleanup temporary files - if temp_zip and temp_zip.exists(): - try: - temp_zip.unlink() - except Exception: - pass if temp_extract_dir and temp_extract_dir.exists(): try: shutil.rmtree(temp_extract_dir) @@ -969,12 +1075,39 @@ def resolve_settings_db_path(db_name="UBTMS_SettingsDB", app_id="ubtms"): return None +def _subprocess_recognize(model_path, pipe, timeout): + """Runs voice recognition in a separate process to avoid blocking the GIL.""" + try: + from voice_to_text.voice2text import recognize_from_mic + + class PipeStopEvent: + def is_set(self): + if pipe.poll(): + msg = pipe.recv() + if msg == "stop": + return True + return False + + def _partial(txt): + pipe.send(("partial", txt)) + + def _status(txt): + pipe.send(("status", txt)) + + text, err = recognize_from_mic(verbose=False, stop_event=PipeStopEvent(), timeout=timeout, partial_callback=_partial, status_callback=_status, model_path=model_path) + pipe.send(("final", text, err)) + except Exception as e: + import traceback + pipe.send(("final", None, f"Subprocess Error: {e}")) + + def run_voice_recognition(): """ Runs voice recognition in a background thread to avoid blocking the UI. Uses the offline Vosk engine. """ def do_recognition(): + import multiprocessing try: log.info("[VOICE] Starting offline voice recognition thread") @@ -1026,7 +1159,7 @@ def do_recognition(): else: os.environ["LD_LIBRARY_PATH"] = env_path - from voice_to_text.voice2text import recognize_from_mic, list_microphones + from voice_to_text.voice2text import list_microphones # Reset the stop event _voice_stop_event.clear() @@ -1034,17 +1167,48 @@ def do_recognition(): # Log available mics for debug log.info(f"[VOICE] mics: {list_microphones()}") - def handle_partial(txt): - if txt: - send("voice_recognition_partial", txt) + # Run recognition in a separate process to avoid GIL blocking and UI freezing + parent_conn, child_conn = multiprocessing.Pipe() + p = multiprocessing.Process(target=_subprocess_recognize, args=(str(model_path), child_conn, 30)) + p.start() - text, error = recognize_from_mic(stop_event=_voice_stop_event, partial_callback=handle_partial, model_path=str(model_path)) - if text: - log.info(f"[VOICE] Recognized text: {text}") - send("voice_recognition_result", text) - else: - log.warning(f"[VOICE] Recognition failed: {error or 'No speech detected'}") - send("voice_recognition_error", error or "No speech detected") + received_final = False + while p.is_alive(): + if _voice_stop_event.is_set(): + parent_conn.send("stop") + _voice_stop_event.clear() + + if parent_conn.poll(0.5): + msg = parent_conn.recv() + if msg[0] == "partial": + send("voice_recognition_partial", msg[1]) + elif msg[0] == "status": + send("voice_recognition_status", msg[1]) + elif msg[0] == "final": + received_final = True + text, error = msg[1], msg[2] + if text: + log.info(f"[VOICE] Recognized text: {text}") + send("voice_recognition_result", text) + else: + log.warning(f"[VOICE] Recognition failed: {error or 'No speech detected'}") + send("voice_recognition_error", error or "No speech detected") + break + + # Ensure process cleanup + p.join(timeout=1) + if p.is_alive(): + p.terminate() + p.join() + + # Handle unexpected crash (like OOM kill) + if not received_final: + exit_code = p.exitcode + if exit_code is not None and exit_code != 0: + err_msg = f"Voice recognition crashed unexpectedly (possibly Out of Memory). Exit code: {exit_code}" + log.error(f"[VOICE] {err_msg}") + send("voice_recognition_error", err_msg) + except Exception as e: log.exception(f"[VOICE] Error during voice recognition: {e}") send("voice_recognition_error", f"System Error: {str(e)}") diff --git a/voice_to_text/voice2text.py b/voice_to_text/voice2text.py index 4478a5e..95cdd25 100644 --- a/voice_to_text/voice2text.py +++ b/voice_to_text/voice2text.py @@ -17,7 +17,7 @@ except ImportError: pass -def recognize_from_mic(verbose=True, stop_event=None, timeout=30, partial_callback=None, model_path=None): +def recognize_from_mic(verbose=True, stop_event=None, timeout=30, partial_callback=None, status_callback=None, model_path=None): """ Records audio using arecord and recognizes it using Vosk (offline). Returns (text, error_message). @@ -37,12 +37,18 @@ def recognize_from_mic(verbose=True, stop_event=None, timeout=30, partial_callba if not model_path.exists(): return None, f"Vosk model not found at {model_path}" + if status_callback: + status_callback("Preparing...") + # Load the model and initialize recognizer if verbose: logger.info(f"Loading Vosk model from {model_path} for live processing...") model = Model(str(model_path)) rec = KaldiRecognizer(model, 16000) rec.SetWords(True) + if status_callback: + status_callback("Listening...") + try: arecord_cmd = "arecord" use_ffmpeg = False @@ -68,19 +74,30 @@ def recognize_from_mic(verbose=True, stop_event=None, timeout=30, partial_callba process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) start_time = time.time() - max_duration = timeout + last_speech_time = start_time + hard_max_duration = 300 # 5 minutes absolute max + silence_timeout = 7.0 # Stop after 7 seconds of silence results = [] + last_partial = "" # Read from stdout in chunks # `read(4000)` might block until 4000 bytes are available, which is fine for live streams. while process.poll() is None: + current_time = time.time() + if stop_event and stop_event.is_set(): logger.info("[VOICE] Stop signal received, terminating record process.") process.terminate() break - if (time.time() - start_time) > max_duration: - logger.info("[VOICE] Max duration reached, stopping.") + + if (current_time - start_time) > hard_max_duration: + logger.info("[VOICE] Absolute max duration reached, stopping.") + process.terminate() + break + + if (current_time - last_speech_time) > silence_timeout: + logger.info("[VOICE] Silence timeout reached, stopping auto-record.") process.terminate() break @@ -94,13 +111,20 @@ def recognize_from_mic(verbose=True, stop_event=None, timeout=30, partial_callba res = json.loads(rec.Result()) if res.get("text"): results.append(res["text"]) + last_speech_time = time.time() # Just finished a phrase + last_partial = "" if partial_callback: combined = " ".join(results).strip() partial_callback(combined) else: res = json.loads(rec.PartialResult()) - if res.get("partial"): - current_partial = res["partial"] + current_partial = res.get("partial", "") + if current_partial: + # User is actively speaking if partial result changes + if current_partial != last_partial: + last_speech_time = time.time() + last_partial = current_partial + if partial_callback: combined = " ".join(results + [current_partial]).strip() partial_callback(combined) From e57beba3b5b1fd6253c099a77ba6683ac04482ef Mon Sep 17 00:00:00 2001 From: Parvathy Nair Date: Fri, 12 Jun 2026 15:56:07 +0530 Subject: [PATCH 2/8] [ADD] Search Feature, Renamed the Page Name Co-authored-by: AnmollGarg --- qml/components/navigation/ListHeader.qml | 3 +- qml/features/settings/pages/Settings_Page.qml | 2 +- .../settings/pages/Settings_VoiceModel.qml | 136 +++++++++++++----- 3 files changed, 102 insertions(+), 39 deletions(-) diff --git a/qml/components/navigation/ListHeader.qml b/qml/components/navigation/ListHeader.qml index 4cf9b45..25ec515 100644 --- a/qml/components/navigation/ListHeader.qml +++ b/qml/components/navigation/ListHeader.qml @@ -6,7 +6,7 @@ import ".." Rectangle { id: topFilterBar width: parent ? parent.width : Screen.width - height: showSearchBox ? units.gu(11) : units.gu(6) // Restored height to give proper space + height: (showSearchBox ? units.gu(5) : 0) + (filterModel.length > 0 ? units.gu(6) : 0) color: "transparent" // Helper property to check if dark mode is active @@ -207,6 +207,7 @@ Rectangle { // Filter buttons row below search Item { + visible: topFilterBar.filterModel.length > 0 width: parent.width height: units.gu(6) // Adjusted back diff --git a/qml/features/settings/pages/Settings_Page.qml b/qml/features/settings/pages/Settings_Page.qml index b887d34..d56ed61 100644 --- a/qml/features/settings/pages/Settings_Page.qml +++ b/qml/features/settings/pages/Settings_Page.qml @@ -111,7 +111,7 @@ Page { SettingsListItem { iconName: "audio-input-microphone-symbolic" iconColor: "#8e44ad" - text: i18n.dtr("ubtms", "Voice Model Settings") + text: i18n.dtr("ubtms", "Voice Model (Beta)") active: settings.selectedSettingsPageUrl === "Settings_VoiceModel.qml" showDivider: false onClicked: { diff --git a/qml/features/settings/pages/Settings_VoiceModel.qml b/qml/features/settings/pages/Settings_VoiceModel.qml index 6c2d802..b8cc779 100644 --- a/qml/features/settings/pages/Settings_VoiceModel.qml +++ b/qml/features/settings/pages/Settings_VoiceModel.qml @@ -30,9 +30,10 @@ import Lomiri.Components.Popups 1.3 import QtGraphicalEffects 1.0 import "../components" import "../../../components" +import "../../../components/navigation" as Nav Page { id: voiceModelSettingsPage - title: i18n.dtr("ubtms", "Voice Model Settings") + title: i18n.dtr("ubtms", "Voice Model (Beta)") header: SettingsHeader { id: pageHeader @@ -42,6 +43,12 @@ Page { iconName: "info" onTriggered: PopupUtils.open(infoDialogComponent, voiceModelSettingsPage) }, + Action { + iconName: "search" + onTriggered: { + myTaskListHeader.toggleSearchVisibility() + } + }, Action { iconName: "reload" onTriggered: refreshModels() @@ -59,6 +66,21 @@ Page { property var downloadStatus: { "in_progress": false, "progress": 0, "message": "", "error": "" } property int deviceRamMB: 2048 property bool isVoiceInputEnabled: true + property string searchQuery: "" + property var allInstalledModels: [] + property var allAvailableModels: [] + + Timer { + id: searchDebounceTimer + interval: 300 + onTriggered: { + filterModels(); + } + } + + onSearchQueryChanged: { + searchDebounceTimer.restart(); + } function getVoiceInputEnabledSetting() { try { @@ -138,58 +160,83 @@ Page { if (!mainView.backend_bridge.ready) return; isLoading = true; - modelList.clear(); mainView.backend_bridge.call("backend.list_installed_models", [], function(models) { isLoading = false; - if (models && models.length > 0) { + var arr = []; + if (models) { for (var i = 0; i < models.length; i++) { - modelList.append({ - "name": models[i].m_name, - "path": models[i].m_path, - "m_source": models[i].m_source, - "size": models[i].m_size || "" - }); + arr.push(models[i]); } } + allInstalledModels = arr; + filterModels(); // After refreshing installed models, refresh available ones refreshAvailableModels(); }); } - function refreshAvailableModels() { - if (!mainView.backend_bridge.ready) return; - + function filterModels() { + modelList.clear(); availableModelList.clear(); - mainView.backend_bridge.call("backend.list_available_models", [], function(models) { - if (models && models.length > 0) { - // Only show models that are NOT installed - var installedPaths = []; - for (var i = 0; i < modelList.count; i++) { - installedPaths.push(modelList.get(i).path); + + var query = searchQuery.toLowerCase(); + + // Installed models + for (var i = 0; i < allInstalledModels.length; i++) { + var m = allInstalledModels[i]; + if (query === "" || (m.m_name && m.m_name.toLowerCase().indexOf(query) !== -1) || (m.m_path && m.m_path.toLowerCase().indexOf(query) !== -1)) { + modelList.append({ + "name": m.m_name, + "path": m.m_path, + "m_source": m.m_source, + "size": m.m_size || "" + }); + } + } + + // Available models + var installedPaths = []; + for (var idx = 0; idx < allInstalledModels.length; idx++) { + installedPaths.push(allInstalledModels[idx].m_path); + } + + for (var j = 0; j < allAvailableModels.length; j++) { + var modelId = allAvailableModels[j].id; + var alreadyInstalled = false; + for (var k = 0; k < installedPaths.length; k++) { + if (installedPaths[k].indexOf(modelId) !== -1) { + alreadyInstalled = true; + break; } + } - for (var j = 0; j < models.length; j++) { - var modelId = models[j].id; - // Check if already installed (simple check by ID in path or similar) - var alreadyInstalled = false; - for (var k = 0; k < installedPaths.length; k++) { - if (installedPaths[k].indexOf(modelId) !== -1) { - alreadyInstalled = true; - break; - } - } + if (!alreadyInstalled) { + var avM = allAvailableModels[j]; + if (query === "" || (avM.name && avM.name.toLowerCase().indexOf(query) !== -1) || (avM.id && avM.id.toLowerCase().indexOf(query) !== -1)) { + availableModelList.append({ + "id": avM.id, + "name": avM.name, + "size": avM.size, + "url": avM.url + }); + } + } + } + } - if (!alreadyInstalled) { - availableModelList.append({ - "id": models[j].id, - "name": models[j].name, - "size": models[j].size, - "url": models[j].url - }); - } + function refreshAvailableModels() { + if (!mainView.backend_bridge.ready) return; + + mainView.backend_bridge.call("backend.list_available_models", [], function(models) { + var arr = []; + if (models) { + for (var i = 0; i < models.length; i++) { + arr.push(models[i]); } } + allAvailableModels = arr; + filterModels(); }); } @@ -475,9 +522,24 @@ Page { } } + Nav.ListHeader { + id: myTaskListHeader + anchors.top: pageHeader.bottom + anchors.left: parent.left + anchors.right: parent.right + + filterModel: [] + showSearchBox: false + currentFilter: "" + + onCustomSearch: { + searchQuery = query; + } + } + Flickable { id: flickable - anchors.top: pageHeader.bottom + anchors.top: myTaskListHeader.bottom anchors.left: parent.left anchors.right: parent.right anchors.bottom: parent.bottom From cd51b18c39b509b9a297fd362860559fdc887122 Mon Sep 17 00:00:00 2001 From: Anmol Garg Date: Mon, 15 Jun 2026 17:58:27 +0530 Subject: [PATCH 3/8] [FIX] Critical Bug - Duplicating --- qml/components/richtext/ReadMorePage.qml | 150 ++++++++++++++++++-- qml/components/richtext/RichTextEditor.qml | 93 +++++------- qml/components/richtext/RichTextPreview.qml | 73 +++++----- qml/components/richtext/js/editor.html | 54 +++---- qml/components/system/VoiceTimerWidget.qml | 123 ++++++++++++++++ voice_to_text/voice2text.py | 4 +- 6 files changed, 372 insertions(+), 125 deletions(-) create mode 100644 qml/components/system/VoiceTimerWidget.qml diff --git a/qml/components/richtext/ReadMorePage.qml b/qml/components/richtext/ReadMorePage.qml index 383cd13..5a57862 100644 --- a/qml/components/richtext/ReadMorePage.qml +++ b/qml/components/richtext/ReadMorePage.qml @@ -1,6 +1,8 @@ import QtQuick 2.7 import Lomiri.Components 1.3 +import QtQuick.LocalStorage 2.7 as Sql import "../../../models/global.js" as Global +import "../system" Page { id: readmepage @@ -23,6 +25,91 @@ Page { property string _lastKnownHolder: "" property bool _parentSaveCommitted: false + property bool listening: false + property bool processing: false + property string textBeforeRecording: "" + property bool isVoiceInputEnabled: true + property string _partialRecognizedText: "" + property string _currentVoiceStatus: "" + + function checkVoiceInputEnabled() { + try { + var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var result = true; + db.transaction(function (tx) { + var rs = tx.executeSql('SELECT value FROM app_settings WHERE key = "voice_input_enabled"'); + if (rs.rows.length > 0) { + result = rs.rows.item(0).value === "true"; + } + }); + isVoiceInputEnabled = result; + } catch (e) { + console.warn("Error reading voice_input_enabled:", e); + } + } + + Connections { + target: mainView.backend_bridge + onMessageReceived: { + if (!readmepage.listening && !readmepage.processing) return; + + if (data.event === "voice_recognition_partial") { + var partialText = data.payload + if (partialText) { + readmepage._partialRecognizedText = partialText; + readmepage._currentVoiceStatus = i18n.dtr("ubtms", "Listening..."); + } + } else if (data.event === "voice_recognition_status") { + var statusText = data.payload; + if (statusText) { + readmepage._currentVoiceStatus = statusText; + } + } else if (data.event === "voice_recognition_result") { + readmepage.listening = false + readmepage.processing = false + var recognizedText = data.payload + readmepage._partialRecognizedText = ""; + readmepage._currentVoiceStatus = ""; + + if (recognizedText) { + var prefix = ""; + if (simpleEditor.length > 0) { + var lastChar = simpleEditor.text.charAt(simpleEditor.length - 1); + if (lastChar !== ' ' && lastChar !== '\n' && lastChar !== '\r' && lastChar !== '\t') { + prefix = " "; + } + } + var finalStr = prefix + recognizedText; + simpleEditor.insert(simpleEditor.length, finalStr); + + cursorTimer.start(); + } + } else if (data.event === "voice_recognition_error") { + readmepage.listening = false + readmepage.processing = false + readmepage._partialRecognizedText = ""; + readmepage._currentVoiceStatus = ""; + cursorTimer.start() + } + } + } + + Timer { + id: cursorTimer + interval: 100 + repeat: false + onTriggered: { + if (simpleEditor) { + simpleEditor.cursorPosition = simpleEditor.length; + if (simpleEditor.flickableItem) { + simpleEditor.flickableItem.contentY = Math.max(0, simpleEditor.flickableItem.contentHeight - simpleEditor.flickableItem.height); + } else if (simpleEditor.flickable) { + simpleEditor.flickable.contentY = Math.max(0, simpleEditor.flickable.contentHeight - simpleEditor.flickable.height); + } + } + } + } + header: PageHeader { id: header title: i18n.dtr("ubtms","Description") @@ -35,20 +122,45 @@ Page { dividerColor: LomiriColors.slate } + trailingActionBar.numberOfSlots: 3 trailingActionBar.actions: [ Action { - visible: !isReadOnly && useRichText - iconName: editor.toolbarExpanded ? "view-collapse" : "view-expand" - text: editor.toolbarExpanded ? i18n.dtr("ubtms", "Hide Toolbar") : i18n.dtr("ubtms", "Show Toolbar") + visible: !isReadOnly && (!readmepage.listening && !readmepage.processing) + iconName: "tick" onTriggered: { - editor.toolbarExpanded = !editor.toolbarExpanded + saveAndClose() } }, Action { - visible: !isReadOnly - iconName: "tick" + visible: !isReadOnly && readmepage.isVoiceInputEnabled && !useRichText + iconName: "microphone" + text: readmepage.listening ? i18n.dtr("ubtms", "Stop Recording") : i18n.dtr("ubtms", "Start Recording") onTriggered: { - saveAndClose() + if (readmepage.listening) { + readmepage.listening = false + readmepage.processing = true + readmepage._currentVoiceStatus = i18n.dtr("ubtms", "Processing..."); + + backend_bridge.call("backend.stop_voice_recognition", []) + return; + } + if (readmepage.processing) return; + + readmepage.textBeforeRecording = simpleEditor.text + readmepage._partialRecognizedText = ""; + readmepage._currentVoiceStatus = i18n.dtr("ubtms", "Starting..."); + + readmepage.listening = true + readmepage.processing = false + backend_bridge.call("backend.run_voice_recognition", []) + } + }, + Action { + visible: !isReadOnly && useRichText && (!readmepage.listening && !readmepage.processing) + iconName: editor.toolbarExpanded ? "view-collapse" : "view-expand" + text: editor.toolbarExpanded ? i18n.dtr("ubtms", "Hide Toolbar") : i18n.dtr("ubtms", "Show Toolbar") + onTriggered: { + editor.toolbarExpanded = !editor.toolbarExpanded } } ] @@ -133,8 +245,8 @@ Page { id: editor visible: useRichText text: Global.description_temporary_holder - readOnly: isReadOnly - showToolbar: !isReadOnly + readOnly: isReadOnly || readmepage.listening || readmepage.processing + showToolbar: !isReadOnly && !readmepage.listening && !readmepage.processing anchors.fill: parent onContentChanged: { @@ -165,7 +277,7 @@ Page { id: simpleEditor visible: !useRichText text: Global.description_temporary_holder - readOnly: isReadOnly + readOnly: isReadOnly || readmepage.listening || readmepage.processing textFormat: Text.PlainText font.pixelSize: units.gu(2) wrapMode: TextArea.Wrap @@ -222,6 +334,23 @@ Page { } } + VoiceTimerWidget { + id: voiceTimerWidget + parent: readmepage + + isListening: readmepage.listening + isProcessing: readmepage.processing + partialText: readmepage._partialRecognizedText + voiceStatus: readmepage._currentVoiceStatus + + onStopClicked: { + readmepage.listening = false + readmepage.processing = true + readmepage._currentVoiceStatus = i18n.dtr("ubtms", "Processing..."); + backend_bridge.call("backend.stop_voice_recognition", []) + } + } + // Handle page visibility changes to ensure content is saved onVisibleChanged: { if (!visible && !isReadOnly && !_parentSaveCommitted) { @@ -252,6 +381,7 @@ Page { } Component.onCompleted: { + checkVoiceInputEnabled(); // Initialize tracking to avoid false external-change detection _lastKnownHolder = Global.description_temporary_holder || ""; diff --git a/qml/components/richtext/RichTextEditor.qml b/qml/components/richtext/RichTextEditor.qml index 5aabfde..1011cef 100644 --- a/qml/components/richtext/RichTextEditor.qml +++ b/qml/components/richtext/RichTextEditor.qml @@ -20,6 +20,7 @@ import QtQuick.Window 2.2 import Lomiri.Components 1.3 import QtWebEngine 1.5 import QtQuick.LocalStorage 2.7 as Sql +import "../system" import "js/html-sanitizer.js" as HtmlSanitizer Item { @@ -76,6 +77,7 @@ Item { /** Partial voice recognition text (for UI feedback) */ property string _partialVoiceText: "" + property string _currentVoiceStatus: "" /** Whether voice input is enabled globally */ property bool isVoiceInputEnabled: true @@ -109,16 +111,8 @@ Item { var partialText = data.payload if (partialText) { - var jsCode = "var spans = document.getElementsByTagName('span'); " + - "var marker = null; " + - "for(var i=spans.length-1; i>=0; i--) { if (spans[i].innerText && spans[i].innerText.indexOf('\\u200B') !== -1) { marker = spans[i]; break; } } " + - "if (marker) { " + - " marker.innerText = '\\u200B ' + " + JSON.stringify(partialText + " (Listening...)") + "; " + - " window.editor.moveCursorToEnd(); " + - "}"; - wv.runJavaScript(jsCode, function(res) { - editor.syncContent(); - }); + editor._partialVoiceText = partialText; + editor._currentVoiceStatus = "Listening..."; } } else if (data.event === "voice_recognition_result") { editor.listening = false @@ -126,15 +120,13 @@ Item { var recognizedText = data.payload console.log("[RichTextEditor] Received recognition result: " + recognizedText) - // Replace the partial span with the final text - var jsCode = "var spans = document.getElementsByTagName('span'); " + - "var marker = null; " + - "for(var i=spans.length-1; i>=0; i--) { if (spans[i].innerText && spans[i].innerText.indexOf('\\u200B') !== -1) { marker = spans[i]; break; } } " + - "if (marker) { " + - " marker.outerHTML = " + JSON.stringify(recognizedText ? (recognizedText + " ") : "") + "; " + - "} else if (" + JSON.stringify(recognizedText) + ") { " + - " window.editor.focus(); window.editor.insertHTML(" + JSON.stringify(recognizedText + " ") + "); " + - "}"; + editor._partialVoiceText = ""; + editor._currentVoiceStatus = ""; + + var jsCode = "if (" + JSON.stringify(recognizedText) + ") { " + + " window.editor.focus(); window.editor.moveCursorToEnd(); window.editor.insertHTML(" + JSON.stringify(recognizedText + " ") + "); " + + "}" + + "document.body.contentEditable = " + (!readOnly).toString() + ";"; wv.runJavaScript(jsCode); // Force a sync to update the 'text' property and emit contentChanged @@ -143,30 +135,16 @@ Item { if (!listening && !processing) return; var statusText = data.payload; if (statusText) { - var jsCode = "var spans = document.getElementsByTagName('span'); " + - "var marker = null; " + - "for(var i=spans.length-1; i>=0; i--) { if (spans[i].innerText && spans[i].innerText.indexOf('\\u200B') !== -1) { marker = spans[i]; break; } } " + - "if (marker) { " + - " marker.innerText = '\\u200B (' + " + JSON.stringify(statusText) + " + ')'; " + - " window.editor.moveCursorToEnd(); " + - "}"; - wv.runJavaScript(jsCode, function(res) { - editor.syncContent(); - }); + editor._currentVoiceStatus = statusText; } } else if (data.event === "voice_recognition_error") { editor.listening = false editor.processing = false + editor._partialVoiceText = ""; + editor._currentVoiceStatus = ""; console.log("[RichTextEditor] Voice recognition error: " + data.payload) - // Remove the partial span on error - var jsCode = "var spans = document.getElementsByTagName('span'); " + - "var marker = null; " + - "for(var i=spans.length-1; i>=0; i--) { if (spans[i].innerText && spans[i].innerText.indexOf('\\u200B') !== -1) { marker = spans[i]; break; } } " + - "if (marker) { " + - " marker.remove(); " + - "}"; - wv.runJavaScript(jsCode); + wv.runJavaScript("document.body.contentEditable = " + (!readOnly).toString() + ";"); } } } @@ -177,6 +155,7 @@ Item { console.log("[RichTextEditor] Stopping voice recognition...") listening = false processing = true + editor._currentVoiceStatus = "Processing..." mainView.backend_bridge.call("backend.stop_voice_recognition", []) } else { if (processing) return; @@ -184,27 +163,9 @@ Item { listening = true processing = false _partialVoiceText = "" + editor._currentVoiceStatus = "Starting..." - // Move cursor to the end and insert a new line if there's already text, - // then insert a temporary span for live partial text - var jsCode = "try { " + - " window.editor.focus(); " + - " window.editor.moveCursorToEnd(); " + - - " var currentHTML = window.editor.getHTML(); " + - " var hasContent = currentHTML.replace(/<[^>]*>/g, '').trim().length > 0; " + - " var marker = '\\u200B (Starting...)'; " + - - " if (hasContent) { " + - " window.editor.insertHTML('

' + marker); " + - " } else { " + - " window.editor.insertHTML(marker); " + - " } " + - " window.editor.moveCursorToEnd(); " + - "} catch(e) { console.error('Error inserting voice marker: ', e); }"; - wv.runJavaScript(jsCode, function(res) { - editor.syncContent(); - }); + wv.runJavaScript("document.body.contentEditable = false;"); mainView.backend_bridge.call("backend.run_voice_recognition", []) } @@ -784,4 +745,22 @@ Item { } } } + + VoiceTimerWidget { + id: voiceTimerWidget + parent: mainView + anchors.bottomMargin: editor._oskHeight + units.gu(1) + + isListening: editor.listening + isProcessing: editor.processing + partialText: editor._partialVoiceText + voiceStatus: editor._currentVoiceStatus + + onStopClicked: { + editor.listening = false + editor.processing = true + editor._currentVoiceStatus = "Processing..."; + mainView.backend_bridge.call("backend.stop_voice_recognition", []) + } + } } diff --git a/qml/components/richtext/RichTextPreview.qml b/qml/components/richtext/RichTextPreview.qml index d2d881e..4779355 100644 --- a/qml/components/richtext/RichTextPreview.qml +++ b/qml/components/richtext/RichTextPreview.qml @@ -3,6 +3,7 @@ import Lomiri.Components 1.3 import QtQuick.LocalStorage 2.7 as Sql import "js/html-sanitizer.js" as HtmlSanitizer import "../../../models/global.js" as Global +import "../system" Rectangle { id: root @@ -45,8 +46,8 @@ Rectangle { Component.onCompleted: { checkVoiceInputEnabled() } - property int _liveStartPos: 0 - property int _liveLength: 0 + property string _partialRecognizedText: "" + property string _currentVoiceStatus: "" Connections { target: mainView.backend_bridge @@ -58,21 +59,13 @@ Rectangle { if (data.event === "voice_recognition_partial") { var partialText = data.payload if (partialText) { - var str = " " + partialText + " (Listening...)"; - previewText.remove(root._liveStartPos, root._liveStartPos + root._liveLength); - previewText.insert(root._liveStartPos, str); - root._liveLength = str.length; - - cursorTimer.start(); + root._partialRecognizedText = partialText; + root._currentVoiceStatus = i18n.dtr("ubtms", "Listening..."); } } else if (data.event === "voice_recognition_status") { var statusText = data.payload; if (statusText) { - var str = " (" + statusText + ")"; - previewText.remove(root._liveStartPos, root._liveStartPos + root._liveLength); - previewText.insert(root._liveStartPos, str); - root._liveLength = str.length; - cursorTimer.start(); + root._currentVoiceStatus = statusText; } } else if (data.event === "voice_recognition_result") { root.listening = false @@ -80,24 +73,30 @@ Rectangle { var recognizedText = data.payload console.log("[RichTextPreview] Received recognition result: " + recognizedText) + root._partialRecognizedText = ""; + root._currentVoiceStatus = ""; + if (recognizedText) { - var finalStr = " " + recognizedText; - previewText.remove(root._liveStartPos, root._liveStartPos + root._liveLength); - previewText.insert(root._liveStartPos, finalStr); - root._liveLength = 0; + var prefix = ""; + if (previewText.length > 0) { + var lastChar = previewText.text.charAt(previewText.length - 1); + if (lastChar !== ' ' && lastChar !== '\n' && lastChar !== '\r' && lastChar !== '\t') { + prefix = " "; + } + } + var finalStr = prefix + recognizedText; + previewText.insert(previewText.length, finalStr); root.contentChanged(previewText.text); cursorTimer.start(); } else { - // Restore if no text - previewText.remove(root._liveStartPos, root._liveStartPos + root._liveLength); - root._liveLength = 0; cursorTimer.start(); } } else if (data.event === "voice_recognition_error") { root.listening = false root.processing = false - root.text = root.textBeforeRecording + root._partialRecognizedText = ""; + root._currentVoiceStatus = ""; console.log("[RichTextPreview] Voice recognition error: " + data.payload) cursorTimer.start() } @@ -318,7 +317,7 @@ Rectangle { id: previewText textFormat: useRichText ? Text.RichText : Text.PlainText - readOnly: is_read_only + readOnly: is_read_only || root.listening || root.processing color: theme.name === "Ubuntu.Components.Themes.SuruDark" ? "white" : "black" wrapMode: Text.WordWrap font.pixelSize: units.gu(2) @@ -403,11 +402,7 @@ Rectangle { console.log("[RichTextPreview] Stopping voice recognition...") root.listening = false root.processing = true - - // Replace the Listening indicator with Processing via direct node manipulation - previewText.remove(root._liveStartPos, root._liveStartPos + root._liveLength); - previewText.insert(root._liveStartPos, " (Processing...)"); - root._liveLength = " (Processing...)".length; + root._currentVoiceStatus = i18n.dtr("ubtms", "Processing..."); backend_bridge.call("backend.stop_voice_recognition", []) return; @@ -417,12 +412,9 @@ Rectangle { console.log("[RichTextPreview] Voice recognition started") root.textBeforeRecording = root.text - // Initialize the live insertion point at the very end - root._liveStartPos = previewText.length; - root._liveLength = " (Starting...)".length; - previewText.insert(root._liveStartPos, " (Starting...)"); + root._partialRecognizedText = ""; + root._currentVoiceStatus = i18n.dtr("ubtms", "Starting..."); - cursorTimer.start() root.listening = true root.processing = false backend_bridge.call("backend.run_voice_recognition", []) @@ -457,4 +449,21 @@ Rectangle { } } } + + VoiceTimerWidget { + id: voiceTimerWidget + parent: mainView + + isListening: root.listening + isProcessing: root.processing + partialText: root._partialRecognizedText + voiceStatus: root._currentVoiceStatus + + onStopClicked: { + root.listening = false + root.processing = true + root._currentVoiceStatus = i18n.dtr("ubtms", "Processing..."); + backend_bridge.call("backend.stop_voice_recognition", []) + } + } } diff --git a/qml/components/richtext/js/editor.html b/qml/components/richtext/js/editor.html index 16d8916..00104db 100644 --- a/qml/components/richtext/js/editor.html +++ b/qml/components/richtext/js/editor.html @@ -1,12 +1,13 @@ + Rich Text Editor - + - + + \ No newline at end of file diff --git a/qml/components/system/VoiceTimerWidget.qml b/qml/components/system/VoiceTimerWidget.qml new file mode 100644 index 0000000..882d503 --- /dev/null +++ b/qml/components/system/VoiceTimerWidget.qml @@ -0,0 +1,123 @@ +import QtQuick 2.7 +import Lomiri.Components 1.3 + +Rectangle { + id: voiceTimerWidget + + // Properties to control the widget state + property bool isListening: false + property bool isProcessing: false + property string partialText: "" + property string voiceStatus: "" + + // Signal emitted when the user clicks the stop button + signal stopClicked() + + width: units.gu(47) + height: units.gu(8) + color: "#2d2d2d" + radius: units.gu(1) + + // Default positioning logic that can be overridden by instantiators + anchors.bottom: parent.bottom + anchors.horizontalCenter: parent.horizontalCenter + anchors.bottomMargin: Qt.inputMethod.visible ? Qt.inputMethod.keyboardRectangle.height + units.gu(1) : units.gu(1) + z: 999 + + visible: isListening || isProcessing + + Rectangle { + id: indicator + width: units.gu(1.5) + height: units.gu(1.5) + radius: units.gu(.75) + color: voiceTimerWidget.isProcessing ? "#ffa500" : "#0078d4" + anchors.left: parent.left + anchors.margins: units.gu(2) + anchors.verticalCenter: parent.verticalCenter + + SequentialAnimation on opacity { + loops: Animation.Infinite + running: voiceTimerWidget.visible + NumberAnimation { from: 0.3; to: 1; duration: 800; easing.type: Easing.InOutQuad } + NumberAnimation { from: 1; to: 0.3; duration: 800; easing.type: Easing.InOutQuad } + } + } + + Flickable { + id: textFlickable + anchors.left: indicator.right + anchors.leftMargin: units.gu(1) + anchors.right: stopbutton.left + anchors.rightMargin: units.gu(1) + anchors.verticalCenter: parent.verticalCenter + height: units.gu(4) + contentWidth: voiceLabel.paintedWidth + contentHeight: height + clip: true + interactive: false + + Label { + id: voiceLabel + text: voiceTimerWidget.voiceStatus + (voiceTimerWidget.partialText ? " - " + voiceTimerWidget.partialText : "") + color: "white" + font.pixelSize: units.gu(2) + anchors.verticalCenter: parent.verticalCenter + verticalAlignment: Text.AlignVCenter + + onTextChanged: { + if (paintedWidth > textFlickable.width) { + textFlickable.contentX = paintedWidth - textFlickable.width; + } else { + textFlickable.contentX = 0; + } + } + } + } + + Image { + id: stopbutton + anchors.verticalCenter: parent.verticalCenter + anchors.right: parent.right + anchors.margins: units.gu(1) + width: units.gu(5) + height: units.gu(5) + // Image path assumes we are in components/system/ + source: "../../../images/stop.png" + fillMode: Image.PreserveAspectFit + visible: voiceTimerWidget.isListening + + MouseArea { + anchors.fill: parent + onPressed: stopbutton.opacity = 0.5 + onReleased: stopbutton.opacity = 1.0 + onCanceled: stopbutton.opacity = 1.0 + onClicked: { + voiceTimerWidget.stopClicked() + } + } + } + + Rectangle { + anchors.bottom: parent.bottom + anchors.left: parent.left + anchors.right: parent.right + height: units.gu(0.5) + color: "#333333" + opacity: 0.7 + + Rectangle { + id: progressIndicator + anchors.top: parent.top + anchors.bottom: parent.bottom + width: parent.width * 0.3 + color: voiceTimerWidget.isProcessing ? "#ffa500" : "#0078d4" + + SequentialAnimation on x { + running: voiceTimerWidget.visible && voiceTimerWidget.isProcessing + loops: Animation.Infinite + NumberAnimation { from: -progressIndicator.width; to: voiceTimerWidget.width; duration: 2000; easing.type: Easing.InOutQuad } + } + } + } +} diff --git a/voice_to_text/voice2text.py b/voice_to_text/voice2text.py index 95cdd25..aab7b0b 100644 --- a/voice_to_text/voice2text.py +++ b/voice_to_text/voice2text.py @@ -38,7 +38,7 @@ def recognize_from_mic(verbose=True, stop_event=None, timeout=30, partial_callba return None, f"Vosk model not found at {model_path}" if status_callback: - status_callback("Preparing...") + status_callback("Preparing...~$") # Load the model and initialize recognizer if verbose: logger.info(f"Loading Vosk model from {model_path} for live processing...") @@ -47,7 +47,7 @@ def recognize_from_mic(verbose=True, stop_event=None, timeout=30, partial_callba rec.SetWords(True) if status_callback: - status_callback("Listening...") + status_callback("Listening...~$") try: arecord_cmd = "arecord" From 79555f573426c3917a68d14b4ff63e1a2790c346 Mon Sep 17 00:00:00 2001 From: Parvathy Nair Date: Tue, 16 Jun 2026 11:47:52 +0530 Subject: [PATCH 4/8] [FIX] voice model setting info, the navigation issue fixed, the download error pop up message --- qml/components/richtext/ReadMorePage.qml | 10 ++ qml/components/richtext/RichTextEditor.qml | 7 ++ qml/components/richtext/RichTextPreview.qml | 7 ++ .../settings/pages/Settings_VoiceModel.qml | 96 ++++++++++++++++--- 4 files changed, 106 insertions(+), 14 deletions(-) diff --git a/qml/components/richtext/ReadMorePage.qml b/qml/components/richtext/ReadMorePage.qml index 5a57862..0e897c2 100644 --- a/qml/components/richtext/ReadMorePage.qml +++ b/qml/components/richtext/ReadMorePage.qml @@ -378,6 +378,11 @@ Page { } } } + + if (!visible && (listening || processing)) { + console.log("[ReadMorePage] visibility changed: Stopping voice recognition...") + mainView.backend_bridge.call("backend.stop_voice_recognition", []) + } } Component.onCompleted: { @@ -411,5 +416,10 @@ Page { parentDraftHandler.saveDraft(); } } + + if (listening || processing) { + console.log("[ReadMorePage] destruction: Stopping voice recognition...") + mainView.backend_bridge.call("backend.stop_voice_recognition", []) + } } } diff --git a/qml/components/richtext/RichTextEditor.qml b/qml/components/richtext/RichTextEditor.qml index 1011cef..5257f2c 100644 --- a/qml/components/richtext/RichTextEditor.qml +++ b/qml/components/richtext/RichTextEditor.qml @@ -149,6 +149,13 @@ Item { } } + Component.onDestruction: { + if (listening || processing) { + console.log("[RichTextEditor] destruction: Stopping voice recognition...") + mainView.backend_bridge.call("backend.stop_voice_recognition", []) + } + } + /** Toggle voice recognition state */ function toggleVoiceRecognition() { if (listening) { diff --git a/qml/components/richtext/RichTextPreview.qml b/qml/components/richtext/RichTextPreview.qml index 4779355..8b0a0e6 100644 --- a/qml/components/richtext/RichTextPreview.qml +++ b/qml/components/richtext/RichTextPreview.qml @@ -24,6 +24,13 @@ Rectangle { property bool liveSyncActive: false property bool listening: false property bool processing: false + + Component.onDestruction: { + if (listening || processing) { + console.log("[RichTextPreview] destruction: Stopping voice recognition...") + backend_bridge.call("backend.stop_voice_recognition", []) + } + } property string textBeforeRecording: "" property bool isVoiceInputEnabled: true diff --git a/qml/features/settings/pages/Settings_VoiceModel.qml b/qml/features/settings/pages/Settings_VoiceModel.qml index b8cc779..6aefb72 100644 --- a/qml/features/settings/pages/Settings_VoiceModel.qml +++ b/qml/features/settings/pages/Settings_VoiceModel.qml @@ -344,21 +344,89 @@ Page { id: infoDialog title: i18n.dtr("ubtms", "About Voice Models") - Column { - spacing: units.gu(2) + Flickable { width: parent.width - - Text { - text: i18n.dtr("ubtms", "Voice models allow you to dictate text using your microphone directly into the app. Because processing happens locally on your device, your voice data remains completely private and no internet connection is required after the initial model download.\n\nLarger models provide higher accuracy but require more device memory and space. Smaller models are faster and use fewer resources but may be less accurate.") + height: Math.min(units.gu(50), infoContentColumn.height) + contentHeight: infoContentColumn.height + clip: true + interactive: contentHeight > height + + Column { + id: infoContentColumn width: parent.width - wrapMode: Text.WordWrap - color: theme.palette.normal.backgroundText - } - - Button { - text: i18n.dtr("ubtms", "Close") - anchors.horizontalCenter: parent.horizontalCenter - onClicked: PopupUtils.close(infoDialog) + spacing: units.gu(2) + + Text { + text: i18n.dtr("ubtms", "Voice models allow you to dictate text using your microphone directly into the app. Because processing happens locally on your device, your voice data remains completely private and no internet connection is required after the initial model download.\n\nLarger models provide higher accuracy but require more device memory and space. Smaller models are faster and use fewer resources but may be less accurate.") + width: parent.width + wrapMode: Text.WordWrap + color: theme.palette.normal.backgroundText + } + + Text { + width: parent.width + wrapMode: Text.WordWrap + color: theme.palette.normal.backgroundText + font.pixelSize: units.gu(1.6) + lineHeight: 1.2 + text: i18n.dtr("ubtms", "Voice Feature Stages: When you click the voice icon, it will show Starting, then Preparing. Only start speaking once it shows Listening. When stopped, it will show Processing with a yellow bar.") + } + + Text { + width: parent.width + wrapMode: Text.WordWrap + color: theme.palette.normal.backgroundText + font.pixelSize: units.gu(1.6) + lineHeight: 1.2 + text: i18n.dtr("ubtms", "Auto-Stop & Limits: If you do not speak for 7 seconds, the voice icon will automatically stop. The maximum duration for a single recording is 5 minutes.") + } + + Text { + width: parent.width + wrapMode: Text.WordWrap + color: theme.palette.normal.backgroundText + font.pixelSize: units.gu(1.6) + lineHeight: 1.2 + text: i18n.dtr("ubtms", "Getting Started: Make sure you have enabled the \"Enable voice input\" feature, under voice model (Beta) settings. ") + } + + Text { + width: parent.width + wrapMode: Text.WordWrap + color: theme.palette.normal.backgroundText + font.pixelSize: units.gu(1.6) + lineHeight: 1.2 + text: i18n.dtr("ubtms", "Compatibility & Errors: A red warning icon indicates the model is incompatible with your device (usually due to RAM limits), but you can still attempt to download it.") + } + + Text { + width: parent.width + wrapMode: Text.WordWrap + color: theme.palette.normal.backgroundText + font.pixelSize: units.gu(1.6) + lineHeight: 1.2 + text: i18n.dtr("ubtms", "Managing Downloads:Check the internet connectivity before downloading a file. Once the voice model is downloaded, select the model you want from the installed models list. The selected model will be shown in bold text, with a tick mark to its right. During download, you will see Loading (downloading), Pause, and Cancel buttons. Pausing or losing internet will preserve your progress, allowing you to resume later from this page. Cancelling will delete the partial download.") + } + + Text { + width: parent.width + wrapMode: Text.WordWrap + color: theme.palette.normal.backgroundText + font.pixelSize: units.gu(1.6) + lineHeight: 1.2 + text: i18n.dtr("ubtms", "Deleting Models: To remove an installed model, swipe its name to the left and click the delete icon.") + } + + Button { + text: i18n.dtr("ubtms", "Close") + anchors.horizontalCenter: parent.horizontalCenter + onClicked: PopupUtils.close(infoDialog) + } + + Item { + width: parent.width + height: units.gu(2) + } } } } @@ -500,7 +568,7 @@ Page { if (status.error) { console.error("Download error:", status.error); if (mainView.modelDownloadTimerWidget) mainView.modelDownloadTimerWidget.failSync(status.error); - notifPopup.open(i18n.dtr("ubtms", "Download Interrupted"), i18n.dtr("ubtms", "Failed to download %1: %2. You can resume it.").arg(downloadingModelName).arg(status.error), "warning"); + notifPopup.open(i18n.dtr("ubtms", "Download Interrupted"), i18n.dtr("ubtms", "Failed to download. You can resume it.").arg(downloadingModelName).arg(status.error), "warning"); } else { notifPopup.open(i18n.dtr("ubtms", "Download Paused"), i18n.dtr("ubtms", "Download of %1 is paused.").arg(downloadingModelName), "info"); } From 4d672bfe3239b21fe59ea69e7e2f3a770b20eb2e Mon Sep 17 00:00:00 2001 From: Anmol Garg Date: Tue, 16 Jun 2026 14:30:14 +0530 Subject: [PATCH 5/8] Added the stop button --- qml/components/system/VoiceTimerWidget.qml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qml/components/system/VoiceTimerWidget.qml b/qml/components/system/VoiceTimerWidget.qml index 882d503..f3db7fe 100644 --- a/qml/components/system/VoiceTimerWidget.qml +++ b/qml/components/system/VoiceTimerWidget.qml @@ -80,10 +80,10 @@ Rectangle { anchors.verticalCenter: parent.verticalCenter anchors.right: parent.right anchors.margins: units.gu(1) - width: units.gu(5) - height: units.gu(5) + width: units.gu(4) + height: units.gu(4) // Image path assumes we are in components/system/ - source: "../../../images/stop.png" + source: "../../images/stop.png" fillMode: Image.PreserveAspectFit visible: voiceTimerWidget.isListening From 6c85e8852fd27f6525791299d171098cce1e26e1 Mon Sep 17 00:00:00 2001 From: Anmol Garg Date: Tue, 16 Jun 2026 17:41:11 +0530 Subject: [PATCH 6/8] Fixed the issues --- qml/components/richtext/ReadMorePage.qml | 65 +++++++--- qml/components/richtext/RichTextEditor.qml | 129 ++++++++++++++++++-- qml/components/richtext/RichTextPreview.qml | 107 +++++++++++++--- 3 files changed, 257 insertions(+), 44 deletions(-) diff --git a/qml/components/richtext/ReadMorePage.qml b/qml/components/richtext/ReadMorePage.qml index 0e897c2..a756275 100644 --- a/qml/components/richtext/ReadMorePage.qml +++ b/qml/components/richtext/ReadMorePage.qml @@ -27,6 +27,7 @@ Page { property bool listening: false property bool processing: false + property bool ignoreNextResult: false property string textBeforeRecording: "" property bool isVoiceInputEnabled: true property string _partialRecognizedText: "" @@ -56,8 +57,17 @@ Page { if (data.event === "voice_recognition_partial") { var partialText = data.payload if (partialText) { - readmepage._partialRecognizedText = partialText; readmepage._currentVoiceStatus = i18n.dtr("ubtms", "Listening..."); + + var prefix = ""; + if (readmepage.textBeforeRecording.length > 0) { + var lastChar = readmepage.textBeforeRecording.charAt(readmepage.textBeforeRecording.length - 1); + if (lastChar !== '\n' && lastChar !== '\r') { + prefix = "\n"; + } + } + simpleEditor.text = readmepage.textBeforeRecording + prefix + partialText; + cursorTimer.start(); } } else if (data.event === "voice_recognition_status") { var statusText = data.payload; @@ -65,30 +75,38 @@ Page { readmepage._currentVoiceStatus = statusText; } } else if (data.event === "voice_recognition_result") { + if (readmepage.ignoreNextResult) { + readmepage.ignoreNextResult = false; + readmepage.listening = false; + readmepage.processing = false; + readmepage._currentVoiceStatus = ""; + readmepage.textBeforeRecording = simpleEditor.text; + cursorTimer.start(); + return; + } + readmepage.listening = false readmepage.processing = false var recognizedText = data.payload - readmepage._partialRecognizedText = ""; readmepage._currentVoiceStatus = ""; if (recognizedText) { var prefix = ""; - if (simpleEditor.length > 0) { - var lastChar = simpleEditor.text.charAt(simpleEditor.length - 1); - if (lastChar !== ' ' && lastChar !== '\n' && lastChar !== '\r' && lastChar !== '\t') { - prefix = " "; + if (readmepage.textBeforeRecording.length > 0) { + var lastChar = readmepage.textBeforeRecording.charAt(readmepage.textBeforeRecording.length - 1); + if (lastChar !== '\n' && lastChar !== '\r') { + prefix = "\n"; } } - var finalStr = prefix + recognizedText; - simpleEditor.insert(simpleEditor.length, finalStr); - + simpleEditor.text = readmepage.textBeforeRecording + prefix + recognizedText; + readmepage.textBeforeRecording = simpleEditor.text; cursorTimer.start(); } } else if (data.event === "voice_recognition_error") { readmepage.listening = false readmepage.processing = false - readmepage._partialRecognizedText = ""; readmepage._currentVoiceStatus = ""; + readmepage.textBeforeRecording = simpleEditor.text; cursorTimer.start() } } @@ -125,7 +143,7 @@ Page { trailingActionBar.numberOfSlots: 3 trailingActionBar.actions: [ Action { - visible: !isReadOnly && (!readmepage.listening && !readmepage.processing) + visible: !isReadOnly iconName: "tick" onTriggered: { saveAndClose() @@ -221,13 +239,28 @@ Page { } function saveAndClose() { + // Auto-stop voice recognition if it's still running + if (readmepage.listening || readmepage.processing) { + readmepage.ignoreNextResult = true; + readmepage.listening = false; + readmepage.processing = false; + readmepage._currentVoiceStatus = ""; + backend_bridge.call("backend.stop_voice_recognition", []); + } + if (useRichText) { + // Finalize voice span in RichTextEditor if needed + if (editor.editor && editor.editor.listening || editor.editor && editor.editor.processing) { + editor.editor.stopAndFinalizeVoice(); + } editor.getText(function (content) { if (commitContent(content)) { closePage(); } }); } else { + // For plain text, textBeforeRecording is already updated + readmepage.textBeforeRecording = simpleEditor.text; if (commitContent(simpleEditor.text)) { closePage(); } @@ -340,13 +373,17 @@ Page { isListening: readmepage.listening isProcessing: readmepage.processing - partialText: readmepage._partialRecognizedText + partialText: "" // Don't show partial text in the widget anymore voiceStatus: readmepage._currentVoiceStatus onStopClicked: { + readmepage.ignoreNextResult = true; readmepage.listening = false - readmepage.processing = true - readmepage._currentVoiceStatus = i18n.dtr("ubtms", "Processing..."); + readmepage.processing = false + readmepage.textBeforeRecording = simpleEditor.text; + + readmepage._currentVoiceStatus = ""; + backend_bridge.call("backend.stop_voice_recognition", []) } } diff --git a/qml/components/richtext/RichTextEditor.qml b/qml/components/richtext/RichTextEditor.qml index 5257f2c..defe5a9 100644 --- a/qml/components/richtext/RichTextEditor.qml +++ b/qml/components/richtext/RichTextEditor.qml @@ -75,6 +75,9 @@ Item { /** Whether the editor is processing voice input */ property bool processing: false + /** Ignore the final result from the backend if stop was explicitly clicked */ + property bool ignoreNextVoiceResult: false + /** Partial voice recognition text (for UI feedback) */ property string _partialVoiceText: "" property string _currentVoiceStatus: "" @@ -111,22 +114,62 @@ Item { var partialText = data.payload if (partialText) { - editor._partialVoiceText = partialText; editor._currentVoiceStatus = "Listening..."; + var script = " + var el = document.getElementById('voice-live-transcription'); + if (el) { + el.innerText = ' ' + " + JSON.stringify(partialText) + "; + } + "; + wv.runJavaScript(script); } } else if (data.event === "voice_recognition_result") { + if (editor.ignoreNextVoiceResult) { + editor.ignoreNextVoiceResult = false; + editor.listening = false; + editor.processing = false; + editor._currentVoiceStatus = ""; + + var finalScript = " + var el = document.getElementById('voice-live-transcription'); + if (el) { + var txt = el.innerText.trim(); + var parent = el.parentNode; + if (txt) { + var textNode = document.createTextNode(txt); + parent.replaceChild(textNode, el); + } else { + parent.parentNode.removeChild(parent); + } + } + document.body.contentEditable = " + (!readOnly).toString() + "; + "; + wv.runJavaScript(finalScript); + editor.syncContent(); + return; + } + editor.listening = false editor.processing = false var recognizedText = data.payload console.log("[RichTextEditor] Received recognition result: " + recognizedText) - editor._partialVoiceText = ""; editor._currentVoiceStatus = ""; - var jsCode = "if (" + JSON.stringify(recognizedText) + ") { " + - " window.editor.focus(); window.editor.moveCursorToEnd(); window.editor.insertHTML(" + JSON.stringify(recognizedText + " ") + "); " + - "}" + - "document.body.contentEditable = " + (!readOnly).toString() + ";"; + var jsCode = " + var el = document.getElementById('voice-live-transcription'); + var finalTxt = " + JSON.stringify(recognizedText) + "; + if (el) { + var parent = el.parentNode; + if (finalTxt && finalTxt.trim()) { + var textNode = document.createTextNode(finalTxt); + parent.replaceChild(textNode, el); + } else { + parent.parentNode.removeChild(parent); + } + } + document.body.contentEditable = " + (!readOnly).toString() + "; + "; wv.runJavaScript(jsCode); // Force a sync to update the 'text' property and emit contentChanged @@ -152,7 +195,43 @@ Item { Component.onDestruction: { if (listening || processing) { console.log("[RichTextEditor] destruction: Stopping voice recognition...") - mainView.backend_bridge.call("backend.stop_voice_recognition", []) + stopAndFinalizeVoice(); + } + } + + /** + * Stop voice recognition and finalize any in-progress transcription. + * Replaces the live transcription span with its text content so it + * persists correctly in the saved HTML. Call this before saving. + */ + function stopAndFinalizeVoice(callback) { + ignoreNextVoiceResult = true; + listening = false; + processing = false; + _currentVoiceStatus = ""; + + wv.runJavaScript(" + var el = document.getElementById('voice-live-transcription'); + if (el) { + var txt = el.innerText.trim(); + var parent = el.parentNode; + if (txt) { + var textNode = document.createTextNode(txt); + parent.replaceChild(textNode, el); + } else { + parent.parentNode.removeChild(parent); + } + } + document.body.contentEditable = " + (!readOnly).toString() + "; + "); + + mainView.backend_bridge.call("backend.stop_voice_recognition", []); + + // Sync content so the finalized text is captured + syncContent(); + + if (callback && typeof callback === 'function') { + callback(); } } @@ -172,7 +251,33 @@ Item { _partialVoiceText = "" editor._currentVoiceStatus = "Starting..." - wv.runJavaScript("document.body.contentEditable = false;"); + wv.runJavaScript(" + if (window.editor) { + // Clean up any stale span from a previous session + var oldEl = document.getElementById('voice-live-transcription'); + if (oldEl) { + var txt = oldEl.innerText.trim(); + var oldParent = oldEl.parentNode; + if (txt) { + var tn = document.createTextNode(txt); + oldParent.replaceChild(tn, oldEl); + } else { + oldParent.parentNode.removeChild(oldParent); + } + } + + var el = document.createElement('span'); + el.id = 'voice-live-transcription'; + el.innerText = ' '; + + var wrapper = document.createElement('div'); + wrapper.appendChild(el); + document.body.appendChild(wrapper); + + document.body.contentEditable = false; + el.scrollIntoView({behavior: 'smooth', block: 'nearest'}); + } + "); mainView.backend_bridge.call("backend.run_voice_recognition", []) } @@ -760,14 +865,12 @@ Item { isListening: editor.listening isProcessing: editor.processing - partialText: editor._partialVoiceText + partialText: "" // Don't show partial text in the widget anymore voiceStatus: editor._currentVoiceStatus onStopClicked: { - editor.listening = false - editor.processing = true - editor._currentVoiceStatus = "Processing..."; - mainView.backend_bridge.call("backend.stop_voice_recognition", []) + console.log("[RichTextEditor] Stopping voice recognition from widget...") + editor.stopAndFinalizeVoice(); } } } diff --git a/qml/components/richtext/RichTextPreview.qml b/qml/components/richtext/RichTextPreview.qml index 8b0a0e6..c196e95 100644 --- a/qml/components/richtext/RichTextPreview.qml +++ b/qml/components/richtext/RichTextPreview.qml @@ -24,16 +24,34 @@ Rectangle { property bool liveSyncActive: false property bool listening: false property bool processing: false + property bool ignoreNextResult: false Component.onDestruction: { if (listening || processing) { console.log("[RichTextPreview] destruction: Stopping voice recognition...") + ignoreNextResult = true; + listening = false; + processing = false; + _currentVoiceStatus = ""; + textBeforeRecording = root.text; + _syncVoiceResult(); backend_bridge.call("backend.stop_voice_recognition", []) } } property string textBeforeRecording: "" property bool isVoiceInputEnabled: true + // This is used to sync the voice input with the parent form's draft handler + function _syncVoiceResult() { + var currentContent = root.text; + originalHtmlContent = currentContent; + root.contentChanged(currentContent); + if (root.liveSyncActive) { + Global.description_temporary_holder = currentContent; + root._lastSyncedContent = currentContent; + } + } + function checkVoiceInputEnabled() { try { var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); @@ -66,8 +84,29 @@ Rectangle { if (data.event === "voice_recognition_partial") { var partialText = data.payload if (partialText) { - root._partialRecognizedText = partialText; root._currentVoiceStatus = i18n.dtr("ubtms", "Listening..."); + + var prefix = ""; + if (root.textBeforeRecording.length > 0) { + var lastChar = root.textBeforeRecording.charAt(root.textBeforeRecording.length - 1); + if (lastChar !== '\n' && lastChar !== '\r') { + prefix = "\n"; + } + } + root._settingContent = true; + root.text = root.textBeforeRecording + prefix + partialText; + // Force update originalHtmlContent so draft listeners get the live text + // without waiting for the mic to stop. + root.originalHtmlContent = root.text; + root._settingContent = false; + + // Keep cursor at end and scroll to bottom immediately + previewText.cursorPosition = previewText.length; + if (previewText.flickableItem) { + previewText.flickableItem.contentY = Math.max(0, previewText.flickableItem.contentHeight - previewText.flickableItem.height); + } else if (previewText.flickable) { + previewText.flickable.contentY = Math.max(0, previewText.flickable.contentHeight - previewText.flickable.height); + } } } else if (data.event === "voice_recognition_status") { var statusText = data.payload; @@ -75,35 +114,49 @@ Rectangle { root._currentVoiceStatus = statusText; } } else if (data.event === "voice_recognition_result") { + if (root.ignoreNextResult) { + root.ignoreNextResult = false; + root.listening = false; + root.processing = false; + root._currentVoiceStatus = ""; + root.textBeforeRecording = root.text; + root._syncVoiceResult(); + cursorTimer.start(); + return; + } + root.listening = false root.processing = false var recognizedText = data.payload console.log("[RichTextPreview] Received recognition result: " + recognizedText) - root._partialRecognizedText = ""; root._currentVoiceStatus = ""; if (recognizedText) { var prefix = ""; - if (previewText.length > 0) { - var lastChar = previewText.text.charAt(previewText.length - 1); - if (lastChar !== ' ' && lastChar !== '\n' && lastChar !== '\r' && lastChar !== '\t') { - prefix = " "; + if (root.textBeforeRecording.length > 0) { + var lastChar = root.textBeforeRecording.charAt(root.textBeforeRecording.length - 1); + if (lastChar !== '\n' && lastChar !== '\r') { + prefix = "\n"; } } - var finalStr = prefix + recognizedText; - previewText.insert(previewText.length, finalStr); - - root.contentChanged(previewText.text); + root._settingContent = true; + root.text = root.textBeforeRecording + prefix + recognizedText; + root._settingContent = false; + root.textBeforeRecording = root.text; + root._syncVoiceResult(); cursorTimer.start(); } else { + root.textBeforeRecording = root.text; + root._syncVoiceResult(); cursorTimer.start(); } } else if (data.event === "voice_recognition_error") { root.listening = false root.processing = false - root._partialRecognizedText = ""; root._currentVoiceStatus = ""; + root.textBeforeRecording = root.text; + root._syncVoiceResult(); console.log("[RichTextPreview] Voice recognition error: " + data.payload) cursorTimer.start() } @@ -285,11 +338,24 @@ Rectangle { repeat: false onTriggered: { if (previewText) { - // Ensure the TextArea is focused so it respects scroll-to-cursor - // and use .length (visible characters) instead of .text.length (HTML source length) previewText.cursorPosition = previewText.length; - // Force scroll to bottom if internal flickable exists + if (previewText.flickableItem) { + previewText.flickableItem.contentY = Math.max(0, previewText.flickableItem.contentHeight - previewText.flickableItem.height); + } else if (previewText.flickable) { + previewText.flickable.contentY = Math.max(0, previewText.flickable.contentHeight - previewText.flickable.height); + } + } + } + } + + // Separate timer for scrolling during voice input - only scrolls, doesn't move cursor + Timer { + id: scrollToBottomTimer + interval: 50 + repeat: false + onTriggered: { + if (previewText) { if (previewText.flickableItem) { previewText.flickableItem.contentY = Math.max(0, previewText.flickableItem.contentHeight - previewText.flickableItem.height); } else if (previewText.flickable) { @@ -434,7 +500,8 @@ Rectangle { width: units.gu(3) height: units.gu(3) radius: units.gu(.5) - color: LomiriColors.orange + color: (root.listening || root.processing) ? LomiriColors.ash : LomiriColors.orange + opacity: (root.listening || root.processing) ? 0.5 : 1.0 Image { id: expansionIcon @@ -448,7 +515,11 @@ Rectangle { MouseArea { anchors.fill: parent // cursorShape: Qt.PointingHandCursor - onClicked: root.clicked() + onClicked: { + if (!root.listening && !root.processing) { + root.clicked() + } + } } } } @@ -463,13 +534,15 @@ Rectangle { isListening: root.listening isProcessing: root.processing - partialText: root._partialRecognizedText + partialText: "" // Don't show partial text in the widget anymore voiceStatus: root._currentVoiceStatus onStopClicked: { + console.log("[RichTextPreview] Stopping voice recognition from widget...") root.listening = false root.processing = true root._currentVoiceStatus = i18n.dtr("ubtms", "Processing..."); + backend_bridge.call("backend.stop_voice_recognition", []) } } From c0c6e1ad0dcf78a756e54ff254b2a495bbe5522b Mon Sep 17 00:00:00 2001 From: Parvathy Nair Date: Tue, 16 Jun 2026 17:56:38 +0530 Subject: [PATCH 7/8] [EDIT] the voice model settings info content --- qml/features/settings/pages/Settings_VoiceModel.qml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qml/features/settings/pages/Settings_VoiceModel.qml b/qml/features/settings/pages/Settings_VoiceModel.qml index 6aefb72..c35fc42 100644 --- a/qml/features/settings/pages/Settings_VoiceModel.qml +++ b/qml/features/settings/pages/Settings_VoiceModel.qml @@ -405,7 +405,7 @@ Page { color: theme.palette.normal.backgroundText font.pixelSize: units.gu(1.6) lineHeight: 1.2 - text: i18n.dtr("ubtms", "Managing Downloads:Check the internet connectivity before downloading a file. Once the voice model is downloaded, select the model you want from the installed models list. The selected model will be shown in bold text, with a tick mark to its right. During download, you will see Loading (downloading), Pause, and Cancel buttons. Pausing or losing internet will preserve your progress, allowing you to resume later from this page. Cancelling will delete the partial download.") + text: i18n.dtr("ubtms", "Managing Downloads:Check the internet connectivity before downloading a file. Once the voice model is downloaded, select the model you want from the installed models list. Even if only one model is installed, selecting the model is mandatory. The selected model will be shown in bold text, with a tick mark to its right. During download, you will see Loading (downloading), Pause, and Cancel buttons. Pausing or losing internet will preserve your progress, allowing you to resume later from this page. Cancelling will delete the partial download.") } Text { From 4ac47a114b05a7c24af0c860bc6fbf7245fa5423 Mon Sep 17 00:00:00 2001 From: Anmol Garg Date: Thu, 18 Jun 2026 14:18:54 +0530 Subject: [PATCH 8/8] Added Fixes --- qml/components/richtext/ReadMorePage.qml | 3 +- qml/components/richtext/RichTextEditor.qml | 3 +- qml/components/richtext/RichTextPreview.qml | 3 +- .../settings/pages/Settings_VoiceModel.qml | 15 ++- src/backend.py | 17 ++- voice_to_text/voice2text.py | 114 ++++++++++-------- 6 files changed, 90 insertions(+), 65 deletions(-) diff --git a/qml/components/richtext/ReadMorePage.qml b/qml/components/richtext/ReadMorePage.qml index a756275..a95c960 100644 --- a/qml/components/richtext/ReadMorePage.qml +++ b/qml/components/richtext/ReadMorePage.qml @@ -35,9 +35,10 @@ Page { function checkVoiceInputEnabled() { try { - var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var db = Sql.LocalStorage.openDatabaseSync("myDatabase", "1.0", "My Database", 1000000); var result = true; db.transaction(function (tx) { + tx.executeSql('CREATE TABLE IF NOT EXISTS app_settings (key TEXT PRIMARY KEY, value TEXT)'); var rs = tx.executeSql('SELECT value FROM app_settings WHERE key = "voice_input_enabled"'); if (rs.rows.length > 0) { result = rs.rows.item(0).value === "true"; diff --git a/qml/components/richtext/RichTextEditor.qml b/qml/components/richtext/RichTextEditor.qml index defe5a9..94f13ad 100644 --- a/qml/components/richtext/RichTextEditor.qml +++ b/qml/components/richtext/RichTextEditor.qml @@ -87,9 +87,10 @@ Item { function checkVoiceInputEnabled() { try { - var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var db = Sql.LocalStorage.openDatabaseSync("myDatabase", "1.0", "My Database", 1000000); var result = true; db.transaction(function (tx) { + tx.executeSql('CREATE TABLE IF NOT EXISTS app_settings (key TEXT PRIMARY KEY, value TEXT)'); var rs = tx.executeSql('SELECT value FROM app_settings WHERE key = "voice_input_enabled"'); if (rs.rows.length > 0) { result = rs.rows.item(0).value === "true"; diff --git a/qml/components/richtext/RichTextPreview.qml b/qml/components/richtext/RichTextPreview.qml index c196e95..358d0f1 100644 --- a/qml/components/richtext/RichTextPreview.qml +++ b/qml/components/richtext/RichTextPreview.qml @@ -54,9 +54,10 @@ Rectangle { function checkVoiceInputEnabled() { try { - var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var db = Sql.LocalStorage.openDatabaseSync("myDatabase", "1.0", "My Database", 1000000); var result = true; db.transaction(function (tx) { + tx.executeSql('CREATE TABLE IF NOT EXISTS app_settings (key TEXT PRIMARY KEY, value TEXT)'); var rs = tx.executeSql('SELECT value FROM app_settings WHERE key = "voice_input_enabled"'); if (rs.rows.length > 0) { result = rs.rows.item(0).value === "true"; diff --git a/qml/features/settings/pages/Settings_VoiceModel.qml b/qml/features/settings/pages/Settings_VoiceModel.qml index c35fc42..2aeb109 100644 --- a/qml/features/settings/pages/Settings_VoiceModel.qml +++ b/qml/features/settings/pages/Settings_VoiceModel.qml @@ -84,9 +84,10 @@ Page { function getVoiceInputEnabledSetting() { try { - var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var db = Sql.LocalStorage.openDatabaseSync("myDatabase", "1.0", "My Database", 1000000); var result = true; db.transaction(function (tx) { + tx.executeSql('CREATE TABLE IF NOT EXISTS app_settings (key TEXT PRIMARY KEY, value TEXT)'); var rs = tx.executeSql('SELECT value FROM app_settings WHERE key = "voice_input_enabled"'); if (rs.rows.length > 0) { result = rs.rows.item(0).value === "true"; @@ -101,7 +102,7 @@ Page { function saveVoiceInputEnabledSetting(value) { try { - var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var db = Sql.LocalStorage.openDatabaseSync("myDatabase", "1.0", "My Database", 1000000); db.transaction(function (tx) { tx.executeSql('CREATE TABLE IF NOT EXISTS app_settings (key TEXT PRIMARY KEY, value TEXT)'); tx.executeSql('INSERT OR REPLACE INTO app_settings (key, value) VALUES ("voice_input_enabled", ?)', [value ? "true" : "false"]); @@ -114,9 +115,10 @@ Page { function isModelCompatible(sizeStr) { if (!sizeStr) return true; - var isG = sizeStr.indexOf("G") !== -1; + var upperSize = sizeStr.toUpperCase(); + var isG = upperSize.indexOf("G") !== -1; if (isG) { - var val = parseFloat(sizeStr.replace("G", "")); + var val = parseFloat(upperSize.replace("G", "")); // Assume 1G requires ~2500MB RAM, 1.8G requires ~4000MB RAM. var reqRam = val * 2500; return deviceRamMB >= reqRam; @@ -126,10 +128,11 @@ Page { function getActiveModelSetting() { try { - var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var db = Sql.LocalStorage.openDatabaseSync("myDatabase", "1.0", "My Database", 1000000); var result = "voice_to_text/model"; // Default db.transaction(function (tx) { + tx.executeSql('CREATE TABLE IF NOT EXISTS app_settings (key TEXT PRIMARY KEY, value TEXT)'); var rs = tx.executeSql('SELECT value FROM app_settings WHERE key = "active_voice_model"'); if (rs.rows.length > 0) { result = rs.rows.item(0).value; @@ -144,7 +147,7 @@ Page { function saveActiveModelSetting(value) { try { - var db = Sql.LocalStorage.openDatabaseSync("UBTMS_SettingsDB", "1.0", "UBTMS Settings Database", 1000000); + var db = Sql.LocalStorage.openDatabaseSync("myDatabase", "1.0", "My Database", 1000000); db.transaction(function (tx) { tx.executeSql('CREATE TABLE IF NOT EXISTS app_settings (key TEXT PRIMARY KEY, value TEXT)'); tx.executeSql('INSERT OR REPLACE INTO app_settings (key, value) VALUES ("active_voice_model", ?)', [value]); diff --git a/src/backend.py b/src/backend.py index a9e5711..1a6fe51 100755 --- a/src/backend.py +++ b/src/backend.py @@ -1056,7 +1056,7 @@ def delete_voice_model(model_path): return {"status": "error", "message": str(e)} -def resolve_settings_db_path(db_name="UBTMS_SettingsDB", app_id="ubtms"): +def resolve_settings_db_path(db_name="myDatabase", app_id="ubtms"): """Finds a specific QML database by its name (hash).""" import hashlib db_hash = hashlib.md5(db_name.encode()).hexdigest() @@ -1113,7 +1113,7 @@ def do_recognition(): # Fetch the active model path from settings # First try the specific settings DB - db_path = resolve_settings_db_path("UBTMS_SettingsDB") + db_path = resolve_settings_db_path("myDatabase") if not db_path: # Fallback to main app DB db_path = resolve_qml_db_path() @@ -1169,14 +1169,25 @@ def do_recognition(): # Run recognition in a separate process to avoid GIL blocking and UI freezing parent_conn, child_conn = multiprocessing.Pipe() - p = multiprocessing.Process(target=_subprocess_recognize, args=(str(model_path), child_conn, 30)) + ctx = multiprocessing.get_context("spawn") + p = ctx.Process(target=_subprocess_recognize, args=(str(model_path), child_conn, 30)) p.start() + import time received_final = False + stop_sent_time = None while p.is_alive(): if _voice_stop_event.is_set(): parent_conn.send("stop") _voice_stop_event.clear() + if stop_sent_time is None: + stop_sent_time = time.time() + + # Watchdog: terminate if process ignores stop signal for > 5 seconds + if stop_sent_time is not None and (time.time() - stop_sent_time > 5.0): + log.warning("[VOICE] Watchdog triggered: Process failed to stop cleanly. Terminating.") + p.terminate() + break if parent_conn.poll(0.5): msg = parent_conn.recv() diff --git a/voice_to_text/voice2text.py b/voice_to_text/voice2text.py index aab7b0b..e66e3bc 100644 --- a/voice_to_text/voice2text.py +++ b/voice_to_text/voice2text.py @@ -73,63 +73,71 @@ def recognize_from_mic(verbose=True, stop_event=None, timeout=30, partial_callba import time process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) - start_time = time.time() - last_speech_time = start_time - hard_max_duration = 300 # 5 minutes absolute max - silence_timeout = 7.0 # Stop after 7 seconds of silence - - results = [] - last_partial = "" - - # Read from stdout in chunks - # `read(4000)` might block until 4000 bytes are available, which is fine for live streams. - while process.poll() is None: - current_time = time.time() + try: + start_time = time.time() + last_speech_time = start_time + hard_max_duration = 300 # 5 minutes absolute max + silence_timeout = 7.0 # Stop after 7 seconds of silence - if stop_event and stop_event.is_set(): - logger.info("[VOICE] Stop signal received, terminating record process.") - process.terminate() - break + results = [] + last_partial = "" + + # Read from stdout in chunks + # `read(4000)` might block until 4000 bytes are available, which is fine for live streams. + while process.poll() is None: + current_time = time.time() - if (current_time - start_time) > hard_max_duration: - logger.info("[VOICE] Absolute max duration reached, stopping.") - process.terminate() - break + if stop_event and stop_event.is_set(): + logger.info("[VOICE] Stop signal received, terminating record process.") + break + + if (current_time - start_time) > hard_max_duration: + logger.info("[VOICE] Absolute max duration reached, stopping.") + break + + if (current_time - last_speech_time) > silence_timeout: + logger.info("[VOICE] Silence timeout reached, stopping auto-record.") + break - if (current_time - last_speech_time) > silence_timeout: - logger.info("[VOICE] Silence timeout reached, stopping auto-record.") - process.terminate() - break - - # We use a non-blocking-ish approach or just read. - # read 4000 bytes (125ms of 16kHz 16-bit mono) - data = process.stdout.read(4000) - if len(data) == 0: - break - - if rec.AcceptWaveform(data): - res = json.loads(rec.Result()) - if res.get("text"): - results.append(res["text"]) - last_speech_time = time.time() # Just finished a phrase - last_partial = "" - if partial_callback: - combined = " ".join(results).strip() - partial_callback(combined) + # We use a non-blocking-ish approach or just read. + # read 4000 bytes (125ms of 16kHz 16-bit mono) + data = process.stdout.read(4000) + if len(data) == 0: + break + + if rec.AcceptWaveform(data): + res = json.loads(rec.Result()) + if res.get("text"): + results.append(res["text"]) + last_speech_time = time.time() # Just finished a phrase + last_partial = "" + if partial_callback: + combined = " ".join(results).strip() + partial_callback(combined) + else: + res = json.loads(rec.PartialResult()) + current_partial = res.get("partial", "") + if current_partial: + # User is actively speaking if partial result changes + if current_partial != last_partial: + last_speech_time = time.time() + last_partial = current_partial + + if partial_callback: + combined = " ".join(results + [current_partial]).strip() + partial_callback(combined) + finally: + if process.poll() is None: + try: + process.terminate() + process.wait(timeout=1) + except Exception: + try: + process.kill() + except Exception: + pass else: - res = json.loads(rec.PartialResult()) - current_partial = res.get("partial", "") - if current_partial: - # User is actively speaking if partial result changes - if current_partial != last_partial: - last_speech_time = time.time() - last_partial = current_partial - - if partial_callback: - combined = " ".join(results + [current_partial]).strip() - partial_callback(combined) - - process.wait() # Ensure it's fully closed + process.wait() except (FileNotFoundError, Exception) as e: logger.exception(f"[VOICE] System error running {arecord_cmd}: {e}")