From 5108214db81083b20c5c34d4d111e1f78581b85e Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Fri, 20 Mar 2026 21:02:47 +1000 Subject: [PATCH 1/4] Update duckdb tool and support .duckdb files --- .../DataFileUpload/DataFileUpload.tsx | 290 +++++++++++++----- ui/src/components/ToolsBar/ToolsBar.tsx | 37 ++- ui/src/pages/chat/ChatPage.tsx | 2 + ui/src/pages/chat/useChat.ts | 18 +- ui/src/services/duckdb/duckdbService.ts | 53 +++- ui/src/services/duckdb/duckdbWorker.ts | 99 +++++- ui/src/stores/chatUIStore.ts | 14 +- 7 files changed, 414 insertions(+), 99 deletions(-) diff --git a/ui/src/components/DataFileUpload/DataFileUpload.tsx b/ui/src/components/DataFileUpload/DataFileUpload.tsx index 80e843d..3f48795 100644 --- a/ui/src/components/DataFileUpload/DataFileUpload.tsx +++ b/ui/src/components/DataFileUpload/DataFileUpload.tsx @@ -1,25 +1,39 @@ /** * DataFileUpload - Upload data files for SQL queries * - * Allows users to upload CSV, Parquet, and JSON files for querying + * Allows users to upload CSV, Parquet, JSON, and DuckDB database files for querying * with DuckDB. Files are registered in-memory and reset on page reload. * - * Note: SQLite files are NOT supported in DuckDB-WASM due to extension limitations. + * DuckDB database files are registered via BROWSER_FILEREADER protocol, which reads + * lazily from the File handle on demand — no size limit, no memory overhead. */ import { useCallback, useRef, useState } from "react"; -import { Upload, X, FileSpreadsheet, AlertCircle, Loader2 } from "lucide-react"; +import { Upload, X, FileSpreadsheet, AlertCircle, Loader2, Eye } from "lucide-react"; import { duckdbService, type FileType } from "@/services/duckdb"; -import { useChatUIStore, useDataFiles, type DataFile } from "@/stores/chatUIStore"; +import { + useChatUIStore, + useDataFiles, + type DataFile, + type DataFileTable, +} from "@/stores/chatUIStore"; import { cn } from "@/utils/cn"; -import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/Tooltip/Tooltip"; +import { + Modal, + ModalHeader, + ModalTitle, + ModalDescription, + ModalClose, + ModalContent, +} from "@/components/Modal/Modal"; /** Accepted file extensions and their types */ const FILE_TYPE_MAP: Record = { csv: "csv", parquet: "parquet", json: "json", + duckdb: "duckdb", }; /** File type to display name */ @@ -27,16 +41,18 @@ const FILE_TYPE_LABELS: Record = { csv: "CSV", parquet: "Parquet", json: "JSON", + duckdb: "DuckDB", }; -/** Max file size: 100MB */ -const MAX_FILE_SIZE = 100 * 1024 * 1024; +/** Max file size for flat files: 100MB (DuckDB databases use lazy reads, no limit) */ +const MAX_FLAT_FILE_SIZE = 100 * 1024 * 1024; /** Format file size for display */ function formatFileSize(bytes: number): string { if (bytes < 1024) return `${bytes} B`; if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; - return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; + if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; + return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`; } /** Get file extension from filename */ @@ -57,12 +73,15 @@ export interface DataFileUploadProps { disabled?: boolean; /** Compact mode - show only chips, no drop zone */ compact?: boolean; + /** Called when a file is successfully added (use for auto-enabling tool) */ + onFileAdded?: () => void; } export function DataFileUpload({ className, disabled = false, compact = false, + onFileAdded, }: DataFileUploadProps) { const inputRef = useRef(null); const [isDragging, setIsDragging] = useState(false); @@ -89,8 +108,8 @@ export function DataFileUpload({ continue; } - // Validate file size - if (file.size > MAX_FILE_SIZE) { + // Validate file size (only for flat files — duckdb uses lazy file reads) + if (fileType !== "duckdb" && file.size > MAX_FLAT_FILE_SIZE) { console.warn(`File too large: ${file.name} (${formatFileSize(file.size)})`); continue; } @@ -117,21 +136,53 @@ export function DataFileUpload({ // Register with DuckDB try { - const buffer = await file.arrayBuffer(); - const result = await duckdbService.registerFile(file.name, buffer, fileType); + // For .duckdb files, pass the File handle directly — DuckDB reads lazily via + // BROWSER_FILEREADER protocol (no memory overhead, no size limit). + // For flat files, load into memory (capped at 100MB). + const result = + fileType === "duckdb" + ? await duckdbService.registerDatabaseFile(file.name, file) + : await duckdbService.registerFile(file.name, await file.arrayBuffer(), fileType); if (result.success) { - // Get column schema for the file - // Use quoted filename for DuckDB (files are accessed as 'filename.ext') - const schemaResult = await duckdbService.describeTable(`'${file.name}'`); - if (schemaResult.success && schemaResult.columns.length > 0) { - updateDataFileStatus(fileId, true, undefined, { - columns: schemaResult.columns.map((c) => ({ name: c.name, type: c.type })), - }); + if (fileType === "duckdb" && result.dbAlias) { + // For DuckDB database files, enumerate tables and describe each + const tablesResult = await duckdbService.execute( + `SELECT table_name FROM information_schema.tables WHERE table_catalog = '${result.dbAlias}' AND table_schema = 'main' ORDER BY table_name` + ); + if (tablesResult.success && tablesResult.rows.length > 0) { + const tables = []; + for (const row of tablesResult.rows) { + const tableName = String(row.table_name); + const colResult = await duckdbService.describeTable( + `"${result.dbAlias}".main."${tableName}"` + ); + tables.push({ + tableName, + columns: colResult.success + ? colResult.columns.map((c) => ({ name: c.name, type: c.type })) + : [], + }); + } + updateDataFileStatus(fileId, true, undefined, { + tables, + dbName: result.dbAlias, + }); + } else { + updateDataFileStatus(fileId, true, undefined, { dbName: result.dbAlias }); + } } else { - // Registration succeeded but schema extraction failed - still mark as registered - updateDataFileStatus(fileId, true); + // Flat file — get column schema + const schemaResult = await duckdbService.describeTable(`'${file.name}'`); + if (schemaResult.success && schemaResult.columns.length > 0) { + updateDataFileStatus(fileId, true, undefined, { + columns: schemaResult.columns.map((c) => ({ name: c.name, type: c.type })), + }); + } else { + updateDataFileStatus(fileId, true); + } } + onFileAdded?.(); } else { updateDataFileStatus(fileId, false, result.error || "Registration failed"); } @@ -143,16 +194,14 @@ export function DataFileUpload({ setIsUploading(false); }, - [disabled, dataFiles, addDataFile, updateDataFileStatus] + [disabled, dataFiles, addDataFile, updateDataFileStatus, onFileAdded] ); /** Handle file removal */ const handleRemove = useCallback( async (file: DataFile) => { - // Remove from store removeDataFile(file.id); - // Unregister from DuckDB if it was registered if (file.registered) { try { await duckdbService.unregisterFile(file.name); @@ -201,7 +250,6 @@ export function DataFileUpload({ (e: React.ChangeEvent) => { if (e.target.files && e.target.files.length > 0) { handleFiles(e.target.files); - // Reset input so same file can be selected again e.target.value = ""; } }, @@ -213,7 +261,7 @@ export function DataFileUpload({ .join(","); return ( -
+
{/* Hidden file input */} {isDragging ? "Drop files here" : "Drop files or click to upload"} - CSV, Parquet, JSON (max 100MB) + CSV, Parquet, JSON, DuckDB
)} @@ -283,7 +331,7 @@ export function DataFileUpload({ {/* File chips */} {dataFiles.length > 0 && ( -
+
{dataFiles.map((file: DataFile) => ( handleRemove(file)} /> ))} @@ -316,20 +364,31 @@ export function DataFileUpload({ /** Individual file chip */ function FileChip({ file, onRemove }: { file: DataFile; onRemove: () => void }) { + const [schemaOpen, setSchemaOpen] = useState(false); const hasError = !file.registered && file.error; const isLoading = !file.registered && !file.error; const hasColumns = file.columns && file.columns.length > 0; + const hasTables = file.tables && file.tables.length > 0; + const isDatabase = file.type === "duckdb"; + const hasSchema = hasColumns || hasTables; + + const chipDetail = + isDatabase && hasTables + ? `${file.tables!.length} table${file.tables!.length !== 1 ? "s" : ""}` + : hasColumns + ? `${file.columns!.length} cols` + : undefined; return ( - - -
+ <> +
+ {isLoading ? ( ) : hasError ? ( @@ -337,53 +396,134 @@ function FileChip({ file, onRemove }: { file: DataFile; onRemove: () => void }) ) : ( )} - {file.name} - {FILE_TYPE_LABELS[file.type]} - {hasColumns && {file.columns!.length} cols} + + {file.name} + {FILE_TYPE_LABELS[file.type]} + {chipDetail && {chipDetail}} + {hasSchema && !isLoading && ( -
- - -
-

{file.name}

-

- {FILE_TYPE_LABELS[file.type]} · {formatFileSize(file.size)} -

- {hasError &&

{file.error}

} - {!hasError && !isLoading && ( + )} + +
+ + {/* Schema modal */} + {hasSchema && ( + setSchemaOpen(false)} file={file} /> + )} + + ); +} + +/** Modal showing full schema for a data file */ +function DataSchemaModal({ + open, + onClose, + file, +}: { + open: boolean; + onClose: () => void; + file: DataFile; +}) { + const isDatabase = file.type === "duckdb"; + const hasTables = file.tables && file.tables.length > 0; + const hasColumns = file.columns && file.columns.length > 0; + + return ( + + + {file.name} + + {FILE_TYPE_LABELS[file.type]} · {formatFileSize(file.size)} + {isDatabase && file.dbName && ( <> -

- Query with: SELECT * FROM '{file.name}' -

- - {/* File columns */} - {hasColumns && ( -
-

Columns:

-
- {file.columns!.map((col) => ( -
- {col.name} - {col.type} -
- ))} -
-
- )} + {" "} + · Attached as{" "} + {file.dbName} )} -
- -
+ + + + + {/* Database tables */} + {hasTables && ( +
+ {file.tables!.map((table) => ( + + ))} +
+ )} + + {/* Flat file columns */} + {!isDatabase && hasColumns && ( +
+

+ SELECT * FROM '{file.name}' +

+ +
+ )} +
+ + ); +} + +/** Render a single table's schema */ +function TableSchema({ table, dbName }: { table: DataFileTable; dbName?: string }) { + return ( +
+
+

{table.tableName}

+ {table.columns.length} columns +
+ {dbName && ( +

+ SELECT * FROM {dbName}.{table.tableName} +

+ )} + {table.columns.length > 0 && } +
+ ); +} + +/** Render a columns table */ +function ColumnTable({ columns }: { columns: Array<{ name: string; type: string }> }) { + return ( + + + + + + + + + {columns.map((col) => ( + + + + + ))} + +
ColumnType
{col.name}{col.type}
); } diff --git a/ui/src/components/ToolsBar/ToolsBar.tsx b/ui/src/components/ToolsBar/ToolsBar.tsx index db170eb..b18656a 100644 --- a/ui/src/components/ToolsBar/ToolsBar.tsx +++ b/ui/src/components/ToolsBar/ToolsBar.tsx @@ -462,6 +462,16 @@ export function ToolsBar({ [allToolsData, enabledTools] ); + /** Ensure a tool is enabled (no-op if already enabled) */ + const ensureEnabled = useCallback( + (toolId: string) => { + if (!enabledTools.includes(toolId)) { + onEnabledToolsChange([...enabledTools, toolId]); + } + }, + [enabledTools, onEnabledToolsChange] + ); + // Get extra content for specific tools (settings panels) const getToolExtraContent = useCallback( (toolId: string): React.ReactNode => { @@ -472,7 +482,10 @@ export function ToolsBar({
{ + onVectorStoreIdsChange(ids); + if (ids.length > 0) ensureEnabled("file_search"); + }} ownerType={vectorStoreOwnerType} ownerId={vectorStoreOwnerId} maxStores={10} @@ -485,7 +498,11 @@ export function ToolsBar({ if (toolId === "sql_query") { return (
- + ensureEnabled("sql_query")} + />
); } @@ -494,13 +511,24 @@ export function ToolsBar({ { + onSubAgentModelChange(model); + ensureEnabled("sub_agent"); + }} disabled={disabled} /> ); } if (toolId === "mcp") { - return ; + return ( + { + onOpenMCPConfig?.(); + ensureEnabled("mcp"); + }} + disabled={disabled} + /> + ); } return null; }, @@ -514,6 +542,7 @@ export function ToolsBar({ subAgentModel, onSubAgentModelChange, onOpenMCPConfig, + ensureEnabled, ] ); diff --git a/ui/src/pages/chat/ChatPage.tsx b/ui/src/pages/chat/ChatPage.tsx index 84c9b37..54009d0 100644 --- a/ui/src/pages/chat/ChatPage.tsx +++ b/ui/src/pages/chat/ChatPage.tsx @@ -129,6 +129,8 @@ export default function ChatPage() { .map((f) => ({ name: f.name, columns: f.columns, + tables: f.tables, + dbName: f.dbName, })), [dataFiles] ); diff --git a/ui/src/pages/chat/useChat.ts b/ui/src/pages/chat/useChat.ts index c341a44..8756432 100644 --- a/ui/src/pages/chat/useChat.ts +++ b/ui/src/pages/chat/useChat.ts @@ -67,12 +67,12 @@ interface DataFileInfo { name: string; /** For flat files (CSV, Parquet, JSON) */ columns?: Array<{ name: string; type: string }>; - /** For SQLite databases */ + /** For database files (DuckDB) */ tables?: Array<{ tableName: string; columns: Array<{ name: string; type: string }>; }>; - /** Database name for SQLite files */ + /** Database alias for attached databases */ dbName?: string; } @@ -429,18 +429,26 @@ export function useChat({ let sqlDescription = "Execute SQL queries in-browser using DuckDB. " + "Supports standard SQL syntax with analytics functions. " + - "Can query CSV, Parquet, JSON files directly (e.g., SELECT * FROM 'data.csv'). " + + "Can query CSV, Parquet, JSON files directly (e.g., SELECT * FROM 'data.csv') " + + "and DuckDB database files (e.g., SELECT * FROM db_name.table_name). " + "Use for data analysis, aggregations, joins, and transformations."; // Add available files and their schemas if (dataFiles.length > 0) { sqlDescription += "\n\nAvailable data:"; for (const file of dataFiles) { - if (file.columns && file.columns.length > 0) { + if (file.tables && file.tables.length > 0 && file.dbName) { + // Database file with tables + for (const table of file.tables) { + const columnList = table.columns.map((c) => `${c.name} (${c.type})`).join(", "); + sqlDescription += `\n- ${file.dbName}.${table.tableName}: ${columnList}`; + } + } else if (file.columns && file.columns.length > 0) { const columnList = file.columns.map((c) => `${c.name} (${c.type})`).join(", "); sqlDescription += `\n- '${file.name}': ${columnList}`; + } else if (file.dbName) { + sqlDescription += `\n- Database '${file.name}' attached as ${file.dbName}`; } else { - // File without schema info sqlDescription += `\n- '${file.name}'`; } } diff --git a/ui/src/services/duckdb/duckdbService.ts b/ui/src/services/duckdb/duckdbService.ts index f3325cb..7a75a1c 100644 --- a/ui/src/services/duckdb/duckdbService.ts +++ b/ui/src/services/duckdb/duckdbService.ts @@ -4,8 +4,6 @@ * Manages communication with the DuckDB Web Worker for executing SQL queries. * Provides a simple async API for database operations with proper lifecycle management. * - * Note: SQLite support is NOT available in DuckDB-WASM due to extension limitations. - * * ## Usage * * ```typescript @@ -21,6 +19,11 @@ * * // Query the CSV * const csvResult = await duckdbService.execute("SELECT * FROM 'data.csv'"); + * + * // Register a DuckDB database file + * const dbData = await fetch("data.duckdb").then(r => r.arrayBuffer()); + * await duckdbService.registerFile("data.duckdb", dbData, "duckdb"); + * // Tables are available as: SELECT * FROM data.table_name * ``` */ @@ -72,8 +75,8 @@ export type DuckDBStatus = "idle" | "loading" | "ready" | "error"; /** Status update callback */ export type StatusCallback = (status: DuckDBStatus, message?: string) => void; -/** File types supported for registration (SQLite NOT supported in WASM) */ -export type FileType = "csv" | "parquet" | "json"; +/** File types supported for registration */ +export type FileType = "csv" | "parquet" | "json" | "duckdb"; /** Internal message counter for correlation */ let messageId = 0; @@ -303,7 +306,7 @@ class DuckDBService { data: ArrayBuffer, fileType: FileType, options?: ExecuteOptions - ): Promise<{ success: boolean; error?: string }> { + ): Promise<{ success: boolean; error?: string; dbAlias?: string }> { const id = nextId(); const response = await this.sendMessage<{ @@ -311,6 +314,7 @@ class DuckDBService { id: string; success: boolean; error?: string; + dbAlias?: string; }>( { type: "registerFile", @@ -330,6 +334,45 @@ class DuckDBService { return { success: response.success, error: response.error, + dbAlias: response.dbAlias, + }; + } + + /** + * Register a database file via BROWSER_FILEREADER protocol. + * DuckDB reads lazily from the File handle — no memory overhead, no size limit. + */ + async registerDatabaseFile( + name: string, + handle: File, + options?: ExecuteOptions + ): Promise<{ success: boolean; error?: string; dbAlias?: string }> { + const id = nextId(); + + const response = await this.sendMessage<{ + type: "registerFileResult"; + id: string; + success: boolean; + error?: string; + dbAlias?: string; + }>( + { + type: "registerDatabaseHandle", + id, + name, + handle, + }, + { timeout: options?.timeout ?? 60000, signal: options?.signal } + ); + + if (response.success) { + this.registeredFiles.add(name); + } + + return { + success: response.success, + error: response.error, + dbAlias: response.dbAlias, }; } diff --git a/ui/src/services/duckdb/duckdbWorker.ts b/ui/src/services/duckdb/duckdbWorker.ts index a29713f..66f15d1 100644 --- a/ui/src/services/duckdb/duckdbWorker.ts +++ b/ui/src/services/duckdb/duckdbWorker.ts @@ -2,9 +2,7 @@ * DuckDB Web Worker * * This worker loads and manages a DuckDB WASM instance for executing SQL queries - * in-browser. Supports CSV, Parquet, and JSON files via the virtual filesystem. - * - * Note: SQLite support is NOT available in DuckDB-WASM due to extension limitations. + * in-browser. Supports CSV, Parquet, JSON, and DuckDB database files via the virtual filesystem. * * Communication protocol: * - Main thread sends { type, id, ... } messages @@ -26,7 +24,14 @@ interface RegisterFileMessage { id: string; name: string; data: ArrayBuffer; - fileType: "csv" | "parquet" | "json"; + fileType: "csv" | "parquet" | "json" | "duckdb"; +} + +interface RegisterDatabaseHandleMessage { + type: "registerDatabaseHandle"; + id: string; + name: string; + handle: File; } interface UnregisterFileMessage { @@ -54,6 +59,7 @@ interface StatusMessage { type WorkerMessage = | ExecuteMessage | RegisterFileMessage + | RegisterDatabaseHandleMessage | UnregisterFileMessage | ListTablesMessage | DescribeTableMessage @@ -84,6 +90,7 @@ interface RegisterFileResponse { id: string; success: boolean; error?: string; + dbAlias?: string; } interface UnregisterFileResponse { @@ -138,6 +145,8 @@ let db: duckdb.AsyncDuckDB | null = null; let conn: duckdb.AsyncDuckDBConnection | null = null; let isLoading = false; const registeredFiles = new Set(); +/** Tracks attached DuckDB databases: filename -> alias */ +const attachedDatabases = new Map(); /** * Send a message to the main thread @@ -257,17 +266,31 @@ async function executeQuery(sql: string): Promise<{ } } +/** + * Derive a safe database alias from a filename (e.g., "my-data.duckdb" -> "my_data") + */ +function deriveDbAlias(filename: string): string { + const base = filename.replace(/\.duckdb$/i, ""); + // Replace non-alphanumeric/underscore chars with underscores, collapse runs + return ( + base + .replace(/[^a-zA-Z0-9_]/g, "_") + .replace(/_+/g, "_") + .replace(/^_|_$/g, "") || "db" + ); +} + /** * Register a file in DuckDB's virtual filesystem */ async function registerFile( name: string, data: ArrayBuffer, - _fileType: "csv" | "parquet" | "json" -): Promise<{ success: boolean; error?: string }> { + fileType: "csv" | "parquet" | "json" | "duckdb" +): Promise<{ success: boolean; error?: string; dbAlias?: string }> { await initDuckDB(); - if (!db) { + if (!db || !conn) { return { success: false, error: "Database not initialized" }; } @@ -282,6 +305,14 @@ async function registerFile( await db.registerFileBuffer(name, new Uint8Array(data)); registeredFiles.add(name); + // For .duckdb files, attach the database so its tables are queryable + if (fileType === "duckdb") { + const alias = deriveDbAlias(name); + await conn.query(`ATTACH '${name}' AS "${alias}" (READ_ONLY)`); + attachedDatabases.set(name, alias); + return { success: true, dbAlias: alias }; + } + return { success: true }; } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); @@ -289,6 +320,34 @@ async function registerFile( } } +/** + * Register a database file via BROWSER_FILEREADER protocol. + * DuckDB reads lazily from the File handle on demand — no memory overhead. + */ +async function registerDatabaseHandle( + name: string, + handle: File +): Promise<{ success: boolean; error?: string; dbAlias?: string }> { + await initDuckDB(); + + if (!db || !conn) { + return { success: false, error: "Database not initialized" }; + } + + try { + await db.registerFileHandle(name, handle, duckdb.DuckDBDataProtocol.BROWSER_FILEREADER, true); + registeredFiles.add(name); + + const alias = deriveDbAlias(name); + await conn.query(`ATTACH '${name}' AS "${alias}" (READ_ONLY)`); + attachedDatabases.set(name, alias); + return { success: true, dbAlias: alias }; + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + return { success: false, error: errorMsg }; + } +} + /** * Unregister a file from DuckDB's virtual filesystem */ @@ -298,6 +357,13 @@ async function unregisterFile(name: string): Promise<{ success: boolean; error?: } try { + // Detach if it was an attached database + const alias = attachedDatabases.get(name); + if (alias && conn) { + await conn.query(`DETACH "${alias}"`); + attachedDatabases.delete(name); + } + await db.dropFile(name); registeredFiles.delete(name); return { success: true }; @@ -428,6 +494,25 @@ self.onmessage = async (event: MessageEvent) => { break; } + case "registerDatabaseHandle": { + try { + const result = await registerDatabaseHandle(message.name, message.handle); + sendMessage({ + type: "registerFileResult", + id: message.id, + ...result, + }); + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + sendMessage({ + type: "error", + id: message.id, + error: errorMsg, + }); + } + break; + } + case "unregisterFile": { try { const result = await unregisterFile(message.name); diff --git a/ui/src/stores/chatUIStore.ts b/ui/src/stores/chatUIStore.ts index ddb521d..3e913a1 100644 --- a/ui/src/stores/chatUIStore.ts +++ b/ui/src/stores/chatUIStore.ts @@ -61,7 +61,7 @@ export interface DataFileColumn { type: string; } -/** Table schema for SQLite databases */ +/** Table schema for database files */ export interface DataFileTable { /** Table name */ tableName: string; @@ -75,7 +75,7 @@ export interface DataFile { id: string; /** Original filename */ name: string; - /** File type (csv, parquet, json) - SQLite NOT supported in WASM */ + /** File type */ type: FileType; /** File size in bytes */ size: number; @@ -85,8 +85,12 @@ export interface DataFile { registered: boolean; /** Error message if registration failed */ error?: string; - /** Column schema for the file */ + /** Column schema for flat files (CSV, Parquet, JSON) */ columns?: DataFileColumn[]; + /** Table schemas for database files (DuckDB) */ + tables?: DataFileTable[]; + /** Database alias for attached databases */ + dbName?: string; } // Re-export types for convenience @@ -265,6 +269,8 @@ interface ChatUIActions { error?: string, schema?: { columns?: DataFileColumn[]; + tables?: DataFileTable[]; + dbName?: string; } ) => void; /** Clear all data files */ @@ -530,6 +536,8 @@ export const useChatUIStore = create((set) => ({ registered, error, columns: schema?.columns, + tables: schema?.tables, + dbName: schema?.dbName, } : f ), From ef01a30b2032151d90aec8615bbbbf0a7a6249e6 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Fri, 20 Mar 2026 22:08:15 +1000 Subject: [PATCH 2/4] Review fixes --- ui/src/services/duckdb/duckdbService.ts | 6 ++--- ui/src/services/duckdb/duckdbWorker.ts | 30 ++++++++++++++++--------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/ui/src/services/duckdb/duckdbService.ts b/ui/src/services/duckdb/duckdbService.ts index 7a75a1c..5aa72f1 100644 --- a/ui/src/services/duckdb/duckdbService.ts +++ b/ui/src/services/duckdb/duckdbService.ts @@ -20,9 +20,9 @@ * // Query the CSV * const csvResult = await duckdbService.execute("SELECT * FROM 'data.csv'"); * - * // Register a DuckDB database file - * const dbData = await fetch("data.duckdb").then(r => r.arrayBuffer()); - * await duckdbService.registerFile("data.duckdb", dbData, "duckdb"); + * // Register a DuckDB database file (lazy read, no memory overhead) + * const file = fileInput.files[0]; // e.g. from + * await duckdbService.registerDatabaseFile("data.duckdb", file); * // Tables are available as: SELECT * FROM data.table_name * ``` */ diff --git a/ui/src/services/duckdb/duckdbWorker.ts b/ui/src/services/duckdb/duckdbWorker.ts index 66f15d1..6dae5d8 100644 --- a/ui/src/services/duckdb/duckdbWorker.ts +++ b/ui/src/services/duckdb/duckdbWorker.ts @@ -267,17 +267,22 @@ async function executeQuery(sql: string): Promise<{ } /** - * Derive a safe database alias from a filename (e.g., "my-data.duckdb" -> "my_data") + * Derive a safe, unique database alias from a filename (e.g., "my-data.duckdb" -> "my_data"). + * Appends a counter suffix when the alias already exists in attachedDatabases. */ function deriveDbAlias(filename: string): string { - const base = filename.replace(/\.duckdb$/i, ""); - // Replace non-alphanumeric/underscore chars with underscores, collapse runs - return ( - base + const base = + filename + .replace(/\.duckdb$/i, "") .replace(/[^a-zA-Z0-9_]/g, "_") .replace(/_+/g, "_") - .replace(/^_|_$/g, "") || "db" - ); + .replace(/^_|_$/g, "") || "db"; + + const existing = new Set(attachedDatabases.values()); + if (!existing.has(base)) return base; + let i = 2; + while (existing.has(`${base}_${i}`)) i++; + return `${base}_${i}`; } /** @@ -303,16 +308,18 @@ async function registerFile( // Register the file buffer await db.registerFileBuffer(name, new Uint8Array(data)); - registeredFiles.add(name); // For .duckdb files, attach the database so its tables are queryable if (fileType === "duckdb") { const alias = deriveDbAlias(name); - await conn.query(`ATTACH '${name}' AS "${alias}" (READ_ONLY)`); + const escapedName = name.replace(/'/g, "''"); + await conn.query(`ATTACH '${escapedName}' AS "${alias}" (READ_ONLY)`); + registeredFiles.add(name); attachedDatabases.set(name, alias); return { success: true, dbAlias: alias }; } + registeredFiles.add(name); return { success: true }; } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); @@ -336,10 +343,11 @@ async function registerDatabaseHandle( try { await db.registerFileHandle(name, handle, duckdb.DuckDBDataProtocol.BROWSER_FILEREADER, true); - registeredFiles.add(name); const alias = deriveDbAlias(name); - await conn.query(`ATTACH '${name}' AS "${alias}" (READ_ONLY)`); + const escapedName = name.replace(/'/g, "''"); + await conn.query(`ATTACH '${escapedName}' AS "${alias}" (READ_ONLY)`); + registeredFiles.add(name); attachedDatabases.set(name, alias); return { success: true, dbAlias: alias }; } catch (error) { From fa897d963a6ba5f06a4ca91240d8ae96cd86a8da Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Fri, 20 Mar 2026 22:31:15 +1000 Subject: [PATCH 3/4] Review fixes --- .../components/DataFileUpload/DataFileUpload.tsx | 10 +++++++--- ui/src/pages/chat/useChat.ts | 3 ++- ui/src/services/duckdb/duckdbWorker.ts | 14 ++++++++++++-- ui/src/stores/chatUIStore.ts | 2 ++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/ui/src/components/DataFileUpload/DataFileUpload.tsx b/ui/src/components/DataFileUpload/DataFileUpload.tsx index 3f48795..4ef688e 100644 --- a/ui/src/components/DataFileUpload/DataFileUpload.tsx +++ b/ui/src/components/DataFileUpload/DataFileUpload.tsx @@ -148,17 +148,21 @@ export function DataFileUpload({ if (fileType === "duckdb" && result.dbAlias) { // For DuckDB database files, enumerate tables and describe each const tablesResult = await duckdbService.execute( - `SELECT table_name FROM information_schema.tables WHERE table_catalog = '${result.dbAlias}' AND table_schema = 'main' ORDER BY table_name` + `SELECT table_schema, table_name FROM information_schema.tables WHERE table_catalog = '${result.dbAlias}' AND table_schema NOT IN ('information_schema', 'pg_catalog') ORDER BY table_schema, table_name` ); if (tablesResult.success && tablesResult.rows.length > 0) { const tables = []; for (const row of tablesResult.rows) { const tableName = String(row.table_name); + const schemaName = String(row.table_schema); + const safeTable = tableName.replace(/"/g, '""'); + const safeSchema = schemaName.replace(/"/g, '""'); const colResult = await duckdbService.describeTable( - `"${result.dbAlias}".main."${tableName}"` + `"${result.dbAlias}"."${safeSchema}"."${safeTable}"` ); tables.push({ tableName, + schemaName, columns: colResult.success ? colResult.columns.map((c) => ({ name: c.name, type: c.type })) : [], @@ -498,7 +502,7 @@ function TableSchema({ table, dbName }: { table: DataFileTable; dbName?: string
{dbName && (

- SELECT * FROM {dbName}.{table.tableName} + SELECT * FROM {dbName}.{table.schemaName}.{table.tableName}

)} {table.columns.length > 0 && } diff --git a/ui/src/pages/chat/useChat.ts b/ui/src/pages/chat/useChat.ts index 8756432..cea82a2 100644 --- a/ui/src/pages/chat/useChat.ts +++ b/ui/src/pages/chat/useChat.ts @@ -70,6 +70,7 @@ interface DataFileInfo { /** For database files (DuckDB) */ tables?: Array<{ tableName: string; + schemaName: string; columns: Array<{ name: string; type: string }>; }>; /** Database alias for attached databases */ @@ -441,7 +442,7 @@ export function useChat({ // Database file with tables for (const table of file.tables) { const columnList = table.columns.map((c) => `${c.name} (${c.type})`).join(", "); - sqlDescription += `\n- ${file.dbName}.${table.tableName}: ${columnList}`; + sqlDescription += `\n- ${file.dbName}.${table.schemaName}.${table.tableName}: ${columnList}`; } } else if (file.columns && file.columns.length > 0) { const columnList = file.columns.map((c) => `${c.name} (${c.type})`).join(", "); diff --git a/ui/src/services/duckdb/duckdbWorker.ts b/ui/src/services/duckdb/duckdbWorker.ts index 6dae5d8..3a64d72 100644 --- a/ui/src/services/duckdb/duckdbWorker.ts +++ b/ui/src/services/duckdb/duckdbWorker.ts @@ -313,7 +313,12 @@ async function registerFile( if (fileType === "duckdb") { const alias = deriveDbAlias(name); const escapedName = name.replace(/'/g, "''"); - await conn.query(`ATTACH '${escapedName}' AS "${alias}" (READ_ONLY)`); + try { + await conn.query(`ATTACH '${escapedName}' AS "${alias}" (READ_ONLY)`); + } catch (attachError) { + await db.dropFile(name); + throw attachError; + } registeredFiles.add(name); attachedDatabases.set(name, alias); return { success: true, dbAlias: alias }; @@ -346,7 +351,12 @@ async function registerDatabaseHandle( const alias = deriveDbAlias(name); const escapedName = name.replace(/'/g, "''"); - await conn.query(`ATTACH '${escapedName}' AS "${alias}" (READ_ONLY)`); + try { + await conn.query(`ATTACH '${escapedName}' AS "${alias}" (READ_ONLY)`); + } catch (attachError) { + await db.dropFile(name); + throw attachError; + } registeredFiles.add(name); attachedDatabases.set(name, alias); return { success: true, dbAlias: alias }; diff --git a/ui/src/stores/chatUIStore.ts b/ui/src/stores/chatUIStore.ts index 3e913a1..c522500 100644 --- a/ui/src/stores/chatUIStore.ts +++ b/ui/src/stores/chatUIStore.ts @@ -65,6 +65,8 @@ export interface DataFileColumn { export interface DataFileTable { /** Table name */ tableName: string; + /** Schema name (e.g. "main") */ + schemaName: string; /** Columns in the table */ columns: DataFileColumn[]; } From 234d6256c91303b7ff6d3527b6e1554a32ddc4a3 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Fri, 20 Mar 2026 22:43:22 +1000 Subject: [PATCH 4/4] Review fixes --- ui/src/components/DataFileUpload/DataFileUpload.tsx | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ui/src/components/DataFileUpload/DataFileUpload.tsx b/ui/src/components/DataFileUpload/DataFileUpload.tsx index 4ef688e..e754677 100644 --- a/ui/src/components/DataFileUpload/DataFileUpload.tsx +++ b/ui/src/components/DataFileUpload/DataFileUpload.tsx @@ -473,7 +473,11 @@ function DataSchemaModal({ {hasTables && (
{file.tables!.map((table) => ( - + ))}
)} @@ -502,7 +506,8 @@ function TableSchema({ table, dbName }: { table: DataFileTable; dbName?: string
{dbName && (

- SELECT * FROM {dbName}.{table.schemaName}.{table.tableName} + SELECT * FROM "{dbName}"."{table.schemaName}"."{table.tableName} + "

)} {table.columns.length > 0 && }