diff --git a/apps/sim/app/(landing)/actions/github.ts b/apps/sim/app/(landing)/actions/github.ts
index 527f29ea44..42f586a956 100644
--- a/apps/sim/app/(landing)/actions/github.ts
+++ b/apps/sim/app/(landing)/actions/github.ts
@@ -1,6 +1,6 @@
 import { createLogger } from '@/lib/logs/console/logger'
 
-const DEFAULT_STARS = '18.6k'
+const DEFAULT_STARS = '19.4k'
 
 const logger = createLogger('GitHubStars')
 
diff --git a/apps/sim/app/api/chat/[identifier]/route.ts b/apps/sim/app/api/chat/[identifier]/route.ts
index eefb9ca997..44e9e524a9 100644
--- a/apps/sim/app/api/chat/[identifier]/route.ts
+++ b/apps/sim/app/api/chat/[identifier]/route.ts
@@ -132,7 +132,7 @@ export async function POST(
     if ((password || email) && !input) {
       const response = addCorsHeaders(createSuccessResponse({ authenticated: true }), request)
-      setChatAuthCookie(response, deployment.id, deployment.authType)
+      setChatAuthCookie(response, deployment.id, deployment.authType, deployment.password)
       return response
     }
 
@@ -315,7 +315,7 @@ export async function GET(
   if (
     deployment.authType !== 'public' &&
     authCookie &&
-    validateAuthToken(authCookie.value, deployment.id)
+    validateAuthToken(authCookie.value, deployment.id, deployment.password)
   ) {
     return addCorsHeaders(
       createSuccessResponse({
diff --git a/apps/sim/app/api/chat/utils.ts b/apps/sim/app/api/chat/utils.ts
index 1e41f92012..c8b76d92fc 100644
--- a/apps/sim/app/api/chat/utils.ts
+++ b/apps/sim/app/api/chat/utils.ts
@@ -1,3 +1,4 @@
+import { createHash } from 'crypto'
 import { db } from '@sim/db'
 import { chat, workflow } from '@sim/db/schema'
 import { eq } from 'drizzle-orm'
@@ -9,6 +10,10 @@ import { hasAdminPermission } from '@/lib/workspaces/permissions/utils'
 
 const logger = createLogger('ChatAuthUtils')
 
+function hashPassword(encryptedPassword: string): string {
+  return createHash('sha256').update(encryptedPassword).digest('hex').substring(0, 8)
+}
+
 /**
  * Check if user has permission to create a chat for a specific workflow
  * Either the user owns the workflow directly OR has admin permission for the workflow's workspace
@@ -77,14 +82,20 @@ export async function checkChatAccess(
   return { hasAccess: false }
 }
 
-const encryptAuthToken = (chatId: string, type: string): string => {
-  return Buffer.from(`${chatId}:${type}:${Date.now()}`).toString('base64')
+function encryptAuthToken(chatId: string, type: string, encryptedPassword?: string | null): string {
+  const pwHash = encryptedPassword ? hashPassword(encryptedPassword) : ''
+  return Buffer.from(`${chatId}:${type}:${Date.now()}:${pwHash}`).toString('base64')
 }
 
-export const validateAuthToken = (token: string, chatId: string): boolean => {
+export function validateAuthToken(
+  token: string,
+  chatId: string,
+  encryptedPassword?: string | null
+): boolean {
   try {
     const decoded = Buffer.from(token, 'base64').toString()
-    const [storedId, _type, timestamp] = decoded.split(':')
+    const parts = decoded.split(':')
+    const [storedId, _type, timestamp, storedPwHash] = parts
 
     if (storedId !== chatId) {
       return false
@@ -92,20 +103,32 @@ export const validateAuthToken = (token: string, chatId: string): boolean => {
 
     const createdAt = Number.parseInt(timestamp)
     const now = Date.now()
-    const expireTime = 24 * 60 * 60 * 1000 // 24 hours
+    const expireTime = 24 * 60 * 60 * 1000
 
     if (now - createdAt > expireTime) {
       return false
     }
 
+    if (encryptedPassword) {
+      const currentPwHash = hashPassword(encryptedPassword)
+      if (storedPwHash !== currentPwHash) {
+        return false
+      }
+    }
+
     return true
   } catch (_e) {
     return false
   }
 }
 
-export const setChatAuthCookie = (response: NextResponse, chatId: string, type: string): void => {
-  const token = encryptAuthToken(chatId, type)
+export function setChatAuthCookie(
+  response: NextResponse,
+  chatId: string,
+  type: string,
+  encryptedPassword?: string | null
+): void {
+  const token = encryptAuthToken(chatId, type, encryptedPassword)
   response.cookies.set({
     name: `chat_auth_${chatId}`,
     value: token,
@@ -113,7 +136,7 @@ export const setChatAuthCookie = (response: NextResponse, chatId: string, type:
     secure: !isDev,
     sameSite: 'lax',
     path: '/',
-    maxAge: 60 * 60 * 24, // 24 hours
+    maxAge: 60 * 60 * 24,
   })
 }
 
@@ -145,7 +168,7 @@ export async function validateChatAuth(
   const cookieName = `chat_auth_${deployment.id}`
   const authCookie = request.cookies.get(cookieName)
 
-  if (authCookie && validateAuthToken(authCookie.value, deployment.id)) {
+  if (authCookie && validateAuthToken(authCookie.value, deployment.id, deployment.password)) {
    return { authorized: true }
  }
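Review note on apps/sim/app/api/chat/utils.ts: the cookie token now carries a truncated SHA-256 fingerprint of the stored (already-encrypted) password, so rotating a chat's password invalidates previously minted cookies. A minimal standalone sketch of the round trip (this reimplements the helpers above for illustration; it is not the module's exports):

```ts
import { createHash } from 'crypto'

// First 8 hex chars (32 bits) of SHA-256 over the *encrypted* password
// column - a rotation marker, not a secret.
const fingerprint = (pw: string) => createHash('sha256').update(pw).digest('hex').slice(0, 8)

const mint = (chatId: string, type: string, pw?: string | null) =>
  Buffer.from(`${chatId}:${type}:${Date.now()}:${pw ? fingerprint(pw) : ''}`).toString('base64')

const validate = (token: string, chatId: string, pw?: string | null): boolean => {
  const [storedId, , timestamp, storedHash] = Buffer.from(token, 'base64').toString().split(':')
  if (storedId !== chatId) return false
  if (Date.now() - Number.parseInt(timestamp) > 24 * 60 * 60 * 1000) return false // 24h TTL
  return pw ? storedHash === fingerprint(pw) : true
}

// A cookie minted under the old password stops validating after rotation:
const token = mint('chat-1', 'password', 'enc-old')
console.log(validate(token, 'chat-1', 'enc-old')) // true
console.log(validate(token, 'chat-1', 'enc-new')) // false
```

Since the fingerprint is only 32 bits it acts as a version tag rather than an authenticator, and legacy three-part tokens decode with an undefined hash, so they fail the comparison once a password is set and force a re-login.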
diff --git a/apps/sim/app/api/proxy/tts/stream/route.ts b/apps/sim/app/api/proxy/tts/stream/route.ts
index 84c8c05b0f..316c0d0a0a 100644
--- a/apps/sim/app/api/proxy/tts/stream/route.ts
+++ b/apps/sim/app/api/proxy/tts/stream/route.ts
@@ -1,26 +1,81 @@
+import { db } from '@sim/db'
+import { chat } from '@sim/db/schema'
+import { eq } from 'drizzle-orm'
 import type { NextRequest } from 'next/server'
-import { checkHybridAuth } from '@/lib/auth/hybrid'
 import { env } from '@/lib/core/config/env'
 import { validateAlphanumericId } from '@/lib/core/security/input-validation'
 import { createLogger } from '@/lib/logs/console/logger'
+import { validateAuthToken } from '@/app/api/chat/utils'
 
 const logger = createLogger('ProxyTTSStreamAPI')
 
+/**
+ * Validates chat-based authentication for deployed chat voice mode
+ * Checks if the user has a valid chat auth cookie for the given chatId
+ */
+async function validateChatAuth(request: NextRequest, chatId: string): Promise<boolean> {
+  try {
+    const chatResult = await db
+      .select({
+        id: chat.id,
+        isActive: chat.isActive,
+        authType: chat.authType,
+        password: chat.password,
+      })
+      .from(chat)
+      .where(eq(chat.id, chatId))
+      .limit(1)
+
+    if (chatResult.length === 0 || !chatResult[0].isActive) {
+      logger.warn('Chat not found or inactive for TTS auth:', chatId)
+      return false
+    }
+
+    const chatData = chatResult[0]
+
+    if (chatData.authType === 'public') {
+      return true
+    }
+
+    const cookieName = `chat_auth_${chatId}`
+    const authCookie = request.cookies.get(cookieName)
+
+    if (authCookie && validateAuthToken(authCookie.value, chatId, chatData.password)) {
+      return true
+    }
+
+    return false
+  } catch (error) {
+    logger.error('Error validating chat auth for TTS:', error)
+    return false
+  }
+}
+
 export async function POST(request: NextRequest) {
   try {
-    const authResult = await checkHybridAuth(request, { requireWorkflowId: false })
-    if (!authResult.success) {
-      logger.error('Authentication failed for TTS stream proxy:', authResult.error)
-      return new Response('Unauthorized', { status: 401 })
+    let body: any
+    try {
+      body = await request.json()
+    } catch {
+      return new Response('Invalid request body', { status: 400 })
     }
 
-    const body = await request.json()
-    const { text, voiceId, modelId = 'eleven_turbo_v2_5' } = body
+    const { text, voiceId, modelId = 'eleven_turbo_v2_5', chatId } = body
+
+    if (!chatId) {
+      return new Response('chatId is required', { status: 400 })
+    }
 
     if (!text || !voiceId) {
       return new Response('Missing required parameters', { status: 400 })
     }
 
+    const isChatAuthed = await validateChatAuth(request, chatId)
+    if (!isChatAuthed) {
+      logger.warn('Chat authentication failed for TTS, chatId:', chatId)
+      return new Response('Unauthorized', { status: 401 })
+    }
+
     const voiceIdValidation = validateAlphanumericId(voiceId, 'voiceId', 255)
     if (!voiceIdValidation.isValid) {
       logger.error(`Invalid voice ID: ${voiceIdValidation.error}`)
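For reference, this is roughly how a deployed chat client would call the proxy. A sketch under assumptions: only the request shape (text, voiceId, chatId) comes from the route above; the `playTts` helper and buffer-then-play approach are illustrative.

```ts
// Minimal sketch: fetch synthesized speech for one agent message and play it.
async function playTts(chatId: string, text: string, voiceId: string): Promise<void> {
  const res = await fetch('/api/proxy/tts/stream', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    // Same-origin requests send the chat_auth_<chatId> cookie automatically;
    // the proxy validates it against the chat's current password.
    body: JSON.stringify({ chatId, text, voiceId }),
  })
  if (!res.ok) throw new Error(`TTS request failed: ${res.status}`)

  const audio = new Audio(URL.createObjectURL(await res.blob()))
  await audio.play()
}
```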
diff --git a/apps/sim/app/api/stars/route.ts b/apps/sim/app/api/stars/route.ts
index e0e9d48ea8..fb02b20d47 100644
--- a/apps/sim/app/api/stars/route.ts
+++ b/apps/sim/app/api/stars/route.ts
@@ -23,13 +23,13 @@ export async function GET() {
 
     if (!response.ok) {
       console.warn('GitHub API request failed:', response.status)
-      return NextResponse.json({ stars: formatStarCount(14500) })
+      return NextResponse.json({ stars: formatStarCount(19400) })
     }
 
     const data = await response.json()
-    return NextResponse.json({ stars: formatStarCount(Number(data?.stargazers_count ?? 14500)) })
+    return NextResponse.json({ stars: formatStarCount(Number(data?.stargazers_count ?? 19400)) })
   } catch (error) {
     console.warn('Error fetching GitHub stars:', error)
-    return NextResponse.json({ stars: formatStarCount(14500) })
+    return NextResponse.json({ stars: formatStarCount(19400) })
   }
 }
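`formatStarCount` itself isn't part of this diff; judging by the `'19.4k'` defaults elsewhere in the PR, it presumably compacts raw counts to a `k` suffix. A hypothetical stand-in for reading the fallbacks above (assumed name and rounding, not the app's implementation):

```ts
// Hypothetical: 19400 -> "19.4k", 21000 -> "21k", 950 -> "950" (assumed behavior).
function formatStarCount(count: number): string {
  if (count < 1000) return String(count)
  return `${(count / 1000).toFixed(1).replace(/\.0$/, '')}k`
}

console.log(formatStarCount(19400)) // "19.4k" - matches DEFAULT_STARS above
```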
diff --git a/apps/sim/app/chat/[identifier]/chat.tsx b/apps/sim/app/chat/[identifier]/chat.tsx
index fe63fbf18f..cb70cbbb91 100644
--- a/apps/sim/app/chat/[identifier]/chat.tsx
+++ b/apps/sim/app/chat/[identifier]/chat.tsx
@@ -39,6 +39,7 @@ interface ChatConfig {
 
 interface AudioStreamingOptions {
   voiceId: string
+  chatId?: string
   onError: (error: Error) => void
 }
 
@@ -62,16 +63,19 @@ function fileToBase64(file: File): Promise<string> {
 /**
  * Creates an audio stream handler for text-to-speech conversion
  * @param streamTextToAudio - Function to stream text to audio
  * @param voiceId - The voice ID to use for TTS
+ * @param chatId - Optional chat ID for deployed chat authentication
  * @returns Audio stream handler function or undefined
  */
 function createAudioStreamHandler(
   streamTextToAudio: (text: string, options: AudioStreamingOptions) => Promise<void>,
-  voiceId: string
+  voiceId: string,
+  chatId?: string
 ) {
   return async (text: string) => {
     try {
       await streamTextToAudio(text, {
         voiceId,
+        chatId,
         onError: (error: Error) => {
           logger.error('Audio streaming error:', error)
         },
@@ -113,7 +117,7 @@ export default function ChatClient({ identifier }: { identifier: string }) {
   const [error, setError] = useState(null)
   const messagesEndRef = useRef(null)
   const messagesContainerRef = useRef(null)
-  const [starCount, setStarCount] = useState('3.4k')
+  const [starCount, setStarCount] = useState('19.4k')
   const [conversationId, setConversationId] = useState('')
   const [showScrollButton, setShowScrollButton] = useState(false)
 
@@ -391,7 +395,11 @@ export default function ChatClient({ identifier }: { identifier: string }) {
       // Use the streaming hook with audio support
       const shouldPlayAudio = isVoiceInput || isVoiceFirstMode
       const audioHandler = shouldPlayAudio
-        ? createAudioStreamHandler(streamTextToAudio, DEFAULT_VOICE_SETTINGS.voiceId)
+        ? createAudioStreamHandler(
+            streamTextToAudio,
+            DEFAULT_VOICE_SETTINGS.voiceId,
+            chatConfig?.id
+          )
         : undefined
 
       logger.info('Starting to handle streamed response:', { shouldPlayAudio })
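Net effect of the chat.tsx changes: every TTS request now carries the deployed chat's id, which the proxy above authorizes against the `chat_auth_*` cookie. A sketch of the wiring (the `streamTextToAudio` body here is a stand-in for the app's audio-streaming hook; only the option shape the handler relies on is taken from the diff):

```ts
// Stand-in for the app's audio-streaming hook function. The fetch body
// mirrors the proxy route's expected JSON; error handling is illustrative.
type AudioStreamingOptions = { voiceId: string; chatId?: string; onError: (error: Error) => void }

const streamTextToAudio = async (text: string, options: AudioStreamingOptions): Promise<void> => {
  try {
    await fetch('/api/proxy/tts/stream', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text, voiceId: options.voiceId, chatId: options.chatId }),
    })
  } catch (err) {
    options.onError(err as Error)
  }
}

// Same shape as the diff's createAudioStreamHandler: close over voiceId and
// chatId so the caller only ever supplies the text to speak.
function createAudioStreamHandler(
  streamFn: (text: string, options: AudioStreamingOptions) => Promise<void>,
  voiceId: string,
  chatId?: string
) {
  return (text: string) => streamFn(text, { voiceId, chatId, onError: console.error })
}

const handler = createAudioStreamHandler(streamTextToAudio, 'voice-abc', 'chat-123')
void handler('Hello from the agent') // POSTs { text, voiceId, chatId }
```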
diff --git a/apps/sim/app/chat/components/voice-interface/voice-interface.tsx b/apps/sim/app/chat/components/voice-interface/voice-interface.tsx
index a4f2ad095e..d4dc002ff2 100644
--- a/apps/sim/app/chat/components/voice-interface/voice-interface.tsx
+++ b/apps/sim/app/chat/components/voice-interface/voice-interface.tsx
@@ -68,7 +68,6 @@ export function VoiceInterface({
   messages = [],
   className,
 }: VoiceInterfaceProps) {
-  // Simple state machine
   const [state, setState] = useState<'idle' | 'listening' | 'agent_speaking'>('idle')
   const [isInitialized, setIsInitialized] = useState(false)
   const [isMuted, setIsMuted] = useState(false)
@@ -76,12 +75,10 @@ export function VoiceInterface({
   const [permissionStatus, setPermissionStatus] = useState<'prompt' | 'granted' | 'denied'>(
     'prompt'
   )
-
-  // Current turn transcript (subtitle)
   const [currentTranscript, setCurrentTranscript] = useState('')
 
-  // State tracking
   const currentStateRef = useRef<'idle' | 'listening' | 'agent_speaking'>('idle')
+  const isCallEndedRef = useRef(false)
 
   useEffect(() => {
     currentStateRef.current = state
@@ -98,12 +95,10 @@ export function VoiceInterface({
   const isSupported =
     typeof window !== 'undefined' &&
     !!(window.SpeechRecognition || window.webkitSpeechRecognition)
 
-  // Update muted ref
   useEffect(() => {
     isMutedRef.current = isMuted
   }, [isMuted])
 
-  // Timeout to handle cases where agent doesn't provide audio response
   const setResponseTimeout = useCallback(() => {
     if (responseTimeoutRef.current) {
       clearTimeout(responseTimeoutRef.current)
     }
@@ -113,7 +108,7 @@ export function VoiceInterface({
       if (currentStateRef.current === 'listening') {
         setState('idle')
       }
-    }, 5000) // 5 second timeout (increased from 3)
+    }, 5000)
   }, [])
 
   const clearResponseTimeout = useCallback(() => {
@@ -123,14 +118,12 @@ export function VoiceInterface({
     }
   }, [])
 
-  // Sync with external state
   useEffect(() => {
     if (isPlayingAudio && state !== 'agent_speaking') {
-      clearResponseTimeout() // Clear timeout since agent is responding
+      clearResponseTimeout()
       setState('agent_speaking')
       setCurrentTranscript('')
 
-      // Mute microphone immediately
       setIsMuted(true)
       if (mediaStreamRef.current) {
         mediaStreamRef.current.getAudioTracks().forEach((track) => {
           track.enabled = false
         })
       }
 
-      // Stop speech recognition completely
       if (recognitionRef.current) {
         try {
           recognitionRef.current.abort()
@@ -150,7 +142,6 @@
       setState('idle')
       setCurrentTranscript('')
 
-      // Re-enable microphone
       setIsMuted(false)
       if (mediaStreamRef.current) {
         mediaStreamRef.current.getAudioTracks().forEach((track) => {
           track.enabled = true
         })
       }
     }
   }, [isPlayingAudio, state, clearResponseTimeout])
 
-  // Audio setup
   const setupAudio = useCallback(async () => {
     try {
       const stream = await navigator.mediaDevices.getUserMedia({
@@ -175,7 +165,6 @@
       setPermissionStatus('granted')
       mediaStreamRef.current = stream
 
-      // Setup audio context for visualization
       if (!audioContextRef.current) {
         const AudioContext = window.AudioContext || window.webkitAudioContext
         audioContextRef.current = new AudioContext()
@@ -194,7 +183,6 @@
       source.connect(analyser)
       analyserRef.current = analyser
 
-      // Start visualization
       const updateVisualization = () => {
         if (!analyserRef.current) return
 
@@ -223,7 +211,6 @@ export function VoiceInterface({
     }
   }, [])
 
-  // Speech recognition setup
   const setupSpeechRecognition = useCallback(() => {
     if (!isSupported) return
 
@@ -259,14 +246,11 @@
         }
       }
 
-      // Update live transcript
       setCurrentTranscript(interimTranscript || finalTranscript)
 
-      // Send final transcript (but keep listening state until agent responds)
       if (finalTranscript.trim()) {
-        setCurrentTranscript('') // Clear transcript
+        setCurrentTranscript('')
 
-        // Stop recognition to avoid interference while waiting for response
         if (recognitionRef.current) {
           try {
             recognitionRef.current.stop()
@@ -275,7 +259,6 @@
           }
         }
 
-        // Start timeout in case agent doesn't provide audio response
         setResponseTimeout()
 
         onVoiceTranscript?.(finalTranscript)
       }
     }
 
     recognition.onend = () => {
+      if (isCallEndedRef.current) return
+
       const currentState = currentStateRef.current
 
-      // Only restart recognition if we're in listening state and not muted
       if (currentState === 'listening' && !isMutedRef.current) {
-        // Add a delay to avoid immediate restart after sending transcript
         setTimeout(() => {
-          // Double-check state hasn't changed during delay
+          if (isCallEndedRef.current) return
+
           if (
             recognitionRef.current &&
             currentStateRef.current === 'listening' &&
@@ -301,14 +285,12 @@
              logger.debug('Error restarting speech recognition:', error)
            }
          }
-        }, 1000) // Longer delay to give agent time to respond
+        }, 1000)
       }
     }
 
     recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
-      // Filter out "aborted" errors - these are expected when we intentionally stop recognition
       if (event.error === 'aborted') {
-        // Ignore
         return
       }
 
@@ -320,7 +302,6 @@
     recognitionRef.current = recognition
   }, [isSupported, onVoiceTranscript, setResponseTimeout])
 
-  // Start/stop listening
   const startListening = useCallback(() => {
     if (!isInitialized || isMuted || state !== 'idle') {
       return
     }
@@ -351,17 +332,12 @@
     }
   }, [])
 
-  // Handle interrupt
   const handleInterrupt = useCallback(() => {
     if (state === 'agent_speaking') {
-      // Clear any subtitle timeouts and text
-      // (No longer needed after removing subtitle system)
-
       onInterrupt?.()
 
       setState('listening')
       setCurrentTranscript('')
 
-      // Unmute microphone for user input
       setIsMuted(false)
       if (mediaStreamRef.current) {
         mediaStreamRef.current.getAudioTracks().forEach((track) => {
           track.enabled = true
         })
       }
 
-      // Start listening immediately
       if (recognitionRef.current) {
         try {
           recognitionRef.current.start()
@@ -380,14 +355,13 @@
     }
   }, [state, onInterrupt])
 
-  // Handle call end with proper cleanup
   const handleCallEnd = useCallback(() => {
-    // Stop everything immediately
+    isCallEndedRef.current = true
+
     setState('idle')
     setCurrentTranscript('')
     setIsMuted(false)
 
-    // Stop speech recognition
     if (recognitionRef.current) {
       try {
         recognitionRef.current.abort()
@@ -396,17 +370,11 @@
       }
     }
 
-    // Clear timeouts
     clearResponseTimeout()
-
-    // Stop audio playback and streaming immediately
     onInterrupt?.()
-
-    // Call the original onCallEnd
     onCallEnd?.()
   }, [onCallEnd, onInterrupt, clearResponseTimeout])
 
-  // Keyboard handler
   useEffect(() => {
     const handleKeyDown = (event: KeyboardEvent) => {
       if (event.code === 'Space') {
@@ -419,7 +387,6 @@
 
     return () => document.removeEventListener('keydown', handleKeyDown)
   }, [handleInterrupt])
-  // Mute toggle
   const toggleMute = useCallback(() => {
     if (state === 'agent_speaking') {
       handleInterrupt()
@@ -442,7 +409,6 @@
     }
   }, [isMuted, state, handleInterrupt, stopListening, startListening])
 
-  // Initialize
   useEffect(() => {
     if (isSupported) {
       setupSpeechRecognition()
@@ -450,47 +416,40 @@
     }
   }, [isSupported, setupSpeechRecognition, setupAudio])
 
-  // Auto-start listening when ready
   useEffect(() => {
     if (isInitialized && !isMuted && state === 'idle') {
       startListening()
     }
   }, [isInitialized, isMuted, state, startListening])
 
-  // Cleanup when call ends or component unmounts
   useEffect(() => {
     return () => {
-      // Stop speech recognition
+      isCallEndedRef.current = true
+
       if (recognitionRef.current) {
         try {
           recognitionRef.current.abort()
-        } catch (error) {
+        } catch (_e) {
           // Ignore
         }
         recognitionRef.current = null
       }
 
-      // Stop media stream
       if (mediaStreamRef.current) {
-        mediaStreamRef.current.getTracks().forEach((track) => {
-          track.stop()
-        })
+        mediaStreamRef.current.getTracks().forEach((track) => track.stop())
         mediaStreamRef.current = null
       }
 
-      // Stop audio context
       if (audioContextRef.current) {
         audioContextRef.current.close()
         audioContextRef.current = null
       }
 
-      // Cancel animation frame
       if (animationFrameRef.current) {
         cancelAnimationFrame(animationFrameRef.current)
         animationFrameRef.current = null
       }
 
-      // Clear timeouts
       if (responseTimeoutRef.current) {
         clearTimeout(responseTimeoutRef.current)
         responseTimeoutRef.current = null
@@ -498,7 +457,6 @@
     }
   }, [])
 
-  // Get status text
   const getStatusText = () => {
     switch (state) {
       case 'listening':
@@ -510,7 +468,6 @@
     }
   }
 
-  // Get button content
   const getButtonContent = () => {
     if (state === 'agent_speaking') {
       return (
@@ -524,9 +481,7 @@
 
   return (
-      {/* Main content */}
-        {/* Voice visualization */}
-        {/* Live transcript - subtitle style */}
         {currentTranscript && (
@@ -549,17 +503,14 @@ export function VoiceInterface({
         )}
 
-        {/* Status */}
         {getStatusText()}
         {isMuted && (Muted)}
 
-        {/* Controls */}
-        {/* End call */}
-        {/* Mic/Stop button */}
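A closing note on the voice-interface.tsx changes: `isCallEndedRef` exists because the Web Speech API fires `recognition.onend` even after an intentional `abort()`, and this component's `onend` restarts recognition on a one-second delay, so an ended call could otherwise resurrect the microphone. The guard pattern, distilled outside React (the `Recognition` type here is a simplified stand-in for the browser API):

```ts
// A mutable flag checked at every async re-entry point, so a torn-down
// session cannot restart itself. Mirrors isCallEndedRef.current above.
type Recognition = { start(): void; abort(): void; onend: (() => void) | null }

function createVoiceSession(recognition: Recognition) {
  let callEnded = false

  recognition.onend = () => {
    if (callEnded) return // onend fires even after abort(); bail out first
    setTimeout(() => {
      if (callEnded) return // re-check: endCall() may have run during the delay
      recognition.start()
    }, 1000)
  }

  return {
    endCall() {
      callEnded = true // flip before aborting, so the onend handler sees it
      recognition.abort()
    },
  }
}
```

A ref (or plain closure variable) is the right tool here rather than React state: the flag must be visible synchronously inside stale event callbacks, and flipping it must not trigger a re-render.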