diff --git a/examples/07-transcription-live-websocket.py b/examples/07-transcription-live-websocket.py
deleted file mode 100644
index e12c9f67..00000000
--- a/examples/07-transcription-live-websocket.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""
-Example: Live Transcription with WebSocket (Listen V1)
-
-This example shows how to stream audio for real-time transcription using WebSocket.
-"""
-
-from typing import Union
-
-from dotenv import load_dotenv
-
-load_dotenv()
-
-from deepgram import DeepgramClient
-from deepgram.core.events import EventType
-from deepgram.listen.v1.types import (
-    ListenV1Metadata,
-    ListenV1Results,
-    ListenV1SpeechStarted,
-    ListenV1UtteranceEnd,
-)
-
-ListenV1SocketClientResponse = Union[ListenV1Results, ListenV1Metadata, ListenV1UtteranceEnd, ListenV1SpeechStarted]
-
-client = DeepgramClient()
-
-try:
-    with client.listen.v1.connect(model="nova-3") as connection:
-
-        def on_message(message: ListenV1SocketClientResponse) -> None:
-            msg_type = getattr(message, "type", "Unknown")
-            print(f"Received {msg_type} event")
-
-            # Extract transcription from Results events
-            if isinstance(message, ListenV1Results):
-                if message.channel and message.channel.alternatives:
-                    transcript = message.channel.alternatives[0].transcript
-                    if transcript:
-                        print(f"Transcript: {transcript}")
-
-        connection.on(EventType.OPEN, lambda _: print("Connection opened"))
-        connection.on(EventType.MESSAGE, on_message)
-        connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
-        connection.on(EventType.ERROR, lambda error: print(f"Error: {error}"))
-
-        # Start listening - this blocks until the connection closes
-        # In production, you would send audio data here:
-        # audio_path = os.path.join(os.path.dirname(__file__), "..", "fixtures", "audio.wav")
-        # with open(audio_path, "rb") as audio_file:
-        #     audio_data = audio_file.read()
-        # connection.send_listen_v_1_media(audio_data)
-
-        connection.start_listening()
-
-        # For async version:
-        # from deepgram import AsyncDeepgramClient
-        # async with client.listen.v1.connect(model="nova-3") as connection:
-        #     # ... same event handlers ...
-        #     await connection.start_listening()
-
-except Exception as e:
-    print(f"Error: {e}")
diff --git a/examples/04-transcription-prerecorded-url.py b/examples/10-transcription-prerecorded-url.py
similarity index 100%
rename from examples/04-transcription-prerecorded-url.py
rename to examples/10-transcription-prerecorded-url.py
diff --git a/examples/05-transcription-prerecorded-file.py b/examples/11-transcription-prerecorded-file.py
similarity index 100%
rename from examples/05-transcription-prerecorded-file.py
rename to examples/11-transcription-prerecorded-file.py
diff --git a/examples/06-transcription-prerecorded-callback.py b/examples/12-transcription-prerecorded-callback.py
similarity index 100%
rename from examples/06-transcription-prerecorded-callback.py
rename to examples/12-transcription-prerecorded-callback.py
diff --git a/examples/13-transcription-live-websocket.py b/examples/13-transcription-live-websocket.py
new file mode 100644
index 00000000..5deaee98
--- /dev/null
+++ b/examples/13-transcription-live-websocket.py
@@ -0,0 +1,132 @@
+"""
+Example: Live Transcription with WebSocket (Listen V1)
+
+This example demonstrates how to use WebSocket for real-time audio transcription.
+In production, you would stream audio from a microphone or other live source.
+This example uses an audio file to demonstrate the streaming pattern.
+"""
+
+import os
+import threading
+import time
+from typing import Union
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from deepgram import DeepgramClient
+from deepgram.core.events import EventType
+from deepgram.listen.v1.types import (
+    ListenV1Finalize,
+    ListenV1Metadata,
+    ListenV1Results,
+    ListenV1SpeechStarted,
+    ListenV1UtteranceEnd,
+)
+
+ListenV1SocketClientResponse = Union[ListenV1Results, ListenV1Metadata, ListenV1UtteranceEnd, ListenV1SpeechStarted]
+
+# Audio streaming configuration
+CHUNK_SIZE = 8192  # Bytes to send at a time
+SAMPLE_RATE = 44100  # Hz (typical for WAV files)
+SAMPLE_WIDTH = 2  # 16-bit audio = 2 bytes per sample
+CHANNELS = 1  # Mono audio
+
+# Calculate delay between chunks to simulate real-time streaming
+# This makes the audio stream at its natural playback rate
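+# With the defaults above this works out to 8192 / (44100 * 2 * 1) ≈ 0.093 seconds per chunk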
+""" + +import os +import threading +import time +from typing import Union + +from dotenv import load_dotenv + +load_dotenv() + +from deepgram import DeepgramClient +from deepgram.core.events import EventType +from deepgram.listen.v1.types import ( + ListenV1Finalize, + ListenV1Metadata, + ListenV1Results, + ListenV1SpeechStarted, + ListenV1UtteranceEnd, +) + +ListenV1SocketClientResponse = Union[ListenV1Results, ListenV1Metadata, ListenV1UtteranceEnd, ListenV1SpeechStarted] + +# Audio streaming configuration +CHUNK_SIZE = 8192 # Bytes to send at a time +SAMPLE_RATE = 44100 # Hz (typical for WAV files) +SAMPLE_WIDTH = 2 # 16-bit audio = 2 bytes per sample +CHANNELS = 1 # Mono audio + +# Calculate delay between chunks to simulate real-time streaming +# This makes the audio stream at its natural playback rate +CHUNK_DELAY = CHUNK_SIZE / (SAMPLE_RATE * SAMPLE_WIDTH * CHANNELS) + +client = DeepgramClient() + +try: + with client.listen.v1.connect(model="nova-3") as connection: + + def on_message(message: ListenV1SocketClientResponse) -> None: + # Extract transcription from Results events + if isinstance(message, ListenV1Results): + if message.channel and message.channel.alternatives: + transcript = message.channel.alternatives[0].transcript + if transcript: + print(f"Transcript: {transcript}") + + def on_open(_) -> None: + print("Connection opened") + + def on_close(_) -> None: + print("Connection closed") + + def on_error(error) -> None: + print(f"Error: {error}") + + # Register event handlers + connection.on(EventType.OPEN, on_open) + connection.on(EventType.MESSAGE, on_message) + connection.on(EventType.CLOSE, on_close) + connection.on(EventType.ERROR, on_error) + + # Define a function to send audio in a background thread + def send_audio(): + audio_path = os.path.join(os.path.dirname(__file__), "fixtures", "audio.wav") + + with open(audio_path, "rb") as audio_file: + print(f"Streaming audio from {audio_path}") + + while True: + chunk = audio_file.read(CHUNK_SIZE) + if not chunk: + break + + connection.send_media(chunk) + + # Simulate real-time streaming by adding delay between chunks + time.sleep(CHUNK_DELAY) + + print("Finished sending audio") + + connection.send_finalize(ListenV1Finalize(type="Finalize")) + + # Start sending audio in a background thread + threading.Thread(target=send_audio, daemon=True).start() + + # Start listening - this blocks until the connection closes or times out + # The connection will stay open until the server closes it or it times out + connection.start_listening() + + # For async version: + # import asyncio + # from deepgram import AsyncDeepgramClient + # + # async with client.listen.v1.connect(model="nova-3") as connection: + # async def on_message(message): + # if isinstance(message, ListenV1Results): + # if message.channel and message.channel.alternatives: + # transcript = message.channel.alternatives[0].transcript + # if transcript: + # print(f"Transcript: {transcript}") + # + # connection.on(EventType.MESSAGE, on_message) + # + # # Define coroutine to send audio + # async def send_audio(): + # audio_path = os.path.join(os.path.dirname(__file__), "fixtures", "audio.wav") + # with open(audio_path, "rb") as audio_file: + # while chunk := audio_file.read(CHUNK_SIZE): + # await connection.send_media(chunk) + # # Simulate real-time streaming + # await asyncio.sleep(CHUNK_DELAY) + # print("Finished sending audio") + # await connection.send_finalize(ListenV1Finalize(type="Finalize")) + # + # # Start both tasks + # listen_task = 
+
+        # For async version:
+        # import asyncio
+        # from deepgram import AsyncDeepgramClient
+        # client = AsyncDeepgramClient()
+        # async with client.listen.v1.connect(model="nova-3") as connection:
+        #     async def on_message(message):
+        #         if isinstance(message, ListenV1Results):
+        #             if message.channel and message.channel.alternatives:
+        #                 transcript = message.channel.alternatives[0].transcript
+        #                 if transcript:
+        #                     print(f"Transcript: {transcript}")
+        #
+        #     connection.on(EventType.MESSAGE, on_message)
+        #
+        #     # Define coroutine to send audio
+        #     async def send_audio():
+        #         audio_path = os.path.join(os.path.dirname(__file__), "fixtures", "audio.wav")
+        #         with open(audio_path, "rb") as audio_file:
+        #             while chunk := audio_file.read(CHUNK_SIZE):
+        #                 await connection.send_media(chunk)
+        #                 # Simulate real-time streaming
+        #                 await asyncio.sleep(CHUNK_DELAY)
+        #         print("Finished sending audio")
+        #         await connection.send_finalize(ListenV1Finalize(type="Finalize"))
+        #
+        #     # Start both tasks
+        #     listen_task = asyncio.create_task(connection.start_listening())
+        #     send_task = asyncio.create_task(send_audio())
+        #
+        #     # Wait for send to complete
+        #     await send_task
+        #
+        #     # Continue listening until connection closes or times out
+        #     await listen_task
+
+except Exception as e:
+    print(f"Error: {e}")
diff --git a/examples/14-transcription-live-websocket-v2.py b/examples/14-transcription-live-websocket-v2.py
new file mode 100644
index 00000000..e84b11d3
--- /dev/null
+++ b/examples/14-transcription-live-websocket-v2.py
@@ -0,0 +1,132 @@
+"""
+Example: Live Transcription with WebSocket V2 (Listen V2)
+
+This example demonstrates how to use Listen V2 for advanced conversational speech
+recognition with contextual turn detection.
+
+Note: Listen V2 requires 16kHz linear16 PCM audio format.
+In production, you would stream audio from a microphone or other live source.
+This example uses an audio file to demonstrate the streaming pattern.
+"""
+
+import os
+import threading
+import time
+from typing import Union
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from deepgram import DeepgramClient
+from deepgram.core.events import EventType
+from deepgram.listen.v2.types import (
+    ListenV2Connected,
+    ListenV2FatalError,
+    ListenV2TurnInfo,
+)
+
+ListenV2SocketClientResponse = Union[ListenV2Connected, ListenV2TurnInfo, ListenV2FatalError]
+
+# Audio streaming configuration
+# IMPORTANT: Listen V2 requires 16kHz linear16 PCM audio
+CHUNK_SIZE = 8192  # Bytes to send at a time
+SAMPLE_RATE = 16000  # Hz (required for Listen V2)
+SAMPLE_WIDTH = 2  # 16-bit audio = 2 bytes per sample
+CHANNELS = 1  # Mono audio
+
+# Calculate delay between chunks to simulate real-time streaming
+# This makes the audio stream at its natural playback rate
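+# At 16 kHz this comes to 8192 / (16000 * 2 * 1) = 0.256 seconds per chunk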
+CHUNK_DELAY = CHUNK_SIZE / (SAMPLE_RATE * SAMPLE_WIDTH * CHANNELS)
+
+client = DeepgramClient()
+
+try:
+    # Listen V2 requires specific audio format: 16kHz linear16 PCM
+    with client.listen.v2.connect(model="flux-general-en", encoding="linear16", sample_rate="16000") as connection:
+
+        def on_message(message: ListenV2SocketClientResponse) -> None:
+            # Handle TurnInfo events containing transcription and turn metadata
+            if isinstance(message, ListenV2TurnInfo):
+                print(f"Turn {message.turn_index}: {message.transcript}")
+                print(f"  Event: {message.event}")
+
+        def on_open(_) -> None:
+            print("Connection opened")
+
+        def on_close(_) -> None:
+            print("Connection closed")
+
+        def on_error(error) -> None:
+            print(f"Error: {error}")
+
+        # Register event handlers
+        connection.on(EventType.OPEN, on_open)
+        connection.on(EventType.MESSAGE, on_message)
+        connection.on(EventType.CLOSE, on_close)
+        connection.on(EventType.ERROR, on_error)
+
+        # Define a function to send audio in a background thread
+        def send_audio():
+            # IMPORTANT: Audio must be 16kHz linear16 PCM for Listen V2
+            audio_path = os.path.join(os.path.dirname(__file__), "fixtures", "audio.wav")
+
+            with open(audio_path, "rb") as audio_file:
+                print(f"Streaming audio from {audio_path}")
+
+                while True:
+                    chunk = audio_file.read(CHUNK_SIZE)
+                    if not chunk:
+                        break
+
+                    connection.send_media(chunk)
+
+                    # Simulate real-time streaming by adding delay between chunks
+                    time.sleep(CHUNK_DELAY)
+
+            print("Finished sending audio")
+
+        # Start sending audio in a background thread
+        threading.Thread(target=send_audio, daemon=True).start()
+
+        # Start listening - this blocks until the connection closes or times out
+        # The connection will stay open until the server closes it or it times out
+        connection.start_listening()
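+
+        # Listen V2 rejects other audio formats, so it can help to sanity-check
+        # the WAV header before streaming. A minimal sketch using only the
+        # standard library (assumes the same fixture path used in send_audio):
+        #
+        # import wave
+        # with wave.open(os.path.join(os.path.dirname(__file__), "fixtures", "audio.wav"), "rb") as wav:
+        #     assert wav.getframerate() == SAMPLE_RATE, "Listen V2 needs 16 kHz audio"
+        #     assert wav.getsampwidth() == SAMPLE_WIDTH, "Listen V2 needs 16-bit (linear16) samples"
+        #     assert wav.getnchannels() == CHANNELS, "this example assumes mono audio"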
+
+        # For async version:
+        # import asyncio
+        # from deepgram import AsyncDeepgramClient
+        # client = AsyncDeepgramClient()
+        # async with client.listen.v2.connect(
+        #     model="flux-general-en",
+        #     encoding="linear16",
+        #     sample_rate="16000"
+        # ) as connection:
+        #     async def on_message(message):
+        #         if isinstance(message, ListenV2TurnInfo):
+        #             print(f"Turn {message.turn_index}: {message.transcript}")
+        #
+        #     connection.on(EventType.MESSAGE, on_message)
+        #
+        #     # Define coroutine to send audio
+        #     async def send_audio():
+        #         audio_path = os.path.join(os.path.dirname(__file__), "fixtures", "audio.wav")
+        #         with open(audio_path, "rb") as audio_file:
+        #             while chunk := audio_file.read(CHUNK_SIZE):
+        #                 await connection.send_media(chunk)
+        #                 # Simulate real-time streaming
+        #                 await asyncio.sleep(CHUNK_DELAY)
+        #         print("Finished sending audio")
+        #
+        #     # Start both tasks
+        #     listen_task = asyncio.create_task(connection.start_listening())
+        #     send_task = asyncio.create_task(send_audio())
+        #
+        #     # Wait for send to complete
+        #     await send_task
+        #
+        #     # Continue listening until connection closes or times out
+        #     await listen_task
+
+except Exception as e:
+    print(f"Error: {e}")
diff --git a/examples/22-transcription-advanced-options.py b/examples/15-transcription-advanced-options.py
similarity index 100%
rename from examples/22-transcription-advanced-options.py
rename to examples/15-transcription-advanced-options.py
diff --git a/examples/10-text-to-speech-single.py b/examples/20-text-to-speech-single.py
similarity index 100%
rename from examples/10-text-to-speech-single.py
rename to examples/20-text-to-speech-single.py
diff --git a/examples/11-text-to-speech-streaming.py b/examples/21-text-to-speech-streaming.py
similarity index 86%
rename from examples/11-text-to-speech-streaming.py
rename to examples/21-text-to-speech-streaming.py
index cb8f31f5..23d43638 100644
--- a/examples/11-text-to-speech-streaming.py
+++ b/examples/21-text-to-speech-streaming.py
@@ -40,15 +40,15 @@ def on_message(message: SpeakV1SocketClientResponse) -> None:
         # Note: start_listening() blocks, so send all messages first
         # For better control with bidirectional communication, use the async version
         text_message = SpeakV1Text(text="Hello, this is a text to speech example.")
-        connection.send_speak_v_1_text(text_message)
+        connection.send_text(text_message)
 
         # Flush to ensure all text is processed
         flush_message = SpeakV1Flush()
-        connection.send_speak_v_1_flush(flush_message)
+        connection.send_flush(flush_message)
 
         # Close the connection when done
         close_message = SpeakV1Close()
-        connection.send_speak_v_1_close(close_message)
+        connection.send_close(close_message)
 
         # Start listening - this blocks until the connection closes
         # All messages should be sent before calling this in sync mode
@@ -58,9 +58,9 @@ def on_message(message: SpeakV1SocketClientResponse) -> None:
         # For async version:
         # from deepgram import AsyncDeepgramClient
         # async with client.speak.v1.connect(...) as connection:
         #     listen_task = asyncio.create_task(connection.start_listening())
-        #     await connection.send_speak_v_1_text(SpeakV1Text(text="..."))
-        #     await connection.send_speak_v_1_flush(SpeakV1Flush())
-        #     await connection.send_speak_v_1_close(SpeakV1Close())
+        #     await connection.send_text(SpeakV1Text(text="..."))
+        #     await connection.send_flush(SpeakV1Flush())
+        #     await connection.send_close(SpeakV1Close())
         #     await listen_task
 
 except Exception as e:
diff --git a/examples/26-transcription-live-websocket-v2.py b/examples/26-transcription-live-websocket-v2.py
deleted file mode 100644
index 222c1441..00000000
--- a/examples/26-transcription-live-websocket-v2.py
+++ /dev/null
@@ -1,55 +0,0 @@
-"""
-Example: Live Transcription with WebSocket V2 (Listen V2)
-
-This example shows how to use Listen V2 for advanced conversational speech recognition
-with contextual turn detection.
-"""
-
-from typing import Union
-
-from dotenv import load_dotenv
-
-load_dotenv()
-
-from deepgram import DeepgramClient
-from deepgram.core.events import EventType
-from deepgram.listen.v2.types import (
-    ListenV2Connected,
-    ListenV2FatalError,
-    ListenV2TurnInfo,
-)
-
-ListenV2SocketClientResponse = Union[ListenV2Connected, ListenV2TurnInfo, ListenV2FatalError]
-
-client = DeepgramClient()
-
-try:
-    with client.listen.v2.connect(model="flux-general-en", encoding="linear16", sample_rate="16000") as connection:
-
-        def on_message(message: ListenV2SocketClientResponse) -> None:
-            msg_type = getattr(message, "type", "Unknown")
-            print(f"Received {msg_type} event")
-
-            # Extract transcription from TurnInfo events
-            if isinstance(message, ListenV2TurnInfo):
-                print(f"Turn transcript: {message.transcript}")
-                print(f"Turn event: {message.event}")
-                print(f"Turn index: {message.turn_index}")
-
-        connection.on(EventType.OPEN, lambda _: print("Connection opened"))
-        connection.on(EventType.MESSAGE, on_message)
-        connection.on(EventType.CLOSE, lambda _: print("Connection closed"))
-        connection.on(EventType.ERROR, lambda error: print(f"Error: {error}"))
-
-        # Start listening - this blocks until the connection closes
-        # In production, you would send audio data here using connection.send_listen_v_2_media()
-        connection.start_listening()
-
-        # For async version:
-        # from deepgram import AsyncDeepgramClient
-        # async with client.listen.v2.connect(...) as connection:
-        #     # ... same event handlers ...
-        #     await connection.start_listening()
-
-except Exception as e:
-    print(f"Error: {e}")
diff --git a/examples/09-voice-agent.py b/examples/30-voice-agent.py
similarity index 95%
rename from examples/09-voice-agent.py
rename to examples/30-voice-agent.py
index 82ed081e..35f40540 100644
--- a/examples/09-voice-agent.py
+++ b/examples/30-voice-agent.py
@@ -65,7 +65,7 @@
         )
 
         print("Sending agent settings...")
-        agent.send_agent_v_1_settings(settings)
+        agent.send_settings(settings)
 
         def on_message(message: AgentV1SocketClientResponse) -> None:
             if isinstance(message, bytes):
@@ -84,7 +84,7 @@ def on_message(message: AgentV1SocketClientResponse) -> None:
         # In production, you would send audio from your microphone or audio source:
         # with open("audio.wav", "rb") as audio_file:
         #     audio_data = audio_file.read()
-        #     agent.send_agent_v_1_media(audio_data)
+        #     agent.send_media(audio_data)
 
         agent.start_listening()
 
@@ -92,7 +92,7 @@ def on_message(message: AgentV1SocketClientResponse) -> None:
         # For async version:
        # from deepgram import AsyncDeepgramClient
         # async with client.agent.v1.connect() as agent:
         #     # ... same configuration ...
-        #     await agent.send_agent_v_1_settings(settings)
+        #     await agent.send_settings(settings)
         #     await agent.start_listening()
 
 except Exception as e:
diff --git a/examples/12-text-intelligence.py b/examples/40-text-intelligence.py
similarity index 100%
rename from examples/12-text-intelligence.py
rename to examples/40-text-intelligence.py
diff --git a/examples/13-management-projects.py b/examples/50-management-projects.py
similarity index 100%
rename from examples/13-management-projects.py
rename to examples/50-management-projects.py
diff --git a/examples/14-management-keys.py b/examples/51-management-keys.py
similarity index 100%
rename from examples/14-management-keys.py
rename to examples/51-management-keys.py
diff --git a/examples/15-management-members.py b/examples/52-management-members.py
similarity index 100%
rename from examples/15-management-members.py
rename to examples/52-management-members.py
diff --git a/examples/16-management-invites.py b/examples/53-management-invites.py
similarity index 100%
rename from examples/16-management-invites.py
rename to examples/53-management-invites.py
diff --git a/examples/17-management-usage.py b/examples/54-management-usage.py
similarity index 100%
rename from examples/17-management-usage.py
rename to examples/54-management-usage.py
diff --git a/examples/18-management-billing.py b/examples/55-management-billing.py
similarity index 100%
rename from examples/18-management-billing.py
rename to examples/55-management-billing.py
diff --git a/examples/19-management-models.py b/examples/56-management-models.py
similarity index 100%
rename from examples/19-management-models.py
rename to examples/56-management-models.py
diff --git a/examples/20-onprem-credentials.py b/examples/60-onprem-credentials.py
similarity index 100%
rename from examples/20-onprem-credentials.py
rename to examples/60-onprem-credentials.py
diff --git a/examples/23-request-options.py b/examples/70-request-options.py
similarity index 100%
rename from examples/23-request-options.py
rename to examples/70-request-options.py
diff --git a/examples/24-error-handling.py b/examples/71-error-handling.py
similarity index 100%
rename from examples/24-error-handling.py
rename to examples/71-error-handling.py
diff --git a/examples/README.md b/examples/README.md
index 4499527e..32e099e8 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,54 +1,54 @@
 # Deepgram Python SDK Examples
 
-This directory contains comprehensive examples demonstrating how to use the Deepgram Python SDK. These examples cover all major use cases and demonstrate production-ready patterns.
+This directory contains comprehensive examples demonstrating how to use the Deepgram Python SDK. Examples are organized by feature area, with each section starting at a multiple of 10.
 
 ## Examples Overview
 
-### Authentication
+### 01-09: Authentication
 
 - **01-authentication-api-key.py** - API key authentication
 - **02-authentication-access-token.py** - Access token authentication
 
-### Transcription
+### 10-19: Transcription (Listen)
 
-- **04-transcription-prerecorded-url.py** - Transcribe audio from URL
-- **05-transcription-prerecorded-file.py** - Transcribe audio from local file
-- **06-transcription-prerecorded-callback.py** - Async transcription with callbacks
-- **07-transcription-live-websocket.py** - Live transcription via WebSocket (Listen V1)
-- **22-transcription-advanced-options.py** - Advanced transcription options
-- **26-transcription-live-websocket-v2.py** - Live transcription via WebSocket (Listen V2)
+- **10-transcription-prerecorded-url.py** - Transcribe audio from URL
+- **11-transcription-prerecorded-file.py** - Transcribe audio from local file
+- **12-transcription-prerecorded-callback.py** - Async transcription with callbacks
+- **13-transcription-live-websocket.py** - Live transcription via WebSocket (Listen V1)
+- **14-transcription-live-websocket-v2.py** - Live transcription via WebSocket (Listen V2)
+- **15-transcription-advanced-options.py** - Advanced transcription options
 
-### Voice Agent
+### 20-29: Text-to-Speech (Speak)
 
-- **09-voice-agent.py** - Voice Agent configuration and usage
+- **20-text-to-speech-single.py** - Single request TTS (REST API)
+- **21-text-to-speech-streaming.py** - Streaming TTS via WebSocket
 
-### Text-to-Speech
+### 30-39: Voice Agent
 
-- **10-text-to-speech-single.py** - Single request TTS
-- **11-text-to-speech-streaming.py** - Streaming TTS via WebSocket
+- **30-voice-agent.py** - Voice Agent configuration and usage
 
-### Text Intelligence
+### 40-49: Text Intelligence (Read)
 
-- **12-text-intelligence.py** - Text analysis using AI features
+- **40-text-intelligence.py** - Text analysis using AI features
 
-### Management API
+### 50-59: Management API
 
-- **13-management-projects.py** - Project management (list, get, update, delete)
-- **14-management-keys.py** - API key management (list, get, create, delete)
-- **15-management-members.py** - Member management (list, remove, scopes)
-- **16-management-invites.py** - Invitation management (list, send, delete, leave)
-- **17-management-usage.py** - Usage statistics and request information
-- **18-management-billing.py** - Billing and balance information
-- **19-management-models.py** - Model information
+- **50-management-projects.py** - Project management (list, get, update, delete)
+- **51-management-keys.py** - API key management (list, get, create, delete)
+- **52-management-members.py** - Member management (list, remove, scopes)
+- **53-management-invites.py** - Invitation management (list, send, delete, leave)
+- **54-management-usage.py** - Usage statistics and request information
+- **55-management-billing.py** - Billing and balance information
+- **56-management-models.py** - Model information
 
-### On-Premises
+### 60-69: On-Premises
 
-- **20-onprem-credentials.py** - On-premises credentials management
+- **60-onprem-credentials.py** - On-premises credentials management
 
-### Configuration & Advanced
+### 70-79: Configuration & Advanced
 
-- **23-request-options.py** - Request options including additional query parameters
-- **24-error-handling.py** - Error handling patterns
+- **70-request-options.py** - Request options including additional query parameters
+- **71-error-handling.py** - Error handling patterns
 
 ## Usage