diff --git a/CLAUDE.md b/CLAUDE.md index 4e9c2830..2ad07a8a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -58,6 +58,7 @@ return render_template('page.html', settings=public_settings) ## Version Management +- It's important to update the version at the end of every plan - Version is stored in `config.py`: `VERSION = "X.XXX.XXX"` - When incrementing, only change the third segment (e.g., `0.238.024` -> `0.238.025`) - Include the current version in functional test file headers and documentation files @@ -83,7 +84,7 @@ return render_template('page.html', settings=public_settings) ## Release Notes -After completing code changes, offer to update `docs/explanation/release_notes.md`. +After completing plans and code changes, offer to update `docs/explanation/release_notes.md`. - Add entries under the current version from `config.py` - If the version was bumped, create a new section at the top: `### **(vX.XXX.XXX)**` diff --git a/README.md b/README.md index 31ea020b..cffbeedc 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,7 @@ azd up - **Metadata Extraction (Optional)**: Apply an AI model (configurable GPT model via Admin Settings) to automatically generate keywords, two-sentence summaries, and infer author/date for uploaded documents. Allows manual override for richer search context. - **File Processing Logs (Optional)**: Enable verbose logging for all ingestion pipelines (workspaces and ephemeral chat uploads) to aid in debugging, monitoring, and auditing file processing steps. - **Redis Cache (Optional)**: Integrate Azure Cache for Redis to provide a distributed, high-performance session store. This enables true horizontal scaling and high availability by decoupling user sessions from individual app instances. +- **SQL Database Agents (Optional)**: Connect agents to Azure SQL or other SQL databases through configurable SQL Query and SQL Schema plugins. 
Database schema is automatically discovered and injected into agent instructions at load time, enabling agents to answer natural language questions by generating and executing SQL queries without requiring users to know table or column names. - **Authentication & RBAC**: Secure access via Azure Active Directory (Entra ID) using MSAL. Supports Managed Identities for Azure service authentication, group-based controls, and custom application roles (`Admin`, `User`, `CreateGroup`, `SafetyAdmin`, `FeedbackAdmin`). - **Supported File Types**: diff --git a/application/single_app/app.py b/application/single_app/app.py index 805137d4..594d245d 100644 --- a/application/single_app/app.py +++ b/application/single_app/app.py @@ -75,6 +75,7 @@ from route_backend_public_prompts import * from route_backend_user_agreement import register_route_backend_user_agreement from route_backend_conversation_export import register_route_backend_conversation_export +from route_backend_thoughts import register_route_backend_thoughts from route_backend_speech import register_route_backend_speech from route_backend_tts import register_route_backend_tts from route_enhanced_citations import register_enhanced_citations_routes @@ -657,6 +658,9 @@ def list_semantic_kernel_plugins(): # ------------------- API User Agreement Routes ---------- register_route_backend_user_agreement(app) +# ------------------- API Thoughts Routes ---------------- +register_route_backend_thoughts(app) + # ------------------- Extenral Health Routes ---------- register_route_external_health(app) diff --git a/application/single_app/config.py b/application/single_app/config.py index 08c0adf1..741d8b55 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -94,7 +94,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.239.005" +VERSION = "0.239.031" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') @@ -257,6 +257,8 @@ def 
get_redis_cache_infrastructure_endpoint(redis_hostname: str) -> str: storage_account_user_documents_container_name = "user-documents" storage_account_group_documents_container_name = "group-documents" storage_account_public_documents_container_name = "public-documents" +storage_account_personal_chat_container_name = "personal-chat" +storage_account_group_chat_container_name = "group-chat" # Initialize Azure Cosmos DB client cosmos_endpoint = os.getenv("AZURE_COSMOS_ENDPOINT") @@ -459,6 +461,18 @@ def get_redis_cache_infrastructure_endpoint(redis_hostname: str) -> str: default_ttl=-1 # TTL disabled by default, enabled per-document for auto-cleanup ) +cosmos_thoughts_container_name = "thoughts" +cosmos_thoughts_container = cosmos_database.create_container_if_not_exists( + id=cosmos_thoughts_container_name, + partition_key=PartitionKey(path="/user_id") +) + +cosmos_archived_thoughts_container_name = "archive_thoughts" +cosmos_archived_thoughts_container = cosmos_database.create_container_if_not_exists( + id=cosmos_archived_thoughts_container_name, + partition_key=PartitionKey(path="/user_id") +) + def ensure_custom_logo_file_exists(app, settings): """ If custom_logo_base64 or custom_logo_dark_base64 is present in settings, ensure the appropriate @@ -745,9 +759,11 @@ def initialize_clients(settings): # This addresses the issue where the application assumes containers exist if blob_service_client: for container_name in [ - storage_account_user_documents_container_name, - storage_account_group_documents_container_name, - storage_account_public_documents_container_name + storage_account_user_documents_container_name, + storage_account_group_documents_container_name, + storage_account_public_documents_container_name, + storage_account_personal_chat_container_name, + storage_account_group_chat_container_name ]: try: container_client = blob_service_client.get_container_client(container_name) diff --git a/application/single_app/functions_activity_logging.py 
b/application/single_app/functions_activity_logging.py index 2a653a47..efb6e780 100644 --- a/application/single_app/functions_activity_logging.py +++ b/application/single_app/functions_activity_logging.py @@ -1393,3 +1393,332 @@ def log_retention_policy_force_push( level=logging.ERROR ) debug_print(f"⚠️ Warning: Failed to log retention policy force push: {str(e)}") + + +# === AGENT & ACTION ACTIVITY LOGGING === + +def log_agent_creation( + user_id: str, + agent_id: str, + agent_name: str, + agent_display_name: Optional[str] = None, + scope: str = 'personal', + group_id: Optional[str] = None +) -> None: + """ + Log an agent creation activity. + + Args: + user_id: The ID of the user who created the agent + agent_id: The unique ID of the new agent + agent_name: The name of the agent + agent_display_name: The display name of the agent + scope: 'personal', 'group', or 'global' + group_id: The group ID (only for group scope) + """ + try: + activity_record = { + 'id': str(uuid.uuid4()), + 'user_id': user_id, + 'activity_type': 'agent_creation', + 'timestamp': datetime.utcnow().isoformat(), + 'created_at': datetime.utcnow().isoformat(), + 'entity_type': 'agent', + 'operation': 'create', + 'entity': { + 'id': agent_id, + 'name': agent_name, + 'display_name': agent_display_name or agent_name + }, + 'workspace_type': scope, + 'workspace_context': {} + } + if scope == 'group' and group_id: + activity_record['workspace_context']['group_id'] = group_id + + cosmos_activity_logs_container.create_item(body=activity_record) + log_event( + message=f"Agent created: {agent_name} ({scope}) by user {user_id}", + extra=activity_record, + level=logging.INFO + ) + debug_print(f"✅ Agent creation logged: {agent_name} ({scope})") + except Exception as e: + log_event( + message=f"Error logging agent creation: {str(e)}", + extra={'user_id': user_id, 'agent_id': agent_id, 'scope': scope, 'error': str(e)}, + level=logging.ERROR + ) + debug_print(f"⚠️ Warning: Failed to log agent creation: 
{str(e)}") + + +def log_agent_update( + user_id: str, + agent_id: str, + agent_name: str, + agent_display_name: Optional[str] = None, + scope: str = 'personal', + group_id: Optional[str] = None +) -> None: + """ + Log an agent update activity. + + Args: + user_id: The ID of the user who updated the agent + agent_id: The unique ID of the agent + agent_name: The name of the agent + agent_display_name: The display name of the agent + scope: 'personal', 'group', or 'global' + group_id: The group ID (only for group scope) + """ + try: + activity_record = { + 'id': str(uuid.uuid4()), + 'user_id': user_id, + 'activity_type': 'agent_update', + 'timestamp': datetime.utcnow().isoformat(), + 'created_at': datetime.utcnow().isoformat(), + 'entity_type': 'agent', + 'operation': 'update', + 'entity': { + 'id': agent_id, + 'name': agent_name, + 'display_name': agent_display_name or agent_name + }, + 'workspace_type': scope, + 'workspace_context': {} + } + if scope == 'group' and group_id: + activity_record['workspace_context']['group_id'] = group_id + + cosmos_activity_logs_container.create_item(body=activity_record) + log_event( + message=f"Agent updated: {agent_name} ({scope}) by user {user_id}", + extra=activity_record, + level=logging.INFO + ) + debug_print(f"✅ Agent update logged: {agent_name} ({scope})") + except Exception as e: + log_event( + message=f"Error logging agent update: {str(e)}", + extra={'user_id': user_id, 'agent_id': agent_id, 'scope': scope, 'error': str(e)}, + level=logging.ERROR + ) + debug_print(f"⚠️ Warning: Failed to log agent update: {str(e)}") + + +def log_agent_deletion( + user_id: str, + agent_id: str, + agent_name: str, + scope: str = 'personal', + group_id: Optional[str] = None +) -> None: + """ + Log an agent deletion activity. 
+ + Args: + user_id: The ID of the user who deleted the agent + agent_id: The unique ID of the agent + agent_name: The name of the agent + scope: 'personal', 'group', or 'global' + group_id: The group ID (only for group scope) + """ + try: + activity_record = { + 'id': str(uuid.uuid4()), + 'user_id': user_id, + 'activity_type': 'agent_deletion', + 'timestamp': datetime.utcnow().isoformat(), + 'created_at': datetime.utcnow().isoformat(), + 'entity_type': 'agent', + 'operation': 'delete', + 'entity': { + 'id': agent_id, + 'name': agent_name + }, + 'workspace_type': scope, + 'workspace_context': {} + } + if scope == 'group' and group_id: + activity_record['workspace_context']['group_id'] = group_id + + cosmos_activity_logs_container.create_item(body=activity_record) + log_event( + message=f"Agent deleted: {agent_name} ({scope}) by user {user_id}", + extra=activity_record, + level=logging.INFO + ) + debug_print(f"✅ Agent deletion logged: {agent_name} ({scope})") + except Exception as e: + log_event( + message=f"Error logging agent deletion: {str(e)}", + extra={'user_id': user_id, 'agent_id': agent_id, 'scope': scope, 'error': str(e)}, + level=logging.ERROR + ) + debug_print(f"⚠️ Warning: Failed to log agent deletion: {str(e)}") + + +def log_action_creation( + user_id: str, + action_id: str, + action_name: str, + action_type: Optional[str] = None, + scope: str = 'personal', + group_id: Optional[str] = None +) -> None: + """ + Log an action/plugin creation activity. 
+ + Args: + user_id: The ID of the user who created the action + action_id: The unique ID of the new action + action_name: The name of the action + action_type: The type of the action (e.g., 'openapi', 'sql_query') + scope: 'personal', 'group', or 'global' + group_id: The group ID (only for group scope) + """ + try: + activity_record = { + 'id': str(uuid.uuid4()), + 'user_id': user_id, + 'activity_type': 'action_creation', + 'timestamp': datetime.utcnow().isoformat(), + 'created_at': datetime.utcnow().isoformat(), + 'entity_type': 'action', + 'operation': 'create', + 'entity': { + 'id': action_id, + 'name': action_name, + 'type': action_type + }, + 'workspace_type': scope, + 'workspace_context': {} + } + if scope == 'group' and group_id: + activity_record['workspace_context']['group_id'] = group_id + + cosmos_activity_logs_container.create_item(body=activity_record) + log_event( + message=f"Action created: {action_name} ({scope}) by user {user_id}", + extra=activity_record, + level=logging.INFO + ) + debug_print(f"✅ Action creation logged: {action_name} ({scope})") + except Exception as e: + log_event( + message=f"Error logging action creation: {str(e)}", + extra={'user_id': user_id, 'action_id': action_id, 'scope': scope, 'error': str(e)}, + level=logging.ERROR + ) + debug_print(f"⚠️ Warning: Failed to log action creation: {str(e)}") + + +def log_action_update( + user_id: str, + action_id: str, + action_name: str, + action_type: Optional[str] = None, + scope: str = 'personal', + group_id: Optional[str] = None +) -> None: + """ + Log an action/plugin update activity. 
+ + Args: + user_id: The ID of the user who updated the action + action_id: The unique ID of the action + action_name: The name of the action + action_type: The type of the action + scope: 'personal', 'group', or 'global' + group_id: The group ID (only for group scope) + """ + try: + activity_record = { + 'id': str(uuid.uuid4()), + 'user_id': user_id, + 'activity_type': 'action_update', + 'timestamp': datetime.utcnow().isoformat(), + 'created_at': datetime.utcnow().isoformat(), + 'entity_type': 'action', + 'operation': 'update', + 'entity': { + 'id': action_id, + 'name': action_name, + 'type': action_type + }, + 'workspace_type': scope, + 'workspace_context': {} + } + if scope == 'group' and group_id: + activity_record['workspace_context']['group_id'] = group_id + + cosmos_activity_logs_container.create_item(body=activity_record) + log_event( + message=f"Action updated: {action_name} ({scope}) by user {user_id}", + extra=activity_record, + level=logging.INFO + ) + debug_print(f"✅ Action update logged: {action_name} ({scope})") + except Exception as e: + log_event( + message=f"Error logging action update: {str(e)}", + extra={'user_id': user_id, 'action_id': action_id, 'scope': scope, 'error': str(e)}, + level=logging.ERROR + ) + debug_print(f"⚠️ Warning: Failed to log action update: {str(e)}") + + +def log_action_deletion( + user_id: str, + action_id: str, + action_name: str, + action_type: Optional[str] = None, + scope: str = 'personal', + group_id: Optional[str] = None +) -> None: + """ + Log an action/plugin deletion activity. 
+ + Args: + user_id: The ID of the user who deleted the action + action_id: The unique ID of the action + action_name: The name of the action + action_type: The type of the action + scope: 'personal', 'group', or 'global' + group_id: The group ID (only for group scope) + """ + try: + activity_record = { + 'id': str(uuid.uuid4()), + 'user_id': user_id, + 'activity_type': 'action_deletion', + 'timestamp': datetime.utcnow().isoformat(), + 'created_at': datetime.utcnow().isoformat(), + 'entity_type': 'action', + 'operation': 'delete', + 'entity': { + 'id': action_id, + 'name': action_name, + 'type': action_type + }, + 'workspace_type': scope, + 'workspace_context': {} + } + if scope == 'group' and group_id: + activity_record['workspace_context']['group_id'] = group_id + + cosmos_activity_logs_container.create_item(body=activity_record) + log_event( + message=f"Action deleted: {action_name} ({scope}) by user {user_id}", + extra=activity_record, + level=logging.INFO + ) + debug_print(f"✅ Action deletion logged: {action_name} ({scope})") + except Exception as e: + log_event( + message=f"Error logging action deletion: {str(e)}", + extra={'user_id': user_id, 'action_id': action_id, 'scope': scope, 'error': str(e)}, + level=logging.ERROR + ) + debug_print(f"⚠️ Warning: Failed to log action deletion: {str(e)}") diff --git a/application/single_app/functions_content.py b/application/single_app/functions_content.py index 376d23f4..3116ed82 100644 --- a/application/single_app/functions_content.py +++ b/application/single_app/functions_content.py @@ -352,7 +352,7 @@ def generate_embedding( embedding_model = selected_embedding_model['deploymentName'] while True: - random_delay = random.uniform(0.5, 2.0) + random_delay = random.uniform(0.05, 0.2) time.sleep(random_delay) try: @@ -385,3 +385,102 @@ def generate_embedding( except Exception as e: raise + +def generate_embeddings_batch( + texts, + batch_size=16, + max_retries=5, + initial_delay=1.0, + delay_multiplier=2.0 +): + 
"""Generate embeddings for multiple texts in batches. + + Azure OpenAI embeddings API accepts a list of strings as input. + This reduces per-call overhead and delay significantly. + + Args: + texts: List of text strings to embed. + batch_size: Number of texts per API call (default 16). + max_retries: Max retries on rate limit errors. + initial_delay: Initial retry delay in seconds. + delay_multiplier: Multiplier for exponential backoff. + + Returns: + list of (embedding, token_usage) tuples, one per input text. + """ + settings = get_settings() + + enable_embedding_apim = settings.get('enable_embedding_apim', False) + + if enable_embedding_apim: + embedding_model = settings.get('azure_apim_embedding_deployment') + embedding_client = AzureOpenAI( + api_version=settings.get('azure_apim_embedding_api_version'), + azure_endpoint=settings.get('azure_apim_embedding_endpoint'), + api_key=settings.get('azure_apim_embedding_subscription_key')) + else: + if (settings.get('azure_openai_embedding_authentication_type') == 'managed_identity'): + token_provider = get_bearer_token_provider(DefaultAzureCredential(), cognitive_services_scope) + + embedding_client = AzureOpenAI( + api_version=settings.get('azure_openai_embedding_api_version'), + azure_endpoint=settings.get('azure_openai_embedding_endpoint'), + azure_ad_token_provider=token_provider + ) + + embedding_model_obj = settings.get('embedding_model', {}) + if embedding_model_obj and embedding_model_obj.get('selected'): + selected_embedding_model = embedding_model_obj['selected'][0] + embedding_model = selected_embedding_model['deploymentName'] + else: + embedding_client = AzureOpenAI( + api_version=settings.get('azure_openai_embedding_api_version'), + azure_endpoint=settings.get('azure_openai_embedding_endpoint'), + api_key=settings.get('azure_openai_embedding_key') + ) + + embedding_model_obj = settings.get('embedding_model', {}) + if embedding_model_obj and embedding_model_obj.get('selected'): + selected_embedding_model = 
embedding_model_obj['selected'][0] + embedding_model = selected_embedding_model['deploymentName'] + + results = [] + for i in range(0, len(texts), batch_size): + batch = texts[i:i + batch_size] + retries = 0 + current_delay = initial_delay + + while True: + random_delay = random.uniform(0.05, 0.2) + time.sleep(random_delay) + + try: + response = embedding_client.embeddings.create( + model=embedding_model, + input=batch + ) + + for item in response.data: + token_usage = None + if hasattr(response, 'usage') and response.usage: + token_usage = { + 'prompt_tokens': response.usage.prompt_tokens // len(batch), + 'total_tokens': response.usage.total_tokens // len(batch), + 'model_deployment_name': embedding_model + } + results.append((item.embedding, token_usage)) + break + + except RateLimitError as e: + retries += 1 + if retries > max_retries: + raise + + wait_time = current_delay * random.uniform(1.0, 1.5) + time.sleep(wait_time) + current_delay *= delay_multiplier + + except Exception as e: + raise + + return results diff --git a/application/single_app/functions_documents.py b/application/single_app/functions_documents.py index ce08066d..110afbd2 100644 --- a/application/single_app/functions_documents.py +++ b/application/single_app/functions_documents.py @@ -1646,6 +1646,191 @@ def save_chunks(page_text_content, page_number, file_name, user_id, document_id, # Return token usage information for accumulation return token_usage +def save_chunks_batch(chunks_data, user_id, document_id, group_id=None, public_workspace_id=None): + """ + Save multiple chunks at once using batch embedding and batch AI Search upload. + Significantly faster than calling save_chunks() per chunk. 
+ + Args: + chunks_data: list of dicts with keys: page_text_content, page_number, file_name + user_id: The user ID + document_id: The document ID + group_id: Optional group ID for group documents + public_workspace_id: Optional public workspace ID for public documents + + Returns: + dict with 'total_tokens', 'prompt_tokens', 'model_deployment_name' + """ + from functions_content import generate_embeddings_batch + + current_time = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') + is_group = group_id is not None + is_public_workspace = public_workspace_id is not None + + # Retrieve metadata once for all chunks + try: + if is_public_workspace: + metadata = get_document_metadata( + document_id=document_id, + user_id=user_id, + public_workspace_id=public_workspace_id + ) + elif is_group: + metadata = get_document_metadata( + document_id=document_id, + user_id=user_id, + group_id=group_id + ) + else: + metadata = get_document_metadata( + document_id=document_id, + user_id=user_id + ) + + if not metadata: + raise ValueError(f"No metadata found for document {document_id}") + + version = metadata.get("version") if metadata.get("version") else 1 + except Exception as e: + log_event(f"[save_chunks_batch] Error retrieving metadata for document {document_id}: {repr(e)}", level=logging.ERROR) + raise + + # Generate all embeddings in batches + texts = [c['page_text_content'] for c in chunks_data] + try: + embedding_results = generate_embeddings_batch(texts) + except Exception as e: + log_event(f"[save_chunks_batch] Error generating batch embeddings for document {document_id}: {e}", level=logging.ERROR) + raise + + # Check for vision analysis once + vision_analysis = metadata.get('vision_analysis') + vision_text = "" + if vision_analysis: + vision_text_parts = [] + vision_text_parts.append("\n\n=== AI Vision Analysis ===") + vision_text_parts.append(f"Model: {vision_analysis.get('model', 'unknown')}") + if vision_analysis.get('description'): + 
vision_text_parts.append(f"\nDescription: {vision_analysis['description']}") + if vision_analysis.get('objects'): + objects_list = vision_analysis['objects'] + if isinstance(objects_list, list): + vision_text_parts.append(f"\nObjects Detected: {', '.join(objects_list)}") + else: + vision_text_parts.append(f"\nObjects Detected: {objects_list}") + if vision_analysis.get('text'): + vision_text_parts.append(f"\nVisible Text: {vision_analysis['text']}") + if vision_analysis.get('analysis'): + vision_text_parts.append(f"\nContextual Analysis: {vision_analysis['analysis']}") + vision_text = "\n".join(vision_text_parts) + + # Build all chunk documents + chunk_documents = [] + total_token_usage = {'total_tokens': 0, 'prompt_tokens': 0, 'model_deployment_name': None} + + for idx, chunk_info in enumerate(chunks_data): + embedding, token_usage = embedding_results[idx] + page_number = chunk_info['page_number'] + file_name = chunk_info['file_name'] + page_text_content = chunk_info['page_text_content'] + + if token_usage: + total_token_usage['total_tokens'] += token_usage.get('total_tokens', 0) + total_token_usage['prompt_tokens'] += token_usage.get('prompt_tokens', 0) + if not total_token_usage['model_deployment_name']: + total_token_usage['model_deployment_name'] = token_usage.get('model_deployment_name') + + chunk_id = f"{document_id}_{page_number}" + enhanced_chunk_text = page_text_content + vision_text if vision_text else page_text_content + + if is_public_workspace: + chunk_document = { + "id": chunk_id, + "document_id": document_id, + "chunk_id": str(page_number), + "chunk_text": enhanced_chunk_text, + "embedding": embedding, + "file_name": file_name, + "chunk_keywords": [], + "chunk_summary": "", + "page_number": page_number, + "author": [], + "title": "", + "document_classification": "None", + "document_tags": metadata.get('tags', []), + "chunk_sequence": page_number, + "upload_date": current_time, + "version": version, + "public_workspace_id": public_workspace_id + } + 
elif is_group: + shared_group_ids = metadata.get('shared_group_ids', []) if metadata else [] + chunk_document = { + "id": chunk_id, + "document_id": document_id, + "chunk_id": str(page_number), + "chunk_text": enhanced_chunk_text, + "embedding": embedding, + "file_name": file_name, + "chunk_keywords": [], + "chunk_summary": "", + "page_number": page_number, + "author": [], + "title": "", + "document_classification": "None", + "document_tags": metadata.get('tags', []), + "chunk_sequence": page_number, + "upload_date": current_time, + "version": version, + "group_id": group_id, + "shared_group_ids": shared_group_ids + } + else: + shared_user_ids = metadata.get('shared_user_ids', []) if metadata else [] + chunk_document = { + "id": chunk_id, + "document_id": document_id, + "chunk_id": str(page_number), + "chunk_text": enhanced_chunk_text, + "embedding": embedding, + "file_name": file_name, + "chunk_keywords": [], + "chunk_summary": "", + "page_number": page_number, + "author": [], + "title": "", + "document_classification": "None", + "document_tags": metadata.get('tags', []), + "chunk_sequence": page_number, + "upload_date": current_time, + "version": version, + "user_id": user_id, + "shared_user_ids": shared_user_ids + } + + chunk_documents.append(chunk_document) + + # Batch upload to AI Search + try: + if is_public_workspace: + search_client = CLIENTS["search_client_public"] + elif is_group: + search_client = CLIENTS["search_client_group"] + else: + search_client = CLIENTS["search_client_user"] + + # Upload in sub-batches of 32 to avoid request size limits + upload_batch_size = 32 + for i in range(0, len(chunk_documents), upload_batch_size): + sub_batch = chunk_documents[i:i + upload_batch_size] + search_client.upload_documents(documents=sub_batch) + + except Exception as e: + log_event(f"[save_chunks_batch] Error uploading batch to AI Search for document {document_id}: {e}", level=logging.ERROR) + raise + + return total_token_usage + def 
get_document_metadata_for_citations(document_id, user_id=None, group_id=None, public_workspace_id=None): """ Retrieve keywords and abstract from a document for creating metadata citations. @@ -4669,37 +4854,30 @@ def process_single_tabular_sheet(df, document_id, user_id, file_name, update_cal # Consider accumulating page count in the caller if needed. update_callback(number_of_pages=num_chunks_final) - # Save chunks, prepending the header to each + # Save chunks, prepending the header to each — use batch processing for speed + all_chunks = [] for idx, chunk_rows_content in enumerate(final_chunks_content, start=1): - # Prepend header - header length does not count towards chunk size limit chunk_with_header = header_string + chunk_rows_content - - update_callback( - current_file_chunk=idx, - status=f"Saving chunk {idx}/{num_chunks_final} from {file_name}..." - ) - - args = { + all_chunks.append({ "page_text_content": chunk_with_header, "page_number": idx, - "file_name": file_name, - "user_id": user_id, - "document_id": document_id - } + "file_name": file_name + }) - if is_public_workspace: - args["public_workspace_id"] = public_workspace_id - elif is_group: - args["group_id"] = group_id + if all_chunks: + update_callback( + current_file_chunk=1, + status=f"Batch processing {num_chunks_final} chunks from {file_name}..." 
+ ) - token_usage = save_chunks(**args) - total_chunks_saved += 1 - - # Accumulate embedding tokens - if token_usage: - total_embedding_tokens += token_usage.get('total_tokens', 0) - if not embedding_model_name: - embedding_model_name = token_usage.get('model_deployment_name') + batch_token_usage = save_chunks_batch( + all_chunks, user_id, document_id, + group_id=group_id, public_workspace_id=public_workspace_id + ) + total_chunks_saved = len(all_chunks) + if batch_token_usage: + total_embedding_tokens = batch_token_usage.get('total_tokens', 0) + embedding_model_name = batch_token_usage.get('model_deployment_name') return total_chunks_saved, total_embedding_tokens, embedding_model_name @@ -4729,63 +4907,75 @@ def process_tabular(document_id, user_id, temp_file_path, original_filename, fil args["group_id"] = group_id upload_to_blob(**args) + update_callback(enhanced_citations=True, status=f"Enhanced citations enabled for {file_ext}") - try: - if file_ext == '.csv': - # Process CSV - # Read CSV, attempt to infer header, keep data as string initially - df = pandas.read_csv( - temp_file_path, - keep_default_na=False, - dtype=str + # When enhanced citations is on, index a single schema summary chunk + # instead of row-by-row chunking. The tabular processing plugin handles analysis. 
+ if enable_enhanced_citations: + try: + if file_ext == '.csv': + df_preview = pandas.read_csv(temp_file_path, keep_default_na=False, dtype=str, nrows=5) + full_df = pandas.read_csv(temp_file_path, keep_default_na=False, dtype=str) + elif file_ext in ('.xlsx', '.xls', '.xlsm'): + engine = 'openpyxl' if file_ext in ('.xlsx', '.xlsm') else 'xlrd' + df_preview = pandas.read_excel(temp_file_path, engine=engine, keep_default_na=False, dtype=str, nrows=5) + full_df = pandas.read_excel(temp_file_path, engine=engine, keep_default_na=False, dtype=str) + else: + raise ValueError(f"Unsupported tabular file type: {file_ext}") + + row_count = len(full_df) + columns = list(df_preview.columns) + preview_rows = df_preview.head(5).to_string(index=False) + + schema_summary = ( + f"Tabular data file: {original_filename}\n" + f"Columns ({len(columns)}): {', '.join(columns)}\n" + f"Total rows: {row_count}\n" + f"Preview (first 5 rows):\n{preview_rows}\n\n" + f"This file is available for detailed analysis via the Tabular Processing plugin." 
) - args = { - "df": df, - "document_id": document_id, - "user_id": user_id, + + update_callback(number_of_pages=1, status=f"Indexing schema summary for {original_filename}...") + + save_args = { + "page_text_content": schema_summary, + "page_number": 1, "file_name": original_filename, - "update_callback": update_callback + "user_id": user_id, + "document_id": document_id } - if is_public_workspace: - args["public_workspace_id"] = public_workspace_id + save_args["public_workspace_id"] = public_workspace_id elif is_group: - args["group_id"] = group_id + save_args["group_id"] = group_id - result = process_single_tabular_sheet(**args) - if isinstance(result, tuple) and len(result) == 3: - chunks, tokens, model = result - total_chunks_saved = chunks - total_embedding_tokens += tokens - if not embedding_model_name: - embedding_model_name = model - else: - total_chunks_saved = result - - elif file_ext in ('.xlsx', '.xls', '.xlsm'): - # Process Excel (potentially multiple sheets) - excel_file = pandas.ExcelFile( - temp_file_path, - engine='openpyxl' if file_ext in ('.xlsx', '.xlsm') else 'xlrd' - ) - sheet_names = excel_file.sheet_names - base_name, ext = os.path.splitext(original_filename) - - accumulated_total_chunks = 0 - for sheet_name in sheet_names: - update_callback(status=f"Processing sheet '{sheet_name}'...") - # Read specific sheet, get values (not formulas), keep data as string - # Note: pandas typically reads values, not formulas by default. 
- df = excel_file.parse(sheet_name, keep_default_na=False, dtype=str) + token_usage = save_chunks(**save_args) + total_chunks_saved = 1 + if token_usage: + total_embedding_tokens = token_usage.get('total_tokens', 0) + embedding_model_name = token_usage.get('model_deployment_name') - # Create effective filename for this sheet - effective_filename = f"{base_name}-{sheet_name}{ext}" if len(sheet_names) > 1 else original_filename + # Don't return here — fall through to metadata extraction below + except Exception as e: + log_event(f"[process_tabular] Error creating schema summary, falling back to row-by-row: {e}", level=logging.WARNING) + # Fall through to existing row-by-row processing + # Only do row-by-row chunking if schema-only didn't produce chunks + if total_chunks_saved == 0: + try: + if file_ext == '.csv': + # Process CSV + # Read CSV, attempt to infer header, keep data as string initially + df = pandas.read_csv( + temp_file_path, + keep_default_na=False, + dtype=str + ) args = { "df": df, "document_id": document_id, "user_id": user_id, - "file_name": effective_filename, + "file_name": original_filename, "update_callback": update_callback } @@ -4797,21 +4987,62 @@ def process_tabular(document_id, user_id, temp_file_path, original_filename, fil result = process_single_tabular_sheet(**args) if isinstance(result, tuple) and len(result) == 3: chunks, tokens, model = result - accumulated_total_chunks += chunks + total_chunks_saved = chunks total_embedding_tokens += tokens if not embedding_model_name: embedding_model_name = model else: - accumulated_total_chunks += result + total_chunks_saved = result - total_chunks_saved = accumulated_total_chunks # Total across all sheets + elif file_ext in ('.xlsx', '.xls', '.xlsm'): + # Process Excel (potentially multiple sheets) + excel_file = pandas.ExcelFile( + temp_file_path, + engine='openpyxl' if file_ext in ('.xlsx', '.xlsm') else 'xlrd' + ) + sheet_names = excel_file.sheet_names + base_name, ext = 
os.path.splitext(original_filename) + accumulated_total_chunks = 0 + for sheet_name in sheet_names: + update_callback(status=f"Processing sheet '{sheet_name}'...") + # Read specific sheet, get values (not formulas), keep data as string + # Note: pandas typically reads values, not formulas by default. + df = excel_file.parse(sheet_name, keep_default_na=False, dtype=str) - except pandas.errors.EmptyDataError: - print(f"Warning: Tabular file or sheet is empty: {original_filename}") - update_callback(status=f"Warning: File/sheet is empty - {original_filename}", number_of_pages=0) - except Exception as e: - raise Exception(f"Failed processing Tabular file {original_filename}: {e}") + # Create effective filename for this sheet + effective_filename = f"{base_name}-{sheet_name}{ext}" if len(sheet_names) > 1 else original_filename + + args = { + "df": df, + "document_id": document_id, + "user_id": user_id, + "file_name": effective_filename, + "update_callback": update_callback + } + + if is_public_workspace: + args["public_workspace_id"] = public_workspace_id + elif is_group: + args["group_id"] = group_id + + result = process_single_tabular_sheet(**args) + if isinstance(result, tuple) and len(result) == 3: + chunks, tokens, model = result + accumulated_total_chunks += chunks + total_embedding_tokens += tokens + if not embedding_model_name: + embedding_model_name = model + else: + accumulated_total_chunks += result + + total_chunks_saved = accumulated_total_chunks # Total across all sheets + + except pandas.errors.EmptyDataError: + log_event(f"[process_tabular] Warning: Tabular file or sheet is empty: {original_filename}", level=logging.WARNING) + update_callback(status=f"Warning: File/sheet is empty - {original_filename}", number_of_pages=0) + except Exception as e: + raise Exception(f"Failed processing Tabular file {original_filename}: {e}") # Extract metadata if enabled and chunks were processed settings = get_settings() diff --git 
a/application/single_app/functions_global_actions.py b/application/single_app/functions_global_actions.py index 91f0d9f9..4d7293cd 100644 --- a/application/single_app/functions_global_actions.py +++ b/application/single_app/functions_global_actions.py @@ -60,12 +60,13 @@ def get_global_action(action_id, return_type=SecretReturnType.TRIGGER): return None -def save_global_action(action_data): +def save_global_action(action_data, user_id=None): """ Save or update a global action. Args: action_data (dict): Action data to save + user_id (str, optional): The user ID of the person performing the action Returns: dict: Saved action data or None if failed @@ -76,8 +77,27 @@ def save_global_action(action_data): action_data['id'] = str(uuid.uuid4()) # Add metadata action_data['is_global'] = True - action_data['created_at'] = datetime.utcnow().isoformat() - action_data['updated_at'] = datetime.utcnow().isoformat() + now = datetime.utcnow().isoformat() + + # Check if this is a new action or an update to preserve created_by/created_at + existing_action = None + try: + existing_action = cosmos_global_actions_container.read_item( + item=action_data['id'], + partition_key=action_data['id'] + ) + except Exception: + pass + + if existing_action: + action_data['created_by'] = existing_action.get('created_by', user_id) + action_data['created_at'] = existing_action.get('created_at', now) + else: + action_data['created_by'] = user_id + action_data['created_at'] = now + action_data['modified_by'] = user_id + action_data['modified_at'] = now + action_data['updated_at'] = now print(f"💾 Saving global action: {action_data.get('name', 'Unknown')}") # Store secrets in Key Vault before upsert action_data = keyvault_plugin_save_helper(action_data, scope_value=action_data.get('id'), scope="global") diff --git a/application/single_app/functions_global_agents.py b/application/single_app/functions_global_agents.py index 5cf6a3d4..87976510 100644 --- a/application/single_app/functions_global_agents.py 
+++ b/application/single_app/functions_global_agents.py @@ -163,25 +163,46 @@ def get_global_agent(agent_id): return None -def save_global_agent(agent_data): +def save_global_agent(agent_data, user_id=None): """ Save or update a global agent. Args: agent_data (dict): Agent data to save + user_id (str, optional): The user ID of the person performing the action Returns: dict: Saved agent data or None if failed """ try: - user_id = get_current_user_id() + if user_id is None: + user_id = get_current_user_id() cleaned_agent = sanitize_agent_payload(agent_data) if 'id' not in cleaned_agent: cleaned_agent['id'] = str(uuid.uuid4()) cleaned_agent['is_global'] = True cleaned_agent['is_group'] = False - cleaned_agent['created_at'] = datetime.utcnow().isoformat() - cleaned_agent['updated_at'] = datetime.utcnow().isoformat() + now = datetime.utcnow().isoformat() + + # Check if this is a new agent or an update to preserve created_by/created_at + existing_agent = None + try: + existing_agent = cosmos_global_agents_container.read_item( + item=cleaned_agent['id'], + partition_key=cleaned_agent['id'] + ) + except Exception: + pass + + if existing_agent: + cleaned_agent['created_by'] = existing_agent.get('created_by', user_id) + cleaned_agent['created_at'] = existing_agent.get('created_at', now) + else: + cleaned_agent['created_by'] = user_id + cleaned_agent['created_at'] = now + cleaned_agent['modified_by'] = user_id + cleaned_agent['modified_at'] = now + cleaned_agent['updated_at'] = now log_event( "Saving global agent.", extra={"agent_name": cleaned_agent.get('name', 'Unknown')}, diff --git a/application/single_app/functions_group_actions.py b/application/single_app/functions_group_actions.py index bc6aa4ea..c0d264b1 100644 --- a/application/single_app/functions_group_actions.py +++ b/application/single_app/functions_group_actions.py @@ -82,14 +82,36 @@ def get_group_action( return _clean_action(action, group_id, return_type) -def save_group_action(group_id: str, action_data: 
Dict[str, Any]) -> Dict[str, Any]: +def save_group_action(group_id: str, action_data: Dict[str, Any], user_id: Optional[str] = None) -> Dict[str, Any]: """Create or update a group action entry.""" payload = dict(action_data) action_id = payload.get("id") or str(uuid.uuid4()) payload["id"] = action_id payload["group_id"] = group_id - payload["last_updated"] = datetime.utcnow().isoformat() + now = datetime.utcnow().isoformat() + payload["last_updated"] = now + + # Track who created/modified this action + existing_action = None + try: + existing_action = cosmos_group_actions_container.read_item( + item=action_id, + partition_key=group_id, + ) + except exceptions.CosmosResourceNotFoundError: + pass + except Exception: + pass + + if existing_action: + payload["created_by"] = existing_action.get("created_by", user_id) + payload["created_at"] = existing_action.get("created_at", now) + else: + payload["created_by"] = user_id + payload["created_at"] = now + payload["modified_by"] = user_id + payload["modified_at"] = now payload.setdefault("name", "") payload.setdefault("displayName", payload.get("name", "")) diff --git a/application/single_app/functions_group_agents.py b/application/single_app/functions_group_agents.py index 8bf6f87c..7cbb8324 100644 --- a/application/single_app/functions_group_agents.py +++ b/application/single_app/functions_group_agents.py @@ -63,16 +63,38 @@ def get_group_agent(group_id: str, agent_id: str) -> Optional[Dict[str, Any]]: return None -def save_group_agent(group_id: str, agent_data: Dict[str, Any]) -> Dict[str, Any]: +def save_group_agent(group_id: str, agent_data: Dict[str, Any], user_id: Optional[str] = None) -> Dict[str, Any]: """Create or update a group agent entry.""" payload = sanitize_agent_payload(agent_data) agent_id = payload.get("id") or str(uuid.uuid4()) payload["id"] = agent_id payload["group_id"] = group_id - payload["last_updated"] = datetime.utcnow().isoformat() + now = datetime.utcnow().isoformat() + payload["last_updated"] 
= now payload["is_global"] = False payload["is_group"] = True + # Track who created/modified this agent + existing_agent = None + try: + existing_agent = cosmos_group_agents_container.read_item( + item=agent_id, + partition_key=group_id, + ) + except exceptions.CosmosResourceNotFoundError: + pass + except Exception: + pass + + if existing_agent: + payload["created_by"] = existing_agent.get("created_by", user_id) + payload["created_at"] = existing_agent.get("created_at", now) + else: + payload["created_by"] = user_id + payload["created_at"] = now + payload["modified_by"] = user_id + payload["modified_at"] = now + # Required/defaulted fields payload.setdefault("name", "") payload.setdefault("display_name", payload.get("name", "")) diff --git a/application/single_app/functions_personal_actions.py b/application/single_app/functions_personal_actions.py index 6345438e..91d849f3 100644 --- a/application/single_app/functions_personal_actions.py +++ b/application/single_app/functions_personal_actions.py @@ -113,15 +113,26 @@ def save_personal_action(user_id, action_data): existing_action = get_personal_action(user_id, action_data['name']) # Preserve existing ID if updating, or generate new ID if creating + now = datetime.utcnow().isoformat() if existing_action: - # Update existing action - preserve the original ID + # Update existing action - preserve the original ID and creation tracking action_data['id'] = existing_action['id'] + action_data['created_by'] = existing_action.get('created_by', user_id) + action_data['created_at'] = existing_action.get('created_at', now) elif 'id' not in action_data or not action_data['id']: # New action - generate UUID for ID action_data['id'] = str(uuid.uuid4()) - + action_data['created_by'] = user_id + action_data['created_at'] = now + else: + # Has an ID but no existing action found - treat as new + action_data['created_by'] = user_id + action_data['created_at'] = now + action_data['modified_by'] = user_id + action_data['modified_at'] = 
now + action_data['user_id'] = user_id - action_data['last_updated'] = datetime.utcnow().isoformat() + action_data['last_updated'] = now # Validate required fields required_fields = ['name', 'displayName', 'type', 'description'] diff --git a/application/single_app/functions_personal_agents.py b/application/single_app/functions_personal_agents.py index a4a5e47d..3c6c275e 100644 --- a/application/single_app/functions_personal_agents.py +++ b/application/single_app/functions_personal_agents.py @@ -128,9 +128,33 @@ def save_personal_agent(user_id, agent_data): cleaned_agent.setdefault(field, '') if 'id' not in cleaned_agent: cleaned_agent['id'] = str(f"{user_id}_{cleaned_agent.get('name', 'default')}") - + + # Check if this is a new agent or an update to preserve created_by/created_at + existing_agent = None + try: + existing_agent = cosmos_personal_agents_container.read_item( + item=cleaned_agent['id'], + partition_key=user_id + ) + except exceptions.CosmosResourceNotFoundError: + pass + except Exception: + pass + + now = datetime.utcnow().isoformat() + if existing_agent: + # Preserve original creation tracking + cleaned_agent['created_by'] = existing_agent.get('created_by', user_id) + cleaned_agent['created_at'] = existing_agent.get('created_at', now) + else: + # New agent + cleaned_agent['created_by'] = user_id + cleaned_agent['created_at'] = now + cleaned_agent['modified_by'] = user_id + cleaned_agent['modified_at'] = now + cleaned_agent['user_id'] = user_id - cleaned_agent['last_updated'] = datetime.utcnow().isoformat() + cleaned_agent['last_updated'] = now cleaned_agent['is_global'] = False cleaned_agent['is_group'] = False diff --git a/application/single_app/functions_settings.py b/application/single_app/functions_settings.py index 8176939d..f3dc59de 100644 --- a/application/single_app/functions_settings.py +++ b/application/single_app/functions_settings.py @@ -25,6 +25,7 @@ def get_settings(use_cosmos=False): 'enable_text_plugin': True, 
'enable_default_embedding_model_plugin': False, 'enable_fact_memory_plugin': True, + 'enable_tabular_processing_plugin': False, 'enable_multi_agent_orchestration': False, 'max_rounds_per_agent': 1, 'enable_semantic_kernel': False, @@ -205,6 +206,9 @@ def get_settings(use_cosmos=False): 'require_member_of_feedback_admin': False, 'enable_conversation_archiving': False, + # Processing Thoughts + 'enable_thoughts': False, + # Search and Extract 'azure_ai_search_endpoint': '', 'azure_ai_search_key': '', @@ -391,6 +395,9 @@ def update_settings(new_settings): # always fetch the latest settings doc, which includes your merges settings_item = get_settings() settings_item.update(new_settings) + # Dependency enforcement: tabular processing requires enhanced citations + if not settings_item.get('enable_enhanced_citations', False): + settings_item['enable_tabular_processing_plugin'] = False cosmos_settings_container.upsert_item(settings_item) cache_updater = getattr(app_settings_cache, "update_settings_cache", None) if callable(cache_updater): diff --git a/application/single_app/functions_thoughts.py b/application/single_app/functions_thoughts.py new file mode 100644 index 00000000..c6ffe9dd --- /dev/null +++ b/application/single_app/functions_thoughts.py @@ -0,0 +1,256 @@ +# functions_thoughts.py + +import uuid +import time +from datetime import datetime, timezone +from config import cosmos_thoughts_container, cosmos_archived_thoughts_container, cosmos_messages_container +from functions_appinsights import log_event +from functions_settings import get_settings + + +class ThoughtTracker: + """Stateful per-request tracker that writes processing step records to Cosmos DB. + + Each add_thought() call immediately upserts a document so that polling + clients can see partial progress before the final response is sent. + + All Cosmos writes are wrapped in try/except so thought errors never + interrupt the chat processing flow. 
+ """ + + def __init__(self, conversation_id, message_id, thread_id, user_id): + self.conversation_id = conversation_id + self.message_id = message_id + self.thread_id = thread_id + self.user_id = user_id + self.current_index = 0 + settings = get_settings() + self.enabled = settings.get('enable_thoughts', False) + + def add_thought(self, step_type, content, detail=None): + """Write a thought step to Cosmos immediately. + + Args: + step_type: One of search, tabular_analysis, web_search, + agent_tool_call, generation, content_safety. + content: Short human-readable description of the step. + detail: Optional technical detail (function names, params, etc.). + + Returns: + The thought document id, or None if disabled/failed. + """ + if not self.enabled: + return None + + thought_id = str(uuid.uuid4()) + thought_doc = { + 'id': thought_id, + 'conversation_id': self.conversation_id, + 'message_id': self.message_id, + 'thread_id': self.thread_id, + 'user_id': self.user_id, + 'step_index': self.current_index, + 'step_type': step_type, + 'content': content, + 'detail': detail, + 'duration_ms': None, + 'timestamp': datetime.now(timezone.utc).isoformat() + } + self.current_index += 1 + + try: + cosmos_thoughts_container.upsert_item(thought_doc) + except Exception as e: + log_event(f"ThoughtTracker.add_thought failed: {e}", level="WARNING") + return None + + return thought_id + + def complete_thought(self, thought_id, duration_ms): + """Patch an existing thought with its duration after the step finishes.""" + if not self.enabled or not thought_id: + return + + try: + thought_doc = cosmos_thoughts_container.read_item( + item=thought_id, + partition_key=self.user_id + ) + thought_doc['duration_ms'] = duration_ms + cosmos_thoughts_container.upsert_item(thought_doc) + except Exception as e: + log_event(f"ThoughtTracker.complete_thought failed: {e}", level="WARNING") + + def timed_thought(self, step_type, content, detail=None): + """Convenience: add a thought and return a timer 
helper. + + Usage: + timer = tracker.timed_thought('search', 'Searching documents...') + # ... do work ... + timer.stop() + """ + start = time.time() + thought_id = self.add_thought(step_type, content, detail) + return _ThoughtTimer(self, thought_id, start) + + +class _ThoughtTimer: + """Helper returned by ThoughtTracker.timed_thought() for auto-duration capture.""" + + def __init__(self, tracker, thought_id, start_time): + self._tracker = tracker + self._thought_id = thought_id + self._start = start_time + + def stop(self): + elapsed_ms = int((time.time() - self._start) * 1000) + self._tracker.complete_thought(self._thought_id, elapsed_ms) + return elapsed_ms + + +# --------------------------------------------------------------------------- +# CRUD helpers +# --------------------------------------------------------------------------- + +def get_thoughts_for_message(conversation_id, message_id, user_id): + """Return all thoughts for a specific assistant message, ordered by step_index.""" + try: + query = ( + "SELECT * FROM c " + "WHERE c.conversation_id = @conv_id " + "AND c.message_id = @msg_id " + "ORDER BY c.step_index ASC" + ) + params = [ + {"name": "@conv_id", "value": conversation_id}, + {"name": "@msg_id", "value": message_id}, + ] + results = list(cosmos_thoughts_container.query_items( + query=query, + parameters=params, + partition_key=user_id + )) + return results + except Exception as e: + log_event(f"get_thoughts_for_message failed: {e}", level="WARNING") + return [] + + +def get_pending_thoughts(conversation_id, user_id): + """Return the latest thoughts for a conversation that are still in-progress. + + Used by the polling endpoint. Retrieves thoughts created within the last + 5 minutes for the conversation, grouped by the most recent message_id. 
+ """ + try: + five_minutes_ago = datetime.now(timezone.utc) + from datetime import timedelta + five_minutes_ago = (five_minutes_ago - timedelta(minutes=5)).isoformat() + + query = ( + "SELECT * FROM c " + "WHERE c.conversation_id = @conv_id " + "AND c.timestamp >= @since " + "ORDER BY c.timestamp DESC" + ) + params = [ + {"name": "@conv_id", "value": conversation_id}, + {"name": "@since", "value": five_minutes_ago}, + ] + results = list(cosmos_thoughts_container.query_items( + query=query, + parameters=params, + partition_key=user_id + )) + + if not results: + return [] + + # Group by the most recent message_id + latest_message_id = results[0].get('message_id') + latest_thoughts = [ + t for t in results if t.get('message_id') == latest_message_id + ] + # Return in ascending step_index order + latest_thoughts.sort(key=lambda t: t.get('step_index', 0)) + return latest_thoughts + except Exception as e: + log_event(f"get_pending_thoughts failed: {e}", level="WARNING") + return [] + + +def get_thoughts_for_conversation(conversation_id, user_id): + """Return all thoughts for a conversation.""" + try: + query = ( + "SELECT * FROM c " + "WHERE c.conversation_id = @conv_id " + "ORDER BY c.timestamp ASC" + ) + params = [ + {"name": "@conv_id", "value": conversation_id}, + ] + results = list(cosmos_thoughts_container.query_items( + query=query, + parameters=params, + partition_key=user_id + )) + return results + except Exception as e: + log_event(f"get_thoughts_for_conversation failed: {e}", level="WARNING") + return [] + + +def archive_thoughts_for_conversation(conversation_id, user_id): + """Copy all thoughts for a conversation to the archive container, then delete originals.""" + try: + thoughts = get_thoughts_for_conversation(conversation_id, user_id) + for thought in thoughts: + archived = dict(thought) + archived['archived_at'] = datetime.now(timezone.utc).isoformat() + cosmos_archived_thoughts_container.upsert_item(archived) + + for thought in thoughts: + 
cosmos_thoughts_container.delete_item( + item=thought['id'], + partition_key=user_id + ) + except Exception as e: + log_event(f"archive_thoughts_for_conversation failed: {e}", level="WARNING") + + +def delete_thoughts_for_conversation(conversation_id, user_id): + """Delete all thoughts for a conversation.""" + try: + thoughts = get_thoughts_for_conversation(conversation_id, user_id) + for thought in thoughts: + cosmos_thoughts_container.delete_item( + item=thought['id'], + partition_key=user_id + ) + except Exception as e: + log_event(f"delete_thoughts_for_conversation failed: {e}", level="WARNING") + + +def delete_thoughts_for_message(message_id, user_id): + """Delete all thoughts associated with a specific assistant message.""" + try: + query = ( + "SELECT * FROM c " + "WHERE c.message_id = @msg_id" + ) + params = [ + {"name": "@msg_id", "value": message_id}, + ] + results = list(cosmos_thoughts_container.query_items( + query=query, + parameters=params, + partition_key=user_id + )) + for thought in results: + cosmos_thoughts_container.delete_item( + item=thought['id'], + partition_key=user_id + ) + except Exception as e: + log_event(f"delete_thoughts_for_message failed: {e}", level="WARNING") diff --git a/application/single_app/route_backend_agents.py b/application/single_app/route_backend_agents.py index 57097ee5..2f631af7 100644 --- a/application/single_app/route_backend_agents.py +++ b/application/single_app/route_backend_agents.py @@ -23,6 +23,11 @@ from functions_appinsights import log_event from json_schema_validation import validate_agent from swagger_wrapper import swagger_route, get_auth_security +from functions_activity_logging import ( + log_agent_creation, + log_agent_update, + log_agent_deletion, +) bpa = Blueprint('admin_agents', __name__) @@ -147,6 +152,18 @@ def set_user_agents(): for agent_name in agents_to_delete: delete_personal_agent(user_id, agent_name) + # Log individual agent activities + for agent in filtered_agents: + a_name = 
agent.get('name', '') + a_id = agent.get('id', '') + a_display = agent.get('display_name', a_name) + if a_name in current_agent_names: + log_agent_update(user_id=user_id, agent_id=a_id, agent_name=a_name, agent_display_name=a_display, scope='personal') + else: + log_agent_creation(user_id=user_id, agent_id=a_id, agent_name=a_name, agent_display_name=a_display, scope='personal') + for agent_name in agents_to_delete: + log_agent_deletion(user_id=user_id, agent_id=agent_name, agent_name=agent_name, scope='personal') + log_event("User agents updated", extra={"user_id": user_id, "agents_count": len(filtered_agents)}) return jsonify({'success': True}) @@ -175,6 +192,9 @@ def delete_user_agent(agent_name): # Delete from personal_agents container delete_personal_agent(user_id, agent_name) + # Log agent deletion activity + log_agent_deletion(user_id=user_id, agent_id=agent_to_delete.get('id', agent_name), agent_name=agent_name, scope='personal') + # Check if there are any agents left and if they match global_selected_agent remaining_agents = get_personal_agents(user_id) if len(remaining_agents) > 0: @@ -270,11 +290,12 @@ def create_group_agent_route(): cleaned_payload.pop(key, None) try: - saved = save_group_agent(active_group, cleaned_payload) + saved = save_group_agent(active_group, cleaned_payload, user_id=user_id) except Exception as exc: debug_print('Failed to save group agent: %s', exc) return jsonify({'error': 'Unable to save agent'}), 500 + log_agent_creation(user_id=user_id, agent_id=saved.get('id', ''), agent_name=saved.get('name', ''), agent_display_name=saved.get('display_name', ''), scope='group', group_id=active_group) return jsonify(saved), 201 @@ -325,11 +346,12 @@ def update_group_agent_route(agent_id): return jsonify({'error': str(exc)}), 400 try: - saved = save_group_agent(active_group, cleaned_payload) + saved = save_group_agent(active_group, cleaned_payload, user_id=user_id) except Exception as exc: debug_print('Failed to update group agent %s: %s', 
agent_id, exc) return jsonify({'error': 'Unable to update agent'}), 500 + log_agent_update(user_id=user_id, agent_id=agent_id, agent_name=saved.get('name', ''), agent_display_name=saved.get('display_name', ''), scope='group', group_id=active_group) return jsonify(saved), 200 @@ -360,6 +382,7 @@ def delete_group_agent_route(agent_id): if not removed: return jsonify({'error': 'Agent not found'}), 404 + log_agent_deletion(user_id=user_id, agent_id=agent_id, agent_name=agent_id, scope='group', group_id=active_group) return jsonify({'message': 'Agent deleted'}), 200 # User endpoint to set selected agent (new model, not legacy default_agent) @@ -504,10 +527,11 @@ def add_agent(): cleaned_agent['id'] = '15b0c92a-741d-42ff-ba0b-367c7ee0c848' # Save to global agents container - result = save_global_agent(cleaned_agent) + result = save_global_agent(cleaned_agent, user_id=str(get_current_user_id())) if not result: return jsonify({'error': 'Failed to save agent.'}), 500 + log_agent_creation(user_id=str(get_current_user_id()), agent_id=cleaned_agent.get('id', ''), agent_name=cleaned_agent.get('name', ''), agent_display_name=cleaned_agent.get('display_name', ''), scope='global') log_event("Agent added", extra={"action": "add", "agent": {k: v for k, v in cleaned_agent.items() if k != 'id'}, "user": str(get_current_user_id())}) # --- HOT RELOAD TRIGGER --- setattr(builtins, "kernel_reload_needed", True) @@ -615,10 +639,11 @@ def edit_agent(agent_name): return jsonify({'error': 'Agent not found.'}), 404 # Save the updated agent - result = save_global_agent(cleaned_agent) + result = save_global_agent(cleaned_agent, user_id=str(get_current_user_id())) if not result: return jsonify({'error': 'Failed to save agent.'}), 500 + log_agent_update(user_id=str(get_current_user_id()), agent_id=cleaned_agent.get('id', ''), agent_name=agent_name, agent_display_name=cleaned_agent.get('display_name', ''), scope='global') log_event( f"Agent {agent_name} edited", extra={ @@ -660,6 +685,7 @@ def 
delete_agent(agent_name): if not success: return jsonify({'error': 'Failed to delete agent.'}), 500 + log_agent_deletion(user_id=str(get_current_user_id()), agent_id=agent_to_delete.get('id', ''), agent_name=agent_name, scope='global') log_event("Agent deleted", extra={"action": "delete", "agent_name": agent_name, "user": str(get_current_user_id())}) # --- HOT RELOAD TRIGGER --- setattr(builtins, "kernel_reload_needed", True) diff --git a/application/single_app/route_backend_chats.py b/application/single_app/route_backend_chats.py index e452fed4..f923dd1b 100644 --- a/application/single_app/route_backend_chats.py +++ b/application/single_app/route_backend_chats.py @@ -28,6 +28,7 @@ from functions_activity_logging import log_chat_activity, log_conversation_creation, log_token_usage from flask import current_app from swagger_wrapper import swagger_route, get_auth_security +from functions_thoughts import ThoughtTracker def get_kernel(): @@ -39,6 +40,185 @@ def get_kernel_agents(): log_event(f"[SKChat] get_kernel_agents - g.kernel_agents: {type(g_agents)} ({len(g_agents) if g_agents else 0} agents), builtins.kernel_agents: {type(builtins_agents)} ({len(builtins_agents) if builtins_agents else 0} agents)", level=logging.INFO) return g_agents or builtins_agents +async def run_tabular_sk_analysis(user_question, tabular_filenames, user_id, + conversation_id, gpt_model, settings, + source_hint="workspace", group_id=None, + public_workspace_id=None): + """Run lightweight SK with TabularProcessingPlugin to analyze tabular data. + + Creates a temporary Kernel with only the TabularProcessingPlugin, uses the + same chat model as the user's session, and returns computed analysis results. + Returns None on failure for graceful degradation. 
+ """ + from semantic_kernel import Kernel as SKKernel + from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion + from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior + from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import AzureChatPromptExecutionSettings + from semantic_kernel.contents.chat_history import ChatHistory as SKChatHistory + from semantic_kernel_plugins.tabular_processing_plugin import TabularProcessingPlugin + + try: + log_event(f"[Tabular SK Analysis] Starting analysis for files: {tabular_filenames}", level=logging.INFO) + + # 1. Create lightweight kernel with only tabular plugin + kernel = SKKernel() + tabular_plugin = TabularProcessingPlugin() + kernel.add_plugin(tabular_plugin, plugin_name="tabular_processing") + + # 2. Create chat service using same config as main chat + enable_gpt_apim = settings.get('enable_gpt_apim', False) + if enable_gpt_apim: + chat_service = AzureChatCompletion( + service_id="tabular-analysis", + deployment_name=gpt_model, + endpoint=settings.get('azure_apim_gpt_endpoint'), + api_key=settings.get('azure_apim_gpt_subscription_key'), + api_version=settings.get('azure_apim_gpt_api_version'), + ) + else: + auth_type = settings.get('azure_openai_gpt_authentication_type') + if auth_type == 'managed_identity': + token_provider = get_bearer_token_provider(DefaultAzureCredential(), cognitive_services_scope) + chat_service = AzureChatCompletion( + service_id="tabular-analysis", + deployment_name=gpt_model, + endpoint=settings.get('azure_openai_gpt_endpoint'), + api_version=settings.get('azure_openai_gpt_api_version'), + ad_token_provider=token_provider, + ) + else: + chat_service = AzureChatCompletion( + service_id="tabular-analysis", + deployment_name=gpt_model, + endpoint=settings.get('azure_openai_gpt_endpoint'), + api_key=settings.get('azure_openai_gpt_key'), + api_version=settings.get('azure_openai_gpt_api_version'), + 
) + kernel.add_service(chat_service) + + # 3. Pre-dispatch: load file schemas to eliminate discovery LLM rounds + source_context = f"source='{source_hint}'" + if group_id: + source_context += f", group_id='{group_id}'" + if public_workspace_id: + source_context += f", public_workspace_id='{public_workspace_id}'" + + schema_parts = [] + for fname in tabular_filenames: + try: + container, blob_path = tabular_plugin._resolve_blob_location_with_fallback( + user_id, conversation_id, fname, source_hint, + group_id=group_id, public_workspace_id=public_workspace_id + ) + df = tabular_plugin._read_tabular_blob_to_dataframe(container, blob_path) + df_numeric = tabular_plugin._try_numeric_conversion(df.copy()) + schema_info = { + "filename": fname, + "row_count": len(df), + "columns": list(df.columns), + "dtypes": {col: str(dtype) for col, dtype in df_numeric.dtypes.items()}, + "preview": df.head(3).to_dict(orient='records') + } + schema_parts.append(json.dumps(schema_info, indent=2, default=str)) + log_event(f"[Tabular SK Analysis] Pre-loaded schema for {fname} ({len(df)} rows)", level=logging.DEBUG) + except Exception as e: + log_event(f"[Tabular SK Analysis] Failed to pre-load schema for {fname}: {e}", level=logging.WARNING) + schema_parts.append(json.dumps({"filename": fname, "error": f"Could not pre-load: {str(e)}"})) + + schema_context = "\n".join(schema_parts) + + # 4. Build chat history with pre-loaded schemas + chat_history = SKChatHistory() + chat_history.add_system_message( + "You are a data analyst. Use the tabular_processing plugin functions to " + "analyze the data and answer the user's question.\n\n" + f"FILE SCHEMAS (pre-loaded — do NOT call list_tabular_files or describe_tabular_file):\n" + f"{schema_context}\n\n" + "IMPORTANT: Batch multiple independent function calls in a SINGLE response. " + "For example, call multiple aggregate_column or group_by_aggregate functions " + "at once rather than one at a time.\n\n" + "Return the computed results clearly." 
+ ) + + chat_history.add_user_message( + f"Analyze the tabular data to answer: {user_question}\n" + f"Use user_id='{user_id}', conversation_id='{conversation_id}', {source_context}." + ) + + # 5. Execute with auto function calling + execution_settings = AzureChatPromptExecutionSettings( + service_id="tabular-analysis", + function_choice_behavior=FunctionChoiceBehavior.Auto( + maximum_auto_invoke_attempts=5 + ), + ) + + result = await chat_service.get_chat_message_contents( + chat_history, execution_settings, kernel=kernel + ) + + if result and result[0].content: + analysis = result[0].content + # Cap at 20k characters to stay within token budget + if len(analysis) > 20000: + analysis = analysis[:20000] + "\n[Analysis truncated]" + log_event(f"[Tabular SK Analysis] Analysis complete, {len(analysis)} chars", level=logging.INFO) + return analysis + log_event("[Tabular SK Analysis] No content in SK response", level=logging.WARNING) + return None + + except Exception as e: + log_event(f"[Tabular SK Analysis] Error: {e}", level=logging.WARNING, exceptionTraceback=True) + return None + +def collect_tabular_sk_citations(user_id, conversation_id): + """Collect plugin invocations from the tabular SK analysis and convert to citation format.""" + from semantic_kernel_plugins.plugin_invocation_logger import get_plugin_logger + + plugin_logger = get_plugin_logger() + plugin_invocations = plugin_logger.get_invocations_for_conversation(user_id, conversation_id) + + if not plugin_invocations: + return [] + + def make_json_serializable(obj): + if obj is None: + return None + elif isinstance(obj, (str, int, float, bool)): + return obj + elif isinstance(obj, dict): + return {str(k): make_json_serializable(v) for k, v in obj.items()} + elif isinstance(obj, (list, tuple)): + return [make_json_serializable(item) for item in obj] + else: + return str(obj) + + citations = [] + for inv in plugin_invocations: + timestamp_str = None + if inv.timestamp: + if hasattr(inv.timestamp, 
'isoformat'): + timestamp_str = inv.timestamp.isoformat() + else: + timestamp_str = str(inv.timestamp) + + citation = { + 'tool_name': f"{inv.plugin_name}.{inv.function_name}", + 'function_name': inv.function_name, + 'plugin_name': inv.plugin_name, + 'function_arguments': make_json_serializable(inv.parameters), + 'function_result': make_json_serializable(inv.result), + 'duration_ms': inv.duration_ms, + 'timestamp': timestamp_str, + 'success': inv.success, + 'error_message': make_json_serializable(inv.error_message), + 'user_id': inv.user_id + } + citations.append(citation) + + log_event(f"[Tabular SK Citations] Collected {len(citations)} tool execution citations", level=logging.INFO) + return citations + def register_route_backend_chats(app): @app.route('/api/chat', methods=['POST']) @swagger_route(security=get_auth_security()) @@ -46,6 +226,7 @@ def register_route_backend_chats(app): @user_required def chat_api(): try: + request_start_time = time.time() settings = get_settings() data = request.get_json() user_id = get_current_user_id() @@ -668,6 +849,18 @@ def result_requires_message_reload(result: Any) -> bool: conversation_item['last_updated'] = datetime.utcnow().isoformat() cosmos_conversations_container.upsert_item(conversation_item) # Update timestamp and potentially title + + # Generate assistant_message_id early for thought tracking + assistant_message_id = f"{conversation_id}_assistant_{int(time.time())}_{random.randint(1000,9999)}" + + # Initialize thought tracker + thought_tracker = ThoughtTracker( + conversation_id=conversation_id, + message_id=assistant_message_id, + thread_id=current_user_thread_id, + user_id=user_id + ) + # region 3 - Content Safety # --------------------------------------------------------------------- # 3) Check Content Safety (but DO NOT return 403). 
@@ -679,6 +872,7 @@ def result_requires_message_reload(result: Any) -> bool: blocklist_matches = [] if settings.get('enable_content_safety') and "content_safety_client" in CLIENTS: + thought_tracker.add_thought('content_safety', 'Checking content safety...') try: content_safety_client = CLIENTS["content_safety_client"] request_obj = AnalyzeTextOptions(text=user_message) @@ -836,6 +1030,7 @@ def result_requires_message_reload(result: Any) -> bool: # Perform the search + thought_tracker.add_thought('search', f"Searching {document_scope or 'personal'} workspace documents for '{(search_query or user_message)[:50]}'") try: # Prepare search arguments # Set default and maximum values for top_n @@ -899,6 +1094,8 @@ def result_requires_message_reload(result: Any) -> bool: }), 500 if search_results: + unique_doc_names = set(doc.get('file_name', 'Unknown') for doc in search_results) + thought_tracker.add_thought('search', f"Found {len(search_results)} results from {len(unique_doc_names)} documents") retrieved_texts = [] combined_documents = [] classifications_found = set(conversation_item.get('classification', [])) # Load existing @@ -953,6 +1150,70 @@ def result_requires_message_reload(result: Any) -> bool: 'documents': combined_documents # Keep track of docs used }) + # Auto-detect tabular files in search results and prompt the LLM to use the plugin + if settings.get('enable_tabular_processing_plugin', False) and settings.get('enable_enhanced_citations', False): + tabular_files_in_results = set() + for source_doc in combined_documents: + fname = source_doc.get('file_name', '') + if fname and any(fname.lower().endswith(ext) for ext in TABULAR_EXTENSIONS): + tabular_files_in_results.add(fname) + + if tabular_files_in_results: + # Determine source based on document_scope, not just active IDs + if document_scope == 'group' and active_group_id: + tabular_source_hint = "group" + elif document_scope == 'public' and active_public_workspace_id: + tabular_source_hint = "public" + 
else: + tabular_source_hint = "workspace" + + tabular_filenames_str = ", ".join(tabular_files_in_results) + + # Run SK tabular analysis to pre-compute results + tabular_analysis = asyncio.run(run_tabular_sk_analysis( + user_question=user_message, + tabular_filenames=tabular_files_in_results, + user_id=user_id, + conversation_id=conversation_id, + gpt_model=gpt_model, + settings=settings, + source_hint=tabular_source_hint, + group_id=active_group_id if tabular_source_hint == "group" else None, + public_workspace_id=active_public_workspace_id if tabular_source_hint == "public" else None, + )) + + if tabular_analysis: + # Inject pre-computed analysis results as context + tabular_system_msg = ( + f"The following analysis was computed from the tabular file(s) " + f"{tabular_filenames_str} using data analysis functions:\n\n" + f"{tabular_analysis}\n\n" + f"Use these computed results to answer the user's question accurately." + ) + else: + # Fallback: instruct LLM to use plugin functions (for agent mode) + tabular_system_msg = ( + f"IMPORTANT: The search results include data from tabular file(s): {tabular_filenames_str}. " + f"The search results contain only a schema summary (column names and a few sample rows), NOT the full data. " + f"You MUST use the tabular_processing plugin functions to answer ANY question about these files. " + f"Do NOT attempt to answer using the schema summary alone — it is incomplete. " + f"Available functions: describe_tabular_file, aggregate_column, filter_rows, query_tabular_data, group_by_aggregate. " + f"Use source='{tabular_source_hint}'" + + (f" and group_id='{active_group_id}'" if tabular_source_hint == "group" else "") + + (f" and public_workspace_id='{active_public_workspace_id}'" if tabular_source_hint == "public" else "") + + "." 
+ ) + system_messages_for_augmentation.append({ + 'role': 'system', + 'content': tabular_system_msg + }) + + # Collect tool execution citations from SK tabular analysis + if tabular_analysis: + tabular_sk_citations = collect_tabular_sk_citations(user_id, conversation_id) + if tabular_sk_citations: + agent_citations_list.extend(tabular_sk_citations) + # Loop through each source document/chunk used for this message for source_doc in combined_documents: # 4. Create a citation dictionary, selecting the desired fields @@ -1138,8 +1399,8 @@ def result_requires_message_reload(result: Any) -> bool: """ # Update the system message with enhanced content and updated documents array if system_messages_for_augmentation: - system_messages_for_augmentation[-1]['content'] = system_prompt_search - system_messages_for_augmentation[-1]['documents'] = combined_documents + system_messages_for_augmentation[0]['content'] = system_prompt_search + system_messages_for_augmentation[0]['documents'] = combined_documents # --- END NEW METADATA CITATIONS --- # Update conversation classifications if new ones were found @@ -1489,6 +1750,7 @@ def result_requires_message_reload(result: Any) -> bool: }), status_code if web_search_enabled: + thought_tracker.add_thought('web_search', f"Searching the web for '{(search_query or user_message)[:50]}'") perform_web_search( settings=settings, conversation_id=conversation_id, @@ -1504,7 +1766,9 @@ def result_requires_message_reload(result: Any) -> bool: agent_citations_list=agent_citations_list, web_search_citations_list=web_search_citations_list, ) - + if web_search_citations_list: + thought_tracker.add_thought('web_search', f"Got {len(web_search_citations_list)} web search results") + # region 5 - FINAL conversation history preparation # --------------------------------------------------------------------- # 5) Prepare FINAL conversation history for GPT (including summarization) @@ -1650,6 +1914,7 @@ def result_requires_message_reload(result: Any) -> bool: 
allowed_roles_in_history = ['user', 'assistant'] # Add 'system' if you PERSIST general system messages not related to augmentation max_file_content_length_in_history = 50000 # Increased limit for all file content in history max_tabular_content_length_in_history = 50000 # Same limit for tabular data consistency + chat_tabular_files = set() # Track tabular files uploaded directly to chat for message in recent_messages: role = message.get('role') @@ -1685,25 +1950,38 @@ def result_requires_message_reload(result: Any) -> bool: filename = message.get('filename', 'uploaded_file') file_content = message.get('file_content', '') # Assuming file content is stored is_table = message.get('is_table', False) - - # Use higher limit for tabular data that needs complete analysis - content_limit = max_tabular_content_length_in_history if is_table else max_file_content_length_in_history - - display_content = file_content[:content_limit] - if len(file_content) > content_limit: - display_content += "..." - - # Enhanced message for tabular data - if is_table: + file_content_source = message.get('file_content_source', '') + + # Tabular files stored in blob (enhanced citations enabled) - reference plugin + if is_table and file_content_source == 'blob': + chat_tabular_files.add(filename) # Track for mini SK analysis conversation_history_for_api.append({ - 'role': 'system', # Represent file as system info - 'content': f"[User uploaded a tabular data file named '{filename}'. This is CSV format data for analysis:\n{display_content}]\nThis is complete tabular data in CSV format. You can perform calculations, analysis, and data operations on this dataset." + 'role': 'system', + 'content': f"[User uploaded a tabular data file named '{filename}'. " + f"The file is stored in blob storage and available for analysis. 
" + f"Use the tabular_processing plugin functions (list_tabular_files, describe_tabular_file, " + f"aggregate_column, filter_rows, query_tabular_data, group_by_aggregate) to analyze this data. " + f"The file source is 'chat'.]" }) else: - conversation_history_for_api.append({ - 'role': 'system', # Represent file as system info - 'content': f"[User uploaded a file named '{filename}'. Content preview:\n{display_content}]\nUse this file context if relevant." - }) + # Use higher limit for tabular data that needs complete analysis + content_limit = max_tabular_content_length_in_history if is_table else max_file_content_length_in_history + + display_content = file_content[:content_limit] + if len(file_content) > content_limit: + display_content += "..." + + # Enhanced message for tabular data + if is_table: + conversation_history_for_api.append({ + 'role': 'system', # Represent file as system info + 'content': f"[User uploaded a tabular data file named '{filename}'. This is CSV format data for analysis:\n{display_content}]\nThis is complete tabular data in CSV format. You can perform calculations, analysis, and data operations on this dataset." + }) + else: + conversation_history_for_api.append({ + 'role': 'system', # Represent file as system info + 'content': f"[User uploaded a file named '{filename}'. Content preview:\n{display_content}]\nUse this file context if relevant." 
+ }) elif role == 'image': # Handle image uploads with extracted text and vision analysis filename = message.get('filename', 'uploaded_image') is_user_upload = message.get('metadata', {}).get('is_user_upload', False) @@ -1767,6 +2045,45 @@ def result_requires_message_reload(result: Any) -> bool: # Ignored roles: 'safety', 'blocked', 'system' (if they are only for augmentation/summary) + # --- Mini SK analysis for tabular files uploaded directly to chat --- + if chat_tabular_files and settings.get('enable_tabular_processing_plugin', False) and settings.get('enable_enhanced_citations', False): + chat_tabular_filenames_str = ", ".join(chat_tabular_files) + log_event( + f"[Chat Tabular SK] Detected {len(chat_tabular_files)} tabular file(s) uploaded to chat: {chat_tabular_filenames_str}", + level=logging.INFO + ) + + chat_tabular_analysis = asyncio.run(run_tabular_sk_analysis( + user_question=user_message, + tabular_filenames=chat_tabular_files, + user_id=user_id, + conversation_id=conversation_id, + gpt_model=gpt_model, + settings=settings, + source_hint="chat", + )) + + if chat_tabular_analysis: + # Inject pre-computed analysis results as context + conversation_history_for_api.append({ + 'role': 'system', + 'content': ( + f"The following analysis was computed from the chat-uploaded tabular file(s) " + f"{chat_tabular_filenames_str} using data analysis functions:\n\n" + f"{chat_tabular_analysis}\n\n" + f"Use these computed results to answer the user's question accurately." 
+ ) + }) + + # Collect tool execution citations from SK tabular analysis + chat_tabular_sk_citations = collect_tabular_sk_citations(user_id, conversation_id) + if chat_tabular_sk_citations: + agent_citations_list.extend(chat_tabular_sk_citations) + + debug_print(f"[Chat Tabular SK] Analysis injected, {len(chat_tabular_analysis)} chars") + else: + debug_print("[Chat Tabular SK] Analysis returned None, relying on existing file context messages") + # Ensure the very last message is the current user's message (it should be if fetched correctly) if not conversation_history_for_api or conversation_history_for_api[-1]['role'] != 'user': debug_print("Warning: Last message in history is not the user's current message. Appending.") @@ -2110,6 +2427,27 @@ def orchestrator_error(e): }) if selected_agent: + agent_deployment_name = getattr(selected_agent, 'deployment_name', None) or gpt_model + thought_tracker.add_thought('agent_tool_call', f"Sending to agent '{getattr(selected_agent, 'display_name', getattr(selected_agent, 'name', 'unknown'))}'") + thought_tracker.add_thought('generation', f"Sending to '{agent_deployment_name}'") + + # Register callback to write plugin thoughts to Cosmos in real-time + callback_key = f"{user_id}:{conversation_id}" + plugin_logger = get_plugin_logger() + + def on_plugin_invocation(inv): + duration_str = f" ({int(inv.duration_ms)}ms)" if inv.duration_ms else "" + tool_name = f"{inv.plugin_name}.{inv.function_name}" + thought_tracker.add_thought( + 'agent_tool_call', + f"Agent called {tool_name}{duration_str}", + detail=f"success={inv.success}" + ) + + plugin_logger.register_callback(callback_key, on_plugin_invocation) + + agent_invoke_start_time = time.time() + def invoke_selected_agent(): return asyncio.run(run_sk_call( selected_agent.invoke, @@ -2120,16 +2458,22 @@ def agent_success(result): msg = str(result) notice = None agent_used = getattr(selected_agent, 'name', 'All Plugins') - + + # Emit responded thought with total duration from user 
message + agent_total_duration_s = round(time.time() - request_start_time, 1) + thought_tracker.add_thought('generation', f"'{agent_deployment_name}' responded ({agent_total_duration_s}s from initial message)") + + # Deregister real-time thought callback + plugin_logger.deregister_callbacks(callback_key) + # Get the actual model deployment used by the agent actual_model_deployment = getattr(selected_agent, 'deployment_name', None) or agent_used debug_print(f"Agent '{agent_used}' using deployment: {actual_model_deployment}") - + # Extract detailed plugin invocations for enhanced agent citations - plugin_logger = get_plugin_logger() - # CRITICAL FIX: Filter by user_id and conversation_id to prevent cross-conversation contamination + # (Thoughts already written to Cosmos in real-time by callback) plugin_invocations = plugin_logger.get_invocations_for_conversation(user_id, conversation_id) - + # Convert plugin invocations to citation format with detailed information detailed_citations = [] for inv in plugin_invocations: @@ -2204,6 +2548,7 @@ def make_json_serializable(obj): ) return (msg, actual_model_deployment, "agent", notice) def agent_error(e): + plugin_logger.deregister_callbacks(callback_key) debug_print(f"Error during Semantic Kernel Agent invocation: {str(e)}") log_event( f"Error during Semantic Kernel Agent invocation: {str(e)}", @@ -2244,8 +2589,21 @@ def foundry_agent_success(result): or agent_used ) + # Emit responded thought with total duration from user message + foundry_total_duration_s = round(time.time() - request_start_time, 1) + thought_tracker.add_thought('generation', f"'{actual_model_deployment}' responded ({foundry_total_duration_s}s from initial message)") + + # Deregister real-time thought callback + plugin_logger.deregister_callbacks(callback_key) + foundry_citations = getattr(selected_agent, 'last_run_citations', []) or [] if foundry_citations: + # Emit thoughts for Foundry agent citations/tool calls + for citation in foundry_citations: + 
thought_tracker.add_thought( + 'agent_tool_call', + f"Agent retrieved citation from Azure AI Foundry" + ) for citation in foundry_citations: try: serializable = json.loads(json.dumps(citation, default=str)) @@ -2282,6 +2640,7 @@ def foundry_agent_success(result): return (msg, actual_model_deployment, 'agent', notice) def foundry_agent_error(e): + plugin_logger.deregister_callbacks(callback_key) log_event( f"Error during Azure AI Foundry agent invocation: {str(e)}", extra={ @@ -2360,6 +2719,7 @@ def kernel_error(e): 'on_error': kernel_error }) + thought_tracker.add_thought('generation', f"Sending to '{gpt_model}'") def invoke_gpt_fallback(): if not conversation_history_for_api: raise Exception('Cannot generate response: No conversation history available.') @@ -2443,12 +2803,18 @@ def gpt_error(e): }) fallback_result = try_fallback_chain(fallback_steps) + # Unpack result - handle both 4-tuple (SK) and 5-tuple (GPT with tokens) if len(fallback_result) == 5: ai_message, final_model_used, chat_mode, kernel_fallback_notice, token_usage_data = fallback_result else: ai_message, final_model_used, chat_mode, kernel_fallback_notice = fallback_result token_usage_data = None + + # Emit responded thought for non-agent paths (agent paths emit their own inside callbacks) + if not selected_agent: + gpt_total_duration_s = round(time.time() - request_start_time, 1) + thought_tracker.add_thought('generation', f"'{final_model_used}' responded ({gpt_total_duration_s}s from initial message)") # Collect token usage from Semantic Kernel services if available if kernel and not token_usage_data: @@ -2510,8 +2876,8 @@ def gpt_error(e): if hasattr(selected_agent, 'name'): agent_name = selected_agent.name - assistant_message_id = f"{conversation_id}_assistant_{int(time.time())}_{random.randint(1000,9999)}" - + # assistant_message_id was generated earlier for thought tracking + # Get user_info and thread_id from the user message for ownership tracking and threading user_info_for_assistant = None 
user_thread_id = None @@ -2672,7 +3038,8 @@ def gpt_error(e): 'web_search_citations': web_search_citations_list, 'agent_citations': agent_citations_list, 'reload_messages': reload_messages_required, - 'kernel_fallback_notice': kernel_fallback_notice + 'kernel_fallback_notice': kernel_fallback_notice, + 'thoughts_enabled': thought_tracker.enabled }), 200 except Exception as e: @@ -2713,6 +3080,7 @@ def chat_stream_api(): data = request.get_json() user_id = get_current_user_id() settings = get_settings() + request_start_time = time.time() except Exception as e: return jsonify({'error': f'Failed to parse request: {str(e)}'}), 400 @@ -3111,10 +3479,122 @@ def generate(): conversation_item['last_updated'] = datetime.utcnow().isoformat() cosmos_conversations_container.upsert_item(conversation_item) - + + # Generate assistant_message_id early for thought tracking + assistant_message_id = f"{conversation_id}_assistant_{int(time.time())}_{random.randint(1000,9999)}" + + # Initialize thought tracker for streaming path + thought_tracker = ThoughtTracker( + conversation_id=conversation_id, + message_id=assistant_message_id, + thread_id=current_user_thread_id, + user_id=user_id + ) + + def emit_thought(step_type, content, detail=None): + """Add a thought to Cosmos and return an SSE event string.""" + thought_tracker.add_thought(step_type, content, detail) + return f"data: {json.dumps({'type': 'thought', 'step_index': thought_tracker.current_index - 1, 'step_type': step_type, 'content': content})}\n\n" + + # Content Safety check (matching non-streaming path) + blocked = False + if settings.get('enable_content_safety') and "content_safety_client" in CLIENTS: + yield emit_thought('content_safety', 'Checking content safety...') + try: + content_safety_client = CLIENTS["content_safety_client"] + request_obj = AnalyzeTextOptions(text=user_message) + cs_response = content_safety_client.analyze_text(request_obj) + + max_severity = 0 + triggered_categories = [] + blocklist_matches = [] 
+ block_reasons = [] + + for cat_result in cs_response.categories_analysis: + triggered_categories.append({ + "category": cat_result.category, + "severity": cat_result.severity + }) + if cat_result.severity > max_severity: + max_severity = cat_result.severity + + if cs_response.blocklists_match: + for match in cs_response.blocklists_match: + blocklist_matches.append({ + "blocklistName": match.blocklist_name, + "blocklistItemId": match.blocklist_item_id, + "blocklistItemText": match.blocklist_item_text + }) + + if max_severity >= 4: + blocked = True + block_reasons.append("Max severity >= 4") + if len(blocklist_matches) > 0: + blocked = True + block_reasons.append("Blocklist match") + + if blocked: + # Upsert to safety container + safety_item = { + 'id': str(uuid.uuid4()), + 'user_id': user_id, + 'conversation_id': conversation_id, + 'message': user_message, + 'triggered_categories': triggered_categories, + 'blocklist_matches': blocklist_matches, + 'timestamp': datetime.utcnow().isoformat(), + 'reason': "; ".join(block_reasons), + 'metadata': {} + } + cosmos_safety_container.upsert_item(safety_item) + + # Build blocked message + blocked_msg_content = ( + "Your message was blocked by Content Safety.\n\n" + f"**Reason**: {', '.join(block_reasons)}\n" + "Triggered categories:\n" + ) + for cat in triggered_categories: + blocked_msg_content += ( + f" - {cat['category']} (severity={cat['severity']})\n" + ) + if blocklist_matches: + blocked_msg_content += ( + "\nBlocklist Matches:\n" + + "\n".join([f" - {m['blocklistItemText']} (in {m['blocklistName']})" + for m in blocklist_matches]) + ) + + # Insert safety message + safety_message_id = f"{conversation_id}_safety_{int(time.time())}_{random.randint(1000,9999)}" + safety_doc = { + 'id': safety_message_id, + 'conversation_id': conversation_id, + 'role': 'safety', + 'content': blocked_msg_content.strip(), + 'timestamp': datetime.utcnow().isoformat(), + 'model_deployment_name': None, + 'metadata': {}, + } + 
cosmos_messages_container.upsert_item(safety_doc) + + conversation_item['last_updated'] = datetime.utcnow().isoformat() + cosmos_conversations_container.upsert_item(conversation_item) + + # Stream the blocked response and stop + yield f"data: {json.dumps({'content': blocked_msg_content.strip(), 'blocked': True})}\n\n" + yield "data: [DONE]\n\n" + return + + except HttpResponseError as e: + debug_print(f"[Content Safety Error - Streaming] {e}") + except Exception as ex: + debug_print(f"[Content Safety - Streaming] Unexpected error: {ex}") + # Hybrid search (if enabled) combined_documents = [] if hybrid_search_enabled: + yield emit_thought('search', f"Searching {document_scope or 'personal'} workspace documents for '{(search_query or user_message)[:50]}'") try: search_args = { "query": search_query, @@ -3144,8 +3624,10 @@ def generate(): search_results = hybrid_search(**search_args) except Exception as e: debug_print(f"Error during hybrid search: {e}") - + if search_results: + unique_doc_names_stream = set(doc.get('file_name', 'Unknown') for doc in search_results) + yield emit_thought('search', f"Found {len(search_results)} results from {len(unique_doc_names_stream)} documents") retrieved_texts = [] for doc in search_results: @@ -3319,11 +3801,60 @@ def generate(): 'content': system_prompt_search, 'documents': combined_documents }) - + + # Auto-detect tabular files in search results and run SK analysis + if settings.get('enable_tabular_processing_plugin', False) and settings.get('enable_enhanced_citations', False): + tabular_files_in_results = set() + for source_doc in combined_documents: + fname = source_doc.get('file_name', '') + if fname and any(fname.lower().endswith(ext) for ext in TABULAR_EXTENSIONS): + tabular_files_in_results.add(fname) + + if tabular_files_in_results: + # Determine source based on document_scope, not just active IDs + if document_scope == 'group' and active_group_id: + tabular_source_hint = "group" + elif document_scope == 'public' and 
active_public_workspace_id: + tabular_source_hint = "public" + else: + tabular_source_hint = "workspace" + + tabular_filenames_str = ", ".join(tabular_files_in_results) + + # Run SK tabular analysis to pre-compute results + tabular_analysis = asyncio.run(run_tabular_sk_analysis( + user_question=user_message, + tabular_filenames=tabular_files_in_results, + user_id=user_id, + conversation_id=conversation_id, + gpt_model=gpt_model, + settings=settings, + source_hint=tabular_source_hint, + group_id=active_group_id if tabular_source_hint == "group" else None, + public_workspace_id=active_public_workspace_id if tabular_source_hint == "public" else None, + )) + + if tabular_analysis: + system_messages_for_augmentation.append({ + 'role': 'system', + 'content': ( + f"The following analysis was computed from the tabular file(s) " + f"{tabular_filenames_str} using data analysis functions:\n\n" + f"{tabular_analysis}\n\n" + f"Use these computed results to answer the user's question accurately." + ) + }) + + # Collect tool execution citations from SK tabular analysis + tabular_sk_citations = collect_tabular_sk_citations(user_id, conversation_id) + if tabular_sk_citations: + agent_citations_list.extend(tabular_sk_citations) + # Reorder hybrid citations list in descending order based on page_number hybrid_citations_list.sort(key=lambda x: x.get('page_number', 0), reverse=True) if web_search_enabled: + yield emit_thought('web_search', f"Searching the web for '{(search_query or user_message)[:50]}'") perform_web_search( settings=settings, conversation_id=conversation_id, @@ -3339,6 +3870,8 @@ def generate(): agent_citations_list=agent_citations_list, web_search_citations_list=web_search_citations_list, ) + if web_search_citations_list: + yield emit_thought('web_search', f"Got {len(web_search_citations_list)} web search results") # Update message chat type message_chat_type = None @@ -3381,15 +3914,108 @@ def generate(): 'content': aug_msg['content'] }) - # Add recent messages + # 
Add recent messages (with file role handling) allowed_roles_in_history = ['user', 'assistant'] + max_file_content_length_in_history = 50000 + max_tabular_content_length_in_history = 50000 + chat_tabular_files = set() # Track tabular files uploaded directly to chat + for message in recent_messages: - if message.get('role') in allowed_roles_in_history: + role = message.get('role') + content = message.get('content', '') + + if role in allowed_roles_in_history: conversation_history_for_api.append({ - 'role': message['role'], - 'content': message.get('content', '') + 'role': role, + 'content': content }) - + elif role == 'file': + filename = message.get('filename', 'uploaded_file') + file_content = message.get('file_content', '') + is_table = message.get('is_table', False) + file_content_source = message.get('file_content_source', '') + + # Tabular files stored in blob - track for mini SK analysis + if is_table and file_content_source == 'blob': + chat_tabular_files.add(filename) + conversation_history_for_api.append({ + 'role': 'system', + 'content': ( + f"[User uploaded a tabular data file named '{filename}'. " + f"The file is stored in blob storage and available for analysis. " + f"Use the tabular_processing plugin functions (list_tabular_files, " + f"describe_tabular_file, aggregate_column, filter_rows, " + f"query_tabular_data, group_by_aggregate) to analyze this data. " + f"The file source is 'chat'.]" + ) + }) + else: + content_limit = ( + max_tabular_content_length_in_history if is_table + else max_file_content_length_in_history + ) + display_content = file_content[:content_limit] + if len(file_content) > content_limit: + display_content += "..." + + if is_table: + conversation_history_for_api.append({ + 'role': 'system', + 'content': ( + f"[User uploaded a tabular data file named '{filename}'. " + f"This is CSV format data for analysis:\n{display_content}]\n" + f"This is complete tabular data in CSV format. 
You can perform " + f"calculations, analysis, and data operations on this dataset." + ) + }) + else: + conversation_history_for_api.append({ + 'role': 'system', + 'content': ( + f"[User uploaded a file named '{filename}'. " + f"Content preview:\n{display_content}]\n" + f"Use this file context if relevant." + ) + }) + + # --- Mini SK analysis for tabular files uploaded directly to chat --- + if chat_tabular_files and settings.get('enable_tabular_processing_plugin', False) and settings.get('enable_enhanced_citations', False): + chat_tabular_filenames_str = ", ".join(chat_tabular_files) + log_event( + f"[Chat Tabular SK] Streaming: Detected {len(chat_tabular_files)} tabular file(s) uploaded to chat: {chat_tabular_filenames_str}", + level=logging.INFO + ) + + chat_tabular_analysis = asyncio.run(run_tabular_sk_analysis( + user_question=user_message, + tabular_filenames=chat_tabular_files, + user_id=user_id, + conversation_id=conversation_id, + gpt_model=gpt_model, + settings=settings, + source_hint="chat", + )) + + if chat_tabular_analysis: + conversation_history_for_api.append({ + 'role': 'system', + 'content': ( + f"The following analysis was computed from the chat-uploaded tabular file(s) " + f"{chat_tabular_filenames_str} using data analysis functions:\n\n" + f"{chat_tabular_analysis}\n\n" + f"Use these computed results to answer the user's question accurately." 
+ ) + }) + + # Collect tool execution citations + chat_tabular_sk_citations = collect_tabular_sk_citations(user_id, conversation_id) + if chat_tabular_sk_citations: + agent_citations_list.extend(chat_tabular_sk_citations) + + debug_print(f"[Chat Tabular SK] Streaming: Analysis injected, {len(chat_tabular_analysis)} chars") + else: + debug_print("[Chat Tabular SK] Streaming: Analysis returned None, relying on existing file context") + except Exception as e: yield f"data: {json.dumps({'error': f'History error: {str(e)}'})}\n\n" return @@ -3472,7 +4098,7 @@ def generate(): # Stream the response accumulated_content = "" token_usage_data = None # Will be populated from final stream chunk - assistant_message_id = f"{conversation_id}_assistant_{int(time.time())}_{random.randint(1000,9999)}" + # assistant_message_id was generated earlier for thought tracking final_model_used = gpt_model # Default to gpt_model, will be overridden if agent is used # DEBUG: Check agent streaming decision @@ -3482,8 +4108,24 @@ def generate(): try: if use_agent_streaming and selected_agent: # Stream from agent using invoke_stream + yield emit_thought('agent_tool_call', f"Sending to agent '{agent_display_name_used or agent_name_used}'") + yield emit_thought('generation', f"Sending to '{actual_model_used}'") debug_print(f"--- Streaming from Agent: {agent_name_used} ---") - + + # Register callback to persist plugin thoughts to Cosmos in real-time + callback_key = f"{user_id}:{conversation_id}" + plugin_logger_cb = get_plugin_logger() + + def on_plugin_invocation_streaming(inv): + duration_str = f" ({int(inv.duration_ms)}ms)" if inv.duration_ms else "" + tool_name = f"{inv.plugin_name}.{inv.function_name}" + thought_tracker.add_thought( + 'agent_tool_call', + f"Agent called {tool_name}{duration_str}" + ) + + plugin_logger_cb.register_callback(callback_key, on_plugin_invocation_streaming) + # Import required classes from semantic_kernel.contents.chat_message_content import ChatMessageContent @@ 
-3497,6 +4139,8 @@ def generate(): for msg in conversation_history_for_api ] + agent_stream_start_time = time.time() + # Stream agent responses - collect chunks first then yield async def stream_agent_async(): """Collect all streaming chunks from agent""" @@ -3524,7 +4168,6 @@ async def stream_agent_async(): return chunks, usage_data # Execute async streaming - import asyncio try: # Try to get existing event loop loop = asyncio.get_event_loop() @@ -3539,36 +4182,53 @@ async def stream_agent_async(): try: # Run streaming and collect chunks and usage chunks, stream_usage = loop.run_until_complete(stream_agent_async()) - - # Yield chunks to frontend - for chunk_content in chunks: - accumulated_content += chunk_content - yield f"data: {json.dumps({'content': chunk_content})}\n\n" - - # Try to capture token usage from stream metadata - if stream_usage: - # stream_usage is a CompletionUsage object, not a dict - prompt_tokens = getattr(stream_usage, 'prompt_tokens', 0) - completion_tokens = getattr(stream_usage, 'completion_tokens', 0) - total_tokens = getattr(stream_usage, 'total_tokens', None) - - # Calculate total if not provided - if total_tokens is None or total_tokens == 0: - total_tokens = prompt_tokens + completion_tokens - - token_usage_data = { - 'prompt_tokens': prompt_tokens, - 'completion_tokens': completion_tokens, - 'total_tokens': total_tokens, - 'captured_at': datetime.utcnow().isoformat() - } - debug_print(f"[Agent Streaming Tokens] From metadata - prompt: {prompt_tokens}, completion: {completion_tokens}, total: {total_tokens}") except Exception as stream_error: + plugin_logger_cb.deregister_callbacks(callback_key) debug_print(f"❌ Agent streaming error: {stream_error}") import traceback traceback.print_exc() yield f"data: {json.dumps({'error': f'Agent streaming failed: {str(stream_error)}'})}\n\n" return + + # Emit responded thought with total duration from user message + agent_stream_total_duration_s = round(time.time() - request_start_time, 1) + yield 
emit_thought('generation', f"'{actual_model_used}' responded ({agent_stream_total_duration_s}s from initial message)") + + # Deregister callback (agent completed successfully) + plugin_logger_cb.deregister_callbacks(callback_key) + + # Emit SSE-only events for streaming UI (Cosmos writes already done by callback) + agent_plugin_invocations = plugin_logger_cb.get_invocations_for_conversation(user_id, conversation_id) + for inv in agent_plugin_invocations: + duration_str = f" ({int(inv.duration_ms)}ms)" if inv.duration_ms else "" + tool_name = f"{inv.plugin_name}.{inv.function_name}" + content = f"Agent called {tool_name}{duration_str}" + yield f"data: {json.dumps({'type': 'thought', 'step_index': thought_tracker.current_index, 'step_type': 'agent_tool_call', 'content': content})}\n\n" + thought_tracker.current_index += 1 + + # Yield chunks to frontend + for chunk_content in chunks: + accumulated_content += chunk_content + yield f"data: {json.dumps({'content': chunk_content})}\n\n" + + # Try to capture token usage from stream metadata + if stream_usage: + # stream_usage is a CompletionUsage object, not a dict + prompt_tokens = getattr(stream_usage, 'prompt_tokens', 0) + completion_tokens = getattr(stream_usage, 'completion_tokens', 0) + total_tokens = getattr(stream_usage, 'total_tokens', None) + + # Calculate total if not provided + if total_tokens is None or total_tokens == 0: + total_tokens = prompt_tokens + completion_tokens + + token_usage_data = { + 'prompt_tokens': prompt_tokens, + 'completion_tokens': completion_tokens, + 'total_tokens': total_tokens, + 'captured_at': datetime.utcnow().isoformat() + } + debug_print(f"[Agent Streaming Tokens] From metadata - prompt: {prompt_tokens}, completion: {completion_tokens}, total: {total_tokens}") # Collect token usage from kernel services if not captured from stream if not token_usage_data: @@ -3650,6 +4310,7 @@ def make_json_serializable(obj): else: # Stream from regular GPT model (non-agent) + yield 
emit_thought('generation', f"Sending to '{gpt_model}'") debug_print(f"--- Streaming from GPT ({gpt_model}) ---") # Prepare stream parameters @@ -3700,6 +4361,10 @@ def make_json_serializable(obj): 'captured_at': datetime.utcnow().isoformat() } debug_print(f"[Streaming Tokens] Captured usage - prompt: {chunk.usage.prompt_tokens}, completion: {chunk.usage.completion_tokens}, total: {chunk.usage.total_tokens}") + + # Emit responded thought for regular LLM streaming + gpt_stream_total_duration_s = round(time.time() - request_start_time, 1) + yield emit_thought('generation', f"'{gpt_model}' responded ({gpt_stream_total_duration_s}s from initial message)") # Stream complete - save message and send final metadata # Get user thread info to maintain thread consistency @@ -3818,7 +4483,8 @@ def make_json_serializable(obj): 'agent_citations': agent_citations_list, 'agent_display_name': agent_display_name_used if use_agent_streaming else None, 'agent_name': agent_name_used if use_agent_streaming else None, - 'full_content': accumulated_content + 'full_content': accumulated_content, + 'thoughts_enabled': thought_tracker.enabled } yield f"data: {json.dumps(final_data)}\n\n" diff --git a/application/single_app/route_backend_conversation_export.py b/application/single_app/route_backend_conversation_export.py index aad750e4..abd6490f 100644 --- a/application/single_app/route_backend_conversation_export.py +++ b/application/single_app/route_backend_conversation_export.py @@ -2,17 +2,31 @@ import io import json +import markdown2 +import re +import tempfile import zipfile +from collections import Counter, defaultdict from datetime import datetime +from html import escape as _escape_html +from typing import Any, Dict, List, Optional from config import * +from flask import jsonify, make_response, request +from functions_appinsights import log_event from functions_authentication import * -from functions_settings import * -from flask import Response, jsonify, request, make_response +from 
functions_chat import sort_messages_by_thread +from functions_conversation_metadata import update_conversation_with_metadata from functions_debug import debug_print +from functions_settings import * +from functions_thoughts import get_thoughts_for_conversation from swagger_wrapper import swagger_route, get_auth_security +TRANSCRIPT_ROLES = {'user', 'assistant'} +SUMMARY_SOURCE_CHAR_LIMIT = 60000 + + def register_route_backend_conversation_export(app): """Register conversation export API routes.""" @@ -29,32 +43,36 @@ def api_export_conversations(): conversation_ids (list): List of conversation IDs to export. format (str): Export format — "json" or "markdown". packaging (str): Output packaging — "single" or "zip". + include_summary_intro (bool): Whether to generate a per-conversation intro. + summary_model_deployment (str): Optional model deployment for summary generation. """ user_id = get_current_user_id() if not user_id: return jsonify({'error': 'User not authenticated'}), 401 - data = request.get_json() + data = request.get_json(silent=True) if not data: return jsonify({'error': 'Request body is required'}), 400 conversation_ids = data.get('conversation_ids', []) - export_format = data.get('format', 'json').lower() - packaging = data.get('packaging', 'single').lower() + export_format = str(data.get('format', 'json')).lower() + packaging = str(data.get('packaging', 'single')).lower() + include_summary_intro = bool(data.get('include_summary_intro', False)) + summary_model_deployment = str(data.get('summary_model_deployment', '') or '').strip() if not conversation_ids or not isinstance(conversation_ids, list): return jsonify({'error': 'At least one conversation_id is required'}), 400 - if export_format not in ('json', 'markdown'): - return jsonify({'error': 'Format must be "json" or "markdown"'}), 400 + if export_format not in ('json', 'markdown', 'pdf'): + return jsonify({'error': 'Format must be "json", "markdown", or "pdf"'}), 400 if packaging not in ('single', 
'zip'): return jsonify({'error': 'Packaging must be "single" or "zip"'}), 400 try: + settings = get_settings() exported = [] for conv_id in conversation_ids: - # Verify ownership and fetch conversation try: conversation = cosmos_conversations_container.read_item( item=conv_id, @@ -64,225 +82,1597 @@ def api_export_conversations(): debug_print(f"Export: conversation {conv_id} not found or access denied") continue - # Verify user owns this conversation if conversation.get('user_id') != user_id: debug_print(f"Export: user {user_id} does not own conversation {conv_id}") continue - # Fetch messages ordered by timestamp - message_query = f""" + message_query = """ SELECT * FROM c - WHERE c.conversation_id = '{conv_id}' + WHERE c.conversation_id = @conversation_id ORDER BY c.timestamp ASC """ messages = list(cosmos_messages_container.query_items( query=message_query, + parameters=[{'name': '@conversation_id', 'value': conv_id}], partition_key=conv_id )) - # Filter for active thread messages only - filtered_messages = [] - for msg in messages: - thread_info = msg.get('metadata', {}).get('thread_info', {}) - active = thread_info.get('active_thread') - if active is True or active is None or 'active_thread' not in thread_info: - filtered_messages.append(msg) - - exported.append({ - 'conversation': _sanitize_conversation(conversation), - 'messages': [_sanitize_message(m) for m in filtered_messages] - }) + exported.append( + _build_export_entry( + conversation=conversation, + raw_messages=messages, + user_id=user_id, + settings=settings, + include_summary_intro=include_summary_intro, + summary_model_deployment=summary_model_deployment + ) + ) if not exported: return jsonify({'error': 'No accessible conversations found'}), 404 - # Generate export content timestamp_str = datetime.utcnow().strftime('%Y%m%d_%H%M%S') if packaging == 'zip': return _build_zip_response(exported, export_format, timestamp_str) - else: - return _build_single_file_response(exported, export_format, 
timestamp_str) - - except Exception as e: - debug_print(f"Export error: {str(e)}") - return jsonify({'error': f'Export failed: {str(e)}'}), 500 - - def _sanitize_conversation(conv): - """Return only user-facing conversation fields.""" - return { - 'id': conv.get('id'), - 'title': conv.get('title', 'Untitled'), - 'last_updated': conv.get('last_updated', ''), - 'chat_type': conv.get('chat_type', 'personal'), - 'tags': conv.get('tags', []), - 'is_pinned': conv.get('is_pinned', False), - 'context': conv.get('context', []) - } - - def _sanitize_message(msg): - """Return only user-facing message fields.""" - result = { - 'role': msg.get('role', ''), - 'content': msg.get('content', ''), - 'timestamp': msg.get('timestamp', ''), - } - # Include citations if present - if msg.get('citations'): - result['citations'] = msg['citations'] - # Include context/tool info if present - if msg.get('context'): - result['context'] = msg['context'] - return result - - def _build_single_file_response(exported, export_format, timestamp_str): - """Build a single-file download response.""" - if export_format == 'json': - content = json.dumps(exported, indent=2, ensure_ascii=False, default=str) - filename = f"conversations_export_{timestamp_str}.json" - content_type = 'application/json; charset=utf-8' + + return _build_single_file_response(exported, export_format, timestamp_str) + + except Exception as exc: + debug_print(f"Export error: {str(exc)}") + log_event(f"Conversation export failed: {exc}", level="WARNING") + return jsonify({'error': f'Export failed: {str(exc)}'}), 500 + + +def _build_export_entry( + conversation: Dict[str, Any], + raw_messages: List[Dict[str, Any]], + user_id: str, + settings: Dict[str, Any], + include_summary_intro: bool = False, + summary_model_deployment: str = '' +) -> Dict[str, Any]: + filtered_messages = _filter_messages_for_export(raw_messages) + ordered_messages = sort_messages_by_thread(filtered_messages) + + raw_thoughts = 
get_thoughts_for_conversation(conversation.get('id'), user_id) + thoughts_by_message = defaultdict(list) + for thought in raw_thoughts: + thoughts_by_message[thought.get('message_id')].append(_sanitize_thought(thought)) + + exported_messages = [] + role_counts = Counter() + total_citation_counts = Counter({'document': 0, 'web': 0, 'agent_tool': 0, 'legacy': 0, 'total': 0}) + transcript_index = 0 + total_thoughts = 0 + + for sequence_index, message in enumerate(ordered_messages, start=1): + role = message.get('role', 'unknown') + role_counts[role] += 1 + + message_transcript_index = None + if role in TRANSCRIPT_ROLES: + transcript_index += 1 + message_transcript_index = transcript_index + + thoughts = thoughts_by_message.get(message.get('id'), []) + exported_message = _sanitize_message( + message, + sequence_index=sequence_index, + transcript_index=message_transcript_index, + thoughts=thoughts + ) + exported_messages.append(exported_message) + + counts = exported_message.get('citation_counts', {}) + for key in total_citation_counts: + total_citation_counts[key] += counts.get(key, 0) + total_thoughts += len(thoughts) + + # Compute message time range for summary caching + message_time_start = None + message_time_end = None + if ordered_messages: + message_time_start = ordered_messages[0].get('timestamp') + message_time_end = ordered_messages[-1].get('timestamp') + + sanitized_conversation = _sanitize_conversation( + conversation, + messages=exported_messages, + role_counts=role_counts, + citation_counts=total_citation_counts, + thought_count=total_thoughts + ) + summary_intro = _build_summary_intro( + messages=exported_messages, + conversation=conversation, + sanitized_conversation=sanitized_conversation, + settings=settings, + enabled=include_summary_intro, + summary_model_deployment=summary_model_deployment, + message_time_start=message_time_start, + message_time_end=message_time_end + ) + + return { + 'conversation': sanitized_conversation, + 'summary_intro': 
summary_intro, + 'messages': exported_messages + } + + +def _filter_messages_for_export(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + filtered_messages = [] + for message in messages: + metadata = message.get('metadata', {}) or {} + if metadata.get('is_deleted') is True: + continue + + thread_info = metadata.get('thread_info', {}) or {} + active = thread_info.get('active_thread') + if active is True or active is None or 'active_thread' not in thread_info: + filtered_messages.append(message) + + return filtered_messages + + +def _sanitize_conversation( + conversation: Dict[str, Any], + messages: List[Dict[str, Any]], + role_counts: Counter, + citation_counts: Counter, + thought_count: int +) -> Dict[str, Any]: + transcript_count = sum(1 for message in messages if message.get('is_transcript_message')) + return { + 'id': conversation.get('id'), + 'title': conversation.get('title', 'Untitled'), + 'last_updated': conversation.get('last_updated', ''), + 'chat_type': conversation.get('chat_type', 'personal'), + 'tags': conversation.get('tags', []), + 'context': conversation.get('context', []), + 'classification': conversation.get('classification', []), + 'strict': conversation.get('strict', False), + 'is_pinned': conversation.get('is_pinned', False), + 'scope_locked': conversation.get('scope_locked'), + 'locked_contexts': conversation.get('locked_contexts', []), + 'message_count': len(messages), + 'transcript_message_count': transcript_count, + 'message_counts_by_role': dict(role_counts), + 'citation_counts': dict(citation_counts), + 'thought_count': thought_count + } + + +def _sanitize_message( + message: Dict[str, Any], + sequence_index: int, + transcript_index: Optional[int], + thoughts: List[Dict[str, Any]] +) -> Dict[str, Any]: + role = message.get('role', '') + content = message.get('content', '') + raw_citation_buckets = _collect_raw_citation_buckets(message) + normalized_citations = _normalize_citations(raw_citation_buckets) + citation_counts = 
_build_citation_counts(normalized_citations) + details = _curate_message_details(message, citation_counts, len(thoughts)) + + return { + 'id': message.get('id'), + 'role': role, + 'speaker_label': _role_to_label(role), + 'sequence_index': sequence_index, + 'transcript_index': transcript_index, + 'label': f"Turn {transcript_index}" if transcript_index else f"Message {sequence_index}", + 'is_transcript_message': role in TRANSCRIPT_ROLES, + 'timestamp': message.get('timestamp', ''), + 'content': content, + 'content_text': _normalize_content(content), + 'details': details, + 'citations': normalized_citations, + 'citation_counts': citation_counts, + 'thoughts': thoughts, + 'legacy_citations': raw_citation_buckets['legacy'], + 'hybrid_citations': raw_citation_buckets['hybrid'], + 'web_search_citations': raw_citation_buckets['web'], + 'agent_citations': raw_citation_buckets['agent'] + } + + +def _sanitize_thought(thought: Dict[str, Any]) -> Dict[str, Any]: + return { + 'step_index': thought.get('step_index'), + 'step_type': thought.get('step_type'), + 'content': thought.get('content'), + 'detail': thought.get('detail'), + 'duration_ms': thought.get('duration_ms'), + 'timestamp': thought.get('timestamp') + } + + +def _collect_raw_citation_buckets(message: Dict[str, Any]) -> Dict[str, List[Any]]: + def ensure_list(value: Any) -> List[Any]: + if not value: + return [] + return value if isinstance(value, list) else [value] + + return { + 'legacy': ensure_list(message.get('citations')), + 'hybrid': ensure_list(message.get('hybrid_citations')), + 'web': ensure_list(message.get('web_search_citations')), + 'agent': ensure_list(message.get('agent_citations')) + } + + +def _normalize_citations(raw_citation_buckets: Dict[str, List[Any]]) -> List[Dict[str, Any]]: + normalized = [] + + for citation in raw_citation_buckets.get('hybrid', []): + if isinstance(citation, dict): + normalized.append({ + 'citation_type': 'document', + 'label': _build_document_citation_label(citation), + 
'file_name': citation.get('file_name'), + 'title': citation.get('title') or citation.get('file_name'), + 'page_number': citation.get('page_number'), + 'citation_id': citation.get('citation_id'), + 'chunk_id': citation.get('chunk_id'), + 'metadata_type': citation.get('metadata_type'), + 'metadata_content': citation.get('metadata_content'), + 'score': citation.get('score'), + 'classification': citation.get('classification'), + 'url': citation.get('url') + }) else: - parts = [] - for entry in exported: - parts.append(_conversation_to_markdown(entry)) - content = '\n\n---\n\n'.join(parts) - filename = f"conversations_export_{timestamp_str}.md" - content_type = 'text/markdown; charset=utf-8' - - response = make_response(content) - response.headers['Content-Type'] = content_type - response.headers['Content-Disposition'] = f'attachment; filename="{filename}"' - return response - - def _build_zip_response(exported, export_format, timestamp_str): - """Build a ZIP archive containing one file per conversation.""" - buffer = io.BytesIO() - with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as zf: - for entry in exported: - conv = entry['conversation'] - safe_title = _safe_filename(conv.get('title', 'Untitled')) - conv_id_short = conv.get('id', 'unknown')[:8] - - if export_format == 'json': - file_content = json.dumps(entry, indent=2, ensure_ascii=False, default=str) - ext = 'json' - else: - file_content = _conversation_to_markdown(entry) - ext = 'md' + normalized.append({ + 'citation_type': 'document', + 'label': str(citation), + 'value': str(citation) + }) + + for citation in raw_citation_buckets.get('web', []): + if isinstance(citation, dict): + title = citation.get('title') or citation.get('url') or 'Web source' + normalized.append({ + 'citation_type': 'web', + 'label': title, + 'title': title, + 'url': citation.get('url') + }) + else: + normalized.append({ + 'citation_type': 'web', + 'label': str(citation), + 'value': str(citation) + }) + + for citation in 
raw_citation_buckets.get('agent', []): + if isinstance(citation, dict): + tool_name = citation.get('tool_name') or citation.get('function_name') or 'Tool invocation' + normalized.append({ + 'citation_type': 'agent_tool', + 'label': tool_name, + 'tool_name': citation.get('tool_name'), + 'function_name': citation.get('function_name'), + 'plugin_name': citation.get('plugin_name'), + 'success': citation.get('success'), + 'timestamp': citation.get('timestamp') + }) + else: + normalized.append({ + 'citation_type': 'agent_tool', + 'label': str(citation), + 'value': str(citation) + }) + + for citation in raw_citation_buckets.get('legacy', []): + if isinstance(citation, dict): + title = citation.get('title') or citation.get('filepath') or citation.get('url') or 'Legacy citation' + normalized.append({ + 'citation_type': 'legacy', + 'label': title, + 'title': title, + 'url': citation.get('url'), + 'filepath': citation.get('filepath') + }) + else: + normalized.append({ + 'citation_type': 'legacy', + 'label': str(citation), + 'value': str(citation) + }) + + return normalized + + +def _build_document_citation_label(citation: Dict[str, Any]) -> str: + file_name = citation.get('file_name') or citation.get('title') or 'Document source' + metadata_type = citation.get('metadata_type') + page_number = citation.get('page_number') + + if metadata_type: + return f"{file_name} — {metadata_type.replace('_', ' ').title()}" + if page_number not in (None, ''): + return f"{file_name} — Page {page_number}" + return file_name + + +def _build_citation_counts(citations: List[Dict[str, Any]]) -> Dict[str, int]: + counts = { + 'document': 0, + 'web': 0, + 'agent_tool': 0, + 'legacy': 0, + 'total': len(citations) + } + for citation in citations: + citation_type = citation.get('citation_type') + if citation_type in counts: + counts[citation_type] += 1 + return counts + + +def _curate_message_details( + message: Dict[str, Any], + citation_counts: Dict[str, int], + thought_count: int +) -> Dict[str, 
Any]: + role = message.get('role', '') + metadata = message.get('metadata', {}) or {} + details: Dict[str, Any] = {} + + if role == 'user': + details['interaction_mode'] = _remove_empty_values({ + 'button_states': metadata.get('button_states'), + 'workspace_search': _curate_workspace_search(metadata.get('workspace_search')), + 'prompt_selection': _curate_prompt_selection(metadata.get('prompt_selection')), + 'agent_selection': _curate_agent_selection(metadata.get('agent_selection')), + 'model_selection': _curate_model_selection(metadata.get('model_selection')) + }) + elif role == 'assistant': + details['generation'] = _remove_empty_values({ + 'augmented': message.get('augmented'), + 'model_deployment': message.get('model_deployment_name'), + 'agent_name': message.get('agent_name'), + 'agent_display_name': message.get('agent_display_name'), + 'reasoning_effort': metadata.get('reasoning_effort'), + 'hybrid_search_query': message.get('hybridsearch_query'), + 'token_usage': _curate_token_usage(metadata.get('token_usage')), + 'citation_counts': citation_counts, + 'thought_count': thought_count + }) + else: + details['message_context'] = _remove_empty_values({ + 'filename': message.get('filename'), + 'prompt': message.get('prompt'), + 'is_table': message.get('is_table'), + 'model_deployment': message.get('model_deployment_name') + }) + + return _remove_empty_values(details) + + +def _curate_workspace_search(workspace_search: Optional[Dict[str, Any]]) -> Dict[str, Any]: + if not isinstance(workspace_search, dict): + return {} + return _remove_empty_values({ + 'search_enabled': workspace_search.get('search_enabled'), + 'document_scope': workspace_search.get('document_scope'), + 'document_name': workspace_search.get('document_name'), + 'document_filename': workspace_search.get('document_filename'), + 'group_name': workspace_search.get('group_name'), + 'classification': workspace_search.get('classification'), + 'public_workspace_id': 
workspace_search.get('active_public_workspace_id') + }) - file_name = f"{safe_title}_{conv_id_short}.{ext}" - zf.writestr(file_name, file_content) - buffer.seek(0) - filename = f"conversations_export_{timestamp_str}.zip" +def _curate_prompt_selection(prompt_selection: Optional[Dict[str, Any]]) -> Dict[str, Any]: + if not isinstance(prompt_selection, dict): + return {} + return _remove_empty_values({ + 'prompt_name': prompt_selection.get('prompt_name'), + 'selected_prompt_index': prompt_selection.get('selected_prompt_index'), + 'selected_prompt_text': prompt_selection.get('selected_prompt_text') + }) - response = make_response(buffer.read()) - response.headers['Content-Type'] = 'application/zip' - response.headers['Content-Disposition'] = f'attachment; filename="{filename}"' - return response - def _conversation_to_markdown(entry): - """Convert a conversation + messages entry to Markdown format.""" - conv = entry['conversation'] - messages = entry['messages'] +def _curate_agent_selection(agent_selection: Optional[Dict[str, Any]]) -> Dict[str, Any]: + if not isinstance(agent_selection, dict): + return {} + return _remove_empty_values({ + 'selected_agent': agent_selection.get('selected_agent'), + 'agent_display_name': agent_selection.get('agent_display_name'), + 'is_global': agent_selection.get('is_global'), + 'is_group': agent_selection.get('is_group'), + 'group_name': agent_selection.get('group_name') + }) + + +def _curate_model_selection(model_selection: Optional[Dict[str, Any]]) -> Dict[str, Any]: + if not isinstance(model_selection, dict): + return {} + return _remove_empty_values({ + 'selected_model': model_selection.get('selected_model'), + 'frontend_requested_model': model_selection.get('frontend_requested_model'), + 'reasoning_effort': model_selection.get('reasoning_effort'), + 'streaming': model_selection.get('streaming') + }) + + +def _curate_token_usage(token_usage: Any) -> Dict[str, Any]: + if not isinstance(token_usage, dict): + return {} + return 
_remove_empty_values({ + 'prompt_tokens': token_usage.get('prompt_tokens'), + 'completion_tokens': token_usage.get('completion_tokens'), + 'total_tokens': token_usage.get('total_tokens') + }) + + +def _remove_empty_values(value: Any) -> Any: + if isinstance(value, dict): + cleaned = {} + for key, item in value.items(): + cleaned_item = _remove_empty_values(item) + if cleaned_item in (None, '', [], {}): + continue + cleaned[key] = cleaned_item + return cleaned + + if isinstance(value, list): + cleaned_list = [] + for item in value: + cleaned_item = _remove_empty_values(item) + if cleaned_item in (None, '', [], {}): + continue + cleaned_list.append(cleaned_item) + return cleaned_list + + return value + + +def generate_conversation_summary( + messages: List[Dict[str, Any]], + conversation_title: str, + settings: Dict[str, Any], + model_deployment: str, + message_time_start: str = None, + message_time_end: str = None, + conversation_id: str = None +) -> Dict[str, Any]: + """Generate a conversation summary using the LLM and optionally persist it. + + This is the shared helper used by both the export pipeline and the + on-demand summary API endpoint. Returns a summary dict suitable for + storage in conversation metadata. + + Raises ValueError when there is no content to summarise and + RuntimeError on model errors. + """ + transcript_lines = [] + for message in messages: + content_text = message.get('content_text', '') + if not content_text: + continue + role = message.get('role', 'unknown') + speaker = message.get('speaker_label', role).upper() + transcript_lines.append(f"{speaker}: {content_text}") + + transcript_text = '\n\n'.join(transcript_lines).strip() + if not transcript_text: + raise ValueError('No message content was available to summarize.') + + transcript_text = _truncate_for_summary(transcript_text) + + gpt_client, gpt_model = _initialize_gpt_client(settings, model_deployment) + summary_prompt = ( + "You are summarizing a conversation for an export document. 
" + "Read the full conversation below and write a concise summary. " + "Use your judgement on length: for short conversations write one brief paragraph, " + "for longer or more detailed conversations write two paragraphs. " + "If you need refer to the user, use their name, but do not refer to the user too often." + "Cover the goals, the key topics discussed, any data or tools referenced, " + "and the main outcomes or answers provided. " + "Be factual and neutral. Return plain text only — no headings, no bullet points, no markdown formatting." + ) + + model_lower = gpt_model.lower() + is_reasoning_model = ( + 'o1' in model_lower or 'o3' in model_lower or 'gpt-5' in model_lower + ) + instruction_role = 'developer' if is_reasoning_model else 'system' + + debug_print(f"Summary generation: sending {len(transcript_lines)} messages " + f"({len(transcript_text)} chars) to {gpt_model} (role={instruction_role})") + + summary_response = gpt_client.chat.completions.create( + model=gpt_model, + messages=[ + { + 'role': instruction_role, + 'content': summary_prompt + }, + { + 'role': 'user', + 'content': ( + f"Conversation Title: {conversation_title}\n\n" + f"{transcript_text}" + ) + } + ] + ) + + debug_print(f"Summary generation: response choices=" + f"{len(summary_response.choices) if summary_response.choices else 0}, " + f"finish_reason={summary_response.choices[0].finish_reason if summary_response.choices else 'N/A'}") + + summary_text = (summary_response.choices[0].message.content or '').strip() if summary_response.choices else '' + if not summary_text: + debug_print('Summary generation: model returned an empty response') + log_event('Conversation summary generation returned empty response', level='WARNING') + raise RuntimeError('Summary model returned an empty response.') + + summary_data = { + 'content': summary_text, + 'model_deployment': gpt_model, + 'generated_at': datetime.utcnow().isoformat(), + 'message_time_start': message_time_start, + 'message_time_end': 
message_time_end + } + + # Persist to Cosmos when a conversation_id is available + if conversation_id: + try: + update_conversation_with_metadata(conversation_id, {'summary': summary_data}) + debug_print(f"Summary persisted to conversation {conversation_id}") + except Exception as persist_exc: + debug_print(f"Failed to persist summary to Cosmos: {persist_exc}") + log_event(f"Failed to persist conversation summary: {persist_exc}", level="WARNING") + + return summary_data + + +def _build_summary_intro( + messages: List[Dict[str, Any]], + conversation: Dict[str, Any], + sanitized_conversation: Dict[str, Any], + settings: Dict[str, Any], + enabled: bool, + summary_model_deployment: str, + message_time_start: str = None, + message_time_end: str = None +) -> Dict[str, Any]: + """Build the summary_intro block for the export payload. + + Uses cached summary from conversation metadata when present and + still current (no newer messages). Otherwise generates a fresh + summary via ``generate_conversation_summary`` and persists it. 
+ """ + summary_intro = { + 'enabled': enabled, + 'generated': False, + 'model_deployment': summary_model_deployment or None, + 'generated_at': None, + 'content': '', + 'error': None + } + + if not enabled: + return summary_intro + + # Check for a cached summary stored in the conversation document + existing_summary = conversation.get('summary') + if existing_summary and isinstance(existing_summary, dict): + cached_end = existing_summary.get('message_time_end') + if cached_end and message_time_end and cached_end >= message_time_end: + debug_print('Export summary: using cached summary from conversation metadata') + summary_intro.update({ + 'generated': True, + 'model_deployment': existing_summary.get('model_deployment'), + 'generated_at': existing_summary.get('generated_at'), + 'content': existing_summary.get('content', ''), + 'error': None + }) + return summary_intro + debug_print('Export summary: cached summary is stale, regenerating') + + try: + conversation_id = conversation.get('id') + conversation_title = sanitized_conversation.get('title', 'Untitled') + + summary_data = generate_conversation_summary( + messages=messages, + conversation_title=conversation_title, + settings=settings, + model_deployment=summary_model_deployment, + message_time_start=message_time_start, + message_time_end=message_time_end, + conversation_id=conversation_id + ) + + summary_intro.update({ + 'generated': True, + 'model_deployment': summary_data.get('model_deployment'), + 'generated_at': summary_data.get('generated_at'), + 'content': summary_data.get('content', ''), + 'error': None + }) + return summary_intro + + except (ValueError, RuntimeError) as known_exc: + debug_print(f"Export summary generation issue: {known_exc}") + summary_intro['error'] = str(known_exc) + if hasattr(known_exc, 'model_deployment'): + summary_intro['model_deployment'] = known_exc.model_deployment + return summary_intro + + except Exception as exc: + debug_print(f"Export summary generation failed: {exc}") + 
log_event(f"Conversation export summary generation failed: {exc}", level="WARNING") + summary_intro['error'] = str(exc) + return summary_intro + + +def _truncate_for_summary(transcript_text: str) -> str: + if len(transcript_text) <= SUMMARY_SOURCE_CHAR_LIMIT: + return transcript_text + + head_chars = SUMMARY_SOURCE_CHAR_LIMIT // 2 + tail_chars = SUMMARY_SOURCE_CHAR_LIMIT - head_chars + return ( + transcript_text[:head_chars] + + "\n\n[... transcript truncated for export summary generation ...]\n\n" + + transcript_text[-tail_chars:] + ) + + +def _initialize_gpt_client(settings: Dict[str, Any], requested_model: str = ''): + enable_gpt_apim = settings.get('enable_gpt_apim', False) + + if enable_gpt_apim: + raw_models = settings.get('azure_apim_gpt_deployment', '') or '' + apim_models = [model.strip() for model in raw_models.split(',') if model.strip()] + if not apim_models: + raise ValueError('APIM GPT deployment name is not configured.') + + if requested_model and requested_model not in apim_models: + raise ValueError(f"Requested summary model '{requested_model}' is not configured for APIM.") + + gpt_model = requested_model or apim_models[0] + gpt_client = AzureOpenAI( + api_version=settings.get('azure_apim_gpt_api_version'), + azure_endpoint=settings.get('azure_apim_gpt_endpoint'), + api_key=settings.get('azure_apim_gpt_subscription_key') + ) + return gpt_client, gpt_model + + auth_type = settings.get('azure_openai_gpt_authentication_type') + endpoint = settings.get('azure_openai_gpt_endpoint') + api_version = settings.get('azure_openai_gpt_api_version') + gpt_model_obj = settings.get('gpt_model', {}) or {} + + if requested_model: + gpt_model = requested_model + elif gpt_model_obj.get('selected'): + gpt_model = gpt_model_obj['selected'][0]['deploymentName'] + else: + raise ValueError('No GPT model selected or configured for export summary generation.') + + if auth_type == 'managed_identity': + token_provider = get_bearer_token_provider(DefaultAzureCredential(), 
cognitive_services_scope) + gpt_client = AzureOpenAI( + api_version=api_version, + azure_endpoint=endpoint, + azure_ad_token_provider=token_provider + ) + else: + api_key = settings.get('azure_openai_gpt_key') + if not api_key: + raise ValueError('Azure OpenAI API Key not configured.') + gpt_client = AzureOpenAI( + api_version=api_version, + azure_endpoint=endpoint, + api_key=api_key + ) + + return gpt_client, gpt_model + + +def _build_single_file_response(exported: List[Dict[str, Any]], export_format: str, timestamp_str: str): + """Build a single-file download response.""" + if export_format == 'json': + content = json.dumps(exported, indent=2, ensure_ascii=False, default=str) + filename = f"conversations_export_{timestamp_str}.json" + content_type = 'application/json; charset=utf-8' + elif export_format == 'pdf': + if len(exported) == 1: + content = _conversation_to_pdf_bytes(exported[0]) + else: + combined_parts = [] + for idx, entry in enumerate(exported): + if idx > 0: + combined_parts.append( + '
' + ) + combined_parts.append(_build_pdf_html_body(entry)) + content = _html_body_to_pdf_bytes('\n'.join(combined_parts)) + filename = f"conversations_export_{timestamp_str}.pdf" + content_type = 'application/pdf' + else: + parts = [] + for entry in exported: + parts.append(_conversation_to_markdown(entry)) + content = '\n\n---\n\n'.join(parts) + filename = f"conversations_export_{timestamp_str}.md" + content_type = 'text/markdown; charset=utf-8' + + response = make_response(content) + response.headers['Content-Type'] = content_type + response.headers['Content-Disposition'] = f'attachment; filename="{filename}"' + return response + + +def _build_zip_response(exported: List[Dict[str, Any]], export_format: str, timestamp_str: str): + """Build a ZIP archive containing one file per conversation.""" + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as zf: + for entry in exported: + conversation = entry['conversation'] + safe_title = _safe_filename(conversation.get('title', 'Untitled')) + conversation_id_short = conversation.get('id', 'unknown')[:8] + + if export_format == 'json': + file_content = json.dumps(entry, indent=2, ensure_ascii=False, default=str) + ext = 'json' + elif export_format == 'pdf': + file_content = _conversation_to_pdf_bytes(entry) + ext = 'pdf' + else: + file_content = _conversation_to_markdown(entry) + ext = 'md' - lines = [] - title = conv.get('title', 'Untitled') - lines.append(f"# {title}") + file_name = f"{safe_title}_{conversation_id_short}.{ext}" + zf.writestr(file_name, file_content) + + buffer.seek(0) + filename = f"conversations_export_{timestamp_str}.zip" + + response = make_response(buffer.read()) + response.headers['Content-Type'] = 'application/zip' + response.headers['Content-Disposition'] = f'attachment; filename="{filename}"' + return response + + +def _conversation_to_markdown(entry: Dict[str, Any]) -> str: + """Convert a conversation + messages entry to Markdown format.""" + conversation = 
entry['conversation'] + messages = entry['messages'] + summary_intro = entry.get('summary_intro', {}) or {} + + transcript_messages = [message for message in messages if message.get('is_transcript_message')] + detail_messages = [message for message in messages if message.get('details')] + reference_messages = [message for message in messages if message.get('citations')] + thought_messages = [message for message in messages if message.get('thoughts')] + supplemental_messages = [message for message in messages if not message.get('is_transcript_message')] + + lines: List[str] = [] + lines.append(f"# {conversation.get('title', 'Untitled')}") + lines.append('') + lines.append(f"**Last Updated:** {conversation.get('last_updated', '')} ") + lines.append(f"**Chat Type:** {conversation.get('chat_type', 'personal')} ") + lines.append(f"**Messages:** {conversation.get('message_count', len(messages))} ") + if conversation.get('tags'): + lines.append(f"**Tags:** {', '.join(_format_tag(tag) for tag in conversation.get('tags', []))} ") + if conversation.get('classification'): + lines.append(f"**Classification:** {', '.join(_format_tag(item) for item in conversation.get('classification', []))} ") + lines.append('') + + if summary_intro.get('enabled') and summary_intro.get('generated') and summary_intro.get('content'): + lines.append('## Abstract') lines.append('') + lines.append(summary_intro.get('content', '')) + lines.append('') + lines.append(f"_Generated with {summary_intro.get('model_deployment') or 'configured model'} on {summary_intro.get('generated_at')}_") + lines.append('') + elif summary_intro.get('enabled') and summary_intro.get('error'): + lines.append('> _A summary intro was requested, but it could not be generated for this export._') + lines.append(f"> _Error: {summary_intro.get('error')}_") + lines.append('') + + lines.append('## Transcript') + lines.append('') + if not transcript_messages: + lines.append('_No user or assistant transcript messages were available 
for export._') + lines.append('') + else: + for message in transcript_messages: + lines.append(f"### {message.get('label')} — {message.get('speaker_label')}") + if message.get('timestamp'): + lines.append(f"*{message.get('timestamp')}*") + lines.append('') + lines.append(message.get('content_text') or '_No content recorded._') + lines.append('') - # Metadata - last_updated = conv.get('last_updated', '') - chat_type = conv.get('chat_type', 'personal') - tags = conv.get('tags', []) - - lines.append(f"**Last Updated:** {last_updated} ") - lines.append(f"**Chat Type:** {chat_type} ") - if tags: - tag_strs = [str(t) for t in tags] - lines.append(f"**Tags:** {', '.join(tag_strs)} ") - lines.append(f"**Messages:** {len(messages)} ") + lines.append('## Appendix A — Conversation Metadata') + lines.append('') + metadata_to_render = _remove_empty_values({ + 'context': conversation.get('context'), + 'classification': conversation.get('classification'), + 'strict': conversation.get('strict'), + 'is_pinned': conversation.get('is_pinned'), + 'scope_locked': conversation.get('scope_locked'), + 'locked_contexts': conversation.get('locked_contexts'), + 'message_counts_by_role': conversation.get('message_counts_by_role'), + 'citation_counts': conversation.get('citation_counts'), + 'thought_count': conversation.get('thought_count') + }) + _append_markdown_mapping(lines, metadata_to_render) + lines.append('') + + if detail_messages: + lines.append('## Appendix B — Message Details') lines.append('') - lines.append('---') + for message in detail_messages: + lines.append(f"### {message.get('label')} — {message.get('speaker_label')}") + if message.get('timestamp'): + lines.append(f"*{message.get('timestamp')}*") + lines.append('') + _append_markdown_mapping(lines, message.get('details', {})) + lines.append('') + + if reference_messages: + lines.append('## Appendix C — References') lines.append('') + for message in reference_messages: + lines.append(f"### {message.get('label')} — 
{message.get('speaker_label')}") + if message.get('timestamp'): + lines.append(f"*{message.get('timestamp')}*") + lines.append('') + _append_citations_markdown(lines, message) + lines.append('') - # Messages - for msg in messages: - role = msg.get('role', 'unknown') - timestamp = msg.get('timestamp', '') - raw_content = msg.get('content', '') - content = _normalize_content(raw_content) - - role_label = role.capitalize() - if role == 'assistant': - role_label = 'Assistant' - elif role == 'user': - role_label = 'User' - elif role == 'system': - role_label = 'System' - elif role == 'tool': - role_label = 'Tool' - - lines.append(f"### {role_label}") - if timestamp: - lines.append(f"*{timestamp}*") + if thought_messages: + lines.append('## Appendix D — Processing Thoughts') + lines.append('') + for message in thought_messages: + lines.append(f"### {message.get('label')} — {message.get('speaker_label')}") + if message.get('timestamp'): + lines.append(f"*{message.get('timestamp')}*") lines.append('') - lines.append(content) + for thought in message.get('thoughts', []): + thought_label = thought.get('step_type', 'step').replace('_', ' ').title() + lines.append(f"1. 
**{thought_label}:** {thought.get('content') or 'No content recorded.'}") + if thought.get('duration_ms') is not None: + lines.append(f" - **Duration:** {thought.get('duration_ms')} ms") + if thought.get('timestamp'): + lines.append(f" - **Timestamp:** {thought.get('timestamp')}") + if thought.get('detail'): + lines.append(' - **Detail:**') + _append_code_block(lines, thought.get('detail'), indent=' ') lines.append('') - # Citations - citations = msg.get('citations') - if citations: - lines.append('**Citations:**') - if isinstance(citations, list): - for cit in citations: - if isinstance(cit, dict): - source = cit.get('title') or cit.get('filepath') or cit.get('url', 'Unknown') - lines.append(f"- {source}") - else: - lines.append(f"- {cit}") - lines.append('') - - lines.append('---') + if supplemental_messages: + lines.append('## Appendix E — Supplemental Messages') + lines.append('') + for message in supplemental_messages: + lines.append(f"### {message.get('label')} — {message.get('speaker_label')}") + if message.get('timestamp'): + lines.append(f"*{message.get('timestamp')}*") + lines.append('') + lines.append(message.get('content_text') or '_No content recorded._') lines.append('') - return '\n'.join(lines) + return '\n'.join(lines).strip() - def _normalize_content(content): - """Normalize message content to a plain string. - - Content may be a string, a list of content-part dicts - (e.g. [{"type": "text", "text": "..."}, ...]), or a dict. 
- """ - if isinstance(content, str): - return content - if isinstance(content, list): - parts = [] - for item in content: - if isinstance(item, dict): - if item.get('type') == 'text': - parts.append(item.get('text', '')) - elif item.get('type') == 'image_url': - parts.append('[Image]') + +def _append_citations_markdown(lines: List[str], message: Dict[str, Any]): + document_citations = [citation for citation in message.get('citations', []) if citation.get('citation_type') == 'document'] + web_citations = [citation for citation in message.get('citations', []) if citation.get('citation_type') == 'web'] + agent_citations = message.get('agent_citations', []) or [] + legacy_citations = [citation for citation in message.get('citations', []) if citation.get('citation_type') == 'legacy'] + + if not any([document_citations, web_citations, agent_citations, legacy_citations]): + lines.append('_No citations were recorded for this message._') + return + + if document_citations: + lines.append('#### Document Sources') + lines.append('') + for index, citation in enumerate(document_citations, start=1): + lines.append(f"{index}. **{citation.get('label', 'Document source')}**") + detail_mapping = _remove_empty_values({ + 'citation_id': citation.get('citation_id'), + 'page_number': citation.get('page_number'), + 'classification': citation.get('classification'), + 'score': citation.get('score'), + 'metadata_type': citation.get('metadata_type') + }) + _append_markdown_mapping(lines, detail_mapping, indent=1) + if citation.get('metadata_content'): + lines.append(' - **Metadata Content:**') + _append_code_block(lines, citation.get('metadata_content'), indent=' ') + lines.append('') + + if web_citations: + lines.append('#### Web Sources') + lines.append('') + for index, citation in enumerate(web_citations, start=1): + title = citation.get('title') or citation.get('label') or 'Web source' + url = citation.get('url') + if url: + lines.append(f"{index}. 
[{title}]({url})") + else: + lines.append(f"{index}. {title}") + lines.append('') + + if agent_citations: + lines.append('#### Tool Invocations') + lines.append('') + for index, citation in enumerate(agent_citations, start=1): + label = citation.get('tool_name') or citation.get('function_name') or f"Tool {index}" + lines.append(f"{index}. **{label}**") + detail_mapping = _remove_empty_values({ + 'function_name': citation.get('function_name'), + 'plugin_name': citation.get('plugin_name'), + 'success': citation.get('success'), + 'timestamp': citation.get('timestamp') + }) + _append_markdown_mapping(lines, detail_mapping, indent=1) + if citation.get('function_arguments') not in (None, '', [], {}): + lines.append(' - **Arguments:**') + _append_code_block(lines, citation.get('function_arguments'), indent=' ') + if citation.get('function_result') not in (None, '', [], {}): + lines.append(' - **Result:**') + _append_code_block(lines, citation.get('function_result'), indent=' ') + lines.append('') + + if legacy_citations: + lines.append('#### Legacy Citation Records') + lines.append('') + for index, citation in enumerate(legacy_citations, start=1): + lines.append(f"{index}. 
{citation.get('label', 'Legacy citation')}") + lines.append('') + + +def _append_markdown_mapping(lines: List[str], mapping: Dict[str, Any], indent: int = 0): + if not isinstance(mapping, dict) or not mapping: + return + + prefix = ' ' * indent + for key, value in mapping.items(): + label = _format_markdown_key(key) + if isinstance(value, dict): + lines.append(f"{prefix}- **{label}:**") + _append_markdown_mapping(lines, value, indent + 1) + elif isinstance(value, list): + if not value: + continue + if all(not isinstance(item, (dict, list)) for item in value): + lines.append(f"{prefix}- **{label}:** {', '.join(_stringify_markdown_value(item) for item in value)}") + else: + lines.append(f"{prefix}- **{label}:**") + for item in value: + if isinstance(item, dict): + lines.append(f"{prefix} -") + _append_markdown_mapping(lines, item, indent + 2) else: - parts.append(str(item)) + lines.append(f"{prefix} - {_stringify_markdown_value(item)}") + else: + lines.append(f"{prefix}- **{label}:** {_stringify_markdown_value(value)}") + + +def _append_code_block(lines: List[str], value: Any, indent: str = ''): + if isinstance(value, (dict, list)): + code_block = json.dumps(value, indent=2, ensure_ascii=False, default=str) + language = 'json' + else: + code_block = str(value) + language = 'text' + + lines.append(f"{indent}```{language}") + for line in code_block.splitlines() or ['']: + lines.append(f"{indent}{line}") + lines.append(f"{indent}```") + + +def _format_markdown_key(key: str) -> str: + return str(key).replace('_', ' ').title() + + +def _stringify_markdown_value(value: Any) -> str: + if isinstance(value, bool): + return 'Yes' if value else 'No' + return str(value) + + +def _format_tag(tag: Any) -> str: + """Format a tag or classification entry for display. + + Tags in Cosmos are stored as dicts such as + ``{'category': 'model', 'value': 'gpt-5'}`` or + ``{'category': 'participant', 'name': 'Alice', 'user_id': '...'}`` + but they can also be plain strings in older data. 
+ """ + if isinstance(tag, dict): + category = tag.get('category', '') + # Participant tags carry a readable name / email + name = tag.get('name') or tag.get('email') or tag.get('display_name') + if name: + return f"{category}: {name}" if category else str(name) + # Document tags carry a title + title = tag.get('title') or tag.get('document_id') + if title: + return f"{category}: {title}" if category else str(title) + # Generic category/value tags + value = tag.get('value') + if value: + return f"{category}: {value}" if category else str(value) + return category or str(tag) + return str(tag) + + +def _role_to_label(role: str) -> str: + role_map = { + 'assistant': 'Assistant', + 'user': 'User', + 'system': 'System', + 'tool': 'Tool', + 'file': 'File', + 'image': 'Image', + 'safety': 'Safety', + 'blocked': 'Blocked' + } + return role_map.get(role, str(role).capitalize() or 'Message') + + +def _normalize_content(content: Any) -> str: + """Normalize message content to a plain string.""" + if isinstance(content, str): + return content + if isinstance(content, list): + parts = [] + for item in content: + if isinstance(item, dict): + if item.get('type') == 'text': + parts.append(item.get('text', '')) + elif item.get('type') == 'image_url': + parts.append('[Image]') else: parts.append(str(item)) - return '\n'.join(parts) - if isinstance(content, dict): - if content.get('type') == 'text': - return content.get('text', '') - return str(content) - return str(content) if content else '' - - def _safe_filename(title): - """Create a filesystem-safe filename from a conversation title.""" - import re - # Remove or replace unsafe characters - safe = re.sub(r'[<>:"/\\|?*]', '_', title) - safe = re.sub(r'\s+', '_', safe) - safe = safe.strip('_. 
') - # Truncate to reasonable length - if len(safe) > 50: - safe = safe[:50] - return safe or 'Untitled' + else: + parts.append(str(item)) + return '\n'.join(parts) + if isinstance(content, dict): + if content.get('type') == 'text': + return content.get('text', '') + return str(content) + return str(content) if content else '' + + +def _safe_filename(title: str) -> str: + """Create a filesystem-safe filename from a conversation title.""" + safe = re.sub(r'[<>:"/\\|?*]', '_', title) + safe = re.sub(r'\s+', '_', safe) + safe = safe.strip('_. ') + if len(safe) > 50: + safe = safe[:50] + return safe or 'Untitled' + + +# --------------------------------------------------------------------------- +# PDF Export — HTML generation and PyMuPDF Story rendering +# --------------------------------------------------------------------------- + +_PDF_CSS = """ +body { + font-family: sans-serif; + font-size: 10pt; + color: #222; + line-height: 1.4; +} +h1 { + font-size: 16pt; + color: #1a1a2e; + margin-bottom: 2pt; +} +h2 { + font-size: 13pt; + color: #16213e; + margin-top: 16pt; + margin-bottom: 6pt; + border-bottom: 1px solid #ccc; + padding-bottom: 4pt; +} +h3 { + font-size: 11pt; + color: #0f3460; + margin-top: 10pt; + margin-bottom: 4pt; +} +h4 { + font-size: 10pt; + color: #333; + margin-top: 8pt; + margin-bottom: 4pt; +} +p { + margin-top: 2pt; + margin-bottom: 4pt; +} +.metadata { + font-size: 8pt; + color: #666; +} +.abstract { + background-color: #f8f9fa; + padding: 8pt; + margin-bottom: 8pt; +} +.note { + font-size: 9pt; + color: #856404; + background-color: #fff3cd; + padding: 6pt; +} +.bubble { + padding: 8pt 12pt; + margin-bottom: 8pt; +} +.bubble-header { + font-size: 8pt; + color: #444; + margin-bottom: 2pt; +} +.ts { + font-weight: normal; + color: #888; +} +.user-bubble { + background-color: #c8e0fa; + margin-left: 60pt; +} +.assistant-bubble { + background-color: #f1f0f0; + margin-right: 60pt; +} +.system-bubble { + background-color: #fff3cd; + margin-left: 30pt; 
+ margin-right: 30pt; + font-size: 9pt; +} +.file-bubble { + background-color: #e8f5e9; + margin-right: 60pt; + font-size: 9pt; +} +.other-bubble { + background-color: #f5f5f5; + margin-left: 30pt; + margin-right: 30pt; + font-size: 9pt; +} +table { + border-collapse: collapse; + width: 100%; + font-size: 9pt; + margin-bottom: 8pt; +} +th, td { + border: 1px solid #ddd; + padding: 4pt 6pt; + text-align: left; +} +th { + background-color: #f5f5f5; + font-weight: bold; +} +pre { + background-color: #f5f5f5; + padding: 6pt; + font-size: 8pt; + font-family: monospace; +} +code { + font-family: monospace; + font-size: 9pt; + background-color: #f0f0f0; + padding: 1pt 3pt; +} +ol, ul { + margin-top: 4pt; + margin-bottom: 8pt; +} +li { + margin-bottom: 4pt; +} +small { + font-size: 8pt; + color: #666; +} +a { + color: #0066cc; +} +""" + + +def _pdf_bubble_class(role: str) -> str: + """Return the CSS class for a chat bubble based on message role.""" + role_classes = { + 'user': 'user-bubble', + 'assistant': 'assistant-bubble', + 'system': 'system-bubble', + 'file': 'file-bubble', + 'image': 'file-bubble' + } + return role_classes.get(role, 'other-bubble') + + +def _build_pdf_html_body(entry: Dict[str, Any]) -> str: + """Build the HTML body content for a single conversation PDF.""" + conversation = entry['conversation'] + messages = entry['messages'] + summary_intro = entry.get('summary_intro', {}) or {} + + transcript_messages = [m for m in messages if m.get('is_transcript_message')] + detail_messages = [m for m in messages if m.get('details')] + reference_messages = [m for m in messages if m.get('citations')] + thought_messages = [m for m in messages if m.get('thoughts')] + supplemental_messages = [m for m in messages if not m.get('is_transcript_message')] + + parts: List[str] = [] + + # --- Title and metadata --- + parts.append(f'

{_escape_html(conversation.get("title", "Untitled"))}

') + meta_items = [ + f'Last Updated: {_escape_html(str(conversation.get("last_updated", "")))}', + f'Chat Type: {_escape_html(str(conversation.get("chat_type", "personal")))}', + f'Messages: {conversation.get("message_count", len(messages))}' + ] + tags = conversation.get('tags') + if tags: + meta_items.append(f'Tags: {_escape_html(", ".join(_format_tag(t) for t in tags))}') + classification = conversation.get('classification') + if classification: + meta_items.append( + f'Classification: {_escape_html(", ".join(_format_tag(c) for c in classification))}' + ) + parts.append(f'

{"  |  ".join(meta_items)}

') + + # --- Abstract --- + if summary_intro.get('enabled') and summary_intro.get('generated') and summary_intro.get('content'): + parts.append('

Abstract

') + abstract_html = markdown2.markdown( + summary_intro.get('content', ''), + extras=['fenced-code-blocks', 'tables'] + ) + parts.append(f'
{abstract_html}
') + parts.append( + f'

Generated with ' + f'{_escape_html(str(summary_intro.get("model_deployment") or "configured model"))} on ' + f'{_escape_html(str(summary_intro.get("generated_at", "")))}

' + ) + elif summary_intro.get('enabled') and summary_intro.get('error'): + error_text = _escape_html(str(summary_intro.get('error', ''))) + parts.append( + '

A summary intro was requested, ' + 'but could not be generated for this export.
' + f'Error: {error_text}

' + ) + + # --- Transcript with chat bubbles --- + parts.append('

Transcript

') + if not transcript_messages: + parts.append( + '

No user or assistant transcript messages were available for export.

' + ) + else: + for message in transcript_messages: + role = message.get('role', '') + bubble_class = _pdf_bubble_class(role) + label = message.get('label', '') + speaker = message.get('speaker_label', '') + timestamp = message.get('timestamp', '') + content = message.get('content_text', '') or 'No content recorded.' + + parts.append(f'
') + ts_str = ( + f'  |  {_escape_html(str(timestamp))}' + if timestamp else '' + ) + parts.append( + f'

{_escape_html(label)} — ' + f'{_escape_html(speaker)}{ts_str}

' + ) + content_html = markdown2.markdown( + content, + extras=['fenced-code-blocks', 'tables', 'break-on-newline'] + ) + parts.append(content_html) + parts.append('
') + + # --- Appendix A: Conversation Metadata --- + parts.append('

Appendix A — Conversation Metadata

') + metadata_to_render = _remove_empty_values({ + 'context': conversation.get('context'), + 'classification': conversation.get('classification'), + 'strict': conversation.get('strict'), + 'is_pinned': conversation.get('is_pinned'), + 'scope_locked': conversation.get('scope_locked'), + 'locked_contexts': conversation.get('locked_contexts'), + 'message_counts_by_role': conversation.get('message_counts_by_role'), + 'citation_counts': conversation.get('citation_counts'), + 'thought_count': conversation.get('thought_count') + }) + _append_html_table(parts, metadata_to_render) + + # --- Appendix B: Message Details --- + if detail_messages: + parts.append('

Appendix B — Message Details

') + for message in detail_messages: + parts.append( + f'

{_escape_html(message.get("label", ""))} — ' + f'{_escape_html(message.get("speaker_label", ""))}

' + ) + if message.get('timestamp'): + parts.append( + f'

{_escape_html(str(message.get("timestamp")))}

' + ) + _append_html_table(parts, message.get('details', {})) + + # --- Appendix C: References --- + if reference_messages: + parts.append('

Appendix C — References

') + for message in reference_messages: + parts.append( + f'

{_escape_html(message.get("label", ""))} — ' + f'{_escape_html(message.get("speaker_label", ""))}

' + ) + if message.get('timestamp'): + parts.append( + f'

{_escape_html(str(message.get("timestamp")))}

' + ) + _append_html_citations(parts, message) + + # --- Appendix D: Processing Thoughts --- + if thought_messages: + parts.append('

Appendix D — Processing Thoughts

') + for message in thought_messages: + parts.append( + f'

{_escape_html(message.get("label", ""))} — ' + f'{_escape_html(message.get("speaker_label", ""))}

' + ) + if message.get('timestamp'): + parts.append( + f'

{_escape_html(str(message.get("timestamp")))}

' + ) + parts.append('
    ') + for thought in message.get('thoughts', []): + thought_label = (thought.get('step_type') or 'step').replace('_', ' ').title() + parts.append( + f'
  1. {_escape_html(thought_label)}: ' + f'{_escape_html(str(thought.get("content") or "No content recorded."))}' + ) + if thought.get('duration_ms') is not None: + parts.append( + f'
    Duration: {thought.get("duration_ms")} ms' + ) + if thought.get('timestamp'): + parts.append( + f'
    Timestamp: ' + f'{_escape_html(str(thought.get("timestamp")))}' + ) + if thought.get('detail'): + parts.append('
    Detail:') + _append_html_code_block(parts, thought.get('detail')) + parts.append('
  2. ') + parts.append('
') + + # --- Appendix E: Supplemental Messages --- + if supplemental_messages: + parts.append('

Appendix E — Supplemental Messages

') + for message in supplemental_messages: + parts.append( + f'

{_escape_html(message.get("label", ""))} — ' + f'{_escape_html(message.get("speaker_label", ""))}

' + ) + if message.get('timestamp'): + parts.append( + f'

{_escape_html(str(message.get("timestamp")))}

' + ) + content = message.get('content_text', '') or 'No content recorded.' + content_html = markdown2.markdown( + content, + extras=['fenced-code-blocks', 'tables', 'break-on-newline'] + ) + parts.append(content_html) + + return '\n'.join(parts) + + +def _render_pdf_bytes(body_html: str) -> bytes: + """Render HTML body content to PDF bytes using PyMuPDF Story API.""" + MEDIABOX = fitz.paper_rect("letter") + WHERE = MEDIABOX + (36, 36, -36, -36) + + story = fitz.Story(html=body_html, user_css=_PDF_CSS) + + tmp_path = None + try: + with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp: + tmp_path = tmp.name + + writer = fitz.DocumentWriter(tmp_path) + more = True + while more: + device = writer.begin_page(MEDIABOX) + more, _ = story.place(WHERE) + story.draw(device) + writer.end_page() + writer.close() + del story + del writer + + with open(tmp_path, 'rb') as f: + return f.read() + finally: + if tmp_path: + try: + os.unlink(tmp_path) + except OSError: + pass + + +def _conversation_to_pdf_bytes(entry: Dict[str, Any]) -> bytes: + """Convert a conversation export entry to PDF bytes.""" + body_html = _build_pdf_html_body(entry) + return _render_pdf_bytes(body_html) + + +def _html_body_to_pdf_bytes(body_html: str) -> bytes: + """Convert raw HTML body content to PDF bytes.""" + return _render_pdf_bytes(body_html) + + +def _append_html_table(parts: List[str], mapping: Dict[str, Any]): + """Append a key-value mapping as an HTML table.""" + if not isinstance(mapping, dict) or not mapping: + parts.append('

No data available.

') + return + + parts.append('') + parts.append('') + for key, value in mapping.items(): + label = _format_markdown_key(key) + if isinstance(value, dict): + formatted = _format_nested_html_value(value) + elif isinstance(value, list): + formatted = ( + ', '.join(_escape_html(str(item)) for item in value) + if value else 'None' + ) + elif isinstance(value, bool): + formatted = 'Yes' if value else 'No' + else: + formatted = _escape_html(str(value)) + parts.append(f'') + parts.append('
PropertyValue
{_escape_html(label)}{formatted}
') + + +def _format_nested_html_value(mapping: Dict[str, Any], depth: int = 0) -> str: + """Format a nested dict as an HTML string for table cells.""" + if not mapping: + return 'None' + + items = [] + for key, value in mapping.items(): + label = _format_markdown_key(key) + if isinstance(value, dict): + nested = _format_nested_html_value(value, depth + 1) + items.append(f'{_escape_html(label)}:
{nested}') + elif isinstance(value, list): + list_str = ( + ', '.join(_escape_html(str(v)) for v in value) + if value else 'None' + ) + items.append(f'{_escape_html(label)}: {list_str}') + elif isinstance(value, bool): + items.append(f'{_escape_html(label)}: {"Yes" if value else "No"}') + else: + items.append(f'{_escape_html(label)}: {_escape_html(str(value))}') + return '
'.join(items) + + +def _append_html_citations(parts: List[str], message: Dict[str, Any]): + """Append citation data as HTML.""" + citations = message.get('citations', []) + if not citations: + parts.append('

No citations were recorded for this message.

') + return + + doc_citations = [c for c in citations if c.get('citation_type') == 'document'] + web_citations = [c for c in citations if c.get('citation_type') == 'web'] + agent_citations = [c for c in citations if c.get('citation_type') == 'agent_tool'] + legacy_citations = [c for c in citations if c.get('citation_type') == 'legacy'] + + if doc_citations: + parts.append('

Document Sources

') + parts.append('
    ') + for citation in doc_citations: + parts.append( + f'
  1. {_escape_html(str(citation.get("label", "Document source")))}' + ) + detail_items = _remove_empty_values({ + 'citation_id': citation.get('citation_id'), + 'page_number': citation.get('page_number'), + 'classification': citation.get('classification'), + 'score': citation.get('score'), + 'metadata_type': citation.get('metadata_type') + }) + if detail_items: + detail_str = '; '.join( + f'{_format_markdown_key(k)}: {_escape_html(str(v))}' + for k, v in detail_items.items() + ) + parts.append(f'
    {detail_str}') + if citation.get('metadata_content'): + parts.append('
    Metadata Content:') + _append_html_code_block(parts, citation.get('metadata_content')) + parts.append('
  2. ') + parts.append('
') + + if web_citations: + parts.append('

Web Sources

') + parts.append('
    ') + for citation in web_citations: + title = _escape_html( + str(citation.get('title') or citation.get('label') or 'Web source') + ) + url = citation.get('url') + if url: + parts.append(f'
  1. {title}
  2. ') + else: + parts.append(f'
  3. {title}
  4. ') + parts.append('
') + + if agent_citations: + parts.append('

Tool Invocations

') + parts.append('
    ') + for citation in agent_citations: + label = _escape_html( + str(citation.get('tool_name') or citation.get('function_name') or 'Tool') + ) + parts.append(f'
  1. {label}') + detail_items = _remove_empty_values({ + 'function_name': citation.get('function_name'), + 'plugin_name': citation.get('plugin_name'), + 'success': citation.get('success'), + 'timestamp': citation.get('timestamp') + }) + if detail_items: + detail_str = '; '.join( + f'{_format_markdown_key(k)}: {_escape_html(str(v))}' + for k, v in detail_items.items() + ) + parts.append(f'
    {detail_str}') + parts.append('
  2. ') + parts.append('
') + + if legacy_citations: + parts.append('

Legacy Citation Records

') + parts.append('
    ') + for citation in legacy_citations: + parts.append( + f'
  1. {_escape_html(str(citation.get("label", "Legacy citation")))}
  2. ' + ) + parts.append('
') + + +def _append_html_code_block(parts: List[str], value: Any): + """Append a code block in HTML format.""" + if isinstance(value, (dict, list)): + code_text = json.dumps(value, indent=2, ensure_ascii=False, default=str) + else: + code_text = str(value) + parts.append(f'
{_escape_html(code_text)}
') diff --git a/application/single_app/route_backend_conversations.py b/application/single_app/route_backend_conversations.py index ed15cb91..d90d7e4a 100644 --- a/application/single_app/route_backend_conversations.py +++ b/application/single_app/route_backend_conversations.py @@ -3,11 +3,12 @@ from config import * from functions_authentication import * from functions_settings import * -from functions_conversation_metadata import get_conversation_metadata +from functions_conversation_metadata import get_conversation_metadata, update_conversation_with_metadata from flask import Response, request from functions_debug import debug_print from swagger_wrapper import swagger_route, get_auth_security from functions_activity_logging import log_conversation_creation, log_conversation_deletion, log_conversation_archival +from functions_thoughts import archive_thoughts_for_conversation, delete_thoughts_for_conversation def register_route_backend_conversations(app): @@ -430,7 +431,14 @@ def delete_conversation(conversation_id): cosmos_archived_messages_container.upsert_item(archived_doc) cosmos_messages_container.delete_item(doc['id'], partition_key=conversation_id) - + + # Archive/delete thoughts for conversation + user_id_for_thoughts = conversation_item.get('user_id') + if archiving_enabled: + archive_thoughts_for_conversation(conversation_id, user_id_for_thoughts) + else: + delete_thoughts_for_conversation(conversation_id, user_id_for_thoughts) + # Log conversation deletion before actual deletion log_conversation_deletion( user_id=conversation_item.get('user_id'), @@ -530,7 +538,13 @@ def delete_multiple_conversations(): cosmos_archived_messages_container.upsert_item(archived_message) cosmos_messages_container.delete_item(message['id'], partition_key=conversation_id) - + + # Archive/delete thoughts for conversation + if archiving_enabled: + archive_thoughts_for_conversation(conversation_id, user_id) + else: + delete_thoughts_for_conversation(conversation_id, user_id) + # 
Log conversation deletion before actual deletion log_conversation_deletion( user_id=user_id, @@ -798,7 +812,8 @@ def get_conversation_metadata_api(conversation_id): "is_hidden": conversation_item.get('is_hidden', False), "scope_locked": conversation_item.get('scope_locked'), "locked_contexts": conversation_item.get('locked_contexts', []), - "chat_type": conversation_item.get('chat_type', 'personal') # Default to 'personal' if chat_type is not defined (legacy conversations) + "chat_type": conversation_item.get('chat_type'), + "summary": conversation_item.get('summary') }), 200 except CosmosResourceNotFoundError: @@ -807,6 +822,95 @@ def get_conversation_metadata_api(conversation_id): print(f"Error retrieving conversation metadata: {e}") return jsonify({'error': 'Failed to retrieve conversation metadata'}), 500 + @app.route('/api/conversations//summary', methods=['POST']) + @swagger_route(security=get_auth_security()) + @login_required + @user_required + def generate_conversation_summary_api(conversation_id): + """ + Generate (or regenerate) a summary for a conversation and persist it. + + Request body (optional): + { "model_deployment": "gpt-4o" } + + Returns the generated summary dict on success. 
+ """ + from route_backend_conversation_export import generate_conversation_summary, _normalize_content + from functions_chat import sort_messages_by_thread + + user_id = get_current_user_id() + if not user_id: + return jsonify({'error': 'User not authenticated'}), 401 + + try: + conversation_item = cosmos_conversations_container.read_item( + item=conversation_id, + partition_key=conversation_id + ) + if conversation_item.get('user_id') != user_id: + return jsonify({'error': 'Forbidden'}), 403 + except CosmosResourceNotFoundError: + return jsonify({'error': 'Conversation not found'}), 404 + except Exception as e: + debug_print(f"Error reading conversation for summary: {e}") + return jsonify({'error': 'Failed to read conversation'}), 500 + + body = request.get_json(silent=True) or {} + model_deployment = body.get('model_deployment', '') + + # Query messages for this conversation + try: + query = "SELECT * FROM c WHERE c.conversation_id = @cid ORDER BY c.timestamp ASC" + params = [{"name": "@cid", "value": conversation_id}] + raw_messages = list(cosmos_messages_container.query_items( + query=query, + parameters=params, + enable_cross_partition_query=True + )) + except Exception as e: + debug_print(f"Error querying messages for summary: {e}") + return jsonify({'error': 'Failed to query messages'}), 500 + + if not raw_messages: + return jsonify({'error': 'No messages in this conversation'}), 400 + + # Build lightweight export-style message list for the summary helper + ordered_messages = sort_messages_by_thread(raw_messages) + export_messages = [] + for msg in ordered_messages: + role = msg.get('role', 'unknown') + # Content may be a string OR a list of content parts — normalise it + content = _normalize_content(msg.get('content', '')) + speaker = 'USER' if role == 'user' else 'ASSISTANT' if role == 'assistant' else role.upper() + export_messages.append({ + 'role': role, + 'content_text': content, + 'speaker_label': speaker + }) + + message_time_start = 
ordered_messages[0].get('timestamp') if ordered_messages else None + message_time_end = ordered_messages[-1].get('timestamp') if ordered_messages else None + + settings = get_settings() + + try: + summary_data = generate_conversation_summary( + messages=export_messages, + conversation_title=conversation_item.get('title', 'Untitled'), + settings=settings, + model_deployment=model_deployment, + message_time_start=message_time_start, + message_time_end=message_time_end, + conversation_id=conversation_id + ) + return jsonify({'success': True, 'summary': summary_data}), 200 + + except (ValueError, RuntimeError) as known_exc: + return jsonify({'error': str(known_exc)}), 400 + except Exception as exc: + debug_print(f"Summary generation API error: {exc}") + return jsonify({'error': 'Summary generation failed'}), 500 + @app.route('/api/conversations//scope_lock', methods=['PATCH']) @swagger_route(security=get_auth_security()) @login_required diff --git a/application/single_app/route_backend_documents.py b/application/single_app/route_backend_documents.py index 28d4ef69..0e9d490b 100644 --- a/application/single_app/route_backend_documents.py +++ b/application/single_app/route_backend_documents.py @@ -7,6 +7,7 @@ from utils_cache import invalidate_personal_search_cache from functions_debug import * from functions_activity_logging import log_document_upload, log_document_metadata_update_transaction +import io import os import requests from flask import current_app @@ -72,7 +73,58 @@ def get_file_content(): filename = items_sorted[0].get('filename', 'Untitled') is_table = items_sorted[0].get('is_table', False) - debug_print(f"[GET_FILE_CONTENT] Filename: {filename}, is_table: {is_table}") + file_content_source = items_sorted[0].get('file_content_source', '') + debug_print(f"[GET_FILE_CONTENT] Filename: {filename}, is_table: {is_table}, source: {file_content_source}") + + # Handle blob-stored tabular files (enhanced citations enabled) + if file_content_source == 'blob': + 
blob_container = items_sorted[0].get('blob_container', '') + blob_path = items_sorted[0].get('blob_path', '') + debug_print(f"[GET_FILE_CONTENT] Blob-stored file: container={blob_container}, path={blob_path}") + + if not blob_container or not blob_path: + return jsonify({'error': 'Blob storage reference is incomplete'}), 500 + + try: + blob_service_client = CLIENTS.get("storage_account_office_docs_client") + if not blob_service_client: + return jsonify({'error': 'Blob storage client not available'}), 500 + + blob_client = blob_service_client.get_blob_client( + container=blob_container, + blob=blob_path + ) + stream = blob_client.download_blob() + blob_data = stream.readall() + + # Convert to CSV using pandas for display + file_ext = os.path.splitext(filename)[1].lower() + if file_ext == '.csv': + import pandas + df = pandas.read_csv(io.BytesIO(blob_data)) + combined_content = df.to_csv(index=False) + elif file_ext in ['.xlsx', '.xlsm']: + import pandas + df = pandas.read_excel(io.BytesIO(blob_data), engine='openpyxl') + combined_content = df.to_csv(index=False) + elif file_ext == '.xls': + import pandas + df = pandas.read_excel(io.BytesIO(blob_data), engine='xlrd') + combined_content = df.to_csv(index=False) + else: + combined_content = blob_data.decode('utf-8', errors='replace') + + debug_print(f"[GET_FILE_CONTENT] Successfully read blob content, length: {len(combined_content)}") + return jsonify({ + 'file_content': combined_content, + 'filename': filename, + 'is_table': is_table, + 'file_content_source': 'blob' + }), 200 + + except Exception as blob_err: + debug_print(f"[GET_FILE_CONTENT] Error reading from blob: {blob_err}") + return jsonify({'error': f'Error reading file from storage: {str(blob_err)}'}), 500 add_file_task_to_file_processing_log(document_id=file_id, user_id=user_id, content="Combining file content from chunks, filename: " + filename + ", is_table: " + str(is_table)) combined_parts = [] diff --git a/application/single_app/route_backend_plugins.py 
b/application/single_app/route_backend_plugins.py index 77aab866..f4d4dca0 100644 --- a/application/single_app/route_backend_plugins.py +++ b/application/single_app/route_backend_plugins.py @@ -32,6 +32,11 @@ from functions_debug import debug_print from json_schema_validation import validate_plugin +from functions_activity_logging import ( + log_action_creation, + log_action_update, + log_action_deletion, +) def discover_plugin_types(): # Dynamically discover allowed plugin types from available plugin classes. @@ -345,6 +350,19 @@ def set_user_plugins(): except Exception as e: debug_print(f"Error saving personal actions for user {user_id}: {e}") return jsonify({'error': 'Failed to save plugins'}), 500 + + # Log individual action activities + for plugin in filtered_plugins: + p_name = plugin.get('name', '') + p_id = plugin.get('id', '') + p_type = plugin.get('type', '') + if p_name in current_action_names: + log_action_update(user_id=user_id, action_id=p_id, action_name=p_name, action_type=p_type, scope='personal') + else: + log_action_creation(user_id=user_id, action_id=p_id, action_name=p_name, action_type=p_type, scope='personal') + for plugin_name in (current_action_names - new_plugin_names): + log_action_deletion(user_id=user_id, action_id=plugin_name, action_name=plugin_name, scope='personal') + log_event("User plugins updated", extra={"user_id": user_id, "plugins_count": len(filtered_plugins)}) return jsonify({'success': True}) @@ -360,6 +378,7 @@ def delete_user_plugin(plugin_name): if not deleted: return jsonify({'error': 'Plugin not found.'}), 404 + log_action_deletion(user_id=user_id, action_id=plugin_name, action_name=plugin_name, scope='personal') log_event("User plugin deleted", extra={"user_id": user_id, "plugin_name": plugin_name}) return jsonify({'success': True}) @@ -460,6 +479,13 @@ def create_group_action_route(): for key in ('group_id', 'last_updated', 'user_id', 'is_global', 'is_group', 'scope'): payload.pop(key, None) + # Handle endpoint based 
on plugin type (same logic as personal plugins) + plugin_type = payload.get('type', '') + if plugin_type in ['sql_schema', 'sql_query']: + payload.setdefault('endpoint', f'sql://{plugin_type}') + elif plugin_type == 'msgraph': + payload.setdefault('endpoint', 'https://graph.microsoft.com') + # Merge with schema to ensure all required fields are present (same as global actions) schema_dir = os.path.join(current_app.root_path, 'static', 'json', 'schemas') merged = get_merged_plugin_settings(payload.get('type'), payload, schema_dir) @@ -467,11 +493,12 @@ def create_group_action_route(): payload['additionalFields'] = merged.get('additionalFields', payload.get('additionalFields', {})) try: - saved = save_group_action(active_group, payload) + saved = save_group_action(active_group, payload, user_id=user_id) except Exception as exc: debug_print('Failed to save group action: %s', exc) return jsonify({'error': 'Unable to save action'}), 500 + log_action_creation(user_id=user_id, action_id=saved.get('id', ''), action_name=saved.get('name', ''), action_type=saved.get('type', ''), scope='group', group_id=active_group) return jsonify(saved), 201 @@ -516,6 +543,13 @@ def update_group_action_route(action_id): merged['is_group'] = True merged['id'] = existing.get('id', action_id) + # Handle endpoint based on plugin type (same logic as personal plugins) + plugin_type = merged.get('type', '') + if plugin_type in ['sql_schema', 'sql_query']: + merged.setdefault('endpoint', f'sql://{plugin_type}') + elif plugin_type == 'msgraph': + merged.setdefault('endpoint', 'https://graph.microsoft.com') + try: validate_group_action_payload(merged, partial=False) except ValueError as exc: @@ -528,11 +562,12 @@ def update_group_action_route(action_id): merged['additionalFields'] = schema_merged.get('additionalFields', merged.get('additionalFields', {})) try: - saved = save_group_action(active_group, merged) + saved = save_group_action(active_group, merged, user_id=user_id) except Exception as exc: 
debug_print('Failed to update group action %s: %s', action_id, exc) return jsonify({'error': 'Unable to update action'}), 500 + log_action_update(user_id=user_id, action_id=action_id, action_name=saved.get('name', ''), action_type=saved.get('type', ''), scope='group', group_id=active_group) return jsonify(saved), 200 @@ -563,6 +598,7 @@ def delete_group_action_route(action_id): if not removed: return jsonify({'error': 'Action not found'}), 404 + log_action_deletion(user_id=user_id, action_id=action_id, action_name=action_id, scope='group', group_id=active_group) return jsonify({'message': 'Action deleted'}), 200 @bpap.route('/api/user/plugins/types', methods=['GET']) @@ -588,6 +624,8 @@ def get_core_plugin_settings(): 'enable_text_plugin': bool(settings.get('enable_text_plugin', True)), 'enable_default_embedding_model_plugin': bool(settings.get('enable_default_embedding_model_plugin', True)), 'enable_fact_memory_plugin': bool(settings.get('enable_fact_memory_plugin', True)), + 'enable_tabular_processing_plugin': bool(settings.get('enable_tabular_processing_plugin', False)), + 'enable_enhanced_citations': bool(settings.get('enable_enhanced_citations', False)), 'enable_semantic_kernel': bool(settings.get('enable_semantic_kernel', False)), 'allow_user_plugins': bool(settings.get('allow_user_plugins', True)), 'allow_group_plugins': bool(settings.get('allow_group_plugins', True)), @@ -610,6 +648,7 @@ def update_core_plugin_settings(): 'enable_text_plugin', 'enable_default_embedding_model_plugin', 'enable_fact_memory_plugin', + 'enable_tabular_processing_plugin', 'allow_user_plugins', 'allow_group_plugins' ] @@ -627,6 +666,11 @@ def update_core_plugin_settings(): return jsonify({'error': f"Field '{key}' must be a boolean."}), 400 updates[key] = data[key] logging.info("Validated plugin settings: %s", updates) + # Dependency: tabular processing requires enhanced citations + if updates.get('enable_tabular_processing_plugin', False): + full_settings = get_settings() + if not 
full_settings.get('enable_enhanced_citations', False): + return jsonify({'error': 'Tabular Processing requires Enhanced Citations to be enabled.'}), 400 # Update settings success = update_settings(updates) if success: @@ -692,9 +736,10 @@ def add_plugin(): new_plugin['id'] = plugin_id # Save to global actions container - save_global_action(new_plugin) + save_global_action(new_plugin, user_id=str(get_current_user_id())) - log_event("Plugin added", extra={"action": "add", "plugin": new_plugin, "user": str(getattr(request, 'user', 'unknown'))}) + log_action_creation(user_id=str(get_current_user_id()), action_id=plugin_id, action_name=new_plugin.get('name', ''), action_type=new_plugin.get('type', ''), scope='global') + log_event("Plugin added", extra={"action": "add", "plugin": new_plugin, "user": str(get_current_user_id())}) # --- HOT RELOAD TRIGGER --- setattr(builtins, "kernel_reload_needed", True) @@ -753,9 +798,10 @@ def edit_plugin(plugin_name): # Delete old and save updated if 'id' in found_plugin: delete_global_action(found_plugin['id']) - save_global_action(updated_plugin) + save_global_action(updated_plugin, user_id=str(get_current_user_id())) - log_event("Plugin edited", extra={"action": "edit", "plugin": updated_plugin, "user": str(getattr(request, 'user', 'unknown'))}) + log_action_update(user_id=str(get_current_user_id()), action_id=updated_plugin.get('id', ''), action_name=plugin_name, action_type=updated_plugin.get('type', ''), scope='global') + log_event("Plugin edited", extra={"action": "edit", "plugin": updated_plugin, "user": str(get_current_user_id())}) # --- HOT RELOAD TRIGGER --- setattr(builtins, "kernel_reload_needed", True) return jsonify({'success': True}) @@ -796,7 +842,8 @@ def delete_plugin(plugin_name): if 'id' in plugin_to_delete: delete_global_action(plugin_to_delete['id']) - log_event("Plugin deleted", extra={"action": "delete", "plugin_name": plugin_name, "user": str(getattr(request, 'user', 'unknown'))}) + 
log_action_deletion(user_id=str(get_current_user_id()), action_id=plugin_to_delete.get('id', ''), action_name=plugin_name, action_type=plugin_to_delete.get('type', ''), scope='global') + log_event("Plugin deleted", extra={"action": "delete", "plugin_name": plugin_name, "user": str(get_current_user_id())}) # --- HOT RELOAD TRIGGER --- setattr(builtins, "kernel_reload_needed", True) return jsonify({'success': True}) @@ -928,4 +975,116 @@ def _merge_group_and_global_actions(group_actions, global_actions): return normalized_actions +@bpap.route('/api/plugins/test-sql-connection', methods=['POST']) +@swagger_route(security=get_auth_security()) +@login_required +@user_required +def test_sql_connection(): + """Test a SQL database connection using provided configuration.""" + data = request.get_json(silent=True) or {} + database_type = (data.get('database_type') or 'sqlserver').lower() + connection_method = data.get('connection_method', 'parameters') + connection_string = data.get('connection_string', '') + server = data.get('server', '') + database = data.get('database', '') + port = data.get('port', '') + driver = data.get('driver', '') + username = data.get('username', '') + password = data.get('password', '') + auth_type = data.get('auth_type', 'username_password') + timeout = min(int(data.get('timeout', 10)), 15) # Cap at 15 seconds for test + + # Map azure_sql to sqlserver + if database_type in ('azure_sql', 'azuresql'): + database_type = 'sqlserver' + + try: + if database_type == 'sqlserver': + import pyodbc + if connection_method == 'connection_string' and connection_string: + conn = pyodbc.connect(connection_string, timeout=timeout) + else: + if not server or not database: + return jsonify({'success': False, 'error': 'Server and database are required for individual parameters connection.'}), 400 + drv = driver or 'ODBC Driver 17 for SQL Server' + conn_str = f"DRIVER={{{drv}}};SERVER={server};DATABASE={database}" + if port: + conn_str += f",{port}" + if auth_type 
== 'username_password' and username and password: + conn_str += f";UID={username};PWD={password}" + elif auth_type == 'managed_identity': + conn_str += ";Authentication=ActiveDirectoryMsi" + elif auth_type == 'integrated': + conn_str += ";Trusted_Connection=yes" + conn = pyodbc.connect(conn_str, timeout=timeout) + cursor = conn.cursor() + cursor.execute("SELECT 1") + cursor.close() + conn.close() + return jsonify({'success': True, 'message': f'Successfully connected to {data.get("database", "database")} on {data.get("server", "server")}.'}) + + elif database_type == 'postgresql': + import psycopg2 + if connection_method == 'connection_string' and connection_string: + conn = psycopg2.connect(connection_string, connect_timeout=timeout) + else: + if not server or not database: + return jsonify({'success': False, 'error': 'Server and database are required.'}), 400 + conn_params = {'host': server, 'database': database, 'connect_timeout': timeout} + if port: + conn_params['port'] = int(port) + if username: + conn_params['user'] = username + if password: + conn_params['password'] = password + conn = psycopg2.connect(**conn_params) + cursor = conn.cursor() + cursor.execute("SELECT 1") + cursor.close() + conn.close() + return jsonify({'success': True, 'message': f'Successfully connected to PostgreSQL database {data.get("database", "")}.'}) + + elif database_type == 'mysql': + import pymysql + if connection_method == 'connection_string' and connection_string: + # pymysql doesn't natively parse connection strings, so use params + return jsonify({'success': False, 'error': 'MySQL test connection requires individual parameters, not a connection string.'}), 400 + if not server or not database: + return jsonify({'success': False, 'error': 'Server and database are required.'}), 400 + conn_params = {'host': server, 'database': database, 'connect_timeout': timeout} + if port: + conn_params['port'] = int(port) + if username: + conn_params['user'] = username + if password: + 
conn_params['password'] = password + conn = pymysql.connect(**conn_params) + cursor = conn.cursor() + cursor.execute("SELECT 1") + cursor.close() + conn.close() + return jsonify({'success': True, 'message': f'Successfully connected to MySQL database {data.get("database", "")}.'}) + + elif database_type == 'sqlite': + import sqlite3 + db_path = connection_string or database + if not db_path: + return jsonify({'success': False, 'error': 'Database path is required for SQLite.'}), 400 + conn = sqlite3.connect(db_path, timeout=timeout) + cursor = conn.cursor() + cursor.execute("SELECT 1") + cursor.close() + conn.close() + return jsonify({'success': True, 'message': f'Successfully connected to SQLite database.'}) + + else: + return jsonify({'success': False, 'error': f'Unsupported database type: {database_type}'}), 400 + except ImportError as e: + return jsonify({'success': False, 'error': f'Database driver not installed: {str(e)}'}), 400 + except Exception as e: + error_msg = str(e) + # Sanitize error message to avoid leaking sensitive details + if 'password' in error_msg.lower() or 'pwd' in error_msg.lower(): + error_msg = 'Authentication failed. Please check your credentials.' 
+ return jsonify({'success': False, 'error': f'Connection failed: {error_msg}'}), 400 diff --git a/application/single_app/route_backend_thoughts.py b/application/single_app/route_backend_thoughts.py new file mode 100644 index 00000000..a7624a3f --- /dev/null +++ b/application/single_app/route_backend_thoughts.py @@ -0,0 +1,80 @@ +# route_backend_thoughts.py + +from flask import request, jsonify +from functions_authentication import login_required, user_required, get_current_user_id +from functions_settings import get_settings +from functions_thoughts import get_thoughts_for_message, get_pending_thoughts +from swagger_wrapper import swagger_route, get_auth_security +from functions_appinsights import log_event + + +def register_route_backend_thoughts(app): + + @app.route('/api/conversations//messages//thoughts', methods=['GET']) + @swagger_route(security=get_auth_security()) + @login_required + @user_required + def api_get_message_thoughts(conversation_id, message_id): + """Return persisted thoughts for a specific assistant message.""" + user_id = get_current_user_id() + if not user_id: + return jsonify({'error': 'User not authenticated'}), 401 + + settings = get_settings() + if not settings.get('enable_thoughts', False): + return jsonify({'thoughts': [], 'enabled': False}), 200 + + try: + thoughts = get_thoughts_for_message(conversation_id, message_id, user_id) + # Strip internal Cosmos fields before returning + sanitized = [] + for t in thoughts: + sanitized.append({ + 'id': t.get('id'), + 'step_index': t.get('step_index'), + 'step_type': t.get('step_type'), + 'content': t.get('content'), + 'detail': t.get('detail'), + 'duration_ms': t.get('duration_ms'), + 'timestamp': t.get('timestamp') + }) + return jsonify({'thoughts': sanitized, 'enabled': True}), 200 + except Exception as e: + log_event(f"api_get_message_thoughts error: {e}", level="WARNING") + return jsonify({'error': 'Failed to retrieve thoughts'}), 500 + + @app.route('/api/conversations//thoughts/pending', 
methods=['GET']) + @swagger_route(security=get_auth_security()) + @login_required + @user_required + def api_get_pending_thoughts(conversation_id): + """Return the latest in-progress thoughts for a conversation. + + Used by the non-streaming frontend to poll for thought updates + while waiting for the chat response. + """ + user_id = get_current_user_id() + if not user_id: + return jsonify({'error': 'User not authenticated'}), 401 + + settings = get_settings() + if not settings.get('enable_thoughts', False): + return jsonify({'thoughts': [], 'enabled': False}), 200 + + try: + thoughts = get_pending_thoughts(conversation_id, user_id) + sanitized = [] + for t in thoughts: + sanitized.append({ + 'id': t.get('id'), + 'step_index': t.get('step_index'), + 'step_type': t.get('step_type'), + 'content': t.get('content'), + 'detail': t.get('detail'), + 'duration_ms': t.get('duration_ms'), + 'timestamp': t.get('timestamp') + }) + return jsonify({'thoughts': sanitized, 'enabled': True}), 200 + except Exception as e: + log_event(f"api_get_pending_thoughts error: {e}", level="WARNING") + return jsonify({'error': 'Failed to retrieve pending thoughts'}), 500 diff --git a/application/single_app/route_backend_user_agreement.py b/application/single_app/route_backend_user_agreement.py index f46559ff..b76213b3 100644 --- a/application/single_app/route_backend_user_agreement.py +++ b/application/single_app/route_backend_user_agreement.py @@ -130,7 +130,7 @@ def api_accept_user_agreement(): return jsonify({"error": "workspace_id and workspace_type are required"}), 400 # Validate workspace type - valid_types = ["personal", "group", "public"] + valid_types = ["personal", "group", "public", "chat"] if workspace_type not in valid_types: return jsonify({"error": f"Invalid workspace_type. 
Must be one of: {', '.join(valid_types)}"}), 400 diff --git a/application/single_app/route_enhanced_citations.py b/application/single_app/route_enhanced_citations.py index c81ef225..44a35223 100644 --- a/application/single_app/route_enhanced_citations.py +++ b/application/single_app/route_enhanced_citations.py @@ -8,6 +8,7 @@ import requests import mimetypes import io +import pandas from functions_authentication import login_required, user_required, get_current_user_id from functions_settings import get_settings, enabled_required @@ -15,7 +16,7 @@ from functions_group import get_user_groups from functions_public_workspaces import get_user_visible_public_workspace_ids_from_settings from swagger_wrapper import swagger_route, get_auth_security -from config import CLIENTS, storage_account_user_documents_container_name, storage_account_group_documents_container_name, storage_account_public_documents_container_name, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS, AUDIO_EXTENSIONS +from config import CLIENTS, storage_account_user_documents_container_name, storage_account_group_documents_container_name, storage_account_public_documents_container_name, storage_account_personal_chat_container_name, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS, AUDIO_EXTENSIONS, TABULAR_EXTENSIONS, cosmos_messages_container from functions_debug import debug_print def register_enhanced_citations_routes(app): @@ -183,6 +184,189 @@ def get_enhanced_citation_pdf(): except Exception as e: return jsonify({"error": str(e)}), 500 + @app.route("/api/enhanced_citations/tabular", methods=["GET"]) + @swagger_route(security=get_auth_security()) + @login_required + @user_required + @enabled_required("enable_enhanced_citations") + def get_enhanced_citation_tabular(): + """ + Serve original tabular file (CSV, XLSX, etc.) from blob storage for download. + Used for chat-uploaded tabular files stored in blob storage. 
+ """ + conversation_id = request.args.get("conversation_id") + file_id = request.args.get("file_id") + + if not conversation_id or not file_id: + return jsonify({"error": "conversation_id and file_id are required"}), 400 + + user_id = get_current_user_id() + if not user_id: + return jsonify({"error": "User not authenticated"}), 401 + + try: + # Look up the file message in Cosmos to get blob reference + query_str = """ + SELECT * FROM c + WHERE c.conversation_id = @conversation_id + AND c.id = @file_id + """ + items = list(cosmos_messages_container.query_items( + query=query_str, + parameters=[ + {'name': '@conversation_id', 'value': conversation_id}, + {'name': '@file_id', 'value': file_id} + ], + partition_key=conversation_id + )) + + if not items: + return jsonify({"error": "File not found"}), 404 + + file_msg = items[0] + file_content_source = file_msg.get('file_content_source', '') + + if file_content_source != 'blob': + return jsonify({"error": "File is not stored in blob storage"}), 400 + + blob_container = file_msg.get('blob_container', '') + blob_path = file_msg.get('blob_path', '') + filename = file_msg.get('filename', 'download') + + if not blob_container or not blob_path: + return jsonify({"error": "Blob reference is incomplete"}), 500 + + blob_service_client = CLIENTS.get("storage_account_office_docs_client") + if not blob_service_client: + return jsonify({"error": "Storage not available"}), 500 + + blob_client = blob_service_client.get_blob_client( + container=blob_container, + blob=blob_path + ) + stream = blob_client.download_blob() + content = stream.readall() + + # Determine content type + content_type, _ = mimetypes.guess_type(filename) + if not content_type: + content_type = 'application/octet-stream' + + return Response( + content, + content_type=content_type, + headers={ + 'Content-Length': str(len(content)), + 'Content-Disposition': f'attachment; filename="{filename}"', + 'Cache-Control': 'private, max-age=300', + } + ) + + except Exception 
as e: + debug_print(f"Error serving tabular citation: {e}") + return jsonify({"error": str(e)}), 500 + + @app.route("/api/enhanced_citations/tabular_workspace", methods=["GET"]) + @swagger_route(security=get_auth_security()) + @login_required + @user_required + @enabled_required("enable_enhanced_citations") + def get_enhanced_citation_tabular_workspace(): + """ + Serve tabular file (CSV, XLSX, etc.) from blob storage for workspace documents. + Uses doc_id to look up the document across personal, group, and public workspaces. + """ + doc_id = request.args.get("doc_id") + if not doc_id: + return jsonify({"error": "doc_id is required"}), 400 + + user_id = get_current_user_id() + if not user_id: + return jsonify({"error": "User not authenticated"}), 401 + + try: + doc_response, status_code = get_document(user_id, doc_id) + if status_code != 200: + return doc_response, status_code + + raw_doc = doc_response.get_json() + file_name = raw_doc.get('file_name', '') + ext = file_name.lower().split('.')[-1] if '.' in file_name else '' + + if ext not in ('csv', 'xlsx', 'xls', 'xlsm'): + return jsonify({"error": "File is not a tabular file"}), 400 + + return serve_enhanced_citation_content(raw_doc, force_download=True) + + except Exception as e: + debug_print(f"Error serving tabular workspace citation: {e}") + return jsonify({"error": str(e)}), 500 + + @app.route("/api/enhanced_citations/tabular_preview", methods=["GET"]) + @swagger_route(security=get_auth_security()) + @login_required + @user_required + @enabled_required("enable_enhanced_citations") + def get_enhanced_citation_tabular_preview(): + """ + Return JSON preview of a tabular file for rendering as an HTML table. + Reads the file into a pandas DataFrame and returns columns + rows as JSON. 
+ """ + doc_id = request.args.get("doc_id") + max_rows = min(int(request.args.get("max_rows", 200)), 500) + if not doc_id: + return jsonify({"error": "doc_id is required"}), 400 + + user_id = get_current_user_id() + if not user_id: + return jsonify({"error": "User not authenticated"}), 401 + + try: + doc_response, status_code = get_document(user_id, doc_id) + if status_code != 200: + return doc_response, status_code + + raw_doc = doc_response.get_json() + file_name = raw_doc.get('file_name', '') + ext = file_name.lower().rsplit('.', 1)[-1] if '.' in file_name else '' + if ext not in ('csv', 'xlsx', 'xls', 'xlsm'): + return jsonify({"error": "File is not a tabular file"}), 400 + + # Download blob + workspace_type, container_name = determine_workspace_type_and_container(raw_doc) + blob_name = get_blob_name(raw_doc, workspace_type) + blob_service_client = CLIENTS.get("storage_account_office_docs_client") + if not blob_service_client: + return jsonify({"error": "Blob storage client not available"}), 500 + blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name) + data = blob_client.download_blob().readall() + + # Read into DataFrame + if ext == 'csv': + df = pandas.read_csv(io.BytesIO(data), keep_default_na=False, dtype=str) + elif ext in ('xlsx', 'xlsm'): + df = pandas.read_excel(io.BytesIO(data), engine='openpyxl', keep_default_na=False, dtype=str) + elif ext == 'xls': + df = pandas.read_excel(io.BytesIO(data), engine='xlrd', keep_default_na=False, dtype=str) + else: + return jsonify({"error": f"Unsupported file type: {ext}"}), 400 + + total_rows = len(df) + preview = df.head(max_rows) + + return jsonify({ + "filename": file_name, + "total_rows": total_rows, + "total_columns": len(df.columns), + "columns": list(df.columns), + "rows": preview.values.tolist(), + "truncated": total_rows > max_rows + }) + + except Exception as e: + debug_print(f"Error generating tabular preview: {e}") + return jsonify({"error": str(e)}), 500 + def 
get_document(user_id, doc_id): """ Get document metadata - searches across all enabled workspace types diff --git a/application/single_app/route_frontend_admin_settings.py b/application/single_app/route_frontend_admin_settings.py index 578e1545..fdd77ce5 100644 --- a/application/single_app/route_frontend_admin_settings.py +++ b/application/single_app/route_frontend_admin_settings.py @@ -98,6 +98,8 @@ def admin_settings(): settings['enable_text_plugin'] = False if 'enable_fact_memory_plugin' not in settings: settings['enable_fact_memory_plugin'] = False + if 'enable_tabular_processing_plugin' not in settings: + settings['enable_tabular_processing_plugin'] = False if 'enable_default_embedding_model_plugin' not in settings: settings['enable_default_embedding_model_plugin'] = False if 'enable_multi_agent_orchestration' not in settings: @@ -809,9 +811,10 @@ def is_valid_url(url): 'require_member_of_safety_violation_admin': require_member_of_safety_violation_admin, # ADDED 'require_member_of_feedback_admin': require_member_of_feedback_admin, # ADDED - # Feedback & Archiving + # Feedback, Archiving & Thoughts 'enable_user_feedback': form_data.get('enable_user_feedback') == 'on', 'enable_conversation_archiving': form_data.get('enable_conversation_archiving') == 'on', + 'enable_thoughts': form_data.get('enable_thoughts') == 'on', # Search (Web Search via Azure AI Foundry agent) 'enable_web_search': enable_web_search, diff --git a/application/single_app/route_frontend_chats.py b/application/single_app/route_frontend_chats.py index ca0feb1a..67a41879 100644 --- a/application/single_app/route_frontend_chats.py +++ b/application/single_app/route_frontend_chats.py @@ -237,8 +237,33 @@ def upload_file(): # Handle XML, YAML, and LOG files as text for inline chat extracted_content = extract_text_file(temp_file_path) elif file_ext_nodot in TABULAR_EXTENSIONS: - extracted_content = extract_table_file(temp_file_path, file_ext) is_table = True + + # Upload tabular file to blob storage 
for tabular processing plugin access + if settings.get('enable_enhanced_citations', False): + try: + blob_service_client = CLIENTS.get("storage_account_office_docs_client") + if blob_service_client: + blob_path = f"{user_id}/{conversation_id}/{filename}" + blob_client = blob_service_client.get_blob_client( + container=storage_account_personal_chat_container_name, + blob=blob_path + ) + metadata = { + "conversation_id": str(conversation_id), + "user_id": str(user_id) + } + with open(temp_file_path, "rb") as blob_f: + blob_client.upload_blob(blob_f, overwrite=True, metadata=metadata) + log_event(f"Uploaded chat tabular file to blob storage: {blob_path}") + except Exception as blob_err: + log_event( + f"Warning: Failed to upload chat tabular file to blob storage: {blob_err}", + level=logging.WARNING + ) + else: + # Only extract content for Cosmos storage when enhanced citations is disabled + extracted_content = extract_table_file(temp_file_path, file_ext) else: return jsonify({'error': 'Unsupported file type'}), 400 @@ -395,25 +420,50 @@ def upload_file(): current_thread_id = str(uuid.uuid4()) - file_message = { - 'id': file_message_id, - 'conversation_id': conversation_id, - 'role': 'file', - 'filename': filename, - 'file_content': extracted_content, - 'is_table': is_table, - 'timestamp': datetime.utcnow().isoformat(), - 'model_deployment_name': None, - 'metadata': { - 'thread_info': { - 'thread_id': current_thread_id, - 'previous_thread_id': previous_thread_id, - 'active_thread': True, - 'thread_attempt': 1 + # When enhanced citations is enabled and file is tabular, store a lightweight + # reference without file_content to avoid Cosmos DB size limits. + # The tabular data lives in blob storage and is served from there. 
+ if is_table and settings.get('enable_enhanced_citations', False): + file_message = { + 'id': file_message_id, + 'conversation_id': conversation_id, + 'role': 'file', + 'filename': filename, + 'is_table': is_table, + 'file_content_source': 'blob', + 'blob_container': storage_account_personal_chat_container_name, + 'blob_path': f"{user_id}/{conversation_id}/{filename}", + 'timestamp': datetime.utcnow().isoformat(), + 'model_deployment_name': None, + 'metadata': { + 'thread_info': { + 'thread_id': current_thread_id, + 'previous_thread_id': previous_thread_id, + 'active_thread': True, + 'thread_attempt': 1 + } } } - } - + else: + file_message = { + 'id': file_message_id, + 'conversation_id': conversation_id, + 'role': 'file', + 'filename': filename, + 'file_content': extracted_content, + 'is_table': is_table, + 'timestamp': datetime.utcnow().isoformat(), + 'model_deployment_name': None, + 'metadata': { + 'thread_info': { + 'thread_id': current_thread_id, + 'previous_thread_id': previous_thread_id, + 'active_thread': True, + 'thread_attempt': 1 + } + } + } + # Add vision analysis if available if vision_analysis: file_message['vision_analysis'] = vision_analysis diff --git a/application/single_app/semantic_kernel_loader.py b/application/single_app/semantic_kernel_loader.py index 78f54203..c2a7cc1e 100644 --- a/application/single_app/semantic_kernel_loader.py +++ b/application/single_app/semantic_kernel_loader.py @@ -19,6 +19,7 @@ from semantic_kernel.functions.kernel_plugin import KernelPlugin from semantic_kernel_plugins.embedding_model_plugin import EmbeddingModelPlugin from semantic_kernel_plugins.fact_memory_plugin import FactMemoryPlugin +from semantic_kernel_plugins.tabular_processing_plugin import TabularProcessingPlugin from functions_settings import get_settings, get_user_settings from foundry_agent_runtime import AzureAIFoundryChatCompletionAgent from functions_appinsights import log_event, get_appinsights_logger @@ -408,6 +409,13 @@ def 
def _unwrap_sql_plugin(plugin, plugin_cls):
    """Return the ``plugin_cls`` instance behind a kernel plugin entry, or None.

    Checks, in order: the entry itself, its ``_plugin_instance`` attribute,
    and the bound ``__self__`` of each wrapped function's ``method``.
    """
    if isinstance(plugin, plugin_cls):
        return plugin
    if hasattr(plugin, '_plugin_instance'):
        # When the wrapper exposes ``_plugin_instance`` that is the only
        # candidate considered; the per-function scan is not attempted.
        wrapped = plugin._plugin_instance
        return wrapped if isinstance(wrapped, plugin_cls) else None
    for func in plugin.functions.values():
        owner = getattr(getattr(func, 'method', None), '__self__', None)
        if isinstance(owner, plugin_cls):
            return owner
    return None


def _unwrap_schema_result(schema_result):
    """Return ``schema_result.data`` when a truthy result exposes it, else the result itself."""
    if schema_result and hasattr(schema_result, 'data'):
        return schema_result.data
    return schema_result


def _render_schema_markdown(schema_data) -> str:
    """Render a schema dict (``tables`` plus optional ``relationships``) as Markdown."""
    db_name = schema_data.get("database_name", "Unknown")
    db_type = schema_data.get("database_type", "Unknown")
    chunks = [f"### Database: {db_name} ({db_type})\n\n"]

    for table_name, table_info in schema_data["tables"].items():
        schema_name = table_info.get("schema_name", "dbo")
        qualified_name = f"{schema_name}.{table_name}" if schema_name else table_name
        chunks.append(f"**Table: {qualified_name}**\n")

        columns = table_info.get("columns", [])
        if columns:
            chunks.append("| Column | Type | Nullable |\n|--------|------|----------|\n")
            for col in columns:
                col_name = col.get("column_name", "?")
                col_type = col.get("data_type", "?")
                nullable = "Yes" if col.get("is_nullable", True) else "No"
                chunks.append(f"| {col_name} | {col_type} | {nullable} |\n")

        pks = table_info.get("primary_keys", [])
        if pks:
            chunks.append(f"Primary Key(s): {', '.join(pks)}\n")

        chunks.append("\n")

    relationships = schema_data.get("relationships", [])
    if relationships:
        chunks.append("**Relationships (Foreign Keys):**\n")
        for rel in relationships:
            parent = rel.get("parent_table", "?")
            parent_col = rel.get("parent_column", "?")
            ref = rel.get("referenced_table", "?")
            ref_col = rel.get("referenced_column", "?")
            chunks.append(f"- {parent}.{parent_col} → {ref}.{ref_col}\n")
        chunks.append("\n")

    return "".join(chunks)


def _extract_sql_schema_for_instructions(kernel) -> str:
    """
    Collect the schema of every SQL database reachable through the kernel's plugins
    and format it as Markdown for injection into agent instructions.

    Two passes are made over ``kernel.plugins``:

    1. Every plugin backed by a ``SQLSchemaPlugin`` is asked for its schema via
       ``get_database_schema()``.
    2. Only if pass 1 produced nothing, every plugin backed by a ``SQLQueryPlugin``
       is used as a source of connection settings for a temporary
       ``SQLSchemaPlugin``, which is then queried the same way.

    Failures are best-effort: an error for one plugin is printed and logged at
    WARNING level and the remaining plugins are still processed.

    Args:
        kernel: Object exposing a ``plugins`` mapping of plugin name -> plugin entry.

    Returns:
        The per-database Markdown summaries joined by newlines, or an empty
        string if no schema could be extracted.
    """
    from semantic_kernel_plugins.sql_schema_plugin import SQLSchemaPlugin

    schema_parts = []

    try:
        for plugin_name, plugin in kernel.plugins.items():
            plugin_obj = _unwrap_sql_plugin(plugin, SQLSchemaPlugin)
            if plugin_obj is None:
                continue

            print(f"[SK Loader] Found SQL Schema plugin: {plugin_name}, fetching schema...")
            try:
                schema_data = _unwrap_schema_result(plugin_obj.get_database_schema())

                if isinstance(schema_data, dict) and "tables" in schema_data:
                    schema_parts.append(_render_schema_markdown(schema_data))
                    db_name = schema_data.get("database_name", "Unknown")
                    print(f"[SK Loader] Successfully extracted schema for {db_name}: {len(schema_data['tables'])} tables")
                else:
                    print(f"[SK Loader] Schema data for {plugin_name} was empty or had unexpected format")

            except Exception as e:
                print(f"[SK Loader] Warning: Failed to fetch schema from {plugin_name}: {e}")
                log_event(f"[SK Loader] Failed to fetch SQL schema for injection: {e}",
                          extra={"plugin_name": plugin_name, "error": str(e)},
                          level=logging.WARNING)
    except Exception as e:
        print(f"[SK Loader] Warning: Error iterating kernel plugins for SQL schema: {e}")
        log_event(f"[SK Loader] Error iterating kernel plugins for SQL schema: {e}",
                  extra={"error": str(e)}, level=logging.WARNING)

    # Fallback: no schema came out of the first pass, so borrow the connection
    # settings of any SQLQueryPlugin and read the schema through a temporary
    # SQLSchemaPlugin built from them.
    if not schema_parts:
        from semantic_kernel_plugins.sql_query_plugin import SQLQueryPlugin as _SQLQueryPlugin

        try:
            for plugin_name, plugin in kernel.plugins.items():
                query_obj = _unwrap_sql_plugin(plugin, _SQLQueryPlugin)
                if query_obj is None:
                    continue

                print(f"[SK Loader] Fallback: Found SQLQueryPlugin '{plugin_name}', creating temporary schema extractor...")
                try:
                    temp_manifest = {
                        'type': 'sql_schema',
                        'name': f'{plugin_name}_temp_schema',
                        'database_type': getattr(query_obj, 'database_type', 'azure_sql'),
                        'server': getattr(query_obj, 'server', ''),
                        'database': getattr(query_obj, 'database', ''),
                        'username': getattr(query_obj, 'username', ''),
                        'password': getattr(query_obj, 'password', ''),
                        'driver': getattr(query_obj, 'driver', ''),
                        'connection_string': getattr(query_obj, 'connection_string', ''),
                    }
                    temp_schema = SQLSchemaPlugin(temp_manifest)
                    schema_data = _unwrap_schema_result(temp_schema.get_database_schema())

                    if isinstance(schema_data, dict) and "tables" in schema_data:
                        schema_parts.append(_render_schema_markdown(schema_data))
                        print(f"[SK Loader] Fallback: Successfully extracted schema from SQLQueryPlugin '{plugin_name}': {len(schema_data['tables'])} tables")
                except Exception as e:
                    print(f"[SK Loader] Fallback: Failed to extract schema from SQLQueryPlugin '{plugin_name}': {e}")
                    log_event("[SK Loader] Fallback schema extraction failed",
                              extra={"plugin_name": plugin_name, "error": str(e)},
                              level=logging.WARNING)
        except Exception as e:
            print(f"[SK Loader] Warning: Error in fallback SQL schema extraction: {e}")
            log_event(f"[SK Loader] Error in fallback SQL schema extraction: {e}",
                      extra={"error": str(e)}, level=logging.WARNING)

    return "\n".join(schema_parts)
" + "ALWAYS use these exact table and column names when writing SQL queries.\n\n" + + sql_schema_summary + + "\n\nWhen a user asks a question about data, use the schema above to construct " + "the appropriate SQL query and execute it using the SQL Query plugin functions. " + "Do NOT ask the user for table or column names — use the schema provided above." + ) + print(f"[SK Loader] Injected SQL schema into agent instructions for {agent_config['name']}") + except Exception as e: + print(f"[SK Loader] Warning: Failed to inject SQL schema into instructions: {e}") + log_event(f"[SK Loader] Failed to inject SQL schema into agent instructions: {e}", + extra={"agent_name": agent_config["name"], "error": str(e)}, + level=logging.WARNING) + try: kwargs = { "name": agent_config["name"], @@ -1013,6 +1235,14 @@ def load_plugins_for_kernel(kernel, plugin_manifests, settings, mode_label="glob except Exception as e: log_event(f"[SK Loader] Failed to load Fact Memory Plugin: {e}", level=logging.WARNING) + # Register Tabular Processing Plugin if enabled (requires enhanced citations) + if settings.get('enable_tabular_processing_plugin', False) and settings.get('enable_enhanced_citations', False): + try: + load_tabular_processing_plugin(kernel) + log_event("[SK Loader] Loaded Tabular Processing plugin.", level=logging.INFO) + except Exception as e: + log_event(f"[SK Loader] Failed to load Tabular Processing plugin: {e}", level=logging.WARNING) + # Conditionally load static embedding model plugin if settings.get('enable_default_embedding_model_plugin', True): try: @@ -1357,7 +1587,11 @@ def load_user_semantic_kernel(kernel: Kernel, settings, user_id: str, redis_clie load_embedding_model_plugin(kernel, settings) print(f"[SK Loader] Loaded Default Embedding Model plugin.") log_event("[SK Loader] Loaded Default Embedding Model plugin.", level=logging.INFO) - + + if settings.get('enable_tabular_processing_plugin', False) and settings.get('enable_enhanced_citations', False): + 
def _auto_create_companion_schema_plugin(self, query_manifest: Dict[str, Any], query_plugin_name: str):
    """
    Auto-create a companion SQLSchemaPlugin when a SQLQueryPlugin is loaded.

    The companion is built from a shallow copy of the query plugin's manifest
    (so it uses the same connection details) with ``type`` and ``name``
    overridden, then wrapped for logging and registered with the kernel.

    The companion's name is derived from the query plugin's name: a trailing
    ``_query`` is replaced by ``_schema``; otherwise ``_schema`` is appended.
    Nothing is created if a plugin with that name is already registered.

    This is best-effort: any failure is logged at WARNING level and swallowed
    so that loading the query plugin itself is not affected.

    Args:
        query_manifest: Manifest of the SQL Query plugin that was just loaded.
        query_plugin_name: Name under which the query plugin was registered.
    """
    try:
        # Imported here so this method does not depend on a module-level import
        # being present. NOTE(review): if the module already imports
        # SQLSchemaPlugin at the top, this is a harmless re-import — confirm.
        from semantic_kernel_plugins.sql_schema_plugin import SQLSchemaPlugin

        # Derive schema plugin name from query plugin name
        query_suffix = '_query'
        if query_plugin_name.endswith(query_suffix):
            base_name = query_plugin_name[:-len(query_suffix)]
        else:
            base_name = query_plugin_name
        schema_plugin_name = base_name + '_schema'

        # Check if schema plugin already exists in kernel
        if schema_plugin_name in self.kernel.plugins:
            log_event(
                f"[Logged Plugin Loader] Companion schema plugin already exists: {schema_plugin_name}",
                level=logging.DEBUG
            )
            return

        # Create schema manifest from query manifest (same connection details)
        schema_manifest = dict(query_manifest)
        schema_manifest['type'] = 'sql_schema'
        schema_manifest['name'] = schema_plugin_name

        # Create the schema plugin instance
        schema_instance = SQLSchemaPlugin(schema_manifest)

        # Enable logging if supported
        if hasattr(schema_instance, 'enable_invocation_logging'):
            schema_instance.enable_invocation_logging(True)

        # Wrap functions if it's a BasePlugin
        if isinstance(schema_instance, BasePlugin):
            self._wrap_plugin_functions(schema_instance, schema_plugin_name)

        # Register with kernel
        self._register_plugin_with_kernel(schema_instance, schema_plugin_name)

        log_event(
            f"[Logged Plugin Loader] Auto-created companion SQL Schema plugin: {schema_plugin_name}",
            extra={"query_plugin": query_plugin_name, "schema_plugin": schema_plugin_name},
            level=logging.INFO
        )

    except Exception as e:
        log_event(
            "[Logged Plugin Loader] Warning: Failed to auto-create companion schema plugin",
            extra={"query_plugin": query_plugin_name, "error": str(e)},
            level=logging.WARNING,
            exceptionTraceback=True
        )
a/application/single_app/semantic_kernel_plugins/plugin_invocation_logger.py b/application/single_app/semantic_kernel_plugins/plugin_invocation_logger.py index f982f0a4..bddf9cda 100644 --- a/application/single_app/semantic_kernel_plugins/plugin_invocation_logger.py +++ b/application/single_app/semantic_kernel_plugins/plugin_invocation_logger.py @@ -11,6 +11,7 @@ import logging import functools import inspect +import threading from typing import Any, Dict, List, Optional, Callable from datetime import datetime from dataclasses import dataclass, asdict @@ -51,24 +52,29 @@ def __init__(self): self.invocations: List[PluginInvocation] = [] self.max_history = 1000 # Keep last 1000 invocations in memory self.logger = get_appinsights_logger() or logging.getLogger(__name__) + self._callbacks: Dict[str, List[Callable[[PluginInvocation], None]]] = {} + self._callback_lock = threading.Lock() def log_invocation(self, invocation: PluginInvocation): """Log a plugin invocation to Application Insights and local history.""" # Add to local history self.invocations.append(invocation) - + # Trim history if needed if len(self.invocations) > self.max_history: self.invocations = self.invocations[-self.max_history:] - + # Enhanced terminal logging self._log_to_terminal(invocation) - + # Log to Application Insights self._log_to_appinsights(invocation) - + # Log to standard logging self._log_to_standard(invocation) + + # Fire registered thought callbacks + self._fire_callbacks(invocation) def _log_to_terminal(self, invocation: PluginInvocation): """Log detailed invocation information to terminal.""" @@ -277,6 +283,34 @@ def clear_history(self): """Clear the invocation history.""" self.invocations.clear() + def register_callback(self, key, callback): + """Register a callback fired on each plugin invocation for the given key. + + Args: + key: A string key, typically f"{user_id}:{conversation_id}". + callback: Called with the PluginInvocation after it is logged. 
+ """ + with self._callback_lock: + if key not in self._callbacks: + self._callbacks[key] = [] + self._callbacks[key].append(callback) + + def deregister_callbacks(self, key): + """Remove all callbacks for the given key.""" + with self._callback_lock: + self._callbacks.pop(key, None) + + def _fire_callbacks(self, invocation): + """Fire matching callbacks for this invocation's user+conversation.""" + key = f"{invocation.user_id}:{invocation.conversation_id}" + with self._callback_lock: + callbacks = list(self._callbacks.get(key, [])) + for cb in callbacks: + try: + cb(invocation) + except Exception as e: + log_event(f"Plugin invocation callback error: {e}", level="WARNING") + # Global instance _plugin_logger = PluginInvocationLogger() diff --git a/application/single_app/semantic_kernel_plugins/sql_query_plugin.py b/application/single_app/semantic_kernel_plugins/sql_query_plugin.py index ccad030f..084c4c9b 100644 --- a/application/single_app/semantic_kernel_plugins/sql_query_plugin.py +++ b/application/single_app/semantic_kernel_plugins/sql_query_plugin.py @@ -176,11 +176,12 @@ def metadata(self) -> Dict[str, Any]: user_desc = self._metadata.get("description", f"SQL Query plugin for {self.database_type} database") api_desc = ( "This plugin executes SQL queries against databases and returns structured results. " - "It supports SQL Server, PostgreSQL, MySQL, and SQLite databases. The plugin includes " - "query sanitization, validation, and security features including parameterized queries, " - "read-only mode, result limiting, and timeout protection. It automatically cleans queries " - "from unnecessary characters and formats results for easy consumption by AI agents. " - "The plugin handles database-specific SQL variations and connection management." + "It supports SQL Server, PostgreSQL, MySQL, and SQLite databases. 
" + "WORKFLOW: Before executing any query, you MUST first use the SQL Schema plugin to discover " + "available tables, column names, data types, and relationships. Then construct valid SQL queries " + "using the discovered schema with correct fully-qualified table names (e.g., dbo.TableName). " + "The plugin includes query sanitization, validation, and security features including " + "parameterized queries, read-only mode, result limiting, and timeout protection." ) full_desc = f"{user_desc}\n\n{api_desc}" @@ -215,14 +216,24 @@ def metadata(self) -> Dict[str, Any]: {"name": "query", "type": "str", "description": "The SQL query to validate", "required": True} ], "returns": {"type": "ResultWithMetadata", "description": "Validation result with any issues found"} + }, + { + "name": "query_database", + "description": "Execute a SQL query to answer a question about the database", + "parameters": [ + {"name": "question", "type": "str", "description": "The natural language question being answered", "required": True}, + {"name": "query", "type": "str", "description": "The SQL query to execute", "required": True}, + {"name": "max_rows", "type": "int", "description": "Maximum number of rows to return (overrides default)", "required": False} + ], + "returns": {"type": "ResultWithMetadata", "description": "Query results with columns, data, and original question context"} } ] } def get_functions(self) -> List[str]: - return ["execute_query", "execute_scalar", "validate_query"] + return ["execute_query", "execute_scalar", "validate_query", "query_database"] - @kernel_function(description="Execute a SQL query and return results") + @kernel_function(description="Execute a SQL query against the database and return results as structured data with columns and rows. If the database schema is provided in your instructions, use those exact table and column names to construct valid SQL queries. 
@kernel_function(description="Execute a SQL query to answer a question about the database. This is a convenience function that executes a SQL query and returns results along with the original question for context. If the database schema is provided in your instructions, use those table and column names directly to construct the query. Otherwise, first call get_database_schema from the SQL Schema plugin to discover the schema. Then construct the appropriate SQL query and provide it along with the original question.")
@plugin_function_logger("SQLQueryPlugin")
def query_database(
    self,
    question: str,
    query: str,
    max_rows: Optional[int] = None
) -> ResultWithMetadata:
    """Execute a SQL query to answer a specific question about the database.

    Args:
        question: The natural language question being answered; echoed back in
            the result and attached to log events.
        query: The SQL text. It is cleaned with ``_clean_query`` and checked
            with ``_validate_query`` before execution.
        max_rows: Row limit for this call. A falsy value (``None`` or ``0``)
            falls back to ``self.max_rows``.

    Returns:
        ResultWithMetadata wrapping a dict with ``question``, ``columns``,
        ``data``, ``row_count``, ``is_truncated`` and the cleaned ``query``.
        On any failure (including validation) no exception is raised; the dict
        instead carries ``error``, the original ``query`` and empty results.
    """
    cursor = None
    try:
        # Clean and validate the query
        cleaned_query = self._clean_query(query)
        validation_result = self._validate_query(cleaned_query)

        if not validation_result["is_valid"]:
            raise ValueError(f"Invalid query: {validation_result['issues']}")

        conn = self._get_connection()
        cursor = conn.cursor()

        # Set query timeout where the cursor object supports it
        if hasattr(cursor, 'settimeout'):
            cursor.settimeout(self.timeout)

        cursor.execute(cleaned_query)

        # Get column names
        description = getattr(cursor, 'description', None)
        columns = [desc[0] for desc in description] if description else []

        effective_max_rows = max_rows or self.max_rows

        # Ask for one row beyond the limit so truncation can be told apart
        # from a result set that merely has exactly ``effective_max_rows`` rows.
        if self.database_type == 'sqlite':
            rows = cursor.fetchall()
        else:
            rows = cursor.fetchmany(effective_max_rows + 1)
        is_truncated = len(rows) > effective_max_rows
        rows = rows[:effective_max_rows]

        if self.database_type == 'sqlite':
            results = [dict(row) for row in rows]
        else:
            results = []
            for row in rows:
                if isinstance(row, (dict, str, bytes)):
                    # Already a mapping (or not a column sequence): keep as-is.
                    results.append(row)
                    continue
                try:
                    # Works for tuple/list and for driver row types that are
                    # sequence-like without subclassing tuple (e.g. pyodbc.Row).
                    results.append(dict(zip(columns, row)))
                except TypeError:
                    results.append(row)

        # Prepare result data with question context
        result_data = {
            "question": question,
            "columns": columns,
            "data": results,
            "row_count": len(results),
            "is_truncated": is_truncated,
            "query": cleaned_query
        }

        log_event(f"[SQLQueryPlugin] query_database executed successfully, returned {len(results)} rows", extra={"question": question})
        return ResultWithMetadata(result_data, self.metadata)

    except Exception as e:
        log_event(f"[SQLQueryPlugin] Error in query_database: {e}", extra={"question": question, "query": query})
        error_result = {
            "error": str(e),
            "question": question,
            "query": query,
            "columns": [],
            "data": [],
            "row_count": 0
        }
        return ResultWithMetadata(error_result, self.metadata)

    finally:
        # Release the cursor on success and failure alike. A failure to close
        # must not replace the result being returned, so it is only logged.
        if cursor is not None:
            try:
                cursor.close()
            except Exception as close_err:
                log_event(f"[SQLQueryPlugin] Failed to close cursor in query_database: {close_err}", extra={"question": question})
) full_desc = f"{user_desc}\n\n{api_desc}" @@ -219,7 +219,7 @@ def get_functions(self) -> List[str]: return ["get_database_schema", "get_table_schema", "get_table_list", "get_relationships"] @plugin_function_logger("SQLSchemaPlugin") - @kernel_function(description="Get complete database schema including all tables, columns, and relationships") + @kernel_function(description="Get complete database schema including all tables, columns, data types, primary keys, foreign keys, and relationships. If the database schema is already provided in your instructions, use that directly and do NOT call this function. Only call this function if you need to discover the schema and it was not already provided. The returned schema should be used to construct valid SQL queries with the correct fully-qualified table names (e.g., dbo.TableName) and column references.") def get_database_schema( self, include_system_tables: bool = False, @@ -255,24 +255,26 @@ def get_database_schema( # Get schema for each table for table in tables: - if isinstance(table, tuple) and len(table) >= 2: + try: + # Robust row parsing — works with pyodbc.Row, tuple, list, etc. 
table_name = table[0] - schema_name = table[1] - qualified_table_name = f"{schema_name}.{table_name}" - else: - table_name = table[0] if isinstance(table, tuple) else table + schema_name = table[1] if len(table) >= 2 else None + qualified_table_name = f"{schema_name}.{table_name}" if schema_name else str(table_name) + except (TypeError, IndexError): + table_name = str(table) schema_name = None qualified_table_name = table_name try: - table_schema = self._get_table_schema_data(cursor, table_name, schema_name) - schema_data["tables"][table_name] = table_schema - print(f"[SQLSchemaPlugin] Got schema for table: {qualified_table_name}") + table_schema = self._get_table_schema_data(cursor, str(table_name), str(schema_name) if schema_name else None) + schema_data["tables"][str(table_name)] = table_schema + print(f"[SQLSchemaPlugin] Got schema for table: {qualified_table_name} ({len(table_schema.get('columns', []))} columns)") except Exception as e: print(f"[SQLSchemaPlugin] Error getting schema for table {qualified_table_name}: {e}") log_event(f"[SQLSchemaPlugin] Error getting table schema", extra={ "table_name": qualified_table_name, - "error": str(e) + "error": str(e), + "raw_row": repr(table) }) # Get relationships @@ -333,7 +335,7 @@ def get_database_schema( log_event(f"[SQLSchemaPlugin] Error getting database schema: {e}") raise - @kernel_function(description="Get detailed schema for a specific table") + @kernel_function(description="Get the detailed schema (column names, data types, constraints) for a specific table. If the database schema is already provided in your instructions, use that directly instead of calling this function. 
Only call this if you need details for a specific table not already in your instructions.") @plugin_function_logger("SQLSchemaPlugin") def get_table_schema(self, table_name: str) -> ResultWithMetadata: """Get detailed schema for a specific table""" @@ -350,7 +352,7 @@ def get_table_schema(self, table_name: str) -> ResultWithMetadata: log_event(f"[SQLSchemaPlugin] Error getting table schema for {table_name}: {e}") raise - @kernel_function(description="Get list of all tables in the database") + @kernel_function(description="Return the names of all tables in the database. If the database schema is already provided in your instructions, use that directly instead of calling this function. Only call this if you need to discover available tables and they are not already listed in your instructions.") @plugin_function_logger("SQLSchemaPlugin") def get_table_list( self, @@ -368,14 +370,14 @@ def get_table_list( table_list = [] for table_row in tables: - if isinstance(table_row, (list, tuple)): + try: table_info = { - "table_name": table_row[0], - "schema": table_row[1] if len(table_row) > 1 else None, - "table_type": table_row[2] if len(table_row) > 2 else "TABLE" + "table_name": str(table_row[0]), + "schema": str(table_row[1]) if len(table_row) > 1 else None, + "table_type": str(table_row[2]) if len(table_row) > 2 else "TABLE" } - else: - table_info = {"table_name": table_row, "schema": None, "table_type": "TABLE"} + except (TypeError, IndexError): + table_info = {"table_name": str(table_row), "schema": None, "table_type": "TABLE"} table_list.append(table_info) log_event(f"[SQLSchemaPlugin] Retrieved {len(table_list)} tables") @@ -385,7 +387,7 @@ def get_table_list( log_event(f"[SQLSchemaPlugin] Error getting table list: {e}") raise - @kernel_function(description="Get foreign key relationships between tables") + @kernel_function(description="Get foreign key relationships between tables. 
If the database schema and relationships are already provided in your instructions, use those directly instead of calling this function. Only call this if you need relationship details not already in your instructions.") def get_relationships(self, table_name: Optional[str] = None) -> ResultWithMetadata: """Get foreign key relationships between tables""" try: @@ -402,17 +404,20 @@ def get_relationships(self, table_name: Optional[str] = None) -> ResultWithMetad raise def _get_tables_query(self, include_system_tables: bool, table_filter: Optional[str]) -> str: - """Get database-specific query for listing tables""" + """Get database-specific query for listing tables. + Uses sys.tables/sys.schemas for SQL Server (more reliable than INFORMATION_SCHEMA + in Azure SQL environments with restricted permissions).""" if self.database_type == 'sqlserver': base_query = """ - SELECT TABLE_NAME, TABLE_SCHEMA, TABLE_TYPE - FROM INFORMATION_SCHEMA.TABLES - WHERE TABLE_TYPE = 'BASE TABLE' + SELECT t.name AS TABLE_NAME, s.name AS TABLE_SCHEMA, 'BASE TABLE' AS TABLE_TYPE + FROM sys.tables t + INNER JOIN sys.schemas s ON t.schema_id = s.schema_id + WHERE t.type = 'U' """ if not include_system_tables: - base_query += " AND TABLE_SCHEMA NOT IN ('sys', 'information_schema')" + base_query += " AND s.name NOT IN ('sys', 'information_schema')" if table_filter: - base_query += f" AND TABLE_NAME LIKE '{table_filter.replace('*', '%')}'" + base_query += f" AND t.name LIKE '{table_filter.replace('*', '%')}'" return base_query elif self.database_type == 'postgresql': @@ -467,22 +472,30 @@ def _get_table_schema_data(self, cursor, table_name: str, schema_name: str = Non if pk_query: cursor.execute(pk_query) pks = cursor.fetchall() - schema_data["primary_keys"] = [pk[0] if isinstance(pk, (list, tuple)) else pk for pk in pks] + schema_data["primary_keys"] = [str(pk[0]) for pk in pks] return schema_data def _get_columns_query(self, table_name: str, schema_name: str = None) -> str: - """Get 
database-specific query for table columns""" + """Get database-specific query for table columns. + Uses sys.columns/sys.types for SQL Server (consistent with sys.tables used for enumeration).""" if self.database_type == 'sqlserver': - where_clause = f"WHERE TABLE_NAME = '{table_name}'" - if schema_name: - where_clause += f" AND TABLE_SCHEMA = '{schema_name}'" + schema_filter = f"AND s.name = '{schema_name}'" if schema_name else "" return f""" - SELECT COLUMN_NAME, DATA_TYPE, IS_NULLABLE, COLUMN_DEFAULT, - CHARACTER_MAXIMUM_LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE - FROM INFORMATION_SCHEMA.COLUMNS - {where_clause} - ORDER BY ORDINAL_POSITION + SELECT + c.name AS COLUMN_NAME, + TYPE_NAME(c.user_type_id) AS DATA_TYPE, + CASE WHEN c.is_nullable = 1 THEN 'YES' ELSE 'NO' END AS IS_NULLABLE, + dc.definition AS COLUMN_DEFAULT, + c.max_length AS CHARACTER_MAXIMUM_LENGTH, + c.precision AS NUMERIC_PRECISION, + c.scale AS NUMERIC_SCALE + FROM sys.columns c + INNER JOIN sys.tables t ON c.object_id = t.object_id + INNER JOIN sys.schemas s ON t.schema_id = s.schema_id + LEFT JOIN sys.default_constraints dc ON c.default_object_id = dc.object_id + WHERE t.name = '{table_name}' {schema_filter} + ORDER BY c.column_id """ elif self.database_type == 'postgresql': return f""" @@ -498,16 +511,18 @@ def _get_columns_query(self, table_name: str, schema_name: str = None) -> str: return f"PRAGMA table_info({table_name})" def _get_primary_keys_query(self, table_name: str, schema_name: str = None) -> Optional[str]: - """Get database-specific query for primary keys""" + """Get database-specific query for primary keys. 
+ Uses sys.indexes/sys.index_columns for SQL Server (consistent with sys.tables).""" if self.database_type == 'sqlserver': - where_clause = f"WHERE TABLE_NAME = '{table_name}'" - if schema_name: - where_clause += f" AND TABLE_SCHEMA = '{schema_name}'" + schema_filter = f"AND s.name = '{schema_name}'" if schema_name else "" return f""" - SELECT COLUMN_NAME - FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE - {where_clause} - AND CONSTRAINT_NAME LIKE 'PK_%' + SELECT c.name AS COLUMN_NAME + FROM sys.index_columns ic + INNER JOIN sys.columns c ON ic.object_id = c.object_id AND ic.column_id = c.column_id + INNER JOIN sys.indexes i ON ic.object_id = i.object_id AND ic.index_id = i.index_id + INNER JOIN sys.tables t ON i.object_id = t.object_id + INNER JOIN sys.schemas s ON t.schema_id = s.schema_id + WHERE i.is_primary_key = 1 AND t.name = '{table_name}' {schema_filter} """ elif self.database_type == 'postgresql': return f""" diff --git a/application/single_app/semantic_kernel_plugins/tabular_processing_plugin.py b/application/single_app/semantic_kernel_plugins/tabular_processing_plugin.py new file mode 100644 index 00000000..a525250b --- /dev/null +++ b/application/single_app/semantic_kernel_plugins/tabular_processing_plugin.py @@ -0,0 +1,515 @@ +# tabular_processing_plugin.py +""" +TabularProcessingPlugin for Semantic Kernel: provides data analysis operations +on tabular files (CSV, XLSX, XLS, XLSM) stored in Azure Blob Storage. + +Works with workspace documents (user-documents, group-documents, public-documents) +and chat-uploaded documents (personal-chat container). 
+""" +import asyncio +import io +import json +import logging +import pandas +from typing import Annotated, Optional, List +from semantic_kernel.functions import kernel_function +from semantic_kernel_plugins.plugin_invocation_logger import plugin_function_logger +from functions_appinsights import log_event +from config import ( + CLIENTS, + TABULAR_EXTENSIONS, + storage_account_user_documents_container_name, + storage_account_personal_chat_container_name, + storage_account_group_documents_container_name, + storage_account_public_documents_container_name, +) + + +class TabularProcessingPlugin: + """Provides data analysis functions on tabular files stored in blob storage.""" + + SUPPORTED_EXTENSIONS = {'.csv', '.xlsx', '.xls', '.xlsm'} + + def __init__(self): + self._df_cache = {} # Per-instance cache: (container, blob_name) -> DataFrame + + def _get_blob_service_client(self): + """Get the blob service client from CLIENTS cache.""" + client = CLIENTS.get("storage_account_office_docs_client") + if not client: + raise RuntimeError("Blob storage client not available. Enhanced citations must be enabled.") + return client + + def _list_tabular_blobs(self, container_name: str, prefix: str) -> List[str]: + """List all tabular file blobs under a given prefix.""" + client = self._get_blob_service_client() + container_client = client.get_container_client(container_name) + blobs = [] + for blob in container_client.list_blobs(name_starts_with=prefix): + name_lower = blob['name'].lower() + if any(name_lower.endswith(ext) for ext in self.SUPPORTED_EXTENSIONS): + blobs.append(blob['name']) + return blobs + + def _read_tabular_blob_to_dataframe(self, container_name: str, blob_name: str) -> pandas.DataFrame: + """Download a blob and read it into a pandas DataFrame. 
Uses per-instance cache.""" + cache_key = (container_name, blob_name) + if cache_key in self._df_cache: + log_event(f"[TabularProcessingPlugin] Cache hit for {blob_name}", level=logging.DEBUG) + return self._df_cache[cache_key].copy() + + client = self._get_blob_service_client() + blob_client = client.get_blob_client(container=container_name, blob=blob_name) + stream = blob_client.download_blob() + data = stream.readall() + + name_lower = blob_name.lower() + if name_lower.endswith('.csv'): + df = pandas.read_csv(io.BytesIO(data), keep_default_na=False, dtype=str) + elif name_lower.endswith('.xlsx') or name_lower.endswith('.xlsm'): + df = pandas.read_excel(io.BytesIO(data), engine='openpyxl', keep_default_na=False, dtype=str) + elif name_lower.endswith('.xls'): + df = pandas.read_excel(io.BytesIO(data), engine='xlrd', keep_default_na=False, dtype=str) + else: + raise ValueError(f"Unsupported tabular file type: {blob_name}") + + self._df_cache[cache_key] = df + log_event(f"[TabularProcessingPlugin] Cached DataFrame for {blob_name} ({len(df)} rows)", level=logging.DEBUG) + return df.copy() + + def _try_numeric_conversion(self, df: pandas.DataFrame) -> pandas.DataFrame: + """Attempt to convert string columns to numeric where possible.""" + for col in df.columns: + try: + df[col] = pandas.to_numeric(df[col]) + except (ValueError, TypeError): + pass + return df + + def _resolve_blob_location(self, user_id: str, conversation_id: str, filename: str, source: str, + group_id: str = None, public_workspace_id: str = None) -> tuple: + """Resolve container name and blob path from source type.""" + source = source.lower().strip() + if source == 'chat': + container = storage_account_personal_chat_container_name + blob_path = f"{user_id}/{conversation_id}/{filename}" + elif source == 'workspace': + container = storage_account_user_documents_container_name + blob_path = f"{user_id}/{filename}" + elif source == 'group': + if not group_id: + raise ValueError("group_id is required for 
source='group'") + container = storage_account_group_documents_container_name + blob_path = f"{group_id}/{filename}" + elif source == 'public': + if not public_workspace_id: + raise ValueError("public_workspace_id is required for source='public'") + container = storage_account_public_documents_container_name + blob_path = f"{public_workspace_id}/{filename}" + else: + raise ValueError(f"Unknown source '{source}'. Use 'workspace', 'chat', 'group', or 'public'.") + return container, blob_path + + def _resolve_blob_location_with_fallback(self, user_id: str, conversation_id: str, filename: str, source: str, + group_id: str = None, public_workspace_id: str = None) -> tuple: + """Try primary source first, then fall back to other containers if blob not found.""" + source = source.lower().strip() + attempts = [] + + # Primary attempt based on specified source + try: + primary = self._resolve_blob_location(user_id, conversation_id, filename, source, group_id, public_workspace_id) + attempts.append(primary) + except ValueError: + pass + + # Fallback attempts in priority order (skip the primary source) + if source != 'workspace': + attempts.append((storage_account_user_documents_container_name, f"{user_id}/{filename}")) + if source != 'group' and group_id: + attempts.append((storage_account_group_documents_container_name, f"{group_id}/{filename}")) + if source != 'public' and public_workspace_id: + attempts.append((storage_account_public_documents_container_name, f"{public_workspace_id}/{filename}")) + if source != 'chat': + attempts.append((storage_account_personal_chat_container_name, f"{user_id}/{conversation_id}/{filename}")) + + client = self._get_blob_service_client() + for container, blob_path in attempts: + try: + blob_client = client.get_blob_client(container=container, blob=blob_path) + if blob_client.exists(): + log_event(f"[TabularProcessingPlugin] Found blob at {container}/{blob_path}", level=logging.DEBUG) + return container, blob_path + except Exception: + 
continue + + # If nothing found, return primary for the original error message + if attempts: + return attempts[0] + raise ValueError(f"Could not resolve blob location for {filename}") + + @kernel_function( + description=( + "List all tabular data files available for a user. Checks workspace documents " + "(user-documents container), chat-uploaded documents (personal-chat container), " + "and optionally group or public workspace documents. " + "Returns a JSON list of available files with their source." + ), + name="list_tabular_files" + ) + @plugin_function_logger("TabularProcessingPlugin") + async def list_tabular_files( + self, + user_id: Annotated[str, "The user ID (from Scope ID in Conversation Metadata)"], + conversation_id: Annotated[str, "The conversation ID (from Conversation Metadata)"], + group_id: Annotated[Optional[str], "Group ID (for group workspace documents)"] = None, + public_workspace_id: Annotated[Optional[str], "Public workspace ID (for public workspace documents)"] = None, + ) -> Annotated[str, "JSON list of available tabular files"]: + """List all tabular files available for the user across all accessible containers.""" + def _sync_work(): + results = [] + try: + workspace_prefix = f"{user_id}/" + workspace_blobs = self._list_tabular_blobs( + storage_account_user_documents_container_name, workspace_prefix + ) + for blob in workspace_blobs: + filename = blob.split('/')[-1] + results.append({ + "filename": filename, + "blob_path": blob, + "source": "workspace", + "container": storage_account_user_documents_container_name + }) + except Exception as e: + log_event(f"[TabularProcessingPlugin] Error listing workspace blobs: {e}", level=logging.WARNING) + + try: + chat_prefix = f"{user_id}/{conversation_id}/" + chat_blobs = self._list_tabular_blobs( + storage_account_personal_chat_container_name, chat_prefix + ) + for blob in chat_blobs: + filename = blob.split('/')[-1] + results.append({ + "filename": filename, + "blob_path": blob, + "source": 
"chat", + "container": storage_account_personal_chat_container_name + }) + except Exception as e: + log_event(f"[TabularProcessingPlugin] Error listing chat blobs: {e}", level=logging.WARNING) + + if group_id: + try: + group_prefix = f"{group_id}/" + group_blobs = self._list_tabular_blobs( + storage_account_group_documents_container_name, group_prefix + ) + for blob in group_blobs: + filename = blob.split('/')[-1] + results.append({ + "filename": filename, + "blob_path": blob, + "source": "group", + "container": storage_account_group_documents_container_name + }) + except Exception as e: + log_event(f"[TabularProcessingPlugin] Error listing group blobs: {e}", level=logging.WARNING) + + if public_workspace_id: + try: + public_prefix = f"{public_workspace_id}/" + public_blobs = self._list_tabular_blobs( + storage_account_public_documents_container_name, public_prefix + ) + for blob in public_blobs: + filename = blob.split('/')[-1] + results.append({ + "filename": filename, + "blob_path": blob, + "source": "public", + "container": storage_account_public_documents_container_name + }) + except Exception as e: + log_event(f"[TabularProcessingPlugin] Error listing public blobs: {e}", level=logging.WARNING) + + return json.dumps(results, indent=2) + return await asyncio.to_thread(_sync_work) + + @kernel_function( + description=( + "Get a summary of a tabular file including column names, row count, data types, " + "and a preview of the first few rows." 
+ ), + name="describe_tabular_file" + ) + @plugin_function_logger("TabularProcessingPlugin") + async def describe_tabular_file( + self, + user_id: Annotated[str, "The user ID (from Scope ID in Conversation Metadata)"], + conversation_id: Annotated[str, "The conversation ID (from Conversation Metadata)"], + filename: Annotated[str, "The filename of the tabular file"], + source: Annotated[str, "Source: 'workspace', 'chat', 'group', or 'public'"] = "chat", + group_id: Annotated[Optional[str], "Group ID (for group workspace documents)"] = None, + public_workspace_id: Annotated[Optional[str], "Public workspace ID (for public workspace documents)"] = None, + ) -> Annotated[str, "JSON summary of the tabular file"]: + """Get schema and preview of a tabular file.""" + def _sync_work(): + try: + container, blob_path = self._resolve_blob_location( + user_id, conversation_id, filename, source, + group_id=group_id, public_workspace_id=public_workspace_id + ) + df = self._read_tabular_blob_to_dataframe(container, blob_path) + df_numeric = self._try_numeric_conversion(df.copy()) + + summary = { + "filename": filename, + "row_count": len(df), + "column_count": len(df.columns), + "columns": list(df.columns), + "dtypes": {col: str(dtype) for col, dtype in df_numeric.dtypes.items()}, + "preview": df.head(5).to_dict(orient='records'), + "null_counts": df.isnull().sum().to_dict() + } + return json.dumps(summary, indent=2, default=str) + except Exception as e: + log_event(f"[TabularProcessingPlugin] Error describing file: {e}", level=logging.WARNING) + return json.dumps({"error": str(e)}) + return await asyncio.to_thread(_sync_work) + + @kernel_function( + description=( + "Execute an aggregation operation on a column of a tabular file. " + "Supported operations: sum, mean, count, min, max, median, std, nunique, value_counts." 
+ ), + name="aggregate_column" + ) + @plugin_function_logger("TabularProcessingPlugin") + async def aggregate_column( + self, + user_id: Annotated[str, "The user ID (from Scope ID in Conversation Metadata)"], + conversation_id: Annotated[str, "The conversation ID (from Conversation Metadata)"], + filename: Annotated[str, "The filename of the tabular file"], + column: Annotated[str, "The column name to aggregate"], + operation: Annotated[str, "Aggregation: sum, mean, count, min, max, median, std, nunique, value_counts"], + source: Annotated[str, "Source: 'workspace', 'chat', 'group', or 'public'"] = "chat", + group_id: Annotated[Optional[str], "Group ID (for group workspace documents)"] = None, + public_workspace_id: Annotated[Optional[str], "Public workspace ID (for public workspace documents)"] = None, + ) -> Annotated[str, "JSON result of the aggregation"]: + """Execute an aggregation operation on a column.""" + def _sync_work(): + try: + container, blob_path = self._resolve_blob_location( + user_id, conversation_id, filename, source, + group_id=group_id, public_workspace_id=public_workspace_id + ) + df = self._read_tabular_blob_to_dataframe(container, blob_path) + df = self._try_numeric_conversion(df) + + if column not in df.columns: + return json.dumps({"error": f"Column '{column}' not found. Available: {list(df.columns)}"}) + + series = df[column] + op = operation.lower().strip() + + if op == 'sum': + result = series.sum() + elif op == 'mean': + result = series.mean() + elif op == 'count': + result = series.count() + elif op == 'min': + result = series.min() + elif op == 'max': + result = series.max() + elif op == 'median': + result = series.median() + elif op == 'std': + result = series.std() + elif op == 'nunique': + result = series.nunique() + elif op == 'value_counts': + result = series.value_counts().to_dict() + else: + return json.dumps({"error": f"Unsupported operation: {operation}. 
Use sum, mean, count, min, max, median, std, nunique, value_counts."}) + + return json.dumps({"column": column, "operation": op, "result": result}, indent=2, default=str) + except Exception as e: + log_event(f"[TabularProcessingPlugin] Error aggregating column: {e}", level=logging.WARNING) + return json.dumps({"error": str(e)}) + return await asyncio.to_thread(_sync_work) + + @kernel_function( + description=( + "Filter rows in a tabular file based on conditions and return matching rows. " + "Supports operators: ==, !=, >, <, >=, <=, contains, startswith, endswith." + ), + name="filter_rows" + ) + @plugin_function_logger("TabularProcessingPlugin") + async def filter_rows( + self, + user_id: Annotated[str, "The user ID (from Scope ID in Conversation Metadata)"], + conversation_id: Annotated[str, "The conversation ID (from Conversation Metadata)"], + filename: Annotated[str, "The filename of the tabular file"], + column: Annotated[str, "The column to filter on"], + operator: Annotated[str, "Operator: ==, !=, >, <, >=, <=, contains, startswith, endswith"], + value: Annotated[str, "The value to compare against"], + source: Annotated[str, "Source: 'workspace', 'chat', 'group', or 'public'"] = "chat", + max_rows: Annotated[str, "Maximum rows to return"] = "100", + group_id: Annotated[Optional[str], "Group ID (for group workspace documents)"] = None, + public_workspace_id: Annotated[Optional[str], "Public workspace ID (for public workspace documents)"] = None, + ) -> Annotated[str, "JSON list of matching rows"]: + """Filter rows based on a condition.""" + def _sync_work(): + try: + container, blob_path = self._resolve_blob_location( + user_id, conversation_id, filename, source, + group_id=group_id, public_workspace_id=public_workspace_id + ) + df = self._read_tabular_blob_to_dataframe(container, blob_path) + df = self._try_numeric_conversion(df) + + if column not in df.columns: + return json.dumps({"error": f"Column '{column}' not found. 
Available: {list(df.columns)}"}) + + series = df[column] + op = operator.strip().lower() + + numeric_value = None + try: + numeric_value = float(value) + except (ValueError, TypeError): + pass + + if op == '==' or op == 'equals': + if numeric_value is not None and pandas.api.types.is_numeric_dtype(series): + mask = series == numeric_value + else: + mask = series.astype(str).str.lower() == value.lower() + elif op == '!=': + if numeric_value is not None and pandas.api.types.is_numeric_dtype(series): + mask = series != numeric_value + else: + mask = series.astype(str).str.lower() != value.lower() + elif op == '>': + mask = series > numeric_value + elif op == '<': + mask = series < numeric_value + elif op == '>=': + mask = series >= numeric_value + elif op == '<=': + mask = series <= numeric_value + elif op == 'contains': + mask = series.astype(str).str.contains(value, case=False, na=False) + elif op == 'startswith': + mask = series.astype(str).str.lower().str.startswith(value.lower()) + elif op == 'endswith': + mask = series.astype(str).str.lower().str.endswith(value.lower()) + else: + return json.dumps({"error": f"Unsupported operator: {operator}"}) + + limit = int(max_rows) + filtered = df[mask].head(limit) + return json.dumps({ + "total_matches": int(mask.sum()), + "returned_rows": len(filtered), + "data": filtered.to_dict(orient='records') + }, indent=2, default=str) + except Exception as e: + log_event(f"[TabularProcessingPlugin] Error filtering rows: {e}", level=logging.WARNING) + return json.dumps({"error": str(e)}) + return await asyncio.to_thread(_sync_work) + + @kernel_function( + description=( + "Execute a pandas query expression against a tabular file for advanced analysis. " + "The query string uses pandas DataFrame.query() syntax. 
" + "Examples: 'Age > 30 and State == \"CA\"', 'Price < 100'" + ), + name="query_tabular_data" + ) + @plugin_function_logger("TabularProcessingPlugin") + async def query_tabular_data( + self, + user_id: Annotated[str, "The user ID (from Scope ID in Conversation Metadata)"], + conversation_id: Annotated[str, "The conversation ID (from Conversation Metadata)"], + filename: Annotated[str, "The filename of the tabular file"], + query_expression: Annotated[str, "Pandas query expression (e.g. 'Age > 30 and State == \"CA\"')"], + source: Annotated[str, "Source: 'workspace', 'chat', 'group', or 'public'"] = "chat", + max_rows: Annotated[str, "Maximum rows to return"] = "100", + group_id: Annotated[Optional[str], "Group ID (for group workspace documents)"] = None, + public_workspace_id: Annotated[Optional[str], "Public workspace ID (for public workspace documents)"] = None, + ) -> Annotated[str, "JSON result of the query"]: + """Execute a pandas query expression against a tabular file.""" + def _sync_work(): + try: + container, blob_path = self._resolve_blob_location( + user_id, conversation_id, filename, source, + group_id=group_id, public_workspace_id=public_workspace_id + ) + df = self._read_tabular_blob_to_dataframe(container, blob_path) + df = self._try_numeric_conversion(df) + + result_df = df.query(query_expression) + limit = int(max_rows) + return json.dumps({ + "total_matches": len(result_df), + "returned_rows": min(len(result_df), limit), + "data": result_df.head(limit).to_dict(orient='records') + }, indent=2, default=str) + except Exception as e: + log_event(f"[TabularProcessingPlugin] Error querying data: {e}", level=logging.WARNING) + return json.dumps({"error": f"Query error: {str(e)}. Ensure column names and values are correct."}) + return await asyncio.to_thread(_sync_work) + + @kernel_function( + description=( + "Perform a group-by aggregation on a tabular file. " + "Groups data by one column and aggregates another column. 
" + "Supported operations: sum, mean, count, min, max." + ), + name="group_by_aggregate" + ) + @plugin_function_logger("TabularProcessingPlugin") + async def group_by_aggregate( + self, + user_id: Annotated[str, "The user ID (from Scope ID in Conversation Metadata)"], + conversation_id: Annotated[str, "The conversation ID (from Conversation Metadata)"], + filename: Annotated[str, "The filename of the tabular file"], + group_by_column: Annotated[str, "The column to group by"], + aggregate_column: Annotated[str, "The column to aggregate"], + operation: Annotated[str, "Aggregation operation: sum, mean, count, min, max"], + source: Annotated[str, "Source: 'workspace', 'chat', 'group', or 'public'"] = "chat", + group_id: Annotated[Optional[str], "Group ID (for group workspace documents)"] = None, + public_workspace_id: Annotated[Optional[str], "Public workspace ID (for public workspace documents)"] = None, + ) -> Annotated[str, "JSON result of the group-by aggregation"]: + """Group by one column and aggregate another.""" + def _sync_work(): + try: + container, blob_path = self._resolve_blob_location( + user_id, conversation_id, filename, source, + group_id=group_id, public_workspace_id=public_workspace_id + ) + df = self._read_tabular_blob_to_dataframe(container, blob_path) + df = self._try_numeric_conversion(df) + + for col in [group_by_column, aggregate_column]: + if col not in df.columns: + return json.dumps({"error": f"Column '{col}' not found. 
Available: {list(df.columns)}"}) + + op = operation.lower().strip() + grouped = df.groupby(group_by_column)[aggregate_column].agg(op) + return json.dumps({ + "group_by": group_by_column, + "aggregate_column": aggregate_column, + "operation": op, + "groups": len(grouped), + "result": grouped.to_dict() + }, indent=2, default=str) + except Exception as e: + log_event(f"[TabularProcessingPlugin] Error in group-by: {e}", level=logging.WARNING) + return json.dumps({"error": str(e)}) + return await asyncio.to_thread(_sync_work) diff --git a/application/single_app/static/css/chats.css b/application/single_app/static/css/chats.css index f672b28f..7a6232a2 100644 --- a/application/single_app/static/css/chats.css +++ b/application/single_app/static/css/chats.css @@ -1676,4 +1676,160 @@ mark.search-highlight { 100% { transform: scale(1.05); } +} + +/* ============================================= + Processing Thoughts + ============================================= */ + +/* Loading indicator thought text */ +.thought-live-text { + font-style: italic; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + max-width: 300px; +} + +/* Toggle button in message footer */ +.thoughts-toggle-btn { + font-size: 0.9rem; + color: #6c757d; + padding: 0 0.25rem; + border: none; + background: none; + cursor: pointer; + transition: color 0.15s ease-in-out; +} + +.thoughts-toggle-btn:hover { + color: #ffc107; +} + +/* Collapsible container inside message bubble */ +.thoughts-container { + max-height: 300px; + overflow-y: auto; + font-size: 0.85rem; +} + +/* Timeline wrapper */ +.thoughts-list { + position: relative; + padding-left: 1.25rem; +} + +/* Vertical timeline line */ +.thoughts-list::before { + content: ''; + position: absolute; + left: 0.5rem; + top: 0.25rem; + bottom: 0.25rem; + width: 2px; + background: linear-gradient(to bottom, #0d6efd, #6ea8fe); + border-radius: 1px; +} + +/* Individual thought step */ +.thought-step { + display: flex; + align-items: flex-start; 
+ padding-left: 0.75rem; + padding-top: 0.25rem; + padding-bottom: 0.25rem; + position: relative; +} + +/* Timeline node dot */ +.thought-step::before { + content: ''; + position: absolute; + left: -1rem; + top: 0.55rem; + width: 8px; + height: 8px; + border-radius: 50%; + background-color: #0d6efd; + border: 2px solid #fff; + box-shadow: 0 0 0 1px #0d6efd; + z-index: 1; +} + +/* Last thought step gets a slightly different dot */ +.thought-step:last-child::before { + background-color: #198754; + box-shadow: 0 0 0 1px #198754; +} + +.thought-step i { + flex-shrink: 0; + margin-top: 2px; +} + +/* Streaming cursor thought badge pulse animation */ +.animate-pulse { + animation: thought-pulse 1.5s ease-in-out infinite; +} + +/* Streaming thought display (before content arrives) */ +.streaming-thought-display { + display: flex; + align-items: center; + padding: 0.5rem 0; +} + +/* Light mode: use darker, more readable colors */ +.streaming-thought-display .badge { + background-color: rgba(13, 110, 253, 0.08) !important; + color: #0a58ca !important; + border-color: rgba(13, 110, 253, 0.25) !important; +} + +/* Dark mode: lighter accent colors */ +[data-bs-theme="dark"] .streaming-thought-display .badge { + background-color: rgba(13, 202, 240, 0.15) !important; + color: #6edff6 !important; + border-color: rgba(13, 202, 240, 0.3) !important; +} + +@keyframes thought-pulse { + 0%, 100% { + opacity: 1; + } + 50% { + opacity: 0.6; + } +} + +/* Dark mode overrides */ +[data-bs-theme="dark"] .thoughts-toggle-btn { + color: #adb5bd; +} + +[data-bs-theme="dark"] .thoughts-toggle-btn:hover { + color: #ffc107; +} + +[data-bs-theme="dark"] .thought-step { + /* Dark mode dot border matches dark background */ +} + +[data-bs-theme="dark"] .thought-step::before { + border-color: #212529; + background-color: #6ea8fe; + box-shadow: 0 0 0 1px #6ea8fe; +} + +[data-bs-theme="dark"] .thought-step:last-child::before { + background-color: #75b798; + box-shadow: 0 0 0 1px #75b798; +} + 
+[data-bs-theme="dark"] .thoughts-list::before { + background: linear-gradient(to bottom, #6ea8fe, #9ec5fe); +} + +[data-bs-theme="dark"] .thoughts-container { + border-top-color: #495057 !important; } \ No newline at end of file diff --git a/application/single_app/static/css/styles.css b/application/single_app/static/css/styles.css index c0632a99..eacc8859 100644 --- a/application/single_app/static/css/styles.css +++ b/application/single_app/static/css/styles.css @@ -502,6 +502,95 @@ main { flex-grow: 1; } +/* ============================================ + Item cards (agents/actions grid view) + ============================================ */ +.item-card { + cursor: default; + transition: all 0.3s ease; + border: 1px solid #dee2e6; + border-radius: 0.375rem; + background-color: #ffffff; +} + +.item-card:hover { + border-color: #adb5bd; + transform: translateY(-2px); + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); +} + +.item-card .card-title { + font-weight: 600; + font-size: 0.9rem; + color: #212529; +} + +.item-card .card-text { + color: #6c757d; + font-size: 0.8rem; + line-height: 1.4; +} + +.item-card .item-card-icon { + color: #0d6efd; +} + +.item-card .item-card-buttons { + border-top: 1px solid #f0f0f0; + padding-top: 0.5rem; +} + +/* Dark mode for item cards */ +[data-bs-theme="dark"] .item-card { + background-color: #343a40; + border: 1px solid #495057; + color: #e9ecef; +} + +[data-bs-theme="dark"] .item-card:hover { + background-color: #3d444b; + border-color: #6c757d; +} + +[data-bs-theme="dark"] .item-card .card-title { + color: #e9ecef; +} + +[data-bs-theme="dark"] .item-card .card-text { + color: #adb5bd; +} + +[data-bs-theme="dark"] .item-card .item-card-icon { + color: #6ea8fe; +} + +[data-bs-theme="dark"] .item-card .item-card-buttons { + border-top-color: #495057; +} + +/* Improved table column layout for agents and actions */ +.item-list-table th:nth-child(1), +.item-list-table td:nth-child(1) { + width: 28%; + min-width: 140px; +} + 
+.item-list-table th:nth-child(2), +.item-list-table td:nth-child(2) { + width: 47%; + max-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.item-list-table th:nth-child(3), +.item-list-table td:nth-child(3) { + width: 25%; + min-width: 160px; + white-space: nowrap; +} + /* Connection type buttons */ .connection-type-btn { border: 2px solid #dee2e6; @@ -855,144 +944,170 @@ main { color: #b399ff !important; /* Purple-ish for visited links */ } -/* ============================================= - SimpleMDE Prompt Content Toolbar - Bootstrap Icons replacement for Font Awesome - ============================================= */ -.editor-toolbar a[title] { - font-family: "bootstrap-icons" !important; - font-style: normal; - font-size: 1rem; - line-height: 30px; -} - -.editor-toolbar a.fa-bold::before { content: "\f5f0"; } /* bi-type-bold */ -.editor-toolbar a.fa-italic::before { content: "\f5f4"; } /* bi-type-italic */ -.editor-toolbar a.fa-strikethrough::before { content: "\f5f5"; } /* bi-type-strikethrough */ -.editor-toolbar a.fa-header::before { content: "\f5f1"; } /* bi-type-h1 */ -.editor-toolbar a.fa-quote-left::before { content: "\f190"; } /* bi-blockquote-left */ -.editor-toolbar a.fa-list-ul::before { content: "\f478"; } /* bi-list-ul */ -.editor-toolbar a.fa-list-ol::before { content: "\f475"; } /* bi-list-ol */ -.editor-toolbar a.fa-code::before { content: "\f2c6"; } /* bi-code-slash */ -.editor-toolbar a.fa-link::before { content: "\f470"; } /* bi-link-45deg */ -.editor-toolbar a.fa-picture-o::before { content: "\f42a"; } /* bi-image */ -.editor-toolbar a.fa-table::before { content: "\f5aa"; } /* bi-table */ -.editor-toolbar a.fa-minus::before { content: "\f63b"; } /* bi-dash-lg */ -.editor-toolbar a.fa-eye::before { content: "\f341"; } /* bi-eye */ -.editor-toolbar a.fa-columns::before { content: "\f460"; } /* bi-layout-split */ -.editor-toolbar a.fa-arrows-alt::before { content: "\f14d"; } /* bi-arrows-fullscreen */ 
-.editor-toolbar a.fa-undo::before { content: "\f117"; } /* bi-arrow-counterclockwise */ -.editor-toolbar a.fa-repeat::before { content: "\f116"; } /* bi-arrow-clockwise */ -.editor-toolbar a.fa-question-circle::before { content: "\f505"; } /* bi-question-circle */ -.editor-toolbar a.fa-eraser::before { content: "\f331"; } /* bi-eraser */ - -.editor-toolbar a::before { - font-family: "bootstrap-icons" !important; - font-style: normal; - font-weight: normal; - display: inline-block; -} - -.editor-toolbar { - opacity: 1 !important; -} - -.editor-toolbar a { - font-size: 0; - width: 30px; - height: 30px; - display: inline-flex; - align-items: center; - justify-content: center; +/* ============================================ + Rendered Markdown — table & code block styles + Shared by agent detail view, template preview, + and any non-chat area that renders Markdown. + ============================================ */ + +/* --- Tables --- */ +.rendered-markdown table { + width: 100%; + max-width: 100%; + margin: 0.75rem 0; + border-collapse: collapse; + border-spacing: 0; + border: 1px solid #dee2e6; + border-radius: 0.375rem; + overflow: hidden; + background-color: var(--bs-body-bg); + box-shadow: 0 0.125rem 0.25rem rgba(0, 0, 0, 0.075); + font-size: 0.875rem; + display: block; + overflow-x: auto; + white-space: nowrap; + -webkit-overflow-scrolling: touch; } -.editor-toolbar a::before { - font-size: 1rem; +@media (min-width: 768px) { + .rendered-markdown table { + display: table; + white-space: normal; + } } -/* ============================================= - SimpleMDE Dark Mode Overrides - ============================================= */ -[data-bs-theme="dark"] .editor-toolbar { - background-color: #343a40; - border-color: #495057; +.rendered-markdown table th, +.rendered-markdown table td { + padding: 0.5rem 0.75rem; + border-bottom: 1px solid #dee2e6; + border-right: 1px solid #dee2e6; + text-align: left; + vertical-align: top; + word-wrap: break-word; + line-height: 
1.4; } -[data-bs-theme="dark"] .editor-toolbar a { - color: #adb5bd !important; +.rendered-markdown table th:last-child, +.rendered-markdown table td:last-child { + border-right: none; } -[data-bs-theme="dark"] .editor-toolbar a::before { - color: #adb5bd; +.rendered-markdown table thead th { + background-color: #f8f9fa; + font-weight: 600; + color: #495057; + border-bottom: 2px solid #dee2e6; } -[data-bs-theme="dark"] .editor-toolbar a:hover, -[data-bs-theme="dark"] .editor-toolbar a:focus { - background-color: #495057; - border-color: #6c757d; - color: #e9ecef !important; +.rendered-markdown table tbody tr:nth-child(even) { + background-color: rgba(0, 0, 0, 0.02); } -[data-bs-theme="dark"] .editor-toolbar a:hover::before, -[data-bs-theme="dark"] .editor-toolbar a:focus::before { - color: #e9ecef; +.rendered-markdown table tbody tr:hover { + background-color: rgba(0, 0, 0, 0.04); + transition: background-color 0.15s ease-in-out; } -[data-bs-theme="dark"] .editor-toolbar a.active, -[data-bs-theme="dark"] .editor-toolbar a.active::before { - color: #86b7fe !important; - background-color: #1e3a5f; +.rendered-markdown table th[align="center"], +.rendered-markdown table td[align="center"] { + text-align: center; } -[data-bs-theme="dark"] .editor-toolbar i.separator { - border-left-color: #495057; - border-right-color: #495057; +.rendered-markdown table th[align="right"], +.rendered-markdown table td[align="right"] { + text-align: right; } -[data-bs-theme="dark"] .CodeMirror { - background-color: #212529; +/* Dark mode tables */ +[data-bs-theme="dark"] .rendered-markdown table { + border-color: #495057; + background-color: var(--bs-dark); color: #e9ecef; +} + +[data-bs-theme="dark"] .rendered-markdown table th, +[data-bs-theme="dark"] .rendered-markdown table td { border-color: #495057; } -[data-bs-theme="dark"] .CodeMirror-cursor { - border-left-color: #e9ecef; +[data-bs-theme="dark"] .rendered-markdown table thead th { + background-color: #343a40; + color: #e9ecef; + 
border-bottom-color: #495057; } -[data-bs-theme="dark"] .CodeMirror .CodeMirror-selected { - background-color: #1e3a5f !important; +[data-bs-theme="dark"] .rendered-markdown table tbody tr:nth-child(even) { + background-color: rgba(255, 255, 255, 0.05); } -[data-bs-theme="dark"] .CodeMirror-focused .CodeMirror-selected { - background-color: #0d6efd !important; +[data-bs-theme="dark"] .rendered-markdown table tbody tr:hover { + background-color: rgba(255, 255, 255, 0.1); } -[data-bs-theme="dark"] .CodeMirror .CodeMirror-line::selection, -[data-bs-theme="dark"] .CodeMirror .CodeMirror-line > span::selection, -[data-bs-theme="dark"] .CodeMirror .CodeMirror-line > span > span::selection { - background-color: #0d6efd !important; - color: #ffffff !important; +.rendered-markdown table code { + background-color: rgba(0, 0, 0, 0.1); + padding: 0.125rem 0.25rem; + border-radius: 0.25rem; + font-size: 0.8em; } -[data-bs-theme="dark"] .CodeMirror .CodeMirror-line::-moz-selection, -[data-bs-theme="dark"] .CodeMirror .CodeMirror-line > span::-moz-selection, -[data-bs-theme="dark"] .CodeMirror .CodeMirror-line > span > span::-moz-selection { - background-color: #0d6efd !important; - color: #ffffff !important; +[data-bs-theme="dark"] .rendered-markdown table code { + background-color: rgba(255, 255, 255, 0.1); } -[data-bs-theme="dark"] .editor-preview { - background-color: #2b3035; - color: #e9ecef; +/* --- Code blocks --- */ +.rendered-markdown pre, +.rendered-markdown pre[class*="language-"] { + overflow-x: auto; + max-width: 100%; + width: 100%; + box-sizing: border-box; + display: block; + white-space: pre; + background-color: #1e1e1e; + color: #d4d4d4; + border-radius: 0.375rem; + padding: 1rem; + margin: 0.75rem 0; + font-size: 0.85rem; + line-height: 1.5; } -[data-bs-theme="dark"] .editor-preview-side { - background-color: #2b3035; - color: #e9ecef; - border-left-color: #495057; +.rendered-markdown pre code { + display: block; + min-width: 0; + max-width: 100%; + 
overflow-x: auto; + white-space: pre; + background: transparent; + color: inherit; + padding: 0; + font-size: inherit; } -[data-bs-theme="dark"] .editor-statusbar { - background-color: #343a40; +/* Inline code */ +.rendered-markdown code:not(pre code) { + background-color: rgba(0, 0, 0, 0.06); + padding: 0.15rem 0.35rem; + border-radius: 0.25rem; + font-size: 0.85em; + color: #d63384; +} + +[data-bs-theme="dark"] .rendered-markdown code:not(pre code) { + background-color: rgba(255, 255, 255, 0.1); + color: #e685b5; +} + +/* Blockquotes */ +.rendered-markdown blockquote { + border-left: 4px solid #dee2e6; + padding-left: 1em; + color: #6c757d; + margin: 0.75rem 0; +} + +[data-bs-theme="dark"] .rendered-markdown blockquote { + border-left-color: #495057; color: #adb5bd; - border-top-color: #495057; -} \ No newline at end of file +} diff --git a/application/single_app/static/images/custom_logo.png b/application/single_app/static/images/custom_logo.png new file mode 100644 index 00000000..ecf6e652 Binary files /dev/null and b/application/single_app/static/images/custom_logo.png differ diff --git a/application/single_app/static/images/custom_logo_dark.png b/application/single_app/static/images/custom_logo_dark.png new file mode 100644 index 00000000..4f281945 Binary files /dev/null and b/application/single_app/static/images/custom_logo_dark.png differ diff --git a/application/single_app/static/js/admin/admin_settings.js b/application/single_app/static/js/admin/admin_settings.js index c6bdec36..21c989fd 100644 --- a/application/single_app/static/js/admin/admin_settings.js +++ b/application/single_app/static/js/admin/admin_settings.js @@ -1237,10 +1237,11 @@ function setupToggles() { const mathToggle = document.getElementById('toggle-math-plugin'); const textToggle = document.getElementById('toggle-text-plugin'); const factMemoryToggle = document.getElementById('toggle-fact-memory-plugin'); + const tabularProcessingToggle = 
document.getElementById('toggle-tabular-processing-plugin'); const embeddingToggle = document.getElementById('toggle-default-embedding-model-plugin'); const allowUserPluginsToggle = document.getElementById('toggle-allow-user-plugins'); const allowGroupPluginsToggle = document.getElementById('toggle-allow-group-plugins'); - const toggles = [timeToggle, httpToggle, waitToggle, mathToggle, textToggle, factMemoryToggle, embeddingToggle, allowUserPluginsToggle, allowGroupPluginsToggle]; + const toggles = [timeToggle, httpToggle, waitToggle, mathToggle, textToggle, factMemoryToggle, tabularProcessingToggle, embeddingToggle, allowUserPluginsToggle, allowGroupPluginsToggle]; // Feedback area let feedbackDiv = document.getElementById('core-plugin-toggles-feedback'); if (!feedbackDiv) { @@ -1270,6 +1271,16 @@ function setupToggles() { if (textToggle) textToggle.checked = !!settings.enable_text_plugin; if (embeddingToggle) embeddingToggle.checked = !!settings.enable_default_embedding_model_plugin; if (factMemoryToggle) factMemoryToggle.checked = !!settings.enable_fact_memory_plugin; + if (tabularProcessingToggle) { + tabularProcessingToggle.checked = !!settings.enable_tabular_processing_plugin; + const ecEnabled = !!settings.enable_enhanced_citations; + tabularProcessingToggle.disabled = !ecEnabled; + const depNote = document.getElementById('tabular-processing-dependency-note'); + if (depNote) { + depNote.textContent = ecEnabled ? 'Requires Enhanced Citations' : 'Requires Enhanced Citations (currently disabled)'; + depNote.className = ecEnabled ? 'text-muted d-block ms-4' : 'text-danger d-block ms-4'; + } + } if (allowUserPluginsToggle) allowUserPluginsToggle.checked = !!settings.allow_user_plugins; if (allowGroupPluginsToggle) allowGroupPluginsToggle.checked = !!settings.allow_group_plugins; } catch (err) { @@ -1291,6 +1302,7 @@ function setupToggles() { enable_text_plugin: textToggle ? textToggle.checked : false, enable_default_embedding_model_plugin: embeddingToggle ? 
embeddingToggle.checked : false, enable_fact_memory_plugin: factMemoryToggle ? factMemoryToggle.checked : false, + enable_tabular_processing_plugin: tabularProcessingToggle ? tabularProcessingToggle.checked : false, allow_user_plugins: allowUserPluginsToggle ? allowUserPluginsToggle.checked : false, allow_group_plugins: allowGroupPluginsToggle ? allowGroupPluginsToggle.checked : false }; @@ -3844,11 +3856,12 @@ function checkOptionalFeaturesEnabled(stepNumber) { return endpoint && key; } - case 11: // User feedback and archiving - // Check if feedback is enabled + case 11: // User feedback, archiving, and thoughts + // Check if feedback, archiving, or thoughts is enabled const feedbackEnabled = document.getElementById('enable_user_feedback')?.checked; const archivingEnabled = document.getElementById('enable_conversation_archiving')?.checked; - return feedbackEnabled || archivingEnabled; + const thoughtsEnabled = document.getElementById('enable_thoughts')?.checked; + return feedbackEnabled || archivingEnabled || thoughtsEnabled; case 12: // Enhanced citations and image generation // Check if enhanced citations or image generation is enabled diff --git a/application/single_app/static/js/chat/chat-conversation-details.js b/application/single_app/static/js/chat/chat-conversation-details.js index 19851bae..484128af 100644 --- a/application/single_app/static/js/chat/chat-conversation-details.js +++ b/application/single_app/static/js/chat/chat-conversation-details.js @@ -75,7 +75,7 @@ export async function showConversationDetails(conversationId) { * @returns {string} HTML string */ function renderConversationMetadata(metadata, conversationId) { - const { context = [], tags = [], strict = false, classification = [], last_updated, chat_type = 'personal', is_pinned = false, is_hidden = false, scope_locked, locked_contexts = [] } = metadata; + const { context = [], tags = [], strict = false, classification = [], last_updated, chat_type = 'personal', is_pinned = false, 
is_hidden = false, scope_locked, locked_contexts = [], summary = null } = metadata; // Organize tags by category const tagsByCategory = { @@ -97,6 +97,18 @@ function renderConversationMetadata(metadata, conversationId) { // Build HTML sections let html = `
+ +
+
+
+
Summary
+ ${summary ? `Generated ${formatDate(summary.generated_at)}${summary.model_deployment ? ` · ${summary.model_deployment}` : ''}` : ''} +
+
+ ${renderSummaryContent(summary, conversationId)} +
+
+
@@ -570,8 +582,159 @@ function extractPageNumbers(chunkIds) { return pages.sort((a, b) => parseInt(a) - parseInt(b)); } +/** + * Render the summary card body content + * @param {Object|null} summary - Existing summary data or null + * @param {string} conversationId - The conversation ID + * @returns {string} HTML string + */ +function renderSummaryContent(summary, conversationId) { + if (summary && summary.content) { + return ` +

${escapeHtml(summary.content)}

+
+ +
+ `; + } + + // Build model options from the global model-select dropdown + const modelOptions = getAvailableModelOptions(); + return ` +

No summary has been generated for this conversation yet.

+
+ + +
+ `; +} + +/** + * Get available model options from the global #model-select dropdown + * @returns {string} HTML option elements + */ +function getAvailableModelOptions() { + const globalSelect = document.getElementById('model-select'); + if (!globalSelect) { + return ''; + } + let options = ''; + for (const opt of globalSelect.options) { + options += ``; + } + return options || ''; +} + +/** + * Handle summary generation (generate or regenerate) + * @param {string} conversationId - The conversation ID + * @param {string} modelDeployment - Selected model deployment + */ +async function handleGenerateSummary(conversationId, modelDeployment) { + const cardBody = document.getElementById('summary-card-body'); + if (!cardBody) { + return; + } + + cardBody.innerHTML = ` +
+
+ Generating... +
+ Generating summary... +
+ `; + + try { + const response = await fetch(`/api/conversations/${conversationId}/summary`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ model_deployment: modelDeployment }) + }); + + if (!response.ok) { + const errData = await response.json().catch(() => ({})); + throw new Error(errData.error || `HTTP ${response.status}`); + } + + const data = await response.json(); + const summary = data.summary; + cardBody.innerHTML = renderSummaryContent(summary, conversationId); + + // Update card header with generation info + const cardHeader = cardBody.closest('.card').querySelector('.card-header'); + if (cardHeader && summary) { + const smallEl = cardHeader.querySelector('small'); + const infoText = `Generated ${formatDate(summary.generated_at)}${summary.model_deployment ? ` · ${summary.model_deployment}` : ''}`; + if (smallEl) { + smallEl.textContent = infoText; + } else { + const small = document.createElement('small'); + small.className = 'opacity-75'; + small.textContent = infoText; + cardHeader.appendChild(small); + } + } + + } catch (error) { + console.error('Error generating summary:', error); + cardBody.innerHTML = ` +
+ + Failed to generate summary: ${escapeHtml(error.message)} +
+ ${renderSummaryContent(null, conversationId)} + `; + } +} + +/** + * Simple HTML escape for display + * @param {string} str - String to escape + * @returns {string} Escaped string + */ +function escapeHtml(str) { + if (!str) { + return ''; + } + const div = document.createElement('div'); + div.textContent = str; + return div.innerHTML; +} + // Event listeners for details buttons document.addEventListener('click', function(e) { + // Generate summary button + if (e.target.closest('#generate-summary-btn')) { + e.preventDefault(); + const btn = e.target.closest('#generate-summary-btn'); + const cid = btn.getAttribute('data-conversation-id'); + const modelSelect = document.getElementById('summary-model-select'); + const model = modelSelect ? modelSelect.value : ''; + handleGenerateSummary(cid, model); + return; + } + + // Regenerate summary button + if (e.target.closest('#regenerate-summary-btn')) { + e.preventDefault(); + const btn = e.target.closest('#regenerate-summary-btn'); + const cid = btn.getAttribute('data-conversation-id'); + // Use the currently selected global model for regeneration + const globalSelect = document.getElementById('model-select'); + const model = globalSelect ? 
globalSelect.value : ''; + handleGenerateSummary(cid, model); + return; + } + if (e.target.closest('.details-btn')) { e.preventDefault(); diff --git a/application/single_app/static/js/chat/chat-enhanced-citations.js b/application/single_app/static/js/chat/chat-enhanced-citations.js index dcda708b..9d4344bb 100644 --- a/application/single_app/static/js/chat/chat-enhanced-citations.js +++ b/application/single_app/static/js/chat/chat-enhanced-citations.js @@ -18,11 +18,13 @@ export function getFileType(fileName) { const imageExtensions = ['jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif']; const videoExtensions = ['mp4', 'mov', 'avi', 'mkv', 'flv', 'webm', 'wmv', 'm4v', '3gp']; const audioExtensions = ['mp3', 'wav', 'ogg', 'aac', 'flac', 'm4a']; - + const tabularExtensions = ['csv', 'xlsx', 'xls', 'xlsm']; + if (imageExtensions.includes(ext)) return 'image'; if (ext === 'pdf') return 'pdf'; if (videoExtensions.includes(ext)) return 'video'; if (audioExtensions.includes(ext)) return 'audio'; + if (tabularExtensions.includes(ext)) return 'tabular'; return 'other'; } @@ -66,6 +68,9 @@ export function showEnhancedCitationModal(docId, pageNumberOrTimestamp, citation const audioTimestamp = convertTimestampToSeconds(pageNumberOrTimestamp); showAudioModal(docId, audioTimestamp, docMetadata.file_name); break; + case 'tabular': + showTabularDownloadModal(docId, docMetadata.file_name); + break; default: // Fall back to text citation for unsupported types import('./chat-citations.js').then(module => { @@ -291,6 +296,119 @@ export function showAudioModal(docId, timestamp, fileName) { modalInstance.show(); } +/** + * Show tabular file preview modal with data table + * @param {string} docId - Document ID + * @param {string} fileName - File name + */ +export function showTabularDownloadModal(docId, fileName) { + console.log(`Showing tabular preview modal for docId: ${docId}, fileName: ${fileName}`); + showLoadingIndicator(); + + // Create or get tabular modal + let tabularModal = 
document.getElementById("enhanced-tabular-modal"); + if (!tabularModal) { + tabularModal = createTabularModal(); + } + + const title = tabularModal.querySelector(".modal-title"); + const tableContainer = tabularModal.querySelector("#enhanced-tabular-table-container"); + const rowInfo = tabularModal.querySelector("#enhanced-tabular-row-info"); + const downloadBtn = tabularModal.querySelector("#enhanced-tabular-download"); + const errorContainer = tabularModal.querySelector("#enhanced-tabular-error"); + + title.textContent = `Tabular Data: ${fileName}`; + tableContainer.innerHTML = '
Loading...

Loading data preview...

'; + rowInfo.textContent = ''; + errorContainer.classList.add('d-none'); + + const downloadUrl = `/api/enhanced_citations/tabular_workspace?doc_id=${encodeURIComponent(docId)}`; + downloadBtn.href = downloadUrl; + downloadBtn.download = fileName; + + // Show modal immediately with loading state + const modalInstance = new bootstrap.Modal(tabularModal); + modalInstance.show(); + + // Fetch preview data + const previewUrl = `/api/enhanced_citations/tabular_preview?doc_id=${encodeURIComponent(docId)}`; + fetch(previewUrl) + .then(response => { + if (!response.ok) throw new Error(`HTTP ${response.status}`); + return response.json(); + }) + .then(data => { + hideLoadingIndicator(); + if (data.error) { + showTabularError(tableContainer, errorContainer, data.error); + return; + } + renderTabularPreview(tableContainer, rowInfo, data); + }) + .catch(error => { + hideLoadingIndicator(); + console.error('Error loading tabular preview:', error); + showTabularError(tableContainer, errorContainer, 'Could not load data preview.'); + }); +} + +/** + * Render tabular data as an HTML table + * @param {HTMLElement} container - Table container element + * @param {HTMLElement} rowInfo - Row info display element + * @param {Object} data - Preview data from API + */ +function renderTabularPreview(container, rowInfo, data) { + const { columns, rows, total_rows, truncated } = data; + + // Build table HTML + let html = ''; + + // Header + html += ''; + for (const col of columns) { + const escaped = col.replace(/&/g, '&').replace(//g, '>'); + html += ``; + } + html += ''; + + // Body + html += ''; + for (const row of rows) { + html += ''; + for (const cell of row) { + const val = cell === null || cell === undefined ? '' : String(cell); + const escaped = val.replace(/&/g, '&').replace(//g, '>'); + html += ``; + } + html += ''; + } + html += '
${escaped}
${escaped}
'; + + container.innerHTML = html; + + // Row info + const displayedRows = rows.length; + const totalFormatted = total_rows.toLocaleString(); + if (truncated) { + rowInfo.textContent = `Showing ${displayedRows.toLocaleString()} of ${totalFormatted} rows`; + } else { + rowInfo.textContent = `${totalFormatted} rows, ${columns.length} columns`; + } +} + +/** + * Show error state in tabular modal with download fallback + * @param {HTMLElement} tableContainer - Table container element + * @param {HTMLElement} errorContainer - Error display element + * @param {string} message - Error message + */ +function showTabularError(tableContainer, errorContainer, message) { + tableContainer.innerHTML = '
'; + errorContainer.textContent = message + ' You can still download the file below.'; + errorContainer.classList.remove('d-none'); +} + /** * Convert timestamp string to seconds * @param {string|number} timestamp - Timestamp in various formats @@ -445,3 +563,36 @@ function createPdfModal() { document.body.appendChild(modal); return modal; } + +/** + * Create tabular file preview modal HTML structure + * @returns {HTMLElement} - Modal element + */ +function createTabularModal() { + const modal = document.createElement("div"); + modal.id = "enhanced-tabular-modal"; + modal.classList.add("modal", "fade"); + modal.tabIndex = -1; + modal.innerHTML = ` + + `; + document.body.appendChild(modal); + return modal; +} diff --git a/application/single_app/static/js/chat/chat-export.js b/application/single_app/static/js/chat/chat-export.js index 269cbfe0..fc53d2b6 100644 --- a/application/single_app/static/js/chat/chat-export.js +++ b/application/single_app/static/js/chat/chat-export.js @@ -15,6 +15,8 @@ let exportConversationIds = []; let exportConversationTitles = {}; let exportFormat = 'json'; let exportPackaging = 'single'; +let includeSummaryIntro = false; +let summaryModelDeployment = ''; let currentStep = 1; let totalSteps = 3; let skipSelectionStep = false; @@ -53,14 +55,16 @@ function openExportWizard(conversationIds, skipSelection) { exportConversationTitles = {}; exportFormat = 'json'; exportPackaging = conversationIds.length > 1 ? 
'zip' : 'single'; + includeSummaryIntro = false; + summaryModelDeployment = _getDefaultSummaryModel(); skipSelectionStep = !!skipSelection; // Determine step configuration if (skipSelectionStep) { - totalSteps = 3; + totalSteps = 4; currentStep = 1; // Format step (mapped to visual step) } else { - totalSteps = 4; + totalSteps = 5; currentStep = 1; // Selection review step } @@ -142,19 +146,21 @@ function _renderCurrentStep() { if (!stepBody) return; if (skipSelectionStep) { - // Steps: 1=Format, 2=Packaging, 3=Download + // Steps: 1=Format, 2=Packaging, 3=Summary, 4=Download switch (currentStep) { case 1: _renderFormatStep(stepBody); break; case 2: _renderPackagingStep(stepBody); break; - case 3: _renderDownloadStep(stepBody); break; + case 3: _renderSummaryStep(stepBody); break; + case 4: _renderDownloadStep(stepBody); break; } } else { - // Steps: 1=Selection, 2=Format, 3=Packaging, 4=Download + // Steps: 1=Selection, 2=Format, 3=Packaging, 4=Summary, 5=Download switch (currentStep) { case 1: _renderSelectionStep(stepBody); break; case 2: _renderFormatStep(stepBody); break; case 3: _renderPackagingStep(stepBody); break; - case 4: _renderDownloadStep(stepBody); break; + case 4: _renderSummaryStep(stepBody); break; + case 5: _renderDownloadStep(stepBody); break; } } } @@ -210,7 +216,7 @@ function _renderFormatStep(container) {

Select the format for your exported conversations.

-
+
@@ -219,7 +225,7 @@ function _renderFormatStep(container) {
-
+
@@ -228,6 +234,15 @@ function _renderFormatStep(container) {
+
+
+
+ +
PDF
+

Print-ready format with chat bubbles. Ideal for archiving and printing.

+
+
+
`; // Wire card clicks @@ -297,11 +312,68 @@ function _renderPackagingStep(container) { }); } +function _renderSummaryStep(container) { + const mainModelSelect = getEl('model-select'); + const hasModelOptions = Boolean(mainModelSelect && mainModelSelect.options.length > 0); + const defaultSummaryModel = summaryModelDeployment || _getDefaultSummaryModel(); + const perConversationText = exportConversationIds.length > 1 + ? 'An intro will be generated for each exported conversation.' + : 'An intro will be generated for this conversation.'; + + container.innerHTML = ` +
+
Optional Intro Summary
+

Add a short abstract before the exported transcript. ${perConversationText}

+
+
+ + +
+
+
+ + +
Uses the same model list as the chat composer.
+
+
`; + + const toggle = getEl('export-summary-toggle'); + const modelContainer = getEl('export-summary-model-container'); + const summaryModelSelect = getEl('export-summary-model'); + + if (summaryModelSelect && hasModelOptions) { + summaryModelSelect.value = defaultSummaryModel || summaryModelSelect.value; + summaryModelDeployment = summaryModelSelect.value; + summaryModelSelect.addEventListener('change', () => { + summaryModelDeployment = summaryModelSelect.value; + }); + } + + if (toggle) { + toggle.addEventListener('change', () => { + includeSummaryIntro = toggle.checked; + if (modelContainer) { + modelContainer.classList.toggle('d-none', !includeSummaryIntro); + } + if (includeSummaryIntro && summaryModelSelect && !summaryModelSelect.value) { + summaryModelSelect.value = _getDefaultSummaryModel(); + summaryModelDeployment = summaryModelSelect.value; + } + }); + } +} + function _renderDownloadStep(container) { const count = exportConversationIds.length; - const formatLabel = exportFormat === 'json' ? 'JSON' : 'Markdown'; + const formatLabels = { json: 'JSON', markdown: 'Markdown', pdf: 'PDF' }; + const formatLabel = formatLabels[exportFormat] || exportFormat.toUpperCase(); const packagingLabel = exportPackaging === 'zip' ? 'ZIP Archive' : 'Single File'; - const ext = exportPackaging === 'zip' ? '.zip' : (exportFormat === 'json' ? '.json' : '.md'); + const extMap = { json: '.json', markdown: '.md', pdf: '.pdf' }; + const ext = exportPackaging === 'zip' ? '.zip' : (extMap[exportFormat] || '.bin'); + const summaryLabel = includeSummaryIntro ? 'Enabled' : 'Disabled'; + const summaryModelLabel = includeSummaryIntro ? (summaryModelDeployment || 'Configured default') : '—'; let conversationsList = ''; exportConversationIds.forEach(id => { @@ -328,6 +400,14 @@ function _renderDownloadStep(container) {
Packaging:
${packagingLabel}
+
+
Intro summary:
+
${summaryLabel}
+
+
+
Summary model:
+
${_escapeHtml(summaryModelLabel)}
+
File type:
${ext}
@@ -364,6 +444,7 @@ function _updateStepIndicators() { steps = [ { label: 'Format', icon: 'bi-filetype-json' }, { label: 'Packaging', icon: 'bi-box' }, + { label: 'Summary', icon: 'bi-card-text' }, { label: 'Download', icon: 'bi-download' } ]; } else { @@ -371,6 +452,7 @@ function _updateStepIndicators() { { label: 'Select', icon: 'bi-list-check' }, { label: 'Format', icon: 'bi-filetype-json' }, { label: 'Packaging', icon: 'bi-box' }, + { label: 'Summary', icon: 'bi-card-text' }, { label: 'Download', icon: 'bi-download' } ]; } @@ -448,7 +530,9 @@ async function _executeExport() { body: JSON.stringify({ conversation_ids: exportConversationIds, format: exportFormat, - packaging: exportPackaging + packaging: exportPackaging, + include_summary_intro: includeSummaryIntro, + summary_model_deployment: includeSummaryIntro ? summaryModelDeployment : null }) }); @@ -460,7 +544,8 @@ async function _executeExport() { // Get filename from Content-Disposition header const disposition = response.headers.get('Content-Disposition') || ''; const filenameMatch = disposition.match(/filename="?([^"]+)"?/); - const filename = filenameMatch ? filenameMatch[1] : `conversations_export.${exportPackaging === 'zip' ? 'zip' : (exportFormat === 'json' ? 'json' : 'md')}`; + const fallbackExtMap = { json: 'json', markdown: 'md', pdf: 'pdf' }; + const filename = filenameMatch ? filenameMatch[1] : `conversations_export.${exportPackaging === 'zip' ? 'zip' : (fallbackExtMap[exportFormat] || 'bin')}`; // Download the blob const blob = await response.blob(); @@ -511,6 +596,15 @@ function _escapeHtml(text) { return div.innerHTML; } +function _getDefaultSummaryModel() { + const mainModelSelect = getEl('model-select'); + if (!mainModelSelect) { + return ''; + } + + return mainModelSelect.value || (mainModelSelect.options[0] ? 
mainModelSelect.options[0].value : ''); +} + // --- Expose Globally --- window.chatExport = { openExportWizard diff --git a/application/single_app/static/js/chat/chat-input-actions.js b/application/single_app/static/js/chat/chat-input-actions.js index 77851319..c0c7832b 100644 --- a/application/single_app/static/js/chat/chat-input-actions.js +++ b/application/single_app/static/js/chat/chat-input-actions.js @@ -127,11 +127,11 @@ export function fetchFileContent(conversationId, fileId) { hideLoadingIndicator(); if (data.file_content && data.filename) { - showFileContentPopup(data.file_content, data.filename, data.is_table); + showFileContentPopup(data.file_content, data.filename, data.is_table, data.file_content_source, conversationId, fileId); } else if (data.error) { showToast(data.error, "danger"); } else { - ashowToastlert("Unexpected response from server.", "danger"); + showToast("Unexpected response from server.", "danger"); } }) .catch((error) => { @@ -141,7 +141,7 @@ export function fetchFileContent(conversationId, fileId) { }); } -export function showFileContentPopup(fileContent, filename, isTable) { +export function showFileContentPopup(fileContent, filename, isTable, fileContentSource, conversationId, fileId) { let modalContainer = document.getElementById("file-modal"); if (!modalContainer) { modalContainer = document.createElement("div"); @@ -155,6 +155,7 @@ export function showFileContentPopup(fileContent, filename, isTable) { `; @@ -760,6 +763,7 @@ export function appendMessage(
${senderLabel}
${mainMessageHtml} ${citationContentContainerHtml} + ${thoughtsHtml.containerHtml} ${metadataContainerHtml} ${footerContentHtml}
@@ -816,6 +820,9 @@ export function appendMessage( } }); } + + // Attach thoughts toggle listener + attachThoughtsToggleListener(messageDiv, messageId, currentConversationId); const maskBtn = messageDiv.querySelector(".mask-btn"); if (maskBtn) { @@ -1516,6 +1523,7 @@ export function actuallySendMessage(finalMessageToSend) { } // Regular non-streaming fetch + startThoughtPolling(currentConversationId); fetch("/api/chat", { method: "POST", headers: { @@ -1547,6 +1555,7 @@ export function actuallySendMessage(finalMessageToSend) { }) .then((data) => { // Only successful responses reach here + stopThoughtPolling(); hideLoadingIndicatorInChatbox(); console.log("--- Data received from /api/chat ---"); @@ -1688,6 +1697,7 @@ export function actuallySendMessage(finalMessageToSend) { } }) .catch((error) => { + stopThoughtPolling(); hideLoadingIndicatorInChatbox(); console.error("Error sending message:", error); diff --git a/application/single_app/static/js/chat/chat-streaming.js b/application/single_app/static/js/chat/chat-streaming.js index faf6f59e..d2b5b218 100644 --- a/application/single_app/static/js/chat/chat-streaming.js +++ b/application/single_app/static/js/chat/chat-streaming.js @@ -5,6 +5,7 @@ import { loadUserSettings, saveUserSetting } from './chat-layout.js'; import { showToast } from './chat-toast.js'; import { updateSidebarConversationTitle } from './chat-sidebar-conversations.js'; import { applyScopeLock } from './chat-documents.js'; +import { handleStreamingThought } from './chat-thoughts.js'; let streamingEnabled = false; let currentEventSource = null; @@ -207,8 +208,11 @@ export function sendMessageWithStreaming(messageData, tempUserMessageId, current handleStreamError(tempAiMessageId, data.partial_content || accumulatedContent, data.error); return; } - - if (data.content) { + + if (data.type === 'thought') { + handleStreamingThought(data); + // Continue reading — don't fall through to content handling + } else if (data.content) { // Append chunk to 
accumulated content accumulatedContent += data.content; updateStreamingMessage(tempAiMessageId, accumulatedContent); diff --git a/application/single_app/static/js/chat/chat-thoughts.js b/application/single_app/static/js/chat/chat-thoughts.js new file mode 100644 index 00000000..a780bd3f --- /dev/null +++ b/application/single_app/static/js/chat/chat-thoughts.js @@ -0,0 +1,215 @@ +// chat-thoughts.js + +import { updateLoadingIndicatorText } from './chat-loading-indicator.js'; +import { escapeHtml } from './chat-utils.js'; + +let thoughtPollingInterval = null; +let lastSeenThoughtIndex = -1; + +// --------------------------------------------------------------------------- +// Icon map: step_type → Bootstrap Icon class +// --------------------------------------------------------------------------- +function getThoughtIcon(stepType) { + const iconMap = { + 'search': 'bi-search', + 'tabular_analysis': 'bi-table', + 'web_search': 'bi-globe', + 'agent_tool_call': 'bi-robot', + 'generation': 'bi-lightning', + 'content_safety': 'bi-shield-check' + }; + return iconMap[stepType] || 'bi-stars'; +} + +// --------------------------------------------------------------------------- +// Polling (non-streaming mode) +// --------------------------------------------------------------------------- + +/** + * Start polling for pending thoughts while waiting for a non-streaming response. + * @param {string} conversationId - The current conversation ID. 
+ */ +export function startThoughtPolling(conversationId) { + if (!conversationId) return; + if (!window.appSettings?.enable_thoughts) return; + + stopThoughtPolling(); // clear any previous interval + lastSeenThoughtIndex = -1; + + thoughtPollingInterval = setInterval(() => { + fetch(`/api/conversations/${conversationId}/thoughts/pending`, { + credentials: 'same-origin' + }) + .then(r => r.json()) + .then(data => { + if (data.thoughts && data.thoughts.length > 0) { + const latest = data.thoughts[data.thoughts.length - 1]; + if (latest.step_index > lastSeenThoughtIndex) { + lastSeenThoughtIndex = latest.step_index; + const icon = getThoughtIcon(latest.step_type); + updateLoadingIndicatorText(latest.content, icon); + } + } + }) + .catch(() => { /* ignore polling errors */ }); + }, 2000); +} + +/** + * Stop the thought polling interval. + */ +export function stopThoughtPolling() { + if (thoughtPollingInterval) { + clearInterval(thoughtPollingInterval); + thoughtPollingInterval = null; + } + lastSeenThoughtIndex = -1; +} + +// --------------------------------------------------------------------------- +// Streaming handler +// --------------------------------------------------------------------------- + +/** + * Handle a streaming thought event received via SSE. + * Updates the streaming message placeholder with a styled thought indicator. + * When actual content starts streaming, updateStreamingMessage() will overwrite this. 
+ * @param {object} thoughtData - { step_index, step_type, content } + */ +export function handleStreamingThought(thoughtData) { + // Find the streaming message's content area + const messageElement = document.querySelector('[data-message-id^="temp_ai_"]'); + if (!messageElement) return; + + const contentElement = messageElement.querySelector('.message-text'); + if (!contentElement) return; + + const icon = getThoughtIcon(thoughtData.step_type); + // Replace entire content with styled thought indicator (visually distinct from AI response) + contentElement.innerHTML = `
+ + ${escapeHtml(thoughtData.content)} + +
`; +} + +// --------------------------------------------------------------------------- +// Per-message collapsible: toggle button + container HTML +// --------------------------------------------------------------------------- + +/** + * Create HTML for the thoughts toggle button and hidden container. + * Returns an object with { toggleHtml, containerHtml }. + * @param {string} messageId + */ +export function createThoughtsToggleHtml(messageId) { + if (!window.appSettings?.enable_thoughts) { + return { toggleHtml: '', containerHtml: '' }; + } + + const containerId = `thoughts-${messageId || Date.now()}`; + const toggleHtml = ``; + const containerHtml = `
Loading thoughts...
`; + + return { toggleHtml, containerHtml }; +} + +/** + * Attach event listener for the thoughts toggle button inside a message div. + * @param {HTMLElement} messageDiv + * @param {string} messageId + * @param {string} conversationId + */ +export function attachThoughtsToggleListener(messageDiv, messageId, conversationId) { + const toggleBtn = messageDiv.querySelector('.thoughts-toggle-btn'); + if (!toggleBtn) return; + + toggleBtn.addEventListener('click', () => { + const targetId = toggleBtn.getAttribute('aria-controls'); + const container = messageDiv.querySelector(`#${targetId}`); + if (!container) return; + + // Store scroll position + const scrollContainer = document.getElementById('chat-messages-container'); + const currentScroll = scrollContainer?.scrollTop || window.pageYOffset; + + const isExpanded = !container.classList.contains('d-none'); + if (isExpanded) { + container.classList.add('d-none'); + toggleBtn.setAttribute('aria-expanded', 'false'); + toggleBtn.title = 'Show processing thoughts'; + toggleBtn.innerHTML = ''; + } else { + container.classList.remove('d-none'); + toggleBtn.setAttribute('aria-expanded', 'true'); + toggleBtn.title = 'Hide processing thoughts'; + toggleBtn.innerHTML = ''; + + // Lazy-load thoughts on first expand + if (container.innerHTML.includes('Loading thoughts')) { + loadThoughtsForMessage(conversationId, messageId, container); + } + } + + // Restore scroll position + setTimeout(() => { + if (scrollContainer) { + scrollContainer.scrollTop = currentScroll; + } else { + window.scrollTo(0, currentScroll); + } + }, 10); + }); +} + +// --------------------------------------------------------------------------- +// Fetch + render thoughts for a message +// --------------------------------------------------------------------------- + +/** + * Fetch thoughts for a specific message from the API and render them. 
+ * @param {string} conversationId + * @param {string} messageId + * @param {HTMLElement} container + */ +function loadThoughtsForMessage(conversationId, messageId, container) { + fetch(`/api/conversations/${conversationId}/messages/${messageId}/thoughts`, { + credentials: 'same-origin' + }) + .then(r => r.json()) + .then(data => { + if (!data.enabled) { + container.innerHTML = '
Processing thoughts are disabled.
'; + return; + } + if (!data.thoughts || data.thoughts.length === 0) { + container.innerHTML = '
No processing thoughts recorded for this message.
'; + return; + } + container.innerHTML = renderThoughtsList(data.thoughts); + }) + .catch(err => { + console.error('Error loading thoughts:', err); + container.innerHTML = '
Failed to load processing thoughts.
'; + }); +} + +/** + * Render a list of thought steps as HTML. + * @param {Array} thoughts + * @returns {string} HTML string + */ +function renderThoughtsList(thoughts) { + let html = '
'; + thoughts.forEach(t => { + const icon = getThoughtIcon(t.step_type); + const durationStr = t.duration_ms != null ? `(${t.duration_ms}ms)` : ''; + html += `
+ + ${escapeHtml(t.content || '')} + ${durationStr} +
`; + }); + html += '
'; + return html; +} diff --git a/application/single_app/static/js/plugin_common.js b/application/single_app/static/js/plugin_common.js index e40158b9..29a88a24 100644 --- a/application/single_app/static/js/plugin_common.js +++ b/application/single_app/static/js/plugin_common.js @@ -2,6 +2,10 @@ // Shared logic for admin_plugins.js and workspace_plugins.js // Exports: functions for modal field handling, validation, label toggling, table rendering, and plugin CRUD import { showToast } from "./chat/chat-toast.js" +import { + humanizeName, truncateDescription, + openViewModal, createActionCard +} from './workspace/view-utils.js'; // Fetch merged plugin settings from backend given type and current settings export async function fetchAndMergePluginSettings(pluginType, currentSettings = {}) { @@ -60,8 +64,7 @@ export function escapeHtml(str) { } // Render plugins table (parameterized for tbody selector and button handlers) -export function renderPluginsTable({plugins, tbodySelector, onEdit, onDelete, ensureTable = true, isAdmin = false}) { - console.log('Rendering plugins table with %d plugins', plugins.length); +export function renderPluginsTable({plugins, tbodySelector, onEdit, onDelete, onView, ensureTable = true, isAdmin = false}) { // Optionally ensure the table is present before rendering if (ensureTable) { ensurePluginsTableInRoot(); @@ -75,29 +78,33 @@ export function renderPluginsTable({plugins, tbodySelector, onEdit, onDelete, en plugins.forEach(plugin => { const tr = document.createElement('tr'); const safeName = escapeHtml(plugin.name); - const safeDisplayName = escapeHtml(plugin.display_name || plugin.name); - const safeDesc = escapeHtml(plugin.description || 'No description available'); + const displayName = humanizeName(plugin.display_name || plugin.name); + const safeDisplayName = escapeHtml(displayName); + const description = plugin.description || 'No description available'; + const truncatedDesc = escapeHtml(truncateDescription(description, 90)); let 
actionButtons = ''; let globalBadge = plugin.is_global ? ' Global' : ''; - // Show action buttons for: - // - Admin context: all actions (global and personal) - // - User context: only personal actions (not global) + // View button always shown + let viewButton = ``; + + // Edit/Delete buttons based on context + let editDeleteButtons = ''; if (isAdmin || !plugin.is_global) { - actionButtons = ` -
+ editDeleteButtons = ` -
- `; + `; } + actionButtons = `
${viewButton}${editDeleteButtons}
`; tr.innerHTML = ` - ${safeDisplayName}${globalBadge} - ${safeDesc} + ${safeDisplayName}${globalBadge} + ${truncatedDesc} ${actionButtons} `; tbody.appendChild(tr); @@ -109,6 +116,34 @@ export function renderPluginsTable({plugins, tbodySelector, onEdit, onDelete, en tbody.querySelectorAll('.delete-plugin-btn').forEach(btn => { btn.onclick = () => onDelete(btn.getAttribute('data-plugin-name')); }); + tbody.querySelectorAll('.view-plugin-btn').forEach(btn => { + btn.onclick = () => { + if (onView) { + onView(btn.getAttribute('data-plugin-name')); + } + }; + }); +} + +// Render plugins grid (card-based view) +export function renderPluginsGrid({plugins, containerSelector, onEdit, onDelete, onView, isAdmin = false}) { + const container = document.querySelector(containerSelector); + if (!container) return; + container.innerHTML = ''; + if (!plugins.length) { + container.innerHTML = '
No actions found.
'; + return; + } + plugins.forEach(plugin => { + const card = createActionCard(plugin, { + onView: (p) => { if (onView) onView(p.name); }, + onEdit: (p) => onEdit(p.name), + onDelete: (p) => onDelete(p.name), + canManage: isAdmin || !plugin.is_global, + isAdmin + }); + container.appendChild(card); + }); } // Toggle auth fields and labels (parameterized for DOM elements) diff --git a/application/single_app/static/js/plugin_modal_stepper.js b/application/single_app/static/js/plugin_modal_stepper.js index 89076076..aa5b4e01 100644 --- a/application/single_app/static/js/plugin_modal_stepper.js +++ b/application/single_app/static/js/plugin_modal_stepper.js @@ -1,6 +1,10 @@ // plugin_modal_stepper.js // Multi-step modal functionality for action/plugin creation import { showToast } from "./chat/chat-toast.js"; +import { getTypeIcon } from "./workspace/view-utils.js"; + +// Action types hidden from the creation UI (backend plugins remain intact) +const HIDDEN_ACTION_TYPES = ['sql_schema', 'ui_test', 'queue_storage', 'blob_storage', 'embedding_model']; export class PluginModalStepper { @@ -129,6 +133,12 @@ export class PluginModalStepper { document.getElementById('sql-auth-type').addEventListener('change', () => this.handleSqlAuthTypeChange()); + // Test SQL connection button + const testConnBtn = document.getElementById('sql-test-connection-btn'); + if (testConnBtn) { + testConnBtn.addEventListener('click', () => this.testSqlConnection()); + } + // Set up display name to generated name conversion this.setupNameGeneration(); @@ -193,6 +203,8 @@ export class PluginModalStepper { if (!res.ok) throw new Error('Failed to load action types'); this.availableTypes = await res.json(); + // Hide deprecated/internal action types from the creation UI + this.availableTypes = this.availableTypes.filter(t => !HIDDEN_ACTION_TYPES.includes(t.type)); // Sort action types alphabetically by display name this.availableTypes.sort((a, b) => { const nameA = (a.display || a.displayName || a.type 
|| a.name || '').toLowerCase(); @@ -271,10 +283,15 @@ export class PluginModalStepper { description.substring(0, maxLength) + '...' : description; const needsTruncation = description.length > maxLength; + const iconClass = getTypeIcon(type.type || type.name); + col.innerHTML = `
-
${this.escapeHtml(displayName)}
+
+ +
${this.escapeHtml(displayName)}
+

${this.escapeHtml(truncatedDescription)} ${needsTruncation ? ` @@ -538,43 +555,52 @@ export class PluginModalStepper { } if (stepNumber === 4) { - // Load additional settings schema for selected type - let options = {forceReload: true}; - this.getAdditionalSettingsSchema(this.selectedType, options); + const isSqlType = this.selectedType === 'sql_query' || this.selectedType === 'sql_schema'; const additionalFieldsDiv = document.getElementById('plugin-additional-fields-div'); - if (additionalFieldsDiv) { - // Only clear and rebuild if type changes - if (this.selectedType !== this.lastAdditionalFieldsType) { - additionalFieldsDiv.innerHTML = ''; - additionalFieldsDiv.classList.remove('d-none'); - if (this.selectedType) { - this.getAdditionalSettingsSchema(this.selectedType) - .then(schema => { - if (schema) { - this.buildAdditionalFieldsUI(schema, additionalFieldsDiv); - try { - if (this.isEditMode && this.originalPlugin && this.originalPlugin.additionalFields) { - this.populateDynamicAdditionalFields(this.originalPlugin.additionalFields); + + // For SQL types, hide additional fields entirely since Step 3 covers all SQL config + if (isSqlType && additionalFieldsDiv) { + additionalFieldsDiv.innerHTML = ''; + additionalFieldsDiv.classList.add('d-none'); + this.lastAdditionalFieldsType = this.selectedType; + } else { + // Load additional settings schema for selected type + let options = {forceReload: true}; + this.getAdditionalSettingsSchema(this.selectedType, options); + if (additionalFieldsDiv) { + // Only clear and rebuild if type changes + if (this.selectedType !== this.lastAdditionalFieldsType) { + additionalFieldsDiv.innerHTML = ''; + additionalFieldsDiv.classList.remove('d-none'); + if (this.selectedType) { + this.getAdditionalSettingsSchema(this.selectedType) + .then(schema => { + if (schema) { + this.buildAdditionalFieldsUI(schema, additionalFieldsDiv); + try { + if (this.isEditMode && this.originalPlugin && this.originalPlugin.additionalFields) { + 
this.populateDynamicAdditionalFields(this.originalPlugin.additionalFields); + } + } catch (error) { + console.error('Error populating dynamic additional fields:', error); } - } catch (error) { - console.error('Error populating dynamic additional fields:', error); + } else { + console.log('No additional settings schema found'); + additionalFieldsDiv.classList.add('d-none'); } - } else { - console.log('No additional settings schema found'); + }) + .catch(error => { + console.error(`Error fetching additional settings schema for type: ${this.selectedType} -- ${error}`); additionalFieldsDiv.classList.add('d-none'); - } - }) - .catch(error => { - console.error(`Error fetching additional settings schema for type: ${this.selectedType} -- ${error}`); - additionalFieldsDiv.classList.add('d-none'); - }); - } else { - console.warn('No plugin type selected'); - additionalFieldsDiv.classList.add('d-none'); + }); + } else { + console.warn('No plugin type selected'); + additionalFieldsDiv.classList.add('d-none'); + } + this.lastAdditionalFieldsType = this.selectedType; } - this.lastAdditionalFieldsType = this.selectedType; + // Otherwise, preserve user data and do not redraw } - // Otherwise, preserve user data and do not redraw } if (!this.isEditMode) { @@ -1230,6 +1256,80 @@ export class PluginModalStepper { this.updateSqlAuthInfo(); } + async testSqlConnection() { + const btn = document.getElementById('sql-test-connection-btn'); + const resultDiv = document.getElementById('sql-test-connection-result'); + const alertDiv = document.getElementById('sql-test-connection-alert'); + if (!btn || !resultDiv || !alertDiv) return; + + // Collect current SQL config from Step 3 + const databaseType = document.querySelector('input[name="sql-database-type"]:checked')?.value; + const connectionMethod = document.querySelector('input[name="sql-connection-method"]:checked')?.value || 'parameters'; + const authType = document.getElementById('sql-auth-type')?.value || 'username_password'; + + if 
(!databaseType) { + resultDiv.classList.remove('d-none'); + alertDiv.className = 'alert alert-warning mb-0 py-2 px-3 small'; + alertDiv.textContent = 'Please select a database type first.'; + return; + } + + const payload = { + database_type: databaseType, + connection_method: connectionMethod, + auth_type: authType + }; + + if (connectionMethod === 'connection_string') { + payload.connection_string = document.getElementById('sql-connection-string')?.value?.trim() || ''; + } else { + payload.server = document.getElementById('sql-server')?.value?.trim() || ''; + payload.database = document.getElementById('sql-database')?.value?.trim() || ''; + payload.port = document.getElementById('sql-port')?.value?.trim() || ''; + if (databaseType === 'sqlserver' || databaseType === 'azure_sql') { + payload.driver = document.getElementById('sql-driver')?.value || ''; + } + } + + if (authType === 'username_password') { + payload.username = document.getElementById('sql-username')?.value?.trim() || ''; + payload.password = document.getElementById('sql-password')?.value?.trim() || ''; + } + + payload.timeout = parseInt(document.getElementById('sql-timeout')?.value) || 10; + + // Show loading state + const originalText = btn.innerHTML; + btn.innerHTML = 'Testing...'; + btn.disabled = true; + resultDiv.classList.add('d-none'); + + try { + const response = await fetch('/api/plugins/test-sql-connection', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(payload) + }); + const data = await response.json(); + + resultDiv.classList.remove('d-none'); + if (data.success) { + alertDiv.className = 'alert alert-success mb-0 py-2 px-3 small'; + alertDiv.innerHTML = '' + (data.message || 'Connection successful!'); + } else { + alertDiv.className = 'alert alert-danger mb-0 py-2 px-3 small'; + alertDiv.innerHTML = '' + (data.error || 'Connection failed.'); + } + } catch (error) { + resultDiv.classList.remove('d-none'); + alertDiv.className = 'alert 
alert-danger mb-0 py-2 px-3 small'; + alertDiv.innerHTML = 'Test failed: ' + (error.message || 'Network error'); + } finally { + btn.innerHTML = originalText; + btn.disabled = false; + } + } + updateSqlConnectionExamples() { const selectedType = document.querySelector('input[name="sql-database-type"]:checked')?.value; const examplesDiv = document.getElementById('sql-connection-examples'); @@ -1720,12 +1820,17 @@ export class PluginModalStepper { // Collect additional fields from the dynamic UI and MERGE with existing additionalFields // This preserves OpenAPI spec content and other auto-populated fields - try { - const dynamicFields = this.collectAdditionalFields(); - // Merge dynamicFields into additionalFields (preserving existing values) - additionalFields = { ...additionalFields, ...dynamicFields }; - } catch (e) { - throw new Error('Invalid additional fields input'); + // For SQL types, Step 3 already provides all necessary config — skip dynamic field merge + // to prevent empty Step 4 fields from overwriting populated Step 3 values + const isSqlType = this.selectedType === 'sql_query' || this.selectedType === 'sql_schema'; + if (!isSqlType) { + try { + const dynamicFields = this.collectAdditionalFields(); + // Merge dynamicFields into additionalFields (preserving existing values) + additionalFields = { ...additionalFields, ...dynamicFields }; + } catch (e) { + throw new Error('Invalid additional fields input'); + } } let metadata = {}; @@ -2106,6 +2211,7 @@ export class PluginModalStepper { populateAdvancedSummary() { const advancedSection = document.getElementById('summary-advanced-section'); + const isSqlType = this.selectedType === 'sql_query' || this.selectedType === 'sql_schema'; // Check if there's any metadata or additional fields const metadata = document.getElementById('plugin-metadata').value.trim(); @@ -2123,9 +2229,33 @@ export class PluginModalStepper { hasMetadata = metadata.length > 0 && metadata !== '{}'; } - // DRY: Use private helper to 
collect additional fields - let additionalFieldsObj = this.collectAdditionalFields(); - hasAdditionalFields = Object.keys(additionalFieldsObj).length > 0; + // For SQL types, additional fields are already shown in the SQL Database Configuration + // summary section, so skip showing them again in Advanced to avoid redundancy + if (!isSqlType) { + // DRY: Use private helper to collect additional fields + let additionalFieldsObj = this.collectAdditionalFields(); + hasAdditionalFields = Object.keys(additionalFieldsObj).length > 0; + + // Show/hide additional fields preview + const additionalFieldsPreview = document.getElementById('summary-additional-fields-preview'); + if (hasAdditionalFields) { + let previewContent = ''; + if (typeof additionalFieldsObj === 'object' && additionalFieldsObj !== null) { + previewContent = JSON.stringify(additionalFieldsObj, null, 2); + } else { + previewContent = ''; + } + document.getElementById('summary-additional-fields-content').textContent = previewContent; + additionalFieldsPreview.style.display = ''; + } else { + additionalFieldsPreview.style.display = 'none'; + } + } else { + // Hide additional fields for SQL types + const additionalFieldsPreview = document.getElementById('summary-additional-fields-preview'); + if (additionalFieldsPreview) additionalFieldsPreview.style.display = 'none'; + hasAdditionalFields = false; + } // Update has metadata/additional fields indicators document.getElementById('summary-has-metadata').textContent = hasMetadata ? 
'Yes' : 'No'; @@ -2140,21 +2270,6 @@ export class PluginModalStepper { metadataPreview.style.display = 'none'; } - // Show/hide additional fields preview - const additionalFieldsPreview = document.getElementById('summary-additional-fields-preview'); - if (hasAdditionalFields) { - let previewContent = ''; - if (typeof additionalFieldsObj === 'object' && additionalFieldsObj !== null) { - previewContent = JSON.stringify(additionalFieldsObj, null, 2); - } else { - previewContent = ''; - } - document.getElementById('summary-additional-fields-content').textContent = previewContent; - additionalFieldsPreview.style.display = ''; - } else { - additionalFieldsPreview.style.display = 'none'; - } - // Show advanced section if there's any advanced content if (hasMetadata || hasAdditionalFields) { advancedSection.style.display = ''; diff --git a/application/single_app/static/js/workspace/group_agents.js b/application/single_app/static/js/workspace/group_agents.js index f97dbd07..608f029e 100644 --- a/application/single_app/static/js/workspace/group_agents.js +++ b/application/single_app/static/js/workspace/group_agents.js @@ -4,16 +4,23 @@ import { showToast } from "../chat/chat-toast.js"; import * as agentsCommon from "../agents_common.js"; import { AgentModalStepper } from "../agent_modal_stepper.js"; +import { + humanizeName, truncateDescription, escapeHtml as escapeHtmlUtil, + setupViewToggle, switchViewContainers, openViewModal, createAgentCard +} from './view-utils.js'; const tableBody = document.getElementById("group-agents-table-body"); const errorContainer = document.getElementById("group-agents-error"); const searchInput = document.getElementById("group-agents-search"); const createButton = document.getElementById("create-group-agent-btn"); const permissionWarning = document.getElementById("group-agents-permission-warning"); +const agentsListView = document.getElementById("group-agents-list-view"); +const agentsGridView = 
document.getElementById("group-agents-grid-view"); let agents = []; let filteredAgents = []; let agentStepper = null; +let currentViewMode = 'list'; let currentContext = window.groupWorkspaceContext || { activeGroupId: null, activeGroupName: "", @@ -21,14 +28,7 @@ let currentContext = window.groupWorkspaceContext || { }; function escapeHtml(value) { - if (!value) return ""; - return value.replace(/[&<>"']/g, (char) => ({ - "&": "&", - "<": "<", - ">": ">", - '"': """, - "'": "'" - }[char] || char)); + return escapeHtmlUtil(value); } function canManageAgents() { @@ -46,6 +46,7 @@ function groupAllowsModifications() { } function truncateName(name, maxLength = 18) { + // Kept for backward compat; prefer humanizeName for display if (!name || name.length <= maxLength) return name || ""; return `${name.substring(0, maxLength)}…`; } @@ -114,29 +115,61 @@ function renderAgentsTable(list) { list.forEach((agent) => { const tr = document.createElement("tr"); - const displayName = truncateName(agent.display_name || agent.displayName || agent.name || ""); - const description = escapeHtml(agent.description || "No description available."); - - let actionsHtml = ""; + const rawName = agent.display_name || agent.displayName || agent.name || ""; + const displayName = humanizeName(rawName); + const fullDesc = agent.description || "No description available."; + const shortDesc = truncateDescription(fullDesc, 90); + + let actionsHtml = ` + + `; if (canManage) { - actionsHtml = ` - - `; } tr.innerHTML = ` - ${escapeHtml(displayName)} - ${description} + ${escapeHtml(displayName)} + ${escapeHtml(shortDesc)} ${actionsHtml}`; tableBody.appendChild(tr); }); } +function renderAgentsGrid(list) { + if (!agentsGridView) return; + agentsGridView.innerHTML = ''; + + if (!list.length) { + agentsGridView.innerHTML = '

No group agents found.
'; + return; + } + + const canManage = canManageAgents() && groupAllowsModifications(); + list.forEach(agent => { + const col = createAgentCard(agent, { + onChat: a => chatWithGroupAgent(a.name || a), + onView: a => openGroupAgentViewModal(a), + onEdit: canManage ? a => { + const found = agents.find(x => x.id === (a.id || a.name || a) || x.name === (a.name || a)); + openAgentModal(found || null); + } : null, + onDelete: canManage ? a => deleteGroupAgent(a.id || a.name || a) : null + }); + agentsGridView.appendChild(col); + }); +} + function filterAgents(term) { if (!term) { filteredAgents = agents.slice(); @@ -149,6 +182,23 @@ function filterAgents(term) { }); } renderAgentsTable(filteredAgents); + renderAgentsGrid(filteredAgents); +} + +// Open the view modal for a group agent with Chat/Edit/Delete actions +function openGroupAgentViewModal(agent) { + const canManage = canManageAgents() && groupAllowsModifications(); + const callbacks = { + onChat: (a) => chatWithGroupAgent(a.name) + }; + if (canManage) { + callbacks.onEdit = (a) => { + const found = agents.find(x => x.id === a.id || x.name === a.name); + openAgentModal(found || a); + }; + callbacks.onDelete = (a) => deleteGroupAgent(a.id || a.name); + } + openViewModal(agent, 'agent', callbacks); } function overrideAgentStepper(stepper) { @@ -343,7 +393,57 @@ async function fetchGroupAgents() { } } +async function chatWithGroupAgent(agentName) { + try { + const agent = agents.find(a => a.name === agentName); + if (!agent) { + throw new Error("Agent not found"); + } + + const payloadData = { + selected_agent: { + name: agentName, + display_name: agent.display_name || agent.displayName || agentName, + is_global: !!agent.is_global, + is_group: true, + group_id: currentContext.activeGroupId, + group_name: currentContext.activeGroupName + } + }; + + const resp = await fetch("/api/user/settings/selected_agent", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payloadData) + 
}); + + if (!resp.ok) { + throw new Error("Failed to select agent"); + } + + window.location.href = "/chats"; + } catch (err) { + console.error("Error selecting group agent for chat:", err); + showToast("Error selecting agent for chat. Please try again.", "danger"); + } +} + function handleTableClick(event) { + const viewBtn = event.target.closest(".view-group-agent-btn"); + if (viewBtn) { + const agentName = viewBtn.dataset.agentName; + const agent = agents.find(a => a.name === agentName); + if (agent) openGroupAgentViewModal(agent); + return; + } + + const chatBtn = event.target.closest(".chat-group-agent-btn"); + if (chatBtn) { + const agentName = chatBtn.dataset.agentName; + chatWithGroupAgent(agentName); + return; + } + const editBtn = event.target.closest(".edit-group-agent-btn"); if (editBtn) { const agentId = editBtn.dataset.agentId; @@ -384,6 +484,11 @@ function initialize() { updatePermissionUI(); bindEventHandlers(); + setupViewToggle('groupAgents', 'groupAgentsViewPreference', (mode) => { + currentViewMode = mode; + switchViewContainers(mode, agentsListView, agentsGridView); + }); + if (document.getElementById("group-agents-tab-btn")?.classList.contains("active")) { fetchGroupAgents(); } diff --git a/application/single_app/static/js/workspace/group_plugins.js b/application/single_app/static/js/workspace/group_plugins.js index 60a7f42e..8acdf5bd 100644 --- a/application/single_app/static/js/workspace/group_plugins.js +++ b/application/single_app/static/js/workspace/group_plugins.js @@ -3,6 +3,10 @@ import { ensurePluginsTableInRoot, validatePluginManifest } from "../plugin_common.js"; import { showToast } from "../chat/chat-toast.js"; +import { + humanizeName, truncateDescription, escapeHtml as escapeHtmlUtil, + setupViewToggle, switchViewContainers, openViewModal, createActionCard +} from './view-utils.js'; const root = document.getElementById("group-plugins-root"); const permissionWarning = document.getElementById("group-plugins-permission-warning"); 
@@ -11,6 +15,7 @@ let plugins = []; let filteredPlugins = []; let templateReady = false; let listenersBound = false; +let currentViewMode = 'list'; let currentContext = window.groupWorkspaceContext || { activeGroupId: null, activeGroupName: "", @@ -18,14 +23,7 @@ let currentContext = window.groupWorkspaceContext || { }; function escapeHtml(value) { - if (!value) return ""; - return value.replace(/[&<>"']/g, (char) => ({ - "&": "&", - "<": "<", - ">": ">", - '"': """, - "'": "'" - }[char] || char)); + return escapeHtmlUtil(value); } function canManagePlugins() { @@ -66,6 +64,14 @@ function bindRootEvents() { }); root.addEventListener("click", async (event) => { + const viewBtn = event.target.closest(".view-group-plugin-btn"); + if (viewBtn) { + const pluginId = viewBtn.dataset.pluginId; + const plugin = plugins.find(x => x.id === pluginId || x.name === pluginId); + if (plugin) openGroupPluginViewModal(plugin); + return; + } + const createBtn = event.target.closest("#create-group-plugin-btn"); if (createBtn) { event.preventDefault(); @@ -148,23 +154,28 @@ function renderPluginsTable(list) { const canManage = canManagePlugins() && groupAllowsModifications(); list.forEach((plugin) => { const tr = document.createElement("tr"); - const displayName = plugin.displayName || plugin.display_name || plugin.name || ""; - const description = plugin.description || "No description available."; + const rawName = plugin.displayName || plugin.display_name || plugin.name || ""; + const displayName = humanizeName(rawName); + const fullDesc = plugin.description || "No description available."; + const shortDesc = truncateDescription(fullDesc, 90); const isGlobal = Boolean(plugin.is_global); - let actionsHtml = ""; + // View button always visible + let actionsHtml = ` + `; + if (canManage && !isGlobal) { - actionsHtml = ` -
- - -
`; + actionsHtml += ` + + `; } else if (canManage && isGlobal) { - actionsHtml = "Managed globally"; + actionsHtml += `Managed globally`; } const titleHtml = isGlobal @@ -172,14 +183,36 @@ function renderPluginsTable(list) { : escapeHtml(displayName); tr.innerHTML = ` - ${titleHtml} - ${escapeHtml(description)} + ${titleHtml} + ${escapeHtml(shortDesc)} ${actionsHtml}`; tbody.appendChild(tr); }); } +function renderPluginsGrid(list) { + const gridView = document.getElementById('group-plugins-grid-view'); + if (!gridView) return; + gridView.innerHTML = ''; + + if (!list.length) { + gridView.innerHTML = '
No group actions found.
'; + return; + } + + const canManage = canManagePlugins() && groupAllowsModifications(); + list.forEach(plugin => { + const isGlobal = Boolean(plugin.is_global); + const col = createActionCard(plugin, { + onView: p => openGroupPluginViewModal(p), + onEdit: (canManage && !isGlobal) ? p => openPluginModal(p.id || p.name) : null, + onDelete: (canManage && !isGlobal) ? p => deleteGroupPlugin(p.id || p.name) : null + }); + gridView.appendChild(col); + }); +} + function filterPlugins(term) { if (!term) { filteredPlugins = plugins.slice(); @@ -192,6 +225,19 @@ function filterPlugins(term) { }); } renderPluginsTable(filteredPlugins); + renderPluginsGrid(filteredPlugins); +} + +// Open the view modal for a group action with Edit/Delete actions +function openGroupPluginViewModal(plugin) { + const canManage = canManagePlugins() && groupAllowsModifications(); + const isGlobal = Boolean(plugin.is_global); + const callbacks = {}; + if (canManage && !isGlobal) { + callbacks.onEdit = (p) => openPluginModal(p.id || p.name); + callbacks.onDelete = (p) => deleteGroupPlugin(p.id || p.name); + } + openViewModal(plugin, 'action', callbacks); } async function fetchGroupPlugins() { @@ -220,7 +266,17 @@ async function fetchGroupPlugins() { filteredPlugins = plugins.slice(); renderPluginsTable(filteredPlugins); + renderPluginsGrid(filteredPlugins); updatePermissionUI(); + + // Set up view toggle (only once after template is in DOM) + setupViewToggle('groupPlugins', 'groupPluginsViewPreference', (mode) => { + currentViewMode = mode; + switchViewContainers(mode, + document.getElementById('group-plugins-list-view'), + document.getElementById('group-plugins-grid-view') + ); + }); } catch (error) { console.error("Error loading group actions:", error); renderError(error.message || "Unable to load group actions."); diff --git a/application/single_app/static/js/workspace/view-utils.js b/application/single_app/static/js/workspace/view-utils.js new file mode 100644 index 00000000..3b78bc15 --- 
/dev/null +++ b/application/single_app/static/js/workspace/view-utils.js @@ -0,0 +1,523 @@ +// view-utils.js +// Shared utilities for list/grid view toggle, name humanization, and view modal +// Used by personal and group agents/actions workspace modules + +/** + * Convert a technical name to a human-readable display name. + * Handles underscores, camelCase, PascalCase, and consecutive uppercase. + * Examples: + * "sql_query" → "Sql Query" + * "myAgentName" → "My Agent Name" + * "OpenAPIPlugin" → "Open API Plugin" + * "log_analytics" → "Log Analytics" + */ +export function humanizeName(name) { + if (!name) return ""; + // Replace underscores and hyphens with spaces + let result = name.replace(/[_-]/g, " "); + // Insert space before uppercase letters that follow lowercase letters (camelCase) + result = result.replace(/([a-z])([A-Z])/g, "$1 $2"); + // Insert space between consecutive uppercase followed by lowercase (e.g., "APIPlugin" → "API Plugin") + result = result.replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2"); + // Capitalize first letter of each word + result = result.replace(/\b\w/g, (c) => c.toUpperCase()); + // Collapse multiple spaces + result = result.replace(/\s+/g, " ").trim(); + return result; +} + +/** + * Truncate a description string to maxLen characters, appending "…" if truncated. + */ +export function truncateDescription(text, maxLen = 100) { + if (!text) return ""; + if (text.length <= maxLen) return text; + return text.substring(0, maxLen).trimEnd() + "…"; +} + +/** + * Escape HTML entities to prevent XSS. + */ +export function escapeHtml(str) { + if (!str) return ""; + return str.replace(/[&<>"']/g, (c) => + ({ "&": "&", "<": "<", ">": ">", '"': """, "'": "'" }[c]) + ); +} + +/** + * Get an appropriate Bootstrap icon class for an action/plugin type. 
+ */ +export function getTypeIcon(type) { + if (!type) return "bi-lightning-charge"; + const t = type.toLowerCase(); + if (t.includes("sql")) return "bi-database"; + if (t.includes("openapi")) return "bi-globe"; + if (t.includes("log_analytics")) return "bi-graph-up"; + if (t.includes("msgraph")) return "bi-microsoft"; + if (t.includes("databricks")) return "bi-bricks"; + if (t.includes("http") || t.includes("smart_http")) return "bi-cloud-arrow-up"; + if (t.includes("azure_function")) return "bi-lightning"; + if (t.includes("blob")) return "bi-file-earmark"; + if (t.includes("queue")) return "bi-inbox"; + if (t.includes("embedding")) return "bi-vector-pen"; + if (t.includes("fact_memory")) return "bi-brain"; + if (t.includes("math")) return "bi-calculator"; + if (t.includes("text")) return "bi-fonts"; + if (t.includes("time")) return "bi-clock"; + return "bi-lightning-charge"; +} + +/** + * Create the HTML string for a list/grid view toggle button group. + * @param {string} prefix - Unique prefix for element IDs (e.g., "agents", "plugins", "group-agents") + * @returns {string} HTML string + */ +export function createViewToggleHtml(prefix) { + return ` +
+ + + + +
`; +} + +/** + * Set up view toggle event listeners and restore saved preference. + * @param {string} prefix - Unique prefix matching createViewToggleHtml + * @param {string} storageKey - localStorage key for persistence + * @param {function} onSwitch - Callback receiving 'list' or 'grid' + */ +export function setupViewToggle(prefix, storageKey, onSwitch) { + const listRadio = document.getElementById(`${prefix}-view-list`); + const gridRadio = document.getElementById(`${prefix}-view-grid`); + if (!listRadio || !gridRadio) return; + + listRadio.addEventListener("change", () => { + if (listRadio.checked) { + localStorage.setItem(storageKey, "list"); + onSwitch("list"); + } + }); + + gridRadio.addEventListener("change", () => { + if (gridRadio.checked) { + localStorage.setItem(storageKey, "grid"); + onSwitch("grid"); + } + }); + + // Restore saved preference + const saved = localStorage.getItem(storageKey); + if (saved === "grid") { + gridRadio.checked = true; + listRadio.checked = false; + onSwitch("grid"); + } else { + onSwitch("list"); + } +} + +/** + * Toggle visibility of list and grid containers. + * @param {string} mode - 'list' or 'grid' + * @param {HTMLElement} listContainer - The list/table container element + * @param {HTMLElement} gridContainer - The grid container element + */ +export function switchViewContainers(mode, listContainer, gridContainer) { + if (listContainer) { + listContainer.classList.toggle("d-none", mode !== "list"); + } + if (gridContainer) { + gridContainer.classList.toggle("d-none", mode !== "grid"); + } +} + +// ============================================================================ +// VIEW MODAL — Lightweight read-only detail view +// ============================================================================ + +/** + * Open a read-only view modal for an agent or action. 
+ * @param {object} item - The agent or action data object + * @param {'agent'|'action'} type - What kind of item this is + * @param {object} [callbacks] - Optional action callbacks { onChat, onEdit, onDelete } + */ +export function openViewModal(item, type, callbacks = {}) { + const modalEl = document.getElementById("item-view-modal"); + if (!modalEl) return; + + const titleEl = modalEl.querySelector(".modal-title"); + const bodyEl = modalEl.querySelector(".modal-body"); + const footerEl = modalEl.querySelector(".modal-footer"); + if (!titleEl || !bodyEl || !footerEl) return; + + if (type === "agent") { + titleEl.textContent = "Agent Details"; + bodyEl.innerHTML = buildAgentViewHtml(item); + } else { + titleEl.textContent = "Action Details"; + bodyEl.innerHTML = buildActionViewHtml(item); + } + + // Build footer buttons dynamically + footerEl.innerHTML = ''; + const { onChat, onEdit, onDelete } = callbacks; + + if (onChat && typeof onChat === 'function') { + const chatBtn = document.createElement('button'); + chatBtn.type = 'button'; + chatBtn.className = 'btn btn-primary'; + chatBtn.innerHTML = 'Chat'; + chatBtn.addEventListener('click', () => { + bootstrap.Modal.getInstance(modalEl)?.hide(); + onChat(item); + }); + footerEl.appendChild(chatBtn); + } + + if (onEdit && typeof onEdit === 'function') { + const editBtn = document.createElement('button'); + editBtn.type = 'button'; + editBtn.className = 'btn btn-outline-secondary'; + editBtn.innerHTML = 'Edit'; + editBtn.addEventListener('click', () => { + bootstrap.Modal.getInstance(modalEl)?.hide(); + onEdit(item); + }); + footerEl.appendChild(editBtn); + } + + if (onDelete && typeof onDelete === 'function') { + const delBtn = document.createElement('button'); + delBtn.type = 'button'; + delBtn.className = 'btn btn-outline-danger'; + delBtn.innerHTML = 'Delete'; + delBtn.addEventListener('click', () => { + bootstrap.Modal.getInstance(modalEl)?.hide(); + onDelete(item); + }); + footerEl.appendChild(delBtn); + } + + 
const closeBtn = document.createElement('button'); + closeBtn.type = 'button'; + closeBtn.className = 'btn btn-secondary'; + closeBtn.textContent = 'Close'; + closeBtn.setAttribute('data-bs-dismiss', 'modal'); + footerEl.appendChild(closeBtn); + + const modal = new bootstrap.Modal(modalEl); + modal.show(); +} + +function buildAgentViewHtml(agent) { + const displayName = escapeHtml(agent.display_name || agent.displayName || agent.name || ""); + const name = escapeHtml(agent.name || ""); + const description = escapeHtml(agent.description || "No description available."); + const model = escapeHtml(agent.azure_openai_gpt_deployment || agent.model || "Default"); + const agentType = agent.agent_type === "aifoundry" ? "Azure AI Foundry" : "Local (Semantic Kernel)"; + const rawInstructions = agent.instructions || "No instructions defined."; + // Render instructions as Markdown (marked + DOMPurify are loaded globally in base.html) + const renderedInstructions = (typeof marked !== 'undefined' && typeof DOMPurify !== 'undefined') + ? DOMPurify.sanitize(marked.parse(rawInstructions)) + : escapeHtml(rawInstructions); + const isGlobal = agent.is_global; + const scopeBadge = isGlobal + ? 'Global' + : 'Personal'; + + return ` +
+
+ Basic Information +
+
+
+
+ + ${displayName} +
+
+ + ${name} +
+
+ + ${scopeBadge} +
+
+ + ${escapeHtml(agentType)} +
+
+ + ${description} +
+
+
+
+
+
+ Model Configuration +
+
+
+
+ + ${model} +
+
+
+
+
+
+ Instructions +
+
+
+${renderedInstructions} +
+
+
`; +} + +function buildActionViewHtml(action) { + const displayName = escapeHtml(action.display_name || action.displayName || action.name || ""); + const name = escapeHtml(action.name || ""); + const description = escapeHtml(action.description || "No description available."); + const type = escapeHtml(action.type || "unknown"); + const typeIcon = getTypeIcon(action.type); + const authType = escapeHtml(formatAuthType(action.auth?.type || action.auth_type || "")); + const endpoint = escapeHtml(action.endpoint || action.base_url || ""); + const isGlobal = action.is_global; + const scopeBadge = isGlobal + ? 'Global' + : 'Personal'; + + let configHtml = ""; + if (endpoint) { + configHtml = ` +
+
+ Configuration +
+
+
+
+ + ${endpoint} +
+
+ + ${authType || "None"} +
+
+
+
`; + } + + return ` +
+
+ Basic Information +
+
+
+
+ + ${displayName} +
+
+ + ${name} +
+
+ + ${humanizeName(type)} +
+
+ + ${scopeBadge} +
+
+ + ${description} +
+
+
+
+ ${configHtml}`; +} + +function formatAuthType(type) { + if (!type) return ""; + const map = { + "key": "API Key", + "identity": "Managed Identity", + "user": "User (Delegated)", + "servicePrincipal": "Service Principal", + "connection_string": "Connection String", + "basic": "Basic Auth", + "username_password": "Username / Password", + "NoAuth": "No Authentication" + }; + return map[type] || type; +} + +// ============================================================================ +// GRID CARD RENDERERS +// ============================================================================ + +/** + * Create a grid card element for an agent. + * @param {object} agent - Agent data object + * @param {object} options - { onChat, onView, onEdit, onDelete, canManage, isGroup } + * @returns {HTMLElement} + */ +export function createAgentCard(agent, options = {}) { + const { onChat, onView, onEdit, onDelete, canManage = false, isGroup = false } = options; + const col = document.createElement("div"); + col.className = "col-sm-6 col-md-4 col-lg-3"; + + const displayName = humanizeName(agent.display_name || agent.displayName || agent.name || ""); + const description = agent.description || "No description available."; + const isGlobal = agent.is_global; + + let badgeHtml = ""; + if (isGlobal) { + badgeHtml = 'Global'; + } + + let buttonsHtml = ` + + `; + + if (canManage && !isGlobal) { + buttonsHtml += ` + + `; + } + + col.innerHTML = ` +
+
+
+ +
+
${escapeHtml(displayName)}${badgeHtml}
+

${escapeHtml(truncateDescription(description, 120))}

+
+ ${buttonsHtml} +
+
+
`; + + // Bind button events + const chatBtn = col.querySelector(".item-card-chat-btn"); + const viewBtn = col.querySelector(".item-card-view-btn"); + const editBtn = col.querySelector(".item-card-edit-btn"); + const deleteBtn = col.querySelector(".item-card-delete-btn"); + + if (chatBtn && onChat) chatBtn.addEventListener("click", (e) => { e.stopPropagation(); onChat(agent); }); + if (viewBtn && onView) viewBtn.addEventListener("click", (e) => { e.stopPropagation(); onView(agent); }); + if (editBtn && onEdit) editBtn.addEventListener("click", (e) => { e.stopPropagation(); onEdit(agent); }); + if (deleteBtn && onDelete) deleteBtn.addEventListener("click", (e) => { e.stopPropagation(); onDelete(agent); }); + + // Clicking anywhere on the card opens the detail view + const cardEl = col.querySelector(".item-card"); + if (cardEl && onView) { + cardEl.style.cursor = "pointer"; + cardEl.addEventListener("click", () => onView(agent)); + } + + return col; +} + +/** + * Create a grid card element for an action/plugin. + * @param {object} plugin - Action/plugin data object + * @param {object} options - { onView, onEdit, onDelete, canManage, isAdmin } + * @returns {HTMLElement} + */ +export function createActionCard(plugin, options = {}) { + const { onView, onEdit, onDelete, canManage = true, isAdmin = false } = options; + const col = document.createElement("div"); + col.className = "col-sm-6 col-md-4 col-lg-3"; + + const displayName = humanizeName(plugin.display_name || plugin.displayName || plugin.name || ""); + const description = plugin.description || "No description available."; + const type = plugin.type || ""; + const typeIcon = getTypeIcon(type); + const isGlobal = plugin.is_global; + + let badgeHtml = ""; + if (isGlobal) { + badgeHtml = 'Global'; + } + + const typeBadge = type + ? `${escapeHtml(humanizeName(type))}` + : ""; + + let buttonsHtml = ` + `; + + if ((isAdmin || (canManage && !isGlobal))) { + buttonsHtml += ` + + `; + } + + col.innerHTML = ` +
+
+
+ +
+
${escapeHtml(displayName)}${badgeHtml}
+
${typeBadge}
+

${escapeHtml(truncateDescription(description, 120))}

+
+ ${buttonsHtml} +
+
+
`; + + // Bind button events + const viewBtn = col.querySelector(".item-card-view-btn"); + const editBtn = col.querySelector(".item-card-edit-btn"); + const deleteBtn = col.querySelector(".item-card-delete-btn"); + + if (viewBtn && onView) viewBtn.addEventListener("click", (e) => { e.stopPropagation(); onView(plugin); }); + if (editBtn && onEdit) editBtn.addEventListener("click", (e) => { e.stopPropagation(); onEdit(plugin); }); + if (deleteBtn && onDelete) deleteBtn.addEventListener("click", (e) => { e.stopPropagation(); onDelete(plugin); }); + + // Clicking anywhere on the card opens the detail view + const cardEl = col.querySelector(".item-card"); + if (cardEl && onView) { + cardEl.style.cursor = "pointer"; + cardEl.addEventListener("click", () => onView(plugin)); + } + + return col; +} diff --git a/application/single_app/static/js/workspace/workspace_agents.js b/application/single_app/static/js/workspace/workspace_agents.js index a0839b25..623be234 100644 --- a/application/single_app/static/js/workspace/workspace_agents.js +++ b/application/single_app/static/js/workspace/workspace_agents.js @@ -4,14 +4,22 @@ import { showToast } from "../chat/chat-toast.js"; import * as agentsCommon from '../agents_common.js'; import { AgentModalStepper } from '../agent_modal_stepper.js'; +import { + humanizeName, truncateDescription, escapeHtml, + setupViewToggle, switchViewContainers, + openViewModal, createAgentCard +} from './view-utils.js'; // --- DOM Elements & Globals --- const agentsTbody = document.getElementById('agents-table-body'); const agentsErrorDiv = document.getElementById('workspace-agents-error'); const createAgentBtn = document.getElementById('create-agent-btn'); const agentsSearchInput = document.getElementById('agents-search'); +const agentsListView = document.getElementById('agents-list-view'); +const agentsGridView = document.getElementById('agents-grid-view'); let agents = []; let filteredAgents = []; +let currentViewMode = 'list'; // --- Function 
Definitions --- @@ -43,104 +51,87 @@ function filterAgents(searchTerm) { }); } renderAgentsTable(filteredAgents); + renderAgentsGrid(filteredAgents); } -// --- Helper Functions --- - -function truncateDisplayName(displayName, maxLength = 12) { - if (!displayName || displayName.length <= maxLength) { - return displayName; +// Open the view modal for an agent with Chat/Edit/Delete actions in the footer +function openAgentViewModal(agent) { + const callbacks = { + onChat: (a) => chatWithAgent(a.name), + onDelete: !agent.is_global ? (a) => { if (confirm(`Delete agent '${a.name}'?`)) deleteAgent(a.name); } : null + }; + if (!agent.is_global) { + callbacks.onEdit = (a) => openAgentModal(a); } - return displayName.substring(0, maxLength) + '...'; + openViewModal(agent, 'agent', callbacks); } +// --- Rendering Functions --- function renderAgentsTable(agentsList) { if (!agentsTbody) return; agentsTbody.innerHTML = ''; if (!agentsList.length) { const tr = document.createElement('tr'); - tr.innerHTML = 'No agents found.'; + tr.innerHTML = 'No agents found.'; agentsTbody.appendChild(tr); return; } - // Fetch selected_agent from user settings (async) - fetch('/api/user/settings').then(res => { - if (!res.ok) throw new Error('Failed to load user settings'); - return res.json(); - }).then(settings => { - let selectedAgentObj = settings.selected_agent; - if (!selectedAgentObj && settings.settings && settings.settings.selected_agent) { - selectedAgentObj = settings.settings.selected_agent; - } - let selectedAgentName = typeof selectedAgentObj === 'object' ? selectedAgentObj.name : selectedAgentObj; - agentsTbody.innerHTML = ''; - for (const agent of agentsList) { - const tr = document.createElement('tr'); - - // Create action buttons - let actionButtons = ``; - - if (!agent.is_global) { - actionButtons += ` - - - `; - } - - const truncatedDisplayName = truncateDisplayName(agent.display_name || agent.name || ''); - - tr.innerHTML = ` - - ${truncatedDisplayName} - ${agent.is_global ? 
' Global' : ''} - - ${agent.description || 'No description available'} - ${actionButtons} - `; - agentsTbody.appendChild(tr); - } - }).catch(e => { - renderError('Could not load agent settings: ' + e.message); - // Fallback: render table without settings - agentsTbody.innerHTML = ''; - for (const agent of agentsList) { - const tr = document.createElement('tr'); - - // Create action buttons - let actionButtons = ` + `; - - if (!agent.is_global) { - actionButtons += ` - - - `; - } - - const truncatedDisplayName = truncateDisplayName(agent.display_name || agent.name || ''); - - tr.innerHTML = ` - - ${truncatedDisplayName} - ${agent.is_global ? ' Global' : ''} - - ${agent.description || 'No description available'} - ${actionButtons} - `; - agentsTbody.appendChild(tr); + + if (!isGlobal) { + actionButtons += ` + + `; } - }); + + tr.innerHTML = ` + + ${escapeHtml(displayName)} + ${isGlobal ? ' Global' : ''} + + ${escapeHtml(truncatedDesc)} + ${actionButtons} + `; + agentsTbody.appendChild(tr); + } +} + +function renderAgentsGrid(agentsList) { + if (!agentsGridView) return; + agentsGridView.innerHTML = ''; + if (!agentsList.length) { + agentsGridView.innerHTML = '
No agents found.
'; + return; + } + + for (const agent of agentsList) { + const card = createAgentCard(agent, { + onChat: (a) => chatWithAgent(a.name), + onView: (a) => openAgentViewModal(a), + onEdit: (a) => openAgentModal(a), + onDelete: (a) => { if (confirm(`Delete agent '${a.name}'?`)) deleteAgent(a.name); }, + canManage: !agent.is_global + }); + agentsGridView.appendChild(card); + } } async function fetchAgents() { @@ -151,6 +142,7 @@ async function fetchAgents() { agents = await res.json(); filteredAgents = agents; // Initialize filtered list renderAgentsTable(filteredAgents); + renderAgentsGrid(filteredAgents); } catch (e) { renderError(e.message); } @@ -177,17 +169,14 @@ function attachAgentTableEvents() { } agentsTbody.addEventListener('click', function (e) { - console.log('Agent table clicked, target:', e.target); - // Find the button element (could be the target or a parent) const editBtn = e.target.closest('.edit-agent-btn'); const deleteBtn = e.target.closest('.delete-agent-btn'); const chatBtn = e.target.closest('.chat-agent-btn'); + const viewBtn = e.target.closest('.view-agent-btn'); if (editBtn) { - console.log('Edit agent button clicked, dataset:', editBtn.dataset); const agent = agents.find(a => a.name === editBtn.dataset.name); - console.log('Found agent:', agent); openAgentModal(agent); } @@ -201,33 +190,27 @@ function attachAgentTableEvents() { const agentName = chatBtn.dataset.name; chatWithAgent(agentName); } + + if (viewBtn) { + const agent = agents.find(a => a.name === viewBtn.dataset.name); + if (agent) openAgentViewModal(agent); + } }); } async function chatWithAgent(agentName) { try { - console.log('DEBUG: chatWithAgent called with agentName:', agentName); - console.log('DEBUG: Available agents:', agents); - - // Find the agent to get its is_global status const agent = agents.find(a => a.name === agentName); - console.log('DEBUG: Found agent:', agent); - if (!agent) { throw new Error('Agent not found'); } - console.log('DEBUG: Agent is_global flag:', 
agent.is_global); - console.log('DEBUG: !!agent.is_global:', !!agent.is_global); - - // Set the selected agent with proper is_global flag const payloadData = { selected_agent: { name: agentName, is_global: !!agent.is_global } }; - console.log('DEBUG: Sending payload:', payloadData); const resp = await fetch('/api/user/settings/selected_agent', { method: 'POST', @@ -239,9 +222,6 @@ async function chatWithAgent(agentName) { throw new Error('Failed to select agent'); } - console.log('DEBUG: Agent selection saved successfully'); - - // Navigate to chat page window.location.href = '/chats'; } catch (err) { console.error('Error selecting agent for chat:', err); @@ -353,6 +333,17 @@ async function deleteAgent(name) { function initializeWorkspaceAgentUI() { window.agentModalStepper = new AgentModalStepper(false); attachAgentTableEvents(); + + // Set up view toggle + setupViewToggle('agents', 'agentsViewPreference', (mode) => { + currentViewMode = mode; + switchViewContainers(mode, agentsListView, agentsGridView); + // Re-render grid if switching to grid and we have data + if (mode === 'grid' && filteredAgents.length) { + renderAgentsGrid(filteredAgents); + } + }); + fetchAgents(); } diff --git a/application/single_app/static/js/workspace/workspace_plugins.js b/application/single_app/static/js/workspace/workspace_plugins.js index 30fef0d5..84f1eb46 100644 --- a/application/single_app/static/js/workspace/workspace_plugins.js +++ b/application/single_app/static/js/workspace/workspace_plugins.js @@ -1,10 +1,14 @@ // workspace_plugins.js (refactored to use plugin_common.js and new multi-step modal) -import { renderPluginsTable, ensurePluginsTableInRoot, validatePluginManifest } from '../plugin_common.js'; +import { renderPluginsTable, renderPluginsGrid, ensurePluginsTableInRoot, validatePluginManifest } from '../plugin_common.js'; import { showToast } from "../chat/chat-toast.js" +import { + setupViewToggle, switchViewContainers, openViewModal +} from './view-utils.js'; const 
root = document.getElementById('workspace-plugins-root'); let plugins = []; let filteredPlugins = []; +let currentViewMode = 'list'; function renderLoading() { root.innerHTML = `
Loading...
`; @@ -14,6 +18,22 @@ function renderError(msg) { root.innerHTML = `
${msg}
`; } +function getViewHandlers() { + return { + onEdit: name => openPluginModal(plugins.find(p => p.name === name)), + onDelete: name => deletePlugin(name), + onView: name => { + const plugin = plugins.find(p => p.name === name); + if (plugin) { + openViewModal(plugin, 'action', { + onEdit: (item) => openPluginModal(item), + onDelete: (item) => deletePlugin(item.name) + }); + } + } + }; +} + function filterPlugins(searchTerm) { if (!searchTerm || !searchTerm.trim()) { filteredPlugins = plugins; @@ -26,14 +46,18 @@ function filterPlugins(searchTerm) { }); } - // Ensure table template is in place ensurePluginsTableInRoot(); + const handlers = getViewHandlers(); renderPluginsTable({ plugins: filteredPlugins, tbodySelector: '#plugins-table-body', - onEdit: name => openPluginModal(plugins.find(p => p.name === name)), - onDelete: name => deletePlugin(name) + ...handlers + }); + renderPluginsGrid({ + plugins: filteredPlugins, + containerSelector: '#plugins-grid-view', + ...handlers }); } @@ -47,12 +71,26 @@ async function fetchPlugins() { // Ensure table template is in place ensurePluginsTableInRoot(); + const handlers = getViewHandlers(); renderPluginsTable({ plugins: filteredPlugins, tbodySelector: '#plugins-table-body', - onEdit: name => openPluginModal(plugins.find(p => p.name === name)), - onDelete: name => deletePlugin(name) + ...handlers + }); + renderPluginsGrid({ + plugins: filteredPlugins, + containerSelector: '#plugins-grid-view', + ...handlers + }); + + // Set up view toggle (only once after template is in DOM) + setupViewToggle('plugins', 'pluginsViewPreference', (mode) => { + currentViewMode = mode; + switchViewContainers(mode, + document.getElementById('plugins-list-view'), + document.getElementById('plugins-grid-view') + ); }); // Set up the create action button diff --git a/application/single_app/static/json/schemas/sql_query.definition.json b/application/single_app/static/json/schemas/sql_query.definition.json index d38a41a8..6903c22a 100644 --- 
a/application/single_app/static/json/schemas/sql_query.definition.json +++ b/application/single_app/static/json/schemas/sql_query.definition.json @@ -1,6 +1,9 @@ { "$schema": "./plugin.definition.schema.json", "allowedAuthTypes": [ + "user", + "identity", + "servicePrincipal", "connection_string" ] } diff --git a/application/single_app/static/json/schemas/sql_query_plugin.additional_settings.schema.json b/application/single_app/static/json/schemas/sql_query_plugin.additional_settings.schema.json index 9e4f6d34..f7f46ebd 100644 --- a/application/single_app/static/json/schemas/sql_query_plugin.additional_settings.schema.json +++ b/application/single_app/static/json/schemas/sql_query_plugin.additional_settings.schema.json @@ -3,13 +3,13 @@ "title": "SQL Query Plugin Additional Settings", "type": "object", "properties": { - "connection_string__Secret": { + "connection_string": { "type": "string", "description": "Database connection string. Required if server/database not provided." }, "database_type": { "type": "string", - "enum": ["sqlserver", "postgresql", "mysql", "sqlite", "azure_sql", "azuresql"], + "enum": ["sqlserver", "postgresql", "mysql", "sqlite", "azure_sql"], "description": "Type of database engine." }, "server": { @@ -24,7 +24,7 @@ "type": "string", "description": "Username for authentication." }, - "password__Secret": { + "password": { "type": "string", "description": "Password for authentication." }, @@ -50,6 +50,6 @@ "description": "Query timeout in seconds." 
} }, - "required": ["database_type", "database"], + "required": ["database_type"], "additionalProperties": false } diff --git a/application/single_app/static/json/schemas/sql_schema.definition.json b/application/single_app/static/json/schemas/sql_schema.definition.json index d38a41a8..6903c22a 100644 --- a/application/single_app/static/json/schemas/sql_schema.definition.json +++ b/application/single_app/static/json/schemas/sql_schema.definition.json @@ -1,6 +1,9 @@ { "$schema": "./plugin.definition.schema.json", "allowedAuthTypes": [ + "user", + "identity", + "servicePrincipal", "connection_string" ] } diff --git a/application/single_app/static/json/schemas/sql_schema_plugin.additional_settings.schema.json b/application/single_app/static/json/schemas/sql_schema_plugin.additional_settings.schema.json index e97c7b4b..29fb6b3f 100644 --- a/application/single_app/static/json/schemas/sql_schema_plugin.additional_settings.schema.json +++ b/application/single_app/static/json/schemas/sql_schema_plugin.additional_settings.schema.json @@ -3,13 +3,13 @@ "title": "SQL Schema Plugin Additional Settings", "type": "object", "properties": { - "connection_string__Secret": { + "connection_string": { "type": "string", "description": "Database connection string. Required if server/database not provided." }, "database_type": { "type": "string", - "enum": ["sqlserver", "postgresql", "mysql", "sqlite", "azure_sql", "azuresql"], + "enum": ["sqlserver", "postgresql", "mysql", "sqlite", "azure_sql"], "description": "Type of database engine." }, "server": { @@ -24,7 +24,7 @@ "type": "string", "description": "Username for authentication." }, - "password__Secret": { + "password": { "type": "string", "description": "Password for authentication." }, @@ -33,6 +33,6 @@ "description": "ODBC or DB driver name." 
} }, - "required": ["database_type", "database"], + "required": ["database_type"], "additionalProperties": false } diff --git a/application/single_app/templates/_agent_examples_modal.html b/application/single_app/templates/_agent_examples_modal.html index 52f95cdc..398e930c 100644 --- a/application/single_app/templates/_agent_examples_modal.html +++ b/application/single_app/templates/_agent_examples_modal.html @@ -92,7 +92,7 @@
-

+          
@@ -427,7 +427,12 @@
+ + +
+
+ +
+ + +
+
+
+ +
+
Advanced
+

Advanced settings are typically not required. Expand below if you need to customize metadata or additional fields.

- - -
Optional metadata for this action.
+
-
- - -
Additional configuration fields specific to this action type.
+
+
+ + +
Optional metadata for this action.
+
+
+ + +
Additional configuration fields specific to this action type.
+
@@ -777,6 +802,15 @@
background-color: #f8f9fa; } +/* Advanced toggle chevron animation */ +#plugin-advanced-toggle-icon { + transition: transform 0.3s ease; +} +#plugin-advanced-collapse.show ~ .mb-3 #plugin-advanced-toggle-icon, +[aria-expanded="true"] #plugin-advanced-toggle-icon { + transform: rotate(180deg); +} + .sql-connection-config, .sql-auth-config { background-color: white; diff --git a/application/single_app/templates/_sidebar_nav.html b/application/single_app/templates/_sidebar_nav.html index a0bceee8..33a89b04 100644 --- a/application/single_app/templates/_sidebar_nav.html +++ b/application/single_app/templates/_sidebar_nav.html @@ -287,6 +287,11 @@ GPT Configuration +
+
+ + + + Requires Enhanced Citations +
@@ -1580,6 +1586,27 @@
+ +
+
+ Processing Thoughts +
+

When enabled, real-time processing steps are shown to users during chat responses and persisted for later review.

+
+ + + +
+
+
@@ -3229,9 +3256,10 @@
+ - +

diff --git a/application/single_app/templates/chats.html b/application/single_app/templates/chats.html index b6c212cc..e845cc53 100644 --- a/application/single_app/templates/chats.html +++ b/application/single_app/templates/chats.html @@ -1039,7 +1039,8 @@

Group Workspace

You do not have permission to manage group agents.
-
+
+
+ + + + +
-
- - - - - - - - - - - - -
Display NameDescriptionActions
-
- Loading... -
- Select a group to load agents. -
+
+ + + + + + + + + + + + + +
Display NameDescriptionActions
+
+ Loading... +
+ Select a group to load agents. +
+
+
@@ -813,33 +822,42 @@

Group Workspace

-
+
+
+ + + + +
- - - - - - - - - - - - - -
Display NameDescriptionActions
-
- Loading... -
- Select a group to load actions. -
+
+ + + + + + + + + + + + + +
Display NameDescriptionActions
+
+ Loading... +
+ Select a group to load actions. +
+
+
@@ -851,6 +869,22 @@

Group Workspace

+ + + +
+ + + + +
- - - - - - - -
Display NameDescriptionActions
-
Loading...
- Loading agents... -
+ +
+ + + + + + + +
Display NameDescriptionActions
+
Loading...
+ Loading agents... +
+
+ +
@@ -730,16 +741,27 @@

Personal Workspace

+
+ + + + +
- - - - - -
Display NameDescriptionActions
+ +
+ + + + + +
Display NameDescriptionActions
+
+ +
@@ -754,6 +776,24 @@

Personal Workspace

+ + +