diff --git a/src/runpod_flash/cli/commands/build_utils/scanner.py b/src/runpod_flash/cli/commands/build_utils/scanner.py index 5ff5c4d2..2215ab9e 100644 --- a/src/runpod_flash/cli/commands/build_utils/scanner.py +++ b/src/runpod_flash/cli/commands/build_utils/scanner.py @@ -74,11 +74,11 @@ def discover_remote_functions(self) -> List[RemoteFunctionMetadata]: tree = ast.parse(content) self._extract_resource_configs(tree, py_file) except UnicodeDecodeError: - logger.debug(f"Skipping non-UTF-8 file: {py_file}") + pass except SyntaxError as e: logger.warning(f"Syntax error in {py_file}: {e}") - except Exception as e: - logger.debug(f"Failed to parse {py_file}: {e}") + except Exception: + pass # Second pass: extract @remote decorated functions for py_file in self.py_files: @@ -87,11 +87,11 @@ def discover_remote_functions(self) -> List[RemoteFunctionMetadata]: tree = ast.parse(content) functions.extend(self._extract_remote_functions(tree, py_file)) except UnicodeDecodeError: - logger.debug(f"Skipping non-UTF-8 file: {py_file}") + pass except SyntaxError as e: logger.warning(f"Syntax error in {py_file}: {e}") - except Exception as e: - logger.debug(f"Failed to parse {py_file}: {e}") + except Exception: + pass # Third pass: analyze function call graphs remote_function_names = {f.function_name for f in functions} @@ -115,11 +115,11 @@ def discover_remote_functions(self) -> List[RemoteFunctionMetadata]: node, func_meta, remote_function_names ) except UnicodeDecodeError: - logger.debug(f"Skipping non-UTF-8 file: {py_file}") + pass except SyntaxError as e: logger.warning(f"Syntax error in {py_file}: {e}") - except Exception as e: - logger.debug(f"Failed to parse {py_file}: {e}") + except Exception: + pass return functions diff --git a/src/runpod_flash/cli/utils/ignore.py b/src/runpod_flash/cli/utils/ignore.py index bd3b8c7b..b9634dc2 100644 --- a/src/runpod_flash/cli/utils/ignore.py +++ b/src/runpod_flash/cli/utils/ignore.py @@ -128,7 +128,6 @@ def get_file_tree( for item in directory.iterdir(): # Check if should ignore if should_ignore(item, spec, base_dir): - log.debug(f"Ignoring: {item.relative_to(base_dir)}") continue if item.is_file(): diff --git a/src/runpod_flash/client.py b/src/runpod_flash/client.py index 1288e24f..ed68bc30 100644 --- a/src/runpod_flash/client.py +++ b/src/runpod_flash/client.py @@ -25,17 +25,9 @@ def _should_execute_locally(func_name: str) -> bool: # Check if we're in a deployed environment runpod_endpoint_id = os.getenv("RUNPOD_ENDPOINT_ID") runpod_pod_id = os.getenv("RUNPOD_POD_ID") - flash_resource_name = os.getenv("FLASH_RESOURCE_NAME") - - log.debug( - f"@remote decorator for {func_name}: " - f"RUNPOD_ENDPOINT_ID={runpod_endpoint_id}, " - f"FLASH_RESOURCE_NAME={flash_resource_name}" - ) if not runpod_endpoint_id and not runpod_pod_id: # Local development - create stub for remote execution via ResourceManager - log.debug(f"@remote {func_name}: local dev mode, creating stub") return False # In deployed environment - check build-time generated configuration @@ -43,9 +35,6 @@ def _should_execute_locally(func_name: str) -> bool: from .runtime._flash_resource_config import is_local_function result = is_local_function(func_name) - log.debug( - f"@remote {func_name}: deployed mode, is_local_function returned {result}" - ) return result except ImportError as e: # Configuration not generated (shouldn't happen in deployed env) @@ -186,14 +175,10 @@ def decorator(func_or_class): if should_execute_local: # This function belongs to our resource - execute locally - log.debug( - 
f"@remote {func_name}: returning original function (local execution)" - ) func_or_class.__remote_config__ = routing_config return func_or_class # Remote execution mode - create stub for calling other endpoints - log.debug(f"@remote {func_name}: creating wrapper for remote execution") if inspect.isclass(func_or_class): # Handle class decoration diff --git a/src/runpod_flash/core/api/runpod.py b/src/runpod_flash/core/api/runpod.py index b451b31f..bc30219a 100644 --- a/src/runpod_flash/core/api/runpod.py +++ b/src/runpod_flash/core/api/runpod.py @@ -3,7 +3,7 @@ Bypasses the outdated runpod-python SDK limitations. """ -import json +import json # noqa: F401 - used in commented debug logs import logging import os from typing import Any, Dict, Optional, List @@ -92,19 +92,19 @@ async def _execute_graphql( payload = {"query": query, "variables": variables or {}} - log.debug(f"GraphQL Query: {query}") - sanitized_vars = _sanitize_for_logging(variables) - log.debug(f"GraphQL Variables: {json.dumps(sanitized_vars, indent=2)}") + # log.debug(f"GraphQL Query: {query}") + # sanitized_vars = _sanitize_for_logging(variables) + # log.debug(f"GraphQL Variables: {json.dumps(sanitized_vars, indent=2)}") try: async with session.post(self.GRAPHQL_URL, json=payload) as response: response_data = await response.json() - log.debug(f"GraphQL Response Status: {response.status}") - sanitized_response = _sanitize_for_logging(response_data) - log.debug( - f"GraphQL Response: {json.dumps(sanitized_response, indent=2)}" - ) + # log.debug(f"GraphQL Response Status: {response.status}") + # sanitized_response = _sanitize_for_logging(response_data) + # log.debug( + # f"GraphQL Response: {json.dumps(sanitized_response, indent=2)}" + # ) if response.status >= 400: sanitized_err = _sanitize_for_logging(response_data) @@ -156,7 +156,7 @@ async def update_template(self, input_data: Dict[str, Any]) -> Dict[str, Any]: raise Exception("Unexpected GraphQL response structure") template_data = result["saveTemplate"] - log.info( + log.debug( f"Updated template: {template_data.get('id', 'unknown')} - {template_data.get('name', 'unnamed')}" ) @@ -354,8 +354,6 @@ async def finalize_artifact_upload( """ variables = {"input": input_data} - log.debug(f"finalizing upload for flash app: {input_data}") - result = await self._execute_graphql(mutation, variables) return result["finalizeFlashArtifactUpload"] @@ -407,7 +405,6 @@ async def get_flash_app_by_name(self, app_name: str) -> Dict[str, Any]: """ variables = {"flashAppName": app_name} - log.debug(f"Fetching flash app by name for input: {app_name}") result = await self._execute_graphql(query, variables) return result["flashAppByName"] @@ -460,7 +457,6 @@ async def get_flash_environment_by_name( """ variables = {"input": input_data} - log.debug(f"Fetching flash environment by name for input: {variables}") result = await self._execute_graphql(query, variables) return result["flashEnvironmentByName"] @@ -513,8 +509,6 @@ async def deploy_build_to_environment( variables = {"input": input_data} - log.debug(f"Deploying flash environment with vars: {input_data}") - result = await self._execute_graphql(mutation, variables) return result["deployBuildToEnvironment"] @@ -834,15 +828,15 @@ async def _execute_rest( """Execute a REST API request.""" session = await self._get_session() - log.debug(f"REST Request: {method} {url}") - log.debug(f"REST Data: {json.dumps(data, indent=2) if data else 'None'}") + # log.debug(f"REST Request: {method} {url}") + # log.debug(f"REST Data: {json.dumps(data, indent=2) if 
data else 'None'}") try: async with session.request(method, url, json=data) as response: response_data = await response.json() - log.debug(f"REST Response Status: {response.status}") - log.debug(f"REST Response: {json.dumps(response_data, indent=2)}") + # log.debug(f"REST Response Status: {response.status}") + # log.debug(f"REST Response: {json.dumps(response_data, indent=2)}") if response.status >= 400: raise Exception( diff --git a/src/runpod_flash/core/discovery.py b/src/runpod_flash/core/discovery.py index 8ce4f3e5..06c5d57e 100644 --- a/src/runpod_flash/core/discovery.py +++ b/src/runpod_flash/core/discovery.py @@ -52,9 +52,6 @@ def discover(self) -> List[DeployableResource]: resource = self._resolve_resource_variable(module, var_name) if resource: resources.append(resource) - log.debug( - f"Discovered resource: {var_name} -> {resource.__class__.__name__}" - ) else: log.warning(f"Failed to import {self.entry_point}") @@ -405,10 +402,6 @@ def _scan_project_directory(self) -> List[DeployableResource]: resource = self._resolve_resource_variable(module, var_name) if resource: resources.append(resource) - log.debug( - f"Discovered resource in {file_path.relative_to(project_root)}: " - f"{var_name} -> {resource.__class__.__name__}" - ) except Exception as e: log.debug(f"Failed to scan {file_path}: {e}") diff --git a/src/runpod_flash/core/resources/app.py b/src/runpod_flash/core/resources/app.py index a109f474..2fdce8f3 100644 --- a/src/runpod_flash/core/resources/app.py +++ b/src/runpod_flash/core/resources/app.py @@ -185,10 +185,8 @@ async def _hydrate(self) -> None: """ async with self._hydrate_lock: if self._hydrated: - log.debug("App is already hydrated while calling hydrate. Returning") return - log.debug("Hydrating app") async with RunpodGraphQLClient() as client: try: result = await client.get_flash_app_by_name(self.name) diff --git a/src/runpod_flash/core/resources/resource_manager.py b/src/runpod_flash/core/resources/resource_manager.py index a43f2d5e..0cd18f51 100644 --- a/src/runpod_flash/core/resources/resource_manager.py +++ b/src/runpod_flash/core/resources/resource_manager.py @@ -152,7 +152,6 @@ def _save_resources(self) -> None: data = (self._resources, self._resource_configs) cloudpickle.dump(data, f) f.flush() # Ensure data is written to disk - log.debug(f"Saved resources in {RESOURCE_STATE_FILE}") except (FileLockError, Exception) as e: log.error(f"Failed to save resources to {RESOURCE_STATE_FILE}: {e}") raise @@ -224,15 +223,6 @@ async def get_or_deploy_resource( resource_key = config.get_resource_key() new_config_hash = config.config_hash - log.debug( - f"get_or_deploy_resource called:\n" - f" Config type: {type(config).__name__}\n" - f" Config name: {getattr(config, 'name', 'N/A')}\n" - f" Resource key: {resource_key}\n" - f" New config hash: {new_config_hash[:16]}...\n" - f" Available keys in cache: {list(self._resources.keys())}" - ) - # Ensure global lock is initialized assert ResourceManager._global_lock is not None, "Global lock not initialized" @@ -247,7 +237,6 @@ async def get_or_deploy_resource( existing = self._resources.get(resource_key) if existing: - log.debug(f"Resource found in cache: {resource_key}") # Resource exists - check if still valid if not existing.is_deployed(): log.warning(f"{existing} is no longer valid, redeploying.") @@ -273,21 +262,6 @@ async def get_or_deploy_resource( stored_config_hash = self._resource_configs.get(resource_key, "") if stored_config_hash != new_config_hash: - # Detailed drift debugging - log.debug( - f"DRIFT DEBUG for 
'{config.name}':\n" - f" Stored hash: {stored_config_hash}\n" - f" New hash: {new_config_hash}\n" - f" Stored resource type: {type(existing).__name__}\n" - f" New resource type: {type(config).__name__}\n" - f" Existing config fields: {existing.model_dump(exclude_none=True, exclude={'id'}) if hasattr(existing, 'model_dump') else 'N/A'}\n" - f" New config fields: {config.model_dump(exclude_none=True, exclude={'id'}) if hasattr(config, 'model_dump') else 'N/A'}" - ) - log.debug( - f"Config drift detected for '{config.name}': " - f"Automatically updating endpoint" - ) - # Attempt update (will redeploy if structural changes detected) if hasattr(existing, "update"): updated_resource = await existing.update(config) @@ -318,15 +292,10 @@ async def get_or_deploy_resource( raise # Config unchanged, reuse existing - log.debug(f"{existing} exists, reusing (config unchanged)") - log.debug(f"URL: {existing.url}") + log.info(f"URL: {existing.url}") return existing # No existing resource, deploy new one - log.debug( - f"Resource NOT found in cache, deploying new: {resource_key}\n" - f" Searched in keys: {list(self._resources.keys())}" - ) try: deployed_resource = await self._deploy_with_error_context(config) log.debug(f"URL: {deployed_resource.url}") diff --git a/src/runpod_flash/core/resources/serverless.py b/src/runpod_flash/core/resources/serverless.py index bf9238ce..fc87efa6 100644 --- a/src/runpod_flash/core/resources/serverless.py +++ b/src/runpod_flash/core/resources/serverless.py @@ -646,14 +646,9 @@ async def update(self, new_config: "ServerlessResource") -> "ServerlessResource" try: resolved_template_id = self.templateId or new_config.templateId - # Log if version-triggering changes detected (informational only) - if self._has_structural_changes(new_config): - log.debug( - f"{self.name}: Version-triggering changes detected. " - "Server will increment version and recreate workers." 
- ) - else: - log.debug(f"Updating endpoint '{self.name}' (ID: {self.id})") + # Check for version-triggering changes + if not self._has_structural_changes(new_config): + log.info(f"Updating endpoint '{self.name}' (ID: {self.id})") # Ensure network volume is deployed if specified await new_config._ensure_network_volume_deployed() @@ -678,7 +673,7 @@ async def update(self, new_config: "ServerlessResource") -> "ServerlessResource" new_config.template, resolved_template_id ) await client.update_template(template_payload) - log.info( + log.debug( f"Updated template '{resolved_template_id}' for endpoint '{self.name}'" ) else: @@ -752,11 +747,9 @@ def _has_structural_changes(self, new_config: "ServerlessResource") -> bool: # Handle list comparison if isinstance(old_val, list) and isinstance(new_val, list): if sorted(str(v) for v in old_val) != sorted(str(v) for v in new_val): - log.debug(f"Structural change in '{field}': {old_val} → {new_val}") return True # Handle other types elif old_val != new_val: - log.debug(f"Structural change in '{field}': {old_val} → {new_val}") return True return False diff --git a/src/runpod_flash/core/utils/file_lock.py b/src/runpod_flash/core/utils/file_lock.py index c104cfd8..b1866c34 100644 --- a/src/runpod_flash/core/utils/file_lock.py +++ b/src/runpod_flash/core/utils/file_lock.py @@ -102,7 +102,6 @@ def file_lock( _acquire_fallback_lock(file_handle, exclusive, timeout) lock_acquired = True - log.debug(f"File lock acquired (exclusive={exclusive})") except (OSError, IOError, FileLockError) as e: # Check timeout @@ -128,8 +127,6 @@ def file_lock( else: _release_fallback_lock(file_handle) - log.debug("File lock released") - except Exception as e: log.error(f"Error releasing file lock: {e}") # Don't raise - we're in cleanup diff --git a/src/runpod_flash/execute_class.py b/src/runpod_flash/execute_class.py index 0e301d5d..643bc378 100644 --- a/src/runpod_flash/execute_class.py +++ b/src/runpod_flash/execute_class.py @@ -57,8 +57,6 @@ def get_or_cache_class_data( }, ) - log.debug(f"Cached class data for {cls.__name__} with key: {cache_key}") - except (TypeError, AttributeError, OSError, SerializationError) as e: log.warning( f"Could not serialize constructor arguments for {cls.__name__}: {e}" @@ -81,9 +79,6 @@ def get_or_cache_class_data( else: # Cache hit - retrieve cached data cached_data = _SERIALIZED_CLASS_CACHE.get(cache_key) - log.debug( - f"Retrieved cached class data for {cls.__name__} with key: {cache_key}" - ) return cached_data["class_code"] @@ -121,7 +116,6 @@ def extract_class_code_simple(cls: Type) -> str: # Validate the code by trying to compile it compile(class_code, "", "exec") - log.debug(f"Successfully extracted class code for {cls.__name__}") return class_code except Exception as e: @@ -182,7 +176,6 @@ def get_class_cache_key( # Combine hashes for final cache key cache_key = f"{cls.__name__}_{class_hash[:HASH_TRUNCATE_LENGTH]}_{args_hash[:HASH_TRUNCATE_LENGTH]}" - log.debug(f"Generated cache key for {cls.__name__}: {cache_key}") return cache_key except (TypeError, AttributeError, OSError) as e: @@ -229,8 +222,6 @@ def __init__(self, *args, **kwargs): cls, args, kwargs, self._cache_key ) - log.debug(f"Created remote class wrapper for {cls.__name__}") - async def _ensure_initialized(self): """Ensure the remote instance is created.""" if self._initialized: diff --git a/src/runpod_flash/logger.py b/src/runpod_flash/logger.py index d024b079..88283edc 100644 --- a/src/runpod_flash/logger.py +++ b/src/runpod_flash/logger.py @@ -64,6 +64,10 @@ class 
SensitiveDataFilter(logging.Filter):
     # Pattern for Bearer tokens in Authorization headers
     BEARER_PATTERN = re.compile(r"(bearer\s+)([A-Za-z0-9_.-]+)", re.IGNORECASE)
 
+    # Pattern for common API key prefixes (OpenAI, Anthropic, etc.)
+    # Matches: sk-..., key_..., api_... (28+ chars after the prefix)
+    PREFIXED_KEY_PATTERN = re.compile(r"\b(sk-|key_|api_)[A-Za-z0-9_-]{28,}\b")
+
     def filter(self, record: logging.LogRecord) -> bool:
         """Sanitize log record by redacting sensitive data.
 
@@ -129,8 +133,12 @@ def _redact_string(self, text: str) -> str:
             lambda m: f"{m.group(1)}***REDACTED***{m.group(3)}", text
         )
 
-        # Redact generic long tokens
-        text = self.TOKEN_PATTERN.sub(self._redact_token, text)
+        # Redact common prefixed API keys (sk-, key_, api_)
+        text = self.PREFIXED_KEY_PATTERN.sub(self._redact_token, text)
+
+        # Generic token pattern disabled - causes false positives with Job IDs, Template IDs, etc.
+        # Specific patterns above catch actual sensitive tokens.
+        # text = self.TOKEN_PATTERN.sub(self._redact_token, text)
 
         # Redact common password/secret patterns
         # Match field names with : or = separators and redact the value, preserving separator
@@ -293,7 +301,3 @@ def setup_logging(
-    # Determine format based on final effective level
+    # One clean format for all levels; DEBUG output no longer differs
     if fmt is None:
-        if level == logging.DEBUG:
-            fmt = "%(asctime)s | %(levelname)-5s | %(name)s | %(filename)s:%(lineno)d | %(message)s"
-        else:
-            # Default format for INFO level and above
-            fmt = "%(asctime)s | %(levelname)-5s | %(message)s"
+        fmt = "%(asctime)s | %(levelname)-5s | %(message)s"
@@ -322,3 +326,8 @@ def setup_logging(
         existing_handler.addFilter(sensitive_filter)
 
     root_logger.setLevel(level)
+
+    # Silence noisy third-party loggers (connection/request trace details)
+    logging.getLogger("httpcore").setLevel(logging.WARNING)
+    logging.getLogger("httpx").setLevel(logging.WARNING)
+    logging.getLogger("asyncio").setLevel(logging.WARNING)
diff --git a/src/runpod_flash/runtime/load_balancer.py b/src/runpod_flash/runtime/load_balancer.py
index 6c32b465..0c4b6f44 100644
--- a/src/runpod_flash/runtime/load_balancer.py
+++ b/src/runpod_flash/runtime/load_balancer.py
@@ -85,10 +85,6 @@ async def _round_robin_select(self, endpoints: List[str]) -> str:
         async with self._lock:
             selected = endpoints[self._round_robin_index % len(endpoints)]
             self._round_robin_index += 1
-            logger.debug(
-                f"Load balancer: ROUND_ROBIN selected {selected} "
-                f"(index {self._round_robin_index - 1})"
-            )
             return selected
 
     async def _least_connections_select(self, endpoints: List[str]) -> str:
@@ -109,10 +105,6 @@ async def _least_connections_select(self, endpoints: List[str]) -> str:
 
         # Find endpoint with minimum connections
         selected = min(endpoints, key=lambda e: self._in_flight_requests.get(e, 0))
-        logger.debug(
-            f"Load balancer: LEAST_CONNECTIONS selected {selected} "
-            f"({self._in_flight_requests.get(selected, 0)} in-flight)"
-        )
         return selected
 
     async def _random_select(self, endpoints: List[str]) -> str:
@@ -125,7 +117,6 @@ async def _random_select(self, endpoints: List[str]) -> str:
             Selected endpoint URL
         """
         selected = random.choice(endpoints)
-        logger.debug(f"Load balancer: RANDOM selected {selected}")
         return selected
 
     async def record_request(self, endpoint: str) -> None:
diff --git a/tests/unit/test_logger.py b/tests/unit/test_logger.py
index de33226f..7b527ede 100644
--- a/tests/unit/test_logger.py
+++ b/tests/unit/test_logger.py
@@ -253,7 +253,7 @@ def test_log_level_override_via_env(self, tmp_path, monkeypatch):
         monkeypatch.delenv("LOG_LEVEL")
 
     def test_debug_format_includes_details(self, tmp_path, monkeypatch):
-        
"""Verify DEBUG level uses detailed format.""" + """Verify DEBUG level logging works with clean format.""" # Change to temp directory monkeypatch.chdir(tmp_path) @@ -275,10 +275,9 @@ def test_debug_format_includes_details(self, tmp_path, monkeypatch): output = stream.getvalue() - # Verify detailed format includes filename and line number + # Verify message is logged assert "Debug message" in output - assert "test_logger.py" in output # filename - assert "test" in output # logger name + assert "DEBUG" in output # Cleanup cleanup_handlers(root_logger) diff --git a/tests/unit/test_logger_sensitive_data.py b/tests/unit/test_logger_sensitive_data.py index 573c3193..e5ad2640 100644 --- a/tests/unit/test_logger_sensitive_data.py +++ b/tests/unit/test_logger_sensitive_data.py @@ -127,10 +127,11 @@ def test_recursive_dict_sanitization(self): assert sanitized_config["api"]["endpoint"] == "https://api.example.com" def test_long_token_partial_redaction(self): - """Verify long tokens show first/last 4 chars for debugging.""" + """Verify prefixed API keys show first/last 4 chars for debugging.""" filter_instance = SensitiveDataFilter() - long_token = "abcdefghijklmnopqrstuvwxyz0123456789" + # Use a prefixed token that will be caught by PREFIXED_KEY_PATTERN + long_token = "sk-abcdefghijklmnopqrstuvwxyz0123456789" record = logging.LogRecord( name="test", level=logging.INFO, @@ -143,7 +144,7 @@ def test_long_token_partial_redaction(self): filter_instance.filter(record) # Should show first 4 and last 4 chars - assert "abcd" in record.msg + assert "sk-a" in record.msg assert "6789" in record.msg assert "***REDACTED***" in record.msg assert long_token not in record.msg