Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions scripts/coding_discovery_tools/ai_tools_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
CursorSkillsExtractorFactory,
ClineSkillsExtractorFactory,
)
from .utils import send_report_to_backend, send_scan_event, send_discovery_metrics, get_user_info, get_all_users_macos, get_all_users_windows, get_all_users_linux, load_pending_reports, save_failed_reports, report_to_sentry, get_claude_subscription_type, get_cursor_subscription_type, QUEUE_FILE
from .utils import send_report_to_backend, send_scan_event, send_discovery_metrics, get_user_info, get_all_users_macos, get_all_users_windows, get_all_users_linux, load_pending_reports, save_failed_reports, report_to_sentry, get_claude_subscription_type, get_cursor_subscription_type, in_container, QUEUE_FILE
from .linux_extraction_helpers import linux_home_for_user
from .logging_helpers import configure_logger, log_rules_details, log_mcp_details, log_settings_details
from .settings_transformers import transform_settings_to_backend_format
Expand Down Expand Up @@ -119,7 +119,7 @@
CursorSkillsExtractorFactory,
ClineSkillsExtractorFactory,
)
from scripts.coding_discovery_tools.utils import send_report_to_backend, send_scan_event, send_discovery_metrics, get_user_info, get_all_users_macos, get_all_users_windows, get_all_users_linux, load_pending_reports, save_failed_reports, report_to_sentry, get_claude_subscription_type, get_cursor_subscription_type, QUEUE_FILE
from scripts.coding_discovery_tools.utils import send_report_to_backend, send_scan_event, send_discovery_metrics, get_user_info, get_all_users_macos, get_all_users_windows, get_all_users_linux, load_pending_reports, save_failed_reports, report_to_sentry, get_claude_subscription_type, get_cursor_subscription_type, in_container, QUEUE_FILE
from scripts.coding_discovery_tools.linux_extraction_helpers import linux_home_for_user
from scripts.coding_discovery_tools.logging_helpers import configure_logger, log_rules_details, log_mcp_details, log_settings_details
from scripts.coding_discovery_tools.settings_transformers import transform_settings_to_backend_format
Expand Down Expand Up @@ -2134,6 +2134,7 @@ def generate_single_tool_report(self, tool: Dict, device_id: str, home_user: str
"home_user": home_user,
"system_user": system_user or home_user,
"device_id": device_id,
"is_container": in_container(),
"tools": [tool_for_report]
}

Expand Down
82 changes: 79 additions & 3 deletions scripts/coding_discovery_tools/linux/device_id.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""Device ID extraction for Linux."""

import logging
import os
import tempfile
import uuid
from pathlib import Path

from .. import cache
from ..coding_tool_base import BaseDeviceIdExtractor
from ..utils import get_hostname

logger = logging.getLogger(__name__)

Expand All @@ -13,14 +16,22 @@
Path("/var/lib/dbus/machine-id"), # older dbus fallback
]

_DEVICE_ID_FILENAME = "device-id"


class LinuxDeviceIdExtractor(BaseDeviceIdExtractor):
"""Device ID extractor for Linux systems."""

def extract_device_id(self) -> str:
"""
Return the machine-id from /etc/machine-id (systemd standard).
Falls back to hostname if the file is absent or unreadable.

When no machine-id is available (common in containers, which often have
an empty/absent /etc/machine-id), fall back to a UUID persisted in the
home-user's ``~/.unbound/`` directory. A restarted container that mounts
a persistent ``~/.unbound`` (the primary ``unbound login`` flow) then
keeps a single stable device row instead of exploding into one row per
launch — which an ephemeral hostname fallback would produce.
"""
for path in _MACHINE_ID_PATHS:
try:
Expand All @@ -31,4 +42,69 @@ def extract_device_id(self) -> str:
except Exception as e:
logger.debug(f"Could not read {path}: {e}")

return get_hostname()
return self._persisted_device_id()

@staticmethod
def _persisted_device_id() -> str:
    """Return a stable device UUID persisted in the Unbound state directory.

    The id is stored in ``cache.UNBOUND_DIR / _DEVICE_ID_FILENAME``. The state
    directory is resolved through ``cache._ensure_state_dir()`` rather than
    being computed here.

    Behaviour:
      * If the file exists and holds a well-formed UUID, that value is
        returned unchanged.
      * If the file is missing, empty, unreadable, or holds a non-UUID
        string, a fresh ``uuid4`` is generated and written atomically
        (temp file in the same directory, flushed and fsynced, then
        ``os.replace``).
      * If the state directory is unavailable or the write fails, the
        freshly generated (unpersisted) ``uuid4`` is still returned.

    Returns:
        The device id as a string.

    This method never raises: every failure path is logged and degrades to
    an ephemeral ``uuid4``.
    """
    try:
        if not cache._ensure_state_dir():
            logger.warning("No usable state dir for device-id; using ephemeral uuid")
            return str(uuid.uuid4())

        device_id_path = cache.UNBOUND_DIR / _DEVICE_ID_FILENAME

        try:
            if device_id_path.exists() and device_id_path.is_file():
                existing = device_id_path.read_text(encoding="utf-8").strip()
                if existing:
                    # Only accept a well-formed UUID. A truncated write, a
                    # manual edit, or another tool clobbering the file can
                    # leave arbitrary text here; treat that as absent and
                    # regenerate instead of reporting a garbage id.
                    try:
                        uuid.UUID(existing)
                        return existing
                    except ValueError:
                        logger.warning(
                            f"Corrupt (non-UUID) persisted device-id at "
                            f"{device_id_path!s}; regenerating"
                        )
        except Exception as e:
            logger.warning(f"Could not read persisted device-id at {device_id_path!s}: {e}")

        new_id = str(uuid.uuid4())
        try:
            # Atomic write: stage the value in a temp file in the same
            # directory, then os.replace() it over the target so readers
            # never observe a half-written file.
            cache.UNBOUND_DIR.mkdir(parents=True, exist_ok=True)
            fd, tmp = tempfile.mkstemp(
                prefix=".device-id.", suffix=".tmp", dir=str(cache.UNBOUND_DIR)
            )
            try:
                with os.fdopen(fd, "w", encoding="utf-8") as f:
                    f.write(new_id)
                    # Push the bytes to stable storage BEFORE the rename.
                    # Without this a crash or power loss shortly after
                    # os.replace() can leave an empty/truncated file on
                    # filesystems that commit the rename ahead of the data.
                    f.flush()
                    try:
                        os.fsync(f.fileno())
                    except OSError as e:
                        # Some filesystems do not support fsync; durability
                        # is best-effort and must not block persistence.
                        logger.debug(f"fsync failed for device-id temp file {tmp}: {e}")
                os.replace(tmp, str(device_id_path))
            finally:
                # After a successful replace the temp path is gone; this
                # only cleans up when the write or rename failed.
                if os.path.exists(tmp):
                    try:
                        os.unlink(tmp)
                    except OSError:
                        pass
        except Exception as e:
            logger.warning(f"Could not persist device-id at {device_id_path!s}: {e}")
        return new_id
    except Exception as e:
        # Last-resort guard so device-id extraction can never raise.
        logger.warning(f"device-id fallback failed: {e}")
        return str(uuid.uuid4())
Comment thread
sumit-badsara marked this conversation as resolved.
Comment thread
sumit-badsara marked this conversation as resolved.
42 changes: 42 additions & 0 deletions scripts/coding_discovery_tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Utility functions shared across the AI tools discovery system
"""

import functools
import json
import logging
import os
Expand Down Expand Up @@ -101,6 +102,47 @@ def get_hostname() -> str:
return platform.node()


@functools.lru_cache(maxsize=1)
def in_container() -> bool:
    """Best-effort detection of whether we're running inside a container.

    Several signals are combined because no single one is reliable across
    runtimes and kernels:

    - ``/.dockerenv`` / ``/run/.containerenv`` marker files.
    - The root filesystem (``/``) listed as type ``overlay`` in
      ``/proc/mounts``.
    - ``/docker``, ``/lxc``, ``kubepods`` or ``/containerd`` appearing in
      ``/proc/1/cgroup``.

    This is for behavioural branching, not security: every marker checked
    here can be forged by whoever controls the environment.

    Returns:
        ``True`` if any signal matches, ``False`` otherwise (including when
        the ``/proc`` files are missing or unreadable). The result is cached
        for the lifetime of the process.
    """
    # os.path.exists() reports inaccessible paths as False rather than
    # raising, so no exception handling is needed for the marker files.
    if any(os.path.exists(marker) for marker in ("/.dockerenv", "/run/.containerenv")):
        return True

    # errors="replace": mount points and cgroup paths are arbitrary bytes.
    # Strict UTF-8 decoding would raise UnicodeDecodeError (not an OSError)
    # on a non-UTF-8 path and escape this best-effort probe.
    try:
        with open("/proc/mounts", encoding="utf-8", errors="replace") as f:
            for line in f:
                # Fields: <device> <mount point> <fs type> <options> ...
                parts = line.split()
                if len(parts) >= 3 and parts[1] == "/" and parts[2] == "overlay":
                    return True
    except OSError:
        pass

    try:
        with open("/proc/1/cgroup", encoding="utf-8", errors="replace") as f:
            blob = f.read()
        if any(marker in blob for marker in ("/docker", "/lxc", "kubepods", "/containerd")):
            return True
    except OSError:
        pass

    return False


class DsclBatchData(NamedTuple):
uid_map: Dict[str, int]
shell_map: Dict[str, str]
Expand Down
Loading
Loading