Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ wheels/
# misc
.DS_Store
*.pem
*.save
.promptfoo/
.promptfoo-home/
.tmp/
promptfoo.db
data-gym-cache/

# debug
npm-debug.log*
Expand Down
141 changes: 141 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "DeepWiki: Install API",
"type": "shell",
"command": "poetry",
"args": [
"-C",
"${workspaceFolder}/api",
"install"
],
"options": {
"cwd": "${workspaceFolder}"
},
"presentation": {
"reveal": "always",
"panel": "dedicated"
},
"problemMatcher": []
},
{
"label": "DeepWiki: API",
"type": "shell",
"command": "${workspaceFolder}/api/.venv/bin/python",
"args": [
"-m",
"api.main"
],
"options": {
"cwd": "${workspaceFolder}",
"env": {
"PYTHONPATH": "${workspaceFolder}"
}
},
"isBackground": true,
"presentation": {
"reveal": "always",
"panel": "dedicated"
},
"problemMatcher": []
},
{
"label": "DeepWiki: Web",
"type": "shell",
"command": "env",
"args": [
"-u",
"ELECTRON_RUN_AS_NODE",
"npm",
"run",
"dev"
],
"options": {
"cwd": "${workspaceFolder}"
},
"isBackground": true,
"presentation": {
"reveal": "always",
"panel": "dedicated"
},
"problemMatcher": []
},
{
"label": "DeepWiki: Start All",
"dependsOn": [
"DeepWiki: API",
"DeepWiki: Web"
],
"dependsOrder": "parallel"
},
{
"label": "DeepWiki: Promptfoo Smoke",
"type": "shell",
"command": "npm",
"args": [
"run",
"promptfoo:smoke"
],
"options": {
"cwd": "${workspaceFolder}"
},
"presentation": {
"reveal": "always",
"panel": "dedicated"
},
"problemMatcher": []
},
{
"label": "DeepWiki: Promptfoo Eval",
"type": "shell",
"command": "npm",
"args": [
"run",
"promptfoo:eval"
],
"options": {
"cwd": "${workspaceFolder}"
},
"presentation": {
"reveal": "always",
"panel": "dedicated"
},
"problemMatcher": []
},
{
"label": "DeepWiki: Promptfoo View",
"type": "shell",
"command": "npm",
"args": [
"run",
"promptfoo:view"
],
"options": {
"cwd": "${workspaceFolder}"
},
"presentation": {
"reveal": "always",
"panel": "dedicated"
},
"problemMatcher": []
},
{
"label": "DeepWiki: Promptfoo CI",
"type": "shell",
"command": "npm",
"args": [
"run",
"promptfoo:ci"
],
"options": {
"cwd": "${workspaceFolder}"
},
"presentation": {
"reveal": "always",
"panel": "dedicated"
},
"problemMatcher": []
}
]
}
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,17 @@ yarn dev
3. For private repositories, click "+ Add access tokens" and enter your GitHub or GitLab personal access token
4. Click "Generate Wiki" and watch the magic happen!

## Local Evals With Promptfoo

DeepWiki now includes a local `promptfoo` setup for regression-style checks against your Ollama-backed API.

1. Start the backend on `http://127.0.0.1:8001`
2. Run `npm run promptfoo:smoke` for a fast local confidence check
3. Run `npm run promptfoo:ci` for the full local regression suite
4. Optional: inspect the latest run with `npm run promptfoo:view`

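The default eval config lives in `promptfooconfig.yaml`. It currently targets the local repository path `/Users/samroku/dev/deepwiki-open`, which is specific to the author's machine; change it to the path of your own checkout before running the evals. Because the target is a local path, the evals stay inside your local-first workflow by default.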
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The path /Users/samroku/dev/deepwiki-open is hardcoded. This makes the documentation and the default configuration specific to your local machine. Please use a placeholder or a relative path to ensure it works for other contributors.


## 🔍 How It Works

DeepWiki uses AI to:
Expand Down
4 changes: 2 additions & 2 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ async def get_model_config():

# Create providers from the config file
providers = []
default_provider = configs.get("default_provider", "google")
default_provider = configs.get("default_provider", "ollama")

# Add provider configuration based on config.py
for provider_id, provider_config in configs["providers"].items():
Expand Down Expand Up @@ -221,7 +221,7 @@ async def get_model_config():
]
)
],
defaultProvider="google"
defaultProvider="ollama"
)

@app.post("/export/wiki")
Expand Down
46 changes: 42 additions & 4 deletions api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,14 @@
WIKI_AUTH_MODE = raw_auth_mode.lower() in ['true', '1', 't']
WIKI_AUTH_CODE = os.environ.get('DEEPWIKI_AUTH_CODE', '')

# Privacy/local-first settings
raw_local_only_mode = os.environ.get('DEEPWIKI_LOCAL_ONLY', 'False')
LOCAL_ONLY_MODE = raw_local_only_mode.lower() in ['true', '1', 't']
ALLOWED_PROVIDERS_ENV = os.environ.get('DEEPWIKI_PROVIDER_ALLOWLIST', '')
DEFAULT_PROVIDER_OVERRIDE = os.environ.get('DEEPWIKI_DEFAULT_PROVIDER', '').strip().lower()

# Embedder settings
EMBEDDER_TYPE = os.environ.get('DEEPWIKI_EMBEDDER_TYPE', 'openai').lower()
EMBEDDER_TYPE = os.environ.get('DEEPWIKI_EMBEDDER_TYPE', 'ollama').lower()

# Get configuration directory from environment variable, or use default if not set
CONFIG_DIR = os.environ.get('DEEPWIKI_CONFIG_DIR', None)
Expand Down Expand Up @@ -147,6 +153,24 @@ def load_generator_config():

return generator_config


def get_allowed_providers():
    """
    Resolve the provider allowlist from environment variables.

    An explicit DEEPWIKI_PROVIDER_ALLOWLIST (comma-separated; entries are
    trimmed and lower-cased, empty entries are dropped) takes precedence.
    When it names no providers at all -- unset, blank, or made up only of
    separators such as "," -- and DEEPWIKI_LOCAL_ONLY is enabled, only the
    local Ollama provider is exposed.

    Returns:
        list[str]: Provider ids to expose. An empty list means no allowlist
        is in effect.
    """
    explicit_allowlist = [
        provider.strip().lower()
        for provider in ALLOWED_PROVIDERS_ENV.split(",")
        if provider.strip()
    ]
    if explicit_allowlist:
        return explicit_allowlist

    # Decide on the parsed entries rather than the raw string: a value like
    # "," is non-blank yet names nothing, and must not bypass local-only mode
    # by producing an empty (i.e. unrestricted) allowlist.
    if LOCAL_ONLY_MODE:
        return ["ollama"]
    return []

# Load embedder configuration
def load_embedder_config():
embedder_config = load_json_config("embedder.json")
Expand Down Expand Up @@ -336,8 +360,22 @@ def load_lang_config():

# Update configuration
if generator_config:
configs["default_provider"] = generator_config.get("default_provider", "google")
configs["providers"] = generator_config.get("providers", {})
configured_providers = generator_config.get("providers", {})
allowed_providers = get_allowed_providers()

if allowed_providers:
configured_providers = {
provider_id: provider_config
for provider_id, provider_config in configured_providers.items()
if provider_id in allowed_providers
}

default_provider = DEFAULT_PROVIDER_OVERRIDE or generator_config.get("default_provider", "ollama")
if default_provider not in configured_providers and configured_providers:
default_provider = next(iter(configured_providers.keys()))

configs["default_provider"] = default_provider
configs["providers"] = configured_providers

# Update embedder configuration
if embedder_config:
Expand All @@ -356,7 +394,7 @@ def load_lang_config():
configs["lang_config"] = lang_config


def get_model_config(provider="google", model=None):
def get_model_config(provider="ollama", model=None):
"""
Get configuration for the specified provider and model

Expand Down
25 changes: 3 additions & 22 deletions api/config/embedder.json
Original file line number Diff line number Diff line change
@@ -1,33 +1,14 @@
{
"embedder": {
"client_class": "OpenAIClient",
"batch_size": 500,
"model_kwargs": {
"model": "text-embedding-3-small",
"dimensions": 256,
"encoding_format": "float"
}
},
"embedder_ollama": {
"client_class": "OllamaClient",
"model_kwargs": {
"model": "nomic-embed-text"
}
},
"embedder_google": {
"client_class": "GoogleEmbedderClient",
"batch_size": 100,
"model_kwargs": {
"model": "gemini-embedding-001",
"task_type": "SEMANTIC_SIMILARITY"
}
},
"embedder_bedrock": {
"client_class": "BedrockClient",
"batch_size": 100,
"embedder": {
"client_class": "OllamaClient",
"model_kwargs": {
"model": "amazon.titan-embed-text-v2:0",
"dimensions": 256
"model": "nomic-embed-text"
}
},
"retriever": {
Expand Down
3 changes: 1 addition & 2 deletions api/config/generator.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"default_provider": "google",
"default_provider": "ollama",
"providers": {
"dashscope": {
"default_model": "qwen-plus",
Expand Down Expand Up @@ -196,4 +196,3 @@
}
}
}

23 changes: 22 additions & 1 deletion api/data_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,14 +700,35 @@ def get_file_content(repo_url: str, file_path: str, repo_type: str = None, acces
Raises:
ValueError: If the file cannot be fetched or if the URL is not valid
"""
if repo_type == "local" or (
repo_url
and not repo_url.startswith("http://")
and not repo_url.startswith("https://")
):
Comment on lines +703 to +707
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

If repo_type is set to "local" but repo_url is missing or null, os.path.abspath(repo_url) will raise a TypeError. A check should be added to ensure repo_url is provided when repo_type is "local".

Suggested change
if repo_type == "local" or (
repo_url
and not repo_url.startswith("http://")
and not repo_url.startswith("https://")
):
if (repo_type == "local" or (
repo_url
and not repo_url.startswith("http://")
and not repo_url.startswith("https://")
)) and repo_url:

repo_root = os.path.abspath(repo_url)
candidate_path = os.path.abspath(os.path.join(repo_root, file_path))

# Prevent path traversal outside the repository root.
if os.path.commonpath([repo_root, candidate_path]) != repo_root:
raise ValueError("Requested file path escapes the local repository root.")

if not os.path.isfile(candidate_path):
raise ValueError(f"Local file not found: {file_path}")

try:
with open(candidate_path, "r", encoding="utf-8") as f:
return f.read()
except Exception as e:
raise ValueError(f"Failed to read local file content: {str(e)}")

if repo_type == "github":
return get_github_file_content(repo_url, file_path, access_token)
elif repo_type == "gitlab":
return get_gitlab_file_content(repo_url, file_path, access_token)
elif repo_type == "bitbucket":
return get_bitbucket_file_content(repo_url, file_path, access_token)
else:
raise ValueError("Unsupported repository type. Only GitHub, GitLab, and Bitbucket are supported.")
raise ValueError("Unsupported repository type. Only local, GitHub, GitLab, and Bitbucket are supported.")

class DatabaseManager:
"""
Expand Down
Loading