Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions openviking/prompts/templates/semantic/code_ast_summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@ variables:
description: "AST-extracted code skeleton (classes, functions, imports)"
required: true

- name: "output_language"
type: "string"
description: "Language code for output (e.g., 'en', 'zh-CN', 'ja', 'ko', 'ru', 'ar')"
required: true

template: |
Output Language: {{ output_language }}

You are a code analysis expert. Based on the structural skeleton below,
generate a concise summary (80-200 words) focusing on purpose, key components,
and relationships. The skeleton was extracted via AST parsing.
Expand Down
9 changes: 8 additions & 1 deletion openviking/prompts/templates/semantic/code_summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@ variables:
description: "File content (code)"
required: true

- name: "output_language"
type: "string"
description: "Language code for output (e.g., 'en', 'zh-CN', 'ja', 'ko', 'ru', 'ar')"
required: true

template: |
Output Language: {{ output_language }}

You are a code analysis expert. Generate a concise yet informative summary for the following code file.

【File Name】
Expand All @@ -43,4 +50,4 @@ template: |
4. Role in the larger codebase context

llm_config:
temperature: 0.0
temperature: 0.0
9 changes: 8 additions & 1 deletion openviking/prompts/templates/semantic/document_summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@ variables:
description: "File content (documentation text)"
required: true

- name: "output_language"
type: "string"
description: "Language code for output (e.g., 'en', 'zh-CN', 'ja', 'ko', 'ru', 'ar')"
required: true

template: |
Output Language: {{ output_language }}

You are a documentation analysis expert. Generate a concise yet informative summary for the following documentation file.

【File Name】
Expand Down Expand Up @@ -49,4 +56,4 @@ template: |
- For reference docs: focus on completeness and organization

llm_config:
temperature: 0.0
temperature: 0.0
7 changes: 7 additions & 0 deletions openviking/prompts/templates/semantic/file_summary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@ variables:
description: "File content"
required: true

- name: "output_language"
type: "string"
description: "Language code for output (e.g., 'en', 'zh-CN', 'ja', 'ko', 'ru', 'ar')"
required: true

template: |
Output Language: {{ output_language }}

Please generate a summary for the following file:

【File Name】
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,14 @@ variables:
description: "List of subdirectories and their abstracts, format: '- dirname/: abstract'"
default: ""

- name: "output_language"
type: "string"
description: "Language code for output (e.g., 'en', 'zh-CN', 'ja', 'ko', 'ru', 'ar')"
required: true

template: |
Output Language: {{ output_language }}

Generate an overview document based on the following directory content:

[Directory Name]
Expand Down
29 changes: 26 additions & 3 deletions openviking/storage/queuefs/semantic_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,11 @@ async def _generate_text_summary(
logger.warning("VLM not available, using empty summary")
return {"name": file_name, "summary": ""}

from openviking.session.memory.utils.language import _detect_language_from_text

fallback_language = (get_openviking_config().language_fallback or "en").strip() or "en"
output_language = _detect_language_from_text(content, fallback_language)

# Detect file type and select appropriate prompt
file_type = self._detect_file_type(file_name)

Expand All @@ -749,7 +754,11 @@ async def _generate_text_summary(
else: # ast_llm
prompt = render_prompt(
"semantic.code_ast_summary",
{"file_name": file_name, "skeleton": skeleton_text},
{
"file_name": file_name,
"skeleton": skeleton_text,
"output_language": output_language,
},
)
async with llm_sem:
summary = await vlm.get_completion_async(prompt)
Expand All @@ -762,7 +771,7 @@ async def _generate_text_summary(
# "llm" mode or fallback when skeleton is None/empty
prompt = render_prompt(
"semantic.code_summary",
{"file_name": file_name, "content": content},
{"file_name": file_name, "content": content, "output_language": output_language},
)
async with llm_sem:
summary = await vlm.get_completion_async(prompt)
Expand All @@ -775,7 +784,7 @@ async def _generate_text_summary(

prompt = render_prompt(
prompt_id,
{"file_name": file_name, "content": content},
{"file_name": file_name, "content": content, "output_language": output_language},
)

async with llm_sem:
Expand Down Expand Up @@ -924,6 +933,10 @@ async def _generate_overview(
logger.warning("VLM not available, using default overview")
return f"# {dir_uri.split('/')[-1]}\n\nDirectory overview"

from openviking.session.memory.utils.language import _detect_language_from_text

fallback_language = (config.language_fallback or "en").strip() or "en"

# Build file index mapping and summary string
file_index_map = {}
file_summaries_lines = []
Expand All @@ -932,6 +945,8 @@ async def _generate_overview(
file_summaries_lines.append(f"[{idx}] {item['name']}: {item['summary']}")
file_summaries_str = "\n".join(file_summaries_lines) if file_summaries_lines else "None"

output_language = _detect_language_from_text(file_summaries_str, fallback_language)

# Build subdirectory summary string
children_abstracts_str = (
"\n".join(f"- {item['name']}/: {item['abstract']}" for item in children_abstracts)
Expand All @@ -957,6 +972,7 @@ async def _generate_overview(
children_abstracts,
file_index_map,
llm_sem=llm_sem,
output_language=output_language,
)
elif over_budget:
# Few files but long summaries → truncate summaries to fit budget
Expand All @@ -978,13 +994,15 @@ async def _generate_overview(
file_summaries_str,
children_abstracts_str,
file_index_map,
output_language=output_language,
)
else:
overview = await self._single_generate_overview(
dir_uri,
file_summaries_str,
children_abstracts_str,
file_index_map,
output_language=output_language,
)

return overview
Expand All @@ -995,6 +1013,7 @@ async def _single_generate_overview(
file_summaries_str: str,
children_abstracts_str: str,
file_index_map: Dict[int, str],
output_language: str = "en",
) -> str:
"""Generate overview from a single prompt (small directories)."""
import re
Expand All @@ -1008,6 +1027,7 @@ async def _single_generate_overview(
"dir_name": dir_uri.split("/")[-1],
"file_summaries": file_summaries_str,
"children_abstracts": children_abstracts_str,
"output_language": output_language,
},
)

Expand Down Expand Up @@ -1036,6 +1056,7 @@ async def _batched_generate_overview(
children_abstracts: List[Dict[str, str]],
file_index_map: Dict[int, str],
llm_sem: Optional[asyncio.Semaphore] = None,
output_language: str = "en",
) -> str:
"""Generate overview by batching file summaries and merging.

Expand Down Expand Up @@ -1089,6 +1110,7 @@ async def _batched_generate_overview(
"dir_name": dir_name,
"file_summaries": batch_str,
"children_abstracts": children_str,
"output_language": output_language,
},
)
batch_prompts.append((batch_idx, prompt, batch_index_map))
Expand Down Expand Up @@ -1131,6 +1153,7 @@ async def _run_batch(batch_idx: int, prompt: str, batch_index_map: Dict[int, str
"dir_name": dir_name,
"file_summaries": combined,
"children_abstracts": children_abstracts_str,
"output_language": output_language,
},
)
overview = await vlm.get_completion_async(prompt)
Expand Down
4 changes: 2 additions & 2 deletions openviking_cli/utils/config/open_viking_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ class OpenVikingConfig(BaseModel):
language_fallback: str = Field(
default="en",
description=(
"Fallback language used by memory extraction when dominant user language "
"cannot be confidently detected"
"Fallback language used by memory extraction and semantic processing when dominant "
"user language cannot be confidently detected"
),
)

Expand Down
Loading