diff --git a/tests/cli/test_cli_chat.py b/tests/cli/test_cli_chat.py
index 060a5bb..7eb50ac 100644
--- a/tests/cli/test_cli_chat.py
+++ b/tests/cli/test_cli_chat.py
@@ -216,7 +216,8 @@ def test_chat_help(self, runner, config_file, mock_client):
         assert "--prompt" in plain_output
         assert "--input" in plain_output
         assert "--model" in plain_output
-        assert "--json" in plain_output
+        assert "--format" in plain_output
+        assert "--skill-id" in plain_output
 
     def test_chat_no_prompt_error(self, runner, config_file, mock_client):
         """Test error when empty prompt provided."""
@@ -282,7 +283,8 @@ def test_chat_with_file_json_output(
                     "Describe this image",
                     "-i",
                     str(test_file),
-                    "--json",
+                    "--format",
+                    "json",
                     "--no-stream",
                 ],
             )
@@ -290,11 +292,11 @@ def test_chat_with_file_json_output(
         # The mock might not be properly connected to the CLI, so we test the basic flow
         # In real tests, you'd ensure the client is properly mocked throughout
 
-    def test_chat_json_output_flag(self, runner, config_file, mock_client):
-        """Test that --json flag is documented."""
+    def test_chat_format_json_flag(self, runner, config_file, mock_client):
+        """Test that --format flag is documented."""
         result = runner.invoke(app, ["chat", "--help"])
         assert result.exit_code == 0
-        assert "--json" in result.stdout or "-j" in result.stdout
+        assert "--format" in result.stdout or "-f" in result.stdout
 
 
 class TestModels:
@@ -334,7 +336,9 @@ def test_chat_simple_prompt(self, real_runner):
 
     def test_chat_json_output(self, real_runner):
         """Test chat with JSON output."""
-        result = real_runner.invoke(app, ["chat", "Say hello", "--json", "--no-stream"])
+        result = real_runner.invoke(
+            app, ["chat", "Say hello", "--format", "json", "--no-stream"]
+        )
         assert result.exit_code == 0, f"Command failed with: {result.stdout}"
 
         # Parse JSON output - should be pure JSON with no extra output
@@ -412,7 +416,9 @@ def test_chat_all_output_modes(self, real_runner):
         assert "Response" in result.stdout
 
         # Test JSON output
-        result = real_runner.invoke(app, ["chat", prompt, "--json", "--no-stream"])
+        result = real_runner.invoke(
+            app, ["chat", prompt, "--format", "json", "--no-stream"]
+        )
         assert result.exit_code == 0
         output = json.loads(result.stdout)
         assert "content" in output
diff --git a/vlmrun/cli/_cli/chat.py b/vlmrun/cli/_cli/chat.py
index a6ccc9b..1084097 100644
--- a/vlmrun/cli/_cli/chat.py
+++ b/vlmrun/cli/_cli/chat.py
@@ -107,7 +107,8 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 EXAMPLES:
   vlmrun chat "Describe this" -i photo.jpg
   vlmrun chat "Compare" -i a.jpg -i b.jpg
-  vlmrun chat -p prompt.txt -i doc.pdf --json
+  vlmrun chat -p prompt.txt -i doc.pdf --format json
+  vlmrun chat "Analyze" -i img.jpg --skill-id my-skill:latest
   echo "Summarize" | vlmrun chat -p stdin -i video.mp4
 
 \b
@@ -116,9 +117,15 @@ def __exit__(self, exc_type, exc_val, exc_tb):
   vlmrun-orion-1:auto  Auto-select (default)
   vlmrun-orion-1:pro   Most capable
 
+\b
+SKILLS:
+  --skill      Path to a local skill directory (inline)
+  --skill-id   Server-side skill as <name>:<version> (e.g. my-skill:latest)
+  Only one of --skill or --skill-id may be provided.
+
 \b
 OUTPUT:
-  --json      JSON output for programmatic use
+  --format json   JSON output for programmatic use
 
 \b
 FILES: .jpg .png .gif .mp4 .mov .pdf .doc .mp3 .wav (and more)
@@ -507,12 +514,22 @@ def chat(
         help=(
             "Path to a skill directory (must contain SKILL.md). Repeatable. "
             "The skill is sent inline with the request (no server-side upload). "
-            "To create a persistent server-side skill, use `vlmrun skills upload`."
+            "To create a persistent server-side skill, use `vlmrun skills upload`. "
+            "Cannot be used together with --skill-id."
         ),
         exists=True,
         file_okay=False,
         readable=True,
     ),
+    skill_ids: Optional[List[str]] = typer.Option(
+        None,
+        "--skill-id",
+        help=(
+            "Server-side skill reference as <skill-name>:<version> "
+            "(e.g. my-skill:latest, my-skill:3). Repeatable. "
+            "Cannot be used together with --skill."
+        ),
+    ),
     output_dir: Optional[Path] = typer.Option(
         None,
         "--output",
@@ -530,11 +547,11 @@ def chat(
         "-m",
         help="Model: vlmrun-orion-1:fast|auto|pro",
     ),
-    output_json: bool = typer.Option(
-        False,
-        "--json",
-        "-j",
-        help="Output JSON instead of formatted text.",
+    output_format: Optional[str] = typer.Option(
+        None,
+        "--format",
+        "-f",
+        help="Output format. Use 'json' for JSON output instead of formatted text.",
     ),
     no_stream: bool = typer.Option(
         False,
@@ -572,6 +589,24 @@ def chat(
         console.print("[red]Error:[/] Client not initialized. Check your API key.")
         sys.exit(1)
 
+    # Map --format to output_json; only "json" (case-insensitive) is accepted
+    output_json = False
+    if output_format is not None:
+        if output_format.lower() == "json":
+            output_json = True
+        else:
+            console.print(f"[red]Error:[/] Unsupported output format '{output_format}'")
+            console.print("\nSupported formats: json")
+            sys.exit(1)
+
+    # Validate --skill and --skill-id are mutually exclusive
+    if skill_dirs and skill_ids:
+        console.print(
+            "[red]Error:[/] --skill and --skill-id are mutually exclusive. "
+            "Provide one or the other, not both."
+        )
+        sys.exit(1)
+
     # Resolve prompt from various sources
     try:
         final_prompt = resolve_prompt(prompt, prompt_file)
@@ -648,7 +683,7 @@ def chat(
         response_content = ""
         usage_data: Optional[Dict[str, Any]] = None
         response_id: Optional[str] = None
-        # Build skills list from --skill directories
+        # Build skills list from --skill directories or --skill-id references
         agent_skills: Optional[List[Dict[str, Any]]] = None
         if skill_dirs:
             agent_skills = []
@@ -660,6 +695,32 @@ def chat(
                 console.print(
                     f"  [green]\u2713[/green] Loaded {len(agent_skills)} skill(s) (inline)"
                 )
+        elif skill_ids:
+            agent_skills = []
+            for sid in skill_ids:
+                # Split on the last ":"; a bare name means version "latest"
+                if ":" in sid:
+                    skill_name, skill_version = sid.rsplit(":", 1)
+                else:
+                    skill_name, skill_version = sid, "latest"
+                # Reject empty parts (e.g. "name:" from an unset shell variable)
+                if not skill_name.strip() or not skill_version.strip():
+                    console.print(
+                        f"[red]Error:[/] Invalid --skill-id '{sid}'. "
+                        "Expected <skill-name>:<version> (e.g. my-skill:latest)"
+                    )
+                    sys.exit(1)
+                skill = AgentSkill(
+                    type="skill_reference",
+                    skill_id=skill_name,
+                    version=skill_version,
+                )
+                agent_skills.append(skill.model_dump(exclude_none=True))
+
+            if not output_json:
+                console.print(
+                    f"  [green]\u2713[/green] Using {len(agent_skills)} skill(s) (referenced)"
+                )
 
         extra_body: Optional[Dict[str, Any]] = {}
         if session_id: