diff --git a/apps/memos-local-plugin/core/llm/client.ts b/apps/memos-local-plugin/core/llm/client.ts
index 0c6c75d89..5aeee1c04 100644
--- a/apps/memos-local-plugin/core/llm/client.ts
+++ b/apps/memos-local-plugin/core/llm/client.ts
@@ -83,7 +83,24 @@ export function createLlmClientWithProvider(
     if (!Array.isArray(input) || input.length === 0) {
       throw new MemosError(ERROR_CODES.INVALID_ARGUMENT, "LLM messages array is empty");
     }
-    return input;
+    // Ensure system messages are always at the beginning.
+    // Some models (e.g. Qwen3.6 via vLLM) enforce "system must be first"
+    // in their Jinja2 chat templates, returning HTTP 400 otherwise.
+    // See: https://github.com/MemTensor/MemOS/issues/XXXX
+    const systems = input.filter((m) => m.role === "system");
+    const nonSystems = input.filter((m) => m.role !== "system");
+    if (systems.length === 0) return input;
+    // Fast path: single system already at position 0, no later systems.
+    if (
+      systems.length === 1 &&
+      input[0]?.role === "system" &&
+      !input.slice(1).some((m) => m.role === "system")
+    ) {
+      return input;
+    }
+    // Merge all system contents into one leading message, preserving order.
+    const merged = systems.map((s) => s.content).join("\n\n");
+    return [{ role: "system", content: merged }, ...nonSystems];
   }
 
   function inject(messages: LlmMessage[], systemInsert: string): LlmMessage[] {
diff --git a/apps/memos-local-plugin/core/llm/providers/openai.ts b/apps/memos-local-plugin/core/llm/providers/openai.ts
index c24c96236..1801a5aad 100644
--- a/apps/memos-local-plugin/core/llm/providers/openai.ts
+++ b/apps/memos-local-plugin/core/llm/providers/openai.ts
@@ -50,18 +50,19 @@ export class OpenAiLlmProvider implements LlmProvider {
     ctx: LlmProviderCtx,
   ): Promise<ProviderCompletion> {
     const { config, log, signal } = ctx;
-    if (!config.apiKey) {
-      throw new MemosError(
-        ERROR_CODES.LLM_UNAVAILABLE,
-        "openai_compatible provider requires config.llm.apiKey",
-        { provider: this.name },
-      );
-    }
     const url = normalizeEndpoint(
       config.endpoint && config.endpoint.length > 0
         ? config.endpoint
         : "https://api.openai.com/v1/chat/completions",
     );
+    const isLocal = isLocalhostOrPrivateUrl(url);
+    if (!config.apiKey && !isLocal) {
+      throw new MemosError(
+        ERROR_CODES.LLM_UNAVAILABLE,
+        "openai_compatible provider requires config.llm.apiKey (or use a local endpoint)",
+        { provider: this.name },
+      );
+    }
     const model = config.model && config.model.length > 0 ? config.model : "gpt-4o-mini";
 
     const body: Record<string, unknown> = {
@@ -73,13 +74,16 @@ export class OpenAiLlmProvider implements LlmProvider {
     if (opts.jsonMode) body.response_format = { type: "json_object" };
     if (opts.stop && opts.stop.length > 0) body.stop = opts.stop;
 
+    const headers: Record<string, string> = {};
+    if (config.apiKey) {
+      headers.Authorization = `Bearer ${config.apiKey}`;
+    }
+    Object.assign(headers, config.headers);
+
     const { json, durationMs } = await httpPostJson<OaResp>({
       url,
       body,
-      headers: {
-        Authorization: `Bearer ${config.apiKey}`,
-        ...config.headers,
-      },
+      headers,
       timeoutMs: config.timeoutMs,
       maxRetries: config.maxRetries,
       signal,
@@ -109,18 +113,19 @@ export class OpenAiLlmProvider implements LlmProvider {
     ctx: LlmProviderCtx,
   ): AsyncGenerator<LlmStreamChunk> {
     const { config, log, signal } = ctx;
-    if (!config.apiKey) {
-      throw new MemosError(
-        ERROR_CODES.LLM_UNAVAILABLE,
-        "openai_compatible provider requires config.llm.apiKey",
-        { provider: this.name },
-      );
-    }
     const url = normalizeEndpoint(
       config.endpoint && config.endpoint.length > 0
         ? config.endpoint
         : "https://api.openai.com/v1/chat/completions",
     );
+    const isLocal = isLocalhostOrPrivateUrl(url);
+    if (!config.apiKey && !isLocal) {
+      throw new MemosError(
+        ERROR_CODES.LLM_UNAVAILABLE,
+        "openai_compatible provider requires config.llm.apiKey (or use a local endpoint)",
+        { provider: this.name },
+      );
+    }
     const model = config.model && config.model.length > 0 ? config.model : "gpt-4o-mini";
 
     const body: Record<string, unknown> = {
@@ -133,13 +138,16 @@ export class OpenAiLlmProvider implements LlmProvider {
     if (opts.jsonMode) body.response_format = { type: "json_object" };
     if (opts.stop && opts.stop.length > 0) body.stop = opts.stop;
 
+    const headers: Record<string, string> = {};
+    if (config.apiKey) {
+      headers.Authorization = `Bearer ${config.apiKey}`;
+    }
+    Object.assign(headers, config.headers);
+
     const resp = await httpPostStream({
       url,
       body,
-      headers: {
-        Authorization: `Bearer ${config.apiKey}`,
-        ...config.headers,
-      },
+      headers,
       timeoutMs: config.timeoutMs,
       signal,
       provider: this.name,
@@ -211,3 +219,25 @@ function mapFinish(reason: string | undefined): ProviderCompletion["finishReason
       return "other";
   }
 }
+
+/**
+ * Return true if `url` targets loopback (`localhost`, 127.0.0.0/8, `[::1]`) or
+ * an RFC 1918 private IPv4 range; false otherwise, including malformed URLs.
+ * Used to relax the apiKey requirement for local/self-hosted inference servers.
+ */
+function isLocalhostOrPrivateUrl(url: string): boolean {
+  try {
+    const host = new URL(url).hostname.toLowerCase();
+    // URL.hostname keeps the brackets on IPv6 literals, hence "[::1]".
+    if (host === "localhost" || host === "[::1]") return true;
+    // Require a full dotted quad so "10.example.com" is not treated as private.
+    const quad = /^(\d+)\.(\d+)\.\d+\.\d+$/.exec(host);
+    if (!quad) return false;
+    const a = Number(quad[1]);
+    const b = Number(quad[2]);
+    if (a === 10 || a === 127 || (a === 192 && b === 168)) return true;
+    return a === 172 && b >= 16 && b <= 31;
+  } catch {
+    return false; // Malformed URL — let the caller handle it.
+  }
+}