From e7efc3fb9dd41948158e869759248aeaa1067dbb Mon Sep 17 00:00:00 2001
From: Devam Shah <devamshah91@gmail.com>
Date: Fri, 12 Jun 2026 12:48:05 +0530
Subject: [PATCH 1/2] python: add hf-trust-remote-code rule (HuggingFace
 trust_remote_code=True RCE)

Signed-off-by: Devam Shah <devamshah91@gmail.com>
---
 python/hf-trust-remote-code.yaml | 37 ++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 python/hf-trust-remote-code.yaml

diff --git a/python/hf-trust-remote-code.yaml b/python/hf-trust-remote-code.yaml
new file mode 100644
index 0000000..deaa586
--- /dev/null
+++ b/python/hf-trust-remote-code.yaml
@@ -0,0 +1,37 @@
+rules:
+  - id: hf-trust-remote-code
+    message: >-
+      Loading a Hugging Face model, tokenizer, config, pipeline, or dataset with
+      `trust_remote_code=True` executes arbitrary Python (`modeling_*.py`,
+      `configuration_*.py`, or dataset loading scripts) shipped in the remote
+      repository. A malicious or compromised repository achieves remote code
+      execution on the loading host. Prefer `trust_remote_code=False`, pin the
+      load to a reviewed `revision`, or vendor and audit the custom code before
+      loading.
+    languages: [python]
+    severity: ERROR
+    metadata:
+      category: security
+      cwe: "CWE-94: Improper Control of Generation of Code ('Code Injection')"
+      subcategory: [vuln]
+      confidence: MEDIUM
+      likelihood: MEDIUM
+      impact: HIGH
+      technology: [transformers, huggingface]
+      description: "Potential arbitrary code execution from loading Hugging Face assets with `trust_remote_code=True`"
+      references:
+        - https://huggingface.co/docs/transformers/en/models#custom-models
+        - https://blog.trailofbits.com/2025/09/24/supply-chain-attacks-are-exploiting-our-assumptions/
+
+    patterns:
+      - pattern-either:
+          - pattern: $OBJ.from_pretrained(..., trust_remote_code=True, ...)
+          - pattern: transformers.pipeline(..., trust_remote_code=True, ...)
+          - pattern: pipeline(..., trust_remote_code=True, ...)
+          - pattern: datasets.load_dataset(..., trust_remote_code=True, ...)
+          - pattern: load_dataset(..., trust_remote_code=True, ...)
+      - pattern-not: $OBJ.from_pretrained(..., trust_remote_code=False, ...)
+      - pattern-not: transformers.pipeline(..., trust_remote_code=False, ...)
+      - pattern-not: pipeline(..., trust_remote_code=False, ...)
+      - pattern-not: datasets.load_dataset(..., trust_remote_code=False, ...)
+      - pattern-not: load_dataset(..., trust_remote_code=False, ...)

From fb7992f44f2b1fb706cbdc365a9e53a30a4eb55d Mon Sep 17 00:00:00 2001
From: Devam Shah <devamshah91@gmail.com>
Date: Fri, 12 Jun 2026 12:48:06 +0530
Subject: [PATCH 2/2] python: add hf-trust-remote-code rule (HuggingFace
 trust_remote_code=True RCE)

Signed-off-by: Devam Shah <devamshah91@gmail.com>
---
 python/hf-trust-remote-code.py | 66 ++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 python/hf-trust-remote-code.py

diff --git a/python/hf-trust-remote-code.py b/python/hf-trust-remote-code.py
new file mode 100644
index 0000000..0d22d74
--- /dev/null
+++ b/python/hf-trust-remote-code.py
@@ -0,0 +1,66 @@
+import datasets
+import transformers
+from datasets import load_dataset
+from transformers import (
+    AutoConfig,
+    AutoModel,
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    pipeline,
+)
+
+CHECKPOINT = "org/model"
+
+# ruleid: hf-trust-remote-code
+model = AutoModelForCausalLM.from_pretrained(CHECKPOINT, trust_remote_code=True)
+
+# ruleid: hf-trust-remote-code
+tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT, trust_remote_code=True)
+
+# ruleid: hf-trust-remote-code
+config = AutoConfig.from_pretrained(CHECKPOINT, trust_remote_code=True)
+
+# ruleid: hf-trust-remote-code
+base = AutoModel.from_pretrained("org/custom", revision="main", trust_remote_code=True)
+
+# ruleid: hf-trust-remote-code
+pipe = pipeline("text-generation", model=CHECKPOINT, trust_remote_code=True)
+
+# ruleid: hf-trust-remote-code
+pipe2 = transformers.pipeline("text-generation", model=CHECKPOINT, trust_remote_code=True)
+
+# ruleid: hf-trust-remote-code
+ds = load_dataset("org/dataset", trust_remote_code=True)
+
+# ruleid: hf-trust-remote-code
+ds2 = datasets.load_dataset("org/dataset", split="train", trust_remote_code=True)
+
+
+def configurable_load(trust: bool = True):
+    # ruleid: hf-trust-remote-code
+    return AutoModelForCausalLM.from_pretrained(CHECKPOINT, trust_remote_code=True)
+
+
+# ok: hf-trust-remote-code
+safe_model = AutoModelForCausalLM.from_pretrained(CHECKPOINT)
+
+# ok: hf-trust-remote-code
+safe_model_explicit = AutoModelForCausalLM.from_pretrained(CHECKPOINT, trust_remote_code=False)
+
+# ok: hf-trust-remote-code
+safe_tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT, trust_remote_code=False)
+
+# ok: hf-trust-remote-code
+safe_config = AutoConfig.from_pretrained(CHECKPOINT)
+
+# ok: hf-trust-remote-code
+safe_pipe = pipeline("text-generation", model=CHECKPOINT)
+
+# ok: hf-trust-remote-code
+safe_pipe_explicit = transformers.pipeline("text-generation", model=CHECKPOINT, trust_remote_code=False)
+
+# ok: hf-trust-remote-code
+safe_ds = load_dataset("org/dataset", trust_remote_code=False)
+
+# ok: hf-trust-remote-code
+safe_ds_default = datasets.load_dataset("org/dataset", split="train")