#!/usr/bin/env bash
# Copyright (c) 2026 Lark Technologies Pte. Ltd.
# SPDX-License-Identifier: MIT
#
# check-doc-tokens.sh
#
# Usage: check-doc-tokens.sh [SKILLS_DIR]     (SKILLS_DIR defaults to "skills")
#
# Scans skill reference docs (every */references/*.md below SKILLS_DIR) for
# token-like values that look realistic but are not using the required
# placeholder format (*_EXAMPLE_TOKEN or similar).
#
# Real token patterns (Lark API) often look like:
#   wikcnXXXXXXXXX  doccnXXXXXXX  shtcnXXX  fldcnXXX  ou_XXXX  cli_XXXX
#
# Docs MUST use clearly fake placeholders, e.g.:
#   wikcn_EXAMPLE_TOKEN  doccn_EXAMPLE_TOKEN  <space_id>  your_token_here
#
# If this check fails, replace the realistic-looking value with a placeholder
# like `wikcn_EXAMPLE_TOKEN` so gitleaks CI won't flag it as a real secret.
#
# Exit status:
#   0  no realistic-looking token was found (also when SKILLS_DIR does not
#      exist; a warning is then printed to stderr)
#   1  at least one file contains a realistic-looking token, or at least one
#      file could not be scanned

set -euo pipefail

SKILLS_DIR="${1:-skills}"

# Pattern for a realistic-looking Lark token value. It matches a value that is
# delimited by double quotes (JSON-style: "field": "token_value") or by
# backticks (markdown code spans), starts with one of the checked prefixes and
# continues with at least six alphanumeric characters.
#
# Prefixes checked:
#   wikcn doccn shtcn bascn fldcn vewcn tbln obcn flec
# docx, ou_ and cli_ are Lark token prefixes too, but are not checked by this
# pattern.
#
# Because the character class has no "_", "<" or ">", values such as
# "wikcn_EXAMPLE_TOKEN" or "<your_token>" never match in the first place.
REALISTIC_TOKEN_RE='["`](wikcn|doccn|shtcn|bascn|fldcn|vewcn|tbln|obcn|flec)[A-Za-z0-9]{6,}["`]'

# Matched values containing any of these fragments are treated as placeholders
# and dropped (e.g. bascnXXXXXXXX, wikcnEXAMPLE123).
PLACEHOLDER_RE='(EXAMPLE|_TOKEN|XXXX|xxxx|<|>|your_|_here)'

#######################################
# Print every realistic-looking token found in one file.
# Globals:   REALISTIC_TOKEN_RE (read), PLACEHOLDER_RE (read)
# Arguments: $1 - path of the file to scan
# Outputs:   one "LINE:MATCH" entry per hit on stdout (nothing if clean);
#            grep's own diagnostics go to stderr
# Returns:   0 when the file was scanned (with or without hits),
#            grep's exit status (>1) when the file could not be scanned
#######################################
find_realistic_tokens() {
  local file=$1
  local hits
  local status=0

  # grep exits 1 for "no match" and >1 for a real error; only the latter is
  # a failure here.
  hits=$(grep -nEo -e "$REALISTIC_TOKEN_RE" -- "$file") || status=$?
  if (( status > 1 )); then
    return "$status"
  fi
  if [[ -z "$hits" ]]; then
    return 0
  fi

  # Drop obvious placeholders. grep -v exits 1 when every candidate was
  # filtered out, which is the "file is clean" case.
  status=0
  grep -vE -e "$PLACEHOLDER_RE" <<< "$hits" || status=$?
  if (( status > 1 )); then
    return "$status"
  fi
  return 0
}

#######################################
# Scan every */references/*.md below a directory and report offenders.
# Arguments: $1 - directory to scan
# Outputs:   per-file report and summary on stdout; warnings on stderr
# Returns:   0 when nothing was flagged, 1 when a file contains a
#            realistic-looking token or could not be scanned
#######################################
main() {
  local dir=$1
  local flagged=0
  local unscannable=0
  local file matches status line

  # find runs inside a process substitution below, so its failure would go
  # unnoticed; check the directory up front instead of reporting a success
  # for a scan that never happened.
  if [[ ! -d "$dir" ]]; then
    echo "⚠️  check-doc-tokens: directory '$dir' not found; no reference docs were scanned." >&2
    return 0
  fi

  while IFS= read -r -d '' file; do
    status=0
    matches=$(find_realistic_tokens "$file") || status=$?

    if (( status != 0 )); then
      echo ""
      echo "❌ $file"
      echo " Could not be scanned (grep exit status $status)."
      unscannable=$((unscannable + 1))
      continue
    fi

    if [[ -n "$matches" ]]; then
      echo ""
      echo "❌ $file"
      echo " Contains realistic-looking token values that may trigger gitleaks:"
      while IFS= read -r line; do
        echo " $line"
      done <<< "$matches"
      echo " → Replace with a placeholder, e.g.: wikcn_EXAMPLE_TOKEN, doccn_EXAMPLE_TOKEN"
      flagged=$((flagged + 1))
    fi
  # "! -type d" keeps directories that happen to be named *.md away from grep
  # while still allowing symlinked files.
  done < <(find "$dir" -path "*/references/*.md" ! -type d -print0)

  if (( flagged > 0 || unscannable > 0 )); then
    echo ""
    if (( flagged > 0 )); then
      echo "❌ check-doc-tokens: $flagged file(s) contain realistic token values in reference docs."
      echo " Use _EXAMPLE_TOKEN placeholders to avoid false positives in gitleaks CI."
    fi
    if (( unscannable > 0 )); then
      echo "❌ check-doc-tokens: $unscannable file(s) could not be scanned."
    fi
    return 1
  fi

  echo "✅ check-doc-tokens: all reference docs use safe placeholder tokens."
}

# Under `set -e` a non-zero return from main becomes the script's exit status.
main "$SKILLS_DIR"