From 13eec13bb3e0a6603e7f8c523588ecf0d45b1ebc Mon Sep 17 00:00:00 2001 From: Jay Sahnan Date: Mon, 27 Apr 2026 07:32:38 +0100 Subject: [PATCH] init commit --- .gitignore | 6 + skills/event-follow-up/.gitignore | 8 + skills/event-follow-up/SKILL.md | 413 ++++++++ skills/event-follow-up/profiles/example.json | 9 + .../event-follow-up/references/csv-schemas.md | 76 ++ .../references/email-patterns.md | 119 +++ .../references/example-research.md | 195 ++++ .../references/report-template.html | 139 +++ .../references/research-patterns.md | 277 ++++++ skills/event-follow-up/references/workflow.md | 430 +++++++++ .../scripts/__fixtures__/example.csv | 10 + .../scripts/compile_report.mjs | 894 ++++++++++++++++++ .../event-follow-up/scripts/enrich_person.mjs | 60 ++ .../event-follow-up/scripts/extract_page.mjs | 168 ++++ skills/event-follow-up/scripts/package.json | 6 + skills/event-follow-up/scripts/parse_csv.mjs | 218 +++++ 16 files changed, 3028 insertions(+) create mode 100644 skills/event-follow-up/.gitignore create mode 100644 skills/event-follow-up/SKILL.md create mode 100644 skills/event-follow-up/profiles/example.json create mode 100644 skills/event-follow-up/references/csv-schemas.md create mode 100644 skills/event-follow-up/references/email-patterns.md create mode 100644 skills/event-follow-up/references/example-research.md create mode 100644 skills/event-follow-up/references/report-template.html create mode 100644 skills/event-follow-up/references/research-patterns.md create mode 100644 skills/event-follow-up/references/workflow.md create mode 100644 skills/event-follow-up/scripts/__fixtures__/example.csv create mode 100644 skills/event-follow-up/scripts/compile_report.mjs create mode 100755 skills/event-follow-up/scripts/enrich_person.mjs create mode 100755 skills/event-follow-up/scripts/extract_page.mjs create mode 100644 skills/event-follow-up/scripts/package.json create mode 100755 skills/event-follow-up/scripts/parse_csv.mjs diff --git a/.gitignore b/.gitignore index d8133af..7fc1de8 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,9 @@ tmp/.cache/.chrome-pid # Per-skill local Claude Code settings (contains user-specific permission approvals) skills/*/.claude/settings.local.json + +# Per-skill personal ICP profiles — only example.json is committed; users keep +# their own profiles locally. Lives at the repo root so it protects every skill +# regardless of which branch is checked out. +skills/*/profiles/*.json +!skills/*/profiles/example.json diff --git a/skills/event-follow-up/.gitignore b/skills/event-follow-up/.gitignore new file mode 100644 index 0000000..61c5452 --- /dev/null +++ b/skills/event-follow-up/.gitignore @@ -0,0 +1,8 @@ +node_modules/ +*.log +.DS_Store + +# Personal ICP profiles — example.json is the only one tracked; users keep +# their own profiles locally without committing them upstream. +profiles/*.json +!profiles/example.json diff --git a/skills/event-follow-up/SKILL.md b/skills/event-follow-up/SKILL.md new file mode 100644 index 0000000..b6e74f1 --- /dev/null +++ b/skills/event-follow-up/SKILL.md @@ -0,0 +1,413 @@ +--- +name: event-follow-up +description: | + Event follow-up skill. Takes a CSV of attendees from a conference, + enriches each person against the user's ICP, scores their sales + readiness (HOT / WARM / NURTURE / COLD), and drafts a personalized + follow-up email per person — replacing the generic "great meeting + you" templates that get ignored. 
+
+  Use when the user wants to: (1) follow up after an event,
+  (2) qualify post-event leads,
+  (3) decide who to route to sales,
+  (4) personalize follow-up emails at scale,
+  (5) work a CSV of badge-scanned attendees.
+  Triggers: "follow up after {event}", "post-event emails",
+  "event follow-up", "conference attendee CSV", "follow up on leads",
+  "qualify event attendees", "personalize follow-up emails",
+  "stripe sessions follow-up", "post-conference outreach".
+license: MIT
+compatibility: Requires bb CLI (@browserbasehq/cli) and BROWSERBASE_API_KEY env var.
+allowed-tools: Bash Agent AskUserQuestion
+metadata:
+  author: browserbase
+  version: "0.1.0"
+---
+
+# Event Follow-Up
+
+Take an attendee CSV → get a per-person follow-up email plus a sales-readiness flag (HOT / WARM / NURTURE / COLD), with a "why" rationale per person.
+
+**Required**: `BROWSERBASE_API_KEY` env var, `bb` CLI installed (`@browserbasehq/cli`). Browse CLI is NOT required (this skill takes a CSV in, no event-page scraping).
+
+**Path rules**: Always use the full literal path in all Bash commands — NOT `~` or `$HOME`. Resolve the home directory once and use it everywhere. When constructing subagent prompts, replace `{SKILL_DIR}` with the full literal path (typically `/Users/jay/skills/skills/event-follow-up`).
+
+**Output directory**: All event follow-up output goes to `~/Desktop/{event_slug}_followup_{YYYY-MM-DD-HHMM}/`. Final deliverable is `index.html` (people grouped by sales-readiness, ranked HOT → COLD), with `people.html` and `companies.html` alternate views, plus `results.csv` (one row per person with the email body in a column for direct CRM import).
+
+**CRITICAL — Tool restrictions (applies to main agent AND all subagents)**:
+- All web searches: use `bb search`. NEVER use WebSearch.
+- All page content extraction: use `node {SKILL_DIR}/scripts/extract_page.mjs "<url>"`. This script fetches via `bb fetch`, parses title + meta tags + visible body text, and automatically falls back to `bb browse` when JS-rendered. NEVER hand-roll a `bb fetch | sed` pipeline. NEVER use WebFetch.
+- All research output: subagents write **one markdown file per company OR per person** to `{OUTPUT_DIR}/companies/{slug}.md` or `{OUTPUT_DIR}/people/{slug}.md` using bash heredoc. NEVER use the Write tool or `python3 -c`. See `references/example-research.md` for both file formats.
+- Report compilation: use `node {SKILL_DIR}/scripts/compile_report.mjs {OUTPUT_DIR} --open`.
+- **Subagents must use ONLY the Bash tool. No other tools allowed.**
+- **HARD TOOL-CALL CAPS**: ICP triage = 1 call/company; deep research = 5 calls/company; person enrichment + email = 4 calls/person. See `references/workflow.md`.
+
+**CRITICAL — Anti-hallucination rules (applies to main agent AND all subagents)**:
+- NEVER infer `product_description`, `industry`, or a person's `role_reason` from a site's fonts, framework, design system, or typography.
+- NEVER let the user's own ICP leak into a target's description. If you don't know what the target does, write `Unknown`.
+- `product_description` MUST quote or paraphrase a phrase from `extract_page.mjs` output. Otherwise write `Unknown — homepage content not accessible` and cap `icp_fit_score` at 3.
+- A logo on a target's homepage does NOT establish a customer relationship. If `{TARGET}` shows `{USER_COMPANY}`'s logo in a "trusted by" section, the USER is the TARGET's customer — NOT the reverse. Only call a target an "existing customer" if its name appears in the user profile's `existing_customers` array.
+- The personalized email MUST reference a specific finding from research (recent activity, hiring, product launch, talk topic). Generic "great to meet you" filler is the failure mode this skill exists to prevent.
+
+**CRITICAL — Minimize permission prompts**:
+- Subagents MUST batch ALL file writes into a SINGLE Bash call using chained heredocs.
+- Batch ALL searches and ALL fetches into single Bash calls using `&&` chaining.
+
+## Pipeline Overview
+
+Follow these 11 steps in order. Do not skip steps or reorder.
+
+0. **Setup** — output dir + clean slate
+1. **Event context** — ask the user to paste a description or URL of the event
+2. **Promo codes** — ask the user (AskUserQuestion) whether to include a discount code, and for which buckets
+3. **Load profile** — read `profiles/{user_slug}.json`
+4. **Parse CSV** — normalize headers, write `people.jsonl` + `seed_companies.txt`
+5. **Group by company** — verify the seed companies count
+6. **ICP triage** — fast company-level scoring (1 call/company)
+7. **Filter** — companies with `icp_fit_score >= --icp-threshold`
+8. **Deep research** — full Plan→Research→Synthesize on ICP fits
+9. **Enrich attendees + draft email** — at ICP-fit companies only (combined Person Enrichment + Email subagent pass)
+10. **Compile report** — HTML + CSV, open in browser
+
+The user invokes the skill with a CSV path like `/event-follow-up /Users/jay/Downloads/stripe-attendees.csv`. Parse `CSV_PATH` from that invocation message. Defaults: `DEPTH=deep`, `ICP_THRESHOLD=6`. The `USER_SLUG` (ICP profile) is auto-resolved in Step 3 — there is no built-in default profile. Do NOT ask the user to confirm the path.
+
+---
+
+## Step 0: Setup Output Directory
+
+Derive the output directory from the CSV filename, or from an `--event-name` flag if provided. Do NOT hardcode any event name.
+
+```bash
+EVENT_SLUG=$(node -e 'const p = require("path").basename(process.argv[1]).replace(/\.csv$/i,"").toLowerCase().replace(/[^a-z0-9]+/g,"-").replace(/^-+|-+$/g,""); console.log(p)' "$CSV_PATH")
+TIMESTAMP=$(date +%Y-%m-%d-%H%M)
+OUTPUT_DIR=/Users/jay/Desktop/${EVENT_SLUG}_followup_${TIMESTAMP}
+mkdir -p "$OUTPUT_DIR/companies" "$OUTPUT_DIR/people"
+cp "$CSV_PATH" "$OUTPUT_DIR/input.csv"
+```
+
+Use the full literal home path — never `~` or `$HOME`. Pass `{OUTPUT_DIR}` as the full literal path to all subagent prompts.
+
+## Step 1: Capture Event Context
+
+Ask the user **in plain chat** for the event description (do NOT use AskUserQuestion). Without this, emails default to generic "we met at the event" framing — the exact failure mode this skill exists to prevent.
+
+Print this verbatim and wait for the user's reply:
+
+```
+What event is this follow-up for? Paste either:
+  - a 1-3 sentence description (theme, audience, your goal there), OR
+  - a URL to the event page (I'll extract the description automatically)
+```
+
+Parse the user's reply:
+- Plain text → save verbatim as `{OUTPUT_DIR}/event_context.md`
+- URL (matches `^https?://`) → run `node {SKILL_DIR}/scripts/extract_page.mjs "<url>" --max-chars 2000`, save the title + first 1500 chars of body to `{OUTPUT_DIR}/event_context.md`
+
+```bash
+# Example for the URL branch:
+node {SKILL_DIR}/scripts/extract_page.mjs "$EVENT_URL" --max-chars 2000 > {OUTPUT_DIR}/event_context.md
+```
+
+The event context becomes part of every email-drafting subagent prompt as `{EVENT_CONTEXT}`.
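+
+Putting both branches together: a minimal sketch, assuming the pasted answer has been captured in a shell variable (`$REPLY` is an illustrative name, not something the skill defines):
+
+```bash
+# Illustrative only: $REPLY holds the user's pasted answer from the chat turn.
+if printf '%s' "$REPLY" | grep -qE '^https?://'; then
+  # URL branch: extract title + body via the skill's extractor
+  node {SKILL_DIR}/scripts/extract_page.mjs "$REPLY" --max-chars 2000 > {OUTPUT_DIR}/event_context.md
+else
+  # Plain-text branch: save the description verbatim
+  printf '%s\n' "$REPLY" > {OUTPUT_DIR}/event_context.md
+fi
+```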
+
+## Step 2: Promo Codes (optional)
+
+Ask via `AskUserQuestion` whether to include a discount/promo code in the drafted emails:
+
+```
+AskUserQuestion(questions: [
+  {
+    question: "Include a promo/discount code in the follow-up emails?",
+    header: "Promo code",
+    multiSelect: false,
+    options: [
+      { label: "No promo codes", description: "Default — no discount in emails" },
+      { label: "Yes — HOT only", description: "Add to the highest-intent bucket only" },
+      { label: "Yes — HOT and WARM", description: "Top two buckets" },
+      { label: "Yes — all enriched", description: "HOT + WARM + NURTURE (skip COLD)" }
+    ]
+  }
+])
+```
+
+If the user picks any "Yes" option, follow up in plain chat:
+
+```
+Paste the promo code + offer details in one line, e.g.:
+  SESSIONS25 — 25% off first 3 months
+```
+
+Save to `{OUTPUT_DIR}/promo.json`:
+
+```bash
+cat << 'PROMO_JSON' > {OUTPUT_DIR}/promo.json
+{
+  "code": "SESSIONS25",
+  "description": "25% off first 3 months",
+  "applies_to": ["HOT", "WARM"]
+}
+PROMO_JSON
+```
+
+If "No promo codes", write `{"code": null, "description": null, "applies_to": []}`. The email-drafting subagent reads this file and weaves the code in only for buckets listed in `applies_to`.
+
+## Step 3: Load User Profile
+
+The profile defines the ICP that ICP triage and deep research score against. Load from `{SKILL_DIR}/profiles/{user_slug}.json` (interchangeable across all GTM skills — same shape as company-research). `example.json` is a template, not a real profile — never use it.
+
+**DO NOT look outside `{SKILL_DIR}/profiles/`** for profiles — never reach into other skills' directories.
+
+**Resolution order**:
+1. If the user invoked with `--user-company <slug>`, use that slug.
+2. Else, list `profiles/*.json` excluding `example.json`. If exactly one profile exists, use it. If multiple, ask the user (plain chat) which one.
+3. If zero profiles exist, **fail loudly** and instruct the user to create one (copy `profiles/example.json` to `profiles/<your-slug>.json`, or run the company-research skill).
+
+```bash
+PROFILES=$(ls {SKILL_DIR}/profiles/*.json 2>/dev/null | xargs -n1 basename | sed 's/\.json$//' | grep -v '^example$')
+COUNT=$(echo "$PROFILES" | grep -c .)
+
+if [ -z "$USER_SLUG" ]; then
+  if [ "$COUNT" -eq 0 ]; then
+    echo "No profiles found. Copy profiles/example.json to profiles/<your-slug>.json and fill it in."; exit 1
+  elif [ "$COUNT" -eq 1 ]; then
+    USER_SLUG=$PROFILES
+  else
+    echo "Multiple profiles found:"; echo "$PROFILES" | sed 's/^/  - /'
+    echo "Re-invoke with --user-company <slug> to pick one."; exit 1
+  fi
+fi
+cat {SKILL_DIR}/profiles/${USER_SLUG}.json
+```
+
+The profile yields: `company`, `product`, `icp_description`, `existing_customers`. These get embedded verbatim in every subagent prompt downstream.
+
+## Step 4: Parse CSV
+
+Normalize the input CSV into `people.jsonl` and `seed_companies.txt`. The parser auto-detects column headers across common event-platform schemas — see `references/csv-schemas.md` for the column mapping.
+
+```bash
+node {SKILL_DIR}/scripts/parse_csv.mjs {OUTPUT_DIR}/input.csv {OUTPUT_DIR} --user-company {USER_SLUG}
+```
+
+Writes:
+- `{OUTPUT_DIR}/people.jsonl` — one JSON record per attendee (`name`, `email`, `company`, `title`, `slug`, plus any extra event-context columns)
+- `{OUTPUT_DIR}/seed_companies.txt` — deduped, sorted company names
+- `{OUTPUT_DIR}/parse_stats.json` — counts and detected column mapping
+
+The `--user-company` flag drops attendees from the user's own org (your own SDRs aren't prospects).
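+
+One quick interpretation check on `parse_stats.json` before moving on (a suggestion, not a pipeline step; the field names follow `references/csv-schemas.md`, and the assumption that the per-filter counts live under a `skipped` key comes from that doc):
+
+```bash
+node -e '
+const stats = JSON.parse(require("fs").readFileSync("{OUTPUT_DIR}/parse_stats.json", "utf-8"));
+const skipped = stats.skipped || {};  // { no_email, user_company, dup } per csv-schemas.md
+console.log("rows dropped:", skipped);
+if (!skipped.user_company) {
+  console.warn("WARN: no rows matched --user-company; check the slug against how your org is spelled in the CSV");
+}
+'
+```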
+ +Sanity-check: +```bash +wc -l {OUTPUT_DIR}/people.jsonl {OUTPUT_DIR}/seed_companies.txt +head -3 {OUTPUT_DIR}/people.jsonl +cat {OUTPUT_DIR}/parse_stats.json +``` + +If the column mapping looks wrong (e.g., `company` mapped to a "Country" column), surface the detected mapping to the user and offer to re-run with explicit `--col-name=...`, `--col-email=...`, `--col-company=...`, `--col-title=...` flags. See `references/csv-schemas.md` → "Override Mapping". + +## Step 5: Group by Company + +`parse_csv.mjs` already deduped the companies. This step is informational: + +```bash +wc -l {OUTPUT_DIR}/seed_companies.txt +``` + +Expected: roughly 0.6-0.9× the attendee count for badge-scanned events. + +## Step 6: ICP Triage + +**Fast pass — one tool call per company, no deep research.** Score every company in `seed_companies.txt` against the user's ICP and write a thin triage stub to `companies/{slug}.md`. Companies with `icp_fit_score >= --icp-threshold` (default 6) advance to Step 8's deep research. + +**Dispatch pattern**: split `seed_companies.txt` into batches of ~10 and fan out N subagents in a SINGLE Agent batch. Each subagent runs the prompt from `references/workflow.md` → "ICP Triage" section. Hard cap: **1 tool call per company**. + +```bash +node -e ' +const fs = require("fs"); +const slugify = (s) => (s || "").toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, ""); +const seed = fs.readFileSync("{OUTPUT_DIR}/seed_companies.txt", "utf-8").split("\n").filter(Boolean); +const lines = seed.map(c => { + const slug = slugify(c); + const guessedHost = c.toLowerCase().replace(/[^a-z0-9]/g, ""); + return `${c}|https://${guessedHost}.com|${slug}`; +}); +fs.writeFileSync("{OUTPUT_DIR}/_seed_with_urls.txt", lines.join("\n") + "\n"); +' + +split -l 10 {OUTPUT_DIR}/_seed_with_urls.txt {OUTPUT_DIR}/_batch_triage_ +ls {OUTPUT_DIR}/_batch_triage_* | wc -l +``` + +Then in a single message, dispatch one Agent call per batch (up to 6 in parallel). Each Agent gets the prompt from `references/workflow.md` → "ICP Triage" with placeholders substituted (`{SKILL_DIR}`, `{OUTPUT_DIR}`, `{USER_COMPANY}`, `{USER_PRODUCT}`, `{ICP_DESCRIPTION}`, `{COMPANY_LIST}`, `{TOTAL}`). + +After all subagents return: +```bash +ls {OUTPUT_DIR}/companies/*.md | wc -l # should equal wc -l seed_companies.txt +rm {OUTPUT_DIR}/_batch_triage_* +``` + +## Step 7: Filter by ICP Threshold + +```bash +THRESHOLD=6 # from --icp-threshold flag +for f in {OUTPUT_DIR}/companies/*.md; do + score=$(awk '/^icp_fit_score:/{print $2; exit}' "$f") + if [ -n "$score" ] && [ "$(echo "$score" | cut -d. -f1)" -ge "$THRESHOLD" ]; then + basename "$f" .md + fi +done > {OUTPUT_DIR}/icp_fits.txt + +wc -l {OUTPUT_DIR}/icp_fits.txt +``` + +Expected: 20-40% of `seed_companies.txt`. If < 10%, surface a warning. + +## Step 8: Deep Research + +Full Plan→Research→Synthesize on ICP-fit companies only. Hard cap: **5 tool calls per company**. Subagents OVERWRITE the existing triage stub. + +```bash +while read slug; do + website=$(awk '/^website:/{print $2; exit}' {OUTPUT_DIR}/companies/${slug}.md) + echo "${slug}|${website}" +done < {OUTPUT_DIR}/icp_fits.txt > {OUTPUT_DIR}/_deep_targets.txt + +split -l 5 {OUTPUT_DIR}/_deep_targets.txt {OUTPUT_DIR}/_batch_deep_ +ls {OUTPUT_DIR}/_batch_deep_* | wc -l +``` + +Dispatch one Agent per batch in a single message with the prompt from `references/workflow.md` → "Deep Research". 
After all return: + +```bash +grep -l "triage_only: false" {OUTPUT_DIR}/companies/*.md | wc -l # should equal wc -l icp_fits.txt +``` + +## Step 9: Enrich Attendees + Draft Email (combined) + +Per attendee at an ICP-fit company: harvest LinkedIn URL, recent activity (podcast / blog / talk / GitHub / X), score sales-readiness, draft personalized email — all in one subagent pass. Hard cap: **4 tool calls per person**, four lanes: + +1. `bb search "{name} {company} linkedin"` (always) +2. `bb search "{name} podcast OR talk OR blog 2026"` (deep+) +3. `bb search "{name} github"` (deeper) +4. `bb search "{name} site:x.com OR site:twitter.com"` (deeper) + +Quick mode: skip Step 9 (everyone scored COLD). Deep mode: lanes 1-2. Deeper mode: lanes 1-4. + +### Step 9a — Ask the user: scope of enrichment + +Before dispatching, compute the two candidate counts: + +```bash +TOTAL=$(wc -l < {OUTPUT_DIR}/people.jsonl) +ICP_FITS=$(node -e ' +const fs = require("fs"); +const fits = new Set(fs.readFileSync("{OUTPUT_DIR}/icp_fits.txt", "utf-8").split("\n").filter(Boolean)); +const slug2name = {}; +for (const slug of fits) { + const md = fs.readFileSync(`{OUTPUT_DIR}/companies/${slug}.md`, "utf-8"); + const m = md.match(/^company_name:\s*(.+)$/m); + if (m) slug2name[slug] = m[1].trim(); +} +const want = new Set(Object.values(slug2name).map(s => s.toLowerCase())); +const ppl = fs.readFileSync("{OUTPUT_DIR}/people.jsonl","utf-8").split("\n").filter(Boolean).map(JSON.parse); +console.log(ppl.filter(p => p.company && want.has(p.company.toLowerCase())).length); +') +LANES=2 # 2 (deep) or 4 (deeper) +``` + +Then ask via `AskUserQuestion`: + +``` +AskUserQuestion(questions: [ + { + question: "Enrich which attendees?", + header: "Enrichment scope", + multiSelect: false, + options: [ + { label: "ICP fits only", description: "${ICP_FITS} attendees, ~$((ICP_FITS * LANES)) calls (recommended)" }, + { label: "All attendees", description: "${TOTAL} attendees, ~$((TOTAL * LANES)) calls" } + ] + } +]) +``` + +If "All attendees" and `TOTAL × LANES > 600`, print a warning and ask once more. + +### Step 9b — Filter and batch + +```bash +if [ "$ENRICH_SCOPE" = "all" ]; then + cp {OUTPUT_DIR}/people.jsonl {OUTPUT_DIR}/_people_to_enrich.jsonl +else + node -e ' +const fs = require("fs"); +const fits = new Set(fs.readFileSync("{OUTPUT_DIR}/icp_fits.txt", "utf-8").split("\n").filter(Boolean)); +const slug2name = {}; +for (const slug of fits) { + const md = fs.readFileSync(`{OUTPUT_DIR}/companies/${slug}.md`, "utf-8"); + const m = md.match(/^company_name:\s*(.+)$/m); + if (m) slug2name[slug] = m[1].trim(); +} +const wantNames = new Set(Object.values(slug2name).map(s => s.toLowerCase())); +const lines = fs.readFileSync("{OUTPUT_DIR}/people.jsonl", "utf-8").split("\n").filter(Boolean); +const keep = lines.filter(l => { const p = JSON.parse(l); return p.company && wantNames.has(p.company.toLowerCase()); }); +fs.writeFileSync("{OUTPUT_DIR}/_people_to_enrich.jsonl", keep.join("\n") + "\n"); +console.error(`Enriching ${keep.length} of ${lines.length} attendees`); +' +fi + +split -l 5 {OUTPUT_DIR}/_people_to_enrich.jsonl {OUTPUT_DIR}/_batch_people_ +``` + +Dispatch one Agent per batch in a single message with the prompt from `references/workflow.md` → "Person Enrichment + Email". 
The prompt template handles BOTH enrichment AND the personalized email + sales-readiness scoring in one combined pass — substitute `{EVENT_CONTEXT}` (from `event_context.md`) and `{PROMO}` (from `promo.json`) at dispatch time so emails reference the actual event and weave in the discount code where it applies. + +### Step 9c — Verify scoring + email distribution + +After all enrichment subagents return, sanity-check the output. Each `people/{slug}.md` should now contain in its frontmatter: +- `sales_readiness`: `HOT | WARM | NURTURE | COLD` +- `email_subject`: short subject line +- `email_body`: 4-6 sentence personalized follow-up (multi-line YAML pipe scalar) +- `email_cta`: the call-to-action verb (book demo / share resource / stay in touch / no follow-up) + +Sanity check the distribution: + +```bash +for level in HOT WARM NURTURE COLD; do + count=$(grep -l "sales_readiness: $level" {OUTPUT_DIR}/people/*.md 2>/dev/null | wc -l | tr -d ' ') + echo "$level: $count" +done +``` + +If 100% HOT or 100% COLD, the scoring prompt is miscalibrated — see `references/email-patterns.md` → "Calibrating Sales-Readiness". + +## Step 10: Compile Report + +```bash +node {SKILL_DIR}/scripts/compile_report.mjs {OUTPUT_DIR} --open +``` + +Generates: +- `{OUTPUT_DIR}/index.html` — attendees grouped by sales-readiness (HOT → WARM → NURTURE → COLD), each card with subject + email body + Copy buttons +- `{OUTPUT_DIR}/people.html` — filterable attendee list (alternate view, with chips for sales-readiness, role, company) +- `{OUTPUT_DIR}/companies.html` — ICP-ranked company table with attendees +- `{OUTPUT_DIR}/results.csv` — one row per person with `email_subject`, `email_body`, `sales_readiness` columns for direct CRM import + +Then present a summary in chat: + +``` +## Event Follow-Up Complete — {Event Name} + +- **Total attendees parsed**: {count} +- **Unique companies**: {count} +- **ICP fits (score ≥ {threshold})**: {count} +- **Attendees enriched**: {count} +- **Sales-readiness distribution**: + - 🔥 HOT (book a meeting): {count} + - 🌡️ WARM (qualify in nurture): {count} + - 🌱 NURTURE (educational content): {count} + - ❄️ COLD (skip / generic newsletter): {count} +- **Report opened in browser**: {OUTPUT_DIR}/index.html +``` + +Show the **top 5 HOT attendees** as a markdown table sorted by company ICP score, then offer to: +- Adjust `--icp-threshold` and re-run Steps 7-10 +- Export the CSV to a CRM +- Re-draft a specific person's email with a different angle (re-enrich just that person) diff --git a/skills/event-follow-up/profiles/example.json b/skills/event-follow-up/profiles/example.json new file mode 100644 index 0000000..ae469f5 --- /dev/null +++ b/skills/event-follow-up/profiles/example.json @@ -0,0 +1,9 @@ +{ + "company": "", + "website": "", + "product": "", + "existing_customers": [], + "competitors": [], + "use_cases": [], + "researched_at": "" +} diff --git a/skills/event-follow-up/references/csv-schemas.md b/skills/event-follow-up/references/csv-schemas.md new file mode 100644 index 0000000..c89f23f --- /dev/null +++ b/skills/event-follow-up/references/csv-schemas.md @@ -0,0 +1,76 @@ +# CSV Schemas — Event Follow-Up + +The `parse_csv.mjs` script auto-detects column headers across common event-platform exports. This document is the reference for what gets recognized and how to override the mapping when auto-detection fails. 
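+
+Before trusting auto-detection on an unfamiliar export, it is cheap to eyeball the raw header row first (plain POSIX tools, nothing skill-specific; note the naive comma split will mis-split quoted headers that contain commas):
+
+```bash
+# Print one header per line, numbered, to compare against the table below.
+head -1 /Users/jay/Downloads/stripe-attendees.csv | tr ',' '\n' | nl
+```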
+
+## Auto-detected columns (canonical → header candidates)
+
+| Canonical key | Recognized headers (case-insensitive, underscores/hyphens treated as spaces) |
+|---|---|
+| `email` (REQUIRED) | `Email`, `Email Address`, `Work Email`, `Attendee Email`, `Contact Email`, `E-mail` |
+| `name` | `Name`, `Full Name`, `Attendee Name`, `Contact Name` |
+| `first` | `First Name`, `Firstname`, `Given Name`, `First` |
+| `last` | `Last Name`, `Lastname`, `Surname`, `Family Name`, `Last` |
+| `company` | `Company`, `Company Name`, `Organization`, `Organisation`, `Org`, `Employer`, `Account`, `Account Name` |
+| `title` | `Title`, `Job Title`, `Role`, `Position`, `Job Role`, `Jobtitle` |
+| `linkedin` | `LinkedIn`, `LinkedIn URL`, `LinkedIn Profile` |
+| `notes` | `Notes`, `Note`, `Comments`, `Comment`, `Team Notes` |
+| `scanned_at` | `Scanned At`, `Badge Scan`, `Scan Time`, `Check-in Time`, `Checkin Time`, `Timestamp` |
+| `track` | `Track`, `Event Track`, `Session Track`, `Topic` |
+
+If `name` isn't found, the parser builds it from `first + last`. If neither exists, it falls back to the email local-part.
+
+## Common event platforms (verified)
+
+| Platform | Notable headers | Notes |
+|---|---|---|
+| **HubSpot list export** | `First Name`, `Last Name`, `Email`, `Company`, `Job Title` | Auto-detects cleanly. |
+| **Stripe Sessions scanner** | `First Name`, `Last Name`, `Email`, `Company`, `Job Title`, `Notes` | Auto-detects cleanly. |
+| **Eventbrite check-in CSV** | `Attendee Name`, `Email`, `Company` | Title may be missing — pass `--col-title` if available in a custom field. |
+| **Lu.ma attendee export** | `Name`, `Email`, `Position` (sometimes `Job Title`) | "Position" is recognized; "Tagline" is not — pass `--col-title="Tagline"` if needed. |
+| **Sessionize speakers export** | `Speaker Name`, `Email`, `Tagline`, `Company` | "Speaker Name" is NOT auto-detected. Use `--col-name="Speaker Name"`. |
+| **Custom badge scanner** | varies | Inspect headers in `parse_stats.json.csv_headers` and override as needed. |
+
+## Override mapping
+
+If auto-detection misfires (`parse_stats.json.detected_columns` shows wrong mapping), re-run with explicit flags:
+
+```bash
+node {SKILL_DIR}/scripts/parse_csv.mjs input.csv {OUTPUT_DIR} \
+  --user-company browserbase \
+  --col-name "Speaker Name" \
+  --col-email "Contact Email" \
+  --col-company "Account" \
+  --col-title "Tagline"
+```
+
+Override values are matched against the CSV header line by exact (normalized) name.
+
+## Email-based company derivation
+
+When a row has an email but no `company` field, the parser derives one from the email domain (`alice@acme.com` → `Acme`). Public-mail domains (`gmail.com`, `yahoo.com`, `outlook.com`, `hotmail.com`, `icloud.com`, `aol.com`, `proton.me`, `protonmail.com`, `live.com`, `me.com`, `msn.com`) yield `null` — those rows are kept but flagged for the ICP triage subagent to skip cheaply.
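+
+A sketch of that derivation rule as described (the real implementation lives in `parse_csv.mjs`; this just restates the documented behavior):
+
+```bash
+node -e '
+const PUBLIC = new Set(["gmail.com","yahoo.com","outlook.com","hotmail.com","icloud.com","aol.com",
+                        "proton.me","protonmail.com","live.com","me.com","msn.com"]);
+const deriveCompany = (email) => {
+  const domain = (email.split("@")[1] || "").toLowerCase();
+  if (!domain || PUBLIC.has(domain)) return null;        // public-mail domain → no company
+  const label = domain.split(".")[0];                    // "acme.com" → "acme"
+  return label.charAt(0).toUpperCase() + label.slice(1); // "acme" → "Acme"
+};
+console.log(deriveCompany("alice@acme.com")); // Acme
+console.log(deriveCompany("bob@gmail.com"));  // null
+'
+```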
+ +## Filters applied at parse time + +- Rows with no email or malformed email (`@` missing) → dropped, counted in `skipped.no_email` +- Rows with `company` matching `--user-company` (case-insensitive) → dropped, counted in `skipped.user_company` +- Duplicate emails → first wins, rest counted in `skipped.dup` + +## Output schema (`people.jsonl`) + +One JSON object per line: + +```json +{ + "name": "Greg Brockman", + "email": "greg@openai.com", + "company": "OpenAI", + "title": "Cofounder and President", + "linkedin": "https://www.linkedin.com/in/thegdb/", + "notes": "Spoke about ChatGPT Agent", + "scanned_at": null, + "track": null, + "slug": "greg-brockman" +} +``` + +`null` values mean the column was absent or empty for this row. Downstream subagents must handle nulls gracefully (no fabrication). diff --git a/skills/event-follow-up/references/email-patterns.md b/skills/event-follow-up/references/email-patterns.md new file mode 100644 index 0000000..d3cc2fc --- /dev/null +++ b/skills/event-follow-up/references/email-patterns.md @@ -0,0 +1,119 @@ +# Email Patterns — Follow-Up + Sales-Readiness Rubric + +This skill exists because generic post-event follow-up emails ("Great meeting you at the event!") get ignored. Every email this skill produces must reference a SPECIFIC finding from research — recent activity, a public talk, a hiring move, a product launch — and tie it to the user's product wedge. + +## Contents +- [Sales-Readiness Rubric](#sales-readiness-rubric-4-buckets) — the 4 buckets (HOT / WARM / NURTURE / COLD) +- [Calibrating Sales-Readiness](#calibrating-sales-readiness) — healthy distributions + failure modes +- [Email Structure](#email-structure-4-6-sentences) — the 5-6 sentence skeleton +- [Examples](#example-hot--greg-brockman--openai) — HOT / WARM / NURTURE +- [Subject Line Patterns](#subject-line-patterns) +- [Anti-hallucination rules](#anti-hallucination-rules-for-email-drafting) + +## Sales-Readiness Rubric (4 buckets) + +Each enriched attendee gets exactly one bucket. The rubric is biased toward NURTURE — most attendees are not ready for a sales conversation tomorrow, and routing every name to sales burns rep time. + +### 🔥 HOT — book a meeting this week + +ALL of: +- Senior at an ICP-fit company (`icp_fit_score >= 7`): VP / Director / Head of / Cofounder / Chief / Lead +- AND at least ONE buying signal in the last 90 days: + - Public talk / podcast about a problem the user's product solves + - Hiring for roles directly relevant to the user's product (job posts mentioning the user's category) + - Recent funding round, product launch, or expansion announcement + - Direct mention of the user's product or category on their site, blog, or social +- AND not already an existing customer (per user profile's `existing_customers`) + +CTA: "Worth a 20-min call this week? I want to walk you through how X helps with Y." + +### 🌡️ WARM — qualify in nurture sequence + +EITHER: +- Senior at ICP-fit company with no recent buying signal — they fit the buyer profile but aren't visibly in-market. Drop into a nurture sequence with relevant content. +- Mid-level (Manager / Senior Engineer / Senior PM) at ICP-fit company with a buying signal. + +CTA: "Sharing a case study on how a similar team used X — open to a quick chat after you've read it?" 
+
+### 🌱 NURTURE — educational content only
+
+EITHER:
+- IC (engineer / analyst / individual contributor) at ICP-fit company
+- OR mid-level at adjacent (`icp_fit_score 4-6`) company
+- OR senior at adjacent company with no buying signal
+
+CTA: "Drop you in the dev newsletter — quarterly tips on X." or "Wrote up a piece on Y, thought you'd find it useful."
+
+### ❄️ COLD — no follow-up email
+
+ANY of:
+- Outside ICP entirely (`icp_fit_score < 4`)
+- Already an existing customer per the user profile
+- Suspected spam / public-mail domain with no company match
+- Title is a clear non-buyer / non-influencer at this stage (intern, recruiter, partnerships-only at non-ICP)
+
+For COLD records, write `email_subject: ""` and `email_body: ""` — the skill compiles them into the report so the user knows they were considered, but no email is drafted.
+
+## Calibrating Sales-Readiness
+
+A healthy distribution at a typical SaaS booth:
+- HOT: 5-15% of enriched attendees
+- WARM: 15-30%
+- NURTURE: 35-55%
+- COLD: 10-30%
+
+Failure modes to flag back to the user:
+- **Mostly HOT (>40%)** — the rubric is too lenient OR the ICP is too narrow (everyone who passed the ICP filter looks senior). Tighten the buying-signal requirement.
+- **Mostly COLD (>50%)** — the ICP description is too narrow or the threshold is too high. Lower `--icp-threshold` and re-run.
+- **Zero HOT** — either no enriched person had a recent buying signal, or the rubric is being mis-applied. Spot-check 3 random people.
+
+## Email Structure (4-6 sentences)
+
+Every drafted email follows this skeleton:
+
+```
+[1 sentence: event reference + their attendance — concrete, NOT "great to meet you"]
+[1 sentence: the specific signal you found in research — quote/paraphrase the finding, with confidence]
+[1 sentence: the wedge — connect their signal to the user's product]
+[Optional 1 sentence: short proof point or social proof (existing customer, similar team's outcome, specific feature)]
+[1 sentence: CTA matching the sales-readiness bucket]
+[Optional closing sentence: low-pressure off-ramp — "no worries if not the right time"]
+```
+
+### Example (HOT — Greg Brockman / OpenAI)
+
+> Subject: ChatGPT Agent + browser infra at Sessions
+>
+> Greg — caught your Sessions panel on agent reliability and the "agents are bottlenecked on the browser" framing was exactly the conversation we keep having with teams shipping CUA-style products. Browserbase runs the cloud-browser layer that several ChatGPT-Agent-competitor products are built on — managed Chrome, stealth, captcha-solving, session recording. Worth a 20-min walkthrough this week before you scope your next quarter? Happy to send the durability deck ahead of time.
+
+### Example (WARM — mid-level at ICP fit)
+
+> Subject: Quick read on Replit Agent + headless browsers
+>
+> Adam — saw your team at Sessions and the Replit Agent demo on the Stripe stage was the cleanest "agent that actually ships an app" I've seen all conference. Wrote up a short piece on how teams we work with handle the browser layer when their generated apps need to test against a real Stripe checkout — link below. Open to a 15-min chat next week if any of it lands?
+
+### Example (NURTURE — IC at ICP fit)
+
+> Subject: Browser-infra primer for AI builders
+>
+> Hey — saw you at Sessions and we share a few mutual builders in the agent space. Pulling together a quarterly digest of what's working in cloud-browser infra for AI agents (latency tricks, captcha patterns, session-replay use cases) — happy to add you.
Reply with anything you'd want covered. + +## Subject Line Patterns + +Subject lines that get opens (verified across SDR sequences): +- `{Specific thing they did} + {your wedge}` — "ChatGPT Agent + browser infra at Sessions" +- `Quick read on {their product} + {your category}` — "Quick read on Replit Agent + headless browsers" +- `{Their company} + {one-line value prop}` — "Ramp + agent receipts: 20-min?" +- Question subjects only when the question is specific and answerable + +Avoid: +- "Following up on Sessions" — vague, generic, deletable +- "Did you have a chance to..." — assumes a prior thread that didn't exist +- ALL CAPS, emojis in subject (flagged by enterprise filters) + +## Anti-hallucination rules for email drafting + +- Every email body MUST quote or paraphrase a SPECIFIC finding (talk title, podcast episode, blog post, GitHub repo, hiring signal, product launch). If no finding exists, fall back to event-context (their attendance + track if known) — never fabricate one. +- NEVER claim the target is an "existing customer" unless their company is in the user profile's `existing_customers` array. +- NEVER reference details from the research that didn't actually appear in `bb search` results — if uncertain, generalize. +- If the user profile's `existing_customers` includes the target's company, draft an EXPANSION email (different framing — congratulations on growth, sharing a new feature, intro to a different team) — NOT a net-new sales pitch. diff --git a/skills/event-follow-up/references/example-research.md b/skills/event-follow-up/references/example-research.md new file mode 100644 index 0000000..a1b01e3 --- /dev/null +++ b/skills/event-follow-up/references/example-research.md @@ -0,0 +1,195 @@ +# Example Research Files + +## Contents +- [Company File — Triage Stub (Step 6 output)](#company-file--triage-stub-step-6-output) +- [Company File — Deep Research (Step 8 output)](#company-file--deep-research-step-8-output) +- [Person File (Step 9 output)](#person-file-step-9-output) — the combined enrichment + email-drafting output +- [Field Rules](#field-rules) +- [Writing via Bash Heredoc](#writing-via-bash-heredoc) — how subagents emit files + +Event-follow-up writes TWO kinds of markdown files: + +1. **Company files** — one per company in `seed_companies.txt`, written to `{OUTPUT_DIR}/companies/{slug}.md`. Comes in two flavors: triage stubs (Step 6) and deep-research files (Step 8). +2. **Person files** — one per attendee at an ICP-fit company, written to `{OUTPUT_DIR}/people/{slug}.md`. Created in Step 9 (combined enrichment + email-drafting pass). + +The YAML frontmatter contains structured fields for report compilation. The body contains human-readable research. + +`{OUTPUT_DIR}` is the per-run Desktop directory set up by the main agent in Step 0 (e.g., `/Users/jay/Desktop/{event_slug}_followup_2026-04-26-1930/`). + +--- + +## Company File — Triage Stub (Step 6 output) + +Every company in `seed_companies.txt` gets one of these. Captures a 1-call, ICP-only assessment. + +```markdown +--- +company_name: OpenAI +website: https://openai.com +product_description: AI lab building safe AGI; ChatGPT, GPT API, ChatGPT Agent +icp_fit_score: 9 +icp_fit_reasoning: AI agents at scale need cloud browser infrastructure; ChatGPT Agent shipped Mar 2026 +triage_only: true +--- + +## Triage Notes +Homepage: "ChatGPT, GPT API, and ChatGPT Agent — AI tools and APIs for everyone." 
+Score 9 because ChatGPT Agent ships browser-using AI agents at consumer scale — the canonical fit for browser infrastructure. +``` + +**Required fields**: `company_name`, `website`, `icp_fit_score`, `icp_fit_reasoning`, `triage_only: true`. + +--- + +## Company File — Deep Research (Step 8 output) + +When a company's `icp_fit_score >= --icp-threshold`, Step 8's deep research overwrites the triage stub with this richer version. `triage_only` flips to `false`. + +```markdown +--- +company_name: OpenAI +website: https://openai.com +product_description: Foundational AI lab; products span ChatGPT, GPT API, and ChatGPT Agent (browser-using autonomous agent) +industry: AI / Foundation Models +target_audience: Consumers, developers, enterprise +key_features: ChatGPT Agent | GPT-5 API | Sora video | enterprise data residency +icp_fit_score: 9 +icp_fit_reasoning: ChatGPT Agent (Mar 2026) is a browser-using agent at consumer scale — directly addresses the "agents need a browser" wedge. +employee_estimate: 3000+ +funding_info: $11.3B raised; reported $300B valuation 2026 +headquarters: San Francisco, CA +triage_only: false +--- + +## Product +Foundational AI lab. Three product surfaces: ChatGPT (consumer/team chat), GPT API (developer platform), ChatGPT Agent (autonomous browsing agent). + +## Research Findings +- **[high]** ChatGPT Agent launched Mar 2026 — autonomous web-browsing agent (source: openai.com/index/chatgpt-agent) +- **[medium]** Hiring across "Agent Reliability" team — 12 open roles for browser-automation engineers (source: openai.com/careers, search 2026-04) +``` + +**Body sections**: `## Product`, `## Research Findings`. The deep-research file may also include `## Event Relevance` if the team had context from the event. + +--- + +## Person File (Step 9 output) + +Created for each enriched attendee. Combines person research + sales-readiness scoring + drafted follow-up email in one file. + +```markdown +--- +name: Greg Brockman +slug: greg-brockman +email: greg@openai.com +company: OpenAI +company_slug: openai +title: Cofounder and President +links: + linkedin: https://www.linkedin.com/in/thegdb/ + x: https://x.com/gdb + github: https://github.com/gdb + blog: null + podcast: https://lexfridman.com/greg-brockman/ +sales_readiness: HOT +sales_readiness_reason: Cofounder/President at OpenAI (canonical ICP) AND publicly discussed agent reliability (the user's wedge) on Lex Fridman in March 2026 — strong buying signal in the last 90 days. +hook: Lex Fridman conversation on agent reliability (45 min, dropped 2026-03-12) +email_subject: ChatGPT Agent + browser infra at Sessions +email_body: | + Greg — caught your Sessions panel on agent reliability and the "agents are + bottlenecked on the browser" framing was exactly the conversation we keep + having with teams shipping CUA-style products. Browserbase runs the + cloud-browser layer that several ChatGPT-Agent-competitor products are + built on — managed Chrome, stealth, captcha-solving, session recording. + Worth a 20-min walkthrough this week before you scope your next quarter? + Happy to send the durability deck ahead of time. +email_cta: book demo +role_reason: Cofounder; sets infrastructure direction across all OpenAI product surfaces, including the agent runtime story. 
+icp_fit_score: 9 +icp_fit_reasoning: ChatGPT Agent is the canonical browser-infra customer — see companies/openai.md +enriched_at: 2026-04-26T19:30:00Z +--- + +## Why reach out +- **Why the person**: Cofounder; sets infra direction; specifically called out agent reliability on Lex (Mar 2026) +- **Hook**: Lex Fridman conversation on agent reliability (45 min, dropped 2026-03-12) + +## Public links +- LinkedIn: https://www.linkedin.com/in/thegdb/ +- X: https://x.com/gdb +- GitHub: https://github.com/gdb +- Podcast: https://lexfridman.com/greg-brockman/ + +## Recent activity +- **[high]** Lex Fridman podcast episode on agent reliability, Mar 2026 (source: lexfridman.com/greg-brockman) +- **[medium]** X thread on "the bottleneck for agents is the browser, not the model" — Apr 2026 (source: x.com/gdb) +``` + +**Required frontmatter fields**: `name`, `slug`, `email`, `company`, `links` (object), `sales_readiness`, `sales_readiness_reason`, `email_subject`, `email_body`, `email_cta`. + +**Body sections**: `## Why reach out`, `## Public links`, `## Recent activity` (findings list with confidence levels). + +For COLD attendees, set `email_subject: ""` and `email_body: ""` — the report still emits the file so the user knows they were considered, but no email is drafted. + +--- + +## Field Rules + +### Company files + +- `key_features`: pipe-separated (`|`) list, NOT a JSON array +- `icp_fit_score`: integer 1-10 +- `icp_fit_reasoning`: one line, references specific findings +- `triage_only`: boolean (`true` for stubs, `false` after deep research) +- Filename: `{OUTPUT_DIR}/companies/{slug}.md` where slug is lowercase, hyphenated + +### Person files + +- `links`: YAML object with keys `linkedin`, `x`, `github`, `blog`, `podcast`. Use `null` when not found, not empty string. +- `sales_readiness`: one of `HOT | WARM | NURTURE | COLD` — see `references/email-patterns.md` for the rubric +- `email_body`: 4-6 sentences, multi-line YAML pipe scalar (`email_body: |` then indented). Every email MUST quote or paraphrase a specific finding (recent activity, team note, event context). NEVER fabricate. +- `email_subject`: 5-9 words, specific not generic. NOT "Following up on Sessions". +- `icp_fit_score` is INHERITED from the corresponding `companies/{company_slug}.md` +- Filename: `{OUTPUT_DIR}/people/{slug}.md` where slug is the lowercased + hyphenated person name + +### Both + +- One file per entity. If a subagent encounters a duplicate, OVERWRITE with richer data. + +--- + +## Writing via Bash Heredoc + +Subagents write these files using bash heredoc to avoid security prompts. Use the full literal `{OUTPUT_DIR}` path — no `~` or `$HOME`: + +```bash +cat << 'PERSON_MD' > /Users/jay/Desktop/{event_slug}_followup_2026-04-26-1930/people/greg-brockman.md +--- +name: Greg Brockman +slug: greg-brockman +... +--- + +## Why reach out +... +PERSON_MD +``` + +Use `'PERSON_MD'` (quoted) as the delimiter to prevent shell variable expansion. Use `'COMPANY_MD'` for company files. + +**IMPORTANT**: Write ALL files in a SINGLE Bash call using chained heredocs to minimize permission prompts. One subagent batch (~5 attendees) = one Bash invocation = one permission prompt. + +```bash +cat << 'PERSON_MD' > {OUTPUT_DIR}/people/greg-brockman.md +--- +... +--- +PERSON_MD +cat << 'PERSON_MD' > {OUTPUT_DIR}/people/sam-altman.md +--- +... +--- +PERSON_MD +``` + +Chained heredocs in one bash call. The subagent reports back ONLY a count, never raw content. 
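+
+A quick completeness check over the emitted person files (a convenience for the main agent, not a step the skill mandates; the key list comes from "Required frontmatter fields" above):
+
+```bash
+for f in {OUTPUT_DIR}/people/*.md; do
+  for key in name slug email company links sales_readiness sales_readiness_reason email_subject email_body email_cta; do
+    grep -q "^${key}:" "$f" || echo "MISSING ${key} in $(basename "$f")"
+  done
+done
+```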
diff --git a/skills/event-follow-up/references/report-template.html b/skills/event-follow-up/references/report-template.html
new file mode 100644
index 0000000..97abeb7
--- /dev/null
+++ b/skills/event-follow-up/references/report-template.html
@@ -0,0 +1,139 @@
[HTML template — markup stripped in this view; only the text content survived. Recoverable structure:
 page title "Company Research — {{COMPANY_NAME}}"; a header with {{TITLE}}, {{META}}, and a "Powered by Browserbase" badge;
 four stat cards — Companies {{TOTAL}}, Strong Fit (8-10) {{HIGH_COUNT}}, Partial Fit (5-7) {{MEDIUM_COUNT}}, Weak Fit (1-4) {{LOW_COUNT}};
 a "Score Distribution" bar showing "{{HIGH_PCT}}% strong fit" with a Strong (8-10) / Partial (5-7) / Weak (1-4) legend;
 a results table with columns Score | Company | Product | Industry | Fit Reasoning, populated by {{TABLE_ROWS}}.]
+ + + + + diff --git a/skills/event-follow-up/references/research-patterns.md b/skills/event-follow-up/references/research-patterns.md new file mode 100644 index 0000000..b9bb857 --- /dev/null +++ b/skills/event-follow-up/references/research-patterns.md @@ -0,0 +1,277 @@ + + + + +# Event-Prospecting — Research Patterns + +## Contents +- [Plan→Research→Synthesize](#planresearchsynthesize) — canonical pattern (verbatim from company-research) +- [Self-Research (User's Company)](#self-research-users-company) — done by company-research, consumed here as a profile +- [Target Company Research](#target-company-research) — sub-question templates +- [Finding Format](#finding-format) — schema for accumulated facts +- [Research Loop Rules](#research-loop-rules) — how to stop hallucinating +- [Depth Mode Behavior](#depth-mode-behavior) — quick / deep / deeper +- [Synthesis Instructions](#synthesis-instructions) — turn findings into frontmatter +- [ICP Triage (Step 6 — fast pass)](#icp-triage-step-6--fast-pass) — event-specific +- [Deep Research (Step 8 — full pass)](#deep-research-step-8--full-pass) — event-specific +- [Person Enrichment + Email (Step 9 — attendees at ICP fits only)](#person-enrichment--email-step-9--attendees-at-icp-fits-only) — event-specific + +--- + +## Plan→Research→Synthesize + +This reference defines two research contexts: +1. **Self-Research** — Deep research on the user's own company to build a strong ICP foundation. (For event-follow-up, this is done once by `company-research` and persisted in `profiles/{slug}.json`. Event-follow-up reads the profile at Step 3.) +2. **Target Research** — Research each ICP-fit company using Plan→Research→Synthesize. + +Both use the same 3-phase pattern but with different sub-questions and goals. + +## Self-Research (User's Company) + +This is the most important research in the pipeline. Every downstream decision depends on it. + +### Sub-Questions +- "What does {company} sell and what specific problem does it solve?" +- "Who are {company}'s existing customers? What industries, company sizes, and use cases?" +- "Who are {company}'s competitors and what differentiates them?" +- "What pricing model does {company} use and who is the typical buyer persona?" +- "What use cases and pain points does {company}'s marketing emphasize?" + +### Page Discovery +Discover site pages dynamically — do NOT hardcode paths like `/about` or `/customers`: +1. Fetch `bb fetch --allow-redirects "{company website}/sitemap.xml"` — primary source, has ALL pages +2. Scan sitemap URLs for keywords: `customer`, `case-stud`, `pricing`, `about`, `use-case`, `blog`, `docs`, `industry`, `solution` +3. Optionally fetch `bb fetch --allow-redirects "{company website}/llms.txt"` for page descriptions +4. Pick the 3-5 most relevant URLs from the sitemap and fetch those +5. Sitemap is the source of truth. llms.txt is bonus context but often incomplete. 
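+
+A sketch of steps 1-2 with standard tools (the keyword list mirrors the one above; real sitemaps vary, and sitemap-index files may need a second fetch):
+
+```bash
+bb fetch --allow-redirects "https://example.com/sitemap.xml" \
+  | grep -oE 'https?://[^<"[:space:]]+' \
+  | grep -E 'customer|case-stud|pricing|about|use-case|blog|docs|industry|solution' \
+  | head -5
+```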
+ +### External Research +- Search: `"{company} customers use cases reviews"` +- Search: `"{company} alternatives competitors vs"` +- Fetch 1-2 of the most informative third-party results (G2, blog posts, comparisons) + +### Synthesis Output +From all findings, produce a company profile: +- **Company**: name +- **Product**: what they sell, how it works, key capabilities (2-3 sentences, specific) +- **Existing Customers**: named customers or customer types found +- **Competitors**: who they compete with, key differentiators +- **Use Cases**: broad list of use cases the product serves (NOT tied to one vertical) + +Do NOT include ICP, pitch angle, or sub-verticals in the profile. Those are per-run targeting decisions made in Step 4 after the profile is confirmed. The profile is a general-purpose company fact sheet that works regardless of which vertical you target next. + +### Why This Matters +A thin profile produces generic search queries, weak lead scoring, and cookie-cutter emails. A rich profile with specific customers, competitors, and use cases produces targeted queries, accurate scoring, and emails that reference real pain points. + +--- + +## Target Company Research + +### Sub-Question Templates + +Generate sub-questions from these categories based on the ICP and enrichment fields requested. Not every category applies to every company — pick the most relevant. + +### Priority 1 (Always ask) +- **Product/Market**: "What does {company} sell and who are their customers?" +- **ICP Fit**: "How does {company}'s product/market relate to {sender's ICP description}?" + +### Priority 2 (Ask in deep/deeper) +- **Tech Stack**: "What technologies, frameworks, or infrastructure does {company} use?" +- **Growth Signals**: "Has {company} raised funding, launched products, or expanded recently?" +- **Pain Points**: "What challenges might {company} face that {sender's product} addresses?" + +### Priority 3 (Ask in deeper only) +- **Decision Makers**: "Who leads engineering, product, or growth at {company}?" +- **Competitive Landscape**: "Who are {company}'s competitors and how are they differentiated?" +- **Customers/Case Studies**: "Who are {company}'s notable customers and what results do they highlight?" 
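+
+In practice a subagent batches several sub-question searches into one Bash call (per the skill's permission-prompt rule). A sketch for one company, with "Acme" standing in for the target and the query phrasings following the patterns in the next section:
+
+```bash
+# One Bash invocation, queries chained: Priority 1 first, then growth signals.
+bb search "Acme what they do" && \
+bb search "Acme product features customers" && \
+bb search "Acme funding round 2025 2026"
+```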
+ +### Search Query Patterns + +For each sub-question, generate 2-3 search query variations: + +``` +# Product/Market +"{company name} what they do" +"{company name} product features customers" + +# Tech Stack +"{company name} tech stack engineering blog" +"{company name} careers software engineer" (job posts reveal stack) + +# Growth Signals +"{company name} funding round 2025 2026" +"{company name} launch announcement" +"{company name} hiring" + +# Pain Points +"{company name} challenges {relevant domain}" +"{company name} {problem sender solves}" + +# Decision Makers +"{company name} VP engineering CTO LinkedIn" +"{company name} head of growth product" +``` + +## Finding Format + +Each finding is a self-contained factual statement tied to a source: + +```json +{ + "subQuestion": "What does Acme sell and who are their customers?", + "fact": "Acme provides checkout optimization for Shopify stores, serving mid-market DTC brands with $5M-$50M revenue", + "sourceUrl": "https://acme.com/about", + "sourceTitle": "About Acme - Checkout Optimization", + "confidence": "high" +} +``` + +**Confidence levels**: +- `high`: Directly stated on the company's own website or official press +- `medium`: Inferred from job postings, third-party articles, or indirect signals +- `low`: Speculative based on industry/category, or from outdated sources + +## Research Loop Rules + +1. **Process sub-questions by priority** — Priority 1 first, then 2, then 3 +2. **3-5 findings per sub-question, then move on** — Don't exhaust a topic +3. **Use parallel tool calls** — Search multiple queries simultaneously when possible +4. **Rephrase, don't retry** — If a search returns poor results, try different keywords +5. **Fetch selectively** — Don't fetch every URL from search results. Pick the 1-2 most relevant based on title and URL +6. **Stop at step limit** — Respect the depth mode's step budget per company +7. **Homepage first** — Always fetch the company's homepage before branching to other pages +8. **Deduplicate findings** — Don't record the same fact twice from different sources + +### Logo / customer-relationship direction (CRITICAL) + +A logo on a company's homepage carries no implicit direction. Do NOT infer a buyer/seller relationship from logo placement alone — the direction is the opposite of what you'd guess in many cases. + +- If `{TARGET}`'s homepage shows **`{USER_COMPANY}`'s logo** in a "trusted by" / "customers" / "loved by" / "powering" section, then **the user is the target's customer**, not the other way around. (Example: Browserbase's logo on Clerk's homepage means Browserbase uses Clerk for auth — Clerk is NOT a Browserbase customer.) +- If `{USER_COMPANY}`'s homepage shows the target's logo in a "customers" section, then the target is the user's customer. +- If neither homepage carries the other's logo, do NOT claim any customer relationship. +- Search results that say "X uses Y" or "X integrates Y" are stronger evidence than logos. Quote the source phrase. +- When unsure of direction, write the relationship neutrally: "Browserbase and Clerk both serve dev-tools/agent ICP — possible co-marketing fit" — NOT "Clerk is a Browserbase customer". + +The user's profile (`profiles/{slug}.json`) lists `existing_customers`. **Only treat a target as an existing customer if its name appears in that array.** Logos and assumptions don't qualify. + +## Depth Mode Behavior + +### Quick Mode (100+ leads) +- **Skip Phase A** — No sub-question decomposition +- **Phase B**: Fetch the company homepage. 
Run 1-2 supplementary searches if homepage data is thin. +- **Phase C**: Extract available data, score ICP, write email from what's available +- **Budget**: 2-3 total tool calls per company +- **Trade-off**: Fast and cheap, but emails may be less personalized + +### Deep Mode (25-50 leads) +- **Phase A**: Decompose into 2-3 sub-questions (Priority 1 + selected Priority 2) +- **Phase B**: For each sub-question, run 2-3 searches + fetch 1-2 URLs. Target 3-5 findings per sub-question. +- **Phase C**: Synthesize from all findings. ICP reasoning references specific evidence. Email uses the most specific/compelling finding. +- **Budget**: 5-8 total tool calls per company +- **Trade-off**: Good balance of depth and scale + +### Deeper Mode (10-25 leads) +- **Phase A**: Decompose into 4-5 sub-questions (Priority 1 + 2 + selected Priority 3) +- **Phase B**: Research exhaustively. Fetch multiple pages per company (homepage, about, blog, careers, product pages). Target 3-5 findings per sub-question. +- **Phase C**: Synthesize with cited evidence. ICP reasoning is detailed. Email references multiple specific signals. +- **Budget**: 10-15 total tool calls per company +- **Trade-off**: High quality intelligence, but slow and expensive + +## Synthesis Instructions + +After the research loop completes for a company, synthesize findings into the output record: + +### ICP Scoring +Score 1-10 using ALL accumulated findings as evidence: +- **8-10**: Strong match. Multiple high-confidence findings confirm right industry, company stage, and clear pain point alignment. The pitch angle directly addresses a visible need supported by evidence. +- **5-7**: Partial match. Some findings suggest relevance but key signals are missing or low-confidence. Adjacent industry or unclear pain point. +- **1-4**: Weak match. Findings indicate wrong segment, too large/small, or no apparent connection to sender's product. + +Write `icp_fit_reasoning` referencing specific findings: "Series A fintech (from Crunchbase), uses Selenium for scraping (from job posting), expanding to EU market (from blog) — strong fit for browser infrastructure." + +### Email Personalization +Use the **richest, most specific** findings for email context: +- Opening: Use the most concrete finding (a specific product feature, a recent launch, a job posting) +- Bridge: Connect a finding about their challenges/stack to the sender's pitch angle +- If only low-confidence findings exist, keep the email shorter and more general — don't fabricate specificity + +### Enrichment Fields +Map findings to enrichment fields: +- `product_description` → from Product/Market findings +- `industry` → inferred from Product/Market +- `employee_estimate` → from LinkedIn search or careers page findings +- `funding_info` → from Growth Signals findings +- `headquarters` → from company homepage or about page +- `target_audience` → from Product/Market findings +- `key_features` → from product page findings + +If a field has no supporting findings, leave it empty rather than guessing. + +### Anti-Hallucination Rules + +Apply these at synthesis time. They exist because the failure mode — especially on Framer/Next.js landing pages with little server-rendered copy — is for the subagent to pattern-match visual cues onto the sender's ICP and fabricate a plausible-sounding description: + +1. **Typography is not a product.** Never infer `product_description`, `industry`, or `target_audience` from fonts, design system, framework choice (Framer, Next.js, React), or site polish. 
"Framer-built" and "uses Geist Mono" are observations about tooling, not signals of what the company sells. +2. **No ICP leakage.** If the homepage is thin and external search turns up nothing, do NOT default the target's description toward the sender's ICP. Manufacturing AI ≠ browser automation just because both use AI. +3. **Quote, don't paraphrase from memory.** `product_description` must quote or closely paraphrase a specific phrase from `extract_page.mjs` output (TITLE / META_DESCRIPTION / OG_DESCRIPTION / HEADINGS / BODY) or from an external search result. If no such phrase exists, write `Unknown — homepage content not accessible`. +4. **Cap scores on thin evidence.** If `product_description` is `Unknown`, set `icp_fit_score` ≤ 3 and `icp_fit_reasoning: Insufficient evidence — homepage returned no readable content`. Do not justify a higher score on inferred signals alone. + +--- + +## ICP Triage (Step 6 — fast pass) + +For each company in `seed_companies.txt`, run ONE tool call to fetch the homepage + extract a 1-line product description, then score against the ICP. Output goes to `companies/{slug}.md` with frontmatter: + +```yaml +company_name: OpenAI +website: https://openai.com +product_description: "AI lab building safe AGI for everyone" +icp_fit_score: 9 +icp_fit_reasoning: "AI agents need cloud browser infrastructure at scale; ChatGPT Agent shipped Mar 2026" +triage_only: true # NOT yet deep-researched +``` + +Companies with `icp_fit_score < {threshold}` (default 6) stay as triage stubs and never get deep-researched. Companies above the threshold advance to Step 8. + +**Hard cap: 1 tool call per company.** The only allowed call is `node {SKILL_DIR}/scripts/extract_page.mjs "{company_homepage}"`. Anti-hallucination rule applies in full: if the homepage is JS-rendered and `extract_page.mjs` returns empty BODY, write `product_description: Unknown — homepage content not accessible` and cap the score at 3. Do NOT do a second search to "save" the company — the budget is one call. + +The triage subagent batches its 10 `extract_page.mjs` calls and 10 heredoc writes into a SINGLE Bash invocation using `&&` chaining and pipe-separated heredocs. One Bash call = one permission prompt. + +## Deep Research (Step 8 — full pass) + +Identical to company-research's target research. The ICP-fit companies (typically 20-40% of the seed list) get the full Plan→Research→Synthesize treatment with sub-questions tailored to the event context. + +**Hard cap: 5 tool calls per company.** Budget breakdown for deep mode: +- 1 call: `extract_page.mjs` on the homepage (re-extract; the triage version was scraped down to a 1-liner) +- 2-3 calls: `bb search` for sub-questions from Priority 1 + 2 (product, tech stack, growth signals) +- 1-2 calls: `extract_page.mjs` on the most relevant search results (case study, blog post, careers page) + +Event-context tweaks the sub-questions. Instead of generic "What does {company} do?", the subagent asks "What is {company} doing that's relevant to {EVENT_CONTEXT}?" — the event description (from `event_context.md`) is woven into Priority 2 sub-questions so the research surfaces the most-conversation-worthy facts for the follow-up email. + +The deep-research subagent OVERWRITES the triage stub with the richer file (frontmatter `triage_only: false`). The compile step looks at `triage_only` to decide rendering. 
+ +## Person Enrichment + Email (Step 9 — attendees at ICP fits only) + +Per person at an ICP-fit company: +- `bb search "{name} {company} linkedin"` — verify role + harvest LinkedIn URL (always) +- `bb search "{name} podcast OR talk OR blog 2026"` — last 6 months for hooks (deep+) +- `bb search "{name} github"` — open-source signal (deeper) +- `bb search "{name} site:x.com OR site:twitter.com"` — recent posts (deeper) + +**Hard cap: 4 tool calls per person.** Deep mode runs lanes 1-2 (max 2 calls). Deeper mode runs lanes 1-4 (max 4 calls). Quick mode skips Step 9 entirely. + +Each attendee yields a `people/{slug}.md` containing enrichment + sales-readiness bucket + drafted follow-up email — see `references/example-research.md` for the full schema. Key frontmatter fields: + +- `sales_readiness`: `HOT | WARM | NURTURE | COLD` per the rubric in `references/email-patterns.md` +- `email_subject`: 5-9 words, specific (NOT "Following up on the event") +- `email_body`: 4-6 sentence YAML pipe scalar, anchored on a SPECIFIC finding +- `hook`: 1-sentence summary of the anchoring fact +- `links`: nested object with `linkedin`, `x`, `github`, `blog`, `podcast` + +The `hook` source priority (run sequentially, stop at first hit): +1. **Team notes** (the `notes` field from the input CSV) — most concrete, in-person context +2. **Recent activity** (last 6 months): podcast / talk / blog / GitHub / LinkedIn post — surfaced by lanes 2-4 +3. **Event context** (the `{EVENT_CONTEXT}` block + their attendance) +4. **Company-context**: signal from their company's recent news — pulled from the `companies/{slug}.md` deep-research file + +The hook becomes the anchor of the follow-up email's second sentence. NEVER fabricate a hook — if all lanes return nothing, fall back to event-context and write an honest "saw you at {event}" framing. diff --git a/skills/event-follow-up/references/workflow.md b/skills/event-follow-up/references/workflow.md new file mode 100644 index 0000000..0154584 --- /dev/null +++ b/skills/event-follow-up/references/workflow.md @@ -0,0 +1,430 @@ +# Event-Follow-Up Workflow + +Subagent prompt templates and tool-call governance for every fan-out step in the pipeline. The main agent in `SKILL.md` dispatches Agent batches that load these prompts; each subagent must obey the HARD TOOL-CALL CAPS below or the run is invalidated. + +## Contents +- [Inputs](#inputs) — CSV parse + event context (NOT fanned out; main agent runs these directly) +- [ICP Triage](#icp-triage) — fast company-level scoring (1 call/company hard cap) +- [Deep Research](#deep-research) — full Plan→Research→Synthesize on ICP fits (5 calls/company hard cap) +- [Person Enrichment + Email](#person-enrichment--email) — attendees at ICP-fit companies (4 calls/person hard cap) +- [Compilation](#compilation) — HTML + CSV via `compile_report.mjs` +- [Wave Management](#wave-management) — sizing, parallelism, error handling + +--- + +## Inputs + +CSV parsing + event-context capture are deterministic single-process steps run by the main agent. NOT fanned out. See SKILL.md Steps 1-5 for the orchestrator commands. 
This section exists only to document the artifacts the downstream subagents consume: + +- `{OUTPUT_DIR}/event_context.md` — user-provided event description (read by Person Enrichment + Email) +- `{OUTPUT_DIR}/promo.json` — optional discount code + bucket scope (read by Person Enrichment + Email) +- `{OUTPUT_DIR}/people.jsonl` — one JSON-encoded attendee per line (read by Step 9 batching) +- `{OUTPUT_DIR}/seed_companies.txt` — deduped, sorted company names (read by Step 6 batching) + +--- + +## ICP Triage + +**HARD TOOL-CALL CAP: 1 tool call per company.** The only allowed call is `extract_page.mjs` on the company homepage. NO follow-up searches, NO sitemap discovery, NO secondary fetches. If the homepage returns thin content, write `Unknown` and cap the score at 3 — that is the correct behavior, not a failure. + +**ENFORCEMENT** — at the start of every Bash call, prepend a comment like `# bb call N/1` so the cap is visible in tool output. If a subagent emits more than `K` calls for a batch of `K` companies, the main agent's compile step will detect the over-budget run from the call log and flag it. + +**Subagent prompt template** — substitute the curly-brace placeholders before dispatching: + +``` +You are an ICP triage subagent for the event-follow-up skill. For each company in your batch, run ONE tool call to fetch the homepage, then score it against the user's ICP and write a triage stub to {OUTPUT_DIR}/companies/{slug}.md. + +CONTEXT: +- User's company: {USER_COMPANY} +- User's product: {USER_PRODUCT} +- ICP description: {ICP_DESCRIPTION} +- Event context: {EVENT_CONTEXT} +- Output directory: {OUTPUT_DIR} ← write company files HERE, full literal path + +COMPANIES TO TRIAGE (one per line — `name|guessed_homepage|slug`): +{COMPANY_LIST} + +The guessed_homepage is a heuristic (`https://{lowercased company name without spaces}.com`). For most companies it's correct. For a few it 404s — that's expected and the fallback is documented in rule 3 below. + +The slug is the canonical filename to write to: `{OUTPUT_DIR}/companies/{slug}.md`. Use it verbatim — do not re-slugify the name yourself or you'll create duplicate files. + +TOOL RULES — CRITICAL, FOLLOW EXACTLY: +1. You may ONLY use the Bash tool. No exceptions. +2. The ONLY allowed extraction call is: + node {SKILL_DIR}/scripts/extract_page.mjs "" --max-chars 2000 +3. HARD TOOL-CALL CAP: ONE call per company. If a homepage returns FETCH_OK: false with empty BODY (e.g. the guessed URL 404s), write product_description: "Unknown — homepage content not accessible" and cap icp_fit_score at 3. DO NOT attempt a second call to "save" the company. +4. ENFORCEMENT — at the start of EVERY Bash call, prepend a comment like `# bb call N/{TOTAL}` where N counts up and TOTAL is the number of companies in your batch. Example for a 10-company batch: + # bb call 1/10 + node {SKILL_DIR}/scripts/extract_page.mjs "https://openai.com" --max-chars 2000 +5. BANNED TOOLS: WebFetch, WebSearch, Write, Read, Glob, Grep — ALL BANNED. Use ONLY Bash. +6. NEVER use ~ or $HOME — full literal paths only. + +ANTI-HALLUCINATION RULES: +- NEVER infer product_description from fonts, framework, or design system. Typography is not a product. +- NEVER let the user's ICP leak into the target's description. If you don't know what the target does, write "Unknown". +- product_description MUST quote or closely paraphrase a phrase from extract_page.mjs output (TITLE / META_DESCRIPTION / OG_DESCRIPTION / HEADINGS / BODY). 
If none yield a recognizable product statement, write "Unknown — homepage content not accessible" and cap icp_fit_score at 3. + +ICP SCORING RUBRIC: +- 8-10: Strong match. Homepage clearly states a product/audience that aligns with {ICP_DESCRIPTION}. +- 5-7: Partial match. Adjacent industry, OR clear product but unclear pain-point alignment. +- 1-4: Weak match. Wrong segment, or homepage too thin to assess (cap at 3 if Unknown). + +OUTPUT — write ALL company files in a SINGLE Bash call using chained heredocs: + +# bb call 1/{TOTAL} +node {SKILL_DIR}/scripts/extract_page.mjs "{url1}" --max-chars 2000 && \ +# bb call 2/{TOTAL} +node {SKILL_DIR}/scripts/extract_page.mjs "{url2}" --max-chars 2000 && \ +... && \ +cat << 'COMPANY_MD' > {OUTPUT_DIR}/companies/{slug1}.md +--- +company_name: {name1} +website: {url1} +product_description: {description1} +icp_fit_score: {score1} +icp_fit_reasoning: {reasoning1} +triage_only: true +--- + +## Triage Notes +{1-2 sentences citing the homepage phrase that drove the score} +COMPANY_MD +cat << 'COMPANY_MD' > {OUTPUT_DIR}/companies/{slug2}.md +--- +... +--- + +... +COMPANY_MD + +Use 'COMPANY_MD' (quoted) as the heredoc delimiter to prevent shell variable expansion. + +Report back ONLY: "ICP triage batch: {scored}/{total} companies, score distribution: high={N} mid={N} low={N}". +Do NOT return raw homepage content or per-company reasoning to the main conversation. +``` + +--- + +## Deep Research + +**HARD TOOL-CALL CAP: 5 tool calls per company.** Budget breakdown: +- 1 call: `extract_page.mjs` on the homepage +- 2-3 calls: `bb search` for sub-questions (Priority 1 + selected Priority 2) +- 1-2 calls: `extract_page.mjs` on the most relevant search results (case study / blog / careers) + +**ENFORCEMENT** — at the start of every Bash call, prepend `# bb call N/{TOTAL}` where TOTAL is `5 × batch_size`. A 5-company batch caps at 25 total tool calls. The main agent's compile step monitors this from the call log. + +**Subagent prompt template**: + +``` +You are a deep-research subagent for the event-follow-up skill. For each ICP-fit company in your batch, follow the Plan→Research→Synthesize pattern from references/research-patterns.md and OVERWRITE the existing triage stub at {OUTPUT_DIR}/companies/{slug}.md with the deep-research version. + +CONTEXT: +- User's company: {USER_COMPANY} +- User's product: {USER_PRODUCT} +- ICP description: {ICP_DESCRIPTION} +- Event context: {EVENT_CONTEXT} ← description of the event we're following up on +- Output directory: {OUTPUT_DIR} + +COMPANIES TO RESEARCH (one per line, slug|website format): +{COMPANY_LIST} + +TOOL RULES — CRITICAL: +1. You may ONLY use the Bash tool. No exceptions. +2. All searches: bb search "..." --num-results 10 +3. All page extractions: node {SKILL_DIR}/scripts/extract_page.mjs "URL" --max-chars 3000 + (handles JSON envelope, meta tags, JS-render fallback to bb browse) + DO NOT hand-roll a `bb fetch | sed` pipeline. Use raw `bb fetch` only for sitemap.xml / llms.txt. +4. HARD TOOL-CALL CAP: 5 calls per company. Budget: + 1× extract_page on homepage + 2-3× bb search on sub-questions + 1-2× extract_page on the best search result + DO NOT exceed 5 calls per company. If you've burned the budget, synthesize from what you have. +5. ENFORCEMENT — at the start of EVERY Bash call, prepend a comment like `# bb call N/5 (company: {slug})`. Reset N to 1 for each company in the batch. +6. BATCH all writes: write ALL deep-research files in a SINGLE Bash call using chained heredocs. +7. 
BANNED TOOLS: WebFetch, WebSearch, Write, Read, Glob, Grep — ALL BANNED. +8. NEVER use ~ or $HOME — full literal paths. + +ANTI-HALLUCINATION RULES (same as research-patterns.md): +- Typography is not a product. +- No ICP leakage — if homepage is thin and search yields nothing, write "Unknown" and cap score at 3. +- product_description MUST quote/paraphrase a phrase from extract_page.mjs output or a search result. +- LOGO DIRECTION: a logo on a homepage does NOT establish a customer relationship. If {TARGET}'s homepage shows {USER_COMPANY}'s logo in a "trusted by"/"customers" section, the USER is the TARGET's customer — NOT the other way around. Only call a target an "existing customer" if its name appears in the user profile's `existing_customers` array. Otherwise describe the relationship neutrally (e.g. "shared ecosystem", "possible partnership", "adjacent stack"). + +RESEARCH PATTERN per company (deep mode): + +Phase A — Plan: +Decompose into 2-3 sub-questions. Always include "What does {company} do?" (Priority 1). Add 1-2 from Priority 2 chosen for relevance to {EVENT_CONTEXT} and the user's wedge. EXAMPLE: + - "What does {company} sell and who are their customers?" + - "What is {company} doing in the area covered by {EVENT_CONTEXT} that's relevant to {USER_PRODUCT}?" + - "Has {company} raised funding, launched products, or expanded recently?" (recency-of-buying-signal — feeds the sales-readiness rubric) + +Phase B — Research Loop: +1. # bb call 1/5 — extract_page on homepage +2. # bb call 2/5 — bb search for Priority 1 sub-question +3. # bb call 3/5 — bb search for event-context sub-question +4. # bb call 4/5 — extract_page on the most relevant search result +5. # bb call 5/5 — (optional) one more search OR fetch if budget remains +Accumulate findings: factual statement + source URL + confidence level (high/medium/low). + +Phase C — Synthesize: +1. Score ICP fit 1-10 using the rubric (high-confidence findings lift the score; thin evidence caps at 3). +2. Fill enrichment fields: product_description, industry, target_audience, key_features, employee_estimate, funding_info, headquarters. +3. Reference specific findings in icp_fit_reasoning. Capture any recent buying signals (funding, launch, hiring) explicitly — those drive the per-attendee sales-readiness scoring downstream. + +OUTPUT — overwrite the triage stub. ALL files in a SINGLE Bash call. + +**FORMAT RULES — non-negotiable, parser breaks if violated**: +- Every file MUST have a closing `---` line after the YAML frontmatter, BEFORE the first markdown section. Do NOT skip it. +- All structured data goes in the YAML frontmatter (above the closing `---`). Markdown sections (`## Product`, `## Research Findings`) go AFTER the closing `---`. + +cat << 'COMPANY_MD' > {OUTPUT_DIR}/companies/{slug}.md +--- +company_name: {name} +website: {url} +product_description: {description} +industry: {industry} +target_audience: {audience} +key_features: {feature1} | {feature2} | {feature3} +icp_fit_score: {score} +icp_fit_reasoning: {reasoning, references findings} +employee_estimate: {estimate} +funding_info: {funding} +headquarters: {location} +triage_only: false +recent_signals: {1-line summary of any recent funding/launch/hiring signal — feeds sales-readiness scoring} +--- + +## Product +{2-3 sentences specific, sourced} + +## Research Findings +- **[{confidence}]** {fact} (source: {url}) +- ... +COMPANY_MD + +Report back ONLY: "Deep research batch: {researched}/{total} companies, {findings_count} total findings, avg ICP score {N.N}". 
+``` + +--- + +## Person Enrichment + Email + +**HARD TOOL-CALL CAP: 4 tool calls per person.** Lanes: +1. `bb search "{name} {company} linkedin"` — always (deep + deeper) +2. `bb search "{name} podcast OR talk OR blog 2026"` — deep + deeper +3. `bb search "{name} github"` — deeper only +4. `bb search "{name} site:x.com OR site:twitter.com"` — deeper only + +Deep mode: lanes 1-2 (max 2 calls/person). Deeper mode: lanes 1-4 (max 4 calls/person). + +**ENFORCEMENT** — every Bash call prepends `# bb call N/{LANES} (person: {slug})`, where LANES is 2 (deep) or 4 (deeper). Reset N to 1 for each person. + +This single subagent pass produces enrichment data AND the personalized follow-up email AND the sales-readiness bucket — all in one file per person. + +**Subagent prompt template**: + +``` +You are a person-enrichment + follow-up-email subagent for the event-follow-up skill. For each attendee in your batch, run 2-4 bb searches, score sales-readiness, draft a personalized follow-up email, and write {OUTPUT_DIR}/people/{slug}.md. + +CONTEXT: +- User's company: {USER_COMPANY} +- User's product: {USER_PRODUCT} +- ICP description: {ICP_DESCRIPTION} +- Existing customers (do NOT pitch as net-new): {EXISTING_CUSTOMERS} +- Event context: {EVENT_CONTEXT} +- Promo code (apply only to listed buckets, else ignore): {PROMO} +- Depth mode: {DEPTH} ← `deep` (2 lanes) or `deeper` (4 lanes) +- Output directory: {OUTPUT_DIR} + +ATTENDEES TO ENRICH (one JSON record per line): +{PEOPLE_BATCH} + +Each record has fields: + { "name": "...", "email": "...", "title": "...", "company": "...", "linkedin": "...", "slug": "...", "notes": "..." } + +The `notes` field (when non-null) is what the team scribbled on the badge-scanner about this person — TREAT AS HIGH-PRIORITY context for the email. + +TOOL RULES — CRITICAL: +1. You may ONLY use the Bash tool. No exceptions. +2. All searches: bb search "..." --num-results 5 +3. HARD TOOL-CALL CAP per person: + deep mode: 2 calls (lanes 1 + 2) + deeper mode: 4 calls (lanes 1 + 2 + 3 + 4) + DO NOT exceed the cap. +4. ENFORCEMENT — at the start of EVERY Bash call, prepend a comment like `# bb call N/{LANES} (person: {slug})`. Reset N to 1 for each person. +5. BATCH all writes: write ALL people files in a SINGLE Bash call using chained heredocs. +6. BANNED TOOLS: WebFetch, WebSearch, Write, Read, Glob, Grep — ALL BANNED. +7. NEVER use ~ or $HOME — full literal paths. + +ANTI-HALLUCINATION RULES: +- The email body MUST quote or paraphrase a SPECIFIC finding from a bb search result, the team `notes` field, or the event context. NEVER fabricate a podcast/talk/blog the person didn't appear in. +- If no public signal exists, fall back to event-context: "saw you at {event}, your team's work on X" — never invent activity. +- A logo on a target's homepage does NOT establish a customer relationship. Only call a target an "existing customer" if their company name is in {EXISTING_CUSTOMERS}. +- If the company IS in {EXISTING_CUSTOMERS}, draft an EXPANSION email (different framing) — NOT a net-new pitch. + +LANE PROMPTS (run only the lanes for your DEPTH): + +Lane 1 (always): + # bb call 1/{LANES} (person: {slug}) + bb search "\"{name}\" \"{company}\" linkedin" --num-results 5 + → harvest LinkedIn URL + verify current title + +Lane 2 (deep + deeper): + # bb call 2/{LANES} (person: {slug}) + bb search "\"{name}\" podcast OR talk OR blog 2026" --num-results 5 + → harvest most-recent activity. Podcast/blog/talk URLs are the best email hooks. 
+ +Lane 3 (deeper only): + # bb call 3/{LANES} (person: {slug}) + bb search "\"{name}\" github" --num-results 5 + +Lane 4 (deeper only): + # bb call 4/{LANES} (person: {slug}) + bb search "\"{name}\" site:x.com OR site:twitter.com" --num-results 5 + +HOOK SOURCE PRIORITY (stop at first hit): +1. Team notes (the `notes` field on the input record) — most concrete, in-person context. +2. Recent activity (lane 2): podcast/blog/talk title from the last 6 months. +3. Event-context (the {EVENT_CONTEXT} block + their attendance). +4. Company-context: pull from {OUTPUT_DIR}/companies/{company_slug}.md `recent_signals` or `icp_fit_reasoning` (read via awk; allowed because it's local — not a tool call). + +SALES-READINESS RUBRIC (assign exactly one): + HOT — Senior at ICP-fit company (icp_fit_score >= 7) AND a buying signal in last 90 days + (public talk on the user's problem space, hiring relevant roles, recent funding/launch, + or direct mention of the user's category). NOT already an existing customer. + WARM — Senior at ICP-fit with no recent buying signal, OR mid-level at ICP-fit with a signal. + NURTURE — IC at ICP-fit, OR mid-level at adjacent (icp_fit_score 4-6), OR senior at adjacent + with no signal. + COLD — outside ICP entirely, OR existing customer (handled separately as expansion), + OR clear non-buyer (intern, recruiter, partnerships-at-non-ICP). + +EMAIL FORMAT (4-6 sentences, see references/email-patterns.md): + - sentence 1: event reference + their attendance, concrete (NOT "great to meet you") + - sentence 2: the specific signal you found (or fall back to team notes / event context) + - sentence 3: wedge — connect their signal to {USER_PRODUCT} + - sentence 4 (optional): proof point or social proof + - sentence 5: CTA matching the bucket: + HOT → "Worth a 20-min call this week?" + WARM → "Sharing a case study — open to a quick chat after?" + NURTURE → "Drop you in the dev newsletter?" or "Wrote a piece you'd find useful" + COLD → no email (write empty subject + body) + - sentence 6 (optional): low-pressure off-ramp. + +If {PROMO} has a code AND this person's bucket is in PROMO.applies_to, weave the code into the email body as a single sentence: "Sessions attendees get {PROMO.description} with {PROMO.code} — valid through {PROMO.expires}." Do NOT add the code to buckets outside PROMO.applies_to. + +OUTPUT — write ALL people files in a SINGLE Bash call using chained heredocs. + +**FORMAT RULES — non-negotiable, parser breaks if violated**: +- Every file MUST have a closing `---` line after the YAML frontmatter. Do NOT skip it. +- `email_subject`, `email_body`, `sales_readiness`, `email_cta` MUST be YAML frontmatter fields — NEVER markdown sections like `## Email`. +- `links` MUST be a nested YAML object. NEVER flat top-level keys. +- `email_body` is a YAML pipe scalar (`email_body: |` then indented multi-line text). 
+ +cat << 'PERSON_MD' > {OUTPUT_DIR}/people/{slug}.md +--- +name: {full name} +slug: {slug} +email: {email} +company: {company} +company_slug: {company_slug} +title: {title} +links: + linkedin: {url or null} + x: {url or null} + github: {url or null} + blog: {url or null} + podcast: {url or null} +sales_readiness: {HOT | WARM | NURTURE | COLD} +sales_readiness_reason: {1 sentence — why this bucket; reference the buying signal or its absence} +hook: {1 sentence, sourced — the email's anchoring fact} +email_subject: {short subject line, 5-9 words, specific not generic} +email_body: | + {sentence 1: event reference + attendance} + {sentence 2: the specific signal} + {sentence 3: wedge tie-in to {USER_PRODUCT}} + {sentence 4: proof point} + {sentence 5: CTA} +email_cta: {book demo | share resource | stay in touch | no follow-up} +role_reason: {why this person matters at the company} +icp_fit_score: {inherited from companies/{company_slug}.md} +icp_fit_reasoning: {inherited} +enriched_at: {ISO timestamp} +--- + +## Why reach out +- **Why the person**: {role_reason restated as 1 line} +- **Hook**: {hook, with source URL inline} + +## Public links +{bullet list of every harvested link, one per line} + +## Recent activity +- **[{confidence}]** {finding} (source: {url}) +- ... +PERSON_MD + +For COLD attendees, write empty strings for email_subject and email_body — but still emit the file with sales_readiness: COLD and sales_readiness_reason set so the report knows they were considered. + +Report back ONLY: "Enrichment batch: {enriched}/{total} attendees, distribution: HOT={N} WARM={N} NURTURE={N} COLD={N}". +``` + +--- + +## Compilation + +After all subagents complete, the main agent runs the compile step ONCE. NOT fanned out. From SKILL.md Step 10: + +```bash +node {SKILL_DIR}/scripts/compile_report.mjs {OUTPUT_DIR} --open +``` + +The compile script: +1. Reads every `companies/*.md` and `people/*.md` +2. Joins people to their company files (via `company_slug` frontmatter) +3. Groups people by `sales_readiness` (HOT → WARM → NURTURE → COLD), then by company ICP score within each bucket +4. Renders: + - `index.html` — attendees grouped by sales-readiness, each card showing the email subject + body + Copy buttons (the primary deliverable) + - `people.html` — filterable attendee list (alternate view, with chips for sales-readiness, role, company) + - `companies.html` — ICP-ranked company table with attendees expandable per row + - `results.csv` — one row per person with `email_subject`, `email_body`, `sales_readiness` columns for direct CRM import +5. Opens `index.html` in the default browser (`--open` flag) + +The compile step does NOT mutate any `.md` files. All HTML is generated fresh from the markdown sources every run, so re-running compile after a manual edit to a `.md` file regenerates the report. + +--- + +## Wave Management + +### Key Principle: Maximize Parallelism, Minimize Prompts + +Launch as many subagents as possible in a single Agent fan-out (up to ~6 Agent calls per message). Each subagent MUST batch all its Bash operations into a single call to minimize permission prompts. One subagent batch = one Bash call = one permission prompt. 
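+
+A minimal sketch of that batching contract (illustrative only — `skillDir` and `batch` are hypothetical stand-ins, not variables the skill defines):
+
+```js
+// Build ONE chained Bash invocation for a triage batch: N extract calls,
+// each tagged with its enforcement comment, joined by `&&` continuations.
+const skillDir = '/Users/jay/skills/skills/event-follow-up'; // example path
+const batch = [{ homepage: 'https://openai.com' }, { homepage: 'https://linear.com' }];
+const cmds = batch.map((c, i) =>
+  `# bb call ${i + 1}/${batch.length}\n` +
+  `node ${skillDir}/scripts/extract_page.mjs "${c.homepage}" --max-chars 2000`
+);
+const oneBashCall = cmds.join(' && \\\n'); // one Bash call → one permission prompt
+console.log(oneBashCall);
+```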
+ +### Sizing Formula + +``` +seed_companies = wc -l seed_companies.txt +icp_fits = wc -l icp_fits.txt (typically 20-40% of seed) +people_to_enrich = wc -l _people_to_enrich.jsonl (typically 1.5-2.5× icp_fits) + +triage_subagents = ceil(seed_companies / 10) # 10 companies/subagent, 1 call each +deep_subagents = ceil(icp_fits / 5) # 5 companies/subagent, 5 calls each +person_subagents = ceil(people_to_enrich / 5) # 5 people/subagent, 2-4 calls each +``` + +For Stripe Sessions (99 seed → ~30 ICP fits → ~50 people): +- Triage: 10 subagents × 10 calls = 100 calls (matches the cost model: 99 calls) +- Deep research: 6 subagents × 25 calls = 150 calls +- Person enrichment: 10 subagents × ~10 calls = 100 calls +- Total: ~350 tool calls, matches the design doc cost model. + +### Wave Cadence + +Dispatch all subagents for a given step in **a single Agent fan-out message** (up to 6 per message; if more needed, run a second wave after the first completes). Do NOT serialize subagents that can run in parallel. + +### Error Handling + +- If a single subagent fails, log the error and continue. The compile step ignores missing files gracefully. +- If >50% of subagents in a wave fail, pause and surface to the user before continuing. +- If `extract_page.mjs` returns FETCH_OK: false with empty BODY, the triage subagent should write `product_description: Unknown — homepage content not accessible` and cap score at 3 (NOT skip the company — the file must exist for compile to render the row). +- The HARD TOOL-CALL CAP is non-negotiable. If a subagent exceeds its budget, the run is invalidated for that batch (compile step warns; user can re-dispatch). diff --git a/skills/event-follow-up/scripts/__fixtures__/example.csv b/skills/event-follow-up/scripts/__fixtures__/example.csv new file mode 100644 index 0000000..9b5aecd --- /dev/null +++ b/skills/event-follow-up/scripts/__fixtures__/example.csv @@ -0,0 +1,10 @@ +First Name,Last Name,Email,Company,Job Title,LinkedIn,Notes +Greg,Brockman,greg@openai.com,OpenAI,Cofounder and President,https://www.linkedin.com/in/thegdb/,Spoke about ChatGPT Agent +Sam,Smith,sam@browserbase.com,Browserbase,Engineer,,Internal team member +Adam,Sommer,adam@ramp.com,Ramp,Director of Product,https://www.linkedin.com/in/adamsommer/,Existing customer +Kate,Jensen,kate@anthropic.com,Anthropic,Head of Americas,,Computer Use product +,,,,, +Cristina,Cordova,cristina@linear.com,Linear,COO,, +Jane,Doe,jane@gmail.com,,Founder,,No company column for personal email +Devang,Kothari,devang@wizard.com,Wizard,CTO,,AI shopping assistant +"O'Brien, Patrick",,patrick@example.com,Example Inc,"VP, Sales",,Quoted name and title with commas diff --git a/skills/event-follow-up/scripts/compile_report.mjs b/skills/event-follow-up/scripts/compile_report.mjs new file mode 100644 index 0000000..d770a03 --- /dev/null +++ b/skills/event-follow-up/scripts/compile_report.mjs @@ -0,0 +1,894 @@ +#!/usr/bin/env node + +// Compiles per-company + per-person markdown research files into a follow-up +// HTML report (index.html) grouped by sales-readiness, plus people.html and +// companies.html alternate views. 
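+//
+// Illustrative invocation (path is an example; output dirs follow the
+// {event_slug}_followup_{YYYY-MM-DD-HHMM} pattern from SKILL.md):
+//   node compile_report.mjs /Users/jay/Desktop/stripe_sessions_followup_2026-04-27-0900 --open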
+// +// Reads: +// /companies/*.md — one per company (frontmatter + body) +// /people/*.md — one per attendee (frontmatter + body, with sales_readiness + email) +// +// Writes: +// /index.html — attendees grouped by sales-readiness (HOT → WARM → NURTURE → COLD) +// /people.html — filterable attendee list (chips: sales-readiness, ICP band, role, company) +// /companies.html — ICP-ranked company table with expandable attendees +// /companies/.html — individual company research pages +// /results.csv — one row per attendee (name, email, company, sales_readiness, email_subject, email_body, ...) +// +// Usage: node compile_report.mjs [--template ] [--open] + +import { readdirSync, readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs'; +import { join, dirname } from 'path'; +import { fileURLToPath } from 'url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const args = process.argv.slice(2); + +if (args.includes('--help') || args.includes('-h') || args.length === 0) { + console.error(`Usage: node compile_report.mjs [--template ] [--open] + +Reads companies/*.md and people/*.md from , generates: + - index.html — people grouped by company (ranked by company ICP) + - people.html — filterable people list (chips: company, role, ICP band) + - companies.html — ICP-ranked company table with expandable attendees + - companies/.html — individual company research pages + - results.csv — scored spreadsheet + +Options: + --template Path to report-template.html (default: auto-detect) + --open Open index.html in browser after generation + --help, -h Show this help message`); + process.exit(args.includes('--help') || args.includes('-h') ? 0 : 1); +} + +const dir = args[0]; +const shouldOpen = args.includes('--open'); +const templateIdx = args.indexOf('--template'); +let templatePath = templateIdx !== -1 ? args[templateIdx + 1] : null; + +// Auto-detect template +if (!templatePath) { + const candidates = [ + join(__dirname, '..', 'references', 'report-template.html'), + join(__dirname, 'report-template.html'), + ]; + templatePath = candidates.find(p => existsSync(p)); + if (!templatePath) { + console.error('Error: Could not find report-template.html. Use --template to specify path.'); + process.exit(1); + } +} + +const template = readFileSync(templatePath, 'utf-8'); + +// ----- Frontmatter / body parsing (shared) --------------------------------- + +function parseFrontmatter(content) { + // Tolerant frontmatter match: prefer closing ---, but if a subagent forgot it, + // fall back to stopping at the first markdown heading (e.g. ## Product) so the + // file still parses instead of vanishing from the report. + const fmMatch = content.match(/^---\n([\s\S]*?)(?:\n---\s*\n|\n(?=## ))/); + if (!fmMatch) return null; + const fields = {}; + const lines = fmMatch[1].split('\n'); + let i = 0; + while (i < lines.length) { + const line = lines[i]; + // Multi-line YAML pipe scalar: key: | + // line one + // line two + const pipeMatch = line.match(/^([a-zA-Z_][\w]*)\s*:\s*\|\s*$/); + if (pipeMatch) { + const key = pipeMatch[1]; + const buf = []; + i++; + while (i < lines.length && /^\s{2,}/.test(lines[i])) { + buf.push(lines[i].replace(/^\s{2}/, '')); + i++; + } + fields[key] = buf.join('\n').trim(); + continue; + } + // Nested block (e.g. 
links: with indented children)
+    const nestedHeadMatch = line.match(/^([a-zA-Z_][\w]*)\s*:\s*$/);
+    if (nestedHeadMatch && i + 1 < lines.length && /^\s{2,}\S/.test(lines[i + 1])) {
+      const key = nestedHeadMatch[1];
+      const child = {};
+      i++;
+      while (i < lines.length && /^\s{2,}\S/.test(lines[i])) {
+        const c = lines[i].trim();
+        const idx = c.indexOf(':');
+        if (idx > 0) {
+          const ck = c.slice(0, idx).trim();
+          const cv = c.slice(idx + 1).trim().replace(/^["']|["']$/g, '');
+          child[ck] = (cv === 'null' || cv === '') ? null : cv;
+        }
+        i++;
+      }
+      fields[key] = child;
+      continue;
+    }
+    const idx = line.indexOf(':');
+    if (idx > 0) {
+      const key = line.slice(0, idx).trim();
+      const val = line.slice(idx + 1).trim().replace(/^["']|["']$/g, '');
+      if (key) fields[key] = val;
+    }
+    i++;
+  }
+  return fields;
+}
+
+function parseBody(content) {
+  // Mirror parseFrontmatter's tolerance — body starts after closing --- if present,
+  // else at the first ## heading.
+  const closed = content.match(/^---\n[\s\S]*?\n---\s*\n([\s\S]*)/);
+  if (closed) return closed[1].trim();
+  const fallback = content.match(/^---\n[\s\S]*?\n(## [\s\S]*)/);
+  return fallback ? fallback[1].trim() : '';
+}
+
+// Pull a markdown section's content given its heading text. Used as a fallback when
+// person-enrichment subagents wrote hook/dm_opener/etc. as ## sections instead of YAML.
+function extractSection(body, heading) {
+  if (!body) return null;
+  const escaped = heading.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const re = new RegExp(`^##\\s+${escaped}\\s*\\n+([\\s\\S]*?)(?=\\n##\\s|$)`, 'im');
+  const m = body.match(re);
+  return m ? m[1].trim() : null;
+}
+
+function escapeHtml(str) {
+  return (str || '').toString().replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
+}
+
+function escapeAttr(str) {
+  return escapeHtml(str).replace(/\n/g, ' ');
+}
+
+function scoreClass(score) {
+  const s = parseInt(score) || 0;
+  if (s >= 8) return 'high';
+  if (s >= 5) return 'medium';
+  return 'low';
+}
+
+function icpBand(score) {
+  const s = parseInt(score) || 0;
+  if (s >= 8) return 'high';
+  if (s >= 6) return 'mid';
+  return 'low';
+}
+
+function slugify(s) {
+  return (s || '').toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
+}
+
+function roleBucket(title) {
+  const t = (title || '').toLowerCase();
+  if (/(ceo|founder|co-?founder|president|chief)/.test(t)) return 'Founder/CXO';
+  if (/(vp|vice president|head of|director)/.test(t)) return 'VP/Director';
+  if (/(engineer|developer|programmer|architect|sre|devops)/.test(t)) return 'Engineering';
+  if (/(product|pm|product manager)/.test(t)) return 'Product';
+  if (/(design|ux|ui)/.test(t)) return 'Design';
+  if (/(market|growth|content)/.test(t)) return 'Marketing';
+  if (/(sales|account|revenue|gtm)/.test(t)) return 'Sales/GTM';
+  if (/(research|scientist|ml|ai)/.test(t)) return 'Research/AI';
+  return 'Other';
+}
+
+function mdToHtml(md) {
+  const lines = md.split('\n');
+  const out = [];
+  let inList = false;
+  let paraLines = [];
+
+  function flushPara() {
+    if (paraLines.length > 0) {
+      let text = escapeHtml(paraLines.join(' ').trim());
+      text = text.replace(/\*\*\[(\w+)\]\*\*/g, '<strong>[$1]</strong>');
+      text = text.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
+      if (text) out.push(`<p>${text}</p>`);


+      paraLines = [];
+    }
+  }
+
+  function closeList() {
+    if (inList) { out.push('</ul>'); inList = false; }
+  }
+
+  for (const line of lines) {
+    const trimmed = line.trim();
+
+    if (!trimmed) {
+      flushPara();
+      closeList();
+      continue;
+    }
+
+    if (trimmed.startsWith('## ')) {
+      flushPara(); closeList();
+      out.push(`<h2>${escapeHtml(trimmed.slice(3))}</h2>`);
+      continue;
+    }
+    if (trimmed.startsWith('### ')) {
+      flushPara(); closeList();
+      out.push(`

<h3>${escapeHtml(trimmed.slice(4))}</h3>

`); + continue; + } + + if (trimmed.startsWith('- ')) { + flushPara(); + if (!inList) { out.push('
<ul>'); inList = true; }
+      let text = escapeHtml(trimmed.slice(2));
+      text = text.replace(/\*\*\[(\w+)\]\*\*/g, '<strong>[$1]</strong>');
+      text = text.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
+      out.push(`<li>${text}</li>`);
+      continue;
+    }
+
+    closeList();
+    paraLines.push(trimmed);
+  }
+
+  flushPara();
+  closeList();
+  return out.join('\n');
+}
+
+// ----- Read companies + people --------------------------------------------
+
+function readMdDir(p) {
+  if (!existsSync(p)) return [];
+  let entries = [];
+  try { entries = readdirSync(p); } catch { return []; }
+  return entries.filter(f => f.endsWith('.md')).sort().map(f => {
+    const content = readFileSync(join(p, f), 'utf-8');
+    const fields = parseFrontmatter(content);
+    if (!fields) return null;
+    const body = parseBody(content);
+    const slug = f.replace('.md', '');
+    return { ...fields, body, slug, file: f };
+  }).filter(Boolean);
+}
+
+const companiesDir = join(dir, 'companies');
+let companies = readMdDir(companiesDir);
+
+// Legacy fallback: top-level *.md files = companies (company-research's format)
+if (companies.length === 0) {
+  companies = readMdDir(dir);
+}
+
+const peopleDir = join(dir, 'people');
+const people = readMdDir(peopleDir);
+
+if (companies.length === 0 && people.length === 0) {
+  console.error(`No .md files found in ${dir} (looked in companies/, people/, and top-level)`);
+  process.exit(1);
+}
+
+// Sort companies by ICP score descending
+companies.sort((a, b) => (parseInt(b.icp_fit_score) || 0) - (parseInt(a.icp_fit_score) || 0));
+
+// Deduplicate companies by normalized name
+const seen = new Map();
+for (const c of companies) {
+  const name = (c.company_name || '').toLowerCase().replace(/[,\s]+(inc|llc|ltd|corp|co)\.?$/i, '').trim();
+  if (!name) continue;
+  if (!seen.has(name)) seen.set(name, c);
+}
+const deduped = [...seen.values()];
+
+// Build company lookup: slug → company, name(lowered) → company
+const companyBySlug = new Map();
+const companyByName = new Map();
+for (const c of deduped) {
+  if (c.slug) companyBySlug.set(c.slug, c);
+  if (c.company_name) companyByName.set(c.company_name.toLowerCase().trim(), c);
+}
+
+function resolveCompany(person) {
+  if (person.company_slug && companyBySlug.has(person.company_slug)) return companyBySlug.get(person.company_slug);
+  if (person.company) {
+    const k = person.company.toLowerCase().trim();
+    if (companyByName.has(k)) return companyByName.get(k);
+    const slugGuess = slugify(person.company);
+    if (companyBySlug.has(slugGuess)) return companyBySlug.get(slugGuess);
+  }
+  return null;
+}
+
+// Augment each person with effective company + score for sorting
+for (const p of people) {
+  const comp = resolveCompany(p);
+  p._company = comp;
+  // Effective ICP: company score wins (per the plan), else person frontmatter, else -1 (last)
+  const cs = comp ? parseInt(comp.icp_fit_score) : NaN;
+  const ps = parseInt(p.icp_fit_score);
+  p._effectiveScore = !isNaN(cs) ? cs : (!isNaN(ps) ? ps : -1);
+}
+
+// Sort people: sales-readiness bucket (HOT → WARM → NURTURE → COLD), then ICP
+// score desc within each bucket, then name. Buckets with no readiness field
+// default to COLD.
+const READINESS_ORDER = { HOT: 0, WARM: 1, NURTURE: 2, COLD: 3 };
+people.sort((a, b) => {
+  const ra = READINESS_ORDER[(a.sales_readiness || 'COLD').toUpperCase()] ?? 3;
+  const rb = READINESS_ORDER[(b.sales_readiness || 'COLD').toUpperCase()] ??
3;
+  if (ra !== rb) return ra - rb;
+  if (b._effectiveScore !== a._effectiveScore) return b._effectiveScore - a._effectiveScore;
+  return (a.name || '').localeCompare(b.name || '');
+});
+
+// ----- Stats --------------------------------------------------------------
+
+const scores = deduped.map(c => parseInt(c.icp_fit_score) || 0);
+const high = scores.filter(s => s >= 8).length;
+const medium = scores.filter(s => s >= 5 && s < 8).length;
+const low = scores.filter(s => s < 5).length;
+const total = deduped.length;
+const highPct = total > 0 ? Math.round((high / total) * 100) : 0;
+const mediumPct = total > 0 ? Math.round((medium / total) * 100) : 0;
+const lowPct = total > 0 ? 100 - highPct - mediumPct : 0;
+
+const dirName = dir.split('/').filter(Boolean).pop() || 'event';
+const title = dirName.replace(/_/g, ' ').replace(/-/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
+
+// ----- Person card render --------------------------------------------------
+
+function initials(name) {
+  return (name || '?').split(/\s+/).filter(Boolean).slice(0, 2).map(w => w[0].toUpperCase()).join('');
+}
+
+const READINESS_LABELS = {
+  HOT: { label: 'HOT', emoji: '🔥', desc: 'book a meeting' },
+  WARM: { label: 'WARM', emoji: '🌡️', desc: 'qualify in nurture' },
+  NURTURE: { label: 'NURTURE', emoji: '🌱', desc: 'educational content' },
+  COLD: { label: 'COLD', emoji: '❄️', desc: 'skip / no follow-up' },
+};
+
+function readinessBucket(person) {
+  const r = (person.sales_readiness || '').toString().toUpperCase().trim();
+  if (READINESS_LABELS[r]) return r;
+  return 'COLD';
+}
+
+function renderPersonCard(person, company) {
+  const c = company || {};
+  const links = (person.links && typeof person.links === 'object') ? person.links : {
+    linkedin: person.linkedin || null,
+    x: person.x || person.twitter || null,
+    github: person.github || null,
+    blog: person.blog || null,
+    podcast: person.podcast || null,
+  };
+  const linkPills = ['linkedin', 'x', 'github', 'blog', 'podcast']
+    .filter(k => links[k])
+    .map(k => `<a class="link-pill" href="${escapeAttr(links[k])}">${k.toUpperCase()}</a>`)
+    .join(' ');
+
+  const score = c.icp_fit_score || person.icp_fit_score || '?';
+  const band = icpBand(score);
+  const bucket = readinessBucket(person);
+  const hook = person.hook || extractSection(person.body, 'Hook') || '';
+  const roleReason = person.role_reason || extractSection(person.body, 'Why the person') || '';
+  const readinessReason = person.sales_readiness_reason || '';
+
+  // Email fields with body-section fallback for tolerance to subagent format drift.
+  const emailSubject = person.email_subject || extractSection(person.body, 'Email Subject') || '';
+  const emailBody = person.email_body || extractSection(person.body, 'Email') || extractSection(person.body, 'Email Body') || '';
+  const photo = person.image
+    ? `<img class="photo" src="${escapeAttr(person.image)}" alt="${escapeHtml(person.name || '')}">`
+    : `<div class="photo photo-placeholder">${escapeHtml(initials(person.name))}</div>`;
+
+  const emailBlock = (emailSubject || emailBody) ? `
+    <div class="email-block">
+      ${emailSubject ? `<div class="email-subject"><span class="label">Subject:</span> ${escapeHtml(emailSubject)}</div>` : ''}
+      <div class="email-body">${escapeHtml(emailBody)}</div>
+      ${person.email ? `<div class="card-actions"><a class="btn-mailto" href="mailto:${escapeAttr(person.email)}?subject=${encodeURIComponent(emailSubject)}">Email ${escapeHtml(person.name || '')}</a></div>` : ''}
+    </div>` : (bucket === 'COLD' ? `<div class="email-block email-skip">COLD — no follow-up email drafted</div>` : '');
+
+  return `
+  <article class="person-card" data-readiness="${bucket}" data-band="${band}" data-role="${escapeAttr(roleBucket(person.title))}" data-company="${escapeAttr((person.company || '').toLowerCase())}">
+    ${photo}
+    <div class="card-body">
+      <div class="card-header">
+        <h3>${escapeHtml(person.name || person.slug)}</h3>
+        <div class="badges">
+          <span class="readiness-badge readiness-${bucket}">${READINESS_LABELS[bucket].emoji} ${bucket}</span>
+          <span class="icp-badge icp-${band}">ICP ${escapeHtml(String(score))}</span>
+        </div>
+      </div>
+      <div class="card-meta">${escapeHtml(person.title || '')}${person.title && person.company ? ' · ' : ''}${escapeHtml(person.company || '')}${person.email ? ` · ${escapeHtml(person.email)}` : ''}</div>
+      ${linkPills ? `<div class="card-links">${linkPills}</div>` : ''}
+      <ul class="card-why">
+        ${readinessReason ? `<li><strong>Why ${bucket}:</strong> ${escapeHtml(readinessReason)}</li>` : ''}
+        ${roleReason ? `<li><strong>Role:</strong> ${escapeHtml(roleReason)}</li>` : ''}
+        ${hook ? `<li><strong>Hook:</strong> ${escapeHtml(hook)}</li>` : ''}
+      </ul>
+      ${emailBlock}
+    </div>
+  </article>
    `; +} + +// ----- Shared CSS for the event-follow-up UI ------------------------------ + +const eventCss = ` + .nav-bar { display:flex; gap:0.5rem; margin-bottom:1.25rem; font-size:0.875rem; } + .nav-bar a { padding:0.4rem 0.85rem; border:1px solid var(--border); border-radius:4px; background:var(--card); color:var(--muted); font-weight:500; text-decoration:none; } + .nav-bar a.active { background:var(--brand); color:#fff; border-color:var(--brand); } + .filter-bar { display:flex; gap:0.75rem; flex-wrap:wrap; margin-bottom:1.25rem; align-items:center; } + .filter-group { display:flex; gap:0.4rem; flex-wrap:wrap; align-items:center; padding:0.4rem 0.6rem; background:var(--card); border:1px solid var(--border); border-radius:4px; } + .filter-group .label { font-size:0.7rem; color:var(--muted); text-transform:uppercase; letter-spacing:0.05em; font-weight:600; margin-right:0.25rem; } + .chip { display:inline-block; padding:0.2rem 0.6rem; border:1px solid var(--border); border-radius:999px; background:#fafafa; font-size:0.7rem; color:var(--muted); cursor:pointer; user-select:none; } + .chip.active { background:var(--brand); color:#fff; border-color:var(--brand); } + .chip:hover { border-color:var(--brand); } + .person-grid { display:flex; flex-direction:column; gap:0.75rem; } + .person-card { background:var(--card); border:1px solid var(--border); border-radius:6px; padding:1rem 1.1rem; display:flex; flex-direction:row; gap:1rem; align-items:stretch; } + .person-card.hidden { display:none; } + .person-card .photo { width:96px; height:96px; flex:0 0 96px; border-radius:6px; object-fit:cover; background:#f0eeec; } + .person-card .photo-placeholder { display:flex; align-items:center; justify-content:center; font-weight:700; font-size:1.5rem; color:var(--muted); letter-spacing:0.04em; } + .card-body { flex:1; min-width:0; display:flex; flex-direction:column; gap:0.45rem; } + .card-header { display:flex; justify-content:space-between; align-items:flex-start; gap:0.5rem; } + .card-header h3 { font-size:1rem; font-weight:600; color:var(--black); margin:0; } + .company-groups { display:flex; flex-direction:column; gap:1.5rem; } + .company-group { background:transparent; } + .company-header { display:flex; flex-direction:column; gap:0.25rem; padding:0.5rem 0.1rem 0.75rem; border-bottom:1px solid var(--border); margin-bottom:0.75rem; } + .company-header-row { display:flex; align-items:center; gap:0.6rem; } + .company-header h2 { font-size:1.05rem; font-weight:600; color:var(--black); margin:0; } + .company-header .company-meta { font-size:0.75rem; color:var(--muted); margin:0; } + .company-header .company-fit { font-size:0.8125rem; color:var(--text); margin:0.15rem 0 0; } + .company-header a { color:var(--brand); text-decoration:none; } + .company-header a:hover { text-decoration:underline; } + .company-people { display:flex; flex-direction:column; gap:0.6rem; } + .readiness-groups { display:flex; flex-direction:column; gap:1.75rem; } + .readiness-group { background:transparent; } + .readiness-header { padding:0.5rem 0.1rem 0.75rem; border-bottom:2px solid var(--border); margin-bottom:0.85rem; } + .readiness-header-row { display:flex; align-items:baseline; gap:0.6rem; flex-wrap:wrap; } + .readiness-header h2 { font-size:1.1rem; font-weight:700; color:var(--black); margin:0; letter-spacing:0.02em; } + .readiness-header .readiness-count { font-size:0.8125rem; color:var(--muted); } + .readiness-people { display:flex; flex-direction:column; gap:0.6rem; } + .badges { display:flex; gap:0.4rem; 
align-items:center; flex-shrink:0; } + .readiness-badge { font-size:0.7rem; font-weight:700; padding:2px 8px; border-radius:3px; white-space:nowrap; letter-spacing:0.04em; } + .readiness-badge.readiness-HOT { background:rgba(240,54,3,0.12); color:#c4410d; } + .readiness-badge.readiness-WARM { background:rgba(244,186,65,0.18); color:#9a7520; } + .readiness-badge.readiness-NURTURE { background:rgba(144,201,77,0.16); color:#5a8a1a; } + .readiness-badge.readiness-COLD { background:rgba(81,79,79,0.10); color:var(--muted); } + .email-block { background:#fafafa; border:1px solid var(--border); border-radius:4px; padding:0.6rem 0.75rem; margin-top:0.4rem; font-size:0.8125rem; } + .email-block .email-subject { font-weight:600; margin-bottom:0.3rem; color:var(--black); } + .email-block .email-subject .label { font-weight:500; color:var(--muted); margin-right:0.25rem; } + .email-block .email-body { white-space:normal; line-height:1.5; color:var(--text); } + .email-block.email-skip { color:var(--muted); font-style:italic; } + .email-block .card-actions { margin-top:0.5rem; padding-top:0; } + .email-block .btn-mailto { font:inherit; font-size:0.75rem; font-weight:600; padding:0.4rem 0.7rem; border-radius:4px; border:1px solid var(--border); background:var(--card); color:var(--text); cursor:pointer; text-decoration:none; } + .email-block .btn-mailto:hover { background:var(--brand); color:#fff; border-color:var(--brand); } + @media (max-width: 640px) { + .person-card { flex-direction:column; } + .person-card .photo { width:80px; height:80px; flex-basis:80px; } + } + .icp-badge { font-size:0.7rem; font-weight:700; padding:2px 8px; border-radius:3px; white-space:nowrap; } + .icp-badge.icp-high { background:rgba(144,201,77,0.14); color:#5a8a1a; } + .icp-badge.icp-mid { background:rgba(244,186,65,0.14); color:#9a7520; } + .icp-badge.icp-low { background:rgba(240,54,3,0.10); color:var(--low); } + .card-meta { font-size:0.8125rem; color:var(--muted); } + .card-links { display:flex; flex-wrap:wrap; gap:0.3rem; } + .link-pill { font-size:0.7rem; font-weight:600; padding:2px 8px; border-radius:3px; text-decoration:none; border:1px solid var(--border); color:var(--text); background:#fafafa; letter-spacing:0.04em; } + .link-pill:hover { background:var(--brand); color:#fff; border-color:var(--brand); } + .card-why { list-style:none; margin:0; padding:0; display:flex; flex-direction:column; gap:0.3rem; font-size:0.8125rem; color:var(--text); } + .card-why li { line-height:1.45; } + .card-why strong { color:var(--black); font-weight:600; } + .card-actions { display:flex; gap:0.5rem; margin-top:auto; padding-top:0.5rem; } + .card-actions button { font:inherit; font-size:0.75rem; font-weight:600; padding:0.4rem 0.7rem; border-radius:4px; border:1px solid var(--border); background:var(--card); color:var(--text); cursor:pointer; } + .card-actions button:hover { background:var(--brand); color:#fff; border-color:var(--brand); } + .card-actions button.copied { background:var(--high); color:#fff; border-color:var(--high); } + details.attendees { margin-top:0.4rem; } + details.attendees summary { cursor:pointer; color:var(--brand); font-size:0.8125rem; font-weight:500; } + details.attendees ul { margin:0.4rem 0 0 1rem; padding:0; list-style:disc; } + details.attendees li { font-size:0.8125rem; color:var(--text); margin-bottom:0.2rem; } +`; + +const clipboardScript = ` +`; + +// ----- Person grid + filter chips ----------------------------------------- + +function renderPeopleGrid(personList) { + if (personList.length === 0) { 
+ return '<p>No people found.</p>';
+  }
+  return `<div class="person-grid">
+${personList.map(p => renderPersonCard(p, p._company)).join('\n')}
+</div>`;
+}
+
+// Index page: attendees grouped by sales-readiness (HOT → WARM → NURTURE → COLD),
+// then by company ICP score desc within each bucket. Empty buckets are skipped.
+function renderGroupedByReadiness(personList) {
+  if (personList.length === 0) {
+    return '<p>No people found.</p>';
+  }
+  const order = ['HOT', 'WARM', 'NURTURE', 'COLD'];
+  const buckets = Object.fromEntries(order.map(b => [b, []]));
+  for (const p of personList) {
+    buckets[readinessBucket(p)].push(p);
+  }
+  // Within each bucket, sort by company ICP score desc, then by name.
+  for (const b of order) {
+    buckets[b].sort((a, b2) => {
+      const sa = a._company ? (parseInt(a._company.icp_fit_score) || 0) : -1;
+      const sb = b2._company ? (parseInt(b2._company.icp_fit_score) || 0) : -1;
+      if (sb !== sa) return sb - sa;
+      return (a.name || '').localeCompare(b2.name || '');
+    });
+  }
+
+  const sections = order
+    .filter(bucket => buckets[bucket].length > 0)
+    .map(bucket => {
+      const members = buckets[bucket];
+      const meta = READINESS_LABELS[bucket];
+      return `
+      <section class="readiness-group">
+        <div class="readiness-header">
+          <div class="readiness-header-row">
+            <h2>${meta.emoji} ${meta.label}</h2>
+            <span class="readiness-count">${members.length} attendee${members.length === 1 ? '' : 's'} · ${meta.desc}</span>
+          </div>
+        </div>
+        <div class="readiness-people">
+          ${members.map(p => renderPersonCard(p, p._company)).join('\n')}
+        </div>
+      </section>
    `; + }); + + return `
<div class="readiness-groups">\n${sections.join('\n')}\n</div>
    `; +} + +function uniqValues(list, fn) { + return [...new Set(list.map(fn).filter(Boolean))].sort(); +} + +// people.html filter chips: ICP band, role bucket, company. +// Activating a chip applies a single-value filter against the matching +// data-* attribute on each .person-card. Click handlers are in clipboardScript. +function renderFilterBar(personList) { + const compNames = uniqValues(personList, p => p.company); + const roles = uniqValues(personList, p => roleBucket(p.title)); + const readinessOrder = ['HOT', 'WARM', 'NURTURE', 'COLD']; + const bands = ['high', 'mid', 'low']; + + const chip = (val, label) => `${escapeHtml(label)}`; + + const bandLabels = { high: 'High (8-10)', mid: 'Mid (6-7)', low: 'Low (1-5)' }; + + return `
    +
    + Sales + ${chip('', 'All')} + ${readinessOrder.map(r => chip(r, `${READINESS_LABELS[r].emoji} ${r}`)).join(' ')} +
    +
    + ICP + ${chip('', 'All')} + ${bands.map(b => chip(b, bandLabels[b])).join(' ')} +
    +
    + Role + ${chip('', 'All')} + ${roles.map(r => chip(r, r)).join(' ')} +
    +
    + Company + ${chip('', 'All')} + ${compNames.map(c => chip(c.toLowerCase(), c)).join(' ')} +
    +
`;
+}
+
+// ----- Companies table with attendees expandable ---------------------------
+
+function renderCompaniesTable() {
+  // Group people by company slug or name (lowered) so each row can show its attendees.
+  const byCompany = new Map();
+  for (const p of people) {
+    const key = p._company ? (p._company.slug || (p._company.company_name || '').toLowerCase()) : null;
+    if (!key) continue;
+    if (!byCompany.has(key)) byCompany.set(key, []);
+    byCompany.get(key).push(p);
+  }
+
+  return deduped.map(c => {
+    const sc = scoreClass(c.icp_fit_score);
+    const hasDetail = c.body && c.body.length > 50;
+    const nameHtml = hasDetail
+      ? `<a href="companies/${c.slug}.html">${escapeHtml(c.company_name)}</a>`
+      : escapeHtml(c.company_name);
+    const websiteHtml = c.website
+      ? ` <a href="${escapeAttr(c.website)}">${escapeHtml(c.website.replace(/^https?:\/\/(www\.)?/, ''))}</a>`
+      : '';
+    const key = c.slug || (c.company_name || '').toLowerCase();
+    const attendees = byCompany.get(key) || [];
+    const attendeeBlock = attendees.length ? `
+      <details class="attendees">
+        <summary>${attendees.length} attendee${attendees.length === 1 ? '' : 's'}</summary>
+        <ul>
+          ${attendees.map(a => `<li>${escapeHtml(a.name || a.slug)}${a.title ? ' — ' + escapeHtml(a.title) : ''}${(a.links && a.links.linkedin) ? ` · <a href="${escapeAttr(a.links.linkedin)}">LinkedIn</a>` : ''}</li>`).join('')}
+        </ul>
+      </details>` : '';
+    return `<tr>
+      <td class="${sc}">${escapeHtml(c.icp_fit_score || '—')}</td>
+      <td>${nameHtml}${websiteHtml}${attendeeBlock}</td>
+      <td>${escapeHtml(c.product_description || '')}</td>
+      <td>${escapeHtml(c.industry || '')}</td>
+      <td>${escapeHtml(c.icp_fit_reasoning || '')}</td>
+    </tr>`;
+  }).join('\n');
+}
+
+// ----- Compose final pages -------------------------------------------------
+
+const escapedTitle = escapeHtml(title);
+const metaLine = `${people.length} attendees · ${deduped.length} companies · ${new Date().toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric' })}`;
+
+const navHtml = (active) => `<nav class="nav-bar">
+  <a href="index.html" class="${active === 'index' ? 'active' : ''}">Follow-ups</a>
+  <a href="people.html" class="${active === 'people' ? 'active' : ''}">People</a>
+  <a href="companies.html" class="${active === 'companies' ? 'active' : ''}">Companies</a>
+</nav>`;
+
+function injectCss(html) {
+  return html.replace('</style>', `${eventCss}\n</style>`);
+}
+
+function injectScript(html) {
+  return html.replace('</body>', `${clipboardScript}\n</body>`);
+}
+
+function renderShell(activeNav, contentHtml, pageTitle) {
+  let html = template
+    .replace(/\{\{TITLE\}\}/g, escapeHtml(pageTitle))
+    .replace(/\{\{COMPANY_NAME\}\}/g, escapedTitle)
+    .replace(/\{\{META\}\}/g, metaLine)
+    .replace(/\{\{TOTAL\}\}/g, String(total))
+    .replace(/\{\{HIGH_COUNT\}\}/g, String(high))
+    .replace(/\{\{MEDIUM_COUNT\}\}/g, String(medium))
+    .replace(/\{\{LOW_COUNT\}\}/g, String(low))
+    .replace(/\{\{HIGH_PCT\}\}/g, String(highPct))
+    .replace(/\{\{MEDIUM_PCT\}\}/g, String(mediumPct))
+    .replace(/\{\{LOW_PCT\}\}/g, String(lowPct))
+    .replace(/\{\{TABLE_ROWS\}\}/g, () => '');
+
+  // Replace the entire <table>...</table> block in the template with our content
+  html = html.replace(/<table[\s\S]*?<\/table>/, `
    ${navHtml(activeNav)}\n${contentHtml}
    `); + + html = injectCss(html); + html = injectScript(html); + return html; +} + +const indexHtml = renderShell('index', renderGroupedByReadiness(people), `Event Follow-Up — ${title}`); +writeFileSync(join(dir, 'index.html'), indexHtml); + +const peopleHtml = renderShell( + 'people', + `${renderFilterBar(people)}\n${renderPeopleGrid(people)}`, + `People — ${title}` +); +writeFileSync(join(dir, 'people.html'), peopleHtml); + +const companiesContent = `
+<table>
+  <thead>
+    <tr><th>Score</th><th>Company</th><th>Product</th><th>Industry</th><th>Fit Reasoning</th></tr>
+  </thead>
+  <tbody>
+${renderCompaniesTable()}
+  </tbody>
+</table>
    `; +const companiesHtml = renderShell('companies', companiesContent, `Companies — ${title}`); +writeFileSync(join(dir, 'companies.html'), companiesHtml); + +// ----- Per-company detail pages ------------------------------------------- + +try { mkdirSync(join(dir, 'companies'), { recursive: true }); } catch {} + +for (const c of deduped) { + if (!c.body || c.body.length < 50) continue; + const sc = scoreClass(c.icp_fit_score); + const bodyHtml = mdToHtml(c.body); + + const companyHtml = ` + + + + +${escapeHtml(c.company_name)} — Research + + + + +
    + ← Back to overview +
    +

    ${escapeHtml(c.company_name)}

    +
    + ICP Score: ${escapeHtml(c.icp_fit_score || '—')} + ${c.website ? `${escapeHtml(c.website)}` : ''} +
    +
    +
    + ${c.product_description ? `
    Product
    ${escapeHtml(c.product_description)}
    ` : ''} + ${c.industry ? `
    Industry
    ${escapeHtml(c.industry)}
    ` : ''} + ${c.target_audience ? `
    Target Audience
    ${escapeHtml(c.target_audience)}
    ` : ''} + ${c.key_features ? `
    Key Features
    ${escapeHtml(c.key_features)}
    ` : ''} + ${c.employee_estimate ? `
    Employees
    ${escapeHtml(c.employee_estimate)}
    ` : ''} + ${c.funding_info ? `
    Funding
    ${escapeHtml(c.funding_info)}
    ` : ''} + ${c.headquarters ? `
    HQ
    ${escapeHtml(c.headquarters)}
    ` : ''} + ${c.icp_fit_reasoning ? `
    Fit Reasoning
    ${escapeHtml(c.icp_fit_reasoning)}
    ` : ''} +
    +
    + ${bodyHtml} +
    +
    + + +`; + + writeFileSync(join(dir, 'companies', `${c.slug}.html`), companyHtml); +} + +// ----- CSV ---------------------------------------------------------------- +// One row per attendee — the primary deliverable for direct CRM import. + +function csvEscape(v) { + if (v == null) return ''; + const s = typeof v === 'object' ? JSON.stringify(v) : String(v); + if (s.includes(',') || s.includes('"') || s.includes('\n')) return '"' + s.replace(/"/g, '""') + '"'; + return s; +} + +const csvCols = [ + 'sales_readiness', 'name', 'email', 'company', 'title', + 'icp_fit_score', 'email_subject', 'email_body', 'email_cta', + 'sales_readiness_reason', 'hook', 'role_reason', + 'linkedin', 'company_website', 'icp_fit_reasoning', +]; +const csvLines = [csvCols.join(',')]; +for (const p of people) { + const c = p._company || {}; + const links = (p.links && typeof p.links === 'object') ? p.links : {}; + const row = { + sales_readiness: (p.sales_readiness || 'COLD').toUpperCase(), + name: p.name || '', + email: p.email || '', + company: p.company || c.company_name || '', + title: p.title || '', + icp_fit_score: c.icp_fit_score || p.icp_fit_score || '', + email_subject: p.email_subject || '', + email_body: p.email_body || extractSection(p.body, 'Email') || '', + email_cta: p.email_cta || '', + sales_readiness_reason: p.sales_readiness_reason || '', + hook: p.hook || '', + role_reason: p.role_reason || '', + linkedin: links.linkedin || p.linkedin || '', + company_website: c.website || '', + icp_fit_reasoning: c.icp_fit_reasoning || '', + }; + csvLines.push(csvCols.map(k => csvEscape(row[k])).join(',')); +} +writeFileSync(join(dir, 'results.csv'), csvLines.join('\n') + '\n'); + +// ----- Summary ------------------------------------------------------------ + +const readinessCounts = { HOT: 0, WARM: 0, NURTURE: 0, COLD: 0 }; +for (const p of people) readinessCounts[readinessBucket(p)]++; + +console.error(JSON.stringify({ + total_companies: deduped.length, + total_people: people.length, + icp: { high_fit: high, medium_fit: medium, low_fit: low }, + sales_readiness: readinessCounts, + files_generated: { + index: join(dir, 'index.html'), + people: join(dir, 'people.html'), + companies: join(dir, 'companies.html'), + company_pages: deduped.filter(c => c.body && c.body.length > 50).length, + csv: join(dir, 'results.csv') + } +}, null, 2)); + +console.log(join(dir, 'index.html')); + +if (shouldOpen) { + const { execSync } = await import('child_process'); + try { execSync(`open "${join(dir, 'index.html')}"`); } catch {} +} diff --git a/skills/event-follow-up/scripts/enrich_person.mjs b/skills/event-follow-up/scripts/enrich_person.mjs new file mode 100755 index 0000000..bd77c27 --- /dev/null +++ b/skills/event-follow-up/scripts/enrich_person.mjs @@ -0,0 +1,60 @@ +#!/usr/bin/env node +// enrich_person.mjs — given a person record, run a sequence of bb searches and +// emit a structured enrichment record. Used by the per-person subagent. +// +// Usage: enrich_person.mjs --name "Greg Brockman" --company "OpenAI" --linkedin "https://..." --depth deep + +import { execFileSync } from 'child_process'; + +function flag(name, def) { + const i = process.argv.indexOf(name); + return i !== -1 ? 
+
+const csvCols = [
+  'sales_readiness', 'name', 'email', 'company', 'title',
+  'icp_fit_score', 'email_subject', 'email_body', 'email_cta',
+  'sales_readiness_reason', 'hook', 'role_reason',
+  'linkedin', 'company_website', 'icp_fit_reasoning',
+];
+const csvLines = [csvCols.join(',')];
+for (const p of people) {
+  const c = p._company || {};
+  const links = (p.links && typeof p.links === 'object') ? p.links : {};
+  const row = {
+    sales_readiness: (p.sales_readiness || 'COLD').toUpperCase(),
+    name: p.name || '',
+    email: p.email || '',
+    company: p.company || c.company_name || '',
+    title: p.title || '',
+    icp_fit_score: c.icp_fit_score || p.icp_fit_score || '',
+    email_subject: p.email_subject || '',
+    email_body: p.email_body || extractSection(p.body, 'Email') || '',
+    email_cta: p.email_cta || '',
+    sales_readiness_reason: p.sales_readiness_reason || '',
+    hook: p.hook || '',
+    role_reason: p.role_reason || '',
+    linkedin: links.linkedin || p.linkedin || '',
+    company_website: c.website || '',
+    icp_fit_reasoning: c.icp_fit_reasoning || '',
+  };
+  csvLines.push(csvCols.map(k => csvEscape(row[k])).join(','));
+}
+writeFileSync(join(dir, 'results.csv'), csvLines.join('\n') + '\n');
+
+// ----- Summary ------------------------------------------------------------
+
+const readinessCounts = { HOT: 0, WARM: 0, NURTURE: 0, COLD: 0 };
+for (const p of people) readinessCounts[readinessBucket(p)]++;
+
+console.error(JSON.stringify({
+  total_companies: deduped.length,
+  total_people: people.length,
+  icp: { high_fit: high, medium_fit: medium, low_fit: low },
+  sales_readiness: readinessCounts,
+  files_generated: {
+    index: join(dir, 'index.html'),
+    people: join(dir, 'people.html'),
+    companies: join(dir, 'companies.html'),
+    company_pages: deduped.filter(c => c.body && c.body.length > 50).length,
+    csv: join(dir, 'results.csv')
+  }
+}, null, 2));
+
+console.log(join(dir, 'index.html'));
+
+if (shouldOpen) {
+  const { execSync } = await import('child_process');
+  try { execSync(`open "${join(dir, 'index.html')}"`); } catch {}
+}
diff --git a/skills/event-follow-up/scripts/enrich_person.mjs b/skills/event-follow-up/scripts/enrich_person.mjs
new file mode 100755
index 0000000..bd77c27
--- /dev/null
+++ b/skills/event-follow-up/scripts/enrich_person.mjs
@@ -0,0 +1,60 @@
+#!/usr/bin/env node
+// enrich_person.mjs — given a person record, run a sequence of bb searches and
+// emit a structured enrichment record. Used by the per-person subagent.
+//
+// Usage: enrich_person.mjs --name "Greg Brockman" --company "OpenAI" --linkedin "https://..." --depth deep
+
+import { execFileSync } from 'child_process';
+
+function flag(name, def) {
+  const i = process.argv.indexOf(name);
+  return i !== -1 ? process.argv[i + 1] : def;
+}
+
+const name = flag('--name');
+const company = flag('--company', '');
+const linkedinIn = flag('--linkedin', '');
+const depth = flag('--depth', 'deep');
+
+if (!name) { console.error('--name required'); process.exit(1); }
+
+function bbSearch(query, n = 5) {
+  const out = execFileSync('bb', ['search', query, '--num-results', String(n)], {
+    encoding: 'utf-8', maxBuffer: 4 * 1024 * 1024, timeout: 20000,
+  });
+  return JSON.parse(out);
+}
+
+function harvestLinks(results) {
+  const links = { linkedin: linkedinIn || null, x: null, github: null, blog: null, podcast: null };
+  for (const r of results) {
+    const u = r.url || '';
+    if (!links.linkedin && /linkedin\.com\/in\//.test(u)) links.linkedin = u;
+    if (!links.x && /(x|twitter)\.com/.test(u)) links.x = u;
+    if (!links.github && /github\.com/.test(u)) links.github = u;
+    if (!links.podcast && /(spotify|podcast|simplecast|transistor)/.test(u)) links.podcast = u;
+    if (!links.blog && /(medium|substack|hashnode|dev\.to|\.blog)/.test(u)) links.blog = u;
+  }
+  return links;
+}
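+// Illustrative sketch (not from a real run): a result list containing
+//   https://www.linkedin.com/in/janedoe  and  https://github.com/janedoe
+// fills { linkedin: "https://www.linkedin.com/in/janedoe",
+//         github: "https://github.com/janedoe", ... } and leaves the unmatched
+// slots null; a --linkedin value passed on the CLI always wins, since the
+// linkedin slot is only filled when it is still empty.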
+
+const out = { name, company, linkedin: linkedinIn, hooks: [], links: {} };
+
+// Lane 1 — LinkedIn verify (always)
+const r1 = bbSearch(`${name} ${company} linkedin`);
+out.links = harvestLinks(r1.results || []);
+
+// Lane 2 — Recent activity (deep+)
+if (depth === 'deep' || depth === 'deeper') {
+  const r2 = bbSearch(`"${name}" podcast OR talk OR blog 2026`);
+  out.recentActivity = (r2.results || []).slice(0, 3).map(r => ({ title: r.title, url: r.url }));
+}
+
+// Lane 3 — GitHub + X (deeper)
+if (depth === 'deeper') {
+  const r3 = bbSearch(`"${name}" github`);
+  const r4 = bbSearch(`"${name}" site:x.com OR site:twitter.com`);
+  out.links = { ...out.links, ...harvestLinks([...(r3.results || []), ...(r4.results || [])]) };
+}
+
+console.log(JSON.stringify(out, null, 2));
diff --git a/skills/event-follow-up/scripts/extract_page.mjs b/skills/event-follow-up/scripts/extract_page.mjs
new file mode 100755
index 0000000..ad17997
--- /dev/null
+++ b/skills/event-follow-up/scripts/extract_page.mjs
@@ -0,0 +1,168 @@
+#!/usr/bin/env node
+// Extract structured page content for company research.
+// Fetches via `bb fetch` (raw HTML to a temp file), pulls title + meta tags
+// + visible body text, and auto-falls back to `bb browse` when content is thin.
+//
+// Usage: node extract_page.mjs <url> [--max-chars N]
+// Output (stdout): structured block consumable by a research subagent.
+
+import { execFileSync } from "node:child_process";
+import { mkdtempSync, readFileSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+const THIN_CONTENT_THRESHOLD = 200; // body chars under this → JS-rendered, fall back
+
+function parseArgs(argv) {
+  const args = { url: null, maxChars: 3000 };
+  for (let i = 0; i < argv.length; i++) {
+    const a = argv[i];
+    if (a === "--max-chars") args.maxChars = parseInt(argv[++i], 10);
+    else if (!args.url) args.url = a;
+  }
+  if (!args.url) {
+    console.error("Usage: extract_page.mjs <url> [--max-chars N]");
+    process.exit(2);
+  }
+  return args;
+}
+
+function bbFetch(url, outFile) {
+  execFileSync("bb", ["fetch", "--allow-redirects", url, "--output", outFile], {
+    stdio: ["ignore", "ignore", "ignore"],
+  });
+}
+
+function bbBrowseMarkdown(url) {
+  try {
+    execFileSync("bb", ["browse", "--headless", "open", url], {
+      stdio: ["ignore", "ignore", "ignore"],
+      timeout: 90000,
+    });
+    const out = execFileSync("bb", ["browse", "--headless", "get", "markdown"], {
+      encoding: "utf8",
+      timeout: 90000,
+      maxBuffer: 50 * 1024 * 1024,
+    });
+    // bb browse prints banners (e.g. "Update available...") before the JSON blob.
+    // Find the first '{' and try to JSON.parse from there.
+    const start = out.indexOf("{");
+    if (start < 0) return "";
+    try {
+      const parsed = JSON.parse(out.slice(start));
+      if (parsed && typeof parsed.markdown === "string") return parsed.markdown;
+    } catch {
+      // Fallback: extract "markdown": "..." with a lenient regex that handles
+      // escaped quotes and newlines.
+      const m = out.slice(start).match(/"markdown"\s*:\s*"((?:\\.|[^"\\])*)"/s);
+      if (m) {
+        try { return JSON.parse(`"${m[1]}"`); } catch { return m[1]; }
+      }
+    }
+    return "";
+  } catch (err) {
+    return "";
+  }
+}
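+// Illustrative (assumed banner format, not captured output): if `get markdown`
+// prints
+//   Update available: run npm i -g @browserbasehq/cli
+//   {"markdown":"# Acme Robotics\n..."}
+// then indexOf("{") skips the banner line and JSON.parse returns
+// "# Acme Robotics\n..."; the lenient regex only runs if that parse fails.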
+
+function extractMeta(html, name, attr = "name") {
+  const re = new RegExp(
+    `<meta[^>]*${attr}=["']${name}["'][^>]*content=["']([^"']*)["']`,
+    "i"
+  );
+  const m = html.match(re);
+  return m ? m[1].trim() : "";
+}
+
+function extractTitle(html) {
+  const m = html.match(/<title[^>]*>([^<]*)<\/title>/i);
+  return m ? m[1].trim() : "";
+}
+
+function extractVisibleText(html, maxChars) {
+  // Multi-line aware script/style removal, then tag stripping + entity decode.
+  let s = html
+    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, " ")
+    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, " ")
+    .replace(/<noscript[^>]*>[\s\S]*?<\/noscript>/gi, " ")
+    .replace(/<!--[\s\S]*?-->/g, " ")
+    .replace(/<[^>]+>/g, " ")
+    .replace(/&nbsp;/g, " ")
+    .replace(/&amp;/g, "&")
+    .replace(/&lt;/g, "<")
+    .replace(/&gt;/g, ">")
+    .replace(/&quot;/g, '"')
+    .replace(/&#39;/g, "'")
+    .replace(/&#[0-9]+;/g, " ")
+    .replace(/\s+/g, " ")
+    .trim();
+  return s.slice(0, maxChars);
+}
+
+function extractHeadings(html, limit = 10) {
+  const re = /<h[1-3][^>]*>([\s\S]*?)<\/h[1-3]>/gi;
+  const out = [];
+  let m;
+  while ((m = re.exec(html)) && out.length < limit) {
+    const text = m[1].replace(/<[^>]+>/g, "").replace(/\s+/g, " ").trim();
+    if (text) out.push(text);
+  }
+  return out;
+}
+
+function main() {
+  const { url, maxChars } = parseArgs(process.argv.slice(2));
+  const dir = mkdtempSync(join(tmpdir(), "extract_page_"));
+  const htmlFile = join(dir, "page.html");
+
+  let html = "";
+  let fetchOk = false;
+  try {
+    bbFetch(url, htmlFile);
+    html = readFileSync(htmlFile, "utf8");
+    fetchOk = true;
+  } catch (err) {
+    console.error(`[extract_page] bb fetch failed: ${err.message}`);
+  }
+
+  const title = extractTitle(html);
+  const metaDesc = extractMeta(html, "description");
+  const ogTitle = extractMeta(html, "og:title", "property");
+  const ogDesc = extractMeta(html, "og:description", "property");
+  const headings = extractHeadings(html);
+  let body = extractVisibleText(html, maxChars);

+  // Thin content → JS-rendered SPA → fall back to bb browse.
+  let fallbackUsed = false;
+  if (body.length < THIN_CONTENT_THRESHOLD) {
+    const md = bbBrowseMarkdown(url);
+    if (md && md.length > body.length) {
+      body = md.replace(/\s+/g, " ").slice(0, maxChars);
+      fallbackUsed = true;
+    }
+  }
+
+  rmSync(dir, { recursive: true, force: true });
+
+  // Structured output for subagent to read.
+  const lines = [
+    `URL: ${url}`,
+    `FETCH_OK: ${fetchOk}`,
+    `FALLBACK_TO_BROWSE: ${fallbackUsed}`,
+    `TITLE: ${title}`,
+    `META_DESCRIPTION: ${metaDesc}`,
+    `OG_TITLE: ${ogTitle}`,
+    `OG_DESCRIPTION: ${ogDesc}`,
+    `HEADINGS: ${headings.join(" | ")}`,
+    `BODY_CHARS: ${body.length}`,
+    `BODY:`,
+    body,
+  ];
+  process.stdout.write(lines.join("\n") + "\n");
+}
+
+main();
diff --git a/skills/event-follow-up/scripts/package.json b/skills/event-follow-up/scripts/package.json
new file mode 100644
index 0000000..0e6ea7e
--- /dev/null
+++ b/skills/event-follow-up/scripts/package.json
@@ -0,0 +1,6 @@
+{
+  "name": "event-prospecting-scripts",
+  "version": "0.1.0",
+  "type": "module",
+  "private": true
+}
diff --git a/skills/event-follow-up/scripts/parse_csv.mjs b/skills/event-follow-up/scripts/parse_csv.mjs
new file mode 100755
index 0000000..6f4abab
--- /dev/null
+++ b/skills/event-follow-up/scripts/parse_csv.mjs
@@ -0,0 +1,218 @@
+#!/usr/bin/env node
+// parse_csv.mjs — read an event-attendee CSV, auto-detect column headers, and
+// emit a normalized people.jsonl + seed_companies.txt + parse_stats.json.
+//
+// Usage: node parse_csv.mjs <input.csv> <out_dir> [--user-company <name>]
+//        [--col-name <header>] [--col-email <header>]
+//        [--col-company <header>] [--col-title <header>]
+
+import { readFileSync, writeFileSync } from 'fs';
+import { join } from 'path';
+
+const args = process.argv.slice(2);
+if (args.length < 2 || args.includes('--help')) {
+  console.error(`Usage: parse_csv.mjs <input.csv> <out_dir> [--user-company <name>]
+  [--col-name <header>] [--col-email <header>]
+  [--col-company <header>] [--col-title <header>]`);
+  process.exit(1);
+}
+
+const inputPath = args[0];
+const outDir = args[1];
+const flag = (name) => { const i = args.indexOf(name); return i !== -1 ? args[i + 1] : null; };
+const userCompany = flag('--user-company');
+const overrides = {
+  name: flag('--col-name'),
+  email: flag('--col-email'),
+  company: flag('--col-company'),
+  title: flag('--col-title'),
+};
+
+// --- Header fuzzy match ---------------------------------------------------
+
+// Each canonical key has an ordered list of header candidates. The first match
+// wins. Casing/whitespace/underscores are normalized before matching.
+const CANDIDATES = {
+  email: ['email', 'email address', 'work email', 'attendee email', 'contact email', 'e-mail'],
+  name: ['name', 'full name', 'attendee name', 'contact name'],
+  first: ['first name', 'firstname', 'given name', 'first'],
+  last: ['last name', 'lastname', 'surname', 'family name', 'last'],
+  company: ['company', 'company name', 'organization', 'organisation', 'org', 'employer', 'account', 'account name'],
+  title: ['title', 'job title', 'role', 'position', 'job role', 'jobtitle'],
+  linkedin: ['linkedin', 'linkedin url', 'linkedin profile'],
+  notes: ['notes', 'note', 'comments', 'comment', 'team notes'],
+  scanned_at: ['scanned at', 'badge scan', 'scan time', 'check-in time', 'checkin time', 'timestamp'],
+  track: ['track', 'event track', 'session track', 'topic'],
+};
+
+function normHeader(h) {
+  return (h || '').toString().trim().toLowerCase().replace(/[_\-]+/g, ' ').replace(/\s+/g, ' ');
+}
+
+function detectColumns(headers) {
+  const norm = headers.map(normHeader);
+  const map = {};
+  for (const [key, list] of Object.entries(CANDIDATES)) {
+    for (const cand of list) {
+      const idx = norm.indexOf(cand);
+      if (idx !== -1) { map[key] = idx; break; }
+    }
+  }
+  return map;
+}
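+// Illustrative (hypothetical headers): ["Work_Email", "Full Name", "ORG", "Job Title"]
+// normalize to "work email" / "full name" / "org" / "job title", so
+// detectColumns returns { email: 0, name: 1, company: 2, title: 3 }.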
+
+// --- CSV parser (RFC 4180-ish, handles quoted fields with commas/newlines) ---
+
+function parseCSV(text) {
+  // Strip UTF-8 BOM if present
+  if (text.charCodeAt(0) === 0xFEFF) text = text.slice(1);
+  const rows = [];
+  let cur = [];
+  let field = '';
+  let inQuotes = false;
+  let i = 0;
+  while (i < text.length) {
+    const c = text[i];
+    if (inQuotes) {
+      if (c === '"') {
+        if (text[i + 1] === '"') { field += '"'; i += 2; continue; } // escaped quote
+        inQuotes = false; i++; continue;
+      }
+      field += c; i++; continue;
+    }
+    if (c === '"') { inQuotes = true; i++; continue; }
+    if (c === ',') { cur.push(field); field = ''; i++; continue; }
+    if (c === '\r') { i++; continue; } // ignore CR; LF closes the row
+    if (c === '\n') { cur.push(field); rows.push(cur); cur = []; field = ''; i++; continue; }
+    field += c; i++;
+  }
+  // flush trailing field/row
+  if (field.length > 0 || cur.length > 0) { cur.push(field); rows.push(cur); }
+  return rows.filter(r => r.length > 1 || (r.length === 1 && r[0].trim().length > 0));
+}
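+// Illustrative (not from the original source): parseCSV('a,"b,""c""",d\r\ne,f,g\n')
+// yields [['a', 'b,"c"', 'd'], ['e', 'f', 'g']]: quoted commas and doubled
+// quotes survive, CRs are dropped, and the trailing newline adds no empty row.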
+
+// --- Field cleaning helpers ----------------------------------------------
+
+function clean(s) {
+  return (s == null ? '' : String(s)).trim();
+}
+
+function slugify(s) {
+  return clean(s).toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');
+}
+
+function emailDomain(email) {
+  const m = clean(email).toLowerCase().match(/@([a-z0-9.-]+)$/);
+  return m ? m[1] : null;
+}
+
+// Best-effort: derive a company name from an email domain when the CSV has no
+// company column. Strips common public-mail providers and TLDs.
+const PUBLIC_MAIL = new Set(['gmail.com','yahoo.com','outlook.com','hotmail.com','icloud.com','aol.com','proton.me','protonmail.com','live.com','me.com','msn.com']);
+function companyFromEmail(email) {
+  const d = emailDomain(email);
+  if (!d || PUBLIC_MAIL.has(d)) return null;
+  const root = d.replace(/^(www|mail|email)\./, '').split('.')[0];
+  if (!root || root.length < 2) return null;
+  // Title-case the slug for readability
+  return root.charAt(0).toUpperCase() + root.slice(1);
+}
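+// Illustrative (hypothetical addresses):
+//   companyFromEmail('sam@mail.acme.io') → 'Acme'   (mail. prefix stripped)
+//   companyFromEmail('pat@gmail.com')    → null     (public provider)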
    " to specify.'); + process.exit(1); +} + +const userCompanyLower = userCompany ? userCompany.toLowerCase() : null; + +const dataRows = rows.slice(1); +const out = []; +const skipped = { no_email: 0, user_company: 0, dup: 0 }; +const seenEmails = new Set(); + +for (const row of dataRows) { + const get = (k) => detected[k] != null ? clean(row[detected[k]]) : ''; + + const email = get('email').toLowerCase(); + if (!email || !email.includes('@')) { skipped.no_email++; continue; } + if (seenEmails.has(email)) { skipped.dup++; continue; } + seenEmails.add(email); + + // Build name from full Name OR first+last + let name = get('name'); + if (!name) { + const f = get('first'); const l = get('last'); + name = [f, l].filter(Boolean).join(' ').trim(); + } + if (!name) name = email.split('@')[0]; // last-resort fallback + + let company = get('company'); + if (!company) company = companyFromEmail(email) || ''; + + // Skip the user's own org employees — they aren't prospects + if (userCompanyLower && company && company.toLowerCase() === userCompanyLower) { + skipped.user_company++; continue; + } + // Also drop public-mail rows that ended up with no company at all (poor signal) + if (!company) { /* keep but flag */ } + + const record = { + name, + email, + company: company || null, + title: get('title') || null, + linkedin: get('linkedin') || null, + notes: get('notes') || null, + scanned_at: get('scanned_at') || null, + track: get('track') || null, + slug: slugify(name) || slugify(email.replace('@', '-at-')), + }; + out.push(record); +} + +writeFileSync(join(outDir, 'people.jsonl'), out.map(p => JSON.stringify(p)).join('\n') + '\n'); + +// Deduped, sorted company list (drop blanks and the user's own org) +const companies = [...new Set(out.map(p => p.company).filter(Boolean))].sort((a, b) => a.localeCompare(b)); +writeFileSync(join(outDir, 'seed_companies.txt'), companies.join('\n') + '\n'); + +const stats = { + input_path: inputPath, + total_rows: dataRows.length, + parsed: out.length, + unique_companies: companies.length, + skipped, + detected_columns: Object.fromEntries(Object.entries(detected).map(([k, i]) => [k, headers[i]])), + csv_headers: headers, + user_company_filter: userCompany || null, +}; +writeFileSync(join(outDir, 'parse_stats.json'), JSON.stringify(stats, null, 2)); + +console.error(`Parsed ${out.length} attendees / ${companies.length} unique companies → ${outDir}`); +console.log(JSON.stringify({ peopleCount: out.length, companyCount: companies.length, detected: stats.detected_columns }, null, 2));