ScrapeGraphAI · VinciGit00 · Mar 31, 2026
diff --git a/README.md b/README.md
diff --git a/bun.lock b/bun.lock
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "just-scrape",
-	"version": "0.2.1",
+	"version": "0.3.0",
 	"description": "ScrapeGraph AI CLI tool",
 	"type": "module",
 	"main": "dist/cli.mjs",
@@ -28,7 +28,7 @@
 		"chalk": "^5.4.1",
 		"citty": "^0.1.6",
 		"dotenv": "^17.2.4",
-		"scrapegraph-js": "^1.0.0"
+		"scrapegraph-js": "github:ScrapeGraphAI/scrapegraph-js#feat/sdk-v2-migration"
 	},
 	"devDependencies": {
 		"@biomejs/biome": "^1.9.4",

diff --git a/src/cli.ts b/src/cli.ts
@@ -12,17 +12,13 @@ const main = defineCommand({
 		description: "ScrapeGraph AI CLI tool",
 	},
 	subCommands: {
-		"smart-scraper": () => import("./commands/smart-scraper.js").then((m) => m.default),
-		"search-scraper": () => import("./commands/search-scraper.js").then((m) => m.default),
+		extract: () => import("./commands/extract.js").then((m) => m.default),
+		search: () => import("./commands/search.js").then((m) => m.default),
+		scrape: () => import("./commands/scrape.js").then((m) => m.default),
 		markdownify: () => import("./commands/markdownify.js").then((m) => m.default),
 		crawl: () => import("./commands/crawl.js").then((m) => m.default),
-		sitemap: () => import("./commands/sitemap.js").then((m) => m.default),
-		scrape: () => import("./commands/scrape.js").then((m) => m.default),
-		"agentic-scraper": () => import("./commands/agentic-scraper.js").then((m) => m.default),
-		"generate-schema": () => import("./commands/generate-schema.js").then((m) => m.default),
 		history: () => import("./commands/history.js").then((m) => m.default),
 		credits: () => import("./commands/credits.js").then((m) => m.default),
-		validate: () => import("./commands/validate.js").then((m) => m.default),
 	},
 });
 

diff --git a/src/commands/agentic-scraper.ts b/src/commands/agentic-scraper.ts
diff --git a/src/commands/crawl.ts b/src/commands/crawl.ts
@@ -1,8 +1,9 @@
 import { defineCommand } from "citty";
-import * as scrapegraphai from "scrapegraph-js";
-import { resolveApiKey } from "../lib/folders.js";
+import { createClient } from "../lib/client.js";
 import * as log from "../lib/log.js";
 
+const POLL_INTERVAL_MS = 3000;
+
 export default defineCommand({
 	meta: {
 		name: "crawl",
@@ -14,49 +15,77 @@ export default defineCommand({
 			description: "Starting URL to crawl",
 			required: true,
 		},
-		prompt: {
-			type: "string",
-			alias: "p",
-			description: "Extraction prompt (required when extraction mode is on)",
-		},
-		"no-extraction": {
-			type: "boolean",
-			description: "Return markdown only (2 credits/page instead of 10)",
-		},
-		"max-pages": { type: "string", description: "Maximum pages to crawl (default 10)" },
-		depth: { type: "string", description: "Crawl depth (default 1)" },
-		schema: { type: "string", description: "Output JSON schema (as JSON string)" },
-		rules: { type: "string", description: "Crawl rules as JSON object string" },
-		"no-sitemap": { type: "boolean", description: "Disable sitemap-based URL discovery" },
+		"max-pages": { type: "string", description: "Maximum pages to crawl (default 50)" },
+		"max-depth": { type: "string", description: "Crawl depth (default 2)" },
+		"max-links-per-page": { type: "string", description: "Max links per page (default 10)" },
+		"allow-external": { type: "boolean", description: "Allow crawling external domains" },
 		stealth: { type: "boolean", description: "Bypass bot detection (+4 credits)" },
 		json: { type: "boolean", description: "Output raw JSON (pipeable)" },
 	},
+	/**
+	 * Validates the numeric flags, starts a crawl job for the given URL and polls its
+	 * status every POLL_INTERVAL_MS until it reaches a terminal state, then prints the
+	 * final status payload. Failures are reported through `out.error`.
+	 */
 	run: async ({ args }) => {
 		const out = log.create(!!args.json);
-		out.docs("https://docs.scrapegraphai.com/services/smartcrawler");
-		const key = await resolveApiKey(!!args.json);
+		out.docs("https://docs.scrapegraphai.com/api-reference/crawl");
+		const sgai = await createClient(!!args.json);
 
-		const base: Record<string, unknown> = { url: args.url };
-		if (args["max-pages"]) base.max_pages = Number(args["max-pages"]);
-		if (args.depth) base.depth = Number(args.depth);
-		if (args.rules) base.rules = JSON.parse(args.rules);
-		if (args["no-sitemap"]) base.sitemap = false;
-		if (args.stealth) base.stealth = true;
+		// Flags arrive as strings: convert the numeric ones and reject anything that is not
+		// a non-negative integer instead of passing NaN on to the SDK.
+		const crawlOptions: Record<string, unknown> = {};
+		const numericFlags = [
+			["max-pages", "maxPages"],
+			["max-depth", "maxDepth"],
+			["max-links-per-page", "maxLinksPerPage"],
+		] as const;
+		for (const [flag, option] of numericFlags) {
+			const raw = args[flag];
+			if (!raw) continue;
+			const value = Number(raw);
+			if (!Number.isInteger(value) || value < 0) {
+				out.error(`--${flag} must be a non-negative integer, got "${raw}"`);
+				return;
+			}
+			crawlOptions[option] = value;
+		}
+		if (args["allow-external"]) crawlOptions.allowExternal = true;
+		if (args.stealth) crawlOptions.fetchConfig = { stealth: true };
 
-		if (args["no-extraction"]) {
-			base.extraction_mode = false;
-		} else {
-			if (args.prompt) base.prompt = args.prompt;
-			if (args.schema) base.schema = JSON.parse(args.schema);
-		}
+		out.start("Crawling");
+		const t0 = performance.now();
+		try {
+			const job = await sgai.crawl.start(args.url, crawlOptions as any);
+			// The response may carry no job id (or no data at all); read it defensively so
+			// the fallback below runs instead of a TypeError on a missing `data`.
+			const jobId = (job.data as { id?: string } | null | undefined)?.id;
 
-		const params = base as scrapegraphai.CrawlParams;
+			if (!jobId) {
+				// Nothing to poll: print whatever the start call returned.
+				out.stop(Math.round(performance.now() - t0));
+				out.result(job.data);
+				return;
+			}
 
-		out.start("Crawling");
-		const result = await scrapegraphai.crawl(key, params, out.poll);
-		out.stop(result.elapsedMs);
+			// Poll until the crawl reaches a terminal state.
+			// NOTE(review): there is no upper bound on the wait; consider a timeout flag.
+			const terminalStatuses = new Set(["completed", "failed", "cancelled"]);
+			while (true) {
+				await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
+				const status = await sgai.crawl.status(jobId);
+				const statusData = status.data as { status: string; [key: string]: unknown };
+				out.poll(statusData.status);
 
-		if (result.data) out.result(result.data);
-		else out.error(result.error);
+				if (terminalStatuses.has(statusData.status)) {
+					out.stop(Math.round(performance.now() - t0));
+					out.result(status.data);
+					return;
+				}
+			}
+		} catch (err) {
+			out.stop(Math.round(performance.now() - t0));
+			out.error(err instanceof Error ? err.message : String(err));
+		}
 	},
 });
diff --git a/src/commands/credits.ts b/src/commands/credits.ts
@@ -1,6 +1,5 @@
 import { defineCommand } from "citty";
-import * as scrapegraphai from "scrapegraph-js";
-import { resolveApiKey } from "../lib/folders.js";
+import { createClient } from "../lib/client.js";
 import * as log from "../lib/log.js";
 
 export default defineCommand({
@@ -13,13 +12,20 @@ export default defineCommand({
 	},
 	run: async ({ args }) => {
 		const out = log.create(!!args.json);
-		const key = await resolveApiKey(!!args.json);
+		const client = await createClient(!!args.json);
 
 		out.start("Fetching credits");
-		const result = await scrapegraphai.getCredits(key);
-		out.stop(result.elapsedMs);
-
-		if (result.data) out.result(result.data);
-		else out.error(result.error);
+		const startedAt = performance.now();
+		// Milliseconds elapsed since the request was started, rounded for display.
+		const elapsedMs = () => Math.round(performance.now() - startedAt);
+		try {
+			const { data } = await client.credits();
+			out.stop(elapsedMs());
+			out.result(data);
+		} catch (failure) {
+			out.stop(elapsedMs());
+			const message = failure instanceof Error ? failure.message : String(failure);
+			out.error(message);
+		}
 	},
 });
diff --git a/src/commands/extract.ts b/src/commands/extract.ts
@@ -0,0 +1,93 @@
+import { defineCommand } from "citty";
+import { createClient } from "../lib/client.js";
+import * as log from "../lib/log.js";
+
+/**
+ * Parses the raw value of a JSON-valued CLI flag.
+ *
+ * @param flag - Flag name without the leading dashes, used in the error message.
+ * @param raw - Raw string received on the command line.
+ * @returns The parsed JSON value.
+ * @throws Error naming the offending flag when `raw` is not valid JSON.
+ */
+function parseJsonFlag(flag: string, raw: string): unknown {
+	try {
+		return JSON.parse(raw);
+	} catch (err) {
+		const reason = err instanceof Error ? err.message : String(err);
+		throw new Error(`Invalid JSON for --${flag}: ${reason}`);
+	}
+}
+
+export default defineCommand({
+	meta: {
+		name: "extract",
+		description: "Extract structured data from a URL using AI",
+	},
+	args: {
+		url: {
+			type: "positional",
+			description: "Website URL to scrape",
+			required: true,
+		},
+		prompt: {
+			type: "string",
+			alias: "p",
+			description: "Extraction prompt",
+			required: true,
+		},
+		schema: { type: "string", description: "Output JSON schema (as JSON string)" },
+		scrolls: { type: "string", description: "Number of infinite scrolls (0-100)" },
+		stealth: { type: "boolean", description: "Bypass bot detection (+4 credits)" },
+		cookies: { type: "string", description: "Cookies as JSON object string" },
+		headers: { type: "string", description: "Custom headers as JSON object string" },
+		country: { type: "string", description: "ISO country code for geo-targeting" },
+		json: { type: "boolean", description: "Output raw JSON (pipeable)" },
+	},
+	/**
+	 * Builds the extract request from the CLI flags, calls the API and prints the result.
+	 * Invalid flag values are reported through `out.error` before the extract call is made.
+	 */
+	run: async ({ args }) => {
+		const out = log.create(!!args.json);
+		out.docs("https://docs.scrapegraphai.com/api-reference/extract");
+		const sgai = await createClient(!!args.json);
+
+		// Flag parsing can throw (malformed JSON, out-of-range scrolls); catch it here so the
+		// failure goes through `out.error` like API errors do.
+		const extractOptions: Record<string, unknown> = { prompt: args.prompt };
+		try {
+			const fetchConfig: Record<string, unknown> = {};
+			if (args.scrolls) {
+				const scrolls = Number(args.scrolls);
+				if (!Number.isInteger(scrolls) || scrolls < 0 || scrolls > 100) {
+					throw new Error(
+						`--scrolls must be an integer from 0 to 100, got "${args.scrolls}"`,
+					);
+				}
+				fetchConfig.scrolls = scrolls;
+			}
+			if (args.stealth) fetchConfig.stealth = true;
+			if (args.cookies) fetchConfig.cookies = parseJsonFlag("cookies", args.cookies);
+			if (args.headers) fetchConfig.headers = parseJsonFlag("headers", args.headers);
+			if (args.country) fetchConfig.country = args.country;
+
+			if (args.schema) extractOptions.schema = parseJsonFlag("schema", args.schema);
+			if (Object.keys(fetchConfig).length > 0) extractOptions.fetchConfig = fetchConfig;
+		} catch (err) {
+			out.error(err instanceof Error ? err.message : String(err));
+			return;
+		}
+
+		out.start("Extracting");
+		const t0 = performance.now();
+		try {
+			const result = await sgai.extract(args.url, extractOptions as any);
+			out.stop(Math.round(performance.now() - t0));
+			out.result(result.data);
+		} catch (err) {
+			out.stop(Math.round(performance.now() - t0));
+			out.error(err instanceof Error ? err.message : String(err));
+		}
+	},
+});
diff --git a/src/commands/generate-schema.ts b/src/commands/generate-schema.ts