Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,8 @@ OpenCLI supports downloading images, videos, and articles from supported platfor
For video downloads, install `yt-dlp` first: `brew install yt-dlp`

```bash
opencli xiaohongshu download abc123 --output ./xhs
opencli xiaohongshu download "https://www.xiaohongshu.com/search_result/<id>?xsec_token=..." --output ./xhs
opencli xiaohongshu download "https://xhslink.com/..." --output ./xhs
opencli bilibili download BV1xxx --output ./bilibili
opencli twitter download elonmusk --limit 20 --output ./twitter
opencli 1688 download 841141931191 --output ./1688-downloads
Expand Down
3 changes: 2 additions & 1 deletion README.zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,8 @@ brew install yt-dlp

```bash
# 下载小红书笔记中的图片/视频
opencli xiaohongshu download abc123 --output ./xhs
opencli xiaohongshu download "https://www.xiaohongshu.com/search_result/<id>?xsec_token=..." --output ./xhs
opencli xiaohongshu download "https://xhslink.com/..." --output ./xhs

# 下载B站视频(需要 yt-dlp)
opencli bilibili download BV1xxx --output ./bilibili
Expand Down
4 changes: 2 additions & 2 deletions clis/xiaohongshu/comments.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ cli({
strategy: Strategy.COOKIE,
navigateBefore: false,
args: [
{ name: 'note-id', required: true, positional: true, help: 'Note ID or full URL (preserves xsec_token for access)' },
{ name: 'note-id', required: true, positional: true, help: 'Full Xiaohongshu note URL with xsec_token' },
{ name: 'limit', type: 'int', default: 20, help: 'Number of top-level comments (max 50)' },
{ name: 'with-replies', type: 'boolean', default: false, help: 'Include nested replies (楼中楼)' },
],
Expand All @@ -32,7 +32,7 @@ cli({
const withReplies = Boolean(kwargs['with-replies']);
const raw = String(kwargs['note-id']);
const noteId = parseNoteId(raw);
await page.goto(buildNoteUrl(raw));
await page.goto(buildNoteUrl(raw, { commandName: 'xiaohongshu comments' }));
await page.wait({ time: 2 + Math.random() * 3 });
const data = await page.evaluate(`
(async () => {
Expand Down
71 changes: 46 additions & 25 deletions clis/xiaohongshu/comments.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,30 +27,40 @@ function createPageMock(evaluateResult) {
}
describe('xiaohongshu comments', () => {
const command = getRegistry().get('xiaohongshu/comments');
it('returns ranked comment rows', async () => {
it('returns ranked comment rows for signed full URLs', async () => {
const page = createPageMock({
loginWall: false,
results: [
{ author: 'Alice', text: 'Great note!', likes: 10, time: '2024-01-01', is_reply: false, reply_to: '' },
{ author: 'Bob', text: 'Very helpful', likes: 0, time: '2024-01-02', is_reply: false, reply_to: '' },
],
});
const result = (await command.func(page, { 'note-id': '69aadbcb000000002202f131', limit: 5 }));
expect(page.goto.mock.calls[0][0]).toContain('/search_result/69aadbcb000000002202f131');
const signedUrl = 'https://www.xiaohongshu.com/search_result/69aadbcb000000002202f131?xsec_token=abc&xsec_source=pc_search';
const result = (await command.func(page, { 'note-id': signedUrl, limit: 5 }));
expect(page.goto.mock.calls[0][0]).toBe(signedUrl);
expect(result).toHaveLength(2);
expect(result[0]).toMatchObject({ rank: 1, author: 'Alice', text: 'Great note!', likes: 10 });
expect(result[1]).toMatchObject({ rank: 2, author: 'Bob', text: 'Very helpful', likes: 0 });
});
it('preserves full /explore/ URL as-is for navigation', async () => {
it('rejects bare note IDs before browser navigation', async () => {
const page = createPageMock({ loginWall: false, results: [] });
await expect(command.func(page, { 'note-id': '69aadbcb000000002202f131', limit: 5 })).rejects.toMatchObject({
code: 'ARGUMENT',
message: expect.stringContaining('signed URL'),
hint: expect.stringContaining('xsec_token'),
});
expect(page.goto).not.toHaveBeenCalled();
});
it('preserves signed /explore/ URL as-is for navigation', async () => {
const page = createPageMock({
loginWall: false,
results: [{ author: 'Alice', text: 'Nice', likes: 1, time: '2024-01-01', is_reply: false, reply_to: '' }],
});
await command.func(page, {
'note-id': 'https://www.xiaohongshu.com/explore/69aadbcb000000002202f131',
'note-id': 'https://www.xiaohongshu.com/explore/69aadbcb000000002202f131?xsec_token=abc&xsec_source=pc_search',
limit: 5,
});
expect(page.goto.mock.calls[0][0]).toContain('/explore/69aadbcb000000002202f131');
expect(page.goto.mock.calls[0][0]).toContain('/explore/69aadbcb000000002202f131?xsec_token=abc');
});
it('preserves full search_result URL with xsec_token for navigation', async () => {
const page = createPageMock({
Expand All @@ -61,22 +71,21 @@ describe('xiaohongshu comments', () => {
await command.func(page, { 'note-id': fullUrl, limit: 5 });
expect(page.goto.mock.calls[0][0]).toBe(fullUrl);
});
it('throws AuthRequiredError when login wall is detected', async () => {
const page = createPageMock({ loginWall: true, results: [] });
await expect(command.func(page, { 'note-id': 'abc123', limit: 5 })).rejects.toThrow('Note comments require login');
});
it('throws SECURITY_BLOCK with bare-id guidance when risk control blocks the comments page', async () => {
it('preserves signed /user/profile/<user>/<note> URLs for navigation', async () => {
const page = createPageMock({
pageUrl: 'https://www.xiaohongshu.com/website-login/error?error_code=300017',
securityBlock: true,
loginWall: false,
results: [],
});
await expect(command.func(page, { 'note-id': 'abc123', limit: 5 })).rejects.toMatchObject({
code: 'SECURITY_BLOCK',
hint: expect.stringContaining('xsec_token'),
results: [{ author: 'Alice', text: 'Nice', likes: 1, time: '2024-01-01', is_reply: false, reply_to: '' }],
});
expect(page.wait).toHaveBeenCalledWith(expect.objectContaining({ time: expect.any(Number) }));
const fullUrl = 'https://www.xiaohongshu.com/user/profile/user123/69aadbcb000000002202f131?xsec_token=abc&xsec_source=pc_user';
await command.func(page, { 'note-id': fullUrl, limit: 5 });
expect(page.goto.mock.calls[0][0]).toBe(fullUrl);
});
it('throws AuthRequiredError when login wall is detected', async () => {
const page = createPageMock({ loginWall: true, results: [] });
await expect(command.func(page, {
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
limit: 5,
})).rejects.toThrow('Note comments require login');
});
it('throws SECURITY_BLOCK with retry guidance when a full URL comments page is blocked', async () => {
const page = createPageMock({
Expand All @@ -95,11 +104,17 @@ describe('xiaohongshu comments', () => {
});
it('returns empty array when no comments are found', async () => {
const page = createPageMock({ loginWall: false, results: [] });
await expect(command.func(page, { 'note-id': 'abc123', limit: 5 })).resolves.toEqual([]);
await expect(command.func(page, {
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
limit: 5,
})).resolves.toEqual([]);
});
it('uses condition-based comment scrolling instead of a fixed blind loop', async () => {
const page = createPageMock({ loginWall: false, results: [] });
await command.func(page, { 'note-id': 'abc123', limit: 5 });
await command.func(page, {
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
limit: 5,
});
const script = page.evaluate.mock.calls[0][0];
expect(script).toContain("const beforeCount = scroller.querySelectorAll('.parent-comment').length");
expect(script).toContain("const afterCount = scroller.querySelectorAll('.parent-comment').length");
Expand All @@ -115,7 +130,10 @@ describe('xiaohongshu comments', () => {
reply_to: '',
}));
const page = createPageMock({ loginWall: false, results: manyComments });
const result = (await command.func(page, { 'note-id': 'abc123', limit: 3 }));
const result = (await command.func(page, {
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
limit: 3,
}));
expect(result).toHaveLength(3);
expect(result[0].rank).toBe(1);
expect(result[2].rank).toBe(3);
Expand All @@ -128,7 +146,10 @@ describe('xiaohongshu comments', () => {
{ author: 'Bob', text: 'Very helpful', likes: 0, time: '2024-01-02', is_reply: false, reply_to: '' },
],
});
const result = (await command.func(page, { 'note-id': 'abc123', limit: -3 }));
const result = (await command.func(page, {
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
limit: -3,
}));
expect(result).toHaveLength(1);
expect(result[0]).toMatchObject({ rank: 1, author: 'Alice' });
});
Expand All @@ -143,7 +164,7 @@ describe('xiaohongshu comments', () => {
],
});
const result = (await command.func(page, {
'note-id': 'abc123', limit: 50, 'with-replies': true,
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok', limit: 50, 'with-replies': true,
}));
expect(result).toHaveLength(3);
expect(result[0]).toMatchObject({ author: 'Alice', is_reply: false, reply_to: '' });
Expand All @@ -166,7 +187,7 @@ describe('xiaohongshu comments', () => {
});
// Limit to 2 top-level comments — should include A + 2 replies + B = 4 rows
const result = (await command.func(page, {
'note-id': 'abc123', limit: 2, 'with-replies': true,
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok', limit: 2, 'with-replies': true,
}));
expect(result).toHaveLength(4);
expect(result.map((r) => r.author)).toEqual(['A', 'A1', 'A2', 'B']);
Expand Down
13 changes: 6 additions & 7 deletions clis/xiaohongshu/download.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
* Xiaohongshu download — download images and videos from a note.
*
* Usage:
* opencli xiaohongshu download <note-id-or-url> --output ./xhs
* opencli xiaohongshu download <signed-note-url-or-shortlink> --output ./xhs
*
* Accepts a bare note ID, a full xiaohongshu.com URL (with xsec_token),
* or a short link (http://xhslink.com/...).
* Accepts a full xiaohongshu.com URL with xsec_token or an xhslink short link.
*/
import { cli, Strategy } from '@jackwener/opencli/registry';
import { formatCookieHeader } from '@jackwener/opencli/download';
Expand All @@ -20,15 +19,15 @@ cli({
strategy: Strategy.COOKIE,
navigateBefore: false,
args: [
{ name: 'note-id', positional: true, required: true, help: 'Note ID, full URL, or short link' },
{ name: 'note-id', positional: true, required: true, help: 'Full Xiaohongshu note URL with xsec_token, or xhslink short link' },
{ name: 'output', default: './xiaohongshu-downloads', help: 'Output directory' },
],
columns: ['index', 'type', 'status', 'size'],
func: async (page, kwargs) => {
const rawInput = String(kwargs['note-id']);
const output = kwargs.output;
const noteId = parseNoteId(rawInput);
await page.goto(buildNoteUrl(rawInput));
await page.goto(buildNoteUrl(rawInput, { allowShortLink: true, commandName: 'xiaohongshu download' }));
await page.wait({ time: 1 + Math.random() * 2 });
// Extract note info and media URLs
const data = await page.evaluate(`
Expand All @@ -51,9 +50,9 @@ cli({
seenMedia.add(key);
result.media.push({ type, url });
};
const locationMatch = (location.pathname || '').match(/\\/(?:explore|note|search_result|discovery\\/item)\\/([a-f0-9]+)/i);
const locationMatch = (location.pathname || '').match(/\\/(?:explore|note|search_result|discovery\\/item)\\/([a-f0-9]+)|\\/user\\/profile\\/[^/?#]+\\/([a-f0-9]+)/i);
if (locationMatch) {
result.noteId = locationMatch[1];
result.noteId = locationMatch[1] || locationMatch[2];
}

// Get title
Expand Down
22 changes: 17 additions & 5 deletions clis/xiaohongshu/download.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,19 +70,31 @@ describe('xiaohongshu download', () => {
filenamePrefix: '69bc166f000000001a02069a',
}));
});
it('throws SECURITY_BLOCK with bare-id guidance before starting downloads', async () => {
it('uses canonical note id for signed user profile note URLs', async () => {
const page = createPageMock({
noteId: '',
media: [{ type: 'image', url: 'https://ci.xiaohongshu.com/example.jpg' }],
});
const fullUrl = 'https://www.xiaohongshu.com/user/profile/user123/69bc166f000000001a02069a?xsec_token=abc&xsec_source=pc_user';
await command.func(page, { 'note-id': fullUrl, output: './out' });
expect(page.goto.mock.calls[0][0]).toBe(fullUrl);
expect(mockDownloadMedia).toHaveBeenCalledWith([{ type: 'image', url: 'https://ci.xiaohongshu.com/example.jpg' }], expect.objectContaining({
subdir: '69bc166f000000001a02069a',
filenamePrefix: '69bc166f000000001a02069a',
}));
});
it('rejects bare note IDs before browser navigation', async () => {
const page = createPageMock({
pageUrl: 'https://www.xiaohongshu.com/website-login/error?error_code=300017',
securityBlock: true,
noteId: '69bc166f000000001a02069a',
media: [],
});
await expect(command.func(page, { 'note-id': '69bc166f000000001a02069a', output: './out' })).rejects.toMatchObject({
code: 'SECURITY_BLOCK',
code: 'ARGUMENT',
message: expect.stringContaining('signed URL'),
hint: expect.stringContaining('xsec_token'),
});
expect(page.goto).not.toHaveBeenCalled();
expect(mockDownloadMedia).not.toHaveBeenCalled();
expect(page.wait).toHaveBeenCalledWith(expect.objectContaining({ time: expect.any(Number) }));
});
it('throws SECURITY_BLOCK with retry guidance for blocked full URLs', async () => {
const page = createPageMock({
Expand Down
58 changes: 46 additions & 12 deletions clis/xiaohongshu/note-helpers.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,59 @@
import { ArgumentError } from '@jackwener/opencli/errors';

/** Side-effect-free helpers shared by xiaohongshu note and comments commands. */
/**
 * Extract a bare note ID from a full URL or raw ID string.
 *
 * Recognises `/explore/<id>`, `/note/<id>`, `/search_result/<id>`,
 * `/discovery/item/<id>` and `/user/profile/<user>/<id>` paths
 * (hex IDs, matched case-insensitively).
 *
 * @param {string} input - Note URL or raw note ID; surrounding whitespace is ignored.
 * @returns {string} The captured note ID, or the trimmed input when no known path matches.
 */
export function parseNoteId(input) {
  const trimmed = input.trim();
  const match = trimmed.match(/\/(?:explore|note|search_result|discovery\/item)\/([a-f0-9]+)|\/user\/profile\/[^/?#]+\/([a-f0-9]+)/i);
  if (!match) {
    return trimmed;
  }
  // Exactly one of the two capture groups is populated, depending on which alternative matched.
  return match[1] ?? match[2];
}

/** User-facing hint explaining which kind of note URL is accepted; buildNoteUrl uses it as the hint of the ArgumentError it throws. */
export const XHS_SIGNED_URL_HINT = 'Pass a full Xiaohongshu note URL with xsec_token from search results or user/profile context.';

/**
 * Tell whether the input is an xhslink.com short link.
 *
 * @param {string} input - Candidate URL string.
 * @returns {boolean} True when the input starts with `http(s)://xhslink.com/` (case-insensitive).
 */
function isShortLink(input) {
  const shortLinkPattern = /^https?:\/\/xhslink\.com\//i;
  return shortLinkPattern.test(input);
}

/**
 * Tell whether a hostname is xiaohongshu.com or one of its subdomains.
 *
 * @param {string} hostname - Hostname to check; compared case-insensitively.
 * @returns {boolean} True for `xiaohongshu.com` and any `*.xiaohongshu.com` host.
 */
function isXiaohongshuHost(hostname) {
  const host = hostname.toLowerCase();
  if (host === 'xiaohongshu.com') {
    return true;
  }
  // The leading dot keeps look-alike hosts such as "evilxiaohongshu.com" out.
  return host.endsWith('.xiaohongshu.com');
}

/**
 * Tell whether a URL pathname points at a note detail page.
 *
 * Two shapes are accepted, both with a hex note ID and matched case-insensitively:
 * `/{explore|note|search_result|discovery/item}/<id>` and `/user/profile/<user>/<id>`.
 *
 * @param {string} pathname - Pathname to check.
 * @returns {boolean} True when the pathname matches one of the supported shapes.
 */
function isSupportedNotePath(pathname) {
  const directNotePath = /^\/(?:explore|note|search_result|discovery\/item)\/[a-f0-9]+(?:[/?#]|$)/i;
  const profileNotePath = /^\/user\/profile\/[^/?#]+\/[a-f0-9]+(?:[/?#]|$)/i;
  return [directNotePath, profileNotePath].some((pattern) => pattern.test(pathname));
}

/**
 * Validate a user-supplied note reference and return the URL to navigate to.
 *
 * XHS note detail pages now require a valid signed URL for reliable access.
 * Bare note IDs no longer resolve deterministically, so callers must provide
 * a full note URL with xsec_token or, when `allowShortLink` is enabled
 * (downloads only), an xhslink short link.
 *
 * @param {string} input - Raw user input; surrounding whitespace is ignored.
 * @param {object} [options]
 * @param {boolean} [options.allowShortLink=false] - Accept xhslink.com short links as-is.
 * @param {string} [options.commandName='xiaohongshu note'] - Command name used in the error message.
 * @returns {string} The trimmed input, unchanged, when it is an accepted URL.
 * @throws {ArgumentError} When the input is a bare ID, a disallowed short link,
 *   a URL on another host or path, or a note URL without a non-empty xsec_token.
 */
export function buildNoteUrl(input, options = {}) {
  const { allowShortLink = false, commandName = 'xiaohongshu note' } = options;
  const trimmed = input.trim();
  const message = `${commandName} now requires a full signed URL`;
  const hint = allowShortLink
    ? `${XHS_SIGNED_URL_HINT} For downloads, xhslink short links are also supported.`
    : XHS_SIGNED_URL_HINT;

  // Anything that is not an http(s) URL (e.g. a bare note ID) is rejected up front.
  if (!/^https?:\/\//.test(trimmed)) {
    throw new ArgumentError(message, hint);
  }

  // Short links carry no token themselves; the browser follows their redirect.
  if (isShortLink(trimmed)) {
    if (allowShortLink) {
      return trimmed;
    }
    throw new ArgumentError(message, hint);
  }

  let url;
  try {
    url = new URL(trimmed);
  } catch {
    // An unparseable URL is reported the same way as any other unsupported input.
    throw new ArgumentError(message, hint);
  }

  const xsecToken = url.searchParams.get('xsec_token')?.trim();
  if (isXiaohongshuHost(url.hostname) && isSupportedNotePath(url.pathname) && xsecToken) {
    // Navigate with the original string so the signed query is preserved byte-for-byte.
    return trimmed;
  }
  throw new ArgumentError(message, hint);
}
8 changes: 3 additions & 5 deletions clis/xiaohongshu/note.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
* Extracts title, author, description text, and engagement metrics
* (likes, collects, comment count) via DOM extraction.
*
* Supports both bare note IDs and full URLs (with xsec_token).
* Bare IDs now use /search_result/<id> which works without xsec_token
* when the user is logged in via cookies.
* Requires a full Xiaohongshu note URL with xsec_token.
*/
import { cli, Strategy } from '@jackwener/opencli/registry';
import { AuthRequiredError, CliError, EmptyResultError } from '@jackwener/opencli/errors';
Expand All @@ -19,13 +17,13 @@ cli({
strategy: Strategy.COOKIE,
navigateBefore: false,
args: [
{ name: 'note-id', required: true, positional: true, help: 'Note ID or full URL (preserves xsec_token for access)' },
{ name: 'note-id', required: true, positional: true, help: 'Full Xiaohongshu note URL with xsec_token' },
],
columns: ['field', 'value'],
func: async (page, kwargs) => {
const raw = String(kwargs['note-id']);
const noteId = parseNoteId(raw);
const url = buildNoteUrl(raw);
const url = buildNoteUrl(raw, { commandName: 'xiaohongshu note' });
await page.goto(url);
await page.wait({ time: 2 + Math.random() * 3 });
const data = await page.evaluate(`
Expand Down
Loading