Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 111 additions & 20 deletions clis/xiaohongshu/download.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,81 @@ export function buildDownloadExtractJs(noteId) {
seenMedia.add(key);
result.media.push({ type, url });
};
const cleanImageUrl = (value) => {
if (typeof value !== 'string') return '';
let src = value.trim();
if (!src) return '';
if (!(src.includes('xhscdn') || src.includes('xiaohongshu') || src.includes('rednote'))) return '';
src = src.split('?')[0];
return src.replace(/\\/imageView\\d+\\/\\d+\\/w\\/\\d+/, '');
};
const imageUrls = new Set();
const addImageUrl = (value) => {
const src = cleanImageUrl(value);
if (src) imageUrls.add(src);
};
const getImageUrl = (value) => {
if (!value) return '';
const direct = cleanImageUrl(value);
if (direct) return direct;
if (typeof value !== 'object') return '';
const candidates = [
value.urlDefault, value.url_default,
value.urlPre, value.url_pre,
value.url, value.src,
value.originUrl, value.origin_url,
value.masterUrl, value.master_url,
];
for (const candidate of candidates) {
const src = cleanImageUrl(candidate);
if (src) return src;
}
const nestedLists = [value.infoList, value.info_list, value.urlList, value.url_list].filter(Array.isArray);
for (const list of nestedLists) {
for (const item of list) {
const src = getImageUrl(item);
if (src) return src;
}
}
return '';
};
const addImagesFromList = (list) => {
if (!Array.isArray(list)) return 0;
let added = 0;
for (const item of list) {
const src = getImageUrl(item);
if (!src) continue;
const before = imageUrls.size;
imageUrls.add(src);
if (imageUrls.size > before) added++;
}
return added;
};
const noteMatches = (note, key) => {
const ids = [
key,
note?.noteId, note?.note_id, note?.id,
note?.noteCard?.noteId, note?.noteCard?.note_id,
note?.note_card?.noteId, note?.note_card?.note_id,
].filter(Boolean).map(String);
return !result.noteId || ids.includes(result.noteId);
};
const addImagesFromNote = (note) => {
if (!note || typeof note !== 'object') return 0;
const imageLists = [
note.imageList, note.image_list,
note.images, note.image,
note.image?.imageList, note.image?.image_list,
note.image?.images, note.image?.list,
note.noteCard?.imageList, note.noteCard?.image_list,
note.note_card?.imageList, note.note_card?.image_list,
];
for (const list of imageLists) {
const added = Array.isArray(list) ? addImagesFromList(list) : 0;
if (added > 0) return added;
}
return 0;
};
const locationMatch = (location.pathname || '').match(/\\/(?:explore|note|search_result|discovery\\/item)\\/([a-f0-9]+)|\\/user\\/profile\\/[^/?#]+\\/([a-f0-9]+)/i);
if (locationMatch) {
result.noteId = locationMatch[1] || locationMatch[2];
Expand All @@ -52,27 +127,43 @@ export function buildDownloadExtractJs(noteId) {
const authorEl = document.querySelector('.username, .author-name, .name');
result.author = authorEl?.textContent?.trim() || 'unknown';

// Get images - try multiple selectors
const imageSelectors = [
'.swiper-slide img',
'.carousel-image img',
'.note-slider img',
'.note-image img',
'.image-wrapper img',
'#noteContainer .media-container img[src*="xhscdn"]',
'img[src*="ci.xiaohongshu.com"]'
];
// Prefer page state image arrays because DOM carousel nodes can be
// duplicated, hidden, or preloaded out of display order.
try {
const state = window.__INITIAL_STATE__;
const noteData = state?.note?.noteDetailMap || state?.note?.note || {};
const entries = [];
if (Array.isArray(noteData)) {
noteData.forEach((value, index) => entries.push([String(index), value]));
} else if (noteData && typeof noteData === 'object') {
Object.keys(noteData).forEach(key => entries.push([key, noteData[key]]));
}
const notes = entries
.map(([key, value]) => [key, value?.note || value?.noteCard || value?.note_card || value])
.filter(([, note]) => note && typeof note === 'object');
const matchingNotes = notes.filter(([key, note]) => noteMatches(note, key));
const candidates = matchingNotes.length > 0 ? matchingNotes : (notes.length === 1 ? notes : []);
for (const [, note] of candidates) {
if (addImagesFromNote(note) > 0) break;
}
} catch(e) {}

const imageUrls = new Set();
for (const selector of imageSelectors) {
document.querySelectorAll(selector).forEach(img => {
let src = img.src || img.getAttribute('data-src') || '';
if (src && (src.includes('xhscdn') || src.includes('xiaohongshu') || src.includes('rednote'))) {
src = src.split('?')[0];
src = src.replace(/\\/imageView\\d+\\/\\d+\\/w\\/\\d+/, '');
imageUrls.add(src);
}
});
// Fall back to DOM selectors when structured note media is unavailable.
if (imageUrls.size === 0) {
const imageSelectors = [
'.swiper-slide img',
'.carousel-image img',
'.note-slider img',
'.note-image img',
'.image-wrapper img',
'#noteContainer .media-container img[src*="xhscdn"]',
'img[src*="ci.xiaohongshu.com"]'
];
for (const selector of imageSelectors) {
document.querySelectorAll(selector).forEach(img => {
addImageUrl(img.src || img.getAttribute('data-src') || '');
});
}
}

// Get video — prefer real URL from page state over blob: URLs
Expand Down
54 changes: 53 additions & 1 deletion clis/xiaohongshu/download.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ vi.mock('@jackwener/opencli/download', () => ({
formatCookieHeader: mockFormatCookieHeader,
}));
import { getRegistry } from '@jackwener/opencli/registry';
import './download.js';
import { buildDownloadExtractJs } from './download.js';
function createPageMock(evaluateResult) {
return {
goto: vi.fn().mockResolvedValue(undefined),
Expand Down Expand Up @@ -112,4 +112,56 @@ describe('xiaohongshu download', () => {
});
expect(mockDownloadMedia).not.toHaveBeenCalled();
});
it('prefers structured note image order over DOM discovery order', () => {
const noteId = '69bc166f000000001a02069a';
const previousWindow = globalThis.window;
const previousDocument = globalThis.document;
const previousLocation = globalThis.location;
globalThis.window = {
__INITIAL_STATE__: {
note: {
noteDetailMap: {
[noteId]: {
note: {
noteId,
imageList: [
{ urlDefault: 'https://sns-webpic-qc.xhscdn.com/cover.jpg?imageView2/2/w/1080' },
{ urlPre: 'https://sns-webpic-qc.xhscdn.com/body.jpg?imageView2/2/w/1080' },
],
},
},
},
},
},
};
globalThis.location = {
href: `https://www.xiaohongshu.com/explore/${noteId}`,
pathname: `/explore/${noteId}`,
};
globalThis.document = {
body: { innerText: '' },
querySelector: vi.fn(() => null),
querySelectorAll: vi.fn((selector) => {
if (selector.includes('img')) {
return [
{ src: 'https://sns-webpic-qc.xhscdn.com/body-first-in-dom.jpg', getAttribute: vi.fn(() => '') },
{ src: 'https://sns-webpic-qc.xhscdn.com/cover-second-in-dom.jpg', getAttribute: vi.fn(() => '') },
];
}
return [];
}),
};
try {
const result = eval(buildDownloadExtractJs(noteId));
expect(result.media).toEqual([
{ type: 'image', url: 'https://sns-webpic-qc.xhscdn.com/cover.jpg' },
{ type: 'image', url: 'https://sns-webpic-qc.xhscdn.com/body.jpg' },
]);
}
finally {
globalThis.window = previousWindow;
globalThis.document = previousDocument;
globalThis.location = previousLocation;
}
});
});