Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
2fee00d
feat(report): implement directory-based report format with separate i…
quanru Dec 31, 2025
05633d4
feat(report): extract base64 images to separate script tags in direct…
quanru Dec 31, 2025
2814154
feat(api): add useDirectoryReport option for directory-based report f…
quanru Dec 31, 2025
04eacc8
refactor(report): streamline image handling and extraction processes
quanru Dec 31, 2025
68d3869
refactor(tests): update comments to English for directory report form…
quanru Dec 31, 2025
e93cad0
refactor(pixi-loader): remove unused file protocol checks and image l…
quanru Dec 31, 2025
f65c8ab
Revert "feat(components): replace anchor tags with Link components fo…
quanru Dec 31, 2025
b4ac5bc
feat(agent): clear base64 from dump memory after report generation to…
quanru Jan 4, 2026
4dc41b1
refactor(agent, utils): enhance report generation and image processin…
quanru Jan 4, 2026
6693288
refactor(api): clarify useDirectoryReport note regarding CORS restric…
quanru Jan 4, 2026
6d137e1
refactor(utils): enhance base64 extraction error handling and sanitiz…
quanru Jan 4, 2026
a856dd5
refactor(utils): simplify error handling in extractBase64ToScriptTags…
quanru Jan 4, 2026
d223720
docs(api): update useDirectoryReport note to suggest using npx http-s…
quanru Jan 4, 2026
cce11cd
refactor(agent, utils, tests): optimize report generation and memory …
quanru Jan 4, 2026
c924e3a
feat(core): implement ScreenshotRegistry for efficient screenshot man…
quanru Jan 5, 2026
001223d
refactor(agent, utils): optimize screenshot handling to reduce memory…
quanru Jan 5, 2026
f290380
feat(agent): enhance screenshot handling by integrating screenshot re…
quanru Jan 5, 2026
290c214
Update packages/core/src/utils.ts
quanru Jan 5, 2026
82d3d27
Update apps/report/src/App.tsx
quanru Jan 5, 2026
9a4f70f
Update packages/core/tests/unit-test/screenshot-registry.test.ts
quanru Jan 5, 2026
a060183
Update packages/core/src/utils.ts
quanru Jan 5, 2026
19a1234
fix(screenshot-registry): prevent ID gaps by incrementing counter aft…
quanru Jan 5, 2026
b1425de
test(screenshot-registry): add integration tests for report generatio…
quanru Jan 5, 2026
428dc56
feat(screenshot-registry): enhance image handling and restore referen…
quanru Jan 5, 2026
f39fb28
feat(core): introduce ScreenshotItem class for handling screenshot data
quanru Jan 6, 2026
f90e539
fix(playground): enhance image reference restoration and update test …
quanru Jan 6, 2026
c2672a9
fix(api): improve directory report instructions and cleanup memory us…
quanru Jan 6, 2026
b267964
fix(utils): enhance filename sanitization by trimming, limiting lengt…
quanru Jan 6, 2026
30075bf
Update packages/core/src/screenshot-item.ts
quanru Jan 6, 2026
46ff26b
fix(core): improve error handling in ScreenshotRegistry and Screensho…
quanru Jan 6, 2026
b97a0d0
fix(agent): remove unnecessary screenshot registry cleanup after repo…
quanru Jan 6, 2026
725d75e
fix(chore): implement type guard for ScreenshotItem validation and im…
quanru Jan 6, 2026
9f83442
fix(utils): remove legacy base64 image handling and streamline image …
quanru Jan 6, 2026
41ccfb0
refactor(screenshot-registry): remove legacy image reference handling…
quanru Jan 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 37 additions & 6 deletions apps/report/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { Alert, ConfigProvider, Empty, theme } from 'antd';
import { useEffect, useRef, useState } from 'react';
import { Panel, PanelGroup, PanelResizeHandle } from 'react-resizable-panels';

import type { GroupedActionDump } from '@midscene/core';
import { antiEscapeScriptTag } from '@midscene/shared/utils';
import {
Logo,
Expand All @@ -24,6 +25,10 @@ import type {
PlaywrightTasks,
VisualizerProps,
} from './types';
import {
loadImageMap,
restoreImageReferences,
} from './utils/image-restoration';

let globalRenderCount = 1;

Expand Down Expand Up @@ -258,6 +263,14 @@ function Visualizer(props: VisualizerProps): JSX.Element {

export function App() {
function getDumpElements(): PlaywrightTasks[] {
// Load image map once at the start (images extracted from dump JSON)
const imageMap = loadImageMap();
const imageCount = Object.keys(imageMap).length;
const hasImages = imageCount > 0;
if (hasImages) {
console.log(`Loaded ${imageCount} images from script tags`);
}

const dumpElements = document.querySelectorAll(
'script[type="midscene_web_dump"]',
);
Expand Down Expand Up @@ -292,7 +305,7 @@ export function App() {
});

// Lazy loading: Store raw content and parse only when get() is called
let cachedJsonContent: any = null;
let cachedJsonContent: GroupedActionDump | null = null;
let isParsed = false;

reportDump.push({
Expand All @@ -301,21 +314,39 @@ export function App() {
try {
console.time('parse_dump');
const content = antiEscapeScriptTag(el.textContent || '');
cachedJsonContent = JSON.parse(content);
cachedJsonContent = JSON.parse(content) as GroupedActionDump;

// Restore image references (handles both embedded script tags and directory-based reports)
// Must always run to convert { $screenshot: "..." } objects to strings
console.time('restore_images');
cachedJsonContent = restoreImageReferences(
cachedJsonContent,
imageMap,
);
console.timeEnd('restore_images');

console.timeEnd('parse_dump');
cachedJsonContent.attributes = attributes;
isParsed = true;
} catch (e) {
console.error(el);
console.error('failed to parse json content', e);
// Return a fallback object to prevent crashes
cachedJsonContent = {
attributes,
error: 'Failed to parse JSON content',
};
sdkVersion: 'unknown',
groupName: 'Error',
groupDescription: 'Failed to parse JSON content',
modelBriefs: [],
executions: [],
} as GroupedActionDump;
isParsed = true;
}
}
if (cachedJsonContent === null) {
console.error(
'Invariant violation: cachedJsonContent is null after parsing.',
);
throw new Error('Failed to load dump content from script tag.');
}
return cachedJsonContent;
},
attributes: attributes as PlaywrightTaskAttributes,
Expand Down
108 changes: 108 additions & 0 deletions apps/report/src/utils/image-restoration.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import { antiEscapeScriptTag } from '@midscene/shared/utils';

// Constants matching backend definitions in packages/core/src/utils.ts
// Value of the `type` attribute on the <script> tags that carry image data;
// it is interpolated into the selector used to collect those tags.
const IMAGE_SCRIPT_TYPE = 'midscene-image';

/** Map of image ID (the script tag's `data-id`) to base64 data string, loaded from script tags */
type ImageIdToBase64Map = Record<string, string>;

/**
 * Collect every image script tag in the current document into a lookup table.
 *
 * Each `<script type="midscene-image">` element contributes one entry keyed by
 * its `data-id` attribute; the value is the element's trimmed text content
 * after un-escaping. Elements without a `data-id` or without any text content
 * are skipped.
 *
 * @returns Lookup table from image ID to the un-escaped script content
 */
export function loadImageMap(): ImageIdToBase64Map {
  const selector = `script[type="${IMAGE_SCRIPT_TYPE}"]`;
  const imagesById: ImageIdToBase64Map = {};

  for (const node of Array.from(document.querySelectorAll(selector))) {
    const imageId = node.getAttribute('data-id');
    const rawContent = node.textContent;

    // Both an ID and some content are required for a usable entry.
    if (!imageId || !rawContent) {
      continue;
    }

    imagesById[imageId] = antiEscapeScriptTag(rawContent.trim());
  }

  return imagesById;
}

/**
 * Resolve the value stored under a `$screenshot` key to the string that
 * should replace the whole placeholder object.
 *
 * Resolution order:
 * 1. `undefined`, `null`, non-string values and the empty string become `''`.
 * 2. Values starting with `data:image/` are returned unchanged.
 * 3. Values starting with `./` or `/` are treated as file paths and returned
 *    unchanged.
 * 4. Anything else is treated as an image ID and looked up in `imageMap`.
 * 5. If the ID is unknown the value is returned as-is, with a warning when the
 *    map is non-empty.
 *
 * @param screenshot - Raw value found under the `$screenshot` key
 * @param imageMap - Map of image IDs to base64 data
 * @returns The replacement string for the placeholder
 */
function resolveScreenshotValue(
  screenshot: unknown,
  imageMap: ImageIdToBase64Map,
): string {
  if (screenshot === undefined || screenshot === null) {
    return '';
  }

  if (typeof screenshot !== 'string') {
    console.warn('Invalid $screenshot value type:', typeof screenshot);
    return '';
  }

  if (screenshot.length === 0) {
    return '';
  }

  // Already inline image data: nothing to restore.
  if (screenshot.startsWith('data:image/')) {
    return screenshot;
  }

  // File path (directory-based reports): leave it for the browser to load.
  if (screenshot.startsWith('./') || screenshot.startsWith('/')) {
    return screenshot;
  }

  // Only consult the map's OWN entries. A plain-object map inherits members
  // such as `constructor` or `toString`; indexing it directly with such an ID
  // would yield a function and hand it back as if it were image data.
  if (Object.prototype.hasOwnProperty.call(imageMap, screenshot)) {
    const base64 = imageMap[screenshot];
    if (typeof base64 === 'string' && base64.length > 0) {
      return base64;
    }
  }

  // Fallback: return the value as-is (could be a placeholder).
  const availableIds = Object.keys(imageMap);
  if (availableIds.length > 0) {
    console.warn(
      `Image not found for ID: ${screenshot}. Available IDs: ${availableIds.join(', ')}`,
    );
  }
  return screenshot;
}

/**
 * Recursively restore image references in parsed data.
 *
 * Every object that has a `$screenshot` key is replaced by a string (see
 * `resolveScreenshotValue` for the resolution rules). Arrays and other objects
 * are copied with their children processed the same way; the input is not
 * mutated. Strings and other primitives are returned unchanged.
 *
 * @param data - The parsed JSON data with image references
 * @param imageMap - Map of image IDs to base64 data
 * @returns Data with image references restored to base64 (or left as a
 *   path / unresolved ID when no base64 entry applies)
 */
export function restoreImageReferences<T>(
  data: T,
  imageMap: ImageIdToBase64Map,
): T {
  if (typeof data === 'string') {
    return data;
  }

  if (Array.isArray(data)) {
    return data.map((item) => restoreImageReferences(item, imageMap)) as T;
  }

  if (typeof data === 'object' && data !== null) {
    // A `$screenshot` key marks the whole object as a placeholder, even when
    // its value is empty or undefined.
    if ('$screenshot' in data) {
      const screenshot = (data as { $screenshot: unknown }).$screenshot;
      return resolveScreenshotValue(screenshot, imageMap) as T;
    }

    const result: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(data)) {
      result[key] = restoreImageReferences(value, imageMap);
    }
    return result as T;
  }

  return data;
}

// Expose the script-type constant alongside the helpers above.
export { IMAGE_SCRIPT_TYPE };
4 changes: 4 additions & 0 deletions apps/site/docs/en/api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ All agents share these base options:
- `generateReport: boolean`: If true, a report file will be generated. (Default: true)
- `reportFileName: string`: The name of the report file. (Default: generated by midscene)
- `autoPrintReportMsg: boolean`: If true, report messages will be printed. (Default: true)
- `useDirectoryReport: boolean`: If true, uses directory-based report format with screenshots saved as separate PNG files instead of embedded base64. Useful for large reports. (Default: false). **Note**: When using this option, the report must be accessed via an HTTP server or CDN URL. It cannot be opened directly using the `file://` protocol because of browser CORS (Cross-Origin Resource Sharing) restrictions that prevent loading local images from relative paths when using the file protocol. For local testing, you can serve the report directory with a simple HTTP server. Navigate to the report directory and run one of these commands:
- Using Node.js: `npx serve`
- Using Python: `python -m http.server` or `python3 -m http.server`
Then open the report at the URL printed in the terminal — by default `http://localhost:3000` for `npx serve` and `http://localhost:8000` for Python's `http.server`.
- `cacheId: string | undefined`: If provided, this cacheId will be used to save or match the cache. (Default: undefined, means cache feature is disabled)
- `aiActContext: string`: Some background knowledge that should be sent to the AI model when calling `agent.aiAct()`, like 'close the cookie consent dialog first if it exists' (Default: undefined). Previously exposed as `aiActionContext`; the legacy name is still accepted for backward compatibility.
- `replanningCycleLimit: number`: The maximum number of `aiAct` replanning cycles. Default is 20 (40 for UI-TARS models). Prefer setting this via the agent option; reading `MIDSCENE_REPLANNING_CYCLE_LIMIT` is only for backward compatibility.
Expand Down
4 changes: 4 additions & 0 deletions apps/site/docs/zh/api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Midscene 针对每个不同环境都有对应的 Agent。每个 Agent 的构造
- `generateReport: boolean`: 如果为 true,则生成报告文件。默认值为 true。
- `reportFileName: string`: 报告文件的名称,默认值由 midscene 内部生成。
- `autoPrintReportMsg: boolean`: 如果为 true,则打印报告消息。默认值为 true。
- `useDirectoryReport: boolean`: 如果为 true,则使用目录格式的报告,将截图保存为独立的 PNG 文件而非内嵌 base64。适用于报告文件过大的场景。默认值为 false。**注意**:使用此选项时,报告必须通过 HTTP 服务器或 CDN 地址访问,无法直接使用 `file://` 协议打开。这是因为浏览器的 CORS(跨源资源共享)限制会阻止从 file 协议加载相对路径的本地图片。如需在本地测试,可在报告目录下启动简易的 HTTP 服务器。进入报告目录后运行以下命令之一:
- 使用 Node.js:`npx serve`
- 使用 Python:`python -m http.server` 或 `python3 -m http.server`
然后通过终端中显示的地址访问报告——`npx serve` 默认为 `http://localhost:3000`,Python 的 `http.server` 默认为 `http://localhost:8000`。
- `cacheId: string | undefined`: 如果配置,则使用此 cacheId 保存或匹配缓存。默认值为 undefined,也就是不启用缓存。
- `aiActContext: string`: 调用 `agent.aiAct()` 时,发送给 AI 模型的背景知识,比如 "有 cookie 对话框时先关闭它",默认值为空。此前名为 `aiActionContext`,旧名称仍然兼容。
- `replanningCycleLimit: number`: `aiAct` 的最大重规划次数。默认值为 20(UI-TARS 模型默认 40)。推荐通过 agent 入参设置;`MIDSCENE_REPLANNING_CYCLE_LIMIT` 环境变量仅作兼容读取。
Expand Down
5 changes: 2 additions & 3 deletions apps/site/theme/components/Banner.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { Link } from '@rspress/core/theme-original';
import { useI18n } from '../i18n';
import { CTAButtons } from './CTAButtons';

Expand All @@ -20,12 +19,12 @@ export function Banner() {
<span className="font-sans font-medium text-[10px] md:text-xs text-black/85 dark:text-white/85">
{t('newBadge')}
<span className="mx-1 text-black/40 dark:text-white/40">|</span>
<Link
<a
className="text-black/60 hover:underline dark:text-white/60"
href="./changelog"
>
{t('changelogLink')}
</Link>
</a>
</span>
</div>
</div>
Expand Down
9 changes: 4 additions & 5 deletions apps/site/theme/components/CTAButtons.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { Link } from '@rspress/core/theme-original';
import { useI18n, useI18nUrl } from '../i18n';

export function CTAButtons() {
Expand All @@ -7,22 +6,22 @@ export function CTAButtons() {

return (
<div className="flex flex-col md:flex-row items-stretch md:items-start gap-3 md:gap-x-5 w-full md:w-auto">
<Link
<a
href={tUrl('/introduction')}
className="w-full md:w-auto h-12 px-6 flex items-center justify-center rounded-full bg-[#0555FF] hover:bg-[#0444DD] transition-colors"
>
<span className="text-sm font-semibold font-sans text-[#FAFAFA] whitespace-nowrap">
{t('introduction')}
</span>
</Link>
<Link
</a>
<a
href={tUrl('/showcases')}
className="w-full md:w-auto h-12 px-6 flex items-center justify-center rounded-full transition-colors bg-[#E5E5E5] hover:bg-[#D4D4D4] dark:bg-[#292929] dark:hover:bg-[#333333]"
>
<span className="font-semibold font-sans text-sm whitespace-nowrap text-black dark:text-white">
{t('whatsNew')}
</span>
</Link>
</a>
</div>
);
}
Loading
Loading