diff --git a/apps/site/docs/en/api.mdx b/apps/site/docs/en/api.mdx
index 1ad22c0d36..a4c2aeb66a 100644
--- a/apps/site/docs/en/api.mdx
+++ b/apps/site/docs/en/api.mdx
@@ -113,7 +113,7 @@ In Midscene, you can choose to use either auto planning or instant action.
 
 :::
 
-### `agent.aiAct()` or `.ai()`
+### `agent.aiAct()` or `agent.ai()`
 
 This method allows you to perform a series of UI actions described in natural language. Midscene automatically plans the steps and executes them.
 
@@ -131,6 +131,7 @@ function aiAct(
   options?: {
     cacheable?: boolean;
     deepThink?: 'unset' | true | false;
+    fileChooserAccept?: string | string[];
   },
 ): Promise<void>;
 function ai(prompt: string): Promise<void>; // shorthand form
@@ -142,6 +143,9 @@ function ai(prompt: string): Promise<void>; // shorthand form
   - `options?: Object` - Optional, a configuration object containing:
     - `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
     - `deepThink?: 'unset' | true | false` - Whether to enable deep thinking during planning when the model supports it (depends on MIDSCENE_MODEL_FAMILY). Default is `'unset'` (same as omitting) and follows the model provider's default strategy. [Learn more about deepThink](./model-strategy#about-the-deepthink-option-in-aiact).
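+    - `fileChooserAccept?: string | string[]` - When a file chooser pops up, specify the file path(s) to accept. Can be a single file path or an array of paths. Relative paths are resolved against the current working directory, and an error is thrown if a file does not exist. Only available in web pages (Playwright, Puppeteer).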
+      - **Note**: If the file input does not support multiple files (no `multiple` attribute) but multiple files are provided, an error will be thrown.
+      - **Note**: If a file chooser is triggered but no `fileChooserAccept` parameter is provided, the file chooser will be ignored and the page can continue to operate normally.
 
 - Return Value:
 
@@ -193,6 +197,9 @@ function aiTap(locate: string | Object, options?: Object): Promise<void>;
     - `deepThink?: boolean` - If true, Midscene will call AI model twice to precisely locate the element, which can improve accuracy. False by default. With newer models (e.g. Qwen3 / Doubao 1.6 / Gemini 3), the gain is less obvious.
     - `xpath?: string` - The xpath of the element to operate. If provided, Midscene will first use this xpath to locate the element before using the cache and the AI model. Empty by default.
     - `cacheable?: boolean` - Whether cacheable when enabling [caching feature](./caching.mdx). True by default.
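+    - `fileChooserAccept?: string | string[]` - When a file chooser pops up, specify the file path(s) to accept. Can be a single file path or an array of paths. Relative paths are resolved against the current working directory, and an error is thrown if a file does not exist. Only available in web pages (Playwright, Puppeteer).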
+      - **Note**: If the file input does not support multiple files (no `multiple` attribute) but multiple files are provided, an error will be thrown.
+      - **Note**: If a file chooser is triggered but no `fileChooserAccept` parameter is provided, the file chooser will be ignored and the page can continue to operate normally.
 
 - Return Value:
 
@@ -207,6 +214,10 @@ await agent.aiTap('The login button at the top of the page');
 await agent.aiTap('The login button at the top of the page', {
   deepThink: true,
 });
+
+// File upload: tap the upload button and select files
+await agent.aiTap('Choose file button', { fileChooserAccept: ['./document.pdf'] });
+await agent.aiTap('Upload images', { fileChooserAccept: ['./image1.jpg', './image2.png'] });
 ```
 
 ### `agent.aiHover()`
diff --git a/apps/site/docs/zh/api.mdx b/apps/site/docs/zh/api.mdx
index 0d46567e54..20dc524bf8 100644
--- a/apps/site/docs/zh/api.mdx
+++ b/apps/site/docs/zh/api.mdx
@@ -115,7 +115,7 @@ const agent = new PuppeteerAgent(page, {
 
 :::
 
-### `agent.aiAct()` 或 `.ai()`
+### `agent.aiAct()` 或 `agent.ai()`
 
 这个方法允许你通过自然语言描述一系列 UI 操作步骤。Midscene 会自动规划这些步骤并执行。
 
@@ -133,6 +133,7 @@ function aiAct(
   options?: {
     cacheable?: boolean;
     deepThink?: 'unset' | true | false;
+    fileChooserAccept?: string | string[];
   },
 ): Promise<void>;
 function ai(prompt: string): Promise<void>; // 简写形式
@@ -144,6 +145,9 @@ function ai(prompt: string): Promise<void>; // 简写形式
   - `options?: Object` - 可选，一个配置对象，包含：
     - `cacheable?: boolean` - 当启用 [缓存功能](./caching.mdx) 时，是否允许缓存当前 API 调用结果。默认值为 true
     - `deepThink?: 'unset' | true | false` - 当模型支持时（取决于 MIDSCENE_MODEL_FAMILY），是否开启规划阶段的深度思考能力。默认值为 `'unset'`（等同于省略该参数），跟随模型服务商的默认策略。[详情参阅 deepThink 说明](./model-strategy#关于-aiact-方法的-deepthink-参数)。
+    - `fileChooserAccept?: string | string[]` - 当文件选择器弹出时，指定对应的文件路径。可以是单个文件路径或路径数组。相对路径会基于当前工作目录解析；如果文件不存在，会抛出错误。仅在 web 页面（Playwright、Puppeteer）中可用。
+      - **注意**：如果文件输入框不支持多文件（没有 `multiple` 属性），但是传入了多个文件，会抛出错误。
+      - **注意**：如果点击触发了文件选择器但没有传入 `fileChooserAccept` 参数，文件选择器会被忽略，页面可以继续正常操作。
 
 - 返回值：
 
@@ -178,7 +182,7 @@ await agent.aiAct('发布一条微博，内容为 "Hello World"');
 
 ### `agent.aiTap()`
 
-点击某个元素。
+点击某个元素
 
 - 类型
 
@@ -192,6 +196,9 @@ function aiTap(locate: string | Object, options?: Object): Promise<void>;
     - `deepThink?: boolean` - 是否开启深度思考。如果为 true，Midscene 会调用 AI 模型两次以精确定位元素，从而提升准确性。默认值为 false。对于新一代模型（如 Qwen3 / Doubao 1.6 / Gemini 3），带来的收益不明显。
     - `xpath?: string` - 目标元素的 xpath 路径，用于执行当前操作。如果提供了这个 xpath，Midscene 会优先使用该 xpath 来找到元素，然后依次使用缓存和 AI 模型。默认值为空
     - `cacheable?: boolean` - 当启用 [缓存功能](./caching.mdx) 时，是否允许缓存当前 API 调用结果。默认值为 true
+    - `fileChooserAccept?: string | string[]` - 当文件选择器弹出时，指定对应的文件路径。可以是单个文件路径或路径数组。相对路径会基于当前工作目录解析；如果文件不存在，会抛出错误。仅在 web 页面（Playwright、Puppeteer）中可用。
+      - **注意**：如果文件输入框不支持多文件（没有 `multiple` 属性），但是传入了多个文件，会抛出错误。
+      - **注意**：如果点击触发了文件选择器但没有传入 `fileChooserAccept` 参数，文件选择器会被忽略，页面可以继续正常操作。
 - 返回值：
 
   - `Promise<void>`
@@ -203,6 +210,10 @@ await agent.aiTap('页面顶部的登录按钮');
 
 // 使用 deepThink 功能精确定位元素
 await agent.aiTap('页面顶部的登录按钮', { deepThink: true });
+
+// 文件上传：点击上传按钮并选择文件
+await agent.aiTap('选择文件按钮', { fileChooserAccept: ['./document.pdf'] });
+await agent.aiTap('上传图片', { fileChooserAccept: ['./image1.jpg', './image2.png'] });
 ```
 
 ### `agent.aiHover()`
diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts
index c548f0de16..bbcaf1f865 100644
--- a/packages/core/src/agent/agent.ts
+++ b/packages/core/src/agent/agent.ts
@@ -7,6 +7,7 @@ import {
   type AgentWaitForOpt,
   type CacheConfig,
   type DeepThinkOption,
+  type DetailedLocateParam,
   type DeviceAction,
   type ExecutionDump,
   type ExecutionRecorderItem,
@@ -52,6 +53,8 @@ import {
   parseYamlScript,
 } from '../yaml/index';
 
+import { existsSync } from 'node:fs';
+import { resolve } from 'node:path';
 import type { AbstractInterface } from '@/device';
 import type { TaskRunner } from '@/task-runner';
 import {
@@ -65,9 +68,13 @@ import { imageInfoOfBase64, resizeImgBase64 } from '@midscene/shared/img';
 import { getDebug } from '@midscene/shared/logger';
 import { assert } from '@midscene/shared/utils';
 import { defineActionAssert } from '../device';
-// import type { AndroidDeviceInputOpt } from '../device';
 import { TaskCache } from './task-cache';
-import { TaskExecutionError, TaskExecutor, locatePlanForLocate } from './tasks';
+import {
+  TaskExecutionError,
+  TaskExecutor,
+  locatePlanForLocate,
+  withFileChooser,
+} from './tasks';
 import { locateParamStr, paramStr, taskTitleStr, typeStr } from './ui-utils';
 import {
   commonContextParser,
@@ -139,6 +146,7 @@ const defaultVlmUiTarsReplanningCycleLimit = 40;
 
 export type AiActOptions = {
   cacheable?: boolean;
+  fileChooserAccept?: string | string[];
   deepThink?: DeepThinkOption;
 };
 
@@ -590,13 +598,22 @@ export class Agent<
     return output;
   }
 
-  async aiTap(locatePrompt: TUserPrompt, opt?: LocateOption) {
+  async aiTap(
+    locatePrompt: TUserPrompt,
+    opt?: LocateOption & { fileChooserAccept?: string | string[] },
+  ) {
     assert(locatePrompt, 'missing locate prompt for tap');
 
     const detailedLocateParam = buildDetailedLocateParam(locatePrompt, opt);
 
-    return this.callActionInActionSpace('Tap', {
-      locate: detailedLocateParam,
+    const fileChooserAccept = opt?.fileChooserAccept
+      ? this.normalizeFileInput(opt.fileChooserAccept)
+      : undefined;
+    // Run the tap through withFileChooser so the given files can be accepted
+    return withFileChooser(this.interface, fileChooserAccept, async () => {
+      return this.callActionInActionSpace('Tap', {
+        locate: detailedLocateParam,
+      });
     });
   }
 
@@ -848,81 +865,97 @@ export class Agent<
   }
 
   async aiAct(taskPrompt: string, opt?: AiActOptions) {
-    const modelConfigForPlanning =
-      this.modelConfigManager.getModelConfig('planning');
-    const defaultIntentModelConfig =
-      this.modelConfigManager.getModelConfig('default');
-
-    const includeBboxInPlanning =
-      modelConfigForPlanning.modelName === defaultIntentModelConfig.modelName &&
-      modelConfigForPlanning.openaiBaseURL ===
-        defaultIntentModelConfig.openaiBaseURL;
-    debug('setting includeBboxInPlanning to', includeBboxInPlanning);
+    const fileChooserAccept = opt?.fileChooserAccept
+      ? this.normalizeFileInput(opt.fileChooserAccept)
+      : undefined;
 
-    const cacheable = opt?.cacheable;
-    const deepThink = opt?.deepThink === 'unset' ? undefined : opt?.deepThink;
-    const replanningCycleLimit = this.resolveReplanningCycleLimit(
-      modelConfigForPlanning,
-    );
-    // if vlm-ui-tars, plan cache is not used
-    const isVlmUiTars = modelConfigForPlanning.vlMode === 'vlm-ui-tars';
-    const matchedCache =
-      isVlmUiTars || cacheable === false
-        ? undefined
-        : this.taskCache?.matchPlanCache(taskPrompt);
-    if (
-      matchedCache &&
-      this.taskCache?.isCacheResultUsed &&
-      matchedCache.cacheContent?.yamlWorkflow?.trim()
-    ) {
-      // log into report file
-      await this.taskExecutor.loadYamlFlowAsPlanning(
-        taskPrompt,
-        matchedCache.cacheContent.yamlWorkflow,
+    const runAiAct = async () => {
+      const modelConfigForPlanning =
+        this.modelConfigManager.getModelConfig('planning');
+      const defaultIntentModelConfig =
+        this.modelConfigManager.getModelConfig('default');
+
+      const includeBboxInPlanning =
+        modelConfigForPlanning.modelName ===
+          defaultIntentModelConfig.modelName &&
+        modelConfigForPlanning.openaiBaseURL ===
+          defaultIntentModelConfig.openaiBaseURL;
+      debug('setting includeBboxInPlanning to', includeBboxInPlanning);
+
+      const cacheable = opt?.cacheable;
+      const deepThink = opt?.deepThink === 'unset' ? undefined : opt?.deepThink;
+      const replanningCycleLimit = this.resolveReplanningCycleLimit(
+        modelConfigForPlanning,
       );
+      // if vlm-ui-tars, plan cache is not used
+      const isVlmUiTars = modelConfigForPlanning.vlMode === 'vlm-ui-tars';
+      const matchedCache =
+        isVlmUiTars || cacheable === false
+          ? undefined
+          : this.taskCache?.matchPlanCache(taskPrompt);
+      if (
+        matchedCache &&
+        this.taskCache?.isCacheResultUsed &&
+        matchedCache.cacheContent?.yamlWorkflow?.trim()
+      ) {
+        // log into report file
+        await this.taskExecutor.loadYamlFlowAsPlanning(
+          taskPrompt,
+          matchedCache.cacheContent.yamlWorkflow,
+        );
-      debug('matched cache, will call .runYaml to run the action');
-      const yaml = matchedCache.cacheContent.yamlWorkflow;
-      return this.runYaml(yaml);
-    }
+        debug('matched cache, will call .runYaml to run the action');
+        const yaml = matchedCache.cacheContent.yamlWorkflow;
+        // runYaml gets no fileChooserAccept, so attach the listener around it
+        const replay = () => this.runYaml(yaml);
+        return withFileChooser(this.interface, fileChooserAccept, replay);
+      }
 
-    // If cache matched but yamlWorkflow is empty, fall through to normal execution
+      // If cache matched but yamlWorkflow is empty, fall through to normal execution
 
-    const imagesIncludeCount: number | undefined = 2;
-    const { output } = await this.taskExecutor.action(
-      taskPrompt,
-      modelConfigForPlanning,
-      defaultIntentModelConfig,
-      includeBboxInPlanning,
-      this.aiActContext,
-      cacheable,
-      replanningCycleLimit,
-      imagesIncludeCount,
-      deepThink,
-    );
+      const useDeepThink = (this.opts as any)?._deepThink;
+      if (useDeepThink) {
+        debug('using deep think planning settings');
+      }
+      const imagesIncludeCount: number | undefined = useDeepThink
+        ? undefined
+        : 2;
+      const { output } = await this.taskExecutor.action(
+        taskPrompt,
+        modelConfigForPlanning,
+        defaultIntentModelConfig,
+        includeBboxInPlanning,
+        this.aiActContext,
+        cacheable,
+        replanningCycleLimit,
+        imagesIncludeCount,
+        deepThink,
+        fileChooserAccept,
+      );
 
-    // update cache
-    if (this.taskCache && output?.yamlFlow && cacheable !== false) {
-      const yamlContent: MidsceneYamlScript = {
-        tasks: [
+      // update cache
+      if (this.taskCache && output?.yamlFlow && cacheable !== false) {
+        const yamlContent: MidsceneYamlScript = {
+          tasks: [
+            {
+              name: taskPrompt,
+              flow: output.yamlFlow,
+            },
+          ],
+        };
+        const yamlFlowStr = yaml.dump(yamlContent);
+        this.taskCache.updateOrAppendCacheRecord(
           {
-            name: taskPrompt,
-            flow: output.yamlFlow,
+            type: 'plan',
+            prompt: taskPrompt,
+            yamlWorkflow: yamlFlowStr,
           },
-        ],
-      };
-      const yamlFlowStr = yaml.dump(yamlContent);
-      this.taskCache.updateOrAppendCacheRecord(
-        {
-          type: 'plan',
-          prompt: taskPrompt,
-          yamlWorkflow: yamlFlowStr,
-        },
-        matchedCache,
-      );
-    }
+          matchedCache,
+        );
+      }
 
-    return output;
+      return output;
+    };
+    return await runAiAct();
   }
 
   /**
@@ -1472,6 +1505,21 @@ export class Agent<
     return null;
   }
 
+  // Resolves every path to an absolute one; throws if a file does not exist.
+  private normalizeFilePaths(files: string[]): string[] {
+    const toAbsoluteExisting = (file: string): string => {
+      const absolutePath = resolve(file);
+      if (existsSync(absolutePath)) return absolutePath;
+      throw new Error(`File not found: ${file}`);
+    };
+    return files.map((file) => toAbsoluteExisting(file));
+  }
+
+  // Accepts a single path or a list and returns validated absolute paths.
+  private normalizeFileInput(files: string | string[]): string[] {
+    return this.normalizeFilePaths(Array.isArray(files) ? files : [files]);
+  }
+
   /**
    * Manually flush cache to file
    * @param options - Optional configuration
diff --git a/packages/core/src/agent/tasks.ts b/packages/core/src/agent/tasks.ts
index 332d2ce96e..61afb9bd07 100644
--- a/packages/core/src/agent/tasks.ts
+++ b/packages/core/src/agent/tasks.ts
@@ -1,6 +1,6 @@
 import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';
 import type { TMultimodalPrompt, TUserPrompt } from '@/common';
-import type { AbstractInterface } from '@/device';
+import type { AbstractInterface, FileChooserHandler } from '@/device';
 import type Service from '@/service';
 import type { TaskRunner } from '@/task-runner';
 import { TaskExecutionError } from '@/task-runner';
@@ -210,6 +210,40 @@ export class TaskExecutor {
     replanningCycleLimitOverride?: number,
     imagesIncludeCount?: number,
     deepThink?: DeepThinkOption,
+    fileChooserAccept?: string[],
+  ): Promise<
+    ExecutionResult<
+      | {
+          yamlFlow?: MidsceneYamlFlowItem[]; // for cache use
+        }
+      | undefined
+    >
+  > {
+    return withFileChooser(this.interface, fileChooserAccept, async () => {
+      return this.runAction(
+        userPrompt,
+        modelConfigForPlanning,
+        modelConfigForDefaultIntent,
+        includeBboxInPlanning,
+        aiActContext,
+        cacheable,
+        replanningCycleLimitOverride,
+        imagesIncludeCount,
+        deepThink,
+      );
+    });
+  }
+
+  private async runAction(
+    userPrompt: string,
+    modelConfigForPlanning: IModelConfig,
+    modelConfigForDefaultIntent: IModelConfig,
+    includeBboxInPlanning: boolean,
+    aiActContext?: string,
+    cacheable?: boolean,
+    replanningCycleLimitOverride?: number,
+    imagesIncludeCount?: number,
+    deepThink?: DeepThinkOption,
   ): Promise<
     ExecutionResult<
       | {
@@ -408,13 +442,12 @@ export class TaskExecutor {
       }
     }
 
-    const finalResult = {
+    return {
       output: {
         yamlFlow,
       },
       runner,
     };
-    return finalResult;
   }
 
   private createTypeQueryTask(
@@ -663,3 +696,37 @@ export class TaskExecutor {
     return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);
   }
 }
+
+/**
+ * Runs `action` with a file chooser listener that accepts `fileChooserAccept`;
+ * with no files, `action` runs without a listener. Throws if the interface has
+ * no `registerFileChooserListener`, or rethrows an error the listener captured.
+ */
+export async function withFileChooser<T>(
+  interfaceInstance: AbstractInterface,
+  fileChooserAccept: string[] | undefined,
+  action: () => Promise<T>,
+): Promise<T> {
+  if (!fileChooserAccept || fileChooserAccept.length === 0) {
+    return action();
+  }
+  if (!interfaceInstance.registerFileChooserListener) {
+    throw new Error(
+      `File upload is not supported on ${interfaceInstance.interfaceType}`,
+    );
+  }
+  const { dispose, getError } =
+    await interfaceInstance.registerFileChooserListener(
+      async (chooser: FileChooserHandler) => {
+        await chooser.accept(fileChooserAccept);
+      },
+    );
+  try {
+    const outcome = await action();
+    const uploadError = getError(); // checked only after the action succeeded
+    if (uploadError) throw uploadError;
+    return outcome;
+  } finally {
+    dispose();
+  }
+}
diff --git a/packages/core/src/device/index.ts b/packages/core/src/device/index.ts
index 75be7f1d55..d7220e887e 100644
--- a/packages/core/src/device/index.ts
+++ b/packages/core/src/device/index.ts
@@ -11,6 +11,10 @@ import { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';
 import { z } from 'zod';
 import type { ElementCacheFeature, Rect, Size, UIContext } from '../types';
 
+export interface FileChooserHandler {
+  accept(files: string[]): Promise<void>;
+}
+
 export abstract class AbstractInterface {
   abstract interfaceType: string;
 
@@ -35,6 +39,11 @@ export abstract class AbstractInterface {
   abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;
   abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;
 
+  // Web only: handles file choosers until `dispose` is called; `getError` returns a failure captured while handling
+  registerFileChooserListener?(
+    handler: (chooser: FileChooserHandler) => Promise<void>,
+  ): Promise<{ dispose: () => void; getError: () => Error | undefined }>;
+
   // @deprecated do NOT extend this method
   abstract getElementsNodeTree?: () => Promise<ElementNode>;
 
@@ -76,7 +85,6 @@ export const defineAction = <
 export const actionTapParamSchema = z.object({
   locate: getMidsceneLocationSchema().describe('The element to be tapped'),
 });
-// Override the inferred type to use LocateResultElement for the runtime locate field
 export type ActionTapParam = {
   locate: LocateResultElement;
 };
diff --git a/packages/core/src/yaml.ts b/packages/core/src/yaml.ts
index 266c2eac88..68452286be 100644
--- a/packages/core/src/yaml.ts
+++ b/packages/core/src/yaml.ts
@@ -9,6 +9,7 @@ export interface LocateOption {
   cacheable?: boolean; // user can set this param to false to disable the cache for a single agent api
   xpath?: string; // only available in web
   uiContext?: UIContext;
+  fileChooserAccept?: string | string[]; // file path(s) to upload when tapping triggers a file chooser
 }
 
 export interface ServiceExtractOption {
diff --git a/packages/web-integration/src/playwright/page.ts b/packages/web-integration/src/playwright/page.ts
index 0b8a525f01..086305edde 100644
--- a/packages/web-integration/src/playwright/page.ts
+++ b/packages/web-integration/src/playwright/page.ts
@@ -1,9 +1,47 @@
-import type { Page as PlaywrightPageType } from 'playwright';
+import type { FileChooser, Page as PlaywrightPageType } from 'playwright';
 import { Page as BasePage } from '../puppeteer/base-page';
 import type { WebPageOpt } from '../web-element';
 
 export class WebPage extends BasePage<'playwright', PlaywrightPageType> {
+  private playwrightFileChooserHandler?: (
+    chooser: FileChooser,
+  ) => Promise<void>;
+
   constructor(page: PlaywrightPageType, opts?: WebPageOpt) {
     super(page, 'playwright', opts);
   }
+
+  // Answers Playwright 'filechooser' events through `handler` until the
+  // returned `dispose` runs; `getError` exposes a failure raised meanwhile.
+  async registerFileChooserListener(
+    handler: (
+      chooser: import('@midscene/core/device').FileChooserHandler,
+    ) => Promise<void>,
+  ): Promise<{ dispose: () => void; getError: () => Error | undefined }> {
+    const page = this.underlyingPage as PlaywrightPageType;
+    let capturedError: Error | undefined;
+    const listener = async (chooser: FileChooser) => {
+      try {
+        await handler({ accept: (files: string[]) => chooser.setFiles(files) });
+      } catch (err) {
+        capturedError = err instanceof Error ? err : new Error(String(err));
+      }
+    };
+    // Like the Puppeteer page, keep a single listener: drop a stale one first
+    if (this.playwrightFileChooserHandler) {
+      page.off('filechooser', this.playwrightFileChooserHandler);
+    }
+    this.playwrightFileChooserHandler = listener;
+    page.on('filechooser', listener);
+
+    return {
+      dispose: () => {
+        page.off('filechooser', listener);
+        if (this.playwrightFileChooserHandler === listener) {
+          this.playwrightFileChooserHandler = undefined;
+        }
+      },
+      getError: () => capturedError,
+    };
+  }
 }
diff --git a/packages/web-integration/src/puppeteer/base-page.ts b/packages/web-integration/src/puppeteer/base-page.ts
index 187eb5ced2..84c4697303 100644
--- a/packages/web-integration/src/puppeteer/base-page.ts
+++ b/packages/web-integration/src/puppeteer/base-page.ts
@@ -30,7 +30,7 @@ import {
 } from '@midscene/shared/node';
 import { assert } from '@midscene/shared/utils';
 import type { Page as PlaywrightPage } from 'playwright';
-import type { Page as PuppeteerPage } from 'puppeteer';
+import type { CDPSession, Protocol, Page as PuppeteerPage } from 'puppeteer';
 import {
   type KeyInput,
   type MouseButton,
@@ -66,6 +66,10 @@ export class Page<
   private onAfterInvokeAction?: AbstractInterface['afterInvokeAction'];
   private customActions?: DeviceAction<any>[];
   private enableTouchEventsInActionSpace: boolean;
+  private puppeteerFileChooserSession?: CDPSession;
+  private puppeteerFileChooserHandler?: (
+    event: Protocol.Page.FileChooserOpenedEvent,
+  ) => Promise<void>;
   interfaceType: AgentType;
 
   actionSpace(): DeviceAction[] {
@@ -365,22 +369,18 @@ export class Page<
           await (this.underlyingPage as PlaywrightPage).mouse.dblclick(x, y, {
             button,
           });
-        } else {
-          if (this.interfaceType === 'puppeteer') {
-            if (button === 'left' && count === 1) {
-              await (this.underlyingPage as PuppeteerPage).mouse.click(x, y);
-            } else {
-              await (this.underlyingPage as PuppeteerPage).mouse.click(x, y, {
-                button,
-                count,
-              });
-            }
-          } else if (this.interfaceType === 'playwright') {
-            (this.underlyingPage as PlaywrightPage).mouse.click(x, y, {
-              button,
-              clickCount: count,
-            });
+        } else if (this.interfaceType === 'puppeteer') {
+          const page = this.underlyingPage as PuppeteerPage;
+          if (button === 'left' && count === 1) {
+            await page.mouse.click(x, y);
+          } else {
+            await page.mouse.click(x, y, { button, count });
           }
+        } else if (this.interfaceType === 'playwright') {
+          await (this.underlyingPage as PlaywrightPage).mouse.click(x, y, {
+            button,
+            clickCount: count,
+          });
         }
       },
       wheel: async (deltaX: number, deltaY: number) => {
@@ -686,6 +686,89 @@ export class Page<
       await page.mouse.up({ button: 'left' });
     }
   }
+
+  // Lazily creates (and caches) a CDP session that intercepts file choosers.
+  private async ensurePuppeteerFileChooserSession(
+    page: PuppeteerPage,
+  ): Promise<CDPSession> {
+    const cached = this.puppeteerFileChooserSession;
+    if (cached) return cached;
+    const created = await page.target().createCDPSession();
+    await created.send('Page.enable');
+    await created.send('DOM.enable');
+    await created.send('Page.setInterceptFileChooserDialog', { enabled: true });
+    this.puppeteerFileChooserSession = created;
+    return created;
+  }
+
+  // Puppeteer only: forwards intercepted CDP file chooser events to `handler`.
+  // Call the returned `dispose` when done; `getError` exposes a handler failure.
+  async registerFileChooserListener(
+    handler: (
+      chooser: import('@midscene/core/device').FileChooserHandler,
+    ) => Promise<void>,
+  ): Promise<{ dispose: () => void; getError: () => Error | undefined }> {
+    if (this.interfaceType !== 'puppeteer') {
+      throw new Error(
+        'registerFileChooserListener is only supported in Puppeteer',
+      );
+    }
+    const page = this.underlyingPage as PuppeteerPage;
+    const session = await this.ensurePuppeteerFileChooserSession(page);
+    if (this.puppeteerFileChooserHandler) {
+      session.off('Page.fileChooserOpened', this.puppeteerFileChooserHandler);
+    }
+    let capturedError: Error | undefined;
+    this.puppeteerFileChooserHandler = async (event) => {
+      if (event.backendNodeId === undefined) {
+        debugPage('puppeteer file chooser opened without backendNodeId, skip');
+        return;
+      }
+      try {
+        await handler({
+          accept: async (files: string[]) => {
+            if (files.length > 1) {
+              const { node } = await session.send('DOM.describeNode', {
+                backendNodeId: event.backendNodeId,
+              });
+              // attributes is flat [name, value, ...]; only even slots are names
+              const hasMultiple = node.attributes?.some(
+                (attr, index) => index % 2 === 0 && attr === 'multiple',
+              );
+              if (!hasMultiple) {
+                throw new Error(
+                  'Non-multiple file input can only accept single file',
+                );
+              }
+            }
+            await session.send('DOM.setFileInputFiles', {
+              files,
+              backendNodeId: event.backendNodeId,
+            });
+          },
+        });
+      } catch (error) {
+        capturedError = error as Error;
+      }
+    };
+    session.on('Page.fileChooserOpened', this.puppeteerFileChooserHandler);
+    return {
+      dispose: () => {
+        if (this.puppeteerFileChooserHandler) {
+          session.off('Page.fileChooserOpened', this.puppeteerFileChooserHandler);
+        }
+        // detach() rejects once the page is gone; never leave that unhandled
+        void session.detach().catch((error) => {
+          debugPage(`failed to detach file chooser session: ${error}`);
+        });
+        this.puppeteerFileChooserHandler = undefined;
+        if (this.puppeteerFileChooserSession === session) {
+          this.puppeteerFileChooserSession = undefined;
+        }
+      },
+      getError: () => capturedError,
+    };
+  }
 }
 
 export function forceClosePopup(
diff --git a/packages/web-integration/src/web-page.ts b/packages/web-integration/src/web-page.ts
index a4c69b530b..6e3c50c1a8 100644
--- a/packages/web-integration/src/web-page.ts
+++ b/packages/web-integration/src/web-page.ts
@@ -429,6 +429,8 @@ export const commonWebActionsForWebPage = <T extends AbstractWebPage>(
   defineActionTap(async (param) => {
     const element = param.locate;
     assert(element, 'Element not found, cannot tap');
+
+    // Pure tap action - file handling is done at Page layer via registerFileChooserListener
     await page.mouse.click(element.center[0], element.center[1], {
       button: 'left',
     });
diff --git a/packages/web-integration/tests/ai/fixtures/file-upload.html b/packages/web-integration/tests/ai/fixtures/file-upload.html
new file mode 100644
index 0000000000..445b8125d7
--- /dev/null
+++ b/packages/web-integration/tests/ai/fixtures/file-upload.html
@@ -0,0 +1,57 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>File Upload Test</title>
+    <style>
+        body { font-family: Arial, sans-serif; padding: 20px; }
+        .upload-area { border: 2px dashed #ccc; padding: 20px; margin: 20px 0; text-align: center; }
+        .upload-btn { background: #007bff; color: white; padding: 10px 20px; border: none; cursor: pointer; }
+        .file-list { margin-top: 20px; }
+        .file-item { padding: 5px; background: #f0f0f0; margin: 5px 0; }
+    </style>
+</head>
+<body>
+    <h1>File Upload Test Page</h1>
+
+    <div class="upload-area">
+        <input type="file" id="file-input" multiple style="display: none;">
+        <button class="upload-btn" onclick="document.getElementById('file-input').click()">Choose Files</button>
+        <p>Supports multiple file upload</p>
+    </div>
+
+    <div class="upload-area">
+        <input type="file" id="single-file-input" style="display: none;">
+        <button class="upload-btn" onclick="document.getElementById('single-file-input').click()">Choose Single File</button>
+        <p>Supports single file upload only</p>
+    </div>
+
+    <div class="file-list" id="file-list">
+        <h3>Selected Files:</h3>
+        <div id="selected-files"></div>
+    </div>
+
+    <script>
+        document.getElementById('file-input').addEventListener('change', function(e) {
+            const files = Array.from(e.target.files);
+            displayFiles(files, 'multiple');
+        });
+
+        document.getElementById('single-file-input').addEventListener('change', function(e) {
+            const files = Array.from(e.target.files);
+            displayFiles(files, 'single');
+        });
+
+        function displayFiles(files, type) {
+            const container = document.getElementById('selected-files');
+            container.innerHTML = '';
+
+            files.forEach(file => {
+                const div = document.createElement('div');
+                div.className = 'file-item';
+                div.textContent = `${file.name} (${file.size} bytes) - ${type}`;
+                container.appendChild(div);
+            });
+        }
+    </script>
+</body>
+</html>
diff --git a/packages/web-integration/tests/ai/fixtures/relative-test.txt b/packages/web-integration/tests/ai/fixtures/relative-test.txt
new file mode 100644
index 0000000000..c10bdc72eb
--- /dev/null
+++ b/packages/web-integration/tests/ai/fixtures/relative-test.txt
@@ -0,0 +1 @@
+Relative path test
\ No newline at end of file
diff --git a/packages/web-integration/tests/ai/fixtures/test-file-1.txt b/packages/web-integration/tests/ai/fixtures/test-file-1.txt
new file mode 100644
index 0000000000..387f499b27
--- /dev/null
+++ b/packages/web-integration/tests/ai/fixtures/test-file-1.txt
@@ -0,0 +1 @@
+Test file 1 content
\ No newline at end of file
diff --git a/packages/web-integration/tests/ai/fixtures/test-file-2.txt b/packages/web-integration/tests/ai/fixtures/test-file-2.txt
new file mode 100644
index 0000000000..cadc748681
--- /dev/null
+++ b/packages/web-integration/tests/ai/fixtures/test-file-2.txt
@@ -0,0 +1 @@
+Test file 2 content
\ No newline at end of file
diff --git a/packages/web-integration/tests/ai/fixtures/test-file.txt b/packages/web-integration/tests/ai/fixtures/test-file.txt
new file mode 100644
index 0000000000..e4f2ad66fb
--- /dev/null
+++ b/packages/web-integration/tests/ai/fixtures/test-file.txt
@@ -0,0 +1 @@
+This is a test file for upload
\ No newline at end of file
diff --git a/packages/web-integration/tests/ai/web/playwright/file-upload.spec.ts b/packages/web-integration/tests/ai/web/playwright/file-upload.spec.ts
new file mode 100644
index 0000000000..72de174434
--- /dev/null
+++ b/packages/web-integration/tests/ai/web/playwright/file-upload.spec.ts
@@ -0,0 +1,118 @@
+import { join } from 'node:path';
+import { pathToFileURL } from 'node:url';
+import { expect } from 'playwright/test';
+import { test } from './fixture';
+
+// Directory holding the upload page and the sample files used below.
+const fixturesDir = join(__dirname, '../../fixtures');
+
+/** Returns the absolute path of a file inside the fixtures directory. */
+const fixturePath = (name: string): string => join(fixturesDir, name);
+
+// Built with pathToFileURL rather than string concatenation so that spaces,
+// reserved characters and Windows drive letters produce a valid file:// URL.
+const uploadPageUrl = pathToFileURL(fixturePath('file-upload.html')).href;
+
+test.describe('file upload functionality', () => {
+  test('should upload single file', async ({ aiTap, aiAssert, page }) => {
+    const testFile = fixturePath('test-file.txt');
+
+    await page.goto(uploadPageUrl);
+
+    // Upload single file
+    await aiTap('Choose Single File', { fileChooserAccept: [testFile] });
+
+    // Verify file is selected
+    await aiAssert('page displays "test-file.txt"');
+    await aiAssert('page displays "single"');
+  });
+
+  test('should upload multiple files', async ({ aiTap, aiAssert, page }) => {
+    const testFile1 = fixturePath('test-file-1.txt');
+    const testFile2 = fixturePath('test-file-2.txt');
+
+    await page.goto(uploadPageUrl);
+
+    // Upload multiple files
+    await aiTap('Choose Files', {
+      fileChooserAccept: [testFile1, testFile2],
+    });
+
+    // Verify files are selected
+    await aiAssert('page displays "test-file-1.txt"');
+    await aiAssert('page displays "test-file-2.txt"');
+    await aiAssert('page displays "multiple"');
+  });
+
+  test('should handle relative paths', async ({ aiTap, aiAssert, page }) => {
+    await page.goto(uploadPageUrl);
+
+    // Upload file using relative path
+    // NOTE(review): presumably resolved against the process cwd - confirm.
+    await aiTap('Choose Single File', {
+      fileChooserAccept: ['./tests/ai/fixtures/relative-test.txt'],
+    });
+
+    // Verify file is selected
+    await aiAssert('page displays "relative-test.txt"');
+  });
+
+  test('should throw error for non-existent file', async ({ aiTap, page }) => {
+    await page.goto(uploadPageUrl);
+
+    // Attempt to upload non-existent file
+    await expect(
+      aiTap('Choose Files', {
+        fileChooserAccept: ['./non-existent-file.txt'],
+      }),
+    ).rejects.toThrow(/File not found/);
+  });
+
+  test('should throw error when uploading multiple files to single-file input', async ({
+    aiTap,
+    aiAssert,
+    page,
+  }) => {
+    const testFile1 = fixturePath('test-file-1.txt');
+    const testFile2 = fixturePath('test-file-2.txt');
+
+    await page.goto(uploadPageUrl);
+
+    // Attempt to upload multiple files to single-file input (no 'multiple' attribute)
+    // This should throw an error because the input only accepts single file
+    await expect(
+      aiTap('Choose Single File', {
+        fileChooserAccept: [testFile1, testFile2],
+      }),
+    ).rejects.toThrow(/Non-multiple file input/);
+
+    // Verify that no files were uploaded after the error
+    await aiAssert('page does not display "test-file-1.txt"');
+    await aiAssert('page does not display "test-file-2.txt"');
+
+    // Verify page is still interactive - can upload a single file successfully
+    const testFile = fixturePath('test-file.txt');
+    await aiTap('Choose Single File', { fileChooserAccept: [testFile] });
+    await aiAssert('page displays "test-file.txt"');
+  });
+
+  test('should allow page interaction when file chooser is triggered but no files provided', async ({
+    aiTap,
+    aiAssert,
+    page,
+  }) => {
+    await page.goto(uploadPageUrl);
+
+    // Click the upload button without providing fileChooserAccept
+    // The file chooser will be triggered but dismissed without selecting files
+    await aiTap('Choose Single File');
+
+    // Verify page is still interactive - can perform other actions
+    await aiAssert('page displays "File Upload Test Page"');
+
+    // Can still upload files after dismissing the chooser
+    const testFile = fixturePath('test-file.txt');
+    await aiTap('Choose Single File', { fileChooserAccept: [testFile] });
+    await aiAssert('page displays "test-file.txt"');
+  });
+});
diff --git a/packages/web-integration/tests/ai/web/puppeteer/file-upload.test.ts b/packages/web-integration/tests/ai/web/puppeteer/file-upload.test.ts
new file mode 100644
index 0000000000..3fa2b473b3
--- /dev/null
+++ b/packages/web-integration/tests/ai/web/puppeteer/file-upload.test.ts
@@ -0,0 +1,183 @@
+import { join } from 'node:path';
+import { pathToFileURL } from 'node:url';
+import { PuppeteerAgent } from '@/puppeteer';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+import { launchPage } from './utils';
+
+vi.setConfig({
+  testTimeout: 60 * 1000,
+});
+
+// Directory holding the upload page and the sample files used below.
+const fixturesDir = join(__dirname, '../../fixtures');
+
+/** Returns the absolute path of a file inside the fixtures directory. */
+const fixturePath = (name: string): string => join(fixturesDir, name);
+
+// Built with pathToFileURL rather than string concatenation so that spaces,
+// reserved characters and Windows drive letters produce a valid file:// URL.
+const uploadPageUrl = pathToFileURL(fixturePath('file-upload.html')).href;
+
+describe('file upload functionality', () => {
+  let resetFn: (() => Promise<void>) | undefined;
+  let activeAgent: PuppeteerAgent | undefined;
+
+  /**
+   * Opens the upload fixture page and wraps it in a PuppeteerAgent. Both
+   * handles are recorded so that afterEach can tear them down.
+   */
+  async function openUploadPage(): Promise<PuppeteerAgent> {
+    const { originPage, reset } = await launchPage(uploadPageUrl);
+    resetFn = reset;
+    activeAgent = new PuppeteerAgent(originPage);
+    return activeAgent;
+  }
+
+  afterEach(async () => {
+    // Take ownership of the handles and clear the shared slots first, so a
+    // later test that fails before opening a page cannot destroy a stale
+    // agent or reset a stale browser a second time.
+    const agent = activeAgent;
+    const reset = resetFn;
+    activeAgent = undefined;
+    resetFn = undefined;
+
+    if (agent) {
+      try {
+        await agent.destroy();
+      } catch (e) {
+        console.warn('agent destroy error', e);
+      }
+    }
+    if (reset) {
+      await reset();
+    }
+  });
+
+  it('should upload single file', async () => {
+    const testFile = fixturePath('test-file.txt');
+    const agent = await openUploadPage();
+
+    // Upload single file
+    await agent.aiTap('Choose Single File', { fileChooserAccept: [testFile] });
+
+    // Verify file is selected
+    await agent.aiAssert('page displays "test-file.txt"');
+    await agent.aiAssert('page displays "single"');
+  });
+
+  it('should upload multiple files', async () => {
+    const testFile1 = fixturePath('test-file-1.txt');
+    const testFile2 = fixturePath('test-file-2.txt');
+    const agent = await openUploadPage();
+
+    // Upload multiple files
+    await agent.aiTap('Choose Files', {
+      fileChooserAccept: [testFile1, testFile2],
+    });
+
+    // Verify files are selected
+    await agent.aiAssert('page displays "test-file-1.txt"');
+    await agent.aiAssert('page displays "test-file-2.txt"');
+    await agent.aiAssert('page displays "multiple"');
+  });
+
+  it('should upload files via aiAct', async () => {
+    const testFile1 = fixturePath('test-file-1.txt');
+    const testFile2 = fixturePath('test-file-2.txt');
+    const agent = await openUploadPage();
+
+    await agent.aiAct(
+      'click "Choose Files" button above the text "Supports multiple file upload"',
+      {
+        fileChooserAccept: [testFile1, testFile2],
+      },
+    );
+
+    await agent.aiAssert('page displays "test-file-1.txt"');
+    await agent.aiAssert('page displays "test-file-2.txt"');
+    await agent.aiAssert('page displays "multiple"');
+  });
+
+  it('should not time out when no file chooser is triggered in aiAct', async () => {
+    const testFile = fixturePath('test-file.txt');
+    const agent = await openUploadPage();
+
+    await agent.aiAct('click the page title', {
+      fileChooserAccept: [testFile],
+    });
+  });
+
+  it('should handle relative paths', async () => {
+    const agent = await openUploadPage();
+
+    // Upload file using relative path
+    await agent.aiTap('Choose Single File', {
+      fileChooserAccept: ['./tests/ai/fixtures/relative-test.txt'],
+    });
+
+    // Verify file is selected
+    await agent.aiAssert('page displays "relative-test.txt"');
+  });
+
+  it('should throw error for non-existent file', async () => {
+    const agent = await openUploadPage();
+
+    // Attempt to upload non-existent file
+    await expect(
+      agent.aiTap('Choose Files', {
+        fileChooserAccept: ['./non-existent-file.txt'],
+      }),
+    ).rejects.toThrow(/File not found/);
+  });
+
+  it('should not time out when no file chooser is triggered', async () => {
+    const testFile = fixturePath('test-file.txt');
+    const agent = await openUploadPage();
+
+    await agent.aiTap('the title "File Upload Test Page"', {
+      fileChooserAccept: [testFile],
+    });
+  });
+
+  it('should throw error when uploading multiple files to single-file input', async () => {
+    const testFile1 = fixturePath('test-file-1.txt');
+    const testFile2 = fixturePath('test-file-2.txt');
+    const agent = await openUploadPage();
+
+    // Attempt to upload multiple files to single-file input (no 'multiple' attribute)
+    // This should throw an error because the input only accepts single file
+    await expect(
+      agent.aiTap('Choose Single File', {
+        fileChooserAccept: [testFile1, testFile2],
+      }),
+    ).rejects.toThrow(/Non-multiple file input can only accept single file/);
+
+    // Verify that no files were uploaded after the error
+    await agent.aiAssert('page does not display "test-file-1.txt"');
+    await agent.aiAssert('page does not display "test-file-2.txt"');
+
+    // Verify page is still interactive - can upload a single file successfully
+    const testFile = fixturePath('test-file.txt');
+    await agent.aiTap('Choose Single File', { fileChooserAccept: [testFile] });
+    await agent.aiAssert('page displays "test-file.txt"');
+  });
+
+  it('should allow page interaction when file chooser is triggered but no files provided', async () => {
+    const agent = await openUploadPage();
+
+    // Click the upload button without providing fileChooserAccept
+    // The file chooser will be triggered but dismissed without selecting files
+    await agent.aiTap('Choose Single File');
+
+    // Verify page is still interactive - can perform other actions
+    await agent.aiAssert('page displays "File Upload Test Page"');
+
+    // Can still upload files after dismissing the chooser
+    const testFile = fixturePath('test-file.txt');
+    await agent.aiTap('Choose Single File', {
+      fileChooserAccept: [testFile],
+    });
+    await agent.aiAssert('page displays "test-file.txt"');
+  });
+});