Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,49 @@ const plugin = {
return await controller.handleInboundClaim(event);
});

const registerInternalHook = (
api as OpenClawPluginApi & {
registerHook?: (
events: string | string[],
handler: (event: unknown) => Promise<void> | void,
opts?: { name?: string; description?: string },
) => void;
}
).registerHook;
if (typeof registerInternalHook === "function") {
registerInternalHook(
"message:transcribed",
async (event) => {
await controller.handleMessageTranscribed(event as {
type?: string;
action?: string;
sessionKey?: string;
context?: Record<string, unknown>;
});
},
{
name: "codex-transcribed-handoff",
description: "Send transcribed inbound audio to the bound Codex thread as text.",
},
);

registerInternalHook(
"message:preprocessed",
async (event) => {
await controller.handleMessagePreprocessed(event as {
type?: string;
action?: string;
sessionKey?: string;
context?: Record<string, unknown>;
});
},
{
name: "codex-preprocessed-audio-fallback",
description: "Fallback: transcribe bound inbound audio from mediaPath when transcript events do not arrive.",
},
);
}

api.registerInteractiveHandler({
channel: "telegram",
namespace: INTERACTIVE_NAMESPACE,
Expand Down
91 changes: 87 additions & 4 deletions src/controller.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3582,7 +3582,7 @@ describe("Discord controller flows", () => {
{ type: "text", text: "Read this file" },
{
type: "text",
text: "Attached file: note.txt\nContent-Type: text/plain\n\nhello",
text: `Attached file: note.txt\nLocal path: ${filePath}\nContent-Type: text/plain\n\nhello`,
},
],
}),
Expand Down Expand Up @@ -3635,14 +3635,90 @@ describe("Discord controller flows", () => {
input: [
{
type: "text",
text: "Attached file: README.md\n\n# Heading\n\nBody text.\n",
text: `Attached file: README.md\nLocal path: ${filePath}\n\n# Heading\n\nBody text.\n`,
},
],
}),
);
});

it("still ignores unsupported binary document attachments", async () => {
it("starts a turn when a message:transcribed event arrives for a bound session", async () => {
const { controller } = await createControllerHarness();
await (controller as any).store.upsertBinding({
conversation: {
channel: "telegram",
accountId: "default",
conversationId: TEST_TELEGRAM_PEER_ID,
},
sessionKey: "session-1",
threadId: "thread-1",
workspaceDir: "/repo/openclaw",
updatedAt: Date.now(),
});
const startTurn = vi.fn(async () => undefined);
(controller as any).startTurn = startTurn;

await controller.handleMessageTranscribed({
type: "message",
action: "transcribed",
sessionKey: "session-1",
context: {
transcript: "Hello from voice",
mediaType: "audio/ogg",
},
});

expect(startTurn).toHaveBeenCalledWith(
expect.objectContaining({
conversation: expect.objectContaining({ conversationId: TEST_TELEGRAM_PEER_ID }),
prompt: "Hello from voice",
reason: "inbound",
}),
);
});

it("falls back to runtime transcription on message:preprocessed when transcript is missing", async () => {
const { controller } = await createControllerHarness();
await (controller as any).store.upsertBinding({
conversation: {
channel: "telegram",
accountId: "default",
conversationId: TEST_TELEGRAM_PEER_ID,
},
sessionKey: "session-1",
threadId: "thread-1",
workspaceDir: "/repo/openclaw",
updatedAt: Date.now(),
});
const startTurn = vi.fn(async () => undefined);
(controller as any).startTurn = startTurn;
(controller as any).api.runtime = {
mediaUnderstanding: {
transcribeAudioFile: vi.fn(async () => ({ text: "Fallback transcript" })),
},
};
(controller as any).api.config = {};

await controller.handleMessagePreprocessed({
type: "message",
action: "preprocessed",
sessionKey: "session-1",
context: {
mediaPath: "/tmp/test.ogg",
mediaType: "audio/ogg",
},
});

expect(startTurn).toHaveBeenCalledWith(
expect.objectContaining({
conversation: expect.objectContaining({ conversationId: TEST_TELEGRAM_PEER_ID }),
prompt: "Fallback transcript",
reason: "inbound",
}),
);
});

it("passes unsupported binary documents through as local file references", async () => {
const { controller, stateDir } = await createControllerHarness();
const filePath = path.join(stateDir, "tmp", "manual.pdf");
fs.mkdirSync(path.dirname(filePath), { recursive: true });
Expand Down Expand Up @@ -3685,7 +3761,14 @@ describe("Discord controller flows", () => {
expect(startTurn).toHaveBeenCalledWith(
expect.objectContaining({
prompt: "Read this document",
input: [{ type: "text", text: "Read this document" }],
input: [
{ type: "text", text: "Read this document" },
{
type: "text",
text:
`Attached file: manual.pdf\nLocal path: ${filePath}\nContent-Type: application/pdf\n\nUse this local file path directly from the server workspace. Do not ask the user to re-upload it unless the path is unreadable.`,
},
],
}),
);
});
Expand Down
147 changes: 145 additions & 2 deletions src/controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ async function toCodexTextAttachmentInputItem(
const displayName =
media.fileName?.trim() || path.basename(normalizedPath) || "attached-file.txt";
const mimeType = normalizeMimeType(media.mimeType);
const lines = [`Attached file: ${displayName}`];
const lines = [`Attached file: ${displayName}`, `Local path: ${normalizedPath}`];
if (mimeType) {
lines.push(`Content-Type: ${mimeType}`);
}
Expand All @@ -618,6 +618,32 @@ async function toCodexTextAttachmentInputItem(
return { type: "text", text: lines.join("\n") };
}

function toCodexGenericFileReferenceInputItem(
media: PluginInboundMedia,
): CodexTurnInputItem | null {
if (media.kind === "image") {
return null;
}
const normalizedPath = normalizeInboundMediaPath(media.path ?? media.url);
if (!normalizedPath || !path.isAbsolute(normalizedPath)) {
return null;
}
const displayName = media.fileName?.trim() || path.basename(normalizedPath) || "attached-file";
const mimeType = normalizeMimeType(media.mimeType);
const lines = [
`Attached file: ${displayName}`,
`Local path: ${normalizedPath}`,
];
if (mimeType) {
lines.push(`Content-Type: ${mimeType}`);
}
lines.push(
"",
"Use this local file path directly from the server workspace. Do not ask the user to re-upload it unless the path is unreadable.",
);
return { type: "text", text: lines.join("\n") };
}

async function buildInboundTurnInput(event: {
content: string;
media?: PluginInboundMedia[];
Expand All @@ -629,7 +655,10 @@ async function buildInboundTurnInput(event: {
}
const seen = new Set<string>();
for (const media of [...(event.media ?? []), ...extractInboundMetadataMedia(event.metadata)]) {
const item = toCodexImageInputItem(media) ?? (await toCodexTextAttachmentInputItem(media));
const item =
toCodexImageInputItem(media) ??
(await toCodexTextAttachmentInputItem(media)) ??
toCodexGenericFileReferenceInputItem(media);
if (!item) {
continue;
}
Expand Down Expand Up @@ -1409,6 +1438,120 @@ export class CodexPluginController {
}
}

async handleMessageTranscribed(event: {
type?: string;
action?: string;
sessionKey?: string;
context?: Record<string, unknown>;
}): Promise<void> {
try {
if (!this.settings.enabled) {
return;
}
if (event.type !== "message" || event.action !== "transcribed") {
return;
}
await this.start();
const sessionKey = typeof event.sessionKey === "string" ? event.sessionKey : "";
const ctx = event.context ?? {};
const transcript = typeof ctx.transcript === "string" ? ctx.transcript.trim() : "";
if (!sessionKey || !transcript) {
return;
}
const binding = this.store.listBindings().find((entry) => entry.sessionKey === sessionKey) ?? null;
if (!binding) {
return;
}
const mediaType = typeof ctx.mediaType === "string" ? ctx.mediaType : "";
const conversation = binding.conversation;
this.api.logger.info(
`codex message:transcribed starting turn ${this.formatConversationForLog(conversation)} mediaType=${mediaType || "<unknown>"} prompt="${summarizeTextForLog(transcript)}"`,
);
await this.startTurn({
conversation,
binding,
workspaceDir: binding.workspaceDir,
prompt: transcript,
reason: "inbound",
});
} catch (error) {
const detail = error instanceof Error ? `${error.message}\n${error.stack ?? ""}`.trim() : String(error);
this.api.logger.error(`codex message:transcribed failed: ${detail}`);
}
}

async handleMessagePreprocessed(event: {
type?: string;
action?: string;
sessionKey?: string;
context?: Record<string, unknown>;
}): Promise<void> {
try {
if (!this.settings.enabled) {
return;
}
if (event.type !== "message" || event.action !== "preprocessed") {
return;
}
await this.start();
const sessionKey = typeof event.sessionKey === "string" ? event.sessionKey : "";
const ctx = event.context ?? {};
const mediaPath = typeof ctx.mediaPath === "string" ? ctx.mediaPath : "";
const mediaType = typeof ctx.mediaType === "string" ? ctx.mediaType : "";
const existingTranscript = typeof ctx.transcript === "string" ? ctx.transcript.trim() : "";
if (
!sessionKey ||
!mediaPath ||
(!mediaType.startsWith("audio/") && !mediaPath.match(/\.(ogg|oga|opus|mp3|wav|m4a)$/i))
) {
return;
}
const binding = this.store.listBindings().find((entry) => entry.sessionKey === sessionKey) ?? null;
if (!binding) {
return;
}
const mediaUnderstanding = (this.api as OpenClawPluginApi & {
runtime?: {
mediaUnderstanding?: {
transcribeAudioFile?: (params: {
filePath: string;
cfg?: unknown;
mime?: string;
}) => Promise<{ text?: string } | null | undefined>;
};
};
config?: unknown;
}).runtime?.mediaUnderstanding;
const transcript =
existingTranscript ||
(await mediaUnderstanding?.transcribeAudioFile?.({
filePath: mediaPath,
cfg: (this.api as { config?: unknown }).config,
mime: mediaType || undefined,
}))?.text?.trim() ||
"";
if (!transcript) {
this.api.logger.warn(
`codex message:preprocessed audio fallback produced no transcript ${this.formatConversationForLog(binding.conversation)} mediaPath=${mediaPath}`,
);
return;
}
this.api.logger.info(
`codex message:preprocessed audio fallback starting turn ${this.formatConversationForLog(binding.conversation)} prompt="${summarizeTextForLog(transcript)}"`,
);
await this.startTurn({
conversation: binding.conversation,
binding,
workspaceDir: binding.workspaceDir,
prompt: transcript,
reason: "inbound",
});
} catch (error) {
const detail = error instanceof Error ? `${error.message}\n${error.stack ?? ""}`.trim() : String(error);
this.api.logger.error(`codex message:preprocessed fallback failed: ${detail}`);
}
}

async handleTelegramInteractive(ctx: PluginInteractiveTelegramHandlerContext): Promise<void> {
await this.start();
const bindingApi = asScopedBindingApi(ctx);
Expand Down