feat(gateway): support images and audio for LINE/Telegram#726
feat(gateway): support images and audio for LINE/Telegram#726iamninihuang wants to merge 8 commits intoopenabdev:mainfrom
Conversation
There was a problem hiding this comment.
Pull request overview
This PR aims to add inbound media support to the custom gateway so LINE and Telegram messages can carry image/audio data into OpenAB through temporary proxy URLs. It extends the gateway-side event schema and adapters, while also updating platform docs to describe the new media behavior.
Changes:
- Add attachment fields to gateway events plus an in-memory
/media/:uuidproxy store in the gateway. - Update Telegram and LINE webhook handlers to fetch inbound image/audio media and attach proxy URLs to emitted gateway events.
- Update LINE/Telegram/config docs, and add a separate feature-request document related to LINE reply/push behavior.
Reviewed changes
Copilot reviewed 10 out of 10 changed files in this pull request and generated 9 comments.
Show a summary per file
| File | Description |
|---|---|
gateway/src/schema.rs |
Extends gateway event schema with attachments. |
gateway/src/main.rs |
Adds media store state, media route, public URL config, and cleanup task. |
gateway/src/adapters/telegram.rs |
Downloads Telegram photo/audio media and adds attachment URLs to events. |
gateway/src/adapters/line.rs |
Downloads LINE image/audio media and adds attachment URLs to events. |
gateway/src/adapters/googlechat.rs |
Small dead-code annotation cleanup. |
gateway/src/adapters/feishu.rs |
Small lint/compatibility cleanup in Feishu adapter. |
docs/telegram.md |
Documents Telegram media support. |
docs/line.md |
Documents LINE media support. |
docs/feature-requests/line-reply-api.md |
Adds a separate feature-request document for LINE reply/push strategy. |
docs/config-reference.md |
Adds gateway media-related environment variable documentation. |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| #[serde(default, skip_serializing_if = "Vec::is_empty")] | ||
| pub attachments: Vec<Attachment>, |
|
|
||
| let listen_addr = std::env::var("GATEWAY_LISTEN").unwrap_or_else(|_| "0.0.0.0:8080".into()); | ||
| let ws_token = std::env::var("GATEWAY_WS_TOKEN").ok(); | ||
| let public_url = std::env::var("GATEWAY_PUBLIC_URL").unwrap_or_else(|_| "http://localhost:8080".into()); |
| /// uuid -> (binary data, mime type, created_at) | ||
| pub type MediaStore = Arc<std::sync::Mutex<HashMap<String, (Vec<u8>, String, Instant)>>>; | ||
|
|
| let url = format!("https://api-data.line.me/v2/bot/message/{}/content", msg.id); | ||
| let client = reqwest::Client::new(); | ||
| let resp = client | ||
| .get(url) | ||
| .bearer_auth(access_token) | ||
| .send() | ||
| .await; | ||
|
|
||
| if let Ok(r) = resp { | ||
| let mime = r | ||
| .headers() | ||
| .get("content-type") | ||
| .and_then(|v| v.to_str().ok()) | ||
| .unwrap_or(if msg.message_type == "image" { | ||
| "image/jpeg" | ||
| } else { | ||
| "audio/x-m4a" | ||
| }) | ||
| .to_string(); | ||
|
|
||
| if let Ok(data) = r.bytes().await { | ||
| let uuid = uuid::Uuid::new_v4().to_string(); | ||
| { | ||
| let mut store = | ||
| state.media_store.lock().unwrap_or_else(|e| e.into_inner()); | ||
| store.insert( | ||
| uuid.clone(), | ||
| (data.to_vec(), mime, std::time::Instant::now()), | ||
| ); | ||
| } | ||
| attachments.push(Attachment { | ||
| attachment_type: msg.message_type.clone(), | ||
| url: format!("{}/media/{}", state.public_url, uuid), | ||
| }); | ||
| if text.is_empty() { | ||
| text = format!("[{}]", msg.message_type); | ||
| } | ||
| info!(id = %msg.id, uuid = %uuid, "proxied LINE inbound media"); | ||
| } | ||
| } |
| let entry = { | ||
| let cache = state.media_store.lock().unwrap_or_else(|e| e.into_inner()); | ||
| cache.get(&uuid).cloned() | ||
| }; | ||
|
|
||
| if let Some((data, mime, _)) = entry { | ||
| axum::response::Response::builder() | ||
| .header("content-type", mime) | ||
| .body(axum::body::Body::from(data)) | ||
| .unwrap() | ||
| } else { | ||
| axum::http::StatusCode::NOT_FOUND.into_response() |
| let mime = r | ||
| .headers() | ||
| .get("content-type") | ||
| .and_then(|v| v.to_str().ok()) | ||
| .unwrap_or(if msg.message_type == "image" { | ||
| "image/jpeg" | ||
| } else { | ||
| "audio/x-m4a" | ||
| }) | ||
| .to_string(); | ||
|
|
||
| if let Ok(data) = r.bytes().await { | ||
| let uuid = uuid::Uuid::new_v4().to_string(); | ||
| { | ||
| let mut store = | ||
| state.media_store.lock().unwrap_or_else(|e| e.into_inner()); | ||
| store.insert( | ||
| uuid.clone(), | ||
| (data.to_vec(), mime, std::time::Instant::now()), | ||
| ); | ||
| } | ||
| attachments.push(Attachment { | ||
| attachment_type: msg.message_type.clone(), | ||
| url: format!("{}/media/{}", state.public_url, uuid), | ||
| }); | ||
| if text.is_empty() { | ||
| text = format!("[{}]", msg.message_type); | ||
| } | ||
| info!(id = %msg.id, uuid = %uuid, "proxied LINE inbound media"); | ||
| } |
| if let Ok(body) = resp.json::<serde_json::Value>().await { | ||
| if let Some(file_path) = body["result"]["file_path"].as_str() { | ||
| // 2. Download the file | ||
| let download_url = format!("{TELEGRAM_API_BASE}/file/bot{token}/{file_path}"); | ||
| if let Ok(r) = client.get(download_url).send().await { | ||
| let mime = r | ||
| .headers() | ||
| .get("content-type") | ||
| .and_then(|v| v.to_str().ok()) | ||
| .unwrap_or(if m_type == "image" { | ||
| "image/jpeg" | ||
| } else { | ||
| "audio/ogg" | ||
| }) | ||
| .to_string(); | ||
|
|
||
| if let Ok(data) = r.bytes().await { | ||
| let uuid = uuid::Uuid::new_v4().to_string(); | ||
| { | ||
| let mut store = | ||
| state.media_store.lock().unwrap_or_else(|e| e.into_inner()); | ||
| store.insert( | ||
| uuid.clone(), | ||
| (data.to_vec(), mime, std::time::Instant::now()), | ||
| ); | ||
| } | ||
| attachments.push(Attachment { | ||
| attachment_type: m_type.into(), | ||
| url: format!("{}/media/{}", state.public_url, uuid), | ||
| }); | ||
| if text.is_empty() { | ||
| text = format!("[{}]", m_type); | ||
| } | ||
| info!(id = %file_id, uuid = %uuid, "proxied Telegram inbound media"); | ||
| } | ||
| } | ||
| } |
| | `GATEWAY_MEDIA_BASE_URL` | Public base URL for media proxy (e.g. `https://gateway.example.com`). | `http://127.0.0.1:9090` | | ||
| | `GATEWAY_MEDIA_STORE_TTL` | Seconds to keep media in memory before expiration. | `300` | | ||
|
|
| # Feature Request: Hybrid LINE Reply/Push API Strategy | ||
|
|
||
| **Title**: `feat(gateway): implement hybrid LINE Reply/Push API strategy` | ||
|
|
||
| **Labels**: `feature` | ||
|
|
||
| **GitHub Issue**: [#607](https://github.com/openabdev/openab/issues/607) |
OpenAB PR ScreeningThis is auto-generated by the OpenAB project-screening flow for context collection and reviewer handoff.
Screening report## IntentPR #726 aims to let the custom gateway handle non-text inbound messages from LINE and Telegram. The current operator-visible gap is that those adapters only pass text into OpenAB, so images, audio clips, and voice messages sent by users cannot be analyzed by agents or routed through speech-to-text. The PR proposes solving this by downloading protected platform media through the gateway, exposing it through temporary local HTTP URLs, and adding those URLs to gateway events as attachments. FeatThis is a feature with a schema extension. Behavioral change:
Who It ServesPrimary beneficiaries:
Secondary beneficiaries:
Rewritten PromptImplement inbound media support for LINE and Telegram in the gateway. Add an optional Include configuration for media base URL and TTL with safe defaults, document the new settings, and add tests or focused validation for:
Avoid unrelated adapter refactors unless required by the shared attachment schema. Merge PitchThis is worth advancing because it closes a real parity gap: Discord already has an image-understanding path, while LINE and Telegram currently lose media context. Normalizing protected platform media into temporary gateway URLs is a practical bridge between chat platforms and agent media analysis. Risk profile is moderate. The main reviewer concerns should be around serving local media safely, memory-only cache behavior under load, URL exposure, TTL cleanup, and whether the schema shape matches downstream agent expectations. The feature is mergeable if the proxy is clearly scoped, configurable, and tested for expiry and failure paths. Best-Practice ComparisonOpenClaw principles that apply:
Hermes Agent principles that apply:
The relevant best-practice takeaway is that the gateway should own media normalization explicitly, keep temporary state bounded, log failures, and avoid leaking platform-authenticated URLs directly to agents. Implementation OptionsOption 1: Conservative adapter-local support Implement LINE and Telegram media downloads directly inside each adapter, add optional Pros: fastest path, minimal architectural churn, likely matches the PR shape. Option 2: Balanced shared media proxy Introduce a small shared Pros: keeps adapter code focused, easier to test expiry and serving behavior, sets a reusable pattern for future platforms. Option 3: Ambitious durable media pipeline Create a gateway media subsystem with pluggable storage, durable metadata, background cleanup, configurable size limits, richer content-type handling, and future support for async processing or retries. Pros: strongest long-term reliability and scale story. Option 4: Downstream-only media handling Pass platform media identifiers to OpenAB Core or agents and let them fetch media using platform credentials. Pros: avoids running a media-serving endpoint in the gateway. Comparison Table
RecommendationAdvance the PR toward Option 2: a shared gateway-owned media proxy with a bounded in-memory store, explicit attachment schema, TTL expiry, and clear docs. That is the right merge discussion target because it solves the user-facing media gap without turning this PR into a broader storage or job-processing redesign. The next reviewer should focus on whether the current implementation already has a clean shared If the PR currently mixes this with unrelated docs or adapter cleanup, split only the unrelated material. Keep the schema, media proxy, LINE support, Telegram support, and config docs together because they form one coherent feature. |
544163c to
99e0d63
Compare
|
@copilot Please re-review. All issues resolved. |
This comment has been minimized.
This comment has been minimized.
…bdev#726) - Update GATEWAY_MEDIA_BASE_URL default in docs - Add GATEWAY_MEDIA_STORE_MAX_ENTRIES limit to prevent OOM - Update Axum route syntax to 0.8 - Add rationale comments for reply_to auto-fill - Fix indentation in Telegram adapter - Fix clippy warnings in cron.rs
This comment has been minimized.
This comment has been minimized.
chaodu-agent
left a comment
There was a problem hiding this comment.
Approve — contributor addressed all blocking issues from first review. Media proxy architecture is solid.
wangyuyan-agent
left a comment
There was a problem hiding this comment.
Review Summary
Solid media proxy architecture that follows #690 recommendations. Testing is comprehensive and the implementation is clean.
However, I found 3 high-severity issues that must be fixed before merge:
🔴 High Severity — Must Fix
1. TTL not enforced in media_handler
Issue: Expired media is still served until the next cleanup cycle (up to 60s after expiration).
Location: gateway/src/main.rs — media_handler
Current code:
if let Some((data, mime, _)) = entry { // ← timestamp discarded
axum::response::Response::builder()
.header("content-type", mime)
.body(axum::body::Body::from(data))
.unwrap()
}Fix:
if let Some((data, mime, created_at)) = entry {
if created_at.elapsed().as_secs() >= state.media_ttl {
return axum::http::StatusCode::NOT_FOUND.into_response();
}
axum::response::Response::builder()
.header("content-type", mime)
.body(axum::body::Body::from(data))
.unwrap()
}2. Memory unbounded by size
Issue: GATEWAY_MEDIA_STORE_MAX_ENTRIES=1000 limits entry count but not total bytes. 1000 × 20MB images = 20GB RAM.
Location: gateway/src/adapters/line.rs and telegram.rs — media download
Additional issue: r.bytes().await loads the full file into memory before checking if the store is full, so rejected entries still consume memory temporarily.
Recommended fix: Add a per-file size limit and check it before downloading:
// In main.rs, add to AppState:
pub media_max_file_size: u64, // from GATEWAY_MEDIA_MAX_FILE_SIZE, default 10MB
// In line.rs and telegram.rs, before r.bytes().await:
let content_length = r.headers()
.get("content-length")
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0);
if content_length > state.media_max_file_size {
warn!(size = content_length, "media too large, skipping");
continue; // or break, depending on context
}
// Then proceed with r.bytes().awaitThis prevents OOM from a single large file and gives users a clear error message.
3. reply_to auto-fill can cross-contaminate channels
Issue: last_event_id is keyed by channel.id, but if a reply has an empty or wrong channel.id, it will pick up the last event from a different channel. This causes LINE reply tokens to be used for the wrong message, leading to silent failures.
Location: gateway/src/main.rs — handle_oab_connection
Current code:
if reply.reply_to.is_empty() {
let last = last_event_id_for_recv.lock().await;
if let Some(eid) = last.get(&reply.channel.id) { // ← no validation
reply.reply_to = eid.clone();
}
}Fix: Only auto-fill if the channel.id is non-empty and matches a known channel:
if reply.reply_to.is_empty() && !reply.channel.id.is_empty() {
let last = last_event_id_for_recv.lock().await;
if let Some(eid) = last.get(&reply.channel.id) {
reply.reply_to = eid.clone();
}
}🟡 Medium Severity — Should Fix
4. /media/{uuid} endpoint has no authentication
Issue: Anyone who knows a UUID can download media. In typical deployments where the gateway is behind a firewall, this is low risk. However, if the gateway's port 8080 is exposed to the internet, UUIDs could leak via logs (info!(uuid = %uuid, ...)), error messages, or monitoring tools.
Location: gateway/src/main.rs — Router setup
Risk assessment:
- Low risk if gateway is internal-only (typical deployment)
- Medium risk if gateway port is publicly accessible
Recommended mitigation (choose one):
Option A (deployment-level): Document that the gateway's HTTP port should not be exposed to the public internet. Only the webhook paths need to be accessible (via reverse proxy if needed).
Option B (code-level): Add a shared secret query parameter:
// In AppState:
pub media_secret: String, // from GATEWAY_MEDIA_SECRET env var
// In media_handler:
async fn media_handler(
State(state): State<Arc<AppState>>,
axum::extract::Path(uuid): axum::extract::Path<String>,
axum::extract::Query(params): axum::extract::Query<HashMap<String, String>>,
) -> impl IntoResponse {
if params.get("secret") != Some(&state.media_secret) {
return axum::http::StatusCode::FORBIDDEN.into_response();
}
// ... rest of handler
}
// When constructing URLs in adapters:
url: format!("{}/media/{}?secret={}", state.public_url, uuid, state.media_secret),Note: UUIDv4 has 122 bits of entropy, making brute-force infeasible within the 300s TTL. The main risk is UUID leakage, not guessing.
5. LINE media silently dropped when access_token is None
Issue: If LINE_ACCESS_TOKEN is not set, image/audio messages hit continue with no warning. Users get no response and no error.
Location: gateway/src/adapters/line.rs
Fix: Add a warning log:
} else if msg.message_type != "text" {
warn!(msg_type = %msg.message_type, "LINE media message dropped (access_token not configured)");
continue;
}6. Internal URL leaked to LLM on download failure
Issue: When media::download_and_encode_image fails, the fallback pushes [Image attachment URL]: http://localhost:8080/media/{uuid} to the LLM. This exposes internal infrastructure details.
Location: src/gateway.rs — build_attachment_blocks
Fix: Use a generic message instead:
blocks.push(ContentBlock::Text {
text: "[Image attachment failed to load]".to_string(),
});Or omit the fallback entirely (just skip the attachment).
7. Mutex poisoning recovery may proceed with corrupted state
Issue: All lock() calls use unwrap_or_else(|e| e.into_inner()), which recovers poisoned mutexes. If a thread panics while holding the lock, subsequent operations proceed on potentially inconsistent state.
Location: line.rs, telegram.rs, main.rs — all MediaStore accesses
Impact: Low probability (requires a panic inside a critical section), but could lead to serving wrong media or double-counting entries.
Recommendation: Either:
- Accept the risk (document it), or
- Panic on poisoned mutex:
.lock().expect("media store poisoned")
The current approach is a reasonable trade-off for availability over strict consistency.
✅ What I Like
- Clean URL-proxy architecture that decouples platform auth from core
- Backward-compatible schema with
skip_serializing_if - STT integration works out of the box
- Good fallback behavior (text URL if download fails)
- Comprehensive testing (LINE/Telegram image/audio, STT)
Verdict
Request changes — Issues #1-3 are high-severity and must be fixed before merge. Issues #4-7 are medium-severity and should be addressed but are not blocking if deployment follows best practices (gateway not exposed to public internet).
Once the high-severity issues are fixed, this PR will be a solid foundation for multi-platform media support. 🚀
wangyuyan-agent
left a comment
There was a problem hiding this comment.
Follow-up: Revised Severity Assessment (K8s Deployment Context)
After reviewing the Helm chart (charts/openab/templates/gateway.yaml), the gateway Service is ClusterIP — port 8080 is cluster-internal only. This changes the severity of a few items from my previous review.
🔄 Revised: Issue #4 — /media/{uuid} endpoint auth
Previous assessment: 🟡 Medium — recommended adding a shared secret query param.
Revised assessment: ✅ Non-issue in standard deployment. ClusterIP means the media endpoint is only reachable from within the cluster. External actors cannot access it. The Option B code I provided (secret query param) is unnecessary complexity for this deployment model and can be ignored.
Apologies for the noise.
🔴 New Issue (previously missed): GATEWAY_PUBLIC_URL default silently breaks media in K8s
The default http://localhost:8080 is actively broken in any Kubernetes deployment. The gateway and OAB core run in separate Pods — there is no shared localhost. Media URLs generated with the default will point to the gateway container's own loopback interface, which OAB core cannot reach.
The result: media is downloaded, stored, and the attachment URL is emitted — but OAB core silently fails to fetch it. No error, no log at the OAB side, media just disappears.
This is not a "remember to configure it" edge case — it is a guaranteed silent failure for every K8s deployment that doesn't set this variable.
Recommended fix: either make the variable required (startup error if unset when any media-capable adapter is configured), or at minimum emit a warn! at startup:
if public_url.contains("localhost") || public_url.contains("127.0.0.1") {
warn!(
public_url = %public_url,
"GATEWAY_PUBLIC_URL looks like a loopback address. Media attachments will not be reachable from other pods/containers. Set GATEWAY_PUBLIC_URL to the gateway's cluster-internal Service URL (e.g. http://openab-gateway:8080) or its external ingress URL."
);
}The Helm chart template (gateway.yaml) currently does not inject GATEWAY_PUBLIC_URL — so every Helm-based deployment silently inherits the broken default. The chart should either inject the Service's cluster-internal URL automatically or surface this as a required value.
🔴 New Issue (previously missed): HTTP response status not checked before storing media
In both line.rs and telegram.rs, the download response is stored without checking r.status().is_success(). If the platform API returns 401, 403, or any error response, the gateway stores the error page body as the media payload and emits an attachment URL pointing to it. OAB core then receives what looks like a valid attachment but contains an HTML error page.
line.rs — after if let Ok(r) = resp {, add:
if !r.status().is_success() {
warn!(status = %r.status(), id = %msg.id, "LINE media download failed");
continue;
}telegram.rs — same pattern after the file download client.get(download_url).send().await:
if !r.status().is_success() {
warn!(status = %r.status(), file_id = %file_id, "Telegram media download failed");
continue;
}The three original 🔴 issues (#1 TTL not enforced in handler, #2 memory unbounded by size, #3 reply_to cross-channel contamination) stand unchanged.
- TTL not enforced in media_handler: add TTL check before serving media - Memory unbounded by size: add media_max_file_size limit (default 10MB) - reply_to auto-fill cross-contamination: only auto-fill if channel.id is non-empty - LINE media silently dropped: add warning log when access_token not configured - Internal URL leaked to LLM: use generic message on download failure
There was a problem hiding this comment.
Pull request overview
Copilot reviewed 12 out of 12 changed files in this pull request and generated 9 comments.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| // Inbound media proxy config (Issue #690) | ||
| let public_url = std::env::var("GATEWAY_MEDIA_BASE_URL") | ||
| .or_else(|_| std::env::var("GATEWAY_PUBLIC_URL")) | ||
| .unwrap_or_else(|_| "http://localhost:8080".into()); |
| let last_event_id: Arc<Mutex<std::collections::HashMap<String, String>>> = Arc::new(Mutex::new(std::collections::HashMap::new())); | ||
|
|
||
| // Forward gateway events → OAB | ||
| let last_event_id_for_send = last_event_id.clone(); | ||
| let send_task = tokio::spawn(async move { | ||
| loop { | ||
| tokio::select! { | ||
| Ok(event_json) = event_rx.recv() => { | ||
| // Track the last event ID sent to this client per channel | ||
| if let Ok(v) = serde_json::from_str::<serde_json::Value>(&event_json) { | ||
| if let (Some(eid), Some(cid)) = (v["event_id"].as_str(), v["channel"]["id"].as_str()) { | ||
| let mut last = last_event_id_for_send.lock().await; | ||
| last.insert(cid.to_string(), eid.to_string()); |
| let public_url = std::env::var("GATEWAY_MEDIA_BASE_URL") | ||
| .or_else(|_| std::env::var("GATEWAY_PUBLIC_URL")) | ||
| .unwrap_or_else(|_| "http://localhost:8080".into()); | ||
| let media_ttl = std::env::var("GATEWAY_MEDIA_STORE_TTL") | ||
| .ok() | ||
| .and_then(|v| v.parse::<u64>().ok()) | ||
| .unwrap_or(300); | ||
| let media_max_entries = std::env::var("GATEWAY_MEDIA_STORE_MAX_ENTRIES") | ||
| .ok() | ||
| .and_then(|v| v.parse::<usize>().ok()) | ||
| .unwrap_or(1000); | ||
| // Issue #690 review fix: per-file size limit to prevent OOM | ||
| let media_max_file_size = std::env::var("GATEWAY_MEDIA_MAX_FILE_SIZE") | ||
| .ok() | ||
| .and_then(|v| v.parse::<u64>().ok()) | ||
| .unwrap_or(10 * 1024 * 1024); // 10MB default |
| axum::response::Response::builder() | ||
| .header("content-type", mime) | ||
| .body(axum::body::Body::from(data)) | ||
| .unwrap() |
| let mut text = msg | ||
| .text | ||
| .clone() | ||
| .unwrap_or_else(|| msg.caption.clone().unwrap_or_default()); |
| // Issue #690 review fix: Check file size before downloading | ||
| let content_length = r | ||
| .headers() | ||
| .get("content-length") | ||
| .and_then(|v| v.to_str().ok()) | ||
| .and_then(|s| s.parse::<u64>().ok()) | ||
| .unwrap_or(0); | ||
|
|
||
| if content_length > state.media_max_file_size { | ||
| warn!( | ||
| size = content_length, | ||
| max = state.media_max_file_size, | ||
| id = %file_id, | ||
| "Telegram media too large, skipping" | ||
| ); | ||
| } else { | ||
| let mime = r | ||
| .headers() | ||
| .get("content-type") | ||
| .and_then(|v| v.to_str().ok()) | ||
| .unwrap_or(if m_type == "image" { | ||
| "image/jpeg" | ||
| } else { | ||
| "audio/ogg" | ||
| }) | ||
| .to_string(); | ||
|
|
||
| if let Ok(data) = r.bytes().await { |
| // Issue #690 review fix: Check file size before downloading | ||
| let content_length = r | ||
| .headers() | ||
| .get("content-length") | ||
| .and_then(|v| v.to_str().ok()) | ||
| .and_then(|s| s.parse::<u64>().ok()) | ||
| .unwrap_or(0); | ||
|
|
||
| if content_length > state.media_max_file_size { | ||
| warn!( | ||
| size = content_length, | ||
| max = state.media_max_file_size, | ||
| id = %msg.id, | ||
| "LINE media too large, skipping" | ||
| ); | ||
| continue; | ||
| } | ||
|
|
||
| let mime = r | ||
| .headers() | ||
| .get("content-type") | ||
| .and_then(|v| v.to_str().ok()) | ||
| .unwrap_or(if msg.message_type == "image" { | ||
| "image/jpeg" | ||
| } else { | ||
| "audio/x-m4a" | ||
| }) | ||
| .to_string(); | ||
|
|
||
| if let Ok(data) = r.bytes().await { |
| } else if msg.message_type != "text" { | ||
| // Issue #690 review fix: Warn when media message is dropped due to missing access_token | ||
| warn!( | ||
| msg_type = %msg.message_type, | ||
| "LINE media message dropped (access_token not configured)" | ||
| ); |
| |----------|-------------|---------| | ||
| | `GATEWAY_MEDIA_BASE_URL` | Public base URL for media proxy (e.g. `https://gateway.example.com`). | `http://localhost:8080` | | ||
| | `GATEWAY_MEDIA_STORE_TTL` | Seconds to keep media in memory before expiration. | `300` | | ||
| | `GATEWAY_MEDIA_STORE_MAX_ENTRIES` | Maximum number of media items to keep in memory. | `1000` | |
|
經過幾次來回,發現這個 PR 不太好處理,仍然有多個地方需要修正,想請問還是先Close這個 PR,日後再戰? |
|
Don't close this — you're essentially done. Five rounds of fixes, all high-severity issues resolved. The only remaining item is a one-liner: a startup Add that and I'll approve. The hard work is already done. |
wangyuyan-agent
left a comment
There was a problem hiding this comment.
All issues resolved — approved. Good work pushing this through.
🟡 Mostly Good — Media proxy architecture is sound, but has code quality issues that should be addressedWhat problem does it solve? The Custom Gateway only supports text messages. Users on LINE/Telegram cannot send images or voice messages to the AI agent. This PR adds an in-memory media proxy that downloads platform media (which requires auth) and serves it at a temporary HTTP endpoint for OAB core to fetch. How?
Considered alternatives? The PR description references Issue #690 which proposed this "Gateway Media Proxy" approach. This avoids changes to OAB core's media handling — the gateway presents media as standard URLs, same pattern as Discord CDN. Review Details🟢 INFO — Done well
🔴 SUGGESTED CHANGES
🟡 NIT — Non-blocking
Verdict: The architecture is solid and well-thought-out. Items #1 and #2 need verification/fix before merge. Item #3 is a strong suggestion for maintainability but not strictly blocking. |
PR Review: #726Based on commit: Summary
Core Assessment
Findings🔴 Critical:
|
Follow-up: CI Blind Spot ConfirmationAfter a second pass, I verified the Why CI does not catch this: The This means any compile error in Suggestion: Consider adding a Note: The LINE adapter's |
…bdev#726) - Update GATEWAY_MEDIA_BASE_URL default in docs - Add GATEWAY_MEDIA_STORE_MAX_ENTRIES limit to prevent OOM - Update Axum route syntax to 0.8 - Add rationale comments for reply_to auto-fill - Fix indentation in Telegram adapter - Fix clippy warnings in cron.rs
- TTL not enforced in media_handler: add TTL check before serving media - Memory unbounded by size: add media_max_file_size limit (default 10MB) - reply_to auto-fill cross-contamination: only auto-fill if channel.id is non-empty - LINE media silently dropped: add warning log when access_token not configured - Internal URL leaked to LLM: use generic message on download failure
- Add startup warning when GATEWAY_PUBLIC_URL contains localhost/127.0.0.1 - Add HTTP status checks before storing media in LINE/Telegram adapters
…n telegram adapter
aeeb4bd to
f15494b
Compare
…oxy support - Remove duplicate GwAttachment/Attachment definitions - Fix syntax errors in LINE adapter - Update Attachment schema to support both URL proxy and legacy base64 - Update Feishu and Telegram adapters to match new schema - Fix main crate gateway display logic for optional URLs
|
原本修的差不多了,但main branch 更新造成衝突,因為attachment其它元件也有共用,不太好解, |
Re-review: PR #726 (commit
|
|
Hi @iamninihuang — thanks for fixing the One more thing to resolve: Here's what's happening: How to fix: You likely want side A only (the "Updated upstream" block), because:
Suggested resolution: let listen_addr = std::env::var("GATEWAY_LISTEN").unwrap_or_else(|_| "0.0.0.0:8080".into());
let ws_token = std::env::var("GATEWAY_WS_TOKEN").ok();
// Inbound media proxy config (Issue #690)
let public_url = std::env::var("GATEWAY_MEDIA_BASE_URL")
.or_else(|_| std::env::var("GATEWAY_PUBLIC_URL"))
.unwrap_or_else(|_| "http://localhost:8080".into());
// Warn if public_url looks like a loopback address
if public_url.contains("localhost") || public_url.contains("127.0.0.1") {
warn!(
public_url = %public_url,
"GATEWAY_PUBLIC_URL looks like a loopback address. ..."
);
}
// Optional Cloudflare Tunnel domain for logging
let cloudflare_tunnel_url = std::env::var("CLOUDFLARE_TUNNEL_URL").ok();Then delete the conflict markers ( Let me know if you have any questions! |
|
原本修的差不多了,但main branch 更新造成衝突,因為attachment其它元件也有共用,不太好解, |
Extends src/gateway.rs attachment handling to transcribe audio attachments via the existing STT pipeline (previously only Discord/Slack adapters went through download_and_transcribe; Custom Gateway adapters got no audio path even though stt::transcribe was available). When a gateway adapter (Feishu, Google Chat, etc.) sends an Attachment with attachment_type = "audio", core now: 1. Decodes base64 → audio bytes 2. Calls stt::transcribe with the configured SttConfig 3. Wraps the transcript as a ContentBlock::Text: "[Voice message transcript]: ..." The audio branch is gated on stt_config.enabled — if STT is disabled in config, audio attachments fall through unchanged (same as before). Threads stt_config through GatewayParams and run_gateway_adapter. This closes the audio attachment gap left by the (now-closed) PR openabdev#726 without re-introducing the HTTP MediaStore proxy approach. Pairs with the Google Chat adapter audio download (separate PR) — once both land, Google Chat voice/audio attachments work end-to-end. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Summary
Added support for incoming images and audio/voice messages for LINE and Telegram adapters.
The Custom Gateway currently only supports text messages. This PR implements a Media Proxy that downloads media from platform APIs (which require auth) and serves it at a temporary local HTTP endpoint. This allows OpenAB Core and AI agents to fetch and analyze media via standard URLs, mirroring the image-understanding workflow already present in the Discord adapter.
Features
attachmentsfield toGatewayEventto propagate media URLs to the agent.Testing
cargo test— 96 passedcargo clippy— 0 warningsBreaking Changes
None. Added optional
attachmentsfield to the gateway event schema. New environment variablesGATEWAY_MEDIA_BASE_URLandGATEWAY_MEDIA_STORE_TTLhave safe defaults.Discord Discussion URL
https://discord.com/channels/1491295327620169908/1500160821567684660/1499859716409393172
Closes #690