diff --git a/CHANGELOG.md b/CHANGELOG.md
index f1ff586c08..e1b1f9c30c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,37 @@ All notable changes to this project will be documented in this file.
## [Unreleased]
+### Changed
+
+- Tiptap float CSS: allow floating on mobile as well
+
+## [7.5.1] - 2026-03-19
+
+### Fixed
+
+- **CRA reference_id uniqueness**: Added video ID to reference_id format (`{env}-{slug}-{id}-{s3_etag}-{generation}`) to prevent cross-contamination between videos with identical slugs
+- **CRA encoding_generation race condition**: Added reload fallback in CreateMediaJob when encoding_generation is nil due to uncommitted transaction from S3::CreateFileJob
+- **CRA MonitorProcessingJob orphan detection**: Added 10-minute threshold to orphan detection for videos with reference_id but no remote_id, preventing false positives on just-uploaded videos
+
+## [7.5.0] - 2026-03-19
+
+### Added
+
+- **CRA presigned S3 URLs**: Encoder no longer downloads video to local disk or uploads via SFTP. CRA fetches video directly from S3 via presigned URL (7-day expiry). Only the XML manifest is uploaded via SFTP.
+- **Two-phase encoding**: When `encoder_processing_phases` > 1, CreateMediaJob submits two manifests with the same `refId` — SD first, then HD. Backward compatible: single-phase when `encoder_processing_phases` is nil/1.
+- **Encoding progress tracking**: CheckProgressJob parses CRA `messages` array for per-phase milestones, extracts video duration, and estimates completion time. New processing states: `sd_processing → sd_processed → hd_processing → full_media_processed`.
+- **Console encoding info component**: `EncodingInfoComponent` shows current encoding phase and progress percentage on video file detail page, with real-time updates via MessageBus.
+- **S3 client and jobs**: `Folio::S3::Client` for presigned URL generation, `Folio::S3::CreateFileJob` for S3-based file creation, `Folio::File::GetVideoMetadataJob` for video metadata extraction.
+- **Video thumbnail generation**: `GenerateThumbnailJob` reworked for reliable thumbnail generation from video files.
+
+### Changed
+
+- `ShowComponent` now exposes `aasmState` as a Stimulus value and reloads via Turbo on state transitions (encoding progress, file updates)
+- `ShowComponent` layout: state badge moved to right side (`ms-auto`), encoding info rendered inline after state
+
+### Fixed
+
+- Add `try` to `dont_run_after_save_jobs` to enable thumbnail generation for `private_attachments`
## [7.4.1] - 2026-03-11
diff --git a/Gemfile.lock b/Gemfile.lock
index d961a9a9a2..10fa11c905 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -10,7 +10,7 @@ GIT
PATH
remote: .
specs:
- folio (7.4.1)
+ folio (7.5.1)
aasm
activejob-uniqueness (>= 0.3.0)
acts-as-taggable-on
@@ -89,29 +89,29 @@ GEM
specs:
aasm (5.5.2)
concurrent-ruby (~> 1.0)
- actioncable (8.0.4)
- actionpack (= 8.0.4)
- activesupport (= 8.0.4)
+ actioncable (8.0.4.1)
+ actionpack (= 8.0.4.1)
+ activesupport (= 8.0.4.1)
nio4r (~> 2.0)
websocket-driver (>= 0.6.1)
zeitwerk (~> 2.6)
- actionmailbox (8.0.4)
- actionpack (= 8.0.4)
- activejob (= 8.0.4)
- activerecord (= 8.0.4)
- activestorage (= 8.0.4)
- activesupport (= 8.0.4)
+ actionmailbox (8.0.4.1)
+ actionpack (= 8.0.4.1)
+ activejob (= 8.0.4.1)
+ activerecord (= 8.0.4.1)
+ activestorage (= 8.0.4.1)
+ activesupport (= 8.0.4.1)
mail (>= 2.8.0)
- actionmailer (8.0.4)
- actionpack (= 8.0.4)
- actionview (= 8.0.4)
- activejob (= 8.0.4)
- activesupport (= 8.0.4)
+ actionmailer (8.0.4.1)
+ actionpack (= 8.0.4.1)
+ actionview (= 8.0.4.1)
+ activejob (= 8.0.4.1)
+ activesupport (= 8.0.4.1)
mail (>= 2.8.0)
rails-dom-testing (~> 2.2)
- actionpack (8.0.4)
- actionview (= 8.0.4)
- activesupport (= 8.0.4)
+ actionpack (8.0.4.1)
+ actionview (= 8.0.4.1)
+ activesupport (= 8.0.4.1)
nokogiri (>= 1.8.5)
rack (>= 2.2.4)
rack-session (>= 1.0.1)
@@ -119,38 +119,38 @@ GEM
rails-dom-testing (~> 2.2)
rails-html-sanitizer (~> 1.6)
useragent (~> 0.16)
- actiontext (8.0.4)
- actionpack (= 8.0.4)
- activerecord (= 8.0.4)
- activestorage (= 8.0.4)
- activesupport (= 8.0.4)
+ actiontext (8.0.4.1)
+ actionpack (= 8.0.4.1)
+ activerecord (= 8.0.4.1)
+ activestorage (= 8.0.4.1)
+ activesupport (= 8.0.4.1)
globalid (>= 0.6.0)
nokogiri (>= 1.8.5)
- actionview (8.0.4)
- activesupport (= 8.0.4)
+ actionview (8.0.4.1)
+ activesupport (= 8.0.4.1)
builder (~> 3.1)
erubi (~> 1.11)
rails-dom-testing (~> 2.2)
rails-html-sanitizer (~> 1.6)
- activejob (8.0.4)
- activesupport (= 8.0.4)
+ activejob (8.0.4.1)
+ activesupport (= 8.0.4.1)
globalid (>= 0.3.6)
activejob-uniqueness (0.4.0)
activejob (>= 4.2, < 8.1)
redlock (>= 2.0, < 3)
- activemodel (8.0.4)
- activesupport (= 8.0.4)
- activerecord (8.0.4)
- activemodel (= 8.0.4)
- activesupport (= 8.0.4)
+ activemodel (8.0.4.1)
+ activesupport (= 8.0.4.1)
+ activerecord (8.0.4.1)
+ activemodel (= 8.0.4.1)
+ activesupport (= 8.0.4.1)
timeout (>= 0.4.0)
- activestorage (8.0.4)
- actionpack (= 8.0.4)
- activejob (= 8.0.4)
- activerecord (= 8.0.4)
- activesupport (= 8.0.4)
+ activestorage (8.0.4.1)
+ actionpack (= 8.0.4.1)
+ activejob (= 8.0.4.1)
+ activerecord (= 8.0.4.1)
+ activesupport (= 8.0.4.1)
marcel (~> 1.0)
- activesupport (8.0.4)
+ activesupport (8.0.4.1)
base64
benchmark (>= 0.3)
bigdecimal
@@ -159,7 +159,7 @@ GEM
drb
i18n (>= 1.6, < 2)
logger (>= 1.4.2)
- minitest (>= 5.1)
+ minitest (>= 5.1, < 6)
securerandom (>= 0.3)
tzinfo (~> 2.0, >= 2.0.5)
uri (>= 0.13.1)
@@ -205,7 +205,7 @@ GEM
babel-source (>= 4.0, < 6)
execjs (~> 2.0)
base64 (0.3.0)
- bcrypt (3.1.20)
+ bcrypt (3.1.22)
benchmark (0.5.0)
better_errors (2.10.1)
erubi (>= 1.0.0)
@@ -247,8 +247,8 @@ GEM
cocoon (1.2.15)
coderay (1.1.3)
colorize (0.8.1)
- concurrent-ruby (1.3.5)
- connection_pool (2.5.4)
+ concurrent-ruby (1.3.6)
+ connection_pool (2.5.5)
countries (8.0.4)
unaccent (~> 0.3)
country_select (11.0.0)
@@ -378,7 +378,7 @@ GEM
multi_xml (>= 0.5.2)
httpparty (0.2.0)
httparty (> 0)
- i18n (1.14.7)
+ i18n (1.14.8)
concurrent-ruby (~> 1.0)
i18n-tasks (1.0.15)
activesupport (>= 4.0.2)
@@ -405,7 +405,7 @@ GEM
rails-dom-testing (>= 1, < 3)
railties (>= 4.2.0)
thor (>= 0.14, < 2.0)
- json (2.18.1)
+ json (2.19.2)
json-jwt (1.17.0)
activesupport (>= 4.2)
aes_key_wrap
@@ -427,7 +427,7 @@ GEM
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
logger (1.7.0)
- loofah (2.24.1)
+ loofah (2.25.1)
crass (~> 1.0.2)
nokogiri (>= 1.12.0)
lumberjack (1.4.2)
@@ -470,9 +470,9 @@ GEM
net-protocol
net-ssh (7.3.0)
nio4r (2.7.5)
- nokogiri (1.18.10-arm64-darwin)
+ nokogiri (1.19.2-arm64-darwin)
racc (~> 1.4)
- nokogiri (1.18.10-x86_64-linux-gnu)
+ nokogiri (1.19.2-x86_64-linux-gnu)
racc (~> 1.4)
notiffany (0.1.3)
nenv (~> 0.1)
@@ -551,7 +551,7 @@ GEM
nio4r (~> 2.0)
raabro (1.4.0)
racc (1.8.1)
- rack (2.2.21)
+ rack (2.2.22)
rack-mini-profiler (4.0.1)
rack (>= 1.2.0)
rack-protection (3.2.0)
@@ -564,33 +564,33 @@ GEM
rackup (1.0.1)
rack (< 3)
webrick
- rails (8.0.4)
- actioncable (= 8.0.4)
- actionmailbox (= 8.0.4)
- actionmailer (= 8.0.4)
- actionpack (= 8.0.4)
- actiontext (= 8.0.4)
- actionview (= 8.0.4)
- activejob (= 8.0.4)
- activemodel (= 8.0.4)
- activerecord (= 8.0.4)
- activestorage (= 8.0.4)
- activesupport (= 8.0.4)
+ rails (8.0.4.1)
+ actioncable (= 8.0.4.1)
+ actionmailbox (= 8.0.4.1)
+ actionmailer (= 8.0.4.1)
+ actionpack (= 8.0.4.1)
+ actiontext (= 8.0.4.1)
+ actionview (= 8.0.4.1)
+ activejob (= 8.0.4.1)
+ activemodel (= 8.0.4.1)
+ activerecord (= 8.0.4.1)
+ activestorage (= 8.0.4.1)
+ activesupport (= 8.0.4.1)
bundler (>= 1.15.0)
- railties (= 8.0.4)
+ railties (= 8.0.4.1)
rails-dom-testing (2.3.0)
activesupport (>= 5.0.0)
minitest
nokogiri (>= 1.6)
- rails-html-sanitizer (1.6.2)
- loofah (~> 2.21)
+ rails-html-sanitizer (1.7.0)
+ loofah (~> 2.25)
nokogiri (>= 1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0)
rails-i18n (8.0.2)
i18n (>= 0.7, < 2)
railties (>= 8.0.0, < 9)
- railties (8.0.4)
- actionpack (= 8.0.4)
- activesupport (= 8.0.4)
+ railties (8.0.4.1)
+ actionpack (= 8.0.4.1)
+ activesupport (= 8.0.4.1)
irb (~> 1.13)
rackup (>= 1.0.0)
rake (>= 12.2)
@@ -736,7 +736,7 @@ GEM
execjs (>= 0.3.0, < 3)
thor (1.4.0)
tilt (2.0.11)
- timeout (0.4.4)
+ timeout (0.6.1)
traco (5.3.3)
activerecord (>= 4.2)
trailblazer-option (0.1.2)
diff --git a/ROADMAP.md b/ROADMAP.md
new file mode 100644
index 0000000000..d94022ea56
--- /dev/null
+++ b/ROADMAP.md
@@ -0,0 +1,276 @@
+# Folio Roadmap
+
+This document collects proposed roadmap themes for Folio as an open-source Rails CMS engine.
+It is a planning draft intended to support discussion and prioritization, not a delivery commitment.
+
+## Planning Principles
+
+- Prefer engine-level contracts over one-off rewrites.
+- Keep the default path simple for small projects.
+- Support multiple infrastructure models where justified.
+- Treat generators, documentation, tests, and automation as part of the product surface.
+- Reduce legacy surface area in stages, with clear migration paths.
+- Optimize for both human contributors and AI-assisted workflows.
+
+## Now
+
+The "Now" horizon is split into foundational tracks and migration tracks.
+Foundational tracks define the contracts and platform direction that later migrations should build on.
+Migration tracks reduce the current legacy surface without losing delivery focus.
+
+### Foundational Tracks
+
+#### 1. Pluggable Image Transformation Pipeline
+
+**Problem**
+
+Dragonfly-based thumbnail generation creates operational and architectural pain:
+
+- background thumbnail jobs are fragile and add queue pressure
+- storage, URL generation, and processing orchestration are tightly coupled
+- cache behavior is hard to reason about
+- different projects need different infrastructure models
+
+**Target Outcome**
+
+Introduce a provider-based image transformation layer with a stable Folio contract and multiple backend implementations.
+
+**Initial Scope**
+
+- Define a canonical Folio thumbnail interface.
+- Introduce stable application-facing thumbnail URLs.
+- Internally use versioned or immutable result objects derived from source checksum + variant specification.
+- Add a built-in compatibility provider for Sidekiq-based processing.
+- Design a remote transformer API contract for external processing services.
+- Make private files, signed access, crop variants, and invalidation part of the design from day one.
+
+**Delivery Options**
+
+- Built-in Sidekiq provider for small projects.
+- Remote transformer service running on Kubernetes, either per app or per cluster.
+- AWS-oriented provider using Thumbor and S3-compatible storage, with [`sinfin/aws-file-handler`](https://github.com/sinfin/aws-file-handler) as an existing reference point.
+- Optional future serverless provider for low-traffic or bursty workloads.
+
+**Success Criteria**
+
+- Folio no longer depends on Dragonfly thumbnail jobs as the only model.
+- Projects can switch providers without changing view-level APIs.
+- Thumbnail URLs remain stable at the application level.
+- Processing failures and cache misses are observable and debuggable.
+
+#### 2. Cache Architecture Refresh
+
+**Problem**
+
+The current `cache_key_base` approach is not sufficient for larger projects and does not provide a robust invalidation model across dimensions such as site, locale, session-sensitive rendering, and public/private variants.
+
+**Target Outcome**
+
+Move from a narrow cache key convention to a clearer cache architecture with explicit dimensions, invalidation rules, and debugging support.
+
+**Initial Scope**
+
+- Formalize cache dimensions: site, locale, user/session requirements, published state, content version, and other relevant axes.
+- Replace or supersede `cache_key_base` with a better engine-level contract.
+- Integrate with existing HTTP cache work and component session requirements.
+- Add cache diagnostics and developer tooling so cache decisions are explainable.
+- Validate the approach against a larger-project proof of concept.
+- Use the existing exploratory branch `petr/has-folio-tiptap-and-cache` as an initial reference point, then clean up and extract the durable architectural direction from it.
+
+**Success Criteria**
+
+- Cache invalidation is predictable on large installs.
+- Cache contracts are documented and testable.
+- Developers can inspect why a response or component was cached or bypassed.
+
+#### 3. Packwerk and Modular Folio Surface
+
+**Problem**
+
+Folio still behaves largely as one large engine surface.
+That makes architectural boundaries harder to enforce, increases accidental coupling, and makes it difficult to enable only selected parts of the engine in a clean way.
+
+**Target Outcome**
+
+Introduce explicit package boundaries and a more modular Folio layout so projects can reason about dependencies and selectively adopt engine capabilities.
+
+**Initial Scope**
+
+- Use the existing cache proof of concept, currently explored in `petr/has-folio-tiptap-and-cache`, as one of the first validation areas for package boundaries.
+- Introduce Packwerk in a way that provides architectural feedback without blocking all development immediately.
+- Identify candidate packages such as caching, files/media, console UI, TipTap, users, newsletter features, and other separable engine areas.
+- Define which parts of Folio should be independently switchable at the configuration level and which should remain core.
+- Reduce implicit cross-package dependencies and document allowed dependency directions.
+
+**Success Criteria**
+
+- Architectural boundaries become visible and enforceable.
+- Large projects can adopt only the Folio areas they need with less incidental coupling.
+- New engine work happens inside clearer module boundaries instead of expanding a monolith.
+
+#### 4. OSS Contributor Platform
+
+**Problem**
+
+Folio works today, but the open-source contributor experience is still too dependent on internal knowledge and manual setup steps.
+
+**Target Outcome**
+
+Make the repository easy to install, run, test, and change for any external Rails developer.
+
+**Initial Scope**
+
+- Standardize local entrypoints such as setup, dev, test, lint, and CI commands.
+- Reduce or isolate secrets required for local development.
+- Define and document the supported version matrix for Ruby, Rails, Node, and external tooling.
+- Treat generators as public API and harden them with real smoke tests.
+- Improve release metadata, docs consistency, and contributor-facing guidance.
+
+**Success Criteria**
+
+- A new contributor can boot the project from documented commands alone.
+- Generator workflows are tested, not just documented.
+- Documentation reflects the actual supported stack.
+
+### Migration Tracks
+
+#### 5. UI Modernization Phase 1
+
+**Problem**
+
+Folio still carries a large legacy UI surface across Cells, jQuery, and legacy React islands.
+That slows down maintenance, increases onboarding cost, and keeps multiple frontend patterns alive at the same time.
+
+**Target Outcome**
+
+Make ViewComponent + Stimulus the default and preferred path for Folio UI.
+
+**Initial Scope**
+
+- Continue the staged migration from Cells to ViewComponents on the most-used engine surfaces.
+- Replace jQuery-driven interactions with Stimulus controllers where practical.
+- Identify legacy React islands that should be migrated to Stimulus rather than expanded.
+- Stop growing the legacy surface area through generators and new features.
+- Publish a migration tracker so the remaining legacy footprint is visible.
+
+**Success Criteria**
+
+- New engine UI work does not introduce additional Cells or jQuery.
+- The highest-value admin and public components have ViewComponent-based replacements.
+- Frontend interaction patterns become more uniform across the codebase.
+
+## Next
+
+### 6. Atom to TipTap Migration Program
+
+**Problem**
+
+Atoms and TipTap currently coexist, but there is no complete engine-level migration program covering authoring UX, content migration, coexistence rules, and project guidance.
+
+**Target Outcome**
+
+Provide a realistic path for teams that want to move from atom-heavy editing flows to TipTap-driven structured content.
+
+**Scope**
+
+- Define the target role of Atoms vs TipTap nodes in Folio.
+- Prepare authoring UI and editor affordances needed for wider TipTap adoption.
+- Write migration guidelines for teams and projects.
+- Support coexistence during migration rather than forcing a big-bang rewrite.
+- Add tooling for content migration where possible.
+
+**Success Criteria**
+
+- Teams understand when to use Atoms, when to use TipTap, and how to migrate.
+- Folio can support mixed-mode projects during transition.
+- New content modeling guidance is coherent and maintainable.
+
+### 7. UI Modernization Phase 2
+
+**Problem**
+
+After the first modernization pass, some legacy frontend surface will still remain for edge cases, generators, and older admin workflows.
+
+**Target Outcome**
+
+Complete the shift to the modern engine UI stack and retire legacy defaults.
+
+**Scope**
+
+- Finish the Cells to ViewComponents migration where a compatible replacement exists.
+- Remove remaining jQuery-heavy workflows from core engine paths.
+- Reassess the role of the legacy React app and either shrink it further or replace it.
+- Update generators so newly generated code always follows the modern stack.
+
+**Success Criteria**
+
+- Legacy UI technologies are no longer the default scaffolding path.
+- The maintenance burden of multiple frontend stacks is materially reduced.
+
+### 8. AI Agent Readiness
+
+**Problem**
+
+Folio already includes AI-oriented instructions, but host applications generated by Folio do not yet get a strong, deterministic, agent-friendly contract.
+
+**Target Outcome**
+
+Make Folio-generated projects easier to use with coding agents such as Codex, Cursor, and Claude Code.
+
+**Scope**
+
+- Generate a richer local `AGENTS.md` for installed apps instead of only pointing back to the gem source.
+- Provide deterministic setup, lint, test, and build entrypoints.
+- Expose generators, config keys, and environment expectations in a machine-friendly way where useful.
+- Reduce ambiguity around which stack is authoritative in each part of the repository.
+
+**Success Criteria**
+
+- Agents can bootstrap work from local project instructions without manual discovery.
+- Folio-generated apps are easier to navigate and modify safely.
+
+## Later
+
+### 9. Data Model Cleanup
+
+**Problem**
+
+Some engine areas still rely on older persistence conventions and compatibility code that complicate upgrades and long-term maintenance.
+
+**Target Outcome**
+
+Reduce legacy persistence patterns and simplify the internal model layer.
+
+**Scope**
+
+- Replace remaining YAML `serialize` usage with more modern typed or JSON-based approaches where appropriate.
+- Continue removing transitional compatibility branches once replacement paths are established.
+- Document deprecation timelines for internal contracts that should disappear in the next major version.
+
+### 10. Deployment Model Portfolio
+
+**Problem**
+
+Different Folio projects have very different scale and infrastructure requirements.
+A single mandatory operations model is not a good fit.
+
+**Target Outcome**
+
+Support multiple validated deployment models without forcing the same trade-offs on every installation.
+
+**Candidate Models**
+
+- Simple in-app processing for small projects.
+- Shared or dedicated transformer service for Kubernetes-based stacks.
+- AWS-native image pipeline for teams that prefer cloud-managed primitives.
+
+**Goal**
+
+Keep the Folio developer-facing contract stable while making infrastructure a deployment choice instead of an engine constraint.
+
+## Cross-Cutting Questions
+
+- Which parts of the current engine are true public API and need compatibility guarantees?
+- Which migrations should be automated, and which should remain guided/manual?
+- Where do we want strict defaults, and where do we want provider-based extensibility?
+- Which large reference projects should be used to validate the roadmap decisions before declaring them as engine direction?
diff --git a/app/assets/javascripts/folio/console/base.js b/app/assets/javascripts/folio/console/base.js
index 8e42fc92d0..aedc53766e 100644
--- a/app/assets/javascripts/folio/console/base.js
+++ b/app/assets/javascripts/folio/console/base.js
@@ -111,6 +111,7 @@
//= require folio/console/files/picker/document_component
//= require folio/console/files/picker/image_component
//= require folio/console/files/picker_component
+//= require folio/console/files/show/encoding_info_component
//= require folio/console/files/show/thumbnails/crop_edit_component
//= require folio/console/files/show_component
//= require folio/console/files/show_modal_component
diff --git a/app/assets/stylesheets/folio/tiptap/_styles.scss b/app/assets/stylesheets/folio/tiptap/_styles.scss
index 110f97e405..082cc361d7 100644
--- a/app/assets/stylesheets/folio/tiptap/_styles.scss
+++ b/app/assets/stylesheets/folio/tiptap/_styles.scss
@@ -53,13 +53,14 @@ $f-tiptap__media-min-width--desktop: 708px !default;
--f-tiptap-columns__gap: var(--f-tiptap__spacer);
--f-tiptap-float__aside-margin-y: var(--f-tiptap__spacer);
- --f-tiptap-float__aside-width: 0;
--f-tiptap-float__aside-margin-x: 0;
--f-tiptap-float__aside-offset: 0;
--f-tiptap-float__aside-offset--tablet: 0;
--f-tiptap-float__aside-margin-x--tablet: 1rem;
--f-tiptap-float__aside-offset--desktop: 0;
--f-tiptap-float__aside-margin-x--desktop: 1rem;
+ --f-tiptap-float__aside-side: left;
+ --f-tiptap-float__aside-width: 100%;
--f-tiptap-li__margin-top: 0.5rem;
--f-tiptap-li__margin-bottom: 0.5rem;
@@ -416,9 +417,13 @@ $f-tiptap__media-min-width--desktop: 708px !default;
}
.f-tiptap-float__aside {
- margin-bottom: var(--f-tiptap-float__aside-margin-y);
position: relative;
z-index: 2;
+ margin-bottom: var(--f-tiptap-float__aside-margin-y);
+ margin-right: var(--f-tiptap-float__aside-margin-x);
+ margin-left: var(--f-tiptap-float__aside-offset);
+ width: var(--f-tiptap-float__aside-width);
+ float: var(--f-tiptap-float__aside-side);
}
.f-tiptap-editor & .f-tiptap-column::before,
@@ -555,7 +560,22 @@ $f-tiptap__media-min-width--desktop: 708px !default;
}
}
+ .f-tiptap-float[data-f-tiptap-float-side="right"] .f-tiptap-float__aside {
+ margin-right: var(--f-tiptap-float__aside-offset);
+ margin-left: var(--f-tiptap-float__aside-margin-x);
+ }
+
+ .f-tiptap-float[data-f-tiptap-float-size="small"] .f-tiptap-float__aside {
+ width: var(--f-tiptap-float__aside-width);
+ }
+
+ .f-tiptap-float[data-f-tiptap-float-size="large"] .f-tiptap-float__aside {
+ width: var(--f-tiptap-float__aside-width);
+ }
+ .f-tiptap-float[data-f-tiptap-float-side="right"] .f-tiptap-float__aside {
+ float: var(--f-tiptap-float__aside-side);
+ }
@container (min-width: #{$f-tiptap__media-min-width--tablet}) {
.f-tiptap-float {
@@ -564,6 +584,7 @@ $f-tiptap__media-min-width--desktop: 708px !default;
--f-tiptap-float__aside-side: left;
--f-tiptap-float__aside-margin-x: var(--f-tiptap-float__aside-margin-x--tablet);
--f-tiptap-float__aside-offset: var(--f-tiptap-float__aside-offset--tablet);
+ --f-tiptap-float__aside-margin-y: var(--f-tiptap__spacer);
&::after {
content: "";
@@ -586,30 +607,12 @@ $f-tiptap__media-min-width--desktop: 708px !default;
.f-tiptap-float__aside {
float: left;
- margin-right: var(--f-tiptap-float__aside-margin-x);
- margin-bottom: var(--f-tiptap__spacer);
- margin-left: var(--f-tiptap-float__aside-offset);
- width: var(--f-tiptap-float__aside-width);
position: relative;
container-type: inline-size;
box-sizing: border-box;
min-height: 2rem;
}
- .f-tiptap-float[data-f-tiptap-float-side="right"] .f-tiptap-float__aside {
- float: right;
- margin-right: var(--f-tiptap-float__aside-offset);
- margin-left: var(--f-tiptap-float__aside-margin-x);
- }
-
- .f-tiptap-float[data-f-tiptap-float-size="small"] .f-tiptap-float__aside {
- width: var(--f-tiptap-float__aside-width);
- }
-
- .f-tiptap-float[data-f-tiptap-float-size="large"] .f-tiptap-float__aside {
- width: var(--f-tiptap-float__aside-width);
- }
-
.f-tiptap-columns {
display: grid;
grid-auto-columns: 1fr;
diff --git a/app/components/folio/console/files/show/encoding_info_component.js b/app/components/folio/console/files/show/encoding_info_component.js
new file mode 100644
index 0000000000..6e5d055b90
--- /dev/null
+++ b/app/components/folio/console/files/show/encoding_info_component.js
@@ -0,0 +1,46 @@
+window.Folio.Stimulus.register('f-c-files-show-encoding-info', class extends window.Stimulus.Controller {
+ static values = {
+ fileId: Number
+ }
+
+ connect () {
+ this.messageBusCallbackKey = `f-c-files-show-encoding-info--${this.fileIdValue}`
+ window.Folio.MessageBus.callbacks[this.messageBusCallbackKey] = (message) => {
+ if (message.type === 'Folio::CraMediaCloud::CheckProgressJob/encoding_progress' &&
+ message.data.id === this.fileIdValue) {
+ this.update(message.data)
+ }
+ }
+ }
+
+ disconnect () {
+ if (this.messageBusCallbackKey && window.Folio.MessageBus.callbacks) {
+ delete window.Folio.MessageBus.callbacks[this.messageBusCallbackKey]
+ }
+ }
+
+ update (data) {
+ const phaseEl = this.element.querySelector('.f-c-files-show-encoding-info__phase')
+ const progressEl = this.element.querySelector('.f-c-files-show-encoding-info__progress')
+
+ if (data.aasm_state === 'processing_failed') {
+ if (phaseEl) {
+ phaseEl.classList.add('f-c-files-show-encoding-info__phase--failed')
+ phaseEl.textContent = data.failed_label || ''
+ }
+ if (progressEl) {
+ progressEl.textContent = ''
+ }
+ return
+ }
+
+ if (phaseEl && data.current_phase_label) {
+ phaseEl.classList.remove('f-c-files-show-encoding-info__phase--failed')
+ phaseEl.textContent = data.current_phase_label
+ }
+
+ if (progressEl) {
+ progressEl.textContent = data.progress_percentage != null ? `${data.progress_percentage}%` : ''
+ }
+ }
+})
diff --git a/app/components/folio/console/files/show/encoding_info_component.rb b/app/components/folio/console/files/show/encoding_info_component.rb
new file mode 100644
index 0000000000..9ba94d1c70
--- /dev/null
+++ b/app/components/folio/console/files/show/encoding_info_component.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+class Folio::Console::Files::Show::EncodingInfoComponent < Folio::Console::ApplicationComponent
+ def initialize(file:)
+ @file = file
+ @rsd = file.remote_services_data || {}
+ end
+
+ def render?
+ cra_file? && (processing? || failed?)
+ end
+
+ def processing?
+ @file.processing?
+ end
+
+ def failed?
+ @file.processing_failed?
+ end
+
+ def retrying?
+ failed? && @rsd["retry_scheduled_at"].present? && @rsd["retry_count"].to_i < 2
+ end
+
+ def current_phase
+ @rsd["current_phase"]
+ end
+
+ def current_phase_label
+ return current_phase&.humanize if current_phase.blank?
+
+ encoding_phase = @rsd["current_encoding_phase"]
+ processing_phases = @rsd["processing_phases"].to_i
+
+ if processing_phases > 1 && encoding_phase.present?
+ phase_name = @file.try(:encoder_phase_name, encoding_phase)
+ if phase_name
+ t(".phase_#{current_phase}_named",
+ name: phase_name,
+ default: t(".phase_#{current_phase}", default: current_phase.humanize))
+ else
+ t(".phase_#{current_phase}_multi",
+ phase: encoding_phase,
+ total: processing_phases,
+ default: t(".phase_#{current_phase}", default: current_phase.humanize))
+ end
+ else
+ t(".phase_#{current_phase}", default: current_phase.humanize)
+ end
+ end
+
+ def encoding_progress
+ @rsd["progress_percentage"]
+ end
+
+ def data
+ {
+ "controller" => "f-c-files-show-encoding-info",
+ "f-c-files-show-encoding-info-file-id-value" => @file.id,
+ }
+ end
+
+ private
+ def cra_file?
+ # Check capability first (covers enqueued state before 'service' is written)
+ return true if @file.is_a?(Folio::CraMediaCloud::FileProcessing)
+
+ # Fallback for plain Folio::File::Video without concern (legacy or plain video)
+ @file.try(:processing_service) == "cra_media_cloud" ||
+ @rsd["service"] == "cra_media_cloud" ||
+ @rsd["current_phase"].present? ||
+ @rsd["retry_count"].present?
+ end
+end
diff --git a/app/components/folio/console/files/show/encoding_info_component.sass b/app/components/folio/console/files/show/encoding_info_component.sass
new file mode 100644
index 0000000000..2b6a9b8d7f
--- /dev/null
+++ b/app/components/folio/console/files/show/encoding_info_component.sass
@@ -0,0 +1,26 @@
+.f-c-files-show-encoding-info
+ display: inline
+ color: $gray-600
+ font-size: $font-size-sm
+ white-space: nowrap
+
+ &:empty
+ display: none
+
+ &__progress
+ &:not(:empty)::before
+ content: " · "
+
+ &__phase--failed
+ color: $danger
+
+// Pulse the yellow state dot when encoding info component follows the state cell
+.f-c-files-show__meta-item:has(+ .f-c-files-show-encoding-info:not(:empty))
+ .f-c-state__state-square--state-processing
+ animation: f-c-files-show-encoding-info-pulse 2s ease-in-out infinite
+
+@keyframes f-c-files-show-encoding-info-pulse
+ 0%, 100%
+ opacity: 1
+ 50%
+ opacity: 0.35
diff --git a/app/components/folio/console/files/show/encoding_info_component.slim b/app/components/folio/console/files/show/encoding_info_component.slim
new file mode 100644
index 0000000000..72661dae45
--- /dev/null
+++ b/app/components/folio/console/files/show/encoding_info_component.slim
@@ -0,0 +1,13 @@
+span.f-c-files-show-encoding-info data=data
+ - if failed?
+ span.f-c-files-show-encoding-info__phase.f-c-files-show-encoding-info__phase--failed
+ - if retrying?
+ = t(".phase_failed_retrying")
+ - else
+ = t(".phase_failed")
+ - elsif processing?
+ span.f-c-files-show-encoding-info__phase
+ = current_phase_label
+ span.f-c-files-show-encoding-info__progress
+ - if encoding_progress.present?
+ = "#{encoding_progress}%"
diff --git a/app/components/folio/console/files/show_component.js b/app/components/folio/console/files/show_component.js
index 243e544262..d35675e23a 100644
--- a/app/components/folio/console/files/show_component.js
+++ b/app/components/folio/console/files/show_component.js
@@ -6,7 +6,8 @@ window.Folio.Stimulus.register('f-c-files-show', class extends window.Stimulus.C
fileType: String,
id: String,
showUrl: String,
- indexUrl: String
+ indexUrl: String,
+ aasmState: String
}
disconnect () {
@@ -93,6 +94,15 @@ window.Folio.Stimulus.register('f-c-files-show', class extends window.Stimulus.C
messageBusCallback (event) {
const message = event.detail.message
+
+ if (message.type === 'Folio::CraMediaCloud::CheckProgressJob/encoding_progress') {
+ return this.handleEncodingProgress(message.data)
+ }
+
+ if (message.type === 'Folio::ApplicationJob/file_update') {
+ return this.handleFileUpdate(message.data)
+ }
+
if (message.type !== 'Folio::S3::CreateFileJob') return
switch (message.data.type) {
case 'replace-success':
@@ -104,10 +114,34 @@ window.Folio.Stimulus.register('f-c-files-show', class extends window.Stimulus.C
}
}
- messageBusSuccess (data) {
+ handleEncodingProgress (data) {
+ if (data.aasm_state === 'processing') {
+ // Update state badge label
+ const stateLabel = this.element.querySelector('.f-c-state__state-label')
+ if (stateLabel) stateLabel.textContent = data.aasm_state_human
+ } else {
+ // Encoding finished or failed — reload to show final state
+ this.reloadFrame()
+ }
+ }
+
+ handleFileUpdate (data) {
+ if (!data || !data.attributes) return
+
+ const newState = data.attributes.aasm_state
+ if (newState && newState !== this.aasmStateValue) {
+ this.reloadFrame()
+ }
+ }
+
+ reloadFrame () {
window.Turbo.visit(this.showUrlValue, { frame: this.element.closest('turbo-frame').id })
}
+ messageBusSuccess (data) {
+ this.reloadFrame()
+ }
+
messageBusFailure (data) {
this.loadingValue = false
delete this.replacingFileData
@@ -128,6 +162,24 @@ if (window.Folio && window.Folio.MessageBus && window.Folio.MessageBus.callbacks
}
}
+ if (message.type === 'Folio::CraMediaCloud::CheckProgressJob/encoding_progress') {
+ const selector = `.f-c-files-show[data-f-c-files-show-id-value="${message.data.id}"]`
+ const targets = document.querySelectorAll(selector)
+
+ for (const target of targets) {
+ target.dispatchEvent(new CustomEvent('f-c-files-show/message', { detail: { message } }))
+ }
+ }
+
+ if (message.type === 'Folio::ApplicationJob/file_update') {
+ const selector = `.f-c-files-show[data-f-c-files-show-id-value="${message.data.id}"]`
+ const targets = document.querySelectorAll(selector)
+
+ for (const target of targets) {
+ target.dispatchEvent(new CustomEvent('f-c-files-show/message', { detail: { message } }))
+ }
+ }
+
if (message.type === 'f-c-files-show:reload') {
const selector = `.f-c-files-show[data-f-c-files-show-id-value="${message.data.id}"]`
const targets = document.querySelectorAll(selector)
diff --git a/app/components/folio/console/files/show_component.rb b/app/components/folio/console/files/show_component.rb
index 28cdd813ff..cff49c4071 100644
--- a/app/components/folio/console/files/show_component.rb
+++ b/app/components/folio/console/files/show_component.rb
@@ -14,7 +14,8 @@ def data
id: @file.id,
file_type: @file.class.to_s,
show_url: controller.folio.url_for([:console, @file]),
- index_url: controller.folio.url_for([:console, @file.class])
+ index_url: controller.folio.url_for([:console, @file.class]),
+ aasm_state: @file.aasm_state
},
action: {
"f-uppy:upload-success": "uppyUploadSuccess",
diff --git a/app/components/folio/console/files/show_component.slim b/app/components/folio/console/files/show_component.slim
index 564f50ef76..c77b67b4b2 100644
--- a/app/components/folio/console/files/show_component.slim
+++ b/app/components/folio/console/files/show_component.slim
@@ -53,12 +53,14 @@
= @file.file_mime_type
- if @file.created_at.present?
- .f-c-files-show__meta-item.me-auto
+ .f-c-files-show__meta-item
' #{t(".created_at")}: #{l(@file.created_at.to_date, format: :console_short)}
- .f-c-files-show__meta-item
+ .f-c-files-show__meta-item.ms-auto
== cell("folio/console/state", @file, active: false)
+ = render(Folio::Console::Files::Show::EncodingInfoComponent.new(file: @file))
+
.f-c-files-show__table
- table_rows.each do |key, config|
.f-c-files-show__tr
diff --git a/app/components/folio/embed/box_component.sass b/app/components/folio/embed/box_component.sass
index d8f8a04d15..9adf457631 100644
--- a/app/components/folio/embed/box_component.sass
+++ b/app/components/folio/embed/box_component.sass
@@ -1,6 +1,7 @@
.f-embed-box
min-height: 150px
position: relative
+ white-space: normal
&__iframe
width: 100%
diff --git a/app/controllers/concerns/folio/console/api/file_controller_base.rb b/app/controllers/concerns/folio/console/api/file_controller_base.rb
index f291d72594..33094801c5 100644
--- a/app/controllers/concerns/folio/console/api/file_controller_base.rb
+++ b/app/controllers/concerns/folio/console/api/file_controller_base.rb
@@ -314,7 +314,8 @@ def update_thumbnails_crop
}
end
- @file.dont_run_after_save_jobs = true
+ @file.try(:dont_run_after_save_jobs=, true)
+
@file.update!(thumbnail_configuration:,
thumbnail_sizes:,
diff --git a/app/jobs/folio/cra_media_cloud/check_progress_job.rb b/app/jobs/folio/cra_media_cloud/check_progress_job.rb
index efdb2de51c..da83aa8e3f 100644
--- a/app/jobs/folio/cra_media_cloud/check_progress_job.rb
+++ b/app/jobs/folio/cra_media_cloud/check_progress_job.rb
@@ -5,69 +5,276 @@ class Folio::CraMediaCloud::CheckProgressJob < Folio::ApplicationJob
queue_as :default
+ unique :until_and_while_executing
+
+ # Maximum time to poll CRA before giving up (4 hours).
+ # Long videos can take 2+ hours for HD encoding across multiple phases.
+ MAX_PROCESSING_DURATION = 4.hours
+
attr_reader :media_file
def perform(media_file, preview: false, encoding_generation: nil)
@media_file = media_file
@encoding_generation = encoding_generation
- # CraMediaCloud doesn't use preview parameter, but we accept it for consistency
- # If encoding_generation is provided, check if it matches current generation
- # This prevents stale jobs from interfering with newer encodings
if @encoding_generation.present? && media_file.encoding_generation != @encoding_generation
Rails.logger.info "[CraMediaCloud::CheckProgressJob] Skipping stale job for #{media_file.class.name}##{media_file.id} " \
"(job generation: #{@encoding_generation}, current: #{media_file.encoding_generation})"
return
end
- # Early return if video doesn't need progress checking
if media_file.ready?
- Rails.logger.info "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} is already in ready state, skipping progress check"
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} is already in ready state"
return
end
- response = fetch_job_response
+ if processing_timed_out?
+ Rails.logger.error "[CraMediaCloud::CheckProgressJob] Timed out after #{MAX_PROCESSING_DURATION.inspect} " \
+ "for video #{media_file.id}. Marking as processing_failed."
+ if media_file.may_processing_failed?
+ # No with_lock here: timeout is a one-time terminal state written only by
+ # this code path. CheckProgressJob is unique-constrained so no concurrent
+ # instance runs. Broadcasts immediately follow the DB write intentionally.
+ media_file.processing_failed!
+ broadcast_file_update(media_file)
+ broadcast_encoding_progress
+ end
+ return
+ end
- return check_again_later if response.nil?
+ check_progress
+ end
- update_remote_service_data(response)
+ private
+ def multi_phase?
+ media_file.remote_services_data["processing_phases"].to_i > 1
+ end
- if media_file.full_media_processed?
- media_file.processing_done!
- broadcast_file_update(media_file)
- elsif media_file.upload_failed?
- # Don't reschedule for failed uploads - MonitorProcessingJob will handle retries
- media_file.save!
- broadcast_file_update(media_file)
- Rails.logger.info "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} upload failed, not rescheduling"
- elsif media_file.changed?
- media_file.save!
- broadcast_file_update(media_file)
- check_again_later
- else
- check_again_later
+ def expected_phases
+ media_file.remote_services_data["processing_phases"].to_i
+ end
+
+ def check_progress
+ response = fetch_job_response
+ return if response == :finalized # already handled by finalize_from_completed_phases!
+ return check_again_later if response.nil?
+
+ # REMOVED means job content was explicitly deleted via DeleteMediaJob.
+ # CRA does NOT auto-purge jobs — production data confirms DONE jobs persist
+ # indefinitely. Clear remote_id so the next poll uses the reference_id path,
+ # which can finalize from stored phase data or eventually time out cleanly.
+ if response["status"] == "REMOVED"
+ Rails.logger.warn "[CraMediaCloud::CheckProgressJob] Job #{response['id']} for video #{media_file.id} " \
+ "has been REMOVED. Clearing remote_id to fall back to reference_id polling."
+ media_file.with_lock do
+ media_file.remote_services_data.delete("remote_id")
+ media_file.save!
+ end
+ return check_again_later
+ end
+
+ # All Redis I/O (check_again_later, CreateMediaJob.perform_later, broadcasts)
+ # is deferred until AFTER the Postgres row lock is released.
+ should_broadcast = false
+ should_reschedule = false
+ @pending_retry = false
+
+ media_file.with_lock do
+ update_remote_service_data(response)
+
+ if media_file.full_media_processed?
+ media_file.processing_done!
+ should_broadcast = true
+ elsif media_file.processing_failed?
+ should_broadcast = true # state set by handle_job_failure; @pending_retry set there too
+ elsif media_file.changed?
+ media_file.save!
+ should_broadcast = true
+ should_reschedule = true
+ else
+ should_reschedule = true
+ end
+ end
+
+ check_again_later if should_reschedule
+ Folio::CraMediaCloud::CreateMediaJob.set(wait: 2.minutes).perform_later(media_file) if @pending_retry
+
+ if should_broadcast
+ broadcast_file_update(media_file)
+ broadcast_encoding_progress
+ end
end
- end
- private
def fetch_job_response
if media_file.remote_id.present?
- api.get_job(media_file.remote_id)
+ response = api.get_job(media_file.remote_id)
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] Job #{media_file.remote_id} for video #{media_file.id}: " \
+ "status=#{response&.dig('status')}, progress=#{response&.dig('progress')}, " \
+ "profileGroup=#{response&.dig('profileGroup')}, phase=#{response&.dig('phase')}"
+
+ # Multi-phase: if the tracked job is DONE but not the final phase,
+ # save intermediate data, clear remote_id, and look up by reference_id.
+ # Intermediate save is wrapped in with_lock to protect against concurrent
+ # MonitorProcessingJob or retry CreateMediaJob runs.
+ if multi_phase? && response&.dig("status") == "DONE" && response&.dig("phase").to_i < expected_phases
+ media_file.with_lock do
+ save_intermediate_phase_data(response)
+ media_file.remote_services_data.delete("remote_id")
+ media_file.save!
+ end
+ broadcast_encoding_progress
+ broadcast_file_update(media_file)
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] Phase #{response['phase']} done, cleared remote_id to discover next phase"
+ return nil
+ end
+
+ response
elsif media_file.remote_reference_id.present?
- jobs = api.get_jobs(ref_id: media_file.remote_reference_id)
+ all_jobs = api.get_jobs(ref_id: media_file.remote_reference_id)
+ # Filter out REMOVED jobs. REMOVED appears when job content has been
+ # explicitly deleted via DeleteMediaJob (DELETE /jobs/{id}/content) —
+ # production data confirms CRA does NOT auto-purge completed jobs.
+ jobs = all_jobs.reject { |j| j["status"] == "REMOVED" }
if jobs.empty?
- Rails.logger.warn "[CraMediaCloud::CheckProgressJob] No jobs found for reference_id #{media_file.remote_reference_id}"
+ # All jobs REMOVED with stored phase data: job content was deleted
+ # (e.g. via DeleteMediaJob) after encoding completed. Finalize from
+ # the phase output we already saved locally rather than hitting CRA.
+ if multi_phase? && all_jobs.present? && has_any_completed_phase?
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] All CRA jobs REMOVED for video #{media_file.id} " \
+ "with completed phase data. Finalizing from stored phase output."
+ finalize_from_completed_phases!
+ return :finalized
+ end
+
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] No active jobs found for reference_id #{media_file.remote_reference_id} " \
+ "(video #{media_file.id}, #{all_jobs.size} removed) — CRA may still be downloading the file"
return nil
end
- # Get the most recent job by lastModified
- job = jobs.max_by { |j| Time.parse(j["lastModified"]) }
- Rails.logger.debug "[CraMediaCloud::CheckProgressJob] Found #{jobs.size} job(s) for #{media_file.remote_reference_id}, using most recent from #{job['lastModified']}"
- job
+ if multi_phase?
+ select_multi_phase_job(jobs)
+ else
+ job = jobs.max_by { |j| Time.parse(j["lastModified"]) }
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] Found #{jobs.size} job(s) for #{media_file.remote_reference_id} (video #{media_file.id}): " \
+ "status=#{job['status']}, progress=#{job['progress']}, id=#{job['id']}, " \
+ "profileGroup=#{job['profileGroup']}, lastModified=#{job['lastModified']}"
+ job
+ end
else
- # No remote references exist - this should be handled by MonitorProcessingJob
- Rails.logger.info "[CraMediaCloud::CheckProgressJob] No remote_id or remote_reference_id found for #{media_file.class.name} ID #{media_file.id}. MonitorProcessingJob should handle this."
- nil # Return nil to stop processing this check job
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] No remote_id or remote_reference_id for #{media_file.class.name} ID #{media_file.id}"
+ nil
+ end
+ end
+
+ def select_multi_phase_job(jobs)
+ # Sort by phase descending, pick the highest-phase job
+ job = jobs.sort_by { |j| -(j["phase"].to_i) }.first
+
+ phase = job["phase"].to_i
+ Rails.logger.debug "[CraMediaCloud::CheckProgressJob] Multi-phase: found #{jobs.size} job(s), highest phase=#{phase}/#{expected_phases}, status=#{job['status']}"
+
+ # If the highest-phase job is DONE but we haven't reached the final phase,
+ # check if CRA created a next phase job. CRA creates all phase jobs upfront —
+ # if no higher phase exists by now, CRA decided this is the final output.
+ if job["status"] == "DONE" && phase < expected_phases
+ next_phase_exists = jobs.any? { |j| j["phase"].to_i > phase }
+
+ if next_phase_exists
+ # Next phase job exists but hasn't surpassed the current one yet — wait.
+ # Lock to guard against concurrent MonitorProcessingJob runs.
+ phase_data_saved = false
+ media_file.with_lock do
+ unless media_file.remote_services_data["phase_#{phase}_completed_at"].present?
+ save_intermediate_phase_data(job)
+ phase_data_saved = true
+ end
+ end
+ # Broadcast after lock so the UI reflects SD playback availability.
+ if phase_data_saved
+ broadcast_encoding_progress
+ broadcast_file_update(media_file)
+ end
+ return nil
+ else
+ # CRA did not create further phases — treat this as the final output
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] CRA created no phase #{phase + 1} job for video #{media_file.id}. " \
+ "Treating phase #{phase} output as final."
+ return job
+ end
+ end
+
+ job
+ end
+
+ def has_any_completed_phase?
+ (1..expected_phases).any? { |p| media_file.remote_services_data["phase_#{p}_completed_at"].present? }
+ end
+
+ # When all CRA jobs are REMOVED but we have stored phase output data,
+ # finalize the video using the last completed phase's output.
+ def finalize_from_completed_phases!
+ last_phase = (1..expected_phases).reverse_each.find { |p|
+ media_file.remote_services_data["phase_#{p}_completed_at"].present?
+ }
+
+ media_file.with_lock do
+ # Build content_mp4_paths from all completed phases
+ content_mp4_paths = {}
+ (1..last_phase).each do |p|
+ phase_paths = media_file.remote_services_data["phase_#{p}_content_mp4_paths"]
+ content_mp4_paths.merge!(phase_paths) if phase_paths.present?
+ end
+
+ media_file.remote_services_data.merge!(
+ "content_mp4_paths" => content_mp4_paths,
+ "processing_state" => "full_media_processed",
+ "progress_percentage" => 100.0,
+ "encoding_completed_at" => Time.current.iso8601,
+ )
+
+ media_file.processing_done!
+ end
+
+ # Broadcasts after lock release to avoid Redis I/O while holding a Postgres row lock.
+ broadcast_file_update(media_file)
+ broadcast_encoding_progress
+
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} finalized from stored phase output (last completed phase: #{last_phase})"
+ end
+
+ def save_intermediate_phase_data(phase_job)
+ phase_num = phase_job["phase"].to_i
+ mp4_paths = {}
+ manifest_hls = nil
+ manifest_dash = nil
+
+ phase_job["output"]&.each do |output_file|
+ case output_file["type"]
+ when "MP4"
+ mp4_paths[output_file["profiles"].first] = output_file["path"]
+ when "HLS"
+ manifest_hls = select_output_file(manifest_hls, output_file)
+ when "DASH"
+ manifest_dash = select_output_file(manifest_dash, output_file)
+ when "THUMBNAILS"
+ update_thumbnail_path(output_file)
+ end
end
+
+ updates = {
+ "phase_#{phase_num}_content_mp4_paths" => mp4_paths,
+ "phase_#{phase_num}_completed_at" => Time.current.iso8601,
+ "phase_#{phase_num}_remote_id" => phase_job["id"],
+ }
+ updates["manifest_hls_path"] = manifest_hls["path"] if manifest_hls
+ updates["manifest_dash_path"] = manifest_dash["path"] if manifest_dash
+
+ media_file.remote_services_data.merge!(updates)
+ media_file.save!
+
+ Rails.logger.info "[CraMediaCloud::CheckProgressJob] Phase #{phase_num}/#{expected_phases} complete for video #{media_file.id}, " \
+ "saved #{mp4_paths.size} MP4 paths" \
+ "#{manifest_hls ? ', HLS manifest' : ''}" \
+ "#{manifest_dash ? ', DASH manifest' : ''}."
end
def update_remote_service_data(response)
@@ -76,35 +283,113 @@ def update_remote_service_data(response)
case response["status"]
when "DONE"
process_output_hash(response["output"])
+ parse_encoding_messages(response)
media_file.remote_services_data.merge!(
"output" => response["output"],
"processing_state" => "full_media_processed",
+ "progress_percentage" => 100.0,
+ "encoding_completed_at" => Time.current.iso8601,
)
- when "PROCESSING", "CREATED"
- media_file.remote_services_data.merge!(
- "processing_state" => "full_media_processing",
- "progress_percentage" => (response["progress"] ? response["progress"] * 100.0 : 0).round(1),
- )
+ when "WAITING", "PROCESSING", "CREATED", "VALIDATING"
+ update_progress(response)
when "FAILED", "ERROR"
- error_messages = response["messages"]&.filter_map { |msg| msg["message"] if msg["type"] == "ERROR" }&.join("; ")
+ handle_job_failure(response)
+ end
+ end
- media_file.remote_services_data.merge!(
- "processing_state" => "upload_failed",
- "error_message" => error_messages || "Upload failed",
- "failed_at" => Time.current.iso8601,
- "progress_percentage" => nil
- )
+ def update_progress(response)
+ return unless response
+
+ media_file.remote_services_data["remote_id"] ||= response["id"]
+ media_file.remote_services_data["cra_status"] = response["status"]
+ media_file.remote_services_data["last_progress_check_at"] = Time.current.iso8601
+
+ raw_progress = response["progress"].to_f
+ media_file.remote_services_data["cra_raw_progress"] = raw_progress
+
+ parse_encoding_messages(response)
+
+ phase = current_phase(response)
+ media_file.remote_services_data["current_phase"] = phase
+
+ if multi_phase? && response["phase"].to_i > 0
+ media_file.remote_services_data["current_encoding_phase"] = response["phase"].to_i
+ end
+
+ media_file.remote_services_data["progress_percentage"] = phase == "encoding" ? (raw_progress * 100).round(0) : nil
+ end
+
+ # Derive current phase from CRA status and completed message phases.
+ def current_phase(response)
+ case response["status"]
+ when "WAITING", "CREATED", "VALIDATING"
+ "waiting"
+ when "PROCESSING"
+ phases = media_file.remote_services_data["phases_completed"] || []
+ phases.include?("video") ? "packaging" : "encoding"
+ end
+ end
+
+ def handle_job_failure(response)
+ error_messages = response["messages"]&.filter_map { |msg| msg["message"] if msg["type"] == "ERROR" }&.join("; ")
+ retry_count = (media_file.remote_services_data["retry_count"] || 0) + 1
+ will_retry = retry_count <= 1
+
+ media_file.remote_services_data.merge!(
+ "processing_state" => "encoding_failed",
+ "error_message" => error_messages || "Encoding failed",
+ "failed_at" => Time.current.iso8601,
+ "progress_percentage" => nil,
+ "current_phase" => nil,
+ "retry_count" => retry_count,
+ )
+
+ if will_retry
+ media_file.remote_services_data["retry_scheduled_at"] = (Time.current + 2.minutes).iso8601
+ else
+ media_file.remote_services_data.delete("retry_scheduled_at")
+ end
- Rails.logger.error "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} failed: #{error_messages}"
+ # Single save via processing_failed! — all data merged above.
+ # Broadcasts are emitted by check_progress after with_lock returns.
+ media_file.processing_failed!
+
+ if will_retry
+ @pending_retry = true
+ Rails.logger.warn "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} failed (attempt #{retry_count}), scheduling retry in 2 minutes: #{error_messages}"
+ else
+ Rails.logger.error "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} failed permanently (attempt #{retry_count}): #{error_messages}"
+ end
+ end
+
+ def parse_encoding_messages(response)
+ messages = response["messages"]
+ return unless messages.present?
+
+ phases_completed = []
+ messages.each do |msg|
+ text = msg["message"].to_s
+ phases_completed << "validation" if text.include?("verification: finished")
+ phases_completed << "audio" if text.include?("Transcoding worker - audio: finished")
+ phases_completed << "thumbnails" if text.include?("Transcoding worker - thumbnails: finished")
+ phases_completed << "video" if text.include?("Transcoding worker - video: finished")
+ phases_completed << "packaging" if text.include?("copying: started")
end
+
+ # Extract video duration from outputParams
+ video_duration = response.dig("outputParams", "duration")
+
+ media_file.remote_services_data["phases_completed"] = phases_completed.uniq
+ media_file.remote_services_data["video_duration"] = video_duration if video_duration
end
- def process_output_hash(process_output_hash)
- content_mp4_paths = {}
- manifest_hls, manifest_dash = nil, nil
+ def process_output_hash(output_data)
+ content_mp4_paths = media_file.remote_services_data["content_mp4_paths"] || {}
+ manifest_hls = nil
+ manifest_dash = nil
- process_output_hash.each do |output_file|
+ output_data.each do |output_file|
case output_file["type"]
when "MP4"
content_mp4_paths[output_file["profiles"].first] = output_file["path"]
@@ -117,11 +402,10 @@ def process_output_hash(process_output_hash)
end
end
- media_file.remote_services_data.merge!(
- "content_mp4_paths" => content_mp4_paths,
- "manifest_hls_path" => manifest_hls["path"],
- "manifest_dash_path" => manifest_dash["path"],
- )
+ updates = { "content_mp4_paths" => content_mp4_paths }
+ updates["manifest_hls_path"] = manifest_hls["path"] if manifest_hls
+ updates["manifest_dash_path"] = manifest_dash["path"] if manifest_dash
+ media_file.remote_services_data.merge!(updates)
end
def select_output_file(current, incoming)
@@ -137,14 +421,70 @@ def update_thumbnail_path(output_file)
end
end
+ def broadcast_encoding_progress
+ return if message_bus_user_ids.blank?
+
+ phase = media_file.remote_services_data["current_phase"]
+ retry_count = media_file.remote_services_data["retry_count"].to_i
+
+ failed_label = if media_file.processing_failed?
+ if retry_count < 2 && media_file.remote_services_data["retry_scheduled_at"].present?
+ I18n.t("folio.console.files.show.encoding_info_component.phase_failed_retrying")
+ else
+ I18n.t("folio.console.files.show.encoding_info_component.phase_failed")
+ end
+ end
+
+ phase_label = if phase.present?
+ encoding_phase = media_file.remote_services_data["current_encoding_phase"]
+ if multi_phase? && encoding_phase.present?
+ phase_name = media_file.encoder_phase_name(encoding_phase)
+ if phase_name
+ I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}_named",
+ name: phase_name,
+ default: I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}", default: phase.humanize))
+ else
+ I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}_multi",
+ phase: encoding_phase,
+ total: expected_phases,
+ default: I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}", default: phase.humanize))
+ end
+ else
+ I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}", default: phase.humanize)
+ end
+ end
+
+ MessageBus.publish Folio::MESSAGE_BUS_CHANNEL,
+ {
+ type: "Folio::CraMediaCloud::CheckProgressJob/encoding_progress",
+ data: {
+ id: media_file.id,
+ aasm_state: media_file.aasm_state,
+ aasm_state_human: serialized_file(media_file).dig(:data, :attributes, :aasm_state_human),
+ progress_percentage: media_file.remote_services_data["progress_percentage"],
+ current_phase: phase,
+ current_phase_label: phase_label,
+ failed_label: failed_label,
+ cra_status: media_file.remote_services_data["cra_status"],
+ },
+ }.to_json,
+ user_ids: message_bus_user_ids
+ end
+
def check_again_later
- # Pass encoding_generation to ensure stale jobs don't interfere
Folio::CraMediaCloud::CheckProgressJob.set(wait: 15.seconds).perform_later(
media_file,
encoding_generation: @encoding_generation || media_file.encoding_generation
)
end
+ def processing_timed_out?
+ started_at = media_file.remote_services_data["processing_step_started_at"]
+ return false if started_at.blank?
+
+ Time.parse(started_at.to_s) < MAX_PROCESSING_DURATION.ago
+ end
+
def api
@api ||= Folio::CraMediaCloud::Api.new
end
diff --git a/app/jobs/folio/cra_media_cloud/create_media_job.rb b/app/jobs/folio/cra_media_cloud/create_media_job.rb
index 66d43fb38a..7d0de890ed 100644
--- a/app/jobs/folio/cra_media_cloud/create_media_job.rb
+++ b/app/jobs/folio/cra_media_cloud/create_media_job.rb
@@ -1,6 +1,8 @@
# frozen_string_literal: true
class Folio::CraMediaCloud::CreateMediaJob < Folio::ApplicationJob
+ include Folio::S3::Client
+
# Discard if file no longer exists
discard_on ActiveJob::DeserializationError
@@ -9,8 +11,22 @@ class Folio::CraMediaCloud::CreateMediaJob < Folio::ApplicationJob
def perform(media_file)
fail "only video files are supported" unless media_file.is_a?(Folio::File::Video)
- # Generate reference_id based on current file content
- current_reference_id = generate_reference_id(media_file)
+ # If retrying after failure, transition back to processing
+ if media_file.processing_failed? && media_file.remote_services_data&.dig("retry_count").to_i > 0
+ media_file.retry_processing!
+ Rails.logger.info "[CraMediaCloud::CreateMediaJob] Video #{media_file.id} retrying after failure"
+ end
+
+ # Generate reference_id based on current file content.
+ # If the source file no longer exists on S3, mark as permanently failed
+ # and don't retry — the file cannot be re-uploaded without the original.
+ begin
+ current_reference_id = generate_reference_id(media_file)
+ rescue Excon::Error::NotFound => e
+ Rails.logger.error "[CraMediaCloud::CreateMediaJob] Source file not found on S3 for video #{media_file.id}: #{e.message}"
+ mark_source_file_missing!(media_file)
+ return
+ end
# Check API for existing job with this reference_id
existing_job_result = check_existing_job(current_reference_id, media_file)
@@ -34,63 +50,56 @@ def perform(media_file)
private
def generate_reference_id(media_file)
- # Combine video slug with S3 ETag (actual file content MD5) for stable, unique reference
- # Format: {slug}-{s3_etag}
- # This ensures uniqueness across environments and file versions
+ # Combine environment, video slug, ID, S3 ETag, and encoding_generation for unique reference.
+ # ID guarantees uniqueness per video record (slug alone is derived from filename and can collide).
+ # encoding_generation changes on each re-encode, ensuring CRA gets a fresh refId.
+ # Format: {env}-{slug}-{id}-{s3_etag}-{generation}
+ # Total length is capped at 128 chars to avoid CRA lookup failures with long slugs.
s3_etag = get_s3_etag(media_file)
+ env_prefix = ENV.fetch("DRAGONFLY_RAILS_ENV", Rails.env)
+ generation = media_file.encoding_generation
+
+ if generation.nil?
+ # encoding_generation may not be visible yet if the enclosing transaction
+ # (e.g. S3::CreateFileJob save) hasn't committed. Reload to get committed data.
+ media_file.reload
+ generation = media_file.encoding_generation
+ end
- "#{media_file.slug}-#{s3_etag[0..7]}"
- end
+ if generation.nil?
+ fail "encoding_generation not set for video #{media_file.id} — cannot generate unique reference_id (would match stale CRA jobs)"
+ end
- def get_s3_etag(media_file)
- # Get S3 ETag (MD5 hash) without downloading the file
- s3_metadata = get_s3_metadata(media_file)
- extract_etag(s3_metadata).delete_prefix('"').delete_suffix('"')
- end
+ suffix = "-#{media_file.id}-#{s3_etag[0..7]}-#{generation}"
+ max_slug_length = 128 - env_prefix.length - 1 - suffix.length
+ slug = media_file.slug.to_s[0, [max_slug_length, 1].max]
- def get_s3_metadata(media_file)
- s3_datastore = Dragonfly.app.datastore
- s3_object_key = [s3_datastore.root_path, media_file.file_uid].join("/")
- Rails.logger.debug("[CraMediaCloud::CreateMediaJob] Fetching S3 metadata for key: #{s3_object_key}")
- s3_datastore.storage.head_object(ENV["S3_BUCKET_NAME"], s3_object_key)
+ "#{env_prefix}-#{slug}#{suffix}"
end
- def extract_etag(response)
- # Handle different response types (AWS SDK, Excon, etc.)
- if response.respond_to?(:etag)
- response.etag
- elsif response.respond_to?(:headers)
- response.headers["ETag"] || response.headers["etag"] || response.headers["Etag"]
- else
- raise "Cannot extract ETag from response type: #{response.class}"
- end
+ def get_s3_etag(media_file)
+ s3_metadata = s3_dragonfly_head_object(media_file.file_uid)
+ extract_s3_etag(s3_metadata).delete_prefix('"').delete_suffix('"')
end
def check_existing_job(reference_id, media_file)
api = Folio::CraMediaCloud::Api.new
jobs = api.get_jobs(ref_id: reference_id)
- if jobs.empty?
- { status: :not_found, job: nil }
- else
- # Get the most recent job with this reference_id by lastModified
- job = jobs.max_by { |j| Time.parse(j["lastModified"]) }
- Rails.logger.debug "[CraMediaCloud::CreateMediaJob] Found #{jobs.size} job(s) for #{reference_id}, using most recent from #{job['lastModified']}"
-
- case job["status"]
- when "PROCESSING", "CREATED"
- { status: :processing, job: job }
- when "DONE"
- { status: :done, job: job }
- when "FAILED", "ERROR"
- { status: :failed, job: job }
- else
- { status: :not_found, job: job }
- end
- end
+ # No need to pre-filter REMOVED jobs: JobResolver maps REMOVED → :not_found,
+ # so CreateMediaJob will proceed with a fresh upload. (MonitorProcessingJob
+ # pre-filters REMOVED before passing to reconcile_with_remote_jobs because it
+ # needs to distinguish "all REMOVED with stored phase data → finalize" from
+ # "no jobs at all → clear state". CreateMediaJob has no such distinction to make.)
+ result = Folio::CraMediaCloud::JobResolver.resolve(jobs)
+
+ Rails.logger.debug "[CraMediaCloud::CreateMediaJob] Job check for #{reference_id}: " \
+ "#{jobs.size} job(s), status=#{result[:status]}"
+
+ result
rescue => e
Rails.logger.warn "[CraMediaCloud::CreateMediaJob] Could not check existing job for #{reference_id}: #{e.message}"
- { status: :not_found, job: nil } # Assume not found if API call fails
+ { status: :not_found, job: nil }
end
def update_local_state_for_successful_job(media_file, job, reference_id)
@@ -125,13 +134,33 @@ def update_local_state_for_successful_job(media_file, job, reference_id)
Rails.logger.info "[CraMediaCloud::CreateMediaJob] Successfully updated local state for video #{media_file.id} to point to successful job #{successful_job_id}"
else
- Rails.logger.debug "[CraMediaCloud::CreateMediaJob] Local state already points to correct job #{successful_job_id} for video #{media_file.id}"
+ # remote_id already matches, but if local processing_state is stale (e.g. upload_failed
+ # or encoding_failed set before CRA recovered), schedule CheckProgressJob to finalize.
+ # This handles videos that got stuck with a failed state while the CRA job eventually
+ # completed successfully on CRA's side.
+ stale_state = media_file.remote_services_data["processing_state"]
+ if stale_state != "full_media_processed"
+ media_file.remote_services_data.merge!(
+ "processing_state" => "full_media_processing",
+ "processing_step_started_at" => Time.current.iso8601
+ )
+ media_file.save!
+ Folio::CraMediaCloud::CheckProgressJob.perform_later(
+ media_file,
+ encoding_generation: media_file.encoding_generation
+ )
+ Rails.logger.info "[CraMediaCloud::CreateMediaJob] Remote ID #{successful_job_id} matches but state " \
+ "was stale (#{stale_state}), " \
+ else
+ Rails.logger.debug "[CraMediaCloud::CreateMediaJob] Local state already points to correct job #{successful_job_id} for video #{media_file.id}"
+ end
end
end
def process_media_upload(media_file, reference_id)
# Capture encoding_generation before any state updates
current_generation = media_file.encoding_generation
+ profile_group = media_file.try(:encoder_profile_group)
# Set state to creating_media_job before starting upload
rs_data = media_file.remote_services_data || {}
@@ -145,24 +174,29 @@ def process_media_upload(media_file, reference_id)
Rails.logger.info "[CraMediaCloud::CreateMediaJob] Starting upload for video #{media_file.id} with reference_id: #{reference_id}"
begin
- Folio::CraMediaCloud::Encoder.new.upload_file(
+ processing_phases = media_file.try(:encoder_processing_phases)
+ encoder = Folio::CraMediaCloud::Encoder.new
+
+ encoder.upload_file(
media_file,
- profile_group: media_file.try(:encoder_profile_group),
+ profile_group: profile_group,
+ processing_phases: processing_phases,
reference_id: reference_id
)
- # Update to processing state after successful upload
media_file.remote_services_data.merge!({
"reference_id" => reference_id,
"processing_state" => "full_media_processing",
- "processing_step_started_at" => Time.current.iso8601
+ "processing_step_started_at" => Time.current.iso8601,
+ "processing_phases" => processing_phases.to_i > 1 ? processing_phases : nil,
})
+
# Clear any old remote_id since we're starting fresh
media_file.remote_services_data.delete("remote_id")
media_file.save!
# Pass encoding_generation so CheckProgressJob can detect stale jobs
- Folio::CraMediaCloud::CheckProgressJob.set(wait: 30.seconds).perform_later(
+ Folio::CraMediaCloud::CheckProgressJob.set(wait: 10.seconds).perform_later(
media_file,
encoding_generation: current_generation
)
@@ -185,4 +219,24 @@ def process_media_upload(media_file, reference_id)
raise e
end
end
+
+ def mark_source_file_missing!(media_file)
+ rs_data = media_file.remote_services_data || {}
+ rs_data.merge!({
+ "service" => "cra_media_cloud",
+ "processing_state" => "source_file_missing",
+ "error_message" => "Source file not found on S3 (file_uid: #{media_file.file_uid})",
+ "processing_step_started_at" => Time.current.iso8601,
+ })
+ media_file.remote_services_data = rs_data
+
+ begin
+ media_file.processing_failed!
+ rescue => e
+ Rails.logger.warn "[CraMediaCloud::CreateMediaJob] AASM transition failed for video #{media_file.id} (#{e.message}), forcing state"
+ media_file.update_columns(aasm_state: "processing_failed", remote_services_data: rs_data, updated_at: Time.current)
+ end
+
+ broadcast_file_update(media_file)
+ end
end
diff --git a/app/jobs/folio/cra_media_cloud/delete_media_job.rb b/app/jobs/folio/cra_media_cloud/delete_media_job.rb
index e30ea28814..70d6f369a0 100644
--- a/app/jobs/folio/cra_media_cloud/delete_media_job.rb
+++ b/app/jobs/folio/cra_media_cloud/delete_media_job.rb
@@ -4,26 +4,39 @@ class Folio::CraMediaCloud::DeleteMediaJob < Folio::ApplicationJob
queue_as :slow
def perform(id, reference_id: nil)
- if id.present?
- api.delete_job_content(id)
- elsif reference_id.present?
- # Get all jobs with this reference_id
+ if id.blank? && reference_id.blank?
+ Rails.logger.warn "[CraMediaCloud::DeleteMediaJob] Skipping — no remote_id or reference_id (file was never processed by CRA)"
+ return
+ end
+
+ if reference_id.present?
+ # Prefer reference_id — deletes all phase jobs (multi-phase encoding creates multiple jobs per ref)
jobs = api.get_jobs(ref_id: reference_id)
if jobs.any?
- # Delete content for all jobs with this reference_id
jobs.each do |job|
Rails.logger.info "[CraMediaCloud::DeleteMediaJob] Deleting job content for job ID #{job['id']} (ref: #{reference_id})"
- api.delete_job_content(job["id"])
+ safe_delete_job_content(job["id"])
end
Rails.logger.info "[CraMediaCloud::DeleteMediaJob] Deleted content for #{jobs.size} job(s) with reference_id #{reference_id}"
end
- else
- raise "Missing remote_key and remote_reference_id"
+ elsif id.present?
+ safe_delete_job_content(id)
end
end
private
+ def safe_delete_job_content(job_id)
+ api.delete_job_content(job_id)
+ rescue RuntimeError => e
+ # CRA returns 400 when content was already deleted — that's fine, goal achieved
+ if e.message.include?("status 400") || e.message.include?("status 404")
+ Rails.logger.info "[CraMediaCloud::DeleteMediaJob] Job #{job_id} content already removed (#{e.message})"
+ else
+ raise
+ end
+ end
+
def api
@api ||= Folio::CraMediaCloud::Api.new
end
diff --git a/app/jobs/folio/cra_media_cloud/monitor_processing_job.rb b/app/jobs/folio/cra_media_cloud/monitor_processing_job.rb
index ffa1f55a0f..a3a9e9c8db 100644
--- a/app/jobs/folio/cra_media_cloud/monitor_processing_job.rb
+++ b/app/jobs/folio/cra_media_cloud/monitor_processing_job.rb
@@ -8,6 +8,9 @@ def perform
return if another_monitor_job_running?
begin
+ # Handle videos stuck in unprocessed state with existing files
+ handle_stuck_unprocessed_videos
+
# Handle videos with orphaned or inconsistent states first
handle_orphaned_videos
@@ -17,6 +20,9 @@ def perform
# Handle videos with failed uploads that need retry
handle_failed_uploads_needing_retry
+ # Safety net: retry failed videos whose retry job was lost
+ handle_failed_videos_awaiting_retry
+
# Handle videos that are already processing and need progress checking
handle_videos_needing_progress_check
ensure
@@ -26,22 +32,53 @@ def perform
end
private
+ def handle_stuck_unprocessed_videos
+ stuck = Folio::File::Video
+ .where(aasm_state: :unprocessed)
+ .where("file_uid IS NOT NULL AND file_uid != ''")
+ .where("created_at < ?", 5.minutes.ago)
+
+ return if stuck.empty?
+
+ Rails.logger.info("MonitorProcessingJob: Found #{stuck.count} stuck unprocessed video(s) with files")
+
+ stuck.each do |video|
+ Rails.logger.info("MonitorProcessingJob: Triggering process! for stuck video ##{video.id} (created #{video.created_at})")
+ begin
+ video.process!
+ rescue => e
+ Rails.logger.error("MonitorProcessingJob: Failed to process stuck video ##{video.id}: #{e.message}")
+ end
+ end
+ end
+
def find_processing_videos
Folio::File::Video
.where(aasm_state: :processing)
.where("remote_services_data ->> 'service' = ?", "cra_media_cloud")
- .where("remote_services_data ->> 'processing_state' IN (?)", ["full_media_processing", "upload_completed"])
+ .where("remote_services_data ->> 'processing_state' IN (?)",
+ %w[full_media_processing upload_completed])
end
def find_videos_needing_upload
- # Find videos that need initial upload (no remote references)
+ # Find videos that need initial upload (no remote references).
+ # Freshly enqueued videos (< 10 min) are excluded — they already have a
+ # CreateMediaJob queued. But enqueued videos older than 10 min are included
+ # because the job was likely lost (e.g., pod OOMKill). The Ruby handler
+ # checks for running/scheduled jobs before re-scheduling.
Folio::File::Video
.where(aasm_state: :processing)
.where(
"(remote_services_data ->> 'service' IS NULL OR remote_services_data ->> 'service' = ?) AND " \
"(remote_services_data ->> 'remote_id' IS NULL) AND " \
- "(remote_services_data ->> 'reference_id' IS NULL)",
- "cra_media_cloud"
+ "(remote_services_data ->> 'reference_id' IS NULL) AND " \
+ "(remote_services_data ->> 'processing_state' IS DISTINCT FROM ? OR " \
+ " (remote_services_data ->> 'processing_state' = ? AND " \
+ " (remote_services_data ->> 'processing_step_started_at')::timestamptz < ?))",
+ "cra_media_cloud",
+ "enqueued",
+ "enqueued",
+ 10.minutes.ago
)
end
@@ -50,8 +87,8 @@ def find_failed_uploads_needing_retry
Folio::File::Video
.where(aasm_state: :processing)
.where("remote_services_data ->> 'service' = ?", "cra_media_cloud")
- .where("remote_services_data ->> 'processing_state' = ?", "upload_failed")
- .where("(remote_services_data ->> 'processing_step_started_at')::timestamp < ?", 5.minutes.ago)
+ .where("remote_services_data ->> 'processing_state' IN (?)", %w[upload_failed encoding_failed])
+ .where("(remote_services_data ->> 'processing_step_started_at')::timestamptz < ?", 5.minutes.ago)
end
def handle_videos_needing_upload
@@ -72,13 +109,13 @@ def handle_videos_needing_upload
next
end
- # Check if video is stuck in creating state
+ # Check if video is stuck in creating/enqueued state
rs_data = video.remote_services_data || {}
Rails.logger.info("MonitorProcessingJob: Video ##{video.id} remote_services_data: #{rs_data}")
- if rs_data["processing_state"] == "creating_media_job"
+ if rs_data["processing_state"].in?(%w[creating_media_job enqueued])
started_at = rs_data["processing_step_started_at"]
- Rails.logger.info("MonitorProcessingJob: Video ##{video.id} is in creating_media_job state, started_at: #{started_at}")
+ Rails.logger.info("MonitorProcessingJob: Video ##{video.id} is in #{rs_data['processing_state']} state, started_at: #{started_at}")
# Check if upload is genuinely stuck vs. just taking a long time
if started_at && !upload_is_stuck?(video, Time.parse(started_at))
@@ -122,6 +159,31 @@ def handle_failed_uploads_needing_retry
end
end
+ def handle_failed_videos_awaiting_retry
+ # Safety net: find videos that were scheduled for retry but the retry job was lost
+ videos = Folio::File::Video
+ .where(aasm_state: :processing_failed)
+ .where("remote_services_data ->> 'service' = ?", "cra_media_cloud")
+ .where("COALESCE((remote_services_data ->> 'retry_count')::int, 0) < 2")
+ .where("(remote_services_data ->> 'retry_scheduled_at')::timestamptz < ?", 5.minutes.ago)
+
+ return if videos.empty?
+
+ Rails.logger.info("MonitorProcessingJob: Found #{videos.count} failed videos awaiting retry (safety net)")
+
+ scheduled_create_jobs = find_scheduled_create_media_job_ids
+
+ videos.each do |video|
+ if scheduled_create_jobs.include?(video.id)
+ Rails.logger.debug("MonitorProcessingJob: Failed video ##{video.id} already has scheduled CreateMediaJob")
+ next
+ end
+
+ Rails.logger.info("MonitorProcessingJob: Re-scheduling retry for failed video ##{video.id}")
+ Folio::CraMediaCloud::CreateMediaJob.perform_later(video)
+ end
+ end
+
def handle_videos_needing_progress_check
processing_videos = find_processing_videos
@@ -155,7 +217,7 @@ def handle_videos_needing_progress_check
end
Rails.logger.debug("MonitorProcessingJob: Scheduling CheckProgressJob for video ##{video.id}")
- Folio::CraMediaCloud::CheckProgressJob.perform_later(video)
+ Folio::CraMediaCloud::CheckProgressJob.perform_later(video, encoding_generation: video.remote_services_data&.dig("encoding_generation"))
end
end
@@ -180,17 +242,19 @@ def handle_orphaned_videos
end
def find_orphaned_videos
- # Find videos that are processing but might have lost track of their remote jobs
+ # Find videos that are processing but might have lost track of their remote jobs.
+ # Both conditions require a time threshold to avoid racing with just-uploaded videos
+ # (CRA needs time to ingest the manifest before a remote_id appears).
Folio::File::Video
.where(aasm_state: :processing)
.where("remote_services_data ->> 'service' = ?", "cra_media_cloud")
.where(
- # Videos with reference_id but no remote_id, or videos that have been
- # in creating_media_job state for a very long time
- "(remote_services_data ->> 'reference_id' IS NOT NULL AND remote_services_data ->> 'remote_id' IS NULL) OR " \
+ "(remote_services_data ->> 'reference_id' IS NOT NULL AND " \
+ "remote_services_data ->> 'remote_id' IS NULL AND " \
+ "(remote_services_data ->> 'processing_step_started_at')::timestamptz < ?) OR " \
"(remote_services_data ->> 'processing_state' = 'creating_media_job' AND " \
- "(remote_services_data ->> 'processing_step_started_at')::timestamp < ?)",
- 3.hours.ago
+ "(remote_services_data ->> 'processing_step_started_at')::timestamptz < ?)",
+ 10.minutes.ago, 30.minutes.ago
)
end
@@ -208,7 +272,6 @@ def reconcile_video_state(video)
if jobs.empty?
Rails.logger.warn("MonitorProcessingJob: No remote jobs found for video ##{video.id} reference_id: #{reference_id}")
- # Video has reference_id but no remote jobs - needs re-upload
rs_data.delete("reference_id")
rs_data.delete("remote_id")
rs_data.delete("processing_state")
@@ -217,42 +280,61 @@ def reconcile_video_state(video)
return
end
- latest_job = jobs.max_by { |j| Time.parse(j["lastModified"]) }
- current_remote_id = rs_data["remote_id"]
+ reconcile_with_remote_jobs(video, rs_data, jobs)
- case latest_job["status"]
- when "DONE"
- if current_remote_id != latest_job["id"]
- Rails.logger.info("MonitorProcessingJob: Updating video ##{video.id} to point to successful job #{latest_job['id']}")
- rs_data["remote_id"] = latest_job["id"]
- rs_data["processing_state"] = "full_media_processing"
- video.update_column(:remote_services_data, rs_data)
+ rescue => e
+ Rails.logger.error("MonitorProcessingJob: Error reconciling video ##{video.id}: #{e.message}")
+ end
+ end
- # Schedule progress check to update final state
- Folio::CraMediaCloud::CheckProgressJob.perform_later(video)
- end
- when "PROCESSING", "CREATED"
- if current_remote_id != latest_job["id"]
- Rails.logger.info("MonitorProcessingJob: Updating video ##{video.id} to point to processing job #{latest_job['id']}")
- rs_data["remote_id"] = latest_job["id"]
- rs_data["processing_state"] = "full_media_processing"
- video.update_column(:remote_services_data, rs_data)
- end
+ # NOTE: update_column calls below are non-atomic read-modify-write on
+ # remote_services_data. Safe because MonitorProcessingJob uses a Redis lock
+ # (another_monitor_job_running?) to prevent concurrent instances.
+ def reconcile_with_remote_jobs(video, rs_data, jobs)
+ # Filter out REMOVED jobs before resolution: for multi-phase encodings, a
+ # REMOVED phase-1 job may have a later lastModified than an active phase-2
+ # job, causing JobResolver to select it and return :not_found — silently
+ # skipping the active job. If all remaining jobs are REMOVED, schedule
+ # CheckProgressJob which handles the finalize_from_completed_phases! path.
+ active_jobs = jobs.reject { |j| j["status"] == "REMOVED" }
+
+ if active_jobs.empty?
+ Rails.logger.info("MonitorProcessingJob: All CRA jobs REMOVED for video ##{video.id} — scheduling CheckProgressJob to finalize")
+ Folio::CraMediaCloud::CheckProgressJob.perform_later(video, encoding_generation: rs_data["encoding_generation"])
+ return
+ end
- # Schedule progress check
- Folio::CraMediaCloud::CheckProgressJob.perform_later(video)
- when "FAILED", "ERROR"
- Rails.logger.warn("MonitorProcessingJob: Latest job for video ##{video.id} failed, marking for retry")
- rs_data.merge!({
- "processing_state" => "upload_failed",
- "error_message" => "Remote job failed: #{latest_job['status']}",
- "processing_step_started_at" => Time.current.iso8601
- })
+ result = Folio::CraMediaCloud::JobResolver.resolve(active_jobs)
+ latest_job = result[:job]
+ return unless latest_job
+
+ current_remote_id = rs_data["remote_id"]
+
+ case result[:status]
+ when :done
+ if current_remote_id != latest_job["id"]
+ Rails.logger.info("MonitorProcessingJob: Updating video ##{video.id} to point to successful job #{latest_job['id']}")
+ rs_data["remote_id"] = latest_job["id"]
+ rs_data["processing_state"] = "full_media_processing"
video.update_column(:remote_services_data, rs_data)
end
-
- rescue => e
- Rails.logger.error("MonitorProcessingJob: Error reconciling video ##{video.id}: #{e.message}")
+ Folio::CraMediaCloud::CheckProgressJob.perform_later(video, encoding_generation: video.remote_services_data&.dig("encoding_generation"))
+ when :processing
+ if current_remote_id != latest_job["id"]
+ Rails.logger.info("MonitorProcessingJob: Updating video ##{video.id} to point to processing job #{latest_job['id']}")
+ rs_data["remote_id"] = latest_job["id"]
+ rs_data["processing_state"] = "full_media_processing"
+ video.update_column(:remote_services_data, rs_data)
+ end
+ Folio::CraMediaCloud::CheckProgressJob.perform_later(video, encoding_generation: video.remote_services_data&.dig("encoding_generation"))
+ when :failed
+ Rails.logger.warn("MonitorProcessingJob: Latest job for video ##{video.id} failed, marking for retry")
+ rs_data.merge!({
+ "processing_state" => "encoding_failed",
+ "error_message" => "Remote job failed: #{latest_job['status']}",
+ "processing_step_started_at" => Time.current.iso8601
+ })
+ video.update_column(:remote_services_data, rs_data)
end
end
@@ -334,42 +416,44 @@ def extract_video_id_from_job_data(job_data)
end
def processing_too_long?(video)
- # Consider a video stuck if it's been processing for more than 2 hours
started_at = video.remote_services_data["processing_step_started_at"]
return false unless started_at
+ # Multi-phase encoding can legitimately take longer — scale timeouts by phase count
+ phases = video.remote_services_data["processing_phases"].to_i
+ phase_multiplier = [phases, 1].max
+
elapsed_hours = (Time.current - Time.parse(started_at)) / 1.hour
+ hard_timeout = 6 * phase_multiplier
+ warn_timeout = 2 * phase_multiplier
- # Mark as failed after very long processing (6+ hours)
- if elapsed_hours > 6
- Rails.logger.error("MonitorProcessingJob: Marking video ##{video.id} as failed after #{elapsed_hours.round(1)} hours")
+ # Mark as failed after very long processing
+ if elapsed_hours > hard_timeout
+ Rails.logger.error("MonitorProcessingJob: Marking video ##{video.id} as failed after #{elapsed_hours.round(1)} hours (timeout: #{hard_timeout}h)")
- # Persist failure state even if validations fail
begin
video.processing_failed!
+ broadcast_file_update(video)
rescue => e
Rails.logger.warn("MonitorProcessingJob: AASM transition failed (#{e.message}), forcing state via update_columns")
- # Use update_columns to update in DB and then reload to sync memory
video.update_columns(aasm_state: "processing_failed", updated_at: Time.current)
video.reload
+ broadcast_file_update(video)
end
return true
- elsif elapsed_hours > 2
- Rails.logger.warn("MonitorProcessingJob: Video ##{video.id} has been processing for #{elapsed_hours.round(1)} hours")
+ elsif elapsed_hours > warn_timeout
+ Rails.logger.warn("MonitorProcessingJob: Video ##{video.id} has been processing for #{elapsed_hours.round(1)} hours (warning: #{warn_timeout}h)")
end
- # Return whether it's been processing too long (>2 hours) but without marking as failed
- elapsed_hours > 2
+ # Return whether it's been processing too long but without marking as failed
+ elapsed_hours > warn_timeout
rescue => e
Rails.logger.error("MonitorProcessingJob: Error checking processing time for video ##{video.id}: #{e.message}")
false
end
def upload_is_stuck?(video, upload_started_at)
- rs_data = video.remote_services_data || {}
- rs_data["upload_progress"]
-
# Calculate appropriate timeout based on file size
file_size = video.file_size || 0
base_timeout = 5.minutes # Base timeout for small files
diff --git a/app/jobs/folio/file/get_video_metadata_job.rb b/app/jobs/folio/file/get_video_metadata_job.rb
new file mode 100644
index 0000000000..ce348b51c6
--- /dev/null
+++ b/app/jobs/folio/file/get_video_metadata_job.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+class Folio::File::GetVideoMetadataJob < Folio::ApplicationJob
+ include Folio::Shell
+
+ queue_as :default
+
+ # Returns { duration: Integer|nil, width: Integer|nil, height: Integer|nil }
+ # Accepts local file path OR HTTP(S) URL (presigned S3 URL).
+ # ffprobe streams only container headers from URLs — does NOT download the whole file.
+ def perform(file_path_or_url)
+ output = shell("ffprobe",
+ "-select_streams", "v:0",
+ "-show_entries", "stream=duration,width,height",
+ "-show_entries", "format=duration",
+ "-of", "json",
+ "-v", "fatal",
+ file_path_or_url)
+
+ data = JSON.parse(output)
+ stream = data.dig("streams", 0) || {}
+ format_data = data.dig("format") || {}
+
+ duration_raw = stream["duration"] || format_data["duration"]
+
+ {
+ duration: duration_raw ? duration_raw.to_f.ceil : nil,
+ width: stream["width"]&.to_i,
+ height: stream["height"]&.to_i,
+ }
+ rescue => e
+ Rails.logger.error("[GetVideoMetadataJob] ffprobe failed for #{file_path_or_url.to_s.truncate(100)}: #{e.message}")
+ { duration: nil, width: nil, height: nil }
+ end
+end
diff --git a/app/jobs/folio/files/after_save_job.rb b/app/jobs/folio/files/after_save_job.rb
index 105c9cc4e0..e5c76f14b0 100644
--- a/app/jobs/folio/files/after_save_job.rb
+++ b/app/jobs/folio/files/after_save_job.rb
@@ -31,7 +31,6 @@ def perform(file, changed_attrs = {})
end
private
-
def sync_metadata_to_placements(file, placements, changed_attrs)
if changed_attrs.key?("description")
old_desc, new_desc = changed_attrs["description"]
diff --git a/app/jobs/folio/generate_missing_thumb_webp_job.rb b/app/jobs/folio/generate_missing_thumb_webp_job.rb
index e440a92754..7006874bfd 100644
--- a/app/jobs/folio/generate_missing_thumb_webp_job.rb
+++ b/app/jobs/folio/generate_missing_thumb_webp_job.rb
@@ -27,7 +27,7 @@ def perform(image)
if changed
image.thumbnail_sizes = thumbnail_sizes
- image.dont_run_after_save_jobs = true
+ image.try(:dont_run_after_save_jobs=, true)
image.save!(validate: false)
end
end
diff --git a/app/jobs/folio/generate_thumbnail_job.rb b/app/jobs/folio/generate_thumbnail_job.rb
index d5c006b518..0b7d4efc24 100644
--- a/app/jobs/folio/generate_thumbnail_job.rb
+++ b/app/jobs/folio/generate_thumbnail_job.rb
@@ -1,5 +1,8 @@
# frozen_string_literal: true
+require "open3"
+require "open-uri"
+
class Folio::GenerateThumbnailJob < Folio::ApplicationJob
queue_as :slow
@@ -24,7 +27,7 @@ def perform(image, size, quality, x: nil, y: nil, force: false)
# need to reload here because of parallel jobs
image.reload.with_lock do
thumbnail_sizes = image.thumbnail_sizes || {}
- image.dont_run_after_save_jobs = true
+ image.try(:dont_run_after_save_jobs=, true)
image.thumbnail_sizes = thumbnail_sizes.merge(size => new_thumb)
image.save!(validate: false)
end
@@ -253,26 +256,126 @@ def make_thumb(image, raw_size, quality, x: nil, y: nil)
end
def image_file(image)
+ if image.class.human_type == "video"
+ return video_screenshot(image)
+ end
+
if Rails.env.development? && ENV["DRAGONFLY_PRODUCTION_S3_URL_BASE"] && image.respond_to?(:development_safe_file)
thumbnail = image.development_safe_file(logger)
else
thumbnail = image.file
end
- if image.class.human_type == "video"
- thumbnail = thumbnail.ffmpeg_screenshot_to_jpg(image.screenshot_time_in_ffmpeg_format)
- thumbnail.name = Pathname.new(image.file_name).sub_ext(".jpg")
- thumbnail.meta["mime_type"] = "image/jpeg"
- else
- thumbnail.name = image.file_name
- thumbnail.meta["mime_type"] = image.file_mime_type
+ thumbnail.name = image.file_name
+ thumbnail.meta["mime_type"] = image.file_mime_type
+ thumbnail
+ rescue Dragonfly::Job::Fetch::NotFound
+ fallback_image(image)
+ end
+
+ # Get a screenshot frame for video thumbnail generation.
+ # Priority: 1) Provider-supplied poster image (no decoding needed)
+ # 2) ffmpeg frame extraction (only for ≤4K — safe memory)
+ # 3) fallback placeholder image
+ def video_screenshot(image)
+ # Prefer a provider-supplied poster image when available — avoids decoding
+ # the source video entirely. Critical for high-res (4K/8K) HEVC content
+ # where decoding a single frame can require 800+ MB for reference frame buffers.
+ if (cover_url = image.video_poster_url).present?
+ thumbnail = download_remote_image(image, cover_url)
+ return thumbnail if thumbnail
+ end
+
+ input = image.file_url_or_path
+ return fallback_image(image) if input.blank?
+
+ # Check resolution via ffprobe before attempting decode. Decoding
+ # video above 4K can require 800+ MB for codec reference frame
+ # buffers (DPB), which OOMKills pods with typical memory limits.
+ # For >4K videos without a poster image, use fallback. The provider
+ # may supply one asynchronously after encoding completes.
+ if video_resolution_too_high?(input)
+ Rails.logger.info("GenerateThumbnailJob: Skipping ffmpeg for high-res video ##{image.id}, using fallback")
+ return fallback_image(image)
+ end
+
+ screenshot_time = image.screenshot_time_in_ffmpeg_format
+
+ tmpfile = Tempfile.new(["video_thumb", ".jpg"])
+ begin
+ # Place -ss before -i for fast HTTP range-based seeking (avoids
+ # downloading entire file). Use -threads 1 to limit memory usage
+ # for high-resolution video decoding.
+ success = system(
+ "ffmpeg", "-y", "-ss", screenshot_time,
+ "-i", input,
+ "-frames:v", "1", "-q:v", "2", "-threads", "1",
+ tmpfile.path,
+ out: File::NULL, err: File::NULL
+ )
+
+ unless success && File.size?(tmpfile.path)
+ Rails.logger.warn("GenerateThumbnailJob: ffmpeg screenshot failed for file ##{image.id}")
+ return fallback_image(image)
+ end
+
+ thumbnail = Dragonfly.app.create(File.binread(tmpfile.path))
+ ensure
+ tmpfile.close!
end
+ thumbnail.name = Pathname.new(image.file_name).sub_ext(".jpg").to_s
+ thumbnail.meta["mime_type"] = "image/jpeg"
thumbnail
- rescue Dragonfly::Job::Fetch::NotFound
- missing_image_path = Folio::Engine.root.join("data/images/missing-image.png")
- thumbnail = Dragonfly.app.create(File.binread(missing_image_path))
- thumbnail.name = image.file_name || "missing-image.png"
+ rescue => e
+ Rails.logger.error("GenerateThumbnailJob: Video screenshot error for file ##{image.id}: #{e.message}")
+ fallback_image(image)
+ end
+
+ MAX_FFMPEG_DECODE_HEIGHT = 2160 # 4K
+
+ def video_resolution_too_high?(input)
+ stdout, _stderr, status = Open3.capture3(
+ "ffprobe", "-v", "error",
+ "-select_streams", "v:0",
+ "-show_entries", "stream=height",
+ "-of", "csv=p=0",
+ input
+ )
+ return false unless status.success?
+
+ height = stdout.strip.to_i
+ return false if height == 0
+
+ height > MAX_FFMPEG_DECODE_HEIGHT
+ rescue => e
+ # Fail safe: if ffprobe cannot determine resolution, skip ffmpeg to avoid
+ # OOMKilling the pod on a potentially high-resolution video.
+ Rails.logger.warn("GenerateThumbnailJob: ffprobe resolution check failed, skipping ffmpeg: #{e.message}")
+ true
+ end
+
+ def download_remote_image(image, url)
+ data = URI.parse(url).open(read_timeout: 15).read
+ thumbnail = Dragonfly.app.create(data)
+ thumbnail.name = Pathname.new(image.file_name).sub_ext(".jpg").to_s
+ thumbnail.meta["mime_type"] = "image/jpeg"
+ thumbnail
+ rescue => e
+ Rails.logger.warn("GenerateThumbnailJob: remote poster download failed for file ##{image.id}: #{e.message}")
+ nil
+ end
+
+ def fallback_image(image)
+ filename = if image.class.human_type == "video"
+ "missing-video.png"
+ else
+ "missing-image.png"
+ end
+
+ path = Folio::Engine.root.join("data/images/#{filename}")
+ thumbnail = Dragonfly.app.create(File.binread(path))
+ thumbnail.name = filename
thumbnail.meta["mime_type"] = "image/png"
thumbnail.meta["fallback_image"] = true
thumbnail
diff --git a/app/jobs/folio/pregenerate_thumbnails/check_job.rb b/app/jobs/folio/pregenerate_thumbnails/check_job.rb
index a738e10309..cd10df9c03 100644
--- a/app/jobs/folio/pregenerate_thumbnails/check_job.rb
+++ b/app/jobs/folio/pregenerate_thumbnails/check_job.rb
@@ -9,7 +9,7 @@ class Folio::PregenerateThumbnails::CheckJob < Folio::ApplicationJob
def perform(attachmentable)
if attachmentable && attachmentable.respond_to?(:file_placements)
attachmentable.file_placements.find_each do |file_placement|
- file_placement.dont_run_after_save_jobs = true
+ file_placement.try(:dont_run_after_save_jobs=, true)
file_placement.try(:pregenerate_thumbnails)
end
end
diff --git a/app/jobs/folio/regenerate_thumb_webp_job.rb b/app/jobs/folio/regenerate_thumb_webp_job.rb
index 117b7e1ab0..5ff41fc520 100644
--- a/app/jobs/folio/regenerate_thumb_webp_job.rb
+++ b/app/jobs/folio/regenerate_thumb_webp_job.rb
@@ -27,7 +27,7 @@ def perform(image)
if changed
image.thumbnail_sizes = thumbnail_sizes
- image.dont_run_after_save_jobs = true
+ image.try(:dont_run_after_save_jobs=, true)
image.save!(validate: false)
end
end
diff --git a/app/jobs/folio/s3/create_file_job.rb b/app/jobs/folio/s3/create_file_job.rb
index 2983a50953..702deda7a7 100644
--- a/app/jobs/folio/s3/create_file_job.rb
+++ b/app/jobs/folio/s3/create_file_job.rb
@@ -8,6 +8,12 @@ def perform_for_valid(s3_path:, klass:, existing_id:, web_session_id:, user_id:,
@file = prepare_file_model(klass, id: existing_id, web_session_id:, user_id:, attributes:)
replacing_file = @file.persisted?
+ # For video files on S3: use server-side copy instead of download+reupload
+ if klass <= Folio::File::Video && !use_local_file_system?
+ perform_with_s3_copy(s3_path:, klass:, replacing_file:)
+ return
+ end
+
Dir.mktmpdir("folio-file-s3") do |tmpdir|
@file.file = downloaded_file(s3_path, tmpdir)
@@ -45,6 +51,46 @@ def perform_for_valid(s3_path:, klass:, existing_id:, web_session_id:, user_id:,
end
private
+ def perform_with_s3_copy(s3_path:, klass:, replacing_file:)
+ file_name = s3_path.split("/").pop
+ sanitized_name = file_name.split(".").map(&:parameterize).join(".")
+
+ # Generate Dragonfly-compatible UID and S3 destination key
+ uid = generate_dragonfly_uid(sanitized_name)
+ dest_key = [dragonfly_s3_root_path, uid].compact_blank.join("/")
+ source_key = test_aware_s3_path(s3_path)
+
+ # Server-side S3 copy (instant, no data transfer through pod)
+ s3_copy_object(source_key: source_key, dest_key: dest_key)
+
+ # Get file metadata from S3 HEAD (no download)
+ head = s3_head_object(key: source_key)
+
+ # Set file attributes directly — bypass Dragonfly download+upload
+ @file.file_uid = uid
+ @file.file_name = sanitized_name
+ @file.file_size = head.content_length
+ @file.file_mime_type = head.content_type.presence || Marcel::MimeType.for(name: sanitized_name)
+
+ if save_file_with_slug_retry
+ if replacing_file
+ broadcast_replace_success(file: @file, s3_path:, file_type: klass.to_s)
+ else
+ broadcast_success(file: @file, s3_path:, file_type: klass.to_s)
+ end
+ else
+ # Rollback: delete the copied file
+ test_aware_s3_delete(s3_path: uid)
+ if replacing_file
+ broadcast_replace_error(file: @file, s3_path:, file_type: klass.to_s)
+ else
+ broadcast_error(file: @file, s3_path:, file_type: klass.to_s)
+ end
+ end
+ ensure
+ test_aware_s3_delete(s3_path:)
+ end
+
def downloaded_file(s3_path, tmpdir)
tmp_file_path = "#{tmpdir}/#{s3_path.split("/").pop}"
diff --git a/app/lib/folio/cra_media_cloud/encoder.rb b/app/lib/folio/cra_media_cloud/encoder.rb
index edf61d6b28..b7126591d9 100644
--- a/app/lib/folio/cra_media_cloud/encoder.rb
+++ b/app/lib/folio/cra_media_cloud/encoder.rb
@@ -5,6 +5,8 @@
module Folio
module CraMediaCloud
class Encoder
+ include Folio::S3::Client
+
DEFAULT_PROFILE_GROUP = "VoD"
# SFTP connection configuration
@@ -13,191 +15,62 @@ class Encoder
SFTP_MAX_RETRIES = 3
SFTP_RETRY_DELAY = 5.seconds
- # SFTP upload configuration
- CHUNK_SIZE = 1.megabyte # Standard chunk size for file operations
-
- def upload_file(file, priority: "regular", profile_group: nil, reference_id: nil, media_file: nil)
+ def upload_file(file, priority: "regular", profile_group: nil, reference_id: nil, media_file: nil, processing_phases: nil)
ref_id = reference_id || [file.id, Time.current.to_i].join("-")
- Rails.logger.info("[CraMediaCloud::Encoder] Starting upload for file ID: #{file.id}, ref_id: #{ref_id}")
-
- # Get metadata without downloading the file
- s3_metadata = get_s3_metadata(file)
- md5 = extract_etag(s3_metadata).delete_prefix('"').delete_suffix('"')
-
- xml_manifest = build_ingest_manifest(file, md5:, ref_id:, profile_group:)
-
- folder_path = "/ingest/#{priority}"
- file_path = "#{folder_path}/#{file.file_name}"
- xml_manifest_path = "#{folder_path}/#{file.file_name.split(".").first}_manifest.xml"
-
- # Use plain temp file path to avoid Ruby memory buffering
- temp_file_path = ::File.join(Dir.tmpdir, "cra_upload_#{ref_id}_#{Process.pid}_#{Time.current.to_i}.tmp")
+ Rails.logger.info("[CraMediaCloud::Encoder] Starting manifest upload for file ID: #{file.id}, ref_id: #{ref_id}")
- begin
- # Download using system tools (no Ruby file handles involved)
- download_to_file_path(file, temp_file_path)
+ # Get S3 metadata for MD5 checksum
+ s3_metadata = s3_dragonfly_head_object(file.file_uid)
+ md5 = extract_s3_etag(s3_metadata).delete_prefix('"').delete_suffix('"')
- # Verify file size
- actual_size = ::File.size(temp_file_path)
- if actual_size != file.file_size
- Rails.logger.error("[CraMediaCloud::Encoder] Downloaded file size mismatch: got #{actual_size}, expected #{file.file_size}")
- raise "Downloaded file size mismatch: got #{actual_size}, expected #{file.file_size}"
- end
+ # Generate presigned URL for CRA to download directly from S3
+ presigned_url = generate_presigned_url(file)
+ Rails.logger.info("[CraMediaCloud::Encoder] Generated presigned S3 URL for CRA (expires in 7 days)")
- # Upload to SFTP with robust session management
- with_robust_sftp_session do |sftp|
- # Use standard upload for better performance
- upload_with_retry(sftp, temp_file_path, file_path)
- Rails.logger.info("[CraMediaCloud::Encoder] File uploaded to SFTP: #{file_path}")
+ xml_manifest = build_ingest_manifest(file, md5:, ref_id:, profile_group:, presigned_url:, processing_phases:)
- # Upload manifest
- upload_with_retry(sftp, StringIO.new(xml_manifest), xml_manifest_path)
- Rails.logger.info("[CraMediaCloud::Encoder] Manifest uploaded to SFTP: #{xml_manifest_path}")
- end
+ folder_path = "/ingest/#{priority}"
+ xml_manifest_path = "#{folder_path}/#{ref_id}_manifest.xml"
- rescue => e
- Rails.logger.error("[CraMediaCloud::Encoder] Error during upload process: #{e.class}: #{e.message}")
- raise
- ensure
- # Clean up temp file
- if ::File.exist?(temp_file_path)
- begin
- ::File.delete(temp_file_path)
- rescue => e
- Rails.logger.warn("[CraMediaCloud::Encoder] Could not delete temp file #{temp_file_path}: #{e.message}")
- end
- end
+ # Upload only the manifest via SFTP (CRA downloads the video itself)
+ with_robust_sftp_session do |sftp|
+ upload_with_retry(sftp, StringIO.new(xml_manifest), xml_manifest_path)
+ Rails.logger.info("[CraMediaCloud::Encoder] Manifest uploaded to SFTP: #{xml_manifest_path}")
end
{
ref_id:,
- file_path:,
xml_manifest_path:,
+ presigned_url: presigned_url.present?,
}
end
private
- def get_s3_metadata(file)
+ def generate_presigned_url(file)
s3_datastore = Dragonfly.app.datastore
- s3_object_key = [s3_datastore.root_path, file.file_uid].join("/")
- Rails.logger.info("[CraMediaCloud::Encoder] Fetching S3 metadata for key: #{s3_object_key}")
- s3_datastore.storage.head_object(ENV["S3_BUCKET_NAME"], s3_object_key)
- end
-
- def extract_etag(response)
- # Handle different response types (AWS SDK, Excon, etc.)
- if response.respond_to?(:etag)
- response.etag
- elsif response.respond_to?(:headers)
- response.headers["ETag"] || response.headers["etag"] || response.headers["Etag"]
- else
- raise "Cannot extract ETag from response type: #{response.class}"
- end
- end
-
- def download_to_file_path(file, file_path)
- s3_datastore = Dragonfly.app.datastore
- s3_object_key = [s3_datastore.root_path, file.file_uid].join("/")
-
- download_success = false
-
- # Try AWS CLI first (if available)
- if system("which aws > /dev/null 2>&1")
- s3_url = "s3://#{ENV['S3_BUCKET_NAME']}/#{s3_object_key}"
- aws_command = "aws s3 cp #{s3_url} #{file_path} --no-progress"
-
- if system(aws_command)
- download_success = true
- end
- end
-
- # Fallback to curl with S3 presigned URL
- unless download_success
- begin
- s3_client = s3_datastore.storage
- presigned_url = s3_client.presigned_url(
- :get_object,
- bucket: ENV["S3_BUCKET_NAME"],
- key: s3_object_key,
- expires_in: 3600
- )
-
- curl_command = [
- "curl", "-L", "-s", "-S",
- "-o", file_path,
- "--max-time", "1800",
- "--connect-timeout", "30",
- presigned_url
- ]
-
- if system(*curl_command)
- download_success = true
- end
-
- rescue => e
- Rails.logger.error("[CraMediaCloud::Encoder] Error generating presigned URL: #{e.message}")
- end
- end
-
- # Final fallback to Ruby download
- unless download_success
- Rails.logger.warn("[CraMediaCloud::Encoder] System download failed, using Ruby fallback")
-
- downloaded_bytes = 0
-
- ::File.open(file_path, "wb") do |output_file|
- loop do
- range_start = downloaded_bytes
- range_end = [downloaded_bytes + CHUNK_SIZE - 1, file.file_size - 1].min
-
- break if range_start >= file.file_size
-
- begin
- s3_response = s3_datastore.storage.get_object(
- ENV["S3_BUCKET_NAME"],
- s3_object_key,
- range: "bytes=#{range_start}-#{range_end}"
- )
-
- chunk_data = s3_response.body
- output_file.write(chunk_data)
- output_file.flush
-
- downloaded_bytes += chunk_data.length
-
- # Clear references
- nil
- nil
-
- rescue => e
- Rails.logger.error("[CraMediaCloud::Encoder] Error downloading chunk #{range_start}-#{range_end}: #{e.message}")
- raise "Failed to download chunk from S3: #{e.message}"
- end
- end
- end
-
- download_success = true
- end
-
- unless download_success
- raise "All download methods failed"
- end
-
- actual_size = ::File.size(file_path)
- if actual_size != file.file_size
- raise "Downloaded size mismatch: got #{actual_size}, expected #{file.file_size}"
- end
+ s3_object_key = [s3_datastore.root_path, file.file_uid].compact_blank.join("/")
+ s3_presigner.presigned_url(
+ :get_object,
+ bucket: s3_bucket,
+ key: s3_object_key,
+ expires_in: 7.days.to_i # 604800 seconds
+ )
end
- def build_ingest_manifest(file, md5:, ref_id:, profile_group:)
+ def build_ingest_manifest(file, md5:, ref_id:, profile_group:, presigned_url: nil, processing_phases: nil)
xml = Builder::XmlMarkup.new; nil
xml.instruct!(:xml, version: "1.0", encoding: "utf-8")
- xml.vod_encoder_job do
- xml.input(type: "VIDEO",
- file: file.file_name,
- size: file.file_size.to_s,
- md5: md5) do
+ root_attrs = processing_phases.to_i > 1 ? { processingPhases: processing_phases } : {}
+ xml.vod_encoder_job(root_attrs) do
+ input_attrs = { type: "VIDEO", size: file.file_size.to_s, md5: md5 }
+ if presigned_url.present?
+ input_attrs[:src] = presigned_url
+ else
+ input_attrs[:file] = file.file_name
+ end
+
+ xml.input(input_attrs) do
xml.audioTrack(language: "cze", channels: "auto")
end
xml.profileGroup(profile_group || DEFAULT_PROFILE_GROUP)
diff --git a/app/lib/folio/cra_media_cloud/job_resolver.rb b/app/lib/folio/cra_media_cloud/job_resolver.rb
new file mode 100644
index 0000000000..5727fac7c6
--- /dev/null
+++ b/app/lib/folio/cra_media_cloud/job_resolver.rb
@@ -0,0 +1,50 @@
+# frozen_string_literal: true
+
+module Folio
+  module CraMediaCloud
+    # Maps raw CRA job-status strings to internal status symbols and picks
+    # the most recently modified job from a CRA API jobs response.
+    class JobResolver
+      # CRA API status string => internal status symbol.
+      STATUS_MAP = {
+        "WAITING" => :processing,
+        "PROCESSING" => :processing,
+        "CREATED" => :processing,
+        "VALIDATING" => :processing,
+        "DONE" => :done,
+        "FAILED" => :failed,
+        "ERROR" => :failed,
+        "REMOVED" => :not_found,
+      }.freeze
+
+      # Resolve the effective status from a list of CRA job hashes.
+      # Unknown or missing statuses map to :not_found.
+      #
+      # @param jobs [Array<Hash>] job hashes as returned by the CRA API
+      # @return [Hash] { status: Symbol, job: Hash or nil }
+      def self.resolve(jobs)
+        return { status: :not_found, job: nil } if jobs.empty?
+
+        job = latest_job(jobs)
+        status = STATUS_MAP[job["status"]] || :not_found
+        { status:, job: }
+      end
+
+      # Most recently modified job (by "lastModified"); nil for empty input.
+      def self.latest_job(jobs)
+        return nil if jobs.empty?
+        jobs.max_by { |j| parse_time(j["lastModified"]) }
+      end
+
+      # Lenient timestamp parse: a missing or malformed "lastModified" sorts
+      # first (epoch) instead of raising ArgumentError/TypeError mid-resolve.
+      def self.parse_time(value)
+        Time.parse(value.to_s)
+      rescue ArgumentError
+        Time.at(0)
+      end
+
+      private_class_method :latest_job, :parse_time
+    end
+  end
+end
diff --git a/app/lib/folio/s3/client.rb b/app/lib/folio/s3/client.rb
index b6fd900c03..698278579f 100644
--- a/app/lib/folio/s3/client.rb
+++ b/app/lib/folio/s3/client.rb
@@ -94,6 +94,48 @@ def test_aware_s3_upload(s3_path:, file:, acl: "private")
end
end
+ def s3_copy_object(source_key:, dest_key:)
+ s3_client.copy_object(
+ bucket: s3_bucket,
+ copy_source: "#{s3_bucket}/#{source_key}",
+ key: dest_key
+ )
+ end
+
+ def s3_head_object(key:)
+ s3_client.head_object(bucket: s3_bucket, key: key)
+ end
+
+ def generate_dragonfly_uid(file_name)
+ if Dragonfly.app.datastore.respond_to?(:generate_uid)
+ Dragonfly.app.datastore.generate_uid(file_name)
+ else
+ "#{Time.now.strftime '%Y/%m/%d/%H/%M/%S'}/#{SecureRandom.uuid}/#{file_name}"
+ end
+ end
+
+ def dragonfly_s3_root_path
+ Dragonfly.app.datastore.root_path
+ end
+
+ # Fetch S3 HEAD metadata via Dragonfly's Fog storage layer.
+ # Returns Excon response (use extract_s3_etag to read ETag).
+ def s3_dragonfly_head_object(file_uid)
+ s3_object_key = [dragonfly_s3_root_path, file_uid].compact_blank.join("/")
+ Dragonfly.app.datastore.storage.head_object(s3_bucket, s3_object_key)
+ end
+
+ # Extract ETag from either Fog/Excon or AWS SDK response.
+ def extract_s3_etag(response)
+ if response.respond_to?(:etag)
+ response.etag
+ elsif response.respond_to?(:headers)
+ response.headers["ETag"] || response.headers["etag"] || response.headers["Etag"]
+ else
+ raise "Cannot extract ETag from response type: #{response.class}"
+ end
+ end
+
private
def use_local_file_system?
@use_local_file_system ||= Dragonfly.app.datastore.is_a?(Dragonfly::FileDataStore)
diff --git a/app/models/concerns/folio/cra_media_cloud/file_processing.rb b/app/models/concerns/folio/cra_media_cloud/file_processing.rb
index 9891ed4451..733b4eb466 100644
--- a/app/models/concerns/folio/cra_media_cloud/file_processing.rb
+++ b/app/models/concerns/folio/cra_media_cloud/file_processing.rb
@@ -8,6 +8,14 @@ def encoder_profile_group
nil # use encoder's default
end
+ def encoder_processing_phases
+ 1 # default: single phase; override in app for multi-phase
+ end
+
+ def encoder_phase_name(phase_number)
+ nil # override in app to return e.g. "SD", "HD"
+ end
+
def remote_content_mp4_url_for(profile)
path = remote_services_data.dig("content_mp4_paths", profile.to_s)
remote_content_url_base + path if path
@@ -39,6 +47,10 @@ def remote_cover_url
end
end
+ def video_poster_url
+ remote_cover_url
+ end
+
def remote_thumbnails_url
if remote_services_data["thumbnails_path"]
remote_content_url_base + remote_services_data["thumbnails_path"]
@@ -58,6 +70,8 @@ def update_preview_media_length
end
def destroy_attached_file
+ return if remote_id.blank? && remote_reference_id.blank?
+
delete_media_job_class.perform_later(remote_id, reference_id: remote_reference_id)
end
@@ -77,7 +91,11 @@ def processed_by
"cra_media_cloud"
end
+ def check_media_processing(preview: false)
+ check_media_processing_job_class.perform_later(self, preview:, encoding_generation:)
+ end
+
def upload_failed?
- processing_state == "upload_failed"
+ processing_state.in?(%w[upload_failed encoding_failed])
end
end
diff --git a/app/models/concerns/folio/media_file_processing_base.rb b/app/models/concerns/folio/media_file_processing_base.rb
index 90d531e213..bb06b19cdf 100644
--- a/app/models/concerns/folio/media_file_processing_base.rb
+++ b/app/models/concerns/folio/media_file_processing_base.rb
@@ -19,8 +19,11 @@ module Folio::MediaFileProcessingBase
def process_attached_file
regenerate_thumbnails if try(:thumbnailable?)
- # Set new encoding generation to invalidate any old CheckProgressJobs
- self.update(remote_services_data: {
+ # Set new encoding generation to invalidate any old CheckProgressJobs.
+ # Use update_columns to bypass validations — remote_services_data is processing
+ # metadata and must not be blocked by unrelated validation failures (e.g. missing
+ # file dimensions when ffprobe fails).
+ update_remote_services_data({
"processing_step_started_at" => Time.current,
"encoding_generation" => Time.current.to_i
})
@@ -91,8 +94,7 @@ def preview_media_processed?
def create_full_media
full_media_job_class.perform_later(self)
- rsd = remote_services_data || {}
- self.update(remote_services_data: rsd.merge!({ "service" => processed_by, "processing_state" => "enqueued", "processing_step_started_at" => Time.current }))
+ update_remote_services_data("service" => processed_by, "processing_state" => "enqueued", "processing_step_started_at" => Time.current)
end
def create_preview_media
@@ -100,7 +102,7 @@ def create_preview_media
preview_media_processed!
else
preview_media_job_class.perform_later(self)
- self.update(remote_services_data: self.remote_services_data.merge!({ "processing_step_started_at" => Time.current }))
+ update_remote_services_data("processing_step_started_at" => Time.current)
end
end
@@ -130,7 +132,7 @@ def preview_duration_in_seconds
if (remote_services_data || {}).dig("preview_interval").present?
preview_ends_at_second - preview_starts_at_second
else
- [file_track_duration_in_seconds, DEFAULT_PREVIEW_DURATION].min
+ file_track_duration_in_seconds.present? ? [file_track_duration_in_seconds, DEFAULT_PREVIEW_DURATION].min : DEFAULT_PREVIEW_DURATION
end
end
@@ -148,4 +150,21 @@ def preview_duration=(secs)
def preview_duration
@preview_duration ||= ActiveSupport::Duration.build(preview_duration_in_seconds)
end
+
+ private
+ # Merge new data into remote_services_data and persist directly to DB,
+ # bypassing model validations and callbacks. This is necessary because
+ # remote_services_data is processing metadata that must not be blocked
+ # by unrelated validation failures on the model.
+ # NOTE(review): update_columns already syncs the in-memory attribute, so the trailing write_attribute looks redundant — confirm and drop if so.
+ # NOTE: Non-atomic read-modify-write — if another process updates
+ # remote_services_data between read and write, changes will be lost.
+ # Acceptable at current call sites (process_attached_file, create_full_media)
+ # where the video is being initially processed and no concurrent job
+ # is modifying remote_services_data yet.
+ def update_remote_services_data(new_data)
+ merged = (remote_services_data || {}).merge(new_data)
+ update_columns(remote_services_data: merged)
+ write_attribute(:remote_services_data, merged)
+ end
end
diff --git a/app/models/folio/file.rb b/app/models/folio/file.rb
index dd44804f11..4b62c91474 100644
--- a/app/models/folio/file.rb
+++ b/app/models/folio/file.rb
@@ -9,6 +9,7 @@ class Folio::File < Folio::ApplicationRecord
include Folio::Taggable
include Folio::HasAasmStates
include Folio::BelongsToSite
+ include Folio::S3::Client
include Folio::FilesSharedAccrossSites if Rails.application.config.folio_shared_files_between_sites
READY_STATE = :ready
@@ -196,6 +197,10 @@ class Folio::File < Folio::ApplicationRecord
event :reprocess do
transitions from: READY_STATE, to: :processing
end
+
+ event :retry_processing do
+ transitions from: :processing_failed, to: :processing
+ end
end
def self.correct_site(site)
@@ -267,6 +272,17 @@ def regenerate_thumbnails
end
end
+ # Returns a path or URL suitable for ffprobe/ffmpeg.
+ # For S3 storage with stored file: presigned URL (streams headers only, no full download).
+ # For pending uploads (file= assigned but not yet saved) or local storage: file system path.
+ def file_url_or_path
+ if file_uid.present? && !Dragonfly.app.datastore.is_a?(Dragonfly::FileDataStore)
+ file_presigned_url
+ else
+ file&.path.to_s
+ end
+ end
+
def thumbnailable?
false
end
@@ -417,6 +433,15 @@ def update_file_placements_counts!
end
private
+ def file_presigned_url(expires_in: 1.hour.to_i)
+ s3_object_key = [dragonfly_s3_root_path, file_uid].compact_blank.join("/")
+ s3_presigner.presigned_url(:get_object,
+ bucket: s3_bucket,
+ key: s3_object_key,
+ expires_in: expires_in
+ )
+ end
+
def slug_candidates
%i[slug headline hash_id_for_slug to_label]
end
@@ -457,16 +482,24 @@ def check_usage_before_destroy
end
def set_file_track_duration
- if %w[audio video].include?(self.class.human_type)
- self.file_track_duration = Folio::File::GetFileTrackDurationJob.perform_now(file.path.to_s, self.class.human_type) # in seconds
+ if self.class.human_type == "video"
+ # For video: handled together with dimensions in set_video_file_dimensions
+ nil
+ elsif self.class.human_type == "audio"
+ self.file_track_duration = Folio::File::GetFileTrackDurationJob.perform_now(file_url_or_path, "audio")
self.preview_track_duration_in_seconds = self.respond_to?(:preview_duration_in_seconds) ? preview_duration_in_seconds : 0
end
end
def set_video_file_dimensions
- if %w[video].include?(self.class.human_type)
- self.file_width, self.file_height = Folio::File::GetVideoDimensionsJob.perform_now(file.path.to_s, self.class.human_type)
- end
+ return unless self.class.human_type == "video"
+
+ metadata = Folio::File::GetVideoMetadataJob.perform_now(file_url_or_path)
+
+ self.file_width = metadata[:width]
+ self.file_height = metadata[:height]
+ self.file_track_duration = metadata[:duration]
+ self.preview_track_duration_in_seconds = self.respond_to?(:preview_duration_in_seconds) ? preview_duration_in_seconds : 0
end
def validate_attribution_and_texts_if_needed
diff --git a/app/models/folio/file/video.rb b/app/models/folio/file/video.rb
index bb5e9a8427..f9429ef042 100644
--- a/app/models/folio/file/video.rb
+++ b/app/models/folio/file/video.rb
@@ -19,6 +19,10 @@ def thumbnailable?
true
end
+ def video_poster_url
+ nil # override in provider concerns to return a static thumbnail image URL
+ end
+
def self.human_type
"video"
end
diff --git a/app/serializers/folio/console/file_serializer.rb b/app/serializers/folio/console/file_serializer.rb
index 48f9a1e8ef..f4f332b4d0 100644
--- a/app/serializers/folio/console/file_serializer.rb
+++ b/app/serializers/folio/console/file_serializer.rb
@@ -86,11 +86,7 @@ class Folio::Console::FileSerializer
end
attribute :aasm_state_human do |object|
- if object.processing? && object.remote_services_data.try(:[], "progress_percentage")
- "#{object.aasm.human_state} (#{object.remote_services_data.try(:[], "progress_percentage")}%)"
- else
- object.aasm.human_state
- end
+ object.aasm.human_state
end
attribute :aasm_state_color do |object|
diff --git a/config/locales/aasm.cs.yml b/config/locales/aasm.cs.yml
index 54f31b891a..9110a5a082 100644
--- a/config/locales/aasm.cs.yml
+++ b/config/locales/aasm.cs.yml
@@ -7,6 +7,7 @@ cs:
folio/file:
aasm_state/unprocessed: Nezpracováno
aasm_state/processing: Zpracováváno
+ aasm_state/processing_failed: Zpracování selhalo
aasm_state/ready: Připraveno
folio/lead:
diff --git a/config/locales/aasm.en.yml b/config/locales/aasm.en.yml
index 26cc89b411..e63c318ea9 100644
--- a/config/locales/aasm.en.yml
+++ b/config/locales/aasm.en.yml
@@ -7,6 +7,7 @@ en:
folio/file:
aasm_state/unprocessed: Unprocessed
aasm_state/processing: Processing
+ aasm_state/processing_failed: Processing failed
aasm_state/ready: Ready
folio/lead:
diff --git a/config/locales/console/files.cs.yml b/config/locales/console/files.cs.yml
index c09108f60e..9e7b7e1351 100644
--- a/config/locales/console/files.cs.yml
+++ b/config/locales/console/files.cs.yml
@@ -22,6 +22,19 @@ cs:
navigation_previous: Předchozí
show:
+ encoding_info_component:
+ phase_waiting: "Čekání ve frontě"
+ phase_waiting_multi: "Čekání ve frontě (fáze %{phase}/%{total})"
+ phase_waiting_named: "Čekání ve frontě – %{name}"
+ phase_encoding: "Kódování videa"
+ phase_encoding_multi: "Kódování videa (%{phase}/%{total})"
+ phase_encoding_named: "Kódování %{name}"
+ phase_packaging: "Balení"
+ phase_packaging_multi: "Balení (%{phase}/%{total})"
+ phase_packaging_named: "Balení – %{name}"
+ phase_failed_retrying: "Zpracování selhalo, pokusíme se znovu"
+ phase_failed: "Zpracování selhalo. Zkuste video nahrát znovu, nebo kontaktujte podporu."
+
metadata_component:
no_metadata: Žádná metadata
extract_metadata: Znovu extrahovat metadata
diff --git a/config/locales/console/files.en.yml b/config/locales/console/files.en.yml
index 014c5663f2..db393517b1 100644
--- a/config/locales/console/files.en.yml
+++ b/config/locales/console/files.en.yml
@@ -22,6 +22,19 @@ en:
navigation_previous: Previous
show:
+ encoding_info_component:
+ phase_waiting: "Waiting in queue"
+ phase_waiting_multi: "Waiting in queue (phase %{phase}/%{total})"
+ phase_waiting_named: "Waiting in queue – %{name}"
+ phase_encoding: "Encoding video"
+ phase_encoding_multi: "Encoding video (%{phase}/%{total})"
+ phase_encoding_named: "Encoding %{name}"
+ phase_packaging: "Packaging"
+ phase_packaging_multi: "Packaging (%{phase}/%{total})"
+ phase_packaging_named: "Packaging – %{name}"
+ phase_failed_retrying: "Processing failed, retrying automatically"
+ phase_failed: "Processing failed. Try re-uploading or contact support."
+
metadata_component:
no_metadata: No metadata
extract_metadata: Extract metadata again
diff --git a/data/images/missing-video.png b/data/images/missing-video.png
new file mode 100644
index 0000000000..9167532d35
Binary files /dev/null and b/data/images/missing-video.png differ
diff --git a/docs/design/cra-encoding-system.md b/docs/design/cra-encoding-system.md
new file mode 100644
index 0000000000..3d6cc8df47
--- /dev/null
+++ b/docs/design/cra-encoding-system.md
@@ -0,0 +1,342 @@
+# CRA Video Encoding System — Design Document
+
+## Overview
+
+The CRA (CraMediaCloud) integration encodes uploaded videos into multiple quality profiles (SD/HD), HLS/DASH streaming manifests, and generates thumbnails/cover images. Videos become progressively available — SD quality is playable while HD encoding continues.
+
+**Repos:** folio gem (core engine) + economia app (overrides, player, UI)
+
+---
+
+## 1. Upload & Manifest Delivery
+
+### Presigned S3 URL (no file transfer through pod)
+
+When a video is uploaded, the encoder generates a **presigned S3 URL** (7-day expiry) and embeds it in an XML manifest. Only the manifest (~1 KB) is uploaded via SFTP — CRA fetches the video directly from S3.
+
+```xml
+<vod_encoder_job processingPhases="2">
+  <input type="VIDEO" src="https://…presigned-s3-url…" size="1234567890" md5="0123abcd…">
+    <audioTrack language="cze" channels="auto"/>
+  </input>
+  <profileGroup>VoDHDAuto</profileGroup>
+  <refId>prod-video-slug-123-a1b2c3d4-1710000000</refId>
+</vod_encoder_job>
+```
+
+### Reference ID format
+
+`{env}-{slug(truncated)}-{id}-{s3_etag[0..7]}-{encoding_generation}`
+
+- Total capped at **128 chars** (CRA lookup fails with longer IDs)
+- `encoding_generation` changes on each re-encode, ensuring CRA gets a fresh refId
+
+### Files
+
+| File (folio) | Purpose |
+|---|---|
+| `app/lib/folio/cra_media_cloud/encoder.rb` | Builds XML manifest, uploads via SFTP |
+| `app/jobs/folio/cra_media_cloud/create_media_job.rb` | Orchestrates upload: generates ref ID, checks for existing jobs, calls Encoder |
+| `app/lib/folio/s3/client.rb` | Shared S3 helpers: presigned URLs, HEAD metadata, ETag extraction |
+
+---
+
+## 2. Two-Phase Encoding
+
+When `encoder_processing_phases` returns `> 1` (economia overrides to `2`), a **single manifest** is submitted with the `processingPhases="2"` XML attribute (same format as shown in §1). The profile group is always `VoDHDAuto` — CRA handles phasing internally.
+
+CRA creates multiple internal jobs (one per phase), each with a `phase` field in API responses:
+
+| CRA phase | Output | Enables |
+|---|---|---|
+| 1 (SD) | sd profiles, HLS/DASH (SD), cover, thumbnails | Playback at SD quality while HD encodes |
+| 2 (HD) | All profiles incl. HD, full HLS/DASH | Full quality playback |
+
+When `CheckProgressJob` sees a phase-1 job reach DONE, `save_intermediate_phase_data` writes the SD manifest/cover paths to the top-level `remote_services_data` keys the player reads — making the video playable at SD quality. It then clears `remote_id` and polls by `reference_id` to discover the phase-2 job. Phase-2 output overwrites phase-1 paths when it completes.
+
+### Backward compatibility
+
+When `encoder_processing_phases` is `1` or `nil` (default), the manifest is submitted without the `processingPhases` attribute. All existing behavior preserved.
+
+### economia override (`feature/cra-encoding-improvements` branch)
+
+```ruby
+# app/overrides/models/folio/file/video_override.rb
+def encoder_profile_group
+ Rails.env.production? ? "VoDHDAuto" : "VoD"
+end
+
+def encoder_processing_phases
+ 2
+end
+
+def encoder_phase_name(phase_number)
+ { 1 => "SD", 2 => "HD" }[phase_number]
+end
+```
+
+---
+
+## 3. Progress Tracking
+
+### CRA API polling
+
+`CheckProgressJob` polls every 15 seconds. It parses the CRA `messages` array to determine encoding phase:
+
+| CRA message | Internal phase |
+|---|---|
+| `verification: finished` | `validation` |
+| `Transcoding worker - audio: finished` | `audio` |
+| `Transcoding worker - video: finished` | `video` |
+| `copying: started` | `packaging` |
+
+Progress percentage is raw CRA `progress` field × 100 (per-phase, not mapped across phases).
+
+### MessageBus real-time updates
+
+`broadcast_encoding_progress` publishes to `Folio::MESSAGE_BUS_CHANNEL` with phase label, progress %, and failure state. The `EncodingInfoComponent` Stimulus controller updates the UI badge in real time.
+
+### Files
+
+| File (folio) | Purpose |
+|---|---|
+| `app/jobs/folio/cra_media_cloud/check_progress_job.rb` | Polls CRA API, updates `remote_services_data`, handles phase transitions |
+| `app/components/folio/console/files/show/encoding_info_component.*` | UI badge (Ruby + Stimulus + Sass + Slim) |
+
+---
+
+## 4. State Machine
+
+### AASM states (`aasm_state` column)
+
+```
+unprocessed → [process!] → processing → [processing_done!] → ready
+ ↓
+ [processing_failed!]
+ ↓
+ processing_failed → [retry_processing!] → processing
+```
+
+### Processing states (`remote_services_data["processing_state"]`)
+
+```
+enqueued → creating_media_job → full_media_processing → full_media_processed
+ ↓
+ encoding_failed (CRA FAILED/ERROR)
+ upload_failed (SFTP/S3 error in CreateMediaJob)
+ source_file_missing (S3 404)
+```
+
+Multi-phase adds intermediate data (`phase_N_content_mp4_paths`, `phase_N_completed_at`) but no new processing states.
+
+### `remote_services_data` JSON structure
+
+```json
+{
+ "service": "cra_media_cloud",
+ "processing_state": "full_media_processing",
+ "reference_id": "prod-video-slug-123-a1b2c3d4-1710000000",
+ "remote_id": "JOB123",
+ "encoding_generation": 1710000000,
+ "processing_step_started_at": "2026-03-17T10:30:00Z",
+
+ "cra_status": "PROCESSING",
+ "progress_percentage": 60,
+ "current_phase": "encoding",
+ "current_encoding_phase": 1,
+ "processing_phases": 2,
+ "phases_completed": ["validation", "audio"],
+ "video_duration": 120,
+
+ "phase_1_content_mp4_paths": { "sd0": "/path/sd0.mp4", "sd1": "/path/sd1.mp4" },
+ "phase_1_completed_at": "2026-03-17T11:00:00Z",
+ "phase_1_remote_id": "JOB111",
+
+ "content_mp4_paths": { "sd0": "/path/sd0.mp4", "hd1": "/path/hd1.mp4" },
+ "manifest_hls_path": "/path/master.m3u8",
+ "manifest_dash_path": "/path/manifest.mpd",
+ "cover_path": "/path/cover.jpg",
+ "thumbnails_path": "/path/thumb.vtt",
+
+ "error_message": null,
+ "retry_count": 0,
+ "retry_scheduled_at": null,
+ "failed_at": null
+}
+```
+
+---
+
+## 5. Error Handling & Recovery
+
+### Automatic retry
+
+On CRA `FAILED`/`ERROR`, `CheckProgressJob`:
+1. Sets `processing_state` to `"encoding_failed"`, `retry_count` += 1
+2. Calls `processing_failed!` (single save)
+3. Broadcasts failure state to UI
+4. If `retry_count <= 1`: schedules `CreateMediaJob` in 2 minutes
+5. If `retry_count > 1`: final failure, no retry
+
+### Timeout
+
+- **CheckProgressJob**: 4-hour `MAX_PROCESSING_DURATION` (flat, per video) — marks as `processing_failed` if `processing_step_started_at` is older. The unique-job constraint means one instance runs per video; if the worker is restarted the next `CheckProgressJob` re-checks this on each run.
+- **MonitorProcessingJob**: 6-hour hard timeout (flat, not phase-multiplied in current code) — marks as `processing_failed` for any video that has been in `processing` AASM state for over 6 hours. Effectively a backstop for videos whose `CheckProgressJob` was lost or never fired.
+
+> **Note:** The two timeouts are intentionally overlapping rather than sequential. `CheckProgressJob` handles the common case (actively polling video); `MonitorProcessingJob` is the safety net for stuck/orphaned videos. A video that times out in `CheckProgressJob` at 4 hours will also be caught by `MonitorProcessingJob` at 6 hours if it somehow transitions back to `processing`.
+
+### Safety nets (MonitorProcessingJob)
+
+Runs periodically with Redis lock to prevent concurrent instances. Catches:
+
+| Scenario | Action |
+|---|---|
+| Stuck in `unprocessed` with `file_uid` > 5 min | Triggers `process!` |
+| Stuck in `enqueued` > 10 min | Re-enqueues `CreateMediaJob` |
+| `upload_failed` / `encoding_failed` > 5 min | Re-enqueues `CreateMediaJob` |
+| `processing_failed` with `retry_count < 2` and lost retry job | Re-enqueues `CreateMediaJob` |
+| Processing > 6 hours | Marks as `processing_failed` |
+| Orphaned (has `reference_id` but no `remote_id`, or stuck in `creating_media_job` > 30 min) | Reconciles via API |
+| All CRA jobs `REMOVED` with stored phase data | `finalize_from_completed_phases!` merges stored output and transitions to `ready` |
+
+**Note on `REMOVED` status:** Production data confirms CRA does **not** auto-purge completed jobs — DONE jobs remain accessible indefinitely (verified 4+ months). `REMOVED` appears only when job content is explicitly deleted via `DeleteMediaJob` (`DELETE /jobs/{id}/content`). The all-REMOVED handler therefore covers the edge case where both phase job contents were deleted while the video was still in `processing` AASM state.
+
+### Tracked job becomes REMOVED (CheckProgressJob)
+
+When `CheckProgressJob` is polling via `remote_id` and that specific job returns `REMOVED`, it:
+1. Clears `remote_id` from `remote_services_data`
+2. Calls `check_again_later` to resume polling by `reference_id`
+
+This handles the edge case where a single phase job is deleted while encoding is still in progress. Polling falls back to `reference_id` lookup to discover any replacement or remaining jobs.
+
+### Stale processing state after CRA recovery (CreateMediaJob)
+
+When `CreateMediaJob` finds an existing CRA job with `remote_id` matching the stored value but `processing_state` is stale (e.g. `upload_failed` or `encoding_failed` set while CRA eventually completed), it:
+1. Resets `processing_state` to `"full_media_processing"`
+2. Schedules `CheckProgressJob` to finalize
+
+Without this, videos could loop forever: `MonitorProcessingJob` re-enqueues `CreateMediaJob` every 5 min, `CreateMediaJob` finds matching `remote_id` and does nothing, state never advances. Root cause confirmed in production (video stuck 25+ hours while CRA job was `DONE`).
+
+### Missing S3 source file
+
+If S3 returns 404 during `CreateMediaJob`, video is marked `source_file_missing` + `processing_failed` permanently (no retry).
+
+---
+
+## 6. Progressive Video Availability
+
+After phase 1 completes, `save_intermediate_phase_data` writes SD manifest/cover paths to the same top-level keys the player reads (`manifest_hls_path`, `manifest_dash_path`, `cover_path`). The video is playable at SD quality while AASM state remains `processing`.
+
+The economia `PlayerComponent` gates on manifest URL presence (not AASM `ready?`):
+```ruby
+@valid = @file.remote_manifest_hls_url.present? || @file.remote_manifest_dash_url.present?
+```
+
+When phase 2 completes, `process_output_hash` overwrites with HD paths. Next page load serves HD.
+
+### Console video detail (economia)
+
+`AdditionalHtmlComponent` shows:
+- **Iframe with player** when manifest URL is present (same gate as PlayerComponent — manifest is available as soon as phase 1 completes, so this covers the SD-quality interim state)
+- **"File not ready"** when no manifest is available yet
+
+---
+
+## 7. Thumbnail Generation
+
+Priority order:
+1. **CRA cover image** (small JPEG from CDN) — preferred, no decoding needed
+2. **ffmpeg frame extraction** — only for ≤4K resolution (checked via `ffprobe`)
+3. **Fallback placeholder** (`missing-video.png`) — for >4K or when both above fail
+
+Both ffprobe (resolution check) and ffmpeg (frame extraction) receive the **presigned S3 URL** from `file_url_or_path`. ffprobe reads only container headers (no full download). ffmpeg uses `-ss` before `-i` for fast HTTP range-based seeking, avoiding a full file download to the pod.
+
+### OOMKill prevention
+
+Videos >4K (2160p) skip ffmpeg decoding entirely — HEVC reference frame buffers can require 800+ MB. The fallback placeholder is used until CRA provides the cover image.
+
+### Files
+
+| File (folio) | Purpose |
+|---|---|
+| `app/jobs/folio/generate_thumbnail_job.rb` | Video screenshot extraction with resolution check |
+| `app/jobs/folio/file/get_video_metadata_job.rb` | Single ffprobe call for duration + dimensions via presigned URL |
+
+---
+
+## 8. S3 Optimizations
+
+| Optimization | File | Effect |
+|---|---|---|
+| Presigned URL for CRA | `encoder.rb` | No video download to pod |
+| Presigned URL for ffprobe | `file.rb` → `file_url_or_path` | Streams ~100 KB headers, not full file |
+| S3 server-side copy for uploads | `app/jobs/folio/s3/create_file_job.rb` | Zero data transfer for video copy |
+| Shared S3 helpers | `app/lib/folio/s3/client.rb` | `s3_dragonfly_head_object`, `extract_s3_etag` |
+
+---
+
+## 9. Legacy Video Support (economia)
+
+Videos imported from old Wowza/CDN77 system have `legacy_data["skip_cra_encoding"] = true`. These:
+- Skip CRA encoding entirely (`process_attached_file` → just thumbnails + `processing_done!`)
+- Use direct CDN URLs for playback
+- Override `remote_manifest_url_base` and `remote_content_url_base` to match import domain
+- Skip CRA delete on destroy
+
+### Files (economia, branch `feature/cra-encoding-improvements`)
+
+| File | Purpose |
+|---|---|
+| `app/overrides/models/folio/file/video_override.rb` | CRA concern inclusion, profile group, 2-phase config, legacy video handling |
+| `app/overrides/jobs/folio/cra_media_cloud/create_media_job_override.rb` | Sets queue to `:video` |
+| `app/components/economia/cra_media_cloud/player_component.rb` | OTT player rendering with manifest-based gate, subtitles, Gemius analytics |
+| `app/components/economia/cra_media_cloud/player_component.js` | Stimulus controller: player lifecycle, viewport awareness, multi-instance coordination |
+| `app/components/folio/console/economia/files/additional_html_component.rb` | Console video detail: iframe player (manifest gate) + manifest URL links |
+| `app/components/folio/console/economia/files/additional_html_component.slim` | Template with manifest gate — shows player iframe or "not ready" |
+| `app/jobs/economia/import_video_from_url_job.rb` | Legacy video import from article URLs |
+| `app/lib/economia/article_storage/video_creator.rb` | Creates video records from Article Storage API |
+| `lib/tasks/cra_audit.rake` | CRA audit rake task (330 lines) |
+
+---
+
+## 10. Environment Variables
+
+```
+# SFTP (manifest upload)
+CRA_MEDIA_CLOUD_SFTP_HOST / _USERNAME / _PASSWORD
+
+# API (job status polling)
+CRA_MEDIA_CLOUD_API_BASE_URL / _USERNAME / _PASSWORD
+
+# CDN (output URLs)
+CRA_MEDIA_CLOUD_CDN_CONTENT_URL # MP4, cover, thumbnails
+CRA_MEDIA_CLOUD_CDN_MANIFEST_URL # HLS/DASH manifests
+
+# S3
+S3_BUCKET_NAME / S3_REGION / AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
+```
+
+---
+
+## 11. Known Gaps & TODO
+
+### Not yet implemented
+
+- [ ] **Subtitle trigger on SD completion** — ElevenLabs transcription from sd1 MP4 after phase 1. Not wired up.
+- [ ] **Dynamic timeouts in MonitorProcessingJob** — currently fixed 6h. Design doc specified file-size/duration-based formula.
+- [ ] **`playable` field in API JSON** — `videos_controller.rb` still returns `ready: video.ready?`. Should add `playable:` based on manifest presence.
+- [ ] **SD quality badge on player** — no visual indicator that video is SD-only while HD encodes.
+
+### Test coverage gaps (folio)
+
+- [x] MonitorProcessingJob handler integration tests (`handle_failed_uploads_needing_retry`, `reconcile_video_state`, `reconcile_with_remote_jobs`)
+- [x] CheckProgressJob: `processing_timed_out?` — video >4h old marks as `processing_failed`; video <4h continues polling
+- [x] CheckProgressJob: `finalize_from_completed_phases!` — all jobs REMOVED + stored phase data → `ready` with merged MP4 paths
+- [x] CheckProgressJob: tracked job becomes REMOVED → clears `remote_id`, reschedules
+- [x] Encoder: `upload_file` method, SFTP session management, retry logic
+- [x] CreateFileJob: S3 server-side copy path for videos
+- [x] AASM state transition integration tests with CRA concern
+
+### Test coverage gaps (economia)
+
+- [x] `AdditionalHtmlComponent` — additional state coverage (legacy video, unprocessed video, ready video)
diff --git a/lib/folio/version.rb b/lib/folio/version.rb
index 1a824a3dd5..ac03020a08 100644
--- a/lib/folio/version.rb
+++ b/lib/folio/version.rb
@@ -1,5 +1,5 @@
# frozen_string_literal: true
module Folio
- VERSION = "7.4.1"
+ VERSION = "7.5.1"
end
diff --git a/package-lock.json b/package-lock.json
index eee5ea858f..c49da7c173 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -2534,11 +2534,10 @@
}
},
"node_modules/minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
+ "version": "3.1.5",
+ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
+ "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
"dev": true,
- "license": "ISC",
"dependencies": {
"brace-expansion": "^1.1.7"
},
@@ -5553,9 +5552,9 @@
"dev": true
},
"minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
+ "version": "3.1.5",
+ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
+ "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
"dev": true,
"requires": {
"brace-expansion": "^1.1.7"
diff --git a/test/dummy/package-lock.json b/test/dummy/package-lock.json
index 5a8c8369b2..8648434e4b 100644
--- a/test/dummy/package-lock.json
+++ b/test/dummy/package-lock.json
@@ -259,16 +259,6 @@
"node": ">= 10"
}
},
- "node_modules/@trysound/sax": {
- "version": "0.2.0",
- "resolved": "https://registry.npmjs.org/@trysound/sax/-/sax-0.2.0.tgz",
- "integrity": "sha512-L7z9BgrNEcYyUYtF+HaEfiS5ebkh9jXqbszz7pC0hRBPaatV0XjSD3+eHrpqFemQfgwiFF0QPIarnIihIDn7OA==",
- "dev": true,
- "license": "ISC",
- "engines": {
- "node": ">=10.13.0"
- }
- },
"node_modules/@types/triple-beam": {
"version": "1.3.5",
"resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz",
@@ -826,9 +816,9 @@
"license": "CC0-1.0"
},
"node_modules/minimatch": {
- "version": "3.1.2",
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
- "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
+ "version": "3.1.5",
+ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz",
+ "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==",
"dev": true,
"license": "ISC",
"dependencies": {
@@ -971,6 +961,16 @@
"node": ">=10"
}
},
+ "node_modules/sax": {
+ "version": "1.5.0",
+ "resolved": "https://registry.npmjs.org/sax/-/sax-1.5.0.tgz",
+ "integrity": "sha512-21IYA3Q5cQf089Z6tgaUTr7lDAyzoTPx5HRtbhsME8Udispad8dC/+sziTNugOEx54ilvatQ9YCzl4KQLPcRHA==",
+ "dev": true,
+ "license": "BlueOak-1.0.0",
+ "engines": {
+ "node": ">=11.0.0"
+ }
+ },
"node_modules/simple-swizzle": {
"version": "0.2.2",
"resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
@@ -1080,18 +1080,18 @@
}
},
"node_modules/svgo": {
- "version": "2.8.0",
- "resolved": "https://registry.npmjs.org/svgo/-/svgo-2.8.0.tgz",
- "integrity": "sha512-+N/Q9kV1+F+UeWYoSiULYo4xYSDQlTgb+ayMobAXPwMnLvop7oxKMo9OzIrX5x3eS4L4f2UHhc9axXwY8DpChg==",
+ "version": "2.8.2",
+ "resolved": "https://registry.npmjs.org/svgo/-/svgo-2.8.2.tgz",
+ "integrity": "sha512-TyzE4NVGLUFy+H/Uy4N6c3G0HEeprsVfge6Lmq+0FdQQ/zqoVYB62IsBZORsiL+o96s6ff/V6/3UQo/C0cgCAA==",
"dev": true,
"license": "MIT",
"dependencies": {
- "@trysound/sax": "0.2.0",
"commander": "^7.2.0",
"css-select": "^4.1.3",
"css-tree": "^1.1.3",
"csso": "^4.2.0",
"picocolors": "^1.0.0",
+ "sax": "^1.5.0",
"stable": "^0.1.8"
},
"bin": {
diff --git a/test/integration/video_upload_no_download_test.rb b/test/integration/video_upload_no_download_test.rb
new file mode 100644
index 0000000000..5be7e8c504
--- /dev/null
+++ b/test/integration/video_upload_no_download_test.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class VideoUploadNoDownloadTest < ActiveSupport::TestCase
+ test "video file metadata is extracted without full file download" do
+ video = create(:folio_file_video)
+
+ # Verify metadata was extracted
+ assert_not_nil video.file_track_duration, "Duration should be extracted"
+ assert_not_nil video.file_width, "Width should be extracted"
+ assert_not_nil video.file_height, "Height should be extracted"
+
+ # Verify file_url_or_path returns correct type
+ result = video.file_url_or_path
+ assert result.is_a?(String)
+
+ # In test env (FileDataStore), should be local path
+ if Dragonfly.app.datastore.is_a?(Dragonfly::FileDataStore)
+ assert_not result.start_with?("http")
+ end
+ end
+end
diff --git a/test/jobs/folio/cra_media_cloud/check_progress_job_test.rb b/test/jobs/folio/cra_media_cloud/check_progress_job_test.rb
index 97e850d722..3f24e8f63b 100644
--- a/test/jobs/folio/cra_media_cloud/check_progress_job_test.rb
+++ b/test/jobs/folio/cra_media_cloud/check_progress_job_test.rb
@@ -16,12 +16,10 @@ class TestVideoFile < Folio::File::Video
"reference_id" => "REF123"
))
- # Job with old generation should be skipped - no new CheckProgressJob enqueued
assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do
Folio::CraMediaCloud::CheckProgressJob.perform_now(video, encoding_generation: 11111)
end
- # Video state should be unchanged
video.reload
assert_equal "full_media_processing", video.remote_services_data["processing_state"]
end
@@ -40,7 +38,6 @@ class TestVideoFile < Folio::File::Video
api_mock = Minitest::Mock.new
api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123")
- # Job with matching generation should process and reschedule
assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
expect_method_called_on(
object: Folio::CraMediaCloud::Api,
@@ -66,7 +63,6 @@ class TestVideoFile < Folio::File::Video
api_mock = Minitest::Mock.new
api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123")
- # Job without generation (old jobs) should still process
assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
expect_method_called_on(
object: Folio::CraMediaCloud::Api,
@@ -80,6 +76,260 @@ class TestVideoFile < Folio::File::Video
api_mock.verify
end
+ # --- Multi-phase tests ---
+
+ test "phase 1 DONE does not trigger processing_done when processing_phases is 2" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_phases" => 2,
+ "remote_id" => "JOB_PHASE1",
+ "reference_id" => "REF123"
+ ))
+
+ phase1_job = {
+ "id" => "JOB_PHASE1",
+ "status" => "DONE",
+ "phase" => 1,
+ "processingPhases" => 2,
+ "progress" => 1.0,
+ "lastModified" => Time.current.iso8601,
+ "output" => [
+ { "type" => "MP4", "profiles" => ["sd0"], "path" => "/video/sd0.mp4" },
+ { "type" => "MP4", "profiles" => ["sd1"], "path" => "/video/sd1.mp4" },
+ { "type" => "MP4", "profiles" => ["sd2"], "path" => "/video/sd2.mp4" },
+ { "type" => "HLS", "profiles" => ["sd0", "sd1", "sd2"], "path" => "/video/sd_master.m3u8" },
+ { "type" => "DASH", "profiles" => ["sd0", "sd1", "sd2"], "path" => "/video/sd_manifest.mpd" },
+ { "type" => "THUMBNAILS", "profiles" => ["cover"], "path" => "/video/cover.jpg" },
+ { "type" => "THUMBNAILS", "profiles" => ["thumb"], "path" => "/video/thumb.vtt" },
+ ]
+ }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_job, phase1_job, ["JOB_PHASE1"])
+
+ # Should reschedule (phase 1 done, waiting for phase 2)
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ api_mock.verify
+ video.reload
+
+ # AASM should stay in processing (not ready)
+ assert_equal "processing", video.aasm_state
+ assert_equal "full_media_processing", video.remote_services_data["processing_state"]
+
+ # Intermediate phase data should be saved
+ assert_equal({ "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4", "sd2" => "/video/sd2.mp4" },
+ video.remote_services_data["phase_1_content_mp4_paths"])
+ assert_equal "JOB_PHASE1", video.remote_services_data["phase_1_remote_id"]
+ assert video.remote_services_data["phase_1_completed_at"].present?
+
+ # Manifest/cover/thumbnails paths populated for immediate playability
+ assert_equal "/video/sd_master.m3u8", video.remote_services_data["manifest_hls_path"]
+ assert_equal "/video/sd_manifest.mpd", video.remote_services_data["manifest_dash_path"]
+ assert_equal "/video/cover.jpg", video.remote_services_data["cover_path"]
+ assert_equal "/video/thumb.vtt", video.remote_services_data["thumbnails_path"]
+ end
+
+ test "phase 2 DONE triggers processing_done" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_phases" => 2,
+ "reference_id" => "REF123",
+ "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4" },
+ "phase_1_completed_at" => 1.minute.ago.iso8601,
+ "phase_1_remote_id" => "JOB_PHASE1",
+ ))
+
+ full_output = [
+ { "type" => "MP4", "profiles" => ["sd0"], "path" => "/video/sd0.mp4" },
+ { "type" => "MP4", "profiles" => ["sd1"], "path" => "/video/sd1.mp4" },
+ { "type" => "MP4", "profiles" => ["hd1"], "path" => "/video/hd1.mp4" },
+ { "type" => "MP4", "profiles" => ["hd2"], "path" => "/video/hd2.mp4" },
+ { "type" => "HLS", "profiles" => ["sd0", "sd1", "hd1", "hd2"], "path" => "/video/master.m3u8" },
+ { "type" => "DASH", "profiles" => ["sd0", "sd1", "hd1", "hd2"], "path" => "/video/manifest.mpd" },
+ { "type" => "THUMBNAILS", "profiles" => ["cover"], "path" => "/video/cover.jpg" },
+ { "type" => "THUMBNAILS", "profiles" => ["thumb"], "path" => "/video/thumb.jpg" },
+ ]
+
+ phase1_job = {
+ "id" => "JOB_PHASE1", "status" => "DONE", "phase" => 1,
+ "processingPhases" => 2, "progress" => 1.0,
+ "lastModified" => 2.minutes.ago.iso8601,
+ "output" => full_output.select { |o| o["profiles"].first&.start_with?("sd") || o["type"] != "MP4" }
+ }
+
+ phase2_job = {
+ "id" => "JOB_PHASE2", "status" => "DONE", "phase" => 2,
+ "processingPhases" => 2, "prevPhaseJobId" => "JOB_PHASE1",
+ "progress" => 1.0, "lastModified" => Time.current.iso8601,
+ "output" => full_output
+ }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [phase1_job, phase2_job], [], ref_id: "REF123")
+
+ # Should NOT reschedule — processing is complete
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ api_mock.verify
+ video.reload
+
+ assert_equal "ready", video.aasm_state
+ assert_equal({ "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4",
+ "hd1" => "/video/hd1.mp4", "hd2" => "/video/hd2.mp4" },
+ video.remote_services_data["content_mp4_paths"])
+ assert_equal "/video/master.m3u8", video.remote_services_data["manifest_hls_path"]
+ assert_equal "/video/manifest.mpd", video.remote_services_data["manifest_dash_path"]
+ end
+
+ test "phase 2 PROCESSING continues polling with mapped progress" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_phases" => 2,
+ "reference_id" => "REF123",
+ "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4" },
+ ))
+
+ phase1_job = {
+ "id" => "JOB_PHASE1", "status" => "DONE", "phase" => 1,
+ "processingPhases" => 2, "progress" => 1.0,
+ "lastModified" => 2.minutes.ago.iso8601,
+ "output" => []
+ }
+
+ phase2_job = {
+ "id" => "JOB_PHASE2", "status" => "PROCESSING", "phase" => 2,
+ "processingPhases" => 2, "progress" => 0.6,
+ "lastModified" => Time.current.iso8601,
+ }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [phase1_job, phase2_job], [], ref_id: "REF123")
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ api_mock.verify
+ video.reload
+
+ # Raw CRA progress for current phase: 0.6 * 100 = 60
+ assert_equal 60, video.remote_services_data["progress_percentage"]
+ assert_equal "processing", video.aasm_state
+ end
+
+ test "single-phase job backward compat — DONE triggers ready" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "reference_id" => "REF123"
+ ))
+ # No processing_phases key at all
+
+ full_output = [
+ { "type" => "MP4", "profiles" => ["sd0"], "path" => "/video/sd0.mp4" },
+ { "type" => "MP4", "profiles" => ["hd1"], "path" => "/video/hd1.mp4" },
+ { "type" => "HLS", "profiles" => ["sd0", "hd1"], "path" => "/video/master.m3u8" },
+ { "type" => "DASH", "profiles" => ["sd0", "hd1"], "path" => "/video/manifest.mpd" },
+ { "type" => "THUMBNAILS", "profiles" => ["cover"], "path" => "/video/cover.jpg" },
+ { "type" => "THUMBNAILS", "profiles" => ["thumb"], "path" => "/video/thumb.jpg" },
+ ]
+
+ api_response = {
+ "id" => "JOB123", "status" => "DONE", "progress" => 1.0,
+ "lastModified" => Time.current.iso8601,
+ "output" => full_output
+ }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123")
+
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ api_mock.verify
+ video.reload
+
+ assert_equal "ready", video.aasm_state
+ assert_equal({ "sd0" => "/video/sd0.mp4", "hd1" => "/video/hd1.mp4" },
+ video.remote_services_data["content_mp4_paths"])
+ assert_equal "/video/master.m3u8", video.remote_services_data["manifest_hls_path"]
+ assert_equal "/video/manifest.mpd", video.remote_services_data["manifest_dash_path"]
+ end
+
+ test "phase 2 FAILED triggers failure" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_phases" => 2,
+ "reference_id" => "REF123",
+ "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4" },
+ ))
+
+ phase1_job = {
+ "id" => "JOB_PHASE1", "status" => "DONE", "phase" => 1,
+ "processingPhases" => 2, "progress" => 1.0,
+ "lastModified" => 2.minutes.ago.iso8601,
+ "output" => []
+ }
+
+ phase2_job = {
+ "id" => "JOB_PHASE2", "status" => "FAILED", "phase" => 2,
+ "processingPhases" => 2, "progress" => 0.3,
+ "lastModified" => Time.current.iso8601,
+ "messages" => [{ "type" => "ERROR", "message" => "HD encoding failed" }]
+ }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [phase1_job, phase2_job], [], ref_id: "REF123")
+
+ # Should NOT reschedule — failure stops polling
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ api_mock.verify
+ video.reload
+
+ assert_equal "encoding_failed", video.remote_services_data["processing_state"]
+ assert_equal "HD encoding failed", video.remote_services_data["error_message"]
+ end
+
+ # --- Existing encoding generation tests ---
+
test "skips already ready video regardless of encoding_generation" do
video = create_test_video_in_processing_state
video.update_column(:aasm_state, "ready")
@@ -88,24 +338,294 @@ class TestVideoFile < Folio::File::Video
"reference_id" => "REF123"
))
- # Should skip because video is already ready
assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do
Folio::CraMediaCloud::CheckProgressJob.perform_now(video, encoding_generation: 12345)
end
end
+ # --- Progress tracking tests ---
+
+ test "parses encoding messages for progress milestones" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_state" => "full_media_processing",
+ "reference_id" => "REF123"
+ ))
+
+ api_response = {
+ "id" => "JOB123", "status" => "PROCESSING", "progress" => 0.5,
+ "lastModified" => Time.current.iso8601,
+ "outputParams" => { "duration" => 600.0 },
+ "messages" => [
+ { "createdDate" => "2026-02-25T10:00:00Z", "type" => "INFO", "message" => "validation started at host vodenc1" },
+ { "createdDate" => "2026-02-25T10:00:05Z", "type" => "INFO", "message" => "processing started at host vodenc1" },
+ { "createdDate" => "2026-02-25T10:00:06Z", "type" => "INFO", "message" => "Transcoding worker - video: going to transcode 600.0 seconds for 7 VIDEO profiles" },
+ { "createdDate" => "2026-02-25T10:00:06Z", "type" => "INFO", "message" => "Transcoding worker - audio: going to transcode 600.0 seconds for 2 AUDIO profiles" },
+ { "createdDate" => "2026-02-25T10:02:00Z", "type" => "INFO", "message" => "Transcoding worker - audio: finished" }
+ ]
+ }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123")
+
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+
+ video.reload
+ assert_equal 600.0, video.remote_services_data["video_duration"]
+ assert_includes video.remote_services_data["phases_completed"], "audio"
+ end
+
+ test "DONE transition sets progress to 100 and state to ready" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_state" => "full_media_processing",
+ "reference_id" => "REF123"
+ ))
+
+ output = [
+ { "type" => "MP4", "profiles" => ["sd1"], "path" => "/test/sd1.mp4" },
+ { "type" => "MP4", "profiles" => ["hd1"], "path" => "/test/hd1.mp4" },
+ { "type" => "HLS", "profiles" => ["sd1", "hd1"], "path" => "/test/master.m3u8" },
+ { "type" => "DASH", "profiles" => ["sd1", "hd1"], "path" => "/test/master.mpd" },
+ { "type" => "THUMBNAILS", "profiles" => ["cover"], "path" => "/test/cover.jpg" },
+ { "type" => "THUMBNAILS", "profiles" => ["thumb"], "path" => "/test/thumb.jpg" }
+ ]
+
+ api_response = {
+ "id" => "JOB123", "status" => "DONE", "progress" => 1.0,
+ "lastModified" => Time.current.iso8601,
+ "output" => output,
+ "outputParams" => { "duration" => 120.0 },
+ "messages" => []
+ }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123")
+
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+
+ video.reload
+ assert_equal "full_media_processed", video.remote_services_data["processing_state"]
+ assert_equal 100.0, video.remote_services_data["progress_percentage"]
+ assert_equal "ready", video.aasm_state
+ end
+
+ test "FAILED job transitions to processing_failed and schedules retry on first failure" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_state" => "full_media_processing",
+ "reference_id" => "REF123",
+ "progress_percentage" => 45.0
+ ))
+
+ api_response = {
+ "id" => "JOB123", "status" => "FAILED",
+ "lastModified" => Time.current.iso8601,
+ "messages" => [
+ { "type" => "ERROR", "message" => "filesize mismatch" }
+ ]
+ }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123")
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ video.reload
+ assert_equal "processing_failed", video.aasm_state
+ assert_nil video.remote_services_data["progress_percentage"]
+ assert_equal "filesize mismatch", video.remote_services_data["error_message"]
+ assert_equal 1, video.remote_services_data["retry_count"]
+ assert video.remote_services_data["retry_scheduled_at"].present?
+ end
+
+ test "FAILED job on second failure is final — no retry scheduled" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_state" => "full_media_processing",
+ "reference_id" => "REF123",
+ "retry_count" => 1
+ ))
+
+ api_response = {
+ "id" => "JOB123", "status" => "FAILED",
+ "lastModified" => Time.current.iso8601,
+ "messages" => [
+ { "type" => "ERROR", "message" => "filesize mismatch again" }
+ ]
+ }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123")
+
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CreateMediaJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ video.reload
+ assert_equal "processing_failed", video.aasm_state
+ assert_equal 2, video.remote_services_data["retry_count"]
+ assert_nil video.remote_services_data["retry_scheduled_at"]
+ end
+
+ # --- Timeout tests ---
+
+ test "processing_timed_out? marks video as failed after 4 hours" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_step_started_at" => 5.hours.ago.iso8601,
+ "reference_id" => "REF123"
+ ))
+
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+
+ video.reload
+ assert_equal "processing_failed", video.aasm_state
+ end
+
+ test "processing_timed_out? does not fire within 4 hours" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_step_started_at" => 3.hours.ago.iso8601,
+ "reference_id" => "REF123"
+ ))
+
+ api_response = { "id" => "JOB123", "status" => "PROCESSING", "progress" => 0.5,
+ "lastModified" => Time.current.iso8601 }
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123")
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ video.reload
+ assert_equal "processing", video.aasm_state
+ end
+
+ # --- Finalize from completed phases test ---
+
+ test "finalizes from stored phase data when all CRA jobs are REMOVED" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "processing_phases" => 2,
+ "reference_id" => "REF123",
+ "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4" },
+ "phase_1_completed_at" => 5.minutes.ago.iso8601,
+ "phase_1_remote_id" => "JOB_PHASE1",
+ "phase_2_content_mp4_paths" => { "hd1" => "/video/hd1.mp4", "hd2" => "/video/hd2.mp4" },
+ "phase_2_completed_at" => 1.minute.ago.iso8601,
+ "phase_2_remote_id" => "JOB_PHASE2",
+ "manifest_hls_path" => "/video/master.m3u8",
+ "manifest_dash_path" => "/video/manifest.mpd",
+ ))
+
+ removed_jobs = [
+ { "id" => "JOB_PHASE1", "status" => "REMOVED", "phase" => 1, "lastModified" => 2.minutes.ago.iso8601 },
+ { "id" => "JOB_PHASE2", "status" => "REMOVED", "phase" => 2, "lastModified" => 1.minute.ago.iso8601 },
+ ]
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, removed_jobs, [], ref_id: "REF123")
+
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ api_mock.verify
+ video.reload
+
+ assert_equal "ready", video.aasm_state
+ assert_equal "full_media_processed", video.remote_services_data["processing_state"]
+ assert_equal 100.0, video.remote_services_data["progress_percentage"]
+ expected_mp4 = { "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4",
+ "hd1" => "/video/hd1.mp4", "hd2" => "/video/hd2.mp4" }
+ assert_equal expected_mp4, video.remote_services_data["content_mp4_paths"]
+ end
+
+ # --- REMOVED remote_id handling ---
+
+ test "clears remote_id and reschedules when tracked job becomes REMOVED" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "remote_id" => "JOB_GONE",
+ "reference_id" => "REF123"
+ ))
+
+ removed_job = { "id" => "JOB_GONE", "status" => "REMOVED", "lastModified" => Time.current.iso8601 }
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_job, removed_job, ["JOB_GONE"])
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ expect_method_called_on(
+ object: Folio::CraMediaCloud::Api,
+ method: :new,
+ return_value: api_mock
+ ) do
+ Folio::CraMediaCloud::CheckProgressJob.perform_now(video)
+ end
+ end
+
+ api_mock.verify
+ video.reload
+
+ # remote_id cleared so next poll falls back to reference_id path
+ assert_nil video.remote_services_data["remote_id"]
+ # AASM stays in processing — not marked failed
+ assert_equal "processing", video.aasm_state
+ end
+
private
def create_test_video_in_processing_state
video = TestVideoFile.new(site: get_any_site)
video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4")
video.dont_run_after_save_jobs = true
- # Stub create_full_media to prevent the full processing chain during save
expect_method_called_on(object: video, method: :create_full_media) do
video.save!
end
- # Set desired initial state (merge to preserve encoding_generation from process_attached_file)
video.update!(remote_services_data: video.remote_services_data.merge(
"service" => "cra_media_cloud",
"processing_state" => "full_media_processing"
diff --git a/test/jobs/folio/cra_media_cloud/create_media_job_test.rb b/test/jobs/folio/cra_media_cloud/create_media_job_test.rb
index 6a27acbc46..c04205829f 100644
--- a/test/jobs/folio/cra_media_cloud/create_media_job_test.rb
+++ b/test/jobs/folio/cra_media_cloud/create_media_job_test.rb
@@ -14,54 +14,241 @@ class TestVideoFile < Folio::File::Video
"encoding_generation" => generation_value
))
- # Mock S3 metadata for reference_id generation
- s3_metadata_mock = Struct.new(:etag).new('"abc12345def67890"')
+ with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock|
+ encoder_mock.expect(:upload_file, nil, [video], profile_group: nil, processing_phases: 1, reference_id: String)
+ api_mock.expect(:get_jobs, [], [], ref_id: String)
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ perform_job(video, encoder_mock, api_mock)
+ end
+ end
+
+ video.reload
+ assert_equal generation_value, video.encoding_generation,
+ "encoding_generation should be preserved through CreateMediaJob"
+ end
+
+ test "submits single manifest and sets full_media_processing state" do
+ video = create_test_video_in_processing_state
+
+ with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock|
+ encoder_mock.expect(:upload_file, nil, [video], profile_group: nil, processing_phases: 1, reference_id: String)
+ api_mock.expect(:get_jobs, [], [], ref_id: String)
+
+ perform_job(video, encoder_mock, api_mock)
+ encoder_mock.verify
+ end
+
+ video.reload
+ assert_equal "full_media_processing", video.remote_services_data["processing_state"]
+ end
+
+ test "passes processing_phases to encoder when video defines it" do
+ video = create_test_video_in_processing_state
+ video.define_singleton_method(:encoder_processing_phases) { 2 }
+
+ with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock|
+ encoder_mock.expect(:upload_file, nil, [video],
+ profile_group: nil, processing_phases: 2, reference_id: String)
+ api_mock.expect(:get_jobs, [], [], ref_id: String)
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ perform_job(video, encoder_mock, api_mock)
+ end
+
+ encoder_mock.verify
+ end
+
+ video.reload
+ assert_equal 2, video.remote_services_data["processing_phases"]
+ end
+
+ test "check_existing_job: DONE returns :done" do
+ video = create_test_video_in_processing_state
+
+ mock_api = Minitest::Mock.new
+ mock_api.expect(:get_jobs, [
+ { "id" => 1, "refId" => "test-abc123", "status" => "DONE",
+ "profileGroup" => "VoD", "lastModified" => "2026-01-01T00:00:00Z",
+ "messages" => [], "output" => [] },
+ ], [], ref_id: "test-abc123")
+
+ job_instance = Folio::CraMediaCloud::CreateMediaJob.new
+
+ Folio::CraMediaCloud::Api.stub(:new, mock_api) do
+ result = job_instance.send(:check_existing_job, "test-abc123", video)
+ assert_equal :done, result[:status]
+ end
+ end
+
+ test "check_existing_job: picks latest job when multiple exist" do
+ video = create_test_video_in_processing_state
+
+ mock_api = Minitest::Mock.new
+ mock_api.expect(:get_jobs, [
+ { "id" => 1, "refId" => "test-abc123", "status" => "FAILED",
+ "profileGroup" => "VoD", "lastModified" => "2026-01-01T00:00:00Z",
+ "messages" => [], "output" => [] },
+ { "id" => 2, "refId" => "test-abc123", "status" => "DONE",
+ "profileGroup" => "VoD", "lastModified" => "2026-01-02T00:00:00Z",
+ "messages" => [], "output" => [] },
+ ], [], ref_id: "test-abc123")
+
+ job_instance = Folio::CraMediaCloud::CreateMediaJob.new
+
+ Folio::CraMediaCloud::Api.stub(:new, mock_api) do
+ result = job_instance.send(:check_existing_job, "test-abc123", video)
+ assert_equal :done, result[:status]
+ assert_equal 2, result[:job]["id"]
+ end
+ end
+
+ test "check_existing_job: empty jobs returns :not_found" do
+ video = create_test_video_in_processing_state
+
+ mock_api = Minitest::Mock.new
+ mock_api.expect(:get_jobs, [], [], ref_id: "test-abc123")
+
+ job_instance = Folio::CraMediaCloud::CreateMediaJob.new
+
+ Folio::CraMediaCloud::Api.stub(:new, mock_api) do
+ result = job_instance.send(:check_existing_job, "test-abc123", video)
+ assert_equal :not_found, result[:status]
+ end
+ end
+
+ test "raises when encoding_generation is nil to prevent stale CRA job matching" do
+ video = create_test_video_in_processing_state
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "encoding_generation" => nil
+ ))
+
+ with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock|
+ error = assert_raises(RuntimeError) do
+ perform_job(video, encoder_mock, api_mock)
+ end
+ assert_match(/encoding_generation not set/, error.message)
+ end
+ end
+
+ test "reference_id includes video ID to prevent cross-contamination between records" do
+ video = create_test_video_in_processing_state
+
+ with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock|
+ captured_reference_id = nil
+ encoder_mock.expect(:upload_file, nil) do |_file, **kwargs|
+ captured_reference_id = kwargs[:reference_id]
+ true
+ end
+ api_mock.expect(:get_jobs, []) do |**_kwargs|
+ true
+ end
+
+ perform_job(video, encoder_mock, api_mock)
+
+ assert_not_nil captured_reference_id
+ assert_includes captured_reference_id, "-#{video.id}-",
+ "reference_id must contain video ID for per-record uniqueness"
+ end
+ end
+
+ test "marks video as permanently failed when S3 source file is missing" do
+ video = create_test_video_in_processing_state
+
+ # Mock S3 datastore to raise NotFound (simulates missing source file)
s3_datastore_mock = Minitest::Mock.new
storage_mock = Minitest::Mock.new
- storage_mock.expect(:head_object, s3_metadata_mock, [String, String])
+ storage_mock.expect(:head_object, nil) do |*_args|
+ raise Excon::Error::NotFound.new("Expected(200) <=> Actual(404 Not Found)")
+ end
s3_datastore_mock.expect(:root_path, "uploads")
s3_datastore_mock.expect(:storage, storage_mock)
- # Mock encoder
- encoder_mock = Minitest::Mock.new
- encoder_mock.expect(:upload_file, nil, [video], profile_group: nil, reference_id: String)
-
- # Mock API for existing job check
- api_mock = Minitest::Mock.new
- api_mock.expect(:get_jobs, [], [], ref_id: String)
-
- assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ ENV["S3_BUCKET_NAME"] = "test-bucket"
+ begin
Dragonfly.app.stub(:datastore, s3_datastore_mock) do
- Folio::CraMediaCloud::Encoder.stub(:new, encoder_mock) do
- Folio::CraMediaCloud::Api.stub(:new, api_mock) do
- Folio::CraMediaCloud::CreateMediaJob.perform_now(video)
- end
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do
+ Folio::CraMediaCloud::CreateMediaJob.perform_now(video)
end
end
+ ensure
+ ENV.delete("S3_BUCKET_NAME")
end
- # Verify encoding_generation is preserved in remote_services_data
video.reload
- assert_equal generation_value, video.encoding_generation,
- "encoding_generation should be preserved through CreateMediaJob"
+ assert_equal "processing_failed", video.aasm_state
+ assert_equal "source_file_missing", video.remote_services_data["processing_state"]
+ assert_includes video.remote_services_data["error_message"], "Source file not found"
+ end
+
+ test "retries from processing_failed state via retry_processing!" do
+ video = create_test_video_in_processing_state
+ video.update_column(:aasm_state, "processing_failed")
+ video.update!(remote_services_data: video.remote_services_data.merge(
+ "retry_count" => 1,
+ "retry_scheduled_at" => Time.current.iso8601
+ ))
+
+ with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock|
+ encoder_mock.expect(:upload_file, nil, [video], profile_group: nil, processing_phases: 1, reference_id: String)
+ api_mock.expect(:get_jobs, [], [], ref_id: String)
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ perform_job(video, encoder_mock, api_mock)
+ end
+ end
+
+ video.reload
+ assert_equal "processing", video.aasm_state
+ assert_equal "full_media_processing", video.remote_services_data["processing_state"]
end
private
- def create_test_video_in_processing_state
- video = TestVideoFile.new(site: get_any_site)
+ def create_test_video_in_processing_state(klass: TestVideoFile)
+ video = klass.new(site: get_any_site)
video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4")
video.dont_run_after_save_jobs = true
- # Stub create_full_media to prevent the full processing chain during save
expect_method_called_on(object: video, method: :create_full_media) do
video.save!
end
- # Set desired initial state (merge to preserve encoding_generation from process_attached_file)
video.update!(remote_services_data: video.remote_services_data.merge(
"service" => "cra_media_cloud",
- "processing_state" => "full_media_processing"
+ "processing_state" => "full_media_processing",
+ "encoding_generation" => Time.current.to_i
))
video
end
+
+ def with_mocked_s3_and_encoder(video)
+ s3_metadata_mock = Struct.new(:etag).new('"abc12345def67890"')
+ s3_datastore_mock = Minitest::Mock.new
+ storage_mock = Minitest::Mock.new
+
+ # head_object is called with bucket_name (ENV) and key — allow any args
+ storage_mock.expect(:head_object, s3_metadata_mock) do |*_args|
+ true
+ end
+ s3_datastore_mock.expect(:root_path, "uploads")
+ s3_datastore_mock.expect(:storage, storage_mock)
+
+ encoder_mock = Minitest::Mock.new
+ api_mock = Minitest::Mock.new
+
+ ENV["S3_BUCKET_NAME"] = "test-bucket"
+ Dragonfly.app.stub(:datastore, s3_datastore_mock) do
+ yield encoder_mock, api_mock
+ end
+ ensure
+ ENV.delete("S3_BUCKET_NAME")
+ end
+
+ def perform_job(video, encoder_mock, api_mock)
+ Folio::CraMediaCloud::Encoder.stub(:new, encoder_mock) do
+ Folio::CraMediaCloud::Api.stub(:new, api_mock) do
+ Folio::CraMediaCloud::CreateMediaJob.perform_now(video)
+ end
+ end
+ end
end
diff --git a/test/jobs/folio/cra_media_cloud/encoder_test.rb b/test/jobs/folio/cra_media_cloud/encoder_test.rb
new file mode 100644
index 0000000000..7220e32fa9
--- /dev/null
+++ b/test/jobs/folio/cra_media_cloud/encoder_test.rb
@@ -0,0 +1,196 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class Folio::CraMediaCloud::EncoderTest < ActiveSupport::TestCase
+ test "build_ingest_manifest uses src attribute with presigned URL" do
+ encoder = Folio::CraMediaCloud::Encoder.new
+
+ file_mock = Struct.new(:file_name, :file_size, :file_uid, :id).new(
+ "video.mp4", 123456, "uploads/video.mp4", 1
+ )
+
+ presigned_url = "https://s3.amazonaws.com/bucket/uploads/video.mp4?X-Amz-Credential=xxx&X-Amz-Expires=604800"
+
+ manifest_xml = encoder.send(
+ :build_ingest_manifest,
+ file_mock,
+ md5: "abc123def456",
+ ref_id: "test-ref-001",
+ profile_group: "VoDSD",
+ presigned_url: presigned_url
+ )
+
+ assert_includes manifest_xml, 'src="https://s3.amazonaws.com/bucket/uploads/video.mp4'
+ assert_not_includes manifest_xml, "file="
+ assert_includes manifest_xml, 'size="123456"'
+ assert_includes manifest_xml, 'md5="abc123def456"'
+ assert_includes manifest_xml, "VoDSD"
+ assert_includes manifest_xml, "test-ref-001"
+ end
+
+ test "build_ingest_manifest falls back to file attribute when no presigned URL" do
+ encoder = Folio::CraMediaCloud::Encoder.new
+
+ file_mock = Struct.new(:file_name, :file_size, :file_uid, :id).new(
+ "video.mp4", 123456, "uploads/video.mp4", 1
+ )
+
+ manifest_xml = encoder.send(
+ :build_ingest_manifest,
+ file_mock,
+ md5: "abc123def456",
+ ref_id: "test-ref-001",
+ profile_group: "VoD",
+ presigned_url: nil
+ )
+
+ assert_includes manifest_xml, 'file="video.mp4"'
+ assert_not_includes manifest_xml, "src="
+ end
+
+ # --- upload_file ---
+
+ test "upload_file builds manifest and uploads it via SFTP, returns result hash" do
+ encoder = Folio::CraMediaCloud::Encoder.new
+
+ file_mock = Struct.new(:file_name, :file_size, :file_uid, :id, :slug).new(
+ "video.mp4", 123456, "uploads/video.mp4", 42, "my-video"
+ )
+
+ s3_metadata_mock = Struct.new(:headers).new({ "ETag" => '"abcd1234"' })
+ fake_presigned_url = "https://s3.amazonaws.com/bucket/video.mp4?X-Amz-Expires=604800"
+
+ uploaded_path = nil
+ uploaded_xml = nil
+ fake_sftp = Object.new
+ fake_sftp.define_singleton_method(:upload!) do |source, dest|
+ uploaded_path = dest
+ uploaded_xml = source.read
+ end
+
+ encoder.define_singleton_method(:with_robust_sftp_session) { |&blk| blk.call(fake_sftp) }
+
+ result = encoder.stub(:s3_dragonfly_head_object, s3_metadata_mock) do
+ encoder.stub(:generate_presigned_url, fake_presigned_url) do
+ encoder.upload_file(file_mock, reference_id: "test-ref-001")
+ end
+ end
+
+ assert_equal "test-ref-001", result[:ref_id]
+ assert_equal "/ingest/regular/test-ref-001_manifest.xml", result[:xml_manifest_path]
+ assert result[:presigned_url], "presigned_url flag should be truthy"
+ assert_equal "/ingest/regular/test-ref-001_manifest.xml", uploaded_path
+ assert_includes uploaded_xml, "test-ref-001"
+ assert_includes uploaded_xml, fake_presigned_url
+ end
+
+ test "upload_file uses provided reference_id in SFTP path" do
+ encoder = Folio::CraMediaCloud::Encoder.new
+
+ file_mock = Struct.new(:file_name, :file_size, :file_uid, :id, :slug).new(
+ "video.mp4", 100, "uploads/video.mp4", 1, "slug"
+ )
+
+ s3_metadata_mock = Struct.new(:headers).new({ "ETag" => '"ff00ff00"' })
+ uploaded_path = nil
+ fake_sftp = Object.new
+ fake_sftp.define_singleton_method(:upload!) { |_src, dest| uploaded_path = dest }
+
+ encoder.define_singleton_method(:with_robust_sftp_session) { |&blk| blk.call(fake_sftp) }
+
+ encoder.stub(:s3_dragonfly_head_object, s3_metadata_mock) do
+ encoder.stub(:generate_presigned_url, "https://s3.example.com/v.mp4") do
+ encoder.upload_file(file_mock, reference_id: "custom-ref-xyz")
+ end
+ end
+
+ assert_equal "/ingest/regular/custom-ref-xyz_manifest.xml", uploaded_path
+ end
+
+ # --- upload_with_retry ---
+
+ test "upload_with_retry raises immediately when max_retries is 0" do
+ encoder = Folio::CraMediaCloud::Encoder.new
+
+ failing_sftp = Object.new
+ failing_sftp.define_singleton_method(:upload!) { |_, _| raise "network error" }
+
+ err = assert_raises(RuntimeError) do
+ encoder.send(:upload_with_retry, failing_sftp, StringIO.new("data"), "/dest/manifest.xml", max_retries: 0)
+ end
+ assert_match "network error", err.message
+ end
+
+ test "upload_with_retry retries on transient failure and succeeds on next attempt" do
+ encoder = Folio::CraMediaCloud::Encoder.new
+ encoder.define_singleton_method(:sleep) { |_| } # no-op to avoid real sleep in tests
+
+ attempts = 0
+ flaky_sftp = Object.new
+ flaky_sftp.define_singleton_method(:upload!) do |_src, _dest|
+ attempts += 1
+ raise "transient error" if attempts < 2
+ end
+
+ encoder.send(:upload_with_retry, flaky_sftp, StringIO.new("data"), "/dest/manifest.xml", max_retries: 1)
+
+ assert_equal 2, attempts
+ end
+
+ test "upload_with_retry raises after all retries exhausted" do
+ encoder = Folio::CraMediaCloud::Encoder.new
+ encoder.define_singleton_method(:sleep) { |_| }
+
+ attempts = 0
+ always_fail_sftp = Object.new
+ always_fail_sftp.define_singleton_method(:upload!) do |_, _|
+ attempts += 1
+ raise "persistent error"
+ end
+
+ error = assert_raises(RuntimeError) do
+ encoder.send(:upload_with_retry, always_fail_sftp, StringIO.new("data"), "/dest/manifest.xml", max_retries: 2)
+ end
+ assert_match(/persistent error/, error.message)
+
+ assert_equal 3, attempts # 1 initial + 2 retries
+ end
+
+ # --- with_robust_sftp_session ---
+
+ test "with_robust_sftp_session wraps SSH authentication failure" do
+ encoder = Folio::CraMediaCloud::Encoder.new
+
+ # ENV vars must be present so Net::SSH.start is actually reached (before the stub fires)
+ ENV["CRA_MEDIA_CLOUD_SFTP_HOST"] = "sftp.example.com"
+ ENV["CRA_MEDIA_CLOUD_SFTP_USERNAME"] = "user"
+ ENV["CRA_MEDIA_CLOUD_SFTP_PASSWORD"] = "pass"
+
+ Net::SSH.stub(:start, ->(*_args, **_kwargs) { raise Net::SSH::AuthenticationFailed, "bad credentials" }) do
+ err = assert_raises(RuntimeError) do
+ encoder.send(:with_robust_sftp_session) { |_sftp| }
+ end
+ assert_match "SSH authentication failed", err.message
+ end
+ ensure
+ %w[CRA_MEDIA_CLOUD_SFTP_HOST CRA_MEDIA_CLOUD_SFTP_USERNAME CRA_MEDIA_CLOUD_SFTP_PASSWORD].each { |k| ENV.delete(k) }
+ end
+
+ test "with_robust_sftp_session wraps generic SFTP errors" do
+ encoder = Folio::CraMediaCloud::Encoder.new
+
+ ENV["CRA_MEDIA_CLOUD_SFTP_HOST"] = "sftp.example.com"
+ ENV["CRA_MEDIA_CLOUD_SFTP_USERNAME"] = "user"
+ ENV["CRA_MEDIA_CLOUD_SFTP_PASSWORD"] = "pass"
+
+ Net::SSH.stub(:start, ->(*_args, **_kwargs) { raise "connection refused" }) do
+ err = assert_raises(RuntimeError) do
+ encoder.send(:with_robust_sftp_session) { |_sftp| }
+ end
+ assert_match "SFTP session error", err.message
+ end
+ ensure
+ %w[CRA_MEDIA_CLOUD_SFTP_HOST CRA_MEDIA_CLOUD_SFTP_USERNAME CRA_MEDIA_CLOUD_SFTP_PASSWORD].each { |k| ENV.delete(k) }
+ end
+end
diff --git a/test/jobs/folio/cra_media_cloud/monitor_processing_job_test.rb b/test/jobs/folio/cra_media_cloud/monitor_processing_job_test.rb
index 0fbcd1225a..79ad591e7c 100644
--- a/test/jobs/folio/cra_media_cloud/monitor_processing_job_test.rb
+++ b/test/jobs/folio/cra_media_cloud/monitor_processing_job_test.rb
@@ -78,6 +78,125 @@ def eval(*); end # no-op for lock release
assert_equal "processing_failed", video.aasm_state
end
+ test "rescues failed video awaiting retry when retry job is lost" do
+ video = create(:folio_file_video)
+ video.update!(
+ aasm_state: :processing_failed,
+ remote_services_data: {
+ "service" => "cra_media_cloud",
+ "retry_count" => 1,
+ "retry_scheduled_at" => 10.minutes.ago.iso8601,
+ }
+ )
+
+ with_unlocked_monitor_job do
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do
+ Folio::CraMediaCloud::MonitorProcessingJob.perform_now
+ end
+ end
+ end
+
+ test "does not rescue finally failed video (retry_count >= 2)" do
+ video = create(:folio_file_video)
+ video.update!(
+ aasm_state: :processing_failed,
+ remote_services_data: {
+ "service" => "cra_media_cloud",
+ "retry_count" => 2,
+ }
+ )
+
+ with_unlocked_monitor_job do
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CreateMediaJob do
+ Folio::CraMediaCloud::MonitorProcessingJob.perform_now
+ end
+ end
+ end
+
+ test "triggers process! for stuck unprocessed video with file_uid" do
+ video = create(:folio_file_video)
+ video.update_columns(
+ aasm_state: "unprocessed",
+ file_uid: "2026/03/09/13/20/26/test-uuid/test.mp4",
+ created_at: 10.minutes.ago
+ )
+
+ with_unlocked_monitor_job do
+ Folio::CraMediaCloud::MonitorProcessingJob.perform_now
+ end
+
+ video.reload
+ assert_not_equal "unprocessed", video.aasm_state, "Video should no longer be unprocessed after safety net"
+ end
+
+ test "does not trigger process! for recently created unprocessed video" do
+ video = create(:folio_file_video)
+ video.update_columns(
+ aasm_state: "unprocessed",
+ file_uid: "2026/03/09/13/20/26/test-uuid/test.mp4",
+ created_at: 2.minutes.ago
+ )
+
+ with_unlocked_monitor_job do
+ Folio::CraMediaCloud::MonitorProcessingJob.perform_now
+ end
+
+ video.reload
+ assert_equal "unprocessed", video.aasm_state
+ end
+
+ test "does not trigger process! for unprocessed video without file_uid" do
+ video = create(:folio_file_video)
+ video.update_columns(
+ aasm_state: "unprocessed",
+ file_uid: nil,
+ created_at: 10.minutes.ago
+ )
+
+ with_unlocked_monitor_job do
+ Folio::CraMediaCloud::MonitorProcessingJob.perform_now
+ end
+
+ video.reload
+ assert_equal "unprocessed", video.aasm_state
+ end
+
+ test "rescues video stuck in enqueued state for over 10 minutes" do
+ video = create(:folio_file_video)
+ video.update!(
+ aasm_state: :processing,
+ remote_services_data: {
+ "service" => "cra_media_cloud",
+ "processing_state" => "enqueued",
+ "processing_step_started_at" => 15.minutes.ago.iso8601
+ }
+ )
+
+ with_unlocked_monitor_job do
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do
+ Folio::CraMediaCloud::MonitorProcessingJob.perform_now
+ end
+ end
+ end
+
+ test "does not rescue freshly enqueued video" do
+ video = create(:folio_file_video)
+ video.update!(
+ aasm_state: :processing,
+ remote_services_data: {
+ "service" => "cra_media_cloud",
+ "processing_state" => "enqueued",
+ "processing_step_started_at" => 3.minutes.ago.iso8601
+ }
+ )
+
+ with_unlocked_monitor_job do
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CreateMediaJob do
+ Folio::CraMediaCloud::MonitorProcessingJob.perform_now
+ end
+ end
+ end
+
test "upload_is_stuck? returns false for small file within timeout" do
video = create(:folio_file_video, file_size: 10.megabytes)
upload_started_at = 2.minutes.ago
@@ -152,4 +271,149 @@ def eval(*); end # no-op for lock release
# Should use base timeout of 5 minutes
assert_equal false, result
end
+
+ # --- handle_failed_uploads_needing_retry ---
+
+ test "schedules CreateMediaJob for upload_failed video older than 5 minutes" do
+ video = create(:folio_file_video)
+ video.update!(
+ aasm_state: :processing,
+ remote_services_data: {
+ "service" => "cra_media_cloud",
+ "processing_state" => "upload_failed",
+ "processing_step_started_at" => 10.minutes.ago.iso8601
+ }
+ )
+
+ job = Folio::CraMediaCloud::MonitorProcessingJob.new
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do
+ job.send(:handle_failed_uploads_needing_retry)
+ end
+ end
+
+ test "schedules CreateMediaJob for encoding_failed video older than 5 minutes" do
+ video = create(:folio_file_video)
+ video.update!(
+ aasm_state: :processing,
+ remote_services_data: {
+ "service" => "cra_media_cloud",
+ "processing_state" => "encoding_failed",
+ "processing_step_started_at" => 10.minutes.ago.iso8601
+ }
+ )
+
+ job = Folio::CraMediaCloud::MonitorProcessingJob.new
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do
+ job.send(:handle_failed_uploads_needing_retry)
+ end
+ end
+
+ test "does not retry upload_failed video within 5 minutes" do
+ video = create(:folio_file_video)
+ video.update!(
+ aasm_state: :processing,
+ remote_services_data: {
+ "service" => "cra_media_cloud",
+ "processing_state" => "upload_failed",
+ "processing_step_started_at" => 3.minutes.ago.iso8601
+ }
+ )
+
+ job = Folio::CraMediaCloud::MonitorProcessingJob.new
+
+ assert_no_enqueued_jobs only: Folio::CraMediaCloud::CreateMediaJob do
+ job.send(:handle_failed_uploads_needing_retry)
+ end
+ end
+
+ # --- reconcile_video_state ---
+
+ test "reconcile_video_state schedules CheckProgressJob and updates remote_id when API finds active job" do
+ video = create(:folio_file_video)
+ video.update!(
+ aasm_state: :processing,
+ remote_services_data: {
+ "service" => "cra_media_cloud",
+ "processing_state" => "full_media_processing",
+ "reference_id" => "REF456",
+ "encoding_generation" => 99
+ }
+ )
+
+ active_job = { "id" => "JOB_ACTIVE", "status" => "PROCESSING", "lastModified" => Time.current.iso8601 }
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [active_job], [], ref_id: "REF456")
+
+ job = Folio::CraMediaCloud::MonitorProcessingJob.new
+
+ Folio::CraMediaCloud::Api.stub(:new, api_mock) do
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ job.send(:reconcile_video_state, video)
+ end
+ end
+
+ api_mock.verify
+ video.reload
+ assert_equal "JOB_ACTIVE", video.remote_services_data["remote_id"]
+ end
+
+ test "reconcile_video_state clears reference_id and processing_state when API finds no jobs" do
+ video = create(:folio_file_video)
+ video.update!(
+ aasm_state: :processing,
+ remote_services_data: {
+ "service" => "cra_media_cloud",
+ "processing_state" => "full_media_processing",
+ "reference_id" => "REF456"
+ }
+ )
+
+ api_mock = Minitest::Mock.new
+ api_mock.expect(:get_jobs, [], [], ref_id: "REF456")
+
+ job = Folio::CraMediaCloud::MonitorProcessingJob.new
+
+ Folio::CraMediaCloud::Api.stub(:new, api_mock) do
+ assert_enqueued_jobs 0 do
+ job.send(:reconcile_video_state, video)
+ end
+ end
+
+ api_mock.verify
+ video.reload
+ assert_nil video.remote_services_data["reference_id"]
+ assert_nil video.remote_services_data["processing_state"]
+ end
+
+ # --- reconcile_with_remote_jobs: all-REMOVED path ---
+
+ test "reconcile_with_remote_jobs schedules CheckProgressJob when all CRA jobs are REMOVED" do
+ video = create(:folio_file_video)
+ rs_data = {
+ "service" => "cra_media_cloud",
+ "processing_state" => "full_media_processing",
+ "reference_id" => "REF123",
+ "encoding_generation" => 42,
+ "phase_1_completed_at" => 5.minutes.ago.iso8601,
+ "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4" },
+ }
+ video.update_column(:remote_services_data, rs_data)
+
+ removed_jobs = [
+ { "id" => "JOB1", "status" => "REMOVED", "phase" => 1, "lastModified" => 2.minutes.ago.iso8601 },
+ { "id" => "JOB2", "status" => "REMOVED", "phase" => 2, "lastModified" => 1.minute.ago.iso8601 },
+ ]
+
+ job = Folio::CraMediaCloud::MonitorProcessingJob.new
+
+ assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do
+ job.send(:reconcile_with_remote_jobs, video, rs_data, removed_jobs)
+ end
+
+ # Should NOT update processing_state — CheckProgressJob handles finalization
+ video.reload
+ assert_equal "full_media_processing", video.remote_services_data["processing_state"]
+ end
end
diff --git a/test/jobs/folio/file/get_video_metadata_job_test.rb b/test/jobs/folio/file/get_video_metadata_job_test.rb
new file mode 100644
index 0000000000..00b67c7e12
--- /dev/null
+++ b/test/jobs/folio/file/get_video_metadata_job_test.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class Folio::File::GetVideoMetadataJobTest < ActiveJob::TestCase
+ test "extracts duration, width, height from local video file" do
+ file_path = Folio::Engine.root.join("test/fixtures/folio/blank.mp4").to_s
+ result = Folio::File::GetVideoMetadataJob.perform_now(file_path)
+
+ assert result.is_a?(Hash)
+ assert result[:duration].is_a?(Integer)
+ assert result[:duration] > 0
+ assert result[:width].is_a?(Integer)
+ assert result[:width] > 0
+ assert result[:height].is_a?(Integer)
+ assert result[:height] > 0
+ end
+
+ test "returns nil values gracefully for invalid path" do
+ result = Folio::File::GetVideoMetadataJob.perform_now("/nonexistent/file.mp4")
+
+ assert result.is_a?(Hash)
+ assert_nil result[:duration]
+ assert_nil result[:width]
+ assert_nil result[:height]
+ end
+end
diff --git a/test/jobs/folio/s3/create_file_job_test.rb b/test/jobs/folio/s3/create_file_job_test.rb
new file mode 100644
index 0000000000..a6b6b580ae
--- /dev/null
+++ b/test/jobs/folio/s3/create_file_job_test.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class Folio::S3::CreateFileJobTest < ActiveJob::TestCase
+ test "video upload falls back to download flow for local file system" do
+ # In test env with FileDataStore, video upload should use the standard download path
+ # (S3 copy path is only for actual S3 storage)
+ s3_path = "test_video.mp4"
+
+ # Create a temp file simulating S3 uploaded file
+ source_path = "#{Folio::S3::Client::LOCAL_TEST_PATH}/#{s3_path}"
+ FileUtils.mkdir_p(File.dirname(source_path))
+ fixture_path = Folio::Engine.root.join("test/fixtures/folio/blank.mp4").to_s
+ FileUtils.cp(fixture_path, source_path)
+
+ site = get_any_site
+
+ Folio::S3::CreateFileJob.perform_now(
+ s3_path: s3_path,
+ type: "Folio::File::Video",
+ attributes: { site_id: site.id }
+ )
+
+ # File should be created successfully via download path
+ created_video = Folio::File::Video.last
+ assert created_video.present?, "Video should be created"
+ assert created_video.file_uid.present?, "Video should have file_uid"
+ assert created_video.file_name.present?, "Video should have file_name"
+ ensure
+ FileUtils.rm_f(source_path) if source_path
+ end
+
+ test "video upload uses S3 server-side copy when on real S3 storage" do
+ site = get_any_site
+ s3_path = "uploads/test_video.mp4"
+
+ fake_head = Struct.new(:content_length, :content_type).new(5_000_000, "video/mp4")
+ copy_source_key = nil
+
+ job = Folio::S3::CreateFileJob.new
+ job.define_singleton_method(:use_local_file_system?) { false }
+
+ # Stub before_validation :set_video_file_dimensions — it calls file_url_or_path which tries
+ # to fetch the Dragonfly-generated UID from FileDataStore (no actual file exists there).
+ # Provide fake dimensions so file_width/file_height validations pass.
+ Folio::File::Video.define_method(:set_video_file_dimensions) do
+ self.file_width = 1280
+ self.file_height = 720
+ self.file_track_duration = 0
+ end
+
+ job.stub(:test_aware_s3_exists?, true) do
+ job.stub(:s3_copy_object, ->(source_key:, dest_key:) { copy_source_key = source_key }) do
+ job.stub(:s3_head_object, fake_head) do
+ job.stub(:test_aware_s3_delete, nil) do
+ job.perform(s3_path: s3_path, type: "Folio::File::Video", attributes: { site_id: site.id })
+ end
+ end
+ end
+ end
+
+ created_video = Folio::File::Video.last
+ assert created_video.present?, "Video should be created"
+ assert created_video.file_uid.present?, "Video should have a Dragonfly UID"
+ assert_equal "test_video.mp4", created_video.file_name
+ assert_equal 5_000_000, created_video.file_size
+ assert_equal "video/mp4", created_video.file_mime_type
+ assert_includes copy_source_key, s3_path, "s3_copy_object should have been called with the source path"
+ ensure
+ Folio::File::Video.remove_method(:set_video_file_dimensions)
+ end
+end
diff --git a/test/lib/folio/cra_media_cloud/encoder_test.rb b/test/lib/folio/cra_media_cloud/encoder_test.rb
new file mode 100644
index 0000000000..1926c70bbf
--- /dev/null
+++ b/test/lib/folio/cra_media_cloud/encoder_test.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+require "test_helper"
+require "ostruct" # OpenStruct is no longer a default gem from Ruby 3.5; explicit require avoids the 3.4 deprecation warning
+
+class Folio::CraMediaCloud::EncoderManifestTest < ActiveSupport::TestCase
+ setup do
+ @encoder = Folio::CraMediaCloud::Encoder.new
+ @file = OpenStruct.new(
+ id: 42,
+ file_name: "test_video.mp4",
+ file_size: "123456",
+ )
+ @defaults = { md5: "abc123", ref_id: "42-1234567890", profile_group: "VoD" }
+ end
+
+ test "build_ingest_manifest includes processingPhases attribute when processing_phases is 2" do
+ xml = @encoder.send(:build_ingest_manifest, @file, **@defaults, processing_phases: 2)
+
+ assert_includes xml, 'processingPhases="2"'
+  end
+end
diff --git a/test/lib/folio/cra_media_cloud/job_resolver_test.rb b/test/lib/folio/cra_media_cloud/job_resolver_test.rb
new file mode 100644
--- /dev/null
+++ b/test/lib/folio/cra_media_cloud/job_resolver_test.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class Folio::CraMediaCloud::JobResolverTest < ActiveSupport::TestCase
+  def make_job(id:, ref_id:, status:, profile_group: "VoD", last_modified: Time.current.iso8601)
+    {
+      "id" => id,
+      "refId" => ref_id,
+      "status" => status,
+      "profileGroup" => profile_group,
+      "lastModified" => last_modified,
+      "messages" => [],
+      "output" => [],
+    }
+  end
+
+ test "returns latest job by lastModified" do
+ jobs = [
+ make_job(id: 1, ref_id: "abc-123", status: "FAILED", last_modified: "2026-01-01T00:00:00Z"),
+ make_job(id: 2, ref_id: "abc-123", status: "DONE", last_modified: "2026-01-02T00:00:00Z"),
+ ]
+ result = Folio::CraMediaCloud::JobResolver.resolve(jobs)
+ assert_equal :done, result[:status]
+ assert_equal 2, result[:job]["id"]
+ end
+
+ test "returns :not_found for empty jobs" do
+ result = Folio::CraMediaCloud::JobResolver.resolve([])
+ assert_equal :not_found, result[:status]
+ assert_nil result[:job]
+ end
+
+ test "maps CRA statuses correctly" do
+ { "PROCESSING" => :processing, "CREATED" => :processing,
+ "DONE" => :done, "FAILED" => :failed, "ERROR" => :failed,
+ "REMOVED" => :not_found }.each do |cra_status, expected|
+ jobs = [make_job(id: 1, ref_id: "x", status: cra_status)]
+ result = Folio::CraMediaCloud::JobResolver.resolve(jobs)
+ assert_equal expected, result[:status], "Expected #{expected} for CRA status #{cra_status}"
+ end
+ end
+end
diff --git a/test/models/concerns/folio/media_file_processing_base_test.rb b/test/models/concerns/folio/media_file_processing_base_test.rb
index eeda860996..f15bbe852f 100644
--- a/test/models/concerns/folio/media_file_processing_base_test.rb
+++ b/test/models/concerns/folio/media_file_processing_base_test.rb
@@ -58,4 +58,54 @@ class TestVideoFile < Folio::File::Video
video.reload
assert_equal original_generation, video.encoding_generation
end
+
+ test "encoding_generation is set even when model has validation errors" do
+ video = TestVideoFile.new(site: get_any_site)
+ video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4")
+ video.dont_run_after_save_jobs = true
+
+ expect_method_called_on(object: video, method: :create_full_media) do
+ video.save!
+ end
+
+ # Simulate a video that would fail validation (e.g. ffprobe failed, dimensions missing)
+ video.update_columns(file_width: nil, file_height: nil)
+ video.reload
+
+ assert_not video.valid?, "video should be invalid without dimensions"
+
+ # process_attached_file uses update_columns, so it should succeed despite invalid model
+ freeze_time = Time.current
+ travel_to freeze_time do
+ video.send(:update_remote_services_data, {
+ "processing_step_started_at" => Time.current,
+ "encoding_generation" => freeze_time.to_i
+ })
+ end
+
+ video.reload
+ assert_equal freeze_time.to_i, video.encoding_generation
+ end
+
+  # Regression guard: create_full_media merges "service"/"processing_state" into
+  # remote_services_data and must NOT drop a previously stored encoding_generation.
+  test "create_full_media preserves encoding_generation" do
+    video = TestVideoFile.new(site: get_any_site)
+    video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4")
+    # Skip after-save jobs so state is set up manually below.
+    video.dont_run_after_save_jobs = true
+    video.save!
+
+    # Set encoding_generation like process_attached_file does
+    # (update_columns + private update_remote_services_data, bypassing callbacks).
+    video.update_columns(aasm_state: "processing")
+    video.send(:update_remote_services_data, {
+      "processing_step_started_at" => Time.current,
+      "encoding_generation" => 12345
+    })
+
+    # create_full_media should merge in service/state without losing encoding_generation
+    video.create_full_media
+
+    video.reload
+    assert_equal 12345, video.encoding_generation
+    assert_equal "cra_media_cloud", video.remote_services_data["service"]
+    assert_equal "enqueued", video.remote_services_data["processing_state"]
+  end
end
diff --git a/test/models/folio/file/cra_media_cloud_file_processing_test.rb b/test/models/folio/file/cra_media_cloud_file_processing_test.rb
new file mode 100644
index 0000000000..88fab6f2d6
--- /dev/null
+++ b/test/models/folio/file/cra_media_cloud_file_processing_test.rb
@@ -0,0 +1,127 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+# Integration tests for AASM state machine + Folio::CraMediaCloud::FileProcessing concern.
+# Verifies that the full state machine works correctly when the CRA concern is included.
+class Folio::File::CraMediaCloudFileProcessingTest < ActiveJob::TestCase
+  # Minimal Video subclass with the CRA processing concern mixed in,
+  # so tests exercise the concern without touching production classes.
+  class TestVideoFile < Folio::File::Video
+    include Folio::CraMediaCloud::FileProcessing
+  end
+
+  # --- process! triggers CRA encoding ---
+
+  test "process! transitions to processing and enqueues CreateMediaJob" do
+    video = build_saved_video
+    # after_commit :process! fires during build_saved_video's save!, leaving state = "processing".
+    # Reset to unprocessed so we can test a clean process! transition.
+    video.update_column(:aasm_state, "unprocessed")
+
+    # Stub out thumbnail regeneration — irrelevant here and would invoke external tooling.
+    video.stub(:regenerate_thumbnails, nil) do
+      assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do
+        video.process!
+      end
+    end
+
+    video.reload
+    assert_equal "processing", video.aasm_state
+    assert_equal "cra_media_cloud", video.remote_services_data["service"]
+    assert_equal "enqueued", video.remote_services_data["processing_state"]
+    assert video.remote_services_data["encoding_generation"].present?,
+      "encoding_generation must be set so CheckProgressJob can detect stale jobs"
+  end
+
+  test "process_attached_file sets a new encoding_generation each time" do
+    video = build_saved_video
+    # Reset state (after_commit :process! fired during save!, leaving state = "processing")
+    video.update_column(:aasm_state, "unprocessed")
+    # Pre-seed a stale generation; re-processing must replace it.
+    video.update!(remote_services_data: { "encoding_generation" => 999 })
+
+    video.stub(:regenerate_thumbnails, nil) do
+      video.process!
+    end
+
+    video.reload
+    assert_not_equal 999, video.remote_services_data["encoding_generation"],
+      "encoding_generation should change on re-encode"
+  end
+
+  # --- AASM state transitions ---
+
+  test "processing_done! transitions processing to ready" do
+    video = build_saved_video
+    video.update_column(:aasm_state, "processing")
+
+    video.processing_done!
+
+    assert_equal "ready", video.reload.aasm_state
+  end
+
+  test "processing_failed! transitions processing to processing_failed" do
+    video = build_saved_video
+    video.update_column(:aasm_state, "processing")
+
+    video.processing_failed!
+
+    assert_equal "processing_failed", video.reload.aasm_state
+  end
+
+  test "retry_processing! transitions processing_failed back to processing" do
+    video = build_saved_video
+    video.update_column(:aasm_state, "processing_failed")
+
+    video.retry_processing!
+
+    assert_equal "processing", video.reload.aasm_state
+  end
+
+  # --- destroy_attached_file enqueues DeleteMediaJob ---
+
+  test "destroy_attached_file enqueues DeleteMediaJob when remote_id is present" do
+    video = build_saved_video
+    video.update!(remote_services_data: {
+      "remote_id" => "JOB123",
+      "reference_id" => "REF456"
+    })
+
+    assert_enqueued_jobs 1, only: Folio::CraMediaCloud::DeleteMediaJob do
+      video.destroy_attached_file
+    end
+  end
+
+  test "destroy_attached_file does nothing when no remote_id or reference_id" do
+    video = build_saved_video
+    # No remote identifiers — there is nothing on CRA to delete.
+    video.update!(remote_services_data: {})
+
+    assert_no_enqueued_jobs only: Folio::CraMediaCloud::DeleteMediaJob do
+      video.destroy_attached_file
+    end
+  end
+
+  # --- video_poster_url interface ---
+
+  test "video_poster_url returns nil for Folio::File::Video with no provider concern" do
+    plain_video_class = Class.new(Folio::File::Video)
+    assert_nil plain_video_class.new.video_poster_url
+  end
+
+  test "video_poster_url delegates to remote_cover_url in CRA concern" do
+    video = build_saved_video
+    video.stub(:remote_cover_url, "https://cdn.example.com/cover.jpg") do
+      assert_equal "https://cdn.example.com/cover.jpg", video.video_poster_url
+    end
+  end
+
+  private
+    # Builds and saves a TestVideoFile with after-save jobs suppressed;
+    # create_full_media is expected to be called during save!.
+    def build_saved_video
+      video = TestVideoFile.new(site: get_any_site)
+      video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4")
+      video.dont_run_after_save_jobs = true
+
+      expect_method_called_on(object: video, method: :create_full_media) do
+        video.save!
+      end
+
+      video
+    end
+end
diff --git a/test/models/folio/file_test.rb b/test/models/folio/file_test.rb
index 2ef95e2f36..a687e91397 100644
--- a/test/models/folio/file_test.rb
+++ b/test/models/folio/file_test.rb
@@ -345,6 +345,15 @@ def f_file.process_attached_file # hacking method to check if it is called
end
end
+class Folio::FileUrlOrPathTest < ActiveSupport::TestCase
+ test "file_url_or_path returns local path for FileDataStore" do
+ video = create(:folio_file_video)
+ result = video.file_url_or_path
+ assert result.is_a?(String)
+ assert_not result.start_with?("http"), "Expected local path, got URL: #{result}"
+ end
+end
+
class Folio::FileImageMetadataKeywordsTest < ActiveSupport::TestCase
include ActiveJob::TestHelper