diff --git a/CHANGELOG.md b/CHANGELOG.md index f1ff586c08..e1b1f9c30c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,37 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Changed + +- tiptap float css - allow floating on mobile as well + +## [7.5.1] - 2026-03-19 + +### Fixed + +- **CRA reference_id uniqueness**: Added video ID to reference_id format (`{env}-{slug}-{id}-{s3_etag}-{generation}`) to prevent cross-contamination between videos with identical slugs +- **CRA encoding_generation race condition**: Added reload fallback in CreateMediaJob when encoding_generation is nil due to uncommitted transaction from S3::CreateFileJob +- **CRA MonitorProcessingJob orphan detection**: Added 10-minute threshold to orphan detection for videos with reference_id but no remote_id, preventing false positives on just-uploaded videos + +## [7.5.0] - 2026-03-19 + +### Added + +- **CRA presigned S3 URLs**: Encoder no longer downloads video to local disk or uploads via SFTP. CRA fetches video directly from S3 via presigned URL (7-day expiry). Only the XML manifest is uploaded via SFTP. +- **Two-phase encoding**: When `encoder_processing_phases` > 1, CreateMediaJob submits two manifests with the same `refId` — SD first, then HD. Backward compatible: single-phase when `encoder_processing_phases` is nil/1. +- **Encoding progress tracking**: CheckProgressJob parses CRA `messages` array for per-phase milestones, extracts video duration, and estimates completion time. New processing states: `sd_processing → sd_processed → hd_processing → full_media_processed`. +- **Console encoding info component**: `EncodingInfoComponent` shows current encoding phase and progress percentage on video file detail page, with real-time updates via MessageBus. +- **S3 client and jobs**: `Folio::S3::Client` for presigned URL generation, `Folio::S3::CreateFileJob` for S3-based file creation, `Folio::File::GetVideoMetadataJob` for video metadata extraction. 
+- **Video thumbnail generation**: `GenerateThumbnailJob` reworked for reliable thumbnail generation from video files. + +### Changed + +- `ShowComponent` now exposes `aasmState` as a Stimulus value and reloads via Turbo on state transitions (encoding progress, file updates) +- `ShowComponent` layout: state badge moved to right side (`ms-auto`), encoding info rendered inline after state + +### Fixed + +- add `try` to `dont_run_after_save_jobs` to enable thumbnail generation for `private_attachments` ## [7.4.1] - 2026-03-11 diff --git a/Gemfile.lock b/Gemfile.lock index d961a9a9a2..10fa11c905 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -10,7 +10,7 @@ GIT PATH remote: . specs: - folio (7.4.1) + folio (7.5.1) aasm activejob-uniqueness (>= 0.3.0) acts-as-taggable-on @@ -89,29 +89,29 @@ GEM specs: aasm (5.5.2) concurrent-ruby (~> 1.0) - actioncable (8.0.4) - actionpack (= 8.0.4) - activesupport (= 8.0.4) + actioncable (8.0.4.1) + actionpack (= 8.0.4.1) + activesupport (= 8.0.4.1) nio4r (~> 2.0) websocket-driver (>= 0.6.1) zeitwerk (~> 2.6) - actionmailbox (8.0.4) - actionpack (= 8.0.4) - activejob (= 8.0.4) - activerecord (= 8.0.4) - activestorage (= 8.0.4) - activesupport (= 8.0.4) + actionmailbox (8.0.4.1) + actionpack (= 8.0.4.1) + activejob (= 8.0.4.1) + activerecord (= 8.0.4.1) + activestorage (= 8.0.4.1) + activesupport (= 8.0.4.1) mail (>= 2.8.0) - actionmailer (8.0.4) - actionpack (= 8.0.4) - actionview (= 8.0.4) - activejob (= 8.0.4) - activesupport (= 8.0.4) + actionmailer (8.0.4.1) + actionpack (= 8.0.4.1) + actionview (= 8.0.4.1) + activejob (= 8.0.4.1) + activesupport (= 8.0.4.1) mail (>= 2.8.0) rails-dom-testing (~> 2.2) - actionpack (8.0.4) - actionview (= 8.0.4) - activesupport (= 8.0.4) + actionpack (8.0.4.1) + actionview (= 8.0.4.1) + activesupport (= 8.0.4.1) nokogiri (>= 1.8.5) rack (>= 2.2.4) rack-session (>= 1.0.1) @@ -119,38 +119,38 @@ GEM rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) useragent (~> 0.16) - actiontext (8.0.4) - 
actionpack (= 8.0.4) - activerecord (= 8.0.4) - activestorage (= 8.0.4) - activesupport (= 8.0.4) + actiontext (8.0.4.1) + actionpack (= 8.0.4.1) + activerecord (= 8.0.4.1) + activestorage (= 8.0.4.1) + activesupport (= 8.0.4.1) globalid (>= 0.6.0) nokogiri (>= 1.8.5) - actionview (8.0.4) - activesupport (= 8.0.4) + actionview (8.0.4.1) + activesupport (= 8.0.4.1) builder (~> 3.1) erubi (~> 1.11) rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) - activejob (8.0.4) - activesupport (= 8.0.4) + activejob (8.0.4.1) + activesupport (= 8.0.4.1) globalid (>= 0.3.6) activejob-uniqueness (0.4.0) activejob (>= 4.2, < 8.1) redlock (>= 2.0, < 3) - activemodel (8.0.4) - activesupport (= 8.0.4) - activerecord (8.0.4) - activemodel (= 8.0.4) - activesupport (= 8.0.4) + activemodel (8.0.4.1) + activesupport (= 8.0.4.1) + activerecord (8.0.4.1) + activemodel (= 8.0.4.1) + activesupport (= 8.0.4.1) timeout (>= 0.4.0) - activestorage (8.0.4) - actionpack (= 8.0.4) - activejob (= 8.0.4) - activerecord (= 8.0.4) - activesupport (= 8.0.4) + activestorage (8.0.4.1) + actionpack (= 8.0.4.1) + activejob (= 8.0.4.1) + activerecord (= 8.0.4.1) + activesupport (= 8.0.4.1) marcel (~> 1.0) - activesupport (8.0.4) + activesupport (8.0.4.1) base64 benchmark (>= 0.3) bigdecimal @@ -159,7 +159,7 @@ GEM drb i18n (>= 1.6, < 2) logger (>= 1.4.2) - minitest (>= 5.1) + minitest (>= 5.1, < 6) securerandom (>= 0.3) tzinfo (~> 2.0, >= 2.0.5) uri (>= 0.13.1) @@ -205,7 +205,7 @@ GEM babel-source (>= 4.0, < 6) execjs (~> 2.0) base64 (0.3.0) - bcrypt (3.1.20) + bcrypt (3.1.22) benchmark (0.5.0) better_errors (2.10.1) erubi (>= 1.0.0) @@ -247,8 +247,8 @@ GEM cocoon (1.2.15) coderay (1.1.3) colorize (0.8.1) - concurrent-ruby (1.3.5) - connection_pool (2.5.4) + concurrent-ruby (1.3.6) + connection_pool (2.5.5) countries (8.0.4) unaccent (~> 0.3) country_select (11.0.0) @@ -378,7 +378,7 @@ GEM multi_xml (>= 0.5.2) httpparty (0.2.0) httparty (> 0) - i18n (1.14.7) + i18n (1.14.8) concurrent-ruby (~> 1.0) 
i18n-tasks (1.0.15) activesupport (>= 4.0.2) @@ -405,7 +405,7 @@ GEM rails-dom-testing (>= 1, < 3) railties (>= 4.2.0) thor (>= 0.14, < 2.0) - json (2.18.1) + json (2.19.2) json-jwt (1.17.0) activesupport (>= 4.2) aes_key_wrap @@ -427,7 +427,7 @@ GEM rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) logger (1.7.0) - loofah (2.24.1) + loofah (2.25.1) crass (~> 1.0.2) nokogiri (>= 1.12.0) lumberjack (1.4.2) @@ -470,9 +470,9 @@ GEM net-protocol net-ssh (7.3.0) nio4r (2.7.5) - nokogiri (1.18.10-arm64-darwin) + nokogiri (1.19.2-arm64-darwin) racc (~> 1.4) - nokogiri (1.18.10-x86_64-linux-gnu) + nokogiri (1.19.2-x86_64-linux-gnu) racc (~> 1.4) notiffany (0.1.3) nenv (~> 0.1) @@ -551,7 +551,7 @@ GEM nio4r (~> 2.0) raabro (1.4.0) racc (1.8.1) - rack (2.2.21) + rack (2.2.22) rack-mini-profiler (4.0.1) rack (>= 1.2.0) rack-protection (3.2.0) @@ -564,33 +564,33 @@ GEM rackup (1.0.1) rack (< 3) webrick - rails (8.0.4) - actioncable (= 8.0.4) - actionmailbox (= 8.0.4) - actionmailer (= 8.0.4) - actionpack (= 8.0.4) - actiontext (= 8.0.4) - actionview (= 8.0.4) - activejob (= 8.0.4) - activemodel (= 8.0.4) - activerecord (= 8.0.4) - activestorage (= 8.0.4) - activesupport (= 8.0.4) + rails (8.0.4.1) + actioncable (= 8.0.4.1) + actionmailbox (= 8.0.4.1) + actionmailer (= 8.0.4.1) + actionpack (= 8.0.4.1) + actiontext (= 8.0.4.1) + actionview (= 8.0.4.1) + activejob (= 8.0.4.1) + activemodel (= 8.0.4.1) + activerecord (= 8.0.4.1) + activestorage (= 8.0.4.1) + activesupport (= 8.0.4.1) bundler (>= 1.15.0) - railties (= 8.0.4) + railties (= 8.0.4.1) rails-dom-testing (2.3.0) activesupport (>= 5.0.0) minitest nokogiri (>= 1.6) - rails-html-sanitizer (1.6.2) - loofah (~> 2.21) + rails-html-sanitizer (1.7.0) + loofah (~> 2.25) nokogiri (>= 1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0) rails-i18n (8.0.2) i18n (>= 0.7, < 2) railties (>= 8.0.0, < 9) - railties (8.0.4) - actionpack (= 8.0.4) - activesupport (= 
8.0.4) + railties (8.0.4.1) + actionpack (= 8.0.4.1) + activesupport (= 8.0.4.1) irb (~> 1.13) rackup (>= 1.0.0) rake (>= 12.2) @@ -736,7 +736,7 @@ GEM execjs (>= 0.3.0, < 3) thor (1.4.0) tilt (2.0.11) - timeout (0.4.4) + timeout (0.6.1) traco (5.3.3) activerecord (>= 4.2) trailblazer-option (0.1.2) diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000000..d94022ea56 --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,276 @@ +# Folio Roadmap + +This document collects proposed roadmap themes for Folio as an open-source Rails CMS engine. +It is a planning draft intended to support discussion and prioritization, not a delivery commitment. + +## Planning Principles + +- Prefer engine-level contracts over one-off rewrites. +- Keep the default path simple for small projects. +- Support multiple infrastructure models where justified. +- Treat generators, documentation, tests, and automation as part of the product surface. +- Reduce legacy surface area in stages, with clear migration paths. +- Optimize for both human contributors and AI-assisted workflows. + +## Now + +The "Now" horizon is split into foundational tracks and migration tracks. +Foundational tracks define the contracts and platform direction that later migrations should build on. +Migration tracks reduce the current legacy surface without losing delivery focus. + +### Foundational Tracks + +#### 1. Pluggable Image Transformation Pipeline + +**Problem** + +Dragonfly-based thumbnail generation creates operational and architectural pain: + +- background thumbnail jobs are fragile and add queue pressure +- storage, URL generation, and processing orchestration are tightly coupled +- cache behavior is hard to reason about +- different projects need different infrastructure models + +**Target Outcome** + +Introduce a provider-based image transformation layer with a stable Folio contract and multiple backend implementations. + +**Initial Scope** + +- Define a canonical Folio thumbnail interface. 
+- Introduce stable application-facing thumbnail URLs. +- Internally use versioned or immutable result objects derived from source checksum + variant specification. +- Add a built-in compatibility provider for Sidekiq-based processing. +- Design a remote transformer API contract for external processing services. +- Make private files, signed access, crop variants, and invalidation part of the design from day one. + +**Delivery Options** + +- Built-in Sidekiq provider for small projects. +- Remote transformer service running on Kubernetes, either per app or per cluster. +- AWS-oriented provider using Thumbor and S3-compatible storage, with [`sinfin/aws-file-handler`](https://github.com/sinfin/aws-file-handler) as an existing reference point. +- Optional future serverless provider for low-traffic or bursty workloads. + +**Success Criteria** + +- Folio no longer depends on Dragonfly thumbnail jobs as the only model. +- Projects can switch providers without changing view-level APIs. +- Thumbnail URLs remain stable at the application level. +- Processing failures and cache misses are observable and debuggable. + +#### 2. Cache Architecture Refresh + +**Problem** + +The current `cache_key_base` approach is not sufficient for larger projects and does not provide a robust invalidation model across dimensions such as site, locale, session-sensitive rendering, and public/private variants. + +**Target Outcome** + +Move from a narrow cache key convention to a clearer cache architecture with explicit dimensions, invalidation rules, and debugging support. + +**Initial Scope** + +- Formalize cache dimensions: site, locale, user/session requirements, published state, content version, and other relevant axes. +- Replace or supersede `cache_key_base` with a better engine-level contract. +- Integrate with existing HTTP cache work and component session requirements. +- Add cache diagnostics and developer tooling so cache decisions are explainable. 
+- Validate the approach against a larger-project proof of concept. +- Use the existing exploratory branch `petr/has-folio-tiptap-and-cache` as an initial reference point, then clean up and extract the durable architectural direction from it. + +**Success Criteria** + +- Cache invalidation is predictable on large installs. +- Cache contracts are documented and testable. +- Developers can inspect why a response or component was cached or bypassed. + +#### 3. Packwerk and Modular Folio Surface + +**Problem** + +Folio still behaves largely as one large engine surface. +That makes architectural boundaries harder to enforce, increases accidental coupling, and makes it difficult to enable only selected parts of the engine in a clean way. + +**Target Outcome** + +Introduce explicit package boundaries and a more modular Folio layout so projects can reason about dependencies and selectively adopt engine capabilities. + +**Initial Scope** + +- Use the existing cache proof of concept, currently explored in `petr/has-folio-tiptap-and-cache`, as one of the first validation areas for package boundaries. +- Introduce Packwerk in a way that provides architectural feedback without blocking all development immediately. +- Identify candidate packages such as caching, files/media, console UI, TipTap, users, newsletter features, and other separable engine areas. +- Define which parts of Folio should be independently switchable at the configuration level and which should remain core. +- Reduce implicit cross-package dependencies and document allowed dependency directions. + +**Success Criteria** + +- Architectural boundaries become visible and enforceable. +- Large projects can adopt only the Folio areas they need with less incidental coupling. +- New engine work happens inside clearer module boundaries instead of expanding a monolith. + +#### 4. 
OSS Contributor Platform + +**Problem** + +Folio works today, but the open-source contributor experience is still too dependent on internal knowledge and manual setup steps. + +**Target Outcome** + +Make the repository easy to install, run, test, and change for any external Rails developer. + +**Initial Scope** + +- Standardize local entrypoints such as setup, dev, test, lint, and CI commands. +- Reduce or isolate secrets required for local development. +- Define and document the supported version matrix for Ruby, Rails, Node, and external tooling. +- Treat generators as public API and harden them with real smoke tests. +- Improve release metadata, docs consistency, and contributor-facing guidance. + +**Success Criteria** + +- A new contributor can boot the project from documented commands alone. +- Generator workflows are tested, not just documented. +- Documentation reflects the actual supported stack. + +### Migration Tracks + +#### 5. UI Modernization Phase 1 + +**Problem** + +Folio still carries a large legacy UI surface across Cells, jQuery, and legacy React islands. +That slows down maintenance, increases onboarding cost, and keeps multiple frontend patterns alive at the same time. + +**Target Outcome** + +Make ViewComponent + Stimulus the default and preferred path for Folio UI. + +**Initial Scope** + +- Continue the staged migration from Cells to ViewComponents on the most-used engine surfaces. +- Replace jQuery-driven interactions with Stimulus controllers where practical. +- Identify legacy React islands that should be migrated to Stimulus rather than expanded. +- Stop growing the legacy surface area through generators and new features. +- Publish a migration tracker so the remaining legacy footprint is visible. + +**Success Criteria** + +- New engine UI work does not introduce additional Cells or jQuery. +- The highest-value admin and public components have ViewComponent-based replacements. 
+- Frontend interaction patterns become more uniform across the codebase. + +## Next + +### 6. Atom to TipTap Migration Program + +**Problem** + +Atoms and TipTap currently coexist, but there is no complete engine-level migration program covering authoring UX, content migration, coexistence rules, and project guidance. + +**Target Outcome** + +Provide a realistic path for teams that want to move from atom-heavy editing flows to TipTap-driven structured content. + +**Scope** + +- Define the target role of Atoms vs TipTap nodes in Folio. +- Prepare authoring UI and editor affordances needed for wider TipTap adoption. +- Write migration guidelines for teams and projects. +- Support coexistence during migration rather than forcing a big-bang rewrite. +- Add tooling for content migration where possible. + +**Success Criteria** + +- Teams understand when to use Atoms, when to use TipTap, and how to migrate. +- Folio can support mixed-mode projects during transition. +- New content modeling guidance is coherent and maintainable. + +### 7. UI Modernization Phase 2 + +**Problem** + +After the first modernization pass, some legacy frontend surface will still remain for edge cases, generators, and older admin workflows. + +**Target Outcome** + +Complete the shift to the modern engine UI stack and retire legacy defaults. + +**Scope** + +- Finish the Cells to ViewComponents migration where a compatible replacement exists. +- Remove remaining jQuery-heavy workflows from core engine paths. +- Reassess the role of the legacy React app and either shrink it further or replace it. +- Update generators so newly generated code always follows the modern stack. + +**Success Criteria** + +- Legacy UI technologies are no longer the default scaffolding path. +- The maintenance burden of multiple frontend stacks is materially reduced. + +### 8. 
AI Agent Readiness + +**Problem** + +Folio already includes AI-oriented instructions, but host applications generated by Folio do not yet get a strong, deterministic, agent-friendly contract. + +**Target Outcome** + +Make Folio-generated projects easier to use with coding agents such as Codex, Cursor, and Claude Code. + +**Scope** + +- Generate a richer local `AGENTS.md` for installed apps instead of only pointing back to the gem source. +- Provide deterministic setup, lint, test, and build entrypoints. +- Expose generators, config keys, and environment expectations in a machine-friendly way where useful. +- Reduce ambiguity around which stack is authoritative in each part of the repository. + +**Success Criteria** + +- Agents can bootstrap work from local project instructions without manual discovery. +- Folio-generated apps are easier to navigate and modify safely. + +## Later + +### 9. Data Model Cleanup + +**Problem** + +Some engine areas still rely on older persistence conventions and compatibility code that complicate upgrades and long-term maintenance. + +**Target Outcome** + +Reduce legacy persistence patterns and simplify the internal model layer. + +**Scope** + +- Replace remaining YAML `serialize` usage with more modern typed or JSON-based approaches where appropriate. +- Continue removing transitional compatibility branches once replacement paths are established. +- Document deprecation timelines for internal contracts that should disappear in the next major version. + +### 10. Deployment Model Portfolio + +**Problem** + +Different Folio projects have very different scale and infrastructure requirements. +A single mandatory operations model is not a good fit. + +**Target Outcome** + +Support multiple validated deployment models without forcing the same trade-offs on every installation. + +**Candidate Models** + +- Simple in-app processing for small projects. +- Shared or dedicated transformer service for Kubernetes-based stacks. 
+- AWS-native image pipeline for teams that prefer cloud-managed primitives. + +**Goal** + +Keep the Folio developer-facing contract stable while making infrastructure a deployment choice instead of an engine constraint. + +## Cross-Cutting Questions + +- Which parts of the current engine are true public API and need compatibility guarantees? +- Which migrations should be automated, and which should remain guided/manual? +- Where do we want strict defaults, and where do we want provider-based extensibility? +- Which large reference projects should be used to validate the roadmap decisions before declaring them as engine direction? diff --git a/app/assets/javascripts/folio/console/base.js b/app/assets/javascripts/folio/console/base.js index 8e42fc92d0..aedc53766e 100644 --- a/app/assets/javascripts/folio/console/base.js +++ b/app/assets/javascripts/folio/console/base.js @@ -111,6 +111,7 @@ //= require folio/console/files/picker/document_component //= require folio/console/files/picker/image_component //= require folio/console/files/picker_component +//= require folio/console/files/show/encoding_info_component //= require folio/console/files/show/thumbnails/crop_edit_component //= require folio/console/files/show_component //= require folio/console/files/show_modal_component diff --git a/app/assets/stylesheets/folio/tiptap/_styles.scss b/app/assets/stylesheets/folio/tiptap/_styles.scss index 110f97e405..082cc361d7 100644 --- a/app/assets/stylesheets/folio/tiptap/_styles.scss +++ b/app/assets/stylesheets/folio/tiptap/_styles.scss @@ -53,13 +53,14 @@ $f-tiptap__media-min-width--desktop: 708px !default; --f-tiptap-columns__gap: var(--f-tiptap__spacer); --f-tiptap-float__aside-margin-y: var(--f-tiptap__spacer); - --f-tiptap-float__aside-width: 0; --f-tiptap-float__aside-margin-x: 0; --f-tiptap-float__aside-offset: 0; --f-tiptap-float__aside-offset--tablet: 0; --f-tiptap-float__aside-margin-x--tablet: 1rem; --f-tiptap-float__aside-offset--desktop: 0; 
--f-tiptap-float__aside-margin-x--desktop: 1rem; + --f-tiptap-float__aside-side: left; + --f-tiptap-float__aside-width: 100%; --f-tiptap-li__margin-top: 0.5rem; --f-tiptap-li__margin-bottom: 0.5rem; @@ -416,9 +417,13 @@ $f-tiptap__media-min-width--desktop: 708px !default; } .f-tiptap-float__aside { - margin-bottom: var(--f-tiptap-float__aside-margin-y); position: relative; z-index: 2; + margin-bottom: var(--f-tiptap-float__aside-margin-y); + margin-right: var(--f-tiptap-float__aside-margin-x); + margin-left: var(--f-tiptap-float__aside-offset); + width: var(--f-tiptap-float__aside-width); + float: var(--f-tiptap-float__aside-side); } .f-tiptap-editor & .f-tiptap-column::before, @@ -555,7 +560,22 @@ $f-tiptap__media-min-width--desktop: 708px !default; } } + .f-tiptap-float[data-f-tiptap-float-side="right"] .f-tiptap-float__aside { + margin-right: var(--f-tiptap-float__aside-offset); + margin-left: var(--f-tiptap-float__aside-margin-x); + } + + .f-tiptap-float[data-f-tiptap-float-size="small"] .f-tiptap-float__aside { + width: var(--f-tiptap-float__aside-width); + } + + .f-tiptap-float[data-f-tiptap-float-size="large"] .f-tiptap-float__aside { + width: var(--f-tiptap-float__aside-width); + } + .f-tiptap-float[data-f-tiptap-float-side="right"] .f-tiptap-float__aside { + float: var(--f-tiptap-float__aside-side); + } @container (min-width: #{$f-tiptap__media-min-width--tablet}) { .f-tiptap-float { @@ -564,6 +584,7 @@ $f-tiptap__media-min-width--desktop: 708px !default; --f-tiptap-float__aside-side: left; --f-tiptap-float__aside-margin-x: var(--f-tiptap-float__aside-margin-x--tablet); --f-tiptap-float__aside-offset: var(--f-tiptap-float__aside-offset--tablet); + --f-tiptap-float__aside-margin-y: var(--f-tiptap__spacer); &::after { content: ""; @@ -586,30 +607,12 @@ $f-tiptap__media-min-width--desktop: 708px !default; .f-tiptap-float__aside { float: left; - margin-right: var(--f-tiptap-float__aside-margin-x); - margin-bottom: var(--f-tiptap__spacer); - margin-left: 
var(--f-tiptap-float__aside-offset); - width: var(--f-tiptap-float__aside-width); position: relative; container-type: inline-size; box-sizing: border-box; min-height: 2rem; } - .f-tiptap-float[data-f-tiptap-float-side="right"] .f-tiptap-float__aside { - float: right; - margin-right: var(--f-tiptap-float__aside-offset); - margin-left: var(--f-tiptap-float__aside-margin-x); - } - - .f-tiptap-float[data-f-tiptap-float-size="small"] .f-tiptap-float__aside { - width: var(--f-tiptap-float__aside-width); - } - - .f-tiptap-float[data-f-tiptap-float-size="large"] .f-tiptap-float__aside { - width: var(--f-tiptap-float__aside-width); - } - .f-tiptap-columns { display: grid; grid-auto-columns: 1fr; diff --git a/app/components/folio/console/files/show/encoding_info_component.js b/app/components/folio/console/files/show/encoding_info_component.js new file mode 100644 index 0000000000..6e5d055b90 --- /dev/null +++ b/app/components/folio/console/files/show/encoding_info_component.js @@ -0,0 +1,46 @@ +window.Folio.Stimulus.register('f-c-files-show-encoding-info', class extends window.Stimulus.Controller { + static values = { + fileId: Number + } + + connect () { + this.messageBusCallbackKey = `f-c-files-show-encoding-info--${this.fileIdValue}` + window.Folio.MessageBus.callbacks[this.messageBusCallbackKey] = (message) => { + if (message.type === 'Folio::CraMediaCloud::CheckProgressJob/encoding_progress' && + message.data.id === this.fileIdValue) { + this.update(message.data) + } + } + } + + disconnect () { + if (this.messageBusCallbackKey && window.Folio.MessageBus.callbacks) { + delete window.Folio.MessageBus.callbacks[this.messageBusCallbackKey] + } + } + + update (data) { + const phaseEl = this.element.querySelector('.f-c-files-show-encoding-info__phase') + const progressEl = this.element.querySelector('.f-c-files-show-encoding-info__progress') + + if (data.aasm_state === 'processing_failed') { + if (phaseEl) { + 
phaseEl.classList.add('f-c-files-show-encoding-info__phase--failed') + phaseEl.textContent = data.failed_label || '' + } + if (progressEl) { + progressEl.textContent = '' + } + return + } + + if (phaseEl && data.current_phase_label) { + phaseEl.classList.remove('f-c-files-show-encoding-info__phase--failed') + phaseEl.textContent = data.current_phase_label + } + + if (progressEl) { + progressEl.textContent = data.progress_percentage != null ? `${data.progress_percentage}%` : '' + } + } +}) diff --git a/app/components/folio/console/files/show/encoding_info_component.rb b/app/components/folio/console/files/show/encoding_info_component.rb new file mode 100644 index 0000000000..9ba94d1c70 --- /dev/null +++ b/app/components/folio/console/files/show/encoding_info_component.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +class Folio::Console::Files::Show::EncodingInfoComponent < Folio::Console::ApplicationComponent + def initialize(file:) + @file = file + @rsd = file.remote_services_data || {} + end + + def render? + cra_file? && (processing? || failed?) + end + + def processing? + @file.processing? + end + + def failed? + @file.processing_failed? + end + + def retrying? + failed? && @rsd["retry_scheduled_at"].present? && @rsd["retry_count"].to_i < 2 + end + + def current_phase + @rsd["current_phase"] + end + + def current_phase_label + return current_phase&.humanize if current_phase.blank? + + encoding_phase = @rsd["current_encoding_phase"] + processing_phases = @rsd["processing_phases"].to_i + + if processing_phases > 1 && encoding_phase.present? 
+ phase_name = @file.try(:encoder_phase_name, encoding_phase) + if phase_name + t(".phase_#{current_phase}_named", + name: phase_name, + default: t(".phase_#{current_phase}", default: current_phase.humanize)) + else + t(".phase_#{current_phase}_multi", + phase: encoding_phase, + total: processing_phases, + default: t(".phase_#{current_phase}", default: current_phase.humanize)) + end + else + t(".phase_#{current_phase}", default: current_phase.humanize) + end + end + + def encoding_progress + @rsd["progress_percentage"] + end + + def data + { + "controller" => "f-c-files-show-encoding-info", + "f-c-files-show-encoding-info-file-id-value" => @file.id, + } + end + + private + def cra_file? + # Check capability first (covers enqueued state before 'service' is written) + return true if @file.is_a?(Folio::CraMediaCloud::FileProcessing) + + # Fallback for plain Folio::File::Video without concern (legacy or plain video) + @file.try(:processing_service) == "cra_media_cloud" || + @rsd["service"] == "cra_media_cloud" || + @rsd["current_phase"].present? || + @rsd["retry_count"].present? 
+ end +end diff --git a/app/components/folio/console/files/show/encoding_info_component.sass b/app/components/folio/console/files/show/encoding_info_component.sass new file mode 100644 index 0000000000..2b6a9b8d7f --- /dev/null +++ b/app/components/folio/console/files/show/encoding_info_component.sass @@ -0,0 +1,26 @@ +.f-c-files-show-encoding-info + display: inline + color: $gray-600 + font-size: $font-size-sm + white-space: nowrap + + &:empty + display: none + + &__progress + &:not(:empty)::before + content: " · " + + &__phase--failed + color: $danger + +// Pulse the yellow state dot when encoding info component follows the state cell +.f-c-files-show__meta-item:has(+ .f-c-files-show-encoding-info:not(:empty)) + .f-c-state__state-square--state-processing + animation: f-c-files-show-encoding-info-pulse 2s ease-in-out infinite + +@keyframes f-c-files-show-encoding-info-pulse + 0%, 100% + opacity: 1 + 50% + opacity: 0.35 diff --git a/app/components/folio/console/files/show/encoding_info_component.slim b/app/components/folio/console/files/show/encoding_info_component.slim new file mode 100644 index 0000000000..72661dae45 --- /dev/null +++ b/app/components/folio/console/files/show/encoding_info_component.slim @@ -0,0 +1,13 @@ +span.f-c-files-show-encoding-info data=data + - if failed? + span.f-c-files-show-encoding-info__phase.f-c-files-show-encoding-info__phase--failed + - if retrying? + = t(".phase_failed_retrying") + - else + = t(".phase_failed") + - elsif processing? + span.f-c-files-show-encoding-info__phase + = current_phase_label + span.f-c-files-show-encoding-info__progress + - if encoding_progress.present? 
+ = "#{encoding_progress}%" diff --git a/app/components/folio/console/files/show_component.js b/app/components/folio/console/files/show_component.js index 243e544262..d35675e23a 100644 --- a/app/components/folio/console/files/show_component.js +++ b/app/components/folio/console/files/show_component.js @@ -6,7 +6,8 @@ window.Folio.Stimulus.register('f-c-files-show', class extends window.Stimulus.C fileType: String, id: String, showUrl: String, - indexUrl: String + indexUrl: String, + aasmState: String } disconnect () { @@ -93,6 +94,15 @@ window.Folio.Stimulus.register('f-c-files-show', class extends window.Stimulus.C messageBusCallback (event) { const message = event.detail.message + + if (message.type === 'Folio::CraMediaCloud::CheckProgressJob/encoding_progress') { + return this.handleEncodingProgress(message.data) + } + + if (message.type === 'Folio::ApplicationJob/file_update') { + return this.handleFileUpdate(message.data) + } + if (message.type !== 'Folio::S3::CreateFileJob') return switch (message.data.type) { case 'replace-success': @@ -104,10 +114,34 @@ window.Folio.Stimulus.register('f-c-files-show', class extends window.Stimulus.C } } - messageBusSuccess (data) { + handleEncodingProgress (data) { + if (data.aasm_state === 'processing') { + // Update state badge label + const stateLabel = this.element.querySelector('.f-c-state__state-label') + if (stateLabel) stateLabel.textContent = data.aasm_state_human + } else { + // Encoding finished or failed — reload to show final state + this.reloadFrame() + } + } + + handleFileUpdate (data) { + if (!data || !data.attributes) return + + const newState = data.attributes.aasm_state + if (newState && newState !== this.aasmStateValue) { + this.reloadFrame() + } + } + + reloadFrame () { window.Turbo.visit(this.showUrlValue, { frame: this.element.closest('turbo-frame').id }) } + messageBusSuccess (data) { + this.reloadFrame() + } + messageBusFailure (data) { this.loadingValue = false delete this.replacingFileData @@ 
-128,6 +162,24 @@ if (window.Folio && window.Folio.MessageBus && window.Folio.MessageBus.callbacks } } + if (message.type === 'Folio::CraMediaCloud::CheckProgressJob/encoding_progress') { + const selector = `.f-c-files-show[data-f-c-files-show-id-value="${message.data.id}"]` + const targets = document.querySelectorAll(selector) + + for (const target of targets) { + target.dispatchEvent(new CustomEvent('f-c-files-show/message', { detail: { message } })) + } + } + + if (message.type === 'Folio::ApplicationJob/file_update') { + const selector = `.f-c-files-show[data-f-c-files-show-id-value="${message.data.id}"]` + const targets = document.querySelectorAll(selector) + + for (const target of targets) { + target.dispatchEvent(new CustomEvent('f-c-files-show/message', { detail: { message } })) + } + } + if (message.type === 'f-c-files-show:reload') { const selector = `.f-c-files-show[data-f-c-files-show-id-value="${message.data.id}"]` const targets = document.querySelectorAll(selector) diff --git a/app/components/folio/console/files/show_component.rb b/app/components/folio/console/files/show_component.rb index 28cdd813ff..cff49c4071 100644 --- a/app/components/folio/console/files/show_component.rb +++ b/app/components/folio/console/files/show_component.rb @@ -14,7 +14,8 @@ def data id: @file.id, file_type: @file.class.to_s, show_url: controller.folio.url_for([:console, @file]), - index_url: controller.folio.url_for([:console, @file.class]) + index_url: controller.folio.url_for([:console, @file.class]), + aasm_state: @file.aasm_state }, action: { "f-uppy:upload-success": "uppyUploadSuccess", diff --git a/app/components/folio/console/files/show_component.slim b/app/components/folio/console/files/show_component.slim index 564f50ef76..c77b67b4b2 100644 --- a/app/components/folio/console/files/show_component.slim +++ b/app/components/folio/console/files/show_component.slim @@ -53,12 +53,14 @@ = @file.file_mime_type - if @file.created_at.present? 
- .f-c-files-show__meta-item.me-auto + .f-c-files-show__meta-item ' #{t(".created_at")}: #{l(@file.created_at.to_date, format: :console_short)} - .f-c-files-show__meta-item + .f-c-files-show__meta-item.ms-auto == cell("folio/console/state", @file, active: false) + = render(Folio::Console::Files::Show::EncodingInfoComponent.new(file: @file)) + .f-c-files-show__table - table_rows.each do |key, config| .f-c-files-show__tr diff --git a/app/components/folio/embed/box_component.sass b/app/components/folio/embed/box_component.sass index d8f8a04d15..9adf457631 100644 --- a/app/components/folio/embed/box_component.sass +++ b/app/components/folio/embed/box_component.sass @@ -1,6 +1,7 @@ .f-embed-box min-height: 150px position: relative + white-space: normal &__iframe width: 100% diff --git a/app/controllers/concerns/folio/console/api/file_controller_base.rb b/app/controllers/concerns/folio/console/api/file_controller_base.rb index f291d72594..33094801c5 100644 --- a/app/controllers/concerns/folio/console/api/file_controller_base.rb +++ b/app/controllers/concerns/folio/console/api/file_controller_base.rb @@ -314,7 +314,8 @@ def update_thumbnails_crop } end - @file.dont_run_after_save_jobs = true + @file.try(:dont_run_after_save_jobs=, true) + @file.update!(thumbnail_configuration:, thumbnail_sizes:, diff --git a/app/jobs/folio/cra_media_cloud/check_progress_job.rb b/app/jobs/folio/cra_media_cloud/check_progress_job.rb index efdb2de51c..da83aa8e3f 100644 --- a/app/jobs/folio/cra_media_cloud/check_progress_job.rb +++ b/app/jobs/folio/cra_media_cloud/check_progress_job.rb @@ -5,69 +5,276 @@ class Folio::CraMediaCloud::CheckProgressJob < Folio::ApplicationJob queue_as :default + unique :until_and_while_executing + + # Maximum time to poll CRA before giving up (4 hours). + # Long videos can take 2+ hours for HD encoding across multiple phases. 
+ MAX_PROCESSING_DURATION = 4.hours + attr_reader :media_file def perform(media_file, preview: false, encoding_generation: nil) @media_file = media_file @encoding_generation = encoding_generation - # CraMediaCloud doesn't use preview parameter, but we accept it for consistency - # If encoding_generation is provided, check if it matches current generation - # This prevents stale jobs from interfering with newer encodings if @encoding_generation.present? && media_file.encoding_generation != @encoding_generation Rails.logger.info "[CraMediaCloud::CheckProgressJob] Skipping stale job for #{media_file.class.name}##{media_file.id} " \ "(job generation: #{@encoding_generation}, current: #{media_file.encoding_generation})" return end - # Early return if video doesn't need progress checking if media_file.ready? - Rails.logger.info "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} is already in ready state, skipping progress check" + Rails.logger.info "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} is already in ready state" return end - response = fetch_job_response + if processing_timed_out? + Rails.logger.error "[CraMediaCloud::CheckProgressJob] Timed out after #{MAX_PROCESSING_DURATION.inspect} " \ + "for video #{media_file.id}. Marking as processing_failed." + if media_file.may_processing_failed? + # No with_lock here: timeout is a one-time terminal state written only by + # this code path. CheckProgressJob is unique-constrained so no concurrent + # instance runs. Broadcasts immediately follow the DB write intentionally. + media_file.processing_failed! + broadcast_file_update(media_file) + broadcast_encoding_progress + end + return + end - return check_again_later if response.nil? + check_progress + end - update_remote_service_data(response) + private + def multi_phase? + media_file.remote_services_data["processing_phases"].to_i > 1 + end - if media_file.full_media_processed? - media_file.processing_done! 
- broadcast_file_update(media_file) - elsif media_file.upload_failed? - # Don't reschedule for failed uploads - MonitorProcessingJob will handle retries - media_file.save! - broadcast_file_update(media_file) - Rails.logger.info "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} upload failed, not rescheduling" - elsif media_file.changed? - media_file.save! - broadcast_file_update(media_file) - check_again_later - else - check_again_later + def expected_phases + media_file.remote_services_data["processing_phases"].to_i + end + + def check_progress + response = fetch_job_response + return if response == :finalized # already handled by finalize_from_completed_phases! + return check_again_later if response.nil? + + # REMOVED means job content was explicitly deleted via DeleteMediaJob. + # CRA does NOT auto-purge jobs — production data confirms DONE jobs persist + # indefinitely. Clear remote_id so the next poll uses the reference_id path, + # which can finalize from stored phase data or eventually time out cleanly. + if response["status"] == "REMOVED" + Rails.logger.warn "[CraMediaCloud::CheckProgressJob] Job #{response['id']} for video #{media_file.id} " \ + "has been REMOVED. Clearing remote_id to fall back to reference_id polling." + media_file.with_lock do + media_file.remote_services_data.delete("remote_id") + media_file.save! + end + return check_again_later + end + + # All Redis I/O (check_again_later, CreateMediaJob.perform_later, broadcasts) + # is deferred until AFTER the Postgres row lock is released. + should_broadcast = false + should_reschedule = false + @pending_retry = false + + media_file.with_lock do + update_remote_service_data(response) + + if media_file.full_media_processed? + media_file.processing_done! + should_broadcast = true + elsif media_file.processing_failed? + should_broadcast = true # state set by handle_job_failure; @pending_retry set there too + elsif media_file.changed? + media_file.save! 
+ should_broadcast = true + should_reschedule = true + else + should_reschedule = true + end + end + + check_again_later if should_reschedule + Folio::CraMediaCloud::CreateMediaJob.set(wait: 2.minutes).perform_later(media_file) if @pending_retry + + if should_broadcast + broadcast_file_update(media_file) + broadcast_encoding_progress + end end - end - private def fetch_job_response if media_file.remote_id.present? - api.get_job(media_file.remote_id) + response = api.get_job(media_file.remote_id) + Rails.logger.info "[CraMediaCloud::CheckProgressJob] Job #{media_file.remote_id} for video #{media_file.id}: " \ + "status=#{response&.dig('status')}, progress=#{response&.dig('progress')}, " \ + "profileGroup=#{response&.dig('profileGroup')}, phase=#{response&.dig('phase')}" + + # Multi-phase: if the tracked job is DONE but not the final phase, + # save intermediate data, clear remote_id, and look up by reference_id. + # Intermediate save is wrapped in with_lock to protect against concurrent + # MonitorProcessingJob or retry CreateMediaJob runs. + if multi_phase? && response&.dig("status") == "DONE" && response&.dig("phase").to_i < expected_phases + media_file.with_lock do + save_intermediate_phase_data(response) + media_file.remote_services_data.delete("remote_id") + media_file.save! + end + broadcast_encoding_progress + broadcast_file_update(media_file) + Rails.logger.info "[CraMediaCloud::CheckProgressJob] Phase #{response['phase']} done, cleared remote_id to discover next phase" + return nil + end + + response elsif media_file.remote_reference_id.present? - jobs = api.get_jobs(ref_id: media_file.remote_reference_id) + all_jobs = api.get_jobs(ref_id: media_file.remote_reference_id) + # Filter out REMOVED jobs. REMOVED appears when job content has been + # explicitly deleted via DeleteMediaJob (DELETE /jobs/{id}/content) — + # production data confirms CRA does NOT auto-purge completed jobs. + jobs = all_jobs.reject { |j| j["status"] == "REMOVED" } if jobs.empty? 
- Rails.logger.warn "[CraMediaCloud::CheckProgressJob] No jobs found for reference_id #{media_file.remote_reference_id}" + # All jobs REMOVED with stored phase data: job content was deleted + # (e.g. via DeleteMediaJob) after encoding completed. Finalize from + # the phase output we already saved locally rather than hitting CRA. + if multi_phase? && all_jobs.present? && has_any_completed_phase? + Rails.logger.info "[CraMediaCloud::CheckProgressJob] All CRA jobs REMOVED for video #{media_file.id} " \ + "with completed phase data. Finalizing from stored phase output." + finalize_from_completed_phases! + return :finalized + end + + Rails.logger.info "[CraMediaCloud::CheckProgressJob] No active jobs found for reference_id #{media_file.remote_reference_id} " \ + "(video #{media_file.id}, #{all_jobs.size} removed) — CRA may still be downloading the file" return nil end - # Get the most recent job by lastModified - job = jobs.max_by { |j| Time.parse(j["lastModified"]) } - Rails.logger.debug "[CraMediaCloud::CheckProgressJob] Found #{jobs.size} job(s) for #{media_file.remote_reference_id}, using most recent from #{job['lastModified']}" - job + if multi_phase? + select_multi_phase_job(jobs) + else + job = jobs.max_by { |j| Time.parse(j["lastModified"]) } + Rails.logger.info "[CraMediaCloud::CheckProgressJob] Found #{jobs.size} job(s) for #{media_file.remote_reference_id} (video #{media_file.id}): " \ + "status=#{job['status']}, progress=#{job['progress']}, id=#{job['id']}, " \ + "profileGroup=#{job['profileGroup']}, lastModified=#{job['lastModified']}" + job + end else - # No remote references exist - this should be handled by MonitorProcessingJob - Rails.logger.info "[CraMediaCloud::CheckProgressJob] No remote_id or remote_reference_id found for #{media_file.class.name} ID #{media_file.id}. MonitorProcessingJob should handle this." 
- nil # Return nil to stop processing this check job + Rails.logger.info "[CraMediaCloud::CheckProgressJob] No remote_id or remote_reference_id for #{media_file.class.name} ID #{media_file.id}" + nil + end + end + + def select_multi_phase_job(jobs) + # Sort by phase descending, pick the highest-phase job + job = jobs.sort_by { |j| -(j["phase"].to_i) }.first + + phase = job["phase"].to_i + Rails.logger.debug "[CraMediaCloud::CheckProgressJob] Multi-phase: found #{jobs.size} job(s), highest phase=#{phase}/#{expected_phases}, status=#{job['status']}" + + # If the highest-phase job is DONE but we haven't reached the final phase, + # check if CRA created a next phase job. CRA creates all phase jobs upfront — + # if no higher phase exists by now, CRA decided this is the final output. + if job["status"] == "DONE" && phase < expected_phases + next_phase_exists = jobs.any? { |j| j["phase"].to_i > phase } + + if next_phase_exists + # Next phase job exists but hasn't surpassed the current one yet — wait. + # Lock to guard against concurrent MonitorProcessingJob runs. + phase_data_saved = false + media_file.with_lock do + unless media_file.remote_services_data["phase_#{phase}_completed_at"].present? + save_intermediate_phase_data(job) + phase_data_saved = true + end + end + # Broadcast after lock so the UI reflects SD playback availability. + if phase_data_saved + broadcast_encoding_progress + broadcast_file_update(media_file) + end + return nil + else + # CRA did not create further phases — treat this as the final output + Rails.logger.info "[CraMediaCloud::CheckProgressJob] CRA created no phase #{phase + 1} job for video #{media_file.id}. " \ + "Treating phase #{phase} output as final." + return job + end + end + + job + end + + def has_any_completed_phase? + (1..expected_phases).any? { |p| media_file.remote_services_data["phase_#{p}_completed_at"].present? 
} + end + + # When all CRA jobs are REMOVED but we have stored phase output data, + # finalize the video using the last completed phase's output. + def finalize_from_completed_phases! + last_phase = (1..expected_phases).reverse_each.find { |p| + media_file.remote_services_data["phase_#{p}_completed_at"].present? + } + + media_file.with_lock do + # Build content_mp4_paths from all completed phases + content_mp4_paths = {} + (1..last_phase).each do |p| + phase_paths = media_file.remote_services_data["phase_#{p}_content_mp4_paths"] + content_mp4_paths.merge!(phase_paths) if phase_paths.present? + end + + media_file.remote_services_data.merge!( + "content_mp4_paths" => content_mp4_paths, + "processing_state" => "full_media_processed", + "progress_percentage" => 100.0, + "encoding_completed_at" => Time.current.iso8601, + ) + + media_file.processing_done! + end + + # Broadcasts after lock release to avoid Redis I/O while holding a Postgres row lock. + broadcast_file_update(media_file) + broadcast_encoding_progress + + Rails.logger.info "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} finalized from #{last_phase} completed phase(s)" + end + + def save_intermediate_phase_data(phase_job) + phase_num = phase_job["phase"].to_i + mp4_paths = {} + manifest_hls = nil + manifest_dash = nil + + phase_job["output"]&.each do |output_file| + case output_file["type"] + when "MP4" + mp4_paths[output_file["profiles"].first] = output_file["path"] + when "HLS" + manifest_hls = select_output_file(manifest_hls, output_file) + when "DASH" + manifest_dash = select_output_file(manifest_dash, output_file) + when "THUMBNAILS" + update_thumbnail_path(output_file) + end end + + updates = { + "phase_#{phase_num}_content_mp4_paths" => mp4_paths, + "phase_#{phase_num}_completed_at" => Time.current.iso8601, + "phase_#{phase_num}_remote_id" => phase_job["id"], + } + updates["manifest_hls_path"] = manifest_hls["path"] if manifest_hls + updates["manifest_dash_path"] = manifest_dash["path"] if 
manifest_dash + + media_file.remote_services_data.merge!(updates) + media_file.save! + + Rails.logger.info "[CraMediaCloud::CheckProgressJob] Phase #{phase_num}/#{expected_phases} complete for video #{media_file.id}, " \ + "saved #{mp4_paths.size} MP4 paths" \ + "#{manifest_hls ? ', HLS manifest' : ''}" \ + "#{manifest_dash ? ', DASH manifest' : ''}." end def update_remote_service_data(response) @@ -76,35 +283,113 @@ def update_remote_service_data(response) case response["status"] when "DONE" process_output_hash(response["output"]) + parse_encoding_messages(response) media_file.remote_services_data.merge!( "output" => response["output"], "processing_state" => "full_media_processed", + "progress_percentage" => 100.0, + "encoding_completed_at" => Time.current.iso8601, ) - when "PROCESSING", "CREATED" - media_file.remote_services_data.merge!( - "processing_state" => "full_media_processing", - "progress_percentage" => (response["progress"] ? response["progress"] * 100.0 : 0).round(1), - ) + when "WAITING", "PROCESSING", "CREATED", "VALIDATING" + update_progress(response) when "FAILED", "ERROR" - error_messages = response["messages"]&.filter_map { |msg| msg["message"] if msg["type"] == "ERROR" }&.join("; ") + handle_job_failure(response) + end + end - media_file.remote_services_data.merge!( - "processing_state" => "upload_failed", - "error_message" => error_messages || "Upload failed", - "failed_at" => Time.current.iso8601, - "progress_percentage" => nil - ) + def update_progress(response) + return unless response + + media_file.remote_services_data["remote_id"] ||= response["id"] + media_file.remote_services_data["cra_status"] = response["status"] + media_file.remote_services_data["last_progress_check_at"] = Time.current.iso8601 + + raw_progress = response["progress"].to_f + media_file.remote_services_data["cra_raw_progress"] = raw_progress + + parse_encoding_messages(response) + + phase = current_phase(response) + media_file.remote_services_data["current_phase"] = 
phase + + if multi_phase? && response["phase"].to_i > 0 + media_file.remote_services_data["current_encoding_phase"] = response["phase"].to_i + end + + media_file.remote_services_data["progress_percentage"] = phase == "encoding" ? (raw_progress * 100).round(0) : nil + end + + # Derive current phase from CRA status and completed message phases. + def current_phase(response) + case response["status"] + when "WAITING", "CREATED", "VALIDATING" + "waiting" + when "PROCESSING" + phases = media_file.remote_services_data["phases_completed"] || [] + phases.include?("video") ? "packaging" : "encoding" + end + end + + def handle_job_failure(response) + error_messages = response["messages"]&.filter_map { |msg| msg["message"] if msg["type"] == "ERROR" }&.join("; ") + retry_count = (media_file.remote_services_data["retry_count"] || 0) + 1 + will_retry = retry_count <= 1 + + media_file.remote_services_data.merge!( + "processing_state" => "encoding_failed", + "error_message" => error_messages || "Encoding failed", + "failed_at" => Time.current.iso8601, + "progress_percentage" => nil, + "current_phase" => nil, + "retry_count" => retry_count, + ) + + if will_retry + media_file.remote_services_data["retry_scheduled_at"] = (Time.current + 2.minutes).iso8601 + else + media_file.remote_services_data.delete("retry_scheduled_at") + end - Rails.logger.error "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} failed: #{error_messages}" + # Single save via processing_failed! — all data merged above. + # Broadcasts are emitted by check_progress after with_lock returns. + media_file.processing_failed! 
+ + if will_retry + @pending_retry = true + Rails.logger.warn "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} failed (attempt #{retry_count}), scheduling retry in 2 minutes: #{error_messages}" + else + Rails.logger.error "[CraMediaCloud::CheckProgressJob] Video #{media_file.id} failed permanently (attempt #{retry_count}): #{error_messages}" + end + end + + def parse_encoding_messages(response) + messages = response["messages"] + return unless messages.present? + + phases_completed = [] + messages.each do |msg| + text = msg["message"].to_s + phases_completed << "validation" if text.include?("verification: finished") + phases_completed << "audio" if text.include?("Transcoding worker - audio: finished") + phases_completed << "thumbnails" if text.include?("Transcoding worker - thumbnails: finished") + phases_completed << "video" if text.include?("Transcoding worker - video: finished") + phases_completed << "packaging" if text.include?("copying: started") end + + # Extract video duration from outputParams + video_duration = response.dig("outputParams", "duration") + + media_file.remote_services_data["phases_completed"] = phases_completed.uniq + media_file.remote_services_data["video_duration"] = video_duration if video_duration end - def process_output_hash(process_output_hash) - content_mp4_paths = {} - manifest_hls, manifest_dash = nil, nil + def process_output_hash(output_data) + content_mp4_paths = media_file.remote_services_data["content_mp4_paths"] || {} + manifest_hls = nil + manifest_dash = nil - process_output_hash.each do |output_file| + output_data.each do |output_file| case output_file["type"] when "MP4" content_mp4_paths[output_file["profiles"].first] = output_file["path"] @@ -117,11 +402,10 @@ def process_output_hash(process_output_hash) end end - media_file.remote_services_data.merge!( - "content_mp4_paths" => content_mp4_paths, - "manifest_hls_path" => manifest_hls["path"], - "manifest_dash_path" => manifest_dash["path"], - ) + updates = { 
"content_mp4_paths" => content_mp4_paths } + updates["manifest_hls_path"] = manifest_hls["path"] if manifest_hls + updates["manifest_dash_path"] = manifest_dash["path"] if manifest_dash + media_file.remote_services_data.merge!(updates) end def select_output_file(current, incoming) @@ -137,14 +421,70 @@ def update_thumbnail_path(output_file) end end + def broadcast_encoding_progress + return if message_bus_user_ids.blank? + + phase = media_file.remote_services_data["current_phase"] + retry_count = media_file.remote_services_data["retry_count"].to_i + + failed_label = if media_file.processing_failed? + if retry_count < 2 && media_file.remote_services_data["retry_scheduled_at"].present? + I18n.t("folio.console.files.show.encoding_info_component.phase_failed_retrying") + else + I18n.t("folio.console.files.show.encoding_info_component.phase_failed") + end + end + + phase_label = if phase.present? + encoding_phase = media_file.remote_services_data["current_encoding_phase"] + if multi_phase? && encoding_phase.present? 
+ phase_name = media_file.encoder_phase_name(encoding_phase) + if phase_name + I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}_named", + name: phase_name, + default: I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}", default: phase.humanize)) + else + I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}_multi", + phase: encoding_phase, + total: expected_phases, + default: I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}", default: phase.humanize)) + end + else + I18n.t("folio.console.files.show.encoding_info_component.phase_#{phase}", default: phase.humanize) + end + end + + MessageBus.publish Folio::MESSAGE_BUS_CHANNEL, + { + type: "Folio::CraMediaCloud::CheckProgressJob/encoding_progress", + data: { + id: media_file.id, + aasm_state: media_file.aasm_state, + aasm_state_human: serialized_file(media_file).dig(:data, :attributes, :aasm_state_human), + progress_percentage: media_file.remote_services_data["progress_percentage"], + current_phase: phase, + current_phase_label: phase_label, + failed_label: failed_label, + cra_status: media_file.remote_services_data["cra_status"], + }, + }.to_json, + user_ids: message_bus_user_ids + end + def check_again_later - # Pass encoding_generation to ensure stale jobs don't interfere Folio::CraMediaCloud::CheckProgressJob.set(wait: 15.seconds).perform_later( media_file, encoding_generation: @encoding_generation || media_file.encoding_generation ) end + def processing_timed_out? + started_at = media_file.remote_services_data["processing_step_started_at"] + return false if started_at.blank? 
+ + Time.parse(started_at.to_s) < MAX_PROCESSING_DURATION.ago + end + def api @api ||= Folio::CraMediaCloud::Api.new end diff --git a/app/jobs/folio/cra_media_cloud/create_media_job.rb b/app/jobs/folio/cra_media_cloud/create_media_job.rb index 66d43fb38a..7d0de890ed 100644 --- a/app/jobs/folio/cra_media_cloud/create_media_job.rb +++ b/app/jobs/folio/cra_media_cloud/create_media_job.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class Folio::CraMediaCloud::CreateMediaJob < Folio::ApplicationJob + include Folio::S3::Client + # Discard if file no longer exists discard_on ActiveJob::DeserializationError @@ -9,8 +11,22 @@ class Folio::CraMediaCloud::CreateMediaJob < Folio::ApplicationJob def perform(media_file) fail "only video files are supported" unless media_file.is_a?(Folio::File::Video) - # Generate reference_id based on current file content - current_reference_id = generate_reference_id(media_file) + # If retrying after failure, transition back to processing + if media_file.processing_failed? && media_file.remote_services_data&.dig("retry_count").to_i > 0 + media_file.retry_processing! + Rails.logger.info "[CraMediaCloud::CreateMediaJob] Video #{media_file.id} retrying after failure" + end + + # Generate reference_id based on current file content. + # If the source file no longer exists on S3, mark as permanently failed + # and don't retry — the file cannot be re-uploaded without the original. 
+ begin + current_reference_id = generate_reference_id(media_file) + rescue Excon::Error::NotFound => e + Rails.logger.error "[CraMediaCloud::CreateMediaJob] Source file not found on S3 for video #{media_file.id}: #{e.message}" + mark_source_file_missing!(media_file) + return + end # Check API for existing job with this reference_id existing_job_result = check_existing_job(current_reference_id, media_file) @@ -34,63 +50,56 @@ def perform(media_file) private def generate_reference_id(media_file) - # Combine video slug with S3 ETag (actual file content MD5) for stable, unique reference - # Format: {slug}-{s3_etag} - # This ensures uniqueness across environments and file versions + # Combine environment, video slug, ID, S3 ETag, and encoding_generation for unique reference. + # ID guarantees uniqueness per video record (slug alone is derived from filename and can collide). + # encoding_generation changes on each re-encode, ensuring CRA gets a fresh refId. + # Format: {env}-{slug}-{id}-{s3_etag}-{generation} + # Total length is capped at 128 chars to avoid CRA lookup failures with long slugs. s3_etag = get_s3_etag(media_file) + env_prefix = ENV.fetch("DRAGONFLY_RAILS_ENV", Rails.env) + generation = media_file.encoding_generation + + if generation.nil? + # encoding_generation may not be visible yet if the enclosing transaction + # (e.g. S3::CreateFileJob save) hasn't committed. Reload to get committed data. + media_file.reload + generation = media_file.encoding_generation + end - "#{media_file.slug}-#{s3_etag[0..7]}" - end + if generation.nil? 
+ fail "encoding_generation not set for video #{media_file.id} — cannot generate unique reference_id (would match stale CRA jobs)" + end - def get_s3_etag(media_file) - # Get S3 ETag (MD5 hash) without downloading the file - s3_metadata = get_s3_metadata(media_file) - extract_etag(s3_metadata).delete_prefix('"').delete_suffix('"') - end + suffix = "-#{media_file.id}-#{s3_etag[0..7]}-#{generation}" + max_slug_length = 128 - env_prefix.length - 1 - suffix.length + slug = media_file.slug.to_s[0, [max_slug_length, 1].max] - def get_s3_metadata(media_file) - s3_datastore = Dragonfly.app.datastore - s3_object_key = [s3_datastore.root_path, media_file.file_uid].join("/") - Rails.logger.debug("[CraMediaCloud::CreateMediaJob] Fetching S3 metadata for key: #{s3_object_key}") - s3_datastore.storage.head_object(ENV["S3_BUCKET_NAME"], s3_object_key) + "#{env_prefix}-#{slug}#{suffix}" end - def extract_etag(response) - # Handle different response types (AWS SDK, Excon, etc.) - if response.respond_to?(:etag) - response.etag - elsif response.respond_to?(:headers) - response.headers["ETag"] || response.headers["etag"] || response.headers["Etag"] - else - raise "Cannot extract ETag from response type: #{response.class}" - end + def get_s3_etag(media_file) + s3_metadata = s3_dragonfly_head_object(media_file.file_uid) + extract_s3_etag(s3_metadata).delete_prefix('"').delete_suffix('"') end def check_existing_job(reference_id, media_file) api = Folio::CraMediaCloud::Api.new jobs = api.get_jobs(ref_id: reference_id) - if jobs.empty? 
- { status: :not_found, job: nil } - else - # Get the most recent job with this reference_id by lastModified - job = jobs.max_by { |j| Time.parse(j["lastModified"]) } - Rails.logger.debug "[CraMediaCloud::CreateMediaJob] Found #{jobs.size} job(s) for #{reference_id}, using most recent from #{job['lastModified']}" - - case job["status"] - when "PROCESSING", "CREATED" - { status: :processing, job: job } - when "DONE" - { status: :done, job: job } - when "FAILED", "ERROR" - { status: :failed, job: job } - else - { status: :not_found, job: job } - end - end + # No need to pre-filter REMOVED jobs: JobResolver maps REMOVED → :not_found, + # so CreateMediaJob will proceed with a fresh upload. (MonitorProcessingJob + # pre-filters REMOVED before passing to reconcile_with_remote_jobs because it + # needs to distinguish "all REMOVED with stored phase data → finalize" from + # "no jobs at all → clear state". CreateMediaJob has no such distinction to make.) + result = Folio::CraMediaCloud::JobResolver.resolve(jobs) + + Rails.logger.debug "[CraMediaCloud::CreateMediaJob] Job check for #{reference_id}: " \ + "#{jobs.size} job(s), status=#{result[:status]}" + + result rescue => e Rails.logger.warn "[CraMediaCloud::CreateMediaJob] Could not check existing job for #{reference_id}: #{e.message}" - { status: :not_found, job: nil } # Assume not found if API call fails + { status: :not_found, job: nil } end def update_local_state_for_successful_job(media_file, job, reference_id) @@ -125,13 +134,33 @@ def update_local_state_for_successful_job(media_file, job, reference_id) Rails.logger.info "[CraMediaCloud::CreateMediaJob] Successfully updated local state for video #{media_file.id} to point to successful job #{successful_job_id}" else - Rails.logger.debug "[CraMediaCloud::CreateMediaJob] Local state already points to correct job #{successful_job_id} for video #{media_file.id}" + # remote_id already matches, but if local processing_state is stale (e.g. 
upload_failed + # or encoding_failed set before CRA recovered), schedule CheckProgressJob to finalize. + # This handles videos that got stuck with a failed state while the CRA job eventually + # completed successfully on CRA's side. + if media_file.remote_services_data["processing_state"] != "full_media_processed" + media_file.remote_services_data.merge!( + "processing_state" => "full_media_processing", + "processing_step_started_at" => Time.current.iso8601 + ) + media_file.save! + Folio::CraMediaCloud::CheckProgressJob.perform_later( + media_file, + encoding_generation: media_file.encoding_generation + ) + Rails.logger.info "[CraMediaCloud::CreateMediaJob] Remote ID #{successful_job_id} matches but state " \ + "was stale (#{media_file.remote_services_data['processing_state']}), " \ + "scheduling CheckProgressJob for video #{media_file.id}" + else + Rails.logger.debug "[CraMediaCloud::CreateMediaJob] Local state already points to correct job #{successful_job_id} for video #{media_file.id}" + end end end def process_media_upload(media_file, reference_id) # Capture encoding_generation before any state updates current_generation = media_file.encoding_generation + profile_group = media_file.try(:encoder_profile_group) # Set state to creating_media_job before starting upload rs_data = media_file.remote_services_data || {} @@ -145,24 +174,29 @@ def process_media_upload(media_file, reference_id) Rails.logger.info "[CraMediaCloud::CreateMediaJob] Starting upload for video #{media_file.id} with reference_id: #{reference_id}" begin - Folio::CraMediaCloud::Encoder.new.upload_file( + processing_phases = media_file.try(:encoder_processing_phases) + encoder = Folio::CraMediaCloud::Encoder.new + + encoder.upload_file( media_file, - profile_group: media_file.try(:encoder_profile_group), + profile_group: profile_group, + processing_phases: processing_phases, reference_id: reference_id ) - # Update to processing state after successful upload media_file.remote_services_data.merge!({ 
"reference_id" => reference_id, "processing_state" => "full_media_processing", - "processing_step_started_at" => Time.current.iso8601 + "processing_step_started_at" => Time.current.iso8601, + "processing_phases" => processing_phases.to_i > 1 ? processing_phases : nil, }) + # Clear any old remote_id since we're starting fresh media_file.remote_services_data.delete("remote_id") media_file.save! # Pass encoding_generation so CheckProgressJob can detect stale jobs - Folio::CraMediaCloud::CheckProgressJob.set(wait: 30.seconds).perform_later( + Folio::CraMediaCloud::CheckProgressJob.set(wait: 10.seconds).perform_later( media_file, encoding_generation: current_generation ) @@ -185,4 +219,24 @@ def process_media_upload(media_file, reference_id) raise e end end + + def mark_source_file_missing!(media_file) + rs_data = media_file.remote_services_data || {} + rs_data.merge!({ + "service" => "cra_media_cloud", + "processing_state" => "source_file_missing", + "error_message" => "Source file not found on S3 (file_uid: #{media_file.file_uid})", + "processing_step_started_at" => Time.current.iso8601, + }) + media_file.remote_services_data = rs_data + + begin + media_file.processing_failed! + rescue => e + Rails.logger.warn "[CraMediaCloud::CreateMediaJob] AASM transition failed for video #{media_file.id} (#{e.message}), forcing state" + media_file.update_columns(aasm_state: "processing_failed", remote_services_data: rs_data, updated_at: Time.current) + end + + broadcast_file_update(media_file) + end end diff --git a/app/jobs/folio/cra_media_cloud/delete_media_job.rb b/app/jobs/folio/cra_media_cloud/delete_media_job.rb index e30ea28814..70d6f369a0 100644 --- a/app/jobs/folio/cra_media_cloud/delete_media_job.rb +++ b/app/jobs/folio/cra_media_cloud/delete_media_job.rb @@ -4,26 +4,39 @@ class Folio::CraMediaCloud::DeleteMediaJob < Folio::ApplicationJob queue_as :slow def perform(id, reference_id: nil) - if id.present? - api.delete_job_content(id) - elsif reference_id.present? 
- # Get all jobs with this reference_id + if id.blank? && reference_id.blank? + Rails.logger.warn "[CraMediaCloud::DeleteMediaJob] Skipping — no remote_id or reference_id (file was never processed by CRA)" + return + end + + if reference_id.present? + # Prefer reference_id — deletes all phase jobs (multi-phase encoding creates multiple jobs per ref) jobs = api.get_jobs(ref_id: reference_id) if jobs.any? - # Delete content for all jobs with this reference_id jobs.each do |job| Rails.logger.info "[CraMediaCloud::DeleteMediaJob] Deleting job content for job ID #{job['id']} (ref: #{reference_id})" - api.delete_job_content(job["id"]) + safe_delete_job_content(job["id"]) end Rails.logger.info "[CraMediaCloud::DeleteMediaJob] Deleted content for #{jobs.size} job(s) with reference_id #{reference_id}" end - else - raise "Missing remote_key and remote_reference_id" + elsif id.present? + safe_delete_job_content(id) end end private + def safe_delete_job_content(job_id) + api.delete_job_content(job_id) + rescue RuntimeError => e + # CRA returns 400 when content was already deleted — that's fine, goal achieved + if e.message.include?("status 400") || e.message.include?("status 404") + Rails.logger.info "[CraMediaCloud::DeleteMediaJob] Job #{job_id} content already removed (#{e.message})" + else + raise + end + end + def api @api ||= Folio::CraMediaCloud::Api.new end diff --git a/app/jobs/folio/cra_media_cloud/monitor_processing_job.rb b/app/jobs/folio/cra_media_cloud/monitor_processing_job.rb index ffa1f55a0f..a3a9e9c8db 100644 --- a/app/jobs/folio/cra_media_cloud/monitor_processing_job.rb +++ b/app/jobs/folio/cra_media_cloud/monitor_processing_job.rb @@ -8,6 +8,9 @@ def perform return if another_monitor_job_running? 
begin + # Handle videos stuck in unprocessed state with existing files + handle_stuck_unprocessed_videos + # Handle videos with orphaned or inconsistent states first handle_orphaned_videos @@ -17,6 +20,9 @@ def perform # Handle videos with failed uploads that need retry handle_failed_uploads_needing_retry + # Safety net: retry failed videos whose retry job was lost + handle_failed_videos_awaiting_retry + # Handle videos that are already processing and need progress checking handle_videos_needing_progress_check ensure @@ -26,22 +32,53 @@ def perform end private + def handle_stuck_unprocessed_videos + stuck = Folio::File::Video + .where(aasm_state: :unprocessed) + .where("file_uid IS NOT NULL AND file_uid != ''") + .where("created_at < ?", 5.minutes.ago) + + return if stuck.empty? + + Rails.logger.info("MonitorProcessingJob: Found #{stuck.count} stuck unprocessed video(s) with files") + + stuck.each do |video| + Rails.logger.info("MonitorProcessingJob: Triggering process! for stuck video ##{video.id} (created #{video.created_at})") + begin + video.process! + rescue => e + Rails.logger.error("MonitorProcessingJob: Failed to process stuck video ##{video.id}: #{e.message}") + end + end + end + def find_processing_videos Folio::File::Video .where(aasm_state: :processing) .where("remote_services_data ->> 'service' = ?", "cra_media_cloud") - .where("remote_services_data ->> 'processing_state' IN (?)", ["full_media_processing", "upload_completed"]) + .where("remote_services_data ->> 'processing_state' IN (?)", + %w[full_media_processing upload_completed]) end def find_videos_needing_upload - # Find videos that need initial upload (no remote references) + # Find videos that need initial upload (no remote references). + # Freshly enqueued videos (< 10 min) are excluded — they already have a + # CreateMediaJob queued. But enqueued videos older than 10 min are included + # because the job was likely lost (e.g., pod OOMKill). 
The Ruby handler + # checks for running/scheduled jobs before re-scheduling. Folio::File::Video .where(aasm_state: :processing) .where( "(remote_services_data ->> 'service' IS NULL OR remote_services_data ->> 'service' = ?) AND " \ "(remote_services_data ->> 'remote_id' IS NULL) AND " \ - "(remote_services_data ->> 'reference_id' IS NULL)", - "cra_media_cloud" + "(remote_services_data ->> 'reference_id' IS NULL) AND " \ + "(remote_services_data ->> 'processing_state' IS DISTINCT FROM ? OR " \ + " (remote_services_data ->> 'processing_state' = ? AND " \ + " (remote_services_data ->> 'processing_step_started_at')::timestamptz < ?))", + "cra_media_cloud", + "enqueued", + "enqueued", + 10.minutes.ago ) end @@ -50,8 +87,8 @@ def find_failed_uploads_needing_retry Folio::File::Video .where(aasm_state: :processing) .where("remote_services_data ->> 'service' = ?", "cra_media_cloud") - .where("remote_services_data ->> 'processing_state' = ?", "upload_failed") - .where("(remote_services_data ->> 'processing_step_started_at')::timestamp < ?", 5.minutes.ago) + .where("remote_services_data ->> 'processing_state' IN (?)", %w[upload_failed encoding_failed]) + .where("(remote_services_data ->> 'processing_step_started_at')::timestamptz < ?", 5.minutes.ago) end def handle_videos_needing_upload @@ -72,13 +109,13 @@ def handle_videos_needing_upload next end - # Check if video is stuck in creating state + # Check if video is stuck in creating/enqueued state rs_data = video.remote_services_data || {} Rails.logger.info("MonitorProcessingJob: Video ##{video.id} remote_services_data: #{rs_data}") - if rs_data["processing_state"] == "creating_media_job" + if rs_data["processing_state"].in?(%w[creating_media_job enqueued]) started_at = rs_data["processing_step_started_at"] - Rails.logger.info("MonitorProcessingJob: Video ##{video.id} is in creating_media_job state, started_at: #{started_at}") + Rails.logger.info("MonitorProcessingJob: Video ##{video.id} is in #{rs_data['processing_state']} 
state, started_at: #{started_at}") # Check if upload is genuinely stuck vs. just taking a long time if started_at && !upload_is_stuck?(video, Time.parse(started_at)) @@ -122,6 +159,31 @@ def handle_failed_uploads_needing_retry end end + def handle_failed_videos_awaiting_retry + # Safety net: find videos that were scheduled for retry but the retry job was lost + videos = Folio::File::Video + .where(aasm_state: :processing_failed) + .where("remote_services_data ->> 'service' = ?", "cra_media_cloud") + .where("COALESCE((remote_services_data ->> 'retry_count')::int, 0) < 2") + .where("(remote_services_data ->> 'retry_scheduled_at')::timestamptz < ?", 5.minutes.ago) + + return if videos.empty? + + Rails.logger.info("MonitorProcessingJob: Found #{videos.count} failed videos awaiting retry (safety net)") + + scheduled_create_jobs = find_scheduled_create_media_job_ids + + videos.each do |video| + if scheduled_create_jobs.include?(video.id) + Rails.logger.debug("MonitorProcessingJob: Failed video ##{video.id} already has scheduled CreateMediaJob") + next + end + + Rails.logger.info("MonitorProcessingJob: Re-scheduling retry for failed video ##{video.id}") + Folio::CraMediaCloud::CreateMediaJob.perform_later(video) + end + end + def handle_videos_needing_progress_check processing_videos = find_processing_videos @@ -155,7 +217,7 @@ def handle_videos_needing_progress_check end Rails.logger.debug("MonitorProcessingJob: Scheduling CheckProgressJob for video ##{video.id}") - Folio::CraMediaCloud::CheckProgressJob.perform_later(video) + Folio::CraMediaCloud::CheckProgressJob.perform_later(video, encoding_generation: video.remote_services_data&.dig("encoding_generation")) end end @@ -180,17 +242,19 @@ def handle_orphaned_videos end def find_orphaned_videos - # Find videos that are processing but might have lost track of their remote jobs + # Find videos that are processing but might have lost track of their remote jobs. 
+ # Both conditions require a time threshold to avoid racing with just-uploaded videos + # (CRA needs time to ingest the manifest before a remote_id appears). Folio::File::Video .where(aasm_state: :processing) .where("remote_services_data ->> 'service' = ?", "cra_media_cloud") .where( - # Videos with reference_id but no remote_id, or videos that have been - # in creating_media_job state for a very long time - "(remote_services_data ->> 'reference_id' IS NOT NULL AND remote_services_data ->> 'remote_id' IS NULL) OR " \ + "(remote_services_data ->> 'reference_id' IS NOT NULL AND " \ + "remote_services_data ->> 'remote_id' IS NULL AND " \ + "(remote_services_data ->> 'processing_step_started_at')::timestamptz < ?) OR " \ "(remote_services_data ->> 'processing_state' = 'creating_media_job' AND " \ - "(remote_services_data ->> 'processing_step_started_at')::timestamp < ?)", - 3.hours.ago + "(remote_services_data ->> 'processing_step_started_at')::timestamptz < ?)", + 10.minutes.ago, 30.minutes.ago ) end @@ -208,7 +272,6 @@ def reconcile_video_state(video) if jobs.empty? 
Rails.logger.warn("MonitorProcessingJob: No remote jobs found for video ##{video.id} reference_id: #{reference_id}") - # Video has reference_id but no remote jobs - needs re-upload rs_data.delete("reference_id") rs_data.delete("remote_id") rs_data.delete("processing_state") @@ -217,42 +280,61 @@ def reconcile_video_state(video) return end - latest_job = jobs.max_by { |j| Time.parse(j["lastModified"]) } - current_remote_id = rs_data["remote_id"] + reconcile_with_remote_jobs(video, rs_data, jobs) - case latest_job["status"] - when "DONE" - if current_remote_id != latest_job["id"] - Rails.logger.info("MonitorProcessingJob: Updating video ##{video.id} to point to successful job #{latest_job['id']}") - rs_data["remote_id"] = latest_job["id"] - rs_data["processing_state"] = "full_media_processing" - video.update_column(:remote_services_data, rs_data) + rescue => e + Rails.logger.error("MonitorProcessingJob: Error reconciling video ##{video.id}: #{e.message}") + end + end - # Schedule progress check to update final state - Folio::CraMediaCloud::CheckProgressJob.perform_later(video) - end - when "PROCESSING", "CREATED" - if current_remote_id != latest_job["id"] - Rails.logger.info("MonitorProcessingJob: Updating video ##{video.id} to point to processing job #{latest_job['id']}") - rs_data["remote_id"] = latest_job["id"] - rs_data["processing_state"] = "full_media_processing" - video.update_column(:remote_services_data, rs_data) - end + # NOTE: update_column calls below are non-atomic read-modify-write on + # remote_services_data. Safe because MonitorProcessingJob uses a Redis lock + # (another_monitor_job_running?) to prevent concurrent instances. + def reconcile_with_remote_jobs(video, rs_data, jobs) + # Filter out REMOVED jobs before resolution: for multi-phase encodings, a + # REMOVED phase-1 job may have a later lastModified than an active phase-2 + # job, causing JobResolver to select it and return :not_found — silently + # skipping the active job. 
If all remaining jobs are REMOVED, schedule + # CheckProgressJob which handles the finalize_from_completed_phases! path. + active_jobs = jobs.reject { |j| j["status"] == "REMOVED" } + + if active_jobs.empty? + Rails.logger.info("MonitorProcessingJob: All CRA jobs REMOVED for video ##{video.id} — scheduling CheckProgressJob to finalize") + Folio::CraMediaCloud::CheckProgressJob.perform_later(video, encoding_generation: rs_data["encoding_generation"]) + return + end - # Schedule progress check - Folio::CraMediaCloud::CheckProgressJob.perform_later(video) - when "FAILED", "ERROR" - Rails.logger.warn("MonitorProcessingJob: Latest job for video ##{video.id} failed, marking for retry") - rs_data.merge!({ - "processing_state" => "upload_failed", - "error_message" => "Remote job failed: #{latest_job['status']}", - "processing_step_started_at" => Time.current.iso8601 - }) + result = Folio::CraMediaCloud::JobResolver.resolve(active_jobs) + latest_job = result[:job] + return unless latest_job + + current_remote_id = rs_data["remote_id"] + + case result[:status] + when :done + if current_remote_id != latest_job["id"] + Rails.logger.info("MonitorProcessingJob: Updating video ##{video.id} to point to successful job #{latest_job['id']}") + rs_data["remote_id"] = latest_job["id"] + rs_data["processing_state"] = "full_media_processing" video.update_column(:remote_services_data, rs_data) end - - rescue => e - Rails.logger.error("MonitorProcessingJob: Error reconciling video ##{video.id}: #{e.message}") + Folio::CraMediaCloud::CheckProgressJob.perform_later(video, encoding_generation: video.remote_services_data&.dig("encoding_generation")) + when :processing + if current_remote_id != latest_job["id"] + Rails.logger.info("MonitorProcessingJob: Updating video ##{video.id} to point to processing job #{latest_job['id']}") + rs_data["remote_id"] = latest_job["id"] + rs_data["processing_state"] = "full_media_processing" + video.update_column(:remote_services_data, rs_data) + end + 
Folio::CraMediaCloud::CheckProgressJob.perform_later(video, encoding_generation: video.remote_services_data&.dig("encoding_generation")) + when :failed + Rails.logger.warn("MonitorProcessingJob: Latest job for video ##{video.id} failed, marking for retry") + rs_data.merge!({ + "processing_state" => "encoding_failed", + "error_message" => "Remote job failed: #{latest_job['status']}", + "processing_step_started_at" => Time.current.iso8601 + }) + video.update_column(:remote_services_data, rs_data) end end @@ -334,42 +416,44 @@ def extract_video_id_from_job_data(job_data) end def processing_too_long?(video) - # Consider a video stuck if it's been processing for more than 2 hours started_at = video.remote_services_data["processing_step_started_at"] return false unless started_at + # Multi-phase encoding can legitimately take longer — scale timeouts by phase count + phases = video.remote_services_data["processing_phases"].to_i + phase_multiplier = [phases, 1].max + elapsed_hours = (Time.current - Time.parse(started_at)) / 1.hour + hard_timeout = 6 * phase_multiplier + warn_timeout = 2 * phase_multiplier - # Mark as failed after very long processing (6+ hours) - if elapsed_hours > 6 - Rails.logger.error("MonitorProcessingJob: Marking video ##{video.id} as failed after #{elapsed_hours.round(1)} hours") + # Mark as failed after very long processing + if elapsed_hours > hard_timeout + Rails.logger.error("MonitorProcessingJob: Marking video ##{video.id} as failed after #{elapsed_hours.round(1)} hours (timeout: #{hard_timeout}h)") - # Persist failure state even if validations fail begin video.processing_failed! 
+ broadcast_file_update(video) rescue => e Rails.logger.warn("MonitorProcessingJob: AASM transition failed (#{e.message}), forcing state via update_columns") - # Use update_columns to update in DB and then reload to sync memory video.update_columns(aasm_state: "processing_failed", updated_at: Time.current) video.reload + broadcast_file_update(video) end return true - elsif elapsed_hours > 2 - Rails.logger.warn("MonitorProcessingJob: Video ##{video.id} has been processing for #{elapsed_hours.round(1)} hours") + elsif elapsed_hours > warn_timeout + Rails.logger.warn("MonitorProcessingJob: Video ##{video.id} has been processing for #{elapsed_hours.round(1)} hours (warning: #{warn_timeout}h)") end - # Return whether it's been processing too long (>2 hours) but without marking as failed - elapsed_hours > 2 + # Return whether it's been processing too long but without marking as failed + elapsed_hours > warn_timeout rescue => e Rails.logger.error("MonitorProcessingJob: Error checking processing time for video ##{video.id}: #{e.message}") false end def upload_is_stuck?(video, upload_started_at) - rs_data = video.remote_services_data || {} - rs_data["upload_progress"] - # Calculate appropriate timeout based on file size file_size = video.file_size || 0 base_timeout = 5.minutes # Base timeout for small files diff --git a/app/jobs/folio/file/get_video_metadata_job.rb b/app/jobs/folio/file/get_video_metadata_job.rb new file mode 100644 index 0000000000..ce348b51c6 --- /dev/null +++ b/app/jobs/folio/file/get_video_metadata_job.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +class Folio::File::GetVideoMetadataJob < Folio::ApplicationJob + include Folio::Shell + + queue_as :default + + # Returns { duration: Integer|nil, width: Integer|nil, height: Integer|nil } + # Accepts local file path OR HTTP(S) URL (presigned S3 URL). + # ffprobe streams only container headers from URLs — does NOT download the whole file. 
+ def perform(file_path_or_url) + output = shell("ffprobe", + "-select_streams", "v:0", + "-show_entries", "stream=duration,width,height", + "-show_entries", "format=duration", + "-of", "json", + "-v", "fatal", + file_path_or_url) + + data = JSON.parse(output) + stream = data.dig("streams", 0) || {} + format_data = data.dig("format") || {} + + duration_raw = stream["duration"] || format_data["duration"] + + { + duration: duration_raw ? duration_raw.to_f.ceil : nil, + width: stream["width"]&.to_i, + height: stream["height"]&.to_i, + } + rescue => e + Rails.logger.error("[GetVideoMetadataJob] ffprobe failed for #{file_path_or_url.to_s.truncate(100)}: #{e.message}") + { duration: nil, width: nil, height: nil } + end +end diff --git a/app/jobs/folio/files/after_save_job.rb b/app/jobs/folio/files/after_save_job.rb index 105c9cc4e0..e5c76f14b0 100644 --- a/app/jobs/folio/files/after_save_job.rb +++ b/app/jobs/folio/files/after_save_job.rb @@ -31,7 +31,6 @@ def perform(file, changed_attrs = {}) end private - def sync_metadata_to_placements(file, placements, changed_attrs) if changed_attrs.key?("description") old_desc, new_desc = changed_attrs["description"] diff --git a/app/jobs/folio/generate_missing_thumb_webp_job.rb b/app/jobs/folio/generate_missing_thumb_webp_job.rb index e440a92754..7006874bfd 100644 --- a/app/jobs/folio/generate_missing_thumb_webp_job.rb +++ b/app/jobs/folio/generate_missing_thumb_webp_job.rb @@ -27,7 +27,7 @@ def perform(image) if changed image.thumbnail_sizes = thumbnail_sizes - image.dont_run_after_save_jobs = true + image.try(:dont_run_after_save_jobs=, true) image.save!(validate: false) end end diff --git a/app/jobs/folio/generate_thumbnail_job.rb b/app/jobs/folio/generate_thumbnail_job.rb index d5c006b518..0b7d4efc24 100644 --- a/app/jobs/folio/generate_thumbnail_job.rb +++ b/app/jobs/folio/generate_thumbnail_job.rb @@ -1,5 +1,8 @@ # frozen_string_literal: true +require "open3" +require "open-uri" + class Folio::GenerateThumbnailJob < 
Folio::ApplicationJob queue_as :slow @@ -24,7 +27,7 @@ def perform(image, size, quality, x: nil, y: nil, force: false) # need to reload here because of parallel jobs image.reload.with_lock do thumbnail_sizes = image.thumbnail_sizes || {} - image.dont_run_after_save_jobs = true + image.try(:dont_run_after_save_jobs=, true) image.thumbnail_sizes = thumbnail_sizes.merge(size => new_thumb) image.save!(validate: false) end @@ -253,26 +256,126 @@ def make_thumb(image, raw_size, quality, x: nil, y: nil) end def image_file(image) + if image.class.human_type == "video" + return video_screenshot(image) + end + if Rails.env.development? && ENV["DRAGONFLY_PRODUCTION_S3_URL_BASE"] && image.respond_to?(:development_safe_file) thumbnail = image.development_safe_file(logger) else thumbnail = image.file end - if image.class.human_type == "video" - thumbnail = thumbnail.ffmpeg_screenshot_to_jpg(image.screenshot_time_in_ffmpeg_format) - thumbnail.name = Pathname.new(image.file_name).sub_ext(".jpg") - thumbnail.meta["mime_type"] = "image/jpeg" - else - thumbnail.name = image.file_name - thumbnail.meta["mime_type"] = image.file_mime_type + thumbnail.name = image.file_name + thumbnail.meta["mime_type"] = image.file_mime_type + thumbnail + rescue Dragonfly::Job::Fetch::NotFound + fallback_image(image) + end + + # Get a screenshot frame for video thumbnail generation. + # Priority: 1) Provider-supplied poster image (no decoding needed) + # 2) ffmpeg frame extraction (only for ≤4K — safe memory) + # 3) fallback placeholder image + def video_screenshot(image) + # Prefer a provider-supplied poster image when available — avoids decoding + # the source video entirely. Critical for high-res (4K/8K) HEVC content + # where decoding a single frame can require 800+ MB for reference frame buffers. + if (cover_url = image.video_poster_url).present? 
+ thumbnail = download_remote_image(image, cover_url) + return thumbnail if thumbnail + end + + input = image.file_url_or_path + return fallback_image(image) if input.blank? + + # Check resolution via ffprobe before attempting decode. Decoding + # video above 4K can require 800+ MB for codec reference frame + # buffers (DPB), which OOMKills pods with typical memory limits. + # For >4K videos without a poster image, use fallback. The provider + # may supply one asynchronously after encoding completes. + if video_resolution_too_high?(input) + Rails.logger.info("GenerateThumbnailJob: Skipping ffmpeg for high-res video ##{image.id}, using fallback") + return fallback_image(image) + end + + screenshot_time = image.screenshot_time_in_ffmpeg_format + + tmpfile = Tempfile.new(["video_thumb", ".jpg"]) + begin + # Place -ss before -i for fast HTTP range-based seeking (avoids + # downloading entire file). Use -threads 1 to limit memory usage + # for high-resolution video decoding. + success = system( + "ffmpeg", "-y", "-ss", screenshot_time, + "-i", input, + "-frames:v", "1", "-q:v", "2", "-threads", "1", + tmpfile.path, + out: File::NULL, err: File::NULL + ) + + unless success && File.size?(tmpfile.path) + Rails.logger.warn("GenerateThumbnailJob: ffmpeg screenshot failed for file ##{image.id}") + return fallback_image(image) + end + + thumbnail = Dragonfly.app.create(File.binread(tmpfile.path)) + ensure + tmpfile.close! 
end + thumbnail.name = Pathname.new(image.file_name).sub_ext(".jpg").to_s + thumbnail.meta["mime_type"] = "image/jpeg" thumbnail - rescue Dragonfly::Job::Fetch::NotFound - missing_image_path = Folio::Engine.root.join("data/images/missing-image.png") - thumbnail = Dragonfly.app.create(File.binread(missing_image_path)) - thumbnail.name = image.file_name || "missing-image.png" + rescue => e + Rails.logger.error("GenerateThumbnailJob: Video screenshot error for file ##{image.id}: #{e.message}") + fallback_image(image) + end + + MAX_FFMPEG_DECODE_HEIGHT = 2160 # 4K + + def video_resolution_too_high?(input) + stdout, _stderr, status = Open3.capture3( + "ffprobe", "-v", "error", + "-select_streams", "v:0", + "-show_entries", "stream=height", + "-of", "csv=p=0", + input + ) + return false unless status.success? + + height = stdout.strip.to_i + return false if height == 0 + + height > MAX_FFMPEG_DECODE_HEIGHT + rescue => e + # Fail safe: if ffprobe cannot determine resolution, skip ffmpeg to avoid + # OOMKilling the pod on a potentially high-resolution video. 
+    Rails.logger.warn("GenerateThumbnailJob: ffprobe resolution check failed, skipping ffmpeg: #{e.message}")
+    true
+  end
+
+  def download_remote_image(image, url)
+    data = URI.parse(url).open(read_timeout: 15).read
+    thumbnail = Dragonfly.app.create(data)
+    thumbnail.name = Pathname.new(image.file_name).sub_ext(".jpg").to_s
+    thumbnail.meta["mime_type"] = "image/jpeg"
+    thumbnail
+  rescue => e
+    Rails.logger.warn("GenerateThumbnailJob: remote poster download failed for file ##{image.id}: #{e.message}")
+    nil
+  end
+
+  def fallback_image(image)
+    filename = if image.class.human_type == "video"
+      "missing-video.png"
+    else
+      "missing-image.png"
+    end
+
+    path = Folio::Engine.root.join("data/images/#{filename}")
+    thumbnail = Dragonfly.app.create(File.binread(path))
+    thumbnail.name = filename
     thumbnail.meta["mime_type"] = "image/png"
     thumbnail.meta["fallback_image"] = true
     thumbnail
diff --git a/app/jobs/folio/pregenerate_thumbnails/check_job.rb b/app/jobs/folio/pregenerate_thumbnails/check_job.rb
index a738e10309..cd10df9c03 100644
--- a/app/jobs/folio/pregenerate_thumbnails/check_job.rb
+++ b/app/jobs/folio/pregenerate_thumbnails/check_job.rb
@@ -9,7 +9,7 @@ class Folio::PregenerateThumbnails::CheckJob < Folio::ApplicationJob
   def perform(attachmentable)
     if attachmentable && attachmentable.respond_to?(:file_placements)
       attachmentable.file_placements.find_each do |file_placement|
-        file_placement.dont_run_after_save_jobs = true
+        file_placement.try(:dont_run_after_save_jobs=, true)
         file_placement.try(:pregenerate_thumbnails)
       end
     end
diff --git a/app/jobs/folio/regenerate_thumb_webp_job.rb b/app/jobs/folio/regenerate_thumb_webp_job.rb
index 117b7e1ab0..5ff41fc520 100644
--- a/app/jobs/folio/regenerate_thumb_webp_job.rb
+++ b/app/jobs/folio/regenerate_thumb_webp_job.rb
@@ -27,7 +27,7 @@ def perform(image)

     if changed
       image.thumbnail_sizes = thumbnail_sizes
-      image.dont_run_after_save_jobs = true
+      image.try(:dont_run_after_save_jobs=, true)
       image.save!(validate: false)
end end diff --git a/app/jobs/folio/s3/create_file_job.rb b/app/jobs/folio/s3/create_file_job.rb index 2983a50953..702deda7a7 100644 --- a/app/jobs/folio/s3/create_file_job.rb +++ b/app/jobs/folio/s3/create_file_job.rb @@ -8,6 +8,12 @@ def perform_for_valid(s3_path:, klass:, existing_id:, web_session_id:, user_id:, @file = prepare_file_model(klass, id: existing_id, web_session_id:, user_id:, attributes:) replacing_file = @file.persisted? + # For video files on S3: use server-side copy instead of download+reupload + if klass <= Folio::File::Video && !use_local_file_system? + perform_with_s3_copy(s3_path:, klass:, replacing_file:) + return + end + Dir.mktmpdir("folio-file-s3") do |tmpdir| @file.file = downloaded_file(s3_path, tmpdir) @@ -45,6 +51,46 @@ def perform_for_valid(s3_path:, klass:, existing_id:, web_session_id:, user_id:, end private + def perform_with_s3_copy(s3_path:, klass:, replacing_file:) + file_name = s3_path.split("/").pop + sanitized_name = file_name.split(".").map(&:parameterize).join(".") + + # Generate Dragonfly-compatible UID and S3 destination key + uid = generate_dragonfly_uid(sanitized_name) + dest_key = [dragonfly_s3_root_path, uid].compact_blank.join("/") + source_key = test_aware_s3_path(s3_path) + + # Server-side S3 copy (instant, no data transfer through pod) + s3_copy_object(source_key: source_key, dest_key: dest_key) + + # Get file metadata from S3 HEAD (no download) + head = s3_head_object(key: source_key) + + # Set file attributes directly — bypass Dragonfly download+upload + @file.file_uid = uid + @file.file_name = sanitized_name + @file.file_size = head.content_length + @file.file_mime_type = head.content_type.presence || Marcel::MimeType.for(name: sanitized_name) + + if save_file_with_slug_retry + if replacing_file + broadcast_replace_success(file: @file, s3_path:, file_type: klass.to_s) + else + broadcast_success(file: @file, s3_path:, file_type: klass.to_s) + end + else + # Rollback: delete the copied file + 
test_aware_s3_delete(s3_path: uid) + if replacing_file + broadcast_replace_error(file: @file, s3_path:, file_type: klass.to_s) + else + broadcast_error(file: @file, s3_path:, file_type: klass.to_s) + end + end + ensure + test_aware_s3_delete(s3_path:) + end + def downloaded_file(s3_path, tmpdir) tmp_file_path = "#{tmpdir}/#{s3_path.split("/").pop}" diff --git a/app/lib/folio/cra_media_cloud/encoder.rb b/app/lib/folio/cra_media_cloud/encoder.rb index edf61d6b28..b7126591d9 100644 --- a/app/lib/folio/cra_media_cloud/encoder.rb +++ b/app/lib/folio/cra_media_cloud/encoder.rb @@ -5,6 +5,8 @@ module Folio module CraMediaCloud class Encoder + include Folio::S3::Client + DEFAULT_PROFILE_GROUP = "VoD" # SFTP connection configuration @@ -13,191 +15,62 @@ class Encoder SFTP_MAX_RETRIES = 3 SFTP_RETRY_DELAY = 5.seconds - # SFTP upload configuration - CHUNK_SIZE = 1.megabyte # Standard chunk size for file operations - - def upload_file(file, priority: "regular", profile_group: nil, reference_id: nil, media_file: nil) + def upload_file(file, priority: "regular", profile_group: nil, reference_id: nil, media_file: nil, processing_phases: nil) ref_id = reference_id || [file.id, Time.current.to_i].join("-") - Rails.logger.info("[CraMediaCloud::Encoder] Starting upload for file ID: #{file.id}, ref_id: #{ref_id}") - - # Get metadata without downloading the file - s3_metadata = get_s3_metadata(file) - md5 = extract_etag(s3_metadata).delete_prefix('"').delete_suffix('"') - - xml_manifest = build_ingest_manifest(file, md5:, ref_id:, profile_group:) - - folder_path = "/ingest/#{priority}" - file_path = "#{folder_path}/#{file.file_name}" - xml_manifest_path = "#{folder_path}/#{file.file_name.split(".").first}_manifest.xml" - - # Use plain temp file path to avoid Ruby memory buffering - temp_file_path = ::File.join(Dir.tmpdir, "cra_upload_#{ref_id}_#{Process.pid}_#{Time.current.to_i}.tmp") + Rails.logger.info("[CraMediaCloud::Encoder] Starting manifest upload for file ID: #{file.id}, 
ref_id: #{ref_id}") - begin - # Download using system tools (no Ruby file handles involved) - download_to_file_path(file, temp_file_path) + # Get S3 metadata for MD5 checksum + s3_metadata = s3_dragonfly_head_object(file.file_uid) + md5 = extract_s3_etag(s3_metadata).delete_prefix('"').delete_suffix('"') - # Verify file size - actual_size = ::File.size(temp_file_path) - if actual_size != file.file_size - Rails.logger.error("[CraMediaCloud::Encoder] Downloaded file size mismatch: got #{actual_size}, expected #{file.file_size}") - raise "Downloaded file size mismatch: got #{actual_size}, expected #{file.file_size}" - end + # Generate presigned URL for CRA to download directly from S3 + presigned_url = generate_presigned_url(file) + Rails.logger.info("[CraMediaCloud::Encoder] Generated presigned S3 URL for CRA (expires in 7 days)") - # Upload to SFTP with robust session management - with_robust_sftp_session do |sftp| - # Use standard upload for better performance - upload_with_retry(sftp, temp_file_path, file_path) - Rails.logger.info("[CraMediaCloud::Encoder] File uploaded to SFTP: #{file_path}") + xml_manifest = build_ingest_manifest(file, md5:, ref_id:, profile_group:, presigned_url:, processing_phases:) - # Upload manifest - upload_with_retry(sftp, StringIO.new(xml_manifest), xml_manifest_path) - Rails.logger.info("[CraMediaCloud::Encoder] Manifest uploaded to SFTP: #{xml_manifest_path}") - end + folder_path = "/ingest/#{priority}" + xml_manifest_path = "#{folder_path}/#{ref_id}_manifest.xml" - rescue => e - Rails.logger.error("[CraMediaCloud::Encoder] Error during upload process: #{e.class}: #{e.message}") - raise - ensure - # Clean up temp file - if ::File.exist?(temp_file_path) - begin - ::File.delete(temp_file_path) - rescue => e - Rails.logger.warn("[CraMediaCloud::Encoder] Could not delete temp file #{temp_file_path}: #{e.message}") - end - end + # Upload only the manifest via SFTP (CRA downloads the video itself) + with_robust_sftp_session do |sftp| + 
upload_with_retry(sftp, StringIO.new(xml_manifest), xml_manifest_path) + Rails.logger.info("[CraMediaCloud::Encoder] Manifest uploaded to SFTP: #{xml_manifest_path}") end { ref_id:, - file_path:, xml_manifest_path:, + presigned_url: presigned_url.present?, } end private - def get_s3_metadata(file) + def generate_presigned_url(file) s3_datastore = Dragonfly.app.datastore - s3_object_key = [s3_datastore.root_path, file.file_uid].join("/") - Rails.logger.info("[CraMediaCloud::Encoder] Fetching S3 metadata for key: #{s3_object_key}") - s3_datastore.storage.head_object(ENV["S3_BUCKET_NAME"], s3_object_key) - end - - def extract_etag(response) - # Handle different response types (AWS SDK, Excon, etc.) - if response.respond_to?(:etag) - response.etag - elsif response.respond_to?(:headers) - response.headers["ETag"] || response.headers["etag"] || response.headers["Etag"] - else - raise "Cannot extract ETag from response type: #{response.class}" - end - end - - def download_to_file_path(file, file_path) - s3_datastore = Dragonfly.app.datastore - s3_object_key = [s3_datastore.root_path, file.file_uid].join("/") - - download_success = false - - # Try AWS CLI first (if available) - if system("which aws > /dev/null 2>&1") - s3_url = "s3://#{ENV['S3_BUCKET_NAME']}/#{s3_object_key}" - aws_command = "aws s3 cp #{s3_url} #{file_path} --no-progress" - - if system(aws_command) - download_success = true - end - end - - # Fallback to curl with S3 presigned URL - unless download_success - begin - s3_client = s3_datastore.storage - presigned_url = s3_client.presigned_url( - :get_object, - bucket: ENV["S3_BUCKET_NAME"], - key: s3_object_key, - expires_in: 3600 - ) - - curl_command = [ - "curl", "-L", "-s", "-S", - "-o", file_path, - "--max-time", "1800", - "--connect-timeout", "30", - presigned_url - ] - - if system(*curl_command) - download_success = true - end - - rescue => e - Rails.logger.error("[CraMediaCloud::Encoder] Error generating presigned URL: #{e.message}") - end - end - - # 
Final fallback to Ruby download - unless download_success - Rails.logger.warn("[CraMediaCloud::Encoder] System download failed, using Ruby fallback") - - downloaded_bytes = 0 - - ::File.open(file_path, "wb") do |output_file| - loop do - range_start = downloaded_bytes - range_end = [downloaded_bytes + CHUNK_SIZE - 1, file.file_size - 1].min - - break if range_start >= file.file_size - - begin - s3_response = s3_datastore.storage.get_object( - ENV["S3_BUCKET_NAME"], - s3_object_key, - range: "bytes=#{range_start}-#{range_end}" - ) - - chunk_data = s3_response.body - output_file.write(chunk_data) - output_file.flush - - downloaded_bytes += chunk_data.length - - # Clear references - nil - nil - - rescue => e - Rails.logger.error("[CraMediaCloud::Encoder] Error downloading chunk #{range_start}-#{range_end}: #{e.message}") - raise "Failed to download chunk from S3: #{e.message}" - end - end - end - - download_success = true - end - - unless download_success - raise "All download methods failed" - end - - actual_size = ::File.size(file_path) - if actual_size != file.file_size - raise "Downloaded size mismatch: got #{actual_size}, expected #{file.file_size}" - end + s3_object_key = [s3_datastore.root_path, file.file_uid].compact_blank.join("/") + s3_presigner.presigned_url( + :get_object, + bucket: s3_bucket, + key: s3_object_key, + expires_in: 7.days.to_i # 604800 seconds + ) end - def build_ingest_manifest(file, md5:, ref_id:, profile_group:) + def build_ingest_manifest(file, md5:, ref_id:, profile_group:, presigned_url: nil, processing_phases: nil) xml = Builder::XmlMarkup.new; nil xml.instruct!(:xml, version: "1.0", encoding: "utf-8") - xml.vod_encoder_job do - xml.input(type: "VIDEO", - file: file.file_name, - size: file.file_size.to_s, - md5: md5) do + root_attrs = processing_phases.to_i > 1 ? 
{ processingPhases: processing_phases } : {} + xml.vod_encoder_job(root_attrs) do + input_attrs = { type: "VIDEO", size: file.file_size.to_s, md5: md5 } + if presigned_url.present? + input_attrs[:src] = presigned_url + else + input_attrs[:file] = file.file_name + end + + xml.input(input_attrs) do xml.audioTrack(language: "cze", channels: "auto") end xml.profileGroup(profile_group || DEFAULT_PROFILE_GROUP) diff --git a/app/lib/folio/cra_media_cloud/job_resolver.rb b/app/lib/folio/cra_media_cloud/job_resolver.rb new file mode 100644 index 0000000000..5727fac7c6 --- /dev/null +++ b/app/lib/folio/cra_media_cloud/job_resolver.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Folio + module CraMediaCloud + class JobResolver + STATUS_MAP = { + "WAITING" => :processing, + "PROCESSING" => :processing, + "CREATED" => :processing, + "VALIDATING" => :processing, + "DONE" => :done, + "FAILED" => :failed, + "ERROR" => :failed, + "REMOVED" => :not_found, + }.freeze + + def self.resolve(jobs) + return { status: :not_found, job: nil } if jobs.empty? + + job = latest_job(jobs) + status = STATUS_MAP[job["status"]] || :not_found + { status:, job: } + end + + def self.latest_job(jobs) + return nil if jobs.empty? 
+ jobs.max_by { |j| Time.parse(j["lastModified"]) } + end + + private_class_method :latest_job + end + end +end diff --git a/app/lib/folio/s3/client.rb b/app/lib/folio/s3/client.rb index b6fd900c03..698278579f 100644 --- a/app/lib/folio/s3/client.rb +++ b/app/lib/folio/s3/client.rb @@ -94,6 +94,48 @@ def test_aware_s3_upload(s3_path:, file:, acl: "private") end end + def s3_copy_object(source_key:, dest_key:) + s3_client.copy_object( + bucket: s3_bucket, + copy_source: "#{s3_bucket}/#{source_key}", + key: dest_key + ) + end + + def s3_head_object(key:) + s3_client.head_object(bucket: s3_bucket, key: key) + end + + def generate_dragonfly_uid(file_name) + if Dragonfly.app.datastore.respond_to?(:generate_uid) + Dragonfly.app.datastore.generate_uid(file_name) + else + "#{Time.now.strftime '%Y/%m/%d/%H/%M/%S'}/#{SecureRandom.uuid}/#{file_name}" + end + end + + def dragonfly_s3_root_path + Dragonfly.app.datastore.root_path + end + + # Fetch S3 HEAD metadata via Dragonfly's Fog storage layer. + # Returns Excon response (use extract_s3_etag to read ETag). + def s3_dragonfly_head_object(file_uid) + s3_object_key = [dragonfly_s3_root_path, file_uid].compact_blank.join("/") + Dragonfly.app.datastore.storage.head_object(s3_bucket, s3_object_key) + end + + # Extract ETag from either Fog/Excon or AWS SDK response. + def extract_s3_etag(response) + if response.respond_to?(:etag) + response.etag + elsif response.respond_to?(:headers) + response.headers["ETag"] || response.headers["etag"] || response.headers["Etag"] + else + raise "Cannot extract ETag from response type: #{response.class}" + end + end + private def use_local_file_system? 
@use_local_file_system ||= Dragonfly.app.datastore.is_a?(Dragonfly::FileDataStore) diff --git a/app/models/concerns/folio/cra_media_cloud/file_processing.rb b/app/models/concerns/folio/cra_media_cloud/file_processing.rb index 9891ed4451..733b4eb466 100644 --- a/app/models/concerns/folio/cra_media_cloud/file_processing.rb +++ b/app/models/concerns/folio/cra_media_cloud/file_processing.rb @@ -8,6 +8,14 @@ def encoder_profile_group nil # use encoder's default end + def encoder_processing_phases + 1 # default: single phase; override in app for multi-phase + end + + def encoder_phase_name(phase_number) + nil # override in app to return e.g. "SD", "HD" + end + def remote_content_mp4_url_for(profile) path = remote_services_data.dig("content_mp4_paths", profile.to_s) remote_content_url_base + path if path @@ -39,6 +47,10 @@ def remote_cover_url end end + def video_poster_url + remote_cover_url + end + def remote_thumbnails_url if remote_services_data["thumbnails_path"] remote_content_url_base + remote_services_data["thumbnails_path"] @@ -58,6 +70,8 @@ def update_preview_media_length end def destroy_attached_file + return if remote_id.blank? && remote_reference_id.blank? + delete_media_job_class.perform_later(remote_id, reference_id: remote_reference_id) end @@ -77,7 +91,11 @@ def processed_by "cra_media_cloud" end + def check_media_processing(preview: false) + check_media_processing_job_class.perform_later(self, preview:, encoding_generation:) + end + def upload_failed? 
- processing_state == "upload_failed" + processing_state.in?(%w[upload_failed encoding_failed]) end end diff --git a/app/models/concerns/folio/media_file_processing_base.rb b/app/models/concerns/folio/media_file_processing_base.rb index 90d531e213..bb06b19cdf 100644 --- a/app/models/concerns/folio/media_file_processing_base.rb +++ b/app/models/concerns/folio/media_file_processing_base.rb @@ -19,8 +19,11 @@ module Folio::MediaFileProcessingBase def process_attached_file regenerate_thumbnails if try(:thumbnailable?) - # Set new encoding generation to invalidate any old CheckProgressJobs - self.update(remote_services_data: { + # Set new encoding generation to invalidate any old CheckProgressJobs. + # Use update_columns to bypass validations — remote_services_data is processing + # metadata and must not be blocked by unrelated validation failures (e.g. missing + # file dimensions when ffprobe fails). + update_remote_services_data({ "processing_step_started_at" => Time.current, "encoding_generation" => Time.current.to_i }) @@ -91,8 +94,7 @@ def preview_media_processed? def create_full_media full_media_job_class.perform_later(self) - rsd = remote_services_data || {} - self.update(remote_services_data: rsd.merge!({ "service" => processed_by, "processing_state" => "enqueued", "processing_step_started_at" => Time.current })) + update_remote_services_data("service" => processed_by, "processing_state" => "enqueued", "processing_step_started_at" => Time.current) end def create_preview_media @@ -100,7 +102,7 @@ def create_preview_media preview_media_processed! else preview_media_job_class.perform_later(self) - self.update(remote_services_data: self.remote_services_data.merge!({ "processing_step_started_at" => Time.current })) + update_remote_services_data("processing_step_started_at" => Time.current) end end @@ -130,7 +132,7 @@ def preview_duration_in_seconds if (remote_services_data || {}).dig("preview_interval").present? 
preview_ends_at_second - preview_starts_at_second else - [file_track_duration_in_seconds, DEFAULT_PREVIEW_DURATION].min + file_track_duration_in_seconds.present? ? [file_track_duration_in_seconds, DEFAULT_PREVIEW_DURATION].min : DEFAULT_PREVIEW_DURATION end end @@ -148,4 +150,21 @@ def preview_duration=(secs) def preview_duration @preview_duration ||= ActiveSupport::Duration.build(preview_duration_in_seconds) end + + private + # Merge new data into remote_services_data and persist directly to DB, + # bypassing model validations and callbacks. This is necessary because + # remote_services_data is processing metadata that must not be blocked + # by unrelated validation failures on the model. + # + # NOTE: Non-atomic read-modify-write — if another process updates + # remote_services_data between read and write, changes will be lost. + # Acceptable at current call sites (process_attached_file, create_full_media) + # where the video is being initially processed and no concurrent job + # is modifying remote_services_data yet. 
+ def update_remote_services_data(new_data) + merged = (remote_services_data || {}).merge(new_data) + update_columns(remote_services_data: merged) + write_attribute(:remote_services_data, merged) + end end diff --git a/app/models/folio/file.rb b/app/models/folio/file.rb index dd44804f11..4b62c91474 100644 --- a/app/models/folio/file.rb +++ b/app/models/folio/file.rb @@ -9,6 +9,7 @@ class Folio::File < Folio::ApplicationRecord include Folio::Taggable include Folio::HasAasmStates include Folio::BelongsToSite + include Folio::S3::Client include Folio::FilesSharedAccrossSites if Rails.application.config.folio_shared_files_between_sites READY_STATE = :ready @@ -196,6 +197,10 @@ class Folio::File < Folio::ApplicationRecord event :reprocess do transitions from: READY_STATE, to: :processing end + + event :retry_processing do + transitions from: :processing_failed, to: :processing + end end def self.correct_site(site) @@ -267,6 +272,17 @@ def regenerate_thumbnails end end + # Returns a path or URL suitable for ffprobe/ffmpeg. + # For S3 storage with stored file: presigned URL (streams headers only, no full download). + # For pending uploads (file= assigned but not yet saved) or local storage: file system path. + def file_url_or_path + if file_uid.present? && !Dragonfly.app.datastore.is_a?(Dragonfly::FileDataStore) + file_presigned_url + else + file&.path.to_s + end + end + def thumbnailable? false end @@ -417,6 +433,15 @@ def update_file_placements_counts! 
end private + def file_presigned_url(expires_in: 1.hour.to_i) + s3_object_key = [dragonfly_s3_root_path, file_uid].compact_blank.join("/") + s3_presigner.presigned_url(:get_object, + bucket: s3_bucket, + key: s3_object_key, + expires_in: expires_in + ) + end + def slug_candidates %i[slug headline hash_id_for_slug to_label] end @@ -457,16 +482,24 @@ def check_usage_before_destroy end def set_file_track_duration - if %w[audio video].include?(self.class.human_type) - self.file_track_duration = Folio::File::GetFileTrackDurationJob.perform_now(file.path.to_s, self.class.human_type) # in seconds + if self.class.human_type == "video" + # For video: handled together with dimensions in set_video_file_dimensions + nil + elsif self.class.human_type == "audio" + self.file_track_duration = Folio::File::GetFileTrackDurationJob.perform_now(file_url_or_path, "audio") self.preview_track_duration_in_seconds = self.respond_to?(:preview_duration_in_seconds) ? preview_duration_in_seconds : 0 end end def set_video_file_dimensions - if %w[video].include?(self.class.human_type) - self.file_width, self.file_height = Folio::File::GetVideoDimensionsJob.perform_now(file.path.to_s, self.class.human_type) - end + return unless self.class.human_type == "video" + + metadata = Folio::File::GetVideoMetadataJob.perform_now(file_url_or_path) + + self.file_width = metadata[:width] + self.file_height = metadata[:height] + self.file_track_duration = metadata[:duration] + self.preview_track_duration_in_seconds = self.respond_to?(:preview_duration_in_seconds) ? preview_duration_in_seconds : 0 end def validate_attribution_and_texts_if_needed diff --git a/app/models/folio/file/video.rb b/app/models/folio/file/video.rb index bb5e9a8427..f9429ef042 100644 --- a/app/models/folio/file/video.rb +++ b/app/models/folio/file/video.rb @@ -19,6 +19,10 @@ def thumbnailable? 
true end + def video_poster_url + nil # override in provider concerns to return a static thumbnail image URL + end + def self.human_type "video" end diff --git a/app/serializers/folio/console/file_serializer.rb b/app/serializers/folio/console/file_serializer.rb index 48f9a1e8ef..f4f332b4d0 100644 --- a/app/serializers/folio/console/file_serializer.rb +++ b/app/serializers/folio/console/file_serializer.rb @@ -86,11 +86,7 @@ class Folio::Console::FileSerializer end attribute :aasm_state_human do |object| - if object.processing? && object.remote_services_data.try(:[], "progress_percentage") - "#{object.aasm.human_state} (#{object.remote_services_data.try(:[], "progress_percentage")}%)" - else - object.aasm.human_state - end + object.aasm.human_state end attribute :aasm_state_color do |object| diff --git a/config/locales/aasm.cs.yml b/config/locales/aasm.cs.yml index 54f31b891a..9110a5a082 100644 --- a/config/locales/aasm.cs.yml +++ b/config/locales/aasm.cs.yml @@ -7,6 +7,7 @@ cs: folio/file: aasm_state/unprocessed: Nezpracováno aasm_state/processing: Zpracováváno + aasm_state/processing_failed: Zpracování selhalo aasm_state/ready: Připraveno folio/lead: diff --git a/config/locales/aasm.en.yml b/config/locales/aasm.en.yml index 26cc89b411..e63c318ea9 100644 --- a/config/locales/aasm.en.yml +++ b/config/locales/aasm.en.yml @@ -7,6 +7,7 @@ en: folio/file: aasm_state/unprocessed: Unprocessed aasm_state/processing: Processing + aasm_state/processing_failed: Processing failed aasm_state/ready: Ready folio/lead: diff --git a/config/locales/console/files.cs.yml b/config/locales/console/files.cs.yml index c09108f60e..9e7b7e1351 100644 --- a/config/locales/console/files.cs.yml +++ b/config/locales/console/files.cs.yml @@ -22,6 +22,19 @@ cs: navigation_previous: Předchozí show: + encoding_info_component: + phase_waiting: "Čekání ve frontě" + phase_waiting_multi: "Čekání ve frontě (fáze %{phase}/%{total})" + phase_waiting_named: "Čekání ve frontě – %{name}" + phase_encoding: 
"Kódování videa" + phase_encoding_multi: "Kódování videa (%{phase}/%{total})" + phase_encoding_named: "Kódování %{name}" + phase_packaging: "Balení" + phase_packaging_multi: "Balení (%{phase}/%{total})" + phase_packaging_named: "Balení – %{name}" + phase_failed_retrying: "Zpracování selhalo, pokusíme se znovu" + phase_failed: "Zpracování selhalo. Zkuste video nahrát znovu, nebo kontaktujte podporu." + metadata_component: no_metadata: Žádná metadata extract_metadata: Znovu extrahovat metadata diff --git a/config/locales/console/files.en.yml b/config/locales/console/files.en.yml index 014c5663f2..db393517b1 100644 --- a/config/locales/console/files.en.yml +++ b/config/locales/console/files.en.yml @@ -22,6 +22,19 @@ en: navigation_previous: Previous show: + encoding_info_component: + phase_waiting: "Waiting in queue" + phase_waiting_multi: "Waiting in queue (phase %{phase}/%{total})" + phase_waiting_named: "Waiting in queue – %{name}" + phase_encoding: "Encoding video" + phase_encoding_multi: "Encoding video (%{phase}/%{total})" + phase_encoding_named: "Encoding %{name}" + phase_packaging: "Packaging" + phase_packaging_multi: "Packaging (%{phase}/%{total})" + phase_packaging_named: "Packaging – %{name}" + phase_failed_retrying: "Processing failed, retrying automatically" + phase_failed: "Processing failed. Try re-uploading or contact support." 
+ metadata_component: no_metadata: No metadata extract_metadata: Extract metadata again diff --git a/data/images/missing-video.png new file mode 100644 index 0000000000..9167532d35 Binary files /dev/null and b/data/images/missing-video.png differ diff --git a/docs/design/cra-encoding-system.md new file mode 100644 index 0000000000..3d6cc8df47 --- /dev/null +++ b/docs/design/cra-encoding-system.md @@ -0,0 +1,342 @@ +# CRA Video Encoding System — Design Document + +## Overview + +The CRA (CraMediaCloud) integration encodes uploaded videos into multiple quality profiles (SD/HD), HLS/DASH streaming manifests, and generates thumbnails/cover images. Videos become progressively available — SD quality is playable while HD encoding continues. + +**Repos:** folio gem (core engine) + economia app (overrides, player, UI) + +--- + +## 1. Upload & Manifest Delivery + +### Presigned S3 URL (no file transfer through pod) + +When a video is uploaded, the encoder generates a **presigned S3 URL** (7-day expiry) and embeds it in an XML manifest. Only the manifest (~1 KB) is uploaded via SFTP — CRA fetches the video directly from S3. + +```xml + <?xml version="1.0" encoding="utf-8"?> + <vod_encoder_job processingPhases="2"> + <input type="VIDEO" src="https://{bucket}.s3.amazonaws.com/{key}?X-Amz-…presigned…" size="1234567890" md5="…"> + <audioTrack language="cze" channels="auto"/> + </input> + <profileGroup>VoDHDAuto</profileGroup> + <refId>prod-video-slug-123-a1b2c3d4-1710000000</refId> + </vod_encoder_job> +``` + +### Reference ID format + +`{env}-{slug(truncated)}-{id}-{s3_etag[0..7]}-{encoding_generation}` + +- Total capped at **128 chars** (CRA lookup fails with longer IDs) +- `encoding_generation` changes on each re-encode, ensuring CRA gets a fresh refId + +### Files + +| File (folio) | Purpose | |---|---| | `app/lib/folio/cra_media_cloud/encoder.rb` | Builds XML manifest, uploads via SFTP | | `app/jobs/folio/cra_media_cloud/create_media_job.rb` | Orchestrates upload: generates ref ID, checks for existing jobs, calls Encoder | | `app/lib/folio/s3/client.rb` | Shared S3 helpers: presigned URLs, HEAD metadata, ETag extraction | + +--- + +## 2.
Two-Phase Encoding + +When `encoder_processing_phases` returns `> 1` (economia overrides to `2`), a **single manifest** is submitted with the `processingPhases="2"` XML attribute (same format as shown in §1). The profile group is always `VoDHDAuto` — CRA handles phasing internally. + +CRA creates multiple internal jobs (one per phase), each with a `phase` field in API responses: + +| CRA phase | Output | Enables | +|---|---|---| +| 1 (SD) | sd profiles, HLS/DASH (SD), cover, thumbnails | Playback at SD quality while HD encodes | +| 2 (HD) | All profiles incl. HD, full HLS/DASH | Full quality playback | + +When `CheckProgressJob` sees a phase-1 job reach DONE, `save_intermediate_phase_data` writes the SD manifest/cover paths to the top-level `remote_services_data` keys the player reads — making the video playable at SD quality. It then clears `remote_id` and polls by `reference_id` to discover the phase-2 job. Phase-2 output overwrites phase-1 paths when it completes. + +### Backward compatibility + +When `encoder_processing_phases` is `1` or `nil` (default), the manifest is submitted without the `processingPhases` attribute. All existing behavior preserved. + +### economia override (`feature/cra-encoding-improvements` branch) + +```ruby +# app/overrides/models/folio/file/video_override.rb +def encoder_profile_group + Rails.env.production? ? "VoDHDAuto" : "VoD" +end + +def encoder_processing_phases + 2 +end + +def encoder_phase_name(phase_number) + { 1 => "SD", 2 => "HD" }[phase_number] +end +``` + +--- + +## 3. Progress Tracking + +### CRA API polling + +`CheckProgressJob` polls every 15 seconds. 
It parses the CRA `messages` array to determine encoding phase: + +| CRA message | Internal phase | +|---|---| +| `verification: finished` | `validation` | +| `Transcoding worker - audio: finished` | `audio` | +| `Transcoding worker - video: finished` | `video` | +| `copying: started` | `packaging` | + +Progress percentage is raw CRA `progress` field × 100 (per-phase, not mapped across phases). + +### MessageBus real-time updates + +`broadcast_encoding_progress` publishes to `Folio::MESSAGE_BUS_CHANNEL` with phase label, progress %, and failure state. The `EncodingInfoComponent` Stimulus controller updates the UI badge in real time. + +### Files + +| File (folio) | Purpose | +|---|---| +| `app/jobs/folio/cra_media_cloud/check_progress_job.rb` | Polls CRA API, updates `remote_services_data`, handles phase transitions | +| `app/components/folio/console/files/show/encoding_info_component.*` | UI badge (Ruby + Stimulus + Sass + Slim) | + +--- + +## 4. State Machine + +### AASM states (`aasm_state` column) + +``` +unprocessed → [process!] → processing → [processing_done!] → ready + ↓ + [processing_failed!] + ↓ + processing_failed → [retry_processing!] → processing +``` + +### Processing states (`remote_services_data["processing_state"]`) + +``` +enqueued → creating_media_job → full_media_processing → full_media_processed + ↓ + encoding_failed (CRA FAILED/ERROR) + upload_failed (SFTP/S3 error in CreateMediaJob) + source_file_missing (S3 404) +``` + +Multi-phase adds intermediate data (`phase_N_content_mp4_paths`, `phase_N_completed_at`) but no new processing states. 
+ +### `remote_services_data` JSON structure + +```json +{ + "service": "cra_media_cloud", + "processing_state": "full_media_processing", + "reference_id": "prod-video-slug-123-a1b2c3d4-1710000000", + "remote_id": "JOB123", + "encoding_generation": 1710000000, + "processing_step_started_at": "2026-03-17T10:30:00Z", + + "cra_status": "PROCESSING", + "progress_percentage": 60, + "current_phase": "encoding", + "current_encoding_phase": 1, + "processing_phases": 2, + "phases_completed": ["validation", "audio"], + "video_duration": 120, + + "phase_1_content_mp4_paths": { "sd0": "/path/sd0.mp4", "sd1": "/path/sd1.mp4" }, + "phase_1_completed_at": "2026-03-17T11:00:00Z", + "phase_1_remote_id": "JOB111", + + "content_mp4_paths": { "sd0": "/path/sd0.mp4", "hd1": "/path/hd1.mp4" }, + "manifest_hls_path": "/path/master.m3u8", + "manifest_dash_path": "/path/manifest.mpd", + "cover_path": "/path/cover.jpg", + "thumbnails_path": "/path/thumb.vtt", + + "error_message": null, + "retry_count": 0, + "retry_scheduled_at": null, + "failed_at": null +} +``` + +--- + +## 5. Error Handling & Recovery + +### Automatic retry + +On CRA `FAILED`/`ERROR`, `CheckProgressJob`: +1. Sets `processing_state` to `"encoding_failed"`, `retry_count` += 1 +2. Calls `processing_failed!` (single save) +3. Broadcasts failure state to UI +4. If `retry_count <= 1`: schedules `CreateMediaJob` in 2 minutes +5. If `retry_count > 1`: final failure, no retry + +### Timeout + +- **CheckProgressJob**: 4-hour `MAX_PROCESSING_DURATION` (flat, per video) — marks as `processing_failed` if `processing_step_started_at` is older. The unique-job constraint means one instance runs per video; if the worker is restarted the next `CheckProgressJob` re-checks this on each run. +- **MonitorProcessingJob**: 6-hour hard timeout (flat, not phase-multiplied in current code) — marks as `processing_failed` for any video that has been in `processing` AASM state for over 6 hours. 
Effectively a backstop for videos whose `CheckProgressJob` was lost or never fired. + +> **Note:** The two timeouts are intentionally overlapping rather than sequential. `CheckProgressJob` handles the common case (actively polling video); `MonitorProcessingJob` is the safety net for stuck/orphaned videos. A video that times out in `CheckProgressJob` at 4 hours will also be caught by `MonitorProcessingJob` at 6 hours if it somehow transitions back to `processing`. + +### Safety nets (MonitorProcessingJob) + +Runs periodically with Redis lock to prevent concurrent instances. Catches: + +| Scenario | Action | +|---|---| +| Stuck in `unprocessed` with `file_uid` > 5 min | Triggers `process!` | +| Stuck in `enqueued` > 10 min | Re-enqueues `CreateMediaJob` | +| `upload_failed` / `encoding_failed` > 5 min | Re-enqueues `CreateMediaJob` | +| `processing_failed` with `retry_count < 2` and lost retry job | Re-enqueues `CreateMediaJob` | +| Processing > 6 hours | Marks as `processing_failed` | +| Orphaned (has `reference_id` but no `remote_id`, or stuck in `creating_media_job` > 30 min) | Reconciles via API | +| All CRA jobs `REMOVED` with stored phase data | `finalize_from_completed_phases!` merges stored output and transitions to `ready` | + +**Note on `REMOVED` status:** Production data confirms CRA does **not** auto-purge completed jobs — DONE jobs remain accessible indefinitely (verified 4+ months). `REMOVED` appears only when job content is explicitly deleted via `DeleteMediaJob` (`DELETE /jobs/{id}/content`). The all-REMOVED handler therefore covers the edge case where both phase job contents were deleted while the video was still in `processing` AASM state. + +### Tracked job becomes REMOVED (CheckProgressJob) + +When `CheckProgressJob` is polling via `remote_id` and that specific job returns `REMOVED`, it: +1. Clears `remote_id` from `remote_services_data` +2. 
Calls `check_again_later` to resume polling by `reference_id` + +This handles the edge case where a single phase job is deleted while encoding is still in progress. Polling falls back to `reference_id` lookup to discover any replacement or remaining jobs. + +### Stale processing state after CRA recovery (CreateMediaJob) + +When `CreateMediaJob` finds an existing CRA job with `remote_id` matching the stored value but `processing_state` is stale (e.g. `upload_failed` or `encoding_failed` set while CRA eventually completed), it: +1. Resets `processing_state` to `"full_media_processing"` +2. Schedules `CheckProgressJob` to finalize + +Without this, videos could loop forever: `MonitorProcessingJob` re-enqueues `CreateMediaJob` every 5 min, `CreateMediaJob` finds matching `remote_id` and does nothing, state never advances. Root cause confirmed in production (video stuck 25+ hours while CRA job was `DONE`). + +### Missing S3 source file + +If S3 returns 404 during `CreateMediaJob`, video is marked `source_file_missing` + `processing_failed` permanently (no retry). + +--- + +## 6. Progressive Video Availability + +After phase 1 completes, `save_intermediate_phase_data` writes SD manifest/cover paths to the same top-level keys the player reads (`manifest_hls_path`, `manifest_dash_path`, `cover_path`). The video is playable at SD quality while AASM state remains `processing`. + +The economia `PlayerComponent` gates on manifest URL presence (not AASM `ready?`): +```ruby +@valid = @file.remote_manifest_hls_url.present? || @file.remote_manifest_dash_url.present? +``` + +When phase 2 completes, `process_output_hash` overwrites with HD paths. Next page load serves HD. 
+ +### Console video detail (economia) + +`AdditionalHtmlComponent` shows: +- **Iframe with player** when manifest URL is present (same gate as PlayerComponent — manifest is available as soon as phase 1 completes, so this covers the SD-quality interim state) +- **"File not ready"** when no manifest is available yet + +--- + +## 7. Thumbnail Generation + +Priority order: +1. **CRA cover image** (small JPEG from CDN) — preferred, no decoding needed +2. **ffmpeg frame extraction** — only for ≤4K resolution (checked via `ffprobe`) +3. **Fallback placeholder** (`missing-video.png`) — for >4K or when both above fail + +Both ffprobe (resolution check) and ffmpeg (frame extraction) receive the **presigned S3 URL** from `file_url_or_path`. ffprobe reads only container headers (no full download). ffmpeg uses `-ss` before `-i` for fast HTTP range-based seeking, avoiding a full file download to the pod. + +### OOMKill prevention + +Videos >4K (2160p) skip ffmpeg decoding entirely — HEVC reference frame buffers can require 800+ MB. The fallback placeholder is used until CRA provides the cover image. + +### Files + +| File (folio) | Purpose | +|---|---| +| `app/jobs/folio/generate_thumbnail_job.rb` | Video screenshot extraction with resolution check | +| `app/jobs/folio/file/get_video_metadata_job.rb` | Single ffprobe call for duration + dimensions via presigned URL | + +--- + +## 8. S3 Optimizations + +| Optimization | File | Effect | +|---|---|---| +| Presigned URL for CRA | `encoder.rb` | No video download to pod | +| Presigned URL for ffprobe | `file.rb` → `file_url_or_path` | Streams ~100 KB headers, not full file | +| S3 server-side copy for uploads | `app/jobs/folio/s3/create_file_job.rb` | Zero data transfer for video copy | +| Shared S3 helpers | `app/lib/folio/s3/client.rb` | `s3_dragonfly_head_object`, `extract_s3_etag` | + +--- + +## 9. Legacy Video Support (economia) + +Videos imported from old Wowza/CDN77 system have `legacy_data["skip_cra_encoding"] = true`. 
These: +- Skip CRA encoding entirely (`process_attached_file` → just thumbnails + `processing_done!`) +- Use direct CDN URLs for playback +- Override `remote_manifest_url_base` and `remote_content_url_base` to match import domain +- Skip CRA delete on destroy + +### Files (economia, branch `feature/cra-encoding-improvements`) + +| File | Purpose | +|---|---| +| `app/overrides/models/folio/file/video_override.rb` | CRA concern inclusion, profile group, 2-phase config, legacy video handling | +| `app/overrides/jobs/folio/cra_media_cloud/create_media_job_override.rb` | Sets queue to `:video` | +| `app/components/economia/cra_media_cloud/player_component.rb` | OTT player rendering with manifest-based gate, subtitles, Gemius analytics | +| `app/components/economia/cra_media_cloud/player_component.js` | Stimulus controller: player lifecycle, viewport awareness, multi-instance coordination | +| `app/components/folio/console/economia/files/additional_html_component.rb` | Console video detail: iframe player (manifest gate) + manifest URL links | +| `app/components/folio/console/economia/files/additional_html_component.slim` | Template with manifest gate — shows player iframe or "not ready" | +| `app/jobs/economia/import_video_from_url_job.rb` | Legacy video import from article URLs | +| `app/lib/economia/article_storage/video_creator.rb` | Creates video records from Article Storage API | +| `lib/tasks/cra_audit.rake` | CRA audit rake task (330 lines) | + +--- + +## 10. Environment Variables + +``` +# SFTP (manifest upload) +CRA_MEDIA_CLOUD_SFTP_HOST / _USERNAME / _PASSWORD + +# API (job status polling) +CRA_MEDIA_CLOUD_API_BASE_URL / _USERNAME / _PASSWORD + +# CDN (output URLs) +CRA_MEDIA_CLOUD_CDN_CONTENT_URL # MP4, cover, thumbnails +CRA_MEDIA_CLOUD_CDN_MANIFEST_URL # HLS/DASH manifests + +# S3 +S3_BUCKET_NAME / S3_REGION / AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY +``` + +--- + +## 11. 
Known Gaps & TODO + +### Not yet implemented + +- [ ] **Subtitle trigger on SD completion** — ElevenLabs transcription from sd1 MP4 after phase 1. Not wired up. +- [ ] **Dynamic timeouts in MonitorProcessingJob** — currently fixed 6h. Design doc specified file-size/duration-based formula. +- [ ] **`playable` field in API JSON** — `videos_controller.rb` still returns `ready: video.ready?`. Should add `playable:` based on manifest presence. +- [ ] **SD quality badge on player** — no visual indicator that video is SD-only while HD encodes. + +### Test coverage gaps (folio) + +- [x] MonitorProcessingJob handler integration tests (`handle_failed_uploads_needing_retry`, `reconcile_video_state`, `reconcile_with_remote_jobs`) +- [x] CheckProgressJob: `processing_timed_out?` — video >4h old marks as `processing_failed`; video <4h continues polling +- [x] CheckProgressJob: `finalize_from_completed_phases!` — all jobs REMOVED + stored phase data → `ready` with merged MP4 paths +- [x] CheckProgressJob: tracked job becomes REMOVED → clears `remote_id`, reschedules +- [x] Encoder: `upload_file` method, SFTP session management, retry logic +- [x] CreateFileJob: S3 server-side copy path for videos +- [x] AASM state transition integration tests with CRA concern + +### Test coverage gaps (economia) + +- [x] `AdditionalHtmlComponent` — additional state coverage (legacy video, unprocessed video, ready video) diff --git a/lib/folio/version.rb b/lib/folio/version.rb index 1a824a3dd5..ac03020a08 100644 --- a/lib/folio/version.rb +++ b/lib/folio/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Folio - VERSION = "7.4.1" + VERSION = "7.5.1" end diff --git a/package-lock.json b/package-lock.json index eee5ea858f..c49da7c173 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2534,11 +2534,10 @@ } }, "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": 
"sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", + "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", "dev": true, - "license": "ISC", "dependencies": { "brace-expansion": "^1.1.7" }, @@ -5553,9 +5552,9 @@ "dev": true }, "minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", + "integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", "dev": true, "requires": { "brace-expansion": "^1.1.7" diff --git a/test/dummy/package-lock.json b/test/dummy/package-lock.json index 5a8c8369b2..8648434e4b 100644 --- a/test/dummy/package-lock.json +++ b/test/dummy/package-lock.json @@ -259,16 +259,6 @@ "node": ">= 10" } }, - "node_modules/@trysound/sax": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/@trysound/sax/-/sax-0.2.0.tgz", - "integrity": "sha512-L7z9BgrNEcYyUYtF+HaEfiS5ebkh9jXqbszz7pC0hRBPaatV0XjSD3+eHrpqFemQfgwiFF0QPIarnIihIDn7OA==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=10.13.0" - } - }, "node_modules/@types/triple-beam": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz", @@ -826,9 +816,9 @@ "license": "CC0-1.0" }, "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "version": "3.1.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", + 
"integrity": "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==", "dev": true, "license": "ISC", "dependencies": { @@ -971,6 +961,16 @@ "node": ">=10" } }, + "node_modules/sax": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/sax/-/sax-1.5.0.tgz", + "integrity": "sha512-21IYA3Q5cQf089Z6tgaUTr7lDAyzoTPx5HRtbhsME8Udispad8dC/+sziTNugOEx54ilvatQ9YCzl4KQLPcRHA==", + "dev": true, + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=11.0.0" + } + }, "node_modules/simple-swizzle": { "version": "0.2.2", "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", @@ -1080,18 +1080,18 @@ } }, "node_modules/svgo": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/svgo/-/svgo-2.8.0.tgz", - "integrity": "sha512-+N/Q9kV1+F+UeWYoSiULYo4xYSDQlTgb+ayMobAXPwMnLvop7oxKMo9OzIrX5x3eS4L4f2UHhc9axXwY8DpChg==", + "version": "2.8.2", + "resolved": "https://registry.npmjs.org/svgo/-/svgo-2.8.2.tgz", + "integrity": "sha512-TyzE4NVGLUFy+H/Uy4N6c3G0HEeprsVfge6Lmq+0FdQQ/zqoVYB62IsBZORsiL+o96s6ff/V6/3UQo/C0cgCAA==", "dev": true, "license": "MIT", "dependencies": { - "@trysound/sax": "0.2.0", "commander": "^7.2.0", "css-select": "^4.1.3", "css-tree": "^1.1.3", "csso": "^4.2.0", "picocolors": "^1.0.0", + "sax": "^1.5.0", "stable": "^0.1.8" }, "bin": { diff --git a/test/integration/video_upload_no_download_test.rb b/test/integration/video_upload_no_download_test.rb new file mode 100644 index 0000000000..5be7e8c504 --- /dev/null +++ b/test/integration/video_upload_no_download_test.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +require "test_helper" + +class VideoUploadNoDownloadTest < ActiveSupport::TestCase + test "video file metadata is extracted without full file download" do + video = create(:folio_file_video) + + # Verify metadata was extracted + assert_not_nil video.file_track_duration, "Duration should be extracted" + assert_not_nil video.file_width, "Width should be extracted" + 
assert_not_nil video.file_height, "Height should be extracted" + + # Verify file_url_or_path returns correct type + result = video.file_url_or_path + assert result.is_a?(String) + + # In test env (FileDataStore), should be local path + if Dragonfly.app.datastore.is_a?(Dragonfly::FileDataStore) + assert_not result.start_with?("http") + end + end +end diff --git a/test/jobs/folio/cra_media_cloud/check_progress_job_test.rb b/test/jobs/folio/cra_media_cloud/check_progress_job_test.rb index 97e850d722..3f24e8f63b 100644 --- a/test/jobs/folio/cra_media_cloud/check_progress_job_test.rb +++ b/test/jobs/folio/cra_media_cloud/check_progress_job_test.rb @@ -16,12 +16,10 @@ class TestVideoFile < Folio::File::Video "reference_id" => "REF123" )) - # Job with old generation should be skipped - no new CheckProgressJob enqueued assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do Folio::CraMediaCloud::CheckProgressJob.perform_now(video, encoding_generation: 11111) end - # Video state should be unchanged video.reload assert_equal "full_media_processing", video.remote_services_data["processing_state"] end @@ -40,7 +38,6 @@ class TestVideoFile < Folio::File::Video api_mock = Minitest::Mock.new api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123") - # Job with matching generation should process and reschedule assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do expect_method_called_on( object: Folio::CraMediaCloud::Api, @@ -66,7 +63,6 @@ class TestVideoFile < Folio::File::Video api_mock = Minitest::Mock.new api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123") - # Job without generation (old jobs) should still process assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do expect_method_called_on( object: Folio::CraMediaCloud::Api, @@ -80,6 +76,260 @@ class TestVideoFile < Folio::File::Video api_mock.verify end + # --- Multi-phase tests --- + + test "phase 1 DONE does not trigger processing_done when 
processing_phases is 2" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_phases" => 2, + "remote_id" => "JOB_PHASE1", + "reference_id" => "REF123" + )) + + phase1_job = { + "id" => "JOB_PHASE1", + "status" => "DONE", + "phase" => 1, + "processingPhases" => 2, + "progress" => 1.0, + "lastModified" => Time.current.iso8601, + "output" => [ + { "type" => "MP4", "profiles" => ["sd0"], "path" => "/video/sd0.mp4" }, + { "type" => "MP4", "profiles" => ["sd1"], "path" => "/video/sd1.mp4" }, + { "type" => "MP4", "profiles" => ["sd2"], "path" => "/video/sd2.mp4" }, + { "type" => "HLS", "profiles" => ["sd0", "sd1", "sd2"], "path" => "/video/sd_master.m3u8" }, + { "type" => "DASH", "profiles" => ["sd0", "sd1", "sd2"], "path" => "/video/sd_manifest.mpd" }, + { "type" => "THUMBNAILS", "profiles" => ["cover"], "path" => "/video/cover.jpg" }, + { "type" => "THUMBNAILS", "profiles" => ["thumb"], "path" => "/video/thumb.vtt" }, + ] + } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_job, phase1_job, ["JOB_PHASE1"]) + + # Should reschedule (phase 1 done, waiting for phase 2) + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + api_mock.verify + video.reload + + # AASM should stay in processing (not ready) + assert_equal "processing", video.aasm_state + assert_equal "full_media_processing", video.remote_services_data["processing_state"] + + # Intermediate phase data should be saved + assert_equal({ "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4", "sd2" => "/video/sd2.mp4" }, + video.remote_services_data["phase_1_content_mp4_paths"]) + assert_equal "JOB_PHASE1", video.remote_services_data["phase_1_remote_id"] + assert video.remote_services_data["phase_1_completed_at"].present? 
+ + # Manifest/cover/thumbnails paths populated for immediate playability + assert_equal "/video/sd_master.m3u8", video.remote_services_data["manifest_hls_path"] + assert_equal "/video/sd_manifest.mpd", video.remote_services_data["manifest_dash_path"] + assert_equal "/video/cover.jpg", video.remote_services_data["cover_path"] + assert_equal "/video/thumb.vtt", video.remote_services_data["thumbnails_path"] + end + + test "phase 2 DONE triggers processing_done" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_phases" => 2, + "reference_id" => "REF123", + "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4" }, + "phase_1_completed_at" => 1.minute.ago.iso8601, + "phase_1_remote_id" => "JOB_PHASE1", + )) + + full_output = [ + { "type" => "MP4", "profiles" => ["sd0"], "path" => "/video/sd0.mp4" }, + { "type" => "MP4", "profiles" => ["sd1"], "path" => "/video/sd1.mp4" }, + { "type" => "MP4", "profiles" => ["hd1"], "path" => "/video/hd1.mp4" }, + { "type" => "MP4", "profiles" => ["hd2"], "path" => "/video/hd2.mp4" }, + { "type" => "HLS", "profiles" => ["sd0", "sd1", "hd1", "hd2"], "path" => "/video/master.m3u8" }, + { "type" => "DASH", "profiles" => ["sd0", "sd1", "hd1", "hd2"], "path" => "/video/manifest.mpd" }, + { "type" => "THUMBNAILS", "profiles" => ["cover"], "path" => "/video/cover.jpg" }, + { "type" => "THUMBNAILS", "profiles" => ["thumb"], "path" => "/video/thumb.jpg" }, + ] + + phase1_job = { + "id" => "JOB_PHASE1", "status" => "DONE", "phase" => 1, + "processingPhases" => 2, "progress" => 1.0, + "lastModified" => 2.minutes.ago.iso8601, + "output" => full_output.select { |o| o["profiles"].first&.start_with?("sd") || o["type"] != "MP4" } + } + + phase2_job = { + "id" => "JOB_PHASE2", "status" => "DONE", "phase" => 2, + "processingPhases" => 2, "prevPhaseJobId" => "JOB_PHASE1", + "progress" => 1.0, "lastModified" => Time.current.iso8601, + 
"output" => full_output + } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [phase1_job, phase2_job], [], ref_id: "REF123") + + # Should NOT reschedule — processing is complete + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + api_mock.verify + video.reload + + assert_equal "ready", video.aasm_state + assert_equal({ "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4", + "hd1" => "/video/hd1.mp4", "hd2" => "/video/hd2.mp4" }, + video.remote_services_data["content_mp4_paths"]) + assert_equal "/video/master.m3u8", video.remote_services_data["manifest_hls_path"] + assert_equal "/video/manifest.mpd", video.remote_services_data["manifest_dash_path"] + end + + test "phase 2 PROCESSING continues polling with mapped progress" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_phases" => 2, + "reference_id" => "REF123", + "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4" }, + )) + + phase1_job = { + "id" => "JOB_PHASE1", "status" => "DONE", "phase" => 1, + "processingPhases" => 2, "progress" => 1.0, + "lastModified" => 2.minutes.ago.iso8601, + "output" => [] + } + + phase2_job = { + "id" => "JOB_PHASE2", "status" => "PROCESSING", "phase" => 2, + "processingPhases" => 2, "progress" => 0.6, + "lastModified" => Time.current.iso8601, + } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [phase1_job, phase2_job], [], ref_id: "REF123") + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + api_mock.verify + video.reload + + # Raw CRA progress for 
current phase: 0.6 * 100 = 60 + assert_equal 60, video.remote_services_data["progress_percentage"] + assert_equal "processing", video.aasm_state + end + + test "single-phase job backward compat — DONE triggers ready" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "reference_id" => "REF123" + )) + # No processing_phases key at all + + full_output = [ + { "type" => "MP4", "profiles" => ["sd0"], "path" => "/video/sd0.mp4" }, + { "type" => "MP4", "profiles" => ["hd1"], "path" => "/video/hd1.mp4" }, + { "type" => "HLS", "profiles" => ["sd0", "hd1"], "path" => "/video/master.m3u8" }, + { "type" => "DASH", "profiles" => ["sd0", "hd1"], "path" => "/video/manifest.mpd" }, + { "type" => "THUMBNAILS", "profiles" => ["cover"], "path" => "/video/cover.jpg" }, + { "type" => "THUMBNAILS", "profiles" => ["thumb"], "path" => "/video/thumb.jpg" }, + ] + + api_response = { + "id" => "JOB123", "status" => "DONE", "progress" => 1.0, + "lastModified" => Time.current.iso8601, + "output" => full_output + } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123") + + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + api_mock.verify + video.reload + + assert_equal "ready", video.aasm_state + assert_equal({ "sd0" => "/video/sd0.mp4", "hd1" => "/video/hd1.mp4" }, + video.remote_services_data["content_mp4_paths"]) + assert_equal "/video/master.m3u8", video.remote_services_data["manifest_hls_path"] + assert_equal "/video/manifest.mpd", video.remote_services_data["manifest_dash_path"] + end + + test "phase 2 FAILED triggers failure" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + 
"processing_phases" => 2, + "reference_id" => "REF123", + "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4" }, + )) + + phase1_job = { + "id" => "JOB_PHASE1", "status" => "DONE", "phase" => 1, + "processingPhases" => 2, "progress" => 1.0, + "lastModified" => 2.minutes.ago.iso8601, + "output" => [] + } + + phase2_job = { + "id" => "JOB_PHASE2", "status" => "FAILED", "phase" => 2, + "processingPhases" => 2, "progress" => 0.3, + "lastModified" => Time.current.iso8601, + "messages" => [{ "type" => "ERROR", "message" => "HD encoding failed" }] + } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [phase1_job, phase2_job], [], ref_id: "REF123") + + # Should NOT reschedule — failure stops polling + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + api_mock.verify + video.reload + + assert_equal "encoding_failed", video.remote_services_data["processing_state"] + assert_equal "HD encoding failed", video.remote_services_data["error_message"] + end + + # --- Existing encoding generation tests --- + test "skips already ready video regardless of encoding_generation" do video = create_test_video_in_processing_state video.update_column(:aasm_state, "ready") @@ -88,24 +338,294 @@ class TestVideoFile < Folio::File::Video "reference_id" => "REF123" )) - # Should skip because video is already ready assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do Folio::CraMediaCloud::CheckProgressJob.perform_now(video, encoding_generation: 12345) end end + # --- Progress tracking tests --- + + test "parses encoding messages for progress milestones" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_state" => "full_media_processing", + "reference_id" => "REF123" + 
)) + + api_response = { + "id" => "JOB123", "status" => "PROCESSING", "progress" => 0.5, + "lastModified" => Time.current.iso8601, + "outputParams" => { "duration" => 600.0 }, + "messages" => [ + { "createdDate" => "2026-02-25T10:00:00Z", "type" => "INFO", "message" => "validation started at host vodenc1" }, + { "createdDate" => "2026-02-25T10:00:05Z", "type" => "INFO", "message" => "processing started at host vodenc1" }, + { "createdDate" => "2026-02-25T10:00:06Z", "type" => "INFO", "message" => "Transcoding worker - video: going to transcode 600.0 seconds for 7 VIDEO profiles" }, + { "createdDate" => "2026-02-25T10:00:06Z", "type" => "INFO", "message" => "Transcoding worker - audio: going to transcode 600.0 seconds for 2 AUDIO profiles" }, + { "createdDate" => "2026-02-25T10:02:00Z", "type" => "INFO", "message" => "Transcoding worker - audio: finished" } + ] + } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123") + + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + + video.reload + assert_equal 600.0, video.remote_services_data["video_duration"] + assert_includes video.remote_services_data["phases_completed"], "audio" + end + + test "DONE transition sets progress to 100 and state to ready" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_state" => "full_media_processing", + "reference_id" => "REF123" + )) + + output = [ + { "type" => "MP4", "profiles" => ["sd1"], "path" => "/test/sd1.mp4" }, + { "type" => "MP4", "profiles" => ["hd1"], "path" => "/test/hd1.mp4" }, + { "type" => "HLS", "profiles" => ["sd1", "hd1"], "path" => "/test/master.m3u8" }, + { "type" => "DASH", "profiles" => ["sd1", "hd1"], "path" => "/test/master.mpd" }, + { "type" => "THUMBNAILS", "profiles" => ["cover"], "path" => 
"/test/cover.jpg" }, + { "type" => "THUMBNAILS", "profiles" => ["thumb"], "path" => "/test/thumb.jpg" } + ] + + api_response = { + "id" => "JOB123", "status" => "DONE", "progress" => 1.0, + "lastModified" => Time.current.iso8601, + "output" => output, + "outputParams" => { "duration" => 120.0 }, + "messages" => [] + } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123") + + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + + video.reload + assert_equal "full_media_processed", video.remote_services_data["processing_state"] + assert_equal 100.0, video.remote_services_data["progress_percentage"] + assert_equal "ready", video.aasm_state + end + + test "FAILED job transitions to processing_failed and schedules retry on first failure" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_state" => "full_media_processing", + "reference_id" => "REF123", + "progress_percentage" => 45.0 + )) + + api_response = { + "id" => "JOB123", "status" => "FAILED", + "lastModified" => Time.current.iso8601, + "messages" => [ + { "type" => "ERROR", "message" => "filesize mismatch" } + ] + } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123") + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + video.reload + assert_equal "processing_failed", video.aasm_state + assert_nil video.remote_services_data["progress_percentage"] + assert_equal "filesize mismatch", video.remote_services_data["error_message"] + assert_equal 1, video.remote_services_data["retry_count"] + assert 
video.remote_services_data["retry_scheduled_at"].present? + end + + test "FAILED job on second failure is final — no retry scheduled" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_state" => "full_media_processing", + "reference_id" => "REF123", + "retry_count" => 1 + )) + + api_response = { + "id" => "JOB123", "status" => "FAILED", + "lastModified" => Time.current.iso8601, + "messages" => [ + { "type" => "ERROR", "message" => "filesize mismatch again" } + ] + } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123") + + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CreateMediaJob do + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + video.reload + assert_equal "processing_failed", video.aasm_state + assert_equal 2, video.remote_services_data["retry_count"] + assert_nil video.remote_services_data["retry_scheduled_at"] + end + + # --- Timeout tests --- + + test "processing_timed_out? marks video as failed after 4 hours" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_step_started_at" => 5.hours.ago.iso8601, + "reference_id" => "REF123" + )) + + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + + video.reload + assert_equal "processing_failed", video.aasm_state + end + + test "processing_timed_out? 
does not fire within 4 hours" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_step_started_at" => 3.hours.ago.iso8601, + "reference_id" => "REF123" + )) + + api_response = { "id" => "JOB123", "status" => "PROCESSING", "progress" => 0.5, + "lastModified" => Time.current.iso8601 } + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [api_response], [], ref_id: "REF123") + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + video.reload + assert_equal "processing", video.aasm_state + end + + # --- Finalize from completed phases test --- + + test "finalizes from stored phase data when all CRA jobs are REMOVED" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "processing_phases" => 2, + "reference_id" => "REF123", + "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4" }, + "phase_1_completed_at" => 5.minutes.ago.iso8601, + "phase_1_remote_id" => "JOB_PHASE1", + "phase_2_content_mp4_paths" => { "hd1" => "/video/hd1.mp4", "hd2" => "/video/hd2.mp4" }, + "phase_2_completed_at" => 1.minute.ago.iso8601, + "phase_2_remote_id" => "JOB_PHASE2", + "manifest_hls_path" => "/video/master.m3u8", + "manifest_dash_path" => "/video/manifest.mpd", + )) + + removed_jobs = [ + { "id" => "JOB_PHASE1", "status" => "REMOVED", "phase" => 1, "lastModified" => 2.minutes.ago.iso8601 }, + { "id" => "JOB_PHASE2", "status" => "REMOVED", "phase" => 2, "lastModified" => 1.minute.ago.iso8601 }, + ] + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, removed_jobs, [], ref_id: "REF123") + + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do + 
expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + api_mock.verify + video.reload + + assert_equal "ready", video.aasm_state + assert_equal "full_media_processed", video.remote_services_data["processing_state"] + assert_equal 100.0, video.remote_services_data["progress_percentage"] + expected_mp4 = { "sd0" => "/video/sd0.mp4", "sd1" => "/video/sd1.mp4", + "hd1" => "/video/hd1.mp4", "hd2" => "/video/hd2.mp4" } + assert_equal expected_mp4, video.remote_services_data["content_mp4_paths"] + end + + # --- REMOVED remote_id handling --- + + test "clears remote_id and reschedules when tracked job becomes REMOVED" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "remote_id" => "JOB_GONE", + "reference_id" => "REF123" + )) + + removed_job = { "id" => "JOB_GONE", "status" => "REMOVED", "lastModified" => Time.current.iso8601 } + + api_mock = Minitest::Mock.new + api_mock.expect(:get_job, removed_job, ["JOB_GONE"]) + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + expect_method_called_on( + object: Folio::CraMediaCloud::Api, + method: :new, + return_value: api_mock + ) do + Folio::CraMediaCloud::CheckProgressJob.perform_now(video) + end + end + + api_mock.verify + video.reload + + # remote_id cleared so next poll falls back to reference_id path + assert_nil video.remote_services_data["remote_id"] + # AASM stays in processing — not marked failed + assert_equal "processing", video.aasm_state + end + private def create_test_video_in_processing_state video = TestVideoFile.new(site: get_any_site) video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4") video.dont_run_after_save_jobs = true - # Stub create_full_media to prevent the full processing chain during save expect_method_called_on(object: video, method: :create_full_media) do 
video.save! end - # Set desired initial state (merge to preserve encoding_generation from process_attached_file) video.update!(remote_services_data: video.remote_services_data.merge( "service" => "cra_media_cloud", "processing_state" => "full_media_processing" diff --git a/test/jobs/folio/cra_media_cloud/create_media_job_test.rb b/test/jobs/folio/cra_media_cloud/create_media_job_test.rb index 6a27acbc46..c04205829f 100644 --- a/test/jobs/folio/cra_media_cloud/create_media_job_test.rb +++ b/test/jobs/folio/cra_media_cloud/create_media_job_test.rb @@ -14,54 +14,241 @@ class TestVideoFile < Folio::File::Video "encoding_generation" => generation_value )) - # Mock S3 metadata for reference_id generation - s3_metadata_mock = Struct.new(:etag).new('"abc12345def67890"') + with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock| + encoder_mock.expect(:upload_file, nil, [video], profile_group: nil, processing_phases: 1, reference_id: String) + api_mock.expect(:get_jobs, [], [], ref_id: String) + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + perform_job(video, encoder_mock, api_mock) + end + end + + video.reload + assert_equal generation_value, video.encoding_generation, + "encoding_generation should be preserved through CreateMediaJob" + end + + test "submits single manifest and sets full_media_processing state" do + video = create_test_video_in_processing_state + + with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock| + encoder_mock.expect(:upload_file, nil, [video], profile_group: nil, processing_phases: 1, reference_id: String) + api_mock.expect(:get_jobs, [], [], ref_id: String) + + perform_job(video, encoder_mock, api_mock) + encoder_mock.verify + end + + video.reload + assert_equal "full_media_processing", video.remote_services_data["processing_state"] + end + + test "passes processing_phases to encoder when video defines it" do + video = create_test_video_in_processing_state + 
video.define_singleton_method(:encoder_processing_phases) { 2 } + + with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock| + encoder_mock.expect(:upload_file, nil, [video], + profile_group: nil, processing_phases: 2, reference_id: String) + api_mock.expect(:get_jobs, [], [], ref_id: String) + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + perform_job(video, encoder_mock, api_mock) + end + + encoder_mock.verify + end + + video.reload + assert_equal 2, video.remote_services_data["processing_phases"] + end + + test "check_existing_job: DONE returns :done" do + video = create_test_video_in_processing_state + + mock_api = Minitest::Mock.new + mock_api.expect(:get_jobs, [ + { "id" => 1, "refId" => "test-abc123", "status" => "DONE", + "profileGroup" => "VoD", "lastModified" => "2026-01-01T00:00:00Z", + "messages" => [], "output" => [] }, + ], [], ref_id: "test-abc123") + + job_instance = Folio::CraMediaCloud::CreateMediaJob.new + + Folio::CraMediaCloud::Api.stub(:new, mock_api) do + result = job_instance.send(:check_existing_job, "test-abc123", video) + assert_equal :done, result[:status] + end + end + + test "check_existing_job: picks latest job when multiple exist" do + video = create_test_video_in_processing_state + + mock_api = Minitest::Mock.new + mock_api.expect(:get_jobs, [ + { "id" => 1, "refId" => "test-abc123", "status" => "FAILED", + "profileGroup" => "VoD", "lastModified" => "2026-01-01T00:00:00Z", + "messages" => [], "output" => [] }, + { "id" => 2, "refId" => "test-abc123", "status" => "DONE", + "profileGroup" => "VoD", "lastModified" => "2026-01-02T00:00:00Z", + "messages" => [], "output" => [] }, + ], [], ref_id: "test-abc123") + + job_instance = Folio::CraMediaCloud::CreateMediaJob.new + + Folio::CraMediaCloud::Api.stub(:new, mock_api) do + result = job_instance.send(:check_existing_job, "test-abc123", video) + assert_equal :done, result[:status] + assert_equal 2, result[:job]["id"] + end + end + + test 
"check_existing_job: empty jobs returns :not_found" do + video = create_test_video_in_processing_state + + mock_api = Minitest::Mock.new + mock_api.expect(:get_jobs, [], [], ref_id: "test-abc123") + + job_instance = Folio::CraMediaCloud::CreateMediaJob.new + + Folio::CraMediaCloud::Api.stub(:new, mock_api) do + result = job_instance.send(:check_existing_job, "test-abc123", video) + assert_equal :not_found, result[:status] + end + end + + test "raises when encoding_generation is nil to prevent stale CRA job matching" do + video = create_test_video_in_processing_state + video.update!(remote_services_data: video.remote_services_data.merge( + "encoding_generation" => nil + )) + + with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock| + error = assert_raises(RuntimeError) do + perform_job(video, encoder_mock, api_mock) + end + assert_match(/encoding_generation not set/, error.message) + end + end + + test "reference_id includes video ID to prevent cross-contamination between records" do + video = create_test_video_in_processing_state + + with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock| + captured_reference_id = nil + encoder_mock.expect(:upload_file, nil) do |_file, **kwargs| + captured_reference_id = kwargs[:reference_id] + true + end + api_mock.expect(:get_jobs, []) do |**_kwargs| + true + end + + perform_job(video, encoder_mock, api_mock) + + assert_not_nil captured_reference_id + assert_includes captured_reference_id, "-#{video.id}-", + "reference_id must contain video ID for per-record uniqueness" + end + end + + test "marks video as permanently failed when S3 source file is missing" do + video = create_test_video_in_processing_state + + # Mock S3 datastore to raise NotFound (simulates missing source file) s3_datastore_mock = Minitest::Mock.new storage_mock = Minitest::Mock.new - storage_mock.expect(:head_object, s3_metadata_mock, [String, String]) + storage_mock.expect(:head_object, nil) do |*_args| + raise 
Excon::Error::NotFound.new("Expected(200) <=> Actual(404 Not Found)") + end s3_datastore_mock.expect(:root_path, "uploads") s3_datastore_mock.expect(:storage, storage_mock) - # Mock encoder - encoder_mock = Minitest::Mock.new - encoder_mock.expect(:upload_file, nil, [video], profile_group: nil, reference_id: String) - - # Mock API for existing job check - api_mock = Minitest::Mock.new - api_mock.expect(:get_jobs, [], [], ref_id: String) - - assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + ENV["S3_BUCKET_NAME"] = "test-bucket" + begin Dragonfly.app.stub(:datastore, s3_datastore_mock) do - Folio::CraMediaCloud::Encoder.stub(:new, encoder_mock) do - Folio::CraMediaCloud::Api.stub(:new, api_mock) do - Folio::CraMediaCloud::CreateMediaJob.perform_now(video) - end + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CheckProgressJob do + Folio::CraMediaCloud::CreateMediaJob.perform_now(video) end end + ensure + ENV.delete("S3_BUCKET_NAME") end - # Verify encoding_generation is preserved in remote_services_data video.reload - assert_equal generation_value, video.encoding_generation, - "encoding_generation should be preserved through CreateMediaJob" + assert_equal "processing_failed", video.aasm_state + assert_equal "source_file_missing", video.remote_services_data["processing_state"] + assert_includes video.remote_services_data["error_message"], "Source file not found" + end + + test "retries from processing_failed state via retry_processing!" 
do + video = create_test_video_in_processing_state + video.update_column(:aasm_state, "processing_failed") + video.update!(remote_services_data: video.remote_services_data.merge( + "retry_count" => 1, + "retry_scheduled_at" => Time.current.iso8601 + )) + + with_mocked_s3_and_encoder(video) do |encoder_mock, api_mock| + encoder_mock.expect(:upload_file, nil, [video], profile_group: nil, processing_phases: 1, reference_id: String) + api_mock.expect(:get_jobs, [], [], ref_id: String) + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + perform_job(video, encoder_mock, api_mock) + end + end + + video.reload + assert_equal "processing", video.aasm_state + assert_equal "full_media_processing", video.remote_services_data["processing_state"] end private - def create_test_video_in_processing_state - video = TestVideoFile.new(site: get_any_site) + def create_test_video_in_processing_state(klass: TestVideoFile) + video = klass.new(site: get_any_site) video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4") video.dont_run_after_save_jobs = true - # Stub create_full_media to prevent the full processing chain during save expect_method_called_on(object: video, method: :create_full_media) do video.save! 
end - # Set desired initial state (merge to preserve encoding_generation from process_attached_file) video.update!(remote_services_data: video.remote_services_data.merge( "service" => "cra_media_cloud", - "processing_state" => "full_media_processing" + "processing_state" => "full_media_processing", + "encoding_generation" => Time.current.to_i )) video end + + def with_mocked_s3_and_encoder(video) + s3_metadata_mock = Struct.new(:etag).new('"abc12345def67890"') + s3_datastore_mock = Minitest::Mock.new + storage_mock = Minitest::Mock.new + + # head_object is called with bucket_name (ENV) and key — allow any args + storage_mock.expect(:head_object, s3_metadata_mock) do |*_args| + true + end + s3_datastore_mock.expect(:root_path, "uploads") + s3_datastore_mock.expect(:storage, storage_mock) + + encoder_mock = Minitest::Mock.new + api_mock = Minitest::Mock.new + + ENV["S3_BUCKET_NAME"] = "test-bucket" + Dragonfly.app.stub(:datastore, s3_datastore_mock) do + yield encoder_mock, api_mock + end + ensure + ENV.delete("S3_BUCKET_NAME") + end + + def perform_job(video, encoder_mock, api_mock) + Folio::CraMediaCloud::Encoder.stub(:new, encoder_mock) do + Folio::CraMediaCloud::Api.stub(:new, api_mock) do + Folio::CraMediaCloud::CreateMediaJob.perform_now(video) + end + end + end end diff --git a/test/jobs/folio/cra_media_cloud/encoder_test.rb b/test/jobs/folio/cra_media_cloud/encoder_test.rb new file mode 100644 index 0000000000..7220e32fa9 --- /dev/null +++ b/test/jobs/folio/cra_media_cloud/encoder_test.rb @@ -0,0 +1,196 @@ +# frozen_string_literal: true + +require "test_helper" + +class Folio::CraMediaCloud::EncoderTest < ActiveSupport::TestCase + test "build_ingest_manifest uses src attribute with presigned URL" do + encoder = Folio::CraMediaCloud::Encoder.new + + file_mock = Struct.new(:file_name, :file_size, :file_uid, :id).new( + "video.mp4", 123456, "uploads/video.mp4", 1 + ) + + presigned_url = 
"https://s3.amazonaws.com/bucket/uploads/video.mp4?X-Amz-Credential=xxx&X-Amz-Expires=604800" + + manifest_xml = encoder.send( + :build_ingest_manifest, + file_mock, + md5: "abc123def456", + ref_id: "test-ref-001", + profile_group: "VoDSD", + presigned_url: presigned_url + ) + + assert_includes manifest_xml, 'src="https://s3.amazonaws.com/bucket/uploads/video.mp4' + assert_not_includes manifest_xml, "file=" + assert_includes manifest_xml, 'size="123456"' + assert_includes manifest_xml, 'md5="abc123def456"' + assert_includes manifest_xml, "VoDSD" + assert_includes manifest_xml, "test-ref-001" + end + + test "build_ingest_manifest falls back to file attribute when no presigned URL" do + encoder = Folio::CraMediaCloud::Encoder.new + + file_mock = Struct.new(:file_name, :file_size, :file_uid, :id).new( + "video.mp4", 123456, "uploads/video.mp4", 1 + ) + + manifest_xml = encoder.send( + :build_ingest_manifest, + file_mock, + md5: "abc123def456", + ref_id: "test-ref-001", + profile_group: "VoD", + presigned_url: nil + ) + + assert_includes manifest_xml, 'file="video.mp4"' + assert_not_includes manifest_xml, "src=" + end + + # --- upload_file --- + + test "upload_file builds manifest and uploads it via SFTP, returns result hash" do + encoder = Folio::CraMediaCloud::Encoder.new + + file_mock = Struct.new(:file_name, :file_size, :file_uid, :id, :slug).new( + "video.mp4", 123456, "uploads/video.mp4", 42, "my-video" + ) + + s3_metadata_mock = Struct.new(:headers).new({ "ETag" => '"abcd1234"' }) + fake_presigned_url = "https://s3.amazonaws.com/bucket/video.mp4?X-Amz-Expires=604800" + + uploaded_path = nil + uploaded_xml = nil + fake_sftp = Object.new + fake_sftp.define_singleton_method(:upload!) 
do |source, dest| + uploaded_path = dest + uploaded_xml = source.read + end + + encoder.define_singleton_method(:with_robust_sftp_session) { |&blk| blk.call(fake_sftp) } + + result = encoder.stub(:s3_dragonfly_head_object, s3_metadata_mock) do + encoder.stub(:generate_presigned_url, fake_presigned_url) do + encoder.upload_file(file_mock, reference_id: "test-ref-001") + end + end + + assert_equal "test-ref-001", result[:ref_id] + assert_equal "/ingest/regular/test-ref-001_manifest.xml", result[:xml_manifest_path] + assert result[:presigned_url], "presigned_url flag should be truthy" + assert_equal "/ingest/regular/test-ref-001_manifest.xml", uploaded_path + assert_includes uploaded_xml, "test-ref-001" + assert_includes uploaded_xml, fake_presigned_url + end + + test "upload_file uses provided reference_id in SFTP path" do + encoder = Folio::CraMediaCloud::Encoder.new + + file_mock = Struct.new(:file_name, :file_size, :file_uid, :id, :slug).new( + "video.mp4", 100, "uploads/video.mp4", 1, "slug" + ) + + s3_metadata_mock = Struct.new(:headers).new({ "ETag" => '"ff00ff00"' }) + uploaded_path = nil + fake_sftp = Object.new + fake_sftp.define_singleton_method(:upload!) { |_src, dest| uploaded_path = dest } + + encoder.define_singleton_method(:with_robust_sftp_session) { |&blk| blk.call(fake_sftp) } + + encoder.stub(:s3_dragonfly_head_object, s3_metadata_mock) do + encoder.stub(:generate_presigned_url, "https://s3.example.com/v.mp4") do + encoder.upload_file(file_mock, reference_id: "custom-ref-xyz") + end + end + + assert_equal "/ingest/regular/custom-ref-xyz_manifest.xml", uploaded_path + end + + # --- upload_with_retry --- + + test "upload_with_retry raises immediately when max_retries is 0" do + encoder = Folio::CraMediaCloud::Encoder.new + + failing_sftp = Object.new + failing_sftp.define_singleton_method(:upload!) 
{ |_, _| raise "network error" } + + err = assert_raises(RuntimeError) do + encoder.send(:upload_with_retry, failing_sftp, StringIO.new("data"), "/dest/manifest.xml", max_retries: 0) + end + assert_match "network error", err.message + end + + test "upload_with_retry retries on transient failure and succeeds on next attempt" do + encoder = Folio::CraMediaCloud::Encoder.new + encoder.define_singleton_method(:sleep) { |_| } # no-op to avoid real sleep in tests + + attempts = 0 + flaky_sftp = Object.new + flaky_sftp.define_singleton_method(:upload!) do |_src, _dest| + attempts += 1 + raise "transient error" if attempts < 2 + end + + encoder.send(:upload_with_retry, flaky_sftp, StringIO.new("data"), "/dest/manifest.xml", max_retries: 1) + + assert_equal 2, attempts + end + + test "upload_with_retry raises after all retries exhausted" do + encoder = Folio::CraMediaCloud::Encoder.new + encoder.define_singleton_method(:sleep) { |_| } + + attempts = 0 + always_fail_sftp = Object.new + always_fail_sftp.define_singleton_method(:upload!) 
do |_, _| + attempts += 1 + raise "persistent error" + end + + error = assert_raises(RuntimeError) do + encoder.send(:upload_with_retry, always_fail_sftp, StringIO.new("data"), "/dest/manifest.xml", max_retries: 2) + end + assert_match(/persistent error/, error.message) + + assert_equal 3, attempts # 1 initial + 2 retries + end + + # --- with_robust_sftp_session --- + + test "with_robust_sftp_session wraps SSH authentication failure" do + encoder = Folio::CraMediaCloud::Encoder.new + + # ENV vars must be present so Net::SSH.start is actually reached (before the stub fires) + ENV["CRA_MEDIA_CLOUD_SFTP_HOST"] = "sftp.example.com" + ENV["CRA_MEDIA_CLOUD_SFTP_USERNAME"] = "user" + ENV["CRA_MEDIA_CLOUD_SFTP_PASSWORD"] = "pass" + + Net::SSH.stub(:start, ->(*_args, **_kwargs) { raise Net::SSH::AuthenticationFailed, "bad credentials" }) do + err = assert_raises(RuntimeError) do + encoder.send(:with_robust_sftp_session) { |_sftp| } + end + assert_match "SSH authentication failed", err.message + end + ensure + %w[CRA_MEDIA_CLOUD_SFTP_HOST CRA_MEDIA_CLOUD_SFTP_USERNAME CRA_MEDIA_CLOUD_SFTP_PASSWORD].each { |k| ENV.delete(k) } + end + + test "with_robust_sftp_session wraps generic SFTP errors" do + encoder = Folio::CraMediaCloud::Encoder.new + + ENV["CRA_MEDIA_CLOUD_SFTP_HOST"] = "sftp.example.com" + ENV["CRA_MEDIA_CLOUD_SFTP_USERNAME"] = "user" + ENV["CRA_MEDIA_CLOUD_SFTP_PASSWORD"] = "pass" + + Net::SSH.stub(:start, ->(*_args, **_kwargs) { raise "connection refused" }) do + err = assert_raises(RuntimeError) do + encoder.send(:with_robust_sftp_session) { |_sftp| } + end + assert_match "SFTP session error", err.message + end + ensure + %w[CRA_MEDIA_CLOUD_SFTP_HOST CRA_MEDIA_CLOUD_SFTP_USERNAME CRA_MEDIA_CLOUD_SFTP_PASSWORD].each { |k| ENV.delete(k) } + end +end diff --git a/test/jobs/folio/cra_media_cloud/monitor_processing_job_test.rb b/test/jobs/folio/cra_media_cloud/monitor_processing_job_test.rb index 0fbcd1225a..79ad591e7c 100644 --- 
a/test/jobs/folio/cra_media_cloud/monitor_processing_job_test.rb +++ b/test/jobs/folio/cra_media_cloud/monitor_processing_job_test.rb @@ -78,6 +78,125 @@ def eval(*); end # no-op for lock release assert_equal "processing_failed", video.aasm_state end + test "rescues failed video awaiting retry when retry job is lost" do + video = create(:folio_file_video) + video.update!( + aasm_state: :processing_failed, + remote_services_data: { + "service" => "cra_media_cloud", + "retry_count" => 1, + "retry_scheduled_at" => 10.minutes.ago.iso8601, + } + ) + + with_unlocked_monitor_job do + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do + Folio::CraMediaCloud::MonitorProcessingJob.perform_now + end + end + end + + test "does not rescue finally failed video (retry_count >= 2)" do + video = create(:folio_file_video) + video.update!( + aasm_state: :processing_failed, + remote_services_data: { + "service" => "cra_media_cloud", + "retry_count" => 2, + } + ) + + with_unlocked_monitor_job do + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CreateMediaJob do + Folio::CraMediaCloud::MonitorProcessingJob.perform_now + end + end + end + + test "triggers process! for stuck unprocessed video with file_uid" do + video = create(:folio_file_video) + video.update_columns( + aasm_state: "unprocessed", + file_uid: "2026/03/09/13/20/26/test-uuid/test.mp4", + created_at: 10.minutes.ago + ) + + with_unlocked_monitor_job do + Folio::CraMediaCloud::MonitorProcessingJob.perform_now + end + + video.reload + assert_not_equal "unprocessed", video.aasm_state, "Video should no longer be unprocessed after safety net" + end + + test "does not trigger process! 
for recently created unprocessed video" do + video = create(:folio_file_video) + video.update_columns( + aasm_state: "unprocessed", + file_uid: "2026/03/09/13/20/26/test-uuid/test.mp4", + created_at: 2.minutes.ago + ) + + with_unlocked_monitor_job do + Folio::CraMediaCloud::MonitorProcessingJob.perform_now + end + + video.reload + assert_equal "unprocessed", video.aasm_state + end + + test "does not trigger process! for unprocessed video without file_uid" do + video = create(:folio_file_video) + video.update_columns( + aasm_state: "unprocessed", + file_uid: nil, + created_at: 10.minutes.ago + ) + + with_unlocked_monitor_job do + Folio::CraMediaCloud::MonitorProcessingJob.perform_now + end + + video.reload + assert_equal "unprocessed", video.aasm_state + end + + test "rescues video stuck in enqueued state for over 10 minutes" do + video = create(:folio_file_video) + video.update!( + aasm_state: :processing, + remote_services_data: { + "service" => "cra_media_cloud", + "processing_state" => "enqueued", + "processing_step_started_at" => 15.minutes.ago.iso8601 + } + ) + + with_unlocked_monitor_job do + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do + Folio::CraMediaCloud::MonitorProcessingJob.perform_now + end + end + end + + test "does not rescue freshly enqueued video" do + video = create(:folio_file_video) + video.update!( + aasm_state: :processing, + remote_services_data: { + "service" => "cra_media_cloud", + "processing_state" => "enqueued", + "processing_step_started_at" => 3.minutes.ago.iso8601 + } + ) + + with_unlocked_monitor_job do + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CreateMediaJob do + Folio::CraMediaCloud::MonitorProcessingJob.perform_now + end + end + end + test "upload_is_stuck? 
returns false for small file within timeout" do video = create(:folio_file_video, file_size: 10.megabytes) upload_started_at = 2.minutes.ago @@ -152,4 +271,149 @@ def eval(*); end # no-op for lock release # Should use base timeout of 5 minutes assert_equal false, result end + + # --- handle_failed_uploads_needing_retry --- + + test "schedules CreateMediaJob for upload_failed video older than 5 minutes" do + video = create(:folio_file_video) + video.update!( + aasm_state: :processing, + remote_services_data: { + "service" => "cra_media_cloud", + "processing_state" => "upload_failed", + "processing_step_started_at" => 10.minutes.ago.iso8601 + } + ) + + job = Folio::CraMediaCloud::MonitorProcessingJob.new + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do + job.send(:handle_failed_uploads_needing_retry) + end + end + + test "schedules CreateMediaJob for encoding_failed video older than 5 minutes" do + video = create(:folio_file_video) + video.update!( + aasm_state: :processing, + remote_services_data: { + "service" => "cra_media_cloud", + "processing_state" => "encoding_failed", + "processing_step_started_at" => 10.minutes.ago.iso8601 + } + ) + + job = Folio::CraMediaCloud::MonitorProcessingJob.new + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do + job.send(:handle_failed_uploads_needing_retry) + end + end + + test "does not retry upload_failed video within 5 minutes" do + video = create(:folio_file_video) + video.update!( + aasm_state: :processing, + remote_services_data: { + "service" => "cra_media_cloud", + "processing_state" => "upload_failed", + "processing_step_started_at" => 3.minutes.ago.iso8601 + } + ) + + job = Folio::CraMediaCloud::MonitorProcessingJob.new + + assert_no_enqueued_jobs only: Folio::CraMediaCloud::CreateMediaJob do + job.send(:handle_failed_uploads_needing_retry) + end + end + + # --- reconcile_video_state --- + + test "reconcile_video_state schedules CheckProgressJob and updates remote_id when 
API finds active job" do + video = create(:folio_file_video) + video.update!( + aasm_state: :processing, + remote_services_data: { + "service" => "cra_media_cloud", + "processing_state" => "full_media_processing", + "reference_id" => "REF456", + "encoding_generation" => 99 + } + ) + + active_job = { "id" => "JOB_ACTIVE", "status" => "PROCESSING", "lastModified" => Time.current.iso8601 } + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [active_job], [], ref_id: "REF456") + + job = Folio::CraMediaCloud::MonitorProcessingJob.new + + Folio::CraMediaCloud::Api.stub(:new, api_mock) do + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + job.send(:reconcile_video_state, video) + end + end + + api_mock.verify + video.reload + assert_equal "JOB_ACTIVE", video.remote_services_data["remote_id"] + end + + test "reconcile_video_state clears reference_id and processing_state when API finds no jobs" do + video = create(:folio_file_video) + video.update!( + aasm_state: :processing, + remote_services_data: { + "service" => "cra_media_cloud", + "processing_state" => "full_media_processing", + "reference_id" => "REF456" + } + ) + + api_mock = Minitest::Mock.new + api_mock.expect(:get_jobs, [], [], ref_id: "REF456") + + job = Folio::CraMediaCloud::MonitorProcessingJob.new + + Folio::CraMediaCloud::Api.stub(:new, api_mock) do + assert_enqueued_jobs 0 do + job.send(:reconcile_video_state, video) + end + end + + api_mock.verify + video.reload + assert_nil video.remote_services_data["reference_id"] + assert_nil video.remote_services_data["processing_state"] + end + + # --- reconcile_with_remote_jobs: all-REMOVED path --- + + test "reconcile_with_remote_jobs schedules CheckProgressJob when all CRA jobs are REMOVED" do + video = create(:folio_file_video) + rs_data = { + "service" => "cra_media_cloud", + "processing_state" => "full_media_processing", + "reference_id" => "REF123", + "encoding_generation" => 42, + "phase_1_completed_at" => 
5.minutes.ago.iso8601, + "phase_1_content_mp4_paths" => { "sd0" => "/video/sd0.mp4" }, + } + video.update_column(:remote_services_data, rs_data) + + removed_jobs = [ + { "id" => "JOB1", "status" => "REMOVED", "phase" => 1, "lastModified" => 2.minutes.ago.iso8601 }, + { "id" => "JOB2", "status" => "REMOVED", "phase" => 2, "lastModified" => 1.minute.ago.iso8601 }, + ] + + job = Folio::CraMediaCloud::MonitorProcessingJob.new + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CheckProgressJob do + job.send(:reconcile_with_remote_jobs, video, rs_data, removed_jobs) + end + + # Should NOT update processing_state — CheckProgressJob handles finalization + video.reload + assert_equal "full_media_processing", video.remote_services_data["processing_state"] + end end diff --git a/test/jobs/folio/file/get_video_metadata_job_test.rb b/test/jobs/folio/file/get_video_metadata_job_test.rb new file mode 100644 index 0000000000..00b67c7e12 --- /dev/null +++ b/test/jobs/folio/file/get_video_metadata_job_test.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require "test_helper" + +class Folio::File::GetVideoMetadataJobTest < ActiveJob::TestCase + test "extracts duration, width, height from local video file" do + file_path = Folio::Engine.root.join("test/fixtures/folio/blank.mp4").to_s + result = Folio::File::GetVideoMetadataJob.perform_now(file_path) + + assert result.is_a?(Hash) + assert result[:duration].is_a?(Integer) + assert result[:duration] > 0 + assert result[:width].is_a?(Integer) + assert result[:width] > 0 + assert result[:height].is_a?(Integer) + assert result[:height] > 0 + end + + test "returns nil values gracefully for invalid path" do + result = Folio::File::GetVideoMetadataJob.perform_now("/nonexistent/file.mp4") + + assert result.is_a?(Hash) + assert_nil result[:duration] + assert_nil result[:width] + assert_nil result[:height] + end +end diff --git a/test/jobs/folio/s3/create_file_job_test.rb b/test/jobs/folio/s3/create_file_job_test.rb new file mode 
100644 index 0000000000..a6b6b580ae --- /dev/null +++ b/test/jobs/folio/s3/create_file_job_test.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +require "test_helper" + +class Folio::S3::CreateFileJobTest < ActiveJob::TestCase + test "video upload falls back to download flow for local file system" do + # In test env with FileDataStore, video upload should use the standard download path + # (S3 copy path is only for actual S3 storage) + s3_path = "test_video.mp4" + + # Create a temp file simulating S3 uploaded file + source_path = "#{Folio::S3::Client::LOCAL_TEST_PATH}/#{s3_path}" + FileUtils.mkdir_p(File.dirname(source_path)) + fixture_path = Folio::Engine.root.join("test/fixtures/folio/blank.mp4").to_s + FileUtils.cp(fixture_path, source_path) + + site = get_any_site + + Folio::S3::CreateFileJob.perform_now( + s3_path: s3_path, + type: "Folio::File::Video", + attributes: { site_id: site.id } + ) + + # File should be created successfully via download path + created_video = Folio::File::Video.last + assert created_video.present?, "Video should be created" + assert created_video.file_uid.present?, "Video should have file_uid" + assert created_video.file_name.present?, "Video should have file_name" + ensure + FileUtils.rm_f(source_path) if source_path + end + + test "video upload uses S3 server-side copy when on real S3 storage" do + site = get_any_site + s3_path = "uploads/test_video.mp4" + + fake_head = Struct.new(:content_length, :content_type).new(5_000_000, "video/mp4") + copy_source_key = nil + + job = Folio::S3::CreateFileJob.new + job.define_singleton_method(:use_local_file_system?) { false } + + # Stub before_validation :set_video_file_dimensions — it calls file_url_or_path which tries + # to fetch the Dragonfly-generated UID from FileDataStore (no actual file exists there). + # Provide fake dimensions so file_width/file_height validations pass. 
+ Folio::File::Video.define_method(:set_video_file_dimensions) do + self.file_width = 1280 + self.file_height = 720 + self.file_track_duration = 0 + end + + job.stub(:test_aware_s3_exists?, true) do + job.stub(:s3_copy_object, ->(source_key:, dest_key:) { copy_source_key = source_key }) do + job.stub(:s3_head_object, fake_head) do + job.stub(:test_aware_s3_delete, nil) do + job.perform(s3_path: s3_path, type: "Folio::File::Video", attributes: { site_id: site.id }) + end + end + end + end + + created_video = Folio::File::Video.last + assert created_video.present?, "Video should be created" + assert created_video.file_uid.present?, "Video should have a Dragonfly UID" + assert_equal "test_video.mp4", created_video.file_name + assert_equal 5_000_000, created_video.file_size + assert_equal "video/mp4", created_video.file_mime_type + assert_includes copy_source_key, s3_path, "s3_copy_object should have been called with the source path" + ensure + Folio::File::Video.remove_method(:set_video_file_dimensions) + end +end diff --git a/test/lib/folio/cra_media_cloud/encoder_test.rb b/test/lib/folio/cra_media_cloud/encoder_test.rb new file mode 100644 index 0000000000..1926c70bbf --- /dev/null +++ b/test/lib/folio/cra_media_cloud/encoder_test.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require "test_helper" + +class Folio::CraMediaCloud::EncoderTest < ActiveSupport::TestCase + setup do + @encoder = Folio::CraMediaCloud::Encoder.new + @file = OpenStruct.new( + id: 42, + file_name: "test_video.mp4", + file_size: "123456", + ) + @defaults = { md5: "abc123", ref_id: "42-1234567890", profile_group: "VoD" } + end + + test "build_ingest_manifest includes processingPhases attribute when processing_phases is 2" do + xml = @encoder.send(:build_ingest_manifest, @file, **@defaults, processing_phases: 2) + + assert_includes xml, 'processingPhases="2"' + assert_includes xml, " id, + "refId" => ref_id, + "status" => status, + "profileGroup" => profile_group, + "lastModified" => 
last_modified, + "messages" => [], + "output" => [], + } + end + + test "returns latest job by lastModified" do + jobs = [ + make_job(id: 1, ref_id: "abc-123", status: "FAILED", last_modified: "2026-01-01T00:00:00Z"), + make_job(id: 2, ref_id: "abc-123", status: "DONE", last_modified: "2026-01-02T00:00:00Z"), + ] + result = Folio::CraMediaCloud::JobResolver.resolve(jobs) + assert_equal :done, result[:status] + assert_equal 2, result[:job]["id"] + end + + test "returns :not_found for empty jobs" do + result = Folio::CraMediaCloud::JobResolver.resolve([]) + assert_equal :not_found, result[:status] + assert_nil result[:job] + end + + test "maps CRA statuses correctly" do + { "PROCESSING" => :processing, "CREATED" => :processing, + "DONE" => :done, "FAILED" => :failed, "ERROR" => :failed, + "REMOVED" => :not_found }.each do |cra_status, expected| + jobs = [make_job(id: 1, ref_id: "x", status: cra_status)] + result = Folio::CraMediaCloud::JobResolver.resolve(jobs) + assert_equal expected, result[:status], "Expected #{expected} for CRA status #{cra_status}" + end + end +end diff --git a/test/models/concerns/folio/media_file_processing_base_test.rb b/test/models/concerns/folio/media_file_processing_base_test.rb index eeda860996..f15bbe852f 100644 --- a/test/models/concerns/folio/media_file_processing_base_test.rb +++ b/test/models/concerns/folio/media_file_processing_base_test.rb @@ -58,4 +58,54 @@ class TestVideoFile < Folio::File::Video video.reload assert_equal original_generation, video.encoding_generation end + + test "encoding_generation is set even when model has validation errors" do + video = TestVideoFile.new(site: get_any_site) + video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4") + video.dont_run_after_save_jobs = true + + expect_method_called_on(object: video, method: :create_full_media) do + video.save! + end + + # Simulate a video that would fail validation (e.g. 
ffprobe failed, dimensions missing) + video.update_columns(file_width: nil, file_height: nil) + video.reload + + assert_not video.valid?, "video should be invalid without dimensions" + + # process_attached_file uses update_columns, so it should succeed despite invalid model + freeze_time = Time.current + travel_to freeze_time do + video.send(:update_remote_services_data, { + "processing_step_started_at" => Time.current, + "encoding_generation" => freeze_time.to_i + }) + end + + video.reload + assert_equal freeze_time.to_i, video.encoding_generation + end + + test "create_full_media preserves encoding_generation" do + video = TestVideoFile.new(site: get_any_site) + video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4") + video.dont_run_after_save_jobs = true + video.save! + + # Set encoding_generation like process_attached_file does + video.update_columns(aasm_state: "processing") + video.send(:update_remote_services_data, { + "processing_step_started_at" => Time.current, + "encoding_generation" => 12345 + }) + + # create_full_media should merge in service/state without losing encoding_generation + video.create_full_media + + video.reload + assert_equal 12345, video.encoding_generation + assert_equal "cra_media_cloud", video.remote_services_data["service"] + assert_equal "enqueued", video.remote_services_data["processing_state"] + end end diff --git a/test/models/folio/file/cra_media_cloud_file_processing_test.rb b/test/models/folio/file/cra_media_cloud_file_processing_test.rb new file mode 100644 index 0000000000..88fab6f2d6 --- /dev/null +++ b/test/models/folio/file/cra_media_cloud_file_processing_test.rb @@ -0,0 +1,127 @@ +# frozen_string_literal: true + +require "test_helper" + +# Integration tests for AASM state machine + Folio::CraMediaCloud::FileProcessing concern. +# Verifies that the full state machine works correctly when the CRA concern is included. 
+class Folio::File::CraMediaCloudFileProcessingTest < ActiveJob::TestCase + class TestVideoFile < Folio::File::Video + include Folio::CraMediaCloud::FileProcessing + end + + # --- process! triggers CRA encoding --- + + test "process! transitions to processing and enqueues CreateMediaJob" do + video = build_saved_video + # after_commit :process! fires during build_saved_video's save!, leaving state = "processing". + # Reset to unprocessed so we can test a clean process! transition. + video.update_column(:aasm_state, "unprocessed") + + video.stub(:regenerate_thumbnails, nil) do + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::CreateMediaJob do + video.process! + end + end + + video.reload + assert_equal "processing", video.aasm_state + assert_equal "cra_media_cloud", video.remote_services_data["service"] + assert_equal "enqueued", video.remote_services_data["processing_state"] + assert video.remote_services_data["encoding_generation"].present?, + "encoding_generation must be set so CheckProgressJob can detect stale jobs" + end + + test "process_attached_file sets a new encoding_generation each time" do + video = build_saved_video + # Reset state (after_commit :process! fired during save!, leaving state = "processing") + video.update_column(:aasm_state, "unprocessed") + video.update!(remote_services_data: { "encoding_generation" => 999 }) + + video.stub(:regenerate_thumbnails, nil) do + video.process! + end + + video.reload + assert_not_equal 999, video.remote_services_data["encoding_generation"], + "encoding_generation should change on re-encode" + end + + # --- AASM state transitions --- + + test "processing_done! transitions processing to ready" do + video = build_saved_video + video.update_column(:aasm_state, "processing") + + video.processing_done! + + assert_equal "ready", video.reload.aasm_state + end + + test "processing_failed! 
transitions processing to processing_failed" do + video = build_saved_video + video.update_column(:aasm_state, "processing") + + video.processing_failed! + + assert_equal "processing_failed", video.reload.aasm_state + end + + test "retry_processing! transitions processing_failed back to processing" do + video = build_saved_video + video.update_column(:aasm_state, "processing_failed") + + video.retry_processing! + + assert_equal "processing", video.reload.aasm_state + end + + # --- destroy_attached_file enqueues DeleteMediaJob --- + + test "destroy_attached_file enqueues DeleteMediaJob when remote_id is present" do + video = build_saved_video + video.update!(remote_services_data: { + "remote_id" => "JOB123", + "reference_id" => "REF456" + }) + + assert_enqueued_jobs 1, only: Folio::CraMediaCloud::DeleteMediaJob do + video.destroy_attached_file + end + end + + test "destroy_attached_file does nothing when no remote_id or reference_id" do + video = build_saved_video + video.update!(remote_services_data: {}) + + assert_no_enqueued_jobs only: Folio::CraMediaCloud::DeleteMediaJob do + video.destroy_attached_file + end + end + + # --- video_poster_url interface --- + + test "video_poster_url returns nil for Folio::File::Video with no provider concern" do + plain_video_class = Class.new(Folio::File::Video) + assert_nil plain_video_class.new.video_poster_url + end + + test "video_poster_url delegates to remote_cover_url in CRA concern" do + video = build_saved_video + video.stub(:remote_cover_url, "https://cdn.example.com/cover.jpg") do + assert_equal "https://cdn.example.com/cover.jpg", video.video_poster_url + end + end + + private + def build_saved_video + video = TestVideoFile.new(site: get_any_site) + video.file = Folio::Engine.root.join("test/fixtures/folio/blank.mp4") + video.dont_run_after_save_jobs = true + + expect_method_called_on(object: video, method: :create_full_media) do + video.save! 
+ end + + video + end +end diff --git a/test/models/folio/file_test.rb b/test/models/folio/file_test.rb index 2ef95e2f36..a687e91397 100644 --- a/test/models/folio/file_test.rb +++ b/test/models/folio/file_test.rb @@ -345,6 +345,15 @@ def f_file.process_attached_file # hacking method to check if it is called end end +class Folio::FileUrlOrPathTest < ActiveSupport::TestCase + test "file_url_or_path returns local path for FileDataStore" do + video = create(:folio_file_video) + result = video.file_url_or_path + assert result.is_a?(String) + assert_not result.start_with?("http"), "Expected local path, got URL: #{result}" + end +end + class Folio::FileImageMetadataKeywordsTest < ActiveSupport::TestCase include ActiveJob::TestHelper