diff --git a/src/docs/routes/experimental/camera-control-h264.tsx b/src/docs/routes/experimental/camera-control-h264.tsx index bd4ccfd..0681e0e 100644 --- a/src/docs/routes/experimental/camera-control-h264.tsx +++ b/src/docs/routes/experimental/camera-control-h264.tsx @@ -1,4 +1,4 @@ -import { useState } from 'react'; +import { useState, useMemo } from 'react'; import { createFileRoute } from '@tanstack/react-router' import { PageHeader } from "../../components/page-header" import { CameraControl } from '../../../ui/experimental/camera-control' @@ -7,6 +7,7 @@ import { TypographyH1 } from '../../../ui/elements/typography' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "../../../ui/layout/card" import { DemoContainer } from "@/docs/components/demo-container" +import { h264FetchApi } from '@/ui/experimental/h264-fetch'; export const Route = createFileRoute('/experimental/camera-control-h264')({ component: CameraControlWebsocketH264Page, @@ -17,9 +18,10 @@ export const Route = createFileRoute('/experimental/camera-control-h264')({ function CameraControlWebsocketH264Demo() { const [mousePos, setMousePos] = useState<{ x: number, y: number, intensity: number } | null>(null); const [clickPos, setClickPos] = useState<{ x: number, y: number, intensity: number } | null>(null); + const api = useMemo(() => h264FetchApi("localhost:9999"), []); return ( -
- +
+ void; - onClick?: (pos: { x: number; y: number; intensity: number }) => void; - showIntensity?: boolean; -} - -export const CameraControl: React.FC = ({ - className, - onMousePositionChange, - onClick, - showIntensity = false, -}) => { - const source = useContext(ImageContext); - const canvasRef = useRef(null); - const [popupPos, setPopupPos] = useState<{ x: number; y: number } | null>(null); - const [pixelValue, setPixelValue] = useState(null); - const [cursorPosition, setCursorPosition] = useState<{ x: number; y: number } | null>(null); - const [frameCount, setFrameCount] = useState(0); - const [startTime, setStartTime] = useState(performance.now()); - - // Helper to get dimensions safely - const getDimensions = () => { - if (!source) return { w: 0, h: 0 }; - if (typeof source === "object" && "video" in source && source.video) { - return { w: source.video.videoWidth, h: source.video.videoHeight }; - } - if (source instanceof ImageBitmap) { - return { w: source.width, h: source.height }; - } - return { w: 0, h: 0 }; - }; - - // Draw source when updated - useEffect(() => { - if (!source || !canvasRef.current) return; - - const canvas = canvasRef.current; - const ctx = canvas.getContext("2d"); - if (!ctx) return; - - const { w, h } = getDimensions(); - if (w === 0 || h === 0) return; - - canvas.width = w; - canvas.height = h; - - try { - if (typeof source === "object" && "video" in source && source.video) { - ctx.drawImage(source.video, 0, 0); - } else if (source instanceof ImageBitmap) { - ctx.drawImage(source, 0, 0); - } - } catch (err) { - console.warn("Draw failed:", err); - return; - } - - // FPS tracking - setFrameCount((prev) => prev + 1); - const now = performance.now(); - if (now - startTime >= 1000) { - console.log(`FPS: ${frameCount}`); - setFrameCount(0); - setStartTime(now); - } - }, [source instanceof ImageBitmap ? 
source : (source as any)?.frameId]); - - // Throttle mouse move - let lastMove = 0; - const handleMouseMove = (e: React.MouseEvent) => { - const now = performance.now(); - if (now - lastMove < 50) return; - lastMove = now; - - const info = getMousePixelInfo(e); - setPopupPos({ x: e.clientX, y: e.clientY }); - setPixelValue(showIntensity ? `intensity: ${info.intensity}` : `rgba(${info.pixel[0]}, ${info.pixel[1]}, ${info.pixel[2]}, ${info.pixel[3] / 255})`); - onMousePositionChange?.({ x: info.x, y: info.y, intensity: info.intensity }); - }; - - const getMousePixelInfo = (e: React.MouseEvent) => { - const canvas = canvasRef.current; - if (!canvas || canvas.width === 0 || canvas.height === 0) return { x: 0, y: 0, intensity: 0, pixel: [0, 0, 0, 0] }; - - const rect = canvas.getBoundingClientRect(); - const x = Math.floor(e.clientX - rect.left); - const y = Math.floor(e.clientY - rect.top); - - const ctx = canvas.getContext("2d"); - if (!ctx) return { x, y, intensity: 0, pixel: [0, 0, 0, 0] }; - - try { - const pixel = ctx.getImageData(x, y, 1, 1).data; - const intensity = Math.round((pixel[0] + pixel[1] + pixel[2]) / 3); - return { x, y, intensity, pixel }; - } catch { - return { x, y, intensity: 0, pixel: [0, 0, 0, 0] }; - } - }; - - const handleMouseLeave = () => { - setPopupPos(null); - setPixelValue(null); - onMousePositionChange?.(null); - }; - - const handleMouseClick = (e: React.MouseEvent) => { - const info = getMousePixelInfo(e); - onClick?.({ x: info.x, y: info.y, intensity: info.intensity }); - if (e.ctrlKey) { - setCursorPosition({ x: info.x, y: info.y }); - e.preventDefault(); - e.stopPropagation(); - } - }; - - const { w, h } = getDimensions(); - - return ( -
- - {/* Crosshair */} - {cursorPosition && canvasRef.current && ( -
-
-
-
- )} - {/* Popup */} - {popupPos && ( -
- {pixelValue} -
- )} -
- ); -}; \ No newline at end of file diff --git a/src/ui/experimental/camera-control/camera-control.tsx b/src/ui/experimental/camera-control/camera-control.tsx new file mode 100644 index 0000000..8400f3a --- /dev/null +++ b/src/ui/experimental/camera-control/camera-control.tsx @@ -0,0 +1,415 @@ +import React, { + useEffect, + useRef, + useState, + useContext, + useCallback, +} from "react"; +import { cn } from "../../../lib/utils"; +import { ImageContext, isVideo } from "./image-context"; + +function debounceResize( + fn: (entry: ResizeObserverEntry) => void, + delay: number = 100 +) { + let timer: ReturnType | undefined; + + const debounced = (entry: ResizeObserverEntry) => { + if (timer) clearTimeout(timer); + timer = setTimeout(() => fn(entry), delay); + }; + + debounced.cancel = () => { + if (timer) clearTimeout(timer); + timer = undefined; + }; + + return debounced; +} + +type BoundingBox = { + startX: number; + startY: number; + width: number; + height: number; +}; + +export interface CameraControlProps { + className?: string; + onMousePositionChange?: ( + pos: { x: number; y: number; intensity: number } | null, + ) => void; + onClick?: (pos: { x: number; y: number; intensity: number }) => void; + showIntensity?: boolean; + onZoom?: (box: BoundingBox) => void; + sizeFollowsImage?: boolean; +} + +/** + * Returns a webcomponent that provides a container (canvas) for various video or image + * producers, and implements various controls and annotations. + * @param onMousePositionChange - Callback called on mouse movement inside canvas. + * @param onClick - Callback called on a mouse click within canvas. + * @param showIntensity - Whether to show the pixel intensity at mouse position as a tooltip. + * @param onZoom - Callback called after a bounding box is drawn. 
+ * @param sizeFollowsImage - Resize the canvas if the image size changes + * @returns + */ +export const CameraControl: React.FC = ({ + className, + onMousePositionChange, + onClick, + showIntensity = false, + onZoom, + sizeFollowsImage = false, +}) => { + const { image, reportSize, reportZoom, reportDrag, clearZoom } = + useContext(ImageContext); + const canvasRef = useRef(null); + const [popupPos, setPopupPos] = useState<{ x: number; y: number } | null>( + null, + ); + const [pixelValue, setPixelValue] = useState(null); + const [cursorPosition, setCursorPosition] = useState<{ + x: number; + y: number; + } | null>(null); + const [zoomBox, setZoomBox] = useState(null); + const [dragStart, setDragStart] = useState<{ + startX: number; + startY: number; + lastX: number; + lastY: number; + } | null>(null); + const [spaceHeld, setSpaceHeld] = useState(false); + const [cursorDisplay, setCursorDisplay] = useState("crosshair"); + const [frameCount, setFrameCount] = useState(0); + const [startTime, setStartTime] = useState(performance.now()); + + // Helper to get dimensions safely + const getDimensions = () => { + if (isVideo(image)) { + return { w: image.video.videoWidth, h: image.video.videoHeight }; + } + if (image instanceof ImageBitmap) { + return { w: image.width, h: image.height }; + } + return { w: 0, h: 0 }; + }; + + useEffect(() => { + const canvas = canvasRef.current; + if (!canvas || !reportSize) return; + + const handler = debounceResize((entry) => { + const { width, height } = entry.contentRect; + // Set canvas to size of canvas? 
+ canvas.width = width; + canvas.height = height; + + // Report the size back to the provider + reportSize(Math.floor(width), Math.floor(height)); + }); + + const observer = new ResizeObserver((entries) => handler(entries[0])); + + observer.observe(canvas); + return () => observer.disconnect(); + }, [reportSize]); + + // Draw source when updated + useEffect(() => { + if (!image || !canvasRef.current) return; + + const canvas = canvasRef.current; + const ctx = canvas.getContext("2d"); + if (!ctx) return; + + if (sizeFollowsImage) { + const { w, h } = getDimensions(); + if (w === 0 || h === 0) return; + canvas.width = w; + canvas.height = h; + } + + try { + if (isVideo(image)) { + ctx.drawImage(image.video, 0, 0); + } else if (image instanceof ImageBitmap) { + ctx.drawImage(image, 0, 0); + } + if (zoomBox) { + ctx.strokeStyle = "yellow"; // Set bounding box color + ctx.lineWidth = 2; // Set line width + ctx.strokeRect( + zoomBox.startX, + zoomBox.startY, + zoomBox.width, + zoomBox.height, + ); + } + } catch (err) { + console.warn("Draw failed:", err); + return; + } + + // FPS tracking + setFrameCount((prev) => prev + 1); + const now = performance.now(); + if (now - startTime >= 1000) { + // console.log(`FPS: ${frameCount}`); + setFrameCount(0); + setStartTime(now); + } + }, [image instanceof ImageBitmap ? 
image : (image as any)?.frameId]); + + // Throttle mouse move + let lastMove = 0; + const handleMouseMove = (e: React.MouseEvent) => { + if (dragStart) { + const deltaX = e.clientX - dragStart.lastX; + const deltaY = e.clientY - dragStart.lastY; + setDragStart({ ...dragStart, lastX: e.clientX, lastY: e.clientY }); + reportDrag( + e.clientX - dragStart.startX, + e.clientY - dragStart.startY, + deltaX, + deltaY, + true, + ); + return; + } + + const now = performance.now(); + if (now - lastMove < 50) return; + lastMove = now; + + const info = getMousePixelInfo(e); + + if (zoomBox) { + setZoomBox({ + ...zoomBox, + width: info.x - zoomBox.startX, + height: info.y - zoomBox.startY, + }); + } + + setPopupPos({ x: e.clientX, y: e.clientY }); + setPixelValue( + showIntensity + ? `intensity: ${info.intensity}` + : `rgba(${info.pixel[0]}, ${info.pixel[1]}, ${info.pixel[2]}, ${info.pixel[3] / 255})`, + ); + onMousePositionChange?.({ + x: info.x, + y: info.y, + intensity: info.intensity, + }); + }; + + const getMousePixelInfo = (e: React.MouseEvent) => { + const canvas = canvasRef.current; + if (!canvas || canvas.width === 0 || canvas.height === 0) + return { x: 0, y: 0, intensity: 0, pixel: [0, 0, 0, 0] }; + + const rect = canvas.getBoundingClientRect(); + const x = Math.floor(e.clientX - rect.left); + const y = Math.floor(e.clientY - rect.top); + + const ctx = canvas.getContext("2d"); + + if (!ctx) return { x, y, intensity: 0, pixel: [0, 0, 0, 0] }; + + try { + const pixel = ctx.getImageData(x, y, 1, 1).data; + const intensity = Math.round((pixel[0] + pixel[1] + pixel[2]) / 3); + return { x, y, intensity, pixel }; + } catch { + return { x, y, intensity: 0, pixel: [0, 0, 0, 0] }; + } + }; + + const handleMouseLeave = () => { + setPopupPos(null); + setPixelValue(null); + onMousePositionChange?.(null); + }; + + const handleMouseClick = (e: React.MouseEvent) => { + const info = getMousePixelInfo(e); + + onClick?.({ x: info.x, y: info.y, intensity: info.intensity }); + if 
(e.ctrlKey) { + setCursorPosition({ x: info.x, y: info.y }); + e.preventDefault(); + e.stopPropagation(); + } + }; + + const handleMouseDown = useCallback( + (e: React.MouseEvent) => { + if (spaceHeld) { + setCursorDisplay("grabbing"); + setDragStart({ + startX: e.clientX, + startY: e.clientY, + lastX: e.clientX, + lastY: e.clientY, + }); + } else { + const info = getMousePixelInfo(e); + setZoomBox({ startX: info.x, startY: info.y, width: 0, height: 0 }); + } + }, + [spaceHeld], + ); + + const handleMouseUp = useCallback( + (e: React.MouseEvent) => { + if (dragStart) { + if (spaceHeld) { + setCursorDisplay("grab"); + } else { + setCursorDisplay("crosshair"); + } + reportDrag( + e.clientX - dragStart.startX, + e.clientY - dragStart.startY, + 0, + 0, + false, + ); + setDragStart(null); + } + if (zoomBox) { + onZoom?.(zoomBox); + reportZoom( + zoomBox.startX, + zoomBox.startY, + zoomBox.width, + zoomBox.height, + ); + } + setZoomBox(null); + }, + [dragStart, spaceHeld, zoomBox, onZoom, reportDrag, reportZoom], + ); + + const handleDoubleClick = (e: React.MouseEvent) => { + clearZoom(); + }; + + const handleKeyDown = useCallback( + (e: React.KeyboardEvent) => { + if (e.code === "Space") { + if (!spaceHeld) { + setCursorDisplay("grab"); + setSpaceHeld(true); + } + e.preventDefault(); + } + }, + [spaceHeld], + ); + + const handleKeyUp = (e: React.KeyboardEvent) => { + if (e.code === "Space") { + setSpaceHeld(false); + if (!dragStart) { + setCursorDisplay("crosshair"); + } + e.preventDefault(); + } + }; + + const handleMouseEnter = (e: React.MouseEvent) => { + e.currentTarget.focus(); + }; + + return ( +
+ + {/* Crosshair */} + {cursorPosition && canvasRef.current && ( +
+
+
+
+ )} + {/* Popup */} + {popupPos && ( +
+ {pixelValue} +
+ )} +
+ ); +}; diff --git a/src/ui/experimental/camera-control/h264-api.tsx b/src/ui/experimental/camera-control/h264-api.tsx new file mode 100644 index 0000000..ab95d29 --- /dev/null +++ b/src/ui/experimental/camera-control/h264-api.tsx @@ -0,0 +1,58 @@ +export type Crop = { x: number; y: number; width: number; height: number }; +export type Resolution = { width: number; height: number }; +export type SessionResolution = Resolution & { + paddingWidth: number; + paddingHeight: number; +}; + +type DefaultResolutionType = { + width: number, + height: number +} + +export const DefaultResolution: DefaultResolutionType = { + width: 1024, + height: 1024 +}; + +export interface H264Api { + /** Create a session if needed. Return the session ID. */ + createSession: (signal?: AbortSignal) => Promise; + + /** Get source resolution - this only works if there is an active encoder */ + getSourceResolution: () => Promise; + + /** Get session resolution */ + getSessionResolution: ( + sessionId: string, + signal?: AbortSignal, + ) => Promise; + + /** Resolution of the view (stream == display by design). */ + setResolution: ( + sessionId: string, + width: number, + height: number, + signal?: AbortSignal, + ) => Promise; + + /** Get current crop box. */ + getCrop: (sessionId: string, signal?: AbortSignal) => Promise; + + /** Set crop box. */ + setCrop: ( + sessionId: string, + crop: Crop, + signal?: AbortSignal, + ) => Promise; + + /** Clear crop. */ + clearCrop: (sessionId: string, signal?: AbortSignal) => Promise; + + /** Required: construct the WebSocket for the given session. Customize construction here (auth headers, subprotocols, polyfills). */ + wsFactory: (sessionId: string) => WebSocket; + + /** Optional: build absolute HTTP/WS URLs if you don’t want the component to concatenate strings. 
*/ + buildHttpUrl?: (path: string) => string; // e.g., p => `${base}${p}` + buildWsUrl?: (path: string) => string; // e.g., p => `${wssBase}${p}` +} diff --git a/src/ui/experimental/camera-control/h264-fetch.tsx b/src/ui/experimental/camera-control/h264-fetch.tsx new file mode 100644 index 0000000..58e3c68 --- /dev/null +++ b/src/ui/experimental/camera-control/h264-fetch.tsx @@ -0,0 +1,132 @@ +import { type Crop, type H264Api, DefaultResolution } from "./h264-api"; + +export function h264FetchApi(url: string): H264Api { + return { + async createSession(signal) { + const res = await fetch("http://" + url + "/api/sessions", { + method: "POST", + headers: { "Content-Type": "application/json" }, + signal: signal, + body: JSON.stringify({ + colour_mapping: "none", + crop: null, + resolution: { + width: DefaultResolution.width, + height: DefaultResolution.height, + }, + }), + }); + if (!res.ok) + throw new Error( + `Failed to create session: ${res.status} ${res.statusText}`, + ); + + const body = await res.json(); + const sid = body.id as string; + if (!sid) throw new Error("Server did not return session_id"); + return String(sid); + }, + async getSourceResolution(signal?: AbortSignal) { + const res = await fetch("http://" + url + "/api/resolution", { + method: "GET", + signal: signal, + }); + if (!res.ok) + throw new Error( + `Failed to get resolution. 
Is there an encoder running?`, + ); + const data = await res.json(); + return { + width: data.source_width, + height: data.source_height, + }; + }, + async getSessionResolution(sessionId: string, signal?: AbortSignal) { + const res = await fetch("http://" + url + "/api/sessions/" + sessionId + "/resolution", { + method: "GET", + signal: signal, + }); + if (!res.ok) + throw new Error( + `Failed to get resolution.`, + ); + const data = await res.json(); + return { + width: data.source_width, + height: data.source_height, + paddingWidth: data.padding_width, + paddingHeight: data.padding_height, + }; + }, + async setResolution( + sessionId: string, + width: number, + height: number, + signal?: AbortSignal, + ) { + const res = await fetch( + "http://" + url + "/api/sessions/" + sessionId + "/resolution", + { + method: "POST", + signal: signal, + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ width, height }), + }, + ); + if (!res.ok) + throw new Error( + `Failed to set resolution: ${res.status} ${res.statusText}`, + ); + return; + }, + async getCrop(sessionId: string, signal?: AbortSignal) { + const crop_response = await fetch( + "http://" + url + "/api/sessions/" + sessionId + "/crop", + { + method: "GET", + signal: signal, + }, + ); + if (!crop_response.ok) + throw new Error( + `Failed to get current crop: ${crop_response.status} ${crop_response.statusText}`, + ); + const crop: Crop = await crop_response.json(); + return crop; + }, + async setCrop(sessionID: string, crop: Crop, signal?: AbortSignal) { + const res = await fetch( + "http://" + url + "/api/sessions/" + sessionID + "/crop", + { + method: "POST", + signal: signal, + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(crop), + }, + ); + if (!res.ok) + throw new Error(`Failed to set crop: ${res.status} ${res.statusText}`); + return; + }, + async clearCrop(sessionId: string, signal?: AbortSignal) { + const res = await fetch( + "http://" + url + 
"/api/sessions/" + sessionId + "/crop", + { + method: "DELETE", + signal: signal, + }, + ); + if (!res.ok) + throw new Error( + `Failed to clear crop: ${res.status} ${res.statusText}`, + ); + }, + wsFactory(sessionId) { + return new WebSocket("ws://" + url + "/ws?session_id=" + sessionId); + }, + }; +} diff --git a/src/ui/experimental/camera-control/image-context.tsx b/src/ui/experimental/camera-control/image-context.tsx new file mode 100644 index 0000000..1020ab0 --- /dev/null +++ b/src/ui/experimental/camera-control/image-context.tsx @@ -0,0 +1,23 @@ +import { createContext } from "react"; + +export type ImageSource = { + image: { video: HTMLVideoElement | null; frameId: number } | ImageBitmap | null; + reportSize: (width: number, height: number) => void; + reportZoom: (startX: number, startY: number, width: number, height: number) => void; + reportDrag: (totalX: number, totalY: number, deltaX: number, deltaY: number, active: boolean) => void; + clearZoom: () => void; +} +export const ImageContext = createContext({ + image: null, + reportSize: () => { }, + reportZoom: () => { }, + reportDrag: () => { }, + clearZoom: () => { } +}); + +export type VideoFrame = { video: HTMLVideoElement; frameId: number }; +export const isVideo = (image: unknown): image is VideoFrame => + typeof image === "object" && + image !== null && + "video" in image && + !!(image as Record).video; \ No newline at end of file diff --git a/src/ui/experimental/video-provider.tsx b/src/ui/experimental/camera-control/video-provider.tsx similarity index 100% rename from src/ui/experimental/video-provider.tsx rename to src/ui/experimental/camera-control/video-provider.tsx diff --git a/src/ui/experimental/camera-control/websocket-h264-provider.tsx b/src/ui/experimental/camera-control/websocket-h264-provider.tsx new file mode 100644 index 0000000..6167aa0 --- /dev/null +++ b/src/ui/experimental/camera-control/websocket-h264-provider.tsx @@ -0,0 +1,555 @@ +import React, { + useState, + useEffect, + 
useMemo, + useCallback, + useRef, +} from "react"; + +import { ImageContext } from "./image-context"; + +import { type H264Api, DefaultResolution } from "./h264-api"; + +export interface WebsocketH264ProviderProps { + children: React.ReactNode; + sessionId?: string; + onSessionCreated?: (sessionId: string) => void; + api: H264Api; +} + +/** + * A provider to be used with the CameraControl Webcomponent. This provider + * receives streams from the h264-websocket-stream server and maps camera control + * controls to appropriate endpoints to control the stream. + * @param sessionId - Existing session ID to connect with. When omitted, a + * session is created through `api.createSession`. + * @param onSessionCreated - Called with the new session ID after this provider + * creates a session. + * @param api - An API that satisfies the H264Api Interface for + * communicating with h264 websocket stream server. + * @returns + */ +export const WebsocketH264Provider: React.FC = ({ + children, + sessionId = null, + onSessionCreated = null, + api, +}) => { + // ================== + // State + // ================== + const [imageBitmap, setImageBitmap] = useState(null); + const [sourceWidth, setSourceWidth] = useState(DefaultResolution.width); + const [sourceHeight, setSourceHeight] = useState(DefaultResolution.height); + const [currentWidth, setCurrentWidth] = useState(DefaultResolution.width); + const [currentHeight, setCurrentHeight] = useState(DefaultResolution.height); + const [currentCropHeight, setCurrentCropHeight] = useState(DefaultResolution.height); + const [currentCropWidth, setCurrentCropWidth] = useState(DefaultResolution.width); + const [currentCropStartX, setCurrentCropStartX] = useState(0); + const [currentCropStartY, setCurrentCropStartY] = useState(0); + const [paddingWidth, setPaddingWidth] = useState(0); + const [paddingHeight, setPaddingHeight] = useState(0); + + // ================== + // Refs + // ================== + const wsRef = useRef(null); + const dimsRef = useRef<{ width: number; height: number }>({ + width: DefaultResolution.width, + height: DefaultResolution.height, + }); + const abortedRef = useRef(false); + const configuringRef = useRef(false); + const configuredRef = 
useRef(false); + const decoderRef = useRef(null); + const reconnectTimerRef = useRef(null); + const spsRef = useRef(null); + const ppsRef = useRef(null); + const nextTsRef = useRef(0); + const sidRef = useRef(sessionId ?? null); + const lastConfigRef = useRef<{ width: number; height: number } | null>(null); + + // Internal resolved session ID. We mirror the prop; if null, we create and then set it here. + const [resolvedSessionId, setResolvedSessionId] = useState( + sessionId, + ); + + useEffect(() => { + setResolvedSessionId(sessionId ?? null); + }, [sessionId]); + + // ================== + // Helper Functions + // ================== + const frameDurationUs = Math.round(1_000_000 / 50); + + const splitAnnexB = (buf: ArrayBuffer): Uint8Array[] => { + const b = new Uint8Array(buf), + out: Uint8Array[] = []; + let i = 0; + const isStart = (i: number) => + (i + 3 < b.length && b[i] === 0 && b[i + 1] === 0 && b[i + 2] === 1) || + (i + 4 < b.length && + b[i] === 0 && + b[i + 1] === 0 && + b[i + 2] === 0 && + b[i + 3] === 1); + const consumeStart = (i: number) => (b[i + 2] === 1 ? 
i + 3 : i + 4); + + while (i < b.length - 3 && !isStart(i)) i++; + if (i >= b.length - 3) return out; + i = consumeStart(i); + let start = i; + while (i < b.length) { + if (isStart(i)) { + out.push(b.subarray(start, i)); + i = consumeStart(i); + start = i; + } else i++; + } + if (start < b.length) out.push(b.subarray(start)); + return out; + }; + + const nalType = (nal: Uint8Array) => nal[0] & 0x1f; + const isKeyframe = (nals: Uint8Array[]) => nals.some((n) => nalType(n) === 5); + + const buildAvcC = (spsNal: Uint8Array, ppsNal: Uint8Array): Uint8Array => { + const spsLen = spsNal.length, + ppsLen = ppsNal.length; + const avcc = new Uint8Array(7 + 2 + spsLen + 1 + 2 + ppsLen); + let o = 0; + avcc[o++] = 1; + avcc[o++] = spsNal[1]; + avcc[o++] = spsNal[2]; + avcc[o++] = spsNal[3]; + avcc[o++] = 0xff; + avcc[o++] = 0xe1; + avcc[o++] = (spsLen >>> 8) & 0xff; + avcc[o++] = spsLen & 0xff; + avcc.set(spsNal, o); + o += spsLen; + avcc[o++] = 1; + avcc[o++] = (ppsLen >>> 8) & 0xff; + avcc[o++] = ppsLen & 0xff; + avcc.set(ppsNal, o); + o += ppsLen; + return avcc; + }; + + const codecFromSps = (spsNal: Uint8Array): string => { + const hex = (n: number) => n.toString(16).toUpperCase().padStart(2, "0"); + return `avc1.${hex(spsNal[1])}${hex(spsNal[2])}${hex(spsNal[3])}`; + }; + + const ensureDecoder = () => { + if (decoderRef.current && decoderRef.current.state !== "closed") return; + + decoderRef.current = new VideoDecoder({ + output: async (frame) => { + const bitmap = await createImageBitmap(frame); + setImageBitmap(bitmap); + frame.close(); + }, + error: (e) => console.error("Decoder error:", e), + }); + }; + + const tryConfigure = async (): Promise => { + if (abortedRef.current) return false; + if (configuringRef.current) return false; + if (configuredRef.current || !spsRef.current || !ppsRef.current) + return false; + + configuringRef.current = true; + try { + ensureDecoder(); + + const description = buildAvcC(spsRef.current, ppsRef.current); + const codec = 
codecFromSps(spsRef.current); + const { width: codedWidth, height: codedHeight } = dimsRef.current; + + const config: VideoDecoderConfig = { + codec, + codedWidth: codedWidth, + codedHeight: codedHeight, + description: description, + }; + + const support = await VideoDecoder.isConfigSupported(config).catch( + () => null, + ); + if (!support?.supported) { + console.warn("Unsupported config:", config); + return false; + } + + if (abortedRef.current) return false; + + const dec = decoderRef.current; + if (!dec || dec.state === "closed") return false; + + if (dec.state === "configured") dec.reset(); + dec.configure(config); + configuredRef.current = true; + return true; + } catch (e) { + console.warn("tryConfigure failed:", e); + return false; + } finally { + configuringRef.current = false; + } + }; + + const feedChunk = (nals: Uint8Array[]) => { + let total = 0; + for (const n of nals) total += 4 + n.length; + const payload = new Uint8Array(total); + let o = 0; + for (const n of nals) { + const L = n.length; + payload[o++] = (L >>> 24) & 0xff; + payload[o++] = (L >>> 16) & 0xff; + payload[o++] = (L >>> 8) & 0xff; + payload[o++] = L & 0xff; + payload.set(n, o); + o += L; + } + + const chunk = new EncodedVideoChunk({ + type: isKeyframe(nals) ? 
"key" : "delta", + timestamp: nextTsRef.current, + data: payload, + }); + nextTsRef.current += frameDurationUs; + + const dec = decoderRef.current; + if (!dec || dec.state === "closed") return; + try { + dec.decode(chunk); + } catch { + /**/ + } + }; + + const onAccessUnit = (buf: ArrayBuffer) => { + const nals = splitAnnexB(buf); + if (!nals.length) return; + + for (const n of nals) { + const t = nalType(n); + if (t === 7) spsRef.current = n.slice(); + else if (t === 8) ppsRef.current = n.slice(); + } + + if (!configuredRef.current && spsRef.current && ppsRef.current) { + console.log("Reconfigure"); + void tryConfigure().then((ok) => { + if (ok) feedChunk(nals); + }); + } else if (configuredRef.current) feedChunk(nals); + }; + + useEffect(() => { + if (!("VideoDecoder" in window)) { + console.error("WebCodecs VideoDecoder is not supported in this browser."); + return; + } + + const aborter = new AbortController(); + abortedRef.current = false; + + let ws: WebSocket; + spsRef.current = null; + ppsRef.current = null; + nextTsRef.current = 0; + + const connect = (sid: string) => { + ws = api.wsFactory(sid); + + wsRef.current = ws; + ws.binaryType = "arraybuffer"; + + ws.onopen = () => { + console.log("Connected"); + configuredRef.current = false; + }; + + ws.onmessage = (ev) => { + if (typeof ev.data === "string") { + try { + const meta = JSON.parse(ev.data); + if (meta.type === "config") { + console.log(meta); + setCurrentWidth(meta.width); + setCurrentHeight(meta.height); + setSourceWidth(meta.source_width); + setSourceHeight(meta.source_height); + setPaddingWidth(meta.padding_width); + setPaddingHeight(meta.padding_height); + setCurrentCropWidth(meta.crop_width); + setCurrentCropHeight(meta.crop_height); + setCurrentCropStartX(meta.crop_x); + setCurrentCropStartY(meta.crop_y); + + const last = lastConfigRef.current; + const changed = + !last || + last.width !== meta.width || + last.height !== meta.height; + + if (changed) { + lastConfigRef.current = { + width: 
meta.width, + height: meta.height, + }; + dimsRef.current = { width: meta.width, height: meta.height }; + configuredRef.current = false; // force reconfigure + void tryConfigure(); + } + } + } catch (e) { + console.warn("Failed to parse metadata:", e); + } + } else { + onAccessUnit(ev.data); + } + }; + + ws.onclose = () => { + configuredRef.current = false; + try { + decoderRef.current?.flush().catch(() => {}); + } catch { + /**/ + } + if (!abortedRef.current && sidRef.current) { + reconnectTimerRef.current = window.setTimeout( + () => connect(sidRef.current!), + 3000, + ); + } + }; + + ws.onerror = (e) => { + console.error("WebSocket error:", e); + try { + ws.close(); + } catch { + /**/ + } + }; + }; + + const ensureSessionId = async (): Promise => { + if (sidRef.current) return sidRef.current; + if (resolvedSessionId) { + sidRef.current = resolvedSessionId; + return resolvedSessionId; + } + + const sid = await api.createSession(aborter.signal); + if (!sid) throw new Error("Server did not return session_id"); + + // Inform parent so it can persist/store as it sees fit + onSessionCreated?.(sid); + + // Keep our internal view so we can connect immediately + setResolvedSessionId(sid); + sidRef.current = sid; + return sid; + }; + + (async () => { + try { + const sid = await ensureSessionId(); + if (!abortedRef.current) connect(sid); + } catch (e) { + if ( + !abortedRef.current && + !(e instanceof DOMException && e.name === "AbortError") + ) { + console.error(e); + } + } + })(); + + return () => { + abortedRef.current = true; + aborter.abort(); + + if (reconnectTimerRef.current !== null) { + clearTimeout(reconnectTimerRef.current); + reconnectTimerRef.current = null; + } + try { + wsRef.current?.close(); + } catch { + /**/ + } + try { + decoderRef.current?.close(); + } catch { + /**/ + } + }; + + // `tryConfigure` and `onAccessUnit` read only from refs and are intentionally stable. 
+ // eslint-disable-next-line react-hooks/exhaustive-deps + }, [api]); + + const reportSize = useCallback( + async (width: number, height: number) => { + const sid = sidRef.current; + if (!sid) return; + api.setResolution(sid, width, height); + }, + [api], + ); + + const reportZoom = useCallback( + async (startX: number, startY: number, width: number, height: number) => { + + + const native_aspect = sourceWidth / sourceHeight; + const aspect = currentWidth / currentHeight; + + let x_pad = currentCropStartX; + let y_pad = currentCropStartY; + let scale = 0; + + if (native_aspect >= aspect) { + scale = currentWidth / currentCropWidth; + x_pad = currentCropStartX; + y_pad = currentCropStartY - Math.floor(paddingHeight / (2 * scale)); + } else if (native_aspect < aspect) { + scale = currentHeight / currentCropHeight; + x_pad = currentCropStartX - Math.floor(paddingWidth / (2 * scale)); + y_pad = currentCropStartY; + } + + if (width < 0) { + startX = startX + width; + width = -1 * width; + } + if (height < 0) { + startY = startY + height; + height = -1 * height; + } + + let x = 0, + y = 0; + + let cropWidth = 0, + cropHeight = 0; + + x = x_pad + Math.floor(startX / scale); + y = y_pad + Math.floor(startY / scale); + x = Math.max(x, 0); + y = Math.max(y, 0); + + cropWidth = Math.floor(width / scale); + cropHeight = Math.floor(height / scale); + + if (cropWidth == 0 || cropHeight == 0) { + return; + } + + const sid = sidRef.current; + if (!sid) return; + await api.setCrop(sid, { + x, + y, + width: cropWidth, + height: cropHeight, + }); + }, + [ + api, + sourceWidth, + sourceHeight, + currentWidth, + currentHeight, + paddingWidth, + paddingHeight, + currentCropHeight, + currentCropWidth, + currentCropStartX, + currentCropStartY, + ], + ); + + const reportDrag = useCallback( + async ( + totalX: number, + totalY: number, + deltaX: number, + deltaY: number, + active: boolean, + ) => { + if (!active) { + const sid = sidRef.current; + if (!sid) return; + + const crop_response = 
await api.getCrop(sid); + + const { + x: currentCropX, + y: currentCropY, + width: currentCropWidth, + height: currentCropHeight, + } = crop_response ?? { + x: 0, + y: 0, + width: sourceWidth, + height: sourceHeight, + }; + + const xScale = currentCropWidth / currentWidth; + const yScale = currentCropHeight / currentHeight; + + const scale = Math.max(xScale, yScale); + + const x = currentCropX - Math.floor(totalX * scale); + const y = currentCropY - Math.floor(totalY * scale); + + console.log({ + currentCropX, + currentCropWidth, + currentWidth, + xScale, + totalX, + shiftX: Math.floor(totalX * xScale), + x, + }); + + await api.setCrop(sid, { + x, + y, + width: currentCropWidth, + height: currentCropHeight, + }); + } + }, + [api, sourceWidth, sourceHeight, currentWidth, currentHeight], + ); + + const clearZoom = useCallback(async () => { + const sid = sidRef.current; + if (!sid) return; + api.clearCrop(sid); + }, [api]); + + const contextValue = useMemo( + () => ({ + image: imageBitmap, + reportSize, + reportZoom, + reportDrag, + clearZoom, + }), + [imageBitmap, reportSize, reportZoom, reportDrag, clearZoom], + ); + + return ( + + {children} + + ); +}; diff --git a/src/ui/experimental/image-context.tsx b/src/ui/experimental/image-context.tsx deleted file mode 100644 index 6f3bf9a..0000000 --- a/src/ui/experimental/image-context.tsx +++ /dev/null @@ -1,4 +0,0 @@ -import { createContext } from "react"; - -export type ImageSource = { video: HTMLVideoElement | null; frameId: number } | ImageBitmap | null; -export const ImageContext = createContext(null); \ No newline at end of file diff --git a/src/ui/experimental/websocket-h264-provider.tsx b/src/ui/experimental/websocket-h264-provider.tsx deleted file mode 100644 index d63dcf8..0000000 --- a/src/ui/experimental/websocket-h264-provider.tsx +++ /dev/null @@ -1,206 +0,0 @@ -import React, { useState, useEffect } from 'react'; - - -import { ImageContext } from './image-context'; - - -export interface 
WebsocketH264ProviderProps { - children: React.ReactNode, - wsUrl: string; -} - -export const WebsocketH264Provider: React.FC = ({ children, wsUrl }) => { - - const [imageBitmap, setImageBitmap] = useState(null); - const [currentWidth, setCurrentWidth] = useState(1024); - const [currentHeight, setCurrentHeight] = useState(1024); - - useEffect(() => { - let ws: WebSocket; - let decoder: VideoDecoder | null = null; - let configured = false; - let sps: Uint8Array | null = null; - let pps: Uint8Array | null = null; - let nextTs = 0; - const frameDurationUs = Math.round(1_000_000 / 50); - - const splitAnnexB = (buf: ArrayBuffer): Uint8Array[] => { - const b = new Uint8Array(buf), out: Uint8Array[] = []; - let i = 0; - const isStart = (i: number) => - (i + 3 < b.length && b[i] === 0 && b[i + 1] === 0 && b[i + 2] === 1) || - (i + 4 < b.length && b[i] === 0 && b[i + 1] === 0 && b[i + 2] === 0 && b[i + 3] === 1); - const consumeStart = (i: number) => (b[i + 2] === 1 ? i + 3 : i + 4); - - while (i < b.length - 3 && !isStart(i)) i++; - if (i >= b.length - 3) return out; - i = consumeStart(i); let start = i; - while (i < b.length) { - if (isStart(i)) { - out.push(b.subarray(start, i)); - i = consumeStart(i); - start = i; - } else i++; - } - if (start < b.length) out.push(b.subarray(start)); - return out; - }; - - const nalType = (nal: Uint8Array) => nal[0] & 0x1f; - const isKeyframe = (nals: Uint8Array[]) => nals.some(n => nalType(n) === 5); - - const buildAvcC = (spsNal: Uint8Array, ppsNal: Uint8Array): Uint8Array => { - const spsLen = spsNal.length, ppsLen = ppsNal.length; - const avcc = new Uint8Array(7 + 2 + spsLen + 1 + 2 + ppsLen); - let o = 0; - avcc[o++] = 1; - avcc[o++] = spsNal[1]; - avcc[o++] = spsNal[2]; - avcc[o++] = spsNal[3]; - avcc[o++] = 0xFF; - avcc[o++] = 0xE1; - avcc[o++] = (spsLen >>> 8) & 0xff; avcc[o++] = spsLen & 0xff; avcc.set(spsNal, o); o += spsLen; - avcc[o++] = 1; - avcc[o++] = (ppsLen >>> 8) & 0xff; avcc[o++] = ppsLen & 0xff; avcc.set(ppsNal, o); 
o += ppsLen; - return avcc; - }; - - const codecFromSps = (spsNal: Uint8Array): string => { - const hex = (n: number) => n.toString(16).toUpperCase().padStart(2, '0'); - return `avc1.${hex(spsNal[1])}${hex(spsNal[2])}${hex(spsNal[3])}`; - }; - - const ensureDecoder = () => { - if (decoder) return; - - decoder = new VideoDecoder({ - output: async frame => { - const bitmap = await createImageBitmap(frame); - setImageBitmap(bitmap); - frame.close(); - }, - error: e => console.error("Decoder error:", e) - }); - }; - - const tryConfigure = async (): Promise => { - if (configured || !sps || !pps) return false; - ensureDecoder(); - - const description = buildAvcC(sps, pps); - const codec = codecFromSps(sps); - const config: VideoDecoderConfig = { - codec, - codedWidth: currentWidth, - codedHeight: currentHeight, - description: description.buffer - }; - - const support = await VideoDecoder.isConfigSupported(config).catch(() => null); - if (!support?.supported) { - console.warn("Unsupported config:", config); - return false; - } - - if (decoder!.state === 'configured') decoder!.reset(); - decoder!.configure(config); - configured = true; - return true; - }; - - const feedChunk = (nals: Uint8Array[]) => { - let total = 0; - for (const n of nals) total += 4 + n.length; - const payload = new Uint8Array(total); - let o = 0; - for (const n of nals) { - const L = n.length; - payload[o++] = (L >>> 24) & 0xff; - payload[o++] = (L >>> 16) & 0xff; - payload[o++] = (L >>> 8) & 0xff; - payload[o++] = L & 0xff; - payload.set(n, o); o += L; - } - - const chunk = new EncodedVideoChunk({ - type: isKeyframe(nals) ? 
'key' : 'delta', - timestamp: nextTs, - data: payload - }); - nextTs += frameDurationUs; - decoder!.decode(chunk); - }; - - const onAccessUnit = (buf: ArrayBuffer) => { - const nals = splitAnnexB(buf); - if (!nals.length) return; - - for (const n of nals) { - const t = nalType(n); - if (t === 7) sps = n.slice(); - else if (t === 8) pps = n.slice(); - } - - if (!configured && sps && pps) tryConfigure().then(ok => { if (ok) feedChunk(nals); }); - else if (configured) feedChunk(nals); - }; - - const connect = () => { - ws = new WebSocket(wsUrl); - ws.binaryType = 'arraybuffer'; - - ws.onopen = () => { - console.log("Connected"); - configured = false; - }; - - ws.onmessage = ev => { - if (typeof ev.data === 'string') { - try { - const meta = JSON.parse(ev.data); - if (meta.type === 'config') { - console.log("meta.width"); - console.log(meta.width); - console.log("meta.height"); - console.log(meta.height); - setCurrentWidth(meta.width); - setCurrentHeight(meta.height); - configured = false; // force reconfigure - } - } catch (e) { - console.warn("Failed to parse metadata:", e); - } - } else { - - onAccessUnit(ev.data); - } - }; - - ws.onclose = () => { - configured = false; - try { - decoder?.flush().catch(() => { }); - } catch { } - setTimeout(connect, 3000); - }; - - ws.onerror = e => { - console.error('WebSocket error:', e); - try { ws.close(); } catch { } - }; - }; - - connect(); - - return () => { - ws?.close(); - decoder?.close(); - }; - }, [wsUrl]); - - return ( - - {children} - - ) -} \ No newline at end of file