diff --git a/extensions/src/doodlebot/Doodlebot.ts b/extensions/src/doodlebot/Doodlebot.ts
index 0f10917b9..b15e7d5af 100644
--- a/extensions/src/doodlebot/Doodlebot.ts
+++ b/extensions/src/doodlebot/Doodlebot.ts
@@ -8,6 +8,7 @@ import { base64ToInt32Array, deferred, makeWebsocket, Max32Int } from "./utils";
 import { LineDetector } from "./LineDetection";
 import { calculateArcTime } from "./TimeHelper";
 import type { BLEDeviceWithUartService } from "./ble";
+import { saveAudioBufferToWav } from "./SoundUtils";
 
 export type MotorStepRequest = {
     /** Number of steps for the stepper (+ == forward, - == reverse) */
@@ -287,7 +288,7 @@ export default class Doodlebot {
     private updateSensor(type: T, value: SensorData[T]) {
         this.onSensor.emit(type, value);
         if (type == "distance" && value as number >= 63) {
-            this.sensorData[type] = 100;
+            (this.sensorData[type] as number) = 100;
         } else {
             this.sensorData[type] = value;
         }
@@ -1119,6 +1120,62 @@ export default class Doodlebot {
         };
     }
 
+    async sendAudioFileToChatEndpoint(file: File, endpoint: string, voice_id: number, pitch_value: number) {
+        console.log("sending audio file");
+        const url = `https://doodlebot.media.mit.edu/${endpoint}?voice=${voice_id}&pitch=${pitch_value}`;
+        const formData = new FormData();
+        formData.append("audio_file", file);
+        // Debug helper (disabled): preview the outgoing clip locally.
+        // const audio = new Audio(URL.createObjectURL(file));
+        // audio.play();
+
+        try {
+            const response = await fetch(url, {
+                method: "POST",
+                body: formData,
+            });
+
+            if (!response.ok) {
+                const errorText = await response.text();
+                console.log("Error response:", errorText);
+                throw new Error(`HTTP error! status: ${response.status}`);
+            }
+
+            const textResponse = response.headers.get("text-response");
+            console.log("Text Response:", textResponse);
+
+            const blob = await response.blob();
+            const audioUrl = URL.createObjectURL(blob);
+            console.log("Audio URL:", audioUrl);
+
+            // Forward the synthesized reply to the robot's speaker as raw bytes.
+            const array = await blob.arrayBuffer();
+            const uint8array = new Uint8Array(array);
+
+            this.sendAudioData(uint8array);
+        } catch (error) {
+            console.error("Error sending audio file:", error);
+        }
+    }
+
+    async processAndSendAudio(buffer: AudioBuffer, endpoint: string, voice_id: number, pitch_value: number) {
+        try {
+            const wavBlob = await saveAudioBufferToWav(buffer);
+            const wavFile = new File([wavBlob], "output.wav", { type: "audio/wav" });
+
+            await this.sendAudioFileToChatEndpoint(wavFile, endpoint, voice_id, pitch_value);
+        } catch (error) {
+            console.error("Error processing and sending audio:", error);
+        }
+    }
+
     recordAudio(numSeconds = 1) {
         if (!this.setupAudioStream()) return;
 
@@ -1237,4 +1294,40 @@ export default class Doodlebot {
         this.pending[type] = undefined;
         return value;
     }
+
+    async uploadFile(type: string, blobURL: string) {
+        const tld = await this.topLevelDomain.promise;
+        let uploadEndpoint;
+        if (type == "sound") {
+            uploadEndpoint = "https://" + tld + "/api/v1/upload/sounds_upload";
+        } else {
+            uploadEndpoint = "https://" + tld + "/api/v1/upload/img_upload";
+        }
+
+        try {
+            // blobURL packs "<filename>---name---<object URL>"; split it back apart.
+            const components = blobURL.split("---name---");
+            const response1 = await fetch(components[1]);
+            if (!response1.ok) {
+                throw new Error(`Failed to fetch Blob from URL: ${blobURL}`);
+            }
+            const blob = await response1.blob();
+            // Convert Blob to File
+            const file = new File([blob], components[0], { type: blob.type });
+            const formData = new FormData();
+            formData.append("file", file);
+
+            const response2 = await fetch(uploadEndpoint, {
+                method: "POST",
+                body: formData,
+            });
+
+            if 
(!response2.ok) {
+                throw new Error(`Failed to upload file: ${response2.statusText}`);
+            }
+
+        } catch (error) {
+            console.error("Error:", error);
+        }
+    }
 }
diff --git a/extensions/src/doodlebot/ModelUtils.ts b/extensions/src/doodlebot/ModelUtils.ts
new file mode 100644
index 000000000..04895df44
--- /dev/null
+++ b/extensions/src/doodlebot/ModelUtils.ts
@@ -0,0 +1,212 @@
+import tmPose from '@teachablemachine/pose';
+import tmImage from '@teachablemachine/image';
+import * as speechCommands from '@tensorflow-models/speech-commands';
+
+export default class TeachableMachine {
+    latestAudioResults: any;
+    predictionState: Record<string, any>;
+    modelConfidences: Record<string, number> = {};
+    maxConfidence: number | null = null;
+    teachableImageModel: string | null;
+    isPredicting: number = 0;
+    ModelType = {
+        POSE: 'pose',
+        IMAGE: 'image',
+        AUDIO: 'audio',
+    };
+
+    constructor() {
+        this.predictionState = {};
+    }
+
+    useModel = async (url: string): Promise<{ type: "success" | "error" | "warning", msg: string }> => {
+        try {
+            const modelUrl = this.modelArgumentToURL(url);
+            console.log('Loading model from URL:', modelUrl);
+
+            // Initialize prediction state if needed
+            this.predictionState[modelUrl] = {};
+
+            // Load and initialize the model
+            const { model, type } = await this.initModel(modelUrl);
+
+            this.predictionState[modelUrl].modelType = type;
+            this.predictionState[modelUrl].model = model;
+
+            // Update the current model reference
+            this.teachableImageModel = modelUrl;
+
+            return {
+                type: "success",
+                msg: "Model loaded successfully"
+            };
+        } catch (e) {
+            console.error('Error loading model:', e);
+            this.teachableImageModel = null;
+            return {
+                type: "error",
+                msg: "Failed to load model"
+            };
+        }
+    }
+
+    modelArgumentToURL = (modelArg: string) => {
+        // Convert user-provided model URL/ID to the correct format
+        const endpointProvidedFromInterface = "https://teachablemachine.withgoogle.com/models/";
+        const redirectEndpoint = "https://storage.googleapis.com/tm-model/";
+
+        return modelArg.startsWith(endpointProvidedFromInterface)
+            ? 
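+            // e.g. "https://teachablemachine.withgoogle.com/models/abc123/" (hypothetical id)
+            // becomes "https://storage.googleapis.com/tm-model/abc123/"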
modelArg.replace(endpointProvidedFromInterface, redirectEndpoint) + : redirectEndpoint + modelArg + "/"; + } + + initModel = async (modelUrl: string) => { + const avoidCache = `?x=${Date.now()}`; + const modelURL = modelUrl + "model.json" + avoidCache; + const metadataURL = modelUrl + "metadata.json" + avoidCache; + + // First try loading as an image model + try { + const customMobileNet = await tmImage.load(modelURL, metadataURL); + + // Check if it's actually an audio model + if ((customMobileNet as any)._metadata.hasOwnProperty('tfjsSpeechCommandsVersion')) { + const recognizer = await speechCommands.create("BROWSER_FFT", undefined, modelURL, metadataURL); + await recognizer.ensureModelLoaded(); + + // Setup audio listening + await recognizer.listen(async result => { + this.latestAudioResults = result; + }, { + includeSpectrogram: true, + probabilityThreshold: 0.75, + invokeCallbackOnNoiseAndUnknown: true, + overlapFactor: 0.50 + }); + + return { model: recognizer, type: this.ModelType.AUDIO }; + } + // Check if it's a pose model + else if ((customMobileNet as any)._metadata.packageName === "@teachablemachine/pose") { + const customPoseNet = await tmPose.load(modelURL, metadataURL); + return { model: customPoseNet, type: this.ModelType.POSE }; + } + // Otherwise it's an image model + else { + return { model: customMobileNet, type: this.ModelType.IMAGE }; + } + } catch (e) { + console.error("Failed to load model:", e); + throw e; + } + } + + getPredictionFromModel = async (modelUrl: string, frame: ImageBitmap) => { + const { model, modelType } = this.predictionState[modelUrl]; + switch (modelType) { + case this.ModelType.IMAGE: + if (!frame) return null; + return await model.predict(frame); + case this.ModelType.POSE: + if (!frame) return null; + const { pose, posenetOutput } = await model.estimatePose(frame); + return await model.predict(posenetOutput); + case this.ModelType.AUDIO: + if (this.latestAudioResults) { + return model.wordLabels().map((label, i) => ({ + className: label, + probability: this.latestAudioResults.scores[i] + })); + } + return null; + } + } + + private getPredictionStateOrStartPredicting(modelUrl: string) { + if (!modelUrl || !this.predictionState || !this.predictionState[modelUrl]) { + console.warn('No prediction state available for model:', modelUrl); + return null; + } + return this.predictionState[modelUrl]; + } + + model_match(state) { + const modelUrl = this.teachableImageModel; + const className = state; + + const predictionState = this.getPredictionStateOrStartPredicting(modelUrl); + if (!predictionState) { + return false; + } + + const currentMaxClass = predictionState.topClass; + return (currentMaxClass === String(className)); + } + + getModelClasses(): string[] { + if ( + !this.teachableImageModel || + !this.predictionState || + !this.predictionState[this.teachableImageModel] || + !this.predictionState[this.teachableImageModel].hasOwnProperty('model') + ) { + return ["Select a class"]; + } + + if (this.predictionState[this.teachableImageModel].modelType === this.ModelType.AUDIO) { + return this.predictionState[this.teachableImageModel].model.wordLabels(); + } + + return this.predictionState[this.teachableImageModel].model.getClassLabels(); + } + + getModelPrediction() { + const modelUrl = this.teachableImageModel; + const predictionState: { topClass: string } = this.getPredictionStateOrStartPredicting(modelUrl); + if (!predictionState) { + console.error("No prediction state found"); + return ''; + } + return predictionState.topClass; + } + + async 
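+    // One prediction pass per registered model; isPredicting counts passes in
+    // flight so the caller's loop can skip frames while a pass is still running.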
predictAllBlocks(frame: ImageBitmap) {
+        for (let modelUrl in this.predictionState) {
+            if (!this.predictionState[modelUrl].model) {
+                console.log('No model found for:', modelUrl);
+                continue;
+            }
+            if (this.teachableImageModel !== modelUrl) {
+                console.log('Model URL mismatch:', modelUrl);
+                continue;
+            }
+            ++this.isPredicting;
+            const prediction = await this.predictModel(modelUrl, frame);
+            this.predictionState[modelUrl].topClass = prediction;
+            --this.isPredicting;
+        }
+    }
+
+    private async predictModel(modelUrl: string, frame: ImageBitmap) {
+        const predictions = await this.getPredictionFromModel(modelUrl, frame);
+        if (!predictions) {
+            return;
+        }
+        let maxProbability = 0;
+        let maxClassName = "";
+        for (let i = 0; i < predictions.length; i++) {
+            // toFixed() yields a string, so convert back to a number before comparing.
+            const probability = Number(predictions[i].probability.toFixed(2));
+            const className = predictions[i].className;
+            this.modelConfidences[className] = probability;
+            if (probability > maxProbability) {
+                maxClassName = className;
+                maxProbability = probability;
+            }
+        }
+        this.maxConfidence = maxProbability;
+        return maxClassName;
+    }
+}
diff --git a/extensions/src/doodlebot/SoundUtils.ts b/extensions/src/doodlebot/SoundUtils.ts
new file mode 100644
index 000000000..28eb9b187
--- /dev/null
+++ b/extensions/src/doodlebot/SoundUtils.ts
@@ -0,0 +1,176 @@
+export const writeString = (view: DataView, offset: number, text: string) => {
+    for (let i = 0; i < text.length; i++) {
+        view.setUint8(offset + i, text.charCodeAt(i));
+    }
+}
+
+export const saveAudioBufferToWav = async (buffer: AudioBuffer) => {
+    function createWavHeader(buffer) {
+        const numChannels = buffer.numberOfChannels;
+        // NOTE: the header advertises a quarter of the capture rate, presumably to
+        // match the robot's playback rate; the sample data itself is not resampled.
+        const sampleRate = buffer.sampleRate / 4;
+        const bitsPerSample = 16; // 16-bit PCM
+        const blockAlign = (numChannels * bitsPerSample) / 8;
+        const byteRate = sampleRate * blockAlign;
+        const dataLength = buffer.length * numChannels * 2; // 16-bit PCM = 2 bytes per sample
+        const header = new ArrayBuffer(44);
+        const view = new DataView(header);
+        // "RIFF" chunk descriptor
+        writeString(view, 0, "RIFF");
+        view.setUint32(4, 36 + dataLength, true); // File size - 8 bytes
+        writeString(view, 8, "WAVE");
+        // "fmt " sub-chunk
+        writeString(view, 12, "fmt ");
+        view.setUint32(16, 16, true); // Sub-chunk size (16 for PCM)
+        view.setUint16(20, 1, true); // Audio format (1 = PCM)
+        view.setUint16(22, numChannels, true); // Number of channels
+        view.setUint32(24, sampleRate, true); // Sample rate
+        view.setUint32(28, byteRate, true); // Byte rate
+        view.setUint16(32, blockAlign, true); // Block align
+        view.setUint16(34, bitsPerSample, true); // Bits per sample
+        // "data" sub-chunk
+        writeString(view, 36, "data");
+        view.setUint32(40, dataLength, true); // Data length
+        console.log("WAV Header:", new Uint8Array(header));
+        return header;
+    }
+    function writeString(view, offset, string) {
+        for (let i = 0; i < string.length; i++) {
+            view.setUint8(offset + i, string.charCodeAt(i));
+        }
+    }
+    function interleave(buffer) {
+        const numChannels = buffer.numberOfChannels;
+        const length = buffer.length * numChannels;
+        const result = new Float32Array(length);
+        const channelData = [];
+        for (let i = 0; i < numChannels; i++) {
+            channelData.push(buffer.getChannelData(i));
+        }
+        let index = 0;
+        for (let i = 0; i < buffer.length; i++) {
+            for (let j = 0; j < numChannels; j++) {
+                result[index++] = channelData[j][i];
+            }
+        }
+        console.log("Interleaved data:", result);
+        return result;
+    }
+    function floatTo16BitPCM(output, offset, input) {
+        for (let i = 0; i < input.length; i++, offset += 2) {
+            let 
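+            // Scale floats in [-1, 1] to signed 16-bit PCM: negatives by 0x8000
+            // (down to -32768), positives by 0x7FFF (up to 32767), little-endian.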
s = Math.max(-1, Math.min(1, input[i])); // Clamp to [-1, 1] + s = s < 0 ? s * 0x8000 : s * 0x7FFF; // Convert to 16-bit PCM + output.setInt16(offset, s, true); // Little-endian + } + } + const header = createWavHeader(buffer); + const interleaved = interleave(buffer); + const wavBuffer = new ArrayBuffer(header.byteLength + interleaved.length * 2); + const view = new DataView(wavBuffer); + //return this.createAndSaveWAV(interleaved, buffer.sampleRate); + // Write header + new Uint8Array(wavBuffer).set(new Uint8Array(header), 0); + // Write PCM data + floatTo16BitPCM(view, header.byteLength, interleaved); + console.log("Final WAV buffer length:", wavBuffer.byteLength); + console.log("Expected data length:", header.byteLength + interleaved.length * 2); + // Return a Blob + return new Blob([wavBuffer], { type: "audio/wav" }); + } + + export const generateWAV = (interleaved: Float32Array, sampleRate: number): Uint8Array => { + const numChannels = 1; // Mono + const bitsPerSample = 16; + const byteRate = sampleRate * numChannels * (bitsPerSample / 8); + const blockAlign = numChannels * (bitsPerSample / 8); + const dataLength = interleaved.length * (bitsPerSample / 8); + const bufferLength = 44 + dataLength; + const buffer = new ArrayBuffer(bufferLength); + const view = new DataView(buffer); + // RIFF header + writeString(view, 0, "RIFF"); + view.setUint32(4, bufferLength - 8, true); // File size + writeString(view, 8, "WAVE"); + // fmt subchunk + writeString(view, 12, "fmt "); + view.setUint32(16, 16, true); // Subchunk size + view.setUint16(20, 1, true); // PCM format + view.setUint16(22, numChannels, true); // Channels + view.setUint32(24, sampleRate, true); // Sample rate + view.setUint32(28, byteRate, true); // Byte rate + view.setUint16(32, blockAlign, true); // Block align + view.setUint16(34, bitsPerSample, true); // Bits per sample + // data subchunk + writeString(view, 36, "data"); + view.setUint32(40, dataLength, true); + // PCM data + const offset = 44; + for (let i = 0; i < interleaved.length; i++) { + const sample = Math.max(-1, Math.min(1, interleaved[i])); + view.setInt16(offset + i * 2, sample < 0 ? 
sample * 0x8000 : sample * 0x7FFF, true); + } + return new Uint8Array(buffer); + } + + export const createAndSaveWAV = (interleaved, sampleRate) => { + // Step 1: Get interleaved audio data and sample rate + // Step 2: Generate WAV file + const wavData = generateWAV(interleaved, sampleRate); + // Step 3: Save or process the WAV file + // Example: Create a Blob and download the file + const blob = new Blob([wavData], { type: "audio/wav" }); + const url = URL.createObjectURL(blob); + // Create a link to download the file + const a = document.createElement("a"); + a.href = url; + a.download = "output.wav"; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + return blob; + } + + export const blobToBase64 = (blob) => { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onloadend = () => resolve(reader.result); // "data:;base64," + reader.onerror = reject; + reader.readAsDataURL(blob); + }); + } + +export const isValidWavFile = async (file) => { + const arrayBuffer = await file.arrayBuffer(); + const dataView = new DataView(arrayBuffer); + + // Check the "RIFF" chunk descriptor + const riff = String.fromCharCode(...new Uint8Array(arrayBuffer.slice(0, 4))); + if (riff !== "RIFF") { + console.error("Invalid WAV file: Missing RIFF header"); + return false; + } + + // Check the "WAVE" format + const wave = String.fromCharCode(...new Uint8Array(arrayBuffer.slice(8, 12))); + if (wave !== "WAVE") { + console.error("Invalid WAV file: Missing WAVE format"); + return false; + } + + // Check for "fmt " subchunk + const fmt = String.fromCharCode(...new Uint8Array(arrayBuffer.slice(12, 16))); + if (fmt !== "fmt ") { + console.error("Invalid WAV file: Missing fmt subchunk"); + return false; + } + + // Check for "data" subchunk + const dataIndex = arrayBuffer.byteLength - 8; // Approximate location + const dataChunk = String.fromCharCode(...new Uint8Array(arrayBuffer.slice(dataIndex, dataIndex + 4))); + if (dataChunk !== "data") { + console.error("Invalid WAV file: Missing data subchunk"); + return false; + } + + console.log("Valid WAV file"); + return true; + } \ No newline at end of file diff --git a/extensions/src/doodlebot/detection.ts b/extensions/src/doodlebot/detection.ts deleted file mode 100644 index 28dc3c932..000000000 --- a/extensions/src/doodlebot/detection.ts +++ /dev/null @@ -1,161 +0,0 @@ -import type { ReverseMap } from "$common"; -import { - ObjectDetector, - FilesetResolver, - GestureRecognizer, - Detection, - ObjectDetectorResult -} from "@mediapipe/tasks-vision"; - -let objectDetector: ObjectDetector | null = null; - -export const classes = [ - "person", - "bicycle", - "car", - "motorcycle", - "airplane", - "bus", - "train", - "truck", - "boat", - "traffic light", - "fire hydrant", - "stop sign", - "parking meter", - "bench", - "bird", - "cat", - "dog", - "horse", - "sheep", - "cow", - "elephant", - "bear", - "zebra", - "giraffe", - "backpack", - "umbrella", - "handbag", - "tie", - "suitcase", - "frisbee", - "skis", - "snowboard", - "sports ball", - "kite", - "baseball bat", - "baseball glove", - "skateboard", - "surfboard", - "tennis racket", - "bottle", - "wine glass", - "cup", - "fork", - "knife", - "spoon", - "bowl", - "banana", - "apple", - "sandwich", - "orange", - "broccoli", - "carrot", - "hot dog", - "pizza", - "donut", - "cake", - "chair", - "couch", - "potted plant", - "bed", - "dining table", - "toilet", - "tv", - "laptop", - "mouse", - "remote", - "keyboard", - "cell phone", - "microwave", - "oven", - "toaster", - 
"sink", - "refrigerator", - "book", - "clock", - "vase", - "scissors", - "teddy bear", - "hair drier", - "toothbrush" -] as const; - -const createObjectDetector = async () => { - const vision = await FilesetResolver.forVisionTasks( - "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.3/wasm" - ); - - return ObjectDetector.createFromOptions(vision, { - baseOptions: { - modelAssetPath: `https://storage.googleapis.com/mediapipe-models/object_detector/efficientdet_lite0/float16/1/efficientdet_lite0.tflite`, - delegate: "GPU" // TODO: is this the best setting? - }, - scoreThreshold: 0.5, - runningMode: "IMAGE" - }); -}; - -export const objectDetection = async (image: HTMLImageElement) => { - objectDetector ??= await createObjectDetector(); - return objectDetector.detect(image); -} - -let gestureRecognizer: GestureRecognizer | null = null; - -export const gestures = { - thumbsUp: "Thumb_Up", - thumbsDown: "Thumb_Down", - peace: "Victory", - up: "Pointing_Up", - fist: "Closed_Fist", - love: "ILoveYou", - open: "Open_Palm", -} as const; - -export const categoryByGesture = Object.entries(gestures).reduce( - (acc, [key, value]) => ({ ...acc, [value]: key }), - {} as ReverseMap -); - -export const emojiByGesture = { - "👍": "Thumb_Up", - "👎": "Thumb_Down", - "✌️": "Victory", - "☝️": "Pointing_Up", - "✊": "Closed_Fist", - "👋": "Open_Palm", - "🤟": "ILoveYou", -} as const satisfies Record; - -export const gestureMenuItems = Object.entries(emojiByGesture).map(([text, value]) => ({ text, value })) as Array<{ text: keyof typeof emojiByGesture, value: keyof typeof categoryByGesture }>; - -const createGestureRecognizer = async () => { - const vision = await FilesetResolver.forVisionTasks( - "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.3/wasm" - ); - return GestureRecognizer.createFromOptions(vision, { - baseOptions: { - modelAssetPath: - "https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task", - delegate: "GPU" - }, - runningMode: "IMAGE" - }); -}; - -export const gestureDetection = async (image: HTMLImageElement | ImageData) => { - gestureRecognizer ??= await createGestureRecognizer(); - return gestureRecognizer.recognize(image); -} \ No newline at end of file diff --git a/extensions/src/doodlebot/index.ts b/extensions/src/doodlebot/index.ts index 289501181..78a7691cd 100644 --- a/extensions/src/doodlebot/index.ts +++ b/extensions/src/doodlebot/index.ts @@ -2,11 +2,10 @@ import { Environment, ExtensionMenuDisplayDetails, extension, block, buttonBlock import { DisplayKey, displayKeys, command, type Command, SensorKey, sensorKeys, units, keyBySensor, sensor } from "./enums"; import Doodlebot from "./Doodlebot"; import EventEmitter from "events"; -import { categoryByGesture } from "./detection"; +import TeachableMachine from "./ModelUtils"; +import { convertSvgUint8ArrayToPng } from "./utils"; //import { createLineDetector } from "./LineDetection"; -import tmPose from '@teachablemachine/pose'; -import tmImage from '@teachablemachine/image'; -import * as speechCommands from '@tensorflow-models/speech-commands'; + import JSZip from 'jszip'; import type { BLEDeviceWithUartService } from "./ble"; @@ -47,6 +46,8 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg doodlebot = new Doodlebot(); connected = false; + teachableMachine; + private indicator: Promise<{ close(): void; }>; private lineDetector: (() => Promise) | null = null; bluetoothEmitter = new EventEmitter(); @@ -54,123 +55,75 @@ export 
default class DoodlebotBlocks extends extension(details, "ui", "customArg gestureLoop: ReturnType; objectLoop: ReturnType; - gestureState = { - "Closed_Fist": false, - "Thumb_Up": false, - "Thumb_Down": false, - "Victory": false, - "Pointing_Up": false, - "ILoveYou": false, - "Open_Palm": false - } satisfies Record; - imageStream: HTMLImageElement; videoDrawable: ReturnType; - predictionState = {}; - latestAudioResults: any; - ModelType = { - POSE: 'pose', - IMAGE: 'image', - AUDIO: 'audio', - }; - teachableImageModel; - - lastUpdate: number = null; - maxConfidence: number = null; - modelConfidences = {}; - isPredicting: number = 0; + + INTERVAL = 16; DIMENSIONS = [480, 360]; soundDictionary: {} | { string: string[] }; costumeDictionary: {} | { string: string[] }; + lastUpdate: number = null; + isPredicting; + voice_id: number; pitch_value: number; - blocksRun: number; - - SOCIAL = false; - socialness = 1.0; // Value from 0 to 1, where 1 is always social and 0 is never social - - sleep(ms: number) { - return new Promise(resolve => setTimeout(resolve, ms)); - } - - async blockCounter(utility: BlockUtilityWithID) { - // if (JSON.parse(JSON.stringify(utility.blockID)) == JSON.parse(JSON.stringify(utility.thread.topBlock))) { - // this.blocksRun = 0; - // const r = Math.random(); - // console.log("starting", r); - // if (r < 0.3) { - // await this.speakText("Here I go!"); - // } else if (r < 0.6) { - // await this.speakText("Let's do it!"); - // } - // } - this.blocksRun = this.blocksRun + 1; - } - async init(env: Environment) { - this.blocksRun = 0; this.voice_id = 1; this.pitch_value = 0; this.soundDictionary = {}; this.costumeDictionary = {}; //requestAnimationFrame(() => this.setIndicator("disconnected")); this.openUI("Connect") - this._loop(); env.runtime.on("TARGETS_UPDATE", async () => { await this.setDictionaries(); }) - await this.setDictionaries(); + + // move dictionaries to doodlebot + await this.setDictionaries(); + this.teachableMachine = new TeachableMachine(); + this._loop(); soundFiles = ["File"]; imageFiles = ["File"]; - // try { - // const url = `https://doodlebot.media.mit.edu/settings?voice=1&pitch=0`; - // const response = await fetch(url, { - // method: "POST" - // }); - - // if (!response.ok) { - // const text = await response.text(); - // console.error("Error setting voice/pitch:", text); - // } - // } catch (error) { - // console.error("Failed to update settings:", error); - // } - - // env.runtime.on("PROJECT_RUN_START", async () => { - - // }) - // env.runtime.on("PROJECT_RUN_STOP", async () => { - // if (this.blocksRun > 10) { - // const r = Math.random(); - // console.log("blocks > 10", r); - // if (r < 0.3) { - // await this.speakText("Whew!") - // } else if (r < 0.6) { - // await this.speakText("That was tough!") - // } - // } else { - // const r = Math.random(); - // console.log("blocks < 10", r); - // if (r < 0.2) { - // await this.speakText("Yay!"); - // } else if (r < 0.4) { - // await this.speakText("Yippee!"); - // } else if (r < 0.5) { - // await this.speakText("I did it!"); - // } else if (r < 0.6) { - // await this.speakText("Go Doodlebot!"); - // } - // } - // }) + } + + private _loop() { + setTimeout(this._loop.bind(this), Math.max(this.runtime.currentStepTime, this.INTERVAL)); + const time = Date.now(); + if (this.lastUpdate === null) { + this.lastUpdate = time; + } + if (!this.teachableMachine.isPredicting) { + this.teachableMachine.isPredicting = 0; + } + const offset = time - this.lastUpdate; + + if (offset > this.INTERVAL && 
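+            // throttle: only fire once INTERVAL has elapsed and no pass is in flight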
this.teachableMachine.isPredicting === 0) { + this.lastUpdate = time; + this.teachableMachine.isPredicting = 0; + this.getImageStreamAndPredict(); + } + } + private async getImageStreamAndPredict() { + try { + const imageStream = this.getImageStream(); + if (!imageStream) { + // console.error("Failed to get image stream"); + return; + } + const imageBitmap = await createImageBitmap(imageStream); + this.teachableMachine.predictAllBlocks(imageBitmap); + } catch (error) { + console.error("Error in getting image stream and predicting:", error); + } } async setDictionaries() { @@ -186,7 +139,7 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg for (const costume of target.sprite.costumes) { let id = "Costume: " + costume.name; if (costume.asset.dataFormat == "svg") { - await this.convertSvgUint8ArrayToPng(costume.asset.data, costume.size[0], costume.size[1]) + await convertSvgUint8ArrayToPng(costume.asset.data, costume.size[0], costume.size[1]) .then((pngBlob: Blob) => { const url = URL.createObjectURL(pngBlob) this.costumeDictionary[target.id][id] = "costume9999.png---name---" + url; @@ -202,48 +155,7 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg } } - async convertSvgUint8ArrayToPng(uint8Array, width, height) { - return new Promise((resolve, reject) => { - // Convert Uint8Array to a string - const svgString = new TextDecoder().decode(uint8Array); - - // Create an SVG Blob - const svgBlob = new Blob([svgString], { type: 'image/svg+xml;charset=utf-8' }); - const url = URL.createObjectURL(svgBlob); - - // Create an Image element - const img = new Image(); - img.onload = () => { - // Create a canvas element - const canvas = document.createElement('canvas'); - canvas.width = width; - canvas.height = height; - const ctx = canvas.getContext('2d'); - - // Draw the image on the canvas - ctx.drawImage(img, 0, 0, width, height); - - // Convert the canvas to PNG data URL - const pngDataUrl = canvas.toDataURL('image/png'); - - // Convert the data URL to a Blob - fetch(pngDataUrl) - .then(res => res.blob()) - .then(blob => { - // Clean up - URL.revokeObjectURL(url); - resolve(blob); - }) - .catch(err => { - URL.revokeObjectURL(url); - reject(err); - }); - }; - - img.onerror = reject; - img.src = url; - }); - } + getCurrentSounds(id): string[] { return (this.soundDictionary && this.soundDictionary[id]) ? Object.keys(this.soundDictionary[id]) : []; @@ -278,16 +190,6 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg }); - try { - if (this.SOCIAL && Math.random() < this.socialness && this.doodlebot) { - await this.doodlebot.display("happy"); - await this.speakText("Hi! I'm Doodlebot! I'm here to help you learn about AI and robotics! 
If you have any questions, just click on the chat with me block and I'll do my best to help you out!"); - } - } catch (error) { - console.error("Error during welcome message:", error); - // Don't throw the error - we still want the robot to be usable even if the welcome message fails - } - await this.doodlebot.display("happy"); } @@ -308,24 +210,6 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg return this.imageStream; } - // async createVideoStreamDrawable() { - // this.imageStream ??= this.doodlebot?.getImageStream(); - // if (!this.imageStream) { - // console.error("Failed to get image stream"); - // return; - // } - // //console.log("Image stream dimensions:", this.imageStream.width, "x", this.imageStream.height); - // const drawable = this.createDrawable(this.imageStream); - // drawable.setVisible(true); - // const self = this; - // const update = () => { - // drawable.update(this.doodlebot?.getImageStream()); - // requestAnimationFrame(update); - // } - // requestAnimationFrame(update); - // return drawable; - // } - async createVideoStreamDrawable() { this.imageStream ??= this.doodlebot?.getImageStream(); if (!this.imageStream) { @@ -365,41 +249,6 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg this.openUI("Connect"); } - // @block({ - // type: "command", - // text: (value) => `set socialness to ${value}`, - // arg: { - // type: "number", - // defaultValue: 1.0 - // } - // }) - // async setSocialness(value: number) { - // // Ensure value is between 0 and 1 - // this.socialness = Math.max(0, Math.min(1, value)); - - // if (this.SOCIAL && Math.random() < this.socialness) { - // await this.doodlebot?.display("happy"); - // await this.speakText(`I'll be ${Math.round(this.socialness * 100)}% social from now on!`); - // } - // } - - // @block({ - // type: "command", - // text: (seconds) => `chat with me for ${seconds} seconds`, - // arg: { type: "number", defaultValue: 3 } - // }) - // async testChatAPI(seconds: number) { - // await this.handleChatInteraction(seconds, "chat"); - // } - - // @block({ - // type: "command", - // text: (seconds) => `repeat after me for ${seconds} seconds`, - // arg: { type: "number", defaultValue: 3 } - // }) - // async testRepeatAPI(seconds: number) { - // await this.handleChatInteraction(seconds, "repeat_after_me"); - // } @block({ type: "command", @@ -414,47 +263,15 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg this.pitch_value = pitch; } - - // @block({ - // type: "command", - // text: (seconds) => `listen for ${seconds} seconds and repeat`, - // arg: { type: "number", defaultValue: 3 } - // }) - // async repeatAfterMe(seconds: number) { - // // Record the audio - // const { context, buffer } = await this.doodlebot?.recordAudio(seconds); - - // // Convert to WAV format - // const wavBlob = await this.saveAudioBufferToWav(buffer); - // const arrayBuffer = await wavBlob.arrayBuffer(); - - // // Send the audio data directly to the Doodlebot for playback - // await this.doodlebot.sendAudioData(new Uint8Array(arrayBuffer)); - - // // Wait until playback is complete (approximately buffer duration) - // const playbackDuration = buffer.duration * 1000; // convert to milliseconds - // await new Promise(resolve => setTimeout(resolve, playbackDuration)); - // } - @block({ type: "command", text: (text) => `speak ${text}`, arg: { type: "string", defaultValue: "Hello!" 
} }) async speak(text: string, utility: BlockUtilityWithID) { - await this.blockCounter(utility); await this.speakText(text); } - // @block({ - // type: "command", - // text: (type: DisplayKey) => `display emotion ${type}`, - // arg: { type: "string", options: displayKeys.filter(key => key !== "clear"), defaultValue: "happy" } - // }) - // async setDisplay(display: DisplayKey) { - // await this.doodlebot?.display(display); - // } - @block({ type: "command", text: (text: string) => `display text ${text}`, @@ -513,11 +330,6 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg ] }) async arc(direction: "left" | "right", radius: number, degrees: number, utility: BlockUtilityWithID) { - await this.blockCounter(utility); - if (this.SOCIAL && Math.random() < this.socialness) { - await this.doodlebot?.display("happy"); - await this.speakText(`Driving ${direction} arc with radius ${radius} for ${degrees} degrees`); - } if (direction == "right") degrees *= -1; await this.doodlebot?.motorCommand("arc", radius/2.54, degrees); } @@ -528,11 +340,6 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg arg: { type: "number", defaultValue: 90 } }) async spin(degrees: number, utility: BlockUtilityWithID) { - await this.blockCounter(utility); - if (this.SOCIAL && Math.random() < this.socialness) { - await this.doodlebot?.display("happy"); - await this.speakText(`Spinning ${degrees} degrees`); - } if (degrees === 0) return; await this.doodlebot?.motorCommand("arc", 0, -degrees); } @@ -542,36 +349,9 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg text: "stop driving" }) async stop() { - if (this.SOCIAL && Math.random() < this.socialness) { - await this.doodlebot?.display("disgust"); - await this.speakText("okk, I will stop driving now."); - } await this.doodlebot?.motorCommand("stop"); } - - // @block({ - // type: "command", - // text: "perform line following" - // }) - // async testLine2() { - // if (this.SOCIAL) { - // await this.speakText("Starting line following now!"); - // } - // await this.doodlebot.followLine(); - // } - - // @block({ - // type: "command", - // text: "get predictions" - // }) - // async testLine2() { - // if (this.SOCIAL) { - // await this.speakText("Starting line following now!"); - // } - // await this.doodlebot.followLine(); - // } - @block({ type: "command", text: (direction) => `move pen ${direction}`, @@ -742,7 +522,7 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg async setDisplay(display: DisplayKey | string) { let costumeNames = Object.keys(this.costumeDictionary[this.runtime._editingTarget.id]); if (costumeNames.includes(display)) { - await this.uploadFile("image", this.costumeDictionary[this.runtime._editingTarget.id][display]); + await this.doodlebot.uploadFile("image", this.costumeDictionary[this.runtime._editingTarget.id][display]); await this.setArrays(); await this.doodlebot.displayFile("costume9999.png"); } else if (imageFiles.includes(display)) { @@ -752,6 +532,7 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg } } + // @ts-ignore @block((self) => ({ type: "command", text: (type: DisplayKey | string, seconds: number) => `display ${type} for ${seconds} seconds`, @@ -762,14 +543,14 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg (self.costumeDictionary && self.costumeDictionary[self.runtime._editingTarget.id]) ? 
Object.keys(self.costumeDictionary[self.runtime._editingTarget.id]) : [] as any[] ).filter((item: string) => item != "costume9999.png") }, defaultValue: "happy" - }, { type: "string", defaultValue: 1 }] + }, { type: "number", defaultValue: 1 }] })) async setDisplayForSeconds(display: DisplayKey | string, seconds: number) { const lastDisplayedKey = this.doodlebot.getLastDisplayedKey(); const lastDisplayedType = this.doodlebot.getLastDisplayedType(); let costumeNames = Object.keys(this.costumeDictionary[this.runtime._editingTarget.id]); if (costumeNames.includes(display)) { - await this.uploadFile("image", this.costumeDictionary[this.runtime._editingTarget.id][display]); + await this.doodlebot.uploadFile("image", this.costumeDictionary[this.runtime._editingTarget.id][display]); await this.setArrays(); await this.doodlebot.displayFile("costume9999.png"); await new Promise(resolve => setTimeout(resolve, seconds * 1000)); @@ -788,15 +569,6 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg } - // @block({ - // type: "command", - // text: (transparency) => `display video with ${transparency}% transparency`, - // arg: { type: "number", defaultValue: 0 } - // }) - // async connectToVideo(transparency: number) { - // this.videoDrawable ??= await this.createVideoStreamDrawable(); - // this.videoDrawable.setTransparency(transparency); - // } @block({ type: "command", @@ -804,164 +576,14 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg }) async connectToVideo() { this.videoDrawable ??= await this.createVideoStreamDrawable(); - if (this.SOCIAL && Math.random() < this.socialness) { - await this.doodlebot?.display("happy"); - await this.speakText("You can now see what I see on your screen!"); - } } - // @block({ - // type: "command", - // text: "start video segmentation", - // }) - // async startVideoSegmentation() { - // const ip = await this.getIP(); - // await this.doodlebot?.callSegmentation(ip); - // } - - // @block({ - // type: "command", - // text: "stop video segmentation", - // }) - // async stopVideoSegmentation() { - // const ip = await this.getIP(); - // await this.doodlebot?.stopSegmentation(ip); - // } - - // @block({ - // type: "hat", - // text: (gesture) => `when ${gesture} detected`, - // arg: { type: "string", defaultValue: "Thumb_Up", options: gestureMenuItems } - // }) - // whenGesture(gesture: keyof typeof this.gestureState) { - // const self = this; - - // this.gestureLoop ??= looper(async () => { - // self.imageStream ??= await self.doodlebot?.getImageStream(); - // const result = await gestureDetection(self.imageStream); - - // for (const k in self.gestureState) self.gestureState[k] = false; - - // for (const arr of result.gestures) - // for (const gesture of arr) - // self.gestureState[gesture.categoryName] = true; - // }, "gesture detection"); - - // return this.gestureState[gesture]; - // } - - // @block({ - // type: "reporter", - // text: (object) => `degrees from ${object}`, - // arg: { type: "string", defaultValue: "cup", options: classes } - // }) - // async getOffsetFromObject(object: typeof classes[number]) { - // this.imageStream ??= await this.doodlebot?.getImageStream(); - // const result = await objectDetection(this.imageStream); - // for (const detection of result.detections) { - // const isCup = detection.categories.some(({ categoryName }) => categoryName === object); - // if (!isCup) continue; - // if (!detection.boundingBox) continue; - // const x = detection.boundingBox.originX + 
detection.boundingBox.width / 2; - // const xOffset = x - this.imageStream.width / 2; - // return xOffset * 90 / this.imageStream.width; - // } - // return 0; - // } - - // @block({ - // type: "command", - // text: (seconds) => `record for ${seconds} seconds and play`, - // arg: { type: "number", defaultValue: 1 } - // }) - // async recordAudio(seconds: number) { - // const { context, buffer } = await this.doodlebot?.recordAudio(seconds); - - // const audioBufferSource = context.createBufferSource(); - // audioBufferSource.buffer = buffer; - - // const gainNode = context.createGain(); - // audioBufferSource.connect(gainNode); - // gainNode.connect(context.destination); - - // const fadeInDuration = 0.1; - // const fadeOutDuration = 0.1; - // const audioDuration = audioBufferSource.buffer.duration; - - // // Start with silence - // gainNode.gain.setValueAtTime(0, context.currentTime); - // gainNode.gain.linearRampToValueAtTime(1, context.currentTime + fadeInDuration); - - // gainNode.gain.setValueAtTime(1, context.currentTime + audioDuration - fadeOutDuration); - // gainNode.gain.linearRampToValueAtTime(0, context.currentTime + audioDuration); - - // audioBufferSource.start(); - // audioBufferSource.stop(context.currentTime + audioDuration); - - // await new Promise((resolve) => setTimeout(resolve, audioDuration * 1000)); - // } - async setArrays() { imageFiles = await this.doodlebot.findImageFiles(); soundFiles = await this.doodlebot.findSoundFiles(); - console.log("SETTING"); } - async uploadFile(type: string, blobURL: string) { - console.log("BEFORE IP"); - const tld = await this.doodlebot.topLevelDomain.promise; - console.log("GOT IP"); - let uploadEndpoint; - if (type == "sound") { - uploadEndpoint = "https://" + tld + "/api/v1/upload/sounds_upload"; - } else { - uploadEndpoint = "https://" + tld + "/api/v1/upload/img_upload"; - } - - try { - const components = blobURL.split("---name---"); - console.log("COMPONENTS"); - console.log(components); - console.log("BEFORE BLOB"); - const response1 = await fetch(components[1]); - console.log("AFTER BLOB"); - if (!response1.ok) { - throw new Error(`Failed to fetch Blob from URL: ${blobURL}`); - } - const blob = await response1.blob(); - console.log("BEFORE BLOB 2"); - - console.log("AFTER BLOB 2"); - // Convert Blob to File - const file = new File([blob], components[0], { type: blob.type }); - const formData = new FormData(); - formData.append("file", file); - - console.log("file"); - console.log(file); - console.log("BEFORE FETCH"); - const response2 = await fetch(uploadEndpoint, { - method: "POST", - body: formData, - }); - console.log("AFTER FETCH"); - console.log(response2); - - if (!response2.ok) { - throw new Error(`Failed to upload file: ${response2.statusText}`); - } - - console.log("File uploaded successfully"); - this.setArrays(); - } catch (error) { - console.error("Error:", error); - } - } - - async callSinglePredict() { - return await this.doodlebot.callSinglePredict(); - - } + @block({ type: "reporter", @@ -991,11 +613,6 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg return true; } - // @(scratch.command((self, $) => $`Upload sound file ${self.makeCustomArgument({ component: FileArgument, initial: { value: "", text: "File" } })}`)) - // async uploadSoundFile(test: string) { - // await this.uploadFile("sound", test); - // } - @(scratch.button`Upload sound`) uploadSoundUI() { this.openUI("UploadSound"); @@ -1006,11 +623,6 @@ export default class DoodlebotBlocks extends extension(details, "ui", 
"customArg this.openUI("UploadImage"); } - // @(scratch.command((self, $) => $`Upload image file ${self.makeCustomArgument({ component: FileArgument, initial: { value: "", text: "File" } })}`)) - // async uploadImageFile(test: string) { - // await this.uploadFile("image", test); - // } - @block({ type: "command", text: (volume) => `set volume to ${volume}`, @@ -1022,665 +634,78 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg } - - - - - // @block({ - // type: "command", - // text: (_command, args, protocol) => `send (${_command}, ${args}) over ${protocol}`, - // args: [ - // { type: "string", defaultValue: "u" }, - // { type: "string", defaultValue: "0" }, - // { type: "string", options: ["BLE", "Websocket"], defaultValue: "BLE" } - // ] - // }) - // async sendMessage(_command: string, args: string, protocol: "BLE" | "Websocket") { - // const candidates = Object.values(command).filter((entry) => entry === _command) - // if (candidates.length === 0) return console.error(`Command ${command} not found`); - - // protocol === "BLE" - // ? await this.doodlebot?.sendBLECommand(candidates[0], ...splitArgsString(args)) - // : await this.doodlebot?.sendWebsocketCommand(candidates[0], ...splitArgsString(args)); - // } - - // @block({ - // type: "command", - // text: (url) => `import AI model ${url}`, - // arg: { - // type: "string", - // defaultValue: "URL HERE" - // } - // }) - // async importModel(url: string) { - // if (this.SOCIAL && Math.random() < this.socialness) { - // await this.doodlebot?.display("happy"); - // await this.speakText(`Importing Teachable Machine model`); - // } - // await this.useModel(url); - - // if (this.SOCIAL && Math.random() < this.socialness) { - // await this.doodlebot?.display("happy"); - // await this.speakText(`Model imported successfully. 
You can access your image classes using the model prediction blocks.`); - // await this.speakText(`Let me know if you have any questions.`); - // } - // } - - - // @block({ - // type: "hat", - // text: (className) => `when model detects ${className}`, - // arg: { - // type: "string", - // options: function () { - // if (!this) { - // throw new Error('Context is undefined'); - // } - // return this.getModelClasses() || ["Select a class"]; - // }, - // defaultValue: "Select a class" - // } - // }) - // whenModelDetects(className: string) { - // return this.model_match(className); - // } - - // @block({ - // type: "reporter", - // text: "get AI prediction", - // }) - // modelPrediction() { - // return this.getModelPrediction(); - // } - - // @block({ - // type: "reporter", - // text: (className) => `confidence for ${className}`, - // arg: { - // type: "string", - // options: function () { - // if (!this) { - // throw new Error('Context is undefined'); - // } - // return this.getModelClasses() || ["Select a class"]; - // }, - // defaultValue: "Select a class" - // } - // }) - getConfidence(className: string) { - if (!this.modelConfidences || !this.modelConfidences[className]) { - return 0; + @block({ + type: "command", + text: (url) => `import AI model ${url}`, + arg: { + type: "string", + defaultValue: "URL HERE" } - return Math.round(this.modelConfidences[className] * 100); + }) + async importModel(url: string) { + const result = await this.teachableMachine.useModel(url); + await this.indicate(result); } - writeString(view: DataView, offset: number, text: string) { - for (let i = 0; i < text.length; i++) { - view.setUint8(offset + i, text.charCodeAt(i)); - } - } - async saveAudioBufferToWav(buffer) { - function createWavHeader(buffer) { - const numChannels = buffer.numberOfChannels; - const sampleRate = buffer.sampleRate / 4; - const bitsPerSample = 16; // 16-bit PCM - const blockAlign = (numChannels * bitsPerSample) / 8; - const byteRate = sampleRate * blockAlign; - const dataLength = buffer.length * numChannels * 2; // 16-bit PCM = 2 bytes per sample - const header = new ArrayBuffer(44); - const view = new DataView(header); - // "RIFF" chunk descriptor - writeString(view, 0, "RIFF"); - view.setUint32(4, 36 + dataLength, true); // File size - 8 bytes - writeString(view, 8, "WAVE"); - // "fmt " sub-chunk - writeString(view, 12, "fmt "); - view.setUint32(16, 16, true); // Sub-chunk size (16 for PCM) - view.setUint16(20, 1, true); // Audio format (1 = PCM) - view.setUint16(22, numChannels, true); // Number of channels - view.setUint32(24, sampleRate, true); // Sample rate - view.setUint32(28, byteRate, true); // Byte rate - view.setUint16(32, blockAlign, true); // Block align - view.setUint16(34, bitsPerSample, true); // Bits per sample - // "data" sub-chunk - writeString(view, 36, "data"); - view.setUint32(40, dataLength, true); // Data length - console.log("WAV Header:", new Uint8Array(header)); - return header; - } - function writeString(view, offset, string) { - for (let i = 0; i < string.length; i++) { - view.setUint8(offset + i, string.charCodeAt(i)); - } - } - function interleave(buffer) { - const numChannels = buffer.numberOfChannels; - const length = buffer.length * numChannels; - const result = new Float32Array(length); - const channelData = []; - for (let i = 0; i < numChannels; i++) { - channelData.push(buffer.getChannelData(i)); - } - let index = 0; - for (let i = 0; i < buffer.length; i++) { - for (let j = 0; j < numChannels; j++) { - result[index++] = channelData[j][i]; + 
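+    // Usage sketch (model URL hypothetical): after
+    //   await this.importModel("https://teachablemachine.withgoogle.com/models/abc123/");
+    // _loop() keeps predictAllBlocks() running in the background, and this hat
+    // reports true whenever the latest topClass matches the chosen className.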
@block({ + type: "hat", + text: (className) => `when model detects ${className}`, + arg: { + type: "string", + options: function () { + if (!this) { + throw new Error('Context is undefined'); } - } - console.log("Interleaved data:", result); - return result; - } - function floatTo16BitPCM(output, offset, input) { - for (let i = 0; i < input.length; i++, offset += 2) { - let s = Math.max(-1, Math.min(1, input[i])); // Clamp to [-1, 1] - s = s < 0 ? s * 0x8000 : s * 0x7FFF; // Convert to 16-bit PCM - output.setInt16(offset, s, true); // Little-endian - } + return this.teachableMachine.getModelClasses() || ["Select a class"]; + }, + defaultValue: "Select a class" } - const header = createWavHeader(buffer); - const interleaved = interleave(buffer); - const wavBuffer = new ArrayBuffer(header.byteLength + interleaved.length * 2); - const view = new DataView(wavBuffer); - //return this.createAndSaveWAV(interleaved, buffer.sampleRate); - // Write header - new Uint8Array(wavBuffer).set(new Uint8Array(header), 0); - // Write PCM data - floatTo16BitPCM(view, header.byteLength, interleaved); - console.log("Final WAV buffer length:", wavBuffer.byteLength); - console.log("Expected data length:", header.byteLength + interleaved.length * 2); - // Return a Blob - return new Blob([wavBuffer], { type: "audio/wav" }); - } - - generateWAV(interleaved: Float32Array, sampleRate: number): Uint8Array { - const numChannels = 1; // Mono - const bitsPerSample = 16; - const byteRate = sampleRate * numChannels * (bitsPerSample / 8); - const blockAlign = numChannels * (bitsPerSample / 8); - const dataLength = interleaved.length * (bitsPerSample / 8); - const bufferLength = 44 + dataLength; - const buffer = new ArrayBuffer(bufferLength); - const view = new DataView(buffer); - // RIFF header - this.writeString(view, 0, "RIFF"); - view.setUint32(4, bufferLength - 8, true); // File size - this.writeString(view, 8, "WAVE"); - // fmt subchunk - this.writeString(view, 12, "fmt "); - view.setUint32(16, 16, true); // Subchunk size - view.setUint16(20, 1, true); // PCM format - view.setUint16(22, numChannels, true); // Channels - view.setUint32(24, sampleRate, true); // Sample rate - view.setUint32(28, byteRate, true); // Byte rate - view.setUint16(32, blockAlign, true); // Block align - view.setUint16(34, bitsPerSample, true); // Bits per sample - // data subchunk - this.writeString(view, 36, "data"); - view.setUint32(40, dataLength, true); - // PCM data - const offset = 44; - for (let i = 0; i < interleaved.length; i++) { - const sample = Math.max(-1, Math.min(1, interleaved[i])); - view.setInt16(offset + i * 2, sample < 0 ? 
sample * 0x8000 : sample * 0x7FFF, true); - } - return new Uint8Array(buffer); - } - - createAndSaveWAV(interleaved, sampleRate) { - // Step 1: Get interleaved audio data and sample rate - // Step 2: Generate WAV file - const wavData = this.generateWAV(interleaved, sampleRate); - // Step 3: Save or process the WAV file - // Example: Create a Blob and download the file - const blob = new Blob([wavData], { type: "audio/wav" }); - const url = URL.createObjectURL(blob); - // Create a link to download the file - const a = document.createElement("a"); - a.href = url; - a.download = "output.wav"; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - return blob; - } - - blobToBase64(blob) { - return new Promise((resolve, reject) => { - const reader = new FileReader(); - reader.onloadend = () => resolve(reader.result); // "data:;base64," - reader.onerror = reject; - reader.readAsDataURL(blob); - }); - } - - async sendAudioFileToChatEndpoint(file, endpoint, blob, seconds) { - console.log("sending audio file"); - const url = `https://doodlebot.media.mit.edu/${endpoint}?voice=${this.voice_id}&pitch=${this.pitch_value}` - const formData = new FormData(); - formData.append("audio_file", file); - const audioURL = URL.createObjectURL(file); - const audio = new Audio(audioURL); - //audio.play(); - - try { - let response; - let uint8array; - // if (window.isSecureContext) { - - // if (endpoint == "repeat_after_me") { - // const eventSource = new EventSource("http://doodlebot.media.mit.edu/viseme-events"); - - // eventSource.onmessage = (event) => { - // console.log("Received viseme event:", event.data); - // try { - // const data = JSON.parse(event.data); - // const visemeId = data.visemeId; - // const offsetMs = data.offsetMs; - - // // You can customize which viseme IDs should trigger a command. - // // For now, all non-silence visemes trigger it. - // if (visemeId !== 0) { - // setTimeout(() => { - // this.doodlebot.display("happy"); - // console.log("DISPLAYING"); - // }, offsetMs); - // } - // } catch (err) { - // console.error("Failed to parse viseme event:", err); - // } - // }; - - // eventSource.onerror = (err) => { - // console.error("EventSource failed:", err); - // eventSource.close(); - // }; - // } - - response = await fetch(url, { - method: "POST", - body: formData, - }); - - if (!response.ok) { - const errorText = await response.text(); - console.log("Error response:", errorText); - throw new Error(`HTTP error! 
status: ${response.status}`); - } - - const textResponse = response.headers.get("text-response"); - console.log("Text Response:", textResponse); - - const blob = await response.blob(); - const audioUrl = URL.createObjectURL(blob); - console.log("Audio URL:", audioUrl); - - const audio = new Audio(audioUrl); - const array = await blob.arrayBuffer(); - uint8array = new Uint8Array(array); - // } else { - // const base64 = await this.blobToBase64(blob); - // const payload = { - // filename: file.name, - // content: base64, - // mimeType: blob.type, - // }; - // response = await this.doodlebot.fetch(endpoint, "chatgpt", payload); - // uint8array = new Uint8Array([...atob(response)].map(char => char.charCodeAt(0))); - // } - const interval = 50; // 0.2 seconds in milliseconds - const endTime = Date.now() + 1 * 1000; - - this.doodlebot.sendAudioData(uint8array); - while (Date.now() < endTime) { - await this.doodlebot.sendWebsocketCommand("d,O"); - await new Promise((res) => setTimeout(res, interval)); - await this.doodlebot.sendWebsocketCommand("d,N"); - await new Promise((res) => setTimeout(res, interval)); - } - - - } catch (error) { - console.error("Error sending audio file:", error); - } - } - async isValidWavFile(file) { - const arrayBuffer = await file.arrayBuffer(); - const dataView = new DataView(arrayBuffer); - - // Check the "RIFF" chunk descriptor - const riff = String.fromCharCode(...new Uint8Array(arrayBuffer.slice(0, 4))); - if (riff !== "RIFF") { - console.error("Invalid WAV file: Missing RIFF header"); - return false; - } - - // Check the "WAVE" format - const wave = String.fromCharCode(...new Uint8Array(arrayBuffer.slice(8, 12))); - if (wave !== "WAVE") { - console.error("Invalid WAV file: Missing WAVE format"); - return false; - } - - // Check for "fmt " subchunk - const fmt = String.fromCharCode(...new Uint8Array(arrayBuffer.slice(12, 16))); - if (fmt !== "fmt ") { - console.error("Invalid WAV file: Missing fmt subchunk"); - return false; - } - - // Check for "data" subchunk - const dataIndex = arrayBuffer.byteLength - 8; // Approximate location - const dataChunk = String.fromCharCode(...new Uint8Array(arrayBuffer.slice(dataIndex, dataIndex + 4))); - if (dataChunk !== "data") { - console.error("Invalid WAV file: Missing data subchunk"); - return false; - } - - console.log("Valid WAV file"); - return true; - } - - async processAndSendAudio(buffer, endpoint, seconds) { - try { - const wavBlob = await this.saveAudioBufferToWav(buffer); - console.log(wavBlob); - const wavFile = new File([wavBlob], "output.wav", { type: "audio/wav" }); - - // // Create a temporary URL for the file - // const url = URL.createObjectURL(wavFile); - - // // Create a temporary anchor element - // const a = document.createElement("a"); - // a.href = url; - // a.download = "output.wav"; - // document.body.appendChild(a); - - // // Trigger the download - // a.click(); - - // // Clean up - // document.body.removeChild(a); - // URL.revokeObjectURL(url); - // const isValid = await this.isValidWavFile(wavFile); - // if (!isValid) { - // throw new Error("Generated file is not a valid WAV file"); - // } - await this.sendAudioFileToChatEndpoint(wavFile, endpoint, wavBlob, seconds); - } catch (error) { - console.error("Error processing and sending audio:", error); - } - } - - // Internal method that can be called directly - private async handleChatInteraction(seconds: number, endpoint: string) { - console.log(`recording audio for ${seconds} seconds`); - // Display "listening" while recording - await 
this.doodlebot?.display("clear"); - await this.doodlebot?.displayText("listening"); - console.log("recording audio?") - const { context, buffer } = await this.doodlebot?.recordAudio(seconds); - console.log("finished recording audio"); - - // Display "thinking" while processing and waiting for response - await this.doodlebot?.display("clear"); - await this.doodlebot?.displayText("thinking"); - - // Before sending audio to be played - await this.processAndSendAudio(buffer, endpoint, seconds); - - // Display "speaking" when ready to speak - // await this.doodlebot?.display("clear"); - // await this.doodlebot?.displayText("speaking"); - - // Wait a moment before clearing the display - await new Promise(resolve => setTimeout(resolve, 2000)); - await this.doodlebot?.display("h"); - } - - async useModel(url: string) { - try { - const modelUrl = this.modelArgumentToURL(url); - console.log('Loading model from URL:', modelUrl); - - // Initialize prediction state if needed - this.predictionState[modelUrl] = {}; - - // Load and initialize the model - const { model, type } = await this.initModel(modelUrl); - this.predictionState[modelUrl].modelType = type; - this.predictionState[modelUrl].model = model; - - // Update the current model reference - this.teachableImageModel = modelUrl; - - await this.indicate({ - type: "success", - msg: "Model loaded successfully" - }); - } catch (e) { - console.error('Error loading model:', e); - this.teachableImageModel = null; - await this.indicate({ - type: "error", - msg: "Failed to load model" - }); - } - } - - modelArgumentToURL(modelArg: string) { - // Convert user-provided model URL/ID to the correct format - const endpointProvidedFromInterface = "https://teachablemachine.withgoogle.com/models/"; - const redirectEndpoint = "https://storage.googleapis.com/tm-model/"; - - return modelArg.startsWith(endpointProvidedFromInterface) - ? 
-    async initModel(modelUrl: string) {
-        const avoidCache = `?x=${Date.now()}`;
-        const modelURL = modelUrl + "model.json" + avoidCache;
-        const metadataURL = modelUrl + "metadata.json" + avoidCache;
-
-        // First try loading as an image model
-        try {
-            const customMobileNet = await tmImage.load(modelURL, metadataURL);
-
-            // Check if it's actually an audio model
-            if ((customMobileNet as any)._metadata.hasOwnProperty('tfjsSpeechCommandsVersion')) {
-                const recognizer = await speechCommands.create("BROWSER_FFT", undefined, modelURL, metadataURL);
-                await recognizer.ensureModelLoaded();
-
-                // Setup audio listening
-                await recognizer.listen(async result => {
-                    this.latestAudioResults = result;
-                }, {
-                    includeSpectrogram: true,
-                    probabilityThreshold: 0.75,
-                    invokeCallbackOnNoiseAndUnknown: true,
-                    overlapFactor: 0.50
-                });
-
-                return { model: recognizer, type: this.ModelType.AUDIO };
-            }
-            // Check if it's a pose model
-            else if ((customMobileNet as any)._metadata.packageName === "@teachablemachine/pose") {
-                const customPoseNet = await tmPose.load(modelURL, metadataURL);
-                return { model: customPoseNet, type: this.ModelType.POSE };
-            }
-            // Otherwise it's an image model
-            else {
-                return { model: customMobileNet, type: this.ModelType.IMAGE };
-            }
-        } catch (e) {
-            console.error("Failed to load model:", e);
-            throw e;
-        }
-    }
-
-    updateStageModel(modelUrl) {
-        const stage = this.runtime.getTargetForStage();
-        this.teachableImageModel = modelUrl;
-        if (stage) {
-            (stage as any).teachableImageModel = modelUrl;
-        }
-    }
-
-    private getPredictionStateOrStartPredicting(modelUrl: string) {
-        if (!modelUrl || !this.predictionState || !this.predictionState[modelUrl]) {
-            console.warn('No prediction state available for model:', modelUrl);
-            return null;
-        }
-        return this.predictionState[modelUrl];
-    }
-
-    model_match(state) {
-        const modelUrl = this.teachableImageModel;
-        const className = state;
-
-        const predictionState = this.getPredictionStateOrStartPredicting(modelUrl);
-        if (!predictionState) {
-            return false;
-        }
-
-        const currentMaxClass = predictionState.topClass;
-        return (currentMaxClass === String(className));
-    }
-
-    getModelClasses(): string[] {
-        if (
-            !this.teachableImageModel ||
-            !this.predictionState ||
-            !this.predictionState[this.teachableImageModel] ||
-            !this.predictionState[this.teachableImageModel].hasOwnProperty('model')
-        ) {
-            return ["Select a class"];
-        }
-
-        if (this.predictionState[this.teachableImageModel].modelType === this.ModelType.AUDIO) {
-            return this.predictionState[this.teachableImageModel].model.wordLabels();
-        }
-
-        return this.predictionState[this.teachableImageModel].model.getClassLabels();
-    }
-
-    getModelPrediction() {
-        const modelUrl = this.teachableImageModel;
-        const predictionState: { topClass: string } = this.getPredictionStateOrStartPredicting(modelUrl);
-        if (!predictionState) {
-            console.error("No prediction state found");
-            return '';
-        }
-        return predictionState.topClass;
+    })
+    whenModelDetects(className: string) {
+        return this.teachableMachine.model_match(className);
     }
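// Editor's note: predictionState entries are built up dynamically above; a sketch
// of the shape they take on (field names inferred from usage, not declared
// anywhere in this diff) may help readers follow the property accesses:
interface PredictionStateEntry {
    modelType?: string;  // one of ModelType.POSE | ModelType.IMAGE | ModelType.AUDIO
    model?: any;         // tmImage/tmPose model, or a speech-commands recognizer
    topClass?: string;   // most recent top-confidence class name for this model
}
type PredictionState = Record<string, PredictionStateEntry>; // keyed by model URL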
-    private _loop() {
-        setTimeout(this._loop.bind(this), Math.max(this.runtime.currentStepTime, this.INTERVAL));
-        const time = Date.now();
-        if (this.lastUpdate === null) {
-            this.lastUpdate = time;
-        }
-        if (!this.isPredicting) {
-            this.isPredicting = 0;
-        }
-        const offset = time - this.lastUpdate;
-
-        if (offset > this.INTERVAL && this.isPredicting === 0) {
-            this.lastUpdate = time;
-            this.isPredicting = 0;
-            this.getImageStreamAndPredict();
-        }
+    @block({
+        type: "reporter",
+        text: "get AI prediction",
+    })
+    modelPrediction() {
+        return this.teachableMachine.getModelPrediction();
     }
-    private async getImageStreamAndPredict() {
-        try {
-            const imageStream = this.getImageStream();
-            if (!imageStream) {
-                // console.error("Failed to get image stream");
-                return;
-            }
-            // const imageBitmap = await createImageBitmap(imageStream);
-            // this.predictAllBlocks(imageBitmap);
-        } catch (error) {
-            console.error("Error in getting image stream and predicting:", error);
+    @block({
+        type: "reporter",
+        text: (className) => `confidence for ${className}`,
+        arg: {
+            type: "string",
+            options: function () {
+                if (!this) {
+                    throw new Error('Context is undefined');
+                }
+                return this.teachableMachine.getModelClasses() || ["Select a class"];
+            },
+            defaultValue: "Select a class"
         }
-    }
-
-    private async predictAllBlocks(frame: ImageBitmap) {
-        for (let modelUrl in this.predictionState) {
-            if (!this.predictionState[modelUrl].model) {
-                console.log('No model found for:', modelUrl);
-                continue;
-            }
-            if (this.teachableImageModel !== modelUrl) {
-                console.log('Model URL mismatch:', modelUrl);
-                continue;
-            }
-            ++this.isPredicting;
-            const prediction = await this.predictModel(modelUrl, frame);
-            console.log('Prediction:', prediction);
-            this.predictionState[modelUrl].topClass = prediction;
-            --this.isPredicting;
+    })
+    getConfidence(className: string) {
+        if (!this.teachableMachine.modelConfidences || !this.teachableMachine.modelConfidences[className]) {
+            return 0;
         }
+        return Math.round(this.teachableMachine.modelConfidences[className] * 100);
     }
-    private async predictModel(modelUrl: string, frame: ImageBitmap) {
-        const predictions = await this.getPredictionFromModel(modelUrl, frame);
-        if (!predictions) {
-            return;
-        }
-        let maxProbability = 0;
-        let maxClassName = "";
-        for (let i = 0; i < predictions.length; i++) {
-            const probability = predictions[i].probability.toFixed(2);
-            const className = predictions[i].className;
-            this.modelConfidences[className] = probability;
-            if (probability > maxProbability) {
-                maxClassName = className;
-                maxProbability = probability;
-            }
-        }
-        this.maxConfidence = maxProbability;
-        return maxClassName;
-    }
-    private async getPredictionFromModel(modelUrl: string, frame: ImageBitmap) {
-        const { model, modelType } = this.predictionState[modelUrl];
-        switch (modelType) {
-            case this.ModelType.IMAGE:
-                if (!frame) return null;
-                return await model.predict(frame);
-            case this.ModelType.POSE:
-                if (!frame) return null;
-                const { pose, posenetOutput } = await model.estimatePose(frame);
-                return await model.predict(posenetOutput);
-            case this.ModelType.AUDIO:
-                if (this.latestAudioResults) {
-                    return model.wordLabels().map((label, i) => ({
-                        className: label,
-                        probability: this.latestAudioResults.scores[i]
-                    }));
-                }
-                return null;
-        }
-    }
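// Editor's note: the removed predictModel compares probability.toFixed(2) strings
// against each other, which orders correctly only because toFixed(2) yields
// fixed-width strings. A numeric argmax is safer; a minimal sketch (hypothetical
// helper, not part of this diff):
const topPrediction = (predictions: { className: string; probability: number }[]): string =>
    predictions.length
        ? predictions.reduce((best, p) => (p.probability > best.probability ? p : best)).className
        : "";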
-    // @block({
-    //     type: "command",
-    //     text: (imageClass, seconds) => `capture snapshots of ${imageClass} class for ${seconds} seconds`,
-    //     args: [
-    //         { type: "string", defaultValue: "class name" },
-    //         { type: "number", defaultValue: 10 }
-    //     ]
-    // })
+    @block({
+        type: "command",
+        text: (imageClass, seconds) => `capture snapshots of ${imageClass} class for ${seconds} seconds`,
+        args: [
+            { type: "string", defaultValue: "class name" },
+            { type: "number", defaultValue: 10 }
+        ]
+    })
     async captureSnapshots(imageClass: string, seconds: number) {
-        if (this.SOCIAL && Math.random() < this.socialness) {
-            await this.doodlebot?.display("love");
-            await this.speakText(`Bring the ${imageClass} to my camera, and I will capture snapshots of it for you.`);
-            await this.speakText("Ready Set Go!");
-        }
-
         // Create indicator to show progress
         const indicator = await this.indicate({
             type: "warning",
@@ -1750,12 +775,6 @@ export default class DoodlebotBlocks extends extension(details, "ui", "customArg
             msg: `Captured ${snapshots.length} snapshots of ${imageClass}`
         });
 
-        if (this.SOCIAL && Math.random() < this.socialness) {
-            await this.doodlebot?.display("happy");
-            await this.speakText(`Done. I've saved the snapshots to the Downloads folder, to a zip file called ${imageClass}_snapshots`);
-            await this.speakText("You can now upload this file to the Teachable Machine interface to train your model.");
-            await this.speakText("Let me know if you have any questions.");
-        }
     }
 
     private async speakText(text: string, showDisplay: boolean = false) {
diff --git a/extensions/src/doodlebot/utils.ts b/extensions/src/doodlebot/utils.ts
index 58ca8f327..8f86e4508 100644
--- a/extensions/src/doodlebot/utils.ts
+++ b/extensions/src/doodlebot/utils.ts
@@ -50,4 +50,47 @@ export const deferred = () => {
   return { promise, resolve: resolve!, reject: reject! };
 };
 
+export const convertSvgUint8ArrayToPng = async (uint8Array: Uint8Array, width: number, height: number): Promise<Blob> => {
+    return new Promise((resolve, reject) => {
+        // Convert Uint8Array to a string
+        const svgString = new TextDecoder().decode(uint8Array);
+
+        // Create an SVG Blob
+        const svgBlob = new Blob([svgString], { type: 'image/svg+xml;charset=utf-8' });
+        const url = URL.createObjectURL(svgBlob);
+
+        // Create an Image element
+        const img = new Image();
+        img.onload = () => {
+            // Create a canvas element
+            const canvas = document.createElement('canvas');
+            canvas.width = width;
+            canvas.height = height;
+            const ctx = canvas.getContext('2d');
+
+            // Draw the image on the canvas
+            ctx.drawImage(img, 0, 0, width, height);
+
+            // Convert the canvas to PNG data URL
+            const pngDataUrl = canvas.toDataURL('image/png');
+
+            // Convert the data URL to a Blob
+            fetch(pngDataUrl)
+                .then(res => res.blob())
+                .then(blob => {
+                    // Clean up
+                    URL.revokeObjectURL(url);
+                    resolve(blob);
+                })
+                .catch(err => {
+                    URL.revokeObjectURL(url);
+                    reject(err);
+                });
+        };
+
+        img.onerror = reject;
+        img.src = url;
+    });
+}
+
 export type Deferred<T> = ReturnType<typeof deferred<T>>;
\ No newline at end of file
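// Editor's note: a hypothetical usage sketch for the new convertSvgUint8ArrayToPng
// helper (the SVG markup and 64x64 size are placeholders). SVG costume bytes are
// rasterized to a PNG blob, which the upload endpoints can accept:
import { convertSvgUint8ArrayToPng } from "./utils";

const svgBytes = new TextEncoder().encode(
    '<svg xmlns="http://www.w3.org/2000/svg" width="64" height="64">' +
    '<circle cx="32" cy="32" r="24" fill="rebeccapurple"/></svg>'
);
const png = await convertSvgUint8ArrayToPng(svgBytes, 64, 64); // top-level await (ESM)
console.log(png.type, png.size); // "image/png", size of the rasterized image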