Skip to content

Commit

Permalink
🎥 feat: YouTube Tool (danny-avila#5582)
Browse files Browse the repository at this point in the history
* adding youtube tool

* refactor: use short `url` param instead of `videoUrl`

* refactor: move API key retrieval to a separate credentials module

* refactor: remove unnecessary `isEdited` message property

* refactor: remove unnecessary `isEdited` message property pt. 2

* refactor: YouTube Tool with new `tool()` generator, handle tools already created by new `tool` generator

* fix: only reset request data for multi-convo messages

* refactor: enhance YouTube tool by adding transcript parsing and returning structured JSON responses

* refactor: update transcript parsing to handle raw response and clean up text output

* feat: support toolkits and refactor YouTube tool as a toolkit for better LLM usage

* refactor: remove unused OpenAPI specs and streamline tools transformation in loadAsyncEndpoints

* refactor: implement manifestToolMap for better tool management and streamline authentication handling

* feat: support toolkits for assistants

* refactor: rename loadedTools to toolDefinitions for clarity in PluginController and assistant controllers

* feat: complete support of toolkits for assistants

---------

Co-authored-by: Danilo Pejakovic <[email protected]>
  • Loading branch information
danny-avila and leondape authored Feb 1, 2025
1 parent 33f6093 commit 352565c
Show file tree
Hide file tree
Showing 29 changed files with 456 additions and 102 deletions.
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,10 @@ AZURE_AI_SEARCH_SEARCH_OPTION_SELECT=
GOOGLE_SEARCH_API_KEY=
GOOGLE_CSE_ID=

# YOUTUBE
#-----------------
YOUTUBE_API_KEY=

# SerpAPI
#-----------------
SERPAPI_API_KEY=
Expand Down
2 changes: 0 additions & 2 deletions api/app/clients/PluginsClient.js
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ class PluginsClient extends OpenAIClient {
logger.debug('[PluginsClient] sendMessage', { userMessageText: message, opts });
const {
user,
isEdited,
conversationId,
responseMessageId,
saveOptions,
Expand Down Expand Up @@ -359,7 +358,6 @@ class PluginsClient extends OpenAIClient {
conversationId,
parentMessageId: userMessage.messageId,
isCreatedByUser: false,
isEdited,
model: this.modelOptions.model,
sender: this.sender,
promptTokens,
Expand Down
1 change: 0 additions & 1 deletion api/app/clients/prompts/formatMessages.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ describe('formatMessage', () => {
error: false,
finish_reason: null,
isCreatedByUser: true,
isEdited: false,
model: null,
parentMessageId: Constants.NO_PARENT,
sender: 'User',
Expand Down
21 changes: 19 additions & 2 deletions api/app/clients/tools/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,40 @@ const availableTools = require('./manifest.json');

// Structured Tools
const DALLE3 = require('./structured/DALLE3');
const OpenWeather = require('./structured/OpenWeather');
const createYouTubeTools = require('./structured/YouTube');
const StructuredWolfram = require('./structured/Wolfram');
const StructuredACS = require('./structured/AzureAISearch');
const StructuredSD = require('./structured/StableDiffusion');
const GoogleSearchAPI = require('./structured/GoogleSearch');
const TraversaalSearch = require('./structured/TraversaalSearch');
const TavilySearchResults = require('./structured/TavilySearchResults');
const OpenWeather = require('./structured/OpenWeather');

/**
 * Manifest entries indexed by their `pluginKey`.
 * @type {Record<string, TPlugin | undefined>}
 */
const manifestToolMap = availableTools.reduce((byKey, plugin) => {
  byKey[plugin.pluginKey] = plugin;
  return byKey;
}, {});

/**
 * Manifest entries explicitly flagged with `toolkit: true`, in manifest order.
 * @type {Array<TPlugin>}
 */
const toolkits = availableTools.filter((plugin) => plugin.toolkit === true);

module.exports = {
toolkits,
availableTools,
manifestToolMap,
// Structured Tools
DALLE3,
OpenWeather,
StructuredSD,
StructuredACS,
GoogleSearchAPI,
TraversaalSearch,
StructuredWolfram,
createYouTubeTools,
TavilySearchResults,
OpenWeather,
};
14 changes: 14 additions & 0 deletions api/app/clients/tools/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@
}
]
},
{
"name": "YouTube",
"pluginKey": "youtube",
"toolkit": true,
"description": "Get YouTube video information, retrieve comments, analyze transcripts and search for videos.",
"icon": "https://www.youtube.com/s/desktop/7449ebf7/img/favicon_144x144.png",
"authConfig": [
{
"authField": "YOUTUBE_API_KEY",
"label": "YouTube API Key",
"description": "Your YouTube Data API v3 key."
}
]
},
{
"name": "Wolfram",
"pluginKey": "wolfram",
Expand Down
10 changes: 1 addition & 9 deletions api/app/clients/tools/structured/TavilySearch.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,13 @@
const { z } = require('zod');
const { tool } = require('@langchain/core/tools');
const { getEnvironmentVariable } = require('@langchain/core/utils/env');
const { getApiKey } = require('./credentials');

function createTavilySearchTool(fields = {}) {
const envVar = 'TAVILY_API_KEY';
const override = fields.override ?? false;
const apiKey = fields.apiKey ?? getApiKey(envVar, override);
const kwargs = fields?.kwargs ?? {};

function getApiKey(envVar, override) {
const key = getEnvironmentVariable(envVar);
if (!key && !override) {
throw new Error(`Missing ${envVar} environment variable.`);
}
return key;
}

return tool(
async (input) => {
const { query, ...rest } = input;
Expand Down
203 changes: 203 additions & 0 deletions api/app/clients/tools/structured/YouTube.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
const { z } = require('zod');
const { tool } = require('@langchain/core/tools');
const { youtube } = require('@googleapis/youtube');
const { YoutubeTranscript } = require('youtube-transcript');
const { getApiKey } = require('./credentials');
const { logger } = require('~/config');

/**
 * Extracts the 11-character video ID from a YouTube URL or a bare video ID.
 *
 * Recognized inputs: a bare 11-character ID, `youtu.be/<id>`, and `youtube.com/`
 * paths using `watch?v=`, `embed/`, `shorts/`, `live/` or `v/`. Surrounding
 * whitespace is ignored, and `watch` URLs are also recognized when `v` is not
 * the first query parameter (e.g. `watch?feature=share&v=<id>`).
 *
 * @param {string} url - YouTube video URL or bare video ID.
 * @returns {string | null} The video ID, or `null` when none can be extracted
 *   (including when `url` is not a string).
 */
function extractVideoId(url) {
  if (typeof url !== 'string') {
    return null;
  }

  // Stray leading/trailing whitespace would otherwise defeat the `$` anchors below.
  const input = url.trim();

  const rawIdRegex = /^[a-zA-Z0-9_-]{11}$/;
  if (rawIdRegex.test(input)) {
    return input;
  }

  const regex = new RegExp(
    '(?:youtu\\.be/|youtube(?:\\.com)?/(?:' +
      '(?:watch\\?v=)|(?:embed/)|(?:shorts/)|(?:live/)|(?:v/)|(?:/))?)' +
      '([a-zA-Z0-9_-]{11})(?:\\S+)?$',
  );
  const match = input.match(regex);
  if (match) {
    return match[1];
  }

  // Fallback for watch URLs whose `v` parameter is preceded by other query parameters.
  const queryParamRegex =
    /youtube(?:\.com)?\/watch\?(?:[^#\s]*&)?v=([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])/;
  const queryMatch = input.match(queryParamRegex);
  return queryMatch ? queryMatch[1] : null;
}

/**
 * Flattens a transcript response into a single plain-text string.
 *
 * Each entry's `text` is trimmed, empty segments are dropped, the remaining
 * segments are joined with single spaces, and every `&amp;#39;` sequence is
 * replaced with an apostrophe.
 *
 * @param {Array<{ text?: string }>} transcriptResponse - Raw transcript entries.
 * @returns {string} The joined transcript text, or `''` when the input is not an array.
 */
function parseTranscript(transcriptResponse) {
  if (!Array.isArray(transcriptResponse)) {
    return '';
  }

  return (
    transcriptResponse
      // Entries without a string `text` are treated as empty instead of throwing,
      // so a single malformed entry does not discard the whole transcript.
      .map((entry) => (typeof entry?.text === 'string' ? entry.text.trim() : ''))
      .filter((text) => text)
      .join(' ')
      .replaceAll('&amp;#39;', '\'')
  );
}

/**
 * Builds the YouTube toolkit: search, video info, comments and transcript tools.
 *
 * The API key is resolved from `fields.apiKey`, then `fields.YOUTUBE_API_KEY`,
 * and finally `getApiKey('YOUTUBE_API_KEY', override)`.
 *
 * @param {object} [fields] - Optional configuration.
 * @param {string} [fields.apiKey] - API key; takes precedence over every other source.
 * @param {string} [fields.YOUTUBE_API_KEY] - API key keyed by its env var name.
 * @param {boolean} [fields.override=false] - Forwarded to `getApiKey`.
 * @returns {Array} `[searchTool, infoTool, commentsTool, transcriptTool]`, in that order.
 */
function createYouTubeTools(fields = {}) {
  const envVar = 'YOUTUBE_API_KEY';
  const override = fields.override ?? false;
  const apiKey = fields.apiKey ?? fields[envVar] ?? getApiKey(envVar, override);

  const youtubeClient = youtube({
    version: 'v3',
    auth: apiKey,
  });

  const searchTool = tool(
    async ({ query, maxResults = 5 }) => {
      const response = await youtubeClient.search.list({
        part: 'snippet',
        q: query,
        type: 'video',
        maxResults: maxResults || 5,
      });
      // A response without `items` is reported as an empty result list instead of throwing.
      const items = response.data.items ?? [];
      const result = items.map((item) => ({
        title: item.snippet.title,
        description: item.snippet.description,
        url: `https://www.youtube.com/watch?v=${item.id.videoId}`,
      }));
      return JSON.stringify(result, null, 2);
    },
    {
      name: 'youtube_search',
      description: `Search for YouTube videos by keyword or phrase.
- Required: query (search terms to find videos)
- Optional: maxResults (number of videos to return, 1-50, default: 5)
- Returns: List of videos with titles, descriptions, and URLs
- Use for: Finding specific videos, exploring content, research
Example: query="cooking pasta tutorials" maxResults=3`,
      schema: z.object({
        query: z.string().describe('Search query terms'),
        maxResults: z.number().int().min(1).max(50).optional().describe('Number of results (1-50)'),
      }),
    },
  );

  const infoTool = tool(
    async ({ url }) => {
      const videoId = extractVideoId(url);
      if (!videoId) {
        throw new Error('Invalid YouTube URL or video ID');
      }

      const response = await youtubeClient.videos.list({
        part: 'snippet,statistics',
        id: videoId,
      });

      if (!response.data.items?.length) {
        throw new Error('Video not found');
      }
      const video = response.data.items[0];

      const result = {
        title: video.snippet.title,
        description: video.snippet.description,
        views: video.statistics.viewCount,
        likes: video.statistics.likeCount,
        comments: video.statistics.commentCount,
      };
      return JSON.stringify(result, null, 2);
    },
    {
      name: 'youtube_info',
      description: `Get detailed metadata and statistics for a specific YouTube video.
- Required: url (full YouTube URL or video ID)
- Returns: Video title, description, view count, like count, comment count
- Use for: Getting video metrics and basic metadata
- DO NOT USE FOR VIDEO SUMMARIES, USE TRANSCRIPTS FOR COMPREHENSIVE ANALYSIS
- Accepts both full URLs and video IDs
Example: url="https://youtube.com/watch?v=abc123" or url="abc123"`,
      schema: z.object({
        url: z.string().describe('YouTube video URL or ID'),
      }),
    },
  );

  const commentsTool = tool(
    async ({ url, maxResults = 10 }) => {
      const videoId = extractVideoId(url);
      if (!videoId) {
        throw new Error('Invalid YouTube URL or video ID');
      }

      const response = await youtubeClient.commentThreads.list({
        part: 'snippet',
        videoId,
        maxResults: maxResults || 10,
      });

      // A response without `items` is reported as an empty comment list instead of throwing.
      const items = response.data.items ?? [];
      const result = items.map((item) => ({
        author: item.snippet.topLevelComment.snippet.authorDisplayName,
        text: item.snippet.topLevelComment.snippet.textDisplay,
        likes: item.snippet.topLevelComment.snippet.likeCount,
      }));
      return JSON.stringify(result, null, 2);
    },
    {
      name: 'youtube_comments',
      description: `Retrieve top-level comments from a YouTube video.
- Required: url (full YouTube URL or video ID)
- Optional: maxResults (number of comments, 1-50, default: 10)
- Returns: Comment text, author names, like counts
- Use for: Sentiment analysis, audience feedback, engagement review
Example: url="abc123" maxResults=20`,
      schema: z.object({
        url: z.string().describe('YouTube video URL or ID'),
        maxResults: z
          .number()
          .int()
          .min(1)
          .max(50)
          .optional()
          .describe('Number of comments to retrieve'),
      }),
    },
  );

  const transcriptTool = tool(
    async ({ url }) => {
      const videoId = extractVideoId(url);
      if (!videoId) {
        throw new Error('Invalid YouTube URL or video ID');
      }

      // Preferred languages are tried in order; each failure is logged and the next is tried.
      for (const lang of ['en', 'de']) {
        try {
          const transcript = await YoutubeTranscript.fetchTranscript(videoId, { lang });
          return parseTranscript(transcript);
        } catch (e) {
          logger.error(e);
        }
      }

      // Last resort: no language preference. A failure here is surfaced to the caller,
      // keeping the underlying error attached as `cause`.
      try {
        const transcript = await YoutubeTranscript.fetchTranscript(videoId);
        return parseTranscript(transcript);
      } catch (error) {
        throw new Error(`Failed to fetch transcript: ${error.message}`, { cause: error });
      }
    },
    {
      name: 'youtube_transcript',
      description: `Fetch and parse the transcript/captions of a YouTube video.
- Required: url (full YouTube URL or video ID)
- Returns: Full video transcript as plain text
- Use for: Content analysis, summarization, translation reference
- This is the "Go-to" tool for analyzing actual video content
- Attempts to fetch English first, then German, then any available language
Example: url="https://youtube.com/watch?v=abc123"`,
      schema: z.object({
        url: z.string().describe('YouTube video URL or ID'),
      }),
    },
  );

  return [searchTool, infoTool, commentsTool, transcriptTool];
}

module.exports = createYouTubeTools;
13 changes: 13 additions & 0 deletions api/app/clients/tools/structured/credentials.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
const { getEnvironmentVariable } = require('@langchain/core/utils/env');

/**
 * Looks up an API key through `getEnvironmentVariable`.
 *
 * @param {string} envVar - Name of the environment variable holding the key.
 * @param {boolean} [override] - When truthy, a missing key is tolerated and
 *   returned as-is instead of raising.
 * @returns {string | undefined} The value returned by `getEnvironmentVariable`.
 * @throws {Error} When the key is missing and `override` is falsy.
 */
function getApiKey(envVar, override) {
  const value = getEnvironmentVariable(envVar);
  const isUsable = Boolean(value) || Boolean(override);
  if (isUsable) {
    return value;
  }
  throw new Error(`Missing ${envVar} environment variable.`);
}

module.exports = {
getApiKey,
};
Loading

0 comments on commit 352565c

Please sign in to comment.