From e3d2d788e3eed16667867620c8aaa17bd20f1d64 Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Fri, 27 Dec 2024 05:03:50 +0100 Subject: [PATCH] refactor: reduce vertex duplication Signed-off-by: Alexander Alemayhu --- .../adapters/fileConversion/constants.ts | 25 ++++++-- .../fileConversion/contentGenerationUtils.ts | 28 +++++++++ .../fileConversion/convertImageToHTML.ts | 60 ++++--------------- .../fileConversion/convertPDFToHTML.ts | 46 ++------------ .../adapters/fileConversion/vertexAIUtils.ts | 15 +++++ 5 files changed, 80 insertions(+), 94 deletions(-) create mode 100644 src/infrastracture/adapters/fileConversion/contentGenerationUtils.ts create mode 100644 src/infrastracture/adapters/fileConversion/vertexAIUtils.ts diff --git a/src/infrastracture/adapters/fileConversion/constants.ts b/src/infrastracture/adapters/fileConversion/constants.ts index 56f0972f..f312e06a 100644 --- a/src/infrastracture/adapters/fileConversion/constants.ts +++ b/src/infrastracture/adapters/fileConversion/constants.ts @@ -1,12 +1,16 @@ -import { HarmBlockThreshold, HarmCategory } from '@google-cloud/vertexai'; +import { + HarmBlockThreshold, + HarmCategory, + SafetySetting, +} from '@google-cloud/vertexai'; -export const SAFETY_SETTINGS = [ +export const SAFETY_SETTINGS: SafetySetting[] = [ { - category: HarmCategory.HARM_CATEGORY_HATE_SPEECH, + category: HarmCategory.HARM_CATEGORY_HARASSMENT, threshold: HarmBlockThreshold.BLOCK_NONE, }, { - category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, + category: HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold: HarmBlockThreshold.BLOCK_NONE, }, { @@ -14,7 +18,18 @@ export const SAFETY_SETTINGS = [ threshold: HarmBlockThreshold.BLOCK_NONE, }, { - category: HarmCategory.HARM_CATEGORY_HARASSMENT, + category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold: HarmBlockThreshold.BLOCK_NONE, }, ]; + +export const VERTEX_AI_CONFIG = { + project: 'notion-to-anki', + location: 'europe-west3', + model: 'gemini-1.5-pro-002', + generationConfig: { + maxOutputTokens: 8192, + temperature: 1, + topP: 0.95, + }, +}; diff --git a/src/infrastracture/adapters/fileConversion/contentGenerationUtils.ts b/src/infrastracture/adapters/fileConversion/contentGenerationUtils.ts new file mode 100644 index 00000000..ada90209 --- /dev/null +++ b/src/infrastracture/adapters/fileConversion/contentGenerationUtils.ts @@ -0,0 +1,28 @@ +import { GenerateContentRequest } from '@google-cloud/vertexai'; +import { setupVertexAI } from './vertexAIUtils'; + +export async function generateContent( + req: GenerateContentRequest +): Promise { + const generativeModel = setupVertexAI(); + let content = ''; + + try { + const streamingResp = await generativeModel.generateContentStream(req); + for await (const item of streamingResp.stream) { + if ( + item.candidates && + item.candidates[0].content && + item.candidates[0].content.parts + ) { + content += item.candidates[0].content.parts + .map((part) => part.text) + .join(''); + } + } + } catch (error) { + console.error('Error generating content stream:', error); + } + + return content; +} diff --git a/src/infrastracture/adapters/fileConversion/convertImageToHTML.ts b/src/infrastracture/adapters/fileConversion/convertImageToHTML.ts index c863d266..f3b10032 100644 --- a/src/infrastracture/adapters/fileConversion/convertImageToHTML.ts +++ b/src/infrastracture/adapters/fileConversion/convertImageToHTML.ts @@ -1,5 +1,5 @@ -import { VertexAI } from '@google-cloud/vertexai'; -import { SAFETY_SETTINGS } from './constants'; +import { GenerateContentRequest } from '@google-cloud/vertexai'; +import { generateContent } from './contentGenerationUtils'; /** * Google VertexAI is returning Markdown: @@ -16,38 +16,22 @@ export function removeFirstAndLastLine(content: string): string { export const convertImageToHTML = async ( imageData: string ): Promise => { - const vertexAI = new VertexAI({ - project: 'notion-to-anki', - location: 'europe-west3', - }); - const model = 'gemini-1.5-pro-002'; - - const generativeModel = vertexAI.preview.getGenerativeModel({ - model: model, - generationConfig: { - maxOutputTokens: 8192, - temperature: 1, - topP: 0.95, - }, - safetySettings: SAFETY_SETTINGS, - }); - const text1 = { text: `Convert the text in this image to the following format for (every question is their own ul):
    -   
  • -    
    -      +
  • +
    + n) question -      +
  • A) ...,

    B)...

    - etc.  + etc.

    and finally Answer: D

    -    
    -   
  • -   
+ + + — - Extra rules: n=is the number for the question, question=the question text @@ -62,28 +46,10 @@ export const convertImageToHTML = async ( }, }; - const req = { + const req: GenerateContentRequest = { contents: [{ role: 'user', parts: [text1, image1] }], }; - let htmlContent = ''; - try { - const streamingResp = await generativeModel.generateContentStream(req); - for await (const item of streamingResp.stream) { - if ( - item.candidates && - item.candidates[0].content && - item.candidates[0].content.parts - ) { - htmlContent += item.candidates[0].content.parts - .map((part) => part.text) - .join(''); - } - } - } catch (error) { - console.error('Error generating content stream:', error); - } - htmlContent = removeFirstAndLastLine(htmlContent); - - return htmlContent; + const htmlContent = await generateContent(req); + return removeFirstAndLastLine(htmlContent); }; diff --git a/src/infrastracture/adapters/fileConversion/convertPDFToHTML.ts b/src/infrastracture/adapters/fileConversion/convertPDFToHTML.ts index 78f9b28d..c176cc2e 100644 --- a/src/infrastracture/adapters/fileConversion/convertPDFToHTML.ts +++ b/src/infrastracture/adapters/fileConversion/convertPDFToHTML.ts @@ -1,25 +1,9 @@ import path from 'path'; import fs from 'fs'; +import { GenerateContentRequest } from '@google-cloud/vertexai'; +import { generateContent } from './contentGenerationUtils'; -import { GenerateContentRequest, VertexAI } from '@google-cloud/vertexai'; -import { SAFETY_SETTINGS } from './constants'; - -export const convertPDFToHTML = async (pdf: string): Promise => { - const vertexAI = new VertexAI({ - project: 'notion-to-anki', - location: 'europe-west3', - }); - const model = 'gemini-1.5-pro-002'; - const generativeModel = vertexAI.preview.getGenerativeModel({ - model: model, - generationConfig: { - maxOutputTokens: 8192, - temperature: 1, - topP: 0.95, - }, - safetySettings: SAFETY_SETTINGS, - }); - +export const convertPDFToHTML = (pdf: string): Promise => { const document1 = { inlineData: { mimeType: 'application/pdf', @@ -43,27 +27,5 @@ export const convertPDFToHTML = async (pdf: string): Promise => { contents: [{ role: 'user', parts: [document1, text1] }], }; - let htmlContent = ''; - try { - const streamingResp = await generativeModel.generateContentStream(req); - for await (const item of streamingResp.stream) { - if ( - item.candidates && - item.candidates[0].content && - item.candidates[0].content.parts - ) { - htmlContent += item.candidates[0].content.parts - .map((part) => part.text) - .join(''); - } - } - } catch (error) { - console.error('Error generating content stream:', error); - - // const workSpace = process.cwd(); - // const outputPath = path.join(workSpace, 'output.html'); - // fs.writeFileSync(outputPath, htmlContent); - // console.log(outputPath); - } - return htmlContent; + return generateContent(req); }; diff --git a/src/infrastracture/adapters/fileConversion/vertexAIUtils.ts b/src/infrastracture/adapters/fileConversion/vertexAIUtils.ts new file mode 100644 index 00000000..4f6ff6da --- /dev/null +++ b/src/infrastracture/adapters/fileConversion/vertexAIUtils.ts @@ -0,0 +1,15 @@ +import { GenerativeModel, VertexAI } from '@google-cloud/vertexai'; +import { SAFETY_SETTINGS, VERTEX_AI_CONFIG } from './constants'; + +export function setupVertexAI(): GenerativeModel { + const vertexAI = new VertexAI({ + project: VERTEX_AI_CONFIG.project, + location: VERTEX_AI_CONFIG.location, + }); + + return vertexAI.getGenerativeModel({ + model: VERTEX_AI_CONFIG.model, + generationConfig: VERTEX_AI_CONFIG.generationConfig, + safetySettings: SAFETY_SETTINGS, + }); +}