-
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3708707
commit 5638fde
Showing
17 changed files
with
1,336 additions
and
2,987 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
export const DEFAULT_MODEL_TEMPERATURE = 0.3; | ||
export const DEFAULT_MODEL_TEMPERATURE = 0.1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,9 @@ | ||
export {Ocra} from './core/ocra'; | ||
export { | ||
OcraConfig, | ||
ImageResult, | ||
PageResult, | ||
InputSource, | ||
OCRMetadata, | ||
Provider, | ||
} from './types'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
/**
 * Removes Markdown fenced-code-block markers from text, keeping the fenced content.
 *
 * An opening fence is three backticks followed by an optional info string
 * (any characters other than a backtick or line break, e.g. `json`, `c++`,
 * `js title=demo`) and a line break (`\n` or `\r\n`). The matching closing
 * fence is the next run of three backticks. Every such pair in the text is
 * unwrapped; fences without a line break after the opener are left untouched.
 *
 * @param text - The text content to process
 * @returns The text with code block markers removed and surrounding whitespace trimmed
 */
export function removeCodeBlockMarkers(text: string): string {
  // A single pattern covers both tagged and plain fences: the info string may
  // be empty. `[^`\r\n]*` (rather than `[a-zA-Z0-9]*`) accepts tags that
  // contain punctuation, and `\r?\n` accepts Windows line endings.
  const fencedBlock = /```[^`\r\n]*\r?\n([\s\S]*?)```/g;

  return text.replace(fencedBlock, '$1').trim();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import {Ocra} from 'ocra'; | ||
|
||
// Module-level Ocra client shared by every request to this route. It targets
// the 'openai' provider and takes its key from OPENAI_API_KEY; the `!` only
// satisfies the type checker — POST re-checks the variable at request time.
const ocra = new Ocra({provider: 'openai', key: process.env.OPENAI_API_KEY!});
|
||
/**
 * Handles `POST` requests asking for text extraction from a file URL.
 *
 * The body must be JSON of the form `{url, type}`, where `type` is `"image"`
 * or `"pdf"`. `url` is forwarded to `ocra.image` or `ocra.pdf` accordingly.
 *
 * @param request - Incoming request whose body is parsed as JSON.
 * @returns A JSON response: `{result}` on success, or `{error}` with status
 *   400 (unparseable body, missing/invalid fields) or 500 (API key not
 *   configured, or the extraction call threw).
 */
export async function POST(request: Request): Promise<Response> {
  if (!process.env.OPENAI_API_KEY) {
    return Response.json(
      {error: 'OpenAI API key not configured'},
      {status: 500},
    );
  }

  // Parse separately so a malformed body is reported as a client error (400)
  // instead of falling into the generic 500 handler below.
  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return Response.json({error: 'Invalid JSON body'}, {status: 400});
  }

  // Valid JSON may still be `null` or a primitive; treat those as an empty
  // object so they hit the "missing fields" branch rather than throwing.
  const fields: {url?: unknown; type?: unknown} =
    typeof payload === 'object' && payload !== null ? payload : {};
  const {url, type} = fields;

  if (!url || !type) {
    return Response.json(
      {error: 'Missing required fields: url and type'},
      {status: 400},
    );
  }

  if (typeof url !== 'string') {
    return Response.json(
      {error: 'Invalid url: must be a string'},
      {status: 400},
    );
  }

  if (type !== 'image' && type !== 'pdf') {
    return Response.json(
      {error: 'Invalid type: must be "image" or "pdf"'},
      {status: 400},
    );
  }

  try {
    const result =
      type === 'image' ? await ocra.image(url) : await ocra.pdf(url);

    return Response.json({result});
  } catch (error: unknown) {
    // Log the real failure server-side; the client only gets a generic message.
    console.error('Error processing request:', error);
    return Response.json(
      {error: 'An error occurred while processing your request'},
      {status: 500},
    );
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,119 @@ | ||
export default function Home() { | ||
'use client'; | ||
|
||
import {useState} from 'react'; | ||
|
||
import FileUpload from '@/components/file-upload'; | ||
import {ImageResult, PageResult} from 'ocra'; | ||
|
||
/**
 * Preview of the uploaded file.
 *
 * Renders an `<img>` when `type` is `'image'`; otherwise (PDF) embeds the URL
 * in an `<iframe>`. Both fill their container.
 */
const Preview = ({url, type}: {url: string; type: 'image' | 'pdf'}) => {
  if (type !== 'image') {
    return (
      <iframe
        title="PDF preview"
        src={url}
        className="w-full h-full rounded-lg border-0"
      />
    );
  }

  return (
    <img
      alt="Preview"
      src={url}
      className="w-full h-full object-contain rounded-lg"
    />
  );
};
|
||
/**
 * Shows the extracted text, or a pulsing "Extracting..." placeholder while a
 * request is in flight.
 *
 * Each entry of `contents` is rendered as its own block using the entry's
 * `content` field; when `contents` is undefined nothing is listed.
 */
const ContentDisplay = ({
  contents,
  isLoading,
}: {
  contents?: (PageResult | ImageResult)[];
  isLoading: boolean;
}) => {
  if (!isLoading) {
    const blocks = contents?.map((entry, index) => (
      <div key={index} className="text-base leading-relaxed">
        {entry.content}
      </div>
    ));

    return (
      <div className="h-full overflow-y-auto">
        <div className="whitespace-pre-wrap space-y-6">{blocks}</div>
      </div>
    );
  }

  return (
    <div className="flex items-center justify-center h-full">
      <div className="text-xl text-neutral-500 animate-pulse">
        Extracting...
      </div>
    </div>
  );
};
|
||
/**
 * Landing screen shown before any file has been uploaded.
 *
 * Renders the product heading and tagline above a `FileUpload` control.
 *
 * @param onUpload - Passed straight through to `FileUpload`; receives the
 *   uploaded file's URL and whether it is an image or a PDF.
 */
const Landing = ({
  onUpload,
}: {
  onUpload: (url: string, type: 'image' | 'pdf') => void;
}) => {
  // A single wrapper and a single heading: the previous grid wrapper and its
  // duplicate <h1> were left-over lines that made the JSX unbalanced.
  return (
    <div className="flex flex-col items-center justify-center min-h-screen p-4 sm:p-8 md:p-12 lg:p-20">
      <div className="max-w-3xl w-full space-y-8">
        <div className="space-y-4 text-center">
          <h1 className="text-4xl sm:text-5xl font-bold tracking-tighter">
            Ocra
          </h1>
          <p className="text-lg sm:text-xl text-neutral-500 leading-relaxed">
            Fast, ultra-accurate text extraction from any image{' '}
            <br className="hidden sm:block" />
            or PDF, even challenging ones, with structured markdown output.
          </p>
        </div>
        <FileUpload onUpload={onUpload} />
      </div>
    </div>
  );
};
|
||
/**
 * Page component: shows the landing/upload screen until a file is uploaded,
 * then a two-pane view with the file preview and the extraction output.
 *
 * On upload it POSTs `{url, type}` as JSON to `/api/extract` and stores the
 * returned `result` as a list. Failed requests (network error, non-2xx
 * status, unparseable or empty response) are logged and surfaced as an error
 * message in the output pane instead of crashing the render.
 */
export default function Home() {
  const [contents, setContents] = useState<(PageResult | ImageResult)[]>();
  const [isLoading, setIsLoading] = useState(false);
  const [errorMessage, setErrorMessage] = useState<string>();
  const [previewUrl, setPreviewUrl] = useState<string>();
  const [fileType, setFileType] = useState<'image' | 'pdf'>();

  const handleUpload = async (url: string, type: 'image' | 'pdf') => {
    setIsLoading(true);
    setErrorMessage(undefined);
    setContents(undefined);
    setPreviewUrl(url);
    setFileType(type);

    try {
      const response = await fetch('/api/extract', {
        method: 'POST',
        headers: {'Content-Type': 'application/json'},
        body: JSON.stringify({url, type}),
      });

      const payload: {
        result?: PageResult[] | ImageResult | null;
        error?: string;
      } | null = await response.json();

      // Error responses carry `{error}` and no `result`; storing that as
      // `[undefined]` would throw when the list is rendered.
      if (!response.ok || payload?.result == null) {
        throw new Error(
          payload?.error ?? `Extraction failed with status ${response.status}`,
        );
      }

      // Image extraction yields a single result, PDF extraction a list.
      const {result} = payload;
      setContents(Array.isArray(result) ? result : [result]);
    } catch (error: unknown) {
      console.error('Extraction request failed:', error);
      setErrorMessage(
        error instanceof Error ? error.message : 'Extraction failed',
      );
    } finally {
      setIsLoading(false);
    }
  };

  if (previewUrl && fileType) {
    return (
      <div className="flex flex-col lg:flex-row min-h-screen p-4 sm:p-6 md:p-8 lg:p-12 gap-6 lg:gap-8">
        <div className="w-full lg:w-1/2 h-[40vh] lg:h-[85vh]">
          <Preview url={previewUrl} type={fileType} />
        </div>
        <div className="w-full lg:w-1/2 h-[40vh] lg:h-[85vh]">
          {errorMessage ? (
            <div className="flex items-center justify-center h-full">
              <div className="text-base text-red-500" role="alert">
                {errorMessage}
              </div>
            </div>
          ) : (
            <ContentDisplay contents={contents} isLoading={isLoading} />
          )}
        </div>
      </div>
    );
  }

  return <Landing onUpload={handleUpload} />;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"$schema": "https://ui.shadcn.com/schema.json", | ||
"style": "default", | ||
"rsc": true, | ||
"tsx": true, | ||
"tailwind": { | ||
"config": "tailwind.config.ts", | ||
"css": "app/globals.css", | ||
"baseColor": "neutral", | ||
"cssVariables": true, | ||
"prefix": "" | ||
}, | ||
"aliases": { | ||
"components": "@/components", | ||
"utils": "@/lib/utils", | ||
"ui": "@/components/ui", | ||
"lib": "@/lib", | ||
"hooks": "@/hooks" | ||
} | ||
} |
Oops, something went wrong.