Skip to content

Commit

Permalink
Add files
Browse files Browse the repository at this point in the history
  • Loading branch information
TheMorpheus407 committed Nov 28, 2023
0 parents commit 4a939d0
Show file tree
Hide file tree
Showing 7 changed files with 423 additions and 0 deletions.
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Text-to-Audio Converter / Audiobook Creator

## Overview
This project is a web-based application that converts text into audio, primarily focusing on creating audiobooks. It leverages the OpenAI API to transform text into speech and offers users the ability to upload text files, including ePub, or directly input text for conversion. The application is designed to be minimalistic and user-friendly, emphasizing privacy and simplicity.

## Features
- **Text Input:** Users can type or paste text into a text box or upload text and ePub files for conversion.
- **OpenAI API Integration:** Utilizes the OpenAI Text-to-Speech API to generate audio from text.
- **Client-Side Only:** You use your own OpenAI API key to generate the speech. There is no server-side code running.
- **Audio File Generation:** Converts text segments into audio files and merges them into a single audio file.
- **Progress Tracking:** Includes progress bars to track the status of text-to-speech conversion and file creation.
- **Download Option:** Users can download the generated audiobook as a single WAV file (`merged_audio.wav`).
- **Client-Side Processing:** All functionalities, including audio file merging, are handled client-side for enhanced privacy.
- **No Ads or Tracking:** The application is free from advertisements and user tracking, ensuring a focus on functionality and user privacy.

## Technical Details
- **Frontend:** Built using HTML, CSS, and JavaScript.
- **Audio Processing:** Utilizes the Web Audio API for audio decoding and custom JavaScript logic for merging audio files.
- **Responsive Design:** Crafted with a responsive layout for a seamless experience across various devices.
- **No Backend:** Operates entirely on the client side, with no server-side processing.
- **Privacy-Focused:** Does not store user data, ensuring complete privacy.

## Usage
1. **Text Input:** Enter text or upload a file containing the text.
2. **API Key:** Provide your OpenAI API key for text-to-speech conversion.
3. **Generate Audiobook:** Initiate the conversion process and wait for the audio file to be processed.
4. **Download:** Download the final merged audio file.

## License
This project is licensed under the [MIT License](LICENSE).
8 changes: 8 additions & 0 deletions audiobuffer-to-wav.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions bootstrap.min.css

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <!-- Declare the encoding explicitly so non-ASCII text (e.g. German umlauts) is never mis-decoded. -->
    <meta charset="utf-8">
    <!-- Required for the layout to scale properly on mobile devices. -->
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Audiobook Generator</title>
    <link href="bootstrap.min.css" rel="stylesheet">
    <link rel="stylesheet" href="style.css">
</head>
<body>
<!-- Example Key for test usage: -->
<!-- Example data: eins. zwei. drei. vier. fünf. sechs. sieben. acht. neun. zehn. -->
<!-- Header Section -->
<header>
    <h1>Text-to-Audio Converter</h1>
</header>

<!-- API Key Input Section -->
<div id="api-key-section">
    <label for="api-key">Enter OpenAI API Key:</label>
    <!-- The key is a secret: mask it on screen and keep the browser from remembering it. -->
    <input type="password" id="api-key" name="api-key" autocomplete="off" spellcheck="false">
</div>

<!-- File Upload and Text Input Section -->
<div id="input-section">
    <!-- "accept" pre-filters the file picker to the two formats main.js can read. -->
    <input type="file" id="file-upload" name="file-upload" accept=".txt,.epub,text/plain,application/epub+zip">
    <label for="file-upload">Upload .txt or .ePub File OR enter some text</label>
    <textarea id="text-input" name="text-input" rows="10" cols="50" placeholder="Paste or type text here"></textarea>
</div>

<!-- Cost Estimation Section (text is replaced by main.js whenever the input changes) -->
<div id="cost-estimation">
    <p id="cost-estimate-display">Estimated Cost for Conversion: $X.XX</p>
</div>

<!-- Audiobook Generation Section -->
<div id="generate-section">
    <button type="button" id="generate-audiobook">Generate Audiobook</button>
</div>

<!-- Progress Indicator (Hidden Initially) -->
<div id="progress-indicator" style="display: none;">
    <p>Converting...</p>
</div>

<!-- Footer Section -->
<footer>
    <p>Get your OpenAI API Key <a href="https://platform.openai.com/api-keys">here</a>.</p>
    <p id="status">This will take some time... Make sure to not interrupt the process!</p>
    <!-- "max" must be greater than zero to be valid; main.js overwrites it once the real totals are known. -->
    <p>Step 1: Creating TTS: <progress id="progressbar1" value="0" max="1"></progress></p>
    <p>Step 2: Creating your file: <progress id="progressbar2" value="0" max="1"></progress></p>
</footer>


<!-- Libraries first: main.js relies on the globals JSZip and audioBufferToWav. -->
<script src="jszip.min.js"></script>
<script src="audiobuffer-to-wav.js"></script>
<script src="main.js"></script>
</body>
</html>
13 changes: 13 additions & 0 deletions jszip.min.js

Large diffs are not rendered by default.

228 changes: 228 additions & 0 deletions main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
// Wire the "Generate Audiobook" button to the text-to-speech pipeline.
const generateButton = document.getElementById('generate-audiobook');
generateButton.addEventListener('click', generateAudiobook);

/**
 * Decodes every audio blob, converts it to WAV, joins the results in array
 * order and offers the merged file to the user as "merged_audio.wav".
 *
 * @param {Blob[]} audioBlobs - Encoded audio blobs in playback order.
 * @returns {Promise<void>} Settles after the download has been triggered;
 *     rejects if a blob cannot be read or decoded.
 */
async function mergeAudioBlobsAndDownload(audioBlobs) {
  const AudioContextClass = window.AudioContext || window.webkitAudioContext;
  const audioContext = new AudioContextClass();

  try {
    const wavBuffers = [];

    // Decode and convert one blob at a time so that only a single decoded
    // AudioBuffer has to be kept alive at any moment.
    for (const blob of audioBlobs) {
      const arrayBuffer = await blob.arrayBuffer();
      const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
      wavBuffers.push(audioBufferToWAV(audioBuffer));
    }

    const concatenatedWav = concatenateWAVBuffers(wavBuffers);

    // Trigger download of concatenated WAV file
    triggerDownload(new Blob([concatenatedWav], { type: 'audio/wav' }), 'merged_audio.wav');
  } finally {
    // Browsers only allow a limited number of live AudioContexts; always
    // release this one, even when decoding failed.
    if (typeof audioContext.close === 'function') {
      await audioContext.close();
    }
  }
}

/**
 * Thin adapter around the global `audioBufferToWav` function (loaded from
 * audiobuffer-to-wav.js) that turns a decoded AudioBuffer into WAV data.
 *
 * @param {AudioBuffer} buffer - Decoded audio to serialise.
 * @returns {*} Whatever `audioBufferToWav` returns; the rest of this file
 *     treats it as a buffer with `byteLength` and `slice`.
 */
function audioBufferToWAV(buffer) {
  const wavData = audioBufferToWav(buffer);
  return wavData;
}

/**
 * Joins several WAV files into one by keeping the 44-byte header of the
 * first file and appending the sample data of every file in order.
 *
 * All inputs are treated as having a 44-byte header and the same audio
 * format; only the first header is copied, the others are discarded.
 * Progress is reported (in bytes written) on the `progressbar2` element
 * when it exists.
 *
 * @param {ArrayBuffer[]} wavBuffers - WAV files, each starting with a 44-byte header.
 * @returns {ArrayBuffer} The merged WAV file.
 * @throws {TypeError} If `wavBuffers` is missing or empty (there is no header to copy).
 */
function concatenateWAVBuffers(wavBuffers) {
  const HEADER_SIZE = 44;

  if (!wavBuffers || wavBuffers.length === 0) {
    throw new TypeError('concatenateWAVBuffers needs at least one WAV buffer');
  }

  // Sum the lengths of the data chunks (excluding headers).
  const dataLength = wavBuffers.reduce(
    (total, buffer) => total + (buffer.byteLength - HEADER_SIZE),
    0,
  );
  const totalLength = HEADER_SIZE + dataLength;

  const concatenatedBuffer = new Uint8Array(totalLength);

  // Copy the header from the first buffer, then patch the two size fields.
  concatenatedBuffer.set(new Uint8Array(wavBuffers[0].slice(0, HEADER_SIZE)));
  const header = new DataView(concatenatedBuffer.buffer);
  // RIFF chunk size (offset 4): everything after the 8-byte "RIFF<size>" prefix.
  header.setUint32(4, totalLength - 8, true);
  // "data" chunk size (offset 40): number of sample bytes.
  header.setUint32(40, dataLength, true);

  // The bar counts bytes written, so its maximum is the full file length;
  // this way the final value lands exactly on max.
  const progressBar = document.getElementById('progressbar2');
  if (progressBar) {
    progressBar.max = totalLength;
    progressBar.value = HEADER_SIZE;
  }

  // Concatenate the actual data chunks.
  let offset = HEADER_SIZE;
  wavBuffers.forEach((buffer) => {
    concatenatedBuffer.set(new Uint8Array(buffer.slice(HEADER_SIZE)), offset);
    offset += buffer.byteLength - HEADER_SIZE;
    if (progressBar) {
      progressBar.value = offset;
    }
  });

  console.log("Individual buffer sizes:", wavBuffers.map(b => b.byteLength));
  console.log("Concatenated buffer size:", concatenatedBuffer.byteLength);

  return concatenatedBuffer.buffer;
}


/**
 * Starts a browser download of `blob` under the given file name by clicking
 * a temporary anchor that points at an object URL for the blob.
 *
 * @param {Blob} blob - Data to download.
 * @param {string} filename - File name suggested to the browser.
 */
function triggerDownload(blob, filename) {
  // Grace period before the object URL is released, so the browser has
  // started reading the blob by the time the URL disappears.
  const REVOKE_DELAY_MS = 60000;

  const url = URL.createObjectURL(blob);
  const anchor = document.createElement('a');
  anchor.href = url;
  anchor.download = filename;
  anchor.click();

  // An object URL keeps its blob alive until it is revoked; without this the
  // whole generated WAV stays in memory for the lifetime of the page.
  setTimeout(() => {
    URL.revokeObjectURL(url);
  }, REVOKE_DELAY_MS);
}


/**
 * Click handler for the "Generate Audiobook" button.
 *
 * Reads the text and API key from the page, splits the text into segments of
 * at most 4000 characters, requests speech for every segment and, once ALL
 * segments have been answered, merges the audio in segment order and
 * triggers the download. `progressbar1` shows how many segments are done.
 */
function generateAudiobook() {
  const text = document.getElementById('text-input').value;
  const apiKey = document.getElementById('api-key').value;
  const segments = splitTextIntoSegments(text, 4000);
  const progressBar = document.getElementById('progressbar1');

  // Pre-sized so each response can be stored at its segment's position no
  // matter in which order the responses arrive.
  const audioBlobs = new Array(segments.length);
  // Because the array is pre-sized, its length says nothing about how many
  // responses have arrived; completion is tracked with an explicit counter.
  let completedCount = 0;

  progressBar.max = segments.length;
  progressBar.value = 0;

  segments.forEach((segment, index) => {
    callOpenAIAPI(segment, apiKey, (audioBlob) => {
      audioBlobs[index] = audioBlob;
      completedCount += 1;
      progressBar.value = completedCount;

      if (completedCount === segments.length) {
        // All segments are loaded, merge them!
        mergeAudioBlobsAndDownload(audioBlobs).catch((error) => {
          console.error('Merging the audio segments failed:', error);
        });
      }
    });
  });
}

/**
 * Splits text into segments of at most `maxLength` characters, breaking at
 * sentence boundaries (". ") wherever possible.
 *
 * - The ". " separator is kept at the end of each sentence it followed; none
 *   is invented after the final sentence.
 * - A single sentence longer than `maxLength` is cut into `maxLength`-sized
 *   pieces so that no segment ever exceeds the limit.
 * - Segments consisting only of whitespace are never produced, so empty
 *   input yields an empty array.
 *
 * @param {string} text - Text to split.
 * @param {number} maxLength - Maximum segment length in characters (>= 1).
 * @returns {string[]} Segments in reading order.
 * @throws {RangeError} If `maxLength` is not a number >= 1.
 */
function splitTextIntoSegments(text, maxLength) {
  const DELIMITER = '. ';

  if (!(maxLength >= 1)) {
    throw new RangeError('maxLength must be at least 1');
  }

  const segments = [];
  let currentSegment = '';

  const pushIfNotBlank = (segment) => {
    if (segment.trim() !== '') {
      segments.push(segment);
    }
  };

  const sentences = text.split(DELIMITER);
  sentences.forEach((sentence, index) => {
    // split() removed the delimiter; restore it everywhere except after the
    // last piece, which was not followed by one in the input.
    const isLast = index === sentences.length - 1;
    let piece = isLast ? sentence : sentence + DELIMITER;

    if (currentSegment.length + piece.length > maxLength) {
      pushIfNotBlank(currentSegment);
      currentSegment = '';
    }

    // A sentence that does not fit even into an empty segment is hard-split.
    while (piece.length > maxLength) {
      pushIfNotBlank(piece.slice(0, maxLength));
      piece = piece.slice(maxLength);
    }

    currentSegment += piece;
  });

  // Add the last segment if it's not empty
  pushIfNotBlank(currentSegment);

  return segments;
}

/**
 * Requests speech audio for one text segment from the OpenAI
 * `/v1/audio/speech` endpoint (model "tts-1", voice "nova").
 *
 * @param {string} segment - Text to convert.
 * @param {string} apiKey - OpenAI API key, sent as a Bearer token.
 * @param {function(Blob): void} callback - Called with the audio blob on HTTP 200.
 * @param {function(Error): void} [onError] - Optional; called when the request
 *     fails, either with a non-200 status or on a network-level error.
 *     Failures are logged to the console whether or not it is supplied.
 */
function callOpenAIAPI(segment, apiKey, callback, onError) {
  const xhr = new XMLHttpRequest();
  xhr.open("POST", "https://api.openai.com/v1/audio/speech", true);
  xhr.setRequestHeader("Authorization", "Bearer " + apiKey);
  xhr.setRequestHeader("Content-Type", "application/json");
  xhr.responseType = 'blob'; // Expect a binary response

  // Single place for failure reporting so HTTP and network errors look alike.
  const reportFailure = (reason) => {
    const error = new Error("Error calling OpenAI API: " + reason);
    console.error(error.message);
    if (typeof onError === 'function') {
      onError(error);
    }
  };

  xhr.onload = function () {
    if (xhr.status === 200) {
      callback(xhr.response);
      return;
    }
    // statusText is frequently empty, so always include the numeric status.
    reportFailure(("HTTP " + xhr.status + " " + (xhr.statusText || '')).trim());
  };

  // onload does not fire for network-level failures (offline, DNS, CORS);
  // without this handler such a request would fail without any trace.
  xhr.onerror = function () {
    reportFailure("network error");
  };

  console.log("TTS running for: ");
  console.log(segment);

  const data = JSON.stringify({
    "model": "tts-1",
    "input": segment,
    "voice": "nova"
  });
  xhr.send(data);
}


document.addEventListener('DOMContentLoaded', function () {
var textInput = document.getElementById('text-input');
var fileUpload = document.getElementById('file-upload');
var costDisplay = document.getElementById('cost-estimate-display');

fileUpload.addEventListener('change', handleFileUpload);
textInput.addEventListener('input', calculateCost);

/**
 * Recomputes the cost estimate from the current length of the text input
 * and writes it into the cost display element.
 * The rate applied is 0.015 (shown in dollars) per 1000 characters.
 */
function calculateCost() {
  const RATE_PER_THOUSAND_CHARS = 0.015;
  const thousandsOfChars = textInput.value.length / 1000;
  const estimate = thousandsOfChars * RATE_PER_THOUSAND_CHARS;
  costDisplay.textContent = 'Estimated Cost for Conversion: $' + estimate.toFixed(2);
}

/**
 * "change" handler of the file input: loads the chosen file into the text box.
 *
 * Plain-text files are read as text and copied into the text input; ePub
 * files are read as an ArrayBuffer and handed to `readEpub`. Any other file
 * is rejected with an alert.
 *
 * @param {Event} event - Change event whose target is the file input.
 */
function handleFileUpload(event) {
  const file = event.target.files[0];
  if (!file) {
    return;
  }

  // Extensions are compared case-insensitively ("BOOK.EPUB" is fine), and the
  // ".txt" extension is accepted as well because browsers do not always
  // report a MIME type for local files.
  const lowerCaseName = file.name.toLowerCase();
  const isPlainText = file.type === 'text/plain' || lowerCaseName.endsWith('.txt');
  const isEpub = lowerCaseName.endsWith('.epub');

  if (!isPlainText && !isEpub) {
    alert('Please upload a text or ePub file.');
    return;
  }

  const reader = new FileReader();
  reader.onerror = function () {
    alert('The selected file could not be read.');
  };

  if (isPlainText) {
    reader.onload = function (e) {
      textInput.value = e.target.result;
      calculateCost();
    };
    reader.readAsText(file);
  } else {
    reader.onload = function (e) {
      readEpub(e.target.result);
    };
    // readAsBinaryString is deprecated; JSZip reads ArrayBuffers directly.
    reader.readAsArrayBuffer(file);
  }
}

/**
 * Unpacks an ePub archive and appends the plain text of its content
 * documents to the text input, then refreshes the cost estimate.
 *
 * Entries whose name ends in "html" are used, except those whose name
 * contains "cover", "toc" or "nav". Text is appended in archive listing
 * order, one line per document.
 *
 * @param {string|ArrayBuffer|Uint8Array|Blob} epubContent - Raw archive data
 *     in any form accepted by `JSZip#loadAsync`.
 * @returns {Promise<void>} Settles when the text has been appended. Read
 *     errors are reported to the user and logged, not rethrown.
 */
function readEpub(epubContent) {
  const isContentDocument = function (filename) {
    const isSkipped = filename.includes("cover") || filename.includes("toc") || filename.includes("nav");
    return !isSkipped && filename.endsWith('html');
  };

  return new JSZip().loadAsync(epubContent)
    .then(function (zip) {
      const filenames = Object.keys(zip.files).filter(isContentDocument);
      // Promise.all keeps the results in request order, so a chapter that
      // happens to decompress faster can no longer jump ahead of an earlier one.
      return Promise.all(filenames.map(function (filename) {
        return zip.files[filename].async('string');
      }));
    })
    .then(function (chapters) {
      const target = document.getElementById('text-input');
      chapters.forEach(function (content) {
        const text = extractTextFromHTML(content);
        target.value += removeWhitespace(filterUnwantedContent(text)) + '\n';
      });
      calculateCost();
    })
    .catch(function (error) {
      // A corrupt or non-zip upload must not end as an unhandled rejection.
      console.error('Could not read the ePub file:', error);
      alert('Could not read the ePub file.');
    });
}

/**
 * Returns the text content of an (X)HTML document, without page-break
 * markers and without the contents of script and style elements.
 *
 * The markup comes from a user-supplied ePub and is therefore untrusted. It
 * is parsed with DOMParser into a separate document instead of being
 * assigned to `innerHTML` of an element of the live page, so it is never
 * instantiated in the page's own DOM.
 *
 * @param {string} htmlContent - Markup of one ePub content document.
 * @returns {string} Extracted text, or '' if there is none.
 */
function extractTextFromHTML(htmlContent) {
  const parsed = new DOMParser().parseFromString(htmlContent, 'text/html');

  // Drop elements with epub:type="pagebreak", plus script/style elements
  // whose text is code rather than prose to be read aloud.
  const unwanted = parsed.querySelectorAll('[epub\\:type="pagebreak"], script, style');
  unwanted.forEach(function (elem) {
    elem.remove();
  });

  const root = parsed.documentElement;
  return (root && root.textContent) || '';
}


/**
 * Strips two kinds of noise from extracted book text:
 * page-number markers of the form "Page_<digits><optional whitespace><digits>",
 * and everything from "BIBLIOGRAPHY" up to and including the next
 * "INTRODUCTORY".
 * The patterns are tailored to particular books; adjust them as needed.
 *
 * @param {string} text - Extracted text.
 * @returns {string} The text with all matches removed.
 */
function filterUnwantedContent(text) {
  const pageNumberPattern = /Page_[0-9]+\s*[0-9]+/g;
  const bibliographyPattern = /BIBLIOGRAPHY[\s\S]*?INTRODUCTORY/g;

  return text
    .replace(pageNumberPattern, '')
    .replace(bibliographyPattern, '');
}

/**
 * Collapses every run of whitespace (including line breaks) into a single
 * space and trims both ends.
 *
 * @param {string} text - Text to normalise.
 * @returns {string} The normalised text.
 */
function removeWhitespace(text) {
  const collapsed = text.replace(/\s+/g, ' ');
  return collapsed.trim();
}
});
Loading

0 comments on commit 4a939d0

Please sign in to comment.