diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..dfe0770 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8dcd8e0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +Original work Copyright (c) 2013 Marco Azimonti +Modified work Copyright (c) 2015 Matteo Maggioni +Modified work Copyright (c) 2017 Oswell Chan + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..a1df1f5 --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# Heroku Buildpack Tesseract + +This package provides a custom Heroku buildpack providing the [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) binary and all the required libraries to Heroku apps. Training data for English language is provided. + +## Configuration + + +1. 
add the buildpack + ``` + heroku buildpacks:add https://github.com/teketekepon/heroku-buildpack-tesseract + ``` + or add the buildpack by pasting the URL into the Dashboard. +2. you can use the `tesseract` binary in your Heroku app! +3. deploy :) + +## Note +This fork uses Tesseract version 5.3.1 + +## License +MIT License. + +Original work Copyright (c) 2013 Marco Azimonti +Modified work Copyright (c) 2015 Matteo Maggioni +Modified work Copyright (c) 2015 Oswell Chan +Modified work Copyright (c) 2018 Malcolm Patterson +Modified work Copyright (c) 2020 Takahiro Furukawa diff --git a/bin/compile b/bin/compile new file mode 100755 index 0000000..a2f3c92 --- /dev/null +++ b/bin/compile @@ -0,0 +1,27 @@ +#!/bin/bash +# Buildpack compile step: unpacks the bundled Tesseract-OCR tarball into +# BUILD_DIR/vendor/tesseract-ocr/ and writes a .profile.d script that exports +# PATH, LD_LIBRARY_PATH and TESSDATA_PREFIX. +# Usage: bin/compile BUILD_DIR +set -e # abort if mkdir/tar/redirection fails instead of reporting success + +BUILD_DIR=${1:?usage: bin/compile BUILD_DIR} +# Resolve the tarball from this script's location (bin/..), not the caller's cwd. +BUILDPACK_DIR=$(cd "$(dirname "$0")/.." && pwd) +TESSERACT_OCR_VERSION=5.3.1 +TESSERACT_OCR_TGZ=$BUILDPACK_DIR/tesseract-$TESSERACT_OCR_VERSION.tar.gz + +INSTALL_DIR=$BUILD_DIR/vendor/tesseract-ocr/ +# ${HOME} is expanded here, so its current value is written into the env script. +TESSERACT_OCR_DIR=${HOME}/vendor/tesseract-ocr +ENVSCRIPT=$BUILD_DIR/.profile.d/tesseract-ocr.sh + +echo "Unpacking Tesseract-OCR binaries" +mkdir -p "$INSTALL_DIR" +tar -zxvf "$TESSERACT_OCR_TGZ" -C "$INSTALL_DIR" + +echo "Building runtime environment for Tesseract-OCR" +mkdir -p "$BUILD_DIR/.profile.d" +echo "export PATH=\"$TESSERACT_OCR_DIR/bin:\$PATH\"" > "$ENVSCRIPT" +echo "export LD_LIBRARY_PATH=\"$TESSERACT_OCR_DIR/lib:\$LD_LIBRARY_PATH\"" >> "$ENVSCRIPT" +echo "export TESSDATA_PREFIX=\"$TESSERACT_OCR_DIR/share/tessdata\"" >> "$ENVSCRIPT" diff --git a/bin/detect b/bin/detect new file mode 100755 index 0000000..57ae792 --- /dev/null +++ b/bin/detect @@ -0,0 +1,2 @@ +#!/bin/sh +echo "detect" diff --git a/bin/release b/bin/release new file mode 100755 index 0000000..df2858b --- /dev/null +++ b/bin/release @@ -0,0 +1,2 @@ +#!/bin/sh +echo "--- {}" diff --git a/tesseract-5.3.1.tar.gz b/tesseract-5.3.1.tar.gz new file mode 100644 index 0000000..ab376a4 Binary files /dev/null and b/tesseract-5.3.1.tar.gz differ