-
Notifications
You must be signed in to change notification settings - Fork 105
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0c6b25a
commit 1920902
Showing
1 changed file
with
37 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Base image; override at build time with --build-arg BASE_IMAGE=<image>
ARG BASE_IMAGE=ubuntu:18.04
FROM ${BASE_IMAGE}

# Image metadata
LABEL maintainer="NeuML" \
      repository="paperai"
|
||
# Set the C.UTF-8 locale for the remaining build steps and for the running container
ENV LC_ALL=C.UTF-8 \
    LANG=C.UTF-8
|
||
# Install system packages and the paperai project, then remove the build-only
# packages, all within a single RUN instruction.
#
# Files deleted by a later instruction still occupy space in the layer that
# created them, so purging the toolchain only shrinks the image when it happens
# in the same layer that installed it.
#
#   - gcc, g++, python3-dev: purged at the end of this step (presumably only
#     needed to build native extensions while pip installs paperai)
#   - libgomp1, python3.6, python3-pip, wget: left installed
#   - /usr/bin/python is symlinked to python3.6 so "python" works
#   - the NLTK "punkt" data is downloaded at build time
RUN apt-get update && \
    apt-get -y --no-install-recommends install \
        g++ \
        gcc \
        libgomp1 \
        python3-dev \
        python3-pip \
        python3.6 \
        wget && \
    ln -sf /usr/bin/python3.6 /usr/bin/python && \
    python -m pip install --no-cache-dir -U pip wheel setuptools && \
    python -m pip install --no-cache-dir paperai && \
    python -c "import nltk; nltk.download('punkt')" && \
    apt-get -y purge gcc g++ python3-dev && \
    apt-get -y autoremove && \
    rm -rf /var/lib/apt/lists/*
|
||
# Copy paperai scripts into ./scripts (relative to the image's working directory).
# COPY creates the destination directory when it does not exist, so no separate
# "RUN mkdir" layer is needed.
COPY scripts/ ./scripts/
|
||
# Create the data and report directories for both cord19 and paperetl
RUN mkdir -p \
        cord19/data \
        cord19/report \
        paperetl/data \
        paperetl/report
|
||
# Install the vector model: run scripts/getvectors.sh once per models directory,
# aborting the build as soon as either invocation fails
RUN for target in cord19/models paperetl/models; do \
        scripts/getvectors.sh "$target" || exit 1; \
    done
|
||
# Start bash as the container's main process.
# Exec (JSON array) form runs /bin/bash directly rather than under "/bin/sh -c",
# so bash receives signals itself and arguments given to "docker run" are passed
# to it instead of being ignored.
ENTRYPOINT ["/bin/bash"]