-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
41 lines (32 loc) · 1.13 KB
/
Dockerfile
File metadata and controls
41 lines (32 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Base image, pinned to an explicit release instead of the moving `latest` tag
# so that rebuilds are reproducible.
# 14.04 is chosen because the apt package names installed further down
# (liblept4, python-pip, python-dev) match that release; they do not resolve
# on current Ubuntu releases, so `latest` no longer builds.
# NOTE(review): 14.04 is end-of-life — confirm this is the release the image
# was written against and plan a port to a supported base.
FROM ubuntu:14.04

# Image metadata. MAINTAINER is deprecated, so the author is recorded as an
# OCI-standard label next to the existing version label.
LABEL org.opencontainers.image.authors="Al <noreply@datageek.info>" \
      version="1.0"

# Folder the container scans. The ENTRYPOINT at the end of this file passes
# /data literally (exec form does no variable expansion), so keep the two in
# sync if this value is ever changed.
ENV SCAN_FOLDER=/data
# Create the unprivileged `ocr` account that owns the scan folder.
# Debian/Ubuntu `adduser` normally prompts for a password and GECOS fields;
# `docker build` has no stdin, so both prompts are suppressed explicitly to
# keep the step deterministic. The account gets no usable password.
RUN adduser --disabled-password --gecos "" ocr
# Base toolchain, Tesseract and pypdfocr, installed in ONE layer.
#
# Everything from `apt-get update` through the cleanup runs in a single RUN so
# that:
#   * `apt-get update` is never reused from a stale cached layer while the
#     install list below changes;
#   * the build-only packages purged at the end, and the apt package lists,
#     are never committed to an image layer. Removing them in a later RUN
#     hides the files but does not make the image any smaller.
#
# Package groups (kept alphabetical in the install list for easy diffing):
#   build-only, purged again below : autoconf build-essential git
#                                    libleptonica-dev libtool
#   Tesseract OCR                  : tesseract-ocr tesseract-ocr-eng
#                                    liblept4 libgomp1
#   pypdfocr build/runtime deps    : libjpeg-dev zlib1g-dev python-dev
#                                    python-pip imagemagick poppler-utils
#
# The purge list is intentionally the same set the file always removed.
# NOTE(review): --no-install-recommends is deliberately NOT used; pypdfocr may
# rely on tools that arrive only as Recommends (e.g. Ghostscript via
# imagemagick) — audit before tightening.
# NOTE(review): pip's --no-cache-dir is not passed because the distro pip may
# predate that option — confirm before adding it.
# NOTE(review): the pypdfocr install tracks the repository's default branch;
# pin it to a tag or commit (git+https://...@<ref>) for reproducible builds.
RUN export DEBIAN_FRONTEND=noninteractive \
 && apt-get update \
 && apt-get install -y \
        autoconf \
        build-essential \
        git \
        imagemagick \
        libgomp1 \
        libjpeg-dev \
        liblept4 \
        libleptonica-dev \
        libtool \
        poppler-utils \
        python-dev \
        python-pip \
        tesseract-ocr \
        tesseract-ocr-eng \
        zlib1g-dev \
 && pip install git+https://github.com/abrown43/pypdfocr \
 && apt-get purge --auto-remove -y \
        autoconf \
        build-essential \
        git \
        libleptonica-dev \
        libtool \
 && rm -rf /var/lib/apt/lists/*

# Make folder: create the scan directory and hand it to the `ocr` account
# created earlier in this file.
RUN mkdir /data \
 && chown -R ocr:ocr /data
# Disabled debugging entrypoint: lists /data instead of starting pypdfocr.
#ENTRYPOINT ["ls", "-l", "/data"]
# Exec-form entrypoint: starts pypdfocr directly (no shell wrapper) with the
# arguments listed. Arguments given to `docker run <image> ...` are appended
# to this list.
# /data is passed literally; exec form performs no variable expansion, so the
# SCAN_FOLDER environment variable set earlier is not consulted here.
# NOTE(review): presumably -w selects the folder to watch and the --archive*
# / --initial_scan options control handling of originals and pre-existing
# files — confirm against `pypdfocr --help` for the abrown43 fork.
# NOTE(review): no USER instruction is visible in this file, so the process
# runs as the image's default user even though an `ocr` account is created and
# owns /data — confirm whether `USER ocr` was intended.
ENTRYPOINT ["pypdfocr", "-w", "/data", "--archive", "--archive_suffix", "_orig.pdf", "--initial_scan"]