From f52bd317a1f74399125c85379684f89bc15c2f25 Mon Sep 17 00:00:00 2001 From: Simon Rieger Date: Sat, 12 Feb 2022 00:08:06 +0100 Subject: [PATCH] remove force-ocr for only scan documents --- ocr-deamon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocr-deamon.sh b/ocr-deamon.sh index 582f1af..6d717c9 100755 --- a/ocr-deamon.sh +++ b/ocr-deamon.sh @@ -4,7 +4,7 @@ DOKUMENT_DIR=/home/simono41/data1/Dokumente find ${DOKUMENT_DIR} -path ${DOKUMENT_DIR}/.stversions -prune -type f -o -name "*.pdf" ! -name '*-ocr.pdf' -print | while read file; do if ! [ -f "${file%.*}-ocr.pdf" ]; then - ocrmypdf -l deu --force-ocr --jbig2-lossy "${file}" "${file%.*}-ocr.pdf" + ocrmypdf -l deu+eng -c --jbig2-lossy --tesseract-timeout 60 "${file}" "${file%.*}-ocr.pdf" sleep 1 else echo "Datei wurde bereits verarbeitet"