From 0ef21c47a8b1f636554a61b589d81828134818ae Mon Sep 17 00:00:00 2001 From: frnmst/Franco Masotti Date: Sun, 26 Feb 2017 01:09:36 +0100 Subject: [PATCH] Added unpaper. Various fixes. --- .gitignore | 1 + README.md | 5 +++- spectrscan | 71 +++++++++++++++++++++++++++++++----------------------- 3 files changed, 46 insertions(+), 31 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a136337 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pdf diff --git a/README.md b/README.md index 28b6a9c..b2c2d57 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,8 @@ is NO WARRANTY, to the extent permitted by law. - Odd-even page numbers scanning. +- Basic unpaper and ImageMagick post-processing. + ## Relevant links and ideas https://bugs.launchpad.net/simple-scan/+bug/983441 @@ -66,7 +68,7 @@ https://www.ubuntu-user.com/Magazine/Archive/2013/18/Scanning-and-editing-text-w - Compression - Number of pages to scan - Basic image enhancer options (to pass to ImageMagick) - - `unpaper` + - Better options to pass to unpaper - OCR (training (GOCR)? + text file outputs) - Better parallel processing - Watch inotifies for a new out*.pnm @@ -78,6 +80,7 @@ https://www.ubuntu-user.com/Magazine/Archive/2013/18/Scanning-and-editing-text-w - [Gawk](http://www.gnu.org/software/gawk/) - [SANE](http://www.sane-project.org/) - [ImageMagick](http://www.imagemagick.org/) +- [unpaper](https://github.com/Flameeyes/unpaper) - [PDFtk](https://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/) [Debian version which is fully free](https://libreplanet.org/wiki/List_of_software_that_does_not_respect_the_Free_System_Distribution_Guidelines#pdftk) [AUR version](https://aur.archlinux.org/packages/pdftk-bin/) diff --git a/spectrscan b/spectrscan index a171965..76c8c00 100755 --- a/spectrscan +++ b/spectrscan @@ -22,22 +22,13 @@ # along with spectrscan. If not, see . # -tmp_dir="/tmp/spectrscan-$RANDOM" +# /tmp fill up quickly +tmp_dir="$(pwd)/.spectrscan-$RANDOM" src_dir="$(pwd)" ######## ######## -# Options to add -# Compress -# Number of pages to scan -# Basic image enhancer options -# unpaper -# OCR -# Parallel processing: -# Watch inotifies for a new out*.pnm -# then process - help() { cat <<-EOF @@ -111,7 +102,7 @@ scan() # https://bugs.launchpad.net/simple-scan/+bug/983441 scanimage \ --source "$source" \ - --batch=spectrscan_out%d.pnm \ + --batch=spectrscan_out%08d.pnm \ --batch-start $batch_start \ --batch-increment $batch_increment \ --resolution "$resolution" \ @@ -138,23 +129,27 @@ exists_output_file() fi } +convert_single() +{ + filE="$1" + + pamfix -truncate "$filE" > "$filE".tmp.pnm + unpaper --overwrite -q "$filE".tmp.pnm "$filE" + # OCR stuff can go here. + # Contrast enhancement and pdf. + convert -contrast-stretch 0.5%x10% -compress lzw "$filE" "$filE".pdf +} 1>/dev/null 2>/dev/null + pnm_to_pdf() { printf "PNM to PDF...\n" 1>&2- - # n = number of new pages - - # Contrast enhancement and pdf - # OCR stuff can go after the mv command - # PNM file are removed to avoid filling up the RAM. + # A hack to avoid GNU Parallel's message + mkdir -p ~/.parallel && touch ~/.parallel/will-cite + # n = number of new pages # Time complexity: O(n/#cores) - ls spectrscan_out*.pnm | parallel \ - "pamfix -truncate {} > {}.tmp; \ - mv {}.tmp {}; \ - convert -brightness-contrast 0x50 -compress lzw {} {}.pdf; \ - rm {}" \ - 2>/dev/null - + export -f convert_single + parallel --bar convert_single ::: "$(ls spectrscan_*.pnm)" } pdf_cat() @@ -204,14 +199,20 @@ chain() init if [ "$odd_even" = "true" ]; then - scan "$mode" "$resolution" "$source" "odd" - printf "Turn the paper(s) and hit return when ready\n" - read - scan "$mode" "$resolution" "$source" "even" + scan "$mode" "$resolution" "$source" "odd" \ + && printf "Turn the paper(s) and hit return when ready\n" \ + && read \ + && scan "$mode" "$resolution" "$source" "even" else scan "$mode" "$resolution" "$source" fi + # Feeder out of documents +# if [ $? -eq 7 ]; then +# printf "Turn the paper(s) and hit return when ready\n" +# return 1 +# fi + pnm_to_pdf pdf_cat "$output_file" cleanup @@ -303,7 +304,12 @@ option_parser() output_file="$1" - printf ""$mode" "$resolution" "$source" "$odd_even" "$output_file"" + printf "\ + "$mode" \ + "$resolution" \ + "$source" \ + "$odd_even" \ + "$output_file"" } check_supported_parameters() @@ -352,7 +358,12 @@ preliminary_controls() && check_supported_parameters "resolutions" "$resolution" \ && check_supported_parameters "sources" "$source"; } || return 1 - printf ""$mode" "$resolution" "$source" "$odd_even" "$output_file"" + printf "\ + "$mode" \ + "$resolution" \ + "$source" \ + "$odd_even" \ + "$output_file"" } main()