An unintrusive frontend of scanimage which acts as a paper to pdf converter suitable for texts.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.
spectrscan/spectrscan

255 lines
6.2 KiB

#!/usr/bin/env bash
#
# spectrscan
#
# Copyright (C) 2017 frnmst (Franco Masotti) <franco.masotti@live.com>
# <franco.masotti@student.unife.it>
#
# This file is part of spectrscan.
#
# spectrscan is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# spectrscan is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with spectrscan. If not, see <http://www.gnu.org/licenses/>.
#
# Trace every command as it is executed; errexit stays disabled.
set -x
#set -e
# Directory the script was started from.
SRC_DIR="$(pwd)"
# Scratch directory for intermediate files, located inside SRC_DIR.
TMP_DIR="${SRC_DIR}/.spectrscan-${RANDOM}"
# Verify that every external program used by this script can be found.
# Globals:   none
# Arguments: none
# Outputs:   nothing (stdout and stderr of the whole function are discarded)
# Returns:   0 when every tool is found, 1 as soon as one is missing
check_software()
{
    local tool=''

    # Each tool is probed separately so that a single missing one is enough
    # to make the check fail.
    # NOTE(review): 'netpbm' is kept from the original list -- confirm that it
    # is an actual executable (and not only a package name) on target systems.
    for tool in \
        scanimage \
        unpaper \
        awk \
        convert \
        pdftk \
        parallel \
        file \
        pamfix \
        netpbm; do
        command -v "$tool" || return 1
    done
} 1>/dev/null 2>/dev/null
# Acquire a batch of pages with scanimage into the current directory.
# Globals:   source, resolution, mode (read)
# Arguments: $1 - "odd" numbers the pages 1, 3, 5, ...; "even" numbers them
#                 2, 4, 6, ...; anything else (or nothing) numbers them
#                 consecutively starting from 1
# Outputs:   a progress line on stdout, plus whatever scanimage prints
# Returns:   the exit status of scanimage
scan()
{
    local page_set="${1}"
    local first_page=1
    local page_step=1

    case "$page_set" in
        odd)
            first_page=1
            page_step=2
            ;;
        even)
            first_page=2
            page_step=2
            ;;
    esac

    # Put source option before resolution to avoid the error:
    # https://bugs.launchpad.net/simple-scan/+bug/983441
    local -a scan_args=(
        --source "$source"
        --batch=spectrscan_out%08d.pnm
        --batch-start "$first_page"
        --batch-increment "$page_step"
        --resolution "$resolution"
        --mode "$mode"
        --progress
        --format=pnm
    )

    printf "%s\n" 'Scanning...'
    scanimage "${scan_args[@]}"
}
# Report whether the output file already exists and is a PDF.
# Globals:   SRC_DIR, output_file (read)
# Arguments: none
# Outputs:   "true"  - the file exists and its MIME type is application/pdf
#            "false" - there is no such regular file
#            "error" - the file exists but is not a PDF
#            (printed on stdout without a trailing newline)
# Returns:   0
exists_output_file()
{
    local target="${SRC_DIR}/${output_file}"
    local mime_type=''

    if [ ! -f "$target" ]; then
        printf "false"
        return 0
    fi

    # -b makes file print the MIME type alone, without the "path:" prefix, so
    # a path containing whitespace cannot be mistaken for part of the answer.
    mime_type="$(file -b --mime-type "$target")"
    if [ "$mime_type" = "application/pdf" ]; then
        printf "true"
    else
        # Not a pdf file.
        printf "error"
    fi
}
# Turn one scanned PNM page into a single-page PDF stored next to it.
# Globals:   imagemagick_options, unpaper_options (read). Each is either
#            "true", "false" or a string of extra command line options.
# Arguments: $1 - path of the PNM file to process
# Outputs:   nothing (stdout and stderr of the whole function are discarded);
#            creates "$1.tmp.pnm.bis", "$1.tmp.pnm", "$1.pnm" and "$1.pdf"
# Returns:   the exit status of the final convert call
convert_single()
{
    local page="${1}"
    local truncated="${page}.tmp.pnm.bis"
    local enhanced="${page}.tmp.pnm"
    local cleaned="${page}.pnm"

    # The image enhancements are applied before unpaper. According to the
    # original author this gives better results, because it avoids losing
    # information that unpaper would remove from the final result.
    pamfix -truncate "$page" > "$truncated"

    case "$imagemagick_options" in
        true|false)
            # No custom ImageMagick options: just move the file along.
            mv "$truncated" "$enhanced"
            ;;
        *)
            # Unquoted on purpose: the options must be split into words.
            convert $imagemagick_options "$truncated" "$enhanced"
            ;;
    esac

    case "$unpaper_options" in
        true)
            unpaper --overwrite -q "$enhanced" "$cleaned"
            ;;
        false)
            mv "$enhanced" "$cleaned"
            ;;
        *)
            # Unquoted on purpose: the options must be split into words.
            unpaper --overwrite -q $unpaper_options "$enhanced" "$cleaned"
            ;;
    esac

    # OCR stuff goes here.
    convert -compress lzw "$cleaned" "${page}.pdf"
} 1>/dev/null 2>/dev/null
# Convert every scanned page in the current directory to PDF, in parallel.
# Globals:   unpaper_options, imagemagick_options (exported for the workers)
# Arguments: none
# Outputs:   a progress line on stdout plus GNU Parallel's progress bar;
#            an error line on stderr when there is nothing to convert
# Returns:   1 when no spectrscan_*.pnm file exists, otherwise the exit
#            status of parallel
pnm_to_pdf()
{
    local -a pages=()

    printf "%s\n" 'PNM to PDF...'

    # Let the shell expand the file names: every page must reach parallel as
    # an argument of its own, otherwise a multi-page scan is handed over as a
    # single, newline-joined "file name".
    pages=(spectrscan_*.pnm)
    # An unmatched pattern is left as-is (or expands to nothing with
    # nullglob), so check that the first entry really exists.
    if [ ! -e "${pages[0]}" ]; then
        printf "%s\n" 'No scanned pages found' 1>&2
        return 1
    fi

    # A hack to avoid GNU Parallel's message
    mkdir -p ~/.parallel && touch ~/.parallel/will-cite

    # convert_single runs in shells spawned by parallel, so the function and
    # the settings it reads have to be exported.
    export -f convert_single
    export unpaper_options
    export imagemagick_options
    parallel --bar convert_single ::: "${pages[@]}"
}
# Join the per-page PDFs of the current directory and store the result as the
# output file, appending to it when it already exists.
# Globals:   SRC_DIR, output_file (read)
# Arguments: none
# Outputs:   a progress line on stdout; an error line on stderr on failure
# Returns:   the status of the last file operation on success; on any failure
#            the whole script exits with status 1. The existing output file
#            is put back in place when appending to it fails.
pdf_cat()
{
    local new_pages="${output_file}.tmp"
    local target="${SRC_DIR}/${output_file}"
    local backup="${SRC_DIR}/.${output_file}"
    local state=''

    printf "%s\n" 'Assembling PDF...'

    # O(n)
    # Always cat to the output file.
    # pdftk is used because, per the original author, pdfunite corrupts the
    # pdf.
    if ! pdftk *.pdf cat output "$new_pages"; then
        printf "%s\n" '[ERROR] could not assemble the scanned pages' 1>&2
        exit 1
    fi

    # O(1)
    state="$(exists_output_file)"
    case "$state" in
        true)
            # Append: pdftk reads the backup copy and rewrites the output
            # file, so the backup is the only good copy until pdftk succeeds.
            if ! cp "$target" "$backup"; then
                printf "%s\n" '[ERROR] could not back up the output file' 1>&2
                exit 1
            fi
            if ! pdftk "$backup" "$new_pages" cat output "$target"; then
                mv -f "$backup" "$target"
                printf "%s\n" '[ERROR] could not append to the output file' 1>&2
                exit 1
            fi
            rm "$backup"
            ;;
        false)
            mv "$new_pages" "$target"
            ;;
        *)
            # The output file exists but is not a PDF.
            printf "[ERROR]\n" 1>&2
            exit 1
            ;;
    esac
}
# List the scanner option lines that describe sources, modes and resolutions.
# Globals:   none
# Arguments: none
# Outputs:   the lines of 'scanimage -A' containing --source, --mode or
#            --resolution, on stdout
# Returns:   the exit status of grep (non-zero when no line matched)
get_supported_parameters()
{
    scanimage -A | grep -E -e '--(source|mode|resolution)'
}
# Print the resolutions offered by the scanner as a space-separated list.
# Globals:   supported_params (read) - option lines as printed by scanimage
# Arguments: none (any argument is ignored)
# Outputs:   e.g. "75 150 300 600" for "--resolution 75|150|300|600dpi [75]"
# Returns:   the exit status of awk
get_supported_resolutions()
{
    # Only the --resolution line is relevant; without this filter the values
    # of the other options would be listed as resolutions too. The unit is
    # removed as a suffix rather than by deleting the characters d, p and i.
    printf '%s\n' "${supported_params}" \
        | awk '/--resolution/ {
                   sub(/dpi$/, "", $2)
                   gsub(/[|]/, " ", $2)
                   print $2
               }'
}
# Print the scan modes offered by the scanner as a space-separated list.
# Globals:   supported_params (read) - option lines as printed by scanimage
# Arguments: none (any argument is ignored)
# Outputs:   the second field of every line containing "mode", with the '|'
#            separators turned into spaces
# Returns:   the exit status of awk
get_supported_modes()
{
    awk '/mode/ { gsub(/[|]/, " ", $2); print $2 }' <<< "${supported_params}"
}
# Print the paper sources offered by the scanner as a space-separated list.
# Globals:   supported_params (read) - option lines as printed by scanimage
# Arguments: none (any argument is ignored)
# Outputs:   the second field of every line containing "source", with the '|'
#            separators turned into spaces
# Returns:   the exit status of awk
get_supported_sources()
{
    awk '/source/ { gsub(/[|]/, " ", $2); print $2 }' <<< "${supported_params}"
}
# Check that a requested value is among the ones the scanner supports.
# Globals:   supported_<parameter> (written) - the list returned by the
#            matching get_supported_<parameter> function
# Arguments: $1 - parameter family: "modes", "resolutions" or "sources"
#            $2 - value requested by the user
#            $3 - optional, handed over unchanged to get_supported_<parameter>
# Outputs:   on failure, the list of supported values on stderr
# Returns:   0 when the value is supported, 1 otherwise
check_supported_parameters()
{
    local parameter="${1}"
    local value="${2}"
    local supported_parameters="${3}"
    local parameter_full_name="supported_${parameter}"
    local p=''

    # printf -v stores the list verbatim in the variable named by
    # parameter_full_name. Unlike eval, nothing coming from the scanner is
    # ever parsed as shell code.
    printf -v "$parameter_full_name" '%s' \
        "$(get_supported_"${parameter}" "${supported_parameters}")"

    # Unquoted on purpose: the list is whitespace-separated.
    for p in ${!parameter_full_name}; do
        if [ "$value" = "$p" ]; then
            return 0
        fi
    done

    # The values are passed as arguments, never as part of the format string.
    printf 'ERROR: Supported %s: %s\n' \
        "$parameter" "${!parameter_full_name}" 1>&2
    return 1
}
# Run the whole pipeline: scan into a scratch directory, convert the pages,
# assemble the PDF and remove the scratch directory.
# Globals:   TMP_DIR, odd_even (read)
# Arguments: none
# Outputs:   the messages of the called steps; a prompt on stdout between the
#            two passes when odd_even is "true"
# Returns:   1 when the scratch directory cannot be created or entered, or
#            when the scan ended with status 7; otherwise the exit status of
#            the final rm
chain()
{
    local scan_status=0

    # Never work in a directory that is already there or cannot be entered:
    # the pages would be mixed with unrelated files.
    mkdir "$TMP_DIR" || return 1
    if ! pushd "$TMP_DIR"; then
        rmdir "$TMP_DIR"
        return 1
    fi

    if [ "$odd_even" = "true" ]; then
        scan "odd" \
            && printf "%s\n" 'turn the paper(s) and hit return when ready' \
            && read -r \
            && scan "even"
    else
        scan
    fi
    scan_status=$?

    # Feeder out of documents.
    if [ "$scan_status" -eq 7 ]; then
        printf "No paper inserted\n" 1>&2
        # Leave the caller in its original directory and do not leave the
        # scratch directory behind.
        popd
        rm -rf -- "$TMP_DIR"
        return 1
    fi

    pnm_to_pdf
    pdf_cat
    # NOTE(review): cleanup is not defined in the visible part of this file;
    # presumably it is provided elsewhere -- TODO confirm.
    cleanup
    popd
    rm -rf -- "$TMP_DIR"
}
# Store the user's settings, validate them against the scanner and, when they
# are all supported, start the scan pipeline.
# Globals:   imagemagick, mode, resolution, source, unpaper, output_file,
#            supported_params (all written)
# Arguments: $1 - ImageMagick setting     $4 - paper source
#            $2 - scan mode               $5 - unpaper setting
#            $3 - resolution              $6 - output file name
# Returns:   the failing status of get_supported_parameters when the scanner
#            options cannot be read, 1 when a value is not supported,
#            otherwise the exit status of chain
preliminary_checks()
{
    # Leak all variables to avoid passing them every time.
    # NOTE(review): convert_single reads imagemagick_options and
    # unpaper_options, not the imagemagick and unpaper variables set here --
    # presumably those are set elsewhere; TODO confirm.
    imagemagick="${1}"
    mode="${2}"
    resolution="${3}"
    source="${4}"
    unpaper="${5}"
    output_file="${6}"

    # Get the parameters and check that the scanner is connected.
    # The status is taken straight from the assignment: testing $? with '['
    # first would replace it with the (successful) status of the test itself.
    supported_params="$(get_supported_parameters)" || return "$?"

    check_supported_parameters "modes" "${mode}" || return 1
    check_supported_parameters "resolutions" "${resolution}" || return 1
    check_supported_parameters "sources" "${source}" || return 1

    chain
}
# Stop with a failing status when a required program is missing. The status
# is spelled out because $? would be the (successful) status of printf.
check_software || { printf "%s\n" 'check software' 1>&2; exit 1; }
# Hand over to fbopt, looked up in the current working directory.
. ./fbopt