#!/usr/bin/env bash
#
# spectrscan
#
# Copyright (C) 2017-2018 frnmst (Franco Masotti) <franco.masotti@live.com>
# <franco.masotti@student.unife.it>
#
# This file is part of spectrscan.
#
# spectrscan is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# spectrscan is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with spectrscan. If not, see <http://www.gnu.org/licenses/>.
#

# Load the configuration file. The path is relative to the current working
# directory, so the script has to be started from the directory that holds
# spectrscan.conf.
# NOTE(review): SRC_DIR and TMP_DIR are used below but never assigned in this
# file; presumably they are defined here -- confirm.
. ./spectrscan.conf
|
|
|
|
check_software()
{
    # Check that every external program used by this script is available.
    #
    # Outputs: the resolved location of each program found, one per line,
    #          on stdout; one message per missing program on stderr.
    # Returns: 0 if all programs were found, 1 otherwise.
    #
    # The "command -v" builtin is used instead of the external "which",
    # which is not guaranteed to be installed. The names are checked one by
    # one because "command -v" with several names succeeds as soon as any
    # of them is found.
    local tool
    local status=0

    for tool in scanimage unpaper awk convert pdftk parallel file pamfix; do
        if ! command -v "${tool}"; then
            printf '%s\n' "required program not found: ${tool}" 1>&2
            status=1
        fi
    done

    return "${status}"
}
|
|
|
|
get_last_page_side_a()
{
    # Print the page number of the highest-numbered spectrscan_out*.pnm
    # file in the current directory.
    #
    # Outputs: the page number in plain decimal (no leading zeros) on
    #          stdout; nothing if no such file exists.
    #
    # The number is printed without the zero padding of the file name so
    # that callers can use it in shell arithmetic, where a leading zero
    # would select octal.
    local page
    local last=''
    local digits

    # Glob results are sorted, so the last match is the greatest one. The
    # shell expands the pattern itself: no need to parse the output of ls.
    for page in spectrscan_out*.pnm; do
        # An unmatched pattern is left as the literal string: skip it.
        [ -e "${page}" ] || continue
        last="${page}"
    done
    [ -n "${last}" ] || return 0

    # Drop lowercase letters, dots and underscores from the file name,
    # which leaves the page counter.
    digits="${last//[a-z._]/}"
    if [[ "${digits}" =~ ^[0-9]+$ ]]; then
        # 10# forces base 10 regardless of leading zeros.
        printf '%s\n' "$((10#${digits}))"
    else
        printf '%s\n' "${digits}"
    fi
}
|
|
|
|
select_page_parameters()
{
    # Set the global variables batch_start and batch_increment according
    # to the global variables two_sided, side_a and side_b.
    #
    #   two_sided=true,    side_b=true -> start after the last page of
    #                                     side a, count downwards by 2
    #   two_sided=reverse, side_b=true -> start at 2, count upwards by 2
    #   two_sided!=false,  side_a=true -> start at 1, count upwards by 2
    #   two_sided=false                -> start at 1, count upwards by 1
    #
    # The side_a case is evaluated after the side_b cases and therefore
    # wins if both flags are 'true'.
    local last_page

    if [ "${two_sided}" = 'true' ] && [ "${side_b}" = 'true' ]; then
        # side_b
        last_page="$(get_last_page_side_a)"
        # Keep the digits only and force base 10: a zero padded value such
        # as 00000009 would otherwise be rejected as an invalid octal
        # number, and 00000011 would silently evaluate to 9.
        last_page="${last_page//[!0-9]/}"
        batch_start=$((10#${last_page:-0} + 1))
        batch_increment=-2
    elif [ "${two_sided}" = 'reverse' ] && [ "${side_b}" = 'true' ]; then
        # side_b
        batch_start=2
        batch_increment=2
    fi

    if [ "${two_sided}" != 'false' ] && [ "${side_a}" = 'true' ]; then
        # true, reverse; first side.
        batch_start=1
        batch_increment=2
    fi

    if [ "${two_sided}" = 'false' ]; then
        # single sided
        batch_start=1
        batch_increment=1
    fi
}
|
|
|
|
build_scan_sub_command()
{
    # Print the scanner specific part of the scanimage command line.
    #
    # Globals: source, resolution, mode (read).
    # Outputs: a single line on stdout made of " --source <source>",
    #          " --resolution <resolution>" and " --mode <mode>", each one
    #          present only if the matching probe reports at least one
    #          supported value.
    #
    # Since these are scanner specific options, they need to be probed.
    # Each option is guarded by its own probe: --source by the supported
    # sources, --resolution by the supported resolutions and --mode by the
    # supported modes.
    #
    # Put source option before resolution to avoid the error:
    # https://bugs.launchpad.net/simple-scan/+bug/983441
    local scan_command=''

    if [ -n "$(get_supported_sources)" ]; then
        scan_command="${scan_command} --source ${source}"
    fi
    if [ -n "$(get_supported_resolutions)" ]; then
        scan_command="${scan_command} --resolution ${resolution}"
    fi
    if [ -n "$(get_supported_modes)" ]; then
        scan_command="${scan_command} --mode ${mode}"
    fi

    printf '%s\n' "${scan_command}"
}
|
|
|
|
scan()
{
    # Run one scanimage batch in the current directory.
    #
    # select_page_parameters provides batch_start and batch_increment and
    # build_scan_sub_command provides the scanner specific options. The
    # pages are written as spectrscan_out%08d.pnm files.
    #
    # Returns: the exit status of scanimage.
    local scan_sub_command
    local -a scan_options

    select_page_parameters
    # Declared and assigned separately so that "local" does not replace
    # the status of the command substitution.
    scan_sub_command="$(build_scan_sub_command)"

    # The sub command is a flat string of options: it is split into words
    # on purpose.
    # shellcheck disable=SC2206
    scan_options=(${scan_sub_command})

    scanimage \
        "${scan_options[@]}" \
        --batch=spectrscan_out%08d.pnm \
        --batch-start "${batch_start}" \
        --batch-increment "${batch_increment}" \
        --progress \
        --format=pnm
}
|
|
|
|
exists_output_file()
{
    # Check if output file exists and is a pdf file.
    # This is needed to concatenate new documents to an
    # already existing pdf file.
    #
    # Globals: SRC_DIR, output_file (read).
    # Outputs: on stdout, one of
    #            'true'  - the file exists and is a pdf file,
    #            'false' - the file does not exist,
    #            'error' - the file exists but is not a pdf file.
    local target="${SRC_DIR}/${output_file}"
    local mime_type

    if [ ! -f "${target}" ]; then
        printf '%s\n' 'false'
        return
    fi

    # -b omits the "<file name>:" prefix so that the MIME type is the
    # whole output: picking the second field of the prefixed form returns
    # a piece of the path when the path contains spaces.
    mime_type="$(file -b --mime-type -- "${target}")"

    if [ "${mime_type}" = 'application/pdf' ]; then
        printf '%s\n' 'true'
    else
        # Not a pdf file.
        printf '%s\n' 'error'
    fi
}
|
|
|
|
convert_single()
{
    # Turn one scanned pnm page into a single page pdf file.
    #
    # Arguments: $1 - the pnm file to process.
    # Globals:   imagemagick_options - 'false', or the options handed to
    #                                  convert;
    #            unpaper_options     - 'true' (unpaper with no extra
    #                                  options), 'false' (skip unpaper) or
    #                                  the extra options handed to unpaper.
    # Outputs:   nothing, stdout and stderr of the whole function are
    #            discarded. The result is written to "$1".pdf.
    # Returns:   0 on success, otherwise the status of the first stage that
    #            failed. The remaining stages are skipped in that case
    #            because they would work on a missing or partial file.
    #
    # First convert to pdf with the image enhancements, then apply unpaper.
    # This gives better results because it avoids missing information
    # removed by unpaper in the final result.
    local page="${1}"
    local fixed="${page}.tmp.pnm.bis"
    local enhanced="${page}.tmp.pnm"
    local cleaned="${page}.pnm"

    pamfix -truncate "${page}" > "${fixed}" || return

    if [ "${imagemagick_options}" != 'false' ]; then
        # The options are a flat string: they are split into words on
        # purpose.
        # shellcheck disable=SC2086
        convert ${imagemagick_options} "${fixed}" "${enhanced}" || return
    else
        mv "${fixed}" "${enhanced}" || return
    fi

    if [ "${unpaper_options}" = 'true' ]; then
        unpaper --overwrite -q "${enhanced}" "${cleaned}" || return
    elif [ "${unpaper_options}" = 'false' ]; then
        mv "${enhanced}" "${cleaned}" || return
    else
        # shellcheck disable=SC2086
        unpaper --overwrite -q ${unpaper_options} "${enhanced}" "${cleaned}" \
            || return
    fi

    #
    # OCR stuff goes here.
    #

    convert -compress lzw "${cleaned}" "${page}.pdf"

} 1>/dev/null 2>/dev/null
|
|
|
|
# TODO: use inotify, or add an option to post-process the pages when inotify
# is not available (e.g. on an sshfs filesystem).
# Process in /tmp if possible, so that the previous check is not needed.
|
|
pnm_to_pdf()
{
    # Convert every spectrscan_*.pnm file of the current directory to pdf,
    # running convert_single on the files through GNU Parallel.
    #
    # Globals: unpaper_options, imagemagick_options (exported so that the
    #          jobs started by parallel can read them).
    # Returns: 1 if there is no page to convert, otherwise the status of
    #          parallel.
    local -a pages=()
    local page

    # Let the shell expand the pattern: every file becomes a separate
    # argument, instead of a single string holding the whole ls output.
    for page in spectrscan_*.pnm; do
        # An unmatched pattern is left as the literal string: skip it.
        [ -e "${page}" ] || continue
        pages+=("${page}")
    done
    if [ "${#pages[@]}" -eq 0 ]; then
        printf '%s\n' 'no scanned pages to convert' 1>&2
        return 1
    fi

    # A hack to avoid GNU Parallel's message
    mkdir -p ~/.parallel && touch ~/.parallel/will-cite

    # n = number of new pages
    # Time complexity: O(n/#threads)
    export -f convert_single
    export unpaper_options
    export imagemagick_options
    parallel --bar convert_single ::: "${pages[@]}"
}
|
|
|
|
pdf_cat()
{
    # Join the pdf pages of the current directory and store the result as
    # "$SRC_DIR"/"$output_file", appending to that file if it already
    # exists.
    #
    # Globals: SRC_DIR, output_file (read).
    # Returns: non-zero if pdftk fails; an existing output file is left
    #          untouched in that case.
    # Exits:   with status 1 if the output file exists but is not a pdf
    #          file.
    local new_pages="${output_file}.tmp"
    local target="${SRC_DIR}/${output_file}"
    local merged="${SRC_DIR}/.${output_file}"
    local state

    printf "%s\n" 'assembling PDF...'
    # O(n)
    # Always cat to the output file.
    # Unlike pdfunite, pdftk does not corrupt the pdf.
    pdftk *.pdf cat output "${new_pages}" || return

    # O(1)
    # Probe once: the answer cannot change between the branches.
    state="$(exists_output_file)"
    case "${state}" in
        'true')
            # Write the merged document to a hidden file next to the
            # target and replace the target only once pdftk has succeeded,
            # so that a failure cannot damage the existing document.
            if pdftk "${target}" "${new_pages}" cat output "${merged}"; then
                mv "${merged}" "${target}"
            else
                rm -f "${merged}"
                return 1
            fi
            ;;
        'false')
            mv "${new_pages}" "${target}"
            ;;
        *)
            printf '[ERROR] %s exists but is not a PDF file\n' "${target}" 1>&2
            exit 1
            ;;
    esac
}
|
|
|
|
get_supported_parameters()
{
    # Print the lines of "scanimage -A" that describe the --source, --mode
    # and --resolution options.
    #
    # Returns: the status of scanimage if it fails, otherwise the status
    #          of grep (non-zero when none of the three options is listed).
    local all_options

    # Run scanimage on its own: inside a pipeline its status would be
    # hidden by the status of grep.
    all_options="$(scanimage -A)" || return

    printf '%s\n' "${all_options}" \
        | grep -e '--source' -e '--mode' -e '--resolution'
}
|
|
|
|
get_supported_resolutions()
{
    # Print the values of the resolution option found in supported_params.
    #
    # Globals: supported_params (read).
    # Outputs: for every line containing "resolution", its second
    #          whitespace separated field with each '|' replaced by a space
    #          and the letters 'd', 'p' and 'i' removed.
    printf '%s\n' "${supported_params}" \
        | awk '/resolution/ {
                   values = $2
                   gsub(/[|]/, " ", values)
                   gsub(/[dpi]/, "", values)
                   print values
               }'
}
|
|
|
|
get_supported_modes()
{
    # Print the values of the mode option found in supported_params.
    #
    # Globals: supported_params (read).
    # Outputs: for every line containing "mode", its second whitespace
    #          separated field with each '|' replaced by a space.
    printf '%s\n' "${supported_params}" \
        | awk '/mode/ {
                   values = $2
                   gsub(/[|]/, " ", values)
                   print values
               }'
}
|
|
|
|
get_supported_sources()
{
    # Print the values of the source option found in supported_params.
    #
    # Globals: supported_params (read).
    # Outputs: for every line containing "source", its second whitespace
    #          separated field with each '|' replaced by a space.
    # NOTE(review): only the second field is used, so a source name that
    # contains spaces is presumably cut at its first space -- confirm
    # against real scanimage output.
    printf '%s\n' "${supported_params}" \
        | awk '/source/ {
                   values = $2
                   gsub(/[|]/, " ", values)
                   print values
               }'
}
|
|
|
|
chain()
{
    # Run the whole pipeline inside "$TMP_DIR": scan the pages, convert
    # them to pdf, assemble the output file, then remove "$TMP_DIR".
    #
    # Globals: TMP_DIR, two_sided (read); side_a, side_b (written).
    # Returns: 1 if TMP_DIR is empty or cannot be entered, otherwise the
    #          status of the final rm.
    if [ -z "${TMP_DIR}" ]; then
        printf '%s\n' 'TMP_DIR is not set' 1>&2
        return 1
    fi

    mkdir -p "${TMP_DIR}"
    # Without this check a failing pushd would leave the scanned pages in
    # the caller's directory.
    pushd "${TMP_DIR}" || return 1

    if [ "${two_sided}" != 'false' ]; then
        side_a='true'
        # The second side is scanned only if the first scan succeeded and
        # the user confirmed that the paper has been turned.
        if scan \
            && printf "%s\n" 'turn the paper(s) and hit return when ready' \
            && read -r; then
            side_a='false'
            side_b='true'
            scan
        fi
    else
        scan
    fi

    pnm_to_pdf
    pdf_cat

    popd
    # :? aborts instead of expanding to an empty path.
    rm -rf "${TMP_DIR:?}"
}
|
|
|
|
preliminary_checks()
{
    # Store the arguments in global variables, probe the scanner and, if
    # the probe succeeds, run the whole pipeline through chain.
    #
    # Arguments: $1 - imagemagick_options
    #            $2 - mode
    #            $3 - resolution
    #            $4 - source
    #            $5 - unpaper_options
    #            $6 - two_sided
    #            $7 - output_file
    # Returns:   the status of get_supported_parameters if it fails,
    #            otherwise the status of chain.

    # Leak all variables to avoid passing them every time.
    imagemagick_options="${1}"
    mode="${2}"
    resolution="${3}"
    source="${4}"
    unpaper_options="${5}"
    two_sided="${6}"
    output_file="${7}"

    # Get the parameters and check that the scanner is connected.
    # The status of an assignment is the status of its command
    # substitution, so the failure can be tested directly.
    supported_params="$(get_supported_parameters)" || return

    chain
}
|
|
|
|
# Stop immediately, with the status of the check, if a required program
# is missing.
check_software || exit "${?}"
# The path is relative to the current working directory.
# NOTE(review): nothing in this file calls preliminary_checks; presumably
# fbopt parses the command line and does so -- confirm.
. ./fbopt
|