Bug fixes; add and fix tests. Decoding greatly improved.

Fix resize tests by increasing data bounds threshold.

Simplify the modify-image expression

Clean and fix block sizes, colored rows test working

Fix pages by being flexible to missing sectors

Use exponential drop-off within dots to find pixel values.

Try different right and bottom bounds to improve decoding.
master v0.1.0.dev6
Justin Bass 4 years ago
parent 1d09fac9ff
commit 233216dc92
  1. README.md (6)
  2. colorsafe/__init__.py (2)
  3. colorsafe/constants.py (4)
  4. colorsafe/csdatastructures.py (56)
  5. colorsafe/debugutils.py (15)
  6. colorsafe/decoder/csdecoder.py (2)
  7. colorsafe/decoder/csdecoder_getbounds.py (123)
  8. colorsafe/decoder/csdecoder_getchannels.py (120)
  9. colorsafe/decoder/csimages_decoder.py (8)
  10. colorsafe/encoder/csencoder.py (6)
  11. colorsafe/utils.py (10)
  12. docs/TODO.md (15)
  13. test/test_e2e.py (67)
  14. test/test_unit.py (73)
  15. test/test_utils.py (75)

@ -1,9 +1,9 @@
# ColorSafe
A data matrix scheme for printing on paper. Inspired by
A data matrix scheme for printing on paper and microfilm. Inspired by
[PaperBak](https://github.com/Rupan/paperbak), ColorSafe is written in Python
and has a well-defined specification. It aims to allow a few Megabytes of data
(or more) to be stored on paper for a worst case scenario backup, for
and has a flexible specification. It aims to allow a few Megabytes of data
(or more) to be stored on printable media for a worst case scenario backup, for
extremely long-term archiving, or just for fun. With best practices, ColorSafe
encoded data can safely withstand the vicissitudes of technology changes over
long periods of time.

@ -1,3 +1,3 @@
from decoder import ColorSafeDecoder
from encoder import ColorSafeEncoder
from exceptions import DecodingError, EncodingError

@ -34,4 +34,6 @@ TotalPagesMaxBytes = 8 # 8 bytes per page maximum for the total-pages field
MaxSkew = 5
MaxSkewPerc = 0.002
DefaultThresholdWeight = 0.5
HalfPixel = 0.5

@ -1,5 +1,6 @@
import math
import constants
import utils
class ColorChannels:
@ -108,44 +109,39 @@ class Sector:
"""
@staticmethod
def getBlockSizes(height, width, colorDepth, eccRate):
rsBlockSizes = list()
dataBlockSizes = list()
eccBlockSizes = list()
def get_block_sizes(height, width, color_depth, ecc_rate):
def fill_block_sizes(bytes, max_bytes_per_block):
if bytes <= max_bytes_per_block:
return [bytes]
else:
block_sizes = [max_bytes_per_block] * (bytes / max_bytes_per_block)
dataRowCount = Sector.getDataRowCount(height, eccRate)
eccRowCount = height - constants.MagicRowHeight - dataRowCount
remainder = bytes % max_bytes_per_block
if remainder != 0:
last_two_avg = utils.average([remainder, max_bytes_per_block])
block_sizes[-1] = int(math.ceil(last_two_avg))
block_sizes.append(int(math.floor(last_two_avg)))
totalBytes = (height - 1) * width * \
colorDepth / constants.ByteSize
return block_sizes
if totalBytes <= constants.RSBlockSizeMax:
rsBlockSizes.append(totalBytes)
else:
rsBlockSizes = [constants.RSBlockSizeMax] * \
(totalBytes / constants.RSBlockSizeMax)
if totalBytes % constants.RSBlockSizeMax != 0:
rsBlockSizes.append(totalBytes % constants.RSBlockSizeMax)
data_row_count = Sector.getDataRowCount(height, ecc_rate)
ecc_row_count = height - constants.MagicRowHeight - data_row_count
lastVal = int(math.floor(
(rsBlockSizes[-1] + rsBlockSizes[-2]) / 2.0))
secondLastVal = int(math.ceil(
(rsBlockSizes[-1] + rsBlockSizes[-2]) / 2.0))
total_bytes = (data_row_count + ecc_row_count) * width * color_depth / constants.ByteSize
data_bytes = data_row_count * width * color_depth / constants.ByteSize
ecc_bytes = ecc_row_count * width * color_depth / constants.ByteSize
rsBlockSizes[-1] = lastVal
rsBlockSizes[-2] = secondLastVal
data_row_percentage = float(data_row_count) / (height - constants.MagicRowHeight)
ecc_row_percentage = float(ecc_row_count) / (height - constants.MagicRowHeight)
for size in rsBlockSizes:
dataRowPercentage = float(
dataRowCount) / (height - constants.MagicRowHeight)
eccRowPercentage = float(eccRowCount) / (height - constants.MagicRowHeight)
max_data_block_size = int(round(data_row_percentage * constants.RSBlockSizeMax))
max_ecc_block_size = int(round(ecc_row_percentage * constants.RSBlockSizeMax))
dataBlockSizes.append(
int(math.floor(size * dataRowPercentage)))
eccBlockSizes.append(int(math.ceil(size * eccRowPercentage)))
rs_block_sizes = fill_block_sizes(total_bytes, constants.RSBlockSizeMax)
data_block_sizes = fill_block_sizes(data_bytes, max_data_block_size)
ecc_block_sizes = fill_block_sizes(ecc_bytes, max_ecc_block_size)
return dataRowCount, eccRowCount, rsBlockSizes, dataBlockSizes, eccBlockSizes
return data_row_count, ecc_row_count, rs_block_sizes, data_block_sizes, ecc_block_sizes
@staticmethod
def getDataRowCount(height, eccRate):
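For context, here is a minimal standalone sketch of the block-splitting behaviour that get_block_sizes introduces above. The helper name fill_block_sizes mirrors the nested function in the diff, but this version uses Python 3 syntax (the repo itself is Python 2), and the example figures come from the 64x64, color-depth-1 unit test added in this commit.

```python
import math

def fill_block_sizes(n_bytes, max_bytes_per_block):
    # Split n_bytes into blocks of at most max_bytes_per_block, then average the
    # last two blocks so the final block is not much smaller than the rest.
    if n_bytes <= max_bytes_per_block:
        return [n_bytes]
    block_sizes = [max_bytes_per_block] * (n_bytes // max_bytes_per_block)
    remainder = n_bytes % max_bytes_per_block
    if remainder:
        last_two_avg = (remainder + max_bytes_per_block) / 2.0
        block_sizes[-1] = int(math.ceil(last_two_avg))
        block_sizes.append(int(math.floor(last_two_avg)))
    return block_sizes

# 64x64 sector, color depth 1: 63 usable rows * 64 dots / 8 = 504 RS bytes.
print(fill_block_sizes(504, 255))  # [252, 252], as asserted in the new unit test
```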

@ -4,7 +4,7 @@ import os
from PIL import ImageDraw, Image
def draw_page(page, tmpdir, filename, pixels=None, lines=None, color=(255, 0, 0)):
def draw_page(page, tmpdir, filename, pixels=None, lines=None, pixels_colors=None):
"""
Draw page with additional pixels and lines for debugging purposes
:param page: InputPage type
@ -30,11 +30,18 @@ def draw_page(page, tmpdir, filename, pixels=None, lines=None, color=(255, 0, 0)
if pixel:
y, x = pixel
if page.width > x >= 0 and page.height > y >= 0:
image_pixels[x, y] = color
image_pixels[x, y] = (255, 0, 0)
if lines:
draw = ImageDraw.Draw(image)
for y1, x1, y2, x2 in lines:
draw.line((x1, y1, x2, y2), fill=color)
draw.line((x1, y1, x2, y2), fill=(255, 0, 0))
if pixels_colors:
for pixel_color in pixels_colors:
if pixel_color:
y, x, color = pixel_color
if page.width > x >= 0 and page.height > y >= 0:
image_pixels[x, y] = color
image.save(os.path.join(tmpdir, filename + ".png"))
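A small self-contained illustration of the new pixels_colors overlay follows. Coordinates and colors here are made up, and a bare PIL image stands in for the InputPage that the real draw_page receives; the bounds check mirrors the one in the function above.

```python
from PIL import Image

# Each entry is (y, x, (r, g, b)); out-of-bounds pixels are skipped.
image = Image.new("RGB", (32, 32), (255, 255, 255))
image_pixels = image.load()
pixels_colors = [(4, 6, (0, 255, 0)), (5, 7, (128, 0, 255))]
for y, x, color in pixels_colors:
    if 0 <= x < image.width and 0 <= y < image.height:
        image_pixels[x, y] = color
image.save("debug_overlay.png")
```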

@ -165,7 +165,7 @@ class SectorDecoder(Sector):
self.colorDepth = colorDepth
self.eccRate = eccRate
self.dataRowCount, self.eccRowCount, self.rsBlockSizes, self.dataBlockSizes, self.eccBlockSizes = \
Sector.getBlockSizes(height, width, colorDepth, eccRate)
Sector.get_block_sizes(height, width, colorDepth, eccRate)
for row in range(0, height * width, width):
channels = channelsList[row: row + width]

@ -1,4 +1,3 @@
import itertools
import operator
import os
import sys
@ -6,12 +5,13 @@ from copy import copy
from colorsafe.debugutils import draw_page
from colorsafe import utils, defaults, exceptions
from colorsafe import constants, defaults, exceptions, utils
from colorsafe.decoder.csdecoder_getchannels import get_pixels_and_weight
def get_data_bounds(page, sector_height, sector_width, gap_size, tmpdir):
def get_data_bounds(page, sector_height, sector_width, gap_size, page_num, tmpdir):
if tmpdir:
tmpdir_bounds = os.path.join(str(tmpdir), "bounds")
tmpdir_bounds = os.path.join(str(tmpdir), "bounds_" + str(page_num))
os.mkdir(tmpdir_bounds)
tmpdir = tmpdir_bounds
@ -28,17 +28,25 @@ def get_data_bounds(page, sector_height, sector_width, gap_size, tmpdir):
if height_per_dot < 1.0 or width_per_dot < 1.0:
raise exceptions.DecodingError("Image has less than 1.0x resolution, cannot get all dots.")
top, bottom, left, right = get_real_sector_data_boundaries(page,
height_per_dot,
width_per_dot,
top_temp,
bottom_temp,
left_temp,
right_temp)
data_bound = get_real_sector_data_boundaries(page,
height_per_dot,
width_per_dot,
top_temp,
bottom_temp,
left_temp,
right_temp)
if (top_temp, bottom_temp, left_temp, right_temp) != data_bound:
corrected_data_bound = correct_data_bound(data_bound, sector_height, sector_width, page)
data_bounds.append(corrected_data_bound)
else:
# No data found within the bounds - this sector is most likely not valid or readable, so don't add it
pass
data_bounds.append((top, bottom, left, right))
if tmpdir:
top, bottom, left, right = data_bound
debug_data_bounds.extend([(top, left), (top, right), (bottom, left), (bottom, right)])
if tmpdir:
@ -86,9 +94,6 @@ def get_bounds(page, tmpdir):
# Transpose lists so each border's points are within 1 list, not spread across all lists
# NOTE: Transposing turns the vertical sub-borders into a list of horizontal lines, and vice-versa
# TODO: Need to either transpose by matching like values, or else work on inferring missing borders above
# TODO: Left off here
horizontal_borders = transpose_and_infer(clean_vertical_borders, True)
vertical_borders = transpose_and_infer(clean_horizontal_borders, False)
@ -537,7 +542,7 @@ def get_real_sector_data_boundary(page, leastAlong, mostAlong, leastPerp, mostPe
return dataIndex
# TODO: Improve this value
gapToDataTolerance = 0.25
gapToDataTolerance = 0.4
# Get the closest value that has a sizeable drop from the max of all previous shades
# This only works if the initial shade is assumed to be the darkest part of the border
@ -607,4 +612,88 @@ def get_real_sector_data_boundaries(page, heightPerDot, widthPerDot, topmost, bo
left = left if left else leftmost
right = right if right else rightmost
return top, bottom, left, right
def correct_one_data_bound(data_bound, sector_height, sector_width, page, right_else_bottom):
"""
Get the corrected right or bottom data bound. The bound passed in is found by looking at where pixels start, but
the encoding may fill each dot only partially, with the filled pixels in the dot's top-left. A right or bottom bound
found from pixel positions therefore excludes the trailing whitespace, so decoding ends up shifted slightly. Without
a timing pattern there is no way to locate dots directly, and looking only at where pixels start is not enough.
To fix this, look for a bound that optimizes some rows or columns to have a weighted standard deviation as
small as possible. This happens when dots and whitespace overlap as little as possible within the row or column,
e.g. each dot is filled with pixels that have minimal variance.
An encoded timing pattern would simplify this, at the expense of allowing less data to be encoded.
TODO: Support shades
TODO: Search for larger than 1 pixel modifier to support dots with > 1 whitespace pixel
TODO: For normal (blurred) data, try relaxing the low_data threshold that generates data_bound to get a better bound
:param data_bound: The data bounds found by looking where pixels begin.
:param sector_height: Dot height of sector
:param sector_width: Dot width of sector
:param page: Page to be decoded
:param right_else_bottom: True for right, False for bottom
:return: The correct bound modifier, either right or bottom
"""
top, bottom, left, right = data_bound
min_sum_weighted_stds = sys.maxint
best_modifier = 0
divisions = 4
modifier_possibilites = map(lambda i: float(i) / divisions, range(divisions + 1))
for bound_modifier in modifier_possibilites:
weighted_stds = list()
# Right bound
along_max = sector_height if right_else_bottom else sector_width
along_division = 4
perp_max = sector_width if right_else_bottom else sector_height
for along_iter in range(0, along_max, along_max / along_division):
for perp_iter in range(0, perp_max):
x = perp_iter if right_else_bottom else along_iter
y = along_iter if right_else_bottom else perp_iter
right_modifier = bound_modifier if right_else_bottom else 0
bottom_modifier = bound_modifier if not right_else_bottom else 0
pixels_and_weight, weight_sum, y_center, x_center = get_pixels_and_weight(y,
x,
top,
bottom + bottom_modifier,
left,
right + right_modifier,
sector_height,
sector_width,
page)
for i in range(0, constants.ColorChannels):
shade_and_weight = map(lambda (pixel, weight, _, __): (pixel[i], weight), pixels_and_weight)
weighted_std = utils.weighted_standard_deviation_squared(shade_and_weight)
weighted_stds.append(weighted_std)
sum_weighted_stds = sum(weighted_stds)
if sum_weighted_stds < min_sum_weighted_stds:
min_sum_weighted_stds = sum_weighted_stds
best_modifier = bound_modifier
continue
return best_modifier
def correct_data_bound(data_bound, sector_height, sector_width, page):
top, bottom, left, right = data_bound
right_modifier = correct_one_data_bound(data_bound, sector_height, sector_width, page, True)
bottom_modifier = correct_one_data_bound(data_bound, sector_height, sector_width, page, False)
return (top, bottom + bottom_modifier, left, right + right_modifier)
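A toy sketch of the search that correct_one_data_bound performs: try a handful of fractional extensions of one bound and keep the candidate that minimises the summed weighted variance of the sampled dots. The score function here is hypothetical; in the real code it is built from get_pixels_and_weight and weighted_standard_deviation_squared.

```python
def best_bound_modifier(score_fn, divisions=4):
    # Candidate extensions are 0, 1/divisions, ..., 1 pixel; a lower score means
    # dots and whitespace overlap less for that candidate bound.
    candidates = [float(i) / divisions for i in range(divisions + 1)]
    return min(candidates, key=score_fn)

# Toy score minimised when the bound is extended by half a pixel.
print(best_bound_modifier(lambda m: (m - 0.5) ** 2))  # 0.5
```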

@ -1,14 +1,13 @@
import math
import os
import sys
from colorsafe.debugutils import draw_page
from colorsafe import exceptions, utils
from colorsafe import constants, exceptions
from colorsafe.csdatastructures import ColorChannels
def get_normalized_channels_list(page, data_bounds, sector_height, sector_width, sectorNum, tmpdir):
def get_normalized_channels_list(page, data_bounds, sector_height, sector_width, page_num, sector_num, tmpdir):
if tmpdir:
tmpdir_bounds = os.path.join(str(tmpdir), "channels")
try:
@ -26,13 +25,14 @@ def get_normalized_channels_list(page, data_bounds, sector_height, sector_width,
right,
sector_height,
sector_width,
sectorNum,
page_num,
sector_num,
tmpdir)
normalized_channels_list = normalizeChannelsList(channels_list)
if (tmpdir):
f = open(os.path.join(tmpdir, "normalized_channels_" + str(sectorNum) + ".txt"), "w")
f = open(os.path.join(tmpdir, "normalized_channels_" + str(page_num) + "_" + str(sector_num) + ".txt"), "w")
for i in channels_list:
f.write(str(i.getChannels()) + "\r")
f.close()
@ -40,61 +40,76 @@ def get_normalized_channels_list(page, data_bounds, sector_height, sector_width,
return normalized_channels_list
def get_channels_list(page, top, bottom, left, right, sector_height, sector_width, sector_num, tmpdir):
# TODO: Use bilinear interpolation to get pixel values instead
def get_pixels_and_weight(y, x, top, bottom, left, right, sector_height, sector_width, page):
# TODO: Improve speed by not getting values that would add an insignificant amount to weight
total_pixels_height = bottom - top + 1
total_pixels_width = right - left + 1
pixels_per_dot_width = float(total_pixels_height) / float(sector_height)
pixels_per_dot_height = float(total_pixels_width) / float(sector_width)
pixels_per_dot_width = float(total_pixels_width) / float(sector_width)
pixels_per_dot_height = float(total_pixels_height) / float(sector_height)
if tmpdir:
all_pixels_and_weight = list()
# Center halfway through the dot
y_center = pixels_per_dot_height * (y + constants.HalfPixel) + top
x_center = pixels_per_dot_width * (x + constants.HalfPixel) + left
# For each dot in the sector
channels_list = list()
for y in range(sector_height):
for x in range(sector_width):
# Center halfway through the dot, y + 0.5 and x + 0.5
y_center = pixels_per_dot_height * (y + 0.5) + top
x_center = pixels_per_dot_width * (x + 0.5) + left
# Don't use coordinates outside the page bounds
y_min = max(y_center - pixels_per_dot_height / 2, 0)
y_max = min(y_center + pixels_per_dot_height / 2, page.height - 1)
x_min = max(x_center - pixels_per_dot_width / 2, 0)
x_max = min(x_center + pixels_per_dot_width / 2, page.width - 1)
pixels_and_weight = list()
weight_sum = 0.0
y_min = y_center - pixels_per_dot_height / 2
y_max = y_center + pixels_per_dot_height / 2
x_min = x_center - pixels_per_dot_width / 2
x_max = x_center + pixels_per_dot_width / 2
y_pixel_min = int(math.floor(y_min))
y_pixel_max = int(math.floor(y_max))
x_pixel_min = int(math.floor(x_min))
x_pixel_max = int(math.floor(x_max))
for y_pixel in range(y_pixel_min, y_pixel_max + 1):
for x_pixel in range(x_pixel_min, x_pixel_max + 1):
pixel = page.get_pixel(y_pixel, x_pixel)
pixels_and_weight = list()
weight_sum = 0.0
weight = 1.0
y_pixel_min = int(math.floor(y_min))
y_pixel_max = int(math.floor(y_max))
x_pixel_min = int(math.floor(x_min))
x_pixel_max = int(math.floor(x_max))
for y_pixel in range(y_pixel_min, y_pixel_max + 1):
for x_pixel in range(x_pixel_min, x_pixel_max + 1):
pixel = page.get_pixel(y_pixel, x_pixel)
y_diff = abs(y_pixel + constants.HalfPixel - y_center)
x_diff = abs(x_pixel + constants.HalfPixel - x_center)
weight = 1.0
if y_diff > 0.5:
weight *= 1 / ((2 * y_diff) ** 2)
if y_pixel > y_max - 1:
weight *= (y_max % 1)
if x_diff > 0.5:
weight *= 1 / ((2 * x_diff) ** 2)
if y_pixel < y_min:
weight *= ((1 - y_min) % 1)
pixels_and_weight.append((pixel, weight, y_pixel, x_pixel))
weight_sum += weight
if x_pixel > x_max - 1:
weight *= (x_max % 1)
return pixels_and_weight, weight_sum, y_center, x_center
if x_pixel < x_min:
weight *= ((1 - x_min) % 1)
weight_sum += weight
def get_channels_list(page, top, bottom, left, right, sector_height, sector_width, page_num, sector_num, tmpdir):
# TODO: Would bilinear interpolation be more accurate?
pixels_and_weight.append((pixel, weight, y_pixel, x_pixel))
if tmpdir:
all_pixels_and_weight = list()
# For each dot in the sector
channels_list = list()
for y in range(sector_height):
for x in range(sector_width):
pixels_and_weight, weight_sum, y_center, x_center = get_pixels_and_weight(y,
x,
top,
bottom,
left,
right,
sector_height,
sector_width,
page)
if tmpdir:
all_pixels_and_weight.append((y, x, pixels_and_weight))
all_pixels_and_weight.append((y, x, pixels_and_weight, y_center, x_center))
number_of_channels = len(page.get_pixel(0, 0))
channels_sum = [0] * number_of_channels
@ -107,13 +122,24 @@ def get_channels_list(page, top, bottom, left, right, sector_height, sector_widt
channels_list.append(channels_avg)
if tmpdir:
f = open(os.path.join(tmpdir, "all_pixels_and_weight_" + str(sector_num) + ".txt"), "w")
for y, x, pixels_and_weight in all_pixels_and_weight:
f.write(str(y) + "," + str(x) + ":\r")
pixels_centers = list()
pixels_colors = list()
f = open(os.path.join(tmpdir, "all_pixels_and_weight_" + str(page_num) + "_" + str(sector_num) + ".txt"), "w")
for y, x, pixels_and_weight, y_center, x_center in all_pixels_and_weight:
f.write(str(y) + "," + str(x) + " (" + str(y_center) + "," + str(x_center) + "):\r")
for i in pixels_and_weight:
f.write(" " + str(i) + "\r")
pixel, weight, y_pixel, x_pixel = i
f.write(" " + str((y_pixel, x_pixel, pixel, weight)) + "\r")
pixels_centers.append((int(math.floor(y_center)), int(math.floor(x_center))))
if not x % 2 and not y % 2:
pixels_colors.append((y_pixel, x_pixel, (255 - int(weight * 255), 255, 255)))
f.close()
draw_page(page, tmpdir, "pixels_sampling_" + str(page_num) + "_" + str(sector_num), None, None, pixels_colors)
draw_page(page, tmpdir, "pixels_centers_" + str(page_num) + "_" + str(sector_num), pixels_centers, None, None)
color_channels_list = map(lambda i: ColorChannels(*i), channels_list)
return color_channels_list
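The per-pixel weighting used by get_pixels_and_weight can be summarised for one axis as below. This is a simplified sketch with made-up coordinates; the real code computes the y and x factors separately and multiplies them into one weight per pixel.

```python
def axis_weight(pixel_coord, center, lo, hi):
    # Weight of one pixel along one axis when sampling a dot that spans
    # [lo, hi) and is centred at `center` (all in pixel units).
    diff = abs(pixel_coord + 0.5 - center)
    weight = 1.0
    if diff > 0.5:
        weight *= 1.0 / ((2 * diff) ** 2)   # inverse-square drop-off away from the centre
    if pixel_coord > hi - 1:
        weight *= hi % 1                    # fraction of the pixel inside the far edge
    if pixel_coord < lo:
        weight *= (1 - lo) % 1              # fraction of the pixel inside the near edge
    return weight

# A dot spanning pixels 2.25..5.25 (centre 3.75): the central pixel dominates.
print([round(axis_weight(p, 3.75, 2.25, 5.25), 3) for p in range(2, 6)])
# [0.12, 1.0, 0.444, 0.02]
```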

@ -29,8 +29,8 @@ class ColorSafeImagesDecoder(ColorSafeImages):
dataStr = ""
metadataStr = ""
for pageNum in range(pages.totalPages):
page = InputPage(pages, pageNum)
for page_num in range(pages.totalPages):
page = InputPage(pages, page_num)
# TODO: Calculate dynamically
# TODO: Override by command-line argument
@ -39,7 +39,7 @@ class ColorSafeImagesDecoder(ColorSafeImages):
gapSize = defaults.gapSize
eccRate = defaults.eccRate
bounds = get_data_bounds(page, sectorHeight, sectorWidth, gapSize, tmpdir)
bounds = get_data_bounds(page, sectorHeight, sectorWidth, gapSize, page_num, tmpdir)
sectorNum = -1
@ -50,7 +50,7 @@ class ColorSafeImagesDecoder(ColorSafeImages):
# perc = str(int(100.0 * sectorNum / (sectorsHorizontal*sectorsVertical))) + "%"
channelsList = get_normalized_channels_list(page, each_bounds, sectorHeight, sectorWidth,
sectorNum, tmpdir)
page_num, sectorNum, tmpdir)
# TODO: Calculate dynamically
bucketNum = 40

@ -172,7 +172,7 @@ class SectorEncoder(Sector):
self.data = data
self.dataRowCount, self.eccRowCount, self.rsBlockSizes, self.dataBlockSizes, self.eccBlockSizes = \
Sector.getBlockSizes(height, width, colorDepth, eccRate)
Sector.get_block_sizes(height, width, colorDepth, eccRate)
self.putData(dataStart)
self.putECCData(dataStart)
@ -258,7 +258,7 @@ class MetadataSectorEncoder(MetadataSector, SectorEncoder):
self.dataStart = 0 # TODO: Make this an argument for putdata, not a self object
self.dataRowCount, self.eccRowCount, self.rsBlockSizes, self.dataBlockSizes, self.eccBlockSizes = \
Sector.getBlockSizes(height, width, colorDepth, eccRate)
Sector.get_block_sizes(height, width, colorDepth, eccRate)
self.putMetadata(metadata)
self.putData()
@ -422,7 +422,7 @@ class ColorSafeFileEncoder(ColorSafeFile):
self.dataRowCount / constants.ByteSize
for dataStart in range(0, len(self.data), self.dataPerSector):
# TODO: Setting data into Sector in place (using Sector's dataStart
# TODO: Setting data into Sector in place (using Sector's dataStart)
# argument) may improve performance
data = self.data[dataStart: dataStart + self.dataPerSector]

@ -28,6 +28,16 @@ def standard_deviation_squared(l):
return sum(map(lambda x: (x - average(l)) ** 2, l)) / (len(l) - 1)
def weighted_standard_deviation_squared(l):
if len(l) < 2:
# TODO: Throw error
pass
w_sum = sum(map(lambda (x, w): w, l))
l_avg = sum(map(lambda (x, w): x * w, l)) / w_sum
return sum(map(lambda (x, w): ((x - l_avg) * w) ** 2, l)) / (w_sum ** 2)
def sum_of_squares(l):
return sum(map(lambda x: x ** 2, l))
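For readers on Python 3 (the tuple-unpacking lambdas above are Python 2 only), an equivalent rendering of the new helper with a quick numeric check; this sketch raises for short input where the diff leaves a TODO.

```python
def weighted_standard_deviation_squared(pairs):
    # pairs is a list of (value, weight) tuples; the formula matches the diff above.
    if len(pairs) < 2:
        raise ValueError("need at least two (value, weight) pairs")
    w_sum = sum(w for _, w in pairs)
    avg = sum(x * w for x, w in pairs) / w_sum
    return sum(((x - avg) * w) ** 2 for x, w in pairs) / (w_sum ** 2)

print(weighted_standard_deviation_squared([(0.0, 1.0), (1.0, 1.0)]))  # 0.125
```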

@ -0,0 +1,15 @@
# To Do
General changes to encoding:
- The solid grid and gaps could potentially be replaced by special symbols where the grid lines meet, similar to QR codes and Optar. Assuming that decoding accuracy is roughly the same, we could fit in sector-local ECC bits where the border and gap were (each border of ECC bits correcting the half of the sector nearest to it, and directly between the symbols could be a timing pattern), without changing the simple sector scheme already in place. This could also be optional, as part of a different data encoding mode.
- An optional encoding feature could fill remaining page space with extra ECC bits (rather than metadata sectors). Extra ECC improves metadata redundancy indirectly.
- Metadata can be arbitrary length if we use a multipart key: NAM0, NAM1, NAM2, ... NAMT (total number of NAM keys).
- Add a shade byte to metadata header, right after color depth.
- The XOR mask could be changed by the data encoding mode to nearly eliminate the possibility of ambiguous encoding (data with an unintentional first magic row in any sector can be modified by trying other XOR schemes). A different mask can also optimize data to be as readable as possible (no too-dark or too-light sectors). If the mask is only applied to data sectors, then this could be driven by a metadata key-pair rather than metadata header.
- All ambiguous sectors should have their sector number marked in another metadata sector, rather than the file being marked as ambiguous without knowledge of the affected locations.
- Eventually, predefined file-types could be supported with well-defined metadata parameters. Future-proof image and audio could be created by converting from common standard types.
- A future-proof optional data compression and encryption scheme should be supported eventually.
- Each metadata block should have a small CRC value regarding its important header information to avoid it being read incorrectly. If it's incorrect, technically we cannot unambiguously correct it, since we need to read it first to know the data, ECC, and metadata schemes unambiguously.
- The first block on any page should be a metadata block, to improve decoding for the common case. Random but reproducible placement can be applied to subsequent blocks. But without local ECC or CRC, reading the metadata block will not improve speed.
- Magic row can technically be configurable, assuming ECC rows are preceded by the same magic row, and there are a sufficient number of sectors to infer it - or if we use the first metadata sector's first row (could be flaky).

@ -1,35 +1,39 @@
import glob
import os
import pytest
import random
from colorsafe.exceptions import EncodingError
from colorsafe import utils
from colorsafe.decoder.csdecoder_manager import ColorSafeDecoder
from colorsafe.encoder.csencoder_manager import ColorSafeEncoder
from test_utils import texts, in_file_name, image_alterations, out_file_name, metadata_file_name
from test_utils import texts, get_random_string, gaussian_blur_image, modify, shift_image, rotate_image, resize_image, \
no_modify, offset_partial
# TODO: Try a local threshold, rather than sector-wide, for better decoding results.
COLOR_DEPTH_RANGE = range(1, 4)
COLOR_DEPTH_MIN = 1
COLOR_DEPTH_MAX = 3
COLOR_DEPTH_RANGE = range(COLOR_DEPTH_MIN, COLOR_DEPTH_MAX + 1)
DEBUG = False
RANDOM_TEST_TRIALS = 3
RANDOM_TEST_TRIALS = 2
params_random = {
("random_data", "random", "none", 3, 3, 1, 1, 100),
("random_data_2_ppd", "random", "none", 3, 3, 2, 2, 100),
("random_1000", "random_1000", no_modify, 3, 3, 1, 1, 100),
# ("random_1000_2_ppd", "random_1000", no_modify, 3, 3, 2, 2, 100), # TODO: This test is flaky
}
# TODO: Fix for color depths 2 and 3
@pytest.mark.parametrize('execution_number', range(RANDOM_TEST_TRIALS))
@pytest.mark.parametrize(
"color_depth",
range(1, 2))
COLOR_DEPTH_RANGE)
@pytest.mark.parametrize(
"test_name,"
"text_index,"
"image_alteration,"
"modify,"
"page_height,"
"page_width,"
"dot_fill_pixels,"
@ -40,7 +44,7 @@ def test_e2e_random(execution_number,
tmpdir,
test_name,
text_index,
image_alteration,
modify,
color_depth,
page_height,
page_width,
@ -50,14 +54,13 @@ def test_e2e_random(execution_number,
):
# TODO: Test a random unicode string case
def get_random_string():
return ''.join(chr(random.randint(0, 2 ** 7 - 1)) for i in range(1000))
texts['random'] = get_random_string()
texts['random_1000'] = get_random_string(1000)
test_e2e(tmpdir,
test_name,
text_index,
image_alteration,
modify,
color_depth,
page_height,
page_width,
@ -69,16 +72,22 @@ def test_e2e_random(execution_number,
# Params: Test Name, Colors, Height, Width, DFP, PPD, DPI, Text index
params = [
("standard", "lorem", "none", 11, 8.5, 1, 1, 100),
("smaller_page", "lorem", "none", 3, 3, 1, 1, 100),
("2_ppd_1_dfp", "lorem", "none", 3, 3, 1, 2, 100),
("4_ppd_4_dfp", "lorem", "none", 3, 3, 4, 4, 100),
("150_dpi", "lorem", "none", 3, 3, 1, 1, 150),
("blur_2", "lorem", "gaussian_blur0.2", 3, 3, 1, 1, 100),
("blur_2_dfp_2", "lorem", "gaussian_blur0.2", 3, 3, 2, 2, 100),
("rotate_0.1", "lorem", "rotate0.1", 3, 3, 4, 4, 100),
("shift_10", "lorem", "shift10", 3, 3, 1, 1, 100), # TODO: Test more data rows here, e.g. a bigger string
# ("resize_2.1x", "lorem", "resize2.1x", 3, 3, 1, 1, 100), # TODO: Fix this, decoding needs bilinear interpolation
("ansi_letter", "lorem", no_modify, 11, 8.5, 1, 1, 100),
("smaller_page", "lorem", no_modify, 3, 3, 1, 1, 100),
("multiple_rows", "random_const_3000", no_modify, 3, 3, 1, 1, 100),
("multiple_pages", "random_const_20000", no_modify, 3, 3, 1, 1, 100), # TODO: This works, but pages not filled in
("2_ppd_1_dfp", "lorem", no_modify, 3, 3, 1, 2, 100),
("4_ppd_4_dfp", "lorem", no_modify, 3, 3, 4, 4, 100),
("150_dpi", "lorem", no_modify, 3, 3, 1, 1, 150),
("blur_0.2", "lorem", modify(gaussian_blur_image, 0.2), 3, 3, 1, 1, 100),
("blur_0.2_dfp_2", "lorem", modify(gaussian_blur_image, 0.2), 3, 3, 2, 2, 100),
("rotate_0.2", "lorem", modify(rotate_image, 0.2), 3, 3, 4, 4, 100),
("shift_10", "lorem", modify(shift_image, 10), 3, 3, 1, 1, 100),
("offset_partial", "lorem", modify(offset_partial, 3.045, 2.981), 3, 3, 1, 1, 100),
("resize_2x", "lorem", modify(resize_image, 2, 2), 3, 3, 1, 1, 100),
("resize_2.5x", "lorem", modify(resize_image, 2.5, 2.5), 3, 3, 1, 1, 100),
("resize_3x", "lorem", modify(resize_image, 3, 3), 3, 3, 1, 1, 100),
("resize_2.5x_3x", "lorem", modify(resize_image, 2.5, 3), 3, 3, 1, 1, 100),
]
@ -88,7 +97,7 @@ params = [
@pytest.mark.parametrize(
"test_name,"
"text_index,"
"image_alteration,"
"modify,"
"page_height,"
"page_width,"
"dot_fill_pixels,"
@ -98,7 +107,7 @@ params = [
def test_e2e(tmpdir,
test_name,
text_index, # Use text index, not text, to avoid an extremely large PyTest test name
image_alteration,
modify,
color_depth,
page_height,
page_width,
@ -111,6 +120,10 @@ def test_e2e(tmpdir,
:param test_name Test name, to make it easier to associate pytest results with the test they belong to.
"""
in_file_name = "text.txt"
out_file_name = "out.txt"
metadata_file_name = "metadata.txt"
border_top = 0.2
border_bottom = border_left = border_right = 0.1
@ -143,7 +156,7 @@ def test_e2e(tmpdir,
True)
# Alterations
desired_wildcard = image_alterations[image_alteration](tmpdir)
desired_wildcard = modify(tmpdir)
# Decoding
out_file = tmpdir.join(out_file_name)

@ -1,7 +1,7 @@
from colorsafe.constants import MagicByte, DefaultThresholdWeight
from colorsafe import constants
from colorsafe.csdatastructures import ColorChannels
from colorsafe.csdatastructures import ColorChannels, Sector
from colorsafe.decoder.csdecoder import DotDecoder, DotByteDecoder, DotRowDecoder
from colorsafe.encoder.csencoder import DotEncoder, DotRowEncoder, DotByteEncoder
@ -167,6 +167,7 @@ def test_dotByte_decode():
dotByte = DotByteDecoder(c * 2, 1, DefaultThresholdWeight)
assert dotByte.bytesList == [0b11001100]
# DotRow
@ -247,3 +248,73 @@ def test_dotRow_decode():
]
dotRow = DotRowDecoder(c * 4, 1, 16, 0, 0.5, True)
assert dotRow.bytesList == [ord('S'), ord('e')]
# Color depth 2
c = [
ColorChannels(1.0, 0.0, 1.0),
ColorChannels(0.0, 1.0, 1.0),
ColorChannels(1.0, 1.0, 1.0),
ColorChannels(1.0, 0.0, 1.0),
ColorChannels(1.0, 0.0, 1.0),
ColorChannels(1.0, 1.0, 1.0),
ColorChannels(1.0, 1.0, 1.0),
ColorChannels(1.0, 1.0, 1.0),
ColorChannels(0.0, 1.0, 1.0),
ColorChannels(0.0, 1.0, 1.0),
ColorChannels(0.0, 1.0, 1.0),
ColorChannels(1.0, 1.0, 0.0),
ColorChannels(1.0, 0.0, 1.0),
ColorChannels(1.0, 1.0, 1.0),
ColorChannels(1.0, 1.0, 1.0),
ColorChannels(1.0, 1.0, 1.0)]
dotRow = DotRowDecoder(c, 2, 16, 0, 0.5, True)
assert dotRow.bytesList == [ord('W'), ord('L'), ord('Z'), ord('M')]
def test_sector_get_block_sizes_color_1():
color_depth = 1
dataRowCount, eccRowCount, rsBlockSizes, dataBlockSizes, eccBlockSizes = \
Sector.get_block_sizes(64, 64, color_depth, 0.2)
assert dataRowCount == 52
assert eccRowCount == 11
assert sum(rsBlockSizes) == (dataRowCount + eccRowCount) * 64 * 1 / 8
assert sum(dataBlockSizes) == dataRowCount * 64 * 1 / 8
assert sum(eccBlockSizes) == eccRowCount * 64 * 1 / 8
assert rsBlockSizes == [252, 252]
assert dataBlockSizes == [208, 208]
assert eccBlockSizes == [44, 44]
def test_sector_get_block_sizes_color_2():
color_depth = 2
dataRowCount, eccRowCount, rsBlockSizes, dataBlockSizes, eccBlockSizes = \
Sector.get_block_sizes(64, 64, color_depth, 0.2)
assert dataRowCount == 52
assert eccRowCount == 11
assert sum(rsBlockSizes) == (dataRowCount + eccRowCount) * 64 * color_depth / 8
assert sum(dataBlockSizes) == dataRowCount * 64 * color_depth / 8
assert sum(eccBlockSizes) == eccRowCount * 64 * color_depth / 8
assert rsBlockSizes == [255, 255, 249, 249]
assert dataBlockSizes == [210, 210, 206, 206]
assert eccBlockSizes == [45, 45, 43, 43]
def test_sector_get_block_sizes_color_3():
color_depth = 3
dataRowCount, eccRowCount, rsBlockSizes, dataBlockSizes, eccBlockSizes = \
Sector.get_block_sizes(64, 64, color_depth, 0.2)
assert dataRowCount == 52
assert eccRowCount == 11
assert sum(rsBlockSizes) == (dataRowCount + eccRowCount) * 64 * color_depth / 8
assert sum(dataBlockSizes) == dataRowCount * 64 * color_depth / 8
assert sum(eccBlockSizes) == eccRowCount * 64 * color_depth / 8
assert rsBlockSizes == [255, 255, 255, 255, 246, 246]
assert dataBlockSizes == [210, 210, 210, 210, 204, 204]
assert eccBlockSizes == [45, 45, 45, 45, 42, 42]
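The arithmetic behind the color-depth-2 assertions above, assuming RSBlockSizeMax = 255, ByteSize = 8 and MagicRowHeight = 1 (the values used in this repo's constants):

```python
height, width, color_depth = 64, 64, 2
data_rows, ecc_rows = 52, 11                                   # as asserted above
data_bytes = data_rows * width * color_depth // 8              # 832
ecc_bytes = ecc_rows * width * color_depth // 8                # 176
max_data_block = int(round(255.0 * data_rows / (height - 1)))  # round(210.48) -> 210
max_ecc_block = int(round(255.0 * ecc_rows / (height - 1)))    # round(44.52)  -> 45
# Splitting 832 into blocks of at most 210 and averaging the last two gives
# [210, 210, 206, 206]; splitting 176 by 45 gives [45, 45, 43, 43].
print(data_bytes, max_data_block, ecc_bytes, max_ecc_block)
```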

@ -1,3 +1,6 @@
import random
import string
import PIL
import glob
import os
@ -6,13 +9,16 @@ from PIL import Image, ImageFilter
from colorsafe.constants import MagicByte
in_file_name = "text.txt"
out_file_name = "out.txt"
metadata_file_name = "metadata.txt"
out_image_name_wildcard = "out_*.png"
altered_image_name_wildcard = "altered_*.png"
out_image_name_prefix = "out"
altered_image_name_prefix = "altered"
def get_random_string(n, seed=None):
random.seed(seed)
return ''.join(chr(random.randint(0, 2 ** 7 - 1)) for _ in xrange(n))
def get_random_alphanumeric_string(n, seed=None):
random.seed(seed)
return ''.join(random.choice(string.ascii_letters + string.digits) for _ in xrange(n))
texts = {"lorem":
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus sollicitudin tincidunt diam id gravida."
@ -23,18 +29,21 @@ texts = {"lorem":
"amet augue. Nam rhoncus leo non urna sodales, vitae elementum magna viverra. Aliquam aliquam eu neque vel"
"dictum. Nulla fermentum placerat elit. Vivamus non augue congue, maximus sem non, mollis nulla. Donec non"
"elit purus.",
"magic_bytes": chr(MagicByte) * 1000}
# TODO: Test a unicode string case
"magic_bytes": chr(MagicByte) * 1000,
"random_const_3000": get_random_alphanumeric_string(3000, 0),
"random_const_20000": get_random_alphanumeric_string(20000, 0)}
image_alterations = {"none": lambda tmpdir: out_image_name_wildcard,
"rotate0.1": lambda tmpdir: modify_image(tmpdir, rotate_image, 0.2),
"gaussian_blur0.2": lambda tmpdir: modify_image(tmpdir, gaussian_blur_image, 0.2),
"shift10": lambda tmpdir: modify_image(tmpdir, shift_image, 10),
"resize2.1x": lambda tmpdir: modify_image(tmpdir, resize_image, 2.1)
}
def modify(alter, *args):
return lambda tmpdir: modify_tmpdir(tmpdir, alter, *args)
def modify_image(tmpdir, alter_image_function, *alter_function_args):
def modify_tmpdir(tmpdir, alter, *args):
out_image_name_wildcard = "out_*.png"
altered_image_name_wildcard = "altered_*.png"
out_image_name_prefix = "out"
altered_image_name_prefix = "altered"
filenames = glob.glob(
os.path.join(
str(tmpdir),
@ -44,7 +53,7 @@ def modify_image(tmpdir, alter_image_function, *alter_function_args):
try:
img = Image.open(filename)
out = alter_image_function(img, alter_function_args)
out = alter(img, *args)
altered_file_name = filename.replace(out_image_name_prefix, altered_image_name_prefix)
out.convert(img.mode).save(altered_file_name)
@ -55,8 +64,12 @@ def modify_image(tmpdir, alter_image_function, *alter_function_args):
return altered_image_name_wildcard
def rotate_image(image, args):
angle = args[0]
def no_modify(tmpdir):
out_image_name_wildcard = "out_*.png"
return out_image_name_wildcard
def rotate_image(image, angle):
image2 = image.convert('RGBA')
@ -67,22 +80,28 @@ def rotate_image(image, args):
return Image.composite(rotated_image, rotated_image2, rotated_image)
def gaussian_blur_image(image, args):
radius = args[0]
def offset_partial(image, width_factor, height_factor):
# Half pixel offset in x and y
resize = image.resize((int(image.width * width_factor), int(image.height * height_factor)), PIL.Image.BICUBIC)
resize2 = PIL.ImageChops.offset(resize, 1, yoffset=1)
blend1 = PIL.ImageChops.blend(resize, resize2, 0.5)
blend2 = PIL.ImageChops.blend(resize, blend1, 0.5)
return blend2
def gaussian_blur_image(image, radius):
return image.filter(ImageFilter.GaussianBlur(radius=radius))
def shift_image(image, args):
yoffset = args[0]
def shift_image(image, yoffset):
return PIL.ImageChops.offset(image, 0, yoffset=yoffset)
def resize_image(image, args):
factor = args[0]
def resize_image(image, width_factor, height_factor):
out = image.resize((int(image.width * factor), int(image.height * factor)), PIL.Image.BICUBIC)
out2 = image.resize((int(out.width * (1 / factor)), int(out.height * (1 / factor))), PIL.Image.BICUBIC)
out = image.resize((int(image.width * width_factor), int(image.height * height_factor)), PIL.Image.BICUBIC)
return out2
return out
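A stripped-down sketch of the closure pattern test_utils now uses: modify binds an image-altering function plus its arguments, and the end-to-end test later calls the result with only tmpdir. The body of modify_tmpdir is stubbed here; the real helper globs the encoder's output images, alters each one, and returns the altered-filename wildcard.

```python
def modify(alter, *args):
    # Returns a single-argument callable, so test parameter lists can carry a
    # fully configured alteration such as modify(resize_image, 2.5, 3).
    return lambda tmpdir: modify_tmpdir(tmpdir, alter, *args)

def modify_tmpdir(tmpdir, alter, *args):
    print("would alter images under", tmpdir, "with", alter.__name__, args)
    return "altered_*.png"

def resize_image(image, width_factor, height_factor):
    return image  # stub; the real helper resizes with PIL.Image.BICUBIC

alteration = modify(resize_image, 2.5, 3)
print(alteration("/tmp/testdir"))  # altered_*.png
```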
