Xit/xit.py

#!/usr/bin/python3

""" xit

Usage:
  xit (<FILE>)... [--size <SIZE>] [--seed <SEED>] [--overwrite] [--help]

XOR(in files) == XOR(out files)

Use cases:
$ xit A ^ A1 A2     # splits the secret A into two chuncks A1 and A2
$ xit A1 A2 ^ A     # gets the secret back, turns A1 and A2 into A
$ xit ^ R1 R2 R3    # generates random files R1, R2, and R3

The files at the LEFT  of ^ are the INput  files.
The files at the RIGHT of ^ are the OUTput files.

Special cases:
$ xit A1 A2 ^ B1 B2  # A1 xor A2 == B1 xor B2
$ xit C ^ B A-{3}    # generates 4 files: A1, A-|1|, A-|2|, A-|3|
                     # So, C == B ^ A-|1| ^ A-|2| ^ A-|3|
$ xit C ^ B A-{2} B-{2}  # generates 5 files:
                         # So,  C == B ^ A-|1| ^ A-|2|
                         # AND: C == B ^ B-|1| ^ B-|2|

|x| means the sha1 digest, included in the file name, of the file itself.

Arguments:
    size          'even':  assumes all input files are of the same size [default]
                  'min':   keeps the smallest size, truncate others if needed
                  'max':   keeps the biggest size, pad others if needed
                  integer: sets all files at the same size, truncating or padding

    FILE          A bunch of files, first the input files then the output files.
                  Separated by ^. Without input files, it generates random files.

    input files   Enclosed in underscores (_) means a string as a key
                  Otherwise, means a readable file

    output files  With one {nb} means a multiple output, more than one is an error
                  Otherwise, means a writable file

Options:
    --size, -s SIZE  input size: first, min, max, even [default: even], last or a size
    --seed, -e SEED  Uses a seed to generate test randomness, a zero triggers the
                     Python's internal Random instead of urandom
    --overwrite, -w  Allows overwritting of existing output files
"""

import sys, re, random
from docopt import docopt
from pathlib import Path
from collections import Counter
import lib_xit
import commands
import utils

# This file is meant to be executed as a script only. When it is imported
# instead, sys.exit() raises SystemExit right away, before anything else runs.
if __name__ != '__main__':
    sys.exit()

def manageFileErrors(inArgs, outFiles, outSha1, overwrite, size):
    """ Checks the input and output file names before any processing.

    Arguments:
        inArgs     list of input file names
        outFiles   list of plain output file names
        outSha1    not used by the checks (kept for the callers)
        overwrite  when true, already existing output files are accepted
        size       size policy; only 'even' triggers the same-size check

    Returns a dict {error message: list of offending file names}.
    An empty list means the corresponding check passed.
    """
    # stat() follows symbolic links, exactly like is_file() does.
    # lstat() would measure the link itself instead of the file it points to.
    sizes = [(a, Path(a).stat().st_size) for a in inArgs if Path(a).is_file()]

    # Resolved paths, so that 'a' and './a' are seen as the same file
    resolvedIn = [str(Path(a).resolve()) for a in inArgs]
    resolvedInSet = set(resolvedIn)

    sizesDiffer = size == 'even' and len({s for _, s in sizes}) > 1

    errors = {
        'Input files not found':
            [a for a in inArgs if not Path(a).is_file()],
        'Output files already exist (see --overwrite)':
            [] if overwrite else [a for a in outFiles if Path(a).is_file()],
        'Duplicate input files':
            [f for f, count in Counter(resolvedIn).items() if count > 1],
        'Duplicate output files':
            [f for f, count in Counter(outFiles).items() if count > 1],
        'Files both input AND output':
            # dict.fromkeys() reports each output name once, in the given order
            [o for o in dict.fromkeys(outFiles)
                if str(Path(o).resolve()) in resolvedInSet],
        'Input files have different sizes (see --size)':
            # Always a list: all the inputs when the sizes differ, else empty
            list(inArgs) if sizesDiffer else [],
        'Some input files have null size':
            [a for a, s in sizes if s == 0],
    }
    return errors

# Command line parsing: docopt builds the parser from the module docstring
arguments = docopt(__doc__)
files, overwrite, size, seed = (
    arguments[a]
    for a in ('<FILE>', '--overwrite', '--size', '--seed')
)
try:
    if seed: # This makes predictable outputs for random generators! ONLY useful for test purpose!
        # Checked inside the try block, so a bad seed ends with a clean
        # message instead of a traceback
        try:
            seedValue = int(seed)
        except ValueError:
            raise ValueError('seed must be an integer!') from None
        if seedValue: random.seed(seed) # A zero just forces the use of Python's random instead of urandom
        lib_xit.useUrandom = False

    # Splits the arguments around the ^ separator: inputs first, then outputs
    try:
        inArgs, outArgs = utils.splitList(files, '^')
    except ValueError:
        raise ValueError('Exactly one ^ required!') from None
    if not outArgs:
        raise ValueError('There must be at least an output after ^')

    # Sorts the inputs: the ones recognised by utils.StrKey (string keys,
    # per the usage text) apart from the plain file names
    inFileArgs, inStrArgs = [], []
    for i in inArgs:
        m = utils.StrKey(i)
        if m:
            inStrArgs.append(str(m))
        else:
            inFileArgs.append(i)

    # Sorts the outputs: the ones recognised by utils.Sha1File (the {nb}
    # multiple outputs, per the usage text) apart from the plain file names
    outFiles, outSha1Files = [], []
    for o in outArgs:
        s = utils.Sha1File(o)
        if s:
            outSha1Files.append(s)
        else:
            outFiles.append(o)

    # Prevents output file being the same as the input file
    if len(inArgs)==1 : # Only one input, file. If there are two, it can be a simple xor.
        if not outFiles: # Without normal file, all {1} sha1 file will be the same as the input file
            for o in outSha1Files:
                if int(o) == 1:
                    raise OSError('The output file will have the same content as the input file!')
        elif len(outFiles)==1 and not outSha1Files: # One input file and one output file
            raise OSError('The output file will have the same content as the input file!')

    # The size is either one of the keywords below or a strictly positive number
    sizesAvailable = ('first', 'even', 'min', 'max', 'last')
    size = size.lower()
    if size not in sizesAvailable:
        try:
            size = utils.evalMultiple(size)
        except ValueError:
            raise ValueError('size is not an int and not in: '+', '.join(sizesAvailable)) from None
        if size <= 0:
            raise ValueError('size must be strictly positive!')

    assert isinstance(size, int) or size in sizesAvailable
    if outSha1Files: overwrite = True # As sha1 is known after the file checks...
    errors = manageFileErrors(inFileArgs, outFiles, outSha1Files, overwrite, size)
    if any(errors.values()):
        # 'culprits' and not 'files': the command line list is not rebound
        for error, culprits in errors.items():
            if culprits:
                sys.stderr.write(error+':\n')
                for e in culprits: sys.stderr.write('  '+e+'\n')
        # Stops here: going on would process files that have just been
        # reported as invalid (e.g. overwrite outputs without --overwrite).
        # SystemExit is not caught by the except clause below.
        sys.exit(1)

    commands.xorFiles(inStrArgs, inFileArgs, outFiles, outSha1Files, size)
except (OSError, ValueError, IndexError) as e:
    # sys.exit() with an exception prints its message on stderr, exit status 1
    sys.exit(e)