Xit/xit.py

146 lines
5.5 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/python3
""" xit
Usage:
xit (<FILE>)... [--size <SIZE>] [--seed <SEED>] [--overwrite] [--help]
XOR(in files) == XOR(out files)
Use cases:
$ xit A ^ A1 A2 # splits the secret A into two chunks A1 and A2
$ xit A1 A2 ^ A # gets the secret back, turns A1 and A2 into A
$ xit ^ R1 R2 R3 # generates random files R1, R2, and R3
The files at the LEFT of ^ are the INput files.
The files at the RIGHT of ^ are the OUTput files.
Special cases:
$ xit A1 A2 ^ B1 B2 # A1 xor A2 == B1 xor B2
$ xit C ^ B A-{3} # generates 4 files: B, A-|1|, A-|2|, A-|3|
# So, C == B ^ A-|1| ^ A-|2| ^ A-|3|
$ xit C ^ B A-{2} B-{2} # generates 5 files:
# So, C == B ^ A-|1| ^ A-|2|
# AND: C == B ^ B-|1| ^ B-|2|
|x| means the sha1 digest, included in the file name, of the file itself.
Arguments:
size 'even': assumes all input files are of the same size [default]
'min': keeps the smallest size, truncate others if needed
'max': keeps the biggest size, pad others if needed
integer: sets all files at the same size, truncating or padding
FILE A bunch of files, first the input files then the output files.
Separated by ^. Without input files, it generates random files.
input files Enclosed in underscores (_) means a string as a key
Otherwise, means a readable file
output files With one {nb} means a multiple output, more than one is an error
Otherwise, means a writable file
Options:
--size, -s SIZE input size: first, min, max, even [default: even], last or a size
--seed, -e SEED Uses a seed to generate test randomness, a zero triggers the
Python's internal Random instead of urandom
--overwrite, -w Allows overwritting of existing output files
"""
import sys, re, random
from docopt import docopt
from pathlib import Path
from collections import Counter
import lib_xit
import commands
import utils
# This file is meant to be run as a script only: when it is imported as a
# module instead, it exits immediately, before anything below is executed.
if __name__ != '__main__': sys.exit()
def manageFileErrors(inArgs, outFiles, outSha1, overwrite, size):
    """Check the input/output file arguments and collect every problem found.

    Arguments:
        inArgs:    paths of the input files.
        outFiles:  paths of the plain output files.
        outSha1:   sha1-named outputs; not inspected here, the parameter is
                   kept so that the call signature stays unchanged.
        overwrite: when true, already existing output files are not reported.
        size:      size policy; the sizes of the input files are compared
                   only when it is 'even'.

    Returns:
        A dict mapping each error message to the list of offending paths.
        Every value is a list, and it is empty when that check passed, so
        ``any(errors.values())`` tells whether something is wrong.
    """
    def resolved(name):
        # Canonical absolute form, so that 'a', './a' and a symlink to 'a'
        # are all recognised as the same file.
        return str(Path(name).resolve())

    def duplicates(names):
        return [f for f, count in Counter(names).items() if count > 1]

    # Each existing input file is stat'ed only once.  stat() (and not
    # lstat()) is used because is_file() follows symlinks: the size that
    # matters is the one of the target, not the one of the link itself.
    inputSizes = {a: Path(a).stat().st_size
                  for a in inArgs if Path(a).is_file()}

    resolvedInputs = [resolved(a) for a in inArgs]
    resolvedInputSet = set(resolvedInputs)
    sizesDiffer = size == 'even' and len(set(inputSizes.values())) > 1

    errors = {
        'Input files not found':
            [a for a in inArgs if a not in inputSizes],
        'Output files already exist (see --overwrite)':
            [] if overwrite else [a for a in outFiles if Path(a).is_file()],
        'Duplicate input files':
            duplicates(resolvedInputs),
        'Duplicate output files':
            duplicates([resolved(a) for a in outFiles]),
        'Files both input AND output':
            # dict.fromkeys() drops repeated names while keeping their order.
            list(dict.fromkeys(
                a for a in outFiles if resolved(a) in resolvedInputSet)),
        'Input files have different sizes (see --size)':
            list(inArgs) if sizesDiffer else [],
        'Some input files have null size':
            [a for a in inArgs if inputSizes.get(a) == 0],
    }
    return errors
# ---------------------------------------------------------------------------
# Script body: parse the command line, validate the arguments, then hand the
# actual work over to commands.xorFiles().
# ---------------------------------------------------------------------------
arguments = docopt(__doc__)
files, overwrite, size, seed = (
    arguments[a]
    for a in ('<FILE>', '--overwrite', '--size', '--seed')
)

try:
    if seed:
        # This makes predictable outputs for random generators!
        # ONLY useful for test purposes!
        try:
            seedIsNonZero = bool(int(seed))
        except ValueError:
            # Raised inside the outer try, so that a bad seed ends with a
            # clean error message instead of a traceback.
            raise ValueError('seed must be an integer!')
        if seedIsNonZero:
            random.seed(seed)
        # A zero just forces the use of Python's random instead of urandom
        lib_xit.useUrandom = False

    # Everything before the single ^ is an input, everything after an output.
    try:
        inArgs, outArgs = utils.splitList(files, '^')
    except ValueError:
        raise ValueError('Exactly one ^ required!')
    if not outArgs:
        raise ValueError('There must be at least an output after ^')

    # Inputs are either string keys (recognised by utils.StrKey) or files.
    inFileArgs, inStrArgs = [], []
    for i in inArgs:
        m = utils.StrKey(i)
        if m:
            inStrArgs.append(str(m))
        else:
            inFileArgs.append(i)

    # Outputs are either sha1-named multiple outputs (recognised by
    # utils.Sha1File) or plain files.
    outFiles, outSha1Files = [], []
    for o in outArgs:
        s = utils.Sha1File(o)
        if s:
            outSha1Files.append(s)
        else:
            outFiles.append(o)

    # Prevents output file being the same as the input file
    if len(inArgs) == 1:  # Only one input, file. If there are two, it can be a simple xor.
        if not len(outFiles):  # Without normal file, all {1} sha1 file will be the same as the input file
            for o in outSha1Files:
                if int(o) == 1:
                    raise OSError('The output file will have the same content as the input file!')
        elif len(outFiles) == 1 and not outSha1Files:  # One input file and one output file
            raise OSError('The output file will have the same content as the input file!')

    # The size is either one of the keywords below or a strictly positive
    # number, as evaluated by utils.evalMultiple().
    sizesAvailable = ('first', 'even', 'min', 'max', 'last')
    size = size.lower()
    if size not in sizesAvailable:
        try:
            size = utils.evalMultiple(size)
        except ValueError:
            raise ValueError('size is not an int and not in: '+', '.join(sizesAvailable))
        if size <= 0:
            raise ValueError('size must be strictly positive!')
    assert type(size) is int or size in sizesAvailable

    if outSha1Files: overwrite = True  # As sha1 is known after the file checks...

    errors = manageFileErrors(inFileArgs, outFiles, outSha1Files, overwrite, size)
    if any(errors.values()):
        for error, culprits in errors.items():
            if culprits:
                sys.stderr.write(error+':\n')
                for e in culprits: sys.stderr.write(' '+e+'\n')
        # Stop here: carrying on would run the xor despite the reported
        # problems (for instance with output files that already exist and
        # must not be overwritten).
        sys.exit(1)

    commands.xorFiles(inStrArgs, inFileArgs, outFiles, outSha1Files, size)
except (OSError, ValueError, IndexError) as e:
    sys.exit(e)