Christophe HENRY
3d71aa2e6f
It's not designed to be used for cryptographic stuff, unless you know what you're doing or you want to hide from your little sister. Xit processes files in the memory and is not designed to treat huge data.
146 lines
5.5 KiB
Python
146 lines
5.5 KiB
Python
#!/usr/bin/python3
|
||
|
||
""" xit
|
||
|
||
Usage:
|
||
xit (<FILE>)... [--size <SIZE>] [--seed <SEED>] [--overwrite] [--help]
|
||
|
||
XOR(in files) == XOR(out files)
|
||
|
||
Use cases:
|
||
$ xit A ^ A1 A2 # splits the secret A into two chunks A1 and A2
|
||
$ xit A1 A2 ^ A # gets the secret back, turns A1 and A2 into A
|
||
$ xit ^ R1 R2 R3 # generates random files R1, R2, and R3
|
||
|
||
The files at the LEFT of ^ are the INput files.
|
||
The files at the RIGHT of ^ are the OUTput files.
|
||
|
||
Special cases:
|
||
$ xit A1 A2 ^ B1 B2 # A1 xor A2 == B1 xor B2
|
||
$ xit C ^ B A-{3} # generates 4 files: B, A-|1|, A-|2|, A-|3|
|
||
# So, C == B ^ A-|1| ^ A-|2| ^ A-|3|
|
||
$ xit C ^ B A-{2} B-{2} # generates 5 files:
|
||
# So, C == B ^ A-|1| ^ A-|2|
|
||
# AND: C == B ^ B-|1| ^ B-|2|
|
||
|
||
|x| means the sha1 digest, included in the file name, of the file itself.
|
||
|
||
Arguments:
|
||
size 'even': assumes all input files are of the same size [default]
|
||
'min': keeps the smallest size, truncate others if needed
|
||
'max': keeps the biggest size, pad others if needed
|
||
integer: sets all files at the same size, truncating or padding
|
||
|
||
FILE A bunch of files, first the input files then the output files.
|
||
Separated by ^. Without input files, it generates random files.
|
||
|
||
input files Enclosed in underscores (_) means a string as a key
|
||
Otherwise, means a readable file
|
||
|
||
output files With one {nb} means a multiple output, more than one is an error
|
||
Otherwise, means a writable file
|
||
|
||
Options:
|
||
--size, -s SIZE input size: first, min, max, even [default: even], last or a size
|
||
--seed, -e SEED Uses a seed to generate test randomness, a zero triggers the
|
||
Python's internal Random instead of urandom
|
||
--overwrite, -w Allows overwriting of existing output files
|
||
"""
|
||
|
||
import sys, re, random
|
||
from docopt import docopt
|
||
from pathlib import Path
|
||
from collections import Counter
|
||
import lib_xit
|
||
import commands
|
||
import utils
|
||
|
||
# This file is a command-line script only: leave at once when it is imported
# as a module instead of being run directly.
if __name__ != '__main__':
    sys.exit()
|
||
|
||
def manageFileErrors(inArgs, outFiles, outSha1, overwrite, size):
    """Check the input and output file arguments and collect every problem.

    Parameters:
        inArgs:    paths of the input files.
        outFiles:  paths of the plain output files.
        outSha1:   sha1-named output specifications; not inspected here.
        overwrite: when true, already existing output files are not reported.
        size:      size policy; input sizes are compared only when it is
                   'even'.

    Returns:
        A dict mapping each error message to the list of offending files.
        An empty list means that the corresponding check passed.
    """
    def resolved(name):
        # Absolute, symlink-free spelling, so that 'a' and './a' compare equal.
        return str(Path(name).resolve())

    existing = [a for a in inArgs if Path(a).is_file()]
    # stat() follows symlinks exactly like is_file() does, so a symlinked
    # input is measured by its target and not by the link itself.
    sizes = {a: Path(a).stat().st_size for a in existing}
    outResolved = [resolved(o) for o in outFiles]
    outResolvedSet = set(outResolved)

    return {
        'Input files not found':
            [a for a in inArgs if not Path(a).is_file()],
        'Output files already exist (see --overwrite)':
            [] if overwrite else [a for a in outFiles if Path(a).is_file()],
        'Duplicate input files':
            [f for f, count in Counter(resolved(a) for a in inArgs).items()
             if count > 1],
        'Duplicate output files':
            [f for f, count in Counter(outResolved).items() if count > 1],
        # Reported once each, in the order the inputs were given.
        'Files both input AND output':
            list(dict.fromkeys(
                a for a in inArgs if resolved(a) in outResolvedSet)),
        'Input files have different sizes (see --size)':
            list(inArgs)
            if size == 'even' and len(set(sizes.values())) > 1
            else [],
        'Some input files have null size':
            [a for a in existing if sizes[a] == 0],
    }
|
||
|
||
# Read the command line according to the usage text of the module docstring.
arguments = docopt(__doc__)
files, overwrite, size, seed = (
    arguments[a]
    for a in ('<FILE>', '--overwrite', '--size', '--seed')
)

# A seed makes the random generators predictable: ONLY useful for testing!
if seed:
    try:
        seedValue = int(seed)
    except ValueError:
        # Exit with a short message, like the other argument errors do,
        # instead of an uncaught traceback.
        sys.exit('seed must be an integer!')
    # A zero skips the seeding but still forces the use of Python's random
    # instead of urandom. The seed is handed over as the given string.
    if seedValue:
        random.seed(seed)
    lib_xit.useUrandom = False
|
||
# Main processing: parse the file arguments, validate them, then xor.
# Any OSError, ValueError or IndexError ends the script with its message.
try:
    # The arguments are split at ^ into the inputs and the outputs.
    try:
        inArgs, outArgs = utils.splitList(files, '^')
    except ValueError:
        raise ValueError('Exactly one ^ required!') from None
    if not outArgs:
        raise ValueError('There must be at least an output after ^')

    # Inputs recognised by utils.StrKey are kept as strings, the others
    # are treated as files.
    inFileArgs, inStrArgs = [], []
    for i in inArgs:
        m = utils.StrKey(i)
        if m:
            inStrArgs.append(str(m))
        else:
            inFileArgs.append(i)

    # Outputs recognised by utils.Sha1File are kept apart from the plain
    # output files.
    outFiles, outSha1Files = [], []
    for o in outArgs:
        s = utils.Sha1File(o)
        if s:
            outSha1Files.append(s)
        else:
            outFiles.append(o)

    # Prevents output file being the same as the input file
    if len(inArgs) == 1:  # Only one input, file. If there are two, it can be a simple xor.
        if not outFiles:  # Without normal file, all {1} sha1 file will be the same as the input file
            for o in outSha1Files:
                if int(o) == 1:
                    raise OSError('The output file will have the same content as the input file!')
        elif len(outFiles) == 1 and not outSha1Files:  # One input file and one output file
            raise OSError('The output file will have the same content as the input file!')

    # The size is either one of the keywords or a strictly positive number.
    sizesAvailable = ('first', 'even', 'min', 'max', 'last')
    size = size.lower()
    if size not in sizesAvailable:
        try:
            size = utils.evalMultiple(size)
        except ValueError:
            raise ValueError('size is not an int and not in: '+', '.join(sizesAvailable)) from None
        if size <= 0:
            raise ValueError('size must be strictly positive!')

    assert type(size) is int or size in sizesAvailable
    if outSha1Files:
        overwrite = True  # As sha1 is known after the file checks...

    errors = manageFileErrors(inFileArgs, outFiles, outSha1Files, overwrite, size)
    if any(errors.values()):
        for error, names in errors.items():
            if names:
                sys.stderr.write(error+':\n')
                for e in names:
                    sys.stderr.write(' '+e+'\n')
        # Stop here: going on would process files that were just reported
        # as missing, clashing or already existing.
        sys.exit(1)

    commands.xorFiles(inStrArgs, inFileArgs, outFiles, outSha1Files, size)
except (OSError, ValueError, IndexError) as e:
    sys.exit(e)
|
||
|