1
0
mirror of https://gitlab.os-k.eu/os-k-team/kvisc.git synced 2023-08-25 14:05:46 +02:00
kvisc/as/k-as.py
2019-06-16 12:17:31 +02:00

779 lines
19 KiB
Python
Executable File

#!/usr/bin/python3
# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.
import os
import sys
import subprocess
from array import array
from tempfile import TemporaryFile
# Debug switch read by gentext()
WANT_DISASM = False

# Arguments are consumed as follows:
#   argv[1]  source file, opened for reading
#   argv[2]  memory entry point, parsed as an integer (base prefix allowed)
#   argv[3]  output file, opened for binary writing
# The usage text lists them in that same order.
if len(sys.argv) != 4:
    print("Usage: {} (source file) (memory entry point) (output file)"
          .format(sys.argv[0]))
    sys.exit(1)

# Include-expanded source text and intermediate instruction listing
source = TemporaryFile(mode="w+")
instrs = TemporaryFile(mode="w+")

# Binary images of the .data and .text sections
b_data = TemporaryFile(mode="w+b")
b_text = TemporaryFile(mode="w+b")

# Register and instruction name lists, looked up next to this script
# (sys.path[0])
lst_regs = open(os.path.join(sys.path[0], "regs.lst"))
lst_instrs = open(os.path.join(sys.path[0], "instrs.lst"))

main_src = open(sys.argv[1])
b_out = open(sys.argv[3], "wb")

start_addr = int(sys.argv[2], base=0)
def leave():
    """Close every file opened at start-up.

    Covers the temporary files, the output file, the main source file and
    the two ``.lst`` files. Called before each exit of the program.
    """
    handles = (
        source,
        instrs,
        b_out,
        b_data,
        b_text,
        main_src,
        lst_regs,
        lst_instrs,
    )
    for handle in handles:
        handle.close()
#-------------------------------------------------------------------------------
# Defines: preprocessor name -> replacement text
pdefs = {}

# registers (names read from regs.lst, in file order)
pregs = []

# instructions (names read from instrs.lst, in file order)
pinstrs = []

# labels: name -> offset inside the .text / .data section
plabels_text = {}
plabels_data = {}

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# for local labels: the most recent non-local label, which is prepended
# to labels starting with '.'
plastlabel = ''

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0
#-------------------------------------------------------------------------------
def name_valid(name):
    """Return True when every character of *name* is allowed in a label.

    The check is case-insensitive. Allowed characters are ASCII letters,
    digits and ``[ $ . _ + ] = ,``. An empty name is accepted.
    """
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$._+]=,'
    return all(ch in allowed for ch in name.lower())
def is_number(s):
    """Tell whether the string *s* is an integer literal.

    Parsing uses ``int(s, base=0)``, so decimal literals and the ``0x``,
    ``0o`` and ``0b`` prefixes are recognised.
    """
    try:
        int(s, base=0)
        return True
    except ValueError:
        return False
#-------------------------------------------------------------------------------
def parse_lst_regs():
    """Append each line of ``lst_regs``, stripped of whitespace, to ``pregs``."""
    pregs.extend(entry.strip() for entry in lst_regs)
def parse_lst_instrs():
    """Append each line of ``lst_instrs``, stripped of whitespace, to ``pinstrs``."""
    pinstrs.extend(entry.strip() for entry in lst_instrs)
#-------------------------------------------------------------------------------
# Current nesting level of "include" directives, and its upper bound
inc_depth = 0
inc_depth_max = 16


# Quickly goes through source file and resolves "include" directives ONLY
def do_includes(fi):
    """Copy the lines of *fi* into ``source``, expanding include directives.

    A line whose first space-separated token is ``include`` must be followed
    by a file name enclosed in matching single or double quotes. That file
    is opened and processed recursively in place of the directive. Every
    other line is written to ``source`` with trailing whitespace removed.

    ``inc_depth`` tracks the nesting level: it is raised while an included
    file is being processed and lowered again afterwards, so only nested
    includes count towards ``inc_depth_max``.

    On a malformed directive, an unreadable file or too deep a nesting, an
    error is printed and the program exits with status 1.
    """
    global inc_depth

    for line in fi:
        line = line.rstrip()
        tok = line.split(' ', 1)

        if tok[0] != "include":
            source.write("{}\n".format(line))
            continue

        if len(tok) == 1:
            print("Missing parameter for include directive")
            leave()
            sys.exit(1)

        arg = tok[1]
        # Needs an opening and a closing quote of the same kind
        if len(arg) < 2 or arg[0] not in "'\"" or arg[-1] != arg[0]:
            print("Invalid format for include directive: {}".format(line))
            leave()
            sys.exit(1)

        inc = arg[1:-1]

        inc_depth += 1
        if inc_depth >= inc_depth_max:
            print("Maximal include depth reached: {}".format(line))
            leave()
            sys.exit(1)

        try:
            new_fi = open(inc, "r")
        except OSError:
            print("Couldn't open file: {}".format(line))
            leave()
            sys.exit(1)

        # Close the included file as soon as it has been copied
        with new_fi:
            do_includes(new_fi)

        inc_depth -= 1
#-------------------------------------------------------------------------------
def parse():
    """First pass over the include-expanded source.

    For every non-empty line of ``source``:

    * everything from the first unquoted ``#``, ``;``, ``@``, ``!`` or ``/``
      onwards is dropped as a comment;
    * a line starting with a space or tab is an instruction: it is handed to
      parse_instr(), whose return value is added to ``ptext``;
    * a line ending with ``:`` is a label, recorded in ``plabels_text`` at
      the current ``ptext`` offset; a label starting with ``.`` is local and
      gets the last non-local label prepended;
    * any other line is passed to parse_preproc().

    An unterminated string or an invalid label name prints an error and
    exits with status 1.
    """
    global ptext
    global plastlabel

    source.seek(0)

    for line in source:
        line = line.rstrip()

        if len(line) == 0:
            continue

        # Cut the comment; quote characters toggle "inside a string" state
        quote = False
        for i, c in enumerate(line):
            if c in "'\"":
                quote = not quote
            if c in '#;@!/' and not quote:
                line = line[:i].rstrip()
                break

        if quote:
            print("Unterminated string in line: {}".format(line))
            leave()
            sys.exit(1)

        if len(line) == 0:
            continue

        if line[0] == ' ' or line[0] == '\t':
            line = line.lstrip()
            ptext += parse_instr(line)
            instrs.write("\n")
            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]

            # An empty name would otherwise crash on label[0] below
            if len(label) == 0 or not name_valid(label):
                print("Bad label name: {}".format(label))
                leave()
                sys.exit(1)

            if label[0] == '.':
                label = plastlabel + label
            else:
                plastlabel = label

            plabels_text[label] = ptext
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)
#-------------------------------------------------------------------------------
def parse_preproc(line):
    """Handle a non-indented source line that is not a label.

    Two forms are recognised:

    * ``name := text`` stores *text* in ``pdefs`` under *name*;
    * ``label = value`` appends *value* to the data section (``b_data``)
      and records the label's offset in ``plabels_data``. A label starting
      with ``.`` gets the last non-local label prepended. *value* is one of:

      - a number, stored as 8 little-endian bytes (two's complement when
        negative);
      - ``[size]``, a zero-filled buffer of *size* bytes plus 1 to 8 bytes
        of padding; ``<label>_len`` is defined as *size*;
      - a quoted string, in which ``\\n`` and ``\\t`` are the only escape
        sequences, followed by 1 to 8 zero bytes so that the section stays
        8-byte aligned; ``<label>_len`` is defined as the number of string
        bytes emitted.

    ``pdata`` is advanced by the number of bytes written. Malformed input
    prints an error and exits with status 1. A line matching neither form
    only prints "Unrecognized directive" and processing continues.
    """
    global pdata

    tok = line.split(None, 2)

    # preprocessor
    if len(tok) > 1 and tok[1] == ':=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            leave()
            sys.exit(1)

        pdefs[tok[0]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            leave()
            sys.exit(1)

        label = tok[0]
        if label[0] == '.':
            label = plastlabel + label

        plabels_data[label] = pdata
        value = tok[2]

        # number data
        if is_number(value):
            number = int(value, base=0)
            try:
                # Negative values need signed=True or to_bytes() raises
                raw = number.to_bytes(8, byteorder='little',
                                      signed=number < 0)
            except OverflowError:
                print("Number out of range: {}".format(line))
                leave()
                sys.exit(1)

            written = b_data.write(raw)
            assert written == 8
            pdata += written

        # buffer / bss
        elif value[0] == '[':
            if value[-1] != ']':
                print("Invalid bss format: {}".format(line))
                leave()
                sys.exit(1)

            s = value[1:-1].strip()
            if not is_number(s):
                print("Invalid bss format: {}".format(line))
                leave()
                sys.exit(1)

            i = int(s, base=0)
            # Round up past the next multiple of 8 (always adds 1..8 bytes)
            i = i + (8 - i % 8)

            written = b_data.write(bytearray(i))
            assert written == i

            pdefs[label + "_len"] = s
            pdata += written

        # string data
        elif value[0] in "'\"":
            s = value.strip()
            if len(s) < 2 or s[-1] != s[0]:
                print("Invalid format: {}".format(line))
                leave()
                sys.exit(1)

            s = s[1:-1]
            real_len = 0
            escaping = False

            for c in s:
                # escape sequences
                if c == '\\':
                    escaping = True
                    continue

                if escaping:
                    escaping = False
                    if c == 'n':
                        c = '\n'
                    elif c == 't':
                        c = '\t'
                    else:
                        print("Unrecognized escape sequence: {}".format(line))
                        leave()
                        sys.exit(1)

                written = b_data.write(
                    ord(c).to_bytes(1, byteorder='little', signed=False))
                assert written == 1
                real_len += 1
                pdata += 1

            # align: pad from the number of bytes actually emitted, not from
            # the raw text length (which also counts the backslashes)
            pad = 8 - real_len % 8
            written = b_data.write(bytes(pad))
            assert written == pad
            pdata += pad

            pdefs[label + "_len"] = str(real_len)

        else:
            print("Invalid format: {}".format(line))
            leave()
            sys.exit(1)

        return

    print("Unrecognized directive: {}".format(line))
#-------------------------------------------------------------------------------
# Condition suffix -> 5-bit condition code
pconds = {
    'c': 0b00001,
    'o': 0b00010,
    'z': 0b00011,
    'e': 0b00011,
    's': 0b00100,
    'p': 0b00101,
    'a': 0b00110,
    'ae': 0b00111,
    'b': 0b01000,
    'be': 0b01001,
    'g': 0b01010,
    'ge': 0b01011,
    'l': 0b01100,
    'le': 0b01101,
    'cxz': 0b01110,
    'cxnz': 0b11110,
}


def get_cond_mask(cond, line):
    """Return the condition bits for the suffix *cond*, shifted left by 10.

    A leading ``n`` is removed and sets bit 0b10000 of the code; what remains
    must be a key of ``pconds``. *line* is only used in the error message.

    An unknown or empty suffix prints an error and exits with status 1.
    """
    mask = 0

    # startswith() rather than cond[0], so an empty suffix is reported as
    # invalid below instead of raising IndexError
    if cond.startswith('n'):
        cond = cond[1:]
        mask = 0b10000

    if cond not in pconds:
        print("Invalid condition suffix: {}".format(line))
        leave()
        sys.exit(1)

    return (mask | pconds[cond]) << 10
#-------------------------------------------------------------------------------
# Operand format name -> 5-bit format code
pfts = {
    "reg": 0b00001,
    "imm64": 0b00010,

    "bimm64": 0b00100,
    "brr": 0b00101,
    "brri": 0b00110,
    "brrii": 0b00111,

    "wimm64": 0b01000,
    "wrr": 0b01001,
    "wrri": 0b01010,
    "wrrii": 0b01011,

    "limm64": 0b01100,
    "lrr": 0b01101,
    "lrri": 0b01110,
    "lrrii": 0b01111,

    "qimm64": 0b10000,
    "qrr": 0b10001,
    "qrri": 0b10010,
    "qrrii": 0b10011,
}


def get_fts_mask(ft, line):
    """Return the format code registered in ``pfts`` for the name *ft*.

    *line* is only used in the error message. An unknown format prints an
    error and exits with status 1, like the other fatal errors in this file,
    instead of returning None to a caller that shifts the result.
    """
    if ft not in pfts:
        print("Invalid operand format ({}): {}".format(ft, line))
        leave()
        sys.exit(1)

    return pfts[ft]
#-------------------------------------------------------------------------------
def _instr_error(message, line):
    """Print *message* followed by the offending *line*, then exit(1)."""
    print("{}: {}".format(message, line))
    leave()
    sys.exit(1)


def _split_scaled(term, line):
    """Split a ``reg*imm16`` term into its (reg, imm16) parts."""
    parts = term.split('*')
    if len(parts) != 2:
        _instr_error("Bad memory operand", line)
    return parts[0], parts[1]


def parse_instr(line):
    """Translate one instruction line into an entry of the ``instrs`` listing.

    The entry is ``<name> %%imm16 <w2><args>`` where:

    * ``<name>`` is the mnemonic plus one suffix per operand: ``_r`` for a
      register, ``_i`` for an immediate or label, ``_m`` for a memory
      operand;
    * ``<w2>`` combines the ``rep`` flag (0x8000), the condition mask from
      get_cond_mask(), the first operand format shifted left by 5 and the
      second operand format;
    * ``<args>`` are the operand tokens later encoded by gentext().

    Memory operands are written ``<b|w|l|q>[...]``. The bracket contents may
    be ``reg``, ``imm64``, ``reg*imm16``, ``reg+reg``, ``reg+imm16``,
    ``reg*imm16+imm16``, ``reg+reg*imm16``, ``reg+reg+imm16`` or
    ``reg+reg*imm16+imm16``.

    Returns the instruction size in bytes as accounted here: 4 for the
    instruction and ``w2`` words, 2 per register or register pair, 8 per
    64-bit immediate or label, 2 per 16-bit immediate. An empty line
    returns 0. Malformed input prints an error and exits with status 1.
    """
    if line is None or len(line) == 0:
        return 0

    tok = line.split(' ', 1)
    instr = tok[0].strip()

    if len(tok) > 1:
        params = tok[1].strip()
    else:
        params = None

    fellthrough = False

    size = 4

    # Word 2 (rep|cond|ft1|ft2)
    w2 = 0

    if '.' in instr:
        instr, cond = instr.split('.', 1)
        w2 |= get_cond_mask(cond, line)

    if instr == "rep":
        if params is None:
            _instr_error("Missing instruction after rep prefix", line)

        w2 |= 0x8000  # 16th bit

        # The real mnemonic is the first word after the prefix
        rep_tok = params.split(' ', 1)
        if len(rep_tok) == 2:
            instr, params = rep_tok
        else:
            instr = rep_tok[0]
            params = None

    instr_name = instr
    instr_args = ''

    if params is None or len(params) == 0:
        instrs.write("{} ".format(instr_name))
        instrs.write("%%imm16 {}".format(w2))
        return size

    tok = params.split(',')

    # FT1 and FT2
    fts = ''

    #
    # Parse operands, generating fts along the way
    #
    for word in tok:
        word = word.strip()

        # e.g. a trailing comma; would otherwise crash on word[0] below
        if len(word) == 0:
            _instr_error("Empty operand", line)

        instr_args += ' '

        gotPref = False
        if len(fts) != 0:
            fts += ' '

        # memory length prefixes
        if len(word) > 2 and '[' in word:
            if word[0] in 'bwlq':
                fts += word[0]
                gotPref = True
            else:
                _instr_error("Bad memory length prefix", line)

            word = word[1:].strip()
            if word[0] != '[':
                _instr_error("Bad memory operand", line)

        #
        # Determine memory format and save it into fts
        #
        if word[0] == '[':
            if word[-1] != ']':
                _instr_error("Bad memory operand", line)
            word = word[1:-1]

            #
            # Make sure we got an access length prefix
            #
            if not gotPref:
                _instr_error("Missing access length modifier", line)

            instr_name += "_m"

            #
            # Offsets and scaled registers
            #
            # '*' is tested as well so that [reg*imm16], which contains no
            # '+', reaches the single-term case below.
            if '+' in word or '*' in word:
                reg1 = "inv"
                reg2 = "inv"
                imm1 = '1'
                imm2 = '0'

                wtok = word.split('+')

                #
                # [reg*imm16]
                #
                if len(wtok) == 1:
                    reg2, imm1 = _split_scaled(wtok[0], line)

                #
                # [reg+reg], [reg+imm16], [reg*imm16+imm16], [reg+reg*imm16]
                #
                elif len(wtok) == 2:
                    # Must be [reg*imm16+imm16]
                    if '*' in wtok[0]:
                        if not is_number(wtok[1].strip()):
                            _instr_error("Bad memory operand", line)
                        reg2, imm1 = _split_scaled(wtok[0], line)
                        imm2 = wtok[1]

                    # Must be [reg+reg*imm16]
                    elif '*' in wtok[1]:
                        reg1 = wtok[0]
                        reg2, imm1 = _split_scaled(wtok[1], line)

                    # Must be [reg+imm16]
                    elif is_number(wtok[1].strip()):
                        reg1 = wtok[0]
                        imm2 = wtok[1]

                    # Must be [reg+reg]
                    else:
                        reg1 = wtok[0]
                        reg2 = wtok[1]

                #
                # [reg+reg+imm16], [reg+reg*imm16+imm16]
                #
                elif len(wtok) == 3:
                    reg1 = wtok[0]
                    imm2 = wtok[2]

                    if '*' in wtok[1]:
                        reg2, imm1 = _split_scaled(wtok[1], line)
                    else:
                        reg2 = wtok[1]

                else:
                    _instr_error("Bad memory operand", line)

                #
                # Update fts and instr_args
                #
                instr_args += "{}:{} ".format(reg2.strip(), reg1.strip())
                size += 2

                if imm1 == '1':
                    if imm2 == '0':
                        fts += 'rr'
                    else:
                        fts += 'rri'
                        size += 2
                        instr_args += "%%imm16 {}".format(imm2)
                else:
                    size += 4
                    fts += 'rrii'
                    instr_args += "%%imm16 {} %%imm16 {}".format(imm1, imm2)

                continue

            #
            # [imm64] or [reg]
            #
            else:
                # The generic code below handles the bracket contents, but
                # must not add another name suffix after "_m"
                fellthrough = True
                # FALLTHROUGH

        # preprocessor
        if word in pdefs:
            word = pdefs[word]
            # Fall through

        # for now every immediate is 64-bit
        if is_number(word):
            # +8 for immediate
            size += 8

            if not fellthrough:
                instr_name += "_i"

            fts += "imm64"

            instr_args += "%%imm64 "
            instr_args += word

            fellthrough = False
            continue

        # register
        if word in pregs:
            size += 2

            if not fellthrough:
                instr_name += "_r"
                fts += "reg"
            else:
                fts += "rr"

            instr_args += word

            fellthrough = False
            continue

        # it's a label (a 64-bit immediate)
        # +8 for immediate
        size += 8

        if not fellthrough:
            instr_name += "_i"

        fts += "imm64"

        instr_args += "%%imm64 "

        # Local labels are qualified with the last non-local label
        if word[0] == '.':
            instr_args += plastlabel

        instr_args += word
        fellthrough = False

    #
    # Compute FT1 and FT2
    #
    if ' ' in fts:
        formats = fts.split(' ')
        if len(formats) != 2:
            _instr_error("Too many operands", line)

        ft1, ft2 = formats
        w2 |= get_fts_mask(ft1, line) << 5
        w2 |= get_fts_mask(ft2, line)
    else:
        w2 |= get_fts_mask(fts, line) << 5

    instrs.write("{} %%imm16 {}{}".format(instr_name, w2, instr_args))

    return size
#-------------------------------------------------------------------------------
# Listing tokens that select the size of the number that follows them
special_syms = {
    "%%imm16",
    "%%imm32",
    "%%imm64",
    "%%signed"
}


def gentext():
    """Second pass: encode the ``instrs`` listing into ``b_text``.

    Each space-separated token of the listing is encoded little-endian:

    * a register name: its index in ``pregs``, 2 bytes;
    * a ``reg2:reg1`` pair: ``(index(reg1) << 8) | index(reg2)``, 2 bytes;
    * an instruction name: its index in ``pinstrs``, 2 bytes;
    * a text or data label: its absolute address, 8 bytes;
    * a ``%%...`` marker: nothing is written, it sets the size used for the
      next number (2, 4 or 8 bytes; ``%%signed`` selects 2);
    * a number: written with the current size, signed when it starts
      with ``-``.

    Text addresses start at ``start_addr``, the entry point given on the
    command line. Data addresses start after the text plus the same
    ``8 - ptext % 8`` padding bytes that genout() inserts between the two
    sections.

    With ``WANT_DISASM`` set, the listing is printed first. Any other token
    prints an error and exits with status 1.
    """
    instrs.seek(0)

    if WANT_DISASM:
        print(instrs.read())
        instrs.seek(0)

    text_start = start_addr
    data_start = text_start + ptext + (8 - ptext % 8)

    imm_sizes = {"%%imm16": 2, "%%imm32": 4, "%%imm64": 8, "%%signed": 2}

    def unknown_token(word, line):
        print("Assembly error, unknown token '{}' in line: {}".format(word, line))
        leave()
        sys.exit(1)

    # Byte width for the next number; None until a size marker was seen
    lastimm = None

    for line in instrs:
        tok = line.strip().split(' ')

        for word in tok:
            if len(word) == 0:
                continue

            if word in pregs:
                idx = pregs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if ':' in word:
                reg2, reg1 = word.split(':', 1)
                # Report a bad register pair like any other unknown token
                # rather than letting list.index() raise ValueError
                if reg1 not in pregs or reg2 not in pregs:
                    unknown_token(word, line)

                idx1 = pregs.index(reg1)
                idx2 = pregs.index(reg2)
                b_text.write(((idx1 << 8) | idx2).to_bytes(2, byteorder='little', signed=False))
                continue

            if word in pinstrs:
                idx = pinstrs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in plabels_text:
                addr = text_start + plabels_text[word]
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in plabels_data:
                addr = data_start + plabels_data[word] + pdata_pad
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in special_syms:
                lastimm = imm_sizes[word]
                continue

            if is_number(word) and lastimm is not None:
                is_signed = word[0] == '-'
                b_text.write(int(word, base=0).to_bytes(lastimm, byteorder='little', signed=is_signed))
                continue

            unknown_token(word, line)
#-------------------------------------------------------------------------------
def genout():
    """Write the final image to ``b_out``.

    The image is the contents of ``b_text``, then ``8 - ptext % 8`` zero
    bytes, then the contents of ``b_data``.
    """
    b_text.seek(0)
    b_data.seek(0)

    b_out.write(b_text.read())

    # Padding between .text and .data, written one zero byte at a time
    data_align = (8 - ptext % 8)
    zero = int(0).to_bytes(1, byteorder='little', signed=False)
    remaining = data_align
    while remaining > 0:
        b_out.write(zero)
        remaining -= 1

    b_out.write(b_data.read())
#-------------------------------------------------------------------------------
# Driver: run the assembler passes in order.
# Load the instruction and register name tables from the .lst files
parse_lst_instrs()
parse_lst_regs()
# Copy the main source into `source`, expanding "include" directives
do_includes(main_src)
# First pass: build the instruction listing, the labels and the data section
parse()
# Second pass: encode the instruction listing into the text section
gentext()
# Write text, padding and data to the output file
genout()
# Close every file and exit with status 0
leave()
sys.exit(0)