1
0
mirror of https://gitlab.os-k.eu/os-k-team/kvisc.git synced 2023-08-25 14:05:46 +02:00
kvisc/as/k-as.py

529 lines
13 KiB
Python
Raw Normal View History

2019-05-16 19:59:14 +02:00
#!/usr/bin/python3
2019-05-16 20:09:20 +02:00
# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.
2019-05-29 22:59:17 +02:00
import os
2019-05-16 21:42:23 +02:00
import sys
2019-05-22 18:39:46 +02:00
import subprocess
2019-05-19 19:54:29 +02:00
from array import array
from tempfile import TemporaryFile
2019-05-16 19:59:14 +02:00
2019-05-19 19:54:29 +02:00
# Expected invocation:
#   k-as.py (source file) (memory entry point) (output file)
# The usage text follows the order in which the arguments are actually
# consumed below: argv[1] is opened for reading, argv[3] for binary writing.
if len(sys.argv) != 4:
    print("Usage: {} (source file) (memory entry point) (output file)"
          .format(sys.argv[0]))
    sys.exit(1)

# Scratch files shared between the passes:
#   source - include-expanded source text (do_includes -> parse)
#   instrs - one normalised instruction per line (parse_instr -> gentext)
#   b_data - raw bytes of the data section (parse_preproc -> genout)
#   b_text - raw bytes of the text section (gentext -> genout)
source = TemporaryFile(mode="w+")
instrs = TemporaryFile(mode="w+")
b_data = TemporaryFile(mode="w+b")
b_text = TemporaryFile(mode="w+b")

# Register and instruction name lists are looked up in sys.path[0]
lst_regs = open(os.path.join(sys.path[0], "regs.lst"))
lst_instrs = open(os.path.join(sys.path[0], "instrs.lst"))

main_src = open(sys.argv[1])
b_out = open(sys.argv[3], "wb")

# base=0 accepts decimal as well as 0x / 0o / 0b prefixed literals.
# NOTE(review): start_addr is not read by any code visible in this file;
# gentext() uses a fixed text start of 0x100000 - confirm whether that is
# intended.
start_addr = int(sys.argv[2], base=0)
def leave():
    """Close every file the assembler opened at start-up.

    Called right before each sys.exit() on an error path and once at the
    end of a successful run.
    """
    # Same closing order as the handles were historically released in
    handles = (
        source,
        instrs,
        b_out,
        b_data,
        b_text,
        main_src,
        lst_regs,
        lst_instrs,
    )

    for handle in handles:
        handle.close()
#-------------------------------------------------------------------------------
# Defines

# preprocessor definitions: name -> replacement text (see parse_preproc)
pdefs = {}

# registers (names read from regs.lst; list position is the encoded value)
pregs = []

# instructions (names read from instrs.lst; list position is the encoded value)
pinstrs = []

# labels: name -> offset inside the respective section
plabels_text = {}
plabels_data = {}

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# for local labels: the most recent non-local label, used as prefix for
# labels starting with '.'
plastlabel = ""

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0
#-------------------------------------------------------------------------------
def name_valid(name):
    """Tell whether *name* consists only of characters allowed in a name.

    The check is case-insensitive: the name is lower-cased first and every
    resulting character must be a letter a-z, a digit, or one of
    ``[ $ . _ + ] = ,``. An empty name is accepted.
    """
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$._+]=,'
    return all(ch in allowed for ch in name.lower())
2019-05-30 11:19:16 +02:00
def is_number(s):
    """Tell whether the string *s* is an integer literal.

    Uses int(s, base=0), so decimal literals and 0x / 0o / 0b prefixed
    literals (optionally signed) are accepted. Only ValueError is treated
    as "not a number".
    """
    try:
        int(s, base=0)
    except ValueError:
        parsed = False
    else:
        parsed = True
    return parsed
2019-05-19 19:54:29 +02:00
#-------------------------------------------------------------------------------
def parse_lst_regs():
    """Append every line of regs.lst, stripped of whitespace, to pregs."""
    pregs.extend(entry.strip() for entry in lst_regs)
def parse_lst_instrs():
    """Append every line of instrs.lst, stripped of whitespace, to pinstrs."""
    pinstrs.extend(entry.strip() for entry in lst_instrs)
#-------------------------------------------------------------------------------
2019-05-30 12:19:38 +02:00
# current nesting level of "include" directives being expanded
inc_depth = 0
# nesting level at which expansion is aborted
inc_depth_max = 16

# Quickly goes through source file and resolves "include" directives ONLY
def do_includes(fi):
    """Copy *fi* into the ``source`` scratch file, expanding includes.

    A line of the form ``include "path"`` or ``include 'path'`` is replaced
    by the recursively expanded content of that file; every other line is
    written to ``source`` with trailing whitespace removed.

    fi -- an iterable of text lines (an open text file).

    On a malformed directive, an unreadable file, or when nesting reaches
    inc_depth_max, an error is printed and the program exits with status 1.
    """
    global inc_depth

    for line in fi:
        line = line.rstrip()
        # str.split with an explicit separator always yields at least one
        # element, so tok[0] is safe
        tok = line.split(' ', 1)

        if tok[0] != "include":
            source.write("{}\n".format(line))
            continue

        if len(tok) == 1:
            print("Missing parameter for include directive")
            leave()
            sys.exit(1)

        # the path must be wrapped in a matching pair of quotes
        if tok[1][0] not in "'\"" or tok[1][-1] != tok[1][0]:
            print("Invalid format for include directive: {}".format(line))
            leave()
            sys.exit(1)

        inc = tok[1][1:-1]

        try:
            new_fi = open(inc, "r")
        except (OSError, ValueError):
            # ValueError covers paths open() rejects outright
            print("Couldn't open file: {}".format(line))
            leave()
            sys.exit(1)

        # the with-statement closes the included file once it is expanded
        with new_fi:
            inc_depth += 1
            if inc_depth >= inc_depth_max:
                print("Maximal include depth reached: {}".format(line))
                leave()
                sys.exit(1)

            do_includes(new_fi)

            # leaving this include: without the decrement the counter would
            # track the total number of includes instead of their nesting
            inc_depth -= 1
#-------------------------------------------------------------------------------
def _strip_comment(line):
    """Remove a trailing comment from *line*, leaving quoted text alone.

    Returns a pair (text, unterminated). A comment starts at the first of
    ``# ; @ ! /`` found outside a string. A string runs from a quote
    character to the next occurrence of the same quote character, so a
    quote of the other kind inside it is ordinary text. ``unterminated`` is
    True when the line ends inside a string.
    """
    opener = None
    for i, ch in enumerate(line):
        if opener is None:
            if ch in "'\"":
                opener = ch
            elif ch in '#;@!/':
                return line[:i].rstrip(), False
        elif ch == opener:
            opener = None
    return line, opener is not None


def parse():
    """First pass over the include-expanded ``source`` scratch file.

    For every non-empty line, after comment removal:
      * a line starting with a space or tab is an instruction: it is handed
        to parse_instr(), its returned size is added to ptext and a newline
        is appended to ``instrs``;
      * a line ending in ':' defines a text label at the current ptext; a
        label starting with '.' is local and is prefixed with the last
        non-local label;
      * anything else is handed to parse_preproc().

    An unterminated string or an invalid/empty label name prints an error
    and exits with status 1.
    """
    global ptext
    global plastlabel

    source.seek(0)

    for line in source:
        line = line.rstrip()
        if not line:
            continue

        line, unterminated = _strip_comment(line)
        if unterminated:
            print("Unterminated string in line: {}".format(line))
            leave()
            sys.exit(1)

        # the line held nothing but a comment
        if not line:
            continue

        # instruction
        if line[0] in ' \t':
            ptext += parse_instr(line.lstrip())
            instrs.write("\n")
            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]

            # a bare ":" has no name at all; reject it here instead of
            # failing on label[0] below
            if not label or not name_valid(label):
                print("Bad label name: {}".format(label))
                leave()
                sys.exit(1)

            if label[0] == '.':
                label = plastlabel + label
            else:
                plastlabel = label

            plabels_text[label] = ptext
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)
#-------------------------------------------------------------------------------
2019-05-19 19:54:29 +02:00
def parse_preproc(line):
    """Handle a non-instruction, non-label source line.

    Two forms are recognised (tokens separated by single spaces):

      NAME := TEXT   preprocessor definition; stored in pdefs.
      NAME = VALUE   data definition; NAME becomes a data label at the
                     current pdata offset and VALUE is appended to b_data.

    VALUE is either an integer literal (stored as 8 little-endian bytes,
    two's complement when negative) or a quoted string. In a string the
    escapes ``\\n`` and ``\\t`` are supported; each character is stored as
    one byte, followed by 1 to 8 zero bytes so that the data section stays
    8-byte aligned and the string is always NUL-terminated. For strings,
    pdefs[NAME + "_len"] is set to the number of characters stored.

    A name starting with '.' is local and is prefixed with plastlabel.

    Malformed definitions print an error and exit with status 1. Any other
    line only prints "Unrecognized directive".
    """
    global pdata

    tok = line.split(' ', 2)

    # preprocessor
    if len(tok) > 1 and tok[1] == ':=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            leave()
            sys.exit(1)

        pdefs[tok[0]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            leave()
            sys.exit(1)

        label = tok[0]
        if label[0] == '.':
            label = plastlabel + label

        plabels_data[label] = pdata

        try:
            value = int(tok[2], base=0)
        except ValueError:
            value = None

        # number data
        if value is not None:
            try:
                # negative values need signed=True, otherwise to_bytes
                # raises OverflowError
                raw = value.to_bytes(8, byteorder='little', signed=value < 0)
            except OverflowError:
                print("Invalid format: {}".format(line))
                leave()
                sys.exit(1)

            written = b_data.write(raw)
            assert(written == 8)
            pdata += written

        # string data
        elif tok[2][0] in "'\"":
            s = tok[2].strip()

            # the closing quote must be the last character of the line
            if s[-1] != tok[2][0]:
                print("Invalid format: {}".format(line))
                leave()
                sys.exit(1)

            s = s[1:-1]

            real_len = 0
            escaping = False

            for c in s:
                # escape sequences
                if c == '\\':
                    escaping = True
                    continue

                if escaping:
                    escaping = False

                    if c == 'n':
                        c = '\n'
                    elif c == 't':
                        c = '\t'
                    else:
                        print("Unrecognized escape sequence: {}".format(line))
                        leave()
                        sys.exit(1)

                written = b_data.write(ord(c).to_bytes(1, byteorder='little', signed=False))
                assert(written == 1)
                real_len += 1
                pdata += 1

            # align: pad by the number of bytes actually written, not by
            # the raw source length (which also counts the backslashes)
            pad = 8 - real_len % 8
            written = b_data.write(bytes(pad))
            assert(written == pad)
            pdata += pad

            pdefs[label + "_len"] = str(real_len)

        else:
            print("Invalid format: {}".format(line))
            leave()
            sys.exit(1)

        return

    print("Unrecognized directive: {}".format(line))
#-------------------------------------------------------------------------------
2019-05-30 11:19:16 +02:00
def parse_instr(line):
    """Translate one instruction line and return its encoded size in bytes.

    *line* is the instruction without leading whitespace, e.g.
    ``mov rax, b[rbx+8]``. The instruction is appended to the ``instrs``
    scratch file (without trailing newline) as the mnemonic, extended by one
    suffix per operand (``_r`` register, ``_i`` immediate or label, ``_m``
    memory reference), followed by the operands in the token form that
    gentext() consumes. A leading ``rep`` is emitted as ``%rep``.

    Size accounting: 2 bytes for the instruction, 2 for a ``rep`` prefix,
    2 per register, 2 + 8 per immediate or label, 2 + 2 + 2 for ``[reg+off]``
    and 2 plus the size of the inner operand for ``[x]``.

    Returns 0 for an empty line. Malformed operands print an error and exit
    with status 1.
    """
    if line is None or len(line) == 0:
        return 0

    tok = line.split(' ', 1)
    instr = tok[0].strip()
    params = tok[1].strip() if len(tok) > 1 else None

    # set while the operand being classified is the inside of a "[...]"
    # reference, which already received its "_m" suffix
    fellthrough = False

    size = 2

    if instr == "rep":
        if not params:
            print("Missing instruction after rep prefix: {}".format(line))
            leave()
            sys.exit(1)

        instrs.write("%rep ")
        # partition() also copes with a prefixed instruction that has no
        # operands, where unpacking split(' ', 1) would raise
        instr, _, params = params.partition(' ')
        params = params.strip()
        size += 2

    instr_name = instr
    instr_args = ''

    if params is None or len(params) == 0:
        instrs.write("{}".format(instr_name))
        # includes the 2 bytes of a rep prefix, if any
        return size

    for word in params.split(','):
        word = word.strip()

        if not word:
            print("Empty operand: {}".format(line))
            leave()
            sys.exit(1)

        instr_args += ' '

        pref = None

        # memory length prefixes
        if len(word) > 2 and '[' in word:
            if word[0] == 'b':
                pref = "%b"
            elif word[0] == 'w':
                pref = "%w"
            elif word[0] == 'l' or word[0] == 'd':
                pref = "%l"
            elif word[0] == 'q':
                pref = "%q"
            else:
                print("Bad memory length prefix: {}".format(line))
                leave()
                sys.exit(1)

            word = word[1:].strip()

            # the prefix is a single letter directly in front of '['
            if not word.startswith('['):
                print("Bad memory length prefix: {}".format(line))
                leave()
                sys.exit(1)

        # [reg+off] or [imm64]
        if word.startswith('['):
            if not word.endswith(']'):
                print("Invalid memory operand: {}".format(line))
                leave()
                sys.exit(1)

            word = word[1:-1]

            if pref is None:
                print("Missing access length modifier: {}".format(line))
                leave()
                sys.exit(1)

            instr_name += "_m"
            instr_args += "{}".format(pref)

            if '+' in word:
                # +2 for A_OFF, +2 for offset, +2 for register
                size += 2 + 2 + 2
                instr_args += "off "

                if len(word) <= 3:
                    print("Invalid memory operand: {}".format(line))
                    leave()
                    sys.exit(1)

                reg, off = word.split('+', 1)
                reg = reg.strip()
                off = off.strip()

                # gentext() expects the offset before the register
                instr_args += "{} {}".format(off, reg)
                continue

            # +2 for A_MEM
            size += 2
            instr_args += "mem "

            fellthrough = True
            # FALLTHROUGH: classify what is inside the brackets

        # for now every immediate is 64-bit
        if is_number(word):
            # +2 for A_IMM64, +8 for immediate
            size += 2 + 8

            if not fellthrough:
                instr_name += "_i"

            instr_args += "%imm64 "
            instr_args += word

            fellthrough = False
            continue

        # preprocessor
        if word in pdefs:
            word = pdefs[word]
            # Fall through

        # register
        if word in pregs:
            size += 2

            if not fellthrough:
                instr_name += "_r"

            instr_args += word

            fellthrough = False
            continue

        # it's a label (a 64-bit immediate)
        # +2 for A_IMM64, +8 for immediate
        size += 2 + 8

        if not fellthrough:
            instr_name += "_i"

        instr_args += "%imm64 "

        # local label: qualify with the last non-local label
        if word.startswith('.'):
            instr_args += plastlabel

        instr_args += word
        fellthrough = False

    instrs.write("{}{}".format(instr_name, instr_args))

    return size
#-------------------------------------------------------------------------------
2019-05-22 18:39:46 +02:00
# Marker tokens that parse_instr() places in the instruction stream.
# gentext() writes each one as a 2-byte little-endian value.
special_syms = {
    # memory reference, by access length (b / w / l / q)
    "%bmem": 0x7001,
    "%wmem": 0x7002,
    "%lmem": 0x7004,
    "%qmem": 0x7008,

    # register + offset memory reference, by access length
    "%boff": 0x7701,
    "%woff": 0x7702,
    "%loff": 0x7704,
    "%qoff": 0x7708,

    # immediates; gentext() encodes the following number in 2 / 4 / 8 bytes
    "%imm16": 0x7772,
    "%imm32": 0x7774,
    "%imm64": 0x7778,

    # "rep" instruction prefix
    "%rep": 0x8000,
}
def gentext():
    """Second pass: encode the ``instrs`` scratch file into ``b_text``.

    Every space-separated token of every line is looked up, in this order:
    register name, instruction name, text label, data label, special
    symbol, integer literal. Registers, instructions and special symbols
    are written as 2 little-endian bytes, label addresses as 8 bytes, and a
    number in the width selected by the most recent ``%imm*`` / ``%?off``
    marker. An unknown token prints an error and exits with status 1.
    """
    instrs.seek(0)

    text_start = 0x100000

    # data begins at the next 8-byte boundary strictly after the text
    # section (a full 8 bytes further when the end is already aligned)
    text_end = text_start + ptext
    data_start = text_end + (8 - text_end % 8)

    # width in bytes of the number following an immediate marker
    imm_widths = {"%imm16": 2, "%imm32": 4, "%imm64": 8}

    def emit(value, width, signed=False):
        b_text.write(value.to_bytes(width, byteorder='little', signed=signed))

    for line in instrs:
        for word in line.strip().split(' '):
            if word in pregs:
                emit(pregs.index(word), 2)

            elif word in pinstrs:
                emit(pinstrs.index(word), 2)

            elif word in plabels_text:
                emit(text_start + plabels_text[word], 8)

            elif word in plabels_data:
                emit(data_start + plabels_data[word] + pdata_pad, 8)

            elif word in special_syms:
                # "%boff", "%woff", "%loff", "%qoff": a 2-byte offset follows
                is_offset = word[0] == '%' and word[2:] == "off"

                if is_offset:
                    lastimm = 2
                elif word in imm_widths:
                    lastimm = imm_widths[word]

                emit(special_syms[word], 2, signed=is_offset)

            elif is_number(word):
                # NOTE(review): lastimm is only bound once a marker has
                # been seen; parse_instr() always emits one before a number
                emit(int(word, base=0), lastimm, signed=(word[0] == '-'))

            else:
                print("Assembly error, unknown token '{}' in line: {}".format(word, line))
                leave()
                sys.exit(1)
#-------------------------------------------------------------------------------
2019-05-22 18:39:46 +02:00
def genout():
    """Assemble the output file: text section, zero padding, data section.

    The padding is ``8 - ptext % 8`` zero bytes, i.e. between 1 and 8, which
    is the same rule gentext() uses to compute data addresses.
    """
    for section in (b_text, b_data):
        section.seek(0)

    b_out.write(b_text.read())

    # bytes(n) is n zero bytes
    b_out.write(bytes(8 - ptext % 8))

    b_out.write(b_data.read())
#-------------------------------------------------------------------------------
2019-05-19 19:54:29 +02:00
# Driver: load the name lists, expand includes, run both passes and write
# the output file.
try:
    parse_lst_instrs()
    parse_lst_regs()
    do_includes(main_src)
    parse()
    gentext()
    genout()
finally:
    # Release every file even when a pass raises unexpectedly. The error
    # paths inside the passes call leave() themselves before sys.exit();
    # closing an already closed file object is a no-op, so the second call
    # is harmless.
    leave()

sys.exit(0)