kvisc/as/k-as.py

316 lines
7.6 KiB
Python
Executable File

#!/usr/bin/python3
# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.
import sys
from array import array
from tempfile import TemporaryFile
# Command line, as consumed by the open() calls below:
#   argv[1] = source file (read), argv[2] = memory entry point,
#   argv[3] = output file (written).
# The usage text used to list the source and output files the other way
# round; following it would have truncated the source file.
if len(sys.argv) != 4:
    print("Usage: {} (source file) (memory entry point) (output file)"
        .format(sys.argv[0]))
    sys.exit(1)

# Intermediate files, kept on disk under fixed names.
# TemporaryFile(mode='w+')
instrs = open("testfile.out.1", "w+")   # text listing produced by parse_instr()
b_data = open("testfile.out.2", "w+b")  # .data bytes produced by parse_preproc()
b_text = open("testfile.out.3", "w+b")  # .text bytes produced by gentext()

# Lookup tables, one name per line.
lst_regs = open("regs.lst")
lst_instrs = open("instrs.lst")

fi = open(sys.argv[1])
out = open(sys.argv[3], "wb")

# base=0 lets the entry point be given with a 0x/0o/0b prefix or in decimal.
start_addr = int(sys.argv[2], base=0)
def leave():
    """Close every file handle that was opened at module start-up."""
    handles = (fi, out, instrs, b_data, b_text, lst_regs, lst_instrs)
    for handle in handles:
        handle.close()
#-------------------------------------------------------------------------------
# Defines: name -> replacement substituted into operands by apply_pdefs()
pdefs = {"tst1": 3, "tst2": "5"}

# registers
pregs = []

# instructions
pinstrs = []

# labels: name -> offset inside the corresponding section
plabels_text = {}
plabels_data = {}

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0
#-------------------------------------------------------------------------------
def name_valid(name):
    """Tell whether *name* consists only of characters allowed in a token.

    The check is case-insensitive. The first offending character is
    reported on stdout. An empty name is considered valid.
    """
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$%._+]='
    offender = next((ch for ch in name.lower() if ch not in allowed), None)
    if offender is None:
        return True
    print("BAD '{}'".format(offender))
    return False
#-------------------------------------------------------------------------------
def parse_lst_regs():
    """Fill ``pregs`` from the open ``lst_regs`` handle.

    One entry is appended per line, with surrounding whitespace removed.
    gentext() later uses an entry's position in the list as its encoding.
    """
    pregs.extend(entry.strip() for entry in lst_regs)
def parse_lst_instrs():
    """Fill ``pinstrs`` from the open ``lst_instrs`` handle.

    One entry is appended per line, with surrounding whitespace removed.
    gentext() later uses an entry's position in the list as its encoding.
    """
    pinstrs.extend(entry.strip() for entry in lst_instrs)
#-------------------------------------------------------------------------------
def _encode_data_value(text):
    """Return *text* as 8 unsigned little-endian bytes, or None if invalid."""
    # base=0 accepts 0x/0o/0b prefixes, in line with is_number(); the base-10
    # retry keeps zero-padded decimals such as "010" working as before.
    for base in (0, 10):
        try:
            value = int(text, base=base)
        except ValueError:
            continue
        try:
            return value.to_bytes(8, byteorder='little', signed=False)
        except OverflowError:
            # negative, or does not fit in 64 bits
            return None
    return None


def parse_preproc(line):
    """Handle a ``def`` directive or a ``.data`` entry.

    Recognised forms (tokens separated by single spaces):

    * ``def NAME VALUE`` -- store VALUE in ``pdefs`` under NAME.
    * ``NAME = VALUE``   -- record NAME in ``plabels_data`` at the current
      ``.data`` offset, append VALUE to ``b_data`` as an unsigned
      little-endian 64-bit integer and grow ``pdata`` by 8.

    Any problem (invalid characters, missing or non-numeric value, value
    that does not fit in 64 bits, unknown directive) is reported on stdout
    and the line is otherwise ignored. Returns None.
    """
    global pdata

    tok = line.split(' ', 2)

    # all() stops at the first invalid token, so only that one is reported.
    if not all(name_valid(word) for word in tok):
        print("Invalid token in line: {}".format(line))
        return

    if tok[0] == 'def':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
        else:
            pdefs[tok[1]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        encoded = _encode_data_value(tok[2]) if len(tok) == 3 else None
        if encoded is None:
            print("Invalid format: {}".format(line))
            return

        # Only register the label once the value is known to be writable.
        plabels_data[tok[0]] = pdata
        written = b_data.write(encoded)
        assert written == 8
        pdata += written
        return

    print("Unrecognized directive: {}".format(line))
#-------------------------------------------------------------------------------
def apply_pdefs(line):
    """Normalise an instruction line and substitute ``pdefs`` names.

    The line is split on whitespace into a mnemonic and at most two
    operands. A trailing comma on the first of two operands is dropped
    ('mov rax, rax' => 'mov rax rax'). Each operand, or the inside of a
    ``[...]`` operand, is replaced by its ``pdefs`` value when one exists
    and must then pass name_valid().

    Returns ``(True, rewritten_line)`` with tokens joined by single spaces,
    or ``(False, None)`` after printing a diagnostic when the line has too
    many tokens or contains an invalid, empty or unterminated operand.
    """
    # split() without an argument tolerates tabs and repeated spaces, which
    # previously produced empty tokens and an IndexError further down.
    tok = line.split()
    if not tok:
        return False, None

    if len(tok) > 3:
        print("Too many tokens in line: {}".format(line))
        return False, None

    # 'mov rax, rax' => 'mov rax rax'
    if len(tok) == 3 and tok[1].endswith(','):
        tok[1] = tok[1][:-1]

    parts = [tok[0]]

    for word in tok[1:]:
        bracketed = word.startswith('[')

        if bracketed:
            # Reported instead of asserted: assert is stripped under -O.
            if not word.endswith(']'):
                print("Invalid token in line: {}".format(line))
                return False, None
            word = word[1:-1]
        elif not word:
            # e.g. a lone ',' operand after the comma was dropped
            print("Invalid token in line: {}".format(line))
            return False, None

        # pdefs values are not necessarily strings (e.g. "tst1": 3).
        word = str(pdefs.get(word, word))

        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return False, None

        parts.append('[' + word + ']' if bracketed else word)

    return True, ' '.join(parts)
def parse():
    """Read the source file line by line and dispatch each line.

    * blank lines and lines whose first non-blank character is ``#`` or
      ``;`` are skipped;
    * lines starting with a space or tab are instructions: they go through
      apply_pdefs() and, on success, parse_instr(), whose return value is
      added to ``ptext``; a newline then ends the entry in ``instrs``;
    * non-indented lines ending in ``:`` define a text label at the current
      ``ptext``;
    * everything else is handed to parse_preproc().
    """
    global ptext

    for raw in fi:
        line = raw.rstrip()

        # blank line or full-line comment
        if not line or line[0] in '#;':
            continue

        if line[0] in ' \t':
            line = line.lstrip()

            # indented comment
            if line[0] in '#;':
                continue

            ok, line = apply_pdefs(line)
            if ok:
                ptext += parse_instr(line)
                instrs.write("\n")

            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]
            if name_valid(label):
                plabels_text[label] = ptext
            else:
                print("Bad label name: {}".format(label))
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)
#-------------------------------------------------------------------------------
def is_number(s):
    """Tell whether *s* is an integer literal accepted by ``int(s, base=0)``.

    That covers decimal as well as 0x/0o/0b-prefixed strings.
    """
    try:
        int(s, base=0)
        parsed = True
    except ValueError:
        parsed = False
    return parsed
def parse_instr(line):
    """Write one instruction to ``instrs`` and return its computed size.

    *line* is a mnemonic followed by space-separated operands. The mnemonic
    is written first, then each operand preceded by a space and, depending
    on its kind, a marker:

    * ``[reg+off]`` -> ``%off off reg``
    * ``[x]``       -> ``%mem `` followed by x classified as below
    * number        -> ``%imm64 n``
    * register      -> the register name itself
    * anything else -> ``%imm16 name`` (treated as a label)

    No trailing newline is written. The size starts at 2 for the
    instruction and grows with each operand.
    """
    mnemonic, *operands = line.split(' ')
    instrs.write(mnemonic)

    if not operands:
        return 2  # instruction

    total = 2

    for operand in operands:
        instrs.write(' ')

        # [reg+off] or [imm64]
        if operand[0] == '[':
            assert operand[-1] == ']'
            operand = operand[1:-1]

            if '+' in operand:
                # +2 for A_OFF, +2 for offset, +2 for register
                total += 2 + 2 + 2
                instrs.write("%off ")

                assert len(operand) > 3
                assert '+' in (operand[2], operand[3])

                # 2-char registers (r8, k0, etc) have the '+' at index 2,
                # 3-char ones (rax, cr0, etc) at index 3
                reg_len = 2 if operand[2] == '+' else 3
                instrs.write("{} {}".format(operand[reg_len + 1:],
                                            operand[:reg_len]))
                continue

            # +2 for A_MEM
            total += 2
            instrs.write("%mem ")
            # FALLTHROUGH: the bracket's content is classified below

        # for now every immediate is 64-bit
        if is_number(operand):
            # +2 for A_IMM64, +8 for immediate
            total += 2 + 8
            instrs.write("%imm64 " + operand)
        elif operand in pregs:
            total += 2
            instrs.write(operand)
        else:
            # it's a label (so a 16-bit immediate for now)
            # +2 for A_IMM16, +2 for immediate
            total += 2 + 2
            instrs.write("%imm16 " + operand)

    return total
#-------------------------------------------------------------------------------
def gentext():
    """Translate the ``instrs`` listing into ``b_text``.

    Every space-separated token of every line is looked up, in this order:
    register, instruction, text label, data label. A match is written to
    ``b_text`` as an unsigned little-endian 16-bit value: the list index
    for registers and instructions, the stored offset for text labels, and
    the stored offset plus ``ptext`` and ``pdata_pad`` for data labels.
    Numeric tokens are accepted but nothing is emitted for them.

    Any other token is an error: a message is printed, all files are
    closed through leave() and the process exits with status 1.

    NOTE(review): start_addr is not added to the label addresses emitted
    here (it was only copied into an unused local) -- confirm whether that
    is intended.
    """
    def emit16(value):
        # Raises OverflowError if value does not fit in 16 unsigned bits.
        b_text.write(value.to_bytes(2, byteorder='little', signed=False))

    instrs.seek(0)

    for line in instrs:
        for word in line.strip().split(' '):
            if word in pregs:
                emit16(pregs.index(word))
            elif word in pinstrs:
                emit16(pinstrs.index(word))
            elif word in plabels_text:
                emit16(plabels_text[word])
            elif word in plabels_data:
                emit16(plabels_data[word] + ptext + pdata_pad)
            elif is_number(word):
                # immediates are recognised but not emitted
                continue
            else:
                print("Assembly error, unknown token '{}' in line: {}".format(word, line))
                leave()
                sys.exit(1)
#-------------------------------------------------------------------------------
# Driver: load the lookup tables, parse the source, then emit .text.
try:
    parse_lst_instrs()
    parse_lst_regs()
    parse()
    gentext()
    #genout()
finally:
    # Also runs when a stage raises, so the handles are always flushed and
    # closed. gentext() may already have called leave() before exiting;
    # closing an already-closed file has no effect.
    leave()