2019-05-16 19:59:14 +02:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
2019-05-16 20:09:20 +02:00
|
|
|
# The OS/K Team licenses this file to you under the MIT license.
|
|
|
|
# See the LICENSE file in the project root for more information.
|
|
|
|
|
2019-05-29 22:59:17 +02:00
|
|
|
import os
|
2019-05-16 21:42:23 +02:00
|
|
|
import sys
|
2019-05-22 18:39:46 +02:00
|
|
|
import subprocess
|
2019-05-19 19:54:29 +02:00
|
|
|
from array import array
|
|
|
|
from tempfile import TemporaryFile
|
2019-05-16 19:59:14 +02:00
|
|
|
|
2019-05-19 19:54:29 +02:00
|
|
|
if len(sys.argv) != 4:
    # The order shown matches how the arguments are consumed below:
    # argv[1] is opened for reading, argv[3] is the file that gets written.
    print("Usage: {} (source file) (memory entry point) (output file)"
          .format(sys.argv[0]))
    sys.exit(1)

# Parsed before any file is opened so that a malformed address cannot leave
# freshly created (and never closed) files behind. base=0 lets int() accept
# prefixed literals such as 0x100000.
start_addr = int(sys.argv[2], base=0)

# Intermediate files, all named after the output path:
#   instrs  - textual, one normalised instruction per line
#   b_data  - raw bytes of the .data section
#   b_text  - raw bytes of the .text section
instrs = open(".{}.instr".format(sys.argv[3]), "w+")
b_data = open(".{}.data".format(sys.argv[3]), "w+b")
b_text = open(".{}.text".format(sys.argv[3]), "w+b")

# Register and instruction name lists live next to this script.
lst_regs = open(os.path.join(sys.path[0], "regs.lst"))
lst_instrs = open(os.path.join(sys.path[0], "instrs.lst"))

# Assembly source in, final binary out.
fi = open(sys.argv[1])
b_out = open(sys.argv[3], "wb")
|
|
|
|
|
|
|
|
def leave():
    """Close every file handle opened at start-up.

    Called on normal completion and before each error exit.
    """
    # Same order as the original sequence of close() calls.
    for handle in (fi, instrs, b_out, b_data, b_text, lst_regs, lst_instrs):
        handle.close()
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------

# Defines: name -> replacement text (filled by 'NAME := TEXT' lines)
pdefs = {}

# registers, in the order they appear in regs.lst
pregs = []

# instructions, in the order they appear in instrs.lst
pinstrs = []

# labels: name -> offset inside the respective section
plabels_text = {}
plabels_data = {}

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
def name_valid(name):
    """Return True when every character of *name* is allowed in a token.

    The check is case-insensitive. Allowed characters are ASCII letters,
    digits and ``[ $ . _ + ] =``. On the first disallowed character a
    ``BAD '<char>'`` diagnostic is printed and False is returned. An empty
    string contains no disallowed character and is therefore accepted.
    """
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$._+]='

    for c in name.lower():
        if c not in allowed:
            print("BAD '{}'".format(c))
            return False

    return True
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
def parse_lst_regs():
    """Load register names from ``lst_regs`` into ``pregs``.

    One entry is appended per line of the file, stripped of surrounding
    whitespace. Blank lines are kept on purpose: gentext() encodes a
    register as its index in this list, so dropping a line would renumber
    every register after it.
    """
    pregs.extend(entry.strip() for entry in lst_regs)
|
|
|
|
|
|
|
|
def parse_lst_instrs():
    """Load instruction names from ``lst_instrs`` into ``pinstrs``.

    One entry is appended per line of the file, stripped of surrounding
    whitespace. Blank lines are kept on purpose: gentext() encodes an
    instruction as its index in this list, so dropping a line would
    renumber every instruction after it.
    """
    pinstrs.extend(entry.strip() for entry in lst_instrs)
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
def parse_preproc(line):
    """Handle one unindented source line that is not a label.

    The line is split on single spaces into at most three fields and two
    forms are recognised:

    * ``NAME := TEXT`` stores TEXT in ``pdefs`` under NAME.
    * ``NAME = VALUE`` appends VALUE to the .data scratch file and records
      the offset it was written at in ``plabels_data[NAME]``. VALUE is
      either an integer literal (stored as 8 little-endian bytes) or a
      quoted string (stored one byte per character, followed by 1 to 8
      zero bytes so the section stays 8-byte aligned and the text is
      always followed by at least one NUL). For strings, ``NAME_len`` is
      also defined in ``pdefs`` as the character count.

    Malformed directives print ``Invalid format``, close all files and exit
    with status 1. Anything else only prints ``Unrecognized directive``.
    """
    global pdata

    def bad_format():
        # Never returns: sys.exit() raises SystemExit.
        print("Invalid format: {}".format(line))
        leave()
        sys.exit(1)

    tok = line.split(' ', 2)

    # preprocessor
    if len(tok) > 1 and tok[1] == ':=':
        if len(tok) < 3:
            bad_format()
        pdefs[tok[0]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            bad_format()

        name, value = tok[0], tok[2]

        if is_number(value):
            try:
                payload = int(value, base=0).to_bytes(
                    8, byteorder='little', signed=False)
            except OverflowError:
                # negative, or does not fit in 64 bits
                bad_format()

        elif value[:1] in ("'", '"'):
            s = value.strip()
            # A closing quote matching the opening one is required; a lone
            # quote character must not pass as an empty string.
            if len(s) < 2 or s[-1] != s[0]:
                bad_format()

            s = s[1:-1]

            try:
                payload = bytes(ord(c) for c in s)
            except ValueError:
                # a character that does not fit in one byte
                bad_format()

            pdefs[name + "_len"] = str(len(s))

            # align
            payload += bytes(8 - len(s) % 8)

        else:
            bad_format()

        plabels_data[name] = pdata
        pdata += b_data.write(payload)
        return

    print("Unrecognized directive: {}".format(line))
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
def apply_pdefs(line):
    """Normalise an instruction line and substitute ``pdefs`` entries.

    The line is split on whitespace; the first token (the mnemonic) is
    copied through unchanged. A trailing comma on the first of two operands
    is dropped (``mov rax, rax`` => ``mov rax rax``). Each operand that is
    a key of ``pdefs`` - either directly or as the content of ``[...]`` -
    is replaced by its definition, then checked with name_valid().

    Returns ``(True, new_line)`` with tokens joined by single spaces, or
    ``(False, None)`` after printing a diagnostic when the line has more
    than three tokens or contains an invalid token.
    """
    # split() without an argument collapses runs of blanks, so doubled
    # spaces no longer yield empty tokens that would be indexed below.
    tok = line.split()

    if not tok:
        return True, ''

    if len(tok) > 3:
        print("Too many tokens in line: {}".format(line))
        return False, None

    # 'mov rax, rax' => 'mov rax rax'
    if len(tok) == 3 and tok[1].endswith(','):
        tok[1] = tok[1][:-1]

    out = [tok[0]]

    for word in tok[1:]:
        bracketed = word.startswith('[')

        if bracketed:
            if len(word) < 2 or not word.endswith(']'):
                print("Invalid token in line: {}".format(line))
                return False, None
            word = word[1:-1]
        elif not word:
            # only reachable when the operand was a bare ','
            print("Invalid token in line: {}".format(line))
            return False, None

        if word in pdefs:
            word = pdefs[word]

        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return False, None

        out.append('[' + word + ']' if bracketed else word)

    return True, ' '.join(out)
|
|
|
|
|
|
|
|
def parse():
    """First pass over the source file ``fi``.

    Each line is classified by its first character:

    * empty lines and lines starting with ``#`` or ``;`` are skipped;
    * lines starting with a space or tab are instructions (unless, once
      stripped, they start with one of ``# ; @ ! /``). They go through
      apply_pdefs() and parse_instr(), and ``ptext`` grows by the size
      parse_instr() returns. A line apply_pdefs() rejects is dropped after
      its diagnostic;
    * other lines ending in ``:`` define a text label at the current
      ``ptext``; an invalid name closes all files and exits with status 1;
    * everything else is handed to parse_preproc().
    """
    global ptext

    for line in fi:
        line = line.rstrip()

        if not line:
            continue

        if line[0] in ('#', ';'):
            continue

        if line[0] in (' ', '\t'):
            line = line.lstrip()

            if line[0] in '#;@!/':
                continue

            ok, line = apply_pdefs(line)
            if ok:
                ptext += parse_instr(line)
                instrs.write("\n")
            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]
            # name_valid('') is True, so an empty name is rejected here.
            if label and name_valid(label):
                plabels_text[label] = ptext
            else:
                print("Bad label name: {}".format(label))
                leave()
                sys.exit(1)
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
def is_number(s):
    """Return True when ``int(s, base=0)`` accepts the string *s*.

    base=0 means the usual Python literal prefixes (``0x``, ``0o``, ``0b``)
    are understood, and a decimal literal with a leading zero such as
    ``010`` is rejected.
    """
    try:
        int(s, base=0)
    except ValueError:
        return False
    else:
        return True
|
|
|
|
|
|
|
|
def parse_instr(line):
    """Translate one normalised instruction line into its ``instrs`` form.

    *line* is a mnemonic followed by space-separated operands, optionally
    preceded by ``rep``. The text written to ``instrs`` (without a trailing
    newline) is the mnemonic, extended by one suffix per operand
    (``_r`` register, ``_i`` immediate or label, ``_m`` memory), followed
    by the operands with the marker words gentext() later turns into bytes.

    Memory operands need a length prefix: ``b[..]``, ``w[..]``, ``l[..]``
    or ``d[..]``, ``q[..]``. ``[reg+off]`` is written as
    ``%<p>off off reg``; any other ``[x]`` as ``%<p>mem x``.

    Returns the number of bytes the instruction is accounted for in .text.
    Malformed operands print a message, close all files and exit with
    status 1.
    """
    def fail(message):
        # Never returns: sys.exit() raises SystemExit.
        print(message)
        leave()
        sys.exit(1)

    length_prefixes = {'b': "%b", 'w': "%w", 'l': "%l", 'd': "%l", 'q': "%q"}

    tok = line.split()

    # 2 bytes for the instruction word itself
    size = 2

    if tok and tok[0] == "rep":
        instrs.write("%rep ")
        tok = tok[1:]
        size += 2

    if not tok:
        fail("Missing instruction: {}".format(line))

    instr_name = tok[0]
    instr_args = ''

    if len(tok) == 1:
        instrs.write("{}".format(instr_name))
        # 'size', not a literal 2: a 'rep' prefix has already added its own
        # 2 bytes, and gentext() does emit them.
        return size

    for word in tok[1:]:
        instr_args += ' '

        pref = None
        # True once a '[x]' operand has emitted its %?mem marker and 'x'
        # itself still has to be classified below.
        fellthrough = False

        # memory length prefixes
        if len(word) > 1 and word[1] == '[':
            pref = length_prefixes.get(word[0])
            if pref is None:
                fail("Bad memory length prefix")
            word = word[1:]

        # [reg+off] or [imm64]
        if word[0] == '[':
            if len(word) < 2 or word[-1] != ']':
                fail("Invalid memory operand: {}".format(line))
            word = word[1:-1]

            if pref is None:
                fail("Missing access length modifier: {}".format(line))

            instr_name += "_m"
            instr_args += pref

            if '+' in word:
                # +2 for A_OFF, +2 for offset, +2 for register
                size += 2 + 2 + 2
                instr_args += "off "

                reg, off = (part.strip() for part in word.split('+', 1))
                if not reg or not off:
                    fail("Invalid memory operand: {}".format(line))

                instr_args += "{} {}".format(off, reg)
                continue

            # +2 for A_MEM
            size += 2
            instr_args += "mem "
            fellthrough = True
            # FALLTHROUGH

        if is_number(word) or word not in pregs:
            # A number, or else a label; for now every immediate is 64-bit.
            # +2 for A_IMM64, +8 for immediate
            # NOTE(review): inside a memory operand the "%imm64" marker is
            # not written, yet its 2 bytes are still counted here, as in
            # the original code - confirm which side is intended.
            size += 2 + 8
            if not fellthrough:
                instr_name += "_i"
                instr_args += "%imm64 "
        else:
            # register
            size += 2
            if not fellthrough:
                instr_name += "_r"

        instr_args += word

    instrs.write("{}{}".format(instr_name, instr_args))
    return size
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
2019-05-22 18:39:46 +02:00
|
|
|
# Marker words that parse_instr() writes into the instruction listing;
# gentext() emits each one as a 2-byte little-endian value.
special_syms = {
    # memory operand markers, one per access length (b, w, l, q)
    "%bmem": 0x7001,
    "%wmem": 0x7002,
    "%lmem": 0x7004,
    "%qmem": 0x7008,

    # register+offset operand markers, one per access length
    "%boff": 0x7701,
    "%woff": 0x7702,
    "%loff": 0x7704,
    "%qoff": 0x7708,

    # immediate markers
    "%imm16": 0x7772,
    "%imm32": 0x7774,
    "%imm64": 0x7778,

    # 'rep' instruction prefix
    "%rep": 0x8000,
}
|
|
|
|
|
|
|
|
def gentext():
    """Second pass: turn the ``instrs`` listing into .text bytes in ``b_text``.

    Every whitespace-separated word of the listing is resolved, in this
    order, to:

    * a register      -> its index in ``pregs``, 2 bytes
    * an instruction  -> its index in ``pinstrs``, 2 bytes
    * a text label    -> text_start + offset, 8 bytes
    * a data label    -> data_start + offset + pdata_pad, 8 bytes
    * a marker word   -> its ``special_syms`` value, 2 bytes
    * a number        -> as many bytes as the last marker announced

    All values are little-endian and unsigned. An unresolvable word prints
    an error, closes all files and exits with status 1.
    """
    def emit(value, width):
        b_text.write(value.to_bytes(width, byteorder='little', signed=False))

    instrs.seek(0)

    text_start = 0 # 0x100000
    data_start = text_start + ptext
    # Always advances by 1..8 bytes; genout() inserts the same amount of
    # padding between the two sections.
    data_start += (8 - data_start % 8)

    # Width of the next numeric word. It starts at 8 so that a number seen
    # before any %imm/%off marker cannot hit an unbound variable.
    lastimm = 8

    for line in instrs:
        for word in line.split():
            if word in pregs:
                emit(pregs.index(word), 2)
                continue

            if word in pinstrs:
                emit(pinstrs.index(word), 2)
                continue

            if word in plabels_text:
                emit(text_start + plabels_text[word], 8)
                continue

            if word in plabels_data:
                emit(data_start + plabels_data[word] + pdata_pad, 8)
                continue

            if word in special_syms:
                if word == "%imm16":
                    lastimm = 2
                elif word == "%imm32":
                    lastimm = 4
                elif word == "%imm64":
                    lastimm = 8
                elif word[2:] == "off" and word[0] == '%':
                    lastimm = 2
                elif word[2:] == "mem" and word[0] == '%':
                    # parse_instr() sizes an absolute address as 8 bytes;
                    # without this, a width left over from an earlier
                    # %?off operand would truncate it.
                    lastimm = 8
                emit(special_syms[word], 2)
                continue

            if is_number(word):
                emit(int(word, base=0), lastimm)
                continue

            print("Assembly error, unknown token '{}' in line: {}".format(word, line))
            leave()
            sys.exit(1)
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
2019-05-22 18:39:46 +02:00
|
|
|
def genout():
    """Assemble the final image in ``b_out``: .text, zero padding, .data."""
    b_text.seek(0)
    b_data.seek(0)

    b_out.write(b_text.read())

    # 1 to 8 zero bytes, written in one call. This is the same amount
    # gentext() adds when it computes data_start from ptext.
    data_align = (8 - ptext % 8)
    b_out.write(bytes(data_align))

    b_out.write(b_data.read())
|
|
|
|
|
|
|
|
#-------------------------------------------------------------------------------
|
|
|
|
|
2019-05-19 19:54:29 +02:00
|
|
|
# Run the assembler stages in order. The finally clause closes the files
# even when a stage raises; the error paths that call leave() before
# sys.exit() are unaffected, because closing a closed file is a no-op.
try:
    parse_lst_instrs()
    parse_lst_regs()
    parse()
    gentext()
    genout()
finally:
    leave()
|
2019-05-16 19:59:14 +02:00
|
|
|
|