kvisc/as/k-as.py

870 lines
22 KiB
Python
Raw Normal View History

2019-05-16 19:59:14 +02:00
#!/usr/bin/python3
2019-05-16 20:09:20 +02:00
# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.
2019-06-21 12:13:09 +02:00
import re
2019-05-29 22:59:17 +02:00
import os
2019-05-16 21:42:23 +02:00
import sys
2019-05-22 18:39:46 +02:00
import subprocess
2019-05-19 19:54:29 +02:00
from array import array
from tempfile import TemporaryFile
2019-06-19 13:47:10 +02:00
from collections import OrderedDict
2019-05-16 19:59:14 +02:00
2019-07-01 13:16:17 +02:00
#print("k-as command line: '{}'".format(sys.argv))
2019-08-03 17:41:44 +02:00
# When True, gentext() prints the token list of every instruction line.
WANT_DISASM = False

# The script takes exactly four arguments besides its own name.
if len(sys.argv) != 5:
    usage = "Usage: {} (source file) (memory entry point) (output file) (symbols file)"
    print(usage.format(sys.argv[0]))
    sys.exit(1)

# Scratch files shared by the assembler passes.
source = TemporaryFile(mode="w+")     # source text once includes are resolved
instrs = TemporaryFile(mode="w+")     # one tokenized line per instruction
b_data = TemporaryFile(mode="w+b")    # raw bytes of the .data section
b_text = TemporaryFile(mode="w+b")    # raw bytes of the .text section

# Register and instruction lists are looked up under sys.path[0].
lst_dir = sys.path[0]
lst_regs = open(os.path.join(lst_dir, "regs.lst"))
lst_instrs = open(os.path.join(lst_dir, "instrs.lst"))

# Command-line inputs and outputs.
main_src = open(sys.argv[1])
b_out = open(sys.argv[3], "wb")
b_sym = open(sys.argv[4], "w")

# base=0 lets the entry point be written as decimal, 0x..., 0o... or 0b...
start_addr = int(sys.argv[2], base=0)
2019-07-01 13:16:17 +02:00
# os.chdir(os.path.dirname(sys.argv[1]))
2019-06-21 12:13:09 +02:00
2019-07-17 20:26:03 +02:00
def leave(i):
    """Close every file held open by the assembler, then exit with status `i`."""
    handles = (
        source, instrs,
        b_out, b_sym,
        b_data, b_text,
        main_src,
        lst_regs, lst_instrs,
    )
    for handle in handles:
        handle.close()

    sys.exit(i)
2019-05-19 19:54:29 +02:00
#-------------------------------------------------------------------------------
# Preprocessor defines (name -> replacement text)
pdefs = {}

# Registers (name -> index)
pregs = {}

# Instruction names, in the order they are read from instrs.lst
pinstrs = []

# Labels (name -> offset inside their section), kept in definition order
plabels_text = OrderedDict()
plabels_data = OrderedDict()

# Size of .data section
pdata = 0

# Size of .text section
ptext = 0

# Last non-local label seen; used as prefix for local (dot) labels
plastlabel = ''

# File currently being parsed
pcurfile = sys.argv[1]

# After parse() is done, pdata and ptext are never modified
#-------------------------------------------------------------------------------
def name_valid(name):
    """Return True if `name` is an acceptable label name.

    A name is valid when it is non-empty and every character, compared
    case-insensitively, is a letter a-z, a digit, or one of ``[$._+]=,``.
    The empty string is rejected so that callers may safely look at
    ``name[0]`` afterwards.
    """
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$._+]=,'
    return len(name) > 0 and all(c in allowed for c in name.lower())
2019-05-30 11:19:16 +02:00
def is_number(s):
    """Return True if `s` parses as an integer literal with ``int(s, base=0)``.

    With base 0 the usual prefixes (0x, 0o, 0b) and a leading sign are
    accepted; anything that makes ``int`` raise ValueError is not a number.
    """
    try:
        int(s, base=0)
    except ValueError:
        return False
    else:
        return True
2019-06-21 12:13:09 +02:00
# Trigger: the string must START with "<number> <operator> ... <number>".
arith_expr = re.compile(r'((0x[0-9A-Fa-f]+|[0-9]+)\s*([|&^+\-*]|<<|>>))+\s*(0x[0-9A-Fa-f]+|[0-9]+)')
# Prefixed integer literals; removed before the character whitelist check so
# that their letters (x, b, o, a-f) are not mistaken for identifiers.
arith_literal = re.compile(r'0[xX][0-9A-Fa-f]+|0[bB][01]+|0[oO][0-7]+')
# Everything an arithmetic expression may contain besides prefixed literals.
arith_chars = frozenset("0123456789|&^+-*%~()<> \t")


def arith_eval(s):
    """Evaluate `s` if it is a constant arithmetic expression.

    Returns the result as a decimal string, or `s` unchanged when it does not
    look like an arithmetic expression or cannot be evaluated.

    SECURITY NOTE: this still relies on eval().  `arith_expr` only anchors the
    beginning of the string, so on its own it would let arbitrary trailing
    Python through.  To keep names, attribute access and calls away from
    eval(), the whole string must consist of integer literals, operators,
    parentheses and blanks, and evaluation runs without builtins.
    """
    if not arith_expr.match(s):
        return s

    leftover = arith_literal.sub('', s)
    if not all(c in arith_chars for c in leftover):
        return s

    try:
        return str(eval(s, {"__builtins__": {}}, {}))
    except (SyntaxError, TypeError, ValueError, ArithmeticError):
        # Not a well-formed expression after all: leave it for later stages
        # to report as an unknown token.
        return s
2019-05-19 19:54:29 +02:00
#-------------------------------------------------------------------------------
def parse_lst_regs():
    """Fill `pregs` from the register list file.

    Every non-blank line describes one register; all whitespace-separated
    names on that line are stored with the same index.  Indices count
    non-blank lines starting from 0.
    """
    index = 0
    for raw in lst_regs:
        names = raw.split()
        if not names:
            continue

        for name in names:
            pregs[name] = index
        index += 1
2019-05-19 19:54:29 +02:00
def parse_lst_instrs():
    """Append every line of the instruction list file, stripped, to `pinstrs`.

    The position of a name in `pinstrs` is its line position in the file
    (blank lines are kept as empty entries so positions are not shifted).
    """
    pinstrs.extend(entry.strip() for entry in lst_instrs)
#-------------------------------------------------------------------------------
2019-05-30 12:19:38 +02:00
inc_depth = 0
inc_depth_max = 16


# Quickly goes through source file and resolves "include" directives ONLY
def do_includes(fi):
    """Copy `fi` into `source`, expanding ``include "file"`` directives.

    Blank lines are dropped and every other line is copied with trailing
    whitespace removed.  An include directive is replaced by the recursively
    expanded content of the named file, surrounded by ``$file: name:`` marker
    lines so that later passes can tell which file a line came from (spaces
    are removed from the name in the marker).

    Any malformed directive, unreadable file, or nesting of `inc_depth_max`
    levels or more prints a message and terminates through leave(1).
    """
    global inc_depth
    global pcurfile

    for line in fi:
        line = line.rstrip()
        tok = line.split(None, 1)

        if len(tok) == 0:
            continue

        if tok[0] != "include":
            source.write("{}\n".format(line))
            continue

        if len(tok) == 1:
            print("Missing parameter for include directive")
            leave(1)

        # The file name must be wrapped in a matching pair of quotes
        if tok[1][0] not in "'\"" or tok[1][-1] != tok[1][0]:
            print("Invalid format for include directive: {}".format(line))
            leave(1)

        old_curf = pcurfile
        pcurfile = tok[1][1:-1]

        try:
            new_fi = open(pcurfile, "r")
        except (OSError, ValueError):
            # ValueError covers names open() refuses outright (e.g. NUL bytes)
            print("Couldn't open file: {}".format(line))
            leave(1)

        # The with-block closes the included file on every exit path,
        # including the SystemExit raised by leave().
        with new_fi:
            inc_depth += 1
            if inc_depth >= inc_depth_max:
                print("Maximal include depth reached: {}".format(line))
                leave(1)

            source.write("$file: {}:\n".format(pcurfile.replace(' ', '')))
            do_includes(new_fi)

        pcurfile = old_curf
        source.write("$file: {}:\n".format(pcurfile.replace(' ', '')))

    # Balances the increment done by the caller before recursing
    inc_depth -= 1
2019-05-30 12:19:38 +02:00
#-------------------------------------------------------------------------------
def _strip_comment(line):
    """Cut `line` at the first comment character outside a quoted string.

    Comment characters are ``# ; @ ! /``.  A string starts at ``'`` or ``"``
    and ends at the next occurrence of the SAME quote character, so the other
    quote character may appear inside it.  Backslash escapes are not
    interpreted here.

    Returns ``(text, unterminated)``: `text` is the line without its comment
    (right-stripped when a comment was removed) and `unterminated` is True
    when the line ends inside a string.
    """
    opener = None
    for pos, ch in enumerate(line):
        if opener is not None:
            if ch == opener:
                opener = None
        elif ch in "'\"":
            opener = ch
        elif ch in '#;@!/':
            return line[:pos].rstrip(), False
    return line, opener is not None


def parse():
    """First pass over the include-expanded source.

    For every line of `source`:
      * ``$file: name`` markers update `pcurfile`;
      * comments are removed and blank lines skipped;
      * lines starting with a space or tab are instructions: they are
        written to `instrs` prefixed with the current file and text offset,
        and `ptext` grows by the size returned by parse_instr();
      * lines ending with ``:`` define a text label at the current offset
        (labels starting with ``.`` are local to the last non-local label);
      * anything else is handed to parse_preproc().

    Errors print a message and terminate through leave(1).
    """
    global ptext
    global pcurfile
    global plastlabel

    source.seek(0)
    pcurfile = sys.argv[1]

    for line in source:
        line = line.rstrip()

        if len(line) == 0:
            continue

        # File marker written by do_includes(); len("$file: ") == 7
        if len(line) > 7 and line[:7] == "$file: ":
            pcurfile = line[7:]
            continue

        line, unterminated = _strip_comment(line)
        if unterminated:
            print("Unterminated string in line: {}".format(line))
            leave(1)

        if len(line) == 0:
            continue

        # Instruction
        if line[0] == ' ' or line[0] == '\t':
            line = line.lstrip()
            instrs.write(pcurfile + ' ' + hex(ptext) + ' ')
            ptext += parse_instr(line)
            instrs.write("\n")
            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]
            # The emptiness check keeps label[0] below from failing on ":"
            if len(label) == 0 or not name_valid(label):
                print("Bad label name: {}".format(label))
                leave(1)

            if label[0] == '.':
                label = plastlabel + label
            else:
                plastlabel = label

            plabels_text[label] = ptext
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)
#-------------------------------------------------------------------------------
2019-06-21 19:38:31 +02:00
# Characters accepted after a backslash in string data, and their values
escape_dict = {
    'n': '\n',
    't': '\t',
    'r': '\r',
    'v': '\v',
    'f': '\f',
    '"': '"',
    '\'': '\'',
    '\\': '\\',
}


def parse_preproc(line):
    """Handle a preprocessor define or a data definition.

    Two forms are recognized (fields are whitespace-separated):

      ``NAME := VALUE``  stores VALUE in `pdefs` under NAME.
      ``NAME = VALUE``   defines a data label at the current data offset and
                         appends to `b_data` according to VALUE:
                           * an integer   -> 8 bytes, little-endian
                             (two's complement when negative);
                           * ``[N]``      -> N zero bytes (N may be a define);
                           * a quoted string -> one byte per character plus a
                             terminating zero byte; ``NAME_len`` is defined
                             as the string length without the terminator.

    Names starting with ``.`` are prefixed with the last non-local label.
    `pdata` is advanced by the number of bytes written.  Any other input
    prints a message and terminates through leave(1).
    """
    global pdata

    tok = line.split(None, 2)

    # preprocessor
    if len(tok) > 1 and tok[1] == ':=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            leave(1)

        s = tok[0]
        if s in pdefs:
            s = pdefs[s]
        if s[0] == '.':
            s = plastlabel + s

        pdefs[s] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            leave(1)

        label = tok[0]
        if label[0] == '.':
            label = plastlabel + label

        plabels_data[label] = pdata

        # number data
        if is_number(tok[2]):
            value = int(tok[2], base=0)
            try:
                # is_number() accepts negative literals, so sign them here
                raw = value.to_bytes(8, byteorder='little', signed=(value < 0))
            except OverflowError:
                print("Invalid format: {}".format(line))
                leave(1)
            pdata += b_data.write(raw)

        # buffer / bss
        elif tok[2][0] == '[':
            s = tok[2][1:-1].strip()
            if tok[2][-1] != ']' or len(s) == 0:
                print("Invalid bss format: {}".format(line))
                leave(1)

            if s[0] == '.':
                s = plastlabel + s
            if s in pdefs:
                s = pdefs[s]

            if not is_number(s):
                print("Invalid bss format: {}".format(line))
                leave(1)

            count = int(s, base=0)
            if count < 0:
                print("Invalid bss format: {}".format(line))
                leave(1)

            pdata += b_data.write(bytearray(count))

        # string data
        elif tok[2][0] in "'\"":
            s = tok[2].strip()
            # Needs both an opening and a matching closing quote
            if len(s) < 2 or s[-1] != s[0]:
                print("Invalid format: {}".format(line))
                leave(1)

            s = s[1:-1]
            data = bytearray()
            escaping = False

            for c in s:
                # escape sequences
                if not escaping and c == '\\':
                    escaping = True
                    continue

                if escaping:
                    escaping = False
                    if c in escape_dict:
                        c = escape_dict[c]
                    else:
                        print("Unrecognized escape sequence: {}".format(line))
                        leave(1)

                # Each character is stored in a single byte
                if ord(c) > 0xFF:
                    print("Invalid format: {}".format(line))
                    leave(1)
                data.append(ord(c))

            if escaping:
                # Backslash with nothing after it
                print("Unrecognized escape sequence: {}".format(line))
                leave(1)

            real_len = len(data)
            data.append(0)      # null terminator
            pdata += b_data.write(data)

            pdefs[label + "_len"] = str(real_len)

        else:
            print("Invalid format: {}".format(line))
            leave(1)

        return

    print("Unrecognized directive: {}".format(line))
    leave(1)
2019-05-19 19:54:29 +02:00
#-------------------------------------------------------------------------------
2019-06-12 15:30:35 +02:00
# Condition suffix -> 5-bit condition code.  Bit 4 (0b10000) negates the
# condition held in the low four bits, e.g. 'a' is stored as "not be".
pconds = {
    'c':    0b00001,
    'o':    0b00010,
    'z':    0b00011,
    'e':    0b00011,
    's':    0b00100,
    'pe':   0b00101,
    'po':   0b10101,
    'b':    0b00001,
    'be':   0b00110,
    'l':    0b00111,
    'le':   0b01000,
    'a':    0b10110, # nbe
    'ae':   0b10001, # nb
    'g':    0b11000, # nle
    'ge':   0b10111, # nl
    'axz':  0b01001,
    'bxz':  0b01010,
    'cxz':  0b01011,
    'dxz':  0b01100,
    'axnz': 0b11001,
    'bxnz': 0b11010,
    'cxnz': 0b11011,
    'dxnz': 0b11100,
}


def get_cond_mask(cond, line):
    """Return the 5-bit condition code for the suffix `cond`.

    A leading ``n`` negates the condition named by the rest of the suffix.
    The negation bit is toggled (XOR) rather than set, so that negating an
    entry which is itself stored as a negation gives the positive condition:
    ``na`` is ``be``, ``nge`` is ``l``, and so on.

    `cond` must be non-empty.  `line` is only used in the error message; an
    unknown suffix prints it and terminates through leave(1).
    """
    mask = 0
    if cond[0] == 'n':
        cond = cond[1:]
        mask = 0b10000

    if cond not in pconds:
        print("Invalid condition suffix: {}".format(line))
        leave(1)

    return mask ^ pconds[cond]
2019-07-24 16:52:26 +02:00
2019-06-12 15:30:35 +02:00
#-------------------------------------------------------------------------------
2019-07-24 16:52:26 +02:00
# Operand format byte.  For memory formats the access length in bytes is
# OR-ed into the low bits; immediate formats already carry their length.
fmts = {
    "r":        0b00000000,
    "m_r":      0b00100000,
    "m_rr":     0b01000000,
    "m_rriw":   0b01100000,
    "m_rrid":   0b10000000,
    "m_rrii":   0b10100000,
    "m_riq":    0b11000000,
    "imm8":     0b11100001,
    "imm16":    0b11100010,
    "imm32":    0b11100100,
    "imm64":    0b11101000,
}

# Memory access length prefix -> length in bytes
pref2len = {
    "b" : 1,
    "w" : 2,
    "d" : 4,
    "l" : 4,
    "q" : 8,
}


def _operand_error(line):
    """Report a malformed operand in `line` and terminate."""
    print("Wrong syntax in line: '{}'".format(line))
    leave(1)


def _split_suffixes(instr, line):
    """Split ``name.suffixes`` into the bare name and the suffix byte.

    The suffix byte has bit 7 set for a ``rep`` suffix and the condition code
    from get_cond_mask() in its low bits; it is 0 when there is no suffix.
    """
    b2 = 0

    if len(instr) > 2 and '.' in instr:
        instr, suf = instr.split('.', 1)

        if len(instr) == 0:
            print("Missing instruction name before suffixes: {}".format(line))
            leave(1)

        if suf[:3] == "rep":
            # Accept both "rep<cond>" and "rep.<cond>"
            suf = suf[3:]
            if suf[:1] == '.':
                suf = suf[1:]
            b2 |= 1 << 7    # REP

        if len(suf) > 0:
            b2 |= get_cond_mask(suf, line)

    return instr, b2


def _split_scaled(term, line):
    """Split a ``reg*imm`` term into ``(reg, imm)``."""
    parts = term.split('*')
    if len(parts) != 2:
        _operand_error(line)
    return parts[0], parts[1]


def _encode_memory_operand(word, mlen, line):
    """Encode the inside of a ``[...]`` operand.

    Returns ``(text, size)``: the tokens to append to the instruction line
    and the number of bytes they will occupy once emitted.
    """
    # cheap way of getting [reg - imm] to work
    word = word.replace('-', '+ -')
    # remove every space
    word = word.replace(' ', '')

    #
    # Offsets and scaled registers
    #
    if '+' in word or '*' in word:
        reg1 = "zero"
        reg2 = "zero"
        imm1 = '1'
        imm2 = '0'
        wtok = word.split('+')

        # [reg*imm]
        if len(wtok) == 1:
            reg2, imm1 = _split_scaled(wtok[0], line)

        # [reg+reg], [reg+imm], [reg*imm+imm], [reg+reg*imm]
        elif len(wtok) == 2:
            # Must be [reg*imm+imm]
            if '*' in wtok[0]:
                if not is_number(wtok[1]):
                    _operand_error(line)
                reg2, imm1 = _split_scaled(wtok[0], line)
                imm2 = wtok[1]
            # Must be [reg+reg*imm]
            elif '*' in wtok[1]:
                reg1 = wtok[0]
                reg2, imm1 = _split_scaled(wtok[1], line)
            # [reg+imm]
            elif is_number(wtok[1]):
                reg1 = wtok[0]
                imm2 = wtok[1]
            # Must be [reg+reg]
            else:
                reg1 = wtok[0]
                reg2 = wtok[1]

        # [reg+reg+imm], [reg+reg*imm+imm]
        elif len(wtok) == 3:
            reg1 = wtok[0]
            imm2 = wtok[2]
            if '*' in wtok[1]:
                reg2, imm1 = _split_scaled(wtok[1], line)
            else:
                reg2 = wtok[1]

        else:
            _operand_error(line)

        if imm1 == '1':
            # [reg+reg]: format byte + 2 registers
            if imm2 == '0':
                return ("%%imm8 {} {} {}"
                        .format(fmts["m_rr"]|mlen, '$'+reg1, '$'+reg2), 3)

            # [reg+reg+imm]: format byte + 2 registers + 16-bit offset
            return ("%%imm8 {} {} {} %%signed %%imm16 {}"
                    .format(fmts["m_rriw"]|mlen, '$'+reg1, '$'+reg2, imm2), 5)

        # [reg+reg*imm+imm]: format byte + 2 registers + scale + 32-bit offset
        return ("%%imm8 {} {} {} %%imm8 {} %%signed %%imm32 {}"
                .format(fmts["m_rrii"]|mlen, '$'+reg1, '$'+reg2, imm1, imm2), 8)

    # [reg]: format byte + register
    if '$'+word in pregs:
        return ("%%imm8 {} {}".format(fmts["m_r"]|mlen, '$'+word), 2)

    # [imm], converted to [zero+zero+imm]
    return ("%%imm8 {} $zero $zero %%signed %%imm32 {}"
            .format(fmts["m_rrid"]|mlen, word), 7)


def parse_instr(line):
    """Translate one instruction line into tokens for gentext().

    `line` is the instruction text without leading whitespace or comment:
    ``name[.suffixes] [operand, operand, ...]``.  The tokenized form is
    written to `instrs` (without a trailing newline) and the number of bytes
    the instruction will occupy in the text section is returned; 0 is
    returned for an empty line.

    Operands may be registers, immediates (numbers, 'c' character literals,
    ``$reg`` register indices, constant arithmetic), labels, or memory
    accesses written ``<b|w|d|l|q>[...]``.  Defines from `pdefs` are
    substituted and local ``.names`` are prefixed with the last label.
    Syntax errors print a message and terminate through leave(1).
    """
    if line is None or len(line) == 0:
        return 0

    tok = line.split(None, 1)
    instr = tok[0].strip()
    params = tok[1].strip() if len(tok) > 1 else None

    # Opcode byte
    size = 1

    # Byte 2 (rep|lock|0|cond), only emitted when non-zero
    instr_name, b2 = _split_suffixes(instr, line)
    instr_args = ''

    if params is None or len(params) == 0:
        if b2 == 0:
            instrs.write("{}".format(instr_name))
        else:
            size += 1
            instrs.write("%%suff {} %%imm8 {}".format(instr_name, b2))
        return size

    tok = params.split(',')

    # 'call' special case... temporary
    if instr_name == 'call':
        if len(tok) == 2:
            instr_name = 'xcall2'
        elif len(tok) == 3:
            instr_name = 'xcall3'

    #
    # Parse operands
    #
    for word in tok:
        word = word.strip()
        instr_args += ' '
        mlen = 0

        if len(word) == 0:
            _operand_error(line)

        # local labels
        if word[0] == '.':
            word = plastlabel + word

        # preprocessor
        if word in pdefs:
            word = pdefs[word]

        # arithmetic expressions
        word = arith_eval(word)

        # memory length prefixes
        if len(word) > 2 and '[' in word:
            if word[0] in 'bwldq':
                mlen = pref2len[word[0]]
            else:
                print("Bad memory length prefix: {}".format(line))
                leave(1)

            word = word[1:].strip()
            if word[:1] != '[':
                _operand_error(line)

        #
        # Memory operand
        #
        if word[0] in '[(':
            if word[-1] not in '])':
                _operand_error(line)
            word = word[1:-1]

            # preprocessor, again
            if word in pdefs:
                word = pdefs[word]

            # Make sure we got an access length prefix
            if mlen == 0:
                print("Missing access length modifier: {}".format(line))
                leave(1)

            text, nbytes = _encode_memory_operand(word, mlen, line)
            instr_args += text
            size += nbytes
            continue

        # preprocessor, yet again
        if word in pdefs:
            word = pdefs[word]

        # characters 'c'
        if len(word) == 3 and word[0] == word[-1] == "'":
            word = str(ord(word[1]))

        # register index $reg: becomes an immediate holding the index.
        # No restriction on the name length, otherwise a longer register
        # name would fall through to the label case below and be counted
        # with a different size than gentext() emits for it.
        if word[0] == '$' and word in pregs:
            word = str(pregs[word])

        # immediates: format byte + smallest encoding that holds the value
        if is_number(word):
            n = int(word, base=0)

            if n < 0 or n > 0xFFFFFFFF:
                kind, nbytes = "imm64", 8
            elif n > 0xFFFF:
                kind, nbytes = "imm32", 4
            elif n > 0xFF:
                kind, nbytes = "imm16", 2
            else:
                kind, nbytes = "imm8", 1

            size += 1 + nbytes
            instr_args += "%%imm8 {} %%{} {}".format(fmts[kind], kind, word)
            continue

        # register
        if '$'+word in pregs:
            size += 1
            instr_args += '$'+word
            continue

        # it's a label (a 32-bit immediate): format byte + imm32
        size += 5
        instr_args += "%%imm8 {} ".format(fmts["imm32"])

        if word[0] == '.':
            instr_args += plastlabel

        instr_args += word

    if b2 == 0:
        instrs.write("{} {}".format(instr_name, instr_args))
    else:
        size += 1
        instrs.write("%%suff {} %%imm8 {} {}".format(instr_name, b2, instr_args))

    return size
#-------------------------------------------------------------------------------
2019-05-22 18:39:46 +02:00
# Marker tokens written by parse_instr(); they emit no bytes themselves
special_syms = {
    "%%suff",
    "%%imm8",
    "%%imm16",
    "%%imm32",
    "%%imm64",
    "%%signed",
}


def _emit_int(value, width, signed, line):
    """Write `value` to `b_text` as `width` little-endian bytes.

    A value that does not fit is reported as an assembly error for `line`
    and terminates through leave(1).
    """
    try:
        raw = value.to_bytes(width, byteorder='little', signed=signed)
    except OverflowError:
        print("Assembly error, value '{}' does not fit in {} byte(s) in line: {}"
              .format(value, width, line))
        leave(1)
    b_text.write(raw)


def gentext():
    """Second pass: turn the tokenized lines of `instrs` into text bytes.

    Each line starts with two bookkeeping fields (file name and text offset)
    which are skipped.  Every following token is, in this order of
    precedence:
      * a register name        -> 1 byte, its index;
      * an instruction name    -> 1 byte, its position in `pinstrs`, with
                                  bit 7 set when preceded by ``%%suff``;
      * a text or data label   -> 4 bytes, its absolute address;
      * a ``%%`` marker        -> sets the width of the next number;
      * a number               -> written with the current width, signed
                                  when it starts with ``-``.
    The data section is assumed to start at the first multiple of 8 at or
    after the end of the text section.  Unknown tokens and values that do
    not fit print a message and terminate through leave(1).
    """
    text_start = start_addr
    data_start = text_start + ptext
    if (data_start % 8) != 0:
        data_start += (8 - data_start % 8)

    # name -> first position in pinstrs, built once instead of scanning the
    # list for every token
    opcodes = {}
    for idx, name in enumerate(pinstrs):
        opcodes.setdefault(name, idx)

    # Width in bytes selected by each marker
    imm_widths = {
        "%%imm8":   1,
        "%%imm16":  2,
        "%%imm32":  4,
        "%%imm64":  8,
        "%%signed": 2,
    }

    instrs.seek(0)
    suff_mask = 0
    lastimm = None

    for line in instrs:
        tok = line.strip().split()

        if WANT_DISASM:
            print(tok)

        # Skip file name and text offset
        tok = tok[2:]

        for word in tok:
            if len(word) == 0:
                continue

            if word in pregs:
                _emit_int(pregs[word], 1, False, line)
                continue

            if word in opcodes:
                _emit_int(opcodes[word] | suff_mask, 1, False, line)
                suff_mask = 0
                continue

            if word in plabels_text:
                _emit_int(text_start + plabels_text[word], 4, False, line)
                continue

            if word in plabels_data:
                _emit_int(data_start + plabels_data[word], 4, False, line)
                continue

            if word in special_syms:
                if word == "%%suff":
                    suff_mask = 1<<7
                else:
                    lastimm = imm_widths[word]
                continue

            # A number is only meaningful after a width marker
            if is_number(word) and lastimm is not None:
                _emit_int(int(word, base=0), lastimm, word[0] == '-', line)
                continue

            print("Assembly error, unknown token '{}' in line: {}".format(word, line))
            leave(1)
2019-05-19 19:54:29 +02:00
#-------------------------------------------------------------------------------
2019-06-19 13:47:10 +02:00
def sort_by_list(dict_, list_):
    """Move the entries of `dict_` named in `list_` to its end, in list order.

    `dict_` must provide ``move_to_end`` (e.g. an OrderedDict); it is
    modified in place.  A key missing from `dict_` raises KeyError.
    """
    relocate = dict_.move_to_end
    for wanted in list_:
        relocate(wanted)
def gensym():
    """Write the symbols file: one ``name address`` line per label.

    Label offsets in `plabels_text` and `plabels_data` are converted IN PLACE
    to absolute addresses (text starts at `start_addr`, data at the next
    multiple of 8 at or after the end of text), then all labels are written
    to `b_sym` sorted by address.
    """
    text_base = start_addr
    data_base = text_base + ptext
    misalign = data_base % 8
    if misalign != 0:
        data_base += 8 - misalign

    for name in plabels_text:
        plabels_text[name] += text_base

    for name in plabels_data:
        plabels_data[name] += data_base

    # Text labels first, then data labels; a name defined in both keeps its
    # text position but takes the data address.
    merged = OrderedDict(plabels_text)
    merged.update(plabels_data)

    by_address = sorted(merged.items(), key=lambda entry: entry[1])
    for name, address in by_address:
        b_sym.write("{} {}\n".format(name, address))
#-------------------------------------------------------------------------------
2019-05-22 18:39:46 +02:00
def genout():
    """Write the output image: text bytes, zero padding, then data bytes.

    The padding brings the data section to the address gentext() and
    gensym() use for it, i.e. the first multiple of 8 at or after
    ``start_addr + ptext``.  Computing it from the absolute address keeps
    the file layout and the label addresses in agreement even when
    `start_addr` itself is not a multiple of 8.
    """
    b_text.seek(0)
    b_data.seek(0)

    b_out.write(b_text.read())

    # Number of bytes up to the next 8-byte boundary (0 if already aligned)
    data_align = -(start_addr + ptext) % 8
    b_out.write(bytes(data_align))

    b_out.write(b_data.read())
#-------------------------------------------------------------------------------
2019-05-19 19:54:29 +02:00
# Load the instruction and register tables
parse_lst_instrs()
parse_lst_regs()

# Expand includes, then run the passes in order: parse the source,
# emit the text section, write the image, write the symbols file
do_includes(main_src)
parse()
gentext()
genout()
gensym()

#-------------------------------------------------------------------------------

summary = "Finished producing {}\n\ttext section size: {} bytes\n\tdata section size: {} bytes\n"
print(summary.format(sys.argv[3], ptext, pdata))

leave(0)
2019-07-24 17:42:42 +02:00
#-------------------------------------------------------------------------------