#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

"""Assembler: turns an assembly source file into a flat binary image.

Command line:
    (source file) (memory entry point) (output file) (symbols file)

Passes, in the order main() runs them:
    do_includes()  expands "include" directives into the `source` scratch file
    parse()        strips comments, records labels and .data, and writes one
                   intermediate text line per instruction into `instrs`
    gentext()      encodes the intermediate lines into `b_text`
    genout()       writes text, alignment padding, then data to the output
    gensym()       writes "label address" lines to the symbols file
"""

import re
import os
import sys
import subprocess
from array import array
from tempfile import TemporaryFile
from collections import OrderedDict

WANT_DISASM = False

# Scratch files shared by all passes
source = TemporaryFile(mode="w+")
instrs = TemporaryFile(mode="w+")
b_data = TemporaryFile(mode="w+b")
b_text = TemporaryFile(mode="w+b")

# Opened by main() from the command line
lst_regs = None
lst_instrs = None
main_src = None
b_out = None
b_sym = None

# Address the image is assembled for (set by main())
start_addr = 0


def leave():
    """Close every file this script has opened so far."""
    handles = (source, instrs, b_out, b_sym, b_data, b_text,
               main_src, lst_regs, lst_instrs)
    for handle in handles:
        if handle is not None:
            handle.close()


def fatal(message):
    """Print *message*, close all files and exit with status 1."""
    print(message)
    leave()
    sys.exit(1)

#-------------------------------------------------------------------------------

# Defines
pdefs = dict()

# registers
pregs = list()

# instructions
pinstrs = list()

# labels
plabels_text = OrderedDict()
plabels_data = OrderedDict()

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# for local labels
plastlabel = ''

# after parse() is done, pdata and ptext are never modified

#-------------------------------------------------------------------------------

def name_valid(name):
    """Return True if every character of *name* is allowed in a label."""
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$._+]=,'
    return all(c in allowed for c in name.lower())


def is_number(s):
    """Return True if *s* parses as an integer literal (any base prefix)."""
    try:
        int(s, base=0)
    except ValueError:
        return False
    return True


# number (op number)+ with optional blanks around each operator
arith_expr = re.compile(
    r'(?:0x[0-9A-Fa-f]+|[0-9]+)'
    r'(?:\s*(?:[|&^+\-*]|<<|>>)\s*(?:0x[0-9A-Fa-f]+|[0-9]+))+')


def arith_eval(s):
    """Evaluate *s* if it is a constant integer expression.

    Returns the result as a decimal string, or *s* unchanged when it is not
    such an expression.
    """
    # NOTE(review): eval() runs on text from the source file. The WHOLE
    # string must match arith_expr (fullmatch, not a prefix match) so that
    # only integer literals and operators can ever reach it.
    if arith_expr.fullmatch(s):
        try:
            return str(eval(s, {"__builtins__": {}}, {}))
        except SyntaxError:
            # e.g. a literal with a leading zero; leave it for later passes
            return s

    return s

#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Load register names; a register's index is its line number."""
    pregs.extend(line.strip() for line in lst_regs)


def parse_lst_instrs():
    """Load instruction names; an instruction's index is its line number."""
    pinstrs.extend(line.strip() for line in lst_instrs)

#-------------------------------------------------------------------------------

inc_depth = 0
inc_depth_max = 16

# Quickly goes through source file and resolves "include" directives ONLY
def do_includes(fi):
    """Copy *fi* into `source`, recursively expanding include directives.

    Blank lines are dropped; every other line is written right-stripped.
    """
    global inc_depth

    for line in fi:
        line = line.rstrip()
        tok = line.split(None, 1)

        if len(tok) == 0:
            continue

        if tok[0] != "include":
            source.write("{}\n".format(line))
            continue

        if len(tok) == 1:
            fatal("Missing parameter for include directive")

        if tok[1][0] not in "'\"" or tok[1][-1] != tok[1][0]:
            fatal("Invalid format for include directive: {}".format(line))

        inc = tok[1][1:-1]

        try:
            new_fi = open(inc, "r")
        except OSError:
            fatal("Couldn't open file: {}".format(line))

        # 'with' closes the included file even when a nested error exits
        with new_fi:
            inc_depth += 1
            if inc_depth >= inc_depth_max:
                fatal("Maximal include depth reached: {}".format(line))

            do_includes(new_fi)
            inc_depth -= 1

#-------------------------------------------------------------------------------

def parse():
    """First pass over `source`.

    Indented lines are instructions, lines ending in ':' are text labels,
    anything else is handed to parse_preproc().
    """
    global ptext
    global plastlabel

    source.seek(0)

    for line in source:
        line = line.rstrip()

        if len(line) == 0:
            continue

        # Cut the line at the first comment character outside quotes
        quote = False
        for i, c in enumerate(line):
            if c in "'\"":
                quote = not quote

            if c in '#;@!/' and not quote:
                line = line[:i].rstrip()
                break

        if quote:
            fatal("Unterminated string in line: {}".format(line))

        if len(line) == 0:
            continue

        # Instruction
        if line[0] == ' ' or line[0] == '\t':
            line = line.lstrip()

            # each intermediate line starts with the instruction's address
            instrs.write(hex(ptext + start_addr) + ' ')
            ptext += parse_instr(line)
            instrs.write("\n")

            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]

            if len(label) == 0 or not name_valid(label):
                fatal("Bad label name: {}".format(label))

            # labels starting with '.' are local to the last global label
            if label[0] == '.':
                label = plastlabel + label
            else:
                plastlabel = label

            plabels_text[label] = ptext
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)

#-------------------------------------------------------------------------------

def parse_preproc(line):
    """Handle a 'name := text' define or a 'label = data' definition.

    Data is a number (8 bytes, little endian), '[n]' (zeroed buffer) or a
    quoted string. Buffers and strings also define '<label>_len'.
    """
    global pdata

    tok = line.split(None, 2)

    # preprocessor
    if len(tok) > 1 and tok[1] == ':=':
        if len(tok) < 3:
            fatal("Invalid format: {}".format(line))

        pdefs[tok[0]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            fatal("Invalid format: {}".format(line))

        label = tok[0]
        if label[0] == '.':
            label = plastlabel + label

        plabels_data[label] = pdata
        value = tok[2]

        # number data
        if is_number(value):
            number = int(value, base=0)

            if not -(1 << 63) <= number < (1 << 64):
                fatal("Number does not fit in 64 bits: {}".format(line))

            # negative values are stored as two's complement
            encoded = (number & 0xFFFFFFFFFFFFFFFF).to_bytes(
                8, byteorder='little', signed=False)
            b_data.write(encoded)
            pdata += len(encoded)

        # buffer / bss
        elif value[0] == '[':
            if value[-1] != ']':
                fatal("Invalid bss format: {}".format(line))

            s = value[1:-1].strip()

            if not is_number(s) or int(s, base=0) < 0:
                fatal("Invalid bss format: {}".format(line))

            # always pads by 1..8 bytes up to the next multiple of 8
            i = int(s, base=0)
            i = i + (8 - i % 8)

            b_data.write(bytearray(i))
            pdefs[label + "_len"] = s
            pdata += i

        # string data
        elif value[0] in "'\"":
            s = value.strip()

            if len(s) < 2 or s[-1] != s[0]:
                fatal("Invalid string format: {}".format(line))

            s = s[1:-1]
            payload = bytearray()
            escaping = False

            for c in s:
                # escape sequences
                if escaping:
                    escaping = False

                    if c == 'n':
                        c = '\n'
                    elif c == 't':
                        c = '\t'
                    elif c != '\\':
                        fatal("Unrecognized escape sequence: {}".format(line))

                elif c == '\\':
                    escaping = True
                    continue

                # one byte per character
                if ord(c) > 0xFF:
                    fatal("Unsupported character in string: {}".format(line))

                payload.append(ord(c))

            if escaping:
                fatal("Unrecognized escape sequence: {}".format(line))

            real_len = len(payload)

            # align on the number of bytes actually emitted (not on the raw
            # source length, which also counts the escape backslashes);
            # this always leaves at least one terminating zero byte
            payload.extend(bytes(8 - real_len % 8))

            b_data.write(payload)
            pdata += len(payload)
            pdefs[label + "_len"] = str(real_len)

        else:
            fatal("Invalid format: {}".format(line))

        return

    fatal("Unrecognized directive: {}".format(line))

#-------------------------------------------------------------------------------

pconds = {
    'c':    0b00001,
    'o':    0b00010,
    'z':    0b00011,
    'e':    0b00011,
    's':    0b00100,
    'p':    0b00101,
    'a':    0b00110,
    'ae':   0b00111,
    'b':    0b01000,
    'be':   0b01001,
    'g':    0b01010,
    'ge':   0b01011,
    'l':    0b01100,
    'le':   0b01101,
    'cxz':  0b01110,
    'cxnz': 0b11110,
}

def get_cond_mask(cond, line):
    """Return the condition bits for suffix *cond*, shifted to bit 10.

    A leading 'n' sets the 0b10000 bit on top of the remaining suffix's code.
    *line* is only used in the error message.
    """
    mask = 0

    if cond.startswith('n'):
        cond = cond[1:]
        mask = 0b10000

    if cond not in pconds:
        fatal("Invalid condition suffix: {}".format(line))

    return (mask | pconds[cond]) << 10

#-------------------------------------------------------------------------------

pfts = {
    "reg":      0b00001,
    "imm64":    0b00010,

    "bimm64":   0b00100,
    "brr":      0b00101,
    "brri":     0b00110,
    "brrii":    0b00111,

    "wimm64":   0b01000,
    "wrr":      0b01001,
    "wrri":     0b01010,
    "wrrii":    0b01011,

    "limm64":   0b01100,
    "lrr":      0b01101,
    "lrri":     0b01110,
    "lrrii":    0b01111,

    "qimm64":   0b10000,
    "qrr":      0b10001,
    "qrri":     0b10010,
    "qrrii":    0b10011,
}

def get_fts_mask(ft, line):
    """Return the operand-format code for *ft*; exits on an unknown format."""
    if ft not in pfts:
        fatal("Invalid operand format ({}): {}".format(ft, line))

    return pfts[ft]

#-------------------------------------------------------------------------------

def _split_scaled(term, line):
    """Split a 'reg*imm16' term into (reg, imm16)."""
    parts = term.split('*')

    if len(parts) != 2:
        fatal("Invalid scaled index in memory operand: {}".format(line))

    return parts[0], parts[1]


def _parse_mem_offsets(word, line):
    """Decompose a blank-free memory expression made of '+' / '*' terms.

    Returns (reg1, reg2, imm1, imm2) for the shape reg1 + reg2*imm1 + imm2;
    absent parts default to "inv", "inv", '1' and '0'.
    """
    reg1 = "inv"
    reg2 = "inv"
    imm1 = '1'
    imm2 = '0'

    wtok = word.split('+')

    #
    # [reg] and [reg*imm16]
    #
    if len(wtok) == 1:
        if '*' in wtok[0]:
            reg2, imm1 = _split_scaled(wtok[0], line)
        else:
            reg1 = wtok[0]

    #
    # [reg+reg], [reg+imm16], [reg*imm16+imm16], [reg+reg*imm16]
    #
    elif len(wtok) == 2:
        # Must be [reg*imm16+imm16]
        if '*' in wtok[0]:
            if not is_number(wtok[1].strip()):
                fatal("Invalid offset in memory operand: {}".format(line))

            reg2, imm1 = _split_scaled(wtok[0], line)
            imm2 = wtok[1]

        # Must be [reg+reg*imm16]
        elif '*' in wtok[1]:
            reg1 = wtok[0]
            reg2, imm1 = _split_scaled(wtok[1], line)

        elif is_number(wtok[1].strip()):
            reg1 = wtok[0]
            imm2 = wtok[1]

        # Must be [reg+reg]
        else:
            reg1 = wtok[0]
            reg2 = wtok[1]

    #
    # [reg+reg+imm16], [reg+reg*imm16+imm16]
    #
    elif len(wtok) == 3:
        reg1 = wtok[0]
        imm2 = wtok[2]

        if '*' in wtok[1]:
            reg2, imm1 = _split_scaled(wtok[1], line)
        else:
            reg2 = wtok[1]

    else:
        fatal("Invalid memory operand: {}".format(line))

    return reg1, reg2, imm1, imm2


def parse_instr(line):
    """Translate one instruction into an intermediate line in `instrs`.

    Writes "<name> %%imm16 <w2><operands>" (without the trailing newline)
    and returns the encoded size of the instruction in bytes.
    """
    if line is None or len(line) == 0:
        return 0

    tok = line.split(None, 1)

    if len(tok) == 0:
        return 0

    instr = tok[0].strip()

    if len(tok) > 1:
        params = tok[1].strip()
    else:
        params = None

    fellthrough = False

    # instruction word + word 2
    size = 4

    # Word 2 (rep|cond|ft1|ft2)
    w2 = 0

    if len(instr) > 2 and '.' in instr:
        instr, suf = instr.split('.', 1)

        if len(instr) == 0:
            fatal("Missing instruction name before suffixes: {}".format(line))

        # "rep", optionally followed by a condition ("repz" or "rep.z")
        if suf[:3] == "rep":
            suf = suf[3:]

            if suf.startswith('.'):
                suf = suf[1:]

            w2 |= 0x8000 # REP

        if len(suf) > 0:
            w2 |= get_cond_mask(suf, line)

    instr_name = instr
    instr_args = ''

    if params is None or len(params) == 0:
        instrs.write("{} ".format(instr_name))
        instrs.write("%%imm16 {}".format(w2))

        return size

    tok = params.split(',')

    # FT1 and FT2
    fts = ''

    #
    # Parse operands, generating fts along the way
    #
    for word in tok:
        word = word.strip()

        instr_args += ' '
        gotPref = False

        if len(fts) != 0:
            fts += ' '

        # preprocessor
        if word in pdefs:
            word = pdefs[word]
            # Fall through

        if len(word) == 0:
            continue

        # arithmetic expressions
        word = arith_eval(word)

        # memory length prefixes
        if len(word) > 2 and '[' in word:
            if word[0] not in 'bwlq':
                fatal("Bad memory length prefix: {}".format(line))

            fts += word[0]
            gotPref = True

            word = word[1:].strip()
            if not word.startswith('['):
                fatal("Invalid memory operand: {}".format(line))

        #
        # Determine memory format and save it into fts
        #
        if word[0] == '[':
            if word[-1] != ']':
                fatal("Unterminated memory operand: {}".format(line))

            word = word[1:-1]

            # preprocessor, again
            if word in pdefs:
                word = pdefs[word]
                # Fall through

            #
            # Make sure we got an access length prefix
            #
            if not gotPref:
                fatal("Missing access length modifier: {}".format(line))

            instr_name += "_m"

            # cheap way of getting [reg - imm] to work
            word = word.replace('-', '+ -')

            # remove every spaces!
            word = word.replace(' ', '')

            if len(word) == 0:
                fatal("Empty memory operand: {}".format(line))

            #
            # Offsets ('*' alone counts too, so that [reg*imm16] is handled)
            #
            if '+' in word or '*' in word:
                reg1, reg2, imm1, imm2 = _parse_mem_offsets(word, line)

                #
                # Update fts and instr_args
                #
                instr_args += "{}:{} ".format(reg2, reg1)
                size += 2

                if imm1 == '1':
                    if imm2 == '0':
                        fts += 'rr'
                    else:
                        fts += 'rri'
                        size += 2
                        instr_args += "%%imm16 {}".format(imm2)

                else:
                    size += 4
                    fts += 'rrii'
                    instr_args += "%%imm16 {} %%imm16 {}".format(imm1, imm2)

                continue

            #
            # [imm64] or [reg]
            #
            fellthrough = True
            # FALLTHROUGH

        # preprocessor, yet again
        if word in pdefs:
            word = pdefs[word]
            # Fall through

        # characters 'c'
        if len(word) == 3 and word[0] == word[-1] == "'":
            word = str(ord(word[1]))

        # register index $reg
        if len(word) == 4 and word[0] == '$':
            if word[1:] in pregs:
                word = str(pregs.index(word[1:]))

        # for now every immediate is 64-bit
        if is_number(word):
            # +8 for immediate
            size += 8

            if not fellthrough:
                instr_name += "_i"

            fts += "imm64"
            instr_args += "%%imm64 "
            instr_args += word

            fellthrough = False
            continue

        # register
        if word in pregs:
            size += 2

            if not fellthrough:
                instr_name += "_r"
                fts += "reg"
            else:
                fts += "rr"

            instr_args += word

            fellthrough = False
            continue

        # it's a label (a 64-bit immediate)
        # +8 for immediate
        size += 8

        if not fellthrough:
            instr_name += "_i"

        fts += "imm64"
        instr_args += "%%imm64 "

        # local label: qualify it with the last global label
        if word[0] == '.':
            instr_args += plastlabel

        instr_args += word
        fellthrough = False

    #
    # Compute FT1 and FT2
    #
    operands = fts.split()

    if ' ' in fts:
        if len(operands) != 2:
            fatal("Invalid operand list: {}".format(line))

        w2 |= get_fts_mask(operands[0], line) << 5
        w2 |= get_fts_mask(operands[1], line)

    else:
        if len(operands) == 0:
            fatal("Invalid operand list: {}".format(line))

        w2 |= get_fts_mask(operands[0], line) << 5

    instrs.write("{} %%imm16 {}{}".format(instr_name, w2, instr_args))

    return size

#-------------------------------------------------------------------------------

special_syms = {
    "%%imm16",
    "%%imm32",
    "%%imm64",
    "%%signed"
}

# byte width selected by each size marker for the numbers that follow it
_IMM_SIZES = {
    "%%imm16": 2,
    "%%imm32": 4,
    "%%imm64": 8,
    "%%signed": 2,
}


def _data_start():
    """Address of .data: end of .text rounded up by 1..8 bytes to 8."""
    data_start = start_addr + ptext
    return data_start + (8 - data_start % 8)


def _first_index(names):
    """Map each name to its first position, like list.index() would."""
    index = {}
    for position, name in enumerate(names):
        index.setdefault(name, position)
    return index


def gentext():
    """Second pass: encode every intermediate line of `instrs` into b_text."""
    text_start = start_addr
    data_start = _data_start()

    reg_index = _first_index(pregs)
    instr_index = _first_index(pinstrs)

    # width in bytes of the next number; set by the %%imm* markers
    lastimm = None

    instrs.seek(0)

    for line in instrs:
        tok = line.strip().split()

        if WANT_DISASM:
            print(tok)

        # drop the leading address column
        tok = tok[1:]

        for word in tok:
            if len(word) == 0:
                continue

            if word in reg_index:
                b_text.write(reg_index[word].to_bytes(
                    2, byteorder='little', signed=False))
                continue

            # memory operand register pair "reg2:reg1"
            if ':' in word:
                reg2, reg1 = word.split(':', 1)

                if reg1 not in reg_index or reg2 not in reg_index:
                    fatal("Unknown register in '{}' in line: {}"
                          .format(word, line))

                packed = (reg_index[reg1] << 8) | reg_index[reg2]
                b_text.write(packed.to_bytes(
                    2, byteorder='little', signed=False))
                continue

            if word in instr_index:
                b_text.write(instr_index[word].to_bytes(
                    2, byteorder='little', signed=False))
                continue

            if word in plabels_text:
                addr = text_start + plabels_text[word]
                b_text.write(addr.to_bytes(
                    8, byteorder='little', signed=False))
                continue

            if word in plabels_data:
                addr = data_start + plabels_data[word]
                b_text.write(addr.to_bytes(
                    8, byteorder='little', signed=False))
                continue

            if word in special_syms:
                lastimm = _IMM_SIZES[word]
                continue

            if is_number(word):
                if lastimm is None:
                    fatal("Immediate '{}' without a size in line: {}"
                          .format(word, line))

                # only literals written with a leading '-' are signed
                try:
                    encoded = int(word, base=0).to_bytes(
                        lastimm, byteorder='little', signed=(word[0] == '-'))
                except OverflowError:
                    fatal("Immediate '{}' does not fit in {} bytes in line: {}"
                          .format(word, lastimm, line))

                b_text.write(encoded)
                continue

            fatal("Assembly error, unknown token '{}' in line: {}"
                  .format(word, line))

#-------------------------------------------------------------------------------

def sort_by_list(dict_, list_):
    """Move the keys listed in *list_* to the end of *dict_*, in order."""
    for key in list_:
        dict_.move_to_end(key)


def gensym():
    """Write "label address" lines to the symbols file, sorted by address."""
    text_start = start_addr
    data_start = _data_start()

    # built separately so the offset tables are left untouched; a data
    # label overrides a text label of the same name
    symbols = OrderedDict()

    for label, offset in plabels_text.items():
        symbols[label] = text_start + offset

    for label, offset in plabels_data.items():
        symbols[label] = data_start + offset

    for key, value in sorted(symbols.items(), key=lambda item: item[1]):
        b_sym.write("{} {}\n".format(key, value))

#-------------------------------------------------------------------------------

def genout():
    """Write .text, the alignment padding, then .data to the output file."""
    b_text.seek(0)
    b_data.seek(0)

    b_out.write(b_text.read())

    # same padding the label addresses were computed with
    b_out.write(bytes(_data_start() - (start_addr + ptext)))

    b_out.write(b_data.read())

#-------------------------------------------------------------------------------

def main(argv):
    """Open the files named on the command line and run every pass."""
    global lst_regs, lst_instrs, main_src, b_out, b_sym, start_addr

    if len(argv) != 5:
        print("Usage: {} (source file) (memory entry point) (output file) (symbols file)"
            .format(argv[0]))
        sys.exit(1)

    lst_regs = open(os.path.join(sys.path[0], "regs.lst"))
    lst_instrs = open(os.path.join(sys.path[0], "instrs.lst"))

    main_src = open(argv[1])
    b_out = open(argv[3], "wb")
    b_sym = open(argv[4], "w")

    start_addr = int(argv[2], base=0)

    # include paths are looked up from the source file's directory;
    # abspath() keeps this working when the path has no directory part
    os.chdir(os.path.dirname(os.path.abspath(argv[1])))

    parse_lst_instrs()
    parse_lst_regs()
    do_includes(main_src)
    parse()
    gentext()
    genout()
    gensym()
    leave()

    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv)