#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

"""Assemble a source file into a flat binary image.

Usage: <script> (source file) (memory entry point) (output file)

The assembler works in passes over temporary files:

1. do_includes() copies the main source into `source`, expanding
   "include" directives.
2. parse() walks `source`, writing an intermediate textual listing to
   `instrs` (via parse_instr) and raw bytes to `b_data` (via
   parse_preproc), while recording label offsets.
3. gentext() turns the `instrs` listing into bytes in `b_text`.
4. genout() writes `b_text`, padding, then `b_data` to the output file.

Register and instruction names are read from "regs.lst" and "instrs.lst",
looked up in the directory given by sys.path[0].
"""

import os
import sys
import subprocess
from array import array
from tempfile import TemporaryFile

# Working files. They are opened by main() and closed by leave(); they stay
# None while the module is merely imported.
source = None       # source text with includes resolved
instrs = None       # intermediate instruction listing
b_data = None       # bytes of the .data section
b_text = None       # bytes of the .text section
lst_regs = None     # regs.lst
lst_instrs = None   # instrs.lst
main_src = None     # main source file (argv[1])
b_out = None        # output file (argv[3])

# Address at which the .text section is assumed to be loaded (argv[2]).
start_addr = 0x100000


def leave():
    """Close every working file that is currently open."""
    for handle in (source, instrs, b_out, b_data, b_text,
                   main_src, lst_regs, lst_instrs):
        if handle is not None:
            handle.close()


def _fatal(message):
    """Print `message`, close the working files and exit with status 1."""
    print(message)
    leave()
    sys.exit(1)

#-------------------------------------------------------------------------------

# Defines
pdefs = dict()

# registers
pregs = list()

# instructions
pinstrs = list()

# labels
plabels_text = dict()
plabels_data = dict()

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# for local labels
plastlabel = ''

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0

#-------------------------------------------------------------------------------

_NAME_CHARS = frozenset('abcdefghijklmnopqrstuvwxyz0123456789[$._+]=,')


def name_valid(name):
    """Return True if every character of `name` (case-insensitively) is allowed.

    An empty name is considered valid by this function.
    """
    return all(c in _NAME_CHARS for c in name.lower())


def is_number(s):
    """Return True if `s` is an integer literal accepted by int(s, base=0)."""
    try:
        int(s, base=0)
    except ValueError:
        return False
    return True

#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Append every (stripped) line of regs.lst to `pregs`.

    The position of a name in the list is the index emitted for it.
    """
    pregs.extend(line.strip() for line in lst_regs)


def parse_lst_instrs():
    """Append every (stripped) line of instrs.lst to `pinstrs`.

    The position of a name in the list is the index emitted for it.
    """
    pinstrs.extend(line.strip() for line in lst_instrs)

#-------------------------------------------------------------------------------

inc_depth = 0
inc_depth_max = 16


# Quickly goes through source file and resolves "include" directives ONLY
def do_includes(fi):
    """Copy `fi` into `source`, recursively expanding include directives.

    An include directive is a line whose first space-separated token is
    exactly "include", followed by a path wrapped in matching quotes. The
    path is opened as given (relative paths resolve against the current
    working directory). Exits with an error on a malformed directive, an
    unreadable file, or when nesting reaches `inc_depth_max`.
    """
    global inc_depth

    for line in fi:
        line = line.rstrip()
        tok = line.split(' ', 1)

        if tok[0] != "include":
            source.write("{}\n".format(line))
            continue

        if len(tok) == 1:
            _fatal("Missing parameter for include directive")

        arg = tok[1]
        if len(arg) < 2 or arg[0] not in "'\"" or arg[-1] != arg[0]:
            _fatal("Invalid format for include directive: {}".format(line))

        inc = arg[1:-1]

        try:
            new_fi = open(inc, "r")
        except OSError:
            _fatal("Couldn't open file: {}".format(line))

        with new_fi:
            inc_depth += 1
            if inc_depth >= inc_depth_max:
                _fatal("Maximal include depth reached: {}".format(line))

            do_includes(new_fi)

            # Leaving this file: the counter tracks nesting, not the total
            # number of includes seen so far.
            inc_depth -= 1

#-------------------------------------------------------------------------------

def _strip_comment(line):
    """Return `line` cut at the first comment character outside a string.

    Comment characters are '#', ';', '@', '!' and '/'. A string starts at a
    single or double quote and ends at the next occurrence of that same
    quote character. Exits with an error if a string is left open.
    """
    opener = None

    for i, c in enumerate(line):
        if opener is not None:
            if c == opener:
                opener = None
        elif c in "'\"":
            opener = c
        elif c in '#;@!/':
            return line[:i].rstrip()

    if opener is not None:
        _fatal("Unterminated string in line: {}".format(line))

    return line


def parse():
    """Walk the include-expanded source and dispatch each line.

    Lines starting with a space or tab are instructions (parse_instr), lines
    ending in ':' are text labels, and anything else goes to parse_preproc.
    A label starting with '.' is local: it is prefixed with the most recent
    non-local label.
    """
    global ptext
    global plastlabel

    source.seek(0)

    for line in source:
        line = line.rstrip()
        if not line:
            continue

        line = _strip_comment(line)
        if not line:
            continue

        if line[0] in ' \t':
            ptext += parse_instr(line.lstrip())
            instrs.write("\n")
            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]
            if not label or not name_valid(label):
                _fatal("Bad label name: {}".format(label))

            if label[0] == '.':
                label = plastlabel + label
            else:
                plastlabel = label

            plabels_text[label] = ptext
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)

#-------------------------------------------------------------------------------

def _encode_string(body, line):
    """Return the bytes for the string `body` (quotes already removed).

    Recognized escapes are \\n, \\t and \\\\. Exits with an error on any other
    escape, on a trailing lone backslash, or on a character above 0xFF.
    """
    payload = bytearray()
    escaping = False

    for c in body:
        if escaping:
            escaping = False
            if c == 'n':
                c = '\n'
            elif c == 't':
                c = '\t'
            elif c != '\\':
                _fatal("Unrecognized escape sequence: {}".format(line))
        elif c == '\\':
            escaping = True
            continue

        if ord(c) > 0xFF:
            _fatal("Invalid format: {}".format(line))
        payload.append(ord(c))

    if escaping:
        _fatal("Unrecognized escape sequence: {}".format(line))

    return bytes(payload)


def parse_preproc(line):
    """Handle a non-instruction, non-label line.

    Two forms are recognized (tokens are separated by single spaces):

    * ``name := value`` stores `value` in `pdefs`.
    * ``label = data`` appends data to the .data section and records the
      label's offset in `plabels_data`. `data` is a number (stored as 8
      little-endian bytes), ``[size]`` (a zero-filled buffer), or a quoted
      string. Buffers and strings also define ``<label>_len`` in `pdefs`.

    Anything else only prints "Unrecognized directive" and returns.
    """
    global pdata

    tok = line.split(' ', 2)

    # preprocessor
    if len(tok) > 1 and tok[1] == ':=':
        if len(tok) < 3:
            _fatal("Invalid format: {}".format(line))

        pdefs[tok[0]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            _fatal("Invalid format: {}".format(line))

        label = tok[0]
        if label.startswith('.'):
            label = plastlabel + label

        plabels_data[label] = pdata

        value = tok[2].strip()
        if not value:
            _fatal("Invalid format: {}".format(line))

        # number data
        if is_number(value):
            number = int(value, base=0)
            try:
                # Negative values are stored as two's complement.
                raw = number.to_bytes(8, byteorder='little',
                                      signed=number < 0)
            except OverflowError:
                _fatal("Invalid format: {}".format(line))

            written = b_data.write(raw)
            assert written == 8
            pdata += written

        # buffer / bss
        elif value[0] == '[':
            if value[-1] != ']':
                _fatal("Invalid bss format: {}".format(line))

            s = value[1:-1].strip()
            if not is_number(s) or int(s, base=0) < 0:
                _fatal("Invalid bss format: {}".format(line))

            i = int(s, base=0)
            # NOTE: this always grows the buffer by 1..8 bytes, including
            # when `i` is already a multiple of 8. Kept as is so the layout
            # of existing images does not change.
            i = i + (8 - i % 8)

            written = b_data.write(bytearray(i))
            assert written == i

            pdefs[label + "_len"] = s
            pdata += written

        # string data
        elif value[0] in "'\"":
            if len(value) < 2 or value[-1] != value[0]:
                _fatal("Invalid format: {}".format(line))

            payload = _encode_string(value[1:-1], line)
            real_len = len(payload)

            # align: pad from the number of bytes actually emitted (not the
            # raw source length, which counts backslashes), so the section
            # stays 8-byte aligned; at least one zero byte always follows.
            padding = 8 - real_len % 8

            written = b_data.write(payload + bytes(padding))
            assert written == real_len + padding
            pdata += written

            pdefs[label + "_len"] = str(real_len)

        else:
            _fatal("Invalid format: {}".format(line))

        return

    # NOTE(review): this is reported but not fatal; assembly continues.
    print("Unrecognized directive: {}".format(line))

#-------------------------------------------------------------------------------

pconds = {
    'c':    0b00001,
    'o':    0b00010,
    'z':    0b00011,
    'e':    0b00011,
    's':    0b00100,
    'p':    0b00101,
    'a':    0b00110,
    'ae':   0b00111,
    'b':    0b01000,
    'be':   0b01001,
    'g':    0b01010,
    'ge':   0b01011,
    'l':    0b01100,
    'le':   0b01101,
    'cxz':  0b01110,
    'cxnz': 0b11110,
}


def get_cond_mask(cond, line):
    """Return the condition bits of word 2 for the suffix `cond`.

    A leading 'n' is removed and sets bit 0b10000; the remainder must be a
    key of `pconds`. The result is shifted left by 10. `line` is only used
    in the error message. Exits with an error on an unknown suffix.
    """
    mask = 0
    if cond.startswith('n'):
        cond = cond[1:]
        mask = 0b10000

    if cond not in pconds:
        _fatal("Invalid condition suffix: {}".format(line))

    return (mask | pconds[cond]) << 10

#-------------------------------------------------------------------------------

pfts = {
    "reg":      0b00001,
    "imm64":    0b00010,

    "bimm64":   0b00100,
    "brr":      0b00101,
    "brri":     0b00110,
    "brrii":    0b00111,

    "wimm64":   0b01000,
    "wrr":      0b01001,
    "wrri":     0b01010,
    "wrrii":    0b01011,

    "limm64":   0b01100,
    "lrr":      0b01101,
    "lrri":     0b01110,
    "lrrii":    0b01111,

    "qimm64":   0b10000,
    "qrr":      0b10001,
    "qrri":     0b10010,
    "qrrii":    0b10011,
}


def get_fts_mask(ft, line):
    """Return the operand-format code for `ft` from `pfts`.

    `line` is only used in the error message. Exits with an error when `ft`
    is not a known format.
    """
    if ft not in pfts:
        _fatal("Invalid operand format ({}): {}".format(ft, line))

    return pfts[ft]

#-------------------------------------------------------------------------------

def _split_scaled(term, line):
    """Split a "reg*imm16" term into (reg, imm) with whitespace removed."""
    parts = term.split('*')
    if len(parts) != 2:
        _fatal("Invalid operand format ({}): {}".format(term, line))
    return parts[0].strip(), parts[1].strip()


def _parse_mem_offsets(word, line):
    """Decompose the inside of a '+'-separated memory operand.

    Returns (reg1, reg2, imm1, imm2) as strings, where the operand stands
    for reg1 + reg2*imm1 + imm2. Missing parts default to "inv", "inv",
    '1' and '0' respectively.
    """
    reg1 = "inv"
    reg2 = "inv"
    imm1 = '1'
    imm2 = '0'

    wtok = [part.strip() for part in word.split('+')]

    #
    # [reg+reg], [reg+imm16], [reg*imm16+imm16], [reg+reg*imm16]
    #
    if len(wtok) == 2:
        # Must be [reg*imm16+imm16]
        if '*' in wtok[0]:
            if not is_number(wtok[1]):
                _fatal("Invalid operand format ({}): {}".format(word, line))
            reg2, imm1 = _split_scaled(wtok[0], line)
            imm2 = wtok[1]

        # Must be [reg+reg*imm16]
        elif '*' in wtok[1]:
            reg1 = wtok[0]
            reg2, imm1 = _split_scaled(wtok[1], line)

        elif is_number(wtok[1]):
            reg1 = wtok[0]
            imm2 = wtok[1]

        # Must be [reg+reg]
        else:
            reg1 = wtok[0]
            reg2 = wtok[1]

    #
    # [reg+reg+imm16], [reg+reg*imm16+imm16]
    #
    elif len(wtok) == 3:
        reg1 = wtok[0]
        imm2 = wtok[2]

        if '*' in wtok[1]:
            reg2, imm1 = _split_scaled(wtok[1], line)
        else:
            reg2 = wtok[1]

    else:
        _fatal("Invalid operand format ({}): {}".format(word, line))

    return reg1, reg2, imm1, imm2


def parse_instr(line):
    """Translate one instruction line into the intermediate listing.

    Writes "<name> %%imm16 <word2> <operands...>" to `instrs`, where <name>
    is the mnemonic with one suffix per operand ("_r" register, "_i"
    immediate or label, "_m" memory) and <word2> packs the rep bit, the
    condition and the two operand formats.

    Returns the size in bytes accounted for the instruction (0 for an empty
    line). Exits with an error on malformed input.
    """
    if line is None or len(line) == 0:
        return 0

    tok = line.split(' ', 1)
    instr = tok[0].strip()
    params = tok[1].strip() if len(tok) > 1 else None

    fellthrough = False

    size = 4

    # Word 2 (rep|cond|ft1|ft2)
    w2 = 0

    if '.' in instr:
        instr, cond = instr.split('.', 1)
        w2 |= get_cond_mask(cond, line)

    if instr == "rep":
        if not params:
            _fatal("Missing instruction after rep prefix: {}".format(line))

        w2 |= 0x8000 # 16th bit

        rest = params.split(' ', 1)
        instr = rest[0]
        params = rest[1] if len(rest) == 2 else None

    instr_name = instr
    instr_args = ''

    if not params:
        instrs.write("{} %%imm16 {}".format(instr_name, w2))
        return size

    # FT1 and FT2
    fts = ''

    #
    # Parse operands, generating fts along the way
    #
    for word in params.split(','):
        word = word.strip()
        if not word:
            _fatal("Invalid operand format ({}): {}".format(word, line))

        instr_args += ' '
        got_pref = False

        if len(fts) != 0:
            fts += ' '

        # memory length prefixes
        if len(word) > 2 and '[' in word:
            if word[0] not in 'bwlq':
                _fatal("Bad memory length prefix: {}".format(line))

            fts += word[0]
            got_pref = True

            word = word[1:].strip()
            if not word.startswith('['):
                _fatal("Bad memory length prefix: {}".format(line))

        #
        # Determine memory format and save it into fts
        #
        if word[0] == '[':
            if word[-1] != ']':
                _fatal("Invalid operand format ({}): {}".format(word, line))
            word = word[1:-1].strip()

            #
            # Make sure we got an access length prefix
            #
            if not got_pref:
                _fatal("Missing access length modifier: {}".format(line))

            if not word:
                _fatal("Invalid operand format ({}): {}".format(word, line))

            instr_name += "_m"

            #
            # Offsets
            #
            # NOTE(review): a lone [reg*imm16] (no '+') is not routed here;
            # it falls through below and is treated like [imm64]/[reg].
            # Confirm the intended encoding before changing this.
            if '+' in word:
                reg1, reg2, imm1, imm2 = _parse_mem_offsets(word, line)

                #
                # Update fts and instr_args
                #
                instr_args += "{}:{} ".format(reg2, reg1)
                size += 2

                if imm1 == '1':
                    if imm2 == '0':
                        fts += 'rr'
                    else:
                        fts += 'rri'
                        size += 2
                        instr_args += "%%imm16 {}".format(imm2)
                else:
                    size += 4
                    fts += 'rrii'
                    instr_args += "%%imm16 {} %%imm16 {}".format(imm1, imm2)

                continue

            #
            # [imm64] or [reg]
            #
            fellthrough = True
            # FALLTHROUGH

        # preprocessor
        if word in pdefs:
            word = pdefs[word]
            # Fall through

        # for now every immediate is 64-bit
        if is_number(word):
            # +8 for immediate
            size += 8

            if not fellthrough:
                instr_name += "_i"

            fts += "imm64"

            instr_args += "%%imm64 "
            instr_args += word

            fellthrough = False
            continue

        # register
        if word in pregs:
            size += 2

            if not fellthrough:
                instr_name += "_r"
                fts += "reg"
            else:
                fts += "rr"

            instr_args += word

            fellthrough = False
            continue

        # it's a label (a 64-bit immediate)
        # +8 for immediate
        size += 8

        if not fellthrough:
            instr_name += "_i"

        fts += "imm64"

        instr_args += "%%imm64 "

        if word[:1] == '.':
            instr_args += plastlabel

        instr_args += word
        fellthrough = False

    #
    # Compute FT1 and FT2
    #
    formats = fts.split(' ')
    if len(formats) == 2:
        w2 |= get_fts_mask(formats[0], line) << 5
        w2 |= get_fts_mask(formats[1], line)
    elif len(formats) == 1:
        w2 |= get_fts_mask(formats[0], line) << 5
    else:
        # Word 2 only has room for two operand formats.
        _fatal("Invalid operand format ({}): {}".format(fts, line))

    instrs.write("{} %%imm16 {}{}".format(instr_name, w2, instr_args))

    return size

#-------------------------------------------------------------------------------

special_syms = {
    "%%imm16",
    "%%imm32",
    "%%imm64",
    "%%signed"
}

# Width in bytes selected by each marker for the number that follows it.
_IMM_SIZES = {
    "%%imm16": 2,
    "%%imm32": 4,
    "%%imm64": 8,
    "%%signed": 2,
}


def _data_padding():
    """Return the number of zero bytes between the .text and .data sections.

    The value is always in 1..8 and makes the .data load address a multiple
    of 8. gentext() and genout() must agree on it, hence the shared helper.
    """
    return 8 - (start_addr + ptext) % 8


def gentext():
    """Encode the intermediate listing in `instrs` into `b_text`.

    Each space-separated token is tried, in order, as: a register name
    (2-byte index), a "reg2:reg1" pair (2 bytes), an instruction name
    (2-byte index), a text or data label (8-byte absolute address), a size
    marker from `special_syms`, or a number written with the width set by
    the last marker. Everything is little-endian. Exits with an error on
    any other token.
    """
    instrs.seek(0)

    text_start = start_addr
    data_start = text_start + ptext + _data_padding()

    imm_size = None

    for line in instrs:
        for word in line.strip().split(' '):
            if len(word) == 0:
                continue

            if word in pregs:
                idx = pregs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if ':' in word:
                reg2, reg1 = word.split(':', 1)
                if reg1 not in pregs or reg2 not in pregs:
                    _fatal("Assembly error, unknown token '{}' in line: {}"
                           .format(word, line))

                idx1 = pregs.index(reg1)
                idx2 = pregs.index(reg2)
                b_text.write(((idx1 << 8) | idx2)
                             .to_bytes(2, byteorder='little', signed=False))
                continue

            if word in pinstrs:
                idx = pinstrs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in plabels_text:
                addr = text_start + plabels_text[word]
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in plabels_data:
                addr = data_start + plabels_data[word] + pdata_pad
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in special_syms:
                imm_size = _IMM_SIZES[word]
                continue

            # A number is only meaningful once a size marker has been seen.
            if imm_size is not None and is_number(word):
                value = int(word, base=0)
                try:
                    raw = value.to_bytes(imm_size, byteorder='little',
                                         signed=value < 0)
                except OverflowError:
                    _fatal("Assembly error, unknown token '{}' in line: {}"
                           .format(word, line))

                b_text.write(raw)
                continue

            _fatal("Assembly error, unknown token '{}' in line: {}"
                   .format(word, line))

#-------------------------------------------------------------------------------

def genout():
    """Write the final image: .text bytes, zero padding, then .data bytes."""
    b_text.seek(0)
    b_data.seek(0)

    b_out.write(b_text.read())
    b_out.write(bytes(_data_padding()))
    b_out.write(b_data.read())

#-------------------------------------------------------------------------------

def main():
    """Command-line entry point; see the module docstring for the passes."""
    global source, instrs, b_data, b_text
    global lst_regs, lst_instrs, main_src, b_out
    global start_addr

    if len(sys.argv) != 4:
        print("Usage: {} (source file) (memory entry point) (output file)"
              .format(sys.argv[0]))
        sys.exit(1)

    # Validate the address before the output file is created or truncated.
    try:
        start_addr = int(sys.argv[2], base=0)
    except ValueError:
        print("Invalid format: {}".format(sys.argv[2]))
        sys.exit(1)

    try:
        source = TemporaryFile(mode="w+")
        instrs = TemporaryFile(mode="w+")
        b_data = TemporaryFile(mode="w+b")
        b_text = TemporaryFile(mode="w+b")

        try:
            lst_regs = open(os.path.join(sys.path[0], "regs.lst"))
            lst_instrs = open(os.path.join(sys.path[0], "instrs.lst"))
            main_src = open(sys.argv[1])
            b_out = open(sys.argv[3], "wb")
        except OSError as err:
            _fatal("Couldn't open file: {}".format(err))

        parse_lst_instrs()
        parse_lst_regs()
        do_includes(main_src)
        parse()
        gentext()
        genout()
    finally:
        # Closing an already-closed file is a no-op, so this is safe even
        # after _fatal() has run.
        leave()

    sys.exit(0)


if __name__ == "__main__":
    main()