#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

import os
import sys
import subprocess
from array import array
from tempfile import TemporaryFile

# Command line: argv[1] is the source file, argv[2] the memory entry point,
# argv[3] the output file (this is how the arguments are consumed below).
if len(sys.argv) != 4:
    print("Usage: {} (source file) (memory entry point) (output file)"
          .format(sys.argv[0]))
    sys.exit(1)

# Intermediate files:
#   source  - the input with every "include" directive expanded
#   instrs  - one pre-assembled instruction per line, as text tokens
#   b_data  - raw bytes of the data section
#   b_text  - raw bytes of the text section
source = TemporaryFile(mode="w+")
instrs = TemporaryFile(mode="w+")
b_data = TemporaryFile(mode="w+b")
b_text = TemporaryFile(mode="w+b")

# Register and instruction name lists live next to this script.
lst_regs = open(os.path.join(sys.path[0], "regs.lst"))
lst_instrs = open(os.path.join(sys.path[0], "instrs.lst"))

main_src = open(sys.argv[1])
b_out = open(sys.argv[3], "wb")

# NOTE(review): start_addr is parsed but never read anywhere in this file;
# gentext() uses a hard-coded text base instead - confirm which is intended.
start_addr = int(sys.argv[2], base=0)


def leave():
    """Close every file handle opened at module level."""
    source.close()
    instrs.close()
    b_out.close()
    b_data.close()
    b_text.close()
    main_src.close()
    lst_regs.close()
    lst_instrs.close()


def die(message):
    """Print *message*, close all files and exit with status 1."""
    print(message)
    leave()
    sys.exit(1)

#-------------------------------------------------------------------------------

# Defines
pdefs = dict()

# registers
pregs = list()

# instructions
pinstrs = list()

# labels
plabels_text = dict()
plabels_data = dict()

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# for local labels
plastlabel = ''

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0

#-------------------------------------------------------------------------------

def name_valid(name):
    """Return True when every character of *name* is in the allowed set.

    The comparison is case-insensitive. The first offending character is
    reported on stdout before returning False.
    """
    for c in name.lower():
        if c not in 'abcdefghijklmnopqrstuvwxyz0123456789[$._+]=,':
            # NOTE(review): looks like leftover debug output; kept as-is.
            print("BAD '{}'".format(c))
            return False
    return True


def is_number(s):
    """Return True when *s* parses as an integer literal via int(s, base=0)."""
    try:
        int(s, base=0)
    except ValueError:
        return False
    return True

#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Append every (stripped) line of the register list file to pregs."""
    for line in lst_regs:
        pregs.append(line.strip())


def parse_lst_instrs():
    """Append every (stripped) line of the instruction list file to pinstrs."""
    for line in lst_instrs:
        pinstrs.append(line.strip())

#-------------------------------------------------------------------------------

inc_depth = 0
inc_depth_max = 16

# Quickly goes through source file and resolves "include" directives ONLY
def do_includes(fi):
    """Copy *fi* into ``source``, recursively expanding include directives.

    A line of the form ``include "path"`` or ``include 'path'`` is replaced by
    the (recursively expanded) content of that file; every other line is
    written to ``source`` with its trailing whitespace removed. Any malformed
    directive, unreadable file or excessive nesting terminates the program.
    """
    global inc_depth

    for line in fi:
        line = line.rstrip()
        tok = line.split(' ', 1)

        if tok[0] != "include":
            source.write("{}\n".format(line))
            continue

        if len(tok) == 1:
            die("Missing parameter for include directive")

        if tok[1][0] not in "'\"" or tok[1][-1] != tok[1][0]:
            die("Invalid format for include directive: {}".format(line))

        inc = tok[1][1:-1]

        try:
            new_fi = open(inc, "r")
        except (OSError, ValueError):
            die("Couldn't open file: {}".format(line))

        # The handle is closed as soon as the included file is consumed.
        with new_fi:
            inc_depth += 1
            if inc_depth >= inc_depth_max:
                die("Maximal include depth reached: {}".format(line))

            do_includes(new_fi)

            # Leaving this include: without this, sibling includes would be
            # counted as nesting and trip the depth limit.
            inc_depth -= 1

#-------------------------------------------------------------------------------

def parse():
    """First pass over the include-expanded source.

    Strips comments, then dispatches each remaining line: lines starting with
    whitespace are instructions (handed to parse_instr, whose returned size is
    added to ptext), lines ending in ':' define text labels at the current
    ptext offset, and everything else goes to parse_preproc.
    """
    global ptext
    global plastlabel

    source.seek(0)

    for line in source:
        # NOTE(review): echoes every source line to stdout; looks like
        # leftover debug output, kept to preserve the current behavior.
        print(line)

        line = line.rstrip()

        if len(line) == 0:
            continue

        # Everything from the first comment character onwards is dropped.
        for i, c in enumerate(line):
            if c in '#;@!/':
                line = line[:i].rstrip()
                break

        if len(line) == 0:
            continue

        if line[0] == ' ' or line[0] == '\t':
            line = line.lstrip()
            ptext += parse_instr(line)
            instrs.write("\n")
            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]

            # An empty name (a bare ':') is rejected instead of crashing
            # on label[0] below.
            if len(label) == 0 or not name_valid(label):
                die("Bad label name: {}".format(line[:-1]))

            if label[0] == '.':
                # local label: qualified by the last non-local label
                label = plastlabel + label
            else:
                plastlabel = label

            plabels_text[label] = ptext
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)

#-------------------------------------------------------------------------------

def parse_preproc(line):
    """Handle a ``name := value`` define or a ``name = value`` data entry.

    Defines are stored verbatim in pdefs. Data entries are either a number
    (written as 8 little-endian bytes) or a quoted string (written one byte
    per character, followed by 1 to 8 zero bytes); the entry's offset within
    the data section is recorded in plabels_data, and for strings the define
    ``<name>_len`` is set to the number of bytes emitted before the padding.
    Anything else is reported as an unrecognized directive.
    """
    global pdata

    tok = line.split(' ', 2)

    # preprocessor
    if len(tok) > 1 and tok[1] == ':=':
        if len(tok) < 3:
            die("Invalid format: {}".format(line))

        pdefs[tok[0]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            die("Invalid format: {}".format(line))

        name, value = tok[0], tok[2]

        if is_number(value):
            number = int(value, base=0)
            plabels_data[name] = pdata

            # Negative values are stored in two's complement rather than
            # crashing in an unsigned conversion.
            written = b_data.write(number.to_bytes(8, byteorder='little',
                                                   signed=number < 0))
            assert(written == 8)
            pdata += written

        elif value[0] in "'\"":
            s = value.strip()

            # The string must be closed by the same quote that opened it.
            if len(s) < 2 or s[-1] != value[0]:
                die("Invalid format: {}".format(line))

            s = s[1:-1]

            plabels_data[name] = pdata

            real_len = 0
            escaping = False

            for c in s:
                # escape sequences
                if c == '\\':
                    escaping = True
                    continue

                if escaping:
                    escaping = False

                    if c == 'n':
                        c = '\n'
                    elif c == 't':
                        c = '\t'
                    else:
                        die("Unrecognized escape sequence: {}".format(line))

                written = b_data.write(ord(c).to_bytes(1, byteorder='little',
                                                       signed=False))
                assert(written == 1)
                real_len += 1
                pdata += 1

            # align: pad from the number of bytes actually emitted, not from
            # the length of the escaped source text, so that the data section
            # stays 8-byte aligned when the string contains escapes. At least
            # one zero byte is always written.
            written = b_data.write(bytes(8 - real_len % 8))
            pdata += written

            pdefs[name + "_len"] = str(real_len)

        else:
            die("Invalid format: {}".format(line))

        return

    # NOTE(review): unlike the other diagnostics this one is not fatal.
    print("Unrecognized directive: {}".format(line))

#-------------------------------------------------------------------------------

def parse_instr(line):
    """Pre-assemble one instruction line into ``instrs``; return its size.

    The mnemonic is suffixed with ``_r`` / ``_i`` / ``_m`` for each register,
    immediate (or label) and memory operand, and the operands are written as
    space-separated tokens that gentext() later turns into bytes. The return
    value is the number of bytes the instruction will occupy in the text
    section (0 for an empty line).
    """
    if line is None or len(line) == 0:
        return 0

    tok = line.split(' ', 1)
    instr = tok[0].strip()
    params = tok[1].strip() if len(tok) > 1 else None

    fellthrough = False

    # +2 for the instruction itself
    size = 2

    if instr == "rep":
        if params is None:
            die("Missing instruction after rep prefix: {}".format(line))

        instrs.write("%rep ")
        # partition() copes with a prefixed instruction that takes no
        # operands; params is then the empty string.
        instr, _, params = params.partition(' ')
        size += 2

    instr_name = instr
    instr_args = ''

    if params is None or len(params) == 0:
        instrs.write("{}".format(instr_name))
        # instruction only (plus the rep prefix when present)
        return size

    for word in params.split(','):
        word = word.strip()

        if len(word) == 0:
            die("Empty operand: {}".format(line))

        instr_args += ' '
        pref = None

        # memory length prefixes
        if len(word) > 2 and '[' in word:
            if word[0] == 'b':
                pref = "%b"
            elif word[0] == 'w':
                pref = "%w"
            elif word[0] == 'l' or word[0] == 'd':
                pref = "%l"
            elif word[0] == 'q':
                pref = "%q"
            else:
                die("Bad memory length prefix")

            word = word[1:].strip()
            if word[0] != '[':
                die("Malformed memory operand: {}".format(line))

        # [reg+off] or [imm64]
        if word[0] == '[':
            if word[-1] != ']':
                die("Malformed memory operand: {}".format(line))

            word = word[1:-1]

            if pref is None:
                die("Missing access length modifier: {}".format(line))

            instr_name += "_m"
            instr_args += "{}".format(pref)

            if '+' in word:
                # +2 for A_OFF, +2 for offset, +2 for register
                size += 2 + 2 + 2
                instr_args += "off "

                if len(word) <= 3:
                    die("Malformed memory operand: {}".format(line))

                reg, off = word.split('+', 1)
                reg = reg.strip()
                off = off.strip()
                instr_args += "{} {}".format(off, reg)
                continue

            # +2 for A_MEM
            size += 2
            instr_args += "mem "

            # The bracket content is now handled like a plain operand, except
            # that it must not add another suffix to the mnemonic.
            fellthrough = True
            # FALLTHROUGH

        # for now every immediate is 64-bit
        if is_number(word):
            # +2 for A_IMM64, +8 for immediate
            size += 2 + 8

            if not fellthrough:
                instr_name += "_i"

            instr_args += "%imm64 "
            instr_args += word
            fellthrough = False
            continue

        # preprocessor
        if word in pdefs:
            word = pdefs[word]
            # Fall through

        # register
        if word in pregs:
            size += 2

            if not fellthrough:
                instr_name += "_r"

            instr_args += word
            fellthrough = False
            continue

        # it's a label (a 64-bit immediate)
        # +2 for A_IMM64, +8 for immediate
        size += 2 + 8

        if not fellthrough:
            instr_name += "_i"

        instr_args += "%imm64 "

        if word[0] == '.':
            # local label: qualified by the last non-local label
            instr_args += plastlabel

        instr_args += word
        fellthrough = False

    instrs.write("{}{}".format(instr_name, instr_args))

    return size

#-------------------------------------------------------------------------------

# Marker tokens written by parse_instr() and their 16-bit encodings.
special_syms = {
    "%bmem": 0x7001,
    "%wmem": 0x7002,
    "%lmem": 0x7004,
    "%qmem": 0x7008,

    "%boff": 0x7701,
    "%woff": 0x7702,
    "%loff": 0x7704,
    "%qoff": 0x7708,

    "%imm16": 0x7772,
    "%imm32": 0x7774,
    "%imm64": 0x7778,

    "%rep": 0x8000
}

def gentext():
    """Second pass: encode the tokens in ``instrs`` into ``b_text``.

    Registers and instructions become their 2-byte index in pregs / pinstrs,
    labels become 8-byte absolute addresses, marker tokens become their
    2-byte code from special_syms, and numbers are written with the width
    selected by the most recent marker. Everything is little-endian. An
    unknown token terminates the program.
    """
    instrs.seek(0)

    text_start = 0x100000
    data_start = text_start + ptext
    # Same padding rule as genout(): 1 to 8 bytes, even when already aligned.
    data_start += (8 - data_start % 8)

    # Width in bytes of the next numeric token; parse_instr() emits a marker
    # before every number, 8 is only a fallback ("every immediate is 64-bit").
    lastimm = 8

    for line in instrs:
        tok = line.strip().split(' ')

        for word in tok:
            if word in pregs:
                idx = pregs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in pinstrs:
                idx = pinstrs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in plabels_text:
                addr = text_start + plabels_text[word]
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in plabels_data:
                addr = data_start + plabels_data[word] + pdata_pad
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in special_syms:
                if word == "%imm16":
                    lastimm = 2
                elif word == "%imm32":
                    lastimm = 4
                elif word == "%imm64":
                    lastimm = 8

                if word[2:] == "off" and word[0] == '%':
                    # register+offset operand: the offset that follows
                    # is 2 bytes wide
                    lastimm = 2
                    isSigned = True
                else:
                    isSigned = False

                b_text.write(special_syms[word].to_bytes(2, byteorder='little',
                                                         signed=isSigned))
                continue

            if is_number(word):
                isSigned = (word[0] == '-')
                b_text.write(int(word, base=0).to_bytes(lastimm,
                                                        byteorder='little',
                                                        signed=isSigned))
                continue

            die("Assembly error, unknown token '{}' in line: {}".format(word, line))

#-------------------------------------------------------------------------------

def genout():
    """Write the output file: text bytes, zero padding, then data bytes."""
    b_text.seek(0)
    b_data.seek(0)

    b_out.write(b_text.read())

    # 1 to 8 zero bytes; matches the data_start computation in gentext().
    data_align = (8 - ptext % 8)
    b_out.write(bytes(data_align))

    b_out.write(b_data.read())

#-------------------------------------------------------------------------------

parse_lst_instrs()
parse_lst_regs()
do_includes(main_src)
parse()
gentext()
genout()
leave()