#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

"""Two-pass assembler front end.

Pass 1 (parse) reads the source file, expands defines, records label
offsets and writes a normalised text listing of every instruction.
Pass 2 (gentext) turns that listing into binary, and genout() concatenates
the .text and .data images into the output file.
"""

import sys
import subprocess
from array import array
from tempfile import TemporaryFile

# File handles shared by every pass.  main() opens them, leave() closes them.
# TemporaryFile(mode='w+')
instrs = None       # intermediate instruction listing ("testfile.out.1")
b_data = None       # binary .data image ("testfile.out.2")
b_text = None       # binary .text image ("testfile.out.3")
lst_regs = None     # register name list ("regs.lst")
lst_instrs = None   # instruction name list ("instrs.lst")
fi = None           # assembly source, argv[1]
b_out = None        # final output, argv[3]

# Parsed from argv[2] by main().
# NOTE(review): nothing reads this value; gentext() uses a hard-coded base
# address instead -- confirm which one is intended.
start_addr = 0


def leave():
    """Close every file handle that has been opened so far."""
    for handle in (fi, instrs, b_out, b_data, b_text, lst_regs, lst_instrs):
        if handle is not None:
            handle.close()


def _fatal(message):
    """Print *message*, close all files and exit with status 1."""
    print(message)
    leave()
    sys.exit(1)


#-------------------------------------------------------------------------------

# Defines
pdefs = {
    "tst1": 3,
    "tst2": "5"
}

# registers
pregs = list()

# instructions
pinstrs = list()

# labels
plabels_text = dict()
plabels_data = dict()

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0

#-------------------------------------------------------------------------------

def name_valid(name):
    """Return True if every character of *name* is allowed in a token.

    The check is case-insensitive.  The first offending character is
    reported on stdout before returning False.
    """
    for c in name.lower():
        if c not in 'abcdefghijklmnopqrstuvwxyz0123456789[$%._+]=':
            print("BAD '{}'".format(c))
            return False
    return True

#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Append every (stripped) line of the register list file to pregs."""
    for line in lst_regs:
        pregs.append(line.strip())


def parse_lst_instrs():
    """Append every (stripped) line of the instruction list file to pinstrs."""
    for line in lst_instrs:
        pinstrs.append(line.strip())

#-------------------------------------------------------------------------------

def parse_preproc(line):
    """Handle a non-indented, non-label line: a define or a .data entry.

    Recognised forms:
        def NAME VALUE   -- record a textual substitution in pdefs
        NAME = VALUE     -- emit VALUE as an 8-byte little-endian word into
                            the .data image and record NAME's offset
    Anything else is reported on stdout and ignored.
    """
    global pdata

    tok = line.split(' ', 2)

    for word in tok:
        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return

    if tok[0] == 'def':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
        else:
            pdefs[tok[1]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            return

        # Convert before touching any state so that a bad value does not
        # leave behind a label that points at nothing.
        try:
            raw = int(tok[2]).to_bytes(8, byteorder='little', signed=False)
        except (ValueError, OverflowError):
            print("Invalid format: {}".format(line))
            return

        plabels_data[tok[0]] = pdata
        written = b_data.write(raw)
        assert(written == 8)
        pdata += written
        return

    print("Unrecognized directive: {}".format(line))

#-------------------------------------------------------------------------------

def apply_pdefs(line):
    """Normalise an instruction line and substitute defines in its operands.

    Returns (True, new_line) on success, where new_line has its tokens
    separated by single spaces and the comma after the first operand
    removed ('mov rax, rax' => 'mov rax rax').  Returns (False, None) after
    printing a diagnostic when the line is malformed.
    """
    # Split on any run of whitespace so doubled spaces or tabs between
    # operands do not produce empty tokens.
    tok = line.split()

    if not tok:
        return False, None

    if len(tok) > 3:
        print("Too many tokens in line: {}".format(line))
        return False, None

    # 'mov rax, rax' => 'mov rax rax'
    if len(tok) == 3 and tok[1].endswith(','):
        tok[1] = tok[1][:-1]

    new = tok[0]

    for word in tok[1:]:
        new += ' '

        need_bracket = word.startswith('[')
        if need_bracket:
            if len(word) < 2 or not word.endswith(']'):
                print("Invalid token in line: {}".format(line))
                return False, None
            new += '['
            word = word[1:-1]

        if word in pdefs:
            # Define values may be stored as non-strings (e.g. ints).
            word = str(pdefs[word])

        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return False, None

        new += word

        if need_bracket:
            new += ']'

    return True, new


def parse():
    """First pass: walk the source file and dispatch each line.

    Blank lines and lines starting with '#' or ';' are skipped.  Indented
    lines are instructions; lines ending in ':' are .text labels; every
    other line goes to parse_preproc().
    """
    global ptext

    for line in fi:
        line = line.rstrip()

        if not line:
            continue

        if line[0] in '#;':
            continue

        if line[0] in ' \t':
            line = line.lstrip()

            if line[0] in '#;':
                continue

            ok, line = apply_pdefs(line)
            if ok:
                # instrs.write("{} ".format(ptext))
                ptext += parse_instr(line)
                instrs.write("\n")

            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]
            if name_valid(label):
                plabels_text[label] = ptext
            else:
                _fatal("Bad label name: {}".format(label))
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)

#-------------------------------------------------------------------------------

def is_number(s):
    """Return True if *s* parses as an integer literal (any base prefix)."""
    try:
        int(s, base=0)
    except ValueError:
        return False
    return True


# memory length prefixes: 'b[..]', 'w[..]', 'l[..]', 'q[..]'
_MEM_PREFIXES = {'b': "%b", 'w': "%w", 'l': "%l", 'q': "%q"}


def parse_instr(line):
    """Write the normalised form of one instruction and return its size.

    *line* is the single-space separated output of apply_pdefs().  The
    mnemonic gets one suffix per operand ("_r" register, "_i" immediate or
    label, "_m" memory) and the operands are written after it, preceded by
    the marker tokens that gentext() later encodes.

    Returns the number of bytes the instruction is counted as in .text.
    NOTE(review): for memory operands the byte count added here does not
    obviously match what gentext() emits -- verify against the decoder.
    """
    tok = line.split(' ')
    instr_name = tok[0]

    if len(tok) == 1:
        instrs.write("{}".format(instr_name))
        return 2 # instruction size = 2

    # The opcode word itself; this is the same base size as the
    # operand-less case above.
    size = 2
    instr_args = ''

    for word in tok[1:]:
        instr_args += ' '

        pref = None
        # True when this operand sits inside a plain '[...]'; the marker
        # and mnemonic suffix have then already been emitted.
        inside_mem = False

        # memory length prefixes
        if len(word) > 1 and word[1] == '[':
            pref = _MEM_PREFIXES.get(word[0])
            if pref is None:
                _fatal("Bad memory length prefix")
            word = word[1:]

        # [reg+off] or [imm64]
        if word.startswith('['):
            if not word.endswith(']'):
                _fatal("Unterminated memory operand: {}".format(word))
            word = word[1:-1]

            if pref is None:
                pref = "%q"

            instr_name += "_m"
            instr_args += "{}".format(pref)

            if '+' in word:
                # +2 for A_OFF, +2 for offset, +2 for register
                size += 2 + 2 + 2
                instr_args += "off "

                reg, _, off = word.partition('+')
                if not reg or not off:
                    _fatal("Bad memory operand: [{}]".format(word))

                instr_args += "{} {}".format(off, reg)
                continue

            # +2 for A_MEM
            size += 2
            instr_args += "mem "
            inside_mem = True
            # FALLTHROUGH

        # for now every immediate is 64-bit
        if is_number(word):
            # +2 for A_IMM64, +8 for immediate
            size += 2 + 8
            if not inside_mem:
                instr_name += "_i"
                instr_args += "%imm64 "
            instr_args += word
            continue

        if word in pregs:
            size += 2
            if not inside_mem:
                instr_name += "_r"
            instr_args += word
            continue

        # it's a label (a 64-bit immediate)
        # +2 for A_IMM64, +8 for immediate
        size += 2 + 8
        if not inside_mem:
            instr_name += "_i"
            instr_args += "%imm64 "
        instr_args += word

    instrs.write("{}{}".format(instr_name, instr_args))

    return size

#-------------------------------------------------------------------------------

# Marker tokens written by parse_instr(); gentext() encodes each one as a
# 2-byte little-endian word.
special_syms = {
    "%bmem":  0x7001,
    "%wmem":  0x7002,
    "%lmem":  0x7004,
    "%qmem":  0x7008,
    "%boff":  0x7701,
    "%woff":  0x7702,
    "%loff":  0x7704,
    "%qoff":  0x7708,
    "%imm16": 0x7772,
    "%imm32": 0x7774,
    "%imm64": 0x7778,
}


def gentext():
    """Second pass: encode the instruction listing into the .text image.

    Every token of every listing line is looked up, in order, as a
    register, an instruction, a .text label, a .data label, a marker from
    special_syms and finally a number.  An unknown token aborts the run.
    """
    instrs.seek(0)

    text_start = 0x100000

    # Width in bytes of the next numeric token.  Defaults to 8 so that a
    # number following a %..mem / %..off marker is encodable even when no
    # %imm marker has been seen yet.
    lastimm = 8

    for line in instrs:
        tok = line.strip().split(' ')

        for word in tok:
            if word in pregs:
                idx = pregs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in pinstrs:
                idx = pinstrs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in plabels_text:
                addr = text_start + plabels_text[word]
                # Little-endian, like every other value in the image.
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in plabels_data:
                addr = text_start + plabels_data[word] + ptext + pdata_pad
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in special_syms:
                if word == "%imm16":
                    lastimm = 2
                elif word == "%imm32":
                    lastimm = 4
                elif word == "%imm64":
                    lastimm = 8

                b_text.write(special_syms[word].to_bytes(2, byteorder='little', signed=False))
                continue

            if is_number(word):
                b_text.write(int(word, base=0).to_bytes(lastimm, byteorder='little', signed=False))
                continue

            _fatal("Assembly error, unknown token '{}' in line: {}".format(word, line))

#-------------------------------------------------------------------------------

def genout():
    """Write the .text image followed by the .data image to the output."""
    b_text.seek(0)
    b_data.seek(0)

    b_out.write(b_text.read())
    b_out.write(b_data.read())

#-------------------------------------------------------------------------------

def main(argv):
    """Run the assembler: argv = [prog, source, entry point, output]."""
    global instrs, b_data, b_text, lst_regs, lst_instrs, fi, b_out, start_addr

    if len(argv) != 4:
        print("Usage: {} (source file) (memory entry point) (output file)"
              .format(argv[0]))
        sys.exit(1)

    # Parse before opening anything so a bad address cannot truncate the
    # output file.
    start_addr = int(argv[2], base=0)

    try:
        instrs = open("testfile.out.1", "w+")
        b_data = open("testfile.out.2", "w+b")
        b_text = open("testfile.out.3", "w+b")

        lst_regs = open("regs.lst")
        lst_instrs = open("instrs.lst")

        fi = open(argv[1])
        b_out = open(argv[3], "wb")

        parse_lst_instrs()
        parse_lst_regs()
        parse()
        gentext()
        genout()
    finally:
        leave()


if __name__ == "__main__":
    main(sys.argv)