#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

"""Assemble a source file into a flat binary image.

Usage: <script> (source file) (memory entry point) (output file)

The assembler works in three steps:

1. parse() reads the source file. Instruction lines (those starting with
   whitespace) are rewritten into an intermediate listing, one instruction
   per line, while ``name = value`` lines are encoded straight into the
   .data buffer and ``name := text`` lines are recorded as textual defines.
2. gentext() re-reads the intermediate listing and encodes it into the
   .text buffer, resolving registers, instructions and labels.
3. genout() writes .text, zero padding, then .data to the output file.

Register and instruction names are read from ``regs.lst`` and
``instrs.lst`` located in ``sys.path[0]``; the position of a name in its
list is the 16-bit value emitted for it.
"""

import os
import sys
import subprocess
from array import array
from tempfile import TemporaryFile

# File handles. They are opened by main() and released by leave().
instrs = None       # intermediate listing (text, read back by gentext())
b_data = None       # encoded .data section
b_text = None       # encoded .text section
lst_regs = None     # register name list
lst_instrs = None   # instruction name list
fi = None           # source file
b_out = None        # final output image

# NOTE(review): the entry point is parsed from the command line but nothing
# in this file uses it; gentext() hard-codes text_start = 0.
start_addr = 0

def leave():
    """Close every file handle that has been opened so far."""
    for handle in (fi, instrs, b_out, b_data, b_text, lst_regs, lst_instrs):
        if handle is not None:
            handle.close()

def _fail(message):
    """Print message, release all file handles and exit with status 1."""
    print(message)
    leave()
    sys.exit(1)

#-------------------------------------------------------------------------------

# Defines
pdefs = dict()

# registers
pregs = list()

# instructions
pinstrs = list()

# labels
plabels_text = dict()
plabels_data = dict()

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0

#-------------------------------------------------------------------------------

_NAME_CHARS = frozenset('abcdefghijklmnopqrstuvwxyz0123456789[$._+]=')

def name_valid(name):
    """Return True if every character of name (case-insensitive) is allowed.

    The first offending character is reported on stdout.
    """
    for c in name.lower():
        if c not in _NAME_CHARS:
            print("BAD '{}'".format(c))
            return False

    return True

#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Append every (stripped) line of the register list to pregs."""
    pregs.extend(line.strip() for line in lst_regs)

def parse_lst_instrs():
    """Append every (stripped) line of the instruction list to pinstrs."""
    pinstrs.extend(line.strip() for line in lst_instrs)

#-------------------------------------------------------------------------------

def parse_preproc(line):
    """Handle a non-indented, non-label line.

    Two forms are recognized:

    ``name := text``   records a textual define in pdefs.
    ``name = value``   emits data: a number becomes an 8-byte little-endian
                       word; a quoted string becomes its bytes followed by
                       zero padding, and ``name_len`` is defined as its
                       length.

    Malformed lines of either form abort the program with status 1. Any
    other line is reported as an unrecognized directive and ignored.
    """
    global pdata

    tok = line.split(' ', 2)

    # preprocessor
    if len(tok) > 1 and tok[1] == ':=':
        if len(tok) < 3:
            _fail("Invalid format: {}".format(line))

        pdefs[tok[0]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            _fail("Invalid format: {}".format(line))

        name, value = tok[0], tok[2]

        if is_number(value):
            try:
                payload = int(value, base=0).to_bytes(8, byteorder='little', signed=False)
            except OverflowError:
                # negative, or does not fit in 64 bits
                _fail("Invalid format: {}".format(line))

            plabels_data[name] = pdata
            written = b_data.write(payload)
            assert written == 8
            pdata += written

        # value[:1] rather than value[0]: value is empty for a line 'x = '
        elif value[:1] in ("'", '"'):
            s = value.strip()

            # the string must end with the quote character it started with
            if len(s) < 2 or s[-1] != s[0]:
                _fail("Invalid format: {}".format(line))

            s = s[1:-1]

            try:
                # one byte per character, code points 0-255 only
                payload = s.encode('latin-1')
            except UnicodeEncodeError:
                _fail("Invalid format: {}".format(line))

            plabels_data[name] = pdata
            pdefs[name + "_len"] = str(len(s))

            # align; always adds between 1 and 8 zero bytes, so the string
            # is followed by at least one NUL
            payload += bytes(8 - len(s) % 8)

            written = b_data.write(payload)
            assert written == len(payload)
            pdata += written

        else:
            _fail("Invalid format: {}".format(line))

        return

    print("Unrecognized directive: {}".format(line))

#-------------------------------------------------------------------------------

def apply_pdefs(line):
    """Normalize an instruction line and substitute defines in its operands.

    The line is split on whitespace, the comma after the first operand is
    dropped ('mov rax, rax' => 'mov rax rax') and every operand (or the
    content of a '[...]' operand) that names a define is replaced by the
    define's text.

    Returns (True, normalized_line) on success. Returns (False, None) after
    printing a message when the line has more than three tokens or an
    operand contains a character rejected by name_valid(). Empty operands
    and unbalanced '[' abort the program with status 1.
    """
    tok = line.split()

    if not tok:
        _fail("Invalid token in line: {}".format(line))

    if len(tok) > 3:
        print("Too many tokens in line: {}".format(line))
        return False, None

    # 'mov rax, rax' => 'mov rax rax'
    if len(tok) == 3 and tok[1][-1] == ',':
        tok[1] = tok[1][:-1]

    new = tok[0]

    for word in tok[1:]:
        need_bracket = word[:1] == '['

        if need_bracket:
            if word[-1] != ']':
                _fail("Invalid token in line: {}".format(line))
            word = word[1:-1]

        if word in pdefs:
            word = pdefs[word]

        # an empty operand (',', '[]' or an empty define) cannot be encoded
        if not word:
            _fail("Invalid token in line: {}".format(line))

        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return False, None

        new += ' ' + ('[' + word + ']' if need_bracket else word)

    return True, new

def parse():
    """First pass: read the source file and dispatch every line.

    Blank lines and lines starting with '#' or ';' are skipped. Lines that
    start with a space or tab are instructions (unless their first visible
    character is one of '#;@!/'): they go through apply_pdefs() and
    parse_instr(), and ptext grows by the instruction's encoded size. A
    line ending in ':' defines a text label at the current ptext. Anything
    else is handed to parse_preproc().
    """
    global ptext

    for line in fi:
        line = line.rstrip()

        if not line:
            continue

        if line[0] in '#;':
            continue

        if line[0] in ' \t':
            line = line.lstrip()

            if line[0] in '#;@!/':
                continue

            ok, line = apply_pdefs(line)

            # NOTE(review): a rejected line is reported by apply_pdefs() but
            # assembly continues without it -- confirm this is intended.
            if ok:
                ptext += parse_instr(line)
                instrs.write("\n")

            continue

        # Preprocessor or label?
        if line[-1] == ':':
            label = line[:-1]

            if not name_valid(label):
                _fail("Bad label name: {}".format(label))

            plabels_text[label] = ptext
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)

#-------------------------------------------------------------------------------

def is_number(s):
    """Return True if s is an integer literal accepted by int(s, base=0)."""
    try:
        int(s, base=0)
    except ValueError:
        return False

    return True

# memory access length prefix letter => intermediate symbol prefix
_MEM_PREFIXES = {'b': "%b", 'w': "%w", 'l': "%l", 'd': "%l", 'q': "%q"}

def parse_instr(line):
    """Translate one normalized instruction line into the intermediate form.

    line is the output of apply_pdefs(): tokens separated by single spaces.
    The instruction name gets one suffix per operand ('_r' register, '_i'
    immediate or label, '_m' memory) and the operands are rewritten with
    the '%...' symbols understood by gentext(). The result is written to
    the intermediate listing without a trailing newline.

    Returns the number of bytes the instruction will occupy in .text.
    """
    tok = line.split(' ')

    fellthrough = False

    # 2 bytes for the instruction itself
    size = 2

    if tok[0] == "rep":
        tok = tok[1:]

        if not tok:
            _fail("Invalid format: {}".format(line))

        # +2 for the %rep prefix
        instrs.write("%rep ")
        size += 2

    instr_name = tok[0]
    instr_args = ''

    if len(tok) == 1:
        instrs.write("{}".format(instr_name))
        # size, not a literal 2: a 'rep' prefix has to be accounted for
        return size

    for word in tok[1:]:
        if not word:
            _fail("Invalid token in line: {}".format(line))

        instr_args += ' '
        pref = None

        # memory length prefixes
        if len(word) > 1 and word[1] == '[':
            pref = _MEM_PREFIXES.get(word[0])

            if pref is None:
                _fail("Bad memory length prefix")

            word = word[1:]

        # [reg+off] or [imm64]
        if word[0] == '[':
            if word[-1] != ']':
                _fail("Invalid memory operand: {}".format(line))

            word = word[1:-1]

            if pref is None:
                _fail("Missing access length modifier: {}".format(line))

            instr_name += "_m"
            instr_args += "{}".format(pref)

            if '+' in word:
                # +2 for A_OFF, +2 for offset, +2 for register
                size += 2 + 2 + 2
                instr_args += "off "

                reg, off = word.strip().split('+', 1)
                reg = reg.strip()
                off = off.strip()

                if len(word) <= 3 or not reg or not off:
                    _fail("Invalid memory operand: {}".format(line))

                instr_args += "{} {}".format(off, reg)
                continue

            # +2 for A_MEM
            size += 2
            instr_args += "mem "
            # FALLTHROUGH: the address itself is encoded below, without
            # adding another suffix to the instruction name
            fellthrough = True

        if word in pregs and not is_number(word):
            size += 2

            if not fellthrough:
                instr_name += "_r"

            instr_args += word
            fellthrough = False
            continue

        # a number or a label; for now every immediate is 64-bit
        # +2 for A_IMM64, +8 for immediate
        size += 2 + 8

        if not fellthrough:
            instr_name += "_i"

        instr_args += "%imm64 "
        instr_args += word
        fellthrough = False

    instrs.write("{}{}".format(instr_name, instr_args))

    return size

#-------------------------------------------------------------------------------

special_syms = {
    "%bmem":  0x7001,
    "%wmem":  0x7002,
    "%lmem":  0x7004,
    "%qmem":  0x7008,
    "%boff":  0x7701,
    "%woff":  0x7702,
    "%loff":  0x7704,
    "%qoff":  0x7708,
    "%imm16": 0x7772,
    "%imm32": 0x7774,
    "%imm64": 0x7778,
    "%rep":   0x8000
}

def gentext():
    """Second pass: encode the intermediate listing into the .text buffer.

    Every token is looked up, in order, as a register (16-bit index), an
    instruction (16-bit index), a text label (64-bit address), a data label
    (64-bit address), a special '%...' symbol (16-bit code) or a number.
    Numbers are written with the width selected by the most recent
    '%imm16/32/64' or '%?off' symbol. All values are little-endian. An
    unknown token aborts the program with status 1.
    """
    instrs.seek(0)

    text_start = 0 # 0x100000
    data_start = text_start + ptext
    # must match the padding genout() inserts between .text and .data
    data_start += (8 - data_start % 8)

    # width in bytes of the next number; every immediate parse_instr()
    # emits is preceded by a symbol that sets it
    lastimm = 8

    for line in instrs:
        for word in line.strip().split(' '):
            if word in pregs:
                idx = pregs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in pinstrs:
                idx = pinstrs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in plabels_text:
                addr = text_start + plabels_text[word]
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in plabels_data:
                addr = data_start + plabels_data[word] + pdata_pad
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in special_syms:
                if word == "%imm16":
                    lastimm = 2
                elif word == "%imm32":
                    lastimm = 4
                elif word == "%imm64":
                    lastimm = 8
                elif word[2:] == "off" and word[0] == '%':
                    lastimm = 2

                b_text.write(special_syms[word].to_bytes(2, byteorder='little', signed=False))
                continue

            if is_number(word):
                b_text.write(int(word, base=0).to_bytes(lastimm, byteorder='little', signed=False))
                continue

            _fail("Assembly error, unknown token '{}' in line: {}".format(word, line))

#-------------------------------------------------------------------------------

def genout():
    """Write the output image: .text, zero padding, then .data."""
    b_text.seek(0)
    b_data.seek(0)

    b_out.write(b_text.read())

    # NOTE(review): this is 8 bytes (not 0) when ptext is already a multiple
    # of 8; gentext() computes data_start with the same formula, so the two
    # stay consistent.
    data_align = (8 - ptext % 8)
    b_out.write(bytes(data_align))

    b_out.write(b_data.read())

#-------------------------------------------------------------------------------

def main(argv=None):
    """Run the assembler on argv (defaults to sys.argv).

    argv[1] is the source file, argv[2] the memory entry point and argv[3]
    the output file. Intermediate files '.<output>.instr', '.<output>.data'
    and '.<output>.text' are created as well.
    """
    global instrs, b_data, b_text, lst_regs, lst_instrs, fi, b_out, start_addr

    if argv is None:
        argv = sys.argv

    if len(argv) != 4:
        print("Usage: {} (source file) (memory entry point) (output file)"
            .format(argv[0]))
        sys.exit(1)

    try:
        start_addr = int(argv[2], base=0)
    except ValueError:
        print("Invalid memory entry point: {}".format(argv[2]))
        sys.exit(1)

    try:
        # open the inputs first so that a missing input does not leave a
        # truncated output file behind
        fi = open(argv[1])
        lst_regs = open(os.path.join(sys.path[0], "regs.lst"))
        lst_instrs = open(os.path.join(sys.path[0], "instrs.lst"))

        instrs = open(".{}.instr".format(argv[3]), "w+")
        b_data = open(".{}.data".format(argv[3]), "w+b")
        b_text = open(".{}.text".format(argv[3]), "w+b")
        b_out = open(argv[3], "wb")

        parse_lst_instrs()
        parse_lst_regs()
        parse()
        gentext()
        genout()
    finally:
        leave()

if __name__ == "__main__":
    main()