#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

"""Assembler front end.

Reads an assembly source file plus the register list (``regs.lst``) and the
instruction list (``instrs.lst``), then:

1. ``parse()`` walks the source, records labels and defines, appends data
   words to the ``.data`` scratch file and writes a tokenised form of every
   instruction to the intermediate ``instrs`` scratch file;
2. ``gentext()`` re-reads the intermediate file and encodes the tokens it
   knows about as 16-bit little-endian words into the ``.text`` scratch file.

Command line: ``(source file) (memory entry point) (output file)``.
"""

import sys
from array import array
from tempfile import TemporaryFile

#-------------------------------------------------------------------------------

# File handles. They are opened by main() so that importing this module has
# no side effects; every function below reads them as module globals.
# NOTE(review): the scratch files are named files for now; the original
# carried a "TemporaryFile(mode='w+')" hint, presumably the intended end state.
instrs = None       # intermediate tokenised instructions (text, "w+")
b_data = None       # raw .data section bytes
b_text = None       # raw .text section bytes
lst_regs = None     # register names, one per line
lst_instrs = None   # instruction names, one per line
fi = None           # assembly source being read
out = None          # final output file (opened "wb")

# memory entry point given on the command line
start_addr = 0


def leave():
    """Close every file handle the assembler opened.

    Handles that were never opened are skipped, and closing an already
    closed file is a no-op, so this is safe to call more than once.
    """
    for handle in (fi, out, instrs, b_data, b_text, lst_regs, lst_instrs):
        if handle is not None:
            handle.close()

#-------------------------------------------------------------------------------

# Defines
pdefs = {
    "tst1": 3,
    "tst2": "5",
}

# registers
pregs = list()

# instructions
pinstrs = list()

# labels
plabels_text = dict()
plabels_data = dict()

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0

#-------------------------------------------------------------------------------

# Characters accepted in a token (compared against the lower-cased token).
_VALID_NAME_CHARS = frozenset('abcdefghijklmnopqrstuvwxyz0123456789[$%._+]=')


def name_valid(name):
    """Return True when every character of *name* is an accepted token char.

    The first offending character is reported on stdout before returning
    False. An empty string is considered valid.
    """
    for c in name.lower():
        if c not in _VALID_NAME_CHARS:
            print("BAD '{}'".format(c))
            return False

    return True

#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Append every line of the register list (stripped) to ``pregs``.

    Blank lines are kept on purpose: a register's position in the list is
    what gentext() encodes, so dropping lines would shift the indices.
    """
    pregs.extend(line.strip() for line in lst_regs)


def parse_lst_instrs():
    """Append every line of the instruction list (stripped) to ``pinstrs``."""
    pinstrs.extend(line.strip() for line in lst_instrs)

#-------------------------------------------------------------------------------

def parse_preproc(line):
    """Handle an unindented, non-label source line.

    Two forms are recognised:

    * ``def NAME VALUE`` stores VALUE in ``pdefs`` under NAME;
    * ``NAME = VALUE`` records NAME in ``plabels_data`` at the current data
      offset and appends VALUE to the .data scratch file as an unsigned
      64-bit little-endian word.

    Anything else, or a malformed directive, is reported on stdout and
    otherwise ignored.
    """
    global pdata

    tok = line.split(' ', 2)

    for word in tok:
        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return

    if tok[0] == 'def':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
        else:
            pdefs[tok[1]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            return

        # Encode first so that a bad value is reported like every other
        # malformed directive instead of aborting with a traceback, and so
        # that no label is recorded for a word that was never written.
        try:
            encoded = int(tok[2]).to_bytes(8, byteorder='little',
                                           signed=False)
        except (ValueError, OverflowError):
            print("Invalid format: {}".format(line))
            return

        plabels_data[tok[0]] = pdata
        written = b_data.write(encoded)
        assert written == 8  # internal sanity check, not input validation
        pdata += written
        return

    print("Unrecognized directive: {}".format(line))

#-------------------------------------------------------------------------------

def apply_pdefs(line):
    """Normalise an instruction line and substitute defines.

    The line is split on whitespace, the comma after the first operand is
    dropped (``mov rax, rax`` becomes ``mov rax rax``) and every operand, or
    the inside of a bracketed operand, that names an entry of ``pdefs`` is
    replaced by its value.

    Returns ``(True, new_line)`` on success and ``(False, None)`` after
    printing a diagnostic when the line has more than three tokens, an
    unbalanced bracket, or an operand containing an invalid character.
    """
    # split() rather than split(' '): runs of spaces or tabs between tokens
    # must not produce empty tokens (those used to raise IndexError below).
    tok = line.split()

    if not tok:
        return False, None

    if len(tok) > 3:
        print("Too many tokens in line: {}".format(line))
        return False, None

    # 'mov rax, rax' => 'mov rax rax'
    if len(tok) == 3 and tok[1].endswith(','):
        tok[1] = tok[1][:-1]

    parts = [tok[0]]

    for word in tok[1:]:
        need_bracket = word.startswith('[')

        if need_bracket:
            if not word.endswith(']') or len(word) < 2:
                print("Invalid token in line: {}".format(line))
                return False, None
            word = word[1:-1]

        if word in pdefs:
            # Defines may hold non-string values (e.g. the int 3).
            word = str(pdefs[word])

        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return False, None

        parts.append('[' + word + ']' if need_bracket else word)

    return True, ' '.join(parts)


def parse():
    """First pass over the source file.

    * blank lines and lines starting with ``#`` or ``;`` are skipped;
    * indented lines are instructions: they go through apply_pdefs() and
      parse_instr(), and ``ptext`` grows by the instruction's size;
    * unindented lines ending in ``:`` define a text label at the current
      ``ptext`` offset;
    * every other unindented line is handed to parse_preproc().
    """
    global ptext

    for line in fi:
        line = line.rstrip()

        if len(line) == 0:
            continue

        if line[0] == '#' or line[0] == ';':
            continue

        if line[0] == ' ' or line[0] == '\t':
            # Non-empty after rstrip() and starting with whitespace, so at
            # least one character survives the lstrip().
            line = line.lstrip()

            if line[0] == '#' or line[0] == ';':
                continue

            ok, line = apply_pdefs(line)
            if ok:
                ptext += parse_instr(line)
                instrs.write("\n")

            # An indented line is never a label or directive, even when it
            # was rejected above (line is None in that case).
            continue

        # Preprocessor or label?
        if line[-1] == ':':
            if name_valid(line[:-1]):
                plabels_text[line[:-1]] = ptext
            else:
                print("Bad label name: {}".format(line[:-1]))
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)

#-------------------------------------------------------------------------------

def is_number(s):
    """Return True when *s* is an integer literal ``int(s, base=0)`` accepts."""
    try:
        int(s, base=0)
    except ValueError:
        return False
    return True


def parse_instr(line):
    """Tokenise one normalised instruction and return its encoded size.

    *line* is expected in the form produced by apply_pdefs(): mnemonic and
    operands separated by single spaces. The mnemonic and a tagged form of
    each operand are written to the intermediate ``instrs`` file:

    * ``[reg+off]`` becomes ``%off off reg``   (size += 2 + 2 + 2)
    * ``[x]``       becomes ``%mem `` followed by the form of ``x`` (size += 2)
    * a number      becomes ``%imm64 n``       (size += 2 + 8)
    * a register    is written as-is           (size += 2)
    * anything else is taken to be a label:  ``%imm16 name`` (size += 2 + 2)

    Raises ValueError for a malformed bracketed operand.
    """
    tok = line.split(' ')

    instrs.write(tok[0])

    if len(tok) == 1:
        return 2  # instruction

    size = 2

    for word in tok[1:]:
        instrs.write(' ')

        # [reg+off] or [imm64]
        if word.startswith('['):
            if not word.endswith(']'):
                raise ValueError(
                    "unterminated memory operand '{}'".format(word))
            word = word[1:-1]

            if '+' in word:
                # Register names are two or three characters long, so the
                # '+' has to sit at index 2 or 3.
                if len(word) <= 3 or (word[2] != '+' and word[3] != '+'):
                    raise ValueError(
                        "malformed [reg+off] operand '{}'".format(word))

                # +2 for A_OFF, +2 for offset, +2 for register
                size += 2 + 2 + 2
                instrs.write("%off ")

                if word[2] == '+':
                    # r8, k0, etc
                    instrs.write("{} {}".format(word[3:], word[:2]))
                else:
                    # rax, cr0, etc
                    instrs.write("{} {}".format(word[4:], word[:3]))

                continue

            # +2 for A_MEM
            size += 2
            instrs.write("%mem ")
            # FALLTHROUGH: the inner operand is classified below

        # for now every immediate is 64-bit
        if is_number(word):
            # +2 for A_IMM64, +8 for immediate
            size += 2 + 8
            instrs.write("%imm64 ")
            instrs.write(word)
            continue

        if word in pregs:
            size += 2
            instrs.write(word)
            continue

        # it's a label (so a 16-bit immediate for now)
        # +2 for A_IMM16, +2 for immediate
        size += 2 + 2
        instrs.write("%imm16 ")
        instrs.write(word)

    return size

#-------------------------------------------------------------------------------

def _emit16(value):
    """Append *value* to the .text scratch file as an unsigned LE 16-bit word."""
    b_text.write(value.to_bytes(2, byteorder='little', signed=False))


def gentext():
    """Second pass: encode the intermediate file into the .text scratch file.

    Each token is looked up, in order, as a register, an instruction, a text
    label and a data label, and the matching index or address is written as
    a 16-bit word. Numeric tokens are skipped (nothing is written for them
    yet). Any other token is an assembly error: the files are closed and the
    process exits with status 1.

    NOTE(review): start_addr is parsed from the command line but is not
    applied to label addresses here -- confirm where it is meant to be used.
    """
    instrs.seek(0)

    for line in instrs:
        for word in line.strip().split(' '):
            if word in pregs:
                _emit16(pregs.index(word))
                continue

            if word in pinstrs:
                _emit16(pinstrs.index(word))
                continue

            if word in plabels_text:
                _emit16(plabels_text[word])
                continue

            if word in plabels_data:
                # data labels live after the text section and its padding
                _emit16(plabels_data[word] + ptext + pdata_pad)
                continue

            if is_number(word):
                continue

            print("Assembly error, unknown token '{}' in line: {}"
                  .format(word, line))
            leave()
            sys.exit(1)

#-------------------------------------------------------------------------------

def main():
    """Script entry point: open the files, run both passes, close the files."""
    global instrs, b_data, b_text, lst_regs, lst_instrs, fi, out, start_addr

    if len(sys.argv) != 4:
        # argv[1] is read as the source and argv[3] is opened for writing, so
        # the usage text lists them in that order.
        print("Usage: {} (source file) (memory entry point) (output file)"
              .format(sys.argv[0]))
        sys.exit(1)

    try:
        instrs = open("testfile.out.1", "w+")
        b_data = open("testfile.out.2", "w+b")
        b_text = open("testfile.out.3", "w+b")

        lst_regs = open("regs.lst")
        lst_instrs = open("instrs.lst")

        fi = open(sys.argv[1])
        out = open(sys.argv[3], "wb")

        start_addr = int(sys.argv[2], base=0)

        parse_lst_instrs()
        parse_lst_regs()
        parse()
        gentext()
        # TODO: genout() is not defined in this file yet, so nothing is
        # written to `out`.
    finally:
        leave()


if __name__ == "__main__":
    main()