From b5a9286ce0dfcc9cefa217bf4f9f29654a31d9fc Mon Sep 17 00:00:00 2001 From: julianb0 Date: Sun, 19 May 2019 19:54:29 +0200 Subject: [PATCH] assembly --- .gitignore | 2 + LICENSE | 2 +- Makefile | 10 +- as/Makefile | 4 - as/k-as.py | 311 +++++++++++++++++++++++++++++++++++++++++++++++++-- as/regs.lst | 34 ++++++ pc/arch.h | 4 +- pc/dump.c | 5 +- pc/instrs.py | 4 + pc/regs.c | 20 ++-- 10 files changed, 365 insertions(+), 31 deletions(-) delete mode 100644 as/Makefile create mode 100644 as/regs.lst diff --git a/.gitignore b/.gitignore index 9afd788..3d89817 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,6 @@ test* arch_i.h *.dis +instrs.lst +*.out.* diff --git a/LICENSE b/LICENSE index a56966a..a2e12a5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright © 2019 The OS/K Team +Copyright (C) 2019 The OS/K Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 6d5ba5b..2e2e7ca 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,15 @@ # The OS/K Team licenses this file to you under the MIT license. # See the LICENSE file in the project root for more information. -all: kpc +all: kas # kas kpc: @cd pc && make --no-print-directory -kas: - @cd as && make --no-print-directory +kas: kpc as/k-as.py as/regs.lst as/testfile.asm + @cp pc/instrs.lst as -test: kas - as/k-as.py as/testfile.asm as/testout.out +test: + @cd as && ./k-as.py testfile.asm 0x100000 testout.out diff --git a/as/Makefile b/as/Makefile deleted file mode 100644 index 29e151a..0000000 --- a/as/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# The OS/K Team licenses this file to you under the MIT license. -# See the LICENSE file in the project root for more information. - -k-as: k-as.py diff --git a/as/k-as.py b/as/k-as.py index 300939a..3d6ac6e 100755 --- a/as/k-as.py +++ b/as/k-as.py @@ -4,15 +4,312 @@ # See the LICENSE file in the project root for more information. import sys +from array import array +from tempfile import TemporaryFile -if __name__ == '__main__': - if len(sys.argv) != 3: - print("Usage: {} input.asm output.bin".format(sys.argv[0])) - sys.exit(1) +if len(sys.argv) != 4: + print("Usage: {} (output file) (memory entry point) (source file)" + .format(sys.argv[0])) + sys.exit(1) - fi = open(sys.argv[1], "r") - out = open(sys.argv[2], "wb") +# TemporaryFile(mode='w+') +instrs = open("testfile.out.1", "w+") - out.close() +b_data = open("testfile.out.2", "w+b") +b_text = open("testfile.out.3", "w+b") + +lst_regs = open("regs.lst") +lst_instrs = open("instrs.lst") + +fi = open(sys.argv[1]) +out = open(sys.argv[3], "wb") + +start_addr = int(sys.argv[2], base=0) + +def leave(): fi.close() + out.close() + instrs.close() + b_data.close() + b_text.close() + lst_regs.close() + lst_instrs.close() + +#------------------------------------------------------------------------------- + +# Defines +pdefs = { + "tst1": 3, + "tst2": "5" +} + +# registers +pregs = list() + +# instructions +pinstrs = list() + +# labels +plabels_text = dict() +plabels_data = dict() + +# size of .data section +pdata = 0 + +# size of .text section +ptext = 0 + +# after parse() is done, pdata and ptext are never modified + +# padding bytes between .text and .data +pdata_pad = 0 + +#------------------------------------------------------------------------------- + +def name_valid(name): + for c in name.lower(): + if not(c in 'abcdefghijklmnopqrstuvwxyz0123456789[$%._+]='): + print("BAD '{}'".format(c)) + return False + return True + +#------------------------------------------------------------------------------- + +def parse_lst_regs(): + global pregs + for _, line in enumerate(lst_regs): + pregs.append(line.strip()) + +def parse_lst_instrs(): + global pinstrs + for _, line in enumerate(lst_instrs): + pinstrs.append(line.strip()) + +#------------------------------------------------------------------------------- + +def parse_preproc(line): + global pdata + + tok = line.split(' ', 2) + + for word in tok: + if not name_valid(word): + print("Invalid token in line: {}".format(line)) + return + + if tok[0] == 'def': + if len(tok) < 3: + print("Invalid format: {}".format(line)) + else: + pdefs[tok[1]] = tok[2] + return + + # .data + if len(tok) > 1 and tok[1] == '=': + if len(tok) < 3: + print("Invalid format: {}".format(line)) + else: + plabels_data[tok[0]] = pdata + written = b_data.write(int(tok[2]).to_bytes(8, byteorder='little', signed=False)) + assert(written == 8) + pdata += written + return + + print("Unrecognized directive: {}".format(line)) + +#------------------------------------------------------------------------------- + +def apply_pdefs(line): + tok = line.split(' ') + new = '' + tok[0] + + if len(tok) > 3: + print("Too many tokens in line: {}".format(line)) + return False, None + + # 'mov rax, rax' => 'mov rax rax' + if len(tok) == 3 and tok[1][-1] == ',': + tok[1] = tok[1][:-1] + + for word in tok[1:]: + new += ' ' + if word[0] == '[': + assert(word[-1] == ']') + new += '[' + word = word[1:-1] + need_bracket = True + else: + need_bracket = False + + if word in pdefs: + word = pdefs[word] + + if not name_valid(word): + print("Invalid token in line: {}".format(line)) + return False, None + + new += word + + if need_bracket: + new += ']' + + return True, new + +def parse(): + global ptext + for count, line in enumerate(fi): + line = line.rstrip() + + if len(line) == 0: + continue + + if line[0] == '#' or line[0] == ';': + continue + + if line[0] == ' ' or line[0] == '\t': + line = line.lstrip() + + if line[0] == '#' or line[0] == ';': + continue + + ok, line = apply_pdefs(line) + if ok: + # instrs.write("{} ".format(ptext)) + ptext += parse_instr(line) + instrs.write("\n") + continue + + # Preprocessor or label? + if line[-1] == ':': + if name_valid(line[:-1]): + plabels_text[line[:-1]] = ptext + else: + print("Bad label name: {}".format(line[:-1])) + continue + + # Preprocessor, .data, or invalid + parse_preproc(line) + +#------------------------------------------------------------------------------- + +def is_number(s): + try: + int(s, base=0) + + except ValueError: + return False + + return True + +def parse_instr(line): + tok = line.split(' ') + instrs.write(tok[0]) + + if len(tok) == 1: + return 2 # instruction + + size = 2 + for word in line.split(' ')[1:]: + instrs.write(' ') + + # [reg+off] or [imm64] + if word[0] == '[': + assert(word[-1] == ']') + word = word[1:-1] + + if '+' in word: + # +2 for A_OFF, +2 for offset, +2 for register + size += 2 + 2 + 2 + instrs.write("%off ") + + assert(len(word) > 3) + assert(word[2] == '+' or word[3] == '+') + + if word[2] == '+': + # r8, k0, etc + instrs.write("{} {}".format(word[3:], word[:2])) + else: + # rax, cr0, etc + instrs.write("{} {}".format(word[4:], word[:3])) + + continue + + else: + # +2 for A_MEM + size += 2 + instrs.write("%mem ") + + # FALLTHROUGH + + # for now every immediate is 64-bit + if is_number(word): + # +2 for A_IMM64, +8 for immediate + size += 2 + 8 + instrs.write("%imm64 ") + instrs.write(word) + continue + + if word in pregs: + size += 2 + instrs.write(word) + continue + + # it's a label (so a 16-bit immediate for now) + # +2 for A_IMM16, +2 for immediate + size += 2 + 2 + instrs.write("%imm16 ") + instrs.write(word) + + return size + +#------------------------------------------------------------------------------- + +def gentext(): + # offset in memory + off = start_addr + + instrs.seek(0) + + for _, line in enumerate(instrs): + tok = line.strip().split(' ') + + for word in tok: + if word in pregs: + idx = pregs.index(word) + b_text.write(idx.to_bytes(2, byteorder='little', signed=False)) + continue + + if word in pinstrs: + idx = pinstrs.index(word) + b_text.write(idx.to_bytes(2, byteorder='little', signed=False)) + continue + + if word in plabels_text: + addr = plabels_text[word] + b_text.write(addr.to_bytes(2, byteorder='little', signed=False)) + continue + + if word in plabels_data: + addr = plabels_data[word] + ptext + pdata_pad + b_text.write(addr.to_bytes(2, byteorder='little', signed=False)) + continue + + if 0: + continue + + if is_number(word): + continue + + print("Assembly error, unknown token '{}' in line: {}".format(word, line)) + leave() + sys.exit(1) + +#------------------------------------------------------------------------------- + +parse_lst_instrs() +parse_lst_regs() +parse() +gentext() +#genout() +leave() diff --git a/as/regs.lst b/as/regs.lst new file mode 100644 index 0000000..00b5136 --- /dev/null +++ b/as/regs.lst @@ -0,0 +1,34 @@ +rax +rbx +rcx +rdx +rdi +rsi +rbp +rsp +r8 +r9 +r10 +r11 +r12 +r13 +r14 +r15 +k0 +k1 +k2 +k3 +k4 +k5 +k6 +k7 +cr0 +cr1 +cr2 +cr3 +cr4 +cr5 +cr6 +cr7 +rip +flg diff --git a/pc/arch.h b/pc/arch.h index 9e301d2..d030e16 100644 --- a/pc/arch.h +++ b/pc/arch.h @@ -29,8 +29,8 @@ typedef struct arch_t arch_t; enum { RAX, RBX, RCX, RDX, RDI, RSI, RBP, RSP, - R08, R09, R10, R11, R12, R13, R14, R15, - K00, K01, K02, K03, K04, K05, K06, K07, + R8, R9, R10, R11, R12, R13, R14, R15, + K0, K1, K2, K3, K4, K5, K6, K7, CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, RIP, FLG, diff --git a/pc/dump.c b/pc/dump.c index 7b05043..a4a181f 100644 --- a/pc/dump.c +++ b/pc/dump.c @@ -11,12 +11,13 @@ void dumpregs(ctx_t *ctx) log("\nRegisters:\n"); log("rip=0x%016lX flg=0x%016lX", ctx->r[RIP].val, ctx->r[FLG].val); - for (i = 0; i < K00; i++) { + for (i = 0; i < K0; i++) { if (i % 4 == 0) log("\n"); r = &ctx->r[i]; - log("%s=0x%016lX ", r->name, r->val); + log("%s%s=0x%016lX ", r->name, + (strlen(r->name) == 2 ? "=" : ""), r->val); } log("\n"); } diff --git a/pc/instrs.py b/pc/instrs.py index e070255..f1b4940 100644 --- a/pc/instrs.py +++ b/pc/instrs.py @@ -5,6 +5,7 @@ fi = open("INSTRS") hd = open("arch_i.h", "w") +ls = open("instrs.lst", "w") count = 0 @@ -52,6 +53,8 @@ for _, line in enumerate(fi): p1 = "__ERROR__" p2 = "__ERROR__" + ls.write("{}\n".format(name)); + hd.write("#ifdef _NEED_ARCH_I\n") hd.write('{{ "{}", "{}", {}, {}, i_{} }},\n'\ .format(tok[0], name, p1, p2, tok[0])) @@ -69,6 +72,7 @@ hd.write("#else\n") hd.write("#define NINSTRS {}\n\n".format(count)) hd.write("#endif\n") +ls.close() hd.close() fi.close() diff --git a/pc/regs.c b/pc/regs.c index 5fbe145..f94ccd6 100644 --- a/pc/regs.c +++ b/pc/regs.c @@ -13,22 +13,22 @@ reg_t arch_r[NREGS] = { "rsi", "Accumulator 5", 0, GPR }, { "rbp", "Stack base", 0, GPR }, { "rsp", "Stack pointer", 0, GPR }, - { "r08", "Accumulator 8", 0, GPR }, - { "r09", "Accumulator 9", 0, GPR }, + { "r8", "Accumulator 8", 0, GPR }, + { "r9", "Accumulator 9", 0, GPR }, { "r10", "Accumulator 10", 0, GPR }, { "r11", "Accumulator 11", 0, GPR }, { "r12", "Accumulator 12", 0, GPR }, { "r13", "Accumulator 13", 0, GPR }, { "r14", "Accumulator 14", 0, GPR }, { "r15", "Accumulator 15", 0, GPR }, - { "k00", "Supervisor accumulator 0", 0, SYS }, - { "k01", "Supervisor accumulator 1", 0, SYS }, - { "k02", "Supervisor accumulator 2", 0, SYS }, - { "k03", "Supervisor accumulator 3", 0, SYS }, - { "k04", "Supervisor accumulator 4", 0, SYS }, - { "k05", "Supervisor accumulator 5", 0, SYS }, - { "k06", "Supervisor accumulator 6", 0, SYS }, - { "k07", "Supervisor accumulator 7", 0, SYS }, + { "k0", "Supervisor accumulator 0", 0, SYS }, + { "k1", "Supervisor accumulator 1", 0, SYS }, + { "k2", "Supervisor accumulator 2", 0, SYS }, + { "k3", "Supervisor accumulator 3", 0, SYS }, + { "k4", "Supervisor accumulator 4", 0, SYS }, + { "k5", "Supervisor accumulator 5", 0, SYS }, + { "k6", "Supervisor accumulator 6", 0, SYS }, + { "k7", "Supervisor accumulator 7", 0, SYS }, { "cr0", "Control register 0", 0, CTL }, { "cr1", "Control register 1", 0, CTL }, { "cr2", "Control register 2", 0, CTL },