From b5a9286ce0dfcc9cefa217bf4f9f29654a31d9fc Mon Sep 17 00:00:00 2001
From: julianb0 <spectral@os-k.eu>
Date: Sun, 19 May 2019 19:54:29 +0200
Subject: [PATCH] assembly

---
 .gitignore   |   2 +
 LICENSE      |   2 +-
 Makefile     |  10 +-
 as/Makefile  |   4 -
 as/k-as.py   | 311 +++++++++++++++++++++++++++++++++++++++++++++++++--
 as/regs.lst  |  34 ++++++
 pc/arch.h    |   4 +-
 pc/dump.c    |   5 +-
 pc/instrs.py |   4 +
 pc/regs.c    |  20 ++--
 10 files changed, 365 insertions(+), 31 deletions(-)
 delete mode 100644 as/Makefile
 create mode 100644 as/regs.lst

diff --git a/.gitignore b/.gitignore
index 9afd788..3d89817 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@
 test*
 arch_i.h
 *.dis
+instrs.lst
+*.out.*
 
diff --git a/LICENSE b/LICENSE
index a56966a..a2e12a5 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 The MIT License (MIT)
 
-Copyright © 2019 The OS/K Team
+Copyright (C) 2019 The OS/K Team
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/Makefile b/Makefile
index 6d5ba5b..2e2e7ca 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,15 @@
 # The OS/K Team licenses this file to you under the MIT license.
 # See the LICENSE file in the project root for more information.
 
-all: kpc
+all: kas
 # kas
 
 kpc:
 	@cd pc && make --no-print-directory
 
-kas:
-	@cd as && make --no-print-directory
+kas: kpc as/k-as.py as/regs.lst as/testfile.asm
+	@cp pc/instrs.lst as
 
-test: kas
-	as/k-as.py as/testfile.asm as/testout.out
+test:
+	@cd as && ./k-as.py testfile.asm 0x100000 testout.out
 
diff --git a/as/Makefile b/as/Makefile
deleted file mode 100644
index 29e151a..0000000
--- a/as/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# The OS/K Team licenses this file to you under the MIT license.
-# See the LICENSE file in the project root for more information.
-
-k-as: k-as.py
diff --git a/as/k-as.py b/as/k-as.py
index 300939a..3d6ac6e 100755
--- a/as/k-as.py
+++ b/as/k-as.py
@@ -4,15 +4,312 @@
 # See the LICENSE file in the project root for more information.
 
 import sys
+from array import array
+from tempfile import TemporaryFile
 
-if __name__ == '__main__':
-    if len(sys.argv) != 3:
-        print("Usage: {} input.asm output.bin".format(sys.argv[0]))
-        sys.exit(1)
+if len(sys.argv) != 4:
+    print("Usage: {} (source file) (memory entry point) (output file)"
+                    .format(sys.argv[0]))
+    sys.exit(1)
 
-    fi = open(sys.argv[1], "r")
-    out = open(sys.argv[2], "wb")
+# TemporaryFile(mode='w+')
+instrs = open("testfile.out.1", "w+")
 
-    out.close()
+b_data = open("testfile.out.2", "w+b")
+b_text = open("testfile.out.3", "w+b")
+
+lst_regs = open("regs.lst")
+lst_instrs = open("instrs.lst")
+
+fi = open(sys.argv[1])
+out = open(sys.argv[3], "wb")
+
+start_addr = int(sys.argv[2], base=0)
+
+def leave():
     fi.close()
+    out.close()
+    instrs.close()
+    b_data.close()
+    b_text.close()
+    lst_regs.close()
+    lst_instrs.close()
+
+#-------------------------------------------------------------------------------
+
+# Defines
+pdefs = {
+    "tst1": 3,
+    "tst2": "5"
+}
+
+# registers
+pregs = list()
+
+# instructions
+pinstrs = list()
+
+# labels
+plabels_text = dict()
+plabels_data = dict()
+
+# size of .data section
+pdata = 0
+
+# size of .text section
+ptext = 0
+
+# after parse() is done, pdata and ptext are never modified
+
+# padding bytes between .text and .data
+pdata_pad = 0
+
+#-------------------------------------------------------------------------------
+
+def name_valid(name):
+    for c in name.lower():
+        if not(c in 'abcdefghijklmnopqrstuvwxyz0123456789[$%._+]='):
+            print("BAD '{}'".format(c))
+            return False
+    return True
+    
+#-------------------------------------------------------------------------------
+
+def parse_lst_regs():
+    global pregs
+    for _, line in enumerate(lst_regs):
+        pregs.append(line.strip())
+
+def parse_lst_instrs():
+    global pinstrs
+    for _, line in enumerate(lst_instrs):
+        pinstrs.append(line.strip())
+
+#-------------------------------------------------------------------------------
+
+def parse_preproc(line):
+    global pdata
+
+    tok = line.split(' ', 2)
+
+    for word in tok:
+        if not name_valid(word):
+            print("Invalid token in line: {}".format(line))
+            return
+
+    if tok[0] == 'def':
+        if len(tok) < 3:
+            print("Invalid format: {}".format(line))
+        else:
+            pdefs[tok[1]] = tok[2]
+        return
+    
+    # .data
+    if len(tok) > 1 and tok[1] == '=':
+        if len(tok) < 3:
+            print("Invalid format: {}".format(line))
+        else:
+            plabels_data[tok[0]] = pdata
+            written = b_data.write(int(tok[2]).to_bytes(8, byteorder='little', signed=False))
+            assert(written == 8)
+            pdata += written
+        return
+    
+    print("Unrecognized directive: {}".format(line))
+
+#-------------------------------------------------------------------------------
+
+def apply_pdefs(line):
+    """Substitute defines in the operands; return (ok, normalized line)."""
+    # split() with no separator tolerates repeated spaces and tabs
+    tok = line.split()
+    new = tok[0]
+
+    if len(tok) > 3:
+        print("Too many tokens in line: {}".format(line))
+        return False, None
+
+    # 'mov rax, rax' => 'mov rax rax'
+    if len(tok) == 3 and tok[1].endswith(','):
+        tok[1] = tok[1][:-1]
+
+    for word in tok[1:]:
+        need_bracket = word.startswith('[')
+        if need_bracket:
+            if not word.endswith(']'):
+                print("Invalid token in line: {}".format(line))
+                return False, None
+            word = word[1:-1]
+
+        # defines may hold non-string values (e.g. ints), so normalize
+        word = str(pdefs.get(word, word))
+
+        if not word or not name_valid(word):
+            print("Invalid token in line: {}".format(line))
+            return False, None
+
+        if need_bracket:
+            word = '[' + word + ']'
+
+        new += ' ' + word
+
+    return True, new
+
+def parse():
+    global ptext
+    for count, line in enumerate(fi):
+        line = line.rstrip()
+        
+        if len(line) == 0:
+            continue
+        
+        if line[0] == '#' or line[0] == ';':
+            continue
+        
+        if line[0] == ' ' or line[0] == '\t':
+            line = line.lstrip()
+
+            if line[0] == '#' or line[0] == ';':
+                continue
+
+            ok, line = apply_pdefs(line)
+            if ok:
+                # instrs.write("{} ".format(ptext))
+                ptext += parse_instr(line)
+                instrs.write("\n")
+            continue
+        
+        # Preprocessor or label?
+        if line[-1] == ':':
+            if name_valid(line[:-1]):
+                plabels_text[line[:-1]] = ptext
+            else:
+                print("Bad label name: {}".format(line[:-1]))
+            continue
+        
+        # Preprocessor, .data, or invalid
+        parse_preproc(line)
+
+#-------------------------------------------------------------------------------
+
+def is_number(s):
+    try:
+        int(s, base=0)
+
+    except ValueError:
+        return False
+
+    return True
+   
+def parse_instr(line):
+    tok = line.split(' ')
+    instrs.write(tok[0])
+
+    if len(tok) == 1:
+        return 2 # instruction
+
+    size = 2
+    for word in line.split(' ')[1:]:
+        instrs.write(' ')
+
+        # [reg+off] or [imm64]
+        if word[0] == '[':
+            assert(word[-1] == ']')
+            word = word[1:-1]
+
+            if '+' in word:
+                # +2 for A_OFF, +2 for offset, +2 for register
+                size += 2 + 2 + 2
+                instrs.write("%off ")
+                
+                assert(len(word) > 3)
+                assert(word[2] == '+' or word[3] == '+')
+
+                if word[2] == '+':
+                    # r8, k0, etc
+                    instrs.write("{} {}".format(word[3:], word[:2]))
+                else:
+                    # rax, cr0, etc
+                    instrs.write("{} {}".format(word[4:], word[:3]))
+
+                continue
+            
+            else:
+                # +2 for A_MEM
+                size += 2
+                instrs.write("%mem ")
+
+                # FALLTHROUGH
+
+        # for now every immediate is 64-bit
+        if is_number(word):
+            # +2 for A_IMM64, +8 for immediate
+            size += 2 + 8
+            instrs.write("%imm64 ")
+            instrs.write(word)
+            continue
+        
+        if word in pregs:
+            size += 2
+            instrs.write(word)
+            continue
+        
+        # it's a label (so a 16-bit immediate for now)
+        # +2 for A_IMM16, +2 for immediate
+        size += 2 + 2
+        instrs.write("%imm16 ")
+        instrs.write(word)
+
+    return size
+
+#-------------------------------------------------------------------------------
+
+def gentext():
+    # offset in memory
+    off = start_addr
+    
+    instrs.seek(0)
+
+    for _, line in enumerate(instrs):
+        tok = line.strip().split(' ')
+        
+        for word in tok:
+            if word in pregs:
+                idx = pregs.index(word)
+                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
+                continue
+
+            if word in pinstrs:
+                idx = pinstrs.index(word)
+                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
+                continue
+
+            if word in plabels_text:
+                addr = plabels_text[word]
+                b_text.write(addr.to_bytes(2, byteorder='little', signed=False))
+                continue
+
+            if word in plabels_data:
+                addr = plabels_data[word] + ptext + pdata_pad
+                b_text.write(addr.to_bytes(2, byteorder='little', signed=False))
+                continue
+            
+            if 0:
+                continue
+            
+            if is_number(word):
+                continue
+            
+            print("Assembly error, unknown token '{}' in line: {}".format(word, line))
+            leave()
+            sys.exit(1)
+
+#-------------------------------------------------------------------------------
+
+parse_lst_instrs()
+parse_lst_regs()
+parse()
+gentext()
+#genout()
+leave()
 
diff --git a/as/regs.lst b/as/regs.lst
new file mode 100644
index 0000000..00b5136
--- /dev/null
+++ b/as/regs.lst
@@ -0,0 +1,34 @@
+rax
+rbx
+rcx
+rdx
+rdi
+rsi
+rbp
+rsp
+r8
+r9
+r10
+r11
+r12
+r13
+r14
+r15
+k0
+k1
+k2
+k3
+k4
+k5
+k6
+k7
+cr0
+cr1
+cr2
+cr3
+cr4
+cr5
+cr6
+cr7
+rip
+flg
diff --git a/pc/arch.h b/pc/arch.h
index 9e301d2..d030e16 100644
--- a/pc/arch.h
+++ b/pc/arch.h
@@ -29,8 +29,8 @@ typedef struct arch_t       arch_t;
 enum
 {
     RAX, RBX, RCX, RDX, RDI, RSI, RBP, RSP,
-    R08, R09, R10, R11, R12, R13, R14, R15,
-    K00, K01, K02, K03, K04, K05, K06, K07,
+    R8,  R9,  R10, R11, R12, R13, R14, R15,
+    K0,  K1,  K2,  K3,  K4,  K5,  K6,  K7,
     CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7,
     RIP, FLG,
 
diff --git a/pc/dump.c b/pc/dump.c
index 7b05043..a4a181f 100644
--- a/pc/dump.c
+++ b/pc/dump.c
@@ -11,12 +11,13 @@ void dumpregs(ctx_t *ctx)
     log("\nRegisters:\n");
     log("rip=0x%016lX flg=0x%016lX", ctx->r[RIP].val, ctx->r[FLG].val);
     
-    for (i = 0; i < K00; i++) {
+    for (i = 0; i < K0; i++) {
         if (i % 4 == 0)
             log("\n");
         
         r = &ctx->r[i];
-        log("%s=0x%016lX ", r->name, r->val);
+        log("%s%s=0x%016lX ", r->name,
+            (strlen(r->name) == 2 ? "=" : ""), r->val);
     }
     log("\n");
 }
diff --git a/pc/instrs.py b/pc/instrs.py
index e070255..f1b4940 100644
--- a/pc/instrs.py
+++ b/pc/instrs.py
@@ -5,6 +5,7 @@
 
 fi = open("INSTRS")
 hd = open("arch_i.h", "w")
+ls = open("instrs.lst", "w")
 
 count = 0
 
@@ -52,6 +53,8 @@ for _, line in enumerate(fi):
         p1 = "__ERROR__"
         p2 = "__ERROR__"
 
+    ls.write("{}\n".format(name));
+
     hd.write("#ifdef _NEED_ARCH_I\n")
     hd.write('{{ "{}", "{}", {}, {}, i_{} }},\n'\
                 .format(tok[0], name, p1, p2, tok[0]))
@@ -69,6 +72,7 @@ hd.write("#else\n")
 hd.write("#define NINSTRS {}\n\n".format(count))
 hd.write("#endif\n")
 
+ls.close()
 hd.close()
 fi.close()
 
diff --git a/pc/regs.c b/pc/regs.c
index 5fbe145..f94ccd6 100644
--- a/pc/regs.c
+++ b/pc/regs.c
@@ -13,22 +13,22 @@ reg_t arch_r[NREGS] =
     { "rsi", "Accumulator 5", 0, GPR },
     { "rbp", "Stack base",    0, GPR },
     { "rsp", "Stack pointer", 0, GPR },
-    { "r08", "Accumulator 8", 0, GPR },
-    { "r09", "Accumulator 9", 0, GPR },
+    { "r8", "Accumulator 8", 0, GPR },
+    { "r9", "Accumulator 9", 0, GPR },
     { "r10", "Accumulator 10", 0, GPR },
     { "r11", "Accumulator 11", 0, GPR },
     { "r12", "Accumulator 12", 0, GPR },
     { "r13", "Accumulator 13", 0, GPR },
     { "r14", "Accumulator 14", 0, GPR },
     { "r15", "Accumulator 15", 0, GPR },
-    { "k00", "Supervisor accumulator 0", 0, SYS },
-    { "k01", "Supervisor accumulator 1", 0, SYS },
-    { "k02", "Supervisor accumulator 2", 0, SYS },
-    { "k03", "Supervisor accumulator 3", 0, SYS },
-    { "k04", "Supervisor accumulator 4", 0, SYS },
-    { "k05", "Supervisor accumulator 5", 0, SYS },
-    { "k06", "Supervisor accumulator 6", 0, SYS },
-    { "k07", "Supervisor accumulator 7", 0, SYS },
+    { "k0", "Supervisor accumulator 0", 0, SYS },
+    { "k1", "Supervisor accumulator 1", 0, SYS },
+    { "k2", "Supervisor accumulator 2", 0, SYS },
+    { "k3", "Supervisor accumulator 3", 0, SYS },
+    { "k4", "Supervisor accumulator 4", 0, SYS },
+    { "k5", "Supervisor accumulator 5", 0, SYS },
+    { "k6", "Supervisor accumulator 6", 0, SYS },
+    { "k7", "Supervisor accumulator 7", 0, SYS },
     { "cr0", "Control register 0", 0, CTL },
     { "cr1", "Control register 1", 0, CTL },
     { "cr2", "Control register 2", 0, CTL },