kvisc/as/k-as.py

#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

import sys
from array import array
from tempfile import TemporaryFile

# Command-line contract: exactly three arguments are required.
# The order printed here matches how the arguments are consumed further down:
# sys.argv[1] is opened for reading (source), sys.argv[2] is parsed as the
# entry address, and sys.argv[3] is opened for binary writing (output).
# The previous message listed output and source the other way round, which
# would make a user following it truncate their own source file.
if len(sys.argv) != 4:
    print("Usage: {} (source file) (memory entry point) (output file)"
                    .format(sys.argv[0]))
    sys.exit(1)

# Intermediate text listing of the parsed instructions: written by parse() /
# parse_instr() and read back from the start by gentext().
# Currently a fixed, named file in the working directory; the intended
# replacement is TemporaryFile(mode='w+').
instrs = open("testfile.out.1", "w+")

# Binary scratch files: b_data receives the encoded .data values
# (parse_preproc), b_text receives the encoded .text section (gentext).
b_data = open("testfile.out.2", "w+b")
b_text = open("testfile.out.3", "w+b")

# Lookup tables read from the working directory, one name per line
# (see parse_lst_regs / parse_lst_instrs).
lst_regs = open("regs.lst")
lst_instrs = open("instrs.lst")

# sys.argv[1] is the assembly source (read by parse());
# sys.argv[3] is the final binary output. Opening with "wb" truncates it.
fi = open(sys.argv[1])
out = open(sys.argv[3], "wb")

# base=0 lets int() infer the radix from the literal's prefix
# (e.g. 0x..., 0o..., 0b..., or plain decimal).
start_addr = int(sys.argv[2], base=0)

def leave():
    """Close every file handle opened at module start-up.

    Handles are closed in the same order as before: source, output,
    instruction listing, data scratch, text scratch, then the two .lst tables.
    """
    for handle in (fi, out, instrs, b_data, b_text, lst_regs, lst_instrs):
        handle.close()

#-------------------------------------------------------------------------------

# Defines: name -> replacement text substituted for operands by apply_pdefs().
# Values must be strings: the replacement is checked with name_valid() (which
# calls .lower() on it) and then concatenated into the rewritten line, exactly
# like the string values stored by the 'def' directive in parse_preproc().
pdefs = {
    "tst1": "3",
    "tst2": "5"
}

# registers (filled by parse_lst_regs; the list index is the encoded value)
pregs = list()

# instructions (filled by parse_lst_instrs; the list index is the encoded value)
pinstrs = list()

# labels: name -> offset inside the respective section
plabels_text = dict()
plabels_data = dict()

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0

#-------------------------------------------------------------------------------

def name_valid(name):
    """Tell whether *name* consists only of characters allowed in a token.

    The check is case-insensitive (the name is lower-cased first). On the
    first offending character a "BAD '<c>'" diagnostic is printed and False
    is returned. An empty name is considered valid.
    """
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$%._+]='

    # First character outside the allowed set, or None when there is none.
    offender = next((ch for ch in name.lower() if ch not in allowed), None)
    if offender is None:
        return True

    print("BAD '{}'".format(offender))
    return False

#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Append every line of lst_regs, stripped of surrounding whitespace, to pregs."""
    pregs.extend(entry.strip() for entry in lst_regs)

def parse_lst_instrs():
    """Append every line of lst_instrs, stripped of surrounding whitespace, to pinstrs."""
    pinstrs.extend(entry.strip() for entry in lst_instrs)

#-------------------------------------------------------------------------------

def _parse_data_value(text):
    """Convert *text* to an int, or return None when it is not a number.

    Base 0 is tried first so prefixed literals (0x.., 0o.., 0b..) work the
    same way they do in is_number() and for the entry address; plain base 10
    is the fallback so zero-padded decimals such as "010" keep working.
    """
    for base in (0, 10):
        try:
            return int(text, base=base)
        except ValueError:
            continue
    return None


def parse_preproc(line):
    """Handle one non-indented, non-label source line.

    Two directives are recognised:

    * ``def NAME VALUE``  -- records VALUE as the replacement text for NAME
      in pdefs.
    * ``NAME = VALUE``    -- registers NAME as a .data label at the current
      data offset and appends VALUE to b_data as an unsigned 64-bit
      little-endian integer, advancing pdata by 8.

    Every problem is reported with print() and the line is skipped; nothing
    is returned.
    """
    global pdata

    # At most three fields; the third keeps the rest of the line intact.
    tok = line.split(' ', 2)

    for word in tok:
        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return

    if tok[0] == 'def':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
        else:
            pdefs[tok[1]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            return

        value = _parse_data_value(tok[2])
        if value is None:
            print("Invalid value in line: {}".format(line))
            return

        try:
            encoded = value.to_bytes(8, byteorder='little', signed=False)
        except OverflowError:
            # negative or wider than 64 bits
            print("Invalid value in line: {}".format(line))
            return

        # Register the label only once the value is known to be encodable,
        # so a rejected line leaves no dangling label behind.
        plabels_data[tok[0]] = pdata
        written = b_data.write(encoded)
        assert(written == 8)
        pdata += written
        return

    print("Unrecognized directive: {}".format(line))

#-------------------------------------------------------------------------------

def apply_pdefs(line):
    """Normalise an instruction line and substitute defines in its operands.

    The line is split on runs of whitespace (so repeated spaces or tabs
    between fields are accepted), the comma after the first of two operands
    is dropped, and every operand -- or the inside of a ``[...]`` operand --
    that names an entry of pdefs is replaced by that entry's value.

    Returns ``(True, rewritten_line)`` with fields separated by single
    spaces, or ``(False, None)`` after printing a diagnostic when the line
    has more than three fields, an operand is empty, a bracket is not
    closed, or an operand contains a character rejected by name_valid().
    The mnemonic itself is passed through unchecked.
    """
    tok = line.split()

    # Nothing to rewrite; mirrors the historical result for an empty line.
    if not tok:
        return True, ''

    if len(tok) > 3:
        print("Too many tokens in line: {}".format(line))
        return False, None

    # 'mov rax, rax' => 'mov rax rax'
    if len(tok) == 3 and tok[1].endswith(','):
        tok[1] = tok[1][:-1]

    parts = [tok[0]]

    for word in tok[1:]:
        # Can only happen when the operand was a lone ','.
        if not word:
            print("Invalid token in line: {}".format(line))
            return False, None

        need_bracket = word.startswith('[')
        if need_bracket:
            # Explicit check instead of assert: this is input validation and
            # must still run under 'python -O'.
            if len(word) < 2 or not word.endswith(']'):
                print("Invalid token in line: {}".format(line))
                return False, None
            word = word[1:-1]

        # str(): tolerate defines that were registered with a non-string value.
        word = str(pdefs.get(word, word))

        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return False, None

        parts.append('[' + word + ']' if need_bracket else word)

    return True, ' '.join(parts)

def parse():
    """First pass over the source file ``fi``.

    Each line is classified after trailing whitespace is removed:

    * empty lines and lines starting with '#' or ';' are ignored;
    * lines starting with a space or tab are instructions (unless the first
      non-blank character starts a comment): they go through apply_pdefs()
      and parse_instr(), ptext grows by the size returned, and a newline
      terminates the entry in ``instrs``;
    * lines ending with ':' define a text label at the current ptext offset;
    * everything else is handed to parse_preproc().
    """
    global ptext

    comment_marks = ('#', ';')

    for raw in fi:
        line = raw.rstrip()

        if not line or line.startswith(comment_marks):
            continue

        # Indented line: an instruction or an indented comment.
        if line.startswith((' ', '\t')):
            body = line.lstrip()

            if body.startswith(comment_marks):
                continue

            ok, rewritten = apply_pdefs(body)
            if ok:
                ptext += parse_instr(rewritten)
                instrs.write("\n")
            continue

        # Label definition
        if line.endswith(':'):
            label = line[:-1]
            if name_valid(label):
                plabels_text[label] = ptext
            else:
                print("Bad label name: {}".format(label))
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)

#-------------------------------------------------------------------------------

def is_number(s):
    """Return True when the string *s* is an integer literal.

    The literal is interpreted with base 0, i.e. the radix is taken from its
    prefix (decimal, 0x, 0o or 0b). Only ValueError is treated as
    "not a number"; any other exception propagates.
    """
    try:
        int(s, base=0)
    except ValueError:
        verdict = False
    else:
        verdict = True
    return verdict

def parse_instr(line):
    """Write one instruction to ``instrs`` in annotated form; return its size.

    *line* is a space-separated instruction as produced by apply_pdefs().
    The mnemonic is written first, then each operand preceded by a space and,
    where applicable, by a marker describing its kind:

    * ``[reg+off]``  -> ``%off <off> <reg>``  (reg is 2 or 3 characters)
    * ``[x]``        -> ``%mem`` followed by the encoding of ``x``
    * number         -> ``%imm64 <number>``
    * register       -> ``<register>``
    * anything else  -> ``%imm16 <name>``  (treated as a label)

    The returned size is in bytes: 2 for the instruction itself plus the
    per-operand amounts noted in the comments below.
    """
    mnemonic, *operands = line.split(' ')
    instrs.write(mnemonic)

    # the instruction word itself
    total = 2

    for operand in operands:
        instrs.write(' ')

        # [reg+off] or [imm64]
        if operand[0] == '[':
            assert(operand[-1] == ']')
            operand = operand[1:-1]

            if '+' in operand:
                # A_OFF marker (2) + offset (2) + register (2)
                total += 6
                instrs.write("%off ")

                assert(len(operand) > 3)
                assert(operand[2] == '+' or operand[3] == '+')

                # Two-letter registers (r8, k0, ...) have the '+' at index 2,
                # three-letter ones (rax, cr0, ...) at index 3.
                plus_at = 2 if operand[2] == '+' else 3
                instrs.write("{} {}".format(operand[plus_at + 1:],
                                            operand[:plus_at]))
                continue

            # A_MEM marker (2); the inner operand is encoded below
            total += 2
            instrs.write("%mem ")

        if is_number(operand):
            # for now every immediate is 64-bit: A_IMM64 (2) + immediate (8)
            total += 10
            instrs.write("%imm64 ")
        elif operand in pregs:
            total += 2
        else:
            # it's a label (so a 16-bit immediate for now):
            # A_IMM16 (2) + immediate (2)
            total += 4
            instrs.write("%imm16 ")

        instrs.write(operand)

    return total

#-------------------------------------------------------------------------------

def gentext():
    """Second pass: encode the listing in ``instrs`` into ``b_text``.

    The listing is re-read from the start. Each space-separated word is
    looked up, in this order, in pregs, pinstrs, plabels_text and
    plabels_data; the first match is written as an unsigned 16-bit
    little-endian value (list index for registers and instructions, section
    offset for text labels, ``offset + ptext + pdata_pad`` for data labels).
    Words that are integer literals are skipped without emitting anything.
    Any other word aborts the program: a diagnostic is printed, leave() is
    called and the process exits with status 1.

    NOTE(review): start_addr is not added to the label addresses emitted
    here -- confirm whether that is intended.
    """
    def emit16(value):
        # unsigned 16-bit little-endian; to_bytes raises if it does not fit
        b_text.write(value.to_bytes(2, byteorder='little', signed=False))

    instrs.seek(0)

    for line in instrs:
        for word in line.strip().split(' '):
            if word in pregs:
                emit16(pregs.index(word))
            elif word in pinstrs:
                emit16(pinstrs.index(word))
            elif word in plabels_text:
                emit16(plabels_text[word])
            elif word in plabels_data:
                emit16(plabels_data[word] + ptext + pdata_pad)
            elif not is_number(word):
                print("Assembly error, unknown token '{}' in line: {}".format(word, line))
                leave()
                sys.exit(1)

#-------------------------------------------------------------------------------

# Driver: load the lookup tables, run the parsing pass, then encode the text
# section. The final output stage (genout) is not wired in yet.
try:
    parse_lst_instrs()
    parse_lst_regs()
    parse()
    gentext()
    #genout()
finally:
    # Release the file handles even when a stage raises. gentext() may have
    # called leave() already before exiting; closing an already-closed file
    # object is a no-op, so the second call is harmless.
    leave()