kvisc/as/k-as.py

#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

import os
import sys
import subprocess
from array import array
from tempfile import TemporaryFile

# Command line: (source file) (memory entry point) (output file).
# The usage text lists the arguments in the order they are actually read
# below: argv[1] is opened for reading as the main source file and argv[3]
# is opened for binary writing as the output image.
if len(sys.argv) != 4:
    print("Usage: {} (source file) (memory entry point) (output file)"
                    .format(sys.argv[0]))
    sys.exit(1)

# Parse the entry point before touching any file, so that a malformed address
# aborts before the output file is created or truncated.
# base=0 accepts decimal as well as 0x/0o/0b prefixed literals.
# NOTE(review): start_addr is not referenced anywhere else in this file;
# gentext() uses its own fixed text base.
start_addr = int(sys.argv[2], base=0)

# Scratch files shared by the passes:
#   source - main source text with "include" directives expanded
#   instrs - intermediate, one line of tokens per instruction
#   b_data - raw bytes of the .data section
#   b_text - raw bytes of the .text section
source = TemporaryFile(mode="w+")
instrs = TemporaryFile(mode="w+")
b_data = TemporaryFile(mode="w+b")
b_text = TemporaryFile(mode="w+b")

# Register and instruction name lists, looked up next to this script
lst_regs = open(os.path.join(sys.path[0], "regs.lst"))
lst_instrs = open(os.path.join(sys.path[0], "instrs.lst"))

main_src = open(sys.argv[1])
b_out = open(sys.argv[3], "wb")

def leave():
    """Close every file the assembler opened at start-up.

    Called before each exit path; the handles are closed in a fixed order.
    """
    handles = (
        source,
        instrs,
        b_out,
        b_data,
        b_text,
        main_src,
        lst_regs,
        lst_instrs,
    )
    for handle in handles:
        handle.close()

#-------------------------------------------------------------------------------

# Preprocessor definitions (name -> replacement text)
pdefs = {}

# Register names, in the order they were read
pregs = []

# Instruction names, in the order they were read
pinstrs = []

# Label tables: name -> offset inside the respective section
plabels_text = {}
plabels_data = {}

# Running size of the .data section
pdata = 0

# Running size of the .text section
ptext = 0

# Most recent non-local label; local labels (leading '.') are prefixed with it
plastlabel = ""

# Once parse() has finished, pdata and ptext are never modified again

# Padding bytes between .text and .data
pdata_pad = 0

#-------------------------------------------------------------------------------

def name_valid(name):
    """Return True when *name* is an acceptable label name.

    A name is valid when it is non-empty and every character, compared
    case-insensitively, is a letter a-z, a digit, or one of ``[$._+]=,``.

    The empty string is rejected explicitly: callers index ``name[0]`` right
    after a successful check, so accepting it would turn a bare ``:`` line
    into an IndexError instead of a "bad label" diagnostic.
    """
    if not name:
        return False

    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$._+]=,'
    return all(c in allowed for c in name.lower())

def is_number(s):
    """Tell whether *s* parses as an integer literal.

    The string is parsed with base 0, so decimal as well as 0x/0o/0b
    prefixed literals are accepted.
    """
    try:
        int(s, 0)
    except ValueError:
        return False
    else:
        return True
#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Append every line of the register list file, stripped, to pregs."""
    pregs.extend(entry.strip() for entry in lst_regs)

def parse_lst_instrs():
    """Append every line of the instruction list file, stripped, to pinstrs."""
    pinstrs.extend(entry.strip() for entry in lst_instrs)

#-------------------------------------------------------------------------------

# Number of include files currently being expanded (nesting level), and the
# level at which expansion is aborted
inc_depth = 0
inc_depth_max = 16

# Quickly goes through source file and resolves "include" directives ONLY
def do_includes(fi):
    """Copy *fi* into the ``source`` scratch file, expanding includes inline.

    Every line is right-stripped and written to ``source`` followed by a
    newline, except lines of the form ``include "path"`` / ``include 'path'``:
    those are replaced by the recursively expanded content of the named file.

    fi: an iterable of text lines (an open text file).

    On a malformed directive, an unreadable file, or too deep a nesting, an
    error is printed, all files are closed through leave(), and the process
    exits with status 1.
    """
    global inc_depth

    for line in fi:
        line = line.rstrip()
        tok = line.split(' ', 1)

        # split() always yields at least one element, so tok[0] is safe
        if tok[0] != "include":
            source.write("{}\n".format(line))
            continue

        if len(tok) == 1:
            print("Missing parameter for include directive")
            leave()
            sys.exit(1)

        arg = tok[1]

        # The path must be wrapped in a matching pair of quotes; a lone quote
        # character is not a pair.
        if len(arg) < 2 or arg[0] not in "'\"" or arg[-1] != arg[0]:
            print("Invalid format for include directive: {}".format(line))
            leave()
            sys.exit(1)

        inc = arg[1:-1]

        try:
            new_fi = open(inc, "r")

        # Only failures to open the file are reported here; a bare except
        # would also swallow SystemExit and KeyboardInterrupt.
        except (OSError, ValueError):
            print("Couldn't open file: {}".format(line))
            leave()
            sys.exit(1)

        # The with-block closes the included file once it has been expanded
        with new_fi:
            inc_depth += 1
            if inc_depth >= inc_depth_max:
                print("Maximal include depth reached: {}".format(line))
                leave()
                sys.exit(1)

            do_includes(new_fi)

            # Back out of this level so that inc_depth measures nesting and
            # not the total number of includes seen so far.
            inc_depth -= 1


#-------------------------------------------------------------------------------

def parse():
    """First pass over the include-expanded source.

    Rewinds ``source`` and handles it line by line: comments are removed,
    indented lines are handed to parse_instr() (their size is added to
    ptext), lines ending in ':' define a .text label, and everything else is
    passed to parse_preproc().
    """
    global ptext
    global plastlabel

    source.seek(0)

    for raw in source:
        text = raw.rstrip()

        if not text:
            continue

        # Cut the line at the first comment character found outside a string
        in_string = False
        for pos, ch in enumerate(text):
            if ch in "'\"":
                in_string = not in_string

            if not in_string and ch in '#;@!/':
                text = text[:pos].rstrip()
                break

        if in_string:
            print("Unterminated string in line: {}".format(text))
            leave()
            sys.exit(1)

        if not text:
            continue

        # Indented line: an instruction
        if text[0] in (' ', '\t'):
            ptext += parse_instr(text.lstrip())
            instrs.write("\n")
            continue

        # Not a label: preprocessor, .data, or invalid
        if text[-1] != ':':
            parse_preproc(text)
            continue

        label = text[:-1]

        if not name_valid(label):
            print("Bad label name: {}".format(label))
            leave()
            sys.exit(1)

        # A leading '.' marks a local label, scoped to the last global one
        if label[0] == '.':
            label = plastlabel + label
        else:
            plastlabel = label

        plabels_text[label] = ptext

#-------------------------------------------------------------------------------

def parse_preproc(line):
    """Handle a non-indented, non-label source line.

    Two forms are recognized (tokens separated by single spaces):

    ``NAME := TEXT``
        Records a preprocessor definition in pdefs.

    ``NAME = VALUE``
        Emits data into the .data scratch file and records NAME in
        plabels_data at the current .data offset. VALUE is either an integer
        literal, stored as 8 little-endian bytes, or a quoted string, stored
        one byte per character and zero-padded up to the next multiple of 8
        bytes (at least one zero byte is always appended). For strings, a
        ``NAME_len`` definition holding the number of emitted characters is
        also added to pdefs.

    Malformed input prints a diagnostic, closes all files via leave() and
    exits with status 1. A line matching neither form only prints
    "Unrecognized directive" and returns.
    """
    global pdata

    def invalid():
        print("Invalid format: {}".format(line))
        leave()
        sys.exit(1)

    tok = line.split(' ', 2)

    # preprocessor
    if len(tok) > 1 and tok[1] == ':=':
        if len(tok) < 3:
            invalid()
        pdefs[tok[0]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            invalid()

        label = tok[0]
        if label[0] == '.':
            label = plastlabel + label

        plabels_data[label] = pdata

        # number data
        if is_number(tok[2]):
            value = int(tok[2], base=0)

            try:
                # negative values are stored in two's complement instead of
                # making to_bytes() raise
                raw = value.to_bytes(8, byteorder='little', signed=value < 0)

            except OverflowError:
                # does not fit in 64 bits
                invalid()

            written = b_data.write(raw)
            assert(written == 8)
            pdata += written

        # string data
        elif tok[2][0] in "'\"":
            s = tok[2].strip()

            # the closing quote must match the opening one and end the line
            if len(s) < 2 or s[-1] != s[0]:
                invalid()

            s = s[1:-1]

            real_len = 0
            escaping = False

            for c in s:
                # escape sequences; the pending-escape test comes first so
                # that a doubled backslash yields a literal backslash
                if escaping:
                    escaping = False

                    if c == 'n':
                        c = '\n'
                    elif c == 't':
                        c = '\t'
                    elif c != '\\':
                        print("Unrecognized escape sequence: {}".format(line))
                        leave()
                        sys.exit(1)

                elif c == '\\':
                    escaping = True
                    continue

                # NOTE(review): characters above U+00FF do not fit in one
                # byte and make to_bytes() raise OverflowError here.
                written = b_data.write(ord(c).to_bytes(1, byteorder='little', signed=False))
                assert(written == 1)
                real_len += 1
                pdata += 1

            # a backslash with nothing after it
            if escaping:
                print("Unrecognized escape sequence: {}".format(line))
                leave()
                sys.exit(1)

            # align: pad according to the bytes actually emitted, not the
            # source length, which also counts the escaping backslashes
            pad = 8 - real_len % 8
            written = b_data.write(bytes(pad))
            assert(written == pad)
            pdata += pad

            pdefs[label + "_len"] = str(real_len)

        else:
            invalid()

        return

    # reported but not fatal
    print("Unrecognized directive: {}".format(line))

#-------------------------------------------------------------------------------

def parse_instr(line):
    """Translate one instruction line into the intermediate token stream.

    line: the instruction text with surrounding whitespace already removed,
    e.g. ``mov rax, b[rbx+8]`` or ``rep movsb``.

    The mangled instruction name (mnemonic plus one ``_r``/``_i``/``_m``
    suffix per operand) and its operand tokens are written to the ``instrs``
    scratch file, without a trailing newline.

    Returns the number of bytes accounted for this instruction (the caller
    adds it to the .text size); 0 for an empty or None line.

    Malformed input prints a diagnostic followed by the line, closes all
    files through leave() and exits with status 1.
    """
    if line is None or len(line) == 0:
        return 0

    def fail(message):
        print("{}: {}".format(message, line))
        leave()
        sys.exit(1)

    tok = line.split(' ', 1)

    instr = tok[0].strip()
    params = tok[1].strip() if len(tok) > 1 else None

    fellthrough = False

    # 2 bytes for the instruction word itself
    size = 2

    if instr == "rep":
        if not params:
            fail("Missing instruction after rep prefix")

        instrs.write("%rep ")
        # partition() also copes with a prefixed instruction that takes no
        # operands ("rep xyz"); unpacking split(' ', 1) would raise there
        instr, _, params = params.partition(' ')
        # +2 for the prefix word
        size += 2

    instr_name = instr
    instr_args = ''

    if not params:
        instrs.write("{}".format(instr_name))
        # size rather than a literal 2, so a rep prefix is counted too
        return size

    mem_prefixes = {'b': "%b", 'w': "%w", 'l': "%l", 'd': "%l", 'q': "%q"}

    for word in params.split(','):
        word = word.strip()

        # "a,,b" or a trailing comma
        if not word:
            fail("Missing operand")

        instr_args += ' '

        pref = None

        # memory length prefixes
        if len(word) > 2 and '[' in word:
            pref = mem_prefixes.get(word[0])

            if pref is None:
                fail("Bad memory length prefix")

            word = word[1:].strip()

            # exactly one prefix letter is allowed before the bracket
            if word[0] != '[':
                fail("Bad memory length prefix")

        # [reg+off] or [imm64]
        if word[0] == '[':
            if word[-1] != ']':
                fail("Malformed memory operand")

            word = word[1:-1].strip()

            if pref is None:
                fail("Missing access length modifier")

            instr_name += "_m"
            instr_args += "{}".format(pref)

            if '+' in word:
                # +2 for A_OFF, +2 for offset, +2 for register
                size += 2 + 2 + 2
                instr_args += "off "

                if len(word) <= 3:
                    fail("Malformed memory operand")

                reg, off = word.split('+', 1)
                reg = reg.strip()
                off = off.strip()

                instr_args += "{} {}".format(off, reg)

                continue

            # empty brackets
            if not word:
                fail("Malformed memory operand")

            # +2 for A_MEM
            size += 2
            instr_args += "mem "

            # The bracket content is classified below like a plain operand,
            # but must not add another suffix to the instruction name.
            fellthrough = True
            # FALLTHROUGH

        # for now every immediate is 64-bit
        if is_number(word):
            # +2 for A_IMM64, +8 for immediate
            size += 2 + 8
            if not fellthrough:
                instr_name += "_i"
            instr_args += "%imm64 "
            instr_args += word
            fellthrough = False
            continue

        # preprocessor
        if word in pdefs:
            word = pdefs[word]
            # Fall through

        # register
        if word in pregs:
            size += 2
            if not fellthrough:
                instr_name += "_r"
            instr_args += word
            fellthrough = False
            continue

        # it's a label (a 64-bit immediate)
        # +2 for A_IMM64, +8 for immediate
        size += 2 + 8
        if not fellthrough:
            instr_name += "_i"
        instr_args += "%imm64 "

        # local labels are qualified with the last global label
        if word.startswith('.'):
            instr_args += plastlabel

        instr_args += word
        fellthrough = False

    instrs.write("{}{}".format(instr_name, instr_args))
    return size

#-------------------------------------------------------------------------------

# Marker tokens of the intermediate stream and the 16-bit word emitted for
# each. Within each family the low nibble is 1, 2, 4 or 8.
_widths = (("b", 1), ("w", 2), ("l", 4), ("q", 8))

# memory operands: %bmem, %wmem, %lmem, %qmem
special_syms = {"%{}mem".format(c): 0x7000 + n for c, n in _widths}

# register+offset operands: %boff, %woff, %loff, %qoff
special_syms.update(("%{}off".format(c), 0x7700 + n) for c, n in _widths)

# immediates: %imm16, %imm32, %imm64
special_syms.update(("%imm{}".format(8 * n), 0x7770 + n) for n in (2, 4, 8))

# rep prefix
special_syms["%rep"] = 0x8000

def gentext():
    """Second pass: encode the intermediate token stream into ``b_text``.

    Each whitespace-separated token of each ``instrs`` line is classified, in
    this order, and written little-endian:

    * register name      -> its index in pregs, 2 bytes
    * instruction name   -> its index in pinstrs, 2 bytes
    * .text label        -> text base + label offset, 8 bytes
    * .data label        -> data base + label offset + pdata_pad, 8 bytes
    * marker token       -> its special_syms value, 2 bytes; %immNN and %Xoff
                            markers also set the width of the next number
    * integer literal    -> the value, in the width set by the last marker

    Anything else, a number that no marker announced, or a number that does
    not fit in the announced width prints an assembly error, closes all
    files via leave() and exits with status 1.
    """
    instrs.seek(0)
    text_start = 0x100000
    data_start = text_start + ptext
    # Rounds up to a multiple of 8, adding a full 8 when already aligned;
    # genout() inserts the same number of padding bytes.
    data_start += (8 - data_start % 8)

    for line in instrs:
        # Width in bytes of the next numeric token. Reset for every
        # instruction so a stray number can neither reuse the width of an
        # earlier line nor hit an unbound variable.
        lastimm = None

        for word in line.strip().split(' '):
            if word in pregs:
                idx = pregs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in pinstrs:
                idx = pinstrs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in plabels_text:
                addr = text_start + plabels_text[word]
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in plabels_data:
                addr = data_start + plabels_data[word] + pdata_pad
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in special_syms:
                if word == "%imm16":
                    lastimm = 2
                elif word == "%imm32":
                    lastimm = 4
                elif word == "%imm64":
                    lastimm = 8

                # %boff, %woff, %loff, %qoff: a 2-byte offset follows
                if word[2:] == "off" and word[0] == '%':
                    lastimm = 2
                    isSigned = True
                else:
                    isSigned = False

                b_text.write(special_syms[word].to_bytes(2, byteorder='little', signed=isSigned))
                continue

            if lastimm is not None and is_number(word):
                # a leading minus selects two's complement encoding
                isSigned = (word[0] == '-')

                try:
                    encoded = int(word, base=0).to_bytes(lastimm, byteorder='little', signed=isSigned)

                except OverflowError:
                    print("Assembly error, value '{}' does not fit in {} byte(s), line: {}"
                                    .format(word, lastimm, line))
                    leave()
                    sys.exit(1)

                b_text.write(encoded)
                continue

            print("Assembly error, unknown token '{}' in line: {}".format(word, line))
            leave()
            sys.exit(1)

#-------------------------------------------------------------------------------

def genout():
    """Write the output image: .text, zero padding, then .data.

    The padding is ``8 - ptext % 8`` zero bytes, i.e. between 1 and 8.
    """
    for section in (b_text, b_data):
        section.seek(0)

    b_out.write(b_text.read())

    # zero bytes separating the two sections
    pad_len = 8 - ptext % 8
    for _ in range(pad_len):
        b_out.write(b"\x00")

    b_out.write(b_data.read())

#-------------------------------------------------------------------------------

# Run the passes in order. The finally clause closes every file even when a
# pass raises; the error paths that already called leave() before exiting
# simply close the (already closed) files a second time, which is harmless.
try:
    parse_lst_instrs()
    parse_lst_regs()
    do_includes(main_src)
    parse()
    gentext()
    genout()
finally:
    leave()

sys.exit(0)