#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

import sys
import subprocess
from array import array
from tempfile import TemporaryFile

# Exactly three arguments are required. The first one is opened for reading
# as the assembly source and the third one is opened for writing as the
# output image, so the usage text lists them in that order.
if len(sys.argv) != 4:
    print("Usage: {} (source file) (memory entry point) (output file)"
          .format(sys.argv[0]))
    sys.exit(1)

# Intermediate files, created with fixed names in the current working
# directory.
# NOTE(review): TemporaryFile is imported at the top of the file but not used;
# presumably the fixed names are kept so the intermediates can be inspected
# while debugging — confirm before switching to temporary files.

# Textual instruction listing: written by parse_instr(), read back by gentext().
instrs = open("testfile.out.1", "w+")

# Binary section buffers: .data is filled by parse_preproc(), .text by
# gentext(); genout() concatenates them into the output image.
b_data = open("testfile.out.2", "w+b")
b_text = open("testfile.out.3", "w+b")

# Register and instruction name tables, looked up relative to the current
# working directory; one name per line.
lst_regs = open("regs.lst")
lst_instrs = open("instrs.lst")

# argv[1] is the assembly source that is read; argv[3] is the binary image
# that is written (and truncated as soon as it is opened here).
fi = open(sys.argv[1])
b_out = open(sys.argv[3], "wb")

# Entry point given on the command line; base=0 accepts decimal as well as
# 0x / 0o / 0b prefixed literals.
# NOTE(review): start_addr is not read anywhere in the visible code —
# gentext() uses a hard-coded 0x100000 instead; confirm which is intended.
start_addr = int(sys.argv[2], base=0)

def leave():
    """Close every file handle this script opened, in a fixed order."""
    handles = (fi, instrs, b_out, b_data, b_text, lst_regs, lst_instrs)
    for handle in handles:
        handle.close()

#-------------------------------------------------------------------------------

# Defines: operand replacements applied by apply_pdefs(). The 'def' directive
# stores the replacement text as a string, and name_valid() is run on the
# replacement, so every value here must be a string as well.
pdefs = {
    "tst1": "3",
    "tst2": "5"
}

# registers (names read from regs.lst; the list index is the encoding)
pregs = []

# instructions (names read from instrs.lst; the list index is the encoding)
pinstrs = []

# labels: name -> byte offset inside the respective section
plabels_text = {}
plabels_data = {}

# size of .data section
pdata = 0

# size of .text section
ptext = 0

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0

#-------------------------------------------------------------------------------

def name_valid(name):
    """Check that *name* only uses characters the assembler accepts.

    The name is lower-cased first, so letters are accepted in either case.
    When a character outside the allowed set is found, the first such
    character is reported on stdout and False is returned; otherwise the
    result is True (including for the empty string).
    """
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$%._+]='
    offender = next((ch for ch in name.lower() if ch not in allowed), None)
    if offender is None:
        return True
    print("BAD '{}'".format(offender))
    return False
    
#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Append each line of the register listing, stripped, to pregs."""
    pregs.extend(entry.strip() for entry in lst_regs)

def parse_lst_instrs():
    """Append each line of the instruction listing, stripped, to pinstrs."""
    pinstrs.extend(entry.strip() for entry in lst_instrs)

#-------------------------------------------------------------------------------

def parse_preproc(line):
    """Handle a source line that is neither an instruction nor a label.

    Two forms are recognised, with tokens separated by single spaces:

    * ``def NAME VALUE`` stores VALUE in pdefs as the replacement for NAME.
    * ``NAME = VALUE`` appends VALUE to the .data buffer as an unsigned
      64-bit little-endian word and records NAME in plabels_data at the
      offset the word was written to.

    VALUE of a data item may be a decimal integer or any literal accepted by
    ``int(text, base=0)`` (for example ``0x10``), matching what instruction
    immediates accept. Every problem is reported on stdout and the line is
    skipped; nothing is raised and nothing is returned.
    """
    global pdata

    tok = line.split(' ', 2)

    for word in tok:
        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return

    if tok[0] == 'def':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
        else:
            pdefs[tok[1]] = tok[2]
        return

    # .data
    if len(tok) > 1 and tok[1] == '=':
        if len(tok) < 3:
            print("Invalid format: {}".format(line))
            return

        try:
            try:
                # Plain decimal first, so values with leading zeros keep
                # being read as decimal.
                value = int(tok[2])
            except ValueError:
                value = int(tok[2], base=0)
            encoded = value.to_bytes(8, byteorder='little', signed=False)
        except (ValueError, OverflowError):
            # Not an integer, negative, or wider than 64 bits.
            print("Invalid data value in line: {}".format(line))
            return

        # Register the label only once the value is known to be good.
        plabels_data[tok[0]] = pdata
        pdata += b_data.write(encoded)
        return

    print("Unrecognized directive: {}".format(line))

#-------------------------------------------------------------------------------

def apply_pdefs(line):
    """Normalise an instruction line and substitute defines in its operands.

    The line is split on runs of whitespace into a mnemonic and at most two
    operands. A trailing comma on the first of two operands is dropped
    ('mov rax, rax' => 'mov rax rax'). An operand, or the text inside an
    operand that starts with '[', is replaced by its pdefs entry when it has
    one; operands carrying a length prefix such as 'b[...]' are passed
    through without lookup.

    Returns (True, rewritten_line) on success. On any problem a message is
    printed and (False, None) is returned; an empty line returns
    (False, None) silently.
    """
    tok = line.split()
    if not tok:
        return False, None

    if len(tok) > 3:
        print("Too many tokens in line: {}".format(line))
        return False, None

    # 'mov rax, rax' => 'mov rax rax'
    if len(tok) == 3 and tok[1].endswith(','):
        tok[1] = tok[1][:-1]

    pieces = [tok[0]]
    for word in tok[1:]:
        bracketed = word.startswith('[')
        if bracketed:
            if len(word) < 2 or not word.endswith(']'):
                print("Invalid token in line: {}".format(line))
                return False, None
            word = word[1:-1]
        elif not word:
            # Only possible when the first operand was a lone ','.
            print("Invalid token in line: {}".format(line))
            return False, None

        if word in pdefs:
            # Replacements are re-validated below, which needs a string.
            word = str(pdefs[word])

        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return False, None

        pieces.append('[' + word + ']' if bracketed else word)

    return True, ' '.join(pieces)

def parse():
    """Walk the source file line by line and dispatch each line by its shape.

    * blank lines and lines whose first non-blank character is '#' or ';'
      are skipped;
    * lines starting with a space or tab are instructions: they go through
      apply_pdefs() and parse_instr(), whose result is added to ptext, and
      a newline terminates the entry in the intermediate listing;
    * lines ending in ':' define a text label at the current ptext offset
      (an invalid label name closes the files and exits with status 1);
    * everything else is handed to parse_preproc().
    """
    global ptext
    comment_marks = ('#', ';')

    for raw in fi:
        text = raw.rstrip()

        # Nothing to do for empty lines and column-0 comments.
        if not text or text.startswith(comment_marks):
            continue

        if text[0] in (' ', '\t'):
            body = text.lstrip()
            if body.startswith(comment_marks):
                continue

            ok, expanded = apply_pdefs(body)
            if ok:
                ptext += parse_instr(expanded)
                instrs.write("\n")
            continue

        # Preprocessor or label?
        if text.endswith(':'):
            label = text[:-1]
            if not name_valid(label):
                print("Bad label name: {}".format(label))
                leave()
                sys.exit(1)
            plabels_text[label] = ptext
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(text)

#-------------------------------------------------------------------------------

def is_number(s):
    """Tell whether the string *s* is an integer literal.

    The check is whatever ``int(s, base=0)`` accepts: decimal without
    leading zeros, or a 0x / 0o / 0b prefixed literal.
    """
    try:
        int(s, base=0)
    except ValueError:
        return False
    else:
        return True
   
def _bad_operand(message):
    """Print *message*, close the open files and exit with status 1."""
    print(message)
    leave()
    sys.exit(1)


def parse_instr(line):
    """Write one instruction to the intermediate listing and return its size.

    *line* is a mnemonic followed by space-separated operands. Each operand
    appends a suffix to the mnemonic and some words to the listing entry:

    * register            -> suffix ``_r``, the register name (2 bytes)
    * number or label     -> suffix ``_i``, ``%imm64 VALUE`` (2 + 8 bytes)
    * ``[reg+off]``       -> suffix ``_m``, ``%?off OFF REG`` (2 + 2 + 2 bytes)
    * ``[x]``             -> suffix ``_m``, ``%?mem`` followed by the words
      for x as a register or immediate, without a further suffix
      (2 bytes plus the size of x)

    ``?`` is the access width letter taken from an optional ``b``/``w``/
    ``l``/``q`` prefix in front of the bracket, ``q`` when there is none.
    The entry is written to instrs without a trailing newline.

    Returns the encoded size in bytes, counting 2 bytes for the opcode.
    A malformed memory operand prints a message, closes the files and exits
    with status 1.
    """
    tok = line.split(' ')
    instr_name = tok[0]
    operands = [word for word in tok[1:] if word]

    if not operands:
        instrs.write("{}".format(instr_name))
        return 2  # opcode only

    size = 2
    instr_args = ''

    for word in operands:
        instr_args += ' '

        pref = None
        # True when this operand is '[x]' and x still has to be emitted.
        inside_mem = False

        # memory length prefixes
        if len(word) > 1 and word[1] == '[':
            if word[0] not in 'bwlq':
                _bad_operand("Bad memory length prefix")
            pref = '%' + word[0]
            word = word[1:]

        # [reg+off] or [imm64]
        if word[0] == '[':
            if len(word) < 2 or word[-1] != ']':
                _bad_operand("Unterminated memory operand '{}' in line: {}"
                             .format(word, line))
            word = word[1:-1]

            if pref is None:
                pref = "%q"

            instr_name += "_m"
            instr_args += pref

            if '+' in word:
                reg, _, off = word.partition('+')
                if not reg or not off:
                    _bad_operand("Bad memory operand '[{}]' in line: {}"
                                 .format(word, line))
                # +2 for A_OFF, +2 for offset, +2 for register
                size += 2 + 2 + 2
                instr_args += "off {} {}".format(off, reg)
                continue

            # +2 for A_MEM; the address operand itself is handled below
            size += 2
            instr_args += "mem "
            inside_mem = True

        if word in pregs and not is_number(word):
            # +2 for the register
            size += 2
            suffix = "_r"
            emitted = word
        else:
            # A number, or a label (every immediate is 64-bit for now):
            # +2 for A_IMM64, +8 for the immediate
            size += 2 + 8
            suffix = "_i"
            emitted = "%imm64 " + word

        # The '_m' suffix already describes an operand found inside brackets.
        if not inside_mem:
            instr_name += suffix
        instr_args += emitted

    instrs.write("{}{}".format(instr_name, instr_args))
    return size

#-------------------------------------------------------------------------------

# Operand specifier words. parse_instr() writes these names into the
# intermediate listing (a width letter b/w/l/q followed by "mem" or "off", or
# "%imm64"), and gentext() replaces each name with its value, emitted as a
# 2-byte little-endian word. In every entry the low byte of the value equals
# the width in bytes that the name spells out (b/w/l/q = 1/2/4/8,
# imm16/32/64 = 2/4/8).
special_syms = {
    # memory operand '[x]': followed by the words for x
    "%bmem": 0x7001,
    "%wmem": 0x7002,
    "%lmem": 0x7004,
    "%qmem": 0x7008,

    # memory operand '[reg+off]': followed by the offset, then the register
    "%boff": 0x7701,
    "%woff": 0x7702,
    "%loff": 0x7704,
    "%qoff": 0x7708,

    # immediate operand: followed by the value. gentext() sizes the value
    # from the name; parse_instr() as shown only ever emits %imm64.
    "%imm16": 0x7772,
    "%imm32": 0x7774,
    "%imm64": 0x7778,
}

def gentext():
    """Encode the intermediate instruction listing into the .text buffer.

    The listing in instrs is re-read from its start. Every space-separated
    word of every line is encoded, in this order of precedence:

    * register name        -> its index in pregs, 2 bytes
    * instruction name     -> its index in pinstrs, 2 bytes
    * text label           -> text_start + label offset, 8 bytes
    * data label           -> text_start + ptext + pdata_pad + label offset,
      8 bytes
    * operand specifier    -> its special_syms value, 2 bytes
    * number               -> the value, as wide as the preceding specifier
      dictates: 2/4/8 bytes after %imm16/%imm32/%imm64, 2 bytes after a
      %?off specifier (the size parse_instr() accounts for an offset)

    Everything is written little-endian and unsigned. An unknown word, or a
    number that does not fit its width, prints an error, closes the files
    and exits with status 1.
    """
    instrs.seek(0)
    # NOTE(review): the load address is hard-coded here; start_addr parsed
    # from the command line is not consulted — confirm which is intended.
    text_start = 0x100000

    # Width in bytes of the number that follows each specifier.
    literal_width = {
        "%imm16": 2, "%imm32": 4, "%imm64": 8,
        "%boff": 2, "%woff": 2, "%loff": 2, "%qoff": 2,
    }
    # Width used for a number that is not preceded by any specifier.
    lastimm = 8

    for line in instrs:
        for word in line.strip().split(' '):
            if word in pregs:
                idx = pregs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in pinstrs:
                idx = pinstrs.index(word)
                b_text.write(idx.to_bytes(2, byteorder='little', signed=False))
                continue

            if word in plabels_text:
                addr = text_start + plabels_text[word]
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in plabels_data:
                addr = text_start + plabels_data[word] + ptext + pdata_pad
                b_text.write(addr.to_bytes(8, byteorder='little', signed=False))
                continue

            if word in special_syms:
                # %?mem is followed by a register or another specifier, so
                # it leaves the pending width alone.
                lastimm = literal_width.get(word, lastimm)
                b_text.write(special_syms[word].to_bytes(2, byteorder='little', signed=False))
                continue

            if is_number(word):
                try:
                    encoded = int(word, base=0).to_bytes(
                        lastimm, byteorder='little', signed=False)
                except OverflowError:
                    print("Assembly error, value '{}' does not fit in {} bytes in line: {}"
                          .format(word, lastimm, line))
                    leave()
                    sys.exit(1)
                b_text.write(encoded)
                continue

            print("Assembly error, unknown token '{}' in line: {}".format(word, line))
            leave()
            sys.exit(1)

#-------------------------------------------------------------------------------

def genout():
    """Write the output image: the whole .text buffer, then the .data buffer."""
    for section in (b_text, b_data):
        section.seek(0)
        b_out.write(section.read())

#-------------------------------------------------------------------------------

# Pipeline: load the name tables, parse the source into the intermediate
# listing, encode .text, then write the image. The handles are closed even
# when a stage raises; a stage that already called leave() before exiting is
# fine, because closing an already-closed file does nothing.
try:
    parse_lst_instrs()
    parse_lst_regs()
    parse()
    gentext()
    genout()
finally:
    leave()