#!/usr/bin/python3

# The OS/K Team licenses this file to you under the MIT license.
# See the LICENSE file in the project root for more information.

import os
import sys
import subprocess
from array import array
from tempfile import TemporaryFile

# Exactly three positional arguments are required.  In this script
# sys.argv[1] is opened for reading as the assembly source, sys.argv[2] is
# parsed as an integer and sys.argv[3] is opened for writing as the output,
# so the usage text lists them in that order.
if len(sys.argv) != 4:
    print("Usage: {} (source file) (memory entry point) (output file)"
                    .format(sys.argv[0]))
    sys.exit(1)

def _scratch_path(kind):
    """Return the path of the hidden scratch file of the given kind.

    The file is called ".<output name>.<kind>" and is placed in the same
    directory as the output file (sys.argv[3]).  Only the final path
    component receives the leading dot: prefixing the whole path would turn
    "./a.out" into "../a.out.instr" and "dir/a.out" into ".dir/a.out.instr".
    For an output name without a directory part the result is simply
    ".<output name>.<kind>" in the current directory.
    """
    out_dir, out_name = os.path.split(sys.argv[3])
    return os.path.join(out_dir, ".{}.{}".format(out_name, kind))


# Scratch files: the textual intermediate listing, then the raw .data and
# .text images.  All three are opened read/write because they are written
# during parsing and read back later.
instrs = open(_scratch_path("instr"), "w+")
b_data = open(_scratch_path("data"), "w+b")
b_text = open(_scratch_path("text"), "w+b")

# Register and instruction name tables, looked up in sys.path[0]
lst_regs = open(os.path.join(sys.path[0], "regs.lst"))
lst_instrs = open(os.path.join(sys.path[0], "instrs.lst"))

# Assembly source (read) and final binary (written)
fi = open(sys.argv[1])
b_out = open(sys.argv[3], "wb")

# base=0 accepts decimal as well as 0x/0o/0b prefixed literals.
# NOTE(review): start_addr is not referenced by the rest of the visible
# code; gentext() uses a hard-coded text_start of 0 -- confirm intent.
start_addr = int(sys.argv[2], base=0)

def leave():
    """Close every file handle opened at start-up.

    Called before each early exit and once the output has been written.
    """
    handles = (fi, instrs, b_out, b_data, b_text, lst_regs, lst_instrs)
    for handle in handles:
        handle.close()

#-------------------------------------------------------------------------------

# Preprocessor definitions: name -> replacement text
pdefs = {}

# Register names; the position in the list is the value that gets encoded
pregs = []

# Instruction names; the position in the list is the value that gets encoded
pinstrs = []

# Labels: name -> offset inside the .text / .data section
plabels_text = {}
plabels_data = {}

# Running size, in bytes, of the .data section
pdata = 0

# Running size, in bytes, of the .text section
ptext = 0

# after parse() is done, pdata and ptext are never modified

# padding bytes between .text and .data
pdata_pad = 0

#-------------------------------------------------------------------------------

def name_valid(name):
    """Tell whether `name` consists only of characters allowed in a token.

    The check is case-insensitive: the name is lower-cased before being
    compared against the allowed set (ASCII letters, digits and the
    punctuation ``[ $ . _ + ] =``).  An empty name is accepted.

    On the first rejected character a "BAD '<char>'" diagnostic is printed
    and False is returned; otherwise True is returned.
    """
    allowed = 'abcdefghijklmnopqrstuvwxyz0123456789[$._+]='

    for ch in name.lower():
        if ch in allowed:
            continue
        print("BAD '{}'".format(ch))
        return False

    return True
    
#-------------------------------------------------------------------------------

def parse_lst_regs():
    """Append every line of the lst_regs file, stripped, to pregs."""
    pregs.extend(entry.strip() for entry in lst_regs)

def parse_lst_instrs():
    """Append every line of the lst_instrs file, stripped, to pinstrs."""
    pinstrs.extend(entry.strip() for entry in lst_instrs)

#-------------------------------------------------------------------------------

def _invalid_format(line):
    """Report a malformed directive, close every file and exit with status 1.

    This function does not return.
    """
    print("Invalid format: {}".format(line))
    leave()
    sys.exit(1)


def parse_preproc(line):
    """Handle a line that is neither an instruction nor a text label.

    The line is split on single spaces into at most three fields and two
    directives are recognised:

    ``NAME := TEXT``
        Stores TEXT (the whole remainder of the line) in pdefs under NAME.

    ``NAME = VALUE``
        Emits data into the .data scratch file and records the current
        .data offset in plabels_data under NAME.  VALUE is either

        * a number accepted by is_number(), written as an unsigned 64-bit
          little-endian integer, or
        * a string delimited by matching single or double quotes, written
          as one byte per character followed by 1 to 8 zero bytes.  The
          unpadded length is also stored, as text, in pdefs under
          ``NAME_len``.

    Any other line prints an "Unrecognized directive" message and is
    otherwise ignored.  A recognised directive with a missing or malformed
    value prints "Invalid format", closes all files and exits with status 1;
    this includes unterminated strings, numbers that do not fit in 64
    unsigned bits and characters that do not fit in one byte.
    """
    global pdata

    tok = line.split(' ', 2)

    if len(tok) < 2 or tok[1] not in (':=', '='):
        print("Unrecognized directive: {}".format(line))
        return

    if len(tok) < 3:
        _invalid_format(line)

    name, operator, value = tok

    # preprocessor
    if operator == ':=':
        pdefs[name] = value
        return

    # .data, numeric form
    if is_number(value):
        try:
            raw = int(value, base=0).to_bytes(8, byteorder='little', signed=False)
        except OverflowError:
            # negative, or wider than 64 bits
            _invalid_format(line)

        plabels_data[name] = pdata
        b_data.write(raw)
        pdata += len(raw)
        return

    # .data, string form: the opening quote must be the very first
    # character of the value field
    if not value.startswith(("'", '"')):
        _invalid_format(line)

    s = value.strip()

    # Explicit check rather than an assert: this validates user input and
    # must survive "python -O".  A lone quote character is not a string.
    if len(s) < 2 or s[-1] != s[0]:
        _invalid_format(line)

    s = s[1:-1]

    try:
        raw = bytes(map(ord, s))
    except ValueError:
        # a character whose code point does not fit in a single byte
        _invalid_format(line)

    # align: this always appends at least one zero byte, and a full 8 when
    # the length is already a multiple of 8 (presumably so that the string
    # is always NUL-terminated -- confirm before changing)
    raw += bytes(8 - len(s) % 8)

    plabels_data[name] = pdata
    pdefs[name + "_len"] = str(len(s))
    b_data.write(raw)
    pdata += len(raw)

#-------------------------------------------------------------------------------

def apply_pdefs(line):
    """Expand preprocessor definitions in the operands of an instruction.

    The line is split on runs of whitespace, so repeated spaces or tabs
    between tokens do not create empty tokens.  The first token is copied
    unchanged.  When there are exactly three tokens, a trailing comma on the
    second one is dropped ('mov rax, rbx' => 'mov rax rbx').  Every operand
    that is a key of pdefs is replaced by its definition; an operand of the
    form ``[name]`` has the lookup applied to ``name`` and the brackets are
    put back afterwards.

    Returns ``(True, rewritten_line)`` with the tokens joined by single
    spaces, or ``(False, None)`` after printing a diagnostic when the line
    is empty, has more than three tokens, has an empty or unterminated
    operand, or contains a character rejected by name_valid().
    """
    tok = line.split()

    if not tok:
        print("Invalid format: {}".format(line))
        return False, None

    if len(tok) > 3:
        print("Too many tokens in line: {}".format(line))
        return False, None

    # 'mov rax, rax' => 'mov rax rax'
    if len(tok) == 3 and tok[1].endswith(','):
        tok[1] = tok[1][:-1]

    pieces = [tok[0]]

    for word in tok[1:]:
        # an operand that was just a comma is empty at this point
        if not word:
            print("Invalid token in line: {}".format(line))
            return False, None

        need_bracket = word.startswith('[')

        if need_bracket:
            # Reported like any other bad token instead of asserting, since
            # this is a check on user input.
            if not word.endswith(']'):
                print("Invalid token in line: {}".format(line))
                return False, None
            word = word[1:-1]

        if word in pdefs:
            word = pdefs[word]

        if not name_valid(word):
            print("Invalid token in line: {}".format(line))
            return False, None

        pieces.append('[' + word + ']' if need_bracket else word)

    return True, ' '.join(pieces)

def parse():
    """First pass over the source file.

    Each line is right-stripped and then classified by its first character:

    * empty lines and lines starting with '#' or ';' are skipped;
    * lines starting with a space or a tab are instructions, unless their
      first non-blank character is one of ``# ; @ ! /``.  They go through
      apply_pdefs() and, when that succeeds, through parse_instr(), whose
      return value is added to ptext; a newline then ends the entry in the
      intermediate listing;
    * other lines ending in ':' define a text label at the current ptext.
      An invalid label name is reported, all files are closed and the
      program exits with status 1;
    * everything else is handed to parse_preproc().
    """
    global ptext

    for raw in fi:
        line = raw.rstrip()

        if not line:
            continue

        if line[0] in ('#', ';'):
            continue

        if line[0] in (' ', '\t'):
            line = line.lstrip()

            if line[0] in '#;@!/':
                continue

            ok, expanded = apply_pdefs(line)
            if ok:
                ptext += parse_instr(expanded)
                instrs.write("\n")
            continue

        # Preprocessor or label?
        if line.endswith(':'):
            label = line[:-1]
            if not name_valid(label):
                print("Bad label name: {}".format(label))
                leave()
                sys.exit(1)
            plabels_text[label] = ptext
            continue

        # Preprocessor, .data, or invalid
        parse_preproc(line)

#-------------------------------------------------------------------------------

def is_number(s):
    """Return True when `s` is an integer literal that int(s, base=0) accepts.

    With base 0 the literal may be decimal or carry a 0x / 0o / 0b prefix;
    a decimal literal with a leading zero such as "010" is rejected.
    """
    try:
        int(s, base=0)
    except ValueError:
        return False
    else:
        return True
   
# Letter in front of '[' -> marker selecting the memory access length.
# 'l' and 'd' both map to the same marker.
_MEM_LEN_MARKERS = {'b': "%b", 'w': "%w", 'l': "%l", 'd': "%l", 'q': "%q"}


def _instr_fatal(message):
    """Print `message`, close every open file and exit with status 1.

    This function does not return.
    """
    print(message)
    leave()
    sys.exit(1)


def parse_instr(line):
    """Translate one instruction into its intermediate-listing form.

    `line` is a single-space separated instruction: an optional ``rep``
    prefix, the mnemonic, then the operands.  The mnemonic receives one
    suffix per operand (``_r`` register, ``_i`` immediate or label, ``_m``
    memory) and the result is written to the `instrs` listing without a
    trailing newline.

    Operand forms:

    * a name listed in pregs -> register;
    * a number, or any other name (taken to be a label) -> 64-bit immediate,
      written after a ``%imm64`` marker;
    * ``X[...]`` where X is one of b, w, l, d, q -> memory access of that
      length.  ``X[reg+off]`` becomes ``%Xoff off reg``; any other content
      becomes ``%Xmem`` followed by the register or immediate.

    Returns the number of bytes accounted for: 2 for the instruction, 2 for
    a ``rep`` prefix, plus the per-operand sizes noted in the body.

    A malformed operand (missing or unknown length prefix, missing closing
    bracket, empty register or offset) or a ``rep`` with nothing after it is
    reported, all files are closed and the program exits with status 1.
    """
    tok = line.split(' ')

    # 2 bytes for the instruction itself
    size = 2

    if tok[0] == "rep":
        instrs.write("%rep ")
        tok = tok[1:]
        size += 2
        if not tok:
            _instr_fatal("Missing instruction after rep: {}".format(line))

    instr_name = tok[0]
    instr_args = ''

    if len(tok) == 1:
        instrs.write("{}".format(instr_name))
        # Not a constant 2: a "rep" prefix has already been emitted and must
        # be counted, otherwise every later label address is 2 bytes short.
        return size

    for word in tok[1:]:
        instr_args += ' '

        pref = None
        # True once a "%Xmem" marker was emitted for this operand; the
        # register/immediate that follows then adds no mnemonic suffix.
        is_mem = False

        # memory length prefixes
        if len(word) > 1 and word[1] == '[':
            pref = _MEM_LEN_MARKERS.get(word[0])
            if pref is None:
                _instr_fatal("Bad memory length prefix")
            word = word[1:]

        # [reg+off] or [imm64]
        if word.startswith('['):
            if not word.endswith(']'):
                _instr_fatal("Unterminated memory operand: {}".format(line))

            if pref is None:
                _instr_fatal("Missing access length modifier: {}".format(line))

            word = word[1:-1]

            instr_name += "_m"
            instr_args += pref

            if '+' in word:
                reg, off = (part.strip() for part in word.split('+', 1))
                if not reg or not off:
                    _instr_fatal("Bad memory operand: {}".format(line))

                # +2 for A_OFF, +2 for offset, +2 for register
                size += 2 + 2 + 2
                instr_args += "off {} {}".format(off, reg)
                continue

            # +2 for A_MEM; what is inside the brackets is sized below
            size += 2
            instr_args += "mem "
            is_mem = True

        # a numeric literal takes precedence over a register name
        if not is_number(word) and word in pregs:
            size += 2
            if not is_mem:
                instr_name += "_r"
            instr_args += word
            continue

        # A number, or else a label (a 64-bit immediate); for now every
        # immediate is 64-bit.
        # +2 for A_IMM64, +8 for immediate
        size += 2 + 8
        if not is_mem:
            instr_name += "_i"
        instr_args += "%imm64 "
        instr_args += word

    instrs.write("{}{}".format(instr_name, instr_args))
    return size

#-------------------------------------------------------------------------------

# Access-length letter paired with the low hex digit of its markers
# (presumably the access width in bytes -- confirm against the ISA).
_ACCESS_LENGTHS = (("b", 1), ("w", 2), ("l", 4), ("q", 8))

# Marker words of the intermediate listing -> 16-bit value emitted for them.
special_syms = {
    # "%Xmem": 0x7001, 0x7002, 0x7004, 0x7008
    **{"%{}mem".format(tag): 0x7000 + low for tag, low in _ACCESS_LENGTHS},

    # "%Xoff": 0x7701, 0x7702, 0x7704, 0x7708
    **{"%{}off".format(tag): 0x7700 + low for tag, low in _ACCESS_LENGTHS},

    # "%imm16", "%imm32", "%imm64": 0x7772, 0x7774, 0x7778
    **{"%imm{}".format(8 * low): 0x7770 + low for low in (2, 4, 8)},

    "%rep": 0x8000,
}

def gentext():
    """Second pass: encode the intermediate listing into the .text scratch file.

    Every space-separated word of every listing line is encoded by the first
    rule that matches, all values little-endian and unsigned:

    1. register name     -> its index in pregs, 2 bytes
    2. instruction name  -> its index in pinstrs, 2 bytes
    3. text label        -> text_start + label offset, 8 bytes
    4. data label        -> data_start + label offset + pdata_pad, 8 bytes
    5. special symbol    -> its special_syms value, 2 bytes; ``%imm16``,
       ``%imm32``, ``%imm64`` and the ``%?off`` markers also set the width
       (2, 4, 8 and 2 bytes) used for the numbers that follow
    6. number            -> that many bytes, per the most recent width marker

    An unknown word, a number that is not preceded by any width marker, or a
    number that does not fit the current width is reported as an assembly
    error; all files are then closed and the program exits with status 1.
    """
    def fail(message):
        # Report an assembly error and terminate; does not return.
        print(message)
        leave()
        sys.exit(1)

    instrs.seek(0)
    text_start = 0 # 0x100000
    data_start = text_start + ptext
    # Same padding formula as genout() uses between .text and .data (a full
    # 8 bytes when ptext is already a multiple of 8); the two must agree.
    data_start += (8 - data_start % 8)

    imm_widths = {"%imm16": 2, "%imm32": 4, "%imm64": 8}

    # Width in bytes of the next numeric word; None until a marker sets it.
    # It deliberately persists across words and lines.
    lastimm = None

    for line in instrs:
        for word in line.strip().split(' '):
            if word in pregs:
                encoded = pregs.index(word).to_bytes(2, byteorder='little', signed=False)

            elif word in pinstrs:
                encoded = pinstrs.index(word).to_bytes(2, byteorder='little', signed=False)

            elif word in plabels_text:
                addr = text_start + plabels_text[word]
                encoded = addr.to_bytes(8, byteorder='little', signed=False)

            elif word in plabels_data:
                addr = data_start + plabels_data[word] + pdata_pad
                encoded = addr.to_bytes(8, byteorder='little', signed=False)

            elif word in special_syms:
                if word in imm_widths:
                    lastimm = imm_widths[word]
                elif word[0] == '%' and word[2:] == "off":
                    lastimm = 2
                encoded = special_syms[word].to_bytes(2, byteorder='little', signed=False)

            elif is_number(word):
                if lastimm is None:
                    fail("Assembly error, immediate '{}' without a size marker in line: {}"
                         .format(word, line))
                try:
                    encoded = int(word, base=0).to_bytes(lastimm, byteorder='little', signed=False)
                except OverflowError:
                    # e.g. an offset above 0xFFFF after a "%?off" marker
                    fail("Assembly error, immediate '{}' does not fit in {} bytes in line: {}"
                         .format(word, lastimm, line))

            else:
                fail("Assembly error, unknown token '{}' in line: {}".format(word, line))

            b_text.write(encoded)

#-------------------------------------------------------------------------------

def genout():
    """Write the final image: .text, zero padding, then .data.

    Both scratch files are rewound and copied in full.  Between them,
    ``8 - ptext % 8`` zero bytes are written, i.e. between 1 and 8 bytes.
    """
    b_text.seek(0)
    b_data.seek(0)

    b_out.write(b_text.read())
    b_out.write(bytes(8 - ptext % 8))
    b_out.write(b_data.read())

#-------------------------------------------------------------------------------

# Run the assembler: load the instruction and register tables, parse the
# source, encode .text from the intermediate listing, then write the image.
try:
    parse_lst_instrs()
    parse_lst_regs()
    parse()
    gentext()
    genout()
finally:
    # Close every handle even when a stage raises.  Stages that call
    # leave() themselves before sys.exit() end up here a second time, which
    # is harmless: closing an already closed file is a no-op.
    leave()