; TODO actually enforce any of these *_SIZE constants :p
LOAD_ADDR equ 0x00010000              ; address this program is loaded at

TEST_ARENA_ADDR equ 0x00050000        ; address to run tests at
TEST_ARENA_SIZE equ 0x1000            ; maximum size tests can use

TOKEN_TABLE_ADDR equ 0x00060000       ; address the token table is loaded at
TOKEN_TABLE_SIZE equ 0x1000           ; max length of table
TOKEN_TABLE_ENTRY_SIZE equ 2          ; size of token table entry; things may break
                                      ; if this ever changes

OUTPUT_ADDR equ 0x00070000            ; address of outputted binary
OUTPUT_SIZE equ 0x1000                ; max length of outputted binary

STACK_ADDR equ 0x00060000             ; address to put the 64-bit stack at
                                      ; (same value as TOKEN_TABLE_ADDR: pushes
                                      ; write below this address, the token
                                      ; table is written at and above it)

UNRECOGNISED_TOKEN_ID equ 0xFFFF      ; id of an unrecognised token
UNRECOGNISED_ID_TYPE equ 0x0F         ; type of an unrecognised id
UNRECOGNISED_ID_METADATA equ 0xFF     ; metadata of an unrecognised id
UNRECOGNISED_ID_OPCODE equ 0x90       ; opcode of an unrecognised id (NOP)

TEST_LINE_LENGTH equ 80               ; right border of test suite results

[bits 64]
[org LOAD_ADDR]
[default abs] ; TODO see if I actually need to do this
              ; afaik absolute addressing is not harmful on bare metal
              ; reasoning: stops annoying warning =D

; ------------------------------------------------------------------------------
; start
;
; description:
;   entry point. Sets up the stack, runs the test suite, tokenises the built-in
;   test program, assembles the resulting tokens and halts.
; ------------------------------------------------------------------------------

start:
  mov rsp, STACK_ADDR          ; we might need more stack space, let's just be safe

  mov rsi, msg_welcome
  call print

  call run_tests

  call clear_token_table

  mov rdi, program             ; -> program
  movzx esi, byte [program.size] ; = size of program. `program.size` is a single
                               ; byte (db), so it must be loaded as a byte and
                               ; zero-extended; a qword load would pull in the 7
                               ; bytes that follow it in memory.
  call tokenise
  ; rax = number of tokens processed

  push rax                     ; clear_output_arena overwrites rax, rcx and rdi
  call clear_output_arena
  pop rdi                      ; rdi = number of tokens in the token table
  call assemble

  jmp halt

; ------------------------------------------------------------------------------
; assembling
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; assemble
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
;   assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
;   binary located at OUTPUT_ADDR.
;   It's probably desirable to clear the output arena before calling this
;   function.
;
;   State is kept between tokens in the `.pending_operator_*` and
;   `.first_argument` variables below: an operator that takes operands is
;   remembered until its last register operand has been seen, at which point the
;   opcode and a direct-addressing ModR/M byte are written to the output.
;
; parameters:
;   rdi = number of tokens in the token table
;
; clobbers:
;   rax, rcx, rdx and rsi are not preserved
; ------------------------------------------------------------------------------

assemble:
  xor eax, eax                 ; rax = number of tokens processed

  .loop:
    cmp rax, rdi               ; check incrementer against the number of tokens in
    jae .break                 ; the token table (unsigned: both are counts)

    push rdi                   ; total number of tokens; popped in .continue_register
    movzx edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte
    push rax                   ; incrementer; popped in .continue_register

    ; di = next tte
    call get_tte_type
    ; al = type of token

    cmp al, 0x01               ; check if next tte's type is an operator
    jne .continue_operator     ; if not, jump past the case

    .operator:                 ; if next tte's type is an operator:
      push rax                 ; MUST be popped BEFORE reaching .continue_operator;
                               ; it contains the type of token, which still needs
                               ; to be used.

      push rdi
      ; di = tte
      call get_tte_typed_metadata
      ; al = tte typed metadata
      pop rdi

      and al, 11b              ; mask for # operands
      jnz .operator_with_args  ; at least one operand: remember the operator

    .operator_0:               ; no operands: the opcode alone is the instruction
      push rdi
      ; di = next tte
      call get_opcode
      ; al = opcode
      call .output_byte
      pop rdi

      pop rax                  ; from start of label .operator
      jmp .continue_operator

    .operator_with_args:
      mov [.pending_operator_num_args], al ; save # args for the time being

      push rdi
      ; di = next tte
      call get_opcode
      ; al = opcode
      mov [.pending_operator_opcode], al   ; save opcode for the time being
      pop rdi

      pop rax                  ; from start of label .operator

    .continue_operator:

    cmp al, 0x02               ; check if next tte's type is a register
    jne .continue_register     ; if not, jump past the case

    .register:                 ; if next tte's type is a register:
      call .dec_num_args       ; we've found an argument, so one fewer is expected

      cmp byte [.pending_operator_num_args], 1 ; check if this is 1st of 2 args
      je .register_one_of_two                  ; if so, jump to handler

      cmp byte [.pending_operator_num_args], 0 ; check if this is the last arg
      je .register_last                        ; if so, jump to handler
                                               ; note: not necessarily the last
                                               ; of 2 args, it could also be the
                                               ; last of 1

      ; otherwise, discard the token, reset things, and keep going :/
      push rsi
      mov rsi, .warn_unexpected_register
      call print.warn
      pop rsi

      call .reset_state
      jmp .continue_register

    .register_one_of_two:      ; if it's the first of 2 arguments:
      mov [.first_argument], di ; di = tte
      jmp .continue_register

    .register_last:            ; if it's the last argument:
      movzx esi, word [.first_argument] ; si = earlier argument, if one was stored
      cmp si, UNRECOGNISED_TOKEN_ID     ; check if an earlier argument is defined
      jne .operator_finalise_2          ; if so, there are 2 arguments
                                        ; if not, there is just 1

    .operator_finalise_1:
      ; A lone operand is encoded in the R/M field; the reg field carries the
      ; opcode extension instead of a register. The only single-operand
      ; operator in `opcodes.by_id` is inc (0xFF), whose extension is /0.
      ; Previously the operand landed in reg and rax in R/M, so e.g. `inc rdx`
      ; came out as 0xFF 0xD0 (FF /2, a call) rather than 0xFF 0xC2.
      mov esi, edi             ; si = R/M = the operand (rdi was masked to 16
                               ; bits by get_tte_type)
      xor edi, edi             ; di = 0x0000: id of rax, reg bits 000b => /0

    .operator_finalise_2:
      ; di = token table entry `reg` (the last operand, or the /0 stand-in)
      ; si = token table entry `R/M` (the first operand)
      call get_direct_addressing_ModRM
      ; al = ModR/M byte

      push rax
      mov al, [.pending_operator_opcode]
      call .output_byte        ; output operator's opcode
      pop rax
      call .output_byte        ; output ModR/M byte

      call .reset_state        ; reset all the state parts of this function

    .continue_register:

    pop rax                    ; incrementer
    pop rdi                    ; total number of tokens
    inc rax                    ; move to next token
    jmp .loop

  .break:
  ret

  ; constants

  .warn_unexpected_register db "ignoring unexpected register", 0x0A, 0x00

  ; procedures

  ; writes one byte to the output and advances the output cursor
  ; al = byte to write
  ; clobbers rdx
  .output_byte:
    mov edx, [.next_output_byte] ; get output byte's address (zero-extends to rdx)
    mov [rdx], al                ; write byte to that address
    inc edx                      ; increment address
    mov [.next_output_byte], edx ; put output byte's address
    ret

  ; runs dec on .pending_operator_num_args; preserves every register
  .dec_num_args:
    dec byte [.pending_operator_num_args]
    ret

  ; returns the state variables to "no operator pending"
  .reset_state:
    ; The size keywords are required: NASM does not record the size of the data
    ; a label points at, so a store of an immediate needs an explicit size.
    ; Each store is sized to match its variable, so it cannot spill into the
    ; variable that follows it.
    mov byte [.pending_operator_opcode], UNRECOGNISED_ID_OPCODE
    mov byte [.pending_operator_num_args], 0x00
    mov word [.first_argument], UNRECOGNISED_TOKEN_ID
    ret

  ; state variables

  .pending_operator_opcode db 0x00               ; the operator seeking args
  .pending_operator_num_args db 0x00             ; # of args it takes
  .first_argument dw UNRECOGNISED_TOKEN_ID       ; first argument if there are two

  .next_output_byte dd OUTPUT_ADDR               ; next empty byte in output

; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
;   given a token table entry, returns the declared type in `tokens.by_id`. If
;   there is no entry, returns UNRECOGNISED_ID_TYPE
;
; parameters:
;   di = token table entry
;
; returned:
;   al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
;        zeroed; the rest of rax is zeroed.
;
; clobbers:
;   rcx; the upper 48 bits of rdi are zeroed
; ------------------------------------------------------------------------------

get_tte_type:
  and rdi, 0xFFFF              ; mask input so it behaves as expected
  xor eax, eax                 ; rax = index into tokens.by_id

  .loop:
    cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range
    jae .not_found             ; index == entry count is already past the table

    mov cx, [tokens.by_id + rax * 4] ; id of the next entry in tokens.by_id
    cmp cx, di
    je .found

    inc rax
    jmp .loop

  .not_found:
    mov eax, UNRECOGNISED_ID_TYPE
    and eax, 0xF               ; mask as expected
    ret

  .found:
    movzx eax, byte [2 + tokens.by_id + rax * 4] ; type byte follows the 2-byte id
    and eax, 0xF               ; mask as expected
    ret

; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
;   given a token table entry, returns the declared typed metadata in
;   `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA
;
; parameters:
;   di = token table entry
;
; returned:
;   al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax is
;        zeroed.
; ------------------------------------------------------------------------------

get_tte_typed_metadata:
  ; clobbers rcx; the upper 48 bits of rdi are zeroed
  and rdi, 0xFFFF              ; mask input so it behaves as expected
  xor eax, eax                 ; rax = index into tokens.by_id

  .loop:
    cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range
    jae .not_found             ; index == entry count is already past the table,
                               ; so stop before reading the entry after the end

    mov cx, [tokens.by_id + rax * 4] ; id of the next entry in tokens.by_id
    cmp cx, di
    je .found

    inc rax
    jmp .loop

  .not_found:
    mov eax, UNRECOGNISED_ID_METADATA ; also zeroes the rest of rax
    ret

  .found:
    movzx eax, byte [3 + tokens.by_id + rax * 4] ; metadata is the 4th byte of
                                                 ; the 4-byte entry
    ret

; ------------------------------------------------------------------------------
; get_direct_addressing_ModRM
;
; description:
;   given 2 register tokens, returns the ModR/M byte in direct addressing
;   (mod = 11b) mode
;
; parameters:
;   di = token table entry `reg`
;   si = token table entry `R/M`
;
; returned:
;   al = ModR/M byte; the rest of rax is zeroed
; ------------------------------------------------------------------------------

get_direct_addressing_ModRM:
  mov dl, 11b                  ; mod = 11b selects register-direct addressing
  call get_ModRM
  ret

; ------------------------------------------------------------------------------
; get_ModRM
;
; description:
;   given 2 register tokens and the mod bits, returns the ModR/M byte
;
; parameters:
;   di = token table entry `reg`
;   si = token table entry `R/M`
;   dl = lower 2 bits: mod bits.
;        The rest is ignored
;
; returned:
;   al = ModR/M byte; the rest of rax is zeroed
;
; clobbers:
;   rcx, rdx, rdi
; ------------------------------------------------------------------------------

get_ModRM:
  push rbx                     ; bl is used as scratch below; hand rbx back to
                               ; the caller untouched

  and dl, 11b                  ; mask for mod bits
  shl dl, 6                    ; mod occupies bits 7-6 of the ModR/M byte

  ; di = tte
  call get_reg_bits            ; dl and rsi are still intact afterwards: the
                               ; lookup only writes rax, rcx and rdi
  ; al = reg bits
  mov bl, al
  shl bl, 3                    ; reg occupies bits 5-3

  mov rdi, rsi                 ; do the other one
  ; di = tte
  call get_reg_bits
  ; al = reg bits
  mov cl, al                   ; R/M occupies bits 2-0

  xor eax, eax
  or al, dl                    ; mod bits
  or al, bl                    ; reg bits
  or al, cl                    ; R/M bits

  pop rbx
  ret

; ------------------------------------------------------------------------------
; get_opcode
;
; description:
;   given an operator token, returns its opcode. If the token has no entry in
;   `opcodes.by_id`, returns UNRECOGNISED_ID_OPCODE
;
; parameters:
;   di = token table entry
;
; returned:
;   al = opcode; the rest of rax is zeroed
;
; clobbers:
;   rcx; the upper 48 bits of rdi are zeroed
; ------------------------------------------------------------------------------

get_opcode:
  and rdi, 0xFFFF              ; mask input so it behaves as expected
  xor eax, eax                 ; rax = index into opcodes.by_id

  .loop:
    cmp rax, (opcodes.by_id_end - opcodes.by_id) / 4 ; make sure it's still in range
    jae .not_found             ; index == entry count is already past the table;
                               ; the bytes after it belong to other data

    mov cx, [opcodes.by_id + rax * 4] ; id of the next entry in opcodes.by_id
    cmp cx, di
    je .found

    inc rax
    jmp .loop

  .not_found:
    mov eax, UNRECOGNISED_ID_OPCODE ; also zeroes the rest of rax
    ret

  .found:
    movzx eax, byte [2 + opcodes.by_id + rax * 4] ; opcode byte follows the
                                                  ; 2-byte id
    ret

; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
;   given a register token, returns its reg bits metadata
;
; parameters:
;   di = token table entry
;
; returned:
;   al = register token; the rest of rax, including the upper 5 bits of al, are
;        zeroed.
; ------------------------------------------------------------------------------

get_reg_bits:
  ; di = tte
  call get_tte_typed_metadata
  ; al = typed metadata
  shr al, 2                    ; discard type data (the low 2 bits)
  and al, 111b                 ; mask
  ret

; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; tokenise
; TODO write tests
;
; description:
;   represents the program at the given address and puts it in the token table
;   it's probably desirable to clear the token table before calling this function.
;
;   Tokenising stops once the read position has moved past `rdi + rsi` (the
;   byte at exactly that address is still read), or once the token table holds
;   TOKEN_TABLE_SIZE / TOKEN_TABLE_ENTRY_SIZE entries, whichever comes first.
;
; parameters:
;   rdi -> first byte of program
;   rsi = size of program in bytes
;
; returned:
;   rax = number of tokens processed
;
; clobbers:
;   rcx, rdx, rsi, rdi, plus whatever identify_next_token clobbers
; ------------------------------------------------------------------------------

tokenise:
  add rsi, rdi                 ; rsi -> last byte to read (program + size)
  xor ecx, ecx                 ; number of tokens processed

  .loop:
    cmp rdi, rsi               ; if current byte is beyond the last byte
    ja .break                  ; then break (unsigned: these are addresses)

    cmp rcx, TOKEN_TABLE_SIZE / TOKEN_TABLE_ENTRY_SIZE ; if the table is full
    jae .break                 ; then break rather than write past its end

    push rdi
    push rsi
    push rcx
    ; rdi -> current byte
    call identify_next_token
    ; ax = id of token
    ; dx = length of token
    pop rcx
    pop rsi
    pop rdi

    ; deal with terminator character (reported as 0 length token)
    test rdx, rdx
    jnz .continue0

    .token_length0:
      mov ax, 0xFE00           ; terminator character
      mov al, [rdi]            ; byte of terminator
      mov edx, 1               ; byte length is 1

    .continue0:

    add rdi, rdx               ; current byte + length of token = next unread byte

    mov [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], ax ; fill next entry
                                                              ; in token table

    ; TODO fix undefined behaviour when open brackets and closed brackets aren't
    ;      correctly paired or have too much distance between them
    cmp ax, 0x0051             ; check if read token is an open bracket
    jne .continue_open_bracket ; if not, continue

    .open_bracket:
      ; TODO make brackets able to hold more
      mov [.data_open_bracket], cl ; record which entry the open bracket is at

    .continue_open_bracket:

    cmp ax, 0x0052              ; check if read token is a closing bracket
    jne .continue_close_bracket ; if not, continue

    .close_bracket:
      ; rewrite open bracket token entry with a filled out one: the low byte
      ; becomes the distance in entries to this closing bracket, the high byte
      ; becomes 0x10
      push rcx

      movzx edx, byte [.data_open_bracket] ; rdx = entry # of the open bracket;
                                           ; zero-extended because it is used
                                           ; as an index below
      sub cl, dl
      mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl
      mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10

      pop rcx

    .continue_close_bracket:

    inc rcx                    ; +1 token processed
    jmp .loop

  .break:
    mov rax, rcx
    ret

  .data_open_bracket db 0x00   ; represents the token # of the latest open bracket

; ------------------------------------------------------------------------------
; identify_token
;
; description:
;   returns the id of a given token. If there are multiple ways to represent a
;   given token, like the open-bracket, it returns the one that doesn't require
;   information about the surrounding tokens, because it has no such information.
;   In other words, if it isn't in the `tokens` data structure, this function
;   doesn't see it. If the first byte of the token points to a terminator
;   byte, this function returns it as an unrecognised token.
;
; parameters:
;   rdi -> first byte of token
;   rsi = size of token in bytes
;
; returned:
;   ax = id of token; the rest of rax is zeroed
;
; clobbers:
;   rcx, r10, r11
; ------------------------------------------------------------------------------

identify_token:
  cmp rsi, 1                   ; if the token has length 1
  je .start_length1            ; then enter the length 1 loop
  cmp rsi, 2                   ; if the token has length 2
  je .start_length2            ; then enter the length 2 loop
  cmp rsi, 3                   ; if the token has length 3
  je .start_length3            ; then enter the length 3 loop
  cmp rsi, 4                   ; if the token has length 4
  je .start_length4            ; then enter the length 4 loop

  jmp .unrecognised            ; else unrecognised

  ; Each by_name_N list is a run of entries made of the N name bytes followed
  ; by a 2-byte id; a list ends where the next one starts.

  ; length1
  .start_length1:
    mov rcx, tokens.by_name_1  ; rcx -> list of known tokens

  .loop_length1:
    cmp rcx, tokens.by_name_2  ; check if rcx still in the bounds of length1 tokens
    jae .unrecognised          ; if not, unrecognised (unsigned: addresses)

    mov r10b, [rcx]            ; known token
    mov r11b, [rdi]            ; token
    cmp r10b, r11b             ; if known token matches token
    je .found_length1          ; exit loop

    add rcx, 3                 ; length of token + length of id
    jmp .loop_length1

  .found_length1:
    movzx eax, word [rcx + 1]  ; return id of token, rest of rax zeroed
    ret

  ; length2
  .start_length2:
    mov rcx, tokens.by_name_2  ; rcx -> list of known tokens

  .loop_length2:
    cmp rcx, tokens.by_name_3  ; check if rcx still in the bounds of length2 tokens
    jae .unrecognised          ; if not, unrecognised

    mov r10w, [rcx]            ; current entry in known tokens
    mov r11w, [rdi]            ; token
    cmp r10w, r11w             ; if current entry matches token,
    je .found_length2          ; exit loop

    add rcx, 4                 ; length of token + length of id
    jmp .loop_length2

  .found_length2:
    movzx eax, word [rcx + 2]  ; return id of token, rest of rax zeroed
    ret

  ; length3
  .start_length3:
    mov rcx, tokens.by_name_3  ; rcx -> list of known tokens

  .loop_length3:
    cmp rcx, tokens.by_name_4  ; check if rcx still in bounds of length3 tokens
    jae .unrecognised          ; if not, unrecognised

    ; compare exactly 3 bytes (a word, then a byte) so nothing past the end of
    ; the token is ever read
    mov r10w, [rcx]            ; first 2 bytes of known token
    mov r11w, [rdi]            ; first 2 bytes of token
    cmp r10w, r11w
    jne .next_length3

    mov r10b, [rcx + 2]        ; 3rd byte of known token
    mov r11b, [rdi + 2]        ; 3rd byte of token
    cmp r10b, r11b             ; if known token matches token,
    je .found_length3          ; exit loop

  .next_length3:
    add rcx, 5                 ; length of token + length of id
    jmp .loop_length3

  .found_length3:
    movzx eax, word [rcx + 3]  ; return id of token, rest of rax zeroed
    ret

  ; length4
  .start_length4:
    mov rcx, tokens.by_name_4  ; rcx -> list of known tokens

  .loop_length4:
    cmp rcx, tokens.by_name_5  ; check if rcx still in bounds of length4 tokens
    jae .unrecognised          ; if not, unrecognised

    mov r10d, [rcx]            ; known token
    mov r11d, [rdi]            ; token
    cmp r10d, r11d             ; if known token matches token,
    je .found_length4          ; exit loop

    add rcx, 6                 ; length of token + length of id
    jmp .loop_length4

  .found_length4:
    movzx eax, word [rcx + 4]  ; return id of token, rest of rax zeroed
    ret

  .unrecognised:
    mov eax, UNRECOGNISED_TOKEN_ID ; also zeroes the rest of rax
    ret

; ------------------------------------------------------------------------------
; identify_next_token
; description:
;   like identify_token, except it automatically finds the length. If the first
;   byte of the token points to a terminator byte, it returns a length of 0.
;
; parameters:
;   rdi -> first byte of token
;
; returned:
;   ax = id of token; the rest of rax is zeroed
;   dx = length of token in bytes; the rest of rdx is zeroed
; ------------------------------------------------------------------------------

identify_next_token:
  push rdi                     ; keep the token's start for identify_token

  mov rsi, rdi                 ; rsi -> byte being inspected
  xor rdi, rdi                 ; rdi = number of bytes accepted so far

  .scan:
    movzx edx, byte [rsi]      ; dl = byte being inspected

    ; elemb takes over rdi and rsi, so park the scan state on the stack
    push rsi
    push rdi
    push rdx
    mov rsi, token_terminator_8 ; start of terminator list
    mov rdi, 8                  ; length of terminator list
    call elemb
    pop rdx
    pop rdi
    pop rsi

    cmp rax, 1                 ; a terminator ends the token
    je .terminated

    inc rsi                    ; next byte of token
    inc rdi                    ; one more byte belongs to the token
    jmp .scan

  .terminated:
    mov rsi, rdi               ; rsi = length of token
    pop rdi                    ; rdi -> first byte of token

    push rsi                   ; the length has to outlive the call
    call identify_token
    ; ax = id of token
    pop rsi

    mov rdx, rsi               ; return the length
    ret

; ------------------------------------------------------------------------------
; utilities
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; print
;
; description:
;   prints a null-terminated string by writing each byte to I/O port 0x3F8
;   probably doesn't change any registers for ease of debugging
;
;   print.debug, print.error, print.test and print.warn print the same string
;   prefixed with "[DEBUG]: ", "[ERROR]: ", "[TEST]: " or "[WARN]: ".
;
; parameters:
;   rsi -> start of null-terminated string
; ------------------------------------------------------------------------------

print:
  push rdx
  push rax
  push rsi

  mov edx, 0x3F8               ; port every byte is written to
  jmp .fetch

  .emit:
    out dx, al
    inc rsi

  .fetch:
    mov al, [rsi]
    test al, al                ; stop at the terminating 0x00
    jnz .emit

  pop rsi
  pop rax
  pop rdx
  ret

  .debug:
    push rsi                   ; caller's string
    mov rsi, .debug_msg
    call print                 ; prefix first
    pop rsi
    jmp print                  ; then the string; print's ret returns to caller

  .error:
    push rsi
    mov rsi, .error_msg
    call print
    pop rsi
    jmp print

  .test:
    push rsi
    mov rsi, .test_msg
    call print
    pop rsi
    jmp print

  .warn:
    push rsi
    mov rsi, .warn_msg
    call print
    pop rsi
    jmp print

  .debug_msg db "[DEBUG]: ", 0x00
  .error_msg db "[ERROR]: ", 0x00
  .test_msg db "[TEST]: ", 0x00
  .warn_msg db "[WARN]: ", 0x00

; ------------------------------------------------------------------------------
; halt
;
; description:
;   halts the program,
;   silly :)
; ------------------------------------------------------------------------------

halt:
  mov rsi, msg_halt
  call print
  hlt
  jmp halt                     ; if execution resumes after hlt, halt again

; ------------------------------------------------------------------------------
; elemb
;
; description:
;   checks if given byte is element of the specified list
;
; parameters:
;   rdi = size of list
;   rsi -> start of list
;   dl = given byte
;
; returned:
;   rax = 0: is not an element
;         1: is an element
;
; clobbers:
;   rsi and rdi (left at the position the search stopped at)
; ------------------------------------------------------------------------------

elemb:
  .loop:
    test rdi, rdi              ; check if remaining length 0
    jz .not_found              ; if so, break; dl not an element of list

    mov al, [rsi]
    cmp al, dl                 ; check if current byte in list is the desired byte
    je .found                  ; if so, break; dl an element of list

    inc rsi                    ; move to next byte
    dec rdi                    ; and reduce remaining length
    jmp .loop

  .not_found:
    xor eax, eax               ; return 0; dl not an element of list
    ret

  .found:
    mov eax, 1                 ; return 1; dl an element of list (the 32-bit
                               ; write zeroes the rest of rax)
    ret

  ; not referenced anywhere in the visible source
  .f db "found", 0x0A, 0x00
  .nf db "not found", 0x0A, 0x00

; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
;   clears the token table as specified by TOKEN_TABLE_SIZE and TOKEN_TABLE_ADDR
;
; clobbers:
;   rax, rcx, rdi
; ------------------------------------------------------------------------------

clear_token_table:
  xor eax, eax                     ; value to write
  mov rcx, TOKEN_TABLE_SIZE / 4    ; number of double words
  mov rdi, TOKEN_TABLE_ADDR        ; address to start
  rep stosd
  ret

; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
;   clears the test arena as specified by TEST_ARENA_SIZE and TEST_ARENA_ADDR
;
; clobbers:
;   rax, rcx, rdi
; ------------------------------------------------------------------------------

clear_test_arena:
  xor eax, eax                     ; value to write
  mov rcx, TEST_ARENA_SIZE / 4     ; number of double words
  mov rdi, TEST_ARENA_ADDR         ; address to start: the test arena itself,
                                   ; not the token table
  rep stosd
  ret

; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
;   clears the output arena as specified by OUTPUT_SIZE and OUTPUT_ADDR
; ------------------------------------------------------------------------------

clear_output_arena:
  mov rdi, OUTPUT_ADDR         ; rdi -> first byte to overwrite
  mov rcx, OUTPUT_SIZE / 4     ; rcx = how many double words to overwrite
  xor eax, eax                 ; eax = fill value
  rep stosd
  ret

%include "asm/tests.asm"

; ------------------------------------------------------------------------------
; data
; ------------------------------------------------------------------------------

; The macros below only shorten the tables; the bytes they emit are the same
; db / dw sequences as writing every entry out by hand.

; NAME_ENTRY name, id
;   one `tokens.by_name_N` entry: the name's characters, then the 2-byte id
%macro NAME_ENTRY 2
  db %1
  dw %2
%endmacro

; ID_ENTRY id, type, typed metadata
;   one `tokens.by_id` entry (4 bytes)
%macro ID_ENTRY 3
  dw %1
  db %2
  db %3
%endmacro

; OPCODE_ENTRY id, opcode
;   one `opcodes.by_id` entry (4 bytes); the last byte is reserved
%macro OPCODE_ENTRY 2
  dw %1
  db %2
  db 0x00
%endmacro

tokens:
  ; names are grouped by length so identify_token can scan fixed-size entries
  .by_name_1:
    NAME_ENTRY "[", 0x0051
    NAME_ENTRY "]", 0x0052
    NAME_ENTRY "+", 0x0062
    NAME_ENTRY "-", 0x0063
    NAME_ENTRY "*", 0x0064
    NAME_ENTRY "/", 0x0065
  .by_name_2:
    NAME_ENTRY "r8", 0x0008
    NAME_ENTRY "r9", 0x0009
    NAME_ENTRY "ax", 0x0020
    NAME_ENTRY "bx", 0x0021
    NAME_ENTRY "cx", 0x0022
    NAME_ENTRY "dx", 0x0023
    NAME_ENTRY "si", 0x0024
    NAME_ENTRY "di", 0x0025
    NAME_ENTRY "sp", 0x0026
    NAME_ENTRY "bp", 0x0027
    NAME_ENTRY "al", 0x0030
    NAME_ENTRY "bl", 0x0031
    NAME_ENTRY "cl", 0x0032
    NAME_ENTRY "dl", 0x0033
    NAME_ENTRY "ah", 0x0040
    NAME_ENTRY "bh", 0x0041
    NAME_ENTRY "ch", 0x0042
    NAME_ENTRY "dh", 0x0043
    NAME_ENTRY "cs", 0x0044
    NAME_ENTRY "ds", 0x0045
    NAME_ENTRY "es", 0x0046
    NAME_ENTRY "fs", 0x0047
    NAME_ENTRY "gs", 0x0048
    NAME_ENTRY "ss", 0x0049
    NAME_ENTRY "je", 0x005C
    NAME_ENTRY "jg", 0x005F
    NAME_ENTRY "jl", 0x0061
  .by_name_3:
    NAME_ENTRY "rax", 0x0000
    NAME_ENTRY "rbx", 0x0001
    NAME_ENTRY "rcx", 0x0002
    NAME_ENTRY "rdx", 0x0003
    NAME_ENTRY "rsi", 0x0004
    NAME_ENTRY "rdi", 0x0005
    NAME_ENTRY "rsp", 0x0006
    NAME_ENTRY "rbp", 0x0007
    NAME_ENTRY "r10", 0x000A
    NAME_ENTRY "r11", 0x000B
    NAME_ENTRY "r12", 0x000C
    NAME_ENTRY "r13", 0x000D
    NAME_ENTRY "r14", 0x000E
    NAME_ENTRY "r15", 0x000F
    NAME_ENTRY "eax", 0x0010
    NAME_ENTRY "ebx", 0x0011
    NAME_ENTRY "ecx", 0x0012
    NAME_ENTRY "edx", 0x0013
    NAME_ENTRY "esi", 0x0014
    NAME_ENTRY "edi", 0x0015
    NAME_ENTRY "esp", 0x0016
    NAME_ENTRY "ebp", 0x0017
    NAME_ENTRY "r8d", 0x0018
    NAME_ENTRY "r9d", 0x0019
    NAME_ENTRY "r8w", 0x0028
    NAME_ENTRY "r9w", 0x0029
    NAME_ENTRY "sil", 0x0034
    NAME_ENTRY "dil", 0x0035
    NAME_ENTRY "spl", 0x0036
    NAME_ENTRY "bpl", 0x0037
    NAME_ENTRY "r8b", 0x0038
    NAME_ENTRY "r9b", 0x0039
    NAME_ENTRY "cr0", 0x004A
    NAME_ENTRY "cr2", 0x004B
    NAME_ENTRY "cr3", 0x004C
    NAME_ENTRY "cr4", 0x004D
    NAME_ENTRY "cr8", 0x004E
    NAME_ENTRY "hlt", 0x004F
    NAME_ENTRY "xor", 0x0053
    NAME_ENTRY "inc", 0x0054
    NAME_ENTRY "dec", 0x0055
    NAME_ENTRY "mov", 0x0056
    NAME_ENTRY "add", 0x0057
    NAME_ENTRY "sub", 0x0058
    NAME_ENTRY "ret", 0x005A
    NAME_ENTRY "cmp", 0x005B
    NAME_ENTRY "jne", 0x005D
    NAME_ENTRY "jge", 0x005E
    NAME_ENTRY "jle", 0x0060
  .by_name_4:
    NAME_ENTRY "r10d", 0x001A
    NAME_ENTRY "r11d", 0x001B
    NAME_ENTRY "r12d", 0x001C
    NAME_ENTRY "r13d", 0x001D
    NAME_ENTRY "r14d", 0x001E
    NAME_ENTRY "r15d", 0x001F
    NAME_ENTRY "r10w", 0x002A
    NAME_ENTRY "r11w", 0x002B
    NAME_ENTRY "r12w", 0x002C
    NAME_ENTRY "r13w", 0x002D
    NAME_ENTRY "r14w", 0x002E
    NAME_ENTRY "r15w", 0x002F
    NAME_ENTRY "r10b", 0x003A
    NAME_ENTRY "r11b", 0x003B
    NAME_ENTRY "r12b", 0x003C
    NAME_ENTRY "r13b", 0x003D
    NAME_ENTRY "r14b", 0x003E
    NAME_ENTRY "r15b", 0x003F
    NAME_ENTRY "int3", 0x0050
    NAME_ENTRY "call", 0x0059
  .by_name_5:                  ; empty; marks the end of the length-4 names
  .by_id:
    ; id, type (0x02 = register, 0x01 = operator), typed metadata
    ;   register metadata: reg bits in bits 4-2, width in bits 1-0
    ;   operator metadata: # operands
    ID_ENTRY 0x0010, 0x02, 00000010b ; eax: reg 000b, width 10b (32 bits)
    ID_ENTRY 0x0000, 0x02, 00000011b ; rax: reg 000b, width 11b (64 bits)
    ID_ENTRY 0x0003, 0x02, 00001011b ; rdx: reg 010b, width 11b (64 bits)
    ID_ENTRY 0x0053, 0x01, 0x02      ; xor: operator, 2 operands
    ID_ENTRY 0x0054, 0x01, 0x01      ; inc: operator, 1 operand
    ID_ENTRY 0x0056, 0x01, 0x02      ; mov: operator, 2 operands
    ID_ENTRY 0x004F, 0x01, 0x00      ; hlt: operator, 0 operands
  .by_id_end:

opcodes:
  .by_id:
    OPCODE_ENTRY 0x0053, 0x31        ; xor
    OPCODE_ENTRY 0x0054, 0xFF        ; inc
    OPCODE_ENTRY 0x0056, 0x89        ; mov
    OPCODE_ENTRY 0x004F, 0xF4        ; hlt
  .by_id_end:

msg_welcome db "Welcome to Twasm", 0x0A, 0x00
msg_halt db "halted.", 0x0A, 0x00

; bytes that end a token; padded with 0x00 to 8 entries
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00

debug_string db "debug_string", 0x0A, 0x00

; test program
program:
  db "xor eax, eax", 0x0A
  db "inc rax", 0x0A
  db "mov [ rax ], rdx", 0x0A
  db "hlt", 0x0A
  db 0x00 ; just for the sake of being able to print it, I made it a string
.size db $ - program - 1       ; a single byte: length of the program text,
                               ; not counting the 0x00 above