; TODO actually enforce any of these *_SIZE constants :p
LOAD_ADDR                   equ 0x00010000  ; address this program is loaded at

TEST_ARENA_ADDR             equ 0x00050000  ; address to run tests at
TEST_ARENA_SIZE             equ 0x1000      ; maximum size tests can use

TOKEN_TABLE_ADDR            equ 0x00060000  ; address the token table is loaded at
TOKEN_TABLE_SIZE            equ 0x1000      ; max length of table
TOKEN_TABLE_ENTRY_SIZE      equ 2           ; size of token table entry; things may
                                            ; break if this ever changes

OUTPUT_ADDR                 equ 0x00070000  ; address of outputted binary
OUTPUT_SIZE                 equ 0x1000      ; max length of outputted binary

STACK_ADDR                  equ 0x00060000  ; initial rsp of the 64-bit stack; pushes
                                            ; write below this address, i.e. just
                                            ; under the token table

UNRECOGNISED_TOKEN_ID       equ 0xFFFF      ; id of an unrecognised token
UNRECOGNISED_ID_TYPE        equ 0x0F        ; type of an unrecognised id
UNRECOGNISED_ID_METADATA    equ 0xFF        ; metadata of an unrecognised id
UNRECOGNISED_ID_OPCODE      equ 0x90        ; opcode of an unrecognised id (NOP)

TEST_LINE_LENGTH            equ 80          ; right border of test suite results

; flags for expected values in tokeniser
E_COMMENT                   equ 1 << 0
E_NEWLINE                   equ 1 << 1
E_WHITESPACE                equ 1 << 2
E_COMMA                     equ 1 << 3
E_OPERATOR                  equ 1 << 4
E_OPERAND                   equ 1 << 5

[bits 64]
[org LOAD_ADDR]
[default abs]   ; TODO see if I actually need to do this
                ; afaik absolute addressing is not harmful on bare metal
                ; reasoning: stops annoying warning =D

; ------------------------------------------------------------------------------
; start
;
; description:
; entry point: sets up the stack, runs the test suite, tokenises the built-in
; test program, assembles the resulting tokens, then halts.
; ------------------------------------------------------------------------------
start:
    mov rsp, STACK_ADDR             ; we might need more stack space, let's just be safe

    mov rsi, msg_welcome
    call print

    call run_tests

    call clear_token_table

    mov rdi, program                ; rdi -> program
    movzx esi, byte [program.size]  ; rsi = size of program. `.size` is declared
                                    ; with `db`, so load exactly one byte and
                                    ; zero-extend it rather than reading a qword
    call tokenise
                                    ; rax = number of tokens processed
    mov rdi, rax
    push rdi                        ; clear_output_arena overwrites rdi
    call clear_output_arena
    pop rdi
    call assemble

    jmp halt

; ------------------------------------------------------------------------------
; assembling
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; assemble
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
; binary located at OUTPUT_ADDR. It's probably desirable to clear the output
; arena before calling this function.
;
; Operators with no operands are emitted as a bare opcode. Operators with
; operands are held as "pending" until their register operands have been seen,
; then emitted as opcode + ModR/M (direct addressing). Only the opcode and the
; ModR/M byte are written; no prefix bytes are emitted here.
;
; parameters:
; rdi = number of tokens in the token table
;
; clobbers:
; rax, rcx, rdx, rsi, rdi, flags (directly or via the helpers it calls)
; ------------------------------------------------------------------------------
assemble:
    xor eax, eax                    ; rax = index of the next token to process

.loop:
    cmp rax, rdi                    ; processed every token in the table?
    jae .break                      ; (unsigned: both values are counts)

    push rdi                        ; [stack] total number of tokens
    movzx edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR]
                                    ; di = next tte, rest of rdi zeroed
    push rax                        ; [stack] token index

    call get_tte_type               ; al = type of token

    cmp al, 0x01                    ; is the tte an operator?
    jne .continue_operator          ; if not, jump past the operator case

  .operator:
    push rax                        ; type of token; MUST be popped BEFORE reaching
                                    ; .continue_operator, which still needs it
    push rdi
    call get_tte_typed_metadata     ; al = tte typed metadata
    pop rdi

    and al, 11b                     ; low 2 bits = number of operands
    jnz .operator_with_args

  .operator_0:                      ; no operands: emit the opcode right away
    push rdi
    call get_opcode                 ; al = opcode
    call .output_byte
    pop rdi
    pop rax                         ; type of token, from .operator
    jmp .continue_operator

  .operator_with_args:              ; operands follow: remember the operator
    mov [.pending_operator_num_args], al
    push rdi
    call get_opcode                 ; al = opcode
    mov [.pending_operator_opcode], al
    pop rdi
    pop rax                         ; type of token, from .operator

.continue_operator:
    cmp al, 0x02                    ; is the tte a register?
    jne .continue_register          ; if not, jump past the register case

  .register:
    call .dec_num_args              ; an operand was found, so one fewer is owed

    cmp byte [.pending_operator_num_args], 1
    je .register_one_of_two         ; one still owed: this is the 1st of 2
    cmp byte [.pending_operator_num_args], 0
    je .register_last               ; none owed: this is the last operand (the
                                    ; 2nd of 2, or the only one)

    ; otherwise no operator was waiting for this register: discard the token,
    ; reset the pending state and keep going :/
    push rsi
    mov rsi, .warn_unexpected_register
    call print.warn
    pop rsi
    call .reset_state
    jmp .continue_register

  .register_one_of_two:
    mov [.first_argument], di       ; remember the first operand
    jmp .continue_register

  .register_last:
    mov si, [.first_argument]       ; si = first operand, or UNRECOGNISED_TOKEN_ID
                                    ; when the operator only takes one operand
    cmp si, UNRECOGNISED_TOKEN_ID
    jne .operator_finalise_2

  .operator_finalise_1:
    ; One operand. The operand belongs in the R/M field and the reg field
    ; carries the opcode extension. Only extension /0 is produced here: token
    ; id 0x0000 (rax) has reg bits 000b, which is what `inc` (FF /0) needs.
    mov si, di                      ; R/M = the operand
    xor edi, edi                    ; reg = /0

  .operator_finalise_2:
    ; di = token for the reg field (second operand, or /0 stand-in)
    ; si = token for the R/M field (first operand)
    call get_direct_addressing_ModRM
                                    ; al = ModR/M byte
    push rax
    mov al, [.pending_operator_opcode]
    call .output_byte               ; output operator's opcode
    pop rax
    call .output_byte               ; output ModR/M byte

    call .reset_state               ; nothing is pending any more

.continue_register:
    pop rax                         ; token index
    pop rdi                         ; total number of tokens
    inc rax                         ; move to next token
    jmp .loop

.break:
    ret

    ; constants
.warn_unexpected_register db "ignoring unexpected register", 0x0A, 0x00

    ; procedures

; writes al to the next free output byte and advances the output pointer
; al = byte to write
; clobbers rdx
.output_byte:
    mov edx, [.next_output_byte]    ; rdx = output byte's address (zero-extended)
    mov [rdx], al                   ; write byte to that address
    inc edx                         ; increment address
    mov [.next_output_byte], edx    ; store the new output address
    ret

; decrements .pending_operator_num_args; preserves rax
.dec_num_args:
    dec byte [.pending_operator_num_args]
    ret

; forgets any pending operator and first operand
.reset_state:
    ; the size keywords are required: NASM does not record a size for a label,
    ; so a store of an immediate to memory has no size of its own. Each store
    ; matches the size the variable is declared with below.
    mov byte [.pending_operator_opcode], UNRECOGNISED_ID_OPCODE
    mov byte [.pending_operator_num_args], 0x00
    mov word [.first_argument], UNRECOGNISED_TOKEN_ID
    ret

    ; state variables
.pending_operator_opcode    db 0x00     ; opcode of the operator seeking args
.pending_operator_num_args  db 0x00     ; # of args it is still owed
.first_argument             dw UNRECOGNISED_TOKEN_ID
                                        ; first argument if there are two
.next_output_byte           dd OUTPUT_ADDR
                                        ; next empty byte in output

; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
; given a token table entry, returns the declared type in `tokens.by_id`. If
; there is no entry, returns UNRECOGNISED_ID_TYPE
;
; parameters:
; di = token table entry
;
; returned:
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
;      zeroed; the rest of rax is zeroed.
;
; clobbers:
; rcx (cx), upper bits of rdi (masked off), flags
; ------------------------------------------------------------------------------
get_tte_type:
    and edi, 0xFFFF                 ; mask input so it behaves as expected
    xor eax, eax                    ; rax = index into tokens.by_id

.loop:
    ; each tokens.by_id entry is 4 bytes: word id, byte type, byte metadata
    cmp rax, (tokens.by_id_end - tokens.by_id) / 4
    jae .not_found                  ; index == entry count is already out of range

    mov cx, [tokens.by_id + rax * 4]
                                    ; cx = id of this entry
    cmp cx, di
    je .found

    inc rax
    jmp .loop

.not_found:
    mov eax, UNRECOGNISED_ID_TYPE
    and eax, 0xF                    ; mask as expected
    ret

.found:
    movzx eax, byte [2 + tokens.by_id + rax * 4]
                                    ; type byte of the matching entry
    and eax, 0xF                    ; mask as expected
    ret

; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
; given a token table entry, returns the declared typed metadata in
; `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA
;
; parameters:
; di = token table entry
;
; returned:
; al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax is
; zeroed.
; ------------------------------------------------------------------------------
get_tte_typed_metadata:
    ; clobbers: upper bits of rdi (masked off), flags
    and edi, 0xFFFF                 ; mask input so it behaves as expected
    xor eax, eax                    ; rax = index into tokens.by_id

.loop:
    ; each tokens.by_id entry is 4 bytes: word id, byte type, byte metadata
    cmp rax, (tokens.by_id_end - tokens.by_id) / 4
    jae .not_found                  ; index == entry count is already out of range

    cmp [tokens.by_id + rax * 4], di
                                    ; does this entry's id match?
    je .found

    inc rax
    jmp .loop

.not_found:
    mov eax, UNRECOGNISED_ID_METADATA
    ret

.found:
    movzx eax, byte [3 + tokens.by_id + rax * 4]
                                    ; metadata byte of the matching entry
    ret

; ------------------------------------------------------------------------------
; get_direct_addressing_ModRM
;
; description:
; given 2 register tokens, returns the ModR/M byte in direct addressing
; (mod = 11b) mode
;
; parameters:
; di = token table entry `reg`
; si = token table entry `R/M`
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
;
; clobbers:
; dl, plus whatever get_ModRM clobbers
; ------------------------------------------------------------------------------
get_direct_addressing_ModRM:
    mov dl, 11b                     ; mod = 11b
    call get_ModRM
    ret

; ------------------------------------------------------------------------------
; get_ModRM
;
; description:
; given 2 register tokens and the mod bits, returns the ModR/M byte, laid out
; as mod (bits 7-6), reg (bits 5-3), R/M (bits 2-0)
;
; parameters:
; di = token table entry `reg`
; si = token table entry `R/M`
; dl = lower 2 bits: mod bits. The rest is ignored
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
;
; clobbers:
; rdi, flags. rbx is saved and restored; rdx and rsi are left untouched.
; ------------------------------------------------------------------------------
get_ModRM:
    push rbx                        ; rbx accumulates the result across the calls

    movzx ebx, dl
    and ebx, 11b                    ; mask for mod bits
    shl ebx, 6                      ; ebx = mod << 6

                                    ; di = `reg` tte
    call get_reg_bits               ; eax = reg bits (0..7)
    shl eax, 3
    or ebx, eax                     ; ebx = mod << 6 | reg << 3

    mov edi, esi                    ; do the other one: di = `R/M` tte
    call get_reg_bits               ; eax = R/M bits (0..7)
    or eax, ebx                     ; eax = complete ModR/M byte

    pop rbx
    ret

; ------------------------------------------------------------------------------
; get_opcode
;
; description:
; given an operator token, returns its opcode as declared in `opcodes.by_id`.
; If there is no entry, returns UNRECOGNISED_ID_OPCODE
;
; parameters:
; di = token table entry
;
; returned:
; al = opcode; the rest of rax is zeroed
;
; clobbers:
; upper bits of rdi (masked off), flags
; ------------------------------------------------------------------------------
get_opcode:
    and edi, 0xFFFF                 ; mask input so it behaves as expected
    xor eax, eax                    ; rax = index into opcodes.by_id

.loop:
    ; each opcodes.by_id entry is 4 bytes: word id, byte opcode, byte reserved
    cmp rax, (opcodes.by_id_end - opcodes.by_id) / 4
    jae .not_found                  ; index == entry count is already out of range

    cmp [opcodes.by_id + rax * 4], di
                                    ; does this entry's id match?
    je .found

    inc rax
    jmp .loop

.not_found:
    mov eax, UNRECOGNISED_ID_OPCODE
    ret

.found:
    movzx eax, byte [2 + opcodes.by_id + rax * 4]
                                    ; opcode byte of the matching entry
    ret

; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
; given a register token, returns its reg bits metadata
;
; parameters:
; di = token table entry
;
; returned:
; al = register token; the rest of rax, including the upper 5 bits of al, are
; zeroed.
; ------------------------------------------------------------------------------
get_reg_bits:
    ; al on return holds the 3 reg bits stored in bits 4-2 of the token's typed
    ; metadata; bits 1-0 of the metadata are shifted out
                                    ; di = tte
    call get_tte_typed_metadata     ; al = typed metadata, rest of rax zeroed
    shr al, 2                       ; discard the low two bits
    and al, 111b                    ; keep the 3 reg bits
    ret

; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; tokenise
; TODO write tests
;
; description:
; represents the program at the given address and puts it in the token table
; it's probably desirable to clear the token table before calling this function.
;
; Operators are looked up in `tokens.operators` and their ids are appended to
; the token table. Operands are currently only skipped over; nothing is written
; to the token table for them. Any input that is not allowed by the current
; `.expecting` flags prints an error and jumps to `halt`.
;
; NOTE(review): `.expecting` is not re-initialised on entry, so it carries over
; from a previous call — confirm whether that is intended.
;
; parameters:
; rdi -> first byte of program
; rsi = size of program in bytes
;
; returned:
; rax = number of tokens processed
;
; clobbers:
; rdx, rdi, rsi, r8, flags
; ------------------------------------------------------------------------------
tokenise:
                                    ; rdi -> current byte of program
    add rsi, rdi                    ; rsi -> one past the last byte of program
    xor eax, eax                    ; rax = number of tokens processed
    xor edx, edx                    ; dl = current byte of program

.loop:
    cmp rdi, rsi                    ; once the current byte is past the program,
    jae .break                      ; break (unsigned: these are addresses)

    mov dl, [rdi]                   ; dl = current byte

    cmp dl, ";"                     ; start of a comment?
    je .comment

    cmp dl, 0x0A                    ; end of a line?
    je .newline_mk_flags

    cmp dl, ","                     ; a comma?
    je .comma

    push rsi
    push rdi
    push rax
    push rdx
    ; TODO probably should not ignore null bytes
    mov rsi, whitespace_3           ; rsi -> list of whitespace (ignored) bytes
    mov rdi, 3                      ; rdi = size of list in bytes
                                    ; dl = current byte
    call elemb                      ; al = 0 if not whitespace, 1 if whitespace
    cmp al, 1                       ; the pops below leave these flags intact
    pop rdx
    pop rax
    pop rdi
    pop rsi
    je .skip_byte_whitespace

    test byte [.expecting], E_OPERATOR
    jnz .operator                   ; an operator is expected: handle it
    jmp .operand                    ; otherwise, handle as an operand

.comment:
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_comment
    call print
    pop rsi

    test byte [.expecting], E_COMMENT
    jz .unexpected_comment          ; a comment is not allowed here

  .comment_loop:
    cmp rdi, rsi                    ; stop at the end of the program even if the
    jae .comment_break              ; comment has no terminating newline

    mov dl, [rdi]                   ; dl = current byte
    cmp dl, 0x0A                    ; the newline itself is left for .loop
    je .comment_break

    inc rdi                         ; point to next unread byte
    jmp .comment_loop

  .comment_break:
    jmp .loop

.skip_byte_whitespace:
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_whitespace
    call print
    pop rsi

    test byte [.expecting], E_WHITESPACE
    jz .unexpected_whitespace       ; whitespace is not allowed here

    inc rdi
    jmp .loop

.comma:
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_comma
    call print
    pop rsi

    test byte [.expecting], E_COMMA
    jz .unexpected_comma            ; a comma is not allowed here

    inc rdi
    mov byte [.expecting], E_WHITESPACE | E_OPERAND
                                    ; after a comma, only an operand may follow
    jmp .loop

.newline_mk_flags:
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_newline
    call print
    pop rsi

    test byte [.expecting], E_NEWLINE
    jz .unexpected_newline          ; a newline is not allowed here

    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
    inc rdi
    jmp .loop

.operator:
    ; debug message
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_operator
    call print
    pop rsi

    push rax                        ; [stack] number of tokens processed
    mov dword [.pending_operator], 0
                                    ; zero pending operator
    xor eax, eax                    ; rax = number of bytes in operator

  .operator_loop:
    cmp rdi, rsi                    ; the end of the program also ends the operator
    jae .operator_break

    mov dl, [rdi]                   ; next byte

    ; TODO have better check for operator end
    cmp dl, " "
    je .operator_break
    cmp dl, 0x0A
    je .operator_break
    cmp dl, 0x00
    je .operator_break
    cmp dl, ";"
    je .operator_break

    ; TODO give this its own error
    ; TODO make this pop rax
    cmp eax, 4                      ; .pending_operator only holds 4 bytes, so a
    jae .unexpected_operator        ; 5th byte is an error, not a store

    mov [.pending_operator + rax], dl
    inc rax                         ; inc byte counter
    inc rdi                         ; inc byte pointer
    jmp .operator_loop

  .operator_break:
    pop rax                         ; rax = number of tokens processed

    push rcx
    mov rcx, tokens.operators       ; rcx -> entry in lookup table
    ; TODO use something other than r8
    mov r8d, [.pending_operator]    ; r8d = operator text, zero padded to 4 bytes

  .operator_id_loop:
    cmp rcx, tokens.operators_end   ; entry still inside the table?
    ; TODO give own error
    jae .unexpected_operator        ; if not, the operator is unknown

    cmp r8d, [rcx]                  ; first 4 bytes of an entry are its text
    je .found_id

    add rcx, 6                      ; next entry (4 bytes text + 2 bytes id)
    jmp .operator_id_loop

  .found_id:
    push rdx
    mov dx, [rcx + 4]               ; dx = token id
    mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], dx
                                    ; append it to the token table
    inc rax
    pop rdx
    pop rcx

    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
    jmp .loop

.operand:
    ; debug message
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_operand
    call print
    pop rsi

    test byte [.expecting], E_OPERAND
    jz .unexpected_operand          ; an operand is not allowed here

  .operand_loop:
    cmp rdi, rsi                    ; the end of the program also ends the operand
    jae .operand_break

    mov dl, [rdi]

    cmp dl, ","
    je .operand_break
    cmp dl, 0x0A
    je .operand_break
    cmp dl, 0x00
    je .operand_break

    inc rdi
    jmp .operand_loop

  .operand_break:
    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
    jmp .loop

.break:
    ret

    ; state
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR

    ; error handlers: print "[ERROR]: unexpected <thing>." and halt
.unexpected_whitespace:
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_whitespace
    call print
    jmp halt

.unexpected_comment:
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_comment
    call print
    jmp halt

.unexpected_newline:
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_newline
    call print
    jmp halt

.unexpected_comma:
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_comma
    call print
    jmp halt

.unexpected_operand:
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_operand
    call print
    jmp halt

.unexpected_operator:
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_operator
    call print
    jmp halt

.err_unexpected     db "unexpected ", 0x00
.found              db "found ", 0x00
.msg_whitespace     db "whitespace.", 0x0A, 0x00
.msg_comment        db "comment.", 0x0A, 0x00
.msg_newline        db "newline.", 0x0A, 0x00
.msg_comma          db "comma.", 0x0A, 0x00
.msg_operator       db "operator.", 0x0A, 0x00
.msg_operand        db "operand.", 0x0A, 0x00
.pending_operator   dd 0            ; the operator token that is pending processing

; ------------------------------------------------------------------------------
; evaluate_constant
;
; description:
; takes a constant and returns its hexadecimal representation. Currently the
; following constants are supported:
;
; | type | p.   | description  |
; |------|------|--------------|
; | 0x00 | 0x   | hexadecimal  |
; | 0xFF |      | unrecognised |
;
; where `p.` is the prefix. Hexadecimal digits may be upper or lower case.
;
; parameters:
; rdi -> first byte of constant
; rsi = size of constant in bytes
;
; returned:
; rax = value of the constant in hexadecimal; not meaningful when the type is
;       0xFF
; dl  = type of constant; the rest of rdx is zeroed
;
; clobbers:
; rdi, rsi, flags
; ------------------------------------------------------------------------------
evaluate_constant:
    cmp rsi, 2                      ; too short to even hold the "0x" prefix?
    jb .unrecognised

    ; TODO fix this cheap trick xD
    cmp byte [rdi], '0'
    jne .unrecognised
    cmp byte [rdi + 1], 'x'
    jne .unrecognised

    add rdi, 2                      ; rdi -> first digit
    sub rsi, 2                      ; rsi = number of digits left
    xor eax, eax                    ; rax = value accumulated so far

.loop:
    test rsi, rsi                   ; any digits left?
    jz .break

    mov dl, [rdi]                   ; dl = next byte of constant

    cmp dl, '0'                     ; unsigned compares throughout, so bytes
    jb .unrecognised                ; >= 0x80 cannot pass as small values
    cmp dl, '9'
    jbe .digit

  .alpha:
    or dl, 0x20                     ; fold 'A'..'F' onto 'a'..'f'
    cmp dl, 'a'
    jb .unrecognised                ; rejects ':'..'@' and the like
    cmp dl, 'f'
    ja .unrecognised
    sub dl, 'a' - 0xA               ; map 'a'..'f' to 0xA..0xF
    jmp .continue

  .digit:
    sub dl, '0'                     ; map '0'..'9' to 0..9

  .continue:
    shl rax, 4                      ; make room for next hex digit
    or al, dl                       ; and add newest nibble

    dec rsi                         ; one fewer byte left
    inc rdi                         ; point to next byte
    jmp .loop

.break:
    xor edx, edx                    ; hex type (0x00)
    ret

.unrecognised:
    mov edx, 0xFF                   ; unrecognised type
    ret

; ------------------------------------------------------------------------------
; utilities
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; print
;
; description:
; prints a null-terminated string by writing each byte to I/O port 0x3F8
; rax, rdx and rsi are saved and restored, for ease of debugging
;
; the .debug, .error, .test and .warn entry points print a fixed tag
; ("[DEBUG]: " and so on) and then the caller's string
;
; parameters:
; rsi -> start of null-terminated string
; ------------------------------------------------------------------------------
print:
    push rdx
    push rax
    push rsi

    mov edx, 0x3F8                  ; dx = output port
    jmp .fetch

.emit:
    out dx, al                      ; send the byte
    inc rsi                         ; rsi -> next byte of the string
.fetch:
    mov al, [rsi]
    test al, al                     ; stop at the terminating null
    jnz .emit

    pop rsi
    pop rax
    pop rdx
    ret

.debug:
    push rsi                        ; keep the caller's string
    mov rsi, .debug_msg
    call print                      ; tag first
    pop rsi
    jmp print                       ; then the string; print's ret returns to
                                    ; our caller

.error:
    push rsi
    mov rsi, .error_msg
    call print
    pop rsi
    jmp print

.test:
    push rsi
    mov rsi, .test_msg
    call print
    pop rsi
    jmp print

.warn:
    push rsi
    mov rsi, .warn_msg
    call print
    pop rsi
    jmp print

.debug_msg  db "[DEBUG]: ", 0x00
.error_msg  db "[ERROR]: ", 0x00
.test_msg   db "[TEST]: ", 0x00
.warn_msg   db "[WARN]: ", 0x00

; ------------------------------------------------------------------------------
; halt
;
; description:
; prints `msg_halt` and executes `hlt`; if execution ever continues past the
; `hlt`, does the same again. Never returns.
; ------------------------------------------------------------------------------
halt:
    mov rsi, msg_halt
    call print
    hlt
    jmp halt                        ; resumed after hlt: announce and halt again

; ------------------------------------------------------------------------------
; elemb
;
; description:
; checks if given byte is element of the specified list.
;
; parameters:
; rdi = size of list
; rsi -> start of list
; dl = given byte
;
; returned:
; rax = 0: is not an element
;       1: is an element
;
; clobbers:
; rdi, rsi, flags
; ------------------------------------------------------------------------------
elemb:
.loop:
    test rdi, rdi                   ; nothing left to look at?
    jz .not_found                   ; then dl is not an element of the list

    cmp [rsi], dl                   ; is the current list byte the desired byte?
    je .found

    inc rsi                         ; move to next byte
    dec rdi                         ; and reduce remaining length
    jmp .loop

.not_found:
    xor eax, eax                    ; return 0; dl not an element of list
    ret

.found:
    mov eax, 1                      ; return 1; dl an element of list
    ret

; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
; clears the token table as specified by TOKEN_TABLE_SIZE and TOKEN_TABLE_ADDR
;
; clobbers:
; rax, rcx, rdi; clears the direction flag
; ------------------------------------------------------------------------------
clear_token_table:
    cld                             ; make stosd walk upwards through memory
    xor eax, eax                    ; value to write
    mov rcx, TOKEN_TABLE_SIZE / 4   ; number of double words
    mov rdi, TOKEN_TABLE_ADDR       ; address to start
    rep stosd
    ret

; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
; clears the test arena as specified by TEST_ARENA_SIZE and TEST_ARENA_ADDR
;
; clobbers:
; rax, rcx, rdi; clears the direction flag
; ------------------------------------------------------------------------------
clear_test_arena:
    cld                             ; make stosd walk upwards through memory
    xor eax, eax                    ; value to write
    mov rcx, TEST_ARENA_SIZE / 4    ; number of double words
    mov rdi, TEST_ARENA_ADDR        ; address to start
    rep stosd
    ret

; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
; clears the output arena as specified by OUTPUT_SIZE and OUTPUT_ADDR
;
; clobbers:
; rax, rcx, rdi; clears the direction flag
; ------------------------------------------------------------------------------
clear_output_arena:
    cld                             ; make stosd walk upwards through memory
    xor eax, eax                    ; value to write
    mov rcx, OUTPUT_SIZE / 4        ; number of double words
    mov rdi, OUTPUT_ADDR            ; address to start
    rep stosd
    ret

%include "asm/tests.asm"

; ------------------------------------------------------------------------------
; data
; ------------------------------------------------------------------------------

; one `tokens.by_id` entry: word id, byte type, byte typed metadata
%macro TOKEN_INFO 3
    dw %1
    db %2
    db %3
%endmacro

; one name lookup entry: name padded with zeros to 4 bytes, then word id
%macro TOKEN_NAME 2
    dd %1
    dw %2
%endmacro

; one `opcodes.by_id` entry: word id, byte opcode, one reserved byte
%macro OPCODE_INFO 2
    dw %1
    db %2
    db 0x00
%endmacro

tokens:
.by_id:
    ;          id      type  metadata
    TOKEN_INFO 0x0010, 0x02, 00000010b  ; eax: register, reg 000b, width 10b (32 bits)
    TOKEN_INFO 0x0000, 0x02, 00000011b  ; rax: register, reg 000b, width 11b (64 bits)
    TOKEN_INFO 0x0003, 0x02, 00001011b  ; rdx: register, reg 010b, width 11b (64 bits)
    TOKEN_INFO 0x0053, 0x01, 0x02       ; xor: operator, 2 operands
    TOKEN_INFO 0x0054, 0x01, 0x01       ; inc: operator, 1 operand
    TOKEN_INFO 0x0056, 0x01, 0x02       ; mov: operator, 2 operands
    TOKEN_INFO 0x004F, 0x01, 0x00       ; hlt: operator, 0 operands
.by_id_end:

.operators:
    TOKEN_NAME "je",   0x005C
    TOKEN_NAME "jg",   0x005F
    TOKEN_NAME "jl",   0x0061
    TOKEN_NAME "hlt",  0x004F
    TOKEN_NAME "xor",  0x0053
    TOKEN_NAME "inc",  0x0054
    TOKEN_NAME "dec",  0x0055
    TOKEN_NAME "mov",  0x0056
    TOKEN_NAME "add",  0x0057
    TOKEN_NAME "sub",  0x0058
    TOKEN_NAME "ret",  0x005A
    TOKEN_NAME "cmp",  0x005B
    TOKEN_NAME "jne",  0x005D
    TOKEN_NAME "jge",  0x005E
    TOKEN_NAME "jle",  0x0060
    TOKEN_NAME "int3", 0x0050
    TOKEN_NAME "call", 0x0059
.operators_end:

.registers:
    ; two-character names
    TOKEN_NAME "r8",   0x0008
    TOKEN_NAME "r9",   0x0009
    TOKEN_NAME "ax",   0x0020
    TOKEN_NAME "bx",   0x0021
    TOKEN_NAME "cx",   0x0022
    TOKEN_NAME "dx",   0x0023
    TOKEN_NAME "si",   0x0024
    TOKEN_NAME "di",   0x0025
    TOKEN_NAME "sp",   0x0026
    TOKEN_NAME "bp",   0x0027
    TOKEN_NAME "al",   0x0030
    TOKEN_NAME "bl",   0x0031
    TOKEN_NAME "cl",   0x0032
    TOKEN_NAME "dl",   0x0033
    TOKEN_NAME "ah",   0x0040
    TOKEN_NAME "bh",   0x0041
    TOKEN_NAME "ch",   0x0042
    TOKEN_NAME "dh",   0x0043
    TOKEN_NAME "cs",   0x0044
    TOKEN_NAME "ds",   0x0045
    TOKEN_NAME "es",   0x0046
    TOKEN_NAME "fs",   0x0047
    TOKEN_NAME "gs",   0x0048
    TOKEN_NAME "ss",   0x0049
    ; three-character names
    TOKEN_NAME "rax",  0x0000
    TOKEN_NAME "rbx",  0x0001
    TOKEN_NAME "rcx",  0x0002
    TOKEN_NAME "rdx",  0x0003
    TOKEN_NAME "rsi",  0x0004
    TOKEN_NAME "rdi",  0x0005
    TOKEN_NAME "rsp",  0x0006
    TOKEN_NAME "rbp",  0x0007
    TOKEN_NAME "r10",  0x000A
    TOKEN_NAME "r11",  0x000B
    TOKEN_NAME "r12",  0x000C
    TOKEN_NAME "r13",  0x000D
    TOKEN_NAME "r14",  0x000E
    TOKEN_NAME "r15",  0x000F
    TOKEN_NAME "eax",  0x0010
    TOKEN_NAME "ebx",  0x0011
    TOKEN_NAME "ecx",  0x0012
    TOKEN_NAME "edx",  0x0013
    TOKEN_NAME "esi",  0x0014
    TOKEN_NAME "edi",  0x0015
    TOKEN_NAME "esp",  0x0016
    TOKEN_NAME "ebp",  0x0017
    TOKEN_NAME "r8d",  0x0018
    TOKEN_NAME "r9d",  0x0019
    TOKEN_NAME "r8w",  0x0028
    TOKEN_NAME "r9w",  0x0029
    TOKEN_NAME "sil",  0x0034
    TOKEN_NAME "dil",  0x0035
    TOKEN_NAME "spl",  0x0036
    TOKEN_NAME "bpl",  0x0037
    TOKEN_NAME "r8b",  0x0038
    TOKEN_NAME "r9b",  0x0039
    TOKEN_NAME "cr0",  0x004A
    TOKEN_NAME "cr2",  0x004B
    TOKEN_NAME "cr3",  0x004C
    TOKEN_NAME "cr4",  0x004D
    TOKEN_NAME "cr8",  0x004E
    ; four-character names
    TOKEN_NAME "r10d", 0x001A
    TOKEN_NAME "r11d", 0x001B
    TOKEN_NAME "r12d", 0x001C
    TOKEN_NAME "r13d", 0x001D
    TOKEN_NAME "r14d", 0x001E
    TOKEN_NAME "r15d", 0x001F
    TOKEN_NAME "r10w", 0x002A
    TOKEN_NAME "r11w", 0x002B
    TOKEN_NAME "r12w", 0x002C
    TOKEN_NAME "r13w", 0x002D
    TOKEN_NAME "r14w", 0x002E
    TOKEN_NAME "r15w", 0x002F
    TOKEN_NAME "r10b", 0x003A
    TOKEN_NAME "r11b", 0x003B
    TOKEN_NAME "r12b", 0x003C
    TOKEN_NAME "r13b", 0x003D
    TOKEN_NAME "r14b", 0x003E
    TOKEN_NAME "r15b", 0x003F
.registers_end:

opcodes:
.by_id:
    ;           id      opcode
    OPCODE_INFO 0x0053, 0x31            ; xor
    OPCODE_INFO 0x0054, 0xFF            ; inc
    OPCODE_INFO 0x0056, 0x89            ; mov
    OPCODE_INFO 0x004F, 0xF4            ; hlt
.by_id_end:

msg_welcome db "Welcome to Twasm", 0x0A, 0x00
msg_halt    db "halted.", 0x0A, 0x00

token_terminator_8  db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
whitespace_3        db " ", 0x0D, 0x00

; test program
program:
    db "xor eax, eax", 0x0A
    db "inc rax ; inline comment", 0x0A
    db "; one line comment", 0x0A
    db "mov [ rax ], rdx", 0x0A
    db "hlt", 0x00                  ; the trailing null is just for the sake of
                                    ; being able to print it as a string
.size db $ - program                ; one byte: length of the program, null included