Files
bootler/twasm/asm/main.asm
2026-03-12 14:26:38 +01:00

1155 lines
27 KiB
NASM

; Memory map and sentinel values. All *_SIZE constants are byte counts.
; TODO actually enforce any of these *_SIZE constants :p
LOAD_ADDR equ 0x00010000 ; address this program is loaded at (used by [org])
TEST_ARENA_ADDR equ 0x00050000 ; address to run tests at
TEST_ARENA_SIZE equ 0x1000 ; maximum number of bytes tests can use
TOKEN_TABLE_ADDR equ 0x00060000 ; address of the first token table entry
TOKEN_TABLE_SIZE equ 0x1000 ; maximum size of the token table in bytes
TOKEN_TABLE_ENTRY_SIZE equ 2 ; size of a token table entry in bytes; things
; may break if this ever changes
OUTPUT_ADDR equ 0x00070000 ; address of the outputted binary
OUTPUT_SIZE equ 0x1000 ; maximum size of the outputted binary in bytes
STACK_ADDR equ 0x00060000 ; initial rsp. Same value as TOKEN_TABLE_ADDR, but
; push decrements rsp before storing, so the
; stack lives below this address and the token
; table at and above it
UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id of an unrecognised token
UNRECOGNISED_ID_TYPE equ 0x0F ; type of an unrecognised id
UNRECOGNISED_ID_METADATA equ 0xFF ; metadata of an unrecognised id
UNRECOGNISED_ID_OPCODE equ 0x90 ; opcode of an unrecognised id (NOP)
TEST_LINE_LENGTH equ 80 ; right border of test suite results
[bits 64]
[org LOAD_ADDR]
[default abs] ; TODO see if I actually need to do this
; As far as I know, absolute addressing is not harmful on bare metal.
; Reasoning: it stops an annoying assembler warning =D
; entry point: set up the stack, run the test suite, then tokenise and assemble
; the built-in test program and halt
start:
    mov     rsp, STACK_ADDR         ; we might need more stack space, let's just
                                    ; be safe
    mov     rsi, msg_welcome
    call    print

    call    run_tests

    call    clear_token_table
    mov     rdi, program            ; -> program
    movzx   esi, byte [program.size] ; = size of program. program.size is a
                                    ; single byte (db); a 64-bit load would
                                    ; pull in the 7 bytes that follow it
    call    tokenise
    ; rax = number of tokens processed

    push    rax                     ; keep the token count across the clear
    call    clear_output_arena
    pop     rdi                     ; rdi = number of tokens
    call    assemble

    jmp     halt
; ------------------------------------------------------------------------------
; assembling
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; assemble
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
; binary located at OUTPUT_ADDR. It's probably desirable to clear the output
; arena before calling this function.
;
; What is encoded so far:
;   - an operator with no operands: its opcode byte
;   - an operator followed by one or two register tokens: its opcode byte and
;     a direct addressing (mod = 11b) ModR/M byte
; Every other token is skipped. No prefixes (REX, operand size) are emitted and
; nothing stops the output from growing past OUTPUT_SIZE.
;
; Output is appended at [.next_output_byte], which is never rewound, so a
; second call continues where the first one stopped.
;
; parameters:
; rdi = number of tokens in the token table
;
; clobbers:
; rax, bl, rcx, rdx, rsi
; ------------------------------------------------------------------------------
assemble:
    xor     eax, eax                ; rax = index of the token being processed
.loop:
    cmp     rax, rdi                ; both are counts, so compare unsigned
    jae     .break                  ; every token has been processed

    push    rdi                     ; saved: total number of tokens
    push    rax                     ; saved: index of this token
    movzx   edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR]
    ; di = next tte
    call    get_tte_type
    ; al = type of token

    cmp     al, 0x01                ; is the tte an operator?
    jne     .continue_operator      ; if not, jump past the case

.operator:
    push    rax                     ; MUST be popped BEFORE reaching
                                    ; .continue_operator; it holds the type of
                                    ; the token, which is still needed there
    push    rdi
    ; di = tte
    call    get_tte_typed_metadata
    ; al = tte typed metadata
    pop     rdi
    and     al, 11b                 ; mask for # operands
    jnz     .operator_with_args     ; fall through when there are none

.operator_0:
    push    rdi
    ; di = tte
    call    get_opcode
    ; al = opcode
    call    .output_byte            ; nothing to wait for: emit it right away
    pop     rdi
    pop     rax                     ; from start of label .operator
    jmp     .continue_operator

.operator_with_args:
    mov     [.pending_operator_num_args], al ; remember # args for later
    push    rdi
    ; di = tte
    call    get_opcode
    ; al = opcode
    mov     [.pending_operator_opcode], al   ; remember opcode for later
    pop     rdi
    pop     rax                     ; from start of label .operator

.continue_operator:
    cmp     al, 0x02                ; is the tte a register?
    jne     .continue_register      ; if not, jump past the case

.register:
    call    .dec_num_args           ; an argument was found: one fewer to go
    cmp     byte [.pending_operator_num_args], 1
    je      .register_one_of_two    ; first of 2 arguments
    cmp     byte [.pending_operator_num_args], 0
    je      .register_last          ; last argument; not necessarily the last
                                    ; of 2, it could also be the last of 1

    ; otherwise nothing was waiting for a register: discard the token, reset
    ; things, and keep going :/
    push    rsi
    mov     rsi, .warn_unexpected_register
    call    print.warn
    pop     rsi
    call    .reset_state
    jmp     .continue_register

.register_one_of_two:
    mov     [.first_argument], di   ; park the first operand until the second
    jmp     .continue_register      ; one shows up

.register_last:
    ; di = the register that completes the instruction
    movzx   esi, word [.first_argument]
    cmp     si, UNRECOGNISED_TOKEN_ID ; was a first operand parked?
    jne     .operator_finalise_2      ; if so, there are 2 operands

.operator_finalise_1:
    ; one operand: it belongs in the R/M field. The reg field is an opcode
    ; extension; only /0 is produced, which is what `inc` (FF /0) needs.
    mov     esi, edi                ; R/M = the operand
    xor     edi, edi                ; reg = token 0x0000 (rax), reg bits 000b

.operator_finalise_2:
    ; two operands arrive here with di = second operand, si = first operand.
    ; The opcodes in use are the `op r/m, reg` forms, so the first
    ; (destination) operand is the R/M field and the second the reg field.
    ; di = tte `reg`
    ; si = tte `R/M`
    call    get_direct_addressing_ModRM
    ; al = ModR/M byte
    push    rax
    mov     al, [.pending_operator_opcode]
    call    .output_byte            ; output operator's opcode
    pop     rax
    call    .output_byte            ; output ModR/M byte
    call    .reset_state            ; ready for the next instruction

.continue_register:
    pop     rax                     ; index of this token
    pop     rdi                     ; total number of tokens
    inc     rax                     ; move to next token
    jmp     .loop

.break:
    ret

; constants
.warn_unexpected_register db "ignoring unexpected register", 0x0A, 0x00

; procedures

; writes al to [.next_output_byte] and advances it; clobbers rdx
.output_byte:
    mov     edx, [.next_output_byte] ; 32-bit load zero-extends into rdx
    mov     [rdx], al
    inc     edx
    mov     [.next_output_byte], edx
    ret

; runs dec on .pending_operator_num_args; preserves every register
.dec_num_args:
    dec     byte [.pending_operator_num_args]
    ret

; forgets the pending operator and its parked first operand
.reset_state:
    ; the size keywords are required: NASM does not record the size of the
    ; data behind a label, and each store must match its variable's width so
    ; that it does not spill into the neighbouring variable
    mov     byte [.pending_operator_opcode], UNRECOGNISED_ID_OPCODE
    mov     byte [.pending_operator_num_args], 0x00
    mov     word [.first_argument], UNRECOGNISED_TOKEN_ID
    ret

; state variables
.pending_operator_opcode db 0x00            ; opcode of the operator seeking args
.pending_operator_num_args db 0x00          ; # of args it is still waiting for
.first_argument dw UNRECOGNISED_TOKEN_ID    ; first argument if there are two
.next_output_byte dd OUTPUT_ADDR            ; next empty byte in output
; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
; given a token table entry, returns the declared type in `tokens.by_id`. If
; there is no entry, returns UNRECOGNISED_ID_TYPE
;
; `tokens.by_id` is searched linearly; each record is 4 bytes: a 16-bit id, a
; type byte and a metadata byte.
;
; parameters:
; di = token table entry
;
; returned:
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
; zeroed; the rest of rax is zeroed.
; rdi = the token table entry, zero-extended from di
; ------------------------------------------------------------------------------
get_tte_type:
    and     rdi, 0xFFFF             ; mask input so it behaves as expected
    xor     eax, eax                ; rax = index of the record under test
.loop:
    cmp     rax, (tokens.by_id_end - tokens.by_id) / 4
    jae     .not_found              ; index == count is already past the last
                                    ; record, so stop there
    cmp     word [tokens.by_id + rax * 4], di ; id field of the record
    je      .found
    inc     rax
    jmp     .loop
.not_found:
    mov     eax, UNRECOGNISED_ID_TYPE
    and     eax, 0xF                ; mask as expected
    ret
.found:
    movzx   eax, byte [2 + tokens.by_id + rax * 4] ; type field of the record
    and     eax, 0xF                ; mask as expected
    ret
; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
; given a token table entry, returns the declared typed metadata in
; `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA
;
; `tokens.by_id` is searched linearly; each record is 4 bytes: a 16-bit id, a
; type byte and a metadata byte.
;
; parameters:
; di = token table entry
;
; returned:
; al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax is
; zeroed.
; rdi = the token table entry, zero-extended from di
; ------------------------------------------------------------------------------
get_tte_typed_metadata:
    and     rdi, 0xFFFF             ; mask input so it behaves as expected
    xor     eax, eax                ; rax = index of the record under test
.loop:
    cmp     rax, (tokens.by_id_end - tokens.by_id) / 4
    jae     .not_found              ; index == count is already past the last
                                    ; record, so stop there
    cmp     word [tokens.by_id + rax * 4], di ; id field of the record
    je      .found
    inc     rax
    jmp     .loop
.not_found:
    mov     eax, UNRECOGNISED_ID_METADATA
    ret
.found:
    movzx   eax, byte [3 + tokens.by_id + rax * 4] ; metadata field of the record
    ret
; ------------------------------------------------------------------------------
; get_direct_addressing_ModRM
;
; description:
; builds the ModR/M byte for two register tokens with mod fixed to 11b, i.e.
; both operands are registers. Thin wrapper around get_ModRM.
;
; parameters:
; di = token table entry for the `reg` field
; si = token table entry for the `R/M` field
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
; ------------------------------------------------------------------------------
get_direct_addressing_ModRM:
    mov     dl, 11b                 ; mod = 11b: register-direct
    jmp     get_ModRM               ; tail call; get_ModRM returns to our caller
; ------------------------------------------------------------------------------
; get_ModRM
;
; description:
; packs the mod bits and the register bits of two register tokens into a
; ModR/M byte: mod in bits 7:6, reg in bits 5:3, R/M in bits 2:0
;
; parameters:
; di = token table entry for the `reg` field
; si = token table entry for the `R/M` field
; dl = lower 2 bits: mod bits. The rest is ignored
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
;
; scratch:
; bl, cl and dl end up holding the three shifted fields; rdi is overwritten
; with rsi
; ------------------------------------------------------------------------------
get_ModRM:
    shl     dl, 6                   ; dl = mod << 6; the upper 6 bits of the
                                    ; input fall off the top of the register

    ; di = tte
    call    get_reg_bits
    ; al = reg bits
    shl     al, 3
    mov     bl, al                  ; bl = reg << 3

    mov     rdi, rsi                ; now look up the R/M operand
    ; di = tte
    call    get_reg_bits
    ; al = reg bits
    mov     cl, al                  ; cl = R/M

    movzx   eax, dl                 ; start from the mod bits, rest of rax = 0
    or      al, bl                  ; add the reg bits
    or      al, cl                  ; add the R/M bits
    ret
; ------------------------------------------------------------------------------
; get_opcode
;
; description:
; given an operator token, returns its opcode from `opcodes.by_id`. If there is
; no entry, returns UNRECOGNISED_ID_OPCODE
;
; `opcodes.by_id` is searched linearly; each record is 4 bytes: a 16-bit id, an
; opcode byte and a reserved byte.
;
; parameters:
; di = token table entry
;
; returned:
; al = opcode; the rest of rax is zeroed
; rdi = the token table entry, zero-extended from di
; ------------------------------------------------------------------------------
get_opcode:
    and     rdi, 0xFFFF             ; mask input so it behaves as expected
    xor     eax, eax                ; rax = index of the record under test
.loop:
    cmp     rax, (opcodes.by_id_end - opcodes.by_id) / 4
    jae     .not_found              ; index == count is already past the last
                                    ; record, so stop there
    cmp     word [opcodes.by_id + rax * 4], di ; id field of the record
    je      .found
    inc     rax
    jmp     .loop
.not_found:
    mov     eax, UNRECOGNISED_ID_OPCODE
    ret
.found:
    movzx   eax, byte [2 + opcodes.by_id + rax * 4] ; opcode field of the record
    ret
; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
; looks up a register token's typed metadata and extracts the 3-bit register
; number stored in bits 4:2 of it
;
; parameters:
; di = token table entry
;
; returned:
; al = reg bits in the lower 3 bits; the upper 5 bits of al and the rest of
; rax are zeroed.
; ------------------------------------------------------------------------------
get_reg_bits:
    ; di = tte
    call    get_tte_typed_metadata
    ; al = typed metadata, rest of rax zeroed
    shr     eax, 2                  ; drop the 2 low bits, which are not part
                                    ; of the register number
    and     eax, 111b               ; keep the 3 reg bits
    ret
; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; tokenise
; TODO write tests
;
; description:
; represents the program at the given address as 16-bit tokens and puts them in
; the token table. It's probably desirable to clear the token table before
; calling this function.
;
; A terminator byte becomes the token 0xFE00 + that byte. When a closing
; bracket is read, the entry of the latest open bracket is rewritten as
; 0x1000 + the number of entries between the two brackets.
;
; Tokenising stops at the end of the program or when the token table is full
; (TOKEN_TABLE_SIZE / TOKEN_TABLE_ENTRY_SIZE entries), whichever comes first.
; Note that a token which starts inside the program is still scanned up to its
; terminator, even if that lies past the end of the program.
;
; parameters:
; rdi -> first byte of program
; rsi = size of program in bytes
;
; returned:
; rax = number of tokens processed
; ------------------------------------------------------------------------------
tokenise:
    add     rsi, rdi                ; rsi -> first byte AFTER the program
    xor     ecx, ecx                ; rcx = number of tokens processed
.loop:
    cmp     rdi, rsi                ; addresses, so compare unsigned
    jae     .break                  ; every byte of the program has been read
    cmp     rcx, TOKEN_TABLE_SIZE / TOKEN_TABLE_ENTRY_SIZE
    jae     .break                  ; no room left in the token table

    push    rdi
    push    rsi
    push    rcx
    ; rdi -> current byte
    call    identify_next_token
    ; ax = id of token
    ; dx = length of token
    pop     rcx
    pop     rsi
    pop     rdi

    ; deal with terminator character (reported as 0 length token)
    test    rdx, rdx
    jnz     .store
    movzx   eax, byte [rdi]         ; byte of terminator
    or      eax, 0xFE00             ; marked as a terminator token
    mov     edx, 1                  ; byte length is 1

.store:
    add     rdi, rdx                ; current byte + length = next unread byte
    mov     [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], ax

    ; TODO fix undefined behaviour when open brackets and closed brackets aren't
    ; correctly paired or have too much distance between them
    cmp     ax, 0x0051              ; open bracket?
    jne     .check_close_bracket
    ; TODO make brackets able to hold more
    mov     [.data_open_bracket], cl ; record which entry the open bracket is at

.check_close_bracket:
    cmp     ax, 0x0052              ; closing bracket?
    jne     .next
    ; rewrite open bracket token entry with a filled out one
    movzx   edx, byte [.data_open_bracket] ; rdx = entry # of the open bracket
    mov     eax, ecx
    sub     al, dl                  ; al = entries from open to closing bracket
    mov     byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], al
    mov     byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10

.next:
    inc     rcx                     ; +1 token processed
    jmp     .loop

.break:
    mov     rax, rcx
    ret

.data_open_bracket db 0x00 ; low byte of the token # of the latest open bracket
; ------------------------------------------------------------------------------
; identify_token
;
; description:
; looks a token of known length up in the `tokens.by_name_N` table for that
; length and returns its id. Only what is listed in those tables can be
; recognised: a token that has several representations (such as the open
; bracket) comes back as the one that needs no knowledge of the surrounding
; tokens. Anything else, including lengths other than 1 to 4 and therefore a
; lone terminator byte, is reported as UNRECOGNISED_TOKEN_ID.
;
; Each `by_name_N` record is the N name bytes followed by a 16-bit id, and each
; table ends where the next one begins.
;
; parameters:
; rdi -> first byte of token
; rsi = size of token in bytes
;
; returned:
; ax = id of token; the rest of rax is zeroed
;
; scratch:
; rcx, r10, r11
; ------------------------------------------------------------------------------
identify_token:
    cmp     rsi, 1
    je      .start_length1
    cmp     rsi, 2
    je      .start_length2
    cmp     rsi, 3
    je      .start_length3
    cmp     rsi, 4
    je      .start_length4
    jmp     .unrecognised           ; no table for any other length

; ---- 1 byte names, 3 byte records
.start_length1:
    mov     rcx, tokens.by_name_1   ; rcx -> record under test
    jmp     .check_length1
.next_length1:
    add     rcx, 3                  ; name + id
.check_length1:
    cmp     rcx, tokens.by_name_2   ; ran into the next table?
    jge     .unrecognised
    mov     r10b, [rcx]             ; known name
    mov     r11b, [rdi]             ; token
    cmp     r11b, r10b
    jne     .next_length1
    movzx   eax, word [rcx + 1]     ; id follows the name
    ret

; ---- 2 byte names, 4 byte records
.start_length2:
    mov     rcx, tokens.by_name_2   ; rcx -> record under test
    jmp     .check_length2
.next_length2:
    add     rcx, 4                  ; name + id
.check_length2:
    cmp     rcx, tokens.by_name_3   ; ran into the next table?
    jge     .unrecognised
    mov     r10w, [rcx]             ; known name
    mov     r11w, [rdi]             ; token
    cmp     r11w, r10w
    jne     .next_length2
    movzx   eax, word [rcx + 2]     ; id follows the name
    ret

; ---- 3 byte names, 5 byte records
.start_length3:
    mov     rcx, tokens.by_name_3   ; rcx -> record under test
    jmp     .check_length3
.next_length3:
    add     rcx, 5                  ; name + id
.check_length3:
    cmp     rcx, tokens.by_name_4   ; ran into the next table?
    jge     .unrecognised
    ; TODO make this safe (it overreaches 1 byte)
    mov     r10d, [rcx]             ; known name + 1 more byte
    mov     r11d, [rdi]             ; token + 1 more byte
    and     r10d, 0x00FFFFFF        ; throw the extra byte away on both sides
    and     r11d, 0x00FFFFFF
    cmp     r11d, r10d
    jne     .next_length3
    movzx   eax, word [rcx + 3]     ; id follows the name
    ret

; ---- 4 byte names, 6 byte records
.start_length4:
    mov     rcx, tokens.by_name_4   ; rcx -> record under test
    jmp     .check_length4
.next_length4:
    add     rcx, 6                  ; name + id
.check_length4:
    cmp     rcx, tokens.by_name_5   ; ran off the end of the length 4 table?
    jge     .unrecognised
    mov     r10d, [rcx]             ; known name
    mov     r11d, [rdi]             ; token
    cmp     r11d, r10d
    jne     .next_length4
    movzx   eax, word [rcx + 4]     ; id follows the name
    ret

.unrecognised:
    mov     eax, UNRECOGNISED_TOKEN_ID
    ret
; ------------------------------------------------------------------------------
; identify_next_token
; description:
; measures the token that starts at rdi by counting bytes until one of the
; bytes in `token_terminator_8` is met, then hands it to identify_token. If
; the very first byte is a terminator the length is 0 (and identify_token
; reports the token as unrecognised).
;
; parameters:
; rdi -> first byte of token
;
; returned:
; ax = id of token; the rest of rax is zeroed
; dx = length of token in bytes; the rest of rdx is zeroed
;
; rdi is preserved; rsi is left holding the length as well
; ------------------------------------------------------------------------------
identify_next_token:
    push    rdi                     ; -> first byte, wanted again at the end
    mov     rsi, rdi                ; rsi -> byte under inspection
    xor     edi, edi                ; rdi = bytes counted so far
.scan:
    movzx   edx, byte [rsi]         ; dl = byte under inspection
    push    rsi
    push    rdi
    push    rdx
    mov     edi, 8                  ; length of terminator list
    mov     rsi, token_terminator_8 ; start of terminator list
    ; dl = byte to look for
    call    elemb
    ; rax = 1 if dl is a terminator, else 0
    pop     rdx
    pop     rdi
    pop     rsi
    test    rax, rax
    jnz     .measured               ; terminator reached: length is known
    inc     rdi                     ; one more byte belongs to the token
    inc     rsi
    jmp     .scan
.measured:
    mov     rsi, rdi                ; rsi = length of token
    pop     rdi                     ; rdi -> first byte of token
    push    rsi
    call    identify_token
    ; ax = id of token
    pop     rdx                     ; rdx = length of token
    mov     rsi, rdx
    ret
; ------------------------------------------------------------------------------
; utilities
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; print
;
; description:
; writes a null-terminated string, one byte at a time, to I/O port 0x3F8
; (conventionally the data port of the first PC serial port). The terminating
; null is not written. All registers are preserved, for ease of debugging.
;
; print.debug, print.error, print.test and print.warn do the same after first
; writing "[DEBUG]: ", "[ERROR]: ", "[TEST]: " or "[WARN]: ".
;
; parameters:
; rsi -> start of null-terminated string
; ------------------------------------------------------------------------------
print:
    push    rdx
    push    rax
    push    rsi
    mov     edx, 0x3F8              ; dx = output port
    jmp     .fetch
.emit:
    out     dx, al
    inc     rsi
.fetch:
    mov     al, [rsi]
    test    al, al
    jnz     .emit                   ; stop at the terminating null
    pop     rsi
    pop     rax
    pop     rdx
    ret

.debug:
    push    rsi
    mov     rsi, .debug_msg
    call    print                   ; prefix
    pop     rsi
    jmp     print                   ; message; returns to our caller

.error:
    push    rsi
    mov     rsi, .error_msg
    call    print                   ; prefix
    pop     rsi
    jmp     print                   ; message; returns to our caller

.test:
    push    rsi
    mov     rsi, .test_msg
    call    print                   ; prefix
    pop     rsi
    jmp     print                   ; message; returns to our caller

.warn:
    push    rsi
    mov     rsi, .warn_msg
    call    print                   ; prefix
    pop     rsi
    jmp     print                   ; message; returns to our caller

.debug_msg db "[DEBUG]: ", 0x00
.error_msg db "[ERROR]: ", 0x00
.test_msg db "[TEST]: ", 0x00
.warn_msg db "[WARN]: ", 0x00
; ------------------------------------------------------------------------------
; halt
;
; description:
; halts the program, silly :)
; prints msg_halt once and then never returns. `hlt` only stops the processor
; until the next interrupt, so it sits in a loop; the loop goes back to the
; `hlt` itself rather than to the top, otherwise the message would be printed
; again after every wake-up.
; ------------------------------------------------------------------------------
halt:
    mov     rsi, msg_halt
    call    print
.spin:
    hlt
    jmp     .spin
; ------------------------------------------------------------------------------
; elemb
;
; description:
; linear search: tells whether a byte occurs in a list of bytes. An empty list
; contains nothing.
;
; parameters:
; rdi = size of list
; rsi -> start of list
; dl = given byte
;
; returned:
; rax = 0: is not an element
; 1: is an element
;
; rsi and rdi are advanced/decremented as the list is walked
; ------------------------------------------------------------------------------
elemb:
    test    rdi, rdi
    jz      .not_found              ; empty list
.loop:
    cmp     dl, [rsi]               ; is this list byte the one we want?
    je      .found
    inc     rsi                     ; move to next byte
    dec     rdi                     ; and reduce remaining length
    jnz     .loop                   ; keep going while bytes remain
.not_found:
    xor     eax, eax                ; return 0; dl not an element of list
    ret
.found:
    mov     eax, 1                  ; return 1; dl an element of list
    ret
.f db "found", 0x0A, 0x00
.nf db "not found", 0x0A, 0x00
; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
; zero-fills the TOKEN_TABLE_SIZE bytes starting at TOKEN_TABLE_ADDR, one
; double word at a time
;
; clobbers rax, rcx and rdi
; ------------------------------------------------------------------------------
clear_token_table:
    mov     edi, TOKEN_TABLE_ADDR       ; rdi -> first byte to clear
    mov     ecx, TOKEN_TABLE_SIZE / 4   ; rcx = number of double words
    xor     eax, eax                    ; eax = fill value
    rep stosd
    ret
; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
; clears the test arena as specified by TEST_ARENA_SIZE and TEST_ARENA_ADDR,
; one double word at a time
;
; clobbers rax, rcx and rdi
; ------------------------------------------------------------------------------
clear_test_arena:
    xor     eax, eax                    ; value to write
    mov     rcx, TEST_ARENA_SIZE / 4    ; number of double words
    mov     rdi, TEST_ARENA_ADDR        ; address to start
    rep stosd
    ret
; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
; zero-fills the OUTPUT_SIZE bytes starting at OUTPUT_ADDR, one double word at
; a time
;
; clobbers rax, rcx and rdi
; ------------------------------------------------------------------------------
clear_output_arena:
    mov     edi, OUTPUT_ADDR            ; rdi -> first byte to clear
    mov     ecx, OUTPUT_SIZE / 4        ; rcx = number of double words
    xor     eax, eax                    ; eax = fill value
    rep stosd
    ret
%include "asm/tests.asm"
; ------------------------------------------------------------------------------
; data
; ------------------------------------------------------------------------------
; TOKEN_NAME_ENTRY name, id
; one record of a `tokens.by_name_N` table: the N bytes of the name followed by
; the 16-bit id of the token
%macro TOKEN_NAME_ENTRY 2
    db %1
    dw %2
%endmacro

; TOKEN_ID_ENTRY id, type, metadata
; one record of `tokens.by_id`: 16-bit id, type byte, typed metadata byte
;   type 0x01 = operator; the low 2 bits of the metadata are the # of operands
;   type 0x02 = register; metadata bits 4:2 are the reg bits, bits 1:0 the
;               width (10b = 32 bits, 11b = 64 bits)
%macro TOKEN_ID_ENTRY 3
    dw %1
    db %2
    db %3
%endmacro

; known tokens. The `.by_name_N` tables are searched by identify_token; a table
; ends where the next label begins. `.by_id` is searched by get_tte_type and
; get_tte_typed_metadata.
tokens:
.by_name_1:
    TOKEN_NAME_ENTRY "[", 0x0051
    TOKEN_NAME_ENTRY "]", 0x0052
    TOKEN_NAME_ENTRY "+", 0x0062
    TOKEN_NAME_ENTRY "-", 0x0063
    TOKEN_NAME_ENTRY "*", 0x0064
    TOKEN_NAME_ENTRY "/", 0x0065
.by_name_2:
    TOKEN_NAME_ENTRY "r8", 0x0008
    TOKEN_NAME_ENTRY "r9", 0x0009
    TOKEN_NAME_ENTRY "ax", 0x0020
    TOKEN_NAME_ENTRY "bx", 0x0021
    TOKEN_NAME_ENTRY "cx", 0x0022
    TOKEN_NAME_ENTRY "dx", 0x0023
    TOKEN_NAME_ENTRY "si", 0x0024
    TOKEN_NAME_ENTRY "di", 0x0025
    TOKEN_NAME_ENTRY "sp", 0x0026
    TOKEN_NAME_ENTRY "bp", 0x0027
    TOKEN_NAME_ENTRY "al", 0x0030
    TOKEN_NAME_ENTRY "bl", 0x0031
    TOKEN_NAME_ENTRY "cl", 0x0032
    TOKEN_NAME_ENTRY "dl", 0x0033
    TOKEN_NAME_ENTRY "ah", 0x0040
    TOKEN_NAME_ENTRY "bh", 0x0041
    TOKEN_NAME_ENTRY "ch", 0x0042
    TOKEN_NAME_ENTRY "dh", 0x0043
    TOKEN_NAME_ENTRY "cs", 0x0044
    TOKEN_NAME_ENTRY "ds", 0x0045
    TOKEN_NAME_ENTRY "es", 0x0046
    TOKEN_NAME_ENTRY "fs", 0x0047
    TOKEN_NAME_ENTRY "gs", 0x0048
    TOKEN_NAME_ENTRY "ss", 0x0049
    TOKEN_NAME_ENTRY "je", 0x005C
    TOKEN_NAME_ENTRY "jg", 0x005F
    TOKEN_NAME_ENTRY "jl", 0x0061
.by_name_3:
    TOKEN_NAME_ENTRY "rax", 0x0000
    TOKEN_NAME_ENTRY "rbx", 0x0001
    TOKEN_NAME_ENTRY "rcx", 0x0002
    TOKEN_NAME_ENTRY "rdx", 0x0003
    TOKEN_NAME_ENTRY "rsi", 0x0004
    TOKEN_NAME_ENTRY "rdi", 0x0005
    TOKEN_NAME_ENTRY "rsp", 0x0006
    TOKEN_NAME_ENTRY "rbp", 0x0007
    TOKEN_NAME_ENTRY "r10", 0x000A
    TOKEN_NAME_ENTRY "r11", 0x000B
    TOKEN_NAME_ENTRY "r12", 0x000C
    TOKEN_NAME_ENTRY "r13", 0x000D
    TOKEN_NAME_ENTRY "r14", 0x000E
    TOKEN_NAME_ENTRY "r15", 0x000F
    TOKEN_NAME_ENTRY "eax", 0x0010
    TOKEN_NAME_ENTRY "ebx", 0x0011
    TOKEN_NAME_ENTRY "ecx", 0x0012
    TOKEN_NAME_ENTRY "edx", 0x0013
    TOKEN_NAME_ENTRY "esi", 0x0014
    TOKEN_NAME_ENTRY "edi", 0x0015
    TOKEN_NAME_ENTRY "esp", 0x0016
    TOKEN_NAME_ENTRY "ebp", 0x0017
    TOKEN_NAME_ENTRY "r8d", 0x0018
    TOKEN_NAME_ENTRY "r9d", 0x0019
    TOKEN_NAME_ENTRY "r8w", 0x0028
    TOKEN_NAME_ENTRY "r9w", 0x0029
    TOKEN_NAME_ENTRY "sil", 0x0034
    TOKEN_NAME_ENTRY "dil", 0x0035
    TOKEN_NAME_ENTRY "spl", 0x0036
    TOKEN_NAME_ENTRY "bpl", 0x0037
    TOKEN_NAME_ENTRY "r8b", 0x0038
    TOKEN_NAME_ENTRY "r9b", 0x0039
    TOKEN_NAME_ENTRY "cr0", 0x004A
    TOKEN_NAME_ENTRY "cr2", 0x004B
    TOKEN_NAME_ENTRY "cr3", 0x004C
    TOKEN_NAME_ENTRY "cr4", 0x004D
    TOKEN_NAME_ENTRY "cr8", 0x004E
    TOKEN_NAME_ENTRY "hlt", 0x004F
    TOKEN_NAME_ENTRY "xor", 0x0053
    TOKEN_NAME_ENTRY "inc", 0x0054
    TOKEN_NAME_ENTRY "dec", 0x0055
    TOKEN_NAME_ENTRY "mov", 0x0056
    TOKEN_NAME_ENTRY "add", 0x0057
    TOKEN_NAME_ENTRY "sub", 0x0058
    TOKEN_NAME_ENTRY "ret", 0x005A
    TOKEN_NAME_ENTRY "cmp", 0x005B
    TOKEN_NAME_ENTRY "jne", 0x005D
    TOKEN_NAME_ENTRY "jge", 0x005E
    TOKEN_NAME_ENTRY "jle", 0x0060
.by_name_4:
    TOKEN_NAME_ENTRY "r10d", 0x001A
    TOKEN_NAME_ENTRY "r11d", 0x001B
    TOKEN_NAME_ENTRY "r12d", 0x001C
    TOKEN_NAME_ENTRY "r13d", 0x001D
    TOKEN_NAME_ENTRY "r14d", 0x001E
    TOKEN_NAME_ENTRY "r15d", 0x001F
    TOKEN_NAME_ENTRY "r10w", 0x002A
    TOKEN_NAME_ENTRY "r11w", 0x002B
    TOKEN_NAME_ENTRY "r12w", 0x002C
    TOKEN_NAME_ENTRY "r13w", 0x002D
    TOKEN_NAME_ENTRY "r14w", 0x002E
    TOKEN_NAME_ENTRY "r15w", 0x002F
    TOKEN_NAME_ENTRY "r10b", 0x003A
    TOKEN_NAME_ENTRY "r11b", 0x003B
    TOKEN_NAME_ENTRY "r12b", 0x003C
    TOKEN_NAME_ENTRY "r13b", 0x003D
    TOKEN_NAME_ENTRY "r14b", 0x003E
    TOKEN_NAME_ENTRY "r15b", 0x003F
    TOKEN_NAME_ENTRY "int3", 0x0050
    TOKEN_NAME_ENTRY "call", 0x0059
.by_name_5:                         ; empty; marks the end of .by_name_4
.by_id:
    TOKEN_ID_ENTRY 0x0010, 0x02, 00000010b  ; eax: register, reg 000b, 32 bits
    TOKEN_ID_ENTRY 0x0000, 0x02, 00000011b  ; rax: register, reg 000b, 64 bits
    TOKEN_ID_ENTRY 0x0003, 0x02, 00001011b  ; rdx: register, reg 010b, 64 bits
    TOKEN_ID_ENTRY 0x0053, 0x01, 0x02       ; xor: operator, 2 operands
    TOKEN_ID_ENTRY 0x0054, 0x01, 0x01       ; inc: operator, 1 operand
    TOKEN_ID_ENTRY 0x0056, 0x01, 0x02       ; mov: operator, 2 operands
    TOKEN_ID_ENTRY 0x004F, 0x01, 0x00       ; hlt: operator, 0 operands
.by_id_end:
; OPCODE_ID_ENTRY id, opcode
; one record of `opcodes.by_id`: 16-bit token id, opcode byte, reserved byte
%macro OPCODE_ID_ENTRY 2
    dw %1
    db %2
    db 0x00                         ; reserved
%endmacro

; opcode of each operator token; searched by get_opcode
opcodes:
.by_id:
    OPCODE_ID_ENTRY 0x0053, 0x31    ; xor
    OPCODE_ID_ENTRY 0x0054, 0xFF    ; inc
    OPCODE_ID_ENTRY 0x0056, 0x89    ; mov
    OPCODE_ID_ENTRY 0x004F, 0xF4    ; hlt
.by_id_end:
; null-terminated strings for `print`
msg_welcome db "Welcome to Twasm", 0x0A, 0x00
msg_halt db "halted.", 0x0A, 0x00
; bytes that end a token: null, space, line feed, carriage return and comma.
; The list is padded with nulls to 8 bytes, the length identify_next_token
; passes to elemb.
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
debug_string db "debug_string", 0x0A, 0x00
; test program: the source text that `start` tokenises and assembles
program:
db "xor eax, eax", 0x0A
db "inc rax", 0x0A
db "mov [ rax ], rdx", 0x0A
db "hlt", 0x0A
db 0x00 ; just for the sake of being able to print it, I made it a string
; length of the text above, not counting the terminating null. This is a single
; byte (db), so it must be read as a byte and cannot describe more than 255
; bytes of program.
.size db $ - program - 1