Files
bootler/twasm/asm/main.asm

1230 lines
29 KiB
NASM

; TODO actually enforce any of these *_SIZE constants :p
; memory layout: every *_ADDR below is an absolute address
LOAD_ADDR equ 0x00010000 ; address this program is loaded at
TEST_ARENA_ADDR equ 0x00050000 ; address to run tests at
TEST_ARENA_SIZE equ 0x1000 ; maximum size tests can use
TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at
TOKEN_TABLE_SIZE equ 0x1000 ; max length of table
TOKEN_TABLE_ENTRY_SIZE equ 2 ; size of token table entry; things may break
; if this ever changes
OUTPUT_ADDR equ 0x00070000 ; address of the output binary
OUTPUT_SIZE equ 0x1000 ; max length of the output binary
; NOTE: STACK_ADDR has the same value as TOKEN_TABLE_ADDR. A push decrements
; rsp before storing, so the stack lives below this address and the token
; table lives at and above it. Nothing limits how far down the stack grows.
STACK_ADDR equ 0x00060000 ; address to put the 64-bit stack at
; values returned by the lookup routines when an id is not in their table
UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id of an unrecognised token
UNRECOGNISED_ID_TYPE equ 0x0F ; type of an unrecognised id
UNRECOGNISED_ID_METADATA equ 0xFF ; metadata of an unrecognised id
UNRECOGNISED_ID_OPCODE equ 0x90 ; opcode of an unrecognised id (NOP)
TEST_LINE_LENGTH equ 80 ; right border of test suite results
; flags for expected values in tokeniser
; these are single bits, OR-ed together into the byte `tokenise.expecting`
E_COMMENT equ 1 << 0
E_NEWLINE equ 1 << 1
E_WHITESPACE equ 1 << 2
E_COMMA equ 1 << 3
E_OPERATOR equ 1 << 4
E_OPERAND equ 1 << 5
[bits 64]
[org LOAD_ADDR]
[default abs] ; TODO see if I actually need to do this
; afaik absolute addressing is not harmful on bare metal
; reasoning: stops annoying warning =D
; ------------------------------------------------------------------------------
; start
;
; description:
; entry point. Sets up the stack, prints the welcome message, runs the test
; suite, then tokenises and assembles the built-in test program. Never returns;
; ends in `halt`.
; ------------------------------------------------------------------------------
start:
	mov rsp, STACK_ADDR		; we might need more stack space, let's just be safe

	mov rsi, msg_welcome
	call print

	call run_tests			; not defined in this file (presumably asm/tests.asm)

	call clear_token_table

	mov rdi, program		; -> program
	; `program.size` is declared with `db`, so load exactly one byte and
	; zero-extend it. A 64-bit load would also read the 7 bytes after it.
	movzx esi, byte [program.size]	; = size of program
	call tokenise
	; rax = number of tokens processed

	push rax			; keep the token count across the clear
	call clear_output_arena		; clobbers rax, rcx and rdi
	pop rdi				; rdi = number of tokens
	call assemble

	jmp halt
; ------------------------------------------------------------------------------
; assembling
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; assemble
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
; binary located at OUTPUT_ADDR. It's probably desirable to clear the output
; arena before calling this function.
;
; Only operators and registers are handled:
; - an operator with 0 operands emits its opcode immediately
; - an operator with operands is remembered until its last register arrives,
;   then the opcode and a direct-addressing (mod = 11b) ModR/M byte are emitted
; - any other token type is skipped
;
; The write position (.next_output_byte) lives in memory and is never rewound,
; so a second call appends after the output of the first. Output is not checked
; against OUTPUT_SIZE.
;
; parameters:
; rdi = number of tokens in the token table
;
; returned:
; nothing. rdi is preserved; treat rax, rbx, rcx, rdx and rsi as clobbered
; (some of that comes from the lookup routines this calls).
; ------------------------------------------------------------------------------
assemble:
	xor eax, eax			; rax = index of the next token to process

.loop:
	cmp rax, rdi			; counts are unsigned, so compare unsigned
	jae .break			; all tokens processed

	push rdi			; total number of tokens
	push rax			; index of this token
	movzx edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte

	; di = next tte
	call get_tte_type
	; al = type of token

	cmp al, 0x01			; is the next tte an operator?
	jne .continue_operator		; if not, jump past the case

.operator:				; if next tte's type is an operator:
	push rax			; MUST be popped BEFORE returning to .continue_operator; it
					; contains the type of token, which still needs to be used.
	push rdi
	; di = tte
	call get_tte_typed_metadata
	; al = tte typed metadata
	pop rdi

	and al, 11b			; mask for # operands
	jnz .operator_with_args		; 1 or more operands: wait for them

.operator_0:				; no operands: emit the opcode right away
	push rdi
	; di = next tte
	call get_opcode
	; al = opcode
	call .output_byte
	pop rdi
	pop rax				; from start of label .operator
	jmp .continue_operator

.operator_with_args:
	mov [.pending_operator_num_args], al	; save # args for the time being

	push rdi
	; di = next tte
	call get_opcode
	; al = opcode
	mov [.pending_operator_opcode], al	; save opcode for the time being
	pop rdi
	pop rax				; from start of label .operator

.continue_operator:
	cmp al, 0x02			; is the next tte a register?
	jne .continue_register		; if not, jump past the case

.register:				; if next tte's type is a register:
	call .dec_num_args		; found an argument, so one fewer is outstanding

	cmp byte [.pending_operator_num_args], 1	; is this the 1st of 2 args?
	je .register_one_of_two
	cmp byte [.pending_operator_num_args], 0	; is this the last arg?
	je .register_last		; note: not necessarily the last of 2 args,
					; it could also be the last of 1

	; otherwise, discard the token, reset things, and keep going :/
	push rsi
	mov rsi, .warn_unexpected_register
	call print.warn
	pop rsi
	call .reset_state
	jmp .continue_register

.register_one_of_two:			; if it's the first of 2 arguments:
	mov [.first_argument], di	; remember it until the second one arrives
	jmp .continue_register

.register_last:				; if it's the last argument:
	; di = this (last) register
	movzx esi, word [.first_argument]	; si = first argument, if there was one
	cmp si, UNRECOGNISED_TOKEN_ID	; was a first argument recorded?
	jne .operator_finalise_2	; if so, there are 2 arguments
					; if not, there is just 1

.operator_finalise_1:
	; One operand: the operand is encoded in R/M and the reg field carries
	; the opcode extension. The extension is taken to be /0 (as for inc),
	; which is the reg bits of token id 0x0000 (rax).
	mov esi, edi			; R/M = the operand
	xor edi, edi			; reg = 000b

.operator_finalise_2:
	; Two operands: reg = last operand, R/M = first operand. This matches
	; the `op r/m, r` opcodes in opcodes.by_id.
	; di = tte for reg
	; si = tte for R/M
	call get_direct_addressing_ModRM
	; al = ModR/M byte

	push rax
	mov al, [.pending_operator_opcode]
	call .output_byte		; output operator's opcode
	pop rax
	call .output_byte		; output ModR/M byte

	call .reset_state		; reset all the state parts of this function

.continue_register:
	pop rax				; index of this token
	pop rdi				; total number of tokens
	inc rax				; move to next token
	jmp .loop

.break:
	ret

	; constants

.warn_unexpected_register db "ignoring unexpected register", 0x0A, 0x00

	; procedures

	; writes al to the output and advances the write position
	; al = byte to write
	; clobbers rdx
.output_byte:
	mov edx, [.next_output_byte]	; get output byte's address (zero-extended)
	mov [rdx], al			; write byte to that address
	inc edx				; increment address
	mov [.next_output_byte], edx	; put output byte's address
	ret

	; runs dec on .pending_operator_num_args
.dec_num_args:
	dec byte [.pending_operator_num_args]
	ret

	; forgets the pending operator and its first argument
.reset_state:
	; NASM does not record the size of a label, so every store of an
	; immediate needs an explicit size, and it has to match the variable:
	; a `word` store to a `db` would also overwrite the byte after it.
	; The opcode value stored here is never emitted, because a register is
	; only finalised after an operator has stored a real opcode.
	mov byte [.pending_operator_opcode], UNRECOGNISED_ID_OPCODE
	mov byte [.pending_operator_num_args], 0x00
	mov word [.first_argument], UNRECOGNISED_TOKEN_ID
	ret

	; state variables

.pending_operator_opcode db 0x00		; the operator seeking args
.pending_operator_num_args db 0x00		; # of args it still needs
.first_argument dw UNRECOGNISED_TOKEN_ID	; first argument if there are two
.next_output_byte dd OUTPUT_ADDR		; next empty byte in output
; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
; given a token table entry, returns the declared type in `tokens.by_id`. If
; there is no entry, returns UNRECOGNISED_ID_TYPE
;
; parameters:
; di = token table entry
;
; returned:
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
; zeroed; the rest of rax is zeroed.
; rdi = the entry, with everything above the low 16 bits zeroed
; ------------------------------------------------------------------------------
get_tte_type:
	and edi, 0xFFFF			; mask input so it behaves as expected
	xor eax, eax			; rax = index into tokens.by_id

.loop:
	; valid indices are 0 .. count - 1, so index == count is already out
	; of range; it would read whatever follows the table
	cmp rax, (tokens.by_id_end - tokens.by_id) / 4
	jae .not_found

	cmp [tokens.by_id + rax * 4], di	; does this entry's id match?
	je .found

	inc rax
	jmp .loop

.not_found:
	mov eax, UNRECOGNISED_ID_TYPE
	and eax, 0xF			; mask as expected
	ret

.found:
	movzx eax, byte [2 + tokens.by_id + rax * 4]	; type is byte 2 of the entry
	and eax, 0xF			; mask as expected
	ret
; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
; given a token table entry, returns the declared typed metadata in
; `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA
;
; parameters:
; di = token table entry
;
; returned:
; al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax is
; zeroed.
; rdi = the entry, with everything above the low 16 bits zeroed
; ------------------------------------------------------------------------------
get_tte_typed_metadata:
	and edi, 0xFFFF			; mask input so it behaves as expected
	xor eax, eax			; rax = index into tokens.by_id

.loop:
	; valid indices are 0 .. count - 1, so index == count is already out
	; of range; it would read whatever follows the table
	cmp rax, (tokens.by_id_end - tokens.by_id) / 4
	jae .not_found

	cmp [tokens.by_id + rax * 4], di	; does this entry's id match?
	je .found

	inc rax
	jmp .loop

.not_found:
	mov eax, UNRECOGNISED_ID_METADATA
	ret

.found:
	movzx eax, byte [3 + tokens.by_id + rax * 4]	; metadata is byte 3 of the entry
	ret
; ------------------------------------------------------------------------------
; get_direct_addressing_ModRM
;
; description:
; builds the ModR/M byte for two register tokens in direct addressing mode,
; i.e. with the mod field fixed at 11b. Thin wrapper around get_ModRM.
;
; parameters:
; di = token table entry `reg`
; si = token table entry `R/M`
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
; ------------------------------------------------------------------------------
get_direct_addressing_ModRM:
	mov dl, 11b			; mod = 11b
	jmp get_ModRM			; tail call: get_ModRM returns to our caller
; ------------------------------------------------------------------------------
; get_ModRM
;
; description:
; given 2 register tokens and the mod bits, returns the ModR/M byte, laid out as
; mod (bits 7-6), reg (bits 5-3), R/M (bits 2-0)
;
; parameters:
; di = token table entry `reg`
; si = token table entry `R/M`
; dl = lower 2 bits: mod bits. The rest is ignored
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
; rbx is preserved. dl and rdi are modified, and whatever the metadata lookup
; clobbers should be treated as clobbered here too.
; ------------------------------------------------------------------------------
get_ModRM:
	push rbx			; bl is used as scratch below

	and dl, 11b			; mask for mod bits
	shl dl, 6			; move them to bits 7-6

	; di = tte
	call get_reg_bits
	; al = reg bits
	mov bl, al
	shl bl, 3			; move them to bits 5-3

	mov rdi, rsi			; do the other one
	; di = tte
	call get_reg_bits
	; al = R/M bits, the rest of rax is zero

	or al, bl			; reg bits
	or al, dl			; mod bits
	movzx eax, al			; guarantee the rest of rax is zero

	pop rbx
	ret
; ------------------------------------------------------------------------------
; get_opcode
;
; description:
; given an operator token, returns its opcode from `opcodes.by_id`. If there is
; no entry, returns UNRECOGNISED_ID_OPCODE
;
; parameters:
; di = token table entry
;
; returned:
; al = opcode; the rest of rax is zeroed
; rdi = the entry, with everything above the low 16 bits zeroed
; ------------------------------------------------------------------------------
get_opcode:
	and edi, 0xFFFF			; mask input so it behaves as expected
	xor eax, eax			; rax = index into opcodes.by_id

.loop:
	; valid indices are 0 .. count - 1, so index == count is already out
	; of range; it would read whatever follows the table
	cmp rax, (opcodes.by_id_end - opcodes.by_id) / 4
	jae .not_found

	cmp [opcodes.by_id + rax * 4], di	; does this entry's id match?
	je .found

	inc rax
	jmp .loop

.not_found:
	mov eax, UNRECOGNISED_ID_OPCODE
	ret

.found:
	movzx eax, byte [2 + opcodes.by_id + rax * 4]	; opcode is byte 2 of the entry
	ret
; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
; looks up a register token's typed metadata and extracts the 3-bit register
; number stored in bits 4-2 of it. The low 2 bits of the metadata hold other
; data and are dropped.
;
; parameters:
; di = token table entry
;
; returned:
; al = reg bits; the rest of rax, including the upper 5 bits of al, are
; zeroed.
; ------------------------------------------------------------------------------
get_reg_bits:
	; di = tte
	call get_tte_typed_metadata
	; eax = typed metadata, zero-extended from al

	shr eax, 2			; drop the low 2 bits
	and eax, 111b			; keep only the 3 reg bits
	ret
; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; tokenise
; TODO write tests
;
; description:
; scans the program at the given address and writes token ids to the token
; table at TOKEN_TABLE_ADDR. It's probably desirable to clear the token table
; before calling this function.
;
; Currently only operators are written to the table. Operands are recognised
; and skipped. What may come next is tracked in `.expecting` (E_* flags);
; anything unexpected prints an error and jumps to `halt`, so this function
; only returns on success.
;
; `.expecting` lives in memory and is not reset on entry. The number of tokens
; written is not checked against TOKEN_TABLE_SIZE.
;
; parameters:
; rdi -> first byte of program
; rsi = size of program in bytes
;
; returned:
; rax = number of tokens processed
; rdx, rsi, rdi and r8 are clobbered
; ------------------------------------------------------------------------------
tokenise:
	; rdi -> current byte of program
	add rsi, rdi			; rsi -> one past the last byte of program
	xor eax, eax			; rax = number of tokens processed
	xor edx, edx			; dl = current byte of program

.loop:
	cmp rdi, rsi			; rsi itself is already outside the program,
	jae .break			; so stop as soon as rdi reaches it

	mov dl, [rdi]			; dl = current byte

	cmp dl, ";"			; if current byte is the start of a comment
	je .comment			; then handle the comment

	cmp dl, 0x0A			; if current byte is the end of a line
	je .newline_mk_flags		; then reset relevant flags

	cmp dl, ","			; if current byte is a comma
	je .comma			; then handle the comma

	push rsi
	push rdi
	push rax
	push rdx
	; TODO probably should not ignore null bytes
	mov rsi, whitespace_3		; rsi -> list of whitespace (ignored) bytes
	mov rdi, 3			; rdi = size of list in bytes
	; dl = current byte
	call elemb
	; al = 0 if not whitespace, 1 if whitespace
	cmp al, 1			; check if current byte is whitespace
	pop rdx				; pop leaves the flags from cmp intact
	pop rax
	pop rdi
	pop rsi
	je .skip_byte_whitespace

	test byte [.expecting], E_OPERATOR	; check if an operator is expected
	jnz .operator			; if so, handle it
	jmp .operand			; otherwise, handle as an operand

.comment:
	push rsi
	mov rsi, .msg_comment
	call .report_found
	pop rsi

	test byte [.expecting], E_COMMENT	; make sure a comment is expected
	jz .unexpected_comment		; if not, error

.comment_loop:
	cmp rdi, rsi			; a comment on the last line may have no newline
	jae .comment_break

	mov dl, [rdi]			; dl = current byte

	cmp dl, 0x0A			; if current byte is a newline
	je .comment_break		; then break; the main loop handles the newline

	inc rdi				; point to next unread byte
	jmp .comment_loop

.comment_break:
	jmp .loop

.skip_byte_whitespace:
	push rsi
	mov rsi, .msg_whitespace
	call .report_found
	pop rsi

	test byte [.expecting], E_WHITESPACE	; make sure a whitespace was expected
	jz .unexpected_whitespace	; if not, error

	inc rdi
	jmp .loop			; else, loop

.comma:					; found comma
	push rsi
	mov rsi, .msg_comma
	call .report_found
	pop rsi

	test byte [.expecting], E_COMMA	; make sure a comma was expected
	jz .unexpected_comma		; if not, error

	inc rdi
	mov byte [.expecting], E_WHITESPACE | E_OPERAND	; else, make operand expected
	jmp .loop			; and loop

.newline_mk_flags:
	push rsi
	mov rsi, .msg_newline
	call .report_found
	pop rsi

	test byte [.expecting], E_NEWLINE	; make sure a newline was expected
	jz .unexpected_newline		; if not, error

	mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
	inc rdi
	jmp .loop

.operator:
	; debug message
	push rsi
	mov rsi, .msg_operator
	call .report_found
	pop rsi

	push rax			; save the token count
	mov dword [.pending_operator], 0	; zero pending operator
	xor eax, eax			; eax = number of bytes in operator

.operator_loop:
	cmp rdi, rsi			; the end of the program also ends the operator
	jae .operator_break

	mov dl, [rdi]			; next byte

	; TODO have better check for operator end
	cmp dl, " "
	je .operator_break
	cmp dl, 0x0A
	je .operator_break
	cmp dl, 0x00
	je .operator_break
	cmp dl, ";"
	je .operator_break

	; TODO give this its own error
	; TODO make this pop rax
	; .pending_operator holds 4 bytes; check before storing so that a 5th
	; byte is never written past the end of it
	cmp eax, 4
	jae .unexpected_operator

	mov [.pending_operator + rax], dl
	inc eax				; inc byte counter
	inc rdi				; inc byte pointer
	jmp .operator_loop		; and loop

.operator_break:
	pop rax				; rax = number of tokens processed

	push rcx
	mov rcx, tokens.operators	; rcx -> entry in lookup table

.operator_id_loop:
	; tokens.operators_end is the first byte after the table, so an entry
	; starting there is not an operator
	cmp rcx, tokens.operators_end
	; TODO give own error
	jae .unexpected_operator	; ran out of entries: error

	; TODO use something other than r8
	mov r8d, [rcx]			; first 4 bytes of the entry are its name
	cmp r8d, [.pending_operator]
	je .found_id

	add rcx, 6			; next entry (4 bytes name + 2 bytes id)
	jmp .operator_id_loop

.found_id:
	push rdx
	mov dx, [rcx + 4]		; dx = token id
	mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], dx	; write to token
	inc rax				; table
	pop rdx

	pop rcx
	mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
	jmp .loop

.operand:
	; debug message
	push rsi
	mov rsi, .msg_operand
	call .report_found
	pop rsi

	test byte [.expecting], E_OPERAND	; make sure an operand was expected
	jz .unexpected_operand		; if not, error

.operand_loop:
	cmp rdi, rsi			; the end of the program also ends the operand
	jae .operand_break

	mov dl, [rdi]

	cmp dl, ","
	je .operand_break
	cmp dl, 0x0A
	je .operand_break
	cmp dl, 0x00
	je .operand_break

	inc rdi
	jmp .operand_loop

.operand_break:
	mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
	jmp .loop

.break:
	ret

	; state

.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR

	; error exits: each loads the name of the offending thing and shares
	; the printing code in .unexpected

.unexpected_whitespace:
	mov rsi, .msg_whitespace
	jmp .unexpected

.unexpected_comment:
	mov rsi, .msg_comment
	jmp .unexpected

.unexpected_newline:
	mov rsi, .msg_newline
	jmp .unexpected

.unexpected_comma:
	mov rsi, .msg_comma
	jmp .unexpected

.unexpected_operand:
	mov rsi, .msg_operand
	jmp .unexpected

.unexpected_operator:
	mov rsi, .msg_operator
	jmp .unexpected

	; prints "[ERROR]: unexpected <thing>" and halts. Never returns.
	; rsi -> name of the thing
.unexpected:
	push rsi
	mov rsi, .err_unexpected
	call print.error
	pop rsi
	call print
	jmp halt

	; prints "[DEBUG]: found <thing>"
	; rsi -> name of the thing (preserved)
.report_found:
	push rsi
	mov rsi, .found
	call print.debug
	pop rsi
	call print
	ret

.err_unexpected db "unexpected ", 0x00
.found db "found ", 0x00
.msg_whitespace db "whitespace.", 0x0A, 0x00
.msg_comment db "comment.", 0x0A, 0x00
.msg_newline db "newline.", 0x0A, 0x00
.msg_comma db "comma.", 0x0A, 0x00
.msg_operator db "operator.", 0x0A, 0x00
.msg_operand db "operand.", 0x0A, 0x00
.pending_operator dd 0 ; the operator token that is pending processing
; ------------------------------------------------------------------------------
; evaluate_constant
;
; description:
; takes a constant written as text and returns its numeric value. Currently
; the following constants are supported:
;
; | type | p. | description |
; |------|----|--------------|
; | 0x00 | 0x | hexadecimal |
; | 0xFF | | unrecognised |
;
; where `p.` is the prefix
;
; Hexadecimal digits are 0-9 and upper case A-F. A constant that is shorter
; than its prefix, or contains any other byte, is unrecognised. A bare "0x"
; evaluates to 0.
;
; parameters:
; rdi -> first byte of constant
; rsi = size of constant in bytes
;
; returned:
; rax = value of the constant; not meaningful if the type is unrecognised
; dl = type of constant; the rest of rdx is zeroed
; rdi and rsi are modified
; ------------------------------------------------------------------------------
evaluate_constant:
	; TODO fix this cheap trick xD
	cmp rsi, 2			; too short to even hold the prefix?
	jb .unrecognised

	cmp byte [rdi], '0'
	jne .unrecognised
	cmp byte [rdi + 1], 'x'
	jne .unrecognised

	add rdi, 2			; step over the prefix
	sub rsi, 2

	; rsi = number of bytes left
	; rdi -> current byte of constant
	xor eax, eax			; rax = value of constant

.loop:
	test rsi, rsi			; make sure we're in range
	jz .break			; if not, break

	movzx edx, byte [rdi]		; dl = next byte of constant
	sub dl, '0'			; '0'..'9' -> 0..9
	cmp dl, 9
	jbe .continue			; unsigned, so bytes below '0' do not pass

	sub dl, 'A' - '0' - 0xA		; 'A'..'F' -> 0xA..0xF
	cmp dl, 0xA			; bytes between '9' and 'A' land below 0xA
	jb .unrecognised
	cmp dl, 0xF			; everything else lands above 0xF
	ja .unrecognised

.continue:
	shl rax, 4			; make room for next hex digit
	or al, dl			; and add newest nibble

	dec rsi				; one fewer byte left
	inc rdi				; point to next byte
	jmp .loop			; and loop

.break:
	xor edx, edx			; hex type (0x00)
	ret

.unrecognised:
	mov edx, 0xFF			; unrecognised type
	ret
; ------------------------------------------------------------------------------
; utilities
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; print
;
; description:
; writes a null-terminated string, one byte at a time, to I/O port 0x3F8
; (conventionally the COM1 serial data port on PC hardware). rax, rdx and rsi
; are saved and restored, which keeps it easy to drop in while debugging.
;
; The entry points .debug, .error, .test and .warn print a fixed tag
; ("[DEBUG]: " and so on) and then the string.
;
; parameters:
; rsi -> start of null-terminated string
; ------------------------------------------------------------------------------
print:
	push rdx
	push rax
	push rsi

	mov edx, 0x3F8			; dx = port to write to
	jmp .fetch

.emit:
	out dx, al
	inc rsi
.fetch:
	mov al, [rsi]			; al = next byte of the string
	test al, al
	jnz .emit			; stop at the terminating null

	pop rsi
	pop rax
	pop rdx
	ret

	; Each tagged entry point prints its tag, restores rsi, and then jumps
	; into print for the string itself; print's ret returns to the caller.

.debug:
	push rsi
	mov rsi, .debug_msg
	call print
	pop rsi
	jmp print

.error:
	push rsi
	mov rsi, .error_msg
	call print
	pop rsi
	jmp print

.test:
	push rsi
	mov rsi, .test_msg
	call print
	pop rsi
	jmp print

.warn:
	push rsi
	mov rsi, .warn_msg
	call print
	pop rsi
	jmp print

.debug_msg db "[DEBUG]: ", 0x00
.error_msg db "[ERROR]: ", 0x00
.test_msg db "[TEST]: ", 0x00
.warn_msg db "[WARN]: ", 0x00
; ------------------------------------------------------------------------------
; halt
;
; description:
; prints the halt message and executes hlt. Never returns: if the processor
; resumes after hlt, the message is printed again and it halts again.
; ------------------------------------------------------------------------------
halt:
.again:
	mov rsi, msg_halt
	call print
	hlt
	jmp .again
; ------------------------------------------------------------------------------
; elemb
;
; description:
; linear search: reports whether a byte occurs in a list of bytes.
;
; parameters:
; rdi = size of list
; rsi -> start of list
; dl = given byte
;
; returned:
; rax = 0: is not an element
; 1: is an element
; rsi and rdi are advanced/decremented by the search and not restored
; ------------------------------------------------------------------------------
elemb:
.next:
	test rdi, rdi			; anything left to look at?
	jz .absent			; no: dl is not in the list

	cmp [rsi], dl			; is this list byte the one we want?
	je .present

	inc rsi				; step to the next list byte
	dec rdi				; one fewer remaining
	jmp .next

.present:
	mov eax, 1			; return 1; the write to eax zeroes the rest of rax
	ret

.absent:
	xor eax, eax			; return 0
	ret
; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
; fills the token table (TOKEN_TABLE_SIZE bytes at TOKEN_TABLE_ADDR) with
; zeroes, one double word at a time. Clobbers rax, rcx and rdi.
; ------------------------------------------------------------------------------
clear_token_table:
	mov edi, TOKEN_TABLE_ADDR	; rdi -> first double word to clear
	mov ecx, TOKEN_TABLE_SIZE / 4	; rcx = number of double words
	xor eax, eax			; eax = value to store
	rep stosd
	ret
; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
; clears the test arena as specified by TEST_ARENA_SIZE and TEST_ARENA_ADDR.
; Clobbers rax, rcx and rdi.
; ------------------------------------------------------------------------------
clear_test_arena:
	xor eax, eax			; value to write
	mov rcx, TEST_ARENA_SIZE / 4	; number of double words
	mov rdi, TEST_ARENA_ADDR	; address to start (the arena, not the token table)
	rep stosd
	ret
; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
; fills the output arena (OUTPUT_SIZE bytes at OUTPUT_ADDR) with zeroes, one
; double word at a time. Clobbers rax, rcx and rdi.
; ------------------------------------------------------------------------------
clear_output_arena:
	mov edi, OUTPUT_ADDR		; rdi -> first double word to clear
	mov ecx, OUTPUT_SIZE / 4	; rcx = number of double words
	xor eax, eax			; eax = value to store
	rep stosd
	ret
%include "asm/tests.asm"
; ------------------------------------------------------------------------------
; data
; ------------------------------------------------------------------------------
tokens:
; tokens.by_id: what is known about each token id.
; Each entry is 4 bytes:
;   word: token id
;   byte: type (0x01 = operator, 0x02 = register)
;   byte: typed metadata, whose meaning depends on the type
;     register: bits 1-0 = width, bits 4-2 = reg bits
;     operator: bits 1-0 = number of operands
; The lookup routines walk this table in steps of 4 and read the type at
; offset 2 and the metadata at offset 3.
.by_id:
dw 0x0010 ; eax
db 0x02 ; type: register
db 00000010b ; reg: 000b
; width: 10b (32 bits)
dw 0x0000 ; rax
db 0x02 ; type: register
db 00000011b ; reg: 000b
; width: 11b (64 bits)
dw 0x0003 ; rdx
db 0x02 ; type: register
db 00001011b ; reg: 010b
; width: 11b (64 bits)
dw 0x0053 ; xor
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0054 ; inc
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0056 ; mov
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x004F ; hlt
db 0x01 ; type: operator
db 0x00 ; # operands
.by_id_end:
; tokens.operators: operator name -> token id.
; Each entry is 6 bytes: a dd holding the name (up to 4 characters, padded
; with zero bytes), then a dw holding the token id. The tokeniser compares the
; 4 name bytes at once and steps through the table 6 bytes at a time.
; Entries are grouped by name length: 2, then 3, then 4 characters.
.operators:
dd "je"
dw 0x005C
dd "jg"
dw 0x005F
dd "jl"
dw 0x0061
dd "hlt"
dw 0x004F
dd "xor"
dw 0x0053
dd "inc"
dw 0x0054
dd "dec"
dw 0x0055
dd "mov"
dw 0x0056
dd "add"
dw 0x0057
dd "sub"
dw 0x0058
dd "ret"
dw 0x005A
dd "cmp"
dw 0x005B
dd "jne"
dw 0x005D
dd "jge"
dw 0x005E
dd "jle"
dw 0x0060
dd "int3"
dw 0x0050
dd "call"
dw 0x0059
.operators_end:
; tokens.registers: register name -> token id.
; Same 6-byte entry layout as tokens.operators: a dd holding the name (up to 4
; characters, padded with zero bytes), then a dw holding the token id.
; Entries are grouped by name length: 2, then 3, then 4 characters.
; NOTE(review): nothing in the visible part of this file reads this table yet.
.registers:
dd "r8"
dw 0x0008
dd "r9"
dw 0x0009
dd "ax"
dw 0x0020
dd "bx"
dw 0x0021
dd "cx"
dw 0x0022
dd "dx"
dw 0x0023
dd "si"
dw 0x0024
dd "di"
dw 0x0025
dd "sp"
dw 0x0026
dd "bp"
dw 0x0027
dd "al"
dw 0x0030
dd "bl"
dw 0x0031
dd "cl"
dw 0x0032
dd "dl"
dw 0x0033
dd "ah"
dw 0x0040
dd "bh"
dw 0x0041
dd "ch"
dw 0x0042
dd "dh"
dw 0x0043
dd "cs"
dw 0x0044
dd "ds"
dw 0x0045
dd "es"
dw 0x0046
dd "fs"
dw 0x0047
dd "gs"
dw 0x0048
dd "ss"
dw 0x0049
dd "rax"
dw 0x0000
dd "rbx"
dw 0x0001
dd "rcx"
dw 0x0002
dd "rdx"
dw 0x0003
dd "rsi"
dw 0x0004
dd "rdi"
dw 0x0005
dd "rsp"
dw 0x0006
dd "rbp"
dw 0x0007
dd "r10"
dw 0x000A
dd "r11"
dw 0x000B
dd "r12"
dw 0x000C
dd "r13"
dw 0x000D
dd "r14"
dw 0x000E
dd "r15"
dw 0x000F
dd "eax"
dw 0x0010
dd "ebx"
dw 0x0011
dd "ecx"
dw 0x0012
dd "edx"
dw 0x0013
dd "esi"
dw 0x0014
dd "edi"
dw 0x0015
dd "esp"
dw 0x0016
dd "ebp"
dw 0x0017
dd "r8d"
dw 0x0018
dd "r9d"
dw 0x0019
dd "r8w"
dw 0x0028
dd "r9w"
dw 0x0029
dd "sil"
dw 0x0034
dd "dil"
dw 0x0035
dd "spl"
dw 0x0036
dd "bpl"
dw 0x0037
dd "r8b"
dw 0x0038
dd "r9b"
dw 0x0039
dd "cr0"
dw 0x004A
dd "cr2"
dw 0x004B
dd "cr3"
dw 0x004C
dd "cr4"
dw 0x004D
dd "cr8"
dw 0x004E
dd "r10d"
dw 0x001A
dd "r11d"
dw 0x001B
dd "r12d"
dw 0x001C
dd "r13d"
dw 0x001D
dd "r14d"
dw 0x001E
dd "r15d"
dw 0x001F
dd "r10w"
dw 0x002A
dd "r11w"
dw 0x002B
dd "r12w"
dw 0x002C
dd "r13w"
dw 0x002D
dd "r14w"
dw 0x002E
dd "r15w"
dw 0x002F
dd "r10b"
dw 0x003A
dd "r11b"
dw 0x003B
dd "r12b"
dw 0x003C
dd "r13b"
dw 0x003D
dd "r14b"
dw 0x003E
dd "r15b"
dw 0x003F
.registers_end:
; opcodes.by_id: operator token id -> opcode byte.
; Each entry is 4 bytes: a word holding the token id, a byte holding the
; opcode, and a reserved byte. get_opcode walks this table in steps of 4 and
; reads the opcode at offset 2.
opcodes:
.by_id:
dw 0x0053 ; xor
db 0x31
db 0x00 ; reserved
dw 0x0054 ; inc
db 0xFF
db 0x00 ; reserved
dw 0x0056 ; mov
db 0x89
db 0x00 ; reserved
dw 0x004F ; hlt
db 0xF4
db 0x00 ; reserved
.by_id_end:
; null-terminated strings for `print`
msg_welcome db "Welcome to Twasm", 0x0A, 0x00
msg_halt db "halted.", 0x0A, 0x00
; byte lists; the number in the name is the length of the list in bytes
; NOTE(review): token_terminator_8 is not referenced in the visible part of
; this file
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
; bytes the tokeniser treats as whitespace: space, carriage return, null
whitespace_3 db " ", 0x0D, 0x00
; test program
; the source text that `start` feeds to the tokeniser
program:
db "xor eax, eax", 0x0A
db "inc rax ; inline comment", 0x0A
db "; one line comment", 0x0A
db "mov [ rax ], rdx", 0x0A
db "hlt"
db 0x00 ; just for the sake of being able to print it, I made it a string
; size of the program in bytes, counting the trailing 0x00 above.
; This is a single byte (`db`): the program must stay under 256 bytes, and
; readers must load exactly one byte (e.g. with movzx).
.size db $ - program