; NASM source -- 1155 lines, 27 KiB
; ------------------------------------------------------------------------------
; memory map, sentinel values and assembler directives
; TODO actually enforce any of these *_SIZE constants :p
; ------------------------------------------------------------------------------

LOAD_ADDR			equ 0x00010000	; where this program is loaded

TEST_ARENA_ADDR			equ 0x00050000	; where the tests run
TEST_ARENA_SIZE			equ 0x1000	; most bytes the tests may use

TOKEN_TABLE_ADDR		equ 0x00060000	; where the token table is built
TOKEN_TABLE_SIZE		equ 0x1000	; most bytes the table may occupy
TOKEN_TABLE_ENTRY_SIZE		equ 2		; bytes per table entry; things may
						; break if this ever changes

OUTPUT_ADDR			equ 0x00070000	; where the output binary is written
OUTPUT_SIZE			equ 0x1000	; most bytes the output may occupy

STACK_ADDR			equ 0x00060000	; initial rsp of the 64-bit stack
						; (same value as TOKEN_TABLE_ADDR)

UNRECOGNISED_TOKEN_ID		equ 0xFFFF	; id given to an unrecognised token
UNRECOGNISED_ID_TYPE		equ 0x0F	; type reported for an unknown id
UNRECOGNISED_ID_METADATA	equ 0xFF	; metadata reported for an unknown id
UNRECOGNISED_ID_OPCODE		equ 0x90	; opcode reported for an unknown id (NOP)

TEST_LINE_LENGTH		equ 80		; right border of test suite results

bits 64
org LOAD_ADDR
default abs	; TODO see if I actually need to do this
		; afaik absolute addressing is not harmful on bare metal
		; reasoning: stops annoying warning =D
|
|
|
|
; ------------------------------------------------------------------------------
; start
;
; description:
; entry point. Sets up the stack, prints the welcome message, runs the test
; suite, then tokenises and assembles the built-in test program before halting.
; Never returns.
; ------------------------------------------------------------------------------

start:
	mov rsp, STACK_ADDR	; we might need more stack space, let's just be safe

	mov rsi, msg_welcome
	call print

	call run_tests

	call clear_token_table

	mov rdi, program			; -> program
	; program.size is declared with `db`, so it must be loaded as a single
	; byte; a 64-bit load would also pull in the 7 bytes that follow it.
	movzx esi, byte [program.size]		; = size of program
	call tokenise
	; rax = number of tokens processed

	mov rdi, rax
	push rdi		; clear_output_arena overwrites rdi
	call clear_output_arena
	pop rdi			; rdi = number of tokens again
	call assemble

	jmp halt
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; assembling
|
|
; ------------------------------------------------------------------------------
|
|
|
|
; ------------------------------------------------------------------------------
; assemble
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
; binary located at OUTPUT_ADDR. It's probably desirable to clear the output
; arena before calling this function.
;
; Only tokens whose type is operator (0x01) or register (0x02) are acted on;
; every other token is skipped. An operator without operands is emitted as its
; opcode byte. An operator with operands is remembered until its register
; operands have been seen, then emitted as opcode + direct-addressing ModR/M:
;   two operands: R/M = first operand, reg = second operand
;   one operand:  R/M = the operand,   reg = 000b
;
; parameters:
; rdi = number of tokens in the token table
;
; registers:
; rdi is restored before returning; rax, rbx, rcx, rdx and rsi are overwritten
; here or in the helpers this calls.
; ------------------------------------------------------------------------------

assemble:
	xor eax, eax		; rax = index of the token being processed
.loop:
	cmp rax, rdi		; all tokens consumed?
	jae .break		; counts are unsigned

	push rdi		; [stack] total number of tokens
	movzx edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte
	push rax		; [stack] token index

	; di = next tte
	call get_tte_type
	; al = type of token

	cmp al, 0x01		; operator?
	jne .continue_operator

.operator:
	push rax		; MUST be popped BEFORE reaching .continue_operator;
				; it holds the token type, which is tested again.

	push rdi
	; di = tte
	call get_tte_typed_metadata
	; al = tte typed metadata
	pop rdi

	and al, 11b		; low 2 bits = number of operands
	jnz .operator_with_args

.operator_0:			; no operands: the opcode is the whole instruction
	push rdi
	; di = tte
	call get_opcode
	; al = opcode
	call .output_byte
	pop rdi

	pop rax			; token type, from start of .operator
	jmp .continue_operator

.operator_with_args:		; remember the operator until its operands arrive
	mov [.pending_operator_num_args], al

	push rdi
	; di = tte
	call get_opcode
	; al = opcode
	mov [.pending_operator_opcode], al
	pop rdi

	pop rax			; token type, from start of .operator

.continue_operator:
	cmp al, 0x02		; register?
	jne .continue_register

.register:
	call .dec_num_args	; one operand fewer still outstanding

	cmp byte [.pending_operator_num_args], 1	; first of two operands?
	je .register_one_of_two

	cmp byte [.pending_operator_num_args], 0	; last operand? (second of
	je .register_last				; two, or the only one)

	; otherwise, discard the token, reset things, and keep going :/
	push rsi
	mov rsi, .warn_unexpected_register
	call print.warn
	pop rsi
	call .reset_state
	jmp .continue_register

.register_one_of_two:
	mov [.first_argument], di	; di = tte; keep it until the second arrives
	jmp .continue_register

.register_last:
	; di = tte of the final operand
	mov si, [.first_argument]	; earlier operand, if there was one
	cmp si, UNRECOGNISED_TOKEN_ID
	jne .operator_finalise_2	; two operands: si = R/M, di = reg

.operator_finalise_1:
	; Single operand: it belongs in the R/M field. The reg field is 000b,
	; obtained through token id 0x0000 (rax), whose reg bits are 000b.
	; (Previously the two fields were swapped, which only encoded correctly
	; when the operand's own reg bits were 000b.)
	mov si, di
	xor edi, edi

.operator_finalise_2:
	; di = token for the reg field, si = token for the R/M field
	call get_direct_addressing_ModRM
	; al = ModR/M byte

	push rax
	mov al, [.pending_operator_opcode]
	call .output_byte	; output operator's opcode
	pop rax

	call .output_byte	; output ModR/M byte

	call .reset_state	; forget the finished operator

.continue_register:
	pop rax			; token index
	pop rdi			; total number of tokens

	inc rax			; move to next token
	jmp .loop

.break:
	ret

; constants

.warn_unexpected_register db "ignoring unexpected register", 0x0A, 0x00

; procedures

; appends one byte to the output and advances .next_output_byte
; al = byte to write
; overwrites rdx
.output_byte:
	mov edx, [.next_output_byte]	; rdx -> next free output byte
	mov [rdx], al
	inc edx
	mov [.next_output_byte], edx
	ret

; decrements .pending_operator_num_args
.dec_num_args:
	dec byte [.pending_operator_num_args]
	ret

; forgets the pending operator and the saved first operand
.reset_state:
	; NASM does not record the size of a label, so a store of an immediate
	; to memory needs an explicit size. Both operator fields are one byte
	; each and adjacent, so each gets a byte store.
	mov byte [.pending_operator_opcode], UNRECOGNISED_ID_OPCODE
	mov byte [.pending_operator_num_args], 0x00
	mov word [.first_argument], UNRECOGNISED_TOKEN_ID
	ret

; state variables

.pending_operator_opcode db 0x00	; opcode of the operator seeking args
.pending_operator_num_args db 0x00	; # of args it is still waiting for

.first_argument dw UNRECOGNISED_TOKEN_ID	; first argument if there are two

.next_output_byte dd OUTPUT_ADDR	; next empty byte in output
|
|
|
|
; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
; given a token table entry, returns the declared type in `tokens.by_id`. If
; there is no entry, returns UNRECOGNISED_ID_TYPE
;
; parameters:
; di = token table entry
;
; returned:
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
; zeroed; the rest of rax is zeroed.
;
; registers:
; rdi is masked to its low 16 bits; cx is overwritten.
; ------------------------------------------------------------------------------

get_tte_type:
	and rdi, 0xFFFF		; mask input so it behaves as expected
	xor eax, eax		; rax = index into tokens.by_id (4-byte entries)

.loop:
	; index == entry count is already out of range, so stop there instead
	; of comparing against whatever follows the table
	cmp rax, (tokens.by_id_end - tokens.by_id) / 4
	jae .not_found

	mov cx, [tokens.by_id + rax * 4]	; id stored in this entry
	cmp cx, di
	je .found

	inc rax
	jmp .loop

.not_found:
	mov eax, UNRECOGNISED_ID_TYPE
	and eax, 0xF		; mask as expected
	ret

.found:
	movzx eax, byte [2 + tokens.by_id + rax * 4]	; type byte of the entry
	and eax, 0xF		; mask as expected
	ret
|
|
|
|
; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
; given a token table entry, returns the declared typed metadata in
; `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA
;
; parameters:
; di = token table entry
;
; returned:
; al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax is
; zeroed.
;
; registers:
; rdi is masked to its low 16 bits; cx is overwritten.
; ------------------------------------------------------------------------------

get_tte_typed_metadata:
	and rdi, 0xFFFF		; mask input so it behaves as expected
	xor eax, eax		; rax = index into tokens.by_id (4-byte entries)

.loop:
	; index == entry count is already out of range, so stop there instead
	; of comparing against whatever follows the table
	cmp rax, (tokens.by_id_end - tokens.by_id) / 4
	jae .not_found

	mov cx, [tokens.by_id + rax * 4]	; id stored in this entry
	cmp cx, di
	je .found

	inc rax
	jmp .loop

.not_found:
	mov eax, UNRECOGNISED_ID_METADATA
	ret

.found:
	movzx eax, byte [3 + tokens.by_id + rax * 4]	; metadata byte of the entry
	ret
|
|
|
|
; ------------------------------------------------------------------------------
; get_direct_addressing_ModRM
;
; description:
; builds the ModR/M byte for two register tokens with mod = 11b (direct
; addressing). Thin front end for get_ModRM.
;
; parameters:
; di = token table entry `reg`
; si = token table entry `R/M`
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
; ------------------------------------------------------------------------------

get_direct_addressing_ModRM:
	mov dl, 11b		; mod bits for direct addressing
	jmp get_ModRM		; tail call; get_ModRM returns to our caller
|
|
|
|
; ------------------------------------------------------------------------------
; get_ModRM
;
; description:
; combines the mod bits with the reg bits of two register tokens into a ModR/M
; byte: mod in bits 7-6, `reg` in bits 5-3, `R/M` in bits 2-0.
;
; parameters:
; di = token table entry `reg`
; si = token table entry `R/M`
; dl = lower 2 bits: mod bits. The rest is ignored
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
;
; registers:
; bl, cl and dl are overwritten with the three shifted fields; rdi ends up
; holding the `R/M` token.
; ------------------------------------------------------------------------------

get_ModRM:
	; reg field
	; di = tte
	call get_reg_bits
	; al = reg bits
	mov bl, al
	shl bl, 3		; bl = reg field in position

	; R/M field
	mov rdi, rsi
	; di = tte
	call get_reg_bits
	; al = reg bits
	mov cl, al		; cl = R/M field in position

	; mod field
	and dl, 11b		; only the low 2 bits are meaningful
	shl dl, 6		; dl = mod field in position

	movzx eax, dl		; start from mod, rest of rax zero
	or al, bl
	or al, cl
	ret
|
|
|
|
; ------------------------------------------------------------------------------
; get_opcode
;
; description:
; given an operator token, returns its opcode from `opcodes.by_id`. If there is
; no entry, returns UNRECOGNISED_ID_OPCODE
;
; parameters:
; di = token table entry
;
; returned:
; al = opcode; the rest of rax is zeroed
;
; registers:
; rdi is masked to its low 16 bits; cx is overwritten.
; ------------------------------------------------------------------------------

get_opcode:
	and rdi, 0xFFFF		; only the low 16 bits carry the token
	xor eax, eax		; rax = index into opcodes.by_id (4-byte entries)

.loop:
	; index == entry count is already out of range, so stop there instead
	; of comparing against whatever follows the table
	cmp rax, (opcodes.by_id_end - opcodes.by_id) / 4
	jae .not_found

	mov cx, [opcodes.by_id + rax * 4]	; id stored in this entry
	cmp cx, di
	je .found

	inc rax
	jmp .loop

.not_found:
	mov eax, UNRECOGNISED_ID_OPCODE
	ret

.found:
	movzx eax, byte [2 + opcodes.by_id + rax * 4]	; opcode byte of the entry
	ret
|
|
|
|
; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
; extracts the 3 reg bits from the typed metadata of a register token. In the
; metadata byte the reg bits sit directly above the 2 low bits.
;
; parameters:
; di = token table entry
;
; returned:
; al = reg bits of the token; the rest of rax, including the upper 5 bits of
; al, are zeroed.
; ------------------------------------------------------------------------------

get_reg_bits:
	; di = tte
	call get_tte_typed_metadata
	; rax = typed metadata, zero-extended from al
	shr eax, 2		; drop the 2 low bits
	and eax, 111b		; keep the 3 reg bits
	ret
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; tokenising
|
|
; ------------------------------------------------------------------------------
|
|
|
|
; ------------------------------------------------------------------------------
; tokenise
; TODO write tests
;
; description:
; represents the program at the given address and puts it in the token table
; it's probably desirable to clear the token table before calling this function.
;
; A terminator byte becomes its own token 0xFExx, where xx is the byte. When a
; closing bracket (0x0052) is read, the entry of the latest open bracket
; (0x0051) is rewritten as 0x10dd, where dd is the number of entries between
; the two.
;
; parameters:
; rdi -> first byte of program
; rsi = size of program in bytes
;
; returned:
; rax = number of tokens processed
; ------------------------------------------------------------------------------

tokenise:
	add rsi, rdi		; rsi -> first byte AFTER the program
	xor ecx, ecx		; rcx = number of tokens processed
.loop:
	; stop as soon as the cursor reaches the end; the byte at rsi itself is
	; not part of the program and must not be tokenised
	cmp rdi, rsi
	jae .break

	push rdi
	push rsi
	push rcx

	; rdi -> current byte
	call identify_next_token
	; ax = id of token
	; dx = length of token

	pop rcx
	pop rsi
	pop rdi

	; deal with terminator character (reported as 0 length token)
	test rdx, rdx
	jnz .continue0

.token_length0:
	mov ax, 0xFE00		; terminator character
	mov al, [rdi]		; byte of terminator
	mov edx, 1		; byte length is 1

.continue0:
	add rdi, rdx		; current byte + length of token = next unread byte

	mov [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], ax ; fill next entry
								  ; in token table

	; TODO fix undefined behaviour when open brackets and closed brackets aren't
	; correctly paired or have too much distance between them
	cmp ax, 0x0051		; open bracket?
	jne .continue_open_bracket

.open_bracket:
	; TODO make brackets able to hold more
	mov [.data_open_bracket], cl	; record which entry the open bracket is at

.continue_open_bracket:
	cmp ax, 0x0052		; closing bracket?
	jne .continue_close_bracket

.close_bracket:
	; rewrite open bracket token entry with a filled out one
	push rcx

	; load the whole index register, not just dl, since rdx is used as the
	; table index below
	movzx edx, byte [.data_open_bracket]
	sub cl, dl		; cl = entries between the two brackets
	mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl
	mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10

	pop rcx

.continue_close_bracket:
	inc rcx			; +1 token processed
	jmp .loop

.break:
	mov rax, rcx
	ret

.data_open_bracket db 0x00	; represents the token # of the latest open bracket
|
|
|
|
; ------------------------------------------------------------------------------
; identify_token
;
; description:
; returns the id of a given token. If there are multiple ways to represent a
; given token, like the open-bracket, it returns the one that doesn't require
; information about the surrounding tokens, because it has no such information.
; In other words, if it isn't in the `tokens` data structure, this function
; doesn't see it. If the first byte of the token points to a terminator
; byte, this function returns it as an unrecognised token.
;
; Only lengths 1 to 4 are looked up (tables tokens.by_name_1 .. by_name_4);
; any other length is unrecognised. Exactly rsi bytes of the token are read.
;
; parameters:
; rdi -> first byte of token
; rsi = size of token in bytes
;
; returned:
; ax = id of token; the rest of rax is zeroed
;
; registers:
; rcx, r10 and r11 are overwritten.
; ------------------------------------------------------------------------------

identify_token:
	cmp rsi, 1
	je .start_length1

	cmp rsi, 2
	je .start_length2

	cmp rsi, 3
	je .start_length3

	cmp rsi, 4
	je .start_length4

	jmp .unrecognised	; no table for this length

; length1: entries are 1 name byte + 2 id bytes
.start_length1:
	mov r11b, [rdi]			; token
	mov rcx, tokens.by_name_1	; rcx -> current known token

.loop_length1:
	cmp rcx, tokens.by_name_2	; past the end of the length1 tokens?
	jae .unrecognised

	mov r10b, [rcx]			; known token
	cmp r10b, r11b
	je .found_length1

	add rcx, 3			; length of token + length of id
	jmp .loop_length1

.found_length1:
	movzx eax, word [rcx + 1]	; id of token, rest of rax zeroed
	ret

; length2: entries are 2 name bytes + 2 id bytes
.start_length2:
	mov r11w, [rdi]			; token
	mov rcx, tokens.by_name_2	; rcx -> current known token

.loop_length2:
	cmp rcx, tokens.by_name_3	; past the end of the length2 tokens?
	jae .unrecognised

	mov r10w, [rcx]			; known token
	cmp r10w, r11w
	je .found_length2

	add rcx, 4			; length of token + length of id
	jmp .loop_length2

.found_length2:
	movzx eax, word [rcx + 2]	; id of token, rest of rax zeroed
	ret

; length3: entries are 3 name bytes + 2 id bytes
.start_length3:
	; build the 3-byte token from a byte load and a word load so nothing
	; beyond the token itself is read
	movzx r11d, byte [rdi + 2]
	shl r11d, 16
	mov r11w, [rdi]			; r11d = 3 token bytes, top byte zero
	mov rcx, tokens.by_name_3	; rcx -> current known token

.loop_length3:
	cmp rcx, tokens.by_name_4	; past the end of the length3 tokens?
	jae .unrecognised

	; the 4th byte loaded here is the low byte of this entry's own id, so
	; the read stays inside the entry; it is masked away
	mov r10d, [rcx]
	and r10d, 0x00FFFFFF		; known token only
	cmp r10d, r11d
	je .found_length3

	add rcx, 5			; length of token + length of id
	jmp .loop_length3

.found_length3:
	movzx eax, word [rcx + 3]	; id of token, rest of rax zeroed
	ret

; length4: entries are 4 name bytes + 2 id bytes
.start_length4:
	mov r11d, [rdi]			; token
	mov rcx, tokens.by_name_4	; rcx -> current known token

.loop_length4:
	cmp rcx, tokens.by_name_5	; past the end of the length4 tokens?
	jae .unrecognised

	mov r10d, [rcx]			; known token
	cmp r10d, r11d
	je .found_length4

	add rcx, 6			; length of token + length of id
	jmp .loop_length4

.found_length4:
	movzx eax, word [rcx + 4]	; id of token, rest of rax zeroed
	ret

.unrecognised:
	mov eax, UNRECOGNISED_TOKEN_ID
	ret
|
|
|
|
; ------------------------------------------------------------------------------
; identify_next_token
; description:
; like identify_token, except the length is worked out here: bytes are scanned
; from rdi until one of the 8 bytes in token_terminator_8 is met. If the very
; first byte is a terminator, the reported length is 0.
;
; parameters:
; rdi -> first byte of token
;
; returned:
; ax = id of token; the rest of rax is zeroed
; dx = length of token in bytes; the rest of rdx is zeroed
; ------------------------------------------------------------------------------

identify_next_token:
	push rdi		; [stack] -> first byte of token
	mov rsi, rdi		; rsi = scan cursor

.scan:
	movzx edx, byte [rsi]	; dl = byte under the cursor

	push rsi		; elemb overwrites rsi and rdi
	mov edi, 8			; length of terminator list
	mov rsi, token_terminator_8	; start of terminator list
	call elemb
	; rax = 1 if dl is a terminator, else 0
	pop rsi

	cmp rax, 1
	je .measured		; terminator reached: the token ends here

	inc rsi			; next byte of token
	jmp .scan

.measured:
	pop rdi			; rdi -> first byte of token
	sub rsi, rdi		; rsi = cursor - start = length of token

	push rsi
	; rdi -> token, rsi = length
	call identify_token
	; ax = id of token
	pop rsi
	mov rdx, rsi		; length
	ret
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; utilities
|
|
; ------------------------------------------------------------------------------
|
|
|
|
; ------------------------------------------------------------------------------
; print
;
; description:
; writes a null-terminated string, one byte at a time, to I/O port 0x3F8
; (conventionally the COM1 data port). rdx, rax and rsi are saved and restored,
; so callers see their registers unchanged.
;
; print.debug, print.error, print.test and print.warn write a fixed prefix
; ("[DEBUG]: ", "[ERROR]: ", "[TEST]: ", "[WARN]: ") and then the string.
;
; parameters:
; rsi -> start of null-terminated string
; ------------------------------------------------------------------------------

print:
	push rdx
	push rax
	push rsi

	mov edx, 0x3F8		; dx = output port
	jmp .fetch
.emit:
	out dx, al
	inc rsi
.fetch:
	mov al, [rsi]
	test al, al		; the terminating zero byte is not written
	jnz .emit

	pop rsi
	pop rax
	pop rdx
	ret

.debug:
	push rsi
	mov rsi, .debug_msg
	call print		; prefix
	pop rsi
	jmp print		; caller's string; print returns for us

.error:
	push rsi
	mov rsi, .error_msg
	call print		; prefix
	pop rsi
	jmp print		; caller's string; print returns for us

.test:
	push rsi
	mov rsi, .test_msg
	call print		; prefix
	pop rsi
	jmp print		; caller's string; print returns for us

.warn:
	push rsi
	mov rsi, .warn_msg
	call print		; prefix
	pop rsi
	jmp print		; caller's string; print returns for us

.debug_msg db "[DEBUG]: ", 0x00
.error_msg db "[ERROR]: ", 0x00
.test_msg db "[TEST]: ", 0x00
.warn_msg db "[WARN]: ", 0x00
|
|
|
|
; ------------------------------------------------------------------------------
; halt
;
; description:
; prints msg_halt and executes hlt. Never returns: if execution resumes after
; the hlt, the message is printed again and the processor is halted again.
; ------------------------------------------------------------------------------

halt:
	lea rsi, [msg_halt]
	call print
	hlt
	jmp halt		; resumed after hlt: go round again
|
|
|
|
; ------------------------------------------------------------------------------
; elemb
;
; description:
; linear search: reports whether a byte occurs in a list of bytes
;
; parameters:
; rdi = size of list
; rsi -> start of list
; dl = given byte
;
; returned:
; rax = 0: is not an element
;       1: is an element
;
; registers:
; rsi and rdi are advanced/decremented as the list is walked.
; ------------------------------------------------------------------------------

elemb:
	jmp .check

.advance:
	inc rsi			; move to next byte
	dec rdi			; and reduce remaining length

.check:
	test rdi, rdi		; nothing left to look at?
	jz .not_found

	cmp [rsi], dl		; is the current list byte the one we want?
	jne .advance

.found:
	mov eax, 1		; return 1; dl an element of list
	ret

.not_found:
	xor eax, eax		; return 0; dl not an element of list
	ret

.f db "found", 0x0A, 0x00
.nf db "not found", 0x0A, 0x00
|
|
|
|
; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
; fills the TOKEN_TABLE_SIZE bytes starting at TOKEN_TABLE_ADDR with zeroes,
; one double word at a time
;
; registers:
; rax, rcx and rdi are overwritten.
; ------------------------------------------------------------------------------

clear_token_table:
	mov edi, TOKEN_TABLE_ADDR	; address to start
	mov ecx, TOKEN_TABLE_SIZE / 4	; number of double words
	xor eax, eax			; value to write
	rep stosd
	ret
|
|
|
|
; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
; clears the test arena as specified by TEST_ARENA_SIZE and TEST_ARENA_ADDR
;
; registers:
; rax, rcx and rdi are overwritten.
; ------------------------------------------------------------------------------

clear_test_arena:
	xor eax, eax			; value to write
	; use the TEST_ARENA_* constants; the TOKEN_TABLE_* ones would clear the
	; token table and leave the test arena untouched
	mov rcx, TEST_ARENA_SIZE / 4	; number of double words
	mov rdi, TEST_ARENA_ADDR	; address to start
	rep stosd
	ret
|
|
|
|
; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
; fills the OUTPUT_SIZE bytes starting at OUTPUT_ADDR with zeroes, one double
; word at a time
;
; registers:
; rax, rcx and rdi are overwritten.
; ------------------------------------------------------------------------------

clear_output_arena:
	mov edi, OUTPUT_ADDR		; address to start
	mov ecx, OUTPUT_SIZE / 4	; number of double words
	xor eax, eax			; value to write
	rep stosd
	ret
|
|
|
|
%include "asm/tests.asm"
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; data
|
|
; ------------------------------------------------------------------------------
|
|
|
|
; One by-name entry: the token's text immediately followed by its 16-bit id.
;   %1 = token text, %2 = token id
%macro tok_name 2
	db %1
	dw %2
%endmacro

; tokens
;   .by_name_N  known tokens whose text is N bytes long; each entry is N text
;               bytes followed by a 2-byte id. The label of the next length
;               marks the end of each list.
;   .by_id      4-byte entries: id (word), type (byte), typed metadata (byte).
;               type 0x01 = operator, metadata = # operands
;               type 0x02 = register, metadata = reg bits (bits 4-2) and
;                           width (bits 1-0)
tokens:
.by_name_1:
	tok_name "[", 0x0051
	tok_name "]", 0x0052
	tok_name "+", 0x0062
	tok_name "-", 0x0063
	tok_name "*", 0x0064
	tok_name "/", 0x0065
.by_name_2:
	tok_name "r8", 0x0008
	tok_name "r9", 0x0009
	tok_name "ax", 0x0020
	tok_name "bx", 0x0021
	tok_name "cx", 0x0022
	tok_name "dx", 0x0023
	tok_name "si", 0x0024
	tok_name "di", 0x0025
	tok_name "sp", 0x0026
	tok_name "bp", 0x0027
	tok_name "al", 0x0030
	tok_name "bl", 0x0031
	tok_name "cl", 0x0032
	tok_name "dl", 0x0033
	tok_name "ah", 0x0040
	tok_name "bh", 0x0041
	tok_name "ch", 0x0042
	tok_name "dh", 0x0043
	tok_name "cs", 0x0044
	tok_name "ds", 0x0045
	tok_name "es", 0x0046
	tok_name "fs", 0x0047
	tok_name "gs", 0x0048
	tok_name "ss", 0x0049
	tok_name "je", 0x005C
	tok_name "jg", 0x005F
	tok_name "jl", 0x0061
.by_name_3:
	tok_name "rax", 0x0000
	tok_name "rbx", 0x0001
	tok_name "rcx", 0x0002
	tok_name "rdx", 0x0003
	tok_name "rsi", 0x0004
	tok_name "rdi", 0x0005
	tok_name "rsp", 0x0006
	tok_name "rbp", 0x0007
	tok_name "r10", 0x000A
	tok_name "r11", 0x000B
	tok_name "r12", 0x000C
	tok_name "r13", 0x000D
	tok_name "r14", 0x000E
	tok_name "r15", 0x000F
	tok_name "eax", 0x0010
	tok_name "ebx", 0x0011
	tok_name "ecx", 0x0012
	tok_name "edx", 0x0013
	tok_name "esi", 0x0014
	tok_name "edi", 0x0015
	tok_name "esp", 0x0016
	tok_name "ebp", 0x0017
	tok_name "r8d", 0x0018
	tok_name "r9d", 0x0019
	tok_name "r8w", 0x0028
	tok_name "r9w", 0x0029
	tok_name "sil", 0x0034
	tok_name "dil", 0x0035
	tok_name "spl", 0x0036
	tok_name "bpl", 0x0037
	tok_name "r8b", 0x0038
	tok_name "r9b", 0x0039
	tok_name "cr0", 0x004A
	tok_name "cr2", 0x004B
	tok_name "cr3", 0x004C
	tok_name "cr4", 0x004D
	tok_name "cr8", 0x004E
	tok_name "hlt", 0x004F
	tok_name "xor", 0x0053
	tok_name "inc", 0x0054
	tok_name "dec", 0x0055
	tok_name "mov", 0x0056
	tok_name "add", 0x0057
	tok_name "sub", 0x0058
	tok_name "ret", 0x005A
	tok_name "cmp", 0x005B
	tok_name "jne", 0x005D
	tok_name "jge", 0x005E
	tok_name "jle", 0x0060
.by_name_4:
	tok_name "r10d", 0x001A
	tok_name "r11d", 0x001B
	tok_name "r12d", 0x001C
	tok_name "r13d", 0x001D
	tok_name "r14d", 0x001E
	tok_name "r15d", 0x001F
	tok_name "r10w", 0x002A
	tok_name "r11w", 0x002B
	tok_name "r12w", 0x002C
	tok_name "r13w", 0x002D
	tok_name "r14w", 0x002E
	tok_name "r15w", 0x002F
	tok_name "r10b", 0x003A
	tok_name "r11b", 0x003B
	tok_name "r12b", 0x003C
	tok_name "r13b", 0x003D
	tok_name "r14b", 0x003E
	tok_name "r15b", 0x003F
	tok_name "int3", 0x0050
	tok_name "call", 0x0059
.by_name_5:			; no 5-byte tokens; marks the end of .by_name_4
.by_id:
	dw 0x0010		; eax
	db 0x02			; type: register
	db 00000010b		; reg: 000b, width: 10b (32 bits)

	dw 0x0000		; rax
	db 0x02			; type: register
	db 00000011b		; reg: 000b, width: 11b (64 bits)

	dw 0x0003		; rdx
	db 0x02			; type: register
	db 00001011b		; reg: 010b, width: 11b (64 bits)

	dw 0x0053		; xor
	db 0x01			; type: operator
	db 0x02			; # operands

	dw 0x0054		; inc
	db 0x01			; type: operator
	db 0x01			; # operands

	dw 0x0056		; mov
	db 0x01			; type: operator
	db 0x02			; # operands

	dw 0x004F		; hlt
	db 0x01			; type: operator
	db 0x00			; # operands
.by_id_end:
|
|
|
|
; opcodes
;   .by_id      4-byte entries: operator token id (word), opcode (byte),
;               reserved (byte)
opcodes:
.by_id:
	dw 0x0053		; xor
	db 0x31, 0x00		; opcode, reserved

	dw 0x0054		; inc
	db 0xFF, 0x00		; opcode, reserved

	dw 0x0056		; mov
	db 0x89, 0x00		; opcode, reserved

	dw 0x004F		; hlt
	db 0xF4, 0x00		; opcode, reserved
.by_id_end:
|
|
|
|
; null-terminated strings for print
msg_welcome	db "Welcome to Twasm", 0x0A, 0x00
msg_halt	db "halted.", 0x0A, 0x00

; the 8 bytes that end a token; unused slots are padded with 0x00
token_terminator_8:
	db 0x00			; NUL
	db " "			; space
	db 0x0A			; line feed
	db 0x0D			; carriage return
	db ","			; comma
	db 0x00, 0x00, 0x00	; padding

debug_string	db "debug_string", 0x0A, 0x00
|
|
|
|
; test program: the source text handed to tokenise by start
program:
	db "xor eax, eax", 0x0A
	db "inc rax", 0x0A
	db "mov [ rax ], rdx", 0x0A
	db "hlt", 0x0A
	db 0x00		; just for the sake of being able to print it, I made it a string
; length of the text above without the trailing 0x00. Stored as a single byte.
.size	db .size - program - 1
|