; NASM source listing (1830 lines, 41 KiB)
; TODO actually enforce any of these *_SIZE constants :p

; ------------------------------------------------------------------------------
; memory map
; ------------------------------------------------------------------------------

LOAD_ADDR                 equ 0x00010000  ; address this program is loaded at

TEST_ARENA_ADDR           equ 0x00050000  ; address to run tests at
TEST_ARENA_SIZE           equ 0x1000      ; maximum size tests can use

TOKEN_TABLE_ADDR          equ 0x00060000  ; address the token table is loaded at
TOKEN_TABLE_SIZE          equ 0x1000      ; max length of table
TOKEN_TABLE_ENTRY_SIZE    equ 2           ; size of a token table entry; things
                                          ; may break if this ever changes

OUTPUT_ADDR               equ 0x00070000  ; address of the outputted binary
OUTPUT_SIZE               equ 0x1000      ; max length of the outputted binary

STACK_ADDR                equ 0x00060000  ; initial rsp of the 64-bit stack; same
                                          ; value as TOKEN_TABLE_ADDR (pushes
                                          ; store below this address)

; ------------------------------------------------------------------------------
; fallback values returned by the lookup routines for unknown tokens / ids
; ------------------------------------------------------------------------------

UNRECOGNISED_TOKEN_ID     equ 0xFFFF      ; id of an unrecognised token
UNRECOGNISED_ID_TYPE      equ 0x0F        ; type of an unrecognised id
UNRECOGNISED_ID_METADATA  equ 0xFF        ; metadata of an unrecognised id
UNRECOGNISED_ID_OPCODE    equ 0x90        ; opcode of an unrecognised id (NOP)

TEST_LINE_LENGTH          equ 80          ; right border of test suite results

; ------------------------------------------------------------------------------
; flags for expected values in tokeniser (one bit each, OR-ed together)
; ------------------------------------------------------------------------------

E_COMMENT                 equ 0x01        ; bit 0
E_NEWLINE                 equ 0x02        ; bit 1
E_WHITESPACE              equ 0x04        ; bit 2
E_COMMA                   equ 0x08        ; bit 3
E_OPERATOR                equ 0x10        ; bit 4
E_OPERAND                 equ 0x20        ; bit 5

bits 64
org LOAD_ADDR
default abs     ; TODO see if I actually need to do this
                ; afaik absolute addressing is not harmful on bare metal
                ; reasoning: stops annoying warning =D
; entry point: set up the stack, run the test suite, then tokenise and assemble
; the embedded `program` and halt.
start:
        mov     rsp, STACK_ADDR         ; we might need more stack space, let's
                                        ; just be safe

        mov     rsi, msg_welcome        ; greet
        call    print

        call    run_tests

        call    clear_token_table       ; start from an empty token table

        mov     rdi, program            ; rdi -> program
        mov     rsi, [program.size]     ; rsi = size of program
        call    tokenise
                                        ; rax = number of tokens processed

        mov     rdi, rax                ; rdi = token count (argument of assemble)
        push    rax                     ; keep the count across the arena clear
        call    clear_output_arena
        pop     rdi                     ; rdi = token count again
        call    assemble

        jmp     halt
; ------------------------------------------------------------------------------
; assembling
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; assemble
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
;   assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
;   binary located at OUTPUT_ADDR. Each instruction is emitted as one opcode
;   byte (from get_opcode) optionally followed by one ModR/M byte (from
;   get_ModRM). An empty token table (rdi = 0) emits nothing.
;   Behaviour is undefined when:
;   - tokens are in an impossible order
;       0x1000 ; memory address, following token should be a register
;       0x1000 ; not a register
;   - operator tokens followed by the wrong number of arguments
;     TODO enforce this in `tokenise`
;       0x004F ; hlt, expects 0 arguments
;       0x0000 ; rax, an argument
;   - an undefined token is included, like 0x0051
;
;   Errors (an operator whose metadata claims 3 operands, an unsupported memory
;   operand, or more than OUTPUT_SIZE bytes of output) print a message and jump
;   to `halt`; they never return.
;
; parameters:
;   rdi = number of tokens in the token table
;
; returned:
;   rax = number of tokens processed, rdi = number of tokens in the table.
;   rcx, rdx and rsi are used as scratch, as is anything the get_* helpers use.
;
; internals:
;   while a token is being handled the stack holds
;       [rsp]     = index of the token currently being handled
;       [rsp + 8] = total number of tokens
;   .fetch_next and .next depend on exactly this layout, so every handler must
;   have balanced its own pushes before using them.
; ------------------------------------------------------------------------------

assemble:
        test    rdi, rdi                ; nothing to assemble?
        jz      .break
        xor     eax, eax                ; rax = number of tokens processed
                                        ; rdi = number of tokens in table
.loop:
        push    rdi                     ; [rsp + 8] = total number of tokens
        push    rax                     ; [rsp]     = index of this token
        movzx   edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR]

                                        ; di = tte of operator
.operator:
        push    rdi
        call    get_tte_typed_metadata
                                        ; al = tte typed metadata
        pop     rdi

        and     al, 11b                 ; mask for # operands
        jz      .operator_0             ; no operands
        cmp     al, 1
        je      .operator_1             ; one operand
        cmp     al, 2
        je      .operator_2             ; two operands

        jmp     .unexpected_token       ; TODO actually check operator type or
                                        ; not first: if get_tte_typed_metadata
                                        ; happens to return 0, 1, or 2 on a
                                        ; non-operator, it doesn't get caught

; -- no operands: opcode only ---------------------------------------------------
                                        ; di = tte of operator
.operator_0:
        push    rsi
        mov     rsi, .msg_operator_0
        call    print.debug
        pop     rsi

        push    rsi
        mov     sil, 0                  ; standard opcode
        call    get_opcode
                                        ; al = opcode
        call    .output_byte
        pop     rsi

        jmp     .next

; -- one operand: opcode, then ModR/M for a register operand --------------------
                                        ; di = tte of operator
.operator_1:
        push    rsi
        mov     rsi, .msg_operator_1
        call    print.debug
        pop     rsi

        push    rsi
        mov     sil, 0                  ; dst = r/m
        call    get_opcode
                                        ; al = opcode
        call    .output_byte
        pop     rsi

        call    .fetch_next             ; di = operand tte

        mov     eax, edi
        and     eax, 0xFF00
        cmp     eax, 0x1000             ; check if token is a memory address
        je      .operator_1_memory_access

        call    get_tte_type
                                        ; al = type of token
        cmp     al, 0x02                ; type: register
        je      .operator_1_register

        jmp     .next                   ; any other operand type is skipped

                                        ; TODO figure out if this is relevant
.operator_1_memory_access:
        push    rsi
        mov     rsi, .msg_operator_1_memory_access
        call    print.debug
        pop     rsi
        jmp     .unsupported_memory_access

                                        ; di = tte of the register operand
.operator_1_register:
        push    rsi
        mov     rsi, .msg_operator_1_register
        call    print.debug
        pop     rsi

        mov     esi, edi                ; si = `R/M` tte
        xor     edi, edi                ; di = `reg` tte (token 0x0000)
        mov     dl, 11b                 ; dl = mod bits
        call    get_ModRM
                                        ; al = Mod R/M byte
        call    .output_byte

        jmp     .next

; -- two operands ---------------------------------------------------------------
                                        ; di = tte of operator
.operator_2:
        push    rsi
        mov     rsi, .msg_operator_2
        call    print.debug
        pop     rsi

        mov     cx, di                  ; cx = tte of operator

        call    .fetch_next             ; di = first operand tte (cx preserved)

        mov     eax, edi
        and     eax, 0xFF00
        cmp     eax, 0x1000             ; check if token is a memory address
        je      .operator_2_memory_access

        push    rcx                     ; get_tte_type may clobber cx
        call    get_tte_type
                                        ; al = type of token
        pop     rcx                     ; cx = tte of operator

        cmp     al, 0x02                ; type: register
        je      .operator_2_register

        jmp     .next                   ; any other operand type is skipped

                                        ; di = memory token, cx = tte of operator
.operator_2_memory_access:
        push    rsi
        mov     rsi, .msg_operator_2_memory_access
        call    print.debug
        pop     rsi

        cmp     di, 0x1000              ; check if token is addressing a register
        jne     .unsupported_memory_access ; if not, unsupported :/

        mov     di, cx                  ; di = tte of operator
        mov     sil, 0                  ; dst = r/m
        call    get_opcode
                                        ; al = opcode
        call    .output_byte

        call    .fetch_next             ; di = register inside the brackets
        mov     si, di                  ; si = dst tte

        call    .fetch_next             ; di = second operand tte (si preserved)

        mov     eax, edi
        and     eax, 0xFF00
        cmp     eax, 0x1000             ; check if token is a memory address
        je      .unsupported_memory_access ; no case of *],[* in asm

        call    get_tte_type
                                        ; al = type of token
        cmp     al, 0x02                ; type: register
        je      .operator_2_memory_access_register

        jmp     .next

                                        ; si = r/m; dst tte
                                        ; di = reg; src tte
.operator_2_memory_access_register:
        push    rsi
        mov     rsi, .msg_operator_2_memory_access_register
        call    print.debug
        pop     rsi

        mov     dl, 00b                 ; dl = mod bits
        call    get_ModRM
                                        ; al = Mod R/M byte
        call    .output_byte

        jmp     .next

                                        ; di = dst register tte
                                        ; cx = tte of operator
.operator_2_register:
        push    rsi
        mov     rsi, .msg_operator_2_register
        call    print.debug
        pop     rsi

        push    rdi
        mov     di, cx                  ; di = tte of operator
        mov     sil, 1                  ; dst = reg
        call    get_opcode
                                        ; al = opcode
        call    .output_byte
        pop     rdi

        mov     si, di                  ; si = dst tte

        call    .fetch_next             ; di = second operand tte (si preserved)

        mov     eax, edi
        and     eax, 0xFF00
        cmp     eax, 0x1000             ; check if token is a memory address
        je      .operator_2_register_memory_access

        call    get_tte_type
                                        ; al = type of token
        cmp     al, 0x02                ; type: register
        je      .operator_2_register_register

        jmp     .next

                                        ; si = dst register tte
                                        ; di = memory token
.operator_2_register_memory_access:
        push    rsi
        mov     rsi, .msg_operator_2_register_memory_access
        call    print.debug
        pop     rsi

        cmp     di, 0x1000              ; check if token is addressing a register
        jne     .unsupported_memory_access ; if not, unsupported :/

        call    .fetch_next             ; di = register inside the brackets

        xchg    rdi, rsi                ; di = `reg` tte (dst register)
                                        ; si = `R/M` tte (register in brackets)
        mov     dl, 00b                 ; dl = mod bits
        call    get_ModRM
                                        ; al = Mod R/M byte
        call    .output_byte

        jmp     .next

                                        ; si = dst register tte
                                        ; di = src register tte
.operator_2_register_register:
        push    rsi
        mov     rsi, .msg_operator_2_register_register
        call    print.debug
        pop     rsi

        xchg    rdi, rsi                ; di = `reg` tte (dst, matching the
                                        ;      dst = reg opcode emitted above)
                                        ; si = `R/M` tte (src)
        mov     dl, 11b                 ; dl = mod bits
        call    get_ModRM
                                        ; al = Mod R/M byte
        call    .output_byte

        jmp     .next

; -- shared tails ---------------------------------------------------------------

; finished with the current instruction: advance to the next token or return
.next:
        pop     rax                     ; rax = index of the token just handled
        pop     rdi                     ; rdi = total number of tokens
        inc     rax                     ; rax = number of tokens processed
        cmp     rax, rdi
        jb      .loop
.break:
        ret

.unexpected_token:
        mov     rsi, .msg_unexpected_token
        call    print.error
        jmp     halt

.unsupported_memory_access:
        mov     rsi, .msg_unsupported_memory_access
        call    print.error
        jmp     halt

.output_full:
        mov     rsi, .msg_output_full
        call    print.error
        jmp     halt

; procedures

; advances to the next token of the current instruction.
; Must be called with the stack exactly as described in the header, i.e.
; [rsp] = return address, [rsp + 8] = index, [rsp + 16] = total.
; returns di = next tte (upper bits of rdi zeroed); clobbers rax.
; If the table is exhausted this does not return to its caller: it unwinds the
; loop state and returns from `assemble` itself.
.fetch_next:
        mov     rax, [rsp + 8]
        inc     rax
        mov     [rsp + 8], rax          ; one more token consumed
        cmp     rax, [rsp + 16]
        jae     .fetch_exhausted
        movzx   edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR]
        ret
.fetch_exhausted:
        add     rsp, 8                  ; drop the return address into the handler
        pop     rax                     ; rax = number of tokens processed
        pop     rdi                     ; rdi = total number of tokens
        ret                             ; return from assemble

; appends one byte to the output binary.
; al = byte to write; clobbers rdx.
; Jumps to the error path instead of writing past OUTPUT_ADDR + OUTPUT_SIZE.
.output_byte:
        mov     edx, [.next_output_byte] ; get output byte's address
        cmp     edx, OUTPUT_ADDR + OUTPUT_SIZE
        jae     .output_full            ; arena is full
        mov     [rdx], al               ; write byte to that address
        inc     edx                     ; increment address
        mov     [.next_output_byte], edx ; put output byte's address
        ret

.next_output_byte dd OUTPUT_ADDR        ; next empty byte in output
                                        ; TODO get rid of this sketchy bit of
                                        ; state (it is never reset, so a second
                                        ; call to assemble keeps appending)

.msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00
.msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00
.msg_output_full db "output too large, aborting", 0x0A, 0x00
.msg_operator_0 db "operator_0", 0x0A, 0x00
.msg_operator_1 db "operator_1", 0x0A, 0x00
.msg_operator_1_memory_access db "operator_1_memory_access", 0x0A, 0x00
.msg_operator_1_register db "operator_1_register", 0x0A, 0x00
.msg_operator_2 db "operator_2", 0x0A, 0x00
.msg_operator_2_memory_access db "operator_2_memory_access", 0x0A, 0x00
.msg_operator_2_memory_access_register db "operator_2_memory_access_register", 0x0A, 0x00
.msg_operator_2_register db "operator_2_register", 0x0A, 0x00
.msg_operator_2_register_memory_access db "operator_2_register_memory_access", 0x0A, 0x00
.msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00
; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
;   given a token table entry, returns the declared type in `tokens.by_id`. If
;   there is no entry, returns UNRECOGNISED_ID_TYPE.
;   `tokens.by_id` is scanned linearly as 4-byte entries: the word at offset 0
;   is compared against the token and the byte at offset 2 holds the type.
;
; parameters:
;   di = token table entry (the upper bits of rdi are ignored and cleared)
;
; returned:
;   al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
;        zeroed; the rest of rax is zeroed.
; ------------------------------------------------------------------------------

get_tte_type:
        and     rdi, 0xFFFF             ; mask input so it behaves as expected
        xor     eax, eax                ; rax = index into tokens.by_id

.loop:
        ; valid indices are 0 .. count - 1, so index == count is already out of
        ; range (an unsigned compare, since an index is never negative)
        cmp     rax, (tokens.by_id_end - tokens.by_id) / 4
        jae     .not_found

        cmp     di, [tokens.by_id + rax * 4] ; token of this entry
        je      .found

        inc     rax
        jmp     .loop

.not_found:
        mov     eax, UNRECOGNISED_ID_TYPE & 0xF ; mask as expected
        ret

.found:
        movzx   eax, byte [tokens.by_id + rax * 4 + 2]
        and     eax, 0xF                ; mask as expected
        ret
; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
;   given a token table entry, returns the declared typed metadata in
;   `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA.
;   `tokens.by_id` is scanned linearly as 4-byte entries: the word at offset 0
;   is compared against the token and the byte at offset 3 holds the metadata.
;
; parameters:
;   di = token table entry (the upper bits of rdi are ignored and cleared)
;
; returned:
;   al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax
;        is zeroed.
; ------------------------------------------------------------------------------

get_tte_typed_metadata:
        and     rdi, 0xFFFF             ; mask input so it behaves as expected
        xor     eax, eax                ; rax = index into tokens.by_id

.loop:
        ; valid indices are 0 .. count - 1, so index == count is already out of
        ; range (an unsigned compare, since an index is never negative)
        cmp     rax, (tokens.by_id_end - tokens.by_id) / 4
        jae     .not_found

        cmp     di, [tokens.by_id + rax * 4] ; token of this entry
        je      .found

        inc     rax
        jmp     .loop

.not_found:
        mov     eax, UNRECOGNISED_ID_METADATA
        ret

.found:
        movzx   eax, byte [tokens.by_id + rax * 4 + 3]
        ret
; ------------------------------------------------------------------------------
; get_ModRM
;
; description:
;   given 2 register tokens and the mod bits, returns the ModR/M byte
;       bits 7-6 = mod, bits 5-3 = reg bits of `reg`, bits 2-0 = reg bits of
;       `R/M`
;   where the reg bits of each token come from get_reg_bits.
;
; parameters:
;   di = token table entry `reg`
;   si = token table entry `R/M`
;   dl = lower 2 bits: mod bits. The rest is ignored
;
; returned:
;   al = ModR/M byte; the rest of rax is zeroed
;   rdi and dl are modified; rbx is preserved.
; ------------------------------------------------------------------------------

get_ModRM:
        push    rbx                     ; rbx accumulates the byte; keep the
                                        ; caller's value intact

        and     dl, 11b                 ; mask for mod bits
        shl     dl, 6
        movzx   ebx, dl                 ; ebx = mod bits in position

                                        ; di = `reg` tte
        call    get_reg_bits
                                        ; eax = reg bits (0..7)
        shl     eax, 3
        or      ebx, eax                ; ebx = mod | reg

        mov     rdi, rsi                ; do the other one
                                        ; di = `R/M` tte
        call    get_reg_bits
                                        ; eax = reg bits (0..7)
        or      eax, ebx                ; eax = mod | reg | R/M

        pop     rbx
        ret
; ------------------------------------------------------------------------------
; get_opcode
;
; description:
;   given an operator token, returns its opcode. For operators with multiple
;   opcodes, the variant can be specified.
;   `opcodes.by_id` is scanned linearly as 4-byte entries: the word at offset 0
;   is compared against the token, the byte at offset 2 is variant 0 and the
;   byte at offset 3 is variant 1.
;
; parameters:
;   di  = token table entry (the upper bits of rdi are ignored and cleared)
;   sil = lower bit: 0: dst=r/m or only opcode
;                    1: dst=reg or 0x00
;         (rsi is reduced to that single bit)
;
; returned:
;   al = opcode, or UNRECOGNISED_ID_OPCODE if the token has no entry; the rest
;        of rax is zeroed
; ------------------------------------------------------------------------------

get_opcode:
        and     rdi, 0xFFFF
        and     rsi, 1                  ; rsi = variant (0 or 1)
        xor     eax, eax                ; rax = index into opcodes.by_id

.loop:
        ; valid indices are 0 .. count - 1, so index == count is already out of
        ; range (an unsigned compare, since an index is never negative)
        cmp     rax, (opcodes.by_id_end - opcodes.by_id) / 4
        jae     .not_found

        cmp     di, [opcodes.by_id + rax * 4] ; token of this entry
        je      .found

        inc     rax
        jmp     .loop

.not_found:
        mov     eax, UNRECOGNISED_ID_OPCODE
        ret

.found:
        movzx   eax, byte [opcodes.by_id + rax * 4 + rsi + 2]
        ret
; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
;   given a register token, returns its reg bits: bits 4-2 of the token's typed
;   metadata as reported by get_tte_typed_metadata
;
; parameters:
;   di = token table entry
;
; returned:
;   al = reg bits of the token (0..7); the rest of rax, including the upper 5
;        bits of al, is zeroed.
; ------------------------------------------------------------------------------

get_reg_bits:
                                        ; di = tte
        call    get_tte_typed_metadata
                                        ; eax = typed metadata, zero-extended
        shr     eax, 2                  ; discard type data
        and     eax, 111b               ; keep the three reg bits
        ret
; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; tokenise
; TODO write tests
;
; description:
;   represents the program at the given address and puts it in the token table
;   it's probably desirable to clear the token table before calling this
;   function.
;   `.expecting` holds the E_* flags of what may legally come next; anything
;   else prints an "unexpected ..." error and jumps to `halt`. It is only
;   initialised by its `db`, so it carries over between calls.
;   Operands are evaluated by evaluate_operand: a register produces one token,
;   a `[register]` produces the token 0x1000 followed by the register token.
;
; parameters:
;   rdi -> first byte of program
;   rsi = size of program in bytes
;
; returned:
;   rax = number of tokens processed
; ------------------------------------------------------------------------------

tokenise:
                                        ; rdi -> current byte of program
        add     rsi, rdi                ; rsi -> one past the last byte
        xor     eax, eax                ; rax = number of tokens processed
        xor     edx, edx                ; dl = current byte of program

.loop:
        cmp     rdi, rsi                ; if the whole program has been read
        jae     .break                  ; then break

        mov     dl, [rdi]               ; dl = current byte

        cmp     dl, ";"                 ; if current byte is the start of a comment
        je      .comment                ; then handle the comment

        cmp     dl, 0x0A                ; if current byte is the end of a line
        je      .newline_mk_flags       ; then reset relevant flags

        cmp     dl, ","                 ; if current byte is a comma
        je      .comma                  ; then handle the comma

        push    rsi
        push    rdi
        push    rax
        push    rdx
                                        ; TODO probably should not ignore null bytes
        mov     rsi, whitespace_2       ; rsi -> list of whitespace (ignored) bytes
        mov     rdi, 2                  ; rdi = size of list in bytes
                                        ; dl = current byte
        call    elemb
                                        ; al = 0 if not whitespace, 1 if whitespace
        cmp     al, 1                   ; check if current byte is whitespace
        pop     rdx                     ; (pops leave the flags alone)
        pop     rax
        pop     rdi
        pop     rsi
        je      .skip_byte_whitespace

        test    byte [.expecting], E_OPERATOR ; check if an operator is expected
        jnz     .operator               ; if so, handle it
        jmp     .operand                ; otherwise, handle as an operand

.comment:
        push    rsi
        mov     rsi, .found
        call    print.debug
        mov     rsi, .msg_comment
        call    print
        pop     rsi

        test    byte [.expecting], E_COMMENT ; make sure a comment is expected
        jz      .unexpected_comment     ; if not, error

.comment_loop:
        cmp     rdi, rsi                ; a comment on the last line may end
        jae     .loop                   ; without a newline: stop at the end
        cmp     byte [rdi], 0x0A        ; if current byte is a newline
        je      .loop                   ; then let .loop handle the newline
        inc     rdi                     ; point to next unread byte
        jmp     .comment_loop

.skip_byte_whitespace:
        push    rsi
        mov     rsi, .found
        call    print.debug
        mov     rsi, .msg_whitespace
        call    print
        pop     rsi

        test    byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
        jz      .unexpected_whitespace  ; if not, error
        inc     rdi
        jmp     .loop                   ; else, loop

.comma:                                 ; found comma
        push    rsi
        mov     rsi, .found
        call    print.debug
        mov     rsi, .msg_comma
        call    print
        pop     rsi

        test    byte [.expecting], E_COMMA ; make sure a comma was expected
        jz      .unexpected_comma       ; if not, error
        inc     rdi
                                        ; else, make operand expected
        mov     byte [.expecting], E_WHITESPACE | E_OPERAND
        jmp     .loop                   ; and loop

.newline_mk_flags:
        push    rsi
        mov     rsi, .found
        call    print.debug
        mov     rsi, .msg_newline
        call    print
        pop     rsi

        test    byte [.expecting], E_NEWLINE ; make sure a newline was expected
        jz      .unexpected_newline     ; if not, error

        mov     byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR

        inc     rdi
        jmp     .loop

.operator:
                                        ; debug message
        push    rsi
        mov     rsi, .found
        call    print.debug
        mov     rsi, .msg_operator
        call    print
        pop     rsi

        push    rax                     ; save number of tokens processed
        xor     eax, eax                ; eax = number of bytes in operator
        mov     [.pending_operator], eax ; zero pending operator

.operator_loop:
        cmp     rdi, rsi                ; end of program also ends the operator
        jae     .operator_break

        mov     dl, [rdi]               ; next byte

                                        ; TODO have better check for operator end
        cmp     dl, " "
        je      .operator_break
        cmp     dl, 0x0A
        je      .operator_break
        cmp     dl, 0x00
        je      .operator_break
        cmp     dl, ";"
        je      .operator_break

                                        ; TODO give this its own error
                                        ; TODO make this pop rax
        cmp     eax, 4                  ; .pending_operator holds 4 bytes; a 5th
        jae     .unexpected_operator    ; byte is an error, and is not stored

        mov     [.pending_operator + rax], dl

        inc     eax                     ; inc byte counter
        inc     rdi                     ; inc byte pointer
        jmp     .operator_loop          ; and loop

.operator_break:
                                        ; rax already pushed from .operator
        push    rdi

        mov     edi, [.pending_operator] ; edi = operator to be searched
        call    identify_operator
                                        ; ax = operator's token ID
        mov     cx, ax                  ; cx = operator's token ID for safe keeping

        pop     rdi                     ; rdi -> first byte after the operator
        pop     rax                     ; rax = tokens processed

        mov     [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx
        inc     rax                     ; plus 1 token processed

        mov     byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
        jmp     .loop

.operand:
                                        ; debug message
        push    rsi
        mov     rsi, .found
        call    print.debug
        mov     rsi, .msg_operand
        call    print
        pop     rsi

        test    byte [.expecting], E_OPERAND ; make sure an operand was expected
        jz      .unexpected_operand     ; if not, error

        push    rax                     ; save number of tokens processed
        push    rdi                     ; save -> first byte of operand
        xor     eax, eax                ; rax = length of operand

.operand_loop:
        cmp     rdi, rsi                ; end of program also ends the operand
        jae     .operand_break

        mov     dl, [rdi]

        cmp     dl, ","
        je      .operand_break
        cmp     dl, 0x0A
        je      .operand_break
        cmp     dl, 0x00
        je      .operand_break
        cmp     dl, ";"
        je      .operand_break

        inc     rax                     ; inc length counter
        inc     rdi                     ; inc byte pointer
        jmp     .operand_loop

.operand_break:
        pop     rdi                     ; rdi -> first byte of operand
        push    rdi
        push    rsi
        push    rax                     ; keep the length on the stack so it
                                        ; survives whatever evaluate_operand
                                        ; does to the registers
        mov     rsi, rax                ; rsi = length of operand in bytes
        call    evaluate_operand
                                        ; dl = return code
                                        ; rax = binary data
        pop     rcx                     ; rcx = length of operand
        pop     rsi
        pop     rdi                     ; rdi -> first byte of operand
        add     rdi, rcx                ; rdi -> first byte after the operand
                                        ; (full-width pointer arithmetic)
        mov     rcx, rax                ; rcx = evaluate_operand's binary return data
        pop     rax                     ; rax = number of tokens processed

                                        ; operand is some reg
                                        ; cx = token ID
        cmp     dl, 0x00
        je      .operand_register

                                        ; operand is some [reg]
                                        ; cx = token ID
        cmp     dl, 0x10
        je      .operand_addr_register

        jmp     .unexpected_operand

                                        ; cx = token ID
.operand_register:
        mov     [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx
        inc     rax                     ; another token processed
        jmp     .operand_break_continue

                                        ; cx = token ID
.operand_addr_register:
        mov     word [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], 0x1000
        inc     rax                     ; 0x1000: addr reg token, next token is the register
        mov     [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx
        inc     rax                     ; the register as returned by evaluate_operand
        jmp     .operand_break_continue

.operand_break_continue:
        mov     byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
        jmp     .loop

.break:
        ret

; state

.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR

.unexpected_whitespace:
        mov     rsi, .err_unexpected
        call    print.error
        mov     rsi, .msg_whitespace
        call    print
        jmp     halt

.unexpected_comment:
        mov     rsi, .err_unexpected
        call    print.error
        mov     rsi, .msg_comment
        call    print
        jmp     halt

.unexpected_newline:
        mov     rsi, .err_unexpected
        call    print.error
        mov     rsi, .msg_newline
        call    print
        jmp     halt

.unexpected_comma:
        mov     rsi, .err_unexpected
        call    print.error
        mov     rsi, .msg_comma
        call    print
        jmp     halt

.unexpected_operand:
        mov     rsi, .err_unexpected
        call    print.error
        mov     rsi, .msg_operand
        call    print
        jmp     halt

.unexpected_operator:
        mov     rsi, .err_unexpected
        call    print.error
        mov     rsi, .msg_operator
        call    print
        jmp     halt

.err_unexpected db "unexpected ", 0x00
.found db "found ", 0x00
.msg_whitespace db "whitespace.", 0x0A, 0x00
.msg_comment db "comment.", 0x0A, 0x00
.msg_newline db "newline.", 0x0A, 0x00
.msg_comma db "comma.", 0x0A, 0x00
.msg_operator db "operator.", 0x0A, 0x00
.msg_operand db "operand.", 0x0A, 0x00
.pending_operator dd 0 ; the operator token that is pending processing
; ------------------------------------------------------------------------------
; evaluate_operand
;
; description:
;   takes the location and length of an operand and evaluates it into binary data
;   and a return code to interpret the binary data. Trailing whitespace is
;   trimmed first (trim_trailing_whitespace). An operand wrapped in `[` `]` is
;   evaluated recursively on its contents.
;
;   | code | rax contents         | notes |
;   |------|----------------------|-------|
;   | 0x00 | token ID of register | reg   |
;   | 0x10 | token ID of register | [reg] |
;   | 0xFF | -                    | error |
;
; parameters:
;   rdi -> first byte of operand
;   rsi = size of operand in bytes
;
; returned:
;   rax = binary data corresponding to the operand
;   dl = return code
;   rcx is left as it was on entry by the code in this routine.
; ------------------------------------------------------------------------------

evaluate_operand:
        push    rdi                     ; keep -> first byte across the call
        xchg    rdi, rsi                ; rdi = size of operand
                                        ; rsi -> start of operand
        call    trim_trailing_whitespace
                                        ; rax = size w/o trailing whitespace
        pop     rdi                     ; rdi -> first byte of operand
        mov     rsi, rax                ; rsi = trimmed size

        test    rsi, rsi                ; case: 0 length
        jz      .unrecognised

        cmp     byte [rdi], '['         ; anything not opening with a bracket
        jne     .register               ; is treated as a register

; case: memory addressing
.address:
        cmp     byte [rdi + rsi - 1], ']' ; the bracket has to be closed
        jne     .unrecognised

        inc     rdi                     ; rdi -> enclosed operand
        sub     rsi, 2                  ; rsi = length of enclosed operand
        call    evaluate_operand
                                        ; rax = binary data
                                        ; dl = return code

        cmp     dl, 0x10                ; a memory reference inside a memory
        je      .unrecognised           ; reference is rejected

        or      dl, 0x10                ; flip bit for address return
        ret

; case: register name of 1 to 4 bytes
.register:
        cmp     rsi, 4
        jg      .unrecognised

        push    rdi
        mov     edi, [rdi]              ; edi = 4 bytes starting at the operand

        ; keep only the low rsi bytes of edi: 0xFFFFFFFF >> ((4 - rsi) * 8)
        push    rcx
        mov     ecx, 4
        sub     ecx, esi
        shl     ecx, 3                  ; cl = number of bits to drop
        mov     eax, 0xFFFFFFFF
        shr     eax, cl                 ; eax = mask
        pop     rcx
        and     edi, eax                ; edi = register to be searched

        call    identify_register
                                        ; ax = register's token ID or
                                        ;      UNRECOGNISED_TOKEN_ID
        pop     rdi

        cmp     ax, UNRECOGNISED_TOKEN_ID
        je      .unrecognised

        mov     dl, 0x00
        ret

.unrecognised:
        mov     dl, 0xFF
        ret
; ------------------------------------------------------------------------------
; evaluate_constant
;
; description:
;   takes a constant and returns its value. Currently the following constants
;   are supported:
;
;   | type | p. | description  |
;   |------|----|--------------|
;   | 0x00 | 0x | hexadecimal  |
;   | 0x01 | 0q | octal        |
;   | 0x02 | 0b | binary       |
;   | 0x03 | "  | char         |
;   | 0xFF |    | unrecognised |
;
;   where `p.` is the prefix or otherwise indicator.
;
;   - hexadecimal digits are `0`-`9` and upper-case `A`-`F`
;   - a char constant is up to 4 printable bytes (0x20..0x7E) between double
;     quotes; the first char ends up in the lowest byte of rax
;   - a numeric prefix with no digits after it evaluates to 0
;   - anything else, including a constant shorter than 2 bytes, a digit that is
;     invalid for the base, or an unclosed char constant, is unrecognised
;
; parameters:
;   rdi -> first byte of constant
;   rsi = size of constant in bytes
;
; returned:
;   rax = value of the constant (unspecified when dl = 0xFF)
;   dl = type of constant; the rest of rdx is zeroed
;   rcx, rsi and rdi are modified.
; ------------------------------------------------------------------------------

evaluate_constant:
                                        ; rsi = number of bytes left
                                        ; rdi -> current byte of constant
        xor     eax, eax                ; rax = value of constant

        cmp     rsi, 2                  ; every constant is at least a 2-byte
        jb      .unrecognised           ; prefix or a pair of quotes

        movzx   edx, byte [rdi]         ; dl = first byte
        dec     rsi                     ; one fewer byte left
        inc     rdi                     ; point to next byte

        cmp     dl, '0'                 ; all numeric prefixes start with 0
        je      .numeric

        cmp     dl, '"'                 ; chr case
        jne     .unrecognised
        xor     ecx, ecx                ; rcx = number of chars read
        jmp     .chr

.numeric:
        movzx   edx, byte [rdi]         ; dl = base letter
        dec     rsi                     ; one fewer byte left
        inc     rdi                     ; point to next byte

        cmp     dl, 'x'                 ; hex case
        je      .hex_loop
        cmp     dl, 'q'                 ; octal case
        je      .oct_loop
        cmp     dl, 'b'                 ; binary case
        je      .bin_loop
        jmp     .unrecognised

.hex_loop:
        test    rsi, rsi                ; make sure we're in range
        jz      .hex_break              ; if not, break

        movzx   edx, byte [rdi]         ; dl = next byte of constant

        ; unsigned compares throughout: a byte below '0' wraps to a large value
        ; and must not pass for a digit
        sub     dl, '0'                 ; '0'..'9' -> 0..9
        cmp     dl, 9
        jbe     .hex_continue

        sub     dl, 'A' - '0'           ; 'A'..'F' -> 0..5
        cmp     dl, 0xF - 0xA
        ja      .unrecognised           ; neither digit nor A..F
        add     dl, 0xA                 ; -> 0xA..0xF

.hex_continue:
        shl     rax, 4                  ; make room for next hex digit
        or      al, dl                  ; and add newest nibble

        dec     rsi                     ; one fewer byte left
        inc     rdi                     ; point to next byte
        jmp     .hex_loop               ; and loop

.hex_break:
        xor     edx, edx                ; type 0x00: hexadecimal
        ret

.oct_loop:
        test    rsi, rsi                ; make sure we're in range
        jz      .oct_break              ; if not, break

        movzx   edx, byte [rdi]         ; dl = next byte of constant
        sub     dl, '0'
        cmp     dl, 7
        ja      .unrecognised           ; not in '0'..'7'

        shl     rax, 3                  ; make room for next octal digit
        or      al, dl                  ; and add newest 3-bit group

        dec     rsi                     ; one fewer byte left
        inc     rdi                     ; point to next byte
        jmp     .oct_loop               ; and loop

.oct_break:
        mov     edx, 0x01               ; type 0x01: octal
        ret

.bin_loop:
        test    rsi, rsi                ; range check
        jz      .bin_break

        movzx   edx, byte [rdi]
        sub     dl, '0'
        cmp     dl, 1
        ja      .unrecognised           ; not '0' or '1'

        shl     rax, 1
        or      al, dl                  ; add newest bit

        dec     rsi
        inc     rdi
        jmp     .bin_loop

.bin_break:
        mov     edx, 0x02               ; type 0x02: binary
        ret

; rsi >= 1 on entry (size >= 2, one byte consumed) and drops by one per char,
; so it always reaches 1: the byte that has to be the closing quote.
.chr:
        cmp     rsi, 1                  ; only the closing quote left?
        je      .chr_break

        cmp     rcx, 4                  ; ensure char is only 4 bytes long
        jae     .unrecognised

        movzx   edx, byte [rdi]

                                        ; bound check byte as printable char
        cmp     dl, 0x20
        jb      .unrecognised
        cmp     dl, 0x7E
        ja      .unrecognised

        ror     rax, 8                  ; park earlier chars in the top bytes,
        or      al, dl                  ; newest char in the lowest byte
        inc     rcx

        dec     rsi
        inc     rdi
        jmp     .chr

.chr_break:
        cmp     rcx, 1                  ; for each [1..rcx]
        jbe     .chr_break_for_good
        rol     rax, 8                  ; roll left to make up for the roll right earlier
        dec     rcx
        jmp     .chr_break

.chr_break_for_good:
        cmp     byte [rdi], '"'         ; make sure the chr is closed
        jne     .unrecognised

        mov     edx, 0x03               ; type 0x03: char
        ret

.unrecognised:
        mov     edx, 0xFF               ; unrecognised type
        ret
; ------------------------------------------------------------------------------
; identify_register
;
; description:
;   takes a register in ascii-encoded text and returns its token ID or
;   UNRECOGNISED_TOKEN_ID if not recognised.
;   `tokens.registers` is scanned in 6-byte steps: the dword at offset 0 of
;   each entry is compared against edi and the word at offset 4 is the token ID.
;
; parameters:
;   edi = register to be searched
;
; returned:
;   ax = register's token ID or UNRECOGNISED_TOKEN_ID
; ------------------------------------------------------------------------------

identify_register:
        xor     eax, eax                ; rax = byte offset into tokens.registers

.scan:
        cmp     eax, tokens.registers_end - tokens.registers
        jge     .unknown                ; walked off the end of the table

        cmp     [tokens.registers + rax], edi ; does this entry's text match?
        je      .match

        add     eax, 6                  ; step to the next entry
        jmp     .scan

.match:
        mov     ax, [tokens.registers + rax + 4] ; ax = token ID of the entry
        ret

.unknown:
        mov     ax, UNRECOGNISED_TOKEN_ID
        ret
; ------------------------------------------------------------------------------
; identify_operator
; TODO combine with identify_register
;
; description:
;   takes an operator in ascii-encoded text and returns its token ID or
;   UNRECOGNISED_TOKEN_ID if not recognised
;
;   tokens.operators is scanned linearly. each entry is 6 bytes: a 4-byte name
;   (`dd "..."`, which nasm pads with zero bytes) followed by a 2-byte token
;   ID. all 4 name bytes are compared at once, so a name shorter than 4
;   characters only matches when the unused high bytes of edi are zero.
;
; parameters:
;   edi = operator to be searched
;
; returned:
;   ax  = operator's token ID or UNRECOGNISED_TOKEN_ID
;         (zero-extended through rax)
;
; clobbers:
;   flags
; ------------------------------------------------------------------------------

identify_operator:
.ENTRY_SIZE equ 6                       ; dd name + dw token ID
.ID_OFFSET  equ 4                       ; token ID sits after the 4-byte name

    xor     eax, eax                    ; rax = byte offset into tokens.operators
.loop:
    cmp     eax, (tokens.operators_end - tokens.operators)
    jae     .not_found                  ; walked off the end of the table

    cmp     edi, [tokens.operators + rax]
    je      .found

    add     eax, .ENTRY_SIZE            ; step to the next entry
    jmp     .loop

.found:
    movzx   eax, word [tokens.operators + rax + .ID_OFFSET]
    ret

.not_found:
    mov     eax, UNRECOGNISED_TOKEN_ID
    ret
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; utilities
|
|
; ------------------------------------------------------------------------------
|
|
|
|
; ------------------------------------------------------------------------------
; print
;
; description:
;   writes a null-terminated string, one byte at a time, to i/o port 0x3F8
;   (conventionally the first serial port on PC hardware). the terminating
;   null is not written.
;   rax, rdx and rsi are saved and restored; flags are not preserved.
;
;   print.debug, print.error, print.test and print.warn are alternative entry
;   points taking the same parameter. each one first prints its prefix
;   ("[DEBUG]: ", "[ERROR]: ", "[TEST]: " or "[WARN]: ") and then the
;   caller's string.
;
; parameters:
;   rsi -> start of null-terminated string
; ------------------------------------------------------------------------------

print:
    push    rdx
    push    rax
    push    rsi

    mov     edx, 0x3F8                  ; dx = output port
    jmp     .fetch
.emit:
    out     dx, al
    inc     rsi
.fetch:
    mov     al, [rsi]
    test    al, al                      ; stop at the terminating null
    jnz     .emit

    pop     rsi
    pop     rax
    pop     rdx
    ret

.debug:
    push    rsi                         ; keep the caller's string for later
    mov     rsi, .debug_msg
    jmp     .prefixed

.error:
    push    rsi
    mov     rsi, .error_msg
    jmp     .prefixed

.test:
    push    rsi
    mov     rsi, .test_msg
    jmp     .prefixed

.warn:
    push    rsi
    mov     rsi, .warn_msg
    ; falls through into .prefixed

; shared tail of the prefixed entry points
; here: rsi -> prefix, [rsp] = caller's string pointer
.prefixed:
    call    print                       ; prefix first
    pop     rsi
    call    print                       ; then the caller's string
    ret

.debug_msg: db "[DEBUG]: ", 0x00
.error_msg: db "[ERROR]: ", 0x00
.test_msg:  db "[TEST]: ", 0x00
.warn_msg:  db "[WARN]: ", 0x00
|
|
|
|
; ------------------------------------------------------------------------------
; halt
;
; description:
;   halts the program, silly :)
;
;   prints msg_halt once and then parks the cpu in `hlt`. if the cpu is woken
;   again, it goes straight back to `hlt` without printing the message a second
;   time. never returns; reach it with `jmp halt`.
; ------------------------------------------------------------------------------

halt:
    mov     rsi, msg_halt
    call    print
.spin:
    hlt
    jmp     .spin                       ; woken up: go back to sleep quietly
|
|
|
|
; ------------------------------------------------------------------------------
; elemb
;
; description:
;   checks if given byte is element of the specified list.
;   the list is scanned front to back and the scan stops at the first match.
;
; parameters:
;   rdi = size of list
;   rsi -> start of list
;   dl  = given byte
;
; returned:
;   rax = 0: is not an element
;         1: is an element
;
; clobbers:
;   rsi   (-> the matching byte, or one past the end of the list if no match)
;   rdi   (= number of bytes not yet passed over; 0 if no match)
;   flags
; ------------------------------------------------------------------------------

elemb:
.loop:
    test    rdi, rdi                    ; any bytes left to examine?
    jz      .not_found                  ; no: dl is not an element of the list

    cmp     [rsi], dl                   ; current list byte == wanted byte?
    je      .found

    inc     rsi                         ; move to next byte
    dec     rdi                         ; and reduce remaining length
    jmp     .loop

.not_found:
    xor     eax, eax                    ; return 0
    ret

.found:
    mov     eax, 1                      ; return 1 (upper half of rax is zeroed)
    ret
|
|
|
|
; ------------------------------------------------------------------------------
; djb2
;
; description:
;   gets the 64-bit djb2 hash of a given string:
;   start from 5381, then for every byte do hash = hash * 33 + byte
;   (wrapping at 64 bits).
;
; parameters:
;   rdi = size of string (compared as a signed value; a size <= 0 hashes
;         nothing and returns 5381)
;   rsi -> start of string
;
; returned:
;   rax = hash
;
; clobbers:
;   rcx   (index; equals rdi on return when rdi > 0)
;   rdx   (only when at least one byte was hashed)
;   flags
; ------------------------------------------------------------------------------

djb2:
    mov     eax, 5381                   ; rax = hash, starting value
    xor     ecx, ecx                    ; rcx = index
    jmp     .check

.mix:
    mov     rdx, rax
    shl     rdx, 5
    add     rax, rdx                    ; hash = hash * 32 + hash

    movzx   edx, byte [rsi + rcx]       ; rdx = current byte
    add     rax, rdx

    inc     rcx
.check:
    cmp     rcx, rdi
    jl      .mix                        ; more bytes to go

    ret
|
|
|
|
; ------------------------------------------------------------------------------
; trim_trailing_whitespace
;
; description:
;   trims whitespace from the end of the given byte array. only the size is
;   reduced; the bytes themselves are not modified and nothing is trimmed from
;   the start.
;   a byte counts as whitespace when it is one of the 2 bytes in whitespace_2.
;
;   implemented as a loop, so stack use does not grow with the amount of
;   trailing whitespace.
;
; parameters:
;   rdi = size of list
;   rsi -> start of list
;
; returned:
;   rax = new size of list
;
; clobbers:
;   rdi   (= new size of list, same as rax)
;   dl    (last byte examined, when the list was not empty)
;   flags
;   rsi is preserved
; ------------------------------------------------------------------------------

trim_trailing_whitespace:
.next:
    test    rdi, rdi                    ; list of length zero
    jz      .done                       ; already trimmed

    push    rdi                         ; elemb overwrites rdi and rsi
    push    rsi

    mov     dl, [rsi + rdi - 1]         ; last element of the list so far
    mov     rsi, whitespace_2           ; pointer of whitespace list
    mov     rdi, 2                      ; length of whitespace list
    call    elemb

    pop     rsi                         ; rsi -> start of list
    pop     rdi                         ; rdi = size of list

    test    al, al                      ; al = 0: last element is not whitespace
    jz      .done                       ; so there is nothing more to trim

    dec     rdi                         ; otherwise one shorter
    jmp     .next                       ; and look at the new last element

.done:
    mov     rax, rdi
    ret
|
|
|
|
; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
;   zero-fills the token table: TOKEN_TABLE_SIZE / 4 double words starting at
;   TOKEN_TABLE_ADDR.
;   uses `rep stosd`, so it relies on the direction flag being clear.
;
; clobbers:
;   rax (= 0), rcx (= 0), rdi (-> just past the cleared area)
; ------------------------------------------------------------------------------

clear_token_table:
    mov     rdi, TOKEN_TABLE_ADDR       ; where to start writing
    mov     ecx, TOKEN_TABLE_SIZE / 4   ; how many double words to write
    xor     eax, eax                    ; the value written to each of them
    rep stosd
    ret
|
|
|
|
; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
;   clears the test arena as specified by TEST_ARENA_SIZE and TEST_ARENA_ADDR
;   (zero-fills TEST_ARENA_SIZE / 4 double words starting at TEST_ARENA_ADDR).
;   uses `rep stosd`, so it relies on the direction flag being clear.
;
;   NOTE: this routine used to be a verbatim copy of clear_token_table and
;   wiped the token table instead of the test arena.
;
; clobbers:
;   rax (= 0), rcx (= 0), rdi (-> just past the cleared area)
; ------------------------------------------------------------------------------

clear_test_arena:
    xor     eax, eax                    ; value to write
    mov     rcx, TEST_ARENA_SIZE / 4    ; number of double words
    mov     rdi, TEST_ARENA_ADDR        ; address to start
    rep stosd
    ret
|
|
|
|
; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
;   zero-fills the output arena: OUTPUT_SIZE / 4 double words starting at
;   OUTPUT_ADDR.
;   uses `rep stosd`, so it relies on the direction flag being clear.
;
; clobbers:
;   rax (= 0), rcx (= 0), rdi (-> just past the cleared area)
; ------------------------------------------------------------------------------

clear_output_arena:
    mov     rdi, OUTPUT_ADDR            ; where to start writing
    mov     ecx, OUTPUT_SIZE / 4        ; how many double words to write
    xor     eax, eax                    ; the value written to each of them
    rep stosd
    ret
|
|
|
|
%include "asm/tests.asm"
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; data
|
|
; ------------------------------------------------------------------------------
|
|
|
|
; tokens
;
; three tables, each bracketed by a start label and an *_end label:
;
;   tokens.by_id      4-byte entries: dw token ID, db type, db metadata
;                       type 0x01 = operator, metadata = number of operands
;                       type 0x02 = register, metadata = reg number in
;                                   bits 4..2 and width in bits 1..0
;   tokens.operators  6-byte entries: dd name, dw token ID
;   tokens.registers  6-byte entries: dd name, dw token ID
;
; names are at most 4 characters; nasm pads shorter `dd` strings with zero
; bytes.

%macro tok_info_entry 3                 ; token ID, type, metadata
    dw      %1
    db      %2
    db      %3
%endmacro

%macro tok_name_entry 2                 ; name, token ID
    dd      %1
    dw      %2
%endmacro

tokens:
.by_id:
    tok_info_entry 0x0010, 0x02, 00000010b  ; eax: register, reg 000b, width 10b (32 bits)
    tok_info_entry 0x0000, 0x02, 00000011b  ; rax: register, reg 000b, width 11b (64 bits)
    tok_info_entry 0x0003, 0x02, 00001011b  ; rdx: register, reg 010b, width 11b (64 bits)
    tok_info_entry 0x0053, 0x01, 0x02       ; xor: operator, 2 operands
    tok_info_entry 0x0054, 0x01, 0x01       ; inc: operator, 1 operand
    tok_info_entry 0x0056, 0x01, 0x02       ; mov: operator, 2 operands
    tok_info_entry 0x004F, 0x01, 0x00       ; hlt: operator, 0 operands
.by_id_end:

.operators:
    ; two-character names
    tok_name_entry "je",   0x005C
    tok_name_entry "jg",   0x005F
    tok_name_entry "jl",   0x0061
    ; three-character names
    tok_name_entry "hlt",  0x004F
    tok_name_entry "xor",  0x0053
    tok_name_entry "inc",  0x0054
    tok_name_entry "dec",  0x0055
    tok_name_entry "mov",  0x0056
    tok_name_entry "add",  0x0057
    tok_name_entry "sub",  0x0058
    tok_name_entry "ret",  0x005A
    tok_name_entry "cmp",  0x005B
    tok_name_entry "jne",  0x005D
    tok_name_entry "jge",  0x005E
    tok_name_entry "jle",  0x0060
    ; four-character names
    tok_name_entry "int3", 0x0050
    tok_name_entry "call", 0x0059
.operators_end:

.registers:
    ; two-character names
    tok_name_entry "r8",   0x0008
    tok_name_entry "r9",   0x0009
    tok_name_entry "ax",   0x0020
    tok_name_entry "bx",   0x0021
    tok_name_entry "cx",   0x0022
    tok_name_entry "dx",   0x0023
    tok_name_entry "si",   0x0024
    tok_name_entry "di",   0x0025
    tok_name_entry "sp",   0x0026
    tok_name_entry "bp",   0x0027
    tok_name_entry "al",   0x0030
    tok_name_entry "bl",   0x0031
    tok_name_entry "cl",   0x0032
    tok_name_entry "dl",   0x0033
    tok_name_entry "ah",   0x0040
    tok_name_entry "bh",   0x0041
    tok_name_entry "ch",   0x0042
    tok_name_entry "dh",   0x0043
    tok_name_entry "cs",   0x0044
    tok_name_entry "ds",   0x0045
    tok_name_entry "es",   0x0046
    tok_name_entry "fs",   0x0047
    tok_name_entry "gs",   0x0048
    tok_name_entry "ss",   0x0049
    ; three-character names
    tok_name_entry "rax",  0x0000
    tok_name_entry "rbx",  0x0001
    tok_name_entry "rcx",  0x0002
    tok_name_entry "rdx",  0x0003
    tok_name_entry "rsi",  0x0004
    tok_name_entry "rdi",  0x0005
    tok_name_entry "rsp",  0x0006
    tok_name_entry "rbp",  0x0007
    tok_name_entry "r10",  0x000A
    tok_name_entry "r11",  0x000B
    tok_name_entry "r12",  0x000C
    tok_name_entry "r13",  0x000D
    tok_name_entry "r14",  0x000E
    tok_name_entry "r15",  0x000F
    tok_name_entry "eax",  0x0010
    tok_name_entry "ebx",  0x0011
    tok_name_entry "ecx",  0x0012
    tok_name_entry "edx",  0x0013
    tok_name_entry "esi",  0x0014
    tok_name_entry "edi",  0x0015
    tok_name_entry "esp",  0x0016
    tok_name_entry "ebp",  0x0017
    tok_name_entry "r8d",  0x0018
    tok_name_entry "r9d",  0x0019
    tok_name_entry "r8w",  0x0028
    tok_name_entry "r9w",  0x0029
    tok_name_entry "sil",  0x0034
    tok_name_entry "dil",  0x0035
    tok_name_entry "spl",  0x0036
    tok_name_entry "bpl",  0x0037
    tok_name_entry "r8b",  0x0038
    tok_name_entry "r9b",  0x0039
    tok_name_entry "cr0",  0x004A
    tok_name_entry "cr2",  0x004B
    tok_name_entry "cr3",  0x004C
    tok_name_entry "cr4",  0x004D
    tok_name_entry "cr8",  0x004E
    ; four-character names
    tok_name_entry "r10d", 0x001A
    tok_name_entry "r11d", 0x001B
    tok_name_entry "r12d", 0x001C
    tok_name_entry "r13d", 0x001D
    tok_name_entry "r14d", 0x001E
    tok_name_entry "r15d", 0x001F
    tok_name_entry "r10w", 0x002A
    tok_name_entry "r11w", 0x002B
    tok_name_entry "r12w", 0x002C
    tok_name_entry "r13w", 0x002D
    tok_name_entry "r14w", 0x002E
    tok_name_entry "r15w", 0x002F
    tok_name_entry "r10b", 0x003A
    tok_name_entry "r11b", 0x003B
    tok_name_entry "r12b", 0x003C
    tok_name_entry "r13b", 0x003D
    tok_name_entry "r14b", 0x003E
    tok_name_entry "r15b", 0x003F
.registers_end:
|
|
|
|
; opcodes
;
; opcodes.by_id holds 4-byte entries: dw token ID of the operator, followed by
; two opcode bytes. for xor and mov the first byte is the "r/m <- reg" form and
; the second the "reg <- r/m" form; inc and hlt only fill the first byte and
; leave the second as 0x00.

%macro opcode_by_id_entry 3             ; token ID, first byte, second byte
    dw      %1
    db      %2
    db      %3
%endmacro

opcodes:
.by_id:
    opcode_by_id_entry 0x0053, 0x31, 0x33   ; xor: r/m <- reg, reg <- r/m
    opcode_by_id_entry 0x0054, 0xFF, 0x00   ; inc: r/m
    opcode_by_id_entry 0x0056, 0x89, 0x8B   ; mov: r/m <- reg, reg <- r/m
    opcode_by_id_entry 0x004F, 0xF4, 0x00   ; hlt
.by_id_end:
|
|
|
|
; null-terminated messages, each ending in a line feed
msg_welcome:    db "Welcome to Twasm", 0x0A, 0x00   ; printed by start
msg_halt:       db "halted.", 0x0A, 0x00            ; printed by halt

; the 2 bytes trim_trailing_whitespace treats as whitespace:
; space and carriage return (not null-terminated; the length is passed by the
; caller)
whitespace_2:   db " ", 0x0D
|
|
|
|
; test program
;
; source text handed to tokenise by start. lines end in a line feed and the
; text is not null-terminated; its length in bytes is stored as a quad word at
; program.size.
program:
    db      "xor eax, eax", 0x0A
    db      "inc rax ; inline comment", 0x0A
    db      "; one line comment", 0x0A
    db      "mov rdx, [rax]", 0x0A
    db      "mov [rax], rdx", 0x0A
    db      "hlt", 0x0A
.size:
    dq      $ - program                 ; number of bytes between program and here

; presumably a visual marker for spotting the end of the image in a dump
msg_end:
    db      "end of the binary ->|", 0x0A, 0x00
|