2720 lines
63 KiB
NASM
2720 lines
63 KiB
NASM
; ------------------------------------------------------------------------------
; memory map and build-wide constants
;
; Every region below is 0x10000 bytes long and the regions are laid out back to
; back, so each base address is written as "previous base + previous size".
; TODO actually enforce any of these *_SIZE constants :p
; ------------------------------------------------------------------------------

LOAD_ADDR equ 0x00010000                ; address this program is loaded at

; `start` loads rsp with this value. Pushes move rsp downwards, so the stack
; occupies the memory below STACK_ADDR and does not collide with the
; awaiting-label table that begins at the same address.
STACK_ADDR equ 0x00030000               ; address to put the 64-bit stack at

AWAITING_LABEL_TABLE_ADDR equ 0x00030000 ; address to store pending labels at
AWAITING_LABEL_TABLE_SIZE equ 0x00010000 ; size reserved for pending labels

LABEL_TABLE_ADDR equ AWAITING_LABEL_TABLE_ADDR + AWAITING_LABEL_TABLE_SIZE ; = 0x00040000, address to store labels at
LABEL_TABLE_SIZE equ 0x00010000         ; size reserved for labels

TEST_ARENA_ADDR equ LABEL_TABLE_ADDR + LABEL_TABLE_SIZE ; = 0x00050000, address to run tests at
TEST_ARENA_SIZE equ 0x00010000          ; maximum size tests can use

TOKEN_TABLE_ADDR equ TEST_ARENA_ADDR + TEST_ARENA_SIZE ; = 0x00060000, address the token table is loaded at
TOKEN_TABLE_SIZE equ 0x00010000         ; max length of table

OUTPUT_ADDR equ TOKEN_TABLE_ADDR + TOKEN_TABLE_SIZE ; = 0x00070000, address of the output binary
OUTPUT_SIZE equ 0x00010000              ; max length of the output binary

; values reported for ids that have no entry in the lookup tables
UNRECOGNISED_TOKEN_ID equ 0xFFFF        ; id of an unrecognised token
UNRECOGNISED_ID_TYPE equ 0x0F           ; type of an unrecognised id
UNRECOGNISED_ID_METADATA equ 0xFF       ; metadata of an unrecognised id
UNRECOGNISED_ID_OPCODE equ 0x90         ; opcode of an unrecognised id (NOP)

TEST_LINE_LENGTH equ 80                 ; right border of test suite results

; flags for expected values in tokeniser (one bit each, OR-ed together)
E_COMMENT equ 0x01
E_NEWLINE equ 0x02
E_WHITESPACE equ 0x04
E_COMMA equ 0x08
E_OPERATOR equ 0x10
E_OPERAND equ 0x20
E_LABEL equ 0x40

[bits 64]
[org LOAD_ADDR]
[default abs]                           ; TODO see if I actually need to do this
                                        ; afaik absolute addressing is not
                                        ; harmful on bare metal
                                        ; reasoning: stops annoying warning =D
|
|
|
|
start:
  ; entry point: set up the stack, run the self tests, then tokenise and
  ; assemble the embedded `program`, and finally halt.
  mov esp, STACK_ADDR                   ; rsp = STACK_ADDR (32-bit write zero-extends);
                                        ; we might need more stack space, let's
                                        ; just be safe

  lea rsi, [msg_welcome]                ; rsi -> welcome message
  call print

  call run_tests

  ; start from empty tables
  call clear_token_table
  call clear_label_tables

  mov edi, program                      ; rdi -> program
  mov rsi, [program.size]               ; rsi = size of program
  call tokenise
  ; rax = number of tokens in token table

  mov rdi, rax                          ; rdi = number of tokens in token table
  push rdi                              ; keep the count across the call below
  call clear_output_arena
  pop rdi                               ; rdi = number of tokens in token table
  call assemble

  jmp halt
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; assembling
|
|
; ------------------------------------------------------------------------------
|
|
|
|
; ------------------------------------------------------------------------------
; assemble
;
; description:
; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
; binary located at OUTPUT_ADDR.
; Each instruction is built in `.buffer`: opcode and ModR/M bytes are appended
; with `.write_byte`, prefixes are inserted at the front with `.push_byte`. The
; buffer is flushed to the output when the next instruction starts and when the
; token table is exhausted.
; An empty token table (rdi = 0) produces no output.
; If the table ends in the middle of an instruction, the bytes built so far are
; flushed and the procedure returns.
; On an unexpected token, an unsupported memory access or an operand size
; mismatch a message is printed and execution jumps to `halt`.
; Behaviour is undefined when:
; - tokens are in an impossible order
;     0x1000 ; memory address, following token should be a register
;     0x1000 ; not a register
; - operator tokens followed by the wrong number of arguments
;   TODO enforce this in `tokenise`
;     0x004F ; hlt, expects 0 arguments
;     0x0000 ; rax, an argument
; - an undefined token is included, like 0x0051
;
; parameters:
; rdi = number of tokens in the token table
;
; notes:
; `print.debug` is called with live values in rdi, rcx and rdx, so it is relied
; on to preserve them. rsi is saved by hand wherever it is live.
; ------------------------------------------------------------------------------

assemble:
  ; TODO deal with src=imm and src=imm8
  xor eax, eax                          ; eax = index of the current token
  mov [.tokens_processed], eax
  mov [.tokens_total], edi              ; edi = total number of tokens in table
  test edi, edi                         ; nothing to assemble for an empty table:
  jz .break                             ; do not read token 0

.loop:
  call .flush_write_buffer              ; emit the previous instruction (keeps rax)
  call .get_next_tte
  ; di = tte
  call get_tte_type
  ; al = type

  cmp al, 0x1                           ; every instruction starts with an operator
  jne .unexpected_token                 ; anything else: fail

.operator:                              ; next tte's type is an operator
  ; di = tte of operator
  call get_tte_typed_metadata
  ; al = tte typed metadata

  cmp al, UNRECOGNISED_ID_METADATA      ; make sure token has metadata on record
  je .unexpected_token                  ; if not, fail

  and al, 11b                           ; al = number of operands
  jz .operator_0
  cmp al, 1
  je .operator_1
  cmp al, 2
  je .operator_2
  jmp .unexpected_token                 ; three operands are not supported

; --- operators without operands: just the opcode ------------------------------

.operator_0:
  mov rsi, .msg_operator_0
  call print.debug

  ; di = tte of operator
  xor esi, esi                          ; opcode variant 0
  call get_opcode
  ; al = opcode
  call .write_byte

  jmp .loop_next_token

; --- operators with one operand: opcode, ModR/M with op flag in `reg` ---------

.operator_1:
  mov rsi, .msg_operator_1
  call print.debug

  ; di = tte of operator
  xor esi, esi                          ; opcode variant 0: dst=r/m
  call get_opcode
  ; al = opcode
  ; dl = op flag
  call .write_byte                      ; preserves every register, dl included

  call .next_token
  jae .break
  call .get_next_tte
  ; di = tte of the operand

  call .is_memory_token
  je .operator_1_memory

  push rdx
  call get_tte_type
  ; al = type of token
  pop rdx                               ; dl = op flag

  cmp al, 0x02                          ; type: register
  je .operator_1_register

  jmp .unexpected_token

.operator_1_memory:
  mov rsi, .msg_operator_1_memory
  call print.debug
  jmp .unsupported_memory_access

.operator_1_register:
  mov rsi, .msg_operator_1_register
  call print.debug

  ; di = tte of the register
  call get_tte_typed_metadata
  and al, 11b                           ; al = register width
  jz .unexpected_token                  ; 8 bit; TODO handle 8 bit opcodes
  call .push_operand_size_prefix        ; 0x66 / nothing / 0x48

.operator_1_register_no_prefix:
  mov esi, edi                          ; si = `R/M` tte
  and edx, 0xFF
  or edx, 0xFE00                        ; 0xFEXX: pass the op flag as direct value
  mov edi, edx                          ; di = op flag marker
  mov edx, 11b                          ; dl = mod bits: register direct
  call get_ModRM
  ; al = Mod R/M byte
  call .write_byte

  jmp .loop_next_token

; --- operators with two operands ----------------------------------------------

.operator_2:
  mov rsi, .msg_operator_2
  call print.debug

  mov ecx, edi                          ; cx = tte of operator

  call .next_token
  jae .break
  call .get_next_tte
  ; di = first token of dst

  call .is_memory_token
  je .operator_2_memory

  push rcx
  call get_tte_type
  ; al = type of token
  pop rcx                               ; cx = tte of operator

  cmp al, 0x02                          ; type: register
  je .operator_2_register

  jmp .unexpected_token

; dst is memory: `op [reg], reg`

.operator_2_memory:
  mov rsi, .msg_operator_2_memory
  call print.debug

  cmp di, 0x1000                        ; only plain [reg] addressing is supported
  jne .unsupported_memory_access

  mov edi, ecx                          ; di = tte of operator
  xor esi, esi                          ; opcode variant 0: dst=r/m; src=r
  call get_opcode
  ; al = opcode
  ; dl = op flag (unused here)
  call .write_byte

  call .next_token
  jae .break
  call .get_next_tte
  ; di = tte of the address register
  call get_tte_typed_metadata
  and al, 11b                           ; al = register width

  cmp al, 11b                           ; 64 bit: default address size
  je .operator_2_memory_64
  cmp al, 10b                           ; 32 bit: needs the 0x67 prefix
  je .operator_2_memory_32
  jmp .size_mismatch                    ; 16 bit, 8 bit: not valid for addressing

.operator_2_memory_64:
  mov byte [.pending_address_prefix], 0x00
  jmp .operator_2_memory_continue

.operator_2_memory_32:
  ; The prefix is only recorded here. It is inserted after the operand-size
  ; prefix of the src register so that a REX prefix, if any, stays directly in
  ; front of the opcode (0x67 0x48 op, not 0x48 0x67 op).
  mov byte [.pending_address_prefix], 0x67

.operator_2_memory_continue:
  mov esi, edi                          ; si = dst (address) register tte

  call .next_token
  jae .break
  call .get_next_tte
  ; di = tte of src

  call .is_memory_token
  je .unsupported_memory_access         ; no case of *],[* in asm

  call get_tte_type
  ; al = type of token
  cmp al, 0x02                          ; check if token is a register
  je .operator_2_memory_register

  jmp .unexpected_token

.operator_2_memory_register:
  push rsi
  mov rsi, .msg_operator_2_memory_register
  call print.debug
  pop rsi                               ; si = r/m; dst tte

  ; di = src tte
  call get_tte_typed_metadata
  and al, 11b                           ; al = width of the src register
  jz .unexpected_token                  ; 8 bit; TODO handle 8 bit opcodes
  call .push_operand_size_prefix        ; 0x66 / nothing / 0x48

.operator_2_memory_register_continue:
  mov al, [.pending_address_prefix]
  test al, al
  jz .operator_2_memory_register_modrm
  call .push_byte                       ; 0x67 goes in front of everything else

.operator_2_memory_register_modrm:
  ; si = r/m; dst tte
  ; di = reg; src tte
  xor edx, edx                          ; dl = mod bits: 00b, register indirect
  call get_ModRM
  ; al = Mod R/M byte
  call .write_byte

  jmp .loop_next_token

; dst is a register: `op reg, reg` or `op reg, [reg]`

.operator_2_register:
  mov rsi, .msg_operator_2_register
  call print.debug

  push rdi                              ; keep dst tte
  mov edi, ecx                          ; di = tte of operator
  mov esi, 1                            ; opcode variant 1: dst = reg
  call get_opcode
  ; al = opcode
  ; dl = op flag
  ; TODO do something if the op flag is present
  call .write_byte
  pop rdi                               ; di = dst tte

  call get_tte_typed_metadata
  and al, 11b                           ; al = width of the dst register
  jz .unexpected_token                  ; 8 bit; TODO handle 8 bit opcodes
  call .push_operand_size_prefix        ; 0x66 / nothing / 0x48

.operator_2_register_continue:
  mov esi, edi                          ; si = dst tte

  call .next_token
  jae .break
  call .get_next_tte
  ; di = first token of src

  call .is_memory_token
  je .operator_2_register_memory

  call get_tte_type
  ; al = type of token
  cmp al, 0x02                          ; check if token is a register
  je .operator_2_register_register

  jmp .unexpected_token

.operator_2_register_memory:
  push rsi
  mov rsi, .msg_operator_2_register_memory
  call print.debug
  pop rsi                               ; si = dst tte

  cmp di, 0x1000                        ; only plain [reg] addressing is supported
  jne .unsupported_memory_access

  call .next_token
  jae .break
  call .get_next_tte
  ; di = tte of the address register
  call get_tte_typed_metadata
  and al, 11b                           ; al = register width

  cmp al, 11b                           ; 64 bit: default address size; no prefix
  je .operator_2_register_memory_continue
  cmp al, 10b                           ; 32 bit
  jne .size_mismatch                    ; 16 bit, 8 bit: not valid for addressing

.operator_2_register_memory_32:
  mov al, 0x67                          ; address-size prefix, ends up in front of
  call .push_byte                       ; the operand-size/REX prefix pushed above

.operator_2_register_memory_continue:
  ; si = dst tte (goes into `reg`)
  ; di = src address register tte (goes into `R/M`)
  xchg esi, edi                         ; di = reg tte; si = R/M tte
  xor edx, edx                          ; dl = mod bits: 00b, register indirect
  call get_ModRM
  ; al = Mod R/M byte
  call .write_byte

  jmp .loop_next_token

.operator_2_register_register:
  push rsi
  mov rsi, .msg_operator_2_register_register
  call print.debug
  pop rsi                               ; si = dst tte

  ; di = src tte
  call get_tte_typed_metadata
  and al, 11b                           ; al = width of the src register
  jz .unexpected_token                  ; 8 bit; TODO handle 8 bit opcodes

  ; The prefix for the dst register is already in the buffer. The src register
  ; must have the same width, i.e. the buffer must hold exactly the prefix the
  ; src width calls for.
  cmp al, 01b                           ; 16 bit
  je .operator_2_register_register_16
  cmp al, 10b                           ; 32 bit
  je .operator_2_register_register_32
  ; 11b: 64 bit

.operator_2_register_register_64:
  mov dl, 0x66                          ; no operand-size prefix allowed
  call .buffer_contains
  test al, al
  jnz .size_mismatch
  mov dl, 0x48                          ; REX.W required
  call .buffer_contains
  cmp al, 1
  jne .size_mismatch
  jmp .operator_2_register_register_continue

.operator_2_register_register_16:
  mov dl, 0x66                          ; operand-size prefix required
  call .buffer_contains
  cmp al, 1
  jne .size_mismatch
  mov dl, 0x48                          ; no REX.W allowed
  call .buffer_contains
  test al, al
  jnz .size_mismatch
  jmp .operator_2_register_register_continue

.operator_2_register_register_32:
  mov dl, 0x66                          ; no operand-size prefix allowed
  call .buffer_contains
  test al, al
  jnz .size_mismatch
  mov dl, 0x48                          ; no REX.W allowed
  call .buffer_contains
  test al, al
  jnz .size_mismatch

.operator_2_register_register_continue:
  ; si = dst tte (goes into `reg`)
  ; di = src tte (goes into `R/M`)
  xchg esi, edi                         ; di = reg tte; si = R/M tte
  mov edx, 11b                          ; dl = mod bits: register direct
  call get_ModRM
  ; al = Mod R/M byte
  call .write_byte

.loop_next_token:
  call .next_token
  jb .loop                              ; more tokens left: next instruction

.break:
  call .flush_write_buffer
  ret

; --- fatal errors -------------------------------------------------------------

.unexpected_token:
  mov rsi, .msg_unexpected_token
  call print.error
  jmp halt

.unsupported_memory_access:
  mov rsi, .msg_unsupported_memory_access
  call print.error
  jmp halt

.size_mismatch:
  mov rsi, .msg_size_mismatch
  call print.error
  jmp halt

; --- local helpers of the dispatch code above ---------------------------------

; di = tte
; returns ZF set if the upper byte of the tte is 0x10 (memory address token).
; Preserves every register.
.is_memory_token:
  push rdi
  and di, 0xFF00
  cmp di, 0x1000
  pop rdi                               ; pop and ret leave the flags alone
  ret

; al = register width (01b = 16 bit, 10b = 32 bit, 11b = 64 bit)
; inserts the matching prefix at the front of the buffer: 0x66 for 16 bit,
; 0x48 (REX.W) for 64 bit, nothing for 32 bit. Clobbers al and the flags.
.push_operand_size_prefix:
  cmp al, 01b
  je .push_operand_size_prefix_16
  cmp al, 11b
  je .push_operand_size_prefix_64
  ret                                   ; default operand size; no prefix
.push_operand_size_prefix_16:
  mov al, 0x66
  jmp .push_byte                        ; tail call; returns to our caller
.push_operand_size_prefix_64:
  mov al, 0x48
  jmp .push_byte                        ; tail call; returns to our caller

; dl = byte to look for
; returns al = result of `elemb` over the whole instruction buffer (the callers
; compare it against 0 and 1). Preserves rdi and rsi; anything else `elemb`
; clobbers is clobbered.
.buffer_contains:
  push rdi
  push rsi
  mov edi, .buffer_end - .buffer        ; length of buffer
  mov rsi, .buffer                      ; buffer location
  call elemb
  pop rsi
  pop rdi
  ret

; address-size prefix (0x67) still to be inserted for the instruction being
; built in the `op [reg], reg` path, or 0x00 if none is needed
.pending_address_prefix db 0x00
|
|
|
|
; procedures
|
|
|
|
; advances to the next token.
; returns eax = new value of .tokens_processed, edi = .tokens_total, and the
; flags of `cmp eax, edi`: branch to `.break` on "greater or equal" right after
; the call site (`jge .break`) to stop when the table is exhausted.
.next_token:
  inc dword [.tokens_processed]
  mov eax, [.tokens_processed]
  mov edi, [.tokens_total]
  cmp eax, edi                          ; flags are the caller's end-of-table test
  ret
|
|
|
|
; eax = current entry index in token table
; returns di = token table entry at that index; the rest of rdi is zeroed
.get_next_tte:
  movzx edi, word [eax * 2 + TOKEN_TABLE_ADDR] ; entries are 2 bytes each
  ret
|
|
|
|
.tokens_processed dd 0
|
|
.tokens_total dd 0
|
|
|
|
; al = byte to write
; stores al at the address held in .next_output_byte and advances that address
; by one. Preserves every register.
.output_byte:
  push rdx
  mov edx, [.next_output_byte]          ; rdx -> next empty byte in output
  mov [rdx], al
  inc dword [.next_output_byte]         ; the following byte is the next empty one
  pop rdx
  ret
.next_output_byte dd OUTPUT_ADDR        ; next empty byte in output
                                        ; TODO get rid of this sketchy bit of state
|
|
|
|
; al = byte to push
; inserts al at the front of the instruction buffer: every byte already in the
; buffer moves up by one position (the last byte of the buffer is dropped), al
; is stored in the first position and .buffer_pointer is advanced by one.
; Preserves every register; clobbers the flags.
; The shift stays inside .buffer .. .buffer_end - 1, so nothing that follows the
; buffer in memory is touched.
.push_byte:
  push rcx
  push rdx
  mov ecx, .buffer_end - 1              ; rcx -> destination, starting at the last byte
.push_byte_loop:
  cmp ecx, .buffer                      ; addresses: compare unsigned
  jbe .push_byte_break                  ; reached the front, nothing left to move
  mov dl, [rcx - 1]
  mov [rcx], dl                         ; move one byte up
  dec ecx
  jmp .push_byte_loop
.push_byte_break:
  mov [.buffer], al                     ; write desired byte to front of buffer
  inc dword [.buffer_pointer]           ; buffer holds one more byte now
  pop rdx
  pop rcx
  ret
|
|
|
|
; al = byte to write
; appends al to the instruction buffer at .buffer_pointer and advances the
; pointer by one. Preserves every register.
.write_byte:
  push rdx
  mov edx, [.buffer_pointer]            ; rdx -> current byte in buffer
  mov [rdx], al
  inc dword [.buffer_pointer]
  pop rdx
  ret
|
|
|
|
; passes every byte between .buffer and .buffer_pointer to .output_byte, zeroes
; it in the buffer, and resets .buffer_pointer to the start of the buffer.
; Preserves rax and rcx.
.flush_write_buffer:
  push rcx
  push rax
  mov ecx, .buffer                      ; rcx -> next byte to flush
  jmp .flush_write_buffer_check
.flush_write_buffer_loop:
  mov al, [rcx]
  call .output_byte
  mov byte [rcx], 0x00                  ; leave the buffer zeroed behind us
  inc ecx
.flush_write_buffer_check:
  cmp ecx, [.buffer_pointer]
  jl .flush_write_buffer_loop           ; still below the write pointer
.flush_write_buffer_break:
  mov dword [.buffer_pointer], .buffer  ; buffer is empty again
  pop rax
  pop rcx
  ret
.buffer times 2 dq 0                    ; 16 bytes of space for the instruction
                                        ; currently being built
.buffer_end:
.buffer_pointer dd .buffer              ; points to current byte in buffer
|
|
|
|
.msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00
|
|
.msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00
|
|
.msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00
|
|
.msg_operator_0 db "operator_0", 0x0A, 0x00
|
|
.msg_operator_1 db "operator_1", 0x0A, 0x00
|
|
.msg_operator_1_memory db "operator_1_memory", 0x0A, 0x00
|
|
.msg_operator_1_register db "operator_1_register", 0x0A, 0x00
|
|
.msg_operator_2 db "operator_2", 0x0A, 0x00
|
|
.msg_operator_2_memory db "operator_2_memory", 0x0A, 0x00
|
|
.msg_operator_2_memory_register db "operator_2_memory_register", 0x0A, 0x00
|
|
.msg_operator_2_register db "operator_2_register", 0x0A, 0x00
|
|
.msg_operator_2_register_memory db "operator_2_register_memory", 0x0A, 0x00
|
|
.msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00
|
|
.msg_potential_label db "potential_label", 0x0A, 0x00
|
|
|
|
; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
; given a token table entry, returns the declared type in `tokens.by_id`. If
; there is no entry, returns UNRECOGNISED_ID_TYPE.
; `tokens.by_id` is scanned linearly as 4-byte entries: a 2-byte token id
; followed by the type byte (offset 2, lower 4 bits used here).
;
; parameters:
; di = token table entry
;
; returned:
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
; zeroed; the rest of rax is zeroed.
;
; clobbers:
; cx (id of the last table entry inspected), flags. The upper bits of rdi are
; zeroed; di itself is preserved.
; ------------------------------------------------------------------------------

get_tte_type:
  and edi, 0xFFFF                       ; di = token table entry
  xor eax, eax                          ; eax = tokens.by_id index

.loop:
  cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check
  jae .not_found                        ; index == entry count is already past
                                        ; the last entry

  mov cx, [tokens.by_id + rax * 4]      ; id of the next entry in tokens.by_id
  cmp cx, di
  je .found

  inc eax
  jmp .loop

.not_found:
  mov eax, UNRECOGNISED_ID_TYPE & 0xF
  ret

.found:
  movzx eax, byte [tokens.by_id + rax * 4 + 2] ; type byte of the matching entry
  and eax, 0xF                          ; mask as expected
  ret
|
|
|
|
; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
; given a token table entry, returns the declared typed metadata in
; `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA.
; `tokens.by_id` is scanned linearly as 4-byte entries: a 2-byte token id
; followed by two more bytes, the second of which (offset 3) is the typed
; metadata.
;
; parameters:
; di = token table entry
;
; returned:
; al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax is
; zeroed.
;
; clobbers:
; cx (id of the last table entry inspected), flags. The upper bits of rdi are
; zeroed; di itself is preserved.
; ------------------------------------------------------------------------------

get_tte_typed_metadata:
  and edi, 0xFFFF                       ; di = token table entry
  xor eax, eax                          ; eax = tokens.by_id index

.loop:
  cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check
  jae .not_found                        ; index == entry count is already past
                                        ; the last entry

  mov cx, [tokens.by_id + rax * 4]      ; id of the next entry in tokens.by_id
  cmp cx, di
  je .found

  inc eax
  jmp .loop

.not_found:
  mov eax, UNRECOGNISED_ID_METADATA
  ret

.found:
  movzx eax, byte [tokens.by_id + rax * 4 + 3] ; metadata byte of the matching entry
  ret
|
|
|
|
; ------------------------------------------------------------------------------
; get_ModRM
;
; description:
; given 2 register tokens and the mod bits, returns the ModR/M byte, laid out
; as mod (bits 7-6), reg (bits 5-3), R/M (bits 2-0).
;
; parameters:
; di = token table entry `reg`. If the upper byte is 0xFE (0xFEXX), the lower
; 3 bits of di are used directly as the `reg` field (op flag) instead of
; looking the token up
; si = token table entry `R/M`
; dl = lower 2 bits: mod bits. The rest is ignored
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
;
; clobbers:
; rcx, rdx, rsi, rdi, flags. rbx is saved and restored.
; ------------------------------------------------------------------------------

get_ModRM:
  push rbx

  movzx edi, di                         ; di = token table entry `reg`
  movzx esi, si                         ; si = token table entry `R/M`

  and edx, 11b                          ; dl = mod bits
  shl edx, 6                            ; moved into bits 7-6

  mov eax, edi
  shr eax, 8                            ; al = upper byte of the `reg` tte
  cmp al, 0xFE                          ; 0xFEXX marks a direct op flag
  je .pass_di_as_op_flag

  ; di = tte
  call get_reg_bits
  ; eax = reg bits
  mov ebx, eax                          ; ebx = reg bits
  jmp .continue

.pass_di_as_op_flag:
  mov ebx, edi
  and ebx, 111b                         ; ebx = op flag

.continue:
  shl ebx, 3                            ; moved into bits 5-3

  mov edi, esi                          ; now look up the `R/M` register
  ; di = tte
  call get_reg_bits
  ; eax = reg bits
  mov ecx, eax                          ; cl = r/m bits

  or eax, ebx                           ; add reg bits
  or eax, edx                           ; add mod bits
  and eax, 0xFF                         ; mask for byte

  pop rbx
  ret
|
|
|
|
; ------------------------------------------------------------------------------
; get_opcode
;
; description:
; given an operator token, returns its opcode. For operators with multiple
; opcodes, the variant can be specified.
; `opcodes.by_id` is scanned linearly as 16-byte entries: a 2-byte token id at
; offset 0, one opcode byte per variant from offset 2, and the op flags from
; offset 8, packed two per byte (even variant in the low nibble, odd variant in
; the high nibble).
; If the token has no entry, or the variant is not in the range 0 to 5, returns
; UNRECOGNISED_ID_OPCODE with a zero op flag.
;
; parameters:
; di = token table entry
; sil = offset within opcode entry. 0 is the first opcode, 1 the second, and so
; on. Only the lower 3 bits are looked at
;
; returned:
; al = opcode; the rest of rax is zeroed.
; dl = lower 4 bits: op flag nibble of the variant, if applicable (callers use
; the lower 3 bits). The rest of rdx is zeroed.
;
; clobbers:
; cx (id of the last table entry inspected), rsi (reduced to the variant),
; flags. The upper bits of rdi are zeroed; di itself is preserved.
; ------------------------------------------------------------------------------

get_opcode:
  and edi, 0xFFFF                       ; di = token table entry

  and esi, 111b                         ; esi = variant
  cmp esi, 6                            ; an entry only has room for variants 0-5
  jae .not_found

  xor eax, eax                          ; eax = opcodes.by_id index

.loop:
  cmp eax, (opcodes.by_id_end - opcodes.by_id) / 16 ; make sure it's still in range
  jae .not_found                        ; index == entry count is already past
                                        ; the last entry

  mov edx, eax
  shl edx, 4                            ; edx = byte offset of the entry
  mov cx, [opcodes.by_id + rdx]         ; id of the next entry in opcodes.by_id
  cmp cx, di
  je .found

  inc eax
  jmp .loop

.not_found:
  mov eax, UNRECOGNISED_ID_OPCODE
  xor edx, edx                          ; no op flag
  ret

.found:
  shl eax, 4                            ; eax = byte offset of the matching entry

  mov edx, esi
  shr edx, 1                            ; edx = index of the flag byte
  movzx edx, byte [opcodes.by_id + rax + rdx + 8]

  test esi, 1                           ; check if variant is odd
  jz .found_continue
  shr edx, 4                            ; if so, upper part of the flag byte

.found_continue:
  movzx eax, byte [opcodes.by_id + rax + rsi + 2] ; opcode of the variant
  and edx, 0x0F
  ret
|
|
|
|
; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
; given a register token, returns its reg bits: bits 4-2 of the token's typed
; metadata as reported by `get_tte_typed_metadata`.
;
; parameters:
; di = token table entry
;
; returned:
; al = reg bits in the lower 3 bits; the rest of rax, including the upper 5
; bits of al, is zeroed.
; ------------------------------------------------------------------------------

get_reg_bits:
  ; di = tte
  call get_tte_typed_metadata
  ; al = typed metadata
  and eax, 11100b                       ; keep the reg bits only
  shr eax, 2                            ; discard the two bits below them
  ret
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; tokenising
|
|
; ------------------------------------------------------------------------------
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; tokenise
|
|
; TODO write tests
|
|
;
|
|
; description:
|
|
; represents the program at the given address and puts it in the token table
|
|
; it's probably desirable to clear the token table before calling this function.
|
|
;
|
|
; parameters:
|
|
; rdi -> first byte of program
|
|
; rsi = size of program in bytes
|
|
;
|
|
; returned:
|
|
; rax = number of tokens processed
|
|
; ------------------------------------------------------------------------------
|
|
|
|
tokenise:
|
|
; rdi -> current byte of program
|
|
add rsi, rdi ; rsi -> last byte of program
|
|
xor eax, eax ; rax = number of tokens processed
|
|
xor edx, edx ; dl = current byte of program
|
|
|
|
.loop:
|
|
cmp rdi, rsi ; if current byte greater than last byte
|
|
jge .break ; then break
|
|
|
|
mov dl, [rdi] ; dl = current byte
|
|
|
|
cmp dl, ";" ; if current byte is the start of a comment
|
|
je .comment ; then handle the comment
|
|
|
|
cmp dl, 0x0A ; if current byte is the end of a line
|
|
je .newline_mk_flags ; then reset relevant flags
|
|
|
|
cmp dl, "," ; if current byte is a comma
|
|
je .comma ; then handle the comma
|
|
|
|
push rsi
|
|
push rdi
|
|
push rax
|
|
push rdx
|
|
mov rsi, whitespace_2 ; rsi -> list of whitespace bytes
|
|
mov rdi, 2 ; rdi = size of the list in bytes
|
|
; dl = current byte
|
|
call elemb
|
|
; al = 0 if not whitespace, 1 if whitespace
|
|
test eax, 1 ; check if current byte is whitespace
|
|
pop rdx ; dl = current byte
|
|
pop rax ; rax = number of tokens processed
|
|
pop rdi ; rdi -> current byte of program
|
|
pop rsi ; rsi -> last byte of program
|
|
jnz .skip_byte_whitespace
|
|
|
|
test byte [.expecting], E_LABEL ; check if a label is expected
|
|
jnz .label ; if so, handle it
|
|
test byte [.expecting], E_OPERATOR ; else, check if an operator is expected
|
|
jnz .operator ; if so, handle it
|
|
jmp .operand ; else, handle as an operand
|
|
|
|
.comment:
|
|
push rsi
|
|
mov rsi, .found
|
|
call print.debug
|
|
mov rsi, .msg_comment
|
|
call print
|
|
pop rsi ; rsi -> last byte of program
|
|
|
|
test byte [.expecting], E_COMMENT ; make sure a comment is expected
|
|
jz .unexpected_comment ; if not, error
|
|
.comment_loop:
|
|
mov dl, [rdi] ; dl = current byte
|
|
|
|
cmp dl, 0x0A ; if current byte is a newline
|
|
je .comment_break ; then break
|
|
|
|
inc rdi ; point to next unread byte
|
|
cmp rdi, rsi
|
|
jge .break
|
|
jmp .comment_loop
|
|
.comment_break:
|
|
jmp .loop
|
|
|
|
.skip_byte_whitespace:
|
|
test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
|
|
jz .unexpected_whitespace ; if not, error
|
|
inc rdi
|
|
jmp .loop ; else, loop
|
|
|
|
.comma: ; found comma
|
|
push rsi
|
|
mov rsi, .found
|
|
call print.debug
|
|
mov rsi, .msg_comma
|
|
call print
|
|
pop rsi
|
|
|
|
test byte [.expecting], E_COMMA ; make sure a comma was expected
|
|
jz .unexpected_comma ; if not, error
|
|
inc rdi
|
|
mov byte [.expecting], E_WHITESPACE | E_OPERAND ; else, make operand expected
|
|
jmp .loop ; and loop
|
|
|
|
.newline_mk_flags:
|
|
push rsi
|
|
mov rsi, .found
|
|
call print.debug
|
|
mov rsi, .msg_newline
|
|
call print
|
|
pop rsi
|
|
|
|
test byte [.expecting], E_NEWLINE ; make sure a newline was expected
|
|
jz .unexpected_newline ; if not, error
|
|
|
|
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL
|
|
|
|
inc rdi
|
|
jmp .loop
|
|
|
|
.label:
|
|
push rax
|
|
xor eax, eax ; rax = number of bytes in label
|
|
.label_loop:
|
|
mov dl, [rdi + rax] ; next byte
|
|
cmp dl, ":"
|
|
je .label_break
|
|
cmp dl, " "
|
|
je .label_not_found
|
|
cmp dl, 0x0A
|
|
je .label_not_found
|
|
cmp dl, 0x00
|
|
je .label_not_found
|
|
cmp dl, ";"
|
|
je .label_not_found
|
|
inc eax ; inc byte counter
|
|
cmp rdi, rsi
|
|
jge .break
|
|
jmp .label_loop
|
|
.label_break:
|
|
push rsi
|
|
mov rsi, .found
|
|
call print.debug
|
|
mov rsi, .msg_label
|
|
call print
|
|
pop rsi ; rsi -> last byte of program
|
|
|
|
push rax
|
|
push rdi
|
|
push rsi
|
|
|
|
mov rsi, rdi ; rsi -> start of string
|
|
mov rdi, rax ; rdi = size of string
|
|
call djb2
|
|
; rax = hash
|
|
mov rdi, rax ; rdi = hash
|
|
call add_label_hash
|
|
; rax = index on label table
|
|
mov cx, ax
|
|
and cx, 0x0FFF
|
|
or cx, 0x3000
|
|
|
|
pop rsi ; rsi -> last byte of program
|
|
pop rdi ; rdi -> current byte of program
|
|
pop rax ; rax = number of bytes in label
|
|
|
|
add rdi, rax ; move on to next byte
|
|
inc rdi ; move past the colon
|
|
|
|
pop rax ; rax = number of tokens processed
|
|
|
|
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
|
inc rax ; the next token
|
|
|
|
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE
|
|
jmp .loop
|
|
.label_not_found:
|
|
pop rax ; rax = number of tokens processed
|
|
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
|
|
jmp .loop
|
|
|
|
.operator:
|
|
mov rcx, rax ; rcx = number of tokens processed
|
|
xor eax, eax ; eax = number of bytes in operator
|
|
mov [.pending_operator], eax ; zero pending operator
|
|
.operator_loop:
|
|
; TODO give this its own error
|
|
|
|
mov dl, [rdi] ; next byte
|
|
|
|
; TODO have better check for operator end
|
|
cmp dl, " "
|
|
je .operator_break
|
|
cmp dl, 0x0A
|
|
je .operator_break
|
|
cmp dl, 0x00
|
|
je .operator_break
|
|
cmp dl, ";"
|
|
je .operator_break
|
|
|
|
mov [.pending_operator + eax], dl
|
|
|
|
inc eax ; inc byte counter
|
|
inc rdi ; inc byte pointer
|
|
cmp eax, 4 ; check that operator is short enough
|
|
jg .unexpected_operator ; if not, error
|
|
cmp rdi, rsi
|
|
jge .break
|
|
jmp .operator_loop ; and loop
|
|
.operator_break:
|
|
push rdi
|
|
|
|
mov edi, [.pending_operator] ; edi = operator to be searched
|
|
call identify_operator
|
|
; ax = operator's token ID
|
|
push rcx
|
|
mov ecx, eax ; cx = operator's token ID
|
|
mov edi, eax ; di = operator's token ID
|
|
call get_tte_type
|
|
; al = token type
|
|
mov sil, al
|
|
pop rax ; rax = tokens processed
|
|
pop rdi ; rdi = byte counter
|
|
|
|
cmp sil, 0x3 ; pseudo-operator
|
|
je .pseudo_operator
|
|
|
|
cmp sil, 0x1 ; operator
|
|
jne .unexpected_operator
|
|
|
|
; debug message
|
|
push rsi
|
|
mov rsi, .found
|
|
call print.debug
|
|
mov rsi, .msg_operator
|
|
call print
|
|
pop rsi
|
|
|
|
|
|
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
|
inc rax ; plus 1 token processed
|
|
|
|
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
|
|
jmp .loop
|
|
|
|
.pseudo_operator:
|
|
; debug message
|
|
push rsi
|
|
mov rsi, .found
|
|
call print.debug
|
|
mov rsi, .msg_pseudo_operator
|
|
call print
|
|
pop rsi
|
|
|
|
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
|
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
|
|
jmp .loop
|
|
|
|
.operand:
|
|
; debug message
|
|
push rsi
|
|
mov rsi, .found
|
|
call print.debug
|
|
mov rsi, .msg_operand
|
|
call print
|
|
pop rsi
|
|
|
|
test byte [.expecting], E_OPERAND ; make sure an operand was expected
|
|
jz .unexpected_operand ; if not, error
|
|
|
|
push rax
|
|
push rdi
|
|
xor eax, eax ; rax = length of operand
|
|
.operand_loop:
|
|
mov dl, [rdi]
|
|
|
|
cmp dl, ","
|
|
je .operand_break
|
|
cmp dl, 0x0A
|
|
je .operand_break
|
|
cmp dl, 0x00
|
|
je .operand_break
|
|
cmp dl, ";"
|
|
je .operand_break
|
|
|
|
inc rax ; inc length counter
|
|
inc rdi ; inc byte pointer
|
|
jmp .operand_loop
|
|
.operand_break:
|
|
pop rdi ; rdi -> first byte of operand
|
|
push rdi
|
|
push rsi
|
|
mov rsi, rax ; rsi = length of operand in bytes
|
|
|
|
mov cx, ax ; cx = length counter for safe keeping
|
|
push rcx
|
|
call evaluate_operand
|
|
; dl = return code
|
|
; rax = binary data
|
|
pop rcx
|
|
pop rsi
|
|
pop rdi ; rdi = first byte of operand
|
|
add di, cx ; rdi = last byte of operand
|
|
mov rcx, rax ; rcx = evaluate_operand's binary return data
|
|
pop rax ; rax = number of tokens processed
|
|
|
|
; operand is some reg
|
|
cmp dl, 0x00
|
|
; cx = token ID
|
|
je .operand_register
|
|
|
|
; operand is some [reg]
|
|
cmp dl, 0x10
|
|
; cx = token ID
|
|
je .operand_addr_register
|
|
|
|
; operand is some constant
|
|
cmp dl, 0x20
|
|
; rcx = constant value
|
|
je .operand_constant
|
|
|
|
; operand is some label
|
|
cmp dl, 0x30
|
|
; rcx = index of label in LT
|
|
je .operand_label
|
|
|
|
jmp .unexpected_operand
|
|
.operand_register:
|
|
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
|
inc rax ; another token processed
|
|
jmp .operand_break_continue
|
|
.operand_addr_register:
|
|
mov word [TOKEN_TABLE_ADDR + rax * 2], 0x1000
|
|
inc rax ; 0x1000: addr reg token, next token is the register
|
|
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
|
inc rax ; the register as returned by evaluate_operand
|
|
jmp .operand_break_continue
|
|
.operand_constant:
|
|
mov word [TOKEN_TABLE_ADDR + rax * 2], 0x2000
|
|
inc rax ; another token processed
|
|
mov [TOKEN_TABLE_ADDR + rax * 2], rcx
|
|
add rax, 4
|
|
jmp .operand_break_continue
|
|
.operand_label:
|
|
and cx, 0x0FFF
|
|
or cx, 0x3000
|
|
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
|
inc rax
|
|
jmp .operand_break_continue
|
|
.operand_break_continue:
|
|
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
|
|
jmp .loop
|
|
|
|
.break:
|
|
ret
|
|
|
|
; state
|
|
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL
|
|
|
|
.unexpected_whitespace:
|
|
mov rsi, .err_unexpected
|
|
call print.error
|
|
mov rsi, .msg_whitespace
|
|
call print
|
|
jmp halt
|
|
.unexpected_comment:
|
|
mov rsi, .err_unexpected
|
|
call print.error
|
|
mov rsi, .msg_comment
|
|
call print
|
|
jmp halt
|
|
.unexpected_newline:
|
|
mov rsi, .err_unexpected
|
|
call print.error
|
|
mov rsi, .msg_newline
|
|
call print
|
|
jmp halt
|
|
.unexpected_comma:
|
|
mov rsi, .err_unexpected
|
|
call print.error
|
|
mov rsi, .msg_comma
|
|
call print
|
|
jmp halt
|
|
.unexpected_operand:
|
|
mov rsi, .err_unexpected
|
|
call print.error
|
|
mov rsi, .msg_operand
|
|
call print
|
|
jmp halt
|
|
.unexpected_operator:
|
|
mov rsi, .err_unexpected
|
|
call print.error
|
|
mov rsi, .msg_operator
|
|
call print
|
|
jmp halt
|
|
.err_unexpected db "unexpected ", 0x00
|
|
.found db "found ", 0x00
|
|
.msg_whitespace db "whitespace.", 0x0A, 0x00
|
|
.msg_comment db "comment.", 0x0A, 0x00
|
|
.msg_newline db "newline.", 0x0A, 0x00
|
|
.msg_comma db "comma.", 0x0A, 0x00
|
|
.msg_label db "label.", 0x0A, 0x00
|
|
.msg_operator db "operator.", 0x0A, 0x00
|
|
.msg_operand db "operand.", 0x0A, 0x00
|
|
.msg_pseudo_operator db "pseudo_operator.", 0x0A, 0x00
|
|
.pending_operator dd 0 ; the operator token that is pending processing
|
|
|
|
; ------------------------------------------------------------------------------
; evaluate_operand
;
; description:
; takes the location and length of an operand and evaluates it into binary data
; and a return code to interpret the binary data. Trailing whitespace (as judged
; by trim_trailing_whitespace) is ignored.
;
; | code | rax contents         | notes |
; |------|----------------------|-------|
; | 0x00 | token ID of register |  reg  |
; | 0x10 | token ID of register | [reg] |
; | 0x20 | constant value       | const |
; | 0x30 | index of label in LT | label |
; | 0xFF | -                    | error |
;
; a bracketed operand is evaluated recursively and its code is folded into the
; low nibble: the result is 0x10 | (inner code >> 4). So [reg] gives 0x10,
; [const] gives 0x12, [label] gives 0x13 and a failed inner operand gives 0x1F.
; A directly nested [[reg]] is rejected with 0xFF.
;
; anything that is neither a register nor a constant is treated as a label and
; is added to the label table.
;
; parameters:
; rdi -> first byte of operand
; rsi = size of operand in bytes
;
; returned:
; rax = binary data corresponding to the operand
; dl = return code
;
; rsi and rdi are not preserved; the constant and label paths call routines
; that also use rcx.
; ------------------------------------------------------------------------------

evaluate_operand:
  push rdi                      ; keep the operand pointer across the call
  ; rsi = size of operand
  call trim_trailing_whitespace
  ; rax = size of operand w/o trailing whitespace
  pop rdi                       ; rdi -> first byte of operand
  mov rsi, rax

  test rsi, rsi                 ; case: 0 length
  jz .unrecognised

  cmp byte [rdi], '['           ; case: memory addressing
  jne .register                 ; otherwise: register (or constant, or label)

  .address:
    cmp byte [rdi + rsi - 1], ']' ; the reference must be closed by its last byte
    jne .unrecognised
    inc rdi                     ; rdi -> enclosed operand
    sub rsi, 2                  ; rsi = length of enclosed operand
    call evaluate_operand
    ; rax = binary data
    ; dl = return code of the enclosed operand
    cmp dl, 0x10                ; the enclosed operand must not be a reference itself
    je .unrecognised

    ; fold the inner code into the low nibble. Only dl is shifted: the rest of
    ; edx is not initialised on the register path, so shifting edx would drag
    ; stray bits of dh into the return code.
    shr dl, 4
    or dl, 0x10                 ; address return
    ret

  .register:
    cmp rsi, 4
    ja .constant                ; too long for a register name. Maybe constant?

    push rdi
    mov edi, [rdi]              ; edi = up to four bytes of candidate name

    ; keep only the low rsi bytes of edi. Entering the chain at the right label
    ; applies the matching mask first; the masks that follow are wider and so
    ; leave the value unchanged.
    cmp rsi, 4
    je .register4
    cmp rsi, 3
    je .register3
    cmp rsi, 2
    je .register2
    ; rsi = 1 here: zero length was rejected above
  .register1:
    and edi, 0xFF
  .register2:
    and edi, 0xFFFF
  .register3:
    and edi, 0xFFFFFF
  .register4:
    call identify_register
    ; ax = register's token ID or UNRECOGNISED_TOKEN_ID
    pop rdi                     ; rdi -> first byte of operand

    cmp ax, UNRECOGNISED_TOKEN_ID ; if not a register, constant?
    je .constant

    mov dl, 0x00
    ret

  .constant:
    push rdi
    push rsi
    ; rdi -> first byte of constant
    ; rsi = size of constant in bytes
    call evaluate_constant
    ; dl = type of constant
    ; rax = value of constant

    ; popped in swapped order on purpose: djb2 takes the size in rdi and the
    ; pointer in rsi
    pop rdi                     ; rdi = size of label in bytes
    pop rsi                     ; rsi -> first byte of label

    cmp dl, 0xFF                ; not a constant: treat the text as a label
    je .label

    ; rax = value of constant
    mov dl, 0x20
    ret

  .label:
    ; rdi = size of label in bytes
    ; rsi -> first byte of label
    call djb2
    ; rax = hash
    mov rdi, rax                ; rdi = hash
    call add_label_hash
    ; rax = index in LT of label
    mov dl, 0x30
    ret

  .unrecognised:
    xor eax, eax
    mov dl, 0xFF
    ret
|
|
|
|
; ------------------------------------------------------------------------------
; evaluate_constant
;
; description:
; takes a constant and returns its value. Currently the following constants are
; supported:
;
; | type | p. | description  |
; |------|----|--------------|
; | 0x00 | 0x | hexadecimal  |
; | 0x01 | 0q | octal        |
; | 0x02 | 0b | binary       |
; | 0x03 | "  | char         |
; | 0xFF |    | unrecognised |
;
; where `p.` is the prefix or otherwise indicator
;
; hexadecimal digits A-F must be upper case. Numeric digits that do not fit in
; 64 bits are shifted out of the top of rax. A char constant holds at most four
; printable bytes (0x20..0x7E) between double quotes; the first char ends up in
; the lowest byte of rax.
;
; parameters:
; rdi -> first byte of constant
; rsi = size of constant in bytes
;
; returned:
; rax = value of the constant; 0 when unrecognised
; dl = type of constant; the rest of rdx is zeroed
;
; rcx, rsi and rdi are not preserved.
; ------------------------------------------------------------------------------

evaluate_constant:
  ; rsi = number of bytes left
  ; rdi -> current byte of constant
  xor eax, eax                  ; rax = value of constant

  test rsi, rsi                 ; nothing to read at all
  jz .unrecognised

  movzx edx, byte [rdi]         ; dl = leading byte, decides the kind of constant
  inc rdi                       ; point to next byte
  dec rsi                       ; one fewer byte left

  cmp dl, '0'                   ; all numeric prefixes are handled in .numeric
  je .numeric
  cmp dl, '"'
  je .chr
  jmp .unrecognised

  .numeric:
    test rsi, rsi               ; a lone "0" carries no base letter
    jz .unrecognised

    movzx edx, byte [rdi]       ; dl = base letter
    inc rdi
    dec rsi

    cmp dl, 'x'
    je .hex_loop
    cmp dl, 'q'
    je .oct_loop
    cmp dl, 'b'
    je .bin_loop
    jmp .unrecognised

  .hex_loop:
    test rsi, rsi               ; out of digits
    jz .hex_break

    movzx edx, byte [rdi]       ; edx = next byte of constant
    sub edx, '0'                ; '0'..'9' -> 0..9, bytes below '0' wrap high
    cmp edx, 9
    jbe .hex_continue

    sub edx, 7                  ; map [('A'-'0')..('F'-'0')] to [0xA..0xF]
    cmp edx, 0xA                ; ':'..'@' would land below 0xA
    jb .unrecognised
    cmp edx, 0xF                ; everything past 'F' (and wrapped bytes)
    ja .unrecognised
  .hex_continue:
    shl rax, 4                  ; make room for next hex digit
    or rax, rdx                 ; and add newest nibble

    inc rdi
    dec rsi
    jmp .hex_loop
  .hex_break:
    xor edx, edx                ; type 0x00
    ret

  .oct_loop:
    test rsi, rsi               ; out of digits
    jz .oct_break

    movzx edx, byte [rdi]
    sub edx, '0'
    cmp edx, 7                  ; unsigned, so bytes below '0' fail too
    ja .unrecognised

    shl rax, 3                  ; make room for next octal digit
    or rax, rdx                 ; and add newest 3-bit group

    inc rdi
    dec rsi
    jmp .oct_loop
  .oct_break:
    mov edx, 0x01
    ret

  .bin_loop:
    test rsi, rsi               ; out of digits
    jz .bin_break

    movzx edx, byte [rdi]
    sub edx, '0'
    cmp edx, 1                  ; unsigned, so bytes below '0' fail too
    ja .unrecognised

    shl rax, 1
    or rax, rdx                 ; and newest bit

    inc rdi
    dec rsi
    jmp .bin_loop
  .bin_break:
    mov edx, 0x02
    ret

  .chr:
    ; rsi = bytes after the opening quote; the last of them must be the closer
    test rsi, rsi
    jz .unrecognised
    dec rsi                     ; rsi = number of enclosed chars
    cmp rsi, 4                  ; ensure char is at most 4 bytes long
    ja .unrecognised
    cmp byte [rdi + rsi], '"'   ; make sure the chr is closed
    jne .unrecognised

    xor ecx, ecx                ; cl = bit position of the next char in rax
  .chr_loop:
    test rsi, rsi
    jz .chr_break

    movzx edx, byte [rdi]

    ; bound check byte as printable char
    cmp dl, 0x20
    jb .unrecognised
    cmp dl, 0x7E
    ja .unrecognised

    shl rdx, cl                 ; first char lands in the lowest byte
    or rax, rdx
    add ecx, 8

    inc rdi
    dec rsi
    jmp .chr_loop
  .chr_break:
    mov edx, 0x03
    ret

  .unrecognised:
    xor eax, eax                ; do not hand back a half-built value
    mov edx, 0xFF               ; unrecognised type
    ret
  .msg db "evaluate_constant", 0x0A, 0x00
|
|
|
|
; ------------------------------------------------------------------------------
; identify_register
;
; description:
; looks up a register name in tokens.registers. Each entry there is 6 bytes:
; four bytes of ascii name followed by a two byte token ID. The name is compared
; as one 32-bit value, so unused trailing bytes of edi must be zero.
;
; parameters:
; edi = register to be searched
;
; returned:
; ax = register's token ID or UNRECOGNISED_TOKEN_ID
; ------------------------------------------------------------------------------

identify_register:
  xor eax, eax                  ; eax = byte offset of the entry being compared
  jmp .check
  .next:
    add eax, 6                  ; step over one name + ID pair
  .check:
    cmp eax, (tokens.registers_end - tokens.registers)
    jge .miss                   ; ran off the end of the table

    cmp [tokens.registers + eax], edi
    jne .next

    mov ax, [tokens.registers + eax + 4] ; ID sits right after the name
    ret
  .miss:
    mov ax, UNRECOGNISED_TOKEN_ID
    ret
|
|
|
|
; ------------------------------------------------------------------------------
; identify_operator
;
; description:
; looks up an operator name in tokens.operators. Each entry there is 6 bytes:
; four bytes of ascii name followed by a two byte token ID. The name is compared
; as one 32-bit value, so unused trailing bytes of edi must be zero.
;
; parameters:
; edi = operator to be searched
;
; returned:
; ax = operator's token ID or UNRECOGNISED_TOKEN_ID
; ------------------------------------------------------------------------------

identify_operator:
  xor eax, eax                  ; eax = byte offset of the entry being compared
  jmp .check
  .next:
    add eax, 6                  ; step over one name + ID pair
  .check:
    cmp eax, (tokens.operators_end - tokens.operators)
    jge .miss                   ; ran off the end of the table

    cmp [tokens.operators + eax], edi
    jne .next

    mov ax, [tokens.operators + eax + 4] ; ID sits right after the name
    ret
  .miss:
    mov ax, UNRECOGNISED_TOKEN_ID
    ret
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; utilities
|
|
; ------------------------------------------------------------------------------
|
|
|
|
; ------------------------------------------------------------------------------
; print
;
; description:
; writes a null-terminated string, one byte at a time, to I/O port 0x3F8
; (conventionally the COM1 serial data port). rdx, rax and rsi are saved and
; restored; flags are not.
;
; print.debug, print.error, print.test and print.warn write a coloured tag
; ("[DEBUG]: ", "[ERROR]: ", "[TEST]: ", "[WARN]: ") and then the string.
;
; parameters:
; rsi -> start of null-terminated string
; ------------------------------------------------------------------------------

print:
  push rdx
  push rax
  push rsi

  mov edx, 0x3F8                ; dx = output port
  jmp .fetch
  .emit:
    out dx, al
    inc rsi
  .fetch:
    mov al, [rsi]
    test al, al                 ; terminator reached?
    jnz .emit

  pop rsi
  pop rax
  pop rdx
  ret

  ; each tagged entry point parks the caller's string on the stack, selects its
  ; tag and joins the shared tail below
  .debug:
    push rsi
    mov rsi, .debug_msg
    jmp .tagged
  .error:
    push rsi
    mov rsi, .error_msg
    jmp .tagged
  .test:
    push rsi
    mov rsi, .test_msg
    jmp .tagged
  .warn:
    push rsi
    mov rsi, .warn_msg

  .tagged:
    call print                  ; the tag
    pop rsi                     ; rsi -> caller's string again
    jmp print                   ; tail call
  .debug_msg db 0x1B, "[36m", "[DEBUG]: ", 0x1B, "[0m", 0x00
  .error_msg db 0x1B, "[1;31m", "[ERROR]: ", 0x1B, "[0m", 0x00
  .test_msg db 0x1B, "[1;33m", "[TEST]: ", 0x1B, "[0m", 0x00
  .warn_msg db 0x1B, "[1;35m", "[WARN]: ", 0x1B, "[0m", 0x00
|
|
|
|
; ------------------------------------------------------------------------------
; halt
;
; description:
; prints msg_halt once and then halts the program, silly :)
; never returns. Should the processor be woken from hlt again, it is put
; straight back to sleep without repeating the message.
; ------------------------------------------------------------------------------

halt:
  mov rsi, msg_halt
  call print
  .sleep:
    hlt
    jmp .sleep                  ; woken up: go back to sleep, don't print again
|
|
|
|
; ------------------------------------------------------------------------------
; elemb
;
; description:
; linear search: reports whether the given byte occurs in the specified list.
; rsi and rdi are advanced during the search and are not restored.
;
; parameters:
; rdi = size of list
; rsi -> start of list
; dl = given byte
;
; returned:
; rax = 0: is not an element
;       1: is an element
; ------------------------------------------------------------------------------

elemb:
  jmp .check
  .advance:
    inc rsi                     ; move to next byte
    dec rdi                     ; and reduce remaining length
  .check:
    test rdi, rdi               ; list exhausted?
    jz .absent

    cmp [rsi], dl               ; current byte the desired one?
    jne .advance

    mov eax, 1                  ; return 1; dl an element of list
    ret
  .absent:
    xor eax, eax                ; return 0; dl not an element of list
    ret
|
|
|
|
; ------------------------------------------------------------------------------
; djb2
;
; description:
; gets the 64-bit djb2 hash of a given string: starting from 5381, every byte
; updates the hash as hash = hash * 33 + byte, wrapping at 64 bits.
; rcx and rdx are used as scratch.
;
; parameters:
; rdi = size of string
; rsi -> start of string
;
; returned:
; rax = hash
; ------------------------------------------------------------------------------

djb2:
  mov eax, 5381                 ; rax = hash, seeded
  xor ecx, ecx                  ; rcx = index of the next byte
  jmp .check
  .mix:
    movzx edx, byte [rsi + rcx] ; rdx = current byte
    imul rax, rax, 33
    add rax, rdx
    inc rcx
  .check:
    cmp rcx, rdi
    jl .mix                     ; bytes left to fold in
  ret
|
|
|
|
; ------------------------------------------------------------------------------
; trim_trailing_whitespace
;
; description:
; works out the size of the given byte array once whitespace at its end is
; dropped. Only the end is examined and the array itself is not modified. A
; byte counts as whitespace when it is one of the two bytes in whitespace_2.
;
; runs as a loop, so stack use does not grow with the amount of whitespace.
;
; parameters:
; rdi -> start of list
; rsi = size of list
;
; returned:
; rax = new size of list
;
; rdi is preserved; rsi is left equal to rax; dl holds the last byte examined.
; ------------------------------------------------------------------------------

trim_trailing_whitespace:
  .loop:
    test rsi, rsi               ; list of length zero
    jz .done                    ; already trimmed

    push rsi                    ; elemb takes its arguments in rsi and rdi
    push rdi

    mov dl, [rdi + rsi - 1]     ; last element of given list
    mov rsi, whitespace_2       ; pointer of whitespace list
    mov edi, 2                  ; length of whitespace list
    call elemb
    ; rax = 1 if dl is whitespace, 0 otherwise

    pop rdi                     ; rdi -> start of list
    pop rsi                     ; rsi = size of list

    test eax, eax               ; if last element is not whitespace
    jz .done                    ; then nothing more to trim

    dec rsi                     ; otherwise one shorter
    jmp .loop                   ; and look at the new last element

  .done:
    mov rax, rsi
    ret
|
|
|
|
; ------------------------------------------------------------------------------
; add_label_hash
;
; description:
; adds a label hash to the label table, or just finds it if already present.
; The table is scanned from the start in 16 byte entries whose first 8 bytes
; hold the hash; the first entry that is empty (hash of 0) or already holds the
; hash is used. The other 8 bytes of an entry are not touched here.
;
; if every entry is taken by a different hash, an error is printed and the
; program halts rather than writing past LABEL_TABLE_SIZE.
;
; NOTE: a hash of exactly 0 cannot be told apart from an empty slot.
;
; parameters
; rdi = 64-bit hash to be added
;
; returned
; rax = index in label table
;
; rcx is used as scratch.
; ------------------------------------------------------------------------------

add_label_hash:
  xor eax, eax                  ; rax = byte offset of the entry being probed
  .loop:
    cmp rax, LABEL_TABLE_SIZE
    jae .full                   ; no entry left inside the table
    mov rcx, [LABEL_TABLE_ADDR + rax]
    ; TODO bug if there's an empty slot before the entry, it won't be found
    test rcx, rcx               ; empty slot
    jz .store
    cmp rcx, rdi                ; already present
    je .found
    add rax, 16
    jmp .loop
  .store:
    mov [LABEL_TABLE_ADDR + rax], rdi
  .found:
    shr rax, 4                  ; rax / 16
    ; rax = index
    ret
  .full:
    mov rsi, .err_full
    call print.error
    jmp halt
  .err_full db "label table is full.", 0x0A, 0x00
|
|
|
|
; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
; zero-fills the TOKEN_TABLE_SIZE bytes starting at TOKEN_TABLE_ADDR, one double
; word at a time. Uses rax, rcx and rdi.
; ------------------------------------------------------------------------------

clear_token_table:
  mov edi, TOKEN_TABLE_ADDR     ; where to start filling
  mov ecx, TOKEN_TABLE_SIZE / 4 ; how many double words to fill
  xor eax, eax                  ; fill value
  rep stosd
  ret
|
|
|
|
; ------------------------------------------------------------------------------
; clear_label_tables
;
; description:
; zero-fills the label table (LABEL_TABLE_SIZE bytes at LABEL_TABLE_ADDR) and
; then the awaiting label table (AWAITING_LABEL_TABLE_SIZE bytes at
; AWAITING_LABEL_TABLE_ADDR), one double word at a time. Uses rax, rcx and rdi.
; ------------------------------------------------------------------------------

clear_label_tables:
  xor eax, eax                  ; fill value, shared by both fills

  mov edi, LABEL_TABLE_ADDR     ; label table first
  mov ecx, LABEL_TABLE_SIZE / 4 ; number of double words
  rep stosd

  mov edi, AWAITING_LABEL_TABLE_ADDR ; then the awaiting label table
  mov ecx, AWAITING_LABEL_TABLE_SIZE / 4 ; number of double words
  rep stosd
  ret
|
|
|
|
; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
; zero-fills the TEST_ARENA_SIZE bytes starting at TEST_ARENA_ADDR, one double
; word at a time. Uses rax, rcx and rdi.
; ------------------------------------------------------------------------------

clear_test_arena:
  mov edi, TEST_ARENA_ADDR      ; where to start filling
  mov ecx, TEST_ARENA_SIZE / 4  ; how many double words to fill
  xor eax, eax                  ; fill value
  rep stosd
  ret
|
|
|
|
; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
; zero-fills the OUTPUT_SIZE bytes starting at OUTPUT_ADDR, one double word at a
; time. Uses rax, rcx and rdi.
; ------------------------------------------------------------------------------

clear_output_arena:
  mov edi, OUTPUT_ADDR          ; where to start filling
  mov ecx, OUTPUT_SIZE / 4      ; how many double words to fill
  xor eax, eax                  ; fill value
  rep stosd
  ret
|
|
|
|
%include "asm/tests.asm"
|
|
|
|
; ------------------------------------------------------------------------------
|
|
; data
|
|
; ------------------------------------------------------------------------------
|
|
|
|
align 16 ; for readability in hexdump
|
|
tokens:
|
|
.by_id:
|
|
dw 0x0000 ; rax
|
|
db 0x02 ; type: register
|
|
db 00000011b ; reg: 000b
|
|
; width: 11b (64 bits)
|
|
|
|
dw 0x0001 ; rbx
|
|
db 0x02 ; type: register
|
|
db 00001111b ; reg: 011b
|
|
; width: 11b (64 bits)
|
|
|
|
dw 0x0002 ; rcx
|
|
db 0x02 ; type: register
|
|
db 00000111b ; reg: 001b
|
|
; width: 11b (64 bits)
|
|
|
|
dw 0x0003 ; rdx
|
|
db 0x02 ; type: register
|
|
db 00001011b ; reg: 010b
|
|
; width: 11b (64 bits)
|
|
|
|
dw 0x0004 ; rsi
|
|
db 0x02 ; type: register
|
|
db 00011011b ; reg: 110b
|
|
; width: 11b (64 bits)
|
|
|
|
dw 0x0005 ; rdi
|
|
db 0x02 ; type: register
|
|
db 00011111b ; reg: 111b
|
|
; width: 11b (64 bits)
|
|
|
|
dw 0x0006 ; rsp
|
|
db 0x02 ; type: register
|
|
db 00010011b ; reg: 100b
|
|
; width: 11b (64 bits)
|
|
|
|
dw 0x0007 ; rbp
|
|
db 0x02 ; type: register
|
|
db 00010111b ; reg: 101b
|
|
; width: 11b (64 bits)
|
|
|
|
dw 0x0010 ; eax
|
|
db 0x02 ; type: register
|
|
db 00000010b ; reg: 000b
|
|
; width: 10b (32 bits)
|
|
|
|
dw 0x0011 ; ebx
|
|
db 0x02 ; type: register
|
|
db 00001110b ; reg: 011b
|
|
; width: 10b (32 bits)
|
|
|
|
dw 0x0012 ; ecx
|
|
db 0x02 ; type: register
|
|
db 00000110b ; reg: 001b
|
|
; width: 10b (32 bits)
|
|
|
|
dw 0x0013 ; edx
|
|
db 0x02 ; type: register
|
|
db 00001010b ; reg: 010b
|
|
; width: 10b (32 bits)
|
|
|
|
dw 0x0014 ; esi
|
|
db 0x02 ; type: register
|
|
db 00011010b ; reg: 110b
|
|
; width: 10b (32 bits)
|
|
|
|
dw 0x0015 ; edi
|
|
db 0x02 ; type: register
|
|
db 00011110b ; reg: 111b
|
|
; width: 10b (32 bits)
|
|
|
|
dw 0x0016 ; esp
|
|
db 0x02 ; type: register
|
|
db 00010010b ; reg: 100b
|
|
; width: 10b (32 bits)
|
|
|
|
dw 0x0017 ; ebp
|
|
db 0x02 ; type: register
|
|
db 00010110b ; reg: 101b
|
|
; width: 10b (32 bits)
|
|
|
|
dw 0x0020 ; ax
|
|
db 0x02 ; type: register
|
|
db 00000001b ; reg: 000b
|
|
; width: 01b (16 bits)
|
|
|
|
dw 0x0021 ; bx
|
|
db 0x02 ; type: register
|
|
db 00001101b ; reg: 011b
|
|
; width: 01b (16 bits)
|
|
|
|
dw 0x0022 ; cx
|
|
db 0x02 ; type: register
|
|
db 00000101b ; reg: 001b
|
|
; width: 01b (16 bits)
|
|
|
|
dw 0x0023 ; dx
|
|
db 0x02 ; type: register
|
|
db 00001001b ; reg: 010b
|
|
; width: 01b (16 bits)
|
|
|
|
dw 0x0024 ; si
|
|
db 0x02 ; type: register
|
|
db 00011001b ; reg: 110b
|
|
; width: 01b (16 bits)
|
|
|
|
dw 0x0025 ; di
|
|
db 0x02 ; type: register
|
|
db 00011101b ; reg: 111b
|
|
; width: 01b (16 bits)
|
|
|
|
dw 0x0026 ; sp
|
|
db 0x02 ; type: register
|
|
db 00010001b ; reg: 100b
|
|
; width: 01b (16 bits)
|
|
|
|
dw 0x0027 ; bp
|
|
db 0x02 ; type: register
|
|
db 00010101b ; reg: 101b
|
|
; width: 01b (16 bits)
|
|
|
|
dw 0x0030 ; al
|
|
db 0x02 ; type: register
|
|
db 00000000b ; reg: 000b
|
|
; width: 00b (8 bits)
|
|
|
|
dw 0x0031 ; bl
|
|
db 0x02 ; type: register
|
|
db 00001100b ; reg: 011b
|
|
; width: 00b (8 bits)
|
|
|
|
dw 0x0032 ; cl
|
|
db 0x02 ; type: register
|
|
db 00000100b ; reg: 001b
|
|
; width: 00b (8 bits)
|
|
|
|
dw 0x0033 ; dl
|
|
db 0x02 ; type: register
|
|
db 00001000b ; reg: 010b
|
|
; width: 00b (8 bits)
|
|
|
|
dw 0x0034 ; sil
|
|
db 0x02 ; type: register
|
|
db 00011000b ; reg: 110b
|
|
; width: 00b (8 bits)
|
|
|
|
dw 0x0035 ; dil
|
|
db 0x02 ; type: register
|
|
db 00011100b ; reg: 111b
|
|
; width: 00b (8 bits)
|
|
|
|
dw 0x0036 ; spl
|
|
db 0x02 ; type: register
|
|
db 00010000b ; reg: 100b
|
|
; width: 00b (8 bits)
|
|
|
|
dw 0x0037 ; bpl
|
|
db 0x02 ; type: register
|
|
db 00010100b ; reg: 101b
|
|
; width: 00b (8 bits)
|
|
|
|
dw 0x004F ; hlt
|
|
db 0x01 ; type: operator
|
|
db 0x00 ; # operands
|
|
|
|
dw 0x0050 ; int3
|
|
db 0x01 ; type: operator
|
|
db 0x00 ; # operands
|
|
|
|
dw 0x0053 ; xor
|
|
db 0x01 ; type: operator
|
|
db 0x02 ; # operands
|
|
|
|
dw 0x0054 ; inc
|
|
db 0x01 ; type: operator
|
|
db 0x01 ; # operands
|
|
|
|
dw 0x0055 ; dec
|
|
db 0x01 ; type: operator
|
|
db 0x01 ; # operands
|
|
|
|
dw 0x0056 ; mov
|
|
db 0x01 ; type: operator
|
|
db 0x02 ; # operands
|
|
|
|
dw 0x0057 ; add
|
|
db 0x01 ; type: operator
|
|
db 0x02 ; # operands
|
|
|
|
dw 0x0058 ; sub
|
|
db 0x01 ; type: operator
|
|
db 0x02 ; # operands
|
|
|
|
dw 0x0059 ; call
|
|
db 0x01 ; type: operator
|
|
db 0x01 ; # operands
|
|
|
|
dw 0x005A ; ret
|
|
db 0x01 ; type: operator
|
|
db 0x00 ; # operands
|
|
|
|
dw 0x005B ; cmp
|
|
db 0x01 ; type: operator
|
|
db 0x02 ; # operands
|
|
|
|
dw 0x005C ; jmp
|
|
db 0x01 ; type: operator
|
|
db 0x01 ; # operands
|
|
|
|
dw 0x005D ; je
|
|
db 0x01 ; type: operator
|
|
db 0x01 ; # operands
|
|
|
|
dw 0x005E ; jne
|
|
db 0x01 ; type: operator
|
|
db 0x01 ; # operands
|
|
|
|
dw 0x005F ; push
|
|
db 0x01 ; type: operator
|
|
db 0x01 ; # operands
|
|
|
|
dw 0x0060 ; pop
|
|
db 0x01 ; type: operator
|
|
db 0x01 ; # operands
|
|
|
|
dw 0x0061 ; out
|
|
db 0x01 ; type: operator
|
|
db 0x02 ; # operands
|
|
|
|
dw 0x0100 ; db
|
|
db 0x03 ; type: pseudo-operator
|
|
db 0x01 ; # operands
|
|
.by_id_end:
|
|
.operators:
|
|
dd "hlt"
|
|
dw 0x004F
|
|
dd "int3"
|
|
dw 0x0050
|
|
dd "xor"
|
|
dw 0x0053
|
|
dd "inc"
|
|
dw 0x0054
|
|
dd "dec"
|
|
dw 0x0055
|
|
dd "mov"
|
|
dw 0x0056
|
|
dd "add"
|
|
dw 0x0057
|
|
dd "sub"
|
|
dw 0x0058
|
|
dd "call"
|
|
dw 0x0059
|
|
dd "ret"
|
|
dw 0x005A
|
|
dd "cmp"
|
|
dw 0x005B
|
|
dd "jmp"
|
|
dw 0x005C
|
|
dd "je"
|
|
dw 0x005D
|
|
dd "jne"
|
|
dw 0x005E
|
|
dd "push"
|
|
dw 0x005F
|
|
dd "pop"
|
|
dw 0x0060
|
|
dd "out"
|
|
dw 0x0061
|
|
dd "db"
|
|
dw 0x0100
|
|
.operators_end:
|
|
.registers:
|
|
dd "r8"
|
|
dw 0x0008
|
|
dd "r9"
|
|
dw 0x0009
|
|
dd "ax"
|
|
dw 0x0020
|
|
dd "bx"
|
|
dw 0x0021
|
|
dd "cx"
|
|
dw 0x0022
|
|
dd "dx"
|
|
dw 0x0023
|
|
dd "si"
|
|
dw 0x0024
|
|
dd "di"
|
|
dw 0x0025
|
|
dd "sp"
|
|
dw 0x0026
|
|
dd "bp"
|
|
dw 0x0027
|
|
dd "al"
|
|
dw 0x0030
|
|
dd "bl"
|
|
dw 0x0031
|
|
dd "cl"
|
|
dw 0x0032
|
|
dd "dl"
|
|
dw 0x0033
|
|
dd "ah"
|
|
dw 0x0040
|
|
dd "bh"
|
|
dw 0x0041
|
|
dd "ch"
|
|
dw 0x0042
|
|
dd "dh"
|
|
dw 0x0043
|
|
dd "cs"
|
|
dw 0x0044
|
|
dd "ds"
|
|
dw 0x0045
|
|
dd "es"
|
|
dw 0x0046
|
|
dd "fs"
|
|
dw 0x0047
|
|
dd "gs"
|
|
dw 0x0048
|
|
dd "ss"
|
|
dw 0x0049
|
|
dd "rax"
|
|
dw 0x0000
|
|
dd "rbx"
|
|
dw 0x0001
|
|
dd "rcx"
|
|
dw 0x0002
|
|
dd "rdx"
|
|
dw 0x0003
|
|
dd "rsi"
|
|
dw 0x0004
|
|
dd "rdi"
|
|
dw 0x0005
|
|
dd "rsp"
|
|
dw 0x0006
|
|
dd "rbp"
|
|
dw 0x0007
|
|
dd "r10"
|
|
dw 0x000A
|
|
dd "r11"
|
|
dw 0x000B
|
|
dd "r12"
|
|
dw 0x000C
|
|
dd "r13"
|
|
dw 0x000D
|
|
dd "r14"
|
|
dw 0x000E
|
|
dd "r15"
|
|
dw 0x000F
|
|
dd "eax"
|
|
dw 0x0010
|
|
dd "ebx"
|
|
dw 0x0011
|
|
dd "ecx"
|
|
dw 0x0012
|
|
dd "edx"
|
|
dw 0x0013
|
|
dd "esi"
|
|
dw 0x0014
|
|
dd "edi"
|
|
dw 0x0015
|
|
dd "esp"
|
|
dw 0x0016
|
|
dd "ebp"
|
|
dw 0x0017
|
|
dd "r8d"
|
|
dw 0x0018
|
|
dd "r9d"
|
|
dw 0x0019
|
|
dd "r8w"
|
|
dw 0x0028
|
|
dd "r9w"
|
|
dw 0x0029
|
|
dd "sil"
|
|
dw 0x0034
|
|
dd "dil"
|
|
dw 0x0035
|
|
dd "spl"
|
|
dw 0x0036
|
|
dd "bpl"
|
|
dw 0x0037
|
|
dd "r8b"
|
|
dw 0x0038
|
|
dd "r9b"
|
|
dw 0x0039
|
|
dd "cr0"
|
|
dw 0x004A
|
|
dd "cr2"
|
|
dw 0x004B
|
|
dd "cr3"
|
|
dw 0x004C
|
|
dd "cr4"
|
|
dw 0x004D
|
|
dd "cr8"
|
|
dw 0x004E
|
|
dd "r10d"
|
|
dw 0x001A
|
|
dd "r11d"
|
|
dw 0x001B
|
|
dd "r12d"
|
|
dw 0x001C
|
|
dd "r13d"
|
|
dw 0x001D
|
|
dd "r14d"
|
|
dw 0x001E
|
|
dd "r15d"
|
|
dw 0x001F
|
|
dd "r10w"
|
|
dw 0x002A
|
|
dd "r11w"
|
|
dw 0x002B
|
|
dd "r12w"
|
|
dw 0x002C
|
|
dd "r13w"
|
|
dw 0x002D
|
|
dd "r14w"
|
|
dw 0x002E
|
|
dd "r15w"
|
|
dw 0x002F
|
|
dd "r10b"
|
|
dw 0x003A
|
|
dd "r11b"
|
|
dw 0x003B
|
|
dd "r12b"
|
|
dw 0x003C
|
|
dd "r13b"
|
|
dw 0x003D
|
|
dd "r14b"
|
|
dw 0x003E
|
|
dd "r15b"
|
|
dw 0x003F
|
|
.registers_end:
|
|
|
|
align 16 ; for readability in hexdump
|
|
opcodes:
|
|
.by_id:
|
|
; hlt
|
|
dw 0x004F
|
|
db 0xF4 ; opcode
|
|
db 0x00 ; reserved
|
|
dd 0x00000000
|
|
dd 0x00000000
|
|
dd 0x00000000
|
|
|
|
; int3
|
|
dw 0x0050
|
|
db 0xCC ; opcode
|
|
db 0x00 ; reserved
|
|
dd 0x00000000
|
|
dd 0x00000000
|
|
dd 0x00000000
|
|
|
|
; xor
|
|
dw 0x0053
|
|
db 0x31 ; r/m <- r
|
|
db 0x33 ; r <- r/m
|
|
|
|
db 0x81 ; r/m <- imm16/32
|
|
db 0x83 ; r/m <- imm8
|
|
dw 0x0000
|
|
|
|
dd 0x00006600 ; 00:
|
|
; 6: r/m <- imm16/32 op flag
|
|
; 6: r/m <- imm8 op flag
|
|
; 0x0000:
|
|
|
|
dd 0x00000000 ; reserved
|
|
|
|
; inc
|
|
dw 0x0054
|
|
db 0xFF ; r/m
|
|
db 0x00
|
|
|
|
dd 0x00000000
|
|
|
|
dd 0x00000000 ; 0: r/m op flag
|
|
; 0000000:
|
|
|
|
dd 0x00000000
|
|
|
|
; dec
|
|
dw 0x0055
|
|
db 0xFF ; r/m
|
|
db 0x00
|
|
|
|
dd 0x00000000
|
|
|
|
dd 0x00000001 ; 1: r/m op flag
|
|
; 0000000:
|
|
dd 0x00000000
|
|
|
|
; mov
|
|
dw 0x0056
|
|
db 0x89 ; r/m <- r
|
|
db 0x8B ; r <- r/m
|
|
|
|
db 0xC7 ; r/m <- imm16/32
|
|
db 0x00
|
|
dw 0x0000
|
|
|
|
dd 0x00000000 ; 00:
|
|
; 0: r/m <- imm16/32 op flag
|
|
; 00000:
|
|
|
|
dd 0x00000000
|
|
|
|
; add
|
|
dw 0x0057
|
|
db 0x01 ; r/m <- r
|
|
db 0x03 ; r <- r/m
|
|
|
|
db 0x81 ; r/m <- imm16/32
|
|
db 0x83 ; r/m <- imm8
|
|
dw 0x0000
|
|
|
|
dd 0x00000000 ; 00:
|
|
; 0: r/m <- imm16/32 op flag
|
|
; 0: r/m <- imm8 op flag
|
|
; 0000:
|
|
|
|
dd 0x00000000
|
|
|
|
; sub
|
|
dw 0x0058
|
|
db 0x29 ; r/m <- r
|
|
db 0x2B ; r <- r/m
|
|
|
|
db 0x81 ; r/m <- imm16/32
|
|
db 0x83 ; r/m <- imm8
|
|
dw 0x0000
|
|
|
|
dd 0x00005500 ; 00:
|
|
; 5: r/m <- imm16/32 op flag
|
|
; 5: r/m <- imm8 op flag
|
|
; 0000:
|
|
|
|
dd 0x00000000
|
|
|
|
; call
|
|
dw 0x0059
|
|
db 0xFF ; r/m
|
|
db 0x00
|
|
|
|
dw 0x0000
|
|
db 0xE8 ; rel16/32
|
|
db 0x00
|
|
|
|
dd 0x00000002 ; 2: r/m op flag
|
|
; 0000000:
|
|
|
|
dd 0x00000000
|
|
|
|
; retn
|
|
dw 0x005A
|
|
db 0xC3 ; opcode
|
|
db 0x00 ; reserved
|
|
dd 0x00000000
|
|
dd 0x00000000
|
|
dd 0x00000000
|
|
|
|
; cmp
|
|
dw 0x005B
|
|
db 0x39 ; r/m <- r
|
|
db 0x3B ; r <- r/m
|
|
|
|
db 0x81 ; r/m <- imm16/32
|
|
db 0x83 ; r/m <- imm8
|
|
dw 0x0000
|
|
|
|
dd 0x00007700 ; 00:
|
|
; 7: r/m <- imm16/32 op flag
|
|
; 7: r/m <- imm8 op flag
|
|
; 0000:
|
|
|
|
dd 0x00000000
|
|
|
|
; jmp
|
|
dw 0x005C
|
|
db 0xFF ; r/m
|
|
db 0x00
|
|
|
|
dw 0x0000
|
|
db 0xE9 ; rel16/32
|
|
db 0xEB ; rel8
|
|
|
|
dd 0x00000004 ; 4: r/m
|
|
; 000:
|
|
; 0: rel16/32
|
|
; 0: rel8
|
|
; 00:
|
|
|
|
dd 0x00000000
|
|
|
|
; je
|
|
dw 0x005D
|
|
dw 0x0000
|
|
|
|
dw 0x0000
|
|
db 0x00 ; TODO figure out the 0x0F prefix this will need
|
|
db 0x74 ; rel8
|
|
|
|
dd 0x00000000 ; 00000:
|
|
; 0: rel8
|
|
; 00:
|
|
|
|
dd 0x00000000
|
|
|
|
; jne
|
|
dw 0x005E
|
|
dw 0x0000
|
|
|
|
dw 0x0000
|
|
db 0x00 ; TODO figure out the 0x0F prefix this will need
|
|
db 0x75 ; rel8
|
|
|
|
dd 0x00000000 ; 00000:
|
|
; 0: rel8
|
|
; 00:
|
|
|
|
dd 0x00000000
|
|
|
|
; push
|
|
; TODO add support for the +r variation
|
|
dw 0x005F
|
|
db 0xFF ; r/m
|
|
db 0x00
|
|
|
|
db 0x68 ; imm16/32
|
|
db 0x6A ; imm8
|
|
dw 0x0000
|
|
|
|
dd 0x00000006 ; 6: r/m
|
|
; 0:
|
|
; 0: imm16/32
|
|
; 0: imm8
|
|
; 0000:
|
|
|
|
dd 0x00000000
|
|
|
|
; pop
|
|
; TODO add support for the +r variation
|
|
dw 0x0060
|
|
db 0x8F ; r/m
|
|
db 0x00
|
|
|
|
dd 0x00000000
|
|
|
|
dd 0x00000000 ; 0: r/m
|
|
; 0000000:
|
|
|
|
dd 0x00000000
|
|
|
|
; out
|
|
; TODO enforce DX AL requirement, ignore ModR/M correctly
|
|
dw 0x0061
|
|
db 0xEE
|
|
db 0x00
|
|
dd 0x00000000
|
|
dd 0x00000000
|
|
dd 0x00000000
|
|
.by_id_end:
|
|
|
|
msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00
|
|
msg_halt db "halted.", 0x0A, 0x00
|
|
|
|
whitespace_2 db " ", 0x0D
|
|
|
|
; test program
|
|
program:
|
|
db "print:", 0x0A
|
|
db " push rdx", 0x0A
|
|
db " push rax", 0x0A
|
|
db " push rsi", 0x0A
|
|
db "", 0x0A
|
|
db " mov edx, 0x3F8", 0x0A
|
|
db " .loop:", 0x0A
|
|
db " mov al, [rsi]", 0x0A
|
|
db " cmp al, 0x00", 0x0A
|
|
db " je .done", 0x0A
|
|
db " out dx, al", 0x0A
|
|
db " inc rsi", 0x0A
|
|
db " jmp .loop", 0x0A
|
|
db " .done:", 0x0A
|
|
db " pop rsi", 0x0A
|
|
db " pop rax", 0x0A
|
|
db " pop rdx", 0x0A
|
|
db " ret", 0x0A
|
|
db " .debug:", 0x0A
|
|
db " push rsi", 0x0A
|
|
db " mov rsi, .debug_msg", 0x0A
|
|
db " call print", 0x0A
|
|
db " pop rsi", 0x0A
|
|
db " jmp print ; tail call", 0x0A
|
|
db " .error:", 0x0A
|
|
db " push rsi", 0x0A
|
|
db " mov rsi, .error_msg", 0x0A
|
|
db " call print", 0x0A
|
|
db " pop rsi", 0x0A
|
|
db " jmp print ; tail call", 0x0A
|
|
db " .test:", 0x0A
|
|
db " push rsi", 0x0A
|
|
db " mov rsi, .test_msg", 0x0A
|
|
db " call print", 0x0A
|
|
db " pop rsi", 0x0A
|
|
db " jmp print ; tail call", 0x0A
|
|
db " .warn:", 0x0A
|
|
db " push rsi", 0x0A
|
|
db " mov rsi, .warn_msg", 0x0A
|
|
db " call print", 0x0A
|
|
db " pop rsi", 0x0A
|
|
db " jmp print ; tail call", 0x0A
|
|
db " .debug_msg:", 0x0A
|
|
db " db 0x1B", 0x0A
|
|
db ' db "[36m"', 0x0A
|
|
db ' db "[DEBUG]: "', 0x0A
|
|
db " db 0x1B", 0x0A
|
|
db ' db "[0m"', 0x0A
|
|
db " db 0x00", 0x0A
|
|
db " .error_msg:", 0x0A
|
|
db " db 0x1B", 0x0A
|
|
db ' db "[1;31m"', 0x0A
|
|
db ' db "[ERROR]: "', 0x0A
|
|
db " db 0x1B", 0x0A
|
|
db ' db "[0m"', 0x0A
|
|
db " db 0x00", 0x0A
|
|
db " .test_msg:", 0x0A
|
|
db " db 0x1B", 0x0A
|
|
db ' db "[1;33m"', 0x0A
|
|
db ' db "[TEST]: "', 0x0A
|
|
db " db 0x1B", 0x0A
|
|
db ' db "[0m"', 0x0A
|
|
db " db 0x00", 0x0A
|
|
db " .warn_msg:", 0x0A
|
|
db " db 0x1B", 0x0A
|
|
db ' db "[1;35m"', 0x0A
|
|
db ' db "[WARN]: "', 0x0A
|
|
db " db 0x1B", 0x0A
|
|
db ' db "[0m"', 0x0A
|
|
db " db 0x00", 0x0A
|
|
.size dq $ - program
|
|
|
|
msg_end db "end of the binary ->|", 0x0A, 0x00
|