Files
bootler/twasm/asm/main.asm
andromeda e8c1313ece :p
2026-04-02 23:44:17 +02:00

3242 lines
75 KiB
NASM

; TODO actually enforce any of these *_SIZE constants :p
; Memory map: every region below starts on a 0x10000 boundary and every *_SIZE
; is 0x10000, so the regions sit back to back from 0x00030000 to 0x0007FFFF.
LOAD_ADDR equ 0x00010000 ; address this program is loaded at
; STACK_ADDR has the same value as AWAITING_LABEL_TABLE_ADDR. A push decrements
; rsp before storing, so the stack occupies the addresses below 0x00030000.
STACK_ADDR equ 0x00030000 ; address to put the 64-bit stack at
AWAITING_LABEL_TABLE_ADDR equ 0x00030000 ; address to store pending labels at
AWAITING_LABEL_TABLE_SIZE equ 0x00010000
LABEL_TABLE_ADDR equ 0x00040000 ; address to store labels at
LABEL_TABLE_SIZE equ 0x00010000
TEST_ARENA_ADDR equ 0x00050000 ; address to run tests at
TEST_ARENA_SIZE equ 0x00010000 ; maximum size tests can use
TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at
TOKEN_TABLE_SIZE equ 0x00010000 ; max length of table
OUTPUT_ADDR equ 0x00070000 ; address of outputted binary
OUTPUT_SIZE equ 0x00010000 ; max length of outputted binary
; values returned by the lookup procedures when an id is not on record
UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id of an unrecognised token
UNRECOGNISED_ID_TYPE equ 0x0F ; type of an unrecognised id
UNRECOGNISED_ID_METADATA equ 0xFF ; metadata of an unrecognised id
UNRECOGNISED_ID_OPCODE equ 0x90 ; opcode of an unrecognised id (NOP)
TEST_LINE_LENGTH equ 80 ; right border of test suite results
; flags for expected values in tokeniser
; one bit each, so they can be OR-ed together into a single byte
E_COMMENT equ 1 << 0
E_NEWLINE equ 1 << 1
E_WHITESPACE equ 1 << 2
E_COMMA equ 1 << 3
E_OPERATOR equ 1 << 4
E_OPERAND equ 1 << 5
E_LABEL equ 1 << 6
[bits 64]
[org LOAD_ADDR]
[default abs] ; TODO see if I actually need to do this
; afaik absolute addressing is not harmful on bare metal
; reasoning: stops annoying warning =D
; entry point: set up the stack, greet, run the self tests, then tokenise and
; assemble the embedded `program`. Never returns; ends in `halt`.
start:
mov esp, STACK_ADDR ; 32-bit write zero-extends into rsp
; we might need more stack space, let's just be safe
mov rsi, msg_welcome
call print
call run_tests
call clear_token_table
call clear_label_tables
mov rdi, program ; rdi -> first byte of program
call tokenise
; rax = number of tokens in token table
push rax ; keep the token count across the next call
mov rdi, rax
call clear_output_arena
pop rdi ; rdi = number of tokens in token table
call assemble
jmp halt
; ------------------------------------------------------------------------------
; assembling
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; assemble
;
; description:
; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
; binary located at OUTPUT_ADDR.
; Behaviour is undefined when:
; - tokens are in an impossible order
; 0x1000 ; memory address, following byte should be a register
; 0x1000 ; not a register
; - operator tokens followed by the wrong number of arguments
; TODO enforce this in `tokenise`
; 0x004F ; hlt, expects 0 arguments
; 0x0000 ; rax, an argument
; - an undefined token is included, like 0x0051
;
; parameters:
; rdi = number of tokens in the token table
; ------------------------------------------------------------------------------
assemble:
; rbp remembers the stack pointer on entry. `.break` restores rsp from it, so a
; handler may jump to `.break` with values still pushed. rbp is not preserved
; for the caller.
mov rbp, rsp
xor eax, eax
mov [.tokens_processed], eax ; eax = number of tokens processed
mov [.tokens_total], edi ; edi = total number of tokens in table
test edi, edi ; an empty token table has nothing to assemble;
jz .break ; without this check token 0 would be processed anyway
; invariant at .loop: eax = index of the token to process (0 on first entry,
; set by .next_token afterwards; .flush_write_buffer preserves rax)
.loop:
call .flush_write_buffer ; commit the bytes of the previous instruction
call .get_next_tte
push rdi ; di = tte
call get_tte_type
; al = type
pop rdi ; di = tte
cmp al, 0x1 ; check if next tte is an operator
je .operator
cmp al, 0x3 ; check if next tte is a pseudo-operator
je .pseudo_operator
cmp al, 0x4 ; check if next tte is a label
je .label
jmp .unexpected_token ; otherwise, fail
; handler: label definition token
; di = label tte; its low 12 bits are passed on as the label's table index
.label:
push rsi
mov rsi, .msg_label
call print.debug
pop rsi
; the write buffer was flushed at .loop, so .next_output_byte is where the next
; instruction will start
mov esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label to start of program
and edi, 0x0FFF ; edi = index to add address hash to
call add_label_address
jmp .loop_next_token
; handler: pseudo-operator token
; di = pseudo-operator tte; only 0x0100 is handled (see .pseudo_operator_db)
.pseudo_operator:
push rsi
mov rsi, .msg_pseudo_operator
call print.debug
pop rsi
cmp di, 0x0100
je .pseudo_operator_db
jmp .unexpected_token
; expects the token 0x2000 followed by 4 tokens (8 bytes) of data, which are
; copied verbatim into the write buffer
.pseudo_operator_db:
call .next_token
jge .break
call .get_next_tte
cmp di, 0x2000
jne .unexpected_token
call .next_token ; tokens_processed now indexes the first data token
jge .break
mov ecx, [.tokens_processed]
mov rax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 8 bytes from the tt
mov ecx, [.buffer_pointer]
mov [rcx], rax ; and add them to the buffer
add ecx, 8
mov [.buffer_pointer], ecx
; step over the remaining 3 data tokens; .loop_next_token steps past the last
call .next_token
jge .break
call .next_token
jge .break
call .next_token
jge .break
jmp .loop_next_token
; handler: operator token. Looks up how many operands the operator takes and
; dispatches on that count.
; di = tte of operator
.operator:
push rdi
call get_tte_typed_metadata
; al = tte typed metadata
pop rdi ; di = tte of operator
cmp al, UNRECOGNISED_ID_METADATA ; no metadata on record?
je .unexpected_token ; then fail
and al, 11b ; al = # operands
jz .operator_0 ; 0 operands
cmp al, 2
je .operator_2 ; 2 operands
jb .operator_1 ; below 2 and not 0: 1 operand
jmp .unexpected_token ; 3 is not a valid operand count
; handler: operator without operands; emits just the opcode byte
; di = tte of operator
.operator_0:
push rsi
mov rsi, .msg_operator_0
call print.debug
pop rsi
xor ebx, ebx ; bl = no flags
xor esi, esi ; sil = opcode variant 0
; di = tte of operator
call get_opcode
; al = opcode
call .write_byte
jmp .loop_next_token
; handler: operator with one operand. Emits an optional 0x0F prefix and the
; opcode, then dispatches on the kind of operand.
; di = tte of operator
.operator_1:
push rsi
mov rsi, .msg_operator_1
call print.debug
pop rsi
push rdi ; save operator tte
call .next_token
jge .break
call .get_next_tte
mov rcx, rdi ; cx = operand tte
pop rdi ; di = tte of operator
push rcx ; operand tte stays pushed until after write_prefix_continue
mov sil, 0 ; dst=r/m
xor ebx, ebx ; no flags
shr ecx, 12
and ecx, 0xF ; ecx = top nibble of operand tte
cmp ecx, 0x3 ; 0x3XXX: label reference
jne .operator_1_get_opcode_continue
.operator_1_get_opcode_label:
mov sil, 4 ; dst=rel
.operator_1_get_opcode_continue:
call get_opcode
; al = opcode
; dl = op flag
push rdx
and dl, 1000b ; bit 3 of the flag byte: opcode needs the 0x0F prefix
cmp dl, 1000b
jne .operator_1_write_prefix_continue
.operator_1_write_prefix:
push rax
mov al, 0x0F
call .write_byte
pop rax ; al = opcode
.operator_1_write_prefix_continue:
call .write_byte
pop rdx ; dl = op flag
pop rdi ; di = next tte
push rdi
and di, 0xFF00
cmp di, 0x1000 ; check if token is a memory address
pop rdi ; di = next tte; pop leaves the flags of the cmp intact
je .operator_1_memory
push rdi
push rdx
; di = next tte
call get_tte_type
; al = type of token
pop rdx ; dl = op flag
pop rdi ; di = next tte
cmp al, 0x02 ; type: register
je .operator_1_register
cmp al, 0x04 ; type: label reference
je .operator_1_label
jmp .unexpected_token
; handler: one-operand operator with a memory operand; not implemented, so it
; always fails with the unsupported memory access error
.operator_1_memory:
push rsi
mov rsi, .msg_operator_1_memory
call print.debug
pop rsi
jmp .unsupported_memory_access
; handler: one-operand operator with a register operand. Puts the operand size
; prefix in front of the already buffered opcode and appends the ModR/M byte.
; di = register tte
; dl = op flag from get_opcode
.operator_1_register:
push rsi
mov rsi, .msg_operator_1_register
call print.debug
pop rsi
; di = token table entry
call get_tte_typed_metadata
; al = register typed metadata
and al, 11b ; al = register width
jz .unexpected_token ; 00b: 8 bit; TODO handle 8 bit opcodes
cmp al, 10b ; 32 bit
je .operator_1_register_no_prefix ; default register length; no prefix
cmp al, 11b ; 64 bit
je .operator_1_register_64
; only 01b (16 bit) is left
.operator_1_register_16:
mov al, 0x66
call .push_byte
jmp .operator_1_register_no_prefix
.operator_1_register_64:
mov al, 0x48
call .push_byte
.operator_1_register_no_prefix:
mov esi, edi ; si = `R/M` tte
movzx edi, dl ; di = op flag
or edi, 0xFE00 ; 0xFEXX: pass di as direct value
mov edx, 11b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .write_byte
jmp .loop_next_token
; handler: one-operand operator with a label reference. Registers the reference
; as awaiting resolution and reserves 4 bytes of 0xFF in its place.
; di = label tte
.operator_1_label:
push rsi
mov rsi, .msg_operator_1_label
call print.debug
pop rsi
; NOTE(review): the opcode written above is still in the write buffer, so
; .next_output_byte is the address where this instruction starts, not where the
; reserved bytes will land - confirm that add_awaiting_label expects that
mov esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
; of program
mov eax, 0x04 ; al = first 4 bits: # bytes reserved
; 5th bit: abs flag
and edi, 0x0FFF ; edi = index of hash
call add_awaiting_label
mov al, 0xFF ; reserve space
call .write_byte
call .write_byte
call .write_byte
call .write_byte
jmp .loop_next_token
; handler: operator with two operands. Fetches the first (destination) operand
; and dispatches on whether it is a memory address or a register.
; di = tte of operator
.operator_2:
push rsi
mov rsi, .msg_operator_2
call print.debug
pop rsi
mov cx, di ; cx = tte of operator
call .next_token
jge .break
call .get_next_tte
; di = next tte
mov eax, edi
and ax, 0xFF00 ; ax = high byte of the tte
cmp ax, 0x1000 ; check if token is a memory address
je .operator_2_memory
push rcx
; di = next tte
call get_tte_type
; al = type of token
pop rcx ; cx = tte of operator
cmp al, 0x02 ; type: register
jne .unexpected_token
jmp .operator_2_register
; handler: two-operand operator whose destination is a memory address. Only
; the form `[register], register` is supported.
; di = memory tte (0x10XX)
; cx = tte of operator
.operator_2_memory:
push rsi
mov rsi, .msg_operator_2_memory
call print.debug
pop rsi
cmp di, 0x1000 ; check if token is addressing a register
jne .unsupported_memory_access ; if not, unsupported
mov edi, ecx ; di = tte of operator
xor esi, esi ; dst=r/m; src=r
xor ebx, ebx ; no flags
call get_opcode
; al = opcode
; dl = op flag
call .write_byte
call .next_token
jge .break
call .get_next_tte
; di = tte of the addressing register
call get_tte_typed_metadata
and al, 11b ; al = register width
cmp al, 10b ; 32 bit
je .operator_2_memory_32
cmp al, 11b ; 64 bit
je .operator_2_memory_continue
; other cases: 16 bit, 8 bit both are not valid for addressing
jmp .size_mismatch
.operator_2_memory_32:
mov al, 0x67 ; address size prefix
call .push_byte
.operator_2_memory_continue:
mov si, di ; si = dst register tte
call .next_token
jge .break
call .get_next_tte
push rdi
and di, 0xFF00
cmp di, 0x1000 ; check if token is a memory address
pop rdi ; di = next tte
je .unsupported_memory_access ; if so, fail; no case of *],[* in asm
; di = next tte
call get_tte_type
; al = type of token
cmp al, 0x02 ; check if token is a register
je .operator_2_memory_register ; if so, handle
jmp .unexpected_token
.operator_2_memory_register:
push rsi
mov rsi, .msg_operator_2_memory_register
call print.debug
pop rsi ; si = r/m
; di = src tte
call get_tte_typed_metadata
; al = register typed metadata
and al, 11b ; al = register width
cmp al, 00b ; 8 bit
je .unexpected_token ; TODO handle 8 bit opcodes
cmp al, 01b ; 16 bit
je .operator_2_memory_register_16
cmp al, 10b ; 32 bit
je .operator_2_memory_register_continue ; default register length; no prefix
; only 11b (64 bit) is left
.operator_2_memory_register_64:
; A REX prefix only takes effect when it directly precedes the opcode. If the
; 0x67 address size prefix is already at the front of the buffer, REX.W has to
; go behind it (67 48 op), not in front of it (48 67 op).
; The first buffered byte is either that prefix or the opcode, and 0x67 is not
; an opcode in 64-bit mode, so the compare cannot misfire.
cmp byte [.buffer], 0x67
jne .operator_2_memory_register_64_front
mov al, 0x67
call .push_byte ; buffer = 67 67 op
mov byte [.buffer + 1], 0x48 ; buffer = 67 48 op
jmp .operator_2_memory_register_continue
.operator_2_memory_register_64_front:
mov al, 0x48
call .push_byte
jmp .operator_2_memory_register_continue
.operator_2_memory_register_16:
mov al, 0x66 ; operand size prefix; may precede 0x67
call .push_byte
.operator_2_memory_register_continue:
; si = r/m; dst tte
; di = reg; src tte
mov dl, 00b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .write_byte
jmp .loop_next_token
; handler: two-operand operator whose destination is a register. Emits the
; operand size prefix for the destination and sets the operator flag byte.
; di = dst register tte
; cx = tte of operator
; leaves bl = operator flag byte (1 for an 8 bit destination, otherwise 0)
.operator_2_register:
push rsi
mov rsi, .msg_operator_2_register
call print.debug
pop rsi
push rcx
; di = token table entry
call get_tte_typed_metadata
; al = register typed metadata
pop rcx ; cx = operator tte
; Clear the flags for every width. The 32 bit path used to skip this, so an
; 8 bit flag left over from an earlier instruction leaked into the opcode
; lookup.
xor ebx, ebx ; no operator flags
and al, 11b ; al = register width
cmp al, 00b ; 8 bit
je .operator_2_register_8
cmp al, 01b ; 16 bit
je .operator_2_register_16
cmp al, 10b ; 32 bit
je .operator_2_register_continue ; default register length; no prefix
; only 11b (64 bit) is left
.operator_2_register_64:
mov al, 0x48
call .push_byte
jmp .operator_2_register_continue
.operator_2_register_8:
mov bl, 1b ; operator flag 8bit
jmp .operator_2_register_continue
.operator_2_register_16:
mov al, 0x66
call .push_byte
jmp .operator_2_register_continue
; fetches the second (source) operand of `operator register, ...` and
; dispatches on its kind
; di = dst register tte
; cx = tte of operator
; bl = operator flag byte
.operator_2_register_continue:
mov si, di ; si = dst tte
call .next_token
jge .break
call .get_next_tte
; di = next tte
mov eax, edi
and ax, 0xFF00 ; ax = high byte of the tte
cmp ax, 0x1000 ; check if token is a memory address
je .operator_2_register_memory
cmp ax, 0x2000 ; check if token is a constant
je .operator_2_register_const
; get_tte_type only takes di, so keep the operands safe around the call
push rcx
push rdi
push rsi
; di = next tte
call get_tte_type
; al = type of token
pop rsi ; si = dst tte
pop rdi ; di = src tte
pop rcx ; cx = operator tte
cmp al, 0x02 ; check if token is a register
je .operator_2_register_register ; if so, handle
cmp al, 0x04 ; check if token is a label
je .operator_2_register_label ; if so, handle
jmp .unexpected_token ; otherwise, fail
; handler: `operator register, label`. Emits opcode and ModR/M, registers the
; label reference as awaiting resolution and reserves 0xFF bytes for it, as
; many as the destination is wide.
; di = label tte
; si = dst register tte
; cx = tte of operator
; bl = operator flag byte
.operator_2_register_label:
push rsi
mov rsi, .msg_operator_2_register_label
call print.debug
pop rsi ; si = dst tte
; di is reused for the get_opcode and get_ModRM parameters below. Save the
; label tte first; before this fix add_awaiting_label was handed the low bits
; of the dst register tte instead.
push rdi ; save label tte
push rsi ; save dst tte
mov di, cx ; di = tte of operator
mov sil, 2 ; dst=r/m,src=imm
; bl = operator flag byte
test bl, 1 ; bit8 flag
jz .operator_2_register_label_get_opcode_continue
.operator_2_register_label_get_opcode_8:
mov sil, 3 ; dst=r/m,src=imm8
.operator_2_register_label_get_opcode_continue:
call get_opcode
; al = opcode
; dl = flags
call .write_byte
mov edi, edx ; di = op flag
and edi, 0xFF
or edi, 0xFE00 ; 0xFEXX: pass di as direct value
pop rsi ; si = r/m; dst tte
mov edx, 11b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .write_byte
pop rdi ; di = label tte
test bl, 1 ; bit8 flag
jnz .operator_2_register_label_8
; the width of the destination is recovered from the prefix in the buffer
push rdi
push rsi
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x48
call elemb
pop rsi
pop rdi
cmp al, 1
je .operator_2_register_label_64
push rdi
push rsi
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x66
call elemb
pop rsi
pop rdi
cmp al, 1
je .operator_2_register_label_16
jmp .operator_2_register_label_32
; NOTE(review): in the handlers below opcode and ModR/M are still in the write
; buffer, so .next_output_byte is the address where this instruction starts,
; not where the reserved bytes will land - confirm that add_awaiting_label
; expects that
.operator_2_register_label_8:
mov esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
mov eax, 0x11 ; al = first 4 bits: # bytes reserved
; 5th bit: abs flag
and edi, 0x0FFF ; edi = index of hash
call add_awaiting_label
mov al, 0xFF ; reserve space
call .write_byte
jmp .loop_next_token
.operator_2_register_label_16:
mov esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
mov eax, 0x12 ; al = first 4 bits: # bytes reserved
; 5th bit: abs flag
and edi, 0x0FFF ; edi = index of hash
call add_awaiting_label
mov al, 0xFF ; reserve space
call .write_byte
call .write_byte
jmp .loop_next_token
.operator_2_register_label_32:
mov esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
mov eax, 0x14 ; al = first 4 bits: # bytes reserved
; 5th bit: abs flag
and edi, 0x0FFF ; edi = index of hash
call add_awaiting_label
mov al, 0xFF ; reserve space
call .write_byte
call .write_byte
call .write_byte
call .write_byte
jmp .loop_next_token
.operator_2_register_label_64:
; TODO do the B8+r variant :/
jmp .size_mismatch
.operator_2_register_label_continue:
jmp .unexpected_token
; handler: `operator register, [register]`. Emits the opcode, the address size
; prefix if needed, and the ModR/M byte.
; di = memory tte (0x10XX)
; si = dst register tte
; cx = tte of operator
; bl = operator flag byte
.operator_2_register_memory:
push rsi
mov rsi, .msg_operator_2_register_memory
call print.debug
pop rsi ; si = dst tte
push rdi
push rsi
mov di, cx ; di = tte of operator
mov sil, 1 ; dst = reg
; bl = operator flag byte
call get_opcode
; al = opcode
; dl = op flag
; TODO do something if the op flag is present
call .write_byte
pop rsi ; si = dst tte
pop rdi ; di = memory tte
cmp di, 0x1000 ; check if token is addressing to a register
jne .unsupported_memory_access ; if not, unsupported
call .next_token
jge .break
call .get_next_tte
; di = tte of the addressing register
call get_tte_typed_metadata
; al = register typed metadata
and al, 11b ; al = register width
cmp al, 10b ; 32 bit
je .operator_2_register_memory_32
cmp al, 11b ; 64 bit
je .operator_2_register_memory_continue ; default addr length; no prefix
; other cases: 16 bit, 8 bit both are not valid for addressing
jmp .size_mismatch
.operator_2_register_memory_32:
mov al, 0x67 ; address size prefix, goes in front of anything buffered so far
call .push_byte
jmp .operator_2_register_memory_continue
.operator_2_register_memory_continue:
; si = reg; dst tte
; di = r/m; src tte
; get_ModRM wants `reg` in di and `R/M` in si, so swap the two
push rsi
mov esi, edi ; si = r/m; src tte (the addressing register)
pop rdi ; di = reg; dst tte
mov edx, 00b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .write_byte
jmp .loop_next_token
; handler: `operator register, register`. Emits the opcode, checks that the
; source width agrees with the prefix emitted for the destination, and appends
; the ModR/M byte unless the opcode's flags say it takes none.
; di = src register tte
; si = dst register tte
; cx = tte of operator
; bl = operator flag byte
.operator_2_register_register:
push rsi
mov rsi, .msg_operator_2_register_register
call print.debug
pop rsi ; si = dst tte
push rdi
push rsi
mov di, cx ; di = tte of operator
mov sil, 0 ; dst = r/m
; bl = operator flag byte
call get_opcode
; al = opcode
; dl = flags
call .write_byte
pop rsi ; si = dst tte
pop rdi ; di = src tte
push rdx
; di = src tte
call get_tte_typed_metadata
; al = register typed metadata
pop rdx ; dl = flags
and al, 11b ; al = register width
cmp al, 00b ; 8 bit
je .unexpected_token ; TODO handle 8 bit opcodes
cmp al, 01b ; 16 bit
je .operator_2_register_register_16
cmp al, 10b ; 32 bit
je .operator_2_register_register_32
cmp al, 11b ; 64 bit
je .operator_2_register_register_64
; each case below searches the write buffer with elemb for the prefix the
; destination would have produced: 0x66 for 16 bit, 0x48 for 64 bit
.operator_2_register_register_16:
; 16 bit: 0x66 must be present
push rdi
push rsi
push rdx
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x66
call elemb
pop rdx ; dl = flags
pop rsi ; si = dst tte
pop rdi ; di = src tte
cmp al, 1
jne .size_mismatch
; 64 bit: 0x48 must be absent
push rdi
push rsi
push rdx
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x48
call elemb
pop rdx ; dl = flags
pop rsi ; si = dst tte
pop rdi ; di = src tte
cmp al, 0
jne .size_mismatch
jmp .operator_2_register_register_continue
.operator_2_register_register_32:
; 16 bit: 0x66 must be absent
push rdi
push rsi
push rdx
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x66
call elemb
pop rdx ; dl = flags
pop rsi ; si = dst tte
pop rdi ; di = src tte
cmp al, 0
jne .size_mismatch
; 64 bit: 0x48 must be absent
push rdi
push rsi
push rdx
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x48
call elemb
pop rdx ; dl = flags
pop rsi ; si = dst tte
pop rdi ; di = src tte
cmp al, 0
jne .size_mismatch
jmp .operator_2_register_register_continue
.operator_2_register_register_64:
; 16 bit: 0x66 must be absent
push rdi
push rsi
push rdx
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x66
call elemb
pop rdx ; dl = flags
pop rsi ; si = dst tte
pop rdi ; di = src tte
cmp al, 0
jne .size_mismatch
; 64 bit: 0x48 must be present
push rdi
push rsi
push rdx
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x48
call elemb
pop rdx ; dl = flags
pop rsi ; si = dst tte
pop rdi ; di = src tte
cmp al, 1
jne .size_mismatch
jmp .operator_2_register_register_continue
.operator_2_register_register_continue:
shr dl, 4
and dl, 1 ; dl = bit 4 of the flags: no ModR/M
cmp dl, 1
je .loop_next_token ; no ModRM flag
; di = reg; src tte
; si = r/m; dst tte
mov edx, 11b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .write_byte
jmp .loop_next_token
; handler: `operator register, constant`. Emits opcode and ModR/M followed by
; the immediate, which is read from the 4 tokens (8 bytes) after the 0x2000
; token.
; di = constant tte (0x20XX); not needed any further
; si = dst register tte
; cx = tte of operator
; bl = operator flag byte
.operator_2_register_const:
push rsi
mov rsi, .msg_operator_2_register_const
call print.debug
pop rsi ; si = dst tte
; Only the dst tte is saved. The constant tte used to be pushed here as well
; and never popped, which leaked 8 bytes of stack for every instruction of
; this form.
push rsi
mov di, cx ; di = tte of operator
mov sil, 2 ; dst=r/m,src=imm
; bl = operator flag byte
test bl, 1 ; bit8 flag
jz .operator_2_register_const_get_opcode_continue
.operator_2_register_const_get_opcode_8:
mov sil, 3 ; dst=r/m,src=imm8
.operator_2_register_const_get_opcode_continue:
call get_opcode
; al = opcode
; dl = op flag
; TODO do something if the op flag is present
call .write_byte
mov edi, edx ; di = op flag
and edi, 0xFF
or edi, 0xFE00 ; 0xFEXX: pass di as direct value
pop rsi ; si = r/m; dst tte
mov edx, 11b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .write_byte
call .next_token ; tokens_processed now indexes the first data token
jge .break
test bl, 1 ; bit8 flag
jnz .operator_2_register_const_8
; the width of the destination is recovered from the prefix in the buffer
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x48
call elemb
cmp al, 1
je .operator_2_register_const_64
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x66
call elemb
cmp al, 1
je .operator_2_register_const_16
jmp .operator_2_register_const_32
.operator_2_register_const_8:
mov ecx, [.tokens_processed]
mov al, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next byte from the tt
call .write_byte ; and add it to the buffer
jmp .operator_2_register_const_continue
.operator_2_register_const_16:
mov ecx, [.tokens_processed]
mov ax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 2 bytes from the tt
mov ecx, [.buffer_pointer]
mov [rcx], ax ; and add them to the buffer
add ecx, 2
mov [.buffer_pointer], ecx
jmp .operator_2_register_const_continue
.operator_2_register_const_32:
mov ecx, [.tokens_processed]
mov eax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 4 bytes from the tt
mov ecx, [.buffer_pointer]
mov [rcx], eax ; and add them to the buffer
add ecx, 4
mov [.buffer_pointer], ecx
jmp .operator_2_register_const_continue
.operator_2_register_const_64:
; An opcode that is followed by a ModR/M byte takes a 32 bit immediate even
; with REX.W; the processor sign-extends it to 64 bits. Emitting 8 bytes here
; would leave 4 stray bytes in the instruction stream. Constants that do not
; survive the sign extension need the B8+r variant, which is not implemented.
mov ecx, [.tokens_processed]
mov rax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 8 bytes from the tt
movsxd rdx, eax ; rdx = low 4 bytes, sign-extended
cmp rax, rdx ; does the constant fit?
jne .size_mismatch ; if not, fail
mov ecx, [.buffer_pointer]
mov [rcx], eax ; add the low 4 bytes to the buffer
add ecx, 4
mov [.buffer_pointer], ecx
jmp .operator_2_register_const_continue
.operator_2_register_const_continue:
; skip the next 4 tokens (8 bytes) as prescribed by 0x2000
; (3 here, .loop_next_token steps past the last)
call .next_token
jge .break
call .next_token
jge .break
call .next_token
jge .break
jmp .loop_next_token
; common tail of every handler: step to the next token and keep looping while
; there are tokens left
.loop_next_token:
call .next_token
jl .loop ; tokens_processed < tokens_total: more to do
; normal exit of `assemble`: commit pending bytes and return to the caller
.break:
call .flush_write_buffer
push rsi
mov rsi, .msg_break
call print.debug
pop rsi
mov rsp, rbp ; drop anything a handler left pushed
ret
; error exits: each one commits the bytes buffered so far, prints its message
; and halts. None of them returns.
.unexpected_token:
call .flush_write_buffer
push rsi
mov rsi, .msg_unexpected_token
call print.error
pop rsi
jmp halt
.unsupported_memory_access:
call .flush_write_buffer
push rsi
mov rsi, .msg_unsupported_memory_access
call print.error
pop rsi
jmp halt
.size_mismatch:
call .flush_write_buffer ; flush like the other error exits do
push rsi
mov rsi, .msg_size_mismatch
call print.error
pop rsi
jmp halt
; procedures
; .next_token
; advances to the next token
; returns eax = new number of tokens processed, edi = total number of tokens,
; flags = result of comparing the two
; add the line `jge .break` after call site
.next_token:
mov edi, [.tokens_total]
mov eax, [.tokens_processed]
inc eax
mov [.tokens_processed], eax
cmp eax, edi ; caller branches on this
ret
; .get_next_tte
; eax = current entry index in token table
; returns di = next tte; the rest of edi is zeroed
.get_next_tte:
movzx edi, word [eax * 2 + TOKEN_TABLE_ADDR]
ret
.tokens_processed dd 0 ; index of the token being processed
.tokens_total dd 0 ; number of tokens in the token table
; .output_byte
; appends a byte to the output binary and advances .next_output_byte
; al = byte to write
; preserves rdx
.output_byte:
push rdx
mov edx, [.next_output_byte] ; edx -> next empty byte in output
mov [edx], al ; write byte to that address
inc dword [.next_output_byte] ; and move on to the next address
pop rdx
ret
.next_output_byte dd OUTPUT_ADDR ; next empty byte in output
; TODO get rid of this sketchy bit of state
; .push_byte
; inserts a byte at the front of the write buffer, moving everything already
; in it up by one byte, and advances .buffer_pointer by one
; al = byte to push
; preserves rax and rcx
.push_byte:
push rcx
push rax
; Start one byte before the last byte of the buffer. Starting at the last byte
; would copy it to .buffer_end, one byte past the buffer, which is the first
; byte of .buffer_pointer.
mov ecx, .buffer_end - 1 ; ecx -> last byte of buffer
.push_byte_loop:
dec ecx ; ecx -> next byte to move up
cmp ecx, .buffer
jl .push_byte_break ; moved the first byte of the buffer: done
mov al, [ecx]
mov [ecx + 1], al
jmp .push_byte_loop
.push_byte_break:
pop rax ; al = byte to push
mov [.buffer], al ; write desired byte to front of buffer
inc dword [.buffer_pointer] ; the buffer holds one more byte now
pop rcx
ret
; .write_byte
; appends a byte to the write buffer and advances .buffer_pointer
; al = byte to write
; preserves rdx
.write_byte:
push rdx
mov edx, [.buffer_pointer] ; edx -> next empty byte in buffer
mov [edx], al
inc dword [.buffer_pointer]
pop rdx
ret
; .flush_write_buffer
; moves every byte in the write buffer to the output, zeroes the buffer bytes
; it moved and resets .buffer_pointer to the start of the buffer
; preserves rax and rcx
.flush_write_buffer:
push rcx
push rax
mov ecx, .buffer ; ecx -> byte to flush
.flush_write_buffer_loop:
cmp ecx, [.buffer_pointer]
jge .flush_write_buffer_break ; reached the first empty byte: done
mov al, [ecx]
call .output_byte
mov byte [ecx], 0x00
inc ecx
jmp .flush_write_buffer_loop
.flush_write_buffer_break:
mov dword [.buffer_pointer], .buffer
pop rax
pop rcx
ret
; write buffer: the bytes of one instruction are collected here, so prefixes
; can still be put in front of them, before they are flushed to the output
.buffer dq 0, 0 ; octo word (16 bytes) of space for max of 8 bytes per write
.buffer_end:
.buffer_pointer dd .buffer ; points to current byte in buffer
; error messages
.msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00
.msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00
.msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00
; debug messages, one per handler
.msg_break db "break", 0x0A, 0x00
.msg_label db "label", 0x0A, 0x00
.msg_operator_0 db "operator_0", 0x0A, 0x00
.msg_operator_1 db "operator_1", 0x0A, 0x00
.msg_operator_1_memory db "operator_1_memory", 0x0A, 0x00
.msg_operator_1_register db "operator_1_register", 0x0A, 0x00
.msg_operator_1_label db "operator_1_label", 0x0A, 0x00
.msg_operator_2 db "operator_2", 0x0A, 0x00
.msg_operator_2_memory db "operator_2_memory", 0x0A, 0x00
.msg_operator_2_memory_register db "operator_2_memory_register", 0x0A, 0x00
.msg_operator_2_register db "operator_2_register", 0x0A, 0x00
.msg_operator_2_register_label db "operator_2_register_label", 0x0A, 0x00
.msg_operator_2_register_memory db "operator_2_register_memory", 0x0A, 0x00
.msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00
.msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00
.msg_pseudo_operator db "pseudo_operator", 0x0A, 0x00
; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
; given a token table entry, returns the declared type in `tokens.by_id`. If
; there is no entry, returns UNRECOGNISED_ID_TYPE
;
; +-----+-----------------+
; | hex | meaning |
; +-----+-----------------+
; | 0x0 | ignored |
; | 0x1 | operator |
; | 0x2 | register |
; | 0x3 | pseudo-operator |
; | 0x4 | label |
; | 0xF | unknown |
; +-----+-----------------+
;
; The entry 0x0100 is always a pseudo-operator, and entries of the form 0x3XXX
; that are not in `tokens.by_id` are labels.
;
; parameters:
; di = token table entry
;
; returned:
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
; zeroed; the rest of rax is zeroed.
;
; clobbers:
; edi (masked to 16 bits; shifted right by 12 if the entry is not found)
; ------------------------------------------------------------------------------
get_tte_type:
and edi, 0xFFFF ; di = token table entry
cmp di, 0x0100 ; does not depend on the table, so check it once
je .pseudo_operator
xor eax, eax ; eax = tokens.by_id index
.loop:
; entries are 4 bytes: a word id, a type byte and a metadata byte
cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check
jae .not_found ; the last valid index is one below the entry count
cmp di, [tokens.by_id + eax * 4] ; next entry in tokens.by_id
je .found
inc eax
jmp .loop
.not_found:
shr edi, 12 ; edi = top nibble of the entry
cmp edi, 0x3
je .label
mov eax, UNRECOGNISED_ID_TYPE
and eax, 0xF
ret
.pseudo_operator:
mov eax, 0x3
ret
.label:
mov eax, 0x4
ret
.found:
mov al, [2 + tokens.by_id + eax * 4] ; type byte of the entry
and eax, 0xF ; mask as expected
ret
; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
; given a token table entry, returns the declared typed metadata in
; `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA
;
; parameters:
; di = token table entry
;
; returned:
; al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax is
; zeroed.
;
; clobbers:
; edi (masked to 16 bits)
; ------------------------------------------------------------------------------
get_tte_typed_metadata:
and edi, 0xFFFF ; di = token table entry
xor eax, eax ; eax = tokens.by_id index
.loop:
; entries are 4 bytes: a word id, a type byte and a metadata byte
cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check
jae .not_found ; the last valid index is one below the entry count
cmp di, [tokens.by_id + eax * 4] ; next entry in tokens.by_id
je .found
inc eax
jmp .loop
.not_found:
mov eax, UNRECOGNISED_ID_METADATA
ret
.found:
mov al, [3 + tokens.by_id + eax * 4] ; metadata byte of the entry
and eax, 0xFF
ret
; ------------------------------------------------------------------------------
; get_ModRM
;
; description:
; given 2 register tokens and the mod bits, returns the ModR/M byte
;
; +-----------------+
; | ModR/M byte |
; +-----+-----+-----+
; | 7 6 | 5 3 | 2 0 |
; +-----+-----+-----+
; | mod | reg | R/M |
; +-----+-----+-----+
;
; parameters:
; di = token table entry `reg`. 0xFEXX passes the low 3 bits as op flag
; si = token table entry `R/M`
; dl = lower 2 bits: mod bits. The rest is ignored
;
; returned:
; al = ModR/M byte; the rest of rax is zeroed
; ------------------------------------------------------------------------------
get_ModRM:
push rbx
and edi, 0xFFFF ; di = token table entry `reg`
and esi, 0xFFFF ; si = token table entry `R/M`
and edx, 11b ; dl = mod bits
shl edx, 6 ; and position
mov eax, edi
shr eax, 8 ; al = high byte of `reg` entry
cmp al, 0xFE
je .pass_di_as_op_flag
; di = tte
call get_reg_bits
; al = reg bits
mov ebx, eax ; bl = reg bits
jmp .continue
.pass_di_as_op_flag:
mov ebx, edi
and ebx, 111b ; bl = op flag
.continue:
shl ebx, 3 ; position the reg bits
mov edi, esi ; do the other one
; di = tte
call get_reg_bits
; al = r/m bits
mov ecx, eax ; cl = r/m bits
or eax, edx ; mod bits
or eax, ebx ; reg bits
and eax, 0xFF ; mask for byte
pop rbx
ret
; ------------------------------------------------------------------------------
; get_opcode
;
; description:
; given an operator token, returns its opcode. For operators with multiple
; opcodes, the variant can be specified.
; `opcodes.by_id` is searched for an entry whose id matches di and whose flag
; byte (offset 15 of the 16 byte entry) matches bl. If there is none, returns
; UNRECOGNISED_ID_OPCODE.
;
; parameters:
; di = token table entry
; sil = offset within opcode entry. 0 is the first opcode, 1 the second, and so
; on. Only the values 0 to 5 address an opcode.
; bl = flag byte
;
; returned:
; al = opcode; the rest of rax is zeroed.
; dl = flagsX; 0 if the operator is not found
; +----------------------------------------------------+
; | flagsX byte |
; +----------+-----------+-------------+---------------+
; | 7 5 | 4 | 3 | 2 0 |
; +----------+-----------+-------------+---------------+
; | reserved | no ModR/M | 0x0F prefix | operator flag |
; +----------+-----------+-------------+---------------+
;
; clobbers:
; cx, and edi, esi, ebx are masked as described below
; ------------------------------------------------------------------------------
get_opcode:
and edi, 0xFFFF ; di = token table entry
and ebx, 0xFF ; bl = flag byte
; reduce the offset modulo 8 into the range -2 to 5
add esi, 2
and esi, 111b ; offset within opcode entry
sub esi, 2 ; between 0 and 5 for valid input
xor eax, eax ; eax = opcodes.by_id index
.loop:
cmp eax, (opcodes.by_id_end - opcodes.by_id) / 16 ; make sure it's still in range
jae .not_found ; the last valid index is one below the entry count
shl eax, 4 ; eax = byte offset of the entry
mov cx, [opcodes.by_id + eax] ; next entry in opcodes.by_id
shr eax, 4 ; eax = index again
cmp cx, di
je .maybe_found
inc eax
jmp .loop
.maybe_found:
; same id; the entry only counts if its flag byte matches as well
shl eax, 4
mov cl, [opcodes.by_id + 15 + eax]
shr eax, 4
cmp cl, bl
je .found
inc eax
jmp .loop
.not_found:
mov dl, 0 ; no flags, so callers do not act on a stale dl
mov eax, UNRECOGNISED_ID_OPCODE
ret
.found:
shl eax, 4 ; eax = byte offset of the entry
mov dl, [esi + 8 + opcodes.by_id + eax] ; flagsX of the variant
mov al, [esi + 2 + opcodes.by_id + eax] ; opcode of the variant
and eax, 0xFF
ret
; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
; given a register token, returns its reg bits metadata: bits 2 to 4 of the
; register's typed metadata
;
; parameters:
; di = token table entry
;
; returned:
; al = reg bits of the register; the rest of rax, including the upper 5 bits of
; al, are zeroed.
; ------------------------------------------------------------------------------
get_reg_bits:
; di = tte
call get_tte_typed_metadata
; al = typed metadata
and eax, 111b << 2 ; keep only the reg bits
shr eax, 2 ; and move them down over the type data
ret
; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; tokenise
;
; description:
; creates a tokenised definition of the null-terminated program at rdi and puts
; it in memory at TOKEN_TABLE_ADDR
;
; parameters:
; rdi -> first byte of program
;
; returned:
; rax = number of tokens processed
; ------------------------------------------------------------------------------
tokenise:
; rdi -> current byte of program
xor eax, eax ; rax = number of tokens processed
xor edx, edx ; dl = current byte of program
.loop:
mov dl, [rdi] ; dl = current byte
cmp dl, 0x00 ; if current byte is null
je .break ; then break
cmp dl, ";" ; if current byte is the start of a comment
je .comment ; then handle the comment
cmp dl, 0x0A ; if current byte is the end of a line
je .newline_mk_flags ; then reset relevant flags
cmp dl, "," ; if current byte is a comma
je .comma ; then handle the comma
push rdi
push rax
push rdx
mov rsi, whitespace_2 ; rsi -> list of whitespace bytes
mov rdi, 2 ; rdi = size of the list in bytes
; dl = current byte
call elemb
; al = 0 if not whitespace, 1 if whitespace
test eax, 1 ; check if current byte is whitespace
pop rdx ; dl = current byte
pop rax ; rax = number of tokens processed
pop rdi ; rdi -> current byte of program
jnz .skip_byte_whitespace
test byte [.expecting], E_LABEL ; check if a label is expected
jnz .label ; if so, handle it
test byte [.expecting], E_OPERATOR ; else, check if an operator is expected
jnz .operator ; if so, handle it
jmp .operand ; else, handle as an operand
.comment:
mov rsi, .found
call print.debug
mov rsi, .msg_comment
call print
test byte [.expecting], E_COMMENT ; make sure a comment is expected
jz .unexpected_comment ; if not, error
.comment_loop:
mov dl, [rdi] ; dl = current byte
cmp dl, 0x0A ; if current byte is a newline
je .comment_break ; then break
inc rdi ; point to next unread byte
jmp .comment_loop
.comment_break:
jmp .loop
.skip_byte_whitespace:
test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
jz .unexpected_whitespace ; if not, error
inc rdi
jmp .loop ; else, loop
.comma: ; found comma
mov rsi, .found
call print.debug
mov rsi, .msg_comma
call print
test byte [.expecting], E_COMMA ; make sure a comma was expected
jz .unexpected_comma ; if not, error
inc rdi
mov byte [.expecting], E_WHITESPACE | E_OPERAND ; else, make operand expected
jmp .loop ; and loop
.newline_mk_flags:
mov rsi, .found
call print.debug
mov rsi, .msg_newline
call print
test byte [.expecting], E_NEWLINE ; make sure a newline was expected
jz .unexpected_newline ; if not, error
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL
inc rdi
jmp .loop
.label:
push rax
xor eax, eax ; rax = number of bytes in label
.label_loop:
mov dl, [rdi + rax] ; next byte
cmp dl, ":"
je .label_break
cmp dl, " "
je .label_not_found
cmp dl, 0x0A
je .label_not_found
cmp dl, 0x00
je .label_not_found
cmp dl, ";"
je .label_not_found
inc eax ; inc byte counter
jmp .label_loop
.label_break:
mov rsi, .found
call print.debug
mov rsi, .msg_label
call print
push rax
push rdi
mov rsi, rdi ; rsi -> start of string
mov rdi, rax ; rdi = size of string
call djb2
; rax = hash
mov rdi, rax ; rdi = hash
call add_label_hash
; rax = index on label table
mov cx, ax
and cx, 0x0FFF
or cx, 0x3000
pop rdi ; rdi -> current byte of program
pop rax ; rax = number of bytes in label
add rdi, rax ; move on to next byte
inc rdi ; move past the colon
pop rax ; rax = number of tokens processed
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; the next token
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE
jmp .loop
.label_not_found:
pop rax ; rax = number of tokens processed
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
jmp .loop
.operator:
mov rcx, rax ; rcx = number of tokens processed
xor eax, eax ; eax = number of bytes in operator
mov [.pending_operator], eax ; zero pending operator
.operator_loop:
; TODO give this its own error
mov dl, [rdi] ; next byte
; TODO have better check for operator end
cmp dl, " "
je .operator_break
cmp dl, 0x0A
je .operator_break
cmp dl, 0x00
je .operator_break
cmp dl, ";"
je .operator_break
mov [.pending_operator + eax], dl
inc eax ; inc byte counter
cmp eax, 4 ; check that operator is short enough
jg .unexpected_operator ; if not, error
inc rdi ; inc byte pointer
jmp .operator_loop ; and loop
.operator_break:
push rdi
mov edi, [.pending_operator] ; edi = operator to be searched
call identify_operator
; ax = operator's token ID
push rcx
mov ecx, eax ; cx = operator's token ID
mov edi, eax ; di = operator's token ID
push rcx
call get_tte_type
; al = token type
pop rcx ; cx = operator's token ID
mov sil, al
pop rax ; rax = tokens processed
pop rdi ; rdi = byte counter
cmp sil, 0x3 ; pseudo-operator
je .pseudo_operator
cmp sil, 0x1 ; operator
jne .unexpected_operator
; debug message
mov rsi, .found
call print.debug
mov rsi, .msg_operator
call print
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; plus 1 token processed
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
jmp .loop
.pseudo_operator:
mov rsi, .found
call print.debug
mov rsi, .msg_pseudo_operator
call print
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
jmp .loop
.operand:
mov rsi, .found
call print.debug
mov rsi, .msg_operand
call print
test byte [.expecting], E_OPERAND ; make sure an operand was expected
jz .unexpected_operand ; if not, error
push rax
push rdi
xor eax, eax ; rax = length of operand
.operand_loop:
mov dl, [rdi]
cmp dl, ","
je .operand_break
cmp dl, 0x0A
je .operand_break
cmp dl, 0x00
je .operand_break
inc rax ; inc length counter
inc rdi ; inc byte pointer
jmp .operand_loop
.operand_break:
pop rdi ; rdi -> first byte of operand
push rdi
mov rsi, rax ; rsi = length of operand in bytes
mov cx, ax ; cx = length counter for safe keeping
push rcx
call evaluate_operand
; dl = return code
; rax = binary data
pop rcx
pop rdi ; rdi = first byte of operand
add di, cx ; rdi = last byte of operand
mov rcx, rax ; rcx = evaluate_operand's binary return data
pop rax ; rax = number of tokens processed
; operand is some reg
cmp dl, 0x00
; cx = token ID
je .operand_register
; operand is some [reg]
cmp dl, 0x10
; cx = token ID
je .operand_addr_register
; operand is some constant
cmp dl, 0x20
; rcx = constant value
je .operand_constant
; operand is some label
cmp dl, 0x30
; rcx = index of label in LT
je .operand_label
jmp .unexpected_operand
.operand_register:
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; another token processed
jmp .operand_break_continue
.operand_addr_register:
mov word [TOKEN_TABLE_ADDR + rax * 2], 0x1000
inc rax ; 0x1000: addr reg token, next token is the register
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; the register as returned by evaluate_operand
jmp .operand_break_continue
.operand_constant:
mov word [TOKEN_TABLE_ADDR + rax * 2], 0x2000
inc rax ; another token processed
mov [TOKEN_TABLE_ADDR + rax * 2], rcx
add rax, 4
jmp .operand_break_continue
.operand_label:
and cx, 0x0FFF
or cx, 0x3000
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax
jmp .operand_break_continue
.operand_break_continue:
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
jmp .loop
.break:
ret
; state
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL
.unexpected_whitespace:
mov rsi, .err_unexpected
call print.error
mov rsi, .msg_whitespace
call print
jmp halt
.unexpected_comment:
mov rsi, .err_unexpected
call print.error
mov rsi, .msg_comment
call print
jmp halt
.unexpected_newline:
mov rsi, .err_unexpected
call print.error
mov rsi, .msg_newline
call print
jmp halt
.unexpected_comma:
mov rsi, .err_unexpected
call print.error
mov rsi, .msg_comma
call print
jmp halt
.unexpected_operand:
mov rsi, .err_unexpected
call print.error
mov rsi, .msg_operand
call print
jmp halt
.unexpected_operator:
mov rsi, .err_unexpected
call print.error
mov rsi, .msg_operator
call print
jmp halt
.err_unexpected db "unexpected ", 0x00
.found db "found ", 0x00
.msg_whitespace db "whitespace.", 0x0A, 0x00
.msg_comment db "comment.", 0x0A, 0x00
.msg_newline db "newline.", 0x0A, 0x00
.msg_comma db "comma.", 0x0A, 0x00
.msg_label db "label.", 0x0A, 0x00
.msg_operator db "operator.", 0x0A, 0x00
.msg_operand db "operand.", 0x0A, 0x00
.msg_pseudo_operator db "pseudo_operator.", 0x0A, 0x00
.pending_operator dd 0 ; the operator token that is pending processing
; ------------------------------------------------------------------------------
; evaluate_operand
;
; description:
; takes the location and length of an operand and evaluates it into binary data
; and a return code to interpret the binary data.
;
; | code | rax contents         | notes |
; |------|----------------------|-------|
; | 0x00 | token ID of register | reg   |
; | 0x10 | token ID of register | [reg] |
; | 0x20 | constant value       | const |
; | 0x30 | index of label in LT | label |
; | 0xFF | -                    | error |
;
; a bracketed operand is evaluated recursively; the inner code's high nibble is
; moved into the low nibble and 0x10 is set, so `[const]` yields 0x12, `[label]`
; yields 0x13 and a failed inner evaluation yields 0x1F. A bracketed operand
; inside another bracketed operand is rejected with 0xFF.
;
; anything that is neither a register nor a constant is treated as a label and
; is added to the label table through `add_label_hash`.
;
; parameters:
; rdi -> first byte of operand
; rsi = size of operand in bytes
;
; returned:
; rax = binary data corresponding to the operand
; dl = return code; the rest of rdx is zeroed
; ------------------------------------------------------------------------------
evaluate_operand:
        push    rdi                     ; keep the operand start across the trim
        ; rsi = size of operand
        call    trim_trailing_whitespace
        ; rax = size of operand w/o trailing whitespace
        pop     rdi                     ; rdi -> first byte of operand
        mov     rsi, rax
        test    rsi, rsi                ; case: nothing left after trimming
        jz      .unrecognised
        cmp     byte [rdi], '['         ; case: memory addressing
        jne     .register               ; otherwise: register, constant or label

.address:
        cmp     byte [rdi + rsi - 1], ']' ; the reference must be closed
        jne     .unrecognised
        inc     rdi                     ; rdi -> enclosed operand
        sub     rsi, 2                  ; rsi = length of enclosed operand
        call    evaluate_operand
        ; rax = binary data
        ; dl = return code of the enclosed operand
        cmp     dl, 0x10
        jb      .address_fold           ; 0x00..0x0F: plain operand
        cmp     dl, 0x20
        jb      .unrecognised           ; 0x10..0x1F: nested memory reference
.address_fold:
        ; only dl is meaningful here: zero-extend it first so that stale upper
        ; bits of edx cannot be shifted into the return code
        movzx   edx, dl
        shr     edx, 4                  ; inner type moves to the low nibble
        or      edx, 0x10               ; mark as address
        ret

.register:
        cmp     rsi, 4
        jg      .constant               ; too long for a register name
        push    rdi
        mov     edi, [rdi]              ; edi = 4 bytes starting at the operand;
                                        ; bytes past its end are masked off below
        cmp     rsi, 4
        je      .register_lookup
        and     edi, 0x00FFFFFF         ; keep at most 3 bytes
        cmp     rsi, 3
        je      .register_lookup
        and     edi, 0x0000FFFF         ; keep at most 2 bytes
        cmp     rsi, 2
        je      .register_lookup
        and     edi, 0x000000FF         ; keep 1 byte
.register_lookup:
        call    identify_register
        ; ax = register's token ID or UNRECOGNISED_TOKEN_ID
        pop     rdi                     ; rdi -> first byte of operand
        cmp     ax, UNRECOGNISED_TOKEN_ID
        je      .constant               ; not a register: maybe a constant
        xor     edx, edx                ; dl = 0x00: register
        ret

.constant:
        push    rdi
        push    rsi
        ; rdi -> first byte of constant
        ; rsi = size of constant in bytes
        call    evaluate_constant
        ; dl = type of constant
        ; rax = value of constant
        ; popped in swapped order on purpose: `djb2` in .label wants
        ; rdi = size and rsi -> string
        pop     rdi                     ; rdi = size of operand in bytes
        pop     rsi                     ; rsi -> first byte of operand
        cmp     dl, 0xFF
        je      .label                  ; not a constant: treat as a label
        mov     edx, 0x20               ; dl = 0x20: constant
        ret

.label:
        ; rdi = size of label in bytes
        ; rsi -> first byte of label
        call    djb2
        ; rax = hash
        mov     rdi, rax
        call    add_label_hash
        ; rax = index in LT of label
        mov     edx, 0x30               ; dl = 0x30: label
        ret

.unrecognised:
        xor     eax, eax
        mov     edx, 0xFF               ; dl = 0xFF: error
        ret
; ------------------------------------------------------------------------------
; evaluate_constant
;
; description:
; takes a constant and returns its value. Currently the following constants are
; supported:
;
; | type | p. | description  |
; |------|----|--------------|
; | 0x00 | 0x | hexadecimal  |
; | 0x01 | 0q | octal        |
; | 0x02 | 0b | binary       |
; | 0x03 | "  | char         |
; | 0xFF |    | unrecognised |
;
; where `p.` is the prefix or otherwise indicator
;
; notes:
; - hexadecimal digits must be `0`..`9` or upper case `A`..`F`
; - a numeric prefix with no digits after it (e.g. `0x`) evaluates to 0
; - chars are packed little-endian: the first char ends up in the lowest byte
; - the type is loaded into edx on each exit path, so nothing is kept on the
;   stack and every path returns with a balanced stack
;
; parameters:
; rdi -> first byte of constant
; rsi = size of constant in bytes
;
; returned:
; rax = value of the constant; 0 if unrecognised
; dl = type of constant; the rest of rdx is zeroed
;
; clobbers:
; rsi, rdi, and rcx for char constants
; ------------------------------------------------------------------------------
evaluate_constant:
        xor     eax, eax                ; rax = value accumulated so far
        test    rsi, rsi                ; an empty operand is not a constant
        jz      .unrecognised
        mov     dl, [rdi]               ; dl = first byte, selects the family
        inc     rdi                     ; point to next byte
        dec     rsi                     ; one fewer byte left
        cmp     dl, '"'
        je      .chr_start
        cmp     dl, '0'                 ; every numeric prefix starts with '0'
        jne     .unrecognised

.numeric:
        test    rsi, rsi                ; a lone '0' has no radix letter
        jz      .unrecognised
        mov     dl, [rdi]               ; dl = radix letter
        inc     rdi
        dec     rsi
        cmp     dl, 'x'
        je      .hex_loop
        cmp     dl, 'q'
        je      .oct_loop
        cmp     dl, 'b'
        je      .bin_loop
        jmp     .unrecognised

.hex_loop:
        test    rsi, rsi                ; out of digits?
        jz      .hex_done
        mov     dl, [rdi]               ; dl = next byte of constant
        sub     dl, '0'                 ; '0'..'9' -> 0..9; bytes below '0' wrap
        cmp     dl, 9                   ; to large unsigned values
        jbe     .hex_digit
        sub     dl, 'A' - '0'           ; 'A'..'F' -> 0..5
        cmp     dl, 5                   ; unsigned: also rejects ':'..'@'
        ja      .unrecognised
        add     dl, 0xA                 ; 0..5 -> 0xA..0xF
.hex_digit:
        shl     rax, 4                  ; make room for the nibble
        or      al, dl
        inc     rdi
        dec     rsi
        jmp     .hex_loop
.hex_done:
        xor     edx, edx                ; type 0x00: hexadecimal
        ret

.oct_loop:
        test    rsi, rsi
        jz      .oct_done
        mov     dl, [rdi]
        sub     dl, '0'
        cmp     dl, 7                   ; unsigned: bytes below '0' fail too
        ja      .unrecognised
        shl     rax, 3                  ; make room for the 3-bit group
        or      al, dl
        inc     rdi
        dec     rsi
        jmp     .oct_loop
.oct_done:
        mov     edx, 0x01               ; type 0x01: octal
        ret

.bin_loop:
        test    rsi, rsi
        jz      .bin_done
        mov     dl, [rdi]
        sub     dl, '0'
        cmp     dl, 1                   ; unsigned: bytes below '0' fail too
        ja      .unrecognised
        shl     rax, 1                  ; make room for the bit
        or      al, dl
        inc     rdi
        dec     rsi
        jmp     .bin_loop
.bin_done:
        mov     edx, 0x02               ; type 0x02: binary
        ret

.chr_start:
        xor     ecx, ecx                ; rcx = number of chars consumed
.chr:
        ; TODO check for overlength string and do something; probably in `tokenise`
        cmp     rsi, 1
        jb      .unrecognised           ; ran out before the closing quote
        je      .chr_close              ; last byte: must be the closing quote
        mov     dl, [rdi]
        cmp     dl, 0x20                ; only printable ASCII is accepted
        jb      .unrecognised
        cmp     dl, 0x7E
        ja      .unrecognised
        ror     rax, 8                  ; park earlier chars in the top bytes
        or      al, dl                  ; newest char in the low byte
        inc     rcx
        inc     rdi
        dec     rsi
        jmp     .chr
.chr_close:
        cmp     byte [rdi], '"'         ; make sure the chr is closed
        jne     .unrecognised
        test    rcx, rcx                ; empty string: nothing to rotate back
        jz      .chr_done
        dec     ecx                     ; the first `ror` rotated a zero, so only
        shl     ecx, 3                  ; rcx - 1 byte rotations need undoing
        rol     rax, cl
.chr_done:
        mov     edx, 0x03               ; type 0x03: char
        ret

.unrecognised:
        xor     eax, eax
        mov     edx, 0xFF               ; unrecognised type
        ret

.msg db "evaluate_constant", 0x0A, 0x00
; ------------------------------------------------------------------------------
; identify_register
;
; description:
; looks a register name up in `tokens.registers`. Each entry of that table is 6
; bytes: the name as 4 ascii bytes (zero padded) followed by a 2 byte token ID.
;
; parameters:
; edi = register name to be searched, zero padded to 4 bytes
;
; returned:
; ax = register's token ID or UNRECOGNISED_TOKEN_ID
; ------------------------------------------------------------------------------
identify_register:
        mov     eax, tokens.registers   ; rax -> entry being examined
.scan:
        cmp     eax, tokens.registers_end
        jae     .missing                ; walked off the end of the table
        cmp     [rax], edi              ; does the entry's name match?
        je      .hit
        add     eax, 6                  ; step to the next 6 byte entry
        jmp     .scan
.hit:
        movzx   eax, word [rax + 4]     ; token ID sits right after the name
        ret
.missing:
        mov     eax, UNRECOGNISED_TOKEN_ID
        ret
; ------------------------------------------------------------------------------
; identify_operator
;
; description:
; looks an operator name up in `tokens.operators`. Each entry of that table is 6
; bytes: the name as 4 ascii bytes (zero padded) followed by a 2 byte token ID.
;
; parameters:
; edi = operator name to be searched, zero padded to 4 bytes
;
; returned:
; ax = operator's token ID or UNRECOGNISED_TOKEN_ID
; ------------------------------------------------------------------------------
identify_operator:
        mov     eax, tokens.operators   ; rax -> entry being examined
.scan:
        cmp     eax, tokens.operators_end
        jae     .missing                ; walked off the end of the table
        cmp     [rax], edi              ; does the entry's name match?
        je      .hit
        add     eax, 6                  ; step to the next 6 byte entry
        jmp     .scan
.hit:
        movzx   eax, word [rax + 4]     ; token ID sits right after the name
        ret
.missing:
        mov     eax, UNRECOGNISED_TOKEN_ID
        ret
; ------------------------------------------------------------------------------
; utilities
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; print
;
; description:
; prints a null-terminated string by writing it byte by byte to I/O port 0x3F8
; (COM1 on a standard PC). rax, rdx and rsi are saved and restored so that it
; can be called anywhere while debugging; flags are not preserved.
;
; `print.debug`, `print.error`, `print.test` and `print.warn` print a coloured
; tag first and then tail-call `print` with the caller's string.
;
; parameters:
; rsi -> start of null-terminated string
; ------------------------------------------------------------------------------
print:
        push    rdx
        push    rax
        push    rsi

        mov     edx, 0x3F8              ; dx = output port
.loop:
        mov     al, [rsi]
        test    al, al                  ; null terminator?
        jz      .done
        ; byte-sized write: a word-sized `out dx, ax` would also send ah to the
        ; next port (0x3F9)
        out     dx, al
        inc     rsi
        jmp     .loop
.done:
        pop     rsi
        pop     rax
        pop     rdx
        ret

.debug:
        push    rsi
        mov     rsi, .debug_msg
        call    print
        pop     rsi
        jmp     print                   ; tail call
.error:
        push    rsi
        mov     rsi, .error_msg
        call    print
        pop     rsi
        jmp     print                   ; tail call
.test:
        push    rsi
        mov     rsi, .test_msg
        call    print
        pop     rsi
        jmp     print                   ; tail call
.warn:
        push    rsi
        mov     rsi, .warn_msg
        call    print
        pop     rsi
        jmp     print                   ; tail call

; tags: ANSI colour escape, label, ANSI reset
.debug_msg db 0x1B, "[36m", "[DEBUG]: ", 0x1B, "[0m", 0x00
.error_msg db 0x1B, "[1;31m", "[ERROR]: ", 0x1B, "[0m", 0x00
.test_msg db 0x1B, "[1;33m", "[TEST]: ", 0x1B, "[0m", 0x00
.warn_msg db 0x1B, "[1;35m", "[WARN]: ", 0x1B, "[0m", 0x00
; ------------------------------------------------------------------------------
; print_least_4_bits
;
; description:
; prints the least significant 4 bits of rax as one hexadecimal digit
; (`0`..`9`, `A`..`F`) on a `[WARN]` line, for debugging reasons.
; rax and rsi are preserved; flags are not.
; ------------------------------------------------------------------------------
print_least_4_bits:
        push    rax
        push    rsi
        and     eax, 0x0F               ; keep only the low nibble
        cmp     al, 10
        jb      .digit
        add     al, 'A' - '0' - 10      ; 10..15 -> skip ahead to 'A'..'F'
.digit:
        add     al, '0'
        mov     [.byte], al             ; patch the digit into the message
        mov     rsi, .byte
        call    print.warn
        pop     rsi
        pop     rax
        ret
.byte db 0x00, 0x0A, 0x00
; ------------------------------------------------------------------------------
; halt
;
; description:
; prints `msg_halt` once and then halts the processor for good. Does not
; return. If the processor ever resumes after `hlt`, it is halted again
; without re-printing the message.
; ------------------------------------------------------------------------------
halt:
        push    rsi                     ; keep rsi intact for post-mortem debugging
        mov     rsi, msg_halt
        call    print
        pop     rsi
.stop:
        hlt
        jmp     .stop
; ------------------------------------------------------------------------------
; elemb
;
; description:
; linear search: reports whether a byte occurs in a list of bytes.
;
; parameters:
; rdi = size of list
; rsi -> start of list
; dl = given byte
;
; returned:
; rax = 0: is not an element
;       1: is an element
;
; rsi and rdi are advanced while searching and are not restored.
; ------------------------------------------------------------------------------
elemb:
        xor     eax, eax                ; assume dl is not in the list
.scan:
        test    rdi, rdi                ; nothing left to look at?
        jz      .finished
        cmp     [rsi], dl               ; current list byte == dl?
        je      .hit
        inc     rsi                     ; move to next byte
        dec     rdi                     ; and reduce remaining length
        jmp     .scan
.hit:
        mov     eax, 1                  ; dl is an element of the list
.finished:
        ret
; ------------------------------------------------------------------------------
; djb2
;
; description:
; gets the 64-bit djb2 hash of a given string: starting from 5381, every byte
; updates the hash as `hash = hash * 33 + byte`, wrapping at 64 bits.
;
; parameters:
; rdi = size of string
; rsi -> start of string
;
; returned:
; rax = hash
;
; clobbers:
; rcx, rdx
; ------------------------------------------------------------------------------
djb2:
        mov     eax, 5381               ; rax = hash, seeded with the djb2 constant
        xor     ecx, ecx                ; rcx = index into the string
.next:
        cmp     rcx, rdi
        jge     .finished
        imul    rax, rax, 33            ; hash * 33
        movzx   edx, byte [rsi + rcx]   ; rdx = current byte
        add     rax, rdx
        inc     rcx
        jmp     .next
.finished:
        ret
; ------------------------------------------------------------------------------
; trim_trailing_whitespace
;
; description:
; computes the size of the given byte array once whitespace at its END has been
; dropped. Leading whitespace is not touched and the array itself is not
; modified. Whitespace is any byte in the 2 byte list `whitespace_2`.
;
; parameters:
; rdi -> start of list
; rsi = size of list
;
; returned:
; rax = new size of list (rsi holds the same value)
;
; rdi is preserved; dl is clobbered.
; ------------------------------------------------------------------------------
trim_trailing_whitespace:
.next:
        test    rsi, rsi                ; list of length zero
        jz      .done                   ; already trimmed
        push    rsi                     ; elemb advances rsi and rdi
        push    rdi
        mov     dl, [rdi + rsi - 1]     ; last element of the list
        mov     rsi, whitespace_2       ; pointer to whitespace list
        mov     edi, 2                  ; length of whitespace list
        call    elemb
        ; rax = 1 if the last element is whitespace, 0 otherwise
        pop     rdi                     ; rdi -> start of list
        pop     rsi                     ; rsi = size of list
        test    eax, eax                ; last element not whitespace?
        jz      .done                   ; then nothing more to trim
        dec     rsi                     ; otherwise drop it
        jmp     .next                   ; and look at the new last element
.done:
        mov     rax, rsi
        ret
; ------------------------------------------------------------------------------
; add_label_hash
;
; description:
; adds a label hash to the label table, or just finds it if already present.
; The table is probed linearly in 16 byte entries; the hash occupies the first
; 8 bytes of an entry and a hash of 0 marks an empty entry.
;
; if the table is full and the hash is not in it, an error is printed and the
; program halts instead of writing past the end of the table.
;
; notes:
; - the search stops at the first empty entry, so entries must never be removed
; - a hash of 0 cannot be told apart from an empty entry
;
; parameters
; rdi = 64-bit hash to be added
;
; returned
; rax = index in label table
;
; clobbers:
; rcx
; ------------------------------------------------------------------------------
add_label_hash:
        xor     eax, eax                ; rax = byte offset of the entry probed
.loop:
        cmp     rax, LABEL_TABLE_SIZE
        jae     .full                   ; no room and no match
        mov     rcx, [LABEL_TABLE_ADDR + rax]
        test    rcx, rcx                ; empty slot
        jz      .break
        cmp     rcx, rdi                ; already present
        je      .break
        add     rax, 16                 ; next entry
        jmp     .loop
.break:
        mov     [LABEL_TABLE_ADDR + rax], rdi
        shr     rax, 4                  ; rax / 16
        ; rax = index
        ret
.full:
        mov     rsi, .err_full
        call    print.error
        jmp     halt
.err_full db "label table full.", 0x0A, 0x00
; ------------------------------------------------------------------------------
; add_label_address
;
; description:
; records a label's address in its label table entry. The address is the
; 32-bit field at offset 8 of the 16 byte entry and is only written while that
; field is still 0.
;
; parameters
; rdi = lower 12 bits: index of label table to add the address to
; esi = 32-bit address to be added, relative to start of program
;
; returned
; rax = return value: 0 = success
;                     1 = failure: label already in the table
; ------------------------------------------------------------------------------
add_label_address:
        and     edi, 0xFFF              ; keep the index bits only
        shl     edi, 4                  ; rdi = byte offset of the entry (index * 16)
        xor     eax, eax                ; assume success
        cmp     dword [LABEL_TABLE_ADDR + 8 + rdi], 0
        jne     .taken                  ; an address was stored earlier
        mov     [LABEL_TABLE_ADDR + 8 + rdi], esi
        ret
.taken:
        inc     eax                     ; rax = 1: failure
        ret
; ------------------------------------------------------------------------------
; add_awaiting_label
;
; description:
; adds metadata of a forward reference to be completed in a later pass. The
; first empty 16 byte entry of the awaiting label table (hash field == 0) is
; filled with:
;
; | offset | size | contents                                   |
; |--------|------|--------------------------------------------|
; | 0      | 8    | hash of the awaited label, copied from LT  |
; | 8      | 4    | address of the reference                   |
; | 13     | 1    | the bits passed in al                      |
;
; if the table is full, an error is printed and the program halts instead of
; writing past the end of the table.
;
; note: the label's hash is read from the label table; if that hash is 0 the
; new entry still looks empty and will be reused by the next call.
;
; parameters
; rdi = lower 12 bits: index of label table this forward reference is awaiting
; esi = 32-bit address of reference, relative to start of program
; al = lower 4 bits: # bytes reserved at [OUTPUT_ADDR + esi] for the reference
;      bit 5 (0x10): abs flag if the reference is absolute
;
; clobbers:
; rax, rcx, rdx, rdi
; ------------------------------------------------------------------------------
add_awaiting_label:
        and     edi, 0xFFF              ; keep the index bits only
        mov     esi, esi                ; zero-extend the address into rsi
        and     eax, 0x1F               ; keep the 5 documented bits of al
        mov     ecx, eax                ; cl = bits passed to al
        xor     eax, eax                ; rax = byte offset of the entry probed
.loop:
        cmp     rax, AWAITING_LABEL_TABLE_SIZE
        jae     .full                   ; no empty slot left
        cmp     qword [AWAITING_LABEL_TABLE_ADDR + rax], 0 ; empty slot
        je      .break
        add     rax, 16                 ; next entry
        jmp     .loop
.break:
        shl     rdi, 4                  ; rdi = byte offset of the label's LT entry
        mov     rdx, [LABEL_TABLE_ADDR + rdi]
        mov     [AWAITING_LABEL_TABLE_ADDR + rax], rdx ; hash
        mov     [AWAITING_LABEL_TABLE_ADDR + 8 + rax], esi ; address
        mov     [AWAITING_LABEL_TABLE_ADDR + 13 + rax], cl ; bits passed to al
        ret
.full:
        mov     rsi, .err_full
        call    print.error
        jmp     halt
.err_full db "awaiting label table full.", 0x0A, 0x00
; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
; zero-fills the TOKEN_TABLE_SIZE bytes starting at TOKEN_TABLE_ADDR.
; Leaves rax = 0, rcx = 0 and rdi just past the cleared region.
; ------------------------------------------------------------------------------
clear_token_table:
        mov     edi, TOKEN_TABLE_ADDR   ; address to start
        mov     ecx, TOKEN_TABLE_SIZE / 8 ; number of quad words
        xor     eax, eax                ; value to write
        rep stosq
        ret
; ------------------------------------------------------------------------------
; clear_label_tables
;
; description:
; zero-fills the label table (LABEL_TABLE_SIZE bytes at LABEL_TABLE_ADDR) and
; then the awaiting label table (AWAITING_LABEL_TABLE_SIZE bytes at
; AWAITING_LABEL_TABLE_ADDR).
; Leaves rax = 0, rcx = 0 and rdi just past the awaiting label table.
; ------------------------------------------------------------------------------
clear_label_tables:
        xor     eax, eax                ; value to write, shared by both fills

        mov     edi, LABEL_TABLE_ADDR   ; address to start
        mov     ecx, LABEL_TABLE_SIZE / 8 ; number of quad words
        rep stosq

        mov     edi, AWAITING_LABEL_TABLE_ADDR ; address to start
        mov     ecx, AWAITING_LABEL_TABLE_SIZE / 8 ; number of quad words
        rep stosq
        ret
; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
; zero-fills the TEST_ARENA_SIZE bytes starting at TEST_ARENA_ADDR.
; Leaves rax = 0, rcx = 0 and rdi just past the cleared region.
; ------------------------------------------------------------------------------
clear_test_arena:
        mov     edi, TEST_ARENA_ADDR    ; address to start
        mov     ecx, TEST_ARENA_SIZE / 8 ; number of quad words
        xor     eax, eax                ; value to write
        rep stosq
        ret
; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
; zero-fills the OUTPUT_SIZE bytes starting at OUTPUT_ADDR.
; Leaves rax = 0, rcx = 0 and rdi just past the cleared region.
; ------------------------------------------------------------------------------
clear_output_arena:
        mov     edi, OUTPUT_ADDR        ; address to start
        mov     ecx, OUTPUT_SIZE / 8    ; number of quad words
        xor     eax, eax                ; value to write
        rep stosq
        ret
%include "asm/tests.asm"
; ------------------------------------------------------------------------------
; data
; ------------------------------------------------------------------------------
align 16 ; for readability in hexdump
tokens:
.by_id:
dw 0x0000 ; rax
db 0x02 ; type: register
db 00000011b ; reg: 000b
; width: 11b (64 bits)
dw 0x0001 ; rbx
db 0x02 ; type: register
db 00001111b ; reg: 011b
; width: 11b (64 bits)
dw 0x0002 ; rcx
db 0x02 ; type: register
db 00000111b ; reg: 001b
; width: 11b (64 bits)
dw 0x0003 ; rdx
db 0x02 ; type: register
db 00001011b ; reg: 010b
; width: 11b (64 bits)
dw 0x0004 ; rsi
db 0x02 ; type: register
db 00011011b ; reg: 110b
; width: 11b (64 bits)
dw 0x0005 ; rdi
db 0x02 ; type: register
db 00011111b ; reg: 111b
; width: 11b (64 bits)
dw 0x0006 ; rsp
db 0x02 ; type: register
db 00010011b ; reg: 100b
; width: 11b (64 bits)
dw 0x0007 ; rbp
db 0x02 ; type: register
db 00010111b ; reg: 101b
; width: 11b (64 bits)
dw 0x0010 ; eax
db 0x02 ; type: register
db 00000010b ; reg: 000b
; width: 10b (32 bits)
dw 0x0011 ; ebx
db 0x02 ; type: register
db 00001110b ; reg: 011b
; width: 10b (32 bits)
dw 0x0012 ; ecx
db 0x02 ; type: register
db 00000110b ; reg: 001b
; width: 10b (32 bits)
dw 0x0013 ; edx
db 0x02 ; type: register
db 00001010b ; reg: 010b
; width: 10b (32 bits)
dw 0x0014 ; esi
db 0x02 ; type: register
db 00011010b ; reg: 110b
; width: 10b (32 bits)
dw 0x0015 ; edi
db 0x02 ; type: register
db 00011110b ; reg: 111b
; width: 10b (32 bits)
dw 0x0016 ; esp
db 0x02 ; type: register
db 00010010b ; reg: 100b
; width: 10b (32 bits)
dw 0x0017 ; ebp
db 0x02 ; type: register
db 00010110b ; reg: 101b
; width: 10b (32 bits)
dw 0x0020 ; ax
db 0x02 ; type: register
db 00000001b ; reg: 000b
; width: 01b (16 bits)
dw 0x0021 ; bx
db 0x02 ; type: register
db 00001101b ; reg: 011b
; width: 01b (16 bits)
dw 0x0022 ; cx
db 0x02 ; type: register
db 00000101b ; reg: 001b
; width: 01b (16 bits)
dw 0x0023 ; dx
db 0x02 ; type: register
db 00001001b ; reg: 010b
; width: 01b (16 bits)
dw 0x0024 ; si
db 0x02 ; type: register
db 00011001b ; reg: 110b
; width: 01b (16 bits)
dw 0x0025 ; di
db 0x02 ; type: register
db 00011101b ; reg: 111b
; width: 01b (16 bits)
dw 0x0026 ; sp
db 0x02 ; type: register
db 00010001b ; reg: 100b
; width: 01b (16 bits)
dw 0x0027 ; bp
db 0x02 ; type: register
db 00010101b ; reg: 101b
; width: 01b (16 bits)
dw 0x0030 ; al
db 0x02 ; type: register
db 00000000b ; reg: 000b
; width: 00b (8 bits)
dw 0x0031 ; bl
db 0x02 ; type: register
db 00001100b ; reg: 011b
; width: 00b (8 bits)
dw 0x0032 ; cl
db 0x02 ; type: register
db 00000100b ; reg: 001b
; width: 00b (8 bits)
dw 0x0033 ; dl
db 0x02 ; type: register
db 00001000b ; reg: 010b
; width: 00b (8 bits)
dw 0x0034 ; sil
db 0x02 ; type: register
db 00011000b ; reg: 110b
; width: 00b (8 bits)
dw 0x0035 ; dil
db 0x02 ; type: register
db 00011100b ; reg: 111b
; width: 00b (8 bits)
dw 0x0036 ; spl
db 0x02 ; type: register
db 00010000b ; reg: 100b
; width: 00b (8 bits)
dw 0x0037 ; bpl
db 0x02 ; type: register
db 00010100b ; reg: 101b
; width: 00b (8 bits)
dw 0x004F ; hlt
db 0x01 ; type: operator
db 0x00 ; # operands
dw 0x0050 ; int3
db 0x01 ; type: operator
db 0x00 ; # operands
dw 0x0053 ; xor
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0054 ; inc
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0055 ; dec
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0056 ; mov
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0057 ; add
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0058 ; sub
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0059 ; call
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x005A ; ret
db 0x01 ; type: operator
db 0x00 ; # operands
dw 0x005B ; cmp
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x005C ; jmp
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x005D ; je
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x005E ; jne
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x005F ; push
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0060 ; pop
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0061 ; out
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0100 ; db
db 0x03 ; type: pseudo-operator
db 0x01 ; # operands
.by_id_end:
.operators:
dd "hlt"
dw 0x004F
dd "int3"
dw 0x0050
dd "xor"
dw 0x0053
dd "inc"
dw 0x0054
dd "dec"
dw 0x0055
dd "mov"
dw 0x0056
dd "add"
dw 0x0057
dd "sub"
dw 0x0058
dd "call"
dw 0x0059
dd "ret"
dw 0x005A
dd "cmp"
dw 0x005B
dd "jmp"
dw 0x005C
dd "je"
dw 0x005D
dd "jne"
dw 0x005E
dd "push"
dw 0x005F
dd "pop"
dw 0x0060
dd "out"
dw 0x0061
dd "db"
dw 0x0100
.operators_end:
.registers:
dd "r8"
dw 0x0008
dd "r9"
dw 0x0009
dd "ax"
dw 0x0020
dd "bx"
dw 0x0021
dd "cx"
dw 0x0022
dd "dx"
dw 0x0023
dd "si"
dw 0x0024
dd "di"
dw 0x0025
dd "sp"
dw 0x0026
dd "bp"
dw 0x0027
dd "al"
dw 0x0030
dd "bl"
dw 0x0031
dd "cl"
dw 0x0032
dd "dl"
dw 0x0033
dd "ah"
dw 0x0040
dd "bh"
dw 0x0041
dd "ch"
dw 0x0042
dd "dh"
dw 0x0043
dd "cs"
dw 0x0044
dd "ds"
dw 0x0045
dd "es"
dw 0x0046
dd "fs"
dw 0x0047
dd "gs"
dw 0x0048
dd "ss"
dw 0x0049
dd "rax"
dw 0x0000
dd "rbx"
dw 0x0001
dd "rcx"
dw 0x0002
dd "rdx"
dw 0x0003
dd "rsi"
dw 0x0004
dd "rdi"
dw 0x0005
dd "rsp"
dw 0x0006
dd "rbp"
dw 0x0007
dd "r10"
dw 0x000A
dd "r11"
dw 0x000B
dd "r12"
dw 0x000C
dd "r13"
dw 0x000D
dd "r14"
dw 0x000E
dd "r15"
dw 0x000F
dd "eax"
dw 0x0010
dd "ebx"
dw 0x0011
dd "ecx"
dw 0x0012
dd "edx"
dw 0x0013
dd "esi"
dw 0x0014
dd "edi"
dw 0x0015
dd "esp"
dw 0x0016
dd "ebp"
dw 0x0017
dd "r8d"
dw 0x0018
dd "r9d"
dw 0x0019
dd "r8w"
dw 0x0028
dd "r9w"
dw 0x0029
dd "sil"
dw 0x0034
dd "dil"
dw 0x0035
dd "spl"
dw 0x0036
dd "bpl"
dw 0x0037
dd "r8b"
dw 0x0038
dd "r9b"
dw 0x0039
dd "cr0"
dw 0x004A
dd "cr2"
dw 0x004B
dd "cr3"
dw 0x004C
dd "cr4"
dw 0x004D
dd "cr8"
dw 0x004E
dd "r10d"
dw 0x001A
dd "r11d"
dw 0x001B
dd "r12d"
dw 0x001C
dd "r13d"
dw 0x001D
dd "r14d"
dw 0x001E
dd "r15d"
dw 0x001F
dd "r10w"
dw 0x002A
dd "r11w"
dw 0x002B
dd "r12w"
dw 0x002C
dd "r13w"
dw 0x002D
dd "r14w"
dw 0x002E
dd "r15w"
dw 0x002F
dd "r10b"
dw 0x003A
dd "r11b"
dw 0x003B
dd "r12b"
dw 0x003C
dd "r13b"
dw 0x003D
dd "r14b"
dw 0x003E
dd "r15b"
dw 0x003F
.registers_end:
align 16 ; for readability in hexdump
opcodes:
.by_id:
; hlt
dw 0x004F
db 0xF4 ; opcode
db 0x00 ; reserved
dd 0x00000000
dd 0x00000000
dd 0x00000000
; int3
dw 0x0050
db 0xCC ; opcode
db 0x00 ; reserved
dd 0x00000000
dd 0x00000000
dd 0x00000000
; xor
dw 0x0053
db 0x31 ; r/m <- r
db 0x33 ; r <- r/m
db 0x81 ; r/m <- imm16/32
db 0x83 ; r/m <- imm8
dw 0x0000
dw 0x0000
db 0x06 ; 6: r/m <- imm16/32 op flag
db 0x06 ; 6: r/m <- imm8 op flag
dd 0x00000000
; inc
dw 0x0054
db 0xFF ; r/m
db 0x00
dd 0x00000000
dd 0x00000000
dd 0x00000000
; dec
dw 0x0055
db 0xFF ; r/m
db 0x00
dd 0x00000000
db 0x01 ; r/m op byte
db 0x00
dw 0x0000
dd 0x00000000
; mov
dw 0x0056
db 0x89 ; r/m <- r
db 0x8B ; r <- r/m
db 0xC7 ; r/m <- imm16/32
db 0x00
dw 0x0000
dd 0x00000000
dd 0x00000000
; mov bit8
dw 0x0056
db 0x88 ; r/m8 <- r8
db 0x8A ; r8 <- r/m8
db 0x00
db 0xC6 ; r/m8 <- imm8
dw 0x0000
dd 0x00000000
dw 0x0000
db 0x00
db 0x01 ; bit8 flag
; add
dw 0x0057
db 0x01 ; r/m <- r
db 0x03 ; r <- r/m
db 0x81 ; r/m <- imm16/32
db 0x83 ; r/m <- imm8
dw 0x0000
dd 0x00000000
dd 0x00000000
; sub
dw 0x0058
db 0x29 ; r/m <- r
db 0x2B ; r <- r/m
db 0x81 ; r/m <- imm16/32
db 0x83 ; r/m <- imm8
dw 0x0000
dw 0x0000
db 0x05 ; 5: r/m <- imm16/32 op flag
db 0x05 ; 5: r/m <- imm8 op flag
dd 0x00000000
; call
dw 0x0059
db 0xFF ; r/m
db 0x00
dw 0x0000
db 0xE8 ; rel16/32
db 0x00
db 0x02 ; 2: r/m op flag
db 0x00
dw 0x0000
dd 0x00000000
; retn
dw 0x005A
db 0xC3 ; opcode
db 0x00 ; reserved
dd 0x00000000
dd 0x00000000
dd 0x00000000
; cmp
dw 0x005B
db 0x39 ; r/m <- r
db 0x3B ; r <- r/m
db 0x81 ; r/m <- imm16/32
db 0x83 ; r/m <- imm8
dw 0x0000
dw 0x0000
db 0x07 ; 7: r/m <- imm16/32 op flag
db 0x07 ; 7: r/m <- imm8 op flag
dd 0x00000000
; cmp bit8
dw 0x005B
db 0x38 ; r/m8 <- r8
db 0x3A ; r8 <- r/m8
db 0x00
db 0x80 ; r/m8 <- imm8
dw 0x0000
dw 0x0000
db 0x00
db 0x07 ; 7: r/m8 <- imm8 op flag
dw 0x0000
db 0x00
db 0x01 ; bit8 flag
; jmp
dw 0x005C
db 0xFF ; r/m
db 0x00
dw 0x0000
db 0xE9 ; rel16/32
db 0xEB ; rel8
db 0x04 ; r/m
db 0x00
dw 0x0000
dd 0x00000000
; je
dw 0x005D
dw 0x0000
dw 0x0000
db 0x84 ; rel16/32
db 0x74 ; rel8
dd 0x00000000
db 0x08 ; 8: rel16/32 0x0F flag
db 0x00
dw 0x0000
; jne
dw 0x005E
dw 0x0000
dw 0x0000
db 0x85 ; rel16/32
db 0x75 ; rel8
dd 0x00000000
db 0x08 ; 8: rel16/32 0x0F flag
db 0x00
dw 0x0000
; push
; TODO add support for the +r variation
dw 0x005F
db 0xFF ; r/m
db 0x00
db 0x68 ; imm16/32
db 0x6A ; imm8
dw 0x0000
db 0x06 ; 6: r/m
db 0x00
dw 0x0000
dd 0x00000000
; pop
; TODO add support for the +r variation
dw 0x0060
db 0x8F ; r/m
db 0x00
dd 0x00000000
dd 0x00000000
dd 0x00000000
; out
; TODO enforce DX AL requirement, ignore ModR/M correctly
dw 0x0061
db 0xEE ; r/m <- r
db 0x00
dd 0x00000000
db 0x10 ; 10: no ModRM flag
db 0x00
dw 0x0000
dd 0x00000000
.by_id_end:
; banner printed by `start`: ANSI colour escape, text, ANSI reset, newline
msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00
; printed by `halt`
msg_halt db "halted.", 0x0A, 0x00
; list of whitespace bytes (space, carriage return) searched with `elemb`;
; it has no terminator, callers pass its length (2) explicitly
whitespace_2 db " ", 0x0D
; test program
align 128
program:
db "print:", 0x0A
db " push rdx", 0x0A
db " push rax", 0x0A
db " push rsi", 0x0A
db "", 0x0A
db " mov edx, 0x3F8", 0x0A
db " .loop:", 0x0A
db " mov al, [rsi]", 0x0A
db " cmp al, 0x00", 0x0A
db " je .done", 0x0A
db " out dx, ax", 0x0A
db " inc rsi", 0x0A
db " jmp .loop", 0x0A
db " .done:", 0x0A
db " pop rsi", 0x0A
db " pop rax", 0x0A
db " pop rdx", 0x0A
db " ret", 0x0A
db " .debug:", 0x0A
db " push rsi", 0x0A
db " mov esi, .debug_msg", 0x0A
db " call print", 0x0A
db " pop rsi", 0x0A
db " jmp print ; tail call", 0x0A
db " .error:", 0x0A
db " push rsi", 0x0A
db " mov esi, .error_msg", 0x0A
db " call print", 0x0A
db " pop rsi", 0x0A
db " jmp print ; tail call", 0x0A
db " .test:", 0x0A
db " push rsi", 0x0A
db " mov esi, .test_msg", 0x0A
db " call print", 0x0A
db " pop rsi", 0x0A
db " jmp print ; tail call", 0x0A
db " .warn:", 0x0A
db " push rsi", 0x0A
db " mov esi, .warn_msg", 0x0A
db " call print", 0x0A
db " pop rsi", 0x0A
db " jmp print ; tail call", 0x0A
db " .debug_msg:", 0x0A
db " db 0x1B", 0x0A
db ' db "[36m"', 0x0A
db ' db "[DEBUG]: "', 0x0A
db " db 0x1B", 0x0A
db ' db "[0m"', 0x0A
db " db 0x00", 0x0A
db " .error_msg:", 0x0A
db " db 0x1B", 0x0A
db ' db "[1;31m"', 0x0A
db ' db "[ERROR]: "', 0x0A
db " db 0x1B", 0x0A
db ' db "[0m"', 0x0A
db " db 0x00", 0x0A
db " .test_msg:", 0x0A
db " db 0x1B", 0x0A
db ' db "[1;33m"', 0x0A
db ' db "[TEST]: "', 0x0A
db " db 0x1B", 0x0A
db ' db "[0m"', 0x0A
db " db 0x00", 0x0A
db " .warn_msg:", 0x0A
db " db 0x1B", 0x0A
db ' db "[1;35m"', 0x0A
db ' db "[WARN]: "', 0x0A
db " db 0x1B", 0x0A
db ' db "[0m"', 0x0A
db " db 0x00", 0x0A, 0x00
program_end:
msg_end db "end of the binary ->|", 0x0A, 0x00