; TODO actually enforce any of these *_SIZE constants :p
; Memory map. Every region below is addressed absolutely (see [default abs]).
LOAD_ADDR equ 0x00010000 ; address this program is loaded at
STACK_ADDR equ 0x00030000 ; address to put the 64-bit stack at
                          ; NOTE(review): same value as AWAITING_LABEL_TABLE_ADDR;
                          ; pushes store below rsp, so the stack sits under the
                          ; table rather than inside it - confirm this is intended
AWAITING_LABEL_TABLE_ADDR equ 0x00030000 ; address to store pending labels at
AWAITING_LABEL_TABLE_SIZE equ 0x00010000
LABEL_TABLE_ADDR equ 0x00040000 ; address to store labels at
LABEL_TABLE_SIZE equ 0x00010000
TEST_ARENA_ADDR equ 0x00050000 ; address to run tests at
TEST_ARENA_SIZE equ 0x00010000 ; maximum size tests can use
TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at
TOKEN_TABLE_SIZE equ 0x00010000 ; max length of table
OUTPUT_ADDR equ 0x00070000 ; address of outputted binary
OUTPUT_SIZE equ 0x00010000 ; max length of outputted binary

; sentinel values returned by the lookup helpers when nothing matches
UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id of an unrecognised token
UNRECOGNISED_ID_TYPE equ 0x0F ; type of an unrecognised id
UNRECOGNISED_ID_METADATA equ 0xFF ; metadata of an unrecognised id
UNRECOGNISED_ID_OPCODE equ 0x90 ; opcode of an unrecognised id (NOP)

TEST_LINE_LENGTH equ 80 ; right border of test suite results

; flags for expected values in tokeniser
; (bit mask kept in tokenise's `.expecting` byte)
E_COMMENT equ 1 << 0
E_NEWLINE equ 1 << 1
E_WHITESPACE equ 1 << 2
E_COMMA equ 1 << 3
E_OPERATOR equ 1 << 4
E_OPERAND equ 1 << 5
E_LABEL equ 1 << 6

[bits 64]
[org LOAD_ADDR]
[default abs]
  ; TODO see if I actually need to do this
  ; afaik absolute addressing is not harmful on bare metal
  ; reasoning: stops annoying warning =D

; Entry point: set up the stack, run the test suite, then tokenise and
; assemble the embedded `program` and halt.
start:
  mov rsp, STACK_ADDR
  ; we might need more stack space, let's just be safe

  mov rsi, msg_welcome
  call print

  call run_tests

  call clear_token_table
  call clear_label_tables

  mov rdi, program ; -> program
  mov rsi, [program.size] ; = size of program
  call tokenise
  ; rax = number of tokens in token table
  mov rdi, rax

  push rdi ; keep the token count across clear_output_arena
  call clear_output_arena
  pop rdi
  ; rdi = number of tokens in token table
  call assemble

  jmp halt

; ------------------------------------------------------------------------------
; assembling
;
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; assemble
;
; description:
;   assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat
;   binary located at OUTPUT_ADDR.
;   Bytes of one instruction are collected in the 16-byte `.buffer` (prefixes
;   are inserted at its front with `.push_byte`, everything else is appended
;   with `.write_byte`) and copied to the output by `.flush_write_buffer` at the
;   top of every loop iteration.
;   Behaviour is undefined when:
;   - tokens are in an impossible order
;       0x1000 ; memory address, following byte should be a register
;       0x1000 ; not a register
;   - operator tokens followed by the wrong number of arguments
;     TODO enforce this in `tokenise`
;       0x004F ; hlt, expects 0 arguments
;       0x0000 ; rax, an argument
;   - an undefined token is included, like 0x0051
;
; parameters:
;   rdi = number of tokens in the token table
;
; notes:
;   - does not return on an error: prints a message and jumps to `halt`
;   - writes rax, rbx, rcx, rdx, rsi and rdi without restoring them
;   - `.next_output_byte` is not reset on entry, so a second call appends after
;     the output of the first
; ------------------------------------------------------------------------------
assemble:
    ; TODO deal with src=imm and src=imm8
    xor eax, eax
    mov [.tokens_processed], eax    ; eax = number of tokens processed
    mov [.tokens_total], edi        ; edi = total number of tokens in table
    test edi, edi                   ; nothing to assemble?
    jz .break                       ; then do not read token table entry 0

.loop:
    call .flush_write_buffer        ; emit the previous instruction's bytes
    call .get_next_tte
    push rdi
                                    ; di = tte
    call get_tte_type               ; al = type
    pop rdi                         ; di = tte
    cmp al, 0x1                     ; check if next tte is an operator
    je .operator                    ; if so, handle
    cmp al, 0x4                     ; check if next tte is a label
    je .label
    jmp .unexpected_token           ; otherwise, fail

.label:
    ; label tokens only produce a debug message here; no bytes are written
    push rsi
    mov rsi, .msg_label
    call print.debug
    pop rsi
    jmp .loop_next_token

.operator:
    ; bl is the flag byte get_opcode matches on. Start every operator from
    ; "no flags" so a flag left behind by an earlier instruction cannot leak
    ; into this lookup (the 32-bit path used to inherit whatever bl held).
    xor ebx, ebx
                                    ; di = tte of operator
    call get_tte_typed_metadata     ; al = tte typed metadata
    cmp al, UNRECOGNISED_ID_METADATA ; make sure token has metadata on record
    je .unexpected_token            ; if not, fail
    and al, 11b                     ; mask for # operands
    cmp al, 0                       ; check if operator has no operands
    je .operator_0                  ; if so, handle case of no operands
    cmp al, 1                       ; check if operator has one operand
    je .operator_1                  ; if so, handle case of one operand
    cmp al, 2                       ; check if operator has two operands
    je .operator_2                  ; if so, handle case of two operands
    jmp .unexpected_token

; ---- no operands -------------------------------------------------------------
.operator_0:
    push rsi
    mov rsi, .msg_operator_0
    call print.debug
    pop rsi
                                    ; di = tte of operator
    mov sil, 0b                     ; opcode
    call get_opcode                 ; al = opcode
                                    ; dl = 0x00
    call .write_byte
    jmp .loop_next_token

; ---- one operand -------------------------------------------------------------
.operator_1:
    push rsi
    mov rsi, .msg_operator_1
    call print.debug
    pop rsi
                                    ; di = tte of operator
    mov sil, 0b                     ; dst=r/m
    call get_opcode                 ; al = opcode
                                    ; dl = op flag
    push rdx
    call .write_byte
    pop rdx                         ; dl = op flag
    call .next_token
    jge .break
    call .get_next_tte
    push rdi
    and di, 0xFF00
    cmp di, 0x1000                  ; check if token is a memory address
    pop rdi                         ; di = next tte
    je .operator_1_memory
    push rdx
                                    ; di = next tte
    call get_tte_type               ; al = type of token
    pop rdx                         ; dl = op flag
    cmp al, 0x02                    ; type: register
    je .operator_1_register
    jmp .unexpected_token

.operator_1_memory:
    push rsi
    mov rsi, .msg_operator_1_memory
    call print.debug
    pop rsi
    jmp .unsupported_memory_access

.operator_1_register:
    push rsi
    mov rsi, .msg_operator_1_register
    call print.debug
    pop rsi
                                    ; di = token table entry
    call get_tte_typed_metadata     ; al = register typed metadata
    and al, 11b                     ; al = register width
    cmp al, 00b                     ; 8 bit
    je .unexpected_token            ; TODO handle 8 bit opcodes
    cmp al, 10b                     ; 32 bit
    je .operator_1_register_no_prefix ; default register length; no prefix
    cmp al, 01b                     ; 16 bit
    je .operator_1_register_16
    cmp al, 11b                     ; 64 bit
    je .operator_1_register_64
.operator_1_register_16:
    mov al, 0x66
    call .push_byte
    jmp .operator_1_register_no_prefix
.operator_1_register_64:
    mov al, 0x48
    call .push_byte
    jmp .operator_1_register_no_prefix
.operator_1_register_no_prefix:
    mov si, di                      ; si = `R/M` tte
    and edx, 0xFF
    or edx, 0xFE00                  ; pass di as direct value
    mov edi, edx                    ; di = op flag
    mov edx, 11b                    ; dl = mod bits
    call get_ModRM                  ; al = Mod R/M byte
    call .write_byte
    jmp .loop_next_token

; ---- two operands ------------------------------------------------------------
.operator_2:
    push rsi
    mov rsi, .msg_operator_2
    call print.debug
    pop rsi
    mov cx, di                      ; cx = tte of operator
    call .next_token
    jge .break
    call .get_next_tte
    push rdi
    and di, 0xFF00
    cmp di, 0x1000                  ; check if token is a memory address
    pop rdi                         ; di = next tte
    je .operator_2_memory
    push rcx
                                    ; di = next tte
    call get_tte_type               ; al = type of token
    pop rcx                         ; cx = tte of operator
    cmp al, 0x02                    ; type: register
    je .operator_2_register
    jmp .unexpected_token

; ---- two operands, dst = [register] ------------------------------------------
.operator_2_memory:
    push rsi
    mov rsi, .msg_operator_2_memory
    call print.debug
    pop rsi
    cmp di, 0x1000                  ; check if token is addressing a register
    jne .unsupported_memory_access  ; if not, unsupported
    mov edi, ecx                    ; di = tte of operator
    xor esi, esi                    ; dst=r/m; src=r
    call get_opcode                 ; al = opcode
                                    ; dl = op flag
    call .write_byte
    call .next_token
    jge .break
    call .get_next_tte
                                    ; di = tte
    call get_tte_typed_metadata
    and al, 11b                     ; al = register width
    cmp al, 10b                     ; 32 bit
    je .operator_2_memory_32
    cmp al, 11b                     ; 64 bit
    je .operator_2_memory_continue
    ; other cases: 16 bit, 8 bit both are not valid for addressing
    jmp .size_mismatch
.operator_2_memory_32:
    mov al, 0x67
    call .push_byte
    jmp .operator_2_memory_continue
.operator_2_memory_continue:
    mov si, di                      ; si = dst register tte
    call .next_token
    jge .break
    call .get_next_tte
    push rdi
    and di, 0xFF00
    cmp di, 0x1000                  ; check if token is a memory address
    pop rdi                         ; di = next tte
    je .unsupported_memory_access   ; if so, fail; no case of *],[* in asm
                                    ; di = next tte
    call get_tte_type               ; al = type of token
    cmp al, 0x02                    ; check if token is a register
    je .operator_2_memory_register  ; if so, handle
    jmp .unexpected_token

.operator_2_memory_register:
    push rsi
    mov rsi, .msg_operator_2_memory_register
    call print.debug
    pop rsi
                                    ; si = r/m
                                    ; di = src tte
    call get_tte_typed_metadata     ; al = register typed metadata
    and al, 11b                     ; al = register width
    cmp al, 00b                     ; 8 bit
    je .unexpected_token            ; TODO handle 8 bit opcodes
    cmp al, 01b                     ; 16 bit
    je .operator_2_memory_register_16
    cmp al, 10b                     ; 32 bit
    je .operator_2_memory_register_continue ; default register length; no prefix
    cmp al, 11b                     ; 64 bit
    je .operator_2_memory_register_64
.operator_2_memory_register_16:
    mov al, 0x66
    call .push_byte
    jmp .operator_2_memory_register_continue
.operator_2_memory_register_64:
    mov al, 0x48
    call .push_byte
    jmp .operator_2_memory_register_continue
.operator_2_memory_register_continue:
                                    ; si = r/m; dst tte
                                    ; di = reg; src tte
    mov dl, 00b                     ; dl = mod bits
    call get_ModRM                  ; al = Mod R/M byte
    call .write_byte
    jmp .loop_next_token

; ---- two operands, dst = register --------------------------------------------
.operator_2_register:
    push rsi
    mov rsi, .msg_operator_2_register
    call print.debug
    pop rsi
    push rcx
                                    ; di = token table entry
    call get_tte_typed_metadata     ; al = register typed metadata
    pop rcx                         ; cx = operator tte
    and al, 11b                     ; al = register width
    cmp al, 00b                     ; 8 bit
    je .operator_2_register_8
    cmp al, 01b                     ; 16 bit
    je .operator_2_register_16
    cmp al, 10b                     ; 32 bit
    je .operator_2_register_continue ; default register length; no prefix
    cmp al, 11b                     ; 64 bit
    je .operator_2_register_64
.operator_2_register_8:
    mov bl, 1b                      ; operator flag 8bit
    jmp .operator_2_register_continue
.operator_2_register_16:
    xor ebx, ebx                    ; no operator flags
    mov al, 0x66
    call .push_byte
    jmp .operator_2_register_continue
.operator_2_register_64:
    xor ebx, ebx                    ; no operator flags
    mov al, 0x48
    call .push_byte
    jmp .operator_2_register_continue
.operator_2_register_continue:
    mov si, di                      ; si = dst tte
    call .next_token
    jge .break
    call .get_next_tte
    push rdi
    and di, 0xFF00
    cmp di, 0x1000                  ; check if token is a memory address
    pop rdi                         ; di = next tte
    je .operator_2_register_memory
    push rdi
    and di, 0xFF00
    cmp di, 0x2000                  ; check if token is a constant
    pop rdi                         ; di = next tte
    je .operator_2_register_const
    ; cx (operator tte) is still needed by the register,register path, and
    ; get_tte_type may overwrite cx, so keep it on the stack across the call
    push rcx
                                    ; di = next tte
    call get_tte_type               ; al = type of token
    pop rcx                         ; cx = tte of operator
    cmp al, 0x02                    ; check if token is a register
    je .operator_2_register_register ; if so, handle
    jmp .unexpected_token

.operator_2_register_memory:
    push rsi
    mov rsi, .msg_operator_2_register_memory
    call print.debug
    pop rsi
                                    ; si = dst tte
    push rdi
    push rsi
    mov di, cx                      ; di = tte of operator
    mov sil, 1                      ; dst = reg
    mov bl, 1                       ; bl = operator flag byte
                                    ; NOTE(review): forces flag 1 whatever the
                                    ; destination width was - confirm intended
    call get_opcode                 ; al = opcode
                                    ; dl = op flag
    ; TODO do something if the op flag is present
    call .write_byte
    pop rsi                         ; si = tte
    pop rdi                         ; di = tte
    cmp di, 0x1000                  ; check if token is addressing to a register
    jne .unsupported_memory_access  ; if not, unsupported
    call .next_token
    jge .break
    call .get_next_tte
                                    ; di = tte
    call get_tte_typed_metadata     ; al = register typed metadata
    and al, 11b                     ; al = register width
    cmp al, 10b                     ; 32 bit
    je .operator_2_register_memory_32
    cmp al, 11b                     ; 64 bit
    je .operator_2_register_memory_continue ; default addr length; no prefix
    ; other cases: 16 bit, 8 bit both are not valid for addressing
    jmp .size_mismatch
.operator_2_register_memory_32:
    mov al, 0x67
    call .push_byte
    jmp .operator_2_register_memory_continue
.operator_2_register_memory_continue:
                                    ; si = reg; dst tte
                                    ; di = r/m; src tte
    push rsi                        ; swap si and di for get_ModRM
    mov esi, edi                    ; si = r/m; src tte
    pop rdi                         ; di = reg; dst tte
    mov edx, 00b                    ; dl = mod bits
    call get_ModRM                  ; al = Mod R/M byte
    call .write_byte
    jmp .loop_next_token

.operator_2_register_register:
    push rsi
    mov rsi, .msg_operator_2_register_register
    call print.debug
    pop rsi
                                    ; si = dst tte
    push rdi
    push rsi
    mov di, cx                      ; di = tte of operator
    mov sil, 1                      ; dst = reg
                                    ; bl = operator flag byte
    call get_opcode                 ; al = opcode
                                    ; dl = op flag
    ; TODO do something if the op flag is present
    call .write_byte
    pop rsi                         ; si = dst tte
    pop rdi                         ; di = src tte
    call get_tte_typed_metadata     ; al = register typed metadata
    and al, 11b                     ; al = register width
    cmp al, 00b                     ; 8 bit
    je .unexpected_token            ; TODO handle 8 bit opcodes
    cmp al, 01b                     ; 16 bit
    je .operator_2_register_register_16
    cmp al, 10b                     ; 32 bit
    je .operator_2_register_register_32
    cmp al, 11b                     ; 64 bit
    je .operator_2_register_register_64
    ; The source width must agree with the prefix already pushed for the
    ; destination: 0x66 present <=> 16 bit, 0x48 present <=> 64 bit.
.operator_2_register_register_16:
    mov dl, 0x66
    call .buffer_contains
    cmp al, 1                       ; 16 bit prefix must be present
    jne .size_mismatch
    mov dl, 0x48
    call .buffer_contains
    cmp al, 0                       ; 64 bit prefix must be absent
    jne .size_mismatch
    jmp .operator_2_register_register_continue
.operator_2_register_register_32:
    mov dl, 0x66
    call .buffer_contains
    cmp al, 0                       ; 16 bit prefix must be absent
    jne .size_mismatch
    mov dl, 0x48
    call .buffer_contains
    cmp al, 0                       ; 64 bit prefix must be absent
    jne .size_mismatch
    jmp .operator_2_register_register_continue
.operator_2_register_register_64:
    mov dl, 0x66
    call .buffer_contains
    cmp al, 0                       ; 16 bit prefix must be absent
    jne .size_mismatch
    mov dl, 0x48
    call .buffer_contains
    cmp al, 1                       ; 64 bit prefix must be present
    jne .size_mismatch
    jmp .operator_2_register_register_continue
.operator_2_register_register_continue:
    push rsi                        ; swap si and di for get_ModRM
    mov esi, edi                    ; si = r/m; src tte
    pop rdi                         ; di = reg; dst tte
    mov edx, 11b                    ; dl = mod bits
    call get_ModRM                  ; al = Mod R/M byte
    call .write_byte
    jmp .loop_next_token

.operator_2_register_const:
    push rsi
    mov rsi, .msg_operator_2_register_const
    call print.debug
    pop rsi
                                    ; si = dst tte
    push rdi
    push rsi
    mov di, cx                      ; di = tte of operator
    mov sil, 2                      ; dst=r/m,src=imm
                                    ; bl = operator flag byte
    ; TODO change sil based on whether bl is 8 bit or not
    test bl, 1                      ; bit8 flag
    jz .operator_2_register_const_get_opcode_continue
    mov sil, 3                      ; dst=r/m,src=imm8
.operator_2_register_const_get_opcode_continue:
    call get_opcode                 ; al = opcode
                                    ; dl = op flag
    ; TODO do something if the op flag is present
    call .write_byte
    pop rsi                         ; si = tte
    pop rdi                         ; balance the `push rdi` above; without this
                                    ; the final `ret` pops a stale value
    call .next_token                ; step from the 0x2000 marker to the data
    jge .break
    test bl, 1                      ; bit8 flag
    jnz .operator_2_register_const_8
    mov dl, 0x48
    call .buffer_contains
    cmp al, 1
    je .operator_2_register_const_64
    mov dl, 0x66
    call .buffer_contains
    cmp al, 1
    je .operator_2_register_const_16
    jmp .operator_2_register_const_32
.operator_2_register_const_8:
    mov ecx, [.tokens_processed]
    mov al, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next byte from the tt
    call .write_byte                ; and add it to the buffer
    jmp .operator_2_register_const_continue
.operator_2_register_const_16:
    mov ecx, [.tokens_processed]
    mov ax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 2 bytes from the tt
    mov ecx, [.buffer_pointer]
    mov [rcx], ax                   ; and add them to the buffer
    add ecx, 2
    mov [.buffer_pointer], ecx
    jmp .operator_2_register_const_continue
.operator_2_register_const_32:
    mov ecx, [.tokens_processed]
    mov eax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 4 bytes from the tt
    mov ecx, [.buffer_pointer]
    mov [rcx], eax                  ; and add them to the buffer
    add ecx, 4
    mov [.buffer_pointer], ecx
    jmp .operator_2_register_const_continue
.operator_2_register_const_64:
    mov ecx, [.tokens_processed]
    mov rax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 8 bytes from the tt
    mov ecx, [.buffer_pointer]
    mov [rcx], rax                  ; and add them to the buffer
    add ecx, 8
    mov [.buffer_pointer], ecx
    jmp .operator_2_register_const_continue
.operator_2_register_const_continue:
    ; skip the next 4 tokens (8 bytes) as prescribed by 0x2000
    ; (three here, the fourth in .loop_next_token)
    call .next_token
    jge .break
    call .next_token
    jge .break
    call .next_token
    jge .break
    jmp .loop_next_token

; ---- loop control and error exits --------------------------------------------
.loop_next_token:
    call .next_token
    jge .break
    jmp .loop

.break:
    call .flush_write_buffer
    push rsi
    mov rsi, .msg_break
    call print.debug
    pop rsi
    ret

.unexpected_token:
    push rsi
    mov rsi, .msg_unexpected_token
    call print.error
    pop rsi
    jmp halt

.unsupported_memory_access:
    push rsi
    mov rsi, .msg_unsupported_memory_access
    call print.error
    pop rsi
    jmp halt

.size_mismatch:
    push rsi
    mov rsi, .msg_size_mismatch
    call print.error
    pop rsi
    jmp halt

; ---- procedures --------------------------------------------------------------

; advances to the next token; leaves the flags of
; `cmp tokens_processed, tokens_total` for the caller
; add the line `jge .break` after call site
; returns eax = new token index, edi = total number of tokens
.next_token:
    mov eax, [.tokens_processed]
    mov edi, [.tokens_total]
    inc eax
    mov [.tokens_processed], eax
    cmp eax, edi
    ret

; eax = current entry index in token table
; returns di = next tte (upper bits of edi zeroed)
.get_next_tte:
    xor edi, edi
    mov di, [eax * 2 + TOKEN_TABLE_ADDR]
    ret

.tokens_processed dd 0
.tokens_total dd 0

; appends one byte to the output binary
; al = byte to write
.output_byte:
    push rdx
    mov edx, [.next_output_byte]    ; get output byte's address
    mov [edx], al                   ; write byte to that address
    inc edx                         ; increment address
    mov [.next_output_byte], edx    ; put output byte's address
    pop rdx
    ret

.next_output_byte dd OUTPUT_ADDR    ; next empty byte in output
    ; TODO get rid of this sketchy bit of state

; inserts a byte at the FRONT of the write buffer, shifting the current
; contents up by one (the last buffer byte is dropped)
; al = byte to push
; NOTE(review): a byte pushed later lands in front of one pushed earlier, so a
; 0x67 pushed before 0x48 ends up between the 0x48 and the opcode - confirm the
; resulting prefix order is what the CPU expects.
.push_byte:
    push rcx
    mov ecx, [.buffer_pointer]
    push rcx                        ; remember the current write position
    push rax
    ; Start one byte before the end so the first copy is
    ; [.buffer_end - 2] -> [.buffer_end - 1]; starting at .buffer_end wrote a
    ; byte past the buffer, on top of .buffer_pointer.
    mov ecx, .buffer_end - 1
.push_byte_loop:
    dec ecx
    cmp ecx, .buffer
    jl .push_byte_break
    mov al, [ecx]
    mov [ecx + 1], al
    jmp .push_byte_loop
.push_byte_break:
    pop rax                         ; al = byte to push
    mov [.buffer], al               ; write desired byte to front of buffer
    pop rcx                         ; ecx = old buffer pointer
    inc ecx                         ; ecx = pointer to next empty in buffer
    mov [.buffer_pointer], ecx      ; record write
    pop rcx
    ret

; appends a byte at the current write position of the buffer
; al = byte to write
.write_byte:
    push rdx
    mov edx, [.buffer_pointer]
    mov [edx], al
    inc edx
    mov [.buffer_pointer], edx
    pop rdx
    ret

; reports whether a byte occurs anywhere in the 16-byte write buffer
; dl = byte to look for
; returns al = result of `elemb` (the callers compare it against 0 and 1)
; rdi and rsi are preserved
.buffer_contains:
    push rdi
    push rsi
    mov edi, .buffer_end - .buffer  ; length of buffer
    mov rsi, .buffer                ; buffer location
    call elemb
    pop rsi
    pop rdi
    ret

; copies every buffered byte to the output, zeroes the buffer bytes it copied
; and resets the write position; rax and rcx are preserved
.flush_write_buffer:
    push rcx
    push rax
    mov ecx, .buffer
    dec ecx
.flush_write_buffer_loop:
    inc ecx
    cmp ecx, [.buffer_pointer]
    jge .flush_write_buffer_break
    mov al, [ecx]
    call .output_byte
    mov byte [ecx], 0x00
    jmp .flush_write_buffer_loop
.flush_write_buffer_break:
    mov dword [.buffer_pointer], .buffer
    pop rax
    pop rcx
    ret

.buffer dq 0, 0                     ; 16 bytes holding the instruction being built
.buffer_end:
.buffer_pointer dd .buffer          ; points to current byte in buffer

.msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00
.msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00
.msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00
.msg_break db "break", 0x0A, 0x00
.msg_label db "label", 0x0A, 0x00
.msg_operator_0 db "operator_0", 0x0A, 0x00
.msg_operator_1 db "operator_1", 0x0A, 0x00
.msg_operator_1_memory db "operator_1_memory", 0x0A, 0x00
.msg_operator_1_register db "operator_1_register", 0x0A, 0x00
.msg_operator_2 db "operator_2", 0x0A, 0x00
.msg_operator_2_memory db "operator_2_memory", 0x0A, 0x00
.msg_operator_2_memory_register db "operator_2_memory_register", 0x0A, 0x00
.msg_operator_2_register db "operator_2_register", 0x0A, 0x00
.msg_operator_2_register_memory db "operator_2_register_memory", 0x0A, 0x00
.msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00
.msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00
.msg_potential_label db "potential_label", 0x0A, 0x00

; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
;   given a token table entry, returns the declared type in `tokens.by_id`. If
;   there is no entry, returns UNRECOGNISED_ID_TYPE
;
;   +-----+-----------------+
;   | hex | meaning         |
;   +-----+-----------------+
;   | 0x0 | ignored         |
;   | 0x1 | operator        |
;   | 0x2 | register        |
;   | 0x3 | pseudo-operator |
;   | 0x4 | label           |
;   | 0xF | unknown         |
;   +-----+-----------------+
;
; parameters:
;   di = token table entry
;
; returned:
;   al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
;     zeroed; the rest of rax is zeroed.
; ------------------------------------------------------------------------------
get_tte_type:
    ; Linear search of `tokens.by_id`, whose entries are 4 bytes each:
    ; word token id, byte type (offset 2), byte typed metadata (offset 3).
    ; An id that is not in the table but has 0x3 in its top nibble is reported
    ; as a label (0x4). edi is modified; rcx is left untouched.
    and edi, 0xFFFF                 ; di = token table entry
    xor eax, eax                    ; eax = tokens.by_id index
.loop:
    cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check
    jae .not_found                  ; index == count is already past the end
    cmp [tokens.by_id + eax * 4], di ; does this entry hold our token id?
    je .found
    inc eax
    jmp .loop
.not_found:
    shr edi, 12                     ; edi = top nibble of the token
    cmp edi, 0x3
    je .label
    mov eax, UNRECOGNISED_ID_TYPE
    and eax, 0xF
    ret
.label:
    mov eax, 0x4
    ret
.found:
    mov al, [2 + tokens.by_id + eax * 4]
    and eax, 0xF                    ; mask as expected
    ret

; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
;   given a token table entry, returns the declared typed metadata in
;   `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA
;
; parameters:
;   di = token table entry
;
; returned:
;   al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax is
;     zeroed.
;   di is unchanged (the upper bits of edi are zeroed); rcx is left untouched.
; ------------------------------------------------------------------------------
get_tte_typed_metadata:
    and edi, 0xFFFF                 ; di = token table entry
    xor eax, eax                    ; eax = tokens.by_id index
.loop:
    cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check
    jae .not_found                  ; index == count is already past the end
    cmp [tokens.by_id + eax * 4], di ; does this entry hold our token id?
    je .found
    inc eax
    jmp .loop
.not_found:
    mov eax, UNRECOGNISED_ID_METADATA
    ret
.found:
    mov al, [3 + tokens.by_id + eax * 4]
    and eax, 0xFF
    ret

; ------------------------------------------------------------------------------
; get_ModRM
;
; description:
;   given 2 register tokens and the mod bits, returns the ModR/M byte
;
; parameters:
;   di = token table entry `reg`. 0xFEXX passes low 3 bits as op flag
;   si = token table entry `R/M`
;   dl = lower 2 bits: mod bits.
;        The rest is ignored
;
; returned:
;   al = ModR/M byte; the rest of rax is zeroed
; ------------------------------------------------------------------------------
get_ModRM:
    push rbx                        ; ebx carries the `reg` field below
    movzx edi, di                   ; di = token table entry `reg`
    movzx esi, si                   ; si = token table entry `R/M`
    and edx, 11b                    ; keep only the mod bits
    shl edx, 6                      ; mod lives in bits 7..6

    ; a `reg` argument of the form 0xFEXX is a literal op flag, not a token
    mov eax, edi
    shr eax, 8
    cmp al, 0xFE
    je .literal_reg_field

    call get_reg_bits               ; al = reg bits of the `reg` token
    mov bl, al
    jmp .place_reg_field

.literal_reg_field:
    mov ebx, edi
    and ebx, 111b                   ; bl = op flag

.place_reg_field:
    shl ebx, 3                      ; reg lives in bits 5..3

    mov edi, esi                    ; now look up the `R/M` token
    call get_reg_bits               ; al = r/m bits
    mov ecx, eax                    ; cl = r/m bits, bits 2..0

    mov eax, edx                    ; mod bits
    or eax, ebx                     ; reg bits
    or eax, ecx                     ; R/M bits
    movzx eax, al                   ; only the low byte is the result
    pop rbx
    ret

; ------------------------------------------------------------------------------
; get_opcode
;
; description:
;   given an operator token, returns its opcode. For operators with multiple
;   opcodes, the variant can be specified.
;
; parameters:
;   di = token table entry
;   sil = offset within opcode entry. 0 is the first opcode, 1 the second, and so
;     on
;   bl = flag byte
;
; returned:
;   al = opcode; the rest of rax is zeroed.
;   dl = lower 3 bits: op flag, if applicable. The rest of rdx is zeroed.
; ------------------------------------------------------------------------------
get_opcode:
    ; Linear search of `opcodes.by_id`, whose entries are 16 bytes each. As read
    ; by this routine: word token id at offset 0, opcode bytes from offset 2,
    ; op flags packed two per byte (low nibble first) from offset 8, and the
    ; flag byte matched against bl at offset 11.
    ; Returns UNRECOGNISED_ID_OPCODE with dl = 0 when no entry matches both the
    ; token and the flag byte. cx is overwritten; ebx is masked to bl.
    and edi, 0xFFFF                 ; di = token table entry
    and ebx, 0xFF                   ; bl = flag byte
    add esi, 2
    and esi, 111b                   ; offset within opcode entry
    sub esi, 2                      ; 0..5 stay as they are (6 and 7 become -2, -1)
    xor eax, eax                    ; eax = opcodes.by_id index
.loop:
    cmp eax, (opcodes.by_id_end - opcodes.by_id) / 16 ; make sure it's still in range
    jae .not_found                  ; index == count is already past the end
    shl eax, 4                      ; index -> byte offset of the entry
    mov cx, [opcodes.by_id + eax]   ; next entry in opcodes.by_id
    shr eax, 4
    cmp cx, di
    je .maybe_found
    inc eax
    jmp .loop
.maybe_found:
    ; token matches; the entry only counts if its flag byte matches bl too
    shl eax, 4
    mov cl, [opcodes.by_id + 11 + eax]
    shr eax, 4
    cmp cl, bl
    je .found
    inc eax
    jmp .loop
.not_found:
    xor edx, edx                    ; no op flag for an unknown opcode
    mov eax, UNRECOGNISED_ID_OPCODE
    ret
.found:
    shl eax, 4                      ; eax = byte offset of the entry
    push rsi
    shr esi, 1                      ; two op flags per byte
    mov dl, [esi + 8 + opcodes.by_id + eax]
    pop rsi
    test esi, 1                     ; check if offset is odd
    jz .found_continue
    shr edx, 4                      ; if so, upper part of dl byte
.found_continue:
    mov al, [esi + 2 + opcodes.by_id + eax]
    and eax, 0xFF
    and edx, 0x0F
    ret

; ------------------------------------------------------------------------------
; get_reg_bits
;
; description:
;   given a register token, returns its reg bits metadata
;
; parameters:
;   di = token table entry
;
; returned:
;   al = register token; the rest of rax, including the upper 5 bits of al, are
;     zeroed.
; ------------------------------------------------------------------------------
get_reg_bits:
                                    ; di = tte
    call get_tte_typed_metadata     ; al = typed metadata
    shr eax, 2                      ; discard type data
    and eax, 111b                   ; mask
    ret

; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; tokenise
; TODO write tests
;
; description:
;   represents the program at the given address and puts it in the token table
;   it's probably desirable to clear the token table before calling this function.
;
; parameters:
;   rdi -> first byte of program
;   rsi = size of program in bytes
;
; returned:
;   rax = number of tokens processed
;
; notes:
;   - does not return on a syntax error: prints a message and jumps to `halt`
;   - the `.expecting` state byte is not reset on entry, so it carries over from
;     a previous call
;   - rcx, rdx, rsi and rdi are overwritten
; ------------------------------------------------------------------------------
tokenise:
                                    ; rdi -> current byte of program
    add rsi, rdi                    ; rsi -> one past the last byte of program
    xor eax, eax                    ; rax = number of tokens processed
    xor edx, edx                    ; dl = current byte of program
.loop:
    cmp rdi, rsi                    ; if current byte is past the end
    jae .break                      ; then break
    mov dl, [rdi]                   ; dl = current byte
    cmp dl, ";"                     ; if current byte is the start of a comment
    je .comment                     ; then handle the comment
    cmp dl, 0x0A                    ; if current byte is the end of a line
    je .newline_mk_flags            ; then reset relevant flags
    cmp dl, ","                     ; if current byte is a comma
    je .comma                       ; then handle the comma
    push rsi
    push rdi
    push rax
    push rdx
    mov rsi, whitespace_2           ; rsi -> list of whitespace bytes
    mov rdi, 2                      ; rdi = size of the list in bytes
                                    ; dl = current byte
    call elemb                      ; al = 0 if not whitespace, 1 if whitespace
    test eax, 1                     ; check if current byte is whitespace
    pop rdx                         ; dl = current byte
    pop rax                         ; rax = number of tokens processed
    pop rdi                         ; rdi -> current byte of program
    pop rsi                         ; rsi -> end of program
    jnz .skip_byte_whitespace       ; (pop leaves the flags of `test` intact)
    test byte [.expecting], E_LABEL ; check if a label is expected
    jnz .label                      ; if so, handle it
    test byte [.expecting], E_OPERATOR ; else, check if an operator is expected
    jnz .operator                   ; if so, handle it
    jmp .operand                    ; else, handle as an operand

.comment:
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_comment
    call print
    pop rsi                         ; rsi -> end of program
    test byte [.expecting], E_COMMENT ; make sure a comment is expected
    jz .unexpected_comment          ; if not, error
.comment_loop:
    mov dl, [rdi]                   ; dl = current byte
    cmp dl, 0x0A                    ; if current byte is a newline
    je .comment_break               ; then break
    inc rdi                         ; point to next unread byte
    cmp rdi, rsi
    jae .break
    jmp .comment_loop
.comment_break:
    jmp .loop

.skip_byte_whitespace:
    test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
    jz .unexpected_whitespace       ; if not, error
    inc rdi
    jmp .loop                       ; else, loop

.comma:
    ; found comma
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_comma
    call print
    pop rsi
    test byte [.expecting], E_COMMA ; make sure a comma was expected
    jz .unexpected_comma            ; if not, error
    inc rdi
    mov byte [.expecting], E_WHITESPACE | E_OPERAND ; else, make operand expected
    jmp .loop                       ; and loop

.newline_mk_flags:
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_newline
    call print
    pop rsi
    test byte [.expecting], E_NEWLINE ; make sure a newline was expected
    jz .unexpected_newline          ; if not, error
    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL
    inc rdi
    jmp .loop

; ---- labels ------------------------------------------------------------------
.label:
    push rax                        ; keep the token count while scanning
    xor eax, eax                    ; rax = number of bytes in label
.label_loop:
    ; Bound the scan by the byte actually being read (rdi + rax). Running out
    ; of input without a colon means "not a label", which also pops the saved
    ; token count again instead of returning with it still on the stack.
    lea rcx, [rdi + rax]            ; rcx -> next byte
    cmp rcx, rsi
    jae .label_not_found
    mov dl, [rcx]                   ; next byte
    cmp dl, ":"
    je .label_break
    cmp dl, " "
    je .label_not_found
    cmp dl, 0x0A
    je .label_not_found
    cmp dl, 0x00
    je .label_not_found
    cmp dl, ";"
    je .label_not_found
    inc eax                         ; inc byte counter
    jmp .label_loop
.label_break:
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_label
    call print
    pop rsi                         ; rsi -> end of program
    push rax
    push rdi
    push rsi
    mov rsi, rdi                    ; rsi -> start of string
    mov rdi, rax                    ; rdi = size of string
    call djb2                       ; rax = hash
    mov rdi, rax                    ; rdi = hash
    call add_label_hash             ; rax = index on label table
    mov cx, ax
    and cx, 0x0FFF
    or cx, 0x3000                   ; 0x3XXX: label token, XXX = label index
    pop rsi                         ; rsi -> end of program
    pop rdi                         ; rdi -> current byte of program
    pop rax                         ; rax = number of bytes in label
    add rdi, rax                    ; move on to next byte
    inc rdi                         ; move past the colon
    pop rax                         ; rax = number of tokens processed
    mov [TOKEN_TABLE_ADDR + rax * 2], cx
    inc rax                         ; the next token
    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE
    jmp .loop
.label_not_found:
    pop rax                         ; rax = number of tokens processed
    ; retry the same byte with labels no longer expected
    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR
    jmp .loop

; ---- operators ---------------------------------------------------------------
.operator:
    mov rcx, rax                    ; rcx = number of tokens processed
    xor eax, eax                    ; eax = number of bytes in operator
    mov [.pending_operator], eax    ; zero pending operator
.operator_loop:
    ; TODO give this its own error
    mov dl, [rdi]                   ; next byte
    ; TODO have better check for operator end
    cmp dl, " "
    je .operator_break
    cmp dl, 0x0A
    je .operator_break
    cmp dl, 0x00
    je .operator_break
    cmp dl, ";"
    je .operator_break
    cmp eax, 4                      ; .pending_operator only holds 4 bytes:
    jae .unexpected_operator        ; reject a fifth byte before storing it
    mov [.pending_operator + eax], dl
    inc eax                         ; inc byte counter
    inc rdi                         ; inc byte pointer
    cmp rdi, rsi
    jae .operator_break             ; end of input also ends the operator
    jmp .operator_loop              ; and loop
.operator_break:
    push rdi                        ; rdi -> byte after the operator
    push rcx                        ; tokens processed
    mov edi, [.pending_operator]    ; edi = operator to be searched
    call identify_operator          ; ax = operator's token ID
    movzx edi, ax                   ; di = operator's token ID
    push rdi                        ; keep the ID: get_tte_type may change edi/cx
    call get_tte_type               ; al = token type
    ; The type is kept in dl (reloaded at .loop). Writing it to sil would
    ; overwrite the low byte of rsi, the end-of-program pointer.
    mov dl, al
    pop rcx                         ; cx = operator's token ID
    pop rax                         ; rax = tokens processed
    pop rdi                         ; rdi -> byte after the operator
    cmp dl, 0x3                     ; pseudo-operator
    je .pseudo_operator
    cmp dl, 0x1                     ; operator
    jne .unexpected_operator
    ; debug message
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_operator
    call print
    pop rsi
    mov [TOKEN_TABLE_ADDR + rax * 2], cx
    inc rax                         ; plus 1 token processed
    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
    jmp .loop

.pseudo_operator:
    ; debug message
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_pseudo_operator
    call print
    pop rsi
    ; NOTE(review): the token is stored but rax is not advanced, so the next
    ; token overwrites it - confirm this is intended.
    mov [TOKEN_TABLE_ADDR + rax * 2], cx
    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
    jmp .loop

; ---- operands ----------------------------------------------------------------
.operand:
    ; debug message
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_operand
    call print
    pop rsi
    test byte [.expecting], E_OPERAND ; make sure an operand was expected
    jz .unexpected_operand          ; if not, error
    push rax
    push rdi
    xor eax, eax                    ; rax = length of operand
.operand_loop:
    cmp rdi, rsi                    ; never scan past the end of the program
    jae .operand_break
    mov dl, [rdi]
    cmp dl, ","
    je .operand_break
    cmp dl, 0x0A
    je .operand_break
    cmp dl, 0x00
    je .operand_break
    cmp dl, ";"
    je .operand_break
    inc rax                         ; inc length counter
    inc rdi                         ; inc byte pointer
    jmp .operand_loop
.operand_break:
    pop rdi                         ; rdi -> first byte of operand
    push rdi
    push rsi
    mov rsi, rax                    ; rsi = length of operand in bytes
    mov ecx, eax                    ; rcx = length (zero-extended) for safe keeping
    push rcx
    call evaluate_operand           ; dl = return code
                                    ; rax = binary data
    pop rcx
    pop rsi
    pop rdi                         ; rdi -> first byte of operand
    add rdi, rcx                    ; rdi -> byte after the operand; full-width
                                    ; add so a carry out of bit 15 is not lost
    mov rcx, rax                    ; rcx = evaluate_operand's binary return data
    pop rax                         ; rax = number of tokens processed
    ; operand is some reg
    cmp dl, 0x00                    ; cx = token ID
    je .operand_register
    ; operand is some [reg]
    cmp dl, 0x10                    ; cx = token ID
    je .operand_addr_register
    ; operand is some constant
    cmp dl, 0x20                    ; rcx = constant value
    je .operand_constant
    ; operand is some label
    cmp dl, 0x30                    ; rcx = index of label in LT
    je .operand_label
    jmp .unexpected_operand
.operand_register:
    mov [TOKEN_TABLE_ADDR + rax * 2], cx
    inc rax                         ; another token processed
    jmp .operand_break_continue
.operand_addr_register:
    mov word [TOKEN_TABLE_ADDR + rax * 2], 0x1000
    inc rax                         ; 0x1000: addr reg token, next token is the register
    mov [TOKEN_TABLE_ADDR + rax * 2], cx
    inc rax                         ; the register as returned by evaluate_operand
    jmp .operand_break_continue
.operand_constant:
    mov word [TOKEN_TABLE_ADDR + rax * 2], 0x2000
    inc rax                         ; another token processed
    mov [TOKEN_TABLE_ADDR + rax * 2], rcx ; 8-byte value fills the next 4 slots
    add rax, 4
    jmp .operand_break_continue
.operand_label:
    and cx, 0x0FFF
    or cx, 0x3000
    mov [TOKEN_TABLE_ADDR + rax * 2], cx
    inc rax
    jmp .operand_break_continue
.operand_break_continue:
    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
    jmp .loop

.break:
    ret

; state
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL

; ---- error exits (none of these return) --------------------------------------
.unexpected_whitespace:
    push rsi
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_whitespace
    call print
    pop rsi
    jmp halt

.unexpected_comment:
    push rsi
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_comment
    call print
    pop rsi
    jmp halt

.unexpected_newline:
    push rsi
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_newline
    call print
    pop rsi
    jmp halt

.unexpected_comma:
    push rsi
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_comma
    call print
    pop rsi
    jmp halt

.unexpected_operand:
    push rsi
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_operand
    call print
    pop rsi
    jmp halt

.unexpected_operator:
    push rsi
    mov rsi, .err_unexpected
    call print.error
    mov rsi, .msg_operator
    call print
    pop rsi
    jmp halt

.err_unexpected db "unexpected ", 0x00
.found db "found ", 0x00
.msg_whitespace db "whitespace.", 0x0A, 0x00
.msg_comment db "comment.", 0x0A, 0x00
.msg_newline db "newline.", 0x0A, 0x00
.msg_comma db "comma.", 0x0A, 0x00
.msg_label db "label.", 0x0A, 0x00
.msg_operator db "operator.", 0x0A, 0x00
.msg_operand db "operand.", 0x0A, 0x00
.msg_pseudo_operator db "pseudo_operator.", 0x0A, 0x00

.pending_operator dd 0              ; the operator token that is pending processing

; ------------------------------------------------------------------------------
; evaluate_operand
;
; description:
;   takes the location and length of an operand and evaluates it into binary data
;   and a return code to interpret the binary data.
;
; | code | rax contents         | notes |
; |------|----------------------|-------|
; | 0x00 | token ID of register | reg   |
; | 0x10 | token ID of register | [reg] |
; | 0x20 | constant value       | const |
; | 0x30 | index of label in LT | label |
; | 0xFF | -                    | error |
;
; A bracketed operand is evaluated recursively on its contents. The inner return
; code is shifted right by four bits and OR-ed with 0x10, so [reg] yields 0x10,
; [const] yields 0x12 and [label] yields 0x13. A directly nested [[reg]] is
; rejected with 0xFF.
;
; parameters:
; rdi -> first byte of operand
; rsi = size of operand in bytes
;
; returned:
; rax = binary data corresponding to the operand
; dl = return code
; ------------------------------------------------------------------------------

evaluate_operand:
    push rdi
    call trim_trailing_whitespace       ; rax = size without trailing whitespace
    pop rdi                             ; rdi -> first byte of operand
    mov rsi, rax                        ; rsi = trimmed size

    test rsi, rsi
    jz .unrecognised                    ; nothing left after trimming

    cmp byte [rdi], '['
    jne .register                       ; not a memory reference

  .address:
    cmp byte [rdi + rsi - 1], ']'       ; the reference has to be closed
    jne .unrecognised

    inc rdi                             ; rdi -> enclosed operand
    sub rsi, 2                          ; rsi = size of enclosed operand
    call evaluate_operand               ; rax = binary data, dl = inner code

    cmp dl, 0x10                        ; a reference inside a reference
    je .unrecognised

    ; Only dl is defined by the inner call. Shifting all of edx would drag bits
    ; 8..11 of rdx (whatever the caller left there) into the return code.
    shr dl, 4
    or dl, 0x10
    ret

  .register:
    cmp rsi, 4
    jg .constant                        ; too long to be a register name

    push rdi
    mov edi, [rdi]                      ; edi = four bytes starting at the operand

    ; Keep only the low rsi bytes of edi. rsi is 1..4 here and the masks cascade
    ; by falling through, so a 1-byte name passes through all three.
    cmp rsi, 4
    je .register4
    cmp rsi, 3
    je .register3
    cmp rsi, 2
    je .register2

  .register1:
    and edi, 0xFF
  .register2:
    and edi, 0xFFFF
  .register3:
    and edi, 0xFFFFFF
  .register4:
    call identify_register              ; ax = token ID or UNRECOGNISED_TOKEN_ID
    pop rdi                             ; rdi -> first byte of operand

    cmp ax, UNRECOGNISED_TOKEN_ID
    je .constant                        ; not a register: try a constant
    mov dl, 0x00
    ret

  .constant:
    push rdi
    push rsi
    call evaluate_constant              ; rax = value, dl = type of constant

    ; popped in swapped order on purpose: djb2 takes the size in rdi and the
    ; pointer in rsi
    pop rdi                             ; rdi = size in bytes
    pop rsi                             ; rsi -> first byte

    cmp dl, 0xFF
    je .label                           ; not a constant: treat as a label
    mov dl, 0x20
    ret

  .label:
    call djb2                           ; rax = hash of the label text
    mov rdi, rax
    call add_label_hash                 ; rax = index of the label in the LT
    mov dl, 0x30
    ret

  .unrecognised:
    xor eax, eax
    mov dl, 0xFF
    ret

; ------------------------------------------------------------------------------
; evaluate_constant
;
; description:
; takes a constant and returns its numeric value. Currently the following
; constants are supported:
;
; | type | p. | description  |
; |------|----|--------------|
; | 0x00 | 0x | hexadecimal  |
; | 0x01 | 0q | octal        |
; | 0x02 | 0b | binary       |
; | 0x03 | "  | char         |
; | 0xFF |    | unrecognised |
;
; where `p.` is the prefix or otherwise indicator.
;
; Hexadecimal digits are 0-9 and upper-case A-F; anything else makes the
; constant unrecognised. A char constant holds at most four bytes in the range
; 0x20..0x7E between double quotes; the first character ends up in the lowest
; byte of rax. When the constant is unrecognised, rax is whatever had been
; accumulated so far and must not be used.
;
; parameters:
; rdi -> first byte of constant
; rsi = size of constant in bytes
;
; returned:
; rax = value of the constant
; dl = type of constant; the rest of rdx is zeroed
; ------------------------------------------------------------------------------

evaluate_constant:
    ; Invariant for every loop below: the type to return is on top of the stack.
    ; .break pops it into rdx, .unrecognised pops and discards it. Paths that
    ; fail before anything is pushed go to .rejected instead, so the return
    ; address is never popped by mistake.
    xor eax, eax                        ; rax = value accumulated so far

    test rsi, rsi
    jz .rejected                        ; empty input

    mov dl, [rdi]                       ; dl = first byte
    dec rsi                             ; one fewer byte left
    inc rdi                             ; point to next byte

    cmp dl, '0'
    je .numeric                         ; every numeric prefix starts with 0
    cmp dl, '"'
    jne .rejected

    test rsi, rsi
    jz .rejected                        ; lone quote: nothing to close it
    push 0x03                           ; [rsp] = type: char
    xor ecx, ecx                        ; rcx = characters read so far
    jmp .chr

  .numeric:
    test rsi, rsi
    jz .rejected                        ; bare "0": no base prefix to read
    mov dl, [rdi]                       ; dl = base prefix
    dec rsi
    inc rdi

    cmp dl, 'x'
    je .hex
    cmp dl, 'q'
    je .oct
    cmp dl, 'b'
    je .bin
    jmp .rejected

  .hex:
    push 0x00                           ; [rsp] = type: hexadecimal
  .hex_loop:
    test rsi, rsi                       ; out of digits?
    jz .break
    shl rax, 4                          ; make room for the next nibble
    mov dl, [rdi]
    sub dl, '0'
    cmp dl, 9                           ; unsigned, so bytes below '0' wrap high
    jbe .hex_continue                   ; '0'..'9'
    sub dl, 7                           ; map 'A'..'F' to 0xA..0xF
    cmp dl, 0xA
    jb .unrecognised                    ; ':'..'@'
    cmp dl, 0xF
    ja .unrecognised                    ; past 'F', or below '0'
  .hex_continue:
    or al, dl                           ; add newest nibble
    dec rsi
    inc rdi
    jmp .hex_loop

  .oct:
    push 0x01                           ; [rsp] = type: octal
  .oct_loop:
    test rsi, rsi
    jz .break
    shl rax, 3                          ; make room for the next octal digit
    mov dl, [rdi]
    sub dl, '0'
    cmp dl, 7
    ja .unrecognised                    ; not in '0'..'7'
    or al, dl
    dec rsi
    inc rdi
    jmp .oct_loop

  .bin:
    push 0x02                           ; [rsp] = type: binary
  .bin_loop:
    test rsi, rsi
    jz .break
    shl rax, 1
    mov dl, [rdi]
    sub dl, '0'
    cmp dl, 1
    ja .unrecognised                    ; not '0' or '1'
    or al, dl
    dec rsi
    inc rdi
    jmp .bin_loop

  .chr:
    cmp rcx, 4                          ; at most four characters
    ja .unrecognised
    cmp rsi, 1                          ; only the closing quote left?
    je .chr_break
    ror rax, 8                          ; park earlier characters in the top bytes
    inc rcx
    mov dl, [rdi]
    cmp dl, 0x20                        ; printable ASCII only
    jb .unrecognised
    cmp dl, 0x7E
    ja .unrecognised
    or al, dl
    dec rsi
    inc rdi
    jmp .chr

  .chr_break:
    cmp rcx, 1                          ; undo all but one of the rotations
    jle .chr_break_for_good
    rol rax, 8
    dec rcx
    jmp .chr_break

  .chr_break_for_good:
    mov dl, [rdi]                       ; the last byte has to close the char
    cmp dl, '"'
    jne .unrecognised

  .break:
    pop rdx                             ; rdx = type pushed before the loop
    ret

  .unrecognised:
    pop rdx                             ; discard the pushed type
  .rejected:
    mov edx, 0xFF                       ; unrecognised type
    ret

  .msg db "evaluate_constant", 0x0A, 0x00

;
; ------------------------------------------------------------------------------
; identify_register
;
; description:
; looks a register name up in tokens.registers. The name is passed as ASCII
; bytes packed into a dword and is compared against the first dword of each
; 6-byte record; the word that follows a matching name is its token ID.
;
; parameters:
; edi = register to be searched
;
; returned:
; ax = register's token ID or UNRECOGNISED_TOKEN_ID
; ------------------------------------------------------------------------------

identify_register:
    xor eax, eax                        ; eax = byte offset into tokens.registers

  .scan:
    cmp eax, (tokens.registers_end - tokens.registers)
    jge .not_found                      ; walked off the end of the table
    cmp [tokens.registers + eax], edi
    jne .advance

    mov ax, [tokens.registers + eax + 4] ; ID sits right after the name
    ret

  .advance:
    add eax, 6                          ; next record
    jmp .scan

  .not_found:
    mov ax, UNRECOGNISED_TOKEN_ID
    ret

; ------------------------------------------------------------------------------
; identify_operator
;
; description:
; looks an operator name up in tokens.operators. The name is passed as ASCII
; bytes packed into a dword and is compared against the first dword of each
; 6-byte record; the word that follows a matching name is its token ID.
;
; parameters:
; edi = operator to be searched
;
; returned:
; ax = operator's token ID or UNRECOGNISED_TOKEN_ID
; ------------------------------------------------------------------------------

identify_operator:
    xor eax, eax                        ; eax = byte offset into tokens.operators

  .scan:
    cmp eax, (tokens.operators_end - tokens.operators)
    jge .not_found                      ; walked off the end of the table
    cmp [tokens.operators + eax], edi
    jne .advance

    mov ax, [tokens.operators + eax + 4] ; ID sits right after the name
    ret

  .advance:
    add eax, 6                          ; next record
    jmp .scan

  .not_found:
    mov ax, UNRECOGNISED_TOKEN_ID
    ret

; ------------------------------------------------------------------------------
; utilities
; ------------------------------------------------------------------------------

; ------------------------------------------------------------------------------
; print
;
; description:
; writes a null-terminated string, one byte at a time, to I/O port 0x3F8.
; rax, rdx and rsi are saved and restored, so it can be dropped into code
; being debugged. print.debug, print.error, print.test and print.warn do the
; same after first writing a coloured tag.
;
; parameters:
; rsi -> start of null-terminated string
; ------------------------------------------------------------------------------
print:
    push rdx
    push rax
    push rsi

    mov edx, 0x3F8                      ; dx = port written to (conventionally COM1)

  .next_byte:
    mov al, [rsi]
    test al, al                         ; terminator reached?
    jz .done
    out dx, al
    inc rsi
    jmp .next_byte

  .done:
    pop rsi
    pop rax
    pop rdx
    ret

; Body shared by the tagged variants: print the tag given as %1, then print the
; caller's own string by jumping to print, so print's ret returns straight to
; the original caller.
%macro print_with_prefix 1
    push rsi                            ; keep the caller's string
    mov rsi, %1
    call print
    pop rsi
    jmp print                           ; tail call
%endmacro

  .debug:
    print_with_prefix .debug_msg

  .error:
    print_with_prefix .error_msg

  .test:
    print_with_prefix .test_msg

  .warn:
    print_with_prefix .warn_msg

  .debug_msg db 0x1B, "[36m", "[DEBUG]: ", 0x1B, "[0m", 0x00
  .error_msg db 0x1B, "[1;31m", "[ERROR]: ", 0x1B, "[0m", 0x00
  .test_msg db 0x1B, "[1;33m", "[TEST]: ", 0x1B, "[0m", 0x00
  .warn_msg db 0x1B, "[1;35m", "[WARN]: ", 0x1B, "[0m", 0x00

; ------------------------------------------------------------------------------
; halt
;
; description:
; prints msg_halt and executes hlt. If execution resumes after the hlt, the
; message is printed and the processor halted again; this never returns.
; ------------------------------------------------------------------------------

halt:
    push rsi
    mov rsi, msg_halt
    call print
    pop rsi

    hlt
    jmp halt                            ; resumed after hlt: halt again

; ------------------------------------------------------------------------------
; elemb
;
; description:
; checks if given byte is element of the specified list.
;
; parameters:
; rdi = size of list
; rsi -> start of list
; dl = given byte
;
; returned:
; rax = 0: is not an element
;       1: is an element
;
; rsi and rdi are advanced while searching and are not restored.
; ------------------------------------------------------------------------------

elemb:
  .check:
    test rdi, rdi                       ; anything left to look at?
    jz .absent
    cmp dl, [rsi]
    je .present
    inc rsi                             ; step to the next byte
    dec rdi
    jmp .check

  .present:
    mov eax, 1                          ; rax = 1: dl is in the list
    ret

  .absent:
    xor eax, eax                        ; rax = 0: dl is not in the list
    ret

; ------------------------------------------------------------------------------
; djb2
;
; description:
; gets the 64-bit djb2 hash of a given string: starting from 5381, each byte
; updates the hash as hash * 33 + byte.
;
; parameters:
; rdi = size of string
; rsi -> start of string
;
; returned:
; rax = hash
; ------------------------------------------------------------------------------

djb2:
    mov eax, 5381                       ; rax = hash, seeded
    xor ecx, ecx                        ; rcx = index of the next byte

  .next:
    cmp rcx, rdi
    jge .finished
    imul rax, rax, 33                   ; same as (hash << 5) + hash
    movzx edx, byte [rsi + rcx]         ; rdx = current byte
    add rax, rdx
    inc rcx
    jmp .next

  .finished:
    ret

; ------------------------------------------------------------------------------
; trim_trailing_whitespace
;
; description:
; trims whitespace from the end of the given byte array; the start of the array
; is left as it is.
;
; parameters:
; rdi -> start of list
; rsi = size of list
;
; returned:
; rax = new size of list
;
; A byte counts as whitespace when it is one of the two bytes in whitespace_2.
; rdi is preserved; rsi ends up equal to the returned size.
; ------------------------------------------------------------------------------

trim_trailing_whitespace:
  .check:
    test rsi, rsi                       ; nothing left to trim
    jz .done

    push rsi
    push rdi
    mov dl, [rdi + rsi - 1]             ; dl = last byte of the list
    mov rsi, whitespace_2               ; rsi -> whitespace list
    mov edi, 2                          ; rdi = length of whitespace list
    call elemb                          ; rax = 1 if dl is whitespace
    pop rdi                             ; rdi -> start of list
    pop rsi                             ; rsi = size of list

    test eax, eax
    jz .done                            ; last byte is not whitespace: finished
    dec rsi                             ; drop it and look at the one before
    jmp .check

  .done:
    mov rax, rsi
    ret

; ------------------------------------------------------------------------------
; add_label_hash
;
; description:
; adds a label hash to the label table, or just finds it if already present.
; Entries are 16 bytes; the hash is stored in the first 8 bytes of an entry and
; a hash slot of 0 marks the entry as empty. If every entry is taken by another
; hash, an error is printed and the program halts instead of writing past the
; end of the table.
;
; parameters
; rdi = 64-bit hash to be added
;
; returned
; rax = index in label table
; ------------------------------------------------------------------------------

add_label_hash:
    xor eax, eax                        ; rax = byte offset of the current entry

  .loop:
    cmp rax, LABEL_TABLE_SIZE
    jge .full
    mov rcx, [LABEL_TABLE_ADDR + rax]   ; rcx = hash stored in this entry
    ; TODO bug if there's an empty slot before the entry, it won't be found
    test rcx, rcx                       ; empty slot
    jz .break
    cmp rcx, rdi                        ; already present
    je .break
    add rax, 16
    jmp .loop

  .break:
    mov [LABEL_TABLE_ADDR + rax], rdi
    shr rax, 4                          ; rax / 16 = index
    ret

  .full:
    mov rsi, .err_full
    call print.error
    jmp halt

  .err_full db "label table full.", 0x0A, 0x00

; ------------------------------------------------------------------------------
; add_label_address
;
; description:
; adds a label's address to the label table. The address lives in the second
; 8 bytes of the label's 16-byte entry, directly after its hash.
;
; An address slot of 0 is taken to mean "not set yet", so a label whose address
; is 0 can be set again without this being reported.
;
; parameters
; rdi = low 12 bits: index of label table to add the address to
; rsi = 64-bit address to be added, relative to start of program
;
; returned
; rax = return value: 0 = success
;                     1 = failure: label already in the table
; ------------------------------------------------------------------------------

add_label_address:
    and edi, 0xFFF                      ; keep the index bits
    shl rdi, 4                          ; rdi = byte offset of the entry

    mov rax, [LABEL_TABLE_ADDR + 8 + rdi] ; rax = address already on record
    test rax, rax
    jnz .ret_1

    mov [LABEL_TABLE_ADDR + 8 + rdi], rsi
    xor eax, eax
    ret

  .ret_1:
    mov eax, 1
    ret

; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
; clears the token table as specified by TOKEN_TABLE_SIZE and TOKEN_TABLE_ADDR
; ------------------------------------------------------------------------------

clear_token_table:
    xor eax, eax                        ; value to write
    mov ecx, TOKEN_TABLE_SIZE / 4       ; number of double words
    mov edi, TOKEN_TABLE_ADDR           ; address to start
    rep stosd
    ret

; ------------------------------------------------------------------------------
; clear_label_tables
;
; description:
; clears the label table as specified by LABEL_TABLE_SIZE and LABEL_TABLE_ADDR
; and the awaiting label table as specified by AWAITING_LABEL_TABLE_SIZE and
; AWAITING_LABEL_TABLE_ADDR
; ------------------------------------------------------------------------------

clear_label_tables:
    xor eax, eax                        ; value to write, for both tables

    mov ecx, LABEL_TABLE_SIZE / 4       ; number of double words
    mov edi, LABEL_TABLE_ADDR           ; address to start
    rep stosd

    mov ecx, AWAITING_LABEL_TABLE_SIZE / 4 ; number of double words
    mov edi, AWAITING_LABEL_TABLE_ADDR  ; address to start
    rep stosd
    ret

; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
; clears the test arena as specified by TEST_ARENA_SIZE and TEST_ARENA_ADDR
; ------------------------------------------------------------------------------

clear_test_arena:
    xor eax, eax                        ; value to write
    mov ecx, TEST_ARENA_SIZE / 4        ; number of double words
    mov edi, TEST_ARENA_ADDR            ; address to start
    rep stosd
    ret

; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
; clears the output arena as specified by OUTPUT_SIZE and OUTPUT_ADDR
; ------------------------------------------------------------------------------

clear_output_arena:
    xor eax, eax                        ; value to write
    mov ecx, OUTPUT_SIZE / 4            ; number of double
; words
    mov edi, OUTPUT_ADDR                ; address to start
    rep stosd
    ret

%include "asm/tests.asm"

; ------------------------------------------------------------------------------
; data
; ------------------------------------------------------------------------------

; one tokens.by_id record: token ID (word), type (byte), typed metadata (byte)
%macro token_record 3
    dw %1
    db %2
    db %3
%endmacro

; one name lookup record: the name as a dword (ASCII, zero-padded by the
; assembler when shorter than four bytes) followed by the token ID (word)
%macro name_record 2
    dd %1
    dw %2
%endmacro

align 16 ; for readability in hexdump
tokens:
  .by_id:
    ; registers (type 0x02)
    ; metadata: bits 2..4 = reg, bits 0..1 = width
    token_record 0x0000, 0x02, 00000011b ; rax  reg: 000b  width: 11b (64 bits)
    token_record 0x0001, 0x02, 00001111b ; rbx  reg: 011b  width: 11b (64 bits)
    token_record 0x0002, 0x02, 00000111b ; rcx  reg: 001b  width: 11b (64 bits)
    token_record 0x0003, 0x02, 00001011b ; rdx  reg: 010b  width: 11b (64 bits)
    token_record 0x0004, 0x02, 00011011b ; rsi  reg: 110b  width: 11b (64 bits)
    token_record 0x0005, 0x02, 00011111b ; rdi  reg: 111b  width: 11b (64 bits)
    token_record 0x0006, 0x02, 00010011b ; rsp  reg: 100b  width: 11b (64 bits)
    token_record 0x0007, 0x02, 00010111b ; rbp  reg: 101b  width: 11b (64 bits)

    token_record 0x0010, 0x02, 00000010b ; eax  reg: 000b  width: 10b (32 bits)
    token_record 0x0011, 0x02, 00001110b ; ebx  reg: 011b  width: 10b (32 bits)
    token_record 0x0012, 0x02, 00000110b ; ecx  reg: 001b  width: 10b (32 bits)
    token_record 0x0013, 0x02, 00001010b ; edx  reg: 010b  width: 10b (32 bits)
    token_record 0x0014, 0x02, 00011010b ; esi  reg: 110b  width: 10b (32 bits)
    token_record 0x0015, 0x02, 00011110b ; edi  reg: 111b  width: 10b (32 bits)
    token_record 0x0016, 0x02, 00010010b ; esp  reg: 100b  width: 10b (32 bits)
    token_record 0x0017, 0x02, 00010110b ; ebp  reg: 101b  width: 10b (32 bits)

    token_record 0x0020, 0x02, 00000001b ; ax   reg: 000b  width: 01b (16 bits)
    token_record 0x0021, 0x02, 00001101b ; bx   reg: 011b  width: 01b (16 bits)
    token_record 0x0022, 0x02, 00000101b ; cx   reg: 001b  width: 01b (16 bits)
    token_record 0x0023, 0x02, 00001001b ; dx   reg: 010b  width: 01b (16 bits)
    token_record 0x0024, 0x02, 00011001b ; si   reg: 110b  width: 01b (16 bits)
    token_record 0x0025, 0x02, 00011101b ; di   reg: 111b  width: 01b (16 bits)
    token_record 0x0026, 0x02, 00010001b ; sp   reg: 100b  width: 01b (16 bits)
    token_record 0x0027, 0x02, 00010101b ; bp   reg: 101b  width: 01b (16 bits)

    token_record 0x0030, 0x02, 00000000b ; al   reg: 000b  width: 00b (8 bits)
    token_record 0x0031, 0x02, 00001100b ; bl   reg: 011b  width: 00b (8 bits)
    token_record 0x0032, 0x02, 00000100b ; cl   reg: 001b  width: 00b (8 bits)
    token_record 0x0033, 0x02, 00001000b ; dl   reg: 010b  width: 00b (8 bits)
    token_record 0x0034, 0x02, 00011000b ; sil  reg: 110b  width: 00b (8 bits)
    token_record 0x0035, 0x02, 00011100b ; dil  reg: 111b  width: 00b (8 bits)
    token_record 0x0036, 0x02, 00010000b ; spl  reg: 100b  width: 00b (8 bits)
    token_record 0x0037, 0x02, 00010100b ; bpl  reg: 101b  width: 00b (8 bits)

    ; operators (type 0x01)
    ; metadata: # operands
    token_record 0x004F, 0x01, 0x00 ; hlt
    token_record 0x0050, 0x01, 0x00 ; int3
    token_record 0x0053, 0x01, 0x02 ; xor
    token_record 0x0054, 0x01, 0x01 ; inc
    token_record 0x0055, 0x01, 0x01 ; dec
    token_record 0x0056, 0x01, 0x02 ; mov
    token_record 0x0057, 0x01, 0x02 ; add
    token_record 0x0058, 0x01, 0x02 ; sub
    token_record 0x0059, 0x01, 0x01 ; call
    token_record 0x005A, 0x01, 0x00 ; ret
    token_record 0x005B, 0x01, 0x02 ; cmp
    token_record 0x005C, 0x01, 0x01 ; jmp
    token_record 0x005D, 0x01, 0x01 ; je
    token_record 0x005E, 0x01, 0x01 ; jne
    token_record 0x005F, 0x01, 0x01 ; push
    token_record 0x0060, 0x01, 0x01 ; pop
    token_record 0x0061, 0x01, 0x02 ; out

    ; pseudo-operators (type 0x03)
    ; metadata: # operands
    token_record 0x0100, 0x03, 0x01 ; db
  .by_id_end:

  .operators:
    name_record "hlt", 0x004F
    name_record "int3", 0x0050
    name_record "xor", 0x0053
    name_record "inc", 0x0054
    name_record "dec", 0x0055
    name_record "mov", 0x0056
    name_record "add", 0x0057
    name_record "sub", 0x0058
    name_record "call", 0x0059
    name_record "ret", 0x005A
    name_record "cmp", 0x005B
    name_record "jmp", 0x005C
    name_record "je", 0x005D
    name_record "jne", 0x005E
    name_record "push", 0x005F
    name_record "pop", 0x0060
    name_record "out", 0x0061
    name_record "db", 0x0100
  .operators_end:

  .registers:
    name_record "r8", 0x0008
    name_record "r9", 0x0009
    name_record "ax", 0x0020
    name_record "bx", 0x0021
    name_record "cx", 0x0022
    name_record "dx", 0x0023
    name_record "si", 0x0024
    name_record "di", 0x0025
    name_record "sp", 0x0026
    name_record "bp", 0x0027
    name_record "al", 0x0030
    name_record "bl", 0x0031
    name_record "cl", 0x0032
    name_record "dl", 0x0033
    name_record "ah", 0x0040
    name_record "bh", 0x0041
    name_record "ch", 0x0042
    name_record "dh", 0x0043
    name_record "cs", 0x0044
    name_record "ds", 0x0045
    name_record "es", 0x0046
    name_record "fs", 0x0047
    name_record "gs", 0x0048
    name_record "ss", 0x0049
    name_record "rax", 0x0000
    name_record "rbx", 0x0001
    name_record "rcx", 0x0002
    name_record "rdx", 0x0003
    name_record "rsi", 0x0004
    name_record "rdi", 0x0005
    name_record "rsp", 0x0006
    name_record "rbp", 0x0007
    name_record "r10", 0x000A
    name_record "r11", 0x000B
    name_record "r12", 0x000C
    name_record "r13", 0x000D
    name_record "r14", 0x000E
    name_record "r15", 0x000F
    name_record "eax", 0x0010
    name_record "ebx", 0x0011
    name_record "ecx", 0x0012
    name_record "edx", 0x0013
    name_record "esi", 0x0014
    name_record "edi", 0x0015
    name_record "esp", 0x0016
    name_record "ebp", 0x0017
    name_record "r8d", 0x0018
    name_record "r9d", 0x0019
    name_record "r8w", 0x0028
    name_record "r9w", 0x0029
    name_record "sil", 0x0034
    name_record "dil", 0x0035
    name_record "spl", 0x0036
    name_record "bpl", 0x0037
    name_record "r8b", 0x0038
    name_record "r9b", 0x0039
    name_record "cr0", 0x004A
    name_record "cr2", 0x004B
    name_record "cr3", 0x004C
    name_record "cr4", 0x004D
    name_record "cr8", 0x004E
    name_record "r10d", 0x001A
    name_record "r11d", 0x001B
    name_record "r12d", 0x001C
    name_record "r13d", 0x001D
    name_record "r14d", 0x001E
    name_record "r15d", 0x001F
    name_record "r10w", 0x002A
    name_record "r11w", 0x002B
    name_record "r12w", 0x002C
    name_record "r13w", 0x002D
    name_record "r14w", 0x002E
    name_record "r15w", 0x002F
    name_record "r10b", 0x003A
    name_record "r11b", 0x003B
    name_record "r12b", 0x003C
    name_record "r13b", 0x003D
    name_record "r14b", 0x003E
    name_record "r15b", 0x003F
  .registers_end:

; Every record in opcodes.by_id is 16 bytes long and starts with the token ID it
; belongs to. Keep that size when adding a record: a shorter one shifts every
; record after it.
align 16 ; for readability in hexdump
opcodes:
  .by_id:
    ; hlt
    dw 0x004F
    db 0xF4 ; opcode
    db 0x00 ; reserved
    dd 0x00000000
    dd 0x00000000
    dd 0x00000000

    ; int3
    dw 0x0050
    db 0xCC ; opcode
    db 0x00 ; reserved
    dd 0x00000000
    dd 0x00000000
    dd 0x00000000

    ; xor
    dw 0x0053
    db 0x31 ; r/m <- r
    db 0x33 ; r <- r/m
    db 0x81 ; r/m <- imm16/32
    db 0x83 ; r/m <- imm8
    dw 0x0000
    dd 0x00006600
    ; 00:
    ; 6: r/m <- imm16/32 op flag
    ; 6: r/m <- imm8 op flag
    ; 0x0000:
    dd 0x00000000 ; reserved

    ; inc
    dw 0x0054
    db 0xFF ; r/m
    db 0x00
    dd 0x00000000
    dd 0x00000000
    ; 0: r/m op flag
    ; 0000000:
    dd 0x00000000

    ; dec
    dw 0x0055
    db 0xFF ; r/m
    db 0x00
    dd 0x00000000
    dd 0x00000001
    ; 1: r/m op flag
    ; 0000000:
    dd 0x00000000

    ; mov
    dw 0x0056
    db 0x89 ; r/m <- r
    db 0x8B ; r <- r/m
    db 0xC7 ; r/m <- imm16/32
    db 0x00
    dw 0x0000
    dd 0x00000000
    ; 00:
    ; 0: r/m <- imm16/32 op flag
    ; 00000:
    dd 0x00000000

    ; mov bit8
    dw 0x0056
    db 0x88 ; r/m8 <- r8
    db 0x8A ; r8 <- r/m8
    db 0x00
    db 0xC6 ; r/m8 <- imm8
    dw 0x0000
    dd 0x01000000
    ; 000:
    ; 0: r/m8 <- imm8 op flag
    ; 00:
    ; 01: bit8 flag
    dd 0x00000000

    ; add
    dw 0x0057
    db 0x01 ; r/m <- r
    db 0x03 ; r <- r/m
    db 0x81 ; r/m <- imm16/32
    db 0x83 ; r/m <- imm8
    dw 0x0000
    dd 0x00000000
    ; 00:
    ; 0: r/m <- imm16/32 op flag
    ; 0: r/m <- imm8 op flag
    ; 0000:
    dd 0x00000000

    ; sub
    dw 0x0058
    db 0x29 ; r/m <- r
    db 0x2B ; r <- r/m
    db 0x81 ; r/m <- imm16/32
    db 0x83 ; r/m <- imm8
    dw 0x0000
    dd 0x00005500
    ; 00:
    ; 5: r/m <- imm16/32 op flag
    ; 5: r/m <- imm8 op flag
    ; 0000:
    dd 0x00000000

    ; call
    dw 0x0059
    db 0xFF ; r/m
    db 0x00
    dw 0x0000
    db 0xE8 ; rel16/32
    db 0x00
    dd 0x00000002
    ; 2: r/m op flag
    ; 0000000:
    dd 0x00000000

    ; retn
    dw 0x005A
    db 0xC3 ; opcode
    db 0x00 ; reserved
    dd 0x00000000
    dd 0x00000000
    dd 0x00000000

    ; cmp
    dw 0x005B
    db 0x39 ; r/m <- r
    db 0x3B ; r <- r/m
    db 0x81 ; r/m <- imm16/32
    db 0x83 ; r/m <- imm8
    dw 0x0000
    dd 0x00007700
    ; 00:
    ; 7: r/m <- imm16/32 op flag
    ; 7: r/m <- imm8 op flag
    ; 0000:
    dd 0x00000000

    ; cmp bit8
    dw 0x005B
    db 0x38 ; r/m8 <- r8
    db 0x3A ; r8 <- r/m8
    db 0x00
    db 0x80 ; r/m8 <- imm8
    dw 0x0000
    dd 0x01007000
    ; 000:
    ; 7: r/m8 <- imm8 op flag
    ; 00:
    ; 01: bit8 flag
    dd 0x00000000 ; pads the record to 16 bytes like every other record

    ; jmp
    dw 0x005C
    db 0xFF ; r/m
    db 0x00
    dw 0x0000
    db 0xE9 ; rel16/32
    db 0xEB ; rel8
    dd 0x00000004
    ; 4: r/m
    ; 000:
    ; 0: rel16/32
    ; 0: rel8
    ; 00:
    dd 0x00000000

    ; je
    dw 0x005D
    dw 0x0000
    dw 0x0000
    db 0x00 ; TODO figure out the 0x0F prefix this will need
    db 0x74 ; rel8
    dd 0x00000000
    ; 00000:
    ; 0: rel8
    ; 00:
    dd 0x00000000

    ; jne
    dw 0x005E
    dw 0x0000
    dw 0x0000
    db 0x00 ; TODO figure out the 0x0F prefix this will need
    db 0x75 ; rel8
    dd 0x00000000
    ; 00000:
    ; 0: rel8
    ; 00:
    dd 0x00000000

    ; push
    ; TODO add support for the +r variation
    dw 0x005F
    db 0xFF ; r/m
    db 0x00
    db 0x68 ; imm16/32
    db 0x6A ; imm8
    dw 0x0000
    dd 0x00000006
    ; 6: r/m
    ; 0:
    ; 0: imm16/32
    ; 0: imm8
    ; 0000:
    dd 0x00000000

    ; pop
    ; TODO add support for the +r variation
    dw 0x0060
    db 0x8F ; r/m
    db 0x00
    dd 0x00000000
    dd 0x00000000
    ; 0: r/m
    ; 0000000:
    dd 0x00000000

    ; out
    ; TODO enforce DX AL requirement, ignore ModR/M correctly
    dw 0x0061
    db 0xEE
    db 0x00
    dd 0x00000000
    dd 0x00000000
    dd 0x00000000
  .by_id_end:

msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00
msg_halt db "halted.", 0x0A, 0x00

; the two bytes trim_trailing_whitespace treats as whitespace
whitespace_2 db " ", 0x0D

; test program
program:
    db "print:", 0x0A
    db " push rdx", 0x0A
    db " push rax", 0x0A
    db " push rsi", 0x0A
    db "", 0x0A
    db " mov edx, 0x3F8", 0x0A
    db " .loop:", 0x0A
    db " mov al, [rsi]", 0x0A
    db " cmp al, 0x00", 0x0A
    db " je .done", 0x0A
    db " out dx, al", 0x0A
    db " inc rsi", 0x0A
    db " jmp .loop", 0x0A
    db " .done:", 0x0A
    db " pop rsi", 0x0A
    db " pop rax", 0x0A
    db " pop rdx", 0x0A
    db " ret", 0x0A
    db " .debug:", 0x0A
    db " push rsi", 0x0A
    db " mov rsi, .debug_msg", 0x0A
    db " call print", 0x0A
    db " pop rsi", 0x0A
    db " jmp print ; tail call", 0x0A
    db " .error:", 0x0A
    db " push rsi", 0x0A
    db " mov rsi, .error_msg", 0x0A
    db " call print", 0x0A
    db " pop rsi", 0x0A
    db " jmp print ; tail call", 0x0A
    db " .test:", 0x0A
    db " push rsi", 0x0A
    db " mov rsi, .test_msg", 0x0A
    db " call print", 0x0A
    db " pop rsi", 0x0A
    db " jmp print ; tail call", 0x0A
    db " .warn:", 0x0A
    db " push rsi", 0x0A
    db " mov rsi, .warn_msg", 0x0A
    db " call print", 0x0A
    db " pop rsi", 0x0A
    db " jmp print ; tail call", 0x0A
    db " .debug_msg:", 0x0A
    db " db 0x1B", 0x0A
    db ' db "[36m"', 0x0A
    db ' db "[DEBUG]: "', 0x0A
    db " db 0x1B", 0x0A
    db ' db "[0m"', 0x0A
    db " db 0x00", 0x0A
    db " .error_msg:", 0x0A
    db " db 0x1B", 0x0A
    db ' db "[1;31m"', 0x0A
    db ' db "[ERROR]: "', 0x0A
    db " db 0x1B", 0x0A
    db ' db "[0m"', 0x0A
    db " db 0x00", 0x0A
    db " .test_msg:", 0x0A
    db " db 0x1B", 0x0A
    db ' db "[1;33m"', 0x0A
    db ' db "[TEST]: "', 0x0A
    db " db 0x1B", 0x0A
    db ' db "[0m"', 0x0A
    db " db 0x00", 0x0A
    db " .warn_msg:", 0x0A
    db " db 0x1B", 0x0A
    db ' db "[1;35m"', 0x0A
    db ' db "[WARN]: "', 0x0A
    db " db 0x1B", 0x0A
    db ' db "[0m"', 0x0A
    db " db 0x00", 0x0A
  .size dq $ - program

msg_end db "end of the binary ->|", 0x0A, 0x00