diff --git a/twasm/README.md b/twasm/README.md index c479b33..2921ffa 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -160,7 +160,7 @@ the `type` hex digit is defined as the following: | hex | meaning | examples | |-----|----------|-| -| 0x0 | ignored | `; this entire comment is 1 token` | +| 0x0 | ignored | | | 0x1 | operator | `mov`, `hlt` | | 0x2 | register | `rsp`, `al` | | 0xF | unknown | any token ID not represented in the lookup table | diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index 7552c38..5ebc9cb 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -7,8 +7,6 @@ TEST_ARENA_SIZE equ 0x1000 ; maximum size tests can use TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at TOKEN_TABLE_SIZE equ 0x1000 ; max length of table -TOKEN_TABLE_ENTRY_SIZE equ 2 ; size of token table entry; things may break - ; if this ever changes OUTPUT_ADDR equ 0x00070000 ; address of outputed binary OUTPUT_SIZE equ 0x1000 ; max length of outputed binary @@ -49,11 +47,14 @@ start: mov rdi, program ; -> program mov rsi, [program.size] ; = size of program call tokenise - ; rax = number of tokens processed + ; rax = number of tokens in token table mov rdi, rax + push rdi + call clear_output_arena - pop rdi + + pop rdi ; rdi = number of tokens in token table call assemble jmp halt @@ -64,8 +65,6 @@ start: ; ------------------------------------------------------------------------------ ; assemble -; TODO write tests -; TODO make it work :/ putting the cart before the horse ; ; description: ; assembles the program from tokens located at TOKEN_TABLE_ADDR into a flat @@ -85,25 +84,33 @@ start: ; ------------------------------------------------------------------------------ assemble: - xor eax, eax ; rax = number of tokens processed - mov [.tokens_total], edi ; rdi = number of tokens in table + ; TODO deal with src=imm and src=imm8 + xor eax, eax + mov [.tokens_processed], eax ; eax = number of tokens processed + mov [.tokens_total], edi ; edi = total number of tokens in table .loop: - xor edi, edi - mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte + call .get_next_tte + ; di = tte + call get_tte_type + ; al = type + + cmp al, 0x1 ; check if next tte is an operator + je .operator ; if so, handle + jmp .unexpected_token ; otherwise, fail - ; di = tte of operator .operator: ; if next tte's type is an operator: - push rdi ; di = tte of operator - call get_tte_typed_metadata + call get_tte_typed_metadata ; al = tte typed metadata - pop rdi + + cmp al, UNRECOGNISED_ID_METADATA ; make sure token has metadata on record + je .unexpected_token ; if not, fail and al, 11b ; mask for # operands - cmp al, 0 ; check if operator has no operands - je .operator_0 ; if so, handle case of no operands + cmp al, 0 ; check if operator has no operands + je .operator_0 ; if so, handle case of no operands cmp al, 1 ; check if operator has one operand je .operator_1 ; if so, handle case of one operand @@ -111,39 +118,25 @@ assemble: cmp al, 2 ; check if operator has two operands je .operator_2 ; if so, handle case of two operands - jmp .unexpected_token ; TODO actually check operator type or not first - ; if get_tte_typed_metadata happens to return 0, 1, - ; or 2 on a non-operator, it doesn't get caught + jmp .unexpected_token - ; di = tte of operator .operator_0: - push rsi mov rsi, .msg_operator_0 call print.debug - pop rsi - push rdi - push rsi ; di = tte of operator - mov sil, 0b ; standard opcode + mov sil, 0b ; opcode call get_opcode ; al = opcode - ; dl = op flag (none) + ; dl = 0x00 call .output_byte - pop rsi - pop rdi jmp .loop_next_token - ; di = tte of operator .operator_1: - push rsi mov rsi, .msg_operator_1 call print.debug - pop rsi - push rdi - push rsi ; di = tte of operator mov sil, 0b ; dst=r/m call get_opcode @@ -152,13 +145,10 @@ assemble: push rdx call .output_byte pop rdx ; dl = op flag - pop rsi - pop rdi ; di = tte of operator call .next_token jge .break - xor edi, edi - mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + call .get_next_tte push rdi and di, 0xFF00 @@ -178,23 +168,19 @@ assemble: jmp .loop_next_token .operator_1_memory: - push rsi mov rsi, .msg_operator_1_memory call print.debug - pop rsi jmp .unsupported_memory_access .operator_1_register: - push rsi mov rsi, .msg_operator_1_register call print.debug - pop rsi mov si, di ; si = `R/M` tte and edx, 0xFF - or dx, 0xFE00 ; pass di as direct value - mov di, dx ; di = op flag - mov dl, 11b ; dl = mod bits + or edx, 0xFE00 ; pass di as direct value + mov edi, edx ; di = op flag + mov edx, 11b ; dl = mod bits call get_ModRM ; al = Mod R/M byte call .output_byte @@ -202,17 +188,14 @@ assemble: jmp .loop_next_token .operator_2: - push rsi mov rsi, .msg_operator_2 call print.debug - pop rsi mov cx, di ; cx = tte of operator call .next_token jge .break - xor edi, edi - mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + call .get_next_tte push rdi and di, 0xFF00 @@ -232,57 +215,49 @@ assemble: jmp .loop_next_token .operator_2_memory: - push rsi mov rsi, .msg_operator_2_memory call print.debug - pop rsi cmp di, 0x1000 ; check if token is addressing a register - jne .unsupported_memory_access ; if not, unsupported :/ + jne .unsupported_memory_access ; if not, unsupported - - push rdi - mov di, cx ; di = tte of operator - mov sil, 0 ; dst = r/m + mov edi, ecx ; di = tte of operator + xor esi, esi ; dst=r/m; src=r call get_opcode ; al = opcode ; dl = op flag - ; TODO act accordingly if the op flag is present call .output_byte - pop rdi call .next_token jge .break - xor edi, edi - mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + call .get_next_tte - mov si, di ; si = dst tte + mov si, di ; si = dst register tte call .next_token jge .break - xor edi, edi - mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + call .get_next_tte push rdi and di, 0xFF00 cmp di, 0x1000 ; check if token is a memory address pop rdi ; di = next tte - je .unsupported_memory_access ; no case of *],[* in asm + je .unsupported_memory_access ; if so, fail; no case of *],[* in asm ; di = next tte call get_tte_type ; al = type of token - cmp al, 0x02 - je .operator_2_memory_register + cmp al, 0x02 ; check if token is a register + je .operator_2_memory_register ; if so, handle - jmp .loop_next_token + jmp .unexpected_token .operator_2_memory_register: push rsi mov rsi, .msg_operator_2_memory_register call print.debug - pop rsi + pop rsi ; si = r/m ; si = r/m; dst tte ; di = reg; src tte @@ -294,10 +269,8 @@ assemble: jmp .loop_next_token .operator_2_register: - push rsi mov rsi, .msg_operator_2_register call print.debug - pop rsi push rdi mov di, cx ; di = tte of operator @@ -307,14 +280,13 @@ assemble: ; dl = op flag ; TODO do something if the op flag is present call .output_byte - pop rdi + pop rdi ; di = dst tte mov si, di ; si = dst tte call .next_token jge .break - xor edi, edi - mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + call .get_next_tte push rdi and di, 0xFF00 @@ -326,31 +298,30 @@ assemble: call get_tte_type ; al = type of token - cmp al, 0x02 - je .operator_2_register_register + cmp al, 0x02 ; check if token is a register + je .operator_2_register_register ; if so, handle - jmp .loop_next_token + jmp .unexpected_token .operator_2_register_memory: push rsi mov rsi, .msg_operator_2_register_memory call print.debug - pop rsi + pop rsi ; si = dst tte cmp di, 0x1000 ; check if token is addressing to a register - jne .unsupported_memory_access ; if not, unsupported :/ + jne .unsupported_memory_access ; if not, unsupported call .next_token jge .break - xor edi, edi - mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + call .get_next_tte - ; si = `R/M` tte - ; di = `reg` tte + ; si = r/m; dst tte + ; di = reg; src tte push rsi - mov si, di - pop rdi - mov dl, 00b ; dl = mod bits + mov esi, edi ; si = reg; src tte + pop rdi ; di = r/m; dst tte + mov edx, 00b ; dl = mod bits call get_ModRM ; al = Mod R/M byte call .output_byte @@ -358,15 +329,13 @@ assemble: jmp .loop_next_token .operator_2_register_register: - push rsi mov rsi, .msg_operator_2_register_register call print.debug - pop rsi push rsi - mov si, di ; si = reg; src tte - pop rdi ; di = r/m; dst tte - mov dl, 11b ; dl = mod bits + mov esi, edi ; si = reg; src tte + pop rdi ; di = r/m; dst tte + mov edx, 11b ; dl = mod bits call get_ModRM ; al = Mod R/M byte call .output_byte @@ -402,6 +371,13 @@ assemble: cmp eax, edi ret + ; eax = current entry index in token table + ; returns di = next tte + .get_next_tte: + xor edi, edi + mov di, [eax * 2 + TOKEN_TABLE_ADDR] + ret + .tokens_processed dd 0 .tokens_total dd 0 @@ -444,27 +420,27 @@ assemble: ; ------------------------------------------------------------------------------ get_tte_type: - and rdi, 0xFFFF ; mask input so it behaves as expected - xor eax, eax + and edi, 0xFFFF ; di = token table entry + xor eax, eax ; eax = tokens.by_id index .loop: - cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range + cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check jg .not_found - mov cx, [tokens.by_id + rax * 4] ; next entry in tokens.by_id + mov cx, [tokens.by_id + eax * 4] ; next entry in tokens.by_id cmp cx, di je .found - inc rax + inc eax jmp .loop .not_found: - mov al, UNRECOGNISED_ID_TYPE - and ax, 0xF ; mask as expected + mov eax, UNRECOGNISED_ID_TYPE + and eax, 0xF ret .found: - mov al, [2 + tokens.by_id + rax * 4] - and ax, 0xF ; mask as expected + mov al, [2 + tokens.by_id + eax * 4] + and eax, 0xF ; mask as expected ret ; ------------------------------------------------------------------------------ @@ -483,27 +459,26 @@ get_tte_type: ; ------------------------------------------------------------------------------ get_tte_typed_metadata: - and rdi, 0xFFFF ; mask input so it behaves as expected - xor eax, eax + and edi, 0xFFFF ; di = token table entry + xor eax, eax ; eax = tokens.by_id index .loop: - cmp rax, (tokens.by_id_end - tokens.by_id) / 4 ; make sure it's still in range + cmp eax, (tokens.by_id_end - tokens.by_id) / 4 ; index range check jg .not_found - mov cx, [tokens.by_id + rax * 4] ; next entry in tokens.by_id + mov cx, [tokens.by_id + eax * 4] ; next entry in tokens.by_id cmp cx, di je .found - inc rax + inc eax jmp .loop .not_found: - xor eax, eax - mov al, UNRECOGNISED_ID_METADATA + mov eax, UNRECOGNISED_ID_METADATA ret .found: - mov al, [3 + tokens.by_id + rax * 4] - and rax, 0xFF + mov al, [3 + tokens.by_id + eax * 4] + and eax, 0xFF ret ; ------------------------------------------------------------------------------ @@ -524,11 +499,14 @@ get_tte_typed_metadata: get_ModRM: push rbx - and dl, 11b ; mask for mod bits - shl dl, 6 + and edi, 0xFFFF ; di = token table entry `reg` + and esi, 0xFFFF ; si = token table entry `R/M` + + and edx, 11b ; dl = mod bits + shl edx, 6 ; and position push rdi - shr di, 8 + shr edi, 8 cmp dil, 0xFE pop rdi je .pass_di_as_op_flag @@ -541,25 +519,25 @@ get_ModRM: jmp .continue .pass_di_as_op_flag: - mov bl, dil ; bl = op flag - and bl, 111b ; mask + mov ebx, edi ; bl = op flag + and ebx, 111b .continue: - shl bl, 3 + shl ebx, 3 - mov rdi, rsi ; do the other one + mov edi, esi ; do the other one ; di = tte call get_reg_bits ; al = reg bits - mov cl, al + mov ecx, eax xor eax, eax - or al, dl ; mod bits - or al, bl ; reg bits - or al, cl ; R/M bits - and rax, 0xFF ; mask for byte + or eax, edx ; mod bits + or eax, ebx ; reg bits + or eax, ecx ; R/M bits + and eax, 0xFF ; mask for byte pop rbx ret @@ -581,49 +559,45 @@ get_ModRM: ; ------------------------------------------------------------------------------ get_opcode: - and rdi, 0xFFFF + and edi, 0xFFFF ; di = token table entry - add rsi, 2 - and rsi, 111b - sub rsi, 2 + add esi, 2 + and esi, 111b ; offset within opcode entry + sub esi, 2 ; between 0 and 5 - xor eax, eax + xor eax, eax ; eax = opcodes.by_id index .loop: - cmp rax, (opcodes.by_id_end - opcodes.by_id) / 16 ; make sure it's still in range + cmp eax, (opcodes.by_id_end - opcodes.by_id) / 16 ; make sure it's still in range jg .not_found - shl rax, 4 - mov cx, [opcodes.by_id + rax] ; next entry in opcodes.by_id - shr rax, 4 + shl eax, 4 + mov cx, [opcodes.by_id + eax] ; next entry in opcodes.by_id + shr eax, 4 cmp cx, di je .found - inc rax + inc eax jmp .loop .not_found: xor eax, eax - mov al, UNRECOGNISED_ID_OPCODE + mov eax, UNRECOGNISED_ID_OPCODE ret .found: - shl rax, 4 + shl eax, 4 push rsi - shr rsi, 1 - mov dl, [rsi + 8 + opcodes.by_id + rax] + shr esi, 1 + mov dl, [esi + 8 + opcodes.by_id + eax] pop rsi - push rsi - and rsi, 1 - cmp esi, 1 ; check if offset is odd - pop rsi - jne .found_continue - - shr dl, 4 ; if so, actually 1 further on dl byte + test esi, 1 ; check if offset is odd + jz .found_continue + shr edx, 4 ; if so, upper part of dl byte .found_continue: - mov al, [rsi + 2 + opcodes.by_id + rax] - and rax, 0xFF ; mask - and rdx, 0x0F ; mask + mov al, [esi + 2 + opcodes.by_id + eax] + and eax, 0xFF + and edx, 0x0F ret ; ------------------------------------------------------------------------------ @@ -644,8 +618,8 @@ get_reg_bits: ; di = tte call get_tte_typed_metadata ; al = typed metadata - shr al, 2 ; discard type data - and al, 111b ; mask + shr eax, 2 ; discard type data + and eax, 111b ; mask ret ; ------------------------------------------------------------------------------ @@ -823,7 +797,7 @@ tokenise: pop rdi ; rdi = byte counter pop rax ; rax = tokens processed - mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx + mov [TOKEN_TABLE_ADDR + rax * 2], cx inc rax ; plus 1 token processed mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND @@ -891,15 +865,15 @@ tokenise: ; cx = token ID .operand_register: - mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx + mov [TOKEN_TABLE_ADDR + rax * 2], cx inc rax ; another token processed jmp .operand_break_continue ; cx = token ID .operand_addr_register: - mov word [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], 0x1000 + mov word [TOKEN_TABLE_ADDR + rax * 2], 0x1000 inc rax ; 0x1000: addr reg token, next token is the register - mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], cx + mov [TOKEN_TABLE_ADDR + rax * 2], cx inc rax ; the register as returned by evaluate_operand jmp .operand_break_continue @@ -983,11 +957,8 @@ tokenise: ; ------------------------------------------------------------------------------ evaluate_operand: - push rdi - - push rsi - mov rsi, rdi ; rsi -> start of operand - pop rdi ; rdi = size of operand + push rdi ; rdi -> start of operand + ; rsi = size of operand call trim_trailing_whitespace pop rdi ; rdi -> first byte of operand @@ -1467,38 +1438,38 @@ djb2: ; trims whitespace from the start and end of the given byte array. ; ; parameters: -; rdi = size of list -; rsi -> start of list +; rdi -> start of list +; rsi = size of list ; ; returned: ; rax = new size of list ; ------------------------------------------------------------------------------ trim_trailing_whitespace: - cmp rdi, 0 ; list of length zero - je .done ; already trimmed + test rsi, rsi ; list of length zero + jz .done ; already trimmed - push rdi push rsi + push rdi - mov dl, [rsi + rdi - 1] ; last element of given list + mov dl, [rdi + rsi - 1] ; last element of given list mov rsi, whitespace_2 ; pointer of whitespace list - mov rdi, 2 ; length of whitespace list + mov edi, 2 ; length of whitespace list call elemb - pop rsi ; rsi -> start of list - pop rdi ; rdi = size of list + pop rdi ; rdi -> start of list + pop rsi ; rsi = size of list - cmp al, 0 ; if last element whitespace - je .done ; then break + test eax, eax ; if last element whitespace + jz .done ; then break .trim: ; otherwise one shorter - dec rdi + dec rsi call trim_trailing_whitespace ret .done: - mov rax, rdi + mov rax, rsi ret ; ------------------------------------------------------------------------------ @@ -1510,8 +1481,8 @@ trim_trailing_whitespace: clear_token_table: xor eax, eax ; value to write - mov rcx, TOKEN_TABLE_SIZE / 4 ; number of double words - mov rdi, TOKEN_TABLE_ADDR ; address to start + mov ecx, TOKEN_TABLE_SIZE / 4 ; number of double words + mov edi, TOKEN_TABLE_ADDR ; address to start rep stosd ret @@ -1524,8 +1495,8 @@ clear_token_table: clear_test_arena: xor eax, eax ; value to write - mov rcx, TOKEN_TABLE_SIZE / 4 ; number of double words - mov rdi, TOKEN_TABLE_ADDR ; address to start + mov ecx, TOKEN_TABLE_SIZE / 4 ; number of double words + mov edi, TOKEN_TABLE_ADDR ; address to start rep stosd ret @@ -1538,8 +1509,8 @@ clear_test_arena: clear_output_arena: xor eax, eax ; value to write - mov rcx, OUTPUT_SIZE / 4 ; number of double words - mov rdi, OUTPUT_ADDR ; address to start + mov ecx, OUTPUT_SIZE / 4 ; number of double words + mov edi, OUTPUT_ADDR ; address to start rep stosd ret diff --git a/twasm/asm/tests.asm b/twasm/asm/tests.asm index dd82315..980d53d 100644 --- a/twasm/asm/tests.asm +++ b/twasm/asm/tests.asm @@ -654,7 +654,7 @@ test_evaluate_operand: msg_pass: db 0x0A - times (TEST_LINE_LENGTH + .start + 5 - .end) db " ", ; right align + times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align db 0x1B, "[32m" .start db "passed." .end db 0x1B, "[0m", 0x0A, 0x00