From 18125e6b204aa0473cca8b2d87b31d521e41ace2 Mon Sep 17 00:00:00 2001 From: andromeda Date: Thu, 2 Apr 2026 00:13:47 +0200 Subject: [PATCH] fix fatal bug in data, get prefix down, start work on awaiting_label, add debug function, more --- twasm/README.md | 28 ++++---- twasm/asm/main.asm | 173 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 165 insertions(+), 36 deletions(-) diff --git a/twasm/README.md b/twasm/README.md index 6509be7..8d0ce3b 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -95,15 +95,15 @@ label definitions are stored and recalled from this table. The memory addresses ``` 16 bytes -+---------+ -| 127 64 | -+---------+ -| address | -+---------+ -| 63 0 | -+---------+ -| hash | -+---------+ ++----------+---------+ +| 127 96 | 95 64 | ++----------+---------+ +| reserved | address | ++----------+---------+ +| 63 0 | ++--------------------+ +| hash | ++--------------------+ ``` #### awaiting label table (ALT) @@ -113,7 +113,7 @@ forward references are stored in this table to be filled in after assembly is ot ``` 16 bytes +----------+----------+------------------+---------+ -| 127 105 | 104 104 | 103 96 | 95 64 | +| 127 101 | 100 | 99 96 | 95 64 | +----------+----------+------------------+---------+ | reserved | abs flag | # bytes reserved | address | +----------+----------+------------------+---------+ @@ -263,7 +263,7 @@ entries are as follows: | 63 56 | 55 48 | 47 40 | 39 32 | +----------+---------------+---------------+------------------+ | opcode | opcode | opcode | opcode | -| dst=rel8 | dest=rel | dst=imm8 | dst=imm | +| dst=rel8 | dst=rel | dst=imm8 | dst=imm | +----------+---------------+---------------+------------------+ | 31 24 | 23 16 | 15 0 | +----------+---------------+----------------------------------+ @@ -319,7 +319,8 @@ rel ; rel 16/32 rel8 ; rel 8 opX&8 ; low 8 bits are the operator flag that goes with opcode at offset X from - ; the first opcode in the table entry + ; the first opcode in the table entry. 
High bit is (somewhat confusingly) + ; a flag for whether or not the operator comes with an `0F` prefix ``` note much room to expand. If an opcode doesn't exist, it should be 0x00 @@ -431,8 +432,7 @@ supported tokens are listed below | db | 0x0100 | pseudo-operator | | | 0x10XX | some memory address; `XX` is as specified below | | | 0x20XX | some constant; `XX` is as specified below | -| | 0x3XXX | some label definition; `XXX` is its entry index in the label table | -| | 0x4XXX | some label reference; `XXX` is its entry index in the label table +| | 0x3XXX | some label; `XXX` is its entry index in the label table | | | 0xFEXX | used to pass some raw value `XX` in place of a token id to a couple of functions that mention this as a feature. If the function doesn't mention it, it will lead to undefined behaviour | | | 0xFFFF | unrecognised token | diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index 7f09981..aec5199 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -112,16 +112,23 @@ assemble: jmp .unexpected_token ; otherwise, fail + dq 0,0,0 ; TODO figure out why this does anything and fix it :shrug: .label: push rsi mov rsi, .msg_label call print.debug pop rsi + mov esi, [.next_output_byte] + sub esi, OUTPUT_ADDR ; esi = relative address of label to start of program + and edi, 0xFFF ; edi = index to add address hash to + call add_label_address jmp .loop_next_token .operator: + push rdi ; di = tte of operator call get_tte_typed_metadata ; al = tte typed metadata + pop rdi ; di = tte of operator cmp al, UNRECOGNISED_ID_METADATA ; make sure token has metadata on record je .unexpected_token ; if not, fail @@ -144,8 +151,9 @@ assemble: call print.debug pop rsi - ; di = tte of operator - mov sil, 0b ; opcode + ; di = tte of operator + mov sil, 0b ; opcode + xor ebx, ebx ; no flags call get_opcode ; al = opcode ; dl = 0x00 @@ -158,18 +166,45 @@ assemble: call print.debug pop rsi - ; di = tte of operator - mov sil, 0b ; dst=r/m + push rdi + call .next_token 
+ jge .break + call .get_next_tte + mov rcx, rdi ; cx = operand tte + pop rdi + push rcx + + + ; di = tte of operator + mov sil, 0 ; dst=r/m + xor ebx, ebx ; no flags + + shr ecx, 12 + and ecx, 0xF + cmp ecx, 0x3 + je .operator_1_get_opcode_label + + jmp .operator_1_get_opcode_continue + .operator_1_get_opcode_label: + mov sil, 4 ; dst=rel + .operator_1_get_opcode_continue: call get_opcode ; al = opcode ; dl = op flag push rdx + and dl, 1000b + cmp dl, 1000b + jne .operator_1_write_prefix_continue + .operator_1_write_prefix + push rax + mov al, 0x0F + call .write_byte + pop rax ; al = opcode + .operator_1_write_prefix_continue call .write_byte pop rdx ; dl = op flag - call .next_token - jge .break - call .get_next_tte + pop rdi ; di = next tte push rdi and di, 0xFF00 @@ -177,15 +212,20 @@ assemble: pop rdi ; di = next tte je .operator_1_memory + push rdi push rdx ; di = next tte call get_tte_type ; al = type of token pop rdx ; dl = op flag + pop rdi ; di = next tte cmp al, 0x02 ; type: register je .operator_1_register + cmp al, 0x04 ; type: label reference + je .operator_1_label + jmp .unexpected_token .operator_1_memory: push rsi @@ -227,13 +267,34 @@ assemble: .operator_1_register_no_prefix: mov si, di ; si = `R/M` tte and edx, 0xFF - or edx, 0xFE00 ; pass di as direct value - mov edi, edx ; di = op flag - mov edx, 11b ; dl = mod bits + or edx, 0xFE00 ; pass di as direct value + mov edi, edx ; di = op flag + mov edx, 11b ; dl = mod bits call get_ModRM ; al = Mod R/M byte call .write_byte + jmp .loop_next_token + .operator_1_label: + push rsi + mov rsi, .msg_operator_1_label + call print.debug + pop rsi + + mov esi, [.next_output_byte] + sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start + ; of program + mov eax, 0x04 ; al = first 4 bits: # bytes reserved + ; 5th bit: abs flag + and edi, 0xFFF ; edi = index of hash + call add_awaiting_label + + mov al, 0xFF ; reserve space + call .write_byte + call .write_byte + call .write_byte + call 
.write_byte + jmp .loop_next_token .operator_2: push rsi @@ -274,6 +335,7 @@ assemble: mov edi, ecx ; di = tte of operator xor esi, esi ; dst=r/m; src=r + xor ebx, ebx ; no flags call get_opcode ; al = opcode ; dl = op flag @@ -435,7 +497,7 @@ assemble: push rsi mov di, cx ; di = tte of operator mov sil, 1 ; dst = reg - mov bl, 1 ; bl = operator flag byte + ; bl = operator flag byte call get_opcode ; al = opcode ; dl = op flag @@ -614,7 +676,6 @@ assemble: mov di, cx ; di = tte of operator mov sil, 2 ; dst=r/m,src=imm ; bl = operator flag byte - ; TODO change sil based on whether bl is 8 bit or not push rbx and ebx, 1 cmp bl, 1 ; bit8 flag @@ -629,7 +690,14 @@ assemble: ; dl = op flag ; TODO do something if the op flag is present call .write_byte - pop rsi ; si = tte + mov edi, edx ; si = op flag + and edi, 0xFF + or edi, 0xFE00 + pop rsi ; si = r/m; dst tte + mov edx, 11b ; dl = mod bits + call get_ModRM + ; al = Mod R/M byte + call .write_byte call .next_token jge .break @@ -834,6 +902,7 @@ assemble: .msg_operator_1 db "operator_1", 0x0A, 0x00 .msg_operator_1_memory db "operator_1_memory", 0x0A, 0x00 .msg_operator_1_register db "operator_1_register", 0x0A, 0x00 + .msg_operator_1_label db "operator_1_label", 0x0A, 0x00 .msg_operator_2 db "operator_2", 0x0A, 0x00 .msg_operator_2_memory db "operator_2_memory", 0x0A, 0x00 .msg_operator_2_memory_register db "operator_2_memory_register", 0x0A, 0x00 @@ -1014,7 +1083,9 @@ get_ModRM: ; ; returned: ; al = opcode; the rest of rax is zeroed. -; dl = lower 3 bits: op flag, if applicable. The rest of rdx is zeroed. +; dl = lower 3 bits: op flag, if applicable. +; 4th bit: 0x0F prefix flag +; the rest of rdx is zeroed. 
; ------------------------------------------------------------------------------
 
 get_opcode:
@@ -1112,6 +1183,7 @@ get_reg_bits:
 ;   rax = number of tokens processed
 ; ------------------------------------------------------------------------------
 
+align 16 ; idk, fixes some alignment bug sometimes :/
 tokenise:
   ; rdi -> current byte of program
   add rsi, rdi ; rsi -> last byte of program
@@ -1952,6 +2024,25 @@ print:
   .test_msg db 0x1B, "[1;33m", "[TEST]: ", 0x1B, "[0m", 0x00
   .warn_msg db 0x1B, "[1;35m", "[WARN]: ", 0x1B, "[0m", 0x00
 
+; ------------------------------------------------------------------------------
+; print_least_4_bits
+;
+; description:
+;   prints the least significant 4 bits of rax for debugging reasons
+; ------------------------------------------------------------------------------
+
+print_least_4_bits:
+  push rax ; rax and rsi are restored before returning
+  push rsi
+  add eax, 0x30 ; al = '0' + value; NOTE(review): no 0xF mask, only 0-9 give a digit
+  mov [.byte], al ; patch the character into the message below
+  mov rsi, .byte ; rsi -> message; presumably print.warn prints up to the 0x00
+  call print.warn
+  pop rsi
+  pop rax
+  ret
+  .byte db 0x00, 0x0A, 0x00
+
 ; ------------------------------------------------------------------------------
 ; halt
 ;
@@ -2123,7 +2214,7 @@ add_label_hash:
 ;
 ; parameters
 ;   rdi = lower 3 bytes: index of label table to add the address to
-;   rsi = 64-bit address to be added, relative to start of program
+;   esi = 32-bit address to be added, relative to start of program
 ;
 ; returned
 ;   rax = return value: 0 = success
@@ -2133,16 +2224,52 @@ add_label_hash:
 add_label_address:
   and edi, 0xFFF
   shl rdi, 4 ; rdi * 16
-  mov rax, [LABEL_TABLE_ADDR + rdi]
-  cmp rax, 0
+  mov eax, [LABEL_TABLE_ADDR + 8 + rdi] ; eax = dword at entry offset 8 (address field)
+  cmp eax, 0 ; 0 is treated as "no address stored yet"
   jne .ret_1
-  mov [LABEL_TABLE_ADDR + 16 + rdi], rsi
+  mov [LABEL_TABLE_ADDR + 8 + rdi], esi ; store the 32-bit relative address
   xor eax, eax
   ret
   .ret_1:
   mov eax, 1
   ret
 
+; ------------------------------------------------------------------------------
+; add_awaiting_label
+;
+; description:
+;   adds metadata of a forward reference to be completed in a later pass
+;
+; parameters
+;   rdi = lower 12 bits: index of label table this forward reference is awaiting
+;   esi = 32-bit address of reference, relative to start of program
+;   al  = lower 4 bits: # bytes reserved at [OUTPUT_ADDR + esi] for the reference
+;         5th bit (0x10): abs flag, set if the reference is absolute
+; ------------------------------------------------------------------------------
+
+add_awaiting_label:
+  and edi, 0xFFF ; edi = label table index
+  and esi, esi ; 32-bit write: clears the upper half of rsi
+  and eax, 0x1F ; keep # bytes reserved (bits 0-3) and abs flag (bit 4)
+  push rax ; saved while rax is used as the scan offset
+  xor eax, eax ; rax = byte offset of the candidate table entry
+  .loop:
+    cmp rax, AWAITING_LABEL_TABLE_SIZE
+    jge .break ; NOTE(review): a full table is not handled; the entry lands past the end
+    mov rcx, [AWAITING_LABEL_TABLE_ADDR + rax] ; rcx = first qword (hash) of this entry
+    cmp rcx, 0 ; empty slot
+    je .break
+    add rax, 16 ; entries are 16 bytes
+    jmp .loop
+  .break:
+  pop rcx ; cl = bits passed to al
+  shl rdi, 4 ; rdi * 16
+  mov rdx, [LABEL_TABLE_ADDR + rdi] ; rdx = first qword (hash) of the awaited label's entry
+  mov [AWAITING_LABEL_TABLE_ADDR + rax], rdx ; hash
+  mov [AWAITING_LABEL_TABLE_ADDR + 8 + rax], esi ; address
+  mov [AWAITING_LABEL_TABLE_ADDR + 12 + rax], cl ; entry bits 96-100: # bytes reserved + abs flag
+  ret
+
 ; ------------------------------------------------------------------------------
 ; clear_token_table
 ;
@@ -2818,6 +2945,8 @@ opcodes:
                 ; 00:
                 ; 01: bit8 flag
 
+  dd 0x00000000
+
   ; jmp
   dw 0x005C
   db 0xFF ; r/m
@@ -2840,12 +2969,12 @@ opcodes:
   dw 0x0000
   dw 0x0000
 
-  db 0x00 ; TODO figure out the 0x0F prefix this will need
+  db 0x84 ; rel16/32
   db 0x74 ; rel8
 
-  dd 0x00000000 ; 00000:
-                ; 0: rel8
-                ; 00:
+  dd 0x00080000 ; 0000:
+                ; 8: rel16/32 0x0F flag
+                ; 000:
 
   dd 0x00000000
 