diff --git a/twasm/README.md b/twasm/README.md index 3f63fe7..3b63414 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -210,11 +210,11 @@ type metadata for the different types is as follows: entries are as follows: ``` -+----------+--------+----------+ -| 31 24 | 23 16 | 15 0 | -+----------+--------+----------+ -| reserved | opcode | token ID | -+----------+--------+----------+ ++-----------------+-----------------+----------+ +| 31 24 | 23 16 | 15 0 | ++-----------------+-----------------+----------+ +| dest=reg opcode | dest=r/m opcode | token ID | ++-----------------+-----------------+----------+ ``` note the lack of support for multiple-byte opcodes or multiple opcodes for one token ID; these features will likely be added at some point after the parser accumulates too much jank. diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index 5cac78b..32a08d8 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -124,16 +124,20 @@ assemble: pop rsi push rdi - ; di = tte of operator + push rsi + ; di = tte of operator + mov sil, 0b ; standard opcode call get_opcode ; al = opcode call .output_byte + pop rsi pop rdi pop rax ; rax = number of tokens processed pop rdi ; rdi = total number of tokens inc rax cmp rax, rdi + jge .break jmp .loop ; di = tte of operator @@ -144,10 +148,13 @@ assemble: pop rsi push rdi - ; di = tte of operator + push rsi + ; di = tte of operator + mov sil, 0b ; dst=r/m call get_opcode ; al = opcode call .output_byte + pop rsi pop rdi ; di = tte of operator pop rax ; rax = number of tokens processed @@ -214,12 +221,7 @@ assemble: call print.debug pop rsi - push rdi - ; di = tte of operator - call get_opcode - ; al = opcode - call .output_byte - pop rdi ; di = tte of operator + mov cx, di ; cx = tte of operator pop rax pop rdi @@ -237,9 +239,11 @@ assemble: pop rdi ; di = next tte je .operator_2_memory_access + push rcx ; di = next tte call get_tte_type ; al = type of token + pop rcx ; cx = tte of operator cmp al, 0x02 ; type: register je .operator_2_register @@ -256,7 +260,80 @@ assemble: mov rsi, .msg_operator_2_memory_access call print.debug pop rsi - jmp .unsupported_memory_access + + cmp di, 0x1000 ; check if token is addressing a register + jne .unsupported_memory_access ; if not, unsupported :/ + + + push rdi + mov di, cx ; di = tte of operator + mov sil, 0 ; dst = r/m + call get_opcode + ; al = opcode + call .output_byte + pop rdi + + pop rax + pop rdi + inc rax + cmp rax, rdi + jge .break + push rdi + push rax + xor edi, edi + mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + + mov si, di ; si = dst tte + + pop rax + pop rdi + inc rax + cmp rax, rdi + jge .break + push rdi + push rax + xor edi, edi + mov di, [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; di = next tte + + push rdi + and di, 0xFF00 + cmp di, 0x1000 ; check if token is a memory address + pop rdi ; di = next tte + je .unsupported_memory_access ; no case of *],[* in asm + + ; di = next tte + call get_tte_type + ; al = type of token + + cmp al, 0x02 + je .operator_2_memory_access_register + + pop rax ; rax = number of tokens processed + pop rdi ; rdi = total number of tokens + inc rax + cmp rax, rdi + jge .break + jmp .loop + + .operator_2_memory_access_register: + push rsi + mov rsi, .msg_operator_2_memory_access_register + call print.debug + pop rsi + + ; si = r/m; dst tte + ; di = reg; src tte + mov dl, 00b ; dl = mod bits + call get_ModRM + ; al = Mod R/M byte + call .output_byte + + pop rax ; rax = number of tokens processed + pop rdi ; rdi = total number of tokens + inc rax + cmp rax, rdi + jge .break + jmp .loop .operator_2_register: push rsi @@ -264,7 +341,15 @@ assemble: call print.debug pop rsi - mov si, di ; si = `R/M` tte + push rdi + mov di, cx ; di = tte of operator + mov sil, 1 ; dst = reg + call get_opcode + ; al = opcode + call .output_byte + pop rdi + + mov si, di ; si = dst tte pop rax pop rdi @@ -290,7 +375,7 @@ assemble: je .operator_2_register_register pop rax ; rax = number of tokens processed - pop rdi ; rdi = total nnumber of tokens + pop rdi ; rdi = total number of tokens inc rax cmp rax, rdi jge .break @@ -338,11 +423,9 @@ assemble: call print.debug pop rsi - ; si = `R/M` tte - ; di = `reg` tte push rsi - mov si, di - pop rdi + mov si, di ; si = reg; src tte + pop rdi ; di = r/m; dst tte mov dl, 11b ; dl = mod bits call get_ModRM ; al = Mod R/M byte @@ -388,6 +471,7 @@ assemble: .msg_operator_1_register db "operator_1_register", 0x0A, 0x00 .msg_operator_2 db "operator_2", 0x0A, 0x00 .msg_operator_2_memory_access db "operator_2_memory_access", 0x0A, 0x00 + .msg_operator_2_memory_access_register db "operator_2_memory_access_register", 0x0A, 0x00 .msg_operator_2_register db "operator_2_register", 0x0A, 0x00 .msg_operator_2_register_memory_access db "operator_2_register_memory_access", 0x0A, 0x00 .msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00 @@ -514,10 +598,13 @@ get_ModRM: ; get_opcode ; ; description: -; given an operator token, returns its opcode +; given an operator token, returns its opcode. For operators with multiple +; opcodes, the variant can be specified. ; ; parameters: ; di = token table entry +; sil = lower bit: 0: dst=r/m or only opcode +; 1: dst=reg or 0x00 ; ; returned: ; al = opcode; the rest of rax is zeroed @@ -525,6 +612,7 @@ get_ModRM: get_opcode: and rdi, 0xFFFF + and rsi, 1 xor eax, eax .loop: @@ -543,7 +631,7 @@ get_opcode: mov al, UNRECOGNISED_ID_OPCODE ret .found: - mov al, [2 + opcodes.by_id + rax * 4] + mov al, [rsi + 2 + opcodes.by_id + rax * 4] and rax, 0xFF ; mask ret @@ -1707,20 +1795,20 @@ tokens: opcodes: .by_id: dw 0x0053 ; xor - db 0x31 - db 0x00 ; reserved + db 0x31 ; r/m <- reg + db 0x33 ; reg <- r/m dw 0x0054 ; inc - db 0xFF - db 0x00 ; reserved + db 0xFF ; r/m + db 0x00 ; dw 0x0056 ; mov - db 0x8B - db 0x00 ; reserved + db 0x89 ; r/m <- reg + db 0x8B ; reg <- r/m dw 0x004F ; hlt - db 0xF4 - db 0x00 ; reserved + db 0xF4 ; . + db 0x00 ; .by_id_end: msg_welcome db "Welcome to Twasm", 0x0A, 0x00 @@ -1735,7 +1823,7 @@ program: db "; one line comment", 0x0A db "mov rdx, [rax]", 0x0A db "mov [rax], rdx", 0x0A - db "hlt" + db "hlt", 0x0A .size dq $ - program msg_end db "end of the binary ->|", 0x0A, 0x00 diff --git a/twasm/asm/tests.asm b/twasm/asm/tests.asm index 23e0178..bc12ecd 100644 --- a/twasm/asm/tests.asm +++ b/twasm/asm/tests.asm @@ -251,6 +251,11 @@ test_get_tte_typed_metadata: cmp al, 0x02 ; # operands jne .fail + mov di, 0x004F ; hlt + call get_tte_typed_metadata + cmp al, 0x00 ; # operands + jne .fail + mov di, 0x0003 ; rdx call get_tte_typed_metadata cmp al, 00001011b ; reg: 010b @@ -289,21 +294,32 @@ test_get_opcode: call print.test mov di, 0x0053 ; xor + mov sil, 0b call get_opcode cmp al, 0x31 jne .fail + mov di, 0x0053 ; xor + mov sil, 1b + call get_opcode + cmp al, 0x33 + jne .fail + + mov di, 0x0054 ; inc + mov sil, 0b call get_opcode cmp al, 0xFF jne .fail mov di, 0x004F ; hlt + mov sil, 0b call get_opcode cmp al, 0xF4 jne .fail mov di, 0x0003 ; rdx (not an operator) + mov sil, 0b call get_opcode cmp al, UNRECOGNISED_ID_OPCODE jne .fail