diff --git a/twasm/README.md b/twasm/README.md
index ce6beec..d6d2ec5 100644
--- a/twasm/README.md
+++ b/twasm/README.md
@@ -144,6 +144,20 @@ type metadata for the different types is as follows:
 11b	; 64 bit
 ```
 
+#### `opcodes.by_id`
+
+entries are as follows:
+
+```
++----------+--------+----------+
+| 31    24 | 23  16 | 15     0 |
++----------+--------+----------+
+| reserved | opcode | token ID |
++----------+--------+----------+
+```
+
+note the lack of support for multiple-byte opcodes or multiple opcodes for one token ID; these features will likely be added at some point after the parser accumulates too much jank.
+
 ### token IDs
 
 supported tokens are listed below
diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm
index 1dfad3b..12f54fd 100644
--- a/twasm/asm/main.asm
+++ b/twasm/asm/main.asm
@@ -18,6 +18,7 @@ STACK_ADDR equ 0x00060000		; address to put the 64-bit stack at
 UNRECOGNISED_TOKEN_ID equ 0xFFFF	; id of an unrecognised token
 UNRECOGNISED_ID_TYPE equ 0x0F		; type of an unrecognised id
 UNRECOGNISED_ID_METADATA equ 0xFF	; metadata of an unrecognised id
+UNRECOGNISED_ID_OPCODE equ 0x90		; opcode of an unrecognised id (NOP)
 
 TEST_LINE_LENGTH equ 80			; right border of test suite results
 
@@ -81,24 +82,43 @@ assemble:
 	je .operator
 	jne .continue_operator
 
-	.operator
+	.operator:
 		push rsi
 		mov rsi, .msg_found_operator
 		call print
 		pop rsi
 
-	.continue_operator
+	push rdi
+
+	; di = tte
+	call get_tte_typed_metadata
+	; al = tte typed metadata
+
+	pop rdi
+
+	and al, 11b			; mask for # operands
+
+	cmp al, 0			; 0 operands
+	je .operator_0
+	jne .continue_operator0
+
+	.operator_0:
+		jmp .continue_operator
+
+	.continue_operator0:
+	; NOTE(review): ax = # operands here; 2 would match the register check below - confirm
+	.continue_operator:
 	cmp ax, 0x02			; check if it's a register
 	je .register
 	jne .continue_register
 
-	.register
+	.register:
 		push rsi
 		mov rsi, .msg_found_register
 		call print
 		pop rsi
 
-	.continue_register
+	.continue_register:
 	pop rax				; incrementer
 	pop rdi				; total number of tokens
 
@@ -109,6 +129,8 @@ assemble:
 	ret
 	.msg_found_operator db "found operator", 0x0A, 0x00
 	.msg_found_register db "found register", 0x0A, 0x00
+	.next_register dw UNRECOGNISED_TOKEN_ID
+	.next_next_register dw UNRECOGNISED_TOKEN_ID
 
 ; ------------------------------------------------------------------------------
 ; get_tte_type
@@ -185,6 +207,7 @@ get_tte_typed_metadata:
 		ret
 	.found:
 		mov al, [3 + tokens.by_id + rax * 4]
+		and rax, 0xFF
 		ret
 
 ; ------------------------------------------------------------------------------
@@ -199,7 +222,7 @@ get_tte_typed_metadata:
 ;	si = token table entry `R/M`
 ;
 ; returned:
-;	al = ModR/M byte
+;	al = ModR/M byte; the rest of rax is zeroed
 ; ------------------------------------------------------------------------------
 
 get_direct_addressing_ModRM:
@@ -244,6 +267,49 @@ get_direct_addressing_ModRM:
 	and rax, 0xFF			; mask for byte
 	ret
 
+; ------------------------------------------------------------------------------
+; get_opcode
+;
+; description:
+;	given an operator token, returns its opcode by searching opcodes.by_id
+;	for the entry whose token ID matches
+;
+; parameters:
+;	di = token table entry
+;
+; returned:
+;	al = opcode; the rest of rax is zeroed
+;	     (UNRECOGNISED_ID_OPCODE if no entry matches)
+;
+; clobbered:
+;	cx; the upper 48 bits of rdi are zeroed
+; ------------------------------------------------------------------------------
+
+get_opcode:
+	and rdi, 0xFFFF
+	xor eax, eax			; rax = index of the entry being checked
+
+	.loop:
+		; an index equal to the entry count is already past the table
+		cmp rax, (opcodes.by_id_end - opcodes.by_id) / 4
+		jae .not_found
+
+		mov cx, [opcodes.by_id + rax * 4]	; next entry in opcodes.by_id
+
+		cmp cx, di
+		je .found
+
+		inc rax
+		jmp .loop
+	.not_found:
+		xor eax, eax
+		mov al, UNRECOGNISED_ID_OPCODE
+		ret
+	.found:
+		mov al, [2 + opcodes.by_id + rax * 4]
+		and rax, 0xFF		; mask
+		ret
+
 ; ------------------------------------------------------------------------------
 ; tokenising
 ; ------------------------------------------------------------------------------
@@ -961,6 +1027,25 @@ tokens:
 		db 0x00			; # operands
 	.by_id_end:
 
+opcodes:
+	.by_id:
+		dw 0x0053		; xor
+		db 0x33
+		db 0x00			; reserved
+
+		dw 0x0054		; inc
+		db 0xFF
+		db 0x00			; reserved
+
+		dw 0x0056		; mov
+		db 0x8B
+		db 0x00			; reserved
+
+		dw 0x004F		; hlt
+		db 0xF4
+		db 0x00			; reserved
+	.by_id_end:
+
 msg_welcome db "Welcome to Twasm", 0x0A, 0x00
 msg_halt db "halted.", 0x0A, 0x00
 
diff --git a/twasm/asm/tests.asm b/twasm/asm/tests.asm
index 8480554..22d626a 100644
--- a/twasm/asm/tests.asm
+++ b/twasm/asm/tests.asm
@@ -37,6 +37,9 @@ run_tests:
 	call clear_test_arena
 	call test_get_direct_addressing_ModRM
 
+	call clear_test_arena
+	call test_get_opcode
+
 	ret
 	.msg db "running test suite...", 0x0A, 0x00
 
@@ -513,6 +516,47 @@ test_get_direct_addressing_ModRM:
 		ret
 	.msg db "test_get_direct_addressing_ModRM...", 0x00
 
+; ------------------------------------------------------------------------------
+; test_get_opcode
+;
+; description:
+;	tests get_opcode described functionality
+; ------------------------------------------------------------------------------
+
+test_get_opcode:
+	mov rsi, .msg
+	call print
+
+	mov di, 0x0053			; xor
+	call get_opcode
+	cmp al, 0x33
+	jne .fail
+
+	mov di, 0x0054			; inc
+	call get_opcode
+	cmp al, 0xFF
+	jne .fail
+
+	mov di, 0x004F			; hlt
+	call get_opcode
+	cmp al, 0xF4
+	jne .fail
+
+	mov di, 0x0003			; rdx (not an operator)
+	call get_opcode
+	cmp al, UNRECOGNISED_ID_OPCODE
+	jne .fail
+
+	.pass:
+		mov rsi, msg_pass
+		call print
+		ret
+	.fail:
+		mov rsi, msg_fail
+		call print
+		ret
+	.msg db "test_get_opcode...", 0x00
+
 msg_pass:
 	db 0x0A
 	times (TEST_LINE_LENGTH + .start - .end) db " ",	; right align