From 63e3a1ea7eecef3ea6a212f3855cb26ba33fc621 Mon Sep 17 00:00:00 2001 From: andromeda Date: Sat, 7 Mar 2026 21:33:28 +0100 Subject: [PATCH] add some tokens, length1 token support --- twasm/README.md | 23 +++++++++++++ twasm/asm/main.asm | 86 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/twasm/README.md b/twasm/README.md index 4607b01..91a7eb3 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -129,4 +129,27 @@ supported tokens are listed below | cr3 | 0x004C | | | cr4 | 0x004D | | | cr8 | 0x004E | | +| hlt | 0x004F | | +| int3 | 0x0050 | | +| [ | 0x0051 | | +| ] | 0x0052 | | +| xor | 0x0053 | | +| inc | 0x0054 | | +| dec | 0x0055 | | +| mov | 0x0056 | | +| add | 0x0057 | | +| sub | 0x0058 | | +| call | 0x0059 | | +| ret | 0x005A | | +| cmp | 0x005B | | +| je | 0x005C | | +| jne | 0x005D | | +| jge | 0x005E | | +| jg | 0x005F | | +| jle | 0x0060 | | +| jl | 0x0061 | | +| + | 0x0062 | | +| - | 0x0063 | | +| * | 0x0064 | | +| / | 0x0065 | | | | 0xFFFF | unrecognised token | diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index e498f91..a3dea27 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -47,6 +47,9 @@ start: ; ------------------------------------------------------------------------------ identify_token: + cmp rsi, 1 ; if the token has length 1 + je .start_length1 ; then enter the length 1 loop + cmp rsi, 2 ; if the token has length 2 je .start_length2 ; then enter the length 2 loop @@ -58,6 +61,26 @@ identify_token: jmp .unrecognised ; else unrecognised + .start_length1 + mov rcx, tokens.length1 ; rcx -> list of known tokens + + .loop_length1 + cmp rcx, tokens.length2 ; check if rcx still in the bounds of length1 tokens + jge .unrecognised ; if not, unrecognised + + mov r10b, [rcx] ; known token + mov r11b, [rdi] ; token + cmp r10b, r11b ; if known token matches token + je .found_length1 ; exit loop + + add rcx, 3 ; length of token + length of id + jmp .loop_length1 + + .found_length1 + xor eax, eax ; make sure rest of rax is zeroed + mov ax, [rcx + 1] ; return id of token + ret + .start_length2 mov rcx, tokens.length2 ; rcx -> list of known tokens @@ -496,6 +519,22 @@ test_identify_token: mov rsi, .msg call print + ; length1 token that exists + mov byte [TEST_ARENA_ADDR], "*" + mov rdi, TEST_ARENA_ADDR + mov rsi, 1 + call identify_token + cmp ax, 0x0064 + jne .fail + + ; length1 token that doesn't exist + mov byte [TEST_ARENA_ADDR], " " + mov rdi, TEST_ARENA_ADDR + mov rsi, 1 + call identify_token + cmp ax, 0xFFFF + jne .fail + ; length2 token that exists mov word [TEST_ARENA_ADDR], "sp" mov rdi, TEST_ARENA_ADDR @@ -575,6 +614,19 @@ test_identify_token: ; ------------------------------------------------------------------------------ tokens: + .length1 + db "[" + dw 0x0051 + db "]" + dw 0x0052 + db "+" + dw 0x0062 + db "-" + dw 0x0063 + db "*" + dw 0x0064 + db "/" + dw 0x0065 .length2 db "r8" dw 0x0008 @@ -624,6 +676,12 @@ tokens: dw 0x0048 db "ss" dw 0x0049 + db "je" + dw 0x005C + db "jg" + dw 0x005F + db "jl" + dw 0x0061 .length3 db "rax" dw 0x0000 @@ -699,6 +757,30 @@ tokens: dw 0x004D db "cr8" dw 0x004E + db "hlt" + dw 0x004F + db "xor" + dw 0x0053 + db "inc" + dw 0x0054 + db "dec" + dw 0x0055 + db "mov" + dw 0x0056 + db "add" + dw 0x0057 + db "sub" + dw 0x0058 + db "ret" + dw 0x005A + db "cmp" + dw 0x005B + db "jne" + dw 0x005D + db "jge" + dw 0x005E + db "jle" + dw 0x0060 .length4 db "r10d" dw 0x001A @@ -736,6 +818,10 @@ tokens: dw 0x003E db "r15b" dw 0x003F + db "int3" + dw 0x0050 + db "call" + dw 0x0059 .length5 .end