From 74fc57cdfc012b2d3cd621ef9dd11794dd0e5fcf Mon Sep 17 00:00:00 2001 From: andromeda Date: Tue, 24 Mar 2026 11:16:39 +0100 Subject: [PATCH] add more stuff to the lookup tables --- twasm/README.md | 163 ++++++++++++++++++++++++-------------------- twasm/asm/main.asm | 165 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 227 insertions(+), 101 deletions(-) diff --git a/twasm/README.md b/twasm/README.md index dfb198d..72ec2e3 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -210,11 +210,32 @@ type metadata for the different types is as follows: entries are as follows: ``` -+-----------------+-----------------+----------+ -| 31 24 | 23 16 | 15 0 | -+-----------------+-----------------+----------+ -| dest=reg opcode | dest=r/m opcode | token ID | -+-----------------+-----------------+----------+ ++------------------------------+ +| 0 operand operators | ++----------+--------+----------+ +| 31 24 | 23 16 | 15 0 | ++----------+--------+----------+ +| reserved | opcode | token ID | ++----------+--------+----------+ + ++---------------------------------------+ +| 1 operand operators | ++-------+----------+---------+----------+ +| 31 27 | 26 24 | 23 16 | 15 0 | ++-------+----------+---------+----------+ +| zeros | reg bits | opcode | token ID | +| | | dst=r/m | | ++-------+----------+---------+----------+ + ++------------------------------+ +| 2 operand operators | ++---------+---------+----------+ +| 31 24 | 23 16 | 15 0 | ++---------+---------+----------+ +| opcode | opcode | token ID | +| dst=reg | dst=r/m | | +| src=r/m | src=reg | | ++---------+---------+----------+ ``` note the lack of support for multiple-byte opcodes or multiple opcodes for one token ID; these features will likely be added at some point after the parser accumulates too much jank. 
@@ -233,14 +254,14 @@ supported tokens are listed below | rdi | 0x0005 | | | rsp | 0x0006 | | | rbp | 0x0007 | | -| r8 | 0x0008 | | -| r9 | 0x0009 | | -| r10 | 0x000A | | -| r11 | 0x000B | | -| r12 | 0x000C | | -| r13 | 0x000D | | -| r14 | 0x000E | | -| r15 | 0x000F | | +| r8 | 0x0008 | unimplemented | +| r9 | 0x0009 | unimplemented | +| r10 | 0x000A | unimplemented | +| r11 | 0x000B | unimplemented | +| r12 | 0x000C | unimplemented | +| r13 | 0x000D | unimplemented | +| r14 | 0x000E | unimplemented | +| r15 | 0x000F | unimplemented | | eax | 0x0010 | | | ebx | 0x0011 | | | ecx | 0x0012 | | @@ -249,61 +270,61 @@ supported tokens are listed below | edi | 0x0015 | | | esp | 0x0016 | | | ebp | 0x0017 | | -| r8d | 0x0018 | | -| r9d | 0x0019 | | -| r10d | 0x001A | | -| r11d | 0x001B | | -| r12d | 0x001C | | -| r13d | 0x001D | | -| r14d | 0x001E | | -| r15d | 0x001F | | -| ax | 0x0020 | | -| bx | 0x0021 | | -| cx | 0x0022 | | -| dx | 0x0023 | | -| si | 0x0024 | | -| di | 0x0025 | | -| sp | 0x0026 | | -| bp | 0x0027 | | -| r8w | 0x0028 | | -| r9w | 0x0029 | | -| r10w | 0x002A | | -| r11w | 0x002B | | -| r12w | 0x002C | | -| r13w | 0x002D | | -| r14w | 0x002E | | -| r15w | 0x002F | | -| al | 0x0030 | | -| bl | 0x0031 | | -| cl | 0x0032 | | -| dl | 0x0033 | | -| sil | 0x0034 | | -| dil | 0x0035 | | -| spl | 0x0036 | | -| bpl | 0x0037 | | -| r8b | 0x0038 | | -| r9b | 0x0039 | | -| r10b | 0x003A | | -| r11b | 0x003B | | -| r12b | 0x003C | | -| r13b | 0x003D | | -| r14b | 0x003E | | -| r15b | 0x003F | | -| ah | 0x0040 | | -| bh | 0x0041 | | -| ch | 0x0042 | | -| dh | 0x0043 | | -| cs | 0x0044 | | -| ds | 0x0045 | | -| es | 0x0046 | | -| fs | 0x0047 | | -| gs | 0x0048 | | -| ss | 0x0049 | | -| cr0 | 0x004A | | -| cr2 | 0x004B | | -| cr3 | 0x004C | | -| cr4 | 0x004D | | -| cr8 | 0x004E | | +| r8d | 0x0018 | unimplemented | +| r9d | 0x0019 | unimplemented | +| r10d | 0x001A | unimplemented | +| r11d | 0x001B | unimplemented | +| r12d | 0x001C | unimplemented | +| r13d | 0x001D | 
unimplemented | +| r14d | 0x001E | unimplemented | +| r15d | 0x001F | unimplemented | +| ax | 0x0020 | unimplemented | +| bx | 0x0021 | unimplemented | +| cx | 0x0022 | unimplemented | +| dx | 0x0023 | unimplemented | +| si | 0x0024 | unimplemented | +| di | 0x0025 | unimplemented | +| sp | 0x0026 | unimplemented | +| bp | 0x0027 | unimplemented | +| r8w | 0x0028 | unimplemented | +| r9w | 0x0029 | unimplemented | +| r10w | 0x002A | unimplemented | +| r11w | 0x002B | unimplemented | +| r12w | 0x002C | unimplemented | +| r13w | 0x002D | unimplemented | +| r14w | 0x002E | unimplemented | +| r15w | 0x002F | unimplemented | +| al | 0x0030 | unimplemented | +| bl | 0x0031 | unimplemented | +| cl | 0x0032 | unimplemented | +| dl | 0x0033 | unimplemented | +| sil | 0x0034 | unimplemented | +| dil | 0x0035 | unimplemented | +| spl | 0x0036 | unimplemented | +| bpl | 0x0037 | unimplemented | +| r8b | 0x0038 | unimplemented | +| r9b | 0x0039 | unimplemented | +| r10b | 0x003A | unimplemented | +| r11b | 0x003B | unimplemented | +| r12b | 0x003C | unimplemented | +| r13b | 0x003D | unimplemented | +| r14b | 0x003E | unimplemented | +| r15b | 0x003F | unimplemented | +| ah | 0x0040 | unimplemented | +| bh | 0x0041 | unimplemented | +| ch | 0x0042 | unimplemented | +| dh | 0x0043 | unimplemented | +| cs | 0x0044 | unimplemented | +| ds | 0x0045 | unimplemented | +| es | 0x0046 | unimplemented | +| fs | 0x0047 | unimplemented | +| gs | 0x0048 | unimplemented | +| ss | 0x0049 | unimplemented | +| cr0 | 0x004A | unimplemented | +| cr2 | 0x004B | unimplemented | +| cr3 | 0x004C | unimplemented | +| cr4 | 0x004D | unimplemented | +| cr8 | 0x004E | unimplemented | | hlt | 0x004F | | | int3 | 0x0050 | | | | 0x0051 | deprecated; formerly `[`. Now `0x10XX` is used. 
| @@ -317,12 +338,6 @@ supported tokens are listed below | call | 0x0059 | | | ret | 0x005A | | | cmp | 0x005B | | -| je | 0x005C | | -| jne | 0x005D | | -| jge | 0x005E | | -| jg | 0x005F | | -| jle | 0x0060 | | -| jl | 0x0061 | | | | 0x10XX | some memory address; `XX` is as specified below | | | 0xFFFF | unrecognised token | diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index bdcba88..89d079f 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -1560,21 +1560,94 @@ clear_output_arena: tokens: .by_id: - dw 0x0010 ; eax - db 0x02 ; type: register - db 00000010b ; reg: 000b - ; width: 10b (32 bits) - dw 0x0000 ; rax db 0x02 ; type: register db 00000011b ; reg: 000b ; width: 11b (64 bits) + dw 0x0001 ; rbx + db 0x02 ; type: register + db 00001111b ; reg: 011b + ; width: 11b (64 bits) + + dw 0x0002 ; rcx + db 0x02 ; type: register + db 00000111b ; reg: 001b + ; width: 11b (64 bits) + dw 0x0003 ; rdx db 0x02 ; type: register db 00001011b ; reg: 010b ; width: 11b (64 bits) + dw 0x0004 ; rsi + db 0x02 ; type: register + db 00011011b ; reg: 110b + ; width: 11b (64 bits) + + dw 0x0005 ; rdi + db 0x02 ; type: register + db 00011111b ; reg: 111b + ; width: 11b (64 bits) + + dw 0x0006 ; rsp + db 0x02 ; type: register + db 00010011b ; reg: 100b + ; width: 11b (64 bits) + + dw 0x0007 ; rbp + db 0x02 ; type: register + db 00010111b ; reg: 101b + ; width: 11b (64 bits) + + dw 0x0010 ; eax + db 0x02 ; type: register + db 00000010b ; reg: 000b + ; width: 10b (32 bits) + + dw 0x0011 ; ebx + db 0x02 ; type: register + db 00001110b ; reg: 011b + ; width: 10b (32 bits) + + dw 0x0012 ; ecx + db 0x02 ; type: register + db 00000110b ; reg: 001b + ; width: 10b (32 bits) + + dw 0x0013 ; edx + db 0x02 ; type: register + db 00001010b ; reg: 010b + ; width: 10b (32 bits) + + dw 0x0014 ; esi + db 0x02 ; type: register + db 00011010b ; reg: 110b + ; width: 10b (32 bits) + + dw 0x0015 ; edi + db 0x02 ; type: register + db 00011110b ; reg: 111b + ; width: 10b (32 bits) + + dw 0x0016 
; esp + db 0x02 ; type: register + db 00010010b ; reg: 100b + ; width: 10b (32 bits) + + dw 0x0017 ; ebp + db 0x02 ; type: register + db 00010110b ; reg: 101b + ; width: 10b (32 bits) + + dw 0x004F ; hlt + db 0x01 ; type: operator + db 0x00 ; # operands + + dw 0x0050 ; int3 + db 0x01 ; type: operator + db 0x00 ; # operands + dw 0x0053 ; xor db 0x01 ; type: operator db 0x02 ; # operands @@ -1583,23 +1656,39 @@ tokens: db 0x01 ; type: operator db 0x01 ; # operands + dw 0x0055 ; dec + db 0x01 ; type: operator + db 0x01 ; # operands + dw 0x0056 ; mov db 0x01 ; type: operator db 0x02 ; # operands - dw 0x004F ; hlt + dw 0x0057 ; add + db 0x01 ; type: operator + db 0x02 ; # operands + + dw 0x0058 ; sub + db 0x01 ; type: operator + db 0x02 ; # operands + + dw 0x0059 ; call + db 0x01 ; type: operator + db 0x01 ; # operands + + dw 0x005A ; ret db 0x01 ; type: operator db 0x00 ; # operands + + dw 0x005B ; cmp + db 0x01 ; type: operator + db 0x02 ; # operands .by_id_end: .operators: - dd "je" - dw 0x005C - dd "jg" - dw 0x005F - dd "jl" - dw 0x0061 dd "hlt" dw 0x004F + dd "int3" + dw 0x0050 dd "xor" dw 0x0053 dd "inc" @@ -1612,20 +1701,12 @@ tokens: dw 0x0057 dd "sub" dw 0x0058 + dd "call" + dw 0x0059 dd "ret" dw 0x005A dd "cmp" dw 0x005B - dd "jne" - dw 0x005D - dd "jge" - dw 0x005E - dd "jle" - dw 0x0060 - dd "int3" - dw 0x0050 - dd "call" - dw 0x0059 .operators_end: .registers: dd "r8" @@ -1790,21 +1871,51 @@ tokens: opcodes: .by_id: + dw 0x004F ; hlt + db 0xF4 ; opcode + db 0x00 ; reserved + + dw 0x0050 ; int3 + db 0xCC ; opcode + db 0x00 ; reserved + dw 0x0053 ; xor db 0x31 ; r/m <- reg db 0x33 ; reg <- r/m dw 0x0054 ; inc - db 0xFF ; r/m - db 0x00 ; + db 0xFF ; r/m <- + db 0x00 ; reg bits + + dw 0x0055 ; dec + db 0xFF ; r/m <- + db 0x01 ; reg bits dw 0x0056 ; mov db 0x89 ; r/m <- reg db 0x8B ; reg <- r/m - dw 0x004F ; hlt - db 0xF4 ; . 
- db 0x00 ; + dw 0x0057 ; add + db 0x01 ; r/m <- reg + db 0x03 ; reg <- r/m + + dw 0x0058 ; sub + db 0x29 ; r/m <- reg + db 0x2B ; reg <- r/m + + ; TODO deal with rel values, differentiate 16/32 and 64 for call + dw 0x0059 ; call + db 0xFF ; r/m <- + db 0x02 ; reg bits + + ; TODO deal with optional parameter + dw 0x005A ; ret + db 0xC3 ; opcode + db 0x00 ; reserved + + dw 0x005B ; cmp + db 0x39 ; r/m <- reg + db 0x3B ; reg <- r/m .by_id_end: msg_welcome db "Welcome to Twasm", 0x0A, 0x00