add more stuff to the lookup tables

This commit is contained in:
andromeda
2026-03-24 11:16:39 +01:00
parent 26b6b44caf
commit 74fc57cdfc
2 changed files with 227 additions and 101 deletions

View File

@@ -210,11 +210,32 @@ type metadata for the different types is as follows:
entries are as follows:
```
+-----------------+-----------------+----------+
| 31 24 | 23 16 | 15 0 |
+-----------------+-----------------+----------+
| dest=reg opcode | dest=r/m opcode | token ID |
+-----------------+-----------------+----------+
+------------------------------+
|     0 operand operators      |
+----------+--------+----------+
| 31    24 | 23  16 | 15     0 |
+----------+--------+----------+
| reserved | opcode | token ID |
+----------+--------+----------+
+---------------------------------------+
|          1 operand operators          |
+-------+----------+---------+----------+
| 31 27 | 26    24 | 23   16 | 15     0 |
+-------+----------+---------+----------+
| zeros | reg bits | opcode  | token ID |
|       |          | dst=r/m |          |
+-------+----------+---------+----------+
+------------------------------+
|     2 operand operators      |
+---------+---------+----------+
| 31   24 | 23   16 | 15     0 |
+---------+---------+----------+
| opcode  | opcode  | token ID |
| dst=reg | dst=r/m |          |
| src=r/m | src=reg |          |
+---------+---------+----------+
```
Note the lack of support for multi-byte opcodes and for multiple opcodes per token ID; these features will likely be added at some point after the parser accumulates too much jank.
@@ -233,14 +254,14 @@ supported tokens are listed below
| rdi | 0x0005 | |
| rsp | 0x0006 | |
| rbp | 0x0007 | |
| r8 | 0x0008 | |
| r9 | 0x0009 | |
| r10 | 0x000A | |
| r11 | 0x000B | |
| r12 | 0x000C | |
| r13 | 0x000D | |
| r14 | 0x000E | |
| r15 | 0x000F | |
| r8 | 0x0008 | unimplemented |
| r9 | 0x0009 | unimplemented |
| r10 | 0x000A | unimplemented |
| r11 | 0x000B | unimplemented |
| r12 | 0x000C | unimplemented |
| r13 | 0x000D | unimplemented |
| r14 | 0x000E | unimplemented |
| r15 | 0x000F | unimplemented |
| eax | 0x0010 | |
| ebx | 0x0011 | |
| ecx | 0x0012 | |
@@ -249,61 +270,61 @@ supported tokens are listed below
| edi | 0x0015 | |
| esp | 0x0016 | |
| ebp | 0x0017 | |
| r8d | 0x0018 | |
| r9d | 0x0019 | |
| r10d | 0x001A | |
| r11d | 0x001B | |
| r12d | 0x001C | |
| r13d | 0x001D | |
| r14d | 0x001E | |
| r15d | 0x001F | |
| ax | 0x0020 | |
| bx | 0x0021 | |
| cx | 0x0022 | |
| dx | 0x0023 | |
| si | 0x0024 | |
| di | 0x0025 | |
| sp | 0x0026 | |
| bp | 0x0027 | |
| r8w | 0x0028 | |
| r9w | 0x0029 | |
| r10w | 0x002A | |
| r11w | 0x002B | |
| r12w | 0x002C | |
| r13w | 0x002D | |
| r14w | 0x002E | |
| r15w | 0x002F | |
| al | 0x0030 | |
| bl | 0x0031 | |
| cl | 0x0032 | |
| dl | 0x0033 | |
| sil | 0x0034 | |
| dil | 0x0035 | |
| spl | 0x0036 | |
| bpl | 0x0037 | |
| r8b | 0x0038 | |
| r9b | 0x0039 | |
| r10b | 0x003A | |
| r11b | 0x003B | |
| r12b | 0x003C | |
| r13b | 0x003D | |
| r14b | 0x003E | |
| r15b | 0x003F | |
| ah | 0x0040 | |
| bh | 0x0041 | |
| ch | 0x0042 | |
| dh | 0x0043 | |
| cs | 0x0044 | |
| ds | 0x0045 | |
| es | 0x0046 | |
| fs | 0x0047 | |
| gs | 0x0048 | |
| ss | 0x0049 | |
| cr0 | 0x004A | |
| cr2 | 0x004B | |
| cr3 | 0x004C | |
| cr4 | 0x004D | |
| cr8 | 0x004E | |
| r8d | 0x0018 | unimplemented |
| r9d | 0x0019 | unimplemented |
| r10d | 0x001A | unimplemented |
| r11d | 0x001B | unimplemented |
| r12d | 0x001C | unimplemented |
| r13d | 0x001D | unimplemented |
| r14d | 0x001E | unimplemented |
| r15d | 0x001F | unimplemented |
| ax | 0x0020 | unimplemented |
| bx | 0x0021 | unimplemented |
| cx | 0x0022 | unimplemented |
| dx | 0x0023 | unimplemented |
| si | 0x0024 | unimplemented |
| di | 0x0025 | unimplemented |
| sp | 0x0026 | unimplemented |
| bp | 0x0027 | unimplemented |
| r8w | 0x0028 | unimplemented |
| r9w | 0x0029 | unimplemented |
| r10w | 0x002A | unimplemented |
| r11w | 0x002B | unimplemented |
| r12w | 0x002C | unimplemented |
| r13w | 0x002D | unimplemented |
| r14w | 0x002E | unimplemented |
| r15w | 0x002F | unimplemented |
| al | 0x0030 | unimplemented |
| bl | 0x0031 | unimplemented |
| cl | 0x0032 | unimplemented |
| dl | 0x0033 | unimplemented |
| sil | 0x0034 | unimplemented |
| dil | 0x0035 | unimplemented |
| spl | 0x0036 | unimplemented |
| bpl | 0x0037 | unimplemented |
| r8b | 0x0038 | unimplemented |
| r9b | 0x0039 | unimplemented |
| r10b | 0x003A | unimplemented |
| r11b | 0x003B | unimplemented |
| r12b | 0x003C | unimplemented |
| r13b | 0x003D | unimplemented |
| r14b | 0x003E | unimplemented |
| r15b | 0x003F | unimplemented |
| ah | 0x0040 | unimplemented |
| bh | 0x0041 | unimplemented |
| ch | 0x0042 | unimplemented |
| dh | 0x0043 | unimplemented |
| cs | 0x0044 | unimplemented |
| ds | 0x0045 | unimplemented |
| es | 0x0046 | unimplemented |
| fs | 0x0047 | unimplemented |
| gs | 0x0048 | unimplemented |
| ss | 0x0049 | unimplemented |
| cr0 | 0x004A | unimplemented |
| cr2 | 0x004B | unimplemented |
| cr3 | 0x004C | unimplemented |
| cr4 | 0x004D | unimplemented |
| cr8 | 0x004E | unimplemented |
| hlt | 0x004F | |
| int3 | 0x0050 | |
| | 0x0051 | deprecated; formerly `[`. Now `0x10XX` is used. |
@@ -317,12 +338,6 @@ supported tokens are listed below
| call | 0x0059 | |
| ret | 0x005A | |
| cmp | 0x005B | |
| je | 0x005C | |
| jne | 0x005D | |
| jge | 0x005E | |
| jg | 0x005F | |
| jle | 0x0060 | |
| jl | 0x0061 | |
| | 0x10XX | some memory address; `XX` is as specified below |
| | 0xFFFF | unrecognised token |

View File

@@ -1560,21 +1560,94 @@ clear_output_arena:
tokens:
.by_id:
dw 0x0010 ; eax
db 0x02 ; type: register
db 00000010b ; reg: 000b
; width: 10b (32 bits)
dw 0x0000 ; rax
db 0x02 ; type: register
db 00000011b ; reg: 000b
; width: 11b (64 bits)
dw 0x0001 ; rbx
db 0x02 ; type: register
db 00001111b ; reg: 011b
; width: 11b (64 bits)
dw 0x0002 ; rcx
db 0x02 ; type: register
db 00000111b ; reg: 001b
; width: 11b (64 bits)
dw 0x0003 ; rdx
db 0x02 ; type: register
db 00001011b ; reg: 010b
; width: 11b (64 bits)
dw 0x0004 ; rsi
db 0x02 ; type: register
db 00011011b ; reg: 110b
; width: 11b (64 bits)
dw 0x0005 ; rdi
db 0x02 ; type: register
db 00011111b ; reg: 111b
; width: 11b (64 bits)
dw 0x0006 ; rsp
db 0x02 ; type: register
db 00010011b ; reg: 100b
; width: 11b (64 bits)
dw 0x0007 ; rbp
db 0x02 ; type: register
db 00010111b ; reg: 101b
; width: 11b (64 bits)
dw 0x0010 ; eax
db 0x02 ; type: register
db 00000010b ; reg: 000b
; width: 10b (32 bits)
dw 0x0011 ; ebx
db 0x02 ; type: register
db 00001110b ; reg: 011b
; width: 10b (32 bits)
dw 0x0012 ; ecx
db 0x02 ; type: register
db 00000110b ; reg: 001b
; width: 10b (32 bits)
dw 0x0013 ; edx
db 0x02 ; type: register
db 00001010b ; reg: 010b
; width: 10b (32 bits)
dw 0x0014 ; esi
db 0x02 ; type: register
db 00011010b ; reg: 110b
; width: 10b (32 bits)
dw 0x0015 ; edi
db 0x02 ; type: register
db 00011110b ; reg: 111b
; width: 10b (32 bits)
dw 0x0016 ; esp
db 0x02 ; type: register
db 00010010b ; reg: 100b
; width: 10b (32 bits)
dw 0x0017 ; ebp
db 0x02 ; type: register
db 00010110b ; reg: 101b
; width: 10b (32 bits)
dw 0x004F ; hlt
db 0x01 ; type: operator
db 0x00 ; # operands
dw 0x0050 ; int3
db 0x01 ; type: operator
db 0x00 ; # operands
dw 0x0053 ; xor
db 0x01 ; type: operator
db 0x02 ; # operands
@@ -1583,23 +1656,39 @@ tokens:
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0055 ; dec
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0056 ; mov
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x004F ; hlt
dw 0x0057 ; add
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0058 ; sub
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0059 ; call
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x005A ; ret
db 0x01 ; type: operator
db 0x00 ; # operands
dw 0x005B ; cmp
db 0x01 ; type: operator
db 0x02 ; # operands
.by_id_end:
.operators:
dd "je"
dw 0x005C
dd "jg"
dw 0x005F
dd "jl"
dw 0x0061
dd "hlt"
dw 0x004F
dd "int3"
dw 0x0050
dd "xor"
dw 0x0053
dd "inc"
@@ -1612,20 +1701,12 @@ tokens:
dw 0x0057
dd "sub"
dw 0x0058
dd "call"
dw 0x0059
dd "ret"
dw 0x005A
dd "cmp"
dw 0x005B
dd "jne"
dw 0x005D
dd "jge"
dw 0x005E
dd "jle"
dw 0x0060
dd "int3"
dw 0x0050
dd "call"
dw 0x0059
.operators_end:
.registers:
dd "r8"
@@ -1790,21 +1871,51 @@ tokens:
; opcodes.by_id: operator encoding lookup table, bounded by .by_id and
; .by_id_end. Each entry is a 16-bit token ID (dw) followed by two bytes (db).
; The meaning of the two bytes depends on the operator's operand count:
;   0 operands: opcode, then a reserved byte
;   1 operand:  opcode (destination is r/m), then the reg bits
;               (presumably the ModR/M reg field, i.e. the /digit - confirm)
;   2 operands: opcode for r/m <- reg, then opcode for reg <- r/m
; NOTE(review): this view looks like a rendered diff, so superseded lines
; appear to sit next to their replacements (flagged inline below); confirm
; against the real file before relying on entry order or entry size.
opcodes:
.by_id:
dw 0x004F ; hlt
db 0xF4 ; opcode
db 0x00 ; reserved
dw 0x0050 ; int3
db 0xCC ; opcode
db 0x00 ; reserved
dw 0x0053 ; xor
db 0x31 ; r/m <- reg
db 0x33 ; reg <- r/m
dw 0x0054 ; inc
db 0xFF ; r/m -- NOTE(review): this byte pair looks like the pre-change
db 0x00 ; inc bytes, superseded by the pair below; confirm
db 0xFF ; r/m <-
db 0x00 ; reg bits
dw 0x0055 ; dec
db 0xFF ; r/m <-
db 0x01 ; reg bits
dw 0x0056 ; mov
db 0x89 ; r/m <- reg
db 0x8B ; reg <- r/m
dw 0x004F ; hlt -- NOTE(review): repeats the first entry of this table;
db 0xF4 ; looks like the pre-change position of the hlt entry;
db 0x00 ; confirm it is not present in the real file
dw 0x0057 ; add
db 0x01 ; r/m <- reg
db 0x03 ; reg <- r/m
dw 0x0058 ; sub
db 0x29 ; r/m <- reg
db 0x2B ; reg <- r/m
; TODO deal with rel values, differentiate 16/32 and 64 for call
dw 0x0059 ; call
db 0xFF ; r/m <-
db 0x02 ; reg bits
; TODO deal with optional parameter
dw 0x005A ; ret
db 0xC3 ; opcode
db 0x00 ; reserved
dw 0x005B ; cmp
db 0x39 ; r/m <- reg
db 0x3B ; reg <- r/m
.by_id_end:
msg_welcome db "Welcome to Twasm", 0x0A, 0x00