add more stuff to the lookup tables
This commit is contained in:
163
twasm/README.md
163
twasm/README.md
@@ -210,11 +210,32 @@ type metadata for the different types is as follows:
|
||||
entries are as follows:
|
||||
|
||||
```
|
||||
+-----------------+-----------------+----------+
|
||||
| 31 24 | 23 16 | 15 0 |
|
||||
+-----------------+-----------------+----------+
|
||||
| dest=reg opcode | dest=r/m opcode | token ID |
|
||||
+-----------------+-----------------+----------+
|
||||
+------------------------------+
|
||||
| 0 operand operators |
|
||||
+----------+--------+----------+
|
||||
| 31 24 | 23 16 | 15 0 |
|
||||
+----------+--------+----------+
|
||||
| reserved | opcode | token ID |
|
||||
+----------+--------+----------+
|
||||
|
||||
+-------+----------+---------+----------+
|
||||
| 1 operand operators |
|
||||
+-------+----------+---------+----------+
|
||||
| 31 27 | 26 24 | 23 16 | 15 0 |
|
||||
+-------+----------+---------+----------+
|
||||
| zeros | reg bits | opcode | token ID |
|
||||
| | | dst=r/m | |
|
||||
+-------+----------+---------+----------+
|
||||
|
||||
+------------------------------+
|
||||
| 2 operand operators |
|
||||
+---------+---------+----------+
|
||||
| 31 24 | 23 16 | 15 0 |
|
||||
+---------+---------+----------+
|
||||
| opcode | opcode | token ID |
|
||||
| dst=reg | dst=r/m | |
|
||||
| src=r/m | src=reg | |
|
||||
+---------+---------+----------+
|
||||
```
|
||||
|
||||
note that neither multiple-byte opcodes nor multiple opcodes for one token ID are supported yet; these features will likely be added at some point after the parser accumulates too much jank.
|
||||
@@ -233,14 +254,14 @@ supported tokens are listed below
|
||||
| rdi | 0x0005 | |
|
||||
| rsp | 0x0006 | |
|
||||
| rbp | 0x0007 | |
|
||||
| r8 | 0x0008 | |
|
||||
| r9 | 0x0009 | |
|
||||
| r10 | 0x000A | |
|
||||
| r11 | 0x000B | |
|
||||
| r12 | 0x000C | |
|
||||
| r13 | 0x000D | |
|
||||
| r14 | 0x000E | |
|
||||
| r15 | 0x000F | |
|
||||
| r8 | 0x0008 | unimplemented |
|
||||
| r9 | 0x0009 | unimplemented |
|
||||
| r10 | 0x000A | unimplemented |
|
||||
| r11 | 0x000B | unimplemented |
|
||||
| r12 | 0x000C | unimplemented |
|
||||
| r13 | 0x000D | unimplemented |
|
||||
| r14 | 0x000E | unimplemented |
|
||||
| r15 | 0x000F | unimplemented |
|
||||
| eax | 0x0010 | |
|
||||
| ebx | 0x0011 | |
|
||||
| ecx | 0x0012 | |
|
||||
@@ -249,61 +270,61 @@ supported tokens are listed below
|
||||
| edi | 0x0015 | |
|
||||
| esp | 0x0016 | |
|
||||
| ebp | 0x0017 | |
|
||||
| r8d | 0x0018 | |
|
||||
| r9d | 0x0019 | |
|
||||
| r10d | 0x001A | |
|
||||
| r11d | 0x001B | |
|
||||
| r12d | 0x001C | |
|
||||
| r13d | 0x001D | |
|
||||
| r14d | 0x001E | |
|
||||
| r15d | 0x001F | |
|
||||
| ax | 0x0020 | |
|
||||
| bx | 0x0021 | |
|
||||
| cx | 0x0022 | |
|
||||
| dx | 0x0023 | |
|
||||
| si | 0x0024 | |
|
||||
| di | 0x0025 | |
|
||||
| sp | 0x0026 | |
|
||||
| bp | 0x0027 | |
|
||||
| r8w | 0x0028 | |
|
||||
| r9w | 0x0029 | |
|
||||
| r10w | 0x002A | |
|
||||
| r11w | 0x002B | |
|
||||
| r12w | 0x002C | |
|
||||
| r13w | 0x002D | |
|
||||
| r14w | 0x002E | |
|
||||
| r15w | 0x002F | |
|
||||
| al | 0x0030 | |
|
||||
| bl | 0x0031 | |
|
||||
| cl | 0x0032 | |
|
||||
| dl | 0x0033 | |
|
||||
| sil | 0x0034 | |
|
||||
| dil | 0x0035 | |
|
||||
| spl | 0x0036 | |
|
||||
| bpl | 0x0037 | |
|
||||
| r8b | 0x0038 | |
|
||||
| r9b | 0x0039 | |
|
||||
| r10b | 0x003A | |
|
||||
| r11b | 0x003B | |
|
||||
| r12b | 0x003C | |
|
||||
| r13b | 0x003D | |
|
||||
| r14b | 0x003E | |
|
||||
| r15b | 0x003F | |
|
||||
| ah | 0x0040 | |
|
||||
| bh | 0x0041 | |
|
||||
| ch | 0x0042 | |
|
||||
| dh | 0x0043 | |
|
||||
| cs | 0x0044 | |
|
||||
| ds | 0x0045 | |
|
||||
| es | 0x0046 | |
|
||||
| fs | 0x0047 | |
|
||||
| gs | 0x0048 | |
|
||||
| ss | 0x0049 | |
|
||||
| cr0 | 0x004A | |
|
||||
| cr2 | 0x004B | |
|
||||
| cr3 | 0x004C | |
|
||||
| cr4 | 0x004D | |
|
||||
| cr8 | 0x004E | |
|
||||
| r8d | 0x0018 | unimplemented |
|
||||
| r9d | 0x0019 | unimplemented |
|
||||
| r10d | 0x001A | unimplemented |
|
||||
| r11d | 0x001B | unimplemented |
|
||||
| r12d | 0x001C | unimplemented |
|
||||
| r13d | 0x001D | unimplemented |
|
||||
| r14d | 0x001E | unimplemented |
|
||||
| r15d | 0x001F | unimplemented |
|
||||
| ax | 0x0020 | unimplemented |
|
||||
| bx | 0x0021 | unimplemented |
|
||||
| cx | 0x0022 | unimplemented |
|
||||
| dx | 0x0023 | unimplemented |
|
||||
| si | 0x0024 | unimplemented |
|
||||
| di | 0x0025 | unimplemented |
|
||||
| sp | 0x0026 | unimplemented |
|
||||
| bp | 0x0027 | unimplemented |
|
||||
| r8w | 0x0028 | unimplemented |
|
||||
| r9w | 0x0029 | unimplemented |
|
||||
| r10w | 0x002A | unimplemented |
|
||||
| r11w | 0x002B | unimplemented |
|
||||
| r12w | 0x002C | unimplemented |
|
||||
| r13w | 0x002D | unimplemented |
|
||||
| r14w | 0x002E | unimplemented |
|
||||
| r15w | 0x002F | unimplemented |
|
||||
| al | 0x0030 | unimplemented |
|
||||
| bl | 0x0031 | unimplemented |
|
||||
| cl | 0x0032 | unimplemented |
|
||||
| dl | 0x0033 | unimplemented |
|
||||
| sil | 0x0034 | unimplemented |
|
||||
| dil | 0x0035 | unimplemented |
|
||||
| spl | 0x0036 | unimplemented |
|
||||
| bpl | 0x0037 | unimplemented |
|
||||
| r8b | 0x0038 | unimplemented |
|
||||
| r9b | 0x0039 | unimplemented |
|
||||
| r10b | 0x003A | unimplemented |
|
||||
| r11b | 0x003B | unimplemented |
|
||||
| r12b | 0x003C | unimplemented |
|
||||
| r13b | 0x003D | unimplemented |
|
||||
| r14b | 0x003E | unimplemented |
|
||||
| r15b | 0x003F | unimplemented |
|
||||
| ah | 0x0040 | unimplemented |
|
||||
| bh | 0x0041 | unimplemented |
|
||||
| ch | 0x0042 | unimplemented |
|
||||
| dh | 0x0043 | unimplemented |
|
||||
| cs | 0x0044 | unimplemented |
|
||||
| ds | 0x0045 | unimplemented |
|
||||
| es | 0x0046 | unimplemented |
|
||||
| fs | 0x0047 | unimplemented |
|
||||
| gs | 0x0048 | unimplemented |
|
||||
| ss | 0x0049 | unimplemented |
|
||||
| cr0 | 0x004A | unimplemented |
|
||||
| cr2 | 0x004B | unimplemented |
|
||||
| cr3 | 0x004C | unimplemented |
|
||||
| cr4 | 0x004D | unimplemented |
|
||||
| cr8 | 0x004E | unimplemented |
|
||||
| hlt | 0x004F | |
|
||||
| int3 | 0x0050 | |
|
||||
| | 0x0051 | deprecated; formerly `[`. Now `0x10XX` is used. |
|
||||
@@ -317,12 +338,6 @@ supported tokens are listed below
|
||||
| call | 0x0059 | |
|
||||
| ret | 0x005A | |
|
||||
| cmp | 0x005B | |
|
||||
| je | 0x005C | |
|
||||
| jne | 0x005D | |
|
||||
| jge | 0x005E | |
|
||||
| jg | 0x005F | |
|
||||
| jle | 0x0060 | |
|
||||
| jl | 0x0061 | |
|
||||
| | 0x10XX | some memory address; `XX` is as specified below |
|
||||
| | 0xFFFF | unrecognised token |
|
||||
|
||||
|
||||
@@ -1560,21 +1560,94 @@ clear_output_arena:
|
||||
|
||||
tokens:
|
||||
.by_id:
|
||||
dw 0x0010 ; eax
|
||||
db 0x02 ; type: register
|
||||
db 00000010b ; reg: 000b
|
||||
; width: 10b (32 bits)
|
||||
|
||||
dw 0x0000 ; rax
|
||||
db 0x02 ; type: register
|
||||
db 00000011b ; reg: 000b
|
||||
; width: 11b (64 bits)
|
||||
|
||||
dw 0x0001 ; rbx
|
||||
db 0x02 ; type: register
|
||||
db 00001111b ; reg: 011b
|
||||
; width: 11b (64 bits)
|
||||
|
||||
dw 0x0002 ; rcx
|
||||
db 0x02 ; type: register
|
||||
db 00000111b ; reg: 001b
|
||||
; width: 11b (64 bits)
|
||||
|
||||
dw 0x0003 ; rdx
|
||||
db 0x02 ; type: register
|
||||
db 00001011b ; reg: 010b
|
||||
; width: 11b (64 bits)
|
||||
|
||||
dw 0x0004 ; rsi
|
||||
db 0x02 ; type: register
|
||||
db 00011011b ; reg: 110b
|
||||
; width: 11b (64 bits)
|
||||
|
||||
dw 0x0005 ; rdi
|
||||
db 0x02 ; type: register
|
||||
db 00011111b ; reg: 111b
|
||||
; width: 11b (64 bits)
|
||||
|
||||
dw 0x0006 ; rsp
|
||||
db 0x02 ; type: register
|
||||
db 00010011b ; reg: 100b
|
||||
; width: 11b (64 bits)
|
||||
|
||||
dw 0x0007 ; rbp
|
||||
db 0x02 ; type: register
|
||||
db 00010111b ; reg: 101b
|
||||
; width: 11b (64 bits)
|
||||
|
||||
dw 0x0010 ; eax
|
||||
db 0x02 ; type: register
|
||||
db 00000010b ; reg: 000b
|
||||
; width: 10b (32 bits)
|
||||
|
||||
dw 0x0011 ; ebx
|
||||
db 0x02 ; type: register
|
||||
db 00001110b ; reg: 011b
|
||||
; width: 10b (32 bits)
|
||||
|
||||
dw 0x0012 ; ecx
|
||||
db 0x02 ; type: register
|
||||
db 00000110b ; reg: 001b
|
||||
; width: 10b (32 bits)
|
||||
|
||||
dw 0x0013 ; edx
|
||||
db 0x02 ; type: register
|
||||
db 00001010b ; reg: 010b
|
||||
; width: 10b (32 bits)
|
||||
|
||||
dw 0x0014 ; esi
|
||||
db 0x02 ; type: register
|
||||
db 00011010b ; reg: 110b
|
||||
; width: 10b (32 bits)
|
||||
|
||||
dw 0x0015 ; edi
|
||||
db 0x02 ; type: register
|
||||
db 00011110b ; reg: 111b
|
||||
; width: 10b (32 bits)
|
||||
|
||||
dw 0x0016 ; esp
|
||||
db 0x02 ; type: register
|
||||
db 00010010b ; reg: 100b
|
||||
; width: 10b (32 bits)
|
||||
|
||||
dw 0x0017 ; ebp
|
||||
db 0x02 ; type: register
|
||||
db 00010110b ; reg: 101b
|
||||
; width: 10b (32 bits)
|
||||
|
||||
dw 0x004F ; hlt
|
||||
db 0x01 ; type: operator
|
||||
db 0x00 ; # operands
|
||||
|
||||
dw 0x0050 ; int3
|
||||
db 0x01 ; type: operator
|
||||
db 0x00 ; # operands
|
||||
|
||||
dw 0x0053 ; xor
|
||||
db 0x01 ; type: operator
|
||||
db 0x02 ; # operands
|
||||
@@ -1583,23 +1656,39 @@ tokens:
|
||||
db 0x01 ; type: operator
|
||||
db 0x01 ; # operands
|
||||
|
||||
dw 0x0055 ; dec
|
||||
db 0x01 ; type: operator
|
||||
db 0x01 ; # operands
|
||||
|
||||
dw 0x0056 ; mov
|
||||
db 0x01 ; type: operator
|
||||
db 0x02 ; # operands
|
||||
|
||||
dw 0x004F ; hlt
|
||||
dw 0x0057 ; add
|
||||
db 0x01 ; type: operator
|
||||
db 0x02 ; # operands
|
||||
|
||||
dw 0x0058 ; sub
|
||||
db 0x01 ; type: operator
|
||||
db 0x02 ; # operands
|
||||
|
||||
dw 0x0059 ; call
|
||||
db 0x01 ; type: operator
|
||||
db 0x01 ; # operands
|
||||
|
||||
dw 0x005A ; ret
|
||||
db 0x01 ; type: operator
|
||||
db 0x00 ; # operands
|
||||
|
||||
dw 0x005B ; cmp
|
||||
db 0x01 ; type: operator
|
||||
db 0x02 ; # operands
|
||||
.by_id_end:
|
||||
.operators:
|
||||
dd "je"
|
||||
dw 0x005C
|
||||
dd "jg"
|
||||
dw 0x005F
|
||||
dd "jl"
|
||||
dw 0x0061
|
||||
dd "hlt"
|
||||
dw 0x004F
|
||||
dd "int3"
|
||||
dw 0x0050
|
||||
dd "xor"
|
||||
dw 0x0053
|
||||
dd "inc"
|
||||
@@ -1612,20 +1701,12 @@ tokens:
|
||||
dw 0x0057
|
||||
dd "sub"
|
||||
dw 0x0058
|
||||
dd "call"
|
||||
dw 0x0059
|
||||
dd "ret"
|
||||
dw 0x005A
|
||||
dd "cmp"
|
||||
dw 0x005B
|
||||
dd "jne"
|
||||
dw 0x005D
|
||||
dd "jge"
|
||||
dw 0x005E
|
||||
dd "jle"
|
||||
dw 0x0060
|
||||
dd "int3"
|
||||
dw 0x0050
|
||||
dd "call"
|
||||
dw 0x0059
|
||||
.operators_end:
|
||||
.registers:
|
||||
dd "r8"
|
||||
@@ -1790,21 +1871,51 @@ tokens:
|
||||
|
||||
opcodes:
|
||||
.by_id:
|
||||
dw 0x004F ; hlt
|
||||
db 0xF4 ; .
|
||||
db 0x00 ;
|
||||
|
||||
dw 0x0050 ; int3
|
||||
db 0xCC ;
|
||||
db 0x00 ;
|
||||
|
||||
dw 0x0053 ; xor
|
||||
db 0x31 ; r/m <- reg
|
||||
db 0x33 ; reg <- r/m
|
||||
|
||||
dw 0x0054 ; inc
|
||||
db 0xFF ; r/m
|
||||
db 0x00 ;
|
||||
db 0xFF ; r/m <-
|
||||
db 0x00 ; reg bits
|
||||
|
||||
dw 0x0055 ; dec
|
||||
db 0xFF ; r/m <-
|
||||
db 0x01 ; reg bits
|
||||
|
||||
dw 0x0056 ; mov
|
||||
db 0x89 ; r/m <- reg
|
||||
db 0x8B ; reg <- r/m
|
||||
|
||||
dw 0x004F ; hlt
|
||||
db 0xF4 ; .
|
||||
db 0x00 ;
|
||||
dw 0x0057 ; add
|
||||
db 0x01 ; r/m <- reg
|
||||
db 0x03 ; reg <- r/m
|
||||
|
||||
dw 0x0058 ; sub
|
||||
db 0x29 ; r/m <- reg
|
||||
db 0x2B ; reg <- r/m
|
||||
|
||||
; TODO deal with rel values, differentiate 16/32 and 64 for call
|
||||
dw 0x0059 ; call
|
||||
db 0xFF ; r/m <-
|
||||
db 0x02 ; reg bits
|
||||
|
||||
; TODO deal with optional parameter
|
||||
dw 0x005A ; ret
|
||||
db 0xC3 ; opcode
|
||||
db 0x00 ; reserved
|
||||
|
||||
dw 0x005B ; cmp
|
||||
db 0x39 ; r/m <- reg
|
||||
db 0x3B ; reg <- r/m
|
||||
.by_id_end:
|
||||
|
||||
msg_welcome db "Welcome to Twasm", 0x0A, 0x00
|
||||
|
||||
Reference in New Issue
Block a user