improve data structures
This commit is contained in:
@@ -212,33 +212,83 @@ entries are as follows:
|
||||
```
|
||||
+------------------------------+
|
||||
| 0 operand operators |
|
||||
+------------------------------+
|
||||
| 127 96 |
|
||||
+------------------------------+
|
||||
| reserved |
|
||||
+------------------------------+
|
||||
| 95 64 |
|
||||
+------------------------------+
|
||||
| reserved |
|
||||
+------------------------------+
|
||||
| 63 32 |
|
||||
+------------------------------+
|
||||
| reserved |
|
||||
+----------+--------+----------+
|
||||
| 31 24 | 23 16 | 15 0 |
|
||||
+----------+--------+----------+
|
||||
| reserved | opcode | token ID |
|
||||
+----------+--------+----------+
|
||||
|
||||
+-------+----------+---------+----------+
|
||||
| 1 operand operators |
|
||||
+-------+----------+---------+----------+
|
||||
| 31 27 | 26 24 | 23 16 | 15 0 |
|
||||
+-------+----------+---------+----------+
|
||||
| zeros | reg bits | opcode | token ID |
|
||||
| | | dst=r/m | |
|
||||
+-------+----------+---------+----------+
|
||||
+-------------------------------------------------------------+
|
||||
| 1 operand operators |
|
||||
+-------------------------------------------------------------+
|
||||
| 127 96 |
|
||||
+-------------------------------------------------------------+
|
||||
| reserved |
|
||||
+----------+-------+-------+-------+-------+----------+-------+
|
||||
| 95 88 | 87 84 | 83 80 | 79 76 | 75 72 | 71 68 | 67 64 |
|
||||
+----------+-------+-------+-------+-------+----------+-------+
|
||||
| reserved | op5&8 | op4&8 | op3&8 | op2&8 | reserved | op0&8 |
|
||||
+----------+-------+-------+-------+-------+----------+-------+
|
||||
| 63 56 | 55 48 | 47 40 | 39 32 |
|
||||
+----------+---------------+---------------+------------------+
|
||||
| opcode | opcode | opcode | opcode |
|
||||
| dst=rel8 | dst=rel | dst=imm8 | dst=imm |
|
||||
+----------+---------------+---------------+------------------+
|
||||
| 31 24 | 23 16 | 15 0 |
|
||||
+----------+---------------+----------------------------------+
|
||||
| reserved | opcode | token ID |
|
||||
| | dst=r/m | |
|
||||
+----------+---------------+----------------------------------+
|
||||
|
||||
+------------------------------+
|
||||
| 2 operand operators |
|
||||
+---------+---------+----------+
|
||||
| 31 24 | 23 16 | 15 0 |
|
||||
+---------+---------+----------+
|
||||
| opcode | opcode | token ID |
|
||||
| dst=reg | dst=r/m | |
|
||||
| src=r/m | src=reg | |
|
||||
+---------+---------+----------+
|
||||
+----------------------------------------------+
|
||||
| 2 operand operators |
|
||||
+----------------------------------------------+
|
||||
| 127 96 |
|
||||
+----------------------------------------------+
|
||||
| reserved |
|
||||
+-------------------+-------+-------+----------+
|
||||
| 95 80 | 79 76 | 75 72 | 71 64 |
|
||||
+-------------------+-------+-------+----------+
|
||||
| reserved | op3&8 | op2&8 | reserved |
|
||||
+-------------------+-------+-------+----------+
|
||||
| 63 48 | 47 40 | 39 32 |
|
||||
+-------------------+---------------+----------+
|
||||
| reserved | opcode | opcode |
|
||||
| | dst=r/m | dst=r/m |
|
||||
| | src=imm8 | src=imm |
|
||||
+---------+---------+---------------+----------+
|
||||
| 31 24 | 23 16 | 15 0 |
|
||||
+---------+---------+--------------------------+
|
||||
| opcode | opcode | token ID |
|
||||
| dst=r | dst=r/m | |
|
||||
| src=r/m | src=r | |
|
||||
+---------+---------+--------------------------+
|
||||
|
||||
; key:
|
||||
r/m ; r/m 16/32/64
|
||||
r ; r 16/32/64
|
||||
imm ; imm 16/32
|
||||
imm8 ; imm 8
|
||||
rel ; rel 16/32
|
||||
rel8 ; rel 8
|
||||
|
||||
opX&8 ; low 3 bits of this 4-bit field are the operator flag that goes with opcode at offset X from
|
||||
; the first opcode in the table entry
|
||||
```
|
||||
|
||||
note the lack of support for multiple-byte opcodes or multiple opcodes for one token ID; these features will likely be added at some point after the parser accumulates too much jank.
|
||||
Note that there is much room to expand. If an opcode doesn't exist, its field should be 0x00.
|
||||
|
||||
### token IDs
|
||||
|
||||
@@ -339,6 +389,7 @@ supported tokens are listed below
|
||||
| ret | 0x005A | |
|
||||
| cmp | 0x005B | |
|
||||
| | 0x10XX | some memory address; `XX` is as specified below |
|
||||
| | 0xFEXX | used to pass some raw value `XX` in place of a token id |
|
||||
| | 0xFFFF | unrecognised token |
|
||||
|
||||
values of `XX` in `0x10XX`:
|
||||
|
||||
@@ -129,6 +129,7 @@ assemble:
|
||||
mov sil, 0b ; standard opcode
|
||||
call get_opcode
|
||||
; al = opcode
|
||||
; dl = op flag (none)
|
||||
call .output_byte
|
||||
pop rsi
|
||||
pop rdi
|
||||
@@ -153,7 +154,10 @@ assemble:
|
||||
mov sil, 0b ; dst=r/m
|
||||
call get_opcode
|
||||
; al = opcode
|
||||
; dl = op flag
|
||||
push rdx
|
||||
call .output_byte
|
||||
pop rdx ; dl = op flag
|
||||
pop rsi
|
||||
pop rdi ; di = tte of operator
|
||||
|
||||
@@ -173,9 +177,11 @@ assemble:
|
||||
pop rdi ; di = next tte
|
||||
je .operator_1_memory_access
|
||||
|
||||
push rdx
|
||||
; di = next tte
|
||||
call get_tte_type
|
||||
; al = type of token
|
||||
pop rdx ; dl = op flag
|
||||
|
||||
cmp al, 0x02 ; type: register
|
||||
je .operator_1_register
|
||||
@@ -202,8 +208,10 @@ assemble:
|
||||
pop rsi
|
||||
|
||||
mov si, di ; si = `R/M` tte
|
||||
mov di, 0x0000 ; di = `reg` tte
|
||||
mov dl, 11b ; dl bits
|
||||
and edx, 0xFF
|
||||
or dx, 0xFE00 ; pass di as direct value
|
||||
mov di, dx ; di = op flag
|
||||
mov dl, 11b ; dl = mod bits
|
||||
call get_ModRM
|
||||
; al = Mod R/M byte
|
||||
call .output_byte
|
||||
@@ -270,6 +278,8 @@ assemble:
|
||||
mov sil, 0 ; dst = r/m
|
||||
call get_opcode
|
||||
; al = opcode
|
||||
; dl = op flag
|
||||
; TODO act accordingly if the op flag is present
|
||||
call .output_byte
|
||||
pop rdi
|
||||
|
||||
@@ -346,6 +356,8 @@ assemble:
|
||||
mov sil, 1 ; dst = reg
|
||||
call get_opcode
|
||||
; al = opcode
|
||||
; dl = op flag
|
||||
; TODO do something if the op flag is present
|
||||
call .output_byte
|
||||
pop rdi
|
||||
|
||||
@@ -561,7 +573,7 @@ get_tte_typed_metadata:
|
||||
; given 2 register tokens and the mod bits, returns the ModR/M byte
|
||||
;
|
||||
; parameters:
|
||||
; di = token table entry `reg`
|
||||
; di = token table entry `reg`. 0xFEXX passes the low 3 bits of XX as the op flag
|
||||
; si = token table entry `R/M`
|
||||
; dl = lower 2 bits: mod bits. The rest is ignored
|
||||
;
|
||||
@@ -570,14 +582,40 @@ get_tte_typed_metadata:
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
get_ModRM:
|
||||
push rbx
|
||||
|
||||
and dl, 11b ; mask for mod bits
|
||||
shl dl, 6
|
||||
|
||||
push rdi
|
||||
shr di, 8
|
||||
cmp dil, 0xFE
|
||||
pop rdi
|
||||
je .pass_di_as_op_flag
|
||||
|
||||
; di = tte
|
||||
call get_reg_bits
|
||||
; al = reg bits
|
||||
mov bl, al
|
||||
shl bl, 3
|
||||
|
||||
push rsi
|
||||
mov rsi, .msg_normal_ModRM
|
||||
call print.debug
|
||||
pop rsi
|
||||
|
||||
mov bl, al ; bl = reg bits
|
||||
jmp .continue
|
||||
|
||||
.pass_di_as_op_flag:
|
||||
push rsi
|
||||
mov rsi, .msg_op_flag
|
||||
call print.debug
|
||||
pop rsi
|
||||
|
||||
mov bl, dil ; bl = op flag
|
||||
and bl, 111b ; mask
|
||||
|
||||
.continue:
|
||||
shl bl, 3
|
||||
|
||||
mov rdi, rsi ; do the other one
|
||||
|
||||
@@ -592,8 +630,12 @@ get_ModRM:
|
||||
or al, bl ; reg bits
|
||||
or al, cl ; R/M bits
|
||||
and rax, 0xFF ; mask for byte
|
||||
pop rbx
|
||||
ret
|
||||
|
||||
.msg_op_flag db "get_ModRM op_flag", 0x0A, 0x00
|
||||
.msg_normal_ModRM db "get_ModRM normal_ModRM", 0x0A, 0x00
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; get_opcode
|
||||
;
|
||||
@@ -603,23 +645,30 @@ get_ModRM:
|
||||
;
|
||||
; parameters:
|
||||
; di = token table entry
|
||||
; sil = lower bit: 0: dst=r/m or only opcode
|
||||
; 1: dst=reg or 0x00
|
||||
; sil = offset within opcode entry. 0 is the first opcode, 1 the second, and so
|
||||
; on
|
||||
;
|
||||
; returned:
|
||||
; al = opcode; the rest of rax is zeroed
|
||||
; al = opcode; the rest of rax is zeroed.
|
||||
; dl = lower 3 bits: op flag, if applicable. The rest of rdx is zeroed.
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
get_opcode:
|
||||
and rdi, 0xFFFF
|
||||
and rsi, 1
|
||||
|
||||
add rsi, 2
|
||||
and rsi, 111b
|
||||
sub rsi, 2
|
||||
|
||||
xor eax, eax
|
||||
|
||||
.loop:
|
||||
cmp rax, (opcodes.by_id_end - opcodes.by_id) / 4 ; make sure it's still in range
|
||||
cmp rax, (opcodes.by_id_end - opcodes.by_id) / 16 ; make sure it's still in range
|
||||
jg .not_found
|
||||
|
||||
mov cx, [opcodes.by_id + rax * 4] ; next entry in opcodes.by_id
|
||||
shl rax, 4
|
||||
mov cx, [opcodes.by_id + rax] ; next entry in opcodes.by_id
|
||||
shr rax, 4
|
||||
|
||||
cmp cx, di
|
||||
je .found
|
||||
@@ -631,8 +680,24 @@ get_opcode:
|
||||
mov al, UNRECOGNISED_ID_OPCODE
|
||||
ret
|
||||
.found:
|
||||
mov al, [rsi + 2 + opcodes.by_id + rax * 4]
|
||||
shl rax, 4
|
||||
push rsi
|
||||
shr rsi, 1
|
||||
mov dl, [rsi + 8 + opcodes.by_id + rax]
|
||||
pop rsi
|
||||
|
||||
push rsi
|
||||
and rsi, 1
|
||||
cmp esi, 1 ; check if offset is odd
|
||||
pop rsi
|
||||
jne .found_continue
|
||||
|
||||
shr dl, 4 ; if so, actually 1 further on dl byte
|
||||
|
||||
.found_continue
|
||||
mov al, [rsi + 2 + opcodes.by_id + rax]
|
||||
and rax, 0xFF ; mask
|
||||
and rdx, 0x0F ; mask
|
||||
ret
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
@@ -1558,6 +1623,7 @@ clear_output_arena:
|
||||
; data
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
align 4
|
||||
tokens:
|
||||
.by_id:
|
||||
dw 0x0000 ; rax
|
||||
@@ -1869,53 +1935,148 @@ tokens:
|
||||
dw 0x003F
|
||||
.registers_end:
|
||||
|
||||
align 16
|
||||
opcodes:
|
||||
.by_id:
|
||||
dw 0x004F ; hlt
|
||||
db 0xF4 ; .
|
||||
db 0x00 ;
|
||||
; hlt
|
||||
dw 0x004F
|
||||
db 0xF4 ; opcode
|
||||
db 0x00 ; reserved
|
||||
dd 0x00000000
|
||||
dd 0x00000000
|
||||
dd 0x00000000
|
||||
|
||||
dw 0x0050 ; int3
|
||||
db 0xCC ;
|
||||
db 0x00 ;
|
||||
; int3
|
||||
dw 0x0050
|
||||
db 0xCC ; opcode
|
||||
db 0x00 ; reserved
|
||||
dd 0x00000000
|
||||
dd 0x00000000
|
||||
dd 0x00000000
|
||||
|
||||
dw 0x0053 ; xor
|
||||
db 0x31 ; r/m <- reg
|
||||
db 0x33 ; reg <- r/m
|
||||
; xor
|
||||
dw 0x0053
|
||||
db 0x31 ; r/m <- r
|
||||
db 0x33 ; r <- r/m
|
||||
|
||||
dw 0x0054 ; inc
|
||||
db 0xFF ; r/m <-
|
||||
db 0x00 ; reg bits
|
||||
db 0x81 ; r/m <- imm16/32
|
||||
db 0x83 ; r/m <- imm8
|
||||
dw 0x0000
|
||||
|
||||
dw 0x0055 ; dec
|
||||
db 0xFF ; r/m <-
|
||||
db 0x01 ; reg bits
|
||||
dd 0x00006600 ; 00:
|
||||
; 6: r/m <- imm16/32 op flag
|
||||
; 6: r/m <- imm8 op flag
|
||||
; 0x0000:
|
||||
|
||||
dw 0x0056 ; mov
|
||||
db 0x89 ; r/m <- reg
|
||||
db 0x8B ; reg <- r/m
|
||||
dd 0x00000000 ; reserved
|
||||
|
||||
dw 0x0057 ; add
|
||||
db 0x01 ; r/m <- reg
|
||||
db 0x03 ; reg <- r/m
|
||||
; inc
|
||||
dw 0x0054
|
||||
db 0xFF ; r/m
|
||||
db 0x00
|
||||
|
||||
dw 0x0058 ; sub
|
||||
db 0x29 ; r/m <- reg
|
||||
db 0x2B ; reg <- r/m
|
||||
dd 0x00000000
|
||||
|
||||
; TODO deal with rel values, differentiate 16/32 and 64 for call
|
||||
dw 0x0059 ; call
|
||||
db 0xFF ; r/m <-
|
||||
db 0x02 ; reg bits
|
||||
dd 0x00000000 ; 0: r/m op flag
|
||||
; 0000000:
|
||||
|
||||
; TODO deal with optional parameter
|
||||
dw 0x005A ; ret
|
||||
db 0xC3 ; opcode
|
||||
db 0x00 ; reserved
|
||||
dd 0x00000000
|
||||
|
||||
dw 0x005B ; cmp
|
||||
db 0x39 ; r/m <- reg
|
||||
db 0x3B ; reg <- r/m
|
||||
; dec
|
||||
dw 0x0055
|
||||
db 0xFF ; r/m
|
||||
db 0x00
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
dd 0x00000001 ; 1: r/m op flag
|
||||
; 0000000:
|
||||
dd 0x00000000
|
||||
|
||||
; mov
|
||||
dw 0x0056
|
||||
db 0x89 ; r/m <- r
|
||||
db 0x8B ; r <- r/m
|
||||
|
||||
db 0xC7 ; r/m <- imm16/32
|
||||
db 0x00
|
||||
dw 0x0000
|
||||
|
||||
dd 0x00000000 ; 00:
|
||||
; 0: r/m <- imm16/32 op flag
|
||||
; 00000:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; add
|
||||
dw 0x0057
|
||||
db 0x01 ; r/m <- r
|
||||
db 0x03 ; r <- r/m
|
||||
|
||||
db 0x81 ; r/m <- imm16/32
|
||||
db 0x83 ; r/m <- imm8
|
||||
dw 0x0000
|
||||
|
||||
dd 0x00000000 ; 00:
|
||||
; 0: r/m <- imm16/32 op flag
|
||||
; 0: r/m <- imm8 op flag
|
||||
; 0000:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; sub
|
||||
dw 0x0058
|
||||
db 0x29 ; r/m <- r
|
||||
db 0x2B ; r <- r/m
|
||||
|
||||
db 0x81 ; r/m <- imm16/32
|
||||
db 0x83 ; r/m <- imm8
|
||||
dw 0x0000
|
||||
|
||||
dd 0x00005500 ; 00:
|
||||
; 5: r/m <- imm16/32 op flag
|
||||
; 5: r/m <- imm8 op flag
|
||||
; 0000:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; call
|
||||
dw 0x0059
|
||||
db 0xFF ; r/m
|
||||
db 0x00
|
||||
|
||||
dw 0x0000
|
||||
db 0x00 ; rel16/32
|
||||
db 0x00
|
||||
|
||||
dd 0x00000002 ; 2: r/m op flag
|
||||
; 0000000:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; retn
|
||||
dw 0x005A
|
||||
db 0xC3 ; opcode
|
||||
db 0x00 ; reserved
|
||||
dd 0x00000000
|
||||
dd 0x00000000
|
||||
dd 0x00000000
|
||||
|
||||
; cmp
|
||||
dw 0x005B
|
||||
db 0x39 ; r/m <- r
|
||||
db 0x3B ; r <- r/m
|
||||
|
||||
db 0x81 ; r/m <- imm16/32
|
||||
db 0x83 ; r/m <- imm8
|
||||
dw 0x0000
|
||||
|
||||
dd 0x00007700 ; 00:
|
||||
; 7: r/m <- imm16/32 op flag
|
||||
; 7: r/m <- imm8 op flag
|
||||
; 0000:
|
||||
|
||||
dd 0x00000000
|
||||
.by_id_end:
|
||||
|
||||
msg_welcome db "Welcome to Twasm", 0x0A, 0x00
|
||||
@@ -1927,6 +2088,7 @@ whitespace_2 db " ", 0x0D
|
||||
program:
|
||||
db "xor eax, eax", 0x0A
|
||||
db "inc rax ; inline comment", 0x0A
|
||||
db "dec rax", 0x0A
|
||||
db "; one line comment", 0x0A
|
||||
db "mov rdx, [rax]", 0x0A
|
||||
db "mov [rax], rdx", 0x0A
|
||||
|
||||
@@ -294,32 +294,71 @@ test_get_opcode:
|
||||
call print.test
|
||||
|
||||
mov di, 0x0053 ; xor
|
||||
mov sil, 0b
|
||||
mov sil, 0
|
||||
call get_opcode
|
||||
cmp al, 0x31
|
||||
jne .fail
|
||||
cmp dl, 0q0
|
||||
jne .fail
|
||||
|
||||
mov di, 0x0053 ; xor
|
||||
mov sil, 1b
|
||||
mov sil, 1
|
||||
call get_opcode
|
||||
cmp al, 0x33
|
||||
jne .fail
|
||||
cmp dl, 0q0
|
||||
jne .fail
|
||||
|
||||
mov di, 0x0053 ; xor
|
||||
mov sil, 2
|
||||
call get_opcode
|
||||
cmp al, 0x81
|
||||
jne .fail
|
||||
cmp dl, 0q6
|
||||
jne .fail
|
||||
|
||||
mov di, 0x0053 ; xor
|
||||
mov sil, 3
|
||||
call get_opcode
|
||||
cmp al, 0x83
|
||||
jne .fail
|
||||
cmp dl, 0q6
|
||||
jne .fail
|
||||
|
||||
mov di, 0x0054 ; inc
|
||||
mov sil, 0b
|
||||
mov sil, 0
|
||||
call get_opcode
|
||||
cmp al, 0xFF
|
||||
jne .fail
|
||||
cmp dl, 0q0
|
||||
jne .fail
|
||||
|
||||
mov di, 0x0055 ; dec
|
||||
mov sil, 0
|
||||
call get_opcode
|
||||
cmp al, 0xFF
|
||||
jne .fail
|
||||
cmp dl, 0q1
|
||||
jne .fail
|
||||
|
||||
mov di, 0x004F ; hlt
|
||||
mov sil, 0b
|
||||
mov sil, 0
|
||||
call get_opcode
|
||||
cmp al, 0xF4
|
||||
jne .fail
|
||||
cmp dl, 0q0
|
||||
jne .fail
|
||||
|
||||
mov di, 0x0059 ; call
|
||||
mov sil, 0q0
|
||||
call get_opcode
|
||||
cmp al, 0xFF
|
||||
jne .fail
|
||||
cmp dl, 0q2
|
||||
jne .fail
|
||||
|
||||
mov di, 0x0003 ; rdx (not an operator)
|
||||
mov sil, 0b
|
||||
mov sil, 0q0
|
||||
call get_opcode
|
||||
cmp al, UNRECOGNISED_ID_OPCODE
|
||||
jne .fail
|
||||
|
||||
Reference in New Issue
Block a user