improve data structures

This commit is contained in:
andromeda
2026-03-24 20:49:52 +01:00
parent 74fc57cdfc
commit 121a0df8e4
3 changed files with 322 additions and 70 deletions

View File

@@ -129,6 +129,7 @@ assemble:
mov sil, 0b ; standard opcode
call get_opcode
; al = opcode
; dl = op flag (none)
call .output_byte
pop rsi
pop rdi
@@ -153,7 +154,10 @@ assemble:
mov sil, 0b ; dst=r/m
call get_opcode
; al = opcode
; dl = op flag
push rdx
call .output_byte
pop rdx ; dl = op flag
pop rsi
pop rdi ; di = tte of operator
@@ -173,9 +177,11 @@ assemble:
pop rdi ; di = next tte
je .operator_1_memory_access
push rdx
; di = next tte
call get_tte_type
; al = type of token
pop rdx ; dl = op flag
cmp al, 0x02 ; type: register
je .operator_1_register
@@ -202,8 +208,10 @@ assemble:
pop rsi
mov si, di ; si = `R/M` tte
mov di, 0x0000 ; di = `reg` tte
mov dl, 11b ; dl bits
and edx, 0xFF
or dx, 0xFE00 ; pass di as direct value
mov di, dx ; di = op flag
mov dl, 11b ; dl = mod bits
call get_ModRM
; al = Mod R/M byte
call .output_byte
@@ -270,6 +278,8 @@ assemble:
mov sil, 0 ; dst = r/m
call get_opcode
; al = opcode
; dl = op flag
; TODO act accordingly if the op flag is present
call .output_byte
pop rdi
@@ -346,6 +356,8 @@ assemble:
mov sil, 1 ; dst = reg
call get_opcode
; al = opcode
; dl = op flag
; TODO do something if the op flag is present
call .output_byte
pop rdi
@@ -561,7 +573,7 @@ get_tte_typed_metadata:
; given 2 register tokens and the mod bits, returns the ModR/M byte
;
; parameters:
; di = token table entry `reg`
; di = token table entry `reg`. 0xFEXX passes the low 3 bits of XX as op flag
; si = token table entry `R/M`
; dl = lower 2 bits: mod bits. The rest is ignored
;
@@ -570,14 +582,40 @@ get_tte_typed_metadata:
; ------------------------------------------------------------------------------
get_ModRM:
push rbx
and dl, 11b ; mask for mod bits
shl dl, 6
push rdi
shr di, 8
cmp dil, 0xFE
pop rdi
je .pass_di_as_op_flag
; di = tte
call get_reg_bits
; al = reg bits
mov bl, al
shl bl, 3
push rsi
mov rsi, .msg_normal_ModRM
call print.debug
pop rsi
mov bl, al ; bl = reg bits
jmp .continue
.pass_di_as_op_flag:
push rsi
mov rsi, .msg_op_flag
call print.debug
pop rsi
mov bl, dil ; bl = op flag
and bl, 111b ; mask
.continue:
shl bl, 3
mov rdi, rsi ; do the other one
@@ -592,8 +630,12 @@ get_ModRM:
or al, bl ; reg bits
or al, cl ; R/M bits
and rax, 0xFF ; mask for byte
pop rbx
ret
.msg_op_flag db "get_ModRM op_flag", 0x0A, 0x00
.msg_normal_ModRM db "get_ModRM normal_ModRM", 0x0A, 0x00
; ------------------------------------------------------------------------------
; get_opcode
;
@@ -603,23 +645,30 @@ get_ModRM:
;
; parameters:
; di = token table entry
; sil = lower bit: 0: dst=r/m or only opcode
; 1: dst=reg or 0x00
; sil = offset within opcode entry. 0 is the first opcode, 1 the second, and so
; on
;
; returned:
; al = opcode; the rest of rax is zeroed
; al = opcode; the rest of rax is zeroed.
; dl = op flag in the lower 3 bits, if applicable. rdx is masked with 0x0F, so
; everything above the low nibble is zeroed.
; ------------------------------------------------------------------------------
get_opcode:
and rdi, 0xFFFF
and rsi, 1
add rsi, 2
and rsi, 111b
sub rsi, 2
xor eax, eax
.loop:
cmp rax, (opcodes.by_id_end - opcodes.by_id) / 4 ; make sure it's still in range
cmp rax, (opcodes.by_id_end - opcodes.by_id) / 16 ; make sure it's still in range
jg .not_found
mov cx, [opcodes.by_id + rax * 4] ; next entry in opcodes.by_id
shl rax, 4
mov cx, [opcodes.by_id + rax] ; next entry in opcodes.by_id
shr rax, 4
cmp cx, di
je .found
@@ -631,8 +680,24 @@ get_opcode:
mov al, UNRECOGNISED_ID_OPCODE
ret
.found:
mov al, [rsi + 2 + opcodes.by_id + rax * 4]
shl rax, 4
push rsi
shr rsi, 1
mov dl, [rsi + 8 + opcodes.by_id + rax]
pop rsi
push rsi
and rsi, 1
cmp esi, 1 ; check if offset is odd
pop rsi
jne .found_continue
shr dl, 4 ; if so, actually 1 further on dl byte
.found_continue
mov al, [rsi + 2 + opcodes.by_id + rax]
and rax, 0xFF ; mask
and rdx, 0x0F ; mask
ret
; ------------------------------------------------------------------------------
@@ -1558,6 +1623,7 @@ clear_output_arena:
; data
; ------------------------------------------------------------------------------
align 4
tokens:
.by_id:
dw 0x0000 ; rax
@@ -1869,53 +1935,148 @@ tokens:
dw 0x003F
.registers_end:
align 16
opcodes:
.by_id:
dw 0x004F ; hlt
db 0xF4 ; .
db 0x00 ;
; hlt
dw 0x004F
db 0xF4 ; opcode
db 0x00 ; reserved
dd 0x00000000
dd 0x00000000
dd 0x00000000
dw 0x0050 ; int3
db 0xCC ;
db 0x00 ;
; int3
dw 0x0050
db 0xCC ; opcode
db 0x00 ; reserved
dd 0x00000000
dd 0x00000000
dd 0x00000000
dw 0x0053 ; xor
db 0x31 ; r/m <- reg
db 0x33 ; reg <- r/m
; xor
dw 0x0053
db 0x31 ; r/m <- r
db 0x33 ; r <- r/m
dw 0x0054 ; inc
db 0xFF ; r/m <-
db 0x00 ; reg bits
db 0x81 ; r/m <- imm16/32
db 0x83 ; r/m <- imm8
dw 0x0000
dw 0x0055 ; dec
db 0xFF ; r/m <-
db 0x01 ; reg bits
dd 0x00006600 ; 00:
; 6: r/m <- imm16/32 op flag
; 6: r/m <- imm8 op flag
; 0x0000:
dw 0x0056 ; mov
db 0x89 ; r/m <- reg
db 0x8B ; reg <- r/m
dd 0x00000000 ; reserved
dw 0x0057 ; add
db 0x01 ; r/m <- reg
db 0x03 ; reg <- r/m
; inc
dw 0x0054
db 0xFF ; r/m
db 0x00
dw 0x0058 ; sub
db 0x29 ; r/m <- reg
db 0x2B ; reg <- r/m
dd 0x00000000
; TODO deal with rel values, differentiate 16/32 and 64 for call
dw 0x0059 ; call
db 0xFF ; r/m <-
db 0x02 ; reg bits
dd 0x00000000 ; 0: r/m op flag
; 0000000:
; TODO deal with optional parameter
dw 0x005A ; ret
db 0xC3 ; opcode
db 0x00 ; reserved
dd 0x00000000
dw 0x005B ; cmp
db 0x39 ; r/m <- reg
db 0x3B ; reg <- r/m
; dec
dw 0x0055
db 0xFF ; r/m
db 0x00
dd 0x00000000
dd 0x00000001 ; 1: r/m op flag
; 0000000:
dd 0x00000000
; mov
dw 0x0056
db 0x89 ; r/m <- r
db 0x8B ; r <- r/m
db 0xC7 ; r/m <- imm16/32
db 0x00
dw 0x0000
dd 0x00000000 ; 00:
; 0: r/m <- imm16/32 op flag
; 00000:
dd 0x00000000
; add
dw 0x0057
db 0x01 ; r/m <- r
db 0x03 ; r <- r/m
db 0x81 ; r/m <- imm16/32
db 0x83 ; r/m <- imm8
dw 0x0000
dd 0x00000000 ; 00:
; 0: r/m <- imm16/32 op flag
; 0: r/m <- imm8 op flag
; 0000:
dd 0x00000000
; sub
dw 0x0058
db 0x29 ; r/m <- r
db 0x2B ; r <- r/m
db 0x81 ; r/m <- imm16/32
db 0x83 ; r/m <- imm8
dw 0x0000
dd 0x00005500 ; 00:
; 5: r/m <- imm16/32 op flag
; 5: r/m <- imm8 op flag
; 0000:
dd 0x00000000
; call
dw 0x0059
db 0xFF ; r/m
db 0x00
dw 0x0000
db 0x00 ; rel16/32
db 0x00
dd 0x00000002 ; 2: r/m op flag
; 0000000:
dd 0x00000000
; retn
dw 0x005A
db 0xC3 ; opcode
db 0x00 ; reserved
dd 0x00000000
dd 0x00000000
dd 0x00000000
; cmp
dw 0x005B
db 0x39 ; r/m <- r
db 0x3B ; r <- r/m
db 0x81 ; r/m <- imm16/32
db 0x83 ; r/m <- imm8
dw 0x0000
dd 0x00007700 ; 00:
; 7: r/m <- imm16/32 op flag
; 7: r/m <- imm8 op flag
; 0000:
dd 0x00000000
.by_id_end:
msg_welcome db "Welcome to Twasm", 0x0A, 0x00
@@ -1927,6 +2088,7 @@ whitespace_2 db " ", 0x0D
program:
db "xor eax, eax", 0x0A
db "inc rax ; inline comment", 0x0A
db "dec rax", 0x0A
db "; one line comment", 0x0A
db "mov rdx, [rax]", 0x0A
db "mov [rax], rdx", 0x0A