add some operators, stop printing whitespace while tokenising, add frame for pseudo-op support

This commit is contained in:
andromeda
2026-03-30 20:11:36 +02:00
parent 7a3e1fc37c
commit a0a99e3116
2 changed files with 186 additions and 23 deletions

View File

@@ -161,10 +161,11 @@ metadata about some tokens in the following form:
the `type` hex digit is defined as follows:
| hex | meaning         | examples |
|-----|-----------------|-|
| 0x0 | ignored         | |
| 0x1 | operator        | `mov`, `hlt` |
| 0x2 | register        | `rsp`, `al` |
| 0x3 | pseudo-operator | `db` |
| 0xF | unknown         | any token ID not represented in the lookup table |
type metadata for the different types is as follows:
@@ -210,6 +211,17 @@ type metadata for the different types is as follows:
11b ; 64 bit
```
```
1 byte
+----------+
| type 0x3 |
+----------+
| 31 24 |
+----------+
| reserved |
+----------+
```
#### `opcodes.by_id`
entries are as follows:
@@ -396,6 +408,13 @@ supported tokens are listed below
| call | 0x0059 | |
| ret | 0x005A | |
| cmp | 0x005B | |
| jmp | 0x005C | |
| je | 0x005D | |
| jne | 0x005E | |
| push | 0x005F | |
| pop | 0x0060 | |
| out | 0x0061 | |
| db | 0x0100 | pseudo-operator |
| | 0x10XX | some memory address; `XX` is as specified below |
| | 0x20XX | some constant; `XX` is as specified below |
| | 0x3XXX | some label definition; `XXX` is its entry index in the label table |

View File

@@ -984,13 +984,6 @@ tokenise:
jmp .loop jmp .loop
.skip_byte_whitespace: .skip_byte_whitespace:
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_whitespace
call print
pop rsi
test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
jz .unexpected_whitespace ; if not, error jz .unexpected_whitespace ; if not, error
inc rdi inc rdi
@@ -1088,14 +1081,6 @@ tokenise:
jmp .loop jmp .loop
.operator: .operator:
; debug message
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_operator
call print
pop rsi
mov rcx, rax ; rcx = number of tokens processed mov rcx, rax ; rcx = number of tokens processed
xor eax, eax ; eax = number of bytes in operator xor eax, eax ; eax = number of bytes in operator
mov [.pending_operator], eax ; zero pending operator mov [.pending_operator], eax ; zero pending operator
@@ -1124,7 +1109,6 @@ tokenise:
jge .break jge .break
jmp .operator_loop ; and loop jmp .operator_loop ; and loop
.operator_break: .operator_break:
; rax already pushed from .operator
push rdi push rdi
mov edi, [.pending_operator] ; edi = operator to be searched mov edi, [.pending_operator] ; edi = operator to be searched
@@ -1132,15 +1116,47 @@ tokenise:
; ax = operator's token ID ; ax = operator's token ID
push rcx push rcx
mov ecx, eax ; cx = operator's token ID mov ecx, eax ; cx = operator's token ID
mov edi, eax ; di = operator's token ID
call get_tte_type
; al = token type
mov sil, al
pop rax ; rax = tokens processed pop rax ; rax = tokens processed
pop rdi ; rdi = byte counter pop rdi ; rdi = byte counter
cmp sil, 0x3 ; pseudo-operator
je .pseudo_operator
cmp sil, 0x1 ; operator
jne .unexpected_operator
; debug message
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_operator
call print
pop rsi
mov [TOKEN_TABLE_ADDR + rax * 2], cx mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; plus 1 token processed inc rax ; plus 1 token processed
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
jmp .loop jmp .loop
.pseudo_operator:
; reached from .operator_break when the identified token has type 0x3
; (pseudo-operator, e.g. `db`)
; on entry: rax = tokens processed, cx = pseudo-operator's token ID,
; rdi = byte counter
; NOTE(review): sil was overwritten with the token type before the jump here;
; confirm rsi holds nothing live at that point
; debug message
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_pseudo_operator
call print
pop rsi
mov [TOKEN_TABLE_ADDR + rax * 2], cx ; store the token ID in the next free slot
inc rax ; plus 1 token processed, so the next token does not overwrite this one
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
jmp .loop
.operand: .operand:
; debug message ; debug message
push rsi push rsi
@@ -1287,6 +1303,7 @@ tokenise:
.msg_label db "label.", 0x0A, 0x00 .msg_label db "label.", 0x0A, 0x00
.msg_operator db "operator.", 0x0A, 0x00 .msg_operator db "operator.", 0x0A, 0x00
.msg_operand db "operand.", 0x0A, 0x00 .msg_operand db "operand.", 0x0A, 0x00
.msg_pseudo_operator db "pseudo_operator.", 0x0A, 0x00
.pending_operator dd 0 ; the operator token that is pending processing .pending_operator dd 0 ; the operator token that is pending processing
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
@@ -2163,6 +2180,34 @@ tokens:
dw 0x005B ; cmp dw 0x005B ; cmp
db 0x01 ; type: operator db 0x01 ; type: operator
db 0x02 ; # operands db 0x02 ; # operands
dw 0x005C ; jmp
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x005D ; je
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x005E ; jne
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x005F ; push
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0060 ; pop
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0061 ; out
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0100 ; db
db 0x03 ; type: pseudo-operator
db 0x01 ; # operands
.by_id_end: .by_id_end:
.operators: .operators:
dd "hlt" dd "hlt"
@@ -2187,6 +2232,20 @@ tokens:
dw 0x005A dw 0x005A
dd "cmp" dd "cmp"
dw 0x005B dw 0x005B
dd "jmp"
dw 0x005C
dd "je"
dw 0x005D
dd "jne"
dw 0x005E
dd "push"
dw 0x005F
dd "pop"
dw 0x0060
dd "out"
dw 0x0061
dd "db"
dw 0x0100
.operators_end: .operators_end:
.registers: .registers:
dd "r8" dd "r8"
@@ -2460,7 +2519,7 @@ opcodes:
db 0x00 db 0x00
dw 0x0000 dw 0x0000
db 0x00 ; rel16/32 db 0xE8 ; rel16/32
db 0x00 db 0x00
dd 0x00000002 ; 2: r/m op flag dd 0x00000002 ; 2: r/m op flag
@@ -2491,6 +2550,91 @@ opcodes:
; 0000: ; 0000:
dd 0x00000000 dd 0x00000000
; jmp
dw 0x005C
db 0xFF ; r/m
db 0x00
dw 0x0000
db 0xE9 ; rel16/32
db 0xEB ; rel8
dd 0x00000004 ; 4: r/m
; 000:
; 0: rel16/32
; 0: rel8
; 00:
dd 0x00000000
; je
dw 0x005D
dw 0x0000
dw 0x0000
db 0x00 ; TODO figure out the 0x0F prefix this will need
db 0x74 ; rel8
dd 0x00000000 ; 00000:
; 0: rel8
; 00:
dd 0x00000000
; jne
dw 0x005E
dw 0x0000
dw 0x0000
db 0x00 ; TODO figure out the 0x0F prefix this will need
db 0x75 ; rel8
dd 0x00000000 ; 00000:
; 0: rel8
; 00:
dd 0x00000000
; push
; TODO add support for the +r variation
dw 0x005F
db 0xFF ; r/m
db 0x00
db 0x68 ; imm16/32
db 0x6A ; imm8
dw 0x0000
dd 0x00000006 ; 6: r/m
; 0:
; 0: imm16/32
; 0: imm8
; 0000:
dd 0x00000000
; pop
; TODO add support for the +r variation
dw 0x0060
db 0x8F ; r/m
db 0x00
dd 0x00000000
dd 0x00000000 ; 0: r/m
; 0000000:
dd 0x00000000
; out
; TODO enforce DX AL requirement, ignore ModR/M correctly
dw 0x0061
db 0xEE
db 0x00
dd 0x00000000
dd 0x00000000
dd 0x00000000
.by_id_end: .by_id_end:
msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00 msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00