add some operators, stop printing whitespace while tokenising, add frame for pseudo-op support
This commit is contained in:
@@ -160,12 +160,13 @@ metadata about some tokens in the following form:
|
||||
|
||||
the `type` hex digit is defined as follows:
|
||||
|
||||
| hex | meaning | examples |
|
||||
|-----|----------|-|
|
||||
| 0x0 | ignored | |
|
||||
| 0x1 | operator | `mov`, `hlt` |
|
||||
| 0x2 | register | `rsp`, `al` |
|
||||
| 0xF | unknown | any token ID not represented in the lookup table |
|
||||
| hex | meaning | examples |
|
||||
|-----|-----------------|-|
|
||||
| 0x0 | ignored | |
|
||||
| 0x1 | operator | `mov`, `hlt` |
|
||||
| 0x2 | register | `rsp`, `al` |
|
||||
| 0x3 | pseudo-operator | `db` |
|
||||
| 0xF | unknown | any token ID not represented in the lookup table |
|
||||
|
||||
type metadata for the different types is as follows:
|
||||
|
||||
@@ -210,6 +211,17 @@ type metadata for the different types is as follows:
|
||||
11b ; 64 bit
|
||||
```
|
||||
|
||||
```
|
||||
1 byte
|
||||
+----------+
|
||||
| type 0x3 |
|
||||
+----------+
|
||||
| 31 24 |
|
||||
+----------+
|
||||
| reserved |
|
||||
+----------+
|
||||
```
|
||||
|
||||
#### `opcodes.by_id`
|
||||
|
||||
entries are as follows:
|
||||
@@ -396,6 +408,13 @@ supported tokens are listed below
|
||||
| call | 0x0059 | |
|
||||
| ret | 0x005A | |
|
||||
| cmp | 0x005B | |
|
||||
| jmp | 0x005C | |
|
||||
| je | 0x005D | |
|
||||
| jne | 0x005E | |
|
||||
| push | 0x005F | |
|
||||
| pop | 0x0060 | |
|
||||
| out | 0x0061 | |
|
||||
| db | 0x0100 | pseudo-operator |
|
||||
| | 0x10XX | some memory address; `XX` is as specified below |
|
||||
| | 0x20XX | some constant; `XX` is as specified below |
|
||||
| | 0x3XXX | some label definition; `XXX` is its entry index in the label table |
|
||||
|
||||
@@ -984,13 +984,6 @@ tokenise:
|
||||
jmp .loop
|
||||
|
||||
.skip_byte_whitespace:
|
||||
push rsi
|
||||
mov rsi, .found
|
||||
call print.debug
|
||||
mov rsi, .msg_whitespace
|
||||
call print
|
||||
pop rsi
|
||||
|
||||
test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
|
||||
jz .unexpected_whitespace ; if not, error
|
||||
inc rdi
|
||||
@@ -1088,14 +1081,6 @@ tokenise:
|
||||
jmp .loop
|
||||
|
||||
.operator:
|
||||
; debug message
|
||||
push rsi
|
||||
mov rsi, .found
|
||||
call print.debug
|
||||
mov rsi, .msg_operator
|
||||
call print
|
||||
pop rsi
|
||||
|
||||
mov rcx, rax ; rcx = number of tokens processed
|
||||
xor eax, eax ; eax = number of bytes in operator
|
||||
mov [.pending_operator], eax ; zero pending operator
|
||||
@@ -1124,7 +1109,6 @@ tokenise:
|
||||
jge .break
|
||||
jmp .operator_loop ; and loop
|
||||
.operator_break:
|
||||
; rax already pushed from .operator
|
||||
push rdi
|
||||
|
||||
mov edi, [.pending_operator] ; edi = operator to be searched
|
||||
@@ -1132,15 +1116,47 @@ tokenise:
|
||||
; ax = operator's token ID
|
||||
push rcx
|
||||
mov ecx, eax ; cx = operator's token ID
|
||||
mov edi, eax ; di = operator's token ID
|
||||
call get_tte_type
|
||||
; al = token type
|
||||
mov sil, al
|
||||
pop rax ; rax = tokens processed
|
||||
pop rdi ; rdi = byte counter
|
||||
|
||||
cmp sil, 0x3 ; pseudo-operator
|
||||
je .pseudo_operator
|
||||
|
||||
cmp sil, 0x1 ; operator
|
||||
jne .unexpected_operator
|
||||
|
||||
; debug message
|
||||
push rsi
|
||||
mov rsi, .found
|
||||
call print.debug
|
||||
mov rsi, .msg_operator
|
||||
call print
|
||||
pop rsi
|
||||
|
||||
|
||||
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
||||
inc rax ; plus 1 token processed
|
||||
|
||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
|
||||
jmp .loop
|
||||
|
||||
.pseudo_operator:
	; Handles a token whose type, as reported by get_tte_type, is 0x3
	; (pseudo-operator, e.g. `db`). Reached via `je .pseudo_operator` after
	; the operator lookup.
	;
	; on entry:
	;   rax = number of tokens processed so far (used as the token table index)
	;   cx  = token ID of the pseudo-operator
	; rsi is saved and restored around the debug print below.

	; debug message
	push rsi
	mov rsi, .found
	call print.debug
	mov rsi, .msg_pseudo_operator
	call print
	pop rsi

	mov [TOKEN_TABLE_ADDR + rax * 2], cx	; append the token ID to the token table
	inc rax					; plus 1 token processed; without this the
						; next token would overwrite this entry
						; (mirrors the regular operator path)

	mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
	jmp .loop
|
||||
|
||||
.operand:
|
||||
; debug message
|
||||
push rsi
|
||||
@@ -1287,6 +1303,7 @@ tokenise:
|
||||
.msg_label db "label.", 0x0A, 0x00
|
||||
.msg_operator db "operator.", 0x0A, 0x00
|
||||
.msg_operand db "operand.", 0x0A, 0x00
|
||||
.msg_pseudo_operator db "pseudo_operator.", 0x0A, 0x00
|
||||
.pending_operator dd 0 ; the operator token that is pending processing
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
@@ -2163,6 +2180,34 @@ tokens:
|
||||
dw 0x005B ; cmp
|
||||
db 0x01 ; type: operator
|
||||
db 0x02 ; # operands
|
||||
|
||||
dw 0x005C ; jmp
|
||||
db 0x01 ; type: operator
|
||||
db 0x01 ; # operands
|
||||
|
||||
dw 0x005D ; je
|
||||
db 0x01 ; type: operator
|
||||
db 0x01 ; # operands
|
||||
|
||||
dw 0x005E ; jne
|
||||
db 0x01 ; type: operator
|
||||
db 0x01 ; # operands
|
||||
|
||||
dw 0x005F ; push
|
||||
db 0x01 ; type: operator
|
||||
db 0x01 ; # operands
|
||||
|
||||
dw 0x0060 ; pop
|
||||
db 0x01 ; type: operator
|
||||
db 0x01 ; # operands
|
||||
|
||||
dw 0x0061 ; out
|
||||
db 0x01 ; type: operator
|
||||
db 0x02 ; # operands
|
||||
|
||||
dw 0x0100 ; db
|
||||
db 0x03 ; type: pseudo-operator
|
||||
db 0x01 ; # operands
|
||||
.by_id_end:
|
||||
.operators:
|
||||
dd "hlt"
|
||||
@@ -2187,6 +2232,20 @@ tokens:
|
||||
dw 0x005A
|
||||
dd "cmp"
|
||||
dw 0x005B
|
||||
dd "jmp"
|
||||
dw 0x005C
|
||||
dd "je"
|
||||
dw 0x005D
|
||||
dd "jne"
|
||||
dw 0x005E
|
||||
dd "push"
|
||||
dw 0x005F
|
||||
dd "pop"
|
||||
dw 0x0060
|
||||
dd "out"
|
||||
dw 0x0061
|
||||
dd "db"
|
||||
dw 0x0100
|
||||
.operators_end:
|
||||
.registers:
|
||||
dd "r8"
|
||||
@@ -2460,7 +2519,7 @@ opcodes:
|
||||
db 0x00
|
||||
|
||||
dw 0x0000
|
||||
db 0x00 ; rel16/32
|
||||
db 0xE8 ; rel16/32
|
||||
db 0x00
|
||||
|
||||
dd 0x00000002 ; 2: r/m op flag
|
||||
@@ -2491,6 +2550,91 @@ opcodes:
|
||||
; 0000:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; jmp
|
||||
dw 0x005C
|
||||
db 0xFF ; r/m
|
||||
db 0x00
|
||||
|
||||
dw 0x0000
|
||||
db 0xE9 ; rel16/32
|
||||
db 0xEB ; rel8
|
||||
|
||||
dd 0x00000004 ; 4: r/m
|
||||
; 000:
|
||||
; 0: rel16/32
|
||||
; 0: rel8
|
||||
; 00:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; je
|
||||
dw 0x005D
|
||||
dw 0x0000
|
||||
|
||||
dw 0x0000
|
||||
db 0x00 ; TODO figure out the 0x0F prefix this will need
|
||||
db 0x74 ; rel8
|
||||
|
||||
dd 0x00000000 ; 00000:
|
||||
; 0: rel8
|
||||
; 00:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; jne
|
||||
dw 0x005E
|
||||
dw 0x0000
|
||||
|
||||
dw 0x0000
|
||||
db 0x00 ; TODO figure out the 0x0F prefix this will need
|
||||
db 0x75 ; rel8
|
||||
|
||||
dd 0x00000000 ; 00000:
|
||||
; 0: rel8
|
||||
; 00:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; push
|
||||
; TODO add support for the +r variation
|
||||
dw 0x005F
|
||||
db 0xFF ; r/m
|
||||
db 0x00
|
||||
|
||||
db 0x68 ; imm16/32
|
||||
db 0x6A ; imm8
|
||||
dw 0x0000
|
||||
|
||||
dd 0x00000006 ; 6: r/m
|
||||
; 0:
|
||||
; 0: imm16/32
|
||||
; 0: imm8
|
||||
; 0000:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; pop
|
||||
; TODO add support for the +r variation
|
||||
dw 0x0060
|
||||
db 0x8F ; r/m
|
||||
db 0x00
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
dd 0x00000000 ; 0: r/m
|
||||
; 0000000:
|
||||
|
||||
dd 0x00000000
|
||||
|
||||
; out
|
||||
; TODO enforce DX AL requirement, ignore ModR/M correctly
|
||||
dw 0x0061
|
||||
db 0xEE
|
||||
db 0x00
|
||||
dd 0x00000000
|
||||
dd 0x00000000
|
||||
dd 0x00000000
|
||||
.by_id_end:
|
||||
|
||||
msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00
|
||||
|
||||
Reference in New Issue
Block a user