add some operators, stop printing whitespace while tokenising, add frame for pseudo-op support
This commit is contained in:
@@ -160,12 +160,13 @@ metadata about some tokens in the following form:
|
|||||||
|
|
||||||
the `type` hex digit is defined as the following:
|
the `type` hex digit is defined as the following:
|
||||||
|
|
||||||
| hex | meaning | examples |
|
| hex | meaning | examples |
|
||||||
|-----|----------|-|
|
|-----|-----------------|-|
|
||||||
| 0x0 | ignored | |
|
| 0x0 | ignored | |
|
||||||
| 0x1 | operator | `mov`, `hlt` |
|
| 0x1 | operator | `mov`, `hlt` |
|
||||||
| 0x2 | register | `rsp`, `al` |
|
| 0x2 | register | `rsp`, `al` |
|
||||||
| 0xF | unknown | any token ID not represented in the lookup table |
|
| 0x3 | pseudo-operator | `db` |
|
||||||
|
| 0xF | unknown | any token ID not represented in the lookup table |
|
||||||
|
|
||||||
type metadata for the different types is as follows:
|
type metadata for the different types is as follows:
|
||||||
|
|
||||||
@@ -210,6 +211,17 @@ type metadata for the different types is as follows:
|
|||||||
11b ; 64 bit
|
11b ; 64 bit
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
1 byte
|
||||||
|
+----------+
|
||||||
|
| type 0x3 |
|
||||||
|
+----------+
|
||||||
|
| 31 24 |
|
||||||
|
+----------+
|
||||||
|
| reserved |
|
||||||
|
+----------+
|
||||||
|
```
|
||||||
|
|
||||||
#### `opcodes.by_id`
|
#### `opcodes.by_id`
|
||||||
|
|
||||||
entries are as follows:
|
entries are as follows:
|
||||||
@@ -396,6 +408,13 @@ supported tokens are listed below
|
|||||||
| call | 0x0059 | |
|
| call | 0x0059 | |
|
||||||
| ret | 0x005A | |
|
| ret | 0x005A | |
|
||||||
| cmp | 0x005B | |
|
| cmp | 0x005B | |
|
||||||
|
| jmp | 0x005C | |
|
||||||
|
| je | 0x005D | |
|
||||||
|
| jne | 0x005E | |
|
||||||
|
| push | 0x005F | |
|
||||||
|
| pop | 0x0060 | |
|
||||||
|
| out | 0x0061 | |
|
||||||
|
| db | 0x0100 | pseudo-operator |
|
||||||
| | 0x10XX | some memory address; `XX` is as specified below |
|
| | 0x10XX | some memory address; `XX` is as specified below |
|
||||||
| | 0x20XX | some constant; `XX` is as specified below |
|
| | 0x20XX | some constant; `XX` is as specified below |
|
||||||
| | 0x3XXX | some label definition; `XXX` is its entry index in the label table |
|
| | 0x3XXX | some label definition; `XXX` is its entry index in the label table |
|
||||||
|
|||||||
@@ -984,13 +984,6 @@ tokenise:
|
|||||||
jmp .loop
|
jmp .loop
|
||||||
|
|
||||||
.skip_byte_whitespace:
|
.skip_byte_whitespace:
|
||||||
push rsi
|
|
||||||
mov rsi, .found
|
|
||||||
call print.debug
|
|
||||||
mov rsi, .msg_whitespace
|
|
||||||
call print
|
|
||||||
pop rsi
|
|
||||||
|
|
||||||
test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
|
test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected
|
||||||
jz .unexpected_whitespace ; if not, error
|
jz .unexpected_whitespace ; if not, error
|
||||||
inc rdi
|
inc rdi
|
||||||
@@ -1088,14 +1081,6 @@ tokenise:
|
|||||||
jmp .loop
|
jmp .loop
|
||||||
|
|
||||||
.operator:
|
.operator:
|
||||||
; debug message
|
|
||||||
push rsi
|
|
||||||
mov rsi, .found
|
|
||||||
call print.debug
|
|
||||||
mov rsi, .msg_operator
|
|
||||||
call print
|
|
||||||
pop rsi
|
|
||||||
|
|
||||||
mov rcx, rax ; rcx = number of tokens processed
|
mov rcx, rax ; rcx = number of tokens processed
|
||||||
xor eax, eax ; eax = number of bytes in operator
|
xor eax, eax ; eax = number of bytes in operator
|
||||||
mov [.pending_operator], eax ; zero pending operator
|
mov [.pending_operator], eax ; zero pending operator
|
||||||
@@ -1124,7 +1109,6 @@ tokenise:
|
|||||||
jge .break
|
jge .break
|
||||||
jmp .operator_loop ; and loop
|
jmp .operator_loop ; and loop
|
||||||
.operator_break:
|
.operator_break:
|
||||||
; rax already pushed from .operator
|
|
||||||
push rdi
|
push rdi
|
||||||
|
|
||||||
mov edi, [.pending_operator] ; edi = operator to be searched
|
mov edi, [.pending_operator] ; edi = operator to be searched
|
||||||
@@ -1132,15 +1116,47 @@ tokenise:
|
|||||||
; ax = operator's token ID
|
; ax = operator's token ID
|
||||||
push rcx
|
push rcx
|
||||||
mov ecx, eax ; cx = operator's token ID
|
mov ecx, eax ; cx = operator's token ID
|
||||||
|
mov edi, eax ; di = operator's token ID
|
||||||
|
call get_tte_type
|
||||||
|
; al = token type
|
||||||
|
mov sil, al
|
||||||
pop rax ; rax = tokens processed
|
pop rax ; rax = tokens processed
|
||||||
pop rdi ; rdi = byte counter
|
pop rdi ; rdi = byte counter
|
||||||
|
|
||||||
|
cmp sil, 0x3 ; pseudo-operator
|
||||||
|
je .pseudo_operator
|
||||||
|
|
||||||
|
cmp sil, 0x1 ; operator
|
||||||
|
jne .unexpected_operator
|
||||||
|
|
||||||
|
; debug message
|
||||||
|
push rsi
|
||||||
|
mov rsi, .found
|
||||||
|
call print.debug
|
||||||
|
mov rsi, .msg_operator
|
||||||
|
call print
|
||||||
|
pop rsi
|
||||||
|
|
||||||
|
|
||||||
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
||||||
inc rax ; plus 1 token processed
|
inc rax ; plus 1 token processed
|
||||||
|
|
||||||
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
|
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
|
||||||
jmp .loop
|
jmp .loop
|
||||||
|
|
||||||
|
.pseudo_operator:
|
||||||
|
; debug message
|
||||||
|
push rsi
|
||||||
|
mov rsi, .found
|
||||||
|
call print.debug
|
||||||
|
mov rsi, .msg_pseudo_operator
|
||||||
|
call print
|
||||||
|
pop rsi
|
||||||
|
|
||||||
|
mov [TOKEN_TABLE_ADDR + rax * 2], cx
|
||||||
|
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
|
||||||
|
jmp .loop
|
||||||
|
|
||||||
.operand:
|
.operand:
|
||||||
; debug message
|
; debug message
|
||||||
push rsi
|
push rsi
|
||||||
@@ -1287,6 +1303,7 @@ tokenise:
|
|||||||
.msg_label db "label.", 0x0A, 0x00
|
.msg_label db "label.", 0x0A, 0x00
|
||||||
.msg_operator db "operator.", 0x0A, 0x00
|
.msg_operator db "operator.", 0x0A, 0x00
|
||||||
.msg_operand db "operand.", 0x0A, 0x00
|
.msg_operand db "operand.", 0x0A, 0x00
|
||||||
|
.msg_pseudo_operator db "pseudo_operator.", 0x0A, 0x00
|
||||||
.pending_operator dd 0 ; the operator token that is pending processing
|
.pending_operator dd 0 ; the operator token that is pending processing
|
||||||
|
|
||||||
; ------------------------------------------------------------------------------
|
; ------------------------------------------------------------------------------
|
||||||
@@ -2163,6 +2180,34 @@ tokens:
|
|||||||
dw 0x005B ; cmp
|
dw 0x005B ; cmp
|
||||||
db 0x01 ; type: operator
|
db 0x01 ; type: operator
|
||||||
db 0x02 ; # operands
|
db 0x02 ; # operands
|
||||||
|
|
||||||
|
dw 0x005C ; jmp
|
||||||
|
db 0x01 ; type: operator
|
||||||
|
db 0x01 ; # operands
|
||||||
|
|
||||||
|
dw 0x005D ; je
|
||||||
|
db 0x01 ; type: operator
|
||||||
|
db 0x01 ; # operands
|
||||||
|
|
||||||
|
dw 0x005E ; jne
|
||||||
|
db 0x01 ; type: operator
|
||||||
|
db 0x01 ; # operands
|
||||||
|
|
||||||
|
dw 0x005F ; push
|
||||||
|
db 0x01 ; type: operator
|
||||||
|
db 0x01 ; # operands
|
||||||
|
|
||||||
|
dw 0x0060 ; pop
|
||||||
|
db 0x01 ; type: operator
|
||||||
|
db 0x01 ; # operands
|
||||||
|
|
||||||
|
dw 0x0061 ; out
|
||||||
|
db 0x01 ; type: operator
|
||||||
|
db 0x02 ; # operands
|
||||||
|
|
||||||
|
dw 0x0100 ; db
|
||||||
|
db 0x03 ; type: pseudo-operator
|
||||||
|
db 0x01 ; # operands
|
||||||
.by_id_end:
|
.by_id_end:
|
||||||
.operators:
|
.operators:
|
||||||
dd "hlt"
|
dd "hlt"
|
||||||
@@ -2187,6 +2232,20 @@ tokens:
|
|||||||
dw 0x005A
|
dw 0x005A
|
||||||
dd "cmp"
|
dd "cmp"
|
||||||
dw 0x005B
|
dw 0x005B
|
||||||
|
dd "jmp"
|
||||||
|
dw 0x005C
|
||||||
|
dd "je"
|
||||||
|
dw 0x005D
|
||||||
|
dd "jne"
|
||||||
|
dw 0x005E
|
||||||
|
dd "push"
|
||||||
|
dw 0x005F
|
||||||
|
dd "pop"
|
||||||
|
dw 0x0060
|
||||||
|
dd "out"
|
||||||
|
dw 0x0061
|
||||||
|
dd "db"
|
||||||
|
dw 0x0100
|
||||||
.operators_end:
|
.operators_end:
|
||||||
.registers:
|
.registers:
|
||||||
dd "r8"
|
dd "r8"
|
||||||
@@ -2460,7 +2519,7 @@ opcodes:
|
|||||||
db 0x00
|
db 0x00
|
||||||
|
|
||||||
dw 0x0000
|
dw 0x0000
|
||||||
db 0x00 ; rel16/32
|
db 0xE8 ; rel16/32
|
||||||
db 0x00
|
db 0x00
|
||||||
|
|
||||||
dd 0x00000002 ; 2: r/m op flag
|
dd 0x00000002 ; 2: r/m op flag
|
||||||
@@ -2491,6 +2550,91 @@ opcodes:
|
|||||||
; 0000:
|
; 0000:
|
||||||
|
|
||||||
dd 0x00000000
|
dd 0x00000000
|
||||||
|
|
||||||
|
; jmp
|
||||||
|
dw 0x005C
|
||||||
|
db 0xFF ; r/m
|
||||||
|
db 0x00
|
||||||
|
|
||||||
|
dw 0x0000
|
||||||
|
db 0xE9 ; rel16/32
|
||||||
|
db 0xEB ; rel8
|
||||||
|
|
||||||
|
dd 0x00000004 ; 4: r/m
|
||||||
|
; 000:
|
||||||
|
; 0: rel16/32
|
||||||
|
; 0: rel8
|
||||||
|
; 00:
|
||||||
|
|
||||||
|
dd 0x00000000
|
||||||
|
|
||||||
|
; je
|
||||||
|
dw 0x005D
|
||||||
|
dw 0x0000
|
||||||
|
|
||||||
|
dw 0x0000
|
||||||
|
db 0x00 ; TODO figure out the 0x0F prefix this will need
|
||||||
|
db 0x74 ; rel8
|
||||||
|
|
||||||
|
dd 0x00000000 ; 00000:
|
||||||
|
; 0: rel8
|
||||||
|
; 00:
|
||||||
|
|
||||||
|
dd 0x00000000
|
||||||
|
|
||||||
|
; jne
|
||||||
|
dw 0x005E
|
||||||
|
dw 0x0000
|
||||||
|
|
||||||
|
dw 0x0000
|
||||||
|
db 0x00 ; TODO figure out the 0x0F prefix this will need
|
||||||
|
db 0x75 ; rel8
|
||||||
|
|
||||||
|
dd 0x00000000 ; 00000:
|
||||||
|
; 0: rel8
|
||||||
|
; 00:
|
||||||
|
|
||||||
|
dd 0x00000000
|
||||||
|
|
||||||
|
; push
|
||||||
|
; TODO add support for the +r variation
|
||||||
|
dw 0x005F
|
||||||
|
db 0xFF ; r/m
|
||||||
|
db 0x00
|
||||||
|
|
||||||
|
db 0x68 ; imm16/32
|
||||||
|
db 0x6A ; imm8
|
||||||
|
dw 0x0000
|
||||||
|
|
||||||
|
dd 0x00000006 ; 6: r/m
|
||||||
|
; 0:
|
||||||
|
; 0: imm16/32
|
||||||
|
; 0: imm8
|
||||||
|
; 0000:
|
||||||
|
|
||||||
|
dd 0x00000000
|
||||||
|
|
||||||
|
; pop
|
||||||
|
; TODO add support for the +r variation
|
||||||
|
dw 0x0060
|
||||||
|
db 0x8F ; r/m
|
||||||
|
db 0x00
|
||||||
|
|
||||||
|
dd 0x00000000
|
||||||
|
|
||||||
|
dd 0x00000000 ; 0: r/m
|
||||||
|
; 0000000:
|
||||||
|
|
||||||
|
dd 0x00000000
|
||||||
|
|
||||||
|
; out
|
||||||
|
; TODO enforce DX AL requirement, ignore ModR/M correctly
|
||||||
|
dw 0x0061
|
||||||
|
db 0xEE
|
||||||
|
db 0x00
|
||||||
|
dd 0x00000000
|
||||||
|
dd 0x00000000
|
||||||
|
dd 0x00000000
|
||||||
.by_id_end:
|
.by_id_end:
|
||||||
|
|
||||||
msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00
|
msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00
|
||||||
|
|||||||
Reference in New Issue
Block a user