From a0a99e3116bb3d8d0b9e503a826f0c0e0b708ecb Mon Sep 17 00:00:00 2001 From: andromeda Date: Mon, 30 Mar 2026 20:11:36 +0200 Subject: [PATCH] add some operators, stop printing whitespace while tokenising, add frame for pseudo-op support --- twasm/README.md | 31 ++++++-- twasm/asm/main.asm | 178 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 186 insertions(+), 23 deletions(-) diff --git a/twasm/README.md b/twasm/README.md index 774f568..aed9ea1 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -160,12 +160,13 @@ metadata about some tokens in the following form: the `type` hex digit is defined as the following: -| hex | meaning | examples | -|-----|----------|-| -| 0x0 | ignored | | -| 0x1 | operator | `mov`, `hlt` | -| 0x2 | register | `rsp`, `al` | -| 0xF | unknown | any token ID not represented in the lookup table | +| hex | meaning | examples | +|-----|-----------------|-| +| 0x0 | ignored | | +| 0x1 | operator | `mov`, `hlt` | +| 0x2 | register | `rsp`, `al` | +| 0x3 | pseudo-operator | `db` | +| 0xF | unknown | any token ID not represented in the lookup table | type metadata for the different types is as follows: @@ -210,6 +211,17 @@ type metadata for the different types is as follows: 11b ; 64 bit ``` +``` +1 byte ++----------+ +| type 0x3 | ++----------+ +| 31 24 | ++----------+ +| reserved | ++----------+ +``` + #### `opcodes.by_id` entries are as follows: @@ -396,6 +408,13 @@ supported tokens are listed below | call | 0x0059 | | | ret | 0x005A | | | cmp | 0x005B | | +| jmp | 0x005C | | +| je | 0x005D | | +| jne | 0x005E | | +| push | 0x005F | | +| pop | 0x0060 | | +| out | 0x0061 | | +| db | 0x0100 | pseudo-operator | | | 0x10XX | some memory address; `XX` is as specified below | | | 0x20XX | some constant; `XX` is as specified below | | | 0x3XXX | some label definition; `XXX` is its entry index in the label table | diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index 156bf02..1666336 100644 --- a/twasm/asm/main.asm +++ 
b/twasm/asm/main.asm @@ -984,13 +984,6 @@ tokenise: jmp .loop .skip_byte_whitespace: - push rsi - mov rsi, .found - call print.debug - mov rsi, .msg_whitespace - call print - pop rsi - test byte [.expecting], E_WHITESPACE ; make sure a whitespace was expected jz .unexpected_whitespace ; if not, error inc rdi @@ -1088,14 +1081,6 @@ tokenise: jmp .loop .operator: - ; debug message - push rsi - mov rsi, .found - call print.debug - mov rsi, .msg_operator - call print - pop rsi - mov rcx, rax ; rcx = number of tokens processed xor eax, eax ; eax = number of bytes in operator mov [.pending_operator], eax ; zero pending operator @@ -1124,7 +1109,6 @@ tokenise: jge .break jmp .operator_loop ; and loop .operator_break: - ; rax already pushed from .operator push rdi mov edi, [.pending_operator] ; edi = operator to be searched @@ -1132,15 +1116,47 @@ tokenise: ; ax = operator's token ID push rcx mov ecx, eax ; cx = operator's token ID + mov edi, eax ; di = operator's token ID + call get_tte_type ; [review: cx is stored to the token table after this call, so get_tte_type must leave rcx intact; verify against its definition] + ; al = token type [review: the next instruction writes sil, the low byte of rsi; this routine saves and restores rsi around its debug prints, so confirm rsi holds nothing live here] + mov sil, al pop rax ; rax = tokens processed pop rdi ; rdi = byte counter + cmp sil, 0x3 ; type 0x3: pseudo-operator, handled on its own path + je .pseudo_operator + + cmp sil, 0x1 ; type 0x1: operator; every other type is rejected by the jump below + jne .unexpected_operator + + ; debug message + push rsi + mov rsi, .found + call print.debug + mov rsi, .msg_operator + call print + pop rsi + + mov [TOKEN_TABLE_ADDR + rax * 2], cx inc rax ; plus 1 token processed mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND jmp .loop + .pseudo_operator: ; reached when the type compare above matched 0x3; cx = token ID, rax = tokens processed + ; debug message + push rsi + mov rsi, .found + call print.debug + mov rsi, .msg_pseudo_operator + call print + pop rsi + + mov [TOKEN_TABLE_ADDR + rax * 2], cx ; write the 16-bit token ID into word slot rax of the token table [review: rax is not incremented afterwards, unlike the operator path; confirm that is intended for this frame] + mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND + jmp .loop + .operand: ; debug message push rsi @@ -1287,6 +1303,7 @@ tokenise: .msg_label db "label.", 0x0A, 0x00 .msg_operator db "operator.", 0x0A, 0x00 .msg_operand db "operand.", 0x0A, 0x00 + .msg_pseudo_operator db "pseudo_operator.", 0x0A, 0x00 
.pending_operator dd 0 ; the operator token that is pending processing ; ------------------------------------------------------------------------------ @@ -2163,6 +2180,34 @@ tokens: dw 0x005B ; cmp db 0x01 ; type: operator db 0x02 ; # operands + + dw 0x005C ; jmp + db 0x01 ; type: operator + db 0x01 ; # operands + + dw 0x005D ; je + db 0x01 ; type: operator + db 0x01 ; # operands + + dw 0x005E ; jne + db 0x01 ; type: operator + db 0x01 ; # operands + + dw 0x005F ; push + db 0x01 ; type: operator + db 0x01 ; # operands + + dw 0x0060 ; pop + db 0x01 ; type: operator + db 0x01 ; # operands + + dw 0x0061 ; out + db 0x01 ; type: operator + db 0x02 ; # operands + + dw 0x0100 ; db + db 0x03 ; type: pseudo-operator [review: the operand count that follows is fixed at 1; confirm that is enough for how twasm intends db to be used] + db 0x01 ; # operands .by_id_end: .operators: dd "hlt" @@ -2187,6 +2232,20 @@ tokens: dw 0x005A dd "cmp" dw 0x005B + dd "jmp" ; each entry here is the mnemonic text in a 4-byte field followed by its 16-bit token ID + dw 0x005C + dd "je" ; shorter than 4 characters, NASM zero-pads the dword + dw 0x005D + dd "jne" + dw 0x005E + dd "push" ; exactly 4 characters, fills the whole field + dw 0x005F + dd "pop" + dw 0x0060 + dd "out" + dw 0x0061 + dd "db" + dw 0x0100 .operators_end: .registers: dd "r8" @@ -2460,7 +2519,7 @@ opcodes: db 0x00 dw 0x0000 - db 0x00 ; rel16/32 + db 0xE8 ; rel16/32 db 0x00 dd 0x00000002 ; 2: r/m op flag @@ -2491,6 +2550,91 @@ opcodes: ; 0000: dd 0x00000000 + + ; jmp + dw 0x005C + db 0xFF ; r/m (Intel SDM: JMP r/m64 is FF /4; the 4 is the value in the flags dword below) + db 0x00 + + dw 0x0000 + db 0xE9 ; rel16/32 + db 0xEB ; rel8 + + dd 0x00000004 ; 4: r/m + ; 000: + ; 0: rel16/32 + ; 0: rel8 + ; 00: + + dd 0x00000000 + + ; je + dw 0x005D + dw 0x0000 + + dw 0x0000 + db 0x00 ; TODO figure out the 0x0F prefix this will need (Intel SDM: near JE is the two-byte opcode 0F 84) + db 0x74 ; rel8 + + dd 0x00000000 ; 00000: + ; 0: rel8 + ; 00: + + dd 0x00000000 + + ; jne + dw 0x005E + dw 0x0000 + + dw 0x0000 + db 0x00 ; TODO figure out the 0x0F prefix this will need (Intel SDM: near JNE is the two-byte opcode 0F 85) + db 0x75 ; rel8 + + dd 0x00000000 ; 00000: + ; 0: rel8 + ; 00: + + dd 0x00000000 + + ; push + ; TODO add support for the +r variation (Intel SDM: PUSH r64 is 50+rd) + dw 0x005F + db 0xFF ; r/m (Intel SDM: PUSH r/m64 is FF /6; the 6 is the value in the flags dword below) + db 0x00 + + db 0x68 ; imm16/32 + db 0x6A ; imm8 + dw 0x0000 ; occupies the two bytes that hold rel16/32 and rel8 in the jmp entry + + dd 0x00000006 ; 6: r/m + ; 0: + ; 0: imm16/32 + ; 0: imm8 + ; 0000: + + 
dd 0x00000000 + + ; pop + ; TODO add support for the +r variation (Intel SDM: POP r64 is 58+rd) + dw 0x0060 + db 0x8F ; r/m (Intel SDM: POP r/m64 is 8F /0, matching the 0 in the flags dword below) + db 0x00 + + dd 0x00000000 ; the four bytes labelled imm16/32, imm8, rel16/32 and rel8 in the push and jmp entries, all zero here + + dd 0x00000000 ; 0: r/m + ; 0000000: + + dd 0x00000000 + + ; out + ; TODO enforce DX AL requirement, ignore ModR/M correctly + dw 0x0061 + db 0xEE ; Intel SDM: EE is OUT DX, AL; it sits in the byte the other entries label r/m + db 0x00 + dd 0x00000000 ; same position as the imm and rel opcode bytes of the other entries, all zero + dd 0x00000000 ; presumably the flags dword, as in the pop entry; no bits set + dd 0x00000000 .by_id_end: msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00