From 4a3350fe4ec27bd31acbae3454449dbbf9d34584 Mon Sep 17 00:00:00 2001
From: andromeda
Date: Fri, 13 Mar 2026 22:49:15 +0100
Subject: [PATCH] get token id of operators

---
 twasm/asm/main.asm | 71 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 65 insertions(+), 6 deletions(-)

diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm
index 22a1084..8fc7b5b 100644
--- a/twasm/asm/main.asm
+++ b/twasm/asm/main.asm
@@ -487,8 +487,9 @@ tokenise:
 		push rdi
 		push rax
 		push rdx
-		mov rsi, whitespace_2		; rsi -> list of whitespace (ignored) bytes
-		mov rdi, 2			; rdi = size of list in bytes
+		; TODO probably should not ignore null bytes
+		mov rsi, whitespace_3		; rsi -> list of whitespace (ignored) bytes
+		mov rdi, 3			; rdi = size of list in bytes
 						; dl = current byte
 		call elemb
 						; al = 0 if not whitespace, 1 if whitespace
@@ -568,47 +569,100 @@ tokenise:
 		jmp .loop
 
 	.operator:
+		; debug message
 		push rsi
 		mov rsi, .found
 		call print.debug
 		mov rsi, .msg_operator
 		call print
 		pop rsi
+
+		push rax				; saved: rax doubles as the byte counter below
+		mov dword [.pending_operator], 0	; zero pending operator
+		xor eax, eax				; rax = number of bytes in operator
+
 		.operator_loop:
 			mov dl, [rdi]	; next byte
+			; TODO have better check for operator end
 			cmp dl, " "
 			je .operator_break
 			cmp dl, 0x0A
 			je .operator_break
+			cmp dl, 0x00
+			je .operator_break
 			cmp dl, ";"
 			je .operator_break
 
-			inc rdi		; inc byte counter
+			; TODO give this its own error
+			; TODO make this pop rax
+			; the bound is checked right before the store so that a fifth
+			; byte is never written past the 4-byte .pending_operator
+			cmp eax, 4			; buffer already full?
+			jae .unexpected_operator	; if so, operator is too long: error
+			mov [.pending_operator + rax], dl	; append byte to pending operator
+
+			inc rax		; inc byte counter
+			inc rdi		; inc byte pointer
 			jmp .operator_loop	; and loop
+
 		.operator_break:
+		pop rax				; restore rax; it indexes the token table below
+
+		; look the pending operator up in tokens.operators; each entry is
+		; 6 bytes: 4 bytes compared against the operator + a 2-byte token id
+		push rcx
+		push rdx
+		mov rcx, tokens.operators	; rcx -> entry in lookup table
+		mov edx, [.pending_operator]	; edx = operator bytes, zero-padded
+
+		.operator_id_loop:
+			; TODO give own error
+			cmp rcx, tokens.operators_end	; assumes _end is one past the last entry - TODO confirm
+			jae .unexpected_operator	; ran off the table: unknown operator
+
+			cmp [rcx], edx			; entry matches pending operator?
+			je .found_id
+
+			add rcx, 6			; next entry
+			jmp .operator_id_loop
+
+		.found_id:
+		mov dx, [rcx + 4]		; dx = token id
+
+		mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], dx	; write to token table
+		inc rax				; advance token table index
+
+		pop rdx
+		pop rcx
+
 		mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
 		jmp .loop
 
 	.operand:
+		; debug message
 		push rsi
 		mov rsi, .found
 		call print.debug
 		mov rsi, .msg_operand
 		call print
 		pop rsi
+
 		test byte [.expecting], E_OPERAND	; make sure an operand was expected
 		jz .unexpected_operand			; if not, error
 
 		.operand_loop:
 			mov dl, [rdi]
+
 			cmp dl, ","
 			je .operand_break
 			cmp dl, 0x0A
 			je .operand_break
 			cmp dl, 0x00
 			je .operand_break
+
 			inc rdi
 			jmp .operand_loop
+
 		.operand_break:
 		mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
 		jmp .loop
@@ -650,6 +704,12 @@ tokenise:
 		mov rsi, .msg_operand
 		call print
 		jmp halt
+	.unexpected_operator:
+		mov rsi, .err_unexpected
+		call print.error
+		mov rsi, .msg_operator
+		call print
+		jmp halt
 	.err_unexpected db "unexpected ", 0x00
 	.found db "found ", 0x00
 	.msg_whitespace db "whitespace.", 0x0A, 0x00
@@ -658,6 +718,7 @@ tokenise:
 	.msg_comma db "comma.", 0x0A, 0x00
 	.msg_operator db "operator.", 0x0A, 0x00
 	.msg_operand db "operand.", 0x0A, 0x00
+	.pending_operator dd 0		; the operator token that is pending processing
 
 ; ------------------------------------------------------------------------------
 ; utilities
@@ -825,8 +886,6 @@ clear_output_arena:
 ; data
 ; ------------------------------------------------------------------------------
 
-
-
 tokens:
 	.by_id:
 		dw 0x0010 ; eax
@@ -1081,7 +1140,7 @@ msg_halt db "halted.", 0x0A, 0x00
 
 token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
 
-whitespace_2 db " ", 0x0D
+whitespace_3 db " ", 0x0D, 0x00
 
 ; test program
 program: