get token id of operators

This commit is contained in:
andromeda
2026-03-13 22:49:15 +01:00
parent 0ee8ff7914
commit 4a3350fe4e

View File

@@ -487,8 +487,9 @@ tokenise:
; ask elemb whether the current byte (dl) is one of the ignored bytes
; rdi, rax and rdx are saved first; rdi is then reused for the list size
; NOTE(review): rsi is overwritten without being saved in the visible code
    push rdi
    push rax
    push rdx
    ; TODO probably should not ignore null bytes
    mov rsi, whitespace_3 ; rsi -> list of whitespace (ignored) bytes
    mov rdi, 3 ; rdi = size of list in bytes
    ; dl = current byte
    call elemb
    ; al = 0 if not whitespace, 1 if whitespace
@@ -568,47 +569,100 @@ tokenise:
jmp .loop
.operator:
    ; Scan the operator starting at [rdi], look it up in tokens.operators
    ; and write its token id to the token table.
    ;
    ; in:   rdi -> first byte of the operator
    ;       rax  = index of the next free token table entry
    ; out:  rdi -> terminating byte (space, line feed, NUL or ";"),
    ;              which is left unconsumed
    ;       rax  = rax + 1
    ;       [.expecting] updated; control continues at .loop
    ; err:  jumps to .unexpected_operator if the operator is longer than
    ;       4 bytes or has no entry in tokens.operators

    ; debug message
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_operator
    call print
    pop rsi

    push rax                            ; keep token index; rax counts bytes below
    mov dword [.pending_operator], 0    ; zero pending operator
    xor eax, eax                        ; rax = number of operator bytes stored

.operator_loop:
    mov dl, [rdi]                       ; dl = next source byte
    ; TODO have better check for operator end
    cmp dl, " "
    je .operator_break
    cmp dl, 0x0A
    je .operator_break
    cmp dl, 0x00
    je .operator_break
    cmp dl, ";"
    je .operator_break

    ; .pending_operator is a single dword: refuse a fifth byte *before*
    ; storing it, so nothing is ever written past the buffer
    ; TODO give this its own error
    ; TODO make this pop rax
    cmp eax, 4
    jae .unexpected_operator

    mov [.pending_operator + rax], dl   ; append byte (64-bit index register)
    inc rax                             ; inc byte counter
    inc rdi                             ; inc byte pointer (once per stored byte)
    jmp .operator_loop                  ; and loop

.operator_break:
    pop rax                             ; rax = token table index again
    push rcx
    push rdx
    mov edx, [.pending_operator]        ; edx = packed operator bytes (loop-invariant)
    mov rcx, tokens.operators           ; rcx -> entry in lookup table

.operator_id_loop:
    ; each entry is 6 bytes: dword of operator bytes, then word token id
    ; assumes tokens.operators_end is one past the last entry - TODO confirm
    cmp rcx, tokens.operators_end       ; check if entry still in range
    ; TODO give own error
    jae .unexpected_operator            ; unsigned: addresses are not signed values
    cmp edx, [rcx]                      ; does this entry match the operator?
    je .found_id
    add rcx, 6                          ; next entry
    jmp .operator_id_loop

.found_id:
    mov dx, [rcx + 4]                   ; dx = token id
    mov [TOKEN_TABLE_ADDR + rax * TOKEN_TABLE_ENTRY_SIZE], dx ; write to token table
    inc rax                             ; one more token in the table
    pop rdx
    pop rcx

    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
    jmp .loop
.operand:
    ; report the match: prints .found via print.debug, then .msg_operand
    push rsi
    mov rsi, .found
    call print.debug
    mov rsi, .msg_operand
    call print
    pop rsi

    ; an operand is only legal when the E_OPERAND bit is set in .expecting
    test byte [.expecting], E_OPERAND
    jz .unexpected_operand

.operand_loop:
    ; walk rdi forward until it points at a NUL, a line feed or a comma
    mov dl, [rdi]
    cmp dl, 0x00
    je .operand_break
    cmp dl, 0x0A
    je .operand_break
    cmp dl, ","
    je .operand_break
    inc rdi
    jmp .operand_loop

.operand_break:
    ; rdi is left on the terminating byte; dl holds that byte
    mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_COMMA
    jmp .loop
@@ -650,6 +704,12 @@ tokenise:
mov rsi, .msg_operand
call print
jmp halt
; error path for operators: reached when the scanned operator is too long
; for .pending_operator or has no match in tokens.operators
; NOTE: the jump sites arrive with saved registers still on the stack
.unexpected_operator:
mov rsi, .err_unexpected ; rsi -> "unexpected "
call print.error
mov rsi, .msg_operator ; rsi -> "operator." + line feed
call print
jmp halt ; control goes to halt; there is no return to .loop from here
; NUL-terminated message fragments: a prefix (.err_unexpected or .found)
; is printed first, followed by one of the .msg_* strings
.err_unexpected db "unexpected ", 0x00
.found db "found ", 0x00
.msg_whitespace db "whitespace.", 0x0A, 0x00
@@ -658,6 +718,7 @@ tokenise:
.msg_comma db "comma.", 0x0A, 0x00
.msg_operator db "operator.", 0x0A, 0x00
.msg_operand db "operand.", 0x0A, 0x00
; 4-byte scratch buffer: zeroed at .operator, filled one byte at a time
; while the operator is scanned, then compared as a single dword against
; the entries of tokens.operators
.pending_operator dd 0 ; the operator token that is pending processing
; ------------------------------------------------------------------------------
; utilities
@@ -825,8 +886,6 @@ clear_output_arena:
; data
; ------------------------------------------------------------------------------
tokens:
.by_id:
dw 0x0010 ; eax
@@ -1081,7 +1140,7 @@ msg_halt db "halted.", 0x0A, 0x00
; 8 bytes: NUL, space, line feed, carriage return, comma, then three 0x00
; bytes (presumably padding up to 8 - TODO confirm)
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
; 2 bytes: space, carriage return
whitespace_2 db " ", 0x0D
; 3 bytes: space, carriage return, NUL; tokenise passes this list to elemb
; with a size of 3
whitespace_3 db " ", 0x0D, 0x00
; test program
program: