parse brackets, improve docs

This commit is contained in:
andromeda
2026-03-08 12:35:14 +01:00
parent 172566dfe3
commit e10d771743
2 changed files with 90 additions and 6 deletions

View File

@@ -76,28 +76,60 @@ tokenise:
; deal with terminator character (reported as 0 length token)
cmp rdx, 0
je .token_length0
jne .continue
jne .continue0
.token_length0:
mov ax, 0xFE00 ; terminator character
mov al, [rdi] ; byte of terminator
mov edx, 1 ; byte length is 1
.continue:
.continue0:
add rdi, rdx ; current byte + length of token = next unread byte
mov [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], ax ; fill next entry
; in token table
; TODO fix undefined behaviour when open brackets and closing brackets aren't
; correctly paired or are too far apart (the open bracket's entry index and
; the distance between the brackets are each stored in a single byte)
cmp ax, 0x0051 ; check if read token is an open bracket
je .open_bracket ; if so, handle it
jne .continue_open_bracket ; if not, continue
.open_bracket:
; TODO make brackets able to hold more
mov [.data_open_bracket], cl ; record which entry the open bracket is at
.continue_open_bracket:
cmp ax, 0x0052 ; check if read token is a closing bracket
je .close_bracket ; if so, handle it
jne .continue_close_bracket ; if not, continue
.close_bracket:
; rewrite open bracket token entry with a filled out one
mov dl, [.data_open_bracket]
sub cl, dl
mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl
mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10
add cl, dl
.continue_close_bracket:
inc rcx ; +1 token processed
jmp .loop
.break:
ret
.data_open_bracket db 0x00 ; represents the token # of the latest open bracket
; ------------------------------------------------------------------------------
; identify_token
;
; description:
; returns the id of a given token
; returns the id of a given token. If a token has more than one possible
; representation, like the open bracket, this function returns the one that
; doesn't depend on the surrounding tokens, because it has no information
; about them. In other words, if a representation isn't in the `tokens` data
; structure, this function doesn't see it. If rdi points to a terminator
; byte, this function reports it as an unrecognised token.
;
; parameters:
; rdi -> first byte of token
@@ -122,6 +154,7 @@ identify_token:
jmp .unrecognised ; else unrecognised
; length1
.start_length1:
mov rcx, tokens.length1 ; rcx -> list of known tokens
@@ -142,6 +175,7 @@ identify_token:
mov ax, [rcx + 1] ; return id of token
ret
; length2
.start_length2:
mov rcx, tokens.length2 ; rcx -> list of known tokens
@@ -162,6 +196,7 @@ identify_token:
mov ax, [rcx + 2] ; return id of token
ret
; length3
.start_length3:
mov rcx, tokens.length3 ; rcx -> list of known tokens
@@ -187,6 +222,7 @@ identify_token:
mov ax, [rcx + 3] ; return id of token
ret
; length4
.start_length4:
mov rcx, tokens.length4 ; rcx -> list of known tokens
@@ -215,7 +251,8 @@ identify_token:
; ------------------------------------------------------------------------------
; identify_next_token
; description:
; like identify_token, except it automatically finds the length
; like identify_token, except it automatically finds the length. If rdi
; points to a terminator byte, it returns a length of 0.
;
; parameters:
; rdi -> first byte of token
@@ -1062,9 +1099,11 @@ token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
debug_string db "debug_string", 0x0A, 0x00
; test program
; Assembly source text used as test input. Each source line ends with a line
; feed (0x0A) and the text is followed by 0x00 bytes.
program:
db "xor eax, eax", 0x0A
db "inc rax", 0x0A
db "mov [ rax ], rdx", 0x0A
db "hlt", 0x0A
db 0x00
db 0x00 ; NUL terminator so the program text can also be printed as a string
.size db $ - program - 1 ; bytes emitted since `program`, minus 1; held in one byte