add pseudo-operator support to an extremely minimal degree

This commit is contained in:
andromeda
2026-04-02 23:38:10 +02:00
parent 57f8f5a118
commit c1463e1fef

View File

@@ -51,8 +51,7 @@ start:
call clear_token_table
call clear_label_tables
mov rdi, program ; -> program
mov rsi, [program.size] ; = size of program
mov rdi, program
call tokenise
; rax = number of tokens in token table
mov rdi, rax
@@ -104,10 +103,13 @@ assemble:
; al = type
pop rdi ; di = tte
cmp al, 0x1 ; check if next tte is an operator
je .operator ; if so, handle
cmp al, 0x1 ; check if next tte is an operator
je .operator
cmp al, 0x4 ; check if next tte is a label
cmp al, 0x3 ; check if next tte is a pseudo-operator
je .pseudo_operator
cmp al, 0x4 ; check if next tte is a label
je .label
jmp .unexpected_token ; otherwise, fail
@@ -123,6 +125,39 @@ assemble:
and edi, 0x0FFF ; edi = index to add address hash to
call add_label_address
jmp .loop_next_token
.pseudo_operator:
; handles a tte whose type was reported as 0x3 (pseudo-operator).
; di = tte being handled; only id 0x0100 is accepted, which is routed to
; .pseudo_operator_db. any other id ends up at .unexpected_token.
push rsi ; keep rsi intact across the debug print
mov rsi, .msg_pseudo_operator
call print.debug
pop rsi
; NOTE(review): relies on print.debug leaving di untouched - confirm
cmp di, 0x0100 ; same id that get_tte_type special-cases
je .pseudo_operator_db
jmp .unexpected_token
.pseudo_operator_db:
; copies 8 bytes straight out of the token table into the output buffer.
; NOTE(review): always 8 bytes are written, whatever the size of the operand
; in the source text - presumably part of the "minimal" support; confirm
call .next_token
jge .break ; presumably acts on flags left by .next_token - TODO confirm
call .get_next_tte
cmp di, 0x2000 ; the entry following the pseudo-operator must be 0x2000
jne .unexpected_token ; presumably a constant marker - TODO confirm
call .next_token
jge .break
mov ecx, [.tokens_processed] ; ecx = entry index into the token table
mov rax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 8 bytes from the tt (entries are 2 bytes apart)
mov ecx, [.buffer_pointer] ; only 32 bits are loaded; rcx is zero-extended
mov [rcx], rax ; and add them to the buffer
add ecx, 8 ; advance past the 8 bytes just written
mov [.buffer_pointer], ecx
; NOTE(review): the three calls below presumably step over the remaining
; 2-byte entries of the 8 bytes just copied - confirm against .next_token
call .next_token
jge .break
call .next_token
jge .break
call .next_token
jge .break
jmp .loop_next_token
.operator:
push rdi
; di = tte of operator
@@ -905,7 +940,7 @@ assemble:
mov rsi, .msg_break
call print.debug
pop rsi
ret
jmp halt
.unexpected_token:
call .flush_write_buffer
@@ -1037,7 +1072,7 @@ assemble:
.msg_operator_2_register_memory db "operator_2_register_memory", 0x0A, 0x00
.msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00
.msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00
.msg_potential_label db "potential_label", 0x0A, 0x00
.msg_pseudo_operator db "pseudo_operator", 0x0A, 0x00
; ------------------------------------------------------------------------------
; get_tte_type
@@ -1075,7 +1110,10 @@ get_tte_type:
mov cx, [tokens.by_id + eax * 4] ; next entry in tokens.by_id
cmp cx, di
cmp di, 0x0100
je .pseudo_operator
cmp di, cx
je .found
inc eax
@@ -1088,7 +1126,9 @@ get_tte_type:
mov eax, UNRECOGNISED_ID_TYPE
and eax, 0xF
ret
.pseudo_operator:
; reached when di = 0x0100
; returns eax = 0x3, the type code `assemble` dispatches to .pseudo_operator
mov eax, 0x3
ret
.label:
; returns eax = 0x4, the type code `assemble` dispatches to .label
mov eax, 0x4
ret
@@ -1293,15 +1333,13 @@ get_reg_bits:
; ------------------------------------------------------------------------------
; tokenise
; TODO write tests
;
; description:
; represents the program at the given address and puts it in the token table
; it's probably desirable to clear the token table before calling this function.
; creates a tokenised definition of the null-terminated program at rdi and puts
; it in memory at TOKEN_TABLE_ADDR
;
; parameters:
; rdi -> first byte of program
; rsi = size of program in bytes
;
; returned:
; rax = number of tokens processed
@@ -1309,16 +1347,15 @@ get_reg_bits:
tokenise:
; rdi -> current byte of program
add rsi, rdi ; rsi -> last byte of program
xor eax, eax ; rax = number of tokens processed
xor edx, edx ; dl = current byte of program
.loop:
cmp rdi, rsi ; if current byte greater than last byte
jge .break ; then break
mov dl, [rdi] ; dl = current byte
cmp dl, 0x00 ; if current byte is null
je .break ; then break
cmp dl, ";" ; if current byte is the start of a comment
je .comment ; then handle the comment
@@ -1328,7 +1365,6 @@ tokenise:
cmp dl, "," ; if current byte is a comma
je .comma ; then handle the comma
push rsi
push rdi
push rax
push rdx
@@ -1341,7 +1377,6 @@ tokenise:
pop rdx ; dl = current byte
pop rax ; rax = number of tokens processed
pop rdi ; rdi -> current byte of program
pop rsi ; rsi -> last byte of program
jnz .skip_byte_whitespace
test byte [.expecting], E_LABEL ; check if a label is expected
@@ -1351,12 +1386,10 @@ tokenise:
jmp .operand ; else, handle as an operand
.comment:
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_comment
call print
pop rsi ; rsi -> last byte of program
test byte [.expecting], E_COMMENT ; make sure a comment is expected
jz .unexpected_comment ; if not, error
@@ -1367,8 +1400,6 @@ tokenise:
je .comment_break ; then break
inc rdi ; point to next unread byte
cmp rdi, rsi
jge .break
jmp .comment_loop
.comment_break:
jmp .loop
@@ -1380,12 +1411,10 @@ tokenise:
jmp .loop ; else, loop
.comma: ; found comma
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_comma
call print
pop rsi
test byte [.expecting], E_COMMA ; make sure a comma was expected
jz .unexpected_comma ; if not, error
@@ -1394,12 +1423,10 @@ tokenise:
jmp .loop ; and loop
.newline_mk_flags:
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_newline
call print
pop rsi
test byte [.expecting], E_NEWLINE ; make sure a newline was expected
jz .unexpected_newline ; if not, error
@@ -1425,20 +1452,15 @@ tokenise:
cmp dl, ";"
je .label_not_found
inc eax ; inc byte counter
cmp rdi, rsi
jge .break
jmp .label_loop
.label_break:
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_label
call print
pop rsi ; rsi -> last byte of program
push rax
push rdi
push rsi
mov rsi, rdi ; rsi -> start of string
mov rdi, rax ; rdi = size of string
@@ -1451,7 +1473,6 @@ tokenise:
and cx, 0x0FFF
or cx, 0x3000
pop rsi ; rsi -> last byte of program
pop rdi ; rdi -> current byte of program
pop rax ; rax = number of bytes in label
@@ -1492,11 +1513,9 @@ tokenise:
mov [.pending_operator + eax], dl
inc eax ; inc byte counter
inc rdi ; inc byte pointer
cmp eax, 4 ; check that operator is short enough
jg .unexpected_operator ; if not, error
cmp rdi, rsi
jge .break
inc rdi ; inc byte pointer
jmp .operator_loop ; and loop
.operator_break:
push rdi
@@ -1507,8 +1526,10 @@ tokenise:
push rcx
mov ecx, eax ; cx = operator's token ID
mov edi, eax ; di = operator's token ID
push rcx
call get_tte_type
; al = token type
pop rcx ; cx = operator's token ID
mov sil, al
pop rax ; rax = tokens processed
pop rdi ; rdi = byte counter
@@ -1520,13 +1541,10 @@ tokenise:
jne .unexpected_operator
; debug message
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_operator
call print
pop rsi
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; plus 1 token processed
@@ -1535,26 +1553,21 @@ tokenise:
jmp .loop
.pseudo_operator:
; debug message
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_pseudo_operator
call print
pop rsi
mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
jmp .loop
.operand:
; debug message
push rsi
mov rsi, .found
call print.debug
mov rsi, .msg_operand
call print
pop rsi
test byte [.expecting], E_OPERAND ; make sure an operand was expected
jz .unexpected_operand ; if not, error
@@ -1571,8 +1584,6 @@ tokenise:
je .operand_break
cmp dl, 0x00
je .operand_break
cmp dl, ";"
je .operand_break
inc rax ; inc length counter
inc rdi ; inc byte pointer
@@ -1580,7 +1591,6 @@ tokenise:
.operand_break:
pop rdi ; rdi -> first byte of operand
push rdi
push rsi
mov rsi, rax ; rsi = length of operand in bytes
mov cx, ax ; cx = length counter for safe keeping
@@ -1589,7 +1599,6 @@ tokenise:
; dl = return code
; rax = binary data
pop rcx
pop rsi
pop rdi ; rdi = first byte of operand
add di, cx ; rdi = last byte of operand
mov rcx, rax ; rcx = evaluate_operand's binary return data
@@ -1649,52 +1658,40 @@ tokenise:
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL
; error exits for `tokenise`. each one follows the same pattern:
;   - pass .err_unexpected ("unexpected ") to print.error
;   - pass the message naming the offending token kind to print
;   - jump to halt
; rsi is pushed before and popped after the two print calls.
.unexpected_whitespace:
push rsi
mov rsi, .err_unexpected ; rsi -> "unexpected "
call print.error
mov rsi, .msg_whitespace ; rsi -> token kind message (defined outside this view)
call print
pop rsi
jmp halt
.unexpected_comment:
push rsi
mov rsi, .err_unexpected ; rsi -> "unexpected "
call print.error
mov rsi, .msg_comment ; rsi -> token kind message (defined outside this view)
call print
pop rsi
jmp halt
.unexpected_newline:
push rsi
mov rsi, .err_unexpected ; rsi -> "unexpected "
call print.error
mov rsi, .msg_newline ; rsi -> token kind message (defined outside this view)
call print
pop rsi
jmp halt
.unexpected_comma:
push rsi
mov rsi, .err_unexpected ; rsi -> "unexpected "
call print.error
mov rsi, .msg_comma ; rsi -> token kind message (defined outside this view)
call print
pop rsi
jmp halt
.unexpected_operand:
push rsi
mov rsi, .err_unexpected ; rsi -> "unexpected "
call print.error
mov rsi, .msg_operand ; rsi -> token kind message (defined outside this view)
call print
pop rsi
jmp halt
.unexpected_operator:
push rsi
mov rsi, .err_unexpected ; rsi -> "unexpected "
call print.error
mov rsi, .msg_operator ; rsi -> token kind message (defined outside this view)
call print
pop rsi
jmp halt
.err_unexpected db "unexpected ", 0x00
.found db "found ", 0x00
@@ -1805,6 +1802,8 @@ evaluate_operand:
call evaluate_constant
; dl = type of constant
; rax = hex value of constant
; these are intentionally swapped; `djb2` call in .label takes this order
pop rdi ; rdi = size of label in bytes
pop rsi ; rsi -> first byte of label
@@ -1976,8 +1975,7 @@ evaluate_constant:
jmp .bin_loop
.chr:
cmp rcx, 4 ; ensure char is only 4 bytes long
jg .unrecognised
; TODO check for overlength string and do something; probably in `tokenise`
cmp rsi, 1 ; range check
je .chr_break
@@ -3165,6 +3163,7 @@ msg_halt db "halted.", 0x0A, 0x00
whitespace_2 db " ", 0x0D
; test program
align 128
program:
db "print:", 0x0A
db " push rdx", 0x0A
@@ -3235,7 +3234,7 @@ program:
db ' db "[WARN]: "', 0x0A
db " db 0x1B", 0x0A
db ' db "[0m"', 0x0A
db " db 0x00", 0x0A
.size dq $ - program
db " db 0x00", 0x0A, 0x00
program_end:
msg_end db "end of the binary ->|", 0x0A, 0x00