add pseudo-operator support to an extremely minimal degree

This commit is contained in:
andromeda
2026-04-02 23:38:10 +02:00
parent 57f8f5a118
commit c1463e1fef

View File

@@ -51,8 +51,7 @@ start:
call clear_token_table call clear_token_table
call clear_label_tables call clear_label_tables
mov rdi, program ; -> program mov rdi, program
mov rsi, [program.size] ; = size of program
call tokenise call tokenise
; rax = number of tokens in token table ; rax = number of tokens in token table
mov rdi, rax mov rdi, rax
@@ -105,7 +104,10 @@ assemble:
pop rdi ; di = tte pop rdi ; di = tte
cmp al, 0x1 ; check if next tte is an operator cmp al, 0x1 ; check if next tte is an operator
je .operator ; if so, handle je .operator
cmp al, 0x3 ; check if next tte is a pseudo-operator
je .pseudo_operator
cmp al, 0x4 ; check if next tte is a label cmp al, 0x4 ; check if next tte is a label
je .label je .label
@@ -123,6 +125,39 @@ assemble:
and edi, 0x0FFF ; edi = index to add address hash to and edi, 0x0FFF ; edi = index to add address hash to
call add_label_address call add_label_address
jmp .loop_next_token jmp .loop_next_token
; ------------------------------------------------------------------------------
; .pseudo_operator
;
; description:
; handles a token table entry whose type is 0x3 (pseudo-operator). only the
; token ID 0x0100 is accepted; every other ID goes to .unexpected_token.
; NOTE(review): the label name suggests 0x0100 is `db` - confirm against the
; token ID table, which is not visible here.
;
; on entry:
; di = tte of the pseudo-operator
; ------------------------------------------------------------------------------
.pseudo_operator:
; print the debug message; rsi carries the message pointer, so the caller's
; rsi is saved and restored around the call
push rsi
mov rsi, .msg_pseudo_operator
call print.debug
pop rsi
; dispatch on the pseudo-operator's token ID
cmp di, 0x0100
je .pseudo_operator_db
jmp .unexpected_token
.pseudo_operator_db:
; step past the pseudo-operator itself
; NOTE(review): every `jge .break` below depends on the flags left by
; .next_token, which is not visible here - presumably it compares the token
; counter against the token total; confirm
call .next_token
jge .break
; the following tte must be exactly 0x2000, otherwise the token is rejected
; NOTE(review): presumably 0x2000 marks an inline constant whose data follows
; in the token table - confirm
call .get_next_tte
cmp di, 0x2000
jne .unexpected_token
; step past the 0x2000 marker
call .next_token
jge .break
; copy 8 bytes from the token table to the output buffer. the index is scaled
; by 2, i.e. token table entries are addressed as 2-byte words
; NOTE(review): 8 bytes are always copied, whatever the size of the operand
mov ecx, [.tokens_processed]
mov rax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 8 bytes from the tt
; .buffer_pointer is read and written as a 32-bit value; the write to ecx
; zero-extends into rcx, which is then used as the store address
mov ecx, [.buffer_pointer]
mov [rcx], rax ; and add them to the buffer
add ecx, 8
mov [.buffer_pointer], ecx
; three further advances; with the one made just before the copy that is four
; NOTE(review): assuming .next_token advances by one 2-byte entry, four
; advances skip the 8 bytes just copied - confirm
call .next_token
jge .break
call .next_token
jge .break
call .next_token
jge .break
jmp .loop_next_token
.operator: .operator:
push rdi push rdi
; di = tte of operator ; di = tte of operator
@@ -905,7 +940,7 @@ assemble:
mov rsi, .msg_break mov rsi, .msg_break
call print.debug call print.debug
pop rsi pop rsi
ret jmp halt
.unexpected_token: .unexpected_token:
call .flush_write_buffer call .flush_write_buffer
@@ -1037,7 +1072,7 @@ assemble:
.msg_operator_2_register_memory db "operator_2_register_memory", 0x0A, 0x00 .msg_operator_2_register_memory db "operator_2_register_memory", 0x0A, 0x00
.msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00 .msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00
.msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00 .msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00
.msg_potential_label db "potential_label", 0x0A, 0x00 .msg_pseudo_operator db "pseudo_operator", 0x0A, 0x00
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; get_tte_type ; get_tte_type
@@ -1075,7 +1110,10 @@ get_tte_type:
mov cx, [tokens.by_id + eax * 4] ; next entry in tokens.by_id mov cx, [tokens.by_id + eax * 4] ; next entry in tokens.by_id
cmp cx, di cmp di, 0x0100
je .pseudo_operator
cmp di, cx
je .found je .found
inc eax inc eax
@@ -1088,7 +1126,9 @@ get_tte_type:
mov eax, UNRECOGNISED_ID_TYPE mov eax, UNRECOGNISED_ID_TYPE
and eax, 0xF and eax, 0xF
ret ret
; return path for pseudo-operators: reports token type 0x3 in eax
.pseudo_operator:
mov eax, 0x3
ret
.label: .label:
mov eax, 0x4 mov eax, 0x4
ret ret
@@ -1293,15 +1333,13 @@ get_reg_bits:
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; tokenise ; tokenise
; TODO write tests
; ;
; description: ; description:
; represents the program at the given address and puts it in the token table ; creates a tokenised definition of the null-terminated program at rdi and puts
; it's probably desirable to clear the token table before calling this function. ; it in memory at TOKEN_TABLE_ADDR
; ;
; parameters: ; parameters:
; rdi -> first byte of program ; rdi -> first byte of program
; rsi = size of program in bytes
; ;
; returned: ; returned:
; rax = number of tokens processed ; rax = number of tokens processed
@@ -1309,16 +1347,15 @@ get_reg_bits:
tokenise: tokenise:
; rdi -> current byte of program ; rdi -> current byte of program
add rsi, rdi ; rsi -> last byte of program
xor eax, eax ; rax = number of tokens processed xor eax, eax ; rax = number of tokens processed
xor edx, edx ; dl = current byte of program xor edx, edx ; dl = current byte of program
.loop: .loop:
cmp rdi, rsi ; if current byte greater than last byte
jge .break ; then break
mov dl, [rdi] ; dl = current byte mov dl, [rdi] ; dl = current byte
cmp dl, 0x00 ; if current byte is null
je .break ; then break
cmp dl, ";" ; if current byte is the start of a comment cmp dl, ";" ; if current byte is the start of a comment
je .comment ; then handle the comment je .comment ; then handle the comment
@@ -1328,7 +1365,6 @@ tokenise:
cmp dl, "," ; if current byte is a comma cmp dl, "," ; if current byte is a comma
je .comma ; then handle the comma je .comma ; then handle the comma
push rsi
push rdi push rdi
push rax push rax
push rdx push rdx
@@ -1341,7 +1377,6 @@ tokenise:
pop rdx ; dl = current byte pop rdx ; dl = current byte
pop rax ; rax = number of tokens processed pop rax ; rax = number of tokens processed
pop rdi ; rdi -> current byte of program pop rdi ; rdi -> current byte of program
pop rsi ; rsi -> last byte of program
jnz .skip_byte_whitespace jnz .skip_byte_whitespace
test byte [.expecting], E_LABEL ; check if a label is expected test byte [.expecting], E_LABEL ; check if a label is expected
@@ -1351,12 +1386,10 @@ tokenise:
jmp .operand ; else, handle as an operand jmp .operand ; else, handle as an operand
.comment: .comment:
push rsi
mov rsi, .found mov rsi, .found
call print.debug call print.debug
mov rsi, .msg_comment mov rsi, .msg_comment
call print call print
pop rsi ; rsi -> last byte of program
test byte [.expecting], E_COMMENT ; make sure a comment is expected test byte [.expecting], E_COMMENT ; make sure a comment is expected
jz .unexpected_comment ; if not, error jz .unexpected_comment ; if not, error
@@ -1367,8 +1400,6 @@ tokenise:
je .comment_break ; then break je .comment_break ; then break
inc rdi ; point to next unread byte inc rdi ; point to next unread byte
cmp rdi, rsi
jge .break
jmp .comment_loop jmp .comment_loop
.comment_break: .comment_break:
jmp .loop jmp .loop
@@ -1380,12 +1411,10 @@ tokenise:
jmp .loop ; else, loop jmp .loop ; else, loop
.comma: ; found comma .comma: ; found comma
push rsi
mov rsi, .found mov rsi, .found
call print.debug call print.debug
mov rsi, .msg_comma mov rsi, .msg_comma
call print call print
pop rsi
test byte [.expecting], E_COMMA ; make sure a comma was expected test byte [.expecting], E_COMMA ; make sure a comma was expected
jz .unexpected_comma ; if not, error jz .unexpected_comma ; if not, error
@@ -1394,12 +1423,10 @@ tokenise:
jmp .loop ; and loop jmp .loop ; and loop
.newline_mk_flags: .newline_mk_flags:
push rsi
mov rsi, .found mov rsi, .found
call print.debug call print.debug
mov rsi, .msg_newline mov rsi, .msg_newline
call print call print
pop rsi
test byte [.expecting], E_NEWLINE ; make sure a newline was expected test byte [.expecting], E_NEWLINE ; make sure a newline was expected
jz .unexpected_newline ; if not, error jz .unexpected_newline ; if not, error
@@ -1425,20 +1452,15 @@ tokenise:
cmp dl, ";" cmp dl, ";"
je .label_not_found je .label_not_found
inc eax ; inc byte counter inc eax ; inc byte counter
cmp rdi, rsi
jge .break
jmp .label_loop jmp .label_loop
.label_break: .label_break:
push rsi
mov rsi, .found mov rsi, .found
call print.debug call print.debug
mov rsi, .msg_label mov rsi, .msg_label
call print call print
pop rsi ; rsi -> last byte of program
push rax push rax
push rdi push rdi
push rsi
mov rsi, rdi ; rsi -> start of string mov rsi, rdi ; rsi -> start of string
mov rdi, rax ; rdi = size of string mov rdi, rax ; rdi = size of string
@@ -1451,7 +1473,6 @@ tokenise:
and cx, 0x0FFF and cx, 0x0FFF
or cx, 0x3000 or cx, 0x3000
pop rsi ; rsi -> last byte of program
pop rdi ; rdi -> current byte of program pop rdi ; rdi -> current byte of program
pop rax ; rax = number of bytes in label pop rax ; rax = number of bytes in label
@@ -1492,11 +1513,9 @@ tokenise:
mov [.pending_operator + eax], dl mov [.pending_operator + eax], dl
inc eax ; inc byte counter inc eax ; inc byte counter
inc rdi ; inc byte pointer
cmp eax, 4 ; check that operator is short enough cmp eax, 4 ; check that operator is short enough
jg .unexpected_operator ; if not, error jg .unexpected_operator ; if not, error
cmp rdi, rsi inc rdi ; inc byte pointer
jge .break
jmp .operator_loop ; and loop jmp .operator_loop ; and loop
.operator_break: .operator_break:
push rdi push rdi
@@ -1507,8 +1526,10 @@ tokenise:
push rcx push rcx
mov ecx, eax ; cx = operator's token ID mov ecx, eax ; cx = operator's token ID
mov edi, eax ; di = operator's token ID mov edi, eax ; di = operator's token ID
push rcx
call get_tte_type call get_tte_type
; al = token type ; al = token type
pop rcx ; cx = operator's token ID
mov sil, al mov sil, al
pop rax ; rax = tokens processed pop rax ; rax = tokens processed
pop rdi ; rdi = byte counter pop rdi ; rdi = byte counter
@@ -1520,13 +1541,10 @@ tokenise:
jne .unexpected_operator jne .unexpected_operator
; debug message ; debug message
push rsi
mov rsi, .found mov rsi, .found
call print.debug call print.debug
mov rsi, .msg_operator mov rsi, .msg_operator
call print call print
pop rsi
mov [TOKEN_TABLE_ADDR + rax * 2], cx mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax ; plus 1 token processed inc rax ; plus 1 token processed
@@ -1535,26 +1553,21 @@ tokenise:
jmp .loop jmp .loop
.pseudo_operator: .pseudo_operator:
; debug message
push rsi
mov rsi, .found mov rsi, .found
call print.debug call print.debug
mov rsi, .msg_pseudo_operator mov rsi, .msg_pseudo_operator
call print call print
pop rsi
mov [TOKEN_TABLE_ADDR + rax * 2], cx mov [TOKEN_TABLE_ADDR + rax * 2], cx
inc rax
mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND
jmp .loop jmp .loop
.operand: .operand:
; debug message
push rsi
mov rsi, .found mov rsi, .found
call print.debug call print.debug
mov rsi, .msg_operand mov rsi, .msg_operand
call print call print
pop rsi
test byte [.expecting], E_OPERAND ; make sure an operand was expected test byte [.expecting], E_OPERAND ; make sure an operand was expected
jz .unexpected_operand ; if not, error jz .unexpected_operand ; if not, error
@@ -1571,8 +1584,6 @@ tokenise:
je .operand_break je .operand_break
cmp dl, 0x00 cmp dl, 0x00
je .operand_break je .operand_break
cmp dl, ";"
je .operand_break
inc rax ; inc length counter inc rax ; inc length counter
inc rdi ; inc byte pointer inc rdi ; inc byte pointer
@@ -1580,7 +1591,6 @@ tokenise:
.operand_break: .operand_break:
pop rdi ; rdi -> first byte of operand pop rdi ; rdi -> first byte of operand
push rdi push rdi
push rsi
mov rsi, rax ; rsi = length of operand in bytes mov rsi, rax ; rsi = length of operand in bytes
mov cx, ax ; cx = length counter for safe keeping mov cx, ax ; cx = length counter for safe keeping
@@ -1589,7 +1599,6 @@ tokenise:
; dl = return code ; dl = return code
; rax = binary data ; rax = binary data
pop rcx pop rcx
pop rsi
pop rdi ; rdi = first byte of operand pop rdi ; rdi = first byte of operand
add di, cx ; rdi = last byte of operand add di, cx ; rdi = last byte of operand
mov rcx, rax ; rcx = evaluate_operand's binary return data mov rcx, rax ; rcx = evaluate_operand's binary return data
@@ -1649,52 +1658,40 @@ tokenise:
.expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL .expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL
.unexpected_whitespace: .unexpected_whitespace:
push rsi
mov rsi, .err_unexpected mov rsi, .err_unexpected
call print.error call print.error
mov rsi, .msg_whitespace mov rsi, .msg_whitespace
call print call print
pop rsi
jmp halt jmp halt
.unexpected_comment: .unexpected_comment:
push rsi
mov rsi, .err_unexpected mov rsi, .err_unexpected
call print.error call print.error
mov rsi, .msg_comment mov rsi, .msg_comment
call print call print
pop rsi
jmp halt jmp halt
.unexpected_newline: .unexpected_newline:
push rsi
mov rsi, .err_unexpected mov rsi, .err_unexpected
call print.error call print.error
mov rsi, .msg_newline mov rsi, .msg_newline
call print call print
pop rsi
jmp halt jmp halt
.unexpected_comma: .unexpected_comma:
push rsi
mov rsi, .err_unexpected mov rsi, .err_unexpected
call print.error call print.error
mov rsi, .msg_comma mov rsi, .msg_comma
call print call print
pop rsi
jmp halt jmp halt
.unexpected_operand: .unexpected_operand:
push rsi
mov rsi, .err_unexpected mov rsi, .err_unexpected
call print.error call print.error
mov rsi, .msg_operand mov rsi, .msg_operand
call print call print
pop rsi
jmp halt jmp halt
.unexpected_operator: .unexpected_operator:
push rsi
mov rsi, .err_unexpected mov rsi, .err_unexpected
call print.error call print.error
mov rsi, .msg_operator mov rsi, .msg_operator
call print call print
pop rsi
jmp halt jmp halt
.err_unexpected db "unexpected ", 0x00 .err_unexpected db "unexpected ", 0x00
.found db "found ", 0x00 .found db "found ", 0x00
@@ -1805,6 +1802,8 @@ evaluate_operand:
call evaluate_constant call evaluate_constant
; dl = type of constant ; dl = type of constant
; rax = hex value of constant ; rax = hex value of constant
; these are intentionally swapped; `djb2` call in .label takes this order
pop rdi ; rdi = size of label in bytes pop rdi ; rdi = size of label in bytes
pop rsi ; rsi -> first byte of label pop rsi ; rsi -> first byte of label
@@ -1976,8 +1975,7 @@ evaluate_constant:
jmp .bin_loop jmp .bin_loop
.chr: .chr:
cmp rcx, 4 ; ensure char is only 4 bytes long ; TODO check for overlength string and do something; probably in `tokenise`
jg .unrecognised
cmp rsi, 1 ; range check cmp rsi, 1 ; range check
je .chr_break je .chr_break
@@ -3165,6 +3163,7 @@ msg_halt db "halted.", 0x0A, 0x00
whitespace_2 db " ", 0x0D whitespace_2 db " ", 0x0D
; test program ; test program
align 128
program: program:
db "print:", 0x0A db "print:", 0x0A
db " push rdx", 0x0A db " push rdx", 0x0A
@@ -3235,7 +3234,7 @@ program:
db ' db "[WARN]: "', 0x0A db ' db "[WARN]: "', 0x0A
db " db 0x1B", 0x0A db " db 0x1B", 0x0A
db ' db "[0m"', 0x0A db ' db "[0m"', 0x0A
db " db 0x00", 0x0A db " db 0x00", 0x0A, 0x00
.size dq $ - program program_end:
msg_end db "end of the binary ->|", 0x0A, 0x00 msg_end db "end of the binary ->|", 0x0A, 0x00