diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index d607021..b958731 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -51,8 +51,7 @@ start: call clear_token_table call clear_label_tables - mov rdi, program ; -> program - mov rsi, [program.size] ; = size of program + mov rdi, program call tokenise ; rax = number of tokens in token table mov rdi, rax @@ -104,10 +103,13 @@ assemble: ; al = type pop rdi ; di = tte - cmp al, 0x1 ; check if next tte is an operator - je .operator ; if so, handle + cmp al, 0x1 ; check if next tte is an operator + je .operator - cmp al, 0x4 ; check if next tte is a label + cmp al, 0x3 ; check if next tte is a pseudo-operator + je .pseudo_operator + + cmp al, 0x4 ; check if next tte is a label je .label jmp .unexpected_token ; otherwise, fail @@ -123,6 +125,39 @@ assemble: and edi, 0x0FFF ; edi = index to add address hash to call add_label_address jmp .loop_next_token + .pseudo_operator: + push rsi + mov rsi, .msg_pseudo_operator + call print.debug + pop rsi + + cmp di, 0x0100 + je .pseudo_operator_db + jmp .unexpected_token + .pseudo_operator_db: + call .next_token + jge .break + call .get_next_tte + + cmp di, 0x2000 + jne .unexpected_token + + call .next_token + jge .break + + mov ecx, [.tokens_processed] + mov rax, [TOKEN_TABLE_ADDR + 2 * rcx] ; get the next 8 bytes from the tt + mov ecx, [.buffer_pointer] + mov [rcx], rax ; and add them to the buffer + add ecx, 8 + mov [.buffer_pointer], ecx + call .next_token + jge .break + call .next_token + jge .break + call .next_token + jge .break + jmp .loop_next_token .operator: push rdi ; di = tte of operator @@ -905,7 +940,7 @@ assemble: mov rsi, .msg_break call print.debug pop rsi - ret + jmp halt .unexpected_token: call .flush_write_buffer @@ -1037,7 +1072,7 @@ assemble: .msg_operator_2_register_memory db "operator_2_register_memory", 0x0A, 0x00 .msg_operator_2_register_register db "operator_2_register_register", 0x0A, 0x00 .msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00 - .msg_potential_label db "potential_label", 0x0A, 0x00 + .msg_pseudo_operator db "pseudo_operator", 0x0A, 0x00 ; ------------------------------------------------------------------------------ ; get_tte_type @@ -1075,7 +1110,10 @@ get_tte_type: mov cx, [tokens.by_id + eax * 4] ; next entry in tokens.by_id - cmp cx, di + cmp di, 0x0100 + je .pseudo_operator + + cmp di, cx je .found inc eax @@ -1088,7 +1126,9 @@ get_tte_type: mov eax, UNRECOGNISED_ID_TYPE and eax, 0xF ret - + .pseudo_operator: + mov eax, 0x3 + ret .label: mov eax, 0x4 ret @@ -1293,15 +1333,13 @@ get_reg_bits: ; ------------------------------------------------------------------------------ ; tokenise -; TODO write tests ; ; description: -; represents the program at the given address and puts it in the token table -; it's probably desirable to clear the token table before calling this function. +; creates a tokenised definition of the null-terminated program at rdi and puts +; it in memory at TOKEN_TABLE_ADDR ; ; parameters: ; rdi -> first byte of program -; rsi = size of program in bytes ; ; returned: ; rax = number of tokens processed @@ -1309,16 +1347,15 @@ get_reg_bits: tokenise: ; rdi -> current byte of program - add rsi, rdi ; rsi -> last byte of program xor eax, eax ; rax = number of tokens processed xor edx, edx ; dl = current byte of program .loop: - cmp rdi, rsi ; if current byte greater than last byte - jge .break ; then break - mov dl, [rdi] ; dl = current byte + cmp dl, 0x00 ; if current byte is null + je .break ; then break + cmp dl, ";" ; if current byte is the start of a comment je .comment ; then handle the comment @@ -1328,7 +1365,6 @@ tokenise: cmp dl, "," ; if current byte is a comma je .comma ; then handle the comma - push rsi push rdi push rax push rdx @@ -1341,7 +1377,6 @@ tokenise: pop rdx ; dl = current byte pop rax ; rax = number of tokens processed pop rdi ; rdi -> current byte of program - pop rsi ; rsi -> last byte of program jnz .skip_byte_whitespace test byte [.expecting], E_LABEL ; check if a label is expected @@ -1351,12 +1386,10 @@ tokenise: jmp .operand ; else, handle as an operand .comment: - push rsi mov rsi, .found call print.debug mov rsi, .msg_comment call print - pop rsi ; rsi -> last byte of program test byte [.expecting], E_COMMENT ; make sure a comment is expected jz .unexpected_comment ; if not, error @@ -1367,8 +1400,6 @@ tokenise: je .comment_break ; then break inc rdi ; point to next unread byte - cmp rdi, rsi - jge .break jmp .comment_loop .comment_break: jmp .loop @@ -1380,12 +1411,10 @@ tokenise: jmp .loop ; else, loop .comma: ; found comma - push rsi mov rsi, .found call print.debug mov rsi, .msg_comma call print - pop rsi test byte [.expecting], E_COMMA ; make sure a comma was expected jz .unexpected_comma ; if not, error @@ -1394,12 +1423,10 @@ tokenise: jmp .loop ; and loop .newline_mk_flags: - push rsi mov rsi, .found call print.debug mov rsi, .msg_newline call print - pop rsi test byte [.expecting], E_NEWLINE ; make sure a newline was expected jz .unexpected_newline ; if not, error @@ -1425,20 +1452,15 @@ tokenise: cmp dl, ";" je .label_not_found inc eax ; inc byte counter - cmp rdi, rsi - jge .break jmp .label_loop .label_break: - push rsi mov rsi, .found call print.debug mov rsi, .msg_label call print - pop rsi ; rsi -> last byte of program push rax push rdi - push rsi mov rsi, rdi ; rsi -> start of string mov rdi, rax ; rdi = size of string @@ -1451,7 +1473,6 @@ tokenise: and cx, 0x0FFF or cx, 0x3000 - pop rsi ; rsi -> last byte of program pop rdi ; rdi -> current byte of program pop rax ; rax = number of bytes in label @@ -1492,11 +1513,9 @@ tokenise: mov [.pending_operator + eax], dl inc eax ; inc byte counter - inc rdi ; inc byte pointer cmp eax, 4 ; check that operator is short enough jg .unexpected_operator ; if not, error - cmp rdi, rsi - jge .break + inc rdi ; inc byte pointer jmp .operator_loop ; and loop .operator_break: push rdi @@ -1507,8 +1526,10 @@ tokenise: push rcx mov ecx, eax ; cx = operator's token ID mov edi, eax ; di = operator's token ID + push rcx call get_tte_type ; al = token type + pop rcx ; cx = operator's token ID mov sil, al pop rax ; rax = tokens processed pop rdi ; rdi = byte counter @@ -1520,13 +1541,10 @@ tokenise: jne .unexpected_operator ; debug message - push rsi mov rsi, .found call print.debug mov rsi, .msg_operator call print - pop rsi - mov [TOKEN_TABLE_ADDR + rax * 2], cx inc rax ; plus 1 token processed @@ -1535,26 +1553,21 @@ tokenise: jmp .loop .pseudo_operator: - ; debug message - push rsi mov rsi, .found call print.debug mov rsi, .msg_pseudo_operator call print - pop rsi mov [TOKEN_TABLE_ADDR + rax * 2], cx + inc rax mov byte [.expecting], E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERAND jmp .loop .operand: - ; debug message - push rsi mov rsi, .found call print.debug mov rsi, .msg_operand call print - pop rsi test byte [.expecting], E_OPERAND ; make sure an operand was expected jz .unexpected_operand ; if not, error @@ -1571,8 +1584,6 @@ tokenise: je .operand_break cmp dl, 0x00 je .operand_break - cmp dl, ";" - je .operand_break inc rax ; inc length counter inc rdi ; inc byte pointer @@ -1580,7 +1591,6 @@ tokenise: .operand_break: pop rdi ; rdi -> first byte of operand push rdi - push rsi mov rsi, rax ; rsi = length of operand in bytes mov cx, ax ; cx = length counter for safe keeping @@ -1589,7 +1599,6 @@ tokenise: ; dl = return code ; rax = binary data pop rcx - pop rsi pop rdi ; rdi = first byte of operand add di, cx ; rdi = last byte of operand mov rcx, rax ; rcx = evaluate_operand's binary return data @@ -1649,52 +1658,40 @@ tokenise: .expecting db E_COMMENT | E_NEWLINE | E_WHITESPACE | E_OPERATOR | E_LABEL .unexpected_whitespace: - push rsi mov rsi, .err_unexpected call print.error mov rsi, .msg_whitespace call print - pop rsi jmp halt .unexpected_comment: - push rsi mov rsi, .err_unexpected call print.error mov rsi, .msg_comment call print - pop rsi jmp halt .unexpected_newline: - push rsi mov rsi, .err_unexpected call print.error mov rsi, .msg_newline call print - pop rsi jmp halt .unexpected_comma: - push rsi mov rsi, .err_unexpected call print.error mov rsi, .msg_comma call print - pop rsi jmp halt .unexpected_operand: - push rsi mov rsi, .err_unexpected call print.error mov rsi, .msg_operand call print - pop rsi jmp halt .unexpected_operator: - push rsi mov rsi, .err_unexpected call print.error mov rsi, .msg_operator call print - pop rsi jmp halt .err_unexpected db "unexpected ", 0x00 .found db "found ", 0x00 @@ -1805,6 +1802,8 @@ evaluate_operand: call evaluate_constant ; dl = type of constant ; rax = hex value of constant + + ; these are intentionally swapped; `djb2` call in .label takes this order pop rdi ; rdi = size of label in bytes pop rsi ; rsi -> first byte of label @@ -1976,8 +1975,7 @@ evaluate_constant: jmp .bin_loop .chr: - cmp rcx, 4 ; ensure char is only 4 bytes long - jg .unrecognised + ; TODO check for overlength string and do something; probably in `tokenise` cmp rsi, 1 ; range check je .chr_break @@ -3165,6 +3163,7 @@ msg_halt db "halted.", 0x0A, 0x00 whitespace_2 db " ", 0x0D ; test program +align 128 program: db "print:", 0x0A db " push rdx", 0x0A @@ -3235,7 +3234,7 @@ program: db ' db "[WARN]: "', 0x0A db " db 0x1B", 0x0A db ' db "[0m"', 0x0A - db " db 0x00", 0x0A - .size dq $ - program + db " db 0x00", 0x0A, 0x00 +program_end: msg_end db "end of the binary ->|", 0x0A, 0x00