little optimisation, add single-token parsing function
This commit is contained in:
@@ -16,11 +16,13 @@ I want to compile Bootler and Twasm with the Twasm assembler
|
||||
+------ 0x00070000 ------+
|
||||
| token table |
|
||||
+------ 0x00060000 ------+
|
||||
| test arena |
|
||||
+------ 0x00050000 ------+
|
||||
| stack (rsp) |
|
||||
+------------------------+
|
||||
| input |
|
||||
+------------------------+ this is lined up to a sector
|
||||
| | and this is less than a sector
|
||||
+------------------------+ <- this is lined up to a sector
|
||||
| | <- and this is less than a sector
|
||||
+------------------------+
|
||||
| assembler |
|
||||
+------ 0x00010000 ------+
|
||||
|
||||
@@ -3,7 +3,7 @@ LOAD_ADDR equ 0x00010000 ; address this program is loaded at
|
||||
TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at
|
||||
TOKEN_TABLE_SIZE equ 0x1000 ; max length of table
|
||||
|
||||
TEST_ARENA_ADDR equ 0x00060000 ; address to run tests at
|
||||
TEST_ARENA_ADDR equ 0x00050000 ; address to run tests at
|
||||
TEST_ARENA_SIZE equ 0x1000 ; maximum size tests can use
|
||||
|
||||
OUTPUT_ADDR equ 0x00070000 ; address of outputed binary
|
||||
@@ -11,6 +11,8 @@ OUTPUT_SIZE equ 0x1000 ; max length of outputed binary
|
||||
|
||||
STACK_ADDR equ 0x00060000 ; address to put the 64-bit stack at
|
||||
|
||||
UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id of an unrecognised token
|
||||
|
||||
[bits 64]
|
||||
[org LOAD_ADDR]
|
||||
|
||||
@@ -30,6 +32,102 @@ start:
|
||||
; tokenising
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
; ------------------------------------------------------------------------------
; identify_token
;
; description:
; returns the id of a given token by scanning the known-token list that matches
; the token's size. Each list entry is the token's bytes immediately followed by
; its 2-byte id.
;
; parameters:
; rdi -> first byte of token
; rsi = size of token in bytes
;
; returned:
; ax = id of token, or UNRECOGNISED_TOKEN_ID if the token is not in the list
;      for its size or its size is not 2, 3 or 4; the rest of rax is zeroed
;
; clobbers:
; rcx, r10, r11, flags
; ------------------------------------------------------------------------------

identify_token:
        cmp     rsi, 2                  ; if the token has length 2
        je      .start_length2          ; then enter the length 2 loop

        cmp     rsi, 3                  ; if the token has length 3
        je      .start_length3          ; then enter the length 3 loop

        cmp     rsi, 4                  ; if the token has length 4
        je      .start_length4          ; then enter the length 4 loop

        jmp     .unrecognised           ; else unrecognised

.start_length2:
        mov     rcx, tokens.length2     ; rcx -> list of known tokens
        movzx   r11d, word [rdi]        ; r11w = token, loaded once for the loop

.loop_length2:
        cmp     rcx, tokens.length3     ; check rcx is still within length2 tokens
        jae     .unrecognised           ; addresses are unsigned: past the end

        cmp     r11w, [rcx]             ; if current entry matches token,
        je      .found_length2          ; exit loop

        add     rcx, 4                  ; length of token + length of id
        jmp     .loop_length2

.found_length2:
        movzx   eax, word [rcx + 2]     ; return id of token, rest of rax zeroed
        ret

.start_length3:
        mov     rcx, tokens.length3     ; rcx -> list of known tokens

        ; build the 3-byte token in r11d from a byte and a word load, so that
        ; nothing beyond the caller's 3 bytes is ever read
        movzx   r11d, byte [rdi + 2]    ; third byte of token
        shl     r11d, 16                ; move it to bits 16..23
        mov     r11w, [rdi]             ; first two bytes in bits 0..15

.loop_length3:
        cmp     rcx, tokens.length4     ; check rcx is still within length3 tokens
        jae     .unrecognised           ; addresses are unsigned: past the end

        mov     r10d, [rcx]             ; known token + low byte of its id; this
                                        ; stays inside the 5-byte entry
        and     r10d, 0x00FFFFFF        ; mask for just the token

        cmp     r10d, r11d              ; if known token matches token,
        je      .found_length3          ; exit loop

        add     rcx, 5                  ; length of token + length of id
        jmp     .loop_length3

.found_length3:
        movzx   eax, word [rcx + 3]     ; return id of token, rest of rax zeroed
        ret

.start_length4:
        mov     rcx, tokens.length4     ; rcx -> list of known tokens
        mov     r11d, [rdi]             ; r11d = token, loaded once for the loop

.loop_length4:
        cmp     rcx, tokens.length5     ; check rcx is still within length4 tokens
        jae     .unrecognised           ; addresses are unsigned: past the end

        cmp     r11d, [rcx]             ; if known token matches token,
        je      .found_length4          ; exit loop

        add     rcx, 6                  ; length of token + length of id
        jmp     .loop_length4

.found_length4:
        movzx   eax, word [rcx + 4]     ; return id of token, rest of rax zeroed
        ret

.unrecognised:
        mov     eax, UNRECOGNISED_TOKEN_ID ; 32-bit write zeroes the rest of rax
        ret
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; copy_token
|
||||
;
|
||||
@@ -92,7 +190,7 @@ copy_token:
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
copy_byte:
        ; copies the byte at [rdi] to [rsi] and returns it in al, with the rest
        ; of rax zeroed
        xor     eax, eax                ; zero out so it returns fine (a 32-bit
                                        ; write clears all of rax)
        mov     al, [rdi]               ; al = source byte
        mov     [rsi], al               ; store it at the destination
        ret
|
||||
@@ -113,7 +211,7 @@ copy_byte:
|
||||
|
||||
print:
|
||||
push rdx
|
||||
mov rdx, 0x3F8
|
||||
mov edx, 0x3F8
|
||||
.loop:
|
||||
mov al, [rsi]
|
||||
test al, al
|
||||
@@ -169,11 +267,11 @@ elemb:
|
||||
jmp .loop
|
||||
|
||||
.not_found
|
||||
xor rax, rax ; return 0; dl not an element of list
|
||||
xor eax, eax ; return 0; dl not an element of list
|
||||
ret
|
||||
|
||||
.found
|
||||
xor rax, rax
|
||||
xor eax, eax
|
||||
mov rax, 1 ; return 1; dl an element of list
|
||||
ret
|
||||
|
||||
@@ -188,7 +286,7 @@ elemb:
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
clear_token_table:
|
||||
xor rax, rax ; value to write
|
||||
xor eax, eax ; value to write
|
||||
mov rcx, TOKEN_TABLE_SIZE / 4 ; number of double words
|
||||
mov rdi, TOKEN_TABLE_ADDR ; address to start
|
||||
rep stosd
|
||||
@@ -202,7 +300,7 @@ clear_token_table:
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
clear_test_arena:
        ; zero-fills the test arena (TEST_ARENA_SIZE bytes at TEST_ARENA_ADDR),
        ; one double word at a time. It must target the test arena constants,
        ; not the token table's: the two regions no longer share an address.
        xor     eax, eax                    ; value to write
        mov     rcx, TEST_ARENA_SIZE / 4    ; number of double words
        mov     rdi, TEST_ARENA_ADDR        ; address to start
        rep stosd                           ; NOTE(review): assumes the direction
                                            ; flag is clear - confirm at entry
|
||||
@@ -232,6 +330,9 @@ run_tests:
|
||||
call clear_test_arena
|
||||
call test_elemb
|
||||
|
||||
call clear_test_arena
|
||||
call test_identify_token
|
||||
|
||||
ret
|
||||
.msg db "running test suite...", 0x0D, 0x0A, 0x00
|
||||
|
||||
@@ -384,6 +485,91 @@ test_elemb:
|
||||
ret
|
||||
.msg db "test_elemb...", 0x00
|
||||
|
||||
; ------------------------------------------------------------------------------
; test_identify_token
;
; description:
; tests identify_token described functionality. Each case writes a token into
; the test arena, calls identify_token on it and compares all of rax with the
; expected id, which also checks that the bits of rax above ax are zeroed.
; Prints msg_pass if every case matches, msg_fail at the first mismatch.
; ------------------------------------------------------------------------------

test_identify_token:
        mov     rsi, .msg
        call    print

        ; length2 token that exists
        mov     word [TEST_ARENA_ADDR], "sp"
        mov     rdi, TEST_ARENA_ADDR
        mov     rsi, 2
        call    identify_token
        cmp     rax, 0x0026
        jne     .fail

        ; length2 token that doesn't exist
        mov     word [TEST_ARENA_ADDR], "QQ"
        mov     rdi, TEST_ARENA_ADDR
        mov     rsi, 2
        call    identify_token
        cmp     rax, UNRECOGNISED_TOKEN_ID
        jne     .fail

        ; length3 token that exists
        mov     dword [TEST_ARENA_ADDR], "rax"
        mov     rdi, TEST_ARENA_ADDR
        mov     rsi, 3
        call    identify_token
        cmp     rax, 0x0000
        jne     .fail

        ; another length3 token that exists
        mov     dword [TEST_ARENA_ADDR], "cr0"
        mov     rdi, TEST_ARENA_ADDR
        mov     rsi, 3
        call    identify_token
        cmp     rax, 0x004A
        jne     .fail

        ; length3 token that doesn't exist
        mov     dword [TEST_ARENA_ADDR], "r16"
        mov     rdi, TEST_ARENA_ADDR
        mov     rsi, 3
        call    identify_token
        cmp     rax, UNRECOGNISED_TOKEN_ID
        jne     .fail

        ; length4 token that exists
        mov     dword [TEST_ARENA_ADDR], "r10d"
        mov     rdi, TEST_ARENA_ADDR
        mov     rsi, 4
        call    identify_token
        cmp     rax, 0x001A
        jne     .fail

        ; another length4 token that exists
        mov     dword [TEST_ARENA_ADDR], "r15b"
        mov     rdi, TEST_ARENA_ADDR
        mov     rsi, 4
        call    identify_token
        cmp     rax, 0x003F
        jne     .fail

        ; length4 token that doesn't exist
        mov     dword [TEST_ARENA_ADDR], "r15q"
        mov     rdi, TEST_ARENA_ADDR
        mov     rsi, 4
        call    identify_token
        cmp     rax, UNRECOGNISED_TOKEN_ID
        jne     .fail

        ; size with no known-token list (empty token) is unrecognised
        mov     rdi, TEST_ARENA_ADDR
        mov     rsi, 0
        call    identify_token
        cmp     rax, UNRECOGNISED_TOKEN_ID
        jne     .fail

.pass:
        mov     rsi, msg_pass
        call    print
        ret

.fail:
        mov     rsi, msg_fail
        call    print
        ret

.msg db "test_identify_token...", 0x00
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
; data
|
||||
; ------------------------------------------------------------------------------
|
||||
@@ -561,7 +747,7 @@ msg_fail db "failed.", 0x0D, 0x0A, 0x00
|
||||
test_byte db "Q" ; unterminated, just a byte chillin
|
||||
test_token_null db "TestTokn", 0x00 ; followed by null terminator. Quad word
|
||||
test_token_space db "TestTokn " ; followed by space. Quad word
|
||||
test_elemb_0
|
||||
test_elemb_0 ; [This Page Intentionally Left Blank]
|
||||
test_elemb_5 db 0x54, 0x00, 0x21, 0x20, 0x34
|
||||
|
||||
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, 0x00, 0x00, 0x00, 0x00
|
||||
|
||||
Reference in New Issue
Block a user