little optimisation, add single-token parsing function
This commit is contained in:
@@ -16,11 +16,13 @@ I want to compile Bootler and Twasm with the Twasm assembler
|
|||||||
+------ 0x00070000 ------+
|
+------ 0x00070000 ------+
|
||||||
| token table |
|
| token table |
|
||||||
+------ 0x00060000 ------+
|
+------ 0x00060000 ------+
|
||||||
|
| test arena |
|
||||||
|
+------ 0x00050000 ------+
|
||||||
| stack (rsp) |
|
| stack (rsp) |
|
||||||
+------------------------+
|
+------------------------+
|
||||||
| input |
|
| input |
|
||||||
+------------------------+ this is lined up to a sector
|
+------------------------+ <- this is lined up to a sector
|
||||||
| | and this is less than a sector
|
| | <- and this is less than a sector
|
||||||
+------------------------+
|
+------------------------+
|
||||||
| assembler |
|
| assembler |
|
||||||
+------ 0x00010000 ------+
|
+------ 0x00010000 ------+
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ LOAD_ADDR equ 0x00010000 ; address this program is loaded at
|
|||||||
TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at
|
TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is loaded at
|
||||||
TOKEN_TABLE_SIZE equ 0x1000 ; max length of table
|
TOKEN_TABLE_SIZE equ 0x1000 ; max length of table
|
||||||
|
|
||||||
TEST_ARENA_ADDR equ 0x00060000 ; address to run tests at
|
TEST_ARENA_ADDR equ 0x00050000 ; address to run tests at
|
||||||
TEST_ARENA_SIZE equ 0x1000 ; maximum size tests can use
|
TEST_ARENA_SIZE equ 0x1000 ; maximum size tests can use
|
||||||
|
|
||||||
OUTPUT_ADDR equ 0x00070000 ; address of outputed binary
|
OUTPUT_ADDR equ 0x00070000 ; address of outputed binary
|
||||||
@@ -11,6 +11,8 @@ OUTPUT_SIZE equ 0x1000 ; max length of outputed binary
|
|||||||
|
|
||||||
STACK_ADDR equ 0x00060000 ; address to put the 64-bit stack at
|
STACK_ADDR equ 0x00060000 ; address to put the 64-bit stack at
|
||||||
|
|
||||||
|
UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id of an unrecognised token
|
||||||
|
|
||||||
[bits 64]
|
[bits 64]
|
||||||
[org LOAD_ADDR]
|
[org LOAD_ADDR]
|
||||||
|
|
||||||
@@ -30,6 +32,102 @@ start:
|
|||||||
; tokenising
|
; tokenising
|
||||||
; ------------------------------------------------------------------------------
|
; ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
; ------------------------------------------------------------------------------
; identify_token
;
; description:
; returns the id of a given token by searching the list of known tokens of the
; same length (tokens.length2, tokens.length3 or tokens.length4) for an exact
; match. Each list entry is the token's bytes immediately followed by its
; 2-byte id, and each list ends where the next one begins.
;
; parameters:
; rdi -> first byte of token
; rsi = size of token in bytes
;
; returned:
; ax = id of token, or UNRECOGNISED_TOKEN_ID if the size is not 2, 3 or 4 or
;      no entry matches; the rest of rax is zeroed
;
; clobbers:
; rcx, r10, r11, flags
; ------------------------------------------------------------------------------

identify_token:
    cmp rsi, 2                      ; if the token has length 2
    je .start_length2               ; then search the length 2 list

    cmp rsi, 3                      ; if the token has length 3
    je .start_length3               ; then search the length 3 list

    cmp rsi, 4                      ; if the token has length 4
    je .start_length4               ; then search the length 4 list

    jmp .unrecognised               ; else unrecognised

.start_length2:
    movzx r11d, word [rdi]          ; token never changes, so load it once
    mov rcx, tokens.length2         ; rcx -> current entry in known tokens

.loop_length2:
    cmp rcx, tokens.length3         ; addresses are unsigned: past the end of
    jae .unrecognised               ; the length 2 list means no match

    cmp r11w, [rcx]                 ; if current entry matches token,
    je .found                       ; exit loop

    add rcx, 4                      ; length of token + length of id
    jmp .loop_length2

.start_length3:
    ; compare as word + byte so exactly 3 bytes of the token are read; a
    ; dword load would reach one byte past the caller's token
    movzx r11d, word [rdi]          ; first two bytes of token
    mov r10b, [rdi + 2]             ; third byte of token
    mov rcx, tokens.length3         ; rcx -> current entry in known tokens

.loop_length3:
    cmp rcx, tokens.length4         ; past the end of the length 3 list
    jae .unrecognised               ; means no match

    cmp r11w, [rcx]                 ; first two bytes differ,
    jne .next_length3               ; try the next entry
    cmp r10b, [rcx + 2]             ; if the third byte matches as well,
    je .found                       ; exit loop

.next_length3:
    add rcx, 5                      ; length of token + length of id
    jmp .loop_length3

.start_length4:
    mov r11d, [rdi]                 ; token never changes, so load it once
    mov rcx, tokens.length4         ; rcx -> current entry in known tokens

.loop_length4:
    cmp rcx, tokens.length5         ; past the end of the length 4 list
    jae .unrecognised               ; means no match

    cmp r11d, [rcx]                 ; if current entry matches token,
    je .found                       ; exit loop

    add rcx, 6                      ; length of token + length of id
    jmp .loop_length4

.found:
    ; the id sits directly after the token bytes, and rsi still holds the
    ; token length, so one exit serves all three lists
    movzx eax, word [rcx + rsi]     ; return id; writing eax zeroes rest of rax
    ret

.unrecognised:
    mov eax, UNRECOGNISED_TOKEN_ID  ; writing eax zeroes rest of rax
    ret
|
||||||
|
|
||||||
; ------------------------------------------------------------------------------
|
; ------------------------------------------------------------------------------
|
||||||
; copy_token
|
; copy_token
|
||||||
;
|
;
|
||||||
@@ -92,7 +190,7 @@ copy_token:
|
|||||||
; ------------------------------------------------------------------------------
|
; ------------------------------------------------------------------------------
|
||||||
|
|
||||||
copy_byte:
|
copy_byte:
|
||||||
xor rax, rax ; zero out so it returns fine
|
xor eax, eax ; zero rax (a 32-bit write clears the upper half) so only al carries the result
|
||||||
mov al, [rdi]
|
mov al, [rdi] ; al = byte read from [rdi]
|
||||||
mov [rsi], al
|
mov [rsi], al ; store that byte at [rsi]; it is also left in al for the caller
|
||||||
ret
|
ret
|
||||||
@@ -113,7 +211,7 @@ copy_byte:
|
|||||||
|
|
||||||
print:
|
print:
|
||||||
push rdx
|
push rdx
|
||||||
mov rdx, 0x3F8
|
mov edx, 0x3F8
|
||||||
.loop:
|
.loop:
|
||||||
mov al, [rsi]
|
mov al, [rsi]
|
||||||
test al, al
|
test al, al
|
||||||
@@ -169,11 +267,11 @@ elemb:
|
|||||||
jmp .loop
|
jmp .loop
|
||||||
|
|
||||||
.not_found
|
.not_found
|
||||||
xor rax, rax ; return 0; dl not an element of list
|
xor eax, eax ; return 0; dl not an element of list
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.found
|
.found
|
||||||
xor rax, rax
|
xor eax, eax
|
||||||
mov rax, 1 ; return 1; dl an element of list
|
mov rax, 1 ; return 1; dl an element of list
|
||||||
ret
|
ret
|
||||||
|
|
||||||
@@ -188,7 +286,7 @@ elemb:
|
|||||||
; ------------------------------------------------------------------------------
|
; ------------------------------------------------------------------------------
|
||||||
|
|
||||||
clear_token_table:
|
clear_token_table:
|
||||||
xor rax, rax ; value to write
|
xor eax, eax ; value to write
|
||||||
mov rcx, TOKEN_TABLE_SIZE / 4 ; number of double words
|
mov rcx, TOKEN_TABLE_SIZE / 4 ; number of double words
|
||||||
mov rdi, TOKEN_TABLE_ADDR ; address to start
|
mov rdi, TOKEN_TABLE_ADDR ; address to start
|
||||||
rep stosd
|
rep stosd
|
||||||
@@ -202,7 +300,7 @@ clear_token_table:
|
|||||||
; ------------------------------------------------------------------------------
|
; ------------------------------------------------------------------------------
|
||||||
|
|
||||||
clear_test_arena:
|
clear_test_arena:
|
||||||
xor rax, rax ; value to write
|
xor eax, eax ; value to write (zero)
|
||||||
mov rcx, TOKEN_TABLE_SIZE / 4 ; number of double words
|
mov rcx, TEST_ARENA_SIZE / 4 ; number of double words in the test arena
|
||||||
mov rdi, TOKEN_TABLE_ADDR ; address to start
|
mov rdi, TEST_ARENA_ADDR ; start of the test arena; it no longer shares the token table's address, so the token table constants must not be used here
|
||||||
rep stosd
|
rep stosd ; store eax to rcx consecutive double words starting at [rdi]
|
||||||
@@ -232,6 +330,9 @@ run_tests:
|
|||||||
call clear_test_arena
|
call clear_test_arena
|
||||||
call test_elemb
|
call test_elemb
|
||||||
|
|
||||||
|
call clear_test_arena
|
||||||
|
call test_identify_token
|
||||||
|
|
||||||
ret
|
ret
|
||||||
.msg db "running test suite...", 0x0D, 0x0A, 0x00
|
.msg db "running test suite...", 0x0D, 0x0A, 0x00
|
||||||
|
|
||||||
@@ -384,6 +485,91 @@ test_elemb:
|
|||||||
ret
|
ret
|
||||||
.msg db "test_elemb...", 0x00
|
.msg db "test_elemb...", 0x00
|
||||||
|
|
||||||
|
; ------------------------------------------------------------------------------
; test_identify_token
;
; description:
; tests identify_token's described functionality: for tokens of length 2, 3
; and 4 written into the test arena, known tokens must return their id and
; unknown tokens must return UNRECOGNISED_TOKEN_ID. Each check compares the
; whole of rax, so the "rest of rax is zeroed" part of the contract is covered
; too. Prints msg_pass if every check holds, msg_fail at the first that fails.
; ------------------------------------------------------------------------------

test_identify_token:
    mov rsi, .msg
    call print

    ; length2 token that exists
    mov word [TEST_ARENA_ADDR], "sp"
    mov edi, TEST_ARENA_ADDR
    mov esi, 2
    call identify_token
    cmp rax, 0x0026
    jne .fail

    ; length2 token that doesn't exist
    mov word [TEST_ARENA_ADDR], "QQ"
    mov edi, TEST_ARENA_ADDR
    mov esi, 2
    call identify_token
    cmp rax, UNRECOGNISED_TOKEN_ID
    jne .fail

    ; length3 token that exists
    mov dword [TEST_ARENA_ADDR], "rax"
    mov edi, TEST_ARENA_ADDR
    mov esi, 3
    call identify_token
    cmp rax, 0x0000
    jne .fail

    ; another length3 token that exists
    mov dword [TEST_ARENA_ADDR], "cr0"
    mov edi, TEST_ARENA_ADDR
    mov esi, 3
    call identify_token
    cmp rax, 0x004A
    jne .fail

    ; length3 token that doesn't exist
    mov dword [TEST_ARENA_ADDR], "r16"
    mov edi, TEST_ARENA_ADDR
    mov esi, 3
    call identify_token
    cmp rax, UNRECOGNISED_TOKEN_ID
    jne .fail

    ; length4 token that exists
    mov dword [TEST_ARENA_ADDR], "r10d"
    mov edi, TEST_ARENA_ADDR
    mov esi, 4
    call identify_token
    cmp rax, 0x001A
    jne .fail

    ; another length4 token that exists
    mov dword [TEST_ARENA_ADDR], "r15b"
    mov edi, TEST_ARENA_ADDR
    mov esi, 4
    call identify_token
    cmp rax, 0x003F
    jne .fail

    ; length4 token that doesn't exist
    mov dword [TEST_ARENA_ADDR], "r15q"
    mov edi, TEST_ARENA_ADDR
    mov esi, 4
    call identify_token
    cmp rax, UNRECOGNISED_TOKEN_ID
    jne .fail

.pass:
    mov rsi, msg_pass
    call print
    ret

.fail:
    mov rsi, msg_fail
    call print
    ret

.msg db "test_identify_token...", 0x00
|
||||||
|
|
||||||
; ------------------------------------------------------------------------------
|
; ------------------------------------------------------------------------------
|
||||||
; data
|
; data
|
||||||
; ------------------------------------------------------------------------------
|
; ------------------------------------------------------------------------------
|
||||||
@@ -561,7 +747,7 @@ msg_fail db "failed.", 0x0D, 0x0A, 0x00
|
|||||||
test_byte db "Q" ; unterminated, just a byte chillin
|
test_byte db "Q" ; unterminated, just a byte chillin
|
||||||
test_token_null db "TestTokn", 0x00 ; followed by null terminator. Quad word
|
test_token_null db "TestTokn", 0x00 ; followed by null terminator. Quad word
|
||||||
test_token_space db "TestTokn " ; followed by space. Quad word
|
test_token_space db "TestTokn " ; followed by space. Quad word
|
||||||
test_elemb_0
|
test_elemb_0 ; [This Page Intentionally Left Blank]
|
||||||
test_elemb_5 db 0x54, 0x00, 0x21, 0x20, 0x34
|
test_elemb_5 db 0x54, 0x00, 0x21, 0x20, 0x34
|
||||||
|
|
||||||
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, 0x00, 0x00, 0x00, 0x00
|
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, 0x00, 0x00, 0x00, 0x00
|
||||||
|
|||||||
Reference in New Issue
Block a user