check sizes, fix bug with buffer, fix a couple other bugs, add more registers to tokens.registers

This commit is contained in:
andromeda
2026-03-29 09:36:55 +02:00
parent d51de0cc1d
commit b1e7d2e3d5
2 changed files with 359 additions and 51 deletions

View File

@@ -328,14 +328,14 @@ supported tokens are listed below
| r13d | 0x001D | unimplemented | | r13d | 0x001D | unimplemented |
| r14d | 0x001E | unimplemented | | r14d | 0x001E | unimplemented |
| r15d | 0x001F | unimplemented | | r15d | 0x001F | unimplemented |
| ax | 0x0020 | unimplemented | | ax | 0x0020 | |
| bx | 0x0021 | unimplemented | | bx | 0x0021 | |
| cx | 0x0022 | unimplemented | | cx | 0x0022 | |
| dx | 0x0023 | unimplemented | | dx | 0x0023 | |
| si | 0x0024 | unimplemented | | si | 0x0024 | |
| di | 0x0025 | unimplemented | | di | 0x0025 | |
| sp | 0x0026 | unimplemented | | sp | 0x0026 | |
| bp | 0x0027 | unimplemented | | bp | 0x0027 | |
| r8w | 0x0028 | unimplemented | | r8w | 0x0028 | unimplemented |
| r9w | 0x0029 | unimplemented | | r9w | 0x0029 | unimplemented |
| r10w | 0x002A | unimplemented | | r10w | 0x002A | unimplemented |
@@ -344,14 +344,14 @@ supported tokens are listed below
| r13w | 0x002D | unimplemented | | r13w | 0x002D | unimplemented |
| r14w | 0x002E | unimplemented | | r14w | 0x002E | unimplemented |
| r15w | 0x002F | unimplemented | | r15w | 0x002F | unimplemented |
| al | 0x0030 | unimplemented | | al | 0x0030 | |
| bl | 0x0031 | unimplemented | | bl | 0x0031 | |
| cl | 0x0032 | unimplemented | | cl | 0x0032 | |
| dl | 0x0033 | unimplemented | | dl | 0x0033 | |
| sil | 0x0034 | unimplemented | | sil | 0x0034 | |
| dil | 0x0035 | unimplemented | | dil | 0x0035 | |
| spl | 0x0036 | unimplemented | | spl | 0x0036 | |
| bpl | 0x0037 | unimplemented | | bpl | 0x0037 | |
| r8b | 0x0038 | unimplemented | | r8b | 0x0038 | unimplemented |
| r9b | 0x0039 | unimplemented | | r9b | 0x0039 | unimplemented |
| r10b | 0x003A | unimplemented | | r10b | 0x003A | unimplemented |

View File

@@ -177,6 +177,32 @@ assemble:
mov rsi, .msg_operator_1_register mov rsi, .msg_operator_1_register
call print.debug call print.debug
; pick the operand-size prefix that matches the register operand's width
; in: di = token table entry of the register
call get_tte_typed_metadata
; al = register typed metadata
and al, 11b ; keep only the width field; ZF is set when it is 00b
jz .unexpected_token ; 00b = 8 bit; TODO handle 8 bit opcodes
cmp al, 10b
je .operator_1_register_no_prefix ; 10b = 32 bit, the default operand size
cmp al, 11b
je .operator_1_register_64 ; 11b = 64 bit
; 01b (16 bit) is the only width left, so it simply falls through

.operator_1_register_16:
mov al, 0x66 ; operand-size override prefix
call .push_byte
jmp .operator_1_register_no_prefix

.operator_1_register_64:
mov al, 0x48 ; REX.W prefix
call .push_byte
; falls through into .operator_1_register_no_prefix

.operator_1_register_no_prefix:
mov si, di ; si = `R/M` tte mov si, di ; si = `R/M` tte
and edx, 0xFF and edx, 0xFF
or edx, 0xFE00 ; pass di as direct value or edx, 0xFE00 ; pass di as direct value
@@ -232,7 +258,24 @@ assemble:
call .next_token call .next_token
jge .break jge .break
call .get_next_tte call .get_next_tte
; validate the width of the register used to address memory
; in: di = tte
call get_tte_typed_metadata
and al, 11b ; al = register width
cmp al, 11b
je .operator_2_memory_continue ; 64 bit: default address size, no prefix
cmp al, 10b
jne .size_mismatch ; 16 bit and 8 bit are both not valid for addressing

.operator_2_memory_32:
mov al, 0x67 ; address-size override prefix for a 32 bit address register
call .push_byte
; falls through into .operator_2_memory_continue

.operator_2_memory_continue:
mov si, di ; si = dst register tte mov si, di ; si = dst register tte
call .next_token call .next_token
@@ -253,13 +296,39 @@ assemble:
je .operator_2_memory_register ; if so, handle je .operator_2_memory_register ; if so, handle
jmp .unexpected_token jmp .unexpected_token
.operator_2_memory_register: .operator_2_memory_register:
push rsi push rsi
mov rsi, .msg_operator_2_memory_register mov rsi, .msg_operator_2_memory_register
call print.debug call print.debug
pop rsi ; si = r/m pop rsi ; si = r/m
; emit the operand-size prefix required by the source register of a
; `op [mem], reg` instruction
; in: di = src tte
call get_tte_typed_metadata
; al = register typed metadata
and al, 11b ; al = register width
cmp al, 00b ; 8 bit
je .unexpected_token ; TODO handle 8 bit opcodes
cmp al, 01b ; 16 bit
je .operator_2_memory_register_16
cmp al, 10b ; 32 bit
je .operator_2_memory_register_continue ; default register length; no prefix
cmp al, 11b ; 64 bit
je .operator_2_memory_register_64

.operator_2_memory_register_16:
mov al, 0x66 ; operand-size override; its order relative to 0x67 does not matter
call .push_byte
jmp .operator_2_memory_register_continue

.operator_2_memory_register_64:
; .push_byte inserts at the front of .buffer. If the memory operand already
; pushed an address-size prefix (0x67), pushing REX.W now would produce
; `48 67 <opcode>`. A REX prefix is only honoured when it sits directly in
; front of the opcode, so REX.W would be ignored and the instruction would
; assemble as its 32 bit form. Keep 0x67 in front of REX instead.
; NOTE(review): assumes 0x67 can only be at the front of .buffer because the
; address-size prefix was pushed for this instruction - confirm against the
; other .push_byte call sites.
cmp byte [.buffer], 0x67
jne .operator_2_memory_register_64_rex
mov byte [.buffer], 0x48 ; REX.W takes over the slot 0x67 occupied
mov al, 0x67
call .push_byte ; and 0x67 is re-inserted in front of it
jmp .operator_2_memory_register_continue

.operator_2_memory_register_64_rex:
mov al, 0x48 ; REX.W, no address-size prefix present
call .push_byte
; falls through into .operator_2_memory_register_continue

.operator_2_memory_register_continue:
; si = r/m; dst tte ; si = r/m; dst tte
; di = reg; src tte ; di = reg; src tte
mov dl, 00b ; dl = mod bits mov dl, 00b ; dl = mod bits
@@ -283,6 +352,32 @@ assemble:
call .write_byte call .write_byte
pop rdi ; di = dst tte pop rdi ; di = dst tte
; pick the operand-size prefix that matches the destination register's width
; in: di = token table entry (dst register)
call get_tte_typed_metadata
; al = register typed metadata
and al, 11b ; keep only the width field; ZF is set when it is 00b
jz .unexpected_token ; 00b = 8 bit; TODO handle 8 bit opcodes
cmp al, 10b
je .operator_2_register_continue ; 10b = 32 bit, default register length; no prefix
cmp al, 11b
je .operator_2_register_64 ; 11b = 64 bit
; 01b (16 bit) is the only width left, so it simply falls through

.operator_2_register_16:
mov al, 0x66 ; operand-size override prefix
call .push_byte
jmp .operator_2_register_continue

.operator_2_register_64:
mov al, 0x48 ; REX.W prefix
call .push_byte
; falls through into .operator_2_register_continue

.operator_2_register_continue:
mov si, di ; si = dst tte mov si, di ; si = dst tte
call .next_token call .next_token
@@ -316,9 +411,27 @@ assemble:
call .next_token call .next_token
jge .break jge .break
call .get_next_tte call .get_next_tte
; validate the width of the register used to address memory and emit the
; address-size prefix when it is a 32 bit register
; in: di = tte
call get_tte_typed_metadata
; al = register typed metadata
and al, 11b ; al = register width; without this mask the compares
; below would also see the metadata's reg bits and never match
cmp al, 10b ; 32 bit
je .operator_2_register_memory_32
cmp al, 11b ; 64 bit
je .operator_2_register_memory_continue ; default addr length; no prefix
; other cases: 16 bit, 8 bit both are not valid for addressing
jmp .size_mismatch

.operator_2_register_memory_32:
mov al, 0x67 ; address-size override prefix
call .push_byte
; falls through into .operator_2_register_memory_continue

.operator_2_register_memory_continue:
; si = reg; dst tte
; di = r/m; src tte
push rsi push rsi
mov esi, edi ; si = reg; src tte mov esi, edi ; si = reg; src tte
pop rdi ; di = r/m; dst tte pop rdi ; di = r/m; dst tte
@@ -330,8 +443,106 @@ assemble:
jmp .loop_next_token jmp .loop_next_token
.operator_2_register_register: .operator_2_register_register:
push rsi
mov rsi, .msg_operator_2_register_register mov rsi, .msg_operator_2_register_register
call print.debug call print.debug
pop rsi ; si = dst tte
; check that the source register has the same width as the destination
; register. The destination's width is not re-read from its tte; it is
; inferred from which operand-size prefix (0x66 / 0x48 / none) is already
; sitting in .buffer.
; in: si = dst tte
;     di = src tte
; NOTE(review): the lookup scans the whole of .buffer, so any other byte in
; there equal to 0x66 or 0x48 would be mistaken for a prefix - confirm that
; cannot happen for the supported opcodes.
call get_tte_typed_metadata
; al = register typed metadata
and al, 11b ; al = src register width
cmp al, 00b ; 8 bit
je .unexpected_token ; TODO handle 8 bit opcodes
cmp al, 01b ; 16 bit
je .operator_2_register_register_16
cmp al, 10b ; 32 bit
je .operator_2_register_register_32
cmp al, 11b ; 64 bit
je .operator_2_register_register_64

.operator_2_register_register_16:
; 16 bit src: 0x66 must be present, 0x48 must be absent
mov dl, 0x66
call .operator_2_register_register_find_prefix
cmp al, 1
jne .size_mismatch
mov dl, 0x48
call .operator_2_register_register_find_prefix
cmp al, 0
jne .size_mismatch
jmp .operator_2_register_register_continue

.operator_2_register_register_32:
; 32 bit src: neither 0x66 nor 0x48 may be present
mov dl, 0x66
call .operator_2_register_register_find_prefix
cmp al, 0
jne .size_mismatch
mov dl, 0x48
call .operator_2_register_register_find_prefix
cmp al, 0
jne .size_mismatch
jmp .operator_2_register_register_continue

.operator_2_register_register_64:
; 64 bit src: 0x66 must be absent, 0x48 must be present
mov dl, 0x66
call .operator_2_register_register_find_prefix
cmp al, 0
jne .size_mismatch
mov dl, 0x48
call .operator_2_register_register_find_prefix
cmp al, 1
jne .size_mismatch
jmp .operator_2_register_register_continue

; helper: ask elemb whether byte dl occurs in .buffer
; in:  dl = byte to look for
; out: al = elemb's result (the checks above treat 1 as found, 0 as not found)
; rsi and rdi are preserved; anything else elemb clobbers stays clobbered
; only ever reached through `call`; the jmp above keeps execution from
; falling into it
.operator_2_register_register_find_prefix:
push rdi
push rsi
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
call elemb
pop rsi ; si = dst tte
pop rdi ; di = src tte
ret

.operator_2_register_register_continue:
push rsi push rsi
mov esi, edi ; si = reg; src tte mov esi, edi ; si = reg; src tte
@@ -362,6 +573,11 @@ assemble:
call print.error call print.error
jmp halt jmp halt
; error exit taken when a register's width is not acceptable where it is used
; (see the `jmp` / `jne .size_mismatch` sites); prints .msg_size_mismatch
; through print.error and then jumps to halt
.size_mismatch:
mov rsi, .msg_size_mismatch
call print.error
jmp halt
; procedures ; procedures
; add the line `jge .break` after call site ; add the line `jge .break` after call site
@@ -385,10 +601,12 @@ assemble:
; al = byte to write ; al = byte to write
.output_byte: .output_byte:
push rdx
mov edx, [.next_output_byte] ; get output byte's address mov edx, [.next_output_byte] ; get output byte's address
mov [edx], al ; write byte to that address mov [edx], al ; write byte to that address
inc edx ; increment address inc edx ; increment address
mov [.next_output_byte], edx ; put output byte's address mov [.next_output_byte], edx ; put output byte's address
pop rdx
ret ret
.next_output_byte dd OUTPUT_ADDR ; next empty byte in output .next_output_byte dd OUTPUT_ADDR ; next empty byte in output
; TODO get rid of this sketchy bit of state ; TODO get rid of this sketchy bit of state
@@ -410,29 +628,32 @@ assemble:
jmp .push_byte_loop jmp .push_byte_loop
.push_byte_break: .push_byte_break:
pop rax ; al = byte to push pop rax ; al = byte to push
mov [.buffer], al mov [.buffer], al ; write desired byte to front of buffer
pop rcx ; ecx = old buffer pointer pop rcx ; ecx = old buffer pointer
inc ecx inc ecx ; ecx = pointer to next empty in buffer
mov [.buffer_pointer], ecx mov [.buffer_pointer], ecx ; record write .buffer_pointer
pop rcx pop rcx
ret ret
; al = byte to write ; al = byte to write
.write_byte: .write_byte:
push rdx
mov edx, [.buffer_pointer] mov edx, [.buffer_pointer]
mov [edx], al mov [edx], al
inc edx inc edx
mov [.buffer_pointer], edx mov [.buffer_pointer], edx
pop rdx
ret ret
.flush_write_buffer: .flush_write_buffer:
push rcx push rcx
push rax push rax
mov ecx, [.buffer_pointer] mov ecx, .buffer
.flush_write_buffer_loop:
dec ecx dec ecx
cmp ecx, .buffer .flush_write_buffer_loop:
jl .flush_write_buffer_break inc ecx
cmp ecx, [.buffer_pointer]
jge .flush_write_buffer_break
mov al, [ecx] mov al, [ecx]
call .output_byte call .output_byte
mov byte [ecx], 0x00 mov byte [ecx], 0x00
@@ -448,6 +669,7 @@ assemble:
.msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00 .msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00
.msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00 .msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00
.msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00
.msg_operator_0 db "operator_0", 0x0A, 0x00 .msg_operator_0 db "operator_0", 0x0A, 0x00
.msg_operator_1 db "operator_1", 0x0A, 0x00 .msg_operator_1 db "operator_1", 0x0A, 0x00
.msg_operator_1_memory db "operator_1_memory", 0x0A, 0x00 .msg_operator_1_memory db "operator_1_memory", 0x0A, 0x00
@@ -586,7 +808,7 @@ get_ModRM:
call get_reg_bits call get_reg_bits
; al = reg bits ; al = reg bits
mov ecx, eax mov ecx, eax ; cl = r/m bits
xor eax, eax xor eax, eax
or eax, edx ; mod bits or eax, edx ; mod bits
@@ -1307,7 +1529,6 @@ identify_register:
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; identify_operator ; identify_operator
; TODO combine with identify_register
; ;
; description: ; description:
; takes an operator in ascii-encoded text and returns its token ID or ; takes an operator in ascii-encoded text and returns its token ID or
@@ -1660,6 +1881,86 @@ tokens:
db 00010110b ; reg: 101b db 00010110b ; reg: 101b
; width: 10b (32 bits) ; width: 10b (32 bits)
; 16 bit and 8 bit general purpose registers
; each entry is: dw token id
;                db type (0x02 = register)
;                db typed metadata: bits 4..2 = reg field, bits 1..0 = width
;                   width: 00b = 8 bit, 01b = 16 bit, 10b = 32 bit
; the reg values below repeat for each width (000b for ax and al, 011b for
; bx and bl, ...); only the width field differs

; --- 16 bit registers ---
dw 0x0020 ; ax
db 0x02 ; type: register
db 00000001b ; reg: 000b
; width: 01b (16 bits)
dw 0x0021 ; bx
db 0x02 ; type: register
db 00001101b ; reg: 011b
; width: 01b (16 bits)
dw 0x0022 ; cx
db 0x02 ; type: register
db 00000101b ; reg: 001b
; width: 01b (16 bits)
dw 0x0023 ; dx
db 0x02 ; type: register
db 00001001b ; reg: 010b
; width: 01b (16 bits)
dw 0x0024 ; si
db 0x02 ; type: register
db 00011001b ; reg: 110b
; width: 01b (16 bits)
dw 0x0025 ; di
db 0x02 ; type: register
db 00011101b ; reg: 111b
; width: 01b (16 bits)
dw 0x0026 ; sp
db 0x02 ; type: register
db 00010001b ; reg: 100b
; width: 01b (16 bits)
dw 0x0027 ; bp
db 0x02 ; type: register
db 00010101b ; reg: 101b
; width: 01b (16 bits)

; --- 8 bit registers ---
; NOTE(review): with reg = 100b..111b an 8 bit operand only means
; spl/bpl/sil/dil when the instruction carries a REX prefix; without one the
; same encodings select ah/ch/dh/bh. The width dispatch in `assemble`
; currently rejects 8 bit registers (TODO there), so this will need handling
; once 8 bit opcodes are supported.
dw 0x0030 ; al
db 0x02 ; type: register
db 00000000b ; reg: 000b
; width: 00b (8 bits)
dw 0x0031 ; bl
db 0x02 ; type: register
db 00001100b ; reg: 011b
; width: 00b (8 bits)
dw 0x0032 ; cl
db 0x02 ; type: register
db 00000100b ; reg: 001b
; width: 00b (8 bits)
dw 0x0033 ; dl
db 0x02 ; type: register
db 00001000b ; reg: 010b
; width: 00b (8 bits)
dw 0x0034 ; sil
db 0x02 ; type: register
db 00011000b ; reg: 110b
; width: 00b (8 bits)
dw 0x0035 ; dil
db 0x02 ; type: register
db 00011100b ; reg: 111b
; width: 00b (8 bits)
dw 0x0036 ; spl
db 0x02 ; type: register
db 00010000b ; reg: 100b
; width: 00b (8 bits)
dw 0x0037 ; bpl
db 0x02 ; type: register
db 00010100b ; reg: 101b
; width: 00b (8 bits)
dw 0x004F ; hlt dw 0x004F ; hlt
db 0x01 ; type: operator db 0x01 ; type: operator
db 0x00 ; # operands db 0x00 ; # operands
@@ -2041,6 +2342,10 @@ whitespace_2 db " ", 0x0D
; test program ; test program
program: program:
db "xor eax, eax", 0x0A db "xor eax, eax", 0x0A
db "mov rax, rax", 0x0A
db "mov rax, rbx", 0x0A
db "mov eax, ebx", 0x0A
db "mov ax, bx", 0x0A
db "inc rax ; inline comment", 0x0A db "inc rax ; inline comment", 0x0A
db "dec rax", 0x0A db "dec rax", 0x0A
db "; one line comment", 0x0A db "; one line comment", 0x0A
@@ -2048,6 +2353,9 @@ program:
db "mov [rax], rdx", 0x0A db "mov [rax], rdx", 0x0A
db "mov [rcx], rbx", 0x0A db "mov [rcx], rbx", 0x0A
db "mov rcx, [rbx]", 0x0A db "mov rcx, [rbx]", 0x0A
db "mov rcx, [ebx]", 0x0A
db "mov ecx, [ebx]", 0x0A
db "mov cx, [ebx]", 0x0A
db "hlt", 0x0A db "hlt", 0x0A
.size dq $ - program .size dq $ - program