check sizes, fix bug with buffer, fix a couple other bugs, add more registers to tokens.registers

This commit is contained in:
andromeda
2026-03-29 09:36:55 +02:00
parent d51de0cc1d
commit b1e7d2e3d5
2 changed files with 359 additions and 51 deletions

View File

@@ -328,14 +328,14 @@ supported tokens are listed below
| r13d | 0x001D | unimplemented |
| r14d | 0x001E | unimplemented |
| r15d | 0x001F | unimplemented |
| ax | 0x0020 | unimplemented |
| bx | 0x0021 | unimplemented |
| cx | 0x0022 | unimplemented |
| dx | 0x0023 | unimplemented |
| si | 0x0024 | unimplemented |
| di | 0x0025 | unimplemented |
| sp | 0x0026 | unimplemented |
| bp | 0x0027 | unimplemented |
| ax | 0x0020 | |
| bx | 0x0021 | |
| cx | 0x0022 | |
| dx | 0x0023 | |
| si | 0x0024 | |
| di | 0x0025 | |
| sp | 0x0026 | |
| bp | 0x0027 | |
| r8w | 0x0028 | unimplemented |
| r9w | 0x0029 | unimplemented |
| r10w | 0x002A | unimplemented |
@@ -344,14 +344,14 @@ supported tokens are listed below
| r13w | 0x002D | unimplemented |
| r14w | 0x002E | unimplemented |
| r15w | 0x002F | unimplemented |
| al | 0x0030 | unimplemented |
| bl | 0x0031 | unimplemented |
| cl | 0x0032 | unimplemented |
| dl | 0x0033 | unimplemented |
| sil | 0x0034 | unimplemented |
| dil | 0x0035 | unimplemented |
| spl | 0x0036 | unimplemented |
| bpl | 0x0037 | unimplemented |
| al | 0x0030 | |
| bl | 0x0031 | |
| cl | 0x0032 | |
| dl | 0x0033 | |
| sil | 0x0034 | |
| dil | 0x0035 | |
| spl | 0x0036 | |
| bpl | 0x0037 | |
| r8b | 0x0038 | unimplemented |
| r9b | 0x0039 | unimplemented |
| r10b | 0x003A | unimplemented |

View File

@@ -177,6 +177,32 @@ assemble:
mov rsi, .msg_operator_1_register
call print.debug
; di = token table entry
; Pick the prefix byte required by the width of the single register operand.
  call get_tte_typed_metadata
  ; al = register typed metadata
  and al, 11b                           ; al = register width; ZF is set when it is 00b
  jz .unexpected_token                  ; 00b = 8 bit; TODO handle 8 bit opcodes
  cmp al, 10b
  je .operator_1_register_no_prefix     ; 10b = 32 bit: default register length; no prefix
  cmp al, 11b
  je .operator_1_register_64            ; 11b = 64 bit
  ; 01b (16 bit) is the only width left, so fall through
.operator_1_register_16:
  mov al, 0x66                          ; operand-size override prefix
  call .push_byte
  jmp .operator_1_register_no_prefix
.operator_1_register_64:
  mov al, 0x48                          ; REX.W prefix
  call .push_byte
.operator_1_register_no_prefix:
mov si, di ; si = `R/M` tte
and edx, 0xFF
or edx, 0xFE00 ; pass di as direct value
@@ -232,7 +258,24 @@ assemble:
call .next_token
jge .break
call .get_next_tte
; di = tte of the register used as the memory address
; Only 32 bit and 64 bit registers are accepted for addressing here.
  call get_tte_typed_metadata
  and al, 11b                           ; al = register width
  cmp al, 11b
  je .operator_2_memory_continue        ; 64 bit: no prefix is emitted
  cmp al, 10b
  jne .size_mismatch                    ; 16 bit and 8 bit are not valid for addressing
  ; 32 bit: fall through
.operator_2_memory_32:
  mov al, 0x67                          ; address-size override prefix
  call .push_byte
.operator_2_memory_continue:
mov si, di ; si = dst register tte
call .next_token
@@ -253,13 +296,39 @@ assemble:
je .operator_2_memory_register ; if so, handle
jmp .unexpected_token
.operator_2_memory_register:
; Handles `op [mem], reg`: emits the operand-size prefix required by the source
; register, then continues at .operator_2_memory_register_continue.
;
; in:
;   si = r/m; dst tte
;   di = reg; src tte
; exits:
;   .unexpected_token                       for an 8 bit source register
;   .operator_2_memory_register_continue    otherwise
  push rsi
  mov rsi, .msg_operator_2_memory_register
  call print.debug
  pop rsi                               ; si = r/m; dst tte
  ; di = src tte
  call get_tte_typed_metadata
  ; al = register typed metadata
  and al, 11b                           ; al = register width
  cmp al, 00b                           ; 8 bit
  je .unexpected_token                  ; TODO handle 8 bit opcodes
  cmp al, 01b                           ; 16 bit
  je .operator_2_memory_register_16
  cmp al, 10b                           ; 32 bit
  je .operator_2_memory_register_continue ; default register length; no prefix
  ; 11b (64 bit) is the only width left
  jmp .operator_2_memory_register_64
.operator_2_memory_register_16:
  mov al, 0x66                          ; operand-size override; legacy prefixes may
  call .push_byte                       ; appear in any order, so the front is fine
  jmp .operator_2_memory_register_continue
.operator_2_memory_register_64:
  ; A REX prefix is only honoured when it immediately precedes the opcode, so
  ; it has to come AFTER a 0x67 address-size prefix. .push_byte inserts at the
  ; front of .buffer, which would yield `48 67 <opcode>` when the destination
  ; used a 32 bit address register (0x67 already pushed by .operator_2_memory).
  ; In that case put REX.W in the slot 0x67 occupies and re-insert 0x67 in
  ; front of it, giving `67 48 <opcode>`.
  cmp byte [.buffer], 0x67
  jne .operator_2_memory_register_64_front
  mov byte [.buffer], 0x48              ; REX.W replaces 0x67 ...
  mov al, 0x67
  call .push_byte                       ; ... and 0x67 goes back in front of it
  mov al, 0x48                          ; leave al as the plain 64 bit path does
  jmp .operator_2_memory_register_continue
.operator_2_memory_register_64_front:
  mov al, 0x48                          ; REX.W; no other prefix is in front
  call .push_byte
.operator_2_memory_register_continue:
; si = r/m; dst tte
; di = reg; src tte
mov dl, 00b ; dl = mod bits
@@ -283,6 +352,32 @@ assemble:
call .write_byte
pop rdi ; di = dst tte
; di = token table entry (dst register)
; Pick the prefix byte required by the width of the destination register.
  call get_tte_typed_metadata
  ; al = register typed metadata
  and al, 11b                           ; al = register width; ZF is set when it is 00b
  jz .unexpected_token                  ; 00b = 8 bit; TODO handle 8 bit opcodes
  cmp al, 10b
  je .operator_2_register_continue      ; 10b = 32 bit: default register length; no prefix
  cmp al, 11b
  je .operator_2_register_64            ; 11b = 64 bit
  ; 01b (16 bit) is the only width left, so fall through
.operator_2_register_16:
  mov al, 0x66                          ; operand-size override prefix
  call .push_byte
  jmp .operator_2_register_continue
.operator_2_register_64:
  mov al, 0x48                          ; REX.W prefix
  call .push_byte
.operator_2_register_continue:
mov si, di ; si = dst tte
call .next_token
@@ -316,9 +411,27 @@ assemble:
call .next_token
jge .break
call .get_next_tte
; di = tte of the register used as the memory address
; Only 32 bit and 64 bit registers are accepted for addressing here.
  call get_tte_typed_metadata
  ; al = register typed metadata
  and al, 11b                           ; al = register width
  cmp al, 11b
  je .operator_2_register_memory_continue ; 64 bit: default addr length; no prefix
  cmp al, 10b
  jne .size_mismatch                    ; 16 bit and 8 bit are not valid for addressing
  ; 32 bit: fall through
.operator_2_register_memory_32:
  mov al, 0x67                          ; address-size override prefix
  call .push_byte
.operator_2_register_memory_continue:
; si = reg; dst tte
; di = r/m; src tte
push rsi
mov esi, edi ; si = reg; src tte
pop rdi ; di = r/m; dst tte
@@ -330,8 +443,106 @@ assemble:
jmp .loop_next_token
; Handles `op reg, reg`: checks that the width of the source register agrees
; with the prefix bytes already present in .buffer, and jumps to
; .size_mismatch when it does not.
;
; in:
;   si = dst tte
;   di = src tte
;
; The prefixes looked for are 0x66 and 0x48, presumably the ones pushed for
; the destination register in .operator_2_register (confirm the path).
; Expected buffer contents per source width:
;   16 bit: 0x66 present, 0x48 absent
;   32 bit: 0x66 absent,  0x48 absent
;   64 bit: 0x66 absent,  0x48 present
;
; NOTE(review): elemb is called with edi = length, rsi = buffer, dl = byte and
; its result is read from al; it presumably returns 1 when the byte occurs in
; the buffer and 0 otherwise - confirm against elemb.
.operator_2_register_register:
push rsi
mov rsi, .msg_operator_2_register_register
call print.debug
pop rsi ; si = dst tte
; di = src tte
call get_tte_typed_metadata
; al = register typed metadata
and al, 11b ; al = register width
cmp al, 00b ; 8 bit
je .unexpected_token ; TODO handle 8 bit opcodes
cmp al, 01b ; 16 bit
je .operator_2_register_register_16
cmp al, 10b ; 32 bit
je .operator_2_register_register_32
cmp al, 11b ; 64 bit
je .operator_2_register_register_64
; all four width values are handled above; the fall through is not expected
.operator_2_register_register_16:
; 16 bit source: the 16 bit prefix (0x66) must be in the buffer
push rdi
push rsi
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x66
call elemb
pop rsi ; si = dst tte (restored)
pop rdi ; di = src tte (restored)
cmp al, 1
jne .size_mismatch
; 16 bit source: the 64 bit prefix (0x48) must not be in the buffer
push rdi
push rsi
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x48
call elemb
pop rsi ; si = dst tte (restored)
pop rdi ; di = src tte (restored)
cmp al, 0
jne .size_mismatch
jmp .operator_2_register_register_continue
.operator_2_register_register_32:
; 32 bit source: the 16 bit prefix (0x66) must not be in the buffer
push rdi
push rsi
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x66
call elemb
pop rsi ; si = dst tte (restored)
pop rdi ; di = src tte (restored)
cmp al, 0
jne .size_mismatch
; 32 bit source: the 64 bit prefix (0x48) must not be in the buffer
push rdi
push rsi
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x48
call elemb
pop rsi ; si = dst tte (restored)
pop rdi ; di = src tte (restored)
cmp al, 0
jne .size_mismatch
jmp .operator_2_register_register_continue
.operator_2_register_register_64:
; 64 bit source: the 16 bit prefix (0x66) must not be in the buffer
push rdi
push rsi
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x66
call elemb
pop rsi ; si = dst tte (restored)
pop rdi ; di = src tte (restored)
cmp al, 0
jne .size_mismatch
; 64 bit source: the 64 bit prefix (0x48) must be in the buffer
push rdi
push rsi
mov edi, .buffer_end - .buffer ; length of buffer
mov rsi, .buffer ; buffer location
mov dl, 0x48
call elemb
pop rsi ; si = dst tte (restored)
pop rdi ; di = src tte (restored)
cmp al, 1
jne .size_mismatch
jmp .operator_2_register_register_continue
.operator_2_register_register_continue:
push rsi
mov esi, edi ; si = reg; src tte
@@ -362,6 +573,11 @@ assemble:
call print.error
jmp halt
; Error exit used when operand sizes are not acceptable: reached from the
; addressing-register width checks (16 bit / 8 bit registers) and from the
; register-register prefix checks. Prints .msg_size_mismatch through
; print.error and then jumps to halt; control does not come back here.
.size_mismatch:
mov rsi, .msg_size_mismatch ; rsi = message passed to print.error
call print.error
jmp halt
; procedures
; add the line `jge .break` after call site
@@ -385,10 +601,12 @@ assemble:
; al = byte to write
; Stores al at the address currently held in .next_output_byte, then advances
; that stored address by one. The scratch register is saved and restored, and
; al is left untouched.
.output_byte:
  push rcx
  mov ecx, [.next_output_byte]          ; ecx = address of the next empty output byte
  mov [ecx], al                         ; place the byte there
  inc ecx                               ; step to the following byte
  mov [.next_output_byte], ecx          ; remember it for the next call
  pop rcx
  ret
.next_output_byte dd OUTPUT_ADDR ; next empty byte in output
; TODO get rid of this sketchy bit of state
@@ -401,38 +619,41 @@ assemble:
push rax
mov ecx, .buffer_end
mov [.buffer_pointer], ecx
.push_byte_loop:
dec ecx
cmp ecx, .buffer
jl .push_byte_break
mov al, [ecx]
mov [ecx + 1], al
jmp .push_byte_loop
.push_byte_break:
pop rax ; al = byte to push
mov [.buffer], al
pop rcx ; ecx = old buffer pointer
inc ecx
mov [.buffer_pointer], ecx
pop rcx
ret
.push_byte_loop:
dec ecx
cmp ecx, .buffer
jl .push_byte_break
mov al, [ecx]
mov [ecx + 1], al
jmp .push_byte_loop
.push_byte_break:
pop rax ; al = byte to push
mov [.buffer], al ; write desired byte to front of buffer
pop rcx ; ecx = old buffer pointer
inc ecx ; ecx = pointer to next empty in buffer
mov [.buffer_pointer], ecx ; record write .buffer_pointer
pop rcx
ret
; al = byte to write
; Appends al to the write buffer: the byte is stored at the address held in
; .buffer_pointer, which is then advanced by one. The scratch register is
; saved and restored, and al is left untouched.
; No check against .buffer_end is made here.
.write_byte:
  push rcx
  mov ecx, [.buffer_pointer]            ; ecx = next empty position in the buffer
  mov [ecx], al                         ; store the byte
  inc ecx                               ; step past it
  mov [.buffer_pointer], ecx            ; record the new end of the buffer
  pop rcx
  ret
.flush_write_buffer:
push rcx
push rax
mov ecx, [.buffer_pointer]
mov ecx, .buffer
dec ecx
.flush_write_buffer_loop:
dec ecx
cmp ecx, .buffer
jl .flush_write_buffer_break
inc ecx
cmp ecx, [.buffer_pointer]
jge .flush_write_buffer_break
mov al, [ecx]
call .output_byte
mov byte [ecx], 0x00
@@ -448,6 +669,7 @@ assemble:
.msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00
.msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00
.msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00
.msg_operator_0 db "operator_0", 0x0A, 0x00
.msg_operator_1 db "operator_1", 0x0A, 0x00
.msg_operator_1_memory db "operator_1_memory", 0x0A, 0x00
@@ -580,21 +802,21 @@ get_ModRM:
.continue:
shl ebx, 3
mov edi, esi ; do the other one
mov edi, esi ; do the other one
; di = tte
call get_reg_bits
; al = reg bits
; di = tte
call get_reg_bits
; al = reg bits
mov ecx, eax
mov ecx, eax ; cl = r/m bits
xor eax, eax
or eax, edx ; mod bits
or eax, ebx ; reg bits
or eax, ecx ; R/M bits
and eax, 0xFF ; mask for byte
pop rbx
ret
xor eax, eax
or eax, edx ; mod bits
or eax, ebx ; reg bits
or eax, ecx ; R/M bits
and eax, 0xFF ; mask for byte
pop rbx
ret
; ------------------------------------------------------------------------------
; get_opcode
@@ -1307,7 +1529,6 @@ identify_register:
; ------------------------------------------------------------------------------
; identify_operator
; TODO combine with identify_register
;
; description:
; takes an operator in ascii-encoded text and returns its token ID or
@@ -1660,6 +1881,86 @@ tokens:
db 00010110b ; reg: 101b
; width: 10b (32 bits)
; Register entries for the 16 bit (0x0020-0x0027) and 8 bit (0x0030-0x0037)
; general purpose registers. Each entry is:
;   dw token id
;   db type            (0x02 = register)
;   db typed metadata  bits 4..2 = reg field, bits 1..0 = width
;                      width: 00b = 8 bit, 01b = 16 bit, 10b = 32 bit, 11b = 64 bit
; --- 16 bit registers ---
dw 0x0020 ; ax
db 0x02 ; type: register
db 00000001b ; reg: 000b
; width: 01b (16 bits)
dw 0x0021 ; bx
db 0x02 ; type: register
db 00001101b ; reg: 011b
; width: 01b (16 bits)
dw 0x0022 ; cx
db 0x02 ; type: register
db 00000101b ; reg: 001b
; width: 01b (16 bits)
dw 0x0023 ; dx
db 0x02 ; type: register
db 00001001b ; reg: 010b
; width: 01b (16 bits)
dw 0x0024 ; si
db 0x02 ; type: register
db 00011001b ; reg: 110b
; width: 01b (16 bits)
dw 0x0025 ; di
db 0x02 ; type: register
db 00011101b ; reg: 111b
; width: 01b (16 bits)
dw 0x0026 ; sp
db 0x02 ; type: register
db 00010001b ; reg: 100b
; width: 01b (16 bits)
dw 0x0027 ; bp
db 0x02 ; type: register
db 00010101b ; reg: 101b
; width: 01b (16 bits)
; --- 8 bit registers ---
; The width checks in assemble currently send 8 bit registers to
; .unexpected_token (TODO handle 8 bit opcodes).
dw 0x0030 ; al
db 0x02 ; type: register
db 00000000b ; reg: 000b
; width: 00b (8 bits)
dw 0x0031 ; bl
db 0x02 ; type: register
db 00001100b ; reg: 011b
; width: 00b (8 bits)
dw 0x0032 ; cl
db 0x02 ; type: register
db 00000100b ; reg: 001b
; width: 00b (8 bits)
dw 0x0033 ; dl
db 0x02 ; type: register
db 00001000b ; reg: 010b
; width: 00b (8 bits)
; NOTE(review): on x86-64 the reg values 100b-111b only mean spl/bpl/sil/dil
; when a REX prefix is present; without one they select ah/ch/dh/bh. The
; 8 bit encoder will have to emit a REX prefix for the four entries below.
dw 0x0034 ; sil
db 0x02 ; type: register
db 00011000b ; reg: 110b
; width: 00b (8 bits)
dw 0x0035 ; dil
db 0x02 ; type: register
db 00011100b ; reg: 111b
; width: 00b (8 bits)
dw 0x0036 ; spl
db 0x02 ; type: register
db 00010000b ; reg: 100b
; width: 00b (8 bits)
dw 0x0037 ; bpl
db 0x02 ; type: register
db 00010100b ; reg: 101b
; width: 00b (8 bits)
dw 0x004F ; hlt
db 0x01 ; type: operator
db 0x00 ; # operands
@@ -2041,6 +2342,10 @@ whitespace_2 db " ", 0x0D
; test program
program:
db "xor eax, eax", 0x0A
db "mov rax, rax", 0x0A
db "mov rax, rbx", 0x0A
db "mov eax, ebx", 0x0A
db "mov ax, bx", 0x0A
db "inc rax ; inline comment", 0x0A
db "dec rax", 0x0A
db "; one line comment", 0x0A
@@ -2048,6 +2353,9 @@ program:
db "mov [rax], rdx", 0x0A
db "mov [rcx], rbx", 0x0A
db "mov rcx, [rbx]", 0x0A
db "mov rcx, [ebx]", 0x0A
db "mov ecx, [ebx]", 0x0A
db "mov cx, [ebx]", 0x0A
db "hlt", 0x0A
.size dq $ - program