Compare commits

..

7 Commits

Author SHA1 Message Date
andromeda
562d5ceee9 update readme 2026-04-04 17:21:21 +02:00
andromeda
a2d66bbb4d remove silly align that fixed bug that no longer exists 2026-04-04 17:18:44 +02:00
andromeda
3a6275fc53 IT'S ALIIIVE 2026-04-04 17:13:55 +02:00
andromeda
0e423fa763 do awaiting labels, fix bug, make call not crash xd 2026-04-04 13:47:39 +02:00
andromeda
34b11aabe5 improve readme a bit 2026-04-04 10:49:40 +02:00
andromeda
767453bd55 save stack in tokenise 2026-04-04 10:26:15 +02:00
andromeda
df8e04ce63 mask smth 2026-04-04 09:50:40 +02:00
3 changed files with 168 additions and 68 deletions

View File

@@ -10,4 +10,4 @@ status: gets to long mode, loads+jumps to kernel, starts idt and gdt... :)
hobby self-hosted assembler hobby self-hosted assembler
status: tokenises a lot of common stuff, compiles a lot of common stuff... not good enough for anything real-world yet status: tokenises a lot of common stuff, compiles a lot of common stuff... not good enough for anything real-world yet. It can assemble its own print function!!!

View File

@@ -267,8 +267,8 @@ entries are as follows:
+----------+----------+----------+---------+ +----------+----------+----------+---------+
| 31 24 | 23 16 | 15 0 | | 31 24 | 23 16 | 15 0 |
+----------+----------+--------------------+ +----------+----------+--------------------+
| reserved | opcode | token ID | | opcode+r | opcode | token ID |
| | dst=r/m | | | dst=r | dst=r/m | |
+----------+----------+--------------------+ +----------+----------+--------------------+
16 bytes 16 bytes
@@ -300,24 +300,25 @@ entries are as follows:
+-----------------+ +-----------------+
| flags byte | | flags byte |
+----------+------+ +----------+------+
| 95 89 | 88 | | 95 89 | 88 |
+----------+------+ +----------+------+
| reserved | 8bit | | reserved | 8bit |
+----------+------+ +----------+------+
1 byte 1 byte
+----------------------------------------------------+ +--------------------------------------------------------------+
| flagsX byte | | flagsX byte |
+----------+-----------+-------------+---------------+ +----------+---------+-----------+-------------+---------------+
| 7 5 | 4 | 3 | 2 0 | | 7 6 | 5 | 4 | 3 | 2 0 |
+----------+-----------+-------------+---------------+ +----------+---------+-----------+-------------+---------------+
| reserved | no ModR/M | 0x0F prefix | operator flag | | reserved | +r flag | no ModR/M | 0x0F prefix | operator flag |
+----------+-----------+-------------+---------------+ +----------+---------+-----------+-------------+---------------+
; flags key: ; flags key:
8bit ; tte has opcodes for r/m8 and r8 instead of r/m and r respectively 8bit ; tte has opcodes for r/m8 and r8 instead of r/m and r respectively
; flagsX key: ; flagsX key:
+r flag ; there is a +r variation of this opcode
no ModR/M ; there is no ModR/M byte for this opcode no ModR/M ; there is no ModR/M byte for this opcode
0x0F prefix ; there is a 0x0F prefix for this opcode 0x0F prefix ; there is a 0x0F prefix for this opcode
operator flag ; contents of `reg` if applicable operator flag ; contents of `reg` if applicable

View File

@@ -63,6 +63,9 @@ start:
pop rdi ; rdi = number of tokens in token table pop rdi ; rdi = number of tokens in token table
call assemble call assemble
mov rsi, msg_welcome_1
call OUTPUT_ADDR
jmp halt jmp halt
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
@@ -120,6 +123,8 @@ assemble:
call print.debug call print.debug
pop rsi pop rsi
mov esi, [.buffer_pointer]
sub esi, .buffer
mov esi, [.next_output_byte] mov esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label to start of program sub esi, OUTPUT_ADDR ; esi = relative address of label to start of program
and edi, 0x0FFF ; edi = index to add address hash to and edi, 0x0FFF ; edi = index to add address hash to
@@ -207,7 +212,8 @@ assemble:
call .get_next_tte call .get_next_tte
mov rcx, rdi ; cx = operand tte mov rcx, rdi ; cx = operand tte
pop rdi pop rdi
push rcx ; pushes until after write_prefix_continue push rcx ; pushes until after write_prefix_continue or before
; plusr_continue
; di = tte of operator ; di = tte of operator
mov sil, 0 ; dst=r/m mov sil, 0 ; dst=r/m
@@ -222,9 +228,40 @@ assemble:
.operator_1_get_opcode_label: .operator_1_get_opcode_label:
mov sil, 4 ; dst=rel mov sil, 4 ; dst=rel
.operator_1_get_opcode_continue: .operator_1_get_opcode_continue:
push rdi
push rbx
call get_opcode call get_opcode
; al = opcode ; al = opcode
; dl = op flag ; dl = op flag
pop rbx ; flags
pop rdi ; di
push rdx
and dl, 0x20
cmp dl, 0x20
pop rdx
jne .operator_1_plusr_continue
mov sil, 1 ; dst=r
; TODO only do this if we've confirmed the next token's a register xD
call .clear_write_buffer
call get_opcode
; al = opcode
; dl = op flag
pop rdi ; di = next tte. from early in .operator_1
push rax
; di = tte
call get_tte_typed_metadata
; al = register typed metadata
mov ecx, eax ; cl = register typed metadata
shr ecx, 2
and ecx, 111b ; cl = reg value
pop rax ; al = opcode
add eax, ecx ; opcode + r
call .write_byte
jmp .loop_next_token
.operator_1_plusr_continue:
push rdx push rdx
and dl, 1000b and dl, 1000b
cmp dl, 1000b cmp dl, 1000b
@@ -237,8 +274,7 @@ assemble:
.operator_1_write_prefix_continue .operator_1_write_prefix_continue
call .write_byte call .write_byte
pop rdx ; dl = op flag pop rdx ; dl = op flag
pop rdi ; di = next tte. from early in .operator_1
pop rdi ; di = next tte
push rdi push rdi
and di, 0xFF00 and di, 0xFF00
@@ -315,7 +351,9 @@ assemble:
call print.debug call print.debug
pop rsi pop rsi
mov esi, [.next_output_byte] mov esi, [.buffer_pointer]
sub esi, .buffer
add esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
; of program ; of program
mov eax, 0x04 ; al = first 4 bits: # bytes reserved mov eax, 0x04 ; al = first 4 bits: # bytes reserved
@@ -593,7 +631,9 @@ assemble:
jmp .operator_2_register_label_32 jmp .operator_2_register_label_32
.operator_2_register_label_8: .operator_2_register_label_8:
mov esi, [.next_output_byte] mov esi, [.buffer_pointer]
sub esi, .buffer
add esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
mov eax, 0x11 ; al = first 4 bits: # bits reserved mov eax, 0x11 ; al = first 4 bits: # bits reserved
; 5th bit: abs flag ; 5th bit: abs flag
@@ -603,7 +643,9 @@ assemble:
call .write_byte call .write_byte
jmp .loop_next_token jmp .loop_next_token
.operator_2_register_label_16: .operator_2_register_label_16:
mov esi, [.next_output_byte] mov esi, [.buffer_pointer]
sub esi, .buffer
add esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
mov eax, 0x12 ; al = first 4 bits: # bits reserved mov eax, 0x12 ; al = first 4 bits: # bits reserved
; 5th bit: abs flag ; 5th bit: abs flag
@@ -614,7 +656,9 @@ assemble:
call .write_byte call .write_byte
jmp .loop_next_token jmp .loop_next_token
.operator_2_register_label_32: .operator_2_register_label_32:
mov esi, [.next_output_byte] mov esi, [.buffer_pointer]
sub esi, .buffer
add esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
mov eax, 0x14 ; al = first 4 bits: # bits reserved mov eax, 0x14 ; al = first 4 bits: # bits reserved
; 5th bit: abs flag ; 5th bit: abs flag
@@ -941,7 +985,7 @@ assemble:
call print.debug call print.debug
pop rsi pop rsi
mov rsp, rbp mov rsp, rbp
ret jmp resolve_awaiting_labels ; tail call
.unexpected_token: .unexpected_token:
call .flush_write_buffer call .flush_write_buffer
@@ -987,19 +1031,8 @@ assemble:
.tokens_processed dd 0 .tokens_processed dd 0
.tokens_total dd 0 .tokens_total dd 0
; .output_byte
; al = byte to write
; stores al at the address currently held in .next_output_byte, then advances
; that stored address by one. rdx is used as scratch and is saved/restored.
.output_byte:
push rdx
mov edx, [.next_output_byte] ; edx = address of the next free output byte
mov [edx], al ; write byte to that address
inc edx ; step to the following byte
mov [.next_output_byte], edx ; store the advanced address back
pop rdx
ret
.next_output_byte dd OUTPUT_ADDR ; next empty byte in output; initially OUTPUT_ADDR
; TODO get rid of this sketchy bit of state
; al = byte to push ; al = byte to push
; buffered push
.push_byte: .push_byte:
push rcx push rcx
mov ecx, [.buffer_pointer] mov ecx, [.buffer_pointer]
@@ -1024,6 +1057,7 @@ assemble:
ret ret
; al = byte to write ; al = byte to write
; buffered write
.write_byte: .write_byte:
push rdx push rdx
mov edx, [.buffer_pointer] mov edx, [.buffer_pointer]
@@ -1033,6 +1067,16 @@ assemble:
pop rdx pop rdx
ret ret
; discards whatever is pending in the write buffer without emitting it:
; rewinds .buffer_pointer to the start of .buffer and zeroes both quadwords
; of .buffer. rax is preserved.
.clear_write_buffer:
    push rax
    mov eax, .buffer
    mov [.buffer_pointer], eax ; pointer back to the first buffer byte
    xor eax, eax               ; rax = 0
    mov [.buffer + 8], rax     ; wipe upper quadword
    mov [.buffer], rax         ; wipe lower quadword
    pop rax
    ret
.flush_write_buffer: .flush_write_buffer:
push rcx push rcx
push rax push rax
@@ -1051,10 +1095,23 @@ assemble:
pop rax pop rax
pop rcx pop rcx
ret ret
.buffer dq 0, 0 ; octo word of space for max of 8 bytes per write .buffer dq 0, 0 ; octo word of space for max of 16 bytes per write
.buffer_end: .buffer_end:
.buffer_pointer dd .buffer ; points to current byte in buffer .buffer_pointer dd .buffer ; points to current byte in buffer
; al = byte to write
; unbuffered output; prefer `.write_byte`
; writes al to the address held in .next_output_byte and bumps that address
; by one. rdx is preserved.
.output_byte:
    push rdx
    mov edx, [.next_output_byte]   ; rdx = destination (32-bit load zero-extends)
    mov [rdx], al                  ; emit the byte
    inc edx                        ; advance past it
    mov [.next_output_byte], edx   ; remember the new position
    pop rdx
    ret
.next_output_byte dd OUTPUT_ADDR ; next empty byte in output
.msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00 .msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00
.msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00 .msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00
.msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00 .msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00
@@ -1075,6 +1132,64 @@ assemble:
.msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00 .msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00
.msg_pseudo_operator db "pseudo_operator", 0x0A, 0x00 .msg_pseudo_operator db "pseudo_operator", 0x0A, 0x00
; ------------------------------------------------------------------------------
; resolve_awaiting_labels
;
; description:
; walks the awaiting label table from its first entry and patches the
; placeholder of each reference in the program at OUTPUT_ADDR with
; (label address - reference address - 4), using the label table to look up
; the label address.
;
; table fields read here (16 bytes per entry in both tables):
; awaiting label table: +0 qword hash, +8 dword reference address,
; +12 byte flags (low 4 bits: # bytes reserved, bit 4: abs flag)
; label table: +8 dword label address
;
; the walk stops at the first entry whose hash is 0, and also at the first
; entry that is not a 4-byte relative reference (nothing is patched for it).
;
; clobbers rax, rdx, rsi, rdi, rbp, plus whatever add_label_hash clobbers
; ------------------------------------------------------------------------------
resolve_awaiting_labels:
    mov rbp, rsp
    xor eax, eax ; rax = byte offset of the current awaiting entry

.loop:
    mov rdi, [AWAITING_LABEL_TABLE_ADDR + rax] ; rdi = hash of this entry
    test rdi, rdi
    jz .break ; hash of 0 ends the walk

    push rax ; keep the entry offset across the call
    ; rdi = hash to look up
    call add_label_hash
    ; rax = index in label table (per the callee's contract; defined elsewhere)
    shl rax, 4 ; index -> byte offset
    mov edi, [LABEL_TABLE_ADDR + rax + 8] ; edi = address of label
    pop rax ; rax = byte offset of the awaiting entry again

    mov esi, [AWAITING_LABEL_TABLE_ADDR + rax + 8] ; esi = address of reference
    sub edi, esi
    sub edi, 4 ; edi = offset measured from the end of the 4-byte reference
    ; TODO handle non-4-length refs

    mov dl, [AWAITING_LABEL_TABLE_ADDR + rax + 12] ; dl = flags
    ; only a 4-byte (low nibble = 4) relative (bit 4 clear) reference is handled
    push rdx
    and dl, 0x1F
    cmp dl, 0x04
    pop rdx ; dl = unmodified flags
    jne .break

    mov [OUTPUT_ADDR + rsi], edi ; patch the placeholder

    add rax, 16 ; next awaiting entry
    jmp .loop

.break:
    mov rsp, rbp
    ret
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; get_tte_type ; get_tte_type
; ;
@@ -1252,13 +1367,13 @@ get_ModRM:
; returned: ; returned:
; al = opcode; the rest of rax is zeroed. ; al = opcode; the rest of rax is zeroed.
; dl = flagsX ; dl = flagsX
; +----------------------------------------------------+ ; +--------------------------------------------------------------+
; | flagsX byte | ; | flagsX byte |
; +----------+-----------+-------------+---------------+ ; +----------+---------+-----------+-------------+---------------+
; | 7 5 | 4 | 3 | 2 0 | ; | 7 6 | 5 | 4 | 3 | 2 0 |
; +----------+-----------+-------------+---------------+ ; +----------+---------+-----------+-------------+---------------+
; | reserved | no ModR/M | 0x0F prefix | operator flag | ; | reserved | +r flag | no ModR/M | 0x0F prefix | operator flag |
; +----------+-----------+-------------+---------------+ ; +----------+---------+-----------+-------------+---------------+
; ;
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
@@ -1302,6 +1417,7 @@ get_opcode:
.found: .found:
shl eax, 4 shl eax, 4
mov dl, [esi + 8 + opcodes.by_id + eax] mov dl, [esi + 8 + opcodes.by_id + eax]
and edx, 0xFF
mov al, [esi + 2 + opcodes.by_id + eax] mov al, [esi + 2 + opcodes.by_id + eax]
and eax, 0xFF and eax, 0xFF
ret ret
@@ -1347,6 +1463,7 @@ get_reg_bits:
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
tokenise: tokenise:
mov rbp, rsp
; rdi -> current byte of program ; rdi -> current byte of program
xor eax, eax ; rax = number of tokens processed xor eax, eax ; rax = number of tokens processed
xor edx, edx ; dl = current byte of program xor edx, edx ; dl = current byte of program
@@ -1653,6 +1770,7 @@ tokenise:
jmp .loop jmp .loop
.break: .break:
mov rsp, rbp
ret ret
; state ; state
@@ -2146,25 +2264,6 @@ print:
.test_msg db 0x1B, "[1;33m", "[TEST]: ", 0x1B, "[0m", 0x00 .test_msg db 0x1B, "[1;33m", "[TEST]: ", 0x1B, "[0m", 0x00
.warn_msg db 0x1B, "[1;35m", "[WARN]: ", 0x1B, "[0m", 0x00 .warn_msg db 0x1B, "[1;35m", "[WARN]: ", 0x1B, "[0m", 0x00
; ------------------------------------------------------------------------------
; print_least_4_bits
;
; description:
; prints the least significant 4 bits of rax for debugging reasons, as one
; uppercase hexadecimal digit (0-9, A-F) followed by a newline, via print.warn
;
; parameters:
; rax = value; only bits 3..0 are used
;
; rax and rsi are preserved
; ------------------------------------------------------------------------------
print_least_4_bits:
    push rax
    push rsi
    and eax, 0x0F ; keep only the low nibble so higher bits cannot leak in
    cmp al, 10
    jb .decimal
    add al, 'A' - 10 - '0' ; 10..15: bridge the gap between '9' and 'A'
.decimal:
    add al, '0' ; nibble -> ASCII digit
    mov [.byte], al ; first byte of the message below
    mov rsi, .byte
    call print.warn
    pop rsi
    pop rax
    ret
.byte db 0x00, 0x0A, 0x00 ; digit placeholder, newline, terminator
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; halt ; halt
; ;
@@ -2389,7 +2488,7 @@ add_awaiting_label:
mov rdx, [LABEL_TABLE_ADDR + rdi] mov rdx, [LABEL_TABLE_ADDR + rdi]
mov [AWAITING_LABEL_TABLE_ADDR + rax], rdx ; hash mov [AWAITING_LABEL_TABLE_ADDR + rax], rdx ; hash
mov [AWAITING_LABEL_TABLE_ADDR + 8 + rax], esi ; address mov [AWAITING_LABEL_TABLE_ADDR + 8 + rax], esi ; address
mov [AWAITING_LABEL_TABLE_ADDR + 13 + rax], cl ; bits passed to al mov [AWAITING_LABEL_TABLE_ADDR + 12 + rax], cl ; bits passed to al
ret ret
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
@@ -3116,30 +3215,30 @@ opcodes:
dw 0x0000 dw 0x0000
; push ; push
; TODO add support for the +r variation
dw 0x005F dw 0x005F
db 0xFF ; r/m db 0xFF ; r/m
db 0x00 db 0x50 ; +r
db 0x68 ; imm16/32 db 0x68 ; imm16/32
db 0x6A ; imm8 db 0x6A ; imm8
dw 0x0000 dw 0x0000
db 0x06 ; 6: r/m db 0x26 ; 26: +r flag, r/m
db 0x00 db 0x00
dw 0x0000 dw 0x0000
dd 0x00000000 dd 0x00000000
; pop ; pop
; TODO add support for the +r variation
dw 0x0060 dw 0x0060
db 0x8F ; r/m db 0x8F ; r/m
db 0x58 ; +r
dd 0x00000000
db 0x20 ; 20: +r flag, r/m
db 0x00 db 0x00
dw 0x0000
dd 0x00000000
dd 0x00000000
dd 0x00000000 dd 0x00000000
@@ -3159,12 +3258,12 @@ opcodes:
.by_id_end: .by_id_end:
msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00 msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00
msg_welcome_1 db 0x1B, "[35m", "Welcome to Twasm (from the next level)", 0x1B, "[0m", 0x0A, 0x00
msg_halt db "halted.", 0x0A, 0x00 msg_halt db "halted.", 0x0A, 0x00
whitespace_2 db " ", 0x0D whitespace_2 db " ", 0x0D
; test program ; test program
align 128
program: program:
db "print:", 0x0A db "print:", 0x0A
db " push rdx", 0x0A db " push rdx", 0x0A