Compare commits

...

4 Commits

Author SHA1 Message Date
andromeda
562d5ceee9 update readme 2026-04-04 17:21:21 +02:00
andromeda
a2d66bbb4d remove silly align that fixed bug that no longer exists 2026-04-04 17:18:44 +02:00
andromeda
3a6275fc53 IT'S ALIIIVE 2026-04-04 17:13:55 +02:00
andromeda
0e423fa763 do awaiting labels, fix bug, make call not crash xd 2026-04-04 13:47:39 +02:00
3 changed files with 165 additions and 135 deletions

View File

@@ -10,4 +10,4 @@ status: gets to long mode, loads+jumps to kernel, starts idt and gdt... :)
hobby self-hosted assembler hobby self-hosted assembler
status: tokenises a lot of common stuff, compiles a lot of common stuff... not good enough for anything real-world yet status: tokenises a lot of common stuff, compiles a lot of common stuff... not good enough for anything real-world yet. It can assemble its own print function!!!

View File

@@ -267,8 +267,8 @@ entries are as follows:
+----------+----------+----------+---------+ +----------+----------+----------+---------+
| 31 24 | 23 16 | 15 0 | | 31 24 | 23 16 | 15 0 |
+----------+----------+--------------------+ +----------+----------+--------------------+
| reserved | opcode | token ID | | opcode+r | opcode | token ID |
| | dst=r/m | | | dst=r | dst=r/m | |
+----------+----------+--------------------+ +----------+----------+--------------------+
16 bytes 16 bytes
@@ -300,24 +300,25 @@ entries are as follows:
+-----------------+ +-----------------+
| flags byte | | flags byte |
+----------+------+ +----------+------+
| 95 89 | 88 | | 95 89 | 88 |
+----------+------+ +----------+------+
| reserved | 8bit | | reserved | 8bit |
+----------+------+ +----------+------+
1 byte 1 byte
+----------------------------------------------------+ +--------------------------------------------------------------+
| flagsX byte | | flagsX byte |
+----------+-----------+-------------+---------------+ +----------+---------+-----------+-------------+---------------+
| 7 5 | 4 | 3 | 2 0 | | 7 6 | 5 | 4 | 3 | 2 0 |
+----------+-----------+-------------+---------------+ +----------+---------+-----------+-------------+---------------+
| reserved | no ModR/M | 0x0F prefix | operator flag | | reserved | +r flag | no ModR/M | 0x0F prefix | operator flag |
+----------+-----------+-------------+---------------+ +----------+---------+-----------+-------------+---------------+
; flags key: ; flags key:
8bit ; tte has opcodes for r/m8 and r8 instead of r/m and r respectively 8bit ; tte has opcodes for r/m8 and r8 instead of r/m and r respectively
; flagsX key: ; flagsX key:
+r flag ; there is a +r variation of this opcode
no ModR/M ; there is no ModR/M byte for this opcode no ModR/M ; there is no ModR/M byte for this opcode
0x0F prefix ; there is a 0x0F prefix for this opcode 0x0F prefix ; there is a 0x0F prefix for this opcode
operator flag ; contents of `reg` if applicable operator flag ; contents of `reg` if applicable
@@ -526,70 +527,3 @@ hlt
0xF4 ; HLT 0xF4 ; HLT
``` ```
#### program output with the function `print`, each comma-separated `db` value put onto its own line
edited output of `x/512xb 0x00070000` in [gdb](https://www.sourceware.org/gdb/)
the following is somewhat correct! I just need to a) null-terminate 8-byte chars and b) define all the addresses currently represented as `0xff 0xff 0xff 0xff`
```
0x48 0xff 0xf2
0x48 0xff 0xf0
0x48 0xff 0xf6
0xc7 0xc2 0xf8 0x03 0x00 0x00
0x8a 0x06
0x80 0xf8 0x00
0x0f 0x84 0xff 0xff 0xff 0xff
0x66 0xee
0x48 0xff 0xc6
0xe9 0xff 0xff 0xff 0xff
0x48 0x8f 0xc6
0x48 0x8f 0xc0
0x48 0x8f 0xc2
0xc3
0x48 0xff 0xf6
0xc7 0xc6 0xff 0xff 0xff 0xff
0xe8 0xff 0xff 0xff 0xff
0x48 0x8f 0xc6
0xe9 0xff 0xff 0xff 0xff
0x48 0xff 0xf6
0xc7 0xc6 0xff 0xff 0xff 0xff
0xe8 0xff 0xff 0xff 0xff
0x48 0x8f 0xc6
0xe9 0xff 0xff 0xff 0xff
0x48 0xff 0xf6
0xc7 0xc6 0xff 0xff 0xff 0xff
0xe8 0xff 0xff 0xff 0xff
0x48 0x8f 0xc6
0xe9 0xff 0xff 0xff 0xff
0x48 0xff 0xf6
0xc7 0xc6 0xff 0xff 0xff 0xff
0xe8 0xff 0xff 0xff 0xff
0x48 0x8f 0xc6
0xe9 0xff 0xff 0xff 0xff
0x1b 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x5b 0x33 0x36 0x6d 0x00 0x00 0x00 0x00
0x7b 0x44 0x45 0x42 0x55 0x47 0x5d 0x3a
0x1b 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x5b 0x30 0x6d 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x1b 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x5b 0x31 0x3b 0x33 0x31 0x6d 0x00 0x00
0x7b 0x45 0x52 0x52 0x4f 0x52 0x5d 0x3a
0x1b 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x5b 0x30 0x6d 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x1b 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x5b 0x31 0x3b 0x33 0x33 0x6d 0x00 0x00
0x5b 0x54 0x45 0x53 0x54 0x5d 0x3a 0x20
0x1b 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x5b 0x30 0x6d 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x1b 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x5b 0x31 0x3b 0x33 0x35 0x6d 0x00 0x00
0x5b 0x57 0x41 0x52 0x4e 0x5d 0x3a 0x20
0x1b 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x5b 0x30 0x6d 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
```

View File

@@ -63,6 +63,9 @@ start:
pop rdi ; rdi = number of tokens in token table pop rdi ; rdi = number of tokens in token table
call assemble call assemble
mov rsi, msg_welcome_1
call OUTPUT_ADDR
jmp halt jmp halt
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
@@ -120,6 +123,8 @@ assemble:
call print.debug call print.debug
pop rsi pop rsi
mov esi, [.buffer_pointer]
sub esi, .buffer
mov esi, [.next_output_byte] mov esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label to start of program sub esi, OUTPUT_ADDR ; esi = relative address of label to start of program
and edi, 0x0FFF ; edi = index to add address hash to and edi, 0x0FFF ; edi = index to add address hash to
@@ -207,7 +212,8 @@ assemble:
call .get_next_tte call .get_next_tte
mov rcx, rdi ; cx = operand tte mov rcx, rdi ; cx = operand tte
pop rdi pop rdi
push rcx ; pushes until after write_prefix_continue push rcx ; pushes until after write_prefix_continue or before
; plusr_continue
; di = tte of operator ; di = tte of operator
mov sil, 0 ; dst=r/m mov sil, 0 ; dst=r/m
@@ -222,9 +228,40 @@ assemble:
.operator_1_get_opcode_label: .operator_1_get_opcode_label:
mov sil, 4 ; dst=rel mov sil, 4 ; dst=rel
.operator_1_get_opcode_continue: .operator_1_get_opcode_continue:
push rdi
push rbx
call get_opcode call get_opcode
; al = opcode ; al = opcode
; dl = op flag ; dl = op flag
pop rbx ; flags
pop rdi ; di
push rdx
and dl, 0x20
cmp dl, 0x20
pop rdx
jne .operator_1_plusr_continue
mov sil, 1 ; dst=r
; TODO only do this if we've confirmed the next token's a register xD
call .clear_write_buffer
call get_opcode
; al = opcode
; dl = op flag
pop rdi ; di = next tte. from early in .operator_1
push rax
; di = tte
call get_tte_typed_metadata
; al = register typed metadata
mov ecx, eax ; cl = register typed metadata
shr ecx, 2
and ecx, 111b ; cl = reg value
pop rax ; al = opcode
add eax, ecx ; opcode + r
call .write_byte
jmp .loop_next_token
.operator_1_plusr_continue:
push rdx push rdx
and dl, 1000b and dl, 1000b
cmp dl, 1000b cmp dl, 1000b
@@ -237,8 +274,7 @@ assemble:
.operator_1_write_prefix_continue .operator_1_write_prefix_continue
call .write_byte call .write_byte
pop rdx ; dl = op flag pop rdx ; dl = op flag
pop rdi ; di = next tte. from early in .operator_1
pop rdi ; di = next tte
push rdi push rdi
and di, 0xFF00 and di, 0xFF00
@@ -315,7 +351,9 @@ assemble:
call print.debug call print.debug
pop rsi pop rsi
mov esi, [.next_output_byte] mov esi, [.buffer_pointer]
sub esi, .buffer
add esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
; of program ; of program
mov eax, 0x04 ; al = first 4 bits: # bytes reserved mov eax, 0x04 ; al = first 4 bits: # bytes reserved
@@ -593,7 +631,9 @@ assemble:
jmp .operator_2_register_label_32 jmp .operator_2_register_label_32
.operator_2_register_label_8: .operator_2_register_label_8:
mov esi, [.next_output_byte] mov esi, [.buffer_pointer]
sub esi, .buffer
add esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
mov eax, 0x11 ; al = first 4 bits: # bits reserved mov eax, 0x11 ; al = first 4 bits: # bits reserved
; 5th bit: abs flag ; 5th bit: abs flag
@@ -603,7 +643,9 @@ assemble:
call .write_byte call .write_byte
jmp .loop_next_token jmp .loop_next_token
.operator_2_register_label_16: .operator_2_register_label_16:
mov esi, [.next_output_byte] mov esi, [.buffer_pointer]
sub esi, .buffer
add esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
mov eax, 0x12 ; al = first 4 bits: # bits reserved mov eax, 0x12 ; al = first 4 bits: # bits reserved
; 5th bit: abs flag ; 5th bit: abs flag
@@ -614,7 +656,9 @@ assemble:
call .write_byte call .write_byte
jmp .loop_next_token jmp .loop_next_token
.operator_2_register_label_32: .operator_2_register_label_32:
mov esi, [.next_output_byte] mov esi, [.buffer_pointer]
sub esi, .buffer
add esi, [.next_output_byte]
sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start sub esi, OUTPUT_ADDR ; esi = relative address of label reference to start
mov eax, 0x14 ; al = first 4 bits: # bits reserved mov eax, 0x14 ; al = first 4 bits: # bits reserved
; 5th bit: abs flag ; 5th bit: abs flag
@@ -941,7 +985,7 @@ assemble:
call print.debug call print.debug
pop rsi pop rsi
mov rsp, rbp mov rsp, rbp
ret jmp resolve_awaiting_labels ; tail call
.unexpected_token: .unexpected_token:
call .flush_write_buffer call .flush_write_buffer
@@ -987,19 +1031,8 @@ assemble:
.tokens_processed dd 0 .tokens_processed dd 0
.tokens_total dd 0 .tokens_total dd 0
; al = byte to write
; unbuffered: stores al at the address held in .next_output_byte, then advances
; that address by one. every general-purpose register is preserved.
.output_byte:
        push    rdx
        mov     edx, [.next_output_byte]        ; edx = address of next empty output byte
        mov     [rdx], al                       ; 32-bit load above zeroed the top of rdx
        pop     rdx
        inc     dword [.next_output_byte]       ; advance the cursor in place
        ret
.next_output_byte dd OUTPUT_ADDR ; next empty byte in output
; TODO get rid of this sketchy bit of state
; al = byte to push ; al = byte to push
; buffered push
.push_byte: .push_byte:
push rcx push rcx
mov ecx, [.buffer_pointer] mov ecx, [.buffer_pointer]
@@ -1024,6 +1057,7 @@ assemble:
ret ret
; al = byte to write ; al = byte to write
; buffered write
.write_byte: .write_byte:
push rdx push rdx
mov edx, [.buffer_pointer] mov edx, [.buffer_pointer]
@@ -1033,6 +1067,16 @@ assemble:
pop rdx pop rdx
ret ret
; discards anything pending in the write buffer: rewinds .buffer_pointer to the
; start of .buffer and zeroes both quadwords of .buffer. rax is preserved.
.clear_write_buffer:
        push    rax
        mov     eax, .buffer
        mov     [.buffer_pointer], eax          ; rewind to first byte of buffer
        xor     eax, eax                        ; rax = 0
        mov     [.buffer], rax                  ; wipe bytes 0-7
        mov     [.buffer + 8], rax              ; wipe bytes 8-15
        pop     rax
        ret
.flush_write_buffer: .flush_write_buffer:
push rcx push rcx
push rax push rax
@@ -1051,10 +1095,23 @@ assemble:
pop rax pop rax
pop rcx pop rcx
ret ret
.buffer dq 0, 0 ; octo word of space for max of 8 bytes per write .buffer dq 0, 0 ; octo word of space for max of 16 bytes per write
.buffer_end: .buffer_end:
.buffer_pointer dd .buffer ; points to current byte in buffer .buffer_pointer dd .buffer ; points to current byte in buffer
; al = byte to write
; unbuffered output; prefer `.write_byte`
; stores al at the address held in .next_output_byte, then advances that
; address by one. every general-purpose register is preserved.
.output_byte:
        push    rdx
        mov     edx, [.next_output_byte]        ; edx = address of next empty output byte
        mov     [rdx], al                       ; 32-bit load above zeroed the top of rdx
        pop     rdx
        inc     dword [.next_output_byte]       ; advance the cursor in place
        ret
.next_output_byte dd OUTPUT_ADDR ; next empty byte in output
.msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00 .msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00
.msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00 .msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00
.msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00 .msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00
@@ -1075,6 +1132,64 @@ assemble:
.msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00 .msg_operator_2_register_const db "operator_2_register_const", 0x0A, 0x00
.msg_pseudo_operator db "pseudo_operator", 0x0A, 0x00 .msg_pseudo_operator db "pseudo_operator", 0x0A, 0x00
; ------------------------------------------------------------------------------
; resolve_awaiting_labels
;
; description:
; resolves placeholder references in the program at OUTPUT_ADDR using the
; label table and awaiting label table. walks the awaiting label table from its
; first entry and stops at the first entry whose hash is zero.
;
; awaiting label table entry (16 bytes), as read here:
;   +0   qword  label hash (zero ends the walk)
;   +8   dword  address of the reference, relative to OUTPUT_ADDR
;   +12  byte   flags: low 4 bits = # bytes reserved, bit 4 = abs flag
;
; only 4-byte relative references are patched. any other entry is skipped and
; the walk continues, so one unsupported reference no longer leaves every later
; reference unresolved.
; TODO handle non-4-length refs and absolute refs
;
; parameters:
; none
;
; returned:
; nothing
;
; clobbers:
; rax, rdx, rsi, rdi, rbp, flags, and whatever add_label_hash clobbers
; ------------------------------------------------------------------------------

resolve_awaiting_labels:
        mov     rbp, rsp                        ; rsp is restored from rbp at .break
        xor     eax, eax                        ; rax = byte offset of current entry

.loop:
        mov     rdi, [AWAITING_LABEL_TABLE_ADDR + rax]
                                                ; rdi = hash of current entry
        test    rdi, rdi
        jz      .break                          ; zero hash = end of table

        push    rax                             ; keep entry offset across the call
        ; rdi = hash in awaiting label table
        call    add_label_hash
        ; rax = index in label table
        shl     rax, 4                          ; label table entries are 16 bytes
        mov     edi, [LABEL_TABLE_ADDR + rax + 8]
                                                ; edi = address of label
        pop     rax                             ; rax = byte offset of current entry

        mov     dl, [AWAITING_LABEL_TABLE_ADDR + rax + 12]
                                                ; dl = flags
        test    dl, 0x10                        ; abs flag set?
        jnz     .next                           ; absolute refs are not handled yet
        and     dl, 0x0F                        ; dl = # bytes reserved
        cmp     dl, 4
        jne     .next                           ; only 4-byte refs are handled

        mov     esi, [AWAITING_LABEL_TABLE_ADDR + rax + 8]
                                                ; esi = address of reference
        sub     edi, esi                        ; edi = label - reference
        sub     edi, 4                          ; relative to the end of the 4-byte field
        mov     [OUTPUT_ADDR + rsi], edi        ; overwrite the placeholder

.next:
        add     rax, 16                         ; step to the next 16-byte entry
        jmp     .loop

.break:
        mov     rsp, rbp
        ret
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; get_tte_type ; get_tte_type
; ;
@@ -1252,13 +1367,13 @@ get_ModRM:
; returned: ; returned:
; al = opcode; the rest of rax is zeroed. ; al = opcode; the rest of rax is zeroed.
; dl = flagsX ; dl = flagsX
; +----------------------------------------------------+ ; +--------------------------------------------------------------+
; | flagsX byte | ; | flagsX byte |
; +----------+-----------+-------------+---------------+ ; +----------+---------+-----------+-------------+---------------+
; | 7 5 | 4 | 3 | 2 0 | ; | 7 6 | 5 | 4 | 3 | 2 0 |
; +----------+-----------+-------------+---------------+ ; +----------+---------+-----------+-------------+---------------+
; | reserved | no ModR/M | 0x0F prefix | operator flag | ; | reserved | +r flag | no ModR/M | 0x0F prefix | operator flag |
; +----------+-----------+-------------+---------------+ ; +----------+---------+-----------+-------------+---------------+
; ;
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
@@ -2149,25 +2264,6 @@ print:
.test_msg db 0x1B, "[1;33m", "[TEST]: ", 0x1B, "[0m", 0x00 .test_msg db 0x1B, "[1;33m", "[TEST]: ", 0x1B, "[0m", 0x00
.warn_msg db 0x1B, "[1;35m", "[WARN]: ", 0x1B, "[0m", 0x00 .warn_msg db 0x1B, "[1;35m", "[WARN]: ", 0x1B, "[0m", 0x00
; ------------------------------------------------------------------------------
; print_least_4_bits
;
; description:
; prints the least significant 4 bits of rax for debugging reasons, as a single
; hexadecimal digit (0-9, A-F) followed by a newline, through print.warn
;
; parameters:
; rax = value to print; everything above bit 3 is ignored
;
; returned:
; nothing; rax and rsi are restored before returning
; ------------------------------------------------------------------------------

print_least_4_bits:
        push    rax
        push    rsi

        and     eax, 0x0F                       ; keep only the low nibble
        cmp     al, 10
        jb      .to_ascii
        add     al, 'A' - '0' - 10              ; 10-15 map onto 'A'-'F'
.to_ascii:
        add     al, '0'
        mov     [.byte], al                     ; patch the digit into the message

        mov     rsi, .byte
        call    print.warn

        pop     rsi
        pop     rax
        ret
.byte db 0x00, 0x0A, 0x00
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; halt ; halt
; ;
@@ -2392,7 +2488,7 @@ add_awaiting_label:
mov rdx, [LABEL_TABLE_ADDR + rdi] mov rdx, [LABEL_TABLE_ADDR + rdi]
mov [AWAITING_LABEL_TABLE_ADDR + rax], rdx ; hash mov [AWAITING_LABEL_TABLE_ADDR + rax], rdx ; hash
mov [AWAITING_LABEL_TABLE_ADDR + 8 + rax], esi ; address mov [AWAITING_LABEL_TABLE_ADDR + 8 + rax], esi ; address
mov [AWAITING_LABEL_TABLE_ADDR + 13 + rax], cl ; bits passed to al mov [AWAITING_LABEL_TABLE_ADDR + 12 + rax], cl ; bits passed to al
ret ret
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
@@ -3119,30 +3215,30 @@ opcodes:
dw 0x0000 dw 0x0000
; push ; push
; TODO add support for the +r variation
dw 0x005F dw 0x005F
db 0xFF ; r/m db 0xFF ; r/m
db 0x00 db 0x50 ; +r
db 0x68 ; imm16/32 db 0x68 ; imm16/32
db 0x6A ; imm8 db 0x6A ; imm8
dw 0x0000 dw 0x0000
db 0x06 ; 6: r/m db 0x26 ; 26: +r flag, r/m
db 0x00 db 0x00
dw 0x0000 dw 0x0000
dd 0x00000000 dd 0x00000000
; pop ; pop
; TODO add support for the +r variation
dw 0x0060 dw 0x0060
db 0x8F ; r/m db 0x8F ; r/m
db 0x58 ; +r
dd 0x00000000
db 0x20 ; 20: +r flag, r/m
db 0x00 db 0x00
dw 0x0000
dd 0x00000000
dd 0x00000000
dd 0x00000000 dd 0x00000000
@@ -3162,12 +3258,12 @@ opcodes:
.by_id_end: .by_id_end:
msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00 msg_welcome db 0x1B, "[35m", "Welcome to Twasm", 0x1B, "[0m", 0x0A, 0x00
msg_welcome_1 db 0x1B, "[35m", "Welcome to Twasm (from the next level)", 0x1B, "[0m", 0x0A, 0x00
msg_halt db "halted.", 0x0A, 0x00 msg_halt db "halted.", 0x0A, 0x00
whitespace_2 db " ", 0x0D whitespace_2 db " ", 0x0D
; test program ; test program
align 128
program: program:
db "print:", 0x0A db "print:", 0x0A
db " push rdx", 0x0A db " push rdx", 0x0A