diff --git a/twasm/README.md b/twasm/README.md index 2921ffa..77905a6 100644 --- a/twasm/README.md +++ b/twasm/README.md @@ -328,14 +328,14 @@ supported tokens are listed below | r13d | 0x001D | unimplemented | | r14d | 0x001E | unimplemented | | r15d | 0x001F | unimplemented | -| ax | 0x0020 | unimplemented | -| bx | 0x0021 | unimplemented | -| cx | 0x0022 | unimplemented | -| dx | 0x0023 | unimplemented | -| si | 0x0024 | unimplemented | -| di | 0x0025 | unimplemented | -| sp | 0x0026 | unimplemented | -| bp | 0x0027 | unimplemented | +| ax | 0x0020 | | +| bx | 0x0021 | | +| cx | 0x0022 | | +| dx | 0x0023 | | +| si | 0x0024 | | +| di | 0x0025 | | +| sp | 0x0026 | | +| bp | 0x0027 | | | r8w | 0x0028 | unimplemented | | r9w | 0x0029 | unimplemented | | r10w | 0x002A | unimplemented | @@ -344,14 +344,14 @@ supported tokens are listed below | r13w | 0x002D | unimplemented | | r14w | 0x002E | unimplemented | | r15w | 0x002F | unimplemented | -| al | 0x0030 | unimplemented | -| bl | 0x0031 | unimplemented | -| cl | 0x0032 | unimplemented | -| dl | 0x0033 | unimplemented | -| sil | 0x0034 | unimplemented | -| dil | 0x0035 | unimplemented | -| spl | 0x0036 | unimplemented | -| bpl | 0x0037 | unimplemented | +| al | 0x0030 | | +| bl | 0x0031 | | +| cl | 0x0032 | | +| dl | 0x0033 | | +| sil | 0x0034 | | +| dil | 0x0035 | | +| spl | 0x0036 | | +| bpl | 0x0037 | | | r8b | 0x0038 | unimplemented | | r9b | 0x0039 | unimplemented | | r10b | 0x003A | unimplemented | diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm index 9c052e1..0383da2 100644 --- a/twasm/asm/main.asm +++ b/twasm/asm/main.asm @@ -177,6 +177,32 @@ assemble: mov rsi, .msg_operator_1_register call print.debug + ; di = token table entry + call get_tte_typed_metadata + ; al = register typed metadata + + and al, 11b ; al = register width + + cmp al, 00b ; 8 bit + je .unexpected_token ; TODO handle 8 bit opcodes + + cmp al, 10b ; 32 bit + je .operator_1_register_no_prefix ; default register length; no prefix + + 
cmp al, 01b ; 16 bit + je .operator_1_register_16 + + cmp al, 11b ; 64 bit + je .operator_1_register_64 ; all four 2-bit widths are matched above, so the fall-through below is never taken + .operator_1_register_16: + mov al, 0x66 ; 0x66 = operand-size override prefix (16 bit operand) + call .push_byte + jmp .operator_1_register_no_prefix + .operator_1_register_64: + mov al, 0x48 ; 0x48 = REX.W prefix (64 bit operand) + call .push_byte + jmp .operator_1_register_no_prefix + .operator_1_register_no_prefix: mov si, di ; si = `R/M` tte and edx, 0xFF or edx, 0xFE00 ; pass di as direct value @@ -232,7 +258,24 @@ assemble: call .next_token jge .break call .get_next_tte + ; di = tte + call get_tte_typed_metadata + and al, 11b ; al = register width + + cmp al, 10b ; 32 bit + je .operator_2_memory_32 + + cmp al, 11b ; 64 bit + je .operator_2_memory_continue ; default addr length; no prefix + + ; other cases: 16 bit, 8 bit both are not valid for addressing + jmp .size_mismatch + .operator_2_memory_32: + mov al, 0x67 ; 0x67 = address-size override prefix (32 bit address) + call .push_byte + jmp .operator_2_memory_continue + .operator_2_memory_continue: mov si, di ; si = dst register tte call .next_token @@ -253,13 +296,39 @@ assemble: je .operator_2_memory_register ; if so, handle jmp .unexpected_token - .operator_2_memory_register: push rsi mov rsi, .msg_operator_2_memory_register call print.debug pop rsi ; si = r/m + ; di = src tte + call get_tte_typed_metadata + ; al = register typed metadata + + and al, 11b ; al = register width + + cmp al, 00b ; 8 bit + je .unexpected_token ; TODO handle 8 bit opcodes + + cmp al, 01b ; 16 bit + je .operator_2_memory_register_16 + + cmp al, 10b ; 32 bit + je .operator_2_memory_register_continue ; default register length; no prefix + + cmp al, 11b ; 64 bit + je .operator_2_memory_register_64 ; all four 2-bit widths are matched above, so the fall-through below is never taken + + .operator_2_memory_register_16: + mov al, 0x66 ; 0x66 = operand-size override prefix (16 bit operand) + call .push_byte + jmp .operator_2_memory_register_continue + .operator_2_memory_register_64: + mov al, 0x48 ; 0x48 = REX.W prefix (64 bit operand); NOTE(review): .push_byte inserts at the front of the buffer, so if 0x67 was pushed for the memory operand this lands before it (48 67 ...) - REX has to directly precede the opcode, confirm the ordering + call .push_byte + jmp .operator_2_memory_register_continue + .operator_2_memory_register_continue: ; si = r/m; dst tte ; di = reg; src tte mov dl, 00b ; dl = mod bits @@ -283,6 +352,32 @@ assemble: call .write_byte pop rdi ; di = dst tte + ; di = token 
table entry + call get_tte_typed_metadata + ; al = register typed metadata + + and al, 11b ; al = register width + + cmp al, 00b ; 8 bit + je .unexpected_token ; TODO handle 8 bit opcodes + + cmp al, 01b ; 16 bit + je .operator_2_register_16 + + cmp al, 10b ; 32 bit + je .operator_2_register_continue ; default register length; no prefix + + cmp al, 11b ; 64 bit + je .operator_2_register_64 ; all four 2-bit widths are matched above, so the fall-through below is never taken + .operator_2_register_16: + mov al, 0x66 ; 0x66 = operand-size override prefix (16 bit operand) + call .push_byte + jmp .operator_2_register_continue + .operator_2_register_64: + mov al, 0x48 ; 0x48 = REX.W prefix (64 bit operand) + call .push_byte + jmp .operator_2_register_continue + .operator_2_register_continue: mov si, di ; si = dst tte call .next_token @@ -316,9 +411,27 @@ assemble: call .next_token jge .break call .get_next_tte + ; di = tte + call get_tte_typed_metadata + ; al = register typed metadata - ; si = r/m; dst tte - ; di = reg; src tte + and al, 11b ; al = register width + + cmp al, 10b ; 32 bit + je .operator_2_register_memory_32 + + cmp al, 11b ; 64 bit + je .operator_2_register_memory_continue ; default addr length; no prefix + + ; other cases: 16 bit, 8 bit both are not valid for addressing + jmp .size_mismatch + .operator_2_register_memory_32: + mov al, 0x67 ; 0x67 = address-size override prefix (32 bit address) + call .push_byte + jmp .operator_2_register_memory_continue + .operator_2_register_memory_continue: + ; si = reg; dst tte + ; di = r/m; src tte push rsi mov esi, edi ; si = reg; src tte pop rdi ; di = r/m; dst tte @@ -330,8 +443,106 @@ assemble: jmp .loop_next_token .operator_2_register_register: + push rsi mov rsi, .msg_operator_2_register_register call print.debug + pop rsi ; si = dst tte + + ; di = src tte + call get_tte_typed_metadata + ; al = register typed metadata + and al, 11b ; al = register width + + cmp al, 00b ; 8 bit + je .unexpected_token ; TODO handle 8 bit opcodes + + cmp al, 01b ; 16 bit + je .operator_2_register_register_16 + + cmp al, 10b ; 32 bit + je .operator_2_register_register_32 + + cmp al, 11b ; 64 bit + je .operator_2_register_register_64 + 
.operator_2_register_register_16: + ; 16 bit: expect the 0x66 prefix pushed for dst + push rdi + push rsi + mov edi, .buffer_end - .buffer ; length of buffer + mov rsi, .buffer ; buffer location + mov dl, 0x66 + call elemb + pop rsi ; si = dst tte + pop rdi ; di = src tte + cmp al, 1 ; assumes elemb leaves al = 1 when dl is in the buffer, else 0 - TODO confirm + jne .size_mismatch + + ; 64 bit: expect no 0x48 prefix + push rdi + push rsi + mov edi, .buffer_end - .buffer ; length of buffer + mov rsi, .buffer ; buffer location + mov dl, 0x48 + call elemb + pop rsi ; si = dst tte + pop rdi ; di = src tte + cmp al, 0 + jne .size_mismatch + + jmp .operator_2_register_register_continue + .operator_2_register_register_32: + ; 16 bit: expect no 0x66 prefix + push rdi + push rsi + mov edi, .buffer_end - .buffer ; length of buffer + mov rsi, .buffer ; buffer location + mov dl, 0x66 + call elemb + pop rsi ; si = dst tte + pop rdi ; di = src tte + cmp al, 0 + jne .size_mismatch + + ; 64 bit: expect no 0x48 prefix + push rdi + push rsi + mov edi, .buffer_end - .buffer ; length of buffer + mov rsi, .buffer ; buffer location + mov dl, 0x48 + call elemb + pop rsi ; si = dst tte + pop rdi ; di = src tte + cmp al, 0 + jne .size_mismatch + + jmp .operator_2_register_register_continue + .operator_2_register_register_64: + ; 16 bit: expect no 0x66 prefix + push rdi + push rsi + mov edi, .buffer_end - .buffer ; length of buffer + mov rsi, .buffer ; buffer location + mov dl, 0x66 + call elemb + pop rsi ; si = dst tte + pop rdi ; di = src tte + cmp al, 0 + jne .size_mismatch + + ; 64 bit: expect the 0x48 prefix pushed for dst + push rdi + push rsi + mov edi, .buffer_end - .buffer ; length of buffer + mov rsi, .buffer ; buffer location + mov dl, 0x48 + call elemb + pop rsi ; si = dst tte + pop rdi ; di = src tte + cmp al, 1 + jne .size_mismatch + + jmp .operator_2_register_register_continue + .operator_2_register_register_continue: push rsi mov esi, edi ; si = reg; src tte @@ -362,6 +573,11 @@ assemble: call print.error jmp halt + .size_mismatch: + mov rsi, .msg_size_mismatch + call print.error + jmp halt + ; procedures ; add the line `jge .break` after call site @@ -385,10 +601,12 @@ assemble: ; al = byte to write .output_byte: + 
push rdx mov edx, [.next_output_byte] ; get output byte's address mov [edx], al ; write byte to that address inc edx ; increment address mov [.next_output_byte], edx ; put output byte's address + pop rdx ret .next_output_byte dd OUTPUT_ADDR ; next empty byte in output ; TODO get rid of this sketchy bit of state @@ -401,38 +619,41 @@ assemble: push rax mov ecx, .buffer_end mov [.buffer_pointer], ecx - .push_byte_loop: - dec ecx - cmp ecx, .buffer - jl .push_byte_break - mov al, [ecx] - mov [ecx + 1], al - jmp .push_byte_loop - .push_byte_break: - pop rax ; al = byte to push - mov [.buffer], al - pop rcx ; ecx = old buffer pointer - inc ecx - mov [.buffer_pointer], ecx - pop rcx - ret + .push_byte_loop: + dec ecx + cmp ecx, .buffer + jl .push_byte_break ; NOTE(review): jl is a signed compare on an address; jb is the unsigned form + mov al, [ecx] + mov [ecx + 1], al ; shift the byte one slot toward .buffer_end; NOTE(review): the first pass stores to [.buffer_end] itself - confirm that byte is reserved + jmp .push_byte_loop + .push_byte_break: + pop rax ; al = byte to push + mov [.buffer], al ; write desired byte to front of buffer + pop rcx ; ecx = old buffer pointer + inc ecx ; ecx = pointer to next empty in buffer + mov [.buffer_pointer], ecx ; store the advanced .buffer_pointer + pop rcx + ret ; al = byte to write .write_byte: + push rdx mov edx, [.buffer_pointer] mov [edx], al inc edx mov [.buffer_pointer], edx + pop rdx ret .flush_write_buffer: push rcx push rax - mov ecx, [.buffer_pointer] + mov ecx, .buffer ; ecx = .buffer - 1 after the dec below; the loop increments before use + dec ecx .flush_write_buffer_loop: - dec ecx - cmp ecx, .buffer - jl .flush_write_buffer_break + inc ecx + cmp ecx, [.buffer_pointer] ; walk forward from .buffer up to .buffer_pointer; NOTE(review): the jge below is a signed compare on addresses, jae is the unsigned form + jge .flush_write_buffer_break mov al, [ecx] call .output_byte mov byte [ecx], 0x00 @@ -448,6 +669,7 @@ assemble: .msg_unexpected_token db "unexpected token, aborting", 0x0A, 0x00 .msg_unsupported_memory_access db "unsupported memory access, aborting", 0x0A, 0x00 + .msg_size_mismatch db "size mismatch, aborting", 0x0A, 0x00 .msg_operator_0 db "operator_0", 0x0A, 0x00 .msg_operator_1 db "operator_1", 0x0A, 0x00 .msg_operator_1_memory db "operator_1_memory", 0x0A, 0x00 @@ -580,21 +802,21 @@ get_ModRM: .continue: shl ebx, 3 - mov edi, esi ; do 
the other one + mov edi, esi ; do the other one - ; di = tte - call get_reg_bits - ; al = reg bits + ; di = tte + call get_reg_bits + ; al = reg bits - mov ecx, eax + mov ecx, eax ; cl = r/m bits - xor eax, eax - or eax, edx ; mod bits - or eax, ebx ; reg bits - or eax, ecx ; R/M bits - and eax, 0xFF ; mask for byte - pop rbx - ret + xor eax, eax + or eax, edx ; mod bits + or eax, ebx ; reg bits + or eax, ecx ; R/M bits + and eax, 0xFF ; mask for byte + pop rbx + ret ; ------------------------------------------------------------------------------ ; get_opcode @@ -1307,7 +1529,6 @@ identify_register: ; ------------------------------------------------------------------------------ ; identify_operator -; TODO combine with identify_register ; ; description: ; takes an operator in ascii-encoded text and returns its token ID or @@ -1660,6 +1881,86 @@ tokens: db 00010110b ; reg: 101b ; width: 10b (32 bits) + dw 0x0020 ; ax + db 0x02 ; type: register + db 00000001b ; reg: 000b + ; width: 01b (16 bits) + + dw 0x0021 ; bx + db 0x02 ; type: register + db 00001101b ; reg: 011b + ; width: 01b (16 bits) + + dw 0x0022 ; cx + db 0x02 ; type: register + db 00000101b ; reg: 001b + ; width: 01b (16 bits) + + dw 0x0023 ; dx + db 0x02 ; type: register + db 00001001b ; reg: 010b + ; width: 01b (16 bits) + + dw 0x0024 ; si + db 0x02 ; type: register + db 00011001b ; reg: 110b + ; width: 01b (16 bits) + + dw 0x0025 ; di + db 0x02 ; type: register + db 00011101b ; reg: 111b + ; width: 01b (16 bits) + + dw 0x0026 ; sp + db 0x02 ; type: register + db 00010001b ; reg: 100b + ; width: 01b (16 bits) + + dw 0x0027 ; bp + db 0x02 ; type: register + db 00010101b ; reg: 101b + ; width: 01b (16 bits) + + dw 0x0030 ; al + db 0x02 ; type: register + db 00000000b ; reg: 000b + ; width: 00b (8 bits) + + dw 0x0031 ; bl + db 0x02 ; type: register + db 00001100b ; reg: 011b + ; width: 00b (8 bits) + + dw 0x0032 ; cl + db 0x02 ; type: register + db 00000100b ; reg: 001b + ; width: 00b (8 bits) + + dw 0x0033 
; dl + db 0x02 ; type: register + db 00001000b ; reg: 010b + ; width: 00b (8 bits) + + dw 0x0034 ; sil + db 0x02 ; type: register + db 00011000b ; reg: 110b + ; width: 00b (8 bits); NOTE(review): sil/dil/spl/bpl are only encodable with a REX prefix - without one, reg 100b-111b at 8 bit width select ah/ch/dh/bh + + dw 0x0035 ; dil + db 0x02 ; type: register + db 00011100b ; reg: 111b + ; width: 00b (8 bits) + + dw 0x0036 ; spl + db 0x02 ; type: register + db 00010000b ; reg: 100b + ; width: 00b (8 bits) + + dw 0x0037 ; bpl + db 0x02 ; type: register + db 00010100b ; reg: 101b + ; width: 00b (8 bits) + dw 0x004F ; hlt db 0x01 ; type: operator db 0x00 ; # operands @@ -2041,6 +2342,10 @@ whitespace_2 db " ", 0x0D ; test program program: db "xor eax, eax", 0x0A + db "mov rax, rax", 0x0A + db "mov rax, rbx", 0x0A + db "mov eax, ebx", 0x0A + db "mov ax, bx", 0x0A db "inc rax ; inline comment", 0x0A db "dec rax", 0x0A db "; one line comment", 0x0A @@ -2048,6 +2353,9 @@ program: db "mov [rax], rdx", 0x0A db "mov [rcx], rbx", 0x0A db "mov rcx, [rbx]", 0x0A + db "mov rcx, [ebx]", 0x0A + db "mov ecx, [ebx]", 0x0A + db "mov cx, [ebx]", 0x0A db "hlt", 0x0A .size dq $ - program