# twasm

this will be a self-hosted assembler for a very minimal subset of nasm-style 64-bit asm

### goals

I want to compile Bootler and Twasm with the Twasm assembler

### memory map

```
+------ 0x00100000 ------+
| hardware, bios stuff   |
+------ 0x00080000 ------+
| output binary          |
+------ 0x00070000 ------+
| token table            |
+------ 0x00060000 ------+
| test arena             |
+------ 0x00050000 ------+
| stack (rsp)            |
+------------------------+
| input                  |
+------------------------+
| assembler              |
+------ 0x00010000 ------+
| bootloader, bios, etc. |
+------------------------+
```

each 16-bit word in the token table represents one token.

#### token table (TT)

each token gets loaded into the token table with the following form:

```
+----------+
| 15     0 |
+----------+
| token id |
+----------+
```

### token IDs

supported tokens are listed below

| token | id     | notes |
|-------|--------|-|
| rax   | 0x0000 | |
| rbx   | 0x0001 | |
| rcx   | 0x0002 | |
| rdx   | 0x0003 | |
| rsi   | 0x0004 | |
| rdi   | 0x0005 | |
| rsp   | 0x0006 | |
| rbp   | 0x0007 | |
| r8    | 0x0008 | |
| r9    | 0x0009 | |
| r10   | 0x000A | |
| r11   | 0x000B | |
| r12   | 0x000C | |
| r13   | 0x000D | |
| r14   | 0x000E | |
| r15   | 0x000F | |
| eax   | 0x0010 | |
| ebx   | 0x0011 | |
| ecx   | 0x0012 | |
| edx   | 0x0013 | |
| esi   | 0x0014 | |
| edi   | 0x0015 | |
| esp   | 0x0016 | |
| ebp   | 0x0017 | |
| r8d   | 0x0018 | |
| r9d   | 0x0019 | |
| r10d  | 0x001A | |
| r11d  | 0x001B | |
| r12d  | 0x001C | |
| r13d  | 0x001D | |
| r14d  | 0x001E | |
| r15d  | 0x001F | |
| ax    | 0x0020 | |
| bx    | 0x0021 | |
| cx    | 0x0022 | |
| dx    | 0x0023 | |
| si    | 0x0024 | |
| di    | 0x0025 | |
| sp    | 0x0026 | |
| bp    | 0x0027 | |
| r8w   | 0x0028 | |
| r9w   | 0x0029 | |
| r10w  | 0x002A | |
| r11w  | 0x002B | |
| r12w  | 0x002C | |
| r13w  | 0x002D | |
| r14w  | 0x002E | |
| r15w  | 0x002F | |
| al    | 0x0030 | |
| bl    | 0x0031 | |
| cl    | 0x0032 | |
| dl    | 0x0033 | |
| sil   | 0x0034 | |
| dil   | 0x0035 | |
| spl   | 0x0036 | |
| bpl   | 0x0037 | |
| r8b   | 0x0038 | |
| r9b   | 0x0039 | |
| r10b  | 0x003A | |
| r11b  | 0x003B | |
| r12b  | 0x003C | |
| r13b  | 0x003D | |
| r14b  | 0x003E | |
| r15b  | 0x003F | |
| ah    | 0x0040 | |
| bh    | 0x0041 | |
| ch    | 0x0042 | |
| dh    | 0x0043 | |
| cs    | 0x0044 | |
| ds    | 0x0045 | |
| es    | 0x0046 | |
| fs    | 0x0047 | |
| gs    | 0x0048 | |
| ss    | 0x0049 | |
| cr0   | 0x004A | |
| cr2   | 0x004B | |
| cr3   | 0x004C | |
| cr4   | 0x004D | |
| cr8   | 0x004E | |
| hlt   | 0x004F | |
| int3  | 0x0050 | |
| [     | 0x0051 | open bracket placeholder; 0x10XX should be used in contexts where the surrounding tokens can be known |
| ]     | 0x0052 | |
| xor   | 0x0053 | |
| inc   | 0x0054 | |
| dec   | 0x0055 | |
| mov   | 0x0056 | |
| add   | 0x0057 | |
| sub   | 0x0058 | |
| call  | 0x0059 | |
| ret   | 0x005A | |
| cmp   | 0x005B | |
| je    | 0x005C | |
| jne   | 0x005D | |
| jge   | 0x005E | |
| jg    | 0x005F | |
| jle   | 0x0060 | |
| jl    | 0x0061 | |
| +     | 0x0062 | |
| -     | 0x0063 | |
| *     | 0x0064 | |
| /     | 0x0065 | |
| [     | 0x10XX | open bracket, where `XX` is the number of tokens up to and including the closing bracket |
|       | 0xFEXX | token terminator byte as token, where `XX` is the byte |
|       | 0xFFFF | unrecognised token |

### example program

#### program in assembly

this program doesn't do anything useful, it's just a test

```nasm
xor eax, eax
inc rax
mov [ rax ], rdx
hlt
```

#### tokenization

```nasm
0x0053 ; xor
0xFE20 ; space
0x0010 ; eax
0xFE2C ; comma
0xFE20 ; space
0x0010 ; eax
0xFE0A ; newline
0x0054 ; inc
0xFE20 ; space
0x0000 ; rax
0xFE0A ; newline
0x0056 ; mov
0xFE20 ; space
0x1004 ; open bracket (4)
0xFE20 ; space         |1
0x0000 ; rax           |2
0xFE20 ; space         |3
0x0052 ; close bracket |4
0xFE2C ; comma
0xFE20 ; space
0x0003 ; rdx
0xFE0A ; newline
0x004F ; hlt
0xFE0A ; newline
0xFE00 ; null terminator
```