diff --git a/twasm/README.md b/twasm/README.md
index 83dab90..b414770 100644
--- a/twasm/README.md
+++ b/twasm/README.md
@@ -129,7 +129,7 @@ supported tokens are listed below
 | cr8   | 0x004E | |
 | hlt   | 0x004F | |
 | int3  | 0x0050 | |
-| [     | 0x0051 | |
+| [     | 0x0051 | open bracket placeholder, used when the surrounding tokens are not known; where they are known (e.g. after the matching closing bracket has been tokenised), 0x10XX should be used instead |
 | ]     | 0x0052 | |
 | xor   | 0x0053 | |
 | inc   | 0x0054 | |
@@ -150,5 +150,50 @@ supported tokens are listed below
 | -     | 0x0063 | |
 | *     | 0x0064 | |
 | /     | 0x0065 | |
+| [     | 0x10XX | open bracket, where `XX` is the number of tokens from the open bracket up to and including the closing bracket |
 |       | 0xFEXX | token terminator byte as token, where `XX` is the byte |
 |       | 0xFFFF | unrecognised token |
+
+### example program
+
+#### program in assembly
+
+this program doesn't do anything useful, it's just a test
+
+```nasm
+xor eax, eax
+inc rax
+mov [ rax ], rdx
+hlt
+
+```
+
+#### tokenization
+
+```nasm
+0x0053 ; xor
+0xFE20 ; space
+0x0010 ; eax
+0xFE2C ; comma
+0xFE20 ; space
+0x0010 ; eax
+0xFE0A ; newline
+0x0054 ; inc
+0xFE20 ; space
+0x0000 ; rax
+0xFE0A ; newline
+0x0056 ; mov
+0xFE20 ; space
+0x1004 ; open bracket (4)
+0xFE20 ; space         |1
+0x0000 ; rax           |2
+0xFE20 ; space         |3
+0x0052 ; close bracket |4
+0xFE2C ; comma
+0xFE20 ; space
+0x0003 ; rdx
+0xFE0A ; newline
+0x004F ; hlt
+0xFE0A ; newline
+0xFE00 ; null terminator
+```
diff --git a/twasm/asm/main.asm b/twasm/asm/main.asm
index 73abda4..dce7bed 100644
--- a/twasm/asm/main.asm
+++ b/twasm/asm/main.asm
@@ -76,28 +76,60 @@ tokenise:
     ; deal with terminator character (reported as 0 length token)
     cmp rdx, 0
     je .token_length0
-    jne .continue
+    jne .continue0
 
   .token_length0:
     mov ax, 0xFE00 ; terminator character
     mov al, [rdi]  ; byte of terminator
     mov edx, 1     ; byte length is 1
 
-  .continue:
+  .continue0:
     add rdi, rdx ; current byte + length of token = next unread byte
 
     mov [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], ax ; fill next entry
                                                                ; in token table
+
+    ; TODO fix undefined behaviour when open brackets and closed brackets aren't
+    ; correctly paired or have too much distance between them
+    cmp ax, 0x0051             ; check if read token is an open bracket
+    je .open_bracket           ; if so, handle it
+    jne .continue_open_bracket ; if not, continue
+
+  .open_bracket:
+    ; TODO make brackets able to hold more
+    mov [.data_open_bracket], cl ; record which entry the open bracket is at
+
+  .continue_open_bracket:
+    cmp ax, 0x0052              ; check if read token is a closing bracket
+    je .close_bracket           ; if so, handle it
+    jne .continue_close_bracket ; if not, continue
+
+  .close_bracket:
+    ; rewrite open bracket token entry with a filled out one
+    mov dl, [.data_open_bracket]
+    sub cl, dl
+    mov byte [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], cl
+    mov byte [1 + TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], 0x10
+    add cl, dl
+
+  .continue_close_bracket:
     inc rcx ; +1 token processed
     jmp .loop
   .break:
     ret
 
+  .data_open_bracket db 0x00 ; represents the token # of the latest open bracket
+
 ; ------------------------------------------------------------------------------
 ; identify_token
 ;
 ; description:
-; returns the id of a given token
+; returns the id of a given token. If there are multiple ways to represent a
+; given token, like the open-bracket, it returns the one that doesn't require
+; information about the surrounding tokens, because it has no such information.
+; In other words, if it isn't in the `tokens` data structure, this function
+; doesn't see it. If the first byte of the token is a terminator byte, this
+; function returns it as an unrecognised token.
 ;
 ; parameters:
 ; rdi -> first byte of token
@@ -122,6 +154,7 @@ identify_token:
 
   jmp .unrecognised ; else unrecognised
 
+  ; length1
   .start_length1:
     mov rcx, tokens.length1 ; rcx -> list of known tokens
 
@@ -142,6 +175,7 @@ identify_token:
     mov ax, [rcx + 1] ; return id of token
     ret
 
+  ; length2
   .start_length2:
     mov rcx, tokens.length2 ; rcx -> list of known tokens
 
@@ -162,6 +196,7 @@ identify_token:
     mov ax, [rcx + 2] ; return id of token
     ret
 
+  ; length3
   .start_length3:
     mov rcx, tokens.length3 ; rcx -> list of known tokens
 
@@ -187,6 +222,7 @@ identify_token:
     mov ax, [rcx + 3] ; return id of token
     ret
 
+  ; length4
   .start_length4:
     mov rcx, tokens.length4 ; rcx -> list of known tokens
 
@@ -215,7 +251,8 @@ identify_token:
 ; ------------------------------------------------------------------------------
 ; identify_next_token
 ; description:
-; like identify_token, except it automatically finds the length
+; like identify_token, except it automatically finds the length. If the first
+; byte of the token is a terminator byte, it returns a length of 0.
 ;
 ; parameters:
 ; rdi -> first byte of token
@@ -1062,9 +1099,11 @@ token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
 
 debug_string db "debug_string", 0x0A, 0x00
 
+; test program
 program:
   db "xor eax, eax", 0x0A
   db "inc rax", 0x0A
+  db "mov [ rax ], rdx", 0x0A
   db "hlt", 0x0A
-  db 0x00
+  db 0x00 ; just for the sake of being able to print it, I made it a string
   .size db $ - program - 1