Compare commits

...

27 Commits

Author SHA1 Message Date
andromeda
869420ef7a fix some bugs, work on assembler 2026-03-09 11:00:59 +01:00
andromeda
33710a8ebe work on metadata system, put tests in seperate file 2026-03-09 10:08:19 +01:00
andromeda
0b7526661c clear up internal data structures, add to README 2026-03-08 16:03:24 +01:00
andromeda
76e9cc4cd7 add resources to README 2026-03-08 13:53:05 +01:00
andromeda
002100bd70 add resources to README 2026-03-08 13:47:12 +01:00
andromeda
e10d771743 parse brackets, improve docs 2026-03-08 12:35:14 +01:00
andromeda
172566dfe3 remove line feed characters xD 2026-03-08 11:23:46 +01:00
andromeda
f4cadcfff9 change token table entries, remove temporary file :shame: 2026-03-08 11:15:05 +01:00
andromeda
d90c39b6bc clarify readme 2026-03-08 11:04:42 +01:00
andromeda
0d66e77976 get tokenising working a bit :p also some fixes and semantics 2026-03-08 10:56:20 +01:00
andromeda
63e3a1ea7e add some tokens, length1 token support 2026-03-07 21:33:28 +01:00
andromeda
19a3e4ff5b little optimisation, add single-token parsing function 2026-03-07 20:57:19 +01:00
andromeda
50964e945e encode register tokens 2026-03-07 16:44:40 +01:00
andromeda
46bdc91966 clear tables 2026-03-07 15:51:51 +01:00
andromeda
7df3d25727 design token work 2026-03-07 15:43:41 +01:00
andromeda
30a7b5cb34 add elemb, improve token checking 2026-03-07 12:43:55 +01:00
andromeda
e1822da600 load 16 sectors instead of 1 2026-03-06 23:02:09 +01:00
andromeda
444e85e30c clarify doc 2026-03-06 22:36:47 +01:00
andromeda
00be2cc545 reformat, add copy_token and test, fix bug 2026-03-06 22:16:26 +01:00
andromeda
e5c715d54f change rdme 2026-03-06 20:56:03 +01:00
andromeda
42003df415 merge new changes 2026-03-06 20:38:32 +01:00
andromeda
9c65697dd8 bunch of stuff idek 2026-03-06 20:33:51 +01:00
andromeda
d35463e195 reorganise 2026-03-06 16:53:18 +01:00
andromeda
fdf5bb9daf remove redundant compile flag 2026-03-05 22:26:54 +01:00
andromeda
0d739522a0 copy libs also 2026-03-05 21:08:44 +01:00
andromeda
846c54653b change dev env, format rust 2026-03-05 21:00:18 +01:00
andromeda
f52da82650 add footer 2026-03-05 20:48:30 +01:00
21 changed files with 1888 additions and 112 deletions

View File

@@ -2,50 +2,18 @@ Call me Terry Davis because... actually please don't. I have visions: aspiration
# bootle
hobby kernel written in rust. It's just for playing around... for now :p
hobby kernel
run with `nix run git+https://git.mtgmonkey.net/andromeda/bootler#bootle`
status: basically nothing, come back later
# bootler
hobby bootloader written in asm. It's just for playing around... for now :p
hobby 1-stage legacy mode bootloader
run with `nix run git+https://git.mtgmonkey.net/andromeda/bootler#bootler`
status: gets to long mode, loads+jumps to kernel, starts idt and gdt... :)
### memory map
# twasm
```
+------ 0x00100000 ------+
| hardware, bios stuff |
+------ 0x00080000 ------+
| |
| |
+------ 0x00010200 ------+
| x86_64 kernel |
+------ 0x00010000 ------+
| |
| |
+------ 0x00009000 ------+
| IDT |
+------ 0x00008000 ------+
| |
| |
+------ 0x00007E00 ------+
| bootloader (boot.asm) |
+------ 0x00007C00 ------+
| stack | TODO get real stack
+------ 0x00005000 ------+
| PT |
+------ 0x00004000 ------+
| PDT |
+------ 0x00003000 ------+
| PDPT |
+------ 0x00002000 ------+
| PML4T |
+------ 0x00001000 ------+
| |
| |
+------ 0x00000500 ------+
| bios stuff |
+------ 0x00000000 ------+
```
hobby self-hosted assembler
status: basically nothing, come back later

View File

28
bootle/README.md Normal file
View File

@@ -0,0 +1,28 @@
# bootle
hobby kernel written in rust. It's just for playing around... for now :p
run with `nix run git+https://git.mtgmonkey.net/andromeda/bootler#bootle`
### memory map
```
+------ 0x00100000 ------+
| hardware, bios stuff |
+------ 0x00080000 ------+
| |
| |
+------ 0x00010200 ------+
| kernel |
+------ 0x00010000 ------+
| bootloader stuff |
| includes stack, gdt, |
| idt for the time being |
+------ 0x00000500 ------+
| bios stuff |
+------ 0x00000000 ------+
```
---
this project follows [Common Changelog](https://common-changelog.org) guidelines

View File

@@ -6,27 +6,28 @@
qemu,
...
}: let
rust-toolchain = rust-bin.fromRustupToolchainFile ../../rust-toolchain.toml;
rust-toolchain = rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
naersk' = callPackage naersk {
cargo = rust-toolchain;
rustc = rust-toolchain;
clippy = rust-toolchain;
};
in (naersk'.buildPackage {
src = ../../.;
src = ./.;
# deps for rust-src
additionalCargoLock = "${rust-toolchain.availableComponents.rust-src}/lib/rustlib/src/rust/library/Cargo.lock";
# just library build
copyBins = false;
copyLibs = true;
release = true;
# build std
cargoBuildOptions = x:
x
++ [
"-Zbuild-std=core,compiler_builtins"
"-Zbuild-std"
];
postInstall = ''
@@ -36,7 +37,7 @@ in (naersk'.buildPackage {
-e _start \
target/x86_64-unknown-none/release/libbootle.a
dd if=/dev/zero of=disk bs=512 count=2
dd if=/dev/zero of=disk bs=512 count=2880
dd if=${bootler}/bin/boot.bin of=disk conv=notrunc
dd if=kernel.bin of=disk bs=512 seek=1 conv=notrunc

View File

@@ -12,12 +12,14 @@ pub extern "C" fn _start() -> ! {
fn print_serial(s: &str) {
let mut bytes = s.bytes();
while let Some(b) = bytes.next() {
unsafe {core::arch::asm!(
unsafe {
core::arch::asm!(
"out dx, al"
, in("al") b
)};
)
};
}
}
fn println_serial(s: &str) {
print_serial(s);
@@ -28,7 +30,11 @@ fn welcome_serial() {
print_serial(ANSI_PINK);
println_serial("\nWelcome to Bootle OS");
println_serial("All code GPL licensed and freely available on git.mtgmonkey.net");
print_serial("Enjoy your time! Press "); print_serial(ANSI_RED); print_serial("ctrl+a x"); print_serial(ANSI_PINK); println_serial(" to escape Qemu");
print_serial("Enjoy your time! Press ");
print_serial(ANSI_RED);
print_serial("ctrl+a x");
print_serial(ANSI_PINK);
println_serial(" to escape Qemu");
print_serial(ANSI_CLEAR);
}
@@ -39,9 +45,7 @@ fn panic(_: &PanicInfo) -> ! {
}
fn halt() -> ! {
unsafe {core::arch::asm!(
"hlt"
)};
unsafe { core::arch::asm!("hlt") };
halt()
}

7
bootler/CHANGELOG.md Normal file
View File

@@ -0,0 +1,7 @@
# Changelog
## [0.1.0] - 2026-03-06
### Added
- initialised CHANGELOG.md

43
bootler/README.md Normal file
View File

@@ -0,0 +1,43 @@
# bootler
hobby bootloader, 1-stage, legacy mode :p
run with `nix run git+https://git.mtgmonkey.net/andromeda/bootler#bootler`
### memory map
```
. .
: :
| longmode kernel |
+------ 0x00010000 ------+
| |
| |
+------ 0x00009000 ------+
| IDT |
+------ 0x00008000 ------+
| |
| |
+------ 0x00007E00 ------+
| bootloader (boot.asm) |
+------ 0x00007C00 ------+
| stack |
+------ 0x00005000 ------+
| PT |
+------ 0x00004000 ------+
| PDT |
+------ 0x00003000 ------+
| PDPT |
+------ 0x00002000 ------+
| PML4T |
+------ 0x00001000 ------+
| |
| |
+------ 0x00000500 ------+
| bios stuff |
+------ 0x00000000 ------+
```
---
this project follows [Common Changelog](https://common-changelog.org) guidelines

View File

@@ -6,7 +6,7 @@
LOAD_ADDR equ 0x7C00
KERNEL_START equ 2 ; first sector on disk to load kernel from; 1 indexed
KERNEL_SIZE equ 1 ; length of kernel in sectors
KERNEL_SIZE equ 16 ; length of kernel in sectors
KERNEL_LOAD_ADDR_ES equ 0x1000 ; kernel to be loaded at es * 0x10 + 0x0000
PAGE_TABLE_LOAD_ADDR equ 0x1000 ; start of page table; 4 * pt size

View File

@@ -9,19 +9,18 @@ in
stdenv.mkDerivation {
pname = "bootler";
version = "0.1.0";
src = ../../.;
src = ./.;
buildPhase = ''
${nasm}/bin/nasm asm/boot.asm -o boot.bin
${nasm}/bin/nasm asm/kernel.asm -o dummy.bin
dd if=/dev/zero of=${bootImg} bs=512 count=2
dd if=/dev/zero of=${bootImg} bs=512 count=2880
dd if=boot.bin of=${bootImg} conv=notrunc
dd if=dummy.bin of=${bootImg} bs=512 seek=1 conv=notrunc
'';
installPhase = ''
mkdir -p $out/bin
cp boot.bin $out/bin
cp dummy.bin $out/bin
cp ${bootImg} $out/bin
# create emulation binary

View File

@@ -18,20 +18,25 @@
...
}: let
system = "x86_64-linux";
pkgs = import nixpkgs {
inherit system;
overlays = [(import rust-overlay)];
};
pkgs = nixpkgs.legacyPackages.${system};
pkgsWithRustOverlay = pkgs.extend (import rust-overlay);
in {
packages.${system} = {
bootler = pkgs.callPackage ./nix/pkgs/bootler.nix {};
bootle = pkgs.callPackage ./nix/pkgs/bootle.nix {
naersk = naersk;
bootler = pkgs.callPackage ./bootler/package.nix {};
bootle = pkgsWithRustOverlay.callPackage ./bootle/package.nix {
inherit naersk;
bootler = self.packages.${system}.bootler;
};
twasm = pkgs.callPackage ./twasm/package.nix {
bootler = self.packages.${system}.bootler;
};
};
devShells.${system}.default = pkgs.mkShell {
inputsFrom = [self.packages.${system}.default];
inputsFrom = [
self.packages.${system}.bootle
self.packages.${system}.bootler
self.packages.${system}.twasm
];
};
};
}

View File

@@ -1,47 +0,0 @@
{
stdenv,
nasm,
qemu,
...
}: let
bootImg = "boot";
in
stdenv.mkDerivation {
pname = "bootler";
version = "0.1.0";
src = ../.;
buildPhase = ''
${nasm}/bin/nasm asm/boot.asm -o boot.bin
${nasm}/bin/nasm asm/kernel.asm -o kernel.bin
dd if=/dev/zero of=${bootImg} bs=512 count=2
dd if=boot.bin of=${bootImg} conv=notrunc
dd if=kernel.bin of=${bootImg} bs=512 seek=1 conv=notrunc
'';
installPhase = ''
mkdir -p $out/bin
cp ${bootImg} $out/bin
# create emulation binary
cat<<EOF>$out/bin/bootler
#!/usr/bin/env bash
# create temp dir
mkdir -p ./.bootler
cp $(echo $out)/bin/${bootImg} ./.bootler/${bootImg}
chmod a+w ./.bootler/${bootImg}
# run image
${qemu}/bin/qemu-system-x86_64 \
-nographic \
-drive file=./.bootler/${bootImg},format=raw,index=0,media=disk
# clean up
rm ./.bootler -r
EOF
chmod +x $out/bin/${bootImg}
chmod +x $out/bin/bootler
'';
}

326
twasm/README.md Normal file
View File

@@ -0,0 +1,326 @@
# twasm
this will be a self-hosted assembler for a very minimal subset of nasm-style 64-bit asm
### goals
I want to compile Bootler and Twasm with the Twasm assembler
### reading
- [instructions](https://www.felixcloutier.com/x86/)
- [opcodes,ModR/M,SIB](http://ref.x86asm.net/coder64.html) (no secure site available)
- [calling conventions](https://wiki.osdev.org/Calling_Conventions); I try to use System V
### memory map
```
+------ 0x00100000 ------+
| hardware, bios stuff |
+------ 0x00080000 ------+
| output binary |
+------ 0x00070000 ------+
| token table |
+------ 0x00060000 ------+
| test arena |
+------ 0x00050000 ------+
| stack (rsp) |
+------------------------+
| input |
+------------------------+
| assembler |
+------ 0x00010000 ------+
| bootloader, bios, etc. |
+------------------------+
```
each word represents a token on the token table.
#### token table (TT)
each token gets loaded into the token table with the following form:
```
+----------+
| 15 0 |
+----------+
| token id |
+----------+
```
### internal data structures
#### `tokens.by_nameX`
contains every token of length X, each followed by its ID. For any non-empty `tokens.by_nameX`, `tokens.by_name<X+1> - tokens.by_nameX` is the size in bytes of `tokens.by_nameX`.
each entry is in the following form:
```
+----------+--------------------------------+
|[2 bytes] | 8 * token_length - 1 0 |
+----------+--------------------------------+
| token ID | string without null terminator |
+----------+--------------------------------+
```
example implementation:
```nasm
tokens:
.by_name1:
db "+"
dw 0x0062
db "-"
dw 0x0063
.by_name2:
db "r8"
dw 0x0008
.by_name3: ; this is required for futureproofness; the caller can use this to
; find the size of tokens.by_name2
```
#### `tokens.by_id`
contains some tokens with their metadata. Some tokens have embedded information (`0x10XX` for instance). Those will not have entries in this table, being handled instead inside the assemble function itself.
metadata about some tokens in the following form:
```
+----------------+----------+-------+----------+
| 31 24 | 23 20 | 19 16 | 15 0 |
+----------------+----------+-------+----------+
| typed metadata | reserved | type | token ID |
+----------------+----------+-------+----------+
```
the `type` hex digit is defined as the following:
| hex | meaning | examples |
|-----|----------|-|
| 0x0 | ignored | `; this entire comment is 1 token` |
| 0x1 | operator | `mov`, `hlt` |
| 0x2 | register | `rsp`, `al` |
| 0xF | unknown | any token ID not represented in the lookup table |
typed metadata for the different types is as follows:
```
+----------+
| type 0x0 |
+----------+
| 31 24 |
+----------+
| reserved |
+----------+
```
```
+-------------------------------+
| type 0x1 |
+----------+--------------------+
| 31 26 | 25 24 |
+----------+--------------------+
| reserved | number of operands |
+----------+--------------------+
```
```
+------------------+
| type 0x2 |
+----------+-------+
| 31 26 | 25 24 |
+----------+-------+
| reserved | width |
+----------+-------+
; width:
00b ; 8 bit
01b ; 16 bit
10b ; 32 bit
11b ; 64 bit
```
### token IDs
supported tokens are listed below
| token | id | notes |
|-------|--------|-|
| rax | 0x0000 | |
| rbx | 0x0001 | |
| rcx | 0x0002 | |
| rdx | 0x0003 | |
| rsi | 0x0004 | |
| rdi | 0x0005 | |
| rsp | 0x0006 | |
| rbp | 0x0007 | |
| r8 | 0x0008 | |
| r9 | 0x0009 | |
| r10 | 0x000A | |
| r11 | 0x000B | |
| r12 | 0x000C | |
| r13 | 0x000D | |
| r14 | 0x000E | |
| r15 | 0x000F | |
| eax | 0x0010 | |
| ebx | 0x0011 | |
| ecx | 0x0012 | |
| edx | 0x0013 | |
| esi | 0x0014 | |
| edi | 0x0015 | |
| esp | 0x0016 | |
| ebp | 0x0017 | |
| r8d | 0x0018 | |
| r9d | 0x0019 | |
| r10d | 0x001A | |
| r11d | 0x001B | |
| r12d | 0x001C | |
| r13d | 0x001D | |
| r14d | 0x001E | |
| r15d | 0x001F | |
| ax | 0x0020 | |
| bx | 0x0021 | |
| cx | 0x0022 | |
| dx | 0x0023 | |
| si | 0x0024 | |
| di | 0x0025 | |
| sp | 0x0026 | |
| bp | 0x0027 | |
| r8w | 0x0028 | |
| r9w | 0x0029 | |
| r10w | 0x002A | |
| r11w | 0x002B | |
| r12w | 0x002C | |
| r13w | 0x002D | |
| r14w | 0x002E | |
| r15w | 0x002F | |
| al | 0x0030 | |
| bl | 0x0031 | |
| cl | 0x0032 | |
| dl | 0x0033 | |
| sil | 0x0034 | |
| dil | 0x0035 | |
| spl | 0x0036 | |
| bpl | 0x0037 | |
| r8b | 0x0038 | |
| r9b | 0x0039 | |
| r10b | 0x003A | |
| r11b | 0x003B | |
| r12b | 0x003C | |
| r13b | 0x003D | |
| r14b | 0x003E | |
| r15b | 0x003F | |
| ah | 0x0040 | |
| bh | 0x0041 | |
| ch | 0x0042 | |
| dh | 0x0043 | |
| cs | 0x0044 | |
| ds | 0x0045 | |
| es | 0x0046 | |
| fs | 0x0047 | |
| gs | 0x0048 | |
| ss | 0x0049 | |
| cr0 | 0x004A | |
| cr2 | 0x004B | |
| cr3 | 0x004C | |
| cr4 | 0x004D | |
| cr8 | 0x004E | |
| hlt | 0x004F | |
| int3 | 0x0050 | |
| [ | 0x0051 | open bracket placeholder; 0x10XX should be used in contexts where the surrounding tokens can be known |
| ] | 0x0052 | |
| xor | 0x0053 | |
| inc | 0x0054 | |
| dec | 0x0055 | |
| mov | 0x0056 | |
| add | 0x0057 | |
| sub | 0x0058 | |
| call | 0x0059 | |
| ret | 0x005A | |
| cmp | 0x005B | |
| je | 0x005C | |
| jne | 0x005D | |
| jge | 0x005E | |
| jg | 0x005F | |
| jle | 0x0060 | |
| jl | 0x0061 | |
| + | 0x0062 | |
| - | 0x0063 | |
| * | 0x0064 | |
| / | 0x0065 | |
| [ | 0x10XX | open bracket with `XX` bytes until the closing bracket |
| | 0xFEXX | token terminator byte as token, where `XX` is the byte |
| | 0xFFFF | unrecognised token |
### example program
#### program in assembly
this program doesn't do anything useful, it's just a test
```nasm
xor eax, eax
inc rax
mov [ rax ], rdx
hlt
```
#### tokenization
```nasm
0x0053 ; xor
0xFE20 ; space
0x0010 ; eax
0xFE2C ; comma
0xFE20 ; space
0x0010 ; eax
0xFE0A ; newline
0x0054 ; inc
0xFE20 ; space
0x0000 ; rax
0xFE0A ; newline
0x0056 ; mov
0xFE20 ; space
0x1004 ; open bracket (4)
0xFE20 ; space |1
0x0000 ; rax |2
0xFE20 ; space |3
0x0052 ; close bracket |4
0xFE2C ; comma
0xFE20 ; space
0x0003 ; rdx
0xFE0A ; newline
0x004F ; hlt
0xFE0A ; newline
0xFE00 ; null terminator
```
#### nasm output with the above example program, bits 64
```nasm
0x31 ; XOR r/m16/32/64 r16/32/64
0xC0 ; ModR/M byte
; mod 11b ; directly address the following:
; reg 000b ; EAX
; r/m 000b ; EAX
0x48 ; 64 Bit Operand Size prefix
0xFF ; with `reg` from ModR/M byte 000b:
; INC r/m16/32/64
0xC0 ; ModR/M byte
; mod 11b ; direct addressing
; reg 000b ; RAX
; r/m 000b ; RAX
0x48 ; 64 Bit Operand Size prefix
0x89 ; MOV r/m16/32/64 r16/32/64
0x10 ; ModR/M byte
; mod 00b ; indirect addressing, no displacement
; reg 010b ; RDX
; r/m 000b ; [RAX]
0xF4 ; HLT
```

918
twasm/asm/main.asm Normal file
View File

@@ -0,0 +1,918 @@
; TODO actually enforce any of these *_SIZE constants :p
; memory layout and sentinel values shared by the assembler and its test suite
LOAD_ADDR equ 0x00010000 ; address this program is loaded at (used as `org`)
TEST_ARENA_ADDR equ 0x00050000 ; address of the scratch area the tests write to
TEST_ARENA_SIZE equ 0x1000 ; maximum number of bytes tests can use
TOKEN_TABLE_ADDR equ 0x00060000 ; address the token table is built at
TOKEN_TABLE_SIZE equ 0x1000 ; max length of the token table in bytes
TOKEN_TABLE_ENTRY_SIZE equ 2 ; size of token table entry; things may break
; if this ever changes
OUTPUT_ADDR equ 0x00070000 ; address of the outputted binary
OUTPUT_SIZE equ 0x1000 ; max length of the outputted binary in bytes
; NOTE(review): same value as TOKEN_TABLE_ADDR; since pushes move rsp downwards
; the stack occupies the space below the token table - confirm this is the
; intended placement
STACK_ADDR equ 0x00060000 ; initial value of rsp for the 64-bit stack
UNRECOGNISED_TOKEN_ID equ 0xFFFF ; id reported for an unrecognised token
UNRECOGNISED_ID_TYPE equ 0x0F ; type reported for an id without a by_id entry
UNRECOGNISED_ID_METADATA equ 0xFF ; metadata reported for an id without a by_id entry
TEST_LINE_LENGTH equ 80 ; right border of test suite results
[bits 64]
[org LOAD_ADDR]
start:
; entry point: set up the stack, run the test suite, then tokenise and
; assemble the built-in test program before halting
  mov rsp, STACK_ADDR ; we might need more stack space, let's just be safe
  mov rsi, msg_welcome
  call print
  call run_tests
  call clear_token_table
  mov rdi, program ; -> program
  ; program.size is declared with `db`, so it must be read as a single byte; a
  ; 64-bit load would also pick up whatever 7 bytes follow it in memory
  movzx esi, byte [program.size] ; = size of program
  call tokenise
  ; rax = number of tokens processed
  push rax ; keep the token count; clear_output_arena clobbers rax, rcx and rdi
  call clear_output_arena
  pop rdi ; = number of tokens in the token table
  call assemble
  jmp halt
; ------------------------------------------------------------------------------
; assembling
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; assemble
; TODO write tests
; TODO make it work :/ putting the cart before the horse
;
; description:
; walks the token table located at TOKEN_TABLE_ADDR and, for now, only reports
; every operator and register token it encounters over serial. Nothing is
; written to OUTPUT_ADDR yet. It's probably desirable to clear the output arena
; before calling this function once it emits code.
;
; parameters:
; rdi = number of tokens in the token table
; ------------------------------------------------------------------------------
assemble:
  xor eax, eax ; rax = index of the token being processed
.loop:
  cmp rax, rdi ; check the index against the number of tokens in the token
  jge .break ; table. If every token has been visited, break
  push rdi ; total number of tokens
  push rax ; index
  movzx edi, word [rax * TOKEN_TABLE_ENTRY_SIZE + TOKEN_TABLE_ADDR] ; next tte
  call get_tte_type
  ; al = type of the token
  cmp ax, 0x01 ; check if it's an operator
  jne .continue_operator
  push rsi ; print takes its argument in rsi; keep the caller's value
  mov rsi, .msg_found_operator
  call print
  pop rsi
.continue_operator:
  cmp ax, 0x02 ; check if it's a register
  jne .continue_register
  push rsi
  mov rsi, .msg_found_register
  call print
  pop rsi
.continue_register:
  pop rax ; index
  pop rdi ; total number of tokens
  inc rax ; move to next token
  jmp .loop
.break:
  ret
.msg_found_operator db "found operator", 0x0A, 0x00
.msg_found_register db "found register", 0x0A, 0x00
; ------------------------------------------------------------------------------
; get_tte_type
;
; description:
; given a token table entry, returns the declared type in `tokens.by_id`. If
; there is no entry, returns UNRECOGNISED_ID_TYPE
;
; parameters:
; di = token table entry
;
; returned:
; al = type of token, or UNRECOGNISED_ID_TYPE. The upper 4 bits of al are
; zeroed; the rest of rax is zeroed.
; ------------------------------------------------------------------------------
get_tte_type:
  and rdi, 0xFFFF ; mask input so it behaves as expected
  xor eax, eax ; rax = index of the entry being inspected
.loop:
  ; valid indices are 0 .. count - 1, so stop as soon as rax reaches the count;
  ; comparing with "greater than" would inspect the 4 bytes after the table
  cmp rax, (tokens.by_id_end - tokens.by_id) / 4
  jae .not_found
  cmp di, [tokens.by_id + rax * 4] ; bytes 0-1 of an entry hold the token id
  je .found
  inc rax
  jmp .loop
.not_found:
  mov eax, UNRECOGNISED_ID_TYPE
  ret
.found:
  movzx eax, byte [tokens.by_id + rax * 4 + 2] ; byte 2 of an entry holds the type
  and eax, 0xF ; only the low nibble is the type; the high nibble is reserved
  ret
; ------------------------------------------------------------------------------
; get_tte_typed_metadata
;
; description:
; given a token table entry, returns the declared typed metadata in
; `tokens.by_id`. If there is no entry, returns UNRECOGNISED_ID_METADATA
;
; parameters:
; di = token table entry
;
; returned:
; al = typed metadata of token, or UNRECOGNISED_ID_METADATA; the rest of rax is
; zeroed.
; ------------------------------------------------------------------------------
get_tte_typed_metadata:
  and rdi, 0xFFFF ; mask input so it behaves as expected
  xor eax, eax ; rax = index of the entry being inspected
.loop:
  ; valid indices are 0 .. count - 1, so stop as soon as rax reaches the count;
  ; comparing with "greater than" would inspect the 4 bytes after the table
  cmp rax, (tokens.by_id_end - tokens.by_id) / 4
  jae .not_found
  cmp di, [tokens.by_id + rax * 4] ; bytes 0-1 of an entry hold the token id
  je .found
  inc rax
  jmp .loop
.not_found:
  mov eax, UNRECOGNISED_ID_METADATA
  ret
.found:
  movzx eax, byte [tokens.by_id + rax * 4 + 3] ; byte 3 holds the typed metadata
  ret
; ------------------------------------------------------------------------------
; tokenising
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; tokenise
; TODO write tests
;
; description:
; walks the program text one token at a time and appends one 16-bit entry per
; token to the token table at TOKEN_TABLE_ADDR. Terminator bytes (reported by
; identify_next_token as tokens of length 0) are stored as 0xFEXX, where XX is
; the byte itself. When a closing bracket is stored, the entry of the most
; recently seen open bracket is rewritten to 0x10XX, where XX is the number of
; entries from the open bracket to the closing one. The byte at [rdi + rsi] is
; processed too, so a null placed directly after the program is tokenised.
; It's probably desirable to clear the token table before calling this function.
;
; parameters:
; rdi -> first byte of program
; rsi = size of program in bytes
;
; returned:
; rax = number of tokens processed
; ------------------------------------------------------------------------------
tokenise:
  add rsi, rdi ; rsi -> byte directly after the program text
  xor ecx, ecx ; rcx = number of entries written so far
.next_token:
  cmp rdi, rsi
  jg .done ; stop once the read pointer has moved past rsi
  push rdi
  push rsi
  push rcx
  ; rdi -> current byte
  call identify_next_token
  ; ax = id of token, dx = length of token
  pop rcx
  pop rsi
  pop rdi
  test rdx, rdx
  jnz .store ; a real token; ax already holds its id
  ; length 0 means rdi points at a terminator byte: encode it as 0xFEXX
  mov ah, 0xFE
  mov al, [rdi]
  mov edx, 1 ; a terminator is 1 byte long
.store:
  add rdi, rdx ; rdi -> next unread byte
  mov [TOKEN_TABLE_ADDR + rcx * TOKEN_TABLE_ENTRY_SIZE], ax
  ; TODO fix undefined behaviour when open brackets and closed brackets aren't
  ; correctly paired or have too much distance between them
  cmp ax, 0x0051 ; open bracket?
  jne .check_close
  ; TODO make brackets able to hold more
  mov [.data_open_bracket], cl ; remember which entry the open bracket is at
  jmp .advance
.check_close:
  cmp ax, 0x0052 ; closing bracket?
  jne .advance
  ; rewrite the remembered open bracket entry as 0x10XX
  movzx edx, byte [.data_open_bracket] ; rdx = entry index of the open bracket
  mov eax, ecx
  sub al, dl ; al = entries between the two brackets
  mov ah, 0x10
  mov [TOKEN_TABLE_ADDR + rdx * TOKEN_TABLE_ENTRY_SIZE], ax
.advance:
  inc rcx ; +1 token processed
  jmp .next_token
.done:
  mov rax, rcx
  ret
.data_open_bracket db 0x00 ; represents the token # of the latest open bracket
; ------------------------------------------------------------------------------
; identify_token
;
; description:
; returns the id of a given token. If there are multiple ways to represent a
; given token, like the open-bracket, it returns the one that doesn't require
; information about the surrounding tokens, because it has no such information.
; In other words, if it isn't in the `tokens` data structure, this function
; doesn't see it. Tokens whose size is not 1 to 4 bytes are reported as
; unrecognised; that includes a size of 0, which is what a terminator byte
; looks like to the caller.
; Only the `rsi` bytes of the token itself are read.
;
; parameters:
; rdi -> first byte of token
; rsi = size of token in bytes
;
; returned:
; ax = id of token; the rest of rax is zeroed
; ------------------------------------------------------------------------------
identify_token:
  cmp rsi, 1 ; pick the table that holds names of this length
  je .start_length1
  cmp rsi, 2
  je .start_length2
  cmp rsi, 3
  je .start_length3
  cmp rsi, 4
  je .start_length4
  jmp .unrecognised ; no table for any other length

; every table entry is the name followed by a 2 byte id, so each loop advances
; by length + 2 and ends where the table for the next length begins

; length1
.start_length1:
  mov rcx, tokens.by_name_1 ; rcx -> list of known tokens
.loop_length1:
  cmp rcx, tokens.by_name_2 ; check if rcx still in bounds of length1 tokens
  jae .unrecognised ; if not, unrecognised
  mov r10b, [rcx] ; known token
  cmp r10b, [rdi] ; if known token matches token,
  je .found ; exit loop
  add rcx, 3 ; length of token + length of id
  jmp .loop_length1

; length2
.start_length2:
  mov rcx, tokens.by_name_2 ; rcx -> list of known tokens
.loop_length2:
  cmp rcx, tokens.by_name_3 ; check if rcx still in bounds of length2 tokens
  jae .unrecognised ; if not, unrecognised
  mov r10w, [rcx] ; known token
  cmp r10w, [rdi] ; if known token matches token,
  je .found ; exit loop
  add rcx, 4 ; length of token + length of id
  jmp .loop_length2

; length3
.start_length3:
  mov rcx, tokens.by_name_3 ; rcx -> list of known tokens
.loop_length3:
  cmp rcx, tokens.by_name_4 ; check if rcx still in bounds of length3 tokens
  jae .unrecognised ; if not, unrecognised
  ; compare 2 bytes + 1 byte so nothing beyond the 3 byte token is read
  mov r10w, [rcx] ; first two bytes of known token
  cmp r10w, [rdi]
  jne .next_length3
  mov r10b, [rcx + 2] ; third byte of known token
  cmp r10b, [rdi + 2] ; if the whole known token matches token,
  je .found ; exit loop
.next_length3:
  add rcx, 5 ; length of token + length of id
  jmp .loop_length3

; length4
.start_length4:
  mov rcx, tokens.by_name_4 ; rcx -> list of known tokens
.loop_length4:
  cmp rcx, tokens.by_name_5 ; check if rcx still in bounds of length4 tokens
  jae .unrecognised ; if not, unrecognised
  mov r10d, [rcx] ; known token
  cmp r10d, [rdi] ; if known token matches token,
  je .found ; exit loop
  add rcx, 6 ; length of token + length of id
  jmp .loop_length4

.found:
  ; the id sits directly after the name, i.e. `rsi` bytes into the entry
  movzx eax, word [rcx + rsi] ; return id of token, rest of rax zeroed
  ret
.unrecognised:
  mov eax, UNRECOGNISED_TOKEN_ID
  ret
; ------------------------------------------------------------------------------
; identify_next_token
;
; description:
; measures the token starting at rdi by scanning forward until a byte from
; token_terminator_8 is met, then hands the token and its measured length to
; identify_token. If the very first byte is a terminator the length is 0.
;
; parameters:
; rdi -> first byte of token
;
; returned:
; ax = id of token; the rest of rax is zeroed
; dx = length of token in bytes; the rest of rdx is zeroed
; ------------------------------------------------------------------------------
identify_next_token:
  push rdi ; keep -> first byte of token for identify_token
  mov rsi, rdi ; rsi -> byte under inspection
.scan:
  movzx edx, byte [rsi] ; dl = byte under inspection
  push rsi ; elemb consumes rsi and rdi
  mov edi, 8 ; length of terminator list
  mov rsi, token_terminator_8 ; start of terminator list
  call elemb
  pop rsi
  cmp rax, 1 ; is the inspected byte a token terminator?
  je .measured ; if so, the token ends just before it
  inc rsi ; otherwise move on to the next byte
  jmp .scan
.measured:
  pop rdi ; rdi -> first byte of token
  sub rsi, rdi ; rsi = number of bytes before the terminator
  push rsi ; keep the length across the call
  call identify_token
  pop rdx ; length
  mov rsi, rdx
  ret
; ------------------------------------------------------------------------------
; copy_token
;
; description:
; copies bytes from one buffer to another until a byte from token_terminator_8
; is met in the read buffer. The terminator itself is not copied.
;
; parameters:
; rdi -> start of buffer to be read
; rsi -> start of buffer to be written
;
; returned:
; rax -> the terminator byte that ended the copy in the read buffer
; rdx -> the byte directly after the last one written in the write buffer
; ------------------------------------------------------------------------------
copy_token:
.next_byte:
  mov dl, [rdi] ; dl = candidate byte, checked against the terminator list
  push rdi ; elemb consumes rdi and rsi, so park both pointers
  push rsi
  mov edi, 8 ; length of terminator list
  mov rsi, token_terminator_8 ; start of terminator list
  call elemb
  pop rsi
  pop rdi
  cmp rax, 1 ; is the candidate a token terminator?
  je .finished ; if so, the token is complete
  mov al, [rdi] ; otherwise copy it across
  mov [rsi], al
  inc rdi ; read pointer
  inc rsi ; write pointer
  jmp .next_byte
.finished:
  mov rax, rdi
  mov rdx, rsi
  ret
; ------------------------------------------------------------------------------
; copy_byte
;
; description:
; copies a single byte from one spot in memory to another
;
; parameters:
; rdi -> byte to be read
; rsi -> byte to be written
;
; returned:
; al = byte that was read; the rest of rax is zeroed
; ------------------------------------------------------------------------------
copy_byte:
  movzx eax, byte [rdi] ; load the byte and clear the rest of rax in one go
  mov [rsi], al
  ret
; ------------------------------------------------------------------------------
; utilities
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; print
;
; description:
; writes a null-terminated string byte by byte to I/O port 0x3F8. The null
; itself is not written. rax, rdx and rsi are restored before returning, which
; keeps the routine convenient for debugging.
;
; parameters:
; rsi -> start of null-terminated string
; ------------------------------------------------------------------------------
print:
  push rsi
  push rax
  push rdx
  mov edx, 0x3F8 ; port every byte is written to
  jmp .fetch
.emit:
  out dx, al
  inc rsi
.fetch:
  mov al, [rsi]
  test al, al ; null terminator reached?
  jnz .emit
  pop rdx
  pop rax
  pop rsi
  ret
; ------------------------------------------------------------------------------
; halt
;
; description:
; announces the halt once, then parks the CPU for good, silly :)
; never returns.
; ------------------------------------------------------------------------------
halt:
  mov rsi, msg_halt
  call print
.sleep:
  hlt
  ; hlt resumes after an interrupt; go straight back to sleep instead of
  ; announcing the halt again
  jmp .sleep
; ------------------------------------------------------------------------------
; elemb
;
; description:
; checks if given byte is element of the specified list. The list is searched
; front to back; rsi and rdi are consumed while searching.
;
; parameters:
; rdi = size of list
; rsi -> start of list
; dl = given byte
;
; returned:
; rax = 0: is not an element
; 1: is an element
; ------------------------------------------------------------------------------
elemb:
  test rdi, rdi ; an empty list contains nothing
  jz .absent
.next:
  cmp [rsi], dl ; is the current list byte the desired byte?
  je .present
  inc rsi ; move to next byte
  dec rdi ; and reduce remaining length
  jnz .next ; keep going while bytes remain
.absent:
  xor eax, eax ; return 0; dl not an element of list
  ret
.present:
  mov eax, 1 ; return 1; dl an element of list
  ret
.f db "found", 0x0A, 0x00
.nf db "not found", 0x0A, 0x00
; ------------------------------------------------------------------------------
; clear_token_table
;
; description:
; fills the TOKEN_TABLE_SIZE bytes starting at TOKEN_TABLE_ADDR with zeroes.
; clobbers rax, rcx and rdi.
; ------------------------------------------------------------------------------
clear_token_table:
  mov edi, TOKEN_TABLE_ADDR ; address to start
  mov ecx, TOKEN_TABLE_SIZE / 4 ; number of double words
  xor eax, eax ; value to write
  rep stosd
  ret
; ------------------------------------------------------------------------------
; clear_test_arena
;
; description:
; clears the test arena as specified by TEST_ARENA_SIZE and TEST_ARENA_ADDR.
; clobbers rax, rcx and rdi.
; ------------------------------------------------------------------------------
clear_test_arena:
  xor eax, eax ; value to write
  mov rcx, TEST_ARENA_SIZE / 4 ; number of double words
  mov rdi, TEST_ARENA_ADDR ; address to start
  rep stosd
  ret
; ------------------------------------------------------------------------------
; clear_output_arena
;
; description:
; fills the OUTPUT_SIZE bytes starting at OUTPUT_ADDR with zeroes.
; clobbers rax, rcx and rdi.
; ------------------------------------------------------------------------------
clear_output_arena:
  mov edi, OUTPUT_ADDR ; address to start
  mov ecx, OUTPUT_SIZE / 4 ; number of double words
  xor eax, eax ; value to write
  rep stosd
  ret
%include "asm/tests.asm"
; ------------------------------------------------------------------------------
; data
; ------------------------------------------------------------------------------
tokens:
.by_name_1:
db "["
dw 0x0051
db "]"
dw 0x0052
db "+"
dw 0x0062
db "-"
dw 0x0063
db "*"
dw 0x0064
db "/"
dw 0x0065
.by_name_2:
db "r8"
dw 0x0008
db "r9"
dw 0x0009
db "ax"
dw 0x0020
db "bx"
dw 0x0021
db "cx"
dw 0x0022
db "dx"
dw 0x0023
db "si"
dw 0x0024
db "di"
dw 0x0025
db "sp"
dw 0x0026
db "bp"
dw 0x0027
db "al"
dw 0x0030
db "bl"
dw 0x0031
db "cl"
dw 0x0032
db "dl"
dw 0x0033
db "ah"
dw 0x0040
db "bh"
dw 0x0041
db "ch"
dw 0x0042
db "dh"
dw 0x0043
db "cs"
dw 0x0044
db "ds"
dw 0x0045
db "es"
dw 0x0046
db "fs"
dw 0x0047
db "gs"
dw 0x0048
db "ss"
dw 0x0049
db "je"
dw 0x005C
db "jg"
dw 0x005F
db "jl"
dw 0x0061
.by_name_3:
db "rax"
dw 0x0000
db "rbx"
dw 0x0001
db "rcx"
dw 0x0002
db "rdx"
dw 0x0003
db "rsi"
dw 0x0004
db "rdi"
dw 0x0005
db "rsp"
dw 0x0006
db "rbp"
dw 0x0007
db "r10"
dw 0x000A
db "r11"
dw 0x000B
db "r12"
dw 0x000C
db "r13"
dw 0x000D
db "r14"
dw 0x000E
db "r15"
dw 0x000F
db "eax"
dw 0x0010
db "ebx"
dw 0x0011
db "ecx"
dw 0x0012
db "edx"
dw 0x0013
db "esi"
dw 0x0014
db "edi"
dw 0x0015
db "esp"
dw 0x0016
db "ebp"
dw 0x0017
db "r8d"
dw 0x0018
db "r9d"
dw 0x0019
db "r8w"
dw 0x0028
db "r9w"
dw 0x0029
db "sil"
dw 0x0034
db "dil"
dw 0x0035
db "spl"
dw 0x0036
db "bpl"
dw 0x0037
db "r8b"
dw 0x0038
db "r9b"
dw 0x0039
db "cr0"
dw 0x004A
db "cr2"
dw 0x004B
db "cr3"
dw 0x004C
db "cr4"
dw 0x004D
db "cr8"
dw 0x004E
db "hlt"
dw 0x004F
db "xor"
dw 0x0053
db "inc"
dw 0x0054
db "dec"
dw 0x0055
db "mov"
dw 0x0056
db "add"
dw 0x0057
db "sub"
dw 0x0058
db "ret"
dw 0x005A
db "cmp"
dw 0x005B
db "jne"
dw 0x005D
db "jge"
dw 0x005E
db "jle"
dw 0x0060
.by_name_4:
db "r10d"
dw 0x001A
db "r11d"
dw 0x001B
db "r12d"
dw 0x001C
db "r13d"
dw 0x001D
db "r14d"
dw 0x001E
db "r15d"
dw 0x001F
db "r10w"
dw 0x002A
db "r11w"
dw 0x002B
db "r12w"
dw 0x002C
db "r13w"
dw 0x002D
db "r14w"
dw 0x002E
db "r15w"
dw 0x002F
db "r10b"
dw 0x003A
db "r11b"
dw 0x003B
db "r12b"
dw 0x003C
db "r13b"
dw 0x003D
db "r14b"
dw 0x003E
db "r15b"
dw 0x003F
db "int3"
dw 0x0050
db "call"
dw 0x0059
.by_name_5:
.by_id:
; metadata table searched by get_tte_type and get_tte_typed_metadata.
; every entry is 4 bytes:
; bytes 0-1: token id
; byte 2: type (0x01 operator, 0x02 register)
; byte 3: typed metadata (operators: number of operands; registers: width)
; the table ends at .by_id_end; entries are matched on the token id only
dw 0x0053 ; xor
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0010 ; eax
db 0x02 ; type: register
db 0x02 ; width: 32 bit
dw 0x0054 ; inc
db 0x01 ; type: operator
db 0x01 ; # operands
dw 0x0000 ; rax
db 0x02 ; type: register
db 0x03 ; width: 64 bit
dw 0x0056 ; mov
db 0x01 ; type: operator
db 0x02 ; # operands
dw 0x0003 ; rdx
db 0x02 ; type: register
db 0x03 ; width: 64 bit
dw 0x004F ; hlt
db 0x01 ; type: operator
db 0x00 ; # operands
.by_id_end:
; null-terminated messages for `print`
msg_welcome db "Welcome to Twasm", 0x0A, 0x00
msg_halt db "halted.", 0x0A, 0x00
; bytes that end a token: null, space, line feed, carriage return and comma.
; padded with nulls to 8 bytes, the list length the callers pass to elemb
token_terminator_8 db 0x00, " ", 0x0A, 0x0D, ",", 0x00, 0x00, 0x00
debug_string db "debug_string", 0x0A, 0x00
; test program
program:
db "xor eax, eax", 0x0A
db "inc rax", 0x0A
db "mov [ rax ], rdx", 0x0A
db "hlt", 0x0A
db 0x00 ; just for the sake of being able to print it, I made it a string
; length of the program text without the trailing null. Stored in a single
; byte, so it has to be loaded with a byte-sized read and programs are limited
; to 255 bytes
.size db $ - program - 1

482
twasm/asm/tests.asm Normal file
View File

@@ -0,0 +1,482 @@
; ------------------------------------------------------------------------------
; tests
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; run_tests
;
; description:
; prints a banner, then runs every test routine in a fixed order. The test
; arena is cleared with clear_test_arena before each test so that no test sees
; data left behind by the previous one. Each test prints its own name and
; result; run_tests itself reports nothing further.
; ------------------------------------------------------------------------------
run_tests:
mov rsi, .msg
call print
call clear_test_arena
call test_copy_byte
call clear_test_arena
call test_copy_token
call clear_test_arena
call test_elemb
call clear_test_arena
call test_identify_token
call clear_test_arena
call test_identify_next_token
call clear_test_arena
call test_get_tte_type
call clear_test_arena
call test_get_tte_typed_metadata
ret
.msg db "running test suite...", 0x0A, 0x00
; ------------------------------------------------------------------------------
; test_copy_byte
;
; description:
; tests copy_byte described functionality: copies test_byte into the test
; arena, then checks that the returned byte equals both the byte now stored in
; the arena and the original test_byte. Prints msg_pass or msg_fail.
; ------------------------------------------------------------------------------
test_copy_byte:
mov rsi, .msg
call print
mov rdi, test_byte ; byte to be copied
mov rsi, TEST_ARENA_ADDR ; location of test
call copy_byte
; NOTE(review): relies on copy_byte leaving rsi pointing at the arena - confirm
mov cx, [rsi] ; reads a word; only the low byte is the copied byte
and ax, 0xFF ; only compare bottom byte
and cx, 0xFF
cmp ax, cx ; compare returned byte to copied byte
jne .fail
cmp al, [test_byte] ; compare returned byte to expected byte
jne .fail
.pass: ; reached by fall-through when every check succeeded
mov rsi, msg_pass
call print
ret
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_copy_byte...", 0x00
; ------------------------------------------------------------------------------
; test_copy_token
;
; description:
; tests copy_token described functionality with an 8-character token that is
; terminated once by a space and once by a null byte. For each case it checks
; the positions reported in rax (read buffer) and rdx (write buffer), then
; compares the first quad word written to the arena with the source token.
; Prints msg_pass or msg_fail.
; ------------------------------------------------------------------------------
test_copy_token:
mov rsi, .msg
call print
; test case: space terminated
mov rdi, test_token_space ; read buffer
mov rsi, TEST_ARENA_ADDR ; write buffer
call copy_token
; check the reported final indices against the expected final indices
cmp rax, test_token_space + 8 ; last byte read
jne .fail
cmp rdx, TEST_ARENA_ADDR + 8 ; last byte written
jne .fail
mov rsi, TEST_ARENA_ADDR ; reload: start of the written token
mov rcx, [rsi] ; the 8 token bytes as one quad word
cmp rcx, [test_token_space] ; check if copied token matches expected token
jne .fail ; if not, fail
; test case: null terminated
; the arena is not cleared between the two cases and both tokens hold the same
; 8 characters, so the rax/rdx checks are what tell the cases apart
mov rdi, test_token_null ; read buffer
mov rsi, TEST_ARENA_ADDR ; write buffer
call copy_token
; check the reported final indices against the expected final indices
cmp rax, test_token_null + 8 ; last byte read
jne .fail
cmp rdx, TEST_ARENA_ADDR + 8 ; last byte written
jne .fail
mov rsi, TEST_ARENA_ADDR ; reload: start of the written token
mov rcx, [rsi] ; the 8 token bytes as one quad word
cmp rcx, [test_token_null] ; check if copied token matches expected token
jne .fail ; if not, fail
.pass: ; reached by fall-through when every check succeeded
mov rsi, msg_pass
call print
ret
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_copy_token...", 0x00
; ------------------------------------------------------------------------------
; test_elemb
;
; description:
; tests elemb described functionality. As called here, elemb takes the list
; length in rdi, the list address in rsi and the byte to look for in dl, and
; is expected to return al = 1 when the byte is in the list and al = 0 when it
; is not. Prints msg_pass or msg_fail.
; ------------------------------------------------------------------------------
test_elemb:
mov rsi, .msg
call print
; [0]
mov rdi, 5
mov rsi, test_elemb_5
mov dl, [test_elemb_5] ; first element
call elemb
cmp al, 1
jne .fail
; [n - 1]
mov rdi, 5
mov rsi, test_elemb_5
mov dl, [test_elemb_5 + 4] ; last element
call elemb
cmp al, 1
jne .fail
; [1]
mov rdi, 5
mov rsi, test_elemb_5
mov dl, [test_elemb_5 + 1] ; an interior element (0x00)
call elemb
cmp al, 1
jne .fail
; not present
mov rdi, 5
mov rsi, test_elemb_5
mov dl, 0xDA ; value that does not occur in test_elemb_5
call elemb
cmp al, 0
jne .fail
; 0 length list
; test_elemb_0 shares its address with test_elemb_5 and 0x34 is stored there,
; so a match here would mean elemb read past the given length of 0
mov rdi, 0
mov rsi, test_elemb_0
mov dl, 0x34
call elemb
cmp al, 0
jne .fail
.pass: ; reached by fall-through when every check succeeded
mov rsi, msg_pass
call print
ret
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_elemb...", 0x00
; ------------------------------------------------------------------------------
; test_identify_token
;
; description:
; tests identify_token described functionality. As called here, identify_token
; takes the address of the token in rdi and its length in rsi, and is expected
; to return the token ID in ax, or 0xFFFF when the token is not recognised.
; Each case writes the token text into the test arena first. Covers token
; lengths 1 to 4, with existing and non-existing tokens for each length.
; Prints msg_pass or msg_fail.
; ------------------------------------------------------------------------------
test_identify_token:
mov rsi, .msg
call print
; length1 token that exists
mov byte [TEST_ARENA_ADDR], "*"
mov rdi, TEST_ARENA_ADDR
mov rsi, 1
call identify_token
cmp ax, 0x0064
jne .fail
; length1 token that doesn't exist
mov byte [TEST_ARENA_ADDR], " "
mov rdi, TEST_ARENA_ADDR
mov rsi, 1
call identify_token
cmp ax, 0xFFFF
jne .fail
; length2 token that exists
mov word [TEST_ARENA_ADDR], "sp"
mov rdi, TEST_ARENA_ADDR
mov rsi, 2
call identify_token
cmp ax, 0x0026
jne .fail
; length2 token that doesn't exist
mov word [TEST_ARENA_ADDR], "QQ"
mov rdi, TEST_ARENA_ADDR
mov rsi, 2
call identify_token
cmp ax, 0xFFFF
jne .fail
; length3 token that exists
; a 3-character constant stored as a dword leaves the fourth byte zero
mov dword [TEST_ARENA_ADDR], "rax"
mov rdi, TEST_ARENA_ADDR
mov rsi, 3
call identify_token
cmp ax, 0x0000
jne .fail
; length3 token that exists
mov dword [TEST_ARENA_ADDR], "cr0"
mov rdi, TEST_ARENA_ADDR
mov rsi, 3
call identify_token
cmp ax, 0x004A
jne .fail
; length3 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r16"
mov rdi, TEST_ARENA_ADDR
mov rsi, 3
call identify_token
cmp ax, 0xFFFF
jne .fail
; length4 token that exists
mov dword [TEST_ARENA_ADDR], "r10d"
mov rdi, TEST_ARENA_ADDR
mov rsi, 4
call identify_token
cmp ax, 0x001A
jne .fail
; length4 token that exists
mov dword [TEST_ARENA_ADDR], "r15b"
mov rdi, TEST_ARENA_ADDR
mov rsi, 4
call identify_token
cmp ax, 0x003F
jne .fail
; length4 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r15q"
mov rdi, TEST_ARENA_ADDR
mov rsi, 4
call identify_token
cmp ax, 0xFFFF
jne .fail
.pass: ; reached by fall-through when every check succeeded
mov rsi, msg_pass
call print
ret
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_identify_token...", 0x00
; ------------------------------------------------------------------------------
; test_identify_next_token
;
; description:
; tests identify_next_token described functionality. As called here,
; identify_next_token takes only the address of the text in rdi (no length) and
; is expected to return the token ID in ax, or 0xFFFF when the token is not
; recognised. Mirrors the cases of test_identify_token, but every token written
; to the test arena is followed by a space or a zero byte. Prints msg_pass or
; msg_fail.
; ------------------------------------------------------------------------------
test_identify_next_token:
mov rsi, .msg
call print
; length1 token that exists
mov word [TEST_ARENA_ADDR], "* "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x0064
jne .fail
; length1 token that doesn't exist
mov word [TEST_ARENA_ADDR], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; length2 token that exists
mov dword [TEST_ARENA_ADDR], "sp "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x0026
jne .fail
; length2 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "QQ "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; length3 token that exists
mov dword [TEST_ARENA_ADDR], "rax "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x0000
jne .fail
; length3 token that exists
mov dword [TEST_ARENA_ADDR], "cr0 "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x004A
jne .fail
; length3 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r16 "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; length4 token that exists
; the token fills the whole dword, so the trailing space is written separately
mov dword [TEST_ARENA_ADDR], "r10d"
mov byte [TEST_ARENA_ADDR + 4], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x001A
jne .fail
; length4 token that exists
mov dword [TEST_ARENA_ADDR], "r15b"
mov byte [TEST_ARENA_ADDR + 4], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x003F
jne .fail
; length4 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r15q"
mov byte [TEST_ARENA_ADDR + 4], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
.pass: ; reached by fall-through when every check succeeded
mov rsi, msg_pass
call print
ret
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_identify_next_token...", 0x00
; ------------------------------------------------------------------------------
; test_get_tte_type
;
; description:
; tests get_tte_type described functionality. As called here, get_tte_type
; takes a token ID in di and is expected to return the token's type in al
; (0x01 = operator, 0x02 = register), or UNRECOGNISED_ID_TYPE for an ID that
; has no entry. Prints msg_pass or msg_fail.
; ------------------------------------------------------------------------------
test_get_tte_type:
mov rsi, .msg
call print
mov di, 0x0053 ; xor
call get_tte_type
cmp al, 0x01 ; operator
jne .fail
mov di, 0x0003 ; rdx
call get_tte_type
cmp al, 0x02 ; register
jne .fail
mov di, 0x0056 ; mov
call get_tte_type
cmp al, 0x01 ; operator
jne .fail
mov di, 0xFFFF ; unrecognised token
call get_tte_type
cmp al, UNRECOGNISED_ID_TYPE
jne .fail
.pass: ; reached by fall-through when every check succeeded
mov rsi, msg_pass
call print
ret
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_get_tte_type...", 0x00
; ------------------------------------------------------------------------------
; test_get_tte_typed_metadata
;
; description:
; tests get_tte_typed_metadata described functionality. As called here,
; get_tte_typed_metadata takes a token ID in di and is expected to return the
; token's type-specific metadata in al (operand count for an operator, width
; code for a register), or UNRECOGNISED_ID_METADATA for an ID that has no
; entry. Prints msg_pass or msg_fail.
; ------------------------------------------------------------------------------
test_get_tte_typed_metadata:
mov rsi, .msg
call print
mov di, 0x0053 ; xor
call get_tte_typed_metadata
cmp al, 0x02 ; # operands
jne .fail
mov di, 0x0003 ; rdx
call get_tte_typed_metadata
cmp al, 0x03 ; width: 64 bits
jne .fail
mov di, 0x0056 ; mov
call get_tte_typed_metadata
cmp al, 0x02 ; # operands
jne .fail
mov di, 0xFFFF ; unrecognised token
call get_tte_typed_metadata
cmp al, UNRECOGNISED_ID_METADATA
jne .fail
.pass: ; reached by fall-through when every check succeeded
mov rsi, msg_pass
call print
ret
.fail:
mov rsi, msg_fail
call print
ret
; names this test in the output; previously printed "test_get_tte_type...",
; which made it indistinguishable from the test that runs just before it
.msg db "test_get_tte_typed_metadata...", 0x00
; result line printed after a test name when the test succeeded:
; line feed, padding spaces, "passed.", line feed, null terminator
msg_pass:
db 0x0A
; .end - .start is the length of the result word, so this emits
; TEST_LINE_LENGTH minus that length spaces
times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align
.start db "passed."
.end db 0x0A, 0x00
; result line printed after a test name when the test failed:
; line feed, padding spaces, "failed.", line feed, null terminator
msg_fail:
db 0x0A
; .end - .start is the length of the result word, so this emits
; TEST_LINE_LENGTH minus that length spaces (right alignment)
times (TEST_LINE_LENGTH + .start - .end) db " ",
.start db "failed."
.end db 0x0A, 0x00
; fixtures read by the tests above
test_byte db "Q" ; unterminated, just a byte chillin
test_token_null db "TestTokn", 0x00 ; followed by null terminator. Quad word
test_token_space db "TestTokn " ; followed by space. Quad word
; zero-length list: defines no bytes of its own, so it has the same address as
; test_elemb_5
test_elemb_0: ; [This Page Intentionally Left Blank]
; 5-element byte list; includes 0x00 so that searching for a zero byte is covered
test_elemb_5 db 0x54, 0x00, 0x21, 0x20, 0x34

42
twasm/package.nix Normal file
View File

@@ -0,0 +1,42 @@
# Builds twasm and packages it as a bootable raw disk image together with a
# launcher script that boots the image in QEMU.
#
# Arguments:
#   bootler - package whose bin/boot.bin is written to the start of the disk
#   stdenv  - provides mkDerivation
#   nasm    - assembler used in the build phase
#   qemu    - emulator started by the launcher script
{
bootler,
stdenv,
nasm,
qemu,
...
}:
stdenv.mkDerivation {
pname = "twasm";
version = "0.1.0";
src = ./.;
# Assemble asm/main.asm into out.bin. No -f option is given, so nasm uses its
# default output format.
buildPhase = ''
${nasm}/bin/nasm asm/main.asm -o out.bin
'';
# Create a zero-filled image of 2880 sectors of 512 bytes, write boot.bin at
# the start and out.bin from sector 1 onwards (conv=notrunc keeps the image
# size), then install the image, out.bin and the launcher into $out/bin.
#
# The launcher is written through an unquoted heredoc, so $out and the
# ${qemu} path are substituted while the package is built. When run, it
# copies the image to a writable ./.bootle/disk in the current directory,
# boots it in QEMU without a graphical window and with -s (gdb stub), and
# deletes ./.bootle afterwards.
installPhase = ''
dd if=/dev/zero of=disk bs=512 count=2880
dd if=${bootler}/bin/boot.bin of=disk conv=notrunc
dd if=out.bin of=disk bs=512 seek=1 conv=notrunc
mkdir -p $out/bin
cat<<EOF>$out/bin/twasm
#!/usr/bin/env bash
mkdir -p ./.bootle
cp $(echo $out)/bin/disk ./.bootle/disk
chmod a+w ./.bootle/disk
${qemu}/bin/qemu-system-x86_64 \
-nographic \
-s \
-drive file=./.bootle/disk,format=raw,index=0,media=disk
rm ./.bootle -r
EOF
chmod +x $out/bin/twasm
cp out.bin $out/bin
cp disk $out/bin
'';
}