Compare commits

...

67 Commits

Author SHA1 Message Date
andromeda
121a0df8e4 improve data structures 2026-03-24 20:49:52 +01:00
andromeda
74fc57cdfc add more stuff to the lookup tables 2026-03-24 11:16:39 +01:00
andromeda
26b6b44caf improve print 2026-03-24 10:05:21 +01:00
andromeda
8b0e4759be update example in readme 2026-03-23 23:54:27 +01:00
andromeda
b607bd13f3 IT SORTA WORKS LESGOOO 2026-03-23 23:37:39 +01:00
andromeda
40be72a5c3 remove superfluety 2026-03-23 22:29:13 +01:00
andromeda
f4f0f50d80 test completeness, couple semantics 2026-03-23 22:27:52 +01:00
andromeda
9233ca421b ;) 2026-03-23 18:11:27 +01:00
andromeda
9a88a859cf remove some stuff it doesn't need 2026-03-23 17:39:26 +01:00
andromeda
75e9c27dda remove superfluous wrapper function 2026-03-23 17:23:09 +01:00
andromeda
b952210561 add hash 2026-03-23 17:00:32 +01:00
andromeda
50e635332c start migration to better parsing of arguments 2026-03-23 16:25:10 +01:00
andromeda
cab89cdd2b a couple semantics 2026-03-22 04:19:51 +01:00
andromeda
55c426631b correctly handle [register]s as operands 2026-03-21 21:42:50 +01:00
andromeda
73ea2bb2b5 add addressing to evaluate_operand 2026-03-20 09:04:50 +01:00
andromeda
a5fd811b3f add trim_trailing_whitespace 2026-03-19 21:58:59 +01:00
andromeda
ad6a79d937 throw error with non-register operands 2026-03-18 20:52:00 +01:00
andromeda
ac0a10fafd finally change .gitignore, correctly handle register operands 2026-03-18 20:45:12 +01:00
andromeda
2e222c304a add evaluate_operand, fix operator tokenising 2026-03-18 20:26:30 +01:00
andromeda
ec88bd7381 rearrange test case 2026-03-18 18:41:48 +01:00
andromeda
d5c2dde221 identify some tokens 2026-03-18 16:53:35 +01:00
andromeda
de60e52c5a range check char length 2026-03-18 16:21:34 +01:00
andromeda
17a0a1a406 yeh only chr consts fn 2026-03-18 16:18:19 +01:00
andromeda
e79a30ba71 add rough char/str parsing 2026-03-17 21:16:17 +01:00
andromeda
0160d2e127 add binary 2026-03-15 22:02:36 +01:00
andromeda
c05adee382 add octal 2026-03-15 21:55:16 +01:00
andromeda
64d3e4f635 remove dead lines 2026-03-15 21:19:30 +01:00
andromeda
238069aa0d evaluate constants as long as they are hex in the form 0x 2026-03-15 21:18:40 +01:00
andromeda
4a3350fe4e get token id of operators 2026-03-13 22:49:15 +01:00
andromeda
0ee8ff7914 some major architecture changes 2026-03-12 23:03:29 +01:00
andromeda
bc19d760b9 remove dead lines 2026-03-12 14:30:24 +01:00
andromeda
065a746ca4 remove disk again, oops x( 2026-03-12 14:27:16 +01:00
andromeda
1b93cab4f6 add warn print, clean up a little 2026-03-12 14:26:38 +01:00
andromeda
9e68a6bb6e increase/fix warnings 2026-03-12 14:12:33 +01:00
andromeda
c003e63d62 remove unused code 2026-03-12 14:03:46 +01:00
andromeda
e775b05f85 add print.* modes 2026-03-12 13:54:44 +01:00
andromeda
a84f2d7453 add get_ModRM 2026-03-11 20:14:13 +01:00
andromeda
e6d7cb2f21 add get_reg_bits and test, streamline ModRM calculations, correct opcodes, fix typos... 2026-03-10 21:21:59 +01:00
andromeda
08c39a2bd8 add get_opcode and its test, fix a couple bugs + semantics things 2026-03-10 16:32:06 +01:00
andromeda
a972f38bb6 add get_direct_addressing_ModRM and test_*, fix a couple bugs/typos, add 'reg value' as register metadata 2026-03-09 23:01:12 +01:00
andromeda
869420ef7a fix some bugs, work on assembler 2026-03-09 11:00:59 +01:00
andromeda
33710a8ebe work on metadata system, put tests in seperate file 2026-03-09 10:08:19 +01:00
andromeda
0b7526661c clear up internal data structures, add to README 2026-03-08 16:03:24 +01:00
andromeda
76e9cc4cd7 add resources to README 2026-03-08 13:53:05 +01:00
andromeda
002100bd70 add resources to README 2026-03-08 13:47:12 +01:00
andromeda
e10d771743 parse brackets, improve docs 2026-03-08 12:35:14 +01:00
andromeda
172566dfe3 remove line feed characters xD 2026-03-08 11:23:46 +01:00
andromeda
f4cadcfff9 change token table entries, remove temporary file :shame: 2026-03-08 11:15:05 +01:00
andromeda
d90c39b6bc clarify readme 2026-03-08 11:04:42 +01:00
andromeda
0d66e77976 get tokenising working a bit :p also some fixes and semantics 2026-03-08 10:56:20 +01:00
andromeda
63e3a1ea7e add some tokens, length1 token support 2026-03-07 21:33:28 +01:00
andromeda
19a3e4ff5b little optimisation, add single-token parsing function 2026-03-07 20:57:19 +01:00
andromeda
50964e945e encode register tokens 2026-03-07 16:44:40 +01:00
andromeda
46bdc91966 clear tables 2026-03-07 15:51:51 +01:00
andromeda
7df3d25727 design token work 2026-03-07 15:43:41 +01:00
andromeda
30a7b5cb34 add elemb, improve token checking 2026-03-07 12:43:55 +01:00
andromeda
e1822da600 load 16 sectors instead of 1 2026-03-06 23:02:09 +01:00
andromeda
444e85e30c clarify doc 2026-03-06 22:36:47 +01:00
andromeda
00be2cc545 reformat, add copy_token and test, fix bug 2026-03-06 22:16:26 +01:00
andromeda
e5c715d54f change rdme 2026-03-06 20:56:03 +01:00
andromeda
42003df415 merge new changes 2026-03-06 20:38:32 +01:00
andromeda
9c65697dd8 bunch of stuff idek 2026-03-06 20:33:51 +01:00
andromeda
d35463e195 reorganise 2026-03-06 16:53:18 +01:00
andromeda
fdf5bb9daf remove redundant compile flag 2026-03-05 22:26:54 +01:00
andromeda
0d739522a0 copy libs also 2026-03-05 21:08:44 +01:00
andromeda
846c54653b change dev env, format rust 2026-03-05 21:00:18 +01:00
andromeda
f52da82650 add footer 2026-03-05 20:48:30 +01:00
22 changed files with 3397 additions and 112 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
result*
.bootle*

View File

@@ -2,50 +2,18 @@ Call me Terry Davis because... actually please don't. I have visions: aspiration
# bootle
hobby kernel written in rust. It's just for playing around... for now :p
hobby kernel
run with `nix run git+https://git.mtgmonkey.net/andromeda/bootler#bootle`
status: basically nothing, come back later
# bootler
hobby bootloader written in asm. It's just for playing around... for now :p
hobby 1-stage legacy mode bootloader
run with `nix run git+https://git.mtgmonkey.net/andromeda/bootler#bootler`
status: gets to long mode, loads+jumps to kernel, starts idt and gdt... :)
### memory map
# twasm
```
+------ 0x00100000 ------+
| hardware, bios stuff |
+------ 0x00080000 ------+
| |
| |
+------ 0x00010200 ------+
| x86_64 kernel |
+------ 0x00010000 ------+
| |
| |
+------ 0x00009000 ------+
| IDT |
+------ 0x00008000 ------+
| |
| |
+------ 0x00007E00 ------+
| bootloader (boot.asm) |
+------ 0x00007C00 ------+
| stack | TODO get real stack
+------ 0x00005000 ------+
| PT |
+------ 0x00004000 ------+
| PDT |
+------ 0x00003000 ------+
| PDPT |
+------ 0x00002000 ------+
| PML4T |
+------ 0x00001000 ------+
| |
| |
+------ 0x00000500 ------+
| bios stuff |
+------ 0x00000000 ------+
```
hobby self-hosted assembler
status: basically nothing, come back later

View File

28
bootle/README.md Normal file
View File

@@ -0,0 +1,28 @@
# bootle
hobby kernel written in rust. It's just for playing around... for now :p
run with `nix run git+https://git.mtgmonkey.net/andromeda/bootler#bootle`
### memory map
```
+------ 0x00100000 ------+
| hardware, bios stuff |
+------ 0x00080000 ------+
| |
| |
+------ 0x00010200 ------+
| kernel |
+------ 0x00010000 ------+
| bootloader stuff |
| includes stack, gdt, |
| idt for the time being |
+------ 0x00000500 ------+
| bios stuff |
+------ 0x00000000 ------+
```
---
this project follows [Common Changelog](https://common-changelog.org) guidelines

View File

@@ -6,27 +6,28 @@
qemu,
...
}: let
rust-toolchain = rust-bin.fromRustupToolchainFile ../../rust-toolchain.toml;
rust-toolchain = rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
naersk' = callPackage naersk {
cargo = rust-toolchain;
rustc = rust-toolchain;
clippy = rust-toolchain;
};
in (naersk'.buildPackage {
src = ../../.;
src = ./.;
# deps for rust-src
additionalCargoLock = "${rust-toolchain.availableComponents.rust-src}/lib/rustlib/src/rust/library/Cargo.lock";
# just library build
copyBins = false;
copyLibs = true;
release = true;
# build std
cargoBuildOptions = x:
x
++ [
"-Zbuild-std=core,compiler_builtins"
"-Zbuild-std"
];
postInstall = ''
@@ -36,7 +37,7 @@ in (naersk'.buildPackage {
-e _start \
target/x86_64-unknown-none/release/libbootle.a
dd if=/dev/zero of=disk bs=512 count=2
dd if=/dev/zero of=disk bs=512 count=2880
dd if=${bootler}/bin/boot.bin of=disk conv=notrunc
dd if=kernel.bin of=disk bs=512 seek=1 conv=notrunc

View File

@@ -12,12 +12,14 @@ pub extern "C" fn _start() -> ! {
fn print_serial(s: &str) {
let mut bytes = s.bytes();
while let Some(b) = bytes.next() {
unsafe {core::arch::asm!(
unsafe {
core::arch::asm!(
"out dx, al"
, in("al") b
)};
)
};
}
}
fn println_serial(s: &str) {
print_serial(s);
@@ -28,7 +30,11 @@ fn welcome_serial() {
print_serial(ANSI_PINK);
println_serial("\nWelcome to Bootle OS");
println_serial("All code GPL licensed and freely available on git.mtgmonkey.net");
print_serial("Enjoy your time! Press "); print_serial(ANSI_RED); print_serial("ctrl+a x"); print_serial(ANSI_PINK); println_serial(" to escape Qemu");
print_serial("Enjoy your time! Press ");
print_serial(ANSI_RED);
print_serial("ctrl+a x");
print_serial(ANSI_PINK);
println_serial(" to escape Qemu");
print_serial(ANSI_CLEAR);
}
@@ -39,9 +45,7 @@ fn panic(_: &PanicInfo) -> ! {
}
fn halt() -> ! {
unsafe {core::arch::asm!(
"hlt"
)};
unsafe { core::arch::asm!("hlt") };
halt()
}

7
bootler/CHANGELOG.md Normal file
View File

@@ -0,0 +1,7 @@
# Changelog
## [0.1.0] - 2026-03-06
### Added
- initialised CHANGELOG.md

43
bootler/README.md Normal file
View File

@@ -0,0 +1,43 @@
# bootler
hobby bootloader, 1-stage, legacy mode :p
run with `nix run git+https://git.mtgmonkey.net/andromeda/bootler#bootler`
### memory map
```
. .
: :
| longmode kernel |
+------ 0x00010000 ------+
| |
| |
+------ 0x00009000 ------+
| IDT |
+------ 0x00008000 ------+
| |
| |
+------ 0x00007E00 ------+
| bootloader (boot.asm) |
+------ 0x00007C00 ------+
| stack |
+------ 0x00005000 ------+
| PT |
+------ 0x00004000 ------+
| PDT |
+------ 0x00003000 ------+
| PDPT |
+------ 0x00002000 ------+
| PML4T |
+------ 0x00001000 ------+
| |
| |
+------ 0x00000500 ------+
| bios stuff |
+------ 0x00000000 ------+
```
---
this project follows [Common Changelog](https://common-changelog.org) guidelines

View File

@@ -6,7 +6,7 @@
LOAD_ADDR equ 0x7C00
KERNEL_START equ 2 ; first sector on disk to load kernel from; 1 indexed
KERNEL_SIZE equ 1 ; length of kernel in sectors
KERNEL_SIZE equ 16 ; length of kernel in sectors
KERNEL_LOAD_ADDR_ES equ 0x1000 ; kernel to be loaded at es * 0x10 + 0x0000
PAGE_TABLE_LOAD_ADDR equ 0x1000 ; start of page table; 4 * pt size

View File

@@ -9,19 +9,18 @@ in
stdenv.mkDerivation {
pname = "bootler";
version = "0.1.0";
src = ../../.;
src = ./.;
buildPhase = ''
${nasm}/bin/nasm asm/boot.asm -o boot.bin
${nasm}/bin/nasm asm/kernel.asm -o dummy.bin
dd if=/dev/zero of=${bootImg} bs=512 count=2
dd if=/dev/zero of=${bootImg} bs=512 count=2880
dd if=boot.bin of=${bootImg} conv=notrunc
dd if=dummy.bin of=${bootImg} bs=512 seek=1 conv=notrunc
'';
installPhase = ''
mkdir -p $out/bin
cp boot.bin $out/bin
cp dummy.bin $out/bin
cp ${bootImg} $out/bin
# create emulation binary

View File

@@ -18,20 +18,25 @@
...
}: let
system = "x86_64-linux";
pkgs = import nixpkgs {
inherit system;
overlays = [(import rust-overlay)];
};
pkgs = nixpkgs.legacyPackages.${system};
pkgsWithRustOverlay = pkgs.extend (import rust-overlay);
in {
packages.${system} = {
bootler = pkgs.callPackage ./nix/pkgs/bootler.nix {};
bootle = pkgs.callPackage ./nix/pkgs/bootle.nix {
naersk = naersk;
bootler = pkgs.callPackage ./bootler/package.nix {};
bootle = pkgsWithRustOverlay.callPackage ./bootle/package.nix {
inherit naersk;
bootler = self.packages.${system}.bootler;
};
twasm = pkgs.callPackage ./twasm/package.nix {
bootler = self.packages.${system}.bootler;
};
};
devShells.${system}.default = pkgs.mkShell {
inputsFrom = [self.packages.${system}.default];
inputsFrom = [
self.packages.${system}.bootle
self.packages.${system}.bootler
self.packages.${system}.twasm
];
};
};
}

View File

@@ -1,47 +0,0 @@
{
stdenv,
nasm,
qemu,
...
}: let
bootImg = "boot";
in
stdenv.mkDerivation {
pname = "bootler";
version = "0.1.0";
src = ../.;
buildPhase = ''
${nasm}/bin/nasm asm/boot.asm -o boot.bin
${nasm}/bin/nasm asm/kernel.asm -o kernel.bin
dd if=/dev/zero of=${bootImg} bs=512 count=2
dd if=boot.bin of=${bootImg} conv=notrunc
dd if=kernel.bin of=${bootImg} bs=512 seek=1 conv=notrunc
'';
installPhase = ''
mkdir -p $out/bin
cp ${bootImg} $out/bin
# create emulation binary
cat<<EOF>$out/bin/bootler
#!/usr/bin/env bash
# create temp dir
mkdir -p ./.bootler
cp $(echo $out)/bin/${bootImg} ./.bootler/${bootImg}
chmod a+w ./.bootler/${bootImg}
# run image
${qemu}/bin/qemu-system-x86_64 \
-nographic \
-drive file=./.bootler/${bootImg},format=raw,index=0,media=disk
# clean up
rm ./.bootler -r
EOF
chmod +x $out/bin/${bootImg}
chmod +x $out/bin/bootler
'';
}

468
twasm/README.md Normal file
View File

@@ -0,0 +1,468 @@
# twasm
this will be a self-hosted assembler for a very minimal subset of NASM-style 64-bit asm
### goals
I want to compile Bootler and Twasm with the Twasm assembler
### reading
- [instructions](https://www.felixcloutier.com/x86/)
- [opcodes,ModR/M,SIB](http://ref.x86asm.net/coder64.html) (no secure site available)
- [calling conventions](https://wiki.osdev.org/Calling_Conventions); I try to use System V
### tokeniser
whitespace is ignored for the sake of readability; it can go between pretty much anything
```
------------------------
tokeniser
------------------------
byte(s) -> next byte(s)
------------------------
Newline -> Newline
-> Komment
-> Operator
-> Directive
Komment -> Newline
Operator -> Newline
-> Komment
-> Operand
Operand -> Newline
-> Komment
-> Comma
Comma -> Operand
Directive -> Newline
-> Komment
-> Operator
------------------------
```
not yet implemented:
```
------------------------
operand parser
------------------------
byte(s) -> next byte(s)
------------------------
START -> '['
-> Register
-> Constant
'[' -> Register
-> Constant
']' -> END
Register -> IF #[, ']'
-> Operator
Constant -> IF #[, ']'
-> Operator
Operator -> IF NOT #R, Register
-> Constant
------------------------
:R: = whether a register has been found
:[: = whether a '[' has been found
------------------------
```
### memory map
```
+------ 0x00100000 ------+
| hardware, bios stuff |
+------ 0x00080000 ------+
| output binary |
+------ 0x00070000 ------+
| token table |
+------ 0x00060000 ------+
| test arena |
+------ 0x00050000 ------+
| stack (rsp) |
+------------------------+
| input |
+------------------------+
| assembler |
+------ 0x00010000 ------+
| bootloader, bios, etc. |
+------------------------+
```
each word represents a token on the token table.
#### token table (TT)
each token gets loaded into the token table with the following form:
```
+----------+
| 15 0 |
+----------+
| token id |
+----------+
```
### internal data structures
#### `tokens.[operators|registers]`
contains tokens by their type. Intended to be searched by token name to get the token's ID.
each entry is in the following form:
```
+----------+--------------------------------+
| 47 32 | 31 0 |
+----------+--------------------------------+
| token ID | string without null terminator |
+----------+--------------------------------+
```
example implementation:
```nasm
tokens
.registers:
dd "r8"
dw 0x0008
.by_name3: ; this is required for futureproofness; the caller can use this to
; find the size of registers.by_name2
```
note that tokens longer than 4 bytes are problematic :/
#### `tokens.by_id`
contains some tokens with their metadata. Some tokens have embedded information (`0x10XX` for instance). Those will not have entries in this table, being handled instead inside the assemble function itself.
metadata about some tokens in the following form:
```
+----------------+----------+-------+----------+
| 31 24 | 23 20 | 19 16 | 15 0 |
+----------------+----------+-------+----------+
| typed metadata | reserved | type | token ID |
+----------------+----------+-------+----------+
```
the `type` hex digit is defined as the following:
| hex | meaning | examples |
|-----|----------|-|
| 0x0 | ignored | `; this entire comment is 1 token` |
| 0x1 | operator | `mov`, `hlt` |
| 0x2 | register | `rsp`, `al` |
| 0xF | unknown | any token ID not represented in the lookup table |
type metadata for the different types is as follows:
```
+----------+
| type 0x0 |
+----------+
| 31 24 |
+----------+
| reserved |
+----------+
```
```
+-------------------------------+
| type 0x1 |
+----------+--------------------+
| 31 26 | 25 24 |
+----------+--------------------+
| reserved | number of operands |
+----------+--------------------+
```
```
+------------------------------+
| type 0x2 |
+----------+-----------+-------+
| 31 29 | 28 26 | 25 24 |
+----------+-----------+-------+
| reserved | reg value | width |
+----------+-----------+-------+
; reg is the value that corresponds to the register in the ModR/M byte
; width:
00b ; 8 bit
01b ; 16 bit
10b ; 32 bit
11b ; 64 bit
```
#### `opcodes.by_id`
entries are as follows:
```
+------------------------------+
| 0 operand operators |
+------------------------------+
| 127 96 |
+------------------------------+
| reserved |
+------------------------------+
| 95 64 |
+------------------------------+
| reserved |
+------------------------------+
| 63 32 |
+------------------------------+
| reserved |
+----------+--------+----------+
| 31 24 | 23 16 | 15 0 |
+----------+--------+----------+
| reserved | opcode | token ID |
+----------+--------+----------+
+-------------------------------------------------------------+
| 1 operand operators |
+-------------------------------------------------------------+
| 127 96 |
+-------------------------------------------------------------+
| reserved |
+----------+-------+-------+-------+-------+----------+-------+
| 95 88 | 87 84 | 83 80 | 79 76 | 75 72 | 71 68 | 67 64 |
+----------+-------+-------+-------+-------+----------+-------+
| reserved | op5&8 | op4&8 | op3&8 | op2&8 | reserved | op0&8 |
+----------+-------+-------+-------+-------+----------+-------+
| 63 56 | 55 48 | 47 40 | 39 32 |
+----------+---------------+---------------+------------------+
| opcode | opcode | opcode | opcode |
| dst=rel8 | dest=rel | dst=imm8 | dst=imm |
+----------+---------------+---------------+------------------+
| 31 24 | 23 16 | 15 0 |
+----------+---------------+----------------------------------+
| reserved | opcode | token ID |
| | dst=r/m | |
+----------+---------------+----------------------------------+
+----------------------------------------------+
| 2 operand operators |
+----------------------------------------------+
| 127 96 |
+----------------------------------------------+
| reserved |
+-------------------+-------+-------+----------+
| 95 80 | 79 76 | 75 72 | 71 64 |
+-------------------+-------+-------+----------+
| reserved | op3&8 | op2&8 | reserved |
+-------------------+-------+-------+----------+
| 63 48 | 47 40 | 39 32 |
+-------------------+---------------+----------+
| reserved | opcode | opcode |
| | dst=r/m | dst=r/m |
| | src=imm8 | src=imm |
+---------+---------+---------------+----------+
| 31 24 | 23 16 | 15 0 |
+---------+---------+--------------------------+
| opcode | opcode | token ID |
| dst=r | dst=r/m | |
| src=r/m | src=r | |
+---------+---------+--------------------------+
; key:
r/m ; r/m 16/32/64
r ; r 16/32/64
imm ; imm 16/32
imm8 ; imm 8
rel ; rel 16/32
rel8 ; rel 8
opX&8 ; low 8 bits are the operator flag that goes with opcode at offset X from
; the first opcode in the table entry
```
note that there is much room to expand. If an opcode doesn't exist, its field should be 0x00
### token IDs
supported tokens are listed below
| token | id | notes |
|-------|--------|-|
| rax | 0x0000 | |
| rbx | 0x0001 | |
| rcx | 0x0002 | |
| rdx | 0x0003 | |
| rsi | 0x0004 | |
| rdi | 0x0005 | |
| rsp | 0x0006 | |
| rbp | 0x0007 | |
| r8 | 0x0008 | unimplemented |
| r9 | 0x0009 | unimplemented |
| r10 | 0x000A | unimplemented |
| r11 | 0x000B | unimplemented |
| r12 | 0x000C | unimplemented |
| r13 | 0x000D | unimplemented |
| r14 | 0x000E | unimplemented |
| r15 | 0x000F | unimplemented |
| eax | 0x0010 | |
| ebx | 0x0011 | |
| ecx | 0x0012 | |
| edx | 0x0013 | |
| esi | 0x0014 | |
| edi | 0x0015 | |
| esp | 0x0016 | |
| ebp | 0x0017 | |
| r8d | 0x0018 | unimplemented |
| r9d | 0x0019 | unimplemented |
| r10d | 0x001A | unimplemented |
| r11d | 0x001B | unimplemented |
| r12d | 0x001C | unimplemented |
| r13d | 0x001D | unimplemented |
| r14d | 0x001E | unimplemented |
| r15d | 0x001F | unimplemented |
| ax | 0x0020 | unimplemented |
| bx | 0x0021 | unimplemented |
| cx | 0x0022 | unimplemented |
| dx | 0x0023 | unimplemented |
| si | 0x0024 | unimplemented |
| di | 0x0025 | unimplemented |
| sp | 0x0026 | unimplemented |
| bp | 0x0027 | unimplemented |
| r8w | 0x0028 | unimplemented |
| r9w | 0x0029 | unimplemented |
| r10w | 0x002A | unimplemented |
| r11w | 0x002B | unimplemented |
| r12w | 0x002C | unimplemented |
| r13w | 0x002D | unimplemented |
| r14w | 0x002E | unimplemented |
| r15w | 0x002F | unimplemented |
| al | 0x0030 | unimplemented |
| bl | 0x0031 | unimplemented |
| cl | 0x0032 | unimplemented |
| dl | 0x0033 | unimplemented |
| sil | 0x0034 | unimplemented |
| dil | 0x0035 | unimplemented |
| spl | 0x0036 | unimplemented |
| bpl | 0x0037 | unimplemented |
| r8b | 0x0038 | unimplemented |
| r9b | 0x0039 | unimplemented |
| r10b | 0x003A | unimplemented |
| r11b | 0x003B | unimplemented |
| r12b | 0x003C | unimplemented |
| r13b | 0x003D | unimplemented |
| r14b | 0x003E | unimplemented |
| r15b | 0x003F | unimplemented |
| ah | 0x0040 | unimplemented |
| bh | 0x0041 | unimplemented |
| ch | 0x0042 | unimplemented |
| dh | 0x0043 | unimplemented |
| cs | 0x0044 | unimplemented |
| ds | 0x0045 | unimplemented |
| es | 0x0046 | unimplemented |
| fs | 0x0047 | unimplemented |
| gs | 0x0048 | unimplemented |
| ss | 0x0049 | unimplemented |
| cr0 | 0x004A | unimplemented |
| cr2 | 0x004B | unimplemented |
| cr3 | 0x004C | unimplemented |
| cr4 | 0x004D | unimplemented |
| cr8 | 0x004E | unimplemented |
| hlt | 0x004F | |
| int3 | 0x0050 | |
| | 0x0051 | deprecated; formerly `[`. Now `0x10XX` is used. |
| | 0x0052 | deprecated; formerly `]`. |
| xor | 0x0053 | |
| inc | 0x0054 | |
| dec | 0x0055 | |
| mov | 0x0056 | |
| add | 0x0057 | |
| sub | 0x0058 | |
| call | 0x0059 | |
| ret | 0x005A | |
| cmp | 0x005B | |
| | 0x10XX | some memory address; `XX` is as specified below |
| | 0xFEXX | used to pass some raw value `XX` in place of a token id |
| | 0xFFFF | unrecognised token |
values of `XX` in `0x10XX`:
| XX | description |
|------|-------------|
| 0x00 | following byte is the token ID of some register |
### example program
#### program in assembly
this program doesn't do anything useful, it's just a test
```nasm
xor eax, eax
inc rax ; inline comment
; one line comment
mov rdx, [rax]
mov [rax], rdx
hlt
```
#### tokenization
```nasm
0x0053 ; xor
0x0010 ; eax
0x0010 ; eax
0x0054 ; inc
0x0000 ; rax
0x0056 ; mov
0x0003 ; rdx
0x1000 ; memory address: register
0x0000 ; rax
0x0056 ; mov
0x1000 ; memory address: register
0x0000 ; rax
0x0003 ; rdx
0x004F ; hlt
```
#### nasm output with the above example program, bits 64
```nasm
0x31 ; XOR r/m16/32/64 r16/32/64
0xC0 ; ModR/M byte
; mod 11b ; directly address the following:
; reg 000b ; EAX
; r/m 000b ; EAX
0x48 ; 64 Bit Operand Size prefix
0xFF ; with `reg` from ModR/M byte 000b:
; INC r/m16/32/64
0xC0 ; ModR/M byte
; mod 11b ; direct addressing
; reg 000b ; RAX
; r/m 000b ; RAX
0x48 ; 64 Bit Operand Size prefix
0x8B ; MOV r16/32/64 r/m16/32/64
0x10 ; ModR/M byte
; mod 00b ; indirect addressing, no displacement
; reg 010b ; RDX
; r/m 000b ; [RAX]
0x48 ; 64 Bit Operand Size prefix
0x89 ; MOV r/m16/32/64 r16/32/64
0x10 ; ModR/M byte
; mod 00b ; indirect addressing, no displacement
; reg 010b ; RDX
; r/m 000b ; [RAX]
0xF4 ; HLT
```

2098
twasm/asm/main.asm Normal file

File diff suppressed because it is too large Load Diff

664
twasm/asm/tests.asm Normal file
View File

@@ -0,0 +1,664 @@
; ------------------------------------------------------------------------------
; tests
; ------------------------------------------------------------------------------
; ------------------------------------------------------------------------------
; run_tests
;
; description:
;   entry point of the test suite. Announces the suite through print.test and
;   then calls every test routine in a fixed order. clear_test_arena is called
;   between consecutive tests (not before the first, not after the last).
;
; parameters:
;   none
;
; returns:
;   nothing is read back from the tests here; each test reports its own result
; ------------------------------------------------------------------------------
run_tests:
        mov     rsi, .msg
        call    print.test

        call    test_completeness
        call    clear_test_arena

        call    test_djb2
        call    clear_test_arena

        call    test_elemb
        call    clear_test_arena

        call    test_get_tte_type
        call    clear_test_arena

        call    test_get_tte_typed_metadata
        call    clear_test_arena

        call    test_get_opcode
        call    clear_test_arena

        call    test_get_reg_bits
        call    clear_test_arena

        call    test_evaluate_constant
        call    clear_test_arena

        call    test_identify_register
        call    clear_test_arena

        call    test_identify_operator
        call    clear_test_arena

        call    test_evaluate_operand   ; final test: no arena clear follows
        ret

.msg    db "running test suite...", 0x0A, 0x00
; ------------------------------------------------------------------------------
; test_completeness
;
; description:
;   visual (not automated) check of binary integrity. Prints " here is the "
;   followed by the string at msg_end, then prints a hard-coded reference line
;   and asks the reader to confirm that both lines look the same.
;   NOTE(review): msg_end is defined outside this file; presumably it sits at
;   the very end of the binary so a truncated load shows up here - confirm.
;
; parameters:
;   none
;
; returns:
;   nothing; there is no pass/fail verdict, the output must be read by a human
; ------------------------------------------------------------------------------
test_completeness:
        mov     rsi, .msg
        call    print.test

        mov     rsi, .msg_content       ; first half of the line under test
        call    print

        mov     rsi, msg_end            ; second half comes from outside
        call    print

        mov     rsi, .msg_confirm       ; reference line + instruction
        call    print

        ret

.msg            db "test_completeness...", 0x0A, 0x00
.msg_content    db " here is the ", 0x00
.msg_confirm    db " here is the end of the binary ->|", 0x0A, " assert: the previous 2 lines are identical", 0x0A, 0x00
; ------------------------------------------------------------------------------
; test_elemb
;
; description:
;   exercises elemb as this test uses it:
;     rdi = number of bytes in the list
;     rsi = address of the list
;     dl  = byte to look for
;     al  = 1 when the byte is in the list, 0 otherwise
;   covers a hit on the first, last and second element, a miss, and an empty
;   list. Prints msg_pass when every case holds, msg_fail at the first mismatch.
; ------------------------------------------------------------------------------
test_elemb:
        mov     rsi, .msg
        call    print.test

        ; hit at index 0
        mov     rdi, 5
        mov     rsi, .case1
        mov     dl, [.case1]
        call    elemb
        cmp     al, 1
        jne     .fail

        ; hit at index n - 1
        mov     rdi, 5
        mov     rsi, .case1
        mov     dl, [.case1 + 4]
        call    elemb
        cmp     al, 1
        jne     .fail

        ; hit at index 1
        mov     rdi, 5
        mov     rsi, .case1
        mov     dl, [.case1 + 1]
        call    elemb
        cmp     al, 1
        jne     .fail

        ; byte that does not occur in the list
        mov     rdi, 5
        mov     rsi, .case1
        mov     dl, 0xDA
        call    elemb
        test    al, al                  ; expect 0
        jnz     .fail

        ; empty list: .case0 shares its address with .case1, which does contain
        ; 0x34, so a hit here means the length of 0 was ignored
        mov     rdi, 0
        mov     rsi, .case0
        mov     dl, 0x34
        call    elemb
        test    al, al                  ; expect 0
        jnz     .fail

.pass:
        mov     rsi, msg_pass
        jmp     .report
.fail:
        mov     rsi, msg_fail
.report:
        call    print
        ret

.case0: ; [This Page Intentionally Left Blank]
.case1  db 0x54, 0x00, 0x21, 0x20, 0x34
.msg    db "test_elemb...", 0x00
; ------------------------------------------------------------------------------
; test_djb2
;
; description:
;   exercises djb2 as this test uses it:
;     rsi = address of the string
;     rdi = length of the string in bytes
;     rax = hash
;   the expected values follow hash = hash * 33 + byte, starting from 5381.
;   every input is hashed twice to show that a call leaves no state behind.
;   Prints msg_pass when every case holds, msg_fail at the first mismatch.
; ------------------------------------------------------------------------------
test_djb2:
.hash_empty     equ 5381                ; ""   : the initial value
.hash_a         equ 177670              ; "a"  : 5381 * 33 + 'a'
.hash_ab        equ 5863208             ; "ab" : 177670 * 33 + 'b'

        mov     rsi, .msg
        call    print.test

        ; first round
        mov     rsi, .case0
        mov     rdi, 0
        call    djb2
        cmp     rax, .hash_empty
        jne     .fail

        mov     rsi, .case1
        mov     rdi, 1
        call    djb2
        cmp     rax, .hash_a
        jne     .fail

        mov     rsi, .case2
        mov     rdi, 2
        call    djb2
        cmp     rax, .hash_ab
        jne     .fail

        ; second round: same inputs, same expected hashes
        mov     rsi, .case0
        mov     rdi, 0
        call    djb2
        cmp     rax, .hash_empty
        jne     .fail

        mov     rsi, .case1
        mov     rdi, 1
        call    djb2
        cmp     rax, .hash_a
        jne     .fail

        mov     rsi, .case2
        mov     rdi, 2
        call    djb2
        cmp     rax, .hash_ab
        jne     .fail

.pass:
        mov     rsi, msg_pass
        jmp     .report
.fail:
        mov     rsi, msg_fail
.report:
        call    print
        ret

.case0  db ""
.case1  db "a"
.case2  db "ab"
.msg    db "test_djb2...", 0x00
; ------------------------------------------------------------------------------
; test_get_tte_type
;
; description:
;   exercises get_tte_type as this test uses it:
;     di = token ID
;     al = type of that token (0x01 operator, 0x02 register,
;          UNRECOGNISED_ID_TYPE for an ID that has no table entry)
;   Prints msg_pass when every case holds, msg_fail at the first mismatch.
;
; note:
;   token IDs are loaded through edi rather than di. A 16 bit write would leave
;   whatever print.test or the previous call left in the upper bits of rdi;
;   writing edi zero-extends, so the callee sees a clean value however wide a
;   register it reads.
; ------------------------------------------------------------------------------
test_get_tte_type:
        mov     rsi, .msg
        call    print.test

        mov     edi, 0x0053             ; xor
        call    get_tte_type
        cmp     al, 0x01                ; operator
        jne     .fail

        mov     edi, 0x0003             ; rdx
        call    get_tte_type
        cmp     al, 0x02                ; register
        jne     .fail

        mov     edi, 0x0056             ; mov
        call    get_tte_type
        cmp     al, 0x01                ; operator
        jne     .fail

        mov     edi, 0xFFFF             ; unrecognised token
        call    get_tte_type
        cmp     al, UNRECOGNISED_ID_TYPE
        jne     .fail

.pass:
        mov     rsi, msg_pass
        call    print
        ret

.fail:
        mov     rsi, msg_fail
        call    print
        ret

.msg    db "test_get_tte_type...", 0x00
; ------------------------------------------------------------------------------
; test_get_tte_typed_metadata
;
; description:
;   exercises get_tte_typed_metadata as this test uses it:
;     di = token ID
;     al = typed metadata byte of that token
;          operators: number of operands
;          registers: reg value in bits 4..2, width in bits 1..0
;          UNRECOGNISED_ID_METADATA for an ID that has no table entry
;   Prints msg_pass when every case holds, msg_fail at the first mismatch.
;
; note:
;   token IDs are loaded through edi rather than di. A 16 bit write would leave
;   whatever print.test or the previous call left in the upper bits of rdi;
;   writing edi zero-extends, so the callee sees a clean value however wide a
;   register it reads.
; ------------------------------------------------------------------------------
test_get_tte_typed_metadata:
        mov     rsi, .msg
        call    print.test

        mov     edi, 0x0053             ; xor
        call    get_tte_typed_metadata
        cmp     al, 0x02                ; # operands
        jne     .fail

        mov     edi, 0x004F             ; hlt
        call    get_tte_typed_metadata
        cmp     al, 0x00                ; # operands
        jne     .fail

        mov     edi, 0x0003             ; rdx
        call    get_tte_typed_metadata
        cmp     al, 00001011b           ; reg:   010b
                                        ; width: 11b (64 bits)
        jne     .fail

        mov     edi, 0x0056             ; mov
        call    get_tte_typed_metadata
        cmp     al, 0x02                ; # operands
        jne     .fail

        mov     edi, 0xFFFF             ; unrecognised token
        call    get_tte_typed_metadata
        cmp     al, UNRECOGNISED_ID_METADATA
        jne     .fail

.pass:
        mov     rsi, msg_pass
        call    print
        ret

.fail:
        mov     rsi, msg_fail
        call    print
        ret

.msg    db "test_get_tte_typed_metadata...", 0x00
; ------------------------------------------------------------------------------
; test_get_opcode
;
; description:
;   exercises get_opcode as this test uses it:
;     di  = token ID of the operator
;     sil = which opcode of that operator's table entry to fetch (0, 1, 2, ...)
;     al  = opcode byte, UNRECOGNISED_ID_OPCODE when di is not an operator
;     dl  = flag that accompanies the opcode; the expected values below are
;           written in octal
;           NOTE(review): these match the x86 ModR/M "/digit" opcode extension
;           (xor imm = /6, inc = /0, dec = /1, call = /2) - confirm
;   Prints msg_pass when every case holds, msg_fail at the first mismatch.
;
; note:
;   arguments are loaded through edi / esi rather than di / sil. The narrow
;   writes would leave whatever print.test or the previous call left in the
;   upper bits of rdi / rsi; writing the 32 bit register zero-extends, so the
;   callee sees clean values however wide a register it reads.
; ------------------------------------------------------------------------------
test_get_opcode:
        mov     rsi, .msg
        call    print.test

        mov     edi, 0x0053             ; xor
        xor     esi, esi                ; opcode 0
        call    get_opcode
        cmp     al, 0x31
        jne     .fail
        cmp     dl, 0q0
        jne     .fail

        mov     edi, 0x0053             ; xor
        mov     esi, 1                  ; opcode 1
        call    get_opcode
        cmp     al, 0x33
        jne     .fail
        cmp     dl, 0q0
        jne     .fail

        mov     edi, 0x0053             ; xor
        mov     esi, 2                  ; opcode 2
        call    get_opcode
        cmp     al, 0x81
        jne     .fail
        cmp     dl, 0q6
        jne     .fail

        mov     edi, 0x0053             ; xor
        mov     esi, 3                  ; opcode 3
        call    get_opcode
        cmp     al, 0x83
        jne     .fail
        cmp     dl, 0q6
        jne     .fail

        mov     edi, 0x0054             ; inc
        xor     esi, esi                ; opcode 0
        call    get_opcode
        cmp     al, 0xFF
        jne     .fail
        cmp     dl, 0q0
        jne     .fail

        mov     edi, 0x0055             ; dec
        xor     esi, esi                ; opcode 0
        call    get_opcode
        cmp     al, 0xFF
        jne     .fail
        cmp     dl, 0q1
        jne     .fail

        mov     edi, 0x004F             ; hlt
        xor     esi, esi                ; opcode 0
        call    get_opcode
        cmp     al, 0xF4
        jne     .fail
        cmp     dl, 0q0
        jne     .fail

        mov     edi, 0x0059             ; call
        xor     esi, esi                ; opcode 0
        call    get_opcode
        cmp     al, 0xFF
        jne     .fail
        cmp     dl, 0q2
        jne     .fail

        mov     edi, 0x0003             ; rdx (not an operator)
        xor     esi, esi                ; opcode 0
        call    get_opcode
        cmp     al, UNRECOGNISED_ID_OPCODE
        jne     .fail                   ; dl is deliberately not checked here

.pass:
        mov     rsi, msg_pass
        call    print
        ret

.fail:
        mov     rsi, msg_fail
        call    print
        ret

.msg    db "test_get_opcode...", 0x00
; ------------------------------------------------------------------------------
; test_get_reg_bits
;
; description:
;   exercises get_reg_bits as this test uses it:
;     di = token ID of a register
;     al = 3 bit value of that register
;   rax and eax must both give 000b, rdx must give 010b.
;   Prints msg_pass when every case holds, msg_fail at the first mismatch.
;
; note:
;   token IDs are loaded through edi rather than di. A 16 bit write would leave
;   whatever print.test or the previous call left in the upper bits of rdi;
;   writing edi zero-extends, so the callee sees a clean value however wide a
;   register it reads.
; ------------------------------------------------------------------------------
test_get_reg_bits:
        mov     rsi, .msg
        call    print.test

        mov     edi, 0x0000             ; rax
        call    get_reg_bits
        cmp     al, 000b
        jne     .fail

        mov     edi, 0x0010             ; eax
        call    get_reg_bits
        cmp     al, 000b
        jne     .fail

        mov     edi, 0x0003             ; rdx
        call    get_reg_bits
        cmp     al, 010b
        jne     .fail

.pass:
        mov     rsi, msg_pass
        call    print
        ret

.fail:
        mov     rsi, msg_fail
        call    print
        ret

.msg    db "test_get_reg_bits...", 0x00
; ------------------------------------------------------------------------------
; test_evaluate_constant
;
; description:
;   exercises evaluate_constant as this test uses it:
;     rdi = address of the constant's text
;     rsi = length of that text in bytes
;     rax = value of the constant
;     rdx = kind of constant: 0x00 hex (0x..), 0x01 octal (0q..),
;           0x02 binary (0b..), 0x03 quoted characters, 0xFF on error
;   Prints msg_pass when every case holds, msg_fail at the first mismatch.
; ------------------------------------------------------------------------------
test_evaluate_constant:
        mov     rsi, .msg
        call    print.test

        ; hex, decimal digits only
        mov     rdi, .case0h
        mov     rsi, 8
        call    evaluate_constant
        cmp     rax, [.case0h_solution]
        jne     .fail
        test    rdx, rdx                ; expect 0x00
        jnz     .fail

        ; hex, letter digits only
        mov     rdi, .case1h
        mov     rsi, 8
        call    evaluate_constant
        cmp     rax, [.case1h_solution]
        jne     .fail
        test    rdx, rdx                ; expect 0x00
        jnz     .fail

        ; hex, too wide for 32 bits
        mov     rdi, .case2h
        mov     rsi, 12
        call    evaluate_constant
        cmp     rax, [.case2h_solution]
        jne     .fail
        test    rdx, rdx                ; expect 0x00
        jnz     .fail

        ; hex, 16 digits (hex digits of pi)
        mov     rdi, .case3h
        mov     rsi, 18
        call    evaluate_constant
        cmp     rax, [.case3h_solution]
        jne     .fail
        test    rdx, rdx                ; expect 0x00
        jnz     .fail

        ; octal (octal digits of pi)
        mov     rdi, .case0q
        mov     rsi, 16
        call    evaluate_constant
        cmp     rax, [.case0q_solution]
        jne     .fail
        cmp     rdx, 0x01
        jne     .fail

        ; binary, 64 digits
        mov     rdi, .case0b
        mov     rsi, 66
        call    evaluate_constant
        cmp     rax, [.case0b_solution]
        jne     .fail
        cmp     rdx, 0x02
        jne     .fail

        ; quoted characters
        mov     rdi, .case0c
        mov     rsi, 6
        call    evaluate_constant
        cmp     rax, [.case0c_solution]
        jne     .fail
        cmp     rdx, 0x03
        jne     .fail

        ; too many quoted characters: only the error code is checked, so
        ; .case1c_solution below is never read
        mov     rdi, .case1c
        mov     rsi, 7
        call    evaluate_constant
        cmp     rdx, 0xFF
        jne     .fail

.pass:
        mov     rsi, msg_pass
        jmp     .report
.fail:
        mov     rsi, msg_fail
.report:
        call    print
        ret

.msg                    db "test_evaluate_constant...", 0x00

.case0h                 db "0x012390"
.case0h_solution        dq 0x012390
.case1h                 db "0xABCDEF"
.case1h_solution        dq 0xABCDEF
.case2h                 db "0x1234567890"
.case2h_solution        dq 0x1234567890
.case3h                 db "0x243F6A8885A308D3"
.case3h_solution        dq 0x243F6A8885A308D3

.case0c                 db '"char"'
.case0c_solution        dq "char"
.case1c                 db '"chars"'
.case1c_solution        dq "chars"
; " this stray quote re-balances the quotes above for editors that mis-highlight

.case0q                 db "0q31103755242102"
.case0q_solution        dq 0q31103755242102

.case0b                 db "0b0110011001101001011100100111001101110100001000000011011000110100"
.case0b_solution        dq 0b0110011001101001011100100111001101110100001000000011011000110100
; ------------------------------------------------------------------------------
; test_identify_register
;
; description:
;   exercises identify_register as this test uses it:
;     edi = register name, its characters packed into the register
;     ax  = token ID, or UNRECOGNISED_TOKEN_ID when the name is unknown
;   covers one known register, a 4 character name that is not a register, and
;   the empty name.
;   Prints msg_pass when every case holds, msg_fail at the first mismatch.
; ------------------------------------------------------------------------------
test_identify_register:
        mov     rsi, .msg
        call    print.test

        ; known register
        mov     edi, "rcx"
        call    identify_register
        cmp     ax, 0x0002
        jne     .fail

        ; not a register
        mov     edi, "RaNd"
        call    identify_register
        cmp     ax, UNRECOGNISED_TOKEN_ID
        jne     .fail

        ; empty name
        mov     edi, ""
        call    identify_register
        cmp     ax, UNRECOGNISED_TOKEN_ID
        jne     .fail

.pass:
        mov     rsi, msg_pass
        jmp     .report
.fail:
        mov     rsi, msg_fail
.report:
        call    print
        ret

.msg    db "test_identify_register...", 0x00
; ------------------------------------------------------------------------------
; test_identify_operator
;
; description:
;   exercises identify_operator as this test uses it:
;     edi = operator name, its characters packed into the register
;     ax  = token ID, or UNRECOGNISED_TOKEN_ID when the name is unknown
;   covers one known operator, the 4 bytes stored at tokens.operators_end, a
;   4 character name that is not an operator, and the empty name.
;   Prints msg_pass when every case holds, msg_fail at the first mismatch.
; ------------------------------------------------------------------------------
test_identify_operator:
        mov     rsi, .msg
        call    print.test

        ; known operator
        mov     edi, "xor"
        call    identify_operator
        cmp     ax, 0x0053
        jne     .fail

        ; whatever is stored at the end label of the operator table must not
        ; match. NOTE(review): presumably guards against the search reading
        ; past the end of the table - confirm against identify_operator
        mov     edi, [tokens.operators_end]
        call    identify_operator
        cmp     ax, UNRECOGNISED_TOKEN_ID
        jne     .fail

        ; not an operator
        mov     edi, "RaNd"
        call    identify_operator
        cmp     ax, UNRECOGNISED_TOKEN_ID
        jne     .fail

        ; empty name
        mov     edi, ""
        call    identify_operator
        cmp     ax, UNRECOGNISED_TOKEN_ID
        jne     .fail

.pass:
        mov     rsi, msg_pass
        jmp     .report
.fail:
        mov     rsi, msg_fail
.report:
        call    print
        ret

.msg    db "test_identify_operator...", 0x00
; ------------------------------------------------------------------------------
; test_evaluate_operand
;
; description:
;   exercises evaluate_operand as this test uses it:
;     rdi = address of the operand's text
;     rsi = length of that text in bytes
;     ax  = token ID found in the operand
;     dl  = kind of operand: 0x00 for a plain register, 0x10 for a register in
;           square brackets, 0xFF on error
;   Prints msg_pass when every case holds, msg_fail at the first mismatch.
; ------------------------------------------------------------------------------
test_evaluate_operand:
        mov     rsi, .msg
        call    print.test

        ; "rax": plain register, ID 0x0000
        mov     rdi, .case0
        mov     rsi, 3
        call    evaluate_operand
        test    dl, dl                  ; expect 0x00
        jnz     .fail
        test    ax, ax                  ; expect 0x0000
        jnz     .fail

        ; zero length operand: error
        mov     rdi, .case1
        mov     rsi, 0
        call    evaluate_operand
        cmp     dl, 0xFF
        jne     .fail

        ; "rdx": plain register, ID 0x0003
        mov     rdi, .case2
        mov     rsi, 3
        call    evaluate_operand
        test    dl, dl                  ; expect 0x00
        jnz     .fail
        cmp     ax, 0x0003
        jne     .fail

        ; "[rdx]": bracketed register, ID 0x0003
        mov     rdi, .case3
        mov     rsi, 5
        call    evaluate_operand
        cmp     dl, 0x10
        jne     .fail
        cmp     ax, 0x0003
        jne     .fail

.pass:
        mov     rsi, msg_pass
        jmp     .report
.fail:
        mov     rsi, msg_fail
.report:
        call    print
        ret

.case0  db "rax"
.case1: ; intentionally blank
.case2  db "rdx"
.case3  db "[rdx]"
.msg    db "test_evaluate_operand...", 0x00
; ------------------------------------------------------------------------------
; msg_pass / msg_fail
;
; description:
;   null terminated verdict strings that the tests hand to print. Each one is
;   a line feed, a run of spaces, the verdict text, a line feed and 0x00.
;
;   .end - .start is the length of the verdict text (7 bytes for both), so the
;   `times` count works out to TEST_LINE_LENGTH - 7 spaces, which puts the last
;   character of the verdict in column TEST_LINE_LENGTH.
;   TEST_LINE_LENGTH is defined outside this file.
; ------------------------------------------------------------------------------
msg_pass:
db 0x0A
times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align
.start db "passed."
.end db 0x0A, 0x00
msg_fail:
db 0x0A
; same padding arithmetic as msg_pass; .start/.end here are msg_fail's own
; local labels
times (TEST_LINE_LENGTH + .start - .end) db " ",
.start db "failed."
.end db 0x0A, 0x00

46
twasm/package.nix Normal file
View File

@@ -0,0 +1,46 @@
# Package definition for twasm.
#
# Inputs:
#   bootler - its bin/boot.bin is written to the first sector of the disk image
#   nasm    - assembles asm/main.asm
#   qemu    - runs the disk image from the generated launcher
#
# Outputs in $out/bin:
#   twasm   - launcher script that boots the image in qemu
#   out.bin - the assembled twasm binary
#   disk    - the disk image (boot sector followed by out.bin)
{
bootler,
stdenv,
nasm,
qemu,
...
}:
stdenv.mkDerivation {
pname = "twasm";
version = "0.1.0";
src = ./.;
# Assemble asm/main.asm into out.bin with every nasm warning class enabled
# except reloc-abs. No -f is passed, so nasm's default output format is used.
buildPhase = ''
${nasm}/bin/nasm \
asm/main.asm \
-o out.bin \
-w+all \
-w-reloc-abs
'';
# 1. Build a zero-filled image of 2880 sectors of 512 bytes, write bootler's
#    boot.bin at its start and out.bin from sector 1 (seek=1) onwards;
#    conv=notrunc keeps the image at full size.
# 2. Generate $out/bin/twasm. The heredoc delimiter is unquoted, so
#    $(echo $out) is expanded while the derivation builds and the script ends
#    up with the store path written into it. The script copies the image to
#    ./.bootle, makes the copy writable, runs qemu on it (-nographic, and -s
#    to open qemu's gdb server) and removes ./.bootle afterwards.
# 3. Copy out.bin and the image next to the launcher.
installPhase = ''
dd if=/dev/zero of=disk bs=512 count=2880
dd if=${bootler}/bin/boot.bin of=disk conv=notrunc
dd if=out.bin of=disk bs=512 seek=1 conv=notrunc
mkdir -p $out/bin
cat<<EOF>$out/bin/twasm
#!/usr/bin/env bash
mkdir -p ./.bootle
cp $(echo $out)/bin/disk ./.bootle/disk
chmod a+w ./.bootle/disk
${qemu}/bin/qemu-system-x86_64 \
-nographic \
-s \
-drive file=./.bootle/disk,format=raw,index=0,media=disk
rm ./.bootle -r
EOF
chmod +x $out/bin/twasm
cp out.bin $out/bin
cp disk $out/bin
'';
}