Compare commits

...

60 Commits

Author SHA1 Message Date
andromeda
562d5ceee9 update readme 2026-04-04 17:21:21 +02:00
andromeda
a2d66bbb4d remove silly align that fixed bug that no longer exists 2026-04-04 17:18:44 +02:00
andromeda
3a6275fc53 IT'S ALIIIVE 2026-04-04 17:13:55 +02:00
andromeda
0e423fa763 do awaiting labels, fix bug, make call not crash xd 2026-04-04 13:47:39 +02:00
andromeda
34b11aabe5 improve readme a bit 2026-04-04 10:49:40 +02:00
andromeda
767453bd55 save stack in tokenise 2026-04-04 10:26:15 +02:00
andromeda
df8e04ce63 mask smth 2026-04-04 09:50:40 +02:00
andromeda
a8bf6749b8 fix test_evaluate_constant 2026-04-02 23:46:20 +02:00
andromeda
e8c1313ece :p 2026-04-02 23:44:17 +02:00
andromeda
c1463e1fef add pseudo-operator support to an extremely minimal degree 2026-04-02 23:38:10 +02:00
andromeda
57f8f5a118 operator_2_register_label, fix bug in AWAITING_LABEL_TABLE add function 2026-04-02 17:16:34 +02:00
andromeda
793677a2da tweak 2026-04-02 12:39:38 +02:00
andromeda
16f26fd552 no mod r/m flag on 'out' opcode, use r/m<-r as default r<-r 2026-04-02 12:17:47 +02:00
andromeda
91a609040f rework flags in opcode table 2026-04-02 09:40:58 +02:00
andromeda
8f3d6b91f9 make out easier xD 2026-04-02 08:26:37 +02:00
andromeda
18125e6b20 fix fatal bug in data, get prefix down, start work on awaiting_label, add debug function, more 2026-04-02 00:13:47 +02:00
andromeda
395c42dff4 add 8 bit opcode support 2026-03-31 22:20:30 +02:00
andromeda
ad9be1029c start w labels idk 2026-03-31 21:05:41 +02:00
andromeda
0c7418b293 improve readme a bit 2026-03-30 20:14:10 +02:00
andromeda
a0a99e3116 add some operators, stop printing whitespace while tokenising, add frame for pseudo-op support 2026-03-30 20:11:36 +02:00
andromeda
7a3e1fc37c remove bootle 2026-03-30 17:03:36 +02:00
andromeda
f789d49e8a tokenise labels and constants! Now assembly highkey fails but ok 2026-03-30 16:09:25 +02:00
andromeda
b1e7d2e3d5 check sizes, fix bug with buffer, fix a couple other bugs, add more registers to tokens.registers 2026-03-29 09:36:55 +02:00
andromeda
d51de0cc1d buffer writes, add to example 2026-03-26 21:11:23 +01:00
andromeda
9becdea2b9 minor improvements 2026-03-25 21:37:40 +01:00
andromeda
2960c1b795 clean up some stuff 2026-03-25 21:14:34 +01:00
andromeda
31a438d1ee prettify some printing, tidy loop 2026-03-25 17:48:58 +01:00
andromeda
20162ab02c clean up labels 2026-03-24 22:33:14 +01:00
andromeda
121a0df8e4 improve data structures 2026-03-24 20:49:52 +01:00
andromeda
74fc57cdfc add more stuff to the lookup tables 2026-03-24 11:16:39 +01:00
andromeda
26b6b44caf improve print 2026-03-24 10:05:21 +01:00
andromeda
8b0e4759be update example in readme 2026-03-23 23:54:27 +01:00
andromeda
b607bd13f3 IT SORTA WORKS LESGOOO 2026-03-23 23:37:39 +01:00
andromeda
40be72a5c3 remove superfluety 2026-03-23 22:29:13 +01:00
andromeda
f4f0f50d80 test completeness, couple semantics 2026-03-23 22:27:52 +01:00
andromeda
9233ca421b ;) 2026-03-23 18:11:27 +01:00
andromeda
9a88a859cf remove some stuff it doesn't need 2026-03-23 17:39:26 +01:00
andromeda
75e9c27dda remove superfluous wrapper function 2026-03-23 17:23:09 +01:00
andromeda
b952210561 add hash 2026-03-23 17:00:32 +01:00
andromeda
50e635332c start migration to better parsing of arguments 2026-03-23 16:25:10 +01:00
andromeda
cab89cdd2b a couple semantics 2026-03-22 04:19:51 +01:00
andromeda
55c426631b correctly handle [register]s as operands 2026-03-21 21:42:50 +01:00
andromeda
73ea2bb2b5 add addressing to evaluate_operand 2026-03-20 09:04:50 +01:00
andromeda
a5fd811b3f add trim_trailing_whitespace 2026-03-19 21:58:59 +01:00
andromeda
ad6a79d937 throw error with non-register operands 2026-03-18 20:52:00 +01:00
andromeda
ac0a10fafd finally change .gitignore, correctly handle register operands 2026-03-18 20:45:12 +01:00
andromeda
2e222c304a add evaluate_operand, fix operator tokenising 2026-03-18 20:26:30 +01:00
andromeda
ec88bd7381 rearrange test case 2026-03-18 18:41:48 +01:00
andromeda
d5c2dde221 identify some tokens 2026-03-18 16:53:35 +01:00
andromeda
de60e52c5a range check char length 2026-03-18 16:21:34 +01:00
andromeda
17a0a1a406 yeh only chr consts fn 2026-03-18 16:18:19 +01:00
andromeda
e79a30ba71 add rough char/str parsing 2026-03-17 21:16:17 +01:00
andromeda
0160d2e127 add binary 2026-03-15 22:02:36 +01:00
andromeda
c05adee382 add octal 2026-03-15 21:55:16 +01:00
andromeda
64d3e4f635 remove dead lines 2026-03-15 21:19:30 +01:00
andromeda
238069aa0d evaluate constants as long as they are hex in the form 0x 2026-03-15 21:18:40 +01:00
andromeda
4a3350fe4e get token id of operators 2026-03-13 22:49:15 +01:00
andromeda
0ee8ff7914 some major architecture changes 2026-03-12 23:03:29 +01:00
andromeda
bc19d760b9 remove dead lines 2026-03-12 14:30:24 +01:00
andromeda
065a746ca4 remove disk again, oops x( 2026-03-12 14:27:16 +01:00
18 changed files with 3583 additions and 1719 deletions

1
.gitignore vendored
View File

@@ -1 +1,2 @@
result* result*
.bootle*

View File

@@ -1,11 +1,5 @@
Call me Terry Davis because... actually please don't. I have visions: aspirations, not hallucinations :p Call me Terry Davis because... actually please don't. I have visions: aspirations, not hallucinations :p
# bootle
hobby kernel
status: basically nothing, come back later
# bootler # bootler
hobby 1-stage legacy mode bootloader hobby 1-stage legacy mode bootloader
@@ -16,4 +10,4 @@ status: gets to long mode, loads+jumps to kernel, starts idt and gdt... :)
hobby self-hosted assembler hobby self-hosted assembler
status: basically nothing, come back later status: tokenises a lot of common stuff, compiles a lot of common stuff... not good enough for anything real-world yet. It can assemble its own print function!!!

View File

@@ -1,5 +0,0 @@
[build]
target = "x86_64-unknown-none"
rustflags = [
"-Crelocation-model=static"
]

View File

@@ -1,7 +0,0 @@
# Changelog
## [0.1.0] - 2026-03-05
### Added
- CHANGELOG.md

359
bootle/Cargo.lock generated
View File

@@ -1,359 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "addr2line"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9acbfca36652500c911ddb767ed433e3ed99b032b5d935be73c6923662db1d43"
dependencies = [
"cpp_demangle",
"fallible-iterator",
"gimli",
"memmap2",
"object",
"rustc-demangle",
"smallvec",
"typed-arena",
]
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "bootle"
version = "0.1.0"
dependencies = [
"addr2line",
"cfg-if",
"dlmalloc",
"fortanix-sgx-abi",
"getopts",
"hashbrown",
"hermit-abi",
"object",
"r-efi",
"r-efi-alloc",
"rustc-demangle",
"rustc-literal-escaper",
"unwinding",
"wasi",
]
[[package]]
name = "cfg-if"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
[[package]]
name = "cpp_demangle"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2bb79cb74d735044c972aae58ed0aaa9a837e85b01106a54c39e42e97f62253"
dependencies = [
"cfg-if",
]
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "dlmalloc"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa3a2dbee57b69fbb5dbe852fa9c0925697fb0c7fbcb1593e90e5ffaedf13d51"
dependencies = [
"cfg-if",
"libc",
"windows-sys",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "flate2"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "fortanix-sgx-abi"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5efc85edd5b83e8394f4371dd0da6859dff63dd387dab8568fece6af4cde6f84"
[[package]]
name = "getopts"
version = "0.2.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cba6ae63eb948698e300f645f87c70f76630d505f23b8907cf1e193ee85048c1"
dependencies = [
"unicode-width",
]
[[package]]
name = "gimli"
version = "0.32.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
dependencies = [
"fallible-iterator",
"stable_deref_trait",
]
[[package]]
name = "hashbrown"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]]
name = "hermit-abi"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "libc"
version = "0.2.182"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "memmap2"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
dependencies = [
"libc",
]
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "object"
version = "0.37.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03fd943161069e1768b4b3d050890ba48730e590f57e56d4aa04e7e090e61b4a"
dependencies = [
"flate2",
"memchr",
"ruzstd",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "r-efi-alloc"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc2f58ef3ca9bb0f9c44d9aa8537601bcd3df94cc9314a40178cadf7d4466354"
dependencies = [
"r-efi",
]
[[package]]
name = "rustc-demangle"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
[[package]]
name = "rustc-literal-escaper"
version = "0.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4ee29da77c5a54f42697493cd4c9b9f31b74df666a6c04dfc4fde77abe0438b"
[[package]]
name = "ruzstd"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ff0cc5e135c8870a775d3320910cd9b564ec036b4dc0b8741629020be63f01"
dependencies = [
"twox-hash",
]
[[package]]
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "twox-hash"
version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
[[package]]
name = "typed-arena"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "unwinding"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d80f6c2bfede213d9a90b4a14f3eb99b84e33c52df6c1a15de0a100f5a88751"
dependencies = [
"gimli",
"libc",
]
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

View File

@@ -1,17 +0,0 @@
[package]
name = "bootle"
version = "0.1.0"
edition = "2024"
[lib]
crate-type = ["staticlib"]
[dependencies]
[profile.release]
panic = "abort"
opt-level = 'z'
debug = false
[profile.dev]
panic = "abort"

View File

@@ -1,28 +0,0 @@
# bootle
hobby kernel written in rust. It's just for playing around... for now :p
run with `nix run git+https://git.mtgmonkey.net/andromeda/bootler#bootle`
### memory map
```
+------ 0x00100000 ------+
| hardware, bios stuff |
+------ 0x00080000 ------+
| |
| |
+------ 0x00010200 ------+
| kernel |
+------ 0x00010000 ------+
| bootloader stuff |
| includes stack, gdt, |
| idt for the time being |
+------ 0x00000500 ------+
| bios stuff |
+------ 0x00000000 ------+
```
---
this project follows [Common Changelog](https://common-changelog.org) guidelines

View File

@@ -1,68 +0,0 @@
# Builds the bootle kernel as a static library with naersk, links it into a
# flat binary, and packs it behind the bootler boot sector into a raw disk
# image together with a QEMU launcher script.
{
callPackage,
rust-bin,
naersk,
bootler,
qemu,
...
}: let
# Rust toolchain selected by ./rust-toolchain.toml.
rust-toolchain = rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
# naersk instance that uses that same toolchain for cargo, rustc and clippy.
naersk' = callPackage naersk {
cargo = rust-toolchain;
rustc = rust-toolchain;
clippy = rust-toolchain;
};
in (naersk'.buildPackage {
src = ./.;
# extra lockfile: the Cargo.lock shipped with the toolchain's rust-src
# component (the standard library sources built by -Zbuild-std below)
additionalCargoLock = "${rust-toolchain.availableComponents.rust-src}/lib/rustlib/src/rust/library/Cargo.lock";
# library-only build: copy libraries, not binaries, from a release build
copyBins = false;
copyLibs = true;
release = true;
# append -Zbuild-std to the cargo build options
cargoBuildOptions = x:
x
++ [
"-Zbuild-std"
];
# After the cargo build:
#   1. ld turns libbootle.a into the flat binary kernel.bin, using
#      src/linker.ld and the entry symbol _start.
#   2. dd creates a zero-filled image of 2880 sectors of 512 bytes, writes
#      bootler's boot.bin at the start and kernel.bin from sector 1 (seek=1).
#   3. A launcher script is written to $out/bin/bootle: it copies the disk
#      image into ./.bootle, makes it writable, boots it with
#      qemu-system-x86_64 -nographic, then removes ./.bootle.
#   4. The static library, kernel.bin and the disk image are copied to $out.
postInstall = ''
ld --oformat binary \
-o kernel.bin \
-T src/linker.ld \
-e _start \
target/x86_64-unknown-none/release/libbootle.a
dd if=/dev/zero of=disk bs=512 count=2880
dd if=${bootler}/bin/boot.bin of=disk conv=notrunc
dd if=kernel.bin of=disk bs=512 seek=1 conv=notrunc
mkdir -p $out/lib
mkdir -p $out/bin
cat<<EOF>$out/bin/bootle
#!/usr/bin/env bash
mkdir -p ./.bootle
cp $(echo $out)/bin/disk ./.bootle/disk
chmod a+w ./.bootle/disk
${qemu}/bin/qemu-system-x86_64 \
-nographic \
-drive file=./.bootle/disk,format=raw,index=0,media=disk
rm ./.bootle -r
EOF
chmod +x $out/bin/bootle
cp target/x86_64-unknown-none/release/libbootle.a $out/lib
cp kernel.bin $out/bin
cp disk $out/bin
'';
})

View File

@@ -1,5 +0,0 @@
[toolchain]
channel = "nightly"
components = ["rust-src"]
targets = ["x86_64-unknown-none"]
profile = "default"

View File

@@ -1,55 +0,0 @@
#![no_std]
#![no_main]
use core::panic::PanicInfo;
/// Kernel entry point.
///
/// Exported unmangled with the C ABI so the symbol is literally `_start`.
/// Prints the welcome banner over serial, then halts; it never returns.
#[unsafe(no_mangle)]
pub extern "C" fn _start() -> ! {
    // Greet the user first; nothing else is initialised by this function.
    welcome_serial();

    // Park the CPU. `halt` diverges, which satisfies the `!` return type.
    halt()
}
/// Writes every byte of `s` to the serial data port, one `out` per byte.
///
/// The port number is passed to the instruction explicitly. Previously
/// `out dx, al` ran without `dx` being bound as an operand, so the destination
/// port was whatever value happened to be in the register at that point.
///
/// No transmit-ready polling is performed; bytes are written back to back.
fn print_serial(s: &str) {
    // Data register of the first PC serial port (COM1).
    // NOTE(review): assumes output is expected on COM1 at 0x3F8, which is
    // what a `-nographic` QEMU session displays; confirm for other targets.
    const COM1_DATA_PORT: u16 = 0x3F8;

    for b in s.bytes() {
        // SAFETY: a port write with both operands bound to the registers the
        // instruction requires; the asm block reads and writes no Rust memory.
        unsafe {
            core::arch::asm!(
                "out dx, al",
                in("dx") COM1_DATA_PORT,
                in("al") b,
                options(nomem, nostack, preserves_flags),
            )
        };
    }
}
fn println_serial(s: &str) {
print_serial(s);
print_serial("\n");
}
/// Prints the coloured welcome banner on the serial port.
///
/// Output order: switch to pink, two banner lines, a third line whose key
/// combination is highlighted in red, then reset the terminal attributes.
fn welcome_serial() {
    // Banner lines, each terminated by a line feed.
    print_serial(ANSI_PINK);
    for line in [
        "\nWelcome to Bootle OS",
        "All code GPL licensed and freely available on git.mtgmonkey.net",
    ] {
        println_serial(line);
    }

    // Hint line: the key combination is red, the surrounding text pink.
    for piece in [
        "Enjoy your time! Press ",
        ANSI_RED,
        "ctrl+a x",
        ANSI_PINK,
    ] {
        print_serial(piece);
    }
    println_serial(" to escape Qemu");

    // Restore default terminal attributes.
    print_serial(ANSI_CLEAR);
}
/// Panic handler for this `no_std` crate.
///
/// Ignores the panic details, writes the fixed text `panicked` to the serial
/// port and halts; it never returns.
#[panic_handler]
fn panic(_info: &PanicInfo) -> ! {
    // The panic payload and location are deliberately not printed.
    print_serial("panicked");

    halt()
}
/// Stops the CPU permanently; never returns.
///
/// `hlt` only pauses the core until the next interrupt, so it is re-issued in
/// a loop. The previous version re-halted by calling itself, which could push
/// a new stack frame on every wake-up unless the compiler happened to turn
/// the call into a jump.
fn halt() -> ! {
    loop {
        // SAFETY: `hlt` takes no operands and accesses no memory.
        unsafe { core::arch::asm!("hlt") };
    }
}
// ANSI escape sequences (ESC `[` <code> `m`) used to colour serial output.
// Code 0: reset all attributes.
const ANSI_CLEAR: &str = "\x1b[0m";
// Code 31: red foreground.
const ANSI_RED: &str = "\x1b[31m";
// Code 35: magenta foreground (named "pink" here).
const ANSI_PINK: &str = "\x1b[35m";
// Code 32: green foreground. Not referenced by any function visible in this file.
const ANSI_GREEN: &str = "\x1b[32m";

View File

@@ -1,8 +0,0 @@
/* Layout of the flat kernel image. */
SECTIONS
{
/* Link the image for address 0x00010000. */
. = 0x00010000;
.text : {
/* The .text._start input section is placed first, at the start of the image. */
*(.text._start)
/* All remaining .text* input sections follow it. */
*(.text*)
}
}

View File

@@ -6,7 +6,7 @@
LOAD_ADDR equ 0x7C00 LOAD_ADDR equ 0x7C00
KERNEL_START equ 2 ; first sector on disk to load kernel from; 1 indexed KERNEL_START equ 2 ; first sector on disk to load kernel from; 1 indexed
KERNEL_SIZE equ 16 ; length of kernel in sectors KERNEL_SIZE equ 32 ; length of kernel in sectors
KERNEL_LOAD_ADDR_ES equ 0x1000 ; kernel to be loaded at es * 0x10 + 0x0000 KERNEL_LOAD_ADDR_ES equ 0x1000 ; kernel to be loaded at es * 0x10 + 0x0000
PAGE_TABLE_LOAD_ADDR equ 0x1000 ; start of page table; 4 * pt size PAGE_TABLE_LOAD_ADDR equ 0x1000 ; start of page table; 4 * pt size

90
flake.lock generated
View File

@@ -1,55 +1,12 @@
{ {
"nodes": { "nodes": {
"fenix": {
"inputs": {
"nixpkgs": [
"naersk",
"nixpkgs"
],
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1752475459,
"narHash": "sha256-z6QEu4ZFuHiqdOPbYss4/Q8B0BFhacR8ts6jO/F/aOU=",
"owner": "nix-community",
"repo": "fenix",
"rev": "bf0d6f70f4c9a9cf8845f992105652173f4b617f",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "fenix",
"type": "github"
}
},
"naersk": {
"inputs": {
"fenix": "fenix",
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1769799857,
"narHash": "sha256-88IFXZ7Sa1vxbz5pty0Io5qEaMQMMUPMonLa3Ls/ss4=",
"owner": "nix-community",
"repo": "naersk",
"rev": "9d4ed44d8b8cecdceb1d6fd76e74123d90ae6339",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "naersk",
"type": "github"
}
},
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1772173633, "lastModified": 1774701658,
"narHash": "sha256-MOH58F4AIbCkh6qlQcwMycyk5SWvsqnS/TCfnqDlpj4=", "narHash": "sha256-CIS/4AMUSwUyC8X5g+5JsMRvIUL3YUfewe8K4VrbsSQ=",
"owner": "nixos", "owner": "nixos",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "c0f3d81a7ddbc2b1332be0d8481a672b4f6004d6", "rev": "b63fe7f000adcfa269967eeff72c64cafecbbebe",
"type": "github" "type": "github"
}, },
"original": { "original": {
@@ -61,46 +18,7 @@
}, },
"root": { "root": {
"inputs": { "inputs": {
"naersk": "naersk", "nixpkgs": "nixpkgs"
"nixpkgs": "nixpkgs",
"rust-overlay": "rust-overlay"
}
},
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1752428706,
"narHash": "sha256-EJcdxw3aXfP8Ex1Nm3s0awyH9egQvB2Gu+QEnJn2Sfg=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "591e3b7624be97e4443ea7b5542c191311aa141d",
"type": "github"
},
"original": {
"owner": "rust-lang",
"ref": "nightly",
"repo": "rust-analyzer",
"type": "github"
}
},
"rust-overlay": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1772507320,
"narHash": "sha256-GdGXniFvtIfRiakc+ncdQYnoQjKbTCv9Imjfl4ggquI=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "1775eafa1879ac098ee436849bc9c3d963206f89",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
} }
} }
}, },

View File

@@ -1,39 +1,21 @@
{ {
inputs = { inputs.nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
naersk = {
url = "github:nix-community/naersk";
inputs.nixpkgs.follows = "nixpkgs";
};
rust-overlay = {
url = "github:oxalica/rust-overlay";
inputs.nixpkgs.follows = "nixpkgs";
};
};
outputs = { outputs = {
nixpkgs, nixpkgs,
naersk,
rust-overlay,
self, self,
... ...
}: let }: let
system = "x86_64-linux"; system = "x86_64-linux";
pkgs = nixpkgs.legacyPackages.${system}; pkgs = nixpkgs.legacyPackages.${system};
pkgsWithRustOverlay = pkgs.extend (import rust-overlay);
in { in {
packages.${system} = { packages.${system} = {
bootler = pkgs.callPackage ./bootler/package.nix {}; bootler = pkgs.callPackage ./bootler/package.nix {};
bootle = pkgsWithRustOverlay.callPackage ./bootle/package.nix {
inherit naersk;
bootler = self.packages.${system}.bootler;
};
twasm = pkgs.callPackage ./twasm/package.nix { twasm = pkgs.callPackage ./twasm/package.nix {
bootler = self.packages.${system}.bootler; bootler = self.packages.${system}.bootler;
}; };
}; };
devShells.${system}.default = pkgs.mkShell { devShells.${system}.default = pkgs.mkShell {
inputsFrom = [ inputsFrom = [
self.packages.${system}.bootle
self.packages.${system}.bootler self.packages.${system}.bootler
self.packages.${system}.twasm self.packages.${system}.twasm
]; ];

Binary file not shown.

View File

@@ -12,6 +12,42 @@ I want to compile Bootler and Twasm with the Twasm assembler
- [opcodes,ModR/M,SIB](http://ref.x86asm.net/coder64.html) (no secure site available) - [opcodes,ModR/M,SIB](http://ref.x86asm.net/coder64.html) (no secure site available)
- [calling conventions](https://wiki.osdev.org/Calling_Conventions); I try to use System V - [calling conventions](https://wiki.osdev.org/Calling_Conventions); I try to use System V
### tokeniser
whitespace is ignored for the sake of readability; it can go between pretty much anything
```
------------------------
tokeniser
------------------------
byte(s) -> next byte(s)
------------------------
Newline -> Label
-> Newline
-> Komment
-> Operator
-> Directive
Label -> Newline
Komment -> Newline
Operator -> Newline
-> Komment
-> Operand
Operand -> Newline
-> Komment
-> Comma
Comma -> Operand
Directive -> Newline
-> Komment
-> Operator
------------------------
```
### memory map ### memory map
``` ```
@@ -24,6 +60,10 @@ I want to compile Bootler and Twasm with the Twasm assembler
+------ 0x00060000 ------+ +------ 0x00060000 ------+
| test arena | | test arena |
+------ 0x00050000 ------+ +------ 0x00050000 ------+
| label table |
+------ 0x00040000 ------+
| awaiting label table |
+------ 0x00030000 ------+
| stack (rsp) | | stack (rsp) |
+------------------------+ +------------------------+
| input | | input |
@@ -41,6 +81,7 @@ each word represents a token on the token table.
each token gets loaded into the token table with the following form: each token gets loaded into the token table with the following form:
``` ```
2 bytes
+----------+ +----------+
| 15 0 | | 15 0 |
+----------+ +----------+
@@ -48,46 +89,68 @@ each token gets loaded into the token table with the following form:
+----------+ +----------+
``` ```
#### label table (LT)
label definitions are stored in, and recalled from, this table. The memory addresses are relative to the start of the program
```
16 bytes
+----------+---------+
| 127 96 | 95 64 |
+----------+---------+
| reserved | address |
+----------+---------+
| 63 0 |
+--------------------+
| hash |
+--------------------+
```
#### awaiting label table (ALT)
forward references are stored in this table to be filled in after assembly is otherwise complete. The memory addresses are relative to the start of the program
```
16 bytes
+----------+----------+------------------+---------+
| 127 101 | 100 | 99 96 | 95 64 |
+----------+----------+------------------+---------+
| reserved | abs flag | # bytes reserved | address |
+----------+----------+------------------+---------+
| 63 0 |
+--------------------------------------------------+
| hash |
+--------------------------------------------------+
```
### internal data structures ### internal data structures
#### `tokens.by_nameX` #### `tokens.[operators|registers]`
contains all tokens of that length followed by their ID. For some non-empty `tokens.by_nameX`, it is true that `tokens.by_name<X+1> - tokens.by_nameX` is the size in bytes of `tokens.by_nameX`. contains tokens by their type. Intended to be searched by token name to get the token's ID.
each entry is in the following form: each entry is in the following form:
``` ```
6 bytes
+----------+--------------------------------+ +----------+--------------------------------+
|[2 bytes] | 8 * token_length - 1 0 | | 47 32 | 31 0 |
+----------+--------------------------------+ +----------+--------------------------------+
| token ID | string without null terminator | | token ID | string without null terminator |
+----------+--------------------------------+ +----------+--------------------------------+
``` ```
example implementation: note that tokens longer than 4 bytes do not fit in the 4-byte string field of an entry, so they are problematic :/
```nasm
tokens:
.by_name1:
db "+"
dw 0x0062
db "-"
dw 0x0063
.by_name2:
db "r8"
dw 0x0008
.by_name3: ; this is required for futureproofness; the caller can use this to
; find the size of tokens.by_name2
```
#### `tokens.by_id` #### `tokens.by_id`
contains some tokens with their metadata. Some tokens have embedded information (`0x10XX` for instance). Those will not have entries in this table, being handled instead inside the assemble function itself. contains some tokens with their metadata. Some tokens have embedded information (`0x10XX` for instance). Those do not have entries in this table, being handled instead inside the assemble function itself.
metadata about some tokens in the following form: metadata about some tokens in the following form:
``` ```
4 bytes
+----------------+----------+-------+----------+ +----------------+----------+-------+----------+
| 31 24 | 23 20 | 19 16 | 15 0 | | 31 24 | 23 20 | 19 16 | 15 0 |
+----------------+----------+-------+----------+ +----------------+----------+-------+----------+
@@ -98,15 +161,17 @@ metadata about some tokens in the following form:
the `type` hex digit is defined as the following: the `type` hex digit is defined as the following:
| hex | meaning | examples | | hex | meaning | examples |
|-----|----------|-| |-----|-----------------|-|
| 0x0 | ignored | `; this entire comment is 1 token` | | 0x0 | ignored | |
| 0x1 | operator | `mov`, `hlt` | | 0x1 | operator | `mov`, `hlt` |
| 0x2 | register | `rsp`, `al` | | 0x2 | register | `rsp`, `al` |
| 0x3 | pseudo-operator | `db` |
| 0xF | unknown | any token ID not represented in the lookup table | | 0xF | unknown | any token ID not represented in the lookup table |
type metadata for the different types is as follows: type metadata for the different types is as follows:
``` ```
1 byte
+----------+ +----------+
| type 0x0 | | type 0x0 |
+----------+ +----------+
@@ -117,6 +182,7 @@ type metadata for the different types is as follows:
``` ```
``` ```
1 byte
+-------------------------------+ +-------------------------------+
| type 0x1 | | type 0x1 |
+----------+--------------------+ +----------+--------------------+
@@ -127,6 +193,7 @@ type metadata for the different types is as follows:
``` ```
``` ```
1 byte
+------------------------------+ +------------------------------+
| type 0x2 | | type 0x2 |
+----------+-----------+-------+ +----------+-----------+-------+
@@ -144,19 +211,130 @@ type metadata for the different types is as follows:
11b ; 64 bit 11b ; 64 bit
``` ```
```
1 byte
+----------+
| type 0x3 |
+----------+
| 31 24 |
+----------+
| reserved |
+----------+
```
#### `opcodes.by_id` #### `opcodes.by_id`
entries are as follows: entries are as follows:
``` ```
16 bytes
+------------------------------+
| 0 operand operators |
+---------+--------------------+
| 127 120 | 119 96 |
+---------+--------------------+
| flags | reserved |
+---------+--------------------+
| 95 64 |
+------------------------------+
| reserved |
+------------------------------+
| 63 32 |
+------------------------------+
| reserved |
+----------+--------+----------+ +----------+--------+----------+
| 31 24 | 23 16 | 15 0 | | 31 24 | 23 16 | 15 0 |
+----------+--------+----------+ +----------+--------+----------+
| reserved | opcode | token ID | | reserved | opcode | token ID |
+----------+--------+----------+ +----------+--------+----------+
16 bytes
+------------------------------------------+
| 1 operand operators |
+----------+----------+----------+---------+
| 127 120 | 119 112 | 111 104 | 103 96 |
+----------+----------+----------+---------+
| flags | reserved | flags5 | flags4 |
+----------+----------+----------+---------+
| 95 88 | 87 80 | 79 72 | 71 64 |
+----------+----------+----------+---------+
| flags3 | flags2 | reserved | flags0 |
+----------+----------+----------+---------+
| 63 56 | 55 48 | 47 40 | 39 32 |
+----------+----------+----------+---------+
| opcode | opcode | opcode | opcode |
| dst=rel8 | dst=rel | dst=imm8 | dst=imm |
+----------+----------+----------+---------+
| 31 24 | 23 16 | 15 0 |
+----------+----------+--------------------+
| opcode+r | opcode | token ID |
| dst=r | dst=r/m | |
+----------+----------+--------------------+
16 bytes
+-----------------------------------------------+
| 2 operand operators |
+---------+-------------------------------------+
| 127 120 | 119 96 |
+---------+-------------------------------------+
| flags | reserved |
+---------+----------+--------------------------+
| 95 88 | 87 80 | 79 64 |
+---------+----------+--------------------------+
| flags3 | flags2 | reserved |
+---------+----------+-------+-------+----------+
| 63 48 | 47 40 | 39 32 |
+--------------------+---------------+----------+
| reserved | opcode | opcode |
| | dst=r/m | dst=r/m |
| | src=imm8 | src=imm |
+---------+----------+---------------+----------+
| 31 24 | 23 16 | 15 0 |
+---------+----------+--------------------------+
| opcode | opcode | token ID |
| dst=r | dst=r/m | |
| src=r/m | src=r | |
+---------+----------+--------------------------+
1 byte
+-----------------+
| flags byte |
+----------+------+
| 127 121 | 120 |
+----------+------+
| reserved | 8bit |
+----------+------+
1 byte
+--------------------------------------------------------------+
| flagsX byte |
+----------+---------+-----------+-------------+---------------+
| 7 6 | 5 | 4 | 3 | 2 0 |
+----------+---------+-----------+-------------+---------------+
| reserved | +r flag | no ModR/M | 0x0F prefix | operator flag |
+----------+---------+-----------+-------------+---------------+
; flags key:
8bit ; tte has opcodes for r/m8 and r8 instead of r/m and r respectively
; flagsX key:
+r flag ; there is a +r variation of this opcode
no ModR/M ; there is no ModR/M byte for this opcode
0x0F prefix ; there is a 0x0F prefix for this opcode
operator flag ; contents of `reg` if applicable
; key:
r/m ; r/m 16/32/64
r/m8 ; r/m 8
r ; r 16/32/64
r8 ; r 8
imm ; imm 16/32
imm8 ; imm 8
rel ; rel 16/32
rel8 ; rel 8
``` ```
note the lack of support for multiple-byte opcodes or multiple opcodes for one token ID; these features will likely be added at some point after the parser accumulates too much jank. note much room to expand. If an opcode doesn't exist, it should be 0x00
### token IDs ### token IDs
@@ -164,112 +342,123 @@ supported tokens are listed below
| token | id | notes | | token | id | notes |
|-------|--------|-| |-------|--------|-|
| rax | 0x0000 | | | rax | 0x0000 | register |
| rbx | 0x0001 | | | rbx | 0x0001 | register |
| rcx | 0x0002 | | | rcx | 0x0002 | register |
| rdx | 0x0003 | | | rdx | 0x0003 | register |
| rsi | 0x0004 | | | rsi | 0x0004 | register |
| rdi | 0x0005 | | | rdi | 0x0005 | register |
| rsp | 0x0006 | | | rsp | 0x0006 | register |
| rbp | 0x0007 | | | rbp | 0x0007 | register |
| r8 | 0x0008 | | | r8 | 0x0008 | unimplemented |
| r9 | 0x0009 | | | r9 | 0x0009 | unimplemented |
| r10 | 0x000A | | | r10 | 0x000A | unimplemented |
| r11 | 0x000B | | | r11 | 0x000B | unimplemented |
| r12 | 0x000C | | | r12 | 0x000C | unimplemented |
| r13 | 0x000D | | | r13 | 0x000D | unimplemented |
| r14 | 0x000E | | | r14 | 0x000E | unimplemented |
| r15 | 0x000F | | | r15 | 0x000F | unimplemented |
| eax | 0x0010 | | | eax | 0x0010 | register |
| ebx | 0x0011 | | | ebx | 0x0011 | register |
| ecx | 0x0012 | | | ecx | 0x0012 | register |
| edx | 0x0013 | | | edx | 0x0013 | register |
| esi | 0x0014 | | | esi | 0x0014 | register |
| edi | 0x0015 | | | edi | 0x0015 | register |
| esp | 0x0016 | | | esp | 0x0016 | register |
| ebp | 0x0017 | | | ebp | 0x0017 | register |
| r8d | 0x0018 | | | r8d | 0x0018 | unimplemented |
| r9d | 0x0019 | | | r9d | 0x0019 | unimplemented |
| r10d | 0x001A | | | r10d | 0x001A | unimplemented |
| r11d | 0x001B | | | r11d | 0x001B | unimplemented |
| r12d | 0x001C | | | r12d | 0x001C | unimplemented |
| r13d | 0x001D | | | r13d | 0x001D | unimplemented |
| r14d | 0x001E | | | r14d | 0x001E | unimplemented |
| r15d | 0x001F | | | r15d | 0x001F | unimplemented |
| ax | 0x0020 | | | ax | 0x0020 | register |
| bx | 0x0021 | | | bx | 0x0021 | register |
| cx | 0x0022 | | | cx | 0x0022 | register |
| dx | 0x0023 | | | dx | 0x0023 | register |
| si | 0x0024 | | | si | 0x0024 | register |
| di | 0x0025 | | | di | 0x0025 | register |
| sp | 0x0026 | | | sp | 0x0026 | register |
| bp | 0x0027 | | | bp | 0x0027 | register |
| r8w | 0x0028 | | | r8w | 0x0028 | unimplemented |
| r9w | 0x0029 | | | r9w | 0x0029 | unimplemented |
| r10w | 0x002A | | | r10w | 0x002A | unimplemented |
| r11w | 0x002B | | | r11w | 0x002B | unimplemented |
| r12w | 0x002C | | | r12w | 0x002C | unimplemented |
| r13w | 0x002D | | | r13w | 0x002D | unimplemented |
| r14w | 0x002E | | | r14w | 0x002E | unimplemented |
| r15w | 0x002F | | | r15w | 0x002F | unimplemented |
| al | 0x0030 | | | al | 0x0030 | register |
| bl | 0x0031 | | | bl | 0x0031 | register |
| cl | 0x0032 | | | cl | 0x0032 | register |
| dl | 0x0033 | | | dl | 0x0033 | register |
| sil | 0x0034 | | | sil | 0x0034 | register |
| dil | 0x0035 | | | dil | 0x0035 | register |
| spl | 0x0036 | | | spl | 0x0036 | register |
| bpl | 0x0037 | | | bpl | 0x0037 | register |
| r8b | 0x0038 | | | r8b | 0x0038 | unimplemented |
| r9b | 0x0039 | | | r9b | 0x0039 | unimplemented |
| r10b | 0x003A | | | r10b | 0x003A | unimplemented |
| r11b | 0x003B | | | r11b | 0x003B | unimplemented |
| r12b | 0x003C | | | r12b | 0x003C | unimplemented |
| r13b | 0x003D | | | r13b | 0x003D | unimplemented |
| r14b | 0x003E | | | r14b | 0x003E | unimplemented |
| r15b | 0x003F | | | r15b | 0x003F | unimplemented |
| ah | 0x0040 | | | ah | 0x0040 | unimplemented |
| bh | 0x0041 | | | bh | 0x0041 | unimplemented |
| ch | 0x0042 | | | ch | 0x0042 | unimplemented |
| dh | 0x0043 | | | dh | 0x0043 | unimplemented |
| cs | 0x0044 | | | cs | 0x0044 | unimplemented |
| ds | 0x0045 | | | ds | 0x0045 | unimplemented |
| es | 0x0046 | | | es | 0x0046 | unimplemented |
| fs | 0x0047 | | | fs | 0x0047 | unimplemented |
| gs | 0x0048 | | | gs | 0x0048 | unimplemented |
| ss | 0x0049 | | | ss | 0x0049 | unimplemented |
| cr0 | 0x004A | | | cr0 | 0x004A | unimplemented |
| cr2 | 0x004B | | | cr2 | 0x004B | unimplemented |
| cr3 | 0x004C | | | cr3 | 0x004C | unimplemented |
| cr4 | 0x004D | | | cr4 | 0x004D | unimplemented |
| cr8 | 0x004E | | | cr8 | 0x004E | unimplemented |
| hlt | 0x004F | | | hlt | 0x004F | operator |
| int3 | 0x0050 | | | int3 | 0x0050 | operator |
| [ | 0x0051 | open bracket placeholder; 0x10XX should be used in contexts where the surrounding tokens can be known | | | 0x0051 | deprecated; formerly `[`. Now `0x10XX` is used. |
| ] | 0x0052 | | | | 0x0052 | deprecated; formerly `]`. |
| xor | 0x0053 | | | xor | 0x0053 | operator |
| inc | 0x0054 | | | inc | 0x0054 | operator |
| dec | 0x0055 | | | dec | 0x0055 | operator |
| mov | 0x0056 | | | mov | 0x0056 | operator |
| add | 0x0057 | | | add | 0x0057 | operator |
| sub | 0x0058 | | | sub | 0x0058 | operator |
| call | 0x0059 | | | call | 0x0059 | operator |
| ret | 0x005A | | | ret | 0x005A | operator |
| cmp | 0x005B | | | cmp | 0x005B | operator |
| je | 0x005C | | | jmp | 0x005C | operator |
| jne | 0x005D | | | je | 0x005D | operator |
| jge | 0x005E | | | jne | 0x005E | operator |
| jg | 0x005F | | | push | 0x005F | operator |
| jle | 0x0060 | | | pop | 0x0060 | operator |
| jl | 0x0061 | | | out | 0x0061 | operator |
| + | 0x0062 | | | db | 0x0100 | pseudo-operator |
| - | 0x0063 | | | | 0x10XX | some memory address; `XX` is as specified below |
| * | 0x0064 | | | | 0x20XX | some constant; `XX` is as specified below |
| / | 0x0065 | | | | 0x3XXX | some label; `XXX` is its entry index in the label table |
| [ | 0x10XX | open bracket with `XX` bytes until the closing bracket | | | 0xFEXX | used to pass some raw value `XX` in place of a token id to a couple of functions that mention this as a feature. If the function doesn't mention it, it will lead to undefined behaviour |
| | 0xFEXX | token terminator byte as token, where `XX` is the byte |
| | 0xFFFF | unrecognised token | | | 0xFFFF | unrecognised token |
values of `XX` in `0x10XX`:
| XX | description |
|------|-------------|
| 0x00 | following word is the token ID of some register |
values of `XX` in `0x20XX`:
| XX | description |
|------|-------------|
| 0x00 | following 8 bytes are the constant's value |
### example program ### example program
#### program in assembly #### program in assembly
@@ -278,7 +467,9 @@ this program doesn't do anything useful, it's just a test
```nasm ```nasm
xor eax, eax xor eax, eax
inc rax inc rax ; inline comment
; one line comment
mov rdx, [rax]
mov [rax], rdx mov [rax], rdx
hlt hlt
@@ -288,30 +479,19 @@ hlt
```nasm ```nasm
0x0053 ; xor 0x0053 ; xor
0xFE20 ; space
0x0010 ; eax 0x0010 ; eax
0xFE2C ; comma
0xFE20 ; space
0x0010 ; eax 0x0010 ; eax
0xFE0A ; newline
0x0054 ; inc 0x0054 ; inc
0xFE20 ; space
0x0000 ; rax 0x0000 ; rax
0xFE0A ; newline
0x0056 ; mov 0x0056 ; mov
0xFE20 ; space
0x1004 ; open bracket (4)
0xFE20 ; space |1
0x0000 ; rax |2
0xFE20 ; space |3
0x0052 ; close bracket |4
0xFE2C ; comma
0xFE20 ; space
0x0003 ; rdx 0x0003 ; rdx
0xFE0A ; newline 0x1000 ; memory address: register
0x0000 ; rax
0x0056 ; mov
0x1000 ; memory address: register
0x0000 ; rax
0x0003 ; rdx
0x004F ; hlt 0x004F ; hlt
0xFE0A ; newline
0xFE00 ; null terminator
``` ```
#### nasm output with the above example program, bits 64 #### nasm output with the above example program, bits 64
@@ -331,6 +511,13 @@ hlt
; reg 000b ; RAX ; reg 000b ; RAX
; r/m 000b ; RAX ; r/m 000b ; RAX
0x48 ; 64 Bit Operand Size prefix
0x8B ; MOV r16/32/64 r/m16/32/64
0x10 ; ModR/M byte
; mod 00b ; indirect addressing, no displacement
; reg 010b ; RDX
; r/m 000b ; [RAX]
0x48 ; 64 Bit Operand Size prefix 0x48 ; 64 Bit Operand Size prefix
0x89 ; MOV r/m16/32/64 r16/32/64 0x89 ; MOV r/m16/32/64 r16/32/64
0x10 ; ModR/M byte 0x10 ; ModR/M byte

File diff suppressed because it is too large Load Diff

View File

@@ -13,33 +13,62 @@ run_tests:
mov rsi, .msg mov rsi, .msg
call print.test call print.test
call test_completeness
call clear_test_arena
call test_djb2
call clear_test_arena call clear_test_arena
call test_elemb call test_elemb
call clear_test_arena
call test_identify_token
call clear_test_arena
call test_identify_next_token
call clear_test_arena call clear_test_arena
call test_get_tte_type call test_get_tte_type
call clear_test_arena call clear_test_arena
call test_get_tte_typed_metadata call test_get_tte_typed_metadata
call clear_test_arena
call test_get_direct_addressing_ModRM
call clear_test_arena call clear_test_arena
call test_get_opcode call test_get_opcode
call clear_test_arena call clear_test_arena
call test_get_reg_bits call test_get_reg_bits
call clear_test_arena
call test_evaluate_constant
call clear_test_arena
call test_identify_register
call clear_test_arena
call test_identify_operator
call clear_test_arena
call test_evaluate_operand
ret ret
.msg db "running test suite...", 0x0A, 0x00 .msg db "running test suite...", 0x0A, 0x00
; ------------------------------------------------------------------------------
; test_completeness
;
; description:
; visual confirmation of binary integrity. Prints the same sentence twice: once
; assembled from `.msg_content` followed by `msg_end`, and once in full from
; `.msg_confirm`, so that a human can compare the two lines. Nothing is checked
; automatically; no pass/fail message is printed.
;
; NOTE(review): `msg_end` is defined outside this routine. Presumably it is a
; string placed at the very end of the image, so a truncated binary produces a
; visibly different first line - confirm against its definition.
; ------------------------------------------------------------------------------
test_completeness:
mov rsi, .msg
call print.test
; line 1, part 1: fixed prefix
mov rsi, .msg_content
call print
; line 1, part 2: whatever is stored at `msg_end`
mov rsi, msg_end
call print
; line 2: the complete expected sentence, then the instruction to compare
mov rsi, .msg_confirm
call print
ret
.msg db "test_completeness...", 0x0A, 0x00
.msg_content db " here is the ", 0x00
.msg_confirm db " here is the end of the binary ->|", 0x0A, " assert: the previous 2 lines are identical", 0x0A, 0x00
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; test_elemb ; test_elemb
; ;
@@ -53,31 +82,31 @@ test_elemb:
; [0] ; [0]
mov rdi, 5 mov rdi, 5
mov rsi, test_elemb_5 mov rsi, .case1
mov dl, [test_elemb_5] mov dl, [.case1]
call elemb call elemb
cmp al, 1 cmp al, 1
jne .fail jne .fail
; [n - 1] ; [n - 1]
mov rdi, 5 mov rdi, 5
mov rsi, test_elemb_5 mov rsi, .case1
mov dl, [test_elemb_5 + 4] mov dl, [.case1 + 4]
call elemb call elemb
cmp al, 1 cmp al, 1
jne .fail jne .fail
; [1] ; [1]
mov rdi, 5 mov rdi, 5
mov rsi, test_elemb_5 mov rsi, .case1
mov dl, [test_elemb_5 + 1] mov dl, [.case1 + 1]
call elemb call elemb
cmp al, 1 cmp al, 1
jne .fail jne .fail
; not present ; not present
mov rdi, 5 mov rdi, 5
mov rsi, test_elemb_5 mov rsi, .case1
mov dl, 0xDA mov dl, 0xDA
call elemb call elemb
cmp al, 0 cmp al, 0
@@ -85,7 +114,7 @@ test_elemb:
; 0 length list ; 0 length list
mov rdi, 0 mov rdi, 0
mov rsi, test_elemb_0 mov rsi, .case0
mov dl, 0x34 mov dl, 0x34
call elemb call elemb
cmp al, 0 cmp al, 0
@@ -99,97 +128,57 @@ test_elemb:
mov rsi, msg_fail mov rsi, msg_fail
call print call print
ret ret
.case0: ; [This Page Intentionally Left Blank]
.case1 db 0x54, 0x00, 0x21, 0x20, 0x34
.msg db "test_elemb...", 0x00 .msg db "test_elemb...", 0x00
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; test_identify_token ; test_djb2
; ;
; description: ; description:
; tests identify_token described functionality ; tests djb2 described functionality
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
test_identify_token: test_djb2:
mov rsi, .msg mov rsi, .msg
call print.test call print.test
; length1 token that exists mov rsi, .case0
mov byte [TEST_ARENA_ADDR], "*" mov rdi, 0
mov rdi, TEST_ARENA_ADDR call djb2
mov rsi, 1 cmp rax, 5381
call identify_token
cmp ax, 0x0064
jne .fail jne .fail
; length1 token that doesn't exist mov rsi, .case1
mov byte [TEST_ARENA_ADDR], " " mov rdi, 1
mov rdi, TEST_ARENA_ADDR call djb2
mov rsi, 1 cmp rax, 177670
call identify_token
cmp ax, 0xFFFF
jne .fail jne .fail
; length2 token that exists mov rsi, .case2
mov word [TEST_ARENA_ADDR], "sp" mov rdi, 2
mov rdi, TEST_ARENA_ADDR call djb2
mov rsi, 2 cmp rax, 5863208
call identify_token
cmp ax, 0x0026
jne .fail jne .fail
; length2 token that doesn't exist ; why am I testing this, of course it's without side effects xD
mov word [TEST_ARENA_ADDR], "QQ"
mov rdi, TEST_ARENA_ADDR mov rsi, .case0
mov rsi, 2 mov rdi, 0
call identify_token call djb2
cmp ax, 0xFFFF cmp rax, 5381
jne .fail jne .fail
; length3 token that exists mov rsi, .case1
mov dword [TEST_ARENA_ADDR], "rax" mov rdi, 1
mov rdi, TEST_ARENA_ADDR call djb2
mov rsi, 3 cmp rax, 177670
call identify_token
cmp ax, 0x0000
jne .fail jne .fail
; length3 token that exists mov rsi, .case2
mov dword [TEST_ARENA_ADDR], "cr0" mov rdi, 2
mov rdi, TEST_ARENA_ADDR call djb2
mov rsi, 3 cmp rax, 5863208
call identify_token
cmp ax, 0x004A
jne .fail
; length3 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r16"
mov rdi, TEST_ARENA_ADDR
mov rsi, 3
call identify_token
cmp ax, 0xFFFF
jne .fail
; length4 token that exists
mov dword [TEST_ARENA_ADDR], "r10d"
mov rdi, TEST_ARENA_ADDR
mov rsi, 4
call identify_token
cmp ax, 0x001A
jne .fail
; length4 token that exists
mov dword [TEST_ARENA_ADDR], "r15b"
mov rdi, TEST_ARENA_ADDR
mov rsi, 4
call identify_token
cmp ax, 0x003F
jne .fail
; length4 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r15q"
mov rdi, TEST_ARENA_ADDR
mov rsi, 4
call identify_token
cmp ax, 0xFFFF
jne .fail jne .fail
.pass: .pass:
@@ -200,102 +189,10 @@ test_identify_token:
mov rsi, msg_fail mov rsi, msg_fail
call print call print
ret ret
.msg db "test_identify_token...", 0x00 .case0 db ""
.case1 db "a"
; ------------------------------------------------------------------------------ .case2 db "ab"
; test_identify_next_token .msg db "test_djb2...", 0x00
;
; description:
; tests identify_next_token described functionality
; ------------------------------------------------------------------------------
test_identify_next_token:
; Each case writes a token plus a terminating space into the test arena, passes
; the arena address in rdi, and compares the token ID returned in ax with the
; expected value (0xFFFF = unrecognised token). Any mismatch jumps to .fail.
mov rsi, .msg
call print.test
; length1 token that exists
mov word [TEST_ARENA_ADDR], "* "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x0064
jne .fail
; length1 token that doesn't exist
mov word [TEST_ARENA_ADDR], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; length2 token that exists
mov dword [TEST_ARENA_ADDR], "sp "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x0026
jne .fail
; length2 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "QQ "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; length3 token that exists
mov dword [TEST_ARENA_ADDR], "rax "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x0000
jne .fail
; length3 token that exists
mov dword [TEST_ARENA_ADDR], "cr0 "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x004A
jne .fail
; length3 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r16 "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; length4 token that exists
; (the dword store only holds the four token characters, so the terminating
; space is written separately at offset 4)
mov dword [TEST_ARENA_ADDR], "r10d"
mov byte [TEST_ARENA_ADDR + 4], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x001A
jne .fail
; length4 token that exists
mov dword [TEST_ARENA_ADDR], "r15b"
mov byte [TEST_ARENA_ADDR + 4], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0x003F
jne .fail
; length4 token that doesn't exist
mov dword [TEST_ARENA_ADDR], "r15q"
mov byte [TEST_ARENA_ADDR + 4], " "
mov rdi, TEST_ARENA_ADDR
call identify_next_token
cmp ax, 0xFFFF
jne .fail
; every case matched: report success
.pass:
mov rsi, msg_pass
call print
ret
; first mismatching case lands here: report failure
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_identify_next_token...", 0x00
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; test_get_tte_type ; test_get_tte_type
@@ -354,6 +251,11 @@ test_get_tte_typed_metadata:
cmp al, 0x02 ; # operands cmp al, 0x02 ; # operands
jne .fail jne .fail
mov di, 0x004F ; hlt
call get_tte_typed_metadata
cmp al, 0x00 ; # operands
jne .fail
mov di, 0x0003 ; rdx mov di, 0x0003 ; rdx
call get_tte_typed_metadata call get_tte_typed_metadata
cmp al, 00001011b ; reg: 010b cmp al, 00001011b ; reg: 010b
@@ -380,51 +282,6 @@ test_get_tte_typed_metadata:
ret ret
.msg db "test_get_tte_typed_metadata...", 0x00 .msg db "test_get_tte_typed_metadata...", 0x00
; ------------------------------------------------------------------------------
; test_get_direct_addressing_ModRM
;
; description:
; tests get_direct_addressing_ModRM described functionality
;
; each case passes two register token IDs in di and si and checks the byte
; returned in al. In every expected value Mod is 11b, the Reg field carries the
; bits of the register passed in di and the R/M field those of the register
; passed in si (rax = 000b, rdx = 010b).
; ------------------------------------------------------------------------------
test_get_direct_addressing_ModRM:
mov rsi, .msg
call print.test
mov di, 0x0000 ; rax
mov si, 0x0000 ; rax
call get_direct_addressing_ModRM
cmp al, 11000000b ; Mod Reg R/M: 11b 000b 000b
jne .fail
mov di, 0x0000 ; rax
mov si, 0x0003 ; rdx
call get_direct_addressing_ModRM
cmp al, 11000010b ; Mod Reg R/M: 11b 000b 010b
jne .fail
mov di, 0x0003 ; rdx
mov si, 0x0000 ; rax
call get_direct_addressing_ModRM
cmp al, 11010000b ; Mod Reg R/M: 11b 010b 000b
jne .fail
mov di, 0x0003 ; rdx
mov si, 0x0003 ; rdx
call get_direct_addressing_ModRM
cmp al, 11010010b ; Mod Reg R/M: 11b 010b 010b
jne .fail
; every case matched: report success
.pass:
mov rsi, msg_pass
call print
ret
; first mismatching case lands here: report failure
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_get_direct_addressing_ModRM...", 0x00
; ------------------------------------------------------------------------------ ; ------------------------------------------------------------------------------
; test_get_opcode ; test_get_opcode
; ;
@@ -437,21 +294,80 @@ test_get_opcode:
call print.test call print.test
mov di, 0x0053 ; xor mov di, 0x0053 ; xor
mov sil, 0
mov bl, 0
call get_opcode call get_opcode
cmp al, 0x31 cmp al, 0x31
jne .fail jne .fail
cmp dl, 0q0
jne .fail
mov di, 0x0053 ; xor
mov sil, 1
mov bl, 0
call get_opcode
cmp al, 0x33
jne .fail
cmp dl, 0q0
jne .fail
mov di, 0x0053 ; xor
mov sil, 2
mov bl, 0
call get_opcode
cmp al, 0x81
jne .fail
cmp dl, 0q6
jne .fail
mov di, 0x0053 ; xor
mov sil, 3
mov bl, 0
call get_opcode
cmp al, 0x83
jne .fail
cmp dl, 0q6
jne .fail
mov di, 0x0054 ; inc mov di, 0x0054 ; inc
mov sil, 0
mov bl, 0
call get_opcode call get_opcode
cmp al, 0xFF cmp al, 0xFF
jne .fail jne .fail
cmp dl, 0q0
jne .fail
mov di, 0x0055 ; dec
mov sil, 0
mov bl, 0
call get_opcode
cmp al, 0xFF
jne .fail
cmp dl, 0q1
jne .fail
mov di, 0x004F ; hlt mov di, 0x004F ; hlt
mov sil, 0
mov bl, 0
call get_opcode call get_opcode
cmp al, 0xF4 cmp al, 0xF4
jne .fail jne .fail
cmp dl, 0q0
jne .fail
mov di, 0x0059 ; call
mov sil, 0q0
mov bl, 0
call get_opcode
cmp al, 0xFF
jne .fail
cmp dl, 0q2
jne .fail
mov di, 0x0003 ; rdx (not an operator) mov di, 0x0003 ; rdx (not an operator)
mov sil, 0q0
mov bl, 0
call get_opcode call get_opcode
cmp al, UNRECOGNISED_ID_OPCODE cmp al, UNRECOGNISED_ID_OPCODE
jne .fail jne .fail
@@ -502,19 +418,251 @@ test_get_reg_bits:
ret ret
.msg db "test_get_reg_bits...", 0x00 .msg db "test_get_reg_bits...", 0x00
; ------------------------------------------------------------------------------
; test_evaluate_constant
;
; description:
; tests evaluate_constant described functionality
;
; each case passes the address of the constant's text in rdi and its length in
; bytes in rsi, then checks rax against the expected value and rdx against the
; expected type code. Type codes expected by these cases:
; 0x00 for `0x` (hexadecimal), 0x01 for `0q` (octal), 0x02 for `0b` (binary),
; 0x03 for a double-quoted string.
; ------------------------------------------------------------------------------
test_evaluate_constant:
mov rsi, .msg
call print.test
; hex, decimal digits only
mov rdi, .case0h ; addr of constant
mov rsi, 8 ; length of constant
call evaluate_constant
cmp rax, [.case0h_solution]
jne .fail
cmp rdx, 0x00
jne .fail
; hex, letter digits only (A-F)
mov rdi, .case1h ; addr of constant
mov rsi, 8 ; length of constant
call evaluate_constant
cmp rax, [.case1h_solution]
jne .fail
cmp rdx, 0x00
jne .fail
; hex, 10 digits (value does not fit in 32 bits)
mov rdi, .case2h ; addr of constant
mov rsi, 12 ; length of constant
call evaluate_constant
cmp rax, [.case2h_solution]
jne .fail
cmp rdx, 0x00
jne .fail
; PI x: hex, 16 digits (a full 64 bit value)
mov rdi, .case3h ; addr of constant
mov rsi, 18 ; length of constant
call evaluate_constant
cmp rax, [.case3h_solution]
jne .fail
cmp rdx, 0x00
jne .fail
; PI q: octal, 14 digits
mov rdi, .case0q
mov rsi, 16
call evaluate_constant
cmp rax, [.case0q_solution]
jne .fail
cmp rdx, 0x01
jne .fail
; PI b: binary, 64 digits (a full 64 bit value; the bits are the ASCII bytes
; of `first 64`, most significant byte first)
mov rdi, .case0b
mov rsi, 66
call evaluate_constant
cmp rax, [.case0b_solution]
jne .fail
cmp rdx, 0x02
jne .fail
; char: double-quoted string, length includes both quotes
mov rdi, .case0c
mov rsi, 6
call evaluate_constant
cmp rax, [.case0c_solution]
jne .fail
cmp rdx, 0x03
jne .fail
; every case matched: report success
.pass:
mov rsi, msg_pass
call print
ret
; first mismatching case lands here: report failure
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_evaluate_constant...", 0x00
; test inputs (not null terminated; lengths are passed explicitly) and the
; values the assembler itself computes for the same literals
.case0h db "0x012390"
.case0h_solution dq 0x012390
.case1h db "0xABCDEF"
.case1h_solution dq 0xABCDEF
.case2h db "0x1234567890"
.case2h_solution dq 0x1234567890
.case3h db "0x243F6A8885A308D3"
.case3h_solution dq 0x243F6A8885A308D3
.case0c db '"char"'
.case0c_solution dq "char"
; NOTE(review): .case1c is defined but no case above exercises it
.case1c db '"chars"'
.case1c_solution dq "chars"
; " wow my editor really doesn't like highlighting quotes correctly
.case0q db "0q31103755242102"
.case0q_solution dq 0q31103755242102
.case0b db "0b0110011001101001011100100111001101110100001000000011011000110100"
.case0b_solution dq 0b0110011001101001011100100111001101110100001000000011011000110100
; ------------------------------------------------------------------------------
; test_identify_register
;
; description:
; tests identify_register described functionality
;
; the candidate name is passed in edi as a character constant (the assembler
; packs the characters into the register, first character in the lowest byte)
; and the token ID returned in ax is compared with the expected value.
; ------------------------------------------------------------------------------
test_identify_register:
mov rsi, .msg
call print.test
; known register: rcx has token ID 0x0002
mov edi, "rcx"
call identify_register
cmp ax, 0x0002
jne .fail
; four characters that are not a register name
mov edi, "RaNd"
call identify_register
cmp ax, UNRECOGNISED_TOKEN_ID
jne .fail
; empty name
mov edi, ""
call identify_register
cmp ax, UNRECOGNISED_TOKEN_ID
jne .fail
; every case matched: report success
.pass:
mov rsi, msg_pass
call print
ret
; first mismatching case lands here: report failure
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_identify_register...", 0x00
; ------------------------------------------------------------------------------
; test_identify_operator
;
; description:
; tests identify_operator described functionality
;
; the candidate name is passed in edi as a character constant (the assembler
; packs the characters into the register, first character in the lowest byte)
; and the token ID returned in ax is compared with the expected value.
; ------------------------------------------------------------------------------
test_identify_operator:
mov rsi, .msg
call print.test
; known operator: xor has token ID 0x0053
mov edi, "xor"
call identify_operator
cmp ax, 0x0053
jne .fail
; the dword stored at tokens.operators_end must not be recognised
; NOTE(review): tokens.operators_end is defined elsewhere; presumably it marks
; the first bytes past the operator table, making this an overrun check - confirm
mov edi, [tokens.operators_end]
call identify_operator
cmp ax, UNRECOGNISED_TOKEN_ID
jne .fail
; four characters that are not an operator name
mov edi, "RaNd"
call identify_operator
cmp ax, UNRECOGNISED_TOKEN_ID
jne .fail
; empty name
mov edi, ""
call identify_operator
cmp ax, UNRECOGNISED_TOKEN_ID
jne .fail
; every case matched: report success
.pass:
mov rsi, msg_pass
call print
ret
; first mismatching case lands here: report failure
.fail:
mov rsi, msg_fail
call print
ret
.msg db "test_identify_operator...", 0x00
; ------------------------------------------------------------------------------
; test_evaluate_operand
;
; description:
; tests evaluate_operand described functionality
;
; each case passes the address of the operand text in rdi and its length in
; bytes in rsi, then checks dl and (where applicable) ax. Values expected by
; these cases: dl = 0x00 with ax = the register's token ID for a bare register,
; dl = 0x10 with ax = the register's token ID for a bracketed register, and
; dl = 0xFF for an empty operand.
; NOTE(review): dl looks like the high byte of the operand's token type
; (0x10XX = memory address) - confirm against evaluate_operand's header.
; ------------------------------------------------------------------------------
test_evaluate_operand:
mov rsi, .msg
call print.test
; bare register: rax (token ID 0x0000)
mov rdi, .case0
mov rsi, 3
call evaluate_operand
cmp dl, 0x00
jne .fail
cmp ax, 0x0000
jne .fail
; empty operand: length 0, only dl is checked
mov rdi, .case1
mov rsi, 0
call evaluate_operand
cmp dl, 0xFF
jne .fail
; bare register: rdx (token ID 0x0003)
mov rdi, .case2
mov rsi, 3
call evaluate_operand
cmp dl, 0x00
jne .fail
cmp ax, 0x0003
jne .fail
; register in brackets: [rdx]
mov rdi, .case3
mov rsi, 5
call evaluate_operand
cmp dl, 0x10
jne .fail
cmp ax, 0x0003
jne .fail
; every case matched: report success
.pass:
mov rsi, msg_pass
call print
ret
; first mismatching case lands here: report failure
.fail:
mov rsi, msg_fail
call print
ret
; operand texts (not null terminated; lengths are passed explicitly).
; .case1 emits no bytes, so it shares its address with .case2
.case0 db "rax"
.case1: ; intentionally blank
.case2 db "rdx"
.case3 db "[rdx]"
.msg db "test_evaluate_operand...", 0x00
msg_pass: msg_pass:
db 0x0A db 0x0A
times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align times (TEST_LINE_LENGTH + .start - .end) db " ", ; right align
db 0x1B, "[32m"
.start db "passed." .start db "passed."
.end db 0x0A, 0x00 .end db 0x1B, "[0m", 0x0A, 0x00
msg_fail: msg_fail:
db 0x0A db 0x0A
times (TEST_LINE_LENGTH + .start - .end) db " ", times (TEST_LINE_LENGTH + .start - .end) db " ",
db 0x1B, "[31m"
.start db "failed." .start db "failed."
.end db 0x0A, 0x00 .end db 0x1B, "[0m", 0x0A, 0x00
test_byte db "Q" ; unterminated, just a byte chillin
test_token_null db "TestTokn", 0x00 ; followed by null terminator. Quad word
test_token_space db "TestTokn " ; followed by space. Quad word
test_elemb_0: ; [This Page Intentionally Left Blank]
test_elemb_5 db 0x54, 0x00, 0x21, 0x20, 0x34