2 years ago · 874e938802
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,10 @@
 
				+# Rust
			
 
				+/target
			
 
				+Cargo.lock
			
 
				+
			
 
				+# CLion
			
 
				+*.iml
			
 
				+.idea/
			
 
				+
			
 
				+# macOS
			
 
				+.DS_Store
			
--- a/.run/Documentation.run.xml
+++ b/.run/Documentation.run.xml
@@ -0,0 +1,19 @@
 
				+<component name="ProjectRunConfigurationManager">
			
 
				+  <configuration default="false" name="Documentation" type="CargoCommandRunConfiguration" factoryName="Cargo Command">
			
 
				+    <option name="command" value="doc --no-deps" />
			
 
				+    <option name="workingDirectory" value="file://$PROJECT_DIR$" />
			
 
				+    <option name="channel" value="STABLE" />
			
 
				+    <option name="requiredFeatures" value="true" />
			
 
				+    <option name="allFeatures" value="false" />
			
 
				+    <option name="emulateTerminal" value="false" />
			
 
				+    <option name="withSudo" value="false" />
			
 
				+    <option name="buildTarget" value="REMOTE" />
			
 
				+    <option name="backtrace" value="SHORT" />
			
 
				+    <envs />
			
 
				+    <option name="isRedirectInput" value="false" />
			
 
				+    <option name="redirectInputPath" value="" />
			
 
				+    <method v="2">
			
 
				+      <option name="CARGO.BUILD_TASK_PROVIDER" enabled="true" />
			
 
				+    </method>
			
 
				+  </configuration>
			
 
				+</component>
			
--- a/assembly.run.xml
+++ b/assembly.run.xml
@@ -0,0 +1,19 @@
 
				+<component name="ProjectRunConfigurationManager">
			
 
				+  <configuration default="false" name="Test assembly" type="CargoCommandRunConfiguration" factoryName="Cargo Command">
			
 
				+    <option name="command" value="test --package assembly" />
			
 
				+    <option name="workingDirectory" value="file://$PROJECT_DIR$" />
			
 
				+    <option name="channel" value="STABLE" />
			
 
				+    <option name="requiredFeatures" value="true" />
			
 
				+    <option name="allFeatures" value="true" />
			
 
				+    <option name="emulateTerminal" value="false" />
			
 
				+    <option name="withSudo" value="false" />
			
 
				+    <option name="buildTarget" value="REMOTE" />
			
 
				+    <option name="backtrace" value="SHORT" />
			
 
				+    <envs />
			
 
				+    <option name="isRedirectInput" value="false" />
			
 
				+    <option name="redirectInputPath" value="" />
			
 
				+    <method v="2">
			
 
				+      <option name="CARGO.BUILD_TASK_PROVIDER" enabled="true" />
			
 
				+    </method>
			
 
				+  </configuration>
			
 
				+</component>
			
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -0,0 +1,5 @@
 
				+[workspace]
			
 
				+members = [
			
 
				+    "assembly",
			
 
				+    "assembler"
			
 
				+]
			
--- a/assembler/Cargo.toml
+++ b/assembler/Cargo.toml
@@ -0,0 +1,12 @@
 
				+[package]
			
 
				+name = "assembler"
			
 
				+description = "A simple assembler"
			
 
				+version = "0.1.0"
			
 
				+edition = "2021"
			
 
				+license = "MIT"
			
 
				+authors = ["Felix Bytow <drako@drako.guru>"]
			
 
				+publish = ["crates-drako-guru"]
			
 
				+repository = "https://git.drako.guru/drako/asm-rs"
			
 
				+
			
 
				+[dependencies]
			
 
				+assembly = { path = "../assembly" }
			
--- a/assembler/src/main.rs
+++ b/assembler/src/main.rs
@@ -0,0 +1,3 @@
 
				/// Entry point of the `assembler` binary.
				///
				/// Currently a placeholder: it only prints a fixed greeting to standard output.
				fn main() {
				    let greeting = "Hello, world!";
				    println!("{}", greeting);
				}
			
--- a/assembly/Cargo.toml
+++ b/assembly/Cargo.toml
@@ -0,0 +1,15 @@
 
				+[package]
			
 
				+name = "assembly"
			
 
				+description = "An embeddable assembler"
			
 
				+version = "0.1.0"
			
 
				+edition = "2021"
			
 
				+license = "MIT"
			
 
				+authors = ["Felix Bytow"]
			
 
				+publish = ["crates-drako-guru"]
			
 
				+repository = "https://git.drako.guru/drako/asm-rs"
			
 
				+
			
 
				+[lib]
			
 
				+doctest = true
			
 
				+
			
 
				+[dependencies]
			
 
				+phf = { version = "0.11.1", features = ["macros"] }
			
--- a/assembly/src/lexer.rs
+++ b/assembly/src/lexer.rs
@@ -0,0 +1,210 @@
 
				+use std::str::Chars;
			
 
				+use crate::lexer::LexerState::{Decimal, Space, Word};
			
 
				+use crate::token::{INSTRUCTIONS, Token, TokenInfo};
			
 
				+
			
 
				/// Possible errors the `Lexer` may encounter.
				///
				/// Every variant carries the source position the problem was detected at. The type
				/// implements [`std::fmt::Display`] and [`std::error::Error`], so it can be shown to
				/// users and propagated with `?` into `Box<dyn std::error::Error>`.
				#[derive(Debug, Clone, PartialEq, Eq)]
				pub enum LexerError {
				    /// An unknown instruction was found.
				    InvalidInstruction {
				        /// The line in the input.
				        line: usize,

				        /// The column in the input where the instruction started.
				        column: usize,

				        /// The invalid instruction.
				        instruction: String,
				    },

				    /// A decimal literal was too large to fit in a `u64`.
				    TooLargeDecimalLiteral {
				        /// The line in the input.
				        line: usize,

				        /// The column in the input where the literal started.
				        column: usize,
				    },
				}

				impl std::fmt::Display for LexerError {
				    /// Formats the error as `<line>:<column>: <description>`.
				    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
				        match self {
				            Self::InvalidInstruction { line, column, instruction } => {
				                write!(f, "{}:{}: invalid instruction `{}`", line, column, instruction)
				            }
				            Self::TooLargeDecimalLiteral { line, column } => {
				                write!(f, "{}:{}: decimal literal does not fit in a u64", line, column)
				            }
				        }
				    }
				}

				impl std::error::Error for LexerError {}
			
 
				+
			
 
				/// The states of the lexer's state machine.
				#[doc(hidden)]
				enum LexerState {
				    /// No token is currently being collected.
				    Space,

				    /// A word is being collected; `start` is the column it began at and `value` holds the
				    /// characters gathered so far.
				    Word { start: usize, value: String },

				    /// A decimal literal is being collected; `start` is the column it began at and `value`
				    /// is the number accumulated so far.
				    Decimal { start: usize, value: u64 },
				}
			
 
				+
			
 
				+/// Class turning characters into `Token`s.
			
 
				+pub struct Lexer {
			
 
				+    /// 1-based line number.
			
 
				+    line: usize,
			
 
				+
			
 
				+    /// 1 base column.
			
 
				+    column: usize,
			
 
				+
			
 
				+    /// The lexer is a state machine and this is the current state.
			
 
				+    state: LexerState,
			
 
				+}
			
 
				+
			
 
				+impl Lexer {
			
 
				+    /// Create a new lexer.
			
 
				+    pub fn new() -> Self {
			
 
				+        Self {
			
 
				+            line: 1,
			
 
				+            column: 1,
			
 
				+            state: Space,
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    /// Tokenize the given characters.
			
 
				+    ///
			
 
				+    /// Every `TokenInfo` contains additional information about the source location of the `Token`.
			
 
				+    pub fn tokenize(&mut self, input: Chars) -> Result<Vec<TokenInfo>, LexerError> {
			
 
				+        let mut tokens = Vec::new();
			
 
				+
			
 
				+        for c in input {
			
 
				+            self.handle_character(c, &mut tokens)?;
			
 
				+        }
			
 
				+        if let Space = &self.state {} else {
			
 
				+            self.handle_character('\n', &mut tokens)?;
			
 
				+        }
			
 
				+
			
 
				+        Ok(tokens)
			
 
				+    }
			
 
				+
			
 
				+    fn handle_character(&mut self, c: char, tokens: &mut Vec<TokenInfo>) -> Result<(), LexerError> {
			
 
				+        match &self.state {
			
 
				+            Space => self.state_space(c, tokens),
			
 
				+            Word { .. } => self.state_word(c, tokens),
			
 
				+            Decimal { .. } => self.state_decimal(c, tokens),
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    fn state_space(&mut self, c: char, tokens: &mut Vec<TokenInfo>) -> Result<(), LexerError> {
			
 
				+        if c.is_ascii_alphabetic() {
			
 
				+            self.state = Word { start: self.column, value: String::new() };
			
 
				+            self.state_word(c, tokens)
			
 
				+        } else if c.is_ascii_digit() {
			
 
				+            self.state = Decimal { start: self.column, value: 0 };
			
 
				+            self.state_decimal(c, tokens)
			
 
				+        } else if c == ',' {
			
 
				+            tokens.push(TokenInfo {
			
 
				+                token: Token::Comma,
			
 
				+                line: self.line,
			
 
				+                column: self.column,
			
 
				+            });
			
 
				+            Ok(())
			
 
				+        } else {
			
 
				+            Ok(())
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    fn state_word(&mut self, c: char, tokens: &mut Vec<TokenInfo>) -> Result<(), LexerError> {
			
 
				+        if let Word { start, value } = &self.state {
			
 
				+            if c.is_ascii_alphanumeric() {
			
 
				+                let mut v = value.clone();
			
 
				+                v.push(c);
			
 
				+                self.state = Word { start: *start, value: v };
			
 
				+                Ok(())
			
 
				+            } else {
			
 
				+                let inst = value.clone();
			
 
				+                let column = *start;
			
 
				+
			
 
				+                if !INSTRUCTIONS.contains(&inst) {
			
 
				+                    return Err(LexerError::InvalidInstruction {
			
 
				+                        line: self.line,
			
 
				+                        column,
			
 
				+                        instruction: inst,
			
 
				+                    });
			
 
				+                }
			
 
				+
			
 
				+                tokens.push(TokenInfo {
			
 
				+                    token: Token::Instruction(inst),
			
 
				+                    line: self.line,
			
 
				+                    column,
			
 
				+                });
			
 
				+                self.state = Space;
			
 
				+                self.state_space(c, tokens)
			
 
				+            }
			
 
				+        } else {
			
 
				+            unreachable!("state_word is only called when state is a Word.")
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    fn state_decimal(&mut self, c: char, tokens: &mut Vec<TokenInfo>) -> Result<(), LexerError> {
			
 
				+        if let Decimal { start, value } = &self.state {
			
 
				+            let s = *start;
			
 
				+            let v = *value;
			
 
				+            if c.is_ascii_digit() {
			
 
				+                let digit = unsafe { c.to_digit(10).unwrap_unchecked() } as u64;
			
 
				+                let new_v = v
			
 
				+                    .checked_mul(10u64)
			
 
				+                    .and_then(|x| x.checked_add(digit))
			
 
				+                    .ok_or(LexerError::TooLargeDecimalLiteral { line: self.line, column: s })?;
			
 
				+                self.state = Decimal {
			
 
				+                    start: s,
			
 
				+                    value: new_v,
			
 
				+                };
			
 
				+                Ok(())
			
 
				+            } else {
			
 
				+                tokens.push(TokenInfo {
			
 
				+                    token: Token::DecimalLiteral(v),
			
 
				+                    line: self.line,
			
 
				+                    column: s,
			
 
				+                });
			
 
				+                self.state = Space;
			
 
				+                self.state_space(c, tokens)
			
 
				+            }
			
 
				+        } else {
			
 
				+            unreachable!("state_word is only called when state is a Word.")
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				#[cfg(test)]
				mod tests {
				    use crate::lexer::{Lexer, LexerError};
				    use crate::token::{INSTRUCTIONS, Token, TokenInfo};

				    /// The token stream expected when `token` is the only thing in the input.
				    fn lone(token: Token) -> Vec<TokenInfo> {
				        vec![TokenInfo { token, line: 1, column: 1 }]
				    }

				    /// Every supported instruction lexes to a single `Instruction` token.
				    #[test]
				    fn instructions() -> Result<(), LexerError> {
				        INSTRUCTIONS.iter().try_for_each(|&inst| {
				            let tokens = Lexer::new().tokenize(inst.chars())?;
				            assert_eq!(tokens, lone(Token::Instruction(inst.to_string())));
				            Ok(())
				        })
				    }

				    /// Decimal literals lex to a single `DecimalLiteral` token carrying their value.
				    #[test]
				    fn decimals() -> Result<(), LexerError> {
				        for n in [0u64, 23, 42, 1337, 500000] {
				            let text = n.to_string();
				            let tokens = Lexer::new().tokenize(text.chars())?;
				            assert_eq!(tokens, lone(Token::DecimalLiteral(n)));
				        }
				        Ok(())
				    }

				    /// A literal one digit longer than `u64::MAX` is rejected at its start position.
				    #[test]
				    fn too_large_decimal_literal() {
				        let input = format!("{}0", u64::MAX);
				        let outcome = Lexer::new().tokenize(input.chars());
				        assert!(matches!(
				            outcome,
				            Err(LexerError::TooLargeDecimalLiteral { line: 1, column: 1 })
				        ));
				    }
				}
			
--- a/assembly/src/lib.rs
+++ b/assembly/src/lib.rs
@@ -0,0 +1,8 @@
 
				+#![warn(missing_docs)]
			
 
				+#![doc = "An embeddable assembler."]
			
 
				+
			
 
				+/// The `Lexer`, turning characters into `Token`s.
			
 
				+pub mod lexer;
			
 
				+
			
 
				+/// `Token`s and `TokenInfo`s.
			
 
				+pub mod token;
			
--- a/assembly/src/token.rs
+++ b/assembly/src/token.rs
@@ -0,0 +1,35 @@
 
				+use phf::phf_set;
			
 
				+
			
 
				/// A single lexical unit of assembly source.
				#[derive(Clone, Debug, PartialEq)]
				pub enum Token {
				    /// An instruction of the supported instruction set, stored by name.
				    Instruction(String),

				    /// A `,` character.
				    Comma,

				    /// An unsigned integer literal written in base 10.
				    DecimalLiteral(u64),
				}
			
 
				+
			
 
				+/// A `Token` with source position information.
			
 
				+#[derive(Debug, PartialEq, Clone)]
			
 
				+pub struct TokenInfo {
			
 
				+    /// The `Token`.
			
 
				+    pub token: Token,
			
 
				+
			
 
				+    /// The line in the source.
			
 
				+    pub line: usize,
			
 
				+
			
 
				+    /// The column in the source where the token started.
			
 
				+    pub column: usize,
			
 
				+}
			
 
				+
			
 
				+/// All currently supported instructions.
			
 
				+pub const INSTRUCTIONS: phf::Set<&'static str> = phf_set! {
			
 
				+    "db",
			
 
				+    "dw",
			
 
				+    "dd",
			
 
				+    "dq"
			
 
				+};