From b7ad73cdb68fbecd3da299a675cca0b05ee38964 Mon Sep 17 00:00:00 2001
From: caandt <caandt@twoha.cc>
Date: Thu, 29 May 2025 00:03:28 -0500
Subject: [PATCH] initial commit

---
 .gitignore             |   7 ++
 build.zig              | 116 +++++++++++++++++++++++++++
 build.zig.zon          |  86 ++++++++++++++++++++
 src/frontend/token.zig | 178 +++++++++++++++++++++++++++++++++++++++++
 src/main.zig           |  29 +++++++
 src/root.zig           |   6 ++
 6 files changed, 422 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 build.zig
 create mode 100644 build.zig.zon
 create mode 100644 src/frontend/token.zig
 create mode 100644 src/main.zig
 create mode 100644 src/root.zig

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a57d616
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+.zig-cache/
+zig-out/
+/release/
+/debug/
+/build/
+/build-*/
+/docgen_tmp/
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..84d4954
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,116 @@
+const std = @import("std");
+
+// Although this function looks imperative, note that its job is to
+// declaratively construct a build graph that will be executed by an external
+// runner.
+pub fn build(b: *std.Build) void {
+    // Overview of the build graph declared below:
+    //   * a static library `suzu` built from `src/root.zig`,
+    //   * an executable `suzu` built from `src/main.zig` that imports it,
+    //   * a `run` step that runs the installed executable,
+    //   * a `test` step that runs the unit tests of both modules.
+
+    // Target selection is left to whoever runs `zig build`: the defaults are
+    // not overridden, so every target is accepted and native is the default.
+    // Other options exist for restricting the supported set if ever needed.
+    const target = b.standardTargetOptions(.{});
+
+    // The optimization mode is likewise chosen on the command line, among
+    // Debug, ReleaseSafe, ReleaseFast and ReleaseSmall; no release mode is
+    // preferred by this script.
+    const optimize = b.standardOptimizeOption(.{});
+
+    // Module for the library half of the project. A module is a collection
+    // of source files together with the options used to compile them, and
+    // every artifact below is based on one. `root_source_file` is the entry
+    // point of the module; here it is a plain path inside the repository.
+    const library_module = b.createModule(.{
+        .root_source_file = b.path("src/root.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    // Module for the other entry point, the command-line program in
+    // `src/main.zig`, compiled with the same target and optimization mode.
+    const program_module = b.createModule(.{
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    // `addImport` is what lets source code write `@import("name")` for a name
+    // that is not a file path. Here the program module gains access to the
+    // library module under the name "suzu_lib".
+    program_module.addImport("suzu_lib", library_module);
+
+    // Static library built from the library module. `addLibrary` returns a
+    // `std.Build.Step.Compile`, the step that actually invokes the compiler
+    // once the graph is executed.
+    const static_library = b.addLibrary(.{
+        .linkage = .static,
+        .name = "suzu",
+        .root_module = library_module,
+    });
+
+    // Ask for the library to be installed into the standard location by the
+    // "install" step, which is the default step of a bare `zig build`.
+    b.installArtifact(static_library);
+
+    // Second compile step, this time producing an executable instead of a
+    // static library. It is also named `suzu` and is built from the program
+    // module.
+    const program = b.addExecutable(.{
+        .name = "suzu",
+        .root_module = program_module,
+    });
+
+    // Install the executable into the standard location too, as part of the
+    // same "install" step.
+    b.installArtifact(program);
+
+    // Create (but do not yet schedule) a step that runs the executable. It
+    // is only executed when a step that depends on it is evaluated; the
+    // "run" step defined further down provides that dependency.
+    const run_program = b.addRunArtifact(program);
+
+    // Depending on the install step makes the program run from the
+    // installation directory instead of from inside the cache directory.
+    // That is optional, but it guarantees that any other installed files
+    // the program relies on are present in their expected location.
+    run_program.step.dependOn(b.getInstallStep());
+
+    // Arguments given after `--` on the build command line are forwarded to
+    // the program, e.g. `zig build run -- arg1 arg2 etc`.
+    if (b.args) |forwarded_args| {
+        run_program.addArgs(forwarded_args);
+    }
+
+    // Named step selectable as `zig build run` and listed by
+    // `zig build --help`. Evaluating it runs the program rather than
+    // performing the default "install".
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_program.step);
+
+    // Unit tests of the library module. `addTest` only builds the test
+    // executable; the run artifact created right after it is what executes
+    // the tests.
+    const library_tests = b.addTest(.{
+        .root_module = library_module,
+    });
+
+    const run_library_tests = b.addRunArtifact(library_tests);
+
+    // Same pair of steps for the unit tests of the program module.
+    const program_tests = b.addTest(.{
+        .root_module = program_module,
+    });
+
+    const run_program_tests = b.addRunArtifact(program_tests);
+
+    // Named step selectable as `zig build test` and listed by
+    // `zig build --help`; it depends on both test runs so a single command
+    // executes every unit test.
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&run_library_tests.step);
+    test_step.dependOn(&run_program_tests.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..17813a9
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,86 @@
+.{
+    // This is the default name used by packages depending on this one. For
+    // example, when a user runs `zig fetch --save <url>`, this field is used
+    // as the key in the `dependencies` table. Although the user can choose a
+    // different name, most users will stick with this provided value.
+    //
+    // It is redundant to include "zig" in this name because it is already
+    // within the Zig package namespace.
+    .name = .suzu,
+
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+
+    // Together with name, this represents a globally unique package
+    // identifier. This field is generated by the Zig toolchain when the
+    // package is first created, and then *never changes*. This allows
+    // unambiguous detection of one package being an updated version of
+    // another.
+    //
+    // When forking a Zig project, this id should be regenerated (delete the
+    // field and run `zig build`) if the upstream project is still maintained.
+    // Otherwise, the fork is *hostile*, attempting to take control over the
+    // original project's identity. Thus it is recommended to leave the comment
+    // on the following line intact, so that it shows up in code reviews that
+    // modify the field.
+    .fingerprint = 0xff4b6e1a9d31d955, // Changing this has security and trust implications.
+
+    // Tracks the earliest Zig version that the package considers to be a
+    // supported use case.
+    .minimum_zig_version = "0.14.0",
+
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    .dependencies = .{
+        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
+        //    // which will prevent zig from using it.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+        //
+        //    // When this is set to `true`, a package is declared to be lazily
+        //    // fetched. This makes the dependency only get fetched if it is
+        //    // actually used.
+        //    .lazy = false,
+        //},
+    },
+
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package. Only files listed here will remain on disk
+    // when using the zig package manager. As a rule of thumb, one should list
+    // files required for compilation plus any license(s).
+    // Paths are relative to the build root. Use the empty string (`""`) to refer to
+    // the build root itself.
+    // A directory listed here means that all files within, recursively, are included.
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        // For example...
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/src/frontend/token.zig b/src/frontend/token.zig
new file mode 100644
index 0000000..6980f08
--- /dev/null
+++ b/src/frontend/token.zig
@@ -0,0 +1,178 @@
+const std = @import("std");
+
+pub const TokenKind = enum(u24) { // 24-bit tag: `op_kind` packs the 1 to 3 bytes of an operator into it
+    name, // a letter or '_' followed by any run of letters, digits and '_'
+    number, // a run of decimal digits
+    eof, // no input left
+    semicolon, // the single byte ';'
+    invalid, // a byte no tokenizer rule accepts, or an operator run whose length is not 1 to 3
+    _, // non-exhaustive: operator kinds are the unnamed values returned by `op_kind`
+};
+pub const Token = struct { // one lexeme, located by byte offsets into the tokenizer's `src`
+    kind: TokenKind, // category of the lexeme
+    start: usize, // index of the first byte of the lexeme
+    end: usize, // index one past its last byte (`start == end` for `.eof`)
+};
+
+/// Returns true when `c` may start a name: an ASCII letter or an underscore.
+fn is_alpha(c: u8) bool {
+    const is_lower = 'a' <= c and c <= 'z';
+    const is_upper = 'A' <= c and c <= 'Z';
+    return is_lower or is_upper or c == '_';
+}
+/// Returns true when `c` is an ASCII decimal digit, '0' through '9'.
+/// Both the first byte and the following bytes of a number use this test.
+fn is_numeric(c: u8) bool {
+    if (c < '0') return false;
+    return c <= '9';
+}
+/// Returns true for the blank bytes skipped between tokens: space, tab, line
+/// feed and carriage return.
+fn is_whitespace(c: u8) bool {
+    // '\r' is accepted so that CRLF line endings are skipped, not reported as `.invalid`.
+    return c == ' ' or c == '\t' or c == '\n' or c == '\r';
+}
+fn is_alphanumeric(c: u8) bool {
+    return is_numeric(c) or is_alpha(c); // bytes allowed in a name after its first byte
+}
+/// Returns true when `c` is one of the 25 punctuation bytes that can be part of an operator.
+fn is_op(c: u8) bool {
+    const single = c == '!' or c == ':' or c == '[' or c == ']' or c == '^';
+    const ranged = switch (c) { '$'...'&', '('...'/', '<'...'@', '{'...'~' => true, else => false };
+    return single or ranged;
+}
+
+/// Encodes an operator spelling as a `TokenKind`. Spellings of one to three bytes
+/// become the integer formed by their bytes (a two- or three-byte spelling is
+/// reinterpreted in native byte order); any other length yields `.invalid`.
+pub fn op_kind(op: []const u8) TokenKind {
+    if (op.len == 1) return @enumFromInt(@as(u24, op[0]));
+    if (op.len == 2) return @enumFromInt(@as(u24, @as(u16, @bitCast(op[0..2].*))));
+    if (op.len == 3) return @enumFromInt(@as(u24, @bitCast(op[0..3].*)));
+    return .invalid;
+}
+
+/// Single-pass tokenizer over a byte slice, with one token of lookahead.
+pub const Tokenizer = struct {
+    /// Bytes being tokenized; only read, never modified.
+    src: []const u8,
+    /// Index in `src` of the next byte that has not been consumed yet.
+    i: usize,
+    /// Token already scanned by `peek` that `next` has not handed out yet.
+    cache: ?Token,
+    /// Creates a tokenizer positioned on the first byte of `src`.
+    pub fn init(src: []const u8) Tokenizer {
+        return .{ .src = src, .i = 0, .cache = null };
+    }
+    /// Consumes and returns the next byte, or null when the input is exhausted.
+    fn get_char(self: *Tokenizer) ?u8 {
+        const c = self.peek_char() orelse return null;
+        self.i += 1;
+        return c;
+    }
+    /// Returns the next byte without consuming it, or null when the input is exhausted.
+    fn peek_char(self: *const Tokenizer) ?u8 {
+        return if (self.i < self.src.len) self.src[self.i] else null;
+    }
+    /// Returns true when no byte is left to scan and no peeked token is waiting.
+    pub fn at_end(self: *const Tokenizer) bool {
+        // A token buffered by `peek` is still owed to the caller, even if `i` reached the end.
+        return self.cache == null and self.i >= self.src.len;
+    }
+    /// Returns the next token without consuming it; repeated calls return the same token.
+    pub fn peek(self: *Tokenizer) Token {
+        if (self.cache == null) self.cache = self.get_next();
+        return self.cache.?;
+    }
+    /// Consumes and returns the next token, which is the peeked one if there is any.
+    pub fn next(self: *Tokenizer) Token {
+        const pending = self.cache orelse return self.get_next();
+        self.cache = null;
+        return pending;
+    }
+    /// Scans one token starting at `self.i`, skipping any whitespace in front of it.
+    /// Returns `.eof` with `start == end` when the input is exhausted.
+    fn get_next(self: *Tokenizer) Token {
+        const State = enum { start, op, name, whitespace, number };
+        var start = self.i;
+        // Labeled switch used as a state machine: `continue :st` dispatches again on a
+        // new state, `break :st` yields the kind of the finished token.
+        const kind: TokenKind = st: switch (State.start) {
+            .start => {
+                const c = self.get_char() orelse break :st .eof;
+                if (is_alpha(c))
+                    continue :st .name
+                else if (is_numeric(c))
+                    continue :st .number
+                else if (is_whitespace(c))
+                    continue :st .whitespace
+                else if (is_op(c))
+                    continue :st .op
+                else if (c == ';')
+                    break :st .semicolon
+                else
+                    break :st .invalid;
+            },
+            .op => {
+                // Greedy: the whole run of operator bytes is one token, encoded by `op_kind`.
+                while (is_op(self.peek_char() orelse 0)) self.i += 1;
+                break :st op_kind(self.src[start..self.i]);
+            },
+            .name => {
+                while (is_alphanumeric(self.peek_char() orelse 0)) self.i += 1;
+                break :st .name;
+            },
+            .whitespace => {
+                while (is_whitespace(self.peek_char() orelse 0)) self.i += 1;
+                start = self.i; // the token proper begins after the skipped blanks
+                continue :st .start;
+            },
+            .number => {
+                while (is_numeric(self.peek_char() orelse 0)) self.i += 1;
+                break :st .number;
+            },
+        };
+        return .{ .kind = kind, .start = start, .end = self.i };
+    }
+};
+
+fn check_tokenizer(comptime input: []const u8, comptime expected: []const u8) !void {
+    comptime { // `expected` is a shape string lined up byte-for-byte with `input`; ' ' and '\\' mark skipped blanks
+        var lexer = Tokenizer.init(input);
+        var begin = std.mem.indexOfNone(u8, expected, " \\") orelse unreachable;
+        while (begin < expected.len) {
+            const stop = std.mem.indexOfNonePos(u8, expected, begin, &[_]u8{expected[begin]}) orelse expected.len; // end of this run
+            const want: TokenKind = switch (expected[begin]) {
+                'a' => .name,
+                '0' => .number,
+                'o' => op_kind(input[begin..stop]),
+                ';' => .semicolon,
+                else => unreachable, // any other shape byte is a mistake in the test itself
+            };
+            try std.testing.expectEqual(Token{ .kind = want, .start = begin, .end = stop }, lexer.next());
+            begin = std.mem.indexOfNonePos(u8, expected, stop, " \\") orelse expected.len; // jump to the next run
+        }
+        try std.testing.expectEqual(Token{ .kind = .eof, .start = expected.len, .end = expected.len }, lexer.next());
+    }
+}
+
+test "basic" {
+    const src = "aaa-bb+c-"; // names and one-byte operators with no blanks in between
+    const exp = "aaaoaaoao";
+    try check_tokenizer(src, exp);
+}
+test "number" {
+    const src = "1 + 2 + 3"; // single digits and operators separated by spaces
+    const exp = "0 o 0 o 0";
+    try check_tokenizer(src, exp);
+}
+test "spacing" {
+    const src = "\n  1 \n\t\t \t +++ 2292929 + 3\t"; // mixed blanks, including leading and trailing ones
+    const exp = "\\  0 \\\\\\ \\ ooo 0000000 o 0\\"; // each backslash lines up with one '\n' or '\t' above
+    try check_tokenizer(src, exp);
+}
+test "equals" {
+    const src = "a += 2; b -= 3; c = 5;"; // two-byte operators and semicolons
+    const exp = "a oo 0; a oo 0; a o 0;";
+    try check_tokenizer(src, exp);
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..79ec288
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,29 @@
+const std = @import("std");
+const lib = @import("suzu_lib");
+
+/// Entry point: tokenizes the file named by the first argument and prints every token to stderr.
+pub fn main() !void {
+    // Everything is allocated from one arena that is released when `main` returns.
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    // `std.os.argv` is not populated on Windows or WASI; `argsAlloc` works on every target.
+    const argv = try std.process.argsAlloc(allocator);
+    if (argv.len < 2) {
+        std.debug.print("Usage: {s} <filename>\n", .{argv[0]});
+        return;
+    }
+    const file = try std.fs.cwd().openFile(argv[1], .{});
+    defer file.close();
+    const content = try file.readToEndAlloc(allocator, std.math.maxInt(usize));
+
+    var t = lib.token.Tokenizer.init(content);
+    while (!t.at_end()) {
+        std.debug.print("{}\n", .{t.next()});
+    }
+}
+
+test {
+    std.testing.refAllDecls(@This()); // reference this file's declarations so nested tests run
+}
diff --git a/src/root.zig b/src/root.zig
new file mode 100644
index 0000000..0a843ee
--- /dev/null
+++ b/src/root.zig
@@ -0,0 +1,6 @@
+const std = @import("std");
+pub const token = @import("frontend/token.zig");
+
+test {
+    std.testing.refAllDecls(@This()); // reference this file's declarations (e.g. `token`) so nested tests run
+}