From b7ad73cdb68fbecd3da299a675cca0b05ee38964 Mon Sep 17 00:00:00 2001
From: caandt
Date: Thu, 29 May 2025 00:03:28 -0500
Subject: [PATCH] initial commit

---
 .gitignore             |   7 ++
 build.zig              | 116 ++++++++++++++++++++++
 build.zig.zon          |  86 +++++++++++++++++
 src/frontend/token.zig | 212 +++++++++++++++++++++++++++++++++++++++++
 src/main.zig           |  34 +++++++
 src/root.zig           |   6 ++
 6 files changed, 461 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 build.zig
 create mode 100644 build.zig.zon
 create mode 100644 src/frontend/token.zig
 create mode 100644 src/main.zig
 create mode 100644 src/root.zig

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a57d616
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+.zig-cache/
+zig-out/
+/release/
+/debug/
+/build/
+/build-*/
+/docgen_tmp/
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..84d4954
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,116 @@
+const std = @import("std");
+
+// Although this function looks imperative, note that its job is to
+// declaratively construct a build graph that will be executed by an external
+// runner.
+pub fn build(b: *std.Build) void {
+    // Standard target options allows the person running `zig build` to choose
+    // what target to build for. Here we do not override the defaults, which
+    // means any target is allowed, and the default is native. Other options
+    // for restricting supported target set are available.
+    const target = b.standardTargetOptions(.{});
+
+    // Standard optimization options allow the person running `zig build` to select
+    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
+    // set a preferred release mode, allowing the user to decide how to optimize.
+    const optimize = b.standardOptimizeOption(.{});
+
+    // This creates a "module", which represents a collection of source files alongside
+    // some compilation options, such as optimization mode and linked system libraries.
+    // Every executable or library we compile will be based on one or more modules.
+    const lib_mod = b.createModule(.{
+        // `root_source_file` is the Zig "entry point" of the module. If a module
+        // only contains e.g. external object files, you can make this `null`.
+        // In this case the main source file is merely a path, however, in more
+        // complicated build scripts, this could be a generated file.
+        .root_source_file = b.path("src/root.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    // We will also create a module for our other entry point, 'main.zig'.
+    const exe_mod = b.createModule(.{
+        // `root_source_file` is the Zig "entry point" of the module. If a module
+        // only contains e.g. external object files, you can make this `null`.
+        // In this case the main source file is merely a path, however, in more
+        // complicated build scripts, this could be a generated file.
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    // Modules can depend on one another using the `std.Build.Module.addImport` function.
+    // This is what allows Zig source code to use `@import("foo")` where 'foo' is not a
+    // file path. In this case, we set up `exe_mod` to import `lib_mod`.
+    exe_mod.addImport("suzu_lib", lib_mod);
+
+    // Now, we will create a static library based on the module we created above.
+    // This creates a `std.Build.Step.Compile`, which is the build step responsible
+    // for actually invoking the compiler.
+    const lib = b.addLibrary(.{
+        .linkage = .static,
+        .name = "suzu",
+        .root_module = lib_mod,
+    });
+
+    // This declares intent for the library to be installed into the standard
+    // location when the user invokes the "install" step (the default step when
+    // running `zig build`).
+    b.installArtifact(lib);
+
+    // This creates another `std.Build.Step.Compile`, but this one builds an executable
+    // rather than a static library.
+    const exe = b.addExecutable(.{
+        .name = "suzu",
+        .root_module = exe_mod,
+    });
+
+    // This declares intent for the executable to be installed into the
+    // standard location when the user invokes the "install" step (the default
+    // step when running `zig build`).
+    b.installArtifact(exe);
+
+    // This *creates* a Run step in the build graph, to be executed when another
+    // step is evaluated that depends on it. The next line below will establish
+    // such a dependency.
+    const run_cmd = b.addRunArtifact(exe);
+
+    // By making the run step depend on the install step, it will be run from the
+    // installation directory rather than directly from within the cache directory.
+    // This is not necessary, however, if the application depends on other installed
+    // files, this ensures they will be present and in the expected location.
+    run_cmd.step.dependOn(b.getInstallStep());
+
+    // This allows the user to pass arguments to the application in the build
+    // command itself, like this: `zig build run -- arg1 arg2 etc`
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+
+    // This creates a build step. It will be visible in the `zig build --help` menu,
+    // and can be selected like this: `zig build run`
+    // This will evaluate the `run` step rather than the default, which is "install".
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+
+    // Creates a step for unit testing. This only builds the test executable
+    // but does not run it.
+    const lib_unit_tests = b.addTest(.{
+        .root_module = lib_mod,
+    });
+
+    const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests);
+
+    const exe_unit_tests = b.addTest(.{
+        .root_module = exe_mod,
+    });
+
+    const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
+
+    // Similar to creating the run step earlier, this exposes a `test` step to
+    // the `zig build --help` menu, providing a way for the user to request
+    // running the unit tests.
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&run_lib_unit_tests.step);
+    test_step.dependOn(&run_exe_unit_tests.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..17813a9
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,86 @@
+.{
+    // This is the default name used by packages depending on this one. For
+    // example, when a user runs `zig fetch --save <url>`, this field is used
+    // as the key in the `dependencies` table. Although the user can choose a
+    // different name, most users will stick with this provided value.
+    //
+    // It is redundant to include "zig" in this name because it is already
+    // within the Zig package namespace.
+    .name = .suzu,
+
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+
+    // Together with name, this represents a globally unique package
+    // identifier. This field is generated by the Zig toolchain when the
+    // package is first created, and then *never changes*. This allows
+    // unambiguous detection of one package being an updated version of
+    // another.
+    //
+    // When forking a Zig project, this id should be regenerated (delete the
+    // field and run `zig build`) if the upstream project is still maintained.
+    // Otherwise, the fork is *hostile*, attempting to take control over the
+    // original project's identity. Thus it is recommended to leave the comment
+    // on the following line intact, so that it shows up in code reviews that
+    // modify the field.
+    .fingerprint = 0xff4b6e1a9d31d955, // Changing this has security and trust implications.
+
+    // Tracks the earliest Zig version that the package considers to be a
+    // supported use case.
+    .minimum_zig_version = "0.14.0",
+
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    .dependencies = .{
+        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
+        //    // which will prevent zig from using it.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+        //
+        //    // When this is set to `true`, a package is declared to be lazily
+        //    // fetched. This makes the dependency only get fetched if it is
+        //    // actually used.
+        //    .lazy = false,
+        //},
+    },
+
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package. Only files listed here will remain on disk
+    // when using the zig package manager. As a rule of thumb, one should list
+    // files required for compilation plus any license(s).
+    // Paths are relative to the build root. Use the empty string (`""`) to refer to
+    // the build root itself.
+    // A directory listed here means that all files within, recursively, are included.
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        // For example...
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/src/frontend/token.zig b/src/frontend/token.zig
new file mode 100644
index 0000000..6980f08
--- /dev/null
+++ b/src/frontend/token.zig
@@ -0,0 +1,212 @@
+//! Tokenizer for the suzu frontend: splits a source buffer into `Token`s.
+const std = @import("std");
+
+/// Kind of a token. The named tags are the fixed categories; any other value
+/// is an operator whose bytes were packed into the tag by `op_kind`.
+pub const TokenKind = enum(u24) {
+    name,
+    number,
+    eof,
+    semicolon,
+    invalid,
+    _,
+};
+/// A token kind plus the byte range `src[start..end]` it covers.
+pub const Token = struct {
+    kind: TokenKind,
+    start: usize,
+    end: usize,
+};
+
+fn is_alpha(c: u8) bool {
+    return switch (c) {
+        'a'...'z', 'A'...'Z', '_' => true,
+        else => false,
+    };
+}
+fn is_numeric(c: u8) bool {
+    return switch (c) {
+        '0'...'9' => true,
+        else => false,
+    };
+}
+fn is_whitespace(c: u8) bool {
+    return switch (c) {
+        // '\r' is accepted so that sources with CRLF line endings tokenize cleanly.
+        ' ', '\t', '\n', '\r' => true,
+        else => false,
+    };
+}
+fn is_alphanumeric(c: u8) bool {
+    return is_alpha(c) or is_numeric(c);
+}
+fn is_op(c: u8) bool {
+    return switch (c) {
+        '!', '@', '$', '%', '^', '&', '*', '(', ')', '-', '+', '=', '~', '[', ']', '{', '}', '|', ':', '<', '>', ',', '.', '?', '/' => true,
+        else => false,
+    };
+}
+
+/// Maps an operator spelling of 1 to 3 bytes to its `TokenKind` by packing the
+/// bytes into the `u24` tag. 2- and 3-byte spellings go through `@bitCast`, so
+/// their numeric value follows native byte order. Other lengths yield `.invalid`.
+pub fn op_kind(op: []const u8) TokenKind {
+    const v: u24 = switch (op.len) {
+        1 => @intCast(op[0]),
+        2 => @intCast(@as(u16, @bitCast(op[0..2].*))),
+        3 => @bitCast(op[0..3].*),
+        else => @intFromEnum(TokenKind.invalid),
+    };
+    return @enumFromInt(v);
+}
+
+/// Streaming tokenizer over `src` with one token of lookahead.
+pub const Tokenizer = struct {
+    src: []const u8,
+    /// Index of the next unread byte in `src`.
+    i: usize,
+    /// Token produced by `peek` that `next` has not returned yet.
+    cache: ?Token,
+    pub fn init(src: []const u8) Tokenizer {
+        return Tokenizer{ .src = src, .i = 0, .cache = null };
+    }
+    fn get_char(self: *Tokenizer) ?u8 {
+        if (self.i < self.src.len) {
+            defer self.i += 1;
+            return self.src[self.i];
+        } else {
+            return null;
+        }
+    }
+    fn peek_char(self: *const Tokenizer) ?u8 {
+        return if (self.i < self.src.len) self.src[self.i] else null;
+    }
+    /// True when every byte of `src` has been consumed and `peek` is not
+    /// holding back a real (non-`.eof`) token that `next` has yet to return.
+    pub fn at_end(self: *const Tokenizer) bool {
+        if (self.cache) |c| return c.kind == .eof;
+        return self.i >= self.src.len;
+    }
+    /// Returns the next token without consuming it.
+    pub fn peek(self: *Tokenizer) Token {
+        if (self.cache) |c| {
+            return c;
+        }
+        self.cache = self.get_next();
+        return self.cache.?;
+    }
+    /// Consumes and returns the next token; yields `.eof` once the input is exhausted.
+    pub fn next(self: *Tokenizer) Token {
+        if (self.cache) |c| {
+            self.cache = null;
+            return c;
+        }
+        return self.get_next();
+    }
+    /// Scans one token starting at `self.i`, skipping any leading whitespace.
+    fn get_next(self: *Tokenizer) Token {
+        const State = enum {
+            start,
+            op,
+            name,
+            whitespace,
+            number,
+        };
+        var start = self.i;
+        const kind: TokenKind = st: switch (State.start) {
+            .start => {
+                const c = self.get_char() orelse break :st .eof;
+                if (is_alpha(c))
+                    continue :st .name
+                else if (is_numeric(c))
+                    continue :st .number
+                else if (is_whitespace(c))
+                    continue :st .whitespace
+                else if (is_op(c))
+                    continue :st .op
+                else if (c == ';')
+                    break :st .semicolon
+                else
+                    break :st .invalid;
+            },
+            .op => {
+                // Operators are matched greedily; `op_kind` maps runs longer than 3 bytes to `.invalid`.
+                while (is_op(self.peek_char() orelse 0)) {
+                    self.i += 1;
+                }
+                break :st op_kind(self.src[start..self.i]);
+            },
+            .name => {
+                while (is_alphanumeric(self.peek_char() orelse 0)) self.i += 1;
+                break :st .name;
+            },
+            .whitespace => {
+                while (is_whitespace(self.peek_char() orelse 0)) self.i += 1;
+                // The token proper begins after the skipped whitespace.
+                start = self.i;
+                continue :st .start;
+            },
+            .number => {
+                while (is_numeric(self.peek_char() orelse 0)) self.i += 1;
+                break :st .number;
+            },
+        };
+        return .{ .kind = kind, .start = start, .end = self.i };
+    }
+};
+
+/// Tokenizes `input` at comptime and checks it against `expected`, a pattern of
+/// the same length in which runs of 'a', '0', 'o' and ';' mark name, number,
+/// operator and semicolon tokens, and ' ' or '\' mark skipped whitespace.
+fn check_tokenizer(comptime input: []const u8, comptime expected: []const u8) !void {
+    comptime {
+        var tokenizer = Tokenizer.init(input);
+        var i = std.mem.indexOfNone(u8, expected, " \\") orelse unreachable;
+        while (i < expected.len) {
+            const j = std.mem.indexOfNonePos(u8, expected, i, &[_]u8{expected[i]}) orelse expected.len;
+            const k = switch (expected[i]) {
+                'a' => .name,
+                '0' => .number,
+                'o' => op_kind(input[i..j]),
+                ';' => .semicolon,
+                else => unreachable,
+            };
+            try std.testing.expectEqual(Token{ .kind = k, .start = i, .end = j }, tokenizer.next());
+            i = std.mem.indexOfNonePos(u8, expected, j, " \\") orelse expected.len;
+        }
+        try std.testing.expectEqual(Token{ .kind = .eof, .start = expected.len, .end = expected.len }, tokenizer.next());
+    }
+}
+
+test "basic" {
+    const i = "aaa-bb+c-";
+    const e = "aaaoaaoao";
+    try check_tokenizer(i, e);
+}
+test "number" {
+    const i = "1 + 2 + 3";
+    const e = "0 o 0 o 0";
+    try check_tokenizer(i, e);
+}
+test "spacing" {
+    const i = "\n 1 \n\t\t \t +++ 2292929 + 3\t";
+    const e = "\\ 0 \\\\\\ \\ ooo 0000000 o 0\\";
+    try check_tokenizer(i, e);
+}
+test "equals" {
+    const i = "a += 2; b -= 3; c = 5;";
+    const e = "a oo 0; a oo 0; a o 0;";
+    try check_tokenizer(i, e);
+}
+test "crlf" {
+    const i = "a\r\nb";
+    const e = "a\\\\a";
+    try check_tokenizer(i, e);
+}
+test "at_end after peek" {
+    var t = Tokenizer.init("a");
+    try std.testing.expectEqual(TokenKind.name, t.peek().kind);
+    try std.testing.expect(!t.at_end());
+    _ = t.next();
+    try std.testing.expect(t.at_end());
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..79ec288
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,34 @@
+const std = @import("std");
+const lib = @import("suzu_lib");
+
+/// Tokenizes the file named by the first command-line argument and prints
+/// each token with `std.debug.print`.
+pub fn main() !void {
+    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+    defer arena.deinit();
+    const allocator = arena.allocator();
+
+    // `std.os.argv` is not populated on every target (e.g. Windows without
+    // libc, WASI), so use the portable argument API instead.
+    const args = try std.process.argsAlloc(allocator);
+
+    if (args.len < 2) {
+        const program: []const u8 = if (args.len > 0) args[0] else "suzu";
+        std.debug.print("Usage: {s} <file>\n", .{program});
+        return;
+    }
+    const name = args[1];
+    const file = try std.fs.cwd().openFile(name, .{});
+    defer file.close();
+    const content = try file.readToEndAlloc(allocator, std.math.maxInt(usize));
+    defer allocator.free(content);
+
+    var t = lib.token.Tokenizer.init(content);
+    while (!t.at_end()) {
+        std.debug.print("{}\n", .{t.next()});
+    }
+}
+
+test {
+    std.testing.refAllDecls(@This());
+}
diff --git a/src/root.zig b/src/root.zig
new file mode 100644
index 0000000..0a843ee
--- /dev/null
+++ b/src/root.zig
@@ -0,0 +1,6 @@
+const std = @import("std");
+pub const token = @import("frontend/token.zig");
+
+test {
+    std.testing.refAllDecls(@This());
+}