check for valid op in tokenizer
commit 3eb8983028
parent e285a6338e
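In short: the separate PrefixHandler and InfixHandler tables are merged into a single Operator table whose entries carry one precedence plus per-position parse functions, defaulting to handlers that return Error.InvalidOp. The tokenizer now receives this table and, when lexing an operator, prefers the longest candidate (up to three characters) whose packed kind is actually registered, falling back to shorter prefixes and finally to .invalid.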
@@ -37,9 +37,18 @@ pub const Expr = union(enum) {
     }
 };
 
-const Error = error{ InvalidOp, SyntaxError, OutOfMemory };
-const PrefixHandler = struct { precedence: u32, parse: *const fn (*Parser, u32, Token) Error!*const Expr };
-const InfixHandler = struct { precedence: u32, parse: *const fn (*Parser, u32, *const Expr, Token) Error!*const Expr };
+pub const Error = error{ InvalidOp, SyntaxError, OutOfMemory };
+fn no_prefix(_: *Parser, _: u32, _: Token) Error!*const Expr {
+    return Error.InvalidOp;
+}
+fn no_infix(_: *Parser, _: u32, _: *const Expr, _: Token) Error!*const Expr {
+    return Error.InvalidOp;
+}
+pub const Operator = struct {
+    precedence: u32 = 0,
+    parse_prefix: *const fn (*Parser, u32, Token) Error!*const Expr = no_prefix,
+    parse_infix: *const fn (*Parser, u32, *const Expr, Token) Error!*const Expr = no_infix,
+};
 
 fn prefix_atom(parser: *Parser, _: u32, tok: Token) Error!*const Expr {
     return try parser.make_expr(.{ .atom = tok });
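Because every Operator field has a default, the merged table can be populated per position: an entry overrides only the roles its token supports, and everything else falls through to the InvalidOp handlers. A minimal sketch of how the defaults compose (illustrative, not part of the commit):

    const bare = Operator{}; // precedence 0; both handlers return Error.InvalidOp
    const atom = Operator{ .parse_prefix = prefix_atom }; // prefix-only entry, as
    // registered for names and numbers in init below; parse_infix keeps the
    // no_infix backstop, so the entry cannot be misused in infix position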
@@ -63,18 +72,17 @@ fn infix_binop(parser: *Parser, precedence: u32, lhs: *const Expr, tok: Token) E
 pub const Parser = struct {
     tokenizer: token.Tokenizer,
     allocator: std.mem.Allocator,
-    prefixes: std.AutoHashMap(TokenKind, PrefixHandler),
-    infixes: std.AutoHashMap(TokenKind, InfixHandler),
+    ops: std.AutoHashMap(TokenKind, Operator),
     pub fn parse(self: *Parser, precedence: u32) Error!*const Expr {
         const tok = self.tokenizer.next();
-        const prefix = self.prefixes.get(tok.kind) orelse return Error.InvalidOp;
-        var left = try prefix.parse(self, prefix.precedence, tok);
-        var infix: InfixHandler = undefined;
+        const prefix = self.ops.get(tok.kind) orelse return Error.InvalidOp;
+        var left = try prefix.parse_prefix(self, prefix.precedence, tok);
+        var infix: Operator = undefined;
         while (w: {
-            infix = self.infixes.get(self.tokenizer.peek().kind) orelse break :w false;
+            infix = self.ops.get(self.tokenizer.peek().kind) orelse break :w false;
             break :w infix.precedence > precedence;
         }) {
-            left = try infix.parse(self, infix.precedence, left, self.tokenizer.next());
+            left = try infix.parse_infix(self, infix.precedence, left, self.tokenizer.next());
         }
         return left;
     }
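parse() is a standard Pratt / precedence-climbing loop: parse one prefix expression, then keep folding in infix operators while the upcoming token binds tighter than the current level. A hypothetical trace of parse(0) over "1 + 2 * 3", assuming "+" is registered at precedence 2 and "*" at 3 (neither value appears in this hunk) and that infix_binop recurses with its own precedence for the right-hand side:

    parse(0)
      prefix: atom "1"                     -> left = 1
      peek "+": 2 > 0, consume "+"
        infix_binop: rhs = parse(2)
          prefix: atom "2"                 -> left = 2
          peek "*": 3 > 2, consume "*"
            infix_binop: rhs = parse(3)
              prefix: atom "3"; peek eof   -> returns 3
            -> left = (2 * 3)
          peek eof: loop ends              -> returns (2 * 3)
        -> left = (1 + (2 * 3))
      peek eof: loop ends                  -> returns (1 + (2 * 3))

Note that the strict > (rather than >=) in the loop condition makes equal-precedence operators associate to the left.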
@@ -84,19 +92,20 @@ pub const Parser = struct {
         return ptr;
     }
     fn register_unop(self: *Parser, op: []const u8, precedence: u32) !void {
-        try self.prefixes.put(op_kind(op), .{ .precedence = precedence, .parse = prefix_unop });
+        const op_p = try self.ops.getOrPutValue(op_kind(op), .{ .precedence = precedence });
+        op_p.value_ptr.parse_prefix = prefix_unop;
     }
     fn register_binop(self: *Parser, op: []const u8, precedence: u32) !void {
-        try self.infixes.put(op_kind(op), .{ .precedence = precedence, .parse = infix_binop });
+        const op_p = try self.ops.getOrPutValue(op_kind(op), .{ .precedence = precedence });
+        op_p.value_ptr.parse_infix = infix_binop;
     }
-    pub fn init(tokenizer: token.Tokenizer, allocator: std.mem.Allocator) !Parser {
-        const prefixes = std.AutoHashMap(TokenKind, PrefixHandler).init(allocator);
-        const infixes = std.AutoHashMap(TokenKind, InfixHandler).init(allocator);
-        var p = Parser{ .tokenizer = tokenizer, .allocator = allocator, .prefixes = prefixes, .infixes = infixes };
+    pub fn init(src: []const u8, allocator: std.mem.Allocator) !Parser {
+        const ops = std.AutoHashMap(TokenKind, Operator).init(allocator);
+        var p = Parser{ .tokenizer = token.Tokenizer.init(src, ops), .allocator = allocator, .ops = ops };
 
-        try p.prefixes.put(TokenKind.name, .{ .precedence = 0, .parse = prefix_atom });
-        try p.prefixes.put(TokenKind.number, .{ .precedence = 0, .parse = prefix_atom });
-        try p.prefixes.put(op_kind("("), .{ .precedence = 0, .parse = prefix_paren });
+        try p.ops.put(TokenKind.name, .{ .parse_prefix = prefix_atom });
+        try p.ops.put(TokenKind.number, .{ .parse_prefix = prefix_atom });
+        try p.ops.put(op_kind("("), .{ .parse_prefix = prefix_paren });
         try p.register_unop("+", 4);
         try p.register_unop("-", 4);
 
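One subtlety of getOrPutValue: the .{ .precedence = precedence } initializer is only applied when the key is absent, so a token registered as both prefix and infix keeps whichever precedence was registered first. An illustrative sequence (the binop precedence here is hypothetical):

    try p.register_unop("-", 4); // new entry: precedence = 4, parse_prefix set
    try p.register_binop("-", 2); // entry exists: parse_infix is set, but the
                                  // precedence argument (2) is ignored; "-" stays at 4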
@@ -107,7 +116,6 @@ pub const Parser = struct {
         return p;
     }
     pub fn free(self: *Parser) void {
-        self.prefixes.clearAndFree();
-        self.infixes.clearAndFree();
+        self.ops.clearAndFree();
     }
 };
@@ -61,12 +61,14 @@ pub fn op_kind(op: []const u8) TokenKind {
     return @enumFromInt(v);
 }
 
+const Operator = @import("parse.zig").Operator;
 pub const Tokenizer = struct {
     src: []const u8,
-    i: usize,
-    cache: ?Token,
-    pub fn init(src: []const u8) Tokenizer {
-        return Tokenizer{ .src = src, .i = 0, .cache = null };
+    i: usize = 0,
+    cache: ?Token = null,
+    ops: std.AutoHashMap(TokenKind, Operator),
+    pub fn init(src: []const u8, ops: std.AutoHashMap(TokenKind, Operator)) Tokenizer {
+        return Tokenizer{ .src = src, .ops = ops };
     }
     fn get_char(self: *Tokenizer) ?u8 {
         if (self.i < self.src.len) {
@@ -122,10 +124,25 @@ pub const Tokenizer = struct {
                 break :st .invalid;
             },
             .op => {
-                while (is_op(self.peek_char() orelse 0)) {
-                    self.i += 1;
-                }
-                break :st op_kind(self.src[start..self.i]);
+                const first: u16 = op_map[self.src[start]];
+                if (is_op(self.peek_char() orelse 0)) {
+                    const second = @as(u16, op_map[self.src[self.i]]) << 5;
+                    if (is_op(self.peek_char() orelse 0)) {
+                        const third = @as(u16, op_map[self.src[self.i]]) << 10;
+                        if (self.ops.contains(@enumFromInt(first + second + third))) {
+                            self.i += 2;
+                            break :st @enumFromInt(first + second + third);
+                        }
+                    }
+                    if (self.ops.contains(@enumFromInt(first + second))) {
+                        self.i += 1;
+                        break :st @enumFromInt(first + second);
+                    }
+                }
+                if (self.ops.contains(@enumFromInt(first))) {
+                    break :st @enumFromInt(first);
+                }
+                break :st .invalid;
             },
             .name => {
                 while (is_alphanumeric(self.peek_char() orelse 0)) self.i += 1;
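The first/second/third arithmetic mirrors op_kind: each operator character maps through op_map to a small code that is packed five bits per character (shifts 0, 5, and 10), so an operator of up to three characters collapses into a single TokenKind value. The tokenizer then prefers the longest packed kind actually present in ops and falls back to shorter prefixes, which is the point of the commit: with only "+" registered, "++" now lexes as two "+" tokens rather than a single "++" token that no parser handler would accept. A hedged reconstruction of the packing (op_map's actual table lives outside this diff):

    // Sketch only; assumes op_map is a per-character table of 5-bit codes
    // and operators are at most three characters long.
    fn packed_kind(op: []const u8) TokenKind {
        var v: u16 = 0;
        var shift: u4 = 0;
        for (op) |c| {
            v += @as(u16, op_map[c]) << shift;
            shift += 5; // matches first, second << 5, third << 10 above
        }
        return @enumFromInt(v);
    }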
@@ -146,8 +163,12 @@ pub const Tokenizer = struct {
 };
 
 fn check_tokenizer(comptime input: []const u8, comptime expected: []const u8) !void {
-    comptime {
-        var tokenizer = Tokenizer.init(input);
+    var ops = std.AutoHashMap(TokenKind, Operator).init(std.testing.allocator);
+    defer ops.clearAndFree();
+    for ([_][]const u8{ "(", "+", "-", "*", "/", "+++", "+=", "-=", "=" }) |x| {
+        try ops.put(op_kind(x), .{});
+    }
+    var tokenizer = Tokenizer.init(input, ops);
     var i = std.mem.indexOfNone(u8, expected, " \\") orelse unreachable;
     while (i < expected.len) {
         const j = std.mem.indexOfNonePos(u8, expected, i, &[_]u8{expected[i]}) orelse expected.len;
@@ -162,7 +183,6 @@ fn check_tokenizer(comptime input: []const u8, comptime expected: []const u8) !v
         i = std.mem.indexOfNonePos(u8, expected, j, " \\") orelse expected.len;
     }
     try std.testing.expectEqual(Token{ .kind = .eof, .start = expected.len, .end = expected.len }, tokenizer.next());
-    }
 }
 
 test "basic" {
@@ -18,8 +18,7 @@ pub fn main() !void {
     const content = try file.readToEndAlloc(allocator, std.math.maxInt(usize));
     defer allocator.free(content);
 
-    const t = lib.token.Tokenizer.init(content);
-    var p = try lib.parse.Parser.init(t, allocator);
+    var p = try lib.parse.Parser.init(content, allocator);
     defer p.free();
     std.debug.print("{}\n", .{(try p.parse(0)).eval(content)});
 }