diff --git a/build.zig b/build.zig
index 92f587c..dc68d16 100644
--- a/build.zig
+++ b/build.zig
@@ -2,7 +2,7 @@
 const std = @import("std");
 const cases = @import("tests/cases.zig");
 
-var trace: ?bool = false;
+var trace: bool = false;
 var @"enable-bench": ?bool = false;
 var backend: TraceBackend = .None;
 
@@ -19,9 +19,9 @@ pub fn build(b: *std.Build) !void {
 
     trace = b.option(bool, "trace",
        \\Enables tracing of the compiler using the default backend (spall)
-    );
+    ) orelse false;
 
-    if (trace) |_| {
+    if (trace) {
        backend = b.option(TraceBackend, "trace-backend",
            \\Switch between what backend to use. None is default.
        ) orelse backend;
@@ -40,7 +40,7 @@ pub fn build(b: *std.Build) !void {
 
     const exe_options = b.addOptions();
 
-    exe_options.addOption(bool, "trace", trace orelse false);
+    exe_options.addOption(bool, "trace", trace);
     exe_options.addOption(TraceBackend, "backend", backend);
     exe_options.addOption(std.log.Level, "debug_log", debug_log);
     exe_options.addOption(usize, "src_file_trimlen", std.fs.path.dirname(std.fs.path.dirname(@src().file).?).?.len);
diff --git a/demo/print_ast.py b/demo/print_ast.py
index 788f781..e69de29 100644
--- a/demo/print_ast.py
+++ b/demo/print_ast.py
@@ -1,7 +0,0 @@
-
-import marshal
-
-filename = './demo/test.py'
-with open(filename, 'r') as f:
-    bytes = marshal.load(f)
-
diff --git a/demo/test.py b/demo/test.py
index cf58eef..d25d49e 100644
--- a/demo/test.py
+++ b/demo/test.py
@@ -1,5 +1 @@
-# Set
-a = {1, 2, 3, 4, 5}
-# a.add(6)
-
-print(a)
\ No newline at end of file
+a = 1
\ No newline at end of file
diff --git a/src/Manager.zig b/src/Manager.zig
index 43bc49e..cef090a 100644
--- a/src/Manager.zig
+++ b/src/Manager.zig
@@ -7,8 +7,7 @@ const Manager = @This();
 
 const tracer = @import("tracer");
 
-// const Tokenizer = @import("frontend/tokenizer/Tokenizer.zig");
-// const Parser = @import("frontend/Parser.zig");
+const Ast = @import("frontend/Ast.zig");
 const Marshal = @import("compiler/Marshal.zig");
 const Vm = @import("vm/Vm.zig");
 
@@ -31,7 +30,6 @@ pub fn run_pyc(manager: *Manager, file_name: []const u8) !void {
 
     // Open source file.
     const source_file = try std.fs.cwd().openFile(file_name, .{});
-
     const source_file_size = (try source_file.stat()).size;
 
     const source = try source_file.readToEndAllocOptions(
@@ -50,34 +48,19 @@ pub fn run_pyc(manager: *Manager, file_name: []const u8) !void {
 }
 
 pub fn run_file(manager: *Manager, file_name: []const u8) !void {
-    _ = std.ChildProcess.run(.{
-        .allocator = manager.allocator,
-        .argv = &.{
-            "python3.10",
-            "-m",
-            "py_compile",
-            file_name,
-        },
-        .cwd = ".",
-        .expand_arg0 = .expand,
-    }) catch @panic("failed to side-run python");
-
-    // This outputs to __pycache__/file_name.cpython-310.pyc
-    const output_file_name: []const u8 = name: {
-        const trimmed_name: []const u8 = file_name[0 .. file_name.len - ".py".len];
-        const output_file = std.fs.path.basename(trimmed_name);
-
-        log.debug("Trimmed: {s}", .{trimmed_name});
-
-        const output_dir = std.fs.path.dirname(trimmed_name) orelse @panic("why in root");
-
-        const output_pyc = try std.fmt.allocPrint(manager.allocator, "{s}/__pycache__/{s}.cpython-310.pyc", .{ output_dir, output_file });
-
-        break :name output_pyc;
-    };
+    const source_file = try std.fs.cwd().openFile(file_name, .{ .lock = .exclusive });
+    defer source_file.close();
+
+    const source_file_size = (try source_file.stat()).size;
 
-    log.debug("File: {s}", .{output_file_name});
+    const source = try source_file.readToEndAllocOptions(
+        manager.allocator,
+        source_file_size,
+        source_file_size,
+        @alignOf(u8),
+        0,
+    );
 
-    // Run python on that.
-    try manager.run_pyc(output_file_name);
+    const ast = try Ast.parse(source, manager.allocator);
+    _ = ast;
 }
diff --git a/src/frontend/Ast.zig b/src/frontend/Ast.zig
new file mode 100644
index 0000000..6179055
--- /dev/null
+++ b/src/frontend/Ast.zig
@@ -0,0 +1,58 @@
+//! Generates an AST given python source code.
+
+source: [:0]const u8,
+tokens: TokenList.Slice,
+nodes: NodeList.Slice,
+
+pub const NodeList = std.MultiArrayList(Parser.Node);
+pub const TokenList = std.MultiArrayList(Token);
+
+pub const TokenIndex = u32;
+
+pub fn parse(source: [:0]const u8, allocator: Allocator) !Ast {
+    var tokens: std.MultiArrayList(Token) = .{};
+    defer tokens.deinit(allocator);
+
+    var tokenizer = Tokenizer.init(source);
+    while (true) {
+        const token = tokenizer.next();
+        log.debug("Token: {}", .{token.tag});
+        try tokens.append(allocator, .{
+            .tag = token.tag,
+            .start = @as(u32, @intCast(token.loc.start)),
+        });
+        if (token.tag == .eof) break;
+    }
+
+    var parser = Parser{
+        .tokens = tokens,
+        .token_index = 0,
+        .allocator = allocator,
+        .nodes = .{},
+        .source = source,
+    };
+    defer parser.tokens.deinit(allocator);
+    defer parser.nodes.deinit(allocator);
+
+    try parser.parseFile();
+
+    return Ast{
+        .source = source,
+        .tokens = tokens.toOwnedSlice(),
+        .nodes = parser.nodes.toOwnedSlice(),
+    };
+}
+
+pub const Token = struct {
+    tag: Tokenizer.Token.Tag,
+    start: u32,
+};
+
+const Parser = @import("Parser.zig");
+const Tokenizer = @import("Tokenizer.zig");
+
+const log = std.log.scoped(.ast);
+
+const Ast = @This();
+const std = @import("std");
+const Allocator = std.mem.Allocator;
diff --git a/src/frontend/Parser.zig b/src/frontend/Parser.zig
new file mode 100644
index 0000000..3cd07e3
--- /dev/null
+++ b/src/frontend/Parser.zig
@@ -0,0 +1,109 @@
+//! Converts a list of Tokens into an AST
+
+tokens: Ast.TokenList,
+nodes: Ast.NodeList,
+allocator: Allocator,
+token_index: u32 = 0,
+
+source: [:0]const u8,
+
+/// file: [statements] ENDMARKER
+pub fn parseFile(p: *Parser) !void {
+    if (p.tokens.get(p.token_index).tag == .eof) return;
+    try p.parseStatements();
+    _ = p.eatToken(.eof) orelse return error.NotEof;
+}
+
+/// statements: statement+
+fn parseStatements(p: *Parser) !void {
+    while (p.tokens.get(p.token_index).tag != .eof) {
+        try p.parseStatement();
+    }
+}
+
+/// statement: compound_stmt | simple_stmts
+fn parseStatement(p: *Parser) !void {
+    // TODO: compound_stmt
+    try p.parseSimpleStatment();
+}
+
+fn parseSimpleStatment(p: *Parser) !void {
+    const tag = p.tokens.get(p.token_index).tag;
+    switch (tag) {
+        .identifier => {
+            const next_tag = p.tokens.get(p.token_index + 1).tag;
+            if (next_tag == .eof) {
+                @panic("simple statment found eof after ident");
+            }
+            switch (next_tag) {
+                .assign => try p.parseAssignExpr(),
+                else => std.debug.panic("TODO: parseSimpleStatment identifier {}", .{next_tag}),
+            }
+        },
+        else => std.debug.panic("TODO: parseSimpleStatment {}", .{tag}),
+    }
+}
+
+/// assignment:
+/// | NAME ':' expression ['=' annotated_rhs ]
+/// | ('(' single_target ')'
+/// | single_subscript_attribute_target) ':' expression ['=' annotated_rhs ]
+/// | (star_targets '=' )+ (yield_expr | star_expressions) !'=' [TYPE_COMMENT]
+/// | single_target augassign ~ (yield_expr | star_expressions)
+fn parseAssignExpr(p: *Parser) !void {
+    const maybe_ident_tok = p.eatToken(.identifier);
+    if (maybe_ident_tok) |ident_tok| {
+        _ = ident_tok;
+        return;
+    }
+
+    @panic("TODO: parseAssignExpr non-ident");
+}
+
+fn eatToken(p: *Parser, tag: Tokenizer.Token.Tag) ?Token {
+    const next_tok = p.nextToken();
+    if (next_tok.tag == tag) return next_tok;
+    return null;
+}
+
+fn nextToken(p: *Parser) Token {
+    const tok = p.tokens.get(p.token_index);
+    p.token_index += 1;
+    return tok;
+}
+
+fn addNode(p: *Parser, elem: Node) Allocator.Error!Node.Index {
+    const result = @as(Node.Index, @intCast(p.nodes.len));
+    try p.nodes.append(p.allocator, elem);
+    return result;
+}
+
+pub const Node = struct {
+    tag: Tag,
+    main_token: Ast.TokenIndex,
+    data: Data,
+
+    pub const Index = u32;
+
+    pub const Tag = enum(u8) {
+        root,
+        /// An assignment.
+        ///
+        /// `lhs = rhs`. main_token is the `=`.
+        assign,
+    };
+
+    pub const Data = struct {
+        lhs: Index,
+        rhs: Index,
+    };
+};
+
+const Parser = @This();
+
+const std = @import("std");
+const Ast = @import("Ast.zig");
+const Tokenizer = @import("Tokenizer.zig");
+const Token = Ast.Token;
+
+const Allocator = std.mem.Allocator;
diff --git a/src/frontend/Tokenizer.zig b/src/frontend/Tokenizer.zig
new file mode 100644
index 0000000..73855e2
--- /dev/null
+++ b/src/frontend/Tokenizer.zig
@@ -0,0 +1,195 @@
+//! Converts Python source code into a list of Tokens
+
+buffer: [:0]const u8,
+index: usize,
+
+const log = std.log.scoped(.tokenizer);
+
+pub fn init(buffer: [:0]const u8) Tokenizer {
+    const src_start: usize = if (std.mem.startsWith(u8, buffer, "\xEF\xBB\xBF")) 3 else 0;
+    return .{
+        .buffer = buffer,
+        .index = src_start,
+    };
+}
+
+pub fn next(t: *Tokenizer) Token {
+    var state: State = .start;
+    var result: Token = .{
+        .tag = .eof,
+        .loc = .{
+            .start = t.index,
+            .end = undefined,
+        },
+    };
+
+    while (true) : (t.index += 1) {
+        const c = t.buffer[t.index];
+        log.debug("State: {}", .{state});
+        switch (state) {
+            .start => switch (c) {
+                0 => {
+                    if (t.index != t.buffer.len) @panic("eof not at end of file");
+                    break;
+                },
+                'a'...'z', 'A'...'Z' => {
+                    state = .identifier;
+                    result.tag = .identifier;
+                },
+                ' ', '\n', '\r' => {
+                    result.loc.start = t.index + 1;
+                },
+                '=' => {
+                    state = .equal_start;
+                },
+                '0'...'9' => {
+                    state = .int;
+                    result.tag = .number_literal;
+                },
+                else => {
+                    result.tag = .invalid;
+                    result.loc.end = t.index;
+                    t.index += 1;
+                    return result;
+                },
+            },
+            .identifier => switch (c) {
+                'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
+                else => {
+                    if (Token.getKeyword(t.buffer[result.loc.start..t.index])) |tag| {
+                        result.tag = tag;
+                    }
+                    break;
+                },
+            },
+            .int => switch (c) {
+                '0'...'9' => {},
+                else => break,
+            },
+            .equal_start => switch (c) {
+                '=' => {
+                    result.tag = .equal;
+                    break;
+                },
+                else => {
+                    result.tag = .assign;
+                    break;
+                },
+            },
+        }
+    }
+
+    result.loc.end = t.index;
+    return result;
+}
+
+const State = enum {
+    start,
+    identifier,
+    equal_start,
+    int,
+};
+
+pub const Token = struct {
+    tag: Tag,
+    loc: Loc,
+
+    pub const Loc = struct {
+        start: usize,
+        end: usize,
+    };
+
+    pub const Tag = enum(u8) {
+        invalid,
+        eof,
+        identifier,
+        number_literal,
+
+        // keywords
+        keyword_false,
+        keyword_none,
+        keyword_true,
+        keyword_and,
+        keyword_as,
+        keyword_assert,
+        keyword_async,
+        keyword_await,
+        keyword_break,
+        keyword_class,
+        keyword_continue,
+        keyword_def,
+        keyword_del,
+        keyword_elif,
+        keyword_else,
+        keyword_expect,
+        keyword_finally,
+        keyword_for,
+        keyword_from,
+        keyword_global,
+        keyword_if,
+        keyword_import,
+        keyword_in,
+        keyword_is,
+        keyword_lambda,
+        keyword_nonlocal,
+        keyword_not,
+        keyword_or,
+        keyword_pass,
+        keyword_raise,
+        keyword_return,
+        keyword_try,
+        keyword_while,
+        keyword_with,
+        keyword_yield,
+
+        // operators
+        equal,
+        assign,
+    };
+
+    pub fn getKeyword(bytes: []const u8) ?Tag {
+        return keywords.get(bytes);
+    }
+
+    pub const keywords = std.ComptimeStringMap(Tag, .{
+        .{ "False", .keyword_false },
+        .{ "None", .keyword_none },
+        .{ "True", .keyword_true },
+        .{ "class", .keyword_class },
+        .{ "from", .keyword_from },
+        .{ "or", .keyword_or },
+        .{ "continue", .keyword_continue },
+        .{ "global", .keyword_global },
+        .{ "pass", .keyword_pass },
+        .{ "def", .keyword_def },
+        .{ "if", .keyword_if },
+        .{ "raise", .keyword_raise },
+        .{ "and", .keyword_and },
+        .{ "del", .keyword_del },
+        .{ "import", .keyword_import },
+        .{ "return", .keyword_return },
+        .{ "as", .keyword_as },
+        .{ "elif", .keyword_elif },
+        .{ "in", .keyword_in },
+        .{ "try", .keyword_try },
+        .{ "assert", .keyword_assert },
+        .{ "else", .keyword_else },
+        .{ "is", .keyword_is },
+        .{ "while", .keyword_while },
+        .{ "async", .keyword_async },
+        .{ "except", .keyword_expect },
+        .{ "lambda", .keyword_lambda },
+        .{ "with", .keyword_with },
+        .{ "await", .keyword_await },
+        .{ "finally", .keyword_finally },
+        .{ "nonlocal", .keyword_nonlocal },
+        .{ "yield", .keyword_yield },
+        .{ "break", .keyword_break },
+        .{ "for", .keyword_for },
+        .{ "not", .keyword_not },
+    });
+};
+
+const Tokenizer = @This();
+const std = @import("std");
+const assert = std.debug.assert;
diff --git a/src/vm/Object.zig b/src/vm/Object.zig
index f044842..61dd9ff 100644
--- a/src/vm/Object.zig
+++ b/src/vm/Object.zig
@@ -28,7 +28,7 @@ payload: ?(*align(blk: {
 }) anyopaque),
 
 pub const Tag = enum(usize) {
-    const first_payload = @intFromEnum(Tag.none) + 1;
+    const first_payload = @intFromEnum(Tag.int);
 
     // Note: this is the literal None type.
     none,
diff --git a/test.zig b/test.zig
new file mode 100644
index 0000000..2a4cd12
--- /dev/null
+++ b/test.zig
@@ -0,0 +1,7 @@
+const std = @import("std");
+
+pub fn main() !void {
+    const V = @Vector(4, *const @Vector(4, u32));
+    const x: V = @splat(&@splat(10));
+    std.debug.print("x: {}\n", .{x});
+}
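
Note (reviewer sketch, not part of the patch): a quick way to smoke-test the new frontend is to drive src/frontend/Tokenizer.zig directly on the same source that the updated demo/test.py now contains ("a = 1"). The file name tokenizer_demo.zig and running it from the repository root with zig run are assumptions for illustration, not part of this change; the snippet only uses what the patch itself introduces (Tokenizer.init, Tokenizer.next, and the Token.tag / Token.loc fields).

const std = @import("std");
const Tokenizer = @import("src/frontend/Tokenizer.zig");

pub fn main() !void {
    // The tokenizer expects a null-terminated buffer ([:0]const u8).
    const source: [:0]const u8 = "a = 1";

    var tokenizer = Tokenizer.init(source);
    while (true) {
        const token = tokenizer.next();
        // Expected sequence for "a = 1": identifier, assign, number_literal, eof.
        std.debug.print("{s}: '{s}'\n", .{
            @tagName(token.tag),
            source[token.loc.start..token.loc.end],
        });
        if (token.tag == .eof) break;
    }
}

Parser.parseFile is still a skeleton at this stage (most paths end in TODO panics), so the tokenizer is the part of the new frontend that can be exercised end to end.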