diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..24a5fb7 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,23 @@ +name: test + +on: + push: + branches: + - master + - main + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: erlef/setup-beam@v1 + with: + otp-version: "26.0.2" + gleam-version: "1.0.0-rc2" + rebar3-version: "3" + # elixir-version: "1.15.4" + - run: gleam deps download + - run: gleam test + - run: gleam format --check src test diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..599be4e --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.beam +*.ez +/build +erl_crash.dump diff --git a/README.md b/README.md new file mode 100644 index 0000000..62696d2 --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@ +# type_inference + +Following the execellent [Type Inference by Example](https://ahnfelt.medium.com/type-inference-by-example-793d83f98382) article by Joakim Ahnfelt-Rønne in Gleam. + +## Development + +```sh +gleam run # Run the project +gleam test # Run the tests +``` diff --git a/gleam.toml b/gleam.toml new file mode 100644 index 0000000..2df21f8 --- /dev/null +++ b/gleam.toml @@ -0,0 +1,20 @@ +name = "type_inference" +target = "javascript" +version = "1.0.0" + +# Fill out these fields if you intend to generate HTML documentation or publish +# your project to the Hex package manager. +# +# description = "" +# licences = ["Apache-2.0"] +# repository = { type = "github", user = "username", repo = "project" } +# links = [{ title = "Website", href = "https://gleam.run" }] +# +# For a full reference of all the available options, you can have a look at +# https://gleam.run/writing-gleam/gleam-toml/. + +[dependencies] +gleam_stdlib = "~> 0.34 or ~> 1.0" + +[dev-dependencies] +gleeunit = "~> 1.0" diff --git a/manifest.toml b/manifest.toml new file mode 100644 index 0000000..1ee80fb --- /dev/null +++ b/manifest.toml @@ -0,0 +1,11 @@ +# This file was generated by Gleam +# You typically do not need to edit this file + +packages = [ + { name = "gleam_stdlib", version = "0.35.1", build_tools = ["gleam"], requirements = [], otp_app = "gleam_stdlib", source = "hex", outer_checksum = "5443EEB74708454B65650FEBBB1EF5175057D1DEC62AEA9D7C6D96F41DA79152" }, + { name = "gleeunit", version = "1.0.2", build_tools = ["gleam"], requirements = ["gleam_stdlib"], otp_app = "gleeunit", source = "hex", outer_checksum = "D364C87AFEB26BDB4FB8A5ABDE67D635DC9FA52D6AB68416044C35B096C6882D" }, +] + +[requirements] +gleam_stdlib = { version = "~> 0.34 or ~> 1.0" } +gleeunit = { version = "~> 1.0" } diff --git a/src/type_inference.gleam b/src/type_inference.gleam new file mode 100644 index 0000000..d9d9a9e --- /dev/null +++ b/src/type_inference.gleam @@ -0,0 +1,264 @@ +import gleam/io +import gleam/list +import gleam/bool +import gleam/dict.{type Dict} +import gleam/result.{try} + +// ---- TYPES ------------------------------------------------------------------ + +/// A Type can be a type constructor or a type variable. +/// +/// - `Int` -> `TConstructor("Int", [])` +/// - `List(Int)` -> `TConstructor("List", [TConstructor("Int", [])])` +/// - `(Int, Int) => Int` -> `TConstructor("Function2", [TConstructor("Int", []), TConstructor("Int", []), TConstructor("Int", [])])` +/// - `$1` -> `TVariable(1)` +type Type { + /// A constructor has a name and list of type parameters. + TConstructor(name: String, generics: List(Type)) + /// A type variable is identified by it's index, which is generated when using + /// the `fresh_type_variable` function. + TVariable(index: Int) +} + +/// The AST for the hypothetical language +type Expression { + ELambda(arg: String, body: Expression) + EApply(expression: Expression, arg: Expression) + EVariable(name: String) +} + +/// A constraint is something we know about types and their relationships +type Constraint { + /// This constraint means the two types must be equal + CEquality(t1: Type, t2: Type) +} + +/// The context holds stuff we've discovered when typchecking. +type Ctx { + Ctx(substitution: Dict(Int, Type), type_constraints: List(Constraint)) +} + +type TypeError { + TypeError(message: String) +} + +// ---- TESTING ---------------------------------------------------------------- + +pub fn main() { + ELambda("x", EApply(EApply(EVariable("+"), EVariable("x")), EVariable("x"))) + |> infer + |> io.debug + Nil +} + +fn initial_environment() -> Dict(String, Type) { + ["+", "-", "*", "/"] + |> list.map(fn(op) { + #( + op, + TConstructor("Function1", [ + TConstructor("Int", []), + TConstructor("Function1", [ + TConstructor("Int", []), + TConstructor("Int", []), + ]), + ]), + ) + }) + |> dict.from_list +} + +fn initial_ctx() -> Ctx { + Ctx(dict.new(), []) +} + +fn infer(expression: Expression) -> Result(Type, TypeError) { + // 1. Infer the type of the expression + use #(inferred, ctx) <- try(infer_type( + expression, + initial_environment(), + initial_ctx(), + )) + // 2. Solve the constraints + use ctx <- try(solve_constraints(ctx)) + // 3. Replace type variables in the inferred type with their substitutions + let result = substitute(inferred, ctx) + Ok(result) +} + +// ---- TYPE INFERENCE --------------------------------------------------------- + +/// Turn an expression into type variables, adding constraints to things we know +/// must be certain types, and inserting variables and functions into the environment +/// when appropriate. +/// +fn infer_type( + expression: Expression, + environment: Dict(String, Type), + ctx: Ctx, +) -> Result(#(Type, Ctx), TypeError) { + case expression { + ELambda(name, body) -> { + let #(arg_type, ctx) = fresh_type_variable(ctx) + let environment2 = dict.insert(environment, name, arg_type) + use #(return_type, ctx) <- try(infer_type(body, environment2, ctx)) + Ok(#(TConstructor("Function1", [arg_type, return_type]), ctx)) + } + + EVariable(name) -> + case dict.get(environment, name) { + Ok(t) -> Ok(#(t, ctx)) + Error(_) -> Error(TypeError("Variable not defined: " <> name)) + } + + EApply(function, arg) -> { + use #(function_type, ctx) <- try(infer_type(function, environment, ctx)) + use #(arg_type, ctx) <- try(infer_type(arg, environment, ctx)) + let #(return_type, ctx) = fresh_type_variable(ctx) + + let constraint = + CEquality( + function_type, + TConstructor("Function1", [arg_type, return_type]), + ) + let ctx = push_constraint(ctx, constraint) + + Ok(#(return_type, ctx)) + } + } +} + +/// Create a "fresh" type variable and add it to the substitution dictionary. +/// +fn fresh_type_variable(ctx: Ctx) -> #(Type, Ctx) { + let index = dict.size(ctx.substitution) + let result = TVariable(index) + #(result, insert_substitution(ctx, index, result)) +} + +/// Add a constraint to the type_constraints list. +/// +fn push_constraint(ctx: Ctx, constraint: Constraint) -> Ctx { + Ctx(..ctx, type_constraints: [constraint, ..ctx.type_constraints]) +} + +// TODO: this function could be easily modified to return multiple type errors +// instead of just the first one. +// +/// "Solve" constraints by going through them and making sure they are true, then +/// clearing the constraints list. +/// +fn solve_constraints(ctx: Ctx) -> Result(Ctx, TypeError) { + use ctx <- try( + list.try_fold(ctx.type_constraints, ctx, fn(ctx, constraint) { + let CEquality(t1, t2) = constraint + unify(t1, t2, ctx) + }), + ) + Ok(Ctx(..ctx, type_constraints: [])) +} + +/// Check that both types (`t1` and `t2`) are equal, filling in any type variables +/// in the substitution dictionary. +/// +fn unify(t1: Type, t2: Type, ctx: Ctx) -> Result(Ctx, TypeError) { + case t1, t2 { + // Both types are type constructors... + TConstructor(name1, generics1), TConstructor(name2, generics2) -> { + // ...and must have the same name and amount of generics. + use <- bool.guard( + when: name1 != name2 || list.length(generics1) != list.length(generics2), + return: Error(TypeError("Type mismatch: " <> name1 <> " and " <> name2)), + ) + // Unify the generics from both type constructors + list.zip(generics1, generics2) + |> list.try_fold(ctx, fn(ctx, generics) { + let #(t1, t2) = generics + unify(t1, t2, ctx) + }) + } + + // If both types are the same type variable, do nothing + TVariable(i), TVariable(j) if i == j -> Ok(ctx) + + // If one of the types is a type variable... + // + // Does it reference itself in the substitution dictionary? + // ↪ no: unify the type variable's substitution with the other type + // ↪ yes: Does it occur in the other type? + // ↪ yes: return an error + // ↪ no: update the substitution dictionary, replacing the type variable's + // substitution with the other type + TVariable(i), _ -> { + use <- does_ref_self(i, ctx, no: unify(_, t2, ctx)) + + case occurs_in(i, t2, ctx) { + True -> Error(TypeError("Usage of recursive type variable")) + False -> Ok(insert_substitution(ctx, i, t2)) + } + } + + // Same thing here, but with the other type + _, TVariable(i) -> { + use <- does_ref_self(i, ctx, no: unify(t1, _, ctx)) + + case occurs_in(i, t1, ctx) { + True -> Error(TypeError("Usage of recursive type variable")) + False -> Ok(insert_substitution(ctx, i, t1)) + } + } + } +} + +/// Returns True if the type variable (referenced by its `index`) is is used in +/// the type `t`. +/// +fn occurs_in(index: Int, t: Type, ctx: Ctx) -> Bool { + case t { + TVariable(i) -> { + use <- does_ref_self(i, ctx, no: occurs_in(index, _, ctx)) + i == index + } + TConstructor(_, generics) -> list.any(generics, occurs_in(index, _, ctx)) + } +} + +/// Detect if a type variable's substitution references itself. If it does, run +/// the `yes` function, otherwise run the `no` function with the type variable's +/// substitution. +/// +fn does_ref_self( + index: Int, + ctx: Ctx, + yes true: fn() -> a, + no false: fn(Type) -> a, +) -> a { + let assert Ok(sub) = dict.get(ctx.substitution, index) + + case sub == TVariable(index) { + True -> true() + False -> false(sub) + } +} + +/// Update or create a type variable in the substitution dictionary with a type. +/// +fn insert_substitution(ctx: Ctx, index: Int, t: Type) -> Ctx { + Ctx(..ctx, substitution: dict.insert(ctx.substitution, index, t)) +} + +/// Recursively replace all type variables in the given type with their substitutions. +/// +fn substitute(t: Type, ctx: Ctx) -> Type { + case t { + TVariable(i) -> { + // If the type variable references itself, we don't want to run substitute + // on it again! That would cause an infinite loop + use <- does_ref_self(i, ctx, no: substitute(_, ctx)) + t + } + + TConstructor(name, generics) -> + TConstructor(name, list.map(generics, substitute(_, ctx))) + } +} diff --git a/test/type_inference_test.gleam b/test/type_inference_test.gleam new file mode 100644 index 0000000..3831e7a --- /dev/null +++ b/test/type_inference_test.gleam @@ -0,0 +1,12 @@ +import gleeunit +import gleeunit/should + +pub fn main() { + gleeunit.main() +} + +// gleeunit test functions end in `_test` +pub fn hello_world_test() { + 1 + |> should.equal(1) +}