From dfed719bc0c510851d1fe536e422aaa9acdddf4b Mon Sep 17 00:00:00 2001 From: Lucas Franceschino Date: Mon, 1 Jul 2024 09:11:22 +0200 Subject: [PATCH] doc(engine): ppx_functor_application Fixes #742 --- .../utils/ppx_functor_application/README.md | 125 ++++++++++-------- .../ppx_functor_application.ml | 23 +++- 2 files changed, 90 insertions(+), 58 deletions(-) diff --git a/engine/utils/ppx_functor_application/README.md b/engine/utils/ppx_functor_application/README.md index fb852d344..3a59a7558 100644 --- a/engine/utils/ppx_functor_application/README.md +++ b/engine/utils/ppx_functor_application/README.md @@ -1,65 +1,82 @@ -# `ppx_inline` +# `ppx_functor_application` -Inlines chunks of OCaml AST in place. +## Motivation +The engine consists of numerous phases, implemented as OCaml functors +parametrized over "AST features" (see the book). Two phases can be +binded (sequenced) via `Phase_utils.BindPhase` functor. -Rewrite `[%%inline_defs L]`, `[%%inline_arms L]`, `[%%inline_body PATH]` inside nodes `[%%inlined_contents NODE]`, where: - - `L` is a (`+`/`-`-separated) list of path specifying which chunk of AST we should inline; - - `PATH` is a `.`-separated list of strings, possibly containing the `*` glob. +Since OCaml define (or let users define) infix notations for functor +application, combining many phases (functors) results in the following +christmas tree looking kinds of code: -## Example: -File `some_module.ml`: ```ocaml -let f x = x + 1 -let g x = x + 2 -let f' x = x + 3 - -module M = struct - let w = 0 - let x = 1 - let y = 2 - let z = 3 +struct + module ARG0 = (Phases.Reject.RawOrMutPointer)(Features.Rust) + module ARG1 = (Phases.Transform_hax_lib_inline)(ARG0.FB) + module ARG2 = (Phases.Specialize)(ARG1.FB) + module ARG3 = (Phases.Drop_sized_trait)(ARG2.FB) + module ARG4 = (Phases.Simplify_question_marks)(ARG3.FB) + module ARG5 = (Phases.And_mut_defsite)(ARG4.FB) + module ARG6 = (Phases.Reconstruct_for_loops)(ARG5.FB) + module ARG7 = (Phases.Reconstruct_while_loops)(ARG6.FB) + module ARG8 = (Phases.Direct_and_mut)(ARG7.FB) + module ARG9 = (Phases.Reject.Arbitrary_lhs)(ARG8.FB) + module ARG10 = (Phases.Drop_blocks)(ARG9.FB) + module ARG11 = (Phases.Drop_references)(ARG10.FB) + module ARG12 = (Phases.Trivialize_assign_lhs)(ARG11.FB) + module ARG13 = (Side_effect_utils.Hoist)(ARG12.FB) + module ARG14 = (Phases.Simplify_match_return)(ARG13.FB) + module ARG15 = (Phases.Drop_needless_returns)(ARG14.FB) + module ARG16 = (Phases.Local_mutation)(ARG15.FB) + module ARG17 = (Phases.Reject.Continue)(ARG16.FB) + module ARG18 = (Phases.Cf_into_monads)(ARG17.FB) + module ARG19 = (Phases.Reject.EarlyExit)(ARG18.FB) + module ARG20 = (Phases.Functionalize_loops)(ARG19.FB) + module ARG21 = (Phases.Reject.As_pattern)(ARG20.FB) + module ARG22 = (Phases.Traits_specs)(ARG21.FB) + module ARG23 = (Phases.Simplify_hoisting)(ARG22.FB) + module ARG24 = (Phases.Newtype_as_refinement)(ARG23.FB) + module ARG25 = (SubtypeToInputLanguage)(ARG24.FB) + module ARG26 = (Identity)(ARG25.FB) + include + ((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(ARG0))(ARG1)))(ARG2)))(ARG3)))(ARG4)))(ARG5)))(ARG6)))(ARG7)))(ARG8)))(ARG9)))(ARG10)))(ARG11)))(ARG12)))(ARG13)))(ARG14)))(ARG15)))(ARG16)))(ARG17)))(ARG18)))(ARG19)))(ARG20)))(ARG21)))(ARG22)))(ARG23)))(ARG24)))(ARG25)))(ARG26) end - -let h x = "" -type foo = | A | B -let i (x: foo) = - match x with - | A -> 0 - | B -> 1 ``` -The module: -```ocaml -module%inlined_contents [@@add "some_module.ml"] Test = struct - [%%inline_defs f + g + foo] - [%%inline_defs "M.*" - z - y] - - let h: int -> string = [%%inline_body h] - let i: foo -> int = - match i with - | [%%inline_arms "i.*" - B] -> dummy - | B -> 123 -end -``` +The system of phases is supposed to let backends opt-in or out easily +for phases. This syntactic limitation was a major issue for that. + +## Solution +This PPX defines a small DSL that embeds in the OCaml syntax of +expressions to provide a nice way of binding phases functors via a +`|>` infix operator. -Will be rewritten into: +Example: ```ocaml -module%inlined_contents [@@add "some_module.ml"] Test = struct - - (* [%%inline_defs f + g + foo] *) - let f x = x + 1 - let g x = x + 2 - type foo = | A | B - - (* [%%inline_defs "M.*" - z - y] *) - let w = 0 - let x = 1 - - let h: int -> string = (fun x -> "") - let i: foo -> int = - match i with - | A -> 0 - | B -> 123 -end +module TransformToInputLanguage = + [%functor_application + Phases.Reject.RawOrMutPointer(Features.Rust) + |> Phases.Transform_hax_lib_inline + |> Phases.Specialize + |> Phases.Drop_sized_trait + |> Phases.Simplify_question_marks + |> Phases.And_mut_defsite + |> Phases.Reconstruct_for_loops + |> Phases.Reconstruct_while_loops + |> SubtypeToInputLanguage + |> Identity + ] + [@ocamlformat "disable"] ``` +Note: the `[@ocamlformat "disable"]` annotation is important, +otherwise `ocamlformat` tries to format those PPX invokations with its +rules for expressions, yielding rather ugly looking code... + +### Syntax + - `Name`: a module `Name` + - `Name(X, Y, Z)`: the application of the functor `Name` with three arguments `X`, `Y` and `Z` + - `(module )`: the arbitary OCaml module expression `` + - ` `: the application of the module described by `` and the module described by `` + - `(fun X -> )`: a "functor" from `X` to `` + - ` |> `: `` binded with `` diff --git a/engine/utils/ppx_functor_application/ppx_functor_application.ml b/engine/utils/ppx_functor_application/ppx_functor_application.ml index c49ff7bd9..f53344de7 100644 --- a/engine/utils/ppx_functor_application/ppx_functor_application.ml +++ b/engine/utils/ppx_functor_application/ppx_functor_application.ml @@ -30,6 +30,7 @@ let show_module_expr = string_of_module_expr let pp_module_expr (fmt : Format.formatter) (s : module_expr) : unit = Format.pp_print_string fmt @@ string_of_module_expr s +(** Defines a DSL for functor application. *) type module_dsl = | Var of longident | App of module_dsl * module_dsl @@ -41,6 +42,7 @@ type module_dsl = let var_of_string s = Var (Longident.Lident s) +(** Elaborate a OCaml module expression from a `module_dsl` *) let rec elab ~loc (t : module_dsl) : module_expr = let (module E) = Ast_builder.make loc in let h = elab ~loc in @@ -92,6 +94,7 @@ let rec collect_pipes (t : module_dsl) : module_dsl list = | Meta (Pipe l, _) | Pipe l -> List.concat_map ~f:collect_pipes l | _ -> [ t ] +(** Get rid of extra `Pipe` nodes *) let rec normalize (t : module_dsl) : module_dsl = match t with | App (f, x) -> App (normalize f, normalize x) @@ -104,16 +107,28 @@ let rec normalize (t : module_dsl) : module_dsl = | [ t ] -> t | l -> Pipe l) +(** Recognize a small language embedded in OCaml syntax for applying +functors in chain. *) let rec parse expr = let r = match expr with - | { pexp_desc = Pexp_construct ({ txt; _ }, None); _ } -> Var txt + | { pexp_desc = Pexp_construct ({ txt; _ }, None); _ } -> + (* Parses variables (module names are uppercase, since we are looking at OCaml expressions, so we match on constructors) *) + Var txt | { pexp_desc = Pexp_construct ({ txt; _ }, Some arg); _ } -> + (* Parses module applcations (same as above: in expressions, module applications are parsed as constructor applications) *) App (Var txt, parse arg) - | [%expr [%e? m1] |> [%e? m2]] -> Pipe [ parse m1; parse m2 ] - | [%expr (module [%m? m])] -> ModExpr m - | [%expr [%e? f] [%e? x]] -> App (parse f, parse x) + | [%expr [%e? m1] |> [%e? m2]] -> + (* Parses `... |> ...` infix module application *) + Pipe [ parse m1; parse m2 ] + | [%expr (module [%m? m])] -> + (* Parses module expressions (in this case, that corresponds to OCaml module expression) *) + ModExpr m + | [%expr [%e? f] [%e? x]] -> + (* Parses module applications (e.g. `(fun x -> ...) (module YXZ)`) *) + App (parse f, parse x) | [%expr fun [%p? x] -> [%e? body]] -> ( + (* Parses module abstractions (e.g. `fun X -> Z(X)`) *) match x with | { ppat_desc = Ppat_construct ({ txt = Lident x; _ }, None); _ } -> Abs (x, parse body)