diff --git a/lib/Ast.ml b/lib/Ast.ml index 81a6099d77..bf9437564d 100644 --- a/lib/Ast.ml +++ b/lib/Ast.ml @@ -12,7 +12,8 @@ (** Abstract syntax tree term *) open Migrate_ast -open Extended_ast +open Ocamlformat_parser_extended +open Parsetree type cmt_checker = { cmts_before: Location.t -> bool diff --git a/lib/Ast.mli b/lib/Ast.mli index a79351bc28..fc1e7d6253 100644 --- a/lib/Ast.mli +++ b/lib/Ast.mli @@ -12,7 +12,8 @@ (** Abstract syntax tree terms *) open Migrate_ast -open Extended_ast +open Ocamlformat_parser_extended +open Parsetree val init : Conf.t -> unit (** Initialize internal state *) diff --git a/lib/Cmts.ml b/lib/Cmts.ml index 955f369dda..e97ec93251 100644 --- a/lib/Cmts.ml +++ b/lib/Cmts.ml @@ -11,6 +11,7 @@ (** Placing and formatting comments in a parsetree. *) +open Ocamlformat_parser_extended open Migrate_ast type layout_cache_key = @@ -63,7 +64,13 @@ type t = ; mutable cmts_within: Cmt.t Multimap.M(Location).t ; source: Source.t ; mutable remaining: Set.M(Location).t - ; layout_cache: Layout_cache.t } + ; layout_cache: Layout_cache.t + ; original: Cmt.t list + (** All dedup'd comments, kept around so they can be retrieved + after the maps above have been consumed by formatting. *) + } + +let source t = t.source let copy { debug @@ -72,14 +79,16 @@ let copy ; cmts_within ; source ; remaining - ; layout_cache } = + ; layout_cache + ; original } = { debug ; cmts_before ; cmts_after ; cmts_within ; source ; remaining - ; layout_cache } + ; layout_cache + ; original } let restore src ~into = into.cmts_before <- src.cmts_before ; @@ -337,7 +346,7 @@ let relocate_pattern_matching_cmts (t : t) src tok ~whole_loc ~matched_loc = relocate_cmts_before t ~src:matched_loc ~sep:kwd_loc ~dst:whole_loc let relocate_ext_cmts (t : t) src (_pre, pld) ~whole_loc = - let open Extended_ast in + let open Parsetree in match pld with | PStr [ { pstr_desc= @@ -361,7 +370,7 @@ let relocate_ext_cmts (t : t) src (_pre, pld) ~whole_loc = | _ -> () let relocate_wrongfully_attached_cmts t src exp = - let open Extended_ast in + let open Parsetree in match exp.pexp_desc with | Pexp_match (e0, _, _) -> relocate_pattern_matching_cmts t src Parser.MATCH @@ -372,8 +381,38 @@ let relocate_wrongfully_attached_cmts t src exp = | Pexp_extension ext -> relocate_ext_cmts t src ext ~whole_loc:exp.pexp_loc | _ -> () +let all_comments t = t.original + +(** Drop comments that are already represented as docstring attributes in + the AST (so they don't get double-printed). *) +let dedup_cmts ~traverse ast comments = + let docs = ref (Set.empty (module Cmt)) in + let attribute m atr = + let open Parsetree in + match atr with + | { attr_payload= + PStr + [ { pstr_desc= + Pstr_eval + ( { pexp_desc= + Pexp_constant + {pconst_desc= Pconst_string (doc, _, None); _} + ; pexp_loc + ; _ } + , [] ) + ; _ } ] + ; _ } + when Ast.Attr.is_doc atr -> + docs := Set.add !docs (Cmt.create_docstring doc pexp_loc) ; + atr + | _ -> Ast_mapper.default_mapper.attribute m atr + in + traverse {Ast_mapper.default_mapper with attribute} ast |> ignore ; + Set.(to_list (diff (of_list (module Cmt) comments) !docs)) + (** Initialize global state and place comments. *) -let init fragment ~debug source asts comments_n_docstrings = +let init ~debug ~source ~ast ~comments ~traverse ~print_ast = + let comments = dedup_cmts ~traverse ast comments in let t = { debug ; cmts_before= Map.empty (module Location) @@ -381,13 +420,11 @@ let init fragment ~debug source asts comments_n_docstrings = ; cmts_within= Map.empty (module Location) ; source ; remaining= Set.empty (module Location) - ; layout_cache= Layout_cache.create () } - in - let comments = - Normalize_extended_ast.dedup_cmts fragment asts comments_n_docstrings + ; layout_cache= Layout_cache.create () + ; original= comments } in if not (List.is_empty comments) then ( - let loc_tree, locs = Loc_tree.of_ast fragment asts in + let loc_tree, locs = Loc_tree.of_ast ~traverse ast in if debug then List.iter locs ~f:(fun loc -> if not (Location.compare loc Location.none = 0) then @@ -408,8 +445,7 @@ let init fragment ~debug source asts comments_n_docstrings = ; after= get_cmts `After } in Printast.cmts := Some cmts ; - Format.eprintf "AST:\n%a\n%!" (Extended_ast.Printast.ast fragment) asts - ) ) ; + Format.eprintf "AST:\n%a\n%!" print_ast ast ) ) ; t let preserve_nomemo f t = diff --git a/lib/Cmts.mli b/lib/Cmts.mli index a5c04848fd..b7b2f214c8 100644 --- a/lib/Cmts.mli +++ b/lib/Cmts.mli @@ -25,11 +25,35 @@ type t +val source : t -> Source.t +(** The [Source.t] used to place comments. *) + +val copy : t -> t +(** Deep-copy the placement state so it can be consumed independently. *) + val init : - 'a Extended_ast.t -> debug:bool -> Source.t -> 'a -> Cmt.t list -> t -(** [init fragment source x comments] associates each comment in [comments] - with a source location appearing in [x]. It uses [Source] to help resolve - ambiguities. Initializes the state used by the [fmt] functions. *) + debug:bool + -> source:Source.t + -> ast:'a + -> comments:Cmt.t list + -> traverse:(Ocamlformat_parser_extended.Ast_mapper.mapper -> 'a -> 'a) + -> print_ast:(Format.formatter -> 'a -> unit) + -> t +(** Associate each [comment] with a source location appearing in [ast]. Uses + [traverse] to walk the AST. [print_ast] is only used in debug mode. + Initializes the state used by the [fmt] functions. *) + +val dedup_cmts : + traverse:(Ocamlformat_parser_extended.Ast_mapper.mapper -> 'a -> 'a) + -> 'a + -> Cmt.t list + -> Cmt.t list +(** Drop comments that are already represented as docstring attributes in + the AST (so they don't get double-printed). *) + +val all_comments : t -> Cmt.t list +(** All comments associated with this state — placed (before/within/after) + plus any not-yet-formatted ones. *) val relocate : t -> src:Location.t -> before:Location.t -> after:Location.t -> unit @@ -38,7 +62,7 @@ val relocate : [after]. *) val relocate_wrongfully_attached_cmts : - t -> Source.t -> Extended_ast.expression -> unit + t -> Source.t -> Ocamlformat_parser_extended.Parsetree.expression -> unit (** [relocate_wrongfully_attached_cmts] relocates wrongfully attached comments, e.g. comments that should be attached to the whole pattern-matching expressions ([match-with] or [try-with] expressions) but diff --git a/lib/Extended_ast.ml b/lib/Extended_ast.ml index 42c62d77cf..9dd1620436 100644 --- a/lib/Extended_ast.ml +++ b/lib/Extended_ast.ml @@ -12,362 +12,436 @@ open Ocamlformat_parser_extended include Parsetree -let equal_core_type : core_type -> core_type -> bool = Poly.equal - type use_file = toplevel_phrase list type repl_file = repl_phrase list +module Std_parsetree = Ocamlformat_parser_standard.Parsetree + +(** Internal kind tag GADT, used for parse-time dispatch and to recover the + constructor of a parsed [t] without unpacking its data. *) +module Kind = struct + type 'a t = + | Structure : structure t + | Signature : signature t + | Use_file : use_file t + | Core_type : core_type t + | Module_type : module_type t + | Expression : expression t + | Pattern : pattern t + | Repl_file : repl_file t + | Documentation : Ocamlformat_odoc_parser.Ast.t t + + type any = Any : 'a t -> any [@@unboxed] + + let of_syntax = function + | Syntax.Structure -> Any Structure + | Signature -> Any Signature + | Use_file -> Any Use_file + | Core_type -> Any Core_type + | Module_type -> Any Module_type + | Expression -> Any Expression + | Pattern -> Any Pattern + | Repl_file -> Any Repl_file + | Documentation -> Any Documentation +end + type 'a t = - | Structure : structure t - | Signature : signature t - | Use_file : use_file t - | Core_type : core_type t - | Module_type : module_type t - | Expression : expression t - | Pattern : pattern t - | Repl_file : repl_file t - | Documentation : Ocamlformat_odoc_parser.Ast.t t + | Structure : + {ast: structure; std: Std_parsetree.structure; cmts: Cmts.t} + -> structure t + | Signature : + {ast: signature; std: Std_parsetree.signature; cmts: Cmts.t} + -> signature t + | Use_file : + { ast: use_file + ; std: Std_parsetree.toplevel_phrase list + ; prefix: string + ; cmts: Cmts.t } + -> use_file t + | Core_type : + {ast: core_type; std: Std_parsetree.core_type; cmts: Cmts.t} + -> core_type t + | Module_type : + {ast: module_type; std: Std_parsetree.module_type; cmts: Cmts.t} + -> module_type t + | Expression : + {ast: expression; std: Std_parsetree.expression; cmts: Cmts.t} + -> expression t + | Pattern : + {ast: pattern; std: Std_parsetree.pattern; cmts: Cmts.t} + -> pattern t + | Repl_file : {ast: repl_file; cmts: Cmts.t} -> repl_file t + | Documentation : + Ocamlformat_odoc_parser.Ast.t + -> Ocamlformat_odoc_parser.Ast.t t type any_t = Any : 'a t -> any_t [@@unboxed] -let of_syntax = function - | Syntax.Structure -> Any Structure - | Signature -> Any Signature - | Use_file -> Any Use_file - | Core_type -> Any Core_type - | Module_type -> Any Module_type - | Expression -> Any Expression - | Pattern -> Any Pattern - | Repl_file -> Any Repl_file - | Documentation -> Any Documentation - -let equal (type a) (_ : a t) : a -> a -> bool = Poly.equal - -let map (type a) (x : a t) (m : Ast_mapper.mapper) : a -> a = - match x with - | Structure -> m.structure m - | Signature -> m.signature m - | Use_file -> List.map ~f:(m.toplevel_phrase m) - | Core_type -> m.typ m - | Module_type -> m.module_type m - | Expression -> m.expr m - | Pattern -> m.pat m - | Repl_file -> List.map ~f:(m.repl_phrase m) - | Documentation -> Fn.id - -module Parse = struct - let normalize_mapper ~ocaml_version ~preserve_beginend ~prefer_let_puns = - let open Asttypes in - let open Ast_mapper in - let enable_short_field_annot = - Ocaml_version.compare ocaml_version Ocaml_version.Releases.v4_03_0 >= 0 - in - let record_field m (f, t, v) = - match (t, v) with - (* [{ x = x }] -> [{ x }] *) - | ( _ - , Some {pexp_desc= Pexp_ident {txt= v_txt; _}; pexp_attributes= []; _} - ) - when Std_longident.field_alias ~field:f.txt v_txt -> - (f, t, None) - (* [{ x = (x : t) }] -> [{ x : t }] *) - | ( None - , Some - { pexp_desc= - Pexp_constraint - ( { pexp_desc= Pexp_ident {txt= v_txt; _} - ; pexp_attributes= [] - ; _ } - , t1 ) - ; pexp_attributes= [] - ; _ } ) - when enable_short_field_annot - && Std_longident.field_alias ~field:f.txt v_txt -> - (f, Some (Pconstraint t1), None) - (* [{ x :> t = (x : t) }] -> [{ x : t :> t }] *) - | ( Some (Pcoerce (None, t2)) - , Some - { pexp_desc= - Pexp_constraint - ( { pexp_desc= Pexp_ident {txt= v_txt; _} - ; pexp_attributes= [] - ; _ } - , t1 ) - ; pexp_attributes= [] - ; _ } ) - when enable_short_field_annot - && Std_longident.field_alias ~field:f.txt v_txt -> - (f, Some (Pcoerce (Some t1, t2)), None) - (* [{ x = (x :> t) }] -> [{ x :> t }] *) - (* [{ x = (x : t :> t) }] -> [{ x : t :> t }] *) - | ( None - , Some - { pexp_desc= - Pexp_coerce - ( { pexp_desc= Pexp_ident {txt= v_txt; _} - ; pexp_attributes= [] - ; _ } - , t1 - , t2 ) - ; pexp_attributes= [] - ; _ } ) - when enable_short_field_annot - && Std_longident.field_alias ~field:f.txt v_txt -> - (f, Some (Pcoerce (t1, t2)), None) - (* [{ x : t = (x :> t) }] -> [{ x : t :> t }] *) - | ( Some (Pconstraint t1) - , Some - { pexp_desc= - Pexp_coerce - ( { pexp_desc= Pexp_ident {txt= v_txt; _} - ; pexp_attributes= [] - ; _ } - , None - , t2 ) - ; pexp_attributes= [] - ; _ } ) - when enable_short_field_annot - && Std_longident.field_alias ~field:f.txt v_txt -> - (f, Some (Pcoerce (Some t1, t2)), None) - | _ -> (f, t, Option.map ~f:(m.expr m) v) - in - let pat_record_field m (f, t, v) = - match (t, v) with - (* [{ x = x }] -> [{ x }] *) - | _, Some {ppat_desc= Ppat_var {txt= v_txt; _}; ppat_attributes= []; _} - when Std_longident.field_alias ~field:f.txt (Lident v_txt) -> - (f, t, None) - (* [{ x = (x : t) }] -> [{ x : t}] *) - | ( None - , Some - { ppat_desc= - Ppat_constraint - ( { ppat_desc= Ppat_var {txt= v_txt; _} - ; ppat_attributes= [] - ; _ } - , t ) - ; ppat_attributes= [] - ; _ } ) - when enable_short_field_annot - && Std_longident.field_alias ~field:f.txt (Lident v_txt) -> - (f, Some t, None) - | _ -> (f, t, Option.map ~f:(m.pat m) v) - in - let map_labeled_tuple_element m f = function - | Lte_simple lte -> f m lte - | (Lte_constrained_pun _ | Lte_pun _) as x -> x - in - let pat_tuple_elt m te = - match (te.lte_label, te.lte_elt) with - (* [ ~x:x ] -> [ ~x ] *) - | ( Some lbl - , {ppat_desc= Ppat_var {txt= v_txt; _}; ppat_attributes= []; _} ) - when String.equal lbl.txt v_txt -> - Lte_pun lbl - (* [~x:(x : t)] -> [ ~(x : t)] *) - | ( Some lbl - , { ppat_desc= - Ppat_constraint - ( { ppat_desc= Ppat_var {txt= v_txt; _} - ; ppat_attributes= [] +let ast (type a) (t : a t) : a = + match t with + | Structure {ast; _} -> ast + | Signature {ast; _} -> ast + | Use_file {ast; _} -> ast + | Core_type {ast; _} -> ast + | Module_type {ast; _} -> ast + | Expression {ast; _} -> ast + | Pattern {ast; _} -> ast + | Repl_file {ast; _} -> ast + | Documentation ast -> ast + +let traverse (type a) (t : a t) : Ast_mapper.mapper -> a -> a = + match t with + | Structure _ -> fun m -> m.structure m + | Signature _ -> fun m -> m.signature m + | Use_file _ -> fun m -> List.map ~f:(m.toplevel_phrase m) + | Core_type _ -> fun m -> m.typ m + | Module_type _ -> fun m -> m.module_type m + | Expression _ -> fun m -> m.expr m + | Pattern _ -> fun m -> m.pat m + | Repl_file _ -> fun m -> List.map ~f:(m.repl_phrase m) + | Documentation _ -> fun _ x -> x + +let cmts (type a) (t : a t) : Cmts.t option = + match t with + | Structure {cmts; _} + |Signature {cmts; _} + |Use_file {cmts; _} + |Core_type {cmts; _} + |Module_type {cmts; _} + |Expression {cmts; _} + |Pattern {cmts; _} + |Repl_file {cmts; _} -> + Some cmts + | Documentation _ -> None + +let copy_cmts (type a) (t : a t) : a t = + match t with + | Structure r -> Structure {r with cmts= Cmts.copy r.cmts} + | Signature r -> Signature {r with cmts= Cmts.copy r.cmts} + | Use_file r -> Use_file {r with cmts= Cmts.copy r.cmts} + | Core_type r -> Core_type {r with cmts= Cmts.copy r.cmts} + | Module_type r -> Module_type {r with cmts= Cmts.copy r.cmts} + | Expression r -> Expression {r with cmts= Cmts.copy r.cmts} + | Pattern r -> Pattern {r with cmts= Cmts.copy r.cmts} + | Repl_file r -> Repl_file {r with cmts= Cmts.copy r.cmts} + | Documentation _ as t -> t + +let kind_of (type a) (t : a t) : a Kind.t = + match t with + | Structure _ -> Structure + | Signature _ -> Signature + | Use_file _ -> Use_file + | Core_type _ -> Core_type + | Module_type _ -> Module_type + | Expression _ -> Expression + | Pattern _ -> Pattern + | Repl_file _ -> Repl_file + | Documentation _ -> Documentation + +(** Build a lexbuf for [source]. For [Use_file], also consumes the [#!] + shebang line if present (so subsequent parsing/tokenizing has line + numbers relative to the original source) and returns it as [prefix]. *) +let prepare_lexbuf (type a) (fg : a Kind.t) ~input_name source = + let lexbuf = Lexing.from_string source in + Location.init_info lexbuf input_name ; + let prefix = + match fg with + | Kind.Use_file -> + Lexer.skip_hash_bang lexbuf ; + String.sub source ~pos:0 ~len:lexbuf.lex_last_pos + | _ -> "" + in + (lexbuf, prefix) + +let map (type a) (m : Ast_mapper.mapper) (t : a t) : a t = + match t with + | Structure r -> Structure {r with ast= m.structure m r.ast} + | Signature r -> Signature {r with ast= m.signature m r.ast} + | Use_file r -> + Use_file {r with ast= List.map ~f:(m.toplevel_phrase m) r.ast} + | Core_type r -> Core_type {r with ast= m.typ m r.ast} + | Module_type r -> Module_type {r with ast= m.module_type m r.ast} + | Expression r -> Expression {r with ast= m.expr m r.ast} + | Pattern r -> Pattern {r with ast= m.pat m r.ast} + | Repl_file r -> Repl_file {r with ast= List.map ~f:(m.repl_phrase m) r.ast} + | Documentation _ as t -> t + +let normalize_mapper ~ocaml_version ~preserve_beginend ~prefer_let_puns = + let open Asttypes in + let open Ast_mapper in + let enable_short_field_annot = + Ocaml_version.compare ocaml_version Ocaml_version.Releases.v4_03_0 >= 0 + in + let record_field m (f, t, v) = + match (t, v) with + (* [{ x = x }] -> [{ x }] *) + | _, Some {pexp_desc= Pexp_ident {txt= v_txt; _}; pexp_attributes= []; _} + when Std_longident.field_alias ~field:f.txt v_txt -> + (f, t, None) + (* [{ x = (x : t) }] -> [{ x : t }] *) + | ( None + , Some + { pexp_desc= + Pexp_constraint + ( { pexp_desc= Pexp_ident {txt= v_txt; _} + ; pexp_attributes= [] ; _ } - , t ) - ; ppat_attributes= [] - ; ppat_loc - ; _ } ) - when String.equal lbl.txt v_txt -> - Lte_constrained_pun - { loc= {lbl.loc with loc_end= ppat_loc.loc_end} - ; label= lbl - ; type_constraint= t } - | lte_label, pat -> Lte_simple {lte_label; lte_elt= m.pat m pat} - in - let pat_tuple_elt m lte = - map_labeled_tuple_element m pat_tuple_elt lte - in - let exp_tuple_elt m te = - match (te.lte_label, te.lte_elt) with - (* [ ~x:x ] -> [ ~x ] *) - | ( Some lbl - , { pexp_desc= Pexp_ident {txt= Lident v_txt; _} + , t1 ) ; pexp_attributes= [] ; _ } ) - when String.equal lbl.txt v_txt -> - Lte_pun lbl - (* [~x:(x : t)] -> [ ~(x : t)] *) - | ( Some lbl - , { pexp_desc= + when enable_short_field_annot + && Std_longident.field_alias ~field:f.txt v_txt -> + (f, Some (Pconstraint t1), None) + (* [{ x :> t = (x : t) }] -> [{ x : t :> t }] *) + | ( Some (Pcoerce (None, t2)) + , Some + { pexp_desc= Pexp_constraint - ( { pexp_desc= Pexp_ident {txt= Lident v_txt; _} + ( { pexp_desc= Pexp_ident {txt= v_txt; _} ; pexp_attributes= [] ; _ } - , t ) + , t1 ) ; pexp_attributes= [] - ; pexp_loc ; _ } ) - when String.equal lbl.txt v_txt -> - Lte_constrained_pun - { loc= {lbl.loc with loc_end= pexp_loc.loc_end} - ; label= lbl - ; type_constraint= Pconstraint t } - (* [~x:(x : t1 :> t2)] -> [ ~(x : t1 :> t2)] *) - | ( Some lbl - , { pexp_desc= + when enable_short_field_annot + && Std_longident.field_alias ~field:f.txt v_txt -> + (f, Some (Pcoerce (Some t1, t2)), None) + (* [{ x = (x :> t) }] -> [{ x :> t }] *) + (* [{ x = (x : t :> t) }] -> [{ x : t :> t }] *) + | ( None + , Some + { pexp_desc= Pexp_coerce - ({pexp_desc= Pexp_ident {txt= Lident v_txt; _}; _}, bty, tty) + ( { pexp_desc= Pexp_ident {txt= v_txt; _} + ; pexp_attributes= [] + ; _ } + , t1 + , t2 ) ; pexp_attributes= [] - ; pexp_loc ; _ } ) - when String.equal lbl.txt v_txt -> - Lte_constrained_pun - { loc= {lbl.loc with loc_end= pexp_loc.loc_end} - ; label= lbl - ; type_constraint= Pcoerce (bty, tty) } - | lte_label, exp -> Lte_simple {lte_label; lte_elt= m.expr m exp} - in - let exp_tuple_elt m lte = - map_labeled_tuple_element m exp_tuple_elt lte - in - let binding_op (m : Ast_mapper.mapper) b = - let b' = - let loc_start = b.pbop_op.loc.loc_start in - let loc_end = b.pbop_exp.pexp_loc.loc_end in - let pbop_is_pun = - match prefer_let_puns with - | None -> b.pbop_is_pun - | Some false -> false - | Some true -> ( - b.pbop_is_pun - || - match (b.pbop_pat.ppat_desc, b.pbop_exp.pexp_desc) with - | Ppat_var {txt; _}, Pexp_ident {txt= Lident e; _} -> - String.equal txt e - | _ -> false ) - in - {b with pbop_loc= {b.pbop_loc with loc_start; loc_end}; pbop_is_pun} + when enable_short_field_annot + && Std_longident.field_alias ~field:f.txt v_txt -> + (f, Some (Pcoerce (t1, t2)), None) + (* [{ x : t = (x :> t) }] -> [{ x : t :> t }] *) + | ( Some (Pconstraint t1) + , Some + { pexp_desc= + Pexp_coerce + ( { pexp_desc= Pexp_ident {txt= v_txt; _} + ; pexp_attributes= [] + ; _ } + , None + , t2 ) + ; pexp_attributes= [] + ; _ } ) + when enable_short_field_annot + && Std_longident.field_alias ~field:f.txt v_txt -> + (f, Some (Pcoerce (Some t1, t2)), None) + | _ -> (f, t, Option.map ~f:(m.expr m) v) + in + let pat_record_field m (f, t, v) = + match (t, v) with + (* [{ x = x }] -> [{ x }] *) + | _, Some {ppat_desc= Ppat_var {txt= v_txt; _}; ppat_attributes= []; _} + when Std_longident.field_alias ~field:f.txt (Lident v_txt) -> + (f, t, None) + (* [{ x = (x : t) }] -> [{ x : t}] *) + | ( None + , Some + { ppat_desc= + Ppat_constraint + ( { ppat_desc= Ppat_var {txt= v_txt; _} + ; ppat_attributes= [] + ; _ } + , t ) + ; ppat_attributes= [] + ; _ } ) + when enable_short_field_annot + && Std_longident.field_alias ~field:f.txt (Lident v_txt) -> + (f, Some t, None) + | _ -> (f, t, Option.map ~f:(m.pat m) v) + in + let map_labeled_tuple_element m f = function + | Lte_simple lte -> f m lte + | (Lte_constrained_pun _ | Lte_pun _) as x -> x + in + let pat_tuple_elt m te = + match (te.lte_label, te.lte_elt) with + (* [ ~x:x ] -> [ ~x ] *) + | Some lbl, {ppat_desc= Ppat_var {txt= v_txt; _}; ppat_attributes= []; _} + when String.equal lbl.txt v_txt -> + Lte_pun lbl + (* [~x:(x : t)] -> [ ~(x : t)] *) + | ( Some lbl + , { ppat_desc= + Ppat_constraint + ( {ppat_desc= Ppat_var {txt= v_txt; _}; ppat_attributes= []; _} + , t ) + ; ppat_attributes= [] + ; ppat_loc + ; _ } ) + when String.equal lbl.txt v_txt -> + Lte_constrained_pun + { loc= {lbl.loc with loc_end= ppat_loc.loc_end} + ; label= lbl + ; type_constraint= t } + | lte_label, pat -> Lte_simple {lte_label; lte_elt= m.pat m pat} + in + let pat_tuple_elt m lte = map_labeled_tuple_element m pat_tuple_elt lte in + let exp_tuple_elt m te = + match (te.lte_label, te.lte_elt) with + (* [ ~x:x ] -> [ ~x ] *) + | ( Some lbl + , {pexp_desc= Pexp_ident {txt= Lident v_txt; _}; pexp_attributes= []; _} + ) + when String.equal lbl.txt v_txt -> + Lte_pun lbl + (* [~x:(x : t)] -> [ ~(x : t)] *) + | ( Some lbl + , { pexp_desc= + Pexp_constraint + ( { pexp_desc= Pexp_ident {txt= Lident v_txt; _} + ; pexp_attributes= [] + ; _ } + , t ) + ; pexp_attributes= [] + ; pexp_loc + ; _ } ) + when String.equal lbl.txt v_txt -> + Lte_constrained_pun + { loc= {lbl.loc with loc_end= pexp_loc.loc_end} + ; label= lbl + ; type_constraint= Pconstraint t } + (* [~x:(x : t1 :> t2)] -> [ ~(x : t1 :> t2)] *) + | ( Some lbl + , { pexp_desc= + Pexp_coerce + ({pexp_desc= Pexp_ident {txt= Lident v_txt; _}; _}, bty, tty) + ; pexp_attributes= [] + ; pexp_loc + ; _ } ) + when String.equal lbl.txt v_txt -> + Lte_constrained_pun + { loc= {lbl.loc with loc_end= pexp_loc.loc_end} + ; label= lbl + ; type_constraint= Pcoerce (bty, tty) } + | lte_label, exp -> Lte_simple {lte_label; lte_elt= m.expr m exp} + in + let exp_tuple_elt m lte = map_labeled_tuple_element m exp_tuple_elt lte in + let binding_op (m : Ast_mapper.mapper) b = + let b' = + let loc_start = b.pbop_op.loc.loc_start in + let loc_end = b.pbop_exp.pexp_loc.loc_end in + let pbop_is_pun = + match prefer_let_puns with + | None -> b.pbop_is_pun + | Some false -> false + | Some true -> ( + b.pbop_is_pun + || + match (b.pbop_pat.ppat_desc, b.pbop_exp.pexp_desc) with + | Ppat_var {txt; _}, Pexp_ident {txt= Lident e; _} -> + String.equal txt e + | _ -> false ) in - Ast_mapper.default_mapper.binding_op m b' + {b with pbop_loc= {b.pbop_loc with loc_start; loc_end}; pbop_is_pun} in - let value_bindings (m : Ast_mapper.mapper) vbs = - let punning is_extension vb = - let is_extension = - (* [and] nodes don't have extensions, so we need to track if the - earlier [let] did *) - is_extension || Option.is_some vb.pvb_attributes.attrs_extension - in - let pvb_is_pun = - is_extension - && - match prefer_let_puns with - | None -> vb.pvb_is_pun - | Some false -> false - | Some true -> ( - vb.pvb_is_pun - || - match (vb.pvb_pat.ppat_desc, vb.pvb_body) with - | ( Ppat_var {txt; _} - , Pfunction_body {pexp_desc= Pexp_ident {txt= Lident e; _}; _} - ) -> - String.equal txt e - | _ -> false ) - in - (is_extension, {vb with pvb_is_pun}) + Ast_mapper.default_mapper.binding_op m b' + in + let value_bindings (m : Ast_mapper.mapper) vbs = + let punning is_extension vb = + let is_extension = + (* [and] nodes don't have extensions, so we need to track if the + earlier [let] did *) + is_extension || Option.is_some vb.pvb_attributes.attrs_extension in - let vbs' = - { vbs with - pvbs_bindings= - snd @@ List.fold_map ~init:false ~f:punning vbs.pvbs_bindings } + let pvb_is_pun = + is_extension + && + match prefer_let_puns with + | None -> vb.pvb_is_pun + | Some false -> false + | Some true -> ( + vb.pvb_is_pun + || + match (vb.pvb_pat.ppat_desc, vb.pvb_body) with + | ( Ppat_var {txt; _} + , Pfunction_body {pexp_desc= Pexp_ident {txt= Lident e; _}; _} + ) -> + String.equal txt e + | _ -> false ) in - Ast_mapper.default_mapper.value_bindings m vbs' + (is_extension, {vb with pvb_is_pun}) in - let pat m = function - | {ppat_desc= Ppat_cons (_ :: _ :: _ :: _ as l); _} as p - when match List.last_exn l with - (* Empty lists are always represented as Lident [] *) - | { ppat_desc= Ppat_construct ({txt= Lident "[]"; loc= _}, None) - ; ppat_attributes= [] - ; _ } -> - true - | _ -> false -> - let pats = List.(rev (tl_exn (rev l))) in - {p with ppat_desc= Ppat_list pats} - (* Field alias shorthand *) - | {ppat_desc= Ppat_record (fields, flag); _} as e -> - let fields = List.map ~f:(pat_record_field m) fields in - {e with ppat_desc= Ppat_record (fields, flag)} - | {ppat_desc= Ppat_tuple (l, oc); _} as p -> - let l = List.map ~f:(pat_tuple_elt m) l in - {p with ppat_desc= Ppat_tuple (l, oc)} - | p -> Ast_mapper.default_mapper.pat m p + let vbs' = + { vbs with + pvbs_bindings= + snd @@ List.fold_map ~init:false ~f:punning vbs.pvbs_bindings } in - let expr (m : Ast_mapper.mapper) = function - | {pexp_desc= Pexp_cons (_ :: _ :: _ :: _ as l); _} as e - when match List.last_exn l with - (* Empty lists are always represented as Lident [] *) - | { pexp_desc= Pexp_construct ({txt= Lident "[]"; loc= _}, None) - ; pexp_attributes= [] - ; _ } -> - true - | _ -> false -> - let exprs = List.(rev (tl_exn (rev l))) in - {e with pexp_desc= Pexp_list exprs} - (* Removing beginend *) - | { pexp_desc= Pexp_beginend (e', {infix_ext= None; infix_attrs= []}) - ; pexp_attributes= [] - ; _ } - when not preserve_beginend -> - m.expr m e' - (* Field alias shorthand *) - | {pexp_desc= Pexp_record (fields, with_); _} as e -> - let fields = List.map ~f:(record_field m) fields in - { e with - pexp_desc= Pexp_record (fields, Option.map ~f:(m.expr m) with_) - } - (* [( + ) 1 2] -> [1 + 2] *) - | { pexp_desc= - Pexp_apply - ( { pexp_desc= - Pexp_ident {txt= Lident op as longident; loc= loc_op} - ; pexp_attributes= [] - ; _ } - , [(Nolabel, l); (Nolabel, r)] ) - ; _ } as e - when Std_longident.is_infix longident - && not (Std_longident.is_monadic_binding longident) -> - let label_loc = {txt= op; loc= loc_op} in - {e with pexp_desc= Pexp_infix (label_loc, m.expr m l, m.expr m r)} - | {pexp_desc= Pexp_tuple l; _} as p -> - let l = List.map ~f:(exp_tuple_elt m) l in - {p with pexp_desc= Pexp_tuple l} - | e -> Ast_mapper.default_mapper.expr m e - in - Ast_mapper.{default_mapper with expr; pat; binding_op; value_bindings} - - let ast (type a) (fg : a t) ~ocaml_version ~preserve_beginend - ~prefer_let_puns ~input_name str : a = - map fg - (normalize_mapper ~ocaml_version ~preserve_beginend ~prefer_let_puns) - @@ - let lexbuf = Lexing.from_string str in - let ocaml_version = - Some Ocaml_version.(major ocaml_version, minor ocaml_version) - in - Location.init_info lexbuf input_name ; - match fg with - | Structure -> Parse.implementation ~ocaml_version lexbuf - | Signature -> Parse.interface ~ocaml_version lexbuf - | Use_file -> Parse.use_file ~ocaml_version lexbuf - | Core_type -> Parse.core_type ~ocaml_version lexbuf - | Module_type -> Parse.module_type ~ocaml_version lexbuf - | Expression -> Parse.expression ~ocaml_version lexbuf - | Pattern -> Parse.pattern ~ocaml_version lexbuf - | Repl_file -> Toplevel_lexer.repl_file ~ocaml_version lexbuf - | Documentation -> - let pos = (Location.curr lexbuf).loc_start in - let pos = {pos with pos_fname= input_name} in - Docstring.parse_file pos str -end + Ast_mapper.default_mapper.value_bindings m vbs' + in + let pat m = function + | {ppat_desc= Ppat_cons (_ :: _ :: _ :: _ as l); _} as p + when match List.last_exn l with + (* Empty lists are always represented as Lident [] *) + | { ppat_desc= Ppat_construct ({txt= Lident "[]"; loc= _}, None) + ; ppat_attributes= [] + ; _ } -> + true + | _ -> false -> + let pats = List.(rev (tl_exn (rev l))) in + {p with ppat_desc= Ppat_list pats} + (* Field alias shorthand *) + | {ppat_desc= Ppat_record (fields, flag); _} as e -> + let fields = List.map ~f:(pat_record_field m) fields in + {e with ppat_desc= Ppat_record (fields, flag)} + | {ppat_desc= Ppat_tuple (l, oc); _} as p -> + let l = List.map ~f:(pat_tuple_elt m) l in + {p with ppat_desc= Ppat_tuple (l, oc)} + | p -> Ast_mapper.default_mapper.pat m p + in + let expr (m : Ast_mapper.mapper) = function + | {pexp_desc= Pexp_cons (_ :: _ :: _ :: _ as l); _} as e + when match List.last_exn l with + (* Empty lists are always represented as Lident [] *) + | { pexp_desc= Pexp_construct ({txt= Lident "[]"; loc= _}, None) + ; pexp_attributes= [] + ; _ } -> + true + | _ -> false -> + let exprs = List.(rev (tl_exn (rev l))) in + {e with pexp_desc= Pexp_list exprs} + (* Removing beginend *) + | { pexp_desc= Pexp_beginend (e', {infix_ext= None; infix_attrs= []}) + ; pexp_attributes= [] + ; _ } + when not preserve_beginend -> + m.expr m e' + (* Field alias shorthand *) + | {pexp_desc= Pexp_record (fields, with_); _} as e -> + let fields = List.map ~f:(record_field m) fields in + { e with + pexp_desc= Pexp_record (fields, Option.map ~f:(m.expr m) with_) } + (* [( + ) 1 2] -> [1 + 2] *) + | { pexp_desc= + Pexp_apply + ( { pexp_desc= + Pexp_ident {txt= Lident op as longident; loc= loc_op} + ; pexp_attributes= [] + ; _ } + , [(Nolabel, l); (Nolabel, r)] ) + ; _ } as e + when Std_longident.is_infix longident + && not (Std_longident.is_monadic_binding longident) -> + let label_loc = {txt= op; loc= loc_op} in + {e with pexp_desc= Pexp_infix (label_loc, m.expr m l, m.expr m r)} + | {pexp_desc= Pexp_tuple l; _} as p -> + let l = List.map ~f:(exp_tuple_elt m) l in + {p with pexp_desc= Pexp_tuple l} + | e -> Ast_mapper.default_mapper.expr m e + in + Ast_mapper.{default_mapper with expr; pat; binding_op; value_bindings} module Printast = struct include Printast @@ -376,16 +450,17 @@ module Printast = struct let repl_file = Format.pp_print_list repl_phrase - let ast (type a) : a t -> _ -> a -> _ = function - | Structure -> implementation - | Signature -> interface - | Use_file -> use_file - | Core_type -> core_type - | Module_type -> module_type - | Expression -> expression - | Pattern -> pattern - | Repl_file -> repl_file - | Documentation -> Docstring.dump + let ast (type a) fmt (t : a t) = + match t with + | Structure {ast; _} -> implementation fmt ast + | Signature {ast; _} -> interface fmt ast + | Use_file {ast; _} -> use_file fmt ast + | Core_type {ast; _} -> core_type fmt ast + | Module_type {ast; _} -> module_type fmt ast + | Expression {ast; _} -> expression fmt ast + | Pattern {ast; _} -> pattern fmt ast + | Repl_file {ast; _} -> repl_file fmt ast + | Documentation ast -> Docstring.dump fmt ast end module Asttypes = struct @@ -395,3 +470,292 @@ module Asttypes = struct let is_recursive = function Recursive -> true | Nonrecursive -> false end + +exception Warning50 of (Location.t * Warnings.t) list + +module W = struct + type t = int + + let in_lexer : t list = [1; 2; 3; 14; 29] + + let disable x = -abs x + + let enable x = abs x + + let to_string x = + String.concat ~sep:"" (List.map ~f:(Format.sprintf "%+d") x) +end + +let tokens lexbuf = + let rec loop acc = + match Lexer.token_with_comments lexbuf with + (* The location in lexbuf are invalid for comments *) + | COMMENT (_, loc) as tok -> loop ((tok, loc) :: acc) + | DOCSTRING ds as tok -> loop ((tok, Docstrings.docstring_loc ds) :: acc) + | tok -> ( + let loc = Migrate_ast.Location.of_lexbuf lexbuf in + let acc = (tok, loc) :: acc in + match tok with EOF -> List.rev acc | _ -> loop acc ) + in + loop [] + +let collect_comments () = + List.map (Lexer.comments ()) ~f:(function + | `Comment txt, loc -> Cmt.create_comment txt loc + | `Docstring txt, loc -> Cmt.create_docstring txt loc ) + +let parse_ocaml (type a) ?(disable_w50 = false) ?(disable_deprecated = false) + (fg : a Kind.t) (conf : Conf.t) ~input_name ~source : a t = + let warnings = + if conf.opr_opts.quiet.v then List.map ~f:W.disable W.in_lexer else [] + in + let warnings = if disable_w50 then warnings else W.enable 50 :: warnings in + ignore @@ Warnings.parse_options false (W.to_string warnings) ; + let w50 = ref [] in + let lexbuf, prefix = prepare_lexbuf fg ~input_name source in + let t = + Warning.with_warning_filter + ~filter_warning:(fun loc warn -> + if + Warning.is_unexpected_docstring warn + && conf.opr_opts.comment_check.v + then ( + w50 := (loc, warn) :: !w50 ; + false ) + else not conf.opr_opts.quiet.v ) + ~filter_alert:(fun _loc alert -> + if Warning.is_deprecated_alert alert && disable_deprecated then false + else not conf.opr_opts.quiet.v ) + ~f:(fun () -> + let ocaml_version = conf.opr_opts.ocaml_version.v in + let preserve_beginend = + Poly.(conf.fmt_opts.exp_grouping.v = `Preserve) + in + let prefer_let_puns = + match conf.fmt_opts.letop_punning.v with + | `Always -> Some true + | `Never -> Some false + | `Preserve -> None + in + let nm = + normalize_mapper ~ocaml_version ~preserve_beginend ~prefer_let_puns + in + let ocaml_version_pair = + Some Ocaml_version.(major ocaml_version, minor ocaml_version) + in + let parse_std (type std) (std_fg : std Std_ast.t) : std = + (* Suppress warnings during raw std parse to avoid duplicate w50 + warnings — w50 handling is done at the OCaml-parser level. *) + let std_str = + if String.is_empty prefix then source + else + let pos = String.length prefix in + String.sub source ~pos ~len:(String.length source - pos) + in + Warning.with_warning_filter + ~filter_warning:(fun _loc _warn -> false) + ~filter_alert:(fun _loc _alert -> false) + ~f:(fun () -> + Std_ast.Parse.ast std_fg ~ocaml_version ~input_name std_str ) + in + let debug = conf.opr_opts.debug.v in + let metadata () = + let comments = collect_comments () in + Warnings.check_fatal () ; + let tokens = + let lexbuf, _ = prepare_lexbuf fg ~input_name source in + tokens lexbuf + in + (comments, Source.create ~text:source ~tokens) + in + let make_cmts ~walk ~ast ~print_ast = + let comments, source = metadata () in + Cmts.init ~debug ~source ~ast ~comments ~traverse:walk ~print_ast + in + let make_paired (type ext std) ~parse_ext + ~(walk : Ast_mapper.mapper -> ext -> ext) + ~(std_fg : std Std_ast.t) ~print_ast : ext * std * Cmts.t = + let ast = + walk nm (parse_ext ~ocaml_version:ocaml_version_pair lexbuf) + in + let std = parse_std std_fg in + let cmts = make_cmts ~walk ~ast ~print_ast in + (ast, std, cmts) + in + ( match fg with + | Structure -> + let ast, std, cmts = + make_paired ~parse_ext:Parse.implementation + ~walk:(fun m -> m.structure m) + ~std_fg:Std_ast.Structure + ~print_ast:Printast.implementation + in + Structure {ast; std; cmts} + | Signature -> + let ast, std, cmts = + make_paired ~parse_ext:Parse.interface + ~walk:(fun m -> m.signature m) + ~std_fg:Std_ast.Signature ~print_ast:Printast.interface + in + Signature {ast; std; cmts} + | Use_file -> + let ast, std, cmts = + make_paired ~parse_ext:Parse.use_file + ~walk:(fun m -> List.map ~f:(m.toplevel_phrase m)) + ~std_fg:Std_ast.Use_file ~print_ast:Printast.use_file + in + Use_file {ast; std; prefix; cmts} + | Core_type -> + let ast, std, cmts = + make_paired ~parse_ext:Parse.core_type + ~walk:(fun m -> m.typ m) + ~std_fg:Std_ast.Core_type ~print_ast:Printast.core_type + in + Core_type {ast; std; cmts} + | Module_type -> + let ast, std, cmts = + make_paired ~parse_ext:Parse.module_type + ~walk:(fun m -> m.module_type m) + ~std_fg:Std_ast.Module_type ~print_ast:Printast.module_type + in + Module_type {ast; std; cmts} + | Expression -> + let ast, std, cmts = + make_paired ~parse_ext:Parse.expression + ~walk:(fun m -> m.expr m) + ~std_fg:Std_ast.Expression ~print_ast:Printast.expression + in + Expression {ast; std; cmts} + | Pattern -> + let ast, std, cmts = + make_paired ~parse_ext:Parse.pattern + ~walk:(fun m -> m.pat m) + ~std_fg:Std_ast.Pattern ~print_ast:Printast.pattern + in + Pattern {ast; std; cmts} + | Repl_file -> + let walk (m : Ast_mapper.mapper) = + List.map ~f:(m.repl_phrase m) + in + let ast = + walk nm + (Toplevel_lexer.repl_file ~ocaml_version:ocaml_version_pair + lexbuf ) + in + let cmts = + make_cmts ~walk ~ast ~print_ast:Printast.repl_file + in + Repl_file {ast; cmts} + | Documentation -> assert false + : a t ) ) + in + match List.rev !w50 with [] -> t | w50 -> raise (Warning50 w50) + +let parse (type a) ?disable_w50 ?disable_deprecated (k : a Kind.t) conf + ~input_name ~source : a t = + match k with + | Documentation -> + let pos = {Lexing.dummy_pos with pos_fname= input_name} in + Documentation (Docstring.parse_file pos source) + | k -> + parse_ocaml ?disable_w50 ?disable_deprecated k conf ~input_name ~source + +(** [is_repl_block x] returns whether [x] is a list of REPL phrases and + outputs of the form: + + {v + # let this is = some phrase;; + this is some output + v} *) +let is_repl_block x = + String.length x >= 2 && Char.equal x.[0] '#' && Char.is_whitespace x.[1] + +let parse_toplevel ?disable_w50 ?disable_deprecated (conf : Conf.t) + ~input_name ~source : (use_file t, repl_file t) Either.t = + if is_repl_block source && conf.fmt_opts.parse_toplevel_phrases.v then + Either.Second + (parse ?disable_w50 ?disable_deprecated Repl_file conf ~input_name + ~source ) + else + First + (parse ?disable_w50 ?disable_deprecated Use_file conf ~input_name + ~source ) + +type std_value = Std_value : 'a Std_ast.t * 'a -> std_value + +let get_std (type a) (t : a t) : std_value option = + match t with + | Structure {std; _} -> Some (Std_value (Structure, std)) + | Signature {std; _} -> Some (Std_value (Signature, std)) + | Use_file {std; _} -> Some (Std_value (Use_file, std)) + | Core_type {std; _} -> Some (Std_value (Core_type, std)) + | Module_type {std; _} -> Some (Std_value (Module_type, std)) + | Expression {std; _} -> Some (Std_value (Expression, std)) + | Pattern {std; _} -> Some (Std_value (Pattern, std)) + | Repl_file _ -> None + | Documentation _ -> None + +type std_pair = Std_pair : 'a Std_ast.t * 'a * 'a -> std_pair + +let get_std_pair (type a) (t1 : a t) (t2 : a t) : std_pair option = + match (t1, t2) with + | Structure {std= s1; _}, Structure {std= s2; _} -> + Some (Std_pair (Structure, s1, s2)) + | Signature {std= s1; _}, Signature {std= s2; _} -> + Some (Std_pair (Signature, s1, s2)) + | Use_file {std= s1; _}, Use_file {std= s2; _} -> + Some (Std_pair (Use_file, s1, s2)) + | Core_type {std= s1; _}, Core_type {std= s2; _} -> + Some (Std_pair (Core_type, s1, s2)) + | Module_type {std= s1; _}, Module_type {std= s2; _} -> + Some (Std_pair (Module_type, s1, s2)) + | Expression {std= s1; _}, Expression {std= s2; _} -> + Some (Std_pair (Expression, s1, s2)) + | Pattern {std= s1; _}, Pattern {std= s2; _} -> + Some (Std_pair (Pattern, s1, s2)) + | Repl_file _, Repl_file _ -> None + | Documentation _, Documentation _ -> None + +let dump (type a) fmt (t : a t) = + match get_std t with + | Some (Std_value (std_fg, std_v)) -> Std_ast.Printast.ast std_fg fmt std_v + | None -> Printast.ast fmt t + +let dump_normalized (type a) ~normalize_code conf fmt (t : a t) = + match get_std t with + | Some (Std_value (std_fg, std_v)) -> + Std_ast.Printast.ast std_fg fmt + (Normalize_std_ast.ast std_fg ~normalize_code conf std_v) + | None -> Printast.ast fmt t + +type ast_check_result = + | Ast_preserved + | Docstrings_moved of Cmt.error list + | Ast_changed + +let equivalent (type a) ~normalize_code conf (old_t : a t) (new_t : a t) : + ast_check_result = + match get_std_pair old_t new_t with + | None -> + (* TODO: Repl_file and Documentation have no std AST, so we skip the + equivalence check. + + - Repl_file: could validate each toplevel phrase individually. + + - Documentation: could check each formatted code block for AST + preservation. *) + Ast_preserved + | Some (Std_pair (std_fg, old_std, new_std)) -> + if + Normalize_std_ast.equal std_fg ~normalize_code + ~ignore_doc_comments:(not conf.Conf.opr_opts.comment_check.v) + conf old_std new_std + then Ast_preserved + else if + Normalize_std_ast.equal std_fg ~normalize_code + ~ignore_doc_comments:true conf old_std new_std + then + Docstrings_moved + (Normalize_std_ast.moved_docstrings ~normalize_code std_fg conf + old_std new_std ) + else Ast_changed diff --git a/lib/Extended_ast.mli b/lib/Extended_ast.mli index bf4b0c145f..e2b1070a55 100644 --- a/lib/Extended_ast.mli +++ b/lib/Extended_ast.mli @@ -17,42 +17,84 @@ type use_file = toplevel_phrase list type repl_file = repl_phrase list +module Std_parsetree = Ocamlformat_parser_standard.Parsetree + +(** A parsed fragment. Each constructor identifies the AST kind and carries + the metadata produced by parsing. Paired kinds carry both the extended + AST [ast] and its standard-library counterpart [std] (used for + equivalence checking). Only [Use_file] carries a shebang [prefix]. + [Documentation] has no comment-placement state at all; the other kinds + carry a [Cmts.t] holding the placed comments and the [Source.t]. *) +module Kind : sig + type 'a t = + | Structure : structure t + | Signature : signature t + | Use_file : use_file t + | Core_type : core_type t + | Module_type : module_type t + | Expression : expression t + | Pattern : pattern t + | Repl_file : repl_file t + | Documentation : Ocamlformat_odoc_parser.Ast.t t + + type any = Any : 'a t -> any [@@unboxed] + + val of_syntax : Syntax.t -> any +end + type 'a t = - | Structure : structure t - | Signature : signature t - | Use_file : use_file t - | Core_type : core_type t - | Module_type : module_type t - | Expression : expression t - | Pattern : pattern t - | Repl_file : repl_file t - | Documentation : Ocamlformat_odoc_parser.Ast.t t + | Structure : + {ast: structure; std: Std_parsetree.structure; cmts: Cmts.t} + -> structure t + | Signature : + {ast: signature; std: Std_parsetree.signature; cmts: Cmts.t} + -> signature t + | Use_file : + { ast: use_file + ; std: Std_parsetree.toplevel_phrase list + ; prefix: string + ; cmts: Cmts.t } + -> use_file t + | Core_type : + {ast: core_type; std: Std_parsetree.core_type; cmts: Cmts.t} + -> core_type t + | Module_type : + {ast: module_type; std: Std_parsetree.module_type; cmts: Cmts.t} + -> module_type t + | Expression : + {ast: expression; std: Std_parsetree.expression; cmts: Cmts.t} + -> expression t + | Pattern : + {ast: pattern; std: Std_parsetree.pattern; cmts: Cmts.t} + -> pattern t + | Repl_file : {ast: repl_file; cmts: Cmts.t} -> repl_file t + | Documentation : + Ocamlformat_odoc_parser.Ast.t + -> Ocamlformat_odoc_parser.Ast.t t type any_t = Any : 'a t -> any_t [@@unboxed] -val of_syntax : Syntax.t -> any_t - -module Parse : sig - val ast : - 'a t - -> ocaml_version:Ocaml_version.t - -> preserve_beginend:bool - -> prefer_let_puns:bool option - -> input_name:string - -> string - -> 'a -end +val kind_of : 'a t -> 'a Kind.t -val equal_core_type : core_type -> core_type -> bool +val ast : 'a t -> 'a -val equal : 'a t -> 'a -> 'a -> bool +val cmts : 'a t -> Cmts.t option +(** [None] for [Documentation]. *) -val map : 'a t -> Ast_mapper.mapper -> 'a -> 'a +val copy_cmts : 'a t -> 'a t +(** Return [t] with a deep copy of its embedded [Cmts.t] (no-op for + [Documentation]). Used to format without consuming the original. *) + +val traverse : 'a t -> Ast_mapper.mapper -> 'a -> 'a +(** Apply a mapper to the AST inside [t]. [Documentation] is left + unchanged. *) + +val map : Ast_mapper.mapper -> 'a t -> 'a t module Printast : sig include module type of Printast - val ast : 'a t -> Format.formatter -> 'a -> unit + val ast : Format.formatter -> 'a t -> unit end module Asttypes : sig @@ -62,3 +104,58 @@ module Asttypes : sig val is_recursive : rec_flag -> bool end + +exception Warning50 of (Location.t * Warnings.t) list + +val parse : + ?disable_w50:bool + -> ?disable_deprecated:bool + -> 'a Kind.t + -> Conf.t + -> input_name:string + -> source:string + -> 'a t +(** Parse source with warning handling, hash-bang detection, and comment + placement. For paired fragment kinds, also parses with the standard + parser. *) + +val parse_toplevel : + ?disable_w50:bool + -> ?disable_deprecated:bool + -> Conf.t + -> input_name:string + -> source:string + -> (use_file t, repl_file t) Either.t +(** Parse source as toplevel phrases or REPL phrases depending on content. *) + +type std_value = Std_value : 'a Std_ast.t * 'a -> std_value + +val get_std : 'a t -> std_value option +(** Extract the std AST with its [Std_ast.t] witness, or [None] for + [Repl_file] and [Documentation]. *) + +val dump : Format.formatter -> 'a t -> unit +(** Print the std AST for debug output. Falls back to extended AST for + [Repl_file] and [Documentation]. *) + +val dump_normalized : + normalize_code:(string -> string) + -> Conf.t + -> Format.formatter + -> 'a t + -> unit +(** Print the normalized std AST for debug output. *) + +type ast_check_result = + | Ast_preserved + | Docstrings_moved of Cmt.error list + | Ast_changed + +val equivalent : + normalize_code:(string -> string) + -> Conf.t + -> 'a t + -> 'a t + -> ast_check_result +(** Check whether formatting preserved the standard AST. For [Repl_file] + and [Documentation], always returns [Ast_preserved]. *) diff --git a/lib/Fmt_ast.ml b/lib/Fmt_ast.ml index 62ecceced4..5b5c515076 100644 --- a/lib/Fmt_ast.ml +++ b/lib/Fmt_ast.ml @@ -5097,37 +5097,41 @@ module Chunk = struct fmt fg c ctx @@ split fg l ~state end -let fmt_file (type a) ~ctx ~fmt_code ~debug (fragment : a Extended_ast.t) - source cmts conf (itms : a) = - let c = {source; cmts; conf; debug; fmt_code} in - match (fragment, itms) with - | Structure, [] | Signature, [] | Use_file, [] -> - Cmts.fmt_after ~pro:noop c Location.none - | Structure, l -> Chunk.split_and_fmt Structure c ctx l - | Signature, l -> Chunk.split_and_fmt Signature c ctx l - | Use_file, l -> Chunk.split_and_fmt Use_file c ctx l - | Core_type, ty -> fmt_core_type c (sub_typ ~ctx:(Pld (PTyp ty)) ty) - | Module_type, mty -> +let fmt_file (type a) ~ctx ~fmt_code ~debug (parsed : a Extended_ast.t) conf + = + let mk_c (cmts : Cmts.t) = + {source= Cmts.source cmts; cmts; conf; debug; fmt_code} + in + match parsed with + | Documentation d -> Fmt_odoc.fmt_ast conf ~fmt_code d + | Structure {ast= []; cmts; _} | Signature {ast= []; cmts; _} -> + Cmts.fmt_after ~pro:noop (mk_c cmts) Location.none + | Use_file {ast; prefix; cmts; _} -> ( + let c = mk_c cmts in + fmt_if (not (String.is_empty prefix)) (str prefix $ force_newline) + $ + match ast with + | [] -> Cmts.fmt_after ~pro:noop c Location.none + | _ -> Chunk.split_and_fmt Use_file c ctx ast ) + | Structure {ast; cmts; _} -> + Chunk.split_and_fmt Structure (mk_c cmts) ctx ast + | Signature {ast; cmts; _} -> + Chunk.split_and_fmt Signature (mk_c cmts) ctx ast + | Core_type {ast= ty; cmts; _} -> + fmt_core_type (mk_c cmts) (sub_typ ~ctx:(Pld (PTyp ty)) ty) + | Module_type {ast= mty; cmts; _} -> compose_module ~f:Fn.id - (fmt_module_type c (sub_mty ~ctx:(Mty mty) mty)) - | Expression, e -> - fmt_expression c (sub_exp ~ctx:(Str (Ast_helper.Str.eval e)) e) - | Pattern, p -> fmt_pattern c (sub_pat ~ctx:(Pld (PPat (p, None))) p) - | Repl_file, l -> fmt_repl_file c ctx l - | Documentation, d -> - (* TODO: [source] and [cmts] should have never been computed when - formatting doc. *) - Fmt_odoc.fmt_ast c.conf ~fmt_code:c.fmt_code d - -let fmt_parse_result conf ~debug ast_kind ast source comments - ~set_margin:set_margin_p ~fmt_code = - let cmts = Cmts.init ast_kind ~debug source ast comments in - let ctx = Top in - let code = - fmt_if set_margin_p (set_margin conf.Conf.fmt_opts.margin.v) - $ fmt_file ~ctx ~debug ast_kind source cmts conf ast ~fmt_code - in - Ok code + (fmt_module_type (mk_c cmts) (sub_mty ~ctx:(Mty mty) mty)) + | Expression {ast= e; cmts; _} -> + fmt_expression (mk_c cmts) + (sub_exp ~ctx:(Str (Ast_helper.Str.eval e)) e) + | Pattern {ast= p; cmts; _} -> + fmt_pattern (mk_c cmts) (sub_pat ~ctx:(Pld (PPat (p, None))) p) + | Repl_file {ast= l; cmts; _} -> fmt_repl_file (mk_c cmts) ctx l + +let fmt_parse_result conf ~debug parsed ~set_margin:set_margin_p ~fmt_code = + fmt_if set_margin_p (set_margin conf.Conf.fmt_opts.margin.v) + $ fmt_file ~ctx:Top ~debug parsed conf ~fmt_code let fmt_code ~debug = let rec fmt_code (conf : Conf.t) ~offset ~set_margin s = @@ -5140,15 +5144,13 @@ let fmt_code ~debug = let warn = fmt_opts.parse_toplevel_phrases.v in let input_name = !Location.input_name in match - Parse_with_comments.parse_toplevel ~disable_deprecated:true conf - ~input_name ~source:s + Extended_ast.parse_toplevel ~disable_deprecated:true conf ~input_name + ~source:s with - | Either.First {ast; comments; source; prefix= _} -> - fmt_parse_result conf ~debug Use_file ast source comments ~set_margin - ~fmt_code - | Second {ast; comments; source; prefix= _} -> - fmt_parse_result conf ~debug Repl_file ast source comments - ~set_margin ~fmt_code + | Either.First parsed -> + Ok (fmt_parse_result conf ~debug parsed ~set_margin ~fmt_code) + | Second parsed -> + Ok (fmt_parse_result conf ~debug parsed ~set_margin ~fmt_code) | exception Syntaxerr.Error (Expecting (_, x)) when warn -> Error (`Msg (Format.asprintf "expecting: %s" x)) | exception Syntaxerr.Error (Not_expecting (_, x)) when warn -> @@ -5160,9 +5162,10 @@ let fmt_code ~debug = in fmt_code -let fmt_ast fragment ~debug source cmts conf l = +let fmt_ast parsed ~debug conf = (* [Ast.init] should be called only once per file. In particular, we don't want to call it when formatting comments *) Ast.init conf ; let fmt_code = fmt_code ~debug in - fmt_file ~ctx:Top ~fmt_code ~debug fragment source cmts conf l + let parsed = Extended_ast.copy_cmts parsed in + (fmt_file ~ctx:Top ~fmt_code ~debug parsed conf, Extended_ast.cmts parsed) diff --git a/lib/Fmt_ast.mli b/lib/Fmt_ast.mli index 86a6a01c9c..a920374417 100644 --- a/lib/Fmt_ast.mli +++ b/lib/Fmt_ast.mli @@ -12,11 +12,8 @@ (** Format OCaml Ast *) val fmt_ast : - 'a Extended_ast.t - -> debug:bool - -> Source.t - -> Cmts.t - -> Conf.t - -> 'a - -> Fmt.t -(** Format a fragment. *) + 'a Extended_ast.t -> debug:bool -> Conf.t -> Fmt.t * Cmts.t option +(** Format a parsed fragment. Internally takes a fresh copy of the embedded + [Cmts.t] (so the embedded state is preserved). Returns the formatter + and the copied [Cmts.t] (consumed once the formatter is evaluated), + or [None] for [Documentation]. *) diff --git a/lib/Loc_tree.ml b/lib/Loc_tree.ml index 60af264777..7609b14e58 100644 --- a/lib/Loc_tree.ml +++ b/lib/Loc_tree.ml @@ -10,11 +10,12 @@ (**************************************************************************) module Location = Migrate_ast.Location -open Extended_ast +open Ocamlformat_parser_extended include Non_overlapping_interval_tree.Make (Location) -(** Use Ast_mapper to collect all locs in ast, and create tree of them. *) -let of_ast fragment ast = +(** Use [traverse] to apply a mapper that collects all locations in the AST, + and create a tree of them. *) +let of_ast ~traverse ast = let attribute (m : Ast_mapper.mapper) attr = (* ignore location of docstrings *) if Ast.Attr.is_doc attr then attr @@ -30,5 +31,5 @@ let of_ast fragment ast = let mapper = Ast_mapper.{default_mapper with location; attribute; arg_label} in - map fragment mapper ast |> ignore ; + traverse mapper ast |> ignore ; (of_list !locs, !locs) diff --git a/lib/Loc_tree.mli b/lib/Loc_tree.mli index 7bf989222c..34dbbc79b9 100644 --- a/lib/Loc_tree.mli +++ b/lib/Loc_tree.mli @@ -11,5 +11,9 @@ include Non_overlapping_interval_tree.S with type itv = Location.t -val of_ast : 'a Extended_ast.t -> 'a -> t * Location.t list -(** Use Ast_mapper to collect all locs in ast, and create a tree of them. *) +val of_ast : + traverse:(Ocamlformat_parser_extended.Ast_mapper.mapper -> 'a -> 'a) + -> 'a + -> t * Location.t list +(** Use [traverse] to apply a mapper that collects all locations in the AST, + and create a tree of them. *) diff --git a/lib/Normalize_extended_ast.ml b/lib/Normalize_extended_ast.ml index 72eb0a8621..791fe2357c 100644 --- a/lib/Normalize_extended_ast.ml +++ b/lib/Normalize_extended_ast.ml @@ -13,32 +13,10 @@ open Extended_ast type 'a t = 'a Extended_ast.t -let dedup_cmts fragment ast comments = - let of_ast ast = - let docs = ref (Set.empty (module Cmt)) in - let attribute m atr = - match atr with - | { attr_payload= - PStr - [ { pstr_desc= - Pstr_eval - ( { pexp_desc= - Pexp_constant - {pconst_desc= Pconst_string (doc, _, None); _} - ; pexp_loc - ; _ } - , [] ) - ; _ } ] - ; _ } - when Ast.Attr.is_doc atr -> - docs := Set.add !docs (Cmt.create_docstring doc pexp_loc) ; - atr - | _ -> Ast_mapper.default_mapper.attribute m atr - in - map fragment {Ast_mapper.default_mapper with attribute} ast |> ignore ; - !docs - in - Set.(to_list (diff (of_list (module Cmt) comments) (of_ast ast))) +let comments_of (type a) (parsed : a Extended_ast.t) : Cmt.t list = + match Extended_ast.cmts parsed with + | Some cmts -> Cmts.all_comments cmts + | None -> [] let normalize_comments ~normalize_cmt dedup fmt comments = dedup comments @@ -46,23 +24,20 @@ let normalize_comments ~normalize_cmt dedup fmt comments = Migrate_ast.Location.compare (Cmt.loc a) (Cmt.loc b) ) |> List.iter ~f:(fun cmt -> Format.fprintf fmt "%s," (normalize_cmt cmt)) -let normalize_parse_result ~normalize_cmt ast_kind ast comments = - Format.asprintf "AST,%a,COMMENTS,[%a]" (Printast.ast ast_kind) ast - (normalize_comments ~normalize_cmt (dedup_cmts ast_kind ast)) +let normalize_parse_result ~normalize_cmt parsed = + let ast = Extended_ast.ast parsed in + let traverse = Extended_ast.traverse parsed in + let comments = comments_of parsed in + Format.asprintf "AST,%a,COMMENTS,[%a]" Printast.ast parsed + (normalize_comments ~normalize_cmt (Cmts.dedup_cmts ~traverse ast)) comments let normalize_code ~normalize_cmt conf (m : Ast_mapper.mapper) txt = let input_name = "" in let normalize_cmt = normalize_cmt conf in - match Parse_with_comments.parse_toplevel conf ~input_name ~source:txt with - | First {ast; comments; _} -> - normalize_parse_result ~normalize_cmt Use_file - (List.map ~f:(m.toplevel_phrase m) ast) - comments - | Second {ast; comments; _} -> - normalize_parse_result ~normalize_cmt Repl_file - (List.map ~f:(m.repl_phrase m) ast) - comments + match Extended_ast.parse_toplevel conf ~input_name ~source:txt with + | First parsed -> normalize_parse_result ~normalize_cmt (map m parsed) + | Second parsed -> normalize_parse_result ~normalize_cmt (map m parsed) | exception _ -> txt let docstring (c : Conf.t) = @@ -186,11 +161,6 @@ let normalize_code (conf : Conf.t) code = let n = normalize_cmt conf in n#code code -let ast fragment ~ignore_doc_comments c = - let normalize_cmt = normalize_cmt c in - map fragment - (make_mapper ~ignore_doc_comments ~normalize_doc:normalize_cmt#doc) - module Normalized_cmt = struct type t = { cmt_kind: [`Comment | `Doc_comment] @@ -237,14 +207,12 @@ let diff ~f x y = ~second:Normalized_cmt.added ) |> function [] -> Ok () | errors -> Error errors -let diff_cmts (conf : Conf.t) x y = +let diff_cmts (conf : Conf.t) old new_ = let normalize = normalize_cmt conf in let f z = let f = Normalized_cmt.of_cmt normalize#cmt in - Set.of_list (module Normalized_cmt.Comparator) (List.map ~f z) + Set.of_list + (module Normalized_cmt.Comparator) + (List.map ~f (comments_of z)) in - diff ~f x y - -let equal fragment ~ignore_doc_comments c ast1 ast2 = - let map = ast fragment c ~ignore_doc_comments in - equal fragment (map ast1) (map ast2) + diff ~f old new_ diff --git a/lib/Normalize_extended_ast.mli b/lib/Normalize_extended_ast.mli index ca1c9e8eba..4890c120ca 100644 --- a/lib/Normalize_extended_ast.mli +++ b/lib/Normalize_extended_ast.mli @@ -11,15 +11,8 @@ type 'a t = 'a Extended_ast.t -val dedup_cmts : 'a t -> 'a -> Cmt.t list -> Cmt.t list -(** Remove comments that duplicate docstrings (or other comments). *) - -val equal : 'a t -> ignore_doc_comments:bool -> Conf.t -> 'a -> 'a -> bool -(** Compare fragments for equality up to normalization. *) - -val diff_cmts : - Conf.t -> Cmt.t list -> Cmt.t list -> (unit, Cmt.error list) Result.t -(** Difference between two lists of comments. *) +val diff_cmts : Conf.t -> 'a t -> 'a t -> (unit, Cmt.error list) Result.t +(** Difference between the comments of two parsed values. *) val normalize_code : Conf.t -> string -> string (** Normalize a code block in docstrings. *) diff --git a/lib/Normalize_std_ast.ml b/lib/Normalize_std_ast.ml index c20d44bef9..b6a2fb456a 100644 --- a/lib/Normalize_std_ast.ml +++ b/lib/Normalize_std_ast.ml @@ -18,18 +18,13 @@ let is_doc = function | {attr_name= {Location.txt= "ocaml.doc" | "ocaml.text"; _}; _} -> true | _ -> false -let normalize_code conf txt = - (* Normalize code blocks in docstrings using the extended AST. This - correctly handles repl phrases. *) - Normalize_extended_ast.normalize_code conf txt - let docstring (c : Conf.t) = Docstring.normalize ~parse_docstrings:c.fmt_opts.parse_docstrings.v let sort_attributes : attributes -> attributes = List.sort ~compare:Poly.compare -let make_mapper conf ~ignore_doc_comments = +let make_mapper conf ~normalize_code ~ignore_doc_comments = let open Ast_helper in (* remove locations *) let location _ _ = Location.none in @@ -47,7 +42,6 @@ let make_mapper conf ~ignore_doc_comments = , [] ) ; _ } as pstr ) ] when is_doc attr -> - let normalize_code = normalize_code conf in let doc' = docstring conf ~normalize_code doc in Ast_mapper.default_mapper.attribute m { attr with @@ -168,11 +162,11 @@ let make_mapper conf ~ignore_doc_comments = ; pat ; typ } -let ast fragment ~ignore_doc_comments c = - map fragment (make_mapper c ~ignore_doc_comments) +let ast fragment ~normalize_code ~ignore_doc_comments c = + map fragment (make_mapper c ~normalize_code ~ignore_doc_comments) -let equal fragment ~ignore_doc_comments c ast1 ast2 = - let map = ast fragment c ~ignore_doc_comments in +let equal fragment ~normalize_code ~ignore_doc_comments c ast1 ast2 = + let map = ast fragment c ~normalize_code ~ignore_doc_comments in equal fragment (map ast1) (map ast2) let ast = ast ~ignore_doc_comments:false @@ -206,14 +200,16 @@ let docstrings (type a) (fragment : a t) s = let (_ : a) = map fragment (make_docstring_mapper docstrings) s in !docstrings -let docstring conf = - let normalize_code = normalize_code conf in - docstring conf ~normalize_code +let docstring ~normalize_code conf = docstring conf ~normalize_code -let moved_docstrings fragment c s1 s2 = +let moved_docstrings ~normalize_code fragment c s1 s2 = let d1 = docstrings fragment s1 in let d2 = docstrings fragment s2 in - let equal (_, x) (_, y) = String.equal (docstring c x) (docstring c y) in + let equal (_, x) (_, y) = + String.equal + (docstring ~normalize_code c x) + (docstring ~normalize_code c y) + in let cmt_kind = `Doc_comment in let cmt (loc, x) = Cmt.create_docstring x loc in let dropped x = {Cmt.kind= `Dropped (cmt x); cmt_kind} in diff --git a/lib/Normalize_std_ast.mli b/lib/Normalize_std_ast.mli index 37ba7e76d3..c38721d125 100644 --- a/lib/Normalize_std_ast.mli +++ b/lib/Normalize_std_ast.mli @@ -11,10 +11,23 @@ type 'a t = 'a Std_ast.t -val ast : 'a t -> Conf.t -> 'a -> 'a +val ast : 'a t -> normalize_code:(string -> string) -> Conf.t -> 'a -> 'a (** Normalize an AST fragment. *) -val equal : 'a t -> ignore_doc_comments:bool -> Conf.t -> 'a -> 'a -> bool +val equal : + 'a t + -> normalize_code:(string -> string) + -> ignore_doc_comments:bool + -> Conf.t + -> 'a + -> 'a + -> bool (** Compare fragments for equality up to normalization. *) -val moved_docstrings : 'a t -> Conf.t -> 'a -> 'a -> Cmt.error list +val moved_docstrings : + normalize_code:(string -> string) + -> 'a t + -> Conf.t + -> 'a + -> 'a + -> Cmt.error list diff --git a/lib/Parse_with_comments.ml b/lib/Parse_with_comments.ml deleted file mode 100644 index 4a1c48a5bc..0000000000 --- a/lib/Parse_with_comments.ml +++ /dev/null @@ -1,137 +0,0 @@ -(**************************************************************************) -(* *) -(* OCamlFormat *) -(* *) -(* Copyright (c) Facebook, Inc. and its affiliates. *) -(* *) -(* This source code is licensed under the MIT license found in *) -(* the LICENSE file in the root directory of this source tree. *) -(* *) -(**************************************************************************) - -open Migrate_ast - -type 'a with_comments = - {ast: 'a; comments: Cmt.t list; prefix: string; source: Source.t} - -module W = struct - type t = int - - let in_lexer = [1; 2; 3; 14; 29] - - let disable x = -abs x - - let enable x = abs x - - let to_string x = - String.concat ~sep:"" (List.map ~f:(Format.sprintf "%+d") x) -end - -exception Warning50 of (Location.t * Warnings.t) list - -let tokens lexbuf = - let rec loop acc = - match Lexer.token_with_comments lexbuf with - (* The location in lexbuf are invalid for comments *) - | COMMENT (_, loc) as tok -> loop ((tok, loc) :: acc) - | DOCSTRING ds as tok -> loop ((tok, Docstrings.docstring_loc ds) :: acc) - | tok -> ( - let loc = Location.of_lexbuf lexbuf in - let acc = (tok, loc) :: acc in - match tok with EOF -> List.rev acc | _ -> loop acc ) - in - loop [] - -let fresh_lexbuf source = - let lexbuf = Lexing.from_string source in - Location.init_info lexbuf !Location.input_name ; - let hash_bang = - Lexer.skip_hash_bang lexbuf ; - let len = lexbuf.lex_last_pos in - String.sub source ~pos:0 ~len - in - (lexbuf, hash_bang) - -let split_hash_bang source = - let lexbuf = Lexing.from_string source in - Location.init_info lexbuf !Location.input_name ; - Lexer.skip_hash_bang lexbuf ; - let len = lexbuf.lex_last_pos in - let hash_bang = String.sub source ~pos:0 ~len in - let rest = String.sub source ~pos:len ~len:(String.length source - len) in - (rest, hash_bang) - -let parse ?(disable_w50 = false) ?(disable_deprecated = false) parse fragment - (conf : Conf.t) ~input_name ~source = - let warnings = - if conf.opr_opts.quiet.v then List.map ~f:W.disable W.in_lexer else [] - in - let warnings = if disable_w50 then warnings else W.enable 50 :: warnings in - ignore @@ Warnings.parse_options false (W.to_string warnings) ; - let w50 = ref [] in - let t = - let source, hash_bang = split_hash_bang source in - Warning.with_warning_filter - ~filter_warning:(fun loc warn -> - if - Warning.is_unexpected_docstring warn - && conf.opr_opts.comment_check.v - then ( - w50 := (loc, warn) :: !w50 ; - false ) - else not conf.opr_opts.quiet.v ) - ~filter_alert:(fun _loc alert -> - if Warning.is_deprecated_alert alert && disable_deprecated then false - else not conf.opr_opts.quiet.v ) - ~f:(fun () -> - let ocaml_version = conf.opr_opts.ocaml_version.v in - let ast = parse fragment ~ocaml_version ~input_name source in - Warnings.check_fatal () ; - let comments = - let mk_cmt = function - | `Comment txt, loc -> Cmt.create_comment txt loc - | `Docstring txt, loc -> Cmt.create_docstring txt loc - in - List.map ~f:mk_cmt (Lexer.comments ()) - in - let tokens = - (* mld files can not always be lexed using the ocaml lexer *) - let lexbuf, _ = fresh_lexbuf source in - try tokens lexbuf with Lexer.Error _ -> [] - in - let source = Source.create ~text:source ~tokens in - {ast; comments; prefix= hash_bang; source} ) - in - match List.rev !w50 with [] -> t | w50 -> raise (Warning50 w50) - -let parse_ast (conf : Conf.t) fg ~ocaml_version ~input_name s = - let preserve_beginend = Poly.(conf.fmt_opts.exp_grouping.v = `Preserve) in - let prefer_let_puns = - match conf.fmt_opts.letop_punning.v with - | `Always -> Some true - | `Never -> Some false - | `Preserve -> None - in - Extended_ast.Parse.ast fg ~ocaml_version ~preserve_beginend - ~prefer_let_puns ~input_name s - -(** [is_repl_block x] returns whether [x] is a list of REPL phrases and - outputs of the form: - - {v - # let this is = some phrase;; - this is some output - v} *) -let is_repl_block x = - String.length x >= 2 && Char.equal x.[0] '#' && Char.is_whitespace x.[1] - -let parse_toplevel ?disable_w50 ?disable_deprecated (conf : Conf.t) - ~input_name ~source = - if is_repl_block source && conf.fmt_opts.parse_toplevel_phrases.v then - Either.Second - (parse ?disable_w50 ?disable_deprecated (parse_ast conf) - Extended_ast.Repl_file conf ~input_name ~source ) - else - First - (parse ?disable_w50 ?disable_deprecated (parse_ast conf) - Extended_ast.Use_file conf ~input_name ~source ) diff --git a/lib/Parse_with_comments.mli b/lib/Parse_with_comments.mli deleted file mode 100644 index f2fc03ce2b..0000000000 --- a/lib/Parse_with_comments.mli +++ /dev/null @@ -1,63 +0,0 @@ -(**************************************************************************) -(* *) -(* OCamlFormat *) -(* *) -(* Copyright (c) Facebook, Inc. and its affiliates. *) -(* *) -(* This source code is licensed under the MIT license found in *) -(* the LICENSE file in the root directory of this source tree. *) -(* *) -(**************************************************************************) - -type 'a with_comments = - {ast: 'a; comments: Cmt.t list; prefix: string; source: Source.t} - -module W : sig - type t - - val in_lexer : int list - - val disable : int -> t - - val enable : int -> t - - val to_string : t list -> string -end - -exception Warning50 of (Location.t * Warnings.t) list - -val parse : - ?disable_w50:bool - -> ?disable_deprecated:bool - -> ( 'b - -> ocaml_version:Ocaml_version.t - -> input_name:string - -> string - -> 'a ) - -> 'b - -> Conf.t - -> input_name:string - -> source:string - -> 'a with_comments -(** @raise [Warning50] on misplaced documentation comments. *) - -val parse_toplevel : - ?disable_w50:bool - -> ?disable_deprecated:bool - -> Conf.t - -> input_name:string - -> source:string - -> ( Extended_ast.use_file with_comments - , Extended_ast.repl_file with_comments ) - Either.t -(** Variant of {!parse} that uses {!Extended_ast.Parse.toplevel}. This - function handles [conf.parse_toplevel_phrases]. *) - -val parse_ast : - Conf.t - -> 'a Extended_ast.t - -> ocaml_version:Ocaml_version.t - -> input_name:string - -> string - -> 'a -(** Argument to {!parse}. *) diff --git a/lib/Source.ml b/lib/Source.ml index 07f5ebf21e..76b01f9466 100644 --- a/lib/Source.ml +++ b/lib/Source.ml @@ -10,7 +10,7 @@ (**************************************************************************) open Migrate_ast -open Extended_ast +open Parsetree (** Concrete syntax. *) type t = {text: string; tokens: (Parser.token * Location.t) array} diff --git a/lib/Source.mli b/lib/Source.mli index 4d7217040c..9416265e8b 100644 --- a/lib/Source.mli +++ b/lib/Source.mli @@ -9,7 +9,7 @@ (* *) (**************************************************************************) -open Extended_ast +open Parsetree type t diff --git a/lib/Std_ast.ml b/lib/Std_ast.ml index c00c34ca0e..e7e3abaeb5 100644 --- a/lib/Std_ast.ml +++ b/lib/Std_ast.ml @@ -22,22 +22,6 @@ type 'a t = | Module_type : module_type t | Expression : expression t | Pattern : pattern t - (* not implemented *) - | Repl_file : unit t - | Documentation : unit t - -type any_t = Any : 'a t -> any_t [@@unboxed] - -let of_syntax = function - | Syntax.Structure -> Any Structure - | Signature -> Any Signature - | Use_file -> Any Use_file - | Core_type -> Any Core_type - | Module_type -> Any Module_type - | Expression -> Any Expression - | Pattern -> Any Pattern - | Repl_file -> Any Repl_file - | Documentation -> Any Documentation let equal (type a) (_ : a t) : a -> a -> bool = Poly.equal @@ -55,8 +39,6 @@ let map (type a) (x : a t) (m : Ast_mapper.mapper) : a -> a = | Module_type -> m.module_type m | Expression -> m.expr m | Pattern -> m.pat m - | Repl_file -> Fn.id - | Documentation -> Fn.id module Parse = struct let ast (type a) (fg : a t) ~ocaml_version ~input_name str : a = @@ -73,8 +55,6 @@ module Parse = struct | Module_type -> Parse.module_type ~ocaml_version lexbuf | Expression -> Parse.expression ~ocaml_version lexbuf | Pattern -> Parse.pattern ~ocaml_version lexbuf - | Repl_file -> () - | Documentation -> () end module Printast = struct @@ -90,6 +70,4 @@ module Printast = struct | Module_type -> module_type 0 | Expression -> expression 0 | Pattern -> pattern 0 - | Repl_file -> fun _ _ -> () - | Documentation -> fun _ _ -> () end diff --git a/lib/Std_ast.mli b/lib/Std_ast.mli index bc0646da38..a953d77b24 100644 --- a/lib/Std_ast.mli +++ b/lib/Std_ast.mli @@ -9,7 +9,23 @@ (* *) (**************************************************************************) -(** Interface over the AST defined in vendor/ocaml-4.13 *) +(** Standard OCaml parser AST (vendor/parser-standard). + + OCamlformat uses an extended parser for formatting that supports + additional syntax constructs (field punning, begin..end nodes, labeled + tuples, etc.). To verify that formatting preserves semantics, + ocamlformat re-parses both the original and formatted source with the + standard OCaml parser and compares the resulting ASTs. If they differ, + formatting changed the meaning of the program — which is a bug. + + The standard parser is used for this comparison because it represents + what the OCaml compiler sees: two programs are equivalent if the + compiler parses them identically. + + Before comparison the ASTs are run through {!Normalize_std_ast}, which + erases differences that don't affect program meaning (e.g. docstring + placement, attribute ordering) so the equality check focuses on + semantically significant changes. *) open Ocamlformat_parser_standard @@ -25,13 +41,6 @@ type 'a t = | Module_type : module_type t | Expression : expression t | Pattern : pattern t - (* not implemented *) - | Repl_file : unit t - | Documentation : unit t - -type any_t = Any : 'a t -> any_t [@@unboxed] - -val of_syntax : Syntax.t -> any_t module Parse : sig val ast : diff --git a/lib/Translation_unit.ml b/lib/Translation_unit.ml index 64842cf3c3..4d1677ea46 100644 --- a/lib/Translation_unit.ml +++ b/lib/Translation_unit.ml @@ -12,7 +12,6 @@ (** Translation units *) module Location = Migrate_ast.Location -open Parse_with_comments let ( let* ) = Result.( >>= ) @@ -66,7 +65,7 @@ module Error = struct let print_internal_error ~debug ~quiet fmt e = let s = match e with - | `Cannot_parse (Parse_with_comments.Warning50 _) -> + | `Cannot_parse (Extended_ast.Warning50 _) -> "generating invalid comment attachment" | `Cannot_parse _ -> "generating invalid ocaml syntax" | `Ast_changed -> "ast changed" @@ -78,7 +77,7 @@ module Error = struct | `Comment x when not quiet -> Cmt.pp_error fmt x | `Cannot_parse ((Syntaxerr.Error _ | Lexer.Error _) as exn) -> if debug then Location.report_exception fmt exn - | `Cannot_parse (Parse_with_comments.Warning50 _) -> + | `Cannot_parse (Extended_ast.Warning50 _) -> (* Printing the warning is not useful because it doesn't reference the right filename *) () @@ -93,14 +92,15 @@ module Error = struct let reason = match exn with | Syntaxerr.Error _ | Lexer.Error _ -> " (syntax error)" - | Warning50 _ -> " (misplaced documentation comments - warning 50)" + | Extended_ast.Warning50 _ -> + " (misplaced documentation comments - warning 50)" | _ -> "" in Format.fprintf fmt "%s: ignoring %S%s\n%!" exe input_name reason ; match exn with | Syntaxerr.Error _ | Lexer.Error _ -> Location.report_exception fmt exn - | Warning50 l -> + | Extended_ast.Warning50 l -> List.iter l ~f:(fun (l, w) -> Warning.print_warning l w) ; Format.fprintf fmt "@{Hint@}: (Warning 50) This file contains a \ @@ -207,10 +207,9 @@ let strconst_mapper locs = in {Ast_mapper.default_mapper with constant} -let collect_strlocs (type a) (fg : a Extended_ast.t) (ast : a) : - (int * int) list = +let collect_strlocs (type a) (parsed : a Extended_ast.t) : (int * int) list = let locs = ref [] in - let _ = Extended_ast.map fg (strconst_mapper locs) ast in + let _ = Extended_ast.map (strconst_mapper locs) parsed in let compare (c1, _) (c2, _) = Stdlib.compare c1 c2 in List.sort ~compare !locs @@ -223,22 +222,23 @@ let check_remaining_comments cmts = let check_comments (conf : Conf.t) cmts ~old:t_old ~new_:t_new = if conf.opr_opts.comment_check.v then let errors = - let* () = check_remaining_comments cmts in - Normalize_extended_ast.diff_cmts conf t_old.comments t_new.comments + let* () = + match cmts with + | Some cmts -> check_remaining_comments cmts + | None -> Ok () + in + Normalize_extended_ast.diff_cmts conf t_old t_new in match errors with | Ok () -> () | Error e -> internal_error (List.map e ~f:(fun x -> `Comment x)) [] -let format (type ext std) (ext_fg : ext Extended_ast.t) - (std_fg : std Std_ast.t) ?output_file ~input_name ~prev_source - ~ext_parsed ~std_parsed (conf : Conf.t) = +let format ?output_file ~input_name ~prev_source ~ext_parsed (conf : Conf.t) + = Box_debug.enable_stacktraces := conf.opr_opts.debug.v ; - let dump_ast fg ~suffix ast = + let dump_ast ~suffix fmt = if conf.opr_opts.debug.v then - Some - (dump_ast ~input_name ?output_file ~suffix (fun fmt -> - Std_ast.Printast.ast fg fmt ast ) ) + Some (dump_ast ~input_name ?output_file ~suffix fmt) else None in let dump_formatted ?ext ~suffix fmted = @@ -248,34 +248,26 @@ let format (type ext std) (ext_fg : ext Extended_ast.t) in Location.input_name := input_name ; (* iterate until formatting stabilizes *) - let rec print_check ~i ~(conf : Conf.t) ~prev_source ext_t std_t = + let rec print_check ~i ~(conf : Conf.t) ~prev_source ext_t = let format ~box_debug = let open Fmt in - let cmts_t = - Cmts.init ext_fg ~debug:conf.opr_opts.debug.v ext_t.source ext_t.ast - ext_t.comments + let body, cmts = + Fmt_ast.fmt_ast ext_t ~debug:conf.opr_opts.debug.v conf in let contents = with_buffer_formatter ~buffer_size:(String.length prev_source) ( set_margin conf.fmt_opts.margin.v $ set_max_indent conf.fmt_opts.max_indent.v - $ fmt_if - (not (String.is_empty ext_t.prefix)) - (str ext_t.prefix $ force_newline) - $ with_optional_box_debug ~box_debug - (Fmt_ast.fmt_ast ext_fg ~debug:conf.opr_opts.debug.v - ext_t.source cmts_t conf ext_t.ast ) ) + $ with_optional_box_debug ~box_debug body ) in - (contents, cmts_t) + (contents, cmts) in - ( if conf.opr_opts.debug.v then - format ~box_debug:true |> fst - |> dump_formatted ~suffix:"_boxes" ~ext:".html" - |> function - | Some file -> - if i = 1 then Format.eprintf "[DEBUG] Box structure: %s\n" file - | None -> () ) ; + if conf.opr_opts.debug.v then + format ~box_debug:true |> fst + |> dump_formatted ~suffix:"_boxes" ~ext:".html" + |> Option.iter ~f:(fun file -> + if i = 1 then Format.eprintf "[DEBUG] Box structure: %s\n" file ) ; let fmted, cmts_t = format ~box_debug:false in let conf = if conf.opr_opts.debug.v then conf @@ -287,82 +279,67 @@ let format (type ext std) (ext_fg : ext Extended_ast.t) in if String.equal prev_source fmted then ( if conf.opr_opts.debug.v then - check_all_locations Format.err_formatter cmts_t ; + Option.iter cmts_t ~f:(check_all_locations Format.err_formatter) ; if conf.opr_opts.margin_check.v then check_margin conf ~fmted ~filename:(Option.value output_file ~default:input_name) ; - let strlocs = collect_strlocs ext_fg ext_t.ast in + let strlocs = collect_strlocs ext_t in Ok (strlocs, fmted) ) else + let opt_arg name f = + Option.map f ~f:(fun s -> (name, String.sexp_of_t s)) + in let exn_args () = - [("output file", dump_formatted ~suffix:".invalid-ast" fmted)] - |> List.filter_map ~f:(fun (s, f_opt) -> - Option.map f_opt ~f:(fun f -> (s, String.sexp_of_t f)) ) + List.filter_opt + [ opt_arg "output file" + (dump_formatted ~suffix:".invalid-ast" fmted) ] in let* ext_t_new = match - parse (parse_ast conf) ~disable_w50:true ext_fg conf ~input_name - ~source:fmted + Extended_ast.parse ~disable_w50:true + (Extended_ast.kind_of ext_t) + conf ~input_name ~source:fmted with | exception Sys_error msg -> Error (Error.User_error msg) - | exception exn -> internal_error [`Cannot_parse exn] (exn_args ()) - | ext_t_new -> Ok ext_t_new - in - let* std_t_new = - match - parse Std_ast.Parse.ast std_fg conf ~input_name ~source:fmted - with - | exception Sys_error msg -> Error (Error.User_error msg) - | exception Warning50 l -> + | exception Extended_ast.Warning50 l -> internal_error (List.map ~f:(fun x -> `Warning50 x) l) (exn_args ()) | exception exn -> internal_error [`Cannot_parse exn] (exn_args ()) - | std_t_new -> Ok std_t_new + | ext_t_new -> Ok ext_t_new in (* Ast not preserved ? *) - ( if - not - (Normalize_std_ast.equal std_fg conf std_t.ast std_t_new.ast - ~ignore_doc_comments:(not conf.opr_opts.comment_check.v) ) - then - let old_ast = - dump_ast std_fg ~suffix:".old" - (Normalize_std_ast.ast std_fg conf std_t.ast) - in - let new_ast = - dump_ast std_fg ~suffix:".new" - (Normalize_std_ast.ast std_fg conf std_t_new.ast) + let normalize_code = Normalize_extended_ast.normalize_code conf in + ( match + Extended_ast.equivalent ~normalize_code conf ext_t ext_t_new + with + | Ast_preserved -> + dump_ast ~suffix:"" (fun fmt -> Extended_ast.dump fmt ext_t_new) + |> Option.iter ~f:(fun file -> + if i = 1 then Format.eprintf "[DEBUG] AST structure: %s\n" file ) + | (Docstrings_moved _ | Ast_changed) as check -> ( + let dump_normalized t ~suffix = + dump_ast ~suffix (fun fmt -> + Extended_ast.dump_normalized ~normalize_code conf fmt t ) in + let old_ast = dump_normalized ext_t ~suffix:".old" in + let new_ast = dump_normalized ext_t_new ~suffix:".new" in let args ~suffix = - [ ("output file", dump_formatted ~suffix fmted) - ; ("old ast", old_ast) - ; ("new ast", new_ast) ] - |> List.filter_map ~f:(fun (s, f_opt) -> - Option.map f_opt ~f:(fun f -> (s, String.sexp_of_t f)) ) + List.filter_opt + [ opt_arg "output file" (dump_formatted ~suffix fmted) + ; opt_arg "old ast" old_ast + ; opt_arg "new ast" new_ast ] in - if - Normalize_std_ast.equal std_fg ~ignore_doc_comments:true conf - std_t.ast std_t_new.ast - then - let docstrings = - Normalize_std_ast.moved_docstrings std_fg conf std_t.ast - std_t_new.ast - in - let args = args ~suffix:".unequal-docs" in - internal_error - (List.map ~f:(fun x -> `Comment x) docstrings) - args - else - let args = args ~suffix:".unequal-ast" in - internal_error [`Ast_changed] args - else - dump_ast std_fg ~suffix:"" - (Normalize_std_ast.ast std_fg conf std_t_new.ast) - |> function - | Some file -> - if i = 1 then Format.eprintf "[DEBUG] AST structure: %s\n" file - | None -> () ) ; + match check with + | Docstrings_moved docstrings -> + let args = args ~suffix:".unequal-docs" in + internal_error + (List.map ~f:(fun x -> `Comment x) docstrings) + args + | Ast_changed -> + let args = args ~suffix:".unequal-ast" in + internal_error [`Ast_changed] args + | Ast_preserved -> assert false ) ) ; check_comments conf cmts_t ~old:ext_t ~new_:ext_t_new ; (* Too many iteration ? *) if i >= conf.opr_opts.max_iters.v then ( @@ -371,36 +348,23 @@ let format (type ext std) (ext_fg : ext Extended_ast.t) (Unstable {iteration= i; prev= prev_source; next= fmted; input_name} ) ) else (* All good, continue *) - print_check ~i:(i + 1) ~conf ~prev_source:fmted ext_t_new std_t_new + print_check ~i:(i + 1) ~conf ~prev_source:fmted ext_t_new in - try print_check ~i:1 ~conf ~prev_source ext_parsed std_parsed with + try print_check ~i:1 ~conf ~prev_source ext_parsed with | Sys_error msg -> Error (User_error msg) | exn -> Error (Ocamlformat_bug {exn; input_name}) -let parse_result ?disable_w50 f fragment conf ~source ~input_name = - match parse ?disable_w50 f fragment conf ~input_name ~source with - | exception exn -> Error (Error.Invalid_source {exn; input_name}) - | parsed -> Ok parsed - -let parse_and_format (type ext std) (ext_fg : ext Extended_ast.t) - (std_fg : std Std_ast.t) ?output_file ~input_name ~source (conf : Conf.t) +let parse_and_format syntax ?output_file ~input_name ~source (conf : Conf.t) = Location.input_name := input_name ; let line_endings = conf.fmt_opts.line_endings.v in + let (Extended_ast.Kind.Any kind) = Extended_ast.Kind.of_syntax syntax in let* ext_parsed = - parse_result (parse_ast conf) ~disable_w50:true ext_fg conf ~source - ~input_name - in - let* std_parsed = - parse_result Std_ast.Parse.ast std_fg conf ~source ~input_name + match Extended_ast.parse kind conf ~input_name ~source with + | exception exn -> Error (Error.Invalid_source {exn; input_name}) + | parsed -> Ok parsed in let+ strlocs, formatted = - format ext_fg std_fg ?output_file ~input_name ~prev_source:source - ~ext_parsed ~std_parsed conf + format ?output_file ~input_name ~prev_source:source ~ext_parsed conf in Eol_compat.normalize_eol ~exclude_locs:strlocs ~line_endings formatted - -let parse_and_format syntax = - let (Extended_ast.Any ext) = Extended_ast.of_syntax syntax in - let (Std_ast.Any std) = Std_ast.of_syntax syntax in - parse_and_format ext std diff --git a/test/cli/debug.t b/test/cli/debug.t index 9737b4d5c2..27006410f6 100644 --- a/test/cli/debug.t +++ b/test/cli/debug.t @@ -40,69 +40,6 @@ ] [DEBUG] Box structure: .html - AST: - Ptop_def - [ - structure_item (a.ml[2,34+0]..[4,59+7]) - comments - before: (* Intentionally not formatted *) - Pstr_value Nonrec - [ - (a.ml[2,34+0]..[4,59+7]) - comments - before: (* Intentionally not formatted *) is_pun=false - None - pattern (a.ml[2,34+4]..[2,34+6]) - Ppat_construct "()" (a.ml[2,34+4]..[2,34+6]) - None - args - [] - Pfunction_body - expression (a.ml[3,43+2]..[4,59+7]) - Pexp_apply - expression (a.ml[3,43+2]..[3,43+15]) - Pexp_ident "print_endline" (a.ml[3,43+2]..[3,43+15]) - [ - - Nolabel - expression (a.ml[4,59+4]..[4,59+7]) - Pexp_ident "A.x" (a.ml[4,59+4]..[4,59+7]) - ] - ] - ] - - [DEBUG] AST structure: .ast - AST: - Ptop_def - [ - structure_item (a.ml[2,34+0]..[2,34+26]) - comments - before: (* Intentionally not formatted *) - Pstr_value Nonrec - [ - (a.ml[2,34+0]..[2,34+26]) - comments - before: (* Intentionally not formatted *) is_pun=false - None - pattern (a.ml[2,34+4]..[2,34+6]) - Ppat_construct "()" (a.ml[2,34+4]..[2,34+6]) - None - args - [] - Pfunction_body - expression (a.ml[2,34+9]..[2,34+26]) - Pexp_apply - expression (a.ml[2,34+9]..[2,34+22]) - Pexp_ident "print_endline" (a.ml[2,34+9]..[2,34+22]) - [ - - Nolabel - expression (a.ml[2,34+23]..[2,34+26]) - Pexp_ident "A.x" (a.ml[2,34+23]..[2,34+26]) - ] - ] - ] - AST: Ptop_def [ @@ -136,6 +73,7 @@ (* Intentionally not formatted *) let () = print_endline A.x + [DEBUG] AST structure: .ast $ cat > a.ml << EOF > (* before let-binding *) @@ -210,129 +148,6 @@ ] [DEBUG] Box structure: .html - AST: - Ptop_def - [ - structure_item (a.ml[2,25+0]..[6,233+3]) - comments - before: (* before let-binding *) - after: (* after let-binding *) - Pstr_value Nonrec - [ - (a.ml[2,25+0]..[6,233+3]) - comments - before: (* before let-binding *) - after: (* after let-binding *) is_pun=false - None - pattern (a.ml[2,25+4]..[2,25+6]) - Ppat_construct "()" (a.ml[2,25+4]..[2,25+6]) - None - args - [] - Pfunction_body - expression (a.ml[5,90+2]..[6,233+3]) - comments - before: (* before x binding #1 *) - before: (* before x binding #2 *) - Pexp_let Nonrec - loc_in: (a.ml[5,90+140]..[5,90+142]) - comments - before: (* after unit *) - [ - (a.ml[5,90+2]..[5,90+122]) is_pun=false - None - pattern (a.ml[5,90+21]..[5,90+22]) - comments - before: (* before x *) - after: (* after x #1 *) - after: (* after x #2 *) - Ppat_var "x" (a.ml[5,90+21]..[5,90+22]) - comments - before: (* before x *) - after: (* after x #1 *) - after: (* after x #2 *) - args - [] - Pfunction_body - expression (a.ml[5,90+77]..[5,90+122]) - comments - before: (* before unit *) - within: (* within unit #1 *) - within: (* within unit #2 *) - Pexp_construct "()" (a.ml[5,90+77]..[5,90+122]) - comments - before: (* before unit *) - within: (* within unit #1 *) - within: (* within unit #2 *) - None - ] - expression (a.ml[6,233+2]..[6,233+3]) - Pexp_ident "x" (a.ml[6,233+2]..[6,233+3]) - ] - ] - - [DEBUG] AST structure: .ast - AST: - Ptop_def - [ - structure_item (a.ml[2,25+0]..[13,265+3]) - comments - before: (* before let-binding *) - after: (* after let-binding *) - Pstr_value Nonrec - [ - (a.ml[2,25+0]..[13,265+3]) - comments - before: (* before let-binding *) - after: (* after let-binding *) is_pun=false - None - pattern (a.ml[2,25+4]..[2,25+6]) - Ppat_construct "()" (a.ml[2,25+4]..[2,25+6]) - None - args - [] - Pfunction_body - expression (a.ml[5,90+2]..[13,265+3]) - comments - before: (* before x binding #1 *) - before: (* before x binding #2 *) - Pexp_let Nonrec - loc_in: (a.ml[12,260+2]..[12,260+4]) - [ - (a.ml[5,90+2]..[10,210+28]) - comments - after: (* after unit *) is_pun=false - None - pattern (a.ml[5,90+21]..[5,90+22]) - comments - before: (* before x *) - after: (* after x #1 *) - after: (* after x #2 *) - Ppat_var "x" (a.ml[5,90+21]..[5,90+22]) - comments - before: (* before x *) - after: (* after x #1 *) - after: (* after x #2 *) - args - [] - Pfunction_body - expression (a.ml[9,183+4]..[10,210+28]) - comments - before: (* before unit *) - within: (* within unit #1 *) - within: (* within unit #2 *) - Pexp_construct "()" (a.ml[9,183+4]..[10,210+28]) - comments - before: (* before unit *) - within: (* within unit #1 *) - within: (* within unit #2 *) - None - ] - expression (a.ml[13,265+2]..[13,265+3]) - Pexp_ident "x" (a.ml[13,265+2]..[13,265+3]) - ] - ] - AST: Ptop_def [ @@ -408,3 +223,4 @@ in x (* after let-binding *) + [DEBUG] AST structure: .ast diff --git a/test/unit/test_fmt_ast.ml b/test/unit/test_fmt_ast.ml index 071a5e7c5c..069b224f04 100644 --- a/test/unit/test_fmt_ast.ml +++ b/test/unit/test_fmt_ast.ml @@ -9,21 +9,28 @@ let check_updated_test source expected = String.make 1000 '\n' ^ source in let conf = Ocamlformat_lib.Conf.default in - let ast ~input_name ~source = - Ocamlformat_lib.Parse_with_comments.parse - (Ocamlformat_lib.Parse_with_comments.parse_ast conf) - Structure conf ~input_name ~source + let parse_struct ~input_name ~source = + Extended_ast.parse Structure conf ~input_name ~source in - let ast1 = ast ~input_name:"source1" ~source:source1 in - let ast2 = - let ast = ast ~input_name:"source2" ~source:source2 in + let unwrap_structure (parsed : _ Extended_ast.t) = + match parsed with + | Structure {ast; std; cmts} -> (ast, std, cmts) + | _ -> . + in + let ast1 = parse_struct ~input_name:"source1" ~source:source1 in + let _, ast1_std, ast1_cmts = unwrap_structure ast1 in + let ast2_ast = + let parsed = parse_struct ~input_name:"source2" ~source:source2 in + let ast, _, _ = unwrap_structure parsed in let ghostify = { Ocamlformat_parser_extended.Ast_mapper.default_mapper with location= (fun _ loc -> {loc with loc_ghost= true}) } in - {ast with ast= ghostify.structure ghostify ast.ast} + ghostify.structure ghostify ast + in + let mixed = + Extended_ast.Structure {ast= ast2_ast; std= ast1_std; cmts= ast1_cmts} in - let ast_replaced = {ast1 with ast= ast2.ast} in let with_buffer_formatter ~buffer_size k = let buffer = Buffer.create buffer_size in let fs = Format_.formatter_of_buffer buffer in @@ -32,18 +39,16 @@ let check_updated_test source expected = if Buffer.length buffer > 0 then Format_.pp_print_newline fs () ; Buffer.contents buffer in - let print (ast : _ Parse_with_comments.with_comments) = + let print parsed = let open Fmt in let debug = conf.opr_opts.debug.v in + let body, _ = Fmt_ast.fmt_ast parsed ~debug conf in with_buffer_formatter ~buffer_size:1000 ( set_margin conf.fmt_opts.margin.v $ set_max_indent conf.fmt_opts.max_indent.v - $ Fmt_ast.fmt_ast Structure ~debug ast.source - (Ocamlformat_lib.Cmts.init Structure ~debug ast.source ast.ast - ast.comments ) - conf ast.ast ) + $ body ) in - let printed_ast_replaced = String.strip (print ast_replaced) in + let printed_ast_replaced = String.strip (print mixed) in (* Ideally we'd improve two things about this test: - check the new string parses, to the same AST as the original one - use diff --git a/tools/printast/printast.ml b/tools/printast/printast.ml index 509917d82d..a3c25f3f8b 100644 --- a/tools/printast/printast.ml +++ b/tools/printast/printast.ml @@ -1,39 +1,33 @@ open! Stdio open Ocamlformat_lib -let ocaml_version = Ocaml_version.sys_version - -let extended_ast ppf syntax ~input_name content = - let open Extended_ast in - let (Any kind) = of_syntax syntax in - Parse.ast kind ~ocaml_version ~preserve_beginend:true ~prefer_let_puns:None - ~input_name content - |> Printast.ast kind ppf - -let std_ast ppf syntax ~input_name content = - let open Std_ast in - let (Any kind) = of_syntax syntax in - Parse.ast kind ~ocaml_version ~input_name content |> Printast.ast kind ppf - -let get_arg () = +let () = let std = ref false and input = ref None in let opts = [("-std", Arg.Set std, "Use the standard parser")] in let usage = "printast [-std] " in Arg.parse opts (fun inp -> input := Some inp) usage ; - let input = + let inputf = match !input with | Some inp -> inp | None -> Printf.eprintf "Not enough argument\n" ; exit 2 - and parse_and_print = if !std then std_ast else extended_ast in - (parse_and_print, input) - -let () = - let parse_and_print, inputf = get_arg () in + in let syntax = Option.value ~default:Syntax.Use_file (Syntax.of_fname inputf) in Printf.printf "Reading %S\n" inputf ; let content = In_channel.read_all inputf in - parse_and_print Format.std_formatter syntax ~input_name:inputf content + let conf = Conf.default in + let (Extended_ast.Kind.Any kind) = Extended_ast.Kind.of_syntax syntax in + let parsed = + Extended_ast.parse kind conf ~input_name:inputf ~source:content + in + if !std then ( + match Extended_ast.get_std parsed with + | Some (Std_value (std_fg, std_v)) -> + Std_ast.Printast.ast std_fg Format.std_formatter std_v + | None -> + Printf.eprintf "No standard parser for this fragment type\n" ; + exit 1 ) + else Extended_ast.Printast.ast Format.std_formatter parsed