diff --git a/.ocamlformat-ignore b/.ocamlformat-ignore index 05b7efe0e5..20bbb9f361 100644 --- a/.ocamlformat-ignore +++ b/.ocamlformat-ignore @@ -1 +1,3 @@ vendor/** +opam-files/** +opam-files-mll/** \ No newline at end of file diff --git a/dune-project b/dune-project index c3572e2781..83a1843b2d 100644 --- a/dune-project +++ b/dune-project @@ -1,4 +1,4 @@ -(lang dune 2.8) +(lang dune 3.13) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; @@ -13,7 +13,7 @@ (name ocamlformat) -(using menhir 2.1) +(using menhir 3.0) (cram enable) diff --git a/lib/Cmts.ml b/lib/Cmts.ml index 955f369dda..c156bb5309 100644 --- a/lib/Cmts.ml +++ b/lib/Cmts.ml @@ -454,36 +454,40 @@ let is_only_whitespaces s = String.for_all s ~f:Char.is_whitespace module Wrapped = struct let fmt ~pro ~epi text = let open Fmt in - assert (not (String.is_empty text)) ; - let prefix = if String.starts_with_whitespace text then " " else "" - and suffix = if String.ends_with_whitespace text then " " else "" in - let fmt_line line = - let words = - List.filter ~f:(Fn.non String.is_empty) - (String.split_on_chars line - ~on:['\t'; '\n'; '\011'; '\012'; '\r'; ' '] ) + if String.is_empty text then pro $ epi + else + let prefix = if String.starts_with_whitespace text then " " else "" + and suffix = if String.ends_with_whitespace text then " " else "" in + let fmt_line line = + let words = + List.filter ~f:(Fn.non String.is_empty) + (String.split_on_chars line + ~on:['\t'; '\n'; '\011'; '\012'; '\r'; ' '] ) + in + list words space_break str in - list words space_break str - in - let lines = - List.remove_consecutive_duplicates - ~equal:(fun x y -> String.is_empty x && String.is_empty y) - (String.split (String.rstrip text) ~on:'\n') - in - let groups = - List.group lines ~break:(fun _ y -> is_only_whitespaces y) - in - pro $ str prefix - $ hovbox 0 - (list_fl groups (fun ~first ~last:last_group group -> - let group = List.filter group ~f:(Fn.non is_only_whitespaces) in - fmt_if (not first) (str "\n" $ force_newline) - $ hovbox 0 - (list_fl group (fun ~first ~last x -> - fmt_if (not first) space_break - $ fmt_line x - $ fmt_if (last_group && last) (str suffix $ epi) ) ) ) - ) + let lines = + List.remove_consecutive_duplicates + ~equal:(fun x y -> String.is_empty x && String.is_empty y) + (String.split (String.rstrip text) ~on:'\n') + in + let groups = + List.group lines ~break:(fun _ y -> is_only_whitespaces y) + |> List.filter_map ~f:(fun group -> + match List.filter group ~f:(Fn.non is_only_whitespaces) with + | [] -> None + | group -> Some group ) + in + pro $ str prefix + $ hovbox 0 + (list_fl groups (fun ~first ~last:last_group group -> + fmt_if (not first) (str "\n" $ force_newline) + $ hovbox 0 + (list_fl group (fun ~first ~last x -> + fmt_if (not first) space_break + $ fmt_line x + $ fmt_if (last_group && last) (str suffix $ epi) ) ) ) + ) end module Asterisk_prefixed = struct diff --git a/lib/Conf.ml b/lib/Conf.ml index b914234da4..4ec66bbf99 100644 --- a/lib/Conf.ml +++ b/lib/Conf.ml @@ -111,6 +111,7 @@ let conventional_profile from = ; stritem_extension_indent= elt 0 ; type_decl= elt `Compact ; type_decl_indent= elt 2 + ; reformat_mll= elt `Ocaml_block ; wrap_comments= elt false ; wrap_docstrings= elt true ; wrap_fun_args= elt true } @@ -182,6 +183,7 @@ let ocamlformat_profile from = ; stritem_extension_indent= elt 0 ; type_decl= elt `Compact ; type_decl_indent= elt 2 + ; reformat_mll= elt `Ocaml_block ; wrap_comments= elt false ; wrap_docstrings= elt true ; wrap_fun_args= elt true } @@ -252,6 +254,7 @@ let janestreet_profile from = ; stritem_extension_indent= elt 2 ; type_decl= elt `Sparse ; type_decl_indent= elt 2 + ; reformat_mll= elt `Ocaml_block ; wrap_comments= elt false ; wrap_docstrings= elt false ; wrap_fun_args= elt false } @@ -1323,6 +1326,22 @@ module Formatting = struct update conf ~f:(fun f -> {f with type_decl_indent= elt}) ) (fun conf -> conf.fmt_opts.type_decl_indent) + let reformat_mll = + let doc = "How to format .mll (ocamllex) files." in + let names = ["reformat-mll"] in + let all = + [ Decl.Value.make ~name:"ocaml-block" `Ocaml_block + "$(b,ocaml-block) formats only the embedded OCaml code blocks, \ + preserving surrounding syntax and comments." + ; Decl.Value.make ~name:"full" `Full + "$(b,full) reformats the entire .mll file structure." + ; Decl.Value.make ~name:"no" `No + "$(b,no) disables formatting of .mll files." ] + in + Decl.choice ~names ~all ~default ~doc ~kind + (fun conf elt -> update conf ~f:(fun f -> {f with reformat_mll= elt})) + (fun conf -> conf.fmt_opts.reformat_mll) + let wrap_comments = let doc = "Comments are divided into paragraphs by open lines (two or more \ @@ -1395,6 +1414,7 @@ module Formatting = struct ; elt parens_tuple_patterns ; elt parse_docstrings ; elt parse_toplevel_phrases + ; elt reformat_mll ; elt sequence_blank_line ; elt sequence_style ; elt single_case diff --git a/lib/Conf_t.ml b/lib/Conf_t.ml index c332d45cfc..0e909fc58c 100644 --- a/lib/Conf_t.ml +++ b/lib/Conf_t.ml @@ -119,6 +119,7 @@ type fmt_opts = ; stritem_extension_indent: int elt ; type_decl: [`Compact | `Sparse] elt ; type_decl_indent: int elt + ; reformat_mll: [`No | `Ocaml_block | `Full] elt ; wrap_comments: bool elt ; wrap_docstrings: bool elt ; wrap_fun_args: bool elt } diff --git a/lib/Conf_t.mli b/lib/Conf_t.mli index 42a059b513..a655610622 100644 --- a/lib/Conf_t.mli +++ b/lib/Conf_t.mli @@ -117,6 +117,8 @@ type fmt_opts = ; stritem_extension_indent: int elt ; type_decl: [`Compact | `Sparse] elt ; type_decl_indent: int elt + ; reformat_mll: [`No | `Ocaml_block | `Full] elt + (** How to format .mll files. *) ; wrap_comments: bool elt (** Wrap comments at margin. *) ; wrap_docstrings: bool elt ; wrap_fun_args: bool elt } diff --git a/lib/Extended_ast.ml b/lib/Extended_ast.ml index b2121b08c5..42ba2ba4d3 100644 --- a/lib/Extended_ast.ml +++ b/lib/Extended_ast.ml @@ -30,6 +30,7 @@ type 'a t = | Pattern : (pattern, Std_parsetree.pattern) paired t | Repl_file : repl_file t | Documentation : Ocamlformat_odoc_parser.Ast.t t + | Mll_file : Ocamlformat_mll_parser.Mll_ast.lexer_def t type any_t = Any : 'a t -> any_t [@@unboxed] @@ -43,6 +44,7 @@ let of_syntax = function | Pattern -> Any Pattern | Repl_file -> Any Repl_file | Documentation -> Any Documentation + | Mll_file -> Any Mll_file let map (type a) (x : a t) (m : Ast_mapper.mapper) : a -> a = match x with @@ -56,6 +58,30 @@ let map (type a) (x : a t) (m : Ast_mapper.mapper) : a -> a = | Pattern -> fun v -> {v with extended= m.pat m v.extended} | Repl_file -> List.map ~f:(m.repl_phrase m) | Documentation -> Fn.id + | Mll_file -> + fun d -> + let open Ocamlformat_mll_parser.Mll_ast in + let loc l = m.location m l in + let map_located x = {x with loc= loc x.loc} in + let map_code (c : ocaml_code) = map_located c in + let map_code_opt = Option.map ~f:map_code in + let map_named_def nd = + { def_name= map_located nd.def_name + ; def_body= nd.def_body + ; def_loc= loc nd.def_loc } + in + let map_case c = {c with action= map_code c.action} in + let map_entry entry = + { entry_name= map_located entry.entry_name + ; entry_args= List.map ~f:map_located entry.entry_args + ; entry_is_shortest= entry.entry_is_shortest + ; entry_cases= List.map ~f:map_case entry.entry_cases } + in + { header= map_code_opt d.header + ; named_defs= List.map ~f:map_named_def d.named_defs + ; rules= List.map ~f:map_entry d.rules + ; trailer= map_code_opt d.trailer + ; comments= d.comments } module Parse = struct let normalize_mapper ~ocaml_version ~preserve_beginend ~prefer_let_puns = @@ -396,6 +422,8 @@ module Parse = struct let pos = (Location.curr lexbuf).loc_start in let pos = {pos with pos_fname= input_name} in Docstring.parse_file pos str + | Mll_file -> + Ocamlformat_mll_parser.Mll_parser.parse_string ~input_name str end module Printast = struct @@ -415,8 +443,29 @@ module Printast = struct | Pattern -> fun fmt v -> pattern fmt v.extended | Repl_file -> repl_file | Documentation -> Docstring.dump + | Mll_file -> Ocamlformat_mll_parser.Mll_printast.pp end +(* Strip comment delimiters: [(* text *)] -> [text], [/* text */] -> + [text] *) +let strip_comment_delimiters s = + let len = String.length s in + if + len >= 4 + && Char.equal s.[0] '(' + && Char.equal s.[1] '*' + && Char.equal s.[len - 2] '*' + && Char.equal s.[len - 1] ')' + then String.sub s ~pos:2 ~len:(len - 4) + else if + len >= 4 + && Char.equal s.[0] '/' + && Char.equal s.[1] '*' + && Char.equal s.[len - 2] '*' + && Char.equal s.[len - 1] '/' + then String.sub s ~pos:2 ~len:(len - 4) + else s + module Asttypes = struct include Asttypes @@ -425,6 +474,126 @@ module Asttypes = struct let is_recursive = function Recursive -> true | Nonrecursive -> false end +type std_value = Std_value : 'a Std_ast.t * 'a -> std_value + +let get_std (type a) (fg : a t) (v : a) : std_value option = + match fg with + | Structure -> Some (Std_value (Structure, v.std)) + | Signature -> Some (Std_value (Signature, v.std)) + | Use_file -> Some (Std_value (Use_file, v.std)) + | Core_type -> Some (Std_value (Core_type, v.std)) + | Module_type -> Some (Std_value (Module_type, v.std)) + | Expression -> Some (Std_value (Expression, v.std)) + | Pattern -> Some (Std_value (Pattern, v.std)) + | Repl_file -> None + | Documentation -> None + | Mll_file -> None + +type std_pair = Std_pair : 'a Std_ast.t * 'a * 'a -> std_pair + +let get_std_pair (type a) (fg : a t) (v1 : a) (v2 : a) : std_pair option = + match fg with + | Structure -> Some (Std_pair (Structure, v1.std, v2.std)) + | Signature -> Some (Std_pair (Signature, v1.std, v2.std)) + | Use_file -> Some (Std_pair (Use_file, v1.std, v2.std)) + | Core_type -> Some (Std_pair (Core_type, v1.std, v2.std)) + | Module_type -> Some (Std_pair (Module_type, v1.std, v2.std)) + | Expression -> Some (Std_pair (Expression, v1.std, v2.std)) + | Pattern -> Some (Std_pair (Pattern, v1.std, v2.std)) + | Repl_file -> None + | Documentation -> None + | Mll_file -> None + +let dump (type a) (fg : a t) fmt (v : a) = + match get_std fg v with + | Some (Std_value (std_fg, std_v)) -> Std_ast.Printast.ast std_fg fmt std_v + | None -> Printast.ast fg fmt v + +let dump_normalized (type a) (fg : a t) ~normalize_code conf fmt (v : a) = + match get_std fg v with + | Some (Std_value (std_fg, std_v)) -> + Std_ast.Printast.ast std_fg fmt + (Normalize_std_ast.ast std_fg ~normalize_code conf std_v) + | None -> Printast.ast fg fmt v + +type ast_check_result = + | Ast_preserved + | Docstrings_moved of Cmt.error list + | Ast_changed + +let equal_mll ~normalize_code (_conf : Conf.t) + (a : Ocamlformat_mll_parser.Mll_ast.lexer_def) + (b : Ocamlformat_mll_parser.Mll_ast.lexer_def) = + let open Ocamlformat_mll_parser.Mll_ast in + let code_equal (a : ocaml_code) (b : ocaml_code) = + let strip s = + let len = String.length s in + if len >= 2 && Char.equal s.[0] '{' && Char.equal s.[len - 1] '}' then + String.strip (String.sub s ~pos:1 ~len:(len - 2)) + else s + in + let sa = strip a.value and sb = strip b.value in + String.equal sa sb + || String.equal (normalize_code sa) (normalize_code sb) + in + let code_opt_equal a b = + match (a, b) with + | None, None -> true + | Some a, Some b -> code_equal a b + | _ -> false + in + let case_equal a b = + Poly.equal a.pattern b.pattern && code_equal a.action b.action + in + let entry_equal a b = + String.equal a.entry_name.value b.entry_name.value + && List.equal + (fun a b -> String.equal a.value b.value) + a.entry_args b.entry_args + && Bool.equal a.entry_is_shortest b.entry_is_shortest + && List.equal case_equal a.entry_cases b.entry_cases + in + let def_equal a b = + String.equal a.def_name.value b.def_name.value + && Poly.equal a.def_body b.def_body + in + code_opt_equal a.header b.header + && List.equal def_equal a.named_defs b.named_defs + && List.equal entry_equal a.rules b.rules + && code_opt_equal a.trailer b.trailer + +let equivalent (type a) (fg : a t) ~normalize_code conf (old_v : a) + (new_v : a) : ast_check_result = + match get_std_pair fg old_v new_v with + | None -> ( + match fg with + | Mll_file -> + if equal_mll ~normalize_code conf old_v new_v then Ast_preserved + else Ast_changed + | _ -> + (* TODO: Repl_file and Documentation have no std AST, so we skip the + equivalence check. - Repl_file: each toplevel phrase is OCaml code + that could be validated individually by parsing it with the + standard parser. - Documentation: OCaml code blocks inside .mld + files are formatted but never validated for AST preservation. We + should check each formatted code block by parsing it with the + standard parser and comparing. *) + Ast_preserved ) + | Some (Std_pair (std_fg, old_std, new_std)) -> + if + Normalize_std_ast.equal std_fg ~normalize_code + ~ignore_doc_comments:(not conf.Conf.opr_opts.comment_check.v) + conf old_std new_std + then Ast_preserved + else if + Normalize_std_ast.equal std_fg ~normalize_code + ~ignore_doc_comments:true conf old_std new_std + then + Docstrings_moved + (Normalize_std_ast.moved_docstrings ~normalize_code std_fg conf + old_std new_std ) + else Ast_changed + module Parsed = struct type 'a t = {ast: 'a; comments: Cmt.t list; prefix: string; source: Source.t} @@ -540,6 +709,17 @@ let parse (type a) ?disable_w50 ?disable_deprecated (fg : a t) conf ; comments= [] ; prefix= "" ; source= Source.create ~text:source ~tokens:[] } + | Mll_file -> + let open Ocamlformat_mll_parser in + let ast = Mll_parser.parse_string ~input_name source in + let comments = + List.map ast.Mll_ast.comments ~f:(fun (c : Mll_ast.ocaml_code) -> + Cmt.create_comment (strip_comment_delimiters c.value) c.loc ) + in + { Parsed.ast + ; comments + ; prefix= "" + ; source= Source.create ~text:source ~tokens:[] } | fg -> parse_ocaml ?disable_w50 ?disable_deprecated fg conf ~input_name ~source @@ -565,74 +745,3 @@ let parse_toplevel ?disable_w50 ?disable_deprecated (conf : Conf.t) (parse ?disable_w50 ?disable_deprecated Use_file conf ~input_name ~source ) -type std_value = Std_value : 'a Std_ast.t * 'a -> std_value - -let get_std (type a) (fg : a t) (v : a) : std_value option = - match fg with - | Structure -> Some (Std_value (Structure, v.std)) - | Signature -> Some (Std_value (Signature, v.std)) - | Use_file -> Some (Std_value (Use_file, v.std)) - | Core_type -> Some (Std_value (Core_type, v.std)) - | Module_type -> Some (Std_value (Module_type, v.std)) - | Expression -> Some (Std_value (Expression, v.std)) - | Pattern -> Some (Std_value (Pattern, v.std)) - | Repl_file -> None - | Documentation -> None - -type std_pair = Std_pair : 'a Std_ast.t * 'a * 'a -> std_pair - -let get_std_pair (type a) (fg : a t) (v1 : a) (v2 : a) : std_pair option = - match fg with - | Structure -> Some (Std_pair (Structure, v1.std, v2.std)) - | Signature -> Some (Std_pair (Signature, v1.std, v2.std)) - | Use_file -> Some (Std_pair (Use_file, v1.std, v2.std)) - | Core_type -> Some (Std_pair (Core_type, v1.std, v2.std)) - | Module_type -> Some (Std_pair (Module_type, v1.std, v2.std)) - | Expression -> Some (Std_pair (Expression, v1.std, v2.std)) - | Pattern -> Some (Std_pair (Pattern, v1.std, v2.std)) - | Repl_file -> None - | Documentation -> None - -let dump (type a) (fg : a t) fmt (v : a) = - match get_std fg v with - | Some (Std_value (std_fg, std_v)) -> Std_ast.Printast.ast std_fg fmt std_v - | None -> Printast.ast fg fmt v - -let dump_normalized (type a) (fg : a t) ~normalize_code conf fmt (v : a) = - match get_std fg v with - | Some (Std_value (std_fg, std_v)) -> - Std_ast.Printast.ast std_fg fmt - (Normalize_std_ast.ast std_fg ~normalize_code conf std_v) - | None -> Printast.ast fg fmt v - -type ast_check_result = - | Ast_preserved - | Docstrings_moved of Cmt.error list - | Ast_changed - -let equivalent (type a) (fg : a t) ~normalize_code conf (old_v : a) - (new_v : a) : ast_check_result = - match get_std_pair fg old_v new_v with - | None -> - (* TODO: Repl_file and Documentation have no std AST, so we skip the - equivalence check. - - - Repl_file: could validate each toplevel phrase individually. - - - Documentation: could check each formatted code block for AST - preservation. *) - Ast_preserved - | Some (Std_pair (std_fg, old_std, new_std)) -> - if - Normalize_std_ast.equal std_fg ~normalize_code - ~ignore_doc_comments:(not conf.Conf.opr_opts.comment_check.v) - conf old_std new_std - then Ast_preserved - else if - Normalize_std_ast.equal std_fg ~normalize_code - ~ignore_doc_comments:true conf old_std new_std - then - Docstrings_moved - (Normalize_std_ast.moved_docstrings ~normalize_code std_fg conf - old_std new_std ) - else Ast_changed diff --git a/lib/Extended_ast.mli b/lib/Extended_ast.mli index bc2a988f07..3b3670f506 100644 --- a/lib/Extended_ast.mli +++ b/lib/Extended_ast.mli @@ -37,6 +37,7 @@ type 'a t = | Pattern : (pattern, Std_parsetree.pattern) paired t | Repl_file : repl_file t | Documentation : Ocamlformat_odoc_parser.Ast.t t + | Mll_file : Ocamlformat_mll_parser.Mll_ast.lexer_def t type any_t = Any : 'a t -> any_t [@@unboxed] diff --git a/lib/Fmt_ast.ml b/lib/Fmt_ast.ml index 9b285e3187..0025c1e132 100644 --- a/lib/Fmt_ast.ml +++ b/lib/Fmt_ast.ml @@ -5097,8 +5097,8 @@ module Chunk = struct fmt fg c ctx @@ split fg l ~state end -let fmt_file (type a) ~ctx ~fmt_code ~debug (fragment : a Extended_ast.t) - source cmts conf (itms : a) = +let fmt_file (type a) ~ctx ~fmt_code ~fmt_code_structure ~debug + (fragment : a Extended_ast.t) source cmts conf (itms : a) = let c = {source; cmts; conf; debug; fmt_code} in match (fragment, itms) with | Structure, {extended= []; _} @@ -5122,14 +5122,36 @@ let fmt_file (type a) ~ctx ~fmt_code ~debug (fragment : a Extended_ast.t) (* TODO: [source] and [cmts] should have never been computed when formatting doc. *) Fmt_odoc.fmt_ast c.conf ~fmt_code:c.fmt_code d + | Mll_file, d when Poly.(c.conf.fmt_opts.reformat_mll.v = `Ocaml_block) -> + let codes = Fmt_mll.collect_ocaml_codes d in + let blocks = + List.map codes + ~f:(fun + ({Ocamlformat_mll_parser.Mll_ast.value; loc}, block_level) -> + {Fmt_inplace.value; loc; block_level} ) + in + let s = + Fmt_inplace.format_inplace ~source:c.source ~fmt_code:c.fmt_code + ~fmt_code_structure ~conf:c.conf ~menhir_mode:false ~cmts:c.cmts + blocks + in + (* Strip trailing newline: with_buffer_formatter will add one *) + str + ( if String.is_suffix s ~suffix:"\n" then + String.prefix s (String.length s - 1) + else s ) + | Mll_file, d -> + Fmt_mll.fmt_lexer_def c.conf ~cmts:c.cmts ~fmt_code:c.fmt_code + ~fmt_code_structure d let fmt_parse_result conf ~debug ast_kind ast source comments - ~set_margin:set_margin_p ~fmt_code = + ~set_margin:set_margin_p ~fmt_code ~fmt_code_structure = let cmts = Cmts.init ast_kind ~debug source ast comments in let ctx = Top in let code = fmt_if set_margin_p (set_margin conf.Conf.fmt_opts.margin.v) $ fmt_file ~ctx ~debug ast_kind source cmts conf ast ~fmt_code + ~fmt_code_structure in Ok code @@ -5149,10 +5171,10 @@ let fmt_code ~debug = with | Either.First {Parsed.ast; comments; source; prefix= _} -> fmt_parse_result conf ~debug Use_file ast source comments ~set_margin - ~fmt_code + ~fmt_code ~fmt_code_structure | Second {Parsed.ast; comments; source; prefix= _} -> fmt_parse_result conf ~debug Repl_file ast source comments - ~set_margin ~fmt_code + ~set_margin ~fmt_code ~fmt_code_structure | exception Syntaxerr.Error (Expecting (_, x)) when warn -> Error (`Msg (Format.asprintf "expecting: %s" x)) | exception Syntaxerr.Error (Not_expecting (_, x)) when warn -> @@ -5161,12 +5183,28 @@ let fmt_code ~debug = Error (`Msg (Format.asprintf "invalid toplevel or OCaml syntax")) | exception e when warn -> Error (`Msg (Format.asprintf "%a" Exn.pp e)) | exception _ -> Error (`Msg "") + and fmt_code_structure (conf : Conf.t) ~offset ~set_margin s = + let {Conf.fmt_opts; _} = conf in + let conf = + let margin = {fmt_opts.margin with v= fmt_opts.margin.v - offset} in + {conf with fmt_opts= {fmt_opts with margin}} + in + let input_name = !Location.input_name in + match + Extended_ast.parse ~disable_deprecated:true Structure conf ~input_name + ~source:s + with + | {Parsed.ast; comments; source; prefix= _} -> + fmt_parse_result conf ~debug Structure ast source comments + ~set_margin ~fmt_code ~fmt_code_structure + | exception _ -> Error (`Msg "") in - fmt_code + (fmt_code, fmt_code_structure) let fmt_ast fragment ~debug source cmts conf l = (* [Ast.init] should be called only once per file. In particular, we don't want to call it when formatting comments *) Ast.init conf ; - let fmt_code = fmt_code ~debug in - fmt_file ~ctx:Top ~fmt_code ~debug fragment source cmts conf l + let fmt_code, fmt_code_structure = fmt_code ~debug in + fmt_file ~ctx:Top ~fmt_code ~fmt_code_structure ~debug fragment source cmts + conf l diff --git a/lib/Fmt_inplace.ml b/lib/Fmt_inplace.ml new file mode 100644 index 0000000000..82d8bcd6f7 --- /dev/null +++ b/lib/Fmt_inplace.ml @@ -0,0 +1,133 @@ +type fmt_code = + Conf.t + -> offset:int + -> set_margin:bool + -> string + -> (Fmt.t, [`Msg of string]) Result.t + +(** Render a [Fmt.t] to a string with a given margin. *) +let render_fmt ~margin t = + let buffer = Buffer.create 256 in + let fs = Format_.formatter_of_buffer buffer in + Fmt.eval fs (Fmt.( $ ) (Fmt.set_margin margin) t) ; + Format_.pp_print_flush fs () ; + Buffer.contents buffer + +(** Detect and strip delimiters from an OCaml code block. Returns + [(inner_code, prefix_str, suffix_str, delimiter_len)]. *) +let strip_delimiters s = + let len = String.length s in + if + len >= 4 + && Char.equal s.[0] '%' + && Char.equal s.[1] '{' + && Char.equal s.[len - 2] '%' + && Char.equal s.[len - 1] '}' + then + (* %{ ... %} *) + (String.sub s ~pos:2 ~len:(len - 4), "%{", "%}", 2) + else if len >= 2 && Char.equal s.[0] '{' && Char.equal s.[len - 1] '}' then + (* { ... } *) + (String.sub s ~pos:1 ~len:(len - 2), "{", "}", 1) + else + (* No delimiters (e.g. mly trailer) *) + (s, "", "", 0) + +(** A code block with its source text and position info. *) +type block = {value: string; loc: Location.t; block_level: bool} + +(** Find the column of the first non-whitespace character on the line + starting at byte offset [bol] in [source]. *) +let line_indent source bol = + let len = String.length source in + let rec loop i = + if i >= len then i - bol + else match source.[i] with ' ' | '\t' -> loop (i + 1) | _ -> i - bol + in + loop bol + +(** Format a single OCaml code block and return the replacement string, + or [None] if formatting fails. *) +let format_block ~source ~fmt_code ~fmt_code_structure ~(conf : Conf.t) + ~menhir_mode (block : block) = + let inner, open_delim, close_delim, _delim_len = + strip_delimiters block.value + in + let trimmed = String.strip inner in + if String.is_empty trimmed then None + else + let line_ind = line_indent source block.loc.loc_start.pos_bol in + let offset = if block.block_level then 0 else line_ind + 2 in + let _ = menhir_mode in + let fmt = if block.block_level then fmt_code_structure else fmt_code in + match fmt conf ~offset ~set_margin:false trimmed with + | Ok fmt_t -> + let margin = max 2 (conf.fmt_opts.margin.v - offset) in + let formatted = render_fmt ~margin fmt_t in + let formatted = String.rstrip formatted in + let result = + if String.is_empty open_delim then + (* No delimiters: trailer — preserve leading newline *) + let prefix = + if + String.length block.value > 0 + && Char.equal block.value.[0] '\n' + then "\n" + else "" + in + prefix ^ formatted + else if block.block_level then + (* Header block (%{ %} or { } at top level) — no indentation *) + open_delim ^ "\n" ^ formatted ^ "\n" ^ close_delim + else if String.mem formatted '\n' then + (* Multi-line action: indent based on first token of the line *) + let indent = String.make (line_ind + 2) ' ' in + let close_indent = String.make line_ind ' ' in + let lines = String.split_lines formatted in + let indented = String.concat ~sep:("\n" ^ indent) lines in + open_delim ^ "\n" ^ indent ^ indented ^ "\n" ^ close_indent + ^ close_delim + else + (* Single-line action: inline style *) + open_delim ^ " " ^ formatted ^ " " ^ close_delim + in + Some result + | Error _ -> None + +let format_inplace ~source ~fmt_code ~fmt_code_structure ~conf ~menhir_mode + ~cmts blocks = + let source = Source.text source in + let drop_comments_inside = Cmts.drop_inside cmts in + let sorted = + List.sort blocks ~compare:(fun (b1 : block) (b2 : block) -> + Int.compare b1.loc.loc_start.pos_cnum b2.loc.loc_start.pos_cnum ) + in + let mk_loc pos_start pos_end = + let mk pos_cnum = + {Lexing.pos_fname= ""; pos_lnum= 0; pos_bol= 0; pos_cnum} + in + {Location.loc_start= mk pos_start; loc_end= mk pos_end; loc_ghost= true} + in + let buf = Buffer.create (String.length source) in + let pos = + List.fold sorted ~init:0 ~f:(fun pos block -> + let start = block.loc.loc_start.pos_cnum in + let stop = block.loc.loc_end.pos_cnum in + (* Copy verbatim text before this block and drop its comments *) + Buffer.add_string buf (String.sub source ~pos ~len:(start - pos)) ; + drop_comments_inside (mk_loc pos start) ; + ( match + format_block ~source ~fmt_code ~fmt_code_structure ~conf + ~menhir_mode block + with + | Some replacement -> Buffer.add_string buf replacement + | None -> + Buffer.add_string buf + (String.sub source ~pos:start ~len:(stop - start)) ) ; + stop ) + in + (* Copy remaining text after the last block *) + let len = String.length source in + Buffer.add_string buf (String.sub source ~pos ~len:(len - pos)) ; + drop_comments_inside (mk_loc pos len) ; + Buffer.contents buf diff --git a/lib/Fmt_inplace.mli b/lib/Fmt_inplace.mli new file mode 100644 index 0000000000..90f78b9e5f --- /dev/null +++ b/lib/Fmt_inplace.mli @@ -0,0 +1,18 @@ +type fmt_code = + Conf.t + -> offset:int + -> set_margin:bool + -> string + -> (Fmt.t, [`Msg of string]) Result.t + +type block = {value: string; loc: Location.t; block_level: bool} + +val format_inplace : + source:Source.t + -> fmt_code:fmt_code + -> fmt_code_structure:fmt_code + -> conf:Conf.t + -> menhir_mode:bool + -> cmts:Cmts.t + -> block list + -> string diff --git a/lib/Fmt_mll.ml b/lib/Fmt_mll.ml new file mode 100644 index 0000000000..00f82f47b2 --- /dev/null +++ b/lib/Fmt_mll.ml @@ -0,0 +1,160 @@ +open Fmt +open Ocamlformat_mll_parser.Mll_ast + +type fmt_code = + Conf.t + -> offset:int + -> set_margin:bool + -> string + -> (Fmt.t, [`Msg of string]) Result.t + +type c = + { fmt_code: fmt_code + ; fmt_code_structure: fmt_code + ; conf: Conf.t + ; cmts: Cmts.t } + +(* Strip delimiters from ocaml code block: { code } -> code *) +let strip_braces s = + let len = String.length s in + if len >= 2 && Char.equal s.[0] '{' && Char.equal s.[len - 1] '}' then + String.sub s ~pos:1 ~len:(len - 2) + else s + +let fmt_ocaml_code c ~offset code = + let raw = strip_braces code in + let trimmed = String.strip raw in + if String.is_empty trimmed then str "{ }" + else + match + c.fmt_code c.conf ~offset:(offset + 2) ~set_margin:false trimmed + with + | Ok formatted -> hvbox 2 (str "{ " $ formatted $ str " }") + | Error _ -> str code + +let fmt_ocaml_block c code = + let raw = strip_braces code in + let trimmed = String.strip raw in + if String.is_empty trimmed then str "{" $ force_newline $ str "}" + else + match + c.fmt_code_structure c.conf ~offset:0 ~set_margin:false trimmed + with + | Ok formatted -> + vbox 0 (str "{" $ force_newline $ formatted $ force_newline $ str "}") + | Error _ -> str code + +let fmt_char_entry = function + | Cchar c -> str c + | Crange (c1, c2) -> str c1 $ str "-" $ str c2 + +let rec fmt_regexp = function + | Reof -> str "eof" + | Runderscore -> str "_" + | Rchar c -> str c + | Rstring s -> str s + | Rident id -> str id + | Rsequence rs -> list rs (str " ") fmt_regexp + | Ralternative rs -> list rs (str " | ") fmt_regexp + | Rrepeat r -> fmt_regexp r $ str "*" + | Rplus r -> fmt_regexp r $ str "+" + | Roption r -> fmt_regexp r $ str "?" + | Rparen r -> str "(" $ fmt_regexp r $ str ")" + | Rcharset entries -> + str "[" $ list entries (str " ") fmt_char_entry $ str "]" + | Rcharset_neg entries -> + str "[^" $ list entries (str " ") fmt_char_entry $ str "]" + | Ras (r, id) -> fmt_regexp r $ str " as " $ str id + | Rhash (r1, r2) -> fmt_regexp r1 $ str " # " $ fmt_regexp r2 + +let fmt_named_def _c (d : named_def) = + hovbox 2 + ( str "let " $ str d.def_name.value $ str " =" $ break 1 2 + $ fmt_regexp d.def_body ) + +let fmt_before c loc = Cmts.fmt_before c.cmts c.conf ~fmt_code:c.fmt_code loc + +let fmt_after c loc = Cmts.fmt_after c.cmts c.conf ~fmt_code:c.fmt_code loc + +let fmt_case c (case : rule_case) = + hovbox 2 + ( str "| " $ fmt_regexp case.pattern $ break 1 4 + $ lazy_ (fun () -> fmt_before c case.action.loc) + $ fmt_ocaml_code c ~offset:4 case.action.value ) + +let fmt_rule_entry c (entry : rule_entry) = + let args = + match entry.entry_args with + | [] -> noop + | args -> + str " " + $ list args (str " ") (fun arg -> + fmt_before c arg.loc $ str arg.value $ fmt_after c arg.loc ) + in + let kind = if entry.entry_is_shortest then "shortest" else "parse" in + vbox 2 + ( fmt_before c entry.entry_name.loc + $ str entry.entry_name.value + $ fmt_after c entry.entry_name.loc + $ args $ str " = " $ str kind $ force_newline + $ + let prev_action_loc = ref None in + list_fl entry.entry_cases (fun ~first:_ ~last case -> + lazy_ (fun () -> + let drain_prev = + match !prev_action_loc with + | Some loc -> fmt_after c loc + | None -> noop + in + prev_action_loc := Some case.action.loc ; + drain_prev $ fmt_before c case.action.loc ) + $ fmt_case c case + $ fmt_if last (lazy_ (fun () -> fmt_after c case.action.loc)) + $ fmt_if (not last) force_newline ) ) + +let collect_ocaml_codes (def : lexer_def) = + let codes = ref [] in + Option.iter def.header ~f:(fun h -> codes := (h, true) :: !codes) ; + List.iter def.rules ~f:(fun entry -> + List.iter entry.entry_cases ~f:(fun case -> + codes := (case.action, false) :: !codes ) ) ; + Option.iter def.trailer ~f:(fun t -> codes := (t, true) :: !codes) ; + List.rev !codes + +let fmt_lexer_def conf ~cmts ~fmt_code ~fmt_code_structure (def : lexer_def) + = + let c = {fmt_code; fmt_code_structure; conf; cmts} in + vbox 0 + ( (* Header *) + ( match def.header with + | Some h -> + fmt_before c h.loc $ fmt_ocaml_block c h.value + $ fmt_after c h.loc $ force_newline + | None -> noop ) + $ force_newline + $ + (* Named definitions *) + ( match def.named_defs with + | [] -> noop + | defs -> + list defs force_newline (fun (d : named_def) -> + lazy_ (fun () -> + fmt_before c d.def_loc $ fmt_before c d.def_name.loc ) + $ fmt_named_def c d + $ fmt_after c d.def_name.loc + $ fmt_after c d.def_loc ) + $ force_newline $ force_newline ) + $ + (* Rules *) + ( match def.rules with + | [] -> noop + | first :: rest -> + str "rule " $ fmt_rule_entry c first + $ list rest noop (fun entry -> + force_newline $ force_newline $ str "and " + $ fmt_rule_entry c entry ) ) + $ + (* Trailer *) + opt def.trailer (fun t -> + force_newline $ force_newline $ fmt_before c t.loc + $ fmt_ocaml_block c t.value $ fmt_after c t.loc ) ) diff --git a/lib/Fmt_mll.mli b/lib/Fmt_mll.mli new file mode 100644 index 0000000000..57824ee472 --- /dev/null +++ b/lib/Fmt_mll.mli @@ -0,0 +1,20 @@ +type fmt_code = + Conf.t + -> offset:int + -> set_margin:bool + -> string + -> (Fmt.t, [`Msg of string]) Result.t + +val collect_ocaml_codes : + Ocamlformat_mll_parser.Mll_ast.lexer_def + -> (Ocamlformat_mll_parser.Mll_ast.ocaml_code * bool) list +(** Returns [(code, block_level)] pairs. [block_level] is true for + headers and trailers, false for rule actions. *) + +val fmt_lexer_def : + Conf.t + -> cmts:Cmts.t + -> fmt_code:fmt_code + -> fmt_code_structure:fmt_code + -> Ocamlformat_mll_parser.Mll_ast.lexer_def + -> Fmt.t diff --git a/lib/Source.ml b/lib/Source.ml index 76b01f9466..aaf95565c6 100644 --- a/lib/Source.ml +++ b/lib/Source.ml @@ -24,6 +24,8 @@ let create ~text ~tokens = in {text; tokens= Array.of_list tokens} +let text t = t.text + let string_at t (l : Location.t) = let pos = l.loc_start.Lexing.pos_cnum and len = Position.distance l.loc_start l.loc_end in diff --git a/lib/Source.mli b/lib/Source.mli index 9416265e8b..bb930b4b20 100644 --- a/lib/Source.mli +++ b/lib/Source.mli @@ -15,6 +15,8 @@ type t val create : text:string -> tokens:(Parser.token * Location.t) list -> t +val text : t -> string + val empty_line_between : t -> Lexing.position -> Lexing.position -> bool (** [empty_line_between t p1 p2] is [true] if there is an empty line between [p1] and [p2]. The lines containing [p1] and [p2] are not considered diff --git a/lib/Std_longident.ml b/lib/Std_longident.ml index 7626d853d0..78076be29e 100644 --- a/lib/Std_longident.ml +++ b/lib/Std_longident.ml @@ -17,6 +17,7 @@ module Indexing_op = struct ; brackets: Asttypes.paren_kind ; extended: bool (** eg. [.*{;..}] *) ; has_rhs: bool (** eg. [.*{}<-] *) } + [@@warning "-69"] let parse ident = match String.chop_prefix ~prefix:"." ident with diff --git a/lib/Syntax.ml b/lib/Syntax.ml index 3f059153f7..5aeb4e506a 100644 --- a/lib/Syntax.ml +++ b/lib/Syntax.ml @@ -19,10 +19,12 @@ type t = | Pattern | Repl_file | Documentation + | Mll_file let of_fname fname = match Filename.extension fname with | ".ml" | ".mlt" | ".eliom" -> Some Use_file | ".mli" | ".eliomi" -> Some Signature | ".mld" -> Some Documentation + | ".mll" -> Some Mll_file | _ -> None diff --git a/lib/Syntax.mli b/lib/Syntax.mli index 1e201ebe85..0d4c242bf8 100644 --- a/lib/Syntax.mli +++ b/lib/Syntax.mli @@ -19,6 +19,7 @@ type t = | Pattern | Repl_file | Documentation + | Mll_file val of_fname : string -> t option (** The expected syntax of a file given its name. *) diff --git a/lib/Translation_unit.ml b/lib/Translation_unit.ml index 848b20d25d..9499cd0727 100644 --- a/lib/Translation_unit.ml +++ b/lib/Translation_unit.ml @@ -57,11 +57,9 @@ module Error = struct let n = Filename.temp_file input_name (Printf.sprintf ".next%s" ext) in Out_channel.write_all p ~data:prev ; Out_channel.write_all n ~data:next ; - ignore - (Stdlib.Sys.command - (Printf.sprintf "git diff --no-index -u %S %S | sed '1,4d' 1>&2" p n) ) ; - Stdlib.Sys.remove p ; - Stdlib.Sys.remove n + Format.eprintf "git diff --no-index -u %S %S \n" p n ; + if false then Stdlib.Sys.remove p ; + if false then Stdlib.Sys.remove n let print_internal_error ~debug ~quiet fmt e = let s = @@ -95,9 +93,18 @@ module Error = struct | Syntaxerr.Error _ | Lexer.Error _ -> " (syntax error)" | Extended_ast.Warning50 _ -> " (misplaced documentation comments - warning 50)" + | Ocamlformat_mll_parser.Mll_parser.Parse_error _ + |Ocamlformat_mll_parser.Mll_lexer.Lexer_error _ -> + " (syntax error)" | _ -> "" in Format.fprintf fmt "%s: ignoring %S%s\n%!" exe input_name reason ; + let fmt_pos fmt (pos : Lexing.position) = + Format.fprintf fmt "File %S, line %d, characters %d-%d" + pos.pos_fname pos.pos_lnum + (pos.pos_cnum - pos.pos_bol) + (pos.pos_cnum - pos.pos_bol) + in match exn with | Syntaxerr.Error _ | Lexer.Error _ -> Location.report_exception fmt exn @@ -113,9 +120,13 @@ module Error = struct (though it might not be consistent with the ocaml compilers \ and odoc), you can set the --no-comment-check option.\n\ %!" + | Ocamlformat_mll_parser.Mll_parser.Parse_error (msg, pos) -> + Format.fprintf fmt "%a:\nError: %s\n%!" fmt_pos pos msg + | Ocamlformat_mll_parser.Mll_lexer.Lexer_error (msg, pos) -> + Format.fprintf fmt "%a:\nError: %s\n%!" fmt_pos pos msg | exn -> Format.fprintf fmt "%s\n%!" (Exn.to_string exn) ) | Unstable {iteration; prev; next; input_name} -> - if debug then print_diff input_name ~prev ~next ; + if debug || true then print_diff input_name ~prev ~next ; if iteration <= 1 then Format.fprintf fmt "%s: %S was not already formatted. ([max-iters = 1])\n%!" exe @@ -374,14 +385,18 @@ let parse_and_format (type ext) (ext_fg : ext Extended_ast.t) ?output_file ~input_name ~source (conf : Conf.t) = Location.input_name := input_name ; let line_endings = conf.fmt_opts.line_endings.v in - let* ext_parsed = - parse_result ~disable_w50:false ext_fg conf ~source ~input_name - in - let+ strlocs, formatted = - format ext_fg ?output_file ~input_name ~prev_source:source ~ext_parsed - conf - in - Eol_compat.normalize_eol ~exclude_locs:strlocs ~line_endings formatted + match ext_fg with + | Extended_ast.Mll_file when Poly.(conf.fmt_opts.reformat_mll.v = `No) -> + Ok source + | _ -> + let* ext_parsed = + parse_result ~disable_w50:false ext_fg conf ~source ~input_name + in + let+ strlocs, formatted = + format ext_fg ?output_file ~input_name ~prev_source:source + ~ext_parsed conf + in + Eol_compat.normalize_eol ~exclude_locs:strlocs ~line_endings formatted let parse_and_format syntax = let (Extended_ast.Any ext) = Extended_ast.of_syntax syntax in diff --git a/lib/bin_conf/Bin_conf.ml b/lib/bin_conf/Bin_conf.ml index 5c357c4a5a..043e00374a 100644 --- a/lib/bin_conf/Bin_conf.ml +++ b/lib/bin_conf/Bin_conf.ml @@ -200,10 +200,14 @@ let kind = in let doc = "Parse input as an odoc documentation." in let doc_file = (Some Syntax.Documentation, Arg.info ["doc"] ~doc ~docs) in + let doc = "Parse input as a .mll lexer file." in + let mll_file = (Some Syntax.Mll_file, Arg.info ["mll"] ~doc ~docs) in let default = None in declare_option ~set:(fun kind conf -> {conf with kind}) - Arg.(value & vflag default [impl; intf; use_file; repl_file; doc_file]) + Arg.( + value + & vflag default [impl; intf; use_file; repl_file; doc_file; mll_file] ) let name = let docv = "NAME" in diff --git a/lib/dune b/lib/dune index 1281db301d..98c6d47066 100644 --- a/lib/dune +++ b/lib/dune @@ -33,6 +33,7 @@ ocaml-version ocamlformat_format ocamlformat_ocaml_common + ocamlformat_mll_parser ocamlformat_odoc_parser ocamlformat_parser_extended ocamlformat_parser_standard diff --git a/ocamlformat-bench.opam b/ocamlformat-bench.opam index 4ef1dee209..63de4e56ad 100644 --- a/ocamlformat-bench.opam +++ b/ocamlformat-bench.opam @@ -18,7 +18,7 @@ authors: [ homepage: "https://github.com/ocaml-ppx/ocamlformat" bug-reports: "https://github.com/ocaml-ppx/ocamlformat/issues" depends: [ - "dune" {>= "2.8"} + "dune" {>= "3.13"} "ocaml" {>= "4.08"} "alcotest" {with-test & >= "1.3.0"} "bechamel" {>= "0.2.0"} diff --git a/ocamlformat-lib.opam b/ocamlformat-lib.opam index bff1c298e8..64cd90a416 100644 --- a/ocamlformat-lib.opam +++ b/ocamlformat-lib.opam @@ -22,7 +22,7 @@ depends: [ "alcotest" {with-test & >= "1.3.0"} "base" {>= "v0.12.0"} "cmdliner" {>= "1.1.0"} - "dune" {>= "2.8"} + "dune" {>= "3.13"} "dune-build-info" "either" "fix" diff --git a/ocamlformat-rpc-lib.opam b/ocamlformat-rpc-lib.opam index 66cee8b9fd..cfdd704df0 100644 --- a/ocamlformat-rpc-lib.opam +++ b/ocamlformat-rpc-lib.opam @@ -19,7 +19,7 @@ license: "MIT" homepage: "https://github.com/ocaml-ppx/ocamlformat" bug-reports: "https://github.com/ocaml-ppx/ocamlformat/issues" depends: [ - "dune" {>= "2.8"} + "dune" {>= "3.13"} "ocaml" {>= "4.08"} "csexp" {>= "1.4.0"} "odoc" {with-doc} diff --git a/ocamlformat.opam b/ocamlformat.opam index b23201bbe2..a2c6373065 100644 --- a/ocamlformat.opam +++ b/ocamlformat.opam @@ -26,7 +26,7 @@ depends: [ "ocaml" {>= "4.14"} "cmdliner" {with-test = "false" & >= "1.1.0" | with-test & >= "1.2.0"} "csexp" {>= "1.4.0"} - "dune" {>= "2.8"} + "dune" {>= "3.13"} "ocamlformat-lib" {= version} "ocamlformat-rpc-lib" {with-test & = version} "re" {>= "1.10.3"} diff --git a/test/cli/mll.t b/test/cli/mll.t new file mode 100644 index 0000000000..855ff06a78 --- /dev/null +++ b/test/cli/mll.t @@ -0,0 +1,158 @@ + $ echo profile=default > .ocamlformat + +Format a .mll file from stdin using --mll (default: ocaml-block mode): + + $ cat << 'EOF' | ocamlformat --mll - + > { + > let x = 1 + > } + > let blank = [' ' '\t'] + > rule main = parse + > | blank+ + > { () } + > | eof + > { () } + > EOF + { + let x = 1 + } + let blank = [' ' '\t'] + rule main = parse + | blank+ + { () } + | eof + { () } + +Format a .mll file inferred from --name: + + $ cat << 'EOF' | ocamlformat --name test.mll - + > { + > open Lexing + > } + > let newline = '\n' | "\r\n" + > rule token = parse + > | newline + > { new_line lexbuf; token lexbuf } + > | eof + > { EOF } + > EOF + { + open Lexing + } + let newline = '\n' | "\r\n" + rule token = parse + | newline + { + new_line lexbuf; + token lexbuf + } + | eof + { EOF } + +Default mode (ocaml-block) preserves comments: + + $ cat << 'EOF' | ocamlformat --mll - + > (* a header comment *) + > { + > let x = 1 + > } + > (* a definition comment *) + > let blank = [' ' '\t'] + > (* a rule comment *) + > rule main = parse + > (* a case comment *) + > | blank+ + > { () } + > | eof + > { () } + > EOF + (* a header comment *) + { + let x = 1 + } + (* a definition comment *) + let blank = [' ' '\t'] + (* a rule comment *) + rule main = parse + (* a case comment *) + | blank+ + { () } + | eof + { () } + +Full reformat mode (--reformat-mll=full): + + $ cat << 'EOF' | ocamlformat --mll --reformat-mll=full - + > { + > let x = 1 + > } + > let blank = [' ' '\t'] + > rule main = parse + > | blank+ + > { () } + > | eof + > { () } + > EOF + { + let x = 1 + } + + let blank = [' ' '\t'] + + rule main = parse + | blank+ { () } + | eof { () } + +Full reformat mode preserves comments: + + $ cat << 'EOF' | ocamlformat --mll --reformat-mll=full - + > (* a header comment *) + > { + > let x = 1 + > } + > (* a definition comment *) + > let blank = [' ' '\t'] + > (* a rule comment *) + > rule main = parse + > (* a case comment *) + > | blank+ + > { () } + > | eof + > { () } + > EOF + (* a header comment *) + { + let x = 1 + } + + (* a definition comment *) + let blank = [' ' '\t'] + + (* a rule comment *) + rule main = parse + (* a case comment *) + | blank+ { () } + | eof { () } + +No-format mode (--reformat-mll=no) returns input unchanged: + + $ cat << 'EOF' | ocamlformat --mll --reformat-mll=no - + > { + > let x = 1 + > } + > let blank = [' ' '\t'] + > rule main = parse + > | blank+ + > { () } + > | eof + > { () } + > EOF + { + let x = 1 + } + let blank = [' ' '\t'] + rule main = parse + | blank+ + { () } + | eof + { () } diff --git a/test/cli/print_config.t b/test/cli/print_config.t index 46cb289268..2634d89ee2 100644 --- a/test/cli/print_config.t +++ b/test/cli/print_config.t @@ -73,6 +73,7 @@ No redundant values: parens-tuple-patterns=multi-line-only (profile conventional (file .ocamlformat:1)) parse-docstrings=true (profile conventional (file .ocamlformat:1)) parse-toplevel-phrases=false (profile conventional (file .ocamlformat:1)) + reformat-mll=ocaml-block (profile conventional (file .ocamlformat:1)) sequence-blank-line=preserve-one (profile conventional (file .ocamlformat:1)) sequence-style=terminator (profile conventional (file .ocamlformat:1)) single-case=compact (profile conventional (file .ocamlformat:1)) @@ -154,6 +155,7 @@ Redundant values from the conventional profile: parens-tuple-patterns=multi-line-only (profile conventional (file .ocamlformat:1)) parse-docstrings=true (profile conventional (file .ocamlformat:1)) parse-toplevel-phrases=false (profile conventional (file .ocamlformat:1)) + reformat-mll=ocaml-block (profile conventional (file .ocamlformat:1)) sequence-blank-line=preserve-one (profile conventional (file .ocamlformat:1)) sequence-style=terminator (profile conventional (file .ocamlformat:1)) single-case=compact (profile conventional (file .ocamlformat:1)) @@ -235,6 +237,7 @@ Redundant values from the ocamlformat profile: parens-tuple-patterns=multi-line-only (profile ocamlformat (file .ocamlformat:1)) parse-docstrings=false (profile ocamlformat (file .ocamlformat:1)) parse-toplevel-phrases=false (profile ocamlformat (file .ocamlformat:1)) + reformat-mll=ocaml-block (profile ocamlformat (file .ocamlformat:1)) sequence-blank-line=compact (profile ocamlformat (file .ocamlformat:1)) sequence-style=separator (profile ocamlformat (file .ocamlformat:1)) single-case=compact (profile ocamlformat (file .ocamlformat:1)) diff --git a/test/passing/tests/basic.mll b/test/passing/tests/basic.mll new file mode 100644 index 0000000000..a4a02a894f --- /dev/null +++ b/test/passing/tests/basic.mll @@ -0,0 +1,15 @@ +{ + let x = 1 +} + +let newline = '\n' | "\r\n" +let blank = [' ' '\t'] +let ident = ['a'-'z' 'A'-'Z' '_'] ['a'-'z' 'A'-'Z' '0'-'9' '_']* + +rule main = parse + | newline + { new_line lexbuf; main lexbuf } + | blank+ + { main lexbuf } + | eof + { () } diff --git a/vendor/mll-parser/dune b/vendor/mll-parser/dune new file mode 100644 index 0000000000..0a487faef0 --- /dev/null +++ b/vendor/mll-parser/dune @@ -0,0 +1,20 @@ +(ocamllex mll_lexer) + +(menhir + (modules mll_grammar) + (explain true)) + +(library + (name ocamlformat_mll_parser) + (public_name ocamlformat-lib.mll_parser) + (instrumentation + (backend bisect_ppx)) + (flags + (:standard + -w + -50 + -open + Ocamlformat_parser_shims + -open + Ocamlformat_ocaml_common)) + (libraries menhirLib ocamlformat_parser_shims ocamlformat_ocaml_common)) diff --git a/vendor/mll-parser/mll_ast.ml b/vendor/mll-parser/mll_ast.ml new file mode 100644 index 0000000000..d7ff73e2fc --- /dev/null +++ b/vendor/mll-parser/mll_ast.ml @@ -0,0 +1,48 @@ +type 'a located = {value: 'a; loc: Location.t} + +type ocaml_code = string located + +(** Character class entry (inside [...]) *) +type char_entry = + | Cchar of string (** single char literal, e.g. 'a' *) + | Crange of string * string (** range, e.g. 'a'-'z' *) + +(** Regular expression *) +type regexp = + | Reof + | Rchar of string (** character literal 'c' *) + | Rstring of string (** string literal "s" *) + | Rident of string (** named regexp reference *) + | Runderscore (** _ (any character) *) + | Rsequence of regexp list + | Ralternative of regexp list + | Rrepeat of regexp (** e* *) + | Rplus of regexp (** e+ *) + | Roption of regexp (** e? *) + | Rparen of regexp (** (e) *) + | Rcharset of char_entry list (** [set] *) + | Rcharset_neg of char_entry list (** [^set] *) + | Ras of regexp * string (** e as id *) + | Rhash of regexp * regexp (** e1 # e2 *) + +type action = ocaml_code + +type rule_case = {pattern: regexp; action: action} + +type rule_entry = + { entry_name: string located + ; entry_args: string located list + ; entry_is_shortest: bool + ; entry_cases: rule_case list } + +type named_def = + { def_name: string located + ; def_body: regexp + ; def_loc: Location.t } + +type lexer_def = + { header: ocaml_code option + ; named_defs: named_def list + ; rules: rule_entry list + ; trailer: ocaml_code option + ; comments: ocaml_code list } diff --git a/vendor/mll-parser/mll_grammar.mly b/vendor/mll-parser/mll_grammar.mly new file mode 100644 index 0000000000..3c49a3c130 --- /dev/null +++ b/vendor/mll-parser/mll_grammar.mly @@ -0,0 +1,171 @@ +/* Adapted from OCaml's lex/parser.mly (OCaml 5.4) */ +/* Modifications: produce Mll_ast types, preserve named_defs, keep text */ + +%{ +open Mll_ast + +let mk_loc startpos endpos = + { Location.loc_start = startpos; loc_end = endpos; loc_ghost = false } + +let located startpos endpos value = + { value; loc = mk_loc startpos endpos } +%} + +%token IDENT +%token CHAR +%token STRING +%token OCAML_CODE +%token RULE PARSE SHORTEST AND EQUAL PIPE UNDERSCORE EOF_KW + LBRACKET RBRACKET +%token STAR QUESTION PLUS LPAREN RPAREN CARET DASH LET AS HASH +%token EOF + +/* Precedence — matches upstream ocamllex exactly */ +%right AS +%left PIPE +%nonassoc CONCAT +%nonassoc QUESTION STAR PLUS +%left HASH +%nonassoc IDENT CHAR STRING UNDERSCORE EOF_KW LBRACKET LPAREN + +%start lexer_def +%% + +lexer_def: + header named_regexps refill_handler RULE definition other_definitions + trailer EOF + { { header = $1; + named_defs = List.rev $2; + rules = $5 :: List.rev_map snd $6; + trailer = $7; + comments = [] } } +; +header: + OCAML_CODE + { Some (located $startpos $endpos $1) } + | /*epsilon*/ + { None } +; +named_regexps: + named_regexps LET IDENT EQUAL regexp + { { def_name = located $startpos($3) $endpos($3) $3; + def_body = $5; + def_loc = mk_loc $startpos($2) $endpos } :: $1 } + | /*epsilon*/ + { [] } +; +other_definitions: + other_definitions AND definition + { ($startpos($2), $3) :: $1 } + | /*epsilon*/ + { [] } +; +refill_handler: + | IDENT OCAML_CODE + { (* "refill" { handler } — skip, collected as comment by lexer *) } + | /*empty*/ + { () } +; +definition: + IDENT arguments EQUAL PARSE entry + { { entry_name = located $startpos($1) $endpos($1) $1; + entry_args = $2; + entry_is_shortest = false; + entry_cases = $5; + } } + | IDENT arguments EQUAL SHORTEST entry + { { entry_name = located $startpos($1) $endpos($1) $1; + entry_args = $2; + entry_is_shortest = true; + entry_cases = $5; + } } +; + +arguments: + IDENT arguments + { (located $startpos($1) $endpos($1) $1) :: $2 } + | /*epsilon*/ + { [] } +; + +trailer: + OCAML_CODE + { Some (located $startpos $endpos $1) } + | /*epsilon*/ + { None } +; + +entry: + case rest_of_entry + { $1 :: List.rev $2 } + | PIPE case rest_of_entry + { $2 :: List.rev $3 } +; + +rest_of_entry: + rest_of_entry PIPE case + { $3 :: $1 } + | + { [] } +; +case: + regexp OCAML_CODE + { { pattern = $1; + action = located $startpos($2) $endpos($2) $2 } } +; +regexp: + UNDERSCORE + { Runderscore } + | EOF_KW + { Reof } + | CHAR + { Rchar $1 } + | STRING + { Rstring $1 } + | LBRACKET char_class RBRACKET + { $2 } + | regexp STAR + { Rrepeat $1 } + | regexp QUESTION + { Roption $1 } + | regexp PLUS + { Rplus $1 } + | regexp HASH regexp + { Rhash ($1, $3) } + | regexp PIPE regexp + { match $1, $3 with + | Ralternative l1, Ralternative l2 -> Ralternative (l1 @ l2) + | Ralternative l, r -> Ralternative (l @ [r]) + | l, Ralternative r -> Ralternative (l :: r) + | l, r -> Ralternative [l; r] } + | regexp regexp %prec CONCAT + { match $1, $2 with + | Rsequence l1, Rsequence l2 -> Rsequence (l1 @ l2) + | Rsequence l, r -> Rsequence (l @ [r]) + | l, Rsequence r -> Rsequence (l :: r) + | l, r -> Rsequence [l; r] } + | LPAREN regexp RPAREN + { Rparen $2 } + | IDENT + { Rident $1 } + | regexp AS IDENT + { Ras ($1, $3) } +; + +char_class: + CARET char_class1 + { Rcharset_neg (List.rev $2) } + | char_class1 + { Rcharset (List.rev $1) } +; + +char_class1: + CHAR DASH CHAR + { [Crange ($1, $3)] } + | CHAR + { [Cchar $1] } + | char_class1 char_class1 %prec CONCAT + { $2 @ $1 } +; + +%% diff --git a/vendor/mll-parser/mll_lexer.mll b/vendor/mll-parser/mll_lexer.mll new file mode 100644 index 0000000000..a8c4413256 --- /dev/null +++ b/vendor/mll-parser/mll_lexer.mll @@ -0,0 +1,278 @@ +(* Adapted from OCaml's lex/lexer.mll (OCaml 5.4) *) +(* Modifications: collect text into buffers, produce our tokens, track comments *) + +{ +open Lexing +open Mll_grammar + +exception Lexer_error of string * position + +let error lexbuf msg = + raise (Lexer_error (msg, lexbuf.lex_curr_p)) + +let comments : Mll_ast.ocaml_code list ref = ref [] + +let reset_comments () = + let c = List.rev !comments in + comments := []; + c + +let add_comment lexbuf s = + let loc = + { Location.loc_start = lexbuf.lex_start_p + ; loc_end = lexbuf.lex_curr_p + ; loc_ghost = false } + in + comments := { Mll_ast.value = s; loc } :: !comments + +(* Upstream helpers *) +let string_buff = Buffer.create 256 +let reset_string_buffer () = Buffer.clear string_buff +let store_string_char c = Buffer.add_char string_buff c +let store_string_chars s = Buffer.add_string string_buff s +let get_stored_string () = Buffer.contents string_buff + +let char_for_backslash = function + | 'n' -> '\010' | 'r' -> '\013' | 'b' -> '\008' | 't' -> '\009' | c -> c + +let decimal_code c d u = + 100 * (Char.code c - 48) + 10 * (Char.code d - 48) + (Char.code u - 48) + +let char_for_octal_code c d u = + Char.chr (64 * (Char.code c - 48) + 8 * (Char.code d - 48) + (Char.code u - 48)) + +let hex_digit_value d = + let d = Char.code d in + if d >= 97 then d - 87 else if d >= 65 then d - 55 else d - 48 + +let char_for_hexadecimal_code d u = + Char.chr (16 * (hex_digit_value d) + (hex_digit_value u)) + +let incr_loc lexbuf delta = + let pos = lexbuf.lex_curr_p in + lexbuf.lex_curr_p <- { pos with + pos_lnum = pos.pos_lnum + 1; + pos_bol = pos.pos_cnum - delta } +} + +(* Character classes — from upstream *) +let identstart = + ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255'] +let identbody = + ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9'] +let backslash_escapes = + ['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] +let lowercase = ['a'-'z' '_'] +let ident = identstart identbody* +let extattrident = ident ('.' ident)* +let blank = [' ' '\009' '\012'] +let uppercase = ['A'-'Z'] +let ocaml_identstart = lowercase | uppercase +let identchar = ['A'-'Z' 'a'-'z' '_' '\'' '0'-'9'] +let utf8 = ['\192'-'\255'] ['\128'-'\191']* +let identstart_ext = ocaml_identstart | utf8 +let identchar_ext = identchar | utf8 +let ocaml_ident = identstart_ext identchar_ext* + +rule main = parse + | [' ' '\013' '\009' '\012'] + + { main lexbuf } + | '\010' + { incr_loc lexbuf 0; main lexbuf } + | "(*" + { let start_p = lexbuf.lex_start_p in + let buf = Buffer.create 128 in + Buffer.add_string buf "(*"; + comment buf 0 lexbuf; + lexbuf.lex_start_p <- start_p; + add_comment lexbuf (Buffer.contents buf); + main lexbuf } + | '_' { UNDERSCORE } + | ident as s + { match s with + | "rule" -> RULE | "parse" -> PARSE | "shortest" -> SHORTEST + | "and" -> AND | "eof" -> EOF_KW | "let" -> LET | "as" -> AS + | "refill" -> + let start_p = lexbuf.lex_start_p in + let buf = Buffer.create 256 in + skip_refill buf lexbuf; + lexbuf.lex_start_p <- start_p; + add_comment lexbuf ("(* refill " ^ Buffer.contents buf ^ " *)"); + main lexbuf + | _ -> IDENT s } + | '"' + { reset_string_buffer(); + store_string_char '"'; + string lexbuf; + STRING (get_stored_string()) } + (* Character literals — from upstream *) + | "'" [^ '\\'] "'" + { CHAR (Lexing.lexeme lexbuf) } + | "'" '\\' backslash_escapes "'" + { CHAR (Lexing.lexeme lexbuf) } + | "'" '\\' (['0'-'9'] ['0'-'9'] ['0'-'9']) "'" + { CHAR (Lexing.lexeme lexbuf) } + | "'" '\\' 'o' (['0'-'3'] ['0'-'7'] ['0'-'7']) "'" + { CHAR (Lexing.lexeme lexbuf) } + | "'" '\\' 'x' (['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']) "'" + { CHAR (Lexing.lexeme lexbuf) } + | '{' + { let start_p = lexbuf.lex_start_p in + let buf = Buffer.create 256 in + Buffer.add_char buf '{'; + action buf [] lexbuf; + lexbuf.lex_start_p <- start_p; + OCAML_CODE (Buffer.contents buf) } + | '|' { PIPE } | '=' { EQUAL } | '#' { HASH } | '*' { STAR } + | '+' { PLUS } | '?' { QUESTION } | '(' { LPAREN } | ')' { RPAREN } + | '[' { LBRACKET } | ']' { RBRACKET } | '^' { CARET } | '-' { DASH } + | eof { EOF } + | _ as c + { error lexbuf (Printf.sprintf "unexpected character: %C" c) } + +(* String parsing — from upstream *) +and string = parse + | '"' { store_string_char '"' } + | '\\' ('\013'* '\010') ([' ' '\009'] * as spaces) + { store_string_chars (Lexing.lexeme lexbuf); + incr_loc lexbuf (String.length spaces); string lexbuf } + | '\\' _ + { store_string_chars (Lexing.lexeme lexbuf); string lexbuf } + | eof { error lexbuf "unterminated string" } + | '\013'* '\010' as s + { store_string_chars s; incr_loc lexbuf 0; string lexbuf } + | _ as c + { store_string_char c; string lexbuf } + +(* Quoted string — from upstream *) +and quoted_string buf delim = parse + | '\013'* '\010' + { incr_loc lexbuf 0; Buffer.add_char buf '\n'; + quoted_string buf delim lexbuf } + | eof { error lexbuf "unterminated quoted string" } + | '|' (lowercase* as delim') '}' + { Buffer.add_string buf (Lexing.lexeme lexbuf); + if delim <> delim' then quoted_string buf delim lexbuf } + | _ as c + { Buffer.add_char buf c; quoted_string buf delim lexbuf } + +(* Comment — from upstream, with buf for text collection *) +and comment buf depth = parse + | "(*" + { Buffer.add_string buf "(*"; comment buf (depth + 1) lexbuf } + | "*)" + { Buffer.add_string buf "*)"; + if depth > 0 then comment buf (depth - 1) lexbuf } + | '"' + { Buffer.add_char buf '"'; + reset_string_buffer(); string_in_comment buf lexbuf; + comment buf depth lexbuf } + | '{' ('%' '%'? extattrident blank*)? (lowercase* as delim) "|" + { Buffer.add_string buf (Lexing.lexeme lexbuf); + quoted_string buf delim lexbuf; + comment buf depth lexbuf } + | "'" + { Buffer.add_char buf '\''; + skip_char buf lexbuf; + comment buf depth lexbuf } + | eof { error lexbuf "unterminated comment" } + | '\010' + { incr_loc lexbuf 0; Buffer.add_char buf '\n'; + comment buf depth lexbuf } + | ocaml_ident as s + { Buffer.add_string buf s; comment buf depth lexbuf } + | _ as c + { Buffer.add_char buf c; comment buf depth lexbuf } + +(* String inside comment — skip to closing quote *) +and string_in_comment buf = parse + | '"' { Buffer.add_char buf '"' } + | '\\' '"' + { Buffer.add_string buf "\\\""; string_in_comment buf lexbuf } + | '\\' '\\' + { Buffer.add_string buf "\\\\"; string_in_comment buf lexbuf } + | eof { error lexbuf "unterminated string in comment" } + | '\013'* '\010' as s + { Buffer.add_string buf s; incr_loc lexbuf 0; + string_in_comment buf lexbuf } + | _ as c + { Buffer.add_char buf c; string_in_comment buf lexbuf } + +(* Action — from upstream, with buf + stack-based matching *) +and action buf stk = parse + | '(' { Buffer.add_char buf '('; action buf ('(' :: stk) lexbuf } + | '{' { Buffer.add_char buf '{'; action buf ('{' :: stk) lexbuf } + | ')' + { Buffer.add_char buf ')'; + match stk with + | '(' :: stk' -> action buf stk' lexbuf + | _ -> error lexbuf "unmatched ) in action" } + | '}' + { match stk with + | [] -> Buffer.add_char buf '}' (* closing brace — done *) + | '{' :: stk' -> + Buffer.add_char buf '}'; action buf stk' lexbuf + | _ -> error lexbuf "unmatched } in action" } + | '"' + { Buffer.add_char buf '"'; + action_string buf lexbuf; + action buf stk lexbuf } + | '{' ('%' '%'? extattrident blank*)? (lowercase* as delim) "|" + { Buffer.add_string buf (Lexing.lexeme lexbuf); + quoted_string buf delim lexbuf; + action buf stk lexbuf } + | "'" + { Buffer.add_char buf '\''; + skip_char buf lexbuf; + action buf stk lexbuf } + | "(*" + { Buffer.add_string buf "(*"; + comment buf 0 lexbuf; + action buf stk lexbuf } + | eof { error lexbuf "unterminated action" } + | '\010' + { incr_loc lexbuf 0; Buffer.add_char buf '\n'; + action buf stk lexbuf } + | ocaml_ident as s + { Buffer.add_string buf s; action buf stk lexbuf } + | _ as c + { Buffer.add_char buf c; action buf stk lexbuf } + +(* String inside action *) +and action_string buf = parse + | '"' { Buffer.add_char buf '"' } + | '\\' '"' + { Buffer.add_string buf "\\\""; action_string buf lexbuf } + | '\\' '\\' + { Buffer.add_string buf "\\\\"; action_string buf lexbuf } + | '\\' '\'' { Buffer.add_string buf "\\\'" ; action_string buf lexbuf } + | eof { error lexbuf "unterminated string in action" } + | '\013'* '\010' as s + { Buffer.add_string buf s; incr_loc lexbuf 0; + action_string buf lexbuf } + | _ as c + { Buffer.add_char buf c; action_string buf lexbuf } + +(* Skip char literal — from upstream *) +and skip_char buf = parse + | '\\' ? ('\013'* '\010') "'" + { Buffer.add_string buf (Lexing.lexeme lexbuf); incr_loc lexbuf 1 } + | [^ '\\' '\'' '\010' '\013'] "'" + { Buffer.add_string buf (Lexing.lexeme lexbuf) } + | '\\' _ "'" + { Buffer.add_string buf (Lexing.lexeme lexbuf) } + | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'" + { Buffer.add_string buf (Lexing.lexeme lexbuf) } + | '\\' 'o' ['0'-'7'] ['0'-'7'] ['0'-'7'] "'" + { Buffer.add_string buf (Lexing.lexeme lexbuf) } + | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'" + { Buffer.add_string buf (Lexing.lexeme lexbuf) } + | "" + { () } (* Not a char literal — just a stray quote *) + +(* Skip refill { ... } block *) +and skip_refill buf = parse + | blank+ { skip_refill buf lexbuf } + | '{' { Buffer.add_char buf '{'; action buf [] lexbuf } + | _ as c { Buffer.add_char buf c; skip_refill buf lexbuf } + | eof { () } diff --git a/vendor/mll-parser/mll_parser.ml b/vendor/mll-parser/mll_parser.ml new file mode 100644 index 0000000000..ec7702623c --- /dev/null +++ b/vendor/mll-parser/mll_parser.ml @@ -0,0 +1,14 @@ +exception Parse_error of string * Lexing.position + +let parse_string ~input_name source = + let lexbuf = Lexing.from_string source in + Location.init_info lexbuf input_name; + Mll_lexer.comments := []; + let result = + try Mll_grammar.lexer_def Mll_lexer.main lexbuf + with Mll_grammar.Error -> + let pos = lexbuf.lex_curr_p in + raise (Parse_error ("syntax error", pos)) + in + let comments = Mll_lexer.reset_comments () in + { result with Mll_ast.comments } diff --git a/vendor/mll-parser/mll_printast.ml b/vendor/mll-parser/mll_printast.ml new file mode 100644 index 0000000000..28a781bffd --- /dev/null +++ b/vendor/mll-parser/mll_printast.ml @@ -0,0 +1,66 @@ +open Mll_ast + +let pp_located fmt x = + Format.fprintf fmt "%s%a" x.value Location.print_loc x.loc + +let pp_code fmt (c : ocaml_code) = + Format.fprintf fmt "%S%a" c.value Location.print_loc c.loc + +let rec pp_regexp fmt = function + | Reof -> Format.fprintf fmt "eof" + | Runderscore -> Format.fprintf fmt "_" + | Rchar s -> Format.fprintf fmt "%s" s + | Rstring s -> Format.fprintf fmt "%s" s + | Rident s -> Format.fprintf fmt "%s" s + | Rsequence rs -> + Format.fprintf fmt "@[(%a)@]" + (Format.pp_print_list ~pp_sep:(fun fmt () -> + Format.fprintf fmt "@ ") pp_regexp) rs + | Ralternative rs -> + Format.fprintf fmt "@[(%a)@]" + (Format.pp_print_list ~pp_sep:(fun fmt () -> + Format.fprintf fmt "@ | ") pp_regexp) rs + | Rrepeat r -> Format.fprintf fmt "%a*" pp_regexp r + | Rplus r -> Format.fprintf fmt "%a+" pp_regexp r + | Roption r -> Format.fprintf fmt "%a?" pp_regexp r + | Rparen r -> Format.fprintf fmt "(%a)" pp_regexp r + | Rcharset entries -> + Format.fprintf fmt "[%a]" + (Format.pp_print_list pp_char_entry) entries + | Rcharset_neg entries -> + Format.fprintf fmt "[^%a]" + (Format.pp_print_list pp_char_entry) entries + | Ras (r, id) -> Format.fprintf fmt "%a as %s" pp_regexp r id + | Rhash (r1, r2) -> + Format.fprintf fmt "%a # %a" pp_regexp r1 pp_regexp r2 + +and pp_char_entry fmt = function + | Cchar s -> Format.fprintf fmt "%s" s + | Crange (s1, s2) -> Format.fprintf fmt "%s-%s" s1 s2 + +let pp_case fmt c = + Format.fprintf fmt " | @[%a@ %a@]" + pp_regexp c.pattern pp_code c.action + +let pp_entry fmt e = + Format.fprintf fmt "@[%a%a = %s@]@.%a" + pp_located e.entry_name + (Format.pp_print_list ~pp_sep:Format.pp_print_space + (fun fmt a -> pp_located fmt a)) e.entry_args + (if e.entry_is_shortest then "shortest" else "parse") + (Format.pp_print_list ~pp_sep:Format.pp_print_newline pp_case) + e.entry_cases + +let pp fmt d = + Format.fprintf fmt "@[" ; + Option.iter (fun h -> Format.fprintf fmt "header: %a@." pp_code h) d.header ; + List.iter (fun nd -> + Format.fprintf fmt "let %a = %a@." + pp_located nd.def_name pp_regexp nd.def_body + ) d.named_defs ; + List.iteri (fun i e -> + Format.fprintf fmt "%s %a@." + (if i = 0 then "rule" else "and") pp_entry e + ) d.rules ; + Option.iter (fun t -> Format.fprintf fmt "trailer: %a@." pp_code t) d.trailer ; + Format.fprintf fmt "@]"