Skip to content

Commit

Permalink
refactor: cleanup pp of Text_tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
NeoKaios committed Aug 8, 2024
1 parent e15eab4 commit 37e268d
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 64 deletions.
5 changes: 2 additions & 3 deletions src/lsp/cobol_lsp/lsp_completion.ml
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,9 @@ let string_of_K tokens =
| [] -> ()
| Cobol_parser.Tokens.PERIOD::tl ->
Fmt.string ppf ".\n"; inner tl
| hd::tl ->
let token' = hd &@ Srcloc.dummy in
| token::tl ->
if space_before then Fmt.sp ppf ();
Cobol_parser.INTERNAL.pp_token ppf token'; (* TODO: Cobol_parser.Tokens.pp *)
Cobol_parser.Tokens.pp ppf token;
inner tl
in
inner ~space_before:false
Expand Down
14 changes: 7 additions & 7 deletions src/lsp/cobol_parser/cobol_parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ module Options = Parser_options
module Outputs = Parser_outputs
module Diagnostics = Parser_diagnostics

module Tokens = Grammar_tokens
module Tokens = struct
include Grammar_tokens
let pp = Text_tokenizer.pp_token
let pp' = Text_tokenizer.pp_token'
let pp'_list = Text_tokenizer.pp_token'_list
let pp'_list_with_loc_info = Text_tokenizer.pp_tokens_with_loc_info
end
module Expect = Grammar_expect
module Printer = Grammar_printer
module Keywords = Text_keywords
Expand All @@ -36,12 +42,6 @@ include Parser_engine

module INTERNAL = struct

(** {2 COBOL tokens} *)

let pp_token = Text_tokenizer.pp_token
let pp_tokens = Text_tokenizer.pp_tokens
let pp_tokens' = Text_tokenizer.pp_tokens'

(** {2 COBOL grammar} *)

module Grammar (* : Grammar_sig.S *) = Grammar
Expand Down
4 changes: 2 additions & 2 deletions src/lsp/cobol_parser/parser_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ let all_diags { preproc = { pp; diags; tokzr; _ }; _ } =

(* --- *)

let rec produce_tokens (ps: _ state as 's) : 's * Text_tokenizer.tokens =
let rec produce_tokens (ps: _ state as 's) : 's * Tokzr.token' list =
let text, pp = Cobol_preproc.next_chunk ps.preproc.pp in
let { preproc = { pp; tokzr; _ }; _ } as ps = update_pp ps pp in
assert (text <> []);
Expand Down Expand Up @@ -400,7 +400,7 @@ type ('a, 'm) stage =
rewound. *)
and ('a, 'm) interim_stage =
'm state *
Text_tokenizer.tokens *
Tokzr.token' list *
'a Grammar_interpr.env (* Always valid input_needed env. *)

(** Final stage, at which the parser has stopped processing. *)
Expand Down
47 changes: 25 additions & 22 deletions src/lsp/cobol_parser/text_tokenizer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ open Parser_diagnostics

(* --- *)

type token = Grammar_tokens.token with_loc
type tokens = token list
type token' = Grammar_tokens.token with_loc

let combined_tokens =
(* /!\ WARNING: None of the constituents of combined tokens may be
Expand Down Expand Up @@ -93,40 +92,43 @@ let pp_token_string: Grammar_tokens.token Pretty.printer = fun ppf ->
try Hashtbl.find combined_tokens t
with Not_found -> "<unknown/unexpected token>"

let pp_token: token Pretty.printer = fun ppf ->
let pp_token: Grammar_tokens.token Pretty.printer = fun ppf ->
let string s = Pretty.string ppf s
and print format = Pretty.print ppf format in
fun t -> match ~&t with
fun t -> match t with
| WORD w -> print "WORD[%s]" w
| WORD_IN_AREA_A w -> print "WORD_IN_AREA_A[%s]" w
| PICTURE_STRING w -> print "PICTURE_STRING[%s]" w
| INFO_WORD s -> print "INFO_WORD[%s]" s
| COMMENT_ENTRY _ -> print "COMMENT_ENTRY[%a]" pp_token_string ~&t
| EXEC_BLOCK _ -> print "EXEC_BLOCK[%a]" pp_token_string ~&t
| COMMENT_ENTRY _ -> print "COMMENT_ENTRY[%a]" pp_token_string t
| EXEC_BLOCK _ -> print "EXEC_BLOCK[%a]" pp_token_string t
| DIGITS i -> print "DIGITS[%s]" i
| SINTLIT i -> print "SINT[%s]" i
| FIXEDLIT (i, sep, d) -> print "FIXED[%s%c%s]" i sep d
| FLOATLIT (i, sep, d, e) -> print "FLOAT[%s%c%sE%s]" i sep d e
| INTERVENING_ c -> print "<%c>" c
| tok when is_intrinsic_token tok ->
print "INTRINSIC_FUNC[%a]" pp_token_string ~&t
print "INTRINSIC_FUNC[%a]" pp_token_string t
| EOF -> string "EOF"
| t -> pp_token_string ppf t

let pp_tokens =
Pretty.list ~fopen:"@[" ~fclose:"@]" pp_token
let pp_token': token' Pretty.printer =
Cobol_common.Srcloc.pp_with_loc pp_token

let pp_tokens' ?fsep =
let pp_token'_list =
Pretty.list ~fopen:"@[" ~fclose:"@]" pp_token'

let pp_tokens_with_loc_info ?fsep =
Pretty.list ~fopen:"@[" ?fsep ~fclose:"@]" begin fun ppf t ->
Pretty.print ppf "%a@@%a"
pp_token t
pp_token' t
Cobol_common.Srcloc.pp_srcloc_struct ~@t
end

(* --- *)

let loc_in_area_a: srcloc -> bool = Cobol_common.Srcloc.in_area_a
let token_in_area_a: token -> bool = fun t -> loc_in_area_a ~@t
let token_in_area_a: token' -> bool = fun t -> loc_in_area_a ~@t

(* --- *)

Expand Down Expand Up @@ -338,12 +340,12 @@ let preproc_n_combine_tokens ~intrinsics_enabled ~source_format =

type 'a memory =
| Amnesic: Cobol_common.Behaviors.amnesic memory
| Eidetic: tokens -> Cobol_common.Behaviors.eidetic memory
| Eidetic: token' list -> Cobol_common.Behaviors.eidetic memory

type 'm state =
{
lexer_state: Text_lexer.lexer_state;
leftover_tokens: tokens; (* non-empty only when [preproc_n_combine_tokens]
leftover_tokens: token' list; (* non-empty only when [preproc_n_combine_tokens]
errors out for lack of input tokens. *)
memory: 'm memory;
context_stack: Context.stack;
Expand Down Expand Up @@ -542,7 +544,7 @@ let acc_tokens_of_text_word (rev_prefix_tokens, state) { payload = c; loc } =
else nominal state


let tokens_of_text: 'a state -> text -> tokens * 'a state = fun state text ->
let tokens_of_text: 'a state -> text -> token' list * 'a state = fun state text ->
let tokens, state = List.fold_left acc_tokens_of_text_word ([], state) text in
List.rev tokens, state

Expand All @@ -555,7 +557,7 @@ let tokenize_text ~source_format ({ leftover_tokens; _ } as state) text =
match preproc_n_combine_tokens ~intrinsics_enabled ~source_format tokens with
| Ok (tokens, diags) ->
if show `Tks state then
Pretty.error "Tks: %a@." pp_tokens tokens;
Pretty.error "Tks: %a@." pp_token'_list tokens;
let diags = Parser_diagnostics.union diags state.diags in
Ok tokens, { state with diags }
| Error `MissingInputs ->
Expand Down Expand Up @@ -604,7 +606,7 @@ let retokenize { lexer_state; persist = { lexer; _ }; _ } w =
(handling of DECIMAL POINT). *)
fst @@ Text_lexer.read_tokens lexer lexer_state w

let reword_intrinsics s : tokens -> tokens =
let reword_intrinsics s : token' list -> token' list =
(* Some intrinsics NOT preceded with FUNCTION may now be words; assumes
[Disabled_intrinsics] does not occur on a `FUNCTION` keyword (but that's
unlikely). *)
Expand All @@ -628,7 +630,8 @@ let reword_intrinsics s : tokens -> tokens =
{!module:Text_lexer}. *)
(* TODO: Find whether everything related to Area A and comma-retokenization
could be moved to Text_lexer *)
let retokenize_after: lexer_update -> _ state -> tokens -> tokens = fun update s ->
let retokenize_after: lexer_update -> _ state -> token' list -> token' list =
fun update s ->
match update with
| Enabled_keywords tokens
| Disabled_keywords tokens
Expand Down Expand Up @@ -735,7 +738,7 @@ let enable_intrinsics state token tokens =
let tokens = retokenize_after Enabled_intrinsics state tokens in
let token, tokens = List.hd tokens, List.tl tokens in
if show `Tks state then
Pretty.error "Tks': %a@." pp_tokens tokens;
Pretty.error "Tks': %a@." pp_token'_list tokens;
emit_token state token, token, tokens


Expand All @@ -747,7 +750,7 @@ let disable_intrinsics state token tokens =
let tokens = retokenize_after Disabled_intrinsics state tokens in
let token, tokens = List.hd tokens, List.tl tokens in
if show `Tks state then
Pretty.error "Tks': %a@." pp_tokens tokens;
Pretty.error "Tks': %a@." pp_token'_list tokens;
emit_token state token, token, tokens


Expand Down Expand Up @@ -782,7 +785,7 @@ let decimal_point_is_comma (type m) (state: m state) token tokens =
let tokens = retokenize_after CommaBecomesDecimalPoint state tokens in
let token, tokens = List.hd tokens, List.tl tokens in
if show `Tks state then
Pretty.error "Tks': %a@." pp_tokens tokens;
Pretty.error "Tks': %a@." pp_token'_list tokens;
emit_token state token, token, tokens


Expand Down Expand Up @@ -816,7 +819,7 @@ let push_contexts state tokens : Context.t list -> 's * 'a = function
(* Update tokenizer state *)
let state, tokens = enable_tokens state tokens tokens_set in
if show `Tks state then
Pretty.error "Tks': %a@." pp_tokens tokens;
Pretty.error "Tks': %a@." pp_token'_list tokens;

with_context_stack state context_stack, tokens

Expand Down
57 changes: 29 additions & 28 deletions src/lsp/cobol_parser/text_tokenizer.mli
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ open EzCompat
(** {2 Compilation group tokens} *)

(** Tokens passed to {!Parser}; can be obtained via {!tokenize_text}. *)
type token = Grammar_tokens.token Cobol_ptree.with_loc
type tokens = token list
val pp_token: token Pretty.printer
val pp_tokens: tokens Pretty.printer
val pp_tokens': ?fsep:Pretty.simple -> tokens Pretty.printer
type token' = Grammar_tokens.token Cobol_ptree.with_loc

val pp_token: Grammar_tokens.token Pretty.printer
val pp_token': token' Pretty.printer
val pp_token'_list: token' list Pretty.printer
val pp_tokens_with_loc_info: ?fsep:Pretty.simple -> token' list Pretty.printer

(* --- *)

Expand Down Expand Up @@ -62,44 +63,44 @@ val diagnostics

val parsed_tokens
: Cobol_common.Behaviors.eidetic state
-> tokens Lazy.t
-> token' list Lazy.t

val tokenize_text
: source_format: Cobol_preproc.Src_format.any
-> 'a state
-> Cobol_preproc.Text.t
-> (tokens, [>`MissingInputs | `ReachedEOF of tokens]) result * 'a state
-> (token' list, [>`MissingInputs | `ReachedEOF of token' list]) result * 'a state

val next_token
: 'a state
-> tokens
-> ('a state * token * tokens) option
-> token' list
-> ('a state * token' * token' list) option

val put_token_back
: 'a state
-> token
-> tokens
-> 'a state * tokens
-> token'
-> token' list
-> 'a state * token' list

(* --- *)

val enable_intrinsics
: 'a state
-> token
-> tokens
-> 'a state * token * tokens
-> token'
-> token' list
-> 'a state * token' * token' list

val disable_intrinsics
: 'a state
-> token
-> tokens
-> 'a state * token * tokens
-> token'
-> token' list
-> 'a state * token' * token' list

val reset_intrinsics
: 'a state
-> token
-> tokens
-> 'a state * token * tokens
-> token'
-> token' list
-> 'a state * token' * token' list

val replace_intrinsics
: 'a state
Expand All @@ -108,26 +109,26 @@ val replace_intrinsics

val decimal_point_is_comma
: 'a state
-> token
-> tokens
-> 'a state * token * tokens
-> token'
-> token' list
-> 'a state * token' * token' list

(* --- *)

val push_contexts
: 'a state
-> tokens
-> token' list
-> Grammar_contexts.context list
-> 'a state * tokens
-> 'a state * token' list

val top_context
: _ state
-> Grammar_contexts.context option

val pop_context
: 'a state
-> tokens
-> 'a state * tokens
-> token' list
-> 'a state * token' list

val enable_context_sensitive_tokens: _ state -> unit
val disable_context_sensitive_tokens: _ state -> unit
Expand Down
4 changes: 2 additions & 2 deletions test/cobol_parsing/parser_testing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ let show_parsed_tokens ?(parser_options = options ())
Cobol_parser.parse_with_artifacts ~options:parser_options
in
(if with_locations
then Cobol_parser.INTERNAL.pp_tokens' ~fsep:"@\n"
else Cobol_parser.INTERNAL.pp_tokens) Fmt.stdout (Lazy.force tokens)
then Cobol_parser.Tokens.pp'_list_with_loc_info ~fsep:"@\n"
else Cobol_parser.Tokens.pp'_list) Fmt.stdout (Lazy.force tokens)

let show_diagnostics ?(parser_options = options ())
?source_format ?filename contents =
Expand Down

0 comments on commit 37e268d

Please sign in to comment.