diff --git a/src/lsp/cobol_lsp/lsp_document.ml b/src/lsp/cobol_lsp/lsp_document.ml index 40bdb8240..f573dbc5c 100644 --- a/src/lsp/cobol_lsp/lsp_document.ml +++ b/src/lsp/cobol_lsp/lsp_document.ml @@ -63,6 +63,7 @@ module TYPES = struct doc_cache_pplog: Cobol_preproc.Trace.log; doc_cache_tokens: Cobol_parser.Outputs.tokens_with_locs; doc_cache_comments: Cobol_preproc.Text.comments; + doc_cache_ignored: Cobol_common.Srcloc.lexloc list; doc_cache_parsed: (Cobol_ptree.compilation_group * CUs.t) option; doc_cache_diags: DIAGS.Set.serializable; } @@ -116,7 +117,8 @@ let lazy_references ptree cus defs = let no_artifacts = Cobol_parser.Outputs.{ tokens = lazy []; pplog = Cobol_preproc.Trace.empty; - comments = [] } + rev_comments = []; + rev_ignored = [] } let gather_parsed_data ptree = Cobol_typeck.analyze_compilation_group ptree |> @@ -216,7 +218,8 @@ let retrieve_parsed_data: document -> parsed_data = function (** Caching utilities *) let to_cache ({ project; textdoc; parsed; diags; - artifacts = { pplog; tokens; comments; _ }; _ } as doc) = + artifacts = { pplog; tokens; + rev_comments; rev_ignored; _ }; _ } as doc) = { doc_cache_filename = Lsp_project.relative_path_for ~uri:(uri doc) project; doc_cache_checksum = Digest.string (Lsp.Text_document.text textdoc); @@ -224,7 +227,8 @@ let to_cache ({ project; textdoc; parsed; diags; doc_cache_version = Lsp.Text_document.version textdoc; doc_cache_pplog = pplog; doc_cache_tokens = Lazy.force tokens; - doc_cache_comments = comments; + doc_cache_comments = rev_comments; + doc_cache_ignored = rev_ignored; doc_cache_parsed = Option.map (fun { ptree; cus; _ } -> ptree, cus) parsed; doc_cache_diags = DIAGS.Set.apply_delayed_formatting diags; } @@ -239,7 +243,8 @@ let of_cache ~project doc_cache_version = version; doc_cache_pplog = pplog; doc_cache_tokens = tokens; - doc_cache_comments = comments; + doc_cache_comments = rev_comments; + doc_cache_ignored = rev_ignored; doc_cache_parsed = parsed; doc_cache_diags = diags } = let absolute_filename = Lsp_project.absolute_path_for ~filename project in @@ -260,7 +265,8 @@ let of_cache ~project { ptree; cus; definitions; references }) parsed in - { doc with artifacts = { pplog; tokens = lazy tokens; comments }; + { doc with artifacts = { pplog; tokens = lazy tokens; + rev_comments; rev_ignored }; diags = DIAGS.Set.of_serializable diags; parsed } diff --git a/src/lsp/cobol_lsp/lsp_request.ml b/src/lsp/cobol_lsp/lsp_request.ml index fcf207b0b..b80534384 100644 --- a/src/lsp/cobol_lsp/lsp_request.ml +++ b/src/lsp/cobol_lsp/lsp_request.ml @@ -216,11 +216,13 @@ let handle_semtoks_full, handle_semtoks_range = let handle registry ?range (doc: TextDocumentIdentifier.t) = try_with_document_data registry doc - ~f:begin fun ~doc:{ artifacts = { pplog; tokens; comments; _ }; + ~f:begin fun ~doc:{ artifacts = { pplog; tokens; + rev_comments; rev_ignored; _ }; _ } Lsp_document.{ ptree; _ } -> let data = Lsp_semtoks.data ~filename:(Lsp.Uri.to_path doc.uri) ~range - ~pplog ~comments ~tokens:(Lazy.force tokens) ~ptree + ~pplog ~rev_comments ~rev_ignored + ~tokens:(Lazy.force tokens) ~ptree in Some (SemanticTokens.create ~data ()) end diff --git a/src/lsp/cobol_lsp/lsp_semtoks.ml b/src/lsp/cobol_lsp/lsp_semtoks.ml index 9dbf4d275..409be6f88 100644 --- a/src/lsp/cobol_lsp/lsp_semtoks.ml +++ b/src/lsp/cobol_lsp/lsp_semtoks.ml @@ -553,17 +553,32 @@ let semtoks_from_ptree ~filename ?range ptree = end) ptree [] |> List.rev -let semtoks_of_comments ~filename ?range comments = comments |> - List.filter_map begin function +let 
semtoks_of_comments ~filename ?range rev_comments = + rev_comments |> + List.fold_left begin fun acc -> function | Cobol_preproc.Text.{ comment_loc = s, _ as lexloc; _ } when s.Lexing.pos_fname = filename && Option.fold range ~some:(fun r -> Lsp_position.intersects_lexloc r lexloc) ~none:true -> - Some (semtok TOKTYP.comment lexloc) + semtok TOKTYP.comment lexloc :: acc | _ -> - None - end + acc + end [] + +let semtoks_of_ignored ~filename ?range rev_ignored = + (* Decorate like comments, for lack of a better suited token type in the set + of available ones. This could be improved later with some client-side code + or configuration. *) + rev_ignored |> + List.fold_left begin fun acc ((s, _ ) as lexloc) -> + if s.Lexing.pos_fname = filename && + Option.fold range + ~some:(fun r -> Lsp_position.intersects_lexloc r lexloc) + ~none:true + then semtok TOKTYP.comment lexloc :: acc + else acc + end [] let semtoks_of_preproc_statements ~filename ?range pplog = List.rev @@ List.fold_left begin fun acc -> function @@ -646,11 +661,13 @@ let ensure_sorted name ~filename cmp l = List.fast_sort cmp l -let data ~filename ~range ~tokens ~pplog ~comments ~ptree : int array = +let data ~filename ~range ~tokens ~pplog + ~rev_comments ~rev_ignored ~ptree : int array = let semtoks1 = semtoks_of_non_ambigious_tokens ~filename ?range tokens in let semtoks2 = semtoks_from_ptree ~filename ?range ptree in - let semtoks3 = semtoks_of_comments ~filename ?range comments in - let semtoks4 = semtoks_of_preproc_statements ~filename ?range pplog in + let semtoks3 = semtoks_of_comments ~filename ?range rev_comments in + let semtoks4 = semtoks_of_ignored ~filename ?range rev_ignored in + let semtoks5 = semtoks_of_preproc_statements ~filename ?range pplog in (* NB: In *principle* all those lists are already sorted w.r.t lexical locations in [filename]. We just check that for now and raise a warning, in case. 
*) @@ -660,8 +677,10 @@ let data ~filename ~range ~tokens ~pplog ~comments ~ptree : int array = let semtoks1 = ensure_sorted "nonambiguous" ~filename compare_semtoks semtoks1 and semtoks2 = ensure_sorted "ptree" ~filename compare_semtoks semtoks2 and semtoks3 = ensure_sorted "comments" ~filename compare_semtoks semtoks3 - and semtoks4 = ensure_sorted "preproc" ~filename compare_semtoks semtoks4 in + and semtoks4 = ensure_sorted "ignored" ~filename compare_semtoks semtoks4 + and semtoks5 = ensure_sorted "preproc" ~filename compare_semtoks semtoks5 in relative_semtoks List.(merge compare_semtoks semtoks1 @@ merge compare_semtoks semtoks2 @@ - merge compare_semtoks semtoks3 @@ semtoks4) + merge compare_semtoks semtoks3 @@ + merge compare_semtoks semtoks4 @@ semtoks5) diff --git a/src/lsp/cobol_lsp/lsp_semtoks.mli b/src/lsp/cobol_lsp/lsp_semtoks.mli index 2e433f845..6754cf631 100644 --- a/src/lsp/cobol_lsp/lsp_semtoks.mli +++ b/src/lsp/cobol_lsp/lsp_semtoks.mli @@ -21,6 +21,7 @@ val data -> range: Lsp.Types.Range.t option -> tokens: Cobol_parser.Outputs.tokens_with_locs -> pplog: Cobol_preproc.Trace.log - -> comments: Cobol_preproc.Text.comments + -> rev_comments: Cobol_preproc.Text.comments + -> rev_ignored: Cobol_common.Srcloc.lexloc list -> ptree: Cobol_ptree.compilation_group -> int array diff --git a/src/lsp/cobol_parser/parser_engine.ml b/src/lsp/cobol_parser/parser_engine.ml index 14c3fdce3..f033c840d 100644 --- a/src/lsp/cobol_parser/parser_engine.ml +++ b/src/lsp/cobol_parser/parser_engine.ml @@ -419,8 +419,9 @@ let aggregate_output (type m) (ps: m state) res | Eidetic -> let artifacts = { tokens = Tokzr.parsed_tokens ps.preproc.tokzr; - pplog = Cobol_preproc.log ps.preproc.pp; - comments = Cobol_preproc.comments ps.preproc.pp } in + pplog = Cobol_preproc.rev_log ps.preproc.pp; + rev_comments = Cobol_preproc.rev_comments ps.preproc.pp; + rev_ignored = Cobol_preproc.rev_ignored ps.preproc.pp } in WithArtifacts (res, artifacts) (** Simple parsing *) @@ -509,32 +510,14 @@ let parse_with_history ?(save_stage = 10) rwps = (rewindable_parser_state rwps).preproc.tokzr; res, rwps - -let lexing_postion_of ~position rwps = match position with - | Lexing pos -> - pos - | Indexed { line; char } -> - let ps = rewindable_parser_state rwps in - let newline_cnums = Cobol_preproc.newline_cnums ps.preproc.pp in - if newline_cnums = [] - then raise Not_found (* no complete line was processed yet; just skip *) - else - let lexpos = Cobol_preproc.position ps.preproc.pp in - try - let pos_bol = - try List.nth newline_cnums (line - 1) - with Not_found | Invalid_argument _ -> 0 - in - Lexing.{ lexpos with pos_bol; - pos_cnum = pos_bol + char; - pos_lnum = line + 1 } - with Failure _ -> - (* The given line exceeds what was already processed, so we restart - from the current preprocessor position. 
*) - lexpos - let find_history_event_preceding ~position ({ store; _ } as rwps) = - let lexpos = lexing_postion_of ~position rwps in + let lexpos = match position with + | Lexing pos -> + pos + | Indexed { line; char } -> + let ps = rewindable_parser_state rwps in + Cobol_preproc.position_at ~line ~char ps.preproc.pp + in let rec aux = function | [] -> raise Not_found diff --git a/src/lsp/cobol_parser/parser_outputs.ml b/src/lsp/cobol_parser/parser_outputs.ml index 46b5fa2c2..0d88ed5ad 100644 --- a/src/lsp/cobol_parser/parser_outputs.ml +++ b/src/lsp/cobol_parser/parser_outputs.ml @@ -22,7 +22,8 @@ type artifacts = { tokens: tokens_with_locs Lazy.t; pplog: Cobol_preproc.Trace.log; - comments: Cobol_preproc.Text.comments; + rev_comments: Cobol_preproc.Text.comments; + rev_ignored: lexloc list; } (** The output of parsing functions depends on its memorization abilities: diff --git a/src/lsp/cobol_preproc/preproc_engine.ml b/src/lsp/cobol_preproc/preproc_engine.ml index ab82a936d..1167117d9 100644 --- a/src/lsp/cobol_preproc/preproc_engine.ml +++ b/src/lsp/cobol_preproc/preproc_engine.ml @@ -47,11 +47,33 @@ and preprocessor_persist = let diags { diags; reader; _ } = DIAGS.Set.union diags @@ Src_reader.diags reader let add_diag lp d = { lp with diags = DIAGS.Set.cons d lp.diags } let add_diags lp d = { lp with diags = DIAGS.Set.union d lp.diags } -let log { pplog; _ } = pplog -let source_format { reader; _ } = Src_reader.source_format reader let position { reader; _ } = Src_reader.position reader -let comments { reader; _ } = Src_reader.comments reader -let newline_cnums { reader; _ } = Src_reader.newline_cnums reader +let source_format { reader; _ } = Src_reader.source_format reader +let rev_log { pplog; _ } = pplog +let rev_comments { reader; _ } = Src_reader.rev_comments reader +let rev_ignored { reader; _ } = Src_reader.rev_ignored reader + +(** [position_at ~line ~char pp] computes a lexing position that corresponds to + the given line and character indexes (both starting at 0) in the input + already read by [pp]. Raises {!Not_found} if no complete line has been + processed yet; returns the current position if the given line index does + not correspond to an already processed line.
*) +let position_at ~line ~char { reader; _ } = + let rev_newline_cnums = Src_reader.rev_newline_cnums reader in + if rev_newline_cnums = [] + then raise Not_found + else + let lexpos = Src_reader.position reader in + try + let pos_bol = + try List.nth rev_newline_cnums (lexpos.pos_lnum - line - 1) + with Not_found | Invalid_argument _ -> 0 + in + Lexing.{ lexpos with pos_bol; + pos_cnum = pos_bol + char; + pos_lnum = line + 1 } + with Failure _ -> + lexpos let with_reader lp reader = if lp.reader == reader then lp else { lp with reader } diff --git a/src/lsp/cobol_preproc/preproc_engine.mli b/src/lsp/cobol_preproc/preproc_engine.mli index 6178d6e71..b237854e0 100644 --- a/src/lsp/cobol_preproc/preproc_engine.mli +++ b/src/lsp/cobol_preproc/preproc_engine.mli @@ -30,11 +30,12 @@ val reset_preprocessor_for_string val diags: preprocessor -> Cobol_common.Diagnostics.Set.t val add_diag: preprocessor -> Cobol_common.Diagnostics.t -> preprocessor val add_diags: preprocessor -> Cobol_common.Diagnostics.Set.t -> preprocessor -val log: preprocessor -> Preproc_trace.log -val comments: preprocessor -> Text.comments val position: preprocessor -> Lexing.position +val position_at: line:int -> char: int -> preprocessor -> Lexing.position val source_format: preprocessor -> Src_format.any -val newline_cnums: preprocessor -> int list +val rev_log: preprocessor -> Preproc_trace.log +val rev_comments: preprocessor -> Text.comments +val rev_ignored: preprocessor -> lexloc list val next_chunk: preprocessor -> Text.text * preprocessor diff --git a/src/lsp/cobol_preproc/src_lexer.mll b/src/lsp/cobol_preproc/src_lexer.mll index a5b6abad4..060beb41f 100644 --- a/src/lsp/cobol_preproc/src_lexer.mll +++ b/src/lsp/cobol_preproc/src_lexer.mll @@ -59,7 +59,7 @@ let newline = '\r'* '\n' let nnl = _ # ['\r' '\n'] (* anything but newline *) -let sna = nnl nnl nnl nnl nnl nnl (* 6 chars *) +let sna = nnl nnl nnl nnl nnl nnl (* 6 chars; TODO: exclude tabs *) let spaces = ([' ' '\t']*) let blank = [' ' '\009' '\r'] let nonblank = nnl # blank @@ -131,42 +131,56 @@ let cdir_word = rule fixed_line state = shortest - | sna ' ' | '\t' (* nominal *) + | sna (* nominal *) + { + fixed_indicator (Src_lexing.sna state lexbuf) lexbuf + } + | '\t' + { + fixed_nominal_line (Src_lexing.flush_continued state) lexbuf + } + | (nnl* newline) (* blank line (too short) *) + { + Src_lexing.new_line (Src_lexing.sna state lexbuf) lexbuf + } + | (nnl* eof) (* blank line (too short), without newline character *) + { + Src_lexing.(flush @@ eof (Src_lexing.sna state lexbuf) lexbuf) + } +and fixed_indicator state + = parse + | ' ' | '\t' (* second tab *) (* nominal *) { fixed_nominal_line (Src_lexing.flush_continued state) lexbuf } - | sna '-' (* continuation line *) + | '-' (* continuation line *) { fixed_continue_line state lexbuf } - | sna ('$' as marker) (* compiler directive *) + | ('$' as marker) (* compiler directive *) { fixed_mf_cdir_line (String.make 1 marker) state lexbuf } - | sna ('>' as marker) + | ('>' as marker) { maybe_fixed_cdir_line marker state lexbuf } - | sna (['*' '/'] as marker) (* comment line *) + | (['*' '/'] as marker) (* comment line *) { comment_line marker state lexbuf } - | sna ['d' 'D'] + | ['d' 'D'] { fixed_debug_line state lexbuf } - | sna (_#['\n' '\r'] as c) (* unknown indicator *) + | (_#['\n' '\r'] as c) (* unknown indicator *) { Src_lexing.unexpected Char ~c ~knd:"indicator" state lexbuf ~k:(fun state -> fixed_nominal_line (Src_lexing.flush_continued state)) } - | (nnl* newline) (* blank line (too short) *) - { - 
Src_lexing.new_line state lexbuf - } - | (nnl* eof) (* blank line (too short), without newline character *) + | epsilon { - Src_lexing.(flush @@ eof state lexbuf) + gobble_line (Src_lexing.sna state lexbuf) lexbuf } and xopen_line state (* X/Open free-form *) = parse (* (note no continuation line indicator) *) diff --git a/src/lsp/cobol_preproc/src_lexing.ml b/src/lsp/cobol_preproc/src_lexing.ml index c226666e0..31e0218c2 100644 --- a/src/lsp/cobol_preproc/src_lexing.ml +++ b/src/lsp/cobol_preproc/src_lexing.ml @@ -32,6 +32,7 @@ type 'k state = continued: continued; pseudotext: (srcloc * text) option; comments: comments; + ignored: lexloc list; (** lexical locations of ignored text *) cdir_seen: bool; newline: bool; newline_cnums: int list; (** index of all newline characters encountered @@ -65,6 +66,7 @@ let init_state : 'k source_format -> 'k state = fun source_format -> continued = CNone; pseudotext = None; comments = []; + ignored = []; cdir_seen = false; newline = true; newline_cnums = []; @@ -77,8 +79,9 @@ let init_state : 'k source_format -> 'k state = fun source_format -> } let diagnostics { diags; _ } = diags -let comments { comments; _ } = List.rev comments -let newline_cnums { newline_cnums; _ } = List.rev newline_cnums +let rev_comments { comments; _ } = comments +let rev_ignored { ignored; _ } = ignored +let rev_newline_cnums { newline_cnums; _ } = newline_cnums let source_format { config = { source_format; _ }; _ } = source_format let allow_debug { config = { debug; _ }; _ } = debug @@ -125,6 +128,9 @@ let raw_loc ~start_pos ~end_pos { newline; config = { source_format; _ }; _ } = in Cobol_common.Srcloc.raw ~in_area_a (start_pos, end_pos) +let ignore_lexloc ~start_pos ~end_pos state = + { state with ignored = (start_pos, end_pos) :: state.ignored } + let emit prod ({ pseudotext; cdir_seen; _ } as state) = match pseudotext with | None -> @@ -153,6 +159,19 @@ let append t state = | _, { loc; _ } -> lex_error state ~loc "Unexpected@ `%a'@ in@ continuation" Text.pp_word t +let sna ({ config = { source_format; _ }; _ } as state) lexbuf = + let indicator_pos, FixedWidth _ = source_format in + match indicator_pos with + | FixedIndic -> + let start_pos = Lexing.lexeme_start_p lexbuf in + let lex_len = Lexing.lexeme_end lexbuf - start_pos.pos_cnum in + let sna_len = min 6 lex_len in + let end_pos = { start_pos with + pos_cnum = start_pos.pos_cnum + sna_len } in + ignore_lexloc ~start_pos ~end_pos state + | _ -> + state + let new_line state lexbuf = Lexing.new_line lexbuf; let state = @@ -490,23 +509,26 @@ let comment ?(marker = "") ?(floating = false) state lexbuf = in new_line { state with comments = comment :: state.comments } lexbuf -let trunc_to_col n ((s, sp, ep) as info: lexeme_info) = +let trunc_to_col n ((s, sp, ep) as info: lexeme_info) state = let sc = pos_column sp and ec = pos_column ep in assert (sc <= n); (* starts on last column (CHECKME: always avoided?) 
*) if ec <= n then - info, if ec = n + 1 then Tacked else Nominal + info, (if ec = n + 1 then Tacked else Nominal), state else (* truncate lexeme and shift end position accordingly *) let s = String.sub s 0 (n - sc + 1) in - (s, sp, { ep with pos_cnum = ep.pos_cnum - ec + n + 1}), Tacked + let ep' = { ep with pos_cnum = ep.pos_cnum - ec + n + 1} in + (s, sp, ep'), Tacked, ignore_lexloc ~start_pos:ep' ~end_pos:ep state let fixed_text mk ({ config = { source_format; _ }; _ } as state) lexbuf = let _, FixedWidth { cut_at_col; _ } = source_format in - let (_, start_pos, _) as lexinf = lexeme_info lexbuf in + let (_, start_pos, end_pos) as lexinf = lexeme_info lexbuf in if pos_column start_pos > cut_at_col then - state, Tacked + ignore_lexloc ~start_pos ~end_pos state, Tacked else - let (s, start_pos, end_pos), fitting = trunc_to_col cut_at_col lexinf in + let (s, start_pos, end_pos), fitting, state = + trunc_to_col cut_at_col lexinf state + in mk ~start_pos ~end_pos ?fitting:(Some fitting) s state, fitting let fixed_text_word ?cont : fixed state -> _ = @@ -542,7 +564,9 @@ let fixed_alphanum_lit if pos_column start_pos > cut_at_col then state, Tacked else - let (s, start_pos, end_pos), fitting = trunc_to_col cut_at_col lexinf in + let (s, start_pos, end_pos), fitting, state = + trunc_to_col cut_at_col lexinf state + in let s, knd, state = extract_knd s state lexbuf in let s, end_pos, fitting = (* Actually double the opening delimiter ('\'' or '"'), to have the diff --git a/src/lsp/cobol_preproc/src_lexing.mli b/src/lsp/cobol_preproc/src_lexing.mli index 4fd5db044..eefbb4fb9 100644 --- a/src/lsp/cobol_preproc/src_lexing.mli +++ b/src/lsp/cobol_preproc/src_lexing.mli @@ -15,8 +15,9 @@ type 'k state val init_state: 'k Src_format.source_format -> 'k state val diagnostics: _ state -> Cobol_common.Diagnostics.Set.t -val comments: _ state -> Text.comments -val newline_cnums: _ state -> int list +val rev_comments: _ state -> Text.comments +val rev_ignored: _ state -> Cobol_common.Srcloc.lexloc list +val rev_newline_cnums: _ state -> int list val source_format: 'k state -> 'k Src_format.source_format val change_source_format : 'k state -> 'c Src_format.source_format -> ('c state, unit) result @@ -50,6 +51,9 @@ val eqeq' : k:('a state -> Lexing.lexbuf -> 'b) -> 'a state -> Lexing.lexbuf -> 'b +val sna + : (Src_format.fixed state as 's) + -> Lexing.lexbuf -> 's val cdir_word : ?marker:string -> (Src_format.fixed state as 's) diff --git a/src/lsp/cobol_preproc/src_reader.ml b/src/lsp/cobol_preproc/src_reader.ml index fcb5c6a89..3c9103466 100644 --- a/src/lsp/cobol_preproc/src_reader.ml +++ b/src/lsp/cobol_preproc/src_reader.ml @@ -26,9 +26,10 @@ type t = Plx: 'k reader -> t [@@unboxe let diags (Plx (pl, _)) = Src_lexing.diagnostics pl let position (Plx (_, lexbuf)) = lexbuf.Lexing.lex_curr_p -let comments (Plx (pl, _)) = Src_lexing.comments pl let source_format (Plx (pl, _)) = Src_format.SF (Src_lexing.source_format pl) -let newline_cnums (Plx (pl, _)) = Src_lexing.newline_cnums pl +let rev_comments (Plx (pl, _)) = Src_lexing.rev_comments pl +let rev_ignored (Plx (pl, _)) = Src_lexing.rev_ignored pl +let rev_newline_cnums (Plx (pl, _)) = Src_lexing.rev_newline_cnums pl let chunks_reader lexer = let rec next_line (state, lexbuf) = diff --git a/src/lsp/cobol_preproc/src_reader.mli b/src/lsp/cobol_preproc/src_reader.mli index 9658d69fd..e19f12533 100644 --- a/src/lsp/cobol_preproc/src_reader.mli +++ b/src/lsp/cobol_preproc/src_reader.mli @@ -26,9 +26,10 @@ val from val diags: t -> 
Cobol_common.Diagnostics.Set.t val position: t -> Lexing.position -val comments: t -> Text.comments val source_format: t -> Src_format.any -val newline_cnums: t -> int list +val rev_comments: t -> Text.comments +val rev_ignored: t -> lexloc list +val rev_newline_cnums: t -> int list val next_chunk: t -> t * Text.t val fold_lines
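Note (illustration only, not part of the patch): the renamed rev_* accessors expose accumulator lists newest-first, so producers keep O(1) :: pushes while consumers such as semtoks_of_comments restore source order with a single List.fold_left. The standalone OCaml sketch below shows that pattern in isolation; every name in it is hypothetical and does not refer to the modules touched above.

(* Sketch of the reverse-accumulator convention adopted by this change. *)
type token = { text: string; line: int }

(* The accumulator keeps the most recently recorded comment first. *)
type acc = { rev_comments: token list }

let record_comment acc text line =
  (* O(1) push; the list stays in reverse order of appearance. *)
  { rev_comments = { text; line } :: acc.rev_comments }

(* Folding left over the reversed list re-reverses it for free, which is
   essentially what semtoks_of_comments does to build its token list. *)
let lines_of_comments { rev_comments } =
  List.fold_left (fun out tok -> tok.line :: out) [] rev_comments

let () =
  let acc = { rev_comments = [] } in
  let acc = record_comment acc "first" 1 in
  let acc = record_comment acc "second" 2 in
  (* The fold restores source order despite the reversed storage. *)
  assert (lines_of_comments acc = [1; 2]);
  print_endline "ok"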