Skip to content

Commit

Permalink
Emit comment semantic tokens for SNA and text in the right margin
Browse files Browse the repository at this point in the history
  • Loading branch information
nberth committed Oct 23, 2023
1 parent 19d5894 commit 86e6cca
Show file tree
Hide file tree
Showing 13 changed files with 161 additions and 82 deletions.
16 changes: 11 additions & 5 deletions src/lsp/cobol_lsp/lsp_document.ml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ module TYPES = struct
doc_cache_pplog: Cobol_preproc.Trace.log;
doc_cache_tokens: Cobol_parser.Outputs.tokens_with_locs;
doc_cache_comments: Cobol_preproc.Text.comments;
doc_cache_ignored: Cobol_common.Srcloc.lexloc list;
doc_cache_parsed: (Cobol_ptree.compilation_group * CUs.t) option;
doc_cache_diags: DIAGS.Set.serializable;
}
Expand Down Expand Up @@ -116,7 +117,8 @@ let lazy_references ptree cus defs =
(* Empty artifact set used when the parser memorizes nothing.
   Bug fixed: a stale pre-commit line ([comments = [] }]) was left interleaved
   with the updated fields, yielding an invalid record expression. *)
let no_artifacts =
  Cobol_parser.Outputs.{ tokens = lazy [];
                         pplog = Cobol_preproc.Trace.empty;
                         rev_comments = [];
                         rev_ignored = [] }

let gather_parsed_data ptree =
Cobol_typeck.analyze_compilation_group ptree |>
Expand Down Expand Up @@ -216,15 +218,17 @@ let retrieve_parsed_data: document -> parsed_data = function
(** Caching utilities *)

(** [to_cache doc] builds the serializable cache record for [doc], forcing the
    lazily computed token list and flattening delayed diagnostics.
    Bug fixed: stale pre-commit lines (old [comments] pattern and the old
    [doc_cache_comments = comments;] field) were left interleaved with their
    replacements, duplicating bindings. *)
let to_cache ({ project; textdoc; parsed; diags;
                artifacts = { pplog; tokens;
                              rev_comments; rev_ignored; _ }; _ } as doc) =
  {
    doc_cache_filename = Lsp_project.relative_path_for ~uri:(uri doc) project;
    doc_cache_checksum = Digest.string (Lsp.Text_document.text textdoc);
    doc_cache_langid = Lsp.Text_document.languageId textdoc;
    doc_cache_version = Lsp.Text_document.version textdoc;
    doc_cache_pplog = pplog;
    doc_cache_tokens = Lazy.force tokens;
    doc_cache_comments = rev_comments;
    doc_cache_ignored = rev_ignored;
    doc_cache_parsed = Option.map (fun { ptree; cus; _ } -> ptree, cus) parsed;
    doc_cache_diags = DIAGS.Set.apply_delayed_formatting diags;
  }
Expand All @@ -239,7 +243,8 @@ let of_cache ~project
doc_cache_version = version;
doc_cache_pplog = pplog;
doc_cache_tokens = tokens;
doc_cache_comments = comments;
doc_cache_comments = rev_comments;
doc_cache_ignored = rev_ignored;
doc_cache_parsed = parsed;
doc_cache_diags = diags } =
let absolute_filename = Lsp_project.absolute_path_for ~filename project in
Expand All @@ -260,7 +265,8 @@ let of_cache ~project
{ ptree; cus; definitions; references })
parsed
in
{ doc with artifacts = { pplog; tokens = lazy tokens; comments };
{ doc with artifacts = { pplog; tokens = lazy tokens;
rev_comments; rev_ignored };
diags = DIAGS.Set.of_serializable diags;
parsed }

Expand Down
6 changes: 4 additions & 2 deletions src/lsp/cobol_lsp/lsp_request.ml
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,13 @@ let handle_semtoks_full,
handle_semtoks_range =
let handle registry ?range (doc: TextDocumentIdentifier.t) =
try_with_document_data registry doc
~f:begin fun ~doc:{ artifacts = { pplog; tokens; comments; _ };
~f:begin fun ~doc:{ artifacts = { pplog; tokens;
rev_comments; rev_ignored; _ };
_ } Lsp_document.{ ptree; _ } ->
let data =
Lsp_semtoks.data ~filename:(Lsp.Uri.to_path doc.uri) ~range
~pplog ~comments ~tokens:(Lazy.force tokens) ~ptree
~pplog ~rev_comments ~rev_ignored
~tokens:(Lazy.force tokens) ~ptree
in
Some (SemanticTokens.create ~data ())
end
Expand Down
39 changes: 29 additions & 10 deletions src/lsp/cobol_lsp/lsp_semtoks.ml
Original file line number Diff line number Diff line change
Expand Up @@ -553,17 +553,32 @@ let semtoks_from_ptree ~filename ?range ptree =

end) ptree [] |> List.rev

(* Semantic tokens for comments recorded (in reverse order) by the
   preprocessor; only comments located in [filename] and intersecting [range]
   (when given) are kept.  Folding over the reversed list while consing yields
   tokens back in source order.
   Bug fixed: the old [List.filter_map]-based version's lines were left
   interleaved with the new fold-based version, making the function invalid. *)
let semtoks_of_comments ~filename ?range rev_comments =
  rev_comments |>
  List.fold_left begin fun acc -> function
    | Cobol_preproc.Text.{ comment_loc = s, _ as lexloc; _ }
      when s.Lexing.pos_fname = filename &&
           Option.fold range
             ~some:(fun r -> Lsp_position.intersects_lexloc r lexloc)
             ~none:true ->
        semtok TOKTYP.comment lexloc :: acc
    | _ ->
        acc
  end []

(* Semantic tokens for source regions the preprocessor ignored, given in
   reverse order.  They are decorated like comments, for lack of a better
   suited token type in the set of available ones; this could be improved
   later with some client-side code or configuration. *)
let semtoks_of_ignored ~filename ?range rev_ignored =
  let in_file_and_range ((start_pos, _) as lexloc) =
    start_pos.Lexing.pos_fname = filename &&
    Option.fold range
      ~some:(fun r -> Lsp_position.intersects_lexloc r lexloc)
      ~none:true
  in
  (* Reversing first restores source order; filtering and mapping then keep
     exactly the in-scope locations, as in a fold that conses onto an
     accumulator. *)
  List.rev rev_ignored
  |> List.filter in_file_and_range
  |> List.map (semtok TOKTYP.comment)

let semtoks_of_preproc_statements ~filename ?range pplog =
List.rev @@ List.fold_left begin fun acc -> function
Expand Down Expand Up @@ -646,11 +661,13 @@ let ensure_sorted name ~filename cmp l =
List.fast_sort cmp l


(* Entry point: gathers semantic tokens from every producer (plain tokens,
   parse tree, comments, ignored regions, preprocessor statements).
   Bug fixed: the stale pre-commit header (taking [~comments]) and the old
   [semtoks3]/[semtoks4] bindings were left interleaved with the updated
   five-source version, duplicating the header and two let-bindings. *)
let data ~filename ~range ~tokens ~pplog
    ~rev_comments ~rev_ignored ~ptree : int array =
  let semtoks1 = semtoks_of_non_ambigious_tokens ~filename ?range tokens in
  let semtoks2 = semtoks_from_ptree ~filename ?range ptree in
  let semtoks3 = semtoks_of_comments ~filename ?range rev_comments in
  let semtoks4 = semtoks_of_ignored ~filename ?range rev_ignored in
  let semtoks5 = semtoks_of_preproc_statements ~filename ?range pplog in
(* NB: In *principle* all those lists are already sorted w.r.t lexical
locations in [filename]. We just check that for now and raise a warning,
in case. *)
Expand All @@ -660,8 +677,10 @@ let data ~filename ~range ~tokens ~pplog ~comments ~ptree : int array =
let semtoks1 = ensure_sorted "nonambiguous" ~filename compare_semtoks semtoks1
and semtoks2 = ensure_sorted "ptree" ~filename compare_semtoks semtoks2
and semtoks3 = ensure_sorted "comments" ~filename compare_semtoks semtoks3
and semtoks4 = ensure_sorted "preproc" ~filename compare_semtoks semtoks4 in
and semtoks4 = ensure_sorted "ignored" ~filename compare_semtoks semtoks4
and semtoks5 = ensure_sorted "preproc" ~filename compare_semtoks semtoks5 in
relative_semtoks
List.(merge compare_semtoks semtoks1 @@
merge compare_semtoks semtoks2 @@
merge compare_semtoks semtoks3 @@ semtoks4)
merge compare_semtoks semtoks3 @@
merge compare_semtoks semtoks4 @@ semtoks5)
3 changes: 2 additions & 1 deletion src/lsp/cobol_lsp/lsp_semtoks.mli
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ val data
-> range: Lsp.Types.Range.t option
-> tokens: Cobol_parser.Outputs.tokens_with_locs
-> pplog: Cobol_preproc.Trace.log
-> comments: Cobol_preproc.Text.comments
-> rev_comments: Cobol_preproc.Text.comments
-> rev_ignored: Cobol_common.Srcloc.lexloc list
-> ptree: Cobol_ptree.compilation_group
-> int array
37 changes: 10 additions & 27 deletions src/lsp/cobol_parser/parser_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -419,8 +419,9 @@ let aggregate_output (type m) (ps: m state) res
| Eidetic ->
let artifacts =
{ tokens = Tokzr.parsed_tokens ps.preproc.tokzr;
pplog = Cobol_preproc.log ps.preproc.pp;
comments = Cobol_preproc.comments ps.preproc.pp } in
pplog = Cobol_preproc.rev_log ps.preproc.pp;
rev_comments = Cobol_preproc.rev_comments ps.preproc.pp;
rev_ignored = Cobol_preproc.rev_ignored ps.preproc.pp } in
WithArtifacts (res, artifacts)

(** Simple parsing *)
Expand Down Expand Up @@ -509,32 +510,14 @@ let parse_with_history ?(save_stage = 10) rwps =
(rewindable_parser_state rwps).preproc.tokzr;
res, rwps


(* Locate the most recent saved parser state preceding [position].
   Bug fixed: the removed helper [lexing_postion_of] (whose logic moved into
   [Cobol_preproc.position_at]) and the stale line calling it were left in
   place alongside the new inline [match], duplicating the binding of
   [lexpos]. *)
let find_history_event_preceding ~position ({ store; _ } as rwps) =
  let lexpos = match position with
    | Lexing pos ->
        pos
    | Indexed { line; char } ->
        let ps = rewindable_parser_state rwps in
        Cobol_preproc.position_at ~line ~char ps.preproc.pp
  in
let rec aux = function
| [] ->
raise Not_found
Expand Down
3 changes: 2 additions & 1 deletion src/lsp/cobol_parser/parser_outputs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ type artifacts =
{
tokens: tokens_with_locs Lazy.t;
pplog: Cobol_preproc.Trace.log;
comments: Cobol_preproc.Text.comments;
rev_comments: Cobol_preproc.Text.comments;
rev_ignored: lexloc list;
}

(** The output of parsing functions depends on its memorization abilities:
Expand Down
30 changes: 26 additions & 4 deletions src/lsp/cobol_preproc/preproc_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,33 @@ and preprocessor_persist =
let diags { diags; reader; _ } = DIAGS.Set.union diags @@ Src_reader.diags reader
let add_diag lp d = { lp with diags = DIAGS.Set.cons d lp.diags }
let add_diags lp d = { lp with diags = DIAGS.Set.union d lp.diags }
(* Simple accessors over the preprocessor state.
   Bug fixed: stale pre-commit accessors ([log], [comments], [newline_cnums],
   and a duplicate [source_format]) were left interleaved with the renamed
   [rev_*] accessors, defining several names twice. *)
let position { reader; _ } = Src_reader.position reader
let source_format { reader; _ } = Src_reader.source_format reader
let rev_log { pplog; _ } = pplog
let rev_comments { reader; _ } = Src_reader.rev_comments reader
let rev_ignored { reader; _ } = Src_reader.rev_ignored reader

(** [position_at ~line ~char pp] computes a lexing position that corresponds to
    the given line and character indexes (all starting at 0) in the input
    already read by [pp].
    @raise Not_found if no complete line was processed yet.  If the given line
    index does not correspond to already processed lines, the current position
    of the reader is returned instead. *)
let position_at ~line ~char { reader; _ } =
let rev_newline_cnums = Src_reader.rev_newline_cnums reader in
if rev_newline_cnums = []
then raise Not_found
else
let lexpos = Src_reader.position reader in
try
(* NOTE(review): assumes [rev_newline_cnums] lists, most recent first, the
   character offsets of line starts, so indexing from the end of the reversed
   list yields the beginning-of-line offset for [line] — confirm against
   [Src_reader]. *)
let pos_bol =
try List.nth rev_newline_cnums (lexpos.pos_lnum - line - 1)
with Not_found | Invalid_argument _ -> 0
in
Lexing.{ lexpos with pos_bol;
pos_cnum = pos_bol + char;
pos_lnum = line + 1 }
with Failure _ ->
lexpos

let with_reader lp reader =
if lp.reader == reader then lp else { lp with reader }
Expand Down
7 changes: 4 additions & 3 deletions src/lsp/cobol_preproc/preproc_engine.mli
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ val reset_preprocessor_for_string
val diags: preprocessor -> Cobol_common.Diagnostics.Set.t
val add_diag: preprocessor -> Cobol_common.Diagnostics.t -> preprocessor
val add_diags: preprocessor -> Cobol_common.Diagnostics.Set.t -> preprocessor
val position: preprocessor -> Lexing.position
val position_at: line:int -> char: int -> preprocessor -> Lexing.position
val source_format: preprocessor -> Src_format.any
(* Bug fixed: stale pre-commit declarations ([log], [comments],
   [newline_cnums]) removed — they were left interleaved with the renamed
   [rev_*] accessors below, declaring superseded values. *)
val rev_log: preprocessor -> Preproc_trace.log
val rev_comments: preprocessor -> Text.comments
val rev_ignored: preprocessor -> lexloc list

val next_chunk: preprocessor -> Text.text * preprocessor

Expand Down
42 changes: 28 additions & 14 deletions src/lsp/cobol_preproc/src_lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@

(* Named regular expressions for the fixed-format source lexer.
   Bug fixed: the stale pre-commit definition of [sna] was left directly above
   its updated replacement, defining the name twice. *)
let newline = '\r'* '\n'
let nnl = _ # ['\r' '\n'] (* anything but newline *)
let sna = nnl nnl nnl nnl nnl nnl (* 6 chars; TODO: exclude tabs *)
let spaces = ([' ' '\t']*)
let blank = [' ' '\009' '\r']
let nonblank = nnl # blank
Expand Down Expand Up @@ -131,42 +131,56 @@ let cdir_word =

rule fixed_line state
= shortest
| sna ' ' | '\t' (* nominal *)
| sna (* nominal *)
{
fixed_indicator (Src_lexing.sna state lexbuf) lexbuf
}
| '\t'
{
fixed_nominal_line (Src_lexing.flush_continued state) lexbuf
}
| (nnl* newline) (* blank line (too short) *)
{
Src_lexing.new_line (Src_lexing.sna state lexbuf) lexbuf
}
| (nnl* eof) (* blank line (too short), without newline character *)
{
Src_lexing.(flush @@ eof (Src_lexing.sna state lexbuf) lexbuf)
}
and fixed_indicator state
= parse
| ' ' | '\t' (* second tab *) (* nominal *)
{
fixed_nominal_line (Src_lexing.flush_continued state) lexbuf
}
| sna '-' (* continuation line *)
| '-' (* continuation line *)
{
fixed_continue_line state lexbuf
}
| sna ('$' as marker) (* compiler directive *)
| ('$' as marker) (* compiler directive *)
{
fixed_mf_cdir_line (String.make 1 marker) state lexbuf
}
| sna ('>' as marker)
| ('>' as marker)
{
maybe_fixed_cdir_line marker state lexbuf
}
| sna (['*' '/'] as marker) (* comment line *)
| (['*' '/'] as marker) (* comment line *)
{
comment_line marker state lexbuf
}
| sna ['d' 'D']
| ['d' 'D']
{
fixed_debug_line state lexbuf
}
| sna (_#['\n' '\r'] as c) (* unknown indicator *)
| (_#['\n' '\r'] as c) (* unknown indicator *)
{
Src_lexing.unexpected Char ~c ~knd:"indicator" state lexbuf
~k:(fun state -> fixed_nominal_line (Src_lexing.flush_continued state))
}
| (nnl* newline) (* blank line (too short) *)
{
Src_lexing.new_line state lexbuf
}
| (nnl* eof) (* blank line (too short), without newline character *)
| epsilon
{
Src_lexing.(flush @@ eof state lexbuf)
gobble_line (Src_lexing.sna state lexbuf) lexbuf
}
and xopen_line state (* X/Open free-form *)
= parse (* (note no continuation line indicator) *)
Expand Down
Loading

0 comments on commit 86e6cca

Please sign in to comment.