Skip to content

Commit

Permalink
Implement autodetection of source format as in GnuCOBOL
Browse files Browse the repository at this point in the history
  • Loading branch information
nberth committed Oct 19, 2023
1 parent 161906f commit e80fdb4
Show file tree
Hide file tree
Showing 22 changed files with 168 additions and 119 deletions.
8 changes: 4 additions & 4 deletions .drom
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ version:0.9.0

# hash of toml configuration files
# used for generation of all files
f593ca640b0428b52f00f9a54b784372:.
33600fe888d71659a58729dcf2aa5517:.
# end context for .

# begin context for .github/workflows/workflow.yml
Expand Down Expand Up @@ -80,7 +80,7 @@ c8281f46ba9a11d0b61bc8ef67eaa357:docs/style.css

# begin context for dune-project
# file dune-project
e4dedba41fc92468ec674cbee826a8ae:dune-project
b8e5b29ae669482824c6b8cd2ff4fc9c:dune-project
# end context for dune-project

# begin context for opam/cobol_ast.opam
Expand Down Expand Up @@ -120,7 +120,7 @@ b59079469702b9b3c76559bff3e8d8d6:opam/cobol_lsp.opam

# begin context for opam/cobol_preproc.opam
# file opam/cobol_preproc.opam
177397ceec1c37ebf412047ec223bd10:opam/cobol_preproc.opam
9a463e0c260b2ffe2681c21d06e25df2:opam/cobol_preproc.opam
# end context for opam/cobol_preproc.opam

# begin context for opam/cobol_ptree.opam
Expand Down Expand Up @@ -340,7 +340,7 @@ c830729656f586961a44188b55cb4ac6:src/lsp/cobol_data/dune

# begin context for src/lsp/cobol_preproc/dune
# file src/lsp/cobol_preproc/dune
72290e9f09cebbc9fdea47a9045c0330:src/lsp/cobol_preproc/dune
6959ec99a7299e1be2b6bd025568bb21:src/lsp/cobol_preproc/dune
# end context for src/lsp/cobol_preproc/dune

# begin context for src/lsp/cobol_preproc/version.mlt
Expand Down
1 change: 1 addition & 0 deletions dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@
(description "This is the description\nof the superbol-studio-oss OCaml project\n")
(depends
(ocaml (>= 4.14.0))
(ppx_import ( >= 1 ))
(ppx_deriving ( >= 5.2.1 ))
(menhir ( >= 1.2 ))
(cobol_config (= version))
Expand Down
1 change: 1 addition & 0 deletions opam/cobol_preproc.opam
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ install: [
depends: [
"ocaml" {>= "4.14.0"}
"dune" {>= "2.8.0"}
"ppx_import" {>= "1"}
"ppx_deriving" {>= "5.2.1"}
"menhir" {>= "1.2"}
"cobol_config" {= version}
Expand Down
3 changes: 2 additions & 1 deletion src/lsp/cobol_indent/indenter.ml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ let indent_range' ~dialect ~source_format ~range ~file =
Cobol_preproc.fold_source_lines ~dialect ~source_format
~skip_compiler_directives_text:false
~f:(fun _lnum line acc -> Indent_check.check_indentation line acc)
(Filename file) { scope = BEGIN; context = []; acc = []; range }
(String { contents = file_content; filename = file; })
{ scope = BEGIN; context = []; acc = []; range }
in
(* NB: note here we ignore diagnostics *)
let ind_recds = state.result.acc in
Expand Down
3 changes: 3 additions & 0 deletions src/lsp/cobol_preproc/cobol_preproc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,8 @@ module Directives = Preproc_directives

(** {1 Main entry points for the processor itself} *)

type input = [%import: Src_input.t]

module Input = Src_input
module Options = Preproc_options
include Preproc_engine
4 changes: 2 additions & 2 deletions src/lsp/cobol_preproc/dune
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
(public_name cobol_preproc)
(wrapped true)
; use field 'dune-libraries' to add libraries without opam deps
(libraries ppx_deriving menhirLib cobol_config cobol_common str)
(libraries ppx_import ppx_deriving menhirLib cobol_config cobol_common str)
; use field 'dune-flags' to set this value
(flags (:standard))
; use field 'dune-stanzas' to add more stanzas here
(preprocess (pps ppx_deriving.show))
(preprocess (staged_pps ppx_import ppx_deriving.show))

)

Expand Down
3 changes: 2 additions & 1 deletion src/lsp/cobol_preproc/package.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ generators = ["ocamllex"]

# preprocessing options
# preprocess = "per-module (((action (run ./toto.sh %{input-file})) mod))"
preprocess = "pps ppx_deriving.show"
preprocess = "staged_pps ppx_import ppx_deriving.show"

# files to skip while updating at package level
skip = ["main.ml", "index.mld"]
Expand All @@ -56,6 +56,7 @@ skip = ["main.ml", "index.mld"]
cobol_common = "version"
cobol_config = "version"
ppx_deriving = ">=5.2.1"
ppx_import = "1"
[dependencies.menhir]
libname = "menhirLib"
version = ">=1.2"
Expand Down
85 changes: 33 additions & 52 deletions src/lsp/cobol_preproc/preproc_engine.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,6 @@ module DIAGS = Cobol_common.Diagnostics

(* --- *)

type input =
| Filename of string
| String of { contents: string; filename: string }
| Channel of { contents: in_channel; filename: string }

let decide_source_format _input
: Cobol_config.source_format_spec ->
Src_format.any with_diags = function
| SF result ->
{ result = Src_format.from_config result; diags = DIAGS.Set.none }
| Auto ->
{ result = Src_format.from_config SFFixed;
diags = DIAGS.(Acc.warn Set.none) "Source format `auto` is not supported \
yet, using `fixed`" }

(* --- *)

type preprocessor =
{
buff: Text.t;
Expand All @@ -46,8 +29,9 @@ type preprocessor =
diags: DIAGS.diagnostics;
persist: preprocessor_persist;
}

(** the preprocessor state that does not change very often *)
and preprocessor_persist =
(** the preprocessor state that does not change very often *)
{
pparser: (module Text_processor.PPPARSER);
overlay_manager: (module Src_overlay.MANAGER);
Expand Down Expand Up @@ -86,28 +70,23 @@ let with_replacing lp replacing =
let show tag { persist = { verbose; show_if_verbose; _ }; _ } =
verbose && List.mem tag show_if_verbose

let make_reader ~source_format = function
| Filename filename ->
Src_reader.from_file ~source_format filename
| String { contents; filename } ->
Src_reader.from_string ~filename ~source_format contents
| Channel { contents; filename } ->
Src_reader.from_channel ~filename ~source_format contents
let source_format_config = function
| Cobol_config.SF sf -> Some (Src_format.from_config sf)
| Auto -> None

let preprocessor input = function
| `WithOptions { libpath; verbose; source_format;
config = (module Config) } ->
let module Om_name = struct let name = __MODULE__ end in
let module Om = Src_overlay.New_manager (Om_name) in
let module Pp = Preproc_grammar.Make (Config) (Om) in
let { result = source_format; diags }
= decide_source_format input source_format in
let source_format = source_format_config source_format in
{
buff = [];
reader = make_reader ~source_format input;
reader = Src_reader.from input ?source_format;
ppstate = Preproc_state.initial;
pplog = Preproc_trace.empty;
diags;
diags = DIAGS.Set.none;
persist =
{
pparser = (module Pp);
Expand All @@ -125,7 +104,7 @@ let preprocessor input = function
{
from with
buff = [];
reader = make_reader ~source_format input;
reader = Src_reader.from input ~source_format;
persist =
{
persist with
Expand Down Expand Up @@ -308,9 +287,11 @@ and read_lib ({ persist = { libpath; copybooks; verbose; _ }; _ } as lp)
if verbose then
Pretty.error "Reading library `%s'@." filename;
let text, lp = (* note: [lp] holds all prev and new diags *)
full_text (* likewise for pplog *)
(preprocessor (Filename filename) (`Fork (lp, loc, filename)))
~postproc:(Cobol_common.Srcloc.copy_from ~filename ~copyloc:loc)
Src_input.from ~filename ~f:begin fun input ->
full_text (* likewise for pplog *)
(preprocessor input (`Fork (lp, loc, filename)))
~postproc:(Cobol_common.Srcloc.copy_from ~filename ~copyloc:loc)
end
in
text, lp.diags, Preproc_trace.copy_done ~loc ~filename lp.pplog
| Error lnf ->
Expand Down Expand Up @@ -383,6 +364,7 @@ let pp_pptokens: pptokens Pretty.printer =
(* --- *)

let reset_preprocessor_for_string string ?new_position pp =
(* TODO: maybe we could auto-detect the source format again... *)
let contents = match new_position with
| Some Lexing.{ pos_cnum; _ } -> EzString.after string (pos_cnum - 1)
| None -> string
Expand All @@ -399,44 +381,43 @@ let preprocessor ?(options = Preproc_options.default) input =
{!preprocess_file}. *)
let default_oppf = Fmt.stdout

let lex_file ~dialect ~source_format ?(ppf = default_oppf) input =
let { result = source_format; diags } =
decide_source_format input source_format in
DIAGS.result ~diags @@
let lex_input ~dialect ~source_format ?(ppf = default_oppf) input =
DIAGS.result @@
Src_reader.print_lines ~dialect ~skip_compiler_directives_text:true ppf @@
make_reader ~source_format input
Src_reader.from input ?source_format:(source_format_config source_format)

let lex_file ~dialect ~source_format ?ppf filename =
Src_input.from ~filename ~f:(lex_input ~dialect ~source_format ?ppf)

let lex_lib ~dialect ~source_format ~libpath ?(ppf = default_oppf) libname =
match Cobol_common.Copybook.find_lib ~libpath libname with
| Ok filename ->
let { result = source_format; diags } =
decide_source_format (Filename filename) source_format in
DIAGS.result ~diags @@
Src_reader.print_lines ~dialect ~skip_compiler_directives_text:true ppf @@
Src_reader.from_file ~source_format filename
Src_input.from ~filename ~f:begin fun input ->
DIAGS.result @@
Src_reader.print_lines ~dialect ~skip_compiler_directives_text:true ppf @@
Src_reader.from input ?source_format:(source_format_config source_format)
end
| Error lnf ->
DIAGS.error_result () "%a" Cobol_common.Copybook.pp_lookup_error lnf

let fold_source_lines ~dialect ~source_format
?skip_compiler_directives_text ?on_compiler_directive
~f input acc =
let { result = source_format; diags } =
decide_source_format input source_format in
DIAGS.result ~diags @@
Src_reader.fold_lines ~dialect ~f (make_reader ~source_format input) acc
?skip_compiler_directives_text ?on_compiler_directive
DIAGS.result @@
Src_reader.fold_lines ~dialect ~f
(Src_reader.from input ?source_format:(source_format_config source_format))
?skip_compiler_directives_text ?on_compiler_directive acc

let text_of_input ?options input =
let text, pp = full_text ~item:"file" @@ preprocessor ?options input in
DIAGS.result text ~diags:(diags pp)

let text_of_file ?options filename =
text_of_input ?options (Filename filename)
Src_input.from ~filename ~f:(text_of_input ?options)

let preprocess_input ?options ?(ppf = default_oppf) input =
text_of_input ?options input |>
DIAGS.map_result ~f:(Pretty.print ppf "%a@." Text.pp_text)

let preprocess_file ?options ?(ppf = default_oppf) filename =
text_of_file ?options filename |>
DIAGS.map_result ~f:(Pretty.print ppf "%a@." Text.pp_text)
let preprocess_file ?options ?ppf filename =
Src_input.from ~filename ~f:(preprocess_input ?options ?ppf)
25 changes: 11 additions & 14 deletions src/lsp/cobol_preproc/preproc_engine.mli
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,9 @@ open Cobol_common.Srcloc.TYPES

type preprocessor

type input =
| Filename of string
| String of { contents: string; filename: string }
| Channel of { contents: in_channel; filename: string }

val preprocessor
: ?options: Preproc_options.preproc_options
-> input
-> Src_input.t
-> preprocessor
val reset_preprocessor_for_string
: string
Expand All @@ -45,16 +40,18 @@ val next_chunk: preprocessor -> Text.text * preprocessor

(** {2 High-level commands} *)

val decide_source_format
: string
-> Cobol_config.source_format_spec
-> Src_format.any Cobol_common.Diagnostics.with_diags
val lex_input
: dialect: Cobol_config.dialect
-> source_format: Cobol_config.source_format_spec
-> ?ppf:Format.formatter
-> Src_input.t
-> unit Cobol_common.Diagnostics.with_diags

val lex_file
: dialect: Cobol_config.dialect
-> source_format: Cobol_config.source_format_spec
-> ?ppf:Format.formatter
-> input
-> string
-> unit Cobol_common.Diagnostics.with_diags

val lex_lib
Expand Down Expand Up @@ -88,14 +85,14 @@ val fold_source_lines
-> ?on_compiler_directive
: (int -> Preproc_directives.compiler_directive with_loc -> 'a -> 'a)
-> f:(int -> Text.text -> 'a -> 'a)
-> input
-> Src_input.t
-> 'a
-> 'a Cobol_common.Diagnostics.with_diags

val preprocess_input
: ?options: Preproc_options.preproc_options
-> ?ppf:Format.formatter
-> input
-> Src_input.t
-> unit Cobol_common.Diagnostics.with_diags

val preprocess_file
Expand All @@ -111,5 +108,5 @@ val text_of_file

val text_of_input
: ?options: Preproc_options.preproc_options
-> input
-> Src_input.t
-> Text.t Cobol_common.Diagnostics.with_diags
17 changes: 17 additions & 0 deletions src/lsp/cobol_preproc/src_format.ml
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,20 @@ let comment_entry_termination
CBLXIndic), FixedWidth _), Some c -> AreaB { first_area_b_column = c }

(* --- *)

let looks_like_fixed_format ?(tab_stop = 8) contents_prefix =
let rec sna ap vp =
match contents_prefix.[ap] with
| '\n' -> sna (succ ap) 1
| '\t' -> sna (succ ap) (vp + (tab_stop - (vp + tab_stop) mod tab_stop))
| '\r' -> sna (succ ap) vp
| _ when vp <> 7 -> sna (succ ap) (succ vp)
| ' ' | '-' | 'd' | 'D' | '*' | '/' | '\\' | '$' when vp = 7 -> true
| _ -> false
in
try sna 0 1 with Invalid_argument _ -> false

let guess_from ~contents_prefix =
if looks_like_fixed_format contents_prefix
then from_config SFFixed
else from_config SFFree
1 change: 1 addition & 0 deletions src/lsp/cobol_preproc/src_format.mli
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ val decypher
: dialect: Cobol_config.dialect
-> string
-> (any, [> `SFUnknown of string ]) result
val guess_from: contents_prefix: string -> any

val comment_entry_termination: _ source_format -> comment_entry_termination
val first_area_b_column: _ source_format -> int option
23 changes: 23 additions & 0 deletions src/lsp/cobol_preproc/src_input.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
(**************************************************************************)
(* *)
(* SuperBOL OSS Studio *)
(* *)
(* Copyright (c) 2022-2023 OCamlPro SAS *)
(* *)
(* All rights reserved. *)
(* This source code is licensed under the GNU Affero General Public *)
(* License version 3 found in the LICENSE.md file in the root directory *)
(* of this source tree. *)
(* *)
(**************************************************************************)

type t =
| String of { contents: string; filename: string }
| Channel of { ic: in_channel; filename: string }

let from ~filename ~f =
if filename = "-"
then f (Channel { ic = stdin; filename }) (* filename = "(stdin)"? *)
else let ic = open_in_bin filename in
try let res = f (Channel { ic; filename }) in close_in ic; res
with e -> close_in_noerr ic; raise e
Loading

0 comments on commit e80fdb4

Please sign in to comment.