diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/build.sbt b/tools/bigquery-zos-mainframe-connector/gszutil/build.sbt index fd06277ee3..8d61de36bb 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/build.sbt +++ b/tools/bigquery-zos-mainframe-connector/gszutil/build.sbt @@ -16,7 +16,7 @@ */ organization := "com.google.cloud.imf" name := "mainframe-connector" -version := "5.7.1" +version := "5.7.2" scalaVersion := "2.13.8" diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/ExportConfig.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/ExportConfig.scala index aeecfdd7be..2a83e32e16 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/ExportConfig.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/ExportConfig.scala @@ -27,6 +27,7 @@ case class ExportConfig( timeoutMinutes: Int = 90, vartext: Boolean = false, runMode: String = "parallel", + encoding: Option[String] = None, picTCharset: Option[String] = None, bucket: String = "", diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/ExportOptionParser.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/ExportOptionParser.scala index 3f8b60c4ec..ceaa013663 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/ExportOptionParser.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/ExportOptionParser.scala @@ -162,6 +162,11 @@ object ExportOptionParser .action((x,c) => c.copy(sync = x)) // Custom Options + opt[String]("encoding") + .optional() + .text("(optional) charset used for encoding and decoding character fields. 
Overrides default set by ENCODING environment variable.") + .action((x, c) => c.copy(encoding = Option(x))) + opt[String]("stats_table") .optional() .text("tablespec of table to insert stats") diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsUtilConfig.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsUtilConfig.scala index bbd92da26c..c9a3cff351 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsUtilConfig.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsUtilConfig.scala @@ -91,6 +91,7 @@ case class GsUtilConfig(source: String = "INFILE", // Custom schemaProvider: Option[SchemaProvider] = None, picTCharset: Option[String] = None, + encoding: Option[String] = None, statsTable: String = "", remote: Boolean = false, remoteHost: String = "", diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsUtilOptionParser.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsUtilOptionParser.scala index 45cd40cd12..f980ac021c 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsUtilOptionParser.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsUtilOptionParser.scala @@ -110,6 +110,10 @@ object GsUtilOptionParser extends OptionParser[GsUtilConfig]("gsutil") with ArgP .action { (x, c) => c.copy(tfGCS = x) }, + opt[String]("encoding") + .optional() + .text("(optional) charset used for encoding and decoding character fields. 
Overrides default set by ENCODING environment variable.") + .action((x,c) => c.copy(encoding = Option(x))), opt[String]("pic_t_charset") .optional() .text("(optional) charset used for encoding and decoding international strings, used with PIC T copybook type, default is EBCDIC") diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsZUtilConfig.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsZUtilConfig.scala index aea51e8e9d..5ec864a556 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsZUtilConfig.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsZUtilConfig.scala @@ -27,6 +27,7 @@ case class GsZUtilConfig( projectId: String = "", datasetId: String = "", location: String = "US", + encoding: Option[String] = None, picTCharset: Option[String] = None, timeOutMinutes: Option[Int] = None, keepAliveTimeInSeconds: Option[Int] = None) diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsZUtilOptionParser.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsZUtilOptionParser.scala index 07db4ab07b..4ae9ca2a1c 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsZUtilOptionParser.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/GsZUtilOptionParser.scala @@ -59,6 +59,11 @@ with ArgParser[GsZUtilConfig]{ .text("port of GRecv transcoding service (default: 52701 or SRVPORT environment variable)") .action((x,c) => c.copy(remoteHost = x)) + opt[String]("encoding") + .optional() + .text("(optional) charset used for encoding and decoding character fields. 
Overrides default set by ENCODING environment variable.") + .action((x, c) => c.copy(encoding = Option(x))) + opt[String]("pic_t_charset") .optional() .text("(optional) charset used for encoding and decoding international strings, used with PIC T copybook type, default is EBCDIC") diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/Cp.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/Cp.scala index f7946f9727..c6f756f4ff 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/Cp.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/Cp.scala @@ -54,7 +54,7 @@ object Cp extends Command[GsUtilConfig] with Logging { case Some(x) => { logger.info("Merging copybook with provided transformations ...") - val sch = c.schemaProvider.getOrElse(zos.loadCopyBook(c.copyBook, c.picTCharset)) + val sch = c.schemaProvider.getOrElse(zos.loadCopyBook(c.copyBook, c.encoding, c.picTCharset)) logger.info(s"Current Schema: ${sch.toString}") val newSchema = merge(sch, x) @@ -63,7 +63,7 @@ object Cp extends Command[GsUtilConfig] with Logging { } case None => { logger.info("Use original copybook") - c.schemaProvider.getOrElse(zos.loadCopyBook(c.copyBook, c.picTCharset)) + c.schemaProvider.getOrElse(zos.loadCopyBook(c.copyBook, c.encoding, c.picTCharset)) } } val in: ZRecordReaderT = c.testInput.getOrElse(zos.readCloudDD(c.source)) diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/Export.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/Export.scala index 8dda01b4c4..3dcf5a1939 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/Export.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/Export.scala @@ -63,7 +63,7 @@ 
object Export extends Command[ExportConfig] with Logging { CopyBook(zos.readDSNLines(MVSStorage.parseDSN(cfg.cobDsn)).mkString("\n"), picTCharset = cfg.picTCharset) } else { logger.info(s"reading copybook from DD:COPYBOOK") - zos.loadCopyBook("COPYBOOK", cfg.picTCharset) + zos.loadCopyBook("COPYBOOK", cfg.encoding, cfg.picTCharset) } try { diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/GsZUtil.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/GsZUtil.scala index f2fa89fa27..7b8ca3782c 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/GsZUtil.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/bqsh/cmd/GsZUtil.scala @@ -56,7 +56,7 @@ object GsZUtil extends Command[GsZUtilConfig] with Logging { CopyBook(zos.readDSNLines(MVSStorage.parseDSN(c.cobDsn)).mkString("\n"), picTCharset = c.picTCharset) } else { logger.info(s"reading copybook from DD:COPYBOOK") - zos.loadCopyBook("COPYBOOK", c.picTCharset) + zos.loadCopyBook("COPYBOOK", c.encoding, c.picTCharset) } //TODO read FLDINFO DD and merge field info diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/CharsetTranscoder.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/CharsetTranscoder.scala new file mode 100644 index 0000000000..64c687bcac --- /dev/null +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/CharsetTranscoder.scala @@ -0,0 +1,17 @@ +package com.google.cloud.imf.gzos + +import com.google.cloud.gszutil.Transcoder + +import java.nio.charset.Charset + +/** Transcoder with user-specified character set + * @param encoding name of character set + */ +case class CharsetTranscoder(encoding: String) extends Transcoder { + override val charset: Charset = Charset.forName(encoding) + override val 
SP: Byte = {
+    val bytes = " ".getBytes(charset) // exact encoded bytes; ByteBuffer.array() may expose unused capacity
+    require(bytes.length == 1, "multi-byte space character not supported")
+    bytes.head
+  }
+}
diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/Ebcdic.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/Ebcdic.scala
index 3ea8746770..66a9c184e1 100644
--- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/Ebcdic.scala
+++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/Ebcdic.scala
@@ -20,7 +20,26 @@ import java.nio.charset.Charset
 
 import com.google.cloud.gszutil.Transcoder
 
+/** Default Transcoder
+  * Uses EBCDIC US charset by default.
+  * Obtains charset name from ENCODING environment variable.
+  * When ENCODING is set, the pad byte SP is derived from that charset.
+  */
 case object Ebcdic extends Transcoder {
-  override final val charset: Charset = new EBCDIC1()
+  override final val charset: Charset = {
+    sys.env.get("ENCODING") match {
+      case Some(name) =>
+        System.out.println(s"Using Charset '$name'")
+        Charset.forName(name)
+      case None =>
+        new EBCDIC1()
+    }
+  }
-  override val SP: Byte = 0x40
+  override val SP: Byte =
+    if (sys.env.contains("ENCODING")) {
+      // 0x40 is the space byte only in EBCDIC code pages, so ask the overriding charset
+      val bytes = " ".getBytes(charset)
+      require(bytes.length == 1, "multi-byte space character not supported")
+      bytes.head
+    } else 0x40
 }
diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/IBM.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/IBM.scala
index 2edc63ca82..d55bbc036f 100644
--- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/IBM.scala
+++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/IBM.scala
@@ -121,11 +121,15 @@ object IBM extends MVS with Logging {
     }
   }
 
-  override def loadCopyBook(dd: String, picTCharset: Option[String] = None) : CopyBook = {
+  override def loadCopyBook(dd: String, encoding: Option[String] = None, picTCharset: Option[String] = None) : CopyBook = {
     val raw = readDDString(dd, "\n")
     logger.info(s"Parsing copy book:\n$raw")
     try {
-      val
copyBook = CopyBook(raw, Ebcdic, picTCharset = picTCharset) + val transcoder = encoding match { + case Some(charsetName) => CharsetTranscoder(charsetName) + case None => Ebcdic + } + val copyBook = CopyBook(raw, transcoder, picTCharset = picTCharset) logger.info(s"Loaded copy book with LRECL=${copyBook.LRECL}") copyBook } catch { diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/Linux.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/Linux.scala index fdb28ac0b0..847fcca69f 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/Linux.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/Linux.scala @@ -96,7 +96,7 @@ object Linux extends MVS with Logging { override def readKeyfile(): Array[Byte] = Array.empty - override def loadCopyBook(dd: String, picTCharset: Option[String] = Some("UTF-8")): CopyBook = { + override def loadCopyBook(dd: String, encoding: Option[String] = None, picTCharset: Option[String] = Some("UTF-8")): CopyBook = { val ddValue = System.getenv(dd) require(ddValue != null, s"$dd environment variable not defined") val ddPath = Paths.get(ddValue) diff --git a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/MVS.scala b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/MVS.scala index 0b7a9c9d33..7203ba6bb2 100644 --- a/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/MVS.scala +++ b/tools/bigquery-zos-mainframe-connector/gszutil/src/main/scala/com/google/cloud/imf/gzos/MVS.scala @@ -40,7 +40,7 @@ trait MVS { def getCredentialProvider(): CredentialProvider def readKeyfile(): Array[Byte] def listPDS(dsn: DSN): Iterator[PDSMemberInfo] - def loadCopyBook(dd: String, picTCharset: Option[String] = None): CopyBook + def loadCopyBook(dd: String, encoding: 
Option[String] = None, picTCharset: Option[String] = None): CopyBook def jobName: String def jobDate: String def jobTime: String