From 3b37b0f23e82660e1bac104c4275ac43b7669617 Mon Sep 17 00:00:00 2001 From: Tanner Clary Date: Mon, 11 Sep 2023 19:00:07 -0700 Subject: [PATCH] [CALCITE-6001] Add dialect-specific encoding for string literals --- core/src/main/codegen/templates/Parser.jj | 10 +- .../org/apache/calcite/sql/SqlDialect.java | 99 ++++++++++++++----- .../sql/dialect/BigQuerySqlDialect.java | 1 + .../calcite/sql/dialect/HiveSqlDialect.java | 1 + .../sql/fun/SqlLiteralChainOperator.java | 1 - .../sql/parser/SqlAbstractParserImpl.java | 7 ++ .../apache/calcite/sql/parser/SqlParser.java | 13 +++ .../rel/rel2sql/RelToSqlConverterTest.java | 21 ++++ .../org/apache/calcite/tools/PlannerTest.java | 5 +- .../calcite/sql/parser/SqlParserTest.java | 36 +++++-- 10 files changed, 157 insertions(+), 37 deletions(-) diff --git a/core/src/main/codegen/templates/Parser.jj b/core/src/main/codegen/templates/Parser.jj index b26efd896e6..0eed3a4f522 100644 --- a/core/src/main/codegen/templates/Parser.jj +++ b/core/src/main/codegen/templates/Parser.jj @@ -166,6 +166,7 @@ public class ${parser.class} extends SqlAbstractParserImpl private Casing unquotedCasing; private Casing quotedCasing; + private String charset; private int identifierMaxLength; private SqlConformance conformance; @@ -180,6 +181,7 @@ public class ${parser.class} extends SqlAbstractParserImpl ((SourceStringReader) reader).getSourceString(); parser.setOriginalSql(sql); } + parser.setConformance(SqlConformanceEnum.DEFAULT); return parser; } }; @@ -224,6 +226,10 @@ public class ${parser.class} extends SqlAbstractParserImpl this.unquotedCasing = unquotedCasing; } + public void setCharset(String charset) { + this.charset = charset; + } + public void setIdentifierMaxLength(int identifierMaxLength) { this.identifierMaxLength = identifierMaxLength; } @@ -4645,7 +4651,7 @@ SqlNode StringLiteral() : String p; final List frags; char unicodeEscapeChar = 0; - String charSet = null; + String charSet = this.charset; SqlCharStringLiteral literal; } { @@ 
-4779,7 +4785,7 @@ SqlNode StringLiteral() : p = SqlParserUtil.stripQuotes(getToken(0).image, DQ, DQ, "\\\"", Casing.UNCHANGED); try { - return SqlLiteral.createCharString(p, charSet, getPos()); + return literal = SqlLiteral.createCharString(p, charSet, getPos()); } catch (java.nio.charset.UnsupportedCharsetException e) { throw SqlUtil.newContextException(getPos(), RESOURCE.unknownCharacterSet(charSet)); diff --git a/core/src/main/java/org/apache/calcite/sql/SqlDialect.java b/core/src/main/java/org/apache/calcite/sql/SqlDialect.java index 9ebc4c383c4..d1967169bb5 100644 --- a/core/src/main/java/org/apache/calcite/sql/SqlDialect.java +++ b/core/src/main/java/org/apache/calcite/sql/SqlDialect.java @@ -157,6 +157,7 @@ public class SqlDialect { private final Casing unquotedCasing; private final Casing quotedCasing; private final boolean caseSensitive; + private final String charset; //~ Constructors ----------------------------------------------------------- @@ -241,6 +242,7 @@ public SqlDialect(Context context) { this.unquotedCasing = requireNonNull(context.unquotedCasing()); this.quotedCasing = requireNonNull(context.quotedCasing()); this.caseSensitive = context.caseSensitive(); + this.charset = context.charset(); } //~ Methods ---------------------------------------------------------------- @@ -248,7 +250,7 @@ public SqlDialect(Context context) { /** Creates an empty context. Use {@link #EMPTY_CONTEXT} to reference the instance. 
*/ private static Context emptyContext() { return new ContextImpl(DatabaseProduct.UNKNOWN, null, null, -1, -1, - "'", "''", null, null, + "'", "''", null, null, "ISO-8859-1", Casing.UNCHANGED, Casing.TO_UPPER, true, SqlConformanceEnum.DEFAULT, NullCollation.HIGH, RelDataTypeSystemImpl.DEFAULT, JethroDataSqlDialect.JethroInfo.EMPTY); @@ -433,13 +435,19 @@ public final String quoteStringLiteral(String val) { */ public void quoteStringLiteral(StringBuilder buf, @Nullable String charsetName, String val) { - if (charsetName != null) { - buf.append("_"); - buf.append(charsetName); + if (containsNonISO88591(val) && charsetName == null) { + quoteStringLiteralUnicode(buf, val); + } else { + // Don't append charset if it matches dialect default, e.g. BigQuery shouldn't append _UTF-8 + // because that is the default + if (charsetName != null && !charsetName.equals(getCharset())) { + buf.append("_"); + buf.append(charsetName); + } + buf.append(literalQuoteString); + buf.append(val.replace(literalEndQuoteString, literalEscapedQuote)); + buf.append(literalEndQuoteString); } - buf.append(literalQuoteString); - buf.append(val.replace(literalEndQuoteString, literalEscapedQuote)); - buf.append(literalEndQuoteString); } public void unparseCall(SqlWriter writer, SqlCall call, int leftPrec, @@ -564,6 +572,24 @@ public void unparseTableScanHints(SqlWriter writer, SqlNodeList hints, int leftPrec, int rightPrec) { } + /** + * Returns whether the string contains any characters outside of + * ISO-8859-1 (Calcite's default character set). + * + *

+ * + * @param str String to check + * @return whether {@code str} contains any character above U+00FF, the last ISO-8859-1 code point + */ + protected static boolean containsNonISO88591(String str) { + for (char c : str.toCharArray()) { + if (c > 255) { + return true; // Found a non-ISO-8859-1 character + } + } + return false; // No non-ISO-8859-1 characters found + } + /** * Returns whether the string contains any characters outside the * comfortable 7-bit ASCII range (32 through 127, plus linefeed (10) and @@ -1235,7 +1261,8 @@ public SqlParser.Config configureParser(SqlParser.Config config) { .withUnquotedCasing(getUnquotedCasing()) .withCaseSensitive(isCaseSensitive()) .withConformance(getConformance()) - .withCharLiteralStyles(ImmutableSet.of(CharLiteralStyle.STANDARD)); + .withCharLiteralStyles(ImmutableSet.of(CharLiteralStyle.STANDARD)) + .withCharset(getCharset()); } @Deprecated // to be removed before 2.0 @@ -1295,6 +1322,11 @@ public Casing getQuotedCasing() { return quotedCasing; } + /** Returns the name of this dialect's default character set for string literals. */ + public String getCharset() { + return charset; + } + /** Returns whether matching of identifiers is case-sensitive. 
*/ public boolean isCaseSensitive() { return caseSensitive; @@ -1484,6 +1516,8 @@ Context withLiteralEscapedQuoteString( @Nullable String identifierEscapedQuoteString(); Context withIdentifierEscapedQuoteString( @Nullable String identifierEscapedQuoteString); + String charset(); + Context withCharset(String charset); Casing unquotedCasing(); Context withUnquotedCasing(Casing unquotedCasing); Casing quotedCasing(); @@ -1511,6 +1545,7 @@ private static class ContextImpl implements Context { private final String literalEscapedQuoteString; private final @Nullable String identifierQuoteString; private final @Nullable String identifierEscapedQuoteString; + private final String charset; private final Casing unquotedCasing; private final Casing quotedCasing; private final boolean caseSensitive; @@ -1524,7 +1559,7 @@ private ContextImpl(DatabaseProduct databaseProduct, int databaseMajorVersion, int databaseMinorVersion, String literalQuoteString, String literalEscapedQuoteString, @Nullable String identifierQuoteString, - @Nullable String identifierEscapedQuoteString, + @Nullable String identifierEscapedQuoteString, String charset, Casing quotedCasing, Casing unquotedCasing, boolean caseSensitive, SqlConformance conformance, NullCollation nullCollation, RelDataTypeSystem dataTypeSystem, @@ -1538,6 +1573,7 @@ private ContextImpl(DatabaseProduct databaseProduct, this.literalEscapedQuoteString = literalEscapedQuoteString; this.identifierQuoteString = identifierQuoteString; this.identifierEscapedQuoteString = identifierEscapedQuoteString; + this.charset = charset; this.quotedCasing = requireNonNull(quotedCasing, "quotedCasing"); this.unquotedCasing = requireNonNull(unquotedCasing, "unquotedCasing"); this.caseSensitive = caseSensitive; @@ -1557,7 +1593,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - 
quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1570,7 +1606,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1583,7 +1619,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1596,7 +1632,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1609,7 +1645,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1622,7 +1658,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - 
quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1636,7 +1672,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1650,7 +1686,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1664,7 +1700,20 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, + conformance, nullCollation, dataTypeSystem, jethroInfo); + } + + @Override public String charset() { + return charset; + } + + @Override public Context withCharset(String charset) { + return new ContextImpl(databaseProduct, databaseProductName, + databaseVersion, databaseMajorVersion, databaseMinorVersion, + literalQuoteString, literalEscapedQuoteString, + identifierQuoteString, identifierEscapedQuoteString, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1677,7 +1726,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, 
literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1690,7 +1739,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1703,7 +1752,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1716,7 +1765,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1730,7 +1779,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1743,7 +1792,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, 
literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } @@ -1756,7 +1805,7 @@ private ContextImpl(DatabaseProduct databaseProduct, databaseVersion, databaseMajorVersion, databaseMinorVersion, literalQuoteString, literalEscapedQuoteString, identifierQuoteString, identifierEscapedQuoteString, - quotedCasing, unquotedCasing, caseSensitive, + charset, quotedCasing, unquotedCasing, caseSensitive, conformance, nullCollation, dataTypeSystem, jethroInfo); } } diff --git a/core/src/main/java/org/apache/calcite/sql/dialect/BigQuerySqlDialect.java b/core/src/main/java/org/apache/calcite/sql/dialect/BigQuerySqlDialect.java index 2368040cc4a..7a524e9d755 100644 --- a/core/src/main/java/org/apache/calcite/sql/dialect/BigQuerySqlDialect.java +++ b/core/src/main/java/org/apache/calcite/sql/dialect/BigQuerySqlDialect.java @@ -69,6 +69,7 @@ public class BigQuerySqlDialect extends SqlDialect { .withLiteralEscapedQuoteString("\\'") .withIdentifierQuoteString("`") .withIdentifierEscapedQuoteString("\\`") + .withCharset("UTF-8") .withNullCollation(NullCollation.LOW) .withUnquotedCasing(Casing.UNCHANGED) .withQuotedCasing(Casing.UNCHANGED) diff --git a/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java b/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java index bf40da483e7..58d77d86315 100644 --- a/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java +++ b/core/src/main/java/org/apache/calcite/sql/dialect/HiveSqlDialect.java @@ -42,6 +42,7 @@ public class HiveSqlDialect extends SqlDialect { public static final SqlDialect.Context DEFAULT_CONTEXT = SqlDialect.EMPTY_CONTEXT .withDatabaseProduct(SqlDialect.DatabaseProduct.HIVE) .withIdentifierQuoteString("`") + .withCharset("UTF-8") .withNullCollation(NullCollation.LOW); public static final SqlDialect 
DEFAULT = new HiveSqlDialect(DEFAULT_CONTEXT); diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLiteralChainOperator.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLiteralChainOperator.java index 259df4cbdc5..d4dfd2220e8 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLiteralChainOperator.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLiteralChainOperator.java @@ -166,7 +166,6 @@ private static boolean argTypesValid(SqlCallBinding callBinding) { if (operand.i == 0) { collation = nls.getCollation(); - // print with prefix writer.literal(nls.asSql(true, false, writer.getDialect())); } else { // print without prefix diff --git a/core/src/main/java/org/apache/calcite/sql/parser/SqlAbstractParserImpl.java b/core/src/main/java/org/apache/calcite/sql/parser/SqlAbstractParserImpl.java index 389a6fa5615..a9853fe30c4 100644 --- a/core/src/main/java/org/apache/calcite/sql/parser/SqlAbstractParserImpl.java +++ b/core/src/main/java/org/apache/calcite/sql/parser/SqlAbstractParserImpl.java @@ -537,6 +537,13 @@ protected SqlCall createCall( */ public abstract void setUnquotedCasing(Casing unquotedCasing); + /** + * Sets the name of the character set that the parser assigns to string literals by default. + * + * @param charset Name of the character set, for example {@code UTF-8} + */ + public abstract void setCharset(String charset); + /** * Sets the maximum length for sql identifier. 
*/ diff --git a/core/src/main/java/org/apache/calcite/sql/parser/SqlParser.java b/core/src/main/java/org/apache/calcite/sql/parser/SqlParser.java index 237a366606c..b2004a5189d 100644 --- a/core/src/main/java/org/apache/calcite/sql/parser/SqlParser.java +++ b/core/src/main/java/org/apache/calcite/sql/parser/SqlParser.java @@ -19,6 +19,7 @@ import org.apache.calcite.avatica.util.Casing; import org.apache.calcite.avatica.util.Quoting; import org.apache.calcite.avatica.util.TimeUnit; +import org.apache.calcite.config.CalciteSystemProperty; import org.apache.calcite.config.CharLiteralStyle; import org.apache.calcite.config.Lex; import org.apache.calcite.rel.type.RelDataTypeSystem; @@ -75,6 +76,7 @@ private SqlParser(SqlAbstractParserImpl parser, parser.setIdentifierMaxLength(config.identifierMaxLength()); parser.setTimeUnitCodes(config.timeUnitCodes()); parser.setConformance(config.conformance()); + parser.setCharset(config.charset()); parser.switchTo(SqlAbstractParserImpl.LexicalState.forConfig(config)); } @@ -288,6 +290,13 @@ public interface Config { /** Sets {@link #unquotedCasing()}. */ Config withUnquotedCasing(Casing casing); + @Value.Default default String charset() { + return CalciteSystemProperty.DEFAULT_CHARSET.value(); + } + + /** Sets {@link #charset()}. 
*/ + Config withCharset(String charset); + @Value.Default default Quoting quoting() { return Quoting.DOUBLE_QUOTE; } @@ -379,6 +388,10 @@ public ConfigBuilder setUnquotedCasing(Casing unquotedCasing) { return setConfig(config.withUnquotedCasing(unquotedCasing)); } + public ConfigBuilder setCharset(String charset) { + return setConfig(config.withCharset(charset)); + } + public ConfigBuilder setQuoting(Quoting quoting) { return setConfig(config.withQuoting(quoting)); } diff --git a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java index 6d2612b329a..e2acdeec589 100644 --- a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java +++ b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java @@ -1726,6 +1726,9 @@ private static String toSql(RelNode root, SqlDialect dialect, assertThat(toSql(root), isLinux(expectedSql)); } + + + /** Test case for * [CALCITE-5394] * RelToSql converter fails when semi-join is under a join node. */ @@ -2970,6 +2973,24 @@ private SqlDialect nonOrdinalDialect() { .withBigQuery().ok(expectedBigQuery); } + /** Test case for + * [CALCITE-6001] + * Add withCharset to allow dialect-specific encoding. 
*/ + @Test void testStringLiteralEncoding() { + final SqlParser.Config parserConfig = + BigQuerySqlDialect.DEFAULT.configureParser(SqlParser.config()); + final String query = "select 'ק' from `foodmart`.`product`"; + final String failedQuery = "select 'ק' from \"product\""; + final String expectedBigQuery = "SELECT 'ק'\nFROM foodmart.product"; + final String expectedMySql = "SELECT 'ק'\nFROM `foodmart`.`product`"; + final String expectedRedshift = "SELECT 'ק'\nFROM \"foodmart\".\"product\""; + // Dialects that do not use UTF-8 as their default should have a prefix appended + final String expectedOracle = "SELECT _UTF-8'ק'\nFROM \"foodmart\".\"product\""; + + sql(failedQuery).throws_("Failed to encode 'ק' in character set 'ISO-8859-1'"); + sql(query).parserConfig(parserConfig).withBigQuery().ok(expectedBigQuery); + } + @Test void testIdentifier() { // Note that IGNORE is reserved in BigQuery but not in standard SQL final String query = "select *\n" diff --git a/core/src/test/java/org/apache/calcite/tools/PlannerTest.java b/core/src/test/java/org/apache/calcite/tools/PlannerTest.java index 08abc8f751b..c44d4b972e0 100644 --- a/core/src/test/java/org/apache/calcite/tools/PlannerTest.java +++ b/core/src/test/java/org/apache/calcite/tools/PlannerTest.java @@ -65,6 +65,7 @@ import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.dialect.HiveSqlDialect; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; @@ -820,7 +821,9 @@ private void runDuplicateSortCheck(String sql, String plan) throws Exception { /** Tests that Hive dialect does not generate "AS". 
*/ @Test void testHiveDialect() throws SqlParseException { - Planner planner = getPlanner(null); + final SqlParser.Config parserConfig = + HiveSqlDialect.DEFAULT.configureParser(SqlParser.config()); + Planner planner = getPlanner(null, parserConfig); final String sql = "select * from (select * from \"emps\") as t\n" + "where \"name\" like '%e%'"; SqlNode parse = planner.parse(sql); diff --git a/testkit/src/main/java/org/apache/calcite/sql/parser/SqlParserTest.java b/testkit/src/main/java/org/apache/calcite/sql/parser/SqlParserTest.java index 9e4734b9394..9bcc20b5ccc 100644 --- a/testkit/src/main/java/org/apache/calcite/sql/parser/SqlParserTest.java +++ b/testkit/src/main/java/org/apache/calcite/sql/parser/SqlParserTest.java @@ -71,6 +71,7 @@ import java.util.function.UnaryOperator; import java.util.stream.Collectors; +import static org.apache.calcite.linq4j.tree.Expressions.list; import static org.apache.calcite.util.Static.RESOURCE; import static org.apache.calcite.util.Util.toLinux; @@ -624,6 +625,7 @@ public class SqlParserTest { private static final SqlDialect REDSHIFT = SqlDialect.DatabaseProduct.REDSHIFT.getDialect(); + /** Creates the test fixture that determines the behavior of tests. * Sub-classes that, say, test different parser implementations should * override. */ @@ -1896,6 +1898,24 @@ void checkPeriodPredicate(Checker checker) { .ok("CAST('foo' AS `BAR`)"); } + /** Test case for + * [CALCITE-6001] + * Add withCharset to allow dialect-specific encoding. */ + @Test void testDialectSpecificEncoding() { + final SqlParserFixture f0 = fixture(); + // UTF-8 character that the Calcite default (ISO-8859-1) would not be able to encode. + f0.sql("select 'ק'") + .fails("Failed to encode 'ק' in character set 'ISO-8859-1'"); + // BigQuery has UTF-8 set as the default, so it should be able to encode non-ISO chars. 
+ final SqlParserFixture f = f0.withDialect(BIG_QUERY); + // Character outside ISO-8859-1 (requires UTF-8) + f.sql("select 'ק'").ok("SELECT 'ק'"); + // 7-bit ASCII character + f.sql("select 'm'").ok("SELECT 'm'"); + // ISO-8859-1 character outside 7-bit ASCII + f.sql("select 'Ç'").ok("SELECT 'Ç'"); + } + @Test void testCastFails() { expr("cast(x as varchar(10) ^with^ local time zone)") .fails("(?s).*Encountered \"with\" at line 1, column 23.\n.*"); @@ -3590,9 +3610,9 @@ void checkPeriodPredicate(Checker checker) { expr("'abba'\n'abba'").same(); expr("'abba'\n'0001'").same(); expr("N'yabba'\n'dabba'\n'doo'") - .ok("_ISO-8859-1'yabba'\n'dabba'\n'doo'"); + .ok("'yabba'\n'dabba'\n'doo'"); expr("_iso-8859-1'yabba'\n'dabba'\n'don''t'") - .ok("_ISO-8859-1'yabba'\n'dabba'\n'don''t'"); + .ok("'yabba'\n'dabba'\n'don''t'"); expr("x'01aa'\n'03ff'") .ok("X'01AA'\n'03FF'"); @@ -5304,19 +5324,19 @@ void checkPeriodPredicate(Checker checker) { expr("_latin1'hi'") .ok("_LATIN1'hi'"); expr("N'is it a plane? no it''s superman!'") - .ok("_ISO-8859-1'is it a plane? no it''s superman!'"); + .ok("'is it a plane? no it''s superman!'"); expr("n'lowercase n'") - .ok("_ISO-8859-1'lowercase n'"); + .ok("'lowercase n'"); expr("'boring string'").same(); expr("_iSo-8859-1'bye'") - .ok("_ISO-8859-1'bye'"); + .ok("'bye'"); expr("'three'\n' blind'\n' mice'").same(); expr("'three' -- comment\n' blind'\n' mice'") .ok("'three'\n' blind'\n' mice'"); expr("N'bye' \t\r\f\f\n' bye'") - .ok("_ISO-8859-1'bye'\n' bye'"); + .ok("'bye'\n' bye'"); expr("_iso-8859-1'bye'\n\n--\n-- this is a comment\n' bye'") - .ok("_ISO-8859-1'bye'\n' bye'"); + .ok("'bye'\n' bye'"); expr("_utf8'hi'") .ok("_UTF8'hi'"); @@ -5340,7 +5360,7 @@ void checkPeriodPredicate(Checker checker) { // valid syntax, but should give a validator error sql("select (N'1' '2') from t") - .ok("SELECT _ISO-8859-1'1'\n" + .ok("SELECT '1'\n" + "'2'\n" + "FROM `T`"); }