Skip to content

Commit

Permalink
[CALCITE-6001] Add dialect-specific encoding for string literals
Browse files Browse the repository at this point in the history
  • Loading branch information
tanclary committed Oct 2, 2024
1 parent 60e0a3f commit 3b37b0f
Show file tree
Hide file tree
Showing 10 changed files with 157 additions and 37 deletions.
10 changes: 8 additions & 2 deletions core/src/main/codegen/templates/Parser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ public class ${parser.class} extends SqlAbstractParserImpl

private Casing unquotedCasing;
private Casing quotedCasing;
private String charset;
private int identifierMaxLength;
private SqlConformance conformance;

Expand All @@ -180,6 +181,7 @@ public class ${parser.class} extends SqlAbstractParserImpl
((SourceStringReader) reader).getSourceString();
parser.setOriginalSql(sql);
}
parser.setConformance(SqlConformanceEnum.DEFAULT);
return parser;
}
};
Expand Down Expand Up @@ -224,6 +226,10 @@ public class ${parser.class} extends SqlAbstractParserImpl
this.unquotedCasing = unquotedCasing;
}

/** Stores the character set name on this parser instance.
 *
 * <p>{@code StringLiteral()} reads this field as the initial value of its
 * local {@code charSet} variable. */
public void setCharset(String charset) {
this.charset = charset;
}

/** Stores the maximum identifier length on this parser instance. */
public void setIdentifierMaxLength(int identifierMaxLength) {
this.identifierMaxLength = identifierMaxLength;
}
Expand Down Expand Up @@ -4645,7 +4651,7 @@ SqlNode StringLiteral() :
String p;
final List<SqlLiteral> frags;
char unicodeEscapeChar = 0;
String charSet = null;
String charSet = this.charset;
SqlCharStringLiteral literal;
}
{
Expand Down Expand Up @@ -4779,7 +4785,7 @@ SqlNode StringLiteral() :
p = SqlParserUtil.stripQuotes(getToken(0).image, DQ, DQ, "\\\"",
Casing.UNCHANGED);
try {
return SqlLiteral.createCharString(p, charSet, getPos());
return literal = SqlLiteral.createCharString(p, charSet, getPos());
} catch (java.nio.charset.UnsupportedCharsetException e) {
throw SqlUtil.newContextException(getPos(),
RESOURCE.unknownCharacterSet(charSet));
Expand Down
99 changes: 74 additions & 25 deletions core/src/main/java/org/apache/calcite/sql/SqlDialect.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ public class SqlDialect {
private final Casing unquotedCasing;
private final Casing quotedCasing;
private final boolean caseSensitive;
private final String charset;

//~ Constructors -----------------------------------------------------------

Expand Down Expand Up @@ -241,14 +242,15 @@ public SqlDialect(Context context) {
this.unquotedCasing = requireNonNull(context.unquotedCasing());
this.quotedCasing = requireNonNull(context.quotedCasing());
this.caseSensitive = context.caseSensitive();
this.charset = context.charset();
}

//~ Methods ----------------------------------------------------------------

/** Creates an empty context. Use {@link #EMPTY_CONTEXT} to reference the instance. */
private static Context emptyContext() {
return new ContextImpl(DatabaseProduct.UNKNOWN, null, null, -1, -1,
"'", "''", null, null,
"'", "''", null, null, "ISO-8859-1",
Casing.UNCHANGED, Casing.TO_UPPER, true, SqlConformanceEnum.DEFAULT,
NullCollation.HIGH, RelDataTypeSystemImpl.DEFAULT,
JethroDataSqlDialect.JethroInfo.EMPTY);
Expand Down Expand Up @@ -433,13 +435,19 @@ public final String quoteStringLiteral(String val) {
*/
public void quoteStringLiteral(StringBuilder buf, @Nullable String charsetName,
String val) {
if (charsetName != null) {
buf.append("_");
buf.append(charsetName);
if (containsNonISO88591(val) && charsetName == null) {
quoteStringLiteralUnicode(buf, val);
} else {
// Don't append charset if it matches dialect default, e.g. BigQuery shouldn't append _UTF-8
// because that is the default
if (charsetName != null && !charsetName.equals(getCharset())) {
buf.append("_");
buf.append(charsetName);
}
buf.append(literalQuoteString);
buf.append(val.replace(literalEndQuoteString, literalEscapedQuote));
buf.append(literalEndQuoteString);
}
buf.append(literalQuoteString);
buf.append(val.replace(literalEndQuoteString, literalEscapedQuote));
buf.append(literalEndQuoteString);
}

public void unparseCall(SqlWriter writer, SqlCall call, int leftPrec,
Expand Down Expand Up @@ -564,6 +572,24 @@ public void unparseTableScanHints(SqlWriter writer,
SqlNodeList hints, int leftPrec, int rightPrec) {
}

/**
* Returns whether the string contains any characters outside of
* ISO-8859-1 (Calcite's default character set).
*
* <p></p>
*
* @param str String
* @return whether str contains any non-ISO-8859-1 characters.
*/
protected static boolean containsNonISO88591(String str) {
for (char c : str.toCharArray()) {

Check failure on line 585 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 23)

[Task :core:checkstyleMain] [Indentation] 'for' has incorrect indentation level 8, expected level should be 4.

Check failure on line 585 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 11, Pacific/Chatham Timezone)

[Task :core:checkstyleMain] [Indentation] 'for' has incorrect indentation level 8, expected level should be 4.

Check failure on line 585 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 21)

[Task :core:checkstyleMain] [Indentation] 'for' has incorrect indentation level 8, expected level should be 4.

Check failure on line 585 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 17)

[Task :core:checkstyleMain] [Indentation] 'for' has incorrect indentation level 8, expected level should be 4.

Check failure on line 585 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 11, Avatica main)

[Task :core:checkstyleMain] [Indentation] 'for' has incorrect indentation level 8, expected level should be 4.

Check failure on line 585 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 8, oldest Guava, America/New_York Timezone)

[Task :core:checkstyleMain] [Indentation] 'for' has incorrect indentation level 8, expected level should be 4.

Check failure on line 585 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 8, latest Guava, America/New_York Timezone)

[Task :core:checkstyleMain] [Indentation] 'for' has incorrect indentation level 8, expected level should be 4.
if (c > 255) {

Check failure on line 586 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 23)

[Task :core:checkstyleMain] [Indentation] 'if' has incorrect indentation level 12, expected level should be 6.

Check failure on line 586 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 11, Pacific/Chatham Timezone)

[Task :core:checkstyleMain] [Indentation] 'if' has incorrect indentation level 12, expected level should be 6.

Check failure on line 586 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 21)

[Task :core:checkstyleMain] [Indentation] 'if' has incorrect indentation level 12, expected level should be 6.

Check failure on line 586 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 17)

[Task :core:checkstyleMain] [Indentation] 'if' has incorrect indentation level 12, expected level should be 6.

Check failure on line 586 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 11, Avatica main)

[Task :core:checkstyleMain] [Indentation] 'if' has incorrect indentation level 12, expected level should be 6.

Check failure on line 586 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 8, oldest Guava, America/New_York Timezone)

[Task :core:checkstyleMain] [Indentation] 'if' has incorrect indentation level 12, expected level should be 6.

Check failure on line 586 in core/src/main/java/org/apache/calcite/sql/SqlDialect.java

View workflow job for this annotation

GitHub Actions / Linux (JDK 8, latest Guava, America/New_York Timezone)

[Task :core:checkstyleMain] [Indentation] 'if' has incorrect indentation level 12, expected level should be 6.
return true; // Found a non-ISO-8859-1 character
}
}
return false; // No non-ISO-8859-1 characters found
}

/**
* Returns whether the string contains any characters outside the
* comfortable 7-bit ASCII range (32 through 127, plus linefeed (10) and
Expand Down Expand Up @@ -1235,7 +1261,8 @@ public SqlParser.Config configureParser(SqlParser.Config config) {
.withUnquotedCasing(getUnquotedCasing())
.withCaseSensitive(isCaseSensitive())
.withConformance(getConformance())
.withCharLiteralStyles(ImmutableSet.of(CharLiteralStyle.STANDARD));
.withCharLiteralStyles(ImmutableSet.of(CharLiteralStyle.STANDARD))
.withCharset(getCharset());
}

@Deprecated // to be removed before 2.0
Expand Down Expand Up @@ -1295,6 +1322,11 @@ public Casing getQuotedCasing() {
return quotedCasing;
}

/** Returns the name of the character set this dialect uses for encoding.
 *
 * <p>The value is taken from {@link Context#charset()} when the dialect is
 * constructed. */
public String getCharset() {
return charset;
}

/** Returns whether matching of identifiers is case-sensitive.
 *
 * <p>The flag is taken from {@link Context#caseSensitive()} when the dialect
 * is constructed. */
public boolean isCaseSensitive() {
return caseSensitive;
}
Expand Down Expand Up @@ -1484,6 +1516,8 @@ Context withLiteralEscapedQuoteString(
@Nullable String identifierEscapedQuoteString();
Context withIdentifierEscapedQuoteString(
@Nullable String identifierEscapedQuoteString);
String charset();
Context withCharset(String charset);
Casing unquotedCasing();
Context withUnquotedCasing(Casing unquotedCasing);
Casing quotedCasing();
Expand Down Expand Up @@ -1511,6 +1545,7 @@ private static class ContextImpl implements Context {
private final String literalEscapedQuoteString;
private final @Nullable String identifierQuoteString;
private final @Nullable String identifierEscapedQuoteString;
private final String charset;
private final Casing unquotedCasing;
private final Casing quotedCasing;
private final boolean caseSensitive;
Expand All @@ -1524,7 +1559,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
int databaseMajorVersion, int databaseMinorVersion,
String literalQuoteString, String literalEscapedQuoteString,
@Nullable String identifierQuoteString,
@Nullable String identifierEscapedQuoteString,
@Nullable String identifierEscapedQuoteString, String charset,
Casing quotedCasing, Casing unquotedCasing, boolean caseSensitive,
SqlConformance conformance, NullCollation nullCollation,
RelDataTypeSystem dataTypeSystem,
Expand All @@ -1538,6 +1573,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
this.literalEscapedQuoteString = literalEscapedQuoteString;
this.identifierQuoteString = identifierQuoteString;
this.identifierEscapedQuoteString = identifierEscapedQuoteString;
this.charset = charset;
this.quotedCasing = requireNonNull(quotedCasing, "quotedCasing");
this.unquotedCasing = requireNonNull(unquotedCasing, "unquotedCasing");
this.caseSensitive = caseSensitive;
Expand All @@ -1557,7 +1593,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1570,7 +1606,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1583,7 +1619,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1596,7 +1632,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1609,7 +1645,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1622,7 +1658,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1636,7 +1672,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1650,7 +1686,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1664,7 +1700,20 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

// Returns the charset name held by this context.
@Override public String charset() {
return charset;
}

// Returns a new context that differs from this one only in its charset.
// The parameter shadows the field of the same name, so the constructor
// receives the new charset while every other argument is this context's
// current value.
@Override public Context withCharset(String charset) {
return new ContextImpl(databaseProduct, databaseProductName,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1677,7 +1726,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1690,7 +1739,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1703,7 +1752,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1716,7 +1765,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1730,7 +1779,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1743,7 +1792,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1756,7 +1805,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public class BigQuerySqlDialect extends SqlDialect {
.withLiteralEscapedQuoteString("\\'")
.withIdentifierQuoteString("`")
.withIdentifierEscapedQuoteString("\\`")
.withCharset("UTF-8")
.withNullCollation(NullCollation.LOW)
.withUnquotedCasing(Casing.UNCHANGED)
.withQuotedCasing(Casing.UNCHANGED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class HiveSqlDialect extends SqlDialect {
public static final SqlDialect.Context DEFAULT_CONTEXT = SqlDialect.EMPTY_CONTEXT
.withDatabaseProduct(SqlDialect.DatabaseProduct.HIVE)
.withIdentifierQuoteString("`")
.withCharset("UTF-8")
.withNullCollation(NullCollation.LOW);

public static final SqlDialect DEFAULT = new HiveSqlDialect(DEFAULT_CONTEXT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ private static boolean argTypesValid(SqlCallBinding callBinding) {
if (operand.i == 0) {
collation = nls.getCollation();

// print with prefix
writer.literal(nls.asSql(true, false, writer.getDialect()));
} else {
// print without prefix
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,13 @@ protected SqlCall createCall(
*/
public abstract void setUnquotedCasing(Casing unquotedCasing);

/**
 * Sets the name of the character set for this parser.
 *
 * <p>NOTE(review): presumably this is the charset applied to string
 * literals that carry no explicit charset prefix; confirm against the
 * generated parser's {@code StringLiteral()} production.
 *
 * @param charset Charset name to set; presumably a name such as
 *     {@code "UTF-8"} or {@code "ISO-8859-1"} (TODO confirm).
 */
public abstract void setCharset(String charset);

/**
* Sets the maximum length for sql identifier.
*/
Expand Down
Loading

0 comments on commit 3b37b0f

Please sign in to comment.