Skip to content

Commit

Permalink
[CALCITE-6001] Add dialect-specific encoding for string literals
Browse files Browse the repository at this point in the history
  • Loading branch information
tanclary committed Oct 2, 2024
1 parent 60e0a3f commit bac5b31
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 36 deletions.
10 changes: 8 additions & 2 deletions core/src/main/codegen/templates/Parser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ public class ${parser.class} extends SqlAbstractParserImpl

private Casing unquotedCasing;
private Casing quotedCasing;
private String charset;
private int identifierMaxLength;
private SqlConformance conformance;

Expand All @@ -180,6 +181,7 @@ public class ${parser.class} extends SqlAbstractParserImpl
((SourceStringReader) reader).getSourceString();
parser.setOriginalSql(sql);
}
parser.setConformance(SqlConformanceEnum.DEFAULT);
return parser;
}
};
Expand Down Expand Up @@ -224,6 +226,10 @@ public class ${parser.class} extends SqlAbstractParserImpl
this.unquotedCasing = unquotedCasing;
}

/**
 * Sets the character set name for this parser.
 *
 * <p>The stored value is read by {@code StringLiteral()}, which uses
 * {@code this.charset} as the initial {@code charSet} of a string literal.
 *
 * @param charset character set name; stored as given, without validation
 */
public void setCharset(String charset) {
this.charset = charset;
}

/**
 * Stores the maximum identifier length in the {@code identifierMaxLength}
 * field of this parser.
 *
 * @param identifierMaxLength value to store; not range-checked here
 */
public void setIdentifierMaxLength(int identifierMaxLength) {
this.identifierMaxLength = identifierMaxLength;
}
Expand Down Expand Up @@ -4645,7 +4651,7 @@ SqlNode StringLiteral() :
String p;
final List<SqlLiteral> frags;
char unicodeEscapeChar = 0;
String charSet = null;
String charSet = this.charset;
SqlCharStringLiteral literal;
}
{
Expand Down Expand Up @@ -4779,7 +4785,7 @@ SqlNode StringLiteral() :
p = SqlParserUtil.stripQuotes(getToken(0).image, DQ, DQ, "\\\"",
Casing.UNCHANGED);
try {
return SqlLiteral.createCharString(p, charSet, getPos());
return literal = SqlLiteral.createCharString(p, charSet, getPos());
} catch (java.nio.charset.UnsupportedCharsetException e) {
throw SqlUtil.newContextException(getPos(),
RESOURCE.unknownCharacterSet(charSet));
Expand Down
99 changes: 74 additions & 25 deletions core/src/main/java/org/apache/calcite/sql/SqlDialect.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ public class SqlDialect {
private final Casing unquotedCasing;
private final Casing quotedCasing;
private final boolean caseSensitive;
private final String charset;

//~ Constructors -----------------------------------------------------------

Expand Down Expand Up @@ -241,14 +242,15 @@ public SqlDialect(Context context) {
this.unquotedCasing = requireNonNull(context.unquotedCasing());
this.quotedCasing = requireNonNull(context.quotedCasing());
this.caseSensitive = context.caseSensitive();
this.charset = context.charset();
}

//~ Methods ----------------------------------------------------------------

/** Creates an empty context. Use {@link #EMPTY_CONTEXT} to reference the instance. */
private static Context emptyContext() {
return new ContextImpl(DatabaseProduct.UNKNOWN, null, null, -1, -1,
"'", "''", null, null,
"'", "''", null, null, "ISO-8859-1",
Casing.UNCHANGED, Casing.TO_UPPER, true, SqlConformanceEnum.DEFAULT,
NullCollation.HIGH, RelDataTypeSystemImpl.DEFAULT,
JethroDataSqlDialect.JethroInfo.EMPTY);
Expand Down Expand Up @@ -433,13 +435,19 @@ public final String quoteStringLiteral(String val) {
*/
public void quoteStringLiteral(StringBuilder buf, @Nullable String charsetName,
String val) {
if (charsetName != null) {
buf.append("_");
buf.append(charsetName);
if (containsNonISO88591(val) && charsetName == null) {
quoteStringLiteralUnicode(buf, val);
} else {
// Don't append charset if it matches dialect default, e.g. BigQuery shouldn't append _UTF-8
// because that is the default
if (charsetName != null && !charsetName.equals(getCharset())) {
buf.append("_");
buf.append(charsetName);
}
buf.append(literalQuoteString);
buf.append(val.replace(literalEndQuoteString, literalEscapedQuote));
buf.append(literalEndQuoteString);
}
buf.append(literalQuoteString);
buf.append(val.replace(literalEndQuoteString, literalEscapedQuote));
buf.append(literalEndQuoteString);
}

public void unparseCall(SqlWriter writer, SqlCall call, int leftPrec,
Expand Down Expand Up @@ -564,6 +572,24 @@ public void unparseTableScanHints(SqlWriter writer,
SqlNodeList hints, int leftPrec, int rightPrec) {
}

/**
 * Tests whether a string has at least one character that lies outside
 * ISO-8859-1 (Calcite's default character set).
 *
 * <p>A character counts as outside ISO-8859-1 when its {@code char} value
 * is greater than 255.
 *
 * @param str String to inspect
 * @return {@code true} if some character of {@code str} is above 255;
 *   {@code false} otherwise, including for the empty string
 */
protected static boolean containsNonISO88591(String str) {
  final int length = str.length();
  for (int i = 0; i < length; i++) {
    if (str.charAt(i) > 0xFF) {
      // First character beyond the ISO-8859-1 range settles the answer.
      return true;
    }
  }
  return false;
}

/**
* Returns whether the string contains any characters outside the
* comfortable 7-bit ASCII range (32 through 127, plus linefeed (10) and
Expand Down Expand Up @@ -1235,7 +1261,8 @@ public SqlParser.Config configureParser(SqlParser.Config config) {
.withUnquotedCasing(getUnquotedCasing())
.withCaseSensitive(isCaseSensitive())
.withConformance(getConformance())
.withCharLiteralStyles(ImmutableSet.of(CharLiteralStyle.STANDARD));
.withCharLiteralStyles(ImmutableSet.of(CharLiteralStyle.STANDARD))
.withCharset(getCharset());
}

@Deprecated // to be removed before 2.0
Expand Down Expand Up @@ -1295,6 +1322,11 @@ public Casing getQuotedCasing() {
return quotedCasing;
}

/**
 * Returns the name of the charset to use for encoding.
 *
 * <p>The value is the one supplied by {@link Context#charset()} when this
 * dialect was constructed.
 */
public String getCharset() {
return charset;
}

/** Returns whether matching of identifiers is case-sensitive. */
public boolean isCaseSensitive() {
return caseSensitive;
Expand Down Expand Up @@ -1484,6 +1516,8 @@ Context withLiteralEscapedQuoteString(
@Nullable String identifierEscapedQuoteString();
Context withIdentifierEscapedQuoteString(
@Nullable String identifierEscapedQuoteString);
String charset();
Context withCharset(String charset);
Casing unquotedCasing();
Context withUnquotedCasing(Casing unquotedCasing);
Casing quotedCasing();
Expand Down Expand Up @@ -1511,6 +1545,7 @@ private static class ContextImpl implements Context {
private final String literalEscapedQuoteString;
private final @Nullable String identifierQuoteString;
private final @Nullable String identifierEscapedQuoteString;
private final String charset;
private final Casing unquotedCasing;
private final Casing quotedCasing;
private final boolean caseSensitive;
Expand All @@ -1524,7 +1559,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
int databaseMajorVersion, int databaseMinorVersion,
String literalQuoteString, String literalEscapedQuoteString,
@Nullable String identifierQuoteString,
@Nullable String identifierEscapedQuoteString,
@Nullable String identifierEscapedQuoteString, String charset,
Casing quotedCasing, Casing unquotedCasing, boolean caseSensitive,
SqlConformance conformance, NullCollation nullCollation,
RelDataTypeSystem dataTypeSystem,
Expand All @@ -1538,6 +1573,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
this.literalEscapedQuoteString = literalEscapedQuoteString;
this.identifierQuoteString = identifierQuoteString;
this.identifierEscapedQuoteString = identifierEscapedQuoteString;
this.charset = charset;
this.quotedCasing = requireNonNull(quotedCasing, "quotedCasing");
this.unquotedCasing = requireNonNull(unquotedCasing, "unquotedCasing");
this.caseSensitive = caseSensitive;
Expand All @@ -1557,7 +1593,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1570,7 +1606,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1583,7 +1619,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1596,7 +1632,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1609,7 +1645,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1622,7 +1658,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1636,7 +1672,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1650,7 +1686,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1664,7 +1700,20 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

/** Returns the charset value held by this context. */
@Override public String charset() {
return charset;
}

/**
 * Returns a new {@code ContextImpl} that carries over every field of this
 * context except the charset, which is replaced by the argument.
 *
 * @param charset charset for the new context
 */
@Override public Context withCharset(String charset) {
return new ContextImpl(databaseProduct, databaseProductName,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1677,7 +1726,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1690,7 +1739,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1703,7 +1752,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1716,7 +1765,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1730,7 +1779,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1743,7 +1792,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1756,7 +1805,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public class BigQuerySqlDialect extends SqlDialect {
.withLiteralEscapedQuoteString("\\'")
.withIdentifierQuoteString("`")
.withIdentifierEscapedQuoteString("\\`")
.withCharset("UTF-8")
.withNullCollation(NullCollation.LOW)
.withUnquotedCasing(Casing.UNCHANGED)
.withQuotedCasing(Casing.UNCHANGED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,13 @@ protected SqlCall createCall(
*/
public abstract void setUnquotedCasing(Casing unquotedCasing);

/**
 * Sets the name of the character set for this parser.
 *
 * <p>{@code SqlParser} calls this with {@code config.charset()} when it
 * configures a parser implementation.
 *
 * @param charset Character set name to set.
 */
public abstract void setCharset(String charset);

/**
* Sets the maximum length for sql identifier.
*/
Expand Down
13 changes: 13 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/parser/SqlParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.avatica.util.Quoting;
import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.config.CalciteSystemProperty;
import org.apache.calcite.config.CharLiteralStyle;
import org.apache.calcite.config.Lex;
import org.apache.calcite.rel.type.RelDataTypeSystem;
Expand Down Expand Up @@ -75,6 +76,7 @@ private SqlParser(SqlAbstractParserImpl parser,
parser.setIdentifierMaxLength(config.identifierMaxLength());
parser.setTimeUnitCodes(config.timeUnitCodes());
parser.setConformance(config.conformance());
parser.setCharset(config.charset());
parser.switchTo(SqlAbstractParserImpl.LexicalState.forConfig(config));
}

Expand Down Expand Up @@ -288,6 +290,13 @@ public interface Config {
/** Sets {@link #unquotedCasing()}. */
Config withUnquotedCasing(Casing casing);

/**
 * Returns the character set name of this configuration.
 *
 * <p>Unless overridden via {@link #withCharset(String)}, the value is that
 * of {@link CalciteSystemProperty#DEFAULT_CHARSET}.
 */
@Value.Default default String charset() {
return CalciteSystemProperty.DEFAULT_CHARSET.value();
}

/** Sets {@link #charset()}. */
Config withCharset(String charset);

/** Returns the quoting setting; the default is {@link Quoting#DOUBLE_QUOTE}. */
@Value.Default default Quoting quoting() {
return Quoting.DOUBLE_QUOTE;
}
Expand Down Expand Up @@ -379,6 +388,10 @@ public ConfigBuilder setUnquotedCasing(Casing unquotedCasing) {
return setConfig(config.withUnquotedCasing(unquotedCasing));
}

/**
 * Applies {@code withCharset(charset)} to the current config and passes
 * the result to {@code setConfig}.
 *
 * @param charset charset name to put in the config
 * @return whatever {@code setConfig} returns
 */
public ConfigBuilder setCharset(String charset) {
return setConfig(config.withCharset(charset));
}

/**
 * Applies {@code withQuoting(quoting)} to the current config and passes
 * the result to {@code setConfig}.
 *
 * @param quoting quoting value to put in the config
 * @return whatever {@code setConfig} returns
 */
public ConfigBuilder setQuoting(Quoting quoting) {
return setConfig(config.withQuoting(quoting));
}
Expand Down
Loading

0 comments on commit bac5b31

Please sign in to comment.