sqlglot.dialects.clickhouse
1from __future__ import annotations 2 3import typing as t 4import datetime 5 6from sqlglot import exp, generator, parser, tokens 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 NormalizationStrategy, 10 arg_max_or_min_no_count, 11 build_date_delta, 12 build_formatted_time, 13 inline_array_sql, 14 json_extract_segments, 15 json_path_key_only_name, 16 no_pivot_sql, 17 build_json_extract_path, 18 rename_func, 19 sha256_sql, 20 var_map_sql, 21 timestamptrunc_sql, 22 unit_to_var, 23 trim_sql, 24) 25from sqlglot.generator import Generator 26from sqlglot.helper import is_int, seq_get 27from sqlglot.tokens import Token, TokenType 28 29DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 30 31 32def _build_date_format(args: t.List) -> exp.TimeToStr: 33 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 34 35 timezone = seq_get(args, 2) 36 if timezone: 37 expr.set("zone", timezone) 38 39 return expr 40 41 42def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 43 scale = expression.args.get("scale") 44 timestamp = expression.this 45 46 if scale in (None, exp.UnixToTime.SECONDS): 47 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 48 if scale == exp.UnixToTime.MILLIS: 49 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 50 if scale == exp.UnixToTime.MICROS: 51 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 52 if scale == exp.UnixToTime.NANOS: 53 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 54 55 return self.func( 56 "fromUnixTimestamp", 57 exp.cast( 58 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 59 ), 60 ) 61 62 63def _lower_func(sql: str) -> str: 64 index = sql.index("(") 65 return sql[:index].lower() + sql[index:] 66 67 68def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 69 quantile = expression.args["quantile"] 70 args = f"({self.sql(expression, 'this')})" 71 72 if isinstance(quantile, exp.Array): 73 func = self.func("quantiles", *quantile) 74 else: 75 func = self.func("quantile", quantile) 76 77 return func + args 78 79 80def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 81 if len(args) == 1: 82 return exp.CountIf(this=seq_get(args, 0)) 83 84 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 85 86 87def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 88 if len(args) == 3: 89 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 90 91 strtodate = exp.StrToDate.from_arg_list(args) 92 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 93 94 95def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 96 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 97 if not expression.unit: 98 return rename_func(name)(self, expression) 99 100 return self.func( 101 name, 102 unit_to_var(expression), 103 expression.expression, 104 expression.this, 105 ) 106 107 return _delta_sql 108 109 110def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 111 tz = expression.args.get("zone") 112 datatype = exp.DataType.build(exp.DataType.Type.TIMESTAMP) 113 ts = expression.this 114 if tz: 115 # build a datatype that encodes the timezone as a type parameter, eg DateTime('America/Los_Angeles') 116 datatype = exp.DataType.build( 117 exp.DataType.Type.TIMESTAMPTZ, # Type.TIMESTAMPTZ maps to DateTime 118 expressions=[exp.DataTypeParam(this=tz)], 119 ) 120 121 if isinstance(ts, exp.Literal): 122 # strip the timezone out of the literal, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 123 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if it's part of the timestamp string 124 ts_without_tz = ( 125 datetime.datetime.fromisoformat(ts.name).replace(tzinfo=None).isoformat(sep=" ") 126 ) 127 ts = exp.Literal.string(ts_without_tz) 128 129 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 130 131 132class ClickHouse(Dialect): 133 NORMALIZE_FUNCTIONS: bool | str = False 134 NULL_ORDERING = "nulls_are_last" 135 SUPPORTS_USER_DEFINED_TYPES = False 136 SAFE_DIVISION = True 137 LOG_BASE_FIRST: t.Optional[bool] = None 138 FORCE_EARLY_ALIAS_REF_EXPANSION = True 139 140 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 141 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 142 143 UNESCAPED_SEQUENCES = { 144 "\\0": "\0", 145 } 146 147 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 148 149 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 150 exp.Except: False, 151 exp.Intersect: False, 152 exp.Union: None, 153 } 154 155 class Tokenizer(tokens.Tokenizer): 156 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 157 IDENTIFIERS = ['"', "`"] 158 STRING_ESCAPES = ["'", "\\"] 159 BIT_STRINGS = [("0b", "")] 160 HEX_STRINGS = [("0x", ""), ("0X", "")] 161 HEREDOC_STRINGS = ["$"] 162 163 KEYWORDS = { 164 **tokens.Tokenizer.KEYWORDS, 165 "ATTACH": TokenType.COMMAND, 166 "DATE32": TokenType.DATE32, 167 "DATETIME64": TokenType.DATETIME64, 168 "DICTIONARY": TokenType.DICTIONARY, 169 "ENUM8": TokenType.ENUM8, 170 "ENUM16": TokenType.ENUM16, 171 "FINAL": TokenType.FINAL, 172 "FIXEDSTRING": TokenType.FIXEDSTRING, 173 "FLOAT32": TokenType.FLOAT, 174 "FLOAT64": TokenType.DOUBLE, 175 "GLOBAL": TokenType.GLOBAL, 176 "INT256": TokenType.INT256, 177 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 178 "MAP": TokenType.MAP, 179 "NESTED": TokenType.NESTED, 180 "SAMPLE": TokenType.TABLE_SAMPLE, 181 "TUPLE": TokenType.STRUCT, 182 "UINT128": TokenType.UINT128, 183 "UINT16": TokenType.USMALLINT, 184 "UINT256": TokenType.UINT256, 185 "UINT32": TokenType.UINT, 186 "UINT64": TokenType.UBIGINT, 187 "UINT8": TokenType.UTINYINT, 188 "IPV4": TokenType.IPV4, 189 "IPV6": TokenType.IPV6, 190 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 191 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 192 "SYSTEM": TokenType.COMMAND, 193 "PREWHERE": TokenType.PREWHERE, 194 } 195 KEYWORDS.pop("/*+") 196 197 SINGLE_TOKENS = { 198 **tokens.Tokenizer.SINGLE_TOKENS, 199 "$": TokenType.HEREDOC_STRING, 200 } 201 202 class Parser(parser.Parser): 203 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 204 # * select x from t1 union all select x from t2 limit 1; 205 # * select x from t1 union all (select x from t2 limit 1); 206 MODIFIERS_ATTACHED_TO_SET_OP = False 207 INTERVAL_SPANS = False 208 209 FUNCTIONS = { 210 **parser.Parser.FUNCTIONS, 211 "ANY": exp.AnyValue.from_arg_list, 212 "ARRAYSUM": exp.ArraySum.from_arg_list, 213 "COUNTIF": _build_count_if, 214 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 215 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 217 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATE_FORMAT": _build_date_format, 219 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 220 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 221 "FORMATDATETIME": _build_date_format, 222 "JSONEXTRACTSTRING": build_json_extract_path( 223 exp.JSONExtractScalar, zero_based_indexing=False 224 ), 225 "MAP": parser.build_var_map, 226 "MATCH": exp.RegexpLike.from_arg_list, 227 "RANDCANONICAL": exp.Rand.from_arg_list, 228 "STR_TO_DATE": _build_str_to_date, 229 "TUPLE": exp.Struct.from_arg_list, 230 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 231 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 233 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "UNIQ": exp.ApproxDistinct.from_arg_list, 235 "XOR": lambda args: exp.Xor(expressions=args), 236 "MD5": exp.MD5Digest.from_arg_list, 237 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 238 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 239 } 240 241 AGG_FUNCTIONS = { 242 "count", 243 "min", 244 "max", 245 "sum", 246 "avg", 247 "any", 248 "stddevPop", 249 "stddevSamp", 250 "varPop", 251 "varSamp", 252 "corr", 253 "covarPop", 254 "covarSamp", 255 "entropy", 256 "exponentialMovingAverage", 257 "intervalLengthSum", 258 "kolmogorovSmirnovTest", 259 "mannWhitneyUTest", 260 "median", 261 "rankCorr", 262 "sumKahan", 263 "studentTTest", 264 "welchTTest", 265 "anyHeavy", 266 "anyLast", 267 "boundingRatio", 268 "first_value", 269 "last_value", 270 "argMin", 271 "argMax", 272 "avgWeighted", 273 "topK", 274 "topKWeighted", 275 "deltaSum", 276 "deltaSumTimestamp", 277 "groupArray", 278 "groupArrayLast", 279 "groupUniqArray", 280 "groupArrayInsertAt", 281 "groupArrayMovingAvg", 282 "groupArrayMovingSum", 283 "groupArraySample", 284 "groupBitAnd", 285 "groupBitOr", 286 "groupBitXor", 287 "groupBitmap", 288 "groupBitmapAnd", 289 "groupBitmapOr", 290 "groupBitmapXor", 291 "sumWithOverflow", 292 "sumMap", 293 "minMap", 294 "maxMap", 295 "skewSamp", 296 "skewPop", 297 "kurtSamp", 298 "kurtPop", 299 "uniq", 300 "uniqExact", 301 "uniqCombined", 302 "uniqCombined64", 303 "uniqHLL12", 304 "uniqTheta", 305 "quantile", 306 "quantiles", 307 "quantileExact", 308 "quantilesExact", 309 "quantileExactLow", 310 "quantilesExactLow", 311 "quantileExactHigh", 312 "quantilesExactHigh", 313 "quantileExactWeighted", 314 "quantilesExactWeighted", 315 "quantileTiming", 316 "quantilesTiming", 317 "quantileTimingWeighted", 318 "quantilesTimingWeighted", 319 "quantileDeterministic", 320 "quantilesDeterministic", 321 "quantileTDigest", 322 "quantilesTDigest", 323 "quantileTDigestWeighted", 324 "quantilesTDigestWeighted", 325 "quantileBFloat16", 326 "quantilesBFloat16", 327 "quantileBFloat16Weighted", 328 "quantilesBFloat16Weighted", 329 "simpleLinearRegression", 330 "stochasticLinearRegression", 331 "stochasticLogisticRegression", 332 "categoricalInformationValue", 333 "contingency", 334 "cramersV", 335 "cramersVBiasCorrected", 336 "theilsU", 337 "maxIntersections", 338 "maxIntersectionsPosition", 339 "meanZTest", 340 "quantileInterpolatedWeighted", 341 "quantilesInterpolatedWeighted", 342 "quantileGK", 343 "quantilesGK", 344 "sparkBar", 345 "sumCount", 346 "largestTriangleThreeBuckets", 347 "histogram", 348 "sequenceMatch", 349 "sequenceCount", 350 "windowFunnel", 351 "retention", 352 "uniqUpTo", 353 "sequenceNextNode", 354 "exponentialTimeDecayedAvg", 355 } 356 357 AGG_FUNCTIONS_SUFFIXES = [ 358 "If", 359 "Array", 360 "ArrayIf", 361 "Map", 362 "SimpleState", 363 "State", 364 "Merge", 365 "MergeState", 366 "ForEach", 367 "Distinct", 368 "OrDefault", 369 "OrNull", 370 "Resample", 371 "ArgMin", 372 "ArgMax", 373 ] 374 375 FUNC_TOKENS = { 376 *parser.Parser.FUNC_TOKENS, 377 TokenType.SET, 378 } 379 380 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 381 382 ID_VAR_TOKENS = { 383 *parser.Parser.ID_VAR_TOKENS, 384 TokenType.LIKE, 385 } 386 387 AGG_FUNC_MAPPING = ( 388 lambda functions, suffixes: { 389 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 390 } 391 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 392 393 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 394 395 FUNCTION_PARSERS = { 396 **parser.Parser.FUNCTION_PARSERS, 397 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 398 "QUANTILE": lambda self: self._parse_quantile(), 399 } 400 401 FUNCTION_PARSERS.pop("MATCH") 402 403 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 404 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 405 406 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 407 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 408 409 RANGE_PARSERS = { 410 **parser.Parser.RANGE_PARSERS, 411 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 412 and self._parse_in(this, is_global=True), 413 } 414 415 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 416 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 417 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 418 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 419 420 JOIN_KINDS = { 421 *parser.Parser.JOIN_KINDS, 422 TokenType.ANY, 423 TokenType.ASOF, 424 TokenType.ARRAY, 425 } 426 427 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 428 TokenType.ANY, 429 TokenType.ARRAY, 430 TokenType.FINAL, 431 TokenType.FORMAT, 432 TokenType.SETTINGS, 433 } 434 435 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 436 TokenType.FORMAT, 437 } 438 439 LOG_DEFAULTS_TO_LN = True 440 441 QUERY_MODIFIER_PARSERS = { 442 **parser.Parser.QUERY_MODIFIER_PARSERS, 443 TokenType.SETTINGS: lambda self: ( 444 "settings", 445 self._advance() or self._parse_csv(self._parse_assignment), 446 ), 447 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 448 } 449 450 CONSTRAINT_PARSERS = { 451 **parser.Parser.CONSTRAINT_PARSERS, 452 "INDEX": lambda self: self._parse_index_constraint(), 453 "CODEC": lambda self: self._parse_compress(), 454 } 455 456 ALTER_PARSERS = { 457 **parser.Parser.ALTER_PARSERS, 458 "REPLACE": lambda self: self._parse_alter_table_replace(), 459 } 460 461 SCHEMA_UNNAMED_CONSTRAINTS = { 462 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 463 "INDEX", 464 } 465 466 PLACEHOLDER_PARSERS = { 467 **parser.Parser.PLACEHOLDER_PARSERS, 468 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 469 } 470 471 def _parse_types( 472 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 473 ) -> t.Optional[exp.Expression]: 474 dtype = super()._parse_types( 475 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 476 ) 477 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 478 # Mark every type as non-nullable which is ClickHouse's default, unless it's 479 # already marked as nullable. This marker helps us transpile types from other 480 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 481 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 482 # fail in ClickHouse without the `Nullable` type constructor. 483 dtype.set("nullable", False) 484 485 return dtype 486 487 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 488 index = self._index 489 this = self._parse_bitwise() 490 if self._match(TokenType.FROM): 491 self._retreat(index) 492 return super()._parse_extract() 493 494 # We return Anonymous here because extract and regexpExtract have different semantics, 495 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 496 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 497 # 498 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 499 self._match(TokenType.COMMA) 500 return self.expression( 501 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 502 ) 503 504 def _parse_assignment(self) -> t.Optional[exp.Expression]: 505 this = super()._parse_assignment() 506 507 if self._match(TokenType.PLACEHOLDER): 508 return self.expression( 509 exp.If, 510 this=this, 511 true=self._parse_assignment(), 512 false=self._match(TokenType.COLON) and self._parse_assignment(), 513 ) 514 515 return this 516 517 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 518 """ 519 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 520 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 521 """ 522 this = self._parse_id_var() 523 self._match(TokenType.COLON) 524 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 525 self._match_text_seq("IDENTIFIER") and "Identifier" 526 ) 527 528 if not kind: 529 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 530 elif not self._match(TokenType.R_BRACE): 531 self.raise_error("Expecting }") 532 533 return self.expression(exp.Placeholder, this=this, kind=kind) 534 535 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 536 this = super()._parse_in(this) 537 this.set("is_global", is_global) 538 return this 539 540 def _parse_table( 541 self, 542 schema: bool = False, 543 joins: bool = False, 544 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 545 parse_bracket: bool = False, 546 is_db_reference: bool = False, 547 parse_partition: bool = False, 548 ) -> t.Optional[exp.Expression]: 549 this = super()._parse_table( 550 schema=schema, 551 joins=joins, 552 alias_tokens=alias_tokens, 553 parse_bracket=parse_bracket, 554 is_db_reference=is_db_reference, 555 ) 556 557 if self._match(TokenType.FINAL): 558 this = self.expression(exp.Final, this=this) 559 560 return this 561 562 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 563 return super()._parse_position(haystack_first=True) 564 565 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 566 def _parse_cte(self) -> exp.CTE: 567 # WITH <identifier> AS <subquery expression> 568 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 569 570 if not cte: 571 # WITH <expression> AS <identifier> 572 cte = self.expression( 573 exp.CTE, 574 this=self._parse_assignment(), 575 alias=self._parse_table_alias(), 576 scalar=True, 577 ) 578 579 return cte 580 581 def _parse_join_parts( 582 self, 583 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 584 is_global = self._match(TokenType.GLOBAL) and self._prev 585 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 586 587 if kind_pre: 588 kind = self._match_set(self.JOIN_KINDS) and self._prev 589 side = self._match_set(self.JOIN_SIDES) and self._prev 590 return is_global, side, kind 591 592 return ( 593 is_global, 594 self._match_set(self.JOIN_SIDES) and self._prev, 595 self._match_set(self.JOIN_KINDS) and self._prev, 596 ) 597 598 def _parse_join( 599 self, skip_join_token: bool = False, parse_bracket: bool = False 600 ) -> t.Optional[exp.Join]: 601 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 602 if join: 603 join.set("global", join.args.pop("method", None)) 604 605 return join 606 607 def _parse_function( 608 self, 609 functions: t.Optional[t.Dict[str, t.Callable]] = None, 610 anonymous: bool = False, 611 optional_parens: bool = True, 612 any_token: bool = False, 613 ) -> t.Optional[exp.Expression]: 614 expr = super()._parse_function( 615 functions=functions, 616 anonymous=anonymous, 617 optional_parens=optional_parens, 618 any_token=any_token, 619 ) 620 621 func = expr.this if isinstance(expr, exp.Window) else expr 622 623 # Aggregate functions can be split in 2 parts: <func_name><suffix> 624 parts = ( 625 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 626 ) 627 628 if parts: 629 params = self._parse_func_params(func) 630 631 kwargs = { 632 "this": func.this, 633 "expressions": func.expressions, 634 } 635 if parts[1]: 636 kwargs["parts"] = parts 637 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 638 else: 639 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 640 641 kwargs["exp_class"] = exp_class 642 if params: 643 kwargs["params"] = params 644 645 func = self.expression(**kwargs) 646 647 if isinstance(expr, exp.Window): 648 # The window's func was parsed as Anonymous in base parser, fix its 649 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 650 expr.set("this", func) 651 elif params: 652 # Params have blocked super()._parse_function() from parsing the following window 653 # (if that exists) as they're standing between the function call and the window spec 654 expr = self._parse_window(func) 655 else: 656 expr = func 657 658 return expr 659 660 def _parse_func_params( 661 self, this: t.Optional[exp.Func] = None 662 ) -> t.Optional[t.List[exp.Expression]]: 663 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 664 return self._parse_csv(self._parse_lambda) 665 666 if self._match(TokenType.L_PAREN): 667 params = self._parse_csv(self._parse_lambda) 668 self._match_r_paren(this) 669 return params 670 671 return None 672 673 def _parse_quantile(self) -> exp.Quantile: 674 this = self._parse_lambda() 675 params = self._parse_func_params() 676 if params: 677 return self.expression(exp.Quantile, this=params[0], quantile=this) 678 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 679 680 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 681 return super()._parse_wrapped_id_vars(optional=True) 682 683 def _parse_primary_key( 684 self, wrapped_optional: bool = False, in_props: bool = False 685 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 686 return super()._parse_primary_key( 687 wrapped_optional=wrapped_optional or in_props, in_props=in_props 688 ) 689 690 def _parse_on_property(self) -> t.Optional[exp.Expression]: 691 index = self._index 692 if self._match_text_seq("CLUSTER"): 693 this = self._parse_id_var() 694 if this: 695 return self.expression(exp.OnCluster, this=this) 696 else: 697 self._retreat(index) 698 return None 699 700 def _parse_index_constraint( 701 self, kind: t.Optional[str] = None 702 ) -> exp.IndexColumnConstraint: 703 # INDEX name1 expr TYPE type1(args) GRANULARITY value 704 this = self._parse_id_var() 705 expression = self._parse_assignment() 706 707 index_type = self._match_text_seq("TYPE") and ( 708 self._parse_function() or self._parse_var() 709 ) 710 711 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 712 713 return self.expression( 714 exp.IndexColumnConstraint, 715 this=this, 716 expression=expression, 717 index_type=index_type, 718 granularity=granularity, 719 ) 720 721 def _parse_partition(self) -> t.Optional[exp.Partition]: 722 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 723 if not self._match(TokenType.PARTITION): 724 return None 725 726 if self._match_text_seq("ID"): 727 # Corresponds to the PARTITION ID <string_value> syntax 728 expressions: t.List[exp.Expression] = [ 729 self.expression(exp.PartitionId, this=self._parse_string()) 730 ] 731 else: 732 expressions = self._parse_expressions() 733 734 return self.expression(exp.Partition, expressions=expressions) 735 736 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 737 partition = self._parse_partition() 738 739 if not partition or not self._match(TokenType.FROM): 740 return None 741 742 return self.expression( 743 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 744 ) 745 746 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 747 if not self._match_text_seq("PROJECTION"): 748 return None 749 750 return self.expression( 751 exp.ProjectionDef, 752 this=self._parse_id_var(), 753 expression=self._parse_wrapped(self._parse_statement), 754 ) 755 756 def _parse_constraint(self) -> t.Optional[exp.Expression]: 757 return super()._parse_constraint() or self._parse_projection_def() 758 759 class Generator(generator.Generator): 760 QUERY_HINTS = False 761 STRUCT_DELIMITER = ("(", ")") 762 NVL2_SUPPORTED = False 763 TABLESAMPLE_REQUIRES_PARENS = False 764 TABLESAMPLE_SIZE_IS_ROWS = False 765 TABLESAMPLE_KEYWORDS = "SAMPLE" 766 LAST_DAY_SUPPORTS_DATE_PART = False 767 CAN_IMPLEMENT_ARRAY_ANY = True 768 SUPPORTS_TO_NUMBER = False 769 JOIN_HINTS = False 770 TABLE_HINTS = False 771 GROUPINGS_SEP = "" 772 SET_OP_MODIFIERS = False 773 SUPPORTS_TABLE_ALIAS_COLUMNS = False 774 VALUES_AS_TABLE = False 775 776 STRING_TYPE_MAPPING = { 777 exp.DataType.Type.CHAR: "String", 778 exp.DataType.Type.LONGBLOB: "String", 779 exp.DataType.Type.LONGTEXT: "String", 780 exp.DataType.Type.MEDIUMBLOB: "String", 781 exp.DataType.Type.MEDIUMTEXT: "String", 782 exp.DataType.Type.TINYBLOB: "String", 783 exp.DataType.Type.TINYTEXT: "String", 784 exp.DataType.Type.TEXT: "String", 785 exp.DataType.Type.VARBINARY: "String", 786 exp.DataType.Type.VARCHAR: "String", 787 } 788 789 SUPPORTED_JSON_PATH_PARTS = { 790 exp.JSONPathKey, 791 exp.JSONPathRoot, 792 exp.JSONPathSubscript, 793 } 794 795 TYPE_MAPPING = { 796 **generator.Generator.TYPE_MAPPING, 797 **STRING_TYPE_MAPPING, 798 exp.DataType.Type.ARRAY: "Array", 799 exp.DataType.Type.BIGINT: "Int64", 800 exp.DataType.Type.DATE32: "Date32", 801 exp.DataType.Type.DATETIME: "DateTime", 802 exp.DataType.Type.DATETIME64: "DateTime64", 803 exp.DataType.Type.TIMESTAMP: "DateTime", 804 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 805 exp.DataType.Type.DOUBLE: "Float64", 806 exp.DataType.Type.ENUM: "Enum", 807 exp.DataType.Type.ENUM8: "Enum8", 808 exp.DataType.Type.ENUM16: "Enum16", 809 exp.DataType.Type.FIXEDSTRING: "FixedString", 810 exp.DataType.Type.FLOAT: "Float32", 811 exp.DataType.Type.INT: "Int32", 812 exp.DataType.Type.MEDIUMINT: "Int32", 813 exp.DataType.Type.INT128: "Int128", 814 exp.DataType.Type.INT256: "Int256", 815 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 816 exp.DataType.Type.MAP: "Map", 817 exp.DataType.Type.NESTED: "Nested", 818 exp.DataType.Type.SMALLINT: "Int16", 819 exp.DataType.Type.STRUCT: "Tuple", 820 exp.DataType.Type.TINYINT: "Int8", 821 exp.DataType.Type.UBIGINT: "UInt64", 822 exp.DataType.Type.UINT: "UInt32", 823 exp.DataType.Type.UINT128: "UInt128", 824 exp.DataType.Type.UINT256: "UInt256", 825 exp.DataType.Type.USMALLINT: "UInt16", 826 exp.DataType.Type.UTINYINT: "UInt8", 827 exp.DataType.Type.IPV4: "IPv4", 828 exp.DataType.Type.IPV6: "IPv6", 829 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 830 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 831 } 832 833 TRANSFORMS = { 834 **generator.Generator.TRANSFORMS, 835 exp.AnyValue: rename_func("any"), 836 exp.ApproxDistinct: rename_func("uniq"), 837 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 838 exp.ArraySize: rename_func("LENGTH"), 839 exp.ArraySum: rename_func("arraySum"), 840 exp.ArgMax: arg_max_or_min_no_count("argMax"), 841 exp.ArgMin: arg_max_or_min_no_count("argMin"), 842 exp.Array: inline_array_sql, 843 exp.CastToStrType: rename_func("CAST"), 844 exp.CountIf: rename_func("countIf"), 845 exp.CompressColumnConstraint: lambda self, 846 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 847 exp.ComputedColumnConstraint: lambda self, 848 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 849 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 850 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 851 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 852 exp.DateStrToDate: rename_func("toDate"), 853 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 854 exp.Explode: rename_func("arrayJoin"), 855 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 856 exp.IsNan: rename_func("isNaN"), 857 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 858 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 859 exp.JSONPathKey: json_path_key_only_name, 860 exp.JSONPathRoot: lambda *_: "", 861 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 862 exp.Nullif: rename_func("nullIf"), 863 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 864 exp.Pivot: no_pivot_sql, 865 exp.Quantile: _quantile_sql, 866 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 867 exp.Rand: rename_func("randCanonical"), 868 exp.StartsWith: rename_func("startsWith"), 869 exp.StrPosition: lambda self, e: self.func( 870 "position", e.this, e.args.get("substr"), e.args.get("position") 871 ), 872 exp.TimeToStr: lambda self, e: self.func( 873 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 874 ), 875 exp.TimeStrToTime: _timestrtotime_sql, 876 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 877 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 878 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 879 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 880 exp.MD5Digest: rename_func("MD5"), 881 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 882 exp.SHA: rename_func("SHA1"), 883 exp.SHA2: sha256_sql, 884 exp.UnixToTime: _unix_to_time_sql, 885 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 886 exp.Trim: trim_sql, 887 exp.Variance: rename_func("varSamp"), 888 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 889 exp.Stddev: rename_func("stddevSamp"), 890 exp.Chr: rename_func("CHAR"), 891 exp.Lag: lambda self, e: self.func( 892 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 893 ), 894 exp.Lead: lambda self, e: self.func( 895 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 896 ), 897 } 898 899 PROPERTIES_LOCATION = { 900 **generator.Generator.PROPERTIES_LOCATION, 901 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 902 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 903 exp.OnCluster: exp.Properties.Location.POST_NAME, 904 } 905 906 # There's no list in docs, but it can be found in Clickhouse code 907 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 908 ON_CLUSTER_TARGETS = { 909 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 910 "DATABASE", 911 "TABLE", 912 "VIEW", 913 "DICTIONARY", 914 "INDEX", 915 "FUNCTION", 916 "NAMED COLLECTION", 917 } 918 919 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 920 NON_NULLABLE_TYPES = { 921 exp.DataType.Type.ARRAY, 922 exp.DataType.Type.MAP, 923 exp.DataType.Type.STRUCT, 924 } 925 926 def strtodate_sql(self, expression: exp.StrToDate) -> str: 927 strtodate_sql = self.function_fallback_sql(expression) 928 929 if not isinstance(expression.parent, exp.Cast): 930 # StrToDate returns DATEs in other dialects (eg. postgres), so 931 # this branch aims to improve the transpilation to clickhouse 932 return f"CAST({strtodate_sql} AS DATE)" 933 934 return strtodate_sql 935 936 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 937 this = expression.this 938 939 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 940 return self.sql(this) 941 942 return super().cast_sql(expression, safe_prefix=safe_prefix) 943 944 def trycast_sql(self, expression: exp.TryCast) -> str: 945 dtype = expression.to 946 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 947 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 948 dtype.set("nullable", True) 949 950 return super().cast_sql(expression) 951 952 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 953 this = self.json_path_part(expression.this) 954 return str(int(this) + 1) if is_int(this) else this 955 956 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 957 return f"AS {self.sql(expression, 'this')}" 958 959 def _any_to_has( 960 self, 961 expression: exp.EQ | exp.NEQ, 962 default: t.Callable[[t.Any], str], 963 prefix: str = "", 964 ) -> str: 965 if isinstance(expression.left, exp.Any): 966 arr = expression.left 967 this = expression.right 968 elif isinstance(expression.right, exp.Any): 969 arr = expression.right 970 this = expression.left 971 else: 972 return default(expression) 973 974 return prefix + self.func("has", arr.this.unnest(), this) 975 976 def eq_sql(self, expression: exp.EQ) -> str: 977 return self._any_to_has(expression, super().eq_sql) 978 979 def neq_sql(self, expression: exp.NEQ) -> str: 980 return self._any_to_has(expression, super().neq_sql, "NOT ") 981 982 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 983 # Manually add a flag to make the search case-insensitive 984 regex = self.func("CONCAT", "'(?i)'", expression.expression) 985 return self.func("match", expression.this, regex) 986 987 def datatype_sql(self, expression: exp.DataType) -> str: 988 # String is the standard ClickHouse type, every other variant is just an alias. 989 # Additionally, any supplied length parameter will be ignored. 990 # 991 # https://clickhouse.com/docs/en/sql-reference/data-types/string 992 if expression.this in self.STRING_TYPE_MAPPING: 993 dtype = "String" 994 else: 995 dtype = super().datatype_sql(expression) 996 997 # This section changes the type to `Nullable(...)` if the following conditions hold: 998 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 999 # and change their semantics 1000 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1001 # constraint: "Type of Map key must be a type, that can be represented by integer or 1002 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1003 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1004 parent = expression.parent 1005 nullable = expression.args.get("nullable") 1006 if nullable is True or ( 1007 nullable is None 1008 and not ( 1009 isinstance(parent, exp.DataType) 1010 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1011 and expression.index in (None, 0) 1012 ) 1013 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1014 ): 1015 dtype = f"Nullable({dtype})" 1016 1017 return dtype 1018 1019 def cte_sql(self, expression: exp.CTE) -> str: 1020 if expression.args.get("scalar"): 1021 this = self.sql(expression, "this") 1022 alias = self.sql(expression, "alias") 1023 return f"{this} AS {alias}" 1024 1025 return super().cte_sql(expression) 1026 1027 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1028 return super().after_limit_modifiers(expression) + [ 1029 ( 1030 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1031 if expression.args.get("settings") 1032 else "" 1033 ), 1034 ( 1035 self.seg("FORMAT ") + self.sql(expression, "format") 1036 if expression.args.get("format") 1037 else "" 1038 ), 1039 ] 1040 1041 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1042 params = self.expressions(expression, key="params", flat=True) 1043 return self.func(expression.name, *expression.expressions) + f"({params})" 1044 1045 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1046 return self.func(expression.name, *expression.expressions) 1047 1048 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1049 return self.anonymousaggfunc_sql(expression) 1050 1051 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1052 return self.parameterizedagg_sql(expression) 1053 1054 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1055 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1056 1057 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1058 return f"ON CLUSTER {self.sql(expression, 'this')}" 1059 1060 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1061 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1062 exp.Properties.Location.POST_NAME 1063 ): 1064 this_name = self.sql( 1065 expression.this if isinstance(expression.this, exp.Schema) else expression, 1066 "this", 1067 ) 1068 this_properties = " ".join( 1069 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1070 ) 1071 this_schema = self.schema_columns_sql(expression.this) 1072 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1073 1074 return super().createable_sql(expression, locations) 1075 1076 def create_sql(self, expression: exp.Create) -> str: 1077 # The comment property comes last in CTAS statements, i.e. after the query 1078 query = expression.expression 1079 if isinstance(query, exp.Query): 1080 comment_prop = expression.find(exp.SchemaCommentProperty) 1081 if comment_prop: 1082 comment_prop.pop() 1083 query.replace(exp.paren(query)) 1084 else: 1085 comment_prop = None 1086 1087 create_sql = super().create_sql(expression) 1088 1089 comment_sql = self.sql(comment_prop) 1090 comment_sql = f" {comment_sql}" if comment_sql else "" 1091 1092 return f"{create_sql}{comment_sql}" 1093 1094 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1095 this = self.indent(self.sql(expression, "this")) 1096 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1097 1098 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1099 this = self.sql(expression, "this") 1100 this = f" {this}" if this else "" 1101 expr = self.sql(expression, "expression") 1102 expr = f" {expr}" if expr else "" 1103 index_type = self.sql(expression, "index_type") 1104 index_type = f" TYPE {index_type}" if index_type else "" 1105 granularity = self.sql(expression, "granularity") 1106 granularity = f" GRANULARITY {granularity}" if granularity else "" 1107 1108 return f"INDEX{this}{expr}{index_type}{granularity}" 1109 1110 def partition_sql(self, expression: exp.Partition) -> str: 1111 return f"PARTITION {self.expressions(expression, flat=True)}" 1112 1113 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1114 return f"ID {self.sql(expression.this)}" 1115 1116 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1117 return ( 1118 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1119 ) 1120 1121 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1122 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
133class ClickHouse(Dialect): 134 NORMALIZE_FUNCTIONS: bool | str = False 135 NULL_ORDERING = "nulls_are_last" 136 SUPPORTS_USER_DEFINED_TYPES = False 137 SAFE_DIVISION = True 138 LOG_BASE_FIRST: t.Optional[bool] = None 139 FORCE_EARLY_ALIAS_REF_EXPANSION = True 140 141 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 142 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 143 144 UNESCAPED_SEQUENCES = { 145 "\\0": "\0", 146 } 147 148 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 149 150 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 151 exp.Except: False, 152 exp.Intersect: False, 153 exp.Union: None, 154 } 155 156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 } 202 203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 } 401 402 FUNCTION_PARSERS.pop("MATCH") 403 404 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 405 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 406 407 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 408 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 409 410 RANGE_PARSERS = { 411 **parser.Parser.RANGE_PARSERS, 412 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 413 and self._parse_in(this, is_global=True), 414 } 415 416 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 417 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 418 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 419 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 420 421 JOIN_KINDS = { 422 *parser.Parser.JOIN_KINDS, 423 TokenType.ANY, 424 TokenType.ASOF, 425 TokenType.ARRAY, 426 } 427 428 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 429 TokenType.ANY, 430 TokenType.ARRAY, 431 TokenType.FINAL, 432 TokenType.FORMAT, 433 TokenType.SETTINGS, 434 } 435 436 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 437 TokenType.FORMAT, 438 } 439 440 LOG_DEFAULTS_TO_LN = True 441 442 QUERY_MODIFIER_PARSERS = { 443 **parser.Parser.QUERY_MODIFIER_PARSERS, 444 TokenType.SETTINGS: lambda self: ( 445 "settings", 446 self._advance() or self._parse_csv(self._parse_assignment), 447 ), 448 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 449 } 450 451 CONSTRAINT_PARSERS = { 452 **parser.Parser.CONSTRAINT_PARSERS, 453 "INDEX": lambda self: self._parse_index_constraint(), 454 "CODEC": lambda self: self._parse_compress(), 455 } 456 457 ALTER_PARSERS = { 458 **parser.Parser.ALTER_PARSERS, 459 "REPLACE": lambda self: self._parse_alter_table_replace(), 460 } 461 462 SCHEMA_UNNAMED_CONSTRAINTS = { 463 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 464 "INDEX", 465 } 466 467 PLACEHOLDER_PARSERS = { 468 **parser.Parser.PLACEHOLDER_PARSERS, 469 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 470 } 471 472 def _parse_types( 473 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 474 ) -> t.Optional[exp.Expression]: 475 dtype = super()._parse_types( 476 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 477 ) 478 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 479 # Mark every type as non-nullable which is ClickHouse's default, unless it's 480 # already marked as nullable. This marker helps us transpile types from other 481 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 482 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 483 # fail in ClickHouse without the `Nullable` type constructor. 484 dtype.set("nullable", False) 485 486 return dtype 487 488 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 489 index = self._index 490 this = self._parse_bitwise() 491 if self._match(TokenType.FROM): 492 self._retreat(index) 493 return super()._parse_extract() 494 495 # We return Anonymous here because extract and regexpExtract have different semantics, 496 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 497 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 498 # 499 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 500 self._match(TokenType.COMMA) 501 return self.expression( 502 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 503 ) 504 505 def _parse_assignment(self) -> t.Optional[exp.Expression]: 506 this = super()._parse_assignment() 507 508 if self._match(TokenType.PLACEHOLDER): 509 return self.expression( 510 exp.If, 511 this=this, 512 true=self._parse_assignment(), 513 false=self._match(TokenType.COLON) and self._parse_assignment(), 514 ) 515 516 return this 517 518 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 519 """ 520 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 521 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 522 """ 523 this = self._parse_id_var() 524 self._match(TokenType.COLON) 525 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 526 self._match_text_seq("IDENTIFIER") and "Identifier" 527 ) 528 529 if not kind: 530 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 531 elif not self._match(TokenType.R_BRACE): 532 self.raise_error("Expecting }") 533 534 return self.expression(exp.Placeholder, this=this, kind=kind) 535 536 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 537 this = super()._parse_in(this) 538 this.set("is_global", is_global) 539 return this 540 541 def _parse_table( 542 self, 543 schema: bool = False, 544 joins: bool = False, 545 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 546 parse_bracket: bool = False, 547 is_db_reference: bool = False, 548 parse_partition: bool = False, 549 ) -> t.Optional[exp.Expression]: 550 this = super()._parse_table( 551 schema=schema, 552 joins=joins, 553 alias_tokens=alias_tokens, 554 parse_bracket=parse_bracket, 555 is_db_reference=is_db_reference, 556 ) 557 558 if self._match(TokenType.FINAL): 559 this = self.expression(exp.Final, this=this) 560 561 return this 562 563 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 564 return super()._parse_position(haystack_first=True) 565 566 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 567 def _parse_cte(self) -> exp.CTE: 568 # WITH <identifier> AS <subquery expression> 569 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 570 571 if not cte: 572 # WITH <expression> AS <identifier> 573 cte = self.expression( 574 exp.CTE, 575 this=self._parse_assignment(), 576 alias=self._parse_table_alias(), 577 scalar=True, 578 ) 579 580 return cte 581 582 def _parse_join_parts( 583 self, 584 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 585 is_global = self._match(TokenType.GLOBAL) and self._prev 586 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 587 588 if kind_pre: 589 kind = self._match_set(self.JOIN_KINDS) and self._prev 590 side = self._match_set(self.JOIN_SIDES) and self._prev 591 return is_global, side, kind 592 593 return ( 594 is_global, 595 self._match_set(self.JOIN_SIDES) and self._prev, 596 self._match_set(self.JOIN_KINDS) and self._prev, 597 ) 598 599 def _parse_join( 600 self, skip_join_token: bool = False, parse_bracket: bool = False 601 ) -> t.Optional[exp.Join]: 602 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 603 if join: 604 join.set("global", join.args.pop("method", None)) 605 606 return join 607 608 def _parse_function( 609 self, 610 functions: t.Optional[t.Dict[str, t.Callable]] = None, 611 anonymous: bool = False, 612 optional_parens: bool = True, 613 any_token: bool = False, 614 ) -> t.Optional[exp.Expression]: 615 expr = super()._parse_function( 616 functions=functions, 617 anonymous=anonymous, 618 optional_parens=optional_parens, 619 any_token=any_token, 620 ) 621 622 func = expr.this if isinstance(expr, exp.Window) else expr 623 624 # Aggregate functions can be split in 2 parts: <func_name><suffix> 625 parts = ( 626 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 627 ) 628 629 if parts: 630 params = self._parse_func_params(func) 631 632 kwargs = { 633 "this": func.this, 634 "expressions": func.expressions, 635 } 636 if parts[1]: 637 kwargs["parts"] = parts 638 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 639 else: 640 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 641 642 kwargs["exp_class"] = exp_class 643 if params: 644 kwargs["params"] = params 645 646 func = self.expression(**kwargs) 647 648 if isinstance(expr, exp.Window): 649 # The window's func was parsed as Anonymous in base parser, fix its 650 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 651 expr.set("this", func) 652 elif params: 653 # Params have blocked super()._parse_function() from parsing the following window 654 # (if that exists) as they're standing between the function call and the window spec 655 expr = self._parse_window(func) 656 else: 657 expr = func 658 659 return expr 660 661 def _parse_func_params( 662 self, this: t.Optional[exp.Func] = None 663 ) -> t.Optional[t.List[exp.Expression]]: 664 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 665 return self._parse_csv(self._parse_lambda) 666 667 if self._match(TokenType.L_PAREN): 668 params = self._parse_csv(self._parse_lambda) 669 self._match_r_paren(this) 670 return params 671 672 return None 673 674 def _parse_quantile(self) -> exp.Quantile: 675 this = self._parse_lambda() 676 params = self._parse_func_params() 677 if params: 678 return self.expression(exp.Quantile, this=params[0], quantile=this) 679 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 680 681 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 682 return super()._parse_wrapped_id_vars(optional=True) 683 684 def _parse_primary_key( 685 self, wrapped_optional: bool = False, in_props: bool = False 686 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 687 return super()._parse_primary_key( 688 wrapped_optional=wrapped_optional or in_props, in_props=in_props 689 ) 690 691 def _parse_on_property(self) -> t.Optional[exp.Expression]: 692 index = self._index 693 if self._match_text_seq("CLUSTER"): 694 this = self._parse_id_var() 695 if this: 696 return self.expression(exp.OnCluster, this=this) 697 else: 698 self._retreat(index) 699 return None 700 701 def _parse_index_constraint( 702 self, kind: t.Optional[str] = None 703 ) -> exp.IndexColumnConstraint: 704 # INDEX name1 expr TYPE type1(args) GRANULARITY value 705 this = self._parse_id_var() 706 expression = self._parse_assignment() 707 708 index_type = self._match_text_seq("TYPE") and ( 709 self._parse_function() or self._parse_var() 710 ) 711 712 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 713 714 return self.expression( 715 exp.IndexColumnConstraint, 716 this=this, 717 expression=expression, 718 index_type=index_type, 719 granularity=granularity, 720 ) 721 722 def _parse_partition(self) -> t.Optional[exp.Partition]: 723 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 724 if not self._match(TokenType.PARTITION): 725 return None 726 727 if self._match_text_seq("ID"): 728 # Corresponds to the PARTITION ID <string_value> syntax 729 expressions: t.List[exp.Expression] = [ 730 self.expression(exp.PartitionId, this=self._parse_string()) 731 ] 732 else: 733 expressions = self._parse_expressions() 734 735 return self.expression(exp.Partition, expressions=expressions) 736 737 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 738 partition = self._parse_partition() 739 740 if not partition or not self._match(TokenType.FROM): 741 return None 742 743 return self.expression( 744 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 745 ) 746 747 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 748 if not self._match_text_seq("PROJECTION"): 749 return None 750 751 return self.expression( 752 exp.ProjectionDef, 753 this=self._parse_id_var(), 754 expression=self._parse_wrapped(self._parse_statement), 755 ) 756 757 def _parse_constraint(self) -> t.Optional[exp.Expression]: 758 return super()._parse_constraint() or self._parse_projection_def() 759 760 class Generator(generator.Generator): 761 QUERY_HINTS = False 762 STRUCT_DELIMITER = ("(", ")") 763 NVL2_SUPPORTED = False 764 TABLESAMPLE_REQUIRES_PARENS = False 765 TABLESAMPLE_SIZE_IS_ROWS = False 766 TABLESAMPLE_KEYWORDS = "SAMPLE" 767 LAST_DAY_SUPPORTS_DATE_PART = False 768 CAN_IMPLEMENT_ARRAY_ANY = True 769 SUPPORTS_TO_NUMBER = False 770 JOIN_HINTS = False 771 TABLE_HINTS = False 772 GROUPINGS_SEP = "" 773 SET_OP_MODIFIERS = False 774 SUPPORTS_TABLE_ALIAS_COLUMNS = False 775 VALUES_AS_TABLE = False 776 777 STRING_TYPE_MAPPING = { 778 exp.DataType.Type.CHAR: "String", 779 exp.DataType.Type.LONGBLOB: "String", 780 exp.DataType.Type.LONGTEXT: "String", 781 exp.DataType.Type.MEDIUMBLOB: "String", 782 exp.DataType.Type.MEDIUMTEXT: "String", 783 exp.DataType.Type.TINYBLOB: "String", 784 exp.DataType.Type.TINYTEXT: "String", 785 exp.DataType.Type.TEXT: "String", 786 exp.DataType.Type.VARBINARY: "String", 787 exp.DataType.Type.VARCHAR: "String", 788 } 789 790 SUPPORTED_JSON_PATH_PARTS = { 791 exp.JSONPathKey, 792 exp.JSONPathRoot, 793 exp.JSONPathSubscript, 794 } 795 796 TYPE_MAPPING = { 797 **generator.Generator.TYPE_MAPPING, 798 **STRING_TYPE_MAPPING, 799 exp.DataType.Type.ARRAY: "Array", 800 exp.DataType.Type.BIGINT: "Int64", 801 exp.DataType.Type.DATE32: "Date32", 802 exp.DataType.Type.DATETIME: "DateTime", 803 exp.DataType.Type.DATETIME64: "DateTime64", 804 exp.DataType.Type.TIMESTAMP: "DateTime", 805 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 806 exp.DataType.Type.DOUBLE: "Float64", 807 exp.DataType.Type.ENUM: "Enum", 808 exp.DataType.Type.ENUM8: "Enum8", 809 exp.DataType.Type.ENUM16: "Enum16", 810 exp.DataType.Type.FIXEDSTRING: "FixedString", 811 exp.DataType.Type.FLOAT: "Float32", 812 exp.DataType.Type.INT: "Int32", 813 exp.DataType.Type.MEDIUMINT: "Int32", 814 exp.DataType.Type.INT128: "Int128", 815 exp.DataType.Type.INT256: "Int256", 816 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 817 exp.DataType.Type.MAP: "Map", 818 exp.DataType.Type.NESTED: "Nested", 819 exp.DataType.Type.SMALLINT: "Int16", 820 exp.DataType.Type.STRUCT: "Tuple", 821 exp.DataType.Type.TINYINT: "Int8", 822 exp.DataType.Type.UBIGINT: "UInt64", 823 exp.DataType.Type.UINT: "UInt32", 824 exp.DataType.Type.UINT128: "UInt128", 825 exp.DataType.Type.UINT256: "UInt256", 826 exp.DataType.Type.USMALLINT: "UInt16", 827 exp.DataType.Type.UTINYINT: "UInt8", 828 exp.DataType.Type.IPV4: "IPv4", 829 exp.DataType.Type.IPV6: "IPv6", 830 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 831 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 832 } 833 834 TRANSFORMS = { 835 **generator.Generator.TRANSFORMS, 836 exp.AnyValue: rename_func("any"), 837 exp.ApproxDistinct: rename_func("uniq"), 838 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 839 exp.ArraySize: rename_func("LENGTH"), 840 exp.ArraySum: rename_func("arraySum"), 841 exp.ArgMax: arg_max_or_min_no_count("argMax"), 842 exp.ArgMin: arg_max_or_min_no_count("argMin"), 843 exp.Array: inline_array_sql, 844 exp.CastToStrType: rename_func("CAST"), 845 exp.CountIf: rename_func("countIf"), 846 exp.CompressColumnConstraint: lambda self, 847 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 848 exp.ComputedColumnConstraint: lambda self, 849 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 850 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 851 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 852 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 853 exp.DateStrToDate: rename_func("toDate"), 854 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 855 exp.Explode: rename_func("arrayJoin"), 856 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 857 exp.IsNan: rename_func("isNaN"), 858 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 859 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 860 exp.JSONPathKey: json_path_key_only_name, 861 exp.JSONPathRoot: lambda *_: "", 862 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 863 exp.Nullif: rename_func("nullIf"), 864 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 865 exp.Pivot: no_pivot_sql, 866 exp.Quantile: _quantile_sql, 867 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 868 exp.Rand: rename_func("randCanonical"), 869 exp.StartsWith: rename_func("startsWith"), 870 exp.StrPosition: lambda self, e: self.func( 871 "position", e.this, e.args.get("substr"), e.args.get("position") 872 ), 873 exp.TimeToStr: lambda self, e: self.func( 874 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 875 ), 876 exp.TimeStrToTime: _timestrtotime_sql, 877 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 878 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 879 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 880 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 881 exp.MD5Digest: rename_func("MD5"), 882 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 883 exp.SHA: rename_func("SHA1"), 884 exp.SHA2: sha256_sql, 885 exp.UnixToTime: _unix_to_time_sql, 886 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 887 exp.Trim: trim_sql, 888 exp.Variance: rename_func("varSamp"), 889 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 890 exp.Stddev: rename_func("stddevSamp"), 891 exp.Chr: rename_func("CHAR"), 892 exp.Lag: lambda self, e: self.func( 893 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 894 ), 895 exp.Lead: lambda self, e: self.func( 896 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 897 ), 898 } 899 900 PROPERTIES_LOCATION = { 901 **generator.Generator.PROPERTIES_LOCATION, 902 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 903 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 904 exp.OnCluster: exp.Properties.Location.POST_NAME, 905 } 906 907 # There's no list in docs, but it can be found in Clickhouse code 908 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 909 ON_CLUSTER_TARGETS = { 910 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 911 "DATABASE", 912 "TABLE", 913 "VIEW", 914 "DICTIONARY", 915 "INDEX", 916 "FUNCTION", 917 "NAMED COLLECTION", 918 } 919 920 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 921 NON_NULLABLE_TYPES = { 922 exp.DataType.Type.ARRAY, 923 exp.DataType.Type.MAP, 924 exp.DataType.Type.STRUCT, 925 } 926 927 def strtodate_sql(self, expression: exp.StrToDate) -> str: 928 strtodate_sql = self.function_fallback_sql(expression) 929 930 if not isinstance(expression.parent, exp.Cast): 931 # StrToDate returns DATEs in other dialects (eg. postgres), so 932 # this branch aims to improve the transpilation to clickhouse 933 return f"CAST({strtodate_sql} AS DATE)" 934 935 return strtodate_sql 936 937 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 938 this = expression.this 939 940 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 941 return self.sql(this) 942 943 return super().cast_sql(expression, safe_prefix=safe_prefix) 944 945 def trycast_sql(self, expression: exp.TryCast) -> str: 946 dtype = expression.to 947 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 948 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 949 dtype.set("nullable", True) 950 951 return super().cast_sql(expression) 952 953 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 954 this = self.json_path_part(expression.this) 955 return str(int(this) + 1) if is_int(this) else this 956 957 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 958 return f"AS {self.sql(expression, 'this')}" 959 960 def _any_to_has( 961 self, 962 expression: exp.EQ | exp.NEQ, 963 default: t.Callable[[t.Any], str], 964 prefix: str = "", 965 ) -> str: 966 if isinstance(expression.left, exp.Any): 967 arr = expression.left 968 this = expression.right 969 elif isinstance(expression.right, exp.Any): 970 arr = expression.right 971 this = expression.left 972 else: 973 return default(expression) 974 975 return prefix + self.func("has", arr.this.unnest(), this) 976 977 def eq_sql(self, expression: exp.EQ) -> str: 978 return self._any_to_has(expression, super().eq_sql) 979 980 def neq_sql(self, expression: exp.NEQ) -> str: 981 return self._any_to_has(expression, super().neq_sql, "NOT ") 982 983 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 984 # Manually add a flag to make the search case-insensitive 985 regex = self.func("CONCAT", "'(?i)'", expression.expression) 986 return self.func("match", expression.this, regex) 987 988 def datatype_sql(self, expression: exp.DataType) -> str: 989 # String is the standard ClickHouse type, every other variant is just an alias. 990 # Additionally, any supplied length parameter will be ignored. 991 # 992 # https://clickhouse.com/docs/en/sql-reference/data-types/string 993 if expression.this in self.STRING_TYPE_MAPPING: 994 dtype = "String" 995 else: 996 dtype = super().datatype_sql(expression) 997 998 # This section changes the type to `Nullable(...)` if the following conditions hold: 999 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1000 # and change their semantics 1001 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1002 # constraint: "Type of Map key must be a type, that can be represented by integer or 1003 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1004 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1005 parent = expression.parent 1006 nullable = expression.args.get("nullable") 1007 if nullable is True or ( 1008 nullable is None 1009 and not ( 1010 isinstance(parent, exp.DataType) 1011 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1012 and expression.index in (None, 0) 1013 ) 1014 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1015 ): 1016 dtype = f"Nullable({dtype})" 1017 1018 return dtype 1019 1020 def cte_sql(self, expression: exp.CTE) -> str: 1021 if expression.args.get("scalar"): 1022 this = self.sql(expression, "this") 1023 alias = self.sql(expression, "alias") 1024 return f"{this} AS {alias}" 1025 1026 return super().cte_sql(expression) 1027 1028 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1029 return super().after_limit_modifiers(expression) + [ 1030 ( 1031 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1032 if expression.args.get("settings") 1033 else "" 1034 ), 1035 ( 1036 self.seg("FORMAT ") + self.sql(expression, "format") 1037 if expression.args.get("format") 1038 else "" 1039 ), 1040 ] 1041 1042 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1043 params = self.expressions(expression, key="params", flat=True) 1044 return self.func(expression.name, *expression.expressions) + f"({params})" 1045 1046 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1047 return self.func(expression.name, *expression.expressions) 1048 1049 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1050 return self.anonymousaggfunc_sql(expression) 1051 1052 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1053 return self.parameterizedagg_sql(expression) 1054 1055 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1056 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1057 1058 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1059 return f"ON CLUSTER {self.sql(expression, 'this')}" 1060 1061 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1062 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1063 exp.Properties.Location.POST_NAME 1064 ): 1065 this_name = self.sql( 1066 expression.this if isinstance(expression.this, exp.Schema) else expression, 1067 "this", 1068 ) 1069 this_properties = " ".join( 1070 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1071 ) 1072 this_schema = self.schema_columns_sql(expression.this) 1073 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1074 1075 return super().createable_sql(expression, locations) 1076 1077 def create_sql(self, expression: exp.Create) -> str: 1078 # The comment property comes last in CTAS statements, i.e. after the query 1079 query = expression.expression 1080 if isinstance(query, exp.Query): 1081 comment_prop = expression.find(exp.SchemaCommentProperty) 1082 if comment_prop: 1083 comment_prop.pop() 1084 query.replace(exp.paren(query)) 1085 else: 1086 comment_prop = None 1087 1088 create_sql = super().create_sql(expression) 1089 1090 comment_sql = self.sql(comment_prop) 1091 comment_sql = f" {comment_sql}" if comment_sql else "" 1092 1093 return f"{create_sql}{comment_sql}" 1094 1095 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1096 this = self.indent(self.sql(expression, "this")) 1097 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1098 1099 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1100 this = self.sql(expression, "this") 1101 this = f" {this}" if this else "" 1102 expr = self.sql(expression, "expression") 1103 expr = f" {expr}" if expr else "" 1104 index_type = self.sql(expression, "index_type") 1105 index_type = f" TYPE {index_type}" if index_type else "" 1106 granularity = self.sql(expression, "granularity") 1107 granularity = f" GRANULARITY {granularity}" if granularity else "" 1108 1109 return f"INDEX{this}{expr}{index_type}{granularity}" 1110 1111 def partition_sql(self, expression: exp.Partition) -> str: 1112 return f"PARTITION {self.expressions(expression, flat=True)}" 1113 1114 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1115 return f"ID {self.sql(expression.this)}" 1116 1117 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1118 return ( 1119 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1120 ) 1121 1122 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1123 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 } 401 402 FUNCTION_PARSERS.pop("MATCH") 403 404 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 405 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 406 407 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 408 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 409 410 RANGE_PARSERS = { 411 **parser.Parser.RANGE_PARSERS, 412 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 413 and self._parse_in(this, is_global=True), 414 } 415 416 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 417 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 418 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 419 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 420 421 JOIN_KINDS = { 422 *parser.Parser.JOIN_KINDS, 423 TokenType.ANY, 424 TokenType.ASOF, 425 TokenType.ARRAY, 426 } 427 428 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 429 TokenType.ANY, 430 TokenType.ARRAY, 431 TokenType.FINAL, 432 TokenType.FORMAT, 433 TokenType.SETTINGS, 434 } 435 436 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 437 TokenType.FORMAT, 438 } 439 440 LOG_DEFAULTS_TO_LN = True 441 442 QUERY_MODIFIER_PARSERS = { 443 **parser.Parser.QUERY_MODIFIER_PARSERS, 444 TokenType.SETTINGS: lambda self: ( 445 "settings", 446 self._advance() or self._parse_csv(self._parse_assignment), 447 ), 448 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 449 } 450 451 CONSTRAINT_PARSERS = { 452 **parser.Parser.CONSTRAINT_PARSERS, 453 "INDEX": lambda self: self._parse_index_constraint(), 454 "CODEC": lambda self: self._parse_compress(), 455 } 456 457 ALTER_PARSERS = { 458 **parser.Parser.ALTER_PARSERS, 459 "REPLACE": lambda self: self._parse_alter_table_replace(), 460 } 461 462 SCHEMA_UNNAMED_CONSTRAINTS = { 463 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 464 "INDEX", 465 } 466 467 PLACEHOLDER_PARSERS = { 468 **parser.Parser.PLACEHOLDER_PARSERS, 469 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 470 } 471 472 def _parse_types( 473 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 474 ) -> t.Optional[exp.Expression]: 475 dtype = super()._parse_types( 476 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 477 ) 478 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 479 # Mark every type as non-nullable which is ClickHouse's default, unless it's 480 # already marked as nullable. This marker helps us transpile types from other 481 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 482 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 483 # fail in ClickHouse without the `Nullable` type constructor. 484 dtype.set("nullable", False) 485 486 return dtype 487 488 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 489 index = self._index 490 this = self._parse_bitwise() 491 if self._match(TokenType.FROM): 492 self._retreat(index) 493 return super()._parse_extract() 494 495 # We return Anonymous here because extract and regexpExtract have different semantics, 496 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 497 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 498 # 499 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 500 self._match(TokenType.COMMA) 501 return self.expression( 502 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 503 ) 504 505 def _parse_assignment(self) -> t.Optional[exp.Expression]: 506 this = super()._parse_assignment() 507 508 if self._match(TokenType.PLACEHOLDER): 509 return self.expression( 510 exp.If, 511 this=this, 512 true=self._parse_assignment(), 513 false=self._match(TokenType.COLON) and self._parse_assignment(), 514 ) 515 516 return this 517 518 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 519 """ 520 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 521 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 522 """ 523 this = self._parse_id_var() 524 self._match(TokenType.COLON) 525 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 526 self._match_text_seq("IDENTIFIER") and "Identifier" 527 ) 528 529 if not kind: 530 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 531 elif not self._match(TokenType.R_BRACE): 532 self.raise_error("Expecting }") 533 534 return self.expression(exp.Placeholder, this=this, kind=kind) 535 536 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 537 this = super()._parse_in(this) 538 this.set("is_global", is_global) 539 return this 540 541 def _parse_table( 542 self, 543 schema: bool = False, 544 joins: bool = False, 545 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 546 parse_bracket: bool = False, 547 is_db_reference: bool = False, 548 parse_partition: bool = False, 549 ) -> t.Optional[exp.Expression]: 550 this = super()._parse_table( 551 schema=schema, 552 joins=joins, 553 alias_tokens=alias_tokens, 554 parse_bracket=parse_bracket, 555 is_db_reference=is_db_reference, 556 ) 557 558 if self._match(TokenType.FINAL): 559 this = self.expression(exp.Final, this=this) 560 561 return this 562 563 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 564 return super()._parse_position(haystack_first=True) 565 566 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 567 def _parse_cte(self) -> exp.CTE: 568 # WITH <identifier> AS <subquery expression> 569 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 570 571 if not cte: 572 # WITH <expression> AS <identifier> 573 cte = self.expression( 574 exp.CTE, 575 this=self._parse_assignment(), 576 alias=self._parse_table_alias(), 577 scalar=True, 578 ) 579 580 return cte 581 582 def _parse_join_parts( 583 self, 584 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 585 is_global = self._match(TokenType.GLOBAL) and self._prev 586 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 587 588 if kind_pre: 589 kind = self._match_set(self.JOIN_KINDS) and self._prev 590 side = self._match_set(self.JOIN_SIDES) and self._prev 591 return is_global, side, kind 592 593 return ( 594 is_global, 595 self._match_set(self.JOIN_SIDES) and self._prev, 596 self._match_set(self.JOIN_KINDS) and self._prev, 597 ) 598 599 def _parse_join( 600 self, skip_join_token: bool = False, parse_bracket: bool = False 601 ) -> t.Optional[exp.Join]: 602 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 603 if join: 604 join.set("global", join.args.pop("method", None)) 605 606 return join 607 608 def _parse_function( 609 self, 610 functions: t.Optional[t.Dict[str, t.Callable]] = None, 611 anonymous: bool = False, 612 optional_parens: bool = True, 613 any_token: bool = False, 614 ) -> t.Optional[exp.Expression]: 615 expr = super()._parse_function( 616 functions=functions, 617 anonymous=anonymous, 618 optional_parens=optional_parens, 619 any_token=any_token, 620 ) 621 622 func = expr.this if isinstance(expr, exp.Window) else expr 623 624 # Aggregate functions can be split in 2 parts: <func_name><suffix> 625 parts = ( 626 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 627 ) 628 629 if parts: 630 params = self._parse_func_params(func) 631 632 kwargs = { 633 "this": func.this, 634 "expressions": func.expressions, 635 } 636 if parts[1]: 637 kwargs["parts"] = parts 638 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 639 else: 640 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 641 642 kwargs["exp_class"] = exp_class 643 if params: 644 kwargs["params"] = params 645 646 func = self.expression(**kwargs) 647 648 if isinstance(expr, exp.Window): 649 # The window's func was parsed as Anonymous in base parser, fix its 650 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 651 expr.set("this", func) 652 elif params: 653 # Params have blocked super()._parse_function() from parsing the following window 654 # (if that exists) as they're standing between the function call and the window spec 655 expr = self._parse_window(func) 656 else: 657 expr = func 658 659 return expr 660 661 def _parse_func_params( 662 self, this: t.Optional[exp.Func] = None 663 ) -> t.Optional[t.List[exp.Expression]]: 664 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 665 return self._parse_csv(self._parse_lambda) 666 667 if self._match(TokenType.L_PAREN): 668 params = self._parse_csv(self._parse_lambda) 669 self._match_r_paren(this) 670 return params 671 672 return None 673 674 def _parse_quantile(self) -> exp.Quantile: 675 this = self._parse_lambda() 676 params = self._parse_func_params() 677 if params: 678 return self.expression(exp.Quantile, this=params[0], quantile=this) 679 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 680 681 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 682 return super()._parse_wrapped_id_vars(optional=True) 683 684 def _parse_primary_key( 685 self, wrapped_optional: bool = False, in_props: bool = False 686 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 687 return super()._parse_primary_key( 688 wrapped_optional=wrapped_optional or in_props, in_props=in_props 689 ) 690 691 def _parse_on_property(self) -> t.Optional[exp.Expression]: 692 index = self._index 693 if self._match_text_seq("CLUSTER"): 694 this = self._parse_id_var() 695 if this: 696 return self.expression(exp.OnCluster, this=this) 697 else: 698 self._retreat(index) 699 return None 700 701 def _parse_index_constraint( 702 self, kind: t.Optional[str] = None 703 ) -> exp.IndexColumnConstraint: 704 # INDEX name1 expr TYPE type1(args) GRANULARITY value 705 this = self._parse_id_var() 706 expression = self._parse_assignment() 707 708 index_type = self._match_text_seq("TYPE") and ( 709 self._parse_function() or self._parse_var() 710 ) 711 712 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 713 714 return self.expression( 715 exp.IndexColumnConstraint, 716 this=this, 717 expression=expression, 718 index_type=index_type, 719 granularity=granularity, 720 ) 721 722 def _parse_partition(self) -> t.Optional[exp.Partition]: 723 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 724 if not self._match(TokenType.PARTITION): 725 return None 726 727 if self._match_text_seq("ID"): 728 # Corresponds to the PARTITION ID <string_value> syntax 729 expressions: t.List[exp.Expression] = [ 730 self.expression(exp.PartitionId, this=self._parse_string()) 731 ] 732 else: 733 expressions = self._parse_expressions() 734 735 return self.expression(exp.Partition, expressions=expressions) 736 737 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 738 partition = self._parse_partition() 739 740 if not partition or not self._match(TokenType.FROM): 741 return None 742 743 return self.expression( 744 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 745 ) 746 747 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 748 if not self._match_text_seq("PROJECTION"): 749 return None 750 751 return self.expression( 752 exp.ProjectionDef, 753 this=self._parse_id_var(), 754 expression=self._parse_wrapped(self._parse_statement), 755 ) 756 757 def _parse_constraint(self) -> t.Optional[exp.Expression]: 758 return super()._parse_constraint() or self._parse_projection_def()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
760 class Generator(generator.Generator): 761 QUERY_HINTS = False 762 STRUCT_DELIMITER = ("(", ")") 763 NVL2_SUPPORTED = False 764 TABLESAMPLE_REQUIRES_PARENS = False 765 TABLESAMPLE_SIZE_IS_ROWS = False 766 TABLESAMPLE_KEYWORDS = "SAMPLE" 767 LAST_DAY_SUPPORTS_DATE_PART = False 768 CAN_IMPLEMENT_ARRAY_ANY = True 769 SUPPORTS_TO_NUMBER = False 770 JOIN_HINTS = False 771 TABLE_HINTS = False 772 GROUPINGS_SEP = "" 773 SET_OP_MODIFIERS = False 774 SUPPORTS_TABLE_ALIAS_COLUMNS = False 775 VALUES_AS_TABLE = False 776 777 STRING_TYPE_MAPPING = { 778 exp.DataType.Type.CHAR: "String", 779 exp.DataType.Type.LONGBLOB: "String", 780 exp.DataType.Type.LONGTEXT: "String", 781 exp.DataType.Type.MEDIUMBLOB: "String", 782 exp.DataType.Type.MEDIUMTEXT: "String", 783 exp.DataType.Type.TINYBLOB: "String", 784 exp.DataType.Type.TINYTEXT: "String", 785 exp.DataType.Type.TEXT: "String", 786 exp.DataType.Type.VARBINARY: "String", 787 exp.DataType.Type.VARCHAR: "String", 788 } 789 790 SUPPORTED_JSON_PATH_PARTS = { 791 exp.JSONPathKey, 792 exp.JSONPathRoot, 793 exp.JSONPathSubscript, 794 } 795 796 TYPE_MAPPING = { 797 **generator.Generator.TYPE_MAPPING, 798 **STRING_TYPE_MAPPING, 799 exp.DataType.Type.ARRAY: "Array", 800 exp.DataType.Type.BIGINT: "Int64", 801 exp.DataType.Type.DATE32: "Date32", 802 exp.DataType.Type.DATETIME: "DateTime", 803 exp.DataType.Type.DATETIME64: "DateTime64", 804 exp.DataType.Type.TIMESTAMP: "DateTime", 805 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 806 exp.DataType.Type.DOUBLE: "Float64", 807 exp.DataType.Type.ENUM: "Enum", 808 exp.DataType.Type.ENUM8: "Enum8", 809 exp.DataType.Type.ENUM16: "Enum16", 810 exp.DataType.Type.FIXEDSTRING: "FixedString", 811 exp.DataType.Type.FLOAT: "Float32", 812 exp.DataType.Type.INT: "Int32", 813 exp.DataType.Type.MEDIUMINT: "Int32", 814 exp.DataType.Type.INT128: "Int128", 815 exp.DataType.Type.INT256: "Int256", 816 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 817 exp.DataType.Type.MAP: "Map", 818 exp.DataType.Type.NESTED: "Nested", 819 exp.DataType.Type.SMALLINT: "Int16", 820 exp.DataType.Type.STRUCT: "Tuple", 821 exp.DataType.Type.TINYINT: "Int8", 822 exp.DataType.Type.UBIGINT: "UInt64", 823 exp.DataType.Type.UINT: "UInt32", 824 exp.DataType.Type.UINT128: "UInt128", 825 exp.DataType.Type.UINT256: "UInt256", 826 exp.DataType.Type.USMALLINT: "UInt16", 827 exp.DataType.Type.UTINYINT: "UInt8", 828 exp.DataType.Type.IPV4: "IPv4", 829 exp.DataType.Type.IPV6: "IPv6", 830 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 831 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 832 } 833 834 TRANSFORMS = { 835 **generator.Generator.TRANSFORMS, 836 exp.AnyValue: rename_func("any"), 837 exp.ApproxDistinct: rename_func("uniq"), 838 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 839 exp.ArraySize: rename_func("LENGTH"), 840 exp.ArraySum: rename_func("arraySum"), 841 exp.ArgMax: arg_max_or_min_no_count("argMax"), 842 exp.ArgMin: arg_max_or_min_no_count("argMin"), 843 exp.Array: inline_array_sql, 844 exp.CastToStrType: rename_func("CAST"), 845 exp.CountIf: rename_func("countIf"), 846 exp.CompressColumnConstraint: lambda self, 847 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 848 exp.ComputedColumnConstraint: lambda self, 849 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 850 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 851 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 852 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 853 exp.DateStrToDate: rename_func("toDate"), 854 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 855 exp.Explode: rename_func("arrayJoin"), 856 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 857 exp.IsNan: rename_func("isNaN"), 858 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 859 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 860 exp.JSONPathKey: json_path_key_only_name, 861 exp.JSONPathRoot: lambda *_: "", 862 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 863 exp.Nullif: rename_func("nullIf"), 864 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 865 exp.Pivot: no_pivot_sql, 866 exp.Quantile: _quantile_sql, 867 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 868 exp.Rand: rename_func("randCanonical"), 869 exp.StartsWith: rename_func("startsWith"), 870 exp.StrPosition: lambda self, e: self.func( 871 "position", e.this, e.args.get("substr"), e.args.get("position") 872 ), 873 exp.TimeToStr: lambda self, e: self.func( 874 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 875 ), 876 exp.TimeStrToTime: _timestrtotime_sql, 877 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 878 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 879 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 880 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 881 exp.MD5Digest: rename_func("MD5"), 882 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 883 exp.SHA: rename_func("SHA1"), 884 exp.SHA2: sha256_sql, 885 exp.UnixToTime: _unix_to_time_sql, 886 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 887 exp.Trim: trim_sql, 888 exp.Variance: rename_func("varSamp"), 889 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 890 exp.Stddev: rename_func("stddevSamp"), 891 exp.Chr: rename_func("CHAR"), 892 exp.Lag: lambda self, e: self.func( 893 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 894 ), 895 exp.Lead: lambda self, e: self.func( 896 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 897 ), 898 } 899 900 PROPERTIES_LOCATION = { 901 **generator.Generator.PROPERTIES_LOCATION, 902 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 903 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 904 exp.OnCluster: exp.Properties.Location.POST_NAME, 905 } 906 907 # There's no list in docs, but it can be found in Clickhouse code 908 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 909 ON_CLUSTER_TARGETS = { 910 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 911 "DATABASE", 912 "TABLE", 913 "VIEW", 914 "DICTIONARY", 915 "INDEX", 916 "FUNCTION", 917 "NAMED COLLECTION", 918 } 919 920 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 921 NON_NULLABLE_TYPES = { 922 exp.DataType.Type.ARRAY, 923 exp.DataType.Type.MAP, 924 exp.DataType.Type.STRUCT, 925 } 926 927 def strtodate_sql(self, expression: exp.StrToDate) -> str: 928 strtodate_sql = self.function_fallback_sql(expression) 929 930 if not isinstance(expression.parent, exp.Cast): 931 # StrToDate returns DATEs in other dialects (eg. postgres), so 932 # this branch aims to improve the transpilation to clickhouse 933 return f"CAST({strtodate_sql} AS DATE)" 934 935 return strtodate_sql 936 937 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 938 this = expression.this 939 940 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 941 return self.sql(this) 942 943 return super().cast_sql(expression, safe_prefix=safe_prefix) 944 945 def trycast_sql(self, expression: exp.TryCast) -> str: 946 dtype = expression.to 947 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 948 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 949 dtype.set("nullable", True) 950 951 return super().cast_sql(expression) 952 953 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 954 this = self.json_path_part(expression.this) 955 return str(int(this) + 1) if is_int(this) else this 956 957 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 958 return f"AS {self.sql(expression, 'this')}" 959 960 def _any_to_has( 961 self, 962 expression: exp.EQ | exp.NEQ, 963 default: t.Callable[[t.Any], str], 964 prefix: str = "", 965 ) -> str: 966 if isinstance(expression.left, exp.Any): 967 arr = expression.left 968 this = expression.right 969 elif isinstance(expression.right, exp.Any): 970 arr = expression.right 971 this = expression.left 972 else: 973 return default(expression) 974 975 return prefix + self.func("has", arr.this.unnest(), this) 976 977 def eq_sql(self, expression: exp.EQ) -> str: 978 return self._any_to_has(expression, super().eq_sql) 979 980 def neq_sql(self, expression: exp.NEQ) -> str: 981 return self._any_to_has(expression, super().neq_sql, "NOT ") 982 983 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 984 # Manually add a flag to make the search case-insensitive 985 regex = self.func("CONCAT", "'(?i)'", expression.expression) 986 return self.func("match", expression.this, regex) 987 988 def datatype_sql(self, expression: exp.DataType) -> str: 989 # String is the standard ClickHouse type, every other variant is just an alias. 990 # Additionally, any supplied length parameter will be ignored. 991 # 992 # https://clickhouse.com/docs/en/sql-reference/data-types/string 993 if expression.this in self.STRING_TYPE_MAPPING: 994 dtype = "String" 995 else: 996 dtype = super().datatype_sql(expression) 997 998 # This section changes the type to `Nullable(...)` if the following conditions hold: 999 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1000 # and change their semantics 1001 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1002 # constraint: "Type of Map key must be a type, that can be represented by integer or 1003 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1004 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1005 parent = expression.parent 1006 nullable = expression.args.get("nullable") 1007 if nullable is True or ( 1008 nullable is None 1009 and not ( 1010 isinstance(parent, exp.DataType) 1011 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1012 and expression.index in (None, 0) 1013 ) 1014 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1015 ): 1016 dtype = f"Nullable({dtype})" 1017 1018 return dtype 1019 1020 def cte_sql(self, expression: exp.CTE) -> str: 1021 if expression.args.get("scalar"): 1022 this = self.sql(expression, "this") 1023 alias = self.sql(expression, "alias") 1024 return f"{this} AS {alias}" 1025 1026 return super().cte_sql(expression) 1027 1028 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1029 return super().after_limit_modifiers(expression) + [ 1030 ( 1031 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1032 if expression.args.get("settings") 1033 else "" 1034 ), 1035 ( 1036 self.seg("FORMAT ") + self.sql(expression, "format") 1037 if expression.args.get("format") 1038 else "" 1039 ), 1040 ] 1041 1042 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1043 params = self.expressions(expression, key="params", flat=True) 1044 return self.func(expression.name, *expression.expressions) + f"({params})" 1045 1046 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1047 return self.func(expression.name, *expression.expressions) 1048 1049 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1050 return self.anonymousaggfunc_sql(expression) 1051 1052 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1053 return self.parameterizedagg_sql(expression) 1054 1055 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1056 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1057 1058 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1059 return f"ON CLUSTER {self.sql(expression, 'this')}" 1060 1061 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1062 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1063 exp.Properties.Location.POST_NAME 1064 ): 1065 this_name = self.sql( 1066 expression.this if isinstance(expression.this, exp.Schema) else expression, 1067 "this", 1068 ) 1069 this_properties = " ".join( 1070 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1071 ) 1072 this_schema = self.schema_columns_sql(expression.this) 1073 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1074 1075 return super().createable_sql(expression, locations) 1076 1077 def create_sql(self, expression: exp.Create) -> str: 1078 # The comment property comes last in CTAS statements, i.e. after the query 1079 query = expression.expression 1080 if isinstance(query, exp.Query): 1081 comment_prop = expression.find(exp.SchemaCommentProperty) 1082 if comment_prop: 1083 comment_prop.pop() 1084 query.replace(exp.paren(query)) 1085 else: 1086 comment_prop = None 1087 1088 create_sql = super().create_sql(expression) 1089 1090 comment_sql = self.sql(comment_prop) 1091 comment_sql = f" {comment_sql}" if comment_sql else "" 1092 1093 return f"{create_sql}{comment_sql}" 1094 1095 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1096 this = self.indent(self.sql(expression, "this")) 1097 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1098 1099 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1100 this = self.sql(expression, "this") 1101 this = f" {this}" if this else "" 1102 expr = self.sql(expression, "expression") 1103 expr = f" {expr}" if expr else "" 1104 index_type = self.sql(expression, "index_type") 1105 index_type = f" TYPE {index_type}" if index_type else "" 1106 granularity = self.sql(expression, "granularity") 1107 granularity = f" GRANULARITY {granularity}" if granularity else "" 1108 1109 return f"INDEX{this}{expr}{index_type}{granularity}" 1110 1111 def partition_sql(self, expression: exp.Partition) -> str: 1112 return f"PARTITION {self.expressions(expression, flat=True)}" 1113 1114 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1115 return f"ID {self.sql(expression.this)}" 1116 1117 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1118 return ( 1119 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1120 ) 1121 1122 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1123 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
927 def strtodate_sql(self, expression: exp.StrToDate) -> str: 928 strtodate_sql = self.function_fallback_sql(expression) 929 930 if not isinstance(expression.parent, exp.Cast): 931 # StrToDate returns DATEs in other dialects (eg. postgres), so 932 # this branch aims to improve the transpilation to clickhouse 933 return f"CAST({strtodate_sql} AS DATE)" 934 935 return strtodate_sql
937 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 938 this = expression.this 939 940 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 941 return self.sql(this) 942 943 return super().cast_sql(expression, safe_prefix=safe_prefix)
945 def trycast_sql(self, expression: exp.TryCast) -> str: 946 dtype = expression.to 947 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 948 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 949 dtype.set("nullable", True) 950 951 return super().cast_sql(expression)
988 def datatype_sql(self, expression: exp.DataType) -> str: 989 # String is the standard ClickHouse type, every other variant is just an alias. 990 # Additionally, any supplied length parameter will be ignored. 991 # 992 # https://clickhouse.com/docs/en/sql-reference/data-types/string 993 if expression.this in self.STRING_TYPE_MAPPING: 994 dtype = "String" 995 else: 996 dtype = super().datatype_sql(expression) 997 998 # This section changes the type to `Nullable(...)` if the following conditions hold: 999 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1000 # and change their semantics 1001 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1002 # constraint: "Type of Map key must be a type, that can be represented by integer or 1003 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1004 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1005 parent = expression.parent 1006 nullable = expression.args.get("nullable") 1007 if nullable is True or ( 1008 nullable is None 1009 and not ( 1010 isinstance(parent, exp.DataType) 1011 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1012 and expression.index in (None, 0) 1013 ) 1014 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1015 ): 1016 dtype = f"Nullable({dtype})" 1017 1018 return dtype
1028 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1029 return super().after_limit_modifiers(expression) + [ 1030 ( 1031 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1032 if expression.args.get("settings") 1033 else "" 1034 ), 1035 ( 1036 self.seg("FORMAT ") + self.sql(expression, "format") 1037 if expression.args.get("format") 1038 else "" 1039 ), 1040 ]
1061 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1062 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1063 exp.Properties.Location.POST_NAME 1064 ): 1065 this_name = self.sql( 1066 expression.this if isinstance(expression.this, exp.Schema) else expression, 1067 "this", 1068 ) 1069 this_properties = " ".join( 1070 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1071 ) 1072 this_schema = self.schema_columns_sql(expression.this) 1073 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1074 1075 return super().createable_sql(expression, locations)
1077 def create_sql(self, expression: exp.Create) -> str: 1078 # The comment property comes last in CTAS statements, i.e. after the query 1079 query = expression.expression 1080 if isinstance(query, exp.Query): 1081 comment_prop = expression.find(exp.SchemaCommentProperty) 1082 if comment_prop: 1083 comment_prop.pop() 1084 query.replace(exp.paren(query)) 1085 else: 1086 comment_prop = None 1087 1088 create_sql = super().create_sql(expression) 1089 1090 comment_sql = self.sql(comment_prop) 1091 comment_sql = f" {comment_sql}" if comment_sql else "" 1092 1093 return f"{create_sql}{comment_sql}"
1099 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1100 this = self.sql(expression, "this") 1101 this = f" {this}" if this else "" 1102 expr = self.sql(expression, "expression") 1103 expr = f" {expr}" if expr else "" 1104 index_type = self.sql(expression, "index_type") 1105 index_type = f" TYPE {index_type}" if index_type else "" 1106 granularity = self.sql(expression, "granularity") 1107 granularity = f" GRANULARITY {granularity}" if granularity else "" 1108 1109 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql