sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
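# Illustrative sketch (not part of the library source): the parenthesization done by
# build_mod is what preserves operator precedence when MOD(..) is transpiled to the
# infix % operator.
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT MOD(a + 1, 7)")[0]  # expected: 'SELECT (a + 1) % 7'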
def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)
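# Illustrative sketch (not part of the library source): COALESCE, IFNULL and NVL are
# all routed to build_coalesce via the FUNCTIONS table below, so they normalize to the
# same exp.Coalesce node.
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("SELECT IFNULL(a, b)").selects[0])
#     <class 'sqlglot.expressions.Coalesce'>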
class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }
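    # Tokens for container types that can wrap other types, e.g. ARRAY<INT> or MAP<TEXT, INT>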
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}
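    # Object kinds that can follow CREATE at the database level; CREATABLES below extends
    # this set with objects such as columns, functions and indexes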
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
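    # Tokens that may also be parsed as function names when followed by parentheses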
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
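    # Illustrative note (not part of the library source): the operator tables above
    # (CONJUNCTION, DISJUNCTION, EQUALITY, COMPARISON, BITWISE, TERM, FACTOR, EXPONENT)
    # drive a precedence-climbing parse; each level folds its own tokens and delegates
    # to the next tighter level, so multiplication binds tighter than addition.
    #
    #     >>> import sqlglot
    #     >>> node = sqlglot.parse_one("SELECT 1 + 2 * 3").selects[0]
    #     >>> isinstance(node, sqlglot.exp.Add) and isinstance(node.expression, sqlglot.exp.Mul)
    #     True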
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
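    # Illustrative sketch (not part of the library source): statements dispatch on their
    # first significant token through STATEMENT_PARSERS (see _parse_statement below);
    # anything unmatched falls back to expression or command parsing.
    #
    #     >>> import sqlglot
    #     >>> isinstance(sqlglot.parse_one("USE my_db"), sqlglot.exp.Use)
    #     True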
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
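    # Illustrative sketch (not part of the library source): RANGE_PARSERS extend an
    # already-parsed left-hand side when a range-style token such as BETWEEN, IN, IS or
    # LIKE follows it.
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT a BETWEEN 1 AND 2").selects[0].sql()
    #     'a BETWEEN 1 AND 2'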
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
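    # Illustrative sketch (not part of the library source): PROPERTY_PARSERS is keyed by
    # the uppercase keyword that introduces a DDL property, e.g. MySQL's ENGINE clause.
    #
    #     >>> import sqlglot
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    #     >>> ddl.find(sqlglot.exp.EngineProperty) is not None
    #     True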
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
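    # Illustrative sketch (not part of the library source): column constraints are parsed
    # via CONSTRAINT_PARSERS, so a definition such as "x INT NOT NULL DEFAULT 0" yields
    # NotNull / Default column-constraint nodes under the column definition.
    #
    #     >>> import sqlglot
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL DEFAULT 0)")
    #     >>> ddl.find(sqlglot.exp.DefaultColumnConstraint) is not None
    #     True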
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
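    # Illustrative sketch (not part of the library source): FUNCTION_PARSERS below handles
    # functions whose argument syntax is not a plain comma-separated list, e.g.
    # CAST(x AS INT) or EXTRACT(YEAR FROM d).
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT CAST(x AS INT)").selects[0].to.sql()
    #     'INT'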
    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}
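    # Illustrative sketch (not part of the library source): QUERY_MODIFIER_PARSERS above
    # attaches trailing clauses such as WHERE, GROUP BY and LIMIT to the query being parsed.
    #
    #     >>> import sqlglot
    #     >>> isinstance(sqlglot.parse_one("SELECT x FROM t LIMIT 1").args["limit"], sqlglot.exp.Limit)
    #     True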
    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False
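    # Whether a single-argument LOG parses as the natural logarithm (exp.Ln) rather than
    # exp.Log; see build_logarithm above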
    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
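    # A minimal end-to-end sketch (assumed usage, not part of the library source):
    # tokenize SQL first, then hand the resulting tokens to the parser.
    #
    #     >>> from sqlglot import tokenize
    #     >>> from sqlglot.parser import Parser
    #     >>> [e.sql() for e in Parser().parse(tokenize("SELECT 1; SELECT 2"))]
    #     ['SELECT 1', 'SELECT 2']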
1387 """ 1388 return self._parse( 1389 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1390 ) 1391 1392 def parse_into( 1393 self, 1394 expression_types: exp.IntoType, 1395 raw_tokens: t.List[Token], 1396 sql: t.Optional[str] = None, 1397 ) -> t.List[t.Optional[exp.Expression]]: 1398 """ 1399 Parses a list of tokens into a given Expression type. If a collection of Expression 1400 types is given instead, this method will try to parse the token list into each one 1401 of them, stopping at the first for which the parsing succeeds. 1402 1403 Args: 1404 expression_types: The expression type(s) to try and parse the token list into. 1405 raw_tokens: The list of tokens. 1406 sql: The original SQL string, used to produce helpful debug messages. 1407 1408 Returns: 1409 The target Expression. 1410 """ 1411 errors = [] 1412 for expression_type in ensure_list(expression_types): 1413 parser = self.EXPRESSION_PARSERS.get(expression_type) 1414 if not parser: 1415 raise TypeError(f"No parser registered for {expression_type}") 1416 1417 try: 1418 return self._parse(parser, raw_tokens, sql) 1419 except ParseError as e: 1420 e.errors[0]["into_expression"] = expression_type 1421 errors.append(e) 1422 1423 raise ParseError( 1424 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1425 errors=merge_errors(errors), 1426 ) from errors[-1] 1427 1428 def _parse( 1429 self, 1430 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1431 raw_tokens: t.List[Token], 1432 sql: t.Optional[str] = None, 1433 ) -> t.List[t.Optional[exp.Expression]]: 1434 self.reset() 1435 self.sql = sql or "" 1436 1437 total = len(raw_tokens) 1438 chunks: t.List[t.List[Token]] = [[]] 1439 1440 for i, token in enumerate(raw_tokens): 1441 if token.token_type == TokenType.SEMICOLON: 1442 if token.comments: 1443 chunks.append([token]) 1444 1445 if i < total - 1: 1446 chunks.append([]) 1447 else: 1448 chunks[-1].append(token) 1449 1450 expressions = [] 1451 1452 for tokens in chunks: 1453 self._index = -1 1454 self._tokens = tokens 1455 self._advance() 1456 1457 expressions.append(parse_method(self)) 1458 1459 if self._index < len(self._tokens): 1460 self.raise_error("Invalid expression / Unexpected token") 1461 1462 self.check_errors() 1463 1464 return expressions 1465 1466 def check_errors(self) -> None: 1467 """Logs or raises any found errors, depending on the chosen error level setting.""" 1468 if self.error_level == ErrorLevel.WARN: 1469 for error in self.errors: 1470 logger.error(str(error)) 1471 elif self.error_level == ErrorLevel.RAISE and self.errors: 1472 raise ParseError( 1473 concat_messages(self.errors, self.max_errors), 1474 errors=merge_errors(self.errors), 1475 ) 1476 1477 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1478 """ 1479 Appends an error in the list of recorded errors or raises it, depending on the chosen 1480 error level setting. 1481 """ 1482 token = token or self._curr or self._prev or Token.string("") 1483 start = token.start 1484 end = token.end + 1 1485 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1486 highlight = self.sql[start:end] 1487 end_context = self.sql[end : end + self.error_message_context] 1488 1489 error = ParseError.new( 1490 f"{message}. 
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
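    # Illustrative sketch (not part of the library source): statements the parser does not
    # understand are not dropped; _parse_command above wraps them in an exp.Command node so
    # the original text survives a round trip. For example, in the default dialect EXPLAIN
    # is tokenized as a command keyword:
    #
    #     >>> import sqlglot
    #     >>> isinstance(sqlglot.parse_one("EXPLAIN SELECT 1"), sqlglot.exp.Command)
    #     True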
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
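    # Illustrative sketch (not part of the library source): _parse_drop below handles DROP
    # statements, including IF EXISTS and trailing options such as CASCADE.
    #
    #     >>> import sqlglot
    #     >>> drop = sqlglot.parse_one("DROP TABLE IF EXISTS t CASCADE")
    #     >>> drop.args["exists"], drop.args["cascade"]
    #     (True, True)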
self._prev 1700 temporary = self._match(TokenType.TEMPORARY) 1701 materialized = self._match_text_seq("MATERIALIZED") 1702 1703 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1704 if not kind: 1705 return self._parse_as_command(start) 1706 1707 concurrently = self._match_text_seq("CONCURRENTLY") 1708 if_exists = exists or self._parse_exists() 1709 table = self._parse_table_parts( 1710 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1711 ) 1712 1713 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1714 1715 if self._match(TokenType.L_PAREN, advance=False): 1716 expressions = self._parse_wrapped_csv(self._parse_types) 1717 else: 1718 expressions = None 1719 1720 return self.expression( 1721 exp.Drop, 1722 comments=start.comments, 1723 exists=if_exists, 1724 this=table, 1725 expressions=expressions, 1726 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1727 temporary=temporary, 1728 materialized=materialized, 1729 cascade=self._match_text_seq("CASCADE"), 1730 constraints=self._match_text_seq("CONSTRAINTS"), 1731 purge=self._match_text_seq("PURGE"), 1732 cluster=cluster, 1733 concurrently=concurrently, 1734 ) 1735 1736 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1737 return ( 1738 self._match_text_seq("IF") 1739 and (not not_ or self._match(TokenType.NOT)) 1740 and self._match(TokenType.EXISTS) 1741 ) 1742 1743 def _parse_create(self) -> exp.Create | exp.Command: 1744 # Note: this can't be None because we've matched a statement parser 1745 start = self._prev 1746 comments = self._prev_comments 1747 1748 replace = ( 1749 start.token_type == TokenType.REPLACE 1750 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1751 or self._match_pair(TokenType.OR, TokenType.ALTER) 1752 ) 1753 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1754 1755 unique = self._match(TokenType.UNIQUE) 1756 1757 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1758 clustered = True 1759 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1760 "COLUMNSTORE" 1761 ): 1762 clustered = False 1763 else: 1764 clustered = None 1765 1766 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1767 self._advance() 1768 1769 properties = None 1770 create_token = self._match_set(self.CREATABLES) and self._prev 1771 1772 if not create_token: 1773 # exp.Properties.Location.POST_CREATE 1774 properties = self._parse_properties() 1775 create_token = self._match_set(self.CREATABLES) and self._prev 1776 1777 if not properties or not create_token: 1778 return self._parse_as_command(start) 1779 1780 concurrently = self._match_text_seq("CONCURRENTLY") 1781 exists = self._parse_exists(not_=True) 1782 this = None 1783 expression: t.Optional[exp.Expression] = None 1784 indexes = None 1785 no_schema_binding = None 1786 begin = None 1787 end = None 1788 clone = None 1789 1790 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1791 nonlocal properties 1792 if properties and temp_props: 1793 properties.expressions.extend(temp_props.expressions) 1794 elif temp_props: 1795 properties = temp_props 1796 1797 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1798 this = self._parse_user_defined_function(kind=create_token.token_type) 1799 1800 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1801 extend_props(self._parse_properties()) 1802 1803 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1804 
extend_props(self._parse_properties()) 1805 1806 if not expression: 1807 if self._match(TokenType.COMMAND): 1808 expression = self._parse_as_command(self._prev) 1809 else: 1810 begin = self._match(TokenType.BEGIN) 1811 return_ = self._match_text_seq("RETURN") 1812 1813 if self._match(TokenType.STRING, advance=False): 1814 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1815 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1816 expression = self._parse_string() 1817 extend_props(self._parse_properties()) 1818 else: 1819 expression = self._parse_statement() 1820 1821 end = self._match_text_seq("END") 1822 1823 if return_: 1824 expression = self.expression(exp.Return, this=expression) 1825 elif create_token.token_type == TokenType.INDEX: 1826 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1827 if not self._match(TokenType.ON): 1828 index = self._parse_id_var() 1829 anonymous = False 1830 else: 1831 index = None 1832 anonymous = True 1833 1834 this = self._parse_index(index=index, anonymous=anonymous) 1835 elif create_token.token_type in self.DB_CREATABLES: 1836 table_parts = self._parse_table_parts( 1837 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1838 ) 1839 1840 # exp.Properties.Location.POST_NAME 1841 self._match(TokenType.COMMA) 1842 extend_props(self._parse_properties(before=True)) 1843 1844 this = self._parse_schema(this=table_parts) 1845 1846 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1847 extend_props(self._parse_properties()) 1848 1849 self._match(TokenType.ALIAS) 1850 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1851 # exp.Properties.Location.POST_ALIAS 1852 extend_props(self._parse_properties()) 1853 1854 if create_token.token_type == TokenType.SEQUENCE: 1855 expression = self._parse_types() 1856 extend_props(self._parse_properties()) 1857 else: 1858 expression = self._parse_ddl_select() 1859 1860 if create_token.token_type == TokenType.TABLE: 1861 # exp.Properties.Location.POST_EXPRESSION 1862 extend_props(self._parse_properties()) 1863 1864 indexes = [] 1865 while True: 1866 index = self._parse_index() 1867 1868 # exp.Properties.Location.POST_INDEX 1869 extend_props(self._parse_properties()) 1870 if not index: 1871 break 1872 else: 1873 self._match(TokenType.COMMA) 1874 indexes.append(index) 1875 elif create_token.token_type == TokenType.VIEW: 1876 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1877 no_schema_binding = True 1878 1879 shallow = self._match_text_seq("SHALLOW") 1880 1881 if self._match_texts(self.CLONE_KEYWORDS): 1882 copy = self._prev.text.lower() == "copy" 1883 clone = self.expression( 1884 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1885 ) 1886 1887 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1888 return self._parse_as_command(start) 1889 1890 create_kind_text = create_token.text.upper() 1891 return self.expression( 1892 exp.Create, 1893 comments=comments, 1894 this=this, 1895 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1896 replace=replace, 1897 refresh=refresh, 1898 unique=unique, 1899 expression=expression, 1900 exists=exists, 1901 properties=properties, 1902 indexes=indexes, 1903 no_schema_binding=no_schema_binding, 1904 begin=begin, 1905 end=end, 1906 clone=clone, 1907 concurrently=concurrently, 1908 clustered=clustered, 1909 ) 1910 1911 def 
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1912 seq = exp.SequenceProperties() 1913 1914 options = [] 1915 index = self._index 1916 1917 while self._curr: 1918 self._match(TokenType.COMMA) 1919 if self._match_text_seq("INCREMENT"): 1920 self._match_text_seq("BY") 1921 self._match_text_seq("=") 1922 seq.set("increment", self._parse_term()) 1923 elif self._match_text_seq("MINVALUE"): 1924 seq.set("minvalue", self._parse_term()) 1925 elif self._match_text_seq("MAXVALUE"): 1926 seq.set("maxvalue", self._parse_term()) 1927 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1928 self._match_text_seq("=") 1929 seq.set("start", self._parse_term()) 1930 elif self._match_text_seq("CACHE"): 1931 # T-SQL allows empty CACHE which is initialized dynamically 1932 seq.set("cache", self._parse_number() or True) 1933 elif self._match_text_seq("OWNED", "BY"): 1934 # "OWNED BY NONE" is the default 1935 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1936 else: 1937 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1938 if opt: 1939 options.append(opt) 1940 else: 1941 break 1942 1943 seq.set("options", options if options else None) 1944 return None if self._index == index else seq 1945 1946 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1947 # only used for teradata currently 1948 self._match(TokenType.COMMA) 1949 1950 kwargs = { 1951 "no": self._match_text_seq("NO"), 1952 "dual": self._match_text_seq("DUAL"), 1953 "before": self._match_text_seq("BEFORE"), 1954 "default": self._match_text_seq("DEFAULT"), 1955 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1956 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1957 "after": self._match_text_seq("AFTER"), 1958 "minimum": self._match_texts(("MIN", "MINIMUM")), 1959 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1960 } 1961 1962 if self._match_texts(self.PROPERTY_PARSERS): 1963 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1964 try: 1965 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1966 except TypeError: 1967 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1968 1969 return None 1970 1971 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1972 return self._parse_wrapped_csv(self._parse_property) 1973 1974 def _parse_property(self) -> t.Optional[exp.Expression]: 1975 if self._match_texts(self.PROPERTY_PARSERS): 1976 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1977 1978 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1979 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1980 1981 if self._match_text_seq("COMPOUND", "SORTKEY"): 1982 return self._parse_sortkey(compound=True) 1983 1984 if self._match_text_seq("SQL", "SECURITY"): 1985 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1986 1987 index = self._index 1988 key = self._parse_column() 1989 1990 if not self._match(TokenType.EQ): 1991 self._retreat(index) 1992 return self._parse_sequence_properties() 1993 1994 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1995 if isinstance(key, exp.Column): 1996 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1997 1998 value = self._parse_bitwise() or self._parse_var(any_token=True) 1999 2000 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2001 if 
isinstance(value, exp.Column): 2002 value = exp.var(value.name) 2003 2004 return self.expression(exp.Property, this=key, value=value) 2005 2006 def _parse_stored(self) -> exp.FileFormatProperty: 2007 self._match(TokenType.ALIAS) 2008 2009 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2010 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2011 2012 return self.expression( 2013 exp.FileFormatProperty, 2014 this=( 2015 self.expression( 2016 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2017 ) 2018 if input_format or output_format 2019 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2020 ), 2021 ) 2022 2023 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2024 field = self._parse_field() 2025 if isinstance(field, exp.Identifier) and not field.quoted: 2026 field = exp.var(field) 2027 2028 return field 2029 2030 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2031 self._match(TokenType.EQ) 2032 self._match(TokenType.ALIAS) 2033 2034 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2035 2036 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2037 properties = [] 2038 while True: 2039 if before: 2040 prop = self._parse_property_before() 2041 else: 2042 prop = self._parse_property() 2043 if not prop: 2044 break 2045 for p in ensure_list(prop): 2046 properties.append(p) 2047 2048 if properties: 2049 return self.expression(exp.Properties, expressions=properties) 2050 2051 return None 2052 2053 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2054 return self.expression( 2055 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2056 ) 2057 2058 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2059 if self._match_texts(("DEFINER", "INVOKER")): 2060 security_specifier = self._prev.text.upper() 2061 return self.expression(exp.SecurityProperty, this=security_specifier) 2062 return None 2063 2064 def _parse_settings_property(self) -> exp.SettingsProperty: 2065 return self.expression( 2066 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2067 ) 2068 2069 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2070 if self._index >= 2: 2071 pre_volatile_token = self._tokens[self._index - 2] 2072 else: 2073 pre_volatile_token = None 2074 2075 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2076 return exp.VolatileProperty() 2077 2078 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2079 2080 def _parse_retention_period(self) -> exp.Var: 2081 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2082 number = self._parse_number() 2083 number_str = f"{number} " if number else "" 2084 unit = self._parse_var(any_token=True) 2085 return exp.var(f"{number_str}{unit}") 2086 2087 def _parse_system_versioning_property( 2088 self, with_: bool = False 2089 ) -> exp.WithSystemVersioningProperty: 2090 self._match(TokenType.EQ) 2091 prop = self.expression( 2092 exp.WithSystemVersioningProperty, 2093 **{ # type: ignore 2094 "on": True, 2095 "with": with_, 2096 }, 2097 ) 2098 2099 if self._match_text_seq("OFF"): 2100 prop.set("on", False) 2101 return prop 2102 2103 self._match(TokenType.ON) 2104 if self._match(TokenType.L_PAREN): 2105 while self._curr and not 
self._match(TokenType.R_PAREN): 2106 if self._match_text_seq("HISTORY_TABLE", "="): 2107 prop.set("this", self._parse_table_parts()) 2108 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2109 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2110 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2111 prop.set("retention_period", self._parse_retention_period()) 2112 2113 self._match(TokenType.COMMA) 2114 2115 return prop 2116 2117 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2118 self._match(TokenType.EQ) 2119 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2120 prop = self.expression(exp.DataDeletionProperty, on=on) 2121 2122 if self._match(TokenType.L_PAREN): 2123 while self._curr and not self._match(TokenType.R_PAREN): 2124 if self._match_text_seq("FILTER_COLUMN", "="): 2125 prop.set("filter_column", self._parse_column()) 2126 elif self._match_text_seq("RETENTION_PERIOD", "="): 2127 prop.set("retention_period", self._parse_retention_period()) 2128 2129 self._match(TokenType.COMMA) 2130 2131 return prop 2132 2133 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2134 kind = "HASH" 2135 expressions: t.Optional[t.List[exp.Expression]] = None 2136 if self._match_text_seq("BY", "HASH"): 2137 expressions = self._parse_wrapped_csv(self._parse_id_var) 2138 elif self._match_text_seq("BY", "RANDOM"): 2139 kind = "RANDOM" 2140 2141 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2142 buckets: t.Optional[exp.Expression] = None 2143 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2144 buckets = self._parse_number() 2145 2146 return self.expression( 2147 exp.DistributedByProperty, 2148 expressions=expressions, 2149 kind=kind, 2150 buckets=buckets, 2151 order=self._parse_order(), 2152 ) 2153 2154 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2155 self._match_text_seq("KEY") 2156 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2157 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2158 2159 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2160 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2161 prop = self._parse_system_versioning_property(with_=True) 2162 self._match_r_paren() 2163 return prop 2164 2165 if self._match(TokenType.L_PAREN, advance=False): 2166 return self._parse_wrapped_properties() 2167 2168 if self._match_text_seq("JOURNAL"): 2169 return self._parse_withjournaltable() 2170 2171 if self._match_texts(self.VIEW_ATTRIBUTES): 2172 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2173 2174 if self._match_text_seq("DATA"): 2175 return self._parse_withdata(no=False) 2176 elif self._match_text_seq("NO", "DATA"): 2177 return self._parse_withdata(no=True) 2178 2179 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2180 return self._parse_serde_properties(with_=True) 2181 2182 if self._match(TokenType.SCHEMA): 2183 return self.expression( 2184 exp.WithSchemaBindingProperty, 2185 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2186 ) 2187 2188 if not self._next: 2189 return None 2190 2191 return self._parse_withisolatedloading() 2192 2193 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2194 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2195 self._match(TokenType.EQ) 2196 2197 user = self._parse_id_var() 2198 self._match(TokenType.PARAMETER) 2199 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2200 2201 if not user or not host: 2202 return None 2203 2204 return exp.DefinerProperty(this=f"{user}@{host}") 2205 2206 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2207 self._match(TokenType.TABLE) 2208 self._match(TokenType.EQ) 2209 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2210 2211 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2212 return self.expression(exp.LogProperty, no=no) 2213 2214 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2215 return self.expression(exp.JournalProperty, **kwargs) 2216 2217 def _parse_checksum(self) -> exp.ChecksumProperty: 2218 self._match(TokenType.EQ) 2219 2220 on = None 2221 if self._match(TokenType.ON): 2222 on = True 2223 elif self._match_text_seq("OFF"): 2224 on = False 2225 2226 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2227 2228 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2229 return self.expression( 2230 exp.Cluster, 2231 expressions=( 2232 self._parse_wrapped_csv(self._parse_ordered) 2233 if wrapped 2234 else self._parse_csv(self._parse_ordered) 2235 ), 2236 ) 2237 2238 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2239 self._match_text_seq("BY") 2240 2241 self._match_l_paren() 2242 expressions = self._parse_csv(self._parse_column) 2243 self._match_r_paren() 2244 2245 if self._match_text_seq("SORTED", "BY"): 2246 self._match_l_paren() 2247 sorted_by = self._parse_csv(self._parse_ordered) 2248 self._match_r_paren() 2249 else: 2250 sorted_by = None 2251 2252 self._match(TokenType.INTO) 2253 buckets = self._parse_number() 2254 self._match_text_seq("BUCKETS") 2255 2256 return self.expression( 2257 exp.ClusteredByProperty, 2258 expressions=expressions, 2259 sorted_by=sorted_by, 2260 buckets=buckets, 2261 ) 2262 2263 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2264 if not self._match_text_seq("GRANTS"): 2265 self._retreat(self._index - 1) 2266 return None 2267 2268 return self.expression(exp.CopyGrantsProperty) 2269 2270 def _parse_freespace(self) -> exp.FreespaceProperty: 2271 self._match(TokenType.EQ) 2272 return self.expression( 2273 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2274 ) 2275 2276 def _parse_mergeblockratio( 2277 self, no: bool = False, default: bool = False 2278 ) -> exp.MergeBlockRatioProperty: 2279 if self._match(TokenType.EQ): 2280 return self.expression( 2281 exp.MergeBlockRatioProperty, 2282 this=self._parse_number(), 2283 percent=self._match(TokenType.PERCENT), 2284 ) 2285 2286 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2287 2288 def _parse_datablocksize( 2289 self, 2290 default: t.Optional[bool] = None, 2291 minimum: t.Optional[bool] = None, 2292 maximum: t.Optional[bool] = None, 2293 ) -> exp.DataBlocksizeProperty: 2294 self._match(TokenType.EQ) 2295 size = self._parse_number() 2296 2297 units = None 2298 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2299 units = self._prev.text 2300 2301 return self.expression( 2302 exp.DataBlocksizeProperty, 2303 size=size, 2304 units=units, 2305 default=default, 2306 minimum=minimum, 2307 maximum=maximum, 2308 ) 2309 2310 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2311 self._match(TokenType.EQ) 2312 always = self._match_text_seq("ALWAYS") 2313 manual = self._match_text_seq("MANUAL") 2314 never = 
self._match_text_seq("NEVER") 2315 default = self._match_text_seq("DEFAULT") 2316 2317 autotemp = None 2318 if self._match_text_seq("AUTOTEMP"): 2319 autotemp = self._parse_schema() 2320 2321 return self.expression( 2322 exp.BlockCompressionProperty, 2323 always=always, 2324 manual=manual, 2325 never=never, 2326 default=default, 2327 autotemp=autotemp, 2328 ) 2329 2330 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2331 index = self._index 2332 no = self._match_text_seq("NO") 2333 concurrent = self._match_text_seq("CONCURRENT") 2334 2335 if not self._match_text_seq("ISOLATED", "LOADING"): 2336 self._retreat(index) 2337 return None 2338 2339 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2340 return self.expression( 2341 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2342 ) 2343 2344 def _parse_locking(self) -> exp.LockingProperty: 2345 if self._match(TokenType.TABLE): 2346 kind = "TABLE" 2347 elif self._match(TokenType.VIEW): 2348 kind = "VIEW" 2349 elif self._match(TokenType.ROW): 2350 kind = "ROW" 2351 elif self._match_text_seq("DATABASE"): 2352 kind = "DATABASE" 2353 else: 2354 kind = None 2355 2356 if kind in ("DATABASE", "TABLE", "VIEW"): 2357 this = self._parse_table_parts() 2358 else: 2359 this = None 2360 2361 if self._match(TokenType.FOR): 2362 for_or_in = "FOR" 2363 elif self._match(TokenType.IN): 2364 for_or_in = "IN" 2365 else: 2366 for_or_in = None 2367 2368 if self._match_text_seq("ACCESS"): 2369 lock_type = "ACCESS" 2370 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2371 lock_type = "EXCLUSIVE" 2372 elif self._match_text_seq("SHARE"): 2373 lock_type = "SHARE" 2374 elif self._match_text_seq("READ"): 2375 lock_type = "READ" 2376 elif self._match_text_seq("WRITE"): 2377 lock_type = "WRITE" 2378 elif self._match_text_seq("CHECKSUM"): 2379 lock_type = "CHECKSUM" 2380 else: 2381 lock_type = None 2382 2383 override = self._match_text_seq("OVERRIDE") 2384 2385 return self.expression( 2386 exp.LockingProperty, 2387 this=this, 2388 kind=kind, 2389 for_or_in=for_or_in, 2390 lock_type=lock_type, 2391 override=override, 2392 ) 2393 2394 def _parse_partition_by(self) -> t.List[exp.Expression]: 2395 if self._match(TokenType.PARTITION_BY): 2396 return self._parse_csv(self._parse_assignment) 2397 return [] 2398 2399 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2400 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2401 if self._match_text_seq("MINVALUE"): 2402 return exp.var("MINVALUE") 2403 if self._match_text_seq("MAXVALUE"): 2404 return exp.var("MAXVALUE") 2405 return self._parse_bitwise() 2406 2407 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2408 expression = None 2409 from_expressions = None 2410 to_expressions = None 2411 2412 if self._match(TokenType.IN): 2413 this = self._parse_wrapped_csv(self._parse_bitwise) 2414 elif self._match(TokenType.FROM): 2415 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2416 self._match_text_seq("TO") 2417 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2418 elif self._match_text_seq("WITH", "(", "MODULUS"): 2419 this = self._parse_number() 2420 self._match_text_seq(",", "REMAINDER") 2421 expression = self._parse_number() 2422 self._match_r_paren() 2423 else: 2424 self.raise_error("Failed to parse partition bound spec.") 2425 2426 return self.expression( 2427 exp.PartitionBoundSpec, 2428 this=this, 2429 expression=expression, 2430 
from_expressions=from_expressions, 2431 to_expressions=to_expressions, 2432 ) 2433 2434 # https://www.postgresql.org/docs/current/sql-createtable.html 2435 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2436 if not self._match_text_seq("OF"): 2437 self._retreat(self._index - 1) 2438 return None 2439 2440 this = self._parse_table(schema=True) 2441 2442 if self._match(TokenType.DEFAULT): 2443 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2444 elif self._match_text_seq("FOR", "VALUES"): 2445 expression = self._parse_partition_bound_spec() 2446 else: 2447 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2448 2449 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2450 2451 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2452 self._match(TokenType.EQ) 2453 return self.expression( 2454 exp.PartitionedByProperty, 2455 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2456 ) 2457 2458 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2459 if self._match_text_seq("AND", "STATISTICS"): 2460 statistics = True 2461 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2462 statistics = False 2463 else: 2464 statistics = None 2465 2466 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2467 2468 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2469 if self._match_text_seq("SQL"): 2470 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2471 return None 2472 2473 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2474 if self._match_text_seq("SQL", "DATA"): 2475 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2476 return None 2477 2478 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2479 if self._match_text_seq("PRIMARY", "INDEX"): 2480 return exp.NoPrimaryIndexProperty() 2481 if self._match_text_seq("SQL"): 2482 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2483 return None 2484 2485 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2486 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2487 return exp.OnCommitProperty() 2488 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2489 return exp.OnCommitProperty(delete=True) 2490 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2491 2492 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2493 if self._match_text_seq("SQL", "DATA"): 2494 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2495 return None 2496 2497 def _parse_distkey(self) -> exp.DistKeyProperty: 2498 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2499 2500 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2501 table = self._parse_table(schema=True) 2502 2503 options = [] 2504 while self._match_texts(("INCLUDING", "EXCLUDING")): 2505 this = self._prev.text.upper() 2506 2507 id_var = self._parse_id_var() 2508 if not id_var: 2509 return None 2510 2511 options.append( 2512 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2513 ) 2514 2515 return self.expression(exp.LikeProperty, this=table, expressions=options) 2516 2517 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2518 return self.expression( 2519 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2520 ) 2521 2522 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2523 self._match(TokenType.EQ) 2524 return self.expression( 2525 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2526 ) 2527 2528 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2529 self._match_text_seq("WITH", "CONNECTION") 2530 return self.expression( 2531 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2532 ) 2533 2534 def _parse_returns(self) -> exp.ReturnsProperty: 2535 value: t.Optional[exp.Expression] 2536 null = None 2537 is_table = self._match(TokenType.TABLE) 2538 2539 if is_table: 2540 if self._match(TokenType.LT): 2541 value = self.expression( 2542 exp.Schema, 2543 this="TABLE", 2544 expressions=self._parse_csv(self._parse_struct_types), 2545 ) 2546 if not self._match(TokenType.GT): 2547 self.raise_error("Expecting >") 2548 else: 2549 value = self._parse_schema(exp.var("TABLE")) 2550 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2551 null = True 2552 value = None 2553 else: 2554 value = self._parse_types() 2555 2556 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2557 2558 def _parse_describe(self) -> exp.Describe: 2559 kind = self._match_set(self.CREATABLES) and self._prev.text 2560 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2561 if self._match(TokenType.DOT): 2562 style = None 2563 self._retreat(self._index - 2) 2564 this = self._parse_table(schema=True) 2565 properties = self._parse_properties() 2566 expressions = properties.expressions if properties else None 2567 partition = self._parse_partition() 2568 return self.expression( 2569 exp.Describe, 2570 this=this, 2571 style=style, 2572 kind=kind, 2573 expressions=expressions, 2574 partition=partition, 2575 ) 2576 2577 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2578 kind = self._prev.text.upper() 2579 expressions = [] 2580 2581 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2582 if self._match(TokenType.WHEN): 2583 expression = self._parse_disjunction() 2584 self._match(TokenType.THEN) 2585 else: 2586 expression = None 2587 2588 else_ = self._match(TokenType.ELSE) 2589 2590 if not self._match(TokenType.INTO): 2591 return None 2592 2593 return self.expression( 2594 exp.ConditionalInsert, 2595 this=self.expression( 2596 exp.Insert, 2597 this=self._parse_table(schema=True), 2598 expression=self._parse_derived_table_values(), 2599 ), 2600 expression=expression, 2601 else_=else_, 2602 ) 2603 2604 expression = parse_conditional_insert() 2605 while expression is not None: 2606 expressions.append(expression) 2607 expression = parse_conditional_insert() 2608 2609 return self.expression( 2610 exp.MultitableInserts, 2611 kind=kind, 2612 comments=comments, 2613 expressions=expressions, 2614 source=self._parse_table(), 2615 ) 2616 2617 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2618 comments = ensure_list(self._prev_comments) 2619 hint = self._parse_hint() 2620 overwrite = self._match(TokenType.OVERWRITE) 2621 ignore = self._match(TokenType.IGNORE) 2622 local = self._match_text_seq("LOCAL") 2623 alternative = None 2624 is_function = None 2625 2626 if self._match_text_seq("DIRECTORY"): 2627 this: t.Optional[exp.Expression] = self.expression( 2628 exp.Directory, 2629 this=self._parse_var_or_string(), 2630 local=local, 2631 row_format=self._parse_row_format(match_row=True), 2632 ) 
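        # Otherwise this is a regular INSERT, e.g. INSERT INTO t (a, b) VALUES (1, 2); the branch
        # below also covers multi-table inserts (e.g. Oracle's INSERT FIRST/ALL ... SELECT) and
        # alternative conflict actions such as SQLite's INSERT OR REPLACE.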
2633 else: 2634 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2635 comments += ensure_list(self._prev_comments) 2636 return self._parse_multitable_inserts(comments) 2637 2638 if self._match(TokenType.OR): 2639 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2640 2641 self._match(TokenType.INTO) 2642 comments += ensure_list(self._prev_comments) 2643 self._match(TokenType.TABLE) 2644 is_function = self._match(TokenType.FUNCTION) 2645 2646 this = ( 2647 self._parse_table(schema=True, parse_partition=True) 2648 if not is_function 2649 else self._parse_function() 2650 ) 2651 2652 returning = self._parse_returning() 2653 2654 return self.expression( 2655 exp.Insert, 2656 comments=comments, 2657 hint=hint, 2658 is_function=is_function, 2659 this=this, 2660 stored=self._match_text_seq("STORED") and self._parse_stored(), 2661 by_name=self._match_text_seq("BY", "NAME"), 2662 exists=self._parse_exists(), 2663 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2664 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2665 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2666 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2667 conflict=self._parse_on_conflict(), 2668 returning=returning or self._parse_returning(), 2669 overwrite=overwrite, 2670 alternative=alternative, 2671 ignore=ignore, 2672 source=self._match(TokenType.TABLE) and self._parse_table(), 2673 ) 2674 2675 def _parse_kill(self) -> exp.Kill: 2676 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2677 2678 return self.expression( 2679 exp.Kill, 2680 this=self._parse_primary(), 2681 kind=kind, 2682 ) 2683 2684 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2685 conflict = self._match_text_seq("ON", "CONFLICT") 2686 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2687 2688 if not conflict and not duplicate: 2689 return None 2690 2691 conflict_keys = None 2692 constraint = None 2693 2694 if conflict: 2695 if self._match_text_seq("ON", "CONSTRAINT"): 2696 constraint = self._parse_id_var() 2697 elif self._match(TokenType.L_PAREN): 2698 conflict_keys = self._parse_csv(self._parse_id_var) 2699 self._match_r_paren() 2700 2701 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2702 if self._prev.token_type == TokenType.UPDATE: 2703 self._match(TokenType.SET) 2704 expressions = self._parse_csv(self._parse_equality) 2705 else: 2706 expressions = None 2707 2708 return self.expression( 2709 exp.OnConflict, 2710 duplicate=duplicate, 2711 expressions=expressions, 2712 action=action, 2713 conflict_keys=conflict_keys, 2714 constraint=constraint, 2715 ) 2716 2717 def _parse_returning(self) -> t.Optional[exp.Returning]: 2718 if not self._match(TokenType.RETURNING): 2719 return None 2720 return self.expression( 2721 exp.Returning, 2722 expressions=self._parse_csv(self._parse_expression), 2723 into=self._match(TokenType.INTO) and self._parse_table_part(), 2724 ) 2725 2726 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2727 if not self._match(TokenType.FORMAT): 2728 return None 2729 return self._parse_row_format() 2730 2731 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2732 index = self._index 2733 with_ = with_ or self._match_text_seq("WITH") 2734 2735 if not self._match(TokenType.SERDE_PROPERTIES): 2736 self._retreat(index) 2737 return 
None 2738 return self.expression( 2739 exp.SerdeProperties, 2740 **{ # type: ignore 2741 "expressions": self._parse_wrapped_properties(), 2742 "with": with_, 2743 }, 2744 ) 2745 2746 def _parse_row_format( 2747 self, match_row: bool = False 2748 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2749 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2750 return None 2751 2752 if self._match_text_seq("SERDE"): 2753 this = self._parse_string() 2754 2755 serde_properties = self._parse_serde_properties() 2756 2757 return self.expression( 2758 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2759 ) 2760 2761 self._match_text_seq("DELIMITED") 2762 2763 kwargs = {} 2764 2765 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2766 kwargs["fields"] = self._parse_string() 2767 if self._match_text_seq("ESCAPED", "BY"): 2768 kwargs["escaped"] = self._parse_string() 2769 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2770 kwargs["collection_items"] = self._parse_string() 2771 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2772 kwargs["map_keys"] = self._parse_string() 2773 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2774 kwargs["lines"] = self._parse_string() 2775 if self._match_text_seq("NULL", "DEFINED", "AS"): 2776 kwargs["null"] = self._parse_string() 2777 2778 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2779 2780 def _parse_load(self) -> exp.LoadData | exp.Command: 2781 if self._match_text_seq("DATA"): 2782 local = self._match_text_seq("LOCAL") 2783 self._match_text_seq("INPATH") 2784 inpath = self._parse_string() 2785 overwrite = self._match(TokenType.OVERWRITE) 2786 self._match_pair(TokenType.INTO, TokenType.TABLE) 2787 2788 return self.expression( 2789 exp.LoadData, 2790 this=self._parse_table(schema=True), 2791 local=local, 2792 overwrite=overwrite, 2793 inpath=inpath, 2794 partition=self._parse_partition(), 2795 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2796 serde=self._match_text_seq("SERDE") and self._parse_string(), 2797 ) 2798 return self._parse_as_command(self._prev) 2799 2800 def _parse_delete(self) -> exp.Delete: 2801 # This handles MySQL's "Multiple-Table Syntax" 2802 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2803 tables = None 2804 comments = self._prev_comments 2805 if not self._match(TokenType.FROM, advance=False): 2806 tables = self._parse_csv(self._parse_table) or None 2807 2808 returning = self._parse_returning() 2809 2810 return self.expression( 2811 exp.Delete, 2812 comments=comments, 2813 tables=tables, 2814 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2815 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2816 where=self._parse_where(), 2817 returning=returning or self._parse_returning(), 2818 limit=self._parse_limit(), 2819 ) 2820 2821 def _parse_update(self) -> exp.Update: 2822 comments = self._prev_comments 2823 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2824 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2825 returning = self._parse_returning() 2826 return self.expression( 2827 exp.Update, 2828 comments=comments, 2829 **{ # type: ignore 2830 "this": this, 2831 "expressions": expressions, 2832 "from": self._parse_from(joins=True), 2833 "where": self._parse_where(), 2834 "returning": returning or self._parse_returning(), 2835 "order": self._parse_order(), 2836 
"limit": self._parse_limit(), 2837 }, 2838 ) 2839 2840 def _parse_uncache(self) -> exp.Uncache: 2841 if not self._match(TokenType.TABLE): 2842 self.raise_error("Expecting TABLE after UNCACHE") 2843 2844 return self.expression( 2845 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2846 ) 2847 2848 def _parse_cache(self) -> exp.Cache: 2849 lazy = self._match_text_seq("LAZY") 2850 self._match(TokenType.TABLE) 2851 table = self._parse_table(schema=True) 2852 2853 options = [] 2854 if self._match_text_seq("OPTIONS"): 2855 self._match_l_paren() 2856 k = self._parse_string() 2857 self._match(TokenType.EQ) 2858 v = self._parse_string() 2859 options = [k, v] 2860 self._match_r_paren() 2861 2862 self._match(TokenType.ALIAS) 2863 return self.expression( 2864 exp.Cache, 2865 this=table, 2866 lazy=lazy, 2867 options=options, 2868 expression=self._parse_select(nested=True), 2869 ) 2870 2871 def _parse_partition(self) -> t.Optional[exp.Partition]: 2872 if not self._match(TokenType.PARTITION): 2873 return None 2874 2875 return self.expression( 2876 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2877 ) 2878 2879 def _parse_value(self) -> t.Optional[exp.Tuple]: 2880 if self._match(TokenType.L_PAREN): 2881 expressions = self._parse_csv(self._parse_expression) 2882 self._match_r_paren() 2883 return self.expression(exp.Tuple, expressions=expressions) 2884 2885 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2886 expression = self._parse_expression() 2887 if expression: 2888 return self.expression(exp.Tuple, expressions=[expression]) 2889 return None 2890 2891 def _parse_projections(self) -> t.List[exp.Expression]: 2892 return self._parse_expressions() 2893 2894 def _parse_select( 2895 self, 2896 nested: bool = False, 2897 table: bool = False, 2898 parse_subquery_alias: bool = True, 2899 parse_set_operation: bool = True, 2900 ) -> t.Optional[exp.Expression]: 2901 cte = self._parse_with() 2902 2903 if cte: 2904 this = self._parse_statement() 2905 2906 if not this: 2907 self.raise_error("Failed to parse any statement following CTE") 2908 return cte 2909 2910 if "with" in this.arg_types: 2911 this.set("with", cte) 2912 else: 2913 self.raise_error(f"{this.key} does not support CTE") 2914 this = cte 2915 2916 return this 2917 2918 # duckdb supports leading with FROM x 2919 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2920 2921 if self._match(TokenType.SELECT): 2922 comments = self._prev_comments 2923 2924 hint = self._parse_hint() 2925 2926 if self._next and not self._next.token_type == TokenType.DOT: 2927 all_ = self._match(TokenType.ALL) 2928 distinct = self._match_set(self.DISTINCT_TOKENS) 2929 else: 2930 all_, distinct = None, None 2931 2932 kind = ( 2933 self._match(TokenType.ALIAS) 2934 and self._match_texts(("STRUCT", "VALUE")) 2935 and self._prev.text.upper() 2936 ) 2937 2938 if distinct: 2939 distinct = self.expression( 2940 exp.Distinct, 2941 on=self._parse_value() if self._match(TokenType.ON) else None, 2942 ) 2943 2944 if all_ and distinct: 2945 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2946 2947 limit = self._parse_limit(top=True) 2948 projections = self._parse_projections() 2949 2950 this = self.expression( 2951 exp.Select, 2952 kind=kind, 2953 hint=hint, 2954 distinct=distinct, 2955 expressions=projections, 2956 limit=limit, 2957 ) 2958 this.comments = comments 2959 2960 into = self._parse_into() 2961 if into: 2962 this.set("into", into) 2963 2964 if 
not from_: 2965 from_ = self._parse_from() 2966 2967 if from_: 2968 this.set("from", from_) 2969 2970 this = self._parse_query_modifiers(this) 2971 elif (table or nested) and self._match(TokenType.L_PAREN): 2972 if self._match(TokenType.PIVOT): 2973 this = self._parse_simplified_pivot() 2974 elif self._match(TokenType.FROM): 2975 this = exp.select("*").from_( 2976 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2977 ) 2978 else: 2979 this = ( 2980 self._parse_table() 2981 if table 2982 else self._parse_select(nested=True, parse_set_operation=False) 2983 ) 2984 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2985 2986 self._match_r_paren() 2987 2988 # We return early here so that the UNION isn't attached to the subquery by the 2989 # following call to _parse_set_operations, but instead becomes the parent node 2990 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2991 elif self._match(TokenType.VALUES, advance=False): 2992 this = self._parse_derived_table_values() 2993 elif from_: 2994 this = exp.select("*").from_(from_.this, copy=False) 2995 elif self._match(TokenType.SUMMARIZE): 2996 table = self._match(TokenType.TABLE) 2997 this = self._parse_select() or self._parse_string() or self._parse_table() 2998 return self.expression(exp.Summarize, this=this, table=table) 2999 elif self._match(TokenType.DESCRIBE): 3000 this = self._parse_describe() 3001 elif self._match_text_seq("STREAM"): 3002 this = self.expression(exp.Stream, this=self._parse_function()) 3003 else: 3004 this = None 3005 3006 return self._parse_set_operations(this) if parse_set_operation else this 3007 3008 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3009 if not skip_with_token and not self._match(TokenType.WITH): 3010 return None 3011 3012 comments = self._prev_comments 3013 recursive = self._match(TokenType.RECURSIVE) 3014 3015 expressions = [] 3016 while True: 3017 expressions.append(self._parse_cte()) 3018 3019 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3020 break 3021 else: 3022 self._match(TokenType.WITH) 3023 3024 return self.expression( 3025 exp.With, comments=comments, expressions=expressions, recursive=recursive 3026 ) 3027 3028 def _parse_cte(self) -> exp.CTE: 3029 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3030 if not alias or not alias.this: 3031 self.raise_error("Expected CTE to have alias") 3032 3033 self._match(TokenType.ALIAS) 3034 comments = self._prev_comments 3035 3036 if self._match_text_seq("NOT", "MATERIALIZED"): 3037 materialized = False 3038 elif self._match_text_seq("MATERIALIZED"): 3039 materialized = True 3040 else: 3041 materialized = None 3042 3043 return self.expression( 3044 exp.CTE, 3045 this=self._parse_wrapped(self._parse_statement), 3046 alias=alias, 3047 materialized=materialized, 3048 comments=comments, 3049 ) 3050 3051 def _parse_table_alias( 3052 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3053 ) -> t.Optional[exp.TableAlias]: 3054 any_token = self._match(TokenType.ALIAS) 3055 alias = ( 3056 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3057 or self._parse_string_as_identifier() 3058 ) 3059 3060 index = self._index 3061 if self._match(TokenType.L_PAREN): 3062 columns = self._parse_csv(self._parse_function_parameter) 3063 self._match_r_paren() if columns else self._retreat(index) 3064 else: 3065 columns = None 3066 3067 if not alias and not columns: 3068 return None 3069 3070 table_alias = 
self.expression(exp.TableAlias, this=alias, columns=columns) 3071 3072 # We bubble up comments from the Identifier to the TableAlias 3073 if isinstance(alias, exp.Identifier): 3074 table_alias.add_comments(alias.pop_comments()) 3075 3076 return table_alias 3077 3078 def _parse_subquery( 3079 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3080 ) -> t.Optional[exp.Subquery]: 3081 if not this: 3082 return None 3083 3084 return self.expression( 3085 exp.Subquery, 3086 this=this, 3087 pivots=self._parse_pivots(), 3088 alias=self._parse_table_alias() if parse_alias else None, 3089 sample=self._parse_table_sample(), 3090 ) 3091 3092 def _implicit_unnests_to_explicit(self, this: E) -> E: 3093 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3094 3095 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3096 for i, join in enumerate(this.args.get("joins") or []): 3097 table = join.this 3098 normalized_table = table.copy() 3099 normalized_table.meta["maybe_column"] = True 3100 normalized_table = _norm(normalized_table, dialect=self.dialect) 3101 3102 if isinstance(table, exp.Table) and not join.args.get("on"): 3103 if normalized_table.parts[0].name in refs: 3104 table_as_column = table.to_column() 3105 unnest = exp.Unnest(expressions=[table_as_column]) 3106 3107 # Table.to_column creates a parent Alias node that we want to convert to 3108 # a TableAlias and attach to the Unnest, so it matches the parser's output 3109 if isinstance(table.args.get("alias"), exp.TableAlias): 3110 table_as_column.replace(table_as_column.this) 3111 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3112 3113 table.replace(unnest) 3114 3115 refs.add(normalized_table.alias_or_name) 3116 3117 return this 3118 3119 def _parse_query_modifiers( 3120 self, this: t.Optional[exp.Expression] 3121 ) -> t.Optional[exp.Expression]: 3122 if isinstance(this, (exp.Query, exp.Table)): 3123 for join in self._parse_joins(): 3124 this.append("joins", join) 3125 for lateral in iter(self._parse_lateral, None): 3126 this.append("laterals", lateral) 3127 3128 while True: 3129 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3130 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3131 key, expression = parser(self) 3132 3133 if expression: 3134 this.set(key, expression) 3135 if key == "limit": 3136 offset = expression.args.pop("offset", None) 3137 3138 if offset: 3139 offset = exp.Offset(expression=offset) 3140 this.set("offset", offset) 3141 3142 limit_by_expressions = expression.expressions 3143 expression.set("expressions", None) 3144 offset.set("expressions", limit_by_expressions) 3145 continue 3146 break 3147 3148 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3149 this = self._implicit_unnests_to_explicit(this) 3150 3151 return this 3152 3153 def _parse_hint(self) -> t.Optional[exp.Hint]: 3154 if self._match(TokenType.HINT): 3155 hints = [] 3156 for hint in iter( 3157 lambda: self._parse_csv( 3158 lambda: self._parse_function() or self._parse_var(upper=True) 3159 ), 3160 [], 3161 ): 3162 hints.extend(hint) 3163 3164 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3165 self.raise_error("Expected */ after HINT") 3166 3167 return self.expression(exp.Hint, expressions=hints) 3168 3169 return None 3170 3171 def _parse_into(self) -> t.Optional[exp.Into]: 3172 if not self._match(TokenType.INTO): 3173 return None 3174 3175 temp = self._match(TokenType.TEMPORARY) 3176 unlogged = 
self._match_text_seq("UNLOGGED") 3177 self._match(TokenType.TABLE) 3178 3179 return self.expression( 3180 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3181 ) 3182 3183 def _parse_from( 3184 self, joins: bool = False, skip_from_token: bool = False 3185 ) -> t.Optional[exp.From]: 3186 if not skip_from_token and not self._match(TokenType.FROM): 3187 return None 3188 3189 return self.expression( 3190 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3191 ) 3192 3193 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3194 return self.expression( 3195 exp.MatchRecognizeMeasure, 3196 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3197 this=self._parse_expression(), 3198 ) 3199 3200 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3201 if not self._match(TokenType.MATCH_RECOGNIZE): 3202 return None 3203 3204 self._match_l_paren() 3205 3206 partition = self._parse_partition_by() 3207 order = self._parse_order() 3208 3209 measures = ( 3210 self._parse_csv(self._parse_match_recognize_measure) 3211 if self._match_text_seq("MEASURES") 3212 else None 3213 ) 3214 3215 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3216 rows = exp.var("ONE ROW PER MATCH") 3217 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3218 text = "ALL ROWS PER MATCH" 3219 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3220 text += " SHOW EMPTY MATCHES" 3221 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3222 text += " OMIT EMPTY MATCHES" 3223 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3224 text += " WITH UNMATCHED ROWS" 3225 rows = exp.var(text) 3226 else: 3227 rows = None 3228 3229 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3230 text = "AFTER MATCH SKIP" 3231 if self._match_text_seq("PAST", "LAST", "ROW"): 3232 text += " PAST LAST ROW" 3233 elif self._match_text_seq("TO", "NEXT", "ROW"): 3234 text += " TO NEXT ROW" 3235 elif self._match_text_seq("TO", "FIRST"): 3236 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3237 elif self._match_text_seq("TO", "LAST"): 3238 text += f" TO LAST {self._advance_any().text}" # type: ignore 3239 after = exp.var(text) 3240 else: 3241 after = None 3242 3243 if self._match_text_seq("PATTERN"): 3244 self._match_l_paren() 3245 3246 if not self._curr: 3247 self.raise_error("Expecting )", self._curr) 3248 3249 paren = 1 3250 start = self._curr 3251 3252 while self._curr and paren > 0: 3253 if self._curr.token_type == TokenType.L_PAREN: 3254 paren += 1 3255 if self._curr.token_type == TokenType.R_PAREN: 3256 paren -= 1 3257 3258 end = self._prev 3259 self._advance() 3260 3261 if paren > 0: 3262 self.raise_error("Expecting )", self._curr) 3263 3264 pattern = exp.var(self._find_sql(start, end)) 3265 else: 3266 pattern = None 3267 3268 define = ( 3269 self._parse_csv(self._parse_name_as_expression) 3270 if self._match_text_seq("DEFINE") 3271 else None 3272 ) 3273 3274 self._match_r_paren() 3275 3276 return self.expression( 3277 exp.MatchRecognize, 3278 partition_by=partition, 3279 order=order, 3280 measures=measures, 3281 rows=rows, 3282 after=after, 3283 pattern=pattern, 3284 define=define, 3285 alias=self._parse_table_alias(), 3286 ) 3287 3288 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3289 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3290 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3291 cross_apply = False 3292 3293 if cross_apply is not None: 
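            # APPLY operand (e.g. T-SQL's CROSS APPLY / OUTER APPLY): at this point cross_apply is
            # True for CROSS APPLY and False for OUTER APPLY; either way the operand is parsed as
            # a table-like expression.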
3294 this = self._parse_select(table=True) 3295 view = None 3296 outer = None 3297 elif self._match(TokenType.LATERAL): 3298 this = self._parse_select(table=True) 3299 view = self._match(TokenType.VIEW) 3300 outer = self._match(TokenType.OUTER) 3301 else: 3302 return None 3303 3304 if not this: 3305 this = ( 3306 self._parse_unnest() 3307 or self._parse_function() 3308 or self._parse_id_var(any_token=False) 3309 ) 3310 3311 while self._match(TokenType.DOT): 3312 this = exp.Dot( 3313 this=this, 3314 expression=self._parse_function() or self._parse_id_var(any_token=False), 3315 ) 3316 3317 if view: 3318 table = self._parse_id_var(any_token=False) 3319 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3320 table_alias: t.Optional[exp.TableAlias] = self.expression( 3321 exp.TableAlias, this=table, columns=columns 3322 ) 3323 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3324 # We move the alias from the lateral's child node to the lateral itself 3325 table_alias = this.args["alias"].pop() 3326 else: 3327 table_alias = self._parse_table_alias() 3328 3329 return self.expression( 3330 exp.Lateral, 3331 this=this, 3332 view=view, 3333 outer=outer, 3334 alias=table_alias, 3335 cross_apply=cross_apply, 3336 ) 3337 3338 def _parse_join_parts( 3339 self, 3340 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3341 return ( 3342 self._match_set(self.JOIN_METHODS) and self._prev, 3343 self._match_set(self.JOIN_SIDES) and self._prev, 3344 self._match_set(self.JOIN_KINDS) and self._prev, 3345 ) 3346 3347 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3348 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3349 this = self._parse_column() 3350 if isinstance(this, exp.Column): 3351 return this.this 3352 return this 3353 3354 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3355 3356 def _parse_join( 3357 self, skip_join_token: bool = False, parse_bracket: bool = False 3358 ) -> t.Optional[exp.Join]: 3359 if self._match(TokenType.COMMA): 3360 return self.expression(exp.Join, this=self._parse_table()) 3361 3362 index = self._index 3363 method, side, kind = self._parse_join_parts() 3364 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3365 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3366 3367 if not skip_join_token and not join: 3368 self._retreat(index) 3369 kind = None 3370 method = None 3371 side = None 3372 3373 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3374 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3375 3376 if not skip_join_token and not join and not outer_apply and not cross_apply: 3377 return None 3378 3379 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3380 3381 if method: 3382 kwargs["method"] = method.text 3383 if side: 3384 kwargs["side"] = side.text 3385 if kind: 3386 kwargs["kind"] = kind.text 3387 if hint: 3388 kwargs["hint"] = hint 3389 3390 if self._match(TokenType.MATCH_CONDITION): 3391 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3392 3393 if self._match(TokenType.ON): 3394 kwargs["on"] = self._parse_assignment() 3395 elif self._match(TokenType.USING): 3396 kwargs["using"] = self._parse_using_identifiers() 3397 elif ( 3398 not (outer_apply or cross_apply) 3399 and not isinstance(kwargs["this"], exp.Unnest) 3400 and not (kind and kind.token_type == TokenType.CROSS) 3401 
): 3402 index = self._index 3403 joins: t.Optional[list] = list(self._parse_joins()) 3404 3405 if joins and self._match(TokenType.ON): 3406 kwargs["on"] = self._parse_assignment() 3407 elif joins and self._match(TokenType.USING): 3408 kwargs["using"] = self._parse_using_identifiers() 3409 else: 3410 joins = None 3411 self._retreat(index) 3412 3413 kwargs["this"].set("joins", joins if joins else None) 3414 3415 comments = [c for token in (method, side, kind) if token for c in token.comments] 3416 return self.expression(exp.Join, comments=comments, **kwargs) 3417 3418 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3419 this = self._parse_assignment() 3420 3421 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3422 return this 3423 3424 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3425 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3426 3427 return this 3428 3429 def _parse_index_params(self) -> exp.IndexParameters: 3430 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3431 3432 if self._match(TokenType.L_PAREN, advance=False): 3433 columns = self._parse_wrapped_csv(self._parse_with_operator) 3434 else: 3435 columns = None 3436 3437 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3438 partition_by = self._parse_partition_by() 3439 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3440 tablespace = ( 3441 self._parse_var(any_token=True) 3442 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3443 else None 3444 ) 3445 where = self._parse_where() 3446 3447 on = self._parse_field() if self._match(TokenType.ON) else None 3448 3449 return self.expression( 3450 exp.IndexParameters, 3451 using=using, 3452 columns=columns, 3453 include=include, 3454 partition_by=partition_by, 3455 where=where, 3456 with_storage=with_storage, 3457 tablespace=tablespace, 3458 on=on, 3459 ) 3460 3461 def _parse_index( 3462 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3463 ) -> t.Optional[exp.Index]: 3464 if index or anonymous: 3465 unique = None 3466 primary = None 3467 amp = None 3468 3469 self._match(TokenType.ON) 3470 self._match(TokenType.TABLE) # hive 3471 table = self._parse_table_parts(schema=True) 3472 else: 3473 unique = self._match(TokenType.UNIQUE) 3474 primary = self._match_text_seq("PRIMARY") 3475 amp = self._match_text_seq("AMP") 3476 3477 if not self._match(TokenType.INDEX): 3478 return None 3479 3480 index = self._parse_id_var() 3481 table = None 3482 3483 params = self._parse_index_params() 3484 3485 return self.expression( 3486 exp.Index, 3487 this=index, 3488 table=table, 3489 unique=unique, 3490 primary=primary, 3491 amp=amp, 3492 params=params, 3493 ) 3494 3495 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3496 hints: t.List[exp.Expression] = [] 3497 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3498 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3499 hints.append( 3500 self.expression( 3501 exp.WithTableHint, 3502 expressions=self._parse_csv( 3503 lambda: self._parse_function() or self._parse_var(any_token=True) 3504 ), 3505 ) 3506 ) 3507 self._match_r_paren() 3508 else: 3509 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3510 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3511 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3512 3513 self._match_set((TokenType.INDEX, 
TokenType.KEY)) 3514 if self._match(TokenType.FOR): 3515 hint.set("target", self._advance_any() and self._prev.text.upper()) 3516 3517 hint.set("expressions", self._parse_wrapped_id_vars()) 3518 hints.append(hint) 3519 3520 return hints or None 3521 3522 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3523 return ( 3524 (not schema and self._parse_function(optional_parens=False)) 3525 or self._parse_id_var(any_token=False) 3526 or self._parse_string_as_identifier() 3527 or self._parse_placeholder() 3528 ) 3529 3530 def _parse_table_parts( 3531 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3532 ) -> exp.Table: 3533 catalog = None 3534 db = None 3535 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3536 3537 while self._match(TokenType.DOT): 3538 if catalog: 3539 # This allows nesting the table in arbitrarily many dot expressions if needed 3540 table = self.expression( 3541 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3542 ) 3543 else: 3544 catalog = db 3545 db = table 3546 # "" used for tsql FROM a..b case 3547 table = self._parse_table_part(schema=schema) or "" 3548 3549 if ( 3550 wildcard 3551 and self._is_connected() 3552 and (isinstance(table, exp.Identifier) or not table) 3553 and self._match(TokenType.STAR) 3554 ): 3555 if isinstance(table, exp.Identifier): 3556 table.args["this"] += "*" 3557 else: 3558 table = exp.Identifier(this="*") 3559 3560 # We bubble up comments from the Identifier to the Table 3561 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3562 3563 if is_db_reference: 3564 catalog = db 3565 db = table 3566 table = None 3567 3568 if not table and not is_db_reference: 3569 self.raise_error(f"Expected table name but got {self._curr}") 3570 if not db and is_db_reference: 3571 self.raise_error(f"Expected database name but got {self._curr}") 3572 3573 table = self.expression( 3574 exp.Table, 3575 comments=comments, 3576 this=table, 3577 db=db, 3578 catalog=catalog, 3579 ) 3580 3581 changes = self._parse_changes() 3582 if changes: 3583 table.set("changes", changes) 3584 3585 at_before = self._parse_historical_data() 3586 if at_before: 3587 table.set("when", at_before) 3588 3589 pivots = self._parse_pivots() 3590 if pivots: 3591 table.set("pivots", pivots) 3592 3593 return table 3594 3595 def _parse_table( 3596 self, 3597 schema: bool = False, 3598 joins: bool = False, 3599 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3600 parse_bracket: bool = False, 3601 is_db_reference: bool = False, 3602 parse_partition: bool = False, 3603 ) -> t.Optional[exp.Expression]: 3604 lateral = self._parse_lateral() 3605 if lateral: 3606 return lateral 3607 3608 unnest = self._parse_unnest() 3609 if unnest: 3610 return unnest 3611 3612 values = self._parse_derived_table_values() 3613 if values: 3614 return values 3615 3616 subquery = self._parse_select(table=True) 3617 if subquery: 3618 if not subquery.args.get("pivots"): 3619 subquery.set("pivots", self._parse_pivots()) 3620 return subquery 3621 3622 bracket = parse_bracket and self._parse_bracket(None) 3623 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3624 3625 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3626 self._parse_table 3627 ) 3628 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3629 3630 only = self._match(TokenType.ONLY) 3631 3632 this = t.cast( 3633 exp.Expression, 3634 
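# --- Editor's example (illustrative sketch; not part of the original module) ---
# `_parse_table_parts` above consumes dot-separated names, shifting previously
# parsed parts into db and catalog as more dots appear. Sketch (names are
# illustrative):
from sqlglot import exp, parse_one

_tbl = parse_one("SELECT * FROM cat.db1.tbl1").find(exp.Table)
assert (_tbl.catalog, _tbl.db, _tbl.name) == ("cat", "db1", "tbl1")
# -------------------------------------------------------------------------------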
bracket 3635 or rows_from 3636 or self._parse_bracket( 3637 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3638 ), 3639 ) 3640 3641 if only: 3642 this.set("only", only) 3643 3644 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3645 self._match_text_seq("*") 3646 3647 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3648 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3649 this.set("partition", self._parse_partition()) 3650 3651 if schema: 3652 return self._parse_schema(this=this) 3653 3654 version = self._parse_version() 3655 3656 if version: 3657 this.set("version", version) 3658 3659 if self.dialect.ALIAS_POST_TABLESAMPLE: 3660 this.set("sample", self._parse_table_sample()) 3661 3662 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3663 if alias: 3664 this.set("alias", alias) 3665 3666 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3667 return self.expression( 3668 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3669 ) 3670 3671 this.set("hints", self._parse_table_hints()) 3672 3673 if not this.args.get("pivots"): 3674 this.set("pivots", self._parse_pivots()) 3675 3676 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3677 this.set("sample", self._parse_table_sample()) 3678 3679 if joins: 3680 for join in self._parse_joins(): 3681 this.append("joins", join) 3682 3683 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3684 this.set("ordinality", True) 3685 this.set("alias", self._parse_table_alias()) 3686 3687 return this 3688 3689 def _parse_version(self) -> t.Optional[exp.Version]: 3690 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3691 this = "TIMESTAMP" 3692 elif self._match(TokenType.VERSION_SNAPSHOT): 3693 this = "VERSION" 3694 else: 3695 return None 3696 3697 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3698 kind = self._prev.text.upper() 3699 start = self._parse_bitwise() 3700 self._match_texts(("TO", "AND")) 3701 end = self._parse_bitwise() 3702 expression: t.Optional[exp.Expression] = self.expression( 3703 exp.Tuple, expressions=[start, end] 3704 ) 3705 elif self._match_text_seq("CONTAINED", "IN"): 3706 kind = "CONTAINED IN" 3707 expression = self.expression( 3708 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3709 ) 3710 elif self._match(TokenType.ALL): 3711 kind = "ALL" 3712 expression = None 3713 else: 3714 self._match_text_seq("AS", "OF") 3715 kind = "AS OF" 3716 expression = self._parse_type() 3717 3718 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3719 3720 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3721 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3722 index = self._index 3723 historical_data = None 3724 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3725 this = self._prev.text.upper() 3726 kind = ( 3727 self._match(TokenType.L_PAREN) 3728 and self._match_texts(self.HISTORICAL_DATA_KIND) 3729 and self._prev.text.upper() 3730 ) 3731 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3732 3733 if expression: 3734 self._match_r_paren() 3735 historical_data = self.expression( 3736 exp.HistoricalData, this=this, kind=kind, expression=expression 3737 ) 3738 else: 3739 self._retreat(index) 3740 3741 return historical_data 3742 3743 def _parse_changes(self) -> t.Optional[exp.Changes]: 3744 if not self._match_text_seq("CHANGES", "(", "INFORMATION", 
"=>"): 3745 return None 3746 3747 information = self._parse_var(any_token=True) 3748 self._match_r_paren() 3749 3750 return self.expression( 3751 exp.Changes, 3752 information=information, 3753 at_before=self._parse_historical_data(), 3754 end=self._parse_historical_data(), 3755 ) 3756 3757 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3758 if not self._match(TokenType.UNNEST): 3759 return None 3760 3761 expressions = self._parse_wrapped_csv(self._parse_equality) 3762 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3763 3764 alias = self._parse_table_alias() if with_alias else None 3765 3766 if alias: 3767 if self.dialect.UNNEST_COLUMN_ONLY: 3768 if alias.args.get("columns"): 3769 self.raise_error("Unexpected extra column alias in unnest.") 3770 3771 alias.set("columns", [alias.this]) 3772 alias.set("this", None) 3773 3774 columns = alias.args.get("columns") or [] 3775 if offset and len(expressions) < len(columns): 3776 offset = columns.pop() 3777 3778 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3779 self._match(TokenType.ALIAS) 3780 offset = self._parse_id_var( 3781 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3782 ) or exp.to_identifier("offset") 3783 3784 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3785 3786 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3787 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3788 if not is_derived and not ( 3789 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3790 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3791 ): 3792 return None 3793 3794 expressions = self._parse_csv(self._parse_value) 3795 alias = self._parse_table_alias() 3796 3797 if is_derived: 3798 self._match_r_paren() 3799 3800 return self.expression( 3801 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3802 ) 3803 3804 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3805 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3806 as_modifier and self._match_text_seq("USING", "SAMPLE") 3807 ): 3808 return None 3809 3810 bucket_numerator = None 3811 bucket_denominator = None 3812 bucket_field = None 3813 percent = None 3814 size = None 3815 seed = None 3816 3817 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3818 matched_l_paren = self._match(TokenType.L_PAREN) 3819 3820 if self.TABLESAMPLE_CSV: 3821 num = None 3822 expressions = self._parse_csv(self._parse_primary) 3823 else: 3824 expressions = None 3825 num = ( 3826 self._parse_factor() 3827 if self._match(TokenType.NUMBER, advance=False) 3828 else self._parse_primary() or self._parse_placeholder() 3829 ) 3830 3831 if self._match_text_seq("BUCKET"): 3832 bucket_numerator = self._parse_number() 3833 self._match_text_seq("OUT", "OF") 3834 bucket_denominator = bucket_denominator = self._parse_number() 3835 self._match(TokenType.ON) 3836 bucket_field = self._parse_field() 3837 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3838 percent = num 3839 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3840 size = num 3841 else: 3842 percent = num 3843 3844 if matched_l_paren: 3845 self._match_r_paren() 3846 3847 if self._match(TokenType.L_PAREN): 3848 method = self._parse_var(upper=True) 3849 seed = self._match(TokenType.COMMA) and self._parse_number() 3850 self._match_r_paren() 3851 elif self._match_texts(("SEED", 
"REPEATABLE")): 3852 seed = self._parse_wrapped(self._parse_number) 3853 3854 if not method and self.DEFAULT_SAMPLING_METHOD: 3855 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3856 3857 return self.expression( 3858 exp.TableSample, 3859 expressions=expressions, 3860 method=method, 3861 bucket_numerator=bucket_numerator, 3862 bucket_denominator=bucket_denominator, 3863 bucket_field=bucket_field, 3864 percent=percent, 3865 size=size, 3866 seed=seed, 3867 ) 3868 3869 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3870 return list(iter(self._parse_pivot, None)) or None 3871 3872 def _parse_joins(self) -> t.Iterator[exp.Join]: 3873 return iter(self._parse_join, None) 3874 3875 # https://duckdb.org/docs/sql/statements/pivot 3876 def _parse_simplified_pivot(self) -> exp.Pivot: 3877 def _parse_on() -> t.Optional[exp.Expression]: 3878 this = self._parse_bitwise() 3879 return self._parse_in(this) if self._match(TokenType.IN) else this 3880 3881 this = self._parse_table() 3882 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3883 using = self._match(TokenType.USING) and self._parse_csv( 3884 lambda: self._parse_alias(self._parse_function()) 3885 ) 3886 group = self._parse_group() 3887 return self.expression( 3888 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3889 ) 3890 3891 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3892 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3893 this = self._parse_select_or_expression() 3894 3895 self._match(TokenType.ALIAS) 3896 alias = self._parse_bitwise() 3897 if alias: 3898 if isinstance(alias, exp.Column) and not alias.db: 3899 alias = alias.this 3900 return self.expression(exp.PivotAlias, this=this, alias=alias) 3901 3902 return this 3903 3904 value = self._parse_column() 3905 3906 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3907 self.raise_error("Expecting IN (") 3908 3909 if self._match(TokenType.ANY): 3910 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3911 else: 3912 exprs = self._parse_csv(_parse_aliased_expression) 3913 3914 self._match_r_paren() 3915 return self.expression(exp.In, this=value, expressions=exprs) 3916 3917 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3918 index = self._index 3919 include_nulls = None 3920 3921 if self._match(TokenType.PIVOT): 3922 unpivot = False 3923 elif self._match(TokenType.UNPIVOT): 3924 unpivot = True 3925 3926 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3927 if self._match_text_seq("INCLUDE", "NULLS"): 3928 include_nulls = True 3929 elif self._match_text_seq("EXCLUDE", "NULLS"): 3930 include_nulls = False 3931 else: 3932 return None 3933 3934 expressions = [] 3935 3936 if not self._match(TokenType.L_PAREN): 3937 self._retreat(index) 3938 return None 3939 3940 if unpivot: 3941 expressions = self._parse_csv(self._parse_column) 3942 else: 3943 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3944 3945 if not expressions: 3946 self.raise_error("Failed to parse PIVOT's aggregation list") 3947 3948 if not self._match(TokenType.FOR): 3949 self.raise_error("Expecting FOR") 3950 3951 field = self._parse_pivot_in() 3952 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3953 self._parse_bitwise 3954 ) 3955 3956 self._match_r_paren() 3957 3958 pivot = self.expression( 3959 exp.Pivot, 3960 expressions=expressions, 3961 field=field, 3962 unpivot=unpivot, 3963 
include_nulls=include_nulls, 3964 default_on_null=default_on_null, 3965 ) 3966 3967 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3968 pivot.set("alias", self._parse_table_alias()) 3969 3970 if not unpivot: 3971 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3972 3973 columns: t.List[exp.Expression] = [] 3974 for fld in pivot.args["field"].expressions: 3975 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3976 for name in names: 3977 if self.PREFIXED_PIVOT_COLUMNS: 3978 name = f"{name}_{field_name}" if name else field_name 3979 else: 3980 name = f"{field_name}_{name}" if name else field_name 3981 3982 columns.append(exp.to_identifier(name)) 3983 3984 pivot.set("columns", columns) 3985 3986 return pivot 3987 3988 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3989 return [agg.alias for agg in aggregations] 3990 3991 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3992 if not skip_where_token and not self._match(TokenType.PREWHERE): 3993 return None 3994 3995 return self.expression( 3996 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3997 ) 3998 3999 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4000 if not skip_where_token and not self._match(TokenType.WHERE): 4001 return None 4002 4003 return self.expression( 4004 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4005 ) 4006 4007 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4008 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4009 return None 4010 4011 elements: t.Dict[str, t.Any] = defaultdict(list) 4012 4013 if self._match(TokenType.ALL): 4014 elements["all"] = True 4015 elif self._match(TokenType.DISTINCT): 4016 elements["all"] = False 4017 4018 while True: 4019 index = self._index 4020 4021 elements["expressions"].extend( 4022 self._parse_csv( 4023 lambda: None 4024 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4025 else self._parse_assignment() 4026 ) 4027 ) 4028 4029 before_with_index = self._index 4030 with_prefix = self._match(TokenType.WITH) 4031 4032 if self._match(TokenType.ROLLUP): 4033 elements["rollup"].append( 4034 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4035 ) 4036 elif self._match(TokenType.CUBE): 4037 elements["cube"].append( 4038 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4039 ) 4040 elif self._match(TokenType.GROUPING_SETS): 4041 elements["grouping_sets"].append( 4042 self.expression( 4043 exp.GroupingSets, 4044 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4045 ) 4046 ) 4047 elif self._match_text_seq("TOTALS"): 4048 elements["totals"] = True # type: ignore 4049 4050 if before_with_index <= self._index <= before_with_index + 1: 4051 self._retreat(before_with_index) 4052 break 4053 4054 if index == self._index: 4055 break 4056 4057 return self.expression(exp.Group, **elements) # type: ignore 4058 4059 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4060 return self.expression( 4061 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4062 ) 4063 4064 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4065 if self._match(TokenType.L_PAREN): 4066 grouping_set = self._parse_csv(self._parse_column) 4067 self._match_r_paren() 4068 return self.expression(exp.Tuple, 
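# --- Editor's example (illustrative sketch; not part of the original module) ---
# Three of the table-operator parsers above in action: `_parse_unnest` also
# consumes BigQuery's WITH OFFSET suffix (defaulting the alias to "offset"),
# `_parse_table_sample` folds the TABLESAMPLE spellings into one node, and
# `_parse_pivot` stores the FOR ... IN clause from `_parse_pivot_in` under the
# `field` arg. Hedged examples, assuming the public API:
from sqlglot import exp, parse_one

_u = parse_one("SELECT * FROM UNNEST(xs) WITH OFFSET AS pos", read="bigquery").find(exp.Unnest)
assert _u is not None and _u.args["offset"].name == "pos"

_ts = parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)").find(exp.TableSample)
assert _ts is not None and _ts.args.get("percent") is not None

_p = parse_one("SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))", read="snowflake").find(exp.Pivot)
assert _p is not None and not _p.args.get("unpivot")
assert isinstance(_p.args["field"], exp.In)
# -------------------------------------------------------------------------------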
expressions=grouping_set) 4069 4070 return self._parse_column() 4071 4072 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4073 if not skip_having_token and not self._match(TokenType.HAVING): 4074 return None 4075 return self.expression(exp.Having, this=self._parse_assignment()) 4076 4077 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4078 if not self._match(TokenType.QUALIFY): 4079 return None 4080 return self.expression(exp.Qualify, this=self._parse_assignment()) 4081 4082 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4083 if skip_start_token: 4084 start = None 4085 elif self._match(TokenType.START_WITH): 4086 start = self._parse_assignment() 4087 else: 4088 return None 4089 4090 self._match(TokenType.CONNECT_BY) 4091 nocycle = self._match_text_seq("NOCYCLE") 4092 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4093 exp.Prior, this=self._parse_bitwise() 4094 ) 4095 connect = self._parse_assignment() 4096 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4097 4098 if not start and self._match(TokenType.START_WITH): 4099 start = self._parse_assignment() 4100 4101 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4102 4103 def _parse_name_as_expression(self) -> exp.Alias: 4104 return self.expression( 4105 exp.Alias, 4106 alias=self._parse_id_var(any_token=True), 4107 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4108 ) 4109 4110 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4111 if self._match_text_seq("INTERPOLATE"): 4112 return self._parse_wrapped_csv(self._parse_name_as_expression) 4113 return None 4114 4115 def _parse_order( 4116 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4117 ) -> t.Optional[exp.Expression]: 4118 siblings = None 4119 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4120 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4121 return this 4122 4123 siblings = True 4124 4125 return self.expression( 4126 exp.Order, 4127 this=this, 4128 expressions=self._parse_csv(self._parse_ordered), 4129 siblings=siblings, 4130 ) 4131 4132 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4133 if not self._match(token): 4134 return None 4135 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4136 4137 def _parse_ordered( 4138 self, parse_method: t.Optional[t.Callable] = None 4139 ) -> t.Optional[exp.Ordered]: 4140 this = parse_method() if parse_method else self._parse_assignment() 4141 if not this: 4142 return None 4143 4144 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4145 this = exp.var("ALL") 4146 4147 asc = self._match(TokenType.ASC) 4148 desc = self._match(TokenType.DESC) or (asc and False) 4149 4150 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4151 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4152 4153 nulls_first = is_nulls_first or False 4154 explicitly_null_ordered = is_nulls_first or is_nulls_last 4155 4156 if ( 4157 not explicitly_null_ordered 4158 and ( 4159 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4160 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4161 ) 4162 and self.dialect.NULL_ORDERING != "nulls_are_last" 4163 ): 4164 nulls_first = True 4165 4166 if self._match_text_seq("WITH", "FILL"): 4167 with_fill = self.expression( 4168 exp.WithFill, 4169 **{ # type: ignore 4170 "from": self._match(TokenType.FROM) and 
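# --- Editor's example (illustrative sketch; not part of the original module) ---
# `_parse_group` accumulates plain grouping expressions alongside separate
# ROLLUP/CUBE/GROUPING SETS buckets (the defaultdict above). Sketch:
from sqlglot import exp, parse_one

_g = parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").args["group"]
assert isinstance(_g, exp.Group) and _g.args.get("rollup")
# -------------------------------------------------------------------------------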
self._parse_bitwise(), 4171 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4172 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4173 "interpolate": self._parse_interpolate(), 4174 }, 4175 ) 4176 else: 4177 with_fill = None 4178 4179 return self.expression( 4180 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4181 ) 4182 4183 def _parse_limit( 4184 self, 4185 this: t.Optional[exp.Expression] = None, 4186 top: bool = False, 4187 skip_limit_token: bool = False, 4188 ) -> t.Optional[exp.Expression]: 4189 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4190 comments = self._prev_comments 4191 if top: 4192 limit_paren = self._match(TokenType.L_PAREN) 4193 expression = self._parse_term() if limit_paren else self._parse_number() 4194 4195 if limit_paren: 4196 self._match_r_paren() 4197 else: 4198 expression = self._parse_term() 4199 4200 if self._match(TokenType.COMMA): 4201 offset = expression 4202 expression = self._parse_term() 4203 else: 4204 offset = None 4205 4206 limit_exp = self.expression( 4207 exp.Limit, 4208 this=this, 4209 expression=expression, 4210 offset=offset, 4211 comments=comments, 4212 expressions=self._parse_limit_by(), 4213 ) 4214 4215 return limit_exp 4216 4217 if self._match(TokenType.FETCH): 4218 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4219 direction = self._prev.text.upper() if direction else "FIRST" 4220 4221 count = self._parse_field(tokens=self.FETCH_TOKENS) 4222 percent = self._match(TokenType.PERCENT) 4223 4224 self._match_set((TokenType.ROW, TokenType.ROWS)) 4225 4226 only = self._match_text_seq("ONLY") 4227 with_ties = self._match_text_seq("WITH", "TIES") 4228 4229 if only and with_ties: 4230 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4231 4232 return self.expression( 4233 exp.Fetch, 4234 direction=direction, 4235 count=count, 4236 percent=percent, 4237 with_ties=with_ties, 4238 ) 4239 4240 return this 4241 4242 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4243 if not self._match(TokenType.OFFSET): 4244 return this 4245 4246 count = self._parse_term() 4247 self._match_set((TokenType.ROW, TokenType.ROWS)) 4248 4249 return self.expression( 4250 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4251 ) 4252 4253 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4254 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4255 4256 def _parse_locks(self) -> t.List[exp.Lock]: 4257 locks = [] 4258 while True: 4259 if self._match_text_seq("FOR", "UPDATE"): 4260 update = True 4261 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4262 "LOCK", "IN", "SHARE", "MODE" 4263 ): 4264 update = False 4265 else: 4266 break 4267 4268 expressions = None 4269 if self._match_text_seq("OF"): 4270 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4271 4272 wait: t.Optional[bool | exp.Expression] = None 4273 if self._match_text_seq("NOWAIT"): 4274 wait = True 4275 elif self._match_text_seq("WAIT"): 4276 wait = self._parse_primary() 4277 elif self._match_text_seq("SKIP", "LOCKED"): 4278 wait = False 4279 4280 locks.append( 4281 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4282 ) 4283 4284 return locks 4285 4286 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4287 while this and self._match_set(self.SET_OPERATIONS): 4288 
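# --- Editor's example (illustrative sketch; not part of the original module) ---
# `_parse_limit` and `_parse_offset` attach exp.Limit and exp.Offset to the
# query; the MySQL-style `LIMIT <offset>, <count>` form instead folds the
# offset into the Limit node itself. Sketch:
from sqlglot import exp, parse_one

_q = parse_one("SELECT * FROM t LIMIT 10 OFFSET 5")
assert isinstance(_q.args["limit"], exp.Limit)
assert isinstance(_q.args["offset"], exp.Offset)
# -------------------------------------------------------------------------------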
token_type = self._prev.token_type 4289 4290 if token_type == TokenType.UNION: 4291 operation: t.Type[exp.SetOperation] = exp.Union 4292 elif token_type == TokenType.EXCEPT: 4293 operation = exp.Except 4294 else: 4295 operation = exp.Intersect 4296 4297 comments = self._prev.comments 4298 4299 if self._match(TokenType.DISTINCT): 4300 distinct: t.Optional[bool] = True 4301 elif self._match(TokenType.ALL): 4302 distinct = False 4303 else: 4304 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4305 if distinct is None: 4306 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4307 4308 by_name = self._match_text_seq("BY", "NAME") 4309 expression = self._parse_select(nested=True, parse_set_operation=False) 4310 4311 this = self.expression( 4312 operation, 4313 comments=comments, 4314 this=this, 4315 distinct=distinct, 4316 by_name=by_name, 4317 expression=expression, 4318 ) 4319 4320 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4321 expression = this.expression 4322 4323 if expression: 4324 for arg in self.SET_OP_MODIFIERS: 4325 expr = expression.args.get(arg) 4326 if expr: 4327 this.set(arg, expr.pop()) 4328 4329 return this 4330 4331 def _parse_expression(self) -> t.Optional[exp.Expression]: 4332 return self._parse_alias(self._parse_assignment()) 4333 4334 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4335 this = self._parse_disjunction() 4336 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4337 # This allows us to parse <non-identifier token> := <expr> 4338 this = exp.column( 4339 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4340 ) 4341 4342 while self._match_set(self.ASSIGNMENT): 4343 this = self.expression( 4344 self.ASSIGNMENT[self._prev.token_type], 4345 this=this, 4346 comments=self._prev_comments, 4347 expression=self._parse_assignment(), 4348 ) 4349 4350 return this 4351 4352 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4353 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4354 4355 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4356 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4357 4358 def _parse_equality(self) -> t.Optional[exp.Expression]: 4359 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4360 4361 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4362 return self._parse_tokens(self._parse_range, self.COMPARISON) 4363 4364 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4365 this = this or self._parse_bitwise() 4366 negate = self._match(TokenType.NOT) 4367 4368 if self._match_set(self.RANGE_PARSERS): 4369 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4370 if not expression: 4371 return this 4372 4373 this = expression 4374 elif self._match(TokenType.ISNULL): 4375 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4376 4377 # Postgres supports ISNULL and NOTNULL for conditions. 
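# --- Editor's example (illustrative sketch; not part of the original module) ---
# `_parse_set_operations` resolves an omitted DISTINCT/ALL through the
# dialect's SET_OP_DISTINCT_BY_DEFAULT table; in the default dialect a bare
# UNION is distinct. Sketch:
from sqlglot import exp, parse_one

assert parse_one("SELECT a FROM x UNION SELECT a FROM y").args["distinct"] is True
assert parse_one("SELECT a FROM x UNION ALL SELECT a FROM y").args["distinct"] is False
# -------------------------------------------------------------------------------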
4378 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4379 if self._match(TokenType.NOTNULL): 4380 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4381 this = self.expression(exp.Not, this=this) 4382 4383 if negate: 4384 this = self._negate_range(this) 4385 4386 if self._match(TokenType.IS): 4387 this = self._parse_is(this) 4388 4389 return this 4390 4391 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4392 if not this: 4393 return this 4394 4395 return self.expression(exp.Not, this=this) 4396 4397 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4398 index = self._index - 1 4399 negate = self._match(TokenType.NOT) 4400 4401 if self._match_text_seq("DISTINCT", "FROM"): 4402 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4403 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4404 4405 if self._match(TokenType.JSON): 4406 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4407 4408 if self._match_text_seq("WITH"): 4409 _with = True 4410 elif self._match_text_seq("WITHOUT"): 4411 _with = False 4412 else: 4413 _with = None 4414 4415 unique = self._match(TokenType.UNIQUE) 4416 self._match_text_seq("KEYS") 4417 expression: t.Optional[exp.Expression] = self.expression( 4418 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4419 ) 4420 else: 4421 expression = self._parse_primary() or self._parse_null() 4422 if not expression: 4423 self._retreat(index) 4424 return None 4425 4426 this = self.expression(exp.Is, this=this, expression=expression) 4427 return self.expression(exp.Not, this=this) if negate else this 4428 4429 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4430 unnest = self._parse_unnest(with_alias=False) 4431 if unnest: 4432 this = self.expression(exp.In, this=this, unnest=unnest) 4433 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4434 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4435 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4436 4437 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4438 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4439 else: 4440 this = self.expression(exp.In, this=this, expressions=expressions) 4441 4442 if matched_l_paren: 4443 self._match_r_paren(this) 4444 elif not self._match(TokenType.R_BRACKET, expression=this): 4445 self.raise_error("Expecting ]") 4446 else: 4447 this = self.expression(exp.In, this=this, field=self._parse_field()) 4448 4449 return this 4450 4451 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4452 low = self._parse_bitwise() 4453 self._match(TokenType.AND) 4454 high = self._parse_bitwise() 4455 return self.expression(exp.Between, this=this, low=low, high=high) 4456 4457 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4458 if not self._match(TokenType.ESCAPE): 4459 return this 4460 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4461 4462 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4463 index = self._index 4464 4465 if not self._match(TokenType.INTERVAL) and match_interval: 4466 return None 4467 4468 if self._match(TokenType.STRING, advance=False): 4469 this = self._parse_primary() 4470 else: 4471 this = self._parse_term() 4472 4473 if not this 
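# --- Editor's example (illustrative sketch; not part of the original module) ---
# `_parse_is` maps IS [NOT] DISTINCT FROM onto the null-safe comparison nodes;
# note the inversion: IS NOT DISTINCT FROM means "null-safe equal". Sketch:
from sqlglot import exp, parse_one

assert isinstance(parse_one("SELECT x IS NOT DISTINCT FROM y").selects[0], exp.NullSafeEQ)
assert isinstance(parse_one("SELECT x IS DISTINCT FROM y").selects[0], exp.NullSafeNEQ)
# -------------------------------------------------------------------------------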
or ( 4474 isinstance(this, exp.Column) 4475 and not this.table 4476 and not this.this.quoted 4477 and this.name.upper() == "IS" 4478 ): 4479 self._retreat(index) 4480 return None 4481 4482 unit = self._parse_function() or ( 4483 not self._match(TokenType.ALIAS, advance=False) 4484 and self._parse_var(any_token=True, upper=True) 4485 ) 4486 4487 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4488 # each INTERVAL expression into this canonical form so it's easy to transpile 4489 if this and this.is_number: 4490 this = exp.Literal.string(this.to_py()) 4491 elif this and this.is_string: 4492 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4493 if len(parts) == 1: 4494 if unit: 4495 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4496 self._retreat(self._index - 1) 4497 4498 this = exp.Literal.string(parts[0][0]) 4499 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4500 4501 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4502 unit = self.expression( 4503 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4504 ) 4505 4506 interval = self.expression(exp.Interval, this=this, unit=unit) 4507 4508 index = self._index 4509 self._match(TokenType.PLUS) 4510 4511 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4512 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4513 return self.expression( 4514 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4515 ) 4516 4517 self._retreat(index) 4518 return interval 4519 4520 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4521 this = self._parse_term() 4522 4523 while True: 4524 if self._match_set(self.BITWISE): 4525 this = self.expression( 4526 self.BITWISE[self._prev.token_type], 4527 this=this, 4528 expression=self._parse_term(), 4529 ) 4530 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4531 this = self.expression( 4532 exp.DPipe, 4533 this=this, 4534 expression=self._parse_term(), 4535 safe=not self.dialect.STRICT_STRING_CONCAT, 4536 ) 4537 elif self._match(TokenType.DQMARK): 4538 this = self.expression( 4539 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4540 ) 4541 elif self._match_pair(TokenType.LT, TokenType.LT): 4542 this = self.expression( 4543 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4544 ) 4545 elif self._match_pair(TokenType.GT, TokenType.GT): 4546 this = self.expression( 4547 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4548 ) 4549 else: 4550 break 4551 4552 return this 4553 4554 def _parse_term(self) -> t.Optional[exp.Expression]: 4555 this = self._parse_factor() 4556 4557 while self._match_set(self.TERM): 4558 klass = self.TERM[self._prev.token_type] 4559 comments = self._prev_comments 4560 expression = self._parse_factor() 4561 4562 this = self.expression(klass, this=this, comments=comments, expression=expression) 4563 4564 if isinstance(this, exp.Collate): 4565 expr = this.expression 4566 4567 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4568 # fallback to Identifier / Var 4569 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4570 ident = expr.this 4571 if isinstance(ident, exp.Identifier): 4572 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4573 4574 return this 4575 4576 def _parse_factor(self) -> t.Optional[exp.Expression]: 4577 parse_method = self._parse_exponent if 
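# --- Editor's example (illustrative sketch; not part of the original module) ---
# `_parse_interval` canonicalizes INTERVAL '5 day' into the INTERVAL '5' DAY
# shape (a string value plus a unit Var), which keeps transpilation simple.
# Sketch:
from sqlglot import exp, parse_one

_i = parse_one("SELECT INTERVAL '5 day'").find(exp.Interval)
assert _i is not None and _i.this.name == "5"
assert _i.args["unit"].name == "DAY"
# -------------------------------------------------------------------------------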
self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
4646 if data_type.expressions and index2 - index > 1: 4647 self._retreat(index2) 4648 return self._parse_column_ops(data_type) 4649 4650 self._retreat(index) 4651 4652 if fallback_to_identifier: 4653 return self._parse_id_var() 4654 4655 this = self._parse_column() 4656 return this and self._parse_column_ops(this) 4657 4658 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4659 this = self._parse_type() 4660 if not this: 4661 return None 4662 4663 if isinstance(this, exp.Column) and not this.table: 4664 this = exp.var(this.name.upper()) 4665 4666 return self.expression( 4667 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4668 ) 4669 4670 def _parse_types( 4671 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4672 ) -> t.Optional[exp.Expression]: 4673 index = self._index 4674 4675 this: t.Optional[exp.Expression] = None 4676 prefix = self._match_text_seq("SYSUDTLIB", ".") 4677 4678 if not self._match_set(self.TYPE_TOKENS): 4679 identifier = allow_identifiers and self._parse_id_var( 4680 any_token=False, tokens=(TokenType.VAR,) 4681 ) 4682 if isinstance(identifier, exp.Identifier): 4683 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4684 4685 if len(tokens) != 1: 4686 self.raise_error("Unexpected identifier", self._prev) 4687 4688 if tokens[0].token_type in self.TYPE_TOKENS: 4689 self._prev = tokens[0] 4690 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4691 type_name = identifier.name 4692 4693 while self._match(TokenType.DOT): 4694 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4695 4696 this = exp.DataType.build(type_name, udt=True) 4697 else: 4698 self._retreat(self._index - 1) 4699 return None 4700 else: 4701 return None 4702 4703 type_token = self._prev.token_type 4704 4705 if type_token == TokenType.PSEUDO_TYPE: 4706 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4707 4708 if type_token == TokenType.OBJECT_IDENTIFIER: 4709 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4710 4711 # https://materialize.com/docs/sql/types/map/ 4712 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4713 key_type = self._parse_types( 4714 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4715 ) 4716 if not self._match(TokenType.FARROW): 4717 self._retreat(index) 4718 return None 4719 4720 value_type = self._parse_types( 4721 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4722 ) 4723 if not self._match(TokenType.R_BRACKET): 4724 self._retreat(index) 4725 return None 4726 4727 return exp.DataType( 4728 this=exp.DataType.Type.MAP, 4729 expressions=[key_type, value_type], 4730 nested=True, 4731 prefix=prefix, 4732 ) 4733 4734 nested = type_token in self.NESTED_TYPE_TOKENS 4735 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4736 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4737 expressions = None 4738 maybe_func = False 4739 4740 if self._match(TokenType.L_PAREN): 4741 if is_struct: 4742 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4743 elif nested: 4744 expressions = self._parse_csv( 4745 lambda: self._parse_types( 4746 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4747 ) 4748 ) 4749 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4750 this = expressions[0] 4751 this.set("nullable", True) 4752 self._match_r_paren() 4753 return this 4754 elif type_token in self.ENUM_TYPE_TOKENS: 4755 
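# --- Editor's example (illustrative sketch; not part of the original module) ---
# The `_parse_types` machinery is also reachable through the public
# exp.DataType.build helper, which is a convenient way to see how nested and
# parameterized types come out. Sketch:
from sqlglot import exp

_arr = exp.DataType.build("ARRAY<INT>")
assert _arr.is_type(exp.DataType.Type.ARRAY) and _arr.expressions[0].is_type("int")

_dec = exp.DataType.build("DECIMAL(38, 0)")
assert len(_dec.expressions) == 2  # DataTypeParam nodes for precision and scale
# -------------------------------------------------------------------------------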
expressions = self._parse_csv(self._parse_equality) 4756 elif is_aggregate: 4757 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4758 any_token=False, tokens=(TokenType.VAR,) 4759 ) 4760 if not func_or_ident or not self._match(TokenType.COMMA): 4761 return None 4762 expressions = self._parse_csv( 4763 lambda: self._parse_types( 4764 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4765 ) 4766 ) 4767 expressions.insert(0, func_or_ident) 4768 else: 4769 expressions = self._parse_csv(self._parse_type_size) 4770 4771 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4772 if type_token == TokenType.VECTOR and len(expressions) == 2: 4773 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4774 4775 if not expressions or not self._match(TokenType.R_PAREN): 4776 self._retreat(index) 4777 return None 4778 4779 maybe_func = True 4780 4781 values: t.Optional[t.List[exp.Expression]] = None 4782 4783 if nested and self._match(TokenType.LT): 4784 if is_struct: 4785 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4786 else: 4787 expressions = self._parse_csv( 4788 lambda: self._parse_types( 4789 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4790 ) 4791 ) 4792 4793 if not self._match(TokenType.GT): 4794 self.raise_error("Expecting >") 4795 4796 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4797 values = self._parse_csv(self._parse_assignment) 4798 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4799 4800 if type_token in self.TIMESTAMPS: 4801 if self._match_text_seq("WITH", "TIME", "ZONE"): 4802 maybe_func = False 4803 tz_type = ( 4804 exp.DataType.Type.TIMETZ 4805 if type_token in self.TIMES 4806 else exp.DataType.Type.TIMESTAMPTZ 4807 ) 4808 this = exp.DataType(this=tz_type, expressions=expressions) 4809 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4810 maybe_func = False 4811 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4812 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4813 maybe_func = False 4814 elif type_token == TokenType.INTERVAL: 4815 unit = self._parse_var(upper=True) 4816 if unit: 4817 if self._match_text_seq("TO"): 4818 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4819 4820 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4821 else: 4822 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4823 4824 if maybe_func and check_func: 4825 index2 = self._index 4826 peek = self._parse_string() 4827 4828 if not peek: 4829 self._retreat(index) 4830 return None 4831 4832 self._retreat(index2) 4833 4834 if not this: 4835 if self._match_text_seq("UNSIGNED"): 4836 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4837 if not unsigned_type_token: 4838 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4839 4840 type_token = unsigned_type_token or type_token 4841 4842 this = exp.DataType( 4843 this=exp.DataType.Type[type_token.value], 4844 expressions=expressions, 4845 nested=nested, 4846 prefix=prefix, 4847 ) 4848 4849 # Empty arrays/structs are allowed 4850 if values is not None: 4851 cls = exp.Struct if is_struct else exp.Array 4852 this = exp.cast(cls(expressions=values), this, copy=False) 4853 4854 elif expressions: 4855 this.set("expressions", expressions) 4856 4857 # https://materialize.com/docs/sql/types/list/#type-name 4858 while 
self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)
            if not matched_l_bracket and not matched_array:
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token.
Without this, the list will be parsed as a type and we'll eventually crash 4909 this = self._parse_id_var() 4910 else: 4911 this = ( 4912 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4913 or self._parse_id_var() 4914 ) 4915 4916 self._match(TokenType.COLON) 4917 4918 if ( 4919 type_required 4920 and not isinstance(this, exp.DataType) 4921 and not self._match_set(self.TYPE_TOKENS, advance=False) 4922 ): 4923 self._retreat(index) 4924 return self._parse_types() 4925 4926 return self._parse_column_def(this) 4927 4928 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4929 if not self._match_text_seq("AT", "TIME", "ZONE"): 4930 return this 4931 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4932 4933 def _parse_column(self) -> t.Optional[exp.Expression]: 4934 this = self._parse_column_reference() 4935 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4936 4937 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4938 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4939 4940 return column 4941 4942 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4943 this = self._parse_field() 4944 if ( 4945 not this 4946 and self._match(TokenType.VALUES, advance=False) 4947 and self.VALUES_FOLLOWED_BY_PAREN 4948 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4949 ): 4950 this = self._parse_id_var() 4951 4952 if isinstance(this, exp.Identifier): 4953 # We bubble up comments from the Identifier to the Column 4954 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4955 4956 return this 4957 4958 def _parse_colon_as_variant_extract( 4959 self, this: t.Optional[exp.Expression] 4960 ) -> t.Optional[exp.Expression]: 4961 casts = [] 4962 json_path = [] 4963 escape = None 4964 4965 while self._match(TokenType.COLON): 4966 start_index = self._index 4967 4968 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4969 path = self._parse_column_ops( 4970 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4971 ) 4972 4973 # The cast :: operator has a lower precedence than the extraction operator :, so 4974 # we rearrange the AST appropriately to avoid casting the JSON path 4975 while isinstance(path, exp.Cast): 4976 casts.append(path.to) 4977 path = path.this 4978 4979 if casts: 4980 dcolon_offset = next( 4981 i 4982 for i, t in enumerate(self._tokens[start_index:]) 4983 if t.token_type == TokenType.DCOLON 4984 ) 4985 end_token = self._tokens[start_index + dcolon_offset - 1] 4986 else: 4987 end_token = self._prev 4988 4989 if path: 4990 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 4991 # it'll roundtrip to a string literal in GET_PATH 4992 if isinstance(path, exp.Identifier) and path.quoted: 4993 escape = True 4994 4995 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4996 4997 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4998 # Databricks transforms it back to the colon/dot notation 4999 if json_path: 5000 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5001 5002 if json_path_expr: 5003 json_path_expr.set("escape", escape) 5004 5005 this = self.expression( 5006 exp.JSONExtract, 5007 this=this, 5008 expression=json_path_expr, 5009 variant_extract=True, 5010 ) 5011 5012 while casts: 5013 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5014 5015 return this 5016 5017 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5018 return self._parse_types() 5019 5020 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5021 this = self._parse_bracket(this) 5022 5023 while self._match_set(self.COLUMN_OPERATORS): 5024 op_token = self._prev.token_type 5025 op = self.COLUMN_OPERATORS.get(op_token) 5026 5027 if op_token == TokenType.DCOLON: 5028 field = self._parse_dcolon() 5029 if not field: 5030 self.raise_error("Expected type") 5031 elif op and self._curr: 5032 field = self._parse_column_reference() 5033 else: 5034 field = self._parse_field(any_token=True, anonymous_func=True) 5035 5036 if isinstance(field, exp.Func) and this: 5037 # bigquery allows function calls like x.y.count(...) 5038 # SAFE.SUBSTR(...) 5039 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5040 this = exp.replace_tree( 5041 this, 5042 lambda n: ( 5043 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5044 if n.table 5045 else n.this 5046 ) 5047 if isinstance(n, exp.Column) 5048 else n, 5049 ) 5050 5051 if op: 5052 this = op(self, this, field) 5053 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5054 this = self.expression( 5055 exp.Column, 5056 this=field, 5057 table=this.this, 5058 db=this.args.get("table"), 5059 catalog=this.args.get("db"), 5060 ) 5061 else: 5062 this = self.expression(exp.Dot, this=this, expression=field) 5063 5064 this = self._parse_bracket(this) 5065 5066 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5067 5068 def _parse_primary(self) -> t.Optional[exp.Expression]: 5069 if self._match_set(self.PRIMARY_PARSERS): 5070 token_type = self._prev.token_type 5071 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5072 5073 if token_type == TokenType.STRING: 5074 expressions = [primary] 5075 while self._match(TokenType.STRING): 5076 expressions.append(exp.Literal.string(self._prev.text)) 5077 5078 if len(expressions) > 1: 5079 return self.expression(exp.Concat, expressions=expressions) 5080 5081 return primary 5082 5083 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5084 return exp.Literal.number(f"0.{self._prev.text}") 5085 5086 if self._match(TokenType.L_PAREN): 5087 comments = self._prev_comments 5088 query = self._parse_select() 5089 5090 if query: 5091 expressions = [query] 5092 else: 5093 expressions = self._parse_expressions() 5094 5095 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5096 5097 if not this and self._match(TokenType.R_PAREN, advance=False): 5098 this = self.expression(exp.Tuple) 5099 elif 
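# --- Editor's example (illustrative sketch; not part of the original module) ---
# With COLON_IS_VARIANT_EXTRACT (Snowflake/Databricks), `col:a.b` becomes an
# exp.JSONExtract flagged as a variant extract, so Snowflake can render it via
# GET_PATH while Databricks restores the colon/dot notation. Sketch:
from sqlglot import exp, parse_one

_je = parse_one("SELECT col:a.b FROM t", read="snowflake").find(exp.JSONExtract)
assert _je is not None and _je.args.get("variant_extract")
# -------------------------------------------------------------------------------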
isinstance(this, exp.UNWRAPPED_QUERIES): 5100 this = self._parse_subquery(this=this, parse_alias=False) 5101 elif isinstance(this, exp.Subquery): 5102 this = self._parse_subquery( 5103 this=self._parse_set_operations(this), parse_alias=False 5104 ) 5105 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5106 this = self.expression(exp.Tuple, expressions=expressions) 5107 else: 5108 this = self.expression(exp.Paren, this=this) 5109 5110 if this: 5111 this.add_comments(comments) 5112 5113 self._match_r_paren(expression=this) 5114 return this 5115 5116 return None 5117 5118 def _parse_field( 5119 self, 5120 any_token: bool = False, 5121 tokens: t.Optional[t.Collection[TokenType]] = None, 5122 anonymous_func: bool = False, 5123 ) -> t.Optional[exp.Expression]: 5124 if anonymous_func: 5125 field = ( 5126 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5127 or self._parse_primary() 5128 ) 5129 else: 5130 field = self._parse_primary() or self._parse_function( 5131 anonymous=anonymous_func, any_token=any_token 5132 ) 5133 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5134 5135 def _parse_function( 5136 self, 5137 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5138 anonymous: bool = False, 5139 optional_parens: bool = True, 5140 any_token: bool = False, 5141 ) -> t.Optional[exp.Expression]: 5142 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5143 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5144 fn_syntax = False 5145 if ( 5146 self._match(TokenType.L_BRACE, advance=False) 5147 and self._next 5148 and self._next.text.upper() == "FN" 5149 ): 5150 self._advance(2) 5151 fn_syntax = True 5152 5153 func = self._parse_function_call( 5154 functions=functions, 5155 anonymous=anonymous, 5156 optional_parens=optional_parens, 5157 any_token=any_token, 5158 ) 5159 5160 if fn_syntax: 5161 self._match(TokenType.R_BRACE) 5162 5163 return func 5164 5165 def _parse_function_call( 5166 self, 5167 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5168 anonymous: bool = False, 5169 optional_parens: bool = True, 5170 any_token: bool = False, 5171 ) -> t.Optional[exp.Expression]: 5172 if not self._curr: 5173 return None 5174 5175 comments = self._curr.comments 5176 token_type = self._curr.token_type 5177 this = self._curr.text 5178 upper = this.upper() 5179 5180 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5181 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5182 self._advance() 5183 return self._parse_window(parser(self)) 5184 5185 if not self._next or self._next.token_type != TokenType.L_PAREN: 5186 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5187 self._advance() 5188 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5189 5190 return None 5191 5192 if any_token: 5193 if token_type in self.RESERVED_TOKENS: 5194 return None 5195 elif token_type not in self.FUNC_TOKENS: 5196 return None 5197 5198 self._advance(2) 5199 5200 parser = self.FUNCTION_PARSERS.get(upper) 5201 if parser and not anonymous: 5202 this = parser(self) 5203 else: 5204 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5205 5206 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5207 this = self.expression(subquery_predicate, this=self._parse_select()) 5208 self._match_r_paren() 5209 return this 5210 5211 if functions is None: 5212 functions = self.FUNCTIONS 5213 5214 function = functions.get(upper) 
5215 5216 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5217 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5218 5219 if alias: 5220 args = self._kv_to_prop_eq(args) 5221 5222 if function and not anonymous: 5223 if "dialect" in function.__code__.co_varnames: 5224 func = function(args, dialect=self.dialect) 5225 else: 5226 func = function(args) 5227 5228 func = self.validate_expression(func, args) 5229 if not self.dialect.NORMALIZE_FUNCTIONS: 5230 func.meta["name"] = this 5231 5232 this = func 5233 else: 5234 if token_type == TokenType.IDENTIFIER: 5235 this = exp.Identifier(this=this, quoted=True) 5236 this = self.expression(exp.Anonymous, this=this, expressions=args) 5237 5238 if isinstance(this, exp.Expression): 5239 this.add_comments(comments) 5240 5241 self._match_r_paren(this) 5242 return self._parse_window(this) 5243 5244 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5245 return expression 5246 5247 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5248 transformed = [] 5249 5250 for index, e in enumerate(expressions): 5251 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5252 if isinstance(e, exp.Alias): 5253 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5254 5255 if not isinstance(e, exp.PropertyEQ): 5256 e = self.expression( 5257 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5258 ) 5259 5260 if isinstance(e.this, exp.Column): 5261 e.this.replace(e.this.this) 5262 else: 5263 e = self._to_prop_eq(e, index) 5264 5265 transformed.append(e) 5266 5267 return transformed 5268 5269 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5270 return self._parse_column_def(self._parse_id_var()) 5271 5272 def _parse_user_defined_function( 5273 self, kind: t.Optional[TokenType] = None 5274 ) -> t.Optional[exp.Expression]: 5275 this = self._parse_id_var() 5276 5277 while self._match(TokenType.DOT): 5278 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5279 5280 if not self._match(TokenType.L_PAREN): 5281 return this 5282 5283 expressions = self._parse_csv(self._parse_function_parameter) 5284 self._match_r_paren() 5285 return self.expression( 5286 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5287 ) 5288 5289 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5290 literal = self._parse_primary() 5291 if literal: 5292 return self.expression(exp.Introducer, this=token.text, expression=literal) 5293 5294 return self.expression(exp.Identifier, this=token.text) 5295 5296 def _parse_session_parameter(self) -> exp.SessionParameter: 5297 kind = None 5298 this = self._parse_id_var() or self._parse_primary() 5299 5300 if this and self._match(TokenType.DOT): 5301 kind = this.name 5302 this = self._parse_var() or self._parse_primary() 5303 5304 return self.expression(exp.SessionParameter, this=this, kind=kind) 5305 5306 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5307 return self._parse_id_var() 5308 5309 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5310 index = self._index 5311 5312 if self._match(TokenType.L_PAREN): 5313 expressions = t.cast( 5314 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5315 ) 5316 5317 if not self._match(TokenType.R_PAREN): 5318 self._retreat(index) 5319 else: 5320 expressions = [self._parse_lambda_arg()] 5321 5322 if self._match_set(self.LAMBDAS): 5323 return 
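# --- Editor's example (illustrative sketch; not part of the original module) ---
# `_parse_function_call` dispatches known names through the FUNCTIONS builders
# and falls back to exp.Anonymous for everything else. Sketch (my_udf is a
# made-up name):
from sqlglot import exp, parse_one

assert parse_one("SELECT COALESCE(a, b)").find(exp.Coalesce) is not None

_anon = parse_one("SELECT my_udf(1, 2)").find(exp.Anonymous)
assert _anon is not None and len(_anon.expressions) == 2
# -------------------------------------------------------------------------------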
self.LAMBDAS[self._prev.token_type](self, expressions) 5324 5325 self._retreat(index) 5326 5327 this: t.Optional[exp.Expression] 5328 5329 if self._match(TokenType.DISTINCT): 5330 this = self.expression( 5331 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5332 ) 5333 else: 5334 this = self._parse_select_or_expression(alias=alias) 5335 5336 return self._parse_limit( 5337 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5338 ) 5339 5340 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5341 index = self._index 5342 if not self._match(TokenType.L_PAREN): 5343 return this 5344 5345 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5346 # expr can be of both types 5347 if self._match_set(self.SELECT_START_TOKENS): 5348 self._retreat(index) 5349 return this 5350 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5351 self._match_r_paren() 5352 return self.expression(exp.Schema, this=this, expressions=args) 5353 5354 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5355 return self._parse_column_def(self._parse_field(any_token=True)) 5356 5357 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5358 # column defs are not really columns, they're identifiers 5359 if isinstance(this, exp.Column): 5360 this = this.this 5361 5362 kind = self._parse_types(schema=True) 5363 5364 if self._match_text_seq("FOR", "ORDINALITY"): 5365 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5366 5367 constraints: t.List[exp.Expression] = [] 5368 5369 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5370 ("ALIAS", "MATERIALIZED") 5371 ): 5372 persisted = self._prev.text.upper() == "MATERIALIZED" 5373 constraint_kind = exp.ComputedColumnConstraint( 5374 this=self._parse_assignment(), 5375 persisted=persisted or self._match_text_seq("PERSISTED"), 5376 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5377 ) 5378 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5379 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5380 self._match(TokenType.ALIAS) 5381 constraints.append( 5382 self.expression( 5383 exp.ColumnConstraint, 5384 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5385 ) 5386 ) 5387 5388 while True: 5389 constraint = self._parse_column_constraint() 5390 if not constraint: 5391 break 5392 constraints.append(constraint) 5393 5394 if not kind and not constraints: 5395 return this 5396 5397 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5398 5399 def _parse_auto_increment( 5400 self, 5401 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5402 start = None 5403 increment = None 5404 5405 if self._match(TokenType.L_PAREN, advance=False): 5406 args = self._parse_wrapped_csv(self._parse_bitwise) 5407 start = seq_get(args, 0) 5408 increment = seq_get(args, 1) 5409 elif self._match_text_seq("START"): 5410 start = self._parse_bitwise() 5411 self._match_text_seq("INCREMENT") 5412 increment = self._parse_bitwise() 5413 5414 if start and increment: 5415 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5416 5417 return exp.AutoIncrementColumnConstraint() 5418 5419 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5420 if not self._match_text_seq("REFRESH"): 5421 
self._retreat(self._index - 1) 5422 return None 5423 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5424 5425 def _parse_compress(self) -> exp.CompressColumnConstraint: 5426 if self._match(TokenType.L_PAREN, advance=False): 5427 return self.expression( 5428 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5429 ) 5430 5431 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5432 5433 def _parse_generated_as_identity( 5434 self, 5435 ) -> ( 5436 exp.GeneratedAsIdentityColumnConstraint 5437 | exp.ComputedColumnConstraint 5438 | exp.GeneratedAsRowColumnConstraint 5439 ): 5440 if self._match_text_seq("BY", "DEFAULT"): 5441 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5442 this = self.expression( 5443 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5444 ) 5445 else: 5446 self._match_text_seq("ALWAYS") 5447 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5448 5449 self._match(TokenType.ALIAS) 5450 5451 if self._match_text_seq("ROW"): 5452 start = self._match_text_seq("START") 5453 if not start: 5454 self._match(TokenType.END) 5455 hidden = self._match_text_seq("HIDDEN") 5456 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5457 5458 identity = self._match_text_seq("IDENTITY") 5459 5460 if self._match(TokenType.L_PAREN): 5461 if self._match(TokenType.START_WITH): 5462 this.set("start", self._parse_bitwise()) 5463 if self._match_text_seq("INCREMENT", "BY"): 5464 this.set("increment", self._parse_bitwise()) 5465 if self._match_text_seq("MINVALUE"): 5466 this.set("minvalue", self._parse_bitwise()) 5467 if self._match_text_seq("MAXVALUE"): 5468 this.set("maxvalue", self._parse_bitwise()) 5469 5470 if self._match_text_seq("CYCLE"): 5471 this.set("cycle", True) 5472 elif self._match_text_seq("NO", "CYCLE"): 5473 this.set("cycle", False) 5474 5475 if not identity: 5476 this.set("expression", self._parse_range()) 5477 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5478 args = self._parse_csv(self._parse_bitwise) 5479 this.set("start", seq_get(args, 0)) 5480 this.set("increment", seq_get(args, 1)) 5481 5482 self._match_r_paren() 5483 5484 return this 5485 5486 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5487 self._match_text_seq("LENGTH") 5488 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5489 5490 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5491 if self._match_text_seq("NULL"): 5492 return self.expression(exp.NotNullColumnConstraint) 5493 if self._match_text_seq("CASESPECIFIC"): 5494 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5495 if self._match_text_seq("FOR", "REPLICATION"): 5496 return self.expression(exp.NotForReplicationColumnConstraint) 5497 5498 # Unconsume the `NOT` token 5499 self._retreat(self._index - 1) 5500 return None 5501 5502 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5503 if self._match(TokenType.CONSTRAINT): 5504 this = self._parse_id_var() 5505 else: 5506 this = None 5507 5508 if self._match_texts(self.CONSTRAINT_PARSERS): 5509 return self.expression( 5510 exp.ColumnConstraint, 5511 this=this, 5512 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5513 ) 5514 5515 return this 5516 5517 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5518 if not self._match(TokenType.CONSTRAINT): 5519 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5520 5521 return self.expression( 5522 exp.Constraint, 5523 this=self._parse_id_var(), 5524 expressions=self._parse_unnamed_constraints(), 5525 ) 5526 5527 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5528 constraints = [] 5529 while True: 5530 constraint = self._parse_unnamed_constraint() or self._parse_function() 5531 if not constraint: 5532 break 5533 constraints.append(constraint) 5534 5535 return constraints 5536 5537 def _parse_unnamed_constraint( 5538 self, constraints: t.Optional[t.Collection[str]] = None 5539 ) -> t.Optional[exp.Expression]: 5540 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5541 constraints or self.CONSTRAINT_PARSERS 5542 ): 5543 return None 5544 5545 constraint = self._prev.text.upper() 5546 if constraint not in self.CONSTRAINT_PARSERS: 5547 self.raise_error(f"No parser found for schema constraint {constraint}.") 5548 5549 return self.CONSTRAINT_PARSERS[constraint](self) 5550 5551 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5552 return self._parse_id_var(any_token=False) 5553 5554 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5555 self._match_text_seq("KEY") 5556 return self.expression( 5557 exp.UniqueColumnConstraint, 5558 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5559 this=self._parse_schema(self._parse_unique_key()), 5560 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5561 on_conflict=self._parse_on_conflict(), 5562 ) 5563 5564 def _parse_key_constraint_options(self) -> t.List[str]: 5565 options = [] 5566 while True: 5567 if not self._curr: 5568 break 5569 5570 if self._match(TokenType.ON): 5571 action = None 5572 on = self._advance_any() and self._prev.text 5573 5574 if self._match_text_seq("NO", "ACTION"): 5575 action = "NO ACTION" 5576 elif self._match_text_seq("CASCADE"): 5577 action = "CASCADE" 5578 elif self._match_text_seq("RESTRICT"): 5579 action = "RESTRICT" 5580 elif self._match_pair(TokenType.SET, TokenType.NULL): 5581 action = "SET NULL" 5582 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5583 action = "SET DEFAULT" 5584 else: 5585 self.raise_error("Invalid key constraint") 5586 5587 options.append(f"ON {on} {action}") 5588 else: 5589 var = self._parse_var_from_options( 5590 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5591 ) 5592 if not var: 5593 break 5594 options.append(var.name) 5595 5596 return options 5597 5598 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5599 if match and not self._match(TokenType.REFERENCES): 5600 return None 5601 5602 expressions = None 5603 this = self._parse_table(schema=True) 5604 options = self._parse_key_constraint_options() 5605 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5606 5607 def _parse_foreign_key(self) -> exp.ForeignKey: 5608 expressions = self._parse_wrapped_id_vars() 5609 reference = self._parse_references() 5610 options = {} 5611 5612 while self._match(TokenType.ON): 5613 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5614 self.raise_error("Expected DELETE or UPDATE") 5615 5616 kind = self._prev.text.lower() 5617 5618 if self._match_text_seq("NO", "ACTION"): 5619 action = "NO ACTION" 5620 elif self._match(TokenType.SET): 5621 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5622 action = "SET " + self._prev.text.upper() 5623 else: 5624 self._advance() 5625 action = self._prev.text.upper() 5626 
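# e.g. for FOREIGN KEY (a) REFERENCES t (b) ON DELETE CASCADE ON UPDATE SET NULL,
# this loop accumulates options = {"delete": "CASCADE", "update": "SET NULL"},
# which are forwarded to exp.ForeignKey below as keyword arguments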
5627 options[kind] = action 5628 5629 return self.expression( 5630 exp.ForeignKey, 5631 expressions=expressions, 5632 reference=reference, 5633 **options, # type: ignore 5634 ) 5635 5636 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5637 return self._parse_field() 5638 5639 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5640 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5641 self._retreat(self._index - 1) 5642 return None 5643 5644 id_vars = self._parse_wrapped_id_vars() 5645 return self.expression( 5646 exp.PeriodForSystemTimeConstraint, 5647 this=seq_get(id_vars, 0), 5648 expression=seq_get(id_vars, 1), 5649 ) 5650 5651 def _parse_primary_key( 5652 self, wrapped_optional: bool = False, in_props: bool = False 5653 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5654 desc = ( 5655 self._match_set((TokenType.ASC, TokenType.DESC)) 5656 and self._prev.token_type == TokenType.DESC 5657 ) 5658 5659 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5660 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5661 5662 expressions = self._parse_wrapped_csv( 5663 self._parse_primary_key_part, optional=wrapped_optional 5664 ) 5665 options = self._parse_key_constraint_options() 5666 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5667 5668 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5669 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5670 5671 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5672 """ 5673 Parses a datetime column in ODBC format. We parse the column into the corresponding 5674 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5675 same as we did for `DATE('yyyy-mm-dd')`. 
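The remaining ODBC introducers (`t`, `ts`) are resolved through the same
ODBC_DATETIME_LITERALS lookup, yielding `Time` and `Timestamp` columns respectively.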
5676 5677 Reference: 5678 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5679 """ 5680 self._match(TokenType.VAR) 5681 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5682 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5683 if not self._match(TokenType.R_BRACE): 5684 self.raise_error("Expected }") 5685 return expression 5686 5687 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5688 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5689 return this 5690 5691 bracket_kind = self._prev.token_type 5692 if ( 5693 bracket_kind == TokenType.L_BRACE 5694 and self._curr 5695 and self._curr.token_type == TokenType.VAR 5696 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5697 ): 5698 return self._parse_odbc_datetime_literal() 5699 5700 expressions = self._parse_csv( 5701 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5702 ) 5703 5704 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5705 self.raise_error("Expected ]") 5706 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5707 self.raise_error("Expected }") 5708 5709 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5710 if bracket_kind == TokenType.L_BRACE: 5711 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5712 elif not this: 5713 this = build_array_constructor( 5714 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5715 ) 5716 else: 5717 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5718 if constructor_type: 5719 return build_array_constructor( 5720 constructor_type, 5721 args=expressions, 5722 bracket_kind=bracket_kind, 5723 dialect=self.dialect, 5724 ) 5725 5726 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5727 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5728 5729 self._add_comments(this) 5730 return self._parse_bracket(this) 5731 5732 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5733 if self._match(TokenType.COLON): 5734 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5735 return this 5736 5737 def _parse_case(self) -> t.Optional[exp.Expression]: 5738 ifs = [] 5739 default = None 5740 5741 comments = self._prev_comments 5742 expression = self._parse_assignment() 5743 5744 while self._match(TokenType.WHEN): 5745 this = self._parse_assignment() 5746 self._match(TokenType.THEN) 5747 then = self._parse_assignment() 5748 ifs.append(self.expression(exp.If, this=this, true=then)) 5749 5750 if self._match(TokenType.ELSE): 5751 default = self._parse_assignment() 5752 5753 if not self._match(TokenType.END): 5754 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5755 default = exp.column("interval") 5756 else: 5757 self.raise_error("Expected END after CASE", self._prev) 5758 5759 return self.expression( 5760 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5761 ) 5762 5763 def _parse_if(self) -> t.Optional[exp.Expression]: 5764 if self._match(TokenType.L_PAREN): 5765 args = self._parse_csv(self._parse_assignment) 5766 this = self.validate_expression(exp.If.from_arg_list(args), args) 5767 self._match_r_paren() 5768 else: 5769 index = self._index - 1 5770 5771 if self.NO_PAREN_IF_COMMANDS and index == 0: 5772 
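# NO_PAREN_IF_COMMANDS dialects treat a statement-initial, parenthesis-less IF
# as a command, so the remaining tokens are wrapped in an opaque exp.Command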
return self._parse_as_command(self._prev) 5773 5774 condition = self._parse_assignment() 5775 5776 if not condition: 5777 self._retreat(index) 5778 return None 5779 5780 self._match(TokenType.THEN) 5781 true = self._parse_assignment() 5782 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5783 self._match(TokenType.END) 5784 this = self.expression(exp.If, this=condition, true=true, false=false) 5785 5786 return this 5787 5788 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5789 if not self._match_text_seq("VALUE", "FOR"): 5790 self._retreat(self._index - 1) 5791 return None 5792 5793 return self.expression( 5794 exp.NextValueFor, 5795 this=self._parse_column(), 5796 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5797 ) 5798 5799 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5800 this = self._parse_function() or self._parse_var_or_string(upper=True) 5801 5802 if self._match(TokenType.FROM): 5803 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5804 5805 if not self._match(TokenType.COMMA): 5806 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5807 5808 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5809 5810 def _parse_gap_fill(self) -> exp.GapFill: 5811 self._match(TokenType.TABLE) 5812 this = self._parse_table() 5813 5814 self._match(TokenType.COMMA) 5815 args = [this, *self._parse_csv(self._parse_lambda)] 5816 5817 gap_fill = exp.GapFill.from_arg_list(args) 5818 return self.validate_expression(gap_fill, args) 5819 5820 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5821 this = self._parse_assignment() 5822 5823 if not self._match(TokenType.ALIAS): 5824 if self._match(TokenType.COMMA): 5825 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5826 5827 self.raise_error("Expected AS after CAST") 5828 5829 fmt = None 5830 to = self._parse_types() 5831 5832 if self._match(TokenType.FORMAT): 5833 fmt_string = self._parse_string() 5834 fmt = self._parse_at_time_zone(fmt_string) 5835 5836 if not to: 5837 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5838 if to.this in exp.DataType.TEMPORAL_TYPES: 5839 this = self.expression( 5840 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5841 this=this, 5842 format=exp.Literal.string( 5843 format_time( 5844 fmt_string.this if fmt_string else "", 5845 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5846 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5847 ) 5848 ), 5849 safe=safe, 5850 ) 5851 5852 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5853 this.set("zone", fmt.args["zone"]) 5854 return this 5855 elif not to: 5856 self.raise_error("Expected TYPE after CAST") 5857 elif isinstance(to, exp.Identifier): 5858 to = exp.DataType.build(to.name, udt=True) 5859 elif to.this == exp.DataType.Type.CHAR: 5860 if self._match(TokenType.CHARACTER_SET): 5861 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5862 5863 return self.expression( 5864 exp.Cast if strict else exp.TryCast, 5865 this=this, 5866 to=to, 5867 format=fmt, 5868 safe=safe, 5869 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5870 ) 5871 5872 def _parse_string_agg(self) -> exp.Expression: 5873 if self._match(TokenType.DISTINCT): 5874 args: t.List[t.Optional[exp.Expression]] = [ 5875 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5876 ] 5877 if self._match(TokenType.COMMA): 5878 args.extend(self._parse_csv(self._parse_assignment)) 5879 else: 5880 args = self._parse_csv(self._parse_assignment) # type: ignore 5881 5882 index = self._index 5883 if not self._match(TokenType.R_PAREN) and args: 5884 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5885 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5886 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5887 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5888 5889 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5890 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5891 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5892 if not self._match_text_seq("WITHIN", "GROUP"): 5893 self._retreat(index) 5894 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5895 5896 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5897 order = self._parse_order(this=seq_get(args, 0)) 5898 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5899 5900 def _parse_convert( 5901 self, strict: bool, safe: t.Optional[bool] = None 5902 ) -> t.Optional[exp.Expression]: 5903 this = self._parse_bitwise() 5904 5905 if self._match(TokenType.USING): 5906 to: t.Optional[exp.Expression] = self.expression( 5907 exp.CharacterSet, this=self._parse_var() 5908 ) 5909 elif self._match(TokenType.COMMA): 5910 to = self._parse_types() 5911 else: 5912 to = None 5913 5914 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5915 5916 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5917 """ 5918 There are generally two variants of the DECODE function: 5919 5920 - DECODE(bin, charset) 5921 - DECODE(expression, search, result [, search, result] ... [, default]) 5922 5923 The second variant will always be parsed into a CASE expression. Note that NULL 5924 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5925 instead of relying on pattern matching. 
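For example, DECODE(a, 1, 'one', NULL, 'none', 'other') is parsed roughly as
CASE WHEN a = 1 THEN 'one' WHEN a IS NULL THEN 'none' ELSE 'other' END.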
5926 """ 5927 args = self._parse_csv(self._parse_assignment) 5928 5929 if len(args) < 3: 5930 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5931 5932 expression, *expressions = args 5933 if not expression: 5934 return None 5935 5936 ifs = [] 5937 for search, result in zip(expressions[::2], expressions[1::2]): 5938 if not search or not result: 5939 return None 5940 5941 if isinstance(search, exp.Literal): 5942 ifs.append( 5943 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5944 ) 5945 elif isinstance(search, exp.Null): 5946 ifs.append( 5947 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5948 ) 5949 else: 5950 cond = exp.or_( 5951 exp.EQ(this=expression.copy(), expression=search), 5952 exp.and_( 5953 exp.Is(this=expression.copy(), expression=exp.Null()), 5954 exp.Is(this=search.copy(), expression=exp.Null()), 5955 copy=False, 5956 ), 5957 copy=False, 5958 ) 5959 ifs.append(exp.If(this=cond, true=result)) 5960 5961 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5962 5963 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5964 self._match_text_seq("KEY") 5965 key = self._parse_column() 5966 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5967 self._match_text_seq("VALUE") 5968 value = self._parse_bitwise() 5969 5970 if not key and not value: 5971 return None 5972 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5973 5974 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5975 if not this or not self._match_text_seq("FORMAT", "JSON"): 5976 return this 5977 5978 return self.expression(exp.FormatJson, this=this) 5979 5980 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 5981 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 5982 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 5983 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5984 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5985 else: 5986 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5987 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5988 5989 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 5990 5991 if not empty and not error and not null: 5992 return None 5993 5994 return self.expression( 5995 exp.OnCondition, 5996 empty=empty, 5997 error=error, 5998 null=null, 5999 ) 6000 6001 def _parse_on_handling( 6002 self, on: str, *values: str 6003 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6004 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6005 for value in values: 6006 if self._match_text_seq(value, "ON", on): 6007 return f"{value} ON {on}" 6008 6009 index = self._index 6010 if self._match(TokenType.DEFAULT): 6011 default_value = self._parse_bitwise() 6012 if self._match_text_seq("ON", on): 6013 return default_value 6014 6015 self._retreat(index) 6016 6017 return None 6018 6019 @t.overload 6020 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6021 6022 @t.overload 6023 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6024 6025 def _parse_json_object(self, agg=False): 6026 star = self._parse_star() 6027 expressions = ( 6028 [star] 6029 if star 6030 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6031 ) 6032 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6033 6034 unique_keys = None 6035 if self._match_text_seq("WITH", "UNIQUE"): 6036 unique_keys = True 6037 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6038 unique_keys = False 6039 6040 self._match_text_seq("KEYS") 6041 6042 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6043 self._parse_type() 6044 ) 6045 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6046 6047 return self.expression( 6048 exp.JSONObjectAgg if agg else exp.JSONObject, 6049 expressions=expressions, 6050 null_handling=null_handling, 6051 unique_keys=unique_keys, 6052 return_type=return_type, 6053 encoding=encoding, 6054 ) 6055 6056 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6057 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6058 if not self._match_text_seq("NESTED"): 6059 this = self._parse_id_var() 6060 kind = self._parse_types(allow_identifiers=False) 6061 nested = None 6062 else: 6063 this = None 6064 kind = None 6065 nested = True 6066 6067 path = self._match_text_seq("PATH") and self._parse_string() 6068 nested_schema = nested and self._parse_json_schema() 6069 6070 return self.expression( 6071 exp.JSONColumnDef, 6072 this=this, 6073 kind=kind, 6074 path=path, 6075 nested_schema=nested_schema, 6076 ) 6077 6078 def _parse_json_schema(self) -> exp.JSONSchema: 6079 self._match_text_seq("COLUMNS") 6080 return self.expression( 6081 exp.JSONSchema, 6082 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6083 ) 6084 6085 def _parse_json_table(self) -> exp.JSONTable: 6086 this = self._parse_format_json(self._parse_bitwise()) 6087 path = self._match(TokenType.COMMA) and self._parse_string() 6088 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6089 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6090 schema = self._parse_json_schema() 6091 6092 return exp.JSONTable( 6093 this=this, 6094 schema=schema, 6095 path=path, 6096 error_handling=error_handling, 6097 empty_handling=empty_handling, 6098 ) 6099 6100 def _parse_match_against(self) -> exp.MatchAgainst: 6101 expressions = self._parse_csv(self._parse_column) 6102 6103 self._match_text_seq(")", "AGAINST", "(") 6104 6105 this = self._parse_string() 6106 6107 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6108 modifier = "IN NATURAL LANGUAGE MODE" 6109 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6110 modifier = f"{modifier} WITH QUERY EXPANSION" 6111 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6112 modifier = "IN BOOLEAN MODE" 6113 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6114 modifier = "WITH QUERY EXPANSION" 6115 else: 6116 modifier = None 6117 6118 return self.expression( 6119 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6120 ) 6121 6122 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6123 def _parse_open_json(self) -> exp.OpenJSON: 6124 this = self._parse_bitwise() 6125 path = self._match(TokenType.COMMA) and self._parse_string() 6126 6127 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6128 this = self._parse_field(any_token=True) 6129 kind = self._parse_types() 6130 path = 
self._parse_string() 6131 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6132 6133 return self.expression( 6134 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6135 ) 6136 6137 expressions = None 6138 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6139 self._match_l_paren() 6140 expressions = self._parse_csv(_parse_open_json_column_def) 6141 6142 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6143 6144 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6145 args = self._parse_csv(self._parse_bitwise) 6146 6147 if self._match(TokenType.IN): 6148 return self.expression( 6149 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6150 ) 6151 6152 if haystack_first: 6153 haystack = seq_get(args, 0) 6154 needle = seq_get(args, 1) 6155 else: 6156 needle = seq_get(args, 0) 6157 haystack = seq_get(args, 1) 6158 6159 return self.expression( 6160 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6161 ) 6162 6163 def _parse_predict(self) -> exp.Predict: 6164 self._match_text_seq("MODEL") 6165 this = self._parse_table() 6166 6167 self._match(TokenType.COMMA) 6168 self._match_text_seq("TABLE") 6169 6170 return self.expression( 6171 exp.Predict, 6172 this=this, 6173 expression=self._parse_table(), 6174 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6175 ) 6176 6177 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6178 args = self._parse_csv(self._parse_table) 6179 return exp.JoinHint(this=func_name.upper(), expressions=args) 6180 6181 def _parse_substring(self) -> exp.Substring: 6182 # Postgres supports the form: substring(string [from int] [for int]) 6183 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6184 6185 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6186 6187 if self._match(TokenType.FROM): 6188 args.append(self._parse_bitwise()) 6189 if self._match(TokenType.FOR): 6190 if len(args) == 1: 6191 args.append(exp.Literal.number(1)) 6192 args.append(self._parse_bitwise()) 6193 6194 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6195 6196 def _parse_trim(self) -> exp.Trim: 6197 # https://www.w3resource.com/sql/character-functions/trim.php 6198 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6199 6200 position = None 6201 collation = None 6202 expression = None 6203 6204 if self._match_texts(self.TRIM_TYPES): 6205 position = self._prev.text.upper() 6206 6207 this = self._parse_bitwise() 6208 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6209 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6210 expression = self._parse_bitwise() 6211 6212 if invert_order: 6213 this, expression = expression, this 6214 6215 if self._match(TokenType.COLLATE): 6216 collation = self._parse_bitwise() 6217 6218 return self.expression( 6219 exp.Trim, this=this, position=position, expression=expression, collation=collation 6220 ) 6221 6222 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6223 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6224 6225 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6226 return self._parse_window(self._parse_id_var(), alias=True) 6227 6228 def _parse_respect_or_ignore_nulls( 6229 self, this: t.Optional[exp.Expression] 6230 ) -> t.Optional[exp.Expression]: 6231 if self._match_text_seq("IGNORE", "NULLS"): 
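# e.g. FIRST_VALUE(x) IGNORE NULLS -> the parsed function gets wrapped in exp.IgnoreNulls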
6232 return self.expression(exp.IgnoreNulls, this=this) 6233 if self._match_text_seq("RESPECT", "NULLS"): 6234 return self.expression(exp.RespectNulls, this=this) 6235 return this 6236 6237 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6238 if self._match(TokenType.HAVING): 6239 self._match_texts(("MAX", "MIN")) 6240 max = self._prev.text.upper() != "MIN" 6241 return self.expression( 6242 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6243 ) 6244 6245 return this 6246 6247 def _parse_window( 6248 self, this: t.Optional[exp.Expression], alias: bool = False 6249 ) -> t.Optional[exp.Expression]: 6250 func = this 6251 comments = func.comments if isinstance(func, exp.Expression) else None 6252 6253 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6254 self._match(TokenType.WHERE) 6255 this = self.expression( 6256 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6257 ) 6258 self._match_r_paren() 6259 6260 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6261 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6262 if self._match_text_seq("WITHIN", "GROUP"): 6263 order = self._parse_wrapped(self._parse_order) 6264 this = self.expression(exp.WithinGroup, this=this, expression=order) 6265 6266 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER; 6267 # some dialects choose to implement it and some do not. 6268 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6269 6270 # There is some code above in _parse_lambda that handles 6271 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6272 6273 # The code below handles 6274 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6275 6276 # Oracle allows both formats 6277 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6278 # and Snowflake chose to do the same for familiarity 6279 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6280 if isinstance(this, exp.AggFunc): 6281 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6282 6283 if ignore_respect and ignore_respect is not this: 6284 ignore_respect.replace(ignore_respect.this) 6285 this = self.expression(ignore_respect.__class__, this=this) 6286 6287 this = self._parse_respect_or_ignore_nulls(this) 6288 6289 # BigQuery allows selecting from a named window: SELECT ... FROM t WINDOW x AS (PARTITION BY ...)
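# _parse_named_window enters here with alias=True: the window name was already
# parsed as `this`, an AS token follows, and no OVER keyword is expected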
6290 if alias: 6291 over = None 6292 self._match(TokenType.ALIAS) 6293 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6294 return this 6295 else: 6296 over = self._prev.text.upper() 6297 6298 if comments and isinstance(func, exp.Expression): 6299 func.pop_comments() 6300 6301 if not self._match(TokenType.L_PAREN): 6302 return self.expression( 6303 exp.Window, 6304 comments=comments, 6305 this=this, 6306 alias=self._parse_id_var(False), 6307 over=over, 6308 ) 6309 6310 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6311 6312 first = self._match(TokenType.FIRST) 6313 if self._match_text_seq("LAST"): 6314 first = False 6315 6316 partition, order = self._parse_partition_and_order() 6317 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6318 6319 if kind: 6320 self._match(TokenType.BETWEEN) 6321 start = self._parse_window_spec() 6322 self._match(TokenType.AND) 6323 end = self._parse_window_spec() 6324 6325 spec = self.expression( 6326 exp.WindowSpec, 6327 kind=kind, 6328 start=start["value"], 6329 start_side=start["side"], 6330 end=end["value"], 6331 end_side=end["side"], 6332 ) 6333 else: 6334 spec = None 6335 6336 self._match_r_paren() 6337 6338 window = self.expression( 6339 exp.Window, 6340 comments=comments, 6341 this=this, 6342 partition_by=partition, 6343 order=order, 6344 spec=spec, 6345 alias=window_alias, 6346 over=over, 6347 first=first, 6348 ) 6349 6350 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6351 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6352 return self._parse_window(window, alias=alias) 6353 6354 return window 6355 6356 def _parse_partition_and_order( 6357 self, 6358 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6359 return self._parse_partition_by(), self._parse_order() 6360 6361 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6362 self._match(TokenType.BETWEEN) 6363 6364 return { 6365 "value": ( 6366 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6367 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6368 or self._parse_bitwise() 6369 ), 6370 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6371 } 6372 6373 def _parse_alias( 6374 self, this: t.Optional[exp.Expression], explicit: bool = False 6375 ) -> t.Optional[exp.Expression]: 6376 any_token = self._match(TokenType.ALIAS) 6377 comments = self._prev_comments or [] 6378 6379 if explicit and not any_token: 6380 return this 6381 6382 if self._match(TokenType.L_PAREN): 6383 aliases = self.expression( 6384 exp.Aliases, 6385 comments=comments, 6386 this=this, 6387 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6388 ) 6389 self._match_r_paren(aliases) 6390 return aliases 6391 6392 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6393 self.STRING_ALIASES and self._parse_string_as_identifier() 6394 ) 6395 6396 if alias: 6397 comments.extend(alias.pop_comments()) 6398 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6399 column = this.this 6400 6401 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6402 if not this.comments and column and column.comments: 6403 this.comments = column.pop_comments() 6404 6405 return this 6406 6407 def _parse_id_var( 6408 self, 6409 any_token: bool = True, 6410 tokens: t.Optional[t.Collection[TokenType]] = None, 6411 ) -> t.Optional[exp.Expression]: 6412 expression = self._parse_identifier() 6413 if 
not expression and ( 6414 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6415 ): 6416 quoted = self._prev.token_type == TokenType.STRING 6417 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6418 6419 return expression 6420 6421 def _parse_string(self) -> t.Optional[exp.Expression]: 6422 if self._match_set(self.STRING_PARSERS): 6423 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6424 return self._parse_placeholder() 6425 6426 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6427 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6428 6429 def _parse_number(self) -> t.Optional[exp.Expression]: 6430 if self._match_set(self.NUMERIC_PARSERS): 6431 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6432 return self._parse_placeholder() 6433 6434 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6435 if self._match(TokenType.IDENTIFIER): 6436 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6437 return self._parse_placeholder() 6438 6439 def _parse_var( 6440 self, 6441 any_token: bool = False, 6442 tokens: t.Optional[t.Collection[TokenType]] = None, 6443 upper: bool = False, 6444 ) -> t.Optional[exp.Expression]: 6445 if ( 6446 (any_token and self._advance_any()) 6447 or self._match(TokenType.VAR) 6448 or (self._match_set(tokens) if tokens else False) 6449 ): 6450 return self.expression( 6451 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6452 ) 6453 return self._parse_placeholder() 6454 6455 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6456 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6457 self._advance() 6458 return self._prev 6459 return None 6460 6461 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6462 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6463 6464 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6465 return self._parse_primary() or self._parse_var(any_token=True) 6466 6467 def _parse_null(self) -> t.Optional[exp.Expression]: 6468 if self._match_set(self.NULL_TOKENS): 6469 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6470 return self._parse_placeholder() 6471 6472 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6473 if self._match(TokenType.TRUE): 6474 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6475 if self._match(TokenType.FALSE): 6476 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6477 return self._parse_placeholder() 6478 6479 def _parse_star(self) -> t.Optional[exp.Expression]: 6480 if self._match(TokenType.STAR): 6481 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6482 return self._parse_placeholder() 6483 6484 def _parse_parameter(self) -> exp.Parameter: 6485 this = self._parse_identifier() or self._parse_primary_or_var() 6486 return self.expression(exp.Parameter, this=this) 6487 6488 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6489 if self._match_set(self.PLACEHOLDER_PARSERS): 6490 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6491 if placeholder: 6492 return placeholder 6493 self._advance(-1) 6494 return None 6495 6496 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6497 if not self._match_texts(keywords): 6498 return None 6499 if self._match(TokenType.L_PAREN, 
advance=False): 6500 return self._parse_wrapped_csv(self._parse_expression) 6501 6502 expression = self._parse_expression() 6503 return [expression] if expression else None 6504 6505 def _parse_csv( 6506 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6507 ) -> t.List[exp.Expression]: 6508 parse_result = parse_method() 6509 items = [parse_result] if parse_result is not None else [] 6510 6511 while self._match(sep): 6512 self._add_comments(parse_result) 6513 parse_result = parse_method() 6514 if parse_result is not None: 6515 items.append(parse_result) 6516 6517 return items 6518 6519 def _parse_tokens( 6520 self, parse_method: t.Callable, expressions: t.Dict 6521 ) -> t.Optional[exp.Expression]: 6522 this = parse_method() 6523 6524 while self._match_set(expressions): 6525 this = self.expression( 6526 expressions[self._prev.token_type], 6527 this=this, 6528 comments=self._prev_comments, 6529 expression=parse_method(), 6530 ) 6531 6532 return this 6533 6534 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6535 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6536 6537 def _parse_wrapped_csv( 6538 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6539 ) -> t.List[exp.Expression]: 6540 return self._parse_wrapped( 6541 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6542 ) 6543 6544 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6545 wrapped = self._match(TokenType.L_PAREN) 6546 if not wrapped and not optional: 6547 self.raise_error("Expecting (") 6548 parse_result = parse_method() 6549 if wrapped: 6550 self._match_r_paren() 6551 return parse_result 6552 6553 def _parse_expressions(self) -> t.List[exp.Expression]: 6554 return self._parse_csv(self._parse_expression) 6555 6556 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6557 return self._parse_select() or self._parse_set_operations( 6558 self._parse_expression() if alias else self._parse_assignment() 6559 ) 6560 6561 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6562 return self._parse_query_modifiers( 6563 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6564 ) 6565 6566 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6567 this = None 6568 if self._match_texts(self.TRANSACTION_KIND): 6569 this = self._prev.text 6570 6571 self._match_texts(("TRANSACTION", "WORK")) 6572 6573 modes = [] 6574 while True: 6575 mode = [] 6576 while self._match(TokenType.VAR): 6577 mode.append(self._prev.text) 6578 6579 if mode: 6580 modes.append(" ".join(mode)) 6581 if not self._match(TokenType.COMMA): 6582 break 6583 6584 return self.expression(exp.Transaction, this=this, modes=modes) 6585 6586 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6587 chain = None 6588 savepoint = None 6589 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6590 6591 self._match_texts(("TRANSACTION", "WORK")) 6592 6593 if self._match_text_seq("TO"): 6594 self._match_text_seq("SAVEPOINT") 6595 savepoint = self._parse_id_var() 6596 6597 if self._match(TokenType.AND): 6598 chain = not self._match_text_seq("NO") 6599 self._match_text_seq("CHAIN") 6600 6601 if is_rollback: 6602 return self.expression(exp.Rollback, savepoint=savepoint) 6603 6604 return self.expression(exp.Commit, chain=chain) 6605 6606 def _parse_refresh(self) -> exp.Refresh: 6607 self._match(TokenType.TABLE) 6608 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6609 6610 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6611 if not self._match_text_seq("ADD"): 6612 return None 6613 6614 self._match(TokenType.COLUMN) 6615 exists_column = self._parse_exists(not_=True) 6616 expression = self._parse_field_def() 6617 6618 if expression: 6619 expression.set("exists", exists_column) 6620 6621 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6622 if self._match_texts(("FIRST", "AFTER")): 6623 position = self._prev.text 6624 column_position = self.expression( 6625 exp.ColumnPosition, this=self._parse_column(), position=position 6626 ) 6627 expression.set("position", column_position) 6628 6629 return expression 6630 6631 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6632 drop = self._match(TokenType.DROP) and self._parse_drop() 6633 if drop and not isinstance(drop, exp.Command): 6634 drop.set("kind", drop.args.get("kind", "COLUMN")) 6635 return drop 6636 6637 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6638 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6639 return self.expression( 6640 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6641 ) 6642 6643 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6644 index = self._index - 1 6645 6646 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6647 return self._parse_csv( 6648 lambda: self.expression( 6649 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6650 ) 6651 ) 6652 6653 self._retreat(index) 6654 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6655 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6656 6657 if self._match_text_seq("ADD", "COLUMNS"): 6658 schema = self._parse_schema() 6659 if schema: 6660 return [schema] 6661 return [] 6662 6663 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6664 6665 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6666 if self._match_texts(self.ALTER_ALTER_PARSERS): 6667 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6668 6669 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6670 # keyword after ALTER we default to parsing this statement 6671 self._match(TokenType.COLUMN) 6672 column = self._parse_field(any_token=True) 6673 6674 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6675 return self.expression(exp.AlterColumn, this=column, drop=True) 6676 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6677 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6678 if self._match(TokenType.COMMENT): 6679 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6680 if self._match_text_seq("DROP", "NOT", "NULL"): 6681 return self.expression( 6682 exp.AlterColumn, 6683 this=column, 6684 drop=True, 6685 allow_null=True, 6686 ) 6687 if self._match_text_seq("SET", "NOT", "NULL"): 6688 return self.expression( 6689 exp.AlterColumn, 6690 this=column, 6691 allow_null=False, 6692 ) 6693 self._match_text_seq("SET", "DATA") 6694 self._match_text_seq("TYPE") 6695 return self.expression( 6696 exp.AlterColumn, 6697 this=column, 6698 dtype=self._parse_types(), 6699 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6700 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6701 ) 6702 6703 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6704 if self._match_texts(("ALL", "EVEN", "AUTO")): 6705 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6706 6707 self._match_text_seq("KEY", "DISTKEY") 6708 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6709 6710 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6711 if compound: 6712 self._match_text_seq("SORTKEY") 6713 6714 if self._match(TokenType.L_PAREN, advance=False): 6715 return self.expression( 6716 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6717 ) 6718 6719 self._match_texts(("AUTO", "NONE")) 6720 return self.expression( 6721 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6722 ) 6723 6724 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6725 index = self._index - 1 6726 6727 partition_exists = self._parse_exists() 6728 if self._match(TokenType.PARTITION, advance=False): 6729 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6730 6731 self._retreat(index) 6732 return self._parse_csv(self._parse_drop_column) 6733 6734 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6735 if self._match(TokenType.COLUMN): 6736 exists = self._parse_exists() 6737 old_column = self._parse_column() 6738 to = self._match_text_seq("TO") 6739 new_column = self._parse_column() 6740 6741 if old_column is None or to is None or new_column is None: 6742 return None 6743 6744 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6745 6746 self._match_text_seq("TO") 6747 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6748 6749 def _parse_alter_table_set(self) -> exp.AlterSet: 6750 alter_set = self.expression(exp.AlterSet) 6751 6752 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6753 "TABLE", "PROPERTIES" 6754 ): 6755 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6756 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6757 alter_set.set("expressions", [self._parse_assignment()]) 6758 elif self._match_texts(("LOGGED", "UNLOGGED")): 6759 alter_set.set("option", exp.var(self._prev.text.upper())) 6760 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6761 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6762 elif self._match_text_seq("LOCATION"): 6763 alter_set.set("location", self._parse_field()) 6764 elif self._match_text_seq("ACCESS", "METHOD"): 6765 alter_set.set("access_method", self._parse_field()) 6766 elif self._match_text_seq("TABLESPACE"): 6767 alter_set.set("tablespace", self._parse_field()) 6768 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6769 alter_set.set("file_format", [self._parse_field()]) 6770 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6771 alter_set.set("file_format", self._parse_wrapped_options()) 6772 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6773 alter_set.set("copy_options", self._parse_wrapped_options()) 6774 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6775 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6776 else: 6777 if self._match_text_seq("SERDE"): 6778 alter_set.set("serde", self._parse_field()) 6779 6780 alter_set.set("expressions", [self._parse_properties()]) 6781 6782 return 
alter_set 6783 6784 def _parse_alter(self) -> exp.Alter | exp.Command: 6785 start = self._prev 6786 6787 alter_token = self._match_set(self.ALTERABLES) and self._prev 6788 if not alter_token: 6789 return self._parse_as_command(start) 6790 6791 exists = self._parse_exists() 6792 only = self._match_text_seq("ONLY") 6793 this = self._parse_table(schema=True) 6794 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6795 6796 if self._next: 6797 self._advance() 6798 6799 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6800 if parser: 6801 actions = ensure_list(parser(self)) 6802 not_valid = self._match_text_seq("NOT", "VALID") 6803 options = self._parse_csv(self._parse_property) 6804 6805 if not self._curr and actions: 6806 return self.expression( 6807 exp.Alter, 6808 this=this, 6809 kind=alter_token.text.upper(), 6810 exists=exists, 6811 actions=actions, 6812 only=only, 6813 options=options, 6814 cluster=cluster, 6815 not_valid=not_valid, 6816 ) 6817 6818 return self._parse_as_command(start) 6819 6820 def _parse_merge(self) -> exp.Merge: 6821 self._match(TokenType.INTO) 6822 target = self._parse_table() 6823 6824 if target and self._match(TokenType.ALIAS, advance=False): 6825 target.set("alias", self._parse_table_alias()) 6826 6827 self._match(TokenType.USING) 6828 using = self._parse_table() 6829 6830 self._match(TokenType.ON) 6831 on = self._parse_assignment() 6832 6833 return self.expression( 6834 exp.Merge, 6835 this=target, 6836 using=using, 6837 on=on, 6838 expressions=self._parse_when_matched(), 6839 returning=self._match(TokenType.RETURNING) and self._parse_csv(self._parse_bitwise), 6840 ) 6841 6842 def _parse_when_matched(self) -> t.List[exp.When]: 6843 whens = [] 6844 6845 while self._match(TokenType.WHEN): 6846 matched = not self._match(TokenType.NOT) 6847 self._match_text_seq("MATCHED") 6848 source = ( 6849 False 6850 if self._match_text_seq("BY", "TARGET") 6851 else self._match_text_seq("BY", "SOURCE") 6852 ) 6853 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6854 6855 self._match(TokenType.THEN) 6856 6857 if self._match(TokenType.INSERT): 6858 this = self._parse_star() 6859 if this: 6860 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6861 else: 6862 then = self.expression( 6863 exp.Insert, 6864 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6865 expression=self._match_text_seq("VALUES") and self._parse_value(), 6866 ) 6867 elif self._match(TokenType.UPDATE): 6868 expressions = self._parse_star() 6869 if expressions: 6870 then = self.expression(exp.Update, expressions=expressions) 6871 else: 6872 then = self.expression( 6873 exp.Update, 6874 expressions=self._match(TokenType.SET) 6875 and self._parse_csv(self._parse_equality), 6876 ) 6877 elif self._match(TokenType.DELETE): 6878 then = self.expression(exp.Var, this=self._prev.text) 6879 else: 6880 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6881 6882 whens.append( 6883 self.expression( 6884 exp.When, 6885 matched=matched, 6886 source=source, 6887 condition=condition, 6888 then=then, 6889 ) 6890 ) 6891 return whens 6892 6893 def _parse_show(self) -> t.Optional[exp.Expression]: 6894 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6895 if parser: 6896 return parser(self) 6897 return self._parse_as_command(self._prev) 6898 6899 def _parse_set_item_assignment( 6900 self, kind: t.Optional[str] = None 6901 ) -> t.Optional[exp.Expression]: 6902 index = self._index 6903 
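# SET [GLOBAL | SESSION] TRANSACTION ... is delegated to _parse_set_transaction;
# plain assignments like SET x = 5 or SET x TO 'y' fall through to the logic below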
6904 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6905 return self._parse_set_transaction(global_=kind == "GLOBAL") 6906 6907 left = self._parse_primary() or self._parse_column() 6908 assignment_delimiter = self._match_texts(("=", "TO")) 6909 6910 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6911 self._retreat(index) 6912 return None 6913 6914 right = self._parse_statement() or self._parse_id_var() 6915 if isinstance(right, (exp.Column, exp.Identifier)): 6916 right = exp.var(right.name) 6917 6918 this = self.expression(exp.EQ, this=left, expression=right) 6919 return self.expression(exp.SetItem, this=this, kind=kind) 6920 6921 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6922 self._match_text_seq("TRANSACTION") 6923 characteristics = self._parse_csv( 6924 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6925 ) 6926 return self.expression( 6927 exp.SetItem, 6928 expressions=characteristics, 6929 kind="TRANSACTION", 6930 **{"global": global_}, # type: ignore 6931 ) 6932 6933 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6934 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6935 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6936 6937 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6938 index = self._index 6939 set_ = self.expression( 6940 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6941 ) 6942 6943 if self._curr: 6944 self._retreat(index) 6945 return self._parse_as_command(self._prev) 6946 6947 return set_ 6948 6949 def _parse_var_from_options( 6950 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6951 ) -> t.Optional[exp.Var]: 6952 start = self._curr 6953 if not start: 6954 return None 6955 6956 option = start.text.upper() 6957 continuations = options.get(option) 6958 6959 index = self._index 6960 self._advance() 6961 for keywords in continuations or []: 6962 if isinstance(keywords, str): 6963 keywords = (keywords,) 6964 6965 if self._match_text_seq(*keywords): 6966 option = f"{option} {' '.join(keywords)}" 6967 break 6968 else: 6969 if continuations or continuations is None: 6970 if raise_unmatched: 6971 self.raise_error(f"Unknown option {option}") 6972 6973 self._retreat(index) 6974 return None 6975 6976 return exp.var(option) 6977 6978 def _parse_as_command(self, start: Token) -> exp.Command: 6979 while self._curr: 6980 self._advance() 6981 text = self._find_sql(start, self._prev) 6982 size = len(start.text) 6983 self._warn_unsupported() 6984 return exp.Command(this=text[:size], expression=text[size:]) 6985 6986 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6987 settings = [] 6988 6989 self._match_l_paren() 6990 kind = self._parse_id_var() 6991 6992 if self._match(TokenType.L_PAREN): 6993 while True: 6994 key = self._parse_id_var() 6995 value = self._parse_primary() 6996 6997 if not key and value is None: 6998 break 6999 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7000 self._match(TokenType.R_PAREN) 7001 7002 self._match_r_paren() 7003 7004 return self.expression( 7005 exp.DictProperty, 7006 this=this, 7007 kind=kind.this if kind else None, 7008 settings=settings, 7009 ) 7010 7011 def _parse_dict_range(self, this: str) -> exp.DictRange: 7012 self._match_l_paren() 7013 has_min = self._match_text_seq("MIN") 7014 if has_min: 7015 min = self._parse_var() or self._parse_primary() 7016 
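# both bounds are given explicitly, e.g. RANGE(MIN start_attr MAX end_attr)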
self._match_text_seq("MAX") 7017 max = self._parse_var() or self._parse_primary() 7018 else: 7019 max = self._parse_var() or self._parse_primary() 7020 min = exp.Literal.number(0) 7021 self._match_r_paren() 7022 return self.expression(exp.DictRange, this=this, min=min, max=max) 7023 7024 def _parse_comprehension( 7025 self, this: t.Optional[exp.Expression] 7026 ) -> t.Optional[exp.Comprehension]: 7027 index = self._index 7028 expression = self._parse_column() 7029 if not self._match(TokenType.IN): 7030 self._retreat(index - 1) 7031 return None 7032 iterator = self._parse_column() 7033 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7034 return self.expression( 7035 exp.Comprehension, 7036 this=this, 7037 expression=expression, 7038 iterator=iterator, 7039 condition=condition, 7040 ) 7041 7042 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7043 if self._match(TokenType.HEREDOC_STRING): 7044 return self.expression(exp.Heredoc, this=self._prev.text) 7045 7046 if not self._match_text_seq("$"): 7047 return None 7048 7049 tags = ["$"] 7050 tag_text = None 7051 7052 if self._is_connected(): 7053 self._advance() 7054 tags.append(self._prev.text.upper()) 7055 else: 7056 self.raise_error("No closing $ found") 7057 7058 if tags[-1] != "$": 7059 if self._is_connected() and self._match_text_seq("$"): 7060 tag_text = tags[-1] 7061 tags.append("$") 7062 else: 7063 self.raise_error("No closing $ found") 7064 7065 heredoc_start = self._curr 7066 7067 while self._curr: 7068 if self._match_text_seq(*tags, advance=False): 7069 this = self._find_sql(heredoc_start, self._prev) 7070 self._advance(len(tags)) 7071 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7072 7073 self._advance() 7074 7075 self.raise_error(f"No closing {''.join(tags)} found") 7076 return None 7077 7078 def _find_parser( 7079 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7080 ) -> t.Optional[t.Callable]: 7081 if not self._curr: 7082 return None 7083 7084 index = self._index 7085 this = [] 7086 while True: 7087 # The current token might be multiple words 7088 curr = self._curr.text.upper() 7089 key = curr.split(" ") 7090 this.append(curr) 7091 7092 self._advance() 7093 result, trie = in_trie(trie, key) 7094 if result == TrieResult.FAILED: 7095 break 7096 7097 if result == TrieResult.EXISTS: 7098 subparser = parsers[" ".join(this)] 7099 return subparser 7100 7101 self._retreat(index) 7102 return None 7103 7104 def _match(self, token_type, advance=True, expression=None): 7105 if not self._curr: 7106 return None 7107 7108 if self._curr.token_type == token_type: 7109 if advance: 7110 self._advance() 7111 self._add_comments(expression) 7112 return True 7113 7114 return None 7115 7116 def _match_set(self, types, advance=True): 7117 if not self._curr: 7118 return None 7119 7120 if self._curr.token_type in types: 7121 if advance: 7122 self._advance() 7123 return True 7124 7125 return None 7126 7127 def _match_pair(self, token_type_a, token_type_b, advance=True): 7128 if not self._curr or not self._next: 7129 return None 7130 7131 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7132 if advance: 7133 self._advance(2) 7134 return True 7135 7136 return None 7137 7138 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7139 if not self._match(TokenType.L_PAREN, expression=expression): 7140 self.raise_error("Expecting (") 7141 7142 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7143 if not 
self._match(TokenType.R_PAREN, expression=expression): 7144 self.raise_error("Expecting )") 7145 7146 def _match_texts(self, texts, advance=True): 7147 if ( 7148 self._curr 7149 and self._curr.token_type != TokenType.STRING 7150 and self._curr.text.upper() in texts 7151 ): 7152 if advance: 7153 self._advance() 7154 return True 7155 return None 7156 7157 def _match_text_seq(self, *texts, advance=True): 7158 index = self._index 7159 for text in texts: 7160 if ( 7161 self._curr 7162 and self._curr.token_type != TokenType.STRING 7163 and self._curr.text.upper() == text 7164 ): 7165 self._advance() 7166 else: 7167 self._retreat(index) 7168 return None 7169 7170 if not advance: 7171 self._retreat(index) 7172 7173 return True 7174 7175 def _replace_lambda( 7176 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7177 ) -> t.Optional[exp.Expression]: 7178 if not node: 7179 return node 7180 7181 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7182 7183 for column in node.find_all(exp.Column): 7184 typ = lambda_types.get(column.parts[0].name) 7185 if typ is not None: 7186 dot_or_id = column.to_dot() if column.table else column.this 7187 7188 if typ: 7189 dot_or_id = self.expression( 7190 exp.Cast, 7191 this=dot_or_id, 7192 to=typ, 7193 ) 7194 7195 parent = column.parent 7196 7197 while isinstance(parent, exp.Dot): 7198 if not isinstance(parent.parent, exp.Dot): 7199 parent.replace(dot_or_id) 7200 break 7201 parent = parent.parent 7202 else: 7203 if column is node: 7204 node = dot_or_id 7205 else: 7206 column.replace(dot_or_id) 7207 return node 7208 7209 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7210 start = self._prev 7211 7212 # Not to be confused with TRUNCATE(number, decimals) function call 7213 if self._match(TokenType.L_PAREN): 7214 self._retreat(self._index - 2) 7215 return self._parse_function() 7216 7217 # Clickhouse supports TRUNCATE DATABASE as well 7218 is_database = self._match(TokenType.DATABASE) 7219 7220 self._match(TokenType.TABLE) 7221 7222 exists = self._parse_exists(not_=False) 7223 7224 expressions = self._parse_csv( 7225 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7226 ) 7227 7228 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7229 7230 if self._match_text_seq("RESTART", "IDENTITY"): 7231 identity = "RESTART" 7232 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7233 identity = "CONTINUE" 7234 else: 7235 identity = None 7236 7237 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7238 option = self._prev.text 7239 else: 7240 option = None 7241 7242 partition = self._parse_partition() 7243 7244 # Fallback case 7245 if self._curr: 7246 return self._parse_as_command(start) 7247 7248 return self.expression( 7249 exp.TruncateTable, 7250 expressions=expressions, 7251 is_database=is_database, 7252 exists=exists, 7253 cluster=cluster, 7254 identity=identity, 7255 option=option, 7256 partition=partition, 7257 ) 7258 7259 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7260 this = self._parse_ordered(self._parse_opclass) 7261 7262 if not self._match(TokenType.WITH): 7263 return this 7264 7265 op = self._parse_var(any_token=True) 7266 7267 return self.expression(exp.WithOperator, this=this, op=op) 7268 7269 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7270 self._match(TokenType.EQ) 7271 self._match(TokenType.L_PAREN) 7272 7273 opts: t.List[t.Optional[exp.Expression]] = [] 7274 while 
self._curr and not self._match(TokenType.R_PAREN): 7275 if self._match_text_seq("FORMAT_NAME", "="): 7276 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7277 # so we parse it separately to use _parse_field() 7278 prop = self.expression( 7279 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7280 ) 7281 opts.append(prop) 7282 else: 7283 opts.append(self._parse_property()) 7284 7285 self._match(TokenType.COMMA) 7286 7287 return opts 7288 7289 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7290 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7291 7292 options = [] 7293 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7294 option = self._parse_var(any_token=True) 7295 prev = self._prev.text.upper() 7296 7297 # Different dialects might separate options and values by white space, "=" and "AS" 7298 self._match(TokenType.EQ) 7299 self._match(TokenType.ALIAS) 7300 7301 param = self.expression(exp.CopyParameter, this=option) 7302 7303 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7304 TokenType.L_PAREN, advance=False 7305 ): 7306 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7307 param.set("expressions", self._parse_wrapped_options()) 7308 elif prev == "FILE_FORMAT": 7309 # T-SQL's external file format case 7310 param.set("expression", self._parse_field()) 7311 else: 7312 param.set("expression", self._parse_unquoted_field()) 7313 7314 options.append(param) 7315 self._match(sep) 7316 7317 return options 7318 7319 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7320 expr = self.expression(exp.Credentials) 7321 7322 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7323 expr.set("storage", self._parse_field()) 7324 if self._match_text_seq("CREDENTIALS"): 7325 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7326 creds = ( 7327 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7328 ) 7329 expr.set("credentials", creds) 7330 if self._match_text_seq("ENCRYPTION"): 7331 expr.set("encryption", self._parse_wrapped_options()) 7332 if self._match_text_seq("IAM_ROLE"): 7333 expr.set("iam_role", self._parse_field()) 7334 if self._match_text_seq("REGION"): 7335 expr.set("region", self._parse_field()) 7336 7337 return expr 7338 7339 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7340 return self._parse_field() 7341 7342 def _parse_copy(self) -> exp.Copy | exp.Command: 7343 start = self._prev 7344 7345 self._match(TokenType.INTO) 7346 7347 this = ( 7348 self._parse_select(nested=True, parse_subquery_alias=False) 7349 if self._match(TokenType.L_PAREN, advance=False) 7350 else self._parse_table(schema=True) 7351 ) 7352 7353 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7354 7355 files = self._parse_csv(self._parse_file_location) 7356 credentials = self._parse_credentials() 7357 7358 self._match_text_seq("WITH") 7359 7360 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7361 7362 # Fallback case 7363 if self._curr: 7364 return self._parse_as_command(start) 7365 7366 return self.expression( 7367 exp.Copy, 7368 this=this, 7369 kind=kind, 7370 credentials=credentials, 7371 files=files, 7372 params=params, 7373 ) 7374 7375 def _parse_normalize(self) -> exp.Normalize: 7376 return self.expression( 7377 exp.Normalize, 7378 this=self._parse_bitwise(), 7379 form=self._match(TokenType.COMMA) and self._parse_var(), 7380 ) 7381 7382 def _parse_star_ops(self) -> 
exp.Star | exp.UnpackColumns: 7383 if self._match_text_seq("COLUMNS", "(", advance=False): 7384 return exp.UnpackColumns(this=self._parse_function()) 7385 7386 return self.expression( 7387 exp.Star, 7388 **{ # type: ignore 7389 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7390 "replace": self._parse_star_op("REPLACE"), 7391 "rename": self._parse_star_op("RENAME"), 7392 }, 7393 )
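The statement parsers above are normally exercised through sqlglot's public helpers rather than called directly. A minimal sketch, assuming the default dialect (the MERGE statement is illustrative); parse_one tokenizes the SQL and delegates to Parser.parse:

    import sqlglot
    from sqlglot import exp

    # MERGE INTO ... flows through _parse_merge and _parse_when_matched above
    merge = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id WHEN MATCHED THEN DELETE"
    )
    assert isinstance(merge, exp.Merge)
    assert isinstance(merge.expressions[0], exp.When)  # one node per WHEN clause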
26 def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
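A minimal usage sketch of build_var_map, called directly with already-parsed arguments (the literals are illustrative):

    from sqlglot import exp
    from sqlglot.parser import build_var_map

    node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
    assert isinstance(node, exp.VarMap)  # keys and values become parallel arrays

    star = build_var_map([exp.Star()])
    assert isinstance(star, exp.StarMap)  # VAR_MAP(*) short-circuits to StarMap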
44 def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
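binary_range_parser is a factory: the closure it returns is what RANGE_PARSERS maps operator tokens to (e.g. TokenType.GLOB -> binary_range_parser(exp.Glob) further down). A small end-to-end sketch, assuming the default dialect:

    from sqlglot import exp, parse_one

    node = parse_one("x GLOB 'a*'")
    assert isinstance(node, exp.Glob)  # built by binary_range_parser(exp.Glob)
    assert node.this.name == "x"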
58 def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
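A sketch of the dialect-sensitive branches, assuming the default dialect (where LOG_BASE_FIRST holds, so the base stays in the first slot):

    from sqlglot import exp, parse_one

    log = parse_one("SELECT LOG(2, 8)").find(exp.Log)
    assert log.this.sql() == "2" and log.expression.sql() == "8"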
88 def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
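A sketch showing the second argument being normalized by dialect.to_json_path, assuming the default dialect (the path is illustrative):

    from sqlglot import exp, parse_one

    node = parse_one("SELECT JSON_EXTRACT(x, '$.a')").find(exp.JSONExtract)
    assert isinstance(node.expression, exp.JSONPath)  # '$.a' was parsed into a JSON path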
101 def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
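The wrapping matters once MOD is rendered with the % operator; a quick check under the default dialect:

    from sqlglot import parse_one

    assert parse_one("SELECT MOD(a + 1, 7)").sql() == "SELECT (a + 1) % 7"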
121 def build_array_constructor( 122 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 123) -> exp.Expression: 124 array_exp = exp_class(expressions=args) 125 126 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 127 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 128 129 return array_exp
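A direct-call sketch with the base Dialect, where HAS_DISTINCT_ARRAY_CONSTRUCTORS is off and therefore no bracket notation is recorded:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect
    from sqlglot.parser import build_array_constructor
    from sqlglot.tokens import TokenType

    node = build_array_constructor(
        exp.Array, [exp.Literal.number(1)], TokenType.L_BRACKET, Dialect()
    )
    assert isinstance(node, exp.Array)
    assert node.args.get("bracket_notation") is None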
132 def build_convert_timezone( 133 args: t.List, default_source_tz: t.Optional[str] = None 134) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 135 if len(args) == 2: 136 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 137 return exp.ConvertTimezone( 138 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 139 ) 140 141 return exp.ConvertTimezone.from_arg_list(args)
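A sketch of the two-argument path, where the missing source timezone is backfilled from the caller-supplied default (the timezone names are illustrative):

    from sqlglot import exp
    from sqlglot.parser import build_convert_timezone

    node = build_convert_timezone(
        [exp.Literal.string("UTC"), exp.column("ts")],
        default_source_tz="America/New_York",
    )
    assert node.args["source_tz"].name == "America/New_York"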
166 class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 217 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 218 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 219 "LIKE": build_like, 220 "LOG": build_logarithm, 221 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 222 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 223 "LOWER": build_lower, 224 "LPAD": lambda args: build_pad(args), 225 "LEFTPAD": lambda args: build_pad(args), 226 "LTRIM": lambda args: build_trim(args), 227 "MOD": build_mod, 228 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 229 "RPAD": lambda args: build_pad(args, is_left=False), 230 "RTRIM": lambda args: build_trim(args, is_left=False), 231 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 232 if len(args) != 2 233 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 234 "TIME_TO_TIME_STR": lambda args: exp.Cast( 235 this=seq_get(args, 0), 236 to=exp.DataType(this=exp.DataType.Type.TEXT), 237 ), 238 "TO_HEX": build_hex, 239 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 240 this=exp.Cast( 241 this=seq_get(args, 0), 242 to=exp.DataType(this=exp.DataType.Type.TEXT), 243 ), 244 start=exp.Literal.number(1), 245 length=exp.Literal.number(10), 246 ), 247 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 248 "UPPER": build_upper, 249 "VAR_MAP": build_var_map, 250 } 251 252 NO_PAREN_FUNCTIONS = { 253 TokenType.CURRENT_DATE: exp.CurrentDate, 254 TokenType.CURRENT_DATETIME: exp.CurrentDate, 255 TokenType.CURRENT_TIME: exp.CurrentTime, 256 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 257 TokenType.CURRENT_USER: exp.CurrentUser, 258 } 259 260 STRUCT_TYPE_TOKENS = { 261 TokenType.NESTED, 262 TokenType.OBJECT, 263 TokenType.STRUCT, 264 } 265 266 NESTED_TYPE_TOKENS = { 267 TokenType.ARRAY, 268 TokenType.LIST, 269 TokenType.LOWCARDINALITY, 270 TokenType.MAP, 271 TokenType.NULLABLE, 272 *STRUCT_TYPE_TOKENS, 273 } 274 275 ENUM_TYPE_TOKENS = { 276 TokenType.ENUM, 277 TokenType.ENUM8, 278 TokenType.ENUM16, 279 } 280 281 AGGREGATE_TYPE_TOKENS = { 282 TokenType.AGGREGATEFUNCTION, 283 TokenType.SIMPLEAGGREGATEFUNCTION, 284 } 285 286 TYPE_TOKENS = { 287 TokenType.BIT, 288 TokenType.BOOLEAN, 289 TokenType.TINYINT, 290 TokenType.UTINYINT, 291 TokenType.SMALLINT, 292 TokenType.USMALLINT, 293 TokenType.INT, 294 TokenType.UINT, 295 TokenType.BIGINT, 296 TokenType.UBIGINT, 297 TokenType.INT128, 298 TokenType.UINT128, 299 TokenType.INT256, 300 TokenType.UINT256, 301 TokenType.MEDIUMINT, 302 TokenType.UMEDIUMINT, 303 TokenType.FIXEDSTRING, 304 TokenType.FLOAT, 305 TokenType.DOUBLE, 306 TokenType.CHAR, 307 TokenType.NCHAR, 308 TokenType.VARCHAR, 309 TokenType.NVARCHAR, 310 TokenType.BPCHAR, 311 TokenType.TEXT, 312 TokenType.MEDIUMTEXT, 313 TokenType.LONGTEXT, 314 TokenType.MEDIUMBLOB, 315 TokenType.LONGBLOB, 316 TokenType.BINARY, 317 TokenType.VARBINARY, 318 TokenType.JSON, 319 TokenType.JSONB, 320 TokenType.INTERVAL, 321 TokenType.TINYBLOB, 322 TokenType.TINYTEXT, 323 TokenType.TIME, 324 TokenType.TIMETZ, 325 TokenType.TIMESTAMP, 326 TokenType.TIMESTAMP_S, 327 TokenType.TIMESTAMP_MS, 328 TokenType.TIMESTAMP_NS, 329 TokenType.TIMESTAMPTZ, 330 TokenType.TIMESTAMPLTZ, 331 TokenType.TIMESTAMPNTZ, 332 TokenType.DATETIME, 333 TokenType.DATETIME64, 334 TokenType.DATE, 335 TokenType.DATE32, 336 TokenType.INT4RANGE, 337 TokenType.INT4MULTIRANGE, 338 TokenType.INT8RANGE, 339 TokenType.INT8MULTIRANGE, 340 TokenType.NUMRANGE, 341 TokenType.NUMMULTIRANGE, 342 TokenType.TSRANGE, 343 TokenType.TSMULTIRANGE, 344 TokenType.TSTZRANGE, 345 TokenType.TSTZMULTIRANGE, 346 TokenType.DATERANGE, 347 TokenType.DATEMULTIRANGE, 348 TokenType.DECIMAL, 349 TokenType.DECIMAL32, 350 TokenType.DECIMAL64, 351 TokenType.DECIMAL128, 352 TokenType.UDECIMAL, 353 TokenType.BIGDECIMAL, 354 TokenType.UUID, 355 TokenType.GEOGRAPHY, 356 TokenType.GEOMETRY, 357 TokenType.HLLSKETCH, 358 TokenType.HSTORE, 359 TokenType.PSEUDO_TYPE, 360 TokenType.SUPER, 361 TokenType.SERIAL, 362 TokenType.SMALLSERIAL, 363 TokenType.BIGSERIAL, 364 TokenType.XML, 365 TokenType.YEAR, 366 TokenType.UNIQUEIDENTIFIER, 367 TokenType.USERDEFINED, 368 TokenType.MONEY, 369 TokenType.SMALLMONEY, 370 TokenType.ROWVERSION, 371 TokenType.IMAGE, 372 TokenType.VARIANT, 373 TokenType.VECTOR, 374 TokenType.OBJECT, 375 TokenType.OBJECT_IDENTIFIER, 376 TokenType.INET, 377 TokenType.IPADDRESS, 378 TokenType.IPPREFIX, 379 TokenType.IPV4, 380 TokenType.IPV6, 381 TokenType.UNKNOWN, 382 TokenType.NULL, 383 TokenType.NAME, 384 TokenType.TDIGEST, 385 *ENUM_TYPE_TOKENS, 386 *NESTED_TYPE_TOKENS, 387 *AGGREGATE_TYPE_TOKENS, 388 } 389 390 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 391 TokenType.BIGINT: TokenType.UBIGINT, 392 TokenType.INT: TokenType.UINT, 393 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 394 TokenType.SMALLINT: 
TokenType.USMALLINT, 395 TokenType.TINYINT: TokenType.UTINYINT, 396 TokenType.DECIMAL: TokenType.UDECIMAL, 397 } 398 399 SUBQUERY_PREDICATES = { 400 TokenType.ANY: exp.Any, 401 TokenType.ALL: exp.All, 402 TokenType.EXISTS: exp.Exists, 403 TokenType.SOME: exp.Any, 404 } 405 406 RESERVED_TOKENS = { 407 *Tokenizer.SINGLE_TOKENS.values(), 408 TokenType.SELECT, 409 } - {TokenType.IDENTIFIER} 410 411 DB_CREATABLES = { 412 TokenType.DATABASE, 413 TokenType.DICTIONARY, 414 TokenType.MODEL, 415 TokenType.SCHEMA, 416 TokenType.SEQUENCE, 417 TokenType.STORAGE_INTEGRATION, 418 TokenType.TABLE, 419 TokenType.TAG, 420 TokenType.VIEW, 421 TokenType.WAREHOUSE, 422 TokenType.STREAMLIT, 423 } 424 425 CREATABLES = { 426 TokenType.COLUMN, 427 TokenType.CONSTRAINT, 428 TokenType.FOREIGN_KEY, 429 TokenType.FUNCTION, 430 TokenType.INDEX, 431 TokenType.PROCEDURE, 432 *DB_CREATABLES, 433 } 434 435 ALTERABLES = { 436 TokenType.INDEX, 437 TokenType.TABLE, 438 TokenType.VIEW, 439 } 440 441 # Tokens that can represent identifiers 442 ID_VAR_TOKENS = { 443 TokenType.ALL, 444 TokenType.VAR, 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASC, 448 TokenType.ASOF, 449 TokenType.AUTO_INCREMENT, 450 TokenType.BEGIN, 451 TokenType.BPCHAR, 452 TokenType.CACHE, 453 TokenType.CASE, 454 TokenType.COLLATE, 455 TokenType.COMMAND, 456 TokenType.COMMENT, 457 TokenType.COMMIT, 458 TokenType.CONSTRAINT, 459 TokenType.COPY, 460 TokenType.CUBE, 461 TokenType.DEFAULT, 462 TokenType.DELETE, 463 TokenType.DESC, 464 TokenType.DESCRIBE, 465 TokenType.DICTIONARY, 466 TokenType.DIV, 467 TokenType.END, 468 TokenType.EXECUTE, 469 TokenType.ESCAPE, 470 TokenType.FALSE, 471 TokenType.FIRST, 472 TokenType.FILTER, 473 TokenType.FINAL, 474 TokenType.FORMAT, 475 TokenType.FULL, 476 TokenType.IDENTIFIER, 477 TokenType.IS, 478 TokenType.ISNULL, 479 TokenType.INTERVAL, 480 TokenType.KEEP, 481 TokenType.KILL, 482 TokenType.LEFT, 483 TokenType.LOAD, 484 TokenType.MERGE, 485 TokenType.NATURAL, 486 TokenType.NEXT, 487 TokenType.OFFSET, 488 TokenType.OPERATOR, 489 TokenType.ORDINALITY, 490 TokenType.OVERLAPS, 491 TokenType.OVERWRITE, 492 TokenType.PARTITION, 493 TokenType.PERCENT, 494 TokenType.PIVOT, 495 TokenType.PRAGMA, 496 TokenType.RANGE, 497 TokenType.RECURSIVE, 498 TokenType.REFERENCES, 499 TokenType.REFRESH, 500 TokenType.RENAME, 501 TokenType.REPLACE, 502 TokenType.RIGHT, 503 TokenType.ROLLUP, 504 TokenType.ROW, 505 TokenType.ROWS, 506 TokenType.SEMI, 507 TokenType.SET, 508 TokenType.SETTINGS, 509 TokenType.SHOW, 510 TokenType.TEMPORARY, 511 TokenType.TOP, 512 TokenType.TRUE, 513 TokenType.TRUNCATE, 514 TokenType.UNIQUE, 515 TokenType.UNNEST, 516 TokenType.UNPIVOT, 517 TokenType.UPDATE, 518 TokenType.USE, 519 TokenType.VOLATILE, 520 TokenType.WINDOW, 521 *CREATABLES, 522 *SUBQUERY_PREDICATES, 523 *TYPE_TOKENS, 524 *NO_PAREN_FUNCTIONS, 525 } 526 527 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 528 529 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 530 TokenType.ANTI, 531 TokenType.APPLY, 532 TokenType.ASOF, 533 TokenType.FULL, 534 TokenType.LEFT, 535 TokenType.LOCK, 536 TokenType.NATURAL, 537 TokenType.OFFSET, 538 TokenType.RIGHT, 539 TokenType.SEMI, 540 TokenType.WINDOW, 541 } 542 543 ALIAS_TOKENS = ID_VAR_TOKENS 544 545 ARRAY_CONSTRUCTORS = { 546 "ARRAY": exp.Array, 547 "LIST": exp.List, 548 } 549 550 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 551 552 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 553 554 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 555 556 FUNC_TOKENS = { 557 TokenType.COLLATE, 558 
TokenType.COMMAND, 559 TokenType.CURRENT_DATE, 560 TokenType.CURRENT_DATETIME, 561 TokenType.CURRENT_TIMESTAMP, 562 TokenType.CURRENT_TIME, 563 TokenType.CURRENT_USER, 564 TokenType.FILTER, 565 TokenType.FIRST, 566 TokenType.FORMAT, 567 TokenType.GLOB, 568 TokenType.IDENTIFIER, 569 TokenType.INDEX, 570 TokenType.ISNULL, 571 TokenType.ILIKE, 572 TokenType.INSERT, 573 TokenType.LIKE, 574 TokenType.MERGE, 575 TokenType.OFFSET, 576 TokenType.PRIMARY_KEY, 577 TokenType.RANGE, 578 TokenType.REPLACE, 579 TokenType.RLIKE, 580 TokenType.ROW, 581 TokenType.UNNEST, 582 TokenType.VAR, 583 TokenType.LEFT, 584 TokenType.RIGHT, 585 TokenType.SEQUENCE, 586 TokenType.DATE, 587 TokenType.DATETIME, 588 TokenType.TABLE, 589 TokenType.TIMESTAMP, 590 TokenType.TIMESTAMPTZ, 591 TokenType.TRUNCATE, 592 TokenType.WINDOW, 593 TokenType.XOR, 594 *TYPE_TOKENS, 595 *SUBQUERY_PREDICATES, 596 } 597 598 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 599 TokenType.AND: exp.And, 600 } 601 602 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 603 TokenType.COLON_EQ: exp.PropertyEQ, 604 } 605 606 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 607 TokenType.OR: exp.Or, 608 } 609 610 EQUALITY = { 611 TokenType.EQ: exp.EQ, 612 TokenType.NEQ: exp.NEQ, 613 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 614 } 615 616 COMPARISON = { 617 TokenType.GT: exp.GT, 618 TokenType.GTE: exp.GTE, 619 TokenType.LT: exp.LT, 620 TokenType.LTE: exp.LTE, 621 } 622 623 BITWISE = { 624 TokenType.AMP: exp.BitwiseAnd, 625 TokenType.CARET: exp.BitwiseXor, 626 TokenType.PIPE: exp.BitwiseOr, 627 } 628 629 TERM = { 630 TokenType.DASH: exp.Sub, 631 TokenType.PLUS: exp.Add, 632 TokenType.MOD: exp.Mod, 633 TokenType.COLLATE: exp.Collate, 634 } 635 636 FACTOR = { 637 TokenType.DIV: exp.IntDiv, 638 TokenType.LR_ARROW: exp.Distance, 639 TokenType.SLASH: exp.Div, 640 TokenType.STAR: exp.Mul, 641 } 642 643 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 644 645 TIMES = { 646 TokenType.TIME, 647 TokenType.TIMETZ, 648 } 649 650 TIMESTAMPS = { 651 TokenType.TIMESTAMP, 652 TokenType.TIMESTAMPTZ, 653 TokenType.TIMESTAMPLTZ, 654 *TIMES, 655 } 656 657 SET_OPERATIONS = { 658 TokenType.UNION, 659 TokenType.INTERSECT, 660 TokenType.EXCEPT, 661 } 662 663 JOIN_METHODS = { 664 TokenType.ASOF, 665 TokenType.NATURAL, 666 TokenType.POSITIONAL, 667 } 668 669 JOIN_SIDES = { 670 TokenType.LEFT, 671 TokenType.RIGHT, 672 TokenType.FULL, 673 } 674 675 JOIN_KINDS = { 676 TokenType.ANTI, 677 TokenType.CROSS, 678 TokenType.INNER, 679 TokenType.OUTER, 680 TokenType.SEMI, 681 TokenType.STRAIGHT_JOIN, 682 } 683 684 JOIN_HINTS: t.Set[str] = set() 685 686 LAMBDAS = { 687 TokenType.ARROW: lambda self, expressions: self.expression( 688 exp.Lambda, 689 this=self._replace_lambda( 690 self._parse_assignment(), 691 expressions, 692 ), 693 expressions=expressions, 694 ), 695 TokenType.FARROW: lambda self, expressions: self.expression( 696 exp.Kwarg, 697 this=exp.var(expressions[0].name), 698 expression=self._parse_assignment(), 699 ), 700 } 701 702 COLUMN_OPERATORS = { 703 TokenType.DOT: None, 704 TokenType.DCOLON: lambda self, this, to: self.expression( 705 exp.Cast if self.STRICT_CAST else exp.TryCast, 706 this=this, 707 to=to, 708 ), 709 TokenType.ARROW: lambda self, this, path: self.expression( 710 exp.JSONExtract, 711 this=this, 712 expression=self.dialect.to_json_path(path), 713 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 714 ), 715 TokenType.DARROW: lambda self, this, path: self.expression( 716 exp.JSONExtractScalar, 717 this=this, 718 
expression=self.dialect.to_json_path(path), 719 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 720 ), 721 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 722 exp.JSONBExtract, 723 this=this, 724 expression=path, 725 ), 726 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 727 exp.JSONBExtractScalar, 728 this=this, 729 expression=path, 730 ), 731 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 732 exp.JSONBContains, 733 this=this, 734 expression=key, 735 ), 736 } 737 738 EXPRESSION_PARSERS = { 739 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 740 exp.Column: lambda self: self._parse_column(), 741 exp.Condition: lambda self: self._parse_assignment(), 742 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 743 exp.Expression: lambda self: self._parse_expression(), 744 exp.From: lambda self: self._parse_from(joins=True), 745 exp.Group: lambda self: self._parse_group(), 746 exp.Having: lambda self: self._parse_having(), 747 exp.Identifier: lambda self: self._parse_id_var(), 748 exp.Join: lambda self: self._parse_join(), 749 exp.Lambda: lambda self: self._parse_lambda(), 750 exp.Lateral: lambda self: self._parse_lateral(), 751 exp.Limit: lambda self: self._parse_limit(), 752 exp.Offset: lambda self: self._parse_offset(), 753 exp.Order: lambda self: self._parse_order(), 754 exp.Ordered: lambda self: self._parse_ordered(), 755 exp.Properties: lambda self: self._parse_properties(), 756 exp.Qualify: lambda self: self._parse_qualify(), 757 exp.Returning: lambda self: self._parse_returning(), 758 exp.Select: lambda self: self._parse_select(), 759 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 760 exp.Table: lambda self: self._parse_table_parts(), 761 exp.TableAlias: lambda self: self._parse_table_alias(), 762 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 763 exp.Where: lambda self: self._parse_where(), 764 exp.Window: lambda self: self._parse_named_window(), 765 exp.With: lambda self: self._parse_with(), 766 "JOIN_TYPE": lambda self: self._parse_join_parts(), 767 } 768 769 STATEMENT_PARSERS = { 770 TokenType.ALTER: lambda self: self._parse_alter(), 771 TokenType.BEGIN: lambda self: self._parse_transaction(), 772 TokenType.CACHE: lambda self: self._parse_cache(), 773 TokenType.COMMENT: lambda self: self._parse_comment(), 774 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 775 TokenType.COPY: lambda self: self._parse_copy(), 776 TokenType.CREATE: lambda self: self._parse_create(), 777 TokenType.DELETE: lambda self: self._parse_delete(), 778 TokenType.DESC: lambda self: self._parse_describe(), 779 TokenType.DESCRIBE: lambda self: self._parse_describe(), 780 TokenType.DROP: lambda self: self._parse_drop(), 781 TokenType.INSERT: lambda self: self._parse_insert(), 782 TokenType.KILL: lambda self: self._parse_kill(), 783 TokenType.LOAD: lambda self: self._parse_load(), 784 TokenType.MERGE: lambda self: self._parse_merge(), 785 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 786 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 787 TokenType.REFRESH: lambda self: self._parse_refresh(), 788 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 789 TokenType.SET: lambda self: self._parse_set(), 790 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 791 TokenType.UNCACHE: lambda self: self._parse_uncache(), 792 TokenType.UPDATE: lambda self: self._parse_update(), 
793 TokenType.USE: lambda self: self.expression( 794 exp.Use, 795 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 796 this=self._parse_table(schema=False), 797 ), 798 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 799 } 800 801 UNARY_PARSERS = { 802 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 803 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 804 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 805 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 806 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 807 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 808 } 809 810 STRING_PARSERS = { 811 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 812 exp.RawString, this=token.text 813 ), 814 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 815 exp.National, this=token.text 816 ), 817 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 818 TokenType.STRING: lambda self, token: self.expression( 819 exp.Literal, this=token.text, is_string=True 820 ), 821 TokenType.UNICODE_STRING: lambda self, token: self.expression( 822 exp.UnicodeString, 823 this=token.text, 824 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 825 ), 826 } 827 828 NUMERIC_PARSERS = { 829 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 830 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 831 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 832 TokenType.NUMBER: lambda self, token: self.expression( 833 exp.Literal, this=token.text, is_string=False 834 ), 835 } 836 837 PRIMARY_PARSERS = { 838 **STRING_PARSERS, 839 **NUMERIC_PARSERS, 840 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 841 TokenType.NULL: lambda self, _: self.expression(exp.Null), 842 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 843 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 844 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 845 TokenType.STAR: lambda self, _: self._parse_star_ops(), 846 } 847 848 PLACEHOLDER_PARSERS = { 849 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 850 TokenType.PARAMETER: lambda self: self._parse_parameter(), 851 TokenType.COLON: lambda self: ( 852 self.expression(exp.Placeholder, this=self._prev.text) 853 if self._match_set(self.ID_VAR_TOKENS) 854 else None 855 ), 856 } 857 858 RANGE_PARSERS = { 859 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 860 TokenType.GLOB: binary_range_parser(exp.Glob), 861 TokenType.ILIKE: binary_range_parser(exp.ILike), 862 TokenType.IN: lambda self, this: self._parse_in(this), 863 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 864 TokenType.IS: lambda self, this: self._parse_is(this), 865 TokenType.LIKE: binary_range_parser(exp.Like), 866 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 867 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 868 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 869 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 870 } 871 872 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 873 "ALLOWED_VALUES": lambda self: 
self.expression( 874 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 875 ), 876 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 877 "AUTO": lambda self: self._parse_auto_property(), 878 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 879 "BACKUP": lambda self: self.expression( 880 exp.BackupProperty, this=self._parse_var(any_token=True) 881 ), 882 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 883 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 884 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 885 "CHECKSUM": lambda self: self._parse_checksum(), 886 "CLUSTER BY": lambda self: self._parse_cluster(), 887 "CLUSTERED": lambda self: self._parse_clustered_by(), 888 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 889 exp.CollateProperty, **kwargs 890 ), 891 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 892 "CONTAINS": lambda self: self._parse_contains_property(), 893 "COPY": lambda self: self._parse_copy_property(), 894 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 895 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 896 "DEFINER": lambda self: self._parse_definer(), 897 "DETERMINISTIC": lambda self: self.expression( 898 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 899 ), 900 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 901 "DUPLICATE": lambda self: self._parse_duplicate(), 902 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 903 "DISTKEY": lambda self: self._parse_distkey(), 904 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 905 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 906 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 907 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 908 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 909 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 910 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 911 "FREESPACE": lambda self: self._parse_freespace(), 912 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 913 "HEAP": lambda self: self.expression(exp.HeapProperty), 914 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 915 "IMMUTABLE": lambda self: self.expression( 916 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 917 ), 918 "INHERITS": lambda self: self.expression( 919 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 920 ), 921 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 922 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 923 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 924 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 925 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 926 "LIKE": lambda self: self._parse_create_like(), 927 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 928 "LOCK": lambda self: self._parse_locking(), 929 "LOCKING": lambda self: self._parse_locking(), 930 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 931 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 932 "MERGEBLOCKRATIO": lambda self, 
**kwargs: self._parse_mergeblockratio(**kwargs), 933 "MODIFIES": lambda self: self._parse_modifies_property(), 934 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 935 "NO": lambda self: self._parse_no_property(), 936 "ON": lambda self: self._parse_on_property(), 937 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 938 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 939 "PARTITION": lambda self: self._parse_partitioned_of(), 940 "PARTITION BY": lambda self: self._parse_partitioned_by(), 941 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 942 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 943 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 944 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 945 "READS": lambda self: self._parse_reads_property(), 946 "REMOTE": lambda self: self._parse_remote_with_connection(), 947 "RETURNS": lambda self: self._parse_returns(), 948 "STRICT": lambda self: self.expression(exp.StrictProperty), 949 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 950 "ROW": lambda self: self._parse_row(), 951 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 952 "SAMPLE": lambda self: self.expression( 953 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 954 ), 955 "SECURE": lambda self: self.expression(exp.SecureProperty), 956 "SECURITY": lambda self: self._parse_security(), 957 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 958 "SETTINGS": lambda self: self._parse_settings_property(), 959 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 960 "SORTKEY": lambda self: self._parse_sortkey(), 961 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 962 "STABLE": lambda self: self.expression( 963 exp.StabilityProperty, this=exp.Literal.string("STABLE") 964 ), 965 "STORED": lambda self: self._parse_stored(), 966 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 967 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 968 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 969 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 970 "TO": lambda self: self._parse_to_table(), 971 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 972 "TRANSFORM": lambda self: self.expression( 973 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 974 ), 975 "TTL": lambda self: self._parse_ttl(), 976 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 977 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 978 "VOLATILE": lambda self: self._parse_volatile_property(), 979 "WITH": lambda self: self._parse_with_property(), 980 } 981 982 CONSTRAINT_PARSERS = { 983 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 984 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 985 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 986 "CHARACTER SET": lambda self: self.expression( 987 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 988 ), 989 "CHECK": lambda self: self.expression( 990 exp.CheckColumnConstraint, 991 this=self._parse_wrapped(self._parse_assignment), 992 enforced=self._match_text_seq("ENFORCED"), 993 ), 994 "COLLATE": lambda self: self.expression( 995 exp.CollateColumnConstraint, 996 
this=self._parse_identifier() or self._parse_column(), 997 ), 998 "COMMENT": lambda self: self.expression( 999 exp.CommentColumnConstraint, this=self._parse_string() 1000 ), 1001 "COMPRESS": lambda self: self._parse_compress(), 1002 "CLUSTERED": lambda self: self.expression( 1003 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1004 ), 1005 "NONCLUSTERED": lambda self: self.expression( 1006 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1007 ), 1008 "DEFAULT": lambda self: self.expression( 1009 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1010 ), 1011 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1012 "EPHEMERAL": lambda self: self.expression( 1013 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1014 ), 1015 "EXCLUDE": lambda self: self.expression( 1016 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1017 ), 1018 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1019 "FORMAT": lambda self: self.expression( 1020 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1021 ), 1022 "GENERATED": lambda self: self._parse_generated_as_identity(), 1023 "IDENTITY": lambda self: self._parse_auto_increment(), 1024 "INLINE": lambda self: self._parse_inline(), 1025 "LIKE": lambda self: self._parse_create_like(), 1026 "NOT": lambda self: self._parse_not_constraint(), 1027 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1028 "ON": lambda self: ( 1029 self._match(TokenType.UPDATE) 1030 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1031 ) 1032 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1033 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1034 "PERIOD": lambda self: self._parse_period_for_system_time(), 1035 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1036 "REFERENCES": lambda self: self._parse_references(match=False), 1037 "TITLE": lambda self: self.expression( 1038 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1039 ), 1040 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1041 "UNIQUE": lambda self: self._parse_unique(), 1042 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1043 "WITH": lambda self: self.expression( 1044 exp.Properties, expressions=self._parse_wrapped_properties() 1045 ), 1046 } 1047 1048 ALTER_PARSERS = { 1049 "ADD": lambda self: self._parse_alter_table_add(), 1050 "ALTER": lambda self: self._parse_alter_table_alter(), 1051 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1052 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1053 "DROP": lambda self: self._parse_alter_table_drop(), 1054 "RENAME": lambda self: self._parse_alter_table_rename(), 1055 "SET": lambda self: self._parse_alter_table_set(), 1056 "AS": lambda self: self._parse_select(), 1057 } 1058 1059 ALTER_ALTER_PARSERS = { 1060 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1061 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1062 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1063 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1064 } 1065 1066 SCHEMA_UNNAMED_CONSTRAINTS = { 1067 "CHECK", 1068 "EXCLUDE", 1069 "FOREIGN KEY", 1070 "LIKE", 1071 "PERIOD", 1072 "PRIMARY KEY", 1073 "UNIQUE", 1074 } 1075 1076 NO_PAREN_FUNCTION_PARSERS = { 1077 "ANY": lambda self: 
self.expression(exp.Any, this=self._parse_bitwise()), 1078 "CASE": lambda self: self._parse_case(), 1079 "CONNECT_BY_ROOT": lambda self: self.expression( 1080 exp.ConnectByRoot, this=self._parse_column() 1081 ), 1082 "IF": lambda self: self._parse_if(), 1083 "NEXT": lambda self: self._parse_next_value_for(), 1084 } 1085 1086 INVALID_FUNC_NAME_TOKENS = { 1087 TokenType.IDENTIFIER, 1088 TokenType.STRING, 1089 } 1090 1091 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1092 1093 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1094 1095 FUNCTION_PARSERS = { 1096 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1097 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1098 "DECODE": lambda self: self._parse_decode(), 1099 "EXTRACT": lambda self: self._parse_extract(), 1100 "GAP_FILL": lambda self: self._parse_gap_fill(), 1101 "JSON_OBJECT": lambda self: self._parse_json_object(), 1102 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1103 "JSON_TABLE": lambda self: self._parse_json_table(), 1104 "MATCH": lambda self: self._parse_match_against(), 1105 "NORMALIZE": lambda self: self._parse_normalize(), 1106 "OPENJSON": lambda self: self._parse_open_json(), 1107 "POSITION": lambda self: self._parse_position(), 1108 "PREDICT": lambda self: self._parse_predict(), 1109 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1110 "STRING_AGG": lambda self: self._parse_string_agg(), 1111 "SUBSTRING": lambda self: self._parse_substring(), 1112 "TRIM": lambda self: self._parse_trim(), 1113 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1114 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1115 } 1116 1117 QUERY_MODIFIER_PARSERS = { 1118 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1119 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1120 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1121 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1122 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1123 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1124 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1125 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1126 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1127 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1128 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1129 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1130 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1131 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1132 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1133 TokenType.CLUSTER_BY: lambda self: ( 1134 "cluster", 1135 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1136 ), 1137 TokenType.DISTRIBUTE_BY: lambda self: ( 1138 "distribute", 1139 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1140 ), 1141 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1142 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1143 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1144 } 1145 1146 SET_PARSERS = { 1147 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1148 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1149 
"SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1150 "TRANSACTION": lambda self: self._parse_set_transaction(), 1151 } 1152 1153 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1154 1155 TYPE_LITERAL_PARSERS = { 1156 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1157 } 1158 1159 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1160 1161 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1162 1163 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1164 1165 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1166 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1167 "ISOLATION": ( 1168 ("LEVEL", "REPEATABLE", "READ"), 1169 ("LEVEL", "READ", "COMMITTED"), 1170 ("LEVEL", "READ", "UNCOMITTED"), 1171 ("LEVEL", "SERIALIZABLE"), 1172 ), 1173 "READ": ("WRITE", "ONLY"), 1174 } 1175 1176 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1177 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1178 ) 1179 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1180 1181 CREATE_SEQUENCE: OPTIONS_TYPE = { 1182 "SCALE": ("EXTEND", "NOEXTEND"), 1183 "SHARD": ("EXTEND", "NOEXTEND"), 1184 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1185 **dict.fromkeys( 1186 ( 1187 "SESSION", 1188 "GLOBAL", 1189 "KEEP", 1190 "NOKEEP", 1191 "ORDER", 1192 "NOORDER", 1193 "NOCACHE", 1194 "CYCLE", 1195 "NOCYCLE", 1196 "NOMINVALUE", 1197 "NOMAXVALUE", 1198 "NOSCALE", 1199 "NOSHARD", 1200 ), 1201 tuple(), 1202 ), 1203 } 1204 1205 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1206 1207 USABLES: OPTIONS_TYPE = dict.fromkeys( 1208 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1209 ) 1210 1211 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1212 1213 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1214 "TYPE": ("EVOLUTION",), 1215 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1216 } 1217 1218 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1219 "NOT": ("ENFORCED",), 1220 "MATCH": ( 1221 "FULL", 1222 "PARTIAL", 1223 "SIMPLE", 1224 ), 1225 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1226 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1227 } 1228 1229 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1230 1231 CLONE_KEYWORDS = {"CLONE", "COPY"} 1232 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1233 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1234 1235 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1236 1237 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1238 1239 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1240 1241 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1242 1243 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1244 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1245 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1246 1247 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1248 1249 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1250 1251 ADD_CONSTRAINT_TOKENS = { 1252 TokenType.CONSTRAINT, 1253 TokenType.FOREIGN_KEY, 1254 TokenType.INDEX, 1255 TokenType.KEY, 1256 TokenType.PRIMARY_KEY, 1257 TokenType.UNIQUE, 1258 } 1259 1260 DISTINCT_TOKENS = {TokenType.DISTINCT} 1261 1262 NULL_TOKENS = {TokenType.NULL} 1263 1264 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1265 1266 
SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1267 1268 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1269 1270 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1271 1272 ODBC_DATETIME_LITERALS = { 1273 "d": exp.Date, 1274 "t": exp.Time, 1275 "ts": exp.Timestamp, 1276 } 1277 1278 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1279 1280 STRICT_CAST = True 1281 1282 PREFIXED_PIVOT_COLUMNS = False 1283 IDENTIFY_PIVOT_STRINGS = False 1284 1285 LOG_DEFAULTS_TO_LN = False 1286 1287 # Whether ADD is present for each column added by ALTER TABLE 1288 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1289 1290 # Whether the table sample clause expects CSV syntax 1291 TABLESAMPLE_CSV = False 1292 1293 # The default method used for table sampling 1294 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1295 1296 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1297 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1298 1299 # Whether the TRIM function expects the characters to trim as its first argument 1300 TRIM_PATTERN_FIRST = False 1301 1302 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1303 STRING_ALIASES = False 1304 1305 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1306 MODIFIERS_ATTACHED_TO_SET_OP = True 1307 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1308 1309 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1310 NO_PAREN_IF_COMMANDS = True 1311 1312 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1313 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1314 1315 # Whether the `:` operator is used to extract a value from a VARIANT column 1316 COLON_IS_VARIANT_EXTRACT = False 1317 1318 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1319 # If this is True and '(' is not found, the keyword will be treated as an identifier 1320 VALUES_FOLLOWED_BY_PAREN = True 1321 1322 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1323 SUPPORTS_IMPLICIT_UNNEST = False 1324 1325 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1326 INTERVAL_SPANS = True 1327 1328 # Whether a PARTITION clause can follow a table reference 1329 SUPPORTS_PARTITION_SELECTION = False 1330 1331 __slots__ = ( 1332 "error_level", 1333 "error_message_context", 1334 "max_errors", 1335 "dialect", 1336 "sql", 1337 "errors", 1338 "_tokens", 1339 "_index", 1340 "_curr", 1341 "_next", 1342 "_prev", 1343 "_prev_comments", 1344 ) 1345 1346 # Autofilled 1347 SHOW_TRIE: t.Dict = {} 1348 SET_TRIE: t.Dict = {} 1349 1350 def __init__( 1351 self, 1352 error_level: t.Optional[ErrorLevel] = None, 1353 error_message_context: int = 100, 1354 max_errors: int = 3, 1355 dialect: DialectType = None, 1356 ): 1357 from sqlglot.dialects import Dialect 1358 1359 self.error_level = error_level or ErrorLevel.IMMEDIATE 1360 self.error_message_context = error_message_context 1361 self.max_errors = max_errors 1362 self.dialect = Dialect.get_or_raise(dialect) 1363 self.reset() 1364 1365 def reset(self): 1366 self.sql = "" 1367 self.errors = [] 1368 self._tokens = [] 1369 self._index = 0 1370 self._curr = None 1371 self._next = None 1372 self._prev = None 1373 self._prev_comments = None 1374 1375 def parse( 1376 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1377 ) -> t.List[t.Optional[exp.Expression]]: 1378 """ 1379 Parses a list of tokens and returns a list of syntax trees, one tree 1380 per parsed SQL statement. 1381 1382 Args: 1383 raw_tokens: The list of tokens. 1384 sql: The original SQL string, used to produce helpful debug messages. 1385 1386 Returns: 1387 The list of the produced syntax trees. 1388 """ 1389 return self._parse( 1390 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1391 ) 1392 1393 def parse_into( 1394 self, 1395 expression_types: exp.IntoType, 1396 raw_tokens: t.List[Token], 1397 sql: t.Optional[str] = None, 1398 ) -> t.List[t.Optional[exp.Expression]]: 1399 """ 1400 Parses a list of tokens into a given Expression type. If a collection of Expression 1401 types is given instead, this method will try to parse the token list into each one 1402 of them, stopping at the first for which the parsing succeeds. 1403 1404 Args: 1405 expression_types: The expression type(s) to try and parse the token list into. 1406 raw_tokens: The list of tokens. 1407 sql: The original SQL string, used to produce helpful debug messages. 1408 1409 Returns: 1410 The target Expression. 
1411 """ 1412 errors = [] 1413 for expression_type in ensure_list(expression_types): 1414 parser = self.EXPRESSION_PARSERS.get(expression_type) 1415 if not parser: 1416 raise TypeError(f"No parser registered for {expression_type}") 1417 1418 try: 1419 return self._parse(parser, raw_tokens, sql) 1420 except ParseError as e: 1421 e.errors[0]["into_expression"] = expression_type 1422 errors.append(e) 1423 1424 raise ParseError( 1425 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1426 errors=merge_errors(errors), 1427 ) from errors[-1] 1428 1429 def _parse( 1430 self, 1431 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1432 raw_tokens: t.List[Token], 1433 sql: t.Optional[str] = None, 1434 ) -> t.List[t.Optional[exp.Expression]]: 1435 self.reset() 1436 self.sql = sql or "" 1437 1438 total = len(raw_tokens) 1439 chunks: t.List[t.List[Token]] = [[]] 1440 1441 for i, token in enumerate(raw_tokens): 1442 if token.token_type == TokenType.SEMICOLON: 1443 if token.comments: 1444 chunks.append([token]) 1445 1446 if i < total - 1: 1447 chunks.append([]) 1448 else: 1449 chunks[-1].append(token) 1450 1451 expressions = [] 1452 1453 for tokens in chunks: 1454 self._index = -1 1455 self._tokens = tokens 1456 self._advance() 1457 1458 expressions.append(parse_method(self)) 1459 1460 if self._index < len(self._tokens): 1461 self.raise_error("Invalid expression / Unexpected token") 1462 1463 self.check_errors() 1464 1465 return expressions 1466 1467 def check_errors(self) -> None: 1468 """Logs or raises any found errors, depending on the chosen error level setting.""" 1469 if self.error_level == ErrorLevel.WARN: 1470 for error in self.errors: 1471 logger.error(str(error)) 1472 elif self.error_level == ErrorLevel.RAISE and self.errors: 1473 raise ParseError( 1474 concat_messages(self.errors, self.max_errors), 1475 errors=merge_errors(self.errors), 1476 ) 1477 1478 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1479 """ 1480 Appends an error in the list of recorded errors or raises it, depending on the chosen 1481 error level setting. 1482 """ 1483 token = token or self._curr or self._prev or Token.string("") 1484 start = token.start 1485 end = token.end + 1 1486 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1487 highlight = self.sql[start:end] 1488 end_context = self.sql[end : end + self.error_message_context] 1489 1490 error = ParseError.new( 1491 f"{message}. Line {token.line}, Col: {token.col}.\n" 1492 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1493 description=message, 1494 line=token.line, 1495 col=token.col, 1496 start_context=start_context, 1497 highlight=highlight, 1498 end_context=end_context, 1499 ) 1500 1501 if self.error_level == ErrorLevel.IMMEDIATE: 1502 raise error 1503 1504 self.errors.append(error) 1505 1506 def expression( 1507 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1508 ) -> E: 1509 """ 1510 Creates a new, validated Expression. 1511 1512 Args: 1513 exp_class: The expression class to instantiate. 1514 comments: An optional list of comments to attach to the expression. 1515 kwargs: The arguments to set for the expression along with their respective values. 1516 1517 Returns: 1518 The target expression. 
1519 """ 1520 instance = exp_class(**kwargs) 1521 instance.add_comments(comments) if comments else self._add_comments(instance) 1522 return self.validate_expression(instance) 1523 1524 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1525 if expression and self._prev_comments: 1526 expression.add_comments(self._prev_comments) 1527 self._prev_comments = None 1528 1529 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1530 """ 1531 Validates an Expression, making sure that all its mandatory arguments are set. 1532 1533 Args: 1534 expression: The expression to validate. 1535 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1536 1537 Returns: 1538 The validated expression. 1539 """ 1540 if self.error_level != ErrorLevel.IGNORE: 1541 for error_message in expression.error_messages(args): 1542 self.raise_error(error_message) 1543 1544 return expression 1545 1546 def _find_sql(self, start: Token, end: Token) -> str: 1547 return self.sql[start.start : end.end + 1] 1548 1549 def _is_connected(self) -> bool: 1550 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1551 1552 def _advance(self, times: int = 1) -> None: 1553 self._index += times 1554 self._curr = seq_get(self._tokens, self._index) 1555 self._next = seq_get(self._tokens, self._index + 1) 1556 1557 if self._index > 0: 1558 self._prev = self._tokens[self._index - 1] 1559 self._prev_comments = self._prev.comments 1560 else: 1561 self._prev = None 1562 self._prev_comments = None 1563 1564 def _retreat(self, index: int) -> None: 1565 if index != self._index: 1566 self._advance(index - self._index) 1567 1568 def _warn_unsupported(self) -> None: 1569 if len(self._tokens) <= 1: 1570 return 1571 1572 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1573 # interested in emitting a warning for the one being currently processed. 1574 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1575 1576 logger.warning( 1577 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1578 ) 1579 1580 def _parse_command(self) -> exp.Command: 1581 self._warn_unsupported() 1582 return self.expression( 1583 exp.Command, 1584 comments=self._prev_comments, 1585 this=self._prev.text.upper(), 1586 expression=self._parse_string(), 1587 ) 1588 1589 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1590 """ 1591 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1593 solve this by setting & resetting the parser state accordingly. 1594 """ 1595 index = self._index 1596 error_level = self.error_level 1597 1598 self.error_level = ErrorLevel.IMMEDIATE 1599 try: 1600 this = parse_method() 1601 except ParseError: 1602 this = None 1603 finally: 1604 if not this or retreat: 1605 self._retreat(index) 1606 self.error_level = error_level 1607 1608 return this 1609 1610 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1611 start = self._prev 1612 exists = self._parse_exists() if allow_exists else None 1613 1614 self._match(TokenType.ON) 1615 1616 materialized = self._match_text_seq("MATERIALIZED") 1617 kind = self._match_set(self.CREATABLES) and self._prev 1618 if not kind: 1619 return self._parse_as_command(start) 1620 1621 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1622 this = self._parse_user_defined_function(kind=kind.token_type) 1623 elif kind.token_type == TokenType.TABLE: 1624 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1625 elif kind.token_type == TokenType.COLUMN: 1626 this = self._parse_column() 1627 else: 1628 this = self._parse_id_var() 1629 1630 self._match(TokenType.IS) 1631 1632 return self.expression( 1633 exp.Comment, 1634 this=this, 1635 kind=kind.text, 1636 expression=self._parse_string(), 1637 exists=exists, 1638 materialized=materialized, 1639 ) 1640 1641 def _parse_to_table( 1642 self, 1643 ) -> exp.ToTableProperty: 1644 table = self._parse_table_parts(schema=True) 1645 return self.expression(exp.ToTableProperty, this=table) 1646 1647 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1648 def _parse_ttl(self) -> exp.Expression: 1649 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1650 this = self._parse_bitwise() 1651 1652 if self._match_text_seq("DELETE"): 1653 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1654 if self._match_text_seq("RECOMPRESS"): 1655 return self.expression( 1656 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1657 ) 1658 if self._match_text_seq("TO", "DISK"): 1659 return self.expression( 1660 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1661 ) 1662 if self._match_text_seq("TO", "VOLUME"): 1663 return self.expression( 1664 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1665 ) 1666 1667 return this 1668 1669 expressions = self._parse_csv(_parse_ttl_action) 1670 where = self._parse_where() 1671 group = self._parse_group() 1672 1673 aggregates = None 1674 if group and self._match(TokenType.SET): 1675 aggregates = self._parse_csv(self._parse_set_item) 1676 1677 return self.expression( 1678 exp.MergeTreeTTL, 1679 expressions=expressions, 1680 where=where, 1681 group=group, 1682 aggregates=aggregates, 1683 ) 1684 1685 def _parse_statement(self) -> t.Optional[exp.Expression]: 1686 if self._curr is None: 1687 return None 1688 1689 if self._match_set(self.STATEMENT_PARSERS): 1690 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1691 1692 if self._match_set(self.dialect.tokenizer.COMMANDS): 1693 return self._parse_command() 1694 1695 expression = self._parse_expression() 1696 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1697 return self._parse_query_modifiers(expression) 1698 1699 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1700 start =
self._prev 1701 temporary = self._match(TokenType.TEMPORARY) 1702 materialized = self._match_text_seq("MATERIALIZED") 1703 1704 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1705 if not kind: 1706 return self._parse_as_command(start) 1707 1708 concurrently = self._match_text_seq("CONCURRENTLY") 1709 if_exists = exists or self._parse_exists() 1710 table = self._parse_table_parts( 1711 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1712 ) 1713 1714 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1715 1716 if self._match(TokenType.L_PAREN, advance=False): 1717 expressions = self._parse_wrapped_csv(self._parse_types) 1718 else: 1719 expressions = None 1720 1721 return self.expression( 1722 exp.Drop, 1723 comments=start.comments, 1724 exists=if_exists, 1725 this=table, 1726 expressions=expressions, 1727 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1728 temporary=temporary, 1729 materialized=materialized, 1730 cascade=self._match_text_seq("CASCADE"), 1731 constraints=self._match_text_seq("CONSTRAINTS"), 1732 purge=self._match_text_seq("PURGE"), 1733 cluster=cluster, 1734 concurrently=concurrently, 1735 ) 1736 1737 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1738 return ( 1739 self._match_text_seq("IF") 1740 and (not not_ or self._match(TokenType.NOT)) 1741 and self._match(TokenType.EXISTS) 1742 ) 1743 1744 def _parse_create(self) -> exp.Create | exp.Command: 1745 # Note: this can't be None because we've matched a statement parser 1746 start = self._prev 1747 comments = self._prev_comments 1748 1749 replace = ( 1750 start.token_type == TokenType.REPLACE 1751 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1752 or self._match_pair(TokenType.OR, TokenType.ALTER) 1753 ) 1754 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1755 1756 unique = self._match(TokenType.UNIQUE) 1757 1758 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1759 clustered = True 1760 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1761 "COLUMNSTORE" 1762 ): 1763 clustered = False 1764 else: 1765 clustered = None 1766 1767 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1768 self._advance() 1769 1770 properties = None 1771 create_token = self._match_set(self.CREATABLES) and self._prev 1772 1773 if not create_token: 1774 # exp.Properties.Location.POST_CREATE 1775 properties = self._parse_properties() 1776 create_token = self._match_set(self.CREATABLES) and self._prev 1777 1778 if not properties or not create_token: 1779 return self._parse_as_command(start) 1780 1781 concurrently = self._match_text_seq("CONCURRENTLY") 1782 exists = self._parse_exists(not_=True) 1783 this = None 1784 expression: t.Optional[exp.Expression] = None 1785 indexes = None 1786 no_schema_binding = None 1787 begin = None 1788 end = None 1789 clone = None 1790 1791 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1792 nonlocal properties 1793 if properties and temp_props: 1794 properties.expressions.extend(temp_props.expressions) 1795 elif temp_props: 1796 properties = temp_props 1797 1798 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1799 this = self._parse_user_defined_function(kind=create_token.token_type) 1800 1801 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1802 extend_props(self._parse_properties()) 1803 1804 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1805 
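        # Added commentary (not in the original source): at this point `expression`
        # is either a heredoc body (e.g. a dollar-quoted $$ ... $$ function body in
        # Postgres) or None; the `if not expression` branches below then fall back to
        # a COMMAND token, a BEGIN ... END block, or a plain statement as the body.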
extend_props(self._parse_properties()) 1806 1807 if not expression: 1808 if self._match(TokenType.COMMAND): 1809 expression = self._parse_as_command(self._prev) 1810 else: 1811 begin = self._match(TokenType.BEGIN) 1812 return_ = self._match_text_seq("RETURN") 1813 1814 if self._match(TokenType.STRING, advance=False): 1815 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1816 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1817 expression = self._parse_string() 1818 extend_props(self._parse_properties()) 1819 else: 1820 expression = self._parse_statement() 1821 1822 end = self._match_text_seq("END") 1823 1824 if return_: 1825 expression = self.expression(exp.Return, this=expression) 1826 elif create_token.token_type == TokenType.INDEX: 1827 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1828 if not self._match(TokenType.ON): 1829 index = self._parse_id_var() 1830 anonymous = False 1831 else: 1832 index = None 1833 anonymous = True 1834 1835 this = self._parse_index(index=index, anonymous=anonymous) 1836 elif create_token.token_type in self.DB_CREATABLES: 1837 table_parts = self._parse_table_parts( 1838 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1839 ) 1840 1841 # exp.Properties.Location.POST_NAME 1842 self._match(TokenType.COMMA) 1843 extend_props(self._parse_properties(before=True)) 1844 1845 this = self._parse_schema(this=table_parts) 1846 1847 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1848 extend_props(self._parse_properties()) 1849 1850 self._match(TokenType.ALIAS) 1851 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1852 # exp.Properties.Location.POST_ALIAS 1853 extend_props(self._parse_properties()) 1854 1855 if create_token.token_type == TokenType.SEQUENCE: 1856 expression = self._parse_types() 1857 extend_props(self._parse_properties()) 1858 else: 1859 expression = self._parse_ddl_select() 1860 1861 if create_token.token_type == TokenType.TABLE: 1862 # exp.Properties.Location.POST_EXPRESSION 1863 extend_props(self._parse_properties()) 1864 1865 indexes = [] 1866 while True: 1867 index = self._parse_index() 1868 1869 # exp.Properties.Location.POST_INDEX 1870 extend_props(self._parse_properties()) 1871 if not index: 1872 break 1873 else: 1874 self._match(TokenType.COMMA) 1875 indexes.append(index) 1876 elif create_token.token_type == TokenType.VIEW: 1877 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1878 no_schema_binding = True 1879 1880 shallow = self._match_text_seq("SHALLOW") 1881 1882 if self._match_texts(self.CLONE_KEYWORDS): 1883 copy = self._prev.text.lower() == "copy" 1884 clone = self.expression( 1885 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1886 ) 1887 1888 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1889 return self._parse_as_command(start) 1890 1891 create_kind_text = create_token.text.upper() 1892 return self.expression( 1893 exp.Create, 1894 comments=comments, 1895 this=this, 1896 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1897 replace=replace, 1898 refresh=refresh, 1899 unique=unique, 1900 expression=expression, 1901 exists=exists, 1902 properties=properties, 1903 indexes=indexes, 1904 no_schema_binding=no_schema_binding, 1905 begin=begin, 1906 end=end, 1907 clone=clone, 1908 concurrently=concurrently, 1909 clustered=clustered, 1910 ) 1911 1912 def
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1913 seq = exp.SequenceProperties() 1914 1915 options = [] 1916 index = self._index 1917 1918 while self._curr: 1919 self._match(TokenType.COMMA) 1920 if self._match_text_seq("INCREMENT"): 1921 self._match_text_seq("BY") 1922 self._match_text_seq("=") 1923 seq.set("increment", self._parse_term()) 1924 elif self._match_text_seq("MINVALUE"): 1925 seq.set("minvalue", self._parse_term()) 1926 elif self._match_text_seq("MAXVALUE"): 1927 seq.set("maxvalue", self._parse_term()) 1928 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1929 self._match_text_seq("=") 1930 seq.set("start", self._parse_term()) 1931 elif self._match_text_seq("CACHE"): 1932 # T-SQL allows empty CACHE which is initialized dynamically 1933 seq.set("cache", self._parse_number() or True) 1934 elif self._match_text_seq("OWNED", "BY"): 1935 # "OWNED BY NONE" is the default 1936 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1937 else: 1938 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1939 if opt: 1940 options.append(opt) 1941 else: 1942 break 1943 1944 seq.set("options", options if options else None) 1945 return None if self._index == index else seq 1946 1947 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1948 # only used for teradata currently 1949 self._match(TokenType.COMMA) 1950 1951 kwargs = { 1952 "no": self._match_text_seq("NO"), 1953 "dual": self._match_text_seq("DUAL"), 1954 "before": self._match_text_seq("BEFORE"), 1955 "default": self._match_text_seq("DEFAULT"), 1956 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1957 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1958 "after": self._match_text_seq("AFTER"), 1959 "minimum": self._match_texts(("MIN", "MINIMUM")), 1960 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1961 } 1962 1963 if self._match_texts(self.PROPERTY_PARSERS): 1964 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1965 try: 1966 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1967 except TypeError: 1968 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1969 1970 return None 1971 1972 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1973 return self._parse_wrapped_csv(self._parse_property) 1974 1975 def _parse_property(self) -> t.Optional[exp.Expression]: 1976 if self._match_texts(self.PROPERTY_PARSERS): 1977 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1978 1979 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1980 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1981 1982 if self._match_text_seq("COMPOUND", "SORTKEY"): 1983 return self._parse_sortkey(compound=True) 1984 1985 if self._match_text_seq("SQL", "SECURITY"): 1986 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1987 1988 index = self._index 1989 key = self._parse_column() 1990 1991 if not self._match(TokenType.EQ): 1992 self._retreat(index) 1993 return self._parse_sequence_properties() 1994 1995 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1996 if isinstance(key, exp.Column): 1997 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1998 1999 value = self._parse_bitwise() or self._parse_var(any_token=True) 2000 2001 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2002 if 
isinstance(value, exp.Column): 2003 value = exp.var(value.name) 2004 2005 return self.expression(exp.Property, this=key, value=value) 2006 2007 def _parse_stored(self) -> exp.FileFormatProperty: 2008 self._match(TokenType.ALIAS) 2009 2010 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2011 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2012 2013 return self.expression( 2014 exp.FileFormatProperty, 2015 this=( 2016 self.expression( 2017 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2018 ) 2019 if input_format or output_format 2020 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2021 ), 2022 ) 2023 2024 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2025 field = self._parse_field() 2026 if isinstance(field, exp.Identifier) and not field.quoted: 2027 field = exp.var(field) 2028 2029 return field 2030 2031 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2032 self._match(TokenType.EQ) 2033 self._match(TokenType.ALIAS) 2034 2035 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2036 2037 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2038 properties = [] 2039 while True: 2040 if before: 2041 prop = self._parse_property_before() 2042 else: 2043 prop = self._parse_property() 2044 if not prop: 2045 break 2046 for p in ensure_list(prop): 2047 properties.append(p) 2048 2049 if properties: 2050 return self.expression(exp.Properties, expressions=properties) 2051 2052 return None 2053 2054 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2055 return self.expression( 2056 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2057 ) 2058 2059 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2060 if self._match_texts(("DEFINER", "INVOKER")): 2061 security_specifier = self._prev.text.upper() 2062 return self.expression(exp.SecurityProperty, this=security_specifier) 2063 return None 2064 2065 def _parse_settings_property(self) -> exp.SettingsProperty: 2066 return self.expression( 2067 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2068 ) 2069 2070 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2071 if self._index >= 2: 2072 pre_volatile_token = self._tokens[self._index - 2] 2073 else: 2074 pre_volatile_token = None 2075 2076 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2077 return exp.VolatileProperty() 2078 2079 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2080 2081 def _parse_retention_period(self) -> exp.Var: 2082 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2083 number = self._parse_number() 2084 number_str = f"{number} " if number else "" 2085 unit = self._parse_var(any_token=True) 2086 return exp.var(f"{number_str}{unit}") 2087 2088 def _parse_system_versioning_property( 2089 self, with_: bool = False 2090 ) -> exp.WithSystemVersioningProperty: 2091 self._match(TokenType.EQ) 2092 prop = self.expression( 2093 exp.WithSystemVersioningProperty, 2094 **{ # type: ignore 2095 "on": True, 2096 "with": with_, 2097 }, 2098 ) 2099 2100 if self._match_text_seq("OFF"): 2101 prop.set("on", False) 2102 return prop 2103 2104 self._match(TokenType.ON) 2105 if self._match(TokenType.L_PAREN): 2106 while self._curr and not 
self._match(TokenType.R_PAREN): 2107 if self._match_text_seq("HISTORY_TABLE", "="): 2108 prop.set("this", self._parse_table_parts()) 2109 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2110 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2111 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2112 prop.set("retention_period", self._parse_retention_period()) 2113 2114 self._match(TokenType.COMMA) 2115 2116 return prop 2117 2118 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2119 self._match(TokenType.EQ) 2120 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2121 prop = self.expression(exp.DataDeletionProperty, on=on) 2122 2123 if self._match(TokenType.L_PAREN): 2124 while self._curr and not self._match(TokenType.R_PAREN): 2125 if self._match_text_seq("FILTER_COLUMN", "="): 2126 prop.set("filter_column", self._parse_column()) 2127 elif self._match_text_seq("RETENTION_PERIOD", "="): 2128 prop.set("retention_period", self._parse_retention_period()) 2129 2130 self._match(TokenType.COMMA) 2131 2132 return prop 2133 2134 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2135 kind = "HASH" 2136 expressions: t.Optional[t.List[exp.Expression]] = None 2137 if self._match_text_seq("BY", "HASH"): 2138 expressions = self._parse_wrapped_csv(self._parse_id_var) 2139 elif self._match_text_seq("BY", "RANDOM"): 2140 kind = "RANDOM" 2141 2142 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2143 buckets: t.Optional[exp.Expression] = None 2144 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2145 buckets = self._parse_number() 2146 2147 return self.expression( 2148 exp.DistributedByProperty, 2149 expressions=expressions, 2150 kind=kind, 2151 buckets=buckets, 2152 order=self._parse_order(), 2153 ) 2154 2155 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2156 self._match_text_seq("KEY") 2157 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2158 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2159 2160 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2161 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2162 prop = self._parse_system_versioning_property(with_=True) 2163 self._match_r_paren() 2164 return prop 2165 2166 if self._match(TokenType.L_PAREN, advance=False): 2167 return self._parse_wrapped_properties() 2168 2169 if self._match_text_seq("JOURNAL"): 2170 return self._parse_withjournaltable() 2171 2172 if self._match_texts(self.VIEW_ATTRIBUTES): 2173 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2174 2175 if self._match_text_seq("DATA"): 2176 return self._parse_withdata(no=False) 2177 elif self._match_text_seq("NO", "DATA"): 2178 return self._parse_withdata(no=True) 2179 2180 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2181 return self._parse_serde_properties(with_=True) 2182 2183 if self._match(TokenType.SCHEMA): 2184 return self.expression( 2185 exp.WithSchemaBindingProperty, 2186 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2187 ) 2188 2189 if not self._next: 2190 return None 2191 2192 return self._parse_withisolatedloading() 2193 2194 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2195 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2196 self._match(TokenType.EQ) 2197 2198 user = self._parse_id_var() 2199 self._match(TokenType.PARAMETER) 2200 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2201 2202 if not user or not host: 2203 return None 2204 2205 return exp.DefinerProperty(this=f"{user}@{host}") 2206 2207 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2208 self._match(TokenType.TABLE) 2209 self._match(TokenType.EQ) 2210 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2211 2212 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2213 return self.expression(exp.LogProperty, no=no) 2214 2215 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2216 return self.expression(exp.JournalProperty, **kwargs) 2217 2218 def _parse_checksum(self) -> exp.ChecksumProperty: 2219 self._match(TokenType.EQ) 2220 2221 on = None 2222 if self._match(TokenType.ON): 2223 on = True 2224 elif self._match_text_seq("OFF"): 2225 on = False 2226 2227 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2228 2229 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2230 return self.expression( 2231 exp.Cluster, 2232 expressions=( 2233 self._parse_wrapped_csv(self._parse_ordered) 2234 if wrapped 2235 else self._parse_csv(self._parse_ordered) 2236 ), 2237 ) 2238 2239 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2240 self._match_text_seq("BY") 2241 2242 self._match_l_paren() 2243 expressions = self._parse_csv(self._parse_column) 2244 self._match_r_paren() 2245 2246 if self._match_text_seq("SORTED", "BY"): 2247 self._match_l_paren() 2248 sorted_by = self._parse_csv(self._parse_ordered) 2249 self._match_r_paren() 2250 else: 2251 sorted_by = None 2252 2253 self._match(TokenType.INTO) 2254 buckets = self._parse_number() 2255 self._match_text_seq("BUCKETS") 2256 2257 return self.expression( 2258 exp.ClusteredByProperty, 2259 expressions=expressions, 2260 sorted_by=sorted_by, 2261 buckets=buckets, 2262 ) 2263 2264 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2265 if not self._match_text_seq("GRANTS"): 2266 self._retreat(self._index - 1) 2267 return None 2268 2269 return self.expression(exp.CopyGrantsProperty) 2270 2271 def _parse_freespace(self) -> exp.FreespaceProperty: 2272 self._match(TokenType.EQ) 2273 return self.expression( 2274 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2275 ) 2276 2277 def _parse_mergeblockratio( 2278 self, no: bool = False, default: bool = False 2279 ) -> exp.MergeBlockRatioProperty: 2280 if self._match(TokenType.EQ): 2281 return self.expression( 2282 exp.MergeBlockRatioProperty, 2283 this=self._parse_number(), 2284 percent=self._match(TokenType.PERCENT), 2285 ) 2286 2287 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2288 2289 def _parse_datablocksize( 2290 self, 2291 default: t.Optional[bool] = None, 2292 minimum: t.Optional[bool] = None, 2293 maximum: t.Optional[bool] = None, 2294 ) -> exp.DataBlocksizeProperty: 2295 self._match(TokenType.EQ) 2296 size = self._parse_number() 2297 2298 units = None 2299 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2300 units = self._prev.text 2301 2302 return self.expression( 2303 exp.DataBlocksizeProperty, 2304 size=size, 2305 units=units, 2306 default=default, 2307 minimum=minimum, 2308 maximum=maximum, 2309 ) 2310 2311 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2312 self._match(TokenType.EQ) 2313 always = self._match_text_seq("ALWAYS") 2314 manual = self._match_text_seq("MANUAL") 2315 never = 
self._match_text_seq("NEVER") 2316 default = self._match_text_seq("DEFAULT") 2317 2318 autotemp = None 2319 if self._match_text_seq("AUTOTEMP"): 2320 autotemp = self._parse_schema() 2321 2322 return self.expression( 2323 exp.BlockCompressionProperty, 2324 always=always, 2325 manual=manual, 2326 never=never, 2327 default=default, 2328 autotemp=autotemp, 2329 ) 2330 2331 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2332 index = self._index 2333 no = self._match_text_seq("NO") 2334 concurrent = self._match_text_seq("CONCURRENT") 2335 2336 if not self._match_text_seq("ISOLATED", "LOADING"): 2337 self._retreat(index) 2338 return None 2339 2340 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2341 return self.expression( 2342 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2343 ) 2344 2345 def _parse_locking(self) -> exp.LockingProperty: 2346 if self._match(TokenType.TABLE): 2347 kind = "TABLE" 2348 elif self._match(TokenType.VIEW): 2349 kind = "VIEW" 2350 elif self._match(TokenType.ROW): 2351 kind = "ROW" 2352 elif self._match_text_seq("DATABASE"): 2353 kind = "DATABASE" 2354 else: 2355 kind = None 2356 2357 if kind in ("DATABASE", "TABLE", "VIEW"): 2358 this = self._parse_table_parts() 2359 else: 2360 this = None 2361 2362 if self._match(TokenType.FOR): 2363 for_or_in = "FOR" 2364 elif self._match(TokenType.IN): 2365 for_or_in = "IN" 2366 else: 2367 for_or_in = None 2368 2369 if self._match_text_seq("ACCESS"): 2370 lock_type = "ACCESS" 2371 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2372 lock_type = "EXCLUSIVE" 2373 elif self._match_text_seq("SHARE"): 2374 lock_type = "SHARE" 2375 elif self._match_text_seq("READ"): 2376 lock_type = "READ" 2377 elif self._match_text_seq("WRITE"): 2378 lock_type = "WRITE" 2379 elif self._match_text_seq("CHECKSUM"): 2380 lock_type = "CHECKSUM" 2381 else: 2382 lock_type = None 2383 2384 override = self._match_text_seq("OVERRIDE") 2385 2386 return self.expression( 2387 exp.LockingProperty, 2388 this=this, 2389 kind=kind, 2390 for_or_in=for_or_in, 2391 lock_type=lock_type, 2392 override=override, 2393 ) 2394 2395 def _parse_partition_by(self) -> t.List[exp.Expression]: 2396 if self._match(TokenType.PARTITION_BY): 2397 return self._parse_csv(self._parse_assignment) 2398 return [] 2399 2400 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2401 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2402 if self._match_text_seq("MINVALUE"): 2403 return exp.var("MINVALUE") 2404 if self._match_text_seq("MAXVALUE"): 2405 return exp.var("MAXVALUE") 2406 return self._parse_bitwise() 2407 2408 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2409 expression = None 2410 from_expressions = None 2411 to_expressions = None 2412 2413 if self._match(TokenType.IN): 2414 this = self._parse_wrapped_csv(self._parse_bitwise) 2415 elif self._match(TokenType.FROM): 2416 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2417 self._match_text_seq("TO") 2418 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2419 elif self._match_text_seq("WITH", "(", "MODULUS"): 2420 this = self._parse_number() 2421 self._match_text_seq(",", "REMAINDER") 2422 expression = self._parse_number() 2423 self._match_r_paren() 2424 else: 2425 self.raise_error("Failed to parse partition bound spec.") 2426 2427 return self.expression( 2428 exp.PartitionBoundSpec, 2429 this=this, 2430 expression=expression, 2431 
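        # Added commentary (not in the original source): `from_expressions` and
        # `to_expressions` model Postgres-style range partition bounds, e.g.
        # FOR VALUES FROM (MINVALUE) TO (10), while `this`/`expression` cover the
        # IN-list and WITH (MODULUS ..., REMAINDER ...) forms matched above.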
from_expressions=from_expressions, 2432 to_expressions=to_expressions, 2433 ) 2434 2435 # https://www.postgresql.org/docs/current/sql-createtable.html 2436 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2437 if not self._match_text_seq("OF"): 2438 self._retreat(self._index - 1) 2439 return None 2440 2441 this = self._parse_table(schema=True) 2442 2443 if self._match(TokenType.DEFAULT): 2444 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2445 elif self._match_text_seq("FOR", "VALUES"): 2446 expression = self._parse_partition_bound_spec() 2447 else: 2448 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2449 2450 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2451 2452 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2453 self._match(TokenType.EQ) 2454 return self.expression( 2455 exp.PartitionedByProperty, 2456 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2457 ) 2458 2459 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2460 if self._match_text_seq("AND", "STATISTICS"): 2461 statistics = True 2462 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2463 statistics = False 2464 else: 2465 statistics = None 2466 2467 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2468 2469 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2470 if self._match_text_seq("SQL"): 2471 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2472 return None 2473 2474 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2475 if self._match_text_seq("SQL", "DATA"): 2476 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2477 return None 2478 2479 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2480 if self._match_text_seq("PRIMARY", "INDEX"): 2481 return exp.NoPrimaryIndexProperty() 2482 if self._match_text_seq("SQL"): 2483 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2484 return None 2485 2486 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2487 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2488 return exp.OnCommitProperty() 2489 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2490 return exp.OnCommitProperty(delete=True) 2491 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2492 2493 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2494 if self._match_text_seq("SQL", "DATA"): 2495 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2496 return None 2497 2498 def _parse_distkey(self) -> exp.DistKeyProperty: 2499 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2500 2501 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2502 table = self._parse_table(schema=True) 2503 2504 options = [] 2505 while self._match_texts(("INCLUDING", "EXCLUDING")): 2506 this = self._prev.text.upper() 2507 2508 id_var = self._parse_id_var() 2509 if not id_var: 2510 return None 2511 2512 options.append( 2513 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2514 ) 2515 2516 return self.expression(exp.LikeProperty, this=table, expressions=options) 2517 2518 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2519 return self.expression( 2520 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2521 ) 2522 2523 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2524 self._match(TokenType.EQ) 2525 return self.expression( 2526 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2527 ) 2528 2529 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2530 self._match_text_seq("WITH", "CONNECTION") 2531 return self.expression( 2532 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2533 ) 2534 2535 def _parse_returns(self) -> exp.ReturnsProperty: 2536 value: t.Optional[exp.Expression] 2537 null = None 2538 is_table = self._match(TokenType.TABLE) 2539 2540 if is_table: 2541 if self._match(TokenType.LT): 2542 value = self.expression( 2543 exp.Schema, 2544 this="TABLE", 2545 expressions=self._parse_csv(self._parse_struct_types), 2546 ) 2547 if not self._match(TokenType.GT): 2548 self.raise_error("Expecting >") 2549 else: 2550 value = self._parse_schema(exp.var("TABLE")) 2551 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2552 null = True 2553 value = None 2554 else: 2555 value = self._parse_types() 2556 2557 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2558 2559 def _parse_describe(self) -> exp.Describe: 2560 kind = self._match_set(self.CREATABLES) and self._prev.text 2561 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2562 if self._match(TokenType.DOT): 2563 style = None 2564 self._retreat(self._index - 2) 2565 this = self._parse_table(schema=True) 2566 properties = self._parse_properties() 2567 expressions = properties.expressions if properties else None 2568 partition = self._parse_partition() 2569 return self.expression( 2570 exp.Describe, 2571 this=this, 2572 style=style, 2573 kind=kind, 2574 expressions=expressions, 2575 partition=partition, 2576 ) 2577 2578 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2579 kind = self._prev.text.upper() 2580 expressions = [] 2581 2582 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2583 if self._match(TokenType.WHEN): 2584 expression = self._parse_disjunction() 2585 self._match(TokenType.THEN) 2586 else: 2587 expression = None 2588 2589 else_ = self._match(TokenType.ELSE) 2590 2591 if not self._match(TokenType.INTO): 2592 return None 2593 2594 return self.expression( 2595 exp.ConditionalInsert, 2596 this=self.expression( 2597 exp.Insert, 2598 this=self._parse_table(schema=True), 2599 expression=self._parse_derived_table_values(), 2600 ), 2601 expression=expression, 2602 else_=else_, 2603 ) 2604 2605 expression = parse_conditional_insert() 2606 while expression is not None: 2607 expressions.append(expression) 2608 expression = parse_conditional_insert() 2609 2610 return self.expression( 2611 exp.MultitableInserts, 2612 kind=kind, 2613 comments=comments, 2614 expressions=expressions, 2615 source=self._parse_table(), 2616 ) 2617 2618 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2619 comments = ensure_list(self._prev_comments) 2620 hint = self._parse_hint() 2621 overwrite = self._match(TokenType.OVERWRITE) 2622 ignore = self._match(TokenType.IGNORE) 2623 local = self._match_text_seq("LOCAL") 2624 alternative = None 2625 is_function = None 2626 2627 if self._match_text_seq("DIRECTORY"): 2628 this: t.Optional[exp.Expression] = self.expression( 2629 exp.Directory, 2630 this=self._parse_var_or_string(), 2631 local=local, 2632 row_format=self._parse_row_format(match_row=True), 2633 ) 
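        # Added commentary (not in the original source): the branch above covers
        # Hive/Spark-style INSERT OVERWRITE [LOCAL] DIRECTORY '/path' targets; the
        # else branch below handles ordinary INSERT INTO statements as well as
        # Oracle-style multi-table inserts (INSERT FIRST/ALL ... SELECT), which are
        # delegated to _parse_multitable_inserts.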
2634 else: 2635 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2636 comments += ensure_list(self._prev_comments) 2637 return self._parse_multitable_inserts(comments) 2638 2639 if self._match(TokenType.OR): 2640 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2641 2642 self._match(TokenType.INTO) 2643 comments += ensure_list(self._prev_comments) 2644 self._match(TokenType.TABLE) 2645 is_function = self._match(TokenType.FUNCTION) 2646 2647 this = ( 2648 self._parse_table(schema=True, parse_partition=True) 2649 if not is_function 2650 else self._parse_function() 2651 ) 2652 2653 returning = self._parse_returning() 2654 2655 return self.expression( 2656 exp.Insert, 2657 comments=comments, 2658 hint=hint, 2659 is_function=is_function, 2660 this=this, 2661 stored=self._match_text_seq("STORED") and self._parse_stored(), 2662 by_name=self._match_text_seq("BY", "NAME"), 2663 exists=self._parse_exists(), 2664 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2665 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2666 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2667 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2668 conflict=self._parse_on_conflict(), 2669 returning=returning or self._parse_returning(), 2670 overwrite=overwrite, 2671 alternative=alternative, 2672 ignore=ignore, 2673 source=self._match(TokenType.TABLE) and self._parse_table(), 2674 ) 2675 2676 def _parse_kill(self) -> exp.Kill: 2677 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2678 2679 return self.expression( 2680 exp.Kill, 2681 this=self._parse_primary(), 2682 kind=kind, 2683 ) 2684 2685 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2686 conflict = self._match_text_seq("ON", "CONFLICT") 2687 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2688 2689 if not conflict and not duplicate: 2690 return None 2691 2692 conflict_keys = None 2693 constraint = None 2694 2695 if conflict: 2696 if self._match_text_seq("ON", "CONSTRAINT"): 2697 constraint = self._parse_id_var() 2698 elif self._match(TokenType.L_PAREN): 2699 conflict_keys = self._parse_csv(self._parse_id_var) 2700 self._match_r_paren() 2701 2702 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2703 if self._prev.token_type == TokenType.UPDATE: 2704 self._match(TokenType.SET) 2705 expressions = self._parse_csv(self._parse_equality) 2706 else: 2707 expressions = None 2708 2709 return self.expression( 2710 exp.OnConflict, 2711 duplicate=duplicate, 2712 expressions=expressions, 2713 action=action, 2714 conflict_keys=conflict_keys, 2715 constraint=constraint, 2716 ) 2717 2718 def _parse_returning(self) -> t.Optional[exp.Returning]: 2719 if not self._match(TokenType.RETURNING): 2720 return None 2721 return self.expression( 2722 exp.Returning, 2723 expressions=self._parse_csv(self._parse_expression), 2724 into=self._match(TokenType.INTO) and self._parse_table_part(), 2725 ) 2726 2727 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2728 if not self._match(TokenType.FORMAT): 2729 return None 2730 return self._parse_row_format() 2731 2732 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2733 index = self._index 2734 with_ = with_ or self._match_text_seq("WITH") 2735 2736 if not self._match(TokenType.SERDE_PROPERTIES): 2737 self._retreat(index) 2738 return 
None 2739 return self.expression( 2740 exp.SerdeProperties, 2741 **{ # type: ignore 2742 "expressions": self._parse_wrapped_properties(), 2743 "with": with_, 2744 }, 2745 ) 2746 2747 def _parse_row_format( 2748 self, match_row: bool = False 2749 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2750 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2751 return None 2752 2753 if self._match_text_seq("SERDE"): 2754 this = self._parse_string() 2755 2756 serde_properties = self._parse_serde_properties() 2757 2758 return self.expression( 2759 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2760 ) 2761 2762 self._match_text_seq("DELIMITED") 2763 2764 kwargs = {} 2765 2766 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2767 kwargs["fields"] = self._parse_string() 2768 if self._match_text_seq("ESCAPED", "BY"): 2769 kwargs["escaped"] = self._parse_string() 2770 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2771 kwargs["collection_items"] = self._parse_string() 2772 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2773 kwargs["map_keys"] = self._parse_string() 2774 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2775 kwargs["lines"] = self._parse_string() 2776 if self._match_text_seq("NULL", "DEFINED", "AS"): 2777 kwargs["null"] = self._parse_string() 2778 2779 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2780 2781 def _parse_load(self) -> exp.LoadData | exp.Command: 2782 if self._match_text_seq("DATA"): 2783 local = self._match_text_seq("LOCAL") 2784 self._match_text_seq("INPATH") 2785 inpath = self._parse_string() 2786 overwrite = self._match(TokenType.OVERWRITE) 2787 self._match_pair(TokenType.INTO, TokenType.TABLE) 2788 2789 return self.expression( 2790 exp.LoadData, 2791 this=self._parse_table(schema=True), 2792 local=local, 2793 overwrite=overwrite, 2794 inpath=inpath, 2795 partition=self._parse_partition(), 2796 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2797 serde=self._match_text_seq("SERDE") and self._parse_string(), 2798 ) 2799 return self._parse_as_command(self._prev) 2800 2801 def _parse_delete(self) -> exp.Delete: 2802 # This handles MySQL's "Multiple-Table Syntax" 2803 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2804 tables = None 2805 comments = self._prev_comments 2806 if not self._match(TokenType.FROM, advance=False): 2807 tables = self._parse_csv(self._parse_table) or None 2808 2809 returning = self._parse_returning() 2810 2811 return self.expression( 2812 exp.Delete, 2813 comments=comments, 2814 tables=tables, 2815 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2816 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2817 where=self._parse_where(), 2818 returning=returning or self._parse_returning(), 2819 limit=self._parse_limit(), 2820 ) 2821 2822 def _parse_update(self) -> exp.Update: 2823 comments = self._prev_comments 2824 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2825 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2826 returning = self._parse_returning() 2827 return self.expression( 2828 exp.Update, 2829 comments=comments, 2830 **{ # type: ignore 2831 "this": this, 2832 "expressions": expressions, 2833 "from": self._parse_from(joins=True), 2834 "where": self._parse_where(), 2835 "returning": returning or self._parse_returning(), 2836 "order": self._parse_order(), 2837 
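                # Added commentary (not in the original source): ORDER BY and LIMIT
                # are parsed here because some dialects, notably MySQL, allow them on
                # UPDATE statements, e.g. UPDATE t SET c = 1 ORDER BY id LIMIT 10.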
"limit": self._parse_limit(), 2838 }, 2839 ) 2840 2841 def _parse_uncache(self) -> exp.Uncache: 2842 if not self._match(TokenType.TABLE): 2843 self.raise_error("Expecting TABLE after UNCACHE") 2844 2845 return self.expression( 2846 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2847 ) 2848 2849 def _parse_cache(self) -> exp.Cache: 2850 lazy = self._match_text_seq("LAZY") 2851 self._match(TokenType.TABLE) 2852 table = self._parse_table(schema=True) 2853 2854 options = [] 2855 if self._match_text_seq("OPTIONS"): 2856 self._match_l_paren() 2857 k = self._parse_string() 2858 self._match(TokenType.EQ) 2859 v = self._parse_string() 2860 options = [k, v] 2861 self._match_r_paren() 2862 2863 self._match(TokenType.ALIAS) 2864 return self.expression( 2865 exp.Cache, 2866 this=table, 2867 lazy=lazy, 2868 options=options, 2869 expression=self._parse_select(nested=True), 2870 ) 2871 2872 def _parse_partition(self) -> t.Optional[exp.Partition]: 2873 if not self._match(TokenType.PARTITION): 2874 return None 2875 2876 return self.expression( 2877 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2878 ) 2879 2880 def _parse_value(self) -> t.Optional[exp.Tuple]: 2881 if self._match(TokenType.L_PAREN): 2882 expressions = self._parse_csv(self._parse_expression) 2883 self._match_r_paren() 2884 return self.expression(exp.Tuple, expressions=expressions) 2885 2886 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2887 expression = self._parse_expression() 2888 if expression: 2889 return self.expression(exp.Tuple, expressions=[expression]) 2890 return None 2891 2892 def _parse_projections(self) -> t.List[exp.Expression]: 2893 return self._parse_expressions() 2894 2895 def _parse_select( 2896 self, 2897 nested: bool = False, 2898 table: bool = False, 2899 parse_subquery_alias: bool = True, 2900 parse_set_operation: bool = True, 2901 ) -> t.Optional[exp.Expression]: 2902 cte = self._parse_with() 2903 2904 if cte: 2905 this = self._parse_statement() 2906 2907 if not this: 2908 self.raise_error("Failed to parse any statement following CTE") 2909 return cte 2910 2911 if "with" in this.arg_types: 2912 this.set("with", cte) 2913 else: 2914 self.raise_error(f"{this.key} does not support CTE") 2915 this = cte 2916 2917 return this 2918 2919 # duckdb supports leading with FROM x 2920 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2921 2922 if self._match(TokenType.SELECT): 2923 comments = self._prev_comments 2924 2925 hint = self._parse_hint() 2926 2927 if self._next and not self._next.token_type == TokenType.DOT: 2928 all_ = self._match(TokenType.ALL) 2929 distinct = self._match_set(self.DISTINCT_TOKENS) 2930 else: 2931 all_, distinct = None, None 2932 2933 kind = ( 2934 self._match(TokenType.ALIAS) 2935 and self._match_texts(("STRUCT", "VALUE")) 2936 and self._prev.text.upper() 2937 ) 2938 2939 if distinct: 2940 distinct = self.expression( 2941 exp.Distinct, 2942 on=self._parse_value() if self._match(TokenType.ON) else None, 2943 ) 2944 2945 if all_ and distinct: 2946 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2947 2948 limit = self._parse_limit(top=True) 2949 projections = self._parse_projections() 2950 2951 this = self.expression( 2952 exp.Select, 2953 kind=kind, 2954 hint=hint, 2955 distinct=distinct, 2956 expressions=projections, 2957 limit=limit, 2958 ) 2959 this.comments = comments 2960 2961 into = self._parse_into() 2962 if into: 2963 this.set("into", into) 2964 2965 if 
not from_: 2966 from_ = self._parse_from() 2967 2968 if from_: 2969 this.set("from", from_) 2970 2971 this = self._parse_query_modifiers(this) 2972 elif (table or nested) and self._match(TokenType.L_PAREN): 2973 if self._match(TokenType.PIVOT): 2974 this = self._parse_simplified_pivot() 2975 elif self._match(TokenType.FROM): 2976 this = exp.select("*").from_( 2977 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2978 ) 2979 else: 2980 this = ( 2981 self._parse_table() 2982 if table 2983 else self._parse_select(nested=True, parse_set_operation=False) 2984 ) 2985 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2986 2987 self._match_r_paren() 2988 2989 # We return early here so that the UNION isn't attached to the subquery by the 2990 # following call to _parse_set_operations, but instead becomes the parent node 2991 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2992 elif self._match(TokenType.VALUES, advance=False): 2993 this = self._parse_derived_table_values() 2994 elif from_: 2995 this = exp.select("*").from_(from_.this, copy=False) 2996 elif self._match(TokenType.SUMMARIZE): 2997 table = self._match(TokenType.TABLE) 2998 this = self._parse_select() or self._parse_string() or self._parse_table() 2999 return self.expression(exp.Summarize, this=this, table=table) 3000 elif self._match(TokenType.DESCRIBE): 3001 this = self._parse_describe() 3002 elif self._match_text_seq("STREAM"): 3003 this = self.expression(exp.Stream, this=self._parse_function()) 3004 else: 3005 this = None 3006 3007 return self._parse_set_operations(this) if parse_set_operation else this 3008 3009 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3010 if not skip_with_token and not self._match(TokenType.WITH): 3011 return None 3012 3013 comments = self._prev_comments 3014 recursive = self._match(TokenType.RECURSIVE) 3015 3016 expressions = [] 3017 while True: 3018 expressions.append(self._parse_cte()) 3019 3020 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3021 break 3022 else: 3023 self._match(TokenType.WITH) 3024 3025 return self.expression( 3026 exp.With, comments=comments, expressions=expressions, recursive=recursive 3027 ) 3028 3029 def _parse_cte(self) -> exp.CTE: 3030 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3031 if not alias or not alias.this: 3032 self.raise_error("Expected CTE to have alias") 3033 3034 self._match(TokenType.ALIAS) 3035 comments = self._prev_comments 3036 3037 if self._match_text_seq("NOT", "MATERIALIZED"): 3038 materialized = False 3039 elif self._match_text_seq("MATERIALIZED"): 3040 materialized = True 3041 else: 3042 materialized = None 3043 3044 return self.expression( 3045 exp.CTE, 3046 this=self._parse_wrapped(self._parse_statement), 3047 alias=alias, 3048 materialized=materialized, 3049 comments=comments, 3050 ) 3051 3052 def _parse_table_alias( 3053 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3054 ) -> t.Optional[exp.TableAlias]: 3055 any_token = self._match(TokenType.ALIAS) 3056 alias = ( 3057 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3058 or self._parse_string_as_identifier() 3059 ) 3060 3061 index = self._index 3062 if self._match(TokenType.L_PAREN): 3063 columns = self._parse_csv(self._parse_function_parameter) 3064 self._match_r_paren() if columns else self._retreat(index) 3065 else: 3066 columns = None 3067 3068 if not alias and not columns: 3069 return None 3070 3071 table_alias = 
self.expression(exp.TableAlias, this=alias, columns=columns) 3072 3073 # We bubble up comments from the Identifier to the TableAlias 3074 if isinstance(alias, exp.Identifier): 3075 table_alias.add_comments(alias.pop_comments()) 3076 3077 return table_alias 3078 3079 def _parse_subquery( 3080 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3081 ) -> t.Optional[exp.Subquery]: 3082 if not this: 3083 return None 3084 3085 return self.expression( 3086 exp.Subquery, 3087 this=this, 3088 pivots=self._parse_pivots(), 3089 alias=self._parse_table_alias() if parse_alias else None, 3090 sample=self._parse_table_sample(), 3091 ) 3092 3093 def _implicit_unnests_to_explicit(self, this: E) -> E: 3094 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3095 3096 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3097 for i, join in enumerate(this.args.get("joins") or []): 3098 table = join.this 3099 normalized_table = table.copy() 3100 normalized_table.meta["maybe_column"] = True 3101 normalized_table = _norm(normalized_table, dialect=self.dialect) 3102 3103 if isinstance(table, exp.Table) and not join.args.get("on"): 3104 if normalized_table.parts[0].name in refs: 3105 table_as_column = table.to_column() 3106 unnest = exp.Unnest(expressions=[table_as_column]) 3107 3108 # Table.to_column creates a parent Alias node that we want to convert to 3109 # a TableAlias and attach to the Unnest, so it matches the parser's output 3110 if isinstance(table.args.get("alias"), exp.TableAlias): 3111 table_as_column.replace(table_as_column.this) 3112 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3113 3114 table.replace(unnest) 3115 3116 refs.add(normalized_table.alias_or_name) 3117 3118 return this 3119 3120 def _parse_query_modifiers( 3121 self, this: t.Optional[exp.Expression] 3122 ) -> t.Optional[exp.Expression]: 3123 if isinstance(this, (exp.Query, exp.Table)): 3124 for join in self._parse_joins(): 3125 this.append("joins", join) 3126 for lateral in iter(self._parse_lateral, None): 3127 this.append("laterals", lateral) 3128 3129 while True: 3130 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3131 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3132 key, expression = parser(self) 3133 3134 if expression: 3135 this.set(key, expression) 3136 if key == "limit": 3137 offset = expression.args.pop("offset", None) 3138 3139 if offset: 3140 offset = exp.Offset(expression=offset) 3141 this.set("offset", offset) 3142 3143 limit_by_expressions = expression.expressions 3144 expression.set("expressions", None) 3145 offset.set("expressions", limit_by_expressions) 3146 continue 3147 break 3148 3149 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3150 this = self._implicit_unnests_to_explicit(this) 3151 3152 return this 3153 3154 def _parse_hint(self) -> t.Optional[exp.Hint]: 3155 if self._match(TokenType.HINT): 3156 hints = [] 3157 for hint in iter( 3158 lambda: self._parse_csv( 3159 lambda: self._parse_function() or self._parse_var(upper=True) 3160 ), 3161 [], 3162 ): 3163 hints.extend(hint) 3164 3165 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3166 self.raise_error("Expected */ after HINT") 3167 3168 return self.expression(exp.Hint, expressions=hints) 3169 3170 return None 3171 3172 def _parse_into(self) -> t.Optional[exp.Into]: 3173 if not self._match(TokenType.INTO): 3174 return None 3175 3176 temp = self._match(TokenType.TEMPORARY) 3177 unlogged = 
self._match_text_seq("UNLOGGED") 3178 self._match(TokenType.TABLE) 3179 3180 return self.expression( 3181 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3182 ) 3183 3184 def _parse_from( 3185 self, joins: bool = False, skip_from_token: bool = False 3186 ) -> t.Optional[exp.From]: 3187 if not skip_from_token and not self._match(TokenType.FROM): 3188 return None 3189 3190 return self.expression( 3191 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3192 ) 3193 3194 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3195 return self.expression( 3196 exp.MatchRecognizeMeasure, 3197 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3198 this=self._parse_expression(), 3199 ) 3200 3201 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3202 if not self._match(TokenType.MATCH_RECOGNIZE): 3203 return None 3204 3205 self._match_l_paren() 3206 3207 partition = self._parse_partition_by() 3208 order = self._parse_order() 3209 3210 measures = ( 3211 self._parse_csv(self._parse_match_recognize_measure) 3212 if self._match_text_seq("MEASURES") 3213 else None 3214 ) 3215 3216 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3217 rows = exp.var("ONE ROW PER MATCH") 3218 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3219 text = "ALL ROWS PER MATCH" 3220 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3221 text += " SHOW EMPTY MATCHES" 3222 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3223 text += " OMIT EMPTY MATCHES" 3224 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3225 text += " WITH UNMATCHED ROWS" 3226 rows = exp.var(text) 3227 else: 3228 rows = None 3229 3230 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3231 text = "AFTER MATCH SKIP" 3232 if self._match_text_seq("PAST", "LAST", "ROW"): 3233 text += " PAST LAST ROW" 3234 elif self._match_text_seq("TO", "NEXT", "ROW"): 3235 text += " TO NEXT ROW" 3236 elif self._match_text_seq("TO", "FIRST"): 3237 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3238 elif self._match_text_seq("TO", "LAST"): 3239 text += f" TO LAST {self._advance_any().text}" # type: ignore 3240 after = exp.var(text) 3241 else: 3242 after = None 3243 3244 if self._match_text_seq("PATTERN"): 3245 self._match_l_paren() 3246 3247 if not self._curr: 3248 self.raise_error("Expecting )", self._curr) 3249 3250 paren = 1 3251 start = self._curr 3252 3253 while self._curr and paren > 0: 3254 if self._curr.token_type == TokenType.L_PAREN: 3255 paren += 1 3256 if self._curr.token_type == TokenType.R_PAREN: 3257 paren -= 1 3258 3259 end = self._prev 3260 self._advance() 3261 3262 if paren > 0: 3263 self.raise_error("Expecting )", self._curr) 3264 3265 pattern = exp.var(self._find_sql(start, end)) 3266 else: 3267 pattern = None 3268 3269 define = ( 3270 self._parse_csv(self._parse_name_as_expression) 3271 if self._match_text_seq("DEFINE") 3272 else None 3273 ) 3274 3275 self._match_r_paren() 3276 3277 return self.expression( 3278 exp.MatchRecognize, 3279 partition_by=partition, 3280 order=order, 3281 measures=measures, 3282 rows=rows, 3283 after=after, 3284 pattern=pattern, 3285 define=define, 3286 alias=self._parse_table_alias(), 3287 ) 3288 3289 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3290 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3291 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3292 cross_apply = False 3293 3294 if cross_apply is not None: 
3295 this = self._parse_select(table=True) 3296 view = None 3297 outer = None 3298 elif self._match(TokenType.LATERAL): 3299 this = self._parse_select(table=True) 3300 view = self._match(TokenType.VIEW) 3301 outer = self._match(TokenType.OUTER) 3302 else: 3303 return None 3304 3305 if not this: 3306 this = ( 3307 self._parse_unnest() 3308 or self._parse_function() 3309 or self._parse_id_var(any_token=False) 3310 ) 3311 3312 while self._match(TokenType.DOT): 3313 this = exp.Dot( 3314 this=this, 3315 expression=self._parse_function() or self._parse_id_var(any_token=False), 3316 ) 3317 3318 if view: 3319 table = self._parse_id_var(any_token=False) 3320 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3321 table_alias: t.Optional[exp.TableAlias] = self.expression( 3322 exp.TableAlias, this=table, columns=columns 3323 ) 3324 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3325 # We move the alias from the lateral's child node to the lateral itself 3326 table_alias = this.args["alias"].pop() 3327 else: 3328 table_alias = self._parse_table_alias() 3329 3330 return self.expression( 3331 exp.Lateral, 3332 this=this, 3333 view=view, 3334 outer=outer, 3335 alias=table_alias, 3336 cross_apply=cross_apply, 3337 ) 3338 3339 def _parse_join_parts( 3340 self, 3341 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3342 return ( 3343 self._match_set(self.JOIN_METHODS) and self._prev, 3344 self._match_set(self.JOIN_SIDES) and self._prev, 3345 self._match_set(self.JOIN_KINDS) and self._prev, 3346 ) 3347 3348 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3349 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3350 this = self._parse_column() 3351 if isinstance(this, exp.Column): 3352 return this.this 3353 return this 3354 3355 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3356 3357 def _parse_join( 3358 self, skip_join_token: bool = False, parse_bracket: bool = False 3359 ) -> t.Optional[exp.Join]: 3360 if self._match(TokenType.COMMA): 3361 return self.expression(exp.Join, this=self._parse_table()) 3362 3363 index = self._index 3364 method, side, kind = self._parse_join_parts() 3365 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3366 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3367 3368 if not skip_join_token and not join: 3369 self._retreat(index) 3370 kind = None 3371 method = None 3372 side = None 3373 3374 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3375 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3376 3377 if not skip_join_token and not join and not outer_apply and not cross_apply: 3378 return None 3379 3380 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3381 3382 if method: 3383 kwargs["method"] = method.text 3384 if side: 3385 kwargs["side"] = side.text 3386 if kind: 3387 kwargs["kind"] = kind.text 3388 if hint: 3389 kwargs["hint"] = hint 3390 3391 if self._match(TokenType.MATCH_CONDITION): 3392 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3393 3394 if self._match(TokenType.ON): 3395 kwargs["on"] = self._parse_assignment() 3396 elif self._match(TokenType.USING): 3397 kwargs["using"] = self._parse_using_identifiers() 3398 elif ( 3399 not (outer_apply or cross_apply) 3400 and not isinstance(kwargs["this"], exp.Unnest) 3401 and not (kind and kind.token_type == TokenType.CROSS) 3402 
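# Illustrative sketch (editor's example, not part of the parser source):
# _parse_join_parts splits a join into method/side/kind tokens, which land as
# string args on exp.Join:
# >>> import sqlglot
# >>> join = sqlglot.parse_one("SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x").args["joins"][0]
# >>> join.side, join.kind
# ('LEFT', 'OUTER')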
): 3403 index = self._index 3404 joins: t.Optional[list] = list(self._parse_joins()) 3405 3406 if joins and self._match(TokenType.ON): 3407 kwargs["on"] = self._parse_assignment() 3408 elif joins and self._match(TokenType.USING): 3409 kwargs["using"] = self._parse_using_identifiers() 3410 else: 3411 joins = None 3412 self._retreat(index) 3413 3414 kwargs["this"].set("joins", joins if joins else None) 3415 3416 comments = [c for token in (method, side, kind) if token for c in token.comments] 3417 return self.expression(exp.Join, comments=comments, **kwargs) 3418 3419 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3420 this = self._parse_assignment() 3421 3422 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3423 return this 3424 3425 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3426 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3427 3428 return this 3429 3430 def _parse_index_params(self) -> exp.IndexParameters: 3431 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3432 3433 if self._match(TokenType.L_PAREN, advance=False): 3434 columns = self._parse_wrapped_csv(self._parse_with_operator) 3435 else: 3436 columns = None 3437 3438 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3439 partition_by = self._parse_partition_by() 3440 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3441 tablespace = ( 3442 self._parse_var(any_token=True) 3443 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3444 else None 3445 ) 3446 where = self._parse_where() 3447 3448 on = self._parse_field() if self._match(TokenType.ON) else None 3449 3450 return self.expression( 3451 exp.IndexParameters, 3452 using=using, 3453 columns=columns, 3454 include=include, 3455 partition_by=partition_by, 3456 where=where, 3457 with_storage=with_storage, 3458 tablespace=tablespace, 3459 on=on, 3460 ) 3461 3462 def _parse_index( 3463 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3464 ) -> t.Optional[exp.Index]: 3465 if index or anonymous: 3466 unique = None 3467 primary = None 3468 amp = None 3469 3470 self._match(TokenType.ON) 3471 self._match(TokenType.TABLE) # hive 3472 table = self._parse_table_parts(schema=True) 3473 else: 3474 unique = self._match(TokenType.UNIQUE) 3475 primary = self._match_text_seq("PRIMARY") 3476 amp = self._match_text_seq("AMP") 3477 3478 if not self._match(TokenType.INDEX): 3479 return None 3480 3481 index = self._parse_id_var() 3482 table = None 3483 3484 params = self._parse_index_params() 3485 3486 return self.expression( 3487 exp.Index, 3488 this=index, 3489 table=table, 3490 unique=unique, 3491 primary=primary, 3492 amp=amp, 3493 params=params, 3494 ) 3495 3496 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3497 hints: t.List[exp.Expression] = [] 3498 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3499 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3500 hints.append( 3501 self.expression( 3502 exp.WithTableHint, 3503 expressions=self._parse_csv( 3504 lambda: self._parse_function() or self._parse_var(any_token=True) 3505 ), 3506 ) 3507 ) 3508 self._match_r_paren() 3509 else: 3510 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3511 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3512 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3513 3514 self._match_set((TokenType.INDEX, 
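# Illustrative sketch (editor's example, not part of the parser source):
# T-SQL style WITH (...) hints become exp.WithTableHint, while MySQL
# USE/FORCE/IGNORE INDEX hints become exp.IndexTableHint; both are stored
# under the table's "hints" arg. Assuming the T-SQL dialect:
# >>> import sqlglot
# >>> tbl = sqlglot.parse_one("SELECT * FROM t WITH (NOLOCK)", read="tsql").args["from"].this
# >>> type(tbl.args["hints"][0]).__name__
# 'WithTableHint'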
TokenType.KEY)) 3515 if self._match(TokenType.FOR): 3516 hint.set("target", self._advance_any() and self._prev.text.upper()) 3517 3518 hint.set("expressions", self._parse_wrapped_id_vars()) 3519 hints.append(hint) 3520 3521 return hints or None 3522 3523 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3524 return ( 3525 (not schema and self._parse_function(optional_parens=False)) 3526 or self._parse_id_var(any_token=False) 3527 or self._parse_string_as_identifier() 3528 or self._parse_placeholder() 3529 ) 3530 3531 def _parse_table_parts( 3532 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3533 ) -> exp.Table: 3534 catalog = None 3535 db = None 3536 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3537 3538 while self._match(TokenType.DOT): 3539 if catalog: 3540 # This allows nesting the table in arbitrarily many dot expressions if needed 3541 table = self.expression( 3542 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3543 ) 3544 else: 3545 catalog = db 3546 db = table 3547 # "" used for tsql FROM a..b case 3548 table = self._parse_table_part(schema=schema) or "" 3549 3550 if ( 3551 wildcard 3552 and self._is_connected() 3553 and (isinstance(table, exp.Identifier) or not table) 3554 and self._match(TokenType.STAR) 3555 ): 3556 if isinstance(table, exp.Identifier): 3557 table.args["this"] += "*" 3558 else: 3559 table = exp.Identifier(this="*") 3560 3561 # We bubble up comments from the Identifier to the Table 3562 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3563 3564 if is_db_reference: 3565 catalog = db 3566 db = table 3567 table = None 3568 3569 if not table and not is_db_reference: 3570 self.raise_error(f"Expected table name but got {self._curr}") 3571 if not db and is_db_reference: 3572 self.raise_error(f"Expected database name but got {self._curr}") 3573 3574 table = self.expression( 3575 exp.Table, 3576 comments=comments, 3577 this=table, 3578 db=db, 3579 catalog=catalog, 3580 ) 3581 3582 changes = self._parse_changes() 3583 if changes: 3584 table.set("changes", changes) 3585 3586 at_before = self._parse_historical_data() 3587 if at_before: 3588 table.set("when", at_before) 3589 3590 pivots = self._parse_pivots() 3591 if pivots: 3592 table.set("pivots", pivots) 3593 3594 return table 3595 3596 def _parse_table( 3597 self, 3598 schema: bool = False, 3599 joins: bool = False, 3600 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3601 parse_bracket: bool = False, 3602 is_db_reference: bool = False, 3603 parse_partition: bool = False, 3604 ) -> t.Optional[exp.Expression]: 3605 lateral = self._parse_lateral() 3606 if lateral: 3607 return lateral 3608 3609 unnest = self._parse_unnest() 3610 if unnest: 3611 return unnest 3612 3613 values = self._parse_derived_table_values() 3614 if values: 3615 return values 3616 3617 subquery = self._parse_select(table=True) 3618 if subquery: 3619 if not subquery.args.get("pivots"): 3620 subquery.set("pivots", self._parse_pivots()) 3621 return subquery 3622 3623 bracket = parse_bracket and self._parse_bracket(None) 3624 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3625 3626 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3627 self._parse_table 3628 ) 3629 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3630 3631 only = self._match(TokenType.ONLY) 3632 3633 this = t.cast( 3634 exp.Expression, 3635 
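# Illustrative sketch (editor's example, not part of the parser source): the
# dot loop in _parse_table_parts assigns trailing parts to catalog/db/this, so
# a three-part name is recoverable as:
# >>> import sqlglot
# >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.t").args["from"].this
# >>> tbl.catalog, tbl.db, tbl.name
# ('c', 'd', 't')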
bracket 3636 or rows_from 3637 or self._parse_bracket( 3638 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3639 ), 3640 ) 3641 3642 if only: 3643 this.set("only", only) 3644 3645 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3646 self._match_text_seq("*") 3647 3648 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3649 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3650 this.set("partition", self._parse_partition()) 3651 3652 if schema: 3653 return self._parse_schema(this=this) 3654 3655 version = self._parse_version() 3656 3657 if version: 3658 this.set("version", version) 3659 3660 if self.dialect.ALIAS_POST_TABLESAMPLE: 3661 this.set("sample", self._parse_table_sample()) 3662 3663 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3664 if alias: 3665 this.set("alias", alias) 3666 3667 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3668 return self.expression( 3669 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3670 ) 3671 3672 this.set("hints", self._parse_table_hints()) 3673 3674 if not this.args.get("pivots"): 3675 this.set("pivots", self._parse_pivots()) 3676 3677 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3678 this.set("sample", self._parse_table_sample()) 3679 3680 if joins: 3681 for join in self._parse_joins(): 3682 this.append("joins", join) 3683 3684 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3685 this.set("ordinality", True) 3686 this.set("alias", self._parse_table_alias()) 3687 3688 return this 3689 3690 def _parse_version(self) -> t.Optional[exp.Version]: 3691 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3692 this = "TIMESTAMP" 3693 elif self._match(TokenType.VERSION_SNAPSHOT): 3694 this = "VERSION" 3695 else: 3696 return None 3697 3698 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3699 kind = self._prev.text.upper() 3700 start = self._parse_bitwise() 3701 self._match_texts(("TO", "AND")) 3702 end = self._parse_bitwise() 3703 expression: t.Optional[exp.Expression] = self.expression( 3704 exp.Tuple, expressions=[start, end] 3705 ) 3706 elif self._match_text_seq("CONTAINED", "IN"): 3707 kind = "CONTAINED IN" 3708 expression = self.expression( 3709 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3710 ) 3711 elif self._match(TokenType.ALL): 3712 kind = "ALL" 3713 expression = None 3714 else: 3715 self._match_text_seq("AS", "OF") 3716 kind = "AS OF" 3717 expression = self._parse_type() 3718 3719 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3720 3721 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3722 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3723 index = self._index 3724 historical_data = None 3725 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3726 this = self._prev.text.upper() 3727 kind = ( 3728 self._match(TokenType.L_PAREN) 3729 and self._match_texts(self.HISTORICAL_DATA_KIND) 3730 and self._prev.text.upper() 3731 ) 3732 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3733 3734 if expression: 3735 self._match_r_paren() 3736 historical_data = self.expression( 3737 exp.HistoricalData, this=this, kind=kind, expression=expression 3738 ) 3739 else: 3740 self._retreat(index) 3741 3742 return historical_data 3743 3744 def _parse_changes(self) -> t.Optional[exp.Changes]: 3745 if not self._match_text_seq("CHANGES", "(", "INFORMATION", 
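# Illustrative sketch (editor's example, not part of the parser source):
# Snowflake's AT/BEFORE clause is consumed by _parse_historical_data and
# stored under the table's "when" arg. Assuming the Snowflake dialect:
# >>> import sqlglot
# >>> sql = "SELECT * FROM t AT(TIMESTAMP => '2024-01-01')"
# >>> tbl = sqlglot.parse_one(sql, read="snowflake").args["from"].this
# >>> type(tbl.args["when"]).__name__
# 'HistoricalData'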
"=>"): 3746 return None 3747 3748 information = self._parse_var(any_token=True) 3749 self._match_r_paren() 3750 3751 return self.expression( 3752 exp.Changes, 3753 information=information, 3754 at_before=self._parse_historical_data(), 3755 end=self._parse_historical_data(), 3756 ) 3757 3758 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3759 if not self._match(TokenType.UNNEST): 3760 return None 3761 3762 expressions = self._parse_wrapped_csv(self._parse_equality) 3763 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3764 3765 alias = self._parse_table_alias() if with_alias else None 3766 3767 if alias: 3768 if self.dialect.UNNEST_COLUMN_ONLY: 3769 if alias.args.get("columns"): 3770 self.raise_error("Unexpected extra column alias in unnest.") 3771 3772 alias.set("columns", [alias.this]) 3773 alias.set("this", None) 3774 3775 columns = alias.args.get("columns") or [] 3776 if offset and len(expressions) < len(columns): 3777 offset = columns.pop() 3778 3779 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3780 self._match(TokenType.ALIAS) 3781 offset = self._parse_id_var( 3782 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3783 ) or exp.to_identifier("offset") 3784 3785 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3786 3787 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3788 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3789 if not is_derived and not ( 3790 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3791 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3792 ): 3793 return None 3794 3795 expressions = self._parse_csv(self._parse_value) 3796 alias = self._parse_table_alias() 3797 3798 if is_derived: 3799 self._match_r_paren() 3800 3801 return self.expression( 3802 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3803 ) 3804 3805 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3806 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3807 as_modifier and self._match_text_seq("USING", "SAMPLE") 3808 ): 3809 return None 3810 3811 bucket_numerator = None 3812 bucket_denominator = None 3813 bucket_field = None 3814 percent = None 3815 size = None 3816 seed = None 3817 3818 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3819 matched_l_paren = self._match(TokenType.L_PAREN) 3820 3821 if self.TABLESAMPLE_CSV: 3822 num = None 3823 expressions = self._parse_csv(self._parse_primary) 3824 else: 3825 expressions = None 3826 num = ( 3827 self._parse_factor() 3828 if self._match(TokenType.NUMBER, advance=False) 3829 else self._parse_primary() or self._parse_placeholder() 3830 ) 3831 3832 if self._match_text_seq("BUCKET"): 3833 bucket_numerator = self._parse_number() 3834 self._match_text_seq("OUT", "OF") 3835 bucket_denominator = bucket_denominator = self._parse_number() 3836 self._match(TokenType.ON) 3837 bucket_field = self._parse_field() 3838 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3839 percent = num 3840 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3841 size = num 3842 else: 3843 percent = num 3844 3845 if matched_l_paren: 3846 self._match_r_paren() 3847 3848 if self._match(TokenType.L_PAREN): 3849 method = self._parse_var(upper=True) 3850 seed = self._match(TokenType.COMMA) and self._parse_number() 3851 self._match_r_paren() 3852 elif self._match_texts(("SEED", 
"REPEATABLE")): 3853 seed = self._parse_wrapped(self._parse_number) 3854 3855 if not method and self.DEFAULT_SAMPLING_METHOD: 3856 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3857 3858 return self.expression( 3859 exp.TableSample, 3860 expressions=expressions, 3861 method=method, 3862 bucket_numerator=bucket_numerator, 3863 bucket_denominator=bucket_denominator, 3864 bucket_field=bucket_field, 3865 percent=percent, 3866 size=size, 3867 seed=seed, 3868 ) 3869 3870 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3871 return list(iter(self._parse_pivot, None)) or None 3872 3873 def _parse_joins(self) -> t.Iterator[exp.Join]: 3874 return iter(self._parse_join, None) 3875 3876 # https://duckdb.org/docs/sql/statements/pivot 3877 def _parse_simplified_pivot(self) -> exp.Pivot: 3878 def _parse_on() -> t.Optional[exp.Expression]: 3879 this = self._parse_bitwise() 3880 return self._parse_in(this) if self._match(TokenType.IN) else this 3881 3882 this = self._parse_table() 3883 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3884 using = self._match(TokenType.USING) and self._parse_csv( 3885 lambda: self._parse_alias(self._parse_function()) 3886 ) 3887 group = self._parse_group() 3888 return self.expression( 3889 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3890 ) 3891 3892 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3893 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3894 this = self._parse_select_or_expression() 3895 3896 self._match(TokenType.ALIAS) 3897 alias = self._parse_bitwise() 3898 if alias: 3899 if isinstance(alias, exp.Column) and not alias.db: 3900 alias = alias.this 3901 return self.expression(exp.PivotAlias, this=this, alias=alias) 3902 3903 return this 3904 3905 value = self._parse_column() 3906 3907 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3908 self.raise_error("Expecting IN (") 3909 3910 if self._match(TokenType.ANY): 3911 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3912 else: 3913 exprs = self._parse_csv(_parse_aliased_expression) 3914 3915 self._match_r_paren() 3916 return self.expression(exp.In, this=value, expressions=exprs) 3917 3918 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3919 index = self._index 3920 include_nulls = None 3921 3922 if self._match(TokenType.PIVOT): 3923 unpivot = False 3924 elif self._match(TokenType.UNPIVOT): 3925 unpivot = True 3926 3927 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3928 if self._match_text_seq("INCLUDE", "NULLS"): 3929 include_nulls = True 3930 elif self._match_text_seq("EXCLUDE", "NULLS"): 3931 include_nulls = False 3932 else: 3933 return None 3934 3935 expressions = [] 3936 3937 if not self._match(TokenType.L_PAREN): 3938 self._retreat(index) 3939 return None 3940 3941 if unpivot: 3942 expressions = self._parse_csv(self._parse_column) 3943 else: 3944 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3945 3946 if not expressions: 3947 self.raise_error("Failed to parse PIVOT's aggregation list") 3948 3949 if not self._match(TokenType.FOR): 3950 self.raise_error("Expecting FOR") 3951 3952 field = self._parse_pivot_in() 3953 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3954 self._parse_bitwise 3955 ) 3956 3957 self._match_r_paren() 3958 3959 pivot = self.expression( 3960 exp.Pivot, 3961 expressions=expressions, 3962 field=field, 3963 unpivot=unpivot, 3964 
include_nulls=include_nulls, 3965 default_on_null=default_on_null, 3966 ) 3967 3968 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3969 pivot.set("alias", self._parse_table_alias()) 3970 3971 if not unpivot: 3972 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3973 3974 columns: t.List[exp.Expression] = [] 3975 for fld in pivot.args["field"].expressions: 3976 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3977 for name in names: 3978 if self.PREFIXED_PIVOT_COLUMNS: 3979 name = f"{name}_{field_name}" if name else field_name 3980 else: 3981 name = f"{field_name}_{name}" if name else field_name 3982 3983 columns.append(exp.to_identifier(name)) 3984 3985 pivot.set("columns", columns) 3986 3987 return pivot 3988 3989 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3990 return [agg.alias for agg in aggregations] 3991 3992 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3993 if not skip_where_token and not self._match(TokenType.PREWHERE): 3994 return None 3995 3996 return self.expression( 3997 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3998 ) 3999 4000 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4001 if not skip_where_token and not self._match(TokenType.WHERE): 4002 return None 4003 4004 return self.expression( 4005 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4006 ) 4007 4008 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4009 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4010 return None 4011 4012 elements: t.Dict[str, t.Any] = defaultdict(list) 4013 4014 if self._match(TokenType.ALL): 4015 elements["all"] = True 4016 elif self._match(TokenType.DISTINCT): 4017 elements["all"] = False 4018 4019 while True: 4020 index = self._index 4021 4022 elements["expressions"].extend( 4023 self._parse_csv( 4024 lambda: None 4025 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4026 else self._parse_assignment() 4027 ) 4028 ) 4029 4030 before_with_index = self._index 4031 with_prefix = self._match(TokenType.WITH) 4032 4033 if self._match(TokenType.ROLLUP): 4034 elements["rollup"].append( 4035 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4036 ) 4037 elif self._match(TokenType.CUBE): 4038 elements["cube"].append( 4039 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4040 ) 4041 elif self._match(TokenType.GROUPING_SETS): 4042 elements["grouping_sets"].append( 4043 self.expression( 4044 exp.GroupingSets, 4045 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4046 ) 4047 ) 4048 elif self._match_text_seq("TOTALS"): 4049 elements["totals"] = True # type: ignore 4050 4051 if before_with_index <= self._index <= before_with_index + 1: 4052 self._retreat(before_with_index) 4053 break 4054 4055 if index == self._index: 4056 break 4057 4058 return self.expression(exp.Group, **elements) # type: ignore 4059 4060 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4061 return self.expression( 4062 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4063 ) 4064 4065 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4066 if self._match(TokenType.L_PAREN): 4067 grouping_set = self._parse_csv(self._parse_column) 4068 self._match_r_paren() 4069 return self.expression(exp.Tuple, 
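# Illustrative sketch (editor's example, not part of the parser source):
# _parse_group collects ROLLUP/CUBE/GROUPING SETS into separate list args on
# exp.Group:
# >>> import sqlglot
# >>> group = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").args["group"]
# >>> [type(e).__name__ for e in group.args["rollup"]]
# ['Rollup']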
expressions=grouping_set) 4070 4071 return self._parse_column() 4072 4073 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4074 if not skip_having_token and not self._match(TokenType.HAVING): 4075 return None 4076 return self.expression(exp.Having, this=self._parse_assignment()) 4077 4078 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4079 if not self._match(TokenType.QUALIFY): 4080 return None 4081 return self.expression(exp.Qualify, this=self._parse_assignment()) 4082 4083 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4084 if skip_start_token: 4085 start = None 4086 elif self._match(TokenType.START_WITH): 4087 start = self._parse_assignment() 4088 else: 4089 return None 4090 4091 self._match(TokenType.CONNECT_BY) 4092 nocycle = self._match_text_seq("NOCYCLE") 4093 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4094 exp.Prior, this=self._parse_bitwise() 4095 ) 4096 connect = self._parse_assignment() 4097 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4098 4099 if not start and self._match(TokenType.START_WITH): 4100 start = self._parse_assignment() 4101 4102 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4103 4104 def _parse_name_as_expression(self) -> exp.Alias: 4105 return self.expression( 4106 exp.Alias, 4107 alias=self._parse_id_var(any_token=True), 4108 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4109 ) 4110 4111 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4112 if self._match_text_seq("INTERPOLATE"): 4113 return self._parse_wrapped_csv(self._parse_name_as_expression) 4114 return None 4115 4116 def _parse_order( 4117 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4118 ) -> t.Optional[exp.Expression]: 4119 siblings = None 4120 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4121 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4122 return this 4123 4124 siblings = True 4125 4126 return self.expression( 4127 exp.Order, 4128 this=this, 4129 expressions=self._parse_csv(self._parse_ordered), 4130 siblings=siblings, 4131 ) 4132 4133 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4134 if not self._match(token): 4135 return None 4136 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4137 4138 def _parse_ordered( 4139 self, parse_method: t.Optional[t.Callable] = None 4140 ) -> t.Optional[exp.Ordered]: 4141 this = parse_method() if parse_method else self._parse_assignment() 4142 if not this: 4143 return None 4144 4145 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4146 this = exp.var("ALL") 4147 4148 asc = self._match(TokenType.ASC) 4149 desc = self._match(TokenType.DESC) or (asc and False) 4150 4151 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4152 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4153 4154 nulls_first = is_nulls_first or False 4155 explicitly_null_ordered = is_nulls_first or is_nulls_last 4156 4157 if ( 4158 not explicitly_null_ordered 4159 and ( 4160 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4161 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4162 ) 4163 and self.dialect.NULL_ORDERING != "nulls_are_last" 4164 ): 4165 nulls_first = True 4166 4167 if self._match_text_seq("WITH", "FILL"): 4168 with_fill = self.expression( 4169 exp.WithFill, 4170 **{ # type: ignore 4171 "from": self._match(TokenType.FROM) and 
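# Illustrative sketch (editor's example, not part of the parser source): in
# _parse_ordered, an explicit NULLS FIRST/LAST wins; otherwise nulls_first is
# derived from the dialect's NULL_ORDERING:
# >>> import sqlglot
# >>> ordered = sqlglot.parse_one("SELECT a FROM t ORDER BY a DESC NULLS LAST").args["order"].expressions[0]
# >>> ordered.args["desc"], ordered.args["nulls_first"]
# (True, False)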
self._parse_bitwise(), 4172 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4173 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4174 "interpolate": self._parse_interpolate(), 4175 }, 4176 ) 4177 else: 4178 with_fill = None 4179 4180 return self.expression( 4181 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4182 ) 4183 4184 def _parse_limit( 4185 self, 4186 this: t.Optional[exp.Expression] = None, 4187 top: bool = False, 4188 skip_limit_token: bool = False, 4189 ) -> t.Optional[exp.Expression]: 4190 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4191 comments = self._prev_comments 4192 if top: 4193 limit_paren = self._match(TokenType.L_PAREN) 4194 expression = self._parse_term() if limit_paren else self._parse_number() 4195 4196 if limit_paren: 4197 self._match_r_paren() 4198 else: 4199 expression = self._parse_term() 4200 4201 if self._match(TokenType.COMMA): 4202 offset = expression 4203 expression = self._parse_term() 4204 else: 4205 offset = None 4206 4207 limit_exp = self.expression( 4208 exp.Limit, 4209 this=this, 4210 expression=expression, 4211 offset=offset, 4212 comments=comments, 4213 expressions=self._parse_limit_by(), 4214 ) 4215 4216 return limit_exp 4217 4218 if self._match(TokenType.FETCH): 4219 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4220 direction = self._prev.text.upper() if direction else "FIRST" 4221 4222 count = self._parse_field(tokens=self.FETCH_TOKENS) 4223 percent = self._match(TokenType.PERCENT) 4224 4225 self._match_set((TokenType.ROW, TokenType.ROWS)) 4226 4227 only = self._match_text_seq("ONLY") 4228 with_ties = self._match_text_seq("WITH", "TIES") 4229 4230 if only and with_ties: 4231 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4232 4233 return self.expression( 4234 exp.Fetch, 4235 direction=direction, 4236 count=count, 4237 percent=percent, 4238 with_ties=with_ties, 4239 ) 4240 4241 return this 4242 4243 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4244 if not self._match(TokenType.OFFSET): 4245 return this 4246 4247 count = self._parse_term() 4248 self._match_set((TokenType.ROW, TokenType.ROWS)) 4249 4250 return self.expression( 4251 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4252 ) 4253 4254 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4255 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4256 4257 def _parse_locks(self) -> t.List[exp.Lock]: 4258 locks = [] 4259 while True: 4260 if self._match_text_seq("FOR", "UPDATE"): 4261 update = True 4262 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4263 "LOCK", "IN", "SHARE", "MODE" 4264 ): 4265 update = False 4266 else: 4267 break 4268 4269 expressions = None 4270 if self._match_text_seq("OF"): 4271 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4272 4273 wait: t.Optional[bool | exp.Expression] = None 4274 if self._match_text_seq("NOWAIT"): 4275 wait = True 4276 elif self._match_text_seq("WAIT"): 4277 wait = self._parse_primary() 4278 elif self._match_text_seq("SKIP", "LOCKED"): 4279 wait = False 4280 4281 locks.append( 4282 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4283 ) 4284 4285 return locks 4286 4287 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4288 while this and self._match_set(self.SET_OPERATIONS): 4289 
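# Illustrative sketch (editor's example, not part of the parser source):
# UNION/EXCEPT/INTERSECT become exp.SetOperation nodes, with ALL and DISTINCT
# recorded explicitly in the "distinct" arg:
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> ast = sqlglot.parse_one("SELECT a FROM x UNION ALL SELECT a FROM y")
# >>> isinstance(ast, exp.Union), ast.args["distinct"]
# (True, False)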
token_type = self._prev.token_type 4290 4291 if token_type == TokenType.UNION: 4292 operation: t.Type[exp.SetOperation] = exp.Union 4293 elif token_type == TokenType.EXCEPT: 4294 operation = exp.Except 4295 else: 4296 operation = exp.Intersect 4297 4298 comments = self._prev.comments 4299 4300 if self._match(TokenType.DISTINCT): 4301 distinct: t.Optional[bool] = True 4302 elif self._match(TokenType.ALL): 4303 distinct = False 4304 else: 4305 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4306 if distinct is None: 4307 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4308 4309 by_name = self._match_text_seq("BY", "NAME") 4310 expression = self._parse_select(nested=True, parse_set_operation=False) 4311 4312 this = self.expression( 4313 operation, 4314 comments=comments, 4315 this=this, 4316 distinct=distinct, 4317 by_name=by_name, 4318 expression=expression, 4319 ) 4320 4321 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4322 expression = this.expression 4323 4324 if expression: 4325 for arg in self.SET_OP_MODIFIERS: 4326 expr = expression.args.get(arg) 4327 if expr: 4328 this.set(arg, expr.pop()) 4329 4330 return this 4331 4332 def _parse_expression(self) -> t.Optional[exp.Expression]: 4333 return self._parse_alias(self._parse_assignment()) 4334 4335 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4336 this = self._parse_disjunction() 4337 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4338 # This allows us to parse <non-identifier token> := <expr> 4339 this = exp.column( 4340 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4341 ) 4342 4343 while self._match_set(self.ASSIGNMENT): 4344 this = self.expression( 4345 self.ASSIGNMENT[self._prev.token_type], 4346 this=this, 4347 comments=self._prev_comments, 4348 expression=self._parse_assignment(), 4349 ) 4350 4351 return this 4352 4353 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4354 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4355 4356 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4357 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4358 4359 def _parse_equality(self) -> t.Optional[exp.Expression]: 4360 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4361 4362 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4363 return self._parse_tokens(self._parse_range, self.COMPARISON) 4364 4365 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4366 this = this or self._parse_bitwise() 4367 negate = self._match(TokenType.NOT) 4368 4369 if self._match_set(self.RANGE_PARSERS): 4370 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4371 if not expression: 4372 return this 4373 4374 this = expression 4375 elif self._match(TokenType.ISNULL): 4376 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4377 4378 # Postgres supports ISNULL and NOTNULL for conditions. 
4379 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4380 if self._match(TokenType.NOTNULL): 4381 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4382 this = self.expression(exp.Not, this=this) 4383 4384 if negate: 4385 this = self._negate_range(this) 4386 4387 if self._match(TokenType.IS): 4388 this = self._parse_is(this) 4389 4390 return this 4391 4392 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4393 if not this: 4394 return this 4395 4396 return self.expression(exp.Not, this=this) 4397 4398 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4399 index = self._index - 1 4400 negate = self._match(TokenType.NOT) 4401 4402 if self._match_text_seq("DISTINCT", "FROM"): 4403 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4404 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4405 4406 if self._match(TokenType.JSON): 4407 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4408 4409 if self._match_text_seq("WITH"): 4410 _with = True 4411 elif self._match_text_seq("WITHOUT"): 4412 _with = False 4413 else: 4414 _with = None 4415 4416 unique = self._match(TokenType.UNIQUE) 4417 self._match_text_seq("KEYS") 4418 expression: t.Optional[exp.Expression] = self.expression( 4419 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4420 ) 4421 else: 4422 expression = self._parse_primary() or self._parse_null() 4423 if not expression: 4424 self._retreat(index) 4425 return None 4426 4427 this = self.expression(exp.Is, this=this, expression=expression) 4428 return self.expression(exp.Not, this=this) if negate else this 4429 4430 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4431 unnest = self._parse_unnest(with_alias=False) 4432 if unnest: 4433 this = self.expression(exp.In, this=this, unnest=unnest) 4434 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4435 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4436 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4437 4438 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4439 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4440 else: 4441 this = self.expression(exp.In, this=this, expressions=expressions) 4442 4443 if matched_l_paren: 4444 self._match_r_paren(this) 4445 elif not self._match(TokenType.R_BRACKET, expression=this): 4446 self.raise_error("Expecting ]") 4447 else: 4448 this = self.expression(exp.In, this=this, field=self._parse_field()) 4449 4450 return this 4451 4452 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4453 low = self._parse_bitwise() 4454 self._match(TokenType.AND) 4455 high = self._parse_bitwise() 4456 return self.expression(exp.Between, this=this, low=low, high=high) 4457 4458 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4459 if not self._match(TokenType.ESCAPE): 4460 return this 4461 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4462 4463 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4464 index = self._index 4465 4466 if not self._match(TokenType.INTERVAL) and match_interval: 4467 return None 4468 4469 if self._match(TokenType.STRING, advance=False): 4470 this = self._parse_primary() 4471 else: 4472 this = self._parse_term() 4473 4474 if not this 
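# Illustrative sketch (editor's example, not part of the parser source):
# _parse_is maps IS [NOT] DISTINCT FROM onto the null-safe comparison nodes:
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> ast = sqlglot.parse_one("SELECT * FROM t WHERE a IS DISTINCT FROM b")
# >>> ast.find(exp.NullSafeNEQ) is not None
# True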
or ( 4475 isinstance(this, exp.Column) 4476 and not this.table 4477 and not this.this.quoted 4478 and this.name.upper() == "IS" 4479 ): 4480 self._retreat(index) 4481 return None 4482 4483 unit = self._parse_function() or ( 4484 not self._match(TokenType.ALIAS, advance=False) 4485 and self._parse_var(any_token=True, upper=True) 4486 ) 4487 4488 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4489 # each INTERVAL expression into this canonical form so it's easy to transpile 4490 if this and this.is_number: 4491 this = exp.Literal.string(this.to_py()) 4492 elif this and this.is_string: 4493 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4494 if len(parts) == 1: 4495 if unit: 4496 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4497 self._retreat(self._index - 1) 4498 4499 this = exp.Literal.string(parts[0][0]) 4500 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4501 4502 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4503 unit = self.expression( 4504 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4505 ) 4506 4507 interval = self.expression(exp.Interval, this=this, unit=unit) 4508 4509 index = self._index 4510 self._match(TokenType.PLUS) 4511 4512 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4513 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4514 return self.expression( 4515 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4516 ) 4517 4518 self._retreat(index) 4519 return interval 4520 4521 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4522 this = self._parse_term() 4523 4524 while True: 4525 if self._match_set(self.BITWISE): 4526 this = self.expression( 4527 self.BITWISE[self._prev.token_type], 4528 this=this, 4529 expression=self._parse_term(), 4530 ) 4531 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4532 this = self.expression( 4533 exp.DPipe, 4534 this=this, 4535 expression=self._parse_term(), 4536 safe=not self.dialect.STRICT_STRING_CONCAT, 4537 ) 4538 elif self._match(TokenType.DQMARK): 4539 this = self.expression( 4540 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4541 ) 4542 elif self._match_pair(TokenType.LT, TokenType.LT): 4543 this = self.expression( 4544 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4545 ) 4546 elif self._match_pair(TokenType.GT, TokenType.GT): 4547 this = self.expression( 4548 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4549 ) 4550 else: 4551 break 4552 4553 return this 4554 4555 def _parse_term(self) -> t.Optional[exp.Expression]: 4556 this = self._parse_factor() 4557 4558 while self._match_set(self.TERM): 4559 klass = self.TERM[self._prev.token_type] 4560 comments = self._prev_comments 4561 expression = self._parse_factor() 4562 4563 this = self.expression(klass, this=this, comments=comments, expression=expression) 4564 4565 if isinstance(this, exp.Collate): 4566 expr = this.expression 4567 4568 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4569 # fallback to Identifier / Var 4570 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4571 ident = expr.this 4572 if isinstance(ident, exp.Identifier): 4573 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4574 4575 return this 4576 4577 def _parse_factor(self) -> t.Optional[exp.Expression]: 4578 parse_method = self._parse_exponent if 
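# Illustrative sketch (editor's example, not part of the parser source):
# _parse_interval canonicalizes INTERVAL '5 days' into a '5' literal plus a
# DAYS unit, per the INTERVAL_STRING_RE branch above:
# >>> import sqlglot
# >>> from sqlglot import exp
# >>> iv = sqlglot.parse_one("SELECT INTERVAL '5 days'").find(exp.Interval)
# >>> iv.this.name, iv.args["unit"].name
# ('5', 'DAYS')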
self.EXPONENT else self._parse_unary 4579 this = parse_method() 4580 4581 while self._match_set(self.FACTOR): 4582 klass = self.FACTOR[self._prev.token_type] 4583 comments = self._prev_comments 4584 expression = parse_method() 4585 4586 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4587 self._retreat(self._index - 1) 4588 return this 4589 4590 this = self.expression(klass, this=this, comments=comments, expression=expression) 4591 4592 if isinstance(this, exp.Div): 4593 this.args["typed"] = self.dialect.TYPED_DIVISION 4594 this.args["safe"] = self.dialect.SAFE_DIVISION 4595 4596 return this 4597 4598 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4599 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4600 4601 def _parse_unary(self) -> t.Optional[exp.Expression]: 4602 if self._match_set(self.UNARY_PARSERS): 4603 return self.UNARY_PARSERS[self._prev.token_type](self) 4604 return self._parse_at_time_zone(self._parse_type()) 4605 4606 def _parse_type( 4607 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4608 ) -> t.Optional[exp.Expression]: 4609 interval = parse_interval and self._parse_interval() 4610 if interval: 4611 return interval 4612 4613 index = self._index 4614 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4615 4616 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4617 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4618 if isinstance(data_type, exp.Cast): 4619 # This constructor can contain ops directly after it, for instance struct unnesting: 4620 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4621 return self._parse_column_ops(data_type) 4622 4623 if data_type: 4624 index2 = self._index 4625 this = self._parse_primary() 4626 4627 if isinstance(this, exp.Literal): 4628 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4629 if parser: 4630 return parser(self, this, data_type) 4631 4632 return self.expression(exp.Cast, this=this, to=data_type) 4633 4634 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4635 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4636 # 4637 # If the index difference here is greater than 1, that means the parser itself must have 4638 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4639 # 4640 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4641 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4642 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4643 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4644 # 4645 # In these cases, we don't really want to return the converted type, but instead retreat 4646 # and try to parse a Column or Identifier in the section below.
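# Illustrative sketch (editor's example, not part of the parser source): when
# a literal directly follows a bare type keyword, _parse_type returns a Cast:
# >>> import sqlglot
# >>> sqlglot.parse_one("SELECT DATE '2020-01-01'").selects[0].sql()
# "CAST('2020-01-01' AS DATE)"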
4647 if data_type.expressions and index2 - index > 1: 4648 self._retreat(index2) 4649 return self._parse_column_ops(data_type) 4650 4651 self._retreat(index) 4652 4653 if fallback_to_identifier: 4654 return self._parse_id_var() 4655 4656 this = self._parse_column() 4657 return this and self._parse_column_ops(this) 4658 4659 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4660 this = self._parse_type() 4661 if not this: 4662 return None 4663 4664 if isinstance(this, exp.Column) and not this.table: 4665 this = exp.var(this.name.upper()) 4666 4667 return self.expression( 4668 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4669 ) 4670 4671 def _parse_types( 4672 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4673 ) -> t.Optional[exp.Expression]: 4674 index = self._index 4675 4676 this: t.Optional[exp.Expression] = None 4677 prefix = self._match_text_seq("SYSUDTLIB", ".") 4678 4679 if not self._match_set(self.TYPE_TOKENS): 4680 identifier = allow_identifiers and self._parse_id_var( 4681 any_token=False, tokens=(TokenType.VAR,) 4682 ) 4683 if isinstance(identifier, exp.Identifier): 4684 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4685 4686 if len(tokens) != 1: 4687 self.raise_error("Unexpected identifier", self._prev) 4688 4689 if tokens[0].token_type in self.TYPE_TOKENS: 4690 self._prev = tokens[0] 4691 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4692 type_name = identifier.name 4693 4694 while self._match(TokenType.DOT): 4695 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4696 4697 this = exp.DataType.build(type_name, udt=True) 4698 else: 4699 self._retreat(self._index - 1) 4700 return None 4701 else: 4702 return None 4703 4704 type_token = self._prev.token_type 4705 4706 if type_token == TokenType.PSEUDO_TYPE: 4707 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4708 4709 if type_token == TokenType.OBJECT_IDENTIFIER: 4710 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4711 4712 # https://materialize.com/docs/sql/types/map/ 4713 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4714 key_type = self._parse_types( 4715 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4716 ) 4717 if not self._match(TokenType.FARROW): 4718 self._retreat(index) 4719 return None 4720 4721 value_type = self._parse_types( 4722 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4723 ) 4724 if not self._match(TokenType.R_BRACKET): 4725 self._retreat(index) 4726 return None 4727 4728 return exp.DataType( 4729 this=exp.DataType.Type.MAP, 4730 expressions=[key_type, value_type], 4731 nested=True, 4732 prefix=prefix, 4733 ) 4734 4735 nested = type_token in self.NESTED_TYPE_TOKENS 4736 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4737 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4738 expressions = None 4739 maybe_func = False 4740 4741 if self._match(TokenType.L_PAREN): 4742 if is_struct: 4743 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4744 elif nested: 4745 expressions = self._parse_csv( 4746 lambda: self._parse_types( 4747 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4748 ) 4749 ) 4750 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4751 this = expressions[0] 4752 this.set("nullable", True) 4753 self._match_r_paren() 4754 return this 4755 elif type_token in self.ENUM_TYPE_TOKENS: 4756 
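# Illustrative sketch (editor's example, not part of the parser source):
# nested types parse recursively through _parse_types; exp.DataType.build goes
# through this same machinery:
# >>> from sqlglot import exp
# >>> dt = exp.DataType.build("ARRAY<INT>")
# >>> dt.this, dt.expressions[0].this
# (<Type.ARRAY: 'ARRAY'>, <Type.INT: 'INT'>)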
expressions = self._parse_csv(self._parse_equality) 4757 elif is_aggregate: 4758 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4759 any_token=False, tokens=(TokenType.VAR,) 4760 ) 4761 if not func_or_ident or not self._match(TokenType.COMMA): 4762 return None 4763 expressions = self._parse_csv( 4764 lambda: self._parse_types( 4765 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4766 ) 4767 ) 4768 expressions.insert(0, func_or_ident) 4769 else: 4770 expressions = self._parse_csv(self._parse_type_size) 4771 4772 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4773 if type_token == TokenType.VECTOR and len(expressions) == 2: 4774 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4775 4776 if not expressions or not self._match(TokenType.R_PAREN): 4777 self._retreat(index) 4778 return None 4779 4780 maybe_func = True 4781 4782 values: t.Optional[t.List[exp.Expression]] = None 4783 4784 if nested and self._match(TokenType.LT): 4785 if is_struct: 4786 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4787 else: 4788 expressions = self._parse_csv( 4789 lambda: self._parse_types( 4790 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4791 ) 4792 ) 4793 4794 if not self._match(TokenType.GT): 4795 self.raise_error("Expecting >") 4796 4797 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4798 values = self._parse_csv(self._parse_assignment) 4799 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4800 4801 if type_token in self.TIMESTAMPS: 4802 if self._match_text_seq("WITH", "TIME", "ZONE"): 4803 maybe_func = False 4804 tz_type = ( 4805 exp.DataType.Type.TIMETZ 4806 if type_token in self.TIMES 4807 else exp.DataType.Type.TIMESTAMPTZ 4808 ) 4809 this = exp.DataType(this=tz_type, expressions=expressions) 4810 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4811 maybe_func = False 4812 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4813 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4814 maybe_func = False 4815 elif type_token == TokenType.INTERVAL: 4816 unit = self._parse_var(upper=True) 4817 if unit: 4818 if self._match_text_seq("TO"): 4819 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4820 4821 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4822 else: 4823 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4824 4825 if maybe_func and check_func: 4826 index2 = self._index 4827 peek = self._parse_string() 4828 4829 if not peek: 4830 self._retreat(index) 4831 return None 4832 4833 self._retreat(index2) 4834 4835 if not this: 4836 if self._match_text_seq("UNSIGNED"): 4837 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4838 if not unsigned_type_token: 4839 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4840 4841 type_token = unsigned_type_token or type_token 4842 4843 this = exp.DataType( 4844 this=exp.DataType.Type[type_token.value], 4845 expressions=expressions, 4846 nested=nested, 4847 prefix=prefix, 4848 ) 4849 4850 # Empty arrays/structs are allowed 4851 if values is not None: 4852 cls = exp.Struct if is_struct else exp.Array 4853 this = exp.cast(cls(expressions=values), this, copy=False) 4854 4855 elif expressions: 4856 this.set("expressions", expressions) 4857 4858 # https://materialize.com/docs/sql/types/list/#type-name 4859 while 
self._match(TokenType.LIST): 4860 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4861 4862 index = self._index 4863 4864 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4865 matched_array = self._match(TokenType.ARRAY) 4866 4867 while self._curr: 4868 datatype_token = self._prev.token_type 4869 matched_l_bracket = self._match(TokenType.L_BRACKET) 4870 if not matched_l_bracket and not matched_array: 4871 break 4872 4873 matched_array = False 4874 values = self._parse_csv(self._parse_assignment) or None 4875 if ( 4876 values 4877 and not schema 4878 and ( 4879 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4880 ) 4881 ): 4882 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4883 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4884 self._retreat(index) 4885 break 4886 4887 this = exp.DataType( 4888 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4889 ) 4890 self._match(TokenType.R_BRACKET) 4891 4892 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4893 converter = self.TYPE_CONVERTERS.get(this.this) 4894 if converter: 4895 this = converter(t.cast(exp.DataType, this)) 4896 4897 return this 4898 4899 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4900 index = self._index 4901 4902 if ( 4903 self._curr 4904 and self._next 4905 and self._curr.token_type in self.TYPE_TOKENS 4906 and self._next.token_type in self.TYPE_TOKENS 4907 ): 4908 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4909 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4910 this = self._parse_id_var() 4911 else: 4912 this = ( 4913 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4914 or self._parse_id_var() 4915 ) 4916 4917 self._match(TokenType.COLON) 4918 4919 if ( 4920 type_required 4921 and not isinstance(this, exp.DataType) 4922 and not self._match_set(self.TYPE_TOKENS, advance=False) 4923 ): 4924 self._retreat(index) 4925 return self._parse_types() 4926 4927 return self._parse_column_def(this) 4928 4929 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4930 if not self._match_text_seq("AT", "TIME", "ZONE"): 4931 return this 4932 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4933 4934 def _parse_column(self) -> t.Optional[exp.Expression]: 4935 this = self._parse_column_reference() 4936 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4937 4938 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4939 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4940 4941 return column 4942 4943 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4944 this = self._parse_field() 4945 if ( 4946 not this 4947 and self._match(TokenType.VALUES, advance=False) 4948 and self.VALUES_FOLLOWED_BY_PAREN 4949 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4950 ): 4951 this = self._parse_id_var() 4952 4953 if isinstance(this, exp.Identifier): 4954 # We bubble up comments from the Identifier to the Column 4955 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4956 4957 return this 4958 4959 def _parse_colon_as_variant_extract( 4960 self, this: t.Optional[exp.Expression] 4961 ) -> t.Optional[exp.Expression]: 4962 casts = [] 4963 json_path = [] 4964 escape = None 4965 4966 while self._match(TokenType.COLON): 4967 start_index = self._index 4968 4969 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4970 path = self._parse_column_ops( 4971 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4972 ) 4973 4974 # The cast :: operator has a lower precedence than the extraction operator :, so 4975 # we rearrange the AST appropriately to avoid casting the JSON path 4976 while isinstance(path, exp.Cast): 4977 casts.append(path.to) 4978 path = path.this 4979 4980 if casts: 4981 dcolon_offset = next( 4982 i 4983 for i, t in enumerate(self._tokens[start_index:]) 4984 if t.token_type == TokenType.DCOLON 4985 ) 4986 end_token = self._tokens[start_index + dcolon_offset - 1] 4987 else: 4988 end_token = self._prev 4989 4990 if path: 4991 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 4992 # it'll roundtrip to a string literal in GET_PATH 4993 if isinstance(path, exp.Identifier) and path.quoted: 4994 escape = True 4995 4996 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4997 4998 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4999 # Databricks transforms it back to the colon/dot notation 5000 if json_path: 5001 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5002 5003 if json_path_expr: 5004 json_path_expr.set("escape", escape) 5005 5006 this = self.expression( 5007 exp.JSONExtract, 5008 this=this, 5009 expression=json_path_expr, 5010 variant_extract=True, 5011 ) 5012 5013 while casts: 5014 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5015 5016 return this 5017 5018 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5019 return self._parse_types() 5020 5021 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5022 this = self._parse_bracket(this) 5023 5024 while self._match_set(self.COLUMN_OPERATORS): 5025 op_token = self._prev.token_type 5026 op = self.COLUMN_OPERATORS.get(op_token) 5027 5028 if op_token == TokenType.DCOLON: 5029 field = self._parse_dcolon() 5030 if not field: 5031 self.raise_error("Expected type") 5032 elif op and self._curr: 5033 field = self._parse_column_reference() 5034 else: 5035 field = self._parse_field(any_token=True, anonymous_func=True) 5036 5037 if isinstance(field, exp.Func) and this: 5038 # bigquery allows function calls like x.y.count(...) 5039 # SAFE.SUBSTR(...) 5040 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5041 this = exp.replace_tree( 5042 this, 5043 lambda n: ( 5044 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5045 if n.table 5046 else n.this 5047 ) 5048 if isinstance(n, exp.Column) 5049 else n, 5050 ) 5051 5052 if op: 5053 this = op(self, this, field) 5054 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5055 this = self.expression( 5056 exp.Column, 5057 this=field, 5058 table=this.this, 5059 db=this.args.get("table"), 5060 catalog=this.args.get("db"), 5061 ) 5062 else: 5063 this = self.expression(exp.Dot, this=this, expression=field) 5064 5065 this = self._parse_bracket(this) 5066 5067 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5068 5069 def _parse_primary(self) -> t.Optional[exp.Expression]: 5070 if self._match_set(self.PRIMARY_PARSERS): 5071 token_type = self._prev.token_type 5072 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5073 5074 if token_type == TokenType.STRING: 5075 expressions = [primary] 5076 while self._match(TokenType.STRING): 5077 expressions.append(exp.Literal.string(self._prev.text)) 5078 5079 if len(expressions) > 1: 5080 return self.expression(exp.Concat, expressions=expressions) 5081 5082 return primary 5083 5084 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5085 return exp.Literal.number(f"0.{self._prev.text}") 5086 5087 if self._match(TokenType.L_PAREN): 5088 comments = self._prev_comments 5089 query = self._parse_select() 5090 5091 if query: 5092 expressions = [query] 5093 else: 5094 expressions = self._parse_expressions() 5095 5096 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5097 5098 if not this and self._match(TokenType.R_PAREN, advance=False): 5099 this = self.expression(exp.Tuple) 5100 elif 
isinstance(this, exp.UNWRAPPED_QUERIES): 5101 this = self._parse_subquery(this=this, parse_alias=False) 5102 elif isinstance(this, exp.Subquery): 5103 this = self._parse_subquery( 5104 this=self._parse_set_operations(this), parse_alias=False 5105 ) 5106 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5107 this = self.expression(exp.Tuple, expressions=expressions) 5108 else: 5109 this = self.expression(exp.Paren, this=this) 5110 5111 if this: 5112 this.add_comments(comments) 5113 5114 self._match_r_paren(expression=this) 5115 return this 5116 5117 return None 5118 5119 def _parse_field( 5120 self, 5121 any_token: bool = False, 5122 tokens: t.Optional[t.Collection[TokenType]] = None, 5123 anonymous_func: bool = False, 5124 ) -> t.Optional[exp.Expression]: 5125 if anonymous_func: 5126 field = ( 5127 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5128 or self._parse_primary() 5129 ) 5130 else: 5131 field = self._parse_primary() or self._parse_function( 5132 anonymous=anonymous_func, any_token=any_token 5133 ) 5134 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5135 5136 def _parse_function( 5137 self, 5138 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5139 anonymous: bool = False, 5140 optional_parens: bool = True, 5141 any_token: bool = False, 5142 ) -> t.Optional[exp.Expression]: 5143 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5144 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5145 fn_syntax = False 5146 if ( 5147 self._match(TokenType.L_BRACE, advance=False) 5148 and self._next 5149 and self._next.text.upper() == "FN" 5150 ): 5151 self._advance(2) 5152 fn_syntax = True 5153 5154 func = self._parse_function_call( 5155 functions=functions, 5156 anonymous=anonymous, 5157 optional_parens=optional_parens, 5158 any_token=any_token, 5159 ) 5160 5161 if fn_syntax: 5162 self._match(TokenType.R_BRACE) 5163 5164 return func 5165 5166 def _parse_function_call( 5167 self, 5168 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5169 anonymous: bool = False, 5170 optional_parens: bool = True, 5171 any_token: bool = False, 5172 ) -> t.Optional[exp.Expression]: 5173 if not self._curr: 5174 return None 5175 5176 comments = self._curr.comments 5177 token_type = self._curr.token_type 5178 this = self._curr.text 5179 upper = this.upper() 5180 5181 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5182 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5183 self._advance() 5184 return self._parse_window(parser(self)) 5185 5186 if not self._next or self._next.token_type != TokenType.L_PAREN: 5187 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5188 self._advance() 5189 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5190 5191 return None 5192 5193 if any_token: 5194 if token_type in self.RESERVED_TOKENS: 5195 return None 5196 elif token_type not in self.FUNC_TOKENS: 5197 return None 5198 5199 self._advance(2) 5200 5201 parser = self.FUNCTION_PARSERS.get(upper) 5202 if parser and not anonymous: 5203 this = parser(self) 5204 else: 5205 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5206 5207 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5208 this = self.expression(subquery_predicate, this=self._parse_select()) 5209 self._match_r_paren() 5210 return this 5211 5212 if functions is None: 5213 functions = self.FUNCTIONS 5214 5215 function = functions.get(upper) 
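# The builder looked up above, if any, is applied once the arguments have been parsed;
# builders that declare a `dialect` parameter also receive the active dialect.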
5216 5217 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5218 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5219 5220 if alias: 5221 args = self._kv_to_prop_eq(args) 5222 5223 if function and not anonymous: 5224 if "dialect" in function.__code__.co_varnames: 5225 func = function(args, dialect=self.dialect) 5226 else: 5227 func = function(args) 5228 5229 func = self.validate_expression(func, args) 5230 if not self.dialect.NORMALIZE_FUNCTIONS: 5231 func.meta["name"] = this 5232 5233 this = func 5234 else: 5235 if token_type == TokenType.IDENTIFIER: 5236 this = exp.Identifier(this=this, quoted=True) 5237 this = self.expression(exp.Anonymous, this=this, expressions=args) 5238 5239 if isinstance(this, exp.Expression): 5240 this.add_comments(comments) 5241 5242 self._match_r_paren(this) 5243 return self._parse_window(this) 5244 5245 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5246 return expression 5247 5248 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5249 transformed = [] 5250 5251 for index, e in enumerate(expressions): 5252 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5253 if isinstance(e, exp.Alias): 5254 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5255 5256 if not isinstance(e, exp.PropertyEQ): 5257 e = self.expression( 5258 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5259 ) 5260 5261 if isinstance(e.this, exp.Column): 5262 e.this.replace(e.this.this) 5263 else: 5264 e = self._to_prop_eq(e, index) 5265 5266 transformed.append(e) 5267 5268 return transformed 5269 5270 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5271 return self._parse_column_def(self._parse_id_var()) 5272 5273 def _parse_user_defined_function( 5274 self, kind: t.Optional[TokenType] = None 5275 ) -> t.Optional[exp.Expression]: 5276 this = self._parse_id_var() 5277 5278 while self._match(TokenType.DOT): 5279 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5280 5281 if not self._match(TokenType.L_PAREN): 5282 return this 5283 5284 expressions = self._parse_csv(self._parse_function_parameter) 5285 self._match_r_paren() 5286 return self.expression( 5287 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5288 ) 5289 5290 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5291 literal = self._parse_primary() 5292 if literal: 5293 return self.expression(exp.Introducer, this=token.text, expression=literal) 5294 5295 return self.expression(exp.Identifier, this=token.text) 5296 5297 def _parse_session_parameter(self) -> exp.SessionParameter: 5298 kind = None 5299 this = self._parse_id_var() or self._parse_primary() 5300 5301 if this and self._match(TokenType.DOT): 5302 kind = this.name 5303 this = self._parse_var() or self._parse_primary() 5304 5305 return self.expression(exp.SessionParameter, this=this, kind=kind) 5306 5307 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5308 return self._parse_id_var() 5309 5310 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5311 index = self._index 5312 5313 if self._match(TokenType.L_PAREN): 5314 expressions = t.cast( 5315 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5316 ) 5317 5318 if not self._match(TokenType.R_PAREN): 5319 self._retreat(index) 5320 else: 5321 expressions = [self._parse_lambda_arg()] 5322 5323 if self._match_set(self.LAMBDAS): 5324 return 
self.LAMBDAS[self._prev.token_type](self, expressions) 5325 5326 self._retreat(index) 5327 5328 this: t.Optional[exp.Expression] 5329 5330 if self._match(TokenType.DISTINCT): 5331 this = self.expression( 5332 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5333 ) 5334 else: 5335 this = self._parse_select_or_expression(alias=alias) 5336 5337 return self._parse_limit( 5338 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5339 ) 5340 5341 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5342 index = self._index 5343 if not self._match(TokenType.L_PAREN): 5344 return this 5345 5346 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5347 # expr can be of both types 5348 if self._match_set(self.SELECT_START_TOKENS): 5349 self._retreat(index) 5350 return this 5351 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5352 self._match_r_paren() 5353 return self.expression(exp.Schema, this=this, expressions=args) 5354 5355 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5356 return self._parse_column_def(self._parse_field(any_token=True)) 5357 5358 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5359 # column defs are not really columns, they're identifiers 5360 if isinstance(this, exp.Column): 5361 this = this.this 5362 5363 kind = self._parse_types(schema=True) 5364 5365 if self._match_text_seq("FOR", "ORDINALITY"): 5366 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5367 5368 constraints: t.List[exp.Expression] = [] 5369 5370 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5371 ("ALIAS", "MATERIALIZED") 5372 ): 5373 persisted = self._prev.text.upper() == "MATERIALIZED" 5374 constraint_kind = exp.ComputedColumnConstraint( 5375 this=self._parse_assignment(), 5376 persisted=persisted or self._match_text_seq("PERSISTED"), 5377 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5378 ) 5379 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5380 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5381 self._match(TokenType.ALIAS) 5382 constraints.append( 5383 self.expression( 5384 exp.ColumnConstraint, 5385 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5386 ) 5387 ) 5388 5389 while True: 5390 constraint = self._parse_column_constraint() 5391 if not constraint: 5392 break 5393 constraints.append(constraint) 5394 5395 if not kind and not constraints: 5396 return this 5397 5398 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5399 5400 def _parse_auto_increment( 5401 self, 5402 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5403 start = None 5404 increment = None 5405 5406 if self._match(TokenType.L_PAREN, advance=False): 5407 args = self._parse_wrapped_csv(self._parse_bitwise) 5408 start = seq_get(args, 0) 5409 increment = seq_get(args, 1) 5410 elif self._match_text_seq("START"): 5411 start = self._parse_bitwise() 5412 self._match_text_seq("INCREMENT") 5413 increment = self._parse_bitwise() 5414 5415 if start and increment: 5416 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5417 5418 return exp.AutoIncrementColumnConstraint() 5419 5420 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5421 if not self._match_text_seq("REFRESH"): 5422 
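# Not an AUTO REFRESH property after all: unconsume the keyword matched by the caller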
self._retreat(self._index - 1) 5423 return None 5424 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5425 5426 def _parse_compress(self) -> exp.CompressColumnConstraint: 5427 if self._match(TokenType.L_PAREN, advance=False): 5428 return self.expression( 5429 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5430 ) 5431 5432 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5433 5434 def _parse_generated_as_identity( 5435 self, 5436 ) -> ( 5437 exp.GeneratedAsIdentityColumnConstraint 5438 | exp.ComputedColumnConstraint 5439 | exp.GeneratedAsRowColumnConstraint 5440 ): 5441 if self._match_text_seq("BY", "DEFAULT"): 5442 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5443 this = self.expression( 5444 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5445 ) 5446 else: 5447 self._match_text_seq("ALWAYS") 5448 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5449 5450 self._match(TokenType.ALIAS) 5451 5452 if self._match_text_seq("ROW"): 5453 start = self._match_text_seq("START") 5454 if not start: 5455 self._match(TokenType.END) 5456 hidden = self._match_text_seq("HIDDEN") 5457 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5458 5459 identity = self._match_text_seq("IDENTITY") 5460 5461 if self._match(TokenType.L_PAREN): 5462 if self._match(TokenType.START_WITH): 5463 this.set("start", self._parse_bitwise()) 5464 if self._match_text_seq("INCREMENT", "BY"): 5465 this.set("increment", self._parse_bitwise()) 5466 if self._match_text_seq("MINVALUE"): 5467 this.set("minvalue", self._parse_bitwise()) 5468 if self._match_text_seq("MAXVALUE"): 5469 this.set("maxvalue", self._parse_bitwise()) 5470 5471 if self._match_text_seq("CYCLE"): 5472 this.set("cycle", True) 5473 elif self._match_text_seq("NO", "CYCLE"): 5474 this.set("cycle", False) 5475 5476 if not identity: 5477 this.set("expression", self._parse_range()) 5478 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5479 args = self._parse_csv(self._parse_bitwise) 5480 this.set("start", seq_get(args, 0)) 5481 this.set("increment", seq_get(args, 1)) 5482 5483 self._match_r_paren() 5484 5485 return this 5486 5487 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5488 self._match_text_seq("LENGTH") 5489 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5490 5491 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5492 if self._match_text_seq("NULL"): 5493 return self.expression(exp.NotNullColumnConstraint) 5494 if self._match_text_seq("CASESPECIFIC"): 5495 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5496 if self._match_text_seq("FOR", "REPLICATION"): 5497 return self.expression(exp.NotForReplicationColumnConstraint) 5498 5499 # Unconsume the `NOT` token 5500 self._retreat(self._index - 1) 5501 return None 5502 5503 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5504 if self._match(TokenType.CONSTRAINT): 5505 this = self._parse_id_var() 5506 else: 5507 this = None 5508 5509 if self._match_texts(self.CONSTRAINT_PARSERS): 5510 return self.expression( 5511 exp.ColumnConstraint, 5512 this=this, 5513 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5514 ) 5515 5516 return this 5517 5518 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5519 if not self._match(TokenType.CONSTRAINT): 5520 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5521 5522 return self.expression( 5523 exp.Constraint, 5524 this=self._parse_id_var(), 5525 expressions=self._parse_unnamed_constraints(), 5526 ) 5527 5528 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5529 constraints = [] 5530 while True: 5531 constraint = self._parse_unnamed_constraint() or self._parse_function() 5532 if not constraint: 5533 break 5534 constraints.append(constraint) 5535 5536 return constraints 5537 5538 def _parse_unnamed_constraint( 5539 self, constraints: t.Optional[t.Collection[str]] = None 5540 ) -> t.Optional[exp.Expression]: 5541 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5542 constraints or self.CONSTRAINT_PARSERS 5543 ): 5544 return None 5545 5546 constraint = self._prev.text.upper() 5547 if constraint not in self.CONSTRAINT_PARSERS: 5548 self.raise_error(f"No parser found for schema constraint {constraint}.") 5549 5550 return self.CONSTRAINT_PARSERS[constraint](self) 5551 5552 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5553 return self._parse_id_var(any_token=False) 5554 5555 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5556 self._match_text_seq("KEY") 5557 return self.expression( 5558 exp.UniqueColumnConstraint, 5559 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5560 this=self._parse_schema(self._parse_unique_key()), 5561 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5562 on_conflict=self._parse_on_conflict(), 5563 ) 5564 5565 def _parse_key_constraint_options(self) -> t.List[str]: 5566 options = [] 5567 while True: 5568 if not self._curr: 5569 break 5570 5571 if self._match(TokenType.ON): 5572 action = None 5573 on = self._advance_any() and self._prev.text 5574 5575 if self._match_text_seq("NO", "ACTION"): 5576 action = "NO ACTION" 5577 elif self._match_text_seq("CASCADE"): 5578 action = "CASCADE" 5579 elif self._match_text_seq("RESTRICT"): 5580 action = "RESTRICT" 5581 elif self._match_pair(TokenType.SET, TokenType.NULL): 5582 action = "SET NULL" 5583 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5584 action = "SET DEFAULT" 5585 else: 5586 self.raise_error("Invalid key constraint") 5587 5588 options.append(f"ON {on} {action}") 5589 else: 5590 var = self._parse_var_from_options( 5591 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5592 ) 5593 if not var: 5594 break 5595 options.append(var.name) 5596 5597 return options 5598 5599 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5600 if match and not self._match(TokenType.REFERENCES): 5601 return None 5602 5603 expressions = None 5604 this = self._parse_table(schema=True) 5605 options = self._parse_key_constraint_options() 5606 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5607 5608 def _parse_foreign_key(self) -> exp.ForeignKey: 5609 expressions = self._parse_wrapped_id_vars() 5610 reference = self._parse_references() 5611 options = {} 5612 5613 while self._match(TokenType.ON): 5614 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5615 self.raise_error("Expected DELETE or UPDATE") 5616 5617 kind = self._prev.text.lower() 5618 5619 if self._match_text_seq("NO", "ACTION"): 5620 action = "NO ACTION" 5621 elif self._match(TokenType.SET): 5622 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5623 action = "SET " + self._prev.text.upper() 5624 else: 5625 self._advance() 5626 action = self._prev.text.upper() 5627 
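# Store the referential action under the event that triggers it, e.g. options["delete"] = "CASCADE"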
5628 options[kind] = action 5629 5630 return self.expression( 5631 exp.ForeignKey, 5632 expressions=expressions, 5633 reference=reference, 5634 **options, # type: ignore 5635 ) 5636 5637 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5638 return self._parse_field() 5639 5640 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5641 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5642 self._retreat(self._index - 1) 5643 return None 5644 5645 id_vars = self._parse_wrapped_id_vars() 5646 return self.expression( 5647 exp.PeriodForSystemTimeConstraint, 5648 this=seq_get(id_vars, 0), 5649 expression=seq_get(id_vars, 1), 5650 ) 5651 5652 def _parse_primary_key( 5653 self, wrapped_optional: bool = False, in_props: bool = False 5654 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5655 desc = ( 5656 self._match_set((TokenType.ASC, TokenType.DESC)) 5657 and self._prev.token_type == TokenType.DESC 5658 ) 5659 5660 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5661 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5662 5663 expressions = self._parse_wrapped_csv( 5664 self._parse_primary_key_part, optional=wrapped_optional 5665 ) 5666 options = self._parse_key_constraint_options() 5667 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5668 5669 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5670 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5671 5672 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5673 """ 5674 Parses a datetime literal in ODBC format. We parse the literal into the corresponding 5675 expression type, for example `{d'yyyy-mm-dd'}` is parsed as a `Date` expression, exactly 5676 as `DATE('yyyy-mm-dd')` would be. 
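ODBC defines the `{d '...'}`, `{t '...'}` and `{ts '...'}` escape sequences for DATE, TIME and TIMESTAMP literals, respectively.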
5677 5678 Reference: 5679 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5680 """ 5681 self._match(TokenType.VAR) 5682 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5683 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5684 if not self._match(TokenType.R_BRACE): 5685 self.raise_error("Expected }") 5686 return expression 5687 5688 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5689 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5690 return this 5691 5692 bracket_kind = self._prev.token_type 5693 if ( 5694 bracket_kind == TokenType.L_BRACE 5695 and self._curr 5696 and self._curr.token_type == TokenType.VAR 5697 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5698 ): 5699 return self._parse_odbc_datetime_literal() 5700 5701 expressions = self._parse_csv( 5702 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5703 ) 5704 5705 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5706 self.raise_error("Expected ]") 5707 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5708 self.raise_error("Expected }") 5709 5710 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5711 if bracket_kind == TokenType.L_BRACE: 5712 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5713 elif not this: 5714 this = build_array_constructor( 5715 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5716 ) 5717 else: 5718 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5719 if constructor_type: 5720 return build_array_constructor( 5721 constructor_type, 5722 args=expressions, 5723 bracket_kind=bracket_kind, 5724 dialect=self.dialect, 5725 ) 5726 5727 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5728 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5729 5730 self._add_comments(this) 5731 return self._parse_bracket(this) 5732 5733 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5734 if self._match(TokenType.COLON): 5735 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5736 return this 5737 5738 def _parse_case(self) -> t.Optional[exp.Expression]: 5739 ifs = [] 5740 default = None 5741 5742 comments = self._prev_comments 5743 expression = self._parse_assignment() 5744 5745 while self._match(TokenType.WHEN): 5746 this = self._parse_assignment() 5747 self._match(TokenType.THEN) 5748 then = self._parse_assignment() 5749 ifs.append(self.expression(exp.If, this=this, true=then)) 5750 5751 if self._match(TokenType.ELSE): 5752 default = self._parse_assignment() 5753 5754 if not self._match(TokenType.END): 5755 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5756 default = exp.column("interval") 5757 else: 5758 self.raise_error("Expected END after CASE", self._prev) 5759 5760 return self.expression( 5761 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5762 ) 5763 5764 def _parse_if(self) -> t.Optional[exp.Expression]: 5765 if self._match(TokenType.L_PAREN): 5766 args = self._parse_csv(self._parse_assignment) 5767 this = self.validate_expression(exp.If.from_arg_list(args), args) 5768 self._match_r_paren() 5769 else: 5770 index = self._index - 1 5771 5772 if self.NO_PAREN_IF_COMMANDS and index == 0: 5773 
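# An unparenthesized IF at the very start of a statement is a command rather than a conditional expression, so the remaining tokens are parsed as one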
return self._parse_as_command(self._prev) 5774 5775 condition = self._parse_assignment() 5776 5777 if not condition: 5778 self._retreat(index) 5779 return None 5780 5781 self._match(TokenType.THEN) 5782 true = self._parse_assignment() 5783 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5784 self._match(TokenType.END) 5785 this = self.expression(exp.If, this=condition, true=true, false=false) 5786 5787 return this 5788 5789 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5790 if not self._match_text_seq("VALUE", "FOR"): 5791 self._retreat(self._index - 1) 5792 return None 5793 5794 return self.expression( 5795 exp.NextValueFor, 5796 this=self._parse_column(), 5797 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5798 ) 5799 5800 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5801 this = self._parse_function() or self._parse_var_or_string(upper=True) 5802 5803 if self._match(TokenType.FROM): 5804 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5805 5806 if not self._match(TokenType.COMMA): 5807 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5808 5809 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5810 5811 def _parse_gap_fill(self) -> exp.GapFill: 5812 self._match(TokenType.TABLE) 5813 this = self._parse_table() 5814 5815 self._match(TokenType.COMMA) 5816 args = [this, *self._parse_csv(self._parse_lambda)] 5817 5818 gap_fill = exp.GapFill.from_arg_list(args) 5819 return self.validate_expression(gap_fill, args) 5820 5821 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5822 this = self._parse_assignment() 5823 5824 if not self._match(TokenType.ALIAS): 5825 if self._match(TokenType.COMMA): 5826 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5827 5828 self.raise_error("Expected AS after CAST") 5829 5830 fmt = None 5831 to = self._parse_types() 5832 5833 if self._match(TokenType.FORMAT): 5834 fmt_string = self._parse_string() 5835 fmt = self._parse_at_time_zone(fmt_string) 5836 5837 if not to: 5838 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5839 if to.this in exp.DataType.TEMPORAL_TYPES: 5840 this = self.expression( 5841 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5842 this=this, 5843 format=exp.Literal.string( 5844 format_time( 5845 fmt_string.this if fmt_string else "", 5846 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5847 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5848 ) 5849 ), 5850 safe=safe, 5851 ) 5852 5853 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5854 this.set("zone", fmt.args["zone"]) 5855 return this 5856 elif not to: 5857 self.raise_error("Expected TYPE after CAST") 5858 elif isinstance(to, exp.Identifier): 5859 to = exp.DataType.build(to.name, udt=True) 5860 elif to.this == exp.DataType.Type.CHAR: 5861 if self._match(TokenType.CHARACTER_SET): 5862 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5863 5864 return self.expression( 5865 exp.Cast if strict else exp.TryCast, 5866 this=this, 5867 to=to, 5868 format=fmt, 5869 safe=safe, 5870 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5871 ) 5872 5873 def _parse_string_agg(self) -> exp.Expression: 5874 if self._match(TokenType.DISTINCT): 5875 args: t.List[t.Optional[exp.Expression]] = [ 5876 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5877 ] 5878 if self._match(TokenType.COMMA): 5879 args.extend(self._parse_csv(self._parse_assignment)) 5880 else: 5881 args = self._parse_csv(self._parse_assignment) # type: ignore 5882 5883 index = self._index 5884 if not self._match(TokenType.R_PAREN) and args: 5885 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5886 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5887 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5888 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5889 5890 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5891 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5892 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5893 if not self._match_text_seq("WITHIN", "GROUP"): 5894 self._retreat(index) 5895 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5896 5897 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5898 order = self._parse_order(this=seq_get(args, 0)) 5899 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5900 5901 def _parse_convert( 5902 self, strict: bool, safe: t.Optional[bool] = None 5903 ) -> t.Optional[exp.Expression]: 5904 this = self._parse_bitwise() 5905 5906 if self._match(TokenType.USING): 5907 to: t.Optional[exp.Expression] = self.expression( 5908 exp.CharacterSet, this=self._parse_var() 5909 ) 5910 elif self._match(TokenType.COMMA): 5911 to = self._parse_types() 5912 else: 5913 to = None 5914 5915 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5916 5917 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5918 """ 5919 There are generally two variants of the DECODE function: 5920 5921 - DECODE(bin, charset) 5922 - DECODE(expression, search, result [, search, result] ... [, default]) 5923 5924 The second variant will always be parsed into a CASE expression. Note that NULL 5925 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5926 instead of relying on pattern matching. 
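For example, DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed into
CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END.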
5927 """ 5928 args = self._parse_csv(self._parse_assignment) 5929 5930 if len(args) < 3: 5931 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5932 5933 expression, *expressions = args 5934 if not expression: 5935 return None 5936 5937 ifs = [] 5938 for search, result in zip(expressions[::2], expressions[1::2]): 5939 if not search or not result: 5940 return None 5941 5942 if isinstance(search, exp.Literal): 5943 ifs.append( 5944 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5945 ) 5946 elif isinstance(search, exp.Null): 5947 ifs.append( 5948 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5949 ) 5950 else: 5951 cond = exp.or_( 5952 exp.EQ(this=expression.copy(), expression=search), 5953 exp.and_( 5954 exp.Is(this=expression.copy(), expression=exp.Null()), 5955 exp.Is(this=search.copy(), expression=exp.Null()), 5956 copy=False, 5957 ), 5958 copy=False, 5959 ) 5960 ifs.append(exp.If(this=cond, true=result)) 5961 5962 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5963 5964 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5965 self._match_text_seq("KEY") 5966 key = self._parse_column() 5967 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5968 self._match_text_seq("VALUE") 5969 value = self._parse_bitwise() 5970 5971 if not key and not value: 5972 return None 5973 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5974 5975 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5976 if not this or not self._match_text_seq("FORMAT", "JSON"): 5977 return this 5978 5979 return self.expression(exp.FormatJson, this=this) 5980 5981 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 5982 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 5983 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 5984 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5985 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5986 else: 5987 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 5988 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 5989 5990 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 5991 5992 if not empty and not error and not null: 5993 return None 5994 5995 return self.expression( 5996 exp.OnCondition, 5997 empty=empty, 5998 error=error, 5999 null=null, 6000 ) 6001 6002 def _parse_on_handling( 6003 self, on: str, *values: str 6004 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6005 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6006 for value in values: 6007 if self._match_text_seq(value, "ON", on): 6008 return f"{value} ON {on}" 6009 6010 index = self._index 6011 if self._match(TokenType.DEFAULT): 6012 default_value = self._parse_bitwise() 6013 if self._match_text_seq("ON", on): 6014 return default_value 6015 6016 self._retreat(index) 6017 6018 return None 6019 6020 @t.overload 6021 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6022 6023 @t.overload 6024 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
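# The overloads above only refine the return type for type checkers: agg=True produces exp.JSONObjectAgg, agg=False produces exp.JSONObject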
6025 6026 def _parse_json_object(self, agg=False): 6027 star = self._parse_star() 6028 expressions = ( 6029 [star] 6030 if star 6031 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6032 ) 6033 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6034 6035 unique_keys = None 6036 if self._match_text_seq("WITH", "UNIQUE"): 6037 unique_keys = True 6038 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6039 unique_keys = False 6040 6041 self._match_text_seq("KEYS") 6042 6043 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6044 self._parse_type() 6045 ) 6046 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6047 6048 return self.expression( 6049 exp.JSONObjectAgg if agg else exp.JSONObject, 6050 expressions=expressions, 6051 null_handling=null_handling, 6052 unique_keys=unique_keys, 6053 return_type=return_type, 6054 encoding=encoding, 6055 ) 6056 6057 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6058 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6059 if not self._match_text_seq("NESTED"): 6060 this = self._parse_id_var() 6061 kind = self._parse_types(allow_identifiers=False) 6062 nested = None 6063 else: 6064 this = None 6065 kind = None 6066 nested = True 6067 6068 path = self._match_text_seq("PATH") and self._parse_string() 6069 nested_schema = nested and self._parse_json_schema() 6070 6071 return self.expression( 6072 exp.JSONColumnDef, 6073 this=this, 6074 kind=kind, 6075 path=path, 6076 nested_schema=nested_schema, 6077 ) 6078 6079 def _parse_json_schema(self) -> exp.JSONSchema: 6080 self._match_text_seq("COLUMNS") 6081 return self.expression( 6082 exp.JSONSchema, 6083 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6084 ) 6085 6086 def _parse_json_table(self) -> exp.JSONTable: 6087 this = self._parse_format_json(self._parse_bitwise()) 6088 path = self._match(TokenType.COMMA) and self._parse_string() 6089 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6090 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6091 schema = self._parse_json_schema() 6092 6093 return exp.JSONTable( 6094 this=this, 6095 schema=schema, 6096 path=path, 6097 error_handling=error_handling, 6098 empty_handling=empty_handling, 6099 ) 6100 6101 def _parse_match_against(self) -> exp.MatchAgainst: 6102 expressions = self._parse_csv(self._parse_column) 6103 6104 self._match_text_seq(")", "AGAINST", "(") 6105 6106 this = self._parse_string() 6107 6108 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6109 modifier = "IN NATURAL LANGUAGE MODE" 6110 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6111 modifier = f"{modifier} WITH QUERY EXPANSION" 6112 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6113 modifier = "IN BOOLEAN MODE" 6114 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6115 modifier = "WITH QUERY EXPANSION" 6116 else: 6117 modifier = None 6118 6119 return self.expression( 6120 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6121 ) 6122 6123 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6124 def _parse_open_json(self) -> exp.OpenJSON: 6125 this = self._parse_bitwise() 6126 path = self._match(TokenType.COMMA) and self._parse_string() 6127 6128 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6129 this = self._parse_field(any_token=True) 6130 kind = self._parse_types() 6131 path = 
self._parse_string() 6132 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6133 6134 return self.expression( 6135 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6136 ) 6137 6138 expressions = None 6139 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6140 self._match_l_paren() 6141 expressions = self._parse_csv(_parse_open_json_column_def) 6142 6143 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6144 6145 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6146 args = self._parse_csv(self._parse_bitwise) 6147 6148 if self._match(TokenType.IN): 6149 return self.expression( 6150 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6151 ) 6152 6153 if haystack_first: 6154 haystack = seq_get(args, 0) 6155 needle = seq_get(args, 1) 6156 else: 6157 needle = seq_get(args, 0) 6158 haystack = seq_get(args, 1) 6159 6160 return self.expression( 6161 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6162 ) 6163 6164 def _parse_predict(self) -> exp.Predict: 6165 self._match_text_seq("MODEL") 6166 this = self._parse_table() 6167 6168 self._match(TokenType.COMMA) 6169 self._match_text_seq("TABLE") 6170 6171 return self.expression( 6172 exp.Predict, 6173 this=this, 6174 expression=self._parse_table(), 6175 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6176 ) 6177 6178 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6179 args = self._parse_csv(self._parse_table) 6180 return exp.JoinHint(this=func_name.upper(), expressions=args) 6181 6182 def _parse_substring(self) -> exp.Substring: 6183 # Postgres supports the form: substring(string [from int] [for int]) 6184 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6185 6186 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6187 6188 if self._match(TokenType.FROM): 6189 args.append(self._parse_bitwise()) 6190 if self._match(TokenType.FOR): 6191 if len(args) == 1: 6192 args.append(exp.Literal.number(1)) 6193 args.append(self._parse_bitwise()) 6194 6195 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6196 6197 def _parse_trim(self) -> exp.Trim: 6198 # https://www.w3resource.com/sql/character-functions/trim.php 6199 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6200 6201 position = None 6202 collation = None 6203 expression = None 6204 6205 if self._match_texts(self.TRIM_TYPES): 6206 position = self._prev.text.upper() 6207 6208 this = self._parse_bitwise() 6209 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6210 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6211 expression = self._parse_bitwise() 6212 6213 if invert_order: 6214 this, expression = expression, this 6215 6216 if self._match(TokenType.COLLATE): 6217 collation = self._parse_bitwise() 6218 6219 return self.expression( 6220 exp.Trim, this=this, position=position, expression=expression, collation=collation 6221 ) 6222 6223 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6224 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6225 6226 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6227 return self._parse_window(self._parse_id_var(), alias=True) 6228 6229 def _parse_respect_or_ignore_nulls( 6230 self, this: t.Optional[exp.Expression] 6231 ) -> t.Optional[exp.Expression]: 6232 if self._match_text_seq("IGNORE", "NULLS"): 
6233 return self.expression(exp.IgnoreNulls, this=this) 6234 if self._match_text_seq("RESPECT", "NULLS"): 6235 return self.expression(exp.RespectNulls, this=this) 6236 return this 6237 6238 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6239 if self._match(TokenType.HAVING): 6240 self._match_texts(("MAX", "MIN")) 6241 max = self._prev.text.upper() != "MIN" 6242 return self.expression( 6243 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6244 ) 6245 6246 return this 6247 6248 def _parse_window( 6249 self, this: t.Optional[exp.Expression], alias: bool = False 6250 ) -> t.Optional[exp.Expression]: 6251 func = this 6252 comments = func.comments if isinstance(func, exp.Expression) else None 6253 6254 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6255 self._match(TokenType.WHERE) 6256 this = self.expression( 6257 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6258 ) 6259 self._match_r_paren() 6260 6261 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6262 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6263 if self._match_text_seq("WITHIN", "GROUP"): 6264 order = self._parse_wrapped(self._parse_order) 6265 this = self.expression(exp.WithinGroup, this=this, expression=order) 6266 6267 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER; 6268 # some dialects choose to implement it and some do not. 6269 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6270 6271 # There is some code above in _parse_lambda that handles 6272 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6273 6274 # The code below handles 6275 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6276 6277 # Oracle allows both formats 6278 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6279 # and Snowflake chose to do the same for familiarity 6280 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6281 if isinstance(this, exp.AggFunc): 6282 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6283 6284 if ignore_respect and ignore_respect is not this: 6285 ignore_respect.replace(ignore_respect.this) 6286 this = self.expression(ignore_respect.__class__, this=this) 6287 6288 this = self._parse_respect_or_ignore_nulls(this) 6289 6290 # bigquery select from window x AS (partition by ...) 
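# When alias=True we are parsing the named window definition itself, i.e. the `x AS (...)` part, so no OVER keyword precedes the parenthesis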
6291 if alias: 6292 over = None 6293 self._match(TokenType.ALIAS) 6294 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6295 return this 6296 else: 6297 over = self._prev.text.upper() 6298 6299 if comments and isinstance(func, exp.Expression): 6300 func.pop_comments() 6301 6302 if not self._match(TokenType.L_PAREN): 6303 return self.expression( 6304 exp.Window, 6305 comments=comments, 6306 this=this, 6307 alias=self._parse_id_var(False), 6308 over=over, 6309 ) 6310 6311 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6312 6313 first = self._match(TokenType.FIRST) 6314 if self._match_text_seq("LAST"): 6315 first = False 6316 6317 partition, order = self._parse_partition_and_order() 6318 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6319 6320 if kind: 6321 self._match(TokenType.BETWEEN) 6322 start = self._parse_window_spec() 6323 self._match(TokenType.AND) 6324 end = self._parse_window_spec() 6325 6326 spec = self.expression( 6327 exp.WindowSpec, 6328 kind=kind, 6329 start=start["value"], 6330 start_side=start["side"], 6331 end=end["value"], 6332 end_side=end["side"], 6333 ) 6334 else: 6335 spec = None 6336 6337 self._match_r_paren() 6338 6339 window = self.expression( 6340 exp.Window, 6341 comments=comments, 6342 this=this, 6343 partition_by=partition, 6344 order=order, 6345 spec=spec, 6346 alias=window_alias, 6347 over=over, 6348 first=first, 6349 ) 6350 6351 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6352 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6353 return self._parse_window(window, alias=alias) 6354 6355 return window 6356 6357 def _parse_partition_and_order( 6358 self, 6359 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6360 return self._parse_partition_by(), self._parse_order() 6361 6362 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6363 self._match(TokenType.BETWEEN) 6364 6365 return { 6366 "value": ( 6367 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6368 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6369 or self._parse_bitwise() 6370 ), 6371 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6372 } 6373 6374 def _parse_alias( 6375 self, this: t.Optional[exp.Expression], explicit: bool = False 6376 ) -> t.Optional[exp.Expression]: 6377 any_token = self._match(TokenType.ALIAS) 6378 comments = self._prev_comments or [] 6379 6380 if explicit and not any_token: 6381 return this 6382 6383 if self._match(TokenType.L_PAREN): 6384 aliases = self.expression( 6385 exp.Aliases, 6386 comments=comments, 6387 this=this, 6388 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6389 ) 6390 self._match_r_paren(aliases) 6391 return aliases 6392 6393 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6394 self.STRING_ALIASES and self._parse_string_as_identifier() 6395 ) 6396 6397 if alias: 6398 comments.extend(alias.pop_comments()) 6399 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6400 column = this.this 6401 6402 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6403 if not this.comments and column and column.comments: 6404 this.comments = column.pop_comments() 6405 6406 return this 6407 6408 def _parse_id_var( 6409 self, 6410 any_token: bool = True, 6411 tokens: t.Optional[t.Collection[TokenType]] = None, 6412 ) -> t.Optional[exp.Expression]: 6413 expression = self._parse_identifier() 6414 if 
not expression and ( 6415 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6416 ): 6417 quoted = self._prev.token_type == TokenType.STRING 6418 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6419 6420 return expression 6421 6422 def _parse_string(self) -> t.Optional[exp.Expression]: 6423 if self._match_set(self.STRING_PARSERS): 6424 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6425 return self._parse_placeholder() 6426 6427 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6428 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6429 6430 def _parse_number(self) -> t.Optional[exp.Expression]: 6431 if self._match_set(self.NUMERIC_PARSERS): 6432 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6433 return self._parse_placeholder() 6434 6435 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6436 if self._match(TokenType.IDENTIFIER): 6437 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6438 return self._parse_placeholder() 6439 6440 def _parse_var( 6441 self, 6442 any_token: bool = False, 6443 tokens: t.Optional[t.Collection[TokenType]] = None, 6444 upper: bool = False, 6445 ) -> t.Optional[exp.Expression]: 6446 if ( 6447 (any_token and self._advance_any()) 6448 or self._match(TokenType.VAR) 6449 or (self._match_set(tokens) if tokens else False) 6450 ): 6451 return self.expression( 6452 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6453 ) 6454 return self._parse_placeholder() 6455 6456 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6457 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6458 self._advance() 6459 return self._prev 6460 return None 6461 6462 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6463 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6464 6465 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6466 return self._parse_primary() or self._parse_var(any_token=True) 6467 6468 def _parse_null(self) -> t.Optional[exp.Expression]: 6469 if self._match_set(self.NULL_TOKENS): 6470 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6471 return self._parse_placeholder() 6472 6473 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6474 if self._match(TokenType.TRUE): 6475 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6476 if self._match(TokenType.FALSE): 6477 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6478 return self._parse_placeholder() 6479 6480 def _parse_star(self) -> t.Optional[exp.Expression]: 6481 if self._match(TokenType.STAR): 6482 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6483 return self._parse_placeholder() 6484 6485 def _parse_parameter(self) -> exp.Parameter: 6486 this = self._parse_identifier() or self._parse_primary_or_var() 6487 return self.expression(exp.Parameter, this=this) 6488 6489 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6490 if self._match_set(self.PLACEHOLDER_PARSERS): 6491 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6492 if placeholder: 6493 return placeholder 6494 self._advance(-1) 6495 return None 6496 6497 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6498 if not self._match_texts(keywords): 6499 return None 6500 if self._match(TokenType.L_PAREN, 
advance=False): 6501 return self._parse_wrapped_csv(self._parse_expression) 6502 6503 expression = self._parse_expression() 6504 return [expression] if expression else None 6505 6506 def _parse_csv( 6507 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6508 ) -> t.List[exp.Expression]: 6509 parse_result = parse_method() 6510 items = [parse_result] if parse_result is not None else [] 6511 6512 while self._match(sep): 6513 self._add_comments(parse_result) 6514 parse_result = parse_method() 6515 if parse_result is not None: 6516 items.append(parse_result) 6517 6518 return items 6519 6520 def _parse_tokens( 6521 self, parse_method: t.Callable, expressions: t.Dict 6522 ) -> t.Optional[exp.Expression]: 6523 this = parse_method() 6524 6525 while self._match_set(expressions): 6526 this = self.expression( 6527 expressions[self._prev.token_type], 6528 this=this, 6529 comments=self._prev_comments, 6530 expression=parse_method(), 6531 ) 6532 6533 return this 6534 6535 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6536 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6537 6538 def _parse_wrapped_csv( 6539 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6540 ) -> t.List[exp.Expression]: 6541 return self._parse_wrapped( 6542 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6543 ) 6544 6545 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6546 wrapped = self._match(TokenType.L_PAREN) 6547 if not wrapped and not optional: 6548 self.raise_error("Expecting (") 6549 parse_result = parse_method() 6550 if wrapped: 6551 self._match_r_paren() 6552 return parse_result 6553 6554 def _parse_expressions(self) -> t.List[exp.Expression]: 6555 return self._parse_csv(self._parse_expression) 6556 6557 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6558 return self._parse_select() or self._parse_set_operations( 6559 self._parse_expression() if alias else self._parse_assignment() 6560 ) 6561 6562 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6563 return self._parse_query_modifiers( 6564 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6565 ) 6566 6567 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6568 this = None 6569 if self._match_texts(self.TRANSACTION_KIND): 6570 this = self._prev.text 6571 6572 self._match_texts(("TRANSACTION", "WORK")) 6573 6574 modes = [] 6575 while True: 6576 mode = [] 6577 while self._match(TokenType.VAR): 6578 mode.append(self._prev.text) 6579 6580 if mode: 6581 modes.append(" ".join(mode)) 6582 if not self._match(TokenType.COMMA): 6583 break 6584 6585 return self.expression(exp.Transaction, this=this, modes=modes) 6586 6587 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6588 chain = None 6589 savepoint = None 6590 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6591 6592 self._match_texts(("TRANSACTION", "WORK")) 6593 6594 if self._match_text_seq("TO"): 6595 self._match_text_seq("SAVEPOINT") 6596 savepoint = self._parse_id_var() 6597 6598 if self._match(TokenType.AND): 6599 chain = not self._match_text_seq("NO") 6600 self._match_text_seq("CHAIN") 6601 6602 if is_rollback: 6603 return self.expression(exp.Rollback, savepoint=savepoint) 6604 6605 return self.expression(exp.Commit, chain=chain) 6606 6607 def _parse_refresh(self) -> exp.Refresh: 6608 self._match(TokenType.TABLE) 6609 return 
    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
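
The FIRST/AFTER branch above is what captures MySQL- and Databricks-style column positions. A sketch through the public API (the exact tree shape can vary between sqlglot versions):

    import sqlglot
    from sqlglot import exp

    alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT AFTER b", read="mysql")
    # the column definition carries an exp.ColumnPosition under "position"
    assert alter.find(exp.ColumnPosition) is not None
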
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
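
The two RENAME branches map to different nodes, which is easy to see by round-tripping (a sketch; the comments show the expected default-dialect rendering):

    import sqlglot

    # COLUMN routes to exp.RenameColumn; a bare RENAME TO renames the table
    print(sqlglot.parse_one("ALTER TABLE t RENAME COLUMN a TO b").sql())
    print(sqlglot.parse_one("ALTER TABLE t RENAME TO t2").sql())
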
    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._match(TokenType.RETURNING) and self._parse_csv(self._parse_bitwise),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)
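
Putting _parse_merge and _parse_when_matched together, a typical MERGE statement yields one exp.When per WHEN clause. A sketch using the public API:

    import sqlglot
    from sqlglot import exp

    merge = sqlglot.parse_one(
        """
        MERGE INTO target AS t USING source AS s ON t.id = s.id
        WHEN MATCHED THEN UPDATE SET t.v = s.v
        WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)
        """
    )
    print([w.args.get("matched") for w in merge.find_all(exp.When)])  # [True, False]
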
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )
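
In practice this means a plain assignment becomes a SetItem wrapping an EQ node, while transaction characteristics take the _parse_set_transaction path. A sketch:

    import sqlglot

    print(sqlglot.parse_one("SET x = 1").sql())
    print(sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED").sql())
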
    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None
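
These _match* helpers are the primitives that dialect parsers build on: _match_text_seq consumes an entire keyword sequence or backtracks to where it started, and advance=False only peeks at the next token without consuming it. A hypothetical subclass method showing the idioms (REFRESH FAST and _parse_refresh_fast are invented for illustration, not part of sqlglot):

    from sqlglot.parser import Parser
    from sqlglot.tokens import TokenType

    class MyParser(Parser):
        def _parse_refresh_fast(self):
            # Either both keywords match, or the cursor is restored
            if not self._match_text_seq("REFRESH", "FAST"):
                return None
            # Peek for "(" without consuming it
            if self._match(TokenType.L_PAREN, advance=False):
                return self._parse_wrapped_csv(self._parse_id_var)
            return self._parse_id_var()
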
    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
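
For instance, the Postgres-flavored identity and cascade options above all round-trip through exp.TruncateTable (a sketch):

    import sqlglot

    sql = "TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE"
    print(sqlglot.parse_one(sql, read="postgres").sql(dialect="postgres"))
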
    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )
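
As an example of the varlen-options path, Snowflake's parenthesized FILE_FORMAT settings are parsed via _parse_wrapped_options (a sketch, assuming a sqlglot version with Snowflake COPY INTO support):

    import sqlglot

    sql = "COPY INTO t FROM @stage FILE_FORMAT = (TYPE = CSV)"
    print(sqlglot.parse_one(sql, read="snowflake").sql(dialect="snowflake"))
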
    def _parse_star_ops(self) -> exp.Star | exp.UnpackColumns:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            return exp.UnpackColumns(this=self._parse_function())

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )
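
The collected EXCEPT/REPLACE/RENAME operands end up as arguments of the exp.Star node. A sketch using BigQuery, which supports the EXCEPT modifier:

    import sqlglot

    sql = "SELECT * EXCEPT (a) FROM t"
    print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))
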
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
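
A minimal construction example (dialect accepts a dialect name, a Dialect subclass, or an instance; Dialect.get_or_raise resolves it and raises on unknown names):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(dialect="snowflake", error_level=ErrorLevel.RAISE)
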
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
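
A short usage sketch, pairing the Tokenizer with the Parser directly:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    tokens = Tokenizer().tokenize(sql)
    trees = Parser().parse(tokens, sql=sql)
    print(len(trees))  # 2: one tree per statement
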
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
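
For example, with exp.Select, which is one of the keys registered in the stock EXPRESSION_PARSERS table (a sketch):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    tree = Parser().parse_into(exp.Select, Tokenizer().tokenize("SELECT 1"))[0]
    assert isinstance(tree, exp.Select)
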
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
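
With ErrorLevel.WARN, for instance, a malformed statement is logged instead of raised (a sketch):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.WARN)
    # The missing table name is logged via logger.error; partial trees are returned
    parser.parse(Tokenizer().tokenize("SELECT * FROM"), sql="SELECT * FROM")
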
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
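
The recorded metadata ends up on the raised ParseError, e.g. (a sketch):

    from sqlglot import parse_one
    from sqlglot.errors import ParseError

    try:
        parse_one("SELECT * FROM")
    except ParseError as e:
        err = e.errors[0]
        print(err["description"], err["line"], err["col"])
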
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
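
A small usage sketch outside of parsing proper (exp.Identifier only requires this, so validation passes):

    from sqlglot import exp
    from sqlglot.parser import Parser

    node = Parser().expression(exp.Identifier, this="x", quoted=False)
    print(node.sql())  # x
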
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
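
A sketch of a failing validation (exp.Not declares this as mandatory, so an empty node raises under the default IMMEDIATE error level):

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    try:
        Parser().validate_expression(exp.Not())
    except ParseError as e:
        print(e)  # reports the missing required keyword
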