
sqlglot.dialects.prql

from __future__ import annotations

import typing as t

from sqlglot import exp, parser, tokens
from sqlglot.dialects.dialect import Dialect
from sqlglot.tokens import TokenType


class PRQL(Dialect):
    class Tokenizer(tokens.Tokenizer):
        IDENTIFIERS = ["`"]
        QUOTES = ["'", '"']

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "=": TokenType.ALIAS,
            "'": TokenType.QUOTE,
            '"': TokenType.QUOTE,
            "`": TokenType.IDENTIFIER,
            "#": TokenType.COMMENT,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
        }

    class Parser(parser.Parser):
        # Each PRQL transform keyword maps to a handler that folds the
        # transform into the query built so far.
        TRANSFORM_PARSERS = {
            "DERIVE": lambda self, query: self._parse_selection(query),
            "SELECT": lambda self, query: self._parse_selection(query, append=False),
            "TAKE": lambda self, query: self._parse_take(query),
        }

        def _parse_statement(self) -> t.Optional[exp.Expression]:
            expression = self._parse_expression()
            expression = expression if expression else self._parse_query()
            return expression

        def _parse_query(self) -> t.Optional[exp.Query]:
            from_ = self._parse_from()

            if not from_:
                return None

            # Seed with SELECT * FROM <table>, then apply each transform in order.
            query = exp.select("*").from_(from_, copy=False)

            while self._match_texts(self.TRANSFORM_PARSERS):
                query = self.TRANSFORM_PARSERS[self._prev.text.upper()](self, query)

            return query

        def _parse_selection(self, query: exp.Query, append: bool = True) -> exp.Query:
            if self._match(TokenType.L_BRACE):
                selects = self._parse_csv(self._parse_expression)

                if not self._match(TokenType.R_BRACE, expression=query):
                    self.raise_error("Expecting }")
            else:
                expression = self._parse_expression()
                selects = [expression] if expression else []

            # Map each existing projection's name to its unaliased expression so
            # later transforms can reference previously derived columns.
            projections = {
                select.alias_or_name: select.this if isinstance(select, exp.Alias) else select
                for select in query.selects
            }

            # Inline references to earlier derived columns into the new selects.
            selects = [
                select.transform(
                    lambda s: (projections[s.name].copy() if s.name in projections else s)
                    if isinstance(s, exp.Column)
                    else s,
                    copy=False,
                )
                for select in selects
            ]

            return query.select(*selects, append=append, copy=False)

        def _parse_take(self, query: exp.Query) -> t.Optional[exp.Query]:
            num = self._parse_number()  # TODO: TAKE for ranges a..b
            return query.limit(num) if num else None

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # PRQL binds names with `alias = expr`; "=" tokenizes as ALIAS above.
            if self._next and self._next.token_type == TokenType.ALIAS:
                alias = self._parse_id_var(True)
                self._match(TokenType.ALIAS)
                return self.expression(exp.Alias, this=self._parse_conjunction(), alias=alias)
            return self._parse_conjunction()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
        ) -> t.Optional[exp.Expression]:
            return self._parse_table_parts()

        def _parse_from(
            self, joins: bool = False, skip_from_token: bool = False
        ) -> t.Optional[exp.From]:
            if not skip_from_token and not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
            )
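
Because Dialect subclasses register themselves under their lowercased class name, this dialect can be selected with read="prql". A minimal usage sketch (hypothetical table and column names; exact output may vary by sqlglot version):

    import sqlglot

    prql = """
    from employees
    derive gross_cost = salary + overhead
    select {name, gross_cost}
    take 10
    """

    # DERIVE appends to SELECT *, SELECT replaces the projection list, and
    # TAKE becomes LIMIT; derived names are inlined by _parse_selection.
    print(sqlglot.transpile(prql, read="prql", write="duckdb")[0])
    # roughly: SELECT name, salary + overhead FROM employees LIMIT 10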
class PRQL(sqlglot.dialects.dialect.Dialect):
tokenizer_class = <class 'PRQL.Tokenizer'>
parser_class = <class 'PRQL.Parser'>
generator_class = <class 'sqlglot.generator.Generator'>
TIME_TRIE: Dict = {}
FORMAT_TRIE: Dict = {}
INVERSE_TIME_MAPPING: Dict[str, str] = {}
INVERSE_TIME_TRIE: Dict = {}
ESCAPED_SEQUENCES: Dict[str, str] = {}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '`'
IDENTIFIER_END = '`'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = None
HEX_END: Optional[str] = None
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = None
UNICODE_END: Optional[str] = None
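
The quoting settings above follow from the tokenizer: PRQL identifiers are backtick-quoted and strings may use single or double quotes. A sketch of how that surfaces when transpiling (hypothetical table name; output may vary by version):

    import sqlglot

    # Backtick identifiers are re-quoted with the target dialect's rules.
    print(sqlglot.transpile("from `my table`\ntake 1", read="prql", write="postgres")[0])
    # roughly: SELECT * FROM "my table" LIMIT 1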
class PRQL.Tokenizer(sqlglot.tokens.Tokenizer):
IDENTIFIERS = ['`']
QUOTES = ["'", '"']
SINGLE_TOKENS = {**sqlglot.tokens.Tokenizer.SINGLE_TOKENS, '=': TokenType.ALIAS, "'": TokenType.QUOTE, '"': TokenType.QUOTE, '`': TokenType.IDENTIFIER, '#': TokenType.COMMENT}
KEYWORDS = {**sqlglot.tokens.Tokenizer.KEYWORDS} (inherited from the base tokenizer unchanged; see sqlglot.tokens.Tokenizer for the full mapping)
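
The notable override is "=", which tokenizes as ALIAS rather than EQ so that PRQL's `name = expr` bindings parse as aliases. A quick way to observe this (assumes Dialect.tokenize, as in current sqlglot):

    from sqlglot.dialects.prql import PRQL

    tokens = PRQL().tokenize("derive gross = salary + bonus")
    print([(tok.text, tok.token_type.name) for tok in tokens])
    # the "=" token should surface as ('=', 'ALIAS')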
class PRQL.Parser(sqlglot.parser.Parser):

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
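
A sketch of the error settings described above, exercised through this dialect's "Expecting }" path (assumes sqlglot's top-level API, which forwards these options to the Parser):

    import sqlglot
    from sqlglot.errors import ErrorLevel, ParseError

    try:
        # With the default ErrorLevel.IMMEDIATE an error raises right away;
        # with ErrorLevel.RAISE up to max_errors are collected and raised together.
        sqlglot.parse_one("from x\nselect {a", read="prql", error_level=ErrorLevel.RAISE)
    except ParseError as e:
        print(e)  # includes "Expecting }"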
TRANSFORM_PARSERS = {'DERIVE': <function PRQL.Parser.<lambda>>, 'SELECT': <function PRQL.Parser.<lambda>>, 'TAKE': <function PRQL.Parser.<lambda>>}
SHOW_TRIE: Dict = {}
SET_TRIE: Dict = {'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
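
TRANSFORM_PARSERS drives the pipeline: _parse_query seeds SELECT * FROM <table> and each matched transform rewrites that query in place. A minimal happy-path sketch:

    import sqlglot

    # TAKE becomes LIMIT; the seeded SELECT * projection is kept as-is.
    print(sqlglot.parse_one("from x\ntake 5", read="prql").sql())
    # SELECT * FROM x LIMIT 5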
Inherited Members
sqlglot.parser.Parser
Parser
FUNCTIONS
NO_PAREN_FUNCTIONS
STRUCT_TYPE_TOKENS
NESTED_TYPE_TOKENS
ENUM_TYPE_TOKENS
AGGREGATE_TYPE_TOKENS
TYPE_TOKENS
SIGNED_TO_UNSIGNED_TYPE_TOKEN
SUBQUERY_PREDICATES
RESERVED_TOKENS
DB_CREATABLES
CREATABLES
ID_VAR_TOKENS
INTERVAL_VARS
TABLE_ALIAS_TOKENS
ALIAS_TOKENS
COMMENT_TABLE_ALIAS_TOKENS
UPDATE_ALIAS_TOKENS
TRIM_TYPES
FUNC_TOKENS
CONJUNCTION
EQUALITY
COMPARISON
BITWISE
TERM
FACTOR
EXPONENT
TIMES
TIMESTAMPS
SET_OPERATIONS
JOIN_METHODS
JOIN_SIDES
JOIN_KINDS
JOIN_HINTS
LAMBDAS
COLUMN_OPERATORS
EXPRESSION_PARSERS
STATEMENT_PARSERS
UNARY_PARSERS
STRING_PARSERS
NUMERIC_PARSERS
PRIMARY_PARSERS
PLACEHOLDER_PARSERS
RANGE_PARSERS
PROPERTY_PARSERS
CONSTRAINT_PARSERS
ALTER_PARSERS
SCHEMA_UNNAMED_CONSTRAINTS
NO_PAREN_FUNCTION_PARSERS
INVALID_FUNC_NAME_TOKENS
FUNCTIONS_WITH_ALIASED_ARGS
KEY_VALUE_DEFINITIONS
FUNCTION_PARSERS
QUERY_MODIFIER_PARSERS
SET_PARSERS
SHOW_PARSERS
TYPE_LITERAL_PARSERS
DDL_SELECT_TOKENS
PRE_VOLATILE_TOKENS
TRANSACTION_KIND
TRANSACTION_CHARACTERISTICS
CONFLICT_ACTIONS
CREATE_SEQUENCE
ISOLATED_LOADING_OPTIONS
USABLES
CAST_ACTIONS
INSERT_ALTERNATIVES
CLONE_KEYWORDS
HISTORICAL_DATA_KIND
OPCLASS_FOLLOW_KEYWORDS
OPTYPE_FOLLOW_TOKENS
TABLE_INDEX_HINT_TOKENS
VIEW_ATTRIBUTES
WINDOW_ALIAS_TOKENS
WINDOW_BEFORE_PAREN_TOKENS
WINDOW_SIDES
JSON_KEY_VALUE_SEPARATOR_TOKENS
FETCH_TOKENS
ADD_CONSTRAINT_TOKENS
DISTINCT_TOKENS
NULL_TOKENS
UNNEST_OFFSET_ALIAS_TOKENS
SELECT_START_TOKENS
STRICT_CAST
PREFIXED_PIVOT_COLUMNS
IDENTIFY_PIVOT_STRINGS
LOG_DEFAULTS_TO_LN
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
TABLESAMPLE_CSV
SET_REQUIRES_ASSIGNMENT_DELIMITER
TRIM_PATTERN_FIRST
STRING_ALIASES
MODIFIERS_ATTACHED_TO_UNION
UNION_MODIFIERS
NO_PAREN_IF_COMMANDS
JSON_ARROWS_REQUIRE_JSON_TYPE
VALUES_FOLLOWED_BY_PAREN
SUPPORTS_IMPLICIT_UNNEST
INTERVAL_SPANS
error_level
error_message_context
max_errors
dialect
reset
parse
parse_into
check_errors
raise_error
expression
validate_expression
errors
sql