diff --git a/bigframes/_config/experiment_options.py b/bigframes/_config/experiment_options.py index 6c51ef6db3..29cf2853e6 100644 --- a/bigframes/_config/experiment_options.py +++ b/bigframes/_config/experiment_options.py @@ -27,7 +27,7 @@ class ExperimentOptions: def __init__(self): self._semantic_operators: bool = False self._ai_operators: bool = False - self._sql_compiler: Literal["legacy", "stable", "experimental"] = "stable" + self._sql_compiler: Literal["legacy", "stable", "experimental"] = "experimental" @property def semantic_operators(self) -> bool: diff --git a/bigframes/core/bigframe_node.py b/bigframes/core/bigframe_node.py index 7e40248a00..e14b48a7ec 100644 --- a/bigframes/core/bigframe_node.py +++ b/bigframes/core/bigframe_node.py @@ -330,22 +330,12 @@ def top_down( """ Perform a top-down transformation of the BigFrameNode tree. """ - to_process = [self] - results: Dict[BigFrameNode, BigFrameNode] = {} - while to_process: - item = to_process.pop() - if item not in results.keys(): - item_result = transform(item) - results[item] = item_result - to_process.extend(item_result.child_nodes) + @functools.cache + def recursive_transform(node: BigFrameNode) -> BigFrameNode: + return transform(node).transform_children(recursive_transform) - to_process = [self] - # for each processed item, replace its children - for item in reversed(list(results.keys())): - results[item] = results[item].transform_children(lambda x: results[x]) - - return results[self] + return recursive_transform(self) def bottom_up( self: BigFrameNode, diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py index d4ae01f511..a86a192a9e 100644 --- a/bigframes/core/compile/sqlglot/compiler.py +++ b/bigframes/core/compile/sqlglot/compiler.py @@ -62,6 +62,8 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: if request.sort_rows: result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node)) encoded_type_refs 
= data_type_logger.encode_type_refs(result_node) + # TODO: Extract CTEs earlier + result_node = typing.cast(nodes.ResultNode, rewrite.extract_ctes(result_node)) sql = _compile_result_node(result_node) return configs.CompileResult( sql, @@ -74,6 +76,8 @@ def compile_sql(request: configs.CompileRequest) -> configs.CompileResult: result_node = dataclasses.replace(result_node, order_by=None) result_node = typing.cast(nodes.ResultNode, rewrite.column_pruning(result_node)) encoded_type_refs = data_type_logger.encode_type_refs(result_node) + # TODO: Extract CTEs earlier + result_node = typing.cast(nodes.ResultNode, rewrite.extract_ctes(result_node)) sql = _compile_result_node(result_node) # Return the ordering iff no extra columns are needed to define the row order if ordering is not None: @@ -94,6 +98,7 @@ def _remap_variables( result_node, _ = rewrite.remap_variables( node, map(identifiers.ColumnId, uid_gen.get_uid_stream("bfcol_")) ) + result_node.validate_tree() return typing.cast(nodes.ResultNode, result_node) @@ -102,13 +107,16 @@ def _compile_result_node(root: nodes.ResultNode) -> str: # of nodes using the same generator. uid_gen = guid.SequentialUIDGenerator() root = _remap_variables(root, uid_gen) + # Remap variables creates too many new + # root = rewrite.select_pullup(root, prefer_source_names=False) root = typing.cast(nodes.ResultNode, rewrite.defer_selection(root)) # Have to bind schema as the final step before compilation. # Probably, should defer even further root = typing.cast(nodes.ResultNode, schema_binding.bind_schema_to_tree(root)) - sqlglot_ir_obj = compile_node(rewrite.as_sql_nodes(root), uid_gen) + # TODO: Bake all IDs in tree, stop passing uid_gen to emitters + sqlglot_ir_obj = compile_node(rewrite.as_sql_nodes(root, uid_gen), uid_gen) return sqlglot_ir_obj.sql @@ -121,7 +129,7 @@ def compile_node( for current_node in list(node.iter_nodes_topo()): if current_node.child_nodes == (): # For leaf node, generates a dumpy child to pass the UID generator.
- child_results = tuple([sqlglot_ir.SQLGlotIR(uid_gen=uid_gen)]) + child_results = tuple([sqlglot_ir.SQLGlotIR.empty(uid_gen=uid_gen)]) else: # Child nodes should have been compiled in the reverse topological order. child_results = tuple( @@ -256,6 +264,23 @@ def compile_isin_join( ) +@_compile_node.register +def compile_cte_ref_node(node: sql_nodes.SqlCteRefNode, child: sqlglot_ir.SQLGlotIR): + return sqlglot_ir.SQLGlotIR.from_cte_ref( + node.cte_name, + uid_gen=child.uid_gen, + ) + + +@_compile_node.register +def compile_with_ctes_node( + node: sql_nodes.SqlWithCtesNode, + child: sqlglot_ir.SQLGlotIR, + *ctes: sqlglot_ir.SQLGlotIR, +): + return child.with_ctes(tuple(zip(node.cte_names, ctes))) + + @_compile_node.register def compile_concat( node: nodes.ConcatNode, *children: sqlglot_ir.SQLGlotIR @@ -271,7 +296,7 @@ def compile_concat( ] return sqlglot_ir.SQLGlotIR.from_union( - [child._as_select() for child in children], + [child.expr.as_select_all() for child in children], output_aliases=output_aliases, uid_gen=uid_gen, ) diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py index 52906ffb69..c815dbfd41 100644 --- a/bigframes/core/compile/sqlglot/sqlglot_ir.py +++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py @@ -14,6 +14,7 @@ from __future__ import annotations +import abc import dataclasses import datetime import functools @@ -38,20 +39,107 @@ to_wkt = dumps +class SelectableFragment(abc.ABC): + """ + Represent a grammar fragment that can be converted to a SELECT or FROM item. + """ + + def as_select_all(self) -> sge.Select: + ... + + def select(self, *items: sge.Expression) -> sge.Select: + ... + + def as_from_item(self) -> sge.FromItem: + ... 
+ + +class SelectFragment(SelectableFragment): + def __init__(self, select_expr: sge.Select): + self.select_expr = select_expr + + def as_select_all(self) -> sge.Select: + return self.select_expr + + def select(self, *items: sge.Expression) -> sge.Select: + return sge.Select().select(*items).from_(self.select_expr.subquery()) + + def as_from_item(self) -> sge.FromItem: + return self.select_expr.subquery() + + +class TableFragment(SelectableFragment): + def __init__(self, table: sge.Table | sge.Unnest): + self.table = table + + def as_select_all(self) -> sge.Select: + return sge.Select().select(sge.Star()).from_(self.table) + + def select(self, *items: sge.Expression) -> sge.Select: + return sge.Select().select(*items).from_(self.table) + + def as_from_item(self) -> sge.FromItem: + return self.table + + +class DeferredSelectFragment(SelectableFragment): + def __init__(self, select_supplier: typing.Callable[[sge.Select], sge.Select]): + self.select_supplier = select_supplier + + def as_select_all(self) -> sge.Select: + return self.select_supplier(sge.Select().select(sge.Star())) + + def select(self, *items: sge.Expression) -> sge.Select: + return self.select_supplier(sge.Select().select(*items)) + + def as_from_item(self) -> sge.FromItem: + return self.select_supplier(sge.Select().select(sge.Star())).subquery() + + @dataclasses.dataclass(frozen=True) class SQLGlotIR: """Helper class to build SQLGlot Query and generate SQL string.""" - expr: typing.Union[sge.Select, sge.Table] = sg.select() + expr: SelectableFragment """The SQLGlot expression representing the query.""" uid_gen: guid.SequentialUIDGenerator = guid.SequentialUIDGenerator() """Generator for unique identifiers.""" + def __post_init__(self): + assert isinstance(self.expr, SelectableFragment) + @property def sql(self) -> str: """Generate SQL string from the given expression.""" - return sql.to_sql(self.expr) + return sql.to_sql(self.expr.as_select_all()) + + @classmethod + def empty( + cls, uid_gen: 
guid.SequentialUIDGenerator = guid.SequentialUIDGenerator() + ) -> SQLGlotIR: + return cls(expr=SelectFragment(sge.select()), uid_gen=uid_gen) + + @classmethod + def from_expr( + cls, + expr: sge.Expression, + uid_gen: guid.SequentialUIDGenerator = guid.SequentialUIDGenerator(), + ) -> SQLGlotIR: + if isinstance(expr, sge.Select): + return cls(expr=SelectFragment(expr), uid_gen=uid_gen) + elif isinstance(expr, (sge.Table, sge.Unnest)): + return cls(expr=TableFragment(expr), uid_gen=uid_gen) + else: + raise ValueError(f"Unsupported expression type: {type(expr)}") + + @classmethod + def from_func( + cls, + select_handler: typing.Callable[[sge.Select], sge.Select], + uid_gen: guid.SequentialUIDGenerator = guid.SequentialUIDGenerator(), + ): + return cls(expr=DeferredSelectFragment(select_handler), uid_gen=uid_gen) @classmethod def from_pyarrow( @@ -97,7 +185,7 @@ def from_pyarrow( ), ], ) - return cls(expr=sg.select(sge.Star()).from_(expr), uid_gen=uid_gen) + return cls.from_expr(expr=expr, uid_gen=uid_gen) @classmethod def from_table( @@ -143,9 +231,20 @@ def from_table( select_expr = select_expr.where( sg.parse_one(sql_predicate, dialect=sql.base.DIALECT), append=False ) - return cls(expr=select_expr, uid_gen=uid_gen) + return cls.from_expr(expr=select_expr, uid_gen=uid_gen) - return cls(expr=table_expr, uid_gen=uid_gen) + return cls.from_expr(expr=table_expr, uid_gen=uid_gen) + + @classmethod + def from_cte_ref( + cls, + cte_ref: str, + uid_gen: guid.SequentialUIDGenerator, + ) -> SQLGlotIR: + table_expr = sge.Table( + this=sql.identifier(cte_ref), + ) + return cls.from_expr(expr=table_expr, uid_gen=uid_gen) def select( self, @@ -155,14 +254,6 @@ def select( limit: typing.Optional[int] = None, ) -> SQLGlotIR: # TODO: Explicitly insert CTEs into plan - if isinstance(self.expr, sge.Select): - new_expr, _ = self._select_to_cte() - else: - new_expr = sge.Select().from_(self.expr) - - if len(sorting) > 0: - new_expr = new_expr.order_by(*sorting) - if len(selections) > 
0: to_select = [ sge.Alias( @@ -173,9 +264,12 @@ def select( else expr for id, expr in selections ] - new_expr = new_expr.select(*to_select, append=False) + new_expr = self.expr.select(*to_select) else: - new_expr = new_expr.select(sge.Star(), append=False) + new_expr = self.expr.as_select_all() + + if len(sorting) > 0: + new_expr = new_expr.order_by(*sorting) if len(predicates) > 0: condition = _and(predicates) @@ -183,7 +277,7 @@ def select( if limit is not None: new_expr = new_expr.limit(limit) - return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) + return SQLGlotIR.from_expr(expr=new_expr, uid_gen=self.uid_gen) @classmethod def from_unparsed_query( @@ -201,7 +295,7 @@ def from_unparsed_query( ) select_expr = sge.Select().select(sge.Star()).from_(sge.Table(this=cte_name)) select_expr = _set_query_ctes(select_expr, [cte]) - return cls(expr=select_expr, uid_gen=uid_gen) + return cls.from_expr(expr=select_expr, uid_gen=uid_gen) @classmethod def from_union( @@ -214,21 +308,8 @@ def from_union( assert ( len(list(selects)) >= 2 ), f"At least two select expressions must be provided, but got {selects}." 
- - existing_ctes: list[sge.CTE] = [] - union_selects: list[sge.Select] = [] - for select in selects: - assert isinstance( - select, sge.Select - ), f"All provided expressions must be of type sge.Select, but got {type(select)}" - - select_expr = select.copy() - select_expr, select_ctes = _pop_query_ctes(select_expr) - existing_ctes = _merge_ctes(existing_ctes, select_ctes) - union_selects.append(select_expr) - - union_expr: sge.Query = union_selects[0].subquery() - for select in union_selects[1:]: + union_expr: sge.Query = selects[0].subquery() + for select in selects[1:]: union_expr = sge.Union( this=union_expr, expression=select.subquery(), @@ -246,8 +327,7 @@ def from_union( final_select_expr = ( sge.Select().select(*selections).from_(union_expr.subquery()) ) - final_select_expr = _set_query_ctes(final_select_expr, existing_ctes) - return cls(expr=final_select_expr, uid_gen=uid_gen) + return cls.from_expr(expr=final_select_expr, uid_gen=uid_gen) def join( self, @@ -258,12 +338,8 @@ def join( joins_nulls: bool = True, ) -> SQLGlotIR: """Joins the current query with another SQLGlotIR instance.""" - left_select, left_cte_name = self._select_to_cte() - right_select, right_cte_name = right._select_to_cte() - - left_select, left_ctes = _pop_query_ctes(left_select) - right_select, right_ctes = _pop_query_ctes(right_select) - merged_ctes = _merge_ctes(left_ctes, right_ctes) + left_from = self.expr.as_from_item() + right_from = right.expr.as_from_item() join_on = _and( tuple( @@ -272,15 +348,12 @@ def join( ) join_type_str = join_type if join_type != "outer" else "full outer" - new_expr = ( - sge.Select() - .select(sge.Star()) - .from_(sge.Table(this=left_cte_name)) - .join(sge.Table(this=right_cte_name), on=join_on, join_type=join_type_str) + return SQLGlotIR.from_func( + lambda select: select.from_(left_from).join( + right_from, on=join_on, join_type=join_type_str + ), + uid_gen=self.uid_gen, ) - new_expr = _set_query_ctes(new_expr, merged_ctes) - - return 
SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) def isin_join( self, @@ -290,53 +363,48 @@ def isin_join( joins_nulls: bool = True, ) -> SQLGlotIR: """Joins the current query with another SQLGlotIR instance.""" - left_select, left_cte_name = self._select_to_cte() - # Prefer subquery over CTE for the IN clause's right side to improve SQL readability. - right_select = right._as_select() - - left_select, left_ctes = _pop_query_ctes(left_select) - right_select, right_ctes = _pop_query_ctes(right_select) - merged_ctes = _merge_ctes(left_ctes, right_ctes) - - left_condition = typed_expr.TypedExpr( - sge.Column(this=conditions[0].expr, table=left_cte_name), - conditions[0].dtype, - ) + left_from = self.expr.as_from_item() new_column: sge.Expression if joins_nulls: - right_table_name = sql.identifier(next(self.uid_gen.get_uid_stream("bft_"))) - right_condition = typed_expr.TypedExpr( - sge.Column(this=conditions[1].expr, table=right_table_name), - conditions[1].dtype, + part1_id = sql.identifier(next(self.uid_gen.get_uid_stream("bfpart1_"))) + part2_id = sql.identifier(next(self.uid_gen.get_uid_stream("bfpart2_"))) + left_expr1, left_expr2 = _value_to_non_null_identity(conditions[0]) + left_as_struct = sge.Struct( + expressions=[ + sge.PropertyEQ(this=part1_id, expression=left_expr1), + sge.PropertyEQ(this=part2_id, expression=left_expr2), + ] ) - new_column = sge.Exists( - this=sge.Select() - .select(sge.convert(1)) - .from_(sge.Alias(this=right_select.subquery(), alias=right_table_name)) - .where( - _join_condition(left_condition, right_condition, joins_nulls=True) - ) + right_expr1, right_expr2 = _value_to_non_null_identity(conditions[1]) + right_select = right.expr.select( + *[ + sge.Struct( + expressions=[ + sge.PropertyEQ(this=part1_id, expression=right_expr1), + sge.PropertyEQ(this=part2_id, expression=right_expr2), + ] + ) + ], ) - else: + new_column = sge.In( - this=left_condition.expr, + this=left_as_struct, expressions=[right_select.subquery()], ) + else: + 
new_column = sge.In( + this=conditions[0].expr, + expressions=[right._as_subquery()], + ) new_column = sge.Alias( this=new_column, alias=sql.identifier(indicator_col), ) - new_expr = ( - sge.Select() - .select(sge.Column(this=sge.Star(), table=left_cte_name), new_column) - .from_(sge.Table(this=left_cte_name)) - ) - new_expr = _set_query_ctes(new_expr, merged_ctes) - - return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) + new_expr = sge.Select().select(sge.Star(), new_column).from_(left_from) + return SQLGlotIR.from_expr(expr=new_expr, uid_gen=self.uid_gen) def explode( self, @@ -358,8 +426,8 @@ def sample(self, fraction: float) -> SQLGlotIR: expression=sql.literal(fraction, dtypes.FLOAT_DTYPE), ) - new_expr = self._select_to_cte()[0].where(condition, append=False) - return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) + new_expr = self.expr.as_select_all().where(condition, append=False) + return SQLGlotIR.from_expr(expr=new_expr, uid_gen=self.uid_gen) def aggregate( self, @@ -382,10 +450,7 @@ def aggregate( for id, expr in aggregations ] - new_expr, _ = self._select_to_cte() - new_expr = new_expr.group_by(*by_cols).select( - *[*by_cols, *aggregations_expr], append=False - ) + new_expr = self.expr.select(*[*by_cols, *aggregations_expr]).group_by(*by_cols) condition = _and( tuple( @@ -395,7 +460,21 @@ def aggregate( ) if condition is not None: new_expr = new_expr.where(condition, append=False) - return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) + return SQLGlotIR.from_expr(expr=new_expr, uid_gen=self.uid_gen) + + def with_ctes( + self, + ctes: tuple[tuple[str, SQLGlotIR], ...], + ) -> SQLGlotIR: + sge_ctes = [ + sge.CTE( + this=cte.expr.as_select_all(), + alias=sql.identifier(cte_name), + ) + for cte_name, cte in ctes + ] + select_expr = _set_query_ctes(self.expr.as_select_all(), sge_ctes) + return SQLGlotIR.from_expr(expr=select_expr, uid_gen=self.uid_gen) def resample( self, @@ -405,16 +484,12 @@ def resample( stop_expr: sge.Expression, step_expr: 
sge.Expression, ) -> SQLGlotIR: - # Get identifier for left and right by pushing them to CTEs - left_select, left_id = self._select_to_cte() - right_select, right_id = right._select_to_cte() - - # Extract all CTEs from the returned select expressions - _, left_ctes = _pop_query_ctes(left_select) - _, right_ctes = _pop_query_ctes(right_select) - merged_ctes = _merge_ctes(left_ctes, right_ctes) - - generate_array = sge.func("GENERATE_ARRAY", start_expr, stop_expr, step_expr) + generate_array = sge.func( + "GENERATE_ARRAY", + start_expr, + stop_expr, + step_expr, + ) unnested_column_alias = sql.identifier( next(self.uid_gen.get_uid_stream("bfcol_")) @@ -430,13 +505,12 @@ def resample( new_expr = ( sge.Select() .select(unnested_column_alias.as_(final_col_id)) - .from_(sge.Table(this=left_id)) - .join(sge.Table(this=right_id), join_type="cross") + .from_(self.expr.as_from_item()) + .join(right.expr.as_from_item(), join_type="cross") .join(unnest_expr, join_type="cross") ) - new_expr = _set_query_ctes(new_expr, merged_ctes) - return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) + return SQLGlotIR.from_expr(expr=new_expr, uid_gen=self.uid_gen) def _explode_single_column( self, column_name: str, offsets_col: typing.Optional[str] @@ -454,12 +528,9 @@ def _explode_single_column( ) selection = sge.Star(replace=[unnested_column_alias.as_(column)]) - new_expr, _ = self._select_to_cte() # Use LEFT JOIN to preserve rows when unnesting empty arrays. - new_expr = new_expr.select(selection, append=False).join( - unnest_expr, join_type="LEFT" - ) - return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) + new_expr = self.expr.select(selection).join(unnest_expr, join_type="LEFT") + return SQLGlotIR.from_expr(expr=new_expr, uid_gen=self.uid_gen) def _explode_multiple_columns( self, @@ -497,38 +568,13 @@ def _explode_multiple_columns( for column in columns ] ) - new_expr, _ = self._select_to_cte() # Use LEFT JOIN to preserve rows when unnesting empty arrays. 
- new_expr = new_expr.select(selection, append=False).join( - unnest_expr, join_type="LEFT" - ) - return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen) - - def _as_select(self) -> sge.Select: - if isinstance(self.expr, sge.Select): - return self.expr - else: # table - return sge.Select().from_(self.expr) + new_expr = self.expr.select(selection).join(unnest_expr, join_type="LEFT") + return SQLGlotIR.from_expr(expr=new_expr, uid_gen=self.uid_gen) def _as_subquery(self) -> sge.Subquery: - return self._as_select().subquery() - - def _select_to_cte(self) -> tuple[sge.Select, sge.Identifier]: - """Transforms a given sge.Select query by pushing its main SELECT statement - into a new CTE and then generates a 'SELECT * FROM new_cte_name' - for the new query.""" - cte_name = sql.identifier(next(self.uid_gen.get_uid_stream("bfcte_"))) - select_expr = self._as_select().copy() - select_expr, existing_ctes = _pop_query_ctes(select_expr) - new_cte = sge.CTE( - this=select_expr, - alias=cte_name, - ) - new_select_expr = ( - sge.Select().select(sge.Star()).from_(sge.Table(this=cte_name)) - ) - new_select_expr = _set_query_ctes(new_select_expr, [*existing_ctes, new_cte]) - return new_select_expr, cte_name + # Sometimes explicitly need a subquery, e.g. for IN expressions. + return self.expr.as_select_all().subquery() def _and(conditions: tuple[sge.Expression, ...]) -> typing.Optional[sge.Expression]: @@ -561,77 +607,48 @@ def _join_condition( joins_nulls: If True, generates complex logic to handle nulls/NaNs. Otherwise, uses a simple equality check where appropriate. 
""" - is_floating_types = ( - left.dtype == dtypes.FLOAT_DTYPE and right.dtype == dtypes.FLOAT_DTYPE - ) - if not is_floating_types and not joins_nulls: + if not joins_nulls: return sge.EQ(this=left.expr, expression=right.expr) - - is_numeric_types = dtypes.is_numeric( - left.dtype, include_bool=False - ) and dtypes.is_numeric(right.dtype, include_bool=False) - if is_numeric_types: - return _join_condition_for_numeric(left, right) - else: - return _join_condition_for_others(left, right) - - -def _join_condition_for_others( - left: typed_expr.TypedExpr, - right: typed_expr.TypedExpr, -) -> sge.And: - """Generates a join condition for non-numeric types to match pandas's - null-handling logic. - """ - left_str = sql.cast(left.expr, "STRING") - right_str = sql.cast(right.expr, "STRING") - left_0 = sge.func("COALESCE", left_str, sql.literal("0", dtypes.STRING_DTYPE)) - left_1 = sge.func("COALESCE", left_str, sql.literal("1", dtypes.STRING_DTYPE)) - right_0 = sge.func("COALESCE", right_str, sql.literal("0", dtypes.STRING_DTYPE)) - right_1 = sge.func("COALESCE", right_str, sql.literal("1", dtypes.STRING_DTYPE)) + left_expr1, left_expr2 = _value_to_non_null_identity(left) + right_expr1, right_expr2 = _value_to_non_null_identity(right) return sge.And( - this=sge.EQ(this=left_0, expression=right_0), - expression=sge.EQ(this=left_1, expression=right_1), + this=sge.EQ(this=left_expr1, expression=right_expr1), + expression=sge.EQ(this=left_expr2, expression=right_expr2), ) -def _join_condition_for_numeric( - left: typed_expr.TypedExpr, - right: typed_expr.TypedExpr, -) -> sge.And: - """Generates a join condition for non-numeric types to match pandas's - null-handling logic. Specifically for FLOAT types, Pandas treats NaN aren't - equal so need to coalesce as well with different constants. 
- """ - is_floating_types = ( - left.dtype == dtypes.FLOAT_DTYPE and right.dtype == dtypes.FLOAT_DTYPE - ) - left_0 = sge.func("COALESCE", left.expr, sql.literal(0, left.dtype)) - left_1 = sge.func("COALESCE", left.expr, sql.literal(1, left.dtype)) - right_0 = sge.func("COALESCE", right.expr, sql.literal(0, right.dtype)) - right_1 = sge.func("COALESCE", right.expr, sql.literal(1, right.dtype)) - if not is_floating_types: - return sge.And( - this=sge.EQ(this=left_0, expression=right_0), - expression=sge.EQ(this=left_1, expression=right_1), +def _value_to_non_null_identity( + value: typed_expr.TypedExpr, +) -> tuple[sge.Expression, sge.Expression]: + # normal_value -> (normal_value, normal_value) + # null_value -> (0, 1) + # nan_value -> (2, 3) + if dtypes.is_numeric(value.dtype, include_bool=False): + expr1 = sge.func("COALESCE", value.expr, sql.literal(0, value.dtype)) + expr2 = sge.func("COALESCE", value.expr, sql.literal(1, value.dtype)) + if value.dtype == dtypes.FLOAT_DTYPE: + expr1 = sge.If( + this=sge.IsNan(this=value.expr), + true=sql.literal(2, value.dtype), + false=expr1, + ) + expr2 = sge.If( + this=sge.IsNan(this=value.expr), + true=sql.literal(3, value.dtype), + false=expr2, + ) + else: # general case, convert to string and coalesce + expr1 = sge.func( + "COALESCE", + sql.cast(value.expr, "STRING"), + sql.literal("0", dtypes.STRING_DTYPE), ) - - left_2 = sge.If( - this=sge.IsNan(this=left.expr), true=sql.literal(2, left.dtype), false=left_0 - ) - left_3 = sge.If( - this=sge.IsNan(this=left.expr), true=sql.literal(3, left.dtype), false=left_1 - ) - right_2 = sge.If( - this=sge.IsNan(this=right.expr), true=sql.literal(2, right.dtype), false=right_0 - ) - right_3 = sge.If( - this=sge.IsNan(this=right.expr), true=sql.literal(3, right.dtype), false=right_1 - ) - return sge.And( - this=sge.EQ(this=left_2, expression=right_2), - expression=sge.EQ(this=left_3, expression=right_3), - ) + expr2 = sge.func( + "COALESCE", + sql.cast(value.expr, "STRING"), + 
sql.literal("1", dtypes.STRING_DTYPE), + ) + return expr1, expr2 def _set_query_ctes( @@ -649,26 +666,3 @@ def _set_query_ctes( else: raise ValueError("The expression does not support CTEs.") return new_expr - - -def _merge_ctes(ctes1: list[sge.CTE], ctes2: list[sge.CTE]) -> list[sge.CTE]: - """Merges two lists of CTEs, de-duplicating by alias name.""" - seen = {cte.alias: cte for cte in ctes1} - for cte in ctes2: - if cte.alias not in seen: - seen[cte.alias] = cte - return list(seen.values()) - - -def _pop_query_ctes( - expr: sge.Select, -) -> tuple[sge.Select, list[sge.CTE]]: - """Pops the CTEs of a given sge.Select expression.""" - if "with" in expr.arg_types.keys(): - expr_ctes = expr.args.pop("with", []) - return expr, expr_ctes - elif "with_" in expr.arg_types.keys(): - expr_ctes = expr.args.pop("with_", []) - return expr, expr_ctes - else: - raise ValueError("The expression does not support CTEs.") diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index 4b1efcb285..6071eaeaea 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -1713,6 +1713,39 @@ def _node_expressions(self): return tuple(ref for ref, _ in self.output_cols) +@dataclasses.dataclass(frozen=True, eq=False) +class CteNode(UnaryNode): + """ + Semantically a no-op, used to indicate shared subtrees and act as optimization boundary. 
+ """ + + @property + def fields(self) -> Sequence[Field]: + return self.child.fields + + @property + def variables_introduced(self) -> int: + return 0 + + @property + def row_count(self) -> Optional[int]: + return self.child.row_count + + @property + def node_defined_ids(self) -> Tuple[identifiers.ColumnId, ...]: + return () + + def remap_vars( + self, mappings: Mapping[identifiers.ColumnId, identifiers.ColumnId] + ) -> CteNode: + return self + + def remap_refs( + self, mappings: Mapping[identifiers.ColumnId, identifiers.ColumnId] + ) -> CteNode: + return self + + # Tree operators def top_down( root: BigFrameNode, diff --git a/bigframes/core/rewrite/__init__.py b/bigframes/core/rewrite/__init__.py index a120612aae..5279418f5f 100644 --- a/bigframes/core/rewrite/__init__.py +++ b/bigframes/core/rewrite/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. from bigframes.core.rewrite.as_sql import as_sql_nodes +from bigframes.core.rewrite.ctes import extract_ctes from bigframes.core.rewrite.fold_row_count import fold_row_counts from bigframes.core.rewrite.identifiers import remap_variables from bigframes.core.rewrite.implicit_align import try_row_join @@ -34,6 +35,7 @@ __all__ = [ "as_sql_nodes", + "extract_ctes", "legacy_join_as_projection", "try_row_join", "rewrite_slice", diff --git a/bigframes/core/rewrite/as_sql.py b/bigframes/core/rewrite/as_sql.py index 32d677f75d..2082eaa068 100644 --- a/bigframes/core/rewrite/as_sql.py +++ b/bigframes/core/rewrite/as_sql.py @@ -14,11 +14,13 @@ from __future__ import annotations import dataclasses +import itertools from typing import Optional, Sequence, Union from bigframes.core import ( agg_expressions, expression, + guid, identifiers, nodes, ordering, @@ -222,6 +224,83 @@ def _as_sql_node(node: nodes.BigFrameNode) -> nodes.BigFrameNode: return node -def as_sql_nodes(root: nodes.BigFrameNode) -> nodes.BigFrameNode: - # TODO: Aggregations, Unions, Joins, raw data sources - return nodes.bottom_up(root, _as_sql_node) 
+# In the future, we will have sql nodes for each of these node types. +_LOGICAL_NODE_TYPES_TO_WRAP = ( + nodes.ReadLocalNode, + nodes.ExplodeNode, + nodes.InNode, + nodes.AggregateNode, + nodes.FromRangeNode, + nodes.ConcatNode, + sql_nodes.SqlSelectNode, +) + + +def _insert_cte_markers(root: nodes.BigFrameNode) -> nodes.BigFrameNode: + # important not to wrap nodes that are already wrapped + wrapped_nodes = set( + node.child for node in root.unique_nodes() if isinstance(node, nodes.CteNode) + ) + # don't wrap child nodes of ConcatNode + union_child_nodes = set( + itertools.chain.from_iterable( + node.child_nodes + for node in root.unique_nodes() + if isinstance(node, nodes.ConcatNode) + ) + ) + + def maybe_insert_cte_marker(node: nodes.BigFrameNode) -> nodes.BigFrameNode: + if node == root: + return node + if ( + isinstance(node, _LOGICAL_NODE_TYPES_TO_WRAP) + and node not in wrapped_nodes + and node not in union_child_nodes + ): + wrapped_nodes.add(node) + return nodes.CteNode(node) + return node + + return root.top_down(maybe_insert_cte_marker) + + +def _extract_ctes_to_with_expr( + root: nodes.BigFrameNode, uid_gen: guid.SequentialUIDGenerator +) -> nodes.BigFrameNode: + topological_ctes = list( + filter( + lambda n: isinstance(n, nodes.CteNode), + root.iter_nodes_topo(), + ) + ) + cte_names = tuple( + next(uid_gen.get_uid_stream("bfcte_")) for _ in range(len(topological_ctes)) + ) + + if len(topological_ctes) == 0: + return root + + mapping = { + cte_node: sql_nodes.SqlCteRefNode(cte_name, tuple(cte_node.fields)) + for cte_node, cte_name in zip(topological_ctes, cte_names) + } + + # Replace all CTEs with CTE references and wrap the new root in a WITH clause + return sql_nodes.SqlWithCtesNode( + root.top_down(lambda x: mapping.get(x, x)), + cte_names, + tuple( + cte_node.child.top_down(lambda x: mapping.get(x, x)) for cte_node in topological_ctes # type: ignore + ), + ) + + +def as_sql_nodes( + root: nodes.BigFrameNode, uid_gen: guid.SequentialUIDGenerator +) 
-> nodes.BigFrameNode: + root = nodes.bottom_up(root, _as_sql_node) + # Insert CTE markers to indicate where we want to split the query. + root = _insert_cte_markers(root) + root = _extract_ctes_to_with_expr(root, uid_gen) + return root diff --git a/bigframes/core/rewrite/ctes.py b/bigframes/core/rewrite/ctes.py new file mode 100644 index 0000000000..a5afd19bb3 --- /dev/null +++ b/bigframes/core/rewrite/ctes.py @@ -0,0 +1,41 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +from collections import defaultdict + +from bigframes.core import nodes + + +def extract_ctes(root: nodes.BigFrameNode) -> nodes.BigFrameNode: + # identify candidates + node_parents: dict[nodes.BigFrameNode, int] = defaultdict(int) + for parent in root.unique_nodes(): + for child in parent.child_nodes: + node_parents[child] += 1 + + # everywhere a multi-parent node is referenced, wrap it in a CTE node + def insert_cte_markers(node: nodes.BigFrameNode) -> nodes.BigFrameNode: + def _add_cte_if_needed(child: nodes.BigFrameNode) -> nodes.BigFrameNode: + if node_parents[child] > 1: + return nodes.CteNode(child) + return child + + if isinstance(node, nodes.CteNode): + # don't re-wrap CTE nodes + return node + + return node.transform_children(_add_cte_if_needed) + + return root.top_down(insert_cte_markers) diff --git a/bigframes/core/rewrite/identifiers.py b/bigframes/core/rewrite/identifiers.py index 8efcbb4a0b..fbedb4fa1e 100644 --- a/bigframes/core/rewrite/identifiers.py +++ b/bigframes/core/rewrite/identifiers.py @@ -13,13 +13,42 @@ # limitations under the License. from __future__ import annotations -import dataclasses import typing from bigframes.core import identifiers, nodes -# TODO: May as well just outright remove selection nodes in this process. +def _create_mapping_operator( + id_def_remapping_by_node: dict[ + nodes.BigFrameNode, dict[identifiers.ColumnId, identifiers.ColumnId] + ], + id_ref_remapping_by_node: dict[ + nodes.BigFrameNode, dict[identifiers.ColumnId, identifiers.ColumnId] + ], +): + """ + Builds a remapping operator that uses predefined local remappings for ids. + + Args: + id_remapping_by_node: A mapping from nodes to their local remappings. + + Returns: + A remapping operator. + """ + + def _mapping_operator(node: nodes.BigFrameNode) -> nodes.BigFrameNode: + # Step 1: Get the local remapping for the current node. 
+ local_def_remaps = id_def_remapping_by_node[node] + local_ref_remaps = id_ref_remapping_by_node[node] + + result = node.remap_vars(local_def_remaps) + result = result.remap_refs(local_ref_remaps) + + return result + + return _mapping_operator + + def remap_variables( root: nodes.BigFrameNode, id_generator: typing.Iterator[identifiers.ColumnId], @@ -42,46 +71,47 @@ def remap_variables( A tuple of the new root node and a mapping from old to new column IDs visible to the parent node. """ - # Step 1: Recursively remap children to get their new nodes and ID mappings. - new_child_nodes: list[nodes.BigFrameNode] = [] - new_child_mappings: list[dict[identifiers.ColumnId, identifiers.ColumnId]] = [] - for child in root.child_nodes: - new_child, child_mappings = remap_variables(child, id_generator=id_generator) - new_child_nodes.append(new_child) - new_child_mappings.append(child_mappings) - - # Step 2: Transform children to use their new nodes. - remapped_children: dict[nodes.BigFrameNode, nodes.BigFrameNode] = { - child: new_child for child, new_child in zip(root.child_nodes, new_child_nodes) - } - new_root = root.transform_children(lambda node: remapped_children[node]) - - # Step 3: Transform the current node using the mappings from its children. - if isinstance(new_root, nodes.InNode): - new_root = typing.cast(nodes.InNode, new_root) - new_root = dataclasses.replace( - new_root, - left_col=new_root.left_col.remap_column_refs( - new_child_mappings[0], allow_partial_bindings=True - ), - ) - else: - downstream_mappings: dict[identifiers.ColumnId, identifiers.ColumnId] = { - k: v for mapping in new_child_mappings for k, v in mapping.items() - } - new_root = new_root.remap_refs(downstream_mappings) + # step 1: defined remappings for each individual unique node + # step 2: bottom up traversal to apply remappings - # Step 4: Create new IDs for columns defined by the current node. 
- node_defined_mappings = { - old_id: next(id_generator) for old_id in root.node_defined_ids - } - new_root = new_root.remap_vars(node_defined_mappings) + id_def_remaps: dict[ + nodes.BigFrameNode, dict[identifiers.ColumnId, identifiers.ColumnId] + ] = {} + id_ref_remaps: dict[ + nodes.BigFrameNode, dict[identifiers.ColumnId, identifiers.ColumnId] + ] = {} + for node in root.iter_nodes_topo(): # bottom up + local_def_remaps = { + col_id: next(id_generator) for col_id in node.node_defined_ids + } + id_def_remaps[node] = local_def_remaps - new_root._validate() + local_ref_remaps = {} - # Step 5: Determine which mappings to propagate up to the parent. - propagated_mappings = { - old_id: new_id for old_id, new_id in zip(root.ids, new_root.ids) - } + # InNode is special case as ID scope inherited purely from left side + inheriting_nodes = ( + [node.child_nodes[0]] + if isinstance(node, nodes.InNode) + else node.child_nodes + ) + for child in inheriting_nodes: # inherit ref and def mappings from children + if not child.defines_namespace: # these nodes represent new id spaces + local_ref_remaps.update( + { + old_id: new_id + for old_id, new_id in id_ref_remaps[child].items() + if old_id in child.ids + } + ) + local_ref_remaps.update(id_def_remaps[child]) + id_ref_remaps[node] = local_ref_remaps - return new_root, propagated_mappings + # have to do top down to preserve node identities + return ( + root.top_down(_create_mapping_operator(id_def_remaps, id_ref_remaps)), + # Only used by unit tests + { + old_id: (id_def_remaps[root] | id_ref_remaps[root])[old_id] + for old_id in root.ids + }, + ) diff --git a/bigframes/core/rewrite/pruning.py b/bigframes/core/rewrite/pruning.py index 7695ace3b3..60400821c6 100644 --- a/bigframes/core/rewrite/pruning.py +++ b/bigframes/core/rewrite/pruning.py @@ -67,7 +67,7 @@ def prune_selection_child( # Important to check this first if list(selection.ids) == list(child.ids): - if (ref.ref.id == ref.id for ref in selection.input_output_pairs): 
+ if all(ref.ref.id == ref.id for ref in selection.input_output_pairs): # selection is no-op so just remove it entirely return child @@ -75,6 +75,7 @@ def prune_selection_child( return selection.remap_refs( {id: ref.id for ref, id in child.input_output_pairs} ).replace_child(child.child) + elif isinstance(child, nodes.AdditiveNode): if not set(field.id for field in child.added_fields) & selection.consumed_ids: return selection.replace_child(child.additive_base) diff --git a/bigframes/core/sql_nodes.py b/bigframes/core/sql_nodes.py index 5d921de7ae..45048dc2b1 100644 --- a/bigframes/core/sql_nodes.py +++ b/bigframes/core/sql_nodes.py @@ -16,13 +16,18 @@ import dataclasses import functools -from typing import Mapping, Optional, Sequence, Tuple +from typing import Callable, Mapping, Optional, Sequence, Tuple from bigframes.core import bq_data, identifiers, nodes import bigframes.core.expression as ex from bigframes.core.ordering import OrderingExpression import bigframes.dtypes +# SQL Nodes are generally terminal, so don't support rich transformation methods +# like remap_vars, remap_refs, etc. +# Still, fields should be defined on them, as typing info is still used for +# dispatching some operators in the emitter, and for validation. + # TODO: Join node, union node @dataclasses.dataclass(frozen=True) @@ -84,6 +89,127 @@ def remap_refs( raise NotImplementedError() # type: ignore +@dataclasses.dataclass(frozen=True) +class SqlWithCtesNode(nodes.BigFrameNode): + # def, name pairs + child: nodes.BigFrameNode + cte_names: tuple[str, ...] + cte_defs: tuple[nodes.BigFrameNode, ...] 
+ + @property + def child_nodes(self) -> Sequence[nodes.BigFrameNode]: + return (self.child, *self.cte_defs) + + @property + def fields(self) -> Sequence[nodes.Field]: + return self.child.fields + + @property + def variables_introduced(self) -> int: + # This operation only renames variables, doesn't actually create new ones + return 0 + + @property + def defines_namespace(self) -> bool: + return True + + @property + def explicitly_ordered(self) -> bool: + return False + + @property + def order_ambiguous(self) -> bool: + return True + + @property + def row_count(self) -> Optional[int]: + return self.child.row_count + + @property + def node_defined_ids(self) -> Tuple[identifiers.ColumnId, ...]: + return tuple(self.ids) + + @property + def consumed_ids(self): + return () + + @property + def _node_expressions(self): + return () + + def remap_vars( + self, mappings: Mapping[identifiers.ColumnId, identifiers.ColumnId] + ) -> SqlWithCtesNode: + raise NotImplementedError() + + def remap_refs( + self, mappings: Mapping[identifiers.ColumnId, identifiers.ColumnId] + ) -> SqlWithCtesNode: + raise NotImplementedError() # type: ignore + + def transform_children( + self, transform: Callable[[nodes.BigFrameNode], nodes.BigFrameNode] + ) -> SqlWithCtesNode: + return SqlWithCtesNode( + transform(self.child), + self.cte_names, + tuple(transform(cte) for cte in self.cte_defs), + ) + + +@dataclasses.dataclass(frozen=True) +class SqlCteRefNode(nodes.LeafNode): + cte_name: str + cte_schema: tuple[nodes.Field, ...] 
+ + @property + def fields(self) -> Sequence[nodes.Field]: + return self.cte_schema + + @property + def variables_introduced(self) -> int: + # This operation only renames variables, doesn't actually create new ones + return 0 + + @property + def defines_namespace(self) -> bool: + return True + + @property + def explicitly_ordered(self) -> bool: + return False + + @property + def order_ambiguous(self) -> bool: + return True + + @property + def row_count(self) -> Optional[int]: + raise NotImplementedError() + + @property + def node_defined_ids(self) -> Tuple[identifiers.ColumnId, ...]: + return tuple(self.ids) + + @property + def consumed_ids(self): + return () + + @property + def _node_expressions(self): + return () + + def remap_vars( + self, mappings: Mapping[identifiers.ColumnId, identifiers.ColumnId] + ) -> SqlCteRefNode: + raise NotImplementedError() + + def remap_refs( + self, mappings: Mapping[identifiers.ColumnId, identifiers.ColumnId] + ) -> SqlCteRefNode: + raise NotImplementedError() # type: ignore + + @dataclasses.dataclass(frozen=True) class SqlSelectNode(nodes.UnaryNode): selections: tuple[nodes.ColumnDef, ...] 
= () diff --git a/bigframes/session/bq_caching_executor.py b/bigframes/session/bq_caching_executor.py index cd1642f6de..1e240a841c 100644 --- a/bigframes/session/bq_caching_executor.py +++ b/bigframes/session/bq_caching_executor.py @@ -308,10 +308,10 @@ def _export_gbq( ir = sqlglot_ir.SQLGlotIR.from_unparsed_query(sql) if spec.if_exists == "append": - sql = sg_sql.to_sql(sg_sql.insert(ir.expr, spec.table)) + sql = sg_sql.to_sql(sg_sql.insert(ir.expr.as_select_all(), spec.table)) else: # for "replace" assert spec.if_exists == "replace" - sql = sg_sql.to_sql(sg_sql.replace(ir.expr, spec.table)) + sql = sg_sql.to_sql(sg_sql.replace(ir.expr.as_select_all(), spec.table)) else: dispositions = { "fail": bigquery.WriteDisposition.WRITE_EMPTY, diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat/out.sql index 3b0f9f0633..4861435786 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat/out.sql @@ -1,41 +1,48 @@ WITH `bfcte_0` AS ( SELECT - `bfcol_9` AS `bfcol_30`, - `bfcol_10` AS `bfcol_31`, - `bfcol_11` AS `bfcol_32`, - `bfcol_12` AS `bfcol_33`, - `bfcol_13` AS `bfcol_34`, - `bfcol_14` AS `bfcol_35` + `rowindex` AS `bfcol_3`, + `rowindex` AS `bfcol_4`, + `int64_col` AS `bfcol_5`, + `string_col` AS `bfcol_6` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` +), `bfcte_1` AS ( + SELECT + `bfcol_17` AS `bfcol_23`, + `bfcol_18` AS `bfcol_24`, + `bfcol_19` AS `bfcol_25`, + `bfcol_20` AS `bfcol_26`, + `bfcol_21` AS `bfcol_27`, + `bfcol_22` AS `bfcol_28` FROM ( ( SELECT - `rowindex` AS `bfcol_9`, - `rowindex` AS `bfcol_10`, - `int64_col` AS `bfcol_11`, - `string_col` AS `bfcol_12`, - 0 AS `bfcol_13`, - ROW_NUMBER() OVER () - 1 AS `bfcol_14` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS 
`bft_0` + `bfcol_3` AS `bfcol_17`, + `bfcol_4` AS `bfcol_18`, + `bfcol_5` AS `bfcol_19`, + `bfcol_6` AS `bfcol_20`, + 0 AS `bfcol_21`, + ROW_NUMBER() OVER () - 1 AS `bfcol_22` + FROM `bfcte_0` ) UNION ALL ( SELECT - `rowindex` AS `bfcol_24`, - `rowindex` AS `bfcol_25`, - `int64_col` AS `bfcol_26`, - `string_col` AS `bfcol_27`, - 1 AS `bfcol_28`, - ROW_NUMBER() OVER () - 1 AS `bfcol_29` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` + `bfcol_3` AS `bfcol_11`, + `bfcol_4` AS `bfcol_12`, + `bfcol_5` AS `bfcol_13`, + `bfcol_6` AS `bfcol_14`, + 1 AS `bfcol_15`, + ROW_NUMBER() OVER () - 1 AS `bfcol_16` + FROM `bfcte_0` ) ) ) SELECT - `bfcol_30` AS `rowindex`, - `bfcol_31` AS `rowindex_1`, - `bfcol_32` AS `int64_col`, - `bfcol_33` AS `string_col` -FROM `bfcte_0` + `bfcol_23` AS `rowindex`, + `bfcol_24` AS `rowindex_1`, + `bfcol_25` AS `int64_col`, + `bfcol_26` AS `string_col` +FROM `bfcte_1` ORDER BY - `bfcol_34` ASC NULLS LAST, - `bfcol_35` ASC NULLS LAST \ No newline at end of file + `bfcol_27` ASC NULLS LAST, + `bfcol_28` ASC NULLS LAST \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql index a18d6998d4..477a47036a 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql @@ -1,55 +1,63 @@ WITH `bfcte_0` AS ( SELECT - `bfcol_6` AS `bfcol_42`, - `bfcol_7` AS `bfcol_43`, - `bfcol_8` AS `bfcol_44`, - `bfcol_9` AS `bfcol_45` + `float64_col` AS `bfcol_7`, + `int64_too` AS `bfcol_8` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` + WHERE + `bool_col` +), `bfcte_1` AS ( + SELECT + `float64_col` AS `bfcol_5`, + `int64_col` AS `bfcol_6` + FROM 
`bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` +), `bfcte_2` AS ( + SELECT + `bfcol_21` AS `bfcol_33`, + `bfcol_22` AS `bfcol_34`, + `bfcol_23` AS `bfcol_35`, + `bfcol_24` AS `bfcol_36` FROM ( ( SELECT - `float64_col` AS `bfcol_6`, - `int64_col` AS `bfcol_7`, - 0 AS `bfcol_8`, - ROW_NUMBER() OVER (ORDER BY `int64_col` ASC NULLS LAST) - 1 AS `bfcol_9` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` + `bfcol_5` AS `bfcol_21`, + `bfcol_6` AS `bfcol_22`, + 0 AS `bfcol_23`, + ROW_NUMBER() OVER (ORDER BY `bfcol_6` ASC NULLS LAST) - 1 AS `bfcol_24` + FROM `bfcte_1` ) UNION ALL ( SELECT - `float64_col` AS `bfcol_17`, - `int64_too` AS `bfcol_18`, - 1 AS `bfcol_19`, - ROW_NUMBER() OVER () - 1 AS `bfcol_20` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` - WHERE - `bool_col` + `bfcol_7` AS `bfcol_29`, + `bfcol_8` AS `bfcol_30`, + 1 AS `bfcol_31`, + ROW_NUMBER() OVER () - 1 AS `bfcol_32` + FROM `bfcte_0` ) UNION ALL ( SELECT - `float64_col` AS `bfcol_27`, - `int64_col` AS `bfcol_28`, - 2 AS `bfcol_29`, - ROW_NUMBER() OVER (ORDER BY `int64_col` ASC NULLS LAST) - 1 AS `bfcol_30` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` + `bfcol_5` AS `bfcol_17`, + `bfcol_6` AS `bfcol_18`, + 2 AS `bfcol_19`, + ROW_NUMBER() OVER (ORDER BY `bfcol_6` ASC NULLS LAST) - 1 AS `bfcol_20` + FROM `bfcte_1` ) UNION ALL ( SELECT - `float64_col` AS `bfcol_38`, - `int64_too` AS `bfcol_39`, - 3 AS `bfcol_40`, - ROW_NUMBER() OVER () - 1 AS `bfcol_41` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` - WHERE - `bool_col` + `bfcol_7` AS `bfcol_25`, + `bfcol_8` AS `bfcol_26`, + 3 AS `bfcol_27`, + ROW_NUMBER() OVER () - 1 AS `bfcol_28` + FROM `bfcte_0` ) ) ) SELECT - `bfcol_42` AS `float64_col`, - `bfcol_43` AS `int64_col` -FROM `bfcte_0` + `bfcol_33` AS `float64_col`, + `bfcol_34` AS `int64_col` +FROM `bfcte_2` ORDER BY - `bfcol_44` ASC NULLS LAST, - `bfcol_45` ASC NULLS LAST \ No newline at end of file + `bfcol_35` ASC NULLS LAST, + 
`bfcol_36` ASC NULLS LAST \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql index 47455a292b..0b0e07056a 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_fromrange/test_compile_fromrange/out.sql @@ -1,165 +1,75 @@ -WITH `bfcte_6` AS ( +WITH `bfcte_0` AS ( SELECT * FROM UNNEST(ARRAY>[STRUCT(CAST('2021-01-01T13:00:00' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:01' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:02' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:03' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:04' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:05' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:06' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:07' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:08' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:09' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:10' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:11' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:12' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:13' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:14' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:15' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:16' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:17' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:18' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:19' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:20' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:21' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:22' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:23' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:24' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:25' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:26' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:27' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:28' AS 
DATETIME)), STRUCT(CAST('2021-01-01T13:00:29' AS DATETIME))]) -), `bfcte_15` AS ( - SELECT - `bfcol_0` AS `bfcol_1` - FROM `bfcte_6` -), `bfcte_5` AS ( +), `bfcte_1` AS ( SELECT * - FROM UNNEST(ARRAY>[STRUCT(CAST('2021-01-01T13:00:00' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:01' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:02' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:03' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:04' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:05' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:06' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:07' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:08' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:09' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:10' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:11' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:12' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:13' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:14' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:15' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:16' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:17' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:18' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:19' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:20' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:21' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:22' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:23' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:24' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:25' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:26' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:27' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:28' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:29' AS DATETIME))]) -), `bfcte_10` AS ( + FROM UNNEST(ARRAY>[STRUCT(CAST('2021-01-01T13:00:00' AS DATETIME), 0, 10), STRUCT(CAST('2021-01-01T13:00:01' AS DATETIME), 1, 11), STRUCT(CAST('2021-01-01T13:00:02' AS DATETIME), 2, 12), STRUCT(CAST('2021-01-01T13:00:03' AS DATETIME), 3, 13), STRUCT(CAST('2021-01-01T13:00:04' AS DATETIME), 4, 14), 
STRUCT(CAST('2021-01-01T13:00:05' AS DATETIME), 5, 15), STRUCT(CAST('2021-01-01T13:00:06' AS DATETIME), 6, 16), STRUCT(CAST('2021-01-01T13:00:07' AS DATETIME), 7, 17), STRUCT(CAST('2021-01-01T13:00:08' AS DATETIME), 8, 18), STRUCT(CAST('2021-01-01T13:00:09' AS DATETIME), 9, 19), STRUCT(CAST('2021-01-01T13:00:10' AS DATETIME), 10, 20), STRUCT(CAST('2021-01-01T13:00:11' AS DATETIME), 11, 21), STRUCT(CAST('2021-01-01T13:00:12' AS DATETIME), 12, 22), STRUCT(CAST('2021-01-01T13:00:13' AS DATETIME), 13, 23), STRUCT(CAST('2021-01-01T13:00:14' AS DATETIME), 14, 24), STRUCT(CAST('2021-01-01T13:00:15' AS DATETIME), 15, 25), STRUCT(CAST('2021-01-01T13:00:16' AS DATETIME), 16, 26), STRUCT(CAST('2021-01-01T13:00:17' AS DATETIME), 17, 27), STRUCT(CAST('2021-01-01T13:00:18' AS DATETIME), 18, 28), STRUCT(CAST('2021-01-01T13:00:19' AS DATETIME), 19, 29), STRUCT(CAST('2021-01-01T13:00:20' AS DATETIME), 20, 30), STRUCT(CAST('2021-01-01T13:00:21' AS DATETIME), 21, 31), STRUCT(CAST('2021-01-01T13:00:22' AS DATETIME), 22, 32), STRUCT(CAST('2021-01-01T13:00:23' AS DATETIME), 23, 33), STRUCT(CAST('2021-01-01T13:00:24' AS DATETIME), 24, 34), STRUCT(CAST('2021-01-01T13:00:25' AS DATETIME), 25, 35), STRUCT(CAST('2021-01-01T13:00:26' AS DATETIME), 26, 36), STRUCT(CAST('2021-01-01T13:00:27' AS DATETIME), 27, 37), STRUCT(CAST('2021-01-01T13:00:28' AS DATETIME), 28, 38), STRUCT(CAST('2021-01-01T13:00:29' AS DATETIME), 29, 39)]) +), `bfcte_2` AS ( SELECT - MIN(`bfcol_2`) AS `bfcol_4` - FROM `bfcte_5` -), `bfcte_16` AS ( + `bfcol_0` AS `bfcol_4` + FROM `bfcte_0` +), `bfcte_3` AS ( SELECT - * - FROM `bfcte_10` -), `bfcte_19` AS ( + `bfcol_1` AS `bfcol_5`, + `bfcol_2` AS `bfcol_6`, + `bfcol_3` AS `bfcol_7` + FROM `bfcte_1` +), `bfcte_4` AS ( SELECT - * - FROM `bfcte_15` - CROSS JOIN `bfcte_16` -), `bfcte_21` AS ( + MIN(`bfcol_4`) AS `bfcol_8` + FROM `bfcte_2` +), `bfcte_5` AS ( SELECT - `bfcol_1`, - `bfcol_4`, + `bfcol_6` AS `bfcol_11`, + `bfcol_7` AS `bfcol_12`, CAST(FLOOR( IEEE_DIVIDE( - 
UNIX_MICROS(CAST(`bfcol_1` AS TIMESTAMP)) - UNIX_MICROS(CAST(CAST(`bfcol_4` AS DATE) AS TIMESTAMP)), + UNIX_MICROS(CAST(`bfcol_5` AS TIMESTAMP)) - UNIX_MICROS(CAST(CAST(`bfcol_8` AS DATE) AS TIMESTAMP)), 7000000 ) - ) AS INT64) AS `bfcol_5` - FROM `bfcte_19` -), `bfcte_23` AS ( - SELECT - MIN(`bfcol_5`) AS `bfcol_7` - FROM `bfcte_21` -), `bfcte_24` AS ( - SELECT - * - FROM `bfcte_23` -), `bfcte_4` AS ( - SELECT - * - FROM UNNEST(ARRAY>[STRUCT(CAST('2021-01-01T13:00:00' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:01' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:02' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:03' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:04' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:05' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:06' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:07' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:08' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:09' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:10' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:11' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:12' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:13' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:14' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:15' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:16' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:17' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:18' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:19' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:20' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:21' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:22' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:23' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:24' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:25' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:26' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:27' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:28' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:29' AS DATETIME))]) -), `bfcte_13` AS ( - SELECT - `bfcol_8` AS `bfcol_9` - FROM `bfcte_4` -), `bfcte_3` AS 
( - SELECT - * - FROM UNNEST(ARRAY>[STRUCT(0, CAST('2021-01-01T13:00:00' AS DATETIME), 0, 10), STRUCT(1, CAST('2021-01-01T13:00:01' AS DATETIME), 1, 11), STRUCT(2, CAST('2021-01-01T13:00:02' AS DATETIME), 2, 12), STRUCT(3, CAST('2021-01-01T13:00:03' AS DATETIME), 3, 13), STRUCT(4, CAST('2021-01-01T13:00:04' AS DATETIME), 4, 14), STRUCT(5, CAST('2021-01-01T13:00:05' AS DATETIME), 5, 15), STRUCT(6, CAST('2021-01-01T13:00:06' AS DATETIME), 6, 16), STRUCT(7, CAST('2021-01-01T13:00:07' AS DATETIME), 7, 17), STRUCT(8, CAST('2021-01-01T13:00:08' AS DATETIME), 8, 18), STRUCT(9, CAST('2021-01-01T13:00:09' AS DATETIME), 9, 19), STRUCT(10, CAST('2021-01-01T13:00:10' AS DATETIME), 10, 20), STRUCT(11, CAST('2021-01-01T13:00:11' AS DATETIME), 11, 21), STRUCT(12, CAST('2021-01-01T13:00:12' AS DATETIME), 12, 22), STRUCT(13, CAST('2021-01-01T13:00:13' AS DATETIME), 13, 23), STRUCT(14, CAST('2021-01-01T13:00:14' AS DATETIME), 14, 24), STRUCT(15, CAST('2021-01-01T13:00:15' AS DATETIME), 15, 25), STRUCT(16, CAST('2021-01-01T13:00:16' AS DATETIME), 16, 26), STRUCT(17, CAST('2021-01-01T13:00:17' AS DATETIME), 17, 27), STRUCT(18, CAST('2021-01-01T13:00:18' AS DATETIME), 18, 28), STRUCT(19, CAST('2021-01-01T13:00:19' AS DATETIME), 19, 29), STRUCT(20, CAST('2021-01-01T13:00:20' AS DATETIME), 20, 30), STRUCT(21, CAST('2021-01-01T13:00:21' AS DATETIME), 21, 31), STRUCT(22, CAST('2021-01-01T13:00:22' AS DATETIME), 22, 32), STRUCT(23, CAST('2021-01-01T13:00:23' AS DATETIME), 23, 33), STRUCT(24, CAST('2021-01-01T13:00:24' AS DATETIME), 24, 34), STRUCT(25, CAST('2021-01-01T13:00:25' AS DATETIME), 25, 35), STRUCT(26, CAST('2021-01-01T13:00:26' AS DATETIME), 26, 36), STRUCT(27, CAST('2021-01-01T13:00:27' AS DATETIME), 27, 37), STRUCT(28, CAST('2021-01-01T13:00:28' AS DATETIME), 28, 38), STRUCT(29, CAST('2021-01-01T13:00:29' AS DATETIME), 29, 39)]) -), `bfcte_9` AS ( - SELECT - MIN(`bfcol_11`) AS `bfcol_37` + ) AS INT64) AS `bfcol_13` FROM `bfcte_3` -), `bfcte_14` AS ( + CROSS JOIN `bfcte_4` +), 
`bfcte_6` AS ( SELECT - * - FROM `bfcte_9` -), `bfcte_18` AS ( - SELECT - * - FROM `bfcte_13` - CROSS JOIN `bfcte_14` -), `bfcte_20` AS ( - SELECT - `bfcol_9`, - `bfcol_37`, CAST(FLOOR( IEEE_DIVIDE( - UNIX_MICROS(CAST(`bfcol_9` AS TIMESTAMP)) - UNIX_MICROS(CAST(CAST(`bfcol_37` AS DATE) AS TIMESTAMP)), + UNIX_MICROS(CAST(`bfcol_4` AS TIMESTAMP)) - UNIX_MICROS(CAST(CAST(`bfcol_8` AS DATE) AS TIMESTAMP)), 7000000 ) - ) AS INT64) AS `bfcol_38` - FROM `bfcte_18` -), `bfcte_22` AS ( - SELECT - MAX(`bfcol_38`) AS `bfcol_40` - FROM `bfcte_20` -), `bfcte_25` AS ( - SELECT - * - FROM `bfcte_22` -), `bfcte_26` AS ( - SELECT - `bfcol_67` AS `bfcol_41` - FROM `bfcte_24` - CROSS JOIN `bfcte_25` - CROSS JOIN UNNEST(GENERATE_ARRAY(`bfcol_7`, `bfcol_40`, 1)) AS `bfcol_67` -), `bfcte_2` AS ( - SELECT - * - FROM UNNEST(ARRAY>[STRUCT(CAST('2021-01-01T13:00:00' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:01' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:02' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:03' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:04' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:05' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:06' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:07' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:08' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:09' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:10' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:11' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:12' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:13' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:14' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:15' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:16' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:17' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:18' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:19' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:20' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:21' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:22' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:23' AS DATETIME)), 
STRUCT(CAST('2021-01-01T13:00:24' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:25' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:26' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:27' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:28' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:29' AS DATETIME))]) -), `bfcte_8` AS ( - SELECT - MIN(`bfcol_42`) AS `bfcol_44` + ) AS INT64) AS `bfcol_14` FROM `bfcte_2` -), `bfcte_27` AS ( - SELECT - * - FROM `bfcte_8` -), `bfcte_28` AS ( - SELECT - * - FROM `bfcte_26` - CROSS JOIN `bfcte_27` -), `bfcte_1` AS ( - SELECT - * - FROM UNNEST(ARRAY>[STRUCT(CAST('2021-01-01T13:00:00' AS DATETIME), 0, 10), STRUCT(CAST('2021-01-01T13:00:01' AS DATETIME), 1, 11), STRUCT(CAST('2021-01-01T13:00:02' AS DATETIME), 2, 12), STRUCT(CAST('2021-01-01T13:00:03' AS DATETIME), 3, 13), STRUCT(CAST('2021-01-01T13:00:04' AS DATETIME), 4, 14), STRUCT(CAST('2021-01-01T13:00:05' AS DATETIME), 5, 15), STRUCT(CAST('2021-01-01T13:00:06' AS DATETIME), 6, 16), STRUCT(CAST('2021-01-01T13:00:07' AS DATETIME), 7, 17), STRUCT(CAST('2021-01-01T13:00:08' AS DATETIME), 8, 18), STRUCT(CAST('2021-01-01T13:00:09' AS DATETIME), 9, 19), STRUCT(CAST('2021-01-01T13:00:10' AS DATETIME), 10, 20), STRUCT(CAST('2021-01-01T13:00:11' AS DATETIME), 11, 21), STRUCT(CAST('2021-01-01T13:00:12' AS DATETIME), 12, 22), STRUCT(CAST('2021-01-01T13:00:13' AS DATETIME), 13, 23), STRUCT(CAST('2021-01-01T13:00:14' AS DATETIME), 14, 24), STRUCT(CAST('2021-01-01T13:00:15' AS DATETIME), 15, 25), STRUCT(CAST('2021-01-01T13:00:16' AS DATETIME), 16, 26), STRUCT(CAST('2021-01-01T13:00:17' AS DATETIME), 17, 27), STRUCT(CAST('2021-01-01T13:00:18' AS DATETIME), 18, 28), STRUCT(CAST('2021-01-01T13:00:19' AS DATETIME), 19, 29), STRUCT(CAST('2021-01-01T13:00:20' AS DATETIME), 20, 30), STRUCT(CAST('2021-01-01T13:00:21' AS DATETIME), 21, 31), STRUCT(CAST('2021-01-01T13:00:22' AS DATETIME), 22, 32), STRUCT(CAST('2021-01-01T13:00:23' AS DATETIME), 23, 33), STRUCT(CAST('2021-01-01T13:00:24' AS DATETIME), 24, 34), 
STRUCT(CAST('2021-01-01T13:00:25' AS DATETIME), 25, 35), STRUCT(CAST('2021-01-01T13:00:26' AS DATETIME), 26, 36), STRUCT(CAST('2021-01-01T13:00:27' AS DATETIME), 27, 37), STRUCT(CAST('2021-01-01T13:00:28' AS DATETIME), 28, 38), STRUCT(CAST('2021-01-01T13:00:29' AS DATETIME), 29, 39)]) -), `bfcte_11` AS ( - SELECT - `bfcol_45` AS `bfcol_48`, - `bfcol_46` AS `bfcol_49`, - `bfcol_47` AS `bfcol_50` - FROM `bfcte_1` -), `bfcte_0` AS ( - SELECT - * - FROM UNNEST(ARRAY>[STRUCT(CAST('2021-01-01T13:00:00' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:01' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:02' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:03' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:04' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:05' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:06' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:07' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:08' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:09' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:10' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:11' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:12' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:13' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:14' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:15' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:16' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:17' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:18' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:19' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:20' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:21' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:22' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:23' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:24' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:25' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:26' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:27' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:28' AS DATETIME)), STRUCT(CAST('2021-01-01T13:00:29' AS DATETIME))]) + CROSS JOIN `bfcte_4` ), `bfcte_7` AS ( SELECT - 
MIN(`bfcol_51`) AS `bfcol_53` - FROM `bfcte_0` -), `bfcte_12` AS ( - SELECT - * - FROM `bfcte_7` -), `bfcte_17` AS ( - SELECT - * - FROM `bfcte_11` - CROSS JOIN `bfcte_12` -), `bfcte_29` AS ( + MAX(`bfcol_14`) AS `bfcol_15` + FROM `bfcte_6` +), `bfcte_8` AS ( SELECT - `bfcol_49` AS `bfcol_55`, - `bfcol_50` AS `bfcol_56`, - CAST(FLOOR( - IEEE_DIVIDE( - UNIX_MICROS(CAST(`bfcol_48` AS TIMESTAMP)) - UNIX_MICROS(CAST(CAST(`bfcol_53` AS DATE) AS TIMESTAMP)), - 7000000 - ) - ) AS INT64) AS `bfcol_57` - FROM `bfcte_17` -), `bfcte_30` AS ( + MIN(`bfcol_14`) AS `bfcol_16` + FROM `bfcte_6` +), `bfcte_9` AS ( SELECT - * - FROM `bfcte_28` - LEFT JOIN `bfcte_29` - ON `bfcol_41` = `bfcol_57` + `bfcol_27` AS `bfcol_17` + FROM `bfcte_8` + CROSS JOIN `bfcte_7` + CROSS JOIN UNNEST(GENERATE_ARRAY(`bfcol_16`, `bfcol_15`, 1)) AS `bfcol_27` ) SELECT CAST(TIMESTAMP_MICROS( - CAST(CAST(`bfcol_41` AS BIGNUMERIC) * 7000000 + CAST(UNIX_MICROS(CAST(CAST(`bfcol_44` AS DATE) AS TIMESTAMP)) AS BIGNUMERIC) AS INT64) + CAST(CAST(`bfcol_17` AS BIGNUMERIC) * 7000000 + CAST(UNIX_MICROS(CAST(CAST(`bfcol_8` AS DATE) AS TIMESTAMP)) AS BIGNUMERIC) AS INT64) ) AS DATETIME) AS `bigframes_unnamed_index`, - `bfcol_55` AS `int64_col`, - `bfcol_56` AS `int64_too` -FROM `bfcte_30` + `bfcol_11` AS `int64_col`, + `bfcol_12` AS `int64_too` +FROM ( + SELECT + * + FROM `bfcte_9` + CROSS JOIN `bfcte_4` +) +LEFT JOIN `bfcte_5` + ON `bfcol_17` = `bfcol_13` ORDER BY - `bfcol_41` ASC NULLS LAST \ No newline at end of file + `bfcol_17` ASC NULLS LAST \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin/out.sql index d80febf41c..416d6a47df 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin/out.sql @@ -1,13 +1,13 @@ -WITH `bfcte_2` AS ( - 
SELECT - `rowindex` AS `bfcol_2`, - `int64_col` AS `bfcol_3` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` -), `bfcte_0` AS ( +WITH `bfcte_0` AS ( SELECT `int64_too` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` ), `bfcte_1` AS ( + SELECT + `rowindex` AS `bfcol_3`, + `int64_col` AS `bfcol_4` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` +), `bfcte_2` AS ( SELECT `int64_too` FROM `bfcte_0` @@ -15,22 +15,21 @@ WITH `bfcte_2` AS ( `int64_too` ), `bfcte_3` AS ( SELECT - `bfcte_2`.*, - EXISTS( - SELECT - 1 - FROM ( + `int64_too` AS `bfcol_0` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + STRUCT(COALESCE(`bfcol_4`, 0) AS `bfpart1_0`, COALESCE(`bfcol_4`, 1) AS `bfpart2_0`) IN ( + ( SELECT - `int64_too` AS `bfcol_4` - FROM `bfcte_1` - ) AS `bft_1` - WHERE - COALESCE(`bfcte_2`.`bfcol_3`, 0) = COALESCE(`bft_1`.`bfcol_4`, 0) - AND COALESCE(`bfcte_2`.`bfcol_3`, 1) = COALESCE(`bft_1`.`bfcol_4`, 1) + STRUCT(COALESCE(`bfcol_0`, 0) AS `bfpart1_0`, COALESCE(`bfcol_0`, 1) AS `bfpart2_0`) + FROM `bfcte_3` + ) ) AS `bfcol_5` - FROM `bfcte_2` + FROM `bfcte_1` ) SELECT - `bfcol_2` AS `rowindex`, + `bfcol_3` AS `rowindex`, `bfcol_5` AS `int64_col` -FROM `bfcte_3` \ No newline at end of file +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin_not_nullable/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin_not_nullable/out.sql index 2b2735b163..cc1633d3a3 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin_not_nullable/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_isin/test_compile_isin_not_nullable/out.sql @@ -1,13 +1,13 @@ -WITH `bfcte_2` AS ( - SELECT - `rowindex` AS `bfcol_2`, - `rowindex_2` AS `bfcol_3` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` -), `bfcte_0` AS ( +WITH `bfcte_0` AS ( SELECT `rowindex_2` FROM 
`bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` ), `bfcte_1` AS ( + SELECT + `rowindex` AS `bfcol_3`, + `rowindex_2` AS `bfcol_4` + FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` +), `bfcte_2` AS ( SELECT `rowindex_2` FROM `bfcte_0` @@ -15,15 +15,19 @@ WITH `bfcte_2` AS ( `rowindex_2` ), `bfcte_3` AS ( SELECT - `bfcte_2`.*, - `bfcte_2`.`bfcol_3` IN (( + `rowindex_2` AS `bfcol_0` + FROM `bfcte_2` +), `bfcte_4` AS ( + SELECT + *, + `bfcol_4` IN (( SELECT - `rowindex_2` AS `bfcol_4` - FROM `bfcte_1` + * + FROM `bfcte_3` )) AS `bfcol_5` - FROM `bfcte_2` + FROM `bfcte_1` ) SELECT - `bfcol_2` AS `rowindex`, + `bfcol_3` AS `rowindex`, `bfcol_5` AS `rowindex_2` -FROM `bfcte_3` \ No newline at end of file +FROM `bfcte_4` \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql index dfc4084027..cac57d0c8c 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join/out.sql @@ -1,22 +1,18 @@ WITH `bfcte_0` AS ( SELECT - `rowindex` AS `bfcol_2`, - `int64_col` AS `bfcol_3` + `int64_col` AS `bfcol_4`, + `int64_too` AS `bfcol_5` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` ), `bfcte_1` AS ( SELECT - `int64_col` AS `bfcol_6`, - `int64_too` AS `bfcol_7` + `rowindex` AS `bfcol_6`, + `int64_col` AS `bfcol_7` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` -), `bfcte_2` AS ( - SELECT - * - FROM `bfcte_0` - LEFT JOIN `bfcte_1` - ON COALESCE(`bfcol_2`, 0) = COALESCE(`bfcol_6`, 0) - AND COALESCE(`bfcol_2`, 1) = COALESCE(`bfcol_6`, 1) ) SELECT - `bfcol_3` AS `int64_col`, - `bfcol_7` AS `int64_too` -FROM `bfcte_2` \ No newline at end of file + `bfcol_7` AS `int64_col`, + `bfcol_5` AS `int64_too` +FROM `bfcte_1` +LEFT JOIN `bfcte_0` + ON COALESCE(`bfcol_6`, 0) 
= COALESCE(`bfcol_4`, 0) + AND COALESCE(`bfcol_6`, 1) = COALESCE(`bfcol_4`, 1) \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/bool_col/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/bool_col/out.sql index b3a2b45673..5042f91cd9 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/bool_col/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/bool_col/out.sql @@ -1,23 +1,24 @@ WITH `bfcte_0` AS ( SELECT - `rowindex` AS `bfcol_2`, - `bool_col` AS `bfcol_3` + `bool_col` AS `bfcol_0`, + `rowindex` AS `bfcol_1` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` ), `bfcte_1` AS ( SELECT - `rowindex` AS `bfcol_6`, - `bool_col` AS `bfcol_7` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` + `bfcol_1` AS `bfcol_2`, + `bfcol_0` AS `bfcol_3` + FROM `bfcte_0` ), `bfcte_2` AS ( SELECT - * + `bfcol_1` AS `bfcol_4`, + `bfcol_0` AS `bfcol_5` FROM `bfcte_0` - INNER JOIN `bfcte_1` - ON COALESCE(CAST(`bfcol_3` AS STRING), '0') = COALESCE(CAST(`bfcol_7` AS STRING), '0') - AND COALESCE(CAST(`bfcol_3` AS STRING), '1') = COALESCE(CAST(`bfcol_7` AS STRING), '1') ) SELECT - `bfcol_2` AS `rowindex_x`, - `bfcol_3` AS `bool_col`, - `bfcol_6` AS `rowindex_y` -FROM `bfcte_2` \ No newline at end of file + `bfcol_4` AS `rowindex_x`, + `bfcol_5` AS `bool_col`, + `bfcol_2` AS `rowindex_y` +FROM `bfcte_2` +INNER JOIN `bfcte_1` + ON COALESCE(CAST(`bfcol_5` AS STRING), '0') = COALESCE(CAST(`bfcol_3` AS STRING), '0') + AND COALESCE(CAST(`bfcol_5` AS STRING), '1') = COALESCE(CAST(`bfcol_3` AS STRING), '1') \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/float64_col/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/float64_col/out.sql index 
4abc6aa4a7..9cc08dcbd3 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/float64_col/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/float64_col/out.sql @@ -1,23 +1,24 @@ WITH `bfcte_0` AS ( SELECT - `rowindex` AS `bfcol_2`, - `float64_col` AS `bfcol_3` + `float64_col` AS `bfcol_0`, + `rowindex` AS `bfcol_1` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` ), `bfcte_1` AS ( SELECT - `rowindex` AS `bfcol_6`, - `float64_col` AS `bfcol_7` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` + `bfcol_1` AS `bfcol_2`, + `bfcol_0` AS `bfcol_3` + FROM `bfcte_0` ), `bfcte_2` AS ( SELECT - * + `bfcol_1` AS `bfcol_4`, + `bfcol_0` AS `bfcol_5` FROM `bfcte_0` - INNER JOIN `bfcte_1` - ON IF(IS_NAN(`bfcol_3`), 2, COALESCE(`bfcol_3`, 0)) = IF(IS_NAN(`bfcol_7`), 2, COALESCE(`bfcol_7`, 0)) - AND IF(IS_NAN(`bfcol_3`), 3, COALESCE(`bfcol_3`, 1)) = IF(IS_NAN(`bfcol_7`), 3, COALESCE(`bfcol_7`, 1)) ) SELECT - `bfcol_2` AS `rowindex_x`, - `bfcol_3` AS `float64_col`, - `bfcol_6` AS `rowindex_y` -FROM `bfcte_2` \ No newline at end of file + `bfcol_4` AS `rowindex_x`, + `bfcol_5` AS `float64_col`, + `bfcol_2` AS `rowindex_y` +FROM `bfcte_2` +INNER JOIN `bfcte_1` + ON IF(IS_NAN(`bfcol_5`), 2, COALESCE(`bfcol_5`, 0)) = IF(IS_NAN(`bfcol_3`), 2, COALESCE(`bfcol_3`, 0)) + AND IF(IS_NAN(`bfcol_5`), 3, COALESCE(`bfcol_5`, 1)) = IF(IS_NAN(`bfcol_3`), 3, COALESCE(`bfcol_3`, 1)) \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/int64_col/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/int64_col/out.sql index b841ac1325..05b9ceec4d 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/int64_col/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/int64_col/out.sql @@ -1,23 +1,24 @@ 
WITH `bfcte_0` AS ( SELECT - `rowindex` AS `bfcol_2`, - `int64_col` AS `bfcol_3` + `int64_col` AS `bfcol_0`, + `rowindex` AS `bfcol_1` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` ), `bfcte_1` AS ( SELECT - `rowindex` AS `bfcol_6`, - `int64_col` AS `bfcol_7` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` + `bfcol_1` AS `bfcol_2`, + `bfcol_0` AS `bfcol_3` + FROM `bfcte_0` ), `bfcte_2` AS ( SELECT - * + `bfcol_1` AS `bfcol_4`, + `bfcol_0` AS `bfcol_5` FROM `bfcte_0` - INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_3`, 0) = COALESCE(`bfcol_7`, 0) - AND COALESCE(`bfcol_3`, 1) = COALESCE(`bfcol_7`, 1) ) SELECT - `bfcol_2` AS `rowindex_x`, - `bfcol_3` AS `int64_col`, - `bfcol_6` AS `rowindex_y` -FROM `bfcte_2` \ No newline at end of file + `bfcol_4` AS `rowindex_x`, + `bfcol_5` AS `int64_col`, + `bfcol_2` AS `rowindex_y` +FROM `bfcte_2` +INNER JOIN `bfcte_1` + ON COALESCE(`bfcol_5`, 0) = COALESCE(`bfcol_3`, 0) + AND COALESCE(`bfcol_5`, 1) = COALESCE(`bfcol_3`, 1) \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/numeric_col/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/numeric_col/out.sql index af2aaa69dc..2e0114593e 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/numeric_col/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/numeric_col/out.sql @@ -1,23 +1,24 @@ WITH `bfcte_0` AS ( SELECT - `rowindex` AS `bfcol_2`, - `numeric_col` AS `bfcol_3` + `numeric_col` AS `bfcol_0`, + `rowindex` AS `bfcol_1` FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` ), `bfcte_1` AS ( SELECT - `rowindex` AS `bfcol_6`, - `numeric_col` AS `bfcol_7` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` + `bfcol_1` AS `bfcol_2`, + `bfcol_0` AS `bfcol_3` + FROM `bfcte_0` ), `bfcte_2` AS ( SELECT - * + `bfcol_1` AS `bfcol_4`, + 
`bfcol_0` AS `bfcol_5` FROM `bfcte_0` - INNER JOIN `bfcte_1` - ON COALESCE(`bfcol_3`, CAST(0 AS NUMERIC)) = COALESCE(`bfcol_7`, CAST(0 AS NUMERIC)) - AND COALESCE(`bfcol_3`, CAST(1 AS NUMERIC)) = COALESCE(`bfcol_7`, CAST(1 AS NUMERIC)) ) SELECT - `bfcol_2` AS `rowindex_x`, - `bfcol_3` AS `numeric_col`, - `bfcol_6` AS `rowindex_y` -FROM `bfcte_2` \ No newline at end of file + `bfcol_4` AS `rowindex_x`, + `bfcol_5` AS `numeric_col`, + `bfcol_2` AS `rowindex_y` +FROM `bfcte_2` +INNER JOIN `bfcte_1` + ON COALESCE(`bfcol_5`, CAST(0 AS NUMERIC)) = COALESCE(`bfcol_3`, CAST(0 AS NUMERIC)) + AND COALESCE(`bfcol_5`, CAST(1 AS NUMERIC)) = COALESCE(`bfcol_3`, CAST(1 AS NUMERIC)) \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/string_col/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/string_col/out.sql index dfde2efb86..36aad50343 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/string_col/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/string_col/out.sql @@ -5,19 +5,15 @@ WITH `bfcte_0` AS ( FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` ), `bfcte_1` AS ( SELECT - `rowindex` AS `bfcol_4`, - `string_col` AS `bfcol_5` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` -), `bfcte_2` AS ( - SELECT - * + `bfcol_0` AS `bfcol_2`, + `bfcol_1` AS `bfcol_3` FROM `bfcte_0` - INNER JOIN `bfcte_1` - ON COALESCE(CAST(`bfcol_1` AS STRING), '0') = COALESCE(CAST(`bfcol_5` AS STRING), '0') - AND COALESCE(CAST(`bfcol_1` AS STRING), '1') = COALESCE(CAST(`bfcol_5` AS STRING), '1') ) SELECT `bfcol_0` AS `rowindex_x`, `bfcol_1` AS `string_col`, - `bfcol_4` AS `rowindex_y` -FROM `bfcte_2` \ No newline at end of file + `bfcol_2` AS `rowindex_y` +FROM `bfcte_0` +INNER JOIN `bfcte_1` + ON COALESCE(CAST(`bfcol_1` AS STRING), '0') = COALESCE(CAST(`bfcol_3` AS 
STRING), '0') + AND COALESCE(CAST(`bfcol_1` AS STRING), '1') = COALESCE(CAST(`bfcol_3` AS STRING), '1') \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/time_col/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/time_col/out.sql index 5a85812441..b945a1cbf3 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/time_col/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/time_col/out.sql @@ -5,19 +5,15 @@ WITH `bfcte_0` AS ( FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` ), `bfcte_1` AS ( SELECT - `rowindex` AS `bfcol_4`, - `time_col` AS `bfcol_5` - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` -), `bfcte_2` AS ( - SELECT - * + `bfcol_0` AS `bfcol_2`, + `bfcol_1` AS `bfcol_3` FROM `bfcte_0` - INNER JOIN `bfcte_1` - ON COALESCE(CAST(`bfcol_1` AS STRING), '0') = COALESCE(CAST(`bfcol_5` AS STRING), '0') - AND COALESCE(CAST(`bfcol_1` AS STRING), '1') = COALESCE(CAST(`bfcol_5` AS STRING), '1') ) SELECT `bfcol_0` AS `rowindex_x`, `bfcol_1` AS `time_col`, - `bfcol_4` AS `rowindex_y` -FROM `bfcte_2` \ No newline at end of file + `bfcol_2` AS `rowindex_y` +FROM `bfcte_0` +INNER JOIN `bfcte_1` + ON COALESCE(CAST(`bfcol_1` AS STRING), '0') = COALESCE(CAST(`bfcol_3` AS STRING), '0') + AND COALESCE(CAST(`bfcol_1` AS STRING), '1') = COALESCE(CAST(`bfcol_3` AS STRING), '1') \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_random_sample/test_compile_random_sample/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_random_sample/test_compile_random_sample/out.sql index 2f80d6ffbc..73879aa65d 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_random_sample/test_compile_random_sample/out.sql +++ 
b/tests/unit/core/compile/sqlglot/snapshots/test_compile_random_sample/test_compile_random_sample/out.sql @@ -155,12 +155,6 @@ WITH `bfcte_0` AS ( 432000000000, 8 )]) -), `bfcte_1` AS ( - SELECT - * - FROM `bfcte_0` - WHERE - RAND() < 0.1 ) SELECT `bfcol_0` AS `bool_col`, @@ -178,6 +172,12 @@ SELECT `bfcol_12` AS `time_col`, `bfcol_13` AS `timestamp_col`, `bfcol_14` AS `duration_col` -FROM `bfcte_1` +FROM ( + SELECT + * + FROM `bfcte_0` + WHERE + RAND() < 0.1 +) ORDER BY `bfcol_15` ASC NULLS LAST \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql index 512d3ca6bd..2b71ef917d 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_readtable/test_compile_readtable_w_columns_filters/out.sql @@ -1,10 +1,5 @@ -WITH `bfcte_0` AS ( - SELECT - * - FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` - WHERE - `rowindex` > 0 AND `string_col` IN ('Hello, World!') -) SELECT * -FROM `bfcte_0` \ No newline at end of file +FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0` +WHERE + `rowindex` > 0 AND `string_col` IN ('Hello, World!') \ No newline at end of file