From 21c571f9042a10adb6037c5e156255ac2e616dea Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 4 Mar 2026 22:39:58 +0000 Subject: [PATCH 1/4] feat: add bigquery.ml.get_insights function --- bigframes/bigquery/_operations/ml.py | 33 ++++++++++++++++++++++++++++ bigframes/core/sql/ml.py | 10 +++++++++ 2 files changed, 43 insertions(+) diff --git a/bigframes/bigquery/_operations/ml.py b/bigframes/bigquery/_operations/ml.py index d5b1786b25..3e5d6fb263 100644 --- a/bigframes/bigquery/_operations/ml.py +++ b/bigframes/bigquery/_operations/ml.py @@ -480,6 +480,39 @@ def generate_text( return session.read_gbq_query(sql) +@log_adapter.method_logger(custom_base_name="bigquery_ml") +def get_insights( + model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series], +) -> dataframe.DataFrame: + """ + Gets insights from a BigQuery ML model. + + See the `BigQuery ML GET_INSIGHTS function syntax + <https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-get-insights>`_ + for additional reference. + + Args: + model (bigframes.ml.base.BaseEstimator, str, or pd.Series): + The model to get insights from. + + Returns: + bigframes.pandas.DataFrame: + The insights. + """ + import bigframes.pandas as bpd + + model_name, session = utils.get_model_name_and_session(model) + + sql = bigframes.core.sql.ml.get_insights( + model_name=model_name, + ) + + if session is None: + return bpd.read_gbq_query(sql) + else: + return session.read_gbq_query(sql) + + @log_adapter.method_logger(custom_base_name="bigquery_ml") def generate_embedding( model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series], diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py index 38d66ab9a5..13fe60e5ee 100644 --- a/bigframes/core/sql/ml.py +++ b/bigframes/core/sql/ml.py @@ -268,6 +268,16 @@ def generate_text( return sql +def get_insights( + model_name: str, +) -> str: + """Encode the ML.GET_INSIGHTS statement. + See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-get-insights for reference. 
+ """ + sql = f"SELECT * FROM ML.GET_INSIGHTS(MODEL {sqlglot_ir.identifier(model_name)})\n" + return sql + + def generate_embedding( model_name: str, table: str, From 7ddb5c6987ebe75520765750cf42f72c23342110 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 4 Mar 2026 23:00:36 +0000 Subject: [PATCH 2/4] tests --- GEMINI.md | 5 ----- bigframes/bigquery/ml.py | 2 ++ tests/system/large/bigquery/test_ml.py | 26 ++++++++++++++++++++++++++ tests/unit/bigquery/test_ml.py | 9 +++++++++ tests/unit/core/sql/test_ml.py | 7 +++++++ 5 files changed, 44 insertions(+), 5 deletions(-) delete mode 100644 GEMINI.md diff --git a/GEMINI.md b/GEMINI.md deleted file mode 100644 index 4de5912527..0000000000 --- a/GEMINI.md +++ /dev/null @@ -1,5 +0,0 @@ -# Contribution guidelines, tailored for LLM agents - -@.gemini/common/docs.md - -@.gemini/common/constraints.md diff --git a/bigframes/bigquery/ml.py b/bigframes/bigquery/ml.py index b1b33d0dbd..9b0d77d5b8 100644 --- a/bigframes/bigquery/ml.py +++ b/bigframes/bigquery/ml.py @@ -25,6 +25,7 @@ explain_predict, generate_embedding, generate_text, + get_insights, global_explain, predict, transform, @@ -39,4 +40,5 @@ "transform", "generate_text", "generate_embedding", + "get_insights", ] diff --git a/tests/system/large/bigquery/test_ml.py b/tests/system/large/bigquery/test_ml.py index 20a62ae2b6..f0f7d4f691 100644 --- a/tests/system/large/bigquery/test_ml.py +++ b/tests/system/large/bigquery/test_ml.py @@ -64,6 +64,32 @@ def test_generate_embedding_with_options(embedding_model): assert len(embedding[0]) == 256 +def test_get_insights(dataset_id): + df = bpd.DataFrame( + { + "dim1": ["a", "a", "b", "b", "a", "a", "b", "b"], + "dim2": ["x", "y", "x", "y", "x", "y", "x", "y"], + "metric": [10, 20, 30, 40, 12, 25, 35, 45], + "is_test": [False, False, False, False, True, True, True, True], + } + ) + model_name = f"{dataset_id}.contribution_analysis_model" + + ml.create_model( + model_name=model_name, + options={ + "model_type": 
"CONTRIBUTION_ANALYSIS", + "contribution_metric": "SUM(metric)", + "is_test_col": "is_test", + }, + training_data=df, + ) + + result = ml.get_insights(model_name) + assert len(result) > 0 + assert "contributors" in result.columns + + def test_create_model_linear_regression(dataset_id): df = bpd.DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]}) model_name = f"{dataset_id}.linear_regression_model" diff --git a/tests/unit/bigquery/test_ml.py b/tests/unit/bigquery/test_ml.py index e5c957767b..6d39901a35 100644 --- a/tests/unit/bigquery/test_ml.py +++ b/tests/unit/bigquery/test_ml.py @@ -177,6 +177,15 @@ def test_generate_text_with_pandas_dataframe(read_pandas_mock, read_gbq_query_mo assert "'TYPE' AS request_type" in generated_sql +@mock.patch("bigframes.pandas.read_gbq_query") +def test_get_insights(read_gbq_query_mock): + ml_ops.get_insights(MODEL_SERIES) + read_gbq_query_mock.assert_called_once() + generated_sql = read_gbq_query_mock.call_args[0][0] + assert "ML.GET_INSIGHTS" in generated_sql + assert f"MODEL `{MODEL_NAME}`" in generated_sql + + @mock.patch("bigframes.pandas.read_gbq_query") @mock.patch("bigframes.pandas.read_pandas") def test_generate_embedding_with_pandas_dataframe( diff --git a/tests/unit/core/sql/test_ml.py b/tests/unit/core/sql/test_ml.py index 27b7a00ac2..bb3b61a949 100644 --- a/tests/unit/core/sql/test_ml.py +++ b/tests/unit/core/sql/test_ml.py @@ -203,6 +203,13 @@ def test_generate_text_model_with_options(snapshot): snapshot.assert_match(sql, "generate_text_model_with_options.sql") +def test_get_insights_model_basic(snapshot): + sql = bigframes.core.sql.ml.get_insights( + model_name="my_project.my_dataset.my_model", + ) + snapshot.assert_match(sql, "get_insights_model_basic.sql") + + def test_generate_embedding_model_basic(snapshot): sql = bigframes.core.sql.ml.generate_embedding( model_name="my_project.my_dataset.my_model", From bc418bf74033e61774e9398ec10297c5c0580787 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Wed, 4 Mar 2026 23:01:15 +0000 
Subject: [PATCH 3/4] snapshot --- .../test_get_insights_model_basic/get_insights_model_basic.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/unit/core/sql/snapshots/test_ml/test_get_insights_model_basic/get_insights_model_basic.sql diff --git a/tests/unit/core/sql/snapshots/test_ml/test_get_insights_model_basic/get_insights_model_basic.sql b/tests/unit/core/sql/snapshots/test_ml/test_get_insights_model_basic/get_insights_model_basic.sql new file mode 100644 index 0000000000..a3f2680c17 --- /dev/null +++ b/tests/unit/core/sql/snapshots/test_ml/test_get_insights_model_basic/get_insights_model_basic.sql @@ -0,0 +1 @@ +SELECT * FROM ML.GET_INSIGHTS(MODEL `my_project.my_dataset.my_model`) From 116d71cf79687337a8c39a41e9551e766f2819a5 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Thu, 5 Mar 2026 01:13:11 +0000 Subject: [PATCH 4/4] fix: lint --- bigframes/core/sql/ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py index d42daad10e..0edb784c37 100644 --- a/bigframes/core/sql/ml.py +++ b/bigframes/core/sql/ml.py @@ -272,7 +272,7 @@ def get_insights( """Encode the ML.GET_INSIGHTS statement. See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-get-insights for reference. """ - sql = f"SELECT * FROM ML.GET_INSIGHTS(MODEL {sqlglot_ir.identifier(model_name)})\n" + sql = f"SELECT * FROM ML.GET_INSIGHTS(MODEL {sg_sql.to_sql(sg_sql.identifier(model_name))})\n" return sql