diff --git a/GEMINI.md b/GEMINI.md deleted file mode 100644 index 4de5912527..0000000000 --- a/GEMINI.md +++ /dev/null @@ -1,5 +0,0 @@ -# Contribution guidelines, tailored for LLM agents - -@.gemini/common/docs.md - -@.gemini/common/constraints.md diff --git a/bigframes/bigquery/_operations/ml.py b/bigframes/bigquery/_operations/ml.py index d5b1786b25..3e5d6fb263 100644 --- a/bigframes/bigquery/_operations/ml.py +++ b/bigframes/bigquery/_operations/ml.py @@ -480,6 +480,39 @@ def generate_text( return session.read_gbq_query(sql) +@log_adapter.method_logger(custom_base_name="bigquery_ml") +def get_insights( + model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series], +) -> dataframe.DataFrame: + """ + Gets insights from a BigQuery ML model. + + See the `BigQuery ML GET_INSIGHTS function syntax + <https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-get-insights>`_ + for additional reference. + + Args: + model (bigframes.ml.base.BaseEstimator, str, or pd.Series): + The model to get insights from. + + Returns: + bigframes.pandas.DataFrame: + The insights. 
+ """ + import bigframes.pandas as bpd + + model_name, session = utils.get_model_name_and_session(model) + + sql = bigframes.core.sql.ml.get_insights( + model_name=model_name, + ) + + if session is None: + return bpd.read_gbq_query(sql) + else: + return session.read_gbq_query(sql) + + @log_adapter.method_logger(custom_base_name="bigquery_ml") def generate_embedding( model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series], diff --git a/bigframes/bigquery/ml.py b/bigframes/bigquery/ml.py index b1b33d0dbd..9b0d77d5b8 100644 --- a/bigframes/bigquery/ml.py +++ b/bigframes/bigquery/ml.py @@ -25,6 +25,7 @@ explain_predict, generate_embedding, generate_text, + get_insights, global_explain, predict, transform, @@ -39,4 +40,5 @@ "transform", "generate_text", "generate_embedding", + "get_insights", ] diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py index 391d905d2f..0edb784c37 100644 --- a/bigframes/core/sql/ml.py +++ b/bigframes/core/sql/ml.py @@ -266,6 +266,16 @@ def generate_text( return sql +def get_insights( + model_name: str, +) -> str: + """Encode the ML.GET_INSIGHTS statement. + See https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-get-insights for reference. 
+ """ + sql = f"SELECT * FROM ML.GET_INSIGHTS(MODEL {sg_sql.to_sql(sg_sql.identifier(model_name))})\n" + return sql + + def generate_embedding( model_name: str, table: str, diff --git a/tests/system/large/bigquery/test_ml.py b/tests/system/large/bigquery/test_ml.py index 20a62ae2b6..f0f7d4f691 100644 --- a/tests/system/large/bigquery/test_ml.py +++ b/tests/system/large/bigquery/test_ml.py @@ -64,6 +64,32 @@ def test_generate_embedding_with_options(embedding_model): assert len(embedding[0]) == 256 +def test_get_insights(dataset_id): + df = bpd.DataFrame( + { + "dim1": ["a", "a", "b", "b", "a", "a", "b", "b"], + "dim2": ["x", "y", "x", "y", "x", "y", "x", "y"], + "metric": [10, 20, 30, 40, 12, 25, 35, 45], + "is_test": [False, False, False, False, True, True, True, True], + } + ) + model_name = f"{dataset_id}.contribution_analysis_model" + + ml.create_model( + model_name=model_name, + options={ + "model_type": "CONTRIBUTION_ANALYSIS", + "contribution_metric": "SUM(metric)", + "is_test_col": "is_test", + }, + training_data=df, + ) + + result = ml.get_insights(model_name) + assert len(result) > 0 + assert "contributors" in result.columns + + def test_create_model_linear_regression(dataset_id): df = bpd.DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]}) model_name = f"{dataset_id}.linear_regression_model" diff --git a/tests/unit/bigquery/test_ml.py b/tests/unit/bigquery/test_ml.py index e5c957767b..6d39901a35 100644 --- a/tests/unit/bigquery/test_ml.py +++ b/tests/unit/bigquery/test_ml.py @@ -177,6 +177,15 @@ def test_generate_text_with_pandas_dataframe(read_pandas_mock, read_gbq_query_mo assert "'TYPE' AS request_type" in generated_sql +@mock.patch("bigframes.pandas.read_gbq_query") +def test_get_insights(read_gbq_query_mock): + ml_ops.get_insights(MODEL_SERIES) + read_gbq_query_mock.assert_called_once() + generated_sql = read_gbq_query_mock.call_args[0][0] + assert "ML.GET_INSIGHTS" in generated_sql + assert f"MODEL `{MODEL_NAME}`" in generated_sql + + 
@mock.patch("bigframes.pandas.read_gbq_query") @mock.patch("bigframes.pandas.read_pandas") def test_generate_embedding_with_pandas_dataframe( diff --git a/tests/unit/core/sql/snapshots/test_ml/test_get_insights_model_basic/get_insights_model_basic.sql b/tests/unit/core/sql/snapshots/test_ml/test_get_insights_model_basic/get_insights_model_basic.sql new file mode 100644 index 0000000000..a3f2680c17 --- /dev/null +++ b/tests/unit/core/sql/snapshots/test_ml/test_get_insights_model_basic/get_insights_model_basic.sql @@ -0,0 +1 @@ +SELECT * FROM ML.GET_INSIGHTS(MODEL `my_project.my_dataset.my_model`) diff --git a/tests/unit/core/sql/test_ml.py b/tests/unit/core/sql/test_ml.py index 27b7a00ac2..bb3b61a949 100644 --- a/tests/unit/core/sql/test_ml.py +++ b/tests/unit/core/sql/test_ml.py @@ -203,6 +203,13 @@ def test_generate_text_model_with_options(snapshot): snapshot.assert_match(sql, "generate_text_model_with_options.sql") +def test_get_insights_model_basic(snapshot): + sql = bigframes.core.sql.ml.get_insights( + model_name="my_project.my_dataset.my_model", + ) + snapshot.assert_match(sql, "get_insights_model_basic.sql") + + def test_generate_embedding_model_basic(snapshot): sql = bigframes.core.sql.ml.generate_embedding( model_name="my_project.my_dataset.my_model",