From a8b304e4d95494ddd630367e3f40400d73067933 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Fri, 7 Jul 2023 19:37:18 +0200 Subject: [PATCH 1/7] Implement BridgeStan wrapper --- .../chainsail_helpers/pdf/stan/__init__.py | 57 +++++++++++++++++-- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py b/chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py index 1711b54..2125a28 100644 --- a/chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py +++ b/chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py @@ -4,8 +4,10 @@ from __future__ import annotations import functools +import json from typing import Any, Callable +import bridgestan as bs import numpy as np import requests @@ -197,7 +199,7 @@ def _tag_data(data: dict[str, Any], include_prior: bool=True, include_likelihood def log_likelihood(self, x: np.ndarray) -> float: """ Evaluates the log-likelihood of the model. - + Calls out to the httpstan server specified in self._httpstan_url with datums `include_prior` set to `0` and `include_likelihood` set to `1`. @@ -214,7 +216,7 @@ def log_likelihood(self, x: np.ndarray) -> float: def log_prior(self, x: np.ndarray) -> float: """ Evaluates the log-prior probability of the model. - + Calls out to the httpstan server specified in self._httpstan_url with datums `include_prior` set to `1` and `include_likelihood` set to `0`. @@ -227,7 +229,7 @@ def log_prior(self, x: np.ndarray) -> float: """ preflight_data_transform = functools.partial(self._tag_data, include_likelihood=False) return self._query_log_prob(x, preflight_data_transform) - + def log_prob(self, x: np.ndarray) -> float: """ Log-probability of the density to be sampled. @@ -274,7 +276,7 @@ def log_prior_gradient(self, x: np.ndarray) -> np.ndarray: """ preflight_data_transform = functools.partial(self._tag_data, include_likelihood=False) return self._query_log_prob_gradient(x, preflight_data_transform) - + def log_prob_gradient(self, x: np.ndarray) -> np.ndarray: """ Gradient of the log-probability of the density to be sampled. @@ -289,3 +291,50 @@ def log_prob_gradient(self, x: np.ndarray) -> np.ndarray: log-probability gradient evaluated at x """ return self._query_log_prob_gradient(x) + + + +class BridgeStanPDF: + """ + Chainsail PDF wrapper around BridgeStan (https://github.com/roualdes/bridgestan). + """ + + def __init__(self, model_file: str, data: dict[str, Any] | None=None) -> None: + """ + Initializes a Chainsail-compatible PDF wrapper around the BridgeStan API. + + Args: + model_file: path to a Stan model file + data(dict): observations to condition on + """ + data = data or {} + self._model = bs.StanModel.from_stan_file(model_file, json.dumps(data)) + + + def log_prob(self, x: np.ndarray) -> float: + """ + Log-probability of the density to be sampled. + Calls out to the httpstan server specified in self._httpstan_url. + Args: + x: 1D array of floats at which the log-probability + is evaluated + + Returns: + log-probability evaluated at x + """ + return self._model.log_density(x, jacobian=False) + + def log_prob_gradient(self, x: np.ndarray) -> np.ndarray: + """ + Gradient of the log-probability of the density to be sampled. + + Args: + x: 1D array of floats at which the log-probability + gradient is evaluated + + Returns: + 1D array of floats containing the flattened + log-probability gradient evaluated at x + """ + _, gradient = self._model.log_density_gradient(x, jacobian=False) + return gradient From c5f0c6e8d5566a3647baf350511056100ecdb57a Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 15 Jul 2023 16:23:25 +0200 Subject: [PATCH 2/7] Add BridgeStan example --- examples/bridgestan-mixture/README.md | 7 +++++++ examples/bridgestan-mixture/mixture.stan | 7 +++++++ examples/bridgestan-mixture/probability.py | 11 +++++++++++ 3 files changed, 25 insertions(+) create mode 100644 examples/bridgestan-mixture/README.md create mode 100644 examples/bridgestan-mixture/mixture.stan create mode 100644 examples/bridgestan-mixture/probability.py diff --git a/examples/bridgestan-mixture/README.md b/examples/bridgestan-mixture/README.md new file mode 100644 index 0000000..aac1f03 --- /dev/null +++ b/examples/bridgestan-mixture/README.md @@ -0,0 +1,7 @@ +# Example: sampling from a mixture distribution using the BridgeStan wrapper + +This provides a very minimal example of how to define a Chainsail-compatible `probability.py` that uses a Stan model in a separate file and the BrideStan wrapper from the `chainsail-helpers` library. +Note that when creating a ZIP archive with `probability.py`, you'll have to add `mixture.stan` to it, too, for example using +```console +zip probability.zip probability.py mixture.stan +``` diff --git a/examples/bridgestan-mixture/mixture.stan b/examples/bridgestan-mixture/mixture.stan new file mode 100644 index 0000000..c17f4a7 --- /dev/null +++ b/examples/bridgestan-mixture/mixture.stan @@ -0,0 +1,7 @@ +parameters { + real y; +} +model { + target += log_sum_exp(log(0.3) + normal_lpdf(y | -1.5, 0.5), + log(0.7) + normal_lpdf(y | 2.0, 0.2)); +} diff --git a/examples/bridgestan-mixture/probability.py b/examples/bridgestan-mixture/probability.py new file mode 100644 index 0000000..d1db3a2 --- /dev/null +++ b/examples/bridgestan-mixture/probability.py @@ -0,0 +1,11 @@ +""" +Sample probability distribution definition using the BridgeStan wrapper +and a Stan model defined in `mixture.stan` +""" + +import numpy as np + +from chainsail_helpers.pdf.stan import BridgeStanPDF + +pdf = BridgeStanPDF("mixture.stan") +initial_states = np.array([[1.0]]) From 6b11c78424f9c927c79ef7bae8d2b3b85ecbc596 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 15 Jul 2023 16:31:52 +0200 Subject: [PATCH 3/7] Add README to Stan PDF wrappers --- chainsail_helpers/chainsail_helpers/pdf/stan/README.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 chainsail_helpers/chainsail_helpers/pdf/stan/README.md diff --git a/chainsail_helpers/chainsail_helpers/pdf/stan/README.md b/chainsail_helpers/chainsail_helpers/pdf/stan/README.md new file mode 100644 index 0000000..a9d9ba2 --- /dev/null +++ b/chainsail_helpers/chainsail_helpers/pdf/stan/README.md @@ -0,0 +1,5 @@ +# Wrappers for Stan + +This module provides two different wrappers that allow Chainsail to be used with models written in the Stan language (https://mc-stan.org/): +- `BaseStanPDF` / `PosteriorBaseStanPDF` use the httpstan (https://github.com/stan-dev/httpstan) API and require a Docker container runing httpstan. This is very slow and clunky and likely to be deprecated soon in favor of the following. +- `BridgeStanPDF` uses the BridgeStan (https://github.com/roualdes/bridgestan) library and is much, _much_ faster than the above httpstan-based wrapper. BridgeStan is pre-installed in the user code image (`docker/user_code`). From 9f767e60dd3df18a0fd4fa29578d1dd6c637dedd Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Sat, 15 Jul 2023 16:42:29 +0200 Subject: [PATCH 4/7] Split up Stan wrapper module into two submodules This is because BridgeStan is not Nix-packaged yet and thus would make for a missing import if a user uses the Nix derivation in `examples/nix/chainsail_helpers.nix` and then only wants to use the httpstan wrapper. --- .../chainsail_helpers/pdf/stan/__init__.py | 339 +----------------- .../chainsail_helpers/pdf/stan/bridgestan.py | 55 +++ .../chainsail_helpers/pdf/stan/httpstan.py | 291 +++++++++++++++ examples/bridgestan-mixture/probability.py | 2 +- examples/stan-mixture/README.md | 2 +- examples/stan-mixture/probability.py | 2 +- 6 files changed, 350 insertions(+), 341 deletions(-) create mode 100644 chainsail_helpers/chainsail_helpers/pdf/stan/bridgestan.py create mode 100644 chainsail_helpers/chainsail_helpers/pdf/stan/httpstan.py diff --git a/chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py b/chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py index 2125a28..316beca 100644 --- a/chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py +++ b/chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py @@ -1,340 +1,3 @@ """ -Interfaces for Chainsail probability densities defined by a Stan model +Wrappers that allow Chainsail to be used with Stan models """ -from __future__ import annotations - -import functools -import json -from typing import Any, Callable - -import bridgestan as bs -import numpy as np -import requests - -from chainsail_helpers.pdf import PDF - - -class BaseStanPDF(PDF): - """ - Chainsail PDF wrapper around httpstan - (https://github.com/stan-dev/httpstan). - """ - _HTTPSTAN_URL = "http://localhost:8082" - - def __init__(self, model_code: str, data: dict[str, Any] | None=None) -> None: - """ - Initializes a Chainsail-compatible PDF wrapper around the httpstan - REST API. - - Args: - model_code(string): Stan model specificaton that will be compiled - by httpstan - data(dict): observations to condition on - """ - r = requests.post( - f"{self._HTTPSTAN_URL}/v1/models", - json={"program_code": model_code}, - ) - # if the model did not compile successfully, httpstan returns - # a 400 status code (bad request) - if r.status_code == 400: - raise Exception( - ("Model compilation failed. httpstan message:\n" - f"{r.json()['message']}") - ) - else: - r.raise_for_status() - model_id = r.json()["name"] - self._httpstan_model_route = f"{self._HTTPSTAN_URL}/v1/{model_id}" - self._data = data or {} - - def _query_log_prob(self, x: np.ndarray, - preflight_data_transform: Callable | None=None) -> float: - """ - Uses the httpstan HTTP API to evaluate the model's log-probability. - - Args: - x: 1D array of floats at which the log-likelihood is evaluated - preflight_data_transform: transformation to be applied to the data - before sending it to httpstan - - Returns: - log-probability evaluated at `x` - """ - try: - if preflight_data_transform: - data = preflight_data_transform(self._data) - else: - data = self._data - r = requests.post( - f"{self._httpstan_model_route}/log_prob", - json={ - "unconstrained_parameters": x.tolist(), - "data": data, - "adjust_transform": False, - }, - ) - r.raise_for_status() - return r.json()["log_prob"] - except Exception as e: - raise Exception(f"Querying log-prob failed: Error: {e}") - - def _query_log_prob_gradient(self, x: np.ndarray, preflight_data_transform: Callable | None=None) -> np.ndarray: - """ - Uses the httpstan HTTP API to evaluate the model's log-probability gradient. - - Args: - x: 1D array of floats at which the log-probability's gradient is evaluated - preflight_data_transform: transformation to be applied to the data - before sending it to httpstan - - Returns: - 1D array with log-probability gradient evaluated at `x` - """ - try: - if preflight_data_transform: - data = preflight_data_transform(self._data) - else: - data = self._data - r = requests.post( - f"{self._httpstan_model_route}/log_prob_grad", - json={ - "unconstrained_parameters": x.tolist(), - "data": data, - "adjust_transform": False, - }, - ) - r.raise_for_status() - return np.array(r.json()["log_prob_grad"]) - except Exception as e: - raise Exception(f"Querying log-prob gradient failed: Error: {e}") - - def log_prob(self, x: np.ndarray) -> float: - """ - Log-probability of the density to be sampled. - Calls out to the httpstan server specified in self._httpstan_url. - Args: - x: 1D array of floats at which the log-probability - is evaluated - - Returns: - log-probability evaluated at x - """ - return self._query_log_prob(x) - - def log_prob_gradient(self, x: np.ndarray) -> np.ndarray: - """ - Gradient of the log-probability of the density to be sampled. - Calls out to the httpstan server specified in self._httpstan_url. - - Args: - x: 1D array of floats at which the log-probability - gradient is evaluated - - Returns: - 1D array of floats containing the flattened - log-probability gradient evaluated at x - """ - return self._query_log_prob_gradient(x) - - -class PosteriorStanPDF(BaseStanPDF): - """ - Chainsail PDF wrapper around httpstan - (https://github.com/stan-dev/httpstan). - """ - _HTTPSTAN_URL = "http://localhost:8082" - - def __init__(self, model_code: str, data: dict[str, Any] | None=None) -> None: - """ - Initializes a Chainsail-compatible, likelihood-temperable PDF wrapper around - the httpstan REST API. - - The Stan model has to take the additional "data" in the `data` section: - ``` - int include_prior ; - int include_likelihood ; - ``` - - and in the `model` section, these values need to be used to conditionally switch - on and off the prior and likelihood contributions, like so: - - ``` - if (include_prior) { - param_a ~ SomeDistribution ; - param_b ~ SomeOtherDistribution ; - } ; - - if (include_likelihood) { - data ~ YetAnotherDistribution(param_a, param_b) - } ; - ``` - - Args: - model_code(string): Stan model specification that will be compiled - by httpstan - data(dict): observations to condition on - """ - super().__init__(model_code, data) - - @staticmethod - def _tag_data(data: dict[str, Any], include_prior: bool=True, include_likelihood: bool=True) -> dict[str, Any]: - """ - Adds tags / flags to the data that indicate whether the prior or likelihood should be included. - Args: - data: observations to condition on - include_prior: whether the prior contributions are taken into account in later log-probability - or gradient evaluations - include_likelihood: whether the likelihood is taken into account in later log-probability - or gradient evaluations - - Returns: - data dictionary with additional entries `include_prior` and `include_likelihood`, set to either - 0 or 1. - """ - return {**data, - "include_prior": int(include_prior), - "include_likelihood": int(include_likelihood)} - - def log_likelihood(self, x: np.ndarray) -> float: - """ - Evaluates the log-likelihood of the model. - - Calls out to the httpstan server specified in self._httpstan_url with - datums `include_prior` set to `0` and `include_likelihood` set to `1`. - - Args: - x: 1D array of floats at which the log-likelihood - is evaluated - - Returns: - log-likelihood evaluated at x - """ - preflight_data_transform = functools.partial(self._tag_data, include_prior=False) - return self._query_log_prob(x, preflight_data_transform) - - def log_prior(self, x: np.ndarray) -> float: - """ - Evaluates the log-prior probability of the model. - - Calls out to the httpstan server specified in self._httpstan_url with - datums `include_prior` set to `1` and `include_likelihood` set to `0`. - - Args: - x: 1D array of floats at which the log-prior probability - is evaluated - - Returns: - log-prior probability evaluated at x - """ - preflight_data_transform = functools.partial(self._tag_data, include_likelihood=False) - return self._query_log_prob(x, preflight_data_transform) - - def log_prob(self, x: np.ndarray) -> float: - """ - Log-probability of the density to be sampled. - Calls out to the httpstan server specified in self._httpstan_url. - Args: - x: 1D array of floats at which the log-probability - is evaluated - - Returns: - log-probability evaluated at x - """ - return self._query_log_prob(x) - - def log_likelihood_gradient(self, x: np.ndarray) -> np.ndarray: - """ - Evaluates the gradient of the log-likelihood of the model. - Calls out to the httpstan server specified in self._httpstan_url - with datums `include_prior` set to `0` and `include_likelihood` set to `1`. - - Args: - x: 1D array of floats at which the gradient of the - log-likelihood is evaluated - - Returns: - 1D array of floats containing the flattened - gradient of the log-likelihood evaluated at x - """ - preflight_data_transform = functools.partial(self._tag_data, include_prior=False) - return self._query_log_prob_gradient(x, preflight_data_transform) - - def log_prior_gradient(self, x: np.ndarray) -> np.ndarray: - """ - Evaluates the gradient of the log-prior density of the model. - Calls out to the httpstan server specified in self._httpstan_url - with datums `include_prior` set to `1` and `include_likelihood` set to `0`. - - Args: - x: 1D array of floats at which the gradient of the - log-prior density is evaluated - - Returns: - 1D array of floats containing the flattened - gradient of the log-prior density evaluated at x - """ - preflight_data_transform = functools.partial(self._tag_data, include_likelihood=False) - return self._query_log_prob_gradient(x, preflight_data_transform) - - def log_prob_gradient(self, x: np.ndarray) -> np.ndarray: - """ - Gradient of the log-probability of the density to be sampled. - Calls out to the httpstan server specified in self._httpstan_url. - - Args: - x: 1D array of floats at which the log-probability - gradient is evaluated - - Returns: - 1D array of floats containing the flattened - log-probability gradient evaluated at x - """ - return self._query_log_prob_gradient(x) - - - -class BridgeStanPDF: - """ - Chainsail PDF wrapper around BridgeStan (https://github.com/roualdes/bridgestan). - """ - - def __init__(self, model_file: str, data: dict[str, Any] | None=None) -> None: - """ - Initializes a Chainsail-compatible PDF wrapper around the BridgeStan API. - - Args: - model_file: path to a Stan model file - data(dict): observations to condition on - """ - data = data or {} - self._model = bs.StanModel.from_stan_file(model_file, json.dumps(data)) - - - def log_prob(self, x: np.ndarray) -> float: - """ - Log-probability of the density to be sampled. - Calls out to the httpstan server specified in self._httpstan_url. - Args: - x: 1D array of floats at which the log-probability - is evaluated - - Returns: - log-probability evaluated at x - """ - return self._model.log_density(x, jacobian=False) - - def log_prob_gradient(self, x: np.ndarray) -> np.ndarray: - """ - Gradient of the log-probability of the density to be sampled. - - Args: - x: 1D array of floats at which the log-probability - gradient is evaluated - - Returns: - 1D array of floats containing the flattened - log-probability gradient evaluated at x - """ - _, gradient = self._model.log_density_gradient(x, jacobian=False) - return gradient diff --git a/chainsail_helpers/chainsail_helpers/pdf/stan/bridgestan.py b/chainsail_helpers/chainsail_helpers/pdf/stan/bridgestan.py new file mode 100644 index 0000000..f0fd56a --- /dev/null +++ b/chainsail_helpers/chainsail_helpers/pdf/stan/bridgestan.py @@ -0,0 +1,55 @@ +""" +Wrappers for Chainsail probability densities defined by a Stan model, using BridgeStan +""" + +import json +from typing import Any + +import numpy as np +import bridgestan as bs + + +class BridgeStanPDF: + """ + Chainsail PDF wrapper around BridgeStan (https://github.com/roualdes/bridgestan). + """ + + def __init__(self, model_file: str, data: dict[str, Any] | None=None) -> None: + """ + Initializes a Chainsail-compatible PDF wrapper around the BridgeStan API. + + Args: + model_file: path to a Stan model file + data(dict): observations to condition on + """ + data = data or {} + self._model = bs.StanModel.from_stan_file(model_file, json.dumps(data)) + + + def log_prob(self, x: np.ndarray) -> float: + """ + Log-probability of the density to be sampled. + Calls out to the httpstan server specified in self._httpstan_url. + Args: + x: 1D array of floats at which the log-probability + is evaluated + + Returns: + log-probability evaluated at x + """ + return self._model.log_density(x, jacobian=False) + + def log_prob_gradient(self, x: np.ndarray) -> np.ndarray: + """ + Gradient of the log-probability of the density to be sampled. + + Args: + x: 1D array of floats at which the log-probability + gradient is evaluated + + Returns: + 1D array of floats containing the flattened + log-probability gradient evaluated at x + """ + _, gradient = self._model.log_density_gradient(x, jacobian=False) + return gradient diff --git a/chainsail_helpers/chainsail_helpers/pdf/stan/httpstan.py b/chainsail_helpers/chainsail_helpers/pdf/stan/httpstan.py new file mode 100644 index 0000000..6345813 --- /dev/null +++ b/chainsail_helpers/chainsail_helpers/pdf/stan/httpstan.py @@ -0,0 +1,291 @@ +""" +Wrapper for Chainsail probability densities defined by a Stan model, using httpstan +""" +from __future__ import annotations + +import functools +from typing import Any, Callable + +import numpy as np +import requests + +from chainsail_helpers.pdf import PDF + + +class BaseStanPDF(PDF): + """ + Chainsail PDF wrapper around httpstan + (https://github.com/stan-dev/httpstan). + """ + _HTTPSTAN_URL = "http://localhost:8082" + + def __init__(self, model_code: str, data: dict[str, Any] | None=None) -> None: + """ + Initializes a Chainsail-compatible PDF wrapper around the httpstan + REST API. + + Args: + model_code(string): Stan model specificaton that will be compiled + by httpstan + data(dict): observations to condition on + """ + r = requests.post( + f"{self._HTTPSTAN_URL}/v1/models", + json={"program_code": model_code}, + ) + # if the model did not compile successfully, httpstan returns + # a 400 status code (bad request) + if r.status_code == 400: + raise Exception( + ("Model compilation failed. httpstan message:\n" + f"{r.json()['message']}") + ) + else: + r.raise_for_status() + model_id = r.json()["name"] + self._httpstan_model_route = f"{self._HTTPSTAN_URL}/v1/{model_id}" + self._data = data or {} + + def _query_log_prob(self, x: np.ndarray, + preflight_data_transform: Callable | None=None) -> float: + """ + Uses the httpstan HTTP API to evaluate the model's log-probability. + + Args: + x: 1D array of floats at which the log-likelihood is evaluated + preflight_data_transform: transformation to be applied to the data + before sending it to httpstan + + Returns: + log-probability evaluated at `x` + """ + try: + if preflight_data_transform: + data = preflight_data_transform(self._data) + else: + data = self._data + r = requests.post( + f"{self._httpstan_model_route}/log_prob", + json={ + "unconstrained_parameters": x.tolist(), + "data": data, + "adjust_transform": False, + }, + ) + r.raise_for_status() + return r.json()["log_prob"] + except Exception as e: + raise Exception(f"Querying log-prob failed: Error: {e}") + + def _query_log_prob_gradient(self, x: np.ndarray, preflight_data_transform: Callable | None=None) -> np.ndarray: + """ + Uses the httpstan HTTP API to evaluate the model's log-probability gradient. + + Args: + x: 1D array of floats at which the log-probability's gradient is evaluated + preflight_data_transform: transformation to be applied to the data + before sending it to httpstan + + Returns: + 1D array with log-probability gradient evaluated at `x` + """ + try: + if preflight_data_transform: + data = preflight_data_transform(self._data) + else: + data = self._data + r = requests.post( + f"{self._httpstan_model_route}/log_prob_grad", + json={ + "unconstrained_parameters": x.tolist(), + "data": data, + "adjust_transform": False, + }, + ) + r.raise_for_status() + return np.array(r.json()["log_prob_grad"]) + except Exception as e: + raise Exception(f"Querying log-prob gradient failed: Error: {e}") + + def log_prob(self, x: np.ndarray) -> float: + """ + Log-probability of the density to be sampled. + Calls out to the httpstan server specified in self._httpstan_url. + Args: + x: 1D array of floats at which the log-probability + is evaluated + + Returns: + log-probability evaluated at x + """ + return self._query_log_prob(x) + + def log_prob_gradient(self, x: np.ndarray) -> np.ndarray: + """ + Gradient of the log-probability of the density to be sampled. + Calls out to the httpstan server specified in self._httpstan_url. + + Args: + x: 1D array of floats at which the log-probability + gradient is evaluated + + Returns: + 1D array of floats containing the flattened + log-probability gradient evaluated at x + """ + return self._query_log_prob_gradient(x) + + +class PosteriorStanPDF(BaseStanPDF): + """ + Chainsail PDF wrapper around httpstan + (https://github.com/stan-dev/httpstan). + """ + _HTTPSTAN_URL = "http://localhost:8082" + + def __init__(self, model_code: str, data: dict[str, Any] | None=None) -> None: + """ + Initializes a Chainsail-compatible, likelihood-temperable PDF wrapper around + the httpstan REST API. + + The Stan model has to take the additional "data" in the `data` section: + ``` + int include_prior ; + int include_likelihood ; + ``` + + and in the `model` section, these values need to be used to conditionally switch + on and off the prior and likelihood contributions, like so: + + ``` + if (include_prior) { + param_a ~ SomeDistribution ; + param_b ~ SomeOtherDistribution ; + } ; + + if (include_likelihood) { + data ~ YetAnotherDistribution(param_a, param_b) + } ; + ``` + + Args: + model_code(string): Stan model specification that will be compiled + by httpstan + data(dict): observations to condition on + """ + super().__init__(model_code, data) + + @staticmethod + def _tag_data(data: dict[str, Any], include_prior: bool=True, include_likelihood: bool=True) -> dict[str, Any]: + """ + Adds tags / flags to the data that indicate whether the prior or likelihood should be included. + Args: + data: observations to condition on + include_prior: whether the prior contributions are taken into account in later log-probability + or gradient evaluations + include_likelihood: whether the likelihood is taken into account in later log-probability + or gradient evaluations + + Returns: + data dictionary with additional entries `include_prior` and `include_likelihood`, set to either + 0 or 1. + """ + return {**data, + "include_prior": int(include_prior), + "include_likelihood": int(include_likelihood)} + + def log_likelihood(self, x: np.ndarray) -> float: + """ + Evaluates the log-likelihood of the model. + + Calls out to the httpstan server specified in self._httpstan_url with + datums `include_prior` set to `0` and `include_likelihood` set to `1`. + + Args: + x: 1D array of floats at which the log-likelihood + is evaluated + + Returns: + log-likelihood evaluated at x + """ + preflight_data_transform = functools.partial(self._tag_data, include_prior=False) + return self._query_log_prob(x, preflight_data_transform) + + def log_prior(self, x: np.ndarray) -> float: + """ + Evaluates the log-prior probability of the model. + + Calls out to the httpstan server specified in self._httpstan_url with + datums `include_prior` set to `1` and `include_likelihood` set to `0`. + + Args: + x: 1D array of floats at which the log-prior probability + is evaluated + + Returns: + log-prior probability evaluated at x + """ + preflight_data_transform = functools.partial(self._tag_data, include_likelihood=False) + return self._query_log_prob(x, preflight_data_transform) + + def log_prob(self, x: np.ndarray) -> float: + """ + Log-probability of the density to be sampled. + Calls out to the httpstan server specified in self._httpstan_url. + Args: + x: 1D array of floats at which the log-probability + is evaluated + + Returns: + log-probability evaluated at x + """ + return self._query_log_prob(x) + + def log_likelihood_gradient(self, x: np.ndarray) -> np.ndarray: + """ + Evaluates the gradient of the log-likelihood of the model. + Calls out to the httpstan server specified in self._httpstan_url + with datums `include_prior` set to `0` and `include_likelihood` set to `1`. + + Args: + x: 1D array of floats at which the gradient of the + log-likelihood is evaluated + + Returns: + 1D array of floats containing the flattened + gradient of the log-likelihood evaluated at x + """ + preflight_data_transform = functools.partial(self._tag_data, include_prior=False) + return self._query_log_prob_gradient(x, preflight_data_transform) + + def log_prior_gradient(self, x: np.ndarray) -> np.ndarray: + """ + Evaluates the gradient of the log-prior density of the model. + Calls out to the httpstan server specified in self._httpstan_url + with datums `include_prior` set to `1` and `include_likelihood` set to `0`. + + Args: + x: 1D array of floats at which the gradient of the + log-prior density is evaluated + + Returns: + 1D array of floats containing the flattened + gradient of the log-prior density evaluated at x + """ + preflight_data_transform = functools.partial(self._tag_data, include_likelihood=False) + return self._query_log_prob_gradient(x, preflight_data_transform) + + def log_prob_gradient(self, x: np.ndarray) -> np.ndarray: + """ + Gradient of the log-probability of the density to be sampled. + Calls out to the httpstan server specified in self._httpstan_url. + + Args: + x: 1D array of floats at which the log-probability + gradient is evaluated + + Returns: + 1D array of floats containing the flattened + log-probability gradient evaluated at x + """ + return self._query_log_prob_gradient(x) diff --git a/examples/bridgestan-mixture/probability.py b/examples/bridgestan-mixture/probability.py index d1db3a2..8b6fa6f 100644 --- a/examples/bridgestan-mixture/probability.py +++ b/examples/bridgestan-mixture/probability.py @@ -5,7 +5,7 @@ import numpy as np -from chainsail_helpers.pdf.stan import BridgeStanPDF +from chainsail_helpers.pdf.stan.bridgestan import BridgeStanPDF pdf = BridgeStanPDF("mixture.stan") initial_states = np.array([[1.0]]) diff --git a/examples/stan-mixture/README.md b/examples/stan-mixture/README.md index 5508b31..2ccccf6 100644 --- a/examples/stan-mixture/README.md +++ b/examples/stan-mixture/README.md @@ -1,3 +1,3 @@ # Stan Gaussian mixture This is an example of how to wrap a model defined in the [Stan](https://mc-stan.org) language for use with Chainsail. -Chainsail internally has a [httpstan](https://github.com/stan-dev/httpstan) server running that is being accessed by the Stan PDF wrapper defined in `chainsail_utils.pdf.stan`. +Chainsail internally has a [httpstan](https://github.com/stan-dev/httpstan) server running that is being accessed by the Stan PDF wrapper defined in `chainsail_utils.pdf.stan.httpstan`. diff --git a/examples/stan-mixture/probability.py b/examples/stan-mixture/probability.py index 233fca9..c50f071 100644 --- a/examples/stan-mixture/probability.py +++ b/examples/stan-mixture/probability.py @@ -4,7 +4,7 @@ import numpy as np -from chainsail_helpers.pdf.stan import BaseStanPDF +from chainsail_helpers.pdf.stan.httpstan import BaseStanPDF model_code = """ parameters { From 90367db949ff431624686fa31608ee6a514568bc Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 19 Jul 2023 14:12:23 +0200 Subject: [PATCH 5/7] Bump `chainsail-helpers` version --- chainsail_helpers/pyproject.toml | 2 +- examples/nix/chainsail_helpers.nix | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/chainsail_helpers/pyproject.toml b/chainsail_helpers/pyproject.toml index 1d039bb..a6b3cd1 100644 --- a/chainsail_helpers/pyproject.toml +++ b/chainsail_helpers/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "chainsail-helpers" -version = "0.1.5.1" +version = "0.2.0.0" description = "Probability distribution interfaces, examples, and utilities for the Chainsail sampling service" authors = ["Simeon Carstens "] license = "MIT" diff --git a/examples/nix/chainsail_helpers.nix b/examples/nix/chainsail_helpers.nix index 23627ee..444bfbb 100644 --- a/examples/nix/chainsail_helpers.nix +++ b/examples/nix/chainsail_helpers.nix @@ -1,11 +1,15 @@ { buildPythonPackage, fetchPypi, numpy }: buildPythonPackage rec { pname = "chainsail-helpers"; - version = "0.1.5.1"; + version = "0.2.0.0"; src = fetchPypi { inherit pname version; - sha256 = "2e69fa0ed98aa1e4024ac66fb2b212ad81bb0ca71792da029b49f45c73769dcd"; + sha256 = "TODO after PyPi publishing"; }; doCheck = false; + # `chainsail-helpers` now also depends on BridgeStan + # (https://github.com/roualdes/bridgestan), but that's not + # Nix-packaged yet. But you can use everything in `chainsail-helper` + # other than the chainsail_helpers.pdf.stan.bridgestan module. propagatedBuildInputs = [ numpy ]; } From bb84c2d95330e15abc753f2374ca27403239e29a Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 19 Jul 2023 14:12:34 +0200 Subject: [PATCH 6/7] Bump minimum Python version for `chainsail-helpers` This is for BridgeStan compatibility. BridgeStan requires at least Python 3.9. --- chainsail_helpers/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainsail_helpers/pyproject.toml b/chainsail_helpers/pyproject.toml index a6b3cd1..1c8a56c 100644 --- a/chainsail_helpers/pyproject.toml +++ b/chainsail_helpers/pyproject.toml @@ -14,7 +14,7 @@ packages = [ { include = 'chainsail_helpers' } ] concatenate-samples = 'chainsail_helpers.scripts.concatenate_samples:main' [tool.poetry.dependencies] -python = ">=3.8" +python = ">=3.9" numpy = "^1.21.2" pymc = { version = "^4.1.4", optional = true } requests = { version = "^2.26.0", optional = true } From f96f0e3135a515f961d70cabc1f188d248abb2a3 Mon Sep 17 00:00:00 2001 From: Simeon Carstens Date: Wed, 19 Jul 2023 14:30:44 +0200 Subject: [PATCH 7/7] Update documentation --- documentation/algorithms/replica_exchange.md | 4 ++-- documentation/defining_custom_probability.md | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/documentation/algorithms/replica_exchange.md b/documentation/algorithms/replica_exchange.md index 94e4263..adfc1ad 100644 --- a/documentation/algorithms/replica_exchange.md +++ b/documentation/algorithms/replica_exchange.md @@ -30,8 +30,8 @@ $$ p(x|D, \beta) \propto p(D|x)^\beta \times p(x) $$ For $\beta=1$, we sample from the full posterior, while for $0 < \beta \ll 1$ the likelihood and thus the influence of the data are essentially switched off. -Note that likelihood tempering is currently not supported for the [PyMC interface](https://github.com/tweag/chainsail-resources/tree/main/chainsail_helpers/chainsail_helpers/pdf/pymc/__init__.py). -When using likelihood tempering with the Stan interface, note that you have to make small modifications to your Stan model definition, as explained in the documentation for the [`PosteriorStanPDF`] class in the [Stan wrapper](https://github.com/tweag/chainsail-resources/tree/main/chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py). +Note that likelihood tempering is currently neither supported for the [PyMC interface](https://github.com/tweag/chainsail-resources/tree/main/chainsail_helpers/chainsail_helpers/pdf/pymc/__init__.py) nor for the [BridgeStan wrapper]((https://github.com/tweag/chainsail-resources/tree/main/chainsail_helpers/chainsail_helpers/pdf/stan/bridgestan.py)). +When using likelihood tempering with the Stan interface, note that you have to make small modifications to your Stan model definition, as explained in the documentation for the [`PosteriorStanPDF`] class in the [`httpstan` wrapper](https://github.com/tweag/chainsail-resources/tree/main/chainsail_helpers/chainsail_helpers/pdf/stan/httpstan.py). ## Choice of inverse temperatures If, for two neighboring (in the schedule sense) replicas, the values for $\beta$ are too different, exchanges between those replicas are unlikely to be accepted. diff --git a/documentation/defining_custom_probability.md b/documentation/defining_custom_probability.md index 5c4a6ea..c2f03c2 100644 --- a/documentation/defining_custom_probability.md +++ b/documentation/defining_custom_probability.md @@ -3,14 +3,15 @@ The [`chainsail-helpers` package](./chainsail_helpers/README.md) provides the abstract interface (in [`chainsail_helpers.pdf`](./chainsail_helpers/chainsail_helpers/pdf/__init__.py). You have three options: - code up your probability distribution yourself by subclassing either the general PDF or the posterior PDF interface. In that case, make sure to specify any Python dependencies you might require during the job submission step. - in case you happen to already have your statistical model formulated in [PyMC](https://docs.pymc.io), you can use the [PyMC wrapper](./chainsail_helpers/chainsail_helpers/pdf/pymc/__init__.py). An example is provided [here](./examples/pymc-mixture/probability.py). - - if you formulated your model in [Stan](https://mc-stan.org), use the [Stan wrapper](./chainsail_helpers/chainsail_helpers/pdf/stan/__init__.py) we provide. It talks to a Chainsail-internal [`httpstan`](https://github.com/stan-dev/httpstan) server and might thus be a bit slow. Also see the [example](./examples/stan-mixture/probability.py). + - if you formulated your model in [Stan](https://mc-stan.org), use one of the [Stan wrappers](./chainsail_helpers/chainsail_helpers/pdf/stan/) we provide. We recommend you use the BridgeStan wrapper; the `httpstan` wrapper is very slow. Also see the [Bridgestan wrapper rexample](./examples/bridgestan-mixture/probability.py) and the [`httpstan` wrapper rexample](./examples/stan-mixture/probability.py). Follow these steps when defining your own probability distribution: 1. make an instance of your PDF available as an object with name `pdf` in a file called `probability.py` and furthermore provide a flat `numpy` array called `initial_states` in the same file which holds the initial state for the MCMC samplers. Note that all Python dependencies you use have to be entered into the corresponding field on the job submission form. Chainsail comes with a couple of standard dependencies preinstalled: - `numpy` (version 1.23.2), - `scipy` (version 1.9.1), - `pymc` (version 4.1.7), - - `chainsail-helpers` (version 0.1.4) + - `chainsail-helpers` (version 0.1.4), + - `bridgestan` (version 2.1.1) 2. test whether your PDF implementation actually works by calling its `log_{prob / likelihood, prior}` and `log_{prob / likelihood, prior}_gradient` methods with your `initial_states` as an argument. We will provide an automated way to easily test this later. -3. prepare a zip file of your `probability.py` and any other file dependencies your code may have (e.g., data files) and make sure that you don't have subdirectories and that this structure matches how you access these files in your `probability.py`. +3. prepare a zip file of your `probability.py` and any other file dependencies your code may have (e.g., data files, Stan model) and make sure that you don't have subdirectories and that this structure matches how you access these files in your `probability.py`. 4. upload that zip file in the job submission form.