Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
74d8ca2
add scalers for tensors
kevinyang-cky Dec 22, 2025
858c5ec
modified print_scaler() to convert tensors to NumPy arrays
kevinyang-cky Dec 23, 2025
830d00c
modified read_scaler() to convert NumPy arrays to tensors
kevinyang-cky Dec 23, 2025
bede810
add PyTorch check and public API
kevinyang-cky Jan 2, 2026
6a0d1e2
remove PyTorch check (already in __init__.py)
kevinyang-cky Jan 2, 2026
0c7ccb4
revert changes to backend.py
kevinyang-cky Jan 5, 2026
9b738de
modify PyTorch check and import backend for tensors
kevinyang-cky Jan 5, 2026
7dbea59
add PyTorch hard check
kevinyang-cky Jan 5, 2026
b5d18f5
backend methods for tensors
kevinyang-cky Jan 5, 2026
9b9a4aa
modify the required version and conditional torch imports
kevinyang-cky Jan 5, 2026
ca18ee2
modify the required version
kevinyang-cky Jan 5, 2026
2ba8785
tensors placement
kevinyang-cky Feb 2, 2026
a7d255f
code optimization: avoid for-looping
kevinyang-cky Feb 4, 2026
8bc88d6
add path to environment
kevinyang-cky Feb 4, 2026
5a3d33f
installing in editable mode
kevinyang-cky Feb 4, 2026
37c4101
specify uv version
kevinyang-cky Feb 5, 2026
7ed3595
fix syntax error
kevinyang-cky Feb 5, 2026
262423b
downgrade Python version
kevinyang-cky Feb 5, 2026
bb87cb0
try this version combination
kevinyang-cky Feb 5, 2026
9d8466b
use Python 3.9 in GitHub workflow and latest uv
kevinyang-cky Feb 5, 2026
9eea944
fix syntax for backward compatibility
kevinyang-cky Feb 5, 2026
18d1876
revert changes to pass workflow runs
kevinyang-cky Feb 5, 2026
d192628
Pin Pandas and NumPy versions per Katelyn's suggestion
kevinyang-cky Feb 5, 2026
5775e2c
include attribute variable_names into operations for DStandardScalerT…
kevinyang-cky Feb 12, 2026
03c950e
include attribute variable_names into operations for DMinMaxScalerTen…
kevinyang-cky Feb 13, 2026
37e0877
fix columns check in DMinMaxScalerTensor()
kevinyang-cky Feb 13, 2026
3c615e8
modified variable_names attribute decoding
kevinyang-cky Feb 13, 2026
c7c623a
Merge branch 'modify_backend4tensors'
kevinyang-cky Feb 13, 2026
bb095b0
modify package_transformed_x() to accommodate input data without attri…
kevinyang-cky Feb 13, 2026
d77c83f
Merge branch 'add_attribute'
kevinyang-cky Feb 13, 2026
a4ec6c6
allow the data to be transformed to have fewer variables than the scaler
kevinyang-cky Feb 13, 2026
d2afae5
Merge branch 'add_attribute'
kevinyang-cky Feb 13, 2026
614acde
uncomment code
kevinyang-cky Feb 13, 2026
73b5a6b
Merge branch 'add_attribute'
kevinyang-cky Feb 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion bridgescaler/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,51 @@
from importlib.metadata import version, PackageNotFoundError

from packaging.version import Version


# 1. PyTorch Checks
# Minimum torch release the tensor-based scalers are tested against.
REQUIRED_TORCH_VERSION = Version("2.0.0")

def get_torch_status() -> "tuple[bool, Version | None]":
    """Probe for an installed PyTorch without importing it.

    Returns
    -------
    tuple
        ``(True, installed_version)`` when the ``torch`` distribution is
        installed, ``(False, None)`` otherwise.

    Notes
    -----
    The return annotation is quoted so it is never evaluated at runtime:
    the ``X | Y`` union syntax (PEP 604) only exists on Python >= 3.10,
    and this project's CI pins Python 3.9.
    """
    try:
        # importlib.metadata only reads package metadata; it does not
        # import torch, so this check is cheap and side-effect free.
        return True, Version(version("torch"))
    except PackageNotFoundError:
        return False, None

TORCH_AVAILABLE, TORCH_VERSION = get_torch_status()

def require_torch() -> None:
    """Raise unless a sufficiently recent PyTorch is installed.

    Raises
    ------
    ImportError
        If the ``torch`` distribution is not installed at all.
    RuntimeError
        If torch is installed but older than ``REQUIRED_TORCH_VERSION``.
    """
    # Fast path: everything is in order, nothing to do.
    if TORCH_AVAILABLE and TORCH_VERSION >= REQUIRED_TORCH_VERSION:
        return
    if not TORCH_AVAILABLE:
        raise ImportError("PyTorch is required but not installed")
    raise RuntimeError(
        f"PyTorch >= {REQUIRED_TORCH_VERSION} required; found {TORCH_VERSION}"
    )

# 2. Base Imports
from .backend import save_scaler, load_scaler, print_scaler, read_scaler
from .group import GroupStandardScaler, GroupRobustScaler, GroupMinMaxScaler
from .deep import DeepStandardScaler, DeepMinMaxScaler, DeepQuantileTransformer
from .distributed import DStandardScaler, DMinMaxScaler, DQuantileScaler

# 3. Conditional Torch Imports
if TORCH_AVAILABLE:
    # The tensor modules call require_torch() at import time, which raises
    # ImportError / RuntimeError when torch is installed but too old.  Catch
    # exactly those so an unusable torch does not break importing the rest
    # of the package, while real bugs still surface.
    try:
        from .distributed_tensor import (
            DStandardScalerTensor,
            DMinMaxScalerTensor,
        )
        from .backend_tensor import print_scaler_tensor, read_scaler_tensor
    except (ImportError, RuntimeError):
        pass

# 4. Define Public API
__all__ = [
    # Utilities
    "save_scaler", "load_scaler", "print_scaler", "read_scaler",
    "TORCH_AVAILABLE", "require_torch",
    # Scalers
    "GroupStandardScaler", "GroupRobustScaler", "GroupMinMaxScaler",
    "DeepStandardScaler", "DeepMinMaxScaler", "DeepQuantileTransformer",
    "DStandardScaler", "DMinMaxScaler", "DQuantileScaler",
]

# Advertise the tensor API only when the conditional import above succeeded.
if "DStandardScalerTensor" in globals():
    __all__ += [
        "DStandardScalerTensor", "DMinMaxScalerTensor",
        "print_scaler_tensor", "read_scaler_tensor",
    ]
2 changes: 1 addition & 1 deletion bridgescaler/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,4 +156,4 @@ def create_synthetic_data():
for l in range(locs.shape[0]):
x_data_dict[names[l]] = np.random.normal(loc=locs[l], scale=scales[l], size=num_examples)
x_data = pd.DataFrame(x_data_dict)
return x_data
return x_data
43 changes: 43 additions & 0 deletions bridgescaler/backend_tensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from . import require_torch
require_torch() # enforce torch availability/version at import time
import torch

import json
import numpy as np

from bridgescaler.distributed_tensor import DStandardScalerTensor, DMinMaxScalerTensor
from .backend import NumpyEncoder, object_hook


# Registry mapping the serialized "type" field back to its scaler class.
scaler_objs = {
    "DStandardScalerTensor": DStandardScalerTensor,
    "DMinMaxScalerTensor": DMinMaxScalerTensor,
}


def print_scaler_tensor(scaler):
    """Serialize a tensor-based scaler to a JSON string.

    Tensor-valued attributes are converted to NumPy arrays so they can be
    encoded by :class:`NumpyEncoder`; all other attributes pass through.

    Parameters
    ----------
    scaler : DStandardScalerTensor or DMinMaxScalerTensor
        Fitted scaler whose attributes should be serialized.

    Returns
    -------
    str
        JSON document with a "type" field naming the scaler class.
    """
    # Copy the attribute dict: the original code aliased scaler.__dict__
    # directly, which injected a "type" key into the live scaler and
    # permanently replaced its tensor attributes with NumPy arrays.
    scaler_params = dict(scaler.__dict__)
    scaler_params["type"] = type(scaler).__name__

    for key, value in scaler_params.items():
        if isinstance(value, torch.Tensor):
            # detach() so tensors tracking gradients can be exported;
            # copy() decouples the array from torch's memory.
            scaler_params[key] = value.detach().cpu().numpy().copy()

    return json.dumps(scaler_params, indent=4, sort_keys=True, cls=NumpyEncoder)


def read_scaler_tensor(scaler_str):
    """Reconstruct a tensor-based scaler from its JSON representation.

    Inverse of :func:`print_scaler_tensor`: numeric attributes are restored
    as :class:`torch.Tensor`, while non-numeric attributes (column labels,
    variable names, ...) are kept as-is.

    Parameters
    ----------
    scaler_str : str
        JSON document produced by :func:`print_scaler_tensor`.

    Returns
    -------
    DStandardScalerTensor or DMinMaxScalerTensor
        Scaler instance with its attributes restored.
    """
    scaler_params = json.loads(scaler_str, object_hook=object_hook)
    scaler = scaler_objs[scaler_params["type"]]()
    del scaler_params["type"]
    for k, v in scaler_params.items():
        if k == "x_columns_":
            setattr(scaler, k, v)
        else:
            try:
                setattr(scaler, k, torch.tensor(v))
            except (TypeError, ValueError, RuntimeError):
                # Non-numeric attributes (e.g. variable_names, a list of
                # strings) cannot become tensors; the original code crashed
                # here.  Restore them unchanged instead.
                setattr(scaler, k, v)
    return scaler
Loading