-
Notifications
You must be signed in to change notification settings - Fork 275
Speed up Constant @ MatrixExpr
#1159
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Zeroto521
wants to merge
11
commits into
scipopt:master
Choose a base branch
from
Zeroto521:issue/1153
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+169
−17
Open
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
ec229d1
Add numpy as build dependency and enhance matrix operations
Zeroto521 a38d790
Add and update matrix dot product tests
Zeroto521 de9a208
Replace MatrixExpr type checks with np.ndarray
Zeroto521 b42c337
Remove redundant 'out' handling in __array_ufunc__
Zeroto521 7b99d72
Update CHANGELOG.md
Zeroto521 8154fbc
Remove custom __matmul__ from MatrixExpr
Zeroto521 880f850
Refactor matrix multiplication logic in MatrixExpr
Zeroto521 035fbe2
Update matrix matmul return type tests
Zeroto521 99e7cb3
Remove `_is_num_dt`
Zeroto521 1e82cef
Enhance MatrixExpr __array_ufunc__ with type hints and docs
Zeroto521 6237880
Merge branch 'master' into issue/1153
Zeroto521 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,7 +3,7 @@ | |
| # TODO Add tests | ||
| """ | ||
|
|
||
| from typing import Optional, Tuple, Union | ||
| from typing import Literal, Optional, Tuple, Union | ||
| import numpy as np | ||
| try: | ||
| # NumPy 2.x location | ||
|
|
@@ -12,6 +12,10 @@ except ImportError: | |
| # Fallback for NumPy 1.x | ||
| from numpy.core.numeric import normalize_axis_tuple | ||
|
|
||
| cimport numpy as cnp | ||
|
|
||
| cnp.import_array() | ||
Joao-Dionisio marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
|
|
||
| def _is_number(e): | ||
| try: | ||
|
|
@@ -51,6 +55,48 @@ def _matrixexpr_richcmp(self, other, op): | |
|
|
||
| class MatrixExpr(np.ndarray): | ||
|
|
||
| def __array_ufunc__( | ||
| self, | ||
| ufunc: np.ufunc, | ||
| method: Literal["__call__", "reduce", "reduceat", "accumulate", "outer", "at"], | ||
| *args, | ||
| **kwargs, | ||
| ): | ||
| """ | ||
| Customizes the behavior of NumPy ufuncs for MatrixExpr. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| ufunc : numpy.ufunc | ||
| The ufunc object that was called. | ||
|
|
||
| method : {"__call__", "reduce", "reduceat", "accumulate", "outer", "at"} | ||
| A string indicating which UFunc method was called. | ||
|
|
||
| *args : tuple | ||
| The input arguments to the ufunc. | ||
|
|
||
| **kwargs : dict | ||
| Additional keyword arguments to the ufunc. | ||
|
|
||
| Returns | ||
| ------- | ||
| Expr, GenExpr, MatrixExpr | ||
| The result of the ufunc operation is wrapped back into a MatrixExpr if | ||
| applicable. | ||
|
|
||
| """ | ||
| res = NotImplemented | ||
| if method == "__call__": # Standard ufunc call, e.g., np.add(a, b) | ||
| if ufunc in {np.matmul, np.dot}: | ||
| res = _core_dot(_ensure_array(args[0]), _ensure_array(args[1])) | ||
|
|
||
| if res is NotImplemented: | ||
| # Unboxing MatrixExpr to stop __array_ufunc__ recursion | ||
| args = tuple(_ensure_array(arg) for arg in args) | ||
| res = super().__array_ufunc__(ufunc, method, *args, **kwargs) | ||
| return res.view(MatrixExpr) if isinstance(res, np.ndarray) else res | ||
|
|
||
| def sum( | ||
| self, | ||
| axis: Optional[Union[int, Tuple[int, ...]]] = None, | ||
|
|
@@ -145,8 +191,6 @@ class MatrixExpr(np.ndarray): | |
| def __rsub__(self, other): | ||
| return super().__rsub__(other).view(MatrixExpr) | ||
|
|
||
| def __matmul__(self, other): | ||
| return super().__matmul__(other).view(MatrixExpr) | ||
|
|
||
| class MatrixGenExpr(MatrixExpr): | ||
| pass | ||
|
|
@@ -161,3 +205,86 @@ class MatrixExprCons(np.ndarray): | |
|
|
||
| def __eq__(self, other): | ||
| raise NotImplementedError("Cannot compare MatrixExprCons with '=='.") | ||
|
|
||
|
|
||
| cdef inline _ensure_array(arg, bool convert_scalar = True): | ||
| if isinstance(arg, np.ndarray): | ||
| return arg.view(np.ndarray) | ||
| elif isinstance(arg, (list, tuple)): | ||
| return np.asarray(arg) | ||
| return np.array(arg, dtype=object) if convert_scalar else arg | ||
|
|
||
|
|
||
| def _core_dot(cnp.ndarray a, cnp.ndarray b) -> Union[Expr, np.ndarray]: | ||
| """ | ||
| Perform matrix multiplication between an N-dimensional constant array and an N-dimensional | ||
| `np.ndarray` of type `object` and containing `Expr` objects. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| a : np.ndarray | ||
| A constant n-d `np.ndarray` of type `np.float64`. | ||
|
|
||
| b : np.ndarray | ||
| An n-d `np.ndarray` of type `object` and containing `Expr` objects. | ||
|
|
||
| Returns | ||
| ------- | ||
| Expr or np.ndarray | ||
| If both `a` and `b` are 1-D arrays, return an `Expr`, otherwise return a | ||
| `np.ndarray` of type `object` and containing `Expr` objects. | ||
| """ | ||
| cdef bool a_is_1d = a.ndim == 1 | ||
| cdef bool b_is_1d = b.ndim == 1 | ||
| cdef cnp.ndarray a_nd = a[..., np.newaxis, :] if a_is_1d else a | ||
| cdef cnp.ndarray b_nd = b[..., :, np.newaxis] if b_is_1d else b | ||
| cdef bool a_is_num = a_nd.dtype.kind in "fiub" | ||
|
|
||
| if a_is_num ^ (b_nd.dtype.kind in "fiub"): | ||
| res = _core_dot_2d(a_nd, b_nd) if a_is_num else _core_dot_2d(b_nd.T, a_nd.T).T | ||
| if a_is_1d and b_is_1d: | ||
| return res.item() | ||
| if a_is_1d: | ||
| return res.reshape(np.delete(res.shape, -2)) | ||
| if b_is_1d: | ||
| return res.reshape(np.delete(res.shape, -1)) | ||
| return res | ||
| return NotImplemented | ||
|
|
||
|
|
||
| @np.vectorize(otypes=[object], signature="(m,n),(n,p)->(m,p)") | ||
| def _core_dot_2d(cnp.ndarray a, cnp.ndarray x) -> np.ndarray: | ||
|
Comment on lines
+255
to
+256
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I tried a custom Cython loop, but the performance is equal:
cdef cnp.ndarray _core_dot_nd(cnp.ndarray a, cnp.ndarray b):
a_2d = a[None, :] if a.ndim == 1 else a
b_2d = b[:, None] if b.ndim == 1 else b
cdef tuple shape = np.broadcast_shapes(a_2d.shape[:-2], b_2d.shape[:-2])
cdef int m = a_2d.shape[-2]
cdef int k = a_2d.shape[-1]
cdef int p = b_2d.shape[-1]
cdef int batch_size = 1
cdef int i
for i in shape: batch_size *= i
cdef cnp.ndarray[object, ndim=1] res_batches = np.empty(batch_size, dtype=object)
cdef cnp.ndarray a_flat = np.broadcast_to(a_2d, shape + (m, k)).reshape(-1, m, k)
cdef cnp.ndarray b_flat = np.broadcast_to(b_2d, shape + (k, p)).reshape(-1, k, p)
for i in range(batch_size): res_batches[i] = _core_dot_2d(a_flat[i], b_flat[i])
cdef cnp.ndarray res = np.stack(res_batches).reshape(shape + (m, p))
if a.ndim == 1: res = res.squeeze(-2)
if b.ndim == 1: res = res.squeeze(-1)
return res
cdef cnp.ndarray _core_dot_2d(cnp.ndarray a, cnp.ndarray x):
if not a.flags.c_contiguous or a.dtype != np.float64:
a = np.ascontiguousarray(a, dtype=np.float64)
cdef const double[:, :] a_2d = a
cdef int m = a.shape[0], n = a.shape[1]
cdef int k = x.shape[1] if x.ndim > 1 else 1
cdef cnp.ndarray x_2d = x.reshape(n, k)
cdef cnp.ndarray[object, ndim=2] res = np.zeros((m, k), dtype=object)
cdef Py_ssize_t[:] nz_idx = np.empty(n, dtype=np.intp)
cdef double[:] nz_val = np.empty(n, dtype=np.float64)
cdef int i, j, l, nz_count
cdef list exprs
for i in range(m):
nz_count = 0
for l in range(n):
if a_2d[i, l] != 0:
nz_idx[nz_count] = l
nz_val[nz_count] = a_2d[i, l]
nz_count += 1
if nz_count == 0:
continue
for j in range(k):
if nz_count == 1:
res[i, j] = nz_val[0] * x_2d[nz_idx[0], j]
else:
exprs = []
for l in range(nz_count):
exprs.append(nz_val[l] * x_2d[nz_idx[l], j])
res[i, j] = quicksum(exprs)
return res |
||
| """ | ||
| Perform matrix multiplication between a 2-dimensional constant array and a 2-dimensional | ||
| `np.ndarray` of type `object` and containing `Expr` objects. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| a : np.ndarray | ||
| A 2-D `np.ndarray` of type `np.float64`. | ||
|
|
||
| x : np.ndarray | ||
| A 2-D `np.ndarray` of type `object` and containing `Expr` objects. | ||
|
|
||
| Returns | ||
| ------- | ||
| np.ndarray | ||
| A 2-D `np.ndarray` of type `object` and containing `Expr` objects. | ||
| """ | ||
| if not a.flags.c_contiguous or a.dtype != np.float64: | ||
Zeroto521 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| a = np.ascontiguousarray(a, dtype=np.float64) | ||
|
|
||
| cdef const double[:, :] a_view = a | ||
| cdef int m = a.shape[0], k = x.shape[1] | ||
| cdef cnp.ndarray[object, ndim=2] res = np.zeros((m, k), dtype=object) | ||
| cdef Py_ssize_t[:] nonzero | ||
| cdef int i, j, idx | ||
|
|
||
| for i in range(m): | ||
| if (nonzero := np.flatnonzero(a_view[i, :])).size == 0: | ||
| continue | ||
|
|
||
| for j in range(k): | ||
| res[i, j] = quicksum(a_view[i, idx] * x[idx, j] for idx in nonzero) | ||
|
|
||
| return res | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.