Skip to content

Commit d56f70e

Browse files
committed
fixed orthogonal and mainly shortened docstrings
1 parent 8890aa8 commit d56f70e

1 file changed

Lines changed: 36 additions & 136 deletions

File tree

src/scikit_opls/_orthogonal.py

Lines changed: 36 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
1-
"""OSC-style orthogonal filtering for OPLS.
2-
3-
The public OPLS estimator passes a preprocessed ``X`` and a single centered
4-
response ``y``. The low-level predictive-direction helper also accepts a
5-
multivariate response block for tests and internal reuse, but OPLS itself is
6-
univariate.
7-
"""
1+
"""OSC-style orthogonal filtering primitives for OPLS."""
82

93
from __future__ import annotations
104

@@ -30,23 +24,8 @@ def _validate_n_components(n_components: int) -> int:
3024
class OrthogonalComponents:
3125
"""Result of :func:`opls_filter`.
3226
33-
Attributes
34-
----------
35-
x_ortho_weights : ndarray of shape (n_features, n_components)
36-
Orthogonal weight vectors ``w_o``.
37-
x_ortho_scores : ndarray of shape (n_samples, n_components)
38-
Orthogonal scores ``t_o``.
39-
x_ortho_loadings : ndarray of shape (n_features, n_components)
40-
Orthogonal loadings ``p_o``.
41-
x_filtered : ndarray of shape (n_samples, n_features)
42-
``X`` with the orthogonal variation removed.
43-
x_predictive_weight : ndarray of shape (n_features,)
44-
Normalised predictive weight direction ``w_p`` when computed. For
45-
``opls_filter(..., n_components=0)``, this is a zero vector because the
46-
predictive direction is unused.
47-
n_components : int
48-
Number of orthogonal components actually extracted (may be smaller than
49-
requested if ``X`` ran out of orthogonal variation).
27+
``n_components`` may be smaller than requested if ``X`` ran out of orthogonal
28+
variation. ``x_predictive_weight`` is a zero vector when ``n_components=0``.
5029
"""
5130

5231
x_ortho_weights: NDArray[np.float64]
@@ -58,36 +37,17 @@ class OrthogonalComponents:
5837

5938

6039
def predictive_weight(X: ArrayLike, Y: ArrayLike) -> NDArray[np.float64]:
61-
"""Leading joint X–Y direction (unit norm).
62-
63-
Generalises ``w_p ∝ Xᵀy`` to multivariate ``Y`` via the dominant left singular
64-
vector of ``S = Xᵀ Y``. For a single-column ``Y`` this reduces exactly to the
65-
normalised ``Xᵀy`` (up to sign), so single-``y`` OPLS is unchanged. The public
66-
OPLS estimator passes a single centered response; multivariate ``Y`` is
67-
accepted here only because the predictive-direction computation is
68-
block-agnostic.
69-
70-
Parameters
71-
----------
72-
X : ndarray of shape (n_samples, n_features)
73-
Preprocessed predictor matrix.
74-
Y : ndarray of shape (n_samples,) or (n_samples, n_targets)
75-
Centered response(s).
76-
77-
Returns
78-
-------
79-
w_p : ndarray of shape (n_features,)
80-
Unit-normalised predictive direction.
81-
82-
Raises
83-
------
84-
ValueError
85-
If ``X`` is orthogonal to ``Y`` (the predictive direction is undefined).
40+
"""Return the unit X-side direction of maximal X/Y covariance.
41+
42+
For univariate ``Y`` this is normalized ``X.T @ y``; for multivariate ``Y``,
43+
it is the leading left singular vector of ``X.T @ Y``.
8644
"""
8745
X = np.asarray(X, dtype=np.float64)
8846
Y = np.asarray(Y, dtype=np.float64)
8947
if X.ndim != 2:
9048
raise ValueError(f"X must be 2D, got shape {X.shape}")
49+
if X.shape[1] == 0:
50+
raise ValueError("X must contain at least one feature.")
9151
if not np.all(np.isfinite(X)):
9252
raise ValueError("X must contain only finite values.")
9353
if not np.all(np.isfinite(Y)):
@@ -142,37 +102,17 @@ def orthogonal_filter(
142102
predictive_direction: NDArray[np.float64],
143103
n_components: int,
144104
) -> OrthogonalComponents:
145-
"""Remove up to ``n_components`` directions in ``block`` orthogonal to a given one.
146-
147-
OSC-style deflation of one block against a supplied predictive direction
148-
(passed in rather than computed from ``y``), as used by OPLS.
149-
150-
Parameters
151-
----------
152-
block : ndarray of shape (n_samples, n_features)
153-
Preprocessed block to deflate.
154-
predictive_direction : ndarray of shape (n_features,)
155-
Unit-norm direction defining the predictive subspace to preserve.
156-
n_components : int
157-
Number of orthogonal components to extract.
158-
159-
Returns
160-
-------
161-
components : OrthogonalComponents
162-
Fitted orthogonal weights/scores/loadings, the filtered block, the
163-
predictive direction, and the number of components actually extracted.
164-
165-
Notes
166-
-----
167-
For each component: form the predictive score ``t = X w_p``, its loading
168-
``p = Xᵀt / (tᵀt)``, then the part of ``p`` orthogonal to ``w_p`` becomes the
169-
orthogonal weight ``w_o``. The orthogonal score ``t_o = X w_o`` and loading
170-
``p_o = Xᵀt_o / (t_oᵀt_o)`` are deflated out: ``X <- X - t_o p_oᵀ``.
105+
"""Sequentially deflate block variation orthogonal to a predictive direction.
106+
107+
The supplied predictive direction is normalized defensively. Fewer components
108+
may be returned if no numerically resolvable orthogonal variation remains.
171109
"""
172110
n_components = _validate_n_components(n_components)
173111
X = np.asarray(block, dtype=np.float64)
174112
if X.ndim != 2:
175113
raise ValueError(f"block must be 2D, got shape {X.shape}")
114+
if X.shape[1] == 0:
115+
raise ValueError("block must contain at least one feature.")
176116
w_pred = np.asarray(predictive_direction, dtype=np.float64).ravel()
177117
n_samples, n_features = X.shape
178118
if w_pred.shape != (n_features,):
@@ -184,17 +124,15 @@ def orthogonal_filter(
184124
if not np.all(np.isfinite(w_pred)):
185125
raise ValueError("predictive_direction must contain only finite values.")
186126

187-
# The deflation math below assumes a unit-norm predictive direction. Normalise it
188-
# defensively in case a caller passes an un-normalised direction. The direction is
189-
# unused when no components are requested, so only guard/normalise when it matters.
190-
if n_components > 0:
191-
w_norm = float(np.linalg.norm(w_pred))
192-
if w_norm <= np.finfo(np.float64).tiny:
193-
raise ValueError(
194-
"predictive_direction must be numerically non-zero when "
195-
"n_components > 0."
196-
)
127+
# Normalize defensively; the deflation formulas assume unit-norm w_pred. Only
128+
# raise on a zero direction when it will actually be used (n_components > 0).
129+
w_norm = float(np.linalg.norm(w_pred))
130+
if w_norm > np.finfo(np.float64).tiny:
197131
w_pred = w_pred / w_norm
132+
elif n_components > 0:
133+
raise ValueError(
134+
"predictive_direction must be numerically non-zero when n_components > 0."
135+
)
198136

199137
W = np.zeros((n_features, n_components))
200138
T = np.zeros((n_samples, n_components))
@@ -211,11 +149,9 @@ def orthogonal_filter(
211149
tt = float(t @ t)
212150
if tt <= _TOL * res_norm_sq:
213151
break
214-
# ``p`` describes how the current residual X loads onto the predictive
215-
# score. Removing its projection onto w_pred leaves only X variation
216-
# orthogonal to the predictive direction.
217152
p = X_res.T @ t / tt
218-
w_o = p - float(w_pred @ p) * w_pred # part of the loading orthogonal to w_pred
153+
# Remove the predictive-direction part of p to obtain an orthogonal weight.
154+
w_o = p - float(w_pred @ p) * w_pred
219155
w_norm = float(np.linalg.norm(w_o))
220156
p_norm = float(np.linalg.norm(p))
221157
if w_norm <= _TOL * p_norm:
@@ -226,8 +162,7 @@ def orthogonal_filter(
226162
if too <= _TOL * res_norm_sq:
227163
break
228164
p_o = X_res.T @ t_o / too
229-
# Sequentially deflate the fitted orthogonal score/loading pair so the next
230-
# component is extracted from the remaining X variation.
165+
# Deflate before extracting the next orthogonal component.
231166
X_res -= np.outer(t_o, p_o)
232167
W[:, i] = w_o
233168
T[:, i] = t_o
@@ -254,34 +189,13 @@ def orthogonal_filter(
254189

255190

256191
def opls_filter(X: ArrayLike, Y: ArrayLike, n_components: int) -> OrthogonalComponents:
257-
"""OPLS X-orthogonal filter: predictive direction from ``(X, Y)``, deflate ``X``.
258-
259-
Parameters
260-
----------
261-
X : ndarray of shape (n_samples, n_features)
262-
Preprocessed (centered/scaled) predictor matrix.
263-
Y : ndarray of shape (n_samples,) or (n_samples, n_targets)
264-
Centered response(s).
265-
n_components : int
266-
Number of orthogonal components to extract.
267-
268-
Returns
269-
-------
270-
components : OrthogonalComponents
271-
See :func:`orthogonal_filter`.
272-
273-
Notes
274-
-----
275-
The predictive direction is computed once from the original ``(X, Y)`` and
276-
reused for every orthogonal component. For univariate ``Y`` this is exact, not a
277-
shortcut: each orthogonal score is constructed exactly orthogonal to ``Y``, so
278-
removing it leaves ``Xᵀy`` (hence the predictive direction ``w_p ∝ Xᵀy``)
279-
unchanged. Recomputing ``w_p`` from each deflated residual would yield the same
280-
direction, so the canonical Trygg-Wold OPLS algorithm coincides with this
281-
fixed-direction filter for single-response OPLS.
282-
283-
When ``n_components=0``, ``Y`` is not inspected because no predictive direction
284-
is needed; the returned predictive weight is a zero vector.
192+
"""Compute the predictive direction from ``(X, Y)`` once, then deflate ``X``.
193+
194+
Reusing one direction for every component is exact, not a shortcut: each
195+
orthogonal score is built orthogonal to ``Y``, so removing it leaves ``Xᵀy``
196+
(hence the predictive direction) unchanged — recomputing it from each
197+
deflated residual would give the same answer. When ``n_components=0``, ``Y``
198+
is not inspected and the returned predictive weight is a zero vector.
285199
"""
286200
n_components = _validate_n_components(n_components)
287201
X = np.asarray(X, dtype=np.float64)
@@ -304,23 +218,9 @@ def apply_orthogonal_filter(
304218
x_ortho_weights: NDArray[np.float64],
305219
x_ortho_loadings: NDArray[np.float64],
306220
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
307-
"""Replay a fitted orthogonal filter on new data.
308-
309-
Parameters
310-
----------
311-
X : ndarray of shape (n_samples, n_features)
312-
Preprocessed predictor matrix.
313-
x_ortho_weights : ndarray of shape (n_features, n_components)
314-
Orthogonal weights from :func:`opls_filter`.
315-
x_ortho_loadings : ndarray of shape (n_features, n_components)
316-
Orthogonal loadings from :func:`opls_filter`.
317-
318-
Returns
319-
-------
320-
X_filtered : ndarray of shape (n_samples, n_features)
321-
``X`` with the fitted orthogonal variation removed.
322-
x_ortho_scores : ndarray of shape (n_samples, n_components)
323-
Orthogonal scores of the new samples.
221+
"""Replay fitted sequential orthogonal deflations on new preprocessed X.
222+
223+
Returns ``(X_filtered, x_ortho_scores)``.
324224
"""
325225
X = np.asarray(X, dtype=np.float64)
326226
W = np.asarray(x_ortho_weights, dtype=np.float64)

0 commit comments

Comments
 (0)