Merge pull request #10 from HauserGroup/targeted-improvements

jsture · web-flow · commit 05ab06ca9cf5 · 2026-06-29T08:21:24.000+01:00
Tiny Targeted improvements
diff --git a/CHANGES.md b/CHANGES.md
@@ -21,6 +21,9 @@ and default-value changes will be documented here.
 
 ### Changed (breaking, pre-1.0)
 
+- Renamed the fitted attribute `rmsee_` to `rmse_` (uncorrected training root mean
+  squared error). The old name implied a degrees-of-freedom-corrected calibration
+  error, which it never computed; no alias is kept (pre-1.0).
 - Removed `OPLSDA`'s `probability` parameter and its in-sample Platt calibration
   (`predict_proba`, `raw_score`). `OPLSDA` is now a clean score classifier:
   `decision_function` returns the raw signed OPLS regression output and `predict`
@@ -53,6 +56,17 @@ and default-value changes will be documented here.
 
 ### Added
 
+- `OPLS.coef_raw_` / `OPLS.intercept_raw_`: linear coefficients on the original raw
+  input feature space, collapsing scaling, the orthogonal filter and the predictive
+  PLS into one map, so `(X @ coef_raw_.T + intercept_raw_).ravel()` reproduces
+  `predict(X)`. No bare sklearn `coef_` alias is exposed (it would be the raw-space
+  coefficient, not the engine's filtered-space one).
+
+- `OPLS.filter_transform(X)` returns the preprocessed, orthogonal-filtered `X`
+  actually passed to the predictive PLS engine (so
+  `pls_.predict(filter_transform(X))` matches `predict(X)`); useful for diagnostics
+  and downstream modelling.
+
 - Zensical documentation site (`zensical.toml`, mkdocstrings, numpy docstring style)
   with a `zensical build` CI gate and a GitHub Pages (Actions) deploy workflow.
 
diff --git a/README.md b/README.md
@@ -40,10 +40,18 @@ model = OPLS(n_components=1, n_orthogonal=2, scale="standard").fit(X, y)
 model.predict(X)              # predictions
 model.transform(X)            # predictive scores
 model.transform_orthogonal(X) # orthogonal scores
+model.filter_transform(X)     # preprocessed, orthogonal-filtered X fed to the engine
 model.r2x_, model.r2y_        # fit summaries
 model.vip_                    # variable importance (predictive), lazy property
 ```
 
+The whole fitted pipeline (scaling → orthogonal filter → predictive PLS) is linear,
+so it collapses to coefficients on the raw input space:
+
+```python
+y_hat = (X @ model.coef_raw_.T + model.intercept_raw_).ravel()  # == model.predict(X)
+```
+
 Let cross-validated Q2 choose the number of orthogonal components with
 scikit-learn's `GridSearchCV` — no bespoke estimator needed (`scoring=None` gives
 out-of-fold R2, which equals Q2 for `OPLS`):
diff --git a/docs/quickstart.md b/docs/quickstart.md
@@ -14,7 +14,7 @@ model = OPLS(n_components=1, n_orthogonal=2).fit(X, y)
 model.predict(X)              # predicted y
 model.transform(X)            # predictive scores
 model.transform_orthogonal(X) # orthogonal scores
-model.r2y_, model.rmsee_      # training-fit summaries
+model.r2y_, model.rmse_       # training-fit summaries
 ```
 
 ## Choosing `n_orthogonal` by cross-validation
diff --git a/src/scikit_opls/_inspection.py b/src/scikit_opls/_inspection.py
@@ -4,10 +4,13 @@
 ``vip_`` / ``ortho_vip_`` properties on :class:`~scikit_opls.OPLS` and
 :class:`~scikit_opls.OPLSDA`; these functions compute them from fitted weights.
 
-VIP (Variable Importance in Projection) follows Galindo-Prieto et al. (2014):
+VIP (Variable Importance in Projection) is defined in the style of Galindo-Prieto
+et al. (2014); the values are not intended to reproduce ropls VIP exactly:
 
-- predictive VIP weights each predictive component by the Y variance it explains;
-- orthogonal VIP weights each orthogonal component by the X variance it explains.
+- predictive VIP is the standard PLS VIP of the predictive model fitted on the
+  orthogonally filtered X, weighting each component by the Y variance it explains;
+- orthogonal VIP is an X-variance-weighted score for the removed orthogonal
+  components, weighting each component by the X variance it explains.
 
 For non-empty blocks with positive explained variance, VIP is normalized so that
 sum(vip**2) == n_features. Empty or degenerate blocks return zeros.
diff --git a/src/scikit_opls/_opls.py b/src/scikit_opls/_opls.py
@@ -43,6 +43,62 @@
 from scikit_opls._utils import _has_nonzero_variation
 
 
+def _orthogonal_filter_matrix(
+    x_ortho_weights: NDArray[np.float64],
+    x_ortho_loadings: NDArray[np.float64],
+) -> NDArray[np.float64]:
+    """Right-side linear operator ``F`` such that ``X_filtered == X_scaled @ F``.
+
+    The replayed orthogonal filter applies ``X <- X - (X w_i) p_iᵀ`` for each
+    component, i.e. right multiplication by ``I - outer(w_i, p_i)``. Composing them
+    in order yields the single matrix equivalent of :func:`apply_orthogonal_filter`.
+    """
+    W = np.asarray(x_ortho_weights, dtype=np.float64)
+    P = np.asarray(x_ortho_loadings, dtype=np.float64)
+    n_features = W.shape[0]
+    eye = np.eye(n_features, dtype=np.float64)
+    F = eye.copy()
+    for i in range(W.shape[1]):
+        F = F @ (eye - np.outer(W[:, i], P[:, i]))
+    return F
+
+
+def _compose_raw_coefficients(
+    coef_filtered: NDArray[np.float64],
+    intercept_filtered: float | NDArray[np.float64],
+    x_mean: NDArray[np.float64],
+    x_std: NDArray[np.float64],
+    x_ortho_weights: NDArray[np.float64],
+    x_ortho_loadings: NDArray[np.float64],
+) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
+    """Collapse filtered/scaled-space PLS coefficients into raw-X coefficients.
+
+    The fitted prediction is linear: ``X -> (X - mean) / std -> @ F -> @ Bᶠ + b``,
+    where ``b`` is the predictive engine's prediction offset (``pls_.predict(0)``,
+    not ``pls_.intercept_``). With ``B_scaled = F @ Bᶠ`` and ``inv_scale = 1 / std``
+    this reduces to ``y = X @ B_raw + (b - (mean * inv_scale) @ B_scaled)`` where
+    ``B_raw = inv_scale[:, None] * B_scaled``.
+    """
+    coef_arr = np.asarray(coef_filtered, dtype=np.float64)
+    if coef_arr.ndim == 1:
+        coef_arr = coef_arr.reshape(1, -1)
+    # sklearn PLSRegression exposes coef_ as (n_targets, n_features); work with the
+    # transpose B as (n_features, n_targets).
+    b_filtered = coef_arr.T
+
+    f_matrix = _orthogonal_filter_matrix(x_ortho_weights, x_ortho_loadings)
+    b_scaled = f_matrix @ b_filtered
+
+    inv_scale = 1.0 / np.asarray(x_std, dtype=np.float64)
+    b_raw = inv_scale[:, None] * b_scaled
+
+    offset_scaled = np.asarray(x_mean, dtype=np.float64) * inv_scale
+    intercept_raw = np.asarray(intercept_filtered, dtype=np.float64) - (
+        offset_scaled @ b_scaled
+    )
+    return b_raw.T, intercept_raw
+
+
 class OPLS(RegressorMixin, TransformerMixin, BaseEstimator):
     """Orthogonal Projections to Latent Structures regression.
 
@@ -76,15 +132,24 @@ class OPLS(RegressorMixin, TransformerMixin, BaseEstimator):
         coefficients act on the preprocessed, orthogonal-filtered space, and
         cannot be directly multiplied with raw input ``X``. Use ``predict(X)``
         for raw-input predictions.
+    coef_raw_ : ndarray of shape (1, n_features)
+        Linear coefficients on the original *raw* input feature space, collapsing the
+        scaling, orthogonal filter and predictive PLS into one linear map.
+        ``predict(X) == (X @ coef_raw_.T + intercept_raw_).ravel()`` up to
+        floating-point tolerance. (No sklearn ``coef_`` alias is exposed.)
     intercept_ : float or ndarray
         Intercept of the underlying PLS model for predictions from the preprocessed,
         orthogonal-filtered X block to the original y scale.
+    intercept_raw_ : ndarray of shape (1,)
+        Intercept paired with ``coef_raw_`` for prediction from raw input ``X``.
     pls_ : PLSRegression
         The fitted predictive engine.
     x_mean_, x_std_ : ndarray
         Centering/scaling vectors applied to ``X``.
-    r2x_, r2x_ortho_, r2y_, rmsee_ : float
-        Training-set fit summaries. ``r2x_`` is computed from the predictive PLS
+    r2x_, r2x_ortho_, r2y_, rmse_ : float
+        Training-set fit summaries. ``rmse_`` is the uncorrected training root mean
+        squared error (no degrees-of-freedom correction). ``r2x_`` is computed from
+        the predictive PLS
         scores/loadings on the filtered ``X`` block, relative to the preprocessed
         original ``X``. ``r2x_ortho_`` is computed from the removed orthogonal
         scores/loadings. These are diagnostic summaries, not a guaranteed exact
@@ -106,6 +171,12 @@ class OPLS(RegressorMixin, TransformerMixin, BaseEstimator):
     Classic OPLS uses ``n_components=1``; ``n_orthogonal=0`` reduces to ordinary
     ``PLSRegression``, and ``n_components>1`` is orthogonal-filtered multi-component
     PLS (interpret score plots / S-plots component-wise).
+
+    Constant and near-constant columns are retained rather than removed, preserving
+    alignment with the input feature matrix, feature names, VIP arrays and
+    ``coef_filtered_``. To drop them, prepend
+    :class:`~sklearn.feature_selection.VarianceThreshold` in a
+    :class:`~sklearn.pipeline.Pipeline`.
     """
 
     n_features_in_: int
@@ -121,12 +192,14 @@ class OPLS(RegressorMixin, TransformerMixin, BaseEstimator):
     x_scores_: NDArray[np.float64]
     y_loadings_: NDArray[np.float64]
     coef_filtered_: NDArray[np.float64]
+    coef_raw_: NDArray[np.float64]
     intercept_: float | NDArray[np.float64]
+    intercept_raw_: float | NDArray[np.float64]
     pls_: PLSRegression
     r2x_: float
     r2x_ortho_: float
     r2y_: float
-    rmsee_: float
+    rmse_: float
     _n_features_out: int
 
     _parameter_constraints: dict = {
@@ -226,14 +299,29 @@ def fit(self, X: ArrayLike, y: ArrayLike) -> OPLS:
         self.y_loadings_ = self.pls_.y_loadings_
         self.coef_filtered_ = self.pls_.coef_
         self.intercept_ = self.pls_.intercept_
+        # The engine's prediction offset is predict(0), not intercept_: sklearn's
+        # PLSRegression centers the filtered X internally, so predict(Z) ==
+        # Z @ coef_.T + predict(0) but intercept_ omits that centering term (it only
+        # coincides with predict(0) when the filtered X is already centered).
+        engine_offset = self.pls_.predict(
+            np.zeros((1, X_filtered.shape[1]), dtype=np.float64)
+        ).ravel()
+        self.coef_raw_, self.intercept_raw_ = _compose_raw_coefficients(
+            self.coef_filtered_,
+            engine_offset,
+            self.x_mean_,
+            self.x_std_,
+            self.x_ortho_weights_,
+            self.x_ortho_loadings_,
+        )
 
         y_fit = self.pls_.predict(X_filtered)
         self.r2x_ = explained_x_variance(Xs, self.x_scores_, self.x_loadings_)
         self.r2x_ortho_ = explained_x_variance(
             Xs, self.x_ortho_scores_, self.x_ortho_loadings_
         )
         self.r2y_ = float(r2_score(y, y_fit))
-        self.rmsee_ = float(root_mean_squared_error(y, y_fit))
+        self.rmse_ = float(root_mean_squared_error(y, y_fit))
         return self
 
     def predict(self, X: ArrayLike) -> NDArray[np.float64]:
@@ -288,13 +376,37 @@ def transform_orthogonal(self, X: ArrayLike) -> NDArray[np.float64]:
         check_is_fitted(self)
         return self._filter(X)[1]
 
+    def filter_transform(self, X: ArrayLike) -> NDArray[np.float64]:
+        """Return ``X`` after preprocessing and orthogonal filtering.
+
+        This is the matrix actually passed to the predictive PLS engine, so
+        ``self.pls_.predict(self.filter_transform(X))`` matches ``self.predict(X)``
+        (up to output shape). The result is in the preprocessed, orthogonal-filtered
+        space, **not** on the raw input scale. With ``n_orthogonal=0`` it is just the
+        preprocessed ``X``.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Samples to preprocess and filter.
+
+        Returns
+        -------
+        X_filtered : ndarray of shape (n_samples, n_features)
+            Preprocessed ``X`` with the fitted orthogonal variation removed.
+        """
+        check_is_fitted(self)
+        return self._filter(X)[0]
+
     @property
     def vip_(self) -> NDArray[np.float64]:
-        """Predictive VIP per feature (Galindo-Prieto 2014); ndarray (n_features,).
+        """Predictive VIP per feature; ndarray (n_features,).
 
-        Variable Importance in Projection of the predictive block, normalised so
+        Standard PLS Variable Importance in Projection computed from the predictive
+        model fitted on the orthogonally filtered ``X``, normalised so
         ``sum(vip_**2) == n_features``. Computed lazily on first access from the
-        fitted weights.
+        fitted weights. Defined in the style of Galindo-Prieto et al. (2014); not
+        intended to reproduce ropls VIP values exactly.
         """
         check_is_fitted(self)
         if not hasattr(self, "_vip_"):
diff --git a/src/scikit_opls/_orthogonal.py b/src/scikit_opls/_orthogonal.py
@@ -269,6 +269,14 @@ def opls_filter(X: ArrayLike, Y: ArrayLike, n_components: int) -> OrthogonalComp
 
     Notes
     -----
+    The predictive direction is computed once from the original ``(X, Y)`` and
+    reused for every orthogonal component. For univariate ``Y`` this is exact, not a
+    shortcut: each orthogonal score is constructed exactly orthogonal to ``Y``, so
+    removing it leaves ``Xᵀy`` (hence the predictive direction ``w_p ∝ Xᵀy``)
+    unchanged. Recomputing ``w_p`` from each deflated residual would yield the same
+    direction, so the canonical Trygg-Wold OPLS algorithm coincides with this
+    fixed-direction filter for single-response OPLS.
+
     When ``n_components=0``, ``Y`` is not inspected because no predictive direction
     is needed; the returned predictive weight is a zero vector.
     """
diff --git a/tests/test_opls.py b/tests/test_opls.py
@@ -308,3 +308,29 @@ def test_opls_n_components_exceeds_post_filter_rank_raises():
         ValueError, match="exceeds the numerical rank of X after orthogonal filtering"
     ):
         OPLS(n_components=3, n_orthogonal=1).fit(X, y)
+
+
+def test_filter_transform_matches_predict_path():
+    """predict(X) == pls_.predict(filter_transform(X)), the matrix fed to the engine."""
+    X, y = _regression_data(seed=3)
+    model = OPLS(n_components=1, n_orthogonal=2).fit(X, y)
+    Xf = model.filter_transform(X)
+    assert_allclose(model.pls_.predict(Xf).ravel(), model.predict(X), atol=1e-10)
+
+
+def test_filter_transform_zero_orthogonal_is_preprocessed_x():
+    """With n_orthogonal=0 the filter is a no-op: just the preprocessed X."""
+    from scikit_opls._preprocessing import apply_scaling
+
+    X, y = _regression_data(seed=4)
+    model = OPLS(n_components=1, n_orthogonal=0).fit(X, y)
+    expected = apply_scaling(np.asarray(X, dtype=float), model.x_mean_, model.x_std_)
+    assert_allclose(model.filter_transform(X), expected, atol=1e-12)
+
+
+def test_filter_transform_requires_fit():
+    from sklearn.exceptions import NotFittedError
+
+    X, _ = _regression_data(seed=5)
+    with pytest.raises(NotFittedError):
+        OPLS().filter_transform(X)
diff --git a/tests/test_opls_coefficients.py b/tests/test_opls_coefficients.py
@@ -0,0 +1,80 @@
+"""Raw-space OPLS coefficients reproduce predict() as a single linear map."""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+from sklearn.utils._testing import assert_allclose
+
+from scikit_opls import OPLS, OPLSDA
+
+
+@pytest.mark.parametrize("scale", ["none", "center", "pareto", "standard"])
+@pytest.mark.parametrize("n_orthogonal", [0, 1, 2])
+def test_raw_coefficients_reproduce_predict(scale, n_orthogonal):
+    rng = np.random.default_rng(0)
+    X = rng.normal(size=(50, 8))
+    beta = np.array([1.5, -0.7, 0.4, 0.0, 0.0, 0.2, 0.0, -0.3])
+    y = X @ beta + 0.1 * rng.normal(size=50)
+
+    model = OPLS(n_components=1, n_orthogonal=n_orthogonal, scale=scale).fit(X, y)
+
+    y_predict = model.predict(X)
+    y_linear = (X @ model.coef_raw_.T + model.intercept_raw_).ravel()
+    assert_allclose(y_predict, y_linear, rtol=1e-10, atol=1e-10)
+
+
+@pytest.mark.parametrize("scale", ["none", "center", "pareto", "standard"])
+@pytest.mark.parametrize("n_orthogonal", [0, 1, 2])
+def test_raw_coefficients_reproduce_predict_on_new_data(scale, n_orthogonal):
+    rng = np.random.default_rng(1)
+    X = rng.normal(size=(60, 10))
+    X_new = rng.normal(size=(17, 10))
+    beta = rng.normal(size=10)
+    y = X @ beta + 0.2 * rng.normal(size=60)
+
+    model = OPLS(n_components=1, n_orthogonal=n_orthogonal, scale=scale).fit(X, y)
+
+    y_predict = model.predict(X_new)
+    y_linear = (X_new @ model.coef_raw_.T + model.intercept_raw_).ravel()
+    assert_allclose(y_predict, y_linear, rtol=1e-10, atol=1e-10)
+
+
+def test_raw_coefficients_reproduce_predict_with_multiple_predictive_components():
+    rng = np.random.default_rng(2)
+    X = rng.normal(size=(80, 12))
+    beta = rng.normal(size=12)
+    y = X @ beta + 0.2 * rng.normal(size=80)
+
+    model = OPLS(n_components=2, n_orthogonal=1, scale="standard").fit(X, y)
+
+    y_predict = model.predict(X)
+    y_linear = (X @ model.coef_raw_.T + model.intercept_raw_).ravel()
+    assert_allclose(y_predict, y_linear, rtol=1e-10, atol=1e-10)
+
+
+def test_raw_coefficients_shape_and_no_coef_alias():
+    rng = np.random.default_rng(4)
+    X = rng.normal(size=(40, 6))
+    y = X[:, 0] - 0.5 * X[:, 1] + 0.1 * rng.normal(size=40)
+
+    model = OPLS(n_components=1, n_orthogonal=1).fit(X, y)
+
+    assert model.coef_raw_.shape == (1, X.shape[1])
+    # The raw coefficients are deliberately not exposed as a bare sklearn coef_.
+    assert not hasattr(model, "coef_")
+
+
+def test_oplsda_inner_opls_has_raw_coefficients():
+    rng = np.random.default_rng(3)
+    X = rng.normal(size=(40, 6))
+    y = np.array([0, 1] * 20)
+
+    clf = OPLSDA(n_components=1, n_orthogonal=1, scale="standard").fit(X, y)
+
+    assert hasattr(clf.opls_, "coef_raw_")
+    assert hasattr(clf.opls_, "intercept_raw_")
+
+    score_predict = clf.decision_function(X)
+    score_linear = (X @ clf.opls_.coef_raw_.T + clf.opls_.intercept_raw_).ravel()
+    assert_allclose(score_predict, score_linear, rtol=1e-10, atol=1e-10)
diff --git a/tests/test_orthogonal.py b/tests/test_orthogonal.py
diff --git a/tests/test_vip.py b/tests/test_vip.py