Skip to content

Commit b4b3432

Browse files
authored
[FIX] Use the common MLP as decoder in DeepAR (#1472)
1 parent f131cde commit b4b3432

3 files changed

Lines changed: 55 additions & 81 deletions

File tree

neuralforecast/common/_modules.py

Lines changed: 48 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -41,63 +41,72 @@
4141

4242

4343
class MLP(nn.Module):
44-
"""Multi-Layer Perceptron for time series forecasting.
44+
"""Multi-Layer Perceptron for time series forecasting.
4545
46-
A feedforward neural network with configurable depth and width. The network
47-
consists of an input layer, multiple hidden layers with activation functions
48-
and dropout, and an output layer. All hidden layers have the same dimensionality.
46+
A feedforward neural network with configurable depth and width. The network
47+
consists of an input layer, multiple hidden layers with activation functions
48+
and dropout, and an output layer. All hidden layers have the same dimensionality.
4949
50-
Args:
51-
in_features (int): Dimension of input features.
52-
out_features (int): Dimension of output features.
53-
activation (str): Activation function name. Must be one of the supported
54-
activations in ACTIVATIONS list (e.g., 'ReLU', 'Tanh', 'GELU', 'ELU').
55-
hidden_size (int): Number of units in each hidden layer. All hidden layers
56-
share the same dimensionality.
57-
num_layers (int): Total number of layers including input and output layers.
58-
Must be at least 2. For example, num_layers=3 creates: input layer,
59-
one hidden layer, and output layer.
60-
dropout (float): Dropout probability applied after each hidden layer's
50+
Args:
51+
in_features (int): Dimension of input features.
52+
out_features (int): Dimension of output features.
53+
activation (str): Activation function name. Must be one of the supported
54+
activations in ACTIVATIONS list (e.g., 'ReLU', 'Tanh', 'GELU', 'ELU').
55+
Ignored when num_layers=1.
56+
hidden_size (int): Number of units in each hidden layer. All hidden layers
57+
share the same dimensionality. Ignored when num_layers=1.
58+
num_layers (int): Total number of layers including input and output layers.
59+
Use num_layers=1 for a direct linear projection with no hidden layers or
60+
activation. For num_layers>=2, creates: input layer, (num_layers-2) hidden
61+
layers, and output layer. Values below 1 are not validated and build the same two-layer network as num_layers=2.
62+
dropout (float): Dropout probability applied after each hidden layer's
6163
activation. Should be in range [0.0, 1.0]. Not applied to output layer.
64+
Ignored when num_layers=1.
6265
6366
Returns:
6467
(torch.Tensor): Transformed output tensor of shape [..., out_features].
6568
66-
Notes:
67-
- The activation function is applied after each hidden layer's linear
68-
transformation, but not after the final output layer.
69-
- Dropout is applied after activation in hidden layers for regularization.
70-
- This MLP is used as a decoder component in various forecasting models
71-
including RNN, LSTM, GRU, DilatedRNN, TCN, and xLSTM.
69+
Notes:
70+
- The activation function is applied after each hidden layer's linear
71+
transformation, but not after the final output layer.
72+
- Dropout is applied after activation in hidden layers for regularization.
73+
- This MLP is used as a decoder component in various forecasting models
74+
including RNN, LSTM, GRU, DilatedRNN, TCN, xLSTM, and DeepAR.
7275
"""
7376

7477
def __init__(
7578
self, in_features, out_features, activation, hidden_size, num_layers, dropout
7679
):
7780
super().__init__()
78-
assert activation in ACTIVATIONS, f"{activation} is not in {ACTIVATIONS}"
7981

80-
self.activation = getattr(nn, activation)()
82+
if num_layers == 1:
83+
# Direct linear projection with no hidden layers or activation
84+
self.layers = nn.Sequential(
85+
nn.Linear(in_features=in_features, out_features=out_features)
86+
)
87+
else:
88+
assert activation in ACTIVATIONS, f"{activation} is not in {ACTIVATIONS}"
89+
self.activation = getattr(nn, activation)()
8190

82-
# MultiLayer Perceptron
83-
# Input layer
84-
layers = [
85-
nn.Linear(in_features=in_features, out_features=hidden_size),
86-
self.activation,
87-
nn.Dropout(dropout),
88-
]
89-
# Hidden layers
90-
for i in range(num_layers - 2):
91-
layers += [
92-
nn.Linear(in_features=hidden_size, out_features=hidden_size),
91+
# MultiLayer Perceptron
92+
# Input layer
93+
layers = [
94+
nn.Linear(in_features=in_features, out_features=hidden_size),
9395
self.activation,
9496
nn.Dropout(dropout),
9597
]
96-
# Output layer
97-
layers += [nn.Linear(in_features=hidden_size, out_features=out_features)]
98-
99-
# Store in layers as ModuleList
100-
self.layers = nn.Sequential(*layers)
98+
# Hidden layers
99+
for i in range(num_layers - 2):
100+
layers += [
101+
nn.Linear(in_features=hidden_size, out_features=hidden_size),
102+
self.activation,
103+
nn.Dropout(dropout),
104+
]
105+
# Output layer
106+
layers += [nn.Linear(in_features=hidden_size, out_features=out_features)]
107+
108+
# Wrap the collected layers in an nn.Sequential container
109+
self.layers = nn.Sequential(*layers)
101110

102111
def forward(self, x):
103112
return self.layers(x)

neuralforecast/models/deepar.py

Lines changed: 6 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22

33

4-
__all__ = ['Decoder', 'DeepAR']
4+
__all__ = ['DeepAR']
55

66

77
from typing import Optional
@@ -10,47 +10,10 @@
1010
import torch.nn as nn
1111

1212
from ..common._base_model import BaseModel
13+
from ..common._modules import MLP
1314
from ..losses.pytorch import MAE, DistributionLoss
1415

1516

16-
class Decoder(nn.Module):
17-
"""Multi-Layer Perceptron Decoder
18-
19-
Args:
20-
in_features (int): dimension of input.
21-
out_features (int): dimension of output.
22-
hidden_size (int): dimension of hidden layers.
23-
hidden_layers (int): number of hidden layers.
24-
"""
25-
26-
def __init__(self, in_features, out_features, hidden_size, hidden_layers):
27-
super().__init__()
28-
29-
if hidden_layers == 0:
30-
# Input layer
31-
layers = [nn.Linear(in_features=in_features, out_features=out_features)]
32-
else:
33-
# Input layer
34-
layers = [
35-
nn.Linear(in_features=in_features, out_features=hidden_size),
36-
nn.ReLU(),
37-
]
38-
# Hidden layers
39-
for i in range(hidden_layers - 2):
40-
layers += [
41-
nn.Linear(in_features=hidden_size, out_features=hidden_size),
42-
nn.ReLU(),
43-
]
44-
# Output layer
45-
layers += [nn.Linear(in_features=hidden_size, out_features=out_features)]
46-
47-
# Store in layers as ModuleList
48-
self.layers = nn.Sequential(*layers)
49-
50-
def forward(self, x):
51-
return self.layers(x)
52-
53-
5417
class DeepAR(BaseModel):
5518
"""DeepAR
5619
@@ -209,11 +172,13 @@ def __init__(
209172
)
210173

211174
# Decoder MLP
212-
self.decoder = Decoder(
175+
self.decoder = MLP(
213176
in_features=lstm_hidden_size,
214177
out_features=self.loss.outputsize_multiplier,
215178
hidden_size=decoder_hidden_size,
216-
hidden_layers=decoder_hidden_layers,
179+
num_layers=decoder_hidden_layers + 1,
180+
activation="ReLU",
181+
dropout=0.0,
217182
)
218183

219184
def forward(self, windows_batch):

neuralforecast/models/xlstm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def __init__(
9696
encoder_bias: bool = True,
9797
encoder_dropout: float = 0.1,
9898
decoder_hidden_size: int = 128,
99-
decoder_layers: int = 1,
99+
decoder_layers: int = 2,
100100
decoder_dropout: float = 0.0,
101101
decoder_activation: str = "GELU",
102102
backbone: str = "mLSTM",

0 commit comments

Comments
 (0)