
[BUG] Fixing bug #1930 with proper syntax #1950


Open · wants to merge 1 commit into base: main
11 changes: 8 additions & 3 deletions pytorch_forecasting/data/timeseries/_timeseries.py
@@ -1430,7 +1430,7 @@ def _data_to_tensors(self, data: pd.DataFrame) -> dict[str, torch.Tensor]:
             time index
         """

-        def _to_tensor(cols, long=True) -> torch.Tensor:
+        def _to_tensor(cols, long=True, real=False) -> torch.Tensor:
             """Convert data[cols] to torch tensor.

             Converts sub-frames to numpy and then to torch tensor.
@@ -1439,16 +1439,21 @@ def _to_tensor(cols, long=True) -> torch.Tensor:
             * float columns are converted to torch.float
             * integer columns are converted to torch.int64 or torch.long,
               depending on the long argument
+            * mixed columns are converted to their commonest type.
+            * if the real argument is True, the conversion to torch.float is forced
             """
             if not isinstance(cols, list) and cols not in data.columns:
                 return None
             if isinstance(cols, list) and len(cols) == 0:
                 dtypekind = "f"
             elif isinstance(cols, list):  # and len(cols) > 0
-                dtypekind = data.dtypes[cols[0]].kind
+                # dtypekind = data.dtypes[cols[0]].kind
+                dtypekind = data[cols].to_numpy().dtype.kind
Collaborator (inline review comment on the line above):
why are we doing this? It feels wasteful to do the conversion for the entire data container, instead of looking up the dtype.

             else:
                 dtypekind = data.dtypes[cols].kind
-            if not long:
+            if real:
+                return torch.tensor(data[cols].to_numpy(np.float64), dtype=torch.float)
+            elif not long:
                 return torch.tensor(data[cols].to_numpy(np.int64), dtype=torch.int64)
             elif dtypekind in "bi":
                 return torch.tensor(data[cols].to_numpy(np.int64), dtype=torch.long)
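For illustration, a minimal pandas/numpy sketch of what the changed dtypekind lookup does (the DataFrame and column names here are made up for the example, not taken from the PR), plus a possible cheaper lookup via np.result_type in the spirit of the review comment above; that alternative is only a suggestion, not part of this change:

import numpy as np
import pandas as pd

df = pd.DataFrame({"time_idx": np.arange(5), "value": np.random.rand(5)})
cols = ["time_idx", "value"]

# old lookup: only the first column's dtype was inspected
print(df.dtypes[cols[0]].kind)         # "i" -- integer, so the mixed block fell into the int64/long branch
# new lookup: numpy infers the common dtype of the whole sub-frame
print(df[cols].to_numpy().dtype.kind)  # "f" -- the columns are promoted to float64
# possible cheaper variant: promote the per-column dtypes without materialising the array
print(np.result_type(*df.dtypes[cols]).kind)  # "f"
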
38 changes: 38 additions & 0 deletions tests/test_data/test_timeseries.py
@@ -678,3 +678,41 @@ def distance_to_weights(dist):
        if idx > 100:
            break
    print(a)


def test_correct_dtype_inference():
    # Create a small dataset
    data = pd.DataFrame(
        {
            "time_idx": np.arange(30),
            "value": np.sin(np.arange(30) / 5) + np.random.normal(scale=1, size=30),
            "group": ["A"] * 30,
        }
    )

    # Define the dataset
    dataset = TimeSeriesDataSet(
        data.copy(),
        time_idx="time_idx",
        target="value",
        group_ids=["group"],
        static_categoricals=["group"],
        max_encoder_length=4,
        max_prediction_length=2,
        time_varying_unknown_reals=["value"],
        target_normalizer=None,
        # the key to the regression: an integer column used as a known real,
        # with its scaler disabled
        time_varying_known_reals=["time_idx"],
        scalers=dict(time_idx=None),
    )

    # and the dataloader
    dataloader = dataset.to_dataloader(batch_size=8)

    x, y = next(iter(dataset))
    # real features must be real
    assert x["x_cont"].dtype is torch.float32

    x, y = next(iter(dataloader))
    # real features must be real
    assert x["encoder_cont"].dtype is torch.float32
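
For context on the two assertions, a minimal standalone sketch of the underlying behaviour (plain numpy/torch, independent of TimeSeriesDataSet): an integer column converted as-is keeps an integer dtype, while the forced-float path used by the real=True branch of _to_tensor yields torch.float32.

import numpy as np
import torch

time_idx = np.arange(30)  # integer column, dtype kind "i"

# without forcing: torch keeps the integer dtype (typically torch.int64)
print(torch.tensor(time_idx).dtype)
# forced-float conversion, as in the real=True branch of _to_tensor
print(torch.tensor(time_idx.astype(np.float64), dtype=torch.float).dtype)  # torch.float32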