From aa619ec423c63678852dd3765a1ee7755e59cba3 Mon Sep 17 00:00:00 2001 From: cngmid Date: Wed, 20 Aug 2025 14:43:35 +0200 Subject: [PATCH 1/2] Fixing bug #1930 with proper syntax --- .../data/timeseries/_timeseries.py | 11 ++++-- tests/test_data/test_timeseries.py | 38 +++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/pytorch_forecasting/data/timeseries/_timeseries.py b/pytorch_forecasting/data/timeseries/_timeseries.py index 98d16c920..1a9a4b586 100644 --- a/pytorch_forecasting/data/timeseries/_timeseries.py +++ b/pytorch_forecasting/data/timeseries/_timeseries.py @@ -1430,7 +1430,7 @@ def _data_to_tensors(self, data: pd.DataFrame) -> dict[str, torch.Tensor]: time index """ - def _to_tensor(cols, long=True) -> torch.Tensor: + def _to_tensor(cols, long=True, real=False) -> torch.Tensor: """Convert data[cols] to torch tensor. Converts sub-frames to numpy and then to torch tensor. @@ -1439,16 +1439,21 @@ def _to_tensor(cols, long=True) -> torch.Tensor: * float columns are converted to torch.float * integer columns are converted to torch.int64 or torch.long, depending on the long argument + * mixed columns are converted to their common (promoted) dtype. 
+ * if the real argument is True, force the conversion to torch.float """ if not isinstance(cols, list) and cols not in data.columns: return None if isinstance(cols, list) and len(cols) == 0: dtypekind = "f" elif isinstance(cols, list): # and len(cols) > 0 - dtypekind = data.dtypes[cols[0]].kind + # dtypekind = data.dtypes[cols[0]].kind + dtypekind = data[cols].to_numpy().dtype.kind else: dtypekind = data.dtypes[cols].kind - if not long: + if real: + return torch.tensor(data[cols].to_numpy(np.float64), dtype=torch.float) + elif not long: return torch.tensor(data[cols].to_numpy(np.int64), dtype=torch.int64) elif dtypekind in "bi": return torch.tensor(data[cols].to_numpy(np.int64), dtype=torch.long) diff --git a/tests/test_data/test_timeseries.py b/tests/test_data/test_timeseries.py index 0b1b0ce74..4bb909792 100644 --- a/tests/test_data/test_timeseries.py +++ b/tests/test_data/test_timeseries.py @@ -678,3 +678,41 @@ def distance_to_weights(dist): if idx > 100: break print(a) + + +def test_correct_dtype_inference(): + # Create a small dataset + data = pd.DataFrame( + { + "time_idx": np.arange(30), + "value": np.sin(np.arange(30) / 5) + np.random.normal(scale=1, size=30), + "group": ["A"] * 30, + } + ) + + # Define the dataset + dataset = TimeSeriesDataSet( + data.copy(), + time_idx="time_idx", + target="value", + group_ids=["group"], + static_categoricals=["group"], + max_encoder_length=4, + max_prediction_length=2, + time_varying_unknown_reals=["value"], + target_normalizer=None, + # integer time_idx passed as a known real, with no scaler + time_varying_known_reals=["time_idx"], + scalers=dict(time_idx=None), + ) + + # and the dataloader + dataloader = dataset.to_dataloader(batch_size=8) + + x, y = next(iter(dataset)) + # real features must be float32 + assert x["x_cont"].dtype is torch.float32 + + x, y = next(iter(dataloader)) + # real features must be float32 + assert x["encoder_cont"].dtype is torch.float32 From 9d2f5b1deeabea170b3dfa7ada14388b75c7a6a8 Mon Sep 17 00:00:00 2001 From: cngmid Date: Tue, 26 Aug 2025 
09:00:11 +0200 Subject: [PATCH 2/2] Modify fix for bug #1930: updated _to_tensor logic --- pytorch_forecasting/data/timeseries/_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_forecasting/data/timeseries/_timeseries.py b/pytorch_forecasting/data/timeseries/_timeseries.py index 1a9a4b586..d4c812d13 100644 --- a/pytorch_forecasting/data/timeseries/_timeseries.py +++ b/pytorch_forecasting/data/timeseries/_timeseries.py @@ -1448,7 +1448,7 @@ def _to_tensor(cols, long=True, real=False) -> torch.Tensor: dtypekind = "f" elif isinstance(cols, list): # and len(cols) > 0 # dtypekind = data.dtypes[cols[0]].kind - dtypekind = data[cols].to_numpy().dtype.kind + dtypekind = np.result_type(*data[cols].dtypes.to_list()).kind else: dtypekind = data.dtypes[cols].kind if real: