
[BUG] Fixing bug #1930 with proper syntax #1950


Open · wants to merge 1 commit into base: main
11 changes: 8 additions & 3 deletions pytorch_forecasting/data/timeseries/_timeseries.py
@@ -1430,7 +1430,7 @@ def _data_to_tensors(self, data: pd.DataFrame) -> dict[str, torch.Tensor]:
             time index
         """

-        def _to_tensor(cols, long=True) -> torch.Tensor:
+        def _to_tensor(cols, long=True, real=False) -> torch.Tensor:
             """Convert data[cols] to torch tensor.

             Converts sub-frames to numpy and then to torch tensor.
@@ -1439,16 +1439,21 @@ def _to_tensor(cols, long=True) -> torch.Tensor:
             * float columns are converted to torch.float
             * integer columns are converted to torch.int64 or torch.long,
               depending on the long argument
+            * mixed columns are converted to their commonest type.
+            * if the real argument is True, the conversion to torch.float is forced
             """
             if not isinstance(cols, list) and cols not in data.columns:
                 return None
             if isinstance(cols, list) and len(cols) == 0:
                 dtypekind = "f"
             elif isinstance(cols, list):  # and len(cols) > 0
-                dtypekind = data.dtypes[cols[0]].kind
+                # dtypekind = data.dtypes[cols[0]].kind
+                dtypekind = data[cols].to_numpy().dtype.kind
Collaborator (inline review comment on the line above):
why are we doing this? It feels wasteful to do the conversion for the entire data container, instead of looking up the dtype.

             else:
                 dtypekind = data.dtypes[cols].kind
-            if not long:
+            if real:
+                return torch.tensor(data[cols].to_numpy(np.float64), dtype=torch.float)
+            elif not long:
                 return torch.tensor(data[cols].to_numpy(np.int64), dtype=torch.int64)
             elif dtypekind in "bi":
                 return torch.tensor(data[cols].to_numpy(np.int64), dtype=torch.long)
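For illustration, a minimal pandas/numpy sketch of what the changed dtypekind lookup does (the DataFrame and column names here are made up for the example, not taken from the PR), plus a possible cheaper lookup via np.result_type in the spirit of the review comment above; that alternative is only a suggestion, not part of this change:

import numpy as np
import pandas as pd

df = pd.DataFrame({"time_idx": np.arange(5), "value": np.random.rand(5)})
cols = ["time_idx", "value"]

# old lookup: only the first column's dtype was inspected
print(df.dtypes[cols[0]].kind)         # "i" -- integer, so the mixed block fell into the int64/long branch
# new lookup: numpy infers the common dtype of the whole sub-frame
print(df[cols].to_numpy().dtype.kind)  # "f" -- the columns are promoted to float64
# possible cheaper variant: promote the per-column dtypes without materialising the array
print(np.result_type(*df.dtypes[cols]).kind)  # "f"
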
38 changes: 38 additions & 0 deletions tests/test_data/test_timeseries.py
@@ -678,3 +678,41 @@ def distance_to_weights(dist):
        if idx > 100:
            break
    print(a)


def test_correct_dtype_inference():
    # Create a small dataset
    data = pd.DataFrame(
        {
            "time_idx": np.arange(30),
            "value": np.sin(np.arange(30) / 5) + np.random.normal(scale=1, size=30),
            "group": ["A"] * 30,
        }
    )

    # Define the dataset
    dataset = TimeSeriesDataSet(
        data.copy(),
        time_idx="time_idx",
        target="value",
        group_ids=["group"],
        static_categoricals=["group"],
        max_encoder_length=4,
        max_prediction_length=2,
        time_varying_unknown_reals=["value"],
        target_normalizer=None,
        # the key to the regression: an integer column used as a known real,
        # with its scaler disabled
        time_varying_known_reals=["time_idx"],
        scalers=dict(time_idx=None),
    )

    # and the dataloader
    dataloader = dataset.to_dataloader(batch_size=8)

    x, y = next(iter(dataset))
    # real features must be real
    assert x["x_cont"].dtype is torch.float32

    x, y = next(iter(dataloader))
    # real features must be real
    assert x["encoder_cont"].dtype is torch.float32
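
For context on the two assertions, a minimal standalone sketch of the underlying behaviour (plain numpy/torch, independent of TimeSeriesDataSet): an integer column converted as-is keeps an integer dtype, while the forced-float path used by the real=True branch of _to_tensor yields torch.float32.

import numpy as np
import torch

time_idx = np.arange(30)  # integer column, dtype kind "i"

# without forcing: torch keeps the integer dtype (typically torch.int64)
print(torch.tensor(time_idx).dtype)
# forced-float conversion, as in the real=True branch of _to_tensor
print(torch.tensor(time_idx.astype(np.float64), dtype=torch.float).dtype)  # torch.float32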