pandas-dev · gumus-g · Jul 15, 2025 · arthurlw · Jul 31, 2025 · gumus-g
diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
@@ -390,7 +390,9 @@ def from_dummies(
         The default category is the implied category when a value has none of the
         listed categories specified with a one, i.e. if all dummies in a row are
         zero. Can be a single value for all variables or a dict directly mapping
-        the default categories to a prefix of a variable.
+        the default categories to a prefix of a variable. The default category
+        will be coerced to the dtype of ``data.columns`` if such coercion is
+        lossless; otherwise an error is raised.
 
     Returns
     -------

diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -1,9 +1,10 @@
 """
-:mod:`pandas.io.html` is a module containing functionality for dealing with
-HTML IO.
+:mod:`pandas.io.html` is a module containing functionality for dealing with HTML IO.
 
+Provides utilities for reading and parsing HTML tables into pandas DataFrames.
 """
 
+
 from __future__ import annotations
 
 from collections import abc
@@ -387,7 +388,7 @@ def _parse_tables(self, document, match, attrs):
 
     def _equals_tag(self, obj, tag) -> bool:
         """
-        Return whether an individual DOM node matches a tag
+        Return whether an individual DOM node matches a tag.
 
         Parameters
         ----------
@@ -399,8 +400,8 @@ def _equals_tag(self, obj, tag) -> bool:
 
         Returns
         -------
-        boolean
-            Whether `obj`'s tag name is `tag`
+        bool
+            Whether `obj`'s tag name is `tag`.
         """
         raise AbstractMethodError(self)
 
@@ -562,7 +563,7 @@ def _expand_colspan_rowspan(
 
     def _handle_hidden_tables(self, tbl_list, attr_name: str):
         """
-        Return list of tables, potentially removing hidden elements
+        Return list of tables, potentially removing hidden elements.
 
         Parameters
         ----------
@@ -679,8 +680,9 @@ def _build_doc(self):
 
 def _build_xpath_expr(attrs) -> str:
     """
-    Build an xpath expression to simulate bs4's ability to pass in kwargs to
-    search for attributes when using the lxml parser.
+    Build an XPath expression to simulate bs4's ability to pass in kwargs.
+
+    This emulates bs4's keyword-based attribute search when using the lxml parser.
 
     Parameters
     ----------
@@ -689,10 +691,11 @@ def _build_xpath_expr(attrs) -> str:
 
     Returns
     -------
-    expr : unicode
+    str
         An XPath expression that checks for the given HTML attributes.
     """
     # give class attribute as class_ because class is a python keyword
+
     if "class_" in attrs:
         attrs["class"] = attrs.pop("class_")
 
@@ -768,6 +771,8 @@ def _equals_tag(self, obj, tag) -> bool:
 
     def _build_doc(self):
         """
+        Build and parse the HTML document into a DOM tree.
+
         Raises
         ------
         ValueError

diff --git a/pandas/tests/reshape/test_from_dummies.py b/pandas/tests/reshape/test_from_dummies.py
@@ -333,9 +333,7 @@ def test_no_prefix_string_cats_default_category(
 ):
     dummies = DataFrame({"a": [1, 0, 0], "b": [0, 1, 0]})
     result = from_dummies(dummies, default_category=default_category)
-    expected = DataFrame(expected)
-    if using_infer_string:
-        expected[""] = expected[""].astype("str")
+    expected = DataFrame(expected, dtype=dummies.columns.dtype)
     tm.assert_frame_equal(result, expected)
 
 
@@ -449,3 +447,31 @@ def test_maintain_original_index():
     result = from_dummies(df)
     expected = DataFrame({"": list("abca")}, index=list("abcd"))
     tm.assert_frame_equal(result, expected)
+
+
+def test_int_columns_with_float_default():
+    # https://github.com/pandas-dev/pandas/pull/60694
+    df = DataFrame(
+        {
+            3: [1, 0, 0],
+            4: [0, 1, 0],
+        },
+    )
+    with pytest.raises(ValueError, match="Trying to coerce float values to integers"):
+        from_dummies(df, default_category=0.5)
+
+
+def test_object_dtype_preserved():
+    # https://github.com/pandas-dev/pandas/pull/60694
+    # When the input columns have object dtype, the result should too,
+    # even when infer_string is True.
+    df = DataFrame(
+        {
+            "x": [1, 0, 0],
+            "y": [0, 1, 0],
+        },
+    )
+    df.columns = df.columns.astype("object")
+    result = from_dummies(df, default_category="z")
+    expected = DataFrame({"": ["x", "y", "z"]}, dtype="object")
+    tm.assert_frame_equal(result, expected)