Skip to content

Commit 770d863

Browse files
committed
BUG: Fix warning for extra fields in read_csv with on_bad_lines callable
1 parent 1d153bb commit 770d863

File tree

3 files changed

+24
-5
lines changed

3 files changed

+24
-5
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,10 @@ I/O
814814
- Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`)
815815
- Bug in :meth:`to_csv` where ``quotechar`` is not escaped when ``escapechar`` is not None (:issue:`61407`)
816816
- Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
817+
- Bug in :func:`read_csv` with ``engine="python"`` and callable ``on_bad_lines``
818+
where a ``ParserWarning`` for extra fields returned by the callable was only
819+
raised when ``index_col`` was ``None``. Now the warning is consistently raised
820+
regardless of ``index_col`` (:issue:`61837`)
817821

818822
Period
819823
^^^^^^

pandas/io/parsers/base_parser.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -615,15 +615,13 @@ def _check_data_length(
615615
data: list of array-likes containing the data column-wise.
616616
"""
617617
if not self.index_col and len(columns) != len(data) and columns:
618-
empty_str = is_object_dtype(data[-1]) and data[-1] == ""
619618
# error: No overload variant of "__ror__" of "ndarray" matches
620619
# argument type "ExtensionArray"
621-
empty_str_or_na = empty_str | isna(data[-1]) # type: ignore[operator]
622-
if len(columns) == len(data) - 1 and np.all(empty_str_or_na):
620+
if len(data) > len(columns) :
623621
return
624622
warnings.warn(
625-
"Length of header or names does not match length of data. This leads "
626-
"to a loss of data with index_col=False.",
623+
f"Length of header or names ({len(columns)}) does not match number of "
624+
f"fields in line ({len(data)}). Extra field will be dropped.",
627625
ParserWarning,
628626
stacklevel=find_stack_level(),
629627
)

pandas/tests/io/parser/test_python_parser_only.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,23 @@ def test_malformed_skipfooter(python_parser_only):
322322
parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
323323

324324

325+
def test_on_bad_lines_extra_fields_warns(python_parser_only):
326+
parser = python_parser_only
327+
data = """id,field_1,field_2
328+
101,A,B
329+
102,C,D, E
330+
103,F,G
331+
"""
332+
333+
def line_fixer(_line):
334+
return ["1", "2", "3", "4", "5"]
335+
for index_col in [None, 0]:
336+
with tm.assert_produces_warning(ParserWarning):
337+
parser.read_csv(
338+
StringIO(data), on_bad_lines=line_fixer, index_col=index_col
339+
)
340+
341+
325342
def test_python_engine_file_no_next(python_parser_only):
326343
parser = python_parser_only
327344

0 commit comments

Comments
 (0)