diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3c5854602df53..63a64ffcf893c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -355,7 +355,7 @@ cpdef array_to_datetime( iresult[i] = parse_pydatetime(val, &dts, creso=creso) elif PyDate_Check(val): - item_reso = NPY_DATETIMEUNIT.NPY_FR_s + item_reso = NPY_DATETIMEUNIT.NPY_FR_us state.update_creso(item_reso) if infer_reso: creso = state.creso diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 6b9f41b1bb06f..352c5734a8c39 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -35,7 +35,8 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, cdef _TSObject convert_datetime_to_tsobject(datetime ts, tzinfo tz, int32_t nanos=*, - NPY_DATETIMEUNIT reso=*) + NPY_DATETIMEUNIT reso=*, + NPY_DATETIMEUNIT best_reso=*) cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, bint dayfirst=*, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2a080bcb19ae9..0f57a1e477481 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -33,6 +33,7 @@ from pandas._libs.missing cimport checknull_with_nat_and_na from pandas._libs.tslibs.dtypes cimport ( abbrev_to_npy_unit, get_supported_reso, + get_supported_reso_for_dts, npy_unit_to_attrname, periods_per_second, ) @@ -422,10 +423,9 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, return convert_datetime_to_tsobject(ts, tz, nanos, reso=reso) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's - # For date object we give the lowest supported resolution, i.e. 
"s" ts = datetime.combine(ts, time()) return convert_datetime_to_tsobject( - ts, tz, nanos=0, reso=NPY_DATETIMEUNIT.NPY_FR_s + ts, tz, nanos=0, reso=NPY_DATETIMEUNIT.NPY_FR_us ) else: from .period import Period @@ -453,7 +453,8 @@ cdef _TSObject convert_datetime_to_tsobject( datetime ts, tzinfo tz, int32_t nanos=0, - NPY_DATETIMEUNIT reso=NPY_FR_ns, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_GENERIC, + NPY_DATETIMEUNIT best_reso=NPY_DATETIMEUNIT.NPY_FR_GENERIC, ): """ Convert a datetime (or Timestamp) input `ts`, along with optional timezone @@ -480,7 +481,6 @@ cdef _TSObject convert_datetime_to_tsobject( _TSObject obj = _TSObject() int64_t pps - obj.creso = reso obj.fold = ts.fold if tz is not None: @@ -507,6 +507,10 @@ cdef _TSObject convert_datetime_to_tsobject( if nanos: obj.dts.ps = nanos * 1000 + if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + reso = get_supported_reso_for_dts(best_reso, &obj.dts) + obj.creso = reso + try: obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) except OverflowError as err: @@ -622,7 +626,7 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, &out_tzoffset, False ) if not string_to_dts_failed: - reso = get_supported_reso(out_bestunit) + reso = get_supported_reso_for_dts(out_bestunit, &dts) check_dts_bounds(&dts, reso) obj = _TSObject() obj.dts = dts @@ -660,8 +664,13 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, out_bestunit=&out_bestunit, nanos=&nanos, ) - reso = get_supported_reso(out_bestunit) - return convert_datetime_to_tsobject(dt, tz, nanos=nanos, reso=reso) + return convert_datetime_to_tsobject( + dt, + tz, + nanos=nanos, + reso=NPY_DATETIMEUNIT.NPY_FR_GENERIC, + best_reso=out_bestunit + ) cdef check_overflows(_TSObject obj, NPY_DATETIMEUNIT reso=NPY_FR_ns): diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd index d8c536a34bc04..e8e8c6a477773 100644 --- a/pandas/_libs/tslibs/dtypes.pxd +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -1,6 +1,9 @@ from numpy cimport int64_t 
-from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + npy_datetimestruct, +) cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit) @@ -9,6 +12,9 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) noexcept nogil cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1 cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1 cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso) +cdef NPY_DATETIMEUNIT get_supported_reso_for_dts( + NPY_DATETIMEUNIT reso, npy_datetimestruct* dts +) cdef bint is_supported_unit(NPY_DATETIMEUNIT reso) cdef dict c_OFFSET_TO_PERIOD_FREQSTR diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 4100f3d90e817..73191f3d0f58e 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -2,11 +2,21 @@ # originals from enum import Enum +import numpy as np + +from cpython.object cimport ( + Py_GE, + Py_LE, +) + from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + cmp_dtstructs, get_conversion_factor, import_pandas_datetime, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, ) import_pandas_datetime() @@ -504,6 +514,45 @@ cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso): return reso +cdef npy_datetimestruct dts_us_min, dts_us_max +pandas_datetime_to_datetimestruct( + np.iinfo(np.int64).min + 1, NPY_DATETIMEUNIT.NPY_FR_us, &dts_us_min +) +pandas_datetime_to_datetimestruct( + np.iinfo(np.int64).max, NPY_DATETIMEUNIT.NPY_FR_us, &dts_us_max +) + + +cdef NPY_DATETIMEUNIT get_supported_reso_for_dts( + NPY_DATETIMEUNIT reso, npy_datetimestruct* dts +): + # Similar to the above, but taking the actual datetime value into account, + # defaulting to 'us' if possible.
+ if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + return NPY_DATETIMEUNIT.NPY_FR_ns + # if dts.ps != 0: + # return NPY_DATETIMEUNIT.NPY_FR_ns + # elif ( + # cmp_dtstructs(dts, &dts_us_min, Py_GE) + # and cmp_dtstructs(dts, &dts_us_max, Py_LE) + # ): + # return NPY_DATETIMEUNIT.NPY_FR_us + # else: + # return NPY_DATETIMEUNIT.NPY_FR_s + if reso < NPY_DATETIMEUNIT.NPY_FR_us: + if ( + cmp_dtstructs(dts, &dts_us_min, Py_GE) + and cmp_dtstructs(dts, &dts_us_max, Py_LE) + ): + return NPY_DATETIMEUNIT.NPY_FR_us + else: + # TODO still distinguish between ms or s? + return NPY_DATETIMEUNIT.NPY_FR_s + elif reso > NPY_DATETIMEUNIT.NPY_FR_ns: + return NPY_DATETIMEUNIT.NPY_FR_ns + return reso + + cdef bint is_supported_unit(NPY_DATETIMEUNIT reso): return ( reso == NPY_DATETIMEUNIT.NPY_FR_ns diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index b443aa7bede22..84c2a449cdb06 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -53,6 +53,7 @@ from pandas._libs.tslibs.conversion cimport ( ) from pandas._libs.tslibs.dtypes cimport ( get_supported_reso, + get_supported_reso_for_dts, npy_unit_to_abbrev, npy_unit_to_attrname, ) @@ -421,7 +422,7 @@ def array_strptime( continue elif PyDate_Check(val): state.found_other = True - item_reso = NPY_DATETIMEUNIT.NPY_FR_s + item_reso = NPY_DATETIMEUNIT.NPY_FR_us state.update_creso(item_reso) if infer_reso: creso = state.creso @@ -460,7 +461,7 @@ def array_strptime( if string_to_dts_succeeded: # No error reported by string_to_dts, pick back up # where we left off - item_reso = get_supported_reso(out_bestunit) + item_reso = get_supported_reso_for_dts(out_bestunit, &dts) state.update_creso(item_reso) if infer_reso: creso = state.creso @@ -622,7 +623,7 @@ cdef tzinfo _parse_with_format( f"time data \"{val}\" doesn't match format \"{fmt}\"" ) - item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_s + item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us iso_year = -1 year = 1900 @@ -710,11 +711,7 @@ cdef tzinfo 
_parse_with_format( elif parse_code == 10: # e.g. val='10:10:10.100'; fmt='%H:%M:%S.%f' s = found_dict["f"] - if len(s) <= 3: - item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_ms - elif len(s) <= 6: - item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us - else: + if len(s) > 6: item_reso[0] = NPY_FR_ns # Pad to always return nanoseconds s += "0" * (9 - len(s)) diff --git a/pandas/conftest.py b/pandas/conftest.py index 774936be33631..07580890e672d 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -935,7 +935,7 @@ def rand_series_with_duplicate_datetimeindex() -> Series: (Period("2012-02-01", freq="D"), "period[D]"), ( Timestamp("2011-01-01", tz="US/Eastern"), - DatetimeTZDtype(unit="s", tz="US/Eastern"), + DatetimeTZDtype(unit="us", tz="US/Eastern"), ), (Timedelta(seconds=500), "timedelta64[ns]"), ] diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 533b9b689af0b..df1bd8fd56001 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -355,7 +355,7 @@ def unique(values): array([2, 1]) >>> pd.unique(pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])) - array(['2016-01-01T00:00:00'], dtype='datetime64[s]') + array(['2016-01-01T00:00:00.000000'], dtype='datetime64[us]') >>> pd.unique( ... 
pd.Series( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7e57b40e42430..a71bda593cbc0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1906,11 +1906,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: >>> rng_tz.floor("2h", ambiguous=False) DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) >>> rng_tz.floor("2h", ambiguous=True) DatetimeIndex(['2021-10-31 02:00:00+02:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) """ _floor_example = """>>> rng.floor('h') @@ -1933,11 +1933,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: >>> rng_tz.floor("2h", ambiguous=False) DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) >>> rng_tz.floor("2h", ambiguous=True) DatetimeIndex(['2021-10-31 02:00:00+02:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) """ _ceil_example = """>>> rng.ceil('h') @@ -1960,11 +1960,11 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: >>> rng_tz.ceil("h", ambiguous=False) DatetimeIndex(['2021-10-31 02:00:00+01:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) >>> rng_tz.ceil("h", ambiguous=True) DatetimeIndex(['2021-10-31 02:00:00+02:00'], - dtype='datetime64[s, Europe/Amsterdam]', freq=None) + dtype='datetime64[us, Europe/Amsterdam]', freq=None) """ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4103b7c337efe..d210d9eced91e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -220,7 +220,7 @@ class DatetimeArray(dtl.TimelikeOps, 
dtl.DatelikeOps): # type: ignore[misc] ... ) ['2023-01-01 00:00:00', '2023-01-02 00:00:00'] - Length: 2, dtype: datetime64[s] + Length: 2, dtype: datetime64[us] """ _typ = "datetimearray" @@ -614,7 +614,7 @@ def tz(self) -> tzinfo | None: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.tz datetime.timezone.utc @@ -1044,7 +1044,7 @@ def tz_localize( 4 2018-10-28 02:30:00+01:00 5 2018-10-28 03:00:00+01:00 6 2018-10-28 03:30:00+01:00 - dtype: datetime64[s, CET] + dtype: datetime64[us, CET] In some cases, inferring the DST is impossible. In such cases, you can pass an ndarray to the ambiguous parameter to set the DST explicitly @@ -1056,7 +1056,7 @@ def tz_localize( 0 2018-10-28 01:20:00+02:00 1 2018-10-28 02:36:00+02:00 2 2018-10-28 03:46:00+01:00 - dtype: datetime64[s, CET] + dtype: datetime64[us, CET] If the DST transition causes nonexistent times, you can shift these dates forward or backwards with a timedelta object or `'shift_forward'` @@ -1439,7 +1439,7 @@ def time(self) -> npt.NDArray[np.object_]: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.time 0 10:00:00 1 11:00:00 @@ -1482,7 +1482,7 @@ def timetz(self) -> npt.NDArray[np.object_]: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.timetz 0 10:00:00+00:00 1 11:00:00+00:00 @@ -1524,7 +1524,7 @@ def date(self) -> npt.NDArray[np.object_]: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.date 0 2020-01-01 1 2020-02-01 @@ -1873,7 +1873,7 @@ def isocalendar(self) -> DataFrame: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.dayofyear 0 1 1 32 @@ -1909,7 +1909,7 @@ def isocalendar(self) -> DataFrame: >>> s 0 2020-01-01 
10:00:00+00:00 1 2020-04-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.quarter 0 1 1 2 @@ -1945,7 +1945,7 @@ def isocalendar(self) -> DataFrame: >>> s 0 2020-01-01 10:00:00+00:00 1 2020-02-01 11:00:00+00:00 - dtype: datetime64[s, UTC] + dtype: datetime64[us, UTC] >>> s.dt.daysinmonth 0 31 1 29 diff --git a/pandas/core/base.py b/pandas/core/base.py index 7c00d3925685f..0ffe81fa26ab1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1376,7 +1376,7 @@ def factorize( 0 2000-03-11 1 2000-03-12 2 2000-03-13 - dtype: datetime64[s] + dtype: datetime64[us] >>> ser.searchsorted('3/14/2000') np.int64(3) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 408c2858aa876..2572e3dfd42e5 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -150,7 +150,7 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) >>> index DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], - dtype='datetime64[s]', freq=None) + dtype='datetime64[us]', freq=None) >>> pd.isna(index) array([False, False, True, False]) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9840af15f1249..c155731e06101 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6299,8 +6299,8 @@ def dtypes(self): >>> df.dtypes float float64 int int64 - datetime datetime64[s] - string str + datetime datetime64[us] + string str dtype: object """ data = self._mgr.get_dtypes() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ce6ea1ed980dd..fd1f2f3dd1b7f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -246,7 +246,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin): >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"]) >>> idx DatetimeIndex(['2020-01-01 10:00:00+00:00', '2020-02-01 
11:00:00+00:00'], - dtype='datetime64[s, UTC]', freq=None) + dtype='datetime64[us, UTC]', freq=None) """ _typ = "datetimeindex" diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 1b236deff330d..25bd1bf6dadf5 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -878,7 +878,7 @@ def to_datetime( >>> pd.to_datetime(df) 0 2015-02-04 1 2016-03-05 - dtype: datetime64[s] + dtype: datetime64[us] Using a unix epoch time @@ -921,14 +921,14 @@ def to_datetime( >>> pd.to_datetime(["2018-10-26 12:00:00", "2018-10-26 13:00:15"]) DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], - dtype='datetime64[s]', freq=None) + dtype='datetime64[us]', freq=None) - Timezone-aware inputs *with constant time offset* are converted to timezone-aware :class:`DatetimeIndex`: >>> pd.to_datetime(["2018-10-26 12:00 -0500", "2018-10-26 13:00 -0500"]) DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], - dtype='datetime64[s, UTC-05:00]', freq=None) + dtype='datetime64[us, UTC-05:00]', freq=None) - However, timezone-aware inputs *with mixed time offsets* (for example issued from a timezone with daylight savings, such as Europe/Paris) @@ -970,14 +970,14 @@ def to_datetime( >>> pd.to_datetime(["2018-10-26 12:00", "2018-10-26 13:00"], utc=True) DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], - dtype='datetime64[s, UTC]', freq=None) + dtype='datetime64[us, UTC]', freq=None) - Timezone-aware inputs are *converted* to UTC (the output represents the exact same datetime, but viewed from the UTC time offset `+00:00`). 
>>> pd.to_datetime(["2018-10-26 12:00 -0530", "2018-10-26 12:00 -0500"], utc=True) DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'], - dtype='datetime64[s, UTC]', freq=None) + dtype='datetime64[us, UTC]', freq=None) - Inputs can contain both string or datetime, the above rules still apply diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index cd78dfd6f343a..72074d7a81c67 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -127,7 +127,7 @@ def test_dt64_array(dtype_unit): ( pd.DatetimeIndex(["2000", "2001"]), None, - DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"), + DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[us]"), ), ( ["2000", "2001"], @@ -324,7 +324,7 @@ def test_array_copy(): # datetime ( [pd.Timestamp("2000"), pd.Timestamp("2001")], - DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[s]"), + DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[us]"), ), ( [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], @@ -335,16 +335,16 @@ def test_array_copy(): DatetimeArray._from_sequence(np.array([1, 2], dtype="M8[ns]")), ), ( - np.array([1, 2], dtype="M8[us]"), + np.array([1, 2], dtype="M8[s]"), DatetimeArray._simple_new( - np.array([1, 2], dtype="M8[us]"), dtype=np.dtype("M8[us]") + np.array([1, 2], dtype="M8[s]"), dtype=np.dtype("M8[s]") ), ), # datetimetz ( [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")], DatetimeArray._from_sequence( - ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="s") + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="us") ), ), ( diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 821f51ee95ad3..175bb6b6a92fc 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -447,7 +447,7 @@ def test_to_numpy_dtype(as_series): [Timestamp("2000"), Timestamp("2000"), pd.NaT], None, 
Timestamp("2000"), - [np.datetime64("2000-01-01T00:00:00", "s")] * 3, + [np.datetime64("2000-01-01T00:00:00", "us")] * 3, ), ], ) @@ -489,7 +489,7 @@ def test_to_numpy_na_value_numpy_dtype( [(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))], None, Timestamp("2000"), - [np.datetime64("2000-01-01T00:00:00", "s")] * 3, + [np.datetime64("2000-01-01T00:00:00", "us")] * 3, ), ], ) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 679031a625c2d..68df5bdd6bcc7 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -155,8 +155,8 @@ def test_infer_dtype_from_scalar_errors(): (1, np.int64), (1.5, np.float64), (np.datetime64("2016-01-01"), np.dtype("M8[s]")), - (Timestamp("20160101"), np.dtype("M8[s]")), - (Timestamp("20160101", tz="UTC"), "datetime64[s, UTC]"), + (Timestamp("20160101"), np.dtype("M8[us]")), + (Timestamp("20160101", tz="UTC"), "datetime64[us, UTC]"), ], ) def test_infer_dtype_from_scalar(value, expected, using_infer_string): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 857fdc7468cec..a70c2f787c154 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3454,7 +3454,9 @@ def test_string_to_datetime_parsing_cast(): string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"] result = pd.Series(string_dates, dtype="timestamp[s][pyarrow]") expected = pd.Series( - ArrowExtensionArray(pa.array(pd.to_datetime(string_dates), from_pandas=True)) + ArrowExtensionArray( + pa.array(pd.to_datetime(string_dates).as_unit("s"), from_pandas=True) + ) ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 0c99b08cb30c4..8ac99073aa658 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ 
b/pandas/tests/frame/indexing/test_indexing.py @@ -825,7 +825,7 @@ def test_setitem_single_column_mixed_datetime(self): # check our dtypes result = df.dtypes expected = Series( - [np.dtype("float64")] * 3 + [np.dtype("datetime64[s]")], + [np.dtype("float64")] * 3 + [np.dtype("datetime64[us]")], index=["foo", "bar", "baz", "timestamp"], ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 20dd7b0c4d3e7..c260524c8c01b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -260,7 +260,7 @@ def test_setitem_dict_preserves_dtypes(self): (Interval(left=0, right=5), IntervalDtype("int64", "right")), ( Timestamp("2011-01-01", tz="US/Eastern"), - DatetimeTZDtype(unit="s", tz="US/Eastern"), + DatetimeTZDtype(unit="us", tz="US/Eastern"), ), ], ) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 1e594043510ea..ad58234c18660 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -197,7 +197,7 @@ def test_combine_first_align_nan(self): # GH 7509 (not fixed) dfa = DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) dfb = DataFrame([[4], [5]], columns=["b"]) - assert dfa["a"].dtype == "datetime64[s]" + assert dfa["a"].dtype == "datetime64[us]" assert dfa["b"].dtype == "int64" res = dfa.combine_first(dfb) @@ -206,7 +206,7 @@ def test_combine_first_align_nan(self): columns=["a", "b"], ) tm.assert_frame_equal(res, exp) - assert res["a"].dtype == "datetime64[s]" + assert res["a"].dtype == "datetime64[us]" # TODO: this must be int64 assert res["b"].dtype == "int64" diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py index 666fcb1b5143b..b2c9be61afb7f 100644 --- a/pandas/tests/frame/methods/test_get_numeric_data.py +++ 
b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -21,7 +21,7 @@ def test_get_numeric_data_preserve_dtype(self): tm.assert_frame_equal(result, expected) def test_get_numeric_data(self, using_infer_string): - datetime64name = np.dtype("M8[s]").name + datetime64name = np.dtype("M8[us]").name objectname = np.dtype(np.object_).name df = DataFrame( diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index f0f01529db58f..a404a37bd1ce5 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -141,7 +141,7 @@ def test_reindex_tzaware_fill_value(self): ts = pd.Timestamp("2023-04-10 17:32", tz="US/Pacific") res = df.reindex([0, 1], axis=1, fill_value=ts) - assert res.dtypes[1] == pd.DatetimeTZDtype(unit="s", tz="US/Pacific") + assert res.dtypes[1] == pd.DatetimeTZDtype(unit="us", tz="US/Pacific") expected = DataFrame({0: [1], 1: [ts]}) expected[1] = expected[1].astype(res.dtypes[1]) tm.assert_frame_equal(res, expected) @@ -154,7 +154,7 @@ def test_reindex_tzaware_fill_value(self): interval = pd.Interval(ts, ts + pd.Timedelta(seconds=1)) res = df.reindex([0, 1], axis=1, fill_value=interval) - assert res.dtypes[1] == pd.IntervalDtype("datetime64[s, US/Pacific]", "right") + assert res.dtypes[1] == pd.IntervalDtype("datetime64[us, US/Pacific]", "right") expected = DataFrame({0: [1], 1: [interval]}) expected[1] = expected[1].astype(res.dtypes[1]) tm.assert_frame_equal(res, expected) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 34d120145b381..ed96c20bd8938 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -50,7 +50,7 @@ def test_to_csv_from_csv1_datetime(self, temp_file, datetime_frame): datetime_frame.to_csv(path) recons = self.read_csv(path, parse_dates=True) expected = datetime_frame.copy() - expected.index = expected.index.as_unit("s") + expected.index = 
expected.index.as_unit("us") tm.assert_frame_equal(expected, recons) datetime_frame.to_csv(path, index_label="index") @@ -240,8 +240,8 @@ def make_dtnat_arr(n, nnat=None): result = self.read_csv(path).apply(to_datetime) expected = df[:] - expected["a"] = expected["a"].astype("M8[s]") - expected["b"] = expected["b"].astype("M8[s]") + expected["a"] = expected["a"].astype("M8[us]") + expected["b"] = expected["b"].astype("M8[us]") tm.assert_frame_equal(result, expected, check_names=False) def _return_result_expected( @@ -579,7 +579,7 @@ def test_to_csv_multiindex(self, temp_file, float_frame, datetime_frame): # TODO to_csv drops column name expected = tsframe.copy() - expected.index = MultiIndex.from_arrays([old_index.as_unit("s"), new_index[1]]) + expected.index = MultiIndex.from_arrays([old_index.as_unit("us"), new_index[1]]) tm.assert_frame_equal(recons, expected, check_names=False) # do not load index diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index f68d7f533645d..9f697b6593784 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -71,8 +71,8 @@ def test_to_numpy_datetime_with_na(self): expected = np.array( [ [0.00e00, 0.00e00, 1.00e00], - [8.64e04, np.nan, np.nan], - [np.nan, 8.64e04, 2.00e00], + [8.64e10, np.nan, np.nan], + [np.nan, 8.64e10, 2.00e00], ] ) result = df.to_numpy(float, na_value=np.nan) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index f084d16e387a8..8550a23fc10e1 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -192,7 +192,7 @@ def test_construction_with_conversions(self): expected = DataFrame( { - "dt1": Timestamp("20130101"), + "dt1": Timestamp("20130101").as_unit("s"), "dt2": date_range("20130101", periods=3).astype("M8[s]"), # 'dt3' : date_range('20130101 00:00:01',periods=3,freq='s'), # FIXME: don't leave 
commented-out diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2426c89dbcff5..79de095ded11b 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -879,9 +879,9 @@ def create_data(constructor): ) result_datetime64 = DataFrame(data_datetime64) + assert result_datetime64.index.unit == "s" + result_datetime64.index = result_datetime64.index.as_unit("us") result_datetime = DataFrame(data_datetime) - assert result_datetime.index.unit == "us" - result_datetime.index = result_datetime.index.as_unit("s") result_Timestamp = DataFrame(data_Timestamp) tm.assert_frame_equal(result_datetime64, expected) tm.assert_frame_equal(result_datetime, expected) @@ -945,7 +945,7 @@ def test_constructor_dict_extension_scalar(self, ea_scalar_and_dtype): (Interval(left=0, right=5), IntervalDtype("int64", "right")), ( Timestamp("2011-01-01", tz="US/Eastern"), - DatetimeTZDtype(unit="s", tz="US/Eastern"), + DatetimeTZDtype(unit="us", tz="US/Eastern"), ), ], ) @@ -1863,7 +1863,7 @@ def test_constructor_with_datetimes(self, using_infer_string): else pd.StringDtype(na_value=np.nan) ] * 2 - + [np.dtype("M8[s]"), np.dtype("M8[us]")], + + [np.dtype("M8[us]")] * 2, index=list("ABCDE"), ) tm.assert_series_equal(result, expected) @@ -3076,9 +3076,9 @@ def test_from_tzaware_mixed_object_array(self): res = DataFrame(arr, columns=["A", "B", "C"]) expected_dtypes = [ - "datetime64[s]", - "datetime64[s, US/Eastern]", - "datetime64[s, CET]", + "datetime64[us]", + "datetime64[us, US/Eastern]", + "datetime64[us, CET]", ] assert (res.dtypes == expected_dtypes).all() diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 5ef36331a20fa..acd400b41b1a4 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -739,8 +739,7 @@ def test_groupby_max_datetime64(self): # GH 5869 # datetimelike dtype conversion from int df = 
DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) - # TODO: can we retain second reso in .apply here? - expected = df.groupby("A")["A"].apply(lambda x: x.max()).astype("M8[s]") + expected = df.groupby("A")["A"].apply(lambda x: x.max()) result = df.groupby("A")["A"].max() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index c418b2a18008b..0ddd8048d6976 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -524,7 +524,7 @@ def test_construction_dti_with_mixed_timezones(self): Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), Timestamp("2011-01-02 10:00", tz="US/Eastern"), ], - dtype="M8[s, US/Eastern]", + dtype="M8[us, US/Eastern]", name="idx", ) tm.assert_index_equal(dti, expected) @@ -602,7 +602,7 @@ def test_constructor_coverage(self): expected = DatetimeIndex(strings.astype("O")) tm.assert_index_equal(result, expected) - from_ints = DatetimeIndex(expected.as_unit("ns").asi8).as_unit("s") + from_ints = DatetimeIndex(expected.as_unit("ns").asi8).as_unit("us") tm.assert_index_equal(from_ints, expected) # string with NaT @@ -611,7 +611,7 @@ def test_constructor_coverage(self): expected = DatetimeIndex(strings.astype("O")) tm.assert_index_equal(result, expected) - from_ints = DatetimeIndex(expected.as_unit("ns").asi8).as_unit("s") + from_ints = DatetimeIndex(expected.as_unit("ns").asi8).as_unit("us") tm.assert_index_equal(from_ints, expected) # non-conforming @@ -840,8 +840,8 @@ def test_construction_with_tz_and_tz_aware_dti(self): def test_construction_with_nat_and_tzlocal(self): tz = dateutil.tz.tzlocal() - result = DatetimeIndex(["2018", "NaT"], tz=tz).as_unit("ns") - expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT]).as_unit("ns") + result = DatetimeIndex(["2018", "NaT"], tz=tz) + expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT]) 
tm.assert_index_equal(result, expected) def test_constructor_with_ambiguous_keyword_arg(self): @@ -940,11 +940,11 @@ def test_dti_tz_constructors(self, tzstr): idx1 = to_datetime(arr).tz_localize(tzstr) idx2 = date_range( - start="2005-11-10 08:00:00", freq="h", periods=2, tz=tzstr, unit="s" + start="2005-11-10 08:00:00", freq="h", periods=2, tz=tzstr, unit="us" ) idx2 = idx2._with_freq(None) # the others all have freq=None - idx3 = DatetimeIndex(arr, tz=tzstr).as_unit("s") - idx4 = DatetimeIndex(np.array(arr), tz=tzstr).as_unit("s") + idx3 = DatetimeIndex(arr, tz=tzstr) + idx4 = DatetimeIndex(np.array(arr), tz=tzstr) tm.assert_index_equal(idx1, idx2) tm.assert_index_equal(idx1, idx3) @@ -1093,6 +1093,19 @@ def test_dti_constructor_with_dtype_object_int_matches_int_dtype(self, dtype): tm.assert_index_equal(res3, expected) tm.assert_index_equal(res4, expected) + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_constructor_from_timestamp_objects(self, unit, tz): + result = DatetimeIndex( + [ + Timestamp("2012-01-01 00:00:00", tz=tz).as_unit(unit), + Timestamp("2012-01-02 00:00:00", tz=tz).as_unit(unit), + ], + ) + expected = ( + DatetimeIndex(["2012-01-01", "2012-01-02"]).as_unit(unit).tz_localize(tz) + ) + tm.assert_index_equal(result, expected) + class TestTimeSeries: def test_dti_constructor_preserve_dti_freq(self): @@ -1198,9 +1211,9 @@ def test_dti_constructor_object_dtype_dayfirst_yearfirst_with_tz(self): yfirst = Timestamp(2005, 10, 16, tz="US/Pacific") result1 = DatetimeIndex([val], tz="US/Pacific", dayfirst=True) - expected1 = DatetimeIndex([dfirst]).as_unit("s") + expected1 = DatetimeIndex([dfirst]) tm.assert_index_equal(result1, expected1) result2 = DatetimeIndex([val], tz="US/Pacific", yearfirst=True) - expected2 = DatetimeIndex([yfirst]).as_unit("s") + expected2 = DatetimeIndex([yfirst]) tm.assert_index_equal(result2, expected2) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 
d5002a47c3447..6dbf11036ef06 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -596,7 +596,7 @@ def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype): @pytest.mark.parametrize( "fill_val,fill_dtype", [ - (pd.Timestamp("2012-01-01"), "datetime64[s]"), + (pd.Timestamp("2012-01-01"), "datetime64[us]"), (pd.Timestamp("2012-01-01", tz="US/Eastern"), object), (1, object), ("x", object), @@ -613,7 +613,7 @@ def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): pd.Timestamp("2011-01-04"), ] ) - assert obj.dtype == "datetime64[s]" + assert obj.dtype == "datetime64[us]" exp = klass( [ @@ -628,10 +628,10 @@ def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): @pytest.mark.parametrize( "fill_val,fill_dtype", [ - (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[s, US/Eastern]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[us, US/Eastern]"), (pd.Timestamp("2012-01-01"), object), # pre-2.0 with a mismatched tz we would get object result - (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[s, US/Eastern]"), + (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[us, US/Eastern]"), (1, object), ("x", object), ], @@ -648,7 +648,7 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): pd.Timestamp("2011-01-04", tz=tz), ] ) - assert obj.dtype == "datetime64[s, US/Eastern]" + assert obj.dtype == "datetime64[us, US/Eastern]" if getattr(fill_val, "tz", None) is None: fv = fill_val diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index ccb58aae2783f..bb22bec57ecfb 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -701,7 +701,7 @@ def test_loc_modify_datetime(self): {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]} ) - df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True).dt.as_unit("ms") + df["date_dt"] = 
to_datetime(df["date"], unit="ms", cache=True).dt.as_unit("us") df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"] df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"] diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 6f20d0e4e7cbf..967c5941188ea 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -574,7 +574,7 @@ def test_partial_set_invalid(self): ], ), ( - date_range(start="2000", periods=20, freq="D", unit="s"), + date_range(start="2000", periods=20, freq="D", unit="us"), ["2000-01-04", "2000-01-08", "2000-01-12"], [ Timestamp("2000-01-04"), diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 71fb8f490e114..9a1442a610930 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -134,18 +134,8 @@ def df_ref(datapath): return df_ref -def get_exp_unit(read_ext: str, engine: str | None) -> str: - unit = "us" - if (read_ext == ".ods") ^ (engine == "calamine"): - unit = "s" - return unit - - def adjust_expected(expected: DataFrame, read_ext: str, engine: str | None) -> None: expected.index.name = None - unit = get_exp_unit(read_ext, engine) - # error: "Index" has no attribute "as_unit" - expected.index = expected.index.as_unit(unit) # type: ignore[attr-defined] def xfail_datetimes_with_pyxlsb(engine, request): @@ -490,7 +480,6 @@ def test_excel_table(self, request, engine, read_ext, df_ref): def test_reader_special_dtypes(self, request, engine, read_ext): xfail_datetimes_with_pyxlsb(engine, request) - unit = get_exp_unit(read_ext, engine) expected = DataFrame.from_dict( { "IntCol": [1, 2, -3, 4, 0], @@ -506,7 +495,6 @@ def test_reader_special_dtypes(self, request, engine, read_ext): datetime(2013, 12, 14), datetime(2015, 3, 14), ], - dtype=f"M8[{unit}]", ), }, ) @@ -665,8 +653,6 @@ def test_dtype_backend(self, read_ext, dtype_backend, engine, tmp_excel): expected["j"] = 
ArrowExtensionArray(pa.array([None, None])) else: expected = df - unit = get_exp_unit(read_ext, engine) - expected["i"] = expected["i"].astype(f"M8[{unit}]") tm.assert_frame_equal(result, expected) @@ -1034,8 +1020,6 @@ def test_read_excel_multiindex(self, request, engine, read_ext): # see gh-4679 xfail_datetimes_with_pyxlsb(engine, request) - unit = get_exp_unit(read_ext, engine) - mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]]) mi_file = "testmultiindex" + read_ext @@ -1049,8 +1033,6 @@ def test_read_excel_multiindex(self, request, engine, read_ext): ], columns=mi, ) - expected[mi[2]] = expected[mi[2]].astype(f"M8[{unit}]") - actual = pd.read_excel( mi_file, sheet_name="mi_column", header=[0, 1], index_col=0 ) @@ -1130,7 +1112,6 @@ def test_read_excel_multiindex_blank_after_name( mi_file = "testmultiindex" + read_ext mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]], names=["c1", "c2"]) - unit = get_exp_unit(read_ext, engine) expected = DataFrame( [ [1, 2.5, pd.Timestamp("2015-01-01"), True], @@ -1144,7 +1125,6 @@ def test_read_excel_multiindex_blank_after_name( names=["ilvl1", "ilvl2"], ), ) - expected[mi[2]] = expected[mi[2]].astype(f"M8[{unit}]") result = pd.read_excel( mi_file, sheet_name=sheet_name, @@ -1248,8 +1228,6 @@ def test_read_excel_skiprows(self, request, engine, read_ext): # GH 4903 xfail_datetimes_with_pyxlsb(engine, request) - unit = get_exp_unit(read_ext, engine) - actual = pd.read_excel( "testskiprows" + read_ext, sheet_name="skiprows_list", skiprows=[0, 2] ) @@ -1262,7 +1240,6 @@ def test_read_excel_skiprows(self, request, engine, read_ext): ], columns=["a", "b", "c", "d"], ) - expected["c"] = expected["c"].astype(f"M8[{unit}]") tm.assert_frame_equal(actual, expected) actual = pd.read_excel( @@ -1295,13 +1272,11 @@ def test_read_excel_skiprows(self, request, engine, read_ext): ], columns=["a", "b", "c", "d"], ) - expected["c"] = expected["c"].astype(f"M8[{unit}]") tm.assert_frame_equal(actual, expected) def 
test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext): # GH 4903 xfail_datetimes_with_pyxlsb(engine, request) - unit = get_exp_unit(read_ext, engine) actual = pd.read_excel( "testskiprows" + read_ext, @@ -1317,7 +1292,6 @@ def test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext): ], columns=["a", "b", "c", "d"], ) - expected["c"] = expected["c"].astype(f"M8[{unit}]") tm.assert_frame_equal(actual, expected) def test_read_excel_nrows(self, read_ext): @@ -1687,9 +1661,7 @@ def test_read_datetime_multiindex(self, request, engine, read_ext): with pd.ExcelFile(f) as excel: actual = pd.read_excel(excel, header=[0, 1], index_col=0, engine=engine) - unit = get_exp_unit(read_ext, engine) - - dti = pd.DatetimeIndex(["2020-02-29", "2020-03-01"], dtype=f"M8[{unit}]") + dti = pd.DatetimeIndex(["2020-02-29", "2020-03-01"]) expected_column_index = MultiIndex.from_arrays( [dti[:1], dti[1:]], names=[ diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index ced4feb9e7eb9..32d843b9b677c 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -37,12 +37,6 @@ from pandas.io.excel._util import _writers -def get_exp_unit(path: str) -> str: - if path.endswith(".ods"): - return "s" - return "us" - - @pytest.fixture def frame(float_frame): """ @@ -297,13 +291,12 @@ def test_read_excel_parse_dates(self, tmp_excel): res = pd.read_excel(tmp_excel, parse_dates=["date_strings"], index_col=0) expected = df[:] - expected["date_strings"] = expected["date_strings"].astype("M8[s]") + expected["date_strings"] = expected["date_strings"].astype("M8[us]") tm.assert_frame_equal(res, expected) res = pd.read_excel( tmp_excel, parse_dates=["date_strings"], date_format="%m/%d/%Y", index_col=0 ) - expected["date_strings"] = expected["date_strings"].astype("M8[s]") tm.assert_frame_equal(expected, res) def test_multiindex_interval_datetimes(self, tmp_excel): @@ -364,13 +357,6 @@ def 
test_excel_round_trip_with_periodindex(self, tmp_excel, merge_cells): names=["date", "category"], ), ) - time_format = ( - "datetime64[s]" if tmp_excel.endswith(".ods") else "datetime64[us]" - ) - expected.index = expected.index.set_levels( - expected.index.levels[0].astype(time_format), level=0 - ) - tm.assert_frame_equal(result, expected) @@ -507,24 +493,20 @@ def test_mixed(self, frame, tmp_excel): tm.assert_frame_equal(mixed_frame, recons) def test_ts_frame(self, tmp_excel): - unit = get_exp_unit(tmp_excel) df = DataFrame( np.random.default_rng(2).standard_normal((5, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=5, freq="B"), + index=date_range("2000-01-01", periods=5, freq="B", unit="us"), ) # freq doesn't round-trip index = pd.DatetimeIndex(np.asarray(df.index), freq=None) df.index = index - expected = df[:] - expected.index = expected.index.as_unit(unit) - df.to_excel(tmp_excel, sheet_name="test1") with ExcelFile(tmp_excel) as reader: recons = pd.read_excel(reader, sheet_name="test1", index_col=0) - tm.assert_frame_equal(expected, recons) + tm.assert_frame_equal(recons, df) def test_basics_with_nan(self, frame, tmp_excel): frame = frame.copy() @@ -594,19 +576,15 @@ def test_inf_roundtrip(self, tmp_excel): def test_sheets(self, frame, tmp_excel): # freq doesn't round-trip - unit = get_exp_unit(tmp_excel) tsframe = DataFrame( np.random.default_rng(2).standard_normal((5, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=5, freq="B"), + index=date_range("2000-01-01", periods=5, freq="B", unit="us"), ) index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None) tsframe.index = index - expected = tsframe[:] - expected.index = expected.index.as_unit(unit) - frame = frame.copy() frame.iloc[:5, frame.columns.get_loc("A")] = np.nan @@ -623,7 +601,7 @@ def test_sheets(self, frame, tmp_excel): recons = pd.read_excel(reader, sheet_name="test1", index_col=0) tm.assert_frame_equal(frame, recons) recons = 
pd.read_excel(reader, sheet_name="test2", index_col=0) - tm.assert_frame_equal(expected, recons) + tm.assert_frame_equal(tsframe, recons) assert 2 == len(reader.sheet_names) assert "test1" == reader.sheet_names[0] assert "test2" == reader.sheet_names[1] @@ -727,13 +705,12 @@ def test_excel_roundtrip_indexname(self, merge_cells, tmp_excel): def test_excel_roundtrip_datetime(self, merge_cells, tmp_excel): # datetime.date, not sure what to test here exactly - unit = get_exp_unit(tmp_excel) # freq does not round-trip tsframe = DataFrame( np.random.default_rng(2).standard_normal((5, 4)), columns=Index(list("ABCD")), - index=date_range("2000-01-01", periods=5, freq="B"), + index=date_range("2000-01-01", periods=5, freq="B", unit="us"), ) index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None) tsframe.index = index @@ -746,15 +723,12 @@ def test_excel_roundtrip_datetime(self, merge_cells, tmp_excel): with ExcelFile(tmp_excel) as reader: recons = pd.read_excel(reader, sheet_name="test1", index_col=0) - expected = tsframe[:] - expected.index = expected.index.as_unit(unit) - tm.assert_frame_equal(expected, recons) + tm.assert_frame_equal(tsframe, recons) def test_excel_date_datetime_format(self, ext, tmp_excel, tmp_path): # see gh-4133 # # Excel output format strings - unit = get_exp_unit(tmp_excel) df = DataFrame( [ [date(2014, 1, 31), date(1999, 9, 24)], @@ -771,7 +745,6 @@ def test_excel_date_datetime_format(self, ext, tmp_excel, tmp_path): index=["DATE", "DATETIME"], columns=["X", "Y"], ) - df_expected = df_expected.astype(f"M8[{unit}]") filename2 = tmp_path / f"tmp2{ext}" filename2.touch() @@ -791,9 +764,6 @@ def test_excel_date_datetime_format(self, ext, tmp_excel, tmp_path): with ExcelFile(filename2) as reader2: rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0) - # TODO: why do we get different units? 
- rs2 = rs2.astype(f"M8[{unit}]") - tm.assert_frame_equal(rs1, rs2) # Since the reader returns a datetime object for dates, @@ -955,7 +925,6 @@ def test_to_excel_multiindex_cols(self, merge_cells, tmp_excel): def test_to_excel_multiindex_dates(self, merge_cells, tmp_excel): # try multiindex with dates - unit = get_exp_unit(tmp_excel) tsframe = DataFrame( np.random.default_rng(2).standard_normal((5, 4)), columns=Index(list("ABCD")), @@ -963,7 +932,7 @@ def test_to_excel_multiindex_dates(self, merge_cells, tmp_excel): ) tsframe.index = MultiIndex.from_arrays( [ - tsframe.index.as_unit(unit), + tsframe.index.as_unit("us"), np.arange(len(tsframe.index), dtype=np.int64), ], names=["time", "foo"], @@ -1314,7 +1283,6 @@ def test_comment_empty_line(self, tmp_excel): def test_datetimes(self, tmp_excel): # Test writing and reading datetimes. For issue #9139. (xref #9185) - unit = get_exp_unit(tmp_excel) datetimes = [ datetime(2013, 1, 13, 1, 2, 3), datetime(2013, 1, 13, 2, 45, 56), @@ -1333,8 +1301,7 @@ def test_datetimes(self, tmp_excel): write_frame.to_excel(tmp_excel, sheet_name="Sheet1") read_frame = pd.read_excel(tmp_excel, sheet_name="Sheet1", header=0) - expected = write_frame.astype(f"M8[{unit}]") - tm.assert_series_equal(expected["A"], read_frame["A"]) + tm.assert_series_equal(write_frame["A"], read_frame["A"]) def test_bytes_io(self, engine): # see gh-7074 diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9c47fcaf3375c..3c2db92b33eb8 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -146,7 +146,7 @@ def test_frame_non_unique_columns(self, orient, data, request): "The default 'epoch' date format is deprecated and will be removed " "in a future version, please use 'iso' date format instead." 
) - if df.iloc[:, 0].dtype == "datetime64[s]": + if df.iloc[:, 0].dtype == "datetime64[us]": expected_warning = Pandas4Warning with tm.assert_produces_warning(expected_warning, match=msg): @@ -155,7 +155,7 @@ def test_frame_non_unique_columns(self, orient, data, request): ) if orient == "values": expected = DataFrame(data) - if expected.iloc[:, 0].dtype == "datetime64[s]": + if expected.iloc[:, 0].dtype == "datetime64[us]": # orient == "values" by default will write Timestamp objects out # in milliseconds; these are internally stored in nanosecond, # so divide to get where we need diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 3680273f5e98a..1765c4c589e8a 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -65,7 +65,7 @@ def test_read_csv_local(all_parsers, csv1): datetime(2000, 1, 10), datetime(2000, 1, 11), ], - dtype="M8[s]", + dtype="M8[us]" if all_parsers.engine != "pyarrow" else "M8[s]", name="index", ), ) @@ -167,7 +167,7 @@ def test_read_csv_dataframe(all_parsers, csv1): datetime(2000, 1, 10), datetime(2000, 1, 11), ], - dtype="M8[s]", + dtype="M8[us]" if all_parsers.engine != "pyarrow" else "M8[s]", name="index", ), ) diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index cfa8785b24bde..7570a4df1e89f 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -263,7 +263,7 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path): datetime(2000, 1, 6), datetime(2000, 1, 7), ], - dtype="M8[s]", + dtype="M8[us]", ), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py index 348c19ac0f0c6..66207e94aa910 100644 --- a/pandas/tests/io/parser/test_multi_thread.py +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -155,5 
+155,5 @@ def test_multi_thread_path_multipart_read_csv(all_parsers): result = _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks) expected = df[:] - expected["date"] = expected["date"].astype("M8[s]") + expected["date"] = expected["date"].astype("M8[us]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 9a15d9bc84a2e..75d295e431f0f 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -61,7 +61,6 @@ def test_date_col_as_index_col(all_parsers): datetime(1999, 1, 27, 21, 0), datetime(1999, 1, 27, 22, 0), ], - dtype="M8[s]", name="X1", ) expected = DataFrame( @@ -126,7 +125,7 @@ def test_parse_dates_string(all_parsers): parser = all_parsers result = parser.read_csv(StringIO(data), index_col="date", parse_dates=["date"]) # freq doesn't round-trip - index = date_range("1/1/2009", periods=3, name="date", unit="s")._with_freq(None) + index = date_range("1/1/2009", periods=3, name="date", unit="us")._with_freq(None) expected = DataFrame( {"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]}, index=index @@ -143,8 +142,6 @@ def test_parse_dates_column_list(all_parsers, parse_dates): expected = DataFrame( {"a": [datetime(2010, 1, 1)], "b": [1], "c": [datetime(2010, 2, 15)]} ) - expected["a"] = expected["a"].astype("M8[s]") - expected["c"] = expected["c"].astype("M8[s]") expected = expected.set_index(["a", "b"]) result = parser.read_csv( @@ -168,7 +165,7 @@ def test_multi_index_parse_dates(all_parsers, index_col): 20090103,three,c,4,5 """ parser = all_parsers - dti = date_range("2009-01-01", periods=3, freq="D", unit="s") + dti = date_range("2009-01-01", periods=3, freq="D", unit="us") index = MultiIndex.from_product( [ dti, @@ -218,6 +215,7 @@ def test_parse_tz_aware(all_parsers): if parser.engine == "pyarrow": pytz = pytest.importorskip("pytz") expected_tz = pytz.utc + expected.index = 
expected.index.as_unit("s") else: expected_tz = timezone.utc tm.assert_frame_equal(result, expected) @@ -303,7 +301,7 @@ def test_parse_dates_empty_string(all_parsers): expected = DataFrame( [[datetime(2012, 1, 1), 1], [pd.NaT, 2]], columns=["Date", "test"] ) - expected["Date"] = expected["Date"].astype("M8[s]") + expected["Date"] = expected["Date"].astype("M8[us]") tm.assert_frame_equal(result, expected) @@ -314,22 +312,18 @@ def test_parse_dates_empty_string(all_parsers): ( "a\n04.15.2016", {"parse_dates": ["a"]}, - DataFrame([datetime(2016, 4, 15)], columns=["a"], dtype="M8[s]"), + DataFrame([datetime(2016, 4, 15)], columns=["a"]), ), ( "a\n04.15.2016", {"parse_dates": True, "index_col": 0}, - DataFrame( - index=DatetimeIndex(["2016-04-15"], dtype="M8[s]", name="a"), columns=[] - ), + DataFrame(index=DatetimeIndex(["2016-04-15"], name="a"), columns=[]), ), ( "a,b\n04.15.2016,09.16.2013", {"parse_dates": ["a", "b"]}, DataFrame( - [[datetime(2016, 4, 15), datetime(2013, 9, 16)]], - dtype="M8[s]", - columns=["a", "b"], + [[datetime(2016, 4, 15), datetime(2013, 9, 16)]], columns=["a", "b"] ), ), ( @@ -339,8 +333,8 @@ def test_parse_dates_empty_string(all_parsers): index=MultiIndex.from_tuples( [ ( - Timestamp(2016, 4, 15).as_unit("s"), - Timestamp(2013, 9, 16).as_unit("s"), + Timestamp(2016, 4, 15), + Timestamp(2013, 9, 16), ) ], names=["a", "b"], @@ -411,7 +405,7 @@ def test_parse_timezone(all_parsers): end="2018-01-04 09:05:00", freq="1min", tz=timezone(timedelta(minutes=540)), - unit="s", + unit="us", )._with_freq(None) expected_data = {"dt": dti, "val": [23350, 23400, 23400, 23400, 23400]} @@ -450,7 +444,7 @@ def test_parse_delimited_date_swap_no_warning( all_parsers, date_string, dayfirst, expected, request ): parser = all_parsers - expected = DataFrame({0: [expected]}, dtype="datetime64[s]") + expected = DataFrame({0: [expected]}, dtype="datetime64[us]") if parser.engine == "pyarrow": if not dayfirst: # "CSV parse error: Empty CSV file or block" @@ -483,7 
+477,7 @@ def test_parse_delimited_date_swap_with_warning( all_parsers, date_string, dayfirst, expected ): parser = all_parsers - expected = DataFrame({0: [expected]}, dtype="datetime64[s]") + expected = DataFrame({0: [expected]}, dtype="datetime64[us]") warning_msg = ( "Parsing dates in .* format when dayfirst=.* was specified. " "Pass `dayfirst=.*` or specify a format to silence this warning." @@ -602,7 +596,6 @@ def test_date_parser_usecols_thousands(all_parsers): thousands="-", ) expected = DataFrame({"B": [3, 4], "C": [Timestamp("20-09-2001 01:00:00")] * 2}) - expected["C"] = expected["C"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -611,9 +604,7 @@ def test_dayfirst_warnings(): # CASE 1: valid input input = "date\n31/12/2014\n10/03/2011" - expected = DatetimeIndex( - ["2014-12-31", "2011-03-10"], dtype="datetime64[s]", freq=None, name="date" - ) + expected = DatetimeIndex(["2014-12-31", "2011-03-10"], freq=None, name="date") warning_msg = ( "Parsing dates in .* format when dayfirst=.* was specified. " "Pass `dayfirst=.*` or specify a format to silence this warning." @@ -672,9 +663,7 @@ def test_dayfirst_warnings(): def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst): # GH47880 initial_value = f"date\n{date_string}" - expected = DatetimeIndex( - ["2014-01-31"], dtype="datetime64[s]", freq=None, name="date" - ) + expected = DatetimeIndex(["2014-01-31"], freq=None, name="date") warning_msg = ( "Parsing dates in .* format when dayfirst=.* was specified. " "Pass `dayfirst=.*` or specify a format to silence this warning." 
@@ -729,7 +718,7 @@ def test_replace_nans_before_parsing_dates(all_parsers): Timestamp("2017-09-09"), ] }, - dtype="M8[s]", + dtype="M8[us]", ) tm.assert_frame_equal(result, expected) @@ -744,7 +733,6 @@ def test_parse_dates_and_string_dtype(all_parsers): result = parser.read_csv(StringIO(data), dtype="string", parse_dates=["b"]) expected = DataFrame({"a": ["1"], "b": [Timestamp("2019-12-31")]}) expected["a"] = expected["a"].astype("string") - expected["b"] = expected["b"].astype("M8[s]") tm.assert_frame_equal(result, expected) @@ -763,9 +751,7 @@ def test_parse_dot_separated_dates(all_parsers): warn = None else: expected_index = DatetimeIndex( - ["2003-03-27 14:55:00", "2003-08-03 15:20:00"], - dtype="datetime64[ms]", - name="a", + ["2003-03-27 14:55:00", "2003-08-03 15:20:00"], name="a" ) warn = UserWarning msg = r"when dayfirst=False \(the default\) was specified" @@ -798,7 +784,7 @@ def test_parse_dates_dict_format(all_parsers): "a": [Timestamp("2019-12-31"), Timestamp("2020-12-31")], "b": [Timestamp("2019-12-31"), Timestamp("2020-12-31")], }, - dtype="M8[s]", + dtype="M8[us]", ) tm.assert_frame_equal(result, expected) @@ -840,6 +826,8 @@ def test_parse_dates_arrow_engine(all_parsers): "b": 1, } ) + if parser.engine == "pyarrow": + expected["a"] = expected["a"].dt.as_unit("s") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 6243185294894..792e6f8249fec 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -308,7 +308,6 @@ def test_fwf_regression(): parse_dates=True, date_format="%Y%j%H%M%S", ) - expected.index = expected.index.astype("M8[s]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 99642ee4befc6..619b53abec8c7 100644 --- a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -43,7 +43,7 
@@ def test_skip_rows_bug(all_parsers, skiprows): ) index = Index( [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], - dtype="M8[s]", + dtype="M8[us]", name=0, ) @@ -88,7 +88,7 @@ def test_skip_rows_blank(all_parsers): ) index = Index( [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], - dtype="M8[s]", + dtype="M8[us]", name=0, ) diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 479f2468a86ab..308fae487f842 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -826,7 +826,7 @@ def test_append_raise(setup_path, using_infer_string): msg = re.escape( "Cannot serialize the column [foo] " "because its data contents are not [string] " - "but [datetime64[s]] object dtype" + "but [datetime64[us]] object dtype" ) with pytest.raises(ValueError, match=msg): store.append("df", df) diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 409b92d2ddde1..7ef1af0a3f516 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -252,8 +252,7 @@ def test_table_values_dtypes_roundtrip(setup_path, using_infer_string): "int8": 1, "int64": 1, str_dtype: 1, - "datetime64[s]": 2, - "datetime64[ms]": 1, + "datetime64[us]": 3, "datetime64[ns]": 1, }, name="count", diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index 87d5ea9376cc5..86215e8109ead 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -56,7 +56,7 @@ def df1(): "int": [1, 3], "float": [2.0, np.nan], "str": ["t", "s"], - "dt": date_range("2018-06-18", periods=2), + "dt": date_range("2018-06-18", periods=2, unit="us"), } ) @@ -76,9 +76,7 @@ def test_read_csv(cleared_fs, df1): w.write(text) df2 = read_csv("memory://test/test.csv", parse_dates=["dt"]) - expected = df1.copy() - expected["dt"] = expected["dt"].astype("M8[s]") - 
tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(df2, df1) def test_reasonable_error(monkeypatch, cleared_fs): @@ -101,9 +99,7 @@ def test_to_csv(cleared_fs, df1): df2 = read_csv("memory://test/test.csv", parse_dates=["dt"], index_col=0) - expected = df1.copy() - expected["dt"] = expected["dt"].astype("M8[s]") - tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(df2, df1) def test_to_excel(cleared_fs, df1): @@ -114,9 +110,7 @@ def test_to_excel(cleared_fs, df1): df2 = read_excel(path, parse_dates=["dt"], index_col=0) - expected = df1.copy() - expected["dt"] = expected["dt"].astype("M8[s]") - tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(df2, df1) @pytest.mark.parametrize("binary_mode", [False, True]) @@ -138,9 +132,7 @@ def test_to_csv_fsspec_object(cleared_fs, binary_mode, df1): ) assert not fsspec_object.closed - expected = df1.copy() - expected["dt"] = expected["dt"].astype("M8[s]") - tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(df2, df1) def test_csv_options(fsspectest): diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 7702003ccd1e2..1ff51167207a4 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -67,7 +67,7 @@ def test_to_read_gcs(gcs_buffer, format, monkeypatch, capsys, request): "int": [1, 3], "float": [2.0, np.nan], "str": ["t", "s"], - "dt": date_range("2018-06-18", periods=2), + "dt": date_range("2018-06-18", periods=2, unit="us"), } ) @@ -118,8 +118,9 @@ def from_uri(path): df2 = df1 expected = df1[:] - if format in ["csv", "excel"]: - expected["dt"] = expected["dt"].dt.as_unit("s") + if format == "json": + # json stores datetime in nanoseconds + expected["dt"] = expected["dt"].dt.as_unit("ns") tm.assert_frame_equal(df2, expected) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index bad92f677c7ea..9a808ac843936 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1060,16 +1060,12 @@ def 
test_header_inferred_from_rows_with_only_th(self, flavor_read_html): tm.assert_frame_equal(result, expected) def test_parse_dates_list(self, flavor_read_html): - df = DataFrame({"date": date_range("1/1/2001", periods=10)}) - - expected = df[:] - expected["date"] = expected["date"].dt.as_unit("s") - + df = DataFrame({"date": date_range("1/1/2001", periods=10, unit="us")}) str_df = df.to_html() res = flavor_read_html(StringIO(str_df), parse_dates=[1], index_col=0) - tm.assert_frame_equal(expected, res[0]) + tm.assert_frame_equal(df, res[0]) res = flavor_read_html(StringIO(str_df), parse_dates=["date"], index_col=0) - tm.assert_frame_equal(expected, res[0]) + tm.assert_frame_equal(df, res[0]) def test_wikipedia_states_table(self, datapath, flavor_read_html): data = datapath("io", "data", "html", "wikipedia_states.html") diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 4fe3a97cb2386..c2c45bdca9210 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -691,7 +691,6 @@ def test_parquet_read_from_url(self, httpserver, datapath, df_compat, engine): class TestParquetPyArrow(Base): - @pytest.mark.xfail(reason="datetime_with_nat unit doesn't round-trip") def test_basic(self, pa, df_full): df = df_full pytest.importorskip("pyarrow", "11.0.0") @@ -732,10 +731,6 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full): expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( "M8[ns]" ) - else: - expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( - "M8[ms]" - ) tm.assert_frame_equal(res, expected) def test_duplicate_columns(self, pa): @@ -1045,10 +1040,6 @@ def test_read_dtype_backend_pyarrow_config(self, pa, df_full): pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels")) ) - expected["datetime_with_nat"] = expected["datetime_with_nat"].astype( - "timestamp[ms][pyarrow]" - ) - check_round_trip( df, engine=pa, diff --git a/pandas/tests/io/test_sql.py 
b/pandas/tests/io/test_sql.py index 1b9ae5d8e7209..5fcb4cbec7579 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -1858,10 +1858,8 @@ def test_api_custom_dateparsing_error( if conn_name == "postgresql_adbc_types" and pa_version_under14p1: expected["DateCol"] = expected["DateCol"].astype("datetime64[ns]") - elif "postgres" in conn_name or "mysql" in conn_name: - expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") else: - expected["DateCol"] = expected["DateCol"].astype("datetime64[s]") + expected["DateCol"] = expected["DateCol"].astype("datetime64[us]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index d9bd89af61aaf..a3a6f1f151fae 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -243,7 +243,7 @@ def test_resample_empty_sum_string(string_dtype_no_object, min_count): result = rs.sum(min_count=min_count) value = "" if min_count == 0 else pd.NA - index = date_range(start="2000-01-01", freq="20s", periods=2, unit="s") + index = date_range(start="2000-01-01", freq="20s", periods=2, unit="us") expected = Series(value, index=index, dtype=dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index d0ff950e7985f..77f852826a2f7 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -21,12 +21,12 @@ "float64": [1.1, np.nan, 3.3], "category": Categorical(["X", "Y", "Z"]), "object": ["a", "b", "c"], - "datetime64[s]": [ + "datetime64[us]": [ pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02"), pd.Timestamp("2011-01-03"), ], - "datetime64[s, US/Eastern]": [ + "datetime64[us, US/Eastern]": [ pd.Timestamp("2011-01-01", tz="US/Eastern"), pd.Timestamp("2011-01-02", tz="US/Eastern"), pd.Timestamp("2011-01-03", tz="US/Eastern"), diff --git 
a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 0cf3192ea3a74..564a761a74169 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -358,7 +358,7 @@ def test_concat_tz_series_tzlocal(self): result = concat([Series(x), Series(y)], ignore_index=True) tm.assert_series_equal(result, Series(x + y)) - assert result.dtype == "datetime64[s, tzlocal()]" + assert result.dtype == "datetime64[us, tzlocal()]" def test_concat_tz_series_with_datetimelike(self): # see gh-12620: tz and timedelta diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 63332fe4658e5..ef812a723a4c9 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -1,5 +1,3 @@ -from datetime import datetime - import numpy as np import pytest @@ -452,9 +450,8 @@ def test_datetime_bin(conv): bins = [conv(v) for v in bin_data] result = Series(cut(data, bins=bins)) - if type(bins[0]) is datetime: - # The bins have microsecond dtype -> so does result - expected = expected.astype("interval[datetime64[us]]") + if type(bins[0]) is np.datetime64: + expected = expected.astype("interval[datetime64[s]]") expected = expected.astype(CategoricalDtype(ordered=True)) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py index cb7ac5fa6f1da..50c2c2c96eb6c 100644 --- a/pandas/tests/scalar/timestamp/methods/test_tz_localize.py +++ b/pandas/tests/scalar/timestamp/methods/test_tz_localize.py @@ -72,7 +72,7 @@ def test_tz_localize_ambiguous(self): ts_dst = ts.tz_localize("US/Eastern", ambiguous=True) ts_no_dst = ts.tz_localize("US/Eastern", ambiguous=False) - assert ts_no_dst._value - ts_dst._value == 3600 + assert ts_no_dst._value - ts_dst._value == 3600_000_000 msg = re.escape( "'ambiguous' parameter must be one of: " "True, False, 'NaT', 
'raise' (default)" @@ -140,7 +140,7 @@ def test_tz_localize_nonexistent(self, stamp, tz): def test_tz_localize_nonexistent_shift( self, stamp, tz, forward_expected, backward_expected ): - ts = Timestamp(stamp) + ts = Timestamp(stamp).as_unit("s") forward_ts = ts.tz_localize(tz, nonexistent="shift_forward") assert forward_ts == Timestamp(forward_expected, tz=tz) backward_ts = ts.tz_localize(tz, nonexistent="shift_backward") @@ -198,7 +198,7 @@ def test_tz_localize_ambiguous_compat(self): result_pytz = naive.tz_localize(pytz_zone, ambiguous=False) result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=False) assert result_pytz._value == result_dateutil._value - assert result_pytz._value == 1382835600 + assert result_pytz._value == 1382835600000000 # fixed ambiguous behavior # see gh-14621, GH#45087 @@ -210,7 +210,7 @@ def test_tz_localize_ambiguous_compat(self): result_pytz = naive.tz_localize(pytz_zone, ambiguous=True) result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=True) assert result_pytz._value == result_dateutil._value - assert result_pytz._value == 1382832000 + assert result_pytz._value == 1382832000000000 # see gh-14621 assert str(result_pytz) == str(result_dateutil) diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 70aded030e95a..1f518993b9481 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -427,31 +427,31 @@ class TestTimestampResolutionInference: def test_construct_from_time_unit(self): # GH#54097 only passing a time component, no date ts = Timestamp("01:01:01.111") - assert ts.unit == "ms" + assert ts.unit == "us" def test_constructor_str_infer_reso(self): # non-iso8601 path # _parse_delimited_date path ts = Timestamp("01/30/2023") - assert ts.unit == "s" + assert ts.unit == "us" # _parse_dateabbr_string path ts = Timestamp("2015Q1") - assert ts.unit == "s" + assert ts.unit == "us" # 
dateutil_parse path ts = Timestamp("2016-01-01 1:30:01 PM") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("2016 June 3 15:25:01.345") - assert ts.unit == "ms" + assert ts.unit == "us" ts = Timestamp("300-01-01") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("300 June 1:30:01.300") - assert ts.unit == "ms" + assert ts.unit == "us" # dateutil path -> don't drop trailing zeros ts = Timestamp("01-01-2013T00:00:00.000000000+0000") @@ -467,10 +467,10 @@ def test_constructor_str_infer_reso(self): # GH#56208 minute reso through the ISO8601 path with tz offset ts = Timestamp("2020-01-01 00:00+00:00") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("2020-01-01 00+00:00") - assert ts.unit == "s" + assert ts.unit == "us" @pytest.mark.parametrize("method", ["now", "today"]) def test_now_today_unit(self, method): @@ -507,10 +507,10 @@ def test_construct_from_string_invalid_raises(self): def test_constructor_from_iso8601_str_with_offset_reso(self): # GH#49737 ts = Timestamp("2016-01-01 04:05:06-01:00") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("2016-01-01 04:05:06.000-01:00") - assert ts.unit == "ms" + assert ts.unit == "us" ts = Timestamp("2016-01-01 04:05:06.000000-01:00") assert ts.unit == "us" @@ -523,7 +523,7 @@ def test_constructor_from_date_second_reso(self): # reso, i.e. 
seconds obj = date(2012, 9, 1) ts = Timestamp(obj) - assert ts.unit == "s" + assert ts.unit == "us" def test_constructor_datetime64_with_tz(self): # GH#42288, GH#24559 @@ -823,10 +823,10 @@ def test_out_of_bounds_string(self): Timestamp("2263-01-01").as_unit("ns") ts = Timestamp("2263-01-01") - assert ts.unit == "s" + assert ts.unit == "us" ts = Timestamp("1676-01-01") - assert ts.unit == "s" + assert ts.unit == "us" def test_barely_out_of_bounds(self): # GH#19529 @@ -877,7 +877,7 @@ def test_out_of_bounds_string_consistency(self, arg): Timestamp(arg).as_unit("ns") ts = Timestamp(arg) - assert ts.unit == "s" + assert ts.unit == "us" assert ts.year == ts.month == ts.day == 1 def test_min_valid(self): @@ -1070,7 +1070,7 @@ def test_timestamp_nano_range(nano): def test_non_nano_value(): # https://github.com/pandas-dev/pandas/issues/49076 - result = Timestamp("1800-01-01", unit="s").value + result = Timestamp("1800-01-01").value # `.value` shows nanoseconds, even though unit is 's' assert result == -5364662400000000000 @@ -1078,14 +1078,14 @@ def test_non_nano_value(): msg = ( r"Cannot convert Timestamp to nanoseconds without overflow. 
" r"Use `.asm8.view\('i8'\)` to cast represent Timestamp in its " - r"own unit \(here, s\).$" + r"own unit \(here, us\).$" ) ts = Timestamp("0300-01-01") with pytest.raises(OverflowError, match=msg): ts.value # check that the suggested workaround actually works result = ts.asm8.view("i8") - assert result == -52700112000 + assert result == -52700112000000000 @pytest.mark.parametrize("na_value", [None, np.nan, np.datetime64("NaT"), NaT, NA]) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index f69c90ced2828..db4bd3ab36b5f 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -370,7 +370,7 @@ def test_dt_round_tz_nonexistent(self, method, ts_str, freq): tm.assert_series_equal(result, expected) result = getattr(ser.dt, method)(freq, nonexistent="NaT") - expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz) + expected = Series([pd.NaT], dtype="datetime64[us]").dt.tz_localize(result.dt.tz) tm.assert_series_equal(result, expected) with pytest.raises(ValueError, match="2018-03-11 02:00:00"): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index f894005296781..0a61a4168c0d2 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1483,7 +1483,7 @@ class TestCoercionDatetime64HigherReso(CoercionTest): def obj(self, exp_dtype): idx = date_range("2011-01-01", freq="D", periods=4, unit="s") if exp_dtype == "m8[ms]": - idx = idx - Timestamp("1970-01-01") + idx = idx - Timestamp("1970-01-01").as_unit("s") assert idx.dtype == "m8[s]" elif exp_dtype == "M8[ms, UTC]": idx = idx.tz_localize("UTC") @@ -1493,7 +1493,7 @@ def obj(self, exp_dtype): def val(self, exp_dtype): ts = Timestamp("2011-01-02 03:04:05.678").as_unit("ms") if exp_dtype == "m8[ms]": - return ts - Timestamp("1970-01-01") + return ts - 
Timestamp("1970-01-01").as_unit("ms") elif exp_dtype == "M8[ms, UTC]": return ts.tz_localize("UTC") return ts diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index 51d6704e1905b..47f356475dd54 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -79,7 +79,10 @@ def test_combine_first_dt64(self, unit): s1 = Series([np.nan, "2011"]) rs = s0.combine_first(s1) - xp = Series([datetime(2010, 1, 1), "2011"], dtype=f"datetime64[{unit}]") + xp = Series( + [datetime(2010, 1, 1), "2011"], + dtype=f"datetime64[{'ns' if unit == 'ns' else 'us'}]", + ) tm.assert_series_equal(rs, xp) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index f53d75df83124..a1a43da1a8927 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -313,7 +313,7 @@ def test_datetime64_fillna(self): def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar): # GH#56410 dti = date_range("2016-01-01", periods=3, unit="s", tz=tz) - item = Timestamp("2016-02-03 04:05:06.789", tz=tz) + item = Timestamp("2016-02-03 04:05:06.789", tz=tz).as_unit("ms") vec = date_range(item, periods=3, unit="ms") exp_dtype = "M8[ms]" if tz is None else "M8[ms, UTC]" @@ -340,8 +340,12 @@ def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar): ) def test_timedelta64_fillna_mismatched_reso_no_rounding(self, scalar): # GH#56410 - tdi = date_range("2016-01-01", periods=3, unit="s") - Timestamp("1970-01-01") - item = Timestamp("2016-02-03 04:05:06.789") - Timestamp("1970-01-01") + tdi = date_range("2016-01-01", periods=3, unit="s") - Timestamp( + "1970-01-01" + ).as_unit("s") + item = (Timestamp("2016-02-03 04:05:06.789") - Timestamp("1970-01-01")).as_unit( + "ms" + ) vec = timedelta_range(item, periods=3, unit="ms") expected = Series([item, tdi[1], tdi[2]], 
dtype="m8[ms]") diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index 3e3eb36112680..d67d0b5235aba 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -32,7 +32,7 @@ def test_from_csv(self, datetime_series, string_series, temp_file): datetime_series.to_csv(path, header=False) ts = self.read_csv(path, parse_dates=True) expected = datetime_series.copy() - expected.index = expected.index.as_unit("s") + expected.index = expected.index.as_unit("us") tm.assert_series_equal(expected, ts, check_names=False) assert ts.name is None @@ -59,7 +59,6 @@ def test_from_csv(self, datetime_series, string_series, temp_file): series = self.read_csv(path, sep="|", parse_dates=True) check_series = Series({datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}) - check_series.index = check_series.index.as_unit("s") tm.assert_series_equal(check_series, series) series = self.read_csv(path, sep="|", parse_dates=False) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 6d991235958af..513ac5d4cb63a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1163,7 +1163,7 @@ def test_constructor_with_datetime_tz4(self): Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), ] ) - assert ser.dtype == "datetime64[s, US/Pacific]" + assert ser.dtype == "datetime64[us, US/Pacific]" assert lib.infer_dtype(ser, skipna=True) == "datetime64" def test_constructor_with_datetime_tz3(self): @@ -1408,10 +1408,10 @@ def create_data(constructor): result_datetime = Series(data_datetime) result_Timestamp = Series(data_Timestamp) - tm.assert_series_equal(result_datetime64, expected) tm.assert_series_equal( - result_datetime, expected.set_axis(expected.index.as_unit("us")) + result_datetime64, expected.set_axis(expected.index.as_unit("s")) ) + tm.assert_series_equal(result_datetime, expected) 
tm.assert_series_equal(result_Timestamp, expected) def test_constructor_dict_tuple_indexer(self): diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index bf85199ec4f9f..fa55808353ef9 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -119,7 +119,7 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache): expected = Series( [Timestamp("19801222"), Timestamp("19801222")] + [Timestamp("19810105")] * 5, - dtype="M8[s]", + dtype="M8[us]", ) expected[2] = np.nan ser[2] = np.nan @@ -146,7 +146,7 @@ def test_to_datetime_format_YYYYMM_with_nat(self, cache): expected = Series( [Timestamp("19801201"), Timestamp("19801201")] + [Timestamp("19810101")] * 5, - dtype="M8[s]", + dtype="M8[us]", ) expected[2] = np.nan ser[2] = np.nan @@ -160,7 +160,7 @@ def test_to_datetime_format_YYYYMMDD_oob_for_ns(self, cache): result = to_datetime(ser, format="%Y%m%d", errors="raise", cache=cache) expected = Series( np.array(["2012-12-31", "2014-12-31", "9999-12-31"], dtype="M8[s]"), - dtype="M8[s]", + dtype="M8[us]", ) tm.assert_series_equal(result, expected) @@ -169,7 +169,7 @@ def test_to_datetime_format_YYYYMMDD_coercion(self, cache): # GH 7930 ser = Series([20121231, 20141231, 999999999999999999999999999991231]) result = to_datetime(ser, format="%Y%m%d", errors="coerce", cache=cache) - expected = Series(["20121231", "20141231", "NaT"], dtype="M8[s]") + expected = Series(["20121231", "20141231", "NaT"], dtype="M8[us]") tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -579,7 +579,7 @@ def test_to_datetime_mixed_date_and_string(self, format): # https://github.com/pandas-dev/pandas/issues/50108 d1 = date(2020, 1, 2) res = to_datetime(["2020-01-01", d1], format=format) - expected = DatetimeIndex(["2020-01-01", "2020-01-02"], dtype="M8[s]") + expected = DatetimeIndex(["2020-01-01", "2020-01-02"], dtype="M8[us]") tm.assert_index_equal(res, expected) @pytest.mark.parametrize( @@ 
-641,8 +641,6 @@ def test_to_datetime_mixed_datetime_and_string_with_format( ts1 = constructor(args[0]) ts2 = args[1] result = to_datetime([ts1, ts2], format=fmt, utc=utc) - if constructor is Timestamp: - expected = expected.as_unit("s") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -714,7 +712,7 @@ def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed( "%Y-%m-%d %H:%M:%S%z", DatetimeIndex( ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"], - dtype="datetime64[s, UTC]", + dtype="datetime64[us, UTC]", ), id="ISO8601, UTC", ), @@ -722,7 +720,7 @@ def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed( "%Y-%d-%m %H:%M:%S%z", DatetimeIndex( ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"], - dtype="datetime64[s, UTC]", + dtype="datetime64[us, UTC]", ), id="non-ISO8601, UTC", ), @@ -1159,7 +1157,7 @@ def test_to_datetime_tz(self, cache): result = to_datetime(arr, cache=cache) expected = DatetimeIndex( ["2013-01-01 13:00:00", "2013-01-02 14:00:00"], tz="US/Pacific" - ).as_unit("s") + ).as_unit("us") tm.assert_index_equal(result, expected) def test_to_datetime_tz_mixed(self, cache): @@ -1178,7 +1176,7 @@ def test_to_datetime_tz_mixed(self, cache): result = to_datetime(arr, cache=cache, errors="coerce") expected = DatetimeIndex( - ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[s, US/Pacific]" + ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[us, US/Pacific]" ) tm.assert_index_equal(result, expected) @@ -1471,17 +1469,15 @@ def test_to_datetime_cache_scalar(self): assert result == expected @pytest.mark.parametrize( - "datetimelikes,expected_values,exp_unit", + "datetimelikes,expected_values", ( ( (None, np.nan) + (NaT,) * start_caching_at, (NaT,) * (start_caching_at + 2), - "s", ), ( (None, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, (NaT, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, - "s", ), ( (None,) @@ -1489,12 +1485,11 @@ def 
test_to_datetime_cache_scalar(self): + ("2012 July 26", Timestamp("2012-07-26")), (NaT,) * (start_caching_at + 1) + (Timestamp("2012-07-26"), Timestamp("2012-07-26")), - "s", ), ), ) def test_convert_object_to_datetime_with_cache( - self, datetimelikes, expected_values, exp_unit + self, datetimelikes, expected_values ): # GH#39882 ser = Series( @@ -1502,10 +1497,10 @@ def test_convert_object_to_datetime_with_cache( dtype="object", ) result_series = to_datetime(ser, errors="coerce") - expected_series = Series( - expected_values, - dtype=f"datetime64[{exp_unit}]", - ) + expected_series = Series(expected_values, dtype="datetime64[us]") + if expected_series.isna().all(): + # TODO should this also be `us`? + expected_series = expected_series.astype("datetime64[s]") tm.assert_series_equal(result_series, expected_series) @pytest.mark.parametrize( @@ -1580,13 +1575,15 @@ def test_to_datetime_coerce_oob(self, string_arg, format, outofbounds): format.startswith("%B") ^ outofbounds.startswith("J") ): # the strings don't match the given format, so they raise and we coerce - expected = DatetimeIndex([datetime(2018, 3, 1), NaT], dtype="M8[s]") + expected = DatetimeIndex([datetime(2018, 3, 1), NaT], dtype="M8[us]") elif isinstance(outofbounds, datetime): expected = DatetimeIndex( [datetime(2018, 3, 1), outofbounds], dtype="M8[us]" ) else: - expected = DatetimeIndex([datetime(2018, 3, 1), outofbounds], dtype="M8[s]") + expected = DatetimeIndex( + [datetime(2018, 3, 1), outofbounds], dtype="M8[us]" + ) tm.assert_index_equal(result, expected) def test_to_datetime_malformed_no_raise(self): @@ -1646,7 +1643,7 @@ def test_iso_8601_strings_with_different_offsets_utc(self): result = to_datetime(ts_strings, utc=True) expected = DatetimeIndex( [Timestamp(2015, 11, 18, 10), Timestamp(2015, 11, 18, 10), NaT], tz="UTC" - ).as_unit("s") + ).as_unit("us") tm.assert_index_equal(result, expected) def test_mixed_offsets_with_native_datetime_utc_false_raises(self): @@ -1672,7 +1669,7 @@ def 
test_non_iso_strings_with_tz_offset(self): result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2) expected = DatetimeIndex( [datetime(2018, 3, 1, 12, tzinfo=timezone(timedelta(minutes=240)))] * 2 - ).as_unit("s") + ).as_unit("us") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -1695,7 +1692,7 @@ def test_to_datetime_with_format_out_of_bounds(self, dt_str): # GH 9107 res = to_datetime(dt_str, format="%Y%m%d") dtobj = datetime.strptime(dt_str, "%Y%m%d") - expected = Timestamp(dtobj).as_unit("s") + expected = Timestamp(dtobj).as_unit("us") assert res == expected assert res.unit == expected.unit @@ -2216,7 +2213,7 @@ def test_dataframe_utc_true(self): df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) result = to_datetime(df, utc=True) expected = Series( - np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[s]") + np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[us]") ).dt.tz_localize("UTC") tm.assert_series_equal(result, expected) @@ -2422,7 +2419,7 @@ def test_to_datetime_with_space_in_series(self, cache): result_coerce = to_datetime(ser, errors="coerce", cache=cache) expected_coerce = Series( [datetime(2006, 10, 18), datetime(2008, 10, 18), NaT] - ).dt.as_unit("s") + ).dt.as_unit("us") tm.assert_series_equal(result_coerce, expected_coerce) @td.skip_if_not_us_locale @@ -2543,7 +2540,7 @@ def test_string_na_nat_conversion(self, cache): strings = np.array(["1/1/2000", "1/2/2000", np.nan, "1/4/2000"], dtype=object) - expected = np.empty(4, dtype="M8[s]") + expected = np.empty(4, dtype="M8[us]") for i, val in enumerate(strings): if isna(val): expected[i] = iNaT @@ -2588,7 +2585,7 @@ def test_string_na_nat_conversion_with_name(self, cache): result = to_datetime(series, cache=cache) dresult = to_datetime(dseries, cache=cache) - expected = Series(np.empty(5, dtype="M8[s]"), index=idx) + expected = Series(np.empty(5, dtype="M8[us]"), index=idx) for i in range(5): x = series.iloc[i] if isna(x): @@ -2628,7 +2625,7 @@ 
def test_dayfirst(self, cache): arr = ["10/02/2014", "11/02/2014", "12/02/2014"] expected = DatetimeIndex( [datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)] - ).as_unit("s") + ).as_unit("us") idx1 = DatetimeIndex(arr, dayfirst=True) idx2 = DatetimeIndex(np.array(arr), dayfirst=True) idx3 = to_datetime(arr, dayfirst=True, cache=cache) @@ -2652,7 +2649,7 @@ def test_dayfirst_warnings_valid_input(self): # CASE 1: valid input arr = ["31/12/2014", "10/03/2011"] expected = DatetimeIndex( - ["2014-12-31", "2011-03-10"], dtype="datetime64[s]", freq=None + ["2014-12-31", "2011-03-10"], dtype="datetime64[us]", freq=None ) # A. dayfirst arg correct, no warning @@ -2757,7 +2754,7 @@ def test_to_datetime_consistent_format(self, cache): ser = Series(np.array(data)) result = to_datetime(ser, cache=cache) expected = Series( - ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[s]" + ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[us]" ) tm.assert_series_equal(result, expected) @@ -2769,7 +2766,9 @@ def test_to_datetime_series_with_nans(self, cache): ) ) result = to_datetime(ser, cache=cache) - expected = Series(["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[s]") + expected = Series( + ["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[us]" + ) tm.assert_series_equal(result, expected) def test_to_datetime_series_start_with_nans(self, cache): @@ -2788,7 +2787,7 @@ def test_to_datetime_series_start_with_nans(self, cache): result = to_datetime(ser, cache=cache) expected = Series( - [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[s]" + [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[us]" ) tm.assert_series_equal(result, expected) @@ -2802,7 +2801,7 @@ def test_infer_datetime_format_tz_name(self, tz_name, offset): result = to_datetime(ser) tz = timezone(timedelta(minutes=offset)) expected = Series([Timestamp("2019-02-02 08:07:13").tz_localize(tz)]) - expected = 
expected.dt.as_unit("s") + expected = expected.dt.as_unit("us") tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -2966,9 +2965,9 @@ def test_parsers(self, date_str, expected, cache): reso = { "nanosecond": "ns", "microsecond": "us", - "millisecond": "ms", - "second": "s", - }.get(reso_attrname, "s") + "millisecond": "us", + "second": "us", + }.get(reso_attrname, "us") result2 = to_datetime(date_str, yearfirst=yearfirst) result3 = to_datetime([date_str], yearfirst=yearfirst) # result5 is used below @@ -3413,7 +3412,7 @@ def test_empty_string_datetime(errors, args, format): # coerce empty string to pd.NaT result = to_datetime(td, format=format, errors=errors) - expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[s]") + expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[us]") tm.assert_series_equal(expected, result) @@ -3598,7 +3597,7 @@ def test_to_datetime_with_empty_str_utc_false_format_mixed(): # GH 50887 vals = ["2020-01-01 00:00+00:00", ""] result = to_datetime(vals, format="mixed") - expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[s, UTC]") + expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[us, UTC]") tm.assert_index_equal(result, expected) # Check that a couple of other similar paths work the same way @@ -3771,3 +3770,77 @@ def test_to_datetime_wrapped_datetime64_ps(): ["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None ) tm.assert_index_equal(result, expected) + + +class TestToDatetimeInferUnit: + @pytest.mark.parametrize( + "hour,unit", + [ + ("", "us"), + ("T09:00", "us"), + ("T09:00:00", "us"), + ("T09:00:00.123", "us"), + ("T09:00:00.123456", "us"), + ("T09:00:00.123456789", "ns"), + ("T09:00:00.123456789123", "ns"), + ], + ) + def test_strings(self, hour, unit): + result = to_datetime(["2020-01-01" + hour, "2020-01-02" + hour]) + assert result.dtype == f"datetime64[{unit}]" + + # parsing from out of bounds date does not actually 
work + # def test_strings_out_of_bounds(self): + # pd.to_datetime(["-290301-01-01"], format="ISO8601") + + @pytest.mark.parametrize( + "dt", + [ + datetime(2020, 1, 1), + datetime(2020, 1, 1, 9, 0, 30), + datetime(2020, 1, 1, 9, 0, 30, 123), + datetime(2020, 1, 1, 9, 0, 30, 123456), + ], + ) + def test_datetime_datetime(self, dt): + # range of datetime.datetime/date objects are always supported for us + result = to_datetime([dt, dt]) + assert result.dtype == "datetime64[us]" + + @pytest.mark.parametrize("year", [2012, 9999]) + def test_datetime_date(self, year): + dt = date(year, 1, 1) + result = to_datetime([dt, dt]) + assert result.dtype == "datetime64[us]" + + @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + def test_np_datetime64_array(self, unit): + # numpy datetime64 already has a unit -> preserve that in this case + arr = np.array(["2020-01-01T09:00:30.123456"], dtype=f"datetime64[{unit}]") + result = to_datetime(arr) + assert result.dtype == arr.dtype + + @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + def test_np_datetime64_objects(self, unit): + dt = np.datetime64("2020-01-01T09:00:30.123456", unit) + result = to_datetime([dt, dt]) + assert result.dtype == dt.dtype + + @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + def test_timestamp_objects(self, unit): + ts = Timestamp("2020-01-01T09:00:30").as_unit(unit) + result = to_datetime([ts, ts]) + assert result.dtype == f"datetime64[{unit}]" + + # @pytest.mark.parametrize("year", [2012, 9999]) + # def test_dataframe_components(self, year): + # df = pd.DataFrame({ + # "year": [year, year], + # "month": [1, 1], + # "day": [1, 2], + # "hour": [9, 10], + # "minute": [0, 30], + # "second": [30, 45], + # }) + # result = to_datetime(df) + # assert result.dtype == "datetime64[us]" diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index fc0000553049e..d319f4674e6d3 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ 
b/pandas/tests/tslibs/test_array_to_datetime.py @@ -45,7 +45,7 @@ def test_infer_homogeoneous_date_objects(self): arr = np.array([None, dt2, dt2, dt2], dtype=object) result, tz = tslib.array_to_datetime(arr, creso=creso_infer) assert tz is None - expected = np.array([np.datetime64("NaT"), dt2, dt2, dt2], dtype="M8[s]") + expected = np.array([np.datetime64("NaT"), dt2, dt2, dt2], dtype="M8[us]") tm.assert_numpy_array_equal(result, expected) def test_infer_homogeoneous_dt64(self): @@ -111,7 +111,7 @@ def test_array_to_datetime_with_tz_resolution(self): tz = tzoffset("custom", 3600) vals = np.array(["2016-01-01 02:03:04.567", NaT], dtype=object) res = tslib.array_to_datetime_with_tz(vals, tz, False, False, creso_infer) - assert res.dtype == "M8[ms]" + assert res.dtype == "M8[us]" vals2 = np.array([datetime(2016, 1, 1, 2, 3, 4), NaT], dtype=object) res2 = tslib.array_to_datetime_with_tz(vals2, tz, False, False, creso_infer) @@ -155,7 +155,7 @@ def test_parsing_valid_dates(data, expected): arr = np.array(data, dtype=object) result, _ = tslib.array_to_datetime(arr) - expected = np.array(expected, dtype="M8[s]") + expected = np.array(expected, dtype="M8[us]") tm.assert_numpy_array_equal(result, expected) @@ -209,10 +209,10 @@ def test_parsing_different_timezone_offsets(): @pytest.mark.parametrize( "invalid_date,exp_unit", [ - (date(1000, 1, 1), "s"), + (date(1000, 1, 1), "us"), (datetime(1000, 1, 1), "us"), - ("1000-01-01", "s"), - ("Jan 1, 1000", "s"), + ("1000-01-01", "us"), + ("Jan 1, 1000", "us"), (np.datetime64("1000-01-01"), "s"), ], ) @@ -235,7 +235,7 @@ def test_coerce_outside_ns_bounds_one_valid(): result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["1000-01-01T00:00:00.000000000", "2000-01-01T00:00:00.000000000"] - expected = np.array(expected, dtype="M8[s]") + expected = np.array(expected, dtype="M8[us]") tm.assert_numpy_array_equal(result, expected) @@ -245,13 +245,13 @@ def test_coerce_of_invalid_datetimes(): # With coercing, the invalid 
dates becomes iNaT result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] - tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[s]")) + tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[us]")) # With coercing, the invalid dates becomes iNaT result, _ = tslib.array_to_datetime(arr, errors="coerce") expected = ["2013-01-01T00:00:00.000000000", iNaT, iNaT] - tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[s]")) + tm.assert_numpy_array_equal(result, np.array(expected, dtype="M8[us]")) def test_to_datetime_barely_out_of_bounds(): diff --git a/pandas/tests/tslibs/test_strptime.py b/pandas/tests/tslibs/test_strptime.py index d726006b03f6d..0905d6926bf2f 100644 --- a/pandas/tests/tslibs/test_strptime.py +++ b/pandas/tests/tslibs/test_strptime.py @@ -36,7 +36,9 @@ def test_array_strptime_resolution_inference_homogeneous_strings(self, tz): fmt = "%Y-%m-%d %H:%M:%S" dtstr = dt.strftime(fmt) arr = np.array([dtstr] * 3, dtype=object) - expected = np.array([dt.replace(tzinfo=None)] * 3, dtype="M8[s]") + expected = np.array( + [dt.replace(tzinfo=None, microsecond=0)] * 3, dtype="M8[us]" + ) res, _ = array_strptime(arr, fmt=fmt, utc=False, creso=creso_infer) tm.assert_numpy_array_equal(res, expected) @@ -97,14 +99,14 @@ def test_array_strptime_resolution_todaynow(self): def test_array_strptime_str_outside_nano_range(self): vals = np.array(["2401-09-15"], dtype=object) - expected = np.array(["2401-09-15"], dtype="M8[s]") + expected = np.array(["2401-09-15"], dtype="M8[us]") fmt = "ISO8601" res, _ = array_strptime(vals, fmt=fmt, creso=creso_infer) tm.assert_numpy_array_equal(res, expected) # non-iso -> different path vals2 = np.array(["Sep 15, 2401"], dtype=object) - expected2 = np.array(["2401-09-15"], dtype="M8[s]") + expected2 = np.array(["2401-09-15"], dtype="M8[us]") fmt2 = "%b %d, %Y" res2, _ = array_strptime(vals2, fmt=fmt2, creso=creso_infer) 
tm.assert_numpy_array_equal(res2, expected2) diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index 60bbcf08ce8e7..a980476f2ee3b 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -55,7 +55,7 @@ def test_tzlocal_offset(): ts = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()) offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) - offset = offset.total_seconds() + offset = offset.total_seconds() * 1_000_000 # convert to microseconds assert ts._value + offset == Timestamp("2011-01-01")._value