Skip to content

Commit 7b45422

Browse files
committed
chk F.covar_samp
1 parent 0cd2f90 commit 7b45422

File tree

1 file changed

+8
-5
lines changed

1 file changed

+8
-5
lines changed

python/pyspark/pandas/series.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3396,11 +3396,14 @@ def autocorr(self, lag: int = 1) -> float:
33963396
else:
33973397
lag_scol = F.lag(scol, lag).over(Window.orderBy(NATURAL_ORDER_COLUMN_NAME))
33983398
lag_col_name = verify_temp_column_name(sdf, "__autocorr_lag_tmp_col__")
3399-
corr = (
3400-
sdf.withColumn(lag_col_name, lag_scol)
3401-
.select(F.corr(scol, F.col(lag_col_name)))
3402-
.head()[0]
3403-
)
3399+
3400+
sdf_lag = sdf.withColumn(lag_col_name, lag_scol)
3401+
if is_ansi_mode_enabled(sdf.sparkSession):
3402+
cov_value = sdf_lag.select(F.covar_samp(scol, F.col(lag_col_name))).first()[0]
3403+
if cov_value is None or cov_value == 0.0:
3404+
return np.nan
3405+
corr = sdf_lag.select(F.corr(scol, F.col(lag_col_name))).head()[0]
3406+
34043407
return np.nan if corr is None else corr
34053408

34063409
def corr(

0 commit comments

Comments
 (0)