Skip to content

Commit d4f341c

Browse files
committed
提升数据准确性,优化部分函数性能
1 parent ee51122 commit d4f341c

File tree

4 files changed

+113
-35
lines changed

4 files changed

+113
-35
lines changed

changelog.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,74 @@
11
# Changelog
22

3+
## v0.5.5(2025-03-15)
4+
5+
### Fixed
6+
7+
- `get_realtime_quotes` 盘中数据存在重复数据
8+
9+
### Changed
10+
11+
- `get_all_company_performance` 和 `get_latest_holder_number` 速度优化
12+
13+
---
14+
315
## v0.5.4(2025-03-10)
416

517
### Fixed
618

719
- `get_realtime_quotes` 分页更加智能
820

21+
---
22+
923
## v0.5.3(2025-02-17)
1024

1125
### Fixed
1226

1327
- `get_realtime_quotes` 自动分页以获取完整数据
1428

29+
---
30+
1531
## v0.5.2(2024-04-24)
1632

1733
### Fixed
1834

1935
- 补充 setup.py 缺失的依赖
2036

37+
---
38+
2139
## v0.5.1(2024-02-22)
2240

2341
### Added
2442

2543
- 增加用于检索股票市场的参数
2644

45+
---
46+
2747
## v0.5.0(2023-01-08)
2848

2949
### Added
3050

3151
- 增加获取 广期所 期货行情的功能
3252
- 添加扩展市场类型的函数
3353

54+
---
55+
3456
## v0.4.9(2022-07-29)
3557

3658
### Added
3759

3860
- 为 `get_quote_history` 添加更多选项
3961

62+
---
63+
4064
## v0.4.8(2022-06-30)
4165

4266
### Fixed
4367

4468
- 修复 `stock` 模块的 `get_base_info` 函数的 bug
4569

70+
---
71+
4672
## v0.4.7(2022-06-24)
4773

4874
### Added
@@ -53,13 +79,17 @@
5379

5480
- 修复部分函数命名拼写错误
5581

82+
---
83+
5684
## v0.4.6(2022-06-19)
5785

5886
### Changed
5987

6088
- 从 `stock` 模块中抽离出获取证券最新行情的函数
6189
- 增强 `common` 模块中的配置以支持自定义获取更多属性
6290

91+
---
92+
6393
## v0.4.5(2022-06-05)
6494

6595
### Fixed
@@ -70,6 +100,8 @@
70100

71101
- 为 `futures` 和 `bond` 模块添加获取最新交易日成交明细的功能
72102

103+
---
104+
73105
## v0.4.4(2022-04-29)
74106

75107
### Added
@@ -80,13 +112,17 @@
80112

81113
- 将 `stock` 模块里面的 `get_belong_plate` 重命名为 `get_belong_board`
82114

115+
---
116+
83117
## v0.4.3(2022-04-28)
84118

85119
### Added
86120

87121
- 为 `stock` 模块添加 `get_belong_plate` 函数,以支持获取股票所属板块
88122
- 为 `stock` 模块添加 `get_deal_detail` 函数,以支持获取股票最新交易日的成交明细
89123

124+
---
125+
90126
## v0.4.2(2022-03-07)
91127

92128
### Changed
@@ -139,6 +175,8 @@
139175
- 添加获取多个板块成分股实时行情以及板块历史行情的功能
140176
- 添加获取 ETF、LOF 基金实时行情的功能
141177

178+
---
179+
142180
## v0.3.7(2021-08-30)
143181

144182
### Added
@@ -147,6 +185,8 @@
147185
- 添加获取沪深 A 股股东数量的功能
148186
- 为 `stock` 模块添加龙虎榜数据获取功能
149187

188+
---
189+
150190
### Fixed
151191

152192
- 修复 `fund` 模块获取基金代码的函数产生的 bug

efinance/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
__title__ = "efinance"
2-
__version__ = "0.5.4"
2+
__version__ = "0.5.5"
33
__author__ = "micro sheep"
44
__url__ = "https://github.com/Micro-sheep/efinance"
55
__author_email__ = "[email protected]"

efinance/common/getter.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from datetime import datetime
22
from typing import Dict, List, Union
3-
3+
import numpy as np
44
import multitasking
55
import pandas as pd
66
from jsonpath import jsonpath
@@ -40,13 +40,14 @@ def get_realtime_quotes_by_fs(fs: str, **kwargs) -> pd.DataFrame:
4040

4141
def get_by_page(pn: int, pz: int):
4242
params = (
43-
("pn", f"{pn}"),
44-
("pz", f"{pz}"),
43+
("pn", pn),
44+
("pz", pz),
4545
("po", "1"),
4646
("np", "1"),
4747
("fltt", "2"),
4848
("invt", "2"),
49-
("fid", "f3"),
49+
# NOTE 按代码排序。避免多次请求顺序不一致
50+
("fid", "f12"),
5051
("fs", fs),
5152
("fields", fields),
5253
)
@@ -56,6 +57,17 @@ def get_by_page(pn: int, pz: int):
5657
).json()
5758
return json_response
5859

60+
def mixed_sort_key(series: pd.Series) -> pd.Series:
    """Build a numeric sort key for a column that mixes numbers and strings.

    String entries are mapped to NaN; every other value is kept and cast to
    float, so the result can be compared numerically.

    Parameters
    ----------
    series : pd.Series
        Column values to turn into sort keys.

    Returns
    -------
    pd.Series
        Float series with the same index as ``series``.
    """
    # Build the keys positionally in one pass: assigning by label per element
    # is slow and overwrites every row sharing a label when the index has
    # duplicates.
    keys = [np.nan if isinstance(value, str) else value for value in series]
    return pd.Series(keys, index=series.index, dtype=float)
70+
5971
json_response = get_by_page(1, pz=200)
6072
total = json_response["data"]["total"]
6173
pz = len(json_response["data"]["diff"])
@@ -70,9 +82,17 @@ def get_by_page(pn: int, pz: int):
7082
pd.DataFrame(response["data"]["diff"])[list(columns.keys())]
7183
for response in responses
7284
]
73-
df = pd.concat(dfs, axis=0, ignore_index=True).rename(columns=columns)[
74-
columns.values()
75-
]
85+
df = (
86+
pd.concat(dfs, axis=0, ignore_index=True)
87+
.rename(columns=columns)[columns.values()]
88+
.sort_values(
89+
by="涨跌幅",
90+
ascending=False,
91+
ignore_index=True,
92+
key=mixed_sort_key,
93+
)
94+
)
95+
7696
df["行情ID"] = df["市场编号"].astype(str) + "." + df["代码"].astype(str)
7797
df["市场类型"] = (
7898
df["市场编号"].astype(str).apply(lambda x: MARKET_NUMBER_DICT.get(x))

efinance/stock/getter.py

Lines changed: 45 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import sys
55
from datetime import datetime, timedelta
66
from typing import Dict, List, Union
7+
from concurrent.futures import ThreadPoolExecutor
78

89
import threading
910
import multitasking
@@ -827,28 +828,37 @@ def get_all_company_performance(date: str = None) -> pd.DataFrame:
827828
return pd.DataFrame(columns=fields.values())
828829

829830
date = f"(REPORTDATE='{date}')"
830-
page = 1
831-
dfs: List[pd.DataFrame] = []
832-
while 1:
831+
832+
def get_by_page(pn: int, pz: int):
833833
params = (
834834
("st", "NOTICE_DATE,SECURITY_CODE"),
835835
("sr", "-1,-1"),
836-
("ps", "500"),
837-
("p", f"{page}"),
836+
("ps", pz),
837+
("p", pn),
838838
("type", "RPT_LICO_FN_CPD"),
839839
("sty", "ALL"),
840840
("token", "894050c76af8597a853f5b408b759f5d"),
841841
# ! 只选沪深A股
842842
("filter", f'(SECURITY_TYPE_CODE in ("058001001","058001008")){date}'),
843843
)
844844
url = "http://datacenter-web.eastmoney.com/api/data/get"
845-
response = session.get(url, headers=EASTMONEY_REQUEST_HEADERS, params=params)
846-
items = jsonpath(response.json(), "$..data[:]")
847-
if not items:
848-
break
849-
df = pd.DataFrame(items)
850-
dfs.append(df)
851-
page += 1
845+
json_response = session.get(
846+
url, headers=EASTMONEY_REQUEST_HEADERS, params=params
847+
).json()
848+
return json_response
849+
850+
json_response = get_by_page(1, pz=500)
851+
total = json_response["result"]["count"]
852+
pz = len(jsonpath(json_response, "$..data[:]"))
853+
div, mod = divmod(total, pz)
854+
pages = div + 1 if mod else div
855+
856+
with ThreadPoolExecutor() as executor:
857+
tasks = executor.map(get_by_page, range(1, pages + 1), [pz] * pages)
858+
responses = list(tasks)
859+
860+
dfs = [pd.DataFrame(jsonpath(response, "$..data[:]")) for response in responses]
861+
852862
if len(dfs) == 0:
853863
df = pd.DataFrame(columns=fields.values())
854864
return df
@@ -940,12 +950,12 @@ def get_latest_holder_number(date: str = None) -> pd.DataFrame:
940950
"HOLD_NOTICE_DATE": "公告日期",
941951
}
942952

943-
while 1:
953+
def get_by_page(pn: int, pz: int):
944954
params = [
945955
("sortColumns", "HOLD_NOTICE_DATE,SECURITY_CODE"),
946956
("sortTypes", "-1,-1"),
947-
("pageSize", "500"),
948-
("pageNumber", page),
957+
("pageSize", pz),
958+
("pageNumber", pn),
949959
(
950960
"columns",
951961
"SECURITY_CODE,SECURITY_NAME_ABBR,END_DATE,INTERVAL_CHRATE,AVG_MARKET_CAP,AVG_HOLD_NUM,TOTAL_MARKET_CAP,TOTAL_A_SHARES,HOLD_NOTICE_DATE,HOLDER_NUM,PRE_HOLDER_NUM,HOLDER_NUM_CHANGE,HOLDER_NUM_RATIO,END_DATE,PRE_END_DATE",
@@ -963,18 +973,26 @@ def get_latest_holder_number(date: str = None) -> pd.DataFrame:
963973

964974
params = tuple(params)
965975
url = "http://datacenter-web.eastmoney.com/api/data/v1/get"
966-
response = session.get(url, headers=EASTMONEY_REQUEST_HEADERS, params=params)
967-
items = jsonpath(response.json(), "$..data[:]")
968-
if not items:
969-
break
970-
df = pd.DataFrame(items)
971-
df = df.rename(columns=fields)[fields.values()]
972-
page += 1
973-
dfs.append(df)
974-
if len(dfs) == 0:
975-
df = pd.DataFrame(columns=fields.values())
976-
return df
977-
df = pd.concat(dfs, ignore_index=True)
976+
json_response = session.get(
977+
url, headers=EASTMONEY_REQUEST_HEADERS, params=params
978+
).json()
979+
return json_response
980+
981+
json_response = get_by_page(1, pz=500)
982+
total = json_response["result"]["count"]
983+
pz = len(jsonpath(json_response, "$..data[:]"))
984+
div, mod = divmod(total, pz)
985+
pages = div + 1 if mod else div
986+
987+
if total == 0:
988+
return pd.DataFrame(columns=fields.values())
989+
990+
with ThreadPoolExecutor() as executor:
991+
tasks = executor.map(get_by_page, range(1, pages + 1), [pz] * pages)
992+
responses = list(tasks)
993+
994+
dfs = [pd.DataFrame(jsonpath(response, "$..data[:]")) for response in responses]
995+
df = pd.concat(dfs, ignore_index=True).rename(columns=fields)[fields.values()]
978996
return df
979997

980998

0 commit comments

Comments
 (0)