import dlt
import pytest
- from pendulum import datetime
+ from dlt.common import pendulum
+ from dlt.pipeline.exceptions import PipelineStepFailed

from sources.slack import slack_source
from tests.utils import ALL_DESTINATIONS, assert_load_info, load_table_counts

+ # NOTE: Since the number of users in our community Slack has grown very large, most tests exclude it
+

@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
- def test_tabel_per_channel(destination_name: str) -> None:
+ def test_table_per_channel(destination_name: str) -> None:
    pipeline = dlt.pipeline(
        pipeline_name="slack",
        destination=destination_name,
@@ -17,10 +20,10 @@ def test_tabel_per_channel(destination_name: str) -> None:

    # Set page size to ensure we use pagination
    source = slack_source(
-        start_date=datetime(2024, 1, 31),
-        end_date=datetime(2024, 2, 1),
+        start_date=pendulum.now().subtract(weeks=1),
+        end_date=pendulum.now(),
        selected_channels=["dlt-github-ci", "3-technical-help"],
-    )
+    ).with_resources("dlt-github-ci", "3-technical-help", "channels")
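+    # with_resources() narrows the run to the two channel tables plus "channels",
+    # so the (very large) users resource is not loaded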
    load_info = pipeline.run(source)
    assert_load_info(load_info)

@@ -33,8 +36,9 @@ def test_tabel_per_channel(destination_name: str) -> None:

    assert set(table_counts.keys()) >= set(expected_tables)
    assert table_counts["channels"] >= 15
-    assert table_counts[ci_table] == 6
-    assert table_counts[help_table] == 5
+    # Note: Message counts may vary with dynamic dates, so we check for > 0
+    assert table_counts[ci_table] > 0
+    assert table_counts[help_table] > 0


@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
@@ -49,12 +53,17 @@ def test_all_resources(destination_name: str) -> None:

    # Set page size to ensure we use pagination
    source = slack_source(
        page_size=40,
-        start_date=datetime(2024, 1, 31),
-        end_date=datetime(2024, 2, 1),
+        start_date=pendulum.now().subtract(weeks=1),
+        end_date=pendulum.now(),
        selected_channels=["dlt-github-ci", "1-announcements"],
        table_per_channel=False,
    )
-    load_info = pipeline.run(source)
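+    # Run everything except "users" (too large) and "access_logs" (paid plan only)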
+    almost_all_resources = [
+        source
+        for source in source.resources.keys()
+        if source != "users" and source != "access_logs"
+    ]
+    load_info = pipeline.run(source.with_resources(*almost_all_resources))
    assert_load_info(load_info)

    table_names = [t["name"] for t in pipeline.default_schema.data_tables()]
@@ -65,7 +74,26 @@ def test_all_resources(destination_name: str) -> None:
    assert set(table_counts.keys()) >= set(expected_tables)
    assert "replies" not in table_names
    assert table_counts["channels"] >= 15
-    assert table_counts["messages"] == 34
+    # Note: Message counts may vary with dynamic dates, so we check for > 0
+    assert table_counts["messages"] > 0
+
+
+# @pytest.mark.skip(reason="Access logs require paid plan")
+@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
+def test_access_logs_resource(destination_name: str) -> None:
+    pipeline = dlt.pipeline(
+        pipeline_name="slack",
+        destination=destination_name,
+        dataset_name="slack_data",
+        dev_mode=True,
+    )
+    source = slack_source(
+        start_date=pendulum.now().subtract(weeks=1),
+        end_date=pendulum.now(),
+    ).with_resources("access_logs")
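+    # On a free workspace the Slack API rejects access_logs, so the pipeline step must fail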
+    with pytest.raises(PipelineStepFailed) as exc_info:
+        pipeline.run(source)
+    assert "just available on paid accounts" in str(exc_info.value)


@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
@@ -79,19 +107,20 @@ def test_replies(destination_name: str) -> None:

    # Set page size to ensure we use pagination
    source = slack_source(
-        start_date=datetime(2023, 12, 19),
-        end_date=datetime(2024, 1, 10),
-        selected_channels=["1-announcements"],
+        start_date=pendulum.now().subtract(weeks=1),
+        end_date=pendulum.now(),
+        selected_channels=["3-technical-help"],
        replies=True,
        table_per_channel=False,
-    )
+    ).with_resources("messages", "replies")
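+    # replies appear to be built on top of messages, so both resources are selected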
    load_info = pipeline.run(source)
    assert_load_info(load_info)

    table_names = [t["name"] for t in pipeline.default_schema.data_tables()]
    table_counts = load_table_counts(pipeline, *table_names)
    assert "replies" in table_names
-    assert table_counts["replies"] >= 5
+    # Note: Reply counts may vary with dynamic dates, so we check for > 0
+    assert table_counts["replies"] > 0


@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
@@ -107,14 +136,17 @@ def test_with_merge_disposition(destination_name: str, table_per_channel: bool)
        dev_mode=True,
    )

-    # Set page size to ensure we use pagination
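+    # With table_per_channel=True each channel gets its own resource named after it;
+    # otherwise everything goes through a single "messages" resource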
+    def get_resource_names(table_per_channel: bool, channel_name: str) -> str:
+        return channel_name if table_per_channel else "messages"
+
+    channel_name = "1-announcements"
+    resource_names = get_resource_names(table_per_channel, channel_name)
    source = slack_source(
-        start_date=datetime(2023, 12, 19),
-        end_date=datetime(2024, 1, 10),
-        selected_channels=["1-announcements"],
-        replies=True,
+        start_date=pendulum.now().subtract(weeks=4),
+        end_date=pendulum.now().subtract(weeks=1),
+        selected_channels=[channel_name],
        table_per_channel=table_per_channel,
-    )
+    ).with_resources(resource_names)
    pipeline.run(source)
    table_names = [t["name"] for t in pipeline.default_schema.data_tables()]
    current_table_counts = load_table_counts(pipeline, *table_names)
@@ -126,7 +158,6 @@ def test_with_merge_disposition(destination_name: str, table_per_channel: bool)
    assert all(
        table_counts[table_name] == current_table_counts[table_name]
        for table_name in table_names
-        if table_name != "users"
    )

@@ -140,8 +171,13 @@ def test_users(destination_name: str) -> None:
    )

    # Selected just one channel to avoid loading all channels
-    source = slack_source(
-        selected_channels=["1-announcements"],
+    source = (
+        slack_source(
+            page_size=200,
+            selected_channels=["1-announcements"],
+        )
+        .with_resources("users")
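+        # add_limit(3) caps how many batches the users resource yields, keeping the test fast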
+        .add_limit(3)
    )
    load_info = pipeline.run(source)
    assert_load_info(load_info)
@@ -154,3 +190,27 @@ def test_users(destination_name: str) -> None:
    print(table_counts.keys())
    assert set(table_counts.keys()) >= set(expected_tables)
    assert table_counts["users"] >= 300  # The number of users can increase over time
+
+
+@pytest.mark.parametrize("destination_name", ALL_DESTINATIONS)
+def test_private_channels(destination_name: str) -> None:
+    pipeline = dlt.pipeline(
+        pipeline_name="slack",
+        destination=destination_name,
+        dataset_name="slack_data",
+        dev_mode=True,
+    )
+    PRIVATE_CHANNEL_NAME = "test-private-channel"
+    source = slack_source(
+        start_date=pendulum.now().subtract(weeks=1),
+        end_date=pendulum.now(),
+        selected_channels=[PRIVATE_CHANNEL_NAME],
+        include_private_channels=True,
+    ).with_resources(PRIVATE_CHANNEL_NAME)
+    load_info = pipeline.run(source)
+    assert_load_info(load_info)
+    table_names = [t["name"] for t in pipeline.default_schema.data_tables()]
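+    # dlt normalizes table identifiers, so dashes in the channel name become underscores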
+
+    expected_message_table_name = f"{PRIVATE_CHANNEL_NAME}_message".replace("-", "_")
+
+    assert expected_message_table_name in table_names