@@ -161,84 +161,107 @@ def get_file_content(self, file_path: str, sha: Optional[str] = None) -> Optiona
161
161
162
162
# Skip files that don't need processing
163
163
if any (file_path .endswith (ext ) for ext in [
164
- # Package manager and dependencies
165
- '.lock' , '.pnpm-lock.yaml' , 'package-lock.json' , 'Gemfile.lock' ,
166
- 'poetry.lock' , 'Cargo.lock' , 'composer.lock' ,
167
- '.pyc' , '.pyo' , 'requirements.txt' , '.gitignore' ,
168
-
169
- # Binaries and compiled files
170
- '.exe' , '.dll' , '.so' , '.dylib' , '.bin' , '.obj' , '.o' , '.a' ,
164
+ # Binary and Compiled Files
165
+ '.exe' , '.dll' , '.so' , '.dylib' , '.bin' , '.obj' , '.o' , '.a' ,
171
166
'.lib' , '.jar' , '.war' , '.ear' , '.class' , '.pdb' , '.ilk' , '.exp' ,
172
- '.apk' , '.aab' , '.ipa' , # Mobile apps
173
- '.wasm' , # WebAssembly
167
+ '.apk' , '.aab' , '.ipa' , '.wasm' ,
174
168
175
- # Media and compressed files
169
+ # Media Files
176
170
'.png' , '.jpg' , '.jpeg' , '.gif' , '.ico' , '.bmp' , '.tiff' , '.webp' ,
177
171
'.svg' , '.eps' , '.psd' , '.ai' , '.sketch' ,
178
172
'.mp3' , '.mp4' , '.wav' , '.flac' , '.ogg' , '.m4a' ,
179
173
'.avi' , '.mov' , '.mkv' , '.webm' , '.wmv' , '.flv' ,
180
- '.pdf' , '.doc' , '.docx' , '.xls' , '.xlsx' , '.ppt' , '.pptx' ,
174
+ '.ttf' , '.otf' , '.eot' , '.woff' , '.woff2' ,
175
+
176
+ # Compressed and Binary Data
181
177
'.zip' , '.rar' , '.7z' , '.tar' , '.gz' , '.bz2' , '.xz' , '.tgz' ,
178
+ '.pkl' , '.pickle' ,
179
+ '.npy' , '.npz' ,
180
+ '.h5' , '.hdf5' ,
182
181
183
- # System and hidden files
184
- '.DS_Store ' , 'Thumbs.db ' , '.dockerignore ' ,
185
- '.gitattributes ' , '.gitmodules ' ,
182
+ # Lock Files and Dependencies
183
+ '.lock ' , 'package-lock.json ' , 'yarn.lock' , 'pnpm-lock.yaml ' ,
184
+ 'Gemfile.lock ' , 'poetry.lock' , 'Cargo.lock' , 'composer.lock ' ,
186
185
187
- # Documentation and resource files
188
- '.min.js' , '.min.css' , '.map' , '.po' , '.mo' , '.pot' , '.drawio' ,
189
- '.ttf' , '.otf' , '.eot' , '.woff' , '.woff2' , # Fonts
186
+ # Compiled Python
187
+ '.pyc' , '.pyo' ,
190
188
191
- # Configuration and data files
192
- '.conf' , '.config' , '.cfg' , '.ini' ,
193
- '.sqlite' , '.db' , '.mdb' , '.sql' ,
194
- '.pb' , '.pbtxt' , # Protocol buffers
195
- '.ipynb' , # Jupyter notebooks
196
- '.pkl' , '.pickle' , # Python serialized objects
197
- '.tfrecords' , '.tf' , # TensorFlow files
198
- '.onnx' , # ONNX models
199
- '.h5' , '.hdf5' , # HDF5 files
200
- '.npy' , '.npz' , # NumPy files
189
+ # System and Hidden Files
190
+ '.DS_Store' , 'Thumbs.db' ,
201
191
202
- # Build outputs
192
+ # Generated Code Files
193
+ '.g.dart' , '.freezed.dart' ,
194
+ '.pb.go' ,
195
+ '_pb2.py' , '_pb2_grpc.py' ,
196
+ '.generated.ts' , '.generated.tsx' ,
197
+ '.proto.ts' , '.proto.js' ,
203
198
'.min.js' , '.min.css' ,
204
199
'.bundle.js' , '.bundle.css' ,
205
- '.chunk.js' , '.chunk.css'
200
+ '.chunk.js' , '.chunk.css' ,
201
+
202
+ # IDE Generated
203
+ '.pbxproj' , '.xcworkspacedata' ,
204
+ '.csproj.user' , '.suo' ,
205
+ '.iml' , '.ipr' , '.iws' ,
206
+
207
+ # Map Files
208
+ '.map' , '.js.map' , '.css.map'
206
209
207
210
]) or any (pattern in file_path for pattern in [
208
- # Special directories
209
- '/node_modules/' ,
211
+ # Cache and Temporary Directories
210
212
'/__pycache__/' ,
211
213
'/.git/' ,
212
214
'/.idea/' ,
213
215
'/.vscode/' ,
214
216
'/.vs/' ,
215
217
'/.svn/' ,
216
218
'/.hg/' ,
217
- '/dist/' ,
218
- '/build/' ,
219
- '/target/' ,
220
- '/out/' ,
221
- '/bin/' ,
222
- '/obj/' ,
223
- '/Debug/' ,
224
- '/Release/' ,
225
- '/.next/' ,
226
- '/.nuxt/' ,
227
- '/vendor/' ,
228
- '/venv/' ,
229
- '/.env' ,
230
- '/coverage/' ,
231
- '/logs/' ,
232
- '/.github/' ,
233
- '/assets/' ,
234
- '/public/assets/' ,
235
- '/static/assets/' ,
236
- '/.pytest_cache/' ,
237
219
'/.sass-cache/' ,
238
220
'/.parcel-cache/' ,
239
221
'/.cache/' ,
240
222
'/tmp/' ,
241
- '/temp/'
223
+ '/temp/' ,
224
+
225
+ # Package Manager Directories
226
+ '/node_modules/' ,
227
+ '/bower_components/' ,
228
+
229
+ # Test Coverage and Reports
230
+ '/coverage/' ,
231
+ '/.nyc_output/' ,
232
+ '/.pytest_cache/' ,
233
+ '/.tox/' ,
234
+
235
+ # Environment and Runtime
236
+ '/venv/' ,
237
+ '/.env/' ,
238
+ '/.virtualenv/' ,
239
+
240
+ # Framework Generated
241
+ '/.dart_tool/' ,
242
+ '/.pub-cache/' ,
243
+ '/.angular/' ,
244
+ '/.nuxt/' ,
245
+ '/.next/' ,
246
+ '/.ipynb_checkpoints/' ,
247
+
248
+ # CI/CD
249
+ '/.github/workflows/' ,
250
+ '/.gitlab/ci/' ,
251
+ '/.circleci/' ,
252
+
253
+ # Logs
254
+ '/logs/' ,
255
+ '/log/' ,
256
+
257
+ # Binary Assets
258
+ '/assets/images/' ,
259
+ '/assets/fonts/' ,
260
+ '/assets/media/' ,
261
+ '/public/images/' ,
262
+ '/public/fonts/' ,
263
+ '/static/images/' ,
264
+ '/static/fonts/'
242
265
]):
243
266
logger .debug (f"Skipping non-processable file: { file_path } " )
244
267
return None
0 commit comments