Skip to content

Commit e242564

Browse files
author
Jet Xu
committed
Update logic to skip non-readable repo files
1 parent: 9d9cfa4 · commit: e242564

File tree

5 files changed: +85 / -56 lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,4 +160,6 @@ cython_debug/
160160
#.idea/
161161

162162
.DS_Store
163-
tests/self/
163+
tests/self/
164+
tests/self/*
165+
tests/self/*/*

llama_github/data_retrieval/github_entities.py

Lines changed: 75 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -161,84 +161,107 @@ def get_file_content(self, file_path: str, sha: Optional[str] = None) -> Optiona
161161

162162
# Skip files that don't need processing
163163
if any(file_path.endswith(ext) for ext in [
164-
# Package manager and dependencies
165-
'.lock', '.pnpm-lock.yaml', 'package-lock.json', 'Gemfile.lock',
166-
'poetry.lock', 'Cargo.lock', 'composer.lock',
167-
'.pyc', '.pyo', 'requirements.txt', '.gitignore',
168-
169-
# Binaries and compiled files
170-
'.exe', '.dll', '.so', '.dylib', '.bin', '.obj', '.o', '.a',
164+
# Binary and Compiled Files
165+
'.exe', '.dll', '.so', '.dylib', '.bin', '.obj', '.o', '.a',
171166
'.lib', '.jar', '.war', '.ear', '.class', '.pdb', '.ilk', '.exp',
172-
'.apk', '.aab', '.ipa', # Mobile apps
173-
'.wasm', # WebAssembly
167+
'.apk', '.aab', '.ipa', '.wasm',
174168

175-
# Media and compressed files
169+
# Media Files
176170
'.png', '.jpg', '.jpeg', '.gif', '.ico', '.bmp', '.tiff', '.webp',
177171
'.svg', '.eps', '.psd', '.ai', '.sketch',
178172
'.mp3', '.mp4', '.wav', '.flac', '.ogg', '.m4a',
179173
'.avi', '.mov', '.mkv', '.webm', '.wmv', '.flv',
180-
'.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
174+
'.ttf', '.otf', '.eot', '.woff', '.woff2',
175+
176+
# Compressed and Binary Data
181177
'.zip', '.rar', '.7z', '.tar', '.gz', '.bz2', '.xz', '.tgz',
178+
'.pkl', '.pickle',
179+
'.npy', '.npz',
180+
'.h5', '.hdf5',
182181

183-
# System and hidden files
184-
'.DS_Store', 'Thumbs.db', '.dockerignore',
185-
'.gitattributes', '.gitmodules',
182+
# Lock Files and Dependencies
183+
'.lock', 'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml',
184+
'Gemfile.lock', 'poetry.lock', 'Cargo.lock', 'composer.lock',
186185

187-
# Documentation and resource files
188-
'.min.js', '.min.css', '.map', '.po', '.mo', '.pot', '.drawio',
189-
'.ttf', '.otf', '.eot', '.woff', '.woff2', # Fonts
186+
# Compiled Python
187+
'.pyc', '.pyo',
190188

191-
# Configuration and data files
192-
'.conf', '.config', '.cfg', '.ini',
193-
'.sqlite', '.db', '.mdb', '.sql',
194-
'.pb', '.pbtxt', # Protocol buffers
195-
'.ipynb', # Jupyter notebooks
196-
'.pkl', '.pickle', # Python serialized objects
197-
'.tfrecords', '.tf', # TensorFlow files
198-
'.onnx', # ONNX models
199-
'.h5', '.hdf5', # HDF5 files
200-
'.npy', '.npz', # NumPy files
189+
# System and Hidden Files
190+
'.DS_Store', 'Thumbs.db',
201191

202-
# Build outputs
192+
# Generated Code Files
193+
'.g.dart', '.freezed.dart',
194+
'.pb.go',
195+
'_pb2.py', '_pb2_grpc.py',
196+
'.generated.ts', '.generated.tsx',
197+
'.proto.ts', '.proto.js',
203198
'.min.js', '.min.css',
204199
'.bundle.js', '.bundle.css',
205-
'.chunk.js', '.chunk.css'
200+
'.chunk.js', '.chunk.css',
201+
202+
# IDE Generated
203+
'.pbxproj', '.xcworkspacedata',
204+
'.csproj.user', '.suo',
205+
'.iml', '.ipr', '.iws',
206+
207+
# Map Files
208+
'.map', '.js.map', '.css.map'
206209

207210
]) or any(pattern in file_path for pattern in [
208-
# Special directories
209-
'/node_modules/',
211+
# Cache and Temporary Directories
210212
'/__pycache__/',
211213
'/.git/',
212214
'/.idea/',
213215
'/.vscode/',
214216
'/.vs/',
215217
'/.svn/',
216218
'/.hg/',
217-
'/dist/',
218-
'/build/',
219-
'/target/',
220-
'/out/',
221-
'/bin/',
222-
'/obj/',
223-
'/Debug/',
224-
'/Release/',
225-
'/.next/',
226-
'/.nuxt/',
227-
'/vendor/',
228-
'/venv/',
229-
'/.env',
230-
'/coverage/',
231-
'/logs/',
232-
'/.github/',
233-
'/assets/',
234-
'/public/assets/',
235-
'/static/assets/',
236-
'/.pytest_cache/',
237219
'/.sass-cache/',
238220
'/.parcel-cache/',
239221
'/.cache/',
240222
'/tmp/',
241-
'/temp/'
223+
'/temp/',
224+
225+
# Package Manager Directories
226+
'/node_modules/',
227+
'/bower_components/',
228+
229+
# Test Coverage and Reports
230+
'/coverage/',
231+
'/.nyc_output/',
232+
'/.pytest_cache/',
233+
'/.tox/',
234+
235+
# Environment and Runtime
236+
'/venv/',
237+
'/.env/',
238+
'/.virtualenv/',
239+
240+
# Framework Generated
241+
'/.dart_tool/',
242+
'/.pub-cache/',
243+
'/.angular/',
244+
'/.nuxt/',
245+
'/.next/',
246+
'/.ipynb_checkpoints/',
247+
248+
# CI/CD
249+
'/.github/workflows/',
250+
'/.gitlab/ci/',
251+
'/.circleci/',
252+
253+
# Logs
254+
'/logs/',
255+
'/log/',
256+
257+
# Binary Assets
258+
'/assets/images/',
259+
'/assets/fonts/',
260+
'/assets/media/',
261+
'/public/images/',
262+
'/public/fonts/',
263+
'/static/images/',
264+
'/static/fonts/'
242265
]):
243266
logger.debug(f"Skipping non-processable file: {file_path}")
244267
return None

llama_github/llm_integration/initial_load.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,11 @@ def __init__(self,
6969
elif mistral_api_key is not None and mistral_api_key != "" and self.llm is None:
7070
logger.info("Initializing Mistral API...")
7171
self.llm = ChatMistralAI(mistral_api_key=mistral_api_key, model="mistral-large-2411")
72-
self.llm_simple = ChatMistralAI(mistral_api_key=mistral_api_key, model="open-mistral-nemo")
72+
self.llm_simple = ChatMistralAI(
73+
mistral_api_key=mistral_api_key,
74+
model="open-mistral-nemo",
75+
temperature=0.2
76+
)
7377
self.model_type = "OpenAI"
7478
elif openai_api_key is not None and openai_api_key != "" and self.llm is None:
7579
logger.info("Initializing OpenAI API...")

llama_github/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = '0.2.3'
1+
__version__ = '0.2.4'

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = llama-github
3-
version = 0.2.3
3+
version = 0.2.4
44
author = Jet Xu
55
author_email = [email protected]
66
description = Llama-github is an open-source Python library that empowers LLM Chatbots, AI Agents, and Auto-dev Agents to conduct Retrieval from actively selected GitHub public projects. It Augments through LLMs and Generates context for any coding question, in order to streamline the development of sophisticated AI-driven applications.

0 commit comments

Comments (0)