Skip to content

Commit 054d653

Browse files
committed
fix and correct gdrive
Signed-off-by: Derek Anderson <[email protected]>
1 parent 670edf9 commit 054d653

File tree

7 files changed

+436
-79
lines changed

7 files changed

+436
-79
lines changed
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
# Testing Google Drive Backend with Real Credentials
2+
3+
This guide explains how to set up and test the Google Drive backend with actual Google Cloud credentials.
4+
5+
## Prerequisites
6+
7+
1. A Google Cloud Platform (GCP) account
8+
2. Access to Google Drive
9+
3. Python environment with required packages installed
10+
11+
## Setup Steps
12+
13+
### 1. Create a Google Cloud Project
14+
15+
1. Go to [Google Cloud Console](https://console.cloud.google.com/)
16+
2. Create a new project or select an existing one
17+
3. Enable the Google Drive API:
18+
- Go to "APIs & Services" > "Library"
19+
- Search for "Google Drive API"
20+
- Click "Enable"
21+
4. Enable the Google Sheets API:
22+
- Search for "Google Sheets API"
23+
- Click "Enable"
24+
25+
### 2. Set Up Authentication
26+
27+
Choose one of the following methods:
28+
29+
#### Option A: Service Account (Recommended for automated testing)
30+
31+
1. Go to "APIs & Services" > "Credentials"
32+
2. Click "Create Credentials" > "Service Account"
33+
3. Fill in the service account details
34+
4. Click "Create and Continue"
35+
5. Skip role assignment (click "Continue")
36+
6. Click "Done"
37+
7. Click on the created service account
38+
8. Go to "Keys" tab
39+
9. Click "Add Key" > "Create New Key"
40+
10. Choose "JSON" format
41+
11. Download the JSON file and save it securely (e.g., `service-account-key.json`)
42+
43+
#### Option B: OAuth 2.0 (For interactive testing)
44+
45+
1. Go to "APIs & Services" > "Credentials"
46+
2. Click "Create Credentials" > "OAuth client ID"
47+
3. Choose "Desktop app" as the application type
48+
4. Download the JSON file and save it as `oauth-credentials.json`
49+
50+
### 3. Create a Test Folder in Google Drive
51+
52+
1. Go to [Google Drive](https://drive.google.com)
53+
2. Create a new folder for testing (e.g., "Ragas Test Data")
54+
3. Right-click the folder > "Share"
55+
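4. If using a service account: share the folder with the service account's email address (the `client_email` field in the JSON key file) and grant it Editor access so the tests can create and update spreadsheets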
56+
5. Copy the folder ID from the URL (e.g., `https://drive.google.com/drive/folders/FOLDER_ID_HERE`)
57+
58+
### 4. Install Dependencies
59+
60+
```bash
61+
cd experimental  # run from the root of your local ragas checkout
62+
pip install -r ../requirements/gdrive.txt
63+
```
64+
65+
## Running Tests with Real Credentials
66+
67+
### Method 1: Environment Variables
68+
69+
Set up environment variables for your credentials:
70+
71+
```bash
72+
# For Service Account
73+
export GDRIVE_SERVICE_ACCOUNT_PATH="/path/to/your/service-account-key.json"
74+
export GDRIVE_FOLDER_ID="your_folder_id_here"
75+
76+
# For OAuth (optional)
77+
export GDRIVE_OAUTH_CREDENTIALS_PATH="/path/to/your/oauth-credentials.json"
78+
```
79+
80+
### Method 2: Direct File Paths
81+
82+
Create a test configuration file or modify the test directly with your paths.
83+
84+
## Test Examples
85+
86+
### Basic Integration Test
87+
88+
Create a file `test_gdrive_integration.py`:
89+
90+
```python
91+
import os
92+
import pytest
93+
from ragas_experimental.backends.gdrive_backend import GDriveBackend
94+
from ragas_experimental.project.core import Project
95+
from pydantic import BaseModel
96+
97+
class TestData(BaseModel):
98+
question: str
99+
answer: str
100+
score: float
101+
102+
def test_gdrive_backend_real_credentials():
103+
"""Test Google Drive backend with real credentials."""
104+
105+
# Get credentials from environment
106+
service_account_path = os.getenv("GDRIVE_SERVICE_ACCOUNT_PATH")
107+
folder_id = os.getenv("GDRIVE_FOLDER_ID")
108+
109+
if not service_account_path or not folder_id:
110+
pytest.skip("Real Google Drive credentials not provided")
111+
112+
# Test backend creation
113+
backend = GDriveBackend(
114+
folder_id=folder_id,
115+
project_id="test_project",
116+
dataset_id="test_dataset",
117+
dataset_name="Integration Test Dataset",
118+
type="datasets",
119+
service_account_path=service_account_path
120+
)
121+
122+
# Test data operations
123+
test_data = [
124+
TestData(question="What is AI?", answer="Artificial Intelligence", score=0.9),
125+
TestData(question="What is ML?", answer="Machine Learning", score=0.8)
126+
]
127+
128+
# Test save
129+
backend.save(test_data)
130+
print("✅ Data saved successfully")
131+
132+
# Test load
133+
loaded_data = backend.load(TestData)
134+
assert len(loaded_data) == 2
135+
assert loaded_data[0].question == "What is AI?"
136+
print("✅ Data loaded successfully")
137+
138+
# Test update
139+
test_data[0].score = 0.95
140+
backend.save(test_data)
141+
updated_data = backend.load(TestData)
142+
assert updated_data[0].score == 0.95
143+
print("✅ Data updated successfully")
144+
145+
print("🎉 All integration tests passed!")
146+
147+
def test_project_with_gdrive():
148+
"""Test Project creation with Google Drive backend."""
149+
150+
service_account_path = os.getenv("GDRIVE_SERVICE_ACCOUNT_PATH")
151+
folder_id = os.getenv("GDRIVE_FOLDER_ID")
152+
153+
if not service_account_path or not folder_id:
154+
pytest.skip("Real Google Drive credentials not provided")
155+
156+
project = Project.create(
157+
name="gdrive_test_project",
158+
backend="gdrive",
159+
gdrive_folder_id=folder_id,
160+
gdrive_service_account_path=service_account_path
161+
)
162+
163+
# Create a dataset
164+
dataset = project.create_dataset(
165+
name="test_dataset",
166+
schema=TestData
167+
)
168+
169+
# Add some data
170+
test_data = [
171+
TestData(question="Test Q1", answer="Test A1", score=0.7),
172+
TestData(question="Test Q2", answer="Test A2", score=0.8)
173+
]
174+
175+
dataset.add(test_data)
176+
177+
# Retrieve data
178+
retrieved_data = dataset.to_list()
179+
assert len(retrieved_data) == 2
180+
181+
print("🎉 Project integration test passed!")
182+
183+
if __name__ == "__main__":
184+
test_gdrive_backend_real_credentials()
185+
test_project_with_gdrive()
186+
```
187+
188+
## Running the Tests
189+
190+
### Option 1: With Environment Variables
191+
192+
```bash
193+
# Set your credentials
194+
export GDRIVE_SERVICE_ACCOUNT_PATH="/path/to/service-account-key.json"
195+
export GDRIVE_FOLDER_ID="your_folder_id"
196+
197+
# Run the integration test
198+
python test_gdrive_integration.py
199+
200+
# Or with pytest
201+
pytest test_gdrive_integration.py -v
202+
```
203+
204+
### Option 2: Direct Execution
205+
206+
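For a quick local run only, you can hard-code the values in the test file instead of reading environment variables. Never commit this change: it conflicts with the Security Best Practices below.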
207+
208+
```python
209+
# In test_gdrive_integration.py, replace environment variables with actual values
210+
service_account_path = "/path/to/your/service-account-key.json"
211+
folder_id = "your_actual_folder_id"
212+
```
213+
214+
## Troubleshooting
215+
216+
### Common Issues
217+
218+
1. **Permission Denied**: Make sure the service account has access to the Google Drive folder
219+
2. **API Not Enabled**: Ensure both Google Drive API and Google Sheets API are enabled
220+
3. **Invalid Credentials**: Check that the JSON file path is correct and the file is valid
221+
4. **Folder Not Found**: Verify the folder ID is correct and accessible
222+
223+
### Debug Mode
224+
225+
Add debug logging to see what's happening:
226+
227+
```python
228+
import logging
229+
logging.basicConfig(level=logging.DEBUG)
230+
231+
# Your test code here
232+
```
233+
234+
### Manual Verification
235+
236+
After running tests, check your Google Drive folder to see if:
237+
1. A spreadsheet was created with your dataset name
238+
2. Data was properly written to the spreadsheet
239+
3. Updates are reflected correctly
240+
241+
## Security Best Practices
242+
243+
1. **Never commit credentials**: Add credential files to `.gitignore`
244+
2. **Use environment variables**: Don't hardcode paths in your code
245+
3. **Limit permissions**: Only grant necessary permissions to service accounts
246+
4. **Rotate keys**: Regularly rotate service account keys
247+
5. **Use separate test projects**: Don't use production GCP projects for testing
248+
249+
## CI/CD Integration
250+
251+
For automated testing in CI/CD:
252+
253+
1. Store credentials as encrypted secrets
254+
2. Use service accounts with minimal permissions
255+
3. Create dedicated test folders/projects
256+
4. Clean up test data after runs
257+
258+
```yaml
259+
# Example GitHub Actions step
260+
- name: Test Google Drive Backend
261+
env:
262+
GDRIVE_SERVICE_ACCOUNT_KEY: ${{ secrets.GDRIVE_SERVICE_ACCOUNT_KEY }}
263+
GDRIVE_FOLDER_ID: ${{ secrets.GDRIVE_FOLDER_ID }}
264+
run: |
265+
echo "$GDRIVE_SERVICE_ACCOUNT_KEY" > service-account.json
266+
export GDRIVE_SERVICE_ACCOUNT_PATH="./service-account.json"
267+
pytest test_gdrive_integration.py
268+
```

experimental/examples/gdrive_backend_example.py

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -38,37 +38,22 @@ def example_oauth_setup():
3838
"""Example using OAuth authentication."""
3939

4040
# Set up environment variables (or pass directly to Project.create)
41-
os.environ["GDRIVE_FOLDER_ID"] = "your_google_drive_folder_id_here"
42-
os.environ["GDRIVE_CREDENTIALS_PATH"] = "path/to/your/credentials.json"
41+
# os.environ["GDRIVE_FOLDER_ID"] = "your_google_drive_folder_id_here"
42+
# os.environ["GDRIVE_CREDENTIALS_PATH"] = "path/to/your/credentials.json"
4343

4444
# Create project with Google Drive backend
4545
project = Project.create(
4646
name="my_ragas_project",
4747
description="A project using Google Drive for storage",
4848
backend="gdrive",
49-
gdrive_folder_id="your_google_drive_folder_id_here",
50-
gdrive_credentials_path="path/to/your/credentials.json",
49+
gdrive_folder_id="1HLvvtKLnwGWKTely0YDlJ397XPTQ77Yg",
50+
gdrive_credentials_path="/Users/derekanderson/Downloads/credentials.json",
5151
gdrive_token_path="token.json" # Will be created automatically
5252
)
5353

5454
return project
5555

5656

57-
def example_service_account_setup():
58-
"""Example using Service Account authentication."""
59-
60-
# Create project with Google Drive backend using service account
61-
project = Project.create(
62-
name="my_ragas_project",
63-
description="A project using Google Drive for storage",
64-
backend="gdrive",
65-
gdrive_folder_id="your_google_drive_folder_id_here",
66-
gdrive_service_account_path="path/to/your/service_account.json"
67-
)
68-
69-
return project
70-
71-
7257
def example_usage():
7358
"""Example of using the Google Drive backend."""
7459

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Base classes for dataset backends."""
2+
3+
from abc import ABC, abstractmethod
4+
import typing as t
5+
6+
7+
class DatasetBackend(ABC):
    """Abstract base class for dataset backends.

    All dataset storage backends must implement these methods. Every method
    below is declared with ``@abstractmethod``, so a subclass that leaves any
    of them unimplemented cannot be instantiated.
    """

    @abstractmethod
    def initialize(self, dataset):
        """Initialize the backend with dataset information.

        Args:
            dataset: The dataset this backend will store.
        """

    @abstractmethod
    def get_column_mapping(self, model):
        """Get the mapping between model fields and backend columns.

        Args:
            model: The model whose fields are mapped to backend columns.
        """

    @abstractmethod
    def load_entries(self, model_class):
        """Load all entries from storage.

        Args:
            model_class: Model class for the loaded entries (presumably used
                to build each entry; confirm against concrete backends).
        """

    @abstractmethod
    def append_entry(self, entry):
        """Add a new entry to storage and return its ID.

        Args:
            entry: The entry to add.
        """

    @abstractmethod
    def update_entry(self, entry):
        """Update an existing entry in storage.

        Args:
            entry: The entry carrying the updated values.
        """

    @abstractmethod
    def delete_entry(self, entry_id):
        """Delete an entry from storage.

        Args:
            entry_id: Identifier of the entry to delete.
        """

    @abstractmethod
    def get_entry_by_field(self, field_name: str, field_value: t.Any, model_class):
        """Get an entry by field value.

        Args:
            field_name: Name of the field to match on.
            field_value: Value the field must have.
            model_class: Model class for the returned entry (presumably the
                same kind of class passed to ``load_entries``; confirm
                against concrete backends).
        """

0 commit comments

Comments
 (0)