Skip to content
Open
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
956f388
feat: implement hybrid cache architecture for repository cloning
fabiovincenzi Oct 10, 2025
cc49057
feat: add configurable cache limits via proxy.config.json
fabiovincenzi Oct 10, 2025
d6413ce
test: update clearBareClone tests for hybrid cache structure
fabiovincenzi Oct 10, 2025
cf76665
chore: fix metric logging
fabiovincenzi Oct 10, 2025
e998d07
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 13, 2025
734621c
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 20, 2025
452eb18
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 20, 2025
168d9b0
feat: implement git-operations module with native git commands
fabiovincenzi Oct 22, 2025
08116ba
refactor: update pullRemote to use git-operations module
fabiovincenzi Oct 22, 2025
992c862
test: add comprehensive hybrid cache integration tests
fabiovincenzi Oct 22, 2025
d5e1b5b
chore: remove unused isomorphic-git dependency
fabiovincenzi Oct 22, 2025
6d5f886
Merge branch 'feature/hybrid-cache' of https://github.com/fabiovincen…
fabiovincenzi Oct 22, 2025
6bc0ddc
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 22, 2025
627137b
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 27, 2025
235e152
chore: remove redundant cache config fallback
fabiovincenzi Oct 27, 2025
c762b5e
refactor: use bytes internally in CacheManager for consistency
fabiovincenzi Oct 27, 2025
ab28d78
chore: use toSorted()
fabiovincenzi Oct 27, 2025
88bbce8
refactor: remove test-only cleanup, rely on CacheManager limits
fabiovincenzi Oct 27, 2025
1737bfd
refactor: use spawnSync instead of execSync
fabiovincenzi Oct 27, 2025
d35d109
test: increase timeout for git clone tests in ConfigLoader
fabiovincenzi Oct 27, 2025
0a54773
perf: use performance.now() instead of Date.now()
fabiovincenzi Oct 27, 2025
133e5e6
refactor: use cache paths from configuration
fabiovincenzi Oct 27, 2025
f03d686
feat: add mutex to prevent race conditions in cache operations
fabiovincenzi Oct 29, 2025
bc0be9f
perf: remove unnecessary sort from getCacheStats
fabiovincenzi Oct 29, 2025
d07ed9c
fix: add logging for silent errors in getDirectorySize
fabiovincenzi Oct 29, 2025
2acaee7
refactor: rename cacheDir to repoCacheDir to disambiguate from Config…
fabiovincenzi Oct 29, 2025
49695ff
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 29, 2025
b5bc3d8
refactor: rename cacheDir to repoCacheDir to disambiguate from Config…
fabiovincenzi Oct 29, 2025
d110463
Merge branch 'feature/hybrid-cache' of https://github.com/fabiovincen…
fabiovincenzi Oct 29, 2025
b073eb3
docs: add readme and cache benchmark script
fabiovincenzi Oct 31, 2025
928846d
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Oct 31, 2025
5b94ec9
chore: add results csv to gitignore
fabiovincenzi Nov 5, 2025
7c05bfb
chore: move and edit benchmark script
fabiovincenzi Nov 5, 2025
a42bd2e
refactor: use multiplier for speed improvement
fabiovincenzi Nov 5, 2025
fd23676
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Nov 5, 2025
bd14e59
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Nov 5, 2025
e67ff36
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Nov 7, 2025
7e97d9e
Merge branch 'main' into feature/hybrid-cache
fabiovincenzi Nov 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
205 changes: 205 additions & 0 deletions benchmark-cache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#!/bin/bash
#
# Benchmark for the git-proxy hybrid cache: prints a banner and defines the
# shared colour constants and connection settings used by the rest of the script.
#
# Positional arguments read in this section:
#   $1  GitHub repository in "owner/repo" form (optional, see GITHUB_REPO below)

# Abort the script as soon as any command exits with a non-zero status.
set -e

# Colors for output: ANSI escape sequences, rendered when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${BLUE}=== Git Proxy Hybrid Cache Benchmark ===${NC}"
echo ""

# Configuration
# Base URL of the locally running proxy instance.
PROXY_URL="http://localhost:8000"
# Repository to benchmark; falls back to fabiovincenzi/open-webui when $1 is unset or empty.
GITHUB_REPO="${1:-fabiovincenzi/open-webui}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: We might want to add an explicit error message when executing benchmark-cache.sh but the chosen repo is not added to the authorised list.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just edited to show original errors everywhere

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we could also throw an error if the user didn't provide their own repo in the arguments rather than defaulting to your fork?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I thought I had removed all the references to my GitHub account; I just realized I didn't commit those changes.

# Remaining positional arguments:
#   $2  branch name shown in the configuration summary (default: main)
#   $3  total number of clone+push rounds to measure (default: 3)
# NOTE(review): TEST_BRANCH is reported below, but no clone/push command visible
# in this script consumes it - confirm whether it should be passed to git.
TEST_BRANCH="${2:-main}"
NUM_PUSHES="${3:-3}"

# Construct proxy URL (format: http://localhost:8000/github.com/user/repo.git)
PROXY_REPO_URL="$PROXY_URL/github.com/$GITHUB_REPO.git"

echo "Configuration:"
echo " Proxy URL: $PROXY_URL"
echo " GitHub Repo: $GITHUB_REPO"
echo " Proxy Repo URL: $PROXY_REPO_URL"
echo " Branch: $TEST_BRANCH"
echo " Number of pushes: $NUM_PUSHES"
echo ""

# Check if git-proxy is running
echo -e "${YELLOW}Checking if git-proxy is running...${NC}"
if ! curl -s "$PROXY_URL" > /dev/null 2>&1; then
  echo -e "${RED}ERROR: git-proxy is not running on $PROXY_URL${NC}"
  echo "Please start git-proxy with: npm start"
  exit 1
fi
echo -e "${GREEN}✓ git-proxy is running${NC}"
echo ""

# Get GitHub credentials from git credential helper
echo -e "${YELLOW}Retrieving GitHub credentials...${NC}"
# `git credential fill` exits non-zero when nothing can supply credentials.
# Because the script runs under `set -e`, an unguarded failure inside this
# assignment would terminate the script before the explanatory error below is
# printed; `|| true` absorbs the status so the empty result is reported instead.
CREDENTIALS=$(printf 'protocol=https\nhost=github.com\n\n' | git credential fill 2>/dev/null || true)
if [ -z "$CREDENTIALS" ]; then
  echo -e "${RED}ERROR: No GitHub credentials found${NC}"
  echo "Please configure git credentials first:"
  echo " git config --global credential.helper store"
  echo " git clone https://github.com/your-repo.git"
  exit 1
fi

# Strip only the leading "key=" so values that themselves contain '='
# (possible in tokens/passwords) are kept intact.
GITHUB_USERNAME=$(printf '%s\n' "$CREDENTIALS" | sed -n 's/^username=//p')
GITHUB_TOKEN=$(printf '%s\n' "$CREDENTIALS" | sed -n 's/^password=//p')

if [ -z "$GITHUB_USERNAME" ] || [ -z "$GITHUB_TOKEN" ]; then
  echo -e "${RED}ERROR: Could not extract GitHub credentials${NC}"
  exit 1
fi

echo -e "${GREEN}✓ GitHub credentials retrieved for user: $GITHUB_USERNAME${NC}"
echo ""

# Setup test directory (timestamped so repeated runs do not collide)
TEST_DIR="./benchmark-test-$(date +%s)"
echo -e "${YELLOW}Creating test directory: $TEST_DIR${NC}"
mkdir -p "$TEST_DIR"
cd "$TEST_DIR"

# Local directory name used for each clone (last path component of owner/repo)
REPO_NAME=$(basename "$GITHUB_REPO")

# Clear cache before starting. Paths are relative to TEST_DIR, i.e. they point
# at .remote/ in the directory the script was launched from. Failures (for
# example, the directories not existing yet) are deliberately ignored.
echo -e "${YELLOW}Clearing cache before benchmark...${NC}"
rm -rf ../.remote/cache/* ../.remote/work/* 2>/dev/null || true
echo -e "${GREEN}✓ Cache cleared${NC}"
echo ""

measure_push() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wonder if we could improve error handling here to prevent the script from hanging? I tried pulling my backstage fork but it strangely stopped responding on the 3rd push:

Image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the error handling should be much better now; I'm sorry I didn't push the latest changes.

local push_number=$1
local is_first=$2

echo -e "${BLUE}=== Push #$push_number $([ "$is_first" = "true" ] && echo "(COLD CACHE)" || echo "(WARM CACHE)") ===${NC}"

# Clone repo through proxy
echo "Cloning repository..."
START_CLONE=$(date +%s.%N)

rm -rf "$REPO_NAME" 2>/dev/null || true
git clone "$PROXY_REPO_URL" "$REPO_NAME" > clone.log 2>&1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we could output the clone status just to make sure things are going well for larger repos too?

Suggested change
git clone "$PROXY_REPO_URL" "$REPO_NAME" > clone.log 2>&1
git clone "$PROXY_REPO_URL" "$REPO_NAME" 2>&1 | tee clone.log


END_CLONE=$(date +%s.%N)
CLONE_TIME=$(echo "$END_CLONE - $START_CLONE" | bc)

cd "$REPO_NAME"

# Get email from git config
GITHUB_EMAIL=$(git config --global user.email)
if [ -z "$GITHUB_EMAIL" ]; then
GITHUB_EMAIL="[email protected]"
fi

git config user.email "$GITHUB_EMAIL"
git config user.name "$GITHUB_USERNAME"

# Create a test commit
echo "benchmark-$push_number-$(date +%s)" > "benchmark-$push_number.txt"
git add "benchmark-$push_number.txt"
git commit -m "Benchmark push #$push_number" > /dev/null 2>&1

# Push through proxy with credentials
echo "Pushing commit..."
START_PUSH=$(date +%s.%N)

# Use credential helper to pass GitHub credentials
git -c credential.helper="!f() { echo username=$GITHUB_USERNAME; echo password=$GITHUB_TOKEN; }; f" \
push "$PROXY_REPO_URL" "HEAD:refs/heads/benchmark-test-$push_number" > push.log 2>&1 || true

END_PUSH=$(date +%s.%N)
PUSH_TIME=$(echo "$END_PUSH - $START_PUSH" | bc)

TOTAL_TIME=$(echo "$CLONE_TIME + $PUSH_TIME" | bc)

cd ..

echo -e "${GREEN}Results:${NC}"
echo " Clone time: ${CLONE_TIME}s"
echo " Push time: ${PUSH_TIME}s"
echo " Total time: ${TOTAL_TIME}s"
echo ""

# Store results
echo "$push_number,$is_first,$CLONE_TIME,$PUSH_TIME,$TOTAL_TIME" >> results.csv
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should add the resulting .csvs to .gitignore? 🤔

}

# Initialize results file (header row; measure_push appends one row per round)
echo "push_number,is_cold_cache,clone_time,push_time,total_time" > results.csv

# Measure first push (cold cache)
measure_push 1 true

# Get cache stats after first push
echo -e "${BLUE}=== Cache Statistics After First Push ===${NC}"
CACHE_DIR="../.remote/cache"
if [ -d "$CACHE_DIR" ]; then
  CACHE_SIZE=$(du -sh "$CACHE_DIR" | cut -f1)
  # tr strips the left padding some wc implementations add to the count.
  CACHE_REPOS=$(ls -1 "$CACHE_DIR" | wc -l | tr -d ' ')
  echo " Cache size: $CACHE_SIZE"
  echo " Cached repos: $CACHE_REPOS"
else
  echo " Cache directory not found"
fi
echo ""

# Measure subsequent pushes (warm cache).
# An arithmetic loop is used instead of `seq 2 N`: some seq implementations
# count downwards when N < 2, which would run unintended extra rounds.
for ((i = 2; i <= NUM_PUSHES; i++)); do
  measure_push "$i" false
done

# Final cache stats
echo -e "${BLUE}=== Final Cache Statistics ===${NC}"
if [ -d "$CACHE_DIR" ]; then
  CACHE_SIZE=$(du -sh "$CACHE_DIR" | cut -f1)
  CACHE_REPOS=$(ls -1 "$CACHE_DIR" | wc -l | tr -d ' ')
  echo " Cache size: $CACHE_SIZE"
  echo " Cached repos: $CACHE_REPOS"
  echo ""
  echo " Cached repositories:"
  # Report the real on-disk size of every cache entry. Parsing `ls -l` columns
  # would show the size of the directory entry itself, not of its contents.
  for entry in "$CACHE_DIR"/*; do
    [ -e "$entry" ] || continue # glob matched nothing: cache is empty
    echo " $(basename "$entry") ($(du -sh "$entry" | cut -f1))"
  done
fi
echo ""

# Calculate and display summary
echo -e "${BLUE}=== Performance Summary ===${NC}"
echo ""

# Read results: row 2 is the cold-cache round, rows 3+ are warm-cache rounds.
FIRST_PUSH_TIME=$(awk -F, 'NR==2 {print $5}' results.csv)
AVG_WARM_TIME=$(awk -F, 'NR>2 {sum+=$5; count++} END {if(count>0) print sum/count; else print 0}' results.csv)

echo "First push (cold cache): ${FIRST_PUSH_TIME}s"
if awk -v warm="$AVG_WARM_TIME" 'BEGIN { exit !(warm > 0) }'; then
  echo "Average warm push: ${AVG_WARM_TIME}s"
  # Only derive ratios when the cold time is a positive number, so a missing or
  # zero measurement cannot trigger a division by zero.
  if awk -v cold="$FIRST_PUSH_TIME" 'BEGIN { exit !(cold > 0) }'; then
    # Floating-point awk arithmetic keeps full precision for the intermediate
    # ratio; only the final figure is rounded for display.
    SPEEDUP=$(awk -v cold="$FIRST_PUSH_TIME" -v warm="$AVG_WARM_TIME" 'BEGIN { printf "%.2f", cold / warm }')
    IMPROVEMENT=$(awk -v cold="$FIRST_PUSH_TIME" -v warm="$AVG_WARM_TIME" 'BEGIN { printf "%.1f", (1 - warm / cold) * 100 }')
    echo ""
    echo -e "${GREEN}Performance improvement: ${IMPROVEMENT}% faster (${SPEEDUP}x speedup)${NC}"
  fi
fi
echo ""

# Show detailed results table
echo -e "${BLUE}=== Detailed Results ===${NC}"
echo ""
printf "%-12s %-12s %-12s %-12s %-12s\n" "Push #" "Cache" "Clone (s)" "Push (s)" "Total (s)"
printf "%-12s %-12s %-12s %-12s %-12s\n" "------" "-----" "---------" "--------" "---------"
awk -F, 'NR>1 {
  cache = ($2 == "true") ? "COLD" : "WARM"
  printf "%-12s %-12s %-12.2f %-12.2f %-12.2f\n", $1, cache, $3, $4, $5
}' results.csv
echo ""

# Cleanup prompt (the test directory is left in place for inspection)
echo -e "${YELLOW}Test directory: $TEST_DIR${NC}"
echo "To clean up: rm -rf $TEST_DIR"
echo ""
echo -e "${GREEN}✓ Benchmark complete!${NC}"
20 changes: 20 additions & 0 deletions config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,26 @@
}
}
}
},
"cache": {
"description": "Configuration for bare repository cache (hybrid cache system)",
"type": "object",
"properties": {
"maxSizeGB": {
"type": "number",
"description": "Maximum cache size in gigabytes (default 2GB)"
},
"maxRepositories": {
"type": "number",
"description": "Maximum number of repositories in cache (default 50)"
},
"cacheDir": {
"type": "string",
"description": "Directory path for bare repository cache (default ./.remote/cache)"
}
},
"required": ["maxSizeGB", "maxRepositories", "cacheDir"],
"additionalProperties": false
}
},
"definitions": {
Expand Down
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@
"express-rate-limit": "^8.1.0",
"express-session": "^1.18.2",
"history": "5.3.0",
"isomorphic-git": "^1.34.0",
"jsonwebtoken": "^9.0.2",
"jwk-to-pem": "^2.0.7",
"load-plugin": "^6.0.3",
Expand Down
5 changes: 5 additions & 0 deletions proxy.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -178,5 +178,10 @@
"loginRequired": true
}
]
},
"cache": {
"maxSizeGB": 2,
"maxRepositories": 50,
"cacheDir": "./.remote/cache"
}
}
31 changes: 31 additions & 0 deletions src/config/generated/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ export interface GitProxyConfig {
* List of repositories that are authorised to be pushed to through the proxy.
*/
authorisedList?: AuthorisedRepo[];
/**
* Configuration for bare repository cache (hybrid cache system)
*/
cache?: Cache;
/**
* Block commits based on rules defined over author/committer e-mail addresses, commit
* message content and diff content
Expand Down Expand Up @@ -286,6 +290,24 @@ export interface AuthorisedRepo {
[property: string]: any;
}

/**
 * Settings for the bare repository cache used by the hybrid cache system.
 */
export interface Cache {
  /**
   * Upper limit on the total cache size, in gigabytes (default 2GB)
   */
  maxSizeGB: number;
  /**
   * Upper limit on how many repositories the cache may hold (default 50)
   */
  maxRepositories: number;
  /**
   * Path of the directory holding the cached bare repositories
   * (default ./.remote/cache)
   */
  cacheDir: string;
}

/**
* Block commits based on rules defined over author/committer e-mail addresses, commit
* message content and diff content
Expand Down Expand Up @@ -690,6 +712,7 @@ const typeMap: any = {
typ: u(undefined, a(r('AuthenticationElement'))),
},
{ json: 'authorisedList', js: 'authorisedList', typ: u(undefined, a(r('AuthorisedRepo'))) },
{ json: 'cache', js: 'cache', typ: u(undefined, r('Cache')) },
{ json: 'commitConfig', js: 'commitConfig', typ: u(undefined, r('CommitConfig')) },
{ json: 'configurationSources', js: 'configurationSources', typ: u(undefined, 'any') },
{ json: 'contactEmail', js: 'contactEmail', typ: u(undefined, '') },
Expand Down Expand Up @@ -793,6 +816,14 @@ const typeMap: any = {
],
'any',
),
Cache: o(
[
{ json: 'cacheDir', js: 'cacheDir', typ: '' },
{ json: 'maxRepositories', js: 'maxRepositories', typ: 3.14 },
{ json: 'maxSizeGB', js: 'maxSizeGB', typ: 3.14 },
],
false,
),
CommitConfig: o(
[
{ json: 'author', js: 'author', typ: u(undefined, r('Author')) },
Expand Down
9 changes: 9 additions & 0 deletions src/config/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ function mergeConfigurations(
commitConfig: { ...defaultConfig.commitConfig, ...userSettings.commitConfig },
attestationConfig: { ...defaultConfig.attestationConfig, ...userSettings.attestationConfig },
rateLimit: userSettings.rateLimit || defaultConfig.rateLimit,
cache: userSettings.cache
? { ...defaultConfig.cache, ...userSettings.cache }
: defaultConfig.cache,
tls: tlsConfig,
tempPassword: { ...defaultConfig.tempPassword, ...userSettings.tempPassword },
// Preserve legacy SSL fields
Expand Down Expand Up @@ -196,6 +199,7 @@ export const logConfiguration = () => {
console.log(`data sink = ${JSON.stringify(getDatabase())}`);
console.log(`authentication = ${JSON.stringify(getAuthMethods())}`);
console.log(`rateLimit = ${JSON.stringify(getRateLimit())}`);
console.log(`cache = ${JSON.stringify(getCacheConfig())}`);
};

export const getAPIs = () => {
Expand Down Expand Up @@ -285,6 +289,11 @@ export const getRateLimit = () => {
return config.rateLimit;
};

/**
 * Returns the `cache` section of the fully loaded configuration.
 */
export const getCacheConfig = () => loadFullConfiguration().cache;

// Function to handle configuration updates
const handleConfigUpdate = async (newConfig: Configuration) => {
console.log('Configuration updated from external source');
Expand Down
Loading
Loading