Skip to content

Commit a429e42

Browse files
committed
fix: enable automatic cache refresh for small datasets and optimize configuration
Implement dynamic cache invalidation for datasets under the COUNT_LIMIT threshold so that entity counts remain accurate without manual intervention. Fix the SPARQL BIND clause syntax in the display rules to prevent query errors.
1 parent 84845eb commit a429e42

File tree

6 files changed

+175
-424
lines changed

6 files changed

+175
-424
lines changed

config.example.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ class Config(object):
3636
REDIS_URL = os.environ.get("REDIS_URL")
3737

3838
# Query Configuration
39+
# COUNT_LIMIT serves dual purpose:
40+
# 1. Maximum entity count to display (shows "10000+" if exceeded)
41+
# 2. Threshold for automatic cache refresh after entity modifications
42+
# - Datasets below this limit: auto-refresh enabled (always accurate counts)
43+
# - Datasets at or above this limit: cache remains static (manual refresh via admin endpoint)
3944
COUNT_LIMIT = int(os.environ.get("COUNT_LIMIT", "10000"))
4045

4146
# Database configuration

dataset_database/virtuoso.ini

Lines changed: 80 additions & 207 deletions
Original file line numberDiff line numberDiff line change
@@ -1,247 +1,120 @@
1-
;
2-
; virtuoso.ini
3-
;
4-
; Configuration file for the OpenLink Virtuoso VDBMS Server
5-
;
6-
; To learn more about this product, or any other product in our
7-
; portfolio, please check out our web site at:
8-
;
9-
; http://virtuoso.openlinksw.com/
10-
;
11-
; or contact us at:
12-
;
13-
14-
;
15-
; If you have any technical questions, please contact our support
16-
; staff at:
17-
;
18-
19-
;
20-
;
21-
; Database setup
22-
;
231
[Database]
24-
DatabaseFile = /database/virtuoso.db
25-
ErrorLogFile = /database/virtuoso.log
26-
LockFile = /database/virtuoso.lck
27-
TransactionFile = /database/virtuoso.trx
28-
xa_persistent_file = /database/virtuoso.pxa
29-
ErrorLogLevel = 7
30-
FileExtend = 200
2+
DatabaseFile = ../database/virtuoso.db
3+
ErrorLogFile = ../database/virtuoso.log
4+
LockFile = ../database/virtuoso.lck
5+
TransactionFile = ../database/virtuoso.trx
6+
xa_persistent_file = ../database/virtuoso.pxa
7+
ErrorLogLevel = 7
8+
FileExtend = 200
319
MaxCheckpointRemap = 2000
32-
Striping = 0
33-
TempStorage = TempDatabase
10+
Striping = 0
11+
TempStorage = TempDatabase
3412

3513
[TempDatabase]
36-
DatabaseFile = /database/virtuoso-temp.db
37-
TransactionFile = /database/virtuoso-temp.trx
14+
DatabaseFile = ../database/virtuoso-temp.db
15+
TransactionFile = ../database/virtuoso-temp.trx
3816
MaxCheckpointRemap = 2000
39-
Striping = 0
40-
;MaxTempDBPages = 50G ; Limit size of the TempDatabase file to 50GB on disk
17+
Striping = 0
4118

42-
;
43-
; Server parameters
44-
;
4519
[Parameters]
46-
ServerPort = 1111
47-
LiteMode = 0
48-
DisableUnixSocket = 1
49-
DisableTcpSocket = 0
50-
;SSLServerPort = 2111
51-
;SSLCertificate = cert.pem
52-
;SSLPrivateKey = pk.pem
53-
;X509ClientVerify = 0
54-
;X509ClientVerifyDepth = 0
55-
;X509ClientVerifyCAFile = ca.pem
56-
MaxClientConnections = 10
57-
CheckpointInterval = 60
58-
O_DIRECT = 0
59-
CaseMode = 2
60-
MaxStaticCursorRows = 5000
61-
CheckpointAuditTrail = 0
62-
AllowOSCalls = 0
63-
SchedulerInterval = 10
64-
DirsAllowed = ., ../vad, /usr/share/proj, /data
65-
ThreadCleanupInterval = 1
66-
ThreadThreshold = 10
20+
ServerPort = 1111
21+
LiteMode = 0
22+
DisableUnixSocket = 1
23+
DisableTcpSocket = 0
24+
MaxClientConnections = 10
25+
CheckpointInterval = 60
26+
O_DIRECT = 0
27+
CaseMode = 2
28+
MaxStaticCursorRows = 5000
29+
CheckpointAuditTrail = 0
30+
AllowOSCalls = 0
31+
SchedulerInterval = 10
32+
DirsAllowed = ../virtuoso_input,/usr/share/proj,../vad,.,/opt/virtuoso-opensource/database
33+
ThreadCleanupInterval = 1
34+
ThreadThreshold = 10
6735
ResourcesCleanupInterval = 1
68-
FreeTextBatchSize = 100000
69-
SingleCPU = 0
70-
VADInstallDir = ../vad/
71-
PrefixResultNames = 0
72-
RdfFreeTextRulesSize = 100
73-
IndexTreeMaps = 64
74-
MaxMemPoolSize = 200000000
75-
PrefixResultNames = 0
76-
MacSpotlight = 0
77-
MaxQueryMem = 2G ; memory allocated to query processor
78-
VectorSize = 1000 ; initial parallel query vector (array of query operations) size
79-
MaxVectorSize = 1000000 ; query vector size threshold.
80-
AdjustVectorSize = 0
81-
ThreadsPerQuery = 4
82-
AsyncQueueMaxThreads = 10
83-
;;
84-
;; When running with large data sets, one should configure the Virtuoso
85-
;; process to use between 2/3 to 3/5 of free system memory and to stripe
86-
;; storage on all available disks.
87-
;;
88-
;; Uncomment next two lines if there is 2 GB system memory free
89-
;NumberOfBuffers = 170000
90-
;MaxDirtyBuffers = 130000
91-
;; Uncomment next two lines if there is 4 GB system memory free
92-
;NumberOfBuffers = 340000
93-
; MaxDirtyBuffers = 250000
94-
;; Uncomment next two lines if there is 8 GB system memory free
95-
;NumberOfBuffers = 680000
96-
;MaxDirtyBuffers = 500000
97-
;; Uncomment next two lines if there is 16 GB system memory free
98-
;NumberOfBuffers = 1360000
99-
;MaxDirtyBuffers = 1000000
100-
;; Uncomment next two lines if there is 32 GB system memory free
101-
;NumberOfBuffers = 2720000
102-
;MaxDirtyBuffers = 2000000
103-
;; Uncomment next two lines if there is 48 GB system memory free
104-
;NumberOfBuffers = 4000000
105-
;MaxDirtyBuffers = 3000000
106-
;; Uncomment next two lines if there is 64 GB system memory free
107-
;NumberOfBuffers = 5450000
108-
;MaxDirtyBuffers = 4000000
109-
;;
110-
;; Note the default settings will take very little memory
111-
;; but will not result in very good performance
112-
;;
113-
NumberOfBuffers = 10000
114-
MaxDirtyBuffers = 6000
115-
SSLServerPort = 1112
116-
SSLCertificate = virtuoso.crt
117-
SSLPrivateKey = virtuoso.key
36+
FreeTextBatchSize = 100000
37+
SingleCPU = 0
38+
VADInstallDir = ../vad/
39+
PrefixResultNames = 0
40+
RdfFreeTextRulesSize = 100
41+
IndexTreeMaps = 64
42+
MaxMemPoolSize = 200000000
43+
MacSpotlight = 0
44+
MaxQueryMem = 2G ; memory allocated to query processor
45+
VectorSize = 1000 ; initial parallel query vector (array of query operations) size
46+
MaxVectorSize = 1000000 ; query vector size threshold.
47+
AdjustVectorSize = 0
48+
ThreadsPerQuery = 4
49+
AsyncQueueMaxThreads = 10
50+
NumberOfBuffers = 88583
51+
MaxDirtyBuffers = 66437
11852

11953
[HTTPServer]
120-
ServerPort = 8890
121-
ServerRoot = ../vsp
122-
MaxClientConnections = 10
123-
DavRoot = DAV
124-
EnabledDavVSP = 0
125-
HTTPProxyEnabled = 0
126-
TempASPXDir = 0
127-
DefaultMailServer = localhost:25
128-
MaxKeepAlives = 10
129-
KeepAliveTimeout = 10
130-
MaxCachedProxyConnections = 10
54+
ServerPort = 8890
55+
ServerRoot = ../vsp
56+
MaxClientConnections = 10
57+
DavRoot = DAV
58+
EnabledDavVSP = 0
59+
HTTPProxyEnabled = 0
60+
TempASPXDir = 0
61+
DefaultMailServer = localhost:25
62+
MaxKeepAlives = 10
63+
KeepAliveTimeout = 10
64+
MaxCachedProxyConnections = 10
13165
ProxyConnectionCacheTimeout = 15
132-
HTTPThreadSize = 280000
133-
HttpPrintWarningsInOutput = 0
134-
Charset = UTF-8
135-
;HTTPLogFile = logs/http.log
136-
MaintenancePage = atomic.html
137-
EnabledGzipContent = 1
138-
SSLPort = 8891
139-
SSLCertificate = virtuoso.crt
140-
SSLPrivateKey = virtuoso.key
66+
HTTPThreadSize = 280000
67+
HttpPrintWarningsInOutput = 0
68+
Charset = UTF-8
69+
MaintenancePage = atomic.html
70+
EnabledGzipContent = 1
14171

14272
[AutoRepair]
14373
BadParentLinks = 0
14474

14575
[Client]
146-
SQL_PREFETCH_ROWS = 100
76+
SQL_PREFETCH_ROWS = 100
14777
SQL_PREFETCH_BYTES = 16000
148-
SQL_QUERY_TIMEOUT = 0
149-
SQL_TXN_TIMEOUT = 0
150-
;SQL_NO_CHAR_C_ESCAPE = 1
151-
;SQL_UTF8_EXECS = 0
152-
;SQL_NO_SYSTEM_TABLES = 0
153-
;SQL_BINARY_TIMESTAMP = 1
154-
;SQL_ENCRYPTION_ON_PASSWORD = -1
78+
SQL_QUERY_TIMEOUT = 0
79+
SQL_TXN_TIMEOUT = 0
15580

15681
[VDB]
157-
ArrayOptimization = 0
158-
NumArrayParameters = 10
159-
VDBDisconnectTimeout = 1000
82+
ArrayOptimization = 0
83+
NumArrayParameters = 10
84+
VDBDisconnectTimeout = 1000
16085
KeepConnectionOnFixedThread = 0
16186

16287
[Replication]
163-
ServerName = db-ALPINE-PORT
88+
ServerName = db-BIONIC-PORT
16489
ServerEnable = 1
165-
QueueMax = 50000
90+
QueueMax = 50000
16691

167-
;
168-
; Striping setup
169-
;
170-
; These parameters have only effect when Striping is set to 1 in the
171-
; [Database] section, in which case the DatabaseFile parameter is ignored.
172-
;
173-
; With striping, the database is spawned across multiple segments
174-
; where each segment can have multiple stripes.
175-
;
176-
; Format of the lines below:
177-
; Segment<number> = <size>, <stripe file name> [, <stripe file name> .. ]
178-
;
179-
; <number> must be ordered from 1 up.
180-
;
181-
; The <size> is the total size of the segment which is equally divided
182-
; across all stripes forming the segment. Its specification can be in
183-
; gigabytes (g), megabytes (m), kilobytes (k) or in database blocks
184-
; (b, the default)
185-
;
186-
; Note that the segment size must be a multiple of the database page size
187-
; which is currently 8k. Also, the segment size must be divisible by the
188-
; number of stripe files forming the segment.
189-
;
190-
; The example below creates a 200 meg database striped on two segments
191-
; with two stripes of 50 meg and one of 100 meg.
192-
;
193-
; You can always add more segments to the configuration, but once
194-
; added, do not change the setup.
195-
;
19692
[Striping]
19793
Segment1 = 100M, db-seg1-1.db, db-seg1-2.db
19894
Segment2 = 100M, db-seg2-1.db
199-
;...
200-
;[TempStriping]
201-
;Segment1 = 100M, db-seg1-1.db, db-seg1-2.db
202-
;Segment2 = 100M, db-seg2-1.db
203-
;...
204-
;[Ucms]
205-
;UcmPath = <path>
206-
;Ucm1 = <file>
207-
;Ucm2 = <file>
208-
;...
20995

21096
[Zero Config]
211-
ServerName = virtuoso (ALPINE-PORT)
212-
;ServerDSN = ZDSN
213-
;SSLServerName =
214-
;SSLServerDSN =
97+
ServerName = virtuoso (BIONIC-PORT)
21598

21699
[Mono]
217-
;MONO_TRACE = Off
218-
;MONO_PATH = <path_here>
219-
;MONO_ROOT = <path_here>
220-
;MONO_CFG_DIR = <path_here>
221-
;virtclr.dll =
222100

223101
[URIQA]
224102
DynamicLocal = 0
225-
DefaultHost = localhost:8890
103+
DefaultHost = localhost:8890
226104

227105
[SPARQL]
228-
;ExternalQuerySource = 1
229-
;ExternalXsltSource = 1
230-
;DefaultGraph = http://localhost:8890/dataspace
231-
;ImmutableGraphs = http://localhost:8890/dataspace
232-
ResultSetMaxRows = 100000
233-
MaxConstructTriples = 100000
106+
ResultSetMaxRows = 10000
107+
MaxConstructTriples = 10000
234108
MaxQueryCostEstimationTime = 400 ; in seconds
235-
MaxQueryExecutionTime = 60 ; in seconds
236-
DefaultQuery = select distinct ?Concept where {[] a ?Concept} LIMIT 100
237-
DeferInferenceRulesInit = 0 ; controls inference rules loading
238-
MaxMemInUse = 0 ; limits the amount of memory for construct dict (0=unlimited)
239-
;LabelInferenceName = facets ; Only needed when using the Faceted Browser
240-
;PingService = http://rpc.pingthesemanticweb.com/
109+
MaxQueryExecutionTime = 60 ; in seconds
110+
DefaultQuery = SELECT (COUNT(*) AS ?triples) WHERE {?s ?p ?o}
111+
DeferInferenceRulesInit = 0 ; controls inference rules loading
112+
MaxMemInUse = 0 ; limits the amount of memory for construct dict (0=unlimited)
241113

242114
[Plugins]
243115
LoadPath = ../hosting
244-
Load1 = plain, geos
245-
Load2 = plain, graphql
246-
Load3 = plain, proj4
247-
Load4 = plain, shapefileio
116+
Load1 = plain, geos
117+
Load2 = plain, graphql
118+
Load3 = plain, proj4
119+
Load4 = plain, shapefileio
120+

display_rules.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2722,9 +2722,8 @@ rules:
27222722
GROUP BY ?title ?pubDate ?publisher ?edition
27232723
}
27242724
2725-
BIND(
2726-
IF(BOUND(?editorList) && STRLEN(?editorList) > 0, CONCAT(?editorList, " (Ed.)"),
2727-
COALESCE(?authorList, "")),
2725+
BIND(CONCAT(
2726+
IF(BOUND(?editorList) && STRLEN(?editorList) > 0, CONCAT(?editorList, " (Ed.)"), COALESCE(?authorList, "")),
27282727
IF((BOUND(?editorList) && STRLEN(?editorList) > 0 || BOUND(?authorList)) && BOUND(?pubDate), " ", ""),
27292728
IF(BOUND(?pubDate), CONCAT("(", ?pubDate, ")"), ""),
27302729
IF(BOUND(?title) && STRLEN(?title) > 0, CONCAT(

heritrace/__init__.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from heritrace.cli import register_cli_commands
1111
from heritrace.utils.sparql_utils import precompute_available_classes_cache
1212

13-
1413
def create_app(config_object=None):
1514
app = Flask(__name__)
1615

@@ -42,12 +41,8 @@ def create_app(config_object=None):
4241

4342
with app.app_context():
4443
app.logger.info("[STARTUP] Pre-computing available classes cache...")
45-
try:
46-
precompute_available_classes_cache()
47-
app.logger.info("[STARTUP] Available classes cache computed successfully")
48-
except Exception as e:
49-
app.logger.warning(f"[STARTUP] Failed to pre-compute classes cache: {e}")
50-
app.logger.warning("[STARTUP] Classes will be computed on first request")
44+
precompute_available_classes_cache()
45+
app.logger.info("[STARTUP] Available classes cache computed successfully")
5146

5247
register_blueprints(app)
5348

heritrace/utils/sparql_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,9 +199,15 @@ def get_available_classes():
199199
"""
200200
Fetch and format all available entity classes.
201201
Returns cached result if available (computed at startup).
202+
For small datasets (< COUNT_LIMIT), cache is invalidated to keep counts accurate.
202203
"""
203204
global _AVAILABLE_CLASSES_CACHE
204205

206+
if _AVAILABLE_CLASSES_CACHE is not None:
207+
total_count = sum(cls.get('count_numeric', 0) for cls in _AVAILABLE_CLASSES_CACHE)
208+
if total_count < COUNT_LIMIT:
209+
_AVAILABLE_CLASSES_CACHE = None
210+
205211
if _AVAILABLE_CLASSES_CACHE is not None:
206212
return _AVAILABLE_CLASSES_CACHE
207213

0 commit comments

Comments
 (0)