@@ -136,3 +136,118 @@ db_params = {
136136```
137137
Please contact [Donatella Torretta](mailto:torretta@fnal.gov) or [Matteo Vicenzi](mailto:mvicenzi@bnl.gov) for the password.
139+
140+
##### Access via Python script
142+
The following example shows a way to access the off-line database replica.
Here, selected entries are turned into a [Pandas](https://pandas.pydata.org) dataframe.
The PostgreSQL database is accessed via the [SQLAlchemy](https://docs.sqlalchemy.org/en/20) API (2.0),
which uses the [psycopg](https://www.psycopg.org/psycopg3/docs) (3) backend:
all these packages need to be installed and operational.
148+
In addition, the database server is not directly accessible, and a workaround is needed.
One is to open an SSH tunnel to the server, hopping through a Fermilab server we can access.
For example, using `icarusgpvm03.fnal.gov` (after obtaining a Kerberos ticket):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .bash
ssh -x -L '5455:ifdbdaqrep01.fnal.gov:5455' -N "${USER}@icarusgpvm03.fnal.gov"
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
should open the needed tunnel.
156+
157+ Here is the example, with some comments within:
158+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .py
import sqlalchemy as sql
import pandas
import time

#
# Connection settings for the off-line replica of the trigger database.
# Note: the rendered version of this example had whitespace injected into
# string prefixes and dictionary keys (e.g. `r ' ****** '`, `[' password' ]`),
# which made it invalid Python; this is the intended, runnable form.
#
ConnectionSettings = {
    'database': 'icarus_trigger_prd',
    'username': 'triggerdb_reader',
    'password': '******',
    'host':     'ifdbdaqrep01.fnal.gov',
    'port':     5455,
}
TableName = 'triggerdata'

#
# Custom settings
#
# The password is obviously not published. About the host: it is not accessible from outside Fermilab.
# One way around it is to use SSH tunnelling: we have access to icarusgpvm03.fnal.gov via Kerberos ticket, so
#
#   ssh -x -L '5455:ifdbdaqrep01.fnal.gov:5455' -N "${USER}@icarusgpvm03.fnal.gov"
#
# will open a tunnel from the port 5455 (see ConnectionSettings['port'] above)
# of host ifdbdaqrep01.fnal.gov (see ConnectionSettings['host']),
# accessible via localhost.
#
ConnectionSettings['password'] = r'******'
ConnectionSettings['host'] = 'localhost'

#
# create an "engine" to create connections to the database with
#
DBconnectionURL = sql.URL.create('postgresql+psycopg', **ConnectionSettings)
DBengine = sql.create_engine(DBconnectionURL, echo=True)
print(f"{DBconnectionURL=}")

#
# make the table and its schema known to SQLAlchemy library
#
DBmetadata = sql.MetaData()  # glorified dictionary of stuff that SQLAlchemy can track

# triggerDataTable = sql.Table(TableName, DBmetadata, autoload_with=DBengine)  # this would load the whole table schema

# instead of declaring the whole table schema, we skip the part that we don't need
# by selecting columns ahead;
# all these columns are integers and there are two primary keys in the database

SelectedColumns = (
    'run_number', 'event_no',
    'wr_seconds', 'wr_nanoseconds',
    'beam_seconds', 'beam_nanoseconds',
    'gate_type', 'trigger_type', 'trigger_source',
    'gate_id', 'gate_id_bnb', 'gate_id_numi', 'gate_id_bnboff', 'gate_id_numioff',
)
PrimaryKeys = {'run_number', 'event_no'}

triggerDataTable = sql.Table(
    TableName, DBmetadata,
    *[sql.Column(colName, sql.Integer, primary_key=(colName in PrimaryKeys), nullable=False)
      for colName in SelectedColumns
    ],
)

#
# read the whole table (all available runs) into a Pandas dataframe, except:
#  * calibration gate events
#  * minimum bias events
# This query loads >11M events.
#
# The database, at the time of writing, has hundreds of millions of entries,
# and tens of gigabytes of data.
# Even with a fast connection and a lot of available memory, reading the whole
# thing is daunting.
# Limiting the range of runs via `run_number` column is often useful.
#
selector = sql.select(triggerDataTable).where(sql.and_(
    sql.between(triggerDataTable.c.gate_type, 1, 4),  # not calibration gate
    triggerDataTable.c.trigger_type == 0,             # light-based trigger
))
# print(f"Query:\n{selector}")
startTime = time.time()
with DBengine.connect() as DBconn:
    df = pandas.read_sql_query(selector, DBconn)

print(f"Whoa! it took {time.time() - startTime:.1f}\" to load {len(df)} entries and {len(df.columns)} columns from the database!!!")

#
# combine the two timestamp pieces (seconds + nanoseconds -> one integer in nanoseconds)
#
df['triggerTimestamp'] = df.wr_seconds * 1_000_000_000 + df.wr_nanoseconds
df['beamGateTimestamp'] = df.beam_seconds * 1_000_000_000 + df.beam_nanoseconds  # note: still includes the 4 us veto time
df['triggerFromBeamGate'] = df.triggerTimestamp - df.beamGateTimestamp
del df['wr_seconds'], df['wr_nanoseconds'], df['beam_seconds'], df['beam_nanoseconds']

df
253+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0 commit comments