2727from DIRAC .RequestManagementSystem .Client .ReqClient import ReqClient
2828from DIRAC .RequestManagementSystem .Client .Request import Request
2929from DIRAC .RequestManagementSystem .private .RequestValidator import RequestValidator
30- from DIRAC .Resources .Computing .BatchSystems .TimeLeft .TimeLeft import TimeLeft
3130from DIRAC .Resources .Computing .ComputingElementFactory import ComputingElementFactory
3231from DIRAC .WorkloadManagementSystem .Client import JobStatus , PilotStatus
3332from DIRAC .WorkloadManagementSystem .Client .JobManagerClient import JobManagerClient
@@ -66,7 +65,7 @@ def __init__(self, agentName, loadName, baseAgentName=False, properties=None):
6665
6766 # Agent options
6867 # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
69- self .cpuFactor = 0.0
68+ self .cpuPower = 0.0
7069 self .jobSubmissionDelay = 10
7170 self .fillingMode = True
7271 self .minimumTimeLeft = 5000
@@ -80,11 +79,10 @@ def __init__(self, agentName, loadName, baseAgentName=False, properties=None):
8079 self .logLevel = "INFO"
8180 self .defaultWrapperLocation = "DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py"
8281
83- # Timeleft
84- self .initTimes = os .times ()
85- self .initTimeLeft = 0.0
86- self .timeLeft = self .initTimeLeft
87- self .timeLeftUtil = None
82+ # CPU work left (Wall-clock time * CPU Power)
83+ self .initCPUWork = 0.0
84+ self .timeLeft = self .initCPUWork
85+ self .initTime = time .time ()
8886 self .pilotInfoReportedFlag = False
8987
9088 # Attributes related to the processed jobs, it should take the following form:
@@ -109,23 +107,18 @@ def initialize(self):
109107 if not result ["OK" ]:
110108 return result
111109
112- result = self ._getCEDict (self .computingElement )
113- if not result ["OK" ]:
114- return result
115- ceDict = result ["Value" ][0 ]
110+ # Read initial CPU work left from config (seeded by pilot via dirac-wms-get-queue-cpu-time)
111+ self .initCPUWork = gConfig .getValue ("/LocalSite/CPUTimeLeft" , self .initCPUWork )
112+ self .timeLeft = self .initCPUWork
116113
117- self .initTimeLeft = ceDict .get ("CPUTime" , self .initTimeLeft )
118- self .initTimeLeft = gConfig .getValue ("/Resources/Computing/CEDefaults/MaxCPUTime" , self .initTimeLeft )
119- self .timeLeft = self .initTimeLeft
120-
121- self .initTimes = os .times ()
114+ self .initTime = time .time ()
122115 # Localsite options
123116 self .siteName = siteName ()
124117 self .pilotReference = gConfig .getValue ("/LocalSite/PilotReference" , self .pilotReference )
125118 self .defaultProxyLength = gConfig .getValue ("/Registry/DefaultProxyLifeTime" , self .defaultProxyLength )
126119 # Agent options
127120 # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
128- self .cpuFactor = gConfig .getValue ("/LocalSite/CPUNormalizationFactor" , self .cpuFactor )
121+ self .cpuPower = gConfig .getValue ("/LocalSite/CPUNormalizationFactor" , self .cpuPower )
129122 self .jobSubmissionDelay = self .am_getOption ("SubmissionDelay" , self .jobSubmissionDelay )
130123 self .fillingMode = self .am_getOption ("FillingModeFlag" , self .fillingMode )
131124 self .minimumTimeLeft = self .am_getOption ("MinimumTimeLeft" , self .minimumTimeLeft )
@@ -136,9 +129,6 @@ def initialize(self):
136129 self .logLevel = self .am_getOption ("DefaultLogLevel" , self .logLevel )
137130 self .defaultWrapperLocation = self .am_getOption ("JobWrapperTemplate" , self .defaultWrapperLocation )
138131
139- # Utilities
140- self .timeLeftUtil = TimeLeft ()
141-
142132 # Some innerCEs may want to make use of CGroup2 support, so we prepare it globally here
143133 res = CG2Manager ().setUp ()
144134 if res ["OK" ]:
@@ -180,15 +170,17 @@ def execute(self):
180170 if result ["OK" ] and result ["Value" ]:
181171 return result
182172
183- # Check that we are allowed to continue and that time left is sufficient
173+ # Update CPU work left: wall-clock is ticking whether a job is running or not
174+ cpuWorkLeft = self ._computeCPUWorkLeft ()
175+ result = self ._setCPUWorkLeft (cpuWorkLeft )
176+ if not result ["OK" ]:
177+ return result
178+
179+ # After the first job, check filling mode eligibility
184180 if self .jobCount :
185- cpuWorkLeft = self ._computeCPUWorkLeft ()
186181 result = self ._checkCPUWorkLeft (cpuWorkLeft )
187182 if not result ["OK" ]:
188183 return result
189- result = self ._setCPUWorkLeft (cpuWorkLeft )
190- if not result ["OK" ]:
191- return result
192184
193185 # Get environment details and enhance them
194186 result = self ._getCEDict (self .computingElement )
@@ -373,7 +365,7 @@ def _setCEDict(self, ceDict):
373365 ceDict ["GridCE" ] = gridCE
374366 if "PilotReference" not in ceDict :
375367 ceDict ["PilotReference" ] = str (self .pilotReference )
376- ceDict ["PilotBenchmark" ] = self .cpuFactor
368+ ceDict ["PilotBenchmark" ] = self .cpuPower
377369 ceDict ["PilotInfoReportedFlag" ] = self .pilotInfoReportedFlag
378370
379371 # Add possible job requirements
@@ -403,28 +395,22 @@ def _checkCEAvailability(self, computingElement):
403395 return S_OK ()
404396
405397 #############################################################################
def _computeCPUWorkLeft(self):
    """
    Compute CPU Work Left in hepspec06 seconds.

    Uses a simple wall-clock countdown from the initial value ``self.initCPUWork``.
    The wall-clock time elapsed since ``self.initTime`` is multiplied by the CPU
    normalization factor ``self.cpuPower`` to get the consumed CPU work, which is
    then subtracted from the initial value.

    The result is not clamped: it becomes zero or negative once the initial
    budget has been used up. If ``self.cpuPower`` is 0, no work is ever counted
    as consumed and the initial value is returned unchanged.

    :return: cpu work left (cpu time left * cpu power of the cpus)
    """
    # time.time() follows the system clock, which can be stepped backwards
    # (e.g. by a clock adjustment on the node). A negative elapsed time would
    # report more work left than we started with, so floor it at zero.
    elapsed = max(0.0, time.time() - self.initTime)
    cpuWorkConsumed = elapsed * self.cpuPower
    return self.initCPUWork - cpuWorkConsumed
424411
425412 def _checkCPUWorkLeft (self , cpuWorkLeft ):
426413 """Check that fillingMode is enabled and time left is sufficient to continue the execution"""
427- # Only call timeLeft utility after a job has been picked up
428414 self .log .info ("Attempting to check CPU time left for filling mode" )
429415 if not self .fillingMode :
430416 return self ._finish ("Filling Mode is Disabled" )
@@ -435,7 +421,7 @@ def _checkCPUWorkLeft(self, cpuWorkLeft):
435421 return S_OK ()
436422
437423 def _setCPUWorkLeft (self , cpuWorkLeft ):
438- """Update the TimeLeft within the CE and the configuration for next matching request"""
424+ """Update the CPU work left within the CE and the configuration for next matching request"""
439425 self .timeLeft = cpuWorkLeft
440426
441427 result = self .computingElement .setCPUTimeLeft (cpuTimeLeft = self .timeLeft )
0 commit comments