Skip to content

Commit a411db4

Browse files
committed
wait before profiling newly created containers
1 parent 86995b1 commit a411db4

File tree

9 files changed

+66
-46
lines changed

9 files changed

+66
-46
lines changed

gprofiler/containers_client.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16+
import time
1617
from typing import Dict, List, Optional, Set
1718

1819
from granulate_utils.containers.client import ContainersClient
@@ -25,6 +26,8 @@
2526

2627
logger = get_logger_adapter(__name__)
2728

29+
NEWLY_CREATED_CONTAINER_AGE_IN_SECONDS = 3
30+
2831

2932
class ContainerNamesClient:
3033
def __init__(self) -> None:
@@ -73,9 +76,13 @@ def get_container_name(self, pid: int) -> str:
7376
def _safely_get_process_container_name(self, pid: int) -> Optional[str]:
7477
try:
7578
try:
76-
container_id = get_process_container_id(Process(pid))
79+
process = Process(pid)
80+
container_id = get_process_container_id(process)
7781
if container_id is None:
7882
return None
83+
# If the container is newly created, we wait a bit to make sure the container is available
84+
if time.time() - process.create_time() <= NEWLY_CREATED_CONTAINER_AGE_IN_SECONDS:
85+
time.sleep(2)
7986
except NoSuchProcess:
8087
return None
8188
return self._get_container_name(container_id)

gprofiler/profilers/java.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1229,6 +1229,7 @@ def _check_async_profiler_loaded(self, process: Process) -> bool:
12291229
def _profile_process(self, process: Process, duration: int, spawned: bool) -> ProfileData:
12301230
comm = process_comm(process)
12311231
exe = process_exe(process)
1232+
container_name = self._profiler_state.get_container_name(process.pid)
12321233
java_version_output: Optional[str] = get_java_version_logged(process, self._profiler_state.stop_event)
12331234

12341235
if self._enabled_proc_events_java:
@@ -1258,7 +1259,6 @@ def _profile_process(self, process: Process, duration: int, spawned: bool) -> Pr
12581259
self._profiled_pids.add(process.pid)
12591260

12601261
logger.info(f"Profiling{' spawned' if spawned else ''} process {process.pid} with async-profiler")
1261-
container_name = self._profiler_state.get_container_name(process.pid)
12621262
app_metadata = self._metadata.get_metadata(process)
12631263
appid = application_identifiers.get_java_app_id(process, self._collect_spark_app_name)
12641264

gprofiler/profilers/php.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,11 @@ def extract_metadata_section(re_expr: Pattern, metadata_line: str) -> str:
210210
if profiler_state.processes_to_profile is not None:
211211
if pid not in [process.pid for process in profiler_state.processes_to_profile]:
212212
continue
213+
container_name = profiler_state.get_container_name(pid)
213214
# TODO: appid & app metadata for php!
214215
appid = None
215216
app_metadata = None
216-
profiles[pid] = ProfileData(results[pid], appid, app_metadata, profiler_state.get_container_name(pid))
217+
profiles[pid] = ProfileData(results[pid], appid, app_metadata, container_name)
217218

218219
return profiles
219220

gprofiler/profilers/python_ebpf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,9 @@ def snapshot(self) -> ProcessToProfileData:
262262
if self._profiler_state.processes_to_profile is not None:
263263
if process not in self._profiler_state.processes_to_profile:
264264
continue
265+
container_name = self._profiler_state.get_container_name(pid)
265266
appid = application_identifiers.get_python_app_id(process)
266267
app_metadata = self._metadata.get_metadata(process)
267-
container_name = self._profiler_state.get_container_name(pid)
268268
except NoSuchProcess:
269269
appid = None
270270
app_metadata = None

tests/test_java.py

Lines changed: 37 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def read_ap_version(self: AsyncProfiledProcess) -> str:
118118
return version
119119

120120

121-
def xtest_async_profiler_already_running(
121+
def test_async_profiler_already_running(
122122
application_pid: int,
123123
profiler_state: ProfilerState,
124124
assert_collapsed: AssertInCollapsed,
@@ -163,7 +163,7 @@ def xtest_async_profiler_already_running(
163163

164164

165165
@pytest.mark.parametrize("in_container", [True])
166-
def xtest_java_async_profiler_cpu_mode(
166+
def test_java_async_profiler_cpu_mode(
167167
application_pid: int,
168168
assert_collapsed: AssertInCollapsed,
169169
profiler_state: ProfilerState,
@@ -186,7 +186,7 @@ def xtest_java_async_profiler_cpu_mode(
186186

187187
@pytest.mark.parametrize("in_container", [True])
188188
@pytest.mark.parametrize("application_image_tag", ["musl"])
189-
def xtest_java_async_profiler_musl_and_cpu(
189+
def test_java_async_profiler_musl_and_cpu(
190190
application_pid: int,
191191
assert_collapsed: AssertInCollapsed,
192192
profiler_state: ProfilerState,
@@ -215,7 +215,7 @@ def test_java_safemode_parameters(profiler_state: ProfilerState) -> None:
215215
assert "Java version checks are mandatory in --java-safemode" in str(excinfo.value)
216216

217217

218-
def xtest_java_safemode_version_check(
218+
def test_java_safemode_version_check(
219219
monkeypatch: MonkeyPatch,
220220
caplog: LogCaptureFixture,
221221
application_pid: int,
@@ -226,6 +226,7 @@ def xtest_java_safemode_version_check(
226226
monkeypatch.setitem(JavaProfiler.MINIMAL_SUPPORTED_VERSIONS, 8, (Version("8.999"), 0))
227227

228228
with make_java_profiler(profiler_state) as profiler:
229+
profiler._profiler_state.get_container_name(application_pid)
229230
process = profiler._select_processes_to_profile()[0]
230231
jvm_version_str = cast_away_optional(get_java_version(process, profiler._profiler_state.stop_event))
231232
jvm_version = parse_jvm_version(jvm_version_str)
@@ -236,7 +237,7 @@ def xtest_java_safemode_version_check(
236237
assert log_record_extra(log_record)["jvm_version"] == repr(jvm_version)
237238

238239

239-
def xtest_java_safemode_build_number_check(
240+
def test_java_safemode_build_number_check(
240241
monkeypatch: MonkeyPatch,
241242
caplog: LogCaptureFixture,
242243
application_pid: int,
@@ -245,6 +246,7 @@ def xtest_java_safemode_build_number_check(
245246
profiler_state: ProfilerState,
246247
) -> None:
247248
with make_java_profiler(profiler_state) as profiler:
249+
profiler._profiler_state.get_container_name(application_pid)
248250
process = profiler._select_processes_to_profile()[0]
249251
jvm_version_str = cast_away_optional(get_java_version(process, profiler._profiler_state.stop_event))
250252
jvm_version = parse_jvm_version(jvm_version_str)
@@ -261,10 +263,10 @@ def xtest_java_safemode_build_number_check(
261263
[
262264
(False, (), False), # default
263265
(False, ("-XX:ErrorFile=/tmp/my_custom_error_file.log",), False), # custom error file
264-
# (True, (), False), # containerized (other params are ignored)
266+
(True, (), False), # containerized (other params are ignored)
265267
],
266268
)
267-
def xtest_hotspot_error_file(
269+
def test_hotspot_error_file(
268270
application_pid: int,
269271
monkeypatch: MonkeyPatch,
270272
caplog: LogCaptureFixture,
@@ -297,7 +299,7 @@ def start_async_profiler_and_crash(self: AsyncProfiledProcess, *args: Any, **kwa
297299
assert profiler._safemode_disable_reason is not None
298300

299301

300-
def xtest_disable_java_profiling(
302+
def test_disable_java_profiling(
301303
application_pid: int,
302304
monkeypatch: MonkeyPatch,
303305
caplog: LogCaptureFixture,
@@ -315,7 +317,7 @@ def xtest_disable_java_profiling(
315317
assert "Java profiling has been disabled, skipping profiling of all java process" in caplog.text
316318

317319

318-
def xtest_already_loaded_async_profiler_profiling_failure(
320+
def test_already_loaded_async_profiler_profiling_failure(
319321
monkeypatch: MonkeyPatch,
320322
caplog: LogCaptureFixture,
321323
application_pid: int,
@@ -339,7 +341,7 @@ def xtest_already_loaded_async_profiler_profiling_failure(
339341
# test only once; and don't test in container - as it will go down once we kill the Java app.
340342
@pytest.mark.parametrize("in_container", [False])
341343
@pytest.mark.parametrize("check_app_exited", [False]) # we're killing it, the exit check will raise.
342-
def xtest_async_profiler_output_written_upon_jvm_exit(
344+
def test_async_profiler_output_written_upon_jvm_exit(
343345
tmp_path_world_accessible: Path,
344346
application_pid: int,
345347
assert_collapsed: AssertInCollapsed,
@@ -367,7 +369,7 @@ def delayed_kill() -> None:
367369

368370
# test only once
369371
@pytest.mark.parametrize("in_container", [False])
370-
def xtest_async_profiler_stops_after_given_timeout(
372+
def test_async_profiler_stops_after_given_timeout(
371373
tmp_path_world_accessible: Path,
372374
application_pid: int,
373375
assert_collapsed: AssertInCollapsed,
@@ -400,7 +402,7 @@ def xtest_async_profiler_stops_after_given_timeout(
400402

401403
@pytest.mark.parametrize("in_container", [True])
402404
@pytest.mark.parametrize("application_image_tag,search_for", [("j9", "OpenJ9"), ("zing", "Zing")])
403-
def xtest_sanity_other_jvms(
405+
def test_sanity_other_jvms(
404406
application_pid: int,
405407
assert_collapsed: AssertInCollapsed,
406408
search_for: str,
@@ -417,6 +419,7 @@ def xtest_sanity_other_jvms(
417419
frequency=99,
418420
java_async_profiler_mode="cpu",
419421
) as profiler:
422+
profiler._profiler_state.get_container_name(application_pid)
420423
process = psutil.Process(application_pid)
421424
assert search_for in cast_away_optional(get_java_version(process, profiler._profiler_state.stop_event))
422425
process_collapsed = snapshot_pid_collapsed(profiler, application_pid)
@@ -425,7 +428,7 @@ def xtest_sanity_other_jvms(
425428

426429
@pytest.mark.parametrize("in_container", [True])
427430
@pytest.mark.parametrize("application_image_tag,search_for", [("eclipse-temurin-latest", "Temurin")])
428-
def xtest_sanity_latest_jvms(
431+
def test_sanity_latest_jvms(
429432
application_pid: int,
430433
assert_collapsed: AssertInCollapsed,
431434
search_for: str,
@@ -438,6 +441,7 @@ def xtest_sanity_latest_jvms(
438441
"""
439442

440443
with make_java_profiler(profiler_state) as profiler:
444+
profiler._profiler_state.get_container_name(application_pid)
441445
# sanity check that this is the correct JVM we're targeting
442446
assert search_for in cast_away_optional(
443447
get_java_version(psutil.Process(application_pid), profiler._profiler_state.stop_event)
@@ -462,7 +466,7 @@ def simulate_libjvm_delete(application_pid: int) -> None:
462466

463467
# test only once. in a container, so that we don't mess up the environment :)
464468
@pytest.mark.parametrize("in_container", [True])
465-
def xtest_java_deleted_libjvm(
469+
def test_java_deleted_libjvm(
466470
application_pid: int,
467471
application_docker_container: Container,
468472
assert_collapsed: AssertInCollapsed,
@@ -501,7 +505,7 @@ def _filter_record(r: LogRecord) -> bool:
501505
pytest.param("ro", [docker.types.Mount(target="/tmpfs", source="", type="tmpfs", read_only=True)], id="ro"),
502506
],
503507
)
504-
def xtest_java_noexec_or_ro_dirs(
508+
def test_java_noexec_or_ro_dirs(
505509
tmp_path_world_accessible: Path, # will be used by AP for logs & outputs
506510
application_pid: int,
507511
extra_application_docker_mounts: List[docker.types.Mount],
@@ -569,7 +573,7 @@ def xtest_java_noexec_or_ro_dirs(
569573

570574

571575
@pytest.mark.parametrize("in_container", [True])
572-
def xtest_java_symlinks_in_paths(
576+
def test_java_symlinks_in_paths(
573577
application_pid: int,
574578
application_docker_container: Container,
575579
assert_collapsed: AssertInCollapsed,
@@ -616,7 +620,7 @@ def xtest_java_symlinks_in_paths(
616620

617621

618622
@pytest.mark.parametrize("in_container", [True]) # only in container is enough
619-
def xtest_java_appid_and_metadata_before_process_exits(
623+
def test_java_appid_and_metadata_before_process_exits(
620624
application_pid: int,
621625
assert_collapsed: AssertInCollapsed,
622626
monkeypatch: MonkeyPatch,
@@ -657,7 +661,7 @@ def start_async_profiler_and_interrupt(self: AsyncProfiledProcess, *args: Any, *
657661

658662

659663
@pytest.mark.parametrize("in_container", [True]) # only in container is enough
660-
def xtest_java_attach_socket_missing(
664+
def test_java_attach_socket_missing(
661665
application_pid: int,
662666
profiler_state: ProfilerState,
663667
) -> None:
@@ -680,7 +684,7 @@ def xtest_java_attach_socket_missing(
680684

681685
# we know what messages to expect when in container, not on the host Java
682686
@pytest.mark.parametrize("in_container", [True])
683-
def xtest_java_jattach_async_profiler_log_output(
687+
def test_java_jattach_async_profiler_log_output(
684688
application_pid: int,
685689
caplog: LogCaptureFixture,
686690
profiler_state: ProfilerState,
@@ -817,7 +821,7 @@ def test_non_java_basename_version(
817821

818822
@pytest.mark.parametrize("in_container", [True])
819823
@pytest.mark.parametrize("insert_dso_name", [False, True])
820-
def xtest_dso_name_in_ap_profile(
824+
def test_dso_name_in_ap_profile(
821825
application_pid: int,
822826
insert_dso_name: bool,
823827
profiler_state: ProfilerState,
@@ -836,7 +840,7 @@ def xtest_dso_name_in_ap_profile(
836840
@pytest.mark.parametrize("in_container", [True])
837841
@pytest.mark.parametrize("insert_dso_name", [False, True])
838842
@pytest.mark.parametrize("libc_pattern", [r"(^|;)\(/.*/libc-.*\.so\)($|;)"])
839-
def xtest_handling_missing_symbol_in_profile(
843+
def test_handling_missing_symbol_in_profile(
840844
application_pid: int,
841845
insert_dso_name: bool,
842846
libc_pattern: str,
@@ -852,7 +856,7 @@ def xtest_handling_missing_symbol_in_profile(
852856

853857

854858
@pytest.mark.parametrize("in_container", [True])
855-
def xtest_meminfo_logged(
859+
def test_meminfo_logged(
856860
application_pid: int,
857861
caplog: LogCaptureFixture,
858862
profiler_state: ProfilerState,
@@ -869,7 +873,7 @@ def xtest_meminfo_logged(
869873

870874
# test that java frames include no semicolon but use a pipe '|' character instead, as implemented by AP
871875
@pytest.mark.parametrize("in_container", [True])
872-
def xtest_java_frames_include_no_semicolons(
876+
def test_java_frames_include_no_semicolons(
873877
application_pid: int,
874878
profiler_state: ProfilerState,
875879
) -> None:
@@ -896,7 +900,7 @@ def xtest_java_frames_include_no_semicolons(
896900

897901
# test that async profiler doesn't print anything to applications stdout, stderr streams
898902
@pytest.mark.parametrize("in_container", [True])
899-
def xtest_no_stray_output_in_stdout_stderr(
903+
def test_no_stray_output_in_stdout_stderr(
900904
application_pid: int,
901905
application_docker_container: Container,
902906
monkeypatch: MonkeyPatch,
@@ -1094,7 +1098,7 @@ def flush_output_and_stop_async_profiler(self: AsyncProfiledProcess, *args: Any,
10941098
),
10951099
],
10961100
)
1097-
def xtest_collect_default_jvm_flags(
1101+
def test_collect_default_jvm_flags(
10981102
profiler_state: ProfilerState,
10991103
tmp_path: Path,
11001104
application_pid: int,
@@ -1178,7 +1182,7 @@ def xtest_collect_default_jvm_flags(
11781182
),
11791183
],
11801184
)
1181-
def xtest_collect_cmdline_and_env_jvm_flags(
1185+
def test_collect_cmdline_and_env_jvm_flags(
11821186
docker_client: DockerClient,
11831187
application_docker_image: Image,
11841188
assert_collapsed: AssertInCollapsed,
@@ -1218,7 +1222,7 @@ def xtest_collect_cmdline_and_env_jvm_flags(
12181222
@pytest.mark.parametrize("java_cli_flags", ["-XX:MinHeapFreeRatio=5 -XX:MaxHeapFreeRatio=95"])
12191223
@pytest.mark.parametrize("in_container", [True])
12201224
@pytest.mark.parametrize("expected_flags", [[]])
1221-
def xtest_collect_flags_unsupported_filtered_out(
1225+
def test_collect_flags_unsupported_filtered_out(
12221226
docker_client: DockerClient,
12231227
application_docker_image: Image,
12241228
assert_collapsed: AssertInCollapsed,
@@ -1246,10 +1250,9 @@ def xtest_collect_flags_unsupported_filtered_out(
12461250
f"exec java {java_cli_flags} -jar Fibonacci.jar",
12471251
],
12481252
) as container:
1249-
assert (
1250-
profiler._metadata.get_jvm_flags_serialized(psutil.Process(container.attrs["State"]["Pid"]))
1251-
== expected_flags
1252-
)
1253+
pid = container.attrs["State"]["Pid"]
1254+
profiler._profiler_state.get_container_name(pid)
1255+
assert profiler._metadata.get_jvm_flags_serialized(psutil.Process(pid)) == expected_flags
12531256
log_record = next(filter(lambda r: r.message == "Missing requested flags:", caplog.records))
12541257
# use slicing to remove the leading -XX: instead of removeprefix as it's not available in python 3.8
12551258
assert (
@@ -1260,7 +1263,7 @@ def xtest_collect_flags_unsupported_filtered_out(
12601263

12611264
@pytest.mark.parametrize("in_container", [True])
12621265
@pytest.mark.parametrize("expected_flags", [[]])
1263-
def xtest_collect_none_jvm_flags(
1266+
def test_collect_none_jvm_flags(
12641267
profiler_state: ProfilerState,
12651268
tmp_path: Path,
12661269
application_pid: int,
@@ -1272,7 +1275,7 @@ def xtest_collect_none_jvm_flags(
12721275

12731276
@pytest.mark.parametrize("in_container", [True])
12741277
@pytest.mark.parametrize("include_mmm", [True, False])
1275-
def xtest_including_method_modifiers(
1278+
def test_including_method_modifiers(
12761279
application_pid: int,
12771280
profiler_state: ProfilerState,
12781281
include_mmm: bool,
@@ -1287,7 +1290,7 @@ def xtest_including_method_modifiers(
12871290

12881291
@pytest.mark.parametrize("java_line_numbers", ["none", "line-of-function"])
12891292
@pytest.mark.parametrize("in_container", [True])
1290-
def xtest_including_line_numbers(
1293+
def test_including_line_numbers(
12911294
application_pid: int,
12921295
profiler_state: ProfilerState,
12931296
java_line_numbers: str,

tests/test_profiling_mode.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def test_sanity(
5656
("java", "ap", True, "java.lang.String[]"),
5757
],
5858
)
59-
def xtest_allocation_being_profiled(
59+
def test_allocation_being_profiled(
6060
application_docker_container: Container,
6161
docker_client: DockerClient,
6262
gprofiler_docker_image: Image,

0 commit comments

Comments
 (0)