Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "1.2.6-dev8"
current_version = "1.2.6-dev9"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.
Expand Down
2 changes: 1 addition & 1 deletion fullwave/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
__version__ = version("fullwave")
except PackageNotFoundError:
# Update via bump-my-version, not manually
__version__ = "1.2.6-dev8"
__version__ = "1.2.6-dev9"

VERSION = __version__ # for convenience
logger.info("Fullwave version: %s", __version__)
59 changes: 48 additions & 11 deletions fullwave/solver/pml_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import concurrent.futures
import gc
import logging
from collections import OrderedDict
from dataclasses import dataclass, field
Expand Down Expand Up @@ -563,6 +564,33 @@ def _extend_map_for_pml(

return output

def _move_named_arrays_to_cpu(
self,
named_arrays: list[tuple[str, NDArray]],
attr_names: list[str] | None = None,
) -> list[tuple[str, NDArray]]:
"""Convert CuPy arrays to numpy and free GPU memory pools.

Also replaces the corresponding ``medium_org`` attributes (given by
*attr_names*) so the original GPU arrays can be garbage-collected.
"""
import cupy as cp # noqa: PLC0415

numpy_arrays = []
for name, arr in named_arrays:
if hasattr(arr, "get"):
arr_np = arr.get()
numpy_arrays.append((name, arr_np))
# Update medium_org so the CuPy array can be freed
if attr_names and name in attr_names:
setattr(self.medium_org, name, arr_np)
else:
numpy_arrays.append((name, arr))
gc.collect()
cp.get_default_memory_pool().free_all_blocks()
cp.get_default_pinned_memory_pool().free_all_blocks()
return numpy_arrays

def _extend_arrays_gpu(
self,
named_arrays: list[tuple[str, NDArray]],
Expand All @@ -571,8 +599,8 @@ def _extend_arrays_gpu(

Parameters
----------
named_arrays : list of (name, numpy_array) pairs
Arrays to extend. Must be numpy arrays (CPU).
named_arrays : list of (name, numpy_array) or (name, cupy_array) pairs
Arrays to extend.

Returns
-------
Expand All @@ -585,6 +613,8 @@ def _extend_arrays_gpu(
n_gpus = cp.cuda.runtime.getDeviceCount()
logger.info("CUDA devices available: %d", n_gpus)

attr_names = [name for name, _ in named_arrays]

# Strategy 1: Multi-GPU parallel
if n_gpus > 1:
n_workers = min(n_gpus, len(named_arrays))
Expand All @@ -597,16 +627,15 @@ def _extend_arrays_gpu(
)
return self._extend_arrays_multi_gpu(named_arrays, n_gpus)
except Exception:
logger.warning("Multi-GPU extension failed. Falling back to sequential single-GPU.")
logger.warning(
"Multi-GPU extension failed. Falling back to CPU.",
exc_info=True,
)

# Strategy 2: Sequential single-GPU (extend one, free, repeat)
try:
logger.info("Extending %d medium arrays sequentially on GPU 0.", len(named_arrays))
return self._extend_arrays_sequential_gpu(named_arrays)
except Exception:
logger.warning("Single-GPU extension failed. Falling back to CPU.")
# Move arrays to CPU and free GPU memory before CPU fallback.
# The original medium arrays may still occupy GPU memory.
named_arrays = self._move_named_arrays_to_cpu(named_arrays, attr_names)

# Strategy 3: CPU
logger.info("Extending %d medium arrays on CPU.", len(named_arrays))
return self._extend_arrays_cpu(named_arrays)

Expand Down Expand Up @@ -669,13 +698,21 @@ def _extend_arrays_cpu(
named_arrays: list[tuple[str, NDArray]],
) -> dict[str, NDArray]:
"""Extend arrays on CPU using ThreadPoolExecutor."""
# Convert any CuPy arrays to numpy before CPU fallback
numpy_arrays = []
for name, arr in named_arrays:
if hasattr(arr, "get"):
numpy_arrays.append((name, arr.get()))
else:
numpy_arrays.append((name, arr))

orig_xp = self.xp
self.xp = np
try:
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
name: executor.submit(self._extend_map_for_pml, arr)
for name, arr in named_arrays
for name, arr in numpy_arrays
}
return {name: future.result() for name, future in futures.items()}
finally:
Expand Down
17 changes: 14 additions & 3 deletions fullwave/utils/pulse.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,25 @@ def gaussian_modulated_sinusoidal_signal(
dt = duration / nt
t_offset = ncycles / f0 + delay_sec

phase_correction = 0.0
if i_layer is not None:
if dt_for_layer_delay is None:
error_msg = "dt_for_layer_delay must be provided if i_layer is provided"
raise ValueError(error_msg)
if cfl_for_layer_delay is None:
error_msg = "cfl_for_layer_delay must be provided if i_layer is provided"
raise ValueError(error_msg)
t_offset += (dt_for_layer_delay / cfl_for_layer_delay) * i_layer
layer_delay_sec = (dt_for_layer_delay / cfl_for_layer_delay) * i_layer
t_offset += layer_delay_sec
# Correct carrier phase for fractional-sample delays.
# A continuous time shift of a fractional number of samples changes
# the carrier phase at each discrete sample point, causing sign
# inversion and wrong peak positions. Remove the fractional phase
# offset so the carrier stays on the same discrete phase grid as
# the base (i_layer=0) signal.
delay_samples = layer_delay_sec / dt
fractional_samples = delay_samples - round(delay_samples)
phase_correction = 2.0 * np.pi * f0 * fractional_samples * dt

t = np.arange(nt, dtype=dtype) * dt - t_offset

Expand All @@ -87,5 +98,5 @@ def gaussian_modulated_sinusoidal_signal(
else:
env = np.exp(-(a_sq**drop_off))

# Compute final signal
return env * np.sin(w_t) * p0
# Compute final signal with phase-corrected carrier
return env * np.sin(w_t + phase_correction) * p0
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "fullwave25"
version = "1.2.6-dev8" # Update via bump-my-version, not manually
version = "1.2.6-dev9" # Update via bump-my-version, not manually
description = "Fullwave 2.5: Ultrasound wave propagation simulation with heterogeneous power law attenuation modelling capabilities"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
62 changes: 62 additions & 0 deletions tests/test_pulse.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,68 @@ def test_float32_dtype(base_params):
assert y.dtype == np.float32


def test_fractional_layer_delay_preserves_phase(base_params):
"""Fractional-sample layer delays must match a manual integer shift.

Previously, fractional delays caused carrier phase errors that inverted
the peak sign for odd layers (Finding 19 in cross-physics experiment).
The fix corrects carrier phase so that the layered signal matches an
integer-sample-shifted version of the base (high NCC, same peak sign).
"""
# cfl=0.3 => dt_layer/cfl = 3.33e-8 s per i_layer
# dt_sim = 2.5e-9 s => 13.33 sim samples per i_layer (fractional)
dt_layer_frac = 1e-8
cfl_frac = 0.3
dt_sim = base_params["duration"] / base_params["nt"]
delay_per_layer_sec = dt_layer_frac / cfl_frac

y_base = gaussian_modulated_sinusoidal_signal(**base_params)

for i in range(1, 6):
y_layer = gaussian_modulated_sinusoidal_signal(
**base_params,
i_layer=i,
dt_for_layer_delay=dt_layer_frac,
cfl_for_layer_delay=cfl_frac,
)
# Manual integer shift of base signal
shift = round(delay_per_layer_sec * i / dt_sim)
y_manual = np.zeros_like(y_base)
y_manual[shift:] = y_base[: len(y_base) - shift]

# NCC must be high (signals should be nearly identical)
ncc = np.dot(y_layer, y_manual) / (np.linalg.norm(y_layer) * np.linalg.norm(y_manual))
assert ncc > 0.999, f"i_layer={i}: NCC={ncc:.6f}, expected > 0.999"


def test_integer_layer_delay_unchanged(base_params):
"""Integer-sample layer delays should produce NCC ~ 1.0 vs manual shift."""
dt_layer = 1e-8
cfl_layer = 0.4
# delay per i_layer = 2.5e-8 s = 10 sim samples (integer)
dt_sim = base_params["duration"] / base_params["nt"]
delay_per_layer_samples = (dt_layer / cfl_layer) / dt_sim
assert delay_per_layer_samples == 10.0 # confirm integer

y_base = gaussian_modulated_sinusoidal_signal(**base_params)

for i_layer in [1, 2, 4]:
y_layer = gaussian_modulated_sinusoidal_signal(
**base_params,
i_layer=i_layer,
dt_for_layer_delay=dt_layer,
cfl_for_layer_delay=cfl_layer,
)
# Manual integer shift
shift = int(round(delay_per_layer_samples * i_layer))
y_manual = np.zeros_like(y_base)
y_manual[shift:] = y_base[: len(y_base) - shift]

# NCC should be very high
ncc = np.dot(y_layer, y_manual) / (np.linalg.norm(y_layer) * np.linalg.norm(y_manual))
assert ncc > 0.999, f"i_layer={i_layer}: NCC={ncc:.6f}, expected > 0.999"


def test_performance(base_params):
"""Ensure the function completes in well under 1 second."""
start = time.perf_counter()
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading