Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
a729663
Untested impl of more robust index sampling
juntyr May 13, 2022
fe3a620
Start of modular RNG generator+sampler redesign experiment
juntyr May 16, 2022
b835866
Some further progress in the exploration
juntyr May 17, 2022
ed6085b
Some more progress
juntyr May 18, 2022
d8f8c53
Finished refactoring of necsim-impls
juntyr May 18, 2022
a40424a
It compiles?
juntyr May 18, 2022
c4542b5
Make SimpleRNG FFI safe
juntyr May 18, 2022
bf23007
Some first refactoring
juntyr May 19, 2022
e205e7c
Backup of refactoring progress for Rng<M> + Samples<M, D>
juntyr May 19, 2022
b452796
Finished refactoring into Rng<M> + Samples<M, D>
juntyr May 19, 2022
fb639bb
Backup of partial refactoring of Rng not a subtrait of RngCore
juntyr May 19, 2022
5892205
Finished refactoring of Rng not a subtrait of RngCore
juntyr May 20, 2022
d87a919
Added RawDistribution for better DistributionSampler impl self-docume…
juntyr May 20, 2022
855d02e
Refactored Rng+RngCore without Clone
juntyr May 21, 2022
61eb544
Implemented the RandRng sampler based on rand_distr
juntyr May 21, 2022
d6e0dee
Small optimisations of rng usage
juntyr May 21, 2022
ce75311
Implemented untested modular distribution samplers
juntyr May 22, 2022
1163b63
Switch alias sampler tests to modular distribution sampling
juntyr May 22, 2022
c10953a
Removed MathsCore usage from OriginSampler
juntyr May 23, 2022
f444ce0
Fixed new 1.63 lints
juntyr Aug 13, 2022
517e390
Fixed new WrappingHabitat for improved Rng
juntyr Dec 28, 2022
7a16320
Pipeline CUDA mem transfer with async
juntyr Nov 19, 2022
bc42ae3
Initial first fixes to get CUDA running again
juntyr Nov 20, 2022
9cd9db7
Fixed CUDA algorithm with spatially explicit habitat
juntyr Nov 20, 2022
4d30084
Upgraded to latest const-type-layout and rust-cuda
juntyr Nov 24, 2022
1e3ac5c
Backup of on-device sorting progress
juntyr Nov 27, 2022
2b1a72d
First running sorting impl, efficiency untested
juntyr Nov 28, 2022
727dd77
some progress on CUDA event sorting, still buggy
juntyr Nov 28, 2022
5784307
Upgrade to 2021 edition + better GPU-event-sorting
juntyr Nov 30, 2022
dc198d0
Upgrade to latest const-type-layout and rust-cuda
juntyr Nov 30, 2022
f78384e
Start with bitonic sort implementation [wip]
juntyr Nov 30, 2022
636a3e5
Added bitonic sort with shared memory, still much slower than CPU-only
juntyr Dec 1, 2022
4365b41
Experiment with incremental timsort inspired by std::slice::sort
juntyr Dec 10, 2022
8ccf8a3
More perf improvements for incremental sorting
juntyr Dec 11, 2022
71fe3ce
Minor incremental sorting perf optimisations
juntyr Dec 11, 2022
1b33e56
Cleaned up CUDA EventBuffer
juntyr Dec 29, 2022
04540c6
Update to latest const-type-layout and rust-cuda
juntyr Jan 16, 2023
81534a9
Progress upgrading to the latest rust-cuda, FitsIntoDeviceRegister st…
juntyr May 9, 2023
2228a16
Added compile-time-checked lints against local mem and register spill…
juntyr May 9, 2023
d97cf4f
Fix inlining for event comparison on CUDA
juntyr May 9, 2023
b3de59e
Install CUDA in more CI runs
juntyr May 9, 2023
5a758db
Update to latest const-type-layout and rust-cuda
juntyr May 13, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ jobs:
- name: Checkout the Repository
uses: actions/checkout@v2

# Installs the `cuda` apt package from NVIDIA's package repository:
# pins the repository priority, registers the signing keyring, adds the
# repository, and then installs without recommended packages.
# NOTE(review): the repository URLs are hard-coded to ubuntu1804/x86_64 --
# confirm that this matches the runner image used by this job.
- name: Install CUDA
run: |
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
curl -L -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb
sudo dpkg -i cuda-keyring_1.0-1_all.deb
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
sudo apt-get update -q
sudo apt-get install cuda -y --no-install-recommends

- name: Install OpenMPI
run: |
sudo apt-get update -q
Expand Down Expand Up @@ -79,6 +89,16 @@ jobs:
steps:
- name: Checkout the Repository
uses: actions/checkout@v2

# Installs the `cuda` apt package from NVIDIA's package repository:
# pins the repository priority, registers the signing keyring, adds the
# repository, and then installs without recommended packages.
# NOTE(review): the repository URLs are hard-coded to ubuntu1804/x86_64 --
# confirm that this matches the runner image used by this job.
- name: Install CUDA
run: |
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
curl -L -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb
sudo dpkg -i cuda-keyring_1.0-1_all.deb
sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
sudo apt-get update -q
sudo apt-get install cuda -y --no-install-recommends

- name: Install OpenMPI
run: |
Expand Down
42 changes: 31 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions docs/simulate.ron
Original file line number Diff line number Diff line change
Expand Up @@ -522,9 +522,14 @@
/* CUDA thread grid 1D size, should be a power of 2
* optional, default = 64 */
grid_size: (u32),
/* CUDA event sorting thread block 1D size, should be a power of 2
* optional, default = 512 */
sort_block_size: (u32),
/* number of steps which an individual performs on the GPU without supervision
* -> shorter slices enable quicker termination of single individuals
* -> longer slices incur less overhead from kernel launches
* -> if step_slice = a * 2^b, i.e. aligned to some power of two,
* slightly less sorting needs to be performed
* optional, default = 150 */
step_slice: (0 < u64),
/* selection of the mode of the individual deduplication cache
Expand Down
8 changes: 4 additions & 4 deletions necsim/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "necsim-core"
version = "0.1.0"
authors = ["Juniper Tyree <[email protected]>"]
license = "MIT OR Apache-2.0"
edition = "2018"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

Expand All @@ -15,12 +15,12 @@ cuda = ["rust-cuda"]
necsim-core-maths = { path = "maths" }
necsim-core-bond = { path = "bond" }

const-type-layout = { git = "https://github.com/juntyr/const-type-layout", rev = "e163b36" }
const-type-layout = { git = "https://github.com/juntyr/const-type-layout", rev = "645de0e" }
contracts = "0.6.3"
serde = { version = "1.0", default-features = false, features = ["derive"] }

[target.'cfg(target_os = "cuda")'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5d5cd02", features = ["derive"], optional = true }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "575bc27", features = ["derive"], optional = true }

[target.'cfg(not(target_os = "cuda"))'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5d5cd02", features = ["derive", "host"], optional = true }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "575bc27", features = ["derive", "host"], optional = true }
4 changes: 2 additions & 2 deletions necsim/core/bond/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "necsim-core-bond"
version = "0.1.0"
authors = ["Juniper Tyree <[email protected]>"]
license = "MIT OR Apache-2.0"
edition = "2018"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

Expand All @@ -13,5 +13,5 @@ default = []
[dependencies]
necsim-core-maths = { path = "../maths" }

const-type-layout = { git = "https://github.com/juntyr/const-type-layout", rev = "e163b36" }
const-type-layout = { git = "https://github.com/juntyr/const-type-layout", rev = "645de0e" }
serde = { version = "1.0", default-features = false, features = ["derive"] }
8 changes: 8 additions & 0 deletions necsim/core/bond/src/closed_open_unit_f64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ impl From<ClosedOpenUnitF64> for f64 {
}
}

/// Fallible conversion from a [`ClosedUnitF64`] into a [`ClosedOpenUnitF64`].
impl TryFrom<ClosedUnitF64> for ClosedOpenUnitF64 {
    type Error = ClosedOpenUnitF64Error;

    /// Re-validates the wrapped value of `value` through [`Self::new`].
    ///
    /// # Errors
    ///
    /// Returns whatever error [`Self::new`] reports for the wrapped value.
    // NOTE(review): judging by the type names, presumably only the upper
    // bound `1.0` can be rejected here -- confirm against `Self::new`.
    fn try_from(value: ClosedUnitF64) -> Result<Self, Self::Error> {
        let inner = value.get();

        Self::new(inner)
    }
}

impl fmt::Debug for ClosedOpenUnitF64 {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
struct ClosedOpenUnitF64Range(f64);
Expand Down
10 changes: 9 additions & 1 deletion necsim/core/bond/src/open_closed_unit_f64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use core::{
use necsim_core_maths::MathsCore;
use serde::{Deserialize, Serialize};

use crate::NonPositiveF64;
use crate::{ClosedUnitF64, NonPositiveF64};

#[derive(Debug)]
#[allow(clippy::module_name_repetitions)]
Expand Down Expand Up @@ -46,6 +46,14 @@ impl From<OpenClosedUnitF64> for f64 {
}
}

/// Fallible conversion from a [`ClosedUnitF64`] into an [`OpenClosedUnitF64`].
impl TryFrom<ClosedUnitF64> for OpenClosedUnitF64 {
    type Error = OpenClosedUnitF64Error;

    /// Re-validates the wrapped value of `value` through [`Self::new`].
    ///
    /// # Errors
    ///
    /// Returns whatever error [`Self::new`] reports for the wrapped value.
    // NOTE(review): judging by the type names, presumably only the lower
    // bound `0.0` can be rejected here -- confirm against `Self::new`.
    fn try_from(value: ClosedUnitF64) -> Result<Self, Self::Error> {
        let inner = value.get();

        Self::new(inner)
    }
}

impl fmt::Debug for OpenClosedUnitF64 {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
struct OpenClosedUnitF64Range(f64);
Expand Down
2 changes: 1 addition & 1 deletion necsim/core/maths/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "necsim-core-maths"
version = "0.1.0"
authors = ["Juniper Tyree <[email protected]>"]
license = "MIT OR Apache-2.0"
edition = "2018"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

Expand Down
8 changes: 4 additions & 4 deletions necsim/core/src/cogs/active_lineage_sampler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use core::ops::ControlFlow;
use necsim_core_bond::{NonNegativeF64, PositiveF64};

use super::{
CoalescenceSampler, DispersalSampler, EmigrationExit, EventSampler, Habitat, ImmigrationEntry,
LineageStore, MathsCore, RngCore, SpeciationProbability, TurnoverRate,
Backup, CoalescenceSampler, DispersalSampler, EmigrationExit, EventSampler, Habitat,
ImmigrationEntry, LineageStore, MathsCore, Rng, SpeciationProbability, TurnoverRate,
};

use crate::{lineage::Lineage, simulation::partial::active_lineage_sampler::PartialSimulation};
Expand All @@ -15,7 +15,7 @@ use crate::{lineage::Lineage, simulation::partial::active_lineage_sampler::Parti
pub trait ActiveLineageSampler<
M: MathsCore,
H: Habitat<M>,
G: RngCore<M>,
G: Rng<M>,
S: LineageStore<M, H>,
X: EmigrationExit<M, H, G, S>,
D: DispersalSampler<M, H, G>,
Expand All @@ -24,7 +24,7 @@ pub trait ActiveLineageSampler<
N: SpeciationProbability<M, H>,
E: EventSampler<M, H, G, S, X, D, C, T, N>,
I: ImmigrationEntry<M>,
>: crate::cogs::Backup + core::fmt::Debug
>: Backup + core::fmt::Debug
{
type LineageIterator<'a>: Iterator<Item = &'a Lineage>
where
Expand Down
41 changes: 18 additions & 23 deletions necsim/core/src/cogs/coalescence_sampler.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
use core::cmp::{Ord, Ordering};

use necsim_core_bond::ClosedOpenUnitF64;
use core::{
cmp::{Ord, Ordering},
num::NonZeroU32,
};

use serde::{Deserialize, Serialize};

use crate::{
cogs::{Backup, MathsCore, RngCore},
cogs::{Backup, Habitat, LineageStore, MathsCore, Rng, RngCore},
landscape::{IndexedLocation, Location},
lineage::LineageInteraction,
};

use super::{Habitat, LineageStore};

#[allow(clippy::inline_always, clippy::inline_fn_without_body)]
#[contract_trait]
pub trait CoalescenceSampler<M: MathsCore, H: Habitat<M>, S: LineageStore<M, H>>:
crate::cogs::Backup + core::fmt::Debug
Backup + core::fmt::Debug
{
#[must_use]
#[debug_requires(habitat.get_habitat_at_location(&location) > 0, "location is habitable")]
Expand All @@ -31,7 +30,7 @@ pub trait CoalescenceSampler<M: MathsCore, H: Habitat<M>, S: LineageStore<M, H>>
#[allow(clippy::unsafe_derive_deserialize)]
#[derive(Debug, PartialEq, Serialize, Deserialize, TypeLayout)]
#[repr(transparent)]
pub struct CoalescenceRngSample(ClosedOpenUnitF64);
pub struct CoalescenceRngSample(u64);

#[contract_trait]
impl Backup for CoalescenceRngSample {
Expand All @@ -57,24 +56,20 @@ impl Eq for CoalescenceRngSample {}
impl CoalescenceRngSample {
#[must_use]
#[inline]
pub fn new<M: MathsCore, G: RngCore<M>>(rng: &mut G) -> Self {
use crate::cogs::RngSampler;

Self(rng.sample_uniform_closed_open())
/// Creates a new sample by drawing a single `u64` from the generator
/// that backs `rng`.
pub fn new<M: MathsCore, G: Rng<M>>(rng: &mut G) -> Self {
    let raw_sample = rng.generator().sample_u64();

    Self(raw_sample)
}

#[must_use]
#[inline]
#[debug_ensures(ret < length, "samples U(0, length - 1)")]
pub fn sample_coalescence_index<M: MathsCore>(self, length: u32) -> u32 {
// attributes on expressions are experimental
// see https://github.com/rust-lang/rust/issues/15701
#[allow(
clippy::cast_precision_loss,
clippy::cast_possible_truncation,
clippy::cast_sign_loss
)]
let index = M::floor(self.0.get() * f64::from(length)) as u32;
index
/// Consumes the stored `u64` sample and maps it onto an index in
/// `0..length`.
///
/// The index is the upper 64 bits of the 128-bit product
/// `sample * length`, i.e. `floor(sample * length / 2^64)`.
#[debug_ensures(ret < length.get(), "samples U(0, length - 1)")]
pub fn sample_coalescence_index(self, length: NonZeroU32) -> u32 {
    // Sample U(0, length - 1) using a widening multiplication
    // Note: Some slight bias is traded for only needing one u64 sample
    // Note: Should optimise to a single 64 bit (high-only) multiplication
    let widened = u128::from(self.0) * u128::from(length.get());
    let high_word = widened >> 64;

    // The product of a u64 and a u32 is below 2^96, so the high word
    // already fits into 32 bits; the mask makes the narrowing explicit.
    #[allow(clippy::cast_possible_truncation)]
    {
        (high_word & u128::from(u32::MAX)) as u32
    }
}
}
8 changes: 4 additions & 4 deletions necsim/core/src/cogs/dispersal_sampler.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use necsim_core_bond::ClosedUnitF64;

use crate::{
cogs::{MathsCore, RngCore},
cogs::{Backup, MathsCore, Rng},
landscape::Location,
};

Expand All @@ -11,8 +11,8 @@ use super::Habitat;
#[allow(clippy::no_effect_underscore_binding)]
#[allow(clippy::module_name_repetitions)]
#[contract_trait]
pub trait DispersalSampler<M: MathsCore, H: Habitat<M>, G: RngCore<M>>:
crate::cogs::Backup + core::fmt::Debug
pub trait DispersalSampler<M: MathsCore, H: Habitat<M>, G: Rng<M>>:
Backup + core::fmt::Debug
{
#[must_use]
#[debug_requires(habitat.is_location_habitable(location), "location is habitable")]
Expand All @@ -29,7 +29,7 @@ pub trait DispersalSampler<M: MathsCore, H: Habitat<M>, G: RngCore<M>>:
#[allow(clippy::no_effect_underscore_binding)]
#[allow(clippy::module_name_repetitions)]
#[contract_trait]
pub trait SeparableDispersalSampler<M: MathsCore, H: Habitat<M>, G: RngCore<M>>:
pub trait SeparableDispersalSampler<M: MathsCore, H: Habitat<M>, G: Rng<M>>:
DispersalSampler<M, H, G>
{
#[must_use]
Expand Down
Loading