Skip to content
Closed
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ca0f10f
Initial progress towards using rust-cuda with async
juntyr Jan 7, 2024
29b098e
Some progress
juntyr Jan 8, 2024
52f4ce2
Further async integration progress, rustcoalescence fails to compile
juntyr Jan 9, 2024
5cd2354
Some progress with dispatch coersion
juntyr Jan 10, 2024
65ed1c8
Small cleanup
juntyr Jan 10, 2024
b6dd445
Cleanup cuda algorithm coersion
juntyr Jan 11, 2024
775094a
Some more cleanup
juntyr Jan 11, 2024
352b4e7
Add back missing Backup for SeaHash and WyHash rngs
juntyr Jan 11, 2024
b24a056
Fix CUDA kernel extraneous pub exports
juntyr Jan 12, 2024
a632584
Minor improvement of the event buffer hack
juntyr Jan 12, 2024
8c2cc90
Remove unused control_flow_enum feature
juntyr Jan 12, 2024
7fbf443
Revert Copy for [Indexed]Location
juntyr Jan 12, 2024
be20a8d
Revert new clone
juntyr Jan 12, 2024
7d2633c
Update to rust-cuda with async kernel launch async return
juntyr Jan 13, 2024
10acf51
Update to latest rust-cuda
juntyr Jan 14, 2024
a809f61
Fix rustfmt
juntyr Jan 14, 2024
4e79d0c
Temporary fix to allow CUDA algorithm linking
juntyr Jan 16, 2024
4673ccf
Small cleanup, mostly of unused clippy allows
juntyr Jan 18, 2024
7a25f70
Small improvement to CUDA EventBuffer
juntyr Jan 18, 2024
387e4b6
Try trait-based kernel signature check
juntyr Jan 20, 2024
f5f490c
Update rust-toolchain
juntyr Jan 20, 2024
9db2ef5
Fix clippy lints
juntyr Jan 20, 2024
064d798
Try with const match instead
juntyr Jan 21, 2024
9705e28
Try with memcmp intrinsic
juntyr Jan 21, 2024
8306317
Try out experimental const-type-layout with compression
juntyr Feb 1, 2024
c650a4e
Try interning all const layout strings
juntyr Feb 2, 2024
ccb2b8a
Try check
juntyr Feb 5, 2024
d494d5c
Try check again
juntyr Feb 5, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
260 changes: 238 additions & 22 deletions Cargo.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions necsim/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ contracts = "0.6.3"
serde = { version = "1.0", default-features = false, features = ["derive"] }

[target.'cfg(target_os = "cuda")'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"], optional = true }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive"], optional = true }

[target.'cfg(not(target_os = "cuda"))'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive", "host"], optional = true }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "host"], optional = true }
3 changes: 2 additions & 1 deletion necsim/core/src/landscape/extent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ use super::Location;

#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)]
#[derive(PartialEq, Eq, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)]
#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))]
#[cfg_attr(feature = "cuda", cuda(ignore))]
#[serde(rename = "Extent")]
#[serde(deny_unknown_fields)]
#[repr(C)]
pub struct LandscapeExtent {
x: u32,
y: u32,
Expand Down
23 changes: 9 additions & 14 deletions necsim/core/src/landscape/location.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,18 @@
use serde::{Deserialize, Serialize};

use crate::cogs::Backup;

#[allow(clippy::unsafe_derive_deserialize)]
#[allow(clippy::module_name_repetitions)]
#[derive(
Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout,
)]
#[serde(deny_unknown_fields)]
#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))]
#[repr(C)]
#[cfg_attr(feature = "cuda", cuda(ignore))]
#[serde(deny_unknown_fields)]
pub struct Location {
x: u32,
y: u32,
}

#[contract_trait]
impl Backup for Location {
unsafe fn backup_unchecked(&self) -> Self {
self.clone()
}
}

impl Location {
#[must_use]
pub const fn new(x: u32, y: u32) -> Self {
Expand All @@ -46,10 +39,13 @@ impl From<IndexedLocation> for Location {
#[derive(
Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout,
)]
#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)]
#[serde(from = "IndexedLocationRaw", into = "IndexedLocationRaw")]
#[allow(clippy::module_name_repetitions)]
#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))]
#[repr(C)]
#[cfg_attr(feature = "cuda", cuda(ignore))]
#[serde(from = "IndexedLocationRaw", into = "IndexedLocationRaw")]
pub struct IndexedLocation {
#[cfg_attr(feature = "cuda", cuda(embed))]
location: Location,
index: u32,
}
Expand All @@ -74,7 +70,6 @@ impl IndexedLocation {
#[derive(Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename = "IndexedLocation")]
#[repr(C)]
struct IndexedLocationRaw {
x: u32,
y: u32,
Expand Down
1 change: 0 additions & 1 deletion necsim/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#![no_std]
#![feature(const_type_name)]
#![feature(offset_of)]
#![feature(control_flow_enum)]
#![feature(min_specialization)]

#[doc(hidden)]
Expand Down
13 changes: 11 additions & 2 deletions necsim/core/src/lineage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::{
};

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, TypeLayout)]
#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))]
#[repr(transparent)]
pub struct GlobalLineageReference(u64);

Expand Down Expand Up @@ -94,21 +95,29 @@ impl From<Option<GlobalLineageReference>> for LineageInteraction {
}
}

#[allow(clippy::unsafe_derive_deserialize)]
#[allow(clippy::unsafe_derive_deserialize, clippy::module_name_repetitions)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, TypeLayout)]
#[serde(deny_unknown_fields)]
#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))]
#[repr(C)]
#[cfg_attr(feature = "cuda", cuda(ignore))]
#[serde(deny_unknown_fields)]
pub struct Lineage {
#[cfg_attr(feature = "cuda", cuda(embed))]
#[cfg_attr(feature = "cuda", cuda(ignore))]
#[serde(alias = "id", alias = "ref")]
pub global_reference: GlobalLineageReference,
#[cfg_attr(feature = "cuda", cuda(ignore))]
#[serde(alias = "time")]
pub last_event_time: NonNegativeF64,
#[cfg_attr(feature = "cuda", cuda(embed))]
#[cfg_attr(feature = "cuda", cuda(ignore))]
#[serde(alias = "loc")]
pub indexed_location: IndexedLocation,
}

impl Lineage {
#[must_use]
#[allow(clippy::no_effect_underscore_binding)]
#[debug_ensures(
ret.indexed_location == old(indexed_location.clone()),
"stores the indexed_location"
Expand Down
2 changes: 1 addition & 1 deletion necsim/core/src/reporter/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ mod private {
impl Sealed for super::False {}
}

pub trait Boolean: private::Sealed {
pub trait Boolean: 'static + private::Sealed {
const VALUE: bool;
}

Expand Down
2 changes: 1 addition & 1 deletion necsim/core/src/simulation/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ impl<
}

#[derive(Debug, TypeLayout)]
#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))]
#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))]
#[cfg_attr(feature = "cuda", cuda(free = "M"))]
#[repr(C)]
pub struct Simulation<
Expand Down
4 changes: 2 additions & 2 deletions necsim/impls/cuda/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ contracts = "0.6.3"
serde = { version = "1.0", default-features = false, features = ["derive"] }

[target.'cfg(target_os = "cuda")'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"] }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive"] }

[target.'cfg(not(target_os = "cuda"))'.dependencies]
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive", "host"] }
rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "4148959", features = ["derive", "host"] }
48 changes: 33 additions & 15 deletions necsim/impls/cuda/src/cogs/rng.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,34 @@ use core::marker::PhantomData;
use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore};

use const_type_layout::TypeGraphLayout;
use rust_cuda::safety::StackOnly;
use rust_cuda::{
safety::{PortableBitSemantics, StackOnly},
utils::adapter::RustToCudaWithPortableBitCloneSemantics,
};

use serde::{Deserialize, Deserializer, Serialize, Serializer};

#[allow(clippy::module_name_repetitions)]
#[derive(Debug, rust_cuda::common::LendRustToCuda)]
#[derive(Debug, rust_cuda::lend::LendRustToCuda)]
#[cuda(free = "M", free = "R")]
pub struct CudaRng<M: MathsCore, R>
where
R: RngCore<M> + StackOnly + TypeGraphLayout,
R: RngCore<M> + StackOnly + PortableBitSemantics + TypeGraphLayout,
{
inner: R,
#[cuda(embed)]
inner: RustToCudaWithPortableBitCloneSemantics<R>,
marker: PhantomData<M>,
}

impl<M: MathsCore, R: RngCore<M> + StackOnly + TypeGraphLayout> Clone for CudaRng<M, R> {
impl<M: MathsCore, R: RngCore<M> + StackOnly + PortableBitSemantics + TypeGraphLayout + Copy> Copy
for CudaRng<M, R>
{
}

#[allow(clippy::expl_impl_clone_on_copy)]
impl<M: MathsCore, R: RngCore<M> + StackOnly + PortableBitSemantics + TypeGraphLayout> Clone
for CudaRng<M, R>
{
fn clone(&self) -> Self {
Self {
inner: self.inner.clone(),
Expand All @@ -27,25 +39,29 @@ impl<M: MathsCore, R: RngCore<M> + StackOnly + TypeGraphLayout> Clone for CudaRn
}
}

impl<M: MathsCore, R: RngCore<M> + StackOnly + TypeGraphLayout> From<R> for CudaRng<M, R> {
impl<M: MathsCore, R: RngCore<M> + StackOnly + PortableBitSemantics + TypeGraphLayout> From<R>
for CudaRng<M, R>
{
#[must_use]
#[inline]
fn from(rng: R) -> Self {
Self {
inner: rng,
inner: rng.into(),
marker: PhantomData::<M>,
}
}
}

impl<M: MathsCore, R: RngCore<M> + StackOnly + TypeGraphLayout> RngCore<M> for CudaRng<M, R> {
impl<M: MathsCore, R: RngCore<M> + StackOnly + PortableBitSemantics + TypeGraphLayout> RngCore<M>
for CudaRng<M, R>
{
type Seed = <R as RngCore<M>>::Seed;

#[must_use]
#[inline]
fn from_seed(seed: Self::Seed) -> Self {
Self {
inner: R::from_seed(seed),
inner: R::from_seed(seed).into(),
marker: PhantomData::<M>,
}
}
Expand All @@ -57,26 +73,28 @@ impl<M: MathsCore, R: RngCore<M> + StackOnly + TypeGraphLayout> RngCore<M> for C
}
}

impl<M: MathsCore, R: PrimeableRng<M> + StackOnly + TypeGraphLayout> PrimeableRng<M>
for CudaRng<M, R>
impl<M: MathsCore, R: PrimeableRng<M> + StackOnly + PortableBitSemantics + TypeGraphLayout>
PrimeableRng<M> for CudaRng<M, R>
{
#[inline]
fn prime_with(&mut self, location_index: u64, time_index: u64) {
self.inner.prime_with(location_index, time_index);
}
}

impl<M: MathsCore, R: RngCore<M> + StackOnly + TypeGraphLayout> Serialize for CudaRng<M, R> {
impl<M: MathsCore, R: RngCore<M> + StackOnly + PortableBitSemantics + TypeGraphLayout> Serialize
for CudaRng<M, R>
{
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
self.inner.serialize(serializer)
}
}

impl<'de, M: MathsCore, R: RngCore<M> + StackOnly + TypeGraphLayout> Deserialize<'de>
for CudaRng<M, R>
impl<'de, M: MathsCore, R: RngCore<M> + StackOnly + PortableBitSemantics + TypeGraphLayout>
Deserialize<'de> for CudaRng<M, R>
{
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let inner = R::deserialize(deserializer)?;
let inner = R::deserialize(deserializer)?.into();

Ok(Self {
inner,
Expand Down
Loading