diff --git a/Cargo.lock b/Cargo.lock index c7c4c3eef..c62829a52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,9 +38,9 @@ checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" [[package]] name = "anstream" -version = "0.6.5" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -52,9 +52,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "2faccea4cc4ab4a667ce676a30e8ec13922a692c99bb8f5b11f1502c72e04220" [[package]] name = "anstyle-parse" @@ -141,9 +141,9 @@ checksum = "23ce669cd6c8588f79e15cf450314f9638f967fc5770ff1c7c1deb0925ea7cfa" [[package]] name = "base64" -version = "0.21.5" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bincode" @@ -207,9 +207,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.1" +version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" dependencies = [ "serde", ] @@ -241,9 +241,9 @@ checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "ed2490600f404f2b94c167e31d3ed1d5f3c225a0f3b80230053b3e0b7b962bd9" dependencies = [ "bytemuck_derive", ] @@ -325,9 +325,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" dependencies = [ "num-traits", ] @@ -345,9 +345,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.13" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52bdc885e4cacc7f7c9eedc1ef6da641603180c783c41a15c264944deeaab642" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" dependencies = [ "clap_builder", "clap_derive", @@ -355,9 +355,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.12" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb7fb5e4e979aec3be7791562fcba452f94ad85e954da024396433e0e25a79e9" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" dependencies = [ "anstream", "anstyle", @@ -402,8 +402,7 @@ dependencies = [ [[package]] name = "const-type-layout" version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a8a1418a7c3cfdf6db57795ced0855a24249ddd38f1a3373d648cc3ef390d9" +source = "git+https://github.com/juntyr/const-type-layout?branch=compress#51836b1b05b7ac31e74f7c4b981ea7a0fb795be2" dependencies = [ "const-type-layout-derive", ] @@ -411,8 +410,7 @@ dependencies = [ [[package]] name = "const-type-layout-derive" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf506f23bd8d2a7b9758a9abe0f4cddb87d6fd9206c836e65a48ecbdec74d4e" +source = "git+https://github.com/juntyr/const-type-layout?branch=compress#51836b1b05b7ac31e74f7c4b981ea7a0fb795be2" dependencies = [ "proc-macro-error", "proc-macro2", @@ -622,6 +620,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1912868bad388722991f80323855d922e32b09ad00d76a13a98e465358765079" +[[package]] +name = "find_cuda_helper" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9f9e65c593dd01ac77daad909ea4ad17f0d6d1776193fc8ea766356177abdad" +dependencies = [ + "glob", +] + [[package]] name = "findshlibs" version = "0.10.2" @@ -656,11 +663,24 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" +[[package]] +name = "generator" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows", +] + [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "js-sys", @@ -789,15 +809,15 @@ dependencies = [ [[package]] name = "jpeg-decoder" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc0000e42512c92e31c2252315bda326620a4e034105e900c98ec492fa077b3e" +checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" [[package]] name = "js-sys" -version = "0.3.66" +version = "0.3.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" dependencies = [ "wasm-bindgen", ] @@ -816,9 +836,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.151" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libloading" @@ -848,9 +868,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "log" @@ -858,6 +878,29 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if", + "generator", + "pin-utils", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "mbox" version = "0.6.0" @@ -892,9 +935,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -1154,6 +1197,16 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-traits" version = "0.2.17" @@ -1169,6 +1222,21 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "oneshot" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f6640c6bda7731b1fdbab747981a0f896dd1fedaf9f4a53fa237a04a84431f4" +dependencies = [ + "loom", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "pcg_rand" version = "0.13.0" @@ -1199,11 +1267,23 @@ dependencies = [ "ucd-trie", ] +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" -version = "0.3.28" +version = "0.3.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" [[package]] name = "planus" @@ -1256,9 +1336,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.76" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -1329,27 +1409,42 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.5", + "regex-syntax 0.8.2", ] [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + +[[package]] +name = "regex-automata" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.2", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -1363,7 +1458,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ "base64", - "bitflags 2.4.1", + "bitflags 2.4.2", "serde", "serde_derive", ] @@ -1374,7 +1469,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "fallible-iterator", "fallible-streaming-iterator", "hashlink", @@ -1385,24 +1480,39 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=6b53e88#6b53e88ee0cf10e68c85f8e09f092d8f7f7b4683" +source = "git+https://github.com/juntyr/rust-cuda?rev=3ec81181#3ec8118114eabbb1b3048af248d0439e4d250a37" dependencies = [ "const-type-layout", "final", + "oneshot", + "regex", "rust-cuda-derive", - "rust-cuda-ptx-jit", + "rust-cuda-kernel", "rustacuda", "rustacuda_core", "rustacuda_derive", + "safer_owning_ref", ] [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=6b53e88#6b53e88ee0cf10e68c85f8e09f092d8f7f7b4683" +source = "git+https://github.com/juntyr/rust-cuda?rev=3ec81181#3ec8118114eabbb1b3048af248d0439e4d250a37" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "rust-cuda-kernel" +version = "0.1.0" +source = "git+https://github.com/juntyr/rust-cuda?rev=3ec81181#3ec8118114eabbb1b3048af248d0439e4d250a37" dependencies = [ "cargo_metadata", "colored", + "find_cuda_helper", "lazy_static", "proc-macro-error", "proc-macro2", @@ -1413,23 +1523,13 @@ dependencies = [ "serde_json", "strip-ansi-escapes", "syn 1.0.109", -] - -[[package]] -name = "rust-cuda-ptx-jit" -version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=6b53e88#6b53e88ee0cf10e68c85f8e09f092d8f7f7b4683" -dependencies = [ - "lazy_static", - "regex", - "rustacuda", + "thiserror", ] [[package]] name = "rustacuda" version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47208516ab5338b592d63560e90eaef405d0ec880347eaf7742d893b0a31e228" +source = "git+https://github.com/juntyr/RustaCUDA?rev=c6ea7cc#c6ea7ccf24b15c4edbd5576852a8dcdc7df272b0" dependencies = [ "bitflags 1.3.2", "cuda-driver-sys", @@ -1440,14 +1540,12 @@ dependencies = [ [[package]] name = "rustacuda_core" version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3858b08976dc2f860c5efbbb48cdcb0d4fafca92a6ac0898465af16c0dbe848" +source = "git+https://github.com/juntyr/RustaCUDA?rev=c6ea7cc#c6ea7ccf24b15c4edbd5576852a8dcdc7df272b0" [[package]] name = "rustacuda_derive" version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43ce8670a1a1d0fc2514a3b846dacdb65646f9bd494b6674cfacbb4ce430bd7e" +source = "git+https://github.com/juntyr/RustaCUDA?rev=c6ea7cc#c6ea7ccf24b15c4edbd5576852a8dcdc7df272b0" dependencies = [ "proc-macro2", "quote", @@ -1622,23 +1720,44 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.28" +version = "0.38.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" dependencies = [ - "bitflags 2.4.1", + "bitflags 2.4.2", "errno", "libc", "linux-raw-sys", "windows-sys 0.52.0", ] +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + [[package]] name = "ryu" version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +[[package]] +name = "safer_owning_ref" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af21b9de2df966f61c07b5b541c81c98225b86e48ababd43366a642654de30ef" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "seahash" version = "4.1.0" @@ -1674,18 +1793,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.195" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.195" +version = "1.0.196" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" dependencies = [ "proc-macro2", "quote", @@ -1705,9 +1824,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.111" +version = "1.0.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" dependencies = [ "itoa", "ryu", @@ -1733,6 +1852,15 @@ dependencies = [ "serde", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shell-words" version = "1.1.0" @@ -1741,9 +1869,9 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "shlex" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simdutf8" @@ -1762,9 +1890,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.2" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" [[package]] name = "stable_deref_trait" @@ -1835,11 +1963,21 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tiff" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d172b0f4d3fba17ba89811858b9d3d97f928aece846475bbda076ca46736211" +checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e" dependencies = [ "flate2", "jpeg-decoder", @@ -1864,6 +2002,67 @@ dependencies = [ "serde", ] +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + [[package]] name = "tskit" version = "0.14.1" @@ -1909,6 +2108,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" @@ -1949,9 +2154,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1959,9 +2164,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" dependencies = [ "bumpalo", "log", @@ -1974,9 +2179,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1984,9 +2189,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" dependencies = [ "proc-macro2", "quote", @@ -1997,15 +2202,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.89" +version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" +checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" [[package]] name = "weezl" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9193164d4de03a926d909d3bc7c30543cecb35400c02114792c2cae20d5e2dbb" +checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" [[package]] name = "which" @@ -2041,6 +2246,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index de5b6c629..e2475c4d1 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -15,12 +15,12 @@ cuda = ["rust-cuda"] necsim-core-maths = { path = "maths" } necsim-core-bond = { path = "bond" } -const-type-layout = { version = "0.2.0", features = ["derive"] } +const-type-layout = { git = "https://github.com/juntyr/const-type-layout", branch = "compress", features = ["derive"] } contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "host"], optional = true } diff --git a/necsim/core/bond/Cargo.toml b/necsim/core/bond/Cargo.toml index c9c8651f6..d2729e917 100644 --- a/necsim/core/bond/Cargo.toml +++ b/necsim/core/bond/Cargo.toml @@ -13,5 +13,5 @@ default = [] [dependencies] necsim-core-maths = { path = "../maths" } -const-type-layout = { version = "0.2.0", features = ["derive"] } +const-type-layout = { git = "https://github.com/juntyr/const-type-layout", branch = "compress", features = ["derive"] } serde = { version = "1.0", default-features = false, features = ["derive"] } diff --git a/necsim/core/bond/src/closed_open_unit_f64.rs b/necsim/core/bond/src/closed_open_unit_f64.rs index 0d2155c13..e6424106a 100644 --- a/necsim/core/bond/src/closed_open_unit_f64.rs +++ b/necsim/core/bond/src/closed_open_unit_f64.rs @@ -88,6 +88,7 @@ impl ClosedOpenUnitF64 { } impl PartialEq for ClosedOpenUnitF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/closed_unit_f64.rs b/necsim/core/bond/src/closed_unit_f64.rs index 664c9f20e..d5c0bdc02 100644 --- a/necsim/core/bond/src/closed_unit_f64.rs +++ b/necsim/core/bond/src/closed_unit_f64.rs @@ -122,6 +122,7 @@ impl From for ClosedUnitF64 { } impl PartialEq for ClosedUnitF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/lib.rs b/necsim/core/bond/src/lib.rs index 67c6b9e81..ff3007150 100644 --- a/necsim/core/bond/src/lib.rs +++ b/necsim/core/bond/src/lib.rs @@ -4,7 +4,6 @@ #![feature(const_float_bits_conv)] #![feature(const_float_classify)] #![feature(const_type_name)] -#![feature(offset_of)] #[macro_use] extern crate const_type_layout; diff --git a/necsim/core/bond/src/non_negative_f64.rs b/necsim/core/bond/src/non_negative_f64.rs index cf60da503..89e3ea295 100644 --- a/necsim/core/bond/src/non_negative_f64.rs +++ b/necsim/core/bond/src/non_negative_f64.rs @@ -161,6 +161,7 @@ impl From for NonNegativeF64 { } impl PartialEq for NonNegativeF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/non_positive_f64.rs b/necsim/core/bond/src/non_positive_f64.rs index 62807c4bf..2e7cce0e8 100644 --- a/necsim/core/bond/src/non_positive_f64.rs +++ b/necsim/core/bond/src/non_positive_f64.rs @@ -94,6 +94,7 @@ impl NonPositiveF64 { } impl PartialEq for NonPositiveF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/open_closed_unit_f64.rs b/necsim/core/bond/src/open_closed_unit_f64.rs index a82fdfc37..b4b3441dc 100644 --- a/necsim/core/bond/src/open_closed_unit_f64.rs +++ b/necsim/core/bond/src/open_closed_unit_f64.rs @@ -94,6 +94,7 @@ impl OpenClosedUnitF64 { } impl PartialEq for OpenClosedUnitF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/bond/src/positive_f64.rs b/necsim/core/bond/src/positive_f64.rs index ff710fceb..65561dfb8 100644 --- a/necsim/core/bond/src/positive_f64.rs +++ b/necsim/core/bond/src/positive_f64.rs @@ -122,6 +122,7 @@ impl From for PositiveF64 { } impl PartialEq for PositiveF64 { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.0.eq(&other.0) } diff --git a/necsim/core/maths/src/lib.rs b/necsim/core/maths/src/lib.rs index 7102424da..3a73a5f33 100644 --- a/necsim/core/maths/src/lib.rs +++ b/necsim/core/maths/src/lib.rs @@ -1,5 +1,6 @@ #![deny(clippy::pedantic)] #![no_std] +#![allow(internal_features)] #![feature(core_intrinsics)] pub trait MathsCore: 'static + Clone + core::fmt::Debug { diff --git a/necsim/core/src/cogs/coalescence_sampler.rs b/necsim/core/src/cogs/coalescence_sampler.rs index 93af7bc92..f4d0aa4da 100644 --- a/necsim/core/src/cogs/coalescence_sampler.rs +++ b/necsim/core/src/cogs/coalescence_sampler.rs @@ -28,7 +28,6 @@ pub trait CoalescenceSampler, S: LineageStore> ) -> (IndexedLocation, LineageInteraction); } -#[allow(clippy::unsafe_derive_deserialize)] #[derive(Debug, PartialEq, Serialize, Deserialize, TypeLayout)] #[repr(transparent)] pub struct CoalescenceRngSample(ClosedOpenUnitF64); diff --git a/necsim/core/src/event.rs b/necsim/core/src/event.rs index 40108ae85..af42ac633 100644 --- a/necsim/core/src/event.rs +++ b/necsim/core/src/event.rs @@ -55,7 +55,6 @@ pub struct Dispersal { } #[allow(clippy::module_name_repetitions)] -#[allow(clippy::unsafe_derive_deserialize)] #[derive(Debug, Clone, Serialize, Deserialize, TypeLayout)] #[repr(C)] pub struct SpeciationEvent { diff --git a/necsim/core/src/landscape/extent.rs b/necsim/core/src/landscape/extent.rs index 1339938f3..7de809e69 100644 --- a/necsim/core/src/landscape/extent.rs +++ b/necsim/core/src/landscape/extent.rs @@ -2,11 +2,12 @@ use necsim_core_bond::OffByOneU32; use super::Location; -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(PartialEq, Eq, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] +#[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(rename = "Extent")] #[serde(deny_unknown_fields)] -#[repr(C)] pub struct LandscapeExtent { x: u32, y: u32, diff --git a/necsim/core/src/landscape/location.rs b/necsim/core/src/landscape/location.rs index c3686e5c6..6bcc520a6 100644 --- a/necsim/core/src/landscape/location.rs +++ b/necsim/core/src/landscape/location.rs @@ -2,12 +2,14 @@ use serde::{Deserialize, Serialize}; use crate::cogs::Backup; -#[allow(clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive( Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout, )] -#[serde(deny_unknown_fields)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] +#[serde(deny_unknown_fields)] pub struct Location { x: u32, y: u32, @@ -46,10 +48,13 @@ impl From for Location { #[derive( Eq, PartialEq, PartialOrd, Ord, Clone, Hash, Debug, Serialize, Deserialize, TypeLayout, )] -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] -#[serde(from = "IndexedLocationRaw", into = "IndexedLocationRaw")] +#[allow(clippy::module_name_repetitions)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] +#[serde(from = "IndexedLocationRaw", into = "IndexedLocationRaw")] pub struct IndexedLocation { + #[cfg_attr(feature = "cuda", cuda(embed))] location: Location, index: u32, } @@ -74,7 +79,6 @@ impl IndexedLocation { #[derive(Serialize, Deserialize)] #[serde(deny_unknown_fields)] #[serde(rename = "IndexedLocation")] -#[repr(C)] struct IndexedLocationRaw { x: u32, y: u32, diff --git a/necsim/core/src/landscape/mod.rs b/necsim/core/src/landscape/mod.rs index 6c05344ca..41a00b87f 100644 --- a/necsim/core/src/landscape/mod.rs +++ b/necsim/core/src/landscape/mod.rs @@ -1,6 +1,6 @@ mod extent; mod location; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use extent::{LandscapeExtent, LocationIterator}; pub use location::{IndexedLocation, Location}; diff --git a/necsim/core/src/lib.rs b/necsim/core/src/lib.rs index 0888987ba..a8da66266 100644 --- a/necsim/core/src/lib.rs +++ b/necsim/core/src/lib.rs @@ -1,8 +1,6 @@ #![deny(clippy::pedantic)] #![no_std] #![feature(const_type_name)] -#![feature(offset_of)] -#![feature(control_flow_enum)] #![feature(min_specialization)] #[doc(hidden)] diff --git a/necsim/core/src/lineage.rs b/necsim/core/src/lineage.rs index 8e20ba0a5..398973fd0 100644 --- a/necsim/core/src/lineage.rs +++ b/necsim/core/src/lineage.rs @@ -16,6 +16,7 @@ use crate::{ }; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(transparent)] pub struct GlobalLineageReference(u64); @@ -94,21 +95,29 @@ impl From> for LineageInteraction { } } -#[allow(clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, TypeLayout)] -#[serde(deny_unknown_fields)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] +#[serde(deny_unknown_fields)] pub struct Lineage { + #[cfg_attr(feature = "cuda", cuda(embed))] + #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "id", alias = "ref")] pub global_reference: GlobalLineageReference, + #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "time")] pub last_event_time: NonNegativeF64, + #[cfg_attr(feature = "cuda", cuda(embed))] + #[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(alias = "loc")] pub indexed_location: IndexedLocation, } impl Lineage { #[must_use] + #[allow(clippy::no_effect_underscore_binding)] #[debug_ensures( ret.indexed_location == old(indexed_location.clone()), "stores the indexed_location" diff --git a/necsim/core/src/reporter/boolean.rs b/necsim/core/src/reporter/boolean.rs index 372b43db1..686330300 100644 --- a/necsim/core/src/reporter/boolean.rs +++ b/necsim/core/src/reporter/boolean.rs @@ -5,7 +5,7 @@ mod private { impl Sealed for super::False {} } -pub trait Boolean: private::Sealed { +pub trait Boolean: 'static + private::Sealed { const VALUE: bool; } diff --git a/necsim/core/src/reporter/mod.rs b/necsim/core/src/reporter/mod.rs index 821ae269f..a934f58b1 100644 --- a/necsim/core/src/reporter/mod.rs +++ b/necsim/core/src/reporter/mod.rs @@ -12,11 +12,11 @@ use used::MaybeUsed; pub mod boolean; pub mod used; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use combinator::ReporterCombinator; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use filter::FilteredReporter; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use null::NullReporter; pub trait Reporter: core::fmt::Debug { diff --git a/necsim/core/src/simulation/builder.rs b/necsim/core/src/simulation/builder.rs index c73c112cb..013c39434 100644 --- a/necsim/core/src/simulation/builder.rs +++ b/necsim/core/src/simulation/builder.rs @@ -86,7 +86,7 @@ impl< } #[derive(Debug, TypeLayout)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] #[repr(C)] pub struct Simulation< diff --git a/necsim/core/src/simulation/mod.rs b/necsim/core/src/simulation/mod.rs index c5356f1a2..29368e5a6 100644 --- a/necsim/core/src/simulation/mod.rs +++ b/necsim/core/src/simulation/mod.rs @@ -18,7 +18,7 @@ use crate::{ reporter::Reporter, }; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use builder::{Simulation, SimulationBuilder}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; @@ -51,7 +51,7 @@ impl< #[inline] pub fn simulate_incremental_early_stop< - F: FnMut(&Self, u64, PositiveF64) -> ControlFlow<(), ()>, + F: FnMut(&Self, u64, PositiveF64, &P) -> ControlFlow<(), ()>, P: Reporter, >( &mut self, @@ -69,13 +69,17 @@ impl< .map(|lineage| (lineage.event_time, lineage.tie_breaker)); let self_ptr = self as *const Self; + let reporter_ptr = reporter as *const P; let old_rng = unsafe { self.rng.backup_unchecked() }; let mut early_stop_flow = ControlFlow::Continue(()); let early_peek_stop = |next_event_time| { // Safety: We are only passing in an immutable reference - early_stop_flow = early_stop(unsafe { &*self_ptr }, steps, next_event_time); + early_stop_flow = + early_stop(unsafe { &*self_ptr }, steps, next_event_time, unsafe { + &*reporter_ptr + }); if early_stop_flow.is_break() { return ControlFlow::Break(()); @@ -131,6 +135,6 @@ impl< #[inline] pub fn simulate(mut self, reporter: &mut P) -> (NonNegativeF64, u64) { - self.simulate_incremental_early_stop(|_, _, _| ControlFlow::Continue(()), reporter) + self.simulate_incremental_early_stop(|_, _, _, _| ControlFlow::Continue(()), reporter) } } diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 677c9e908..22f5ad1ba 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -10,12 +10,12 @@ edition = "2021" [dependencies] necsim-core = { path = "../../core", features = ["cuda"] } -const-type-layout = { version = "0.2.0", features = ["derive"] } +const-type-layout = { git = "https://github.com/juntyr/const-type-layout", branch = "compress", features = ["derive"] } contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "host"] } diff --git a/necsim/impls/cuda/src/cogs/maths.rs b/necsim/impls/cuda/src/cogs/maths.rs index 6326ffa2a..f68e69135 100644 --- a/necsim/impls/cuda/src/cogs/maths.rs +++ b/necsim/impls/cuda/src/cogs/maths.rs @@ -36,11 +36,14 @@ impl MathsCore for NvptxMathsCore { } #[cfg(not(target_os = "cuda"))] { - extern "C" { - fn nvptx_maths_core_ln_on_cpu(_x: f64) -> !; - } + // extern "C" { + // fn nvptx_maths_core_ln_on_cpu(_x: f64) -> !; + // } + + // unsafe { nvptx_maths_core_ln_on_cpu(x) } - unsafe { nvptx_maths_core_ln_on_cpu(x) } + // TODO: disallow using NvptxMathsCore::ln on CPU + unsafe { core::intrinsics::logf64(x) } } } diff --git a/necsim/impls/cuda/src/cogs/rng.rs b/necsim/impls/cuda/src/cogs/rng.rs index bc34a8f0f..8237ed1cf 100644 --- a/necsim/impls/cuda/src/cogs/rng.rs +++ b/necsim/impls/cuda/src/cogs/rng.rs @@ -3,49 +3,48 @@ use core::marker::PhantomData; use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore}; use const_type_layout::TypeGraphLayout; -use rust_cuda::safety::StackOnly; +use rust_cuda::{ + safety::{PortableBitSemantics, StackOnly}, + utils::adapter::RustToCudaWithPortableBitCloneSemantics, +}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[allow(clippy::module_name_repetitions)] -#[derive(Debug, rust_cuda::common::LendRustToCuda)] +#[derive(Debug, Clone, rust_cuda::lend::LendRustToCuda)] #[cuda(free = "M", free = "R")] pub struct CudaRng where - R: RngCore + StackOnly + TypeGraphLayout, + R: RngCore + StackOnly + PortableBitSemantics + TypeGraphLayout, { - inner: R, + #[cuda(embed)] + inner: RustToCudaWithPortableBitCloneSemantics, marker: PhantomData, } -impl + StackOnly + TypeGraphLayout> Clone for CudaRng { - fn clone(&self) -> Self { - Self { - inner: self.inner.clone(), - marker: PhantomData::, - } - } -} - -impl + StackOnly + TypeGraphLayout> From for CudaRng { +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> From + for CudaRng +{ #[must_use] #[inline] fn from(rng: R) -> Self { Self { - inner: rng, + inner: rng.into(), marker: PhantomData::, } } } -impl + StackOnly + TypeGraphLayout> RngCore for CudaRng { +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> RngCore + for CudaRng +{ type Seed = >::Seed; #[must_use] #[inline] fn from_seed(seed: Self::Seed) -> Self { Self { - inner: R::from_seed(seed), + inner: R::from_seed(seed).into(), marker: PhantomData::, } } @@ -57,8 +56,8 @@ impl + StackOnly + TypeGraphLayout> RngCore for C } } -impl + StackOnly + TypeGraphLayout> PrimeableRng - for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> + PrimeableRng for CudaRng { #[inline] fn prime_with(&mut self, location_index: u64, time_index: u64) { @@ -66,17 +65,19 @@ impl + StackOnly + TypeGraphLayout> PrimeableRn } } -impl + StackOnly + TypeGraphLayout> Serialize for CudaRng { +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> Serialize + for CudaRng +{ fn serialize(&self, serializer: S) -> Result { self.inner.serialize(serializer) } } -impl<'de, M: MathsCore, R: RngCore + StackOnly + TypeGraphLayout> Deserialize<'de> - for CudaRng +impl<'de, M: MathsCore, R: RngCore + StackOnly + PortableBitSemantics + TypeGraphLayout> + Deserialize<'de> for CudaRng { fn deserialize>(deserializer: D) -> Result { - let inner = R::deserialize(deserializer)?; + let inner = R::deserialize(deserializer)?.into(); Ok(Self { inner, diff --git a/necsim/impls/cuda/src/event_buffer.rs b/necsim/impls/cuda/src/event_buffer.rs index 6fb9f314f..1a08d85ca 100644 --- a/necsim/impls/cuda/src/event_buffer.rs +++ b/necsim/impls/cuda/src/event_buffer.rs @@ -1,13 +1,22 @@ -use core::fmt; +use core::{ + fmt, + ops::{Deref, DerefMut}, +}; +use const_type_layout::TypeGraphLayout; #[cfg(not(target_os = "cuda"))] -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ error::CudaResult, function::{BlockSize, GridSize}, }; -use rust_cuda::utils::{ - aliasing::SplitSliceOverCudaThreadsDynamicStride, exchange::buffer::CudaExchangeBuffer, +use rust_cuda::{ + lend::RustToCudaProxy, + safety::{PortableBitSemantics, SafeMutableAliasing, StackOnly}, + utils::{ + aliasing::SplitSliceOverCudaThreadsDynamicStride, + exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem}, + }, }; use necsim_core::{ @@ -24,11 +33,16 @@ use necsim_core::impl_report; use super::utils::MaybeSome; #[allow(clippy::module_name_repetitions, clippy::type_complexity)] -#[derive(rust_cuda::common::LendRustToCuda)] +#[derive(rust_cuda::lend::LendRustToCuda)] #[cuda(free = "ReportSpeciation", free = "ReportDispersal")] pub struct EventBuffer { + #[cfg(not(target_os = "cuda"))] #[cuda(embed)] event_mask: SplitSliceOverCudaThreadsDynamicStride>, + #[cfg(target_os = "cuda")] + #[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride>")] + event_mask: CudaExchangeSlice>, + #[cfg(not(target_os = "cuda"))] #[cuda(embed)] event_buffer: SplitSliceOverCudaThreadsDynamicStride< CudaExchangeBuffer< @@ -37,14 +51,49 @@ pub struct EventBuffer { true, >, >, - max_events: usize, - event_counter: usize, + #[cfg(target_os = "cuda")] + #[cuda(embed = "SplitSliceOverCudaThreadsDynamicStride< + CudaExchangeBuffer< + MaybeSome< as EventType>::Event>, + false, + true, + >, +>")] + event_buffer: CudaExchangeSlice< + CudaExchangeItem< + MaybeSome< as EventType>::Event>, + false, + true, + >, + >, +} + +// Safety: +// - no mutable aliasing occurs since all parts implement SafeMutableAliasing +// - dropping does not trigger (de)alloc since EventBuffer doesn't impl Drop and +// all parts implement SafeMutableAliasing +// - EventBuffer has no shallow mutable state +unsafe impl SafeMutableAliasing + for EventBuffer +where + SplitSliceOverCudaThreadsDynamicStride>: + SafeMutableAliasing, + SplitSliceOverCudaThreadsDynamicStride< + CudaExchangeBuffer< + MaybeSome< as EventType>::Event>, + false, + true, + >, + >: SafeMutableAliasing, +{ } pub trait EventType { type Event: 'static - + rust_cuda::const_type_layout::TypeGraphLayout + + Sync + + rust_cuda::deps::const_type_layout::TypeGraphLayout + rust_cuda::safety::StackOnly + + rust_cuda::safety::PortableBitSemantics + Into + Into + Clone; @@ -76,10 +125,7 @@ impl fmt::Debug for EventBuffer { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - fmt.debug_struct("EventBuffer") - .field("max_events", &self.max_events) - .field("event_counter", &self.event_counter) - .finish_non_exhaustive() + fmt.debug_struct("EventBuffer").finish_non_exhaustive() } } @@ -120,8 +166,6 @@ impl CudaExchangeBuffer::from_vec(event_buffer)?, max_events, ), - max_events, - event_counter: 0_usize, }) } @@ -146,9 +190,31 @@ impl mask.write(false); } } +} + +#[cfg(target_os = "cuda")] +impl + EventBuffer +{ + #[must_use] + pub fn can_buffer_next_event(&self) -> bool { + !self.event_buffer.is_empty() + } - pub fn max_events_per_individual(&self) -> usize { - self.max_events + fn report_event( + &mut self, + event: impl Into< as EventType>::Event>, + ) { + if let ([mask, mask_rest @ ..], [buffer, buffer_rest @ ..]) = ( + core::mem::take(&mut *self.event_mask), + core::mem::take(&mut *self.event_buffer), + ) { + mask.write(true); + buffer.write(MaybeSome::Some(event.into())); + + *self.event_mask = mask_rest; + *self.event_buffer = buffer_rest; + } } } @@ -167,19 +233,11 @@ impl Reporter impl Reporter for EventBuffer { impl_report!( #[debug_requires( - self.event_counter < self.max_events, + self.can_buffer_next_event(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(self.event_counter) { - mask.write(true); - - unsafe { - self.event_buffer.get_unchecked_mut(self.event_counter) - }.write(MaybeSome::Some(event.clone().into())); - } - - self.event_counter += 1; + self.report_event(event.clone()); } ); } @@ -188,19 +246,14 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - self.event_counter == 0, + self.can_buffer_next_event(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(0) { - mask.write(true); + self.report_event(event.clone()); - unsafe { - self.event_buffer.get_unchecked_mut(0) - }.write(MaybeSome::Some(event.clone())); - } - - self.event_counter = self.max_events; + *self.event_mask = &mut []; + *self.event_buffer = &mut []; } ); } @@ -209,37 +262,75 @@ impl Reporter for EventBuffer { impl Reporter for EventBuffer { impl_report!( #[debug_requires( - self.event_counter < self.max_events, + self.can_buffer_next_event(), "does not report extraneous speciation events" )] speciation(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(self.event_counter) { - mask.write(true); - - unsafe { - self.event_buffer.get_unchecked_mut(self.event_counter) - }.write(MaybeSome::Some(event.clone().into())); - } + self.report_event(event.clone()); - self.event_counter = self.max_events; + *self.event_mask = &mut []; + *self.event_buffer = &mut []; } ); impl_report!( #[debug_requires( - self.event_counter < self.max_events, + self.can_buffer_next_event(), "does not report extraneous dispersal events" )] dispersal(&mut self, event: Used) { - if let Some(mask) = self.event_mask.get_mut(self.event_counter) { - mask.write(true); - - unsafe { - self.event_buffer.get_unchecked_mut(self.event_counter) - }.write(MaybeSome::Some(event.clone().into())); - } - - self.event_counter += 1; + self.report_event(event.clone()); } ); } + +// TODO: find a prettier workaround +struct CudaExchangeSlice( + &'static mut [T], +); + +impl Deref + for CudaExchangeSlice +{ + type Target = &'static mut [T]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut + for CudaExchangeSlice +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl< + T: 'static + StackOnly + PortableBitSemantics + TypeGraphLayout, + const M2D: bool, + const M2H: bool, + > RustToCudaProxy>> + for SplitSliceOverCudaThreadsDynamicStride> +{ + fn from_ref(_val: &CudaExchangeSlice>) -> &Self { + unsafe { unreachable_cuda_event_buffer_hack() } + } + + fn from_mut(_val: &mut CudaExchangeSlice>) -> &mut Self { + unsafe { unreachable_cuda_event_buffer_hack() } + } + + fn into(mut self) -> CudaExchangeSlice> { + let slice: &mut [CudaExchangeItem] = &mut self; + + let slice = unsafe { core::slice::from_raw_parts_mut(slice.as_mut_ptr(), slice.len()) }; + + CudaExchangeSlice(slice) + } +} + +extern "C" { + fn unreachable_cuda_event_buffer_hack() -> !; +} diff --git a/necsim/impls/cuda/src/lib.rs b/necsim/impls/cuda/src/lib.rs index e7d657c28..abb2b2fcd 100644 --- a/necsim/impls/cuda/src/lib.rs +++ b/necsim/impls/cuda/src/lib.rs @@ -1,13 +1,13 @@ #![deny(clippy::pedantic)] #![no_std] -#![feature(core_intrinsics)] #![feature(const_type_name)] -#![feature(offset_of)] #![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] #![cfg_attr(target_os = "cuda", feature(asm_const))] #![cfg_attr(target_os = "cuda", feature(const_float_bits_conv))] #![allow(incomplete_features)] #![feature(specialization)] +#![allow(internal_features)] +#![feature(core_intrinsics)] extern crate alloc; diff --git a/necsim/impls/cuda/src/utils.rs b/necsim/impls/cuda/src/utils.rs index 8ff8033a5..39c1c8285 100644 --- a/necsim/impls/cuda/src/utils.rs +++ b/necsim/impls/cuda/src/utils.rs @@ -3,7 +3,7 @@ use core::mem::MaybeUninit; use rust_cuda::safety::StackOnly; #[derive(TypeLayout)] -#[repr(C)] +#[repr(transparent)] #[doc(hidden)] pub struct MaybeSome(MaybeUninit); diff --git a/necsim/impls/cuda/src/value_buffer.rs b/necsim/impls/cuda/src/value_buffer.rs index 04d844f6f..b1dc71f1a 100644 --- a/necsim/impls/cuda/src/value_buffer.rs +++ b/necsim/impls/cuda/src/value_buffer.rs @@ -3,7 +3,7 @@ use core::iter::Iterator; use const_type_layout::TypeGraphLayout; use rust_cuda::{ - safety::StackOnly, + safety::{PortableBitSemantics, SafeMutableAliasing, StackOnly}, utils::{ aliasing::SplitSliceOverCudaThreadsConstStride, exchange::buffer::{CudaExchangeBuffer, CudaExchangeItem}, @@ -11,19 +11,19 @@ use rust_cuda::{ }; #[cfg(not(target_os = "cuda"))] -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ error::CudaResult, function::{BlockSize, GridSize}, }; use super::utils::MaybeSome; -#[derive(rust_cuda::common::LendRustToCuda)] +#[derive(rust_cuda::lend::LendRustToCuda)] #[cuda(free = "T")] #[allow(clippy::module_name_repetitions)] pub struct ValueBuffer where - T: StackOnly + TypeGraphLayout, + T: StackOnly + PortableBitSemantics + TypeGraphLayout, { #[cuda(embed)] mask: SplitSliceOverCudaThreadsConstStride, 1_usize>, @@ -32,8 +32,25 @@ where SplitSliceOverCudaThreadsConstStride, M2D, M2H>, 1_usize>, } +// Safety: +// - no mutable aliasing occurs since all parts implement SafeMutableAliasing +// - dropping does not trigger (de)alloc since ValueBuffer doesn't impl Drop and +// all parts implement SafeMutableAliasing +// - ValueBuffer has no shallow mutable state +unsafe impl + SafeMutableAliasing for ValueBuffer +where + SplitSliceOverCudaThreadsConstStride, 1_usize>: + SafeMutableAliasing, + SplitSliceOverCudaThreadsConstStride, M2D, M2H>, 1_usize>: + SafeMutableAliasing, +{ +} + #[cfg(not(target_os = "cuda"))] -impl ValueBuffer { +impl + ValueBuffer +{ /// # Errors /// Returns a `rustacuda::errors::CudaError` iff an error occurs inside CUDA pub fn new(block_size: &BlockSize, grid_size: &GridSize) -> CudaResult { @@ -67,7 +84,9 @@ impl ValueBuff } #[cfg(not(target_os = "cuda"))] -impl ValueBuffer { +impl + ValueBuffer +{ pub fn iter(&self) -> impl Iterator> { self.mask .iter() @@ -90,7 +109,7 @@ impl ValueBuffer } #[cfg(target_os = "cuda")] -impl ValueBuffer { +impl ValueBuffer { pub fn with_value_for_core) -> Option>(&mut self, inner: F) { let value = if self .mask @@ -117,7 +136,9 @@ impl ValueBuffer { } #[cfg(target_os = "cuda")] -impl ValueBuffer { +impl + ValueBuffer +{ pub fn take_value_for_core(&mut self) -> Option { #[allow(clippy::option_if_let_else)] if let Some(mask) = self.mask.get_mut(0) { @@ -135,7 +156,9 @@ impl ValueBuffer } #[cfg(target_os = "cuda")] -impl ValueBuffer { +impl + ValueBuffer +{ pub fn put_value_for_core(&mut self, value: Option) { if let Some(mask) = self.mask.get_mut(0) { mask.write(value.is_some()); @@ -148,13 +171,15 @@ impl ValueBuffer } #[cfg(not(target_os = "cuda"))] -pub struct ValueRefMut<'v, T: StackOnly, const M2D: bool> { +pub struct ValueRefMut<'v, T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool> { mask: &'v mut CudaExchangeItem, value: &'v mut CudaExchangeItem, M2D, true>, } #[cfg(not(target_os = "cuda"))] -impl<'v, T: StackOnly, const M2D: bool> ValueRefMut<'v, T, M2D> { +impl<'v, T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool> + ValueRefMut<'v, T, M2D> +{ pub fn take(&mut self) -> Option { if *self.mask.read() { self.mask.write(false); @@ -176,7 +201,7 @@ impl<'v, T: StackOnly, const M2D: bool> ValueRefMut<'v, T, M2D> { } #[cfg(not(target_os = "cuda"))] -impl<'v, T: StackOnly> ValueRefMut<'v, T, true> { +impl<'v, T: StackOnly + PortableBitSemantics + TypeGraphLayout> ValueRefMut<'v, T, true> { #[must_use] pub fn as_mut(&mut self) -> Option<&mut T> { if *self.mask.read() { diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index d1cad4374..f2265ab8e 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -17,7 +17,7 @@ necsim-core-maths = { path = "../../core/maths" } necsim-core-bond = { path = "../../core/bond" } necsim-partitioning-core = { path = "../../partitioning/core" } -const-type-layout = { version = "0.2.0", features = ["derive"] } +const-type-layout = { git = "https://github.com/juntyr/const-type-layout", branch = "compress", features = ["derive"] } contracts = "0.6.3" libm = "0.2" hashbrown = "0.13" @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "final", "host"], optional = true } diff --git a/necsim/impls/no-std/src/alias/mod.rs b/necsim/impls/no-std/src/alias/mod.rs index 2eec4ef53..c6a22d8ea 100644 --- a/necsim/impls/no-std/src/alias/mod.rs +++ b/necsim/impls/no-std/src/alias/mod.rs @@ -1,3 +1,5 @@ +use core::cmp::Ordering; + use alloc::vec::Vec; use necsim_core::cogs::{MathsCore, RngCore}; @@ -62,11 +64,10 @@ impl AliasMethodSampler { }; Ks[underfull_index] = Es[overfull_index]; - #[allow(clippy::comparison_chain)] - if Us[overfull_index] < 1.0_f64 { - underfull_indices.push(overfull_index); - } else if Us[overfull_index] > 1.0_f64 { - overfull_indices.push(overfull_index); + match Us[overfull_index].cmp(&NonNegativeF64::one()) { + Ordering::Less => underfull_indices.push(overfull_index), + Ordering::Equal => (), + Ordering::Greater => overfull_indices.push(overfull_index), } } diff --git a/necsim/impls/no-std/src/array2d.rs b/necsim/impls/no-std/src/array2d.rs index 14fe7fc83..dd4552ebe 100644 --- a/necsim/impls/no-std/src/array2d.rs +++ b/necsim/impls/no-std/src/array2d.rs @@ -10,12 +10,12 @@ use core::ops::{Index, IndexMut}; /// A fixed sized two-dimensional array. #[derive(Clone, Eq, PartialEq)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr( feature = "cuda", cuda( free = "T", - bound = "T: rust_cuda::safety::StackOnly + const_type_layout::TypeGraphLayout" + bound = "T: rust_cuda::safety::PortableBitSemantics + const_type_layout::TypeGraphLayout" ) )] pub struct Array2D { diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/indexed/tests.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/indexed/tests.rs index 00e8d33a5..04d554d8e 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/indexed/tests.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/indexed/tests.rs @@ -1037,7 +1037,7 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodIndexedSampler { exponents: [], total_weight: 0.0 }" + "DynamicAliasMethodIndexedSampler { exponents: [], total_weight: 0.0, .. }" ); for i in (1..=6_u8).rev() { @@ -1046,7 +1046,7 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodIndexedSampler { exponents: [2, 1, 0], total_weight: 21.0 }" + "DynamicAliasMethodIndexedSampler { exponents: [2, 1, 0], total_weight: 21.0, .. }" ); let mut sampler_clone = unsafe { sampler.backup_unchecked() }; @@ -1062,11 +1062,11 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodIndexedSampler { exponents: [2, 1, 0], total_weight: 18.0 }" + "DynamicAliasMethodIndexedSampler { exponents: [2, 1, 0], total_weight: 18.0, .. }" ); assert_eq!( &alloc::format!("{sampler_clone:?}"), - "DynamicAliasMethodIndexedSampler { exponents: [2, 1], total_weight: 20.0 }" + "DynamicAliasMethodIndexedSampler { exponents: [2, 1], total_weight: 20.0, .. }" ); } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/stack/tests.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/stack/tests.rs index 461fe6904..505bf295c 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/stack/tests.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/sampler/stack/tests.rs @@ -535,7 +535,7 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodStackSampler { exponents: [], total_weight: 0.0 }" + "DynamicAliasMethodStackSampler { exponents: [], total_weight: 0.0, .. }" ); for i in (1..=6_u8).rev() { @@ -544,7 +544,7 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodStackSampler { exponents: [2, 1, 0], total_weight: 21.0 }" + "DynamicAliasMethodStackSampler { exponents: [2, 1, 0], total_weight: 21.0, .. }" ); let mut sampler_clone = unsafe { sampler.backup_unchecked() }; @@ -560,11 +560,11 @@ fn debug_display_sampler() { assert_eq!( &alloc::format!("{sampler:?}"), - "DynamicAliasMethodStackSampler { exponents: [2, 1, 0], total_weight: 18.0 }" + "DynamicAliasMethodStackSampler { exponents: [2, 1, 0], total_weight: 18.0, .. }" ); assert_eq!( &alloc::format!("{sampler_clone:?}"), - "DynamicAliasMethodStackSampler { exponents: [2, 1], total_weight: 20.0 }" + "DynamicAliasMethodStackSampler { exponents: [2, 1], total_weight: 20.0, .. }" ); } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs index b69bc20c0..598721483 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/const.rs @@ -8,7 +8,7 @@ use super::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct ConstEventTimeSampler { event_time: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs index 8b6bdc9c4..9e7b1207e 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/exp.rs @@ -11,7 +11,7 @@ const INV_PHI: u64 = 0x9e37_79b9_7f4a_7c15_u64; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct ExpEventTimeSampler { delta_t: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs index 5685d57fe..c6ac3227d 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/fixed.rs @@ -8,7 +8,7 @@ use super::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, Default)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct FixedEventTimeSampler([u8; 0]); #[contract_trait] diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs index be31a8a60..476685396 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/geometric.rs @@ -8,7 +8,7 @@ use super::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct GeometricEventTimeSampler { delta_t: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs index fcd1355ab..db7a42683 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/event_time_sampler/poisson.rs @@ -11,7 +11,7 @@ const INV_PHI: u64 = 0x9e37_79b9_7f4a_7c15_u64; #[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct PoissonEventTimeSampler { delta_t: PositiveF64, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs index 1aafbee33..eb5243a48 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/independent/mod.rs @@ -25,7 +25,7 @@ use event_time_sampler::EventTimeSampler; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct IndependentActiveLineageSampler< M: MathsCore, @@ -37,10 +37,7 @@ pub struct IndependentActiveLineageSampler< N: SpeciationProbability, J: EventTimeSampler, > { - #[cfg_attr( - feature = "cuda", - cuda(embed = "Option>") - )] + #[cfg_attr(feature = "cuda", cuda(embed))] active_lineage: Option, min_event_time: NonNegativeF64, last_event_time: NonNegativeF64, diff --git a/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs b/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs index 0e9a16f6a..f15e3f672 100644 --- a/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs +++ b/necsim/impls/no-std/src/cogs/coalescence_sampler/independent.rs @@ -15,7 +15,7 @@ use crate::cogs::lineage_store::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H"))] pub struct IndependentCoalescenceSampler>(PhantomData<(M, H)>); diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs index 7b71b472b..3be2d4f16 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/almost_infinite_normal.rs @@ -10,7 +10,7 @@ use crate::cogs::habitat::almost_infinite::AlmostInfiniteHabitat; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "G"))] pub struct AlmostInfiniteNormalDispersalSampler> { sigma: NonNegativeF64, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs index 5485cce7d..68e561bdf 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/in_memory/packed_alias/mod.rs @@ -40,7 +40,7 @@ impl From for Range { } #[allow(clippy::module_name_repetitions)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H", free = "G"))] pub struct InMemoryPackedAliasDispersalSampler, G: RngCore> { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs index 23fbe2a0e..d2018d513 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/non_spatial.rs @@ -10,7 +10,7 @@ use crate::cogs::habitat::non_spatial::NonSpatialHabitat; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "G"))] pub struct NonSpatialDispersalSampler> { marker: PhantomData<(M, G)>, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs index 9664e50bb..744182261 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/spatially_implicit.rs @@ -11,7 +11,7 @@ use crate::cogs::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct SpatiallyImplicitDispersalSampler> { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs index 996dc2684..1ad63b0b7 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/mod.rs @@ -28,7 +28,7 @@ pub trait AntiTrespassingDispersalSampler, G: RngCor #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct TrespassingDispersalSampler< M: MathsCore, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs index 22e3216d2..26bef8225 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/trespassing/uniform.rs @@ -9,7 +9,7 @@ use super::AntiTrespassingDispersalSampler; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H", free = "G"))] pub struct UniformAntiTrespassingDispersalSampler< M: MathsCore, diff --git a/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs b/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs index 5f38306db..6f3075bf4 100644 --- a/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs +++ b/necsim/impls/no-std/src/cogs/dispersal_sampler/wrapping_noise.rs @@ -14,7 +14,7 @@ use crate::cogs::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct WrappingNoiseApproximateNormalDispersalSampler> { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/emigration_exit/never.rs b/necsim/impls/no-std/src/cogs/emigration_exit/never.rs index 74a68fdda..62e5320a5 100644 --- a/necsim/impls/no-std/src/cogs/emigration_exit/never.rs +++ b/necsim/impls/no-std/src/cogs/emigration_exit/never.rs @@ -8,7 +8,7 @@ use necsim_core_bond::{NonNegativeF64, PositiveF64}; #[allow(clippy::module_name_repetitions)] #[derive(Debug, Default)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct NeverEmigrationExit([u8; 0]); #[contract_trait] diff --git a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs index baeb01622..17ac313d0 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs @@ -21,7 +21,7 @@ use super::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr( feature = "cuda", cuda( @@ -43,12 +43,7 @@ pub struct IndependentEventSampler< T: TurnoverRate, N: SpeciationProbability, > { - #[cfg_attr( - feature = "cuda", - cuda( - embed = "Option>" - ) - )] + #[cfg_attr(feature = "cuda", cuda(embed))] min_spec_sample: Option, marker: PhantomData<(M, H, G, X, D, T, N)>, } diff --git a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs index 8b5c1cccd..6804e70ea 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs @@ -27,10 +27,12 @@ pub trait MinSpeciationTrackingEventSampler< } #[derive(Clone, Debug, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] pub struct SpeciationSample { speciation_sample: ClosedOpenUnitF64, sample_time: PositiveF64, + #[cfg_attr(feature = "cuda", cuda(embed))] sample_location: IndexedLocation, } diff --git a/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs b/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs index 914672dbc..8eafa0c6d 100644 --- a/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs +++ b/necsim/impls/no-std/src/cogs/habitat/almost_infinite.rs @@ -12,7 +12,7 @@ const ALMOST_INFINITE_EXTENT: LandscapeExtent = LandscapeExtent::new(0, 0, OffByOneU32::max(), OffByOneU32::max()); #[allow(clippy::module_name_repetitions)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct AlmostInfiniteHabitat { marker: PhantomData, diff --git a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs index 838ee499f..fb47d7a6d 100644 --- a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs @@ -14,13 +14,14 @@ use crate::array2d::Array2D; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct InMemoryHabitat { #[cfg_attr(feature = "cuda", cuda(embed))] habitat: Final>, #[cfg_attr(feature = "cuda", cuda(embed))] u64_injection: Final>, + #[cfg_attr(feature = "cuda", cuda(embed))] extent: LandscapeExtent, marker: PhantomData, } diff --git a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs index bbba06e66..6d83fe75b 100644 --- a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs @@ -11,9 +11,10 @@ use necsim_core_bond::{OffByOneU32, OffByOneU64}; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct NonSpatialHabitat { + #[cfg_attr(feature = "cuda", cuda(embed))] extent: LandscapeExtent, deme: NonZeroU32, marker: PhantomData, diff --git a/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs b/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs index 5f78012e9..02c822977 100644 --- a/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs +++ b/necsim/impls/no-std/src/cogs/habitat/spatially_implicit.rs @@ -13,7 +13,7 @@ const SPATIALLY_IMPLICIT_EXTENT: LandscapeExtent = #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct SpatiallyImplicitHabitat { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs b/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs index e6482e557..892c02f11 100644 --- a/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs +++ b/necsim/impls/no-std/src/cogs/habitat/wrapping_noise/mod.rs @@ -18,7 +18,7 @@ use crate::cogs::{ }; #[allow(clippy::module_name_repetitions)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct WrappingNoiseHabitat { #[cfg_attr(feature = "cuda", cuda(embed))] diff --git a/necsim/impls/no-std/src/cogs/immigration_entry/never.rs b/necsim/impls/no-std/src/cogs/immigration_entry/never.rs index fc148b60e..9c4df3ac8 100644 --- a/necsim/impls/no-std/src/cogs/immigration_entry/never.rs +++ b/necsim/impls/no-std/src/cogs/immigration_entry/never.rs @@ -5,7 +5,7 @@ use necsim_core::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug, Default)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct NeverImmigrationEntry([u8; 0]); #[contract_trait] diff --git a/necsim/impls/no-std/src/cogs/lineage_store/independent.rs b/necsim/impls/no-std/src/cogs/lineage_store/independent.rs index d20b0dbd1..606be853e 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/independent.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/independent.rs @@ -7,7 +7,7 @@ use necsim_core::{ #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M", free = "H"))] pub struct IndependentLineageStore> { marker: PhantomData<(M, H)>, diff --git a/necsim/impls/no-std/src/cogs/maths/intrinsics.rs b/necsim/impls/no-std/src/cogs/maths/intrinsics.rs index 7375c9fc8..46801aac8 100644 --- a/necsim/impls/no-std/src/cogs/maths/intrinsics.rs +++ b/necsim/impls/no-std/src/cogs/maths/intrinsics.rs @@ -1,4 +1,2 @@ -#![allow(clippy::useless_attribute)] - #[allow(clippy::module_name_repetitions)] pub use necsim_core_maths::IntrinsicsMathsCore; diff --git a/necsim/impls/no-std/src/cogs/rng/seahash.rs b/necsim/impls/no-std/src/cogs/rng/seahash.rs index 93cc87ecd..bbfc0df7b 100644 --- a/necsim/impls/no-std/src/cogs/rng/seahash.rs +++ b/necsim/impls/no-std/src/cogs/rng/seahash.rs @@ -4,7 +4,7 @@ use necsim_core::cogs::{Backup, MathsCore, PrimeableRng, RngCore}; use serde::{Deserialize, Serialize}; -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, Serialize, Deserialize, TypeLayout)] #[serde(deny_unknown_fields)] #[layout(free = "M")] diff --git a/necsim/impls/no-std/src/cogs/rng/wyhash.rs b/necsim/impls/no-std/src/cogs/rng/wyhash.rs index c4fdeed68..dfa2d4d3e 100644 --- a/necsim/impls/no-std/src/cogs/rng/wyhash.rs +++ b/necsim/impls/no-std/src/cogs/rng/wyhash.rs @@ -11,7 +11,7 @@ const P1: u64 = 0xe703_7ed1_a0b4_28db; const P2: u64 = 0x8ebc_6af0_9c88_c6e3; const P5: u64 = 0xeb44_acca_b455_d165; -#[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] +#[allow(clippy::module_name_repetitions)] #[derive(Clone, Debug, Serialize, Deserialize, TypeLayout)] #[layout(free = "M")] #[serde(deny_unknown_fields)] diff --git a/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs b/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs index d50e77707..a542e24b1 100644 --- a/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs +++ b/necsim/impls/no-std/src/cogs/speciation_probability/spatially_implicit.rs @@ -7,7 +7,7 @@ use necsim_core_bond::{ClosedUnitF64, OpenClosedUnitF64 as PositiveUnitF64}; use crate::cogs::habitat::spatially_implicit::SpatiallyImplicitHabitat; #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[allow(clippy::module_name_repetitions)] pub struct SpatiallyImplicitSpeciationProbability { meta_speciation_probability: PositiveUnitF64, diff --git a/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs b/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs index dd8d2dfae..82ceeeba7 100644 --- a/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs +++ b/necsim/impls/no-std/src/cogs/speciation_probability/uniform.rs @@ -5,7 +5,7 @@ use necsim_core::{ use necsim_core_bond::ClosedUnitF64; #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[allow(clippy::module_name_repetitions)] pub struct UniformSpeciationProbability { speciation_probability: ClosedUnitF64, diff --git a/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs b/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs index 6b83d35be..4fce34f0c 100644 --- a/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/turnover_rate/in_memory.rs @@ -12,7 +12,7 @@ use crate::{array2d::Array2D, cogs::habitat::in_memory::InMemoryHabitat}; #[allow(clippy::module_name_repetitions)] #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] pub struct InMemoryTurnoverRate { #[cfg_attr(feature = "cuda", cuda(embed))] turnover_rate: Final>, diff --git a/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs b/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs index 99411a19a..5255625bf 100644 --- a/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs +++ b/necsim/impls/no-std/src/cogs/turnover_rate/uniform.rs @@ -5,7 +5,7 @@ use necsim_core::{ use necsim_core_bond::{NonNegativeF64, PositiveF64}; #[derive(Debug)] -#[cfg_attr(feature = "cuda", derive(rust_cuda::common::LendRustToCuda))] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[allow(clippy::module_name_repetitions)] pub struct UniformTurnoverRate { turnover_rate: PositiveF64, diff --git a/necsim/impls/no-std/src/lib.rs b/necsim/impls/no-std/src/lib.rs index f26467e88..aa63583a1 100644 --- a/necsim/impls/no-std/src/lib.rs +++ b/necsim/impls/no-std/src/lib.rs @@ -3,7 +3,6 @@ #![feature(iter_advance_by)] #![feature(extract_if)] #![feature(const_type_name)] -#![feature(offset_of)] #![feature(negative_impls)] #![feature(impl_trait_in_assoc_type)] #![allow(incomplete_features)] diff --git a/necsim/impls/no-std/src/parallelisation/independent/individuals.rs b/necsim/impls/no-std/src/parallelisation/independent/individuals.rs index 93fbe37f0..c6355adf4 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/individuals.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/individuals.rs @@ -125,7 +125,7 @@ pub fn simulate< // detected at the next shared duplicate event let (new_time, new_steps) = simulation.simulate_incremental_early_stop( - |_, steps, _| { + |_, steps, _, _| { if steps >= step_slice.get() { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs index 75c83085d..0177ec941 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/landscape.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/landscape.rs @@ -137,7 +137,7 @@ pub fn simulate< // detected at the next shared duplicate event let (new_time, new_steps) = simulation.simulate_incremental_early_stop( - |_, steps, _| { + |_, steps, _, _| { if steps >= step_slice.get() { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs b/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs index b3ac9a64c..faecd44c9 100644 --- a/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs +++ b/necsim/impls/no-std/src/parallelisation/independent/monolithic/mod.rs @@ -226,7 +226,7 @@ pub fn simulate< previous_next_event_time = None; let (new_time, new_steps) = simulation.simulate_incremental_early_stop( - |_, steps, next_event_time| { + |_, steps, next_event_time, _| { previous_next_event_time = Some(next_event_time); if steps >= step_slice.get() || next_event_time >= level_time { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs b/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs index 187ee4038..143b61156 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/averaging.rs @@ -78,7 +78,7 @@ pub fn simulate< let next_safe_time = global_safe_time + independent_time_slice; let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time >= next_safe_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs b/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs index 174c2c358..2b29631aa 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/lockstep.rs @@ -77,7 +77,7 @@ pub fn simulate< // Simulate for zero-steps (immediate early stop) without side effects // to peek the next local event time simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { next_local_time = Some(next_event_time); ControlFlow::Break(()) @@ -102,7 +102,7 @@ pub fn simulate< // The partition with the next event gets to simulate just the next step if let Ok(next_global_time) = local_partition.reduce_vote_min_time(next_local_time) { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time > next_global_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs b/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs index 895344836..246e582df 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/monolithic.rs @@ -69,7 +69,7 @@ pub fn simulate< // ically later time let (time, steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { pause_before.map_or(ControlFlow::Continue(()), |pause_before| { if next_event_time >= pause_before { ControlFlow::Break(()) diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs index b12afb7d7..349c74164 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic.rs @@ -109,7 +109,7 @@ pub fn simulate< // e.g. (1->2)|(2->3)|(3->1) => (1->2)|(3->1) let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time >= next_safe_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs index 767e5cbb9..def28db5e 100644 --- a/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs +++ b/necsim/impls/no-std/src/parallelisation/monolithic/optimistic_lockstep.rs @@ -78,7 +78,7 @@ pub fn simulate< // (we already know at least one partition has some next event time) let next_local_emigration_time = { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |simulation, _, _| { + |simulation, _, _, _| { if simulation.emigration_exit().is_empty() { ControlFlow::Continue(()) } else { @@ -115,7 +115,7 @@ pub fn simulate< // that event Ok(next_global_time) => { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time > next_global_time { ControlFlow::Break(()) } else { @@ -139,7 +139,7 @@ pub fn simulate< // All other partitions get to simulate until just before this next migration event Err(next_global_time) => { let (_, new_steps) = simulation.simulate_incremental_early_stop( - |_, _, next_event_time| { + |_, _, next_event_time, _| { if next_event_time >= next_global_time { ControlFlow::Break(()) } else { diff --git a/necsim/impls/std/src/event_log/replay/sorted_segments.rs b/necsim/impls/std/src/event_log/replay/sorted_segments.rs index 2c209cd95..57c18b6e9 100644 --- a/necsim/impls/std/src/event_log/replay/sorted_segments.rs +++ b/necsim/impls/std/src/event_log/replay/sorted_segments.rs @@ -101,6 +101,7 @@ impl PartialOrd for SortedSortedSegments { } impl PartialEq for SortedSortedSegments { + #[allow(clippy::unconditional_recursion)] fn eq(&self, other: &Self) -> bool { self.next.eq(&other.next) } diff --git a/necsim/partitioning/mpi/src/partition/mod.rs b/necsim/partitioning/mpi/src/partition/mod.rs index 90055f711..d05940d3d 100644 --- a/necsim/partitioning/mpi/src/partition/mod.rs +++ b/necsim/partitioning/mpi/src/partition/mod.rs @@ -13,9 +13,9 @@ mod parallel; mod root; mod utils; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use parallel::MpiParallelPartition; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use root::MpiRootPartition; #[allow(clippy::module_name_repetitions)] diff --git a/necsim/plugins/core/src/import/combinator.rs b/necsim/plugins/core/src/import/combinator.rs index d948c5e3e..a99fb5784 100644 --- a/necsim/plugins/core/src/import/combinator.rs +++ b/necsim/plugins/core/src/import/combinator.rs @@ -3,7 +3,6 @@ use std::{ iter::{FromIterator, IntoIterator}, marker::PhantomData, path::Path, - rc::Rc, }; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -68,7 +67,8 @@ impl>(); let result = inner(self); diff --git a/rust-toolchain b/rust-toolchain index 3ab928278..19bb9762a 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,4 +1,4 @@ [toolchain] -channel = "nightly-2023-11-10" +channel = "nightly" components = [ "cargo", "rustfmt", "clippy", "rust-src" ] targets = [ "x86_64-unknown-linux-gnu", "nvptx64-nvidia-cuda" ] diff --git a/rustcoalescence/Cargo.toml b/rustcoalescence/Cargo.toml index ebf41fc18..2a367eb33 100644 --- a/rustcoalescence/Cargo.toml +++ b/rustcoalescence/Cargo.toml @@ -10,6 +10,12 @@ edition = "2021" [features] default = [] +necsim-partitioning-mpi = ["dep:necsim-partitioning-mpi"] + +rustcoalescence-algorithms-gillespie = ["dep:rustcoalescence-algorithms-gillespie"] +rustcoalescence-algorithms-independent = ["dep:rustcoalescence-algorithms-independent"] +rustcoalescence-algorithms-cuda = ["dep:rustcoalescence-algorithms-cuda"] + [dependencies] necsim-core = { path = "../necsim/core" } necsim-core-bond = { path = "../necsim/core/bond" } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index e25ab2387..2ea0383d4 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index 7587473ae..c9eefc131 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs index 5c908339e..8f206ab43 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/lib.rs @@ -1,6 +1,5 @@ #![deny(clippy::pedantic)] -#![allow(incomplete_features)] -#![feature(specialization)] +#![allow(long_running_const_eval)] #![recursion_limit = "1024"] use necsim_core::{ @@ -16,145 +15,29 @@ use necsim_impls_no_std::cogs::{ event_sampler::tracking::MinSpeciationTrackingEventSampler, }; -use rust_cuda::{ - common::RustToCuda, - host::{CudaDropWrapper, LaunchConfig, LaunchPackage, Launcher, TypedKernel}, - rustacuda::{ - error::CudaResult, - function::{BlockSize, Function, GridSize}, - stream::Stream, - }, -}; - -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rust_cuda::lend::RustToCuda; mod link; mod patch; -pub type KernelCompilationCallback = dyn FnMut(&Function) -> CudaResult<()>; - -#[allow(clippy::module_name_repetitions)] -pub struct SimulationKernel< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, +#[allow(clippy::type_complexity)] +pub struct SimulationKernelPtx< + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, -> { - #[allow(clippy::type_complexity)] - kernel: TypedKernel< - dyn SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >, - >, - stream: CudaDropWrapper, - grid: GridSize, - block: BlockSize, - ptx_jit: bool, - watcher: Box, -} - -impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, - ReportSpeciation: Boolean, - ReportDispersal: Boolean, - > SimulationKernel -{ - /// # Errors - /// - /// Returns a `CudaError` if loading the CUDA kernel failed. - pub fn try_new( - stream: Stream, - grid: GridSize, - block: BlockSize, - ptx_jit: bool, - on_compile: Box, - ) -> CudaResult - where - Self: SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - ReportSpeciation, - ReportDispersal, - >, - { - let stream = CudaDropWrapper::from(stream); - let kernel = Self::new_kernel()?; - - Ok(Self { - kernel, - stream, - grid, - block, - ptx_jit, - watcher: on_compile, - }) - } -} - -impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, - ReportSpeciation: Boolean, - ReportDispersal: Boolean, - > Launcher - for SimulationKernel -{ - type CompilationWatcher = Box; - type KernelTraitObject = dyn SimulatableKernel< +>( + std::marker::PhantomData<( M, H, G, @@ -169,25 +52,5 @@ impl< A, ReportSpeciation, ReportDispersal, - >; - - fn get_launch_package(&mut self) -> LaunchPackage { - LaunchPackage { - config: LaunchConfig { - grid: self.grid.clone(), - block: self.block.clone(), - shared_memory_size: 0_u32, - ptx_jit: self.ptx_jit, - }, - - kernel: &mut self.kernel, - stream: &mut self.stream, - - watcher: &mut self.watcher, - } - } - - fn on_compile(kernel: &Function, watcher: &mut Self::CompilationWatcher) -> CudaResult<()> { - (watcher)(kernel) - } -} + )>, +); diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs index 98f3b0819..933cb48aa 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/link.rs @@ -1,6 +1,52 @@ -use rustcoalescence_algorithms_cuda_gpu_kernel::{SimulatableKernel, SimulationKernelArgs}; +use necsim_core::{ + cogs::{ + CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, + LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, + }, + reporter::boolean::Boolean, +}; -use crate::SimulationKernel; +use necsim_impls_no_std::cogs::{ + active_lineage_sampler::singular::SingularActiveLineageSampler, + event_sampler::tracking::MinSpeciationTrackingEventSampler, +}; + +use rust_cuda::lend::RustToCuda; + +#[allow(clippy::type_complexity)] +pub struct SimulationKernelPtx< + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, + ReportSpeciation: Boolean, + ReportDispersal: Boolean, +>( + std::marker::PhantomData<( + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + ReportSpeciation, + ReportDispersal, + )>, +); macro_rules! link_kernel { ($habitat:ty, $dispersal:ty, $turnover:ty, $speciation:ty) => { @@ -29,7 +75,7 @@ macro_rules! link_kernel { $habitat:ty, $dispersal:ty, $turnover:ty, $speciation:ty, $report_speciation:ty, $report_dispersal:ty ) => { - rustcoalescence_algorithms_cuda_gpu_kernel::link_kernel!( + rustcoalescence_algorithms_cuda_gpu_kernel::link! { impl simulate< necsim_impls_cuda::cogs::maths::NvptxMathsCore, $habitat, necsim_impls_cuda::cogs::rng::CudaRng< @@ -82,9 +128,9 @@ macro_rules! link_kernel { >, $report_speciation, $report_dispersal, - ); + > for SimulationKernelPtx } - rustcoalescence_algorithms_cuda_gpu_kernel::link_kernel!( + rustcoalescence_algorithms_cuda_gpu_kernel::link! { impl simulate< necsim_impls_cuda::cogs::maths::NvptxMathsCore, $habitat, necsim_impls_cuda::cogs::rng::CudaRng< @@ -197,7 +243,7 @@ macro_rules! link_kernel { >, $report_speciation, $report_dispersal, - ); + > for SimulationKernelPtx } }; } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs index 129565624..04404ad9b 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/src/patch.rs @@ -1,76 +1,61 @@ -use std::sync::atomic::AtomicU64; +use std::ffi::CStr; use necsim_core::{ cogs::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, }, - lineage::Lineage, reporter::boolean::{Boolean, False, True}, - simulation::Simulation, }; -use necsim_core_bond::{NonNegativeF64, PositiveF64}; -use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, - event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}, + event_sampler::tracking::MinSpeciationTrackingEventSampler, }; -use rust_cuda::{ - common::{DeviceAccessible, RustToCuda}, - host::{HostAndDeviceConstRef, HostAndDeviceMutRef, TypedKernel}, - rustacuda::error::CudaResult, - utils::device_copy::SafeDeviceCopyWrapper, -}; +use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda}; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; -use crate::SimulationKernel; +use crate::SimulationKernelPtx; // If `Kernel` is implemented for `ReportSpeciation` x `ReportDispersal`, i.e. // for {`False`, `True`} x {`False`, `True`} then it is implemented for all // `Boolean`s. However, Rust does not recognise that `Boolean` is closed over -// {`False`, `True`}. These default impls provide the necessary coersion. - -extern "C" { - fn unreachable_cuda_simulation_linking_reporter() -> !; -} +// {`False`, `True`}. This explicit impl provides the necessary coersion. -#[allow(clippy::trait_duplication_in_bounds)] unsafe impl< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, - > SimulatableKernel - for SimulationKernel + > + CompiledKernelPtx< + simulate, + > for SimulationKernelPtx where - SimulationKernel: - SimulatableKernel, - SimulationKernel: - SimulatableKernel, - SimulationKernel: - SimulatableKernel, - SimulationKernel: - SimulatableKernel, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, + crate::link::SimulationKernelPtx: + CompiledKernelPtx>, { - default fn get_ptx_str() -> &'static str { - unsafe { unreachable_cuda_simulation_linking_reporter() } - } - - default fn new_kernel() -> CudaResult< - TypedKernel< - dyn SimulatableKernel< + #[inline] + fn get_ptx() -> &'static CStr { + match (ReportSpeciation::VALUE, ReportDispersal::VALUE) { + (false, false) => crate::link::SimulationKernelPtx::< M, H, G, @@ -83,59 +68,127 @@ where E, I, A, - ReportSpeciation, - ReportDispersal, - >, - >, - > { - unsafe { unreachable_cuda_simulation_linking_reporter() } - } - - default fn simulate( - &mut self, - _simulation: &mut Simulation, - _task_list: &mut ValueBuffer, - _event_buffer_reporter: &mut EventBuffer, - _min_spec_sample_buffer: &mut ValueBuffer, - _next_event_time_buffer: &mut ValueBuffer, - _total_time_max: &AtomicU64, - _total_steps_sum: &AtomicU64, - _max_steps: u64, - _max_next_event_time: NonNegativeF64, - ) -> CudaResult<()> { - unsafe { unreachable_cuda_simulation_linking_reporter() } + False, + False, + >::get_ptx(), + (false, true) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + False, + True, + >::get_ptx(), + (true, false) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + True, + False, + >::get_ptx(), + (true, true) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + True, + True, + >::get_ptx(), + } } - default fn simulate_raw( - &mut self, - _simulation: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - _task_list: HostAndDeviceMutRef< - DeviceAccessible< as RustToCuda>::CudaRepresentation>, - >, - _event_buffer_reporter: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - _min_spec_sample_buffer: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - _next_event_time_buffer: HostAndDeviceMutRef< - DeviceAccessible< - as RustToCuda>::CudaRepresentation, - >, - >, - _total_time_max: HostAndDeviceConstRef>, - _total_steps_sum: HostAndDeviceConstRef>, - _max_steps: SafeDeviceCopyWrapper, - _max_next_event_time: SafeDeviceCopyWrapper, - ) -> CudaResult<()> { - unsafe { unreachable_cuda_simulation_linking_reporter() } + #[inline] + fn get_entry_point() -> &'static CStr { + match (ReportSpeciation::VALUE, ReportDispersal::VALUE) { + (false, false) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + False, + False, + >::get_entry_point(), + (false, true) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + False, + True, + >::get_entry_point(), + (true, false) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + True, + False, + >::get_entry_point(), + (true, true) => crate::link::SimulationKernelPtx::< + M, + H, + G, + S, + X, + D, + C, + T, + N, + E, + I, + A, + True, + True, + >::get_entry_point(), + } } } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index 2a13df0fc..5fec391e1 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -16,4 +16,8 @@ necsim-core-bond = { path = "../../../../necsim/core/bond" } necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["cuda"] } necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6b53e88", features = ["derive"] } +[target.'cfg(target_os = "cuda")'.dependencies] +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "device", "kernel"] } + +[target.'cfg(not(target_os = "cuda"))'.dependencies] +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "6311a6d4", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs index 1e5724d9c..94938d34d 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/src/lib.rs @@ -1,12 +1,10 @@ #![deny(clippy::pedantic)] #![no_std] +#![feature(type_alias_impl_trait)] +#![feature(decl_macro)] #![cfg_attr(target_os = "cuda", feature(abi_ptx))] -#![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] -#![cfg_attr(target_os = "cuda", feature(panic_info_message))] -#![cfg_attr(target_os = "cuda", feature(atomic_from_mut))] #![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] -#![cfg_attr(target_os = "cuda", feature(stdsimd))] -#![cfg_attr(target_os = "cuda", feature(control_flow_enum))] +#![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] #![allow(long_running_const_eval)] #![recursion_limit = "1024"] @@ -14,81 +12,70 @@ extern crate alloc; #[cfg(target_os = "cuda")] use core::ops::ControlFlow; +use core::sync::atomic::AtomicU64; use necsim_core::{ cogs::{ CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry, LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate, }, + lineage::Lineage, reporter::boolean::Boolean, + simulation::Simulation, }; +use necsim_core_bond::{NonNegativeF64, PositiveF64}; +use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; use necsim_impls_no_std::cogs::{ active_lineage_sampler::singular::SingularActiveLineageSampler, event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample}, }; -use rust_cuda::common::RustToCuda; +use rust_cuda::{ + kernel::param::{DeepPerThreadBorrow, PerThreadShallowCopy, PtxJit, ShallowInteriorMutable}, + lend::RustToCuda, +}; -#[rust_cuda::common::kernel( - pub use link_kernel! as impl SimulatableKernel for SimulationKernel +#[rust_cuda::kernel::kernel(pub use link! for impl)] +#[kernel( + allow(ptx::double_precision_use), + forbid(ptx::local_memory_use, ptx::register_spills) )] #[allow(clippy::too_many_arguments)] #[allow(clippy::type_complexity)] pub fn simulate< - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler + RustToCuda, + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, ReportSpeciation: Boolean, ReportDispersal: Boolean, >( - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - simulation: &mut ShallowCopy< - necsim_core::simulation::Simulation, - >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - task_list: &mut ShallowCopy< - necsim_impls_cuda::value_buffer::ValueBuffer, - >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - event_buffer_reporter: &mut ShallowCopy< - necsim_impls_cuda::event_buffer::EventBuffer, + simulation: &PtxJit>>, + task_list: &mut PtxJit>>, + event_buffer_reporter: &mut PtxJit< + DeepPerThreadBorrow>, >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - min_spec_sample_buffer: &mut ShallowCopy< - necsim_impls_cuda::value_buffer::ValueBuffer, + min_spec_sample_buffer: &mut PtxJit< + DeepPerThreadBorrow>, >, - #[rustfmt::skip] - #[kernel(pass = LendRustToCuda, jit)] - next_event_time_buffer: &mut ShallowCopy< - necsim_impls_cuda::value_buffer::ValueBuffer, - >, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - total_time_max: &core::sync::atomic::AtomicU64, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - total_steps_sum: &core::sync::atomic::AtomicU64, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - max_steps: u64, - #[rustfmt::skip] - #[kernel(pass = SafeDeviceCopy)] - max_next_event_time: necsim_core_bond::NonNegativeF64, + next_event_time_buffer: &mut PtxJit>>, + total_time_max: &ShallowInteriorMutable, + total_steps_sum: &ShallowInteriorMutable, + max_steps: PerThreadShallowCopy, + max_next_event_time: PerThreadShallowCopy, ) { + // TODO: use simulation with non-allocating clone + let mut simulation = unsafe { core::mem::ManuallyDrop::new(core::ptr::read(simulation)) }; + task_list.with_value_for_core(|task| { // Discard the prior task (the simulation is just a temporary local copy) core::mem::drop( @@ -103,13 +90,16 @@ pub fn simulate< let mut final_next_event_time = None; let (time, steps) = simulation.simulate_incremental_early_stop( - |_, steps, next_event_time| { + |_, steps, next_event_time, reporter| { final_next_event_time = Some(next_event_time); - if steps >= max_steps || next_event_time >= max_next_event_time { - ControlFlow::Break(()) - } else { + if steps < max_steps + && next_event_time < max_next_event_time + && reporter.can_buffer_next_event() + { ControlFlow::Continue(()) + } else { + ControlFlow::Break(()) } }, event_buffer_reporter, @@ -133,37 +123,34 @@ pub fn simulate< #[cfg(target_os = "cuda")] mod cuda_prelude { - use core::arch::nvptx; - - use rust_cuda::device::utils; + use rust_cuda::device::alloc::PTXAllocator; #[global_allocator] - static _GLOBAL_ALLOCATOR: utils::PTXAllocator = utils::PTXAllocator; + static _GLOBAL_ALLOCATOR: PTXAllocator = PTXAllocator; #[cfg(not(debug_assertions))] #[panic_handler] fn panic(_panic_info: &::core::panic::PanicInfo) -> ! { - unsafe { nvptx::trap() } + rust_cuda::device::utils::abort() } #[cfg(debug_assertions)] #[panic_handler] - fn panic(panic_info: &::core::panic::PanicInfo) -> ! { - use rust_cuda::println; - - println!( - "Panic occurred at {:?}: {:?}!", - panic_info.location(), - panic_info - .message() - .unwrap_or(&format_args!("unknown reason")) - ); - - unsafe { nvptx::trap() } + fn panic(info: &::core::panic::PanicInfo) -> ! { + rust_cuda::device::utils::pretty_print_panic_info(info, true, true); + rust_cuda::device::utils::abort() } + #[cfg(not(debug_assertions))] #[alloc_error_handler] fn alloc_error_handler(_: core::alloc::Layout) -> ! { - unsafe { nvptx::trap() } + rust_cuda::device::utils::abort() + } + + #[cfg(debug_assertions)] + #[alloc_error_handler] + fn alloc_error_handler(layout: core::alloc::Layout) -> ! { + rust_cuda::device::utils::pretty_print_alloc_error(layout); + rust_cuda::device::utils::abort() } } diff --git a/rustcoalescence/algorithms/cuda/src/cuda.rs b/rustcoalescence/algorithms/cuda/src/cuda.rs index c523bf2d2..d8222ebb1 100644 --- a/rustcoalescence/algorithms/cuda/src/cuda.rs +++ b/rustcoalescence/algorithms/cuda/src/cuda.rs @@ -1,4 +1,4 @@ -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ context::{Context, CurrentContext, ResourceLimit}, prelude::*, }; @@ -13,7 +13,7 @@ pub fn with_initialised_cuda, F: FnOnce() -> Result> inner: F, ) -> Result { // Initialize the CUDA API - rust_cuda::rustacuda::init(CudaFlags::empty())?; + rust_cuda::deps::rustacuda::init(CudaFlags::empty())?; // Get the first device let device = Device::get_device(device)?; diff --git a/rustcoalescence/algorithms/cuda/src/error.rs b/rustcoalescence/algorithms/cuda/src/error.rs index e69898247..f81a9e3c1 100644 --- a/rustcoalescence/algorithms/cuda/src/error.rs +++ b/rustcoalescence/algorithms/cuda/src/error.rs @@ -1,4 +1,4 @@ -use rust_cuda::rustacuda::error::CudaError as RustaCudaError; +use rust_cuda::deps::rustacuda::error::CudaError as RustaCudaError; use serde::{Deserialize, Serialize}; #[derive(thiserror::Error, Debug, Clone, Serialize, Deserialize)] diff --git a/rustcoalescence/algorithms/cuda/src/info.rs b/rustcoalescence/algorithms/cuda/src/info.rs index 1abf4ec07..78a5452ea 100644 --- a/rustcoalescence/algorithms/cuda/src/info.rs +++ b/rustcoalescence/algorithms/cuda/src/info.rs @@ -1,4 +1,4 @@ -use rust_cuda::rustacuda::{ +use rust_cuda::deps::rustacuda::{ context::{CurrentContext, ResourceLimit}, function::{Function, FunctionAttribute}, }; diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs b/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs index 06401c685..6c8dee90a 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/fixup.rs @@ -28,7 +28,7 @@ use rustcoalescence_algorithms::{ }; use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; @@ -42,19 +42,21 @@ pub struct FixUpInitialiser> { impl< L: ExactSizeIterator, - M: MathsCore, - G: PrimeableRng + RustToCuda, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, O: Scenario, > CudaLineageStoreSampleInitialiser> for FixUpInitialiser where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { type ActiveLineageSampler< - X: EmigrationExit> + RustToCuda, - J: EventTimeSampler + RustToCuda, + X: EmigrationExit> + + RustToCuda + + Sync, + J: EventTimeSampler + RustToCuda + Sync, > = IndependentActiveLineageSampler< M, O::Habitat, @@ -76,8 +78,10 @@ where fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs b/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs index 5f851c286..72b836902 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/genesis.rs @@ -14,7 +14,7 @@ use necsim_impls_no_std::cogs::{ use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; @@ -23,17 +23,19 @@ use super::CudaLineageStoreSampleInitialiser; #[allow(clippy::module_name_repetitions)] pub struct GenesisInitialiser; -impl + RustToCuda, O: Scenario> +impl + RustToCuda + Sync, O: Scenario> CudaLineageStoreSampleInitialiser for GenesisInitialiser where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { type ActiveLineageSampler< - X: EmigrationExit> + RustToCuda, - J: EventTimeSampler + RustToCuda, + X: EmigrationExit> + + RustToCuda + + Sync, + J: EventTimeSampler + RustToCuda + Sync, > = IndependentActiveLineageSampler< M, O::Habitat, @@ -50,8 +52,10 @@ where fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs b/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs index a1a39e87e..8a0d9a27c 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/mod.rs @@ -17,7 +17,7 @@ use necsim_impls_no_std::cogs::{ use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; @@ -28,38 +28,40 @@ pub mod resume; #[allow(clippy::module_name_repetitions)] pub trait CudaLineageStoreSampleInitialiser< M: MathsCore, - G: PrimeableRng + RustToCuda, + G: PrimeableRng + RustToCuda + Sync, O: Scenario, Error: From, > where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { - type DispersalSampler: DispersalSampler + RustToCuda; + type DispersalSampler: DispersalSampler + RustToCuda + Sync; type ActiveLineageSampler< X: EmigrationExit< M, O::Habitat, G, IndependentLineageStore, - > + RustToCuda, - J: EventTimeSampler + RustToCuda, + > + RustToCuda + Sync, + J: EventTimeSampler + RustToCuda + Sync, >: SingularActiveLineageSampler< M, O::Habitat, G, IndependentLineageStore, X, Self::DispersalSampler, IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, IndependentEventSampler< M, O::Habitat, G, X, Self::DispersalSampler, O::TurnoverRate, O::SpeciationProbability >, NeverImmigrationEntry, - > + RustToCuda; + > + RustToCuda + Sync; #[allow(clippy::type_complexity)] fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs b/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs index 2cba7640b..478690d96 100644 --- a/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs +++ b/rustcoalescence/algorithms/cuda/src/initialiser/resume.rs @@ -17,7 +17,7 @@ use necsim_impls_no_std::cogs::{ use rustcoalescence_algorithms::result::ResumeError; use rustcoalescence_scenarios::Scenario; -use rust_cuda::common::RustToCuda; +use rust_cuda::lend::RustToCuda; use crate::CudaError; @@ -31,19 +31,21 @@ pub struct ResumeInitialiser> { impl< L: ExactSizeIterator, - M: MathsCore, - G: PrimeableRng + RustToCuda, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, O: Scenario, > CudaLineageStoreSampleInitialiser> for ResumeInitialiser where - O::Habitat: RustToCuda, - O::DispersalSampler>: RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { type ActiveLineageSampler< - X: EmigrationExit> + RustToCuda, - J: EventTimeSampler + RustToCuda, + X: EmigrationExit> + + RustToCuda + + Sync, + J: EventTimeSampler + RustToCuda + Sync, > = IndependentActiveLineageSampler< M, O::Habitat, @@ -60,8 +62,10 @@ where fn init< 'h, T: TrustedOriginSampler<'h, M, Habitat = O::Habitat>, - J: EventTimeSampler + RustToCuda, - X: EmigrationExit> + RustToCuda, + J: EventTimeSampler + RustToCuda + Sync, + X: EmigrationExit> + + RustToCuda + + Sync, >( self, origin_sampler: T, diff --git a/rustcoalescence/algorithms/cuda/src/launch.rs b/rustcoalescence/algorithms/cuda/src/launch.rs index 12589699a..44e0e66f6 100644 --- a/rustcoalescence/algorithms/cuda/src/launch.rs +++ b/rustcoalescence/algorithms/cuda/src/launch.rs @@ -1,9 +1,12 @@ use std::marker::PhantomData; -use necsim_core::{cogs::MathsCore, reporter::Reporter, simulation::SimulationBuilder}; +use necsim_core::{ + cogs::{MathsCore, PrimeableRng}, + reporter::Reporter, + simulation::SimulationBuilder, +}; use necsim_core_bond::NonNegativeF64; -use necsim_impls_cuda::cogs::rng::CudaRng; use necsim_impls_no_std::{ cogs::{ active_lineage_sampler::independent::event_time_sampler::exp::ExpEventTimeSampler, @@ -16,7 +19,6 @@ use necsim_impls_no_std::{ origin_sampler::{ decomposition::DecompositionOriginSampler, pre_sampler::OriginPreSampler, }, - rng::wyhash::WyHash, }, parallelisation::Status, }; @@ -25,15 +27,16 @@ use necsim_partitioning_core::LocalPartition; use rustcoalescence_algorithms::result::SimulationOutcome; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernel; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use rust_cuda::{ - common::RustToCuda, - rustacuda::{ + deps::rustacuda::{ function::{BlockSize, GridSize}, prelude::{Stream, StreamFlags}, }, + host::CudaDropWrapper, + kernel::{CompiledKernelPtx, LaunchConfig, Launcher, TypedPtxKernel}, + lend::RustToCuda, }; use crate::{ @@ -49,75 +52,54 @@ use crate::{ #[allow(clippy::too_many_lines)] pub fn initialise_and_simulate< 'p, - M: MathsCore, - O: Scenario>>, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, I: Iterator, - L: CudaLineageStoreSampleInitialiser>, O, Error>, + L: CudaLineageStoreSampleInitialiser, Error: From, ->( - args: &CudaArguments, - rng: CudaRng>, - scenario: O, - pre_sampler: OriginPreSampler, - pause_before: Option, - local_partition: &mut P, - lineage_store_sampler_initialiser: L, -) -> Result>>, Error> -where - O::Habitat: RustToCuda, - O::DispersalSampler>>>: - RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, - SimulationKernel< - M, - O::Habitat, - CudaRng>, - IndependentLineageStore, - NeverEmigrationExit, - L::DispersalSampler, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - L::DispersalSampler, - O::TurnoverRate, - O::SpeciationProbability, - >, - NeverImmigrationEntry, - L::ActiveLineageSampler, - R::ReportSpeciation, - R::ReportDispersal, - >: SimulatableKernel< - M, - O::Habitat, - CudaRng>, - IndependentLineageStore, - NeverEmigrationExit, - L::DispersalSampler, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< + Ptx: CompiledKernelPtx< + simulate< M, O::Habitat, - CudaRng>, + G, + IndependentLineageStore, NeverEmigrationExit, L::DispersalSampler, + IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, + IndependentEventSampler< + M, + O::Habitat, + G, + NeverEmigrationExit, + L::DispersalSampler, + O::TurnoverRate, + O::SpeciationProbability, + >, + NeverImmigrationEntry, + L::ActiveLineageSampler, + R::ReportSpeciation, + R::ReportDispersal, >, - NeverImmigrationEntry, - L::ActiveLineageSampler, - R::ReportSpeciation, - R::ReportDispersal, >, +>( + args: &CudaArguments, + rng: G, + scenario: O, + pre_sampler: OriginPreSampler, + pause_before: Option, + local_partition: &mut P, + lineage_store_sampler_initialiser: L, +) -> Result, Error> +where + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, { let ( habitat, @@ -126,8 +108,7 @@ where speciation_probability, origin_sampler_auxiliary, decomposition_auxiliary, - ) = scenario - .build::>>>(); + ) = scenario.build::>(); let coalescence_sampler = IndependentCoalescenceSampler::default(); let event_sampler = IndependentEventSampler::default(); @@ -196,26 +177,36 @@ where }; let (mut status, time, steps, lineages) = with_initialised_cuda(args.device, || { - let kernel = SimulationKernel::try_new( - Stream::new(StreamFlags::NON_BLOCKING, None)?, - grid_size.clone(), - block_size.clone(), - args.ptx_jit, - Box::new(|kernel| { - crate::info::print_kernel_function_attributes("simulate", kernel); - Ok(()) - }), - )?; - - parallelisation::monolithic::simulate( - &mut simulation, - kernel, - (grid_size, block_size, args.dedup_cache, args.step_slice), - lineages, - event_slice, - pause_before, - local_partition, - ) + let mut stream = CudaDropWrapper::from(Stream::new(StreamFlags::NON_BLOCKING, None)?); + + let mut kernel = TypedPtxKernel::new::(Some(Box::new(|kernel| { + crate::info::print_kernel_function_attributes("simulate", kernel); + Ok(()) + }))); + + let config = LaunchConfig { + grid: grid_size, + block: block_size, + ptx_jit: args.ptx_jit, + }; + + rust_cuda::host::Stream::with(&mut stream, |stream| { + let launcher = Launcher { + stream, + kernel: &mut kernel, + config, + }; + + parallelisation::monolithic::simulate( + &mut simulation, + launcher, + (args.dedup_cache, args.step_slice), + lineages, + event_slice, + pause_before, + local_partition, + ) + }) }) .map_err(CudaError::from)?; diff --git a/rustcoalescence/algorithms/cuda/src/lib.rs b/rustcoalescence/algorithms/cuda/src/lib.rs index e2c221dca..8aa09353f 100644 --- a/rustcoalescence/algorithms/cuda/src/lib.rs +++ b/rustcoalescence/algorithms/cuda/src/lib.rs @@ -5,7 +5,12 @@ #[macro_use] extern crate serde_derive_state; -use necsim_core::{cogs::MathsCore, lineage::Lineage, reporter::Reporter}; +use initialiser::CudaLineageStoreSampleInitialiser; +use necsim_core::{ + cogs::{MathsCore, PrimeableRng}, + lineage::Lineage, + reporter::Reporter, +}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; use necsim_impls_cuda::cogs::{maths::NvptxMathsCore, rng::CudaRng}; @@ -37,10 +42,10 @@ use rustcoalescence_algorithms::{ }; use rustcoalescence_scenarios::Scenario; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernel; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernelPtx; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; -use rust_cuda::common::RustToCuda; +use rust_cuda::{kernel::CompiledKernelPtx, lend::RustToCuda}; mod arguments; mod cuda; @@ -68,42 +73,38 @@ impl AlgorithmParamters for CudaAlgorithm { impl AlgorithmDefaults for CudaAlgorithm { type MathsCore = NvptxMathsCore; + type Rng = CudaRng>; } -#[allow(clippy::trait_duplication_in_bounds)] impl< 'p, - M: MathsCore, - O: Scenario>>, + M: MathsCore + Sync, + G: PrimeableRng + RustToCuda + Sync, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, - > Algorithm<'p, M, O, R, P> for CudaAlgorithm + > Algorithm<'p, M, G, O, R, P> for CudaAlgorithm where - O::Habitat: RustToCuda, - O::DispersalSampler>>>: - RustToCuda, - O::TurnoverRate: RustToCuda, - O::SpeciationProbability: RustToCuda, - SimulationKernel< + O::Habitat: RustToCuda + Sync, + O::DispersalSampler>: RustToCuda + Sync, + O::TurnoverRate: RustToCuda + Sync, + O::SpeciationProbability: RustToCuda + Sync, + SimulationKernelPtx< M, O::Habitat, - CudaRng>, + G, IndependentLineageStore, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, IndependentEventSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, O::TurnoverRate, O::SpeciationProbability, >, @@ -111,70 +112,62 @@ where IndependentActiveLineageSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, O::TurnoverRate, O::SpeciationProbability, ExpEventTimeSampler, >, R::ReportSpeciation, R::ReportDispersal, - >: SimulatableKernel< - M, - O::Habitat, - CudaRng>, - IndependentLineageStore, - NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< + >: CompiledKernelPtx< + simulate< M, O::Habitat, - CudaRng>, + G, + IndependentLineageStore, NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, + O::DispersalSampler>, + IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, - >, - NeverImmigrationEntry, - IndependentActiveLineageSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, + IndependentEventSampler< + M, + O::Habitat, + G, + NeverEmigrationExit, + O::DispersalSampler>, + O::TurnoverRate, + O::SpeciationProbability, >, - O::TurnoverRate, - O::SpeciationProbability, - ExpEventTimeSampler, + NeverImmigrationEntry, + IndependentActiveLineageSampler< + M, + O::Habitat, + G, + NeverEmigrationExit, + O::DispersalSampler>, + O::TurnoverRate, + O::SpeciationProbability, + ExpEventTimeSampler, + >, + R::ReportSpeciation, + R::ReportDispersal, >, - R::ReportSpeciation, - R::ReportDispersal, >, - SimulationKernel< + SimulationKernelPtx< M, O::Habitat, - CudaRng>, + G, IndependentLineageStore, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, IndependentCoalescenceSampler, O::TurnoverRate, @@ -182,16 +175,14 @@ where IndependentEventSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, O::TurnoverRate, O::SpeciationProbability, @@ -200,16 +191,14 @@ where IndependentActiveLineageSampler< M, O::Habitat, - CudaRng>, + G, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, O::TurnoverRate, O::SpeciationProbability, @@ -217,66 +206,61 @@ where >, R::ReportSpeciation, R::ReportDispersal, - >: SimulatableKernel< - M, - O::Habitat, - CudaRng>, - IndependentLineageStore, - NeverEmigrationExit, - TrespassingDispersalSampler< - M, - O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, - >, - IndependentCoalescenceSampler, - O::TurnoverRate, - O::SpeciationProbability, - IndependentEventSampler< + >: CompiledKernelPtx< + simulate< M, O::Habitat, - CudaRng>, + G, + IndependentLineageStore, NeverEmigrationExit, TrespassingDispersalSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, - >, - UniformAntiTrespassingDispersalSampler>>, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, + IndependentCoalescenceSampler, O::TurnoverRate, O::SpeciationProbability, - >, - NeverImmigrationEntry, - IndependentActiveLineageSampler< - M, - O::Habitat, - CudaRng>, - NeverEmigrationExit, - TrespassingDispersalSampler< + IndependentEventSampler< M, O::Habitat, - CudaRng>, - O::DispersalSampler< - InMemoryPackedAliasDispersalSampler>>, + G, + NeverEmigrationExit, + TrespassingDispersalSampler< + M, + O::Habitat, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, >, - UniformAntiTrespassingDispersalSampler>>, + O::TurnoverRate, + O::SpeciationProbability, >, - O::TurnoverRate, - O::SpeciationProbability, - ConstEventTimeSampler, + NeverImmigrationEntry, + IndependentActiveLineageSampler< + M, + O::Habitat, + G, + NeverEmigrationExit, + TrespassingDispersalSampler< + M, + O::Habitat, + G, + O::DispersalSampler>, + UniformAntiTrespassingDispersalSampler, + >, + O::TurnoverRate, + O::SpeciationProbability, + ConstEventTimeSampler, + >, + R::ReportSpeciation, + R::ReportDispersal, >, - R::ReportSpeciation, - R::ReportDispersal, >, { type LineageStore = IndependentLineageStore; - type Rng = CudaRng>; fn get_logical_partition(args: &Self::Arguments, _local_partition: &P) -> Partition { match &args.parallelism_mode { @@ -290,13 +274,28 @@ where fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { - launch::initialise_and_simulate( + ) -> Result, Self::Error> { + launch::initialise_and_simulate::<_, _, _, _, _, _, _, _, SimulationKernelPtx< + _, + _, + _, + _, + _, + >::DispersalSampler, + _, + _, + _, + _, + _, + >::ActiveLineageSampler<_, _>, + _, + _, + >>( &args, rng, scenario, @@ -311,18 +310,32 @@ where /// /// Returns a `ContinueError::Sample` if initialising the resuming /// simulation failed - #[allow(clippy::too_many_lines)] fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { - launch::initialise_and_simulate( + ) -> Result, ResumeError> { + launch::initialise_and_simulate::<_, _, _, _, _, _, _, _, SimulationKernelPtx< + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::DispersalSampler, + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, _>, + _, + _, + >>( &args, rng, scenario, @@ -340,24 +353,38 @@ where /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { - launch::initialise_and_simulate( + ) -> Result, ResumeError> { + launch::initialise_and_simulate::<_, _, _, _, _, _, _, _, SimulationKernelPtx< + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::DispersalSampler, + _, + _, + _, + _, + _, + as CudaLineageStoreSampleInitialiser<_, _, O, _>>::ActiveLineageSampler<_, ConstEventTimeSampler>, + _, + _, + >>( &args, rng, scenario, pre_sampler, - Some(PositiveF64::max_after(restart_at.into(), restart_at.into()).into()), - local_partition, + Some(PositiveF64::max_after(restart_at.into(), + restart_at.into()).into()), local_partition, FixUpInitialiser { lineages, restart_at, diff --git a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs index 66e1ff479..213f6aa11 100644 --- a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs +++ b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs @@ -1,9 +1,9 @@ use std::{collections::VecDeque, convert::TryInto, num::NonZeroU64, sync::atomic::AtomicU64}; use rust_cuda::{ - common::RustToCuda, - host::{HostAndDeviceMutRef, LendToCuda}, - rustacuda::function::{BlockSize, GridSize}, + host::HostAndDeviceMutRef, + kernel::Launcher, + lend::{LendToCuda, RustToCuda}, utils::exchange::wrapper::ExchangeWrapperOnHost, }; @@ -37,8 +37,7 @@ use necsim_partitioning_core::LocalPartition; use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer}; -use rustcoalescence_algorithms_cuda_cpu_kernel::SimulationKernel; -use rustcoalescence_algorithms_cuda_gpu_kernel::SimulatableKernel; +use rustcoalescence_algorithms_cuda_gpu_kernel::simulate; use crate::error::CudaError; @@ -48,25 +47,24 @@ type Result = std::result::Result; pub fn simulate< 'l, 'p, - M: MathsCore, - H: Habitat + RustToCuda, - G: PrimeableRng + RustToCuda, - S: LineageStore + RustToCuda, - X: EmigrationExit + RustToCuda, - D: DispersalSampler + RustToCuda, - C: CoalescenceSampler + RustToCuda, - T: TurnoverRate + RustToCuda, - N: SpeciationProbability + RustToCuda, - E: MinSpeciationTrackingEventSampler + RustToCuda, - I: ImmigrationEntry + RustToCuda, - A: SingularActiveLineageSampler - + RustToCuda, + M: MathsCore + Sync, + H: Habitat + RustToCuda + Sync, + G: PrimeableRng + RustToCuda + Sync, + S: LineageStore + RustToCuda + Sync, + X: EmigrationExit + RustToCuda + Sync, + D: DispersalSampler + RustToCuda + Sync, + C: CoalescenceSampler + RustToCuda + Sync, + T: TurnoverRate + RustToCuda + Sync, + N: SpeciationProbability + RustToCuda + Sync, + E: MinSpeciationTrackingEventSampler + RustToCuda + Sync, + I: ImmigrationEntry + RustToCuda + Sync, + A: SingularActiveLineageSampler + RustToCuda + Sync, P: Reporter, L: LocalPartition<'p, P>, LI: IntoIterator, >( simulation: &mut Simulation, - mut kernel: SimulationKernel< + mut launcher: Launcher>::WaterLevelReporter as Reporter>::ReportSpeciation, <>::WaterLevelReporter as Reporter>::ReportDispersal, - >, - config: (GridSize, BlockSize, DedupCache, NonZeroU64), + >>, + config: (DedupCache, NonZeroU64), lineages: LI, event_slice: EventSlice, pause_before: Option, local_partition: &'l mut L, -) -> Result<(Status, NonNegativeF64, u64, impl IntoIterator)> - where SimulationKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - <>::WaterLevelReporter as Reporter>::ReportSpeciation, - <>::WaterLevelReporter as Reporter>::ReportDispersal, - >: SimulatableKernel< - M, - H, - G, - S, - X, - D, - C, - T, - N, - E, - I, - A, - <>::WaterLevelReporter as Reporter>::ReportSpeciation, - <>::WaterLevelReporter as Reporter>::ReportDispersal, - >, -{ +) -> Result<( + Status, + NonNegativeF64, + u64, + impl IntoIterator, +)> { let mut slow_lineages = lineages .into_iter() .map(|lineage| { @@ -143,7 +114,7 @@ pub fn simulate< L, >>::WaterLevelReporter::new(event_slice.get(), local_partition); - let (grid_size, block_size, dedup_cache, step_slice) = config; + let (dedup_cache, step_slice) = config; #[allow(clippy::or_fun_call)] let intial_max_time = slow_lineages @@ -153,10 +124,13 @@ pub fn simulate< .unwrap_or(NonNegativeF64::zero()); // Initialise the total_time_max and total_steps_sum atomics - let mut total_time_max = AtomicU64::new(intial_max_time.get().to_bits()).into(); - let mut total_steps_sum = AtomicU64::new(0_u64).into(); + let mut total_time_max = AtomicU64::new(intial_max_time.get().to_bits()); + let mut total_steps_sum = AtomicU64::new(0_u64); - let mut task_list = ExchangeWrapperOnHost::new(ValueBuffer::new(&block_size, &grid_size)?)?; + let mut task_list = ExchangeWrapperOnHost::new(ValueBuffer::new( + &launcher.config.block, + &launcher.config.grid, + )?)?; let mut event_buffer: ExchangeWrapperOnHost< EventBuffer< <>::WaterLevelReporter as Reporter>::ReportDispersal, >, > = ExchangeWrapperOnHost::new(EventBuffer::new( - &block_size, - &grid_size, + &launcher.config.block, &launcher.config.grid, step_slice.get().try_into().unwrap_or(usize::MAX), )?)?; - let mut min_spec_sample_buffer = - ExchangeWrapperOnHost::new(ValueBuffer::new(&block_size, &grid_size)?)?; - let mut next_event_time_buffer = - ExchangeWrapperOnHost::new(ValueBuffer::new(&block_size, &grid_size)?)?; + let mut min_spec_sample_buffer = ExchangeWrapperOnHost::new(ValueBuffer::new( + &launcher.config.block, + &launcher.config.grid, + )?)?; + let mut next_event_time_buffer = ExchangeWrapperOnHost::new(ValueBuffer::new( + &launcher.config.block, + &launcher.config.grid, + )?)?; let mut min_spec_samples = dedup_cache.construct(slow_lineages.len()); @@ -195,8 +172,7 @@ pub fn simulate< HostAndDeviceMutRef::with_new(&mut total_time_max, |total_time_max| -> Result<()> { HostAndDeviceMutRef::with_new(&mut total_steps_sum, |total_steps_sum| -> Result<()> { - // TODO: Pipeline async launches and callbacks of simulation/event analysis - simulation.lend_to_cuda_mut(|mut simulation_cuda_repr| -> Result<()> { + simulation.lend_to_cuda(|simulation_cuda_repr| -> Result<()> { while !slow_lineages.is_empty() && pause_before.map_or(true, |pause_before| level_time < pause_before) { @@ -242,8 +218,16 @@ pub fn simulate< proxy.advance_water_level(level_time); // Simulate all slow lineages until they have finished or exceeded the - // new water level + // new water level while !slow_lineages.is_empty() { + // Move the event buffer and min speciation sample buffer to CUDA + let mut event_buffer_cuda_async = + event_buffer.move_to_device_async(launcher.stream)?; + let mut min_spec_sample_buffer_cuda_async = + min_spec_sample_buffer.move_to_device_async(launcher.stream)?; + let mut next_event_time_buffer_cuda_async = + next_event_time_buffer.move_to_device_async(launcher.stream)?; + // Upload the new tasks from the front of the task queue for mut task in task_list.iter_mut() { let next_slow_lineage = loop { @@ -261,31 +245,44 @@ pub fn simulate< task.replace(next_slow_lineage); } - // Move the task list, event buffer and min speciation sample buffer - // to CUDA - let mut event_buffer_cuda = event_buffer.move_to_device()?; - let mut min_spec_sample_buffer_cuda = - min_spec_sample_buffer.move_to_device()?; - let mut next_event_time_buffer_cuda = - next_event_time_buffer.move_to_device()?; - let mut task_list_cuda = task_list.move_to_device()?; - - kernel.simulate_raw( - simulation_cuda_repr.as_mut(), - task_list_cuda.as_mut(), - event_buffer_cuda.as_mut(), - min_spec_sample_buffer_cuda.as_mut(), - next_event_time_buffer_cuda.as_mut(), - total_time_max.as_ref(), - total_steps_sum.as_ref(), - step_slice.get().into(), - level_time.into(), + // Move the task list to CUDA + let mut task_list_cuda_async = + task_list.move_to_device_async(launcher.stream)?; + + let launch = launcher.launch9_async( + simulation_cuda_repr.as_async(launcher.stream).extract_ref(), + task_list_cuda_async.as_mut_async(), + event_buffer_cuda_async.as_mut_async(), + min_spec_sample_buffer_cuda_async.as_mut_async(), + next_event_time_buffer_cuda_async.as_mut_async(), + total_time_max + .as_ref() + .as_async(launcher.stream) + .extract_ref(), + total_steps_sum + .as_ref() + .as_async(launcher.stream) + .extract_ref(), + step_slice.get(), + level_time, )?; - min_spec_sample_buffer = min_spec_sample_buffer_cuda.move_to_host()?; - next_event_time_buffer = next_event_time_buffer_cuda.move_to_host()?; - task_list = task_list_cuda.move_to_host()?; - event_buffer = event_buffer_cuda.move_to_host()?; + let min_spec_sample_buffer_host_async = + min_spec_sample_buffer_cuda_async + .move_to_host_async(launcher.stream)?; + let next_event_time_buffer_host_async = + next_event_time_buffer_cuda_async + .move_to_host_async(launcher.stream)?; + let task_list_host_async = + task_list_cuda_async.move_to_host_async(launcher.stream)?; + let event_buffer_host_async = + event_buffer_cuda_async.move_to_host_async(launcher.stream)?; + + task_list = task_list_host_async.synchronize()?; + next_event_time_buffer = next_event_time_buffer_host_async.synchronize()?; + min_spec_sample_buffer = min_spec_sample_buffer_host_async.synchronize()?; + + launch.synchronize()?; // Fetch the completion of the tasks for ((mut spec_sample, mut next_event_time), mut task) in @@ -303,8 +300,7 @@ pub fn simulate< { if !duplicate_individual { // Reclassify lineages as either slow (still below - // water) or - // fast + // the metaphorical water level) or fast if next_event_time < level_time { slow_lineages.push_back((task, next_event_time.into())); } else { @@ -314,6 +310,8 @@ pub fn simulate< } } + event_buffer = event_buffer_host_async.synchronize()?; + // TODO: explore partial sorting on the GPU event_buffer.report_events_unordered(&mut proxy); proxy.local_partition().get_reporter().report_progress( @@ -336,10 +334,9 @@ pub fn simulate< })?; // Safety: Max of NonNegativeF64 values from the GPU - let total_time_max = unsafe { - NonNegativeF64::new_unchecked(f64::from_bits(total_time_max.into_inner().into_inner())) - }; - let total_steps_sum = total_steps_sum.into_inner().into_inner(); + let total_time_max = + unsafe { NonNegativeF64::new_unchecked(f64::from_bits(total_time_max.into_inner())) }; + let total_steps_sum = total_steps_sum.into_inner(); local_partition.report_progress_sync(slow_lineages.len() as u64); @@ -348,5 +345,9 @@ pub fn simulate< local_partition.reduce_global_time_steps(total_time_max, total_steps_sum); let lineages = slow_lineages.into_iter().map(|(lineage, _)| lineage); + // Note: The simulation requires no mutation, since all components are + // either immutable or have singular swap states, and the list + // of all lineages (which does change) is returned separately + Ok((status, global_time, global_steps, lineages)) } diff --git a/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs b/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs index 4dc2bfa4e..1c37f0db9 100644 --- a/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/event_skipping/mod.rs @@ -1,5 +1,5 @@ use necsim_core::{ - cogs::{GloballyCoherentLineageStore, MathsCore, SeparableDispersalSampler}, + cogs::{GloballyCoherentLineageStore, MathsCore, SeparableDispersalSampler, SplittableRng}, lineage::Lineage, reporter::Reporter, }; @@ -39,18 +39,24 @@ impl AlgorithmParamters for EventSkippingAlgorithm { impl AlgorithmDefaults for EventSkippingAlgorithm { type MathsCore = IntrinsicsMathsCore; + type Rng = Pcg; } -impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore> - Algorithm<'p, M, O, R, P> for EventSkippingAlgorithm +impl< + 'p, + O: Scenario, + R: Reporter, + P: LocalPartition<'p, R>, + M: MathsCore, + G: SplittableRng, + > Algorithm<'p, M, G, O, R, P> for EventSkippingAlgorithm where O::LineageStore>: GloballyCoherentLineageStore, - O::DispersalSampler>>: - SeparableDispersalSampler>, + O::DispersalSampler>: + SeparableDispersalSampler, { type LineageStore = O::LineageStore>; - type Rng = Pcg; fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition { get_gillespie_logical_partition(args, local_partition) @@ -58,12 +64,12 @@ where fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( args, rng, @@ -81,14 +87,14 @@ where /// simulation failed fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, @@ -109,14 +115,14 @@ where /// simulation (incl. running the algorithm) failed fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs index 06114bca7..892b7e285 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/classical/mod.rs @@ -1,5 +1,5 @@ use necsim_core::{ - cogs::{LocallyCoherentLineageStore, MathsCore}, + cogs::{LocallyCoherentLineageStore, MathsCore, SplittableRng}, lineage::Lineage, reporter::Reporter, }; @@ -9,7 +9,6 @@ use necsim_impls_no_std::cogs::{ lineage_store::coherent::locally::classical::ClassicalLineageStore, origin_sampler::pre_sampler::OriginPreSampler, turnover_rate::uniform::UniformTurnoverRate, }; -use necsim_impls_std::cogs::rng::pcg::Pcg; use necsim_partitioning_core::LocalPartition; use rustcoalescence_algorithms::{ @@ -31,24 +30,24 @@ use initialiser::{ // Optimised 'Classical' implementation for the `UniformTurnoverSampler` impl< 'p, - O: Scenario, TurnoverRate = UniformTurnoverRate>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore, - > Algorithm<'p, M, O, R, P> for GillespieAlgorithm + G: SplittableRng, + > Algorithm<'p, M, G, O, R, P> for GillespieAlgorithm where O::LineageStore>: LocallyCoherentLineageStore, { - #[allow(clippy::too_many_lines)] fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( args, rng, @@ -66,14 +65,14 @@ where /// simulation failed fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, @@ -92,17 +91,16 @@ where /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs index f485eb6a6..c1f775555 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/mod.rs @@ -1,4 +1,6 @@ +use necsim_core::cogs::MathsCore; use necsim_impls_no_std::cogs::maths::intrinsics::IntrinsicsMathsCore; +use necsim_impls_std::cogs::rng::pcg::Pcg; use rustcoalescence_algorithms::{AlgorithmDefaults, AlgorithmParamters}; @@ -17,4 +19,5 @@ impl AlgorithmParamters for GillespieAlgorithm { impl AlgorithmDefaults for GillespieAlgorithm { type MathsCore = IntrinsicsMathsCore; + type Rng = Pcg; } diff --git a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs index 7f44e6280..a08985da7 100644 --- a/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs +++ b/rustcoalescence/algorithms/gillespie/src/gillespie/turnover/mod.rs @@ -1,5 +1,5 @@ use necsim_core::{ - cogs::{LocallyCoherentLineageStore, MathsCore}, + cogs::{LocallyCoherentLineageStore, MathsCore, SplittableRng}, lineage::Lineage, reporter::Reporter, }; @@ -9,7 +9,6 @@ use necsim_impls_no_std::cogs::{ lineage_store::coherent::locally::classical::ClassicalLineageStore, origin_sampler::pre_sampler::OriginPreSampler, }; -use necsim_impls_std::cogs::rng::pcg::Pcg; use necsim_partitioning_core::{partition::Partition, LocalPartition}; use rustcoalescence_algorithms::{ @@ -31,28 +30,32 @@ use initialiser::{ }; // Default 'Gillespie' implementation for any turnover sampler -impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore> - Algorithm<'p, M, O, R, P> for GillespieAlgorithm +impl< + 'p, + O: Scenario, + R: Reporter, + P: LocalPartition<'p, R>, + M: MathsCore, + G: SplittableRng, + > Algorithm<'p, M, G, O, R, P> for GillespieAlgorithm where O::LineageStore>: LocallyCoherentLineageStore, { type LineageStore = O::LineageStore>; - type Rng = Pcg; default fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition { get_gillespie_logical_partition(args, local_partition) } - #[allow(clippy::shadow_unrelated, clippy::too_many_lines)] default fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( args, rng, @@ -68,20 +71,19 @@ where /// /// Returns a `ContinueError::Sample` if initialising the resuming /// simulation failed - #[allow(clippy::too_many_lines)] default fn resume_and_simulate< I: Iterator, L: ExactSizeIterator, >( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, @@ -100,17 +102,16 @@ where /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] default fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( args, rng, diff --git a/rustcoalescence/algorithms/independent/src/lib.rs b/rustcoalescence/algorithms/independent/src/lib.rs index 8a7d0473d..7550642c9 100644 --- a/rustcoalescence/algorithms/independent/src/lib.rs +++ b/rustcoalescence/algorithms/independent/src/lib.rs @@ -4,7 +4,11 @@ #[macro_use] extern crate serde_derive_state; -use necsim_core::{cogs::MathsCore, lineage::Lineage, reporter::Reporter}; +use necsim_core::{ + cogs::{MathsCore, PrimeableRng}, + lineage::Lineage, + reporter::Reporter, +}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; use necsim_impls_no_std::cogs::{ @@ -39,13 +43,19 @@ impl AlgorithmParamters for IndependentAlgorithm { impl AlgorithmDefaults for IndependentAlgorithm { type MathsCore = IntrinsicsMathsCore; + type Rng = WyHash; } -impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: MathsCore> - Algorithm<'p, M, O, R, P> for IndependentAlgorithm +impl< + 'p, + O: Scenario, + R: Reporter, + P: LocalPartition<'p, R>, + M: MathsCore, + G: PrimeableRng, + > Algorithm<'p, M, G, O, R, P> for IndependentAlgorithm { type LineageStore = IndependentLineageStore; - type Rng = WyHash; fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition { match &args.parallelism_mode { @@ -62,12 +72,12 @@ impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: Ma fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { launch::initialise_and_simulate( &args, rng, @@ -85,14 +95,14 @@ impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: Ma /// simulation failed fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( &args, rng, @@ -111,17 +121,16 @@ impl<'p, O: Scenario>, R: Reporter, P: LocalPartition<'p, R>, M: Ma /// /// Returns a `ContinueError` if fixing up the restarting /// simulation (incl. running the algorithm) failed - #[allow(clippy::too_many_lines)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError> { + ) -> Result, ResumeError> { launch::initialise_and_simulate( &args, rng, diff --git a/rustcoalescence/algorithms/src/lib.rs b/rustcoalescence/algorithms/src/lib.rs index 7ed7ce88a..da7ada445 100644 --- a/rustcoalescence/algorithms/src/lib.rs +++ b/rustcoalescence/algorithms/src/lib.rs @@ -27,17 +27,18 @@ pub trait AlgorithmParamters { pub trait AlgorithmDefaults { type MathsCore: MathsCore; + type Rng: RngCore; } pub trait Algorithm< 'p, M: MathsCore, - O: Scenario, + G: RngCore, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >: Sized + AlgorithmParamters + AlgorithmDefaults { - type Rng: RngCore; type LineageStore: LineageStore; fn get_logical_partition(args: &Self::Arguments, local_partition: &P) -> Partition; @@ -48,12 +49,12 @@ pub trait Algorithm< /// the algorithm failed fn initialise_and_simulate>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, pause_before: Option, local_partition: &mut P, - ) -> Result, Self::Error>; + ) -> Result, Self::Error>; /// # Errors /// @@ -62,14 +63,14 @@ pub trait Algorithm< #[allow(clippy::type_complexity, clippy::too_many_arguments)] fn resume_and_simulate, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, resume_after: Option, pause_before: Option, local_partition: &mut P, - ) -> Result, ResumeError>; + ) -> Result, ResumeError>; /// # Errors /// @@ -78,12 +79,12 @@ pub trait Algorithm< #[allow(clippy::type_complexity, clippy::too_many_arguments)] fn fixup_for_restart, L: ExactSizeIterator>( args: Self::Arguments, - rng: Self::Rng, + rng: G, scenario: O, pre_sampler: OriginPreSampler, lineages: L, restart_at: PositiveF64, fixup_strategy: RestartFixUpStrategy, local_partition: &mut P, - ) -> Result, ResumeError>; + ) -> Result, ResumeError>; } diff --git a/rustcoalescence/scenarios/src/spatially_explicit/mod.rs b/rustcoalescence/scenarios/src/spatially_explicit/mod.rs index c4bc85206..d40d5c984 100644 --- a/rustcoalescence/scenarios/src/spatially_explicit/mod.rs +++ b/rustcoalescence/scenarios/src/spatially_explicit/mod.rs @@ -1,11 +1,11 @@ mod maps; mod turnover; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use turnover::map::{ SpatiallyExplicitTurnoverMapArguments, SpatiallyExplicitTurnoverMapScenario, }; -#[allow(clippy::useless_attribute, clippy::module_name_repetitions)] +#[allow(clippy::module_name_repetitions)] pub use turnover::uniform::{ SpatiallyExplicitUniformTurnoverArguments, SpatiallyExplicitUniformTurnoverScenario, }; diff --git a/rustcoalescence/src/args/config/rng/mod.rs b/rustcoalescence/src/args/config/rng/mod.rs index 6899a848a..5e536a5c3 100644 --- a/rustcoalescence/src/args/config/rng/mod.rs +++ b/rustcoalescence/src/args/config/rng/mod.rs @@ -188,11 +188,7 @@ impl<'a> ProtectedState<'a> { } fn from_bytes(bytes: &'a [u8]) -> Option { - if bytes.len() < 4 { - return None; - } - - let (state, checksum) = bytes.rsplit_array_ref(); + let (state, checksum) = bytes.split_last_chunk()?; let checksum = u32::from_le_bytes(*checksum); if adler::adler32_slice(state) != checksum { diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs index b8628421f..54715cf40 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs @@ -36,41 +36,42 @@ use super::{super::super::BufferingSimulateArgsBuilder, rng}; macro_rules! match_scenario_algorithm { ( - ($algorithm:expr, $scenario:expr => $algscen:ident) { + ($algorithm:expr, $scenario:expr => $algscen:ident : $algscenty:ident) { $($(#[$meta:meta])* $algpat:pat => $algcode:block),* <=> - $($scenpat:pat => $scencode:block),* + $($scenpat:pat => $scencode:block => $scenty:ident),* } ) => { match_scenario_algorithm! { - impl ($algorithm, $scenario => $algscen) { + impl ($algorithm, $scenario => $algscen : $algscenty) { $($(#[$meta])* $algpat => $algcode),* <=> - $($scenpat => $scencode),* + $($scenpat => $scencode => $scenty),* <=> } } }; ( - impl ($algorithm:expr, $scenario:expr => $algscen:ident) { + impl ($algorithm:expr, $scenario:expr => $algscen:ident : $algscenty:ident) { $(#[$meta:meta])* $algpat:pat => $algcode:block, $($(#[$metarem:meta])* $algpatrem:pat => $algcoderem:block),+ <=> - $($scenpat:pat => $scencode:block),* + $($scenpat:pat => $scencode:block => $scenty:ident),* <=> $($tail:tt)* } ) => { match_scenario_algorithm! { - impl ($algorithm, $scenario => $algscen) { + impl ($algorithm, $scenario => $algscen : $algscenty) { $($(#[$metarem])* $algpatrem => $algcoderem),+ <=> - $($scenpat => $scencode),* + $($scenpat => $scencode => $scenty),* <=> $($tail)* $(#[$meta])* $algpat => { match $scenario { $($scenpat => { + type $algscenty = $scenty; let $algscen = $scencode; $algcode }),* @@ -80,10 +81,10 @@ macro_rules! match_scenario_algorithm { } }; ( - impl ($algorithm:expr, $scenario:expr => $algscen:ident) { + impl ($algorithm:expr, $scenario:expr => $algscen:ident : $algscenty:ident) { $(#[$meta:meta])* $algpat:pat => $algcode:block <=> - $($scenpat:pat => $scencode:block),* + $($scenpat:pat => $scencode:block => $scenty:ident),* <=> $($tail:tt)* } @@ -93,6 +94,7 @@ macro_rules! match_scenario_algorithm { $(#[$meta])* $algpat => { match $scenario { $($scenpat => { + type $algscenty = $scenty; let $algscen = $scencode; $algcode }),* @@ -116,13 +118,14 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( normalised_args: &mut BufferingSimulateArgsBuilder, ) -> anyhow::Result { match_scenario_algorithm!( - (algorithm, scenario => scenario) + (algorithm, scenario => scenario: ScenarioTy) { #[cfg(feature = "rustcoalescence-algorithms-gillespie")] AlgorithmArgs::Gillespie(algorithm_args) => { rng::dispatch::< ::MathsCore, - GillespieAlgorithm, _, R, P, + ::Rng<_>, + GillespieAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -132,7 +135,8 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( AlgorithmArgs::EventSkipping(algorithm_args) => { rng::dispatch::< ::MathsCore, - EventSkippingAlgorithm, _, R, P, + ::Rng<_>, + EventSkippingAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -142,7 +146,8 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( AlgorithmArgs::Independent(algorithm_args) => { rng::dispatch::< ::MathsCore, - IndependentAlgorithm, _, R, P, + ::Rng<_>, + IndependentAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -152,7 +157,8 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( AlgorithmArgs::Cuda(algorithm_args) => { rng::dispatch::< ::MathsCore, - CudaAlgorithm, _, R, P, + ::Rng<_>, + CudaAlgorithm, ScenarioTy<_, _>, R, P, >( local_partition, sample, algorithm_args, scenario, pause_before, ron_args, normalised_args, @@ -164,40 +170,40 @@ pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( scenario_args, speciation_probability_per_generation, )? - }, + } => SpatiallyExplicitUniformTurnoverScenario, ScenarioArgs::SpatiallyExplicitTurnoverMap(scenario_args) => { SpatiallyExplicitTurnoverMapScenario::initialise( scenario_args, speciation_probability_per_generation, )? - }, + } => SpatiallyExplicitTurnoverMapScenario, ScenarioArgs::NonSpatial(scenario_args) => { NonSpatialScenario::initialise( scenario_args, speciation_probability_per_generation, ) .into_ok() - }, + } => NonSpatialScenario, ScenarioArgs::AlmostInfinite(scenario_args) => { AlmostInfiniteScenario::initialise( scenario_args, speciation_probability_per_generation, ) .into_ok() - }, + } => AlmostInfiniteScenario, ScenarioArgs::SpatiallyImplicit(scenario_args) => { SpatiallyImplicitScenario::initialise( scenario_args, speciation_probability_per_generation, ) .into_ok() - }, + } => SpatiallyImplicitScenario, ScenarioArgs::WrappingNoise(scenario_args) => { WrappingNoiseScenario::initialise( scenario_args, speciation_probability_per_generation, ) .into_ok() - } + } => WrappingNoiseScenario }) } diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs index aac4223c9..1b390136c 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/info.rs @@ -5,7 +5,7 @@ use anyhow::{Context, Result}; use rustcoalescence_algorithms::{result::SimulationOutcome, Algorithm}; use necsim_core::{ - cogs::MathsCore, + cogs::{MathsCore, RngCore}, reporter::{boolean::Boolean, Reporter}, }; use necsim_core_bond::NonNegativeF64; @@ -25,23 +25,23 @@ use super::{super::super::BufferingSimulateArgsBuilder, launch}; pub(super) fn dispatch< 'p, M: MathsCore, - A: Algorithm<'p, M, O, R, P>, - O: Scenario, + G: RngCore, + A: Algorithm<'p, M, G, O, R, P>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( algorithm_args: A::Arguments, - rng: A::Rng, + rng: G, scenario: O, sample: Sample, pause_before: Option, mut local_partition: P, normalised_args: &BufferingSimulateArgsBuilder, -) -> anyhow::Result> +) -> anyhow::Result> where - Result, A::Error>: - anyhow::Context, A::Error>, + Result, A::Error>: anyhow::Context, A::Error>, { let config_str = normalised_args .build() @@ -118,7 +118,7 @@ where warn!("The simulation will report no events."); } - let result = launch::simulate::( + let result = launch::simulate::( algorithm_args, rng, scenario, diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs index e070202d6..ceb5ff4c8 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/launch.rs @@ -2,7 +2,10 @@ use anyhow::Context; use rustcoalescence_algorithms::{result::SimulationOutcome, Algorithm}; -use necsim_core::{cogs::MathsCore, reporter::Reporter}; +use necsim_core::{ + cogs::{MathsCore, RngCore}, + reporter::Reporter, +}; use necsim_core_bond::{NonNegativeF64, PositiveF64}; use necsim_impls_no_std::cogs::origin_sampler::pre_sampler::OriginPreSampler; use necsim_partitioning_core::LocalPartition; @@ -14,18 +17,19 @@ use crate::args::config::sample::{Sample, SampleMode, SampleModeRestart, SampleO pub(super) fn simulate< 'p, M: MathsCore, - A: Algorithm<'p, M, O, R, P>, - O: Scenario, + G: RngCore, + A: Algorithm<'p, M, G, O, R, P>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( algorithm_args: A::Arguments, - rng: A::Rng, + rng: G, scenario: O, sample: Sample, pause_before: Option, local_partition: &mut P, -) -> anyhow::Result> { +) -> anyhow::Result> { let lineages = match sample.origin { SampleOrigin::Habitat => { return A::initialise_and_simulate( diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs index 929556339..2907eddc8 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/rng.rs @@ -27,8 +27,9 @@ use super::{ pub(super) fn dispatch< 'p, M: MathsCore, - A: Algorithm<'p, M, O, R, P>, - O: Scenario, + G: RngCore, + A: Algorithm<'p, M, G, O, R, P>, + O: Scenario, R: Reporter, P: LocalPartition<'p, R>, >( @@ -43,17 +44,16 @@ pub(super) fn dispatch< normalised_args: &mut BufferingSimulateArgsBuilder, ) -> anyhow::Result where - Result, A::Error>: - anyhow::Context, A::Error>, + Result, A::Error>: anyhow::Context, A::Error>, { - let rng: A::Rng = match parse::rng::parse_and_normalise( + let rng: G = match parse::rng::parse_and_normalise( ron_args, normalised_args, &mut A::get_logical_partition(&algorithm_args, &local_partition), )? { RngArgs::Seed(seed) => SeedableRng::seed_from_u64(seed), RngArgs::Sponge(bytes) => { - let mut seed = >::Seed::default(); + let mut seed = G::Seed::default(); let mut sponge = Keccak::v256(); sponge.update(&bytes); @@ -64,7 +64,7 @@ where RngArgs::State(state) => state.into(), }; - let result = info::dispatch::( + let result = info::dispatch::( algorithm_args, rng, scenario,