From 728b0769a3edc7d60b6bad7ce5efd52c7fb5fb09 Mon Sep 17 00:00:00 2001 From: Julian P Samaroo Date: Tue, 12 Nov 2024 12:01:33 -0800 Subject: [PATCH] GPU: Simplify stencils code and add animations --- Manifest.toml | 410 +++++++++++++-- Project.toml | 2 + parts/gpu/stencil.ipynb | 1045 +++++++++++++++++++++++++++++++++------ 3 files changed, 1282 insertions(+), 175 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 55d702f..772dd4e 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,7 +2,7 @@ julia_version = "1.10.4" manifest_format = "2.0" -project_hash = "301191f13b64bde29b037705efe1f5740c5318a8" +project_hash = "0a5652c8b6619c366c604b9eae1fc02e79dd92ac" [[deps.AbstractFFTs]] deps = ["LinearAlgebra"] @@ -140,6 +140,11 @@ git-tree-sha1 = "f1dff6729bc61f4d49e140da1af55dcd1ac97b2f" uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" version = "1.5.0" +[[deps.BitFlags]] +git-tree-sha1 = "0691e34b3bb8be9307330f88d1a3c3f25466c24d" +uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" +version = "0.1.9" + [[deps.Bzip2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "8873e196c2eb87962a2048b3b8e08946535864a1" @@ -227,6 +232,12 @@ git-tree-sha1 = "397b871ff701290cc122cca06af61c5bdf9f5605" uuid = "ae650224-84b6-46f8-82ea-d812ca08434e" version = "3.1.0" +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.6" + [[deps.ColorBrewer]] deps = ["Colors", "JSON", "Test"] git-tree-sha1 = "61c5334f33d91e570e1d0c3eb5465835242582c4" @@ -285,6 +296,12 @@ weakdeps = ["InverseFunctions"] [deps.CompositionsBase.extensions] CompositionsBaseInverseFunctionsExt = "InverseFunctions" +[[deps.ConcurrentUtilities]] +deps = ["Serialization", "Sockets"] +git-tree-sha1 = "ea32b83ca4fefa1768dc84e504cc0a94fb1ab8d1" +uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" +version = "2.4.2" + [[deps.Conda]] deps = ["Downloads", "JSON", "VersionParsing"] 
git-tree-sha1 = "b19db3927f0db4151cb86d073689f2428e524576" @@ -338,6 +355,12 @@ version = "1.0.0" deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +[[deps.Dbus_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl"] +git-tree-sha1 = "fc173b380865f70627d7dd1190dc2fce6cc105af" +uuid = "ee1fde0b-3d02-5ea6-8484-8dfef6360eab" +version = "1.14.10+0" + [[deps.DelaunayTriangulation]] deps = ["AdaptivePredicates", "EnumX", "ExactPredicates", "PrecompileTools", "Random"] git-tree-sha1 = "89df54fbe66e5872d91d8c2cd3a375f660c3fd64" @@ -392,12 +415,24 @@ git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237" uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56" version = "1.0.4" +[[deps.EpollShim_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "8e9441ee83492030ace98f9789a654a6d0b1f643" +uuid = "2702e6a9-849d-5ed8-8c21-79e8b8f9ee43" +version = "0.0.20230411+0" + [[deps.ExactPredicates]] deps = ["IntervalArithmetic", "Random", "StaticArrays"] git-tree-sha1 = "b3f2ff58735b5f024c392fde763f29b057e4b025" uuid = "429591f6-91af-11e9-00e2-59fbe8cec110" version = "2.2.8" +[[deps.ExceptionUnwrapping]] +deps = ["Test"] +git-tree-sha1 = "dcb08a0d93ec0b1cdc4af184b26b591e9695423a" +uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" +version = "0.1.10" + [[deps.Expat_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] git-tree-sha1 = "1c6317308b9dc757616f0b5cb379db10494443a7" @@ -414,11 +449,17 @@ git-tree-sha1 = "81023caa0021a41712685887db1fc03db26f41f5" uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910" version = "0.1.4" +[[deps.FFMPEG]] +deps = ["FFMPEG_jll"] +git-tree-sha1 = "53ebe7511fa11d33bec688a9178fac4e49eeee00" +uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" +version = "0.4.2" + [[deps.FFMPEG_jll]] deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "PCRE2_jll", "Zlib_jll", "libaom_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] 
-git-tree-sha1 = "8cc47f299902e13f90405ddb5bf87e5d474c0d38" +git-tree-sha1 = "466d45dc38e15794ec7d5d63ec03d776a9aff36e" uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" -version = "6.1.2+0" +version = "4.4.4+1" [[deps.FFTW]] deps = ["AbstractFFTs", "FFTW_jll", "LinearAlgebra", "MKL_jll", "Preferences", "Reexport"] @@ -515,6 +556,12 @@ version = "1.0.14+0" deps = ["Random"] uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" +[[deps.GLFW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll", "libdecor_jll", "xkbcommon_jll"] +git-tree-sha1 = "532f9126ad901533af1d4f5c198867227a7bb077" +uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" +version = "3.4.0+1" + [[deps.GPUArrays]] deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] git-tree-sha1 = "62ee71528cca49be797076a76bdc654a170a523e" @@ -533,6 +580,18 @@ git-tree-sha1 = "1d6f290a5eb1201cd63574fbc4440c788d5cb38f" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" version = "0.27.8" +[[deps.GR]] +deps = ["Artifacts", "Base64", "DelimitedFiles", "Downloads", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Preferences", "Printf", "Qt6Wayland_jll", "Random", "Serialization", "Sockets", "TOML", "Tar", "Test", "p7zip_jll"] +git-tree-sha1 = "ee28ddcd5517d54e417182fec3886e7412d3926f" +uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" +version = "0.73.8" + +[[deps.GR_jll]] +deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "FreeType2_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Qt6Base_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "f31929b9e67066bee48eec8b03c0df47d31a74b3" +uuid = "d2c73de3-f751-5644-a686-071e5b155ba9" +version = "0.73.8+0" + [[deps.GeoFormatTypes]] git-tree-sha1 = "59107c179a586f0fe667024c5eb7033e81333271" uuid = "68eda718-8dee-11e9-39e7-89f7f65f511f" @@ -570,9 +629,9 @@ version = 
"2.80.5+0" [[deps.Graphics]] deps = ["Colors", "LinearAlgebra", "NaNMath"] -git-tree-sha1 = "d61890399bc535850c4bf08e4e0d3a7ad0f21cbd" +git-tree-sha1 = "a641238db938fff9b2f60d08ed9030387daf428c" uuid = "a2bd30eb-e257-5431-a919-1863eab51364" -version = "1.1.2" +version = "1.1.3" [[deps.Graphite2_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -591,6 +650,12 @@ git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2" uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" version = "1.0.2" +[[deps.HTTP]] +deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] +git-tree-sha1 = "1336e07ba2eb75614c99496501a8f4b233e9fafe" +uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" +version = "1.10.10" + [[deps.HarfBuzz_jll]] deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll"] git-tree-sha1 = "401e4f3f30f43af2c8478fc008da50096ea5240f" @@ -609,9 +674,9 @@ weakdeps = ["AbstractTrees"] [[deps.Hwloc_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "dd3b49277ec2bb2c6b94eb1604d4d0616016f7a6" +git-tree-sha1 = "50aedf345a709ab75872f80a2779568dc0bb461b" uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" -version = "2.11.2+0" +version = "2.11.2+1" [[deps.HypergeometricFunctions]] deps = ["LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"] @@ -627,9 +692,9 @@ version = "1.26.0" [[deps.ImageAxes]] deps = ["AxisArrays", "ImageBase", "ImageCore", "Reexport", "SimpleTraits"] -git-tree-sha1 = "2e4520d67b0cef90865b3ef727594d2a58e0e1f8" +git-tree-sha1 = "e12629406c6c4442539436581041d372d69c55ba" uuid = "2803e5a7-5153-5ecf-9a86-9b4c37f5f5ac" -version = "0.6.11" +version = "0.6.12" [[deps.ImageBase]] deps = ["ImageCore", "Reexport"] @@ -639,9 +704,9 @@ version = "0.1.7" [[deps.ImageCore]] deps = ["ColorVectorSpace", "Colors", "FixedPointNumbers", 
"MappedArrays", "MosaicViews", "OffsetArrays", "PaddedViews", "PrecompileTools", "Reexport"] -git-tree-sha1 = "b2a7eaa169c13f5bcae8131a83bc30eff8f71be0" +git-tree-sha1 = "b219503865f42a12ad20ea67082e0fdb69b73ad9" uuid = "a09fc81d-aa75-5fe9-8630-4744c3626534" -version = "0.10.2" +version = "0.10.3" [[deps.ImageIO]] deps = ["FileIO", "IndirectArrays", "JpegTurbo", "LazyModules", "Netpbm", "OpenEXR", "PNGFiles", "QOI", "Sixel", "TiffImages", "UUIDs", "WebP"] @@ -651,9 +716,9 @@ version = "0.6.9" [[deps.ImageMetadata]] deps = ["AxisArrays", "ImageAxes", "ImageBase", "ImageCore"] -git-tree-sha1 = "355e2b974f2e3212a75dfb60519de21361ad3cb7" +git-tree-sha1 = "2a81c3897be6fbcde0802a0ebe6796d0562f63ec" uuid = "bc367c6b-8a6b-528e-b4bd-a4b897500b49" -version = "0.9.9" +version = "0.9.10" [[deps.Imath_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] @@ -731,17 +796,13 @@ version = "0.22.19" git-tree-sha1 = "dba9ddf07f77f60450fe5d2e2beb9854d9a49bd0" uuid = "8197267c-284f-5f27-9208-e0e47529a953" version = "0.7.10" +weakdeps = ["Random", "RecipesBase", "Statistics"] [deps.IntervalSets.extensions] IntervalSetsRandomExt = "Random" IntervalSetsRecipesBaseExt = "RecipesBase" IntervalSetsStatisticsExt = "Statistics" - [deps.IntervalSets.weakdeps] - Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" - Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - [[deps.InverseFunctions]] git-tree-sha1 = "a779299d77cd080bf77b97535acecd73e1c5e5cb" uuid = "3587e190-3f89-42d0-90ee-14403ec27112" @@ -778,6 +839,12 @@ git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" version = "1.0.0" +[[deps.JLFzf]] +deps = ["Pipe", "REPL", "Random", "fzf_jll"] +git-tree-sha1 = "39d64b09147620f5ffbf6b2d3255be3c901bec63" +uuid = "1019f520-868f-41f5-a6de-eb00f4b6a39c" +version = "0.1.8" + [[deps.JLLWrappers]] deps = ["Artifacts", "Preferences"] git-tree-sha1 = "be3dc50a92e5a386872a493a10050136d4703f9b" @@ -880,6 
+947,22 @@ git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c" uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" version = "1.4.0" +[[deps.Latexify]] +deps = ["Format", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "OrderedCollections", "Requires"] +git-tree-sha1 = "ce5f5621cac23a86011836badfedf664a612cee4" +uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" +version = "0.16.5" + + [deps.Latexify.extensions] + DataFramesExt = "DataFrames" + SparseArraysExt = "SparseArrays" + SymEngineExt = "SymEngine" + + [deps.Latexify.weakdeps] + DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + SymEngine = "123dc426-2d89-5057-bbad-38513e3affd8" + [[deps.LazyArtifacts]] deps = ["Artifacts", "Pkg"] uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" @@ -987,6 +1070,12 @@ version = "0.3.28" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +[[deps.LoggingExtras]] +deps = ["Dates", "Logging"] +git-tree-sha1 = "f02b56007b064fbfddb4c9cd60161b6dd0f40df3" +uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" +version = "1.1.0" + [[deps.MKL_jll]] deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "oneTBB_jll"] git-tree-sha1 = "f046ccd0c6db2832a9f639e2c669c6fe867e5f4f" @@ -1069,6 +1158,11 @@ deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" version = "2.28.2+1" +[[deps.Measures]] +git-tree-sha1 = "c13304c81eec1ed3af7fc20e75fb6b26092a1102" +uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" +version = "0.3.2" + [[deps.MicrosoftMPI_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "f12a29c4400ba812841c6ace3f4efbb6dbb3ba01" @@ -1176,6 +1270,12 @@ git-tree-sha1 = "bfce6d523861a6c562721b262c0d1aaeead2647f" uuid = "fe0851c0-eecd-5654-98d4-656369965a5c" version = "5.0.5+0" +[[deps.OpenSSL]] +deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] +git-tree-sha1 = "38cb508d080d21dc1128f7fb04f20387ed4c0af4" +uuid = 
"4d8831e6-92b7-49fb-bdf8-b643e874388c" +version = "1.4.3" + [[deps.OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] git-tree-sha1 = "7493f61f55a6cce7325f197443aa80d32554ba10" @@ -1240,6 +1340,11 @@ git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" version = "2.8.1" +[[deps.Pipe]] +git-tree-sha1 = "6842804e7867b115ca9de748a0cf6b364523c16d" +uuid = "b98c9c47-44ae-5843-9183-064241ee97a0" +version = "1.3.0" + [[deps.Pixman_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "Libdl"] git-tree-sha1 = "35621f10a7531bc8fa58f74610b1bfb70a3cfc6b" @@ -1257,12 +1362,38 @@ git-tree-sha1 = "f9501cc0430a26bc3d156ae1b5b0c1b47af4d6da" uuid = "eebad327-c553-4316-9ea0-9fa01ccd7688" version = "0.3.3" +[[deps.PlotThemes]] +deps = ["PlotUtils", "Statistics"] +git-tree-sha1 = "41031ef3a1be6f5bbbf3e8073f210556daeae5ca" +uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" +version = "3.3.0" + [[deps.PlotUtils]] deps = ["ColorSchemes", "Colors", "Dates", "PrecompileTools", "Printf", "Random", "Reexport", "StableRNGs", "Statistics"] git-tree-sha1 = "3ca9a356cd2e113c420f2c13bea19f8d3fb1cb18" uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" version = "1.4.3" +[[deps.Plots]] +deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "JLFzf", "JSON", "LaTeXStrings", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "PrecompileTools", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "RelocatableFolders", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "TOML", "UUIDs", "UnicodeFun", "UnitfulLatexify", "Unzip"] +git-tree-sha1 = "45470145863035bb124ca51b320ed35d071cc6c2" +uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +version = "1.40.8" + + [deps.Plots.extensions] + FileIOExt = "FileIO" + GeometryBasicsExt = "GeometryBasics" + IJuliaExt = "IJulia" + ImageInTerminalExt = "ImageInTerminal" 
+ UnitfulExt = "Unitful" + + [deps.Plots.weakdeps] + FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" + GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" + IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" + ImageInTerminal = "d8c32880-2388-543b-8c61-d9f865259254" + Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" + [[deps.PolygonOps]] git-tree-sha1 = "77b3d3605fc1cd0b42d95eba87dfcd2bf67d5ff6" uuid = "647866c9-e3ac-4575-94e7-e3d426903924" @@ -1313,9 +1444,33 @@ version = "1.2.1" [[deps.QOI]] deps = ["ColorTypes", "FileIO", "FixedPointNumbers"] -git-tree-sha1 = "18e8f4d1426e965c7b532ddd260599e1510d26ce" +git-tree-sha1 = "8b3fc30bc0390abdce15f8822c889f669baed73d" uuid = "4b34888f-f399-49d4-9bb3-47ed5cae4e65" -version = "1.0.0" +version = "1.0.1" + +[[deps.Qt6Base_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Vulkan_Loader_jll", "Xorg_libSM_jll", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_cursor_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "libinput_jll", "xkbcommon_jll"] +git-tree-sha1 = "492601870742dcd38f233b23c3ec629628c1d724" +uuid = "c0090381-4147-56d7-9ebc-da0b1113ec56" +version = "6.7.1+1" + +[[deps.Qt6Declarative_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Qt6Base_jll", "Qt6ShaderTools_jll"] +git-tree-sha1 = "e5dd466bf2569fe08c91a2cc29c1003f4797ac3b" +uuid = "629bc702-f1f5-5709-abd5-49b8460ea067" +version = "6.7.1+2" + +[[deps.Qt6ShaderTools_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Qt6Base_jll"] +git-tree-sha1 = "1a180aeced866700d4bebc3120ea1451201f16bc" +uuid = "ce943373-25bb-56aa-8eca-768745ed7b5a" +version = "6.7.1+1" + +[[deps.Qt6Wayland_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Qt6Base_jll", "Qt6Declarative_jll"] +git-tree-sha1 = "729927532d48cf79f49070341e1d918a65aba6b0" +uuid = 
"e99dba38-086e-5de3-a5b1-6e4c66e897c3" +version = "6.7.1+1" [[deps.QuadGK]] deps = ["DataStructures", "LinearAlgebra"] @@ -1364,6 +1519,18 @@ weakdeps = ["FixedPointNumbers"] [deps.Ratios.extensions] RatiosFixedPointNumbersExt = "FixedPointNumbers" +[[deps.RecipesBase]] +deps = ["PrecompileTools"] +git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "1.3.4" + +[[deps.RecipesPipeline]] +deps = ["Dates", "NaNMath", "PlotUtils", "PrecompileTools", "RecipesBase"] +git-tree-sha1 = "45cf9fd0ca5839d06ef333c8201714e888486342" +uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" +version = "0.6.12" + [[deps.Reexport]] git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" uuid = "189a3867-3050-52da-a836-e630ba90ab69" @@ -1445,6 +1612,11 @@ git-tree-sha1 = "d263a08ec505853a5ff1c1ebde2070419e3f28e9" uuid = "73760f76-fbc4-59ce-8f25-708e95d2df96" version = "0.4.0" +[[deps.SimpleBufferStream]] +git-tree-sha1 = "f305871d2f381d21527c770d4788c06c097c9bc1" +uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" +version = "1.2.0" + [[deps.SimpleTraits]] deps = ["InteractiveUtils", "MacroTools"] git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" @@ -1638,9 +1810,9 @@ version = "0.4.5" [[deps.TiffImages]] deps = ["ColorTypes", "DataStructures", "DocStringExtensions", "FileIO", "FixedPointNumbers", "IndirectArrays", "Inflate", "Mmap", "OffsetArrays", "PkgVersion", "ProgressMeter", "SIMD", "UUIDs"] -git-tree-sha1 = "6ee0c220d0aecad18792c277ae358129cc50a475" +git-tree-sha1 = "0248b1b2210285652fbc67fd6ced9bf0394bcfec" uuid = "731e570b-9d59-4bfa-96dc-6df516fadf69" -version = "0.11.0" +version = "0.11.1" [[deps.TimerOutputs]] deps = ["ExprTools", "Printf"] @@ -1658,6 +1830,11 @@ git-tree-sha1 = "4d4ed7f294cda19382ff7de4c137d24d16adc89b" uuid = "981d1d27-644d-49a2-9326-4793e63143c3" version = "0.1.0" +[[deps.URIs]] +git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" +uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" +version = 
"1.5.1" + [[deps.UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" @@ -1682,6 +1859,12 @@ weakdeps = ["ConstructionBase", "InverseFunctions"] ConstructionBaseUnitfulExt = "ConstructionBase" InverseFunctionsUnitfulExt = "InverseFunctions" +[[deps.UnitfulLatexify]] +deps = ["LaTeXStrings", "Latexify", "Unitful"] +git-tree-sha1 = "975c354fcd5f7e1ddcc1f1a23e6e091d99e99bc8" +uuid = "45397f5d-5981-4c77-b2b3-fc36d6e9b728" +version = "1.6.4" + [[deps.UnsafeAtomics]] git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" @@ -1693,16 +1876,39 @@ git-tree-sha1 = "2d17fabcd17e67d7625ce9c531fb9f40b7c42ce4" uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" version = "0.2.1" +[[deps.Unzip]] +git-tree-sha1 = "ca0969166a028236229f63514992fc073799bb78" +uuid = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d" +version = "0.2.0" + [[deps.VersionParsing]] git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868" uuid = "81def892-9a0e-5fdd-b105-ffc91e053289" version = "1.3.0" +[[deps.Vulkan_Loader_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Wayland_jll", "Xorg_libX11_jll", "Xorg_libXrandr_jll", "xkbcommon_jll"] +git-tree-sha1 = "2f0486047a07670caad3a81a075d2e518acc5c59" +uuid = "a44049a8-05dd-5a78-86c9-5fde0876e88c" +version = "1.3.243+0" + +[[deps.Wayland_jll]] +deps = ["Artifacts", "EpollShim_jll", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "7558e29847e99bc3f04d6569e82d0f5c54460703" +uuid = "a2964d1f-97da-50d4-b82a-358c7fce9d89" +version = "1.21.0+1" + +[[deps.Wayland_protocols_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "93f43ab61b16ddfb2fd3bb13b3ce241cafb0e6c9" +uuid = "2381bf8a-dfd0-557d-9999-79630e7b1b91" +version = "1.31.0+0" + [[deps.WebP]] deps = ["CEnum", "ColorTypes", "FileIO", "FixedPointNumbers", "ImageCore", "libwebp_jll"] -git-tree-sha1 = "f1f6d497ff84039deeb37f264396dac0c2250497" +git-tree-sha1 = 
"aa1ca3c47f119fbdae8770c29820e5e6119b83f2" uuid = "e3aaa7dc-3e4b-44e0-be63-ffb868ccd7c1" -version = "0.1.2" +version = "0.1.3" [[deps.WoodburyMatrices]] deps = ["LinearAlgebra", "SparseArrays"] @@ -1728,6 +1934,18 @@ git-tree-sha1 = "15e637a697345f6743674f1322beefbc5dcd5cfc" uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800" version = "5.6.3+0" +[[deps.Xorg_libICE_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "326b4fea307b0b39892b3e85fa451692eda8d46c" +uuid = "f67eecfb-183a-506d-b269-f58e52b52d7c" +version = "1.1.1+0" + +[[deps.Xorg_libSM_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libICE_jll"] +git-tree-sha1 = "3796722887072218eabafb494a13c963209754ce" +uuid = "c834827a-8449-5923-a945-d239c165b7dd" +version = "1.2.4+0" + [[deps.Xorg_libX11_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] git-tree-sha1 = "afead5aba5aa507ad5a3bf01f58f82c8d1403495" @@ -1740,6 +1958,12 @@ git-tree-sha1 = "6035850dcc70518ca32f012e46015b9beeda49d8" uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" version = "1.0.11+0" +[[deps.Xorg_libXcursor_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXfixes_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "12e0eb3bc634fa2080c1c37fccf56f7c22989afd" +uuid = "935fb764-8cf2-53bf-bb30-45bb1f8bf724" +version = "1.2.0+4" + [[deps.Xorg_libXdmcp_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] git-tree-sha1 = "34d526d318358a859d7de23da945578e8e8727b7" @@ -1752,6 +1976,30 @@ git-tree-sha1 = "d2d1a5c49fae4ba39983f63de6afcbea47194e85" uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" version = "1.3.6+0" +[[deps.Xorg_libXfixes_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "0e0dc7431e7a0587559f9294aeec269471c991a4" +uuid = "d091e8ba-531a-589c-9de9-94069b037ed8" +version = "5.0.3+4" + +[[deps.Xorg_libXi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXfixes_jll"] +git-tree-sha1 = 
"89b52bc2160aadc84d707093930ef0bffa641246" +uuid = "a51aa0fd-4e3c-5386-b890-e753decda492" +version = "1.7.10+4" + +[[deps.Xorg_libXinerama_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll"] +git-tree-sha1 = "26be8b1c342929259317d8b9f7b53bf2bb73b123" +uuid = "d1454406-59df-5ea1-beac-c340f2130bc3" +version = "1.1.4+4" + +[[deps.Xorg_libXrandr_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "34cea83cb726fb58f325887bf0612c6b3fb17631" +uuid = "ec84b674-ba8e-5d96-8ba1-2a689ba10484" +version = "1.5.2+4" + [[deps.Xorg_libXrender_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libX11_jll"] git-tree-sha1 = "47e45cd78224c53109495b3e324df0c37bb61fbe" @@ -1770,6 +2018,60 @@ git-tree-sha1 = "bcd466676fef0878338c61e655629fa7bbc69d8e" uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" version = "1.17.0+0" +[[deps.Xorg_libxkbfile_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libX11_jll"] +git-tree-sha1 = "730eeca102434283c50ccf7d1ecdadf521a765a4" +uuid = "cc61e674-0454-545c-8b26-ed2c68acab7a" +version = "1.1.2+0" + +[[deps.Xorg_xcb_util_cursor_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_jll", "Xorg_xcb_util_renderutil_jll"] +git-tree-sha1 = "04341cb870f29dcd5e39055f895c39d016e18ccd" +uuid = "e920d4aa-a673-5f3a-b3d7-f755a4d47c43" +version = "0.1.4+0" + +[[deps.Xorg_xcb_util_image_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "0fab0a40349ba1cba2c1da699243396ff8e94b97" +uuid = "12413925-8142-5f55-bb0e-6d7ca50bb09b" +version = "0.4.0+1" + +[[deps.Xorg_xcb_util_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll"] +git-tree-sha1 = "e7fd7b2881fa2eaa72717420894d3938177862d1" +uuid = "2def613f-5ad1-5310-b15b-b15d46f528f5" +version = "0.4.0+1" + +[[deps.Xorg_xcb_util_keysyms_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] 
+git-tree-sha1 = "d1151e2c45a544f32441a567d1690e701ec89b00" +uuid = "975044d2-76e6-5fbe-bf08-97ce7c6574c7" +version = "0.4.0+1" + +[[deps.Xorg_xcb_util_renderutil_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "dfd7a8f38d4613b6a575253b3174dd991ca6183e" +uuid = "0d47668e-0667-5a69-a72c-f761630bfb7e" +version = "0.3.9+1" + +[[deps.Xorg_xcb_util_wm_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "e78d10aab01a4a154142c5006ed44fd9e8e31b67" +uuid = "c22f9ab0-d5fe-5066-847c-f4bb1cd4e361" +version = "0.4.1+1" + +[[deps.Xorg_xkbcomp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libxkbfile_jll"] +git-tree-sha1 = "330f955bc41bb8f5270a369c473fc4a5a4e4d3cb" +uuid = "35661453-b289-5fab-8a00-3d9160c6a3a4" +version = "1.4.6+0" + +[[deps.Xorg_xkeyboard_config_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_xkbcomp_jll"] +git-tree-sha1 = "691634e5453ad362044e2ad653e79f3ee3bb98c3" +uuid = "33bec58e-1273-512f-9401-5d533626f822" +version = "2.39.0+0" + [[deps.Xorg_xtrans_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] git-tree-sha1 = "e92a1a012a10506618f10b7047e478403a046c77" @@ -1805,6 +2107,24 @@ git-tree-sha1 = "6498e3581023f8e530f34760d18f75a69e3a4ea8" uuid = "1e29f10c-031c-5a83-9565-69cddfc27673" version = "1.3.0+0" +[[deps.eudev_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "gperf_jll"] +git-tree-sha1 = "431b678a28ebb559d224c0b6b6d01afce87c51ba" +uuid = "35ca27e7-8b34-5b7f-bca9-bdc33f59eb06" +version = "3.2.9+0" + +[[deps.fzf_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "936081b536ae4aa65415d869287d43ef3cb576b2" +uuid = "214eeab7-80f7-51ab-84ad-2988db7cef09" +version = "0.53.0+0" + +[[deps.gperf_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "3516a5630f741c9eecb3720b1ec9d8edc3ecc033" +uuid = "1a1c6b14-54f6-533d-8383-74cd7377aa70" +version = "3.1.1+0" + [[deps.isoband_jll]] deps = ["Artifacts", 
"JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "51b5eeb3f98367157a7a12a1fb0aa5328946c03c" @@ -1828,12 +2148,30 @@ deps = ["Artifacts", "Libdl"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" version = "5.8.0+1" +[[deps.libdecor_jll]] +deps = ["Artifacts", "Dbus_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pango_jll", "Wayland_jll", "xkbcommon_jll"] +git-tree-sha1 = "9bf7903af251d2050b467f76bdbe57ce541f7f4f" +uuid = "1183f4f0-6f2a-5f1a-908b-139f9cdfea6f" +version = "0.2.2+0" + +[[deps.libevdev_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "141fe65dc3efabb0b1d5ba74e91f6ad26f84cc22" +uuid = "2db6ffa8-e38f-5e21-84af-90c45d0032cc" +version = "1.11.0+0" + [[deps.libfdk_aac_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] git-tree-sha1 = "8a22cf860a7d27e4f3498a0fe0811a7957badb38" uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" version = "2.0.3+0" +[[deps.libinput_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "eudev_jll", "libevdev_jll", "mtdev_jll"] +git-tree-sha1 = "ad50e5b90f222cfe78aa3d5183a20a12de1322ce" +uuid = "36db933b-70db-51c0-b978-0f229ee0e533" +version = "1.18.0+0" + [[deps.libpng_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Zlib_jll"] git-tree-sha1 = "b70c870239dc3d7bc094eb2d6be9b73d27bef280" @@ -1864,6 +2202,12 @@ git-tree-sha1 = "ccbb625a89ec6195856a50aa2b668a5c08712c94" uuid = "c5f90fcd-3b7e-5836-afba-fc50a0988cb2" version = "1.4.0+0" +[[deps.mtdev_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "814e154bdb7be91d78b6802843f76b6ece642f11" +uuid = "009596ad-96f7-51b1-9f1b-5ce2d5e8a71e" +version = "1.1.6+0" + [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" @@ -1881,13 +2225,19 @@ uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" version = "17.4.0+2" [[deps.x264_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "35976a1216d6c066ea32cba2150c4fa682b276fc" +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 
= "4fea590b89e6ec504593146bf8b988b2c00922b2" uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" -version = "10164.0.0+0" +version = "2021.5.5+0" [[deps.x265_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "dcc541bb19ed5b0ede95581fb2e41ecf179527d2" +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ee567a171cce03570d77ad3a43e90218e38937a9" uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" -version = "3.6.0+0" +version = "3.5.0+0" + +[[deps.xkbcommon_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll", "Wayland_protocols_jll", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"] +git-tree-sha1 = "9c304562909ab2bab0262639bd4f444d7bc2be37" +uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd" +version = "1.4.1+1" diff --git a/Project.toml b/Project.toml index 0e3617e..29e570e 100644 --- a/Project.toml +++ b/Project.toml @@ -3,12 +3,14 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" +ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" ThreadPinning = "811555cd-349b-4f26-b7bc-1f208b848042" diff --git a/parts/gpu/stencil.ipynb b/parts/gpu/stencil.ipynb index 80e5ca6..8bfc0bb 100644 --- a/parts/gpu/stencil.ipynb +++ b/parts/gpu/stencil.ipynb @@ -64,23 +64,24 @@ "outputs": [], "source": [ "#import Pkg\n", - "#Pkg.add(\"CairoMakie\")" + "#Pkg.add(\"Plots\")\n", + "#Pkg.add(\"ColorTypes\")" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, 
"metadata": { "tags": [] }, "outputs": [], "source": [ - "using CUDA" + "using CUDA # for NVIDIA GPU access" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": { "tags": [] }, @@ -91,24 +92,24 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[32m\u001b[1mPrecompiling\u001b[22m\u001b[39m CairoMakie\n", - " CairoMakie\u001b[36m Being precompiled by another process (pid: 1288681, pidfile: /pscratch/sd/t/train921/depot/compiled/v1.10/CairoMakie/9mSey_z5c7n.ji.pidfile)\u001b[39m\n", - "\u001b[32m ✓ \u001b[39mCairoMakie\n", - " 1 dependency successfully precompiled in 9 seconds. 240 already precompiled.\n" - ] - } - ], + "outputs": [], "source": [ - "using CairoMakie # for plotting" + "using ProgressMeter # for progress monitoring" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "using Plots, ColorTypes # for plotting" ] }, { @@ -120,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": { "tags": [] }, @@ -131,7 +132,7 @@ "text": [ "CUDA runtime 12.2, local installation\n", "CUDA driver 12.6\n", - "NVIDIA driver 535.183.6\n", + "NVIDIA driver 535.216.1\n", "\n", "CUDA libraries: \n", "- CUBLAS: 12.2.1\n", @@ -140,7 +141,7 @@ "- CUSOLVER: 11.5.0\n", "- CUSPARSE: 12.1.1\n", "- CUPTI: 2023.2.0 (API 20.0.0)\n", - "- NVML: 12.0.0+535.183.6\n", + "- NVML: 12.0.0+535.216.1\n", "\n", "Julia packages: \n", "- CUDA: 5.5.2\n", @@ -157,7 +158,7 @@ "- CUDA_Runtime_jll.local: true\n", "\n", "1 device:\n", - " 0: NVIDIA A100-PCIE-40GB (sm_80, 30.909 GiB / 40.000 GiB available)\n" + " 0: NVIDIA A100-SXM4-40GB (sm_80, 39.390 GiB / 40.000 GiB available)\n" ] } ], @@ -215,6 +216,663 @@ "By iterating this update rule over successive time steps, the heat equation's behavior in 2D space can be 
simulated, demonstrating how heat spreads out from regions of high temperature to cooler areas, resulting in a smooth temperature distribution over time. This approach can be efficiently implemented in computational algorithms and forms the basis for more advanced simulations in fields like physics, engineering, and climate modeling.\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CPU (Single-Threaded) Implementation of the Discretized Heat Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "simulate_gs_cpu (generic function with 3 methods)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function laplacian_cpu(A, dx)\n", + " roll_x = circshift(A, (1, 0)) + circshift(A, (-1, 0))\n", + " roll_y = circshift(A, (0, 1)) + circshift(A, (0, -1))\n", + " roll_xy = -4 * A\n", + " return (roll_x + roll_y + roll_xy) / dx^2\n", + "end\n", + "function calculate_gs_cpu!(U, V, D_u, D_v, F, k, dt, dx, nx, ny)\n", + " # Compute diffusion\n", + " ΔU = laplacian_cpu(U, dx)\n", + " ΔV = laplacian_cpu(V, dx)\n", + "\n", + " # Compute and update concentration fields\n", + " UVV = U .* V .* V\n", + " U .+= (D_u .* ΔU .- UVV .+ F .* (1 .- U)) .* dt\n", + " V .+= (D_v .* ΔV .+ UVV .- (F .+ k) .* V) .* dt\n", + " \n", + " return\n", + "end\n", + "function simulate_gs_cpu(n = 500 #=Grid size=#, nsteps = 10000 #=Number of iterations=#)\n", + " # Initialize parameters\n", + " D_u = 0.16 # Diffusion rate of U\n", + " D_v = 0.08 # Diffusion rate of V\n", + " F = 0.035 # Feed rate\n", + " k = 0.06 # Kill rate\n", + " dt = 1.0 # Time step\n", + " dx = 1.0 # Spatial step\n", + " plot_every = 500 # Plot every 500 steps\n", + " \n", + " # Initialize concentration fields U and V\n", + " U = rand(0.0:0.001:0.1, n, n)\n", + " V = zeros(n, n)\n", + "\n", + " # Set up an initial small disturbance\n", + " n_mid = 
div(n, 2)\n", + " U[n_mid-5:n_mid+5, n_mid-5:n_mid+5] .= 0.5\n", + " V[n_mid-5:n_mid+5, n_mid-5:n_mid+5] .= 0.25\n", + "\n", + " # Set up a progress bar\n", + " prog = Progress(div(nsteps, plot_every))\n", + "\n", + " # Set up a figure for the heatmaps\n", + " plt = Plots.plot(title=\"Gray-Scott Model\", xlabel=\"X\", ylabel=\"Y\", size=(600, 600))\n", + "\n", + " # Simulation loop\n", + " anim = Plots.@animate for T in 1:div(nsteps, plot_every)\n", + " # Update the U and V concentrations\n", + " for t in 1:plot_every\n", + " calculate_gs_cpu!(U, V, D_u, D_v, F, k, dt, dx, n, n)\n", + " end\n", + " \n", + " # Update the progress bar\n", + " next!(prog)\n", + "\n", + " # Plot the current U concentrations\n", + " Plots.heatmap!(plt, U, color=cgrad([:black, :blue, :white]), clims=(0, 1), title=\"Gray-Scott Model\")\n", + " end\n", + " gif(anim, \"gs_cpu_serial.gif\"; fps=10)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:47\u001b[39m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 49.895226 seconds (426.31 M allocations: 367.460 GiB, 7.30% gc time, 5.38% compilation time: 28% of which was recompilation)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m\u001b[1m[ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mSaved animation to /global/u2/t/train921/julia-hpc-tutorial-sc24-main/parts/gpu/gs_cpu_serial.gif\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "Plots.AnimatedGif(\"/global/u2/t/train921/julia-hpc-tutorial-sc24-main/parts/gpu/gs_cpu_serial.gif\")" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@time simulate_gs_cpu(500, 10000)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "## CPU (Multi-Threaded) Implementation of the Discretized Heat Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "simulate_gs_cpu_mt (generic function with 3 methods)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function laplacian_cpu_mt(A, x, y, dx)\n", + " @inbounds DAx = (A[x-1, y] - 2.0 * A[x, y] + A[x+1, y]) / dx^2\n", + " @inbounds DAy = (A[x, y-1] - 2.0 * A[x, y] + A[x, y+1]) / dx^2\n", + " return DAx + DAy\n", + "end\n", + "function calculate_gs_cpu_mt!(U, V, D_u, D_v, F, k, dt, dx, nx, ny)\n", + " # Allocate U and V temporaries for updates\n", + " DU = Matrix{Float64}(undef, nx, ny)\n", + " DV = Matrix{Float64}(undef, nx, ny)\n", + "\n", + " # Compute diffusion and concentration fields changes\n", + " Threads.@threads for y in 2:(ny-1)\n", + " for x in 2:(nx-1)\n", + " # Compute diffusion\n", + " ΔU = laplacian_cpu_mt(U, x, y, dx)\n", + " ΔV = laplacian_cpu_mt(V, x, y, dx)\n", + "\n", + " # Compute concentration fields changes\n", + " UVV = @inbounds U[x, y] * (V[x, y] ^ 2)\n", + " @inbounds DU[x, y] = D_u * ΔU - UVV + F * (1 - U[x, y])\n", + " @inbounds DV[x, y] = D_v * ΔV + UVV - (F + k) * V[x, y]\n", + " end\n", + " end\n", + "\n", + " # Update concentration fields\n", + " Threads.@threads for y in 2:(ny-1)\n", + " for x in 2:(nx-1)\n", + " @inbounds U[x, y] += DU[x, y] * dt\n", + " @inbounds V[x, y] += DV[x, y] * dt\n", + " end\n", + " end\n", + " \n", + " return\n", + "end\n", + "function simulate_gs_cpu_mt(n = 500 #=Grid size=#, nsteps = 10000 #=Number of iterations=#)\n", + " # Initialize parameters\n", + " D_u = 0.16 # Diffusion rate of U\n", + " D_v = 0.08 # Diffusion rate of V\n", + " F = 0.035 # Feed rate\n", + " k = 0.06 # Kill rate\n", + " dt = 1.0 # Time step\n", + " dx = 1.0 # Spatial step\n", + " plot_every = 500 # Plot every 500 steps\n", + "
\n", + " # Initialize concentration fields U and V\n", + " U = rand(0.0:0.001:0.1, n, n)\n", + " V = zeros(n, n)\n", + "\n", + " # Set up an initial small disturbance\n", + " n_mid = div(n, 2)\n", + " U[n_mid-5:n_mid+5, n_mid-5:n_mid+5] .= 0.5\n", + " V[n_mid-5:n_mid+5, n_mid-5:n_mid+5] .= 0.25\n", + "\n", + " # Set up a progress bar\n", + " prog = Progress(div(nsteps, plot_every))\n", + "\n", + " # Set up a figure for the heatmaps\n", + " plt = Plots.plot(title=\"Gray-Scott Model\", xlabel=\"X\", ylabel=\"Y\", size=(600, 600))\n", + "\n", + " # Simulation loop\n", + " anim = Plots.@animate for T in 1:div(nsteps, plot_every)\n", + " # Update the U and V concentrations\n", + " for t in 1:plot_every\n", + " calculate_gs_cpu_mt!(U, V, D_u, D_v, F, k, dt, dx, n, n)\n", + " end\n", + " \n", + " # Update the progress bar\n", + " next!(prog)\n", + "\n", + " # Plot the current U concentrations\n", + " Plots.heatmap!(plt, U, color=cgrad([:black, :blue, :white]), clims=(0, 1), title=\"Gray-Scott Model\")\n", + " end\n", + " gif(anim, \"gs_cpu_mt.gif\"; fps=10)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:43\u001b[39m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 45.249138 seconds (420.64 M allocations: 367.074 GiB, 6.26% gc time, 0.01% compilation time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m\u001b[1m[ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mSaved animation to /global/u2/t/train921/julia-hpc-tutorial-sc24-main/parts/gpu/gs_cpu_mt.gif\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "Plots.AnimatedGif(\"/global/u2/t/train921/julia-hpc-tutorial-sc24-main/parts/gpu/gs_cpu_mt.gif\")" + ] + }, + "execution_count": 9, + "metadata": {},
+ "output_type": "execute_result" + } + ], + "source": [ + "@time simulate_gs_cpu_mt(500, 10000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## GPU (CUDA) Implementation of the Discretized Heat Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "simulate_gs_cuda (generic function with 3 methods)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function laplacian_cuda(A, x, y, dx)\n", + " @inbounds DAx = (A[x-1, y] - 2.0 * A[x, y] + A[x+1, y]) / dx^2\n", + " @inbounds DAy = (A[x, y-1] - 2.0 * A[x, y] + A[x, y+1]) / dx^2\n", + " return DAx + DAy\n", + "end\n", + "function calculate_gs_cuda!(U, V, D_u, D_v, F, k, dt, dx, nx, ny)\n", + " x = (blockIdx().x - 1) * blockDim().x + threadIdx().x\n", + " y = (blockIdx().y - 1) * blockDim().y + threadIdx().y\n", + "\n", + " # Skip ghost cells on the boundaries\n", + " if (1 < x < nx) && (1 < y < ny)\n", + " # Compute diffusion\n", + " ΔU = laplacian_cuda(U, x, y, dx)\n", + " ΔV = laplacian_cuda(V, x, y, dx)\n", + "\n", + " # Update concentration fields\n", + " UVV = @inbounds U[x, y] * (V[x, y] ^ 2)\n", + " DU = @inbounds D_u * ΔU - UVV + F * (1 - U[x, y])\n", + " DV = @inbounds D_v * ΔV + UVV - (F + k) * V[x, y]\n", + "\n", + " # Wait for all threads in this block to read U and V (sync_threads does not synchronize across blocks)\n", + " CUDA.sync_threads()\n", + "\n", + " # Update concentration fields\n", + " @inbounds U[x, y] += DU * dt\n", + " @inbounds V[x, y] += DV * dt\n", + "\n", + " # Wait for all threads in this block to write U and V\n", + " CUDA.sync_threads()\n", + " end\n", + " \n", + " return\n", + "end\n", + "function simulate_gs_cuda(n = 500 #=Grid size=#, nsteps = 10000 #=Number of iterations=#)\n", + " # Initialize parameters\n", + " D_u = 0.16 # Diffusion rate of U\n", + " D_v = 0.08 # Diffusion rate of V\n", + " F = 0.035 # Feed rate\n", + " k = 0.06 # Kill rate\n", + " dt = 1.0 # Time
step\n", + " dx = 1.0 # Spatial step\n", + " plot_every = 500 # Plot every 500 steps\n", + " \n", + " # Initialize concentration fields U and V\n", + " U = rand(0.0:0.001:0.1, n, n)\n", + " V = zeros(n, n)\n", + "\n", + " # Set up an initial small disturbance\n", + " n_mid = div(n, 2)\n", + " U[n_mid-5:n_mid+5, n_mid-5:n_mid+5] .= 0.5\n", + " V[n_mid-5:n_mid+5, n_mid-5:n_mid+5] .= 0.25\n", + " \n", + " # Transfer to the GPU\n", + " U_gpu = CuArray(U)\n", + " V_gpu = CuArray(V)\n", + "\n", + " # Set up a progress bar\n", + " prog = Progress(div(nsteps, plot_every))\n", + "\n", + " # Set up a figure for the heatmaps\n", + " plt = Plots.plot(title=\"Gray-Scott Model\", xlabel=\"X\", ylabel=\"Y\", size=(600, 600))\n", + "\n", + " # Simulation loop\n", + " anim = Plots.@animate for T in 1:div(nsteps, plot_every)\n", + " # Update the U and V concentrations\n", + " for t in 1:plot_every\n", + " @cuda threads=(16, 16) blocks=(cld(n, 16), cld(n, 16)) calculate_gs_cuda!(U_gpu, V_gpu, D_u, D_v, F, k, dt, dx, n, n)\n", + " end\n", + " U_cpu = Array(U_gpu)\n", + " \n", + " # Update the progress bar\n", + " next!(prog)\n", + "\n", + " # Plot the current U concentrations\n", + " Plots.heatmap!(plt, U_cpu, color=cgrad([:black, :blue, :white]), clims=(0, 1), title=\"Gray-Scott Model\")\n", + " end\n", + " gif(anim, \"gs_cuda.gif\"; fps=10)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:14\u001b[39m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 16.302365 seconds (424.54 M allocations: 13.482 GiB, 3.91% gc time, 23.74% compilation time: 8% of which was recompilation)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m\u001b[1m[ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mSaved 
animation to /global/u2/t/train921/julia-hpc-tutorial-sc24-main/parts/gpu/gs_cuda.gif\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "Plots.AnimatedGif(\"/global/u2/t/train921/julia-hpc-tutorial-sc24-main/parts/gpu/gs_cuda.gif\")" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@time simulate_gs_cuda(500, 10000)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## KernelAbstractions Implementation of the Discretized Heat Equation" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "using KernelAbstractions" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "CUDABackend(false, false)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# add all options for different backends and array types\n", + "DEV = :NVIDIA\n", + "\n", + "if DEV == :NVIDIA\n", + " using CUDA\n", + " ArrayKA = CUDA.CuArray\n", + " Backend = CUDA.CUDABackend()\n", + "elseif DEV == :AMD\n", + " using AMDGPU\n", + " ArrayKA = AMDGPU.ROCArray\n", + " Backend = AMDGPU.ROCBackend()\n", + "elseif DEV == :oneAPI\n", + " using oneAPI \n", + " ArrayKA = oneAPI.oneArray\n", + " Backend = oneAPI.oneAPIBackend()\n", + "elseif DEV == :Metal\n", + " using Metal \n", + " ArrayKA = Metal.MtlArray\n", + " Backend = Metal.MetalBackend()\n", + "else DEV == :CPU\n", + " ArrayKA = Array\n", + " Backend = CPU()\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "simulate_gs_ka (generic function with 3 methods)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function laplacian_ka(A, x, y, 
dx)\n", + " @inbounds DAx = (A[x-1, y] - 2.0 * A[x, y] + A[x+1, y]) / dx^2\n", + " @inbounds DAy = (A[x, y-1] - 2.0 * A[x, y] + A[x, y+1]) / dx^2\n", + " return DAx + DAy\n", + "end\n", + "@kernel function calculate_gs_ka!(U, V, D_u, D_v, F, k, dt, dx, nx, ny)\n", + " x, y = @index(Global, NTuple)[1:2]\n", + "\n", + " # Skip ghost cells on the boundaries\n", + " if (1 < x < nx) && (1 < y < ny)\n", + " # Compute diffusion\n", + " ΔU = laplacian_ka(U, x, y, dx)\n", + " ΔV = laplacian_ka(V, x, y, dx)\n", + "\n", + " # Update concentration fields\n", + " UVV = @inbounds U[x, y] * (V[x, y] ^ 2)\n", + " DU = @inbounds D_u * ΔU - UVV + F * (1 - U[x, y])\n", + " DV = @inbounds D_v * ΔV + UVV - (F + k) * V[x, y]\n", + "\n", + " # Wait for all threads in this workgroup to read U and V (@synchronize does not synchronize across workgroups)\n", + " @synchronize()\n", + "\n", + " # Update concentration fields\n", + " @inbounds U[x, y] += DU * dt\n", + " @inbounds V[x, y] += DV * dt\n", + "\n", + " # Wait for all threads in this workgroup to write U and V\n", + " @synchronize()\n", + " end\n", + " \n", + " # No return, KernelAbstractions doesn't allow these right now\n", + "end\n", + "function simulate_gs_ka(n = 500 #=Grid size=#, nsteps = 10000 #=Number of iterations=#)\n", + " # Initialize parameters\n", + " D_u = 0.16 # Diffusion rate of U\n", + " D_v = 0.08 # Diffusion rate of V\n", + " F = 0.035 # Feed rate\n", + " k = 0.06 # Kill rate\n", + " dt = 1.0 # Time step\n", + " dx = 1.0 # Spatial step\n", + " plot_every = 500 # Plot every 500 steps\n", + " \n", + " # Initialize concentration fields U and V\n", + " U = rand(0.0:0.001:0.1, n, n)\n", + " V = zeros(n, n)\n", + "\n", + " # Set up an initial small disturbance\n", + " n_mid = div(n, 2)\n", + " U[n_mid-5:n_mid+5, n_mid-5:n_mid+5] .= 0.5\n", + " V[n_mid-5:n_mid+5, n_mid-5:n_mid+5] .= 0.25\n", + " \n", + " # Transfer to the GPU\n", + " U_gpu = KernelAbstractions.allocate(Backend, Float64, n, n)\n", + " V_gpu = KernelAbstractions.allocate(Backend, Float64, n, n)\n", + " KernelAbstractions.copyto!(Backend, U_gpu,
U)\n", + " KernelAbstractions.copyto!(Backend, V_gpu, V)\n", + " \n", + " # Instantiate KA kernel with a 16x16 workgroup size (the full ndrange is passed at launch)\n", + " kernel = calculate_gs_ka!(Backend, (16, 16))\n", + "\n", + " # Set up a progress bar\n", + " prog = Progress(div(nsteps, plot_every))\n", + "\n", + " # Set up a figure for the heatmaps\n", + " plt = Plots.plot(title=\"Gray-Scott Model\", xlabel=\"X\", ylabel=\"Y\", size=(600, 600))\n", + "\n", + " # Simulation loop\n", + " anim = Plots.@animate for T in 1:div(nsteps, plot_every)\n", + " # Update the U and V concentrations\n", + " for t in 1:plot_every\n", + " kernel(U_gpu, V_gpu, D_u, D_v, F, k, dt, dx, n, n, ndrange=(n, n))\n", + " end\n", + "\n", + " # Copy U results back to CPU array and synchronize\n", + " KernelAbstractions.copyto!(Backend, U, U_gpu)\n", + " KernelAbstractions.synchronize(Backend)\n", + " \n", + " # Update the progress bar\n", + " next!(prog)\n", + "\n", + " # Plot the current U concentrations\n", + " Plots.heatmap!(plt, U, color=cgrad([:black, :blue, :white]), clims=(0, 1), title=\"Gray-Scott Model\")\n", + " end\n", + " gif(anim, \"gs_ka.gif\"; fps=10)\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:11\u001b[39m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 12.580729 seconds (421.02 M allocations: 13.190 GiB, 5.28% gc time, 0.08% compilation time)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m\u001b[1m[ \u001b[22m\u001b[39m\u001b[36m\u001b[1mInfo: \u001b[22m\u001b[39mSaved animation to /global/u2/t/train921/julia-hpc-tutorial-sc24-main/parts/gpu/gs_ka.gif\n" + ] + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "Plots.AnimatedGif(\"/global/u2/t/train921/julia-hpc-tutorial-sc24-main/parts/gpu/gs_ka.gif\")" + ] + }, +
"execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@time simulate_gs_ka(500, 10000)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# OLD CODE" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -224,7 +882,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "metadata": { "tags": [] }, @@ -235,7 +893,7 @@ "500" ] }, - "execution_count": 10, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -262,7 +920,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "metadata": { "tags": [] }, @@ -273,7 +931,7 @@ "Field" ] }, - "execution_count": 11, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -291,13 +949,14 @@ " ny::Int64 # The number of grid cells in the y-direction.\n", " dx::Float64 # The size of each grid cell in the x-direction.\n", " dy::Float64 # The size of each grid cell in the y-direction.\n", - " data::T # A 2D array storing the temperature values across the grid, including ghost layers.\n", + " data1::T # A 2D array storing the temperature values across the grid, including ghost layers.\n", + " data2::T\n", "end" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": { "tags": [] }, @@ -308,31 +967,31 @@ "Field" ] }, - "execution_count": 12, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# outer constructor with default cell sizes and initialized data\n", - "Field(nx::Int64, ny::Int64, data) = Field{typeof(data)}(nx, ny, 0.01, 0.01, data)" + "Field(nx::Int64, ny::Int64, data1, data2=data1) = Field{typeof(data1)}(nx, ny, 0.01, 0.01, data1, data2)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "metadata": { "tags": [] }, "outputs": [], "source": [ "# extend deepcopy to new type\n", - "Base.deepcopy(f::Field) = Field(f.nx, f.ny, f.dx, f.dy, 
deepcopy(f.data))" + "Base.deepcopy(f::Field) = Field(f.nx, f.ny, f.dx, f.dy, deepcopy(f.data1), deepcopy(f.data2))" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": { "tags": [] }, @@ -343,7 +1002,7 @@ "show_heatmap (generic function with 1 method)" ] }, - "execution_count": 14, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -352,7 +1011,7 @@ "# heatmap plotting helper\n", "function show_heatmap(field::Field)\n", " # Copy to CPU, if necessary\n", - " data_raw = Matrix(field.data)\n", + " data_raw = Matrix(field.data1)\n", "\n", " # Slice out ghost regions\n", " data = data_raw[begin+1:end-1,begin+1:end-1]\n", @@ -366,7 +1025,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": { "tags": [] }, @@ -377,7 +1036,7 @@ "initialize" ] }, - "execution_count": 15, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -391,17 +1050,17 @@ "temperature distributions along with boundary conditions.\n", "\"\"\"\n", "function initialize(nx = 1000, ny = 1000, arraytype = Matrix)\n", - " data = zeros(nx+2, ny+2)\n", + " data1 = zeros(nx+2, ny+2)\n", " \n", " # generate a field with boundary conditions\n", " # in GPU, you can generate the field in cpu then you can move it to gpu\n", " if arraytype != Matrix\n", - " tmp = Field(nx, ny, data)\n", + " tmp = Field(nx, ny, data1)\n", " generate_field!(tmp)\n", - " gpudata = arraytype(tmp.data)\n", + " gpudata = arraytype(tmp.data1)\n", " previous = Field(nx, ny, gpudata)\n", " else\n", - " previous = Field(nx, ny, data)\n", + " previous = Field(nx, ny, data1)\n", " generate_field!(previous)\n", " end\n", " \n", @@ -413,7 +1072,56 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "initialize_gs" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "\"\"\"\n", + " initialize_gs(nx::Int, ny::Int, arraytype = Matrix)\n", + "\n", + "The initialize function provided is designed to set up an initial state for \n", + "a simulation by creating two fields, previous and current, which hold the initial \n", + "temperature distributions along with boundary conditions.\n", + "\"\"\"\n", + "function initialize_gs(nx = 1000, ny = 1000, arraytype = Matrix)\n", + " data1 = zeros(nx+2, ny+2)\n", + " data2 = zeros(nx+2, ny+2)\n", + " \n", + " # generate a field with boundary conditions\n", + " # in GPU, you can generate the field in cpu then you can move it to gpu\n", + " if arraytype != Matrix\n", + " tmp = Field(nx, ny, data1, data2)\n", + " generate_field!(tmp)\n", + " gpudata1 = arraytype(tmp.data1)\n", + " gpudata2 = arraytype(tmp.data2)\n", + " previous = Field(nx, ny, gpudata1, gpudata2)\n", + " else\n", + " previous = Field(nx, ny, data1, data2)\n", + " generate_field!(previous)\n", + " end\n", + " \n", + " current = Base.deepcopy(previous)\n", + "\n", + " return current, previous\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "metadata": { "tags": [] }, @@ -424,7 +1132,7 @@ "generate_field!" 
] }, - "execution_count": 17, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -448,18 +1156,24 @@ " for i = 1:field.nx+2\n", " ds2 = (i - field.nx / 2)^2 + (j - field.ny / 2)^2\n", " if ds2 < radius2 \n", - " field.data[i,j] = tdisc\n", + " field.data1[i,j] = tdisc\n", + " field.data2[i,j] = tdisc\n", " else\n", - " field.data[i,j] = tarea\n", + " field.data1[i,j] = tarea\n", + " field.data2[i,j] = tarea\n", " end\n", " end \n", " end \n", "\n", " # Boundary conditions\n", - " field.data[:,1] .= tleft\n", - " field.data[:,field.ny+2] .= tright\n", - " field.data[1,:] .= tupper\n", - " field.data[field.nx+2,:] .= tlower\n", + " field.data1[:,1] .= tleft\n", + " field.data2[:,1] .= tleft\n", + " field.data1[:,field.ny+2] .= tright\n", + " field.data2[:,field.ny+2] .= tright\n", + " field.data1[1,:] .= tupper\n", + " field.data2[1,:] .= tupper\n", + " field.data1[field.nx+2,:] .= tlower\n", + " field.data2[field.nx+2,:] .= tlower\n", " \n", " return\n", "end" @@ -467,7 +1181,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "metadata": { "tags": [] }, @@ -478,7 +1192,7 @@ "swap_fields!" ] }, - "execution_count": 18, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -490,16 +1204,19 @@ "Swap the data of two fields curr and prev. 
\n", "\"\"\" \n", "function swap_fields!(curr::Field, prev::Field)\n", - " tmp = curr.data\n", - " curr.data = prev.data\n", - " prev.data = tmp\n", + " tmp1 = curr.data1\n", + " tmp2 = curr.data2\n", + " curr.data1 = prev.data1\n", + " curr.data2 = prev.data2\n", + " prev.data1 = tmp1\n", + " prev.data2 = tmp2\n", " return\n", "end" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "metadata": { "tags": [] }, @@ -510,7 +1227,7 @@ "average_temperature" ] }, - "execution_count": 19, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -521,19 +1238,12 @@ "\n", "Calculate average temperature of a temperature field. \n", "\"\"\"\n", - "average_temperature(f::Field) = sum(f.data[2:f.nx+1, 2:f.ny+1]) / (f.nx * f.ny)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## CPU Implementation of the Discretized Heat Equation" + "average_temperature(f::Field) = sum(f.data1[2:f.nx+1, 2:f.ny+1]) / (f.nx * f.ny)" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 14, "metadata": { "tags": [] }, @@ -544,7 +1254,7 @@ "calculate_cpu!" ] }, - "execution_count": 20, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -584,7 +1294,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": { "tags": [] }, @@ -595,7 +1305,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 16, "metadata": { "tags": [] }, @@ -606,7 +1316,7 @@ "simulate_cpu!" 
] }, - "execution_count": 25, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -647,7 +1357,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 17, "metadata": { "tags": [] }, @@ -672,7 +1382,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 18, "metadata": { "tags": [] }, @@ -688,7 +1398,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:47\u001b[39m\n" + "\u001b[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:41\u001b[39m\n" ] }, { @@ -696,7 +1406,7 @@ "output_type": "stream", "text": [ "Final average temperature: 59.66948531363336\n", - " 50.141055 seconds (1.91 M allocations: 1.686 GiB, 0.47% gc time, 5.17% compilation time)\n" + " 43.267232 seconds (1.90 M allocations: 1.686 GiB, 0.30% gc time, 3.80% compilation time)\n" ] }, { @@ -717,13 +1427,6 @@ "show_heatmap(curr)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## GPU (CUDA) Implementation of the Discretized Heat Equation" - ] - }, { "cell_type": "code", "execution_count": 28, @@ -898,68 +1601,6 @@ "show_heatmap(curr)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## KernelAbstractions Implementation of the Discretized Heat Equation" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "using KernelAbstractions" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "CUDABackend(false, false)" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# add all options for different backends and array types\n", - "DEV = :NVIDIA\n", - "\n", - "if DEV == :NVIDIA\n", - " using CUDA\n", - " ArrayKA = CUDA.CuArray\n", - " Backend = CUDA.CUDABackend()\n", - 
"elseif DEV == :AMD\n", - " using AMDGPU\n", - " ArrayKA = AMDGPU.ROCArray\n", - " Backend = AMDGPU.ROCBackend()\n", - "elseif DEV == :oneAPI\n", - " using oneAPI \n", - " ArrayKA = oneAPI.oneArray\n", - " Backend = oneAPI.oneAPIBackend()\n", - "elseif DEV == :Metal\n", - " using Metal \n", - " ArrayKA = Metal.MtlArray\n", - " Backend = Metal.MetalBackend()\n", - "else DEV == :CPU\n", - " ArrayKA = Array\n", - " Backend = CPU()\n", - "end" - ] - }, { "cell_type": "code", "execution_count": 35, @@ -1163,14 +1804,128 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "function laplacian(i, j, k, var)\n", + " @inbounds l = var[i - 1, j, k] + var[i + 1, j, k] + var[i, j - 1, k] +\n", + " var[i, j + 1, k] + var[i, j, k - 1] + var[i, j, k + 1] -\n", + " 6.0 * var[i, j, k]\n", + " return l / 6.0\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "calculate_gs_cuda! (generic function with 1 method)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function calculate_gs_cuda!(u_curr, u_prev, v_curr, v_prev, dx2, dy2, nx, ny, a, dt)\n", + " i = (blockIdx().x - 1) * blockDim().x + threadIdx().x\n", + " j = (blockIdx().y - 1) * blockDim().y + threadIdx().y\n", + " if i > 1 && j > 1 && i < nx+2 && j < ny+2\n", + " @inbounds u_xderiv = (u_prev[i-1, j] - 2.0 * u_prev[i, j] + u_prev[i+1, j]) / dx2\n", + " @inbounds u_yderiv = (u_prev[i, j-1] - 2.0 * u_prev[i, j] + u_prev[i, j+1]) / dy2\n", + " @inbounds u_curr[i, j] = u_prev[i, j] + a * dt * (u_xderiv + u_yderiv)\n", + " @inbounds v_xderiv = (v_prev[i-1, j] - 2.0 * v_prev[i, j] + v_prev[i+1, j]) / dx2\n", + " @inbounds v_yderiv = (v_prev[i, j-1] - 2.0 * v_prev[i, j] + v_prev[i, j+1]) / dy2\n", + " @inbounds v_curr[i, j] = v_prev[i, j] + a * dt * (v_xderiv + v_yderiv)\n", + " end\n", + " \n", + " return\n", + "end" + ] + }, + { + "cell_type": 
"code", + "execution_count": 25, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "simulate_gs_gpu! (generic function with 1 method)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function simulate_gs_gpu!(curr::Field, prev::Field, nsteps::Integer)\n", + " println(\"Initial average temperature: $(average_temperature(curr))\")\n", + "\n", + " # Diffusion constant\n", + " α = 0.5\n", + " # Largest stable time step\n", + " dt = curr.dx^2 * curr.dy^2 / (2.0 * α * (curr.dx^2 + curr.dy^2))\n", + " \n", + " # display a nice progress bar\n", + " p = Progress(nsteps)\n", + "\n", + " for i = 1:nsteps\n", + " # calculate new state based on previous state\n", + " \n", + " nx, ny = size(curr.data1) .- 2 \n", + " xthreads = ythreads = 32\n", + " xblocks, yblocks = cld(curr.nx + 2, xthreads), cld(curr.ny + 2, ythreads)\n", + " @cuda threads=(xthreads, ythreads) blocks = (xblocks, yblocks) calculate_gs_cuda!(curr.data1, prev.data1, curr.data2, prev.data2, curr.dx^2, curr.dy^2, nx, ny, α, dt)\n", + "\n", + " # swap current and previous fields\n", + " swap_fields!(curr, prev)\n", + "\n", + " # increment the progress bar\n", + " next!(p)\n", + " end \n", + "\n", + " println(\"Final average temperature: $(average_temperature(curr))\")\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# initialize data on CPU and move it to GPU\n", + "curr, prev = initialize_gs(nx, ny, CuArray)\n", + "\n", + "show_heatmap(curr)" + ] } ], "metadata": { "kernelspec": { - "display_name": "Julia Tutorial Single Threaded", + "display_name": "Julia Tutorial Multi Threaded", "language": "julia", - "name": "julia-tutorial-single-threaded" + "name": "julia-tutorial-multi-threaded" }, "language_info": {
"file_extension": ".jl",