diff --git a/.github/actions/setup-numba-dpex/action.yml b/.github/actions/setup-numba-dpex/action.yml
index 1884e34ec2..6db253c703 100644
--- a/.github/actions/setup-numba-dpex/action.yml
+++ b/.github/actions/setup-numba-dpex/action.yml
@@ -16,6 +16,10 @@ runs:
       run: conda env update -n base -f ${{ inputs.environment }} --prune
       shell: bash
 
+    - name: Test conda environment
+      run: conda list
+      shell: bash
+
     - name: Build numba-dpex
       run: |
         export PATH=$CONDA/bin-llvm:$PATH
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2850ebf068..74f616106d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,40 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.21.0] - 2023-06-17
+
+### Added
+* Support addition and multiplication-based prange reduction loops (#999)
+* Proper boxing and unboxing of dpctl.SyclQueue objects inside dpjit-decorated functions (#963, #1064)
+* Support for `queue` keyword arguments inside dpnp array constructors in dpjit (#1032)
+* Overloads for dpnp array constructors: dpnp.full (#991), dpnp.full_like (#997)
+* Support for complex64 and complex128 types as kernel arguments and in parfors (#1033, #1035)
+* New config to run the ConstantSizeStaticLocalMemoryPass optionally (#999)
+* Support for Numba 0.57 (#1030, #1003, #1002)
+* Support for Python 3.11 (#1054)
+* Support for SPIRV 1.4 (#1056, #1060)
+
+### Changed
+* Parfor lowering happens using the kernel pipeline (#996)
+* Minimum required Numba version is now 0.57 (#1030)
+* Numba monkey patches are moved to numba_dpex.numba_patches (#1030)
+* Redesigned unit test suite (#1018, #1017, #1015, #1036, #1037, #1072)
+
+### Fixed
+* Fix stride computation when unboxing a dpnp array (#1023)
+* Use the cached queue instead of creating a new one during type inference (#946)
+* Fixed bug in reduction mul operation for dpjit (#1048)
+* Offload of parfor nodes to OpenCL UHD GPU devices (#1074)
+
+### Removed
+* Support for offloading NumPy-based parfor nodes to SYCL devices (#1041)
+* Removed rename_numpy_functions_pass (#1041)
+* Dpnp overloads using stubs (#1041, #1025)
+* Support for `like` keyword argument in dpnp array constructor overloads (#1043)
+* Support for NumPy arrays as kernel arguments (#1049)
+* Kernel argument access specifiers (#1049)
+* Support for dpctl.device_context to launch kernels and njit offloading (#1041)
+
 ## [0.20.1] - 2023-04-07
 
 ### Added
diff --git a/conda-recipe/run_test.bat b/conda-recipe/run_test.bat
index 7b9ed8e820..dbcdc5d6e9 100644
--- a/conda-recipe/run_test.bat
+++ b/conda-recipe/run_test.bat
@@ -1,4 +1,12 @@
-pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
-IF %ERRORLEVEL% NEQ 0 exit /B 1
+set "ONEAPI_DEVICE_SELECTOR="
+
+for /F "USEBACKQ tokens=* delims=" %%F in (
+`python -c "import dpctl; print(\"\n\".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))"`
+) do (
+    set "ONEAPI_DEVICE_SELECTOR=%%F"
+    REM Test ERRORLEVEL without percent signs. Percent variables inside a block are expanded once, when the block is parsed.
+    pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
+    IF ERRORLEVEL 1 exit /B 1
+)
 
 exit /B 0
diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh
old mode 100644
new mode 100755
index dbcd713d64..4454e3abae
--- a/conda-recipe/run_test.sh
+++ b/conda-recipe/run_test.sh
@@ -1,12 +1,19 @@
 #!/bin/bash
 
 set -euxo pipefail
+unset ONEAPI_DEVICE_SELECTOR
 
-pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
+for selector in $(python -c "import dpctl; print(\" \".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))")
+do
+    export "ONEAPI_DEVICE_SELECTOR=$selector"
+    unset NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE  # drop the value exported by the previous iteration
 
-export NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
+    pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
 
-pytest -q -ra --disable-warnings -vv \
-    --pyargs numba_dpex.tests.kernel_tests.test_atomic_op::test_atomic_fp_native
+    export NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
+
+    pytest -q -ra --disable-warnings -vv \
+        --pyargs numba_dpex.tests.kernel_tests.test_atomic_op::test_atomic_fp_native
+done
 
 exit 0
diff --git a/docs/user_guides/debugging/index.rst b/docs/user_guides/debugging/index.rst
index 19ade475b5..015a9b4903 100644
--- a/docs/user_guides/debugging/index.rst
+++ b/docs/user_guides/debugging/index.rst
@@ -25,7 +25,7 @@ work.
 `Intel® Distribution for GDB*` is part of `Intel oneAPI`. For relevant
 documentation, refer to the `Intel® Distribution for GDB* product page`_.
 
-.. _`Intel® Distribution for GDB* product page`: https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/distribution-for-gdb.html
+.. _`Intel® Distribution for GDB* product page`: https://www.intel.com/content/www/us/en/developer/tools/oneapi/distribution-for-gdb.html
 
 .. toctree::
     :maxdepth: 2
diff --git a/docs/user_guides/debugging/set_up_machine.rst b/docs/user_guides/debugging/set_up_machine.rst
index 1a27d0087b..015ea9ad9a 100644
--- a/docs/user_guides/debugging/set_up_machine.rst
+++ b/docs/user_guides/debugging/set_up_machine.rst
@@ -11,7 +11,7 @@ Install drivers using the following guides:
 
 .. _Intel® GPGPU driver installation guide: https://dgpu-docs.intel.com/installation-guides/index.html
 .. _Intel® oneAPI GPU driver installation guide:
-    https://software.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/prerequisites/install-intel-gpu-drivers.html
+    https://www.intel.com/content/www/us/en/docs/oneapi/installation-guide-linux/current/install-intel-gpu-drivers.html
 
 The user should be in the "video" group (on Ubuntu* 18, Fedora* 30, and SLES* 15
 SP1) or "render" group (on Ubuntu* 19 and higher, CentOS* 8, and Fedora* 31). An
@@ -93,5 +93,5 @@ If you are installing DCD for the first time, create keys. For details, see the
 
 See also:
 
-  - `Get Started with Intel® Distribution for GDB* on Linux* OS Host <https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-debugging-dpcpp-linux/top.html>`_
-  - `Public signature key <https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-debugging-dpcpp-linux/top.html#:~:text=sudo%20modprobe%20igfxdcd-,The%20host%20system%20does%20not%20recognize%20the%20igfxdcd%20signature%20if,gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB,-If%20you%20have>`_
+  - `Get Started with Intel® Distribution for GDB* on Linux* OS Host <https://www.intel.com/content/www/us/en/docs/distribution-for-gdb/get-started-guide-linux/current/overview.html>`_
+  - `Public signature key <https://www.intel.com/content/www/us/en/docs/distribution-for-gdb/get-started-guide-linux/current/overview.html#:~:text=sudo%20modprobe%20igfxdcd-,The%20host%20system%20does%20not%20recognize%20the%20igfxdcd%20signature%20if,gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB,-If%20you%20have>`_
diff --git a/docs/user_guides/debugging/stepping.rst b/docs/user_guides/debugging/stepping.rst
index 57b7b87bb2..bdfdea3161 100644
--- a/docs/user_guides/debugging/stepping.rst
+++ b/docs/user_guides/debugging/stepping.rst
@@ -72,5 +72,4 @@ a single line without interference, set the scheduler-locking setting to `on` or
 
 See also:
 
-- `Single Stepping <https://software.intel.com/content/www/us/en/develop/documentation/debugging-dpcpp-linux/top/debug-a-dpc-application-on-a-cpu/single-stepping.html>`_
 - `Continuing and Stepping in GDB* <https://sourceware.org/gdb/current/onlinedocs/gdb/Continuing-and-Stepping.html#Continuing-and-Stepping>`_
diff --git a/docs/user_guides/getting_started.rst b/docs/user_guides/getting_started.rst
index bb1541fe44..795f1c0396 100644
--- a/docs/user_guides/getting_started.rst
+++ b/docs/user_guides/getting_started.rst
@@ -154,6 +154,6 @@ Refer to :ref:`Docker <docker>` section for more options.
 .. _`packaging`: https://packaging.pypa.io/
 .. _`scipy`: https://anaconda.org/intel/scipy
 .. _`pytest`: https://docs.pytest.org
-.. _`Intel Distribution for Python`: https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/distribution-for-python.html
+.. _`Intel Distribution for Python`: https://www.intel.com/content/www/us/en/developer/tools/oneapi/distribution-for-python.html
 .. _`anaconda.org/intel`: https://anaconda.org/intel
-.. _`Intel oneAPI`: https://software.intel.com/content/www/us/en/develop/tools/oneapi.html
+.. _`Intel oneAPI`: https://www.intel.com/content/www/us/en/developer/tools/oneapi/overview.html
diff --git a/environment/docs.yml b/environment/docs.yml
index 2bb7d8693e..7d0573c203 100644
--- a/environment/docs.yml
+++ b/environment/docs.yml
@@ -1,18 +1,17 @@
-name: dev
+name: dpex-docs-dev
 channels:
-  - defaults
   - dppy/label/dev
   - numba
   - intel
-  - numba/label/dev
+  - conda-forge
   - nodefaults
 dependencies:
-  - python=3.9
+  - python=3.10
   - gxx_linux-64
   - dpcpp_linux-64
   - numba 0.57*
   - dpctl 0.14*
-  - dpnp >=0.10.2
+  - dpnp >=0.12*
   - spirv-tools
   - dpcpp-llvm-spirv
   - packaging
@@ -23,8 +22,6 @@ dependencies:
     - pre-commit
     - flake8
     - black==20.8b1
-    - pytest-cov
-    - pytest-xdist
     - pexpect
     - sphinx
     - autodoc
@@ -32,6 +29,3 @@ dependencies:
     - sphinx-rtd-theme
     - sphinxcontrib-apidoc
     - sphinxcontrib-googleanalytics
-variables:
-  CHANNELS: -c defaults -c numba -c intel -c numba/label/dev -c dppy/label/dev --override-channels
-  CHANNELS_DEV: -c dppy/label/dev -c defaults -c numba -c intel -c numba/label/dev --override-channels
diff --git a/numba_dpex/core/parfors/kernel_builder.py b/numba_dpex/core/parfors/kernel_builder.py
index 7200a6e62a..a941e03c10 100644
--- a/numba_dpex/core/parfors/kernel_builder.py
+++ b/numba_dpex/core/parfors/kernel_builder.py
@@ -28,7 +28,7 @@
 from numba_dpex import config
 
 from ..descriptor import dpex_kernel_target
-from ..types.dpnp_ndarray_type import DpnpNdArray
+from ..types import DpnpNdArray, USMNdArray
 from ..utils.kernel_templates import RangeKernelTemplate
 
 
@@ -70,6 +70,30 @@ def _compile_kernel_parfor(
         func_ir, kernel_name
     )
 
+    # A cast from DpnpNdArray type to USMNdArray is needed for all arguments of
+    # DpnpNdArray type. Although DpnpNdArray derives from USMNdArray, the two
+    # types use different data models. USMNdArray uses the
+    # numba_dpex.core.datamodel.models.ArrayModel data model that defines all
+    # CPointer type members in the GLOBAL address space. The DpnpNdArray uses
+    # Numba's default ArrayModel that does not define pointers in any specific
+    # address space. For OpenCL HD Graphics devices, defining a kernel function
+    # (spir_kernel calling convention) with pointer arguments that have no
+    # address space qualifier causes a run time crash. By casting the argument
+    # type for parfor arguments from DpnpNdArray type to the USMNdArray type the
+    # generated kernel always has an address space qualifier, avoiding the issue
+    # on OpenCL HD graphics devices.
+
+    for i, argty in enumerate(argtypes):
+        if isinstance(argty, DpnpNdArray):
+            new_argty = USMNdArray(
+                ndim=argty.ndim,
+                layout=argty.layout,
+                dtype=argty.dtype,
+                usm_type=argty.usm_type,
+                queue=argty.queue,
+            )
+            argtypes[i] = new_argty
+
     # compile the kernel
     kernel.compile(
         args=argtypes,
diff --git a/numba_dpex/core/types/dpnp_ndarray_type.py b/numba_dpex/core/types/dpnp_ndarray_type.py
index 75d77141c4..04edec02b1 100644
--- a/numba_dpex/core/types/dpnp_ndarray_type.py
+++ b/numba_dpex/core/types/dpnp_ndarray_type.py
@@ -58,6 +58,12 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         else:
             return
 
+    def __str__(self):
+        return self.name.replace("USMNdArray", "DpnpNdarray")
+
+    def __repr__(self):
+        return self.__str__()
+
     def __allocate__(
         self,
         typingctx,
diff --git a/numba_dpex/core/types/usm_ndarray_type.py b/numba_dpex/core/types/usm_ndarray_type.py
index f6eb08564f..f5d83783b1 100644
--- a/numba_dpex/core/types/usm_ndarray_type.py
+++ b/numba_dpex/core/types/usm_ndarray_type.py
@@ -87,7 +87,7 @@ def __init__(
             self.dtype = dtype
 
         if name is None:
-            type_name = "usm_ndarray"
+            type_name = "USMNdArray"
             if readonly:
                 type_name = "readonly " + type_name
             if not aligned:
@@ -116,6 +116,9 @@ def __init__(
             aligned=aligned,
         )
 
+    def __repr__(self):
+        return self.name
+
     def copy(
         self,
         dtype=None,
diff --git a/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py b/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py
index 6c23bd6147..81ebfe32d6 100644
--- a/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py
+++ b/numba_dpex/tests/core/passes/test_parfor_legalize_cfd_pass.py
@@ -14,7 +14,7 @@
 
 from numba_dpex import dpjit
 from numba_dpex.core.exceptions import ExecutionQueueInferenceError
-from numba_dpex.tests._helper import skip_no_opencl_gpu
+from numba_dpex.tests._helper import skip_no_opencl_cpu, skip_no_opencl_gpu
 
 shapes = [10, (2, 5)]
 dtypes = [dpnp.int32, dpnp.int64, dpnp.float32, dpnp.float64]
@@ -58,6 +58,7 @@ def test_parfor_legalize_cfd_pass(shape, dtype, usm_type, device):
 
 
 @skip_no_opencl_gpu
+@skip_no_opencl_cpu
 def test_parfor_legalize_cfd_pass_raise():
     a = dpnp.zeros(shape=10, device="cpu")
     b = dpnp.ones(shape=10, device="gpu")
@@ -67,6 +68,7 @@ def test_parfor_legalize_cfd_pass_raise():
 
 
 @skip_no_opencl_gpu
+@skip_no_opencl_cpu
 def test_cfd_error_due_to_lhs():
     a = dpnp.zeros(shape=10, device="cpu")
     b = dpnp.ones(shape=10, device="cpu")