From 391daf04940f58f843e36cb64cdad53ba375b09b Mon Sep 17 00:00:00 2001
From: Sean Engelstad <sean.engelstad1@gmail.com>
Date: Fri, 22 Nov 2024 09:46:49 -0500
Subject: [PATCH] Expressions for Shell Element in TACS (#116)

* add new a2d shell code

* update the shell assemble frame

* add new mat concat expression

* update shell element a2d headers

* add test for the matrix rotate frame expression

* bug fixes and shell strain fixes

* a2d mat rotate frame tests pass!

* all shell internal deriv tests pass now

* working symMatRotateFrame

* fix forward comp

* reverse + forward ad compatibility

* updated a2d obj include

* CUDA compatibility in A2D

* update to use A2D_FUNCTION for cuda headers

* device compatible sqrt, etc other operations

* close #ifndef A2D_FUNCTION CUDA part

* clang format

* remove a2dmatconcat.h

* clang format v3
---
 include/a2dcore.h                        |   7 +
 include/a2ddefs.h                        |   8 +-
 include/ad/a2dgemm.h                     |   2 +-
 include/ad/a2dgreenstrain.h              |   4 +-
 include/ad/a2dmatsum.h                   |   4 +
 include/ad/a2dmatvecmult.h               | 125 +++++
 include/ad/a2dobj.h                      | 160 ++++++
 include/ad/a2dstack.h                    |   3 +
 include/ad/core/a2dmatinvcore.h          |   8 +-
 include/ad/core/a2dmatveccore.h          |  10 +
 include/ad/shell/a2dmatrotateframe.h     | 360 ++++++++++++++
 include/ad/shell/a2dshellassembleframe.h | 252 ++++++++++
 include/ad/shell/a2dshellstrain.h        | 501 +++++++++++++++++++
 include/ad/shell/a2dsymmatrotateframe.h  | 588 +++++++++++++++++++++++
 include/adscalar.h                       | 144 ++++--
 tests/ad/test_ad_expressions.cpp         |   4 +
 16 files changed, 2122 insertions(+), 58 deletions(-)
 create mode 100644 include/ad/shell/a2dmatrotateframe.h
 create mode 100644 include/ad/shell/a2dshellassembleframe.h
 create mode 100644 include/ad/shell/a2dshellstrain.h
 create mode 100644 include/ad/shell/a2dsymmatrotateframe.h
diff --git a/include/a2dcore.h b/include/a2dcore.h
index 660cb53a..fb6fd9c2 100644
--- a/include/a2dcore.h
+++ b/include/a2dcore.h
@@ -31,4 +31,11 @@
 #include "ad/a2dvecouter.h"
 #include "ad/a2dvecsum.h"
 
+// shell routines
+#include "ad/shell/a2dshellassembleframe.h"
+// #include "ad/shell/a2dmatconcat.h"
+#include "ad/shell/a2dmatrotateframe.h"
+#include "ad/shell/a2dshellstrain.h"
+#include "ad/shell/a2dsymmatrotateframe.h"
+
 #endif  //  A2D_CORE_H
diff --git a/include/a2ddefs.h b/include/a2ddefs.h
index 954ce33b..edce9af1 100644
--- a/include/a2ddefs.h
+++ b/include/a2ddefs.h
@@ -8,8 +8,14 @@
 template <typename T>
 using A2D_complex_t = std::complex<T>;
 
+// CUDA headers
 #ifndef A2D_FUNCTION
-#define A2D_FUNCTION  // A2D_FUNCTION does nothing in this scenario
+#ifdef __CUDACC__
+#define A2D_FUNCTION \
+  __host__ __device__  // CUDA compiler: callable on both host and device
+#else                  // not __CUDACC__
+#define A2D_FUNCTION   // A2D_FUNCTION does nothing in this scenario
+#endif
 #endif
 
 namespace A2D {
diff --git a/include/ad/a2dgemm.h b/include/ad/a2dgemm.h
index 18ecb2c7..58e5322b 100644
--- a/include/ad/a2dgemm.h
+++ b/include/ad/a2dgemm.h
@@ -119,7 +119,7 @@ class MatMatMultExpr {
         "Can't perform second order forward with first order objects");
     constexpr ADseed seed = conditional_value<ADseed, forder == ADorder::FIRST,
                                               ADseed::b, ADseed::p>::value;
-    if constexpr (adA == ADiffType::ACTIVE) {
+    if constexpr (adA == ADiffType::ACTIVE && adB == ADiffType::ACTIVE) {
       constexpr bool additive = true;
       MatMatMultCore<T, N, M, K, L, P, Q, opA, opB>(
           GetSeed<seed>::get_data(A), get_data(B), GetSeed<seed>::get_data(C));
diff --git a/include/ad/a2dgreenstrain.h b/include/ad/a2dgreenstrain.h
index f42eb4e3..a7512f1e 100644
--- a/include/ad/a2dgreenstrain.h
+++ b/include/ad/a2dgreenstrain.h
@@ -43,7 +43,9 @@ class MatGreenStrainExpr {
   static_assert(get_diff_order<Utype>::order == order,
                 "ADorder does not match");
 
-  A2D_FUNCTION MatGreenStrainExpr(Utype& Ux, Etype& E) : Ux(Ux), E(E) {}
+  A2D_FUNCTION MatGreenStrainExpr(Utype& Ux, Etype& E) : Ux(Ux), E(E) {
+    // printf("made mat green strain expression\n");
+  }
 
   A2D_FUNCTION void eval() {
     if constexpr (etype == GreenStrainType::LINEAR) {
diff --git a/include/ad/a2dmatsum.h b/include/ad/a2dmatsum.h
index f1944cdc..1a68b926 100644
--- a/include/ad/a2dmatsum.h
+++ b/include/ad/a2dmatsum.h
@@ -217,6 +217,8 @@ class MatSumScaleExpr {
     }
   }
 
+  A2D_FUNCTION void bzero() { B.bzero(); }
+
   A2D_FUNCTION void reverse() {
     constexpr ADseed seed = ADseed::b;
     if constexpr (adA == ADiffType::ACTIVE) {
@@ -237,6 +239,8 @@ class MatSumScaleExpr {
     }
   }
 
+  A2D_FUNCTION void hzero() { C.hzero(); }
+
   A2D_FUNCTION void hreverse() {
     constexpr ADseed seed = ADseed::h;
     if constexpr (adA == ADiffType::ACTIVE) {
diff --git a/include/ad/a2dmatvecmult.h b/include/ad/a2dmatvecmult.h
index a895b8d0..e52477aa 100644
--- a/include/ad/a2dmatvecmult.h
+++ b/include/ad/a2dmatvecmult.h
@@ -214,6 +214,131 @@ A2D_FUNCTION auto MatVecMult(const Atype& A, A2DObj<xtype>& x,
   return MatVecMultExpr<op, const Atype, A2DObj<xtype>, A2DObj<ytype>>(A, x, y);
 }
 
+// Passive MatScale: y = alpha * x for M x N matrices (no derivatives tracked)
+template <typename T, int M, int N>
+A2D_FUNCTION void MatScale(const T alpha, const Mat<T, M, N>& x,
+                           Mat<T, M, N>& y) {
+  MatScaleCore<T, M, N>(alpha, get_data(x), get_data(y));
+}
+
+template <class dtype, class Atype, class Btype>
+class MatScaleExpr {
+ public:
+  // Expression for B = alpha * A; T is the numeric type of the scalar alpha
+  typedef typename get_object_numeric_type<dtype>::type T;
+
+  // Extract the dimensions of the underlying matrices
+  static constexpr int M = get_matrix_rows<Atype>::size;
+  static constexpr int N = get_matrix_columns<Atype>::size;
+  static constexpr int size = get_num_matrix_entries<Atype>::size;
+
+  // Get the differentiation order from the output
+  static constexpr ADorder order = get_diff_order<Btype>::order;
+
+  // Get the differentiation types of the scalar (add) and the matrix A (adA)
+  static constexpr ADiffType add = get_diff_type<dtype>::diff_type;
+  static constexpr ADiffType adA = get_diff_type<Atype>::diff_type;
+
+  // Make sure the input and output are the same kind of A2D object
+  static_assert((get_a2d_object_type<Atype>::value ==
+                 get_a2d_object_type<Btype>::value),
+                "Matrices are not all of the same type");
+
+  A2D_FUNCTION MatScaleExpr(dtype alpha, Atype& A, Btype& B)
+      : alpha(alpha), A(A), B(B) {}
+
+  A2D_FUNCTION void eval() {
+    MatScaleCore<T, M, N>(get_data(alpha), get_data(A), get_data(B));
+  }
+
+  A2D_FUNCTION void bzero() { B.bzero(); }
+
+  template <ADorder forder>
+  A2D_FUNCTION void forward() {
+    constexpr ADseed seed = conditional_value<ADseed, forder == ADorder::FIRST,
+                                              ADseed::b, ADseed::p>::value;
+
+    if constexpr (add == ADiffType::ACTIVE && adA == ADiffType::ACTIVE) {
+      MatScaleCore<T, M, N>(GetSeed<seed>::get_data(alpha), get_data(A),
+                            GetSeed<seed>::get_data(B));
+      VecAddCore<T, size>(get_data(alpha), GetSeed<seed>::get_data(A),
+                          GetSeed<seed>::get_data(B));
+    } else if constexpr (add == ADiffType::ACTIVE) {
+      MatScaleCore<T, M, N>(GetSeed<seed>::get_data(alpha), get_data(A),
+                            GetSeed<seed>::get_data(B));
+    } else if constexpr (adA == ADiffType::ACTIVE) {
+      MatScaleCore<T, M, N>(get_data(alpha), GetSeed<seed>::get_data(A),
+                            GetSeed<seed>::get_data(B));
+    }
+  }
+  A2D_FUNCTION void reverse() {
+    constexpr ADseed seed = ADseed::b;
+    if constexpr (add == ADiffType::ACTIVE) {
+      GetSeed<seed>::get_data(alpha) +=
+          VecDotCore<T, size>(GetSeed<seed>::get_data(B), get_data(A));
+    }
+    if constexpr (adA == ADiffType::ACTIVE) {
+      VecAddCore<T, size>(get_data(alpha), GetSeed<seed>::get_data(B),
+                          GetSeed<seed>::get_data(A));
+    }
+  }
+
+  A2D_FUNCTION void hzero() { B.hzero(); }
+
+  A2D_FUNCTION void hreverse() {
+    if constexpr (add == ADiffType::ACTIVE) {
+      GetSeed<ADseed::h>::get_data(alpha) +=
+          VecDotCore<T, size>(GetSeed<ADseed::h>::get_data(B), get_data(A));
+    }
+    if constexpr (adA == ADiffType::ACTIVE) {  // update A, as in reverse()
+      VecAddCore<T, size>(get_data(alpha), GetSeed<ADseed::h>::get_data(B),
+                          GetSeed<ADseed::h>::get_data(A));
+    }
+    if constexpr (add == ADiffType::ACTIVE && adA == ADiffType::ACTIVE) {
+      GetSeed<ADseed::h>::get_data(alpha) += VecDotCore<T, size>(
+          GetSeed<ADseed::b>::get_data(B), GetSeed<ADseed::p>::get_data(A));
+      VecAddCore<T, size>(GetSeed<ADseed::p>::get_data(alpha),
+                          GetSeed<ADseed::b>::get_data(B),
+                          GetSeed<ADseed::h>::get_data(A));
+    }
+  }
+
+  dtype alpha;
+  Atype& A;
+  Btype& B;
+};
+// Expression factories for MatScale; the output y keeps its own deduced Btype
+template <class T, class Atype, class Btype>
+A2D_FUNCTION auto MatScale(ADObj<T>& alpha, ADObj<Atype>& x, ADObj<Btype>& y) {
+  return MatScaleExpr<ADObj<T>&, ADObj<Atype>, ADObj<Btype>>(alpha, x, y);
+}
+
+template <class T, class Atype, class Btype>
+A2D_FUNCTION auto MatScale(const T alpha, ADObj<Atype>& x, ADObj<Btype>& y) {
+  return MatScaleExpr<const T, ADObj<Atype>, ADObj<Btype>>(alpha, x, y);
+}
+
+template <class T, class Atype, class Btype>
+A2D_FUNCTION auto MatScale(ADObj<T>& alpha, const Atype& x, ADObj<Btype>& y) {
+  return MatScaleExpr<ADObj<T>&, const Atype, ADObj<Btype>>(alpha, x, y);
+}
+
+template <class T, class Atype, class Btype>
+A2D_FUNCTION auto MatScale(A2DObj<T>& alpha, A2DObj<Atype>& x,
+                           A2DObj<Btype>& y) {
+  return MatScaleExpr<A2DObj<T>&, A2DObj<Atype>, A2DObj<Btype>>(alpha, x, y);
+}
+
+template <class T, class Atype, class Btype>
+A2D_FUNCTION auto MatScale(const T alpha, A2DObj<Atype>& x, A2DObj<Btype>& y) {
+  return MatScaleExpr<const T, A2DObj<Atype>, A2DObj<Btype>>(alpha, x, y);
+}
+
+template <class T, class Atype, class Btype>
+A2D_FUNCTION auto MatScale(A2DObj<T>& alpha, const Atype& x, A2DObj<Btype>& y) {
+  return MatScaleExpr<A2DObj<T>&, const Atype, A2DObj<Btype>>(alpha, x, y);
+}
+
+
 namespace Test {
 
 template <MatOp op, typename T, int N, int M, int K, int P>
diff --git a/include/ad/a2dobj.h b/include/ad/a2dobj.h
index 8f906817..2b383462 100644
--- a/include/ad/a2dobj.h
+++ b/include/ad/a2dobj.h
@@ -4,6 +4,7 @@
 #include "../a2ddefs.h"
 #include "a2dmat.h"
 #include "a2dvec.h"
+#include "adscalar.h"
 
 namespace A2D {
 
@@ -303,6 +304,11 @@ struct __get_matrix_rows<Mat<T, N, M>> {
   static constexpr int size = N;
 };
 
+template <template <typename, int> class SymMat, typename T, int N>
+struct __get_matrix_rows<SymMat<T, N>> {
+  static constexpr int size = N;
+};
+
 template <class T>
 struct get_matrix_rows : __get_matrix_rows<typename remove_a2dobj<T>::type> {
   static_assert(get_a2d_object_type<T>::value == ADObjType::MATRIX,
@@ -322,6 +328,11 @@ struct __get_matrix_columns<Mat<T, N, M>> {
   static constexpr int size = M;
 };
 
+template <template <typename, int> class SymMat, typename T, int N>
+struct __get_matrix_columns<SymMat<T, N>> {
+  static constexpr int size = N;
+};
+
 template <class T>
 struct get_matrix_columns
     : __get_matrix_columns<typename remove_a2dobj<T>::type> {
@@ -736,6 +747,155 @@ A2D_FUNCTION T* get_data(A2DObj<Vec<T, n>&>& vec) {
   return vec.value().get_data();
 }
 
+// new ADScalar get_data  (SPE)
+template <class T, int N>
+struct __is_numeric_type<ADScalar<T, N>> : std::is_floating_point<T> {};
+
+template <class T, int N>
+struct __is_numeric_type<ADScalar<std::complex<T>, N>>
+    : std::is_floating_point<T> {};
+
+template <int N>
+struct __get_object_numeric_type<ADScalar<double, N>> {
+  using type = ADScalar<double, N>;
+};
+
+template <int N>
+struct __get_object_numeric_type<ADScalar<std::complex<double>, N>> {
+  using type = ADScalar<std::complex<double>, N>;
+};
+
+template <typename T, int N,
+          std::enable_if_t<is_numeric_type<T>::value, bool> = true>
+ADScalar<T, N>& get_data(ADScalar<T, N>& value) {
+  return value;
+}
+
+template <typename T, int N,
+          std::enable_if_t<is_numeric_type<T>::value, bool> = true>
+const ADScalar<T, N>& get_data(const ADScalar<T, N>& value) {
+  return value;
+}
+
+template <typename T, int N,
+          std::enable_if_t<is_numeric_type<T>::value, bool> = true>
+ADScalar<T, N>& get_data(ADObj<ADScalar<T, N>>& value) {
+  return value.value();
+}
+
+template <typename T, int N,
+          std::enable_if_t<is_numeric_type<T>::value, bool> = true>
+const ADScalar<T, N>& get_data(const ADObj<ADScalar<T, N>>& value) {
+  return value.value();
+}
+
+template <typename T, int N,
+          std::enable_if_t<is_numeric_type<T>::value, bool> = true>
+ADScalar<T, N>& get_data(A2DObj<ADScalar<T, N>>& value) {
+  return value.value();
+}
+
+template <typename T, int N,
+          std::enable_if_t<is_numeric_type<T>::value, bool> = true>
+const ADScalar<T, N>& get_data(const A2DObj<ADScalar<T, N>>& value) {
+  return value.value();
+}
+
+/**
+ * @brief Get data pointers from objects
+ */
+template <typename T, int N, int m, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(Mat<ADScalar<T, N>, m, n>& mat) {
+  return mat.get_data();
+}
+
+template <typename T, int N, int m, int n>
+A2D_FUNCTION const ADScalar<T, N>* get_data(
+    const Mat<ADScalar<T, N>, m, n>& mat) {
+  return mat.get_data();
+}
+
+template <typename T, int N, int m, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(ADObj<Mat<ADScalar<T, N>, m, n>>& mat) {
+  return mat.value().get_data();
+}
+
+template <typename T, int N, int m, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(A2DObj<Mat<ADScalar<T, N>, m, n>>& mat) {
+  return mat.value().get_data();
+}
+
+template <typename T, int N, int m, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(ADObj<Mat<ADScalar<T, N>, m, n>&>& mat) {
+  return mat.value().get_data();
+}
+
+template <typename T, int N, int m, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(A2DObj<Mat<ADScalar<T, N>, m, n>&>& mat) {
+  return mat.value().get_data();
+}
+
+template <typename T, int N, int m>
+A2D_FUNCTION ADScalar<T, N>* get_data(SymMat<ADScalar<T, N>, m>& mat) {
+  return mat.get_data();
+}
+
+template <typename T, int N, int m>
+A2D_FUNCTION const ADScalar<T, N>* get_data(
+    const SymMat<ADScalar<T, N>, m>& mat) {
+  return mat.get_data();
+}
+
+template <typename T, int N, int m>
+A2D_FUNCTION ADScalar<T, N>* get_data(ADObj<SymMat<ADScalar<T, N>, m>>& mat) {
+  return mat.value().get_data();
+}
+
+template <typename T, int N, int m>
+A2D_FUNCTION ADScalar<T, N>* get_data(A2DObj<SymMat<ADScalar<T, N>, m>>& mat) {
+  return mat.value().get_data();
+}
+
+template <typename T, int N, int m>
+A2D_FUNCTION ADScalar<T, N>* get_data(ADObj<SymMat<ADScalar<T, N>, m>&>& mat) {
+  return mat.value().get_data();
+}
+
+template <typename T, int N, int m>
+A2D_FUNCTION ADScalar<T, N>* get_data(A2DObj<SymMat<ADScalar<T, N>, m>&>& mat) {
+  return mat.value().get_data();
+}
+
+template <typename T, int N, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(Vec<ADScalar<T, N>, n>& vec) {
+  return vec.get_data();
+}
+
+template <typename T, int N, int n>
+A2D_FUNCTION const ADScalar<T, N>* get_data(const Vec<ADScalar<T, N>, n>& vec) {
+  return vec.get_data();
+}
+
+template <typename T, int N, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(ADObj<Vec<ADScalar<T, N>, n>>& vec) {
+  return vec.value().get_data();
+}
+
+template <typename T, int N, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(A2DObj<Vec<ADScalar<T, N>, n>>& vec) {
+  return vec.value().get_data();
+}
+
+template <typename T, int N, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(ADObj<Vec<ADScalar<T, N>, n>&>& vec) {
+  return vec.value().get_data();
+}
+
+template <typename T, int N, int n>
+A2D_FUNCTION ADScalar<T, N>* get_data(A2DObj<Vec<ADScalar<T, N>, n>&>& vec) {
+  return vec.value().get_data();
+}
+
 }  // namespace A2D
 
 #endif  // A2D_OBJECTS_H
\ No newline at end of file
diff --git a/include/ad/a2dstack.h b/include/ad/a2dstack.h
index 2297d8d9..457d8e95 100644
--- a/include/ad/a2dstack.h
+++ b/include/ad/a2dstack.h
@@ -14,6 +14,7 @@ class OperationStack {
 
   A2D_FUNCTION OperationStack(Operations &&...s)
       : stack(std::forward<Operations>(s)...) {
+    // printf("in stack constructor\n");
     eval_<0>();
   }
 
@@ -66,6 +67,7 @@ class OperationStack {
 
   template <index_t index>
   A2D_FUNCTION void eval_() {
+    // printf("evaluating the stack\n");
     std::get<index>(stack).eval();
     if constexpr (index < num_ops - 1) {
       eval_<index + 1>();
@@ -132,6 +134,7 @@ class OperationStack {
  */
 template <class... Operations>
 A2D_FUNCTION auto MakeStack(Operations &&...s) {
+  // printf("in make stack\n");
   return OperationStack<Operations...>(std::forward<Operations>(s)...);
 }
 
diff --git a/include/ad/core/a2dmatinvcore.h b/include/ad/core/a2dmatinvcore.h
index bbd46bdf..68ae54c6 100644
--- a/include/ad/core/a2dmatinvcore.h
+++ b/include/ad/core/a2dmatinvcore.h
@@ -24,15 +24,15 @@ A2D_FUNCTION void MatInvCore(const T A[], T Ainv[]) {
     T detinv = 1.0 / det;
 
     Ainv[0] = (A[4] * A[8] - A[5] * A[7]) * detinv;
-    Ainv[1] = -(A[1] * A[8] - A[2] * A[7]) * detinv;
+    Ainv[1] = -1.0 * (A[1] * A[8] - A[2] * A[7]) * detinv;
     Ainv[2] = (A[1] * A[5] - A[2] * A[4]) * detinv;
 
-    Ainv[3] = -(A[3] * A[8] - A[5] * A[6]) * detinv;
+    Ainv[3] = -1.0 * (A[3] * A[8] - A[5] * A[6]) * detinv;
     Ainv[4] = (A[0] * A[8] - A[2] * A[6]) * detinv;
-    Ainv[5] = -(A[0] * A[5] - A[2] * A[3]) * detinv;
+    Ainv[5] = -1.0 * (A[0] * A[5] - A[2] * A[3]) * detinv;
 
     Ainv[6] = (A[3] * A[7] - A[4] * A[6]) * detinv;
-    Ainv[7] = -(A[0] * A[7] - A[1] * A[6]) * detinv;
+    Ainv[7] = -1.0 * (A[0] * A[7] - A[1] * A[6]) * detinv;
     Ainv[8] = (A[0] * A[4] - A[1] * A[3]) * detinv;
   }
 }
diff --git a/include/ad/core/a2dmatveccore.h b/include/ad/core/a2dmatveccore.h
index d1ff4630..396f4aa5 100644
--- a/include/ad/core/a2dmatveccore.h
+++ b/include/ad/core/a2dmatveccore.h
@@ -118,6 +118,16 @@ A2D_FUNCTION T MatInnerCore(const T A[], const T x[], const T y[]) noexcept {
   return value;
 }
 
+// Scale an M x N matrix stored contiguously: B = alpha * A
+template <typename T, int M, int N>
+A2D_FUNCTION void MatScaleCore(const T alpha, const T A[], T B[]) noexcept {
+  constexpr int num_entries = M * N;
+  // the entries are visited in storage order, so a single flat loop suffices
+  for (int k = 0; k < num_entries; k++) {
+    B[k] = alpha * A[k];
+  }
+}
+
 }  // namespace A2D
 
 #endif  //  A2D_MAT_VEC_CORE_H
\ No newline at end of file
diff --git a/include/ad/shell/a2dmatrotateframe.h b/include/ad/shell/a2dmatrotateframe.h
new file mode 100644
index 00000000..bd8a4b10
--- /dev/null
+++ b/include/ad/shell/a2dmatrotateframe.h
@@ -0,0 +1,360 @@
+#ifndef A2d_MAT_ROTATE_FRAME_H
+#define A2d_MAT_ROTATE_FRAME_H
+
+#include <type_traits>
+
+#include "../../a2ddefs.h"
+#include "../a2dmat.h"
+#include "../a2dstack.h"
+#include "../a2dtest.h"
+#include "../core/a2dgemmcore.h"
+
+namespace A2D {
+
+/*
+  Define an expression for C = A^T * B * A
+*/
+
+template <typename T, int N, bool additive = false>
+A2D_FUNCTION void MatMatSquareMult(const T A[], const T B[], T C[]) {
+  MatMatMultCore<T, N, N, N, N, N, N, MatOp::NORMAL, MatOp::NORMAL, additive>(
+      A, B, C);
+}
+
+template <typename T, int N, bool additive = false>
+A2D_FUNCTION void MatMatLeftTrSquareMult(const T A[], const T B[], T C[]) {
+  MatMatMultCore<T, N, N, N, N, N, N, MatOp::TRANSPOSE, MatOp::NORMAL,
+                 additive>(A, B, C);
+}
+
+template <typename T, int N, bool additive = false>
+A2D_FUNCTION void MatMatRightTrSquareMult(const T A[], const T B[], T C[]) {
+  MatMatMultCore<T, N, N, N, N, N, N, MatOp::NORMAL, MatOp::TRANSPOSE,
+                 additive>(A, B, C);
+}
+
+template <typename T, int N>
+A2D_FUNCTION void MatRotateFrame(const Mat<T, N, N>& A, const Mat<T, N, N>& B,
+                                 Mat<T, N, N>& C) {
+  Mat<T, N, N> Ctemp;
+  // Ctemp = A^T * B
+  MatMatLeftTrSquareMult<T, N>(get_data(A), get_data(B), get_data(Ctemp));
+  // C = Ctemp * A
+  MatMatSquareMult<T, N>(get_data(Ctemp), get_data(A), get_data(C));
+}
+
+template <class Atype, class Btype, class Ctype>
+class MatRotateFrameExpr {
+ public:
+  // Extract the numeric type to use
+  typedef typename get_object_numeric_type<Ctype>::type T;
+
+  // Extract the dimensions of the matrices
+  // if (get_diff_type::)
+  // how to get matrix rows for symMat?
+  // optional SymMat or Mat here
+  // const bool A_issym = get_a2d_object_type<Atype>::value == ADObjType::SYMMAT
+  static constexpr int N = get_matrix_rows<Atype>::size;
+  static constexpr int M = get_matrix_columns<Atype>::size;
+  static constexpr int K = get_matrix_rows<Btype>::size;
+  static constexpr int L = get_matrix_columns<Btype>::size;
+  static constexpr int P = get_matrix_rows<Ctype>::size;
+  static constexpr int Q = get_matrix_columns<Ctype>::size;
+
+  // check all square matrices
+  static_assert((N == M) && (M == K) && (K == L) && (L == P) && (P == Q),
+                "all matrices in MatRotateFrameExpr must be same N x N square "
+                "matrix size.");
+
+  // Get the types of the matrices
+  static constexpr ADiffType adA = get_diff_type<Atype>::diff_type;
+  static constexpr ADiffType adB = get_diff_type<Btype>::diff_type;
+
+  // Get the differentiation order from the output
+  static constexpr ADorder order = get_diff_order<Ctype>::order;
+
+  A2D_FUNCTION MatRotateFrameExpr(Atype& A, Btype& B, Ctype& C)
+      : A(A), B(B), C(C) {}
+
+  A2D_FUNCTION void eval() {
+    Mat<T, N, N> Ctemp;
+    // Ctemp = A^T * B
+    MatMatLeftTrSquareMult<T, N>(get_data(A), get_data(B), get_data(Ctemp));
+    // C = Ctemp * A
+    MatMatSquareMult<T, N>(get_data(Ctemp), get_data(A), get_data(C));
+  }
+
+  A2D_FUNCTION void bzero() { C.bzero(); }
+
+  template <ADorder forder>
+  A2D_FUNCTION void forward() {
+    static_assert(
+        !(order == ADorder::FIRST and forder == ADorder::SECOND),
+        "Can't perform second order forward with first order objects");
+    constexpr ADseed seed = conditional_value<ADseed, forder == ADorder::FIRST,
+                                              ADseed::b, ADseed::p>::value;
+
+    // full expression of forward pass:
+    //   Cdot = Adot^T * B * A + A^T * B * Adot + A^T * Bdot * A
+
+    if constexpr (adA == ADiffType::ACTIVE and adB == ADiffType::ACTIVE) {
+      Mat<T, N, N> Ctemp;
+      // Adot term1
+      MatMatLeftTrSquareMult<T, N>(GetSeed<seed>::get_data(A), get_data(B),
+                                   get_data(Ctemp));
+      MatMatSquareMult<T, N>(get_data(Ctemp), get_data(A),
+                             GetSeed<seed>::get_data(C));
+      // Adot term2
+      MatMatLeftTrSquareMult<T, N>(get_data(A), get_data(B), get_data(Ctemp));
+      MatMatSquareMult<T, N, true>(get_data(Ctemp), GetSeed<seed>::get_data(A),
+                                   GetSeed<seed>::get_data(C));
+      // Bdot term
+      MatMatLeftTrSquareMult<T, N>(get_data(A), GetSeed<seed>::get_data(B),
+                                   get_data(Ctemp));
+      MatMatSquareMult<T, N, true>(get_data(Ctemp), get_data(A),
+                                   GetSeed<seed>::get_data(C));
+
+    } else if constexpr (adA == ADiffType::ACTIVE) {
+      Mat<T, N, N> Ctemp;
+      // Adot term1
+      MatMatLeftTrSquareMult<T, N>(GetSeed<seed>::get_data(A), get_data(B),
+                                   get_data(Ctemp));
+      MatMatSquareMult<T, N>(get_data(Ctemp), get_data(A),
+                             GetSeed<seed>::get_data(C));
+      // Adot term2
+      MatMatLeftTrSquareMult<T, N>(get_data(A), get_data(B), get_data(Ctemp));
+      MatMatSquareMult<T, N, true>(get_data(Ctemp), GetSeed<seed>::get_data(A),
+                                   GetSeed<seed>::get_data(C));
+
+    } else if constexpr (adB == ADiffType::ACTIVE) {
+      Mat<T, N, N> Ctemp;
+      // Bdot term
+      MatMatLeftTrSquareMult<T, N>(get_data(A), GetSeed<seed>::get_data(B),
+                                   get_data(Ctemp));
+      MatMatSquareMult<T, N>(get_data(Ctemp), get_data(A),
+                             GetSeed<seed>::get_data(C));
+    }
+  }
+
+  A2D_FUNCTION void reverse() {
+    if constexpr (adA == ADiffType::ACTIVE) {
+      Mat<T, N, N> temp;
+      // full expression: Abar += B^T * A * Cbar + B * A * Cbar^T
+      // first term B^T * A * Cbar
+      MatMatLeftTrSquareMult<T, N>(get_data(B), get_data(A), get_data(temp));
+      MatMatSquareMult<T, N, true>(get_data(temp),
+                                   GetSeed<ADseed::b>::get_data(C),
+                                   GetSeed<ADseed::b>::get_data(A));
+
+      // second term B * A * Cbar^T added in
+      MatMatSquareMult<T, N>(get_data(B), get_data(A), get_data(temp));
+      MatMatRightTrSquareMult<T, N, true>(get_data(temp),
+                                          GetSeed<ADseed::b>::get_data(C),
+                                          GetSeed<ADseed::b>::get_data(A));
+    }
+    if constexpr (adB == ADiffType::ACTIVE) {
+      Mat<T, N, N> temp;
+      // full expression Bbar += A * Cbar * A^T
+      MatMatSquareMult<T, N>(get_data(A), GetSeed<ADseed::b>::get_data(C),
+                             get_data(temp));
+      MatMatRightTrSquareMult<T, N, true>(get_data(temp), get_data(A),
+                                          GetSeed<ADseed::b>::get_data(B));
+    }
+  }
+
+  A2D_FUNCTION void hzero() { C.hzero(); }
+
+  A2D_FUNCTION void hreverse() {
+    static_assert(order == ADorder::SECOND,
+                  "hreverse() can be called for only second order objects.");
+
+    // HJP backpropagation based on Aaron's paper and my ppt
+    //
+    // Ahat += B^T * A * Chat + B * A * Chat^T +
+    //         Bdot^T * A * Cbar + Bdot * A * Cbar^T +
+    //         B^T * Adot * Cbar + B * Adot * Cbar^T
+    //
+    // Bhat += A * Chat * A^T +
+    //         Adot * Cbar * A^T + A * Cbar * Adot^T
+
+    if constexpr (adA == ADiffType::ACTIVE) {
+      Mat<T, N, N> temp;
+      // term1 for Ahat : B^T * A * Chat
+      MatMatLeftTrSquareMult<T, N>(get_data(B), get_data(A), get_data(temp));
+      MatMatSquareMult<T, N, true>(get_data(temp),
+                                   GetSeed<ADseed::h>::get_data(C),
+                                   GetSeed<ADseed::h>::get_data(A));
+
+      // term2 for Ahat : B * A * Chat^T
+      MatMatSquareMult<T, N>(get_data(B), get_data(A), get_data(temp));
+      MatMatRightTrSquareMult<T, N, true>(get_data(temp),
+                                          GetSeed<ADseed::h>::get_data(C),
+                                          GetSeed<ADseed::h>::get_data(A));
+
+      // term 5 for Ahat : B^T * Adot * Cbar
+      MatMatLeftTrSquareMult<T, N>(get_data(B), GetSeed<ADseed::p>::get_data(A),
+                                   get_data(temp));
+      MatMatSquareMult<T, N, true>(get_data(temp),
+                                   GetSeed<ADseed::b>::get_data(C),
+                                   GetSeed<ADseed::h>::get_data(A));
+
+      // term 6 for Ahat : B * Adot * Cbar^T
+      MatMatSquareMult<T, N>(get_data(B), GetSeed<ADseed::p>::get_data(A),
+                             get_data(temp));
+      MatMatRightTrSquareMult<T, N, true>(get_data(temp),
+                                          GetSeed<ADseed::b>::get_data(C),
+                                          GetSeed<ADseed::h>::get_data(A));
+    }
+
+    if constexpr (adB == ADiffType::ACTIVE) {
+      Mat<T, N, N> temp;
+      // term 1 for Bhat : A * Chat * A^T
+      MatMatSquareMult<T, N>(get_data(A), GetSeed<ADseed::h>::get_data(C),
+                             get_data(temp));
+      MatMatRightTrSquareMult<T, N, true>(get_data(temp), get_data(A),
+                                          GetSeed<ADseed::h>::get_data(B));
+    }
+
+    if constexpr (adA == ADiffType::ACTIVE && adB == ADiffType::ACTIVE) {
+      // only the remaining mixed A-B cross terms show up here
+      Mat<T, N, N> temp;
+
+      // term3 for Ahat : Bdot^T * A * Cbar
+      MatMatLeftTrSquareMult<T, N>(GetSeed<ADseed::p>::get_data(B), get_data(A),
+                                   get_data(temp));
+      MatMatSquareMult<T, N, true>(get_data(temp),
+                                   GetSeed<ADseed::b>::get_data(C),
+                                   GetSeed<ADseed::h>::get_data(A));
+
+      // term4 for Ahat : Bdot * A * Cbar^T
+      MatMatSquareMult<T, N>(GetSeed<ADseed::p>::get_data(B), get_data(A),
+                             get_data(temp));
+      MatMatRightTrSquareMult<T, N, true>(get_data(temp),
+                                          GetSeed<ADseed::b>::get_data(C),
+                                          GetSeed<ADseed::h>::get_data(A));
+
+      // term2 for Bhat : Adot * Cbar * A^T
+      MatMatSquareMult<T, N>(GetSeed<ADseed::p>::get_data(A),
+                             GetSeed<ADseed::b>::get_data(C), get_data(temp));
+      MatMatRightTrSquareMult<T, N, true>(get_data(temp), get_data(A),
+                                          GetSeed<ADseed::h>::get_data(B));
+
+      // term3 for Bhat : A * Cbar * Adot^T
+      MatMatSquareMult<T, N>(get_data(A), GetSeed<ADseed::b>::get_data(C),
+                             get_data(temp));
+      MatMatRightTrSquareMult<T, N, true>(get_data(temp),
+                                          GetSeed<ADseed::p>::get_data(A),
+                                          GetSeed<ADseed::h>::get_data(B));
+    }
+  }
+
+ private:
+  Atype& A;
+  Btype& B;
+  Ctype& C;
+};
+
+// all implementations
+template <class Atype, class Btype, class Ctype>
+A2D_FUNCTION auto MatRotateFrame(ADObj<Atype>& A, ADObj<Btype>& B,
+                                 ADObj<Ctype>& C) {
+  return MatRotateFrameExpr<ADObj<Atype>, ADObj<Btype>, ADObj<Ctype>>(A, B, C);
+}
+
+template <class Atype, class Btype, class Ctype>
+A2D_FUNCTION auto MatRotateFrame(ADObj<Atype>& A, Btype& B, ADObj<Ctype>& C) {
+  return MatRotateFrameExpr<ADObj<Atype>, Btype, ADObj<Ctype>>(A, B, C);
+}
+
+template <class Atype, class Btype, class Ctype>
+A2D_FUNCTION auto MatRotateFrame(Atype& A, ADObj<Btype>& B, ADObj<Ctype>& C) {
+  return MatRotateFrameExpr<Atype, ADObj<Btype>, ADObj<Ctype>>(A, B, C);
+}
+
+template <class Atype, class Btype, class Ctype>
+A2D_FUNCTION auto MatRotateFrame(A2DObj<Atype>& A, A2DObj<Btype>& B,
+                                 A2DObj<Ctype>& C) {
+  return MatRotateFrameExpr<A2DObj<Atype>, A2DObj<Btype>, A2DObj<Ctype>>(A, B,
+                                                                         C);
+}
+
+template <class Atype, class Btype, class Ctype>
+A2D_FUNCTION auto MatRotateFrame(A2DObj<Atype>& A, Btype& B, A2DObj<Ctype>& C) {
+  return MatRotateFrameExpr<A2DObj<Atype>, Btype, A2DObj<Ctype>>(A, B, C);
+}
+
+template <class Atype, class Btype, class Ctype>
+A2D_FUNCTION auto MatRotateFrame(Atype& A, A2DObj<Btype>& B, A2DObj<Ctype>& C) {
+  return MatRotateFrameExpr<Atype, A2DObj<Btype>, A2DObj<Ctype>>(A, B, C);
+}
+
+namespace Test {
+
+template <typename T, int N>
+class MatRotateFrameTest
+    : public A2DTest<T, Mat<T, N, N>, Mat<T, N, N>, Mat<T, N, N>> {
+ public:
+  using Input = VarTuple<T, Mat<T, N, N>, Mat<T, N, N>>;
+  using Output = VarTuple<T, Mat<T, N, N>>;
+
+  // Assemble a string to describe the test, e.g. "MatRotateFrame<3,3>"
+  std::string name() {
+    std::stringstream s;
+    s << "MatRotateFrame<" << N << "," << N << ">";
+    return s.str();
+  }
+
+  // Evaluate the frame rotation C = A^T * B * A on passive matrices
+  Output eval(const Input& x) {
+    Mat<T, N, N> A, B, C;
+
+    x.get_values(A, B);
+    MatRotateFrame(A, B, C);
+    return MakeVarTuple<T>(C);
+  }
+
+  // Compute the derivative: seed C.bvalue(), then run stack.reverse()
+  void deriv(const Output& seed, const Input& x, Input& g) {
+    ADObj<Mat<T, N, N>> A, B, C;
+
+    x.get_values(A.value(), B.value());
+    auto stack = MakeStack(MatRotateFrame(A, B, C));
+    seed.get_values(C.bvalue());
+    stack.reverse();
+    g.set_values(A.bvalue(), B.bvalue());
+  }
+
+  // Compute the second-derivative product in direction p via stack.hproduct()
+  void hprod(const Output& seed, const Output& hval, const Input& x,
+             const Input& p, Input& h) {
+    A2DObj<Mat<T, N, N>> A, B, C;
+
+    x.get_values(A.value(), B.value());
+    p.get_values(A.pvalue(), B.pvalue());
+    auto stack = MakeStack(MatRotateFrame(A, B, C));
+    seed.get_values(C.bvalue());
+    hval.get_values(C.hvalue());
+    stack.hproduct();
+    h.set_values(A.hvalue(), B.hvalue());
+  }
+};
+
+// Run MatRotateFrameTest for N = 2, 3, 4; inline because it lives in a header
+inline bool MatRotateFrameTestAll(bool component = false,
+                                  bool write_output = true) {
+  using Tc = std::complex<double>;
+  bool passed = true;
+  MatRotateFrameTest<Tc, 2> test1;
+  passed = passed && Run(test1, component, write_output);
+  MatRotateFrameTest<Tc, 3> test2;
+  passed = passed && Run(test2, component, write_output);
+  MatRotateFrameTest<Tc, 4> test3;
+  passed = passed && Run(test3, component, write_output);
+  return passed;
+}
+
+}  // namespace Test
+
+}  // namespace A2D
+
+#endif  // A2d_MAT_ROTATE_FRAME_H
\ No newline at end of file
diff --git a/include/ad/shell/a2dshellassembleframe.h b/include/ad/shell/a2dshellassembleframe.h
new file mode 100644
index 00000000..2de6d062
--- /dev/null
+++ b/include/ad/shell/a2dshellassembleframe.h
@@ -0,0 +1,252 @@
+#ifndef A2D_SHELL_ASSEMBLE_FRAME_H
+#define A2D_SHELL_ASSEMBLE_FRAME_H
+
+#include <type_traits>
+
+#include "../../a2ddefs.h"
+#include "../a2dmat.h"
+#include "../a2dstack.h"
+#include "../a2dtest.h"
+#include "../a2dvec.h"
+
+namespace A2D {
+
+template <typename T>
+A2D_FUNCTION void ShellAssembleFrameCore(const T Axi[], const T n[],
+                                         T frame[]) {
+  // frame = [Axi | n] (row-major): row i is {Axi[2i], Axi[2i+1], n[i]}
+  frame[0] = Axi[0];
+  frame[1] = Axi[1];
+  frame[2] = n[0];
+
+  frame[3] = Axi[2];
+  frame[4] = Axi[3];
+  frame[5] = n[1];
+
+  frame[6] = Axi[4];
+  frame[7] = Axi[5];
+  frame[8] = n[2];
+}
+
+template <typename T>
+A2D_FUNCTION void ShellAssembleFrameReverseCore(const T frameb[], T Axib[],
+                                                T nb[]) {
+  // Accumulate (+=) frame seeds: entries 3i, 3i+1 -> Axib; entry 3i+2 -> nb
+  Axib[0] += frameb[0];
+  Axib[1] += frameb[1];
+  nb[0] += frameb[2];
+
+  Axib[2] += frameb[3];
+  Axib[3] += frameb[4];
+  nb[1] += frameb[5];
+
+  Axib[4] += frameb[6];
+  Axib[5] += frameb[7];
+  nb[2] += frameb[8];
+}
+// Passive overload: assemble frame from plain Mat / Vec data (no AD objects)
+template <typename T>
+A2D_FUNCTION void ShellAssembleFrame(const Mat<T, 3, 2> &Axi,
+                                     const Vec<T, 3> &n, Mat<T, 3, 3> &frame) {
+  ShellAssembleFrameCore<T>(get_data(Axi), get_data(n), get_data(frame));
+}
+
+template <class Axitype, class ntype, class frametype>
+class ShellAssembleFrameExpr {
+ public:
+  // Expression for frame = [Axi | n]; T is the numeric type of frame
+  typedef typename get_object_numeric_type<frametype>::type T;
+
+  // Sizes: Axi is N x M, n has K entries, frame is L x P
+  static constexpr int N = get_matrix_rows<Axitype>::size;
+  static constexpr int M = get_matrix_columns<Axitype>::size;
+  static constexpr int K = get_vec_size<ntype>::size;
+  static constexpr int L = get_matrix_rows<frametype>::size;
+  static constexpr int P = get_matrix_columns<frametype>::size;
+
+  // Differentiation type (e.g. ADiffType::ACTIVE) of each argument
+  static constexpr ADiffType adAxi = get_diff_type<Axitype>::diff_type;
+  static constexpr ADiffType adn = get_diff_type<ntype>::diff_type;
+  static constexpr ADiffType adframe = get_diff_type<frametype>::diff_type;
+
+  // same-type assert left disabled: forward() handles a non-active Axi or n
+  //   static_assert(((get_a2d_object_type<Axitype>::value ==
+  //               get_a2d_object_type<ntype>::value) &&
+  //              (get_a2d_object_type<ntype>::value ==
+  //               get_a2d_object_type<frametype>::value)),
+  //             "Inputs are not all of the same type");
+
+  // assert correct sizes
+  static_assert((N == 3) && (M == 2) && (K == 3) && (L == 3) && (P == 3),
+                "AssembleFrame (3,2);(3) => (3,3)");
+
+  // get the differentiation order
+  static constexpr ADorder order = get_diff_order<frametype>::order;
+
+  A2D_FUNCTION ShellAssembleFrameExpr(Axitype &Axi, ntype &n, frametype &frame)
+      : Axi(Axi), n(n), frame(frame) {}
+
+  A2D_FUNCTION void eval() {
+    ShellAssembleFrameCore<T>(get_data(Axi), get_data(n), get_data(frame));
+  }
+
+  A2D_FUNCTION void bzero() { frame.bzero(); }
+  // Forward mode: a non-active input is replaced by a default-constructed dummy
+  template <ADorder forder>
+  A2D_FUNCTION void forward() {
+    constexpr ADseed seed = conditional_value<ADseed, forder == ADorder::FIRST,
+                                              ADseed::b, ADseed::p>::value;
+
+    if constexpr (adAxi == ADiffType::ACTIVE && adn == ADiffType::ACTIVE) {
+      ShellAssembleFrameCore<T>(GetSeed<seed>::get_data(Axi),
+                                GetSeed<seed>::get_data(n),
+                                GetSeed<seed>::get_data(frame));
+    } else if constexpr (adn == ADiffType::ACTIVE) {
+      Mat<T, N, M> Axi_void;  // NOTE(review): relies on Mat() zero-init
+      ShellAssembleFrameCore<T>(get_data(Axi_void), GetSeed<seed>::get_data(n),
+                                GetSeed<seed>::get_data(frame));
+    } else if constexpr (adAxi == ADiffType::ACTIVE) {
+      Vec<T, K> n_void;  // NOTE(review): relies on Vec() zero-init
+      ShellAssembleFrameCore<T>(GetSeed<seed>::get_data(Axi), get_data(n_void),
+                                GetSeed<seed>::get_data(frame));
+    }
+  }
+  // Reverse mode: each call passes a scratch dummy for the input it skips
+  A2D_FUNCTION void reverse() {
+    constexpr ADseed seed = ADseed::b;
+    if constexpr (adn == ADiffType::ACTIVE) {
+      Mat<T, N, M> Axi_void;
+      ShellAssembleFrameReverseCore<T>(GetSeed<seed>::get_data(frame),
+                                       get_data(Axi_void),
+                                       GetSeed<seed>::get_data(n));
+    }
+    if constexpr (adAxi == ADiffType::ACTIVE) {
+      Vec<T, K> n_void;
+      ShellAssembleFrameReverseCore<T>(GetSeed<seed>::get_data(frame),
+                                       GetSeed<seed>::get_data(Axi),
+                                       get_data(n_void));
+    }
+  }
+
+  A2D_FUNCTION void hzero() { frame.hzero(); }
+  // Same scatter as reverse(), applied to the h seeds instead of the b seeds
+  A2D_FUNCTION void hreverse() {
+    constexpr ADseed seed = ADseed::h;
+    if constexpr (adn == ADiffType::ACTIVE) {
+      Mat<T, N, M> Axi_void;
+      ShellAssembleFrameReverseCore<T>(GetSeed<seed>::get_data(frame),
+                                       get_data(Axi_void),
+                                       GetSeed<seed>::get_data(n));
+    }
+    if constexpr (adAxi == ADiffType::ACTIVE) {
+      Vec<T, K> n_void;
+      ShellAssembleFrameReverseCore<T>(GetSeed<seed>::get_data(frame),
+                                       GetSeed<seed>::get_data(Axi),
+                                       get_data(n_void));
+    }
+  }
+
+  Axitype &Axi;
+  ntype &n;
+  frametype &frame;
+};  // end of ShellAssembleFrameExpr class definition
+
+// Expression factory: all three arguments wrapped in ADObj
+template <class Axitype, class ntype, class frametype>
+A2D_FUNCTION auto ShellAssembleFrame(ADObj<Axitype> &Axi, ADObj<ntype> &n,
+                                     ADObj<frametype> &frame) {
+  return ShellAssembleFrameExpr<ADObj<Axitype>, ADObj<ntype>, ADObj<frametype>>(
+      Axi, n, frame);
+}
+// Expression factory: ADObj frame, Axi and n passed through as given
+template <class Axitype, class ntype, class frametype>
+A2D_FUNCTION auto ShellAssembleFrame(Axitype &Axi, ntype &n,
+                                     ADObj<frametype> &frame) {
+  return ShellAssembleFrameExpr<Axitype, ntype, ADObj<frametype>>(Axi, n,
+                                                                  frame);
+}
+// Expression factory: all three arguments wrapped in A2DObj
+template <class Axitype, class ntype, class frametype>
+A2D_FUNCTION auto ShellAssembleFrame(A2DObj<Axitype> &Axi, A2DObj<ntype> &n,
+                                     A2DObj<frametype> &frame) {
+  return ShellAssembleFrameExpr<A2DObj<Axitype>, A2DObj<ntype>,
+                                A2DObj<frametype>>(Axi, n, frame);
+}
+// Expression factory: A2DObj frame, Axi and n passed through as given
+template <class Axitype, class ntype, class frametype>
+A2D_FUNCTION auto ShellAssembleFrame(Axitype &Axi, ntype &n,
+                                     A2DObj<frametype> &frame) {
+  return ShellAssembleFrameExpr<Axitype, ntype, A2DObj<frametype>>(Axi, n,
+                                                                   frame);
+}
+
+namespace Test {
+// Test of ShellAssembleFrame: inputs (Axi 3x2, n 3) -> output frame 3x3
+template <typename T>
+class ShellAssembleFrameTest
+    : public A2DTest<T, Mat<T, 3, 3>, Mat<T, 3, 2>, Vec<T, 3>> {
+ public:
+  using Input = VarTuple<T, Mat<T, 3, 2>, Vec<T, 3>>;
+  using Output = VarTuple<T, Mat<T, 3, 3>>;
+
+  // Label for this test; there are no template sizes to print, so no "<...>"
+  std::string name() {
+    std::stringstream s;
+    s << "ShellAssembleFrame";
+    return s.str();
+  }
+
+  // Evaluate frame = ShellAssembleFrame(Axi, n) with plain (non-AD) objects
+  Output eval(const Input &x) {
+    Mat<T, 3, 2> Axi;
+    Vec<T, 3> n;
+    Mat<T, 3, 3> frame;
+
+    x.get_values(Axi, n);
+    ShellAssembleFrame(Axi, n, frame);
+    return MakeVarTuple<T>(frame);
+  }
+
+  // Reverse mode: seed frame.bvalue(), run the stack, return Axi/n bvalue()
+  void deriv(const Output &seed, const Input &x, Input &g) {
+    ADObj<Mat<T, 3, 2>> Axi;
+    ADObj<Vec<T, 3>> n;
+    ADObj<Mat<T, 3, 3>> frame;
+
+    x.get_values(Axi.value(), n.value());
+    auto stack = MakeStack(ShellAssembleFrame(Axi, n, frame));
+    seed.get_values(frame.bvalue());
+    stack.reverse();
+    g.set_values(Axi.bvalue(), n.bvalue());
+  }
+
+  // Second order: p -> pvalue(), seed/hval -> frame seeds, h <- hvalue()
+  void hprod(const Output &seed, const Output &hval, const Input &x,
+             const Input &p, Input &h) {
+    A2DObj<Mat<T, 3, 2>> Axi;
+    A2DObj<Vec<T, 3>> n;
+    A2DObj<Mat<T, 3, 3>> frame;
+
+    x.get_values(Axi.value(), n.value());
+    p.get_values(Axi.pvalue(), n.pvalue());
+    auto stack = MakeStack(ShellAssembleFrame(Axi, n, frame));
+    seed.get_values(frame.bvalue());
+    hval.get_values(frame.hvalue());
+    stack.hproduct();
+    h.set_values(Axi.hvalue(), n.hvalue());
+  }
+};
+
+// Run the ShellAssembleFrame test; inline avoids ODR errors across TUs.
+inline bool ShellAssembleFrameTestAll(bool component = false,
+                                      bool write_output = true) {
+  using Tc = std::complex<double>;
+  ShellAssembleFrameTest<Tc> test;
+  bool passed = Run(test, component, write_output);
+  return passed;
+}
+
+}  // namespace Test
+
+}  // namespace A2D
+#endif  // A2D_SHELL_ASSEMBLE_FRAME_H
\ No newline at end of file
diff --git a/include/ad/shell/a2dshellstrain.h b/include/ad/shell/a2dshellstrain.h
new file mode 100644
index 00000000..c7bcaa71
--- /dev/null
+++ b/include/ad/shell/a2dshellstrain.h
@@ -0,0 +1,501 @@
+#ifndef A2D_SHELL_STRAIN_H
+#define A2D_SHELL_STRAIN_H
+
+#include <type_traits>
+
+#include "../../a2ddefs.h"
+#include "../a2dmat.h"
+#include "../a2dstack.h"
+#include "../a2dtest.h"
+// #include "../core/a2dgreenstraincore.h"
+
+namespace A2D {
+
+enum class ShellStrainType { LINEAR, NONLINEAR };  // strain kernel selector
+
+template <typename T>
+A2D_FUNCTION void LinearShellStrainCore(const T u0x[], const T u1x[],
+                                        const T e0ty[], const T et[], T e[]) {
+  // In-plane strains from the tying strains e0ty (u0x is not read here)
+  e[0] = e0ty[0];        // e11
+  e[1] = e0ty[3];        // e22
+  e[2] = 2.0 * e0ty[1];  // e12
+
+  // Bending strains: linear in u1x only
+  e[3] = u1x[0];           // k11
+  e[4] = u1x[4];           // k22
+  e[5] = u1x[1] + u1x[3];  // k12
+
+  // Add the components of the shear strain
+  e[6] = 2.0 * e0ty[4];  // e23, transverse shear
+  e[7] = 2.0 * e0ty[2];  // e13, transverse shear
+  e[8] = et[0];          // e12 (drill strain)
+}
+
+template <typename T>
+A2D_FUNCTION void LinearShellStrainForwardCore(const T u0x[], const T u1x[],
+                                               const T e0ty[], const T et[],
+                                               T e[]) {
+  // Tangent version: identical to LinearShellStrainCore since the map is linear
+  e[0] = e0ty[0];        // e11
+  e[1] = e0ty[3];        // e22
+  e[2] = 2.0 * e0ty[1];  // e12
+
+  // Bending strains: linear in u1x only (u0x is not read here)
+  e[3] = u1x[0];           // k11
+  e[4] = u1x[4];           // k22
+  e[5] = u1x[1] + u1x[3];  // k12
+
+  // Add the components of the shear strain
+  e[6] = 2.0 * e0ty[4];  // e23, transverse shear
+  e[7] = 2.0 * e0ty[2];  // e13, transverse shear
+  e[8] = et[0];          // e12 (drill strain)
+}
+
+template <typename T>
+A2D_FUNCTION void LinearShellStrainReverseCore(const T eb[], T u0xb[], T u1xb[],
+                                               T e0tyb[], T etb[]) {
+  // In-plane strain seeds accumulate into e0tyb (u0xb is never written)
+  e0tyb[0] += eb[0];        // e11
+  e0tyb[3] += eb[1];        // e22
+  e0tyb[1] += 2.0 * eb[2];  // e12
+
+  // Bending strain seeds accumulate into u1xb
+  u1xb[0] += eb[3];  // k11
+  u1xb[4] += eb[4];  // k22
+  u1xb[1] += eb[5];  // k12
+  u1xb[3] += eb[5];  // k12
+
+  // Shear seeds accumulate into e0tyb, the drill seed into etb
+  e0tyb[4] += 2.0 * eb[6];  // e23, transverse shear
+  e0tyb[2] += 2.0 * eb[7];  // e13, transverse shear
+  etb[0] += eb[8];          // e12 (drill strain)
+}
+
+template <typename T>
+A2D_FUNCTION void NonlinearShellStrainCore(const T u0x[], const T u1x[],
+                                           const T e0ty[], const T et[],
+                                           T e[]) {
+  // Evaluate the in-plane strains from the tying strain expressions
+  e[0] = e0ty[0];        // e11
+  e[1] = e0ty[3];        // e22
+  e[2] = 2.0 * e0ty[1];  // e12
+
+  // Bending strains: linear u1x terms plus bilinear u0x * u1x products
+  e[3] = u1x[0] + (u0x[0] * u1x[0] + u0x[3] * u1x[3] + u0x[6] * u1x[6]);  // k11
+  e[4] = u1x[4] + (u0x[1] * u1x[1] + u0x[4] * u1x[4] + u0x[7] * u1x[7]);  // k22
+  e[5] = u1x[1] + u1x[3] +
+         (u0x[0] * u1x[1] + u0x[3] * u1x[4] + u0x[6] * u1x[7] +
+          u1x[0] * u0x[1] + u1x[3] * u0x[4] + u1x[6] * u0x[7]);  // k12
+
+  // Add the components of the shear strain
+  e[6] = 2.0 * e0ty[4];  // e23, transverse shear
+  e[7] = 2.0 * e0ty[2];  // e13, transverse shear
+  e[8] = et[0];          // e12 (drill strain)
+}
+
+template <typename T>
+A2D_FUNCTION void NonlinearShellStrainForwardCore(const T u0x[], const T u1x[],
+                                                  const T e0ty[], const T et[],
+                                                  T e[]) {
+  // NOTE(review): identical to NonlinearShellStrainCore; not a linearization
+  e[0] = e0ty[0];        // e11
+  e[1] = e0ty[3];        // e22
+  e[2] = 2.0 * e0ty[1];  // e12
+
+  // Bending terms multiply the inputs together: quadratic if given seeds
+  e[3] = u1x[0] + (u0x[0] * u1x[0] + u0x[3] * u1x[3] + u0x[6] * u1x[6]);  // k11
+  e[4] = u1x[4] + (u0x[1] * u1x[1] + u0x[4] * u1x[4] + u0x[7] * u1x[7]);  // k22
+  e[5] = u1x[1] + u1x[3] +
+         (u0x[0] * u1x[1] + u0x[3] * u1x[4] + u0x[6] * u1x[7] +
+          u1x[0] * u0x[1] + u1x[3] * u0x[4] + u1x[6] * u0x[7]);  // k12
+
+  // Add the components of the shear strain
+  e[6] = 2.0 * e0ty[4];  // e23, transverse shear
+  e[7] = 2.0 * e0ty[2];  // e13, transverse shear
+  e[8] = et[0];          // e12 (drill strain)
+}
+
+template <typename T>
+A2D_FUNCTION void NonlinearShellStrainReverseCore(const T eb[], const T u0x[],
+                                                  const T u1x[], const T e0ty[],
+                                                  const T et[], T u0xb[],
+                                                  T u1xb[], T e0tyb[],
+                                                  T etb[]) {
+  // First-order reverse mode: accumulates (+=) into the *b output arrays.
+  // In-plane strain seeds go to e0tyb (e0ty and et themselves are not read)
+  // ----------------------
+  e0tyb[0] += eb[0];        // e11
+  e0tyb[3] += eb[1];        // e22
+  e0tyb[1] += 2.0 * eb[2];  // e12
+
+  // Bending strain seeds; each product term uses the partner's primal value
+  // --------------------------
+  // k11 computation
+  u1xb[0] += eb[3];
+  u0xb[0] += u1x[0] * eb[3];
+  u1xb[0] += u0x[0] * eb[3];
+  u0xb[3] += u1x[3] * eb[3];
+  u1xb[3] += u0x[3] * eb[3];
+  u0xb[6] += u1x[6] * eb[3];
+  u1xb[6] += u0x[6] * eb[3];
+  // k22 computation
+  u1xb[4] += eb[4];
+  u0xb[1] += u1x[1] * eb[4];
+  u1xb[1] += u0x[1] * eb[4];
+  u0xb[4] += u1x[4] * eb[4];
+  u1xb[4] += u0x[4] * eb[4];
+  u0xb[7] += u1x[7] * eb[4];
+  u1xb[7] += u0x[7] * eb[4];
+  // k12 computation
+  u1xb[1] += eb[5];
+  u1xb[3] += eb[5];
+  u0xb[0] += u1x[1] * eb[5];
+  u1xb[0] += u0x[1] * eb[5];
+  u0xb[1] += u1x[0] * eb[5];
+  u1xb[1] += u0x[0] * eb[5];
+  u0xb[3] += u1x[4] * eb[5];
+  u1xb[3] += u0x[4] * eb[5];
+  u0xb[4] += u1x[3] * eb[5];
+  u1xb[4] += u0x[3] * eb[5];
+  u0xb[6] += u1x[7] * eb[5];
+  u1xb[6] += u0x[7] * eb[5];
+  u0xb[7] += u1x[6] * eb[5];
+  u1xb[7] += u0x[6] * eb[5];
+
+  // Add the components of the shear strain
+  // --------------------------------------
+  e0tyb[4] += 2.0 * eb[6];  // e23, transverse shear
+  e0tyb[2] += 2.0 * eb[7];  // e13, transverse shear
+  etb[0] += eb[8];          // e12 (drill strain)
+}
+
+template <typename T>
+A2D_FUNCTION void NonlinearShellStrainHessianReverseCore(
+    const T eh[], const T eb[], const T u0x[], const T u1x[], const T e0ty[],
+    const T et[], const T u0xp[], const T u1xp[], const T e0typ[],
+    const T etp[], T u0xh[], T u1xh[], T e0tyh[], T eth[]) {
+  // Second-order reverse mode: the *h outputs accumulate the first-order
+  // pattern applied to eh, plus eb times the p value of the partner variable
+  // for every u0x * u1x product. e0ty, et, e0typ and etp are not read.
+  // In-plane strain seeds go to e0tyh
+  // ----------------------
+  e0tyh[0] += eh[0];        // e11
+  e0tyh[3] += eh[1];        // e22
+  e0tyh[1] += 2.0 * eh[2];  // e12
+
+  // Compute the bending strain
+  // --------------------------
+  // k11 computation
+  u1xh[0] += eh[3];
+  //   nonlinear input * h terms
+  u0xh[0] += u1x[0] * eh[3];
+  u1xh[0] += u0x[0] * eh[3];
+  u0xh[3] += u1x[3] * eh[3];
+  u1xh[3] += u0x[3] * eh[3];
+  u0xh[6] += u1x[6] * eh[3];
+  u1xh[6] += u0x[6] * eh[3];
+  //   nonlinear bar * ptest terms
+  u0xh[0] += u1xp[0] * eb[3];
+  u1xh[0] += u0xp[0] * eb[3];
+  u0xh[3] += u1xp[3] * eb[3];
+  u1xh[3] += u0xp[3] * eb[3];
+  u0xh[6] += u1xp[6] * eb[3];
+  u1xh[6] += u0xp[6] * eb[3];
+
+  // k22 computation
+  u1xh[4] += eh[4];
+  //   nonlinear input * h terms
+  u0xh[1] += u1x[1] * eh[4];
+  u1xh[1] += u0x[1] * eh[4];
+  u0xh[4] += u1x[4] * eh[4];
+  u1xh[4] += u0x[4] * eh[4];
+  u0xh[7] += u1x[7] * eh[4];
+  u1xh[7] += u0x[7] * eh[4];
+  //   nonlinear bar * ptest terms
+  u0xh[1] += u1xp[1] * eb[4];
+  u1xh[1] += u0xp[1] * eb[4];
+  u0xh[4] += u1xp[4] * eb[4];
+  u1xh[4] += u0xp[4] * eb[4];
+  u0xh[7] += u1xp[7] * eb[4];
+  u1xh[7] += u0xp[7] * eb[4];
+
+  // k12 computation
+  u1xh[1] += eh[5];
+  u1xh[3] += eh[5];
+  //   nonlinear input * h terms
+  u0xh[0] += u1x[1] * eh[5];
+  u1xh[0] += u0x[1] * eh[5];
+  u0xh[1] += u1x[0] * eh[5];
+  u1xh[1] += u0x[0] * eh[5];
+  u0xh[3] += u1x[4] * eh[5];
+  u1xh[3] += u0x[4] * eh[5];
+  u0xh[4] += u1x[3] * eh[5];
+  u1xh[4] += u0x[3] * eh[5];
+  u0xh[6] += u1x[7] * eh[5];
+  u1xh[6] += u0x[7] * eh[5];
+  u0xh[7] += u1x[6] * eh[5];
+  u1xh[7] += u0x[6] * eh[5];
+  //   nonlinear bar * ptest terms
+  u0xh[0] += u1xp[1] * eb[5];
+  u1xh[0] += u0xp[1] * eb[5];
+  u0xh[1] += u1xp[0] * eb[5];
+  u1xh[1] += u0xp[0] * eb[5];
+  u0xh[3] += u1xp[4] * eb[5];
+  u1xh[3] += u0xp[4] * eb[5];
+  u0xh[4] += u1xp[3] * eb[5];
+  u1xh[4] += u0xp[3] * eb[5];
+  u0xh[6] += u1xp[7] * eb[5];
+  u1xh[6] += u0xp[7] * eb[5];
+  u0xh[7] += u1xp[6] * eb[5];
+  u1xh[7] += u0xp[6] * eb[5];
+
+  // Add the components of the shear strain
+  // --------------------------------------
+  e0tyh[4] += 2.0 * eh[6];  // e23, transverse shear
+  e0tyh[2] += 2.0 * eh[7];  // e13, transverse shear
+  eth[0] += eh[8];          // e12 (drill strain)
+}
+// Passive (non-AD) shell strain evaluation; straintype selects the kernel.
+template <ShellStrainType straintype, typename T>
+A2D_FUNCTION void ShellStrain(const Mat<T, 3, 3>& u0x, const Mat<T, 3, 3>& u1x,
+                              const SymMat<T, 3>& e0ty, const Vec<T, 1>& et,
+                              Vec<T, 9>& e) {
+  if constexpr (straintype == ShellStrainType::LINEAR) {
+    LinearShellStrainCore<T>(get_data(u0x), get_data(u1x), get_data(e0ty),
+                             get_data(et), get_data(e));
+  } else {
+    NonlinearShellStrainCore<T>(get_data(u0x), get_data(u1x), get_data(e0ty),
+                                get_data(et), get_data(e));
+  }
+}
+
+template <ShellStrainType straintype, class u0xtype, class u1xtype,
+          class e0tytype, class ettype, class etype>
+class ShellStrainExpr {
+ public:
+  // Stack expression for e = ShellStrain<straintype>(u0x, u1x, e0ty, et).
+  // Every argument must be active: seeds are read from all four inputs.
+  typedef typename get_object_numeric_type<etype>::type T;  // numeric type
+  // Extract the dimensions of the underlying matrix
+  static constexpr int u0x_rows = get_matrix_rows<u0xtype>::size;
+  static constexpr int u0x_cols = get_matrix_columns<u0xtype>::size;
+  static constexpr int u1x_rows = get_matrix_rows<u1xtype>::size;
+  static constexpr int u1x_cols = get_matrix_columns<u1xtype>::size;
+  static constexpr int e0ty_size = get_symmatrix_size<e0tytype>::size;
+  static constexpr int et_size = get_vec_size<ettype>::size;
+  static constexpr int e_size = get_vec_size<etype>::size;
+
+  // make sure the correct sizes
+  static_assert((u0x_rows == 3) && (u0x_cols == 3) && (u1x_rows == 3) &&
+                    (u1x_cols == 3) && (e0ty_size == 3) && (et_size == 1) &&
+                    (e_size == 9),
+                "Shell Strain Expression does not have right size..");
+
+  static constexpr ADiffType adu0x = get_diff_type<u0xtype>::diff_type;
+  static constexpr ADiffType adu1x = get_diff_type<u1xtype>::diff_type;
+  static constexpr ADiffType ade0ty = get_diff_type<e0tytype>::diff_type;
+  static constexpr ADiffType adet = get_diff_type<ettype>::diff_type;
+
+  // Get the differentiation order from the output
+  static constexpr ADorder order = get_diff_order<etype>::order;
+  // Make sure that the order matches
+  static_assert(get_diff_order<u0xtype>::order == order,
+                "ADorder does not match");
+
+  A2D_FUNCTION ShellStrainExpr(u0xtype& u0x, u1xtype& u1x, e0tytype& e0ty,
+                               ettype& et, etype& e)
+      : u0x(u0x), u1x(u1x), e0ty(e0ty), et(et), e(e) {}
+
+  A2D_FUNCTION void eval() {
+    if constexpr (straintype == ShellStrainType::LINEAR) {
+      LinearShellStrainCore<T>(get_data(u0x), get_data(u1x), get_data(e0ty),
+                               get_data(et), get_data(e));
+    } else {
+      NonlinearShellStrainCore<T>(get_data(u0x), get_data(u1x), get_data(e0ty),
+                                  get_data(et), get_data(e));
+    }
+  }
+
+  A2D_FUNCTION void bzero() { e.bzero(); }
+
+  template <ADorder forder>
+  A2D_FUNCTION void forward() {
+    static_assert(
+        !(order == ADorder::FIRST and forder == ADorder::SECOND),
+        "Can't perform second order forward with first order objects");
+    constexpr ADseed seed = conditional_value<ADseed, forder == ADorder::FIRST,
+                                              ADseed::b, ADseed::p>::value;
+    const T* u0d = GetSeed<seed>::get_data(u0x);
+    const T* u1d = GetSeed<seed>::get_data(u1x);
+    T* ed = GetSeed<seed>::get_data(e);
+    // Linear terms of the tangent; this is the complete result for LINEAR
+    LinearShellStrainForwardCore<T>(u0d, u1d, GetSeed<seed>::get_data(e0ty),
+                                    GetSeed<seed>::get_data(et), ed);
+    if constexpr (straintype == ShellStrainType::NONLINEAR) {
+      // Product rule on the bilinear bending terms, using the primal values
+      const T* u0 = get_data(u0x);
+      const T* u1 = get_data(u1x);
+      for (int r = 0, s = 1; r < 9; r += 3, s += 3) {
+        ed[3] += u0d[r] * u1[r] + u0[r] * u1d[r];  // k11
+        ed[4] += u0d[s] * u1[s] + u0[s] * u1d[s];  // k22
+        ed[5] += u0d[r] * u1[s] + u0[r] * u1d[s] + u1d[r] * u0[s] +
+                 u1[r] * u0d[s];  // k12
+      }
+    }
+  }
+
+  A2D_FUNCTION void reverse() {
+    constexpr ADseed seed = ADseed::b;
+    if constexpr (straintype == ShellStrainType::LINEAR) {
+      LinearShellStrainReverseCore<T>(
+          GetSeed<seed>::get_data(e), GetSeed<seed>::get_data(u0x),
+          GetSeed<seed>::get_data(u1x), GetSeed<seed>::get_data(e0ty),
+          GetSeed<seed>::get_data(et));
+    } else {
+      NonlinearShellStrainReverseCore<T>(
+          GetSeed<seed>::get_data(e), get_data(u0x), get_data(u1x),
+          get_data(e0ty), get_data(et), GetSeed<seed>::get_data(u0x),
+          GetSeed<seed>::get_data(u1x), GetSeed<seed>::get_data(e0ty),
+          GetSeed<seed>::get_data(et));
+    }
+  }
+
+  A2D_FUNCTION void hzero() { e.hzero(); }
+
+  A2D_FUNCTION void hreverse() {
+    constexpr ADseed seed = ADseed::h;
+    if constexpr (straintype == ShellStrainType::LINEAR) {
+      LinearShellStrainReverseCore<T>(
+          GetSeed<seed>::get_data(e), GetSeed<seed>::get_data(u0x),
+          GetSeed<seed>::get_data(u1x), GetSeed<seed>::get_data(e0ty),
+          GetSeed<seed>::get_data(et));
+    } else {
+      NonlinearShellStrainHessianReverseCore<T>(
+          GetSeed<ADseed::h>::get_data(e), GetSeed<ADseed::b>::get_data(e),
+          get_data(u0x), get_data(u1x), get_data(e0ty), get_data(et),
+          GetSeed<ADseed::p>::get_data(u0x), GetSeed<ADseed::p>::get_data(u1x),
+          GetSeed<ADseed::p>::get_data(e0ty), GetSeed<ADseed::p>::get_data(et),
+          GetSeed<ADseed::h>::get_data(u0x), GetSeed<ADseed::h>::get_data(u1x),
+          GetSeed<ADseed::h>::get_data(e0ty), GetSeed<ADseed::h>::get_data(et));
+    }
+  }
+
+  u0xtype& u0x;
+  u1xtype& u1x;
+  e0tytype& e0ty;
+  ettype& et;
+  etype& e;
+};
+
+// Expression factories. All five arguments must be wrapped in the same AD
+// object type: ADObj (used with stack.reverse() in the test below) or
+// A2DObj (used with stack.hproduct()). straintype is given explicitly,
+// e.g. ShellStrain<ShellStrainType::NONLINEAR>(u0x, u1x, e0ty, et, e).
+
+template <ShellStrainType straintype, class u0xtype, class u1xtype,
+          class e0tytype, class ettype, class etype>
+A2D_FUNCTION auto ShellStrain(ADObj<u0xtype>& u0x, ADObj<u1xtype>& u1x,
+                              ADObj<e0tytype>& e0ty, ADObj<ettype>& et,
+                              ADObj<etype>& e) {
+  return ShellStrainExpr<straintype, ADObj<u0xtype>, ADObj<u1xtype>,
+                         ADObj<e0tytype>, ADObj<ettype>, ADObj<etype>>(
+      u0x, u1x, e0ty, et, e);
+}
+
+template <ShellStrainType straintype, class u0xtype, class u1xtype,
+          class e0tytype, class ettype, class etype>
+A2D_FUNCTION auto ShellStrain(A2DObj<u0xtype>& u0x, A2DObj<u1xtype>& u1x,
+                              A2DObj<e0tytype>& e0ty, A2DObj<ettype>& et,
+                              A2DObj<etype>& e) {
+  return ShellStrainExpr<straintype, A2DObj<u0xtype>, A2DObj<u1xtype>,
+                         A2DObj<e0tytype>, A2DObj<ettype>, A2DObj<etype>>(
+      u0x, u1x, e0ty, et, e);
+}
+
+namespace Test {
+
+// Test of ShellStrain: inputs (u0x, u1x, e0ty, et) -> output strain vector e.
+// eval() uses the passive overload, deriv() the ADObj expression with
+// stack.reverse(), and hprod() the A2DObj expression with stack.hproduct().
+// NOTE(review): hprod() never reads e.pvalue(); forward result looks unchecked
+
+template <ShellStrainType straintype, typename T>
+class ShellStrainTest : public A2DTest<T, Vec<T, 9>, Mat<T, 3, 3>, Mat<T, 3, 3>,
+                                       SymMat<T, 3>, Vec<T, 1>> {
+ public:
+  using Input =
+      VarTuple<T, Mat<T, 3, 3>, Mat<T, 3, 3>, SymMat<T, 3>, Vec<T, 1>>;
+  using Output = VarTuple<T, Vec<T, 9>>;
+
+  // Build the test label: "ShellStrain<LINEAR>" or "ShellStrain<NONLINEAR>"
+  std::string name() {
+    std::stringstream s;
+    s << "ShellStrain<";
+    if (straintype == ShellStrainType::LINEAR) {
+      s << "LINEAR>";
+    } else {
+      s << "NONLINEAR>";
+    }
+
+    return s.str();
+  }
+
+  // Evaluate e = ShellStrain<straintype>(...) with plain (non-AD) objects
+  Output eval(const Input& x) {
+    Mat<T, 3, 3> u0x, u1x;
+    SymMat<T, 3> e0ty;
+    Vec<T, 1> et;
+    Vec<T, 9> e;
+    x.get_values(u0x, u1x, e0ty, et);
+    ShellStrain<straintype>(u0x, u1x, e0ty, et, e);
+    return MakeVarTuple<T>(e);
+  }
+
+  // Reverse mode: seed e.bvalue(), run the stack, return the input bvalue()s
+  void deriv(const Output& seed, const Input& x, Input& g) {
+    ADObj<Mat<T, 3, 3>> u0x, u1x;
+    ADObj<SymMat<T, 3>> e0ty;
+    ADObj<Vec<T, 1>> et;
+    ADObj<Vec<T, 9>> e;
+    x.get_values(u0x.value(), u1x.value(), e0ty.value(), et.value());
+    auto stack = MakeStack(ShellStrain<straintype>(u0x, u1x, e0ty, et, e));
+    seed.get_values(e.bvalue());
+    stack.reverse();
+    g.set_values(u0x.bvalue(), u1x.bvalue(), e0ty.bvalue(), et.bvalue());
+  }
+
+  // Second order: p -> pvalue(), seed/hval -> e seeds, h <- input hvalue()s
+  void hprod(const Output& seed, const Output& hval, const Input& x,
+             const Input& p, Input& h) {
+    A2DObj<Mat<T, 3, 3>> u0x, u1x;
+    A2DObj<SymMat<T, 3>> e0ty;
+    A2DObj<Vec<T, 1>> et;
+    A2DObj<Vec<T, 9>> e;
+    x.get_values(u0x.value(), u1x.value(), e0ty.value(), et.value());
+    p.get_values(u0x.pvalue(), u1x.pvalue(), e0ty.pvalue(), et.pvalue());
+    auto stack = MakeStack(ShellStrain<straintype>(u0x, u1x, e0ty, et, e));
+    seed.get_values(e.bvalue());
+    hval.get_values(e.hvalue());
+    stack.hproduct();
+    h.set_values(u0x.hvalue(), u1x.hvalue(), e0ty.hvalue(), et.hvalue());
+  }
+};
+
+// Run the LINEAR and NONLINEAR ShellStrain tests; inline avoids ODR errors.
+inline bool ShellStrainTestAll(bool component = false,
+                               bool write_output = true) {
+  using Tc = std::complex<double>;
+
+  ShellStrainTest<ShellStrainType::LINEAR, Tc> test1;
+  bool passed = Run(test1, component, write_output);
+  ShellStrainTest<ShellStrainType::NONLINEAR, Tc> test2;
+  passed = passed && Run(test2, component, write_output);
+  return passed;
+}
+
+}  // namespace Test
+
+}  // namespace A2D
+
+#endif  // A2D_SHELL_STRAIN_H
\ No newline at end of file
diff --git a/include/ad/shell/a2dsymmatrotateframe.h b/include/ad/shell/a2dsymmatrotateframe.h
new file mode 100644
index 00000000..58cb9cc2
--- /dev/null
+++ b/include/ad/shell/a2dsymmatrotateframe.h
@@ -0,0 +1,588 @@
+#ifndef A2D_SYM_MAT_ROTATE_FRAME_H
+#define A2D_SYM_MAT_ROTATE_FRAME_H
+
+#include <type_traits>
+
+#include "../../a2ddefs.h"
+#include "../a2dmat.h"
+#include "../a2dstack.h"
+#include "../a2dtest.h"
+
+namespace A2D {
+
+/*
+  Define an expression for C = A^T * B * A
+*/
+
+// C (+)= A * B for N x N operands. A sym* flag marks that operand as packed
+// symmetric storage (N * (N + 1) / 2 entries); otherwise row-major N * N.
+// C is zeroed first unless additive. With symB only inner >= icol is summed.
+// NOTE(review): confirm the packed order used here matches SymMat's layout.
+template <typename T, int N, bool symA = false, bool symB = false,
+          bool symC = true, bool additive = false>
+A2D_FUNCTION void SymMatMatSquareMult(const T A[], const T B[], T C[]) {
+  // zero the matrix if not additive; a packed symmetric C stores
+  // N * (N + 1) / 2 entries, and all of them are accumulated into below
+  if constexpr (!additive) {
+    if constexpr (symC) {
+      std::fill(C, C + N * (N + 1) / 2, static_cast<T>(0));
+    } else {
+      std::fill(C, C + N * N, static_cast<T>(0));
+    }
+  }
+
+  // Precompute the packed index of (i, j): upper triangle, row by row
+  std::array<int, N * N> symMatIndices;
+  int index = 0;
+  for (int i = 0; i < N; ++i) {
+    for (int j = i; j < N; ++j, ++index) {
+      symMatIndices[i * N + j] = index;
+      symMatIndices[j * N + i] = index;
+    }
+  }
+
+  // Compute Mat A * Mat B => SymMat C (symmetric part only)
+  if constexpr (symC) {
+    for (int irow = 0; irow < N; irow++) {
+      for (int icol = irow; icol < N;
+           icol++) {  // only populate lower / upper diag once symMat C
+        int inner_start;
+        if constexpr (symB) {  // had extra halfDiag bool here before, only
+                               // shows up in reverse mode stuff
+          inner_start = icol;
+        } else {
+          inner_start = 0;
+        }
+        int ic = symMatIndices[irow * N + icol];
+        for (int inner = inner_start; inner < N; inner++) {
+          T aVal;
+          if constexpr (symA) {
+            int ia = symMatIndices[N * irow + inner];
+            aVal = A[ia];
+          } else {
+            aVal = A[N * irow + inner];
+          }
+          T bVal;
+          if constexpr (symB) {
+            int ib = symMatIndices[N * inner + icol];
+            bVal = B[ib];
+          } else {
+            bVal = B[N * inner + icol];
+          }
+
+          C[ic] += aVal * bVal;
+        }
+      }
+    }
+  } else {  // not symC
+    for (int irow = 0; irow < N; irow++) {
+      for (int icol = 0; icol < N;
+           icol++) {  // C is a full matrix here: every column is filled
+        int inner_start;
+        if constexpr (symB) {  // had extra halfDiag bool here before, only
+                               // shows up in reverse mode stuff
+          inner_start = icol;
+        } else {
+          inner_start = 0;
+        }
+        for (int inner = inner_start; inner < N; inner++) {
+          T aVal;
+          if constexpr (symA) {
+            int ia = symMatIndices[N * irow + inner];
+            aVal = A[ia];
+          } else {
+            aVal = A[N * irow + inner];
+          }
+          T bVal;
+          if constexpr (symB) {
+            int ib = symMatIndices[N * inner + icol];
+            bVal = B[ib];
+          } else {
+            bVal = B[N * inner + icol];
+          }
+
+          C[N * irow + icol] += aVal * bVal;
+        }
+      }
+    }
+  }
+}
+
+// C (+)= A^T * B for N x N operands; C is always written as a full row-major
+// N x N matrix. symA / symB mark A / B as packed symmetric storage.
+template <typename T, int N, bool symA = false, bool symB = true,
+          bool symC = false, bool additive = false>
+A2D_FUNCTION void SymMatMatLeftTrSquareMult(const T A[], const T B[], T C[]) {
+  // zero the matrix if not additive (N * (N + 1) / 2 entries when packed)
+  if constexpr (!additive) {
+    if constexpr (symC) {
+      std::fill(C, C + N * (N + 1) / 2, static_cast<T>(0));
+    } else {
+      std::fill(C, C + N * N, static_cast<T>(0));
+    }
+  }
+
+  // Precompute the packed index of (i, j): upper triangle, row by row
+  std::array<int, N * N> symMatIndices;
+  int index = 0;
+  for (int i = 0; i < N; ++i) {
+    for (int j = i; j < N; ++j, ++index) {
+      symMatIndices[i * N + j] = index;
+      symMatIndices[j * N + i] = index;
+    }
+  }
+
+  // Compute Mat A^T * SymMat B => C (most of the time)
+  // symC is always false so not considering it (just makes it easier to type
+  // same # inputs for template)
+  for (int inner = 0; inner < N; ++inner) {
+    for (int icol = 0; icol < N; ++icol) {
+      T bValue;
+      if constexpr (symB) {
+        // Use precomputed index
+        int ib = symMatIndices[inner * N + icol];
+        bValue = B[ib];  // read B(inner, icol) once for the whole irow loop
+      } else {
+        bValue = B[N * inner + icol];
+      }
+
+      // Accumulate A(inner, irow) * B(inner, icol) into C(irow, icol)
+      for (int irow = 0; irow < N; ++irow) {
+        T aValue;
+        if constexpr (symA) {
+          int ia = symMatIndices[N * inner + irow];
+          aValue = A[ia];
+        } else {
+          aValue = A[N * inner + irow];
+        }
+        C[N * irow + icol] += aValue * bValue;
+      }
+    }
+  }
+}
+
+// C (+)= A * B^T with A read as a full row-major N x N matrix (symA unused).
+// symB / symC mark B / C as packed symmetric; symB also limits icol >= inner.
+template <typename T, int N, bool symA = false, bool symB = false,
+          bool symC = false, bool additive = false>
+A2D_FUNCTION void SymMatMatRightTrSquareMult(const T A[], const T B[], T C[]) {
+  // zero the matrix if not additive (N * (N + 1) / 2 entries when packed)
+  if constexpr (!additive) {
+    if constexpr (symC) {
+      std::fill(C, C + N * (N + 1) / 2, static_cast<T>(0));
+    } else {
+      std::fill(C, C + N * N, static_cast<T>(0));
+    }
+  }
+
+  // Precompute the packed index of (i, j): upper triangle, row by row
+  std::array<int, N * N> symMatIndices;
+  int index = 0;
+  for (int i = 0; i < N; ++i) {
+    for (int j = i; j < N; ++j, ++index) {
+      symMatIndices[i * N + j] = index;
+      symMatIndices[j * N + i] = index;
+    }
+  }
+
+  // Compute Mat A * SymMat B^T => C
+  // symA is not really used here..
+  // if C output is symmetric
+  if constexpr (symC) {
+    for (int inner = 0; inner < N; ++inner) {
+      int col_start;
+      if constexpr (symB) {  // had extra halfDiag bool here before, only shows
+                             // up in reverse mode stuff
+        col_start = inner;
+      } else {
+        col_start = 0;
+      }
+      for (int icol = col_start; icol < N; ++icol) {
+        // Use precomputed index (because transpose)
+        T bValue;
+        if constexpr (symB) {
+          int ib = symMatIndices[icol * N + inner];
+          bValue = B[ib];
+        } else {
+          bValue = B[N * icol + inner];
+        }
+
+        // Accumulate into the packed entry of C shared by (irow, icol)
+        for (int irow = 0; irow < N; ++irow) {
+          int ic = symMatIndices[N * irow + icol];
+          C[ic] += A[N * irow + inner] * bValue;
+        }
+      }
+    }
+  } else {  // C output is not symmetric
+    for (int inner = 0; inner < N; ++inner) {
+      int col_start;
+      if constexpr (symB) {  // had extra halfDiag bool here before, only shows
+                             // up in reverse mode stuff
+        col_start = inner;
+      } else {
+        col_start = 0;
+      }
+      for (int icol = col_start; icol < N; ++icol) {
+        // Use precomputed index (because transpose)
+        T bValue;
+        if constexpr (symB) {
+          int ib = symMatIndices[icol * N + inner];
+          bValue = B[ib];
+        } else {
+          bValue = B[N * icol + inner];
+        }
+
+        for (int irow = 0; irow < N; ++irow) {
+          C[N * irow + icol] += A[N * irow + inner] * bValue;
+        }
+      }
+    }
+  }
+}
+
+template <typename T, int N>
+A2D_FUNCTION void SymMatRotateFrame(const Mat<T, N, N>& A,
+                                    const SymMat<T, N>& B, SymMat<T, N>& C) {
+  Mat<T, N, N> Ctemp;
+  // Ctemp = A^T * B  (helper defaults: B packed symmetric, Ctemp full N x N)
+  SymMatMatLeftTrSquareMult<T, N>(get_data(A), get_data(B), get_data(Ctemp));
+  // C = Ctemp * A = A^T * B * A  (helper defaults: C packed symmetric)
+  SymMatMatSquareMult<T, N>(get_data(Ctemp), get_data(A), get_data(C));
+}
+
+// Stack expression for C = A^T * B * A, with A sized through get_matrix_rows /
+// get_matrix_columns and B, C sized through get_symmatrix_size. Provides
+// eval(), forward(), reverse() and hreverse() plus the bzero() / hzero()
+// resets; they read and write the value, b-, p- and h-seed data of A, B, C.
+template <class Atype, class Btype, class Ctype>
+class SymMatRotateFrameExpr {
+ public:
+  // Extract the numeric type to use
+  typedef typename get_object_numeric_type<Ctype>::type T;
+
+  // Extract the dimensions of the matrices
+  static constexpr int N = get_matrix_rows<Atype>::size;
+  static constexpr int M = get_matrix_columns<Atype>::size;
+  static constexpr int K = get_symmatrix_size<Btype>::size;
+  static constexpr int P = get_symmatrix_size<Ctype>::size;
+
+  // check all square matrices
+  static_assert((N == M) && (M == K) && (K == P),
+                "all matrices in SymMatRotateFrameExpr must be same N x N "
+                "square matrix size.");
+
+  // Get the types of the matrices
+  static constexpr ADiffType adA = get_diff_type<Atype>::diff_type;
+  static constexpr ADiffType adB = get_diff_type<Btype>::diff_type;
+
+  // Get the differentiation order from the output
+  static constexpr ADorder order = get_diff_order<Ctype>::order;
+
+  A2D_FUNCTION SymMatRotateFrameExpr(Atype& A, Btype& B, Ctype& C)
+      : A(A), B(B), C(C) {}
+
+  A2D_FUNCTION void eval() {
+    Mat<T, N, N> Ctemp;
+    // Ctemp = A^T * B
+    SymMatMatLeftTrSquareMult<T, N>(get_data(A), get_data(B), get_data(Ctemp));
+    // C = Ctemp * A
+    SymMatMatSquareMult<T, N>(get_data(Ctemp), get_data(A), get_data(C));
+  }
+
+  A2D_FUNCTION void bzero() { C.bzero(); }
+
+  template <ADorder forder>
+  A2D_FUNCTION void forward() {
+    static_assert(
+        !(order == ADorder::FIRST and forder == ADorder::SECOND),
+        "Can't perform second order forward with first order objects");
+    constexpr ADseed seed = conditional_value<ADseed, forder == ADorder::FIRST,
+                                              ADseed::b, ADseed::p>::value;
+
+    // full expression of forward pass:
+    //   Cdot = Adot^T * B * A + A^T * B * Adot + A^T * Bdot * A
+    // The first product overwrites the seed of C; later ones accumulate.
+
+    if constexpr (adA == ADiffType::ACTIVE and adB == ADiffType::ACTIVE) {
+      Mat<T, N, N> Ctemp;
+      // Adot term1
+      SymMatMatLeftTrSquareMult<T, N, false, true, false, false>(
+          GetSeed<seed>::get_data(A), get_data(B), get_data(Ctemp));
+      SymMatMatSquareMult<T, N, false, false, true, false>(
+          get_data(Ctemp), get_data(A), GetSeed<seed>::get_data(C));
+      // Adot term2
+      SymMatMatLeftTrSquareMult<T, N, false, true, false, false>(
+          get_data(A), get_data(B), get_data(Ctemp));
+      SymMatMatSquareMult<T, N, false, false, true, true>(
+          get_data(Ctemp), GetSeed<seed>::get_data(A),
+          GetSeed<seed>::get_data(C));
+      // Bdot term
+      SymMatMatLeftTrSquareMult<T, N, false, true, false, false>(
+          get_data(A), GetSeed<seed>::get_data(B), get_data(Ctemp));
+      SymMatMatSquareMult<T, N, false, false, true, true>(
+          get_data(Ctemp), get_data(A), GetSeed<seed>::get_data(C));
+
+    } else if constexpr (adA == ADiffType::ACTIVE) {
+      Mat<T, N, N> Ctemp;
+      // Adot term1
+      SymMatMatLeftTrSquareMult<T, N, false, true, false, false>(
+          GetSeed<seed>::get_data(A), get_data(B), get_data(Ctemp));
+      SymMatMatSquareMult<T, N, false, false, true, false>(
+          get_data(Ctemp), get_data(A), GetSeed<seed>::get_data(C));
+      // Adot term2
+      SymMatMatLeftTrSquareMult<T, N, false, true, false, false>(
+          get_data(A), get_data(B), get_data(Ctemp));
+      SymMatMatSquareMult<T, N, false, false, true, true>(
+          get_data(Ctemp), GetSeed<seed>::get_data(A),
+          GetSeed<seed>::get_data(C));
+
+    } else if constexpr (adB == ADiffType::ACTIVE) {
+      Mat<T, N, N> Ctemp;
+      // Bdot term
+      SymMatMatLeftTrSquareMult<T, N, false, true, false, false>(
+          get_data(A), GetSeed<seed>::get_data(B), get_data(Ctemp));
+      SymMatMatSquareMult<T, N, false, false, true, false>(
+          get_data(Ctemp), get_data(A), GetSeed<seed>::get_data(C));
+    }
+  }
+
+  A2D_FUNCTION void reverse() {
+    // Both branches accumulate into the existing b-seeds of A and B.
+    if constexpr (adA == ADiffType::ACTIVE) {
+      Mat<T, N, N> temp;
+      // full expression: Abar += B^T * A * Cbar + B * A * Cbar^T
+      // first term B^T * A * Cbar
+      SymMatMatLeftTrSquareMult<T, N, true, false, false, false>(
+          get_data(B), get_data(A), get_data(temp));
+      SymMatMatSquareMult<T, N, false, true, false, true>(
+          get_data(temp), GetSeed<ADseed::b>::get_data(C),
+          GetSeed<ADseed::b>::get_data(A));
+
+      // second term B * A * Cbar^T added in
+      SymMatMatSquareMult<T, N, true, false, false, false>(
+          get_data(B), get_data(A), get_data(temp));
+      SymMatMatRightTrSquareMult<T, N, false, true, false, true>(
+          get_data(temp), GetSeed<ADseed::b>::get_data(C),
+          GetSeed<ADseed::b>::get_data(A));
+    }
+    if constexpr (adB == ADiffType::ACTIVE) {
+      Mat<T, N, N> temp;
+      // full expression Bbar += A * Cbar * A^T
+      SymMatMatSquareMult<T, N, false, true, false, false>(
+          get_data(A), GetSeed<ADseed::b>::get_data(C), get_data(temp));
+      SymMatMatRightTrSquareMult<T, N, false, false, true, true>(
+          get_data(temp), get_data(A), GetSeed<ADseed::b>::get_data(B));
+    }
+  }
+
+  A2D_FUNCTION void hzero() { C.hzero(); }
+
+  A2D_FUNCTION void hreverse() {
+    static_assert(order == ADorder::SECOND,
+                  "hreverse() can be called for only second order objects.");
+
+    // HJP backpropagation based on Aaron's paper and my ppt
+    //
+    // Ahat += B^T * A * Chat + B * A * Chat^T +
+    //         Bdot^T * A * Cbar + Bdot * A * Cbar^T +
+    //         B^T * Adot * Cbar + B * Adot * Cbar^T
+    //
+    // Bhat += A * Chat * A^T +
+    //         Adot * Cbar * A^T + A * Cbar * Adot^T
+
+    if constexpr (adA == ADiffType::ACTIVE) {
+      Mat<T, N, N> temp;
+      // term1 for Ahat : B^T * A * Chat
+      SymMatMatLeftTrSquareMult<T, N, true, false, false, false>(
+          get_data(B), get_data(A), get_data(temp));
+      SymMatMatSquareMult<T, N, false, true, false, true>(
+          get_data(temp), GetSeed<ADseed::h>::get_data(C),
+          GetSeed<ADseed::h>::get_data(A));
+
+      // term2 for Ahat : B * A * Chat^T
+      SymMatMatSquareMult<T, N, true, false, false, false>(
+          get_data(B), get_data(A), get_data(temp));
+      SymMatMatRightTrSquareMult<T, N, false, true, false, true>(
+          get_data(temp), GetSeed<ADseed::h>::get_data(C),
+          GetSeed<ADseed::h>::get_data(A));
+
+      // term 5 for Ahat : B^T * Adot * Cbar
+      SymMatMatLeftTrSquareMult<T, N, true, false, false, false>(
+          get_data(B), GetSeed<ADseed::p>::get_data(A), get_data(temp));
+      SymMatMatSquareMult<T, N, false, true, false, true>(
+          get_data(temp), GetSeed<ADseed::b>::get_data(C),
+          GetSeed<ADseed::h>::get_data(A));
+
+      // term 6 for Ahat : B * Adot * Cbar^T
+      SymMatMatSquareMult<T, N, true, false, false, false>(
+          get_data(B), GetSeed<ADseed::p>::get_data(A), get_data(temp));
+      SymMatMatRightTrSquareMult<T, N, false, true, false, true>(
+          get_data(temp), GetSeed<ADseed::b>::get_data(C),
+          GetSeed<ADseed::h>::get_data(A));
+    }
+
+    if constexpr (adB == ADiffType::ACTIVE) {
+      Mat<T, N, N> temp;
+      // term 1 for Bhat : A * Chat * A^T
+      SymMatMatSquareMult<T, N, false, true, false, false>(
+          get_data(A), GetSeed<ADseed::h>::get_data(C), get_data(temp));
+      SymMatMatRightTrSquareMult<T, N, false, false, true, true>(
+          get_data(temp), get_data(A), GetSeed<ADseed::h>::get_data(B));
+    }
+
+    if constexpr (adA == ADiffType::ACTIVE && adB == ADiffType::ACTIVE) {
+      // the remaining cross terms only show up when both A and B are active
+      Mat<T, N, N> temp;
+
+      // term3 for Ahat : Bdot^T * A * Cbar
+      SymMatMatLeftTrSquareMult<T, N, true, false, false, false>(
+          GetSeed<ADseed::p>::get_data(B), get_data(A), get_data(temp));
+      SymMatMatSquareMult<T, N, false, true, false, true>(
+          get_data(temp), GetSeed<ADseed::b>::get_data(C),
+          GetSeed<ADseed::h>::get_data(A));
+
+      // term4 for Ahat : Bdot * A * Cbar^T
+      SymMatMatSquareMult<T, N, true, false, false, false>(
+          GetSeed<ADseed::p>::get_data(B), get_data(A), get_data(temp));
+      SymMatMatRightTrSquareMult<T, N, false, true, false, true>(
+          get_data(temp), GetSeed<ADseed::b>::get_data(C),
+          GetSeed<ADseed::h>::get_data(A));
+
+      // term2 for Bhat : Adot * Cbar * A^T
+      SymMatMatSquareMult<T, N, false, true, false, false>(
+          GetSeed<ADseed::p>::get_data(A), GetSeed<ADseed::b>::get_data(C),
+          get_data(temp));
+      SymMatMatRightTrSquareMult<T, N, false, false, true, true>(
+          get_data(temp), get_data(A), GetSeed<ADseed::h>::get_data(B));
+
+      // term3 for Bhat : A * Cbar * Adot^T
+      SymMatMatSquareMult<T, N, false, true, false, false>(
+          get_data(A), GetSeed<ADseed::b>::get_data(C), get_data(temp));
+      SymMatMatRightTrSquareMult<T, N, false, false, true, true>(
+          get_data(temp), GetSeed<ADseed::p>::get_data(A),
+          GetSeed<ADseed::h>::get_data(B));
+    }
+  }
+
+ private:
+  Atype& A;
+  Btype& B;
+  Ctype& C;
+};
+
+// Expression factories. This overload: A, B and C all wrapped in ADObj.
+template <class Atype, class Btype, class Ctype>
+A2D_FUNCTION auto SymMatRotateFrame(ADObj<Atype>& A, ADObj<Btype>& B,
+                                    ADObj<Ctype>& C) {
+  return SymMatRotateFrameExpr<ADObj<Atype>, ADObj<Btype>, ADObj<Ctype>>(A, B,
+                                                                         C);
+}
+
+template <class Atype, class Btype, class Ctype>  // ADObj A, C; B as given
+A2D_FUNCTION auto SymMatRotateFrame(ADObj<Atype>& A, Btype& B,
+                                    ADObj<Ctype>& C) {
+  return SymMatRotateFrameExpr<ADObj<Atype>, Btype, ADObj<Ctype>>(A, B, C);
+}
+
+template <class Atype, class Btype, class Ctype>  // ADObj B, C; A as given
+A2D_FUNCTION auto SymMatRotateFrame(Atype& A, ADObj<Btype>& B,
+                                    ADObj<Ctype>& C) {
+  return SymMatRotateFrameExpr<Atype, ADObj<Btype>, ADObj<Ctype>>(A, B, C);
+}
+
+template <class Atype, class Btype, class Ctype>  // A, B, C all A2DObj
+A2D_FUNCTION auto SymMatRotateFrame(A2DObj<Atype>& A, A2DObj<Btype>& B,
+                                    A2DObj<Ctype>& C) {
+  return SymMatRotateFrameExpr<A2DObj<Atype>, A2DObj<Btype>, A2DObj<Ctype>>(
+      A, B, C);
+}
+
+template <class Atype, class Btype, class Ctype>  // A2DObj A, C; B as given
+A2D_FUNCTION auto SymMatRotateFrame(A2DObj<Atype>& A, Btype& B,
+                                    A2DObj<Ctype>& C) {
+  return SymMatRotateFrameExpr<A2DObj<Atype>, Btype, A2DObj<Ctype>>(A, B, C);
+}
+
+template <class Atype, class Btype, class Ctype>  // A2DObj B, C; A as given
+A2D_FUNCTION auto SymMatRotateFrame(Atype& A, A2DObj<Btype>& B,
+                                    A2DObj<Ctype>& C) {
+  return SymMatRotateFrameExpr<Atype, A2DObj<Btype>, A2DObj<Ctype>>(A, B, C);
+}
+
+namespace Test {
+
+template <typename T, int N>
+class SymMatRotateFrameTest
+    : public A2DTest<T, SymMat<T, N>, Mat<T, N, N>, SymMat<T, N>> {
+ public:
+  using Input = VarTuple<T, Mat<T, N, N>, SymMat<T, N>>;
+  using Output = VarTuple<T, SymMat<T, N>>;
+
+  // Test label: "SymMatRotateFrame<N,N>" with the numeric value of N
+  std::string name() {
+    std::stringstream s;
+    s << "SymMatRotateFrame<" << N << "," << N << ">";
+    return s.str();
+  }
+
+  // Evaluate C = A^T * B * A with plain Mat / SymMat objects (no AD wrappers)
+  Output eval(const Input& x) {
+    Mat<T, N, N> A;
+    SymMat<T, N> B, C;
+
+    x.get_values(A, B);
+    SymMatRotateFrame(A, B, C);
+    return MakeVarTuple<T>(C);
+  }
+
+  // Reverse mode: seed C.bvalue(), return A.bvalue() and B.bvalue() in g
+  void deriv(const Output& seed, const Input& x, Input& g) override {
+    ADObj<Mat<T, N, N>> A;
+    ADObj<SymMat<T, N>> B, C;
+
+    x.get_values(A.value(), B.value());
+    auto stack = MakeStack(SymMatRotateFrame(A, B, C));
+    seed.get_values(C.bvalue());
+    stack.reverse();
+    g.set_values(A.bvalue(), B.bvalue());
+  }
+
+  // Second order: load p- and h-seeds, run hproduct(), return h-values in h
+  void hprod(const Output& seed, const Output& hval, const Input& x,
+             const Input& p, Input& h) override {
+    A2DObj<Mat<T, N, N>> A;
+    A2DObj<SymMat<T, N>> B, C;
+
+    x.get_values(A.value(), B.value());
+    p.get_values(A.pvalue(), B.pvalue());
+    auto stack = MakeStack(SymMatRotateFrame(A, B, C));
+    seed.get_values(C.bvalue());
+    hval.get_values(C.hvalue());
+    stack.hproduct();
+    h.set_values(A.hvalue(), B.hvalue());
+  }
+};
+
+bool SymMatRotateFrameTestAll(bool component = false,
+                              bool write_output = true) {
+  using Tc = std::complex<double>;
+
+  bool all_ok = true;  // && below short-circuits once a size has failed
+  // SymMatRotateFrameTest<Tc, 1> size1;
+  // all_ok = all_ok && Run(size1, component, write_output);
+  SymMatRotateFrameTest<Tc, 2> size2;
+  all_ok = all_ok && Run(size2, component, write_output);
+  SymMatRotateFrameTest<Tc, 3> size3;
+  all_ok = all_ok && Run(size3, component, write_output);
+  SymMatRotateFrameTest<Tc, 4> size4;
+  all_ok = all_ok && Run(size4, component, write_output);
+
+  return all_ok;
+}
+
+}  // namespace Test
+
+}  // namespace A2D
+
+#endif  // A2D_SYM_MAT_ROTATE_FRAME_H
\ No newline at end of file
diff --git a/include/adscalar.h b/include/adscalar.h
index 49657672..73e99c2d 100644
--- a/include/adscalar.h
+++ b/include/adscalar.h
@@ -35,26 +35,28 @@ using get_non_scalar_type_t = typename get_non_scalar_type<Types...>::type;
 template <class T, int N>
 class ADScalar {
  public:
-  ADScalar() {}
+  // using type = T;
+
+  A2D_FUNCTION ADScalar() {}
   template <typename R, typename = std::enable_if_t<is_scalar_type<R>::value>>
 
-  ADScalar(const R value) : value(value), deriv{0.0} {}
+  A2D_FUNCTION ADScalar(const R value) : value(value), deriv{0.0} {}
 
   template <typename R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  ADScalar(const R value, const T d[]) : value(value) {
+  A2D_FUNCTION ADScalar(const R value, const T d[]) : value(value) {
     for (int i = 0; i < N; i++) {
       deriv[i] = d[i];
     }
   }
 
-  ADScalar(const ADScalar<T, N> &r) : value(r.value) {
+  A2D_FUNCTION ADScalar(const ADScalar<T, N> &r) : value(r.value) {
     for (int i = 0; i < N; i++) {
       deriv[i] = r.deriv[i];
     }
   }
 
   template <typename R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  inline ADScalar<T, N> &operator=(const R &r) {
+  A2D_FUNCTION inline ADScalar<T, N> &operator=(const R &r) {
     value = r;
     for (int i = 0; i < N; i++) {
       deriv[i] = 0.0;
@@ -64,41 +66,45 @@ class ADScalar {
 
   // Comparison operators
   template <typename R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  inline bool operator<(const R &rhs) const {
+  A2D_FUNCTION inline bool operator<(const R &rhs) const {
     return value < rhs;
   }
   template <typename R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  inline bool operator<=(const R &rhs) const {
+  A2D_FUNCTION inline bool operator<=(const R &rhs) const {
     return value <= rhs;
   }
   template <typename R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  inline bool operator>(const R &rhs) const {
+  A2D_FUNCTION inline bool operator>(const R &rhs) const {
     return value > rhs;
   }
   template <typename R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  inline bool operator>=(const R &rhs) const {
+  A2D_FUNCTION inline bool operator>=(const R &rhs) const {
     return value >= rhs;
   }
+  template <typename R, typename = std::enable_if_t<is_scalar_type<R>::value>>
+  A2D_FUNCTION inline bool operator!=(const R &rhs) const {
+    return value != rhs;
+  }
 
   template <typename X, int M>
-  inline bool operator<(const ADScalar<X, M> &rhs) const {
+  A2D_FUNCTION inline bool operator<(const ADScalar<X, M> &rhs) const {
     return value < rhs.value;
   }
   template <typename X, int M>
-  inline bool operator<=(const ADScalar<X, M> &rhs) const {
+  A2D_FUNCTION inline bool operator<=(const ADScalar<X, M> &rhs) const {
     return value <= rhs.value;
   }
   template <typename X, int M>
-  inline bool operator>(const ADScalar<X, M> &rhs) const {
+  A2D_FUNCTION inline bool operator>(const ADScalar<X, M> &rhs) const {
     return value > rhs.value;
   }
   template <typename X, int M>
-  inline bool operator>=(const ADScalar<X, M> &rhs) const {
+  A2D_FUNCTION inline bool operator>=(const ADScalar<X, M> &rhs) const {
     return value >= rhs.value;
   }
 
   // Operator +=, -=, *=, /=
-  inline ADScalar<T, N> &operator+=(const ADScalar<T, N> &r) {
+  A2D_FUNCTION inline ADScalar<T, N> &operator+=(const ADScalar<T, N> &r) {
     value += r.value;
     for (int i = 0; i < N; i++) {
       deriv[i] += r.deriv[i];
@@ -106,11 +112,11 @@ class ADScalar {
     return *this;
   }
   template <class R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  inline ADScalar<T, N> &operator+=(const R &r) {
+  A2D_FUNCTION inline ADScalar<T, N> &operator+=(const R &r) {
     value += r;
     return *this;
   }
-  inline ADScalar<T, N> &operator-=(const ADScalar<T, N> &r) {
+  A2D_FUNCTION inline ADScalar<T, N> &operator-=(const ADScalar<T, N> &r) {
     value -= r.value;
     for (int i = 0; i < N; i++) {
       deriv[i] -= r.deriv[i];
@@ -118,11 +124,11 @@ class ADScalar {
     return *this;
   }
   template <class R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  inline ADScalar<T, N> &operator-=(const R &r) {
+  A2D_FUNCTION inline ADScalar<T, N> &operator-=(const R &r) {
     value -= r;
     return *this;
   }
-  inline ADScalar<T, N> &operator*=(const ADScalar<T, N> &r) {
+  A2D_FUNCTION inline ADScalar<T, N> &operator*=(const ADScalar<T, N> &r) {
     for (int i = 0; i < N; i++) {
       deriv[i] = r.value * deriv[i] + value * r.deriv[i];
     }
@@ -130,7 +136,7 @@ class ADScalar {
     return *this;
   }
   template <class R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  inline ADScalar<T, N> &operator*=(const R &r) {
+  A2D_FUNCTION inline ADScalar<T, N> &operator*=(const R &r) {
     value *= r;
     for (int i = 0; i < N; i++) {
       deriv[i] = r * deriv[i];
@@ -138,7 +144,7 @@ class ADScalar {
     return *this;
   }
 
-  inline ADScalar<T, N> &operator/=(const ADScalar<T, N> &r) {
+  A2D_FUNCTION inline ADScalar<T, N> &operator/=(const ADScalar<T, N> &r) {
     T inv = 1.0 / r.value;
     T inv2 = value * inv * inv;
     value *= inv;
@@ -148,7 +154,7 @@ class ADScalar {
     return *this;
   }
   template <class R, typename = std::enable_if_t<is_scalar_type<R>::value>>
-  inline ADScalar<T, N> &operator/=(const R &r) {
+  A2D_FUNCTION inline ADScalar<T, N> &operator/=(const R &r) {
     T inv = 1.0 / r;
     value *= inv;
     for (int i = 0; i < N; i++) {
@@ -157,6 +163,15 @@ class ADScalar {
     return *this;
   }
 
+  A2D_FUNCTION inline ADScalar<T, N> operator-() const {
+    // Unary minus: negate the value and each of the N derivative entries.
+    ADScalar<T, N> negated(-value);
+    for (int k = 0; k < N; k++) {
+      negated.deriv[k] = -deriv[k];
+    }
+    return negated;  // returned by value; *this is not modified
+  }
+
   //  private:
   T value;
   T deriv[N];
@@ -164,8 +179,8 @@ class ADScalar {
 
 // Addition
 template <class X, int M>
-inline ADScalar<X, M> operator+(const ADScalar<X, M> &l,
-                                const ADScalar<X, M> &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator+(const ADScalar<X, M> &l,
+                                             const ADScalar<X, M> &r) {
   ADScalar<X, M> out(l.value + r.value);
   for (int i = 0; i < M; i++) {
     out.deriv[i] = l.deriv[i] + r.deriv[i];
@@ -174,19 +189,21 @@ inline ADScalar<X, M> operator+(const ADScalar<X, M> &l,
 }
 template <class X, int M, class L,
           typename = std::enable_if_t<is_scalar_type<L>::value>>
-inline ADScalar<X, M> operator+(const L &l, const ADScalar<X, M> &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator+(const L &l,
+                                             const ADScalar<X, M> &r) {
   return ADScalar<X, M>(r.value + l, r.deriv);
 }
 template <class X, int M, class R,
           typename = std::enable_if_t<is_scalar_type<R>::value>>
-inline ADScalar<X, M> operator+(const ADScalar<X, M> &l, const R &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator+(const ADScalar<X, M> &l,
+                                             const R &r) {
   return ADScalar<X, M>(l.value + r, l.deriv);
 }
 
 // Subtraction
 template <class X, int M>
-inline ADScalar<X, M> operator-(const ADScalar<X, M> &l,
-                                const ADScalar<X, M> &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator-(const ADScalar<X, M> &l,
+                                             const ADScalar<X, M> &r) {
   ADScalar<X, M> out(l.value - r.value);
   for (int i = 0; i < M; i++) {
     out.deriv[i] = l.deriv[i] - r.deriv[i];
@@ -195,7 +212,8 @@ inline ADScalar<X, M> operator-(const ADScalar<X, M> &l,
 }
 template <class X, int M, class L,
           typename = std::enable_if_t<is_scalar_type<L>::value>>
-inline ADScalar<X, M> operator-(const L &l, const ADScalar<X, M> &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator-(const L &l,
+                                             const ADScalar<X, M> &r) {
   ADScalar<X, M> out(l - r.value);
   for (int i = 0; i < M; i++) {
     out.deriv[i] = -r.deriv[i];
@@ -204,14 +222,15 @@ inline ADScalar<X, M> operator-(const L &l, const ADScalar<X, M> &r) {
 }
 template <class X, int M, class R,
           typename = std::enable_if_t<is_scalar_type<R>::value>>
-inline ADScalar<X, M> operator-(const ADScalar<X, M> &l, const R &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator-(const ADScalar<X, M> &l,
+                                             const R &r) {
   return ADScalar<X, M>(l.value - r, l.deriv);
 }
 
 // Multiplication
 template <class X, int M>
-inline ADScalar<X, M> operator*(const ADScalar<X, M> &l,
-                                const ADScalar<X, M> &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator*(const ADScalar<X, M> &l,
+                                             const ADScalar<X, M> &r) {
   ADScalar<X, M> out(l.value * r.value);
   for (int i = 0; i < M; i++) {
     out.deriv[i] = r.value * l.deriv[i] + r.deriv[i] * l.value;
@@ -220,7 +239,8 @@ inline ADScalar<X, M> operator*(const ADScalar<X, M> &l,
 }
 template <class X, int M, class L,
           typename = std::enable_if_t<is_scalar_type<L>::value>>
-inline ADScalar<X, M> operator*(const L &l, const ADScalar<X, M> &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator*(const L &l,
+                                             const ADScalar<X, M> &r) {
   ADScalar<X, M> out(l * r.value);
   for (int i = 0; i < M; i++) {
     out.deriv[i] = r.deriv[i] * l;
@@ -229,7 +249,8 @@ inline ADScalar<X, M> operator*(const L &l, const ADScalar<X, M> &r) {
 }
 template <class X, int M, class R,
           typename = std::enable_if_t<is_scalar_type<R>::value>>
-inline ADScalar<X, M> operator*(const ADScalar<X, M> &l, const R &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator*(const ADScalar<X, M> &l,
+                                             const R &r) {
   ADScalar<X, M> out(l.value * r);
   for (int i = 0; i < M; i++) {
     out.deriv[i] = l.deriv[i] * r;
@@ -239,8 +260,8 @@ inline ADScalar<X, M> operator*(const ADScalar<X, M> &l, const R &r) {
 
 // Division
 template <class X, int M>
-inline ADScalar<X, M> operator/(const ADScalar<X, M> &l,
-                                const ADScalar<X, M> &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator/(const ADScalar<X, M> &l,
+                                             const ADScalar<X, M> &r) {
   X inv = 1.0 / r.value;
   X inv2 = l.value * inv * inv;
   ADScalar<X, M> out(inv * l.value);
@@ -252,7 +273,8 @@ inline ADScalar<X, M> operator/(const ADScalar<X, M> &l,
 }
 template <class X, int M, class L,
           typename = std::enable_if_t<is_scalar_type<L>::value>>
-inline ADScalar<X, M> operator/(const L &l, const ADScalar<X, M> &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator/(const L &l,
+                                             const ADScalar<X, M> &r) {
   X inv = 1.0 / r.value;
   X inv2 = l * inv * inv;
   ADScalar<X, M> out(inv * l);
@@ -264,7 +286,8 @@ inline ADScalar<X, M> operator/(const L &l, const ADScalar<X, M> &r) {
 }
 template <class X, int M, class R,
           typename = std::enable_if_t<is_scalar_type<R>::value>>
-inline ADScalar<X, M> operator/(const ADScalar<X, M> &l, const R &r) {
+A2D_FUNCTION inline ADScalar<X, M> operator/(const ADScalar<X, M> &l,
+                                             const R &r) {
   X inv = 1.0 / r;
   ADScalar<X, M> out(inv * l.value);
 
@@ -276,12 +299,13 @@ inline ADScalar<X, M> operator/(const ADScalar<X, M> &l, const R &r) {
 
 // fabs, sqrt
 template <class X, int M>
-inline ADScalar<X, M> fabs(const ADScalar<X, M> &r) {
+A2D_FUNCTION inline ADScalar<X, M> fabs(const ADScalar<X, M> &r) {
   X scalar = 1.0;
   if (r.value < 0.0) {
     scalar = -1.0;
   }
-  ADScalar<X, M> out(fabs(r.value));
+  // device compatible fabs
+  ADScalar<X, M> out(::fabs(r.value));
   for (int i = 0; i < M; i++) {
     out.deriv[i] = scalar * r.deriv[i];
   }
@@ -289,8 +313,9 @@ inline ADScalar<X, M> fabs(const ADScalar<X, M> &r) {
 }
 
 template <class X, int M>
-inline ADScalar<X, M> sqrt(const ADScalar<X, M> &r) {
-  X value = sqrt(r.value);
+A2D_FUNCTION inline ADScalar<X, M> sqrt(const ADScalar<X, M> &r) {
+  // device compatible sqrt; NOTE(review): ::sqrt skips ADL, verify complex X
+  X value = ::sqrt(r.value);
   ADScalar<X, M> out(value);
   X inv = 0.5 / value;
   for (int i = 0; i < M; i++) {
@@ -301,8 +326,10 @@ inline ADScalar<X, M> sqrt(const ADScalar<X, M> &r) {
 
 template <class X, int M, class R,
           typename = std::enable_if_t<is_scalar_type<R>::value>>
-inline ADScalar<X, M> pow(const ADScalar<X, M> &r, const R &exponent) {
-  X value = pow(r.value, exponent);
+A2D_FUNCTION inline ADScalar<X, M> pow(const ADScalar<X, M> &r,
+                                       const R &exponent) {
+  // device compatible pow
+  X value = ::pow(r.value, exponent);
   ADScalar<X, M> out(value);
   X inv = exponent * value / r.value;
   for (int i = 0; i < M; i++) {
@@ -312,8 +339,9 @@ inline ADScalar<X, M> pow(const ADScalar<X, M> &r, const R &exponent) {
 }
 
 template <class X, int M>
-inline ADScalar<X, M> exp(const ADScalar<X, M> &r) {
-  X value = exp(r.value);
+A2D_FUNCTION inline ADScalar<X, M> exp(const ADScalar<X, M> &r) {
+  // device compatible exp
+  X value = ::exp(r.value);
   ADScalar<X, M> out(value);
   for (int i = 0; i < M; i++) {
     out.deriv[i] = value * r.deriv[i];
@@ -322,9 +350,10 @@ inline ADScalar<X, M> exp(const ADScalar<X, M> &r) {
 }
 
 template <class X, int M>
-inline ADScalar<X, M> sin(const ADScalar<X, M> &r) {
-  ADScalar<X, M> out(sin(r.value));
-  X d = cos(r.value);
+A2D_FUNCTION inline ADScalar<X, M> sin(const ADScalar<X, M> &r) {
+  // device compatible sin, cos
+  ADScalar<X, M> out(::sin(r.value));
+  X d = ::cos(r.value);
   for (int i = 0; i < M; i++) {
     out.deriv[i] = d * r.deriv[i];
   }
@@ -332,15 +361,28 @@ inline ADScalar<X, M> sin(const ADScalar<X, M> &r) {
 }
 
 template <class X, int M>
-inline ADScalar<X, M> cos(const ADScalar<X, M> &r) {
-  ADScalar<X, M> out(cos(r.value));
-  X d = -sin(r.value);
+A2D_FUNCTION inline ADScalar<X, M> cos(const ADScalar<X, M> &r) {
+  // device compatible ::cos/::sin; d(cos x) = -sin(x) * dx (chain rule)
+  ADScalar<X, M> out(::cos(r.value));
+  X d = -::sin(r.value);
   for (int i = 0; i < M; i++) {
     out.deriv[i] = d * r.deriv[i];
   }
   return out;
 }
 
+// For A2D objects: __get_object_numeric_type specializations (disabled)
+
+// template <int N>
+// struct __get_object_numeric_type<ADScalar<double,N>> {
+//   using type = ADScalar<double,N>;
+// };
+
+// template <int N>
+// struct __get_object_numeric_type<ADScalar<std::complex<double>,N>> {
+//   using type = ADScalar<std::complex<double>,N>;
+// };
+
 }  // namespace A2D
 
 #endif  // A2D_ADSCALAR_H
diff --git a/tests/ad/test_ad_expressions.cpp b/tests/ad/test_ad_expressions.cpp
index 22f0de62..edf5b0ce 100644
--- a/tests/ad/test_ad_expressions.cpp
+++ b/tests/ad/test_ad_expressions.cpp
@@ -683,6 +683,10 @@ int main(int argc, char *argv[]) {
   tests.push_back(A2D::Test::QuaternionMatrixTestAll);
   tests.push_back(A2D::Test::QuaternionAngularVelocityTestAll);
   tests.push_back(A2D::Test::VecHadamardTestAll);
+  tests.push_back(A2D::Test::MatRotateFrameTestAll);
+  tests.push_back(A2D::Test::SymMatRotateFrameTestAll);
+  tests.push_back(A2D::Test::ShellStrainTestAll);
+  tests.push_back(A2D::Test::ShellAssembleFrameTestAll);
 
   bool passed = true;
   for (int i = 0; i < tests.size(); i++) {