From 9af21aa7f1d6edb3dd081c6454cc3e1d6d81f291 Mon Sep 17 00:00:00 2001
From: Jan Meis <meis@imbi.uni-heidelberg.de>
Date: Wed, 10 Jul 2024 15:58:49 +0200
Subject: [PATCH] Preparing new CRAN upload

---
 DESCRIPTION                                |   2 +-
 tests/testthat/test_densities.R            |   1 +
 tests/testthat/test_dsmean.R               |   6 +
 tests/testthat/test_evaluate_estimator.R   |   1 +
 tests/testthat/test_integrals.R            |   3 +
 tests/testthat/test_pseudo_rao_blackwell.R |   1 +
 vignettes/Introduction.Rmd                 | 351 ++++++++++++++++++++-
 7 files changed, 363 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index abc4589..541f82c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: adestr
 Type: Package
 Title: Estimation in Optimal Adaptive Two-Stage Designs
-Version: 0.5.2
+Version: 1.0.0
 Authors@R:c(person("Jan", "Meis", role = c("aut", "cre"), email = "meis@imbi.uni-heidelberg.de", comment = c(ORCID = "0000-0001-5407-7220")),
            person("Martin", "Maechler", role = c("cph"), email = "maechler@stat.math.ethz.ch", comment = c(ORCID = "0000-0002-8685-9910", "Original author of monoSpl.c (from the 'stats' package).")))
 Description:
diff --git a/tests/testthat/test_densities.R b/tests/testthat/test_densities.R
index 3ef8204..63d084e 100644
--- a/tests/testthat/test_densities.R
+++ b/tests/testthat/test_densities.R
@@ -212,6 +212,7 @@ test_that("two-armed trial, full sampling distribution, unknown variance, integr
 test_that("two-armed trial, full sampling distribution, unknown variance, integral of second-stage density equals 1",
           {
+            skip_on_cran()
             expect_equal(
               hcubature(
                 mf2_uv_full,
diff --git a/tests/testthat/test_dsmean.R b/tests/testthat/test_dsmean.R
index 0b529c5..ae2ba86 100644
--- a/tests/testthat/test_dsmean.R
+++ b/tests/testthat/test_dsmean.R
@@ -33,6 +33,7 @@ test_that("density of MLE sums up to one (normal distribution, one-armed) (exact
 test_that("density of MLE sums up to one (t distribution, one-armed)",
           {
+            skip_on_cran()
             expect_equal(
               dsmean(
                 Student(two_armed = FALSE),
@@ -50,6 +51,7 @@ test_that("density of MLE sums up to one (t distribution, one-armed)",
 test_that("density of MLE sums up to one (t distribution, one-armed) (exact=TRUE)",
           {
+            skip_on_cran()
             expect_equal(
               dsmean(
                 Student(two_armed = FALSE),
@@ -68,6 +70,7 @@ test_that("density of MLE sums up to one (t distribution, one-armed) (exact=TRUE
 test_that("density of MLE sums up to one (normal distribution, two-armed, treatment group)",
           {
+            skip_on_cran()
             expect_equal(dsmeanT(Normal(),
                                  designad,
                                  .x <- seq(-2, 2, .h <- .01),
@@ -80,6 +83,7 @@ test_that("density of MLE sums up to one (normal distribution, two-armed, treatm
 test_that("density of MLE sums up to one (normal distribution, two-armed, treatment group) (exact=TRUE)",
           {
+            skip_on_cran()
             expect_equal(dsmeanT(Normal(),
                                  designad,
                                  .x <- seq(-2, 2, .h <- .01),
@@ -92,6 +96,7 @@ test_that("density of MLE sums up to one (normal distribution, two-armed, treatm
 test_that("density of MLE sums up to one (t distribution, two-armed, treatment group)",
           {
+            skip_on_cran()
             expect_equal(dsmeanT(Student(),
                                  designad,
                                  .x <- seq(-2, 2, .h <- .01),
@@ -104,6 +109,7 @@ test_that("density of MLE sums up to one (t distribution, two-armed, treatment g
 test_that("density of MLE sums up to one (t distribution, two-armed, treatment group) (exact=TRUE)",
           {
+            skip_on_cran()
             expect_equal(dsmeanT(Student(),
                                  designad,
                                  .x <- seq(-2, 2, .h <- .1),
diff --git a/tests/testthat/test_evaluate_estimator.R b/tests/testthat/test_evaluate_estimator.R
index bd5bfa2..c5f61d1 100644
--- a/tests/testthat/test_evaluate_estimator.R
+++ b/tests/testthat/test_evaluate_estimator.R
@@ -103,6 +103,7 @@ test_that("TestAgreement of NaiveCI can be calculated",
 test_that("TestAgreement of NaivePValue can be calculated",
           {
+            skip_on_cran()
             expect_lt(
               evaluate_estimator(
                 score = TestAgreement(),
diff --git a/tests/testthat/test_integrals.R b/tests/testthat/test_integrals.R
index dd4795e..4ecdbcc 100644
--- a/tests/testthat/test_integrals.R
+++ b/tests/testthat/test_integrals.R
@@ -39,6 +39,7 @@ test_that("integral over sample space is equal to 1 for case: unknown variance,
 })
 test_that("integral over sample space is equal to 1 for case: unknown variance, one-armed (exact=TRUE)", {
+  skip_on_cran()
   expect_equal(
     int_uv(
       design = designad,
@@ -89,6 +90,7 @@ test_that("integral over sample space is equal to 1 for case: unknown variance,
 })
 test_that("integral over sample space is equal to 1 for case: unknown variance, two-armed (exact=TRUE)", {
+  skip_on_cran()
   expect_equal(
     int_uv(
       design = designad,
@@ -136,6 +138,7 @@ test_that("integral over sample space is equal to 1 for case: unknown variance,
 })
 test_that("integral over sample space is equal to 1 for case: unknown variance, two-armed, full sampling distribution, exact n2", {
+  skip_on_cran()
   expect_equal(
     int_uv_full(
       design = designad,
diff --git a/tests/testthat/test_pseudo_rao_blackwell.R b/tests/testthat/test_pseudo_rao_blackwell.R
index f0ba9de..9576c23 100644
--- a/tests/testthat/test_pseudo_rao_blackwell.R
+++ b/tests/testthat/test_pseudo_rao_blackwell.R
@@ -1,5 +1,6 @@
 test_that("Pseudo Rao-Blackwell estimator has less variance than Rao-Blackwell estimator", {
+  skip_on_cran()
   prb <- evaluate_estimator(score = Variance(),
                             estimator = PseudoRaoBlackwell(),
                             design = designad,
diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd
index b7e2125..286eae8 100644
--- a/vignettes/Introduction.Rmd
+++ b/vignettes/Introduction.Rmd
@@ -119,15 +119,360 @@ evaluate_estimator(
   estimator = MedianUnbiasedMLEOrdering(),
   data_distribution = Normal(two_armed = TRUE),
   design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+
+evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = MedianUnbiasedLikelihoodRatioOrdering(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+
+evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = MedianUnbiasedScoreTestOrdering(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+
+evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = MedianUnbiasedStagewiseCombinationFunctionOrdering(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+```
+
+Compare this to the overestimation probability of the sample mean:
+
+```{r}
+evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+```
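+
+The `plot` method shown for the coverage example below works the same way here.
+As a sketch (assuming, as in that example, that `plot` accepts the result of a
+call to `evaluate_estimator` with a vector of `mu` values), the overestimation
+probability of the sample mean can be visualized across a range of effect sizes:
+
+```{r, fig.width=7.2, fig.height=4, dev="svg"}
+# Evaluate the overestimation probability on a grid of effect sizes
+overestimation_sample_mean <- evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = seq(-0.75, 1.32, .03),
+  sigma = 1
+)
+plot(overestimation_sample_mean)
+```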
+
+## Evaluating bias-reduced and unbiased estimators
+Apart from methods based on sample-space orderings, there are various other ways of
+defining alternative point estimators which may have desirable properties.
+Here are a few that are presented in [our paper](https://doi.org/10.1002/sim.10020),
+evaluated for $\mu=0.3$ and $\sigma=1$.
+
+```{r}
+evaluate_estimator(
+  score = MSE(),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = MSE(),
+  estimator = BiasReduced(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
   mu = 0.3,
   sigma = 1
 )
+evaluate_estimator(
+  score = MSE(),
+  estimator = PseudoRaoBlackwell(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = MSE(),
+  estimator = RaoBlackwell(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = MSE(),
+  estimator = AdaptivelyWeightedSampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.3,
+  sigma = 1
+)
+```
+
+
+# Evaluating interval estimators
+## Evaluating coverage
+The coverage of an interval estimator is the probability that the interval will
+cover the true value of the parameter in question. It may be evaluated like this:
+
+```{r}
+evaluate_estimator(
+  score = Coverage(),
+  estimator = NaiveCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .07,
+  sigma = 1
+)
+```
+
+As you can see, the naive confidence interval does not achieve the nominal coverage
+of 95%. Here is an example of an interval estimator that achieves the correct coverage:
+
+```{r}
+evaluate_estimator(
+  score = Coverage(),
+  estimator = LikelihoodRatioOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .07,
+  sigma = 1
+)
+```
+
+For the value of $\mu$ chosen in this example, the naive confidence interval
+performs particularly badly. We can plot the coverage of the naive confidence interval
+for a range of values like this:
+
+```{r, fig.width=7.2, fig.height=4, dev="svg"}
+coverage_naive <- evaluate_estimator(
+  score = Coverage(),
+  estimator = NaiveCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = seq(-0.75, 1.32, .03),
+  sigma = 1
+)
+plot(coverage_naive)
+```
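+
+For contrast, the same plot can be produced for the likelihood ratio ordering
+interval, whose coverage should stay at the nominal level across the whole range.
+This is a sketch reusing the grid of $\mu$ values from above; the chunk is not
+evaluated here because computing ordering-based intervals on a fine grid takes
+considerably longer than computing the naive interval:
+
+```{r, eval=FALSE}
+coverage_lr <- evaluate_estimator(
+  score = Coverage(),
+  estimator = LikelihoodRatioOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = seq(-0.75, 1.32, .03),
+  sigma = 1
+)
+plot(coverage_lr)
+```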
+
+## Evaluating the width of an interval estimator
+Amongst the interval estimators which have the correct coverage, one might be
+interested in selecting the one with the smallest width. We can evaluate the
+expected width of an interval estimator for a particular set of assumptions like this:
+
+```{r}
+evaluate_estimator(
+  score = Width(),
+  estimator = MLEOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = Width(),
+  estimator = LikelihoodRatioOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = Width(),
+  estimator = ScoreTestOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = Width(),
+  estimator = StagewiseCombinationFunctionOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+```
+
+## Evaluating the centrality of a point estimator with respect to an interval estimator
+When choosing a combination of point and interval estimator to report at the end of a
+trial, one might want the point estimator to lie roughly in the middle of the
+respective interval. To evaluate the 'centrality' of a point estimator, defined here
+as the difference between its distance to the lower end of the interval and its
+distance to the upper end, one can use the following command:
+
+```{r}
+evaluate_estimator(
+  score = Centrality(interval = StagewiseCombinationFunctionOrderingCI()),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = Centrality(interval = NaiveCI()),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
 ```
 
+## Evaluating agreement with the primary test decision of the design for an interval estimator
+In the framework of optimal adaptive designs, the rejection boundaries for the primary
+testing decision of a design are optimized directly and are not derived from a common
+test statistic. Therefore, confidence intervals derived from ordinary test statistics
+do not necessarily agree with the primary test decision. One may evaluate the
+probability of agreement like this:
-## Analyzing datasets
+```{r}
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = MLEOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = LikelihoodRatioOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = ScoreTestOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = StagewiseCombinationFunctionOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+```
+
+By construction, the confidence interval derived from the stage-wise combination
+function ordering always agrees with the primary test decision.
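+
+For comparison, the same check can be run for the naive confidence interval, which
+is also not guaranteed to agree with the primary test decision. A brief sketch under
+the same assumptions as above:
+
+```{r}
+# Probability that the naive CI excludes the null iff the design rejects
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = NaiveCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+```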
+
+# Evaluating p-values
+## Evaluating agreement with the primary test decision of the design for a p-value
+Like confidence intervals, p-values always come with an associated test decision.
+However, for the same reason as described in the section on interval estimators,
+the test decisions derived from the various ways of calculating p-values do not
+necessarily agree with the primary test decision of an optimal adaptive design.
+One may evaluate the probability of agreement for a particular set of assumptions
+like this:
+
+```{r}
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = MLEOrderingPValue(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = LikelihoodRatioOrderingPValue(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = ScoreTestOrderingPValue(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = StagewiseCombinationFunctionOrderingPValue(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+```
+
+Again, we see that only the p-value derived from the stage-wise combination function
+ordering always agrees with the primary test decision.
+
+
+# Conducting larger-scale investigations into the performance characteristics of end-of-trial statistics
+So far, we have only evaluated a handful of estimators and performance characteristics
+under one particular set of assumptions. However, when designing a trial, one might
+want to produce results for a variety of different scenarios, which can be
+time-consuming. The evaluation of performance characteristics in `adestr` can run in
+parallel using the `future` framework.
+
+
+```{r}
+library(future)
+# Change to e.g. plan(multisession) for parallel computing
+plan(sequential)
+
+# Scenario 1:
+scores1 <- list(MSE(), OverestimationProbability())
+estimators1 <- list(SampleMean(), BiasReduced())
+dist1 <- list(Normal(two_armed = TRUE))
+designs1 <- list(get_example_design(two_armed = TRUE))
+mu1 <- seq(-1, 1, .5)
+sigma1 <- 1
+
+# Scenario 2:
+scores2 <- list(Coverage(), Width())
+estimators2 <- list(NaiveCI(), StagewiseCombinationFunctionOrderingCI())
+dist2 <- list(Normal(two_armed = TRUE))
+designs2 <- list(get_example_design(two_armed = TRUE))
+mu2 <- seq(-1, 1, .5)
+sigma2 <- 1
+
+# Evaluate in parallel
+res <- evaluate_scenarios_parallel(
+  score_lists = list(scores1, scores2),
+  estimator_lists = list(estimators1, estimators2),
+  data_distribution_lists = list(dist1, dist2),
+  design_lists = list(designs1, designs2),
+  mu_lists = list(mu1, mu2),
+  sigma_lists = list(sigma1, sigma2)
+)
+
+res[[1]]
+res[[2]]
+```
+
+You will get back a list of data.frames containing the results for each scenario.
+Note that within one scenario, the evaluation takes place over the cross-product of
+all supplied arguments: every estimator is evaluated with every score at every value
+of $\mu$, for every $\sigma$ and every design. Depending on your settings, this can
+become very time-consuming, making parallelization essential.
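+
+On a machine with several cores, enabling parallel evaluation only requires changing
+the `plan`. A minimal sketch using the `future` API (the `workers` argument is
+optional; by default, `multisession` uses all available cores):
+
+```{r, eval=FALSE}
+# Run the scenario evaluation on two background R sessions
+plan(multisession, workers = 2)
+# ... call evaluate_scenarios_parallel() as above ...
+# Reset to sequential processing afterwards
+plan(sequential)
+```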
+
+# Analyzing datasets
 
 Next, let us look at how the package can be used to calculate
 estimates after data has been collected.
@@ -190,3 +535,7 @@ Keep in mind that a difference of $\mu=0.3$ was used in the simulation.
 
 Note that while the median unbiased estimator performs well in this particular
 example, this is not universally true.
+
+
+
+