From 9af21aa7f1d6edb3dd081c6454cc3e1d6d81f291 Mon Sep 17 00:00:00 2001
From: Jan Meis <meis@imbi.uni-heidelberg.de>
Date: Wed, 10 Jul 2024 15:58:49 +0200
Subject: [PATCH] Preparing new CRAN upload

---
 DESCRIPTION                                |   2 +-
 tests/testthat/test_densities.R            |   1 +
 tests/testthat/test_dsmean.R               |   6 +
 tests/testthat/test_evaluate_estimator.R   |   1 +
 tests/testthat/test_integrals.R            |   3 +
 tests/testthat/test_pseudo_rao_blackwell.R |   1 +
 vignettes/Introduction.Rmd                 | 351 ++++++++++++++++++++-
 7 files changed, 363 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index abc4589..541f82c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: adestr
 Type: Package
 Title: Estimation in Optimal Adaptive Two-Stage Designs
-Version: 0.5.2
+Version: 1.0.0
 Authors@R:c(person("Jan", "Meis", role = c("aut", "cre"), email = "meis@imbi.uni-heidelberg.de", comment = c(ORCID = "0000-0001-5407-7220")),
            person("Martin", "Maechler", role = c("cph"), email = "maechler@stat.math.ethz.ch", comment = c(ORCID = "0000-0002-8685-9910", "Original author of monoSpl.c (from the 'stats' package).")))
 Description:
diff --git a/tests/testthat/test_densities.R b/tests/testthat/test_densities.R
index 3ef8204..63d084e 100644
--- a/tests/testthat/test_densities.R
+++ b/tests/testthat/test_densities.R
@@ -212,6 +212,7 @@ test_that("two-armed trial, full sampling distribution, unknown variance, integr
 test_that("two-armed trial, full sampling distribution, unknown variance, integral of second-stage density equals 1",
           {
+            skip_on_cran()
             expect_equal(
               hcubature(
                 mf2_uv_full,
diff --git a/tests/testthat/test_dsmean.R b/tests/testthat/test_dsmean.R
index 0b529c5..ae2ba86 100644
--- a/tests/testthat/test_dsmean.R
+++ b/tests/testthat/test_dsmean.R
@@ -33,6 +33,7 @@ test_that("density of MLE sums up to one (normal distribution, one-armed) (exact
 test_that("density of MLE sums up to one (t distribution, one-armed)",
           {
+            skip_on_cran()
             expect_equal(
               dsmean(
                 Student(two_armed = FALSE),
@@ -50,6 +51,7 @@ test_that("density of MLE sums up to one (t distribution, one-armed)",
 test_that("density of MLE sums up to one (t distribution, one-armed) (exact=TRUE)",
           {
+            skip_on_cran()
             expect_equal(
               dsmean(
                 Student(two_armed = FALSE),
@@ -68,6 +70,7 @@ test_that("density of MLE sums up to one (t distribution, one-armed) (exact=TRUE
 test_that("density of MLE sums up to one (normal distribution, two-armed, treatment group)",
           {
+            skip_on_cran()
             expect_equal(dsmeanT(Normal(),
                                  designad,
                                  .x <- seq(-2, 2, .h <- .01),
@@ -80,6 +83,7 @@ test_that("density of MLE sums up to one (normal distribution, two-armed, treatm
 test_that("density of MLE sums up to one (normal distribution, two-armed, treatment group) (exact=TRUE)",
           {
+            skip_on_cran()
             expect_equal(dsmeanT(Normal(),
                                  designad,
                                  .x <- seq(-2, 2, .h <- .01),
@@ -92,6 +96,7 @@ test_that("density of MLE sums up to one (normal distribution, two-armed, treatm
 test_that("density of MLE sums up to one (t distribution, two-armed, treatment group)",
           {
+            skip_on_cran()
             expect_equal(dsmeanT(Student(),
                                  designad,
                                  .x <- seq(-2, 2, .h <- .01),
@@ -104,6 +109,7 @@ test_that("density of MLE sums up to one (t distribution, two-armed, treatment g
 test_that("density of MLE sums up to one (t distribution, two-armed, treatment group) (exact=TRUE)",
           {
+            skip_on_cran()
             expect_equal(dsmeanT(Student(),
                                  designad,
                                  .x <- seq(-2, 2, .h <- .1),
diff --git a/tests/testthat/test_evaluate_estimator.R b/tests/testthat/test_evaluate_estimator.R
index bd5bfa2..c5f61d1 100644
--- a/tests/testthat/test_evaluate_estimator.R
+++ b/tests/testthat/test_evaluate_estimator.R
@@ -103,6 +103,7 @@ test_that("TestAgreement of NaiveCI can be calculated",
 test_that("TestAgreement of NaivePValue can be calculated",
           {
+            skip_on_cran()
             expect_lt(
               evaluate_estimator(
                 score = TestAgreement(),
diff --git a/tests/testthat/test_integrals.R b/tests/testthat/test_integrals.R
index dd4795e..4ecdbcc 100644
--- a/tests/testthat/test_integrals.R
+++ b/tests/testthat/test_integrals.R
@@ -39,6 +39,7 @@ test_that("integral over sample space is equal to 1 for case: unknown variance,
 })
 test_that("integral over sample space is equal to 1 for case: unknown variance, one-armed (exact=TRUE)", {
+  skip_on_cran()
   expect_equal(
     int_uv(
       design = designad,
@@ -89,6 +90,7 @@ test_that("integral over sample space is equal to 1 for case: unknown variance,
 })
 test_that("integral over sample space is equal to 1 for case: unknown variance, two-armed (exact=TRUE)", {
+  skip_on_cran()
   expect_equal(
     int_uv(
       design = designad,
@@ -136,6 +138,7 @@ test_that("integral over sample space is equal to 1 for case: unknown variance,
 })
 test_that("integral over sample space is equal to 1 for case: unknown variance, two-armed, full sampling distribution, exact n2", {
+  skip_on_cran()
   expect_equal(
     int_uv_full(
       design = designad,
diff --git a/tests/testthat/test_pseudo_rao_blackwell.R b/tests/testthat/test_pseudo_rao_blackwell.R
index f0ba9de..9576c23 100644
--- a/tests/testthat/test_pseudo_rao_blackwell.R
+++ b/tests/testthat/test_pseudo_rao_blackwell.R
@@ -1,5 +1,6 @@
 test_that("Pseudo Rao-Blackwell estimator has less variance than Rao-Blackwell estimator", {
+  skip_on_cran()
   prb <- evaluate_estimator(score = Variance(),
                             estimator = PseudoRaoBlackwell(),
                             design = designad,
diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd
index b7e2125..286eae8 100644
--- a/vignettes/Introduction.Rmd
+++ b/vignettes/Introduction.Rmd
@@ -119,15 +119,360 @@ evaluate_estimator(
   estimator = MedianUnbiasedMLEOrdering(),
   data_distribution = Normal(two_armed = TRUE),
   design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+
+evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = MedianUnbiasedLikelihoodRatioOrdering(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+
+evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = MedianUnbiasedScoreTestOrdering(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+
+evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = MedianUnbiasedStagewiseCombinationFunctionOrdering(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+```
+
+Compare this to the overestimation probability of the sample mean:
+
+```{r}
+evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.4,
+  sigma = 1
+)
+```
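+
+The `plot` method shown for the coverage example below works the same way here.
+As a sketch (assuming, as in that example, that `plot` accepts the result of a
+call to `evaluate_estimator` with a vector of `mu` values), the overestimation
+probability of the sample mean can be visualized across a range of effect sizes:
+
+```{r, fig.width=7.2, fig.height=4, dev="svg"}
+# Evaluate the overestimation probability on a grid of effect sizes
+overestimation_sample_mean <- evaluate_estimator(
+  score = OverestimationProbability(),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = seq(-0.75, 1.32, .03),
+  sigma = 1
+)
+plot(overestimation_sample_mean)
+```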
+
+## Evaluating bias-reduced and unbiased estimators
+Apart from methods based on sample-space orderings, there are various other ways of
+defining alternative point estimators which may have desirable properties.
+Here are a few that are presented in [our paper](https://doi.org/10.1002/sim.10020),
+evaluated for $\mu=0.3$ and $\sigma=1$.
+
+```{r}
+evaluate_estimator(
+  score = MSE(),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = MSE(),
+  estimator = BiasReduced(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
   mu = 0.3,
   sigma = 1
 )
+evaluate_estimator(
+  score = MSE(),
+  estimator = PseudoRaoBlackwell(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = MSE(),
+  estimator = RaoBlackwell(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = MSE(),
+  estimator = AdaptivelyWeightedSampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = 0.3,
+  sigma = 1
+)
+```
+
+
+# Evaluating interval estimators
+## Evaluating coverage
+The coverage of an interval estimator is the probability that the interval will
+cover the true value of the parameter in question. It may be evaluated like this:
+
+```{r}
+evaluate_estimator(
+  score = Coverage(),
+  estimator = NaiveCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .07,
+  sigma = 1
+)
+```
+
+As you can see, the naive confidence interval does not achieve the nominal coverage
+of 95%. Here is an example of an interval estimator that achieves the correct coverage:
+
+```{r}
+evaluate_estimator(
+  score = Coverage(),
+  estimator = LikelihoodRatioOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .07,
+  sigma = 1
+)
+```
+
+For the value of $\mu$ chosen in this example, the naive confidence interval
+performs particularly badly. We can plot the coverage of the naive confidence interval
+for a range of values like this:
+
+```{r, fig.width=7.2, fig.height=4, dev="svg"}
+coverage_naive <- evaluate_estimator(
+  score = Coverage(),
+  estimator = NaiveCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = seq(-0.75, 1.32, .03),
+  sigma = 1
+)
+plot(coverage_naive)
+```
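+
+For contrast, the same plot can be produced for the likelihood ratio ordering
+interval, whose coverage should stay at the nominal level across the whole range.
+This is a sketch reusing the grid of $\mu$ values from above; the chunk is not
+evaluated here because computing ordering-based intervals on a fine grid takes
+considerably longer than computing the naive interval:
+
+```{r, eval=FALSE}
+coverage_lr <- evaluate_estimator(
+  score = Coverage(),
+  estimator = LikelihoodRatioOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = seq(-0.75, 1.32, .03),
+  sigma = 1
+)
+plot(coverage_lr)
+```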
+
+## Evaluating the width of an interval estimator
+Amongst the interval estimators which have the correct coverage, one might be
+interested in selecting the one with the smallest width. We can evaluate the
+expected width of an interval estimator for a particular set of assumptions like this:
+
+```{r}
+evaluate_estimator(
+  score = Width(),
+  estimator = MLEOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = Width(),
+  estimator = LikelihoodRatioOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = Width(),
+  estimator = ScoreTestOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = Width(),
+  estimator = StagewiseCombinationFunctionOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+```
+
+## Evaluating the centrality of a point estimator with respect to an interval estimator
+When choosing a combination of point and interval estimator to report at the end of a
+trial, one might want the point estimator to lie roughly in the middle of the
+respective interval. To evaluate the 'centrality' of a point estimator, defined here
+as the difference between its distance to the lower end of the interval and its
+distance to the upper end, one can use the following command:
+
+```{r}
+evaluate_estimator(
+  score = Centrality(interval = StagewiseCombinationFunctionOrderingCI()),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = Centrality(interval = NaiveCI()),
+  estimator = SampleMean(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
 ```
 
+## Evaluating agreement with the primary test decision of the design for an interval estimator
+In the framework of optimal adaptive designs, the rejection boundaries for the primary
+testing decision of a design are optimized directly and are not derived from a common
+test statistic. Therefore, confidence intervals derived from ordinary test statistics
+do not necessarily agree with the primary test decision. One may evaluate the
+probability of agreement like this:
-## Analyzing datasets
+```{r}
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = MLEOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = LikelihoodRatioOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = ScoreTestOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = StagewiseCombinationFunctionOrderingCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+```
+
+By construction, the confidence interval derived from the stage-wise combination
+function ordering always agrees with the primary test decision.
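+
+For comparison, the same check can be run for the naive confidence interval, which
+is also not guaranteed to agree with the primary test decision. A brief sketch under
+the same assumptions as above:
+
+```{r}
+# Probability that the naive CI excludes the null iff the design rejects
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = NaiveCI(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+```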
+
+# Evaluating p-values
+## Evaluating agreement with the primary test decision of the design for a p-value
+Like confidence intervals, p-values always come with an associated test decision.
+However, for the same reason as described in the section on interval estimators,
+the test decisions derived from the various ways of calculating p-values do not
+necessarily agree with the primary test decision of an optimal adaptive design.
+One may evaluate the probability of agreement for a particular set of assumptions
+like this:
+
+```{r}
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = MLEOrderingPValue(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = LikelihoodRatioOrderingPValue(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = ScoreTestOrderingPValue(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+evaluate_estimator(
+  score = TestAgreement(),
+  estimator = StagewiseCombinationFunctionOrderingPValue(),
+  data_distribution = Normal(two_armed = TRUE),
+  design = get_example_design(two_armed = TRUE),
+  mu = .3,
+  sigma = 1
+)
+```
+
+Again, we see that only the p-value derived from the stage-wise combination function
+ordering always agrees with the primary test decision.
+
+
+# Conducting larger-scale investigations into the performance characteristics of end-of-trial statistics
+So far, we have only evaluated a handful of estimators and performance characteristics
+under one particular set of assumptions. However, when designing a trial, one might
+want to produce results for a variety of different scenarios, which can be
+time-consuming. The evaluation of performance characteristics in `adestr` can run in
+parallel using the `future` framework.
+
+
+```{r}
+library(future)
+# Change to e.g. plan(multisession) for parallel computing
+plan(sequential)
+
+# Scenario 1:
+scores1 <- list(MSE(), OverestimationProbability())
+estimators1 <- list(SampleMean(), BiasReduced())
+dist1 <- list(Normal(two_armed = TRUE))
+designs1 <- list(get_example_design(two_armed = TRUE))
+mu1 <- seq(-1, 1, .5)
+sigma1 <- 1
+
+# Scenario 2:
+scores2 <- list(Coverage(), Width())
+estimators2 <- list(NaiveCI(), StagewiseCombinationFunctionOrderingCI())
+dist2 <- list(Normal(two_armed = TRUE))
+designs2 <- list(get_example_design(two_armed = TRUE))
+mu2 <- seq(-1, 1, .5)
+sigma2 <- 1
+
+# Evaluate in parallel
+res <- evaluate_scenarios_parallel(
+  score_lists = list(scores1, scores2),
+  estimator_lists = list(estimators1, estimators2),
+  data_distribution_lists = list(dist1, dist2),
+  design_lists = list(designs1, designs2),
+  mu_lists = list(mu1, mu2),
+  sigma_lists = list(sigma1, sigma2)
+)
+
+res[[1]]
+res[[2]]
+```
+
+You will get back a list of data.frames containing the results for each scenario.
+Note that within one scenario, the evaluation takes place over the cross-product of
+all supplied arguments: every estimator is evaluated with every score at every value
+of $\mu$, for every $\sigma$ and every design. Depending on your settings, this can
+become very time-consuming, making parallelization essential.
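+
+On a machine with several cores, enabling parallel evaluation only requires changing
+the `plan`. A minimal sketch using the `future` API (the `workers` argument is
+optional; by default, `multisession` uses all available cores):
+
+```{r, eval=FALSE}
+# Run the scenario evaluation on two background R sessions
+plan(multisession, workers = 2)
+# ... call evaluate_scenarios_parallel() as above ...
+# Reset to sequential processing afterwards
+plan(sequential)
+```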
+
+# Analyzing datasets
 
 Next, let us look at how the package can be used to calculate
 estimates after data has been collected.
@@ -190,3 +535,7 @@ Keep in mind that a difference of $\mu=0.3$ was used in the simulation.
 
 Note that while the median unbiased estimator performs well in this particular
 example, this is not universally true.
+
+
+
+