BECCA_stm.rmd

---
title: "New Layout"
output: 
  flexdashboard::flex_dashboard:
    theme: cerulean
    logo: undp48.png
    orientation: rows
    runtime: shiny
    vertical_layout: fill
runtime: shiny
---

```{r Load Packages, include=FALSE}
require(flexdashboard)
require(tm)
require(shiny)
require(shinyBS)
require(quanteda)
require(RYandexTranslate)
require(stm)
require(wordcloud)
require(htmlwidgets)
require(stmBrowser)
require(devtools)
require(jsonlite)
require(SnowballC)
require(ggplot2)
require(cluster)
require(fpc)
require(skmeans)
require(knitr)
require(xlsx) # for reading original xls files
require(plotly) # for interactive map
require(likert) # for Likert plots
require(RColorBrewer) # for the Brewer color palette used in Triad plots
require(grid) # required by Triad and likert plots
require(gridExtra) # for arrangin the Triad plots
require(mixtools) # for computing ellipsoids from clustering
require(sp)
require(magrittr)
require(plotly)
require(data.table)
require(xtable)

devtools::install_github("timelyportfolio/stmBrowser@htmlwidget") # This needs to be installed once

```

```{r Load All Datasets}
knitr::opts_knit$set(root.dir = normalizePath('./'))
# print(opts_knit$get("root.dir"))

##  Load All Datasets

wd <- getwd()
dataset <- c("kyrgyzstan", "moldova", "unicef", "serbia", "tajikistan", "yemen")
# print(dataset)

for (d in 1:length(dataset)){
  load(paste0(wd,"/clean_data/",dataset[d],"_clean.RData"))
  dataset <- c("kyrgyzstan", "moldova", "unicef", "serbia", "tajikistan", "yemen")
  assign(paste0(dataset[d]),clean)
}
```

Understanding Topics {data-navmenu="STM"}
=====================================

Understanding Topics {.sidebar}
-------------------------------------

```{r, echo=FALSE}

##  Set Working Directory and create environment

stm.env <<- new.env()
stm.values <- reactiveValues()
if (length(ls(stm.env)) != 0) rm(ls(stm.env))
test_stm <<- 0 # Use test_model instead of running stm (for faster debugging).

##  Choose Country

selectInput("STMCountry", label = "Select Country",
            choices = c("Moldova","Kyrgyzstan UNDP","Kyrgyzstan Unicef", "Serbia","Tajikistan","Yemen"))

##  Choose Language

radioButtons(inputId = "stm_lang",
             label = "Select Language",
             choices = c("Original" = "texts_org",
                         "English" = "texts_eng"))

##  Choose K Topics

sliderInput("select_k", "Select number of topics", min = 5, max = 50, value = 25)

##  Choose Prevalence Covariates

p("Choose Prevalence Covariates")

  ##  Choose signifer type to drop-down

selectInput(inputId = "stm_sigs",
            label = "Signifier Type",
            choices = c("Select","Triads","Dyads","Stones","Questions","Descriptors"),
            selected = "Select")

renderUI({
  
  ##  Source covariate names from dataset

  stm_dataset <<- switch(input$STMCountry,
                         "Moldova" = moldova,
                         "Kyrgyzstan UNDP" = kyrgyzstan,
                         "Kyrgyzstan Unicef" = unicef,
                         "Serbia" = serbia,
                         "Tajikistan" = tajikistan,
                         "Yemen" = yemen)
  
  ##  Sort them by signifier type
  
  source("sort_signifiers.R")
  sigtypes(stm_dataset)
  
  cov_choices <<- reactive({switch(input$stm_sigs,
                                  "Select" = "Choose Signifier Type",
                                  "Triads" = names(triads),
                                  "Dyads" = names(dyads),
                                  "Stones" = names(stones),
                                  "Questions" = names(questions),
                                  "Descriptors" = names(descriptors))})

  ##  Choose covariates drop-down

conditionalPanel(condition = "input.stm_sigs != 'Select'",
                 checkboxGroupInput(inputId = "stm_cov",
                                    label = "Select Covariates",
                                    choices = cov_choices()))
})

  ##  Action Buttons - Include chosen covariates, Reset them

actionButton("stm_include", "Include")
actionButton("reset_cov", "Reset")

  ##  Object containing covariate selection

prev_cov <<- NULL
stm.env$covariates <- eventReactive(input$stm_include,{
  c(prev_cov, input$stm_cov)
  prev_cov <<- c(prev_cov, input$stm_cov)
})

hr()

##  Run STM Model

actionButton("stm_go", "Run STM")

##  Table displaying covariate selection

hr()
renderUI({
  req(stm.env$covariates)
  strong("Prevalence Covariates")
})

output$cov <- renderTable({
  req(stm.env$covariates)
  input$reset_cov
  matrix(data = stm.env$covariates(), byrow = TRUE)},include.rownames=FALSE,include.colnames=FALSE)

##  Reset Button function

observeEvent(input$reset_cov,{
  prev_cov <<- NULL
  updateSelectInput(session,
                    inputId = "stm_cov",
                    label = "Select Covariates",
                    choices = cov_choices(),
                    selected = "Select")
})

tableOutput("cov")

hr()

##  Select Topic to view (Word clouds & Most Rep. Stories)

renderUI({
  req(stm.values$topic_names)
  selectInput("view_k", label = "View Topic", choices = stm.values$topic_names)
})
```

```{r}

##  STM Fitting Function

observeEvent(input$stm_go,{
  
  ##  K, Language
  
  lang <- input$stm_lang
  texts <- stm_dataset[[lang]]
  stm.env$new_k <<- input$select_k

  ##  Prevalence Formula
  
  stm_formula <- NULL
  if (exists("covariates()", envir = stm.env)){
    for (p in 1:length(stm.env$covariates())){
      stm_formula <- paste(stm_formula,stm.env$covariates()[p],"+")
    }
    stm_formula <- substr(stm_formula,1,nchar(stm_formula)-2)
    stm_formula <- as.formula(paste("~",stm_formula))
  }
  stm.env$stm_formula <- stm_formula
  
  ##  Text processing
    
  set.seed(67)
  temp<-textProcessor(documents=texts,metadata=stm_dataset)
  meta<-temp$meta
  vocab<-temp$vocab
  docs<-temp$documents
  out <- prepDocuments(docs, vocab, meta)
  docs<- out$documents
  vocab<-out$vocab
  meta <-out$meta
  
  meta$EntryDate <- as.numeric(meta$EntryDate, format="%m/%d/%Y")
  meta$DQ2.Gender <- as.factor(meta$DQ2.Gender)
  meta$DQ3.Education <- as.factor(meta$DQ3.Education)
  meta$DQ1.Age <- as.factor(meta$DQ1.Age)
  
  ##  Reset Sidebar Instruments
  
  updateSelectInput(session,
                    inputId = "stm_sigs",
                    label = "Signifier Type",
                    choices = c("Select","Triads","Dyads","Stones","Questions","Descriptors"),
                    selected = "Select")
  
  if(test_stm == 0){ ## Load test_model instead (faster debugging)
  
  ##  Fit Model
  
  if (!is.null(stm_formula)) {
    ##  With covariates
  withProgress(message = "Fitting STM...", detail = "This may take a while.",{
  stm_model <- stm(docs, vocab, stm.env$new_k, prevalence = stm_formula,  data = meta, init.type = "Spectral", max.em.its = 700)
  })
  } else {
    ##  No covariates
    withProgress(message = "Fitting STM...", detail = "This may take a while.",{
  stm_model <- stm(docs, vocab, stm.env$new_k,  data = meta, init.type = "Spectral", max.em.its = 700)
  })
  }
  } else {load("test_stm.RData")}
  
  ##  Retrieve model spec and export to environment
  
  topic_words <- labelTopics(stm_model, n = 10)
  topic_names <- topic_words$frex[,1:5]
  topic_names <- apply(topic_names,1, function(topic_names) paste(topic_names, collapse = ", "))
  
  stm.values$fitted_k <- stm_model$settings$dim$K
  stm.values$topic_words <- topic_words
  stm.values$topic_names <- topic_names
  stm.values$stm_model <- stm_model
  stm.values$meta <- meta

})
```

Row {.tabset}
-------------------------------------

### Topic Wordclouds and Representative Stories

```{r, echo=FALSE}

##  Main Display Panel

fluidRow(
  
  ##  Word Cloud
  
  column(strong(align = "center", "Word Cloud"),hr(),
    tags$img(align = "top",renderPlot({
      req(stm.values$stm_model)
      stm_model <- stm.values$stm_model
      cloud(stm_model, topic = match(input$view_k, stm.values$topic_names), type=c("model", "documents"), fin = c(1,1), plt = c(0,1,1,0))
    }, width = 550, height = 550)
    ), width = 6),
  
  ##  Most Representative Stories
  
  column(tags$div(strong(align = "center", "Most Representative Stories"),hr(),
    renderPlot({
      req(stm.values$stm_model)
      stm_model <- stm.values$stm_model
  plotQuote(findThoughts(stm_model, texts = as.character(stm.values$meta$texts_eng), topics = match(input$view_k, stm.values$topic_names), n=3)$docs[[1]], width = 55, text.cex = 1.2)
}, width = 450, height = 600))
  , width = 6)
)
```

### STM Explained

**UNDERSTANDING TOPICS**

Structural topic modelling is a form of topic modelling, which itself is a statistical model from machine learning and natural language processing. It discovers underlying topics in textual data. STM assigns the different micro narratives to a number of abstract topics. On the "Understand topics" page you can attempt to understand what the different topics are about and whether they are useful in your analysis."Higest prob" shows words with highest probability of being within a topic. "Frex" shows frequent and exclusive words within a topic. Therefore, while two topics might share high probability words, the likelihood for sharing "frex" words is less. "Lift" and "score" are other measures that are not relevant, unless you have experience with topic modeling.

The word cloud graphically visualize a topic. The larger the displayed word are,  the more common they are in the text.The text on the right show three documents that the model assumes to be representative of the topic. You can find more information about Structural Topic Modeling and read academic papers utilizing it here: [http://structuraltopicmodel.com/](http://structuraltopicmodel.com/)

**EXPLORE TOPICS**

STM Brower is an interactive D3 visualisation created by [Freeman, Chuang, Roberts, Stewart and Tingley (2015)](https://github.com/mroberts/stmBrowser) , that helps you explore the topics in the text and metadata covariate relationships (for example gender, education, etc.). Narratives will be displayed on the right side if you click on them. Narratives can be placed within multiple topics, so they are rarely fully within a single topic.

### Debugging

```{r}

##  Temporary Panel For Debugging

inputPanel(textInput(inputId = "debugger","Print Console"),actionButton("run", "Execute"))

observeEvent(input$run,{
  output$debug <- renderPrint(eval(parse(text=isolate(input$debugger)), envir = parent.frame(1)))
})
htmlOutput("debug")

conditionalPanel("updateBusy() || $('html').hasClass('shiny-busy')",
                 id='progressIndicator',
                 h1("Fitting STM..."),
                 div(id='progress',
                     includeHTML("timer.js"))
)
```

Topic Exploration {data-navmenu="STM"} 
=====================================
  
Topic Exploration {.sidebar}
-------------------------------------
  
```{r, eval=FALSE}
selectInput("BrowserCountry", label = "Select Country",
            choices = c("Moldova" = "moldova", "Kyrgyzstan UNDP" = "kyrgyzstan", "Kyrgyzstan Unicef" = "unicef", "Serbia" = "serbia", 
                        "Tajikistan" = "tajikistan", "Yemen" = "yemen"),
            selected = "moldova")

```

STM Browser {data-width=1000}
-------------------------------------
  
### Topic modelling
```{r, echo=FALSE, eval = FALSE}
stmBrowser_widget(moldovaSTM, data=moldovameta, c("EntryDate","DQ1.Age","DQ3.Education", "DQ2.Gender", "Q7.Score"),text="texts_eng", labeltype='frex') 
```