diff --git a/paper.bib b/paper.bib new file mode 100644 index 0000000..a9b232a --- /dev/null +++ b/paper.bib @@ -0,0 +1,76 @@ +@Article{MATPLOTLIB, + Author = {Hunter, J. D.}, + Title = {Matplotlib: A 2D graphics environment}, + Journal = {Computing in Science \& Engineering}, + Volume = {9}, + Number = {3}, + Pages = {90--95}, + abstract = {Matplotlib is a 2D graphics package used for Python + for application development, interactive scripting, and + publication-quality image generation across user + interfaces and operating systems.}, + publisher = {IEEE Computer Society}, + year = 2007 +} +@misc{LIFELINES, + author = {Davidson-Pilon, Cameron}, + title = {Lifelines}, + year = {2016}, + publisher = {GitHub}, + journal = {GitHub repository}, + howpublished = {\url{https://github.com/camdavidsonpilon/lifelines}}, + commit = {latest_commit} +} +@InProceedings{PANDAS, + author = {McKinney, Wes}, + title = {Data Structures for Statistical Computing in {Python}}, + booktitle = {Proceedings of the 9th {Python} in Science Conference}, + pages = {51--56}, + year = {2010}, + editor = {van der Walt, St{\'e}fan and Millman, Jarrod} +} +@Misc{SCIPY, + author = {Eric Jones and Travis Oliphant and Pearu Peterson and others}, + title = {{SciPy}: Open source scientific tools for {Python}}, + year = {2001--}, + url = "http://www.scipy.org/", +} +@Article{IPYTHON, + Author = {P{\'e}rez, Fernando and Granger, Brian E.}, + Title = {{IPython}: a System for Interactive Scientific Computing}, + Journal = {Computing in Science \& Engineering}, + Volume = {9}, + Number = {3}, + Pages = {21--29}, + month = may, + year = 2007, + url = "http://ipython.org", + ISSN = "1521-9615", + doi = {10.1109/MCSE.2007.53}, + publisher = {IEEE Computer Society}, +} +@Misc{MPLD3, + author = {Jake Vanderplas}, + title = {{mpld3}: A D3 Viewer for {Matplotlib}}, + year = {2016}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = "https://github.com/mpld3/mpld3", +} +@Misc{XLRD, + author = {John Machin}, + title = 
{{xlrd}: a library to extract data from Microsoft Excel (tm) files}, + year = {2016}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = "https://github.com/python-excel/xlrd", +} +@article{KAPLANMEIER, +author = {Kaplan, E. L. and Meier, Paul}, +title = {Nonparametric Estimation from Incomplete Observations}, +journal = {Journal of the American Statistical Association}, +volume = {53}, +number = {282}, +pages = {457--481}, +year = {1958}, +doi = {10.1080/01621459.1958.10501452} } diff --git a/paper.md b/paper.md new file mode 100644 index 0000000..04f3894 --- /dev/null +++ b/paper.md @@ -0,0 +1,30 @@ +--- +title: 'SurvivalVolume: interactive volume threshold survival graphs' +tags: + - bioinformatics + - biostatistics + - visualisation + - treatment studies +authors: + - name: Matthew J. Wakefield + orcid: 0000-0001-6624-4698 + affiliation: The Walter and Eliza Hall Institute + affiliation: The University of Melbourne +date: 31 Oct 2016 +bibliography: paper.bib +--- + +# Summary + +Treatment studies of cancer frequently use tumour volume to measure response to therapy. Therapeutic response will be apparent at different time points during the experiment. Progressive disease (increasing volume), stable disease and regression (reduction in volume) under therapy are important measures of response in addition to the overall time to reach a defined maximum volume. Traditional methods of presenting this data involve three unconnected graphs: line graphs of each individual, average volume of each group with standard error of the mean, and a Kaplan-Meier graph of time to maximum volume. +Survival volume is a Python package to produce an integrated plot of these three representations of the same data, and to provide interaction with the plots of volume to enhance exploration of outliers and subgroups that are of interest clinically. 
+ +Survival volume is written for Python 3 and uses matplotlib [@MATPLOTLIB] and the lifelines [@LIFELINES] Kaplan-Meier [@KAPLANMEIER] implementation for generating plots, and mpld3 [@MPLD3] for interactivity. Utility functions are provided for importing data from spreadsheets using xlrd [@XLRD], preprocessing to provide consistent time scales for comparison of treatments, and conversion from volume measurements to survival format. Statistics are calculated using lifelines and scipy.stats [@SCIPY]. Pandas [@PANDAS] data frames are used to provide flexible manipulation of data, and use within Jupyter [@IPYTHON] notebooks is supported and encouraged. + +Plot elements are presented in a visual hierarchy giving greatest weight to the information-rich mean, and transparency is used to legibly overplot the confidence interval of the mean and the complete dataset. Interactivity through mouseover and plot zooming provides rich access to the full data set. By co-plotting the Kaplan-Meier representation with a shared x-axis, endpoint and censoring events can be related between the plots, enriching the information accessible about each event. + +Survival volume is released under the GPLv3 and is available from GitHub and PyPI. + + +# References +