# load in the files containing the methylation data and the source # code containing the SSRPMM functions
|
|
- Dortha McGee
- 8 years ago
- Views:
Transcription
################ EXAMPLE ANALYSES TO ILLUSTRATE SS-RPMM ########################

# Load the files containing the methylation data and the source code
# containing the SS-RPMM functions.
#
# Note: the "SSRPMM Tutorial Objects.Rdata" file contains 4 objects,
# representing the methylation beta values and covariate data for a head and
# neck squamous cell carcinoma (HNSCC) cancer data set (Langevin et al. 2012)
# and a mesothelioma cancer data set (Christensen et al. 2009). The four
# objects are named Betas_HNSCC, Covariates_HNSCC, Betas_Mesothelioma and
# Covariates_Mesothelioma:
#   * Betas_HNSCC             N x J data.frame of methylation average-betas
#   * Covariates_HNSCC        N x P data.frame of covariates (HNSCC samples)
#   * Betas_Mesothelioma      N x J data.frame of methylation average-betas
#   * Covariates_Mesothelioma N x P data.frame of covariates (Meso samples)

# Surv(), survfit(), survdiff(), coxph() and strata() used in the post
# SS-RPMM analyses below are provided by the survival package.
library(survival)

# NOTE(review): the transcription this script was recovered from had these
# file names case-mangled ("ssrpmm Tutorial Objects.Rdata",
# "ssrpmm Functions R"). The .Rdata name follows the description above; the
# functions file name is a best guess -- confirm against the tutorial files.
load("SSRPMM Tutorial Objects.Rdata")
source("SSRPMM Functions.R")

# Store relevant details regarding the HNSCC and Mesothelioma data.
# R is case-sensitive: the objects must be referenced exactly as loaded.
J1 <- ncol(Betas_HNSCC)       # number of CpG loci in the HNSCC dataset
N1 <- nrow(Betas_HNSCC)       # number of samples in the HNSCC dataset
P1 <- ncol(Covariates_HNSCC)  # number of covariate factors (HNSCC)
# Betas and covariates must describe the same samples; fail loudly if not.
stopifnot(N1 == nrow(Covariates_HNSCC))

J2 <- ncol(Betas_Mesothelioma)       # number of CpG loci (Mesothelioma)
N2 <- nrow(Betas_Mesothelioma)       # number of samples (Mesothelioma)
P2 <- ncol(Covariates_Mesothelioma)  # number of covariate factors (Meso)
stopifnot(N2 == nrow(Covariates_Mesothelioma))

# STEP 1: Randomly split the full data into training and testing sets.
#         (1) For the Mesothelioma data do a stratified random split by tumor
#         histology and (2) for the HNSCC data do a stratified random split by
#         HNSCC case/control status. The stratified random split is used to
#         ensure an approximately equal distribution of subjects between the
#         training and testing sets with respect to a certain variable.
HNSCCSplit <- TrainTestSplit(
  Betas_HNSCC, Covariates_HNSCC,
  Strat = "case", seed = 1, proptrain = 1 / 2
)
MesoSplit <- TrainTestSplit(
  Betas_Mesothelioma, Covariates_Mesothelioma,
  Strat = "histology", seed = 12, proptrain = 1 / 2
)

# STEP (1a): Processing the results of the TrainTestSplit function. Extract
#            the training and testing data from the Split objects above.
HNSCCTrainingData <- HNSCCSplit[[1]]
HNSCCTestingData <- HNSCCSplit[[2]]
MesoTrainingData <- MesoSplit[[1]]
MesoTestingData <- MesoSplit[[2]]

# STEP (1b): Extract the beta values and the covariate values from the
#            training and testing sets from STEP (1a). The code below takes
#            the first P columns as covariates and the remainder as betas.
#            drop = FALSE keeps a one-column selection from collapsing to a
#            vector.
hnscc_cov_cols <- seq_len(P1)
meso_cov_cols <- seq_len(P2)

# HNSCC Training Data
HNSCCTrainingBetas <- HNSCCTrainingData[, -hnscc_cov_cols, drop = FALSE]
HNSCCTrainingCovariates <- HNSCCTrainingData[, hnscc_cov_cols, drop = FALSE]

# HNSCC Testing Data
HNSCCTestingBetas <- HNSCCTestingData[, -hnscc_cov_cols, drop = FALSE]
HNSCCTestingCovariates <- HNSCCTestingData[, hnscc_cov_cols, drop = FALSE]

# Meso Training Data
MesoTrainingBetas <- MesoTrainingData[, -meso_cov_cols, drop = FALSE]
MesoTrainingCovariates <- MesoTrainingData[, meso_cov_cols, drop = FALSE]

# Meso Testing Data
MesoTestingBetas <- MesoTestingData[, -meso_cov_cols, drop = FALSE]
MesoTestingCovariates <- MesoTestingData[, meso_cov_cols, drop = FALSE]

# STEP 2: Identify the CpG loci that are most associated with the clinical
#         variable of interest using only the training data. If the clinical
#         variable of interest is time-to-event data (i.e. survival or disease
#         recurrence) use MostImpCpGsSurvival, otherwise use MostImpCpGs.
#         This provides a ranked list of CpGs and their corresponding
#         T-scores; the CpGs at the top of the list are those most associated
#         with the clinical variable of interest.

# HNSCC data: clinical variable of interest is "case/control" status
HNSCCScores <- MostImpCpGs(
  Y = HNSCCTrainingBetas, covariates = HNSCCTrainingCovariates,
  clinvar = "case", terms = NULL, factors = NULL
)

# Meso data: clinical variable of interest is "survival" status
MesoScores <- MostImpCpGsSurvival(
  Y = MesoTrainingBetas, covariates = MesoTrainingCovariates,
  times = "time", censor = "dead", terms = c("age", "sex"),
  factors = NULL, strat = "histology"
)

# STEP 3a: Determine the number of high ranking CpG loci from the previous
#          step to be used in fitting RPMM to the training data. If the
#          clinical variable of interest is survival use
#          NestedXValidationSurvival, otherwise use NestedXValidation.
#          Warning: depending on the selection of mrange and L, this step can
#          take quite some time.
HNSCCXvalidationResults <- NestedXValidation(
  Y = HNSCCTrainingBetas, covariates = HNSCCTrainingCovariates,
  TScores = HNSCCScores, clinvar = "case", vartype = "binary",
  mrange = c(5, 50), method = "gaussian", L = 20, seeds = 1:20
)
MesoXvalidationResults <- NestedXValidationSurvival(
  Y = MesoTrainingBetas, covariates = MesoTrainingCovariates,
  CoxScores = MesoScores, times = "time", censor = "dead",
  mrange = c(5, 50), method = "gaussian", L = 20, seeds = 1:20
)

# STEP 3b: The selection of M can be based on the value of m that yielded the
#          lowest median p-value in the HNSCCXvalidationResults and
#          MesoXvalidationResults objects. Alternatively, a better approach is
#          to smooth those results and choose the value of M where the smooth
#          function of median p-values attains its minimum.
#
# NOTE(review): the cross-validation calls above use mrange = c(5, 50) while
# the grid below is 5:20. loess.smooth() needs x and y of equal length, so
# confirm which range is intended and what NestedXValidation* returns.
mrange <- 5:20

loesscurve <- loess.smooth(mrange, MesoXvalidationResults, degree = 2)
# which.min() yields a single optimum even if the minimum is tied.
# NOTE(review): loess.smooth() evaluates on an equally spaced grid, so the
# optimum may be non-integer -- confirm how PredMethClasses treats such an M.
MOpt_Mesothelioma <- loesscurve$x[which.min(loesscurve$y)]

# MOpt_HNSCC is needed in STEP 4; derive it the same way as for the
# Mesothelioma data.
loesscurve_HNSCC <- loess.smooth(mrange, HNSCCXvalidationResults, degree = 2)
MOpt_HNSCC <- loesscurve_HNSCC$x[which.min(loesscurve_HNSCC$y)]

# Median p-value against M for the Mesothelioma data, with the smoothed curve
# and the selected M marked by a dashed red line.
par(mar = c(5, 5, 4, 2))
plot(
  mrange, MesoXvalidationResults,
  cex = 0.75, xlab = "Number of top ranking loci (M)",
  ylab = "Median P-value", cex.lab = 2, cex.axis = 1.5
)
lines(loesscurve$x, loesscurve$y, lwd = 5)
abline(v = MOpt_Mesothelioma, col = "red", lwd = 2, lty = "dashed")

# STEP 4: The final step of SS-RPMM is to fit an RPMM to the training data
#         using the M CpG loci with the largest absolute T-score, where M is
#         determined from step 3. Based on this solution, predict the
#         methylation class membership for the observations in the test data
#         using the PredMethClasses function.
HNSCCMethClassesTesting <- PredMethClasses(
  Ytrain = HNSCCTrainingBetas, Ytest = HNSCCTestingBetas,
  Scores = HNSCCScores, M = MOpt_HNSCC, method = "gaussian"
)
MesoMethClassesTesting <- PredMethClasses(
  Ytrain = MesoTrainingBetas, Ytest = MesoTestingBetas,
  Scores = MesoScores, M = MOpt_Mesothelioma, method = "gaussian"
)

################# POST SS-RPMM ANALYSES ####################

# Post SS-RPMM Analysis: Following the prediction of the methylation classes
# in the testing data, determine whether or not the identified methylation
# classes are clinically relevant by testing the association between the
# predicted classes in the test data and the clinical outcome of interest
# (i.e. case/control status for the HNSCC data and survival for the Meso
# data).

# Some Post SS-RPMM analyses for the HNSCC data

# [1] Test whether or not the predicted methylation classes in the test data
#     are associated with HNSCC case/control status.
# NOTE(review): the function name is kept as transcribed; verify its exact
# capitalisation against the sourced SS-RPMM functions file.
permtestchisquare(HNSCCMethClassesTesting, HNSCCTestingCovariates[, "case"])

# [2] Generate a barplot of percent case/control by predicted methylation
#     class (row-wise proportions of the class x case table).
Table <- table(HNSCCMethClassesTesting, HNSCCTestingCovariates[, "case"])
RowSum <- rowSums(Table)
Percentages <- sweep(Table, 1, RowSum, "/")
barplot(
  t(Percentages),
  main = "Percent Case/Control by RPMM Class",
  ylab = "Percent Case/Control", xlab = "predicted Class",
  col = c("darkblue", "red")
)

# [3] Test whether or not the predicted methylation classes in the test data
#     are associated with HNSCC case/control status, controlling for
#     confounders.
HNSCCfit <- glm(
  case ~ gender + age + packyrs + factor(smk_cfn) + HNSCCMethClassesTesting,
  data = HNSCCTestingCovariates, family = binomial(link = "logit")
)
summary(HNSCCfit)

# [4] Generate a heatmap of the testing data by predicted methylation class.
M <- 6
top_loci <- rownames(HNSCCScores)[seq_len(M)]
K <- seq(0.5, M - 0.5, by = 1)  # one axis tick per locus column
HNSCCTestingBetasTopM <- as.matrix(HNSCCTestingBetas[, top_loci])
# "ward.D" is the current name of the criterion formerly called "ward".
OrdBeta <- hclust(dist(t(HNSCCTestingBetasTopM)), method = "ward.D")$order
par(pty = "m", mai = c(1.5, .65, .1, .6))
# NOTE(review): function name kept as transcribed; verify capitalisation.
plotmethbyclass(
  HNSCCTestingBetasTopM, HNSCCMethClassesTesting, sep = "red", OrdBeta
)
axis(1, at = K, labels = top_loci, las = 2, cex.axis = 0.8, line = -.5)
title(
  xlab = paste0("Loci used for determining subtypes (M=", M, ")"),
  line = 4.8, outer = FALSE, cex.lab = 1.5
)

# Some Post SS-RPMM analyses for the Meso data

# [1] Kaplan-Meier survival plot by predicted methylation class in the
#     testing data (one colour per class level).
meso_levels <- levels(MesoMethClassesTesting)
meso_cols <- rainbow(length(meso_levels))
plot(
  survfit(
    Surv(time, dead) ~ MesoMethClassesTesting,
    data = MesoTestingCovariates
  ),
  col = meso_cols, xlab = "Time (months)",
  ylab = "Probability of Survival",
  cex.lab = 1.5, cex.axis = 1.2, lwd = 2
)
legend(
  "bottomleft", legend = meso_levels, col = meso_cols,
  cex = 1.2, lty = 1, lwd = 2
)

# [2] Test whether or not the predicted methylation classes in the test data
#     are associated with survival time (i.e. log-rank test).
survdiff(
  Surv(time, dead) ~ MesoMethClassesTesting,
  data = MesoTestingCovariates
)

# [3] Test whether or not the predicted methylation classes in the test data
#     are associated with survival time, controlling for confounders.
CoxModMeso <- coxph(
  Surv(time, dead) ~ MesoMethClassesTesting + age + sex + strata(histology),
  data = MesoTestingCovariates
)
summary(CoxModMeso)
Linda Staub & Alexandros Gekenidis
Seminar in Statistics: Survival Analysis Chapter 2 Kaplan-Meier Survival Curves and the Log- Rank Test Linda Staub & Alexandros Gekenidis March 7th, 2011 1 Review Outcome variable of interest: time until
More information# For usage of the functions, it is necessary to install the "survival" and the "penalized" package.
###################################################################### ### R-script for the manuscript ### ### ### ### Survival models with preclustered ### ### gene groups as covariates ### ### ### ###
More informationLecture 5 : The Poisson Distribution
Lecture 5 : The Poisson Distribution Jonathan Marchini November 10, 2008 1 Introduction Many experimental situations occur in which we observe the counts of events within a set unit of time, area, volume,
More informationTutorial 3: Graphics and Exploratory Data Analysis in R Jason Pienaar and Tom Miller
Tutorial 3: Graphics and Exploratory Data Analysis in R Jason Pienaar and Tom Miller Getting to know the data An important first step before performing any kind of statistical analysis is to familiarize
More informationPackage survpresmooth
Package survpresmooth February 20, 2015 Type Package Title Presmoothed Estimation in Survival Analysis Version 1.1-8 Date 2013-08-30 Author Ignacio Lopez de Ullibarri and Maria Amalia Jacome Maintainer
More informationViewing Ecological data using R graphics
Biostatistics Illustrations in Viewing Ecological data using R graphics A.B. Dufour & N. Pettorelli April 9, 2009 Presentation of the principal graphics dealing with discrete or continuous variables. Course
More informationTime Series Analysis AMS 316
Time Series Analysis AMS 316 Programming language and software environment for data manipulation, calculation and graphical display. Originally created by Ross Ihaka and Robert Gentleman at University
More informationsample median Sample quartiles sample deciles sample quantiles sample percentiles Exercise 1 five number summary # Create and view a sorted
Sample quartiles We have seen that the sample median of a data set $\{x_1, x_2, x_3, \ldots, x_n\}$, sorted in increasing order, is a value that divides it in such a way that exactly half (i.e., 50%) of the sample observations
More informationStatistical Models in R
Statistical Models in R Some Examples Steven Buechler Department of Mathematics 276B Hurley Hall; 1-6233 Fall, 2007 Outline Statistical Models Linear Models in R Regression Regression analysis is the appropriate
More informationStatistical Data Mining. Practical Assignment 3 Discriminant Analysis and Decision Trees
Statistical Data Mining Practical Assignment 3 Discriminant Analysis and Decision Trees In this practical we discuss linear and quadratic discriminant analysis and tree-based classification techniques.
More informationExploratory Data Analysis
Goals of EDA Relationship between mean response and covariates (including time). Variance, correlation structure, individual-level heterogeneity. Guidelines for graphical displays of longitudinal data
More informationPackage smoothhr. November 9, 2015
Encoding UTF-8 Type Package Depends R (>= 2.12.0),survival,splines Package smoothhr November 9, 2015 Title Smooth Hazard Ratio Curves Taking a Reference Value Version 1.0.2 Date 2015-10-29 Author Artur
More informationSurvey, Statistics and Psychometrics Core Research Facility University of Nebraska-Lincoln. Log-Rank Test for More Than Two Groups
Survey, Statistics and Psychometrics Core Research Facility University of Nebraska-Lincoln Log-Rank Test for More Than Two Groups Prepared by Harlan Sayles (SRAM) Revised by Julia Soulakova (Statistics)
More informationM1 in Economics and Economics and Statistics Applied multivariate Analysis - Big data analytics Worksheet 3 - Random Forest
Nathalie Villa-Vialaneix Année 2014/2015 M1 in Economics and Economics and Statistics Applied multivariate Analysis - Big data analytics Worksheet 3 - Random Forest This worksheet s aim is to learn how
More informationBasic Statistics and Data Analysis for Health Researchers from Foreign Countries
Basic Statistics and Data Analysis for Health Researchers from Foreign Countries Volkert Siersma siersma@sund.ku.dk The Research Unit for General Practice in Copenhagen Dias 1 Content Quantifying association
More informationGraphics in R. Biostatistics 615/815
Graphics in R Biostatistics 615/815 Last Lecture Introduction to R Programming Controlling Loops Defining your own functions Today Introduction to Graphics in R Examples of commonly used graphics functions
More informationSECOND M.B. AND SECOND VETERINARY M.B. EXAMINATIONS INTRODUCTION TO THE SCIENTIFIC BASIS OF MEDICINE EXAMINATION. Friday 14 March 2008 9.00-9.
SECOND M.B. AND SECOND VETERINARY M.B. EXAMINATIONS INTRODUCTION TO THE SCIENTIFIC BASIS OF MEDICINE EXAMINATION Friday 14 March 2008 9.00-9.45 am Attempt all ten questions. For each question, choose the
More informationComputational Assignment 4: Discriminant Analysis
Computational Assignment 4: Discriminant Analysis -Written by James Wilson -Edited by Andrew Nobel In this assignment, we will investigate running Fisher s Discriminant analysis in R. This is a powerful
More informationR software tutorial: Random Forest Clustering Applied to Renal Cell Carcinoma Steve Horvath and Tao Shi
R software tutorial: Random Forest Clustering Applied to Renal Cell Carcinoma Steve Horvath and Tao Shi Correspondence: shorvath@mednet.ucla.edu Department of Human Genetics and Biostatistics University
More informationPackage copa. R topics documented: August 9, 2016
Package August 9, 2016 Title Functions to perform cancer outlier profile analysis. Version 1.41.0 Date 2006-01-26 Author Maintainer COPA is a method to find genes that undergo
More informationTips for surviving the analysis of survival data. Philip Twumasi-Ankrah, PhD
Tips for surviving the analysis of survival data Philip Twumasi-Ankrah, PhD Big picture In medical research and many other areas of research, we often confront continuous, ordinal or dichotomous outcomes
More informationTemporal Trends in Demographics and Overall Survival of Non Small-Cell Lung Cancer Patients at Moffitt Cancer Center From 1986 to 2008
Special Report Temporal Trends in Demographics and Overall Survival of Non Small-Cell Lung Cancer Patients at Moffitt Cancer Center From 1986 to 2008 Matthew B. Schabath, PhD, Zachary J. Thompson, PhD,
More informationPackage empiricalfdr.deseq2
Type Package Package empiricalfdr.deseq2 May 27, 2015 Title Simulation-Based False Discovery Rate in RNA-Seq Version 1.0.3 Date 2015-05-26 Author Mikhail V. Matz Maintainer Mikhail V. Matz
More informationJournal of Statistical Software
JSS Journal of Statistical Software January 2011, Volume 38, Issue 5. http://www.jstatsoft.org/ Lexis: An R Class for Epidemiological Studies with Long-Term Follow-Up Martyn Plummer International Agency
More informationSurvival Analysis: An Introduction
Survival Analysis: An Introduction Jaine Blayney Bioinformatics, CCRCB j.blayney@qub.ac.uk 24/09/2012 JKB 1 DEFINITION OF SURVIVAL ANALYSIS Survival analysis examines and models the time it takes for events
More informationStudy Design. Date: March 11, 2003 Reviewer: Jawahar Tiwari, Ph.D. Ellis Unger, M.D. Ghanshyam Gupta, Ph.D. Chief, Therapeutics Evaluation Branch
BLA: STN 103471 Betaseron (Interferon β-1b) for the treatment of secondary progressive multiple sclerosis. Submission dated June 29, 1998. Chiron Corp. Date: March 11, 2003 Reviewer: Jawahar Tiwari, Ph.D.
More informationSupplementary Online Content
Supplementary Online Content Arterburn DE, Olsen MK, Smith VA, Livingston EH, Van Scoyoc L, Yancy WS, Jr. Association Between Bariatric Surgery and Long-Term Survival. JAMA. doi:10.1001/jama.2014.16968.
More informationEach function call carries out a single task associated with drawing the graph.
Chapter 3 Graphics with R 3.1 Low-Level Graphics R has extensive facilities for producing graphs. There are both low- and high-level graphics facilities. The low-level graphics facilities provide basic
More informationII. DISTRIBUTIONS distribution normal distribution. standard scores
Appendix D Basic Measurement And Statistics The following information was developed by Steven Rothke, PhD, Department of Psychology, Rehabilitation Institute of Chicago (RIC) and expanded by Mary F. Schmidt,
More informationPropensity score based data analysis
Propensity score based data analysis Susanne Stampf March 28, 2014 Abstract For some time, propensity score (PS) based methods have been frequently applied in the analysis of observational and registry
More informationRare Thoracic Tumours
Rare Thoracic Tumours 1. Epithelial Tumour of Trachea 1 1.1 General Results Table 1. Epithelial Tumours of Trachea: Incidence, Trends, Survival Flemish Region 2001-2010 Both Sexes Incidence Trend EAPC
More informationPersonalized Predictive Medicine and Genomic Clinical Trials
Personalized Predictive Medicine and Genomic Clinical Trials Richard Simon, D.Sc. Chief, Biometric Research Branch National Cancer Institute http://brb.nci.nih.gov brb.nci.nih.gov Powerpoint presentations
More informationEfficacy analysis and graphical representation in Oncology trials - A case study
Efficacy analysis and graphical representation in Oncology trials - A case study Anindita Bhattacharjee Vijayalakshmi Indana Cytel, Pune The views expressed in this presentation are our own and do not
More informationSPONTANEOUS MESOTHELIOMA DATA: AN INTERPRETATION. Robin Howie, Robin Howie Associates, Edinburgh.
SPONTANEOUS MESOTHELIOMA DATA: AN INTERPRETATION Robin Howie, Robin Howie Associates, Edinburgh. SPONTANEOUS MESOTHELIOMA MESOTHELIOMA DEATHS HSE (2003a) estimated there are about 26 spontaneous deaths
More informationLab 13: Logistic Regression
Lab 13: Logistic Regression Spam Emails Today we will be working with a corpus of emails received by a single gmail account over the first three months of 2012. Just like any other email address this account
More informationMaximally Selected Rank Statistics in R
Maximally Selected Rank Statistics in R by Torsten Hothorn and Berthold Lausen This document gives some examples on how to use the maxstat package and is basically an extension to Hothorn and Lausen (2002).
More informationL Lang-Lazdunski, A Bille, S Marshall, R Lal, D Landau, J Spicer
Pleurectomy/decortication, hyperthermic pleural lavage with povidone-iodine and systemic chemotherapy in malignant pleural mesothelioma. A 10-year experience. L Lang-Lazdunski, A Bille, S Marshall, R Lal,
More informationProstatectomy, pelvic lymphadenect. Med age 63 years Mean followup 53 months No other cancer related therapy before recurrence. Negative.
Adjuvante und Salvage Radiotherapie Ludwig Plasswilm Klinik für Radio-Onkologie, KSSG CANCER CONTROL WITH RADICAL PROSTATECTOMY ALONE IN 1,000 CONSECUTIVE PATIENTS 1983 1998 Clinical stage T1 and T2 Mean
More informationCluster Analysis using R
Cluster analysis or clustering is the task of assigning a set of objects into groups (called clusters) so that the objects in the same cluster are more similar (in some sense or another) to each other
More informationGuide for Data Visualization and Analysis using ACSN
Guide for Data Visualization and Analysis using ACSN ACSN contains the NaviCell tool box, the intuitive and user- friendly environment for data visualization and analysis. The tool is accessible from the
More informationPackage TRADER. February 10, 2016
Type Package Package TRADER February 10, 2016 Title Tree Ring Analysis of Disturbance Events in R Version 1.2-1 Date 2016-02-10 Author Pavel Fibich , Jan Altman ,
More informationIntroduction Objective Methods Results Conclusion
Introduction Objective Methods Results Conclusion 2 Malignant pleural mesothelioma (MPM) is the most common form of mesothelioma a rare cancer associated with long latency period (i.e. 20 to 40 years),
More informationLatent Class Regression Part II
This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike License. Your use of this material constitutes acceptance of that license and the conditions of use of materials on this
More informationCross Validation techniques in R: A brief overview of some methods, packages, and functions for assessing prediction models.
Cross Validation techniques in R: A brief overview of some methods, packages, and functions for assessing prediction models. Dr. Jon Starkweather, Research and Statistical Support consultant This month
More informationDEPARTMENT OF PSYCHOLOGY UNIVERSITY OF LANCASTER MSC IN PSYCHOLOGICAL RESEARCH METHODS ANALYSING AND INTERPRETING DATA 2 PART 1 WEEK 9
DEPARTMENT OF PSYCHOLOGY UNIVERSITY OF LANCASTER MSC IN PSYCHOLOGICAL RESEARCH METHODS ANALYSING AND INTERPRETING DATA 2 PART 1 WEEK 9 Analysis of covariance and multiple regression So far in this course,
More informationTargeting Specific Cell Signaling Pathways for the Treatment of Malignant Peritoneal Mesothelioma
The Use of Kinase Inhibitors: Translational Lab Results Targeting Specific Cell Signaling Pathways for the Treatment of Malignant Peritoneal Mesothelioma Sheelu Varghese, Ph.D. H. Richard Alexander, M.D.
More informationPackage TestSurvRec. R topics documented: February 19, 2015. Type Package
Type Package Package TestSurvRec February 19, 2015 Title Statistical tests to compare two survival curves with recurrent events Version 1.2.1 Date 2013-10-07 Depends survrec, boot, R(>= 3.0.2) Author Dr.
More informationESTIMATING THE DISTRIBUTION OF DEMAND USING BOUNDED SALES DATA
ESTIMATING THE DISTRIBUTION OF DEMAND USING BOUNDED SALES DATA Michael R. Middleton, McLaren School of Business, University of San Francisco 0 Fulton Street, San Francisco, CA -00 -- middleton@usfca.edu
More informationPredictive Gene Signature Selection for Adjuvant Chemotherapy in Non-Small Cell Lung Cancer Patients
Predictive Gene Signature Selection for Adjuvant Chemotherapy in Non-Small Cell Lung Cancer Patients by Li Liu A practicum report submitted to the Department of Public Health Sciences in conformity with
More informationNotating the Multilevel Longitudinal Model. Multilevel Modeling of Longitudinal Data. Notating (cont.) Notating (cont.)
Notating the Multilevel Modeling of Longitudinal Data Recall the typical 2-level model $Y_{ij} = \gamma_{00} + (\gamma_{10} + u_{1j})X_{ij} + u_{0j} + e_{ij}$ Dr. J. Kyle Roberts Southern Methodist University Simmons School of Education
More informationExample: Credit card default, we may be more interested in predicting the probability of a default than classifying individuals as default or not.
Statistical Learning: Chapter 4 Classification 4.1 Introduction Supervised learning with a categorical (Qualitative) response Notation: - Feature vector X, - qualitative response Y, taking values in C
More informationPREDA S4-classes. Francesco Ferrari October 13, 2015
PREDA S4-classes Francesco Ferrari October 13, 2015 Abstract This document provides a description of custom S4 classes used to manage data structures for PREDA: an R package for Position RElated Data Analysis.
More informationPEER REVIEW HISTORY ARTICLE DETAILS VERSION 1 - REVIEW. Elizabeth Comino Centre fo Primary Health Care and Equity 12-Aug-2015
PEER REVIEW HISTORY BMJ Open publishes all reviews undertaken for accepted manuscripts. Reviewers are asked to complete a checklist review form (http://bmjopen.bmj.com/site/about/resources/checklist.pdf)
More informationSummary of treatment benefits
Risk Management Plan PEMETREXED Powder for concentrate for Solution for infusion Pemetrexed is also indicated as monotherapy for the maintenance treatment of locally advanced or metastatic non small cell
More informationFINDING SUBGROUPS OF ENHANCED TREATMENT EFFECT. Jeremy M G Taylor Jared Foster University of Michigan Steve Ruberg Eli Lilly
FINDING SUBGROUPS OF ENHANCED TREATMENT EFFECT Jeremy M G Taylor Jared Foster University of Michigan Steve Ruberg Eli Lilly 1 1. INTRODUCTION and MOTIVATION 2. PROPOSED METHOD Random Forests Classification
More informationThe Kaplan-Meier Plot. Olaf M. Glück
The Kaplan-Meier Plot 1 Introduction 2 The Kaplan-Meier-Estimator (product limit estimator) 3 The Kaplan-Meier Curve 4 From planning to the Kaplan-Meier Curve. An Example 5 Sources & References 1 Introduction
More informationSimple Linear Regression Inference
Simple Linear Regression Inference 1 Inference requirements The Normality assumption of the stochastic term e is needed for inference even if it is not a OLS requirement. Therefore we have: Interpretation
More informationUsing splines in regression
Using splines in regression Author: Nicholas G Reich, Jeff Goldsmith This material is part of the statsteachr project Made available under the Creative Commons Attribution-ShareAlike 3.0 Unported License:
More informationStudy Design and Statistical Analysis
Study Design and Statistical Analysis Anny H Xiang, PhD Department of Preventive Medicine University of Southern California Outline Designing Clinical Research Studies Statistical Data Analysis Designing
More informationIf several different trials are mentioned in one publication, the data of each should be extracted in a separate data extraction form.
General Remarks This template of a data extraction form is intended to help you to start developing your own data extraction form, it certainly has to be adapted to your specific question. Delete unnecessary
More informationA Handbook of Statistical Analyses Using R. Brian S. Everitt and Torsten Hothorn
A Handbook of Statistical Analyses Using R Brian S. Everitt and Torsten Hothorn CHAPTER 6 Logistic Regression and Generalised Linear Models: Blood Screening, Women s Role in Society, and Colonic Polyps
More informationRegression and Programming in R. Anja Bråthen Kristoffersen Biomedical Research Group
Regression and Programming in R Anja Bråthen Kristoffersen Biomedical Research Group R Reference Card http://cran.r-project.org/doc/contrib/short-refcard.pdf Simple linear regression Describes the relationship
More informationR Graphics II: Graphics for Exploratory Data Analysis
UCLA Department of Statistics Statistical Consulting Center Irina Kukuyeva ikukuyeva@stat.ucla.edu April 26, 2010 Outline 1 Summary Plots 2 Time Series Plots 3 Geographical Plots 4 3D Plots 5 Simulation
More informationPackage SurvCorr. February 26, 2015
Type Package Title Correlation of Bivariate Survival Times Version 1.0 Date 2015-02-25 Package SurvCorr February 26, 2015 Author Meinhard Ploner, Alexandra Kaider and Georg Heinze Maintainer Georg Heinze
More informationGuide to Biostatistics
MedPage Tools Guide to Biostatistics Study Designs Here is a compilation of important epidemiologic and common biostatistical terms used in medical research. You can use it as a reference guide when reading
More informationExcel 2003 Tutorials - Video File Attributes
Using Excel Files 18.00 2.73 The Excel Environment 3.20 0.14 Opening Microsoft Excel 2.00 0.12 Opening a new workbook 1.40 0.26 Opening an existing workbook 1.50 0.37 Save a workbook 1.40 0.28 Copy a workbook
More informationVignette for survrm2 package: Comparing two survival curves using the restricted mean survival time
Vignette for survrm2 package: Comparing two survival curves using the restricted mean survival time Hajime Uno Dana-Farber Cancer Institute March 16, 2015 1 Introduction In a comparative, longitudinal
More informationTests for Two Survival Curves Using Cox s Proportional Hazards Model
Chapter 730 Tests for Two Survival Curves Using Cox s Proportional Hazards Model Introduction A clinical trial is often employed to test the equality of survival distributions of two treatment groups.
More informationMultivariate Logistic Regression
1 Multivariate Logistic Regression As in univariate logistic regression, let π(x) represent the probability of an event that depends on p covariates or independent variables. Then, using an inv.logit formulation
More informationPackage MDM. February 19, 2015
Type Package Title Multinomial Diversity Model Version 1.3 Date 2013-06-28 Package MDM February 19, 2015 Author Glenn De'ath ; Code for mdm was adapted from multinom in the nnet package
More informationespecially with continuous
Handling interactions in Stata, especially with continuous predictors Patrick Royston & Willi Sauerbrei German Stata Users meeting, Berlin, 1 June 2012 Interactions general concepts General idea of a (two-way)
More information11. Analysis of Case-control Studies Logistic Regression
Research methods II 113 11. Analysis of Case-control Studies Logistic Regression This chapter builds upon and further develops the concepts and strategies described in Ch.6 of Mother and Child Health:
More informationUsing Open Source Software to Teach Mathematical Statistics p.1/29
Using Open Source Software to Teach Mathematical Statistics Douglas M. Bates bates@r-project.org University of Wisconsin Madison Using Open Source Software to Teach Mathematical Statistics p.1/29 Outline
More informationData Analysis, Research Study Design and the IRB
Minding the p-values p and Quartiles: Data Analysis, Research Study Design and the IRB Don Allensworth-Davies, MSc Research Manager, Data Coordinating Center Boston University School of Public Health IRB
More informationTitle: Detection of erbb2 copy number variations in plasma of patients with esophageal carcinoma.
Author's response to reviews Title: Detection of erbb2 copy number variations in plasma of patients with esophageal carcinoma. Authors: Immacolata Andolfo (andolfo@ceinge.unina.it) Giuseppe Petrosino (petrosino@ceinge.unina.it)
More informationKaplan-Meier Plot. Time to Event Analysis Diagnostic Plots. Outline. Simulating time to event. The Kaplan-Meier Plot. Visual predictive checks
1 Time to Event Analysis Diagnostic Plots Nick Holford Dept Pharmacology & Clinical Pharmacology University of Auckland, New Zealand 2 Outline The Kaplan-Meier Plot Simulating time to event Visual predictive
More informationGamma Distribution Fitting
Chapter 552 Gamma Distribution Fitting Introduction This module fits the gamma probability distributions to a complete or censored set of individual or grouped data values. It outputs various statistics
More informationAppendix G STATISTICAL METHODS INFECTIOUS METHODS STATISTICAL ROADMAP. Prepared in Support of: CDC/NCEH Cross Sectional Assessment Study.
Appendix G STATISTICAL METHODS INFECTIOUS METHODS STATISTICAL ROADMAP Prepared in Support of: CDC/NCEH Cross Sectional Assessment Study Prepared by: Centers for Disease Control and Prevention National
More informationLongitudinal Data Analysis: Stata Tutorial
Part A: Overview of Stata I. Reading Data: Longitudinal Data Analysis: Stata Tutorial use Read data that have been saved in Stata format. infile Read raw data and dictionary files. insheet Read spreadsheets
More informationA Comparison of Decision Tree and Logistic Regression Model Xianzhe Chen, North Dakota State University, Fargo, ND
Paper D02-2009 A Comparison of Decision Tree and Logistic Regression Model Xianzhe Chen, North Dakota State University, Fargo, ND ABSTRACT This paper applies a decision tree model and logistic regression
More informationComparative genomic hybridization Because arrays are more than just a tool for expression analysis
Microarray Data Analysis Workshop MedVetNet Workshop, DTU 2008 Comparative genomic hybridization Because arrays are more than just a tool for expression analysis Carsten Friis ( with several slides from
More informationTime Series Analysis with R - Part I. Walter Zucchini, Oleg Nenadić
Time Series Analysis with R - Part I Walter Zucchini, Oleg Nenadić Contents 1 Getting started 2 1.1 Downloading and Installing R.................... 2 1.2 Data Preparation and Import in R.................
More informationBIOL 933 Lab 6 Fall 2015. Data Transformation
BIOL 933 Lab 6 Fall 2015 Data Transformation Transformations in R General overview Log transformation Power transformation The pitfalls of interpreting interactions in transformed data Transformations
More informationImpact / Performance Matrix A Strategic Planning Tool
Impact / Performance Matrix A Strategic Planning Tool Larry J. Seibert, Ph.D. When Board members and staff convene for strategic planning sessions, there are a number of questions that typically need to
More informationIntroduction to Observational studies Dr. Javaria Gulzar Clinical Research Associate SCRC.
Introduction to Observational studies Dr. Javaria Gulzar Clinical Research Associate SCRC. Observational Study A study in which a researcher simply observes behavior in a systematic manner without any active
More informationPlease be cognizant of whether you are using a public pad or private/team pad, and take appropriate precautions with data you post here!
Summer 2015 Python Notes (copied from MoPad) NOTE: These are rough notes taken in/by Python Group. Use with caution as they may contain errors! If you find an error (or have a better solution), we would
More informationStatistics in Medicine Research Lecture Series CSMC Fall 2014
Catherine Bresee, MS Senior Biostatistician Biostatistics & Bioinformatics Research Institute Statistics in Medicine Research Lecture Series CSMC Fall 2014 Overview Review concept of statistical power
More informationLinear Discriminant Analysis
Fiche TD avec le logiciel : course5 Linear Discriminant Analysis A.B. Dufour Contents 1 Fisher's iris dataset 2 2 The principle 5 2.1 Linking one variable and a factor.................. 5 2.2 Linking a
More informationExploratory Data Analyses
5 Exploratory Data Analyses 5.1 Introduction What do time series data look like? The purpose of this chapter is to provide a number of different answers to this question. In addition, we outline the rudiments
More informationInstructions for applying data validation(s) to data fields in Microsoft Excel
1 of 10 Instructions for applying data validation(s) to data fields in Microsoft Excel According to Microsoft Excel, a data validation is used to control the type of data or the values that users enter
More informationLecture 2: Descriptive Statistics and Exploratory Data Analysis
Lecture 2: Descriptive Statistics and Exploratory Data Analysis Further Thoughts on Experimental Design 16 Individuals (8 each from two populations) with replicates Pop 1 Pop 2 Randomly sample 4 individuals
More informationPackage cgdsr. August 27, 2015
Type Package Package cgdsr August 27, 2015 Title R-Based API for Accessing the MSKCC Cancer Genomics Data Server (CGDS) Version 1.2.5 Date 2015-08-25 Author Anders Jacobsen Maintainer Augustin Luna
More informationOrdinal Regression. Chapter
Ordinal Regression Chapter 4 Many variables of interest are ordinal. That is, you can rank the values, but the real distance between categories is unknown. Diseases are graded on scales from least severe
More informationMethods for Meta-analysis in Medical Research
Methods for Meta-analysis in Medical Research Alex J. Sutton University of Leicester, UK Keith R. Abrams University of Leicester, UK David R. Jones University of Leicester, UK Trevor A. Sheldon University
More informationAdvanced Microsoft Excel 2010
Advanced Microsoft Excel 2010 Table of Contents THE PASTE SPECIAL FUNCTION... 2 Paste Special Options... 2 Using the Paste Special Function... 3 ORGANIZING DATA... 4 Multiple-Level Sorting... 4 Subtotaling
More informationA short course in Longitudinal Data Analysis ESRC Research Methods and Short Course Material for Practicals with the joiner package.
A short course in Longitudinal Data Analysis ESRC Research Methods and Short Course Material for Practicals with the joiner package. Lab 2 - June, 2008 1 jointdata objects To analyse longitudinal data
More informationHow To Model The Fate Of An Animal
Models Where the Fate of Every Individual is Known This class of models is important because they provide a theory for estimation of survival probability and other parameters from radio-tagged animals.
More informationNon-Parametric Tests (I)
Lecture 5: Non-Parametric Tests (I) KimHuat LIM lim@stats.ox.ac.uk http://www.stats.ox.ac.uk/~lim/teaching.html Slide 1 5.1 Outline (i) Overview of Distribution-Free Tests (ii) Median Test for Two Independent
More informationGeneral Method: Difference of Means. 3. Calculate df: either Welch-Satterthwaite formula or simpler $df = \min(n_1, n_2) - 1$.
General Method: Difference of Means 1. Calculate $\bar{x}_1$, $\bar{x}_2$, $SE_1$, $SE_2$. 2. Combined $SE = \sqrt{SE_1^2 + SE_2^2}$. ASSUMES INDEPENDENT SAMPLES. 3. Calculate df: either Welch-Satterthwaite formula or simpler df = min(n
More informationDescribing and presenting data
Describing and presenting data All epidemiological studies involve the collection of data on the exposures and outcomes of interest. In a well planned study, the raw observations that constitute the data
More information