1. Run the following code to generate a sample of size 50 from the Student t distribution with 30 degrees of freedom, and then:

## Fix the RNG seed so the sample below is reproducible.
set.seed(seed = 57)
## Draw 50 observations from a Student t distribution with 30 df.
x <- rt(50, df = 30)

Solution:

## Histogram of the sample; breaks = 15 is passed to hist() as the
## requested number of cells.
hist(x = x, breaks = 15)

## Summary statistics of x (min, quartiles, median, mean, max).
summary(x)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -2.14200 -0.91750  0.03698 -0.06852  0.60500  2.11500

## Q-Q plot of x with a 95% confidence envelope (car package).
car::qqPlot(x)

## Shapiro-Wilk normality test of x.
shapiro.test(x)
## 
##  Shapiro-Wilk normality test
## 
## data:  x
## W = 0.96843, p-value = 0.1996

2. Run the following simulation code first and then:

## Fix the RNG seed so the simulated data are reproducible.
set.seed(seed = 1234)
## Predictor: 1000 normal draws with mean 3 and standard deviation 0.5.
x <- rnorm(n = 1000, mean = 3, sd = 0.5)
## Response: intercept 3, slope -5 on x, plus standard normal noise scaled by 3.
y <- 3 - 5 * x + 3 * rnorm(n = 1000)

Solution:

## Pearson correlation between x and y, with its test and 95% CI.
cor.test(x = x, y = y)
## 
##  Pearson's product-moment correlation
## 
## data:  x and y
## t = -25.01, df = 998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6574083 -0.5810789
## sample estimates:
##        cor 
## -0.6207122

## Scatter plot of y against x.
plot(x = x, y = y)

3. Run the following simulation code first and then choose an appropriate test to compare the means of samples x1 and x2. Then check the sample sizes.

## Fix the RNG seed so both samples are reproducible.
set.seed(seed = 57)
## Two independent normal samples of size 30 with unit standard deviation;
## the means are 2.2 and 2.0 respectively.
x1 <- rnorm(n = 30, mean = 2.2, sd = 1)
x2 <- rnorm(n = 30, mean = 2.0, sd = 1)

Solution:

## Shapiro-Wilk normality test, first sample.
shapiro.test(x = x1)
## 
##  Shapiro-Wilk normality test
## 
## data:  x1
## W = 0.96923, p-value = 0.5183

## Shapiro-Wilk normality test, second sample.
shapiro.test(x = x2)
## 
##  Shapiro-Wilk normality test
## 
## data:  x2
## W = 0.95732, p-value = 0.2642

## Two-sample t test of the means; with default arguments t.test() runs
## the Welch version, as the output header below shows.
t.test(x = x1, y = x2)
## 
##  Welch Two Sample t-test
## 
## data:  x1 and x2
## t = 0.39001, df = 54.354, p-value = 0.6981
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5071571  0.7521677
## sample estimates:
## mean of x mean of y 
##  2.268637  2.146132

## Sample sizes of the two groups.
length(x1)
## [1] 30
length(x2)
## [1] 30

4. Run the following simulation first and then:

## Fix the RNG seed so the simulated data are reproducible.
set.seed(seed = 1234)
## 1000 angles drawn uniformly between -pi and pi.
r <- runif(n = 1000, min = -pi, max = pi)
## x and y are both deterministic functions of the angle r; only y gets
## additive standard normal noise.
x <- 16 * sin(r)^3
y <- 13 * cos(r) - 5 * cos(2 * r) - 2 * cos(3 * r) - cos(4 * r) +
  rnorm(n = 1000)

Solution:

## Test for linear association between x and y via Pearson correlation.
cor.test(x = x, y = y)
## 
##  Pearson's product-moment correlation
## 
## data:  x and y
## t = -0.45893, df = 998, p-value = 0.6464
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07644984  0.04751026
## sample estimates:
##        cor 
## -0.0145256

## Scatter plot of y against x.
plot(x = x, y = y)

5. Run the following simulation code first and then:

## Fix the RNG seed so the simulated data are reproducible.
set.seed(seed = 1234)
## Predictor: 1000 normal draws with mean 5 and standard deviation 2.
x <- rnorm(n = 1000, mean = 5, sd = 2)
## Response: intercept 3, slope 5 on x, plus standard normal noise.
y <- 3 + 5 * x + rnorm(n = 1000)

Solution:

## Simple linear regression of y on x.
fit <- lm(formula = y ~ x)
summary(fit)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1661 -0.6439  0.0145  0.6537  3.0684 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.87671    0.08291    34.7   <2e-16 ***
## x            5.02786    0.01555   323.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9801 on 998 degrees of freedom
## Multiple R-squared:  0.9905, Adjusted R-squared:  0.9905 
## F-statistic: 1.046e+05 on 1 and 998 DF,  p-value: < 2.2e-16

## Model diagnostic plots for the fitted regression.
plot(fit)

## Histogram of x.
hist(x = x, breaks = 200, col = "blue")

## Sample mean of x.
mean(x)
## [1] 4.946806

## Histogram of y.
hist(x = y, breaks = 200, col = "blue")

## Sample mean of y.
mean(y)
## [1] 27.74854

## Relation between the means: intercept + slope * mean(x) + 0, the
## trailing 0 being the term contributed by the noise.
3 + 5 * mean(x) + 0
## [1] 27.73403

## Pearson correlation between x and y.
cor.test(x = x, y = y)
## 
##  Pearson's product-moment correlation
## 
## data:  x and y
## t = 323.42, df = 998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9946388 0.9958151
## sample estimates:
##       cor 
## 0.9952633

6. Run the following code to attach a dataset first. Then test the linear association between variable HEARTRTE and variable BMI while adjusting for the effect of variable AGE on HEARTRTE.

## Load the DIGdata data set from the asympTest package.
data(DIGdata, package = "asympTest")
## Attach it so that its columns can be referenced by bare name.
attach(what = DIGdata)

Solution:

## The columns are taken explicitly from DIGdata rather than through the
## attach()ed search path, so a same-named object in the workspace cannot
## silently mask a dataset column.

## Summary statistics of the response and the two predictors; the NA's
## entry counts missing values.
summary(DIGdata$HEARTRTE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   40.00   70.00   80.00   78.76   88.00  156.00       8
summary(DIGdata$AGE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.00   57.00   65.00   63.48   71.00   94.00
summary(DIGdata$BMI)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   14.44   23.68   26.50   27.11   29.80   62.66       1

## Multiple linear regression of HEARTRTE on AGE and BMI. The BMI row of
## the coefficient table is the test of the linear association between
## HEARTRTE and BMI adjusted for AGE.
fit <- lm(HEARTRTE ~ AGE + BMI, data = DIGdata)
summary(fit)
## 
## Call:
## lm(formula = HEARTRTE ~ AGE + BMI, data = DIGdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.208  -8.638   0.529   8.877  77.729 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 75.44503    1.21070  62.315   <2e-16 ***
## AGE          0.02330    0.01406   1.657   0.0975 .  
## BMI          0.06767    0.02957   2.289   0.0221 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.65 on 6788 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.001172,   Adjusted R-squared:  0.000878 
## F-statistic: 3.984 on 2 and 6788 DF,  p-value: 0.01866

## Model diagnostic plots for the fitted regression.
plot(fit)