In this exercise you will:
You need to have installed R, RStudio, and the necessary packages for the course, including the ELMER package. See how to get set up for this course.
# Load the BabyWeights data set from the ELMER package, then show its structure.
data("BabyWeights", package = "ELMER")
str(BabyWeights)
'data.frame': 24 obs. of 3 variables:
$ Age : int 40 36 40 38 42 39 40 37 36 38 ...
$ Weight: int 3317 2729 2935 2754 3210 2817 3126 2539 2412 2991 ...
$ Gender: Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...
As per the example in ELMER, you need to fit five models. Make sure you label them carefully to match ELMER.
# Models 1-4 for Weight, labelled to match ELMER.
Baby.lm1 = lm(Weight ~ 1, data = BabyWeights)            # intercept only
Baby.lm2 = lm(Weight ~ Gender, data = BabyWeights)       # Gender only
Baby.lm3 = lm(Weight ~ Age, data = BabyWeights)          # Age only
Baby.lm4 = lm(Weight ~ Age + Gender, data = BabyWeights) # Age and Gender, additive
coef(Baby.lm4) # needed later
(Intercept) Age GenderMale
-1773.3218 120.8943 163.0393
# Model 5: Age, Gender and their interaction (Age*Gender expands to Age + Gender + Age:Gender).
Baby.lm5 = lm(Weight ~ Age * Gender, data = BabyWeights)
# Grid of every Age/Gender combination at which to compare the fitted models.
NewBabies = data.frame(expand.grid(Age = c(36, 38, 40, 42), Gender = c("Female", "Male")))
# Add one column of fitted values per model; predict() is given the grid as it
# stands before the new columns are attached.
NewBabies = NewBabies |>
  mutate(
    Preds1 = predict(Baby.lm1, NewBabies),
    Preds2 = predict(Baby.lm2, NewBabies),
    Preds3 = predict(Baby.lm3, NewBabies),
    Preds4 = predict(Baby.lm4, NewBabies),
    Preds5 = predict(Baby.lm5, NewBabies)
  )
N.B. You should try to do this using ggplot() if you can.
# Mean Weight over all babies, kept as a one-row data frame; then print it.
OverallSum = BabyWeights |>
  summarise(Weight = mean(Weight))
OverallSum
Weight
1 2967.667
# Mean Weight for the rows where Gender is "Female"; then print it.
FemaleSum = BabyWeights |>
  filter(Gender == "Female") |>
  summarise(Weight = mean(Weight))
FemaleSum
Weight
1 2911.333
# Mean Weight for the rows where Gender is "Male"; then print it.
MaleSum = BabyWeights |>
  filter(Gender == "Male") |>
  summarise(Weight = mean(Weight))
MaleSum
Weight
1 3024
Starting with:
# Base scatter plot of Weight against Age, with points coloured by Gender.
Graph0 = BabyWeights |>
  ggplot(aes(y = Weight, x = Age)) +
  geom_point(aes(col = Gender))

# Horizontal line at the overall mean Weight.
Graph0 + geom_hline(yintercept = OverallSum$Weight)

# Horizontal lines at the Female (red) and Male (blue) mean Weights.
Graph0 +
  geom_hline(yintercept = FemaleSum$Weight, col = "red") +
  geom_hline(yintercept = MaleSum$Weight, col = "blue")

# One least-squares line of Weight on Age for all babies, without the standard-error band.
Graph0 + geom_smooth(method = "lm", se = FALSE)
`geom_smooth()` using formula 'y ~ x'
# Two parallel lines built from the Baby.lm4 coefficients, which are ordered
# (Intercept), Age, GenderMale: both use the Age slope, and the blue line adds
# the GenderMale offset to the intercept.
Graph0 +
  geom_abline(slope = coef(Baby.lm4)[2], intercept = coef(Baby.lm4)[1], col = "red") +
  geom_abline(slope = coef(Baby.lm4)[2], intercept = coef(Baby.lm4)[1] + coef(Baby.lm4)[3], col = "blue")

# A separate least-squares line for each Gender group.
Graph0 + geom_smooth(method = "lm", aes(group = Gender))
`geom_smooth()` using formula 'y ~ x'
# Render the grid of fitted values as a captioned table with one column per model.
NewBabies |>
  kable(
    caption = "Fitted values from five models fitted to the Baby Weights data.",
    col.names = c("Age", "Gender", paste("Model", 1:5))
  )
Age | Gender | Model 1 | Model 2 | Model 3 | Model 4 | Model 5 |
---|---|---|---|---|---|---|
36 | Female | 2967.667 | 2911.333 | 2674.032 | 2578.874 | 2552.733 |
38 | Female | 2967.667 | 2911.333 | 2905.089 | 2820.663 | 2813.533 |
40 | Female | 2967.667 | 2911.333 | 3136.145 | 3062.451 | 3074.333 |
42 | Female | 2967.667 | 2911.333 | 3367.202 | 3304.240 | 3335.133 |
36 | Male | 2967.667 | 3024.000 | 2674.032 | 2741.913 | 2762.707 |
38 | Male | 2967.667 | 3024.000 | 2905.089 | 2983.702 | 2986.672 |
40 | Male | 2967.667 | 3024.000 | 3136.145 | 3225.491 | 3210.638 |
42 | Male | 2967.667 | 3024.000 | 3367.202 | 3467.279 | 3434.603 |