# Predict the response a1 of the algae data with (1) a multiple linear
# regression and (2) a regression tree, then compare both models with
# MAE, MSE and NMSE and with prediction-vs-truth scatter plots.
#
# Requires: rpart, DMwR (algae data, manyNAs(), knnImputation(),
# rpartXse(), prettyTree()).

library(rpart)
library(DMwR) # needed for the algae data set and the helpers listed above

# ---- Data preparation ----

data(algae)

# Drop the rows flagged by manyNAs(). Guard against an empty result:
# algae[-integer(0), ] would silently select zero rows.
many.na.rows <- manyNAs(algae)
if (length(many.na.rows) > 0) {
  algae <- algae[-many.na.rows, ]
}

# lm() cannot use rows with missing values, so fill the remaining NAs
# from the 10 nearest neighbours.
clean.algae <- knnImputation(algae, k = 10)
clean.algae

# ---- Multiple linear regression ----

# Columns 1:12 hold the predictors plus the response a1.
lm.a1 <- lm(a1 ~ ., data = clean.algae[, 1:12])
lm.a1
summary(lm.a1)
anova(lm.a1)

# Remove season and compare the reduced model against the full one.
lm2.a1 <- update(lm.a1, . ~ . - season)
lm2.a1
summary(lm2.a1)
anova(lm.a1, lm2.a1)
anova(lm2.a1)

# Repeat the elimination automatically until no candidate term is left
# to drop.
final.lm <- step(lm.a1)
summary(final.lm)

# ---- Regression tree ----

# rpart() and rpartXse() rely on random cross-validation folds; fix the
# seed so that the cp table and the selected tree are reproducible.
set.seed(1234)

rt.a1 <- rpart(a1 ~ ., data = algae[, 1:12])
rt.a1
prettyTree(rt.a1)
printcp(rt.a1)

# Prune manually at a chosen complexity parameter.
rt2.a1 <- prune(rt.a1, cp = 0.08)
rt2.a1

# Grow and prune in one step; this is the tree evaluated below.
rt.a1 <- rpartXse(a1 ~ ., data = algae[, 1:12])
rt.a1

# Cut the tree at explicit node numbers ...
first.tree <- rpart(a1 ~ ., data = algae[, 1:12])
my.tree <- snip.rpart(first.tree, c(4, 7))
my.tree

# ... or pick the nodes with the mouse (needs an interactive session).
prettyTree(first.tree)
if (interactive()) {
  snip.rpart(first.tree)
}

# ---- Model evaluation ----

lm.predictions.a1 <- predict(final.lm, clean.algae)
rt.predictions.a1 <- predict(rt.a1, algae)
lm.predictions.a1
algae[, "a1"]

# Mean absolute error (MAE)
(mae.a1.lm <- mean(abs(lm.predictions.a1 - algae[, "a1"])))
(mae.a1.rt <- mean(abs(rt.predictions.a1 - algae[, "a1"])))

# Mean squared error (MSE)
(mse.a1.lm <- mean((lm.predictions.a1 - algae[, "a1"])^2))
(mse.a1.rt <- mean((rt.predictions.a1 - algae[, "a1"])^2))

# Normalized mean squared error (NMSE): model MSE divided by the MSE of
# always predicting the mean of a1.
(nmse.a1.lm <- mean((lm.predictions.a1 - algae[, "a1"])^2) /
  mean((mean(algae[, "a1"]) - algae[, "a1"])^2))
(nmse.a1.rt <- mean((rt.predictions.a1 - algae[, "a1"])^2) /
  mean((mean(algae[, "a1"]) - algae[, "a1"])^2))

# ---- Predictions vs. true values ----

old.par <- par(mfrow = c(1, 2))
plot(lm.predictions.a1, clean.algae[, "a1"],
  main = "Linear Model", xlab = "Predictions", ylab = "True Values"
)
abline(0, 1, lty = 2)
plot(rt.predictions.a1, clean.algae[, "a1"],
  main = "Regression Tree", xlab = "Predictions", ylab = "True Values"
)
abline(0, 1, lty = 2)
par(old.par)

# Click on points to list the corresponding rows. identify() works on
# the current plot, so redraw the linear-model scatter first.
if (interactive()) {
  plot(lm.predictions.a1, clean.algae[, "a1"],
    main = "Linear Model", xlab = "Predictions", ylab = "True Values"
  )
  abline(0, 1, lty = 2)
  algae[identify(lm.predictions.a1, clean.algae[, "a1"]), ]
}

# Replace negative linear-model predictions with 0.
sensible.lm.predictions.a1 <- ifelse(
  lm.predictions.a1 < 0, 0, lm.predictions.a1
)
# Timestamp: 2024-09-29 09:30:04