R Commander交互作业–万捷–11307100021

演示视频网址(实在弄不成高清,请老师见谅):

http://v.youku.com/v_show/id_XODU1NTA0NDEy.html

 

 

Html报告输出文档脚本注释:
> library(foreign, pos=14) #加载R包,以下省略注释

> Dataset <-
+ read.spss("C:/Users/wan/Desktop/GREEN&SALKIND(2005).Data/Lesson 33.Multiple Regression/Lesson 33 Exercise File 1.sav",
+ use.value.labels=TRUE, max.value.labels=Inf, to.data.frame=TRUE) #导入数据到Dataset1

> colnames(Dataset) <- tolower(colnames(Dataset))

> library(relimp, pos=15)

> showData(Dataset, placement='-20+200', font=getRcmdr('logFont'), maxwidth=80, maxheight=30) #检视数据集

> editDataset(Dataset) #编辑数据集

> scatterplotMatrix(~eng_gpa+math_gpa+othr_gpa, reg.line=lm, smooth=TRUE, spread=FALSE, span=0.5, id.n=0, diagonal =
+ 'density', data=Dataset) #画出eng_gpa,math_gpa,othr_gpa的散点矩阵图

散点矩阵图

> with(Dataset, Hist(statexam, scale="frequency", breaks="Sturges", col="darkgray")) #画出直方图,变量为statexam

直方图

>RegModel.1 <- lm(statexam~math_gpa+mathtest, data=Dataset) #建立线性回归模型1

> summary(RegModel.1) #模型1概览

Call:
lm(formula = statexam ~ math_gpa + mathtest, data = Dataset)

Residuals:
Min 1Q Median 3Q Max
-40.643 -13.824 0.086 12.655 33.441

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.0098 16.9128 -0.296 0.768
math_gpa 3.8220 6.2443 0.612 0.542
mathtest 0.1183 0.0244 4.850 4.71e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 17.46 on 97 degrees of freedom
Multiple R-squared: 0.2374, Adjusted R-squared: 0.2217
F-statistic: 15.1 on 2 and 97 DF, p-value: 1.951e-06
> RegModel.2 <- lm(statexam~eng_gpa+engtest+math_gpa+mathtest+othr_gpa, data=Dataset) #建立线性回归模型2
> summary(RegModel.2) #模型2概览

Call:
lm(formula = statexam ~ eng_gpa + engtest + math_gpa + mathtest +
othr_gpa, data = Dataset)

Residuals:
Min 1Q Median 3Q Max
-38.841 -14.726 0.093 13.452 33.126

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.74484 27.69089 0.244 0.8081
eng_gpa -3.36491 7.44624 -0.452 0.6524
engtest 0.04942 0.02721 1.816 0.0725 .
math_gpa 5.47788 6.86508 0.798 0.4269
mathtest 0.11580 0.02451 4.726 8.03e-06 ***
othr_gpa -9.70152 8.49977 -1.141 0.2566
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 17.36 on 94 degrees of freedom
Multiple R-squared: 0.2691, Adjusted R-squared: 0.2302
F-statistic: 6.922 on 5 and 94 DF, p-value: 1.527e-05
> anova(RegModel.1, RegModel.2) #比较模型1与模型2
Analysis of Variance Table

Model 1: statexam ~ math_gpa + mathtest
Model 2: statexam ~ eng_gpa + engtest + math_gpa + mathtest + othr_gpa
Res.Df RSS Df Sum of Sq F Pr(>F)
1 97 29561
2 94 28333 3 1228 1.358 0.2605

> cor(Dataset[,c("engtest","mathtest")], use="complete") #做出engtest与mathtest的协方差矩阵
engtest mathtest
engtest 1.0000000 0.1213693
mathtest 0.1213693 1.0000000

> library(abind, pos=16)

> library(e1071, pos=17)

> numSummary(Dataset[,"mathtest"], statistics=c("mean", "sd", "IQR", "quantiles"), quantiles=c(0,.25,.5,.75,1)) #概览变量mathtest
mean sd IQR 0% 25% 50% 75% 100% n
460.6 77.36598 92.5 310 410 450 502.5 650 100

> Dataset2 <-
+ read.spss("C:/Users/wan/Desktop/GREEN&SALKIND(2005).Data/Lesson 25.Two-Way ANOVA/Lesson 25 Data File 1.sav",
+ use.value.labels=TRUE, max.value.labels=Inf, to.data.frame=TRUE) #导入数据集到Dataset2

> colnames(Dataset2) <- tolower(colnames(Dataset2))

> showData(Dataset2, placement='-20+200', font=getRcmdr('logFont'), maxwidth=80, maxheight=30) #检视数据集

> t.test(gpaimpr~gender, alternative='two.sided', conf.level=.95, var.equal=FALSE, data=Dataset2) #进行gender的独立两组t检验
Welch Two Sample t-test

data: gpaimpr by gender
t = 3.1039, df = 51.894, p-value = 0.00309
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.06598044 0.30735290
sample estimates:
mean in group Men mean in group Women
0.3800000 0.1933333
> library(mvtnorm, pos=18)

> library(survival, pos=18)

> library(TH.data, pos=18)

> library(multcomp, pos=18)

> AnovaModel.3 <- aov(gpaimpr ~ method, data=Dataset2) #进行method的单变量anova分析

> summary(AnovaModel.3)
Df Sum Sq Mean Sq F value Pr(>F)
method 2 1.174 0.5870 13.41 1.69e-05 ***
Residuals 57 2.495 0.0438
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

> with(Dataset2, numSummary(gpaimpr, groups=method, statistics=c("mean", "sd")))
mean sd data:n
Method 1 0.2525 0.2185328 20
Method 2 0.4725 0.2489319 20
Control 0.1350 0.1469873 20

> AnovaModel.4 <- (lm(gpaimpr ~ gender*method, data=Dataset2)) #进行method与gender的双变量anova分析

> Anova(AnovaModel.4)
Anova Table (Type II tests)

Response: gpaimpr
Sum Sq Df F value Pr(>F)
gender 0.52267 1 15.8562 0.0002058 ***
method 1.17408 2 17.8091 1.148e-06 ***
gender:method 0.19258 2 2.9212 0.0624289 .
Residuals 1.78000 54
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

> with(Dataset2, (tapply(gpaimpr, list(gender, method), mean, na.rm=TRUE))) # means
Method 1 Method 2 Control
Men 0.335 0.640 0.165
Women 0.170 0.305 0.105

> with(Dataset2, (tapply(gpaimpr, list(gender, method), sd, na.rm=TRUE))) # std. deviations
Method 1 Method 2 Control
Men 0.2285826 0.1776388 0.1491643
Women 0.1828782 0.1921371 0.1461544

> with(Dataset2, (tapply(gpaimpr, list(gender, method), function(x) sum(!is.na(x))))) # counts
Method 1 Method 2 Control
Men 10 10 10
Women 10 10 10

 

 

R的plotEx.data.frame(…)函数编写与示范–万捷–11307100021

plotEx函数编写代码(截图版是为了让结构看上去更清晰,下面有文字版)

plotEx函数

函数示范:

输入下列plotEx函数代码以后,再加载之后的示范代码。

示范用的数据是Using SPSS for windows 第25课的内容,有group(离散),gender(离散),gpaimpr(连续)三个变量。

若无法在线加载,请手动下载附件导入。

代码中的数据集d1代表单个离散变量的演示,

d2代表离散~连续,

d3代表离散~离散,

依次使用plotEx函数可得到对应图象。

图象见本文末。

plotEx <- function (x, ...)

{
plot2 <- function(x, xlab = names(x)[1L], ylab = names(x)[2L],  ...)

plot(x[[1L]], x[[2L]], xlab = xlab, ylab = ylab, ...)

if (!is.data.frame(x))
stop("'plot.data.frame' applied to non data frame")
if (ncol(x) == 1)
{
x1 <- x[[1L]]
cl <- class(x1)
if (cl %in% c("integer", "numeric"))
{
if(length(unique(x1))<=5) # 单变量离散
{
leibie=factor(x1)
pie(tapply(x1,leibie,length))
}
else
stripchart(x1, ...)
}
else plot(x1, ...)
}
else if (ncol(x) == 2) # 双变量情况下若有离散变量,默认第一列变量为离散变量
{
x1 <- x[[1L]]
cl <- class(x1)
x2 <- x[[2L]]
c2 <- class(x2)
if ((cl %in% c("integer", "numeric")) & (c2 %in% c("integer", "numeric")) )
{
if(length(unique(x1))<=5)
{
if(length(unique(x2))>5) # 离散~连续
{
boxplot(x2~x1,data=x,xlab = names(x)[1L], ylab = names(x)[2L],varwidth=TRUE,col="red")
}
else #离散~离散
{
mosaic(ta<-table(x1,x2))
mosaic(ta,direction="v")
mosaic(t(ta))
}
}
else plot2(x,...)
}
else plot2(x, ...)
}
else {
pairs(data.matrix(x), ...)
}
}

Dataset <- read.csv(file="http://fudan.lxxm.com/u11307100021/wp-content/uploads/sites/30/2014/12/GPA-data.csv")
library(vcd)
attach(Dataset)
d1 <- data.frame(gender)
d2 <- data.frame(group,gpaimpr)
d3 <- data.frame(group,gender)
plotEx(d1)
plotEx(d2)
plotEx(d3)

作图plotEx函数

GPA data

R语言作图作业

R语言作图作业–万捷–11307100021

R语言作图作业

代码如下

EuroHPI <- read.csv(file="http://fudan.lxxm.com/u11307100021/wp-content/uploads/sites/30/2014/12/EuroHPIinput.csv")

attach(EuroHPI)
par(pin=c(5,5),cex=0.8,pch=15)
plot(ZGDP,ZHPI,xlab="人均GDP在世界中的标准化值",ylab="HPI在
世界中的标准化值",col.lab="blue",cex.lab=1.1)
abline(lm(ZHPI~ZGDP))
title(main="Regression of ZHPI on ZGDP of Euro
Countries",col.main="blue")
text(ZGDP,ZHPI,Country,col="red",cex=0.9,pos=4)
idx <- identify(ZGDP,ZHPI, label=HPI,pos=1)
EuroHPI[idx,]

 

若无法在线读取数据可下载本文末尾的Excel文件--EuroHPI,并在R Commander中使用汇入资料功能导入,选中EuroHPI sheet, 并将该数据集命名为EuroHPI。之后的代码从attach部分开始执行。

原数据中各欧洲国家的ZGDP,ZHPI是将HPI报告中统计的世界所有国家的人均GDP、HPI指数,利用SPSS进行标准化后得到的Z值。原数据可见附件Whole HPI Data sheet。标准化是为了去除变量变化范围不一致以便于作图。

请注意在执行代码后将图放大。

下图为执行代码后的效果图。通过点击图例可以出现每个国家的HPI指数

R语言作图作业

 

 

原数据文件:

Excel 原数据:EuroHPI

在线导入:EuroHPIinput