为提升研究的严谨性和可追溯性,数据分析报告的自动化被越来越多的研究者关注。
在实际应用中,自动化报告不仅能节省大量复制粘贴和排版的时间,还能避免人工整理带来的误差,因此应用越来越广泛。
有时,数据分析报告还强调即时性,借助自动化报告流程即可满足这一要求。
本文将以一项简单的现场评分分析任务为例,介绍如何将R与LaTeX结合(借助knitr),快速、自动化地生成PDF报告。
任务:要求评分员对45份试卷作答进行评分(试卷已提前分为好、中、差三类,每类15份,每份由6个Item构成),评分完成后立即生成直方图比较分析报告。
####注意:下方代码中的 ## @knitr load_lib、## @knitr getdata、## @knitr plot 在R中虽然只是注释,但不可省略,因为knitr正是通过这些标签把R代码与.Rnw(LaTeX)文件中的同名代码块对应起来####
#------------------------------------------------------------------
## @knitr load_lib
# Attach plyr, the package used for data manipulation.
library("plyr")
# Attach ggplot2, the package used for plotting.
library("ggplot2")
#-----------------------------------------------------------------------------
## @knitr getdata
# Point the R session at the analysis directory.
setwd(dir = "C:/...")
# Read the score sheet; the first row of the CSV supplies the column names.
data <- read.csv(file = "C:/... .csv", header = TRUE)
# Expected layout of the loaded data: 7 columns, 45 values per column.
#   Column 1:    group label, one of "Low level", "Mid level", "High level"
#                (15 rows per label).
#   Columns 2-7: scores for the six items, 45 values in each column.
#-----------------------------------------------------------------------
## @knitr plot
# Draw one faceted histogram per item column (every column after the first),
# split by level, and annotate each facet with that level's mean and
# standard deviation.

# Fixed display order of the three levels. Converting the label column once,
# before the loop, keeps the per-level summaries, the facets and the fill
# colours in the same order for every item, including the first one.
level_order <- c("Low level", "Mid level", "High level")
data$level <- factor(data$level, levels = level_order, ordered = TRUE)

# seq_len(...)[-1] is empty when there are no item columns, whereas
# 2:ncol(data) would count down (2, 1) instead of skipping the loop.
for (i in seq_len(ncol(data))[-1]) {
  item_name <- colnames(data)[i]
  scores <- data[[i]]

  # Per-level statistics, rounded to one decimal place. Both tapply() calls
  # group by the same factor, so their results line up element by element.
  level_mean <- round(tapply(scores, data$level, mean), 1)
  level_sd <- round(tapply(scores, data$level, sd), 1)

  # One row per level; `label` is the text drawn inside that level's facet.
  level_stats <- data.frame(
    level = factor(names(level_mean), levels = level_order, ordered = TRUE),
    label = paste(
      "Mean=", as.vector(level_mean), ",", "SD=", as.vector(level_sd)
    ),
    stringsAsFactors = FALSE
  )

  # Give the current item a fixed column name so aes() refers to columns of
  # the plotted data rather than to objects in the calling environment.
  plot_data <- data.frame(level = data$level, score = scores)

  p <- ggplot(plot_data, aes(x = score, fill = level)) +
    geom_histogram(binwidth = 10, colour = "black") +
    facet_grid(level ~ ., scales = "free") +
    labs(y = "Count", x = item_name) +
    scale_x_continuous(breaks = seq(0, 100, 10)) +
    scale_y_continuous(breaks = seq(0, 20, 2)) +
    theme_bw() +
    theme(
      panel.grid.minor = element_blank(),
      legend.box.background = element_rect(),
      legend.box.margin = margin(6, 6, 6, 6),
      strip.background = element_rect(colour = "black", fill = "white"),
      strip.text.y = element_text(face = "bold", size = 12),
      axis.text.x = element_text(size = 14, color = "black"),
      axis.text.y = element_text(size = 15, color = "black"),
      axis.title.x = element_text(size = 14)
    ) +
    # The facet strips already name the levels, so the fill legend is hidden.
    guides(fill = "none") +
    geom_text(
      data = level_stats,
      aes(x = 60, y = 5, label = label),
      check_overlap = TRUE, vjust = 2, size = 8
    ) +
    scale_fill_manual(values = c("coral", "light blue", "light green"))

  # Inside a for loop a ggplot object is only rendered when printed.
  print(p)
}
#-----------------------------------------------------------------------
########最终依据每列数据共画出6个图,其中1个图如下########
##图中展示了各个水平的得分分布、平均值和标准差######
#################################################################################
#################以下为LaTeX(knitr)语句,用于将R画出的6个直方图排版后自动生成1个PDF报告;该文件需以.Rnw为后缀名保存###########################################
#####################################################################
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% LATEX PACKAGES – MAY NEED .STY FILES IN THE PROJECT FOLDER
% The LaTeX packages loaded by this report are listed below.
% NOTE(review): the load order is kept as-is; some of these packages
% (e.g. hyperref) are sensitive to where they are loaded - confirm before
% reordering.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% A4 article.
\documentclass[a4paper]{article}
\usepackage[british]{babel}
\usepackage{float}
\usepackage{longtable}
% numberedbib is disabled (commented out).
%%\usepackage{numberedbib}
\usepackage{rotating}
\usepackage{fullpage}
\usepackage{lscape}
\usepackage{subcaption} % an alternative package for sub figures
\usepackage{pdflscape}
\usepackage{booktabs}
\usepackage{colortbl, xcolor}
\usepackage{appendix}
\usepackage[sc]{mathpazo}
\usepackage[T1]{fontenc}
% Page margins: 2.5cm on all four sides.
\usepackage{geometry}
\geometry{verbose,tmargin=2.5cm,bmargin=2.5cm,lmargin=2.5cm,rmargin=2.5cm}
% Number sections and list them in the table of contents down to depth 2.
\setcounter{secnumdepth}{2}
\setcounter{tocdepth}{2}
\usepackage{url}
% hyperref: numbered PDF bookmarks opened to level 2; links are not coloured
% (colorlinks=false) and get a 1pt border (pdfborder={0 0 1}).
\usepackage[unicode=true,pdfusetitle,
bookmarks=true,bookmarksnumbered=true,bookmarksopen=true,bookmarksopenlevel=2,
breaklinks=false,pdfborder={0 0 1},backref=false,colorlinks=false]
{hyperref}
% Initial view when the PDF is opened.
\hypersetup{
pdfstartview={XYZ null null 1}}
\usepackage{breakurl}
\usepackage{supertabular}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DOCUMENT HEADER & FOOTER
% The path of the R script that holds the analysis code must be filled in
% below, in read_chunk('C:/…/… .R').
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Use the fancyhdr page style and clear every header/footer field first.
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{}
% Optional centred "draft" header, currently disabled.
%\fancyhead[C]{DRAFT: Not for circulation}
% Centred footer showing the page number.
\fancyfoot[C]{Page \thepage}
%--------------- early read external R code ----------------------
% Register the external R script with knitr before any labelled chunk is
% used; the code of this chunk is not echoed into the report (echo=FALSE).
<<external_code, echo=FALSE,warning=FALSE, cache=FALSE>>=
read_chunk('C:/.../... .R')
@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% TITLE PAGE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% LOAD R PACKAGES FOR THIS ANALYSIS
% The chunks below are intentionally left empty: knitr fills each one with
% the code carrying the same label in the script registered by read_chunk().
% Do not put anything between a chunk header and its closing @.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
<<load_lib, echo=FALSE, results='hide', warning=FALSE, message=FALSE>>=
@
<<getdata, eval=TRUE, echo=FALSE, message=FALSE,warning=FALSE, results='asis', cache=FALSE>>=
@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DATA ANALYSIS AND MODELLING
% NOTE(review): no code labelled "analysis" or "presentation" is visible in
% the accompanying R script; these two chunks look like placeholders -
% confirm before relying on them.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
<<analysis, eval=TRUE, echo=FALSE, message=FALSE, cache=FALSE, warning=FALSE, results='hide'>>=
@
<<presentation, eval=TRUE,warning=FALSE,echo=FALSE>>=
@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% MAIN REPORT WITH EMBEDDED GRAPHS AND TABLES
% \title{…} sets the document title that \maketitle prints
% \author{…} sets the author name
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{Plot}
\author{beneplot}
% No header/footer on the title page.
\maketitle\thispagestyle{empty}
% The plotting chunk: each figure is rendered at 8 x 6 inches. The `results`
% option is given exactly once ('hide'), so any text output of the chunk is
% suppressed and only the figures are included.
<<plot,echo=FALSE,message=FALSE,warning=FALSE,results='hide', fig.width =8, fig.height = 6>>=
@
\end{document}
###################################################################################