## R语言实现SQL中的联结 (Implementing SQL-style joins in R)
## Session setup. Written for a Windows environment: the data directory
## below is a Windows drive path.
## NOTE: the script used to start with rm(list = ls()). It was removed
## because it silently deletes every object in the caller's workspace when
## the script is sourced, while still not resetting loaded packages or
## options. Start a fresh R session if a clean workspace is required.
## Report the current working directory before changing it.
getwd()
## Switch to the data directory (errors if the directory does not exist).
setwd("F:\\Data")
## Load the two tab-separated input tables, then inspect their structure.
## Later code refers to the key column as V1, i.e. it expects read.table()'s
## default column names (files without a header row).
## Things to verify against the raw files: read.table() treats "#" as a
## comment character by default, and rows with unequal field counts are only
## accepted when fill = TRUE is passed.
studentData <- utils::read.table(
  "F:\\Data\\student.txt",
  sep = "\t",
  stringsAsFactors = FALSE
)
classData <- utils::read.table(
  "F:\\Data\\class.txt",
  sep = "\t",
  stringsAsFactors = FALSE
)
utils::str(studentData)
utils::str(classData)
## Convert the join key V1 to text without scientific notation.
## format() pads a vector to a common width by default, and that width is
## computed separately for each table. If the longest key differs between the
## two tables, the same key is padded differently (e.g. " 1" vs "  1") and
## silently fails to match in setdiff()/intersect()/merge().
## trim = TRUE suppresses the padding for numeric keys; justify = "none"
## leaves keys that were already read as character untouched.
studentData$V1 <- format(
  studentData$V1,
  scientific = FALSE, trim = TRUE, justify = "none"
)
classData$V1 <- format(
  classData$V1,
  scientific = FALSE, trim = TRUE, justify = "none"
)
## Before joining, estimate the bounds of the result from the key columns.
## Number of distinct keys in each table; compare with the row counts to see
## whether a table contains duplicate keys.
length(unique(studentData[["V1"]]))
length(unique(classData[["V1"]]))
## Set differences and intersection of the two key columns.
## Keys that occur only in the student table.
data01 <- setdiff(studentData[["V1"]], classData[["V1"]])
str(data01)
## Keys that occur only in the class table.
data02 <- setdiff(classData[["V1"]], studentData[["V1"]])
str(data02)
## Keys that occur in both tables.
allData <- intersect(classData[["V1"]], studentData[["V1"]])
str(allData)
## Joins on the shared key column V1, mirroring the SQL join types.
## INNER JOIN: only keys present in both tables.
m1 <- merge(x = studentData, y = classData, by = "V1")
str(m1)
## LEFT JOIN: keep every row of studentData.
m2 <- merge(x = studentData, y = classData, by = "V1", all.x = TRUE)
## RIGHT JOIN: keep every row of classData.
m3 <- merge(x = studentData, y = classData, by = "V1", all.y = TRUE)
## FULL (outer) JOIN: keep every row of both tables.
m4 <- merge(
  x = studentData, y = classData, by = "V1",
  all.x = TRUE, all.y = TRUE
)
## 参考 (Reference):
## R语言merge函数总结 http://bbs.pinggu.org/thread-3234639-1-1.html