library(QTLRel)

if(FALSE){
load(“justin.RData”)
rownames(K) <- colnames(K) <- read.csv(“namesK.csv”,header=TRUE)[,1]
#core<- read.csv(“core473list.csv”,header=TRUE)

idx<- match(rownames(K),all_block$ecotypeid)
idx<- is.element(all3$ril,toupper(all_block$line)[idx])
all3Tmp<- all3[idx,]
all3Tmp$ecotypeid<- all_block$ecotypeid[match(all3Tmp$ril,toupper(all_block$line))]
dim(all3Tmp) # 3200 8

######################
# env = (1, 2, 3, 4) #
######################

# gdat and pdat
idx1<- all3Tmp$planting==”September” & all3Tmp$year==2010 & all3Tmp$shelfHeight==”top-middle”
idx2<- all3Tmp$planting==”September” & all3Tmp$year==2010 & all3Tmp$shelfHeight==”bottom-middle”
idx3<- all3Tmp$planting==”September” & all3Tmp$year==2040 & all3Tmp$shelfHeight==”top-middle”
idx4<- all3Tmp$planting==”September” & all3Tmp$year==2040 & all3Tmp$shelfHeight==”bottom-middle”
env<- (idx1*1 + idx2*2 + idx3*3 + idx4*4)
pdat<- rbind(all3Tmp[idx1,], all3Tmp[idx2,], all3Tmp[idx3,], all3Tmp[idx4,])
pdat<- cbind(pdat, env=c(env[idx1],env[idx2],env[idx3],env[idx4]))
gdat<- geno[match(pdat$eco,rownames(geno)),]
gdat<- sweep(gdat,2,gdat[“8279″,],”==”)
rm(list=setdiff(ls(all=TRUE),c(“K”,”gdat”,”pdat”)))

# variance components
for(i in 1:4){
ii<- match(pdat$eco[pdat$env==i],colnames(K))
vcTmp<- estVC(pdat$DTF[pdat$env==i], v=list(AA=K[ii,ii],
DD=NULL,AD=NULL,MH=NULL,HH=NULL,EE=diag(length(ii))))
assign(paste(“vc”,i,sep=””),vcTmp)
}
rm(i,ii)

# genetic matrices
vcMtr.0<- matrix(0,nrow=nrow(pdat),ncol=nrow(pdat))
for(i in 1:4){
idx<- pdat$env==i
vTmp<- K[match(pdat$eco[idx],rownames(K)),match(pdat$eco[idx],colnames(K))]*
get(paste(“vc”,i,sep=””))$par[“AA”]
vcMtr.0[idx,idx]<- vTmp
}
rm(i,idx,vTmp)
vcMtr<- matrix(NA,nrow=nrow(pdat),ncol=nrow(pdat))
for(i in 1:4){
idx1<- pdat$env==i
for(j in 1:4){
idx2<- pdat$env==j
vTmp<- K[match(pdat$eco[idx1],rownames(K)),match(pdat$eco[idx2],colnames(K))]*
sqrt(get(paste(“vc”,i,sep=””))$par[“AA”])*
sqrt(get(paste(“vc”,j,sep=””))$par[“AA”])
vcMtr[idx1,idx2]<- vTmp
}
}
rm(i,j,idx1,idx2,vTmp)

# variance matrix
if(FALSE){#exclude “EE” — June 5, 2013
for(i in 1:4){
diag(vcMtr.0)[pdat$env==i]<-
diag(vcMtr.0)[pdat$env==i] + get(paste(“vc”,i,sep=””))$par[“EE”]
diag(vcMtr)[pdat$env==i]<-
diag(vcMtr)[pdat$env==i] + get(paste(“vc”,i,sep=””))$par[“EE”]
}
rm(i)
}
vcTmp<- estVC(y=pdat$DTF,x=as.factor(pdat$env),
v=list(AA=vcMtr.0,DD=NULL,AD=NULL,MH=NULL,HH=NULL,EE=diag(nrow(pdat))))
pdat$e.0<- pdat$env # recode env as estimated
pdat$e.0[pdat$env==1]<- 0
pdat$e.0[pdat$env==2]<- vcTmp$par[2]
pdat$e.0[pdat$env==3]<- vcTmp$par[3]
pdat$e.0[pdat$env==4]<- vcTmp$par[4]
vc.0<- estVC(y=pdat$DTF,x=pdat$e.0,
v=list(AA=vcMtr.0,DD=NULL,AD=NULL,MH=NULL,HH=NULL,EE=diag(nrow(pdat))))
vcTmp<- estVC(y=pdat$DTF,x=as.factor(pdat$env),
v=list(AA=vcMtr,DD=NULL,AD=NULL,MH=NULL,HH=NULL,EE=diag(nrow(pdat))))
pdat$e<- pdat$env # recode env as estimated
pdat$e[pdat$env==1]<- 0
pdat$e[pdat$env==2]<- vcTmp$par[2]
pdat$e[pdat$env==3]<- vcTmp$par[3]
pdat$e[pdat$env==4]<- vcTmp$par[4]
vc<- estVC(y=pdat$DTF,x=pdat$e,
v=list(AA=vcMtr,DD=NULL,AD=NULL,MH=NULL,HH=NULL,EE=diag(nrow(pdat))))

#save.image(file=”saved.RData”)
}

################
# genome scan #
################

load(“saved.RData”)

date()
lrt.0<- scanOne(pdat$DTF,x=pdat$e.0,gdat=gdat,vc=vc.0,minorGenoFreq=0.05)
date()
lrt.e.0<- scanOne(pdat$DTF,gdat=gdat,vc=vc.0,intcovar=pdat$e.0,minorGenoFreq=0.05)
date()
lrt<- scanOne(pdat$DTF,x=pdat$e,gdat=gdat,vc=vc,minorGenoFreq=0.05)
date()
lrt.e<- scanOne(pdat$DTF,gdat=gdat,vc=vc,intcovar=pdat$e,minorGenoFreq=0.05)
date()

save.image(“saved.RData”)
save(lrt.0, lrt.e.0, lrt,lrt.e,file=”lrt.RData”)
q(“no”)

################
# thresholds #
################

load(“saved.RData”)
rm(list=setdiff(ls(all=TRUE),c(“gdat”,”pdat”,”vc”)))

ntimes<- 30
chrs<- unique(substring(colnames(gdat),1,1))
pTmp<- pTmp.e<- pTmp.qe<- NULL
for(n in 1:ntimes){
idx<- sample(1:nrow(pdat), replace=FALSE)

lrtp<- lrtp.e<- NULL
for(j in 1:length(chrs)){
jj<- substring(colnames(gdat),1,1)==chrs[j]
lrtTmp<- scanOne(pdat$DTF,x=pdat$e,gdat=gdat[idx,jj],vc=vc,minorGenoFreq=0.05)
lrtp<- c(lrtp,lrtTmp$p); rm(lrtTmp)
lrtTmp<- scanOne(pdat$DTF,gdat=gdat[idx,jj],vc=vc,intcovar=pdat$e,minorGenoFreq=0.05)
lrtp.e<- c(lrtp.e,lrtTmp$p); rm(lrtTmp)
}

pTmp<- c(pTmp,max(lrtp))
pTmp.e<- c(pTmp.e,max(lrtp.e))
pTmp.qe<- c(pTmp.qe,max(lrtp.e-lrtp))

if(n%%5==0){
write.table(as.matrix(pTmp),file=”qtl.txt”,row.names=FALSE,col.names=FALSE,append=TRUE)
write.table(as.matrix(pTmp.e),file=”qtlE.txt”,row.names=FALSE,col.names=FALSE,append=TRUE)
write.table(as.matrix(pTmp.qe),file=”qtlQE.txt”,row.names=FALSE,col.names=FALSE,append=TRUE)
pTmp<- pTmp.e<- pTmp.qe<- NULL
}
cat(n,”/”,ntimes,”\r”)
}

###########
# summary #
###########

# estimates
lrtEst<- data.frame(lrt$par); lrtEst<- t(lrtEst)
rownames(lrtEst)<- substring(rownames(lrtEst),2)
lrtEst<- as.data.frame(lrtEst)
colnames(lrtEst)<- c(“Intercept”, “e”, “snp1”)
lrtEst$lrt<- lrt$p
lrtEst.e<- data.frame(lrt.e$par); lrtEst.e<- t(lrtEst.e)
rownames(lrtEst.e)<- substring(rownames(lrtEst.e),2)
lrtEst.e<- as.data.frame(lrtEst.e)
colnames(lrtEst.e)<- c(“Intercept”, “e”, “snp1”, “e:snp1″)
lrtEst.e$lrt<- lrt.e$p

# write.csv(lrtEst,file=”lrtEst.csv”)
# write.csv(lrtEst.e,file=”lrtEst_e.csv”)

phyMap<- data.frame(snp=names(lrt$par),
chr=as.numeric(substr(names(lrt$par),1,1)),
dist=as.numeric(substring(names(lrt$par),3)))
class(lrt)<- class(lrt.e)<- c(“None”,”scanOne”)
lrt.qe<- lrt.e
lrt.qe$p<- lrt.e$p-lrt$p

# postscript(“dtf.ps”)
# png(“dtf.png”)
# G
par(mfrow=c(2,1),mar=c(3.5,3.5,0.5,1),mgp=c(2,1,0))
cv<- read.table(“qtl.txt”,header=F)[,1]
cv<- quantile(cv,0.95); cv/2/log(10) # 5.957593
idx<- order(phyMap$chr,phyMap$dist)
plot(lrt$p[idx]/2/log(10), ylim=c(0,11), main=””, xlab=”Chromosome”, ylab=”(A) LOD For G”,,xaxt=”n”,
col=phyMap$chr[idx]%%2+3,pch=19,cex=0.75)
xct<- NULL; for(ch in 1:5) xct<- c(xct, mean(idx[phyMap$chr==ch]))
axis(1,at=xct,labels=1:5)
abline(h=cv/2/log(10),lty=4)
# GxE
cv<- read.table(“qtlQE.txt”,header=F)[,1]
cv<- quantile(cv,0.95); cv/2/log(10) # 5.936353
idx<- order(phyMap$chr,phyMap$dist)
plot(lrt.qe$p[idx]/2/log(10), ylim=c(0,11), main=””, xlab=”Chromosome”, ylab=”(B) LOD For GxE”,,xaxt=”n”,
col=phyMap$chr[idx]%%2+3,pch=19,cex=0.75)
xct<- NULL; for(ch in 1:5) xct<- c(xct, mean(idx[phyMap$chr==ch]))
axis(1,at=xct,labels=1:5)
abline(h=cv/2/log(10),lty=4)
# dev.off()

rm(cv,xct,ch,idx,vcTmp)
# save.image(“saved.RData”)

###################
# model selection #
###################

# top Q + Q:E
cv<- read.table(“qtlE.txt”,header=F)[,1]
cv<- quantile(cv,c(0.5,0.90,0.95)); cv/2/log(10) # 5.719920 7.151775 7.776803
idx<- lrt.e$p>cv[1]
lrtTmp<- list(p=lrt.e$p[idx])
gdatTmp<- gdat[,match(names(lrtTmp$p),colnames(gdat))]
rownames(gdatTmp)<- 1:nrow(gdatTmp)
topQE<- NULL
while(any(idx)){
ii<- which.max(lrtTmp$p)
topQE<- rbind(topQE,data.frame(snp=colnames(gdatTmp)[ii], lrt=lrtTmp$p[ii]))
print(topQE)
datTmp<- data.frame(gdatTmp[,match(topQE$snp,colnames(gdatTmp))],
e=pdat$e,check.names=FALSE)
xTmp<- model.matrix(~.*e,data=datTmp)[,-1]
lrtTmp<- scanOne(pdat$DTF,x=xTmp,gdat=gdatTmp,vc=vc,intcovar=pdat$e)
idx<- lrtTmp$p>cv[1]
}
rownames(topQE)<- 1:nrow(topQE)

tmp<- phyMap[match(topQE$snp,phyMap$snp),]
tmp$lrt<- topQE$lrt
# write.csv(tmp,file=”topQE.csv”,row.names=FALSE)

# top Q
cv<- read.table(“qtl.txt”,header=F)[,1]
cv<- quantile(cv,c(0.5,0.90,0.95)); cv/2/log(10) # 4.598181 5.579367 5.957593
idx<- lrt$p>cv[1]
lrtTmp<- list(p=lrt$p[idx])
gdatTmp<- gdat[,match(names(lrtTmp$p),colnames(gdat))]
rownames(gdatTmp)<- 1:nrow(gdatTmp)
topQ<- NULL
while(any(idx)){
ii<- which.max(lrtTmp$p)
topQ<- rbind(topQ,data.frame(snp=colnames(gdatTmp)[ii], lrt=lrtTmp$p[ii]))
print(topQ)
datTmp<- data.frame(gdatTmp[,match(topQ$snp,colnames(gdatTmp))],
e=pdat$e,check.names=FALSE)
xTmp<- model.matrix(~.,data=datTmp)[,-1]
lrtTmp<- scanOne(pdat$DTF,x=xTmp,gdat=gdatTmp,vc=vc)
idx<- lrtTmp$p>cv[1]
}
rownames(topQ)<- 1:nrow(topQ)

tmp<- phyMap[match(topQ$snp,phyMap$snp),]
tmp$lrt<- topQ$lrt
# write.csv(tmp,file=”topQ.csv”,row.names=FALSE)

# top E
cv<- read.table(“qtlQE.txt”,header=F)[,1]
cv<- quantile(cv,c(0.5,0.90,0.95)); cv/2/log(10) # 4.372958 5.478809 5.936353
idx<- lrt.qe$p>cv[1]
lrtTmp<- list(p=lrt.qe$p[idx])
snpExtQ<- c(“4_269962″,”4_268809″,”4_219919″,”5_3188328”)
snpExtQ<- union(topQ$snp, snpExtQ)
snpExtQTmp<- setdiff(snpExtQ, names(lrtTmp$p))
if(length(snpExtQTmp)>0){
lrtTmp$p<- c(lrt$p[match(snpExtQTmp,names(lrt$p))],lrtTmp$p)
lrtTmp$p[snpExtQTmp]<- 0
}
gdatTmp<- gdat[,match(names(lrtTmp$p),colnames(gdat))]
rownames(gdatTmp)<- 1:nrow(gdatTmp)
topE<- NULL
while(TRUE){
datTmp<- data.frame(gdatTmp[,match(union(snpExtQ,topE$snp),colnames(gdatTmp))],
e=pdat$e,check.names=FALSE)
iiQ<- match(snpExtQ,colnames(datTmp))
iiE<- match(topE$snp,colnames(datTmp))
colnames(datTmp)[-ncol(datTmp)]<- paste(“X”,1:(ncol(datTmp)-1),sep=””)
if(length(iiE)>0){
xTmp<- paste(colnames(datTmp)[iiE],collapse=”+”)
xTmp<- paste(“(“,xTmp,”)”,sep=””)
xTmp<- paste(xTmp,”e”,sep=”*”)
}else xTmp<- “e”
xTmp<- paste(paste(colnames(datTmp)[iiQ],collapse=”+”),xTmp,sep=”+”)
xTmp<- formula(paste(c(“~”,xTmp),collapse=””))
xTmp<- model.matrix(xTmp,data=datTmp)[,-1]
lrtTmp<- scanOne(pdat$DTF,x=xTmp,gdat=gdatTmp,vc=vc)
ii<- match(“e”,colnames(xTmp))
lrt.tmp<- scanOne(pdat$DTF,x=xTmp[,-ii],gdat=gdatTmp,vc=vc,intcovar=pdat$e)
lrtTmp$p<- lrt.tmp$p-lrtTmp$p

idx<- lrtTmp$p>cv[1]
if(any(idx)){
ii<- which.max(lrtTmp$p)
topE<- rbind(topE,data.frame(snp=colnames(gdatTmp)[ii], lrt=lrtTmp$p[ii]))
print(topE)
}else break
}
rownames(topE)<- 1:nrow(topE)

tmp<- phyMap[match(topE$snp,phyMap$snp),]
tmp$lrt<- topE$lrt
# write.csv(tmp,file=”topE.csv”,row.names=FALSE)

if(FALSE){
# backward
# top Q
topQ<- read.csv(“topQ.csv”,header=TRUE)

gdatTmp<- gdat[,lrt$p>20 | is.element(colnames(gdat),topQ$snp)]
xin<- is.element(colnames(gdatTmp),topQ$snp)
topTmp<- mAIC(y=pdat$DTF,x=pdat$e,gdat=gdatTmp,vc=vc,xin=xin,k=20,
direction=”backward”,ext=TRUE,verbose=TRUE)
gdatTmp<- gdatTmp[,topTmp$xin]
topTmp<- mAIC(y=pdat$DTF,x=pdat$e,gdat=gdatTmp,vc=vc,
xin=rep(TRUE,sum(topTmp$xin)),k=20,
direction=”backward”,ext=TRUE,verbose=TRUE)

topQb<- data.frame(snp=”1″,lrt=NA)[-1,]
gdatTmp<- gdatTmp[,topTmp$xin]
rownames(gdatTmp)<- 1:nrow(gdatTmp)
xTmp<- pdat$e
lrtTmp<- scanOne(pdat$DTF,x=xTmp,gdat=gdatTmp,vc=vc)
idx<- lrtTmp$p>20
while(any(idx)){
ii<- which.max(lrtTmp$p)
topQb<- rbind(topQb,data.frame(snp=colnames(gdatTmp)[ii], lrt=lrtTmp$p[ii]))
print(topQb)
datTmp<- data.frame(gdatTmp[,match(topQb$snp,colnames(gdatTmp))], e=pdat$e,
check.names = FALSE)
colnames(datTmp)<- c(paste(“snp”,topQb$snp,sep=””),”e”)
xTmp<- model.matrix(~.-1,data=datTmp)
lrtTmp<- scanOne(pdat$DTF,x=xTmp,gdat=gdatTmp,vc=vc)
idx<- lrtTmp$p>20
}
rownames(topQb)<- 1:nrow(topQb)

tmp<- phyMap[match(topQb$snp,phyMap$snp),]
tmp$lrt<- topQb$lrt
# write.csv(tmp,file=”topQb.csv”,row.names=FALSE)
}
#############
# estimates #
#############

topQ<- read.csv(“topQ.csv”,header=T) #topQ<- read.csv(“topQb.csv”,header=T)[1:5,]
topE<- read.csv(“topE.csv”,header=T)
snpE<- as.character(topE$snp)
snpQ<- setdiff(as.character(topQ$snp),snpE)
snpT<- union(snpQ,snpE)
dat<- gdat[,match(snpT,colnames(gdat))]
colnames(dat)<- paste(“snp”,colnames(dat),sep=””)
dat<- cbind(dat,e=pdat$e,y=pdat$DTF)
dat<- as.data.frame(dat)
fml<- paste(“y ~”,paste(paste(“snp”,snpT,sep=””),collapse=” + “),
“+ e +”,paste(paste(“snp”,snpE,sep=””),”e”,sep=”:”,collapse=” + “),sep=” “)
fml<- formula(fml)
mdl<- gls(fml,data=dat,vc=vc)

write.csv(mdl,file=”mdl.csv”)

# add 4 SNPs
snpTmp<- c(“4_269962″,”4_268809″,”4_219919″,”5_3188328”)
topQ<- read.csv(“topQ.csv”,header=T) #topQ<- read.csv(“topQb.csv”,header=T)[1:5,]
topE<- read.csv(“topE.csv”,header=T)
snpE<- as.character(topE$snp)
snpQ<- setdiff(c(as.character(topQ$snp),snpTmp),snpE)
snpT<- union(snpQ,snpE)
dat<- gdat[,match(snpT,colnames(gdat))]
colnames(dat)<- paste(“snp”,colnames(dat),sep=””)
dat<- cbind(dat,e=pdat$e,y=pdat$DTF)
dat<- as.data.frame(dat)
fml<- paste(“y ~”,paste(paste(“snp”,snpT,sep=””),collapse=” + “),
“+ e +”,paste(paste(“snp”,snpE,sep=””),”e”,sep=”:”,collapse=” + “),sep=” “)
fml<- formula(fml)
mdl<- gls(fml,data=dat,vc=vc)

write.csv(mdl,file=”mdl4p.csv”)

# latex output of the table
library(xtable)
options(digits=3)
xtable(as.matrix(mdl))

#############
# epistasis #
#############

topQ<- read.csv(“topQ.csv”,header=T) #topQb<- read.csv(“topQb.csv”,header=T)
topE<- read.csv(“topE.csv”,header=T)

snpTmp<- c(“4_269962″,”4_268809″,”4_219919″,”5_3188328”)
snpE<- as.character(topE$snp)
snpQ<- setdiff(c(as.character(topQ$snp),snpTmp),snpE)
snpT<- union(snpQ,snpE)
dat<- gdat[,match(snpT,colnames(gdat))]
colnames(dat)<- paste(“snp”,colnames(dat),sep=””)
dat<- cbind(dat,e=pdat$e,y=pdat$DTF)
dat<- as.data.frame(dat)
fml<- paste(“y ~”,paste(paste(“snp”,snpT,sep=””),collapse=” + “),”+ e”,sep=” “)
fml<- formula(fml)
xTmp<- model.matrix(fml,data=dat)[,-1]

gdatTmp<- gdat[,is.element(colnames(gdat),snpT)]
eps<- scanTwo(pdat$DTF,x=xTmp,gdat=gdatTmp,vc=vc)

write.csv(eps, file=”eps.csv”)

########
# BLUP #
########

# genetic matrices
vcMtrTmp<- matrix(NA,nrow=nrow(pdat),ncol=nrow(pdat))
for(i in 1:4){
idx1<- pdat$env==i
for(j in 1:4){
idx2<- pdat$env==j
vTmp<- K[match(pdat$eco[idx1],rownames(K)),match(pdat$eco[idx2],colnames(K))]*
sqrt(get(paste(“vc”,i,sep=””))$par[“AA”])*
sqrt(get(paste(“vc”,j,sep=””))$par[“AA”])
vcMtrTmp[idx1,idx2]<- vTmp
}
}
rm(i,j,idx1,idx2,vTmp)

# case I
topQ<- read.csv(“topQ.csv”,header=T) #topQb<- read.csv(“topQb.csv”,header=T)[1:5,]
topE<- read.csv(“topE.csv”,header=T)
snpTmp<- c(“4_269962″,”4_268809″,”4_219919″,”5_3188328”)
snpE<- as.character(topE$snp)
snpQ<- setdiff(c(as.character(topQ$snp),snpTmp),snpE)
snpT<- union(snpQ,snpE)
dat<- gdat[,match(snpT,colnames(gdat))]
colnames(dat)<- paste(“snp”,colnames(dat),sep=””)
dat<- cbind(dat,e=pdat$e,y=pdat$DTF)
dat<- as.data.frame(dat)
fml<- paste(“y ~”,paste(paste(“snp”,snpT,sep=””),collapse=” + “),
“+ e +”,paste(paste(“snp”,snpE,sep=””),”e”,sep=”:”,collapse=” + “),sep=” “)
fml<- formula(fml)
xTmp<- model.matrix(fml,data=dat)[,-1]
date()
vcQE14<- estVC(y=pdat$DTF,x=xTmp,initpar=c(rep(0,ncol(xTmp)+1),0.7009252, 113.6252972),
v=list(AA=vcMtrTmp,DD=NULL,AD=NULL,MH=NULL,HH=NULL,EE=diag(nrow(pdat))))
date()
blupQE14<- blup(vcQE14)

tmp<- data.frame(blupQE14$fixed,A=blupQE14$AA,E=blupQE14$EE)
rownames(tmp)<- paste(pdat$env,pdat$ecotypeid,sep=”_”)
colnames(tmp)[1]<- “(Intercept)”
# write.csv(tmp,file=”blupQE14.csv”)

date()
vcQE14.0<- estVC(y=pdat$DTF[pdat$env!=4],x=xTmp[pdat$env!=4,],
initpar=c(rep(0,ncol(xTmp)+1),0.6009252, 113.6252972),
v=list(AA=vcMtrTmp[pdat$env!=4,pdat$env!=4],DD=NULL,AD=NULL,MH=NULL,HH=NULL,
EE=diag(sum(pdat$env!=4))))
date()
vcTmp<- vcQE14.0
vcTmp$y<- matrix(pdat$DTF[pdat$env==4])
vcTmp$x<- cbind(1,xTmp[pdat$env==4,])
vcTmp$v$AA<- vcMtrTmp[pdat$env==4,pdat$env==4]
vcTmp$v$EE<- diag(sum(pdat$env==4))
blupQE14.0<- blup(vcTmp)
tmp<- data.frame(blupQE14.0$fixed,A=blupQE14.0$AA,E=blupQE14.0$EE)
rownames(tmp)<- paste(pdat$env,pdat$ecotypeid,sep=”_”)[pdat$env==4]
colnames(tmp)[1]<- “(Intercept)”
# write.csv(tmp,file=”blupQE14_0.csv”,row.names=TRUE)
# save(vcMtrTmp,topQ,topE,snpTmp,snpQ,snpE,snpT,xTmp,vcQE14,vcQE14.0,blupQE14,blupQE14.0,pdat,file=”blupQE14.RData”)

# plotting
idx<- pdat$env != 4
tmp<- data.frame(blupQE14.0$fixed,A=blupQE14.0$AA,E=blupQE14.0$EE)
rownames(tmp)<- paste(pdat$env[!idx],pdat$ecotypeid[!idx],sep=”_”)
colnames(tmp)[1]<- “(Intercept)”
R2<- cor(pdat$DTF[!idx],pdat$DTF[!idx]-tmp$E)^2

# pdf(“blupQE14_Env4.pdf”)
# png(“blueQE14_Env4.png”)
plot(x=pdat$DTF[!idx],y=pdat$DTF[!idx]-tmp$E,xlab=”Observed DTF”,ylab=”Predicted DTF”)
text(80,140,substitute(R^2==R2,list(R2=round(R2,3))))
abline(0,1,col=2,lty=3)
# dev.off()
plot(tmp$E,xlab=”Index”,ylab=”Residual”);abline(h=0,col=2,lty=3)
# dev.off()
# case II
if(FALSE){
set.seed(9)
idx<- is.element(pdat$ecotypeid,sample(unique(pdat$ecotypeid),375))

date()
vcQE0<- estVC(y=pdat$DTF[idx],x=xTmp[idx,],initpar=c(rep(0,ncol(xTmp)+1),0.5009252, 113.6252972),
v=list(AA=vcMtrTmp[idx,idx],DD=NULL,AD=NULL,MH=NULL,HH=NULL,EE=diag(sum(idx))))
date()
vcTmp<- vcQE0
vcTmp$y<- matrix(pdat$DTF[!idx])
vcTmp$x<- cbind(1,xTmp[!idx,])
vcTmp$v$AA<- vcMtrTmp[!idx,!idx]
vcTmp$v$EE<- diag(sum(!idx))
blupQE0<- blup(vcTmp)
tmp<- data.frame(blupQE0$fixed,A=blupQE0$AA,E=blupQE0$EE)
rownames(tmp)<- paste(pdat$env[!idx],pdat$ecotypeid[!idx],sep=”_”)
colnames(tmp)[1]<- “(Intercept)”
tmp<- cbind(DTF=pdat$DTF[!idx],tmp)
# write.csv(tmp,file=”blupQE14_Miss10.csv”,row.names=TRUE)
# save(vcMtrTmp,topQ,topE,snpTmp,snpQ,snpE,snpT,xTmp,vcQE0,blupQE0,pdat,file=”blupQE0.RData”)

# plotting
set.seed(9)
idx<- is.element(pdat$ecotypeid,sample(unique(pdat$ecotypeid),375))
tmp<- data.frame(blupQE0$fixed,A=blupQE0$AA,E=blupQE0$EE)
rownames(tmp)<- paste(pdat$env[!idx],pdat$ecotypeid[!idx],sep=”_”)
colnames(tmp)[1]<- “(Intercept)”
R2<- cor(pdat$DTF[!idx],pdat$DTF[!idx]-tmp$E)^2

# pdf(“blupQE14_Miss10.pdf”)
# png(“blueQE14_Miss10.png”)
plot(x=pdat$DTF[!idx],y=pdat$DTF[!idx]-tmp$E,xlab=”Observed DTF”,ylab=”Predicted DTF”)
text(80,140,substitute(R^2==R2,list(R2=round(R2,3))))
abline(0,1,col=2,lty=3)
# dev.off()
plot(tmp$E,xlab=”Index”,ylab=”Residual”);abline(h=0,col=2,lty=3)
# dev.off()
# more…
id1<- pdat$ecotypeid[!idx & pdat$env==1]
id3<- pdat$ecotypeid[!idx & pdat$env==3]
idB<- intersect(id1,id3)
idx1<- match(paste(“1″,idB,sep=”_”),rownames(tmp))
idx3<- match(paste(“3″,idB,sep=”_”),rownames(tmp))
ii1<- match(paste(“1″,idB,sep=”_”),paste(pdat$env,pdat$ecotypeid,sep=”_”))
ii3<- match(paste(“3″,idB,sep=”_”),paste(pdat$env,pdat$ecotypeid,sep=”_”))
# png(“Miss10pheno.png”)
plot(x=pdat$DTF[ii1],y=pdat$DTF[ii3],xlab=”DTF in Sep 2010″,ylab=”DTF in Sep 2040″,
xlim=c(10,150),ylim=c(10,150))
text(x=pdat$DTF[ii1],y=pdat$DTF[ii3],labels=as.character(idB),pos=4,col=”green”,cex=0.5)
points(rowSums(tmp[,-ncol(tmp)])[idx1],rowSums(tmp[,-ncol(tmp)])[idx3],pch=”+”,col=2)
text(rowSums(tmp[,-ncol(tmp)])[idx1],rowSums(tmp[,-ncol(tmp)])[idx3],
labels=as.character(idB),pos=4,col=”blue”,cex=0.5)
abline(0,1,lty=3,cex=0.5)
# dev.off()

# combined plot
# png(“blup.png”, width=960)
par(mfrow=c(1,2))
load(“blupQE14.RData”)
idx<- pdat$env != 4
tmp<- data.frame(blupQE14.0$fixed,A=blupQE14.0$AA,E=blupQE14.0$EE)
rownames(tmp)<- paste(pdat$env[!idx],pdat$ecotypeid[!idx],sep=”_”)
colnames(tmp)[1]<- “(Intercept)”
R2<- cor(pdat$DTF[!idx],pdat$DTF[!idx]-tmp$E)^2
plot(x=pdat$DTF[!idx],y=pdat$DTF[!idx]-tmp$E,xlab=”Observed DTF”,ylab=”Predicted DTF”,main=”(A) BLUP for DTF in 2055″)
text(80,140,substitute(R^2==R2,list(R2=round(R2,3))))
abline(0,1,col=2,lty=3)

load(“blupQE0.RData”)
set.seed(9)
idx<- is.element(pdat$ecotypeid,sample(unique(pdat$ecotypeid),375))
tmp<- data.frame(blupQE0$fixed,A=blupQE0$AA,E=blupQE0$EE)
rownames(tmp)<- paste(pdat$env[!idx],pdat$ecotypeid[!idx],sep=”_”)
colnames(tmp)[1]<- “(Intercept)”
R2<- cor(pdat$DTF[!idx],pdat$DTF[!idx]-tmp$E)^2
plot(x=pdat$DTF[!idx],y=pdat$DTF[!idx]-tmp$E,xlab=”Observed DTF”,ylab=”Predicted DTF”,main=”(B) BLUP for DTF of 10% Leave-Out Accessions”)
text(80,140,substitute(R^2==R2,list(R2=round(R2,3))))
abline(0,1,col=2,lty=3)
# dev.off()
}

##################
# heritabilities #
##################

W.inv<- function(W, symmetric=TRUE,inverse=TRUE){
eW <- eigen(W, symmetric=symmetric)
d <- eW$values
if (min(d) <0 && abs(min(d))>sqrt(.Machine$double.eps))
stop(“‘W’ is not positive definite”)
else d[d<=0]<- ifelse(inverse, Inf, 0)
A <- diag(d^ifelse(inverse, -0.5, 0.5)) %*% t(eW$vector)
A # t(A)%*%A = W^{-1}
}

# adapte
# generalized least squares estimates
gls<- function(formula,data=NULL,vc=NULL){
if(is.null(data)){
xx<- model.matrix(formula)
}else xx<- model.matrix(formula,data)
yy<- model.response(model.frame(formula,data))

nr<- nrow(xx)
if(!is.null(vc)){
if(is.element(“bgv”,attr(vc,”class”))){
nb<- length(vc$par) – sum(vc$nnl)
nr<- nrow(vc$y)
cov<- matrix(0,nrow=nr,ncol=nr)
for(i in 1:vc$nv)
if(vc$nnl[i]) cov<- cov + vc$v[[i]]*vc$par[nb+vc$nn[i]]
}else{
if(is.data.frame(vc)) vc<- as.matrix(vc)
if(!is.matrix(vc)) stop(“vc should be a matrix.”)
if(!is.numeric(vc)) stop(“vc should be a numeric matrix.”)
cov<- vc
}
}else cov<- diag(nrow(as.matrix(yy)))
A<- W.inv(cov)

x<- A%*%xx; colnames(x)[1]<- “Intercept”
y<- A%*%yy
dtf<- data.frame(y=y,x)
mdl<- lm(y~.-1, data=dtf)
mdl$data<- dtf

mdl
}

load(“blupQE14.RData”)

S<- vcQE$v$AA*vcQE$par[“AA”]
diag(S)<- diag(S) + vcQE$par[“EE”]

datTmp<- data.frame(
y = vcQE$y,
e = vcQE$x[,”e”],
vcQE$x[,match(paste(“snp”,snpQ,sep=””),colnames(vcQE$x))],
vcQE$x[,match(paste(“snp”,snpE,sep=””),colnames(vcQE$x))],
vcQE$x[,grep(“\\.e”,colnames(vcQE$x))]
)
g0<- gls(y~1,data=datTmp,vc=S) # none
g.0<- gls(y~e,data=datTmp,vc=S) # no QTL (only e)
g.a<- gls(y~.,data=datTmp,vc=S) # G, GxE
datTmp<- data.frame(
y = vcQE$y,
e = vcQE$x[,”e”],
vcQE$x[,match(paste(“snp”,snpQ,sep=””),colnames(vcQE$x))],
vcQE$x[,match(paste(“snp”,snpE,sep=””),colnames(vcQE$x))]
)
g.gxe<- gls(y~.,data=datTmp,vc=S) # G: major G, apriori G and GxE related G
datTmp<- data.frame(
y = vcQE$y,
e = vcQE$x[,”e”],
vcQE$x[,match(paste(“snp”,snpQ,sep=””),colnames(vcQE$x))]
)
g.e<- gls(y~.,data=datTmp,vc=S) # G: major G, apriori G
datTmp<- data.frame(
y = vcQE$y,
e = vcQE$x[,”e”],
vcQE$x[,match(paste(“snp”,setdiff(snpQ,snpTmp),sep=””),colnames(vcQE$x))]
)
g.q<- gls(y~.,data=datTmp,vc=S) # G: major G

gg<- anova(g0,g.0,g.q,g.e,g.gxe,g.a)
hrt<- cbind(gg,”%var”=gg[“Sum of Sq”]/gg[1,”RSS”]*100)
rownames(hrt)<- c(“Intercept”,”e”,”Major QTL”,”apriori QTL”,”QTL in GxE”,”GxE”)
colnames(hrt)[ncol(hrt)]<- “%Var”
xtable(hrt,digits=2)

r<- diag(vcQE$v$AA*vcQE$par[“AA”])
r<- sum(r)/sum(r+vcQE$par[“EE”])
rss<- hrt[,”Sum of Sq”]
rss[1]<- hrt[1,”RSS”]
h<- c(rss[-1],(rss[1]-sum(rss[-1]))*r)/rss[1]*100
names(h)<- c(rownames(hrt)[-1],”Polygenic Var”)
round(h,2)
# e Major QTL apriori QTL QTL in GxE GxE
# 0.15 12.03 1.40 0.15 7.04
#Polygenic Var
# 75.59

####################
# cross validation #
####################

# not run yet
load(“saved.RData”)

ids<- unique(pdat$ecotypeid)
gdatTmp<- gdat#[,lrt$p>20]
cv.lrt<- NULL
for(i in 1:10){
cat(i,”:”,date(),”\n”)
ii<- (i-1)*length(ids)/10 + c(1,length(ids)/10)
ii<- round(ii)
idTmp<- ids[-(ii[1]:ii[2])]
idx<- is.element(pdat$ecotypeid,idTmp)
lrtTmp<- scanOne(pdat$DTF[idx],x=pdat$e[idx],gdat=gdatTmp[idx,],vc=vcMtr[idx,idx],
minorGenoFreq=0.05)
cv.lrt<- cbind(cv.lrt,lrtTmp$p[as.character(colnames(gdat))])
}
rownames(cv.lrt)<- colnames(gdatTmp)
topQ<- read.csv(“topQ.csv”,header=T)
cv.lrt[as.character(topQ$snp),]

write.table(cv.lrt,file=”cv10all.txt”,col.names=FALSE)

###################################################################################
# the end #
###########

##############################
# prepare data: don’t delete #
##############################

load(“justin.RData”)
rownames(K) <- colnames(K) <- read.csv(“namesK.csv”,header=TRUE)[,1]
#core<- read.csv(“core473list.csv”,header=TRUE)

idx<- match(rownames(K),all_block$ecotypeid)
idx<- is.element(all3$ril,toupper(all_block$line)[idx])
all3Tmp<- all3[idx,]
all3Tmp$ecotypeid<- all_block$ecotypeid[match(all3Tmp$ril,toupper(all_block$line))]
dim(all3Tmp) # 3200 7

# gdat and pdat
idx1<- all3Tmp$planting==”September” & all3Tmp$year==2010 & all3Tmp$shelfHeight==”top-middle”
idx2<- all3Tmp$planting==”September” & all3Tmp$year==2010 & all3Tmp$shelfHeight==”bottom-middle”
idx3<- all3Tmp$planting==”September” & all3Tmp$year==2040 & all3Tmp$shelfHeight==”top-middle”
idx4<- all3Tmp$planting==”September” & all3Tmp$year==2040 & all3Tmp$shelfHeight==”bottom-middle”
# genetic matrices
AAMtr<- matrix(NA,nrow=nrow(pdat),ncol=nrow(pdat))
for(i in 1:4){
idx1<- pdat$env==i
for(j in 1:4){
idx2<- pdat$env==j
vTmp<- K[match(pdat$eco[idx1],rownames(K)),match(pdat$eco[idx2],colnames(K))]
AAMtr[idx1,idx2]<- vTmp
}
}
rm(i,j,idx1,idx2,vTmp)

AAvc<- estVC(y=pdat$DTF,x=pdat$e,
v=list(AA=AAMtr,DD=NULL,AD=NULL,MH=NULL,HH=NULL,EE=diag(nrow(pdat))))

#*********************************************