########################################### # Estimating a Strategic Model From Data ########################################### # # Mark Fey # Jeremy Kedziora # #Consider the case where we have the following: # 1 # a / \ b # / \ # U1a 2 # U2a / \ # c / \ d # U1bc U1bd # U2bc U2bd rm(list=ls()) library(foreign) #Read the data file into memory. war1800<-read.dta(file="/Work/LearnR/war1800.dta") #Take the columns we want. x.possible<-war1800[,6:20] ########################################################################## #Doing it some convex combination of the Ramey way and the Kedziora way!!! ########################################################################## #set things up to return several things from the function setClass('Data.Maker',representation(u1a='matrix',u1bc='matrix',u1bd='matrix',u2bc='matrix',u2bd='matrix',Ia='matrix',Ibc='matrix',Ibd='matrix')) #define a function that builds the variables we need #when you call this function: c__ = 0 for a column of zeroes only # c__ = 1 for a column of ones only # c__ = 2 for variables with no constant # c__ = 3 for variables and a constant # # u__.index = columns for variables (default is no variables) #remove all the NAs BEFORE you run the data through this function Data.Maker<-function(Data,u1a.index=0,c1a,u1bc.index=0,c1bc,u1bd.index=0,c1bd,u2bc.index=0,c2bc,u2bd.index=0,c2bd,Ia.index,Ibc.index,Ibd.index){ if( (c1a==2|c1a==3)&(u1a.index==0)[1] ){cat("Did you define u1a.index correctly?\n")} if(c1a==0){u1a<-as.matrix(rep(0,nrow(Data)))} if(c1a==1){u1a<-as.matrix(rep(1,nrow(Data))) colnames(u1a)<-c("u1a Constant") } if(c1a==2){ u1a<-as.matrix(Data[,u1a.index]) colnames(u1a)<-colnames(Data[,u1a.index]) } if(c1a==3){ u1a<-as.matrix(cbind(1,Data[,u1a.index])) colnames(u1a)<-c("u1a Constant",colnames(Data[,u1a.index])) } if( (c1bc==2|c1bc==3)&(u1bc.index==0)[1] ){cat("Did you define u1bc.index correctly?\n")} if(c1bc==0){u1bc<-as.matrix(rep(0,nrow(Data)))} if(c1bc==1){u1bc<-as.matrix(rep(1,nrow(Data))) colnames(u1bc)<-"u1bc Constant" } if(c1bc==2){ u1bc<-as.matrix(Data[,u1bc.index]) colnames(u1bc)<-colnames(Data[,u1bc.index]) } if(c1bc==3){ u1bc<-as.matrix(cbind(1,Data[,u1bc.index])) colnames(u1bc)<-c("u1bc Constant",colnames(Data[,u1bc.index])) } if( (c1bd==2|c1bd==3)&(u1bd.index==0)[1] ){cat("Did you define u1bd.index correctly?\n")} if(c1bd==0){u1bd<-as.matrix(rep(0,nrow(Data)))} if(c1bd==1){u1bd<-as.matrix(rep(1,nrow(Data))) colnames(u1bd)<-c("u1bd Constant") } if(c1bd==2){ u1bd<-as.matrix(Data[,u1bd.index]) colnames(u1bd)<-c(colnames(Data[,u1bd.index])) } if(c1bd==3){ u1bd<-as.matrix(cbind(1,Data[,u1bd.index])) colnames(u1bd)<-c("u1bd Constant",colnames(Data[,u1bd.index])) } if( (c2bc==2|c2bc==3)&(u2bc.index==0)[1] ){cat("Did you define u2bc.index correctly?\n")} if(c2bc==0){u2bc<-as.matrix(rep(0,nrow(Data)))} if(c2bc==1){u2bc<-as.matrix(rep(1,nrow(Data))) colnames(u2bc)<-c("u2bc Constant") } if(c2bc==2){ u2bc<-as.matrix(Data[,u2bc.index]) colnames(u2bc)<-c(colnames(Data[,u2bc.index])) } if(c2bc==3){ u2bc<-as.matrix(cbind(1,Data[,u2bc.index])) colnames(u2bc)<-c("u2bc Constant",colnames(Data[,u2bc.index])) } if( (c2bd==2|c2bd==3)&(u2bd.index==0)[1] ){cat("Did you define u2bd.index correctly?\n")} if(c2bd==0){u2bd<-as.matrix(rep(0,nrow(Data)))} if(c2bd==1){u2bd<-as.matrix(rep(1,nrow(Data))) colnames(u2bd)<-c("u2bd Constant") } if(c2bd==2){ u2bd<-as.matrix(Data[,u2bd.index]) colnames(u2bd)<-c(colnames(Data[,u2bd.index])) } if(c2bd==3){ u2bd<-as.matrix(cbind(1,Data[,u2bd.index])) colnames(u2bd)<-c("u2bd Constant",colnames(Data[,u2bd.index])) } Ia<-as.matrix(Data[,Ia.index]) Ibc<-as.matrix(Data[,Ibc.index]) Ibd<-as.matrix(Data[,Ibd.index]) result <- new('Data.Maker',u1a=u1a,u1bc=u1bc,u1bd=u1bd,u2bc=u2bc,u2bd=u2bd,Ia=Ia,Ibc=Ibc,Ibd=Ibd) class(result) <- 'Data.Maker' result } #remove NAs from data file x.possible<-na.omit(war1800[,6:20]) #columns for the variables for the utility functions cols4u1a<-c(9,6,11) #cols4u1bc<-0 cols4u1bd<-c(3,4) #cols4u2bc<-0 cols4u2bd<-c(7,3,5) #columns for the indicator variables cols4Ia<-13 cols4Ibc<-14 cols4Ibd<-15 #call the function # # Remember the syntax! # #when you call this function: c__ = 0 for a column of zeroes only # c__ = 1 for a column of ones only # c__ = 2 for variables with no constant # c__ = 3 for variables and a constant # # u__.index = columns for variables (default is no variables) u.vars<-Data.Maker(x.possible,u1a.index=cols4u1a,c1a=3,c1bc=0,u1bd.index=cols4u1bd,c1bd=3,c2bc=0,u2bd.index=cols4u2bd,c2bd=3, Ia.index=cols4Ia,Ibc.index=cols4Ibc,Ibd.index=cols4Ibd) #create the utility function covariates u1a<-as.data.frame(u.vars@u1a) u1bc<-as.data.frame(u.vars@u1bc) u1bd<-as.data.frame(u.vars@u1bd) u2bc<-as.data.frame(u.vars@u2bc) u2bd<-as.data.frame(u.vars@u2bd) #create the indicator variables Ia<-u.vars@Ia Ibc<-u.vars@Ibc Ibd<-u.vars@Ibd #create a dummy variable to check for zero variables # # u___.nonzero = 1 if it is nonzero # u___.nonzero = 0 if it is zero u1a.nonzero<-1-((sum(u1a!=0))==0)*1 u1bc.nonzero<-1-((sum(u1bc!=0))==0)*1 u1bd.nonzero<-1-((sum(u1bd!=0))==0)*1 u2bc.nonzero<-1-((sum(u2bc!=0))==0)*1 u2bd.nonzero<-1-((sum(u2bd!=0))==0)*1 #total number of parameters we need to estimate n.par<-u1a.nonzero*ncol(u1a)+u1bc.nonzero*ncol(u1bc)+u1bd.nonzero*ncol(u1bd)+u2bc.nonzero*ncol(u2bc)+u2bd.nonzero*ncol(u2bd) # #Define the prob of choosing between alt A and alt B, given some parameter # logistic.probA<-function(A,B,param){ 1/(1+exp(param*(B-A))) } #specify the log-likelihood for the strategic model llik.logistic<-function(b){ #set the variance to 1 for identification s2<-1 #if nonzero, calculate U1A by combining data with coefficients if(u1a.nonzero==1){ U1A<-as.matrix(u1a)%*%as.matrix(b[1:ncol(u1a)]) nextcol<-ncol(u1a)+1 } #if zero, then U1A is zero if(u1a.nonzero==0){ U1A<-as.matrix(u1a) nextcol<-1 } if(u1bc.nonzero==1){ U1BC<-as.matrix(u1bc)%*%as.matrix(b[nextcol:(nextcol+ncol(u1bc)-1)]) nextcol<-nextcol+ncol(u1bc) } if(u1bc.nonzero==0){ U1BC<-as.matrix(u1bc) nextcol<-nextcol } if(u1bd.nonzero==1){ U1BD<-as.matrix(u1bd)%*%as.matrix(b[nextcol:(nextcol+ncol(u1bd)-1)]) nextcol<-nextcol+ncol(u1bd) } if(u1bd.nonzero==0){ U1BD<-as.matrix(u1bd) nextcol<-nextcol } if(u2bc.nonzero==1){ U2BC<-as.matrix(u2bc)%*%as.matrix(b[nextcol:(nextcol+ncol(u2bc)-1)]) nextcol<-nextcol+ncol(u2bc) } if(u2bc.nonzero==0){ U2BC<-as.matrix(u2bc) nextcol<-nextcol } if(u2bd.nonzero==1){ U2BD<-as.matrix(u2bd)%*%as.matrix(b[nextcol:(nextcol+ncol(u2bd)-1)]) nextcol<-nextcol+ncol(u2bd) } if(u2bd.nonzero==0){ U2BD<-as.matrix(u2bd) nextcol<-nextcol } #here are the probabilities for actions #lamda is normalized to 1 p.c<-logistic.probA(U2BC,U2BD,1) p.d<-1-p.c p.a<-logistic.probA(U1A,(p.c*U1BC+p.d*U1BD),1) p.b<-1-p.a #here are the outcome probabilities p.A<-p.a p.BC<-p.b*p.c p.BD<-p.b*p.d #now we only need the log-likelihood llik.<-Ia*log(p.A)+Ibc*log(p.BC)+Ibd*log(p.BD) sum(llik.) } #generate random starting values stval<-runif(n.par)*0.001 #call optim mle.logistic<-optim(stval,llik.logistic,method="BFGS",hessian=TRUE,control=list(fnscale=-1,trace=5,maxit=1500,reltol=1e-15)) #extract parameters and calculate standard errors parameters<-mle.logistic$par SE<-as.matrix(sqrt(diag(solve(-1*mle.logistic$hessian)))) Tstat<-as.matrix(parameters/SE) pvalue<-2*pt(abs(Tstat),nrow(u1a)-(n.par),lower.tail=FALSE) #build a table to present results Table<-cbind(round(parameters,digits=3),round(SE,digits=3),round(Tstat,digits=3),round(pvalue,digits=3)) colnames(Table)<-c("Coefficient","Std Error","T-statistic","p-values") #get variables names, excluding the zero variables vars<-cbind(u1a,u1bc,u1bd,u2bc,u2bd) index<-apply(vars,2,sum) vars<-vars[,-(index==0)*seq(1,ncol(vars))] #assign rownames rownames(Table)<-colnames(vars) cat("Logistic Estimates\n") print(Table) #xtable(Table) cat("\n") #predicted probabilities: predicted<-function(u,column,b,n.grid){ u1a.median<-matrix(NA,nrow=n.grid,ncol=ncol(u1a)) for(i in 1:ncol(u1a)){ u1a.median[,i]<-median(u1a[,i]) } u1bc.median<-matrix(NA,nrow=n.grid,ncol=ncol(u1bc)) for(i in 1:ncol(u1bc)){ u1bc.median[,i]<-median(u1bc[,i]) } u1bd.median<-matrix(NA,nrow=n.grid,ncol=ncol(u1bd)) for(i in 1:ncol(u1bd)){ u1bd.median[,i]<-median(u1bd[,i]) } u2bc.median<-matrix(NA,nrow=n.grid,ncol=ncol(u2bc)) for(i in 1:ncol(u2bc)){ u2bc.median[,i]<-median(u2bc[,i]) } u2bd.median<-matrix(NA,nrow=n.grid,ncol=ncol(u2bd)) for(i in 1:ncol(u2bd)){ u2bd.median[,i]<-median(u2bd[,i]) } if(u==1){ x.varying<-seq(min(u1a[,column]),max(u1a[,column]),length=n.grid) u1a.median[,column]<-x.varying } if(u==2){ x.varying<-seq(min(u1bc[,column]),max(u1bc[,column]),length=n.grid) u1bc.median[,column]<-x.varying } if(u==3){ x.varying<-seq(min(u1bd[,column]),max(u1bd[,column]),length=n.grid) u1bd.median[,column]<-x.varying } if(u==4){ x.varying<-seq(min(u2bc[,column]),max(u2bc[,column]),length=n.grid) u2bc.median[,column]<-x.varying } if(u==5){ x.varying<-seq(min(u2bd[,column]),max(u2bd[,column]),length=n.grid) u2bd.median[,column]<-x.varying } #if nonzero, calculate U1A by combining data with coefficients if(u1a.nonzero==1){ U1A<-as.matrix(u1a.median)%*%as.matrix(b[1:ncol(u1a.median)]) nextcol<-ncol(u1a.median)+1 } #if zero, then U1A is zero if(u1a.nonzero==0){ U1A<-as.matrix(u1a.median) nextcol<-1 } if(u1bc.nonzero==1){ U1BC<-as.matrix(u1bc.median)%*%as.matrix(b[nextcol:(nextcol+ncol(u1bc.median)-1)]) nextcol<-nextcol+ncol(u1bc.median) } if(u1bc.nonzero==0){ U1BC<-as.matrix(u1bc.median) nextcol<-nextcol } if(u1bd.nonzero==1){ U1BD<-as.matrix(u1bd.median)%*%as.matrix(b[nextcol:(nextcol+ncol(u1bd.median)-1)]) nextcol<-nextcol+ncol(u1bd.median) } if(u1bd.nonzero==0){ U1BD<-as.matrix(u1bd.median) nextcol<-nextcol } if(u2bc.nonzero==1){ U2BC<-as.matrix(u2bc.median)%*%as.matrix(b[nextcol:(nextcol+ncol(u2bc.median)-1)]) nextcol<-nextcol+ncol(u2bc.median) } if(u2bc.nonzero==0){ U2BC<-as.matrix(u2bc.median) nextcol<-nextcol } if(u2bd.nonzero==1){ U2BD<-as.matrix(u2bd.median)%*%as.matrix(b[nextcol:(nextcol+ncol(u2bd.median)-1)]) nextcol<-nextcol+ncol(u2bd.median) } if(u2bd.nonzero==0){ U2BD<-as.matrix(u2bd.median) nextcol<-nextcol } #here are the probabilities for actions #lamda is normalized to 1 p.c<-logistic.probA(U2BC,U2BD,1) p.d<-1-p.c p.a<-logistic.probA(U1A,(p.c*U1BC+p.d*U1BD),1) p.b<-1-p.a #here are the outcome probabilities p.A<-p.a p.BC<-p.b*p.c p.BD<-p.b*p.d plot(x.varying,p.BD,type="l",lty=1,xlab="Variable",ylab="Probability of BD",ylim=c(0,1)) } predicted(5,4,parameters,100)