library(readstata13)
library(reshape2)
library(foreign)

# Guard: every path used below is relative, so refuse to run unless the
# current working directory path contains "updated_csp".
wkdir <- getwd()
# isTRUE() also rejects the zero-length result produced when getwd() returns
# NULL, which stopifnot() would have let through silently.
if (!isTRUE(grepl("updated_csp", wkdir, fixed = TRUE))) {
	stop(
		"This script must be run from a directory whose path contains ",
		"'updated_csp'; current working directory is: ", wkdir,
		call. = FALSE
	)
}

##############
#Clean CITI
##############
# Read the state ideology Stata file and keep only the state name, year and
# citi6016 columns. Rows with any missing value or an empty state name are
# dropped, then the columns are renamed to the names used as merge keys and
# output variable later in the script.
# The working directory is left at "citi"; the next section navigates
# relative to it.
setwd("citi")
df.citi.raw <- read.dta13("stateideology_v2018.dta")
df.citi <- df.citi.raw[c("statename", "year", "citi6016")]
# complete.cases() is FALSE wherever statename is NA, so the combined mask
# never yields NA and selects the same rows as filtering in two passes.
df.citi <- df.citi[complete.cases(df.citi) & df.citi$statename != "", ]
names(df.citi) <- c("state", "year", "citiUPD")

############################
#Clean SPI and Population
############################
# Personal income and population are read from one wide table in which the
# per-period columns are named "X" followed by digits (read.csv() prefixes
# numeric headers with "X"). The table is reshaped to long form once and the
# two series are then selected by the text of their Description field.
setwd("../SPI_Pop/spi0915")
df.spi.pop.wide <- read.csv("SA1_1929_2014.csv", stringsAsFactors = FALSE)
# Every column that does NOT look like X<digits> is treated as an identifier.
df.spi.pop <- melt(
	df.spi.pop.wide,
	id.vars = grep("X\\d+", names(df.spi.pop.wide), value = TRUE, invert = TRUE)
)
# melt() records the former column name in `variable`; stripping the "X"
# leaves the year as a character string.
df.spi.pop[["year"]] <- gsub("X", "", df.spi.pop[["variable"]])

# Per capita personal income rows
df.spi <- df.spi.pop[
	grep("Per capita personal income", df.spi.pop$Description, fixed = TRUE),
	c("GeoName", "year", "value")
]
names(df.spi) <- c("state", "year", "pi1000sUPD")

# Population rows
df.pop <- df.spi.pop[
	grep("Population", df.spi.pop$Description, fixed = TRUE),
	c("GeoName", "year", "value")
]
names(df.pop) <- c("state", "year", "popUPD")

##############
#Clean GSP
##############
# Gross state product: read the wide table, reshape the X<digits> columns to
# long form with the measurement stored in `gspUPD`, keep only the rows whose
# Description contains "All industry", and rename for the merge.
setwd("../../GSP/gsp_naics_all_c")
df.gsp.wide <- read.csv("gsp_naics_all_C.csv", stringsAsFactors = FALSE)
df.gsp.long <- melt(
	df.gsp.wide,
	id.vars = grep("X\\d+", names(df.gsp.wide), value = TRUE, invert = TRUE),
	value.name = "gspUPD"
)
# The former column names live in `variable`; dropping "X" gives the year
# as a character string.
df.gsp.long[["year"]] <- gsub("X", "", df.gsp.long[["variable"]])
df.gsp <- df.gsp.long[
	grep("All industry", df.gsp.long$Description, fixed = TRUE),
	c("GeoName", "year", "gspUPD")
]
names(df.gsp) <- c("state", "year", "gspUPDmil")

#Clean nonwhite
# Non-white share: one CSV per year in the "nonwhite" directory. The year is
# taken from the digits immediately before ".csv" in each file name.
setwd("../../nonwhite")
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the name; a bare ".csv" would also match names that
# merely contain "<any character>csv".
nw.files <- list.files(pattern = "\\.csv$")
if (length(nw.files) == 0L) {
	stop("No .csv files found in ", getwd(), call. = FALSE)
}
df.nw.list <- lapply(nw.files, function(f) {
	# The first two lines of each file are skipped before the header row.
	df.year <- read.csv(f, stringsAsFactors = FALSE, skip = 2)
	df.year[, "year"] <- sub(".*?(\\d+)\\.csv$", "\\1", f)
	df.year
})
# Keep the list keyed by file name, as before.
names(df.nw.list) <- nw.files
df.nw.raw <- do.call(rbind, df.nw.list)
# Replace the "<file>.<n>" row names produced by rbind() with 1..n.
rownames(df.nw.raw) <- NULL
# NOTE(review): assumes `White` is a proportion in [0, 1] - confirm against
# the raw files.
df.nw.raw$nonwhite <- 1 - df.nw.raw$White
df.nw <- df.nw.raw[, c("Location", "year", "nonwhite")]
df.nw <- df.nw[complete.cases(df.nw), ]
colnames(df.nw) <- c("state", "year", "nonwhiteUPD")

##############
#Merge together the updates
##############
# Full outer join of every cleaned series on (state, year), folding left to
# right from the ideology data, so a state-year present in any one source
# appears in the result (with NA for the series that lack it).
# NOTE(review): `year` is a character vector in the frames built with gsub();
# df.citi$year keeps whatever type the .dta file stores - confirm the merged
# key type is what downstream consumers expect.
df.updates <- Reduce(
	function(acc, nxt) merge(acc, nxt, by = c("state", "year"), all = TRUE),
	list(df.citi, df.spi, df.pop, df.nw, df.gsp)
)

# Step up one directory and write the combined table as a Stata file.
setwd("../")
write.dta(df.updates, "CSPupdates.dta")