################################################################################
# EstaTiDados
# GUSTAVO BRUSSE
#
# Purpose: read the PNAD 2011 person file, classify every household of four
# states into one of five household types, compute headship rates by
# household type, sex and five-year age group, save them to "2011.csv" and
# plot the rates.
################################################################################

# Packages ----
# Install only what is missing instead of reinstalling on every run.
for (pkg in c("devtools", "stringi")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
if (!requireNamespace("microdadosBrasil", quietly = TRUE)) {
  devtools::install_github("lucasmation/microdadosBrasil")
}

library(microdadosBrasil)
library(dplyr)
library(data.table)
library(ggplot2)
library(gridExtra)

# Working directory ----
# NOTE(review): machine-specific absolute path; the download and "2011.csv"
# are resolved relative to it. Adjust before running elsewhere.
setwd("D:\\Gustavo\\Zjg1ZGMzMmRlOThiNGRjZD\\Volume{970c5b77-5f9c-4b5d-af34-3a3b0c6b725e}\\Users\\Gustavo\\Desktop\\Doutorado\\PROJETO EDSD")

# Data ----
# Download PNAD 2011
download_sourceData("PNAD", 2011, unzip = TRUE)

# Household file
d <- read_PNAD("domicilios", 2011)

# Person file
p <- read_PNAD("pessoas", 2011)

# Keep only: state/control number (V0102), control variable (V0103),
# sex (V0302), age (V8005) and relationship to the reference person (V0401).
# Columns are picked by position, as in the original script; V0302 and V8005
# are never referenced by name, so their names are taken from the positions.
# as.data.frame() makes the base-R indexing below behave the same whatever
# class read_PNAD() returns.
data.1 <- as.data.frame(p[, c(1, 2, 4, 8, 9)])
sex_col <- names(data.1)[3]
age_col <- names(data.1)[4]

# Keep the states SP (35), RJ (33), MG (31) and ES (32).
# The original range filter (e.g. 35000000..35999999 for SP) shows that the
# first two of the eight digits of V0102 are the state code.
data.1$V0102 <- as.numeric(data.1$V0102)
state_code <- data.1$V0102 %/% 1e6
data.2 <- data.1[state_code %in% c(31, 32, 33, 35), , drop = FALSE]

# Drop rows with missing values
data.3 <- data.2[complete.cases(data.2), , drop = FALSE]

# Unique household identifier: V0102 followed by V0103.
# sprintf("%.0f") keeps paste from rendering round control numbers in
# scientific notation (e.g. "3.5e+07"), which would corrupt the id.
data.3$id <- as.numeric(paste0(sprintf("%.0f", data.3$V0102), data.3$V0103))

# Household types ----
# One 0/1 indicator per person for each relationship to the reference person.
data.3$V0401 <- as.numeric(data.3$V0401)
data.3$head <- as.numeric(data.3$V0401 == 1)          # reference person
data.3$spouse <- as.numeric(data.3$V0401 == 2)        # spouse
data.3$son <- as.numeric(data.3$V0401 == 3)           # children / stepchildren
data.3$other <- as.numeric(data.3$V0401 == 4)         # other relatives
data.3$non.relatives <- as.numeric(data.3$V0401 > 4)  # non-relatives

# Count members of each kind per household
role_cols <- c("head", "spouse", "son", "other", "non.relatives")
data.5 <- aggregate(
  data.3[, role_cols, drop = FALSE],
  by = list(id = data.3$id),
  FUN = sum
)

# Household type (HH); later assignments override earlier ones.
data.5$HH <- NA
# Type 1: one-person household
data.5$HH[data.5$spouse == 0 & data.5$son == 0 & data.5$other == 0 &
            data.5$non.relatives == 0] <- 1
# Type 2: couple without children (+ other relatives)
data.5$HH[data.5$spouse == 1 & data.5$son == 0 &
            data.5$non.relatives == 0] <- 2
# Type 3: lone parent (+ other relatives)
data.5$HH[data.5$spouse == 0 & data.5$son > 0 &
            data.5$non.relatives == 0] <- 3
# Type 4: couple with children (+ other relatives)
data.5$HH[data.5$spouse == 1 & data.5$son > 0 &
            data.5$non.relatives == 0] <- 4
# Type 5: extended or composite (any non-relative, or only other relatives)
data.5$HH[data.5$non.relatives > 0 |
            (data.5$spouse == 0 & data.5$son == 0 & data.5$other > 0)] <- 5

# Category labels. The original set `levels()` on the numeric column, which
# only attaches an attribute; the labels are applied through a factor where
# they are displayed, and HH itself stays numeric for the steps below.
hh_labels <- c("unipessoal", "casal_sem_filhos", "monoparental",
               "casal_com_filhos", "estendido_composto")

# Households left unclassified (e.g. more than one spouse)
sum(is.na(data.5$HH))

# Frequency table of household types
hh_freq <- table(factor(data.5$HH, levels = 1:5, labels = hh_labels))
addmargins(hh_freq)

# Attach the household type to every person
data.6 <- data.5[, c("id", "HH"), drop = FALSE]
data.7 <- merge(data.3, data.6, by = "id")
data.8 <- data.7[, c(sex_col, age_col, "head", "HH"), drop = FALSE]
names(data.8) <- c("sex", "age", "head", "HH")

# Headship rates ----
# Five-year age groups: 1 = 0-4, 2 = 5-9, ..., 16 = 75-79, 17 = 80+.
# NOTE(review): any age >= 80 lands in group 17, including a possible
# "unknown age" code such as 999 -- confirm against the PNAD dictionary.
data.8$age <- as.numeric(data.8$age)
age_group <- findInterval(data.8$age, seq(0, 80, by = 5))
age_group[which(age_group == 0L)] <- NA  # negative ages stay unclassified
data.8$ageclass <- as.numeric(age_group)

# Sex as numeric
data.8$sex <- as.numeric(data.8$sex)
str(data.8)

# Cross-tabulate by household type, sex and age group (and head status).
# Explicit factor levels keep every cell in place even when a category has
# no observations, so the positional indexing below cannot shift.
hh_f <- factor(data.8$HH, levels = 1:5)
age_f <- factor(data.8$ageclass, levels = 1:17)
head_f <- factor(data.8$head, levels = 0:1)
tab <- table(hh_f, data.8$sex, age_f)
tab1 <- table(hh_f, data.8$sex, age_f, head_f)

# The rest of the script addresses sex positionally as 1 and 2 (sorted codes).
# NOTE(review): the plots label position 1 "Male" and 2 "Female" -- confirm
# that the smaller sex code is the male one.
stopifnot(dim(tab)[2] == 2L)

# One row per (household type, sex, age group), age varying fastest.
# pop  = everyone of that sex and age group, over all household types;
# head = reference persons of that sex and age group in that household type.
cells <- expand.grid(age = 1:17, sex = 1:2, HH = 1:5)
pop_by_sex_age <- apply(tab, c(2, 3), sum)
df <- data.frame(
  HH = cells$HH,
  sex = cells$sex,
  age = cells$age,
  head = unclass(tab1)[cbind(cells$HH, cells$sex, cells$age, 2L)],
  pop = pop_by_sex_age[cbind(cells$sex, cells$age)]
)

# Spot-check the last cell
tab[5, 2, 17]      # all persons of sex 2, age group 17, in type-5 households
tab1[5, 2, 17, 2]  # heads among them

# Headship rate.
# NOTE(review): the column is called HSrate2001 although the data are from
# 2011; the name is kept because it is written to the CSV header.
df$HSrate2001 <- round(df$head / df$pop, 4)

# Save the rates as CSV
write.csv(df, file = "2011.csv")

# Plotting ----

# Build one panel: headship rate by age group for a given sex and household
# type, drawn as a grey line with a smoother of the given span on top.
plot_headship <- function(rates, sex_code, hh_code, title, span,
                          xlab = "Age groups", ylab = "Headship rate") {
  panel <- rates[rates$sex == sex_code & rates$HH == hh_code, , drop = FALSE]
  ggplot(panel, aes(x = age, y = HSrate2001)) +
    geom_line(size = 1.7, color = "grey69") +
    geom_smooth(span = span) +
    labs(title = title, x = xlab, y = ylab)
}

# The y axis shows the headship rate, so it is labelled as such (the original
# labelled it "Population").

# Male - couple without children
plot_maleCNC <- plot_headship(df, 1, 2, "Homens - Casal sem filhos", 0.45,
                              xlab = "Grupo de idade",
                              ylab = "Taxa de chefia")
plot_maleCNC

# Female - couple without children
plot_femaleCNC <- plot_headship(df, 2, 2,
                                "Female - Couple without (no) children", 0.5)
plot_femaleCNC

# Male - lone parent
plot_maleLP <- plot_headship(df, 1, 3, "Male - Lone Parente", 0.6)
plot_maleLP

# Female - lone parent
plot_femaleLP <- plot_headship(df, 2, 3, "Female - Lone Parente", 0.6)
plot_femaleLP

# Male - couple with children
plot_maleCWC <- plot_headship(df, 1, 4, "Male - Couple with children", 0.5)
plot_maleCWC

# Female - couple with children
plot_femaleCWC <- plot_headship(df, 2, 4, "Female - Couple with children", 0.5)
plot_femaleCWC

# Male - all others
plot_maleAO <- plot_headship(df, 1, 5, "Male - All others", 0.5)
plot_maleAO

# Female - all others (the original title said "Male" by copy-paste)
plot_femaleAO <- plot_headship(df, 2, 5, "Female - All others", 0.5)
plot_femaleAO

grid.arrange(plot_maleCNC, plot_femaleCNC,
             plot_maleLP, plot_femaleLP,
             plot_maleCWC, plot_femaleCWC,
             plot_maleAO, plot_femaleAO,
             ncol = 2)