library(tidyverse)

Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang
Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.1.1       v purrr   0.3.2  
v tibble  2.1.1       v dplyr   0.8.0.1
v tidyr   0.8.3       v stringr 1.4.0  
v readr   1.3.1       v forcats 0.4.0  
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()


# Read the EPC certificate extract and the postcode lookup file
# (both paths are relative to the user's home directory).
certificates <- read.csv(
  file = "~/11.2020_Project Cygnus/20.11.2020_EPC_Leeds/domestic-E08000035-Leeds/certificates.csv"
)
PC <- read.csv(
  file = "~/11.2020_Project Cygnus/20.11.2020_EPC_Leeds/domestic-E08000035-Leeds/Postcode File/PCD_OA_LSOA_MSOA_LAD_NOV20_UK_LU.csv"
)


# Number of certificates whose main-heating description is missing (NA).
sum(is.na(certificates[["MAINHEAT_DESCRIPTION"]]))


# Sort certificates by building, newest inspection first, so that the
# de-duplication below keeps the most recent certificate of each building.
# The previous code wrapped the building reference in rev(), which reverses
# the key vector but not the rows, so the resulting permutation no longer
# corresponded to the rows it was applied to and the certificate kept per
# building was effectively arbitrary.
# xtfrm() turns the date column (factor or character) into sortable numbers so
# it can be negated for a descending sort.
# NOTE(review): assumes INSPECTION_DATE sorts chronologically as text
# (e.g. ISO "YYYY-MM-DD") - confirm against the data.
certificates_ord <- certificates[
  order(
    certificates[["BUILDING_REFERENCE_NUMBER"]],
    -xtfrm(certificates[["INSPECTION_DATE"]])
  ),
]

# duplicated() flags every occurrence after the first, so only the first
# (i.e. newest) row of each BUILDING_REFERENCE_NUMBER is retained.
certificates_dedup <- certificates_ord[
  !duplicated(certificates_ord[["BUILDING_REFERENCE_NUMBER"]]),
]


# Reduce the postcode lookup to the three columns used below.
PC_LSOA <- select(PC, pcds, lsoa11cd, ladcd)

# Match each de-duplicated certificate to the lookup by postcode (certificates
# without a matching postcode are dropped by the inner join), then keep only
# rows whose ladcd is "E08000035".
cert_final <- filter(
  inner_join(certificates_dedup, PC_LSOA, by = c("POSTCODE" = "pcds")),
  ladcd == "E08000035"
)

Warning message:
"Column `POSTCODE`/`pcds` joining factors with different levels, coercing to character vector"


# List the distinct raw main-heating descriptions.
unique(cert_final[["MAINHEAT_DESCRIPTION"]])


# Derive Heat_type by deleting everything up to and including the last
# ", " in the description (the leading ".*" is greedy), leaving only the
# final comma-separated part. Descriptions without ", " are kept whole.
cert_final[["Heat_type"]] <- sub(
  pattern = ".*, ",
  replacement = "",
  x = cert_final[["MAINHEAT_DESCRIPTION"]]
)


# Inspect the resulting categories and how often each occurs.
unique(cert_final[["Heat_type"]])
table(cert_final[["Heat_type"]])

                                |electric|trydan 
                                             179 
                   |mains gas|nwy prif gyflenwad 
                                              56 
                                      anthracite 
                                              11 
                            Boiler and radiators 
                                               4 
                                     bottled gas 
                                               3 
                                     bottled LPG 
                                               5 
                                            coal 
                                              96 
                                       community 
                                              19 
                                Community scheme 
                                            4177 
                       Community scheme with CHP 
                                              34 
                    dual fuel (mineral and wood) 
                                              94 
                                        electric 
                                           21279 
                        Electric ceiling heating 
                                              43 
                        Electric storage heaters 
                                           12531 
                     Electric underfloor heating 
                                             979 
                                    Electricaire 
                                             348 
                                     electricity 
                                               3 
                                             LPG 
                                             399 
                                       mains gas 
                                          179520 
     No system present: electric heaters assumed 
                                            1165 
                              nwy prif gyflenwad 
                                               3 
                                             oil 
                                             373 
                       Portable electric heaters 
                                               8 
Portable electric heaters assumed for most rooms 
                                             746 
Portable electric heating assumed for most rooms 
                                             108 
                                       radiators 
                                             743 
                              SAP05:Main-Heating 
                                             384 
                                  smokeless fuel 
                                              55 
                                      wood chips 
                                               2 
                                       wood logs 
                                              33 
                                    wood pellets 
                                              16


# Columns needed for the heating-type analysis.
Heating <- cert_final %>%
  select(HEATING_COST_CURRENT, HEATING_COST_POTENTIAL, MAINHEAT_DESCRIPTION, Heat_type, lsoa11cd)


# Collapse the raw Heat_type labels into a tidier set of categories (Gen_Heat).
# Labels that map to the same category are grouped with %in%; the previous
# version listed "anthracite" twice, the second occurrence being unreachable.
# Any Heat_type not listed here (and not caught by the MAINHEAT_DESCRIPTION
# rule) matches no branch, so case_when() returns NA for it.
Heating2 <- Heating %>%
  mutate(
    Gen_Heat = case_when(
      Heat_type == "bottled gas" ~ "Bottled Gas",
      Heat_type == "bottled LPG" ~ "Bottled LPG",
      Heat_type == "LPG" ~ "LPG",
      Heat_type %in% c(
        "mains gas",
        "|mains gas|nwy prif gyflenwad",
        "nwy prif gyflenwad"
      ) ~ "Mains Gas",
      Heat_type %in% c("anthracite", "coal") ~ "Coal and Anthracite",
      Heat_type == "dual fuel (mineral and wood)" ~ "Dual Fuel (Mineral and Wood)",
      Heat_type %in% c("wood chips", "wood logs", "wood pellets") ~ "Wood",
      Heat_type %in% c("community", "Community scheme") ~ "Community Scheme",
      Heat_type == "Community scheme with CHP" ~ "Community Scheme with CHP",
      Heat_type %in% c(
        "electric",
        "|electric|trydan",
        "Electricaire",
        "electricity"
      ) ~ "Electric",
      Heat_type == "Electric ceiling heating" ~ "Electric Ceiling Heating",
      Heat_type == "Electric storage heaters" ~ "Electric Storage Heating",
      # For this description Heat_type is just "radiators", so the full
      # description has to be tested instead.
      MAINHEAT_DESCRIPTION == "Electric storage heaters, radiators" ~ "Electric Storage Heating & Radiators",
      Heat_type == "Electric underfloor heating" ~ "Electric Underfloor Heating",
      Heat_type %in% c(
        "Portable electric heaters",
        "Portable electric heaters assumed for most rooms",
        "Portable electric heating assumed for most rooms"
      ) ~ "Portable Electric Heating",
      Heat_type == "No system present: electric heaters assumed" ~ "No system present: Electric heaters assumed",
      Heat_type == "oil" ~ "Oil",
      Heat_type == "smokeless fuel" ~ "Smokeless Fuel",
      Heat_type %in% c("Boiler and radiators", "SAP05:Main-Heating") ~ "Unknown"
    )
  )


# Frequency of each grouped heating category (columns Var1 and Freq).
Heat_Freq <- data.frame(table(Heating2[["Gen_Heat"]]))
write.csv(Heat_Freq, file = "HeatingTypes_Frequencies.csv")

# Cross-tabulation of LSOA code by grouped heating category.
Heat_LSOA <- xtabs(formula = ~ lsoa11cd + Gen_Heat, data = Heating2)
write.csv(Heat_LSOA, file = "HeatingType_LSOA.csv")


# Horizontal bar chart of the category counts, bars ordered by frequency.
ggplot(
  data = mutate(Heat_Freq, Var1 = fct_reorder(Var1, Freq)),
  mapping = aes(x = Var1, y = Freq)
) +
  geom_bar(stat = "identity", fill = "#D73058", alpha = 0.6, width = 0.4) +
  coord_flip() +
  theme_bw() +
  xlab("Heating Systems or Fuels") +
  ylab("Number of Dwellings")


# Alternative, coarser grouping of Heat_type into Gen_Heat with six
# categories: "Other", "Mains Gas", "Community Scheme", "Electric", "Oil" and
# "Unknown".
# Labels sharing a category are grouped with %in%; the previous version listed
# "anthracite" twice, the second occurrence being unreachable.
# Any Heat_type not listed here (and not caught by the MAINHEAT_DESCRIPTION
# rule) matches no branch, so case_when() returns NA for it.
HeatingONS <- Heating %>%
  mutate(
    Gen_Heat = case_when(
      Heat_type %in% c(
        "bottled gas",
        "bottled LPG",
        "LPG",
        "anthracite",
        "coal",
        "dual fuel (mineral and wood)",
        "wood chips",
        "wood logs",
        "wood pellets",
        "smokeless fuel"
      ) ~ "Other",
      Heat_type %in% c(
        "mains gas",
        "|mains gas|nwy prif gyflenwad",
        "nwy prif gyflenwad"
      ) ~ "Mains Gas",
      Heat_type %in% c(
        "community",
        "Community scheme",
        "Community scheme with CHP"
      ) ~ "Community Scheme",
      Heat_type %in% c(
        "electric",
        "|electric|trydan",
        "Electricaire",
        "electricity",
        "Electric ceiling heating",
        "Electric storage heaters",
        "Electric underfloor heating",
        "Portable electric heaters",
        "Portable electric heaters assumed for most rooms",
        "Portable electric heating assumed for most rooms"
      ) ~ "Electric",
      # For this description Heat_type is just "radiators", so the full
      # description has to be tested instead.
      MAINHEAT_DESCRIPTION == "Electric storage heaters, radiators" ~ "Electric",
      Heat_type == "oil" ~ "Oil",
      Heat_type %in% c(
        "No system present: electric heaters assumed",
        "Boiler and radiators",
        "SAP05:Main-Heating"
      ) ~ "Unknown"
    )
  )


# Frequency of each ONS-style heating category (columns Var1 and Freq).
# The export previously wrote Heat_Freq (the non-ONS table) by mistake.
Heat_FreqONS <- data.frame(table(HeatingONS$Gen_Heat))
write.csv(Heat_FreqONS, 'HeatingTypesONS_Frequencies.csv')

# Cross-tabulation of LSOA code by ONS-style heating category.
# The export previously wrote Heat_LSOA to 'HeatingType_LSOA.csv', i.e. it
# re-wrote the non-ONS table over the non-ONS file and never saved this one;
# it now writes the ONS table to its own file.
Heat_LSOAONS <- xtabs(~ lsoa11cd + Gen_Heat, data=HeatingONS)
write.csv(Heat_LSOAONS, 'HeatingTypeONS_LSOA.csv')


# Horizontal bar chart of the ONS-style category counts, ordered by frequency.
ggplot(
  data = mutate(Heat_FreqONS, Var1 = fct_reorder(Var1, Freq)),
  mapping = aes(x = Var1, y = Freq)
) +
  geom_bar(stat = "identity", fill = "#D73058", alpha = 0.6, width = 0.4) +
  coord_flip() +
  theme_bw() +
  xlab("Heating Systems or Fuels") +
  ylab("Number of Dwellings")


# Number of certificates with a missing (NA) transaction type.
sum(is.na(cert_final[["TRANSACTION_TYPE"]]))


# Distinct transaction types and how often each occurs.
unique(cert_final[["TRANSACTION_TYPE"]])
table(cert_final[["TRANSACTION_TYPE"]])

                                                         assessment for green deal 
                                                                              6876 
                                                                    ECO assessment 
                                                                              8404 
                                                                   FiT application 
                                                                              2634 
                                                              following green deal 
                                                                               297 
                                                                     marketed sale 
                                                                             80455 
                                                                      new dwelling 
                                                                             20807 
                                                                 non marketed sale 
                                                                              4866 
                                                                 none of the above 
                                                                             15365 
                                                                      not recorded 
                                                                                 7 
                                                                            rental 
                                                                              1667 
                                                                  rental (private) 
                                                                             42630 
rental (private) - this is for backwards compatibility only and should not be used 
                                                                                13 
                                                                   rental (social) 
                                                                             39153 
                                                                   RHI application 
                                                                               115 
                                                            Stock Condition Survey 
                                                                                10 
                                                                           unknown 
                                                                               117


# Transaction type per certificate, together with its LSOA code.
Transaction <- cert_final %>%
  select(lsoa11cd, TRANSACTION_TYPE)

# Fold "not recorded" into the existing "unknown" category
# (gsub() also converts the column to character).
Transaction$TRANSACTION_TYPE <- gsub('not recorded', 'unknown', Transaction$TRANSACTION_TYPE)

# Drop the legacy "backwards compatibility" rental label.
# %in% is used instead of == because == returns NA for a missing transaction
# type, and indexing rows with NA inserts all-NA rows into the data frame.
Transaction <- Transaction[!(Transaction$TRANSACTION_TYPE %in% "rental (private) - this is for backwards compatibility only and should not be used"),]

# Check the categories that remain.
unique(Transaction$TRANSACTION_TYPE)


# Frequency of each transaction type (columns Var1 and Freq).
TransDf <- data.frame(table(Transaction[["TRANSACTION_TYPE"]]))
write.csv(TransDf, file = "TransactionType_Frequencies.csv")

# Cross-tabulation of LSOA code by transaction type.
TransDf1 <- xtabs(formula = ~ lsoa11cd + TRANSACTION_TYPE, data = Transaction)
write.csv(TransDf1, file = "TransactionType_LSOA.csv")


# Horizontal bar chart of transaction-type counts, ordered by frequency.
ggplot(
  data = mutate(TransDf, Var1 = fct_reorder(Var1, Freq)),
  mapping = aes(x = Var1, y = Freq)
) +
  geom_bar(stat = "identity", fill = "#178CFF", alpha = 0.6, width = 0.4) +
  coord_flip() +
  theme_bw() +
  xlab("Reason for EPC Evaluation") +
  ylab("Number of Dwellings")


# Same join as for cert_final, but on the full certificate table, so buildings
# with several certificates contribute every one of them.
cert_ALL <- inner_join(certificates, PC_LSOA, by = c("POSTCODE" = "pcds")) %>%
  filter(ladcd == "E08000035")

# Transaction type per certificate, together with its LSOA code.
Transaction_ALL <- cert_ALL %>%
  select(lsoa11cd, TRANSACTION_TYPE)

# Fold "not recorded" into the existing "unknown" category
# (gsub() also converts the column to character).
Transaction_ALL$TRANSACTION_TYPE <- gsub('not recorded', 'unknown', Transaction_ALL$TRANSACTION_TYPE)

# Drop the legacy "backwards compatibility" rental label.
# %in% is used instead of == because == returns NA for a missing transaction
# type, and indexing rows with NA inserts all-NA rows into the data frame.
Transaction_ALL <- Transaction_ALL[!(Transaction_ALL$TRANSACTION_TYPE %in% "rental (private) - this is for backwards compatibility only and should not be used"),]

Warning message:
"Column `POSTCODE`/`pcds` joining factors with different levels, coercing to character vector"


# Frequency of each transaction type across all certificates
# (columns Var1 and Freq).
TransDf_ALL <- data.frame(table(Transaction_ALL[["TRANSACTION_TYPE"]]))
write.csv(TransDf_ALL, file = "ALLTransactionType_Frequencies.csv")

# Cross-tabulation of LSOA code by transaction type across all certificates.
TransDf1_ALL <- xtabs(formula = ~ lsoa11cd + TRANSACTION_TYPE, data = Transaction_ALL)
write.csv(TransDf1_ALL, file = "ALLTransactionType_LSOA.csv")


# Horizontal bar chart of transaction-type counts, ordered by frequency.
ggplot(
  data = mutate(TransDf_ALL, Var1 = fct_reorder(Var1, Freq)),
  mapping = aes(x = Var1, y = Freq)
) +
  geom_bar(stat = "identity", fill = "#178CFF", alpha = 0.6, width = 0.4) +
  coord_flip() +
  theme_bw() +
  xlab("Reason for EPC Evaluation") +
  ylab("Number of Dwellings")


# Certificates whose transaction type mentions "green deal".
GreenDeal <- certificates_ord %>%
  filter(str_detect(TRANSACTION_TYPE, "green deal"))


# Keep only buildings that have more than one green-deal certificate, and only
# the columns needed for the before/after comparison.
GreenDeal <- GreenDeal[GreenDeal$BUILDING_REFERENCE_NUMBER %in% GreenDeal$BUILDING_REFERENCE_NUMBER[duplicated(GreenDeal$BUILDING_REFERENCE_NUMBER)],] %>%
    select(BUILDING_REFERENCE_NUMBER, TRANSACTION_TYPE, INSPECTION_DATE, CURRENT_ENERGY_EFFICIENCY, POTENTIAL_ENERGY_EFFICIENCY)

# Store the inspection date as a number (days since 1970-01-01) so it can be
# sorted and compared. Calling as.numeric() directly on the column returned
# the factor's internal level codes when the column was a factor, and NA when
# it was character; going through as.Date() gives a real date in both cases.
# NOTE(review): assumes INSPECTION_DATE is in a format as.Date() parses by
# default (e.g. "YYYY-MM-DD") - confirm against the data.
GreenDeal$INSPECTION_DATE <- as.numeric(as.Date(as.character(GreenDeal$INSPECTION_DATE)))


# Assessment for Green Deal:

# Take the "assessment for green deal" certificates and sort them by building
# with the largest INSPECTION_DATE first, so the de-duplication keeps that row.
GD_assmt <- arrange(
  filter(GreenDeal, TRANSACTION_TYPE == "assessment for green deal"),
  BUILDING_REFERENCE_NUMBER,
  desc(INSPECTION_DATE)
)
GD_assmt <- GD_assmt[!duplicated(GD_assmt[["BUILDING_REFERENCE_NUMBER"]]), ]

# Gap between potential and current energy efficiency at assessment time.
GD_assmt[["Diff_EE"]] <-
  GD_assmt[["POTENTIAL_ENERGY_EFFICIENCY"]] - GD_assmt[["CURRENT_ENERGY_EFFICIENCY"]]

# Prefix every column with "A_" to mark it as coming from the assessment.
colnames(GD_assmt) <- paste("A", colnames(GD_assmt), sep = "_")


# Following Green Deal:

# Take the "following green deal" certificates and sort them by building with
# the smallest INSPECTION_DATE first, so the de-duplication keeps that row.
GD_follw <- arrange(
  filter(GreenDeal, TRANSACTION_TYPE == "following green deal"),
  BUILDING_REFERENCE_NUMBER,
  INSPECTION_DATE
)
GD_follw <- GD_follw[!duplicated(GD_follw[["BUILDING_REFERENCE_NUMBER"]]), ]

# Gap between potential and current energy efficiency after the Green Deal.
GD_follw[["Diff_EE"]] <-
  GD_follw[["POTENTIAL_ENERGY_EFFICIENCY"]] - GD_follw[["CURRENT_ENERGY_EFFICIENCY"]]

# Prefix every column with "F_" to mark it as coming from the follow-up.
colnames(GD_follw) <- paste("F", colnames(GD_follw), sep = "_")


# Pair each building's assessment row with its follow-up row.
GD_comp <- inner_join(
  GD_assmt,
  GD_follw,
  by = c("A_BUILDING_REFERENCE_NUMBER" = "F_BUILDING_REFERENCE_NUMBER")
)


# Keep only pairs where the assessment precedes the follow-up inspection.
GD_comp2 <- filter(GD_comp, A_INSPECTION_DATE < F_INSPECTION_DATE)


# Change in current energy efficiency: follow-up minus assessment.
GD_comp2[["Diff_CEE"]] <-
  GD_comp2[["F_CURRENT_ENERGY_EFFICIENCY"]] - GD_comp2[["A_CURRENT_ENERGY_EFFICIENCY"]]


# Follow-up current efficiency minus the potential stated at assessment.
GD_comp2[["Diff_AP_FC"]] <-
  GD_comp2[["F_CURRENT_ENERGY_EFFICIENCY"]] - GD_comp2[["A_POTENTIAL_ENERGY_EFFICIENCY"]]


# Change in the potential-minus-current gap: follow-up minus assessment.
GD_comp2[["Diff_ADE_FDE"]] <- GD_comp2[["F_Diff_EE"]] - GD_comp2[["A_Diff_EE"]]


# Mean change in current energy efficiency (follow-up minus assessment).
mean(GD_comp2$Diff_CEE)


# Mean of follow-up current efficiency minus assessment potential.
mean(GD_comp2$Diff_AP_FC)


# Mean potential-minus-current gap at assessment, sign-flipped:
# negative to make it comparable
mean(-GD_comp2$A_Diff_EE)


# Mean change in the potential-minus-current gap.
mean(GD_comp2$Diff_ADE_FDE)


# Export the paired comparison table.
write.csv(GD_comp2, file = "Green_Deal_Comparisons.csv")


# Histogram of Diff_CEE (bins 10 points wide).
ggplot(data = GD_comp2, mapping = aes(x = Diff_CEE)) +
  geom_histogram(binwidth = 10, color = "black", fill = "#67E767", alpha = 0.6) +
  theme_bw() +
  xlab("Difference in Energy Efficiency") +
  ylab("Number of Dwellings")


# Histogram of Diff_AP_FC (bins 10 points wide).
ggplot(data = GD_comp2, mapping = aes(x = Diff_AP_FC)) +
  geom_histogram(binwidth = 10, color = "black", fill = "#67E767", alpha = 0.6) +
  theme_bw() +
  xlab("Difference in Energy Efficiency") +
  ylab("Number of Dwellings")


# Histogram of the sign-flipped A_Diff_EE (bins 10 points wide).
ggplot(data = GD_comp2, mapping = aes(x = -(A_Diff_EE))) +
  geom_histogram(binwidth = 10, color = "black", fill = "#67E767", alpha = 0.6) +
  theme_bw() +
  xlab("Difference in Energy Efficiency") +
  ylab("Number of Dwellings")


# Histogram of Diff_ADE_FDE (bins 10 points wide).
ggplot(data = GD_comp2, mapping = aes(x = Diff_ADE_FDE)) +
  geom_histogram(binwidth = 10, color = "black", fill = "#67E767", alpha = 0.6) +
  theme_bw() +
  xlab("Difference in Energy Efficiency") +
  ylab("Number of Dwellings")

Heating, Transaction Types & Green Deal

Analysis Prep

Packages

Load Data

Analysis: Heating Types

Data Cleaning

Missing Values

Duplicate Observations

Invalid Postcodes

Unique Values

Grouping Unique Values

Tables

Visualisation

Extra: Grouping according to ONS report

Grouping

Tables

Visualisation

Analysis: Transaction Types

Data Cleaning

Missing Values

Unique Values

Grouping Unique Values

Tables

Visualisation

Extra: Analysis for all observations (so including duplicates)

Cleaning

Tables

Visualisation

Green Deal

Data Cleaning

Calculating Difference Variables

Changes in 'Current Energy Efficiency' before and after Green Deal

Difference between 'Potential Energy Efficiency' before Green Deal and 'Current Energy Efficiency' after

Difference between Past and Present: Current and Potential Divide

Means of Difference Variables

'Current Energy Efficiency' before and after Green Deal:

'Potential Energy Efficiency' before Green Deal and 'Current Energy Efficiency' after:

'Potential Energy Efficiency' and 'Current Energy Efficiency' before Green Deal:

Past and Present: Current-Potential Divide:

Table

Visualisation of Differences

'Current Energy Efficiency' before and after Green Deal:

'Potential Energy Efficiency' before Green Deal and 'Current Energy Efficiency' after:

'Potential Energy Efficiency' and 'Current Energy Efficiency' before Green Deal:

Past and Present: Current and Potential Divide: