#Read.csv needs to be commented after initial run to avoid access error through source
#x <- read.csv("https://raw.githubusercontent.com/ali-ce/datasets/master/Tropical-Storms/Storms.csv")
library(RCurl)
library(foreign)
library(tidyr)
library(dplyr)
library(ggplot2)
options(scipen = 999)
# Download the tropical storm data set and build the cleaned analysis table.
url <- "https://raw.githubusercontent.com/ali-ce/datasets/master/Tropical-Storms/Storms.csv"

# NOTE(review): ssl.verifypeer = FALSE turns off TLS certificate checking, so
# the download is not protected against tampering. Kept as-is because the
# header comment mentions access errors; re-enable once the CA setup allows it.
storm_csv_text <- getURL(url, ssl.verifypeer = FALSE)

# read.csv(text = ...) opens and closes its own text connection, so no
# connection is left dangling (the previous textConnection() was never closed).
storm.data <- read.csv(text = storm_csv_text)

# Keep only rows without NA values to enable more accurate analysis
e <- storm.data[complete.cases(storm.data), ]

# Convert to a data frame and confirm the type using class()
storm_data <- as.data.frame(e)
class(storm_data)
## [1] "data.frame"

# Cleaning data: drop columns that are not needed for the analysis
storm_data$Page.url <- NULL
storm_data$ID <- NULL

# Set width of the output in the markdown to 200 so that all columns appear on same line
#options(width=200)

# The cleaned storm_data built above is used as-is. The previous
# `storm_data <- as.data.frame(x)` read `x`, which is only created by the
# commented-out read.csv() call at the top of the file: it either failed with
# "object 'x' not found" or replaced the cleaned table with a stale raw copy.
names(storm_data)
##  [1] "Name"                            "Type"                           
##  [3] "Year"                            "Category..Saffir.Simpson.Scale."
##  [5] "Wind..knots."                    "Area"                           
##  [7] "Gender.of.Name"                  "Month.s."                       
##  [9] "Start.Date"                      "End.Date"                       
## [11] "Duration..days."
# Confirm the object is a data frame
class(storm_data)
## [1] "data.frame"
# Reset the row names (complete.cases() filtering leaves gaps in them)
row.names(storm_data) <- NULL
# Replace the machine-generated column names with clearer ones
names(storm_data)[names(storm_data) == "Wind..knots."] <- "WindSpeed"
names(storm_data)[names(storm_data) == "Month.s."] <- "Months"
names(storm_data)[names(storm_data) == "Duration..days."] <- "DurationDays"
# Check new column names
names(storm_data)
##  [1] "Name"                            "Type"                           
##  [3] "Year"                            "Category..Saffir.Simpson.Scale."
##  [5] "WindSpeed"                       "Area"                           
##  [7] "Gender.of.Name"                  "Months"                         
##  [9] "Start.Date"                      "End.Date"                       
## [11] "DurationDays"
# Parse Start.Date (text in day/month/year form) once with an explicit format
# and derive every date column from that single parse.
library(lubridate)
start_text <- as.character(storm_data$Start.Date)
# Parsing straight to Date avoids going through a local-time POSIXct, which can
# move the date by a day depending on the session time zone.
start_date <- as.Date(start_text, format = "%d/%m/%Y")

# Month number (1-12) of the storm's start date. Previously month() was given
# the unparsed text and had to guess its format.
storm_data$month <- month(start_date)
# Start date as POSIXct (midnight, local time zone)
storm_data$StartDateis <- as.POSIXct(strptime(start_text, "%d/%m/%Y"))
# Start date as Date
storm_data$StartDate2is <- start_date

library(tidyr)
library(dplyr)
library(ggplot2)
# Total storm duration (in days) for each start date.
# NOTE: the object and column names (flights_NYC_small, sum_delay) are
# leftovers from another analysis; they hold storm durations, not flight
# delays, and are kept unchanged so any later code using them still works.
flights_NYC_small <- storm_data %>%
  group_by(StartDate2is) %>%
  summarise(sum_delay = sum(DurationDays, na.rm = TRUE))

# Scatter plot of start date vs. total storm duration on that date
ggplot(flights_NYC_small, aes(StartDate2is, sum_delay)) +
  geom_point(
    col = rgb(255, 0, 0, 75, maxColorValue = 255), # semi-transparent red
    pch = 19,
    cex = 4
  ) +
  xlab("Storm Dates") +
  ylab("Storm Duration(In Days)") +
  ggtitle("Mapping trend for duration of storms across specific set of start dates") +
  theme_bw()

# Add `monthnames`: the three-letter English name of the start month, stored as
# a factor ordered Jan..Dec so plots and facets appear in calendar order.
# A single vectorised lookup replaces twelve `df[cond, ][, "monthnames"] <- ...`
# assignments, which stop with an error when `month` contains NA and can also
# fail for a month that has no rows. Rows with an NA month get an NA name.
storm_data$monthnames <- factor(month.abb[storm_data$month], levels = month.abb)

# Bin the Saffir-Simpson category into the left-closed intervals
# [1,2), [2,3), ..., [5,6) and label them 1 to 5; values outside 1 <= x < 6
# become NA.
storm_data$SaffirSimpsonScale <- cut(
  storm_data$Category..Saffir.Simpson.Scale.,
  breaks = seq(1, 6, 1),
  right = FALSE,
  labels = 1:5
)
# Wind speed against storm duration, drawn as one panel per start month.
# Points are coloured by Saffir-Simpson category and each panel gets a black
# smoothed trend line (no confidence band) to make any pattern easier to spot.
ggplot(storm_data, aes(x = WindSpeed, y = DurationDays)) +
  geom_point(aes(colour = SaffirSimpsonScale), shape = 16, size = 3.5) +
  geom_smooth(size = 1, se = FALSE, colour = "Black") +
  facet_wrap(~monthnames) +
  labs(
    x = "Wind Speed (In Knots)",
    y = "Storm Duration(In Days)",
    title = "Monthly Trend of Storms"
  ) +
  theme_bw()

# Stacked bar chart: number of storms of each type, split by region.
# Built from the full download (storm.data), i.e. without the NA filtering
# applied to storm_data.

# Large bottom margin for the rotated bar labels; xpd = TRUE lets the legend
# and labels draw outside the plot region.
par(mar = c(10, 3, 3, 1), xpd = TRUE)
##storm_data<- storm.data[complete.cases(storm_data), ]
counts <- table(storm.data$Area, storm.data$Type)
# One colour per region, shared by the bars and the legend
area_cols <- unique(as.factor(storm.data$Area))
barplot(counts,
  ylim = c(0, 700),
  xlim = c(0, 100),
  cex.axis = 0.75,
  las = 2, cex.names = 0.65, xlab = "",
  col = area_cols,
  beside = FALSE, width = 10
)
legend("topright",
  pch = 20,
  legend = rownames(counts),
  title = "Storm Regions",
  fill = area_cols,
  bty = "n",
  cex = 0.65
)
# Axis title goes in the bottom margin, as in the gender chart. The previous
# text(40, -630, ...) used data coordinates, so the label could fall outside
# the drawable area depending on the device height.
mtext("Types of Storms", side = 1, line = 7)

# Grouped bar chart: number of storms per region, split by the gender of the
# storm's name. Built from the full download (storm.data).

# Large bottom margin for the rotated region labels; xpd = TRUE lets the
# legend draw outside the plot region.
par(mar = c(10, 3, 3, 1), xpd = TRUE)
# Storms with an empty gender field are grouped under "Other"
storm.data$Gender.of.Name <- sub("^$", "Other", storm.data$Gender.of.Name)
##storm_data<- storm.data[complete.cases(storm_data), ]
counts <- table(storm.data$Gender.of.Name, storm.data$Area)
# One colour per gender category, shared by the bars and the legend
gender_cols <- unique(as.factor(storm.data$Gender.of.Name))
barplot(counts,
  ylim = c(0, 250),
  xlim = c(0, 100),
  cex.axis = 0.75,
  las = 2, cex.names = 0.65, xlab = "",
  col = gender_cols,
  beside = TRUE, width = 3.6
)
# The legend lists the gender categories (rows of `counts`), so it is titled
# accordingly; it was previously mislabelled "Storm Regions".
legend("topright",
  pch = 20,
  legend = rownames(counts),
  title = "Gender of Name",
  fill = gender_cols,
  bty = "n",
  cex = 0.65
)
mtext("Storm Regions", side = 1, line = 7)