# The read.csv() call on the next line must stay commented out after the initial run; otherwise running this script through source() fails with an access error.
#x <- read.csv("https://raw.githubusercontent.com/ali-ce/datasets/master/Tropical-Storms/Storms.csv")
library(RCurl)
library(foreign)
library(tidyr)
library(dplyr)
library(ggplot2)
# Print numbers in full instead of scientific notation (affects printed values and plot axes)
options(scipen = 999)
# Location of the raw tropical-storm CSV
url <- "https://raw.githubusercontent.com/ali-ce/datasets/master/Tropical-Storms/Storms.csv"
# Download the CSV as one character string.
# NOTE(review): ssl.verifypeer = FALSE switches off TLS certificate verification, so the
# download is not protected against tampering. It is kept to preserve existing behaviour;
# enable verification once RCurl is confirmed to find a CA bundle on the target machines.
storm_csv_text <- getURL(url, ssl.verifypeer = FALSE)
# Parse the downloaded text, then close the text connection explicitly so it is not
# left open (an unclosed connection triggers "closing unused connection" warnings).
storm_csv_con <- textConnection(storm_csv_text)
storm.data <- read.csv(storm_csv_con)
close(storm_csv_con)
# Keep only rows without NA values to enable more accurate analysis
# (`e` is the intermediate result, kept under its original name)
e <- storm.data[complete.cases(storm.data), ]
# Subsetting a data frame yields a data frame; class() confirms it
storm_data <- e
class(storm_data)
## [1] "data.frame"
# Cleaning data: drop the columns that are not needed for the analysis
storm_data$Page.url <- NULL
storm_data$ID <- NULL
#Set width of the output in the markdown to 200 so that all columns appear on same line
#options(width=200)
# Inspect the cleaned data frame.
# Do not rebuild storm_data from `x` here: `x` only exists when the commented-out
# read.csv() call at the top of the script has been run by hand, and using it would
# discard the NA and column cleaning done above.
#head(storm_data)
names(storm_data)
## [1] "Name" "Type"
## [3] "Year" "Category..Saffir.Simpson.Scale."
## [5] "Wind..knots." "Area"
## [7] "Gender.of.Name" "Month.s."
## [9] "Start.Date" "End.Date"
## [11] "Duration..days."
# Confirm that storm_data is a data frame
class(storm_data)
## [1] "data.frame"
# Reset the row names to 1..n (rows were dropped by the NA filter)
row.names(storm_data) <- NULL
# Give the measurement columns shorter, self-explanatory names.
# A name that is not present is simply left untouched.
names(storm_data)[names(storm_data) == "Wind..knots."] <- "WindSpeed"
names(storm_data)[names(storm_data) == "Month.s."] <- "Months"
names(storm_data)[names(storm_data) == "Duration..days."] <- "DurationDays"
# Confirm the renamed columns
names(storm_data)
## [1] "Name" "Type"
## [3] "Year" "Category..Saffir.Simpson.Scale."
## [5] "WindSpeed" "Area"
## [7] "Gender.of.Name" "Months"
## [9] "Start.Date" "End.Date"
## [11] "DurationDays"
# Parse Start.Date once, with an explicit day/month/year format, and derive every
# date column from that parse.
library(lubridate)
start_date <- as.Date(as.character(storm_data$Start.Date), format = "%d/%m/%Y")
# Month number (1-12) of the start date. It is taken from the parsed Date rather than
# from the raw text, because day/month/year text is not a format R can recognise
# unambiguously on its own.
storm_data$month <- month(start_date)
# Start date as POSIXct (strptime() without tz uses the current time zone)
storm_data$StartDateis <- as.POSIXct(strptime(as.character(storm_data$Start.Date), "%d/%m/%Y"))
# Start date as Date, built straight from the text: as.Date() on a POSIXct value
# converts in UTC by default, which can move a local-midnight timestamp to the
# previous calendar day in time zones east of UTC.
storm_data$StartDate2is <- start_date
library(tidyr)
library(dplyr)
library(ggplot2)
# Total storm duration (in days) per start date.
# The object name `flights_NYC_small` and the column name `sum_delay` do not describe
# storm data; they are kept unchanged so that any code relying on them keeps working.
flights_NYC_small <- storm_data %>%
  group_by(StartDate2is) %>%
  summarise(sum_delay = sum(DurationDays, na.rm = TRUE))
# Scatter plot of start date vs. total storm duration.
# print() is explicit so the plot is also drawn when the script is run via source(),
# where top-level values are not auto-printed.
print(
  ggplot(flights_NYC_small, aes(StartDate2is, sum_delay)) +
    geom_point(colour = rgb(255, 0, 0, 75, maxColorValue = 255), shape = 19, size = 4) +
    xlab("Storm Dates") +
    ylab("Storm Duration(In Days)") +
    ggtitle("Mapping trend for duration of storms across specific set of start dates") +
    theme_bw()
)

# Three-letter month name of each storm's start month, as a factor ordered Jan..Dec.
# month.abb is base R's constant c("Jan", "Feb", ..., "Dec"), so indexing it by the
# month number gives the name directly. Unlike assigning into a row subset month by
# month, this also works when a month has no storms or when `month` is NA
# (the result is then NA for that row).
storm_data$monthnames <- factor(month.abb[storm_data$month], levels = month.abb)
# Bin the Saffir-Simpson category into [1,2), [2,3), ..., [5,6), labelled 1 to 5;
# values outside that range become NA.
storm_data$SaffirSimpsonScale <- cut(
  storm_data$Category..Saffir.Simpson.Scale.,
  breaks = 1:6,
  right = FALSE,
  labels = 1:5
)
# Wind speed vs. storm duration, one panel per start month, with points coloured by
# Saffir-Simpson category and a smoothed trend line per panel.
# print() is explicit so the plot is also drawn when the script is run via source().
# geom_smooth() is left at its default line width, which is already 1; passing
# `size` for a line is deprecated in ggplot2 >= 3.4.0 and would only add a warning.
print(
  ggplot(storm_data, aes(WindSpeed, DurationDays)) +
    geom_point(aes(colour = SaffirSimpsonScale), shape = 16, size = 3.5) +
    xlab("Wind Speed (In Knots)") +
    ylab("Storm Duration(In Days)") +
    facet_wrap(~monthnames) +
    ggtitle("Monthly Trend of Storms") +
    geom_smooth(se = FALSE, colour = "Black") +
    theme_bw()
)

# Stacked bar chart: number of storms of each type, split by region.
# A wide bottom margin leaves room for the rotated bar labels, and xpd = TRUE lets the
# legend and axis title draw outside the plot region. The previous graphics settings
# are saved here and restored once the chart is complete.
old_par <- par(mar = c(10, 3, 3, 1), xpd = TRUE)
# Rows = regions (Area), columns = storm types (Type)
counts <- table(storm.data$Area, storm.data$Type)
# Colour vector used for both the bar segments and the legend, so the two always agree
area_colours <- unique(as.factor(storm.data$Area))
barplot(counts,
        ylim = c(0, 700),
        xlim = c(0, 100),
        cex.axis = 0.75,
        las = 2, cex.names = 0.65, xlab = "",
        col = area_colours,
        beside = FALSE, width = 10)
legend("topright", pch = 20,
       legend = rownames(counts),
       title = "Storm Regions",
       fill = area_colours,
       bty = "n",
       cex = 0.65
)
# Axis title placed in the bottom margin by line number (as in the chart of name gender
# by region) instead of at a fixed user coordinate, which depends on the device size.
mtext("Types of Storms", side = 1, line = 7)
par(old_par)

# Grouped bar chart: number of storms per region, split by the gender of the storm name.
# A wide bottom margin leaves room for the rotated region labels, and xpd = TRUE lets
# the legend draw outside the plot region. The previous graphics settings are saved
# here and restored once the chart is complete.
old_par <- par(mar = c(10, 3, 3, 1), xpd = TRUE)
# Storms whose name has no recorded gender (empty string) are grouped as "Other"
storm.data$Gender.of.Name <- sub("^$", "Other", storm.data$Gender.of.Name)
# Rows = name gender, columns = regions (Area)
counts <- table(storm.data$Gender.of.Name, storm.data$Area)
# Colour vector used for both the bars and the legend, so the two always agree
gender_colours <- unique(as.factor(storm.data$Gender.of.Name))
barplot(counts,
        ylim = c(0, 250),
        xlim = c(0, 100),
        cex.axis = 0.75,
        las = 2, cex.names = 0.65, xlab = "",
        col = gender_colours,
        beside = TRUE, width = 3.6)
# The legend entries are the name genders (the rows of `counts`), so it is titled
# accordingly; the regions are labelled on the x axis below.
legend("topright", pch = 20,
       legend = rownames(counts),
       title = "Gender of Name",
       fill = gender_colours,
       bty = "n",
       cex = 0.65
)
mtext("Storm Regions", side = 1, line = 7)
par(old_par)
