#dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style('whitegrid')
sns.set_palette('coolwarm')
# Read the cleaned kickstart CSV into the working dataframe `df`, then
# evaluate a preview of the first rows and print the column/dtype summary.
df = pd.read_csv(filepath_or_buffer='resources/kickstart201801_cleaned.csv')
df.head()
df.info()
from datetime import datetime
# Convert the 'launched' and 'deadline' columns to datetime, storing the
# results in new '<name>_timestamp' columns (the source columns are kept).
# `infer_datetime_format` is deliberately not passed: it is deprecated since
# pandas 2.0, where format inference is already the default behaviour.
df['launched_timestamp'] = pd.to_datetime(df['launched'])
df.head()
df['deadline_timestamp'] = pd.to_datetime(df['deadline'])
df.head()
df.info()
# Duration of each project in whole days: deadline minus launch.
# The subtraction yields a timedelta Series that shares df's index, so the
# vectorised `.dt.days` accessor stays row-aligned regardless of how df is
# indexed and avoids a Python-level loop over every row.
df['duration'] = (df['deadline_timestamp'] - df['launched_timestamp']).dt.days
df.head()
# Remove the columns that are not needed below; df is modified in place.
df.drop(
    columns=[
        'currency',
        'goal',
        'deadline',
        'launched',
        'pledged',
        'usd pledged',
        'launched_timestamp',
        'deadline_timestamp',
    ],
    inplace=True,
)
df.head()
# Descriptive statistics for the remaining columns.
df.describe()
# Horizontal bar chart of the 5 projects with the highest 'usd_pledged_real'.
plt.figure(figsize=(10,5))
# Sort descending and keep the first five rows (head() defaults to 5).
temp = df.sort_values(by='usd_pledged_real', ascending=False).head(5)
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally and raises a TypeError otherwise.
g = sns.barplot(x='usd_pledged_real', y='name', data=temp, palette='coolwarm');
# The project names on the axis are self-explanatory, so hide the axis label.
plt.ylabel('');
# Horizontal bar chart of the 5 projects with the most backers.
plt.figure(figsize=(10,5))
# Sort descending and keep the first five rows (head() defaults to 5).
temp = df.sort_values(by='backers', ascending=False).head(5)
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally and raises a TypeError otherwise.
g = sns.barplot(x='backers', y='name', data=temp, palette='coolwarm');
# The project names on the axis are self-explanatory, so hide the axis label.
plt.ylabel('');
# Mean project duration per project state, largest first.
# The column is selected before aggregating so only 'duration' is averaged;
# calling .mean() on the whole grouped frame would also try to average the
# text columns, which raises a TypeError on pandas >= 2.0.
temp = df.groupby('state')['duration'].mean().sort_values(ascending=False)
print(temp)
plt.figure(figsize=(10,10))
# Horizontal bars: mean duration on x, state labels (the index) on y.
sns.barplot(x=temp, y=temp.index);
plt.ylabel('');
# Mean number of backers per project state, largest first.
# The column is selected before aggregating so only 'backers' is averaged;
# calling .mean() on the whole grouped frame would also try to average the
# text columns, which raises a TypeError on pandas >= 2.0.
temp = df.groupby('state')['backers'].mean().sort_values(ascending=False)
print(temp)
# Mean 'usd_goal_real' per project state, largest first.
# The column is selected before aggregating so only 'usd_goal_real' is
# averaged; calling .mean() on the whole grouped frame would also try to
# average the text columns, which raises a TypeError on pandas >= 2.0.
temp = df.groupby('state')['usd_goal_real'].mean().sort_values(ascending=False)
print(temp)
plt.figure(figsize=(20,10))
# Horizontal bars: mean goal on x, state labels (the index) on y.
sns.barplot(x=temp, y=temp.index);
plt.ylabel('');