Please enable JavaScript.

Coggle requires JavaScript to display documents.

titanic (データサイエンスのフレームワーク (探索的分析の実行 (探索する要点 (問題, パターン, 分類, 相関関係, 比較),…

- - - - 問題
      - パターン
      - 分類
      - 相関関係
      - 比較
    - - 記述
      - 視覚化
- - - - for dataset in combine:
        dataset["Embarked"] = dataset["Embarked"].map({"S":0, "C":1, "Q": 2}).astype(int)
      - for dataset in combine:
        　　　dataset["Sex"] = dataset["Sex"].map({"female": 1, "male": 0}).astype(int)
    - - dataset['Sex_Code'] = label.fit_transform(dataset['Sex'])
        dataset['Embarked_Code'] = label.fit_transform(dataset['Embarked'])
- - - - train_df[['Pclass', 'Survived']].groupby(['Pclass'], as_index=False).mean().sort_values(by='Survived', ascending=False)
  - - - FacetGrid :<3:
    - - grid = sns.FacetGrid(train_df, col = 'Survived', row = 'Pclass', size=2.2, aspect = 1.6)
        grid.map(plt.hist, 'Age', alpha = .5, bins = 20)
        grid.add_legend()
    - - grid = sns.FacetGrid(train_df, row = 'Embarked', size = 2.2, aspect = 1.6)
        grid.map(sns.pointplot, 'Pclass', "Survived", "Sex", palette = "deep")
        grid.add_legend()
  - - - plt.figure(figsize=[16,12])
        
        plt.subplot(234)
        plt.hist(x = [data1[data1['Survived']==1]['Fare'], data1[data1['Survived']==0]['Fare']], stacked=True, color = ['g','r'],label = ['Survived','Dead'])
        plt.title('Fare Histogram by Survival')
        plt.xlabel('Fare ($)')
        plt.ylabel('# of Passengers')
        plt.legend()
        
        plt.subplot(231)
        plt.boxplot(x=data1['Fare'], showmeans = True, meanline = True)
        plt.title('Fare Boxplot')
        plt.ylabel('Fare ($)')
    - - fig, saxis = plt.subplots(2, 3,figsize=(16,12))
        
        sns.barplot(x = 'Embarked', y = 'Survived', data=data1, ax = saxis[0,0])
        
        sns.pointplot(x = 'FareBin', y = 'Survived', data=data1, ax = saxis[1,0])
      - fig, (axis1,axis2,axis3) = plt.subplots(1,3,figsize=(14,12))
        
        sns.boxplot(x = 'Pclass', y = 'Fare', hue = 'Survived', data = data1, ax = axis1)
        axis1.set_title('Pclass vs Fare Survival Comparison')
        
        sns.violinplot(x = 'Pclass', y = 'Age', hue = 'Survived', data = data1, split = True, ax = axis2)
        axis2.set_title('Pclass vs Age Survival Comparison')
        
        sns.boxplot(x = 'Pclass', y ='FamilySize', hue = 'Survived', data = data1, ax = axis3)
        axis3.set_title('Pclass vs Family Size Survival Comparison')
      - fig, qaxis = plt.subplots(1,3,figsize=(14,12))
        
        sns.barplot(x = 'Sex', y = 'Survived', hue = 'Embarked', data=data1, ax = qaxis[0])
        axis1.set_title('Sex vs Embarked Survival Comparison')
    - - def correlationheatmap(df): , ax = plt.subplots(figsize =(14, 12))
        　colormap = sns.diverging_palette(220, 10, ascmap = True) 　 = sns.heatmap(df.corr(), cmap = colormap,square=True,cbar_kws={'shrink':.9 }, ax=ax,annot=True, linewidths=0.1,vmax=1.0, linecolor='white',annot_kws={'fontsize':12 })
        plt.title('Pearson Correlation of Features', y=1.05, size=15)
        correlation_heatmap(data1)