Data Science & Machine Learning
Python
String
'the solution is {} for {}'.format(var1,var2)
'the solution is {one} for {two}'.format(one=var1,two=var2)
.lower
.upper
.split
List
.append
.pop(index)
slice
nest
mutable
Dictionary
{key1:value1,key2:value2,....}
.keys
.values
.items
Tuple
(a,b,c,d...)
immutable
unpacking
for a, b in list_of_tuples:
Set
{a,b,c,d}
Unique
Elements
set(list)
.add
Comparison
greater than >
less than <
==
!=
Boolean
Logic
and
or
if:
elif:
else:
for item in seq:
while <condition>:
list(range(start_ind,end_ind,step))
[num**2 for num in seq]
function
def name_function(param1 ="default",...):
return
lambda input_var : output
list(map(function,sequence))
list( filter ( lambda num : num%2 == 0,seq ) )
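A minimal sketch tying lambdas, map and filter together (seq is a hypothetical list):
seq = [1, 2, 3, 4, 5]
squares = [num**2 for num in seq]                     # list comprehension -> [1, 4, 9, 16, 25]
doubled = list(map(lambda num: num * 2, seq))         # map applies the lambda to every item
evens = list(filter(lambda num: num % 2 == 0, seq))   # filter keeps items where the lambda is True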
Numpy
conda install numpy
pip install numpy
import numpy as np
Array
.array(list1D)
.arange(start,stop,step)
.linspace(start,end,nb_points)
slice
Matrix
.array(list2D)
.eye(dimension)
.zeros((nb_items1,nb_items2))
.ones((nb_items1,nb_items2))
Boolean comparison
arr [ arr > 5 ]
Random
.random.rand(dim1,dim2)
0 to 1
Uniform
.random.randn(dim1,dim2)
Mean 0, std 1
Standard normal
.random.randint(low,high,size)
Attributes
.reshape(new_dim1,new_dim2)
.shape
Min/Max
.max()
.min()
value
.argmax()
.argmin()
index
.dtype
Broadcast
arr[0:5] = 99
arr.copy()
Operations
+*/-
Element-wise
np.sqrt(arr)
np.exp(arr)
np.sin(arr)
np.log(arr)
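A minimal sketch of the NumPy notes above (array names are hypothetical):
import numpy as np
arr = np.arange(0, 25).reshape(5, 5)   # 5x5 matrix holding 0..24
matches = arr[arr > 5]                 # boolean comparison returns a flat array of matches
arr[0:2] = 99                          # broadcast: assign 99 to the first two rows
roots = np.sqrt(arr)                   # element-wise operation on the whole array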
Pandas
pip install pandas
conda install pandas
import pandas as pd
pd.Series(data = my_data, index = labels)
pd.Series(<dictionary_name>)
DataFrames
pd.DataFrame(data, index, columns)
df[column_value]
Series
Sharing the same index
df.drop(column_name,axis=1,inplace=True)
df.shape
df.info()
df.describe()
df.loc[index_name]
df.iloc[index_location]
df.set_index ( column_name )
df.reset_index()
Comparison
df [ df > 0]
df [ df [ column_name] > 0 ]
df [ condition1 & condition2 ]
MultiIndex
pd.MultiIndex.from_tuples
df.index.names
df.xs( 1 , level = 'num' )
Missing
np.nan
df.dropna(axis=1)
df.dropna(thresh = 2)
df.fillna ( value = <name_value> )
df.fillna ( value = df [ column_name ].mean() )
GroupBy
df.groupby ( column_name )
.describe()
.describe().transpose()
Merge & Join
pd.concat( [df1 , df2 , df3] , axis = 0)
pd.merge( df1, df2 , how = "inner" , on = 'key' )
left.join(right)
Same index
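A small sketch of the three combination styles, using hypothetical toy frames:
import pandas as pd
df1 = pd.DataFrame({'key': ['a', 'b'], 'x': [1, 2]})
df2 = pd.DataFrame({'key': ['a', 'b'], 'y': [3, 4]})
pd.concat([df1, df2], axis=0)                        # stack rows
pd.merge(df1, df2, how='inner', on='key')            # SQL-style join on a column
df1.set_index('key').join(df2.set_index('key'))      # join on a shared index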
Operations
Unique
df [ 'col2' ] . unique()
List
nunique()
df [ 'col2' ] . nunique()
Count
df [ 'col2' ] . value_counts()
Occurrences
List
df [ 'col1' ] . apply ( function_name )
df . drop ( 'col1' , axis = 1 , inplace = True)
df.columns
df.index
df.isnull()
df . sort_values (by = 'col2' )
df.pivot_table( values = 'D' , index = ['A','B'] , columns = ['C'] )
Data Input & Output
pd.read_csv ( 'file_name.csv' )
df.to_csv ( 'my_output' , index = False)
pd.read_excel('Excel_Sample.xlsx',sheet_name='Sheet1')
pd.read_html('url_link.html')
Functions
.pct_change()
Variation vs previous row (t / t-1)
.idxmin() / .idxmax()
Index of the first occurrence of the min / max
.loc[index1:index2] (.ix is deprecated)
.rolling(window=30).mean()
.to_datetime(<date to convert>)
df['year'] = df['date'].apply(lambda date : date.year)
.drop('ID',axis=1)
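A minimal time-series sketch of the functions above (column names are hypothetical):
import pandas as pd
df = pd.DataFrame({'date': ['2020-01-01', '2020-01-02', '2020-01-03'],
                   'price': [10.0, 11.0, 9.9]})
df['date'] = pd.to_datetime(df['date'])               # parse strings into datetimes
df['year'] = df['date'].apply(lambda date: date.year)
df['returns'] = df['price'].pct_change()              # variation vs previous row
df['price'].rolling(window=2).mean()                  # rolling average (window=30 in the notes)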
Matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(x , y , <color> )
plt.xlabel(<title>)
plt.title(<title>)
plt.show()
plt.subplot(nrows, ncols, index)
fig = plt.figure()
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
axes.plot(x, y, 'b')
axes.set_xlabel('Set X Label')
fig,axes = plt.subplots(1,2)
axes[0]
figsize=(1,2)
dpi=100
fig.savefig('my_name.png')
ax.plot(x,y,label='label1')
ax.legend(loc=0)
plt.tight_layout()
color = 'blue'
linewidth = 5
alpha = 0.5
linestyle='--'
marker='o'
markersize=3
markerfacecolor = 'yellow'
markeredgewidth = 3
markeredgecolor = "blue"
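A sketch pulling the object-oriented API and the style options above together:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 5, 11)
fig, axes = plt.subplots(1, 2, figsize=(8, 3), dpi=100)
axes[0].plot(x, x**2, color='blue', linewidth=2, linestyle='--',
             marker='o', markersize=5, label='label1')
axes[0].set_xlabel('Set X Label')
axes[0].legend(loc=0)
plt.tight_layout()
fig.savefig('my_name.png')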
Seaborn
http://seaborn.pydata.org/
import seaborn as sns
%matplotlib inline
tips=sns.load_dataset(<name>)
Distribution
sns.jointplot(x=<field1>,y=<field2>,data=tips,kind='reg')
sns.pairplot(tips,hue=<field1>)
All combinations
sns.distplot(tips['field'],kde=False,bins=20)
sns.rugplot(tips['field'])
Categorical
sns.barplot(x=<field1>,y=<field2>,data=tips)
sns.countplot(x=<field>,data=tips)
sns.boxplot(x=<field1>,y=<field2>,data=tips,hue=<field3>)
Box & whiskers
sns.violinplot(x=<field1>,y=<field2>,data=tips,split=True)
sns.stripplot(x="day", y="total_bill", data=tips,jitter=True)
Scatter with jitter
sns.swarmplot(x="day", y="total_bill", data=tips)
Violin + Strip
sns.factorplot(x='sex',y='total_bill',data=tips,kind='bar')
General
Matrix
sns.heatmap(tc,annot=True)
flights.pivot_table(values='passengers',index='month',columns='year')
sns.clustermap(fp)
Grids
sns.PairGrid(iris)
g.map_diag(plt.hist)
g.map_upper(plt.scatter)
g.map_lower(sns.kdeplot)
sns.FacetGrid(tips, col="time", row="smoker")
g.map(plt.hist, "total_bill")
g.map(plt.scatter, "total_bill", "tip").add_legend()
Regression
sns.lmplot(x='total_bill',y='tip',data=tips,col='sex')
Style
sns.set_style('ticks')
sns.despine(left=True)
plt.figure(figsize=(12,3))
sns.set_context('poster',font_scale=4)
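A minimal sketch combining a categorical plot with the style settings above:
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
sns.set_style('ticks')
plt.figure(figsize=(12, 3))
sns.boxplot(x='day', y='total_bill', hue='smoker', data=tips)
sns.despine(left=True)
plt.show()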
Other Viz
Pandas
df1['A'].plot()
.area(alpha=0.4)
.bar()
.bar(stacked=True)
.line(x=<field1>,y=<field2>)
.scatter(x=<field1>,y=<field2>,c=<field3>)
.box()
.hexbin(x=<field1>,y=<field2>,C=<field3>,cmap='coolwarm')
.hist()
plt.style.use('ggplot')
df3.iloc[0:30].plot.area(alpha=0.4)
Plotly & Cufflinks
pip install plotly
pip install cufflinks
df.iplot(kind='scatter',x='A',y='B',mode='markers',size=10)
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
init_notebook_mode(connected=True)
cf.go_offline()
https://images.plot.ly/plotly-documentation/images/python_cheat_sheet.pdf
https://plotly.com/python/reference/#choropleth
Geographical
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
data = dict( ....)
layout = dict(geo = {'scope':'usa'})
choromap = go.Figure(data = [data],layout = layout)
iplot(choromap)
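A minimal choropleth sketch following the pattern above (locations and z values are hypothetical):
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
data = dict(type='choropleth', locations=['AZ', 'CA', 'NY'],
            locationmode='USA-states', z=[1.0, 2.0, 3.0],
            colorbar={'title': 'Scale'})
layout = dict(geo={'scope': 'usa'})
choromap = go.Figure(data=[data], layout=layout)
iplot(choromap)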
Learning
Supervised
Set
Training
Validation
Test
Performance
Classification
Accuracy
Correct / Total
Misleading on unbalanced classes :warning:
Recall
True Positive / (True Positive + False Negative)
Precision
True Positive / (True Positive + False Positive)
F1 Score
2 * (Precision * Recall) / (Precision + Recall)
Regression
Error
Mean Absolute Error (MAE)
Mean Squared Error (MSE)
Root Mean Squared Error (RMSE)
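A quick sketch computing the three errors with sklearn (y_test and predictions are hypothetical):
import numpy as np
from sklearn import metrics
y_test = [3.0, 2.5, 4.0]
predictions = [2.8, 2.7, 4.2]
metrics.mean_absolute_error(y_test, predictions)           # MAE
metrics.mean_squared_error(y_test, predictions)            # MSE
np.sqrt(metrics.mean_squared_error(y_test, predictions))   # RMSE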
Python
Scikit
Linear
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
lm = LinearRegression()
lm.fit(X_train, y_train)
predictions = lm.predict(X_test)
metrics.mean_absolute_error(y_test, predictions)
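To inspect the fit (a sketch, assuming X is a DataFrame of features):
print(lm.intercept_)                                            # fitted intercept
coeff_df = pd.DataFrame(lm.coef_, X.columns, columns=['Coefficient'])
print(coeff_df)                                                 # one coefficient per feature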
Logistic Regression
Theory
Sigmoid
1 / (1 + e^(-Z))
0 to 1
sns.heatmap( train.isnull() , yticklabels = False , cbar = False)
Visualize
Null
sns.countplot ( x = 'Survived' , hue = 'Sex', data = train)
Handle nulls
train['Age'] = train [ ['Age','Pclass'] ] . apply ( impute_age, axis = 1)
train . drop ( 'Cabin' , axis = 1 , inplace = True)
train.dropna ( inplace = True )
Dummy
pd.get_dummies ( train['Sex'] , drop_first = True )
train = pd.concat( [train,sex,embark] , axis = 1)
train_test_split
from sklearn.linear_model import LogisticRegression
logmodel = LogisticRegression()
logmodel.fit(X_train, y_train)
predictions = logmodel.predict(X_test)
from sklearn.metrics import classification_report
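Scoring the classifier (a sketch, reusing predictions from above):
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, predictions))        # counts of TP / FP / FN / TN
print(classification_report(y_test, predictions))   # precision, recall, f1-score per class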
KNN
Nearest
Neighbours
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(df)
scaled_features = scaler.transform(df)
df_feat = pd.DataFrame (scaled_features , columns = df.columns[:-1])
train_test_split
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train,y_train)
pred = knn.predict(X_test)
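A common companion sketch: scan several k values and keep the error rate for each, then pick n_neighbors at the elbow (assumes the train/test split above):
import numpy as np
error_rate = []
for k in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    pred_k = knn.predict(X_test)
    error_rate.append(np.mean(pred_k != y_test))   # fraction misclassified at this k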
Decision Trees
from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier()
dtree.fit(X_train, y_train)
predictions = dtree.predict(X_test)
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(n_estimators=200)
rfc.fit(X_train,y_train)
rfc_pred = rfc.predict(X_test)
Graphviz
Support Vector Machine
Hyperplane
Separation
from sklearn.svm import SVC
model = SVC()
model.fit(X_train, y_train)
predictions = model.predict(X_test)
from sklearn.model_selection import GridSearchCV
param_grid = { 'C' : [0.1, 1, 10, 100, 1000] , 'gamma' : [1, 0.1, 0.01, 0.001] }
grid = GridSearchCV(SVC(), param_grid, verbose = 3)
grid.fit(X_train,y_train)
grid.best_params_
grid.best_estimator_
K-Means Clustering
Unsupervised
Segmentation
Elbow method
from sklearn.datasets import make_blobs
data = make_blobs(n_samples=200,n_features=2, centers = 4, cluster_std = 1.8, random_state=101)
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=4)
kmeans.fit(X)
kmeans.labels_
kmeans.cluster_centers_
Principal Component Analysis
Unsupervised
Variance
Components
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(scaled_data)
x_pca = pca.transform(scaled_data)
pca.components_
As a DataFrame
components = pd.DataFrame(pca.components_, columns=<list_name>)
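A minimal end-to-end sketch (assuming df is a hypothetical numeric DataFrame):
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
scaled_data = StandardScaler().fit_transform(df)
pca = PCA(n_components=2)
x_pca = pca.fit_transform(scaled_data)                          # rows projected onto 2 components
components = pd.DataFrame(pca.components_, columns=df.columns)  # loadings per original feature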
Recommender
Type
Content-based
Product
Attributes
Collaborative Filtering (CF)
User
Knowledge
Sub-type
Memory
Model
df = pd.merge(df, movie_titles, on='item_id')
moviemat = df.pivot_table(index='user_id', columns='title', values = 'rating')
moviemat.corrwith(starwars_user_ratings)
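A sketch of memory-based CF via correlations ('Star Wars (1977)' is a hypothetical title in the ratings data):
import pandas as pd
moviemat = df.pivot_table(index='user_id', columns='title', values='rating')
starwars_user_ratings = moviemat['Star Wars (1977)']
similar = moviemat.corrwith(starwars_user_ratings)              # correlation with every other movie
corr_starwars = pd.DataFrame(similar, columns=['Correlation']).dropna()
corr_starwars.sort_values('Correlation', ascending=False)       # most similar movies first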
NLP
Bag of Words
Feature Vector
Cosine
Similarity
TF(d,t)
Term Frequency
d = Document
t = Term
pip install nltk
import nltk
nltk.download_shell()
messages = [line.rstrip() for line in open('smsspamcollection/SMSSpamCollection')]
messages = pd.read_csv('smsspamcollection/SMSSpamCollection',sep='\t', names =['label','message'])
messages.hist(column='length',by='label')
import string
string.punctuation
from nltk.corpus import stopwords
stopwords.words('english')
Normalization
Remove
Punctuation
Stopwords
''.join([c for c in mess if c not in string.punctuation])
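A sketch of a text_process helper applying both removals (the name matches the vectorization step below):
import string
from nltk.corpus import stopwords
def text_process(mess):
    nopunc = ''.join([c for c in mess if c not in string.punctuation])   # drop punctuation
    return [word for word in nopunc.split()
            if word.lower() not in stopwords.words('english')]           # drop stopwords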
Vectorization
Term Frequency
Weight count
Normalize
Unit Length
Step-by-Step
from sklearn.feature_extraction.text import CountVectorizer
bow_transformer = CountVectorizer(analyzer=text_process).fit(messages['message'])
from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer = TfidfTransformer().fit(messages_bow)
messages_tfidf = tfidf_transformer.transform(messages_bow)
from sklearn.naive_bayes import MultinomialNB
spam_detect_model = MultinomialNB().fit(messages_tfidf,messages['label'])
Pipeline
from sklearn.pipeline import Pipeline
pipeline = Pipeline( [('bow',CountVectorizer(analyzer=text_process)),('tfidf',TfidfTransformer()),('classifier',MultinomialNB())])
pipeline.fit(msg_train,label_train)
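Scoring the pipeline (a sketch, assuming msg_test and label_test come from the train/test split):
from sklearn.metrics import classification_report
predictions = pipeline.predict(msg_test)
print(classification_report(label_test, predictions))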
Deep Learning
Theory
Perceptron
Activation
Step
Sigmoid (0-1)
Hyperbolic Tangent (-1 to +1)
ReLU
max(0, z)
Softmax
Mutually exclusive classes
Cost Function :heavy_dollar_sign:
Quadratic
Gradient Descent
Learning Rate
Adam
BackPropagation
pip install tensorflow
Tools
Tensorflow
Open Source
Google
Keras
High Level Python
Library
On top of
Tensorflow
Theano
Preparation
df.values (convert to NumPy arrays)
train_test_split
Normalize
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
Modelization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
Creation
model = Sequential()
model.add(Dense(4,activation='relu'))
model = Sequential([Dense(4, activation='relu'), Dense(2, .....)])
model.compile(optimizer = 'rmsprop', loss = 'mse')
model.fit(X_train, y_train, batch_size = 128, epochs = 400, validation_data = (X_test, y_test))
model.history.history
Evaluation
model.evaluate(X_test,y_test)
model.predict(X_test)
from tensorflow.keras.models import load_model
model.save('my_model.h5')
load_model('my_model.h5')
Classification
Dropout
Random Turn-off
from tensorflow.keras.layers import Dropout
model.add(Dropout(0.5))
Early Stop
Epochs
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor ='val_loss' , mode = 'min',verbose = 1, patience = 25)
model.fit(.....,callbacks =[early_stop])
output activation
sigmoid
loss = 'binary_crossentropy'
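A minimal binary-classification sketch pulling these pieces together (layer sizes are hypothetical; assumes the splits and early_stop from above):
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
model = Sequential()
model.add(Dense(30, activation='relu'))
model.add(Dropout(0.5))                          # randomly turn off half the units while training
model.add(Dense(1, activation='sigmoid'))        # sigmoid output for a binary label
model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(X_train, y_train, epochs=600,
          validation_data=(X_test, y_test), callbacks=[early_stop])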
Big Data
Hadoop
HDFS
128 MB blocks
Datasets
MapReduce
Split
Tasks
Compute
On
HDFS
Spark
Apache
MapReduce Like
Cassandra + HDFS + S3 + ....
Data
In Memory
Resilient Distributed Dataset (RDD)
Transformation
Filter
Map
flatMap
Actions
Collect
Count
First
Take
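A minimal RDD sketch of these transformations and actions (assumes a local pyspark install; 'example.txt' is a hypothetical file):
from pyspark import SparkContext
sc = SparkContext('local', 'example')
rdd = sc.textFile('example.txt')                     # one element per line
words = rdd.flatMap(lambda line: line.split())       # transformation: flatten lines into words
errors = rdd.filter(lambda line: 'error' in line)    # transformation: keep matching lines
errors.count()                                       # action: triggers the computation
errors.take(2)                                       # action: first two matching lines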