Please enable JavaScript.
Coggle requires JavaScript to display documents.
PYTHON - Coggle Diagram
PYTHON
IMPORTING DATA
TEXT files
filename = 'huck_finn.txt'
file = open(filename, mode='r') # 'r' is to read
text = file.read()
file.close()
-
FLAT files
with NUMPY
Mixed datatypes
np.genfromtxt('titanic.csv', delimiter=',', names=True, dtype=None)
-
np.recfromcsv(file) - defaults: delimiter= ',' & dtype=None
-
Single Data type
-
np.loadtxt(filename, delimiter=',')
with Pandas
pd.read_csv(file,nrows=5, header=None)
pd.read_csv("file.csv", index_col=0, na_values='n/a', parse_dates=['Last Update'])
-
-
-
EXCEL files
-
data= pd.read_excel(filename, sheet_name=['sheet1','sheet2'], na_values='n/a')
-
PICKLE files
with open('filename.pkl','rb') as file:
-
-
-
HDF5 files
import h5py
data= h5py.File(filename,'r')
-
-
-
DATABASES
STANDARD
from sqlalchemy import create_engine
engine = create_engine('sqlite:///Northwind.sqlite')
table_names = engine.table_names()
con= engine.connect()
rs=con.execute("SELECT * from Orders")
df=pd.DataFrame(rs.fetchall())
df.columns = rs.keys()
con.close()
CONTEXT MANAGER
with engine.connect() as con:
    rs = con.execute("SELECT OrderID, OrderDate, ShipName FROM Orders")
    df = pd.DataFrame(rs.fetchmany(size=5))
    df.columns = rs.keys()
PANDAS
df = pd.read_sql_query("SELECT * FROM Orders", engine)
WEB scraping
-
REQUESTS package
-
-
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_doc)
pretty_soup = soup.prettify()
title = soup.title
text = soup.get_text()
-
PANDAS package
df=pd.read_csv(url,sep=';')
API
-
AUTHENTICATION
-
TWITTER
AUTHENTICATION
import tweepy, json
access_token = "..."
access_token_secret = "..."
consumer_key = "..."
consumer_secret = "..."
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
STREAM TWEETS
st_class.py
class MyStreamListener(tweepy.StreamListener):
    def __init__(self, api=None):
        super(MyStreamListener, self).__init__()
        self.num_tweets = 0
        self.file = open("tweets.txt", "w")

    def on_status(self, status):
        tweet = status._json
        self.file.write(json.dumps(tweet) + '\n')
        tweet_list.append(status)
        self.num_tweets += 1
        if self.num_tweets < 100:
            return True
        else:
            return False
        self.file.close()
# Create Streaming object and authenticate
l = MyStreamListener()
stream = tweepy.Stream(auth, l)
# This line filters Twitter Streams to capture data by keywords:
stream.filter(track=['apples', 'oranges'])
TOOLBOX
-
-
-
Generators
Return a generator object, not a list
The results are not stored in memory
-
-
Generator functions
Produces generator objects when called
Defined like a regular function - def
Yields a sequence of values instead of returning a single value
Generates a value with yield keyword
-
INTERMEDIATE
-
BOOLEAN OPERATORS
and
or
not
np.logical_and(expr1, expr2)
np.logical_or(expr1, expr2)
np.logical_not(expr)
-
LOOPS
-
for var1, var2 in seq:
list
for index,values in enumerate(data):
-
dictionary
for key,val in dictionary.items():
-
dataframes
for label,row in dataframe.iterrows(): -> gets observations
INTRODUCTION
Variables & Types
-
-
-
-
list
SUBSETTING
x[4]
x[-1]
x[2:6] -> 2,3,4,5
FUNCTIONS
-
Adding elements:
x + [2 , 3]
-
-
-
-
pandas.DataFrame
-
SUBSETTING
-
-
-
LABEL based:
-
-
data.loc[["RU", "IN"], ["country","state"]]
data.loc[:, ["country","state"]]
-
METHODS
-
.sort_values("column", ascending=True)
-
-
-
-
-
-
-
-
PACKAGES
numpy
-
FUNCTIONS
-
-
-
-
-
-
np.random.normal(mean,std,no.samples)
-
-
-
np.random.randint(start,end)
-
pandas
FUNCTIONS
pd.read_csv("file.csv", index_col=0, na_values='n/a', parse_dates=['Last Update'])
-
seaborn
FUNCTIONS
sns.set(color_codes=True)
ax = sns.barplot(cd, [clinton, trump, sanders, cruz])
ax.set(ylabel="count")
plt.show()
pandas_datareader
FUNCTIONS
stock_prices = DataReader(ticker, data_source, start, end)
-