Contents of the command:
Note: double and single quotes both work
import pickle
mydata = [1,2,3,4,5,6,7,8,9,10]
pickle.dump(mydata, open('mydata.p','wb'))
more_data = [10,9,8,7,6,5,4,3,2,1]
pickle.dump([mydata,more_data], open('so_much_data.p','wb'))
mydata = pickle.load(open("mydata.p",'rb'))
print(mydata)
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[mydata, more_data] = pickle.load(open('so_much_data.p','rb'))
print(mydata)
print(more_data)
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
my_list = [1,3,2,4,7,'Sandwich']
print(len(my_list))
print(my_list[0:2])
print(my_list[-1])
print(my_list[0:4:2])
6 [1, 3] Sandwich [1, 2]
import numpy as np
my_array = np.random.poisson(lam=3,size=10)
print(my_array)
print(my_array.shape)
[0 5 4 2 6 2 3 1 2 2] (10,)
import pandas as pd
my_series = pd.Series(my_list)
my_series.shape
(6,)
my_series = pd.Series(my_array,
index = [1,2,3,'cat','dog','10','n',8,7,6])
print(my_series)
(10,) 1 0 2 5 3 4 cat 2 dog 6 10 2 n 3 8 1 7 2 6 2 dtype: int64
print(my_series.mean())
my_series = pd.Series(['hello world','hello planet'])
print(my_series.str.replace('hello','goodbye'))
2.7 0 goodbye world 1 goodbye planet dtype: object
new_list = list(my_array)
print(new_list)
[0, 5, 4, 2, 6, 2, 3, 1, 2, 2]
my_2d_list = [[1,4],[2,1],[8,10],[4,7],[9,2],[4,5]]
my_3var_list = [(1,4,7),(2,1,0),(8,10,2),(4,7,4),(9,2,7),(4,5,3)]
for i,new_var in enumerate(my_list):
my_2d_list[i].append(new_var)
print(my_2d_list)
[[1, 4, 1], [2, 1, 3], [8, 10, 2], [4, 7, 4], [9, 2, 7], [4, 5, 8]]
my_dict = {
'var1':[1,2,8,4,9,4],
'var2': [4,1,10,7,2,5]
}
my_dict['var3']=my_list
print(my_dict['var3'])
[1, 3, 2, 4, 7, 8]
my_matrix = np.array(my_2d_list)
my_other_matrix = np.array(my_3var_list)
print(my_matrix)
print(my_matrix[0,0:2])
[[ 1 4 1] [ 2 1 3] [ 8 10 2] [ 4 7 4] [ 9 2 7] [ 4 5 8]] [1 4]
big_matrix = np.concatenate([my_matrix, my_other_matrix],axis=0)
print(big_matrix)
[[ 1 4 1] [ 2 1 3] [ 8 10 2] [ 4 7 4] [ 9 2 7] [ 4 5 8] [ 1 4 7] [ 2 1 0] [ 8 10 2] [ 4 7 4] [ 9 2 7] [ 4 5 3]]
big_matrix = np.concatenate([my_matrix, my_other_matrix],axis=1)
print(big_matrix)
[[ 1 4 1 1 4 7] [ 2 1 3 2 1 0] [ 8 10 2 8 10 2] [ 4 7 4 4 7 4] [ 9 2 7 9 2 7] [ 4 5 8 4 5 3]]
print(my_matrix.T + my_other_matrix.T*5)
[[ 6 12 48 24 54 24] [24 6 60 42 12 30] [36 3 12 24 42 23]]
my_rand_matrix = np.random.randn(5,3)
print(my_rand_matrix)
[[-1.55580936 -0.17261167 0.75196263] [ 0.84330579 1.33124031 0.3352658 ] [ 0.27599767 -1.15216039 -0.78153232] [-0.05247263 0.60094014 0.60057787] [-1.16768905 -0.43397707 -0.75927943]]
my_rand_matrix[:,0]=my_rand_matrix[:,0]*.5+5
my_rand_matrix[:,1]=my_rand_matrix[:,1]*.5-5
my_rand_matrix[:,2]=my_rand_matrix[:,2]*10+50
print(my_rand_matrix.T)
[[ 4.22209532 5.42165289 5.13799884 4.97376368 4.41615548] [ -5.08630584 -4.33437984 -5.5760802 -4.69952993 -5.21698854] [ 57.51962633 53.35265798 42.1846768 56.00577874 42.40720567]]
BIG_array = np.zeros((100,100))
rows = (1,6,29,40,43,50)
columns = (3,6,90,58,34,88)
BIG_array[(rows,columns)]=[4,6,14,1,3,22]
import scipy as sp
from scipy import sparse
BIG_array = sparse.csc_matrix(BIG_array)
print(BIG_array)
(1, 3) 4.0 (6, 6) 6.0 (43, 34) 3.0 (40, 58) 1.0 (50, 88) 22.0 (29, 90) 14.0
df = pd.DataFrame(my_dict)
df
var1 | var2 | var3 | |
---|---|---|---|
0 | 1 | 4 | 1 |
1 | 2 | 1 | 3 |
2 | 8 | 10 | 2 |
3 | 4 | 7 | 4 |
4 | 9 | 2 | 7 |
5 | 4 | 5 | 8 |
df = pd.DataFrame(my_2d_list,
columns = ['var1','var2','var3'])
df
var1 | var2 | var3 | |
---|---|---|---|
0 | 1 | 4 | 1 |
1 | 2 | 1 | 3 |
2 | 8 | 10 | 2 |
3 | 4 | 7 | 4 |
4 | 9 | 2 | 7 |
5 | 4 | 5 | 8 |
df = pd.DataFrame(my_rand_matrix,
columns = ['dist_1','dist_2','dist_3'],
index = ['obs1','obs2','obs3','obs4','fred'])
df
dist_1 | dist_2 | dist_3 | |
---|---|---|---|
obs1 | 4.222095 | -5.086306 | 57.519626 |
obs2 | 5.421653 | -4.334380 | 53.352658 |
obs3 | 5.137999 | -5.576080 | 42.184677 |
obs4 | 4.973764 | -4.699530 | 56.005779 |
fred | 4.416155 | -5.216989 | 42.407206 |
df.describe()
dist_1 | dist_2 | dist_3 | |
---|---|---|---|
count | 5.000000 | 5.000000 | 5.000000 |
mean | 4.834333 | -4.982657 | 50.293989 |
std | 0.501574 | 0.479122 | 7.452384 |
min | 4.222095 | -5.576080 | 42.184677 |
25% | 4.416155 | -5.216989 | 42.407206 |
50% | 4.973764 | -5.086306 | 53.352658 |
75% | 5.137999 | -4.699530 | 56.005779 |
max | 5.421653 | -4.334380 | 57.519626 |
import matplotlib.pyplot as plt
%matplotlib inline
df.plot.density()
<matplotlib.axes._subplots.AxesSubplot at 0x7fdaf0662128>
df.dist_1.plot.hist(bins=3)
<matplotlib.axes._subplots.AxesSubplot at 0x7fdaf06b4828>
baad_covars = pd.read_csv('BAAD_1_Lethality_Data.tab',sep='\t')
baad_covars.head(3)
mastertccode3606 | group | statespond | cowmastercountry | masterccode | fatalities19982005 | OrgAge | ordsize | terrStrong | degree | ContainRelig | ContainEthno | LeftNoReligEthno | PureRelig | PureEthno | ReligEthno | ContainRelig2 | ContainEthno2 | Islam | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 50 | Animal Liberation Front (ALF) | 0 | United States of America | 2 | 0 | 30 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 89 | Army of God | 0 | United States of America | 2 | 1 | 24 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
2 | 113 | Cambodian Freedom Fighters (CFF) | 0 | United States of America | 2 | 0 | 8 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
print(baad_covars.shape)
baad_covars.columns
(395, 19)
Index(['mastertccode3606', 'group', 'statespond', 'cowmastercountry', 'masterccode', 'fatalities19982005', 'OrgAge', 'ordsize', 'terrStrong', 'degree', 'ContainRelig', 'ContainEthno', 'LeftNoReligEthno', 'PureRelig', 'PureEthno', 'ReligEthno', 'ContainRelig2', 'ContainEthno2', 'Islam'], dtype='object')
baad_covars.rename(columns = {'cowmastercountry':'country',
'masterccode':'ccode',
'mastertccode3606':'group_code',
'fatalities19982005':'fatalities'},
inplace = True)
baad_covars.replace({'country':{'United States of America':'US'}},
inplace = True)
print('Dimensions: ',baad_covars.shape)
baad_covars.head()
Dimensions: (395, 19)
group_code | group | statespond | country | ccode | fatalities | OrgAge | ordsize | terrStrong | degree | ContainRelig | ContainEthno | LeftNoReligEthno | PureRelig | PureEthno | ReligEthno | ContainRelig2 | ContainEthno2 | Islam | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 50 | Animal Liberation Front (ALF) | 0 | US | 2 | 0 | 30 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 89 | Army of God | 0 | US | 2 | 1 | 24 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
2 | 113 | Cambodian Freedom Fighters (CFF) | 0 | US | 2 | 0 | 8 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
3 | 126 | Coalition to Save the Preserves (CSP) | 0 | US | 2 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 153 | Earth Liberation Front (ELF) | 0 | US | 2 | 0 | 14 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
#Set the index
baad_covars.set_index(['group_code'],inplace = True)
baad_covars.head()
group | statespond | country | ccode | fatalities | OrgAge | ordsize | terrStrong | degree | ContainRelig | ContainEthno | LeftNoReligEthno | PureRelig | PureEthno | ReligEthno | ContainRelig2 | ContainEthno2 | Islam | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
group_code | ||||||||||||||||||
50 | Animal Liberation Front (ALF) | 0 | US | 2 | 0 | 30 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
89 | Army of God | 0 | US | 2 | 1 | 24 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
113 | Cambodian Freedom Fighters (CFF) | 0 | US | 2 | 0 | 8 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
126 | Coalition to Save the Preserves (CSP) | 0 | US | 2 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
153 | Earth Liberation Front (ELF) | 0 | US | 2 | 0 | 14 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
baad_covars.to_csv('updated_baad.csv')
baad_covars.loc[:, ['fatalities']].head()
group_code 50 0 89 1 113 0 126 0 153 0 Name: fatalities, dtype: int64
baad_covars.loc[(baad_covars.fatalities>1) | (baad_covars.degree>=1),
['group','country']].head()
group | country | |
---|---|---|
group_code | ||
50 | Animal Liberation Front (ALF) | US |
153 | Earth Liberation Front (ELF) | US |
30035 | Maras Salvatruchas | US |
10042 | Group of Guerilla Combatants of Jose Maria Mor... | Mexico |
246 | Justice Army of the Defenseless People | Mexico |
baad_covars.loc[(baad_covars.ContainRelig==1)&
(baad_covars.terrStrong==1),['group']]
group | |
---|---|
198 | Hisba |
203 | Lord's Resistance Army (LRA) |
238 | Ansar al-Islam |
252 | Mahdi Army |
272 | Hezbollah |
281 | Hamas |
298 | Hizb-I-Islami |
303 | Taliban |
305 | Islamic Movement of Uzbekistan (IMU) |
356 | al-Qaeda |
379 | Abu Sayyaf Group (ASG) |
383 | Moro Islamic Liberation Front (MILF) |
384 | Moro National Liberation Front (MNLF) |
391 | Jemaah Islamiya (JI) |
392 | Laskar Jihad |
baad_covars.OrgAge.plot.hist(bins=10)
<matplotlib.axes._subplots.AxesSubplot at 0x7fdaf0883588>
state_level = baad_covars.loc[:,['country','OrgAge',
'ordsize','degree',
'fatalities']
].groupby(['country']).sum()
state_level.head()
OrgAge | ordsize | degree | fatalities | |
---|---|---|---|---|
country | ||||
Afghanistan | 58 | 4 | 11 | 353 |
Algeria | 24 | 2 | 6 | 409 |
Angola | 83 | 5 | 0 | 276 |
Argentina | 2 | 0 | 0 | 0 |
Bangladesh | 83 | 5 | 4 | 80 |
baad_covars['big'] = 0
baad_covars.loc[(baad_covars.fatalities>1) |
(baad_covars.degree>=1),
'big']=1
baad_covars.big.head()
group_code 50 1 89 0 113 0 126 0 153 1 Name: big, dtype: int64
print(type(np.nan))
baad_covars.loc[(baad_covars.fatalities>1) | (baad_covars.degree>=1),
['terrStrong']] = None
baad_covars.loc[(baad_covars.fatalities>1) | (baad_covars.degree>=1),
['terrStrong']].head()
<class 'float'>
terrStrong | |
---|---|
group_code | |
50 | NaN |
153 | NaN |
30035 | NaN |
10042 | NaN |
246 | NaN |
baad_covars.loc[baad_covars.terrStrong.isnull(),'terrStrong'].head()
group_code 50 NaN 153 NaN 30035 NaN 10042 NaN 246 NaN Name: terrStrong, dtype: float64
baad_covars['terrStrong'] = baad_covars.terrStrong.fillna(-77)
baad_covars.terrStrong.head()
group_code 50 -77.0 89 0.0 113 0.0 126 0.0 153 -77.0 Name: terrStrong, dtype: float64
baad_covars_dropped = baad_covars.dropna(axis='index',
subset=['terrStrong'],
inplace=False)
baad_covars.reset_index(inplace=True,
drop = False)
baad_covars.head()
group_code | group | statespond | country | ccode | fatalities | OrgAge | ordsize | terrStrong | degree | ContainRelig | ContainEthno | LeftNoReligEthno | PureRelig | PureEthno | ReligEthno | ContainRelig2 | ContainEthno2 | Islam | big | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 50 | Animal Liberation Front (ALF) | 0 | US | 2 | 0 | 30 | 0 | -77.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
1 | 89 | Army of God | 0 | US | 2 | 1 | 24 | 0 | 0.0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
2 | 113 | Cambodian Freedom Fighters (CFF) | 0 | US | 2 | 0 | 8 | 0 | 0.0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
3 | 126 | Coalition to Save the Preserves (CSP) | 0 | US | 2 | 0 | 6 | 0 | 0.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 153 | Earth Liberation Front (ELF) | 0 | US | 2 | 0 | 14 | 0 | -77.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
baad_covars.set_index(['group','country'],inplace = True)
baad_covars.head()
group_code | statespond | ccode | fatalities | OrgAge | ordsize | terrStrong | degree | ContainRelig | ContainEthno | LeftNoReligEthno | PureRelig | PureEthno | ReligEthno | ContainRelig2 | ContainEthno2 | Islam | big | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
group | country | ||||||||||||||||||
Animal Liberation Front (ALF) | US | 50 | 0 | 2 | 0 | 30 | 0 | -77.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Army of God | US | 89 | 0 | 2 | 1 | 24 | 0 | 0.0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
Cambodian Freedom Fighters (CFF) | US | 113 | 0 | 2 | 0 | 8 | 0 | 0.0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
Coalition to Save the Preserves (CSP) | US | 126 | 0 | 2 | 0 | 6 | 0 | 0.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Earth Liberation Front (ELF) | US | 153 | 0 | 2 | 0 | 14 | 0 | -77.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
indonesia_grps = baad_covars.xs('Indonesia',level = 'country',drop_level=False)
indonesia_grps = indonesia_grps.loc[indonesia_grps.fatalities>=1,['degree','ContainRelig',
'ContainEthno','terrStrong',
'ordsize','OrgAge']]
indonesia_grps.head()
degree | ContainRelig | ContainEthno | terrStrong | ordsize | OrgAge | ||
---|---|---|---|---|---|---|---|
group | country | ||||||
Free Aceh Movement (GAM) | Indonesia | 1 | 1 | 1 | -77.0 | 2 | 31 |
Jemaah Islamiya (JI) | Indonesia | 2 | 1 | 0 | -77.0 | 1 | 13 |
Laskar Jihad | Indonesia | 3 | 1 | 1 | -77.0 | 0 | 6 |
South Maluku Republic (RMS) | Indonesia | 0 | 0 | 1 | -77.0 | 2 | 8 |
little_df = pd.DataFrame([1,2,3,4,5],columns = ['A'])
little_df['B']=[0,1,0,1,1]
copied_df = little_df
print('before:')
print(copied_df)
little_df.loc[little_df.A == 3,'B'] = 'Sandwich'
print('after')
print(copied_df)
before: A B 0 1 0 1 2 1 2 3 0 3 4 1 4 5 1 after A B 0 1 0 1 2 1 2 3 Sandwich 3 4 1 4 5 1
import copy
little_df = pd.DataFrame([1,2,3,4,5],columns = ['A'])
little_df['B']=[0,1,0,1,1]
copied_df = little_df.copy()
print('before:')
print(copied_df)
little_df.loc[little_df.A == 3,'B'] = 'Sandwich'
print('after')
print(copied_df)
before: A B 0 1 0 1 2 1 2 3 0 3 4 1 4 5 1 after A B 0 1 0 1 2 1 2 3 0 3 4 1 4 5 1
C = pd.DataFrame(['apple','orange','grape','pear','banana'],
columns = ['C'],
index = [2,4,3,0,1])
little_df['C'] = C
little_df
A | B | C | |
---|---|---|---|
0 | 1 | 0 | pear |
1 | 2 | 1 | banana |
2 | 3 | Sandwich | apple |
3 | 4 | 1 | grape |
4 | 5 | 1 | orange |
C = pd.DataFrame(['apple','orange','grape','apple'],
columns = ['C'],
index = [2,4,3,'a'])
C['cuts']=['slices','wedges','whole','spirals']
print('C:')
print(C)
print('Inner: Intersection')
print(little_df.merge(right=C,
how='inner',
on=None,
left_index = True,
right_index =True))
print('Outer: Keep all rows')
print(little_df.merge(right=C,
how='outer',
on=None,
left_index = True,
right_index =True))
print('Left: Keep little_df')
print(little_df.merge(right=C,
how='left',
on=None,
left_index = True,
right_index =True))
print('Right: Keep C')
print(little_df.merge(right=C,
how='right',
on=None,
left_index = True,
right_index =True))
print('Outer, merging on column instead of index')
print(little_df.merge(right=C,
how='outer',
on='C',
left_index = False,
right_index =False))
C: C cuts 2 apple slices 4 orange wedges 3 grape whole a apple spirals Inner: Intersection A B C_x C_y cuts 2 3 Sandwich apple apple slices 3 4 1 grape grape whole 4 5 1 orange orange wedges Outer: Keep all rows A B C_x C_y cuts 0 1.0 0 pear NaN NaN 1 2.0 1 banana NaN NaN 2 3.0 Sandwich apple apple slices 3 4.0 1 grape grape whole 4 5.0 1 orange orange wedges a NaN NaN NaN apple spirals Left: Keep little_df A B C_x C_y cuts 0 1 0 pear NaN NaN 1 2 1 banana NaN NaN 2 3 Sandwich apple apple slices 3 4 1 grape grape whole 4 5 1 orange orange wedges Right: Keep C A B C_x C_y cuts 2 3.0 Sandwich apple apple slices 4 5.0 1 orange orange wedges 3 4.0 1 grape grape whole a NaN NaN NaN apple spirals Outer, merging on column instead of index A B C cuts 0 1 0 pear NaN 1 2 1 banana NaN 2 3 Sandwich apple slices 3 3 Sandwich apple spirals 4 4 1 grape whole 5 5 1 orange wedges
/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py:3772: RuntimeWarning: '<' not supported between instances of 'int' and 'str', sort order is undefined for incomparable objects return this.join(other, how=how, return_indexers=return_indexers)
add_df = pd.DataFrame({'A':[6],'B':[7],'C':'peach'},index= ['p'])
little_df = pd.concat([little_df,add_df])
little_df
A | B | C | |
---|---|---|---|
0 | 1 | 0 | pear |
1 | 2 | 1 | banana |
2 | 3 | Sandwich | apple |
3 | 4 | 1 | grape |
4 | 5 | 1 | orange |
p | 6 | 7 | peach |
asthma_data = pd.read_csv('asthma-emergency-department-visit-rates-by-zip-code.csv')
asthma_data.head(2)
Year | ZIP code | Age Group | Number of Visits | Age-adjusted rate | County Fips code | County | |
---|---|---|---|---|---|---|---|
0 | 2015 | 90004\n(34.07646, -118.309453) | Children (0-17) | 117.0 | 91.7 | 6037 | LOS ANGELES |
1 | 2015 | 90011\n(34.007055, -118.258872) | Children (0-17) | 381.0 | 102.8 | 6037 | LOS ANGELES |
asthma_data[['zip','coordinates']] = asthma_data.loc[:,'ZIP code'].str.split(
pat='\n',expand=True)
asthma_data.drop('ZIP code', axis=1,inplace=True)
asthma_data.head(2)
Year | Age Group | Number of Visits | Age-adjusted rate | County Fips code | County | zip | coordinates | |
---|---|---|---|---|---|---|---|---|
0 | 2015 | Children (0-17) | 117.0 | 91.7 | 6037 | LOS ANGELES | 90004 | (34.07646, -118.309453) |
1 | 2015 | Children (0-17) | 381.0 | 102.8 | 6037 | LOS ANGELES | 90011 | (34.007055, -118.258872) |
asthma_grouped = asthma_data.groupby(by=['Year','zip']).sum()
asthma_grouped.head(4)
Number of Visits | Age-adjusted rate | County Fips code | ||
---|---|---|---|---|
Year | zip | |||
2009 | 90001 | 818.0 | 226.074245 | 18111 |
90002 | 836.0 | 265.349315 | 18111 | |
90003 | 1542.0 | 369.202131 | 18111 | |
90004 | 580.0 | 145.538276 | 18111 |
asthma_grouped.drop('County Fips code',axis=1,inplace=True)
temp_grp = asthma_data.groupby(by=['Year','zip']).first()
asthma_grouped[['fips',
'county',
'coordinates']]=temp_grp.loc[:,['County Fips code',
'County',
'coordinates']].copy()
asthma_grouped.loc[:,'Number of Visits']=\
asthma_grouped.loc[:,'Number of Visits']/2
asthma_grouped.head(2)
Number of Visits | Age-adjusted rate | fips | county | coordinates | ||
---|---|---|---|---|---|---|
Year | zip | |||||
2009 | 90001 | 409.0 | 226.074245 | 6037 | LOS ANGELES | (33.973252, -118.249154) |
90002 | 418.0 | 265.349315 | 6037 | LOS ANGELES | (33.949079, -118.247877) |
asthma_unstacked = asthma_data.pivot_table(index = ['Year',
'zip',
'County',
'coordinates',
'County Fips code'],
columns = 'Age Group',
values = 'Number of Visits')
asthma_unstacked.reset_index(drop=False,inplace=True)
asthma_unstacked.head(2)
Age Group | Year | zip | County | coordinates | County Fips code | Adults (18+) | All Ages | Children (0-17) |
---|---|---|---|---|---|---|---|---|
0 | 2009 | 90001 | LOS ANGELES | (33.973252, -118.249154) | 6037 | 206.0 | 409.0 | 203.0 |
1 | 2009 | 90002 | LOS ANGELES | (33.949079, -118.247877) | 6037 | 204.0 | 418.0 | 214.0 |
asthma_unstacked.rename(columns={
'zip':'Zip',
'coordinates':'Coordinates',
'County Fips code':'Fips',
'Adults (18+)':'Adults',
'All Ages':'Incidents',
'Children (0-17)': 'Children'
},
inplace=True)
asthma_2015 = asthma_unstacked.loc[asthma_unstacked.Year==2015,:]
asthma_2015.head(2)
Age Group | Year | Zip | County | Coordinates | Fips | Adults | Incidents | Children |
---|---|---|---|---|---|---|---|---|
4693 | 2015 | 90001 | LOS ANGELES | (33.973252, -118.249154) | 6037 | 229.0 | 441.0 | 212.0 |
4694 | 2015 | 90002 | LOS ANGELES | (33.949079, -118.247877) | 6037 | 249.0 | 476.0 | 227.0 |
asthma_2015.to_csv('asthma_2015.csv')
<img src = "https://media.giphy.com/media/6VWz7mToYWdDNYglYr/giphy.gif">