from IPython.display import HTML
# Notebook helper: builds an HTML snippet with a button that toggles the
# visibility of the notebook's code input cells ('div.input').
# code_show starts as true and code_toggle runs once on document ready, so the
# inputs are hidden on load; each button click flips the state.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
from pandas.tools.plotting import scatter_matrix
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import numpy as np
import math
import random
import os
from lightning import Lightning
# Load the edge list and split it into one frame per relationship type
# (the value of the 'rel_type' column).
edges = pd.read_csv('offshore_leaks_csvs-20160621/all_edges.csv')
rels = edges.rel_type.unique()
edges_officer_of = edges.loc[edges['rel_type']=='officer_of']
edges_intermediary_of = edges.loc[edges['rel_type']=='intermediary_of']
edges_registered_address = edges.loc[edges['rel_type']=='registered_address']
edges_similar = edges.loc[edges['rel_type']=='similar']
edges_underlying = edges.loc[edges['rel_type']=='underlying']
# Raw node tables, one frame per node kind (capitalised names; the lower-case
# officers/intermediaries/... frames are built separately further down).
Officers = pd.read_csv('offshore_leaks_csvs-20160621/Officers.csv')
Intermediaries = pd.read_csv('offshore_leaks_csvs-20160621/Intermediaries.csv')
Entities = pd.read_csv('offshore_leaks_csvs-20160621/Entities.csv')
Addresses = pd.read_csv('offshore_leaks_csvs-20160621/Addresses.csv')
# NOTE(review): the same tables are read again later in this file from the
# directory 'offshore_leaks_csvs' -- confirm which location is the intended one.
# Report row/column counts, column names and the number of edges per type
# (Python 2 print statements).
print 'The data matrix "edges" contains %i rows and %i columns' %(edges.shape[0],edges.shape[1])
print 'The', edges.shape[1], 'columns of the data matrix "edges" are', list(edges.columns)
print 'The types of relations are', list(rels)
print "The number of edges of type 'officer_of' are", edges_officer_of.shape[0]
print "The number of edges of type 'intermediary_of' are", edges_intermediary_of.shape[0]
print "The number of edges of type 'registered_address' are", edges_registered_address.shape[0]
print "The number of edges of type 'similar' are", edges_similar.shape[0]
print "The number of edges of type 'underlying' are", edges_underlying.shape[0]
print
print 'The data matrix "officers" contains %i rows and %i columns' %(Officers.shape[0],Officers.shape[1])
print 'The', Officers.shape[1], 'columns of the data matrix "officers" are:'
print list(Officers.columns)
print
print 'The data matrix "intermediaries" contains %i rows and %i columns' %(Intermediaries.shape[0],Intermediaries.shape[1])
print 'The', Intermediaries.shape[1], 'columns of the data matrix "intermediaries" are:'
print list(Intermediaries.columns)
print
print 'The data matrix "entities" contains %i rows and %i columns' %(Entities.shape[0],Entities.shape[1])
print 'The', Entities.shape[1], 'columns of the data matrix "entities" are:'
print list(Entities.columns)
print
print 'The data matrix "addresses" contains %i rows and %i columns' %(Addresses.shape[0],Addresses.shape[1])
print 'The', Addresses.shape[1], 'columns of the data matrix "addresses" are:'
print list(Addresses.columns)
# Build "all_nodes": one table holding every officer, intermediary, address
# and entity, tagged with a "type" column and with normalised names.
officers = pd.read_csv('offshore_leaks_csvs-20160621/Officers.csv').set_index('node_id')
intermediaries = pd.read_csv('offshore_leaks_csvs-20160621/Intermediaries.csv').set_index('node_id')
addresses = pd.read_csv('offshore_leaks_csvs-20160621/Addresses.csv').set_index('node_id')
entities = pd.read_csv('offshore_leaks_csvs-20160621/Entities.csv').set_index('node_id')
officers["type"] = "officer"
intermediaries["type"] = "intermediary"
addresses["type"] = "address"
entities["type"] = "entity"
all_nodes = pd.concat([officers, intermediaries, addresses, entities])

# Normalise names: upper-case, trim, drop "MR."/"MRS." prefixes and dots,
# collapse whitespace runs and abbreviate LIMITED to LTD.
# The result is assigned back to the column rather than calling
# replace(inplace=True) on a column selection, so the clean-up does not depend
# on that selection being a view of the frame.
all_nodes['name'] = all_nodes['name'].str.upper().str.strip()
all_nodes['name'] = all_nodes['name'].replace(
    to_replace=[r'MRS?\.\s+', r'\.', r'\s+', 'LIMITED'],
    value=['', '', ' ', 'LTD'],
    regex=True)

# Ensure that all "Bearers" do not become a single node
is_bearer = all_nodes['name'] == 'THE BEARER'
alBear = all_nodes[is_bearer].to_dict()
# One .loc[rows, column] assignment: the chained form
# all_nodes.loc[rows]['name'] = ... writes to a temporary copy and leaves
# all_nodes itself unchanged.
all_nodes.loc[is_bearer, 'name'] = np.nan

# Drop the references to the per-type frames; only the combined table is kept.
officers = None
intermediaries = None
addresses = None
entities = None
# Turn the node_id index back into an ordinary column.
all_nodes = all_nodes.reset_index()
# Report the size and columns of the combined node table, then the number of
# rows of each node type (Python 2 print statements).
print 'The dataframe "all_nodes" contains', all_nodes.shape[0], 'rows and', all_nodes.shape[1], 'columns'
print 'The', all_nodes.shape[1], 'columns of the dataframe "all_nodes" are:'
print list(all_nodes.columns)
print
print 'The dataframe "all_nodes" contains', len(all_nodes[all_nodes.type == 'officer']), 'officers'
print 'The dataframe "all_nodes" contains', len(all_nodes[all_nodes.type == 'intermediary']), 'intermediaries'
print 'The dataframe "all_nodes" contains', len(all_nodes[all_nodes.type == 'entity']), '(offshore) entities'
print 'The dataframe "all_nodes" contains', len(all_nodes[all_nodes.type == 'address']), 'addresses'
# Second load of the edge list and raw node tables, this time from the
# directory named by fildir; rebinds edges, rels, the edges_* frames and the
# capitalised node frames.
# NOTE(review): earlier in this file the same CSVs are read from
# 'offshore_leaks_csvs-20160621' -- confirm which directory is intended.
fildir='offshore_leaks_csvs'
edges = pd.read_csv(os.path.join(fildir, 'all_edges.csv'))
rels = edges.rel_type.unique()
edges_officer_of = edges.loc[edges['rel_type']=='officer_of']
edges_intermediary_of = edges.loc[edges['rel_type']=='intermediary_of']
edges_registered_address = edges.loc[edges['rel_type']=='registered_address']
edges_similar = edges.loc[edges['rel_type']=='similar']
edges_underlying = edges.loc[edges['rel_type']=='underlying']
Officers = pd.read_csv(os.path.join(fildir, 'Officers.csv'))
Intermediaries = pd.read_csv(os.path.join(fildir, 'Intermediaries.csv'))
# Only the Entities table is read with low_memory=False.
Entities = pd.read_csv(os.path.join(fildir, 'Entities.csv'), low_memory=False)
Addresses = pd.read_csv(os.path.join(fildir, 'Addresses.csv'))
# Report row/column counts, column names and the number of edges per type
# (Python 2 print statements).
print 'The data matrix "edges" contains %i rows and %i columns' %(edges.shape[0],edges.shape[1])
print 'The', edges.shape[1], 'columns of the data matrix "edges" are', list(edges.columns)
print 'The types of relations are', list(rels)
print "The number of edges of type 'officer_of' are", edges_officer_of.shape[0]
print "The number of edges of type 'intermediary_of' are", edges_intermediary_of.shape[0]
print "The number of edges of type 'registered_address' are", edges_registered_address.shape[0]
print "The number of edges of type 'similar' are", edges_similar.shape[0]
print "The number of edges of type 'underlying' are", edges_underlying.shape[0]
print
print 'The data matrix "officers" contains %i rows and %i columns' %(Officers.shape[0],Officers.shape[1])
print 'The', Officers.shape[1], 'columns of the data matrix "officers" are:'
print list(Officers.columns)
print
print 'The data matrix "intermediaries" contains %i rows and %i columns' %(Intermediaries.shape[0],Intermediaries.shape[1])
print 'The', Intermediaries.shape[1], 'columns of the data matrix "intermediaries" are:'
print list(Intermediaries.columns)
print
print 'The data matrix "entities" contains %i rows and %i columns' %(Entities.shape[0],Entities.shape[1])
print 'The', Entities.shape[1], 'columns of the data matrix "entities" are:'
print list(Entities.columns)
print
print 'The data matrix "addresses" contains %i rows and %i columns' %(Addresses.shape[0],Addresses.shape[1])
print 'The', Addresses.shape[1], 'columns of the data matrix "addresses" are:'
print list(Addresses.columns)
from IPython.display import Image
# Display objects: the image stored at figs/oie.png and the first 20 rows of
# the combined node table. Both are bare expressions, so their values are
# only shown when a notebook cell ends with them.
Image(filename='figs/oie.png')
all_nodes.head(20)
cc_dict=all_nodes[['country_codes','countries','node_id','type']].to_dict()
from collections import Counter
mono={}#Counter()
countries_dict={}
for k,v in cc_dict['country_codes'].items():
if isinstance(v,float):
continue
vv=v.split(';')
kk=cc_dict['countries'][k].split(';')
for ik,vk in enumerate(vv):
if vk not in mono:
mono[vk]={}
if cc_dict['type'][k] not in mono[vk]:
mono[vk][cc_dict['type'][k]]=Counter()
mono[vk][cc_dict['type'][k]][vk]+=1
countries_dict[kk[ik]]=vk
print 'The total number of countries in Panama Papers is', len(countries_dict)
key_lis=[]
for key in sorted(countries_dict):
vv=countries_dict[key]
sor={'Country_name':key,'Country_code':vv}
for k,v in mono[vv].items():
sor[k]=v[vv]
key_lis.append(sor)#{'Country_name':key,'Country_code':vv,'Number_of_nodes':mono[countries_dict[key]]})
countries_pd=pd.DataFrame(key_lis)
countries_pd
# Scatter-matrix of the per-country officer / intermediary / entity counts,
# with the Pearson coefficient written into every upper-triangle panel.
import warnings
warnings.filterwarnings("ignore")
ntei='Scatter Matrix Plot of the Disribution of Officers, Intermediaries and Entities over Countries'
plot_columns = ['officer', 'intermediary', 'entity']
f, ax = plt.subplots(figsize=(15,15))
sss = scatter_matrix(countries_pd[plot_columns], alpha=0.9, color='black', diagonal='hist', ax=ax)
plt.suptitle(ntei, fontsize=18, fontweight='bold')
# The correlation matrix must be computed on exactly the plotted columns, in
# the plotted order. countries_pd.corr() covers every numeric column of the
# table in the table's own column order, so corr[i, j] would not describe
# panel (i, j).
corr = countries_pd[plot_columns].corr().values
for i, j in zip(*np.triu_indices_from(sss, k=1)):
    sss[i, j].annotate("pearson = %.3f" % corr[i, j], (0.8, 0.93),
                       xycoords='axes fraction', ha='center', va='center')
# Selected countries: cN is the country name and iN the row label that the
# country is looked up under in the per-country count table.
# NOTE(review): 'Ireland' appears twice (c21 and c31) -- confirm that this is
# intended.
c1, i1 = 'Greece', 70
c2, i2 = 'Cyprus', 46
c3, i3 = 'Russia', 154
c4, i4 = 'Turkey', 192
c5, i5 = 'United Kingdom', 199
c6, i6 = 'United States', 200
c7, i7 = 'Belgium', 18
c8, i8 = 'Austria', 11
c9, i9 = 'Bulgaria', 29
c10, i10 = 'Belarus', 17
c11, i11 = 'Czech Republic', 47
c12, i12 = 'Denmark', 50
c13, i13 = 'Estonia', 58
c14, i14 = 'Finland', 61
c15, i15 = 'France', 62
c16, i16 = 'Georgia', 66
c17, i17 = 'Germany', 67
c18, i18 = 'Hungary', 81
c19, i19 = 'Iceland', 82
c20, i20 = 'Italy', 90
c21, i21 = 'Ireland', 87
# Slot 22 (Kazakhstan, row 95) is disabled.
c23, i23 = 'Latvia', 100
c24, i24 = 'Liechtenstein', 105
c25, i25 = 'Luxembourg', 107
c26, i26 = 'Malta', 115
c27, i27 = 'Moldova', 120
c28, i28 = 'Monaco', 121
c29, i29 = 'Netherlands', 130
c30, i30 = 'Norway', 139
c31, i31 = 'Ireland', 87
c32, i32 = 'Poland', 149
c33, i33 = 'Portugal', 150
c34, i34 = 'Romania', 153
c35, i35 = 'Serbia', 165
c36, i36 = 'Slovakia', 170
c37, i37 = 'Slovenia', 171
c38, i38 = 'Spain', 176
c39, i39 = 'Sweden', 181
c40, i40 = 'Switzerland', 182
c41, i41 = 'Ukraine', 197
c42, i42 = 'Andorra', 3
# Slot 43 (Azerbaijan, row 12) is disabled, as is an alternative choice for
# the first three slots (Zimbabwe 208, Turkey 192, Venezuela 204).
def _type_counts(country_name, row_label):
    """Return (row_dict, [officer, intermediary, entity]) for one country.

    row_dict is the to_dict() form of the countries_pd rows whose
    'Country_name' equals country_name; the three counts are read from it
    under row_label.
    """
    row_dict = countries_pd[countries_pd['Country_name'] == country_name].to_dict()
    counts = [row_dict[kind][row_label] for kind in ('officer', 'intermediary', 'entity')]
    return row_dict, counts


# One (row dict, count list) pair per selected country; slots 22 and 43 are
# disabled, matching the country constants.
gr, negr = _type_counts(c1, i1)
cy, necy = _type_counts(c2, i2)
ru, neru = _type_counts(c3, i3)
cc4, ncc4 = _type_counts(c4, i4)
cc5, ncc5 = _type_counts(c5, i5)
cc6, ncc6 = _type_counts(c6, i6)
cc7, ncc7 = _type_counts(c7, i7)
cc8, ncc8 = _type_counts(c8, i8)
cc9, ncc9 = _type_counts(c9, i9)
cc10, ncc10 = _type_counts(c10, i10)
cc11, ncc11 = _type_counts(c11, i11)
cc12, ncc12 = _type_counts(c12, i12)
cc13, ncc13 = _type_counts(c13, i13)
cc14, ncc14 = _type_counts(c14, i14)
cc15, ncc15 = _type_counts(c15, i15)
cc16, ncc16 = _type_counts(c16, i16)
cc17, ncc17 = _type_counts(c17, i17)
cc18, ncc18 = _type_counts(c18, i18)
cc19, ncc19 = _type_counts(c19, i19)
cc20, ncc20 = _type_counts(c20, i20)
cc21, ncc21 = _type_counts(c21, i21)
cc23, ncc23 = _type_counts(c23, i23)
cc24, ncc24 = _type_counts(c24, i24)
cc25, ncc25 = _type_counts(c25, i25)
cc26, ncc26 = _type_counts(c26, i26)
cc27, ncc27 = _type_counts(c27, i27)
cc28, ncc28 = _type_counts(c28, i28)
cc29, ncc29 = _type_counts(c29, i29)
cc30, ncc30 = _type_counts(c30, i30)
cc31, ncc31 = _type_counts(c31, i31)
cc32, ncc32 = _type_counts(c32, i32)
cc33, ncc33 = _type_counts(c33, i33)
cc34, ncc34 = _type_counts(c34, i34)
cc35, ncc35 = _type_counts(c35, i35)
cc36, ncc36 = _type_counts(c36, i36)
cc37, ncc37 = _type_counts(c37, i37)
cc38, ncc38 = _type_counts(c38, i38)
cc39, ncc39 = _type_counts(c39, i39)
cc40, ncc40 = _type_counts(c40, i40)
cc41, ncc41 = _type_counts(c41, i41)
cc42, ncc42 = _type_counts(c42, i42)
# lisl holds three rows of running totals, one value per selected country:
#   row 0: officers
#   row 1: officers + intermediaries
#   row 2: officers + intermediaries + entities
# Countries appear in slot order 1-5, 7-21, 23-42 and finally 6.
# Building the rows from one list of series keeps every column on the same
# index; the hand-written first row read ncc20[1] and ncc37[1], i.e. the
# intermediary count instead of the officer count, for those two countries.
_country_series = [
    negr, necy, neru, ncc4, ncc5,
    ncc7, ncc8, ncc9, ncc10, ncc11, ncc12, ncc13, ncc14, ncc15, ncc16,
    ncc17, ncc18, ncc19, ncc20, ncc21,
    ncc23, ncc24, ncc25, ncc26, ncc27, ncc28, ncc29, ncc30, ncc31, ncc32,
    ncc33, ncc34, ncc35, ncc36, ncc37, ncc38, ncc39, ncc40, ncc41, ncc42,
    ncc6,
]
lisl = []
for level in range(len(negr)):
    lisl.append([sum(series[:level + 1]) for series in _country_series])
# # print lisl,i
# beaker.tot=lisl
# # beaker.negr=negr
# # beaker.necy=necy
# # beaker.neru=neru
# beaker.base=[0,lisl[0],lisl[1]]
# beaker.countries=[c1,c2,c3,c4,c5,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c23,c24,c25,c26,c27, c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c6] #c22,,c43
# Display object for the image stored at figs/coie.png.
Image(filename='figs/coie.png')
# Pick the three countries analysed below: nameN is the country name and ccN
# its country code, looked up in countries_dict. c2 and c3 are reassigned
# here; c1 keeps the value it already has.
name1 = c1 #Greece
cc1=countries_dict[name1]
c2='Cyprus'
name2=c2
cc2=countries_dict[name2]
# c2=''
# cc2={}
# name2=c2
c3='Russia'
name3=c3
cc3=countries_dict[name3]
# Comma-separated list of the three names, used in the plot descriptions.
names = ", ".join([c1,c2,c3])#'Russian Federarion, Greece and Cyprus'
# names
def find_nodes_countries(cc,cc_dict):
    """Return the node ids whose country-code field contains cc.

    cc_dict maps 'country_codes' to {row key: ';'-separated codes or a float
    for a missing value} and 'node_id' to {row key: node id}. Rows with a
    float code field are skipped. A node id is listed once per occurrence of
    cc in its code field, in the iteration order of the 'country_codes'
    mapping.
    """
    return [cc_dict['node_id'][row_key]
            for row_key, code_field in cc_dict['country_codes'].items()
            if not isinstance(code_field, float)
            for code in code_field.split(';')
            if code == cc]
# Node ids recorded for each of the three selected country codes, with the
# number of ids printed per country (Python 2 print statements).
ccnodes1=find_nodes_countries(cc1,cc_dict)
print 'Total number of nodes from %s: %i' %(name1,len(ccnodes1)) #,cc1
ccnodes2=find_nodes_countries(cc2,cc_dict)
print 'Total number of nodes from %s: %i' %(name2,len(ccnodes2)) #,cc1
# print 'Total number of nodes from', name2, ':', len(ccnodes2) #,cc2
ccnodes3=find_nodes_countries(cc3,cc_dict)
print 'Total number of nodes from %s: %i' %(name3,len(ccnodes3))
# print 'Total number of nodes from %s: %i' %(name1,len(ccnodes1)+len(ccnodes2)+len(ccnodes3))
# print 'Total number of nodes from all countries (%s, %s, %s): %i' %(name1,name2,name3,
# len(ccnodes1)+len(ccnodes2)+len(ccnodes3))
# Keys of the bearer rows captured in alBear (its 'status' column mapping).
nodes_rem = list(alBear['status'])

# Full network, read from GraphML.
F1 = nx.read_graphml('graphs/F1.graphml')

# Node ids of the three selected countries, as strings because the graph's
# node names are compared against str ids below.
union_nodes = [str(i) for i in set(ccnodes1).union(ccnodes2).union(ccnodes3)]

F = F1
# Induced subgraph on those nodes with isolated nodes dropped.
# .copy() gives a mutable graph and list(...) materialises the isolates
# before any node is removed: on networkx 2.x nx.subgraph returns a read-only
# view and nx.isolates a lazy iterator, so the direct form only works on 1.x.
graph = nx.subgraph(F, union_nodes).copy()
graph.remove_nodes_from(list(nx.isolates(graph)))

# Distinct node ids of each raw node table.
offic = list(Officers['node_id'].unique())
inter = list(Intermediaries['node_id'].unique())
enti = list(Entities['node_id'].unique())
addr = list(Addresses['node_id'].unique())
# print 'Total number of nodes in the (%s,%s,%s) graph: %i' %(name1,name2,name3,len(graph.nodes()))
# Index every node of the country subgraph and record, per index:
#   groups  - 1/2/3 for membership in ccnodes1/ccnodes2/ccnodes3 (first match)
#   labels  - the node's name (or address text for address nodes)
#   ngroups - node-type code: 1 officer, 4 intermediary, 5 entity, 2 address
# noddd maps the graph's node name to its index.
labels={}
groups={}
noddd={}
deg=nx.degree(graph)
ngroups={}
cgroups={}
# Membership is tested once per node against every id collection; sets make
# each test constant-time instead of a scan over a list.
country_members=[(1,set(ccnodes1)),(2,set(ccnodes2)),(3,set(ccnodes3))]
# (ids, source table, label column, type code), in lookup priority order.
node_tables=[
    (set(offic),Officers,'name',1),
    (set(inter),Intermediaries,'name',4),
    (set(enti),Entities,'name',5),
    (set(addr),Addresses,'address',2),
]
for i,nd in enumerate(graph.nodes()):
    noddd[nd]=i
    nd=int(nd)  # graph node names are strings, table ids are integers
    for code,members in country_members:
        if nd in members:
            groups[i]=code
            break
    for ids,frame,column,kind in node_tables:
        if nd in ids:
            # first matching row of the table supplies the label
            labels[i]=frame.loc[frame['node_id'] == nd][column].tolist()[0]
            ngroups[i]=kind
            break
# RGB colour per node index: officers (type 1) get pale shades and entities
# (type 5) saturated shades of blue / green / red for country group 1 / 2 / 3
# (Greece / Cyprus / Russia); every other node type is white.
colorr={}
_shades={
    1: {1: (204,204,255), 2: (204,255,204), 3: (255,204,204)},
    5: {1: (0,0,255), 2: (0,255,0), 3: (255,0,0)},
}
for k,v in ngroups.items():
    if v not in _shades:
        colorr[k]=(255,255,255)
        continue
    country_group=groups[k]
    # an officer/entity outside groups 1-3 is left without a colour
    if country_group in _shades[v]:
        colorr[k]=_shades[v][country_group]
# Per-node lists for the visualisation, all in graph.nodes() order:
# label, country group, colour and degree.
_slots=[noddd[v] for v in graph.nodes()]
lali=[labels[slot] for slot in _slots]
grouli=[groups[slot] for slot in _slots]
cols=[colorr[slot] for slot in _slots]
vals=[deg[v] for v in graph.nodes()]
# Edge list as [source index, target index, weight]; edges without a
# 'weight' attribute count as weight 1.
edges=[[noddd[a],noddd[b],graph[a][b].get('weight',1)] for a,b in graph.edges()]
print('Total number of nonisolated nodes in the graph of %s, %s and %s: %i' %(name1,name2,name3,len(cols)))
print('Total number of edges in the graph of %s, %s and %s: %i' %(name1,name2,name3,len(edges)))
# Send the node/edge lists to the Lightning server named in host and open the
# resulting force-directed visualisation; labels and colours come from lali
# and cols, the description embeds the three country names.
lgn = Lightning(ipython=True, host='http://public.lightning-viz.org',size='full') # vis at server
# lgn = Lightning(ipython=True,local=True,size='large') # local vis
vis=lgn.force(conn=edges, values=None, labels=lali, color=cols, group=None, colormap=None, size=3, tooltips=True,
width=1200, brush=True,zoom=True, height=None,
description=r'''## **The Panama Papers Network of %s**''' %names)
vis.open() # vis at server
# vis # local vis
# Embed a fixed, previously published visualisation URL in an inline frame.
from IPython.display import IFrame
IFrame('http://public.lightning-viz.org/visualizations/a634166d-2bbe-4133-ac78-8a19a0ee75f4/public/', width=1000, height=1000)
def get_nat(ed,c_d):
    """Return the key of c_d whose id collection contains int(ed).

    c_d maps a label to a collection of integer node ids. When several
    collections contain the id, the label reached last while iterating c_d
    is returned; None is returned when no collection contains it.
    """
    matches=[nat for nat in c_d if int(ed) in c_d[nat]]
    return matches[-1] if matches else None
def count_edges_nat(c_d,edges):
    """Count edges per unordered pair of endpoint labels.

    Each endpoint of an edge is labelled with get_nat(endpoint, c_d); the two
    labels are sorted and joined as '<first> , <second>' to form the key of
    the returned Counter.
    """
    def pair_key(ed):
        ends=sorted((get_nat(ed[0],c_d),get_nat(ed[1],c_d)))
        return '%s , %s' %(ends[0],ends[1])
    return Counter(pair_key(ed) for ed in edges)
# Count the edges of the country subgraph per pair of endpoint countries and
# derive the attribute assortativity coefficient from the mixing matrix.
c_d={c1:ccnodes1,c2:ccnodes2,c3:ccnodes3}
edges_nationalities=count_edges_nat(c_d,graph.edges())
for nat,nat_value in edges_nationalities.items():
    # keys have the form '<first> , <second>'
    natt=nat.split(' , ')
    print('There are %i edges between %s and %s' %(nat_value,natt[0],natt[1]))

# Mixing matrix in the fixed order (c1, c2, c3). Every cell is looked up by
# its pair key instead of by position in edges_nationalities.values(), whose
# order is that of a dict and carries no meaning. Within-country edges count
# twice on the diagonal (once per direction); a pair with no edges reads as 0.
# Edges with an endpoint that belongs to none of the three countries are not
# part of the matrix.
nat_order=[c1,c2,c3]
m=[]
for row_nat in nat_order:
    row=[]
    for col_nat in nat_order:
        pair=sorted((row_nat,col_nat))
        n_edges=edges_nationalities['%s , %s' %(pair[0],pair[1])]
        row.append(2*n_edges if row_nat==col_nat else n_edges)
    m.append(row)

import numpy as np
M=np.array(m,dtype=float)
# Normalise so that the cells sum to 1 (yields NaN when there are no edges).
M=M/M.sum()
s=M.dot(M).sum()   # sum of the entries of the matrix product M*M
t=np.trace(M)
R=t-s
r=R/(1-s)
ac = float(r)
print('The Attribute Assortativity Coefficient of the graph of %s, %s and %s is %.4f' %(name1,name2,name3,ac))
# Projection onto the non-entity, non-address nodes: two such nodes are
# linked when they are both neighbours of the same entity in `graph`.
import itertools as it
# The graph's node names are strings, so the id lists are converted to str.
addr=[str(i) for i in addr]
offic=[str(i) for i in offic]
enti=[str(i) for i in enti]
nodes_no_addr_ent=set(union_nodes)-(set(addr).union(set(enti)))
# .copy() makes the subgraph mutable on networkx 2.x as well, where
# nx.subgraph returns a read-only view.
pgraph = nx.subgraph(F, nodes_no_addr_ent).copy()
entil=set(enti).intersection(set(union_nodes))
addrl=set(addr).intersection(set(union_nodes))
ll=[enti]
for ae in ll:
    for nd in ae:
        if nd not in graph:
            continue
        # neighbours of this entity that are kept in the projection; the
        # candidate set is intersected directly instead of being copied for
        # every entity
        nei=list(nodes_no_addr_ent.intersection(nx.all_neighbors(graph,nd)))
        for ed,de in it.combinations(nei,2):
            # a weighted edge gains 1; a new or unweighted edge starts at 1
            if pgraph.has_edge(ed,de) and 'weight' in pgraph[ed][de]:
                wei=pgraph[ed][de]['weight']+1
            else:
                wei=1
            pgraph.add_edge(ed,de,weight=wei)
# list(...) materialises the isolates before nodes are removed (nx.isolates
# is a lazy iterator on networkx 2.x).
pgraph.remove_nodes_from(list(nx.isolates(pgraph)))
print('Total number of nonisolated nodes in the graph of %s, %s and %s: %i' %(name1,name2,name3,len(pgraph.nodes())))
print('Total number of edges in the graph of %s, %s and %s: %i' %(name1,name2,name3,len(pgraph.edges())))
# Index every node of the projected graph and record, per index:
#   groups  - 1/2/3 for membership in ccnodes1/ccnodes2/ccnodes3 (first match)
#   labels  - the node's name (or address text for address nodes)
#   ngroups - node-type code: 1 officer, 4 intermediary, 5 entity, 2 address
# noddd maps the graph's node name to its index.
labels={}
groups={}
noddd={}
deg=nx.degree(pgraph)
ngroups={}
country_members=[(1,set(ccnodes1)),(2,set(ccnodes2)),(3,set(ccnodes3))]
# All id collections are normalised to integer sets and compared with the
# integer form of the node name. At this point offic/enti/addr hold strings
# while inter still holds integers, so testing the string node name against
# inter never matched and intermediaries were left without label and type.
node_tables=[
    (set(int(x) for x in offic),Officers,'name',1),
    (set(int(x) for x in inter),Intermediaries,'name',4),
    (set(int(x) for x in enti),Entities,'name',5),
    (set(int(x) for x in addr),Addresses,'address',2),
]
for i,nd in enumerate(pgraph.nodes()):
    noddd[nd]=i
    ndd=int(nd)
    for code,members in country_members:
        if ndd in members:
            groups[i]=code
            break
    for ids,frame,column,kind in node_tables:
        if ndd in ids:
            # first matching row of the table supplies the label
            labels[i]=frame.loc[frame['node_id'] == ndd][column].tolist()[0]
            ngroups[i]=kind
            break
# print groups
# RGB colour per node index of the projected graph: officers (type 1) get
# pale shades and entities (type 5) saturated shades of blue / green / red
# for country group 1 / 2 / 3 (Greek / Cypriot / Russian); every other node
# type is white.
# The map is rebuilt from scratch: indices are renumbered for every graph, so
# entries left over from a previously coloured graph would attach colours of
# unrelated nodes to any index this loop does not assign.
colorr={}
_shades={
    1: {1: (204,204,255), 2: (204,255,204), 3: (255,204,204)},
    5: {1: (0,0,255), 2: (0,255,0), 3: (255,0,0)},
}
for k,v in ngroups.items():
    if v not in _shades:
        colorr[k]=(255,255,255)
        continue
    country_group=groups[k]
    # an officer/entity outside groups 1-3 is left without a colour
    if country_group in _shades[v]:
        colorr[k]=_shades[v][country_group]
# Per-node lists for the projected-graph visualisation, all in
# pgraph.nodes() order: label, country group, colour and degree.
_pslots=[noddd[v] for v in pgraph.nodes()]
plali=[labels[slot] for slot in _pslots]
pgrouli=[groups[slot] for slot in _pslots]
pcols=[colorr[slot] for slot in _pslots]
pvals=[deg[v] for v in pgraph.nodes()]
# Edge list as [source index, target index, 4 * weight]; edges without a
# 'weight' attribute count as weight 1.
pedges=[[noddd[a],noddd[b],4*pgraph[a][b].get('weight',1)] for a,b in pgraph.edges()]
# Distinct edge weights present in the projected graph.
ssssi=set(pgraph[a][b]['weight'] for a,b in pgraph.edges() if 'weight' in pgraph[a][b])
# Send the projected graph's node/edge lists to the Lightning server named in
# host and open the resulting force-directed visualisation; labels and
# colours come from plali and pcols.
lgn = Lightning(ipython=True, host='http://public.lightning-viz.org',size='full') # vis at server
# lgn = Lightning(ipython=True,local=True,size='large') #local
vis=lgn.force(conn=pedges, values=None, labels=plali, color=pcols, group=None, colormap=None, size=3, tooltips=True,
width=1200, brush=True,zoom=True, height=800,
description=r'''## **The Projected Panama Papers Network of %s**''' %names)
vis.open() # vis at server
# vis ## local
# Embed a fixed, previously published visualisation URL in an inline frame.
from IPython.display import IFrame
IFrame('http://public.lightning-viz.org/visualizations/8a06add8-f937-47ea-beb2-3b0649262219/public/', width=1000, height=1000)
def get_nat(ed,c_d):
    """Look up which id collection of c_d contains int(ed).

    c_d maps a label to a collection of integer node ids. The label of the
    last collection (in c_d iteration order) that contains the id is
    returned, or None when none does.
    """
    found=None
    for nat,members in c_d.items():
        if int(ed) in members:
            found=nat  # no break: a later match overrides an earlier one
    return found
def count_edges_nat(c_d,edges):
    """Tally edges by the sorted pair of their endpoint labels.

    The label of each endpoint comes from get_nat(endpoint, c_d). The
    returned Counter is keyed by '<first> , <second>' with the two labels in
    sorted order.
    """
    tally=Counter()
    for ed in edges:
        first,second=sorted((get_nat(ed[0],c_d),get_nat(ed[1],c_d)))
        tally['%s , %s' %(first,second)]+=1
    return tally
# Count the edges of the projected graph per pair of endpoint countries and
# derive the attribute assortativity coefficient from the mixing matrix.
c_d={c1:ccnodes1,c2:ccnodes2,c3:ccnodes3}
edges_nationalities=count_edges_nat(c_d,pgraph.edges())
for nat,nat_value in edges_nationalities.items():
    # keys have the form '<first> , <second>'
    natt=nat.split(' , ')
    print('There are %i edges between %s and %s' %(nat_value,natt[0],natt[1]))

# Mixing matrix in the fixed order (c1, c2, c3). Every cell is looked up by
# its pair key instead of by position in edges_nationalities.values(), whose
# order is that of a dict and carries no meaning. Within-country edges count
# twice on the diagonal (once per direction); a pair with no edges reads as 0.
# Edges with an endpoint that belongs to none of the three countries are not
# part of the matrix.
nat_order=[c1,c2,c3]
m=[]
for row_nat in nat_order:
    row=[]
    for col_nat in nat_order:
        pair=sorted((row_nat,col_nat))
        n_edges=edges_nationalities['%s , %s' %(pair[0],pair[1])]
        row.append(2*n_edges if row_nat==col_nat else n_edges)
    m.append(row)

import numpy as np
M=np.array(m,dtype=float)
# Normalise so that the cells sum to 1 (yields NaN when there are no edges).
M=M/M.sum()
s=M.dot(M).sum()   # sum of the entries of the matrix product M*M
t=np.trace(M)
R=t-s
r=R/(1-s)
ac = float(r)
print('The Attribute Assortativity Coefficient of the graph of %s, %s and %s is %.4f' %(name1,name2,name3,ac))
def create_centralities_list(G,maxiter=2000,pphi=5,centList=None):
    """Compute the requested node centralities of graph G with networkx.

    Parameters
    ----------
    G : networkx graph
        Graph passed unchanged to the networkx centrality functions.
    maxiter : int
        max_iter handed to nx.eigenvector_centrality.
    pphi : number
        Sets the Katz attenuation factor: alpha = 1/phi - 0.01 with
        phi = (1 + sqrt(pphi)) / 2.
    centList : sequence of str, optional
        Any of 'degree_centrality', 'closeness_centrality',
        'betweenness_centrality', 'eigenvector_centrality',
        'katz_centrality' and 'page_rank'. None or an empty sequence selects
        all six, in that order.

    Returns
    -------
    dict
        Maps each requested name to the {node: score} dict returned by
        networkx. Eigenvector centrality and PageRank are best-effort: when
        their computation raises, the name maps to None and no progress line
        is printed for it.

    Raises
    ------
    ValueError
        If centList contains a name that is not listed above.
    """
    if not centList:
        centList=['degree_centrality','closeness_centrality','betweenness_centrality',
                  'eigenvector_centrality','katz_centrality','page_rank']

    def katz():
        # NOTE(review): the original comment called phi the largest eigenvalue
        # of the adjacency matrix, but it is computed from pphi alone --
        # confirm that the resulting alpha suits G.
        phi = (1+math.sqrt(pphi))/2.0
        return nx.katz_centrality_numpy(G,1/phi-0.01)

    # name -> (zero-argument computation, progress label, best effort?)
    measures={
        'degree_centrality': (lambda: nx.degree_centrality(G),'Degree Centralities',False),
        'closeness_centrality': (lambda: nx.closeness_centrality(G),'Closeness Centralities',False),
        'betweenness_centrality': (lambda: nx.betweenness_centrality(G),'Betweenness Centralities',False),
        'eigenvector_centrality': (lambda: nx.eigenvector_centrality(G,max_iter=maxiter),
                                   'Eigenvector Centralities',True),
        'katz_centrality': (katz,'Katz Centralities',False),
        'page_rank': (lambda: nx.pagerank(G),'PageRank',True),
    }

    # Reject unknown names before any (possibly slow) computation starts.
    unknown=[centr for centr in centList if centr not in measures]
    if unknown:
        raise ValueError('Unknown centrality name(s): %s' % ', '.join(str(u) for u in unknown))

    valus={}
    for centr in centList:
        compute,label,best_effort=measures[centr]
        if best_effort:
            try:
                valus[centr]=compute()
            except Exception:
                # e.g. the iteration did not converge; Exception (not a bare
                # except) lets KeyboardInterrupt/SystemExit through
                valus[centr]=None
                continue
        else:
            valus[centr]=compute()
        print('%s done!!!' %label)
    return valus
# Tabulate the centralities of the projected graph: one row per node, a
# 'Nodes' column followed by one column per centrality in centList order.
centList=['degree_centrality','closeness_centrality','betweenness_centrality',
          'eigenvector_centrality','katz_centrality','page_rank']
centrali=create_centralities_list(pgraph)
# Rows follow one explicit node order and every score is looked up by node,
# so the columns stay aligned without relying on the iteration order of the
# individual result dicts or on an arbitrarily chosen first entry.
node_order=list(pgraph.nodes())
dfco=pd.DataFrame({'Nodes':node_order},columns=['Nodes'])
for k in centList:
    scores=centrali.get(k)
    if scores is None:
        # centrality could not be computed: keep the column, filled with None
        dfco[k]=None
    else:
        dfco[k]=[scores[n] for n in node_order]
dfco
# Scatter-matrix of the centralities of the projected network, with the
# Pearson coefficient written into every upper-triangle panel.
import warnings
warnings.filterwarnings("ignore")
ntei='Scatter Matrix Plot of Centralities of the Projected Network of Officers from %s, %s and %s' %(name1,name2,name3) #+ names
# The plotted frame and the correlation matrix are taken from the same
# column selection so that corr[i, j] always describes panel (i, j); the
# whole-frame dfco.corr() only lined up because the 'Nodes' column happened
# to be non-numeric.
cent_frame=dfco[centList]
f, ax = plt.subplots(figsize=(20,20))
sss=scatter_matrix(cent_frame, alpha=0.9, color='black', diagonal='hist',ax=ax)
plt.suptitle(ntei,fontsize=18,fontweight='bold')
corr = cent_frame.corr().values
for i, j in zip(*np.triu_indices_from(sss, k=1)):
    sss[i, j].annotate("pearson = %.3f" %corr[i,j], (0.8, 0.93),
                       xycoords='axes fraction', ha='center', va='center')
import itertools as it

# Graph nodes are looked up by their string form below, so normalise the
# id lists to strings first.
addr = [str(i) for i in addr]
offic = [str(i) for i in offic]
enti = [str(i) for i in enti]

# Nodes kept in the projection: everything except addresses and officers.
nodes_no_addr_ent = set(union_nodes) - (set(addr).union(set(offic)))

# Work on an independent copy: the projection adds and re-weights edges,
# and that must not leak into F (nor fail on a read-only subgraph view).
pgraph = nx.subgraph(F, nodes_no_addr_ent).copy()

entil = set(enti).intersection(set(union_nodes))
addrl = set(addr).intersection(set(union_nodes))

# Project through the officers: every pair of kept nodes that are both
# neighbours of the same officer gets an edge, and each further shared
# officer raises that edge's weight by one.
ll = [offic]
for ae in ll:
    for nd in ae:
        if nd not in graph:
            continue
        nnei = nx.all_neighbors(graph, nd)
        nei = list(nodes_no_addr_ent.intersection(nnei))
        for ed, de in it.combinations(nei, 2):
            if pgraph.has_edge(ed, de):
                # An existing edge without a weight counts as weight 0 so far.
                wei = pgraph[ed][de].get('weight', 0) + 1
            else:
                wei = 1
            pgraph.add_edge(ed, de, weight=wei)

# Materialise the isolates before removing them so the graph is not
# modified while it is still being iterated.
pgraph.remove_nodes_from(list(nx.isolates(pgraph)))

print('Total number of nonisolated nodes in the graph of %s, %s and %s: %i' % (name1, name2, name3, len(pgraph.nodes())))
print('Total number of edges in the graph of %s, %s and %s: %i' % (name1, name2, name3, len(pgraph.edges())))
# Per-node display metadata for pgraph. Everything except noddd is keyed
# by the node's position in pgraph.nodes(); noddd maps node -> position.
labels = {}
groups = {}
noddd = {}
ngroups = {}
deg = nx.degree(pgraph)

# (group code, node-id collection), tested in this order; first hit wins.
# The original comments name the groups 1 = Greek, 2 = Cypriot, 3 = Russian.
country_sets = ((1, ccnodes1), (2, ccnodes2), (3, ccnodes3))
# (member ids, lookup table, label column, node-type code), tested in order.
node_kinds = (
    (offic, Officers, 'name', 1),
    (inter, Intermediaries, 'name', 4),
    (enti, Entities, 'name', 5),
    (addr, Addresses, 'address', 2),
)

for i, nd in enumerate(pgraph.nodes()):
    noddd[nd] = i
    ndd = int(nd)
    for code, members in country_sets:
        if ndd in members:
            groups[i] = code
            break
    for members, frame, column, code in node_kinds:
        if nd in members:
            # Label is the first matching row of the corresponding table.
            labels[i] = frame.loc[frame['node_id'] == ndd][column].tolist()[0]
            ngroups[i] = code
            break

# RGB colour per country group: pale shades for officers (type 1),
# saturated shades for entities (type 5), white for every other type.
# NOTE: colorr is not created here; it must already exist.
officer_shades = {1: (204, 204, 255), 2: (204, 255, 204), 3: (255, 204, 204)}
entity_shades = {1: (0, 0, 255), 2: (0, 255, 0), 3: (255, 0, 0)}
for k, v in ngroups.items():
    if v == 1:
        shades = officer_shades
    elif v == 5:
        shades = entity_shades
    else:
        colorr[k] = (255, 255, 255)
        continue
    country = groups[k]
    if country in shades:
        colorr[k] = shades[country]
# Node attributes flattened into lists that follow pgraph.nodes() order.
plali = [labels[noddd[v]] for v in pgraph.nodes()]
pgrouli = [groups[noddd[v]] for v in pgraph.nodes()]
pcols = [colorr[noddd[v]] for v in pgraph.nodes()]
pvals = [deg[v] for v in pgraph.nodes()]

# Edge list as [source index, target index, value]; the value is four
# times the edge weight, or 4 when the edge carries no weight.
pedges = []
for edd in pgraph.edges():
    attrs = pgraph[edd[0]][edd[1]]
    wei = 4 * attrs['weight'] if 'weight' in attrs else 4
    pedges.append([noddd[edd[0]], noddd[edd[1]], wei])

# Distinct edge weights present in the projection (kept for inspection).
ssssi = {pgraph[edd[0]][edd[1]]['weight']
         for edd in pgraph.edges()
         if 'weight' in pgraph[edd[0]][edd[1]]}

# Render the force-directed layout on the public Lightning server.
lgn = Lightning(ipython=True, host='http://public.lightning-viz.org', size='full')
vis = lgn.force(
    conn=pedges,
    values=None,
    labels=plali,
    color=pcols,
    group=None,
    colormap=None,
    size=3,
    tooltips=True,
    width=1200,
    brush=True,
    zoom=True,
    height=800,
    description=r'''## **The Projected Panama Papers Network of %s**''' % names,
)
vis.open()

# Embed a previously published visualisation in the notebook.
from IPython.display import IFrame
IFrame('http://public.lightning-viz.org/visualizations/212c22cf-3f51-4492-a976-a4f5e96f949f/public/', width=1000, height=1000)
def get_nat(ed, c_d):
    """Return the key of ``c_d`` whose collection contains node ``ed``.

    ``ed`` is converted with ``int`` before the membership test. Every
    entry of ``c_d`` is examined, so if several collections contain the
    node the last matching key in iteration order is returned. ``None``
    is returned when no collection contains it.
    """
    found = None
    for label, members in c_d.items():
        if int(ed) in members:
            found = label
    return found
def count_edges_nat(c_d, edges):
    """Count edges per pair of endpoint categories.

    Each endpoint of every edge is classified with ``get_nat(node, c_d)``.
    The two categories are sorted and joined into the key ``'A , B'``.
    Returns a ``Counter`` mapping each such key to its number of edges.
    """
    def pair_key(edge):
        # Sorting makes the key independent of the edge's direction.
        first, second = sorted((get_nat(edge[0], c_d), get_nat(edge[1], c_d)))
        return '%s , %s' % (first, second)

    return Counter(pair_key(edge) for edge in edges)
# Category label -> node ids belonging to that category.
c_d = {c1: ccnodes1, c2: ccnodes2, c3: ccnodes3}
# Counter keyed by 'A , B' (the sorted pair of endpoint categories).
edges_nationalities = count_edges_nat(c_d, pgraph.edges())
for nat, nat_value in edges_nationalities.items():
    # Split on the full separator so the second label has no leading space.
    natt = nat.split(' , ')
    print('There are %i edges between %s and %s' % (nat_value, natt[0], natt[1]))
een = list(edges_nationalities.values())

# Build the symmetric mixing matrix by looking up each category pair by
# its key, rather than by position in the Counter's values (that order
# is arbitrary and the number of distinct pairs varies between graphs).
# An edge inside one category has both endpoints there, so the diagonal
# holds twice the edge count; a missing pair counts as 0. Edges with an
# endpoint outside all three categories do not enter the matrix.
nat_order = [c1, c2, c3]
m = []
for row_nat in nat_order:
    row = []
    for col_nat in nat_order:
        lo, hi = sorted((row_nat, col_nat))
        pair_count = edges_nationalities['%s , %s' % (lo, hi)]
        row.append(2 * pair_count if row_nat == col_nat else pair_count)
    m.append(row)
r1, r2, r3 = m

import numpy as np
M = np.array(m)
# Normalise to a joint probability distribution over category pairs.
if M.sum() != 1.0:
    M = M / float(M.sum())
M = np.asmatrix(M)
# Assortativity coefficient r = (trace(M) - sum(M^2)) / (1 - sum(M^2)),
# where M^2 is the matrix product.
s = (M * M).sum()
t = M.trace()
R = t - s
r = R / (1 - s)
ac = float(r)
print('The Attribute Assortativity Coefficient of the graph of %s, %s and %s is %.4f' % (name1, name2, name3, ac))
# Centrality measures to tabulate, in the column order used for display
# and for the scatter-matrix plot.
centList = ['degree_centrality', 'closeness_centrality', 'betweenness_centrality',
            'eigenvector_centrality', 'katz_centrality', 'page_rank']
# Maps each measure name to a {node: score} dict, or to None when that
# measure could not be computed.
centrali = create_centralities_list(pgraph)

# Use one explicit node order for every column so that each row is
# guaranteed to describe a single node, instead of relying on every
# centrality dict happening to iterate in the same order.
node_order = list(pgraph.nodes())

dfce = pd.DataFrame()
for u, k in enumerate(centList):
    scores = centrali.get(k)
    # A measure that is missing or failed becomes an empty (None) column.
    v = None if scores is None else [scores[nd] for nd in node_order]
    dfce.insert(u, k, v)
dfce.insert(0, 'Nodes', node_order)
dfce
import warnings
warnings.filterwarnings("ignore")

# Scatter-matrix of the centrality columns of dfce, with the Pearson
# correlation written into every panel above the diagonal.
ntei = 'Scatter Matrix Plot of Centralities of the Projected Network of Entities from %s, %s and %s' % (name1, name2, name3)
f, ax = plt.subplots(figsize=(20, 20))
sss = scatter_matrix(dfce[centList], alpha=0.9, color='black', diagonal='hist', ax=ax)
plt.suptitle(ntei, fontsize=18, fontweight='bold')

# Correlate exactly the columns that were plotted, so that corr[i, j]
# always refers to the same pair of measures as the panel sss[i, j]
# (the 'Nodes' column must never take part).
corr = dfce[centList].corr().values
for i, j in zip(*np.triu_indices_from(sss, k=1)):
    sss[i, j].annotate("pearson = %.3f" % corr[i, j], (0.8, 0.93),
                       xycoords='axes fraction', ha='center', va='center')