EXPLORATORY TWITTER NETWORK ANALYSIS WITH PYTHON

I. CO-OCCURRENT HASHTAG NETWORKS

EXTRACTED FROM TWITTER DATA ON REFUGEES IN OCTOBER-DECEMBER 2015

By Moses A. Boudourides & Sergios T. Lenis

In [1]:
%matplotlib inline

import networkx as nx 
from networkx.drawing.nx_agraph import graphviz_layout
import matplotlib.pyplot as plt
import matplotlib as mpl
from lightning import Lightning
import pandas as pd
import random
import warnings
import seaborn as sns
sns.set_style("white")
sns.set_style("ticks") 
from tools import draw_centralities, draw_centralities_subplots, draw_centralities_subplots_dir, create_centralities_list
from tools import lgp, plot_light, plot_light_online, draw_comms,print_communities
# %autoreload 2
warnings.filterwarnings("ignore")
In [2]:
from IPython.display import HTML

# Inject a small JavaScript snippet so readers of the rendered notebook can
# hide/show all raw code cells via a toggle button (code starts hidden).
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[2]:

1. The Graph

In [3]:
# Load the pre-built hashtag co-occurrence graph (a pickled networkx Graph).
# NOTE(review): read_gpickle uses pickle internally — only load trusted files.
G=nx.read_gpickle("chgp1.pic")
In [4]:
# Basic size statistics of the co-occurrence graph G.
lv = len(G.nodes(data=True))
print 'The number of nodes (hashtags) of G is', lv
print
le = len(G.edges(data=True))
print 'The number of edges (hashtag co-occurrences in the same tweets) of G is', le
print
# Density = fraction of possible node pairs actually connected.
print 'The density of G is', nx.density(G)
The number of nodes (hashtags) of G is 31589

The number of edges (hashtag co-occurrences in the same tweets) of G is 142052

The density of G is 0.000284720895654

General Structure of the Graph

In [5]:
# Global structural properties: directedness, connectivity, cliques, clustering.
print 'G is of type', type(G)
print
print 'Is G directed?', nx.is_directed(G)
print
lv = len(G.nodes(data=True))
print 'Is G connected?', nx.is_connected(G)
print
print 'The number of connected components of G is', nx.number_connected_components(G)
print 
print 'The number of maximal cliques of G is', nx.graph_number_of_cliques(G)
print 
print 'The average clustering coefficient of G is', nx.average_clustering(G)
G is of type <class 'networkx.classes.graph.Graph'>

Is G directed? False

Is G connected? False

The number of connected components of G is 1512

The number of maximal cliques of G is 104120

The average clustering coefficient of G is 0.593698188721

1.1. Statistics of Edge Weights

In [6]:
# Flatten G's weighted edge list into a DataFrame, one row per edge.
ws = [
    {'from_node': u, 'to_node': v, 'weight': attrs['weight']}
    for u, v, attrs in G.edges(data=True)
]
edf = pd.DataFrame(ws)
edf.head(20)
Out[6]:
from_node to_node weight
0 europeancommission refugee 1
1 europeancommission refugeecrisis 1
2 europeancommission westernbalkans 1
3 adopteerights fox23 1
4 givingjustice refugee 1
5 givingjustice plannedparenthood 1
6 givingjustice givingtuesday 1
7 givingjustice blacklivesmatter 1
8 woods bahrain 2
9 woods countryhouse 1
10 woods jual 2
11 woods country 1
12 woods tigers 2
13 woods imagine 2
14 woods tree 2
15 woods foodpics 2
16 woods ap 2
17 woods losers 2
18 woods house 1
19 woods refugee 1
In [7]:
# Find edges/weights from a node

# All edges incident to `node`: rows where it appears as either endpoint.
node='sığınmacı' 
rr=pd.concat([edf[edf['from_node'] == node],edf[edf['to_node'] == node]])
In [8]:
# Look up the weight of the single edge joining two given hashtags.

nodes = ['sığınmacı', 'mülteci']
# nodes = ['sığınmacı', 'sergios']

nn = edf.loc[edf['from_node'].isin(nodes)]
nn.loc[nn['to_node'].isin(nodes)]
Out[8]:
from_node to_node weight
113672 sığınmacı mülteci 45
In [9]:
# Summary statistics of the edge-weight distribution.
edf.describe()
Out[9]:
weight
count 142052.000000
mean 6.731021
std 62.685006
min 1.000000
25% 1.000000
50% 1.000000
75% 3.000000
max 7552.000000
In [10]:
# Histogram of edge weights. Fix: the original axis labels were swapped —
# the x-axis carries the weight values and the y-axis the per-bin edge counts
# (compare the correctly-labeled connected-components histogram below).
plt.figure(figsize=(10,10))
bins=10
ax=sns.distplot(edf['weight'], bins=bins, kde=False, rug=False)
plt.ylabel('Number of Edges')
plt.xlabel('Edge Weight')
tt='The Histogram of Edge Weights' 
total = float(len(edf))
wws=[i['weight'] for i in ws]
# Annotate each bar with its raw count, roughly centered above the bar.
for p in ax.patches:
    height = p.get_height()
    ax.text(p.get_x()+(((max(wws)/float(bins))/2)-2), height+ 3, '%i'%(height))
plt.title(tt)

warnings.filterwarnings("ignore")

1.2. Statistics of Node Degrees

In [11]:
# Per-node degree table (Python 2: node labels are byte strings, so decode
# to unicode for display).
degrees=[{'hashtag':i.decode('utf-8'),'degree':nx.degree(G,i)} for i in G.nodes()]
ddf=pd.DataFrame(degrees)

# Keep nodes with degree > k (k=0 keeps every node) and sort descending.
k=0
ddf0=ddf[ddf.degree > k]
ddf0=ddf0[['hashtag','degree']].sort_values(by="degree",ascending=0)

# Optional Excel export (disabled):
# writer = pd.ExcelWriter('.../hashtagsP1.xlsx', engine='xlsxwriter')
# ddegst.to_excel(writer)
# writer.save()

ddf0.head(20)
Out[11]:
hashtag degree
2060 refugee 9842
19557 refugees 3042
18869 syria 2313
27276 refugeecrisis 1927
20622 syrian 1866
19117 refugeeswelcome 1759
16035 syrianrefugees 1743
6054 news 1637
25704 tcot 1242
29570 paris 1222
9981 parisattacks 1160
2325 isis 1157
10340 eu 1094
14169 np 1081
24845 europe 1067
5281 flüchtling 959
2101 germany 952
15250 usa 885
8869 obama 858
6306 crisis 814
In [12]:
# Find the degree of some node/hashtag

node_to_search='muslim'
ddf0[ddf0['hashtag'] == node_to_search]
Out[12]:
hashtag degree
18266 muslim 536
In [13]:
# Searching for a nonexistent node/hashtag — 'Dublin' is absent (the hashtags
# in G appear to be lowercased; TODO confirm against the extraction pipeline).
print G.has_node('Dublin')
ddf0[ddf0['hashtag'] == 'Dublin']
False
Out[13]:
hashtag degree
In [14]:
# Summary statistics of the degree distribution.
ddf0.describe()
Out[14]:
degree
count 31589.000000
mean 8.993764
std 70.910260
min 1.000000
25% 2.000000
50% 3.000000
75% 6.000000
max 9842.000000
In [15]:
# Degree histogram. Fix: the original axis labels were swapped — the x-axis
# carries the degree values and the y-axis the per-bin node counts.
plt.figure(figsize=(10,10))
bins=10
ax=sns.distplot(ddf0['degree'], bins=bins, kde=False, rug=False)
plt.ylabel('Number of Nodes')
plt.xlabel('Degree')
tt='Degree Histogram' 
total = float(len(ddf0))
wws=ddf0.degree.tolist()
# Annotate each bar with its raw count.
for p in ax.patches:
    height = p.get_height()
    ax.text(p.get_x()+(((max(wws)/float(bins))/2)-2), height+ 3, '%i'%(height))
plt.title(tt)

warnings.filterwarnings("ignore")
In [16]:
# Handshaking Theorem: the sum of all node degrees equals twice the number of
# edges (holds here because the k=0 cut above kept every node in ddf0).
sum(ddf0['degree']) == 2*len(G.edges())
Out[16]:
True

1.3. Connected Components

In [17]:
# Connected components of G as node sets, sorted largest-first.
ccl=sorted(nx.connected_components(G), key = len, reverse=True)
In [18]:
# Check whether nodes/hashtags are in the same connected component

def find_common_comp(ccl,nodes):
    """Find the connected component (if any) containing every node in `nodes`.

    Parameters:
        ccl   -- list of components (each a collection of node labels),
                 e.g. the output of nx.connected_components sorted by size.
        nodes -- iterable of node labels to look up together.

    Returns (index, component, 'are in') for the first component containing
    all of `nodes`, otherwise (' ', None, 'are not in the same').

    Bug fix: the original returned the not-found triple as soon as the FIRST
    component failed the membership test, so components beyond ccl[0] were
    never examined and co-membership in smaller components was misreported.
    """
    for i,gg in enumerate(ccl):
        if all(nd in gg for nd in nodes):
            return i,gg ,'are in'
    # Only after every component has been checked can we conclude "not found".
    return ' ', None, 'are not in the same'
        
# node=['yunanistan']
nodes1=['yunanistan','syria']

# NOTE(review): nodes1 is immediately overwritten, so the
# ['yunanistan','syria'] pair above is never actually tested.
nodes1=['yunanistan','byy']
# print 'yunanistan' in G.neighbors('miracle')
nodes2=['yunanistan','usa']
print 
# finding=find_common_comp(ccl,node)
finding1=find_common_comp(ccl,nodes1)
finding2=find_common_comp(ccl,nodes2)
print 'Nodes/hashtags', nodes1, '%s connected component %s' %(finding1[2],finding1[0])
print 'Nodes/hashtags', nodes2, '%s connected component %s' %(finding2[2],finding2[0])
Nodes/hashtags ['yunanistan', 'byy'] are not in the same connected component  
Nodes/hashtags ['yunanistan', 'usa'] are in connected component 0
In [19]:
# One row per connected component with its index and node count.
ppf=pd.DataFrame([{'graph':i,'size':len(g)} for i,g in enumerate(ccl)])
In [20]:
# Summary statistics of component sizes (skewed: one giant component, many pairs).
ppf.describe()
Out[20]:
graph size
count 1512.000000 1512.000000
mean 755.500000 20.892196
std 436.621117 721.745801
min 0.000000 2.000000
25% 377.750000 2.000000
50% 755.500000 2.000000
75% 1133.250000 2.000000
max 1511.000000 28067.000000
In [21]:
# Histogram of connected-component sizes (x = nodes per component, y = count).
plt.figure(figsize=(10,10))
bins=4
ax=sns.distplot(ppf['size'], bins=bins, kde=False, rug=False)
plt.ylabel('Number of Connected Components')
plt.xlabel('Number of Nodes in Connected Components')
tt='The Histogram of Connected Components' 
total = float(len(ppf))
wws=ppf['size'].tolist()
# Annotate each bar with its raw count.
for p in ax.patches:
    height = p.get_height()
    ax.text(p.get_x()+(((max(wws)/float(bins))/2)-2), height+ 3, '%i'%(height))#/total))
plt.title(tt)

warnings.filterwarnings("ignore")

1.4. Communities

In [22]:
import community as comms
from collections import Counter
# Louvain community detection (python-louvain): `part` maps node -> community id.
part=comms.best_partition(G)
npart=Counter()  # community id -> member count
nnpart={}        # community id -> list of member nodes
for pp,vv in part.items():
    npart[vv]+=1
    if vv not in nnpart:
        nnpart[vv]=[]
    nnpart[vv].append(pp)
# Community sizes, largest first.
ppcom=pd.DataFrame([{'community':i,'size':k} for i,k in npart.items()]).sort_values(by="size",ascending=0)

print 'The number of communities is', max(part.values())+1
print 'The graph modularity coefficient is', comms.modularity(part,G)
print
print 'The size of the top 20 communities:'
ppcom.drop('community', axis=1).head(20)
The number of communities is 1668
The graph modularity coefficient is 0.518913510348

The size of the top 20 communities:
Out[22]:
size
0 5545
2 3182
8 2966
11 2698
6 2668
5 2479
25 1343
12 1334
14 958
21 748
15 733
20 538
41 397
16 350
19 225
30 196
72 193
56 175
17 112
158 90
In [23]:
print 'The statistics of community membership:'
# Distribution of community sizes (dominated by many tiny communities).
ppcom.drop('community', axis=1).describe()
The statistics of community membership:
Out[23]:
size
count 1668.000000
mean 18.938249
std 213.512854
min 2.000000
25% 2.000000
50% 2.000000
75% 3.000000
max 5545.000000
In [24]:
def find_common_comm(part,nodes):
    """Locate the first community whose member list contains every node.

    Parameters:
        part  -- mapping of community id -> list of member nodes
                 (e.g. the inverted Louvain partition `nnpart`).
        nodes -- iterable of node labels to look up together.

    Returns (community_id, 'are in') on success, otherwise
    (' ', 'are not in the same').
    """
    for comm_id, members in part.items():
        if all(nd in members for nd in nodes):
            return comm_id, 'are in'
    return ' ', 'are not in the same'
nodes=['yunanistan','syria']
# NOTE(review): `nodes` is immediately overwritten, so only ['yunanistan']
# is actually tested below.
nodes=['yunanistan']       
finding=find_common_comm(nnpart,nodes)
print 'Nodes/hashtags ',nodes, '%s community %s ' %(finding[1],finding[0])
Nodes/hashtags  ['yunanistan'] are in community 14 
In [25]:
# Find nodes/hashtags in a community

# Collect every node assigned to community `commn` by the Louvain partition.
commn=40
ll=[]
for k,v in part.items():
    if v == commn:
        ll.append(k)
print 'The members of community', commn, 'are:'
print ll
The members of community 40 are:
['sportsdiplomacy', 'pdnews', 'sportsmedia', 'premiership', 'internationaldevelopment', 'bpl', 'premierleague', 'epl']
In [26]:
# Histogram of community sizes (x = nodes per community, y = count).
plt.figure(figsize=(10,10))
bins=4
ax=sns.distplot(ppcom['size'], bins=bins, kde=False, rug=False)
plt.ylabel('Number of Communities')
plt.xlabel('Number of Nodes in Communities')
tt='The Histogram of Communities' 
total = float(len(ppcom))
wws=ppcom['size'].tolist()
# Annotate each bar with its raw count.
for p in ax.patches:
    height = p.get_height()
    ax.text(p.get_x()+(((max(wws)/float(bins))/2)-2), height+ 3, '%i'%(height))#/total))
plt.title(tt)

warnings.filterwarnings("ignore")

2. Graph Cuts by Degree

In [27]:
# Cut the degree table at degree > 800, keeping only the top-degree hashtags,
# sorted from most- to least-connected.
k = 800
ddf800 = (
    ddf.loc[ddf['degree'] > k, ['hashtag', 'degree']]
       .sort_values(by="degree", ascending=False)
)
ddf800
Out[27]:
hashtag degree
2060 refugee 9842
19557 refugees 3042
18869 syria 2313
27276 refugeecrisis 1927
20622 syrian 1866
19117 refugeeswelcome 1759
16035 syrianrefugees 1743
6054 news 1637
25704 tcot 1242
29570 paris 1222
9981 parisattacks 1160
2325 isis 1157
10340 eu 1094
14169 np 1081
24845 europe 1067
5281 flüchtling 959
2101 germany 952
15250 usa 885
8869 obama 858
6306 crisis 814
In [28]:
# Summary statistics of the top-degree cut.
ddf800.describe()
Out[28]:
degree
count 20.000000
mean 1831.000000
std 1968.924553
min 814.000000
25% 1040.000000
50% 1191.000000
75% 1785.750000
max 9842.000000
In [29]:
# Induced subgraph on the top-degree hashtags. (Python 2: re-encode the
# unicode labels back to UTF-8 byte strings so they match G's node keys.)
hashtags=[i.encode('utf-8') for i in ddf800.hashtag.unique()] 
Gh=nx.subgraph(G,hashtags)

print 'The network of hashtags with the top %i degrees has %i nodes (hashtags), %i edges and average clustering coefficient %.3f' %(k,len(Gh.nodes()),len(Gh.edges()),nx.average_clustering(Gh))
print
print 'The %i nodes (hashtags) of the network of hashtags with the top %i degrees are:' %(len(Gh.nodes()),k)
print Gh.nodes()
The network of hashtags with the top 800 degrees has 20 nodes (hashtags), 171 edges and average clustering coefficient 0.959

The 20 nodes (hashtags) of the network of hashtags with the top 800 degrees are:
['europe', 'parisattacks', 'usa', 'isis', 'fl\xc3\xbcchtling', 'tcot', 'paris', 'syrian', 'refugee', 'syrianrefugees', 'np', 'germany', 'refugeeswelcome', 'eu', 'syria', 'news', 'refugeecrisis', 'refugees', 'crisis', 'obama']
In [30]:
# Edge list of the top-degree subgraph, heaviest co-occurrences first.
cws = [
    {'from_node': u, 'to_node': v, 'weight': attrs['weight']}
    for u, v, attrs in Gh.edges(data=True)
]
cedf = pd.DataFrame(cws).sort_values(by="weight", ascending=False)
cedf.head(20)
Out[30]:
from_node to_node weight
109 refugee refugeecrisis 5625
110 refugee syrian 3455
107 refugee syria 3223
112 refugee obama 3171
77 paris refugee 2620
38 isis refugee 2473
7 europe refugee 1750
105 refugee refugeeswelcome 1727
103 refugee eu 1508
94 refugees syria 1462
20 syrianrefugees tcot 1370
64 usa refugee 1346
75 paris tcot 1343
97 refugees refugeecrisis 1342
101 refugee tcot 1331
78 paris parisattacks 1232
86 paris syrian 1167
104 refugee germany 1139
102 refugee parisattacks 1112
49 isis obama 1032
In [31]:
# Weight of the edge between 'usa' and 'germany' in the full edge table.
nodes=['usa','germany']
nn=edf[edf.from_node.isin(nodes)] 
nn[nn.to_node.isin(nodes)]
Out[31]:
from_node to_node weight
25401 germany usa 27

2.1. Visualization

In [32]:
# Rebuild the subgraph with unicode node labels for plotting (Python 2:
# Gh's node keys are byte strings; decode them for matplotlib text).
Ghh=nx.Graph()
for nd in Gh.edges(data=True):
    ed=nd[0]
    de=nd[1]
    att_dici=nd[2]
    # Default to weight 0 if an edge somehow lacks the attribute.
    if 'weight' in att_dici:
        wei=att_dici['weight']
    else:
        wei=0
    
    Ghh.add_edge(ed.decode('utf-8'),de.decode('utf-8'),weight=wei)

pos=nx.circular_layout(Ghh)

# Edge width proportional to co-occurrence weight (scaled down by 500).
edgewidth=[]
for (u,v,d) in Ghh.edges(data=True):
    edgewidth.append(d['weight']/500.)
plt.figure(figsize=(12,8))
# Highlight the Paris-attacks hashtags in red; all others in green.
paris_at=['parisattacks','paris']
cols=['r' if nd in paris_at else 'g' for nd in Ghh.nodes() ]
nn1=nx.draw_networkx_nodes(Ghh,pos, node_size=1000,node_color =cols,alpha=0.35) #with_labels=True,
nn2=nx.draw_networkx_edges(Ghh,pos,edge_color='b',width=edgewidth,alpha=0.35)
nn3=nx.draw_networkx_labels(Ghh,pos,font_size=15,font_color="k")
# Assigning the return values suppresses the drawing functions' noisy reprs.
naxis=plt.axis('off')
In [33]:
# # print len(Ghh.nodes())
# group=[1 if nd in paris_at else 0 for nd in Ghh.nodes()]
# vis=plot_light(Ghh,label=2,size=10,group=group)
# vis

2.2. Centralities

In [34]:
# Plot several centrality measures for the top-degree subgraph side by side
# (helper imported from the local `tools` module).
centrali=draw_centralities_subplots(Ghh,pos,withLabels=True,labfs=15,figsi=(15,22),ealpha=0.25,vals=True)