IMPORTANT: To use this notebook, you'll need to run
ipython notebook
in the same directory where the notebook and the scripts were put.
This work is licensed under a Creative Commons Attribution 4.0 International License.
import random
import nltk
import codecs
from textblob import TextBlob
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from collections import Counter
import imp
# utilsdir='/home/sergios-len/Dropbox/Python Projects (1)/utils/'#tools.py'
utilsdir='/home/mab/Dropbox/Python Projects/utils/'
%matplotlib inline
%load_ext autoreload
# Input text and the human-readable title used in every report and plot label.
filename = 'Plato_Phaedrus1.txt'
titlename = "Plato's Phaedrus"

# Read the whole text once; the context manager closes the handle even when
# decoding fails.  `f` stays the complete unicode text because it is handed to
# TextBlob further down.
with codecs.open(filename, "r", encoding="utf-8") as text_file:
    f = text_file.read()

# Count over real lines.  Looping over the string itself yields one character
# at a time, which makes every non-blank character count as a "word".
num_lines = 0
num_words = 0
num_chars = 0
for line in f.splitlines(True):  # True keeps line endings so they count as characters
    words = line.split()
    num_lines += 1
    num_words += len(words)
    num_chars += len(line)

# Single parenthesised argument: prints the same text under Python 2 and 3.
print("%s has number of words = %i and number of characters = %i" % (titlename, num_words, num_chars))
# Parse the raw text with TextBlob and take its sentence segmentation.
blob = TextBlob(f)
all_sents = blob.sentences

# Tally noun phrases over all sentences: every entry of a sentence's
# 'noun_phrases' list adds one to the count of that phrase.
occurdic = Counter(
    phrase
    for sentence in all_sents
    for phrase in sentence.dict['noun_phrases']
)
# Two-column frequency table of noun phrases, indexed from 1.
# The frame is built in one shot from the Counter: growing it row by row
# through `df.loc[...]` re-allocates the frame on every insertion.
np_column = "%s Noun Phrases" % titlename
phrase_counts = list(occurdic.items())
df = pd.DataFrame(phrase_counts,
                  columns=[np_column, "Frequencies"],
                  index=range(1, len(phrase_counts) + 1))
print("The total number of noun phrases in %s is %i." % (titlename, len(df)))

# `DataFrame.sort` was deprecated in pandas 0.17 and removed in 0.20;
# `sort_values` is the supported replacement.  The bare expression is what a
# notebook renders as the cell output.
df.sort_values("Frequencies", ascending=False)

# Keep only the phrases seen more than `cut` times, most frequent first.
cut = 2
df = df[df['Frequencies'] > cut].sort_values("Frequencies", ascending=False)
print("The total number of noun phrases in %s with frequencies > %i is %i." % (titlename, cut, len(df)))
df.sort_values("Frequencies", ascending=False)
%autoreload 2
# Words dropped from the extracted noun phrases before building the networks
# (presumably extractor false positives -- adjust per text).
excluded = ['who', 'will', 'exactly', 'enough', 'shall', 'suppose', 'well']

# Derive the column name from `titlename`, exactly as the frequency table
# does, so that changing the title does not raise a KeyError here.
np_column = "%s Noun Phrases" % titlename

# Map every retained noun phrase to its capitalised display label.
selectedTerms = {
    k: k.capitalize()
    for k in df[np_column].tolist()
    if k not in excluded
}

# Load the project helpers from `utilsdir` and build the per-sentence tables.
# The same dict is passed twice to create_pandas_dataframe_from_text; the
# meaning of the two parameters is defined in tools.py.
tool = imp.load_source('tools', utilsdir + 'tools.py')
create_pandas_dataframe_from_text = tool.create_pandas_dataframe_from_text
dfst, sec_prot, coccurlist, occurlist, dflines = create_pandas_dataframe_from_text(
    blob, selectedTerms, selectedTerms, titlename)
# print len(sec_prot.nodes()), sec_prot.nodes()
# dfst.sort_values(by='Frequencies').sort(["Frequencies"], ascending=[0])
# Per-sentence table: selected noun phrases found in the sentence, how many of
# them, and the sentence's polarity / subjectivity.  After reset_index() the
# positional index doubles as the sentence id.
prot_pol_sub = dflines[['protagonists', '#_of_protagonists', 'polarity', 'subjectivity']].reset_index()
prot_pol_sub['sentence_id'] = prot_pol_sub.index
prot_pol_sub = prot_pol_sub[['sentence_id', 'protagonists', '#_of_protagonists', 'polarity', 'subjectivity']]

# Keep sentences with more than `cuts` selected noun phrases.
cuts = 1
prot_pol_sub = prot_pol_sub[prot_pol_sub['#_of_protagonists'] > cuts]

# Flat list of all noun phrases occurring in the retained sentences.
lp = prot_pol_sub['protagonists'].tolist()
lpn = [j for i in lp for j in i]
# len(set(lpn))
print("The total number of sentences in %s with at least %i selected noun phrases in each one of them is %i." % (titlename, cuts + 1, len(prot_pol_sub)))

# Rename by assignment rather than in place: `prot_pol_sub` is a filtered
# selection, and in-place edits on one can trigger SettingWithCopy warnings.
prot_pol_sub = prot_pol_sub.rename(columns={
    'protagonists': 'list_of_selected_noun_phrases',
    '#_of_protagonists': '#_of_selected_noun_phrases',
})

# `DataFrame.sort` was removed from pandas; `sort_values` replaces it.
prot_pol_sub.sort_values("#_of_selected_noun_phrases", ascending=False)  # .drop('sentence_id', axis=1)

# `axis` is passed by keyword: the positional form was removed in pandas 2.0.
ddff = prot_pol_sub.drop('sentence_id', axis=1)
ddff.index.name = 'sentence_id'
ddff
# Histogram of the '#_of_protagonists' column with two zoomed insets.
# The main axes plot all rows of `dflines`; the insets plot only `ndfl`.
# NOTE(review): the axis labels and title say "characters" while the column
# holds '#_of_protagonists' -- confirm the intended wording.
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
# Rows with at least one protagonist; used for the inset histograms only.
ndfl=dflines[dflines['#_of_protagonists']>0 ]
fig, ax = plt.subplots(figsize=[12, 10])
# First inset: loc=5 is matplotlib's 'right' location code.
axes2 = zoomed_inset_axes(ax, 6, loc=5) # zoom = 6
dflines['#_of_protagonists'].plot.hist(ax=ax)
ax.set_xlabel('#_of_Characters')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of # of characters')
# Data window shown by the first inset: x in [2.9, 3.0], y in [0, 25]
# (presumably the bar ending at 3 -- confirm against the rendered plot).
x1, x2, y1, y2 = 2.9, 3., 0, 25
axes2.set_xlim(x1, x2)
axes2.set_ylim(y1, y2)
ndfl['#_of_protagonists'].plot.hist(ax=axes2)
axes2.set_ylabel('Frequency')
# Connector lines between the zoomed region and the inset; loc1=2 and loc2=4
# are the upper-left and lower-right corners.
mark_inset(ax, axes2, loc1=2, loc2=4, fc="none", ec="0.5")
# Second inset: same zoom factor, loc=10 is the 'center' location code.
axes3 = zoomed_inset_axes(ax, 6, loc=10)
# Data window shown by the second inset: x in [2, 2.05], y in [0, 30].
x1, x2, y1, y2 = 2, 2.05, 0, 30
axes3.set_xlim(x1, x2)
axes3.set_ylim(y1, y2)
ndfl['#_of_protagonists'].plot.hist(ax=axes3)
axes3.set_ylabel('Frequency')
mark_inset(ax, axes3, loc1=2, loc2=4, fc="none", ec="0.5")
plt.show()
%autoreload 2
draw_network_node_color = tool.draw_network_node_color
sstt = "%s Two-Mode Network of Sentences and Selected Noun Phrases" % titlename

# Start from a spring layout, then overwrite every coordinate by hand below.
pos = nx.spring_layout(sec_prot)

# Integer nodes go to the two outer columns; all remaining nodes go to the
# middle column.
nds = [nd for nd in sec_prot.nodes() if isinstance(nd, int)]
int_nodes = set(nds)
prot = [nd for nd in sec_prot.nodes() if nd not in int_nodes]

# First half of the integer nodes at x = -1, second half at x = +1; within
# each column the nodes are stacked upwards at a constant step.
n_int = len(nds)
half = n_int / 2.
for en, nd in enumerate(nds):
    if en >= half:
        x, y = 1, (en - half) * 2. / n_int
    else:
        x, y = -1, en * 2. / n_int
    pos[nd][0] = x
    pos[nd][1] = y

# Remaining nodes: evenly spaced along x = 0.
n_prot = len(prot)
for en, nd in enumerate(prot):
    pos[nd][0] = 0
    pos[nd][1] = en * 1. / n_prot

# Both drawings share the same layout and styling; only the node colouring
# attribute (and the colour map of the second one) differs.
shared_style = dict(
    pos=pos,
    with_edgewidth=False,
    withLabels=True,
    labfs=12,
    valpha=0.2,
    ealpha=0.4,
    labelfont=15,
    with_node_weight=False,
    node_size_fixer=300.,
)
possit = draw_network_node_color(sec_prot, sstt, node_col='polarity', **shared_style)
possit = draw_network_node_color(sec_prot, sstt, node_col='subjectivity',
                                 colormat='Greens', **shared_style)
%autoreload 2
# Column-wise views of the filtered sentence table, in row order.
plist = prot_pol_sub['list_of_selected_noun_phrases'].tolist()
pplist = prot_pol_sub['polarity'].tolist()
nplist = prot_pol_sub['#_of_selected_noun_phrases'].tolist()
splist = prot_pol_sub['subjectivity'].tolist()

# Noun-phrase network built by the project helper from the four lists.
G = tool.make_graph_from_lists(plist, pplist, nplist, splist)
posg = nx.spring_layout(G, scale=50)  # ,k=0.55)#,iterations=20)

# Same graph and layout drawn twice: edges coloured by polarity, then by
# subjectivity.
sstt = "%s Network of Selected Noun Phrases \n(Sentences colored in polarity)" % titlename
possit = tool.draw_network(G, sstt, pos=posg, with_edgewidth=True, withLabels=True, labfs=15,
                           valpha=0.2, ealpha=0.7, labelfont=15,
                           with_edgecolor=True, edgecolor='polarity', colormat='Blues')
sstt = "%s Network of Selected Noun Phrases \n(Sentences colored in subjectivity)" % titlename
possit = tool.draw_network(G, sstt, pos=posg, with_edgewidth=True, withLabels=True, labfs=15,
                           valpha=0.2, ealpha=0.7, labelfont=15,
                           with_edgecolor=True, edgecolor='subjectivity', colormat='Greys')

# `centrali` is used below as a mapping {measure name: {node: value}}.
centrali = tool.draw_centralities_subplots(G, pos=posg, withLabels=False, labfs=5,
                                           figsi=(15, 22), ealpha=1, vals=True)

# Centrality table: one row per node, one column per measure.
# Values are looked up by node instead of relying on every per-measure dict
# iterating in the same order, and no list-style indexing of `.keys()` is
# used, so the cell also runs under Python 3.
node_order = list(next(iter(centrali.values()))) if centrali else []
dfc = pd.DataFrame({'Nodes': node_order}, columns=['Nodes'])
for measure, scores in centrali.items():
    dfc[measure] = [scores[node] for node in node_order]
dfc
%autoreload 2
# Community detection on the noun-phrase network; `part` is used below as a
# node -> community-id mapping.
part, nodper = tool.print_communities(G, sstt)

# Numeric knobs forwarded positionally to tool.draw_comms (their meaning is
# defined in tools.py).
d, dd = 0.8, 0.8
c, cc = 1.2, 1.4
alpha = ealpha = 0.2
vcc = {}

# Largest community id plus one -- the community count if ids start at 0.
n_communities = max(part.values()) + 1
sstta = "The %s Communities of %s Network of Selected Noun Phrases" % (n_communities, titlename)  # sstt)
tool.draw_comms(
    G, G.nodes(), [], [], [],
    part, part,
    d, dd, c, cc,
    alpha, ealpha,
    nodper, sstta,
    titlefont=20, labelfont=17, valpha=0.5,
)
%autoreload 2
# (Re)load the project helper modules from `utilsdir`.
tool = imp.load_source('tools', utilsdir + 'tools.py')
trj = imp.load_source('trajectories', utilsdir + 'trajectories_t.py')

# Per-sentence inputs for the trajectory analysis, in table row order.
protagonists = prot_pol_sub['list_of_selected_noun_phrases'].tolist()
polarities = prot_pol_sub['polarity'].tolist()
subj = prot_pol_sub['subjectivity'].tolist()

# Row i gets start i and end i + 1, counting from 1.
n_rows = len(protagonists)
start = range(1, n_rows + 1)
end = range(2, n_rows + 2)
# Trajectory analysis for the noun phrase 'Phaedrus'.
search_name = 'Phaedrus'
qq, figi = 0, None
G, ndls, pold, subjd = trj.creatTestGraph_pandas_bips(
    start, end, protagonists, search_name, polarities, subj)
trajpdfs = trj.main_work_search_name(
    G, ndls, qq, figi, search_name,
    verb=False, no_anala=True, plot_first_mode=False)
# Variant with no_anala=False, kept for reference:
# trajpdfs=trj.main_work_search_name(G,ndls,qq,figi,search_name,verb=False,no_anala=False,plot_first_mode=False)
trajpdfs["['Phaedrus']"]

import igraph as ig
igraph_draw_traj = tool.igraph_draw_traj

# GraphML file for this search name (presumably written by the trajectory
# run above -- confirm in trajectories_t.py).
filname = 'S_out_graphs/%s_graph.graphml' % search_name

# First drawing uses `pold`; the second reuses the returned layout and passes
# `subjd` with polar=False.
g, visual_style, layout = igraph_draw_traj(filname, pold)
ig.plot(g, **visual_style)
g, visual_style, layout = igraph_draw_traj(filname, subjd, polar=False, layout=layout)
ig.plot(g, **visual_style)
# Trajectory analysis for the noun phrase 'Lysias'.
search_name = 'Lysias'
qq, figi = 0, None
G, ndls, pold, subjd = trj.creatTestGraph_pandas_bips(
    start, end, protagonists, search_name, polarities, subj)
trajpdfs = trj.main_work_search_name(
    G, ndls, qq, figi, search_name,
    verb=False, no_anala=True, plot_first_mode=False)
# Variant with no_anala=False, kept for reference:
# trajpdfs=trj.main_work_search_name(G,ndls,qq,figi,search_name,verb=False,no_anala=False,plot_first_mode=False)
trajpdfs["['Lysias']"]

import igraph as ig
igraph_draw_traj = tool.igraph_draw_traj

# GraphML file for this search name (presumably written by the trajectory
# run above -- confirm in trajectories_t.py).
filname = 'S_out_graphs/%s_graph.graphml' % search_name

# First drawing uses `pold`; the second reuses the returned layout and passes
# `subjd` with polar=False.
g, visual_style, layout = igraph_draw_traj(filname, pold)
ig.plot(g, **visual_style)
g, visual_style, layout = igraph_draw_traj(filname, subjd, polar=False, layout=layout)
ig.plot(g, **visual_style)
# Trajectory analysis for the noun phrase 'Socrates'.
search_name = 'Socrates'
qq, figi = 0, None
G, ndls, pold, subjd = trj.creatTestGraph_pandas_bips(
    start, end, protagonists, search_name, polarities, subj)
trajpdfs = trj.main_work_search_name(
    G, ndls, qq, figi, search_name,
    verb=False, no_anala=True, plot_first_mode=False)
# Variant with no_anala=False, kept for reference:
# trajpdfs=trj.main_work_search_name(G,ndls,qq,figi,search_name,verb=False,no_anala=False,plot_first_mode=False)
trajpdfs["['Socrates']"]

import igraph as ig
igraph_draw_traj = tool.igraph_draw_traj

# GraphML file for this search name (presumably written by the trajectory
# run above -- confirm in trajectories_t.py).
filname = 'S_out_graphs/%s_graph.graphml' % search_name

# First drawing uses `pold`; the second reuses the returned layout and passes
# `subjd` with polar=False.
g, visual_style, layout = igraph_draw_traj(filname, pold)
ig.plot(g, **visual_style)
g, visual_style, layout = igraph_draw_traj(filname, subjd, polar=False, layout=layout)
ig.plot(g, **visual_style)