IMPORTANT: To use this notebook, you'll need to run
ipython notebook
in the same directory where the notebook and scripts were put.
This work is licensed under a Creative Commons Attribution 4.0 International License.
import random
import nltk
import codecs
from textblob import TextBlob
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import os
import imp
import seaborn as sns # pip install seaborn
sns.set_style("white") # For seaborn to show axes in iPython Notebook
from lightning import Lightning
from numpy import random, asarray, sqrt, arctan2, pi, clip
from seaborn import color_palette
from sklearn import datasets
from colorsys import hsv_to_rgb
# Directory holding the helper scripts loaded further down (tools.py,
# trajectories_t.py).  File names are appended to it with '+', so it has to
# end with a path separator.
# Set the LITNET_UTILS_DIR environment variable to use another location
# without editing this cell; when it is unset or empty the path below is used.
# Paths used on other machines:
# utilsdir='/Users/mosesboudourides/Dropbox/Python Projects/EUSN2016_LiteraryTextNetworksWorkshop/utils/'
# utilsdir='/home/mab/Dropbox/Python Projects/utils/'#tools.py'
utilsdir = os.path.join(
    os.environ.get('LITNET_UTILS_DIR')
    or '/home/sergios-len/Dropbox/Python Projects (1)/utils/',#EUSN2016_LiteraryTextNetworksWorkshop/utils/'
    '')  # joining with '' adds the trailing separator only when it is missing
%matplotlib inline
%load_ext autoreload
# ---- Per-book configuration ----
filename = 'texts/AnimalFarm.txt'   # text file that is read and analysed
titlename = "Animal Farm"           # title used in messages and plot titles
central_hero = 'Napoleon'           # character searched for in the trajectory cells
# The original value pointed at the Agnes Grey article (left over from another
# notebook); the character list below is annotated with the Animal Farm page.
online_list_of_characters = 'https://en.wikipedia.org/wiki/Animal_Farm'
# Column labels: the column called nn1 is renamed to nn2 further down.
# Derived from the title so that changing titlename keeps them in sync.
nn1 = titlename + " selected terms"
nn2 = titlename + " Characters"
vname1 = 'vids/AnimalFarm.gif'
vname2 = 'vids/AnimalFarm.mp4'
# Key used further down to pick the central hero's entry out of the
# trajectories result; it is the hero's name wrapped as "['<name>']".
nn3 = "['%s']" % central_hero
filename1 = 'S_out_graphs/Animal Farm_graph.graphml'
# https://en.wikipedia.org/wiki/Animal_Farm
list_of_chars = [
    'Old Major', 'Napoleon', 'Snowball', 'Squealer', 'Minimus', 'piglets',
    'young pigs', 'Pinkeye', 'Mr Jones', 'Mr Frederick', 'Mr Pilkington',
    'Mr Whymper', 'Boxer', 'Mollie', 'Clover', 'Benjamin', 'Muriel', 'puppies',
    'Moses', 'sheep', 'hens', 'cows', 'cat',
]
# Identity mapping: every character name maps to itself.
dici = {i: i for i in list_of_chars}
# Read the whole text once; the context manager closes the file handle.
with codecs.open(filename, "r", encoding="utf-8") as text_file:
    f = text_file.read()  # `f` stays the full text: it is passed to TextBlob next
num_lines = 0
num_words = 0
num_chars = 0
# Iterating the string itself yields single characters, which turned the word
# count into a count of non-blank characters.  Walk the real lines instead;
# keepends=True keeps the line terminators so num_chars still equals len(f).
for line in f.splitlines(True):
    words = line.split()
    num_lines += 1
    num_words += len(words)
    num_chars += len(line)
# Parenthesised single argument: prints the same under Python 2 and Python 3.
print("%s has number of words = %i and number of characters = %i" % (titlename, num_words, num_chars))
# Wrap the raw text in a TextBlob for the helper functions called later.
blob = TextBlob(f)
# Lower-cased character name -> name as written in the character list.
ndici = dict((name.lower(), label) for name, label in dici.items())
# (first word, second word) for every name made of more than one word.
dnici = []
for term in ndici.keys():
    parts = term.split()
    if len(parts) > 1:
        dnici.append((parts[0], parts[1]))
# The lower-cased names are the terms handed to the text helper.
selectedTerms = ndici.keys()
# IPython autoreload mode 2: modules are re-imported before code is executed,
# so edits to the helper scripts are picked up without restarting the kernel.
%autoreload 2
# Load the helper module from its file path under utilsdir and register it
# under the module name 'tools'.
tool= imp.load_source('tools', utilsdir+'tools.py')
# tool= imp.load_source('tools', '/Users/mosesboudourides/Dropbox/Python Projects/utils/tools.py')
# import tools as tool
# Short aliases for the two helpers used in this cell.
create_pandas_dataframe_from_text=tool.create_pandas_dataframe_from_text
create_coo_graph=tool.create_coo_graph
# Results as they are used further down in this file:
#   dfst       - table with a 'Frequencies' column
#   sec_prot   - graph laid out with networkx
#   coccurlist - input of create_coo_graph
#   dflines    - table with 'protagonists', '#_of_protagonists', 'polarity'
#                and 'subjectivity' columns
# NOTE(review): occurlist is not used anywhere in the visible code.
dfst,sec_prot,coccurlist,occurlist,dflines=create_pandas_dataframe_from_text(blob,selectedTerms,ndici,titlename)
co_graph=create_coo_graph(coccurlist)
# Relabel the column named nn1 as nn2.
dfst.rename(columns={nn1:nn2},inplace=True)
# dfst.rename(columns={"Anne Bronte's Agnes Grey selected terms":"Anne Bronte's Agnes Grey Characters"},inplace=True)
# Show the table ordered by frequency, highest first.  The original sorted
# ascending with sort_values() and then re-sorted descending with
# DataFrame.sort(), an API that pandas has removed; a single descending
# sort_values() produces the intended order.
dfst.sort_values(by='Frequencies', ascending=False)
# Per-sentence table built from dflines: characters, their count and the two
# sentiment scores, preceded by a running sentence id.
sentiment_cols = ['protagonists', '#_of_protagonists', 'polarity', 'subjectivity']
prot_pol_sub = dflines[sentiment_cols].reset_index()
# After reset_index() the index is 0..n-1; store it as the sentence id.
prot_pol_sub['sentence_id'] = prot_pol_sub.index
# Keep only the wanted columns (this drops the column reset_index() added).
prot_pol_sub = prot_pol_sub[['sentence_id'] + sentiment_cols]
cuts = 0
# prot_pol_sub = prot_pol_sub[prot_pol_sub['#_of_protagonists']>=cuts]
lp = prot_pol_sub['protagonists'].tolist()
# Flatten the per-sentence entries into one list of mentions.
lpn = [j for i in lp for j in i]
# len(set(lpn))
print("The total number of sentences in %s is %i." % (titlename, len(prot_pol_sub)))
# print "The total number of sentences in %s with at least %i characters in each one of them is %i." %(titlename,cuts+1,len(prot_pol_sub))
# Switch to presentation-friendly column names.
prot_pol_sub.rename(columns={'protagonists':'Lists_of_Characters','#_of_protagonists':'#_of_Characters','polarity':'Polarity','subjectivity':'Subjectivity'},inplace=True)
# View ordered by number of characters, highest first.  The result is not
# assigned, so prot_pol_sub itself keeps its order.  DataFrame.sort() has been
# removed from pandas; sort_values() is its replacement.
prot_pol_sub.sort_values(by="#_of_Characters", ascending=False)
# Drop the helper column; axis is given by keyword because newer pandas no
# longer accepts it positionally.
ddff = prot_pol_sub.drop('sentence_id', axis=1)
ddff.index.name = 'Sentence_ID'
ddff.head(50)
# nddd keeps the sentences whose polarity and subjectivity are both non-zero.
nonzero_sentiment = (ddff['Polarity'] != 0) & (ddff['Subjectivity'] != 0)
nddd = ddff[nonzero_sentiment]
# ddff=nddd
# Summary statistics are taken over all sentences, not just the filtered ones.
ddff[['#_of_Characters', 'Polarity', 'Subjectivity']].describe()
# Correlate the three numeric columns explicitly.  ddff also carries the
# 'Lists_of_Characters' column, which is not numeric; relying on corr() to
# skip it silently does not work on newer pandas.
corrmat = ddff[['#_of_Characters','Polarity','Subjectivity']].corr()
# NOTE: this rebinds `f`, which held the raw text until here.
f, ax = plt.subplots(figsize=(12, 9))
# Draw on the axes just created (previously implied via the current axes).
sns.heatmap(corrmat, vmax=.8, square=True, annot=True, ax=ax)
import numpy as np
try:
    from pandas.plotting import scatter_matrix        # current location
except ImportError:
    from pandas.tools.plotting import scatter_matrix  # location in old pandas releases
ntei='Scatter Matrix Plot of ' + titlename
f, ax = plt.subplots(figsize=(12,12))
# nddd
# One column list for both the plot and the correlations, so corr[i, j]
# always describes the panel sss[i, j].
numeric_cols = ['#_of_Characters','Polarity','Subjectivity']
sss=scatter_matrix(ddff[numeric_cols], alpha=0.9, color='black', diagonal='hist',ax=ax)
plt.suptitle(ntei,fontsize=18,fontweight='bold')
# .values replaces DataFrame.as_matrix(), which pandas has removed.
corr = ddff[numeric_cols].corr().values #nddd.corr().values
# Annotate every panel above the diagonal with its Pearson coefficient.
for i, j in zip(*np.triu_indices_from(sss, k=1)):
    sss[i, j].annotate("pearson = %.3f" %corr[i,j], (0.8, 0.93), xycoords='axes fraction', ha='center', va='center')
# Histogram of the number of characters per sentence, with two magnified
# insets for the sparsely populated bars.
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
# Sentences that mention at least one character (used for the insets only).
ndfl=dflines[dflines['#_of_protagonists']>0 ]
fig, ax = plt.subplots(figsize=[12, 10])
# First inset: magnification 16, placed with location code 7.
axes2 = zoomed_inset_axes(ax, 16, loc=7) # zoom = 16
# The main histogram uses every sentence, including those with no character.
dflines['#_of_protagonists'].plot.hist(ax=ax)
ax.set_xlabel('#_of_Characters')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of # of characters')
# Data window shown in the first inset: a thin slice just below x = 3.
x1, x2, y1, y2 = 2.95, 3., 0, 30
axes2.set_xlim(x1, x2)
axes2.set_ylim(y1, y2)
ndfl['#_of_protagonists'].plot.hist(ax=axes2)
axes2.set_ylabel('Frequency')
# Outline the zoomed region on the main axes and connect it to the inset.
mark_inset(ax, axes2, loc1=2, loc2=4, fc="none", ec="0.5")
# Second inset: magnification 10, placed with location code 10.
axes3 = zoomed_inset_axes(ax, 10, loc=10)
# Data window shown in the second inset: a thin slice starting at x = 2
# (x1..y2 are reused from the first inset).
x1, x2, y1, y2 = 2, 2.1, 0, 60
axes3.set_xlim(x1, x2)
axes3.set_ylim(y1, y2)
ndfl['#_of_protagonists'].plot.hist(ax=axes3)
axes3.set_ylabel('Frequency')
mark_inset(ax, axes3, loc1=2, loc2=4, fc="none", ec="0.5")
plt.show()
# Interactive scatter of the filtered sentences (nddd): polarity against
# subjectivity, with the number of characters passed as the point values.
x = nddd['Polarity']
y = nddd['Subjectivity']
z = nddd['#_of_Characters']
# Client for the Lightning server at the given host.
# NOTE(review): this depends on an external public server being reachable.
lgn = Lightning(ipython=True, host='http://public.lightning-viz.org')
series = [x,y]
# NOTE(review): the result of this line plot is not kept or displayed here.
lgn.line(series)
viz = lgn.scatter(x, y, values = z, alpha=0.6, colormap='YlOrRd')
# Bare expression so the notebook renders the visualization.
viz
# Static scatter plot of all sentences (ddff), coloured by number of characters.
# NOTE(review): the wildcard import brings every public ggplot name into the
# notebook namespace; this cell relies on it for ggplot, aes, geom_point,
# ggtitle and theme_matplotlib, none of which is defined elsewhere in the file.
from ggplot import *
# NOTE(review): `mpl` is not used in the visible code.
import matplotlib as mpl
ntei='Scatter Plot of ' + titlename
p = ggplot(aes(x='Polarity', y='Subjectivity',color='#_of_Characters'), data=ddff) #nddd)
# Bare expression so the notebook renders the composed plot.
p + geom_point() + ggtitle(ntei) + theme_matplotlib(rc={"figure.figsize": "12, 9"}, matplotlib_defaults=False)
# Hexagonal binning of all sentences (ddff) in the polarity/subjectivity plane.
ntei=' The Sentiment Space of ' + titlename
# NOTE: rebinds `f` to the new figure.
f, ax = plt.subplots(figsize=(12,9))
# Each hexagon is coloured by the maximum '#_of_Characters' of the sentences
# falling into it (reduce_C_function=max).
ddff.plot.hexbin(x='Polarity',y='Subjectivity',gridsize=20,C='#_of_Characters',ax=ax,reduce_C_function=max,cmap='jet')#,title=ntei)
# Axis limits with a small margin around the plotted value ranges.
plt.xlim(-1.1, 1.1)
plt.ylim(-.1, 1.1)
plt.suptitle(ntei,fontsize=15,fontweight='bold')
# Kernel-density joint plot of the filtered sentences (nddd), with the
# individual sentences overlaid as red '+' markers.
ntei='KDE Joint Plot of the Sentiment Space of ' + titlename
# f, ax = plt.subplots(figsize=(10,10))
# cmap = sns.cubehelix_palette(light=1, as_cmap=True)
# cmap = sns.cubehelix_palette(rot=-.4, as_cmap=True)
cmap = sns.cubehelix_palette(8, start=.5, rot=-.75, as_cmap=True)
# NOTE(review): newer seaborn releases call the `size` argument `height`;
# confirm against the installed version before changing it.
ggn=sns.jointplot(x='Polarity',y='Subjectivity', data=nddd, kind ="kde",cmap=cmap,space=0, size=10) #, ax=ax) #kind="kde",
# sns.kdeplot(nddd['Polarity'],nddd['Subjectivity'], cmap=cmap, shade=True)
ggn.plot_joint(plt.scatter, c="r", s=30, linewidth=1, marker="+")
# Makes the first collection on the joint axes fully transparent.
# NOTE(review): which artist that is depends on drawing order - confirm.
ggn.ax_joint.collections[0].set_alpha(0)
ggn.set_axis_labels("Polarity", "Subjectivity")
plt.suptitle(ntei,fontsize=15,fontweight='bold')
# pols=nddd.Polarity.tolist()
# subj=nddd.Subjectivity.tolist()
# ntei=titlename+' in Sentiment Space'
# import numpy as np
# import matplotlib.pyplot as plt
# import matplotlib.animation as animation
# fig, ax = plt.subplots()
# plt.xlim(-1.1, 1.1)
# plt.ylim(-.1, 1.1)
# ax.set_xlabel('Polarity')
# ax.set_ylabel('Subjectivity')
# # arro=ax.arrow(pols[0], subj[0], pols[1]-pols[0], subj[1]-subj[0], head_width=0.03, head_length=0, fc='b', ec='b',
# # length_includes_head=False,
# # # # head_starts_at_zero=True
# # # # overhang=-.51
# # fill=False)
# def animate(i):
# col=(1.*i/(1.*len(pols)),.5,.5)
# plt.plot(pols[i],subj[i],'o',color=col, markersize=5)
# # return arro,
# ani = animation.FuncAnimation(fig, animate, np.arange(0, len(pols)-1),
# interval=25, blit=False)
# # ani.save(vname2)#, metadata={'artist':'Guido'})
# ani.save(vname1, writer='imagemagick')
# plt.show()
# print len(pols)
# %%bash
# ffmpeg -f gif -i vids/AnimalFarm.gif vids/AnimalFarm.mp4
import io
import base64
from IPython.display import HTML
# Read-only binary mode is sufficient ('r+b' also requested write access),
# and the context manager closes the handle once the bytes are in memory.
with io.open(vname2, 'rb') as video_file:
    video = video_file.read()
# Embed the movie in the page as a base64 data URI.
encoded = base64.b64encode(video)
HTML(data='''<video alt="test" controls>
<source src="data:video/mp4;base64,{0}" type="video/mp4" />
</video>'''.format(encoded.decode('ascii')))
# /Users/mosesboudourides/Dropbox/Python Projects/LiteratureNetworks/ArthurConanDoyle/SherlockHolmesStoriesNetwork/
%autoreload 2
# from tools import draw_network_node_color
sstt="%s Two-Mode Network of Sentences and Characters" %titlename
pos=nx.spring_layout(sec_prot)
nds=[nd for nd in sec_prot.nodes() if isinstance(nd,int)]
prot=[nd for nd in sec_prot.nodes() if nd not in nds]
for en,nd in enumerate(nds):
if en<len(nds)/2.:
pos[nd][0]=-1
pos[nd][1]=en*2./len(nds)
else:
pos[nd][0]=1
pos[nd][1]=(en-len(nds)/2.)*2./len(nds)
for en ,nd in enumerate(prot):
pos[nd][0]=0
pos[nd][1]=en*1./len(prot)
# Draw the two-mode network twice on the same layout: first with nodes
# coloured by the 'polarity' attribute ...
possit=tool.draw_network_node_color(sec_prot,sstt,pos=pos,with_edgewidth=False,withLabels=True,labfs=12,valpha=0.2,
ealpha=0.4,labelfont=15,with_node_weight=False,node_size_fixer=300.,node_col='polarity')
# possit=draw_network_node_color(sec_prot,sstt,pos=pos,with_edgewidth=False,withLabels=True,labfs=12,valpha=0.2,
# ealpha=0.4,labelfont=15,with_node_weight=False,node_size_fixer=300.,node_col='polarity')
# ... then coloured by 'subjectivity' with the 'Greens' colour map.
# `possit` is overwritten by this second call.
possit=tool.draw_network_node_color(sec_prot,sstt,pos=pos,with_edgewidth=False,withLabels=True,labfs=12,valpha=0.2,
ealpha=0.4,labelfont=15,with_node_weight=False,node_size_fixer=300.,
node_col='subjectivity',colormat='Greens')
# possit=draw_network_node_color(sec_prot,sstt,pos=pos,with_edgewidth=False,withLabels=True,labfs=12,valpha=0.2,
# ealpha=0.4,labelfont=15,with_node_weight=False,node_size_fixer=300.,
# node_col='subjectivity',colormat='Greens')
%autoreload 2
# from tools import draw_network, make_graph_from_lists
# Load the helper module again from its file so the latest version is used.
tool= imp.load_source('tools', utilsdir+'tools.py')
# Column-wise lists of the per-sentence table, one entry per sentence.
plist = prot_pol_sub['Lists_of_Characters'].tolist()
pplist=prot_pol_sub['Polarity'].tolist()
nplist=prot_pol_sub['#_of_Characters'].tolist()
splist=prot_pol_sub['Subjectivity'].tolist()
# Build the graph of characters from the parallel lists.
G = tool.make_graph_from_lists(plist,pplist,nplist,splist)
# Force-directed layout, computed once and shared by all drawings of G.
posg=nx.spring_layout(G,scale=50,k=0.55,iterations=20)
# posg=nx.spring_layout(G,scale=50)#,k=0.55)#,iterations=20)
# First drawing: edges coloured by 'polarity'.
sstt="%s Network of Selected Characters \n(Sentences colored in polarity)" %titlename
possit=tool.draw_network(G,sstt,pos=posg,with_edgewidth=True,withLabels=True,labfs=15,valpha=0.2,ealpha=0.7,labelfont=15,
with_edgecolor=True,edgecolor='polarity',colormat='Blues')
# Second drawing: edges coloured by 'subjectivity'.
# `sstt` and `possit` are overwritten.
sstt="%s Network of Selected Characters \n(Sentences Colored in Subjectivity)" %titlename
possit=tool.draw_network(G,sstt,pos=posg,with_edgewidth=True,withLabels=True,labfs=15,valpha=0.2,ealpha=0.7,labelfont=15,
with_edgecolor=True,edgecolor='subjectivity',colormat='Blues')
from tools import draw_centralities_subplots
centrali=draw_centralities_subplots(G,pos=posg,withLabels=False,labfs=5,figsi=(15,22),ealpha=1,vals=True)
# centrali is consumed as {measure name: {node: value}}.
# The node order is taken from the first measure.  next(iter(...)) replaces
# centrali.keys()[0], which only works where keys() returns a list, and the
# {} default yields an empty table instead of an error when there is no
# measure at all.
nodes=list(next(iter(centrali.values()), {}).keys())
dfc=pd.DataFrame()
u=0
for i,k in centrali.items():
    # Look each value up by node so every column follows the 'Nodes' order
    # even if the per-measure dicts iterate in a different order.
    dfc.insert(u,i,[k[node] for node in nodes])
    u+=1
dfc.insert(0,'Nodes',nodes)
dfc
%autoreload 2
# NOTE(review): modul_arity is imported but not used in the visible code.
from tools import draw_comms, modul_arity, print_communities
# part is used below as a mapping with numeric values (node -> community
# label, presumably); nodper is passed on to draw_comms unchanged.
part,nodper=print_communities(G,sstt)
ndfl=dflines[dflines['#_of_protagonists']>0 ]
# ndfl['#_of_protagonists'].plot.hist()
# Numeric settings handed positionally to draw_comms; their meaning is
# defined by that helper.
d=0.8
dd=0.8
c=1.2
cc=1.4
alpha=0.2
ealpha=0.4
# NOTE(review): vcc is not used in the visible code.
vcc={}
# The community count in the title is the largest label plus one, which
# assumes labels run from 0 upwards without gaps - confirm against
# print_communities.
sstta="The %s Communities of %s Network of Characters" %(max(part.values())+1,titlename)#sstt)
draw_comms(G,G.nodes(),[],[],[] ,part,part,d,dd,c,cc,alpha,ealpha,nodper,sstta,titlefont=20,labelfont=17,valpha=0.5)
# %autoreload 2
# !pip install --user pygexf
# !pip install --user pyinterval
# !pip install --user pyinter
# !pip install --user python-igraph
# import trajectories as trj
# Load the trajectory helpers from their file under utilsdir; the file is
# trajectories_t.py but the module is registered under the name 'trajectories'.
trj= imp.load_source('trajectories', utilsdir+'trajectories_t.py')
# import create_gexf_year as cgy
def search_in_list(x, hero=None):
    """Tell whether the row `x` lists a given character.

    x    -- a row (anything indexable by 'protagonists') whose 'protagonists'
            entry supports the ``in`` test.
    hero -- name to look for; when omitted, the module-level ``central_hero``
            is used, so the one-argument form used with ``DataFrame.apply``
            keeps working.

    Returns the result of ``hero in x['protagonists']``.
    """
    if hero is None:
        hero = central_hero
    return hero in x['protagonists']
# Sentences that mention more than one character ...
ndfl=dflines[dflines['#_of_protagonists']>1 ]
# ... restricted to the rows for which search_in_list is True, i.e. those
# whose 'protagonists' entry contains central_hero.
dialogs=ndfl[ndfl.apply(search_in_list,axis=1) ==True]
protagonists=dialogs.protagonists.tolist()
# One unit-length interval per selected sentence: the k-th sentence (counting
# from 1) gets start k and end k+1.
start=range(1,len(protagonists)+1)
end=range(2,len(protagonists)+2)
polarities=dialogs.polarity.tolist()
subj=dialogs.subjectivity.tolist()
# qq and figi are passed through to main_work_search_name unchanged.
qq=0
figi=None
search_name=central_hero
# NOTE: G is rebound here; it no longer refers to the character graph built
# earlier in the notebook.
G,ndls,pold,subjd=trj.creatTestGraph_pandas_bip(start,end,protagonists,search_name,polarities,subj)
trajpdfs=trj.main_work_search_name(G,ndls,qq,figi,search_name,verb=False,plot_first_mode=False)
# Show the entry stored under the key nn3.
trajpdfs[nn3] #["['Agnes Grey']"]
# trajpdfs["['Sherlock Holmes']"]
%autoreload 2
import igraph as ig
from tools import igraph_draw_traj
# GraphML file to draw.
# NOTE(review): the file name embeds the hero's name as a literal and differs
# from filename1 defined in the configuration cell; presumably the file is
# written by the trajectory step above - confirm before pointing it elsewhere.
filname='S_out_graphs/Napoleon_graph.graphml'
# filname=filename1 #'S_out_graphs/Agnes Grey_graph.graphml'
# print pold
# npold={'-'.join(k.split('__')) : v for k,v in pold.items()}
# print npold
# First plot: drawn from pold, with the helper's default settings.
g,visual_style,layout=igraph_draw_traj(filname,pold)
ig.plot(g, **visual_style)
# Second plot: drawn from subjd with polar=False, reusing the layout returned
# by the first call so both pictures share node positions.
g,visual_style,layout=igraph_draw_traj(filname,subjd,polar=False,layout=layout)
ig.plot(g, **visual_style)