IMPORTANT: To use this notebook, you'll need to run
ipython notebook
in the same directory where the notebook and the scripts were put.
This work is licensed under a Creative Commons Attribution 4.0 International License.
import random
import nltk
import codecs
from textblob import TextBlob
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
# Human-readable title of the text under analysis.
# NOTE(review): `doc` is not referenced again in the visible part of the
# notebook (`titlename` carries the same string) -- confirm before removing.
doc="Aristotle's Nicomachean Ethics"
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# IPython magic: load the autoreload extension, needed by the `%autoreload 2`
# calls further down.
%load_ext autoreload
# Path of the plain-text source, relative to the notebook's working directory.
filename = 'Aristotle_NicomacheanEthics.txt'
# Title used in the table headers and plot titles built later in the notebook.
titlename = "Aristotle's Nicomachean Ethics"
# Read the whole text as UTF-8; the `with` block closes the file handle as
# soon as the read has finished instead of leaving it open.
with codecs.open(filename, "r", encoding="utf-8") as f:
    document = f.read()
# Parse the document with TextBlob and take its noun-phrase frequency mapping
# (noun phrase -> number of occurrences).
blobbook = TextBlob(document)
npbook = blobbook.np_counts
# Initialised here so the name exists before the selection step fills it.
selectedTermsDic = {}
# One row per noun phrase, index running from 1. The frame is built in a
# single call: growing it row by row with `.loc` re-allocates on every insert.
dfst = pd.DataFrame(
    list(npbook.items()),
    columns=["%s noun phrases" % titlename, "Frequencies"],
    index=range(1, len(npbook) + 1)
)
# Parenthesised single-argument form prints the same text on Python 2 and 3.
print("The total number of noun phrases in %s is %i." % (titlename, len(npbook)))
# Column holding the noun phrases. Derived from `titlename` (the same pattern
# used when `dfst` is created) so the selection keeps working when the
# notebook is pointed at another text.
phrase_column = "%s noun phrases" % titlename
# Frequent phrases that are removed from the selection by hand.
excluded_phrases = ['hence', 'surely', 'good men', 'such things', 'certain kind']

# Keep phrases occurring more than 10 times, minus the manual exclusions.
dfstt = dfst[dfst['Frequencies'] > 10]
dfstt = dfstt[~dfstt[phrase_column].isin(excluded_phrases)]
# phrase -> frequency mapping of the selected phrases.
selectedTermsDic = dict(zip(dfstt[phrase_column].tolist(), dfstt["Frequencies"].tolist()))
# The filter above is strict (> 10), so the message says "more than".
print("The total number of selected noun phrases in %s occurring more than 10 times is %i." % (titlename, dfstt.shape[0]))
# Returns a copy sorted by descending frequency (shown as the cell output);
# `dfstt` itself is not reordered.
# NOTE(review): DataFrame.sort was removed in pandas 0.20 -- on newer pandas
# this call has to become sort_values.
dfstt.sort(["Frequencies"], ascending=[0])
# IPython magic: autoreload mode 2, i.e. reload modules before running code,
# so edits to the project-local `tools` module are picked up.
%autoreload 2
from tools import occurrences, makegraph
# `occurrences` and `makegraph` live in the project-local `tools` module; their
# internals are not visible here.
# NOTE(review): presumably this locates the selected noun phrases in the text
# and links them into a graph -- confirm against tools.py.
documentDict = occurrences(document,selectedTermsDic)
documentGraph = makegraph(documentDict)
# Spring-layout node positions, computed once and passed to the plots below.
# NOTE(review): no seed is set, so the layout may differ between runs.
pos=nx.spring_layout(documentGraph,scale=50,k=0.5,iterations=20)
# Alternative Graphviz layout, left disabled.
# pos=nx.graphviz_layout(documentGraph)
from tools import dhist
# Plot title for the degree histogram.
sstth="The Degree Histogram of %s wordnet" %titlename
# `dhist` is a project-local helper (tools module); it receives the graph, the
# title, the precomputed layout and a figure size.
# NOTE(review): its return value `dhp` is not used again in the visible part
# of the notebook.
dhp=dhist(documentGraph,sstth,pos=pos,figsize=(12,10))
from tools import draw_network
# Title of the network map; also reused further down for the community output.
sstt="The Network Map of %s" %titlename
# `draw_network` is a project-local helper (tools module); it is given the
# graph, the title, the precomputed layout and a set of styling options.
# NOTE(review): the meaning of the individual styling keywords and of the
# returned `possit` is defined in tools.py and not visible here.
possit=draw_network(documentGraph,sstt,pos=pos,with_edgewidth=True,withLabels=True,labfs=20,valpha=0.2,ealpha=0.4,labelfont=15,with_node_weight=True,node_size_fixer=300.)
from tools import draw_centralities_subplots
# Project-local helper (tools module). The returned `centrali` is consumed
# below as a mapping of column label -> {node: value}
# (presumably one entry per centrality measure -- confirm against tools.py).
centrali=draw_centralities_subplots(documentGraph,pos,withLabels=False,labfs=5,figsi=(15,22),ealpha=1,vals=True)
# Tabulate `centrali`: one row per node, one column per entry of `centrali`,
# with the node labels in the leading 'Nodes' column.
dfc = pd.DataFrame()
measures = list(centrali.items())
# Take the node order from the first entry and look every value up by node,
# so rows stay aligned even if the inner dicts iterate in different orders.
# An empty `centrali` yields a table with only an empty 'Nodes' column.
nodes = list(measures[0][1]) if measures else []
dfc.insert(0, 'Nodes', nodes)
for column, (measure, values) in enumerate(measures, 1):
    dfc.insert(column, measure, [values[node] for node in nodes])
# Bare expression: displays the table as the notebook cell output.
dfc
# IPython magic: autoreload mode 2, so edits to `tools` are picked up.
%autoreload 2
from tools import draw_comms, modul_arity, print_communities
# `print_communities` is a project-local helper (tools module). Of its two
# return values, `part` is used below as a mapping whose values are integer
# community labels, and `nodper` is forwarded unchanged to draw_comms.
# NOTE(review): `modul_arity` is imported but not called in the visible part
# of the notebook.
part,nodper=print_communities(documentGraph,sstt)
# Numeric settings forwarded positionally to draw_comms below.
# NOTE(review): the meaning of d, dd, c, cc, alpha and ealpha is defined by
# tools.draw_comms and is not visible here.
d=0.8
dd=0.8
c=1.2
cc=1.4
alpha=0.2
ealpha=0.2
# NOTE(review): vcc is not referenced again in the visible part of the notebook.
vcc={}
# Title: the largest label in `part` plus one (presumably the community count,
# assuming labels start at 0), followed by the network-map title.
sstta="The %s %s Communities" %(max(part.values())+1,sstt)
# Draw the communities over all nodes of the graph; `part` is passed twice and
# three empty lists fill positional slots whose meaning is defined in tools.py.
draw_comms(documentGraph,documentGraph.nodes(),[],[],[] ,part,part,d,dd,c,cc,alpha,ealpha,nodper,sstta,titlefont=20,labelfont=17,valpha=0.5)