First, assign the folder containing the text files to a variable and list its contents.
import os
folder = "./data/Editorial Placement/"
txt_files = os.listdir(folder)
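Note that os.listdir returns every entry in the folder, not just text files. A minimal sketch of a filter, assuming only files ending in .txt should be read:

txt_files = [f for f in os.listdir(folder) if f.endswith(".txt")]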
Then loop over the list of text files and read each one into a corpus list.
import pandas as pd

corpus = []
for file_path in txt_files:
    with open(folder + file_path, encoding='utf8') as f_input:
        corpus.append(f_input.read())
- NB: Installing google-cloud alone does not work; you have to specifically install google-cloud-language for the imports below to resolve.
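For example, assuming pip is used to manage packages, the install command is:

pip install google-cloud-language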
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types
import os

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""  # set this to where your configuration JSON file is located
# print('Credentials from environ: {}'.format(os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')))
# Instantiates a client
client = language.LanguageServiceClient()
neg = 0
pos = 0
s_pos = 0
neu = 0

for i in range(len(corpus)):
    document = types.Document(
        content=corpus[i],
        type=enums.Document.Type.PLAIN_TEXT)

    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment
    response3 = client.classify_text(document)  # content classification (response3 is not used below)

    analysis = ''
    if sentiment.score == 0:
        analysis = 'Neutral'
        neu += 1
    elif sentiment.score < 0:
        analysis = 'Negative'
        neg += 1
    elif sentiment.score > 0 and sentiment.score < 0.5:
        analysis = 'Slightly Positive'
        s_pos += 1
    else:
        analysis = 'Positive'
        pos += 1

    print('Document: ' + txt_files[i][:-4])
    print('Sentiment Prediction: {}, Sentiment Analysis: {}'.format(sentiment.score, analysis))
    print("")
    print("")
sentiment_analysis = [pos, s_pos, neu, neg]
names_analysis = ["Positive", "Slightly Positive", "Neutral", "Negative"]
import plotly.graph_objects as go
fig = go.Figure(data=[go.Pie(labels=names_analysis, values=sentiment_analysis, title="Pie Chart of Editorial Placements")])
fig.show()
per = 0
num = 0
con = 0
eve = 0
loc = 0
org = 0

persons = []
numbers = []
consumers = []
events = []
locations = []
organizations = []

for i in range(len(corpus)):
    document = types.Document(
        content=corpus[i],
        type=enums.Document.Type.PLAIN_TEXT)

    # Detects the entities mentioned in the text
    response = client.analyze_entities(document=document)

    person = []
    number = []
    consumer = []
    event = []
    location = []
    organization = []

    for j in range(len(response.entities)):
        if (response.entities[j].type) == 1:      # PERSON
            person.append(response.entities[j].name)
            persons.append(str(response.entities[j].name))
        elif (response.entities[j].type) == 12:   # NUMBER
            number.append(response.entities[j].name)
            numbers.append(str(response.entities[j].name))
        elif (response.entities[j].type) == 6:    # CONSUMER_GOOD
            consumer.append(response.entities[j].name)
            consumers.append(str(response.entities[j].name))
        elif (response.entities[j].type) == 4:    # EVENT
            event.append(response.entities[j].name)
            events.append(str(response.entities[j].name))
        elif (response.entities[j].type) == 2:    # LOCATION
            location.append(response.entities[j].name)
            locations.append(str(response.entities[j].name))
        elif (response.entities[j].type) == 3:    # ORGANIZATION
            organization.append(response.entities[j].name)
            organizations.append(str(response.entities[j].name))

    print('Person Entities Named in ' + txt_files[i][:-4] + " Letter")
    print("")
    print(person)
    per += len(person)
    print('Number Entities Named in ' + txt_files[i][:-4] + " Letter")
    print(number)
    num += len(number)
    print("")
    print('Consumer Entities Named in ' + txt_files[i][:-4] + " Letter")
    print(consumer)
    con += len(consumer)
    print("")
    print('Event Entities Named in ' + txt_files[i][:-4] + " Letter")
    print(event)
    eve += len(event)
    print("")
    print('Location Entities Named in ' + txt_files[i][:-4] + " Letter")
    print(location)
    loc += len(location)
    print("")
    print('Organization Entities Named in ' + txt_files[i][:-4] + " Letter")
    print(organization)
    org += len(organization)
    print("")
    print("")

types1 = ["Person Entity", "Number Entity", "Consumer Entity", "Event Entity", "Location Entity", "Organization Entity"]
total_num = [per, num, con, eve, loc, org]
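The labels in types1 and the totals in total_num can then be charted in the same way as the sentiment counts above. A minimal sketch, assuming a Plotly bar chart of the total entity counts (this figure is an illustration, not the original):

import plotly.graph_objects as go
fig = go.Figure(data=[go.Bar(x=types1, y=total_num)])  # total count of each entity type across all letters
fig.update_layout(title="Entity Counts in Editorial Placements")
fig.show()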
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
from collections import Counter
import matplotlib.pyplot as plt

# Generate and display a word cloud for each entity type:
wordcloud = WordCloud().generate_from_frequencies(Counter(consumers))
plt.title("Consumer Entity")
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

wordcloud = WordCloud().generate_from_frequencies(Counter(events))
plt.title("Event Entity")
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

wordcloud = WordCloud().generate_from_frequencies(Counter(numbers))
plt.title("Numbers Entity")
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

wordcloud = WordCloud().generate_from_frequencies(Counter(persons))
plt.title("Person Entity")
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

wordcloud = WordCloud().generate_from_frequencies(Counter(locations))
plt.title("Locations Entity")
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

wordcloud = WordCloud().generate_from_frequencies(Counter(organizations))
plt.title("Organization Entity")
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()