Update network.py
@@ -1,71 +1,84 @@
 import json
-import re
 import nltk
 from nltk import TweetTokenizer
 
-hashtags = []
-mapped_hashtags = dict()
-emotional_values = dict()
-tokenizer = TweetTokenizer()
-final_emotional_data = {}
+tt = TweetTokenizer()
+special_chars = "1234567890.=?\",”$%^&’*(…):!><"
 
-with open('AFINN-111.txt', encoding="utf-8") as file:
-    for line in file:
+words_tweet_tokenizer = []
+words_per_tweet = {}
+special_words = []
+hashtags = []
+
+# Handles the tweet json, separates words into needed categories, and extracts hashtags
+with open("tweets.json", "r", encoding="utf-8") as tweetJson:
+    tweetJsonData = json.load(tweetJson)
+    for tweet in tweetJsonData:
+        tempWords = tt.tokenize(tweet["text"])
+        words_per_tweet[tweet["id"]] = tempWords
+        for word in tempWords:
+            if not any(c in special_chars for c in word) and len(word) > 1:
+                words_tweet_tokenizer.append(word)
+                if word[0] == '#':
+                    hashtags.append(word)
+            else:
+                if len(word) > 1:
+                    special_words.append(word)
+
+# Counts hashtags and outputs the top
+hashtag_dictionary = {x: 0 for x in hashtags}
+for hashtag in hashtags:
+    hashtag_dictionary[hashtag] += 1
+
+hashtag_dictionary = dict(sorted(hashtag_dictionary.items(), key=lambda item: item[1], reverse=True))
+print("===================")
+print("Top 10 #hashtags:")
+print("===================")
+count = 0
+for hashtag in hashtag_dictionary:
+    if count < 10:
+        print(hashtag, " ", hashtag_dictionary[hashtag])
+        count += 1
+
+# Analyzes each word
+tweet_rated_emotion = {}
+word_emotion_dict = {}
+with open("AFINN-111.txt", "r", encoding="utf-8") as AFINNdict:
+    for line in AFINNdict:
         words = nltk.word_tokenize(line)
         nr = words[len(words) - 1]
         str = ""
         for x in range(len(words) - 1):
-            str += words[x];
-        emotional_values[str] = nr
+            str += words[x]
+        word_emotion_dict[str] = nr
 
-with open('tweets.json', 'r', encoding='utf-8') as tweet_json:
-    tweet_data = json.load(tweet_json)
-    for i in range(len(tweet_data)):
-        emotion_rating = 0
-        words = tokenizer.tokenize(tweet_data[i]["text"])
-        for x in words:
-            if x[0] == '#' and len(x) > 1:
-                hashtags.append(x)
-            if re.sub("\s\s+", " ", x).lower() in emotional_values:
-                emotion_rating += int(emotional_values[x.lower()])
-        final_emotional_data[tweet_data[i]["id"]] = emotion_rating
+for id in words_per_tweet:
+    total_rating = 0
+    for word in words_per_tweet[id]:
+        if word.lower() in word_emotion_dict:
+            total_rating += int(word_emotion_dict[word.lower()])
+    tweet_rated_emotion[id] = total_rating
 
-for i in range(len(hashtags)):
-    mapped_hashtags[hashtags[i]] = 0
-for i in range(len(hashtags)):
-    mapped_hashtags[hashtags[i]] += 1
-
-sorted_dict = dict(sorted(mapped_hashtags.items(), key=lambda item: item[1], reverse=True))
-counter = 10
-x = 1
-print("========================")
-print("Top #10 Hashtags")
-print("========================")
-for i in sorted_dict:
-    if x <= counter:
-        print(x,'.', i, " ", sorted_dict[i])
-        x += 1
-x = 1
-sorted_emotion_reverse = dict(sorted(final_emotional_data.items(), key=lambda item: item[1], reverse=True))
-sorted_emotion = dict(sorted(final_emotional_data.items(), key=lambda item: item[1]))
-print("========================")
-print("Top #10 Positive Tweets")
-print("=========================")
-x = 1
-for i in sorted_emotion_reverse:
-    if x <= counter:
-        print(i, " ", sorted_emotion_reverse[i])
-        x += 1
-print("========================")
-print("Top #10 Negative Tweets")
-print("========================")
-x = 1
-for i in sorted_emotion:
-    if x <= counter:
-        print(i, " ", sorted_emotion[i])
-        x += 1
-print("========================")
-print("All Emotional Values per ID")
-print("=========================")
-for x in final_emotional_data:
-    print(x, final_emotional_data[x])
+print("==========================")
+print("Top 10 Positive :D Tweets:")
+print("==========================")
+tweet_rated_emotion = dict(sorted(tweet_rated_emotion.items(), key=lambda item: item[1], reverse=True))
+count = 0
+for tweet in tweet_rated_emotion:
+    if count < 10:
+        print(tweet, " ", tweet_rated_emotion[tweet])
+        count += 1
+print("===========================")
+print("Top 10 Negative >:D Tweets:")
+print("===========================")
+tweet_rated_emotion = dict(sorted(tweet_rated_emotion.items(), key=lambda item: item[1], reverse=False))
+count = 0
+for tweet in tweet_rated_emotion:
+    if count < 10:
+        print(tweet, " ", tweet_rated_emotion[tweet])
+        count += 1
+
+print("=====================================")
+print("All Tweets Rated by Emotional Damage:")
+print("=====================================")
+print(tweet_rated_emotion)
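A few review notes on the new version follow. The hashtag report seeds a dict comprehension with zeros and then increments in a second loop; collections.Counter from the standard library builds the same table, and its most_common(10) replaces the manual sort plus the `if count < 10:` guard. A minimal sketch with stand-in data, not part of the commit:

from collections import Counter

hashtags = ["#nlp", "#nlp", "#python"]  # stand-in for the list the script builds

# Same counts as the {x: 0 for x in hashtags} seed plus the increment loop.
hashtag_dictionary = Counter(hashtags)
for tag, n in hashtag_dictionary.most_common(10):
    print(tag, " ", n)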
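The AFINN loop rebuilds each term by concatenating every token but the last, which shadows the built-in `str` and joins the lexicon's multi-word phrases such as "can't stand" without spaces, so those keys can never match a single tokenized tweet word. Assuming the usual AFINN-111.txt layout of one tab-separated "term<TAB>score" pair per line, splitting on the last tab recovers both fields intact; a sketch under that assumption:

# A minimal sketch, assuming each AFINN-111.txt line reads "term<TAB>score".
word_emotion_dict = {}
with open("AFINN-111.txt", encoding="utf-8") as afinn:
    for line in afinn:
        term, sep, score = line.rstrip("\n").rpartition("\t")
        if sep:  # skip any line without a tab
            word_emotion_dict[term] = int(score)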
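The scoring pass itself is sound: every token with a lexicon entry contributes its score, everything else contributes nothing. Written as a helper it also becomes easy to test; `rate_tokens` below is a hypothetical name, not something in the commit:

def rate_tokens(tokens, lexicon):
    # Unknown tokens contribute 0; known ones contribute their AFINN score.
    return sum(int(lexicon.get(token.lower(), 0)) for token in tokens)

# Stand-in data shaped like the script's words_per_tweet / word_emotion_dict.
word_emotion_dict = {"good": "3", "bad": "-3"}
words_per_tweet = {101: ["Good", "morning", "!"], 102: ["bad", "day"]}
tweet_rated_emotion = {tweet_id: rate_tokens(tokens, word_emotion_dict)
                       for tweet_id, tokens in words_per_tweet.items()}
print(tweet_rated_emotion)  # {101: 3, 102: -3}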
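Finally, each report block re-sorts the whole dict and then walks every key behind an `if count < 10:` test that never breaks, so the loops keep running long after the tenth row is printed. heapq.nlargest and heapq.nsmallest express the top and bottom ten directly and visit only what they need; a sketch with stand-in ratings:

import heapq

tweet_rated_emotion = {101: 3, 102: -3, 103: 0}  # stand-in ratings

# (id, rating) pairs ranked by rating, without sorting the dict twice.
for tweet_id, rating in heapq.nlargest(10, tweet_rated_emotion.items(), key=lambda kv: kv[1]):
    print(tweet_id, " ", rating)
for tweet_id, rating in heapq.nsmallest(10, tweet_rated_emotion.items(), key=lambda kv: kv[1]):
    print(tweet_id, " ", rating)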