Update networkbonus.py

This commit is contained in:
Daniel
2022-12-27 01:29:45 +02:00
committed by GitHub
parent 2033e638f2
commit f4b88a506f

View File

@@ -1 +1,123 @@
import json
import nltk
from nltk import TweetTokenizer
# Shared tokenizer, plus the characters that disqualify a token from the
# general word list and from the CSV word list respectively.
tt = TweetTokenizer()
special_chars = "1234567890.=?\",”$%^;&*(…):!><"
special_chars_CSV = "1234567890.=?\",”;$%^&*(…):@!>#@<"

# Accumulators filled while walking the tweets.
words_tweet_tokenizer = []  # tokens containing none of special_chars
words_per_tweet = {}        # tweet id -> every token of that tweet
special_words = []          # tokens containing at least one of special_chars
hashtags = []               # clean tokens that begin with '#'
wordsForCSV = []            # one list per tweet: tokens free of special_chars_CSV

# Load the tweet JSON, tokenize each tweet's text and sort the tokens into
# the buckets above.
with open("tweets.json", "r", encoding="utf-8") as tweet_file:
    tweets = json.load(tweet_file)

for tweet in tweets:
    tokens = tt.tokenize(tweet["text"])
    words_per_tweet[tweet["id"]] = tokens
    csv_tokens = []
    for token in tokens:
        # Single-character tokens are never recorded in any bucket.
        if len(token) <= 1:
            continue
        token_chars = set(token)
        if token_chars.isdisjoint(special_chars):
            words_tweet_tokenizer.append(token)
            if token.startswith("#"):
                hashtags.append(token)
        else:
            special_words.append(token)
        # The CSV filter is stricter (it also rejects '@' and '#') and is
        # applied independently of the classification above.
        if token_chars.isdisjoint(special_chars_CSV):
            csv_tokens.append(token)
    wordsForCSV.append(csv_tokens)
# Tally every hashtag occurrence, rank the tags by frequency (most used
# first) and print the ten most frequent ones.
hashtag_dictionary = {}
for tag in hashtags:
    hashtag_dictionary[tag] = hashtag_dictionary.get(tag, 0) + 1

# sorted() is stable, so tags with equal counts stay in first-seen order.
ranked_hashtags = sorted(
    hashtag_dictionary.items(), key=lambda pair: pair[1], reverse=True
)
hashtag_dictionary = dict(ranked_hashtags)

print("===================")
print("Top 10 #hashtags:")
print("===================")
for tag, occurrences in ranked_hashtags[:10]:
    print(tag, " ", occurrences)
# Analyzes each word: loads the AFINN valence lexicon and rates every tweet
# by summing the valence of its tokens.
tweet_rated_emotion = {}  # tweet id -> summed valence of its tokens
word_emotion_dict = {}    # lexicon term -> integer valence
with open("AFINN-111.txt", "r", encoding="utf-8") as AFINNdict:
    for line in AFINNdict:
        # Every lexicon line is "<term><whitespace><score>". Splitting once
        # from the right keeps the inner spaces of multi-word terms and
        # copes with a leading '-' on the score without needing a tokenizer.
        fields = line.rsplit(None, 1)
        if len(fields) != 2:
            # Blank or incomplete line: nothing to record.
            continue
        term, score = fields
        # Convert once here instead of on every lookup; a non-numeric score
        # raises ValueError so a corrupt lexicon is noticed immediately.
        word_emotion_dict[term] = int(score)

# Tweets are rated token by token, so only single-token lexicon terms can
# match; tokens missing from the lexicon contribute 0.
for tweet_id, tweet_tokens in words_per_tweet.items():
    tweet_rated_emotion[tweet_id] = sum(
        word_emotion_dict.get(token.lower(), 0) for token in tweet_tokens
    )
# Report the ten best- and ten worst-rated tweets, then dump every rating.
def _print_leading(ratings, limit=10):
    """Print the first `limit` (tweet id, rating) pairs of `ratings` in order."""
    for tweet_id, rating in list(ratings.items())[:limit]:
        print(tweet_id, " ", rating)


print("==========================")
print("Top 10 Positive :D Tweets:")
print("==========================")
# Highest rating first.
tweet_rated_emotion = dict(
    sorted(tweet_rated_emotion.items(), key=lambda pair: pair[1], reverse=True)
)
_print_leading(tweet_rated_emotion)

print("===========================")
print("Top 10 Negative >:D Tweets:")
print("===========================")
# Lowest rating first. This re-sorts the descending dict built above, so
# tweets with equal ratings keep the relative order they had there.
tweet_rated_emotion = dict(
    sorted(tweet_rated_emotion.items(), key=lambda pair: pair[1])
)
_print_leading(tweet_rated_emotion)

print("=====================================")
print("All Tweets Rated by Emotional Damage:")
print("=====================================")
print(tweet_rated_emotion)
# Creates the CSV file: one row per word, followed by every other word that
# shares a tweet with it, for a 200-tweet window of wordsForCSV.
studentID = 10
# 200 // 7 == 28, so studentID 10 selects tweets 280..479.
startingPoint = studentID * (200 // 7)
endingPoint = startingPoint + 200
filter_words = ["RT"]  # never emitted as a neighbour (still gets its own row)

graph_dict = {}  # word -> neighbours in first-seen order, no duplicates
# Slicing instead of indexing means a data set shorter than endingPoint
# yields a smaller graph rather than an IndexError.
for tweet_words in wordsForCSV[startingPoint:endingPoint]:
    for word in tweet_words:
        neighbours = graph_dict.setdefault(word, [])
        for other in tweet_words:
            # Compare by value, not identity: a word repeated inside one
            # tweet is a distinct string object and must not be linked to
            # itself.
            if (
                other != word
                and other not in filter_words
                and other not in neighbours
            ):
                neighbours.append(other)

# Widest row decides how many EDGE columns the header needs.
maxm = max((len(neighbours) for neighbours in graph_dict.values()), default=0)

# The context manager guarantees the file is flushed and closed.
with open("data.csv", "w", encoding="utf-8") as file:
    file.write("NODE," + ",".join("EDGE" + str(k) for k in range(maxm)) + "\n")
    for word, neighbours in graph_dict.items():
        # Tokens cannot contain ',' or '"' (both are in special_chars_CSV),
        # so a plain join needs no CSV quoting.
        file.write(word + "," + ",".join(neighbours) + "\n")