Update networkbonus.py

2022-12-27 01:29:45 +02:00
parent 2033e638f2
commit f4b88a506f
1 changed files with 122 additions and 0 deletions
--- a/LabMD_3/networkbonus.py
+++ b/LabMD_3/networkbonus.py
@@ -1 +1,123 @@

+import json
+
+import nltk
+from nltk import TweetTokenizer
+
+tt = TweetTokenizer()
+special_chars = "1234567890.=?\",”$%^;&’*(…):!><"
+special_chars_CSV = "1234567890.=?\",”;$%^&’*(…):@!>#@<"
+words_tweet_tokenizer = []  # multi-character tokens free of special_chars
+words_per_tweet = {}  # tweet id -> every token of that tweet
+special_words = []  # multi-character tokens holding a special_chars character
+hashtags = []  # entries of words_tweet_tokenizer that start with '#'
+wordsForCSV = []  # one list per tweet: tokens free of special_chars_CSV
+
+# Loads the tweet json, tokenizes each text and files every token into the lists above
+with open("tweets.json", "r", encoding="utf-8") as tweet_file:
+    for tweet in json.load(tweet_file):
+        tokens = tt.tokenize(tweet["text"])
+        words_per_tweet[tweet["id"]] = tokens
+        csv_words = []
+        for token in tokens:
+            if len(token) <= 1:
+                continue  # tokens of at most one character are never recorded
+            if all(ch not in special_chars for ch in token):
+                words_tweet_tokenizer.append(token)
+                if token.startswith('#'):
+                    hashtags.append(token)
+            else:
+                special_words.append(token)
+            if all(ch not in special_chars_CSV for ch in token):
+                csv_words.append(token)
+        wordsForCSV.append(csv_words)
+
+# Tallies every hashtag occurrence, then prints the ten most frequent ones
+hashtag_dictionary = {}
+for tag in hashtags:
+    hashtag_dictionary[tag] = hashtag_dictionary.get(tag, 0) + 1
+# sorted() is stable, so equally frequent hashtags stay in first-seen order
+hashtag_dictionary = dict(
+    sorted(hashtag_dictionary.items(), key=lambda pair: pair[1], reverse=True)
+)
+print("===================")
+print("Top 10 #hashtags:")
+print("===================")
+for tag, occurrences in list(hashtag_dictionary.items())[:10]:
+    print(tag, " ", occurrences)
+
+# Loads the AFINN word list and rates each tweet as the sum of its word scores
+tweet_rated_emotion = {}
+word_emotion_dict = {}
+with open("AFINN-111.txt", "r", encoding="utf-8") as AFINNdict:
+    for line in AFINNdict:
+        # An entry is "<term><whitespace><score>" and the term may itself hold
+        # spaces, so split once from the right instead of tokenizing the line.
+        entry = line.rsplit(None, 1)
+        if len(entry) != 2:
+            continue  # blank or incomplete line: nothing to record
+        term, score = entry
+        word_emotion_dict[term] = int(score)  # fails fast on a non-numeric score
+
+for tweet_id, tweet_words in words_per_tweet.items():
+    # Tokens missing from the word list contribute 0 to the tweet's rating
+    tweet_rated_emotion[tweet_id] = sum(
+        word_emotion_dict.get(tweet_word.lower(), 0) for tweet_word in tweet_words
+    )
+
+# Reports the ten highest-rated and ten lowest-rated tweets, then every rating
+print("==========================")
+print("Top 10 Positive :D Tweets:")
+print("==========================")
+tweet_rated_emotion = dict(
+    sorted(tweet_rated_emotion.items(), key=lambda pair: pair[1], reverse=True)
+)
+for tweet_id, rating in list(tweet_rated_emotion.items())[:10]:
+    print(tweet_id, " ", rating)
+print("===========================")
+print("Top 10 Negative >:D Tweets:")
+print("===========================")
+# Re-sorted ascending; the full dump below is printed in this ascending order
+tweet_rated_emotion = dict(
+    sorted(tweet_rated_emotion.items(), key=lambda pair: pair[1])
+)
+for tweet_id, rating in list(tweet_rated_emotion.items())[:10]:
+    print(tweet_id, " ", rating)
+print("=====================================")
+print("All Tweets Rated by Emotional Damage:")
+print("=====================================")
+print(tweet_rated_emotion)
+
+# Creates the CSV file: one row per word, followed by the words sharing a tweet with it
+studentID = 10
+# Window of up to 200 tweets starting at index studentID * 28 (280 here)
+startingPoint = studentID * (200 // 7)
+endingPoint = startingPoint + 200
+filter_words = ["RT"]
+# Slicing (unlike indexing) cannot raise IndexError when fewer than endingPoint
+# tweets were loaded; the window simply shrinks to what is available.
+selected_tweets = wordsForCSV[startingPoint:endingPoint]
+
+# Every word of the window is a node; filter_words only keeps words out of edge lists
+graph_dict = {}
+for tweet_words in selected_tweets:
+    for word in tweet_words:
+        neighbours = graph_dict.setdefault(word, [])
+        for other in tweet_words:
+            # Compare by value: "is not" let a word repeated inside one tweet become
+            # its own neighbour, because equal tokens can be distinct objects.
+            if other == word or other in filter_words:
+                continue
+            if other not in neighbours:
+                neighbours.append(other)
+
+# The widest row decides how many EDGE columns the header needs
+maxm = max((len(edges) for edges in graph_dict.values()), default=0)
+
+# "with" closes data.csv even if a write fails; it was previously never closed
+with open("data.csv", "w", encoding="utf-8") as file:
+    edge_columns = ",".join("EDGE" + str(column) for column in range(maxm))
+    file.write("NODE," + edge_columns + "\n")
+    for node, edges in graph_dict.items():
+        # A node without neighbours still gets its trailing comma
+        file.write(node + "," + ",".join(edges) + "\n")