From 550888c84b5e39cbb117af5a2c2ef718e296d792 Mon Sep 17 00:00:00 2001
From: Daniel <59575049+lemoentjiez@users.noreply.github.com>
Date: Mon, 26 Dec 2022 01:01:38 +0200
Subject: [PATCH] Update network.py

---
 LabMD_3/network.py | 135 +++++++++++++++++++++++++--------------------
 1 file changed, 74 insertions(+), 61 deletions(-)

diff --git a/LabMD_3/network.py b/LabMD_3/network.py
index f03fa62..8f70c15 100644
--- a/LabMD_3/network.py
+++ b/LabMD_3/network.py
@@ -1,71 +1,84 @@
 import json
-import re
+
 import nltk
 from nltk import TweetTokenizer
 
-hashtags = []
-mapped_hashtags = dict()
-emotional_values = dict()
-tokenizer = TweetTokenizer()
-final_emotional_data = {}
+tt = TweetTokenizer()
+special_chars = "1234567890.=?\",”$%^&’*(…):!><"
 
-with open('AFINN-111.txt', encoding="utf-8") as file:
-    for line in file:
+words_tweet_tokenizer = []
+words_per_tweet = {}
+special_words = []
+hashtags = []
+
+# Loads tweets.json, tokenizes each tweet, splits the tokens into plain and special words, and collects hashtags
+with open("tweets.json", "r", encoding="utf-8") as tweetJson:
+    tweetJsonData = json.load(tweetJson)
+    for tweet in tweetJsonData:
+        tempWords = tt.tokenize(tweet["text"])
+        words_per_tweet[tweet["id"]] = tempWords
+        for word in tempWords:
+            if not any(c in special_chars for c in word) and len(word) > 1:
+                words_tweet_tokenizer.append(word)
+                if word[0] == '#':
+                    hashtags.append(word)
+            else:
+                if len(word) > 1:
+                    special_words.append(word)
+
+# Counts hashtag occurrences and prints the 10 most frequent
+hashtag_dictionary = {x: 0 for x in hashtags}
+for hashtag in hashtags:
+    hashtag_dictionary[hashtag] += 1
+
+hashtag_dictionary = dict(sorted(hashtag_dictionary.items(), key=lambda item: item[1], reverse=True))
+print("===================")
+print("Top 10 #hashtags:")
+print("===================")
+count = 0
+for hashtag in hashtag_dictionary:
+    if count < 10:
+        print(hashtag, " ", hashtag_dictionary[hashtag])
+    count += 1
+
+# Loads the AFINN-111 word scores, then sums them per tweet to rate its emotion
+tweet_rated_emotion = {}
+word_emotion_dict = {}
+with open("AFINN-111.txt", "r", encoding="utf-8") as AFINNdict:
+    for line in AFINNdict:
         words = nltk.word_tokenize(line)
         nr = words[len(words) - 1]
         str = ""
         for x in range(len(words) - 1):
-            str += words[x];
-        emotional_values[str] = nr
+            str += words[x]
+        word_emotion_dict[str] = nr
 
-with open('tweets.json', 'r', encoding='utf-8') as tweet_json:
-    tweet_data = json.load(tweet_json)
-    for i in range(len(tweet_data)):
-        emotion_rating = 0
-        words = tokenizer.tokenize(tweet_data[i]["text"])
-        for x in words:
-            if x[0] == '#' and len(x) > 1:
-                hashtags.append(x)
-            if re.sub("\s\s+", " ", x).lower() in emotional_values:
-                emotion_rating += int(emotional_values[x.lower()])
-        final_emotional_data[tweet_data[i]["id"]] = emotion_rating
-
-for i in range(len(hashtags)):
-    mapped_hashtags[hashtags[i]] = 0
-for i in range(len(hashtags)):
-    mapped_hashtags[hashtags[i]] += 1
-
-sorted_dict = dict(sorted(mapped_hashtags.items(), key=lambda item: item[1], reverse=True))
-counter = 10
-x = 1
-print("========================")
-print("Top #10 Hashtags")
-print("========================")
-for i in sorted_dict:
-    if x <= counter:
-        print(x,'.', i, " ", sorted_dict[i])
-    x += 1
-x = 1
-sorted_emotion_reverse = dict(sorted(final_emotional_data.items(), key=lambda item: item[1], reverse=True))
-sorted_emotion = dict(sorted(final_emotional_data.items(), key=lambda item: item[1]))
-print("========================")
-print("Top #10 Positive Tweets")
-print("=========================")
-x = 1
-for i in sorted_emotion_reverse:
-    if x <= counter:
-        print(i, " ", sorted_emotion_reverse[i])
-    x += 1
-print("========================")
-print("Top #10 Negative Tweets")
-print("========================")
-x = 1
-for i in sorted_emotion:
-    if x <= counter:
-        print(i, " ", sorted_emotion[i])
-    x += 1
-print("========================")
-print("All Emotional Values per ID")
-print("=========================")
-for x in final_emotional_data:
-    print(x, final_emotional_data[x])
+for id in words_per_tweet:
+    total_rating = 0
+    for word in words_per_tweet[id]:
+        if word.lower() in word_emotion_dict:
+            total_rating += int(word_emotion_dict[word.lower()])
+    tweet_rated_emotion[id] = total_rating
+    
+print("==========================")
+print("Top 10 Positive :D Tweets:")
+print("==========================")
+tweet_rated_emotion = dict(sorted(tweet_rated_emotion.items(), key=lambda item: item[1], reverse=True))
+count = 0
+for tweet in tweet_rated_emotion:
+    if count < 10:
+        print(tweet, " ", tweet_rated_emotion[tweet])
+    count += 1
+print("===========================")
+print("Top 10 Negative >:D Tweets:")
+print("===========================")
+tweet_rated_emotion = dict(sorted(tweet_rated_emotion.items(), key=lambda item: item[1], reverse=False))
+count = 0
+for tweet in tweet_rated_emotion:
+    if count < 10:
+        print(tweet, " ", tweet_rated_emotion[tweet])
+    count += 1
+print("=====================================")
+print("All Tweets Rated by Emotional Damage:")
+print("=====================================")
+print(tweet_rated_emotion)