"""Report the most used hashtags and AFINN sentiment ratings for a tweet dump.

Reads ``tweets.json`` (a JSON array of objects with ``"id"`` and ``"text"``
keys) and ``AFINN-111.txt`` (one ``term<whitespace>score`` entry per line)
and prints the top hashtags, the most positive and most negative tweets, and
finally every tweet's rating.
"""

import json
from collections import Counter
from operator import itemgetter

# A token containing any of these characters is not treated as a plain word
# and is therefore never counted as a hashtag.
SPECIAL_CHARS = "1234567890.=?\",”$%^&’*(…):!><"

# How many entries each "Top 10" section prints.
TOP_N = 10


def extract_hashtags(tokens, special_chars=SPECIAL_CHARS):
    """Return the tokens that count as hashtags, in their original order.

    A token qualifies when it is longer than one character, starts with ``#``
    and contains none of ``special_chars``.

    NOTE(review): because ``special_chars`` includes the digits, a hashtag
    such as ``#covid19`` is skipped. That matches the existing behaviour;
    confirm it is intended.
    """
    return [
        token
        for token in tokens
        if len(token) > 1
        and token.startswith("#")
        and not any(char in special_chars for char in token)
    ]


def load_afinn(lines):
    """Build a ``{term: score}`` dict from the lines of an AFINN word list.

    Each line is split on its last run of whitespace, so multi-word terms
    keep their inner spaces and the trailing field becomes the integer score.
    Lines without both a term and a score (for example blank lines) are
    skipped.

    Raises:
        ValueError: if the trailing field of a line is not an integer.
    """
    scores = {}
    for line in lines:
        fields = line.rsplit(maxsplit=1)
        if len(fields) != 2:
            continue
        term, score = fields
        scores[term] = int(score)
    return scores


def rate_tweet(tokens, scores):
    """Return the sum of the scores of every token found in ``scores``.

    Tokens are lower-cased before the lookup; unknown tokens contribute 0.
    """
    return sum(scores.get(token.lower(), 0) for token in tokens)


def top_entries(ratings, n=TOP_N, reverse=True):
    """Return the first ``n`` ``(key, value)`` pairs of ``ratings`` by value.

    ``reverse=True`` yields the highest values first, ``reverse=False`` the
    lowest. The sort is stable, so ties keep the insertion order of
    ``ratings``.
    """
    return sorted(ratings.items(), key=itemgetter(1), reverse=reverse)[:n]


def _print_section(rule, title, rows=()):
    """Print a banner (rule, title, rule) followed by one line per row."""
    print(rule)
    print(title)
    print(rule)
    for key, value in rows:
        print(key, " ", value)


def main(tweets_path="tweets.json", afinn_path="AFINN-111.txt"):
    """Read the tweet and lexicon files and print the full report."""
    # Imported here so the helpers above can be imported without nltk.
    from nltk import TweetTokenizer

    tokenizer = TweetTokenizer()
    words_per_tweet = {}
    hashtags = []

    with open(tweets_path, "r", encoding="utf-8") as tweets_file:
        for tweet in json.load(tweets_file):
            tokens = tokenizer.tokenize(tweet["text"])
            # A repeated id keeps only its latest token list, but the
            # hashtags of every tweet are still counted.
            words_per_tweet[tweet["id"]] = tokens
            hashtags.extend(extract_hashtags(tokens))

    _print_section(
        "===================",
        "Top 10 #hashtags:",
        Counter(hashtags).most_common(TOP_N),
    )

    with open(afinn_path, "r", encoding="utf-8") as afinn_file:
        scores = load_afinn(afinn_file)

    ratings = {
        tweet_id: rate_tweet(tokens, scores)
        for tweet_id, tokens in words_per_tweet.items()
    }

    _print_section(
        "==========================",
        "Top 10 Positive :D Tweets:",
        top_entries(ratings, TOP_N, reverse=True),
    )
    _print_section(
        "===========================",
        "Top 10 Negative >:D Tweets:",
        top_entries(ratings, TOP_N, reverse=False),
    )
    _print_section(
        "=====================================",
        "All Tweets Rated by Emotional Damage:",
    )
    # The complete listing is printed as one dict, lowest rating first.
    print(dict(sorted(ratings.items(), key=itemgetter(1))))


if __name__ == "__main__":
    main()