From f4b88a506fdbb8e5236a2181cc69c7dd18808561 Mon Sep 17 00:00:00 2001
From: Daniel <59575049+lemoentjiez@users.noreply.github.com>
Date: Tue, 27 Dec 2022 01:29:45 +0200
Subject: [PATCH] Update networkbonus.py

---
 LabMD_3/networkbonus.py | 109 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/LabMD_3/networkbonus.py b/LabMD_3/networkbonus.py
index 8b13789..6e7ede0 100644
--- a/LabMD_3/networkbonus.py
+++ b/LabMD_3/networkbonus.py
@@ -1 +1,110 @@
+import json
+
+from nltk import TweetTokenizer
+
+tt = TweetTokenizer()
+special_chars = "1234567890.=?\",”$%^;&’*(…):!><"
+special_chars_CSV = "1234567890.=?\",”;$%^&’*(…):@!>#@<"
+words_tweet_tokenizer = []
+words_per_tweet = {}
+special_words = []
+hashtags = []
+wordsForCSV = []
+
+# Read the tweet JSON, sort words into the needed categories, and collect hashtags
+with open("tweets.json", "r", encoding="utf-8") as tweetJson:
+    tweetJsonData = json.load(tweetJson)
+    for tweet in tweetJsonData:
+        tempAppend = []
+        tempWords = tt.tokenize(tweet["text"])
+        words_per_tweet[tweet["id"]] = tempWords
+        for word in tempWords:
+            if not any(c in special_chars for c in word) and len(word) > 1:
+                words_tweet_tokenizer.append(word)
+                if word[0] == '#':
+                    hashtags.append(word)
+            else:
+                if len(word) > 1:
+                    special_words.append(word)
+            if not any(c in special_chars_CSV for c in word) and len(word) > 1:
+                tempAppend.append(word)
+        wordsForCSV.append(tempAppend)
+
+# Count hashtag occurrences and print the ten most frequent
+hashtag_dictionary = {x: 0 for x in hashtags}
+for hashtag in hashtags:
+    hashtag_dictionary[hashtag] += 1
+hashtag_dictionary = dict(sorted(hashtag_dictionary.items(), key=lambda item: item[1], reverse=True))
+print("===================")
+print("Top 10 #hashtags:")
+print("===================")
+for hashtag in list(hashtag_dictionary)[:10]:
+    print(hashtag, " ", hashtag_dictionary[hashtag])
+
+# Load the AFINN-111 sentiment lexicon; each line is "term<TAB>score"
+tweet_rated_emotion = {}
+word_emotion_dict = {}
+with open("AFINN-111.txt", "r", encoding="utf-8") as AFINNdict:
+    for line in AFINNdict:
+        term, score = line.strip().rsplit("\t", 1)
+        word_emotion_dict[term] = int(score)
+
+# Rate each tweet by summing the AFINN scores of its words
+for tweet_id in words_per_tweet:
+    total_rating = 0
+    for word in words_per_tweet[tweet_id]:
+        if word.lower() in word_emotion_dict:
+            total_rating += word_emotion_dict[word.lower()]
+    tweet_rated_emotion[tweet_id] = total_rating
+
+print("==========================")
+print("Top 10 Positive :D Tweets:")
+print("==========================")
+tweet_rated_emotion = dict(sorted(tweet_rated_emotion.items(), key=lambda item: item[1], reverse=True))
+for tweet in list(tweet_rated_emotion)[:10]:
+    print(tweet, " ", tweet_rated_emotion[tweet])
+print("===========================")
+print("Top 10 Negative >:D Tweets:")
+print("===========================")
+tweet_rated_emotion = dict(sorted(tweet_rated_emotion.items(), key=lambda item: item[1], reverse=False))
+for tweet in list(tweet_rated_emotion)[:10]:
+    print(tweet, " ", tweet_rated_emotion[tweet])
+print("=====================================")
+print("All Tweets Rated by Emotional Damage:")
+print("=====================================")
+print(tweet_rated_emotion)
+
+# Build a word co-occurrence graph for a 200-tweet slice and write it as a CSV adjacency list
+studentID = 10
+startingPoint = studentID * int(200 / 7)
+endingPoint = min(startingPoint + 200, len(wordsForCSV))  # clamp to the tweets actually loaded
+graph_dict = {}
+maxm = 0
+for i in range(startingPoint, endingPoint):
+    for x in wordsForCSV[i]:
+        graph_dict[x] = []
+filter_words = ["RT"]
+for i in range(startingPoint, endingPoint):
+    for word in wordsForCSV[i]:
+        for x in wordsForCSV[i]:
+            if x != word and x not in graph_dict[word] and x not in filter_words:
+                graph_dict[word].append(x)
+for x in graph_dict:
+    if len(graph_dict[x]) > maxm:
+        maxm = len(graph_dict[x])
+with open("data.csv", "w", encoding="utf-8") as file:
+    file.write("NODE,")
+    for x in range(maxm):
+        file.write("EDGE" + str(x))
+        if x != maxm - 1:
+            file.write(",")
+    file.write("\n")
+    for x in graph_dict:
+        file.write(x)
+        file.write(",")
+        for z in range(len(graph_dict[x])):
+            file.write(graph_dict[x][z])
+            if z != len(graph_dict[x]) - 1:
+                file.write(",")
+        file.write("\n")
 
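
Note: the script assumes tweets.json contains a JSON array of tweet objects
exposing at least the "id" and "text" fields read above; any other fields are
ignored. A minimal sketch of a compatible input file (the sample tweets below
are illustrative, not real data):

    import json

    # Hypothetical sample matching the two fields the script reads;
    # real Twitter API dumps carry many more fields, all ignored here.
    sample_tweets = [
        {"id": 1, "text": "RT @user: enjoying the lab #NLTK #python"},
        {"id": 2, "text": "this assignment is dreadful #help"},
    ]
    with open("tweets.json", "w", encoding="utf-8") as f:
        json.dump(sample_tweets, f, ensure_ascii=False)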
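
The generated data.csv is a ragged adjacency list: a "NODE,EDGE0,EDGE1,..."
header, then one row per word holding that word followed by the words it
co-occurs with. A minimal sketch of reading it back into a dict shaped like
graph_dict (the name graph is illustrative):

    import csv

    # Rebuild the co-occurrence graph from the CSV written by the script.
    # Rows are ragged: each holds only as many cells as the node has edges.
    graph = {}
    with open("data.csv", "r", encoding="utf-8") as f:
        reader = csv.reader(f)
        next(reader)  # skip the NODE/EDGE header row
        for row in reader:
            if row:
                graph[row[0]] = [cell for cell in row[1:] if cell]
    print(len(graph), "nodes,", sum(len(v) for v in graph.values()), "edges")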