Update networking.py

2022-12-26 02:57:50 +02:00
parent 550888c84b
commit bcfb9755f0
1 changed files with 177 additions and 0 deletions
--- a/LabPSA_3/networking.py
+++ b/LabPSA_3/networking.py
@@ -1 +1,178 @@

+import json
+import matplotlib.pyplot as plt
+import nltk
+from nltk import TweetTokenizer
+
+tt = TweetTokenizer()
+special_chars = "1234567890#@.=?\",”$%^&’*(…[]):!><"
+
+likes_per_id = {}
+retweets_per_id = {}
+filter_words = ["sure"]
+month_and_noun = {'2020-10': 0, '2020-11': 0, '2020-12': 0,'2022-01': 0, '2022-02': 0, '2022-03': 0,'2022-11': 0}
+words_tweet_tokenizer = []
+words_set = []
+nouns_counted = {}
+word_count_not_case_sensitive = {}
+proper_nouns_counted = {}
+words_per_tweet = {}
+special_words = []
+word_count_dict = {}
+hashtags = []
+
+# Handles the tweet json, separates words into needed categories
+with open("tweets.json", "r", encoding="utf-8") as tweetJson:
+    tweetJsonData = json.load(tweetJson)
+    for tweet in tweetJsonData:
+        tempWords = tt.tokenize(tweet["text"])
+        words_per_tweet[tweet["id"]] = tempWords
+        for word in tempWords:
+            if not any(c in special_chars for c in word) and len(word) > 1:
+                words_tweet_tokenizer.append(word)
+                if word not in word_count_not_case_sensitive:
+                    word_count_not_case_sensitive[word] = 1
+                else:
+                    word_count_not_case_sensitive[word] += 1
+                if word not in word_count_dict and word.lower() not in word_count_dict:
+                    word_count_dict[word.lower()] = 1
+                else:
+                    word_count_dict[word.lower()] += 1
+                if word[0] == '#':
+                    hashtags.append(word)
+                if word not in words_set:
+                    words_set.append(word)
+            else:
+                if len(word) > 1:
+                    special_words.append(word)
+
+# Outputs counted words
+print("============")
+print("Top 10 words")
+print("============")
+word_count_dict = dict(sorted(word_count_dict.items(), key=lambda item: item[1], reverse=True))
+x = 1
+for i in word_count_dict:
+    if x <= 10:
+        print(i, " ", word_count_dict[i])
+    x += 1
+
+# This piece of shit code counts the number of a word per month
+word = input("Write a word")
+with open('tweets.json', 'r', encoding='utf-8') as tweet_json:
+    tweet_data = json.load(tweet_json)
+    for i in range(len(tweet_data)):
+        temp_msg = nltk.word_tokenize(tweet_data[i]["text"])
+        temp_date = nltk.word_tokenize(tweet_data[i]["created_at"])
+        for x in temp_msg:
+            if x == word or x.lower() == word:
+                month_and_noun[temp_date[0][:7]] += 1
+x = list(month_and_noun.keys())
+y = list(month_and_noun.values())
+plt.bar(x, y, color='maroon', width=0.7)
+plt.show()
+
+# Counts nouns
+for i in word_count_dict:
+    ans = nltk.pos_tag([i])[0][1]
+    if ans == 'NN' or ans == 'NNS' or ans == 'NNPS' or ans == 'NNP':
+        if i not in filter_words and not any(c in ["\'"] for c in i):
+            nouns_counted[i] = word_count_dict[i]
+nouns_counted = dict(sorted(nouns_counted.items(), key=lambda item: item[1], reverse=True))
+
+# Outputs counted nouns
+print("============")
+print("Top 10 nouns")
+print("============")
+x = 1
+for i in nouns_counted:
+    if x <= 10:
+        print(i, " ", nouns_counted[i])
+    x += 1
+
+# Counts proper nouns
+for i in word_count_not_case_sensitive:
+    ans = nltk.pos_tag([i])[0][1]
+    if ans == 'NN' or ans == 'NNS' or ans == 'NNPS' or ans == 'NNP':
+        if i not in filter_words and not any(c in ["\'"] for c in i) and i[0].isupper():
+            proper_nouns_counted[i] = word_count_not_case_sensitive[i]
+proper_nouns_counted = dict(sorted(proper_nouns_counted.items(), key=lambda item: item[1], reverse=True))
+
+# Outputs proper nouns
+print("===================")
+print("Top 10 proper nouns")
+print("===================")
+x = 1
+for i in proper_nouns_counted:
+    if x <= 10:
+        print(i, " ", proper_nouns_counted[i])
+    x += 1
+
+
+# Counts likes and retweets for each tweet
+with open('tweets.json', 'r', encoding='utf-8') as tweet_json:
+    tweet_data = json.load(tweet_json)
+    for tweet in tweet_data:
+        likes_per_id[tweet["id"]] = tweet["likes"]
+        retweets_per_id[tweet["id"]] = tweet["retweets"]
+
+# Counts popularity by formula
+popularity_nouns = {}
+for noun in nouns_counted:
+    normLikes = 0
+    normRetweets = 0
+    for id in words_per_tweet:
+        if noun in words_per_tweet[id]:
+            normLikes += likes_per_id[id]
+            normRetweets += retweets_per_id[id]
+    popularity_nouns[noun] = word_count_dict[noun] * (1.4 + normRetweets) * (1.2 + normLikes)
+popularity_nouns = dict(sorted(popularity_nouns.items(), key=lambda item: item[1], reverse=True))
+
+# Outputs popularity nouns
+print("==========================")
+print("Top 10 nouns by popularity")
+print("==========================")
+x = 1
+for i in popularity_nouns:
+    if x <= 10:
+        print(i, " ", popularity_nouns[i])
+    x += 1
+
+# Suggestion stuff
+word_sliced_count = {}
+word = input("Write word for suggestion")
+for x in word_count_dict:
+    if x not in word_sliced_count:
+        word_sliced_count[x] = 0
+for x in word_count_dict:
+    if word == x[:len(word)] and word != x:
+        word_sliced_count[x] += word_count_dict[x]
+word_sliced_count = dict(sorted(word_sliced_count.items(), key=lambda item: item[1], reverse=True))
+print("===============")
+print("Top suggestions")
+print("===============")
+x = 1
+for i in word_sliced_count:
+    if x <= 10:
+        print(i, " ", word_sliced_count[i])
+    x += 1
+
+# Suggestion occurrences stuff
+word = input("Write word for suggestion")
+words_suggestion_counted = {}
+for id in words_per_tweet:
+    for i in range(len(words_per_tweet[id]) - 2):
+        if words_per_tweet[id][i] == word:
+            if words_per_tweet[id][i+1] not in words_suggestion_counted and len(words_per_tweet[id][i+1]) > 1:
+                words_suggestion_counted[words_per_tweet[id][i+1]] = 1
+            elif len(words_per_tweet[id][i+1]) > 1:
+                words_suggestion_counted[words_per_tweet[id][i + 1]] += 1
+words_suggestion_counted = dict(sorted(words_suggestion_counted.items(), key=lambda item: item[1], reverse=True))
+print("==========================")
+print("Top suggestion occurrences")
+print("==========================")
+x = 1
+for i in words_suggestion_counted:
+    if x <= 5:
+        print(i, " ", words_suggestion_counted[i])
+    x += 1