Update network.py
@@ -4,17 +4,19 @@ import nltk
 from nltk import TweetTokenizer
 
 tt = TweetTokenizer()
-special_chars = "1234567890.=?\",”$%^&’*(…):!><"
+special_chars = "1234567890.=?\",”$%^;&’*(…):!><"
+special_chars_CSV = "1234567890.=?\",”;$%^&’*(…):@!>#@<"
 words_tweet_tokenizer = []
 words_per_tweet = {}
 special_words = []
 hashtags = []
+wordsForCSV = []
 
 # Handles the tweet json, separates words into needed categories, and extracts hashtags
 with open("tweets.json", "r", encoding="utf-8") as tweetJson:
     tweetJsonData = json.load(tweetJson)
     for tweet in tweetJsonData:
+        tempAppend = []
         tempWords = tt.tokenize(tweet["text"])
         words_per_tweet[tweet["id"]] = tempWords
         for word in tempWords:
@@ -25,12 +27,14 @@ with open("tweets.json", "r", encoding="utf-8") as tweetJson:
             else:
                 if len(word) > 1:
                     special_words.append(word)
+            if not any(c in special_chars_CSV for c in word) and len(word) > 1:
+                tempAppend.append(word)
+        wordsForCSV.append(tempAppend)
 
 # Counts hashtags and outputs the top
 hashtag_dictionary = {x: 0 for x in hashtags}
 for hashtag in hashtags:
     hashtag_dictionary[hashtag] += 1
 
 hashtag_dictionary = dict(sorted(hashtag_dictionary.items(), key=lambda item: item[1], reverse=True))
 print("===================")
 print("Top 10 #hashtags:")
@@ -48,10 +52,10 @@ with open("AFINN-111.txt", "r", encoding="utf-8") as AFINNdict:
     for line in AFINNdict:
         words = nltk.word_tokenize(line)
         nr = words[len(words) - 1]
-        str = ""
+        strn = ""
         for x in range(len(words) - 1):
-            str += words[x]
-        word_emotion_dict[str] = nr
+            strn += words[x]
+        word_emotion_dict[strn] = nr
 
 for id in words_per_tweet:
     total_rating = 0
@@ -59,7 +63,7 @@ for id in words_per_tweet:
         if word.lower() in word_emotion_dict:
             total_rating += int(word_emotion_dict[word.lower()])
     tweet_rated_emotion[id] = total_rating
 
 print("==========================")
 print("Top 10 Positive :D Tweets:")
 print("==========================")
@@ -82,3 +86,37 @@ print("=====================================")
 print("All Tweets Rated by Emotional Damage:")
 print("=====================================")
 print(tweet_rated_emotion)
+
+# Creates the CSV file
+studentID = 10
+startingPoint = studentID * int(200/7)
+endingPoint = startingPoint + 200
+graph_dict = {}
+maxm = 0
+for i in range(startingPoint, endingPoint):
+    for x in wordsForCSV[i]:
+        graph_dict[x] = []
+filter_words = ["RT"]
+file = open("data.csv", "w", encoding="utf-8")
+file.write("NODE,")
+for i in range(startingPoint, endingPoint):
+    for word in wordsForCSV[i]:
+        for x in wordsForCSV[i]:
+            if x is not word and x not in graph_dict[word] and x not in filter_words:
+                graph_dict[word].append(x)
+for x in graph_dict:
+    if len(graph_dict[x]) > maxm:
+        maxm = len(graph_dict[x])
+for x in range(maxm):
+    file.write("EDGE" + str(x))
+    if x != maxm - 1:
+        file.write(",")
+file.write("\n")
+for x in graph_dict:
+    file.write(x)
+    file.write(",")
+    for z in range(len(graph_dict[x])):
+        file.write(graph_dict[x][z])
+        if z != len(graph_dict[x]) - 1:
+            file.write(",")
+    file.write("\n")