Skip to content
Snippets Groups Projects
Commit 3debb7c5 authored by Ishi Bansal
Browse files

using positive and negative test files now and fixed demographic

parent a4025c79
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
import json
import chardet
# Sentiment lexicons. Each line of a word-list file becomes one set entry,
# stripped of surrounding whitespace and casefolded, so membership tests
# against casefolded tokens ignore letter case.

# Positive entries come from "positive.txt".
with open("positive.txt", encoding="utf-8") as positive_file:
    positive_words = {line.strip().casefold() for line in positive_file}

# Negative entries come from "negative.txt".
with open("negative.txt", encoding="utf-8") as negative_file:
    negative_words = {line.strip().casefold() for line in negative_file}
json_data = {
"projectFileVersion": "2022-05-01",
"stringIndexType": "Utf16CodeUnit",
......@@ -49,13 +60,15 @@ for i in range(1, 11):
]
}
for ind, word in enumerate(utf_data.split()):
if word.casefold().__contains__("positive") or word.casefold().__contains__("excite") or word.casefold().__contains__("benefit") or word.casefold().__contains__("beneficial") or word.casefold().__contains__("favor") or word.casefold().__contains__("good") or word.casefold().__contains__("optimist") or word.casefold().__contains__("favor"):
#if word.casefold().__contains__("positive") or word.casefold().__contains__("excite") or word.casefold().__contains__("benefit") or word.casefold().__contains__("beneficial") or word.casefold().__contains__("favor") or word.casefold().__contains__("good") or word.casefold().__contains__("optimist") or word.casefold().__contains__("favor"):
if word.casefold() in positive_words:
category = "positive"
offset = utf_data.find(word, last_position)
length = len(word)
last_position = offset + length
is_a_category = True
elif word.casefold().__contains__("negative") or word.casefold().__contains__("concern") or word.casefold().__contains__("harm") or word.casefold().__contains__("reject") or word.casefold().__contains__("oppose") or word.casefold().__contains__("danger") or word.casefold().__contains__("bad") or word.casefold().__contains__("pessimist") or word.casefold().__contains__("risk") or word.casefold().__contains__("threat"):
#elif word.casefold().__contains__("negative") or word.casefold().__contains__("concern") or word.casefold().__contains__("harm") or word.casefold().__contains__("reject") or word.casefold().__contains__("oppose") or word.casefold().__contains__("danger") or word.casefold().__contains__("bad") or word.casefold().__contains__("pessimist") or word.casefold().__contains__("risk") or word.casefold().__contains__("threat"):
elif word.casefold() in negative_words:
category = "negative"
offset = utf_data.find(word, last_position)
length = len(word)
......@@ -67,7 +80,7 @@ for i in range(1, 11):
length = len(word)
last_position = offset + length
is_a_category = True
elif word.casefold().__contains__("American") or word.casefold().__contains__("white") or word.casefold().__contains__("black") or word.casefold().__contains__("asian") or word.casefold().__contains__("hispanic") or word.casefold().__contains__("women") or word.casefold().__contains__("men"):
elif word.casefold().__contains__("American") or word.casefold().__contains__("white") or word.casefold().__contains__("black") or word.casefold().__contains__("asian") or word.casefold().__contains__("hispanic") or word.casefold() == "women" or word.casefold() == "men":
category = "demographic"
offset = utf_data.find(word, last_position)
length = len(word)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment