@app.route('/T5_summarize', methods=['POST'])
def summarize():
    """Summarize the posted text with the module-level T5 tokenizer and model.

    Expects a JSON request body of the form ``{"text": "<document>"}``.

    Returns:
        ``({"summary": <str>}, 200)`` on success, or
        ``({"error": <message>}, 400)`` when anything in the handler raises
        (unreadable JSON body, missing ``text`` key, tokenizer/model failure).
    """
    try:
        data = request.json
        text = data['text']

        # The "summarize: " prefix is prepended to the document before
        # encoding; inputs longer than 512 tokens are truncated.
        inputs = tokenizer.encode(
            "summarize: " + text,
            return_tensors='pt',
            max_length=512,
            truncation=True,
        )

        summary_ids = model.generate(
            inputs,
            max_length=150,
            min_length=80,
            length_penalty=5.,
            num_beams=2,
        )

        # Drop special tokens (e.g. <pad>, </s>) so that only the summary
        # text itself is returned to the client.
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

        return jsonify({'summary': summary}), 200

    except Exception as e:
        # Every failure is reported to the client as a 400 with its message.
        return jsonify({'error': str(e)}), 400
@app.route('/nltk_summarize', methods=['POST'])
def summarize():
    """Build an extractive, word-frequency-based summary of the posted text.

    Expects a JSON request body of the form ``{"text": "<document>"}``.
    Every non-stop-word token is counted; each sentence is scored with the
    summed counts of the tokens it contains; sentences scoring more than
    1.2 times the (integer-truncated) average score are kept, in their
    original order.

    Returns:
        ``({"summary": <str>}, 200)`` on success. Each selected sentence is
        preceded by a single space. The summary is ``''`` when no sentence
        could be scored (for example, empty text).
        ``({"error": <message>}, 400)`` when anything in the handler raises
        (unreadable JSON body, missing ``text`` key, tokenizer failure).
    """
    try:
        data = request.json
        text = data['text']

        stop_words = set(stopwords.words("english"))

        # Case-insensitive frequency of every token that is not a stop word.
        freq_table = {}
        for token in word_tokenize(text):
            token = token.lower()
            if token in stop_words:
                continue
            freq_table[token] = freq_table.get(token, 0) + 1

        sentences = sent_tokenize(text)

        # Score each sentence. Lower-case it once instead of once per word.
        # NOTE: `word in lowered` is a substring test, so a counted token
        # also matches inside longer words; kept as-is to preserve scoring.
        sentence_value = {}
        for sentence in sentences:
            lowered = sentence.lower()
            for word, freq in freq_table.items():
                if word in lowered:
                    sentence_value[sentence] = (
                        sentence_value.get(sentence, 0) + freq
                    )

        # Nothing scored (e.g. empty text): return an empty summary instead
        # of failing with a division-by-zero error below.
        if not sentence_value:
            return jsonify({'summary': ''}), 200

        average = int(sum(sentence_value.values()) / len(sentence_value))
        threshold = 1.2 * average

        summary = ''.join(
            " " + sentence
            for sentence in sentences
            if sentence in sentence_value
            and sentence_value[sentence] > threshold
        )

        return jsonify({'summary': summary}), 200

    except Exception as e:
        # Every failure is reported to the client as a 400 with its message.
        return jsonify({'error': str(e)}), 400
"""Command-line helper: POST the contents of short_input.txt to a summarizer.

Change ``url`` to target a different summarizer endpoint.
"""
import requests

# Specify the API URL we want to send our JSON to
url = 'http://127.0.0.1:5000/T5_summarize'

# Specify the appropriate header for the POST request
headers = {'Content-type': 'application/json'}

# Read the text we want summarized
with open('short_input.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Let requests serialize the payload. Concatenating the raw text into a JSON
# string produces invalid JSON as soon as the input contains a double quote,
# a backslash or a newline.
data = {'text': text}

response = requests.post(url, headers=headers, json=data)
print(response.text, response.status_code)
@patch('text_summarizer.Summarizer')
def test_error_handling(self, mock_summarizer):
    """A summarizer that raises must yield HTTP 400 carrying the message."""
    # Calling the (mocked) summarizer instance raises.
    failing_model = mock_summarizer.return_value
    failing_model.side_effect = Exception('An error occurred.')

    # POST a well-formed request to the endpoint.
    response = self.app.post(
        '/BERT_summarize',
        json={'text': 'This is a test input.'},
    )

    # The handler reports the failure as a 400 with the exception text.
    self.assertEqual(response.status_code, 400)
    payload = response.get_json()
    self.assertIn('error', payload)
    self.assertEqual(payload['error'], 'An error occurred.')
@app.route('/BERT_summarize', methods=['POST'])
def summarize():
    """Summarize the posted text with a ``Summarizer`` instance.

    Expects a JSON request body of the form ``{"text": "<document>"}``.

    Returns:
        ``({"summary": <summarizer output>}, 200)`` on success, or
        ``({"error": <message>}, 400)`` when anything in the handler raises
        (unreadable JSON body, missing ``text`` key, summarizer failure).
    """
    try:
        # Read and validate the request first so that a malformed request is
        # rejected before the summarizer is constructed.
        data = request.json
        text = data['text']

        # NOTE(review): the summarizer is built on every request, and
        # construction is presumably expensive. Caching it at module level
        # would change what is patched when tests mock
        # `text_summarizer.Summarizer`; confirm before changing.
        model = Summarizer()
        summary = model(text)
        return jsonify({'summary': summary}), 200
    except Exception as e:
        # Every failure is reported to the client as a 400 with its message.
        return jsonify({'error': str(e)}), 400