NOTE: file contents below were revised in review; the blob ids on the
"index" lines still name the originally submitted revisions.

diff --git a/text_summarizers/T5_summarizer.py b/text_summarizers/T5_summarizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..4469e5db45dfbc97a8770f05ad8d4a3d52405715
--- /dev/null
+++ b/text_summarizers/T5_summarizer.py
@@ -0,0 +1,57 @@
+"""Flask service exposing a T5 summarizer at POST /T5_summarize."""
+
+from flask import Flask, request, jsonify
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
+app = Flask(__name__)
+
+# Loaded once at import time so every request reuses the same objects.
+tokenizer = AutoTokenizer.from_pretrained('t5-base')
+# AutoModelWithLMHead is deprecated; T5 is an encoder-decoder model,
+# so the seq2seq auto class is the supported way to load it.
+model = AutoModelForSeq2SeqLM.from_pretrained('t5-base', return_dict=True)
+
+
+@app.route('/T5_summarize', methods=['POST'])
+def summarize():
+    """Summarize the ``text`` field of the posted JSON body.
+
+    Returns ``{'summary': ...}`` with status 200 on success, or
+    ``{'error': ...}`` with status 400 if anything goes wrong.
+    """
+    try:
+        data = request.json
+        text = data['text']
+
+        # Inputs longer than 512 tokens are truncated.
+        inputs = tokenizer.encode(
+            "summarize: " + text,
+            return_tensors='pt',
+            max_length=512,
+            truncation=True,
+        )
+
+        summary_ids = model.generate(
+            inputs,
+            max_length=150,
+            min_length=80,
+            length_penalty=5.,
+            num_beams=2,
+        )
+        # Drop padding / end-of-sequence markers so only the text is returned.
+        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+
+        return jsonify({'summary': summary}), 200
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 400
+
+
+if __name__ == '__main__':
+    app.run(debug=True)
+
+
+# HOW TO RUN:
+# Run this python file in your terminal
+# Open the post_cli_tool2.py file and modify the url string to be 'http://127.0.0.1:5000/T5_summarize'
+# In a different terminal, run python post_cli_tool2.py
\ No newline at end of file
diff --git a/text_summarizers/nltk_summarizer.py b/text_summarizers/nltk_summarizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..b04078d684c3eb2eb6339096840c1284d25b5db0
--- /dev/null
+++ b/text_summarizers/nltk_summarizer.py
@@ -0,0 +1,78 @@
+"""Flask service for extractive summarization at POST /nltk_summarize."""
+
+from collections import Counter
+
+from flask import Flask, request, jsonify
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize, sent_tokenize
+
+nltk.download('stopwords')
+nltk.download('punkt')
+# NLTK 3.9+ loads the tokenizer models from 'punkt_tab' instead of 'punkt'.
+nltk.download('punkt_tab')
+
+app = Flask(__name__)
+
+
+@app.route('/nltk_summarize', methods=['POST'])
+def summarize():
+    """Summarize the ``text`` field of the posted JSON body.
+
+    Every non-stopword token is weighted by how often it occurs in the text;
+    a sentence scores the sum of the weights of the tokens found in it (by
+    substring match), and sentences scoring more than 1.2x the truncated
+    average are kept in their original order.
+
+    Returns ``{'summary': ...}`` with status 200 (the summary is empty when
+    no sentence can be scored), or ``{'error': ...}`` with status 400.
+    """
+    try:
+        data = request.json
+        text = data['text']
+
+        stopWords = set(stopwords.words("english"))
+        freqTable = Counter(
+            word
+            for word in (token.lower() for token in word_tokenize(text))
+            if word not in stopWords
+        )
+
+        sentences = sent_tokenize(text)
+        sentenceValue = dict()
+
+        for sentence in sentences:
+            lowered = sentence.lower()  # invariant across the word loop
+            # NOTE(review): substring match, so e.g. 'art' also scores a
+            # sentence containing 'party'; kept to preserve existing output.
+            score = sum(
+                freq for word, freq in freqTable.items() if word in lowered
+            )
+            if score:
+                sentenceValue[sentence] = sentenceValue.get(sentence, 0) + score
+
+        # Empty text, or text made only of stopwords, scores no sentence;
+        # return an empty summary instead of dividing by zero below.
+        if not sentenceValue:
+            return jsonify({'summary': ''}), 200
+
+        average = int(sum(sentenceValue.values()) / len(sentenceValue))
+
+        summary = ''
+        for sentence in sentences:
+            if (sentence in sentenceValue) and (sentenceValue[sentence] > (1.2 * average)):
+                summary += " " + sentence
+
+        return jsonify({'summary': summary}), 200
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 400
+
+
+if __name__ == '__main__':
+    app.run(debug=True)
+
+
+# HOW TO RUN:
+# Run this python file in your terminal
+# Open the post_cli_tool2.py file and modify the url string to be 'http://127.0.0.1:5000/nltk_summarize'
+# In a different terminal, run python post_cli_tool2.py
\ No newline at end of file
diff --git a/text_summarizers/post_cli_tool2.py b/text_summarizers/post_cli_tool2.py
new file mode 100644
index 0000000000000000000000000000000000000000..acdc4e2b6cf4a57548620db3255cf4e160f58783
--- /dev/null
+++ b/text_summarizers/post_cli_tool2.py
@@ -0,0 +1,14 @@
+import requests
+
+# Specify the API URL we want to send our JSON to
+url = 'http://127.0.0.1:5000/T5_summarize'
+
+# Read the text we want summarized
+with open('short_input.txt', 'r', encoding='utf-8') as file:
+    text = file.read()
+
+# Let requests serialize the payload: hand-concatenating the JSON string
+# breaks as soon as the text contains quotes, backslashes or newlines.
+# Passing json= also sets the Content-Type: application/json header.
+response = requests.post(url, json={'text': text})
+print(response.text, response.status_code)
\ No newline at end of file
diff --git a/text_summarizers/test-BERT_summarizer.py b/text_summarizers/test-BERT_summarizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed131ea385e8f2b54b3d28c37bbe96aca16dcf70
--- /dev/null
+++ b/text_summarizers/test-BERT_summarizer.py
@@ -0,0 +1,48 @@
+import unittest
+from unittest.mock import patch
+from text_summarizer import app
+
+class BERTSummarizerTestCase(unittest.TestCase):
+
+    def setUp(self):
+        self.app = app.test_client()
+        self.app.testing = True
+
+    @patch('text_summarizer.Summarizer')
+    def test_summarize(self, mock_summarizer):
+        # Mock the Summarizer class
+        mock_instance = mock_summarizer.return_value
+        mock_instance.return_value = "This is a summary."
+
+        # Define input data
+        input_data = {'text': 'This is a test input.'}
+
+        # Send a POST request to the endpoint
+        response = self.app.post('/BERT_summarize', json=input_data)
+
+        # Check the response
+        self.assertEqual(response.status_code, 200)
+        data = response.get_json()
+        self.assertIn('summary', data)
+        self.assertEqual(data['summary'], "This is a summary.")
+
+    @patch('text_summarizer.Summarizer')
+    def test_error_handling(self, mock_summarizer):
+        # Mock the Summarizer class to raise an exception
+        mock_instance = mock_summarizer.return_value
+        mock_instance.side_effect = Exception('An error occurred.')
+
+        # Define input data
+        input_data = {'text': 'This is a test input.'}
+
+        # Send a POST request to the endpoint
+        response = self.app.post('/BERT_summarize', json=input_data)
+
+        # Check the response
+        self.assertEqual(response.status_code, 400)
+        data = response.get_json()
+        self.assertIn('error', data)
+        self.assertEqual(data['error'], 'An error occurred.')
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/text_summarizers/test-nltk_summarizer.py b/text_summarizers/test-nltk_summarizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..b16db1a81bcb2d6d713d5713b54f8bcfd65dba05
--- /dev/null
+++ b/text_summarizers/test-nltk_summarizer.py
@@ -0,0 +1,41 @@
+import unittest
+import json
+from nltk_summarizer import app
+
+class NLTKSummarizerTestCase(unittest.TestCase):
+
+    def setUp(self):
+        self.app = app.test_client()
+        self.app.testing = True
+
+    def test_summarize(self):
+        input_data = {
+            'text': 'This is a test. It is only a test.'
+        }
+        response = self.app.post('/nltk_summarize', json=input_data)
+        data = json.loads(response.data.decode('utf-8'))
+
+        self.assertEqual(response.status_code, 200)
+        self.assertIn('summary', data)
+        self.assertTrue(isinstance(data['summary'], str))
+
+    def test_empty_text(self):
+        # Text with nothing to score must yield an empty summary, not an error
+        response = self.app.post('/nltk_summarize', json={'text': ''})
+        data = json.loads(response.data.decode('utf-8'))
+
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(data['summary'], '')
+
+    def test_invalid_input(self):
+        input_data = {
+            'incorrect_key': 'This is a test. It is only a test.'
+        }
+        response = self.app.post('/nltk_summarize', json=input_data)
+        data = json.loads(response.data.decode('utf-8'))
+
+        self.assertEqual(response.status_code, 400)
+        self.assertIn('error', data)
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/text_summarizers/text_summarizer.py b/text_summarizers/text_summarizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..01c2d27ef3046015b927e1dad08a5e98eff7985f
--- /dev/null
+++ b/text_summarizers/text_summarizer.py
@@ -0,0 +1,36 @@
+"""Flask service exposing summarizer.Summarizer at POST /BERT_summarize."""
+
+from summarizer import Summarizer
+import requests as r
+from bs4 import BeautifulSoup
+
+from flask import Flask, jsonify, request
+
+app = Flask(__name__)
+
+
+@app.route('/BERT_summarize', methods=['POST'])
+def summarize():
+    """Summarize the ``text`` field of the posted JSON body.
+
+    Returns ``{'summary': ...}`` with status 200 on success, or
+    ``{'error': ...}`` with status 400 if anything goes wrong.
+    """
+    try:
+        # Built per request: the unit tests patch text_summarizer.Summarizer
+        # and rely on the handler constructing it inside the request.
+        model = Summarizer()
+        data = request.json
+        summary = model(data['text'])
+        return jsonify({'summary': summary}), 200
+    except Exception as e:
+        return jsonify({'error': str(e)}), 400
+
+
+if __name__ == '__main__':
+    app.run(debug=True)
+
+# HOW TO RUN:
+# Run this python file in your terminal
+# Open the post_cli_tool2.py file and modify the url string to be 'http://127.0.0.1:5000/BERT_summarize'
+# In a different terminal, run python post_cli_tool2.py
\ No newline at end of file