From 7aeb1308d734e346a80845d614bd2ddb00743bb2 Mon Sep 17 00:00:00 2001
From: starek21 <starek21@vt.edu>
Date: Wed, 1 May 2024 15:05:39 -0400
Subject: [PATCH] text_summarizers folder

---
 text_summarizers/T5_summarizer.py        | 32 ++++++++++++
 text_summarizers/nltk_summarizer.py      | 64 ++++++++++++++++++++++++
 text_summarizers/post_cli_tool2.py       | 15 ++++++
 text_summarizers/test-BERT_summarizer.py | 48 ++++++++++++++++++
 text_summarizers/test-nltk_summarizer.py | 33 ++++++++++++
 text_summarizers/text_summarizer.py      | 25 +++++++++
 6 files changed, 217 insertions(+)
 create mode 100644 text_summarizers/T5_summarizer.py
 create mode 100644 text_summarizers/nltk_summarizer.py
 create mode 100644 text_summarizers/post_cli_tool2.py
 create mode 100644 text_summarizers/test-BERT_summarizer.py
 create mode 100644 text_summarizers/test-nltk_summarizer.py
 create mode 100644 text_summarizers/text_summarizer.py

diff --git a/text_summarizers/T5_summarizer.py b/text_summarizers/T5_summarizer.py
new file mode 100644
index 0000000..4469e5d
--- /dev/null
+++ b/text_summarizers/T5_summarizer.py
@@ -0,0 +1,32 @@
+from flask import Flask, request, jsonify
+from transformers import AutoTokenizer, AutoModelWithLMHead
+
+app = Flask(__name__)
+# Module-level, so the tokenizer and model are loaded once at import and reused by every request.
+tokenizer = AutoTokenizer.from_pretrained('t5-base')
+model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
+
+@app.route('/T5_summarize', methods=['POST'])
+def summarize():
+    """Summarize the JSON field 'text' with t5-base; any failure yields a 400 with its message."""
+    try:
+        data = request.json
+        text = data['text']
+        # The "summarize: " prefix is prepended to the text; input is truncated to 512 tokens.
+        inputs = tokenizer.encode("summarize: " + text, return_tensors='pt', max_length=512, truncation=True)
+        summary_ids = model.generate(inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2)
+        # skip_special_tokens drops the <pad>/</s> markers that would otherwise leak into the response.
+        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+        return jsonify({'summary': summary}), 200
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 400
+
+if __name__ == '__main__':
+    app.run(debug=True)  # NOTE(review): debug=True turns on Flask's debugger/reloader; local runs only
+
+
+# HOW TO RUN:
+# Run this python file in your terminal
+# Open the post_cli_tool2.py file and modify the url string to be 'http://127.0.0.1:5000/T5_summarize'
+# In a different terminal, run python post_cli_tool2.py
\ No newline at end of file
diff --git a/text_summarizers/nltk_summarizer.py b/text_summarizers/nltk_summarizer.py
new file mode 100644
index 0000000..b04078d
--- /dev/null
+++ b/text_summarizers/nltk_summarizer.py
@@ -0,0 +1,64 @@
+from flask import Flask, request, jsonify
+import nltk          
+from nltk.corpus import stopwords      
+nltk.download('stopwords') 
+nltk.download('punkt')                  
+from nltk.tokenize import word_tokenize, sent_tokenize
+
+app = Flask(__name__)
+
+@app.route('/nltk_summarize', methods=['POST'])
+def summarize():
+    """Return an extractive summary of the JSON field 'text'.
+
+    Non-stopword tokens are counted over the whole text. A sentence scores the
+    summed counts of the distinct tokens found in its lower-cased text, and
+    sentences scoring above 1.2x the truncated mean score are joined in order.
+
+    Responds 200 with {'summary': ...}; any failure (missing 'text' key, no
+    scorable sentence, other exception) yields 400 with {'error': message}.
+    """
+    try:
+        data = request.json
+        text = data['text']
+        stopWords = set(stopwords.words("english"))
+
+        # Frequency of every lower-cased token that is not a stopword.
+        freqTable = dict()
+        for word in word_tokenize(text):
+            word = word.lower()
+            if word not in stopWords:
+                freqTable[word] = freqTable.get(word, 0) + 1
+
+        sentences = sent_tokenize(text)
+        sentenceValue = dict()
+        for sentence in sentences:
+            # Lower-case once per sentence instead of once per vocabulary word.
+            lowered = sentence.lower()
+            # NOTE(review): substring test, so 'art' also matches inside 'start'.
+            score = sum(freq for word, freq in freqTable.items() if word in lowered)
+            if score:
+                sentenceValue[sentence] = sentenceValue.get(sentence, 0) + score
+
+        # Fail with a readable message instead of a bare 'division by zero'.
+        if not sentenceValue:
+            raise ValueError("text contains no sentences that can be scored")
+        average = int(sum(sentenceValue.values()) / len(sentenceValue))
+
+        summary = ''
+        for sentence in sentences:
+            if sentenceValue.get(sentence, 0) > (1.2 * average):
+                summary += " " + sentence
+        return jsonify({'summary': summary}), 200
+
+    except Exception as e:
+        return jsonify({'error': str(e)}), 400
+
+if __name__ == '__main__':
+    app.run(debug=True)  # NOTE(review): debug=True turns on Flask's debugger/reloader; local runs only
+
+
+# HOW TO RUN:
+# Run this python file in your terminal
+# Open the post_cli_tool2.py file and modify the url string to be 'http://127.0.0.1:5000/nltk_summarize'
+# In a different terminal, run python post_cli_tool2.py
\ No newline at end of file
diff --git a/text_summarizers/post_cli_tool2.py b/text_summarizers/post_cli_tool2.py
new file mode 100644
index 0000000..acdc4e2
--- /dev/null
+++ b/text_summarizers/post_cli_tool2.py
@@ -0,0 +1,15 @@
+import requests
+
+# Specify the API URL we want to send our JSON to
+url = 'http://127.0.0.1:5000/T5_summarize'
+
+# Specify the appropriate header for the POST request
+headers = {'Content-type': 'application/json'}
+
+# Read the text to send; requests serializes the dict itself, so quotes, backslashes and newlines stay valid JSON
+with open('short_input.txt', 'r', encoding='utf-8') as file:
+    text = file.read()
+data = {"text": text}
+
+response = requests.post(url, headers=headers, json=data)
+print(response.text, response.status_code)
\ No newline at end of file
diff --git a/text_summarizers/test-BERT_summarizer.py b/text_summarizers/test-BERT_summarizer.py
new file mode 100644
index 0000000..ed131ea
--- /dev/null
+++ b/text_summarizers/test-BERT_summarizer.py
@@ -0,0 +1,48 @@
+import unittest
+from unittest.mock import patch
+from text_summarizer import app
+
+class BERTSummarizerTestCase(unittest.TestCase):
+    """Exercise the /BERT_summarize route with the Summarizer class mocked out."""
+
+    def setUp(self):
+        # The testing flag belongs to the application; set on the client it has no effect.
+        app.testing = True
+        self.app = app.test_client()
+
+    @patch('text_summarizer.Summarizer')
+    def test_summarize(self, mock_summarizer):
+        """The text returned by the model is sent back as the summary with a 200."""
+        # Summarizer() yields mock_instance; calling that instance yields the text below
+        mock_instance = mock_summarizer.return_value
+        mock_instance.return_value = "This is a summary."
+
+        # Send a POST request to the endpoint
+        input_data = {'text': 'This is a test input.'}
+        response = self.app.post('/BERT_summarize', json=input_data)
+
+        # Check the response
+        self.assertEqual(response.status_code, 200)
+        data = response.get_json()
+        self.assertIn('summary', data)
+        self.assertEqual(data['summary'], "This is a summary.")
+
+    @patch('text_summarizer.Summarizer')
+    def test_error_handling(self, mock_summarizer):
+        """An exception raised by the model is reported as a 400 carrying its message."""
+        # Calling the mocked instance raises, standing in for a failure inside the model
+        mock_instance = mock_summarizer.return_value
+        mock_instance.side_effect = Exception('An error occurred.')
+
+        # Send a POST request to the endpoint
+        input_data = {'text': 'This is a test input.'}
+        response = self.app.post('/BERT_summarize', json=input_data)
+
+        # Check the response
+        self.assertEqual(response.status_code, 400)
+        data = response.get_json()
+        self.assertIn('error', data)
+        self.assertEqual(data['error'], 'An error occurred.')
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/text_summarizers/test-nltk_summarizer.py b/text_summarizers/test-nltk_summarizer.py
new file mode 100644
index 0000000..b16db1a
--- /dev/null
+++ b/text_summarizers/test-nltk_summarizer.py
@@ -0,0 +1,33 @@
+import unittest
+import json
+from nltk_summarizer import app
+
+class NLTKSummarizerTestCase(unittest.TestCase):
+    """Exercise the /nltk_summarize route through Flask's test client."""
+
+    def setUp(self):
+        # The testing flag belongs to the application; set on the client it has no effect.
+        app.testing = True
+        self.app = app.test_client()
+
+    def test_summarize(self):
+        """A body with a 'text' key yields a 200 whose 'summary' is a string."""
+        input_data = {'text': 'This is a test. It is only a test.'}
+        response = self.app.post('/nltk_summarize', json=input_data)
+        data = json.loads(response.data.decode('utf-8'))
+
+        self.assertEqual(response.status_code, 200)
+        self.assertIn('summary', data)
+        self.assertIsInstance(data['summary'], str)
+
+    def test_invalid_input(self):
+        """A body without the 'text' key is rejected with a 400 and an 'error' field."""
+        input_data = {'incorrect_key': 'This is a test. It is only a test.'}
+        response = self.app.post('/nltk_summarize', json=input_data)
+        data = json.loads(response.data.decode('utf-8'))
+
+        self.assertEqual(response.status_code, 400)
+        self.assertIn('error', data)
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/text_summarizers/text_summarizer.py b/text_summarizers/text_summarizer.py
new file mode 100644
index 0000000..01c2d27
--- /dev/null
+++ b/text_summarizers/text_summarizer.py
@@ -0,0 +1,25 @@
+from summarizer import Summarizer
+import requests as r
+from bs4 import BeautifulSoup
+
+from flask import Flask, jsonify, request
+
+app = Flask(__name__)
+
+@app.route('/BERT_summarize', methods=['POST'])
+def summarize():
+    """Summarize the JSON field 'text' with a Summarizer model; any failure yields a 400."""
+    try:
+        bert = Summarizer()  # built per request, inside the try so a construction error is also a 400
+        payload = request.json
+        return jsonify({'summary': bert(payload['text'])}), 200
+    except Exception as err:
+        return jsonify({'error': str(err)}), 400
+
+if __name__ == '__main__':
+    app.run(debug=True)  # NOTE(review): debug=True turns on Flask's debugger/reloader; local runs only
+
+# HOW TO RUN:
+# Run this python file in your terminal
+# Open the post_cli_tool2.py file and modify the url string to be 'http://127.0.0.1:5000/BERT_summarize'
+# In a different terminal, run python post_cli_tool2.py
\ No newline at end of file
-- 
GitLab