Skip to content
Snippets Groups Projects
Commit 3139b4b6 authored by Tarek Shah's avatar Tarek Shah
Browse files

converted summarization scripts into a Flask API endpoint

parent d40c6cf4
No related branches found
No related tags found
No related merge requests found
"""Standalone script: print an abstractive T5 summary of a hard-coded news article."""
import torch
# AutoModelWithLMHead is deprecated in transformers; AutoModelForSeq2SeqLM is the
# correct auto-class for encoder-decoder models such as T5.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

ARTICLE = """The Saturday night shooting came as the city's majority Asian community was marking the Lunar New Year, transforming one of the most auspicious days of the calendar into a tragedy.
“There is a lot of fear and anxiety out there. People are fearful of this kind of situation where our joyous Lunar New Year celebration was turned utterly upside down into tragedy and fear,” Rep. Judy Chu, who represents Monterey Park, said Monday at a candlelight vigil for the victims.
California Gov. Gavin Newsom was meeting with victims of the shooting Monday when he was pulled away to be briefed on another fatal mass shooting in Half Moon Bay - the state's second mass shooting in three days. “Tragedy upon tragedy,” Newsom said in a tweet.
Authorities revealed new details Monday from their search of the home of the Monterey Park shooting suspect, 72-year-old Huu Can Tran, in Hemet, about 80 miles east of Monterey Park.
Detectives executed a search warrant and found “hundreds of rounds” of ammunition, a .308-caliber rifle, various electronic devices and evidence leading officials to believe he was “manufacturing homemade firearm suppressors,” Los Angeles County Sheriff Robert Luna said."""


def summarize(text, max_input_tokens=512, max_summary_tokens=150, min_summary_tokens=80):
    """Return an abstractive summary of *text* using the t5-base model.

    Args:
        text: the article to summarize.
        max_input_tokens: input is truncated to this many tokens (T5 limit is 512).
        max_summary_tokens / min_summary_tokens: bounds for the generated summary length.
    """
    tokenizer = AutoTokenizer.from_pretrained('t5-base')
    model = AutoModelForSeq2SeqLM.from_pretrained('t5-base', return_dict=True)
    # T5 is a text-to-text model: the task is selected by the "summarize: " prefix.
    inputs = tokenizer.encode("summarize: " + text,
                              return_tensors='pt',
                              max_length=max_input_tokens,
                              truncation=True)
    # Inference only — disable gradient tracking to save memory and time.
    with torch.no_grad():
        summary_ids = model.generate(inputs,
                                     max_length=max_summary_tokens,
                                     min_length=min_summary_tokens,
                                     length_penalty=5.,
                                     num_beams=2)
    # skip_special_tokens strips <pad>/</s> markers that would otherwise
    # appear verbatim in the printed summary.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


if __name__ == '__main__':
    print(summarize(ARTICLE))
\ No newline at end of file
"""Standalone script: print an extractive, word-frequency-based summary of a news article."""
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

nltk.download('stopwords')
nltk.download('punkt')

text = """The Saturday night shooting came as the city's majority Asian community was marking the Lunar New Year, transforming one of the most auspicious days of the calendar into a tragedy.
“There is a lot of fear and anxiety out there. People are fearful of this kind of situation where our joyous Lunar New Year celebration was turned utterly upside down into tragedy and fear,” Rep. Judy Chu, who represents Monterey Park, said Monday at a candlelight vigil for the victims.
California Gov. Gavin Newsom was meeting with victims of the shooting Monday when he was pulled away to be briefed on another fatal mass shooting in Half Moon Bay - the state's second mass shooting in three days. “Tragedy upon tragedy,” Newsom said in a tweet.
Authorities revealed new details Monday from their search of the home of the Monterey Park shooting suspect, 72-year-old Huu Can Tran, in Hemet, about 80 miles east of Monterey Park.
Detectives executed a search warrant and found “hundreds of rounds” of ammunition, a .308-caliber rifle, various electronic devices and evidence leading officials to believe he was “manufacturing homemade firearm suppressors,” Los Angeles County Sheriff Robert Luna said."""


def summarize(text):
    """Return the sentences of *text* whose word-frequency score is > 1.2x the average.

    Sentences are scored by summing the corpus-wide frequencies of the
    non-stopword tokens they contain. Returns '' for empty/stopword-only input.
    """
    stop_words = set(stopwords.words("english"))

    # Build a frequency table of lowercased non-stopword tokens.
    freq_table = {}
    for word in word_tokenize(text):
        word = word.lower()
        if word in stop_words:
            continue
        freq_table[word] = freq_table.get(word, 0) + 1

    # Score sentences on token membership rather than raw substring search,
    # so e.g. "park" no longer matches inside "parking".
    sentences = sent_tokenize(text)
    sentence_value = {}
    for sentence in sentences:
        tokens = {w.lower() for w in word_tokenize(sentence)}
        score = sum(f for w, f in freq_table.items() if w in tokens)
        if score:
            sentence_value[sentence] = score

    # Guard the average against division by zero on empty input.
    if not sentence_value:
        return ''
    # Use a float average; the original integer truncation skewed the cutoff.
    average = sum(sentence_value.values()) / len(sentence_value)

    # Keep sentences scoring well above average, preserving article order.
    # (Leading-space concatenation matches the original output format.)
    return ''.join(' ' + s for s in sentences
                   if sentence_value.get(s, 0) > 1.2 * average)


print(summarize(text))
\ No newline at end of file
"""Flask API exposing T5 abstractive summarization at POST /summarize."""
import torch
from flask import Flask, request, jsonify
# AutoModelWithLMHead is deprecated; AutoModelForSeq2SeqLM is the correct
# auto-class for encoder-decoder models such as T5.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

app = Flask(__name__)

# Load the tokenizer and model once at startup and reuse them across requests.
# (The original loaded the tokenizer twice and also read 'summarizer_input.txt'
# at import time, which crashed the server when that file was absent.)
tokenizer = AutoTokenizer.from_pretrained('t5-base')
model = AutoModelForSeq2SeqLM.from_pretrained('t5-base', return_dict=True)


@app.route('/summarize', methods=['POST'])
def summarize():
    """Summarize the JSON body's 'text' field.

    Request:  {"text": "<article>"}
    Response: 200 {"summary": "..."} on success, 400 {"error": "..."} otherwise.
    """
    try:
        data = request.json
        text = data['text']
        # T5 selects its task via the "summarize: " prefix; input truncated to 512 tokens.
        inputs = tokenizer.encode("summarize: " + text,
                                  return_tensors='pt',
                                  max_length=512,
                                  truncation=True)
        # Inference only — no gradients needed.
        with torch.no_grad():
            summary_ids = model.generate(inputs,
                                         max_length=150,
                                         min_length=80,
                                         length_penalty=5.,
                                         num_beams=2)
        # Strip <pad>/</s> special tokens from the returned summary.
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        return jsonify({'summary': summary}), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 400


if __name__ == '__main__':
    app.run(debug=True)
\ No newline at end of file
# Windows Powershell POST request:
# Invoke-RestMethod -Method POST -Uri http://localhost:5000/summarize -ContentType "application/json" -Body '{"text": "<input text here>"}'
# In Bash:
# curl -X POST -H "Content-Type: application/json" -d '{"text": "<input text here>"}' http://localhost:5000/summarize
\ No newline at end of file
"""Flask API exposing extractive, word-frequency-based summarization at POST /summarize."""
from flask import Flask, request, jsonify
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

# Fetch required NLTK corpora once at startup.
nltk.download('stopwords')
nltk.download('punkt')

# The original file also contained a pasted copy of the standalone script that
# read 'summarizer_input.txt' and summarized it at import time, crashing the
# server when that file was absent; that dead code is removed here.

app = Flask(__name__)


def _extractive_summary(text):
    """Return the sentences of *text* scoring > 1.2x the average frequency score.

    Sentences are scored by summing the corpus-wide frequencies of the
    non-stopword tokens they contain. Returns '' for empty/stopword-only input.
    """
    stop_words = set(stopwords.words("english"))

    # Frequency table of lowercased non-stopword tokens.
    freq_table = {}
    for word in word_tokenize(text):
        word = word.lower()
        if word in stop_words:
            continue
        freq_table[word] = freq_table.get(word, 0) + 1

    # Score on token membership, not substring search, so e.g. "park" no
    # longer matches inside "parking".
    sentences = sent_tokenize(text)
    sentence_value = {}
    for sentence in sentences:
        tokens = {w.lower() for w in word_tokenize(sentence)}
        score = sum(f for w, f in freq_table.items() if w in tokens)
        if score:
            sentence_value[sentence] = score

    # Guard against division by zero on empty input.
    if not sentence_value:
        return ''
    # Float average; the original integer truncation skewed the cutoff.
    average = sum(sentence_value.values()) / len(sentence_value)

    # Keep high-scoring sentences in article order (leading-space join
    # matches the original output format).
    return ''.join(' ' + s for s in sentences
                   if sentence_value.get(s, 0) > 1.2 * average)


@app.route('/summarize', methods=['POST'])
def summarize():
    """Summarize the JSON body's 'text' field.

    Request:  {"text": "<article>"}
    Response: 200 {"summary": "..."} on success, 400 {"error": "..."} otherwise.
    """
    try:
        data = request.json
        text = data['text']
        summary = _extractive_summary(text)
        return jsonify({'summary': summary}), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 400


if __name__ == '__main__':
    app.run(debug=True)
# Windows Powershell POST request:
# Invoke-RestMethod -Method POST -Uri http://localhost:5000/summarize -ContentType "application/json" -Body '{"text": "<input text here>"}'
# In Bash:
# curl -X POST -H "Content-Type: application/json" -d '{"text": "<input text here>"}' http://localhost:5000/summarize
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment