Skip to content
Snippets Groups Projects
Commit 8a396e6e authored by Parth Mittal's avatar Parth Mittal
Browse files

Testing Model Training

parent 8f329351
No related branches found
No related tags found
No related merge requests found
GUI.py 0 → 100644
%% Cell type:markdown id: tags:
# ReelRating: CS 4804 Mini-Project
## Jeff Suliga, Tanya Acharya, Rishi Patel, Parth Mittal
%% Cell type:code id: tags:
``` python
# All the imports for the project:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.svm import SVR
import re
```
%% Cell type:code id: tags:
``` python
# Load the IMDB reviews CSV (path is relative to the working directory)
# into a DataFrame that the later cells operate on.
file_path = 'IMDBDataset.csv'
data = pd.read_csv(filepath_or_buffer=file_path)
```
%% Cell type:code id: tags:
``` python
# Function to clean the text data
def clean_text(text):
    """Normalize a raw review string before vectorization.

    HTML tags are replaced with a single space rather than deleted, so that
    words separated only by markup (e.g. ``great<br />movie``) remain
    separate tokens instead of being fused into one. Every remaining
    character other than ASCII letters, digits and whitespace is then
    dropped, and the result is lowercased.

    Args:
        text: The raw review text.

    Returns:
        The cleaned, lowercased text.
    """
    text = re.sub(r'<.*?>', ' ', text)  # Replace HTML tags with a space
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove non-alphanumeric characters
    return text.lower()  # Convert to lowercase
# Clean the raw reviews and encode the sentiment column as a 0/1 label.
data['cleaned-review'] = data['review'].apply(clean_text)
data['label'] = data['sentiment'].map({'positive': 1, 'negative': 0})

# Hold out 20% of the reviews for evaluation; the seed is fixed so the split
# is reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    data['cleaned-review'], data['label'], test_size=0.2, random_state=42
)

# Fit the TF-IDF vocabulary on the training split only, then reuse it
# unchanged for the test split.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# SVR is a regressor: its predictions are continuous scores, not class labels.
regression_model = SVR()
regression_model.fit(X_train_tfidf, Y_train)
predictions = regression_model.predict(X_test_tfidf)

# classification_report only accepts discrete labels; handing it the raw SVR
# output raises "Classification metrics can't handle a mix of binary and
# continuous targets". Threshold at 0.5 (the midpoint of the 0/1 labels) to
# turn the scores into class predictions before building the report.
predicted_labels = (predictions >= 0.5).astype(int)
report = classification_report(Y_test, predicted_labels, output_dict=True)
report
```
%% Output
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
c:\Users\SC\OneDrive - Virginia Tech\Fall 2023\Intro to AI\reelrating\reelrating_training.ipynb Cell 4 line 2
<a href='vscode-notebook-cell:/c%3A/Users/SC/OneDrive%20-%20Virginia%20Tech/Fall%202023/Intro%20to%20AI/reelrating/reelrating_training.ipynb#W3sZmlsZQ%3D%3D?line=18'>19</a> regression_model.fit(X_train_tfidf, Y_train)
<a href='vscode-notebook-cell:/c%3A/Users/SC/OneDrive%20-%20Virginia%20Tech/Fall%202023/Intro%20to%20AI/reelrating/reelrating_training.ipynb#W3sZmlsZQ%3D%3D?line=20'>21</a> predictions = regression_model.predict(X_test_tfidf)
---> <a href='vscode-notebook-cell:/c%3A/Users/SC/OneDrive%20-%20Virginia%20Tech/Fall%202023/Intro%20to%20AI/reelrating/reelrating_training.ipynb#W3sZmlsZQ%3D%3D?line=21'>22</a> report = classification_report(Y_test, predictions, output_dict=True)
<a href='vscode-notebook-cell:/c%3A/Users/SC/OneDrive%20-%20Virginia%20Tech/Fall%202023/Intro%20to%20AI/reelrating/reelrating_training.ipynb#W3sZmlsZQ%3D%3D?line=22'>23</a> report
File c:\Users\SC\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:211, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
205 try:
206 with config_context(
207 skip_parameter_validation=(
208 prefer_skip_nested_validation or global_skip_validation
209 )
210 ):
--> 211 return func(*args, **kwargs)
212 except InvalidParameterError as e:
213 # When the function is just a wrapper around an estimator, we allow
214 # the function to delegate validation to the estimator, but we replace
215 # the name of the estimator by the name of the function in the error
216 # message to avoid confusion.
217 msg = re.sub(
218 r"parameter of \w+ must be",
219 f"parameter of {func.__qualname__} must be",
220 str(e),
221 )
File c:\Users\SC\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:2539, in classification_report(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)
2405 @validate_params(
2406 {
2407 "y_true": ["array-like", "sparse matrix"],
(...)
2430 zero_division="warn",
2431 ):
2432 """Build a text report showing the main classification metrics.
2433
2434 Read more in the :ref:`User Guide <classification_report>`.
(...)
2536 <BLANKLINE>
2537 """
-> 2539 y_type, y_true, y_pred = _check_targets(y_true, y_pred)
2541 if labels is None:
2542 labels = unique_labels(y_true, y_pred)
File c:\Users\SC\anaconda3\Lib\site-packages\sklearn\metrics\_classification.py:93, in _check_targets(y_true, y_pred)
90 y_type = {"multiclass"}
92 if len(y_type) > 1:
---> 93 raise ValueError(
94 "Classification metrics can't handle a mix of {0} and {1} targets".format(
95 type_true, type_pred
96 )
97 )
99 # We can't have more than one value on y_type => The set is no more needed
100 y_type = y_type.pop()
ValueError: Classification metrics can't handle a mix of binary and continuous targets
%% Cell type:code id: tags:
``` python
def predict_rating(review):
    """Return the model's predicted score for a single review string.

    The review is cleaned with ``clean_text``, vectorized with the
    module-level ``vectorizer``, and scored by ``regression_model``.

    Args:
        review: The raw review text.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    # transform() expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([clean_text(review)])
    scores = regression_model.predict(features)
    return scores[0]
# Re-read the CSV, rebinding `data` to the freshly loaded DataFrame.
data = pd.read_csv('IMDBDataset.csv')

# Smoke-test the predictor on a clearly positive sample review.
print(
    predict_rating(
        "This movie was awesome! The acting was great, plot was wonderful, and there were pythons...so yea!"
    )
)
```
%% Cell type:code id: tags:
``` python
```
......
import re
def clean_text(text):
    """ Clean the input text.

    HTML tags are replaced with a single space rather than deleted, so that
    words separated only by markup (e.g. ``great<br />movie``) remain
    separate tokens instead of being fused into one. Every remaining
    character other than ASCII letters, digits and whitespace is then
    dropped, and the result is lowercased.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned, lowercased text.
    """
    text = re.sub(r'<.*?>', ' ', text)  # Replace HTML tags with a space
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove non-alphanumeric characters
    return text.lower()  # Convert to lowercase
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment