"\u001b[1;32mc:\\Users\\SC\\OneDrive - Virginia Tech\\Fall 2023\\Intro to AI\\reelrating\\reelrating_training.ipynb Cell 4\u001b[0m line \u001b[0;36m2\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/SC/OneDrive%20-%20Virginia%20Tech/Fall%202023/Intro%20to%20AI/reelrating/reelrating_training.ipynb#W3sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m regression_model\u001b[39m.\u001b[39mfit(X_train_tfidf, Y_train)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/SC/OneDrive%20-%20Virginia%20Tech/Fall%202023/Intro%20to%20AI/reelrating/reelrating_training.ipynb#W3sZmlsZQ%3D%3D?line=20'>21</a>\u001b[0m predictions \u001b[39m=\u001b[39m regression_model\u001b[39m.\u001b[39mpredict(X_test_tfidf)\n\u001b[1;32m---> <a href='vscode-notebook-cell:/c%3A/Users/SC/OneDrive%20-%20Virginia%20Tech/Fall%202023/Intro%20to%20AI/reelrating/reelrating_training.ipynb#W3sZmlsZQ%3D%3D?line=21'>22</a>\u001b[0m report \u001b[39m=\u001b[39m classification_report(Y_test, predictions, output_dict\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m <a href='vscode-notebook-cell:/c%3A/Users/SC/OneDrive%20-%20Virginia%20Tech/Fall%202023/Intro%20to%20AI/reelrating/reelrating_training.ipynb#W3sZmlsZQ%3D%3D?line=22'>23</a>\u001b[0m report\n",
"File \u001b[1;32mc:\\Users\\SC\\anaconda3\\Lib\\site-packages\\sklearn\\utils\\_param_validation.py:211\u001b[0m, in \u001b[0;36mvalidate_params.<locals>.decorator.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 205\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 206\u001b[0m \u001b[39mwith\u001b[39;00m config_context(\n\u001b[0;32m 207\u001b[0m skip_parameter_validation\u001b[39m=\u001b[39m(\n\u001b[0;32m 208\u001b[0m prefer_skip_nested_validation \u001b[39mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 209\u001b[0m )\n\u001b[0;32m 210\u001b[0m ):\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 212\u001b[0m \u001b[39mexcept\u001b[39;00m InvalidParameterError \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 213\u001b[0m \u001b[39m# When the function is just a wrapper around an estimator, we allow\u001b[39;00m\n\u001b[0;32m 214\u001b[0m \u001b[39m# the function to delegate validation to the estimator, but we replace\u001b[39;00m\n\u001b[0;32m 215\u001b[0m \u001b[39m# the name of the estimator by the name of the function in the error\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \u001b[39m# message to avoid confusion.\u001b[39;00m\n\u001b[0;32m 217\u001b[0m msg \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39msub(\n\u001b[0;32m 218\u001b[0m \u001b[39mr\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mparameter of \u001b[39m\u001b[39m\\\u001b[39m\u001b[39mw+ must be\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 219\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mparameter of \u001b[39m\u001b[39m{\u001b[39;00mfunc\u001b[39m.\u001b[39m\u001b[39m__qualname__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m must be\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 220\u001b[0m \u001b[39mstr\u001b[39m(e),\n\u001b[0;32m 221\u001b[0m )\n",
"File \u001b[1;32mc:\\Users\\SC\\anaconda3\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:2539\u001b[0m, in \u001b[0;36mclassification_report\u001b[1;34m(y_true, y_pred, labels, target_names, sample_weight, digits, output_dict, zero_division)\u001b[0m\n\u001b[0;32m 2405\u001b[0m \u001b[39m@validate_params\u001b[39m(\n\u001b[0;32m 2406\u001b[0m {\n\u001b[0;32m 2407\u001b[0m \u001b[39m\"\u001b[39m\u001b[39my_true\u001b[39m\u001b[39m\"\u001b[39m: [\u001b[39m\"\u001b[39m\u001b[39marray-like\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39msparse matrix\u001b[39m\u001b[39m\"\u001b[39m],\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2430\u001b[0m zero_division\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mwarn\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 2431\u001b[0m ):\n\u001b[0;32m 2432\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Build a text report showing the main classification metrics.\u001b[39;00m\n\u001b[0;32m 2433\u001b[0m \n\u001b[0;32m 2434\u001b[0m \u001b[39m Read more in the :ref:`User Guide <classification_report>`.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 2536\u001b[0m \u001b[39m <BLANKLINE>\u001b[39;00m\n\u001b[0;32m 2537\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2539\u001b[0m y_type, y_true, y_pred \u001b[39m=\u001b[39m _check_targets(y_true, y_pred)\n\u001b[0;32m 2541\u001b[0m \u001b[39mif\u001b[39;00m labels \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 2542\u001b[0m labels \u001b[39m=\u001b[39m unique_labels(y_true, y_pred)\n",
"File \u001b[1;32mc:\\Users\\SC\\anaconda3\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:93\u001b[0m, in \u001b[0;36m_check_targets\u001b[1;34m(y_true, y_pred)\u001b[0m\n\u001b[0;32m 90\u001b[0m y_type \u001b[39m=\u001b[39m {\u001b[39m\"\u001b[39m\u001b[39mmulticlass\u001b[39m\u001b[39m\"\u001b[39m}\n\u001b[0;32m 92\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(y_type) \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m---> 93\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 94\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mClassification metrics can\u001b[39m\u001b[39m'\u001b[39m\u001b[39mt handle a mix of \u001b[39m\u001b[39m{0}\u001b[39;00m\u001b[39m and \u001b[39m\u001b[39m{1}\u001b[39;00m\u001b[39m targets\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(\n\u001b[0;32m 95\u001b[0m type_true, type_pred\n\u001b[0;32m 96\u001b[0m )\n\u001b[0;32m 97\u001b[0m )\n\u001b[0;32m 99\u001b[0m \u001b[39m# We can't have more than one value on y_type => The set is no more needed\u001b[39;00m\n\u001b[0;32m 100\u001b[0m y_type \u001b[39m=\u001b[39m y_type\u001b[39m.\u001b[39mpop()\n",
"\u001b[1;31mValueError\u001b[0m: Classification metrics can't handle a mix of binary and continuous targets"
]
}
],
"source": [
"# Function to clean the text data\n",
"\n",
"def clean_text(text):\n",
" text = re.sub(r'<.*?>', '', text) # Remove HTML tags\n",
" text = re.sub(r'[^a-zA-Z0-9\\s]', '', text) # Remove non-alphanumeric characters\n",
File c:\Users\SC\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:211, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
205 try:
206 with config_context(
207 skip_parameter_validation=(
208 prefer_skip_nested_validation or global_skip_validation
209 )
210 ):
--> 211 return func(*args, **kwargs)
212 except InvalidParameterError as e:
213 # When the function is just a wrapper around an estimator, we allow
214 # the function to delegate validation to the estimator, but we replace
215 # the name of the estimator by the name of the function in the error