{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":6262939,"sourceType":"datasetVersion","datasetId":3599815}],"dockerImageVersionId":30527,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"#Setup: imports, package installs, RNG seeding and global plot style\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n#Text cleaning\nimport re, string\n#%pip installs into the running kernel's environment; version pinned to the one recorded in this notebook's last run\n%pip install -q emoji==2.6.0\nimport emoji\nimport nltk\nfrom nltk.stem import WordNetLemmatizer,PorterStemmer\nfrom nltk.corpus import stopwords\nnltk.download('stopwords')\nstop_words = set(stopwords.words('english'))\n\n#Data preprocessing\nfrom sklearn import preprocessing\nfrom sklearn.model_selection import train_test_split\nfrom imblearn.over_sampling import RandomOverSampler\n\n#Naive Bayes\nfrom sklearn.feature_extraction.text import CountVectorizer\nfrom sklearn.feature_extraction.text import TfidfTransformer\nfrom sklearn.naive_bayes import MultinomialNB\n\n\n#PyTorch LSTM\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n\n#Tokenization for LSTM\nfrom collections import Counter\nfrom gensim.models import Word2Vec\n\n#Transformers library for BERT\n#version pinned to the one recorded in this notebook's last run\n%pip install -q transformers==4.30.2\nimport transformers\nfrom transformers import BertModel\nfrom transformers import BertTokenizer\nfrom transformers import AdamW, get_linear_schedule_with_warmup\n\nfrom sklearn.metrics import classification_report, confusion_matrix\n\n#Seed for reproducibility\nimport 
random\n\nseed_value=42\nrandom.seed(seed_value)\nnp.random.seed(seed_value)\ntorch.manual_seed(seed_value)\ntorch.cuda.manual_seed_all(seed_value)\n\nimport time\n\n#set style for plots\nsns.set_style(\"whitegrid\")\n#sns.despine() only acts on an already-existing figure, so call it after plotting instead of here\n#Matplotlib >= 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*'; fall back to the old name on older versions\nwhitegrid_style = \"seaborn-v0_8-whitegrid\" if \"seaborn-v0_8-whitegrid\" in plt.style.available else \"seaborn-whitegrid\"\nplt.style.use(whitegrid_style)\nplt.rc(\"figure\", autolayout=True)\nplt.rc(\"axes\", labelweight=\"bold\", labelsize=\"large\", titleweight=\"bold\", titlepad=10)","metadata":{"execution":{"iopub.status.busy":"2024-02-27T09:09:37.032955Z","iopub.execute_input":"2024-02-27T09:09:37.033392Z","iopub.status.idle":"2024-02-27T09:10:24.697861Z","shell.execute_reply.started":"2024-02-27T09:09:37.033358Z","shell.execute_reply":"2024-02-27T09:10:24.696266Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n","output_type":"stream"},{"name":"stdout","text":"Requirement already satisfied: emoji in /opt/conda/lib/python3.10/site-packages (2.6.0)\n[nltk_data] Downloading package stopwords to /usr/share/nltk_data...\n[nltk_data] Package stopwords is already up-to-date!\nRequirement already satisfied: transformers in /opt/conda/lib/python3.10/site-packages (4.30.2)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from transformers) (3.12.2)\nRequirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /opt/conda/lib/python3.10/site-packages (from transformers) (0.16.4)\nRequirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from transformers) (1.23.5)\nRequirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from transformers) (21.3)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from transformers) (6.0)\nRequirement already 
satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers) (2023.6.3)\nRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from transformers) (2.31.0)\nRequirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /opt/conda/lib/python3.10/site-packages (from transformers) (0.13.3)\nRequirement already satisfied: safetensors>=0.3.1 in /opt/conda/lib/python3.10/site-packages (from transformers) (0.3.1)\nRequirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.10/site-packages (from transformers) (4.65.0)\nRequirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\nRequirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.6.3)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.10/site-packages (from packaging>=20.0->transformers) (3.0.9)\nRequirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (3.1.0)\nRequirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (3.4)\nRequirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (1.26.15)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->transformers) (2023.5.7)\n","output_type":"stream"},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:98: UserWarning: unable to load libtensorflow_io_plugins.so: unable to open file: libtensorflow_io_plugins.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so']\ncaused by: 
['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']\n warnings.warn(f\"unable to load libtensorflow_io_plugins.so: {e}\")\n/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/__init__.py:104: UserWarning: file system plugins are not loaded: unable to open file: libtensorflow_io.so, from paths: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']\ncaused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']\n warnings.warn(f\"file system plugins are not loaded: {e}\")\n/tmp/ipykernel_32/1414603384.py:60: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-\n
\n | Unnamed: 0 | \nInput | \nlabel | \nLength | \nPredicted_Label | \n
---|---|---|---|---|---|
0 | \n0 | \ncharlie hebdo become well known for publish th... | \n0.0 | \n82 | \njoy | \n
1 | \n1 | \nnow 10 dead in a shooting there today retweet ... | \n0.0 | \n138 | \njoy | \n
2 | \n2 | \nbbcdaniel bbcworld I am guess this is being co... | \n0.0 | \n93 | \nanger | \n
3 | \n3 | \nbbcdaniel bbcworld why would you mention that ... | \n0.0 | \n95 | \njoy | \n
4 | \n4 | \nbbcdaniels bbcworld perps identify | \n0.0 | \n39 | \nfear | \n
... | \n... | \n... | \n... | \n... | \n... | \n
62440 | \n62440 | \nanonyop xplant so that mean its ok to torch an... | \n1.0 | \n76 | \nfear | \n
62441 | \n62441 | \nrianalden not at all but they need to change s... | \n1.0 | \n85 | \nanger | \n
62442 | \n62442 | \nxplant anonyop absoluteky but it pain I to see... | \n1.0 | \n138 | \nsadness | \n
62443 | \n62443 | \nxplant anonyop I am curious how many of these ... | \n1.0 | \n140 | \njoy | \n
62444 | \n62444 | \nxplant anonyop you get 15000 people show up to... | \n1.0 | \n143 | \nsadness | \n
62445 rows × 5 columns
\n"},"metadata":{}}]},{"cell_type":"code","source":"#Count rows per predicted emotion label\nemotion[\"Predicted_Label\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2024-02-27T09:10:52.020897Z","iopub.execute_input":"2024-02-27T09:10:52.021361Z","iopub.status.idle":"2024-02-27T09:10:52.043896Z","shell.execute_reply.started":"2024-02-27T09:10:52.021321Z","shell.execute_reply":"2024-02-27T09:10:52.042373Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":"joy 18131\nsadness 16588\nanger 12578\nfear 11310\nlove 3238\nsurprise 600\nName: Predicted_Label, dtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"#Build the text column: the input followed by a space and its predicted emotion label\nemotion[\"text\"] = emotion[\"Input\"] + \" \" + emotion[\"Predicted_Label\"]\n#Spot-check one row of the new column\nemotion.loc[31217, \"text\"]","metadata":{"execution":{"iopub.status.busy":"2023-08-31T04:52:18.297052Z","iopub.execute_input":"2023-08-31T04:52:18.297452Z","iopub.status.idle":"2023-08-31T04:52:18.341536Z","shell.execute_reply.started":"2023-08-31T04:52:18.297420Z","shell.execute_reply":"2023-08-31T04:52:18.340425Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":"'marcburleigh holopainenminna laracchi fear'"},"metadata":{}}]},{"cell_type":"code","source":"#Count rows per value of the label column\nemotion[\"label\"].value_counts()","metadata":{"execution":{"iopub.status.busy":"2023-08-31T04:52:19.837509Z","iopub.execute_input":"2023-08-31T04:52:19.837906Z","iopub.status.idle":"2023-08-31T04:52:19.850119Z","shell.execute_reply.started":"2023-08-31T04:52:19.837876Z","shell.execute_reply":"2023-08-31T04:52:19.848888Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"0.0 48619\n1.0 13824\nName: label, dtype: 
int64"},"metadata":{}}]},{"cell_type":"code","source":"#Show any fully duplicated rows\nduplicate_mask = emotion.duplicated()\nemotion.loc[duplicate_mask]","metadata":{"execution":{"iopub.status.busy":"2023-08-31T04:52:21.431169Z","iopub.execute_input":"2023-08-31T04:52:21.432099Z","iopub.status.idle":"2023-08-31T04:52:21.521441Z","shell.execute_reply.started":"2023-08-31T04:52:21.432049Z","shell.execute_reply":"2023-08-31T04:52:21.520193Z"},"trusted":true},"execution_count":5,"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":"Empty DataFrame\nColumns: [Unnamed: 0, Input, label, Length, Predicted_Label, text]\nIndex: []","text/html":"\n | Unnamed: 0 | \nInput | \nlabel | \nLength | \nPredicted_Label | \ntext | \n
---|
\n | Unnamed: 0 | \nInput | \nlabel | \nLength | \nPredicted_Label | \ntext | \ntext_len | \n
---|---|---|---|---|---|---|---|
13377 | \n13377 | \nfrance24 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _... | \n0.0 | \n140 | \nfear | \nfrance24 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _... | \n50 | \n
57474 | \n57474 | \ncnnvideo dnt b rude alexdiaz dnt let his name ... | \n1.0 | \n136 | \nlove | \ncnnvideo dnt b rude alexdiaz dnt let his name ... | \n35 | \n
49015 | \n49015 | \nrational_crisis what the fuck do the democrati... | \n0.0 | \n138 | \nanger | \nrational_crisis what the fuck do the democrati... | \n34 | \n
58719 | \n58719 | \nkylestrunk I am say do not release your side o... | \n1.0 | \n137 | \nanger | \nkylestrunk I am say do not release your side o... | \n34 | \n
60133 | \n60133 | \ngretawire so you fuck thing he deserve to get ... | \n1.0 | \n140 | \nsadness | \ngretawire so you fuck thing he deserve to get ... | \n33 | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
10819 | \n10819 | \nglorioustyagi time httptcohqwvz3mfv8 | \n0.0 | \n43 | \nanger | \nglorioustyagi time httptcohqwvz3mfv8 anger | \n4 | \n
42436 | \n42436 | \namymek democrat voter | \n0.0 | \n23 | \nsadness | \namymek democrat voter sadness | \n4 | \n
10830 | \n10830 | \nelbeardsley morningedition httptco1hlaxapvzz | \n0.0 | \n51 | \nsadness | \nelbeardsley morningedition httptco1hlaxapvzz s... | \n4 | \n
33660 | \n33660 | \njeffersonobama jesuischarlie too | \n1.0 | \n35 | \nanger | \njeffersonobama jesuischarlie too anger | \n4 | \n
35511 | \n35511 | \ntime stopkillinginnocentpeoplefreepalestinefre... | \n1.0 | \n93 | \nsadness | \ntime stopkillinginnocentpeoplefreepalestinefre... | \n4 | \n
60582 rows × 7 columns
\n\n | Unnamed: 0 | \nInput | \nlabel | \nLength | \nPredicted_Label | \ntext | \ntext_len | \n
---|---|---|---|---|---|---|---|
13377 | \n13377 | \nfrance24 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _... | \n0.0 | \n140 | \nfear | \nfrance24 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _... | \n50 | \n
57474 | \n57474 | \ncnnvideo dnt b rude alexdiaz dnt let his name ... | \n1.0 | \n136 | \nlove | \ncnnvideo dnt b rude alexdiaz dnt let his name ... | \n35 | \n
49015 | \n49015 | \nrational_crisis what the fuck do the democrati... | \n0.0 | \n138 | \nanger | \nrational_crisis what the fuck do the democrati... | \n34 | \n
58719 | \n58719 | \nkylestrunk I am say do not release your side o... | \n1.0 | \n137 | \nanger | \nkylestrunk I am say do not release your side o... | \n34 | \n
60133 | \n60133 | \ngretawire so you fuck thing he deserve to get ... | \n1.0 | \n140 | \nsadness | \ngretawire so you fuck thing he deserve to get ... | \n33 | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
10819 | \n10819 | \nglorioustyagi time httptcohqwvz3mfv8 | \n0.0 | \n43 | \nanger | \nglorioustyagi time httptcohqwvz3mfv8 anger | \n4 | \n
42436 | \n42436 | \namymek democrat voter | \n0.0 | \n23 | \nsadness | \namymek democrat voter sadness | \n4 | \n
10830 | \n10830 | \nelbeardsley morningedition httptco1hlaxapvzz | \n0.0 | \n51 | \nsadness | \nelbeardsley morningedition httptco1hlaxapvzz s... | \n4 | \n
33660 | \n33660 | \njeffersonobama jesuischarlie too | \n1.0 | \n35 | \nanger | \njeffersonobama jesuischarlie too anger | \n4 | \n
35511 | \n35511 | \ntime stopkillinginnocentpeoplefreepalestinefre... | \n1.0 | \n93 | \nsadness | \ntime stopkillinginnocentpeoplefreepalestinefre... | \n4 | \n
60582 rows × 7 columns
\nMultinomialNB()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MultinomialNB()