From a313367b104cf9407611b6c1f8048f68649dd8a4 Mon Sep 17 00:00:00 2001 From: ABHINAV KUMAR Date: Sun, 10 Oct 2021 16:19:21 +0530 Subject: [PATCH] Add files via upload --- Ensemble_SVM_LR_Malayalam.ipynb | 1 + Ensemble_SVM_LR_RF_Kanada.ipynb | 1 + Ensemble_SVM_LR_TAMIL.ipynb | 1 + ML_Classifier_Kannada_code_mixed.ipynb | 1 + ML_Classifier_Malayalam_code_mixed.ipynb | 1 + ML_Classifier_Tamil_Sentiment (1).ipynb | 1 + 6 files changed, 6 insertions(+) create mode 100644 Ensemble_SVM_LR_Malayalam.ipynb create mode 100644 Ensemble_SVM_LR_RF_Kanada.ipynb create mode 100644 Ensemble_SVM_LR_TAMIL.ipynb create mode 100644 ML_Classifier_Kannada_code_mixed.ipynb create mode 100644 ML_Classifier_Malayalam_code_mixed.ipynb create mode 100644 ML_Classifier_Tamil_Sentiment (1).ipynb diff --git a/Ensemble_SVM_LR_Malayalam.ipynb b/Ensemble_SVM_LR_Malayalam.ipynb new file mode 100644 index 0000000..84860b3 --- /dev/null +++ b/Ensemble_SVM_LR_Malayalam.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ensemble_SVM_LR_Malayalam.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NmxNxMi5TECW","executionInfo":{"status":"ok","timestamp":1624685064818,"user_tz":-330,"elapsed":20322,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"73bc084c-c68a-44c8-e1da-4730b454edbd"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"7akUl9KLBilu"},"source":["import pandas as pd\n","SVM = 
pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/SVM_char_1-6_gram_Malayalam_74.csv')\n","LR = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/LR_char_1-6_gram_Malayalam_73.csv')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"giZvKKReCGFd"},"source":["predictions_test_submission=(SVM+LR)/2"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"pwxeTE5ZC6y3","executionInfo":{"status":"ok","timestamp":1624685124940,"user_tz":-330,"elapsed":9,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"c606c89a-6a0f-47dc-bb4c-2f2f6127e003"},"source":["predictions_test_submission"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Mixed_feelingsNegativePositivenot-malayalamunknown_state
00.3823890.1032090.1869920.0808960.246514
10.0631250.1495190.3073310.0080700.471956
20.0620190.1457680.3949810.0368230.360409
30.0022110.0035880.0156160.8915080.087077
40.0055250.0065910.9577150.0063890.023780
..................
19570.0157210.2813890.3893910.0002670.313233
19580.0082930.0150680.0343170.0111540.931168
19590.1027010.1673980.4327410.0036430.293517
19600.1016950.3449800.3189640.0021000.232261
19610.1915140.2624730.4007890.0030910.142132
\n","

1962 rows × 5 columns

\n","
"],"text/plain":[" Mixed_feelings Negative Positive not-malayalam unknown_state\n","0 0.382389 0.103209 0.186992 0.080896 0.246514\n","1 0.063125 0.149519 0.307331 0.008070 0.471956\n","2 0.062019 0.145768 0.394981 0.036823 0.360409\n","3 0.002211 0.003588 0.015616 0.891508 0.087077\n","4 0.005525 0.006591 0.957715 0.006389 0.023780\n","... ... ... ... ... ...\n","1957 0.015721 0.281389 0.389391 0.000267 0.313233\n","1958 0.008293 0.015068 0.034317 0.011154 0.931168\n","1959 0.102701 0.167398 0.432741 0.003643 0.293517\n","1960 0.101695 0.344980 0.318964 0.002100 0.232261\n","1961 0.191514 0.262473 0.400789 0.003091 0.142132\n","\n","[1962 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"yx6tspZ0CJsv"},"source":["import numpy as np\n","predictions_test_submission = np.asarray(predictions_test_submission)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"yhvtEGcuCMKO"},"source":["import numpy as np\n","predictions_test_submission1 = np.zeros_like(predictions_test_submission)\n","predictions_test_submission1[np.arange(len(predictions_test_submission)), predictions_test_submission.argmax(1)] = 1"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UFz5rdWVDil_","executionInfo":{"status":"ok","timestamp":1624685131170,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"64d670ec-cb71-45a5-aa4b-4973f4ec7dc9"},"source":["predictions_test_submission1"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[1., 0., 0., 0., 0.],\n"," [0., 0., 0., 0., 1.],\n"," [0., 0., 1., 0., 0.],\n"," ...,\n"," [0., 0., 1., 0., 0.],\n"," [0., 1., 0., 0., 0.],\n"," [0., 0., 1., 0., 
0.]])"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"EcEP5VhyDktU"},"source":["predictions_test_submission1_final = np.argmax(predictions_test_submission1, axis=1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"z_-UdPnAFOfo","executionInfo":{"status":"ok","timestamp":1624685133667,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"02518122-bb6f-4902-fb96-ee8a39d1f3e1"},"source":["predictions_test_submission1_final"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([0, 4, 2, ..., 2, 1, 2])"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"id":"yj7AcwqbmTKP"},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_train.tsv', sep = '\\t')\n","data_val = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_test_withoutlabels.tsv', sep = '\\t')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZnuYD4aqTkWk","executionInfo":{"status":"ok","timestamp":1624685178312,"user_tz":-330,"elapsed":658,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"7dd716ae-503c-42c0-8096-0691658b4f7b"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = 
preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_val_index = le.transform(data_val['category'])"],"execution_count":null,"outputs":[{"output_type":"stream","text":["['Mixed_feelings' 'Negative' 'Positive' 'not-malayalam' 'unknown_state']\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o5T7yAcFFpfm"},"source":["predictions_test_submission1_final1 = le.inverse_transform(predictions_test_submission1_final)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PSSBXvkTFze-","executionInfo":{"status":"ok","timestamp":1624685181535,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"58cfe4a7-98fc-453d-afe6-a23c63270ad4"},"source":["predictions_test_submission1_final1"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Mixed_feelings', 'unknown_state', 'Positive', ..., 'Positive',\n"," 'Negative', 'Positive'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"id":"otag6te-F1It"},"source":["data_test['category'] = predictions_test_submission1_final1"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"5ozZTPDwGiSv","executionInfo":{"status":"ok","timestamp":1624685186909,"user_tz":-330,"elapsed":6,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"4f1f70e7-af9b-4645-9a2c-ff4336200777"},"source":["data_test"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Mal_1Teaserinu kurach samayamkoodi mathram. Cant wa...Mixed_feelings
1Mal_2അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...unknown_state
2Mal_3മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?Positive
3Mal_4Nowadays 944k views is considered as 1M views ...not-malayalam
4Mal_5Maass.trailer ennu paranja ithaanU makkale.......Positive
............
1957Mal_1958Expression വച്ചു നോക്കുമ്പോൾ മൂക്കുത്തി കട്ടത്...Positive
1958Mal_1959എനിക്ക് ഒരു 100 like തരുമോunknown_state
1959Mal_1960Nannayi onnu poliyunna lakshnm unde...Thrill m...Positive
1960Mal_1961Athikam eduthu kayatti vekkenda....direction k...Negative
1961Mal_1962Ithupole mooonchiya Oru padam jeevithathil kan...Positive
\n","

1962 rows × 3 columns

\n","
"],"text/plain":[" id ... category\n","0 Mal_1 ... Mixed_feelings\n","1 Mal_2 ... unknown_state\n","2 Mal_3 ... Positive\n","3 Mal_4 ... not-malayalam\n","4 Mal_5 ... Positive\n","... ... ... ...\n","1957 Mal_1958 ... Positive\n","1958 Mal_1959 ... unknown_state\n","1959 Mal_1960 ... Positive\n","1960 Mal_1961 ... Negative\n","1961 Mal_1962 ... Positive\n","\n","[1962 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"id":"Jt6Z80uFGjsD"},"source":["data_test.to_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam//Ensemble_SVM_LR_Malalayam.tsv\", sep=\"\\t\", index=False)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"11fdDMTpG8QS"},"source":["check_submission = pd.read_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam//Ensemble_SVM_LR_Malalayam.tsv\", sep=\"\\t\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"uwyh-IqGG8SG","executionInfo":{"status":"ok","timestamp":1624685262383,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"867f7e4a-2308-4e01-b70c-9999e9dcd1be"},"source":["check_submission"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Mal_1Teaserinu kurach samayamkoodi mathram. Cant wa...Mixed_feelings
1Mal_2അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...unknown_state
2Mal_3മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?Positive
3Mal_4Nowadays 944k views is considered as 1M views ...not-malayalam
4Mal_5Maass.trailer ennu paranja ithaanU makkale.......Positive
............
1957Mal_1958Expression വച്ചു നോക്കുമ്പോൾ മൂക്കുത്തി കട്ടത്...Positive
1958Mal_1959എനിക്ക് ഒരു 100 like തരുമോunknown_state
1959Mal_1960Nannayi onnu poliyunna lakshnm unde...Thrill m...Positive
1960Mal_1961Athikam eduthu kayatti vekkenda....direction k...Negative
1961Mal_1962Ithupole mooonchiya Oru padam jeevithathil kan...Positive
\n","

1962 rows × 3 columns

\n","
"],"text/plain":[" id ... category\n","0 Mal_1 ... Mixed_feelings\n","1 Mal_2 ... unknown_state\n","2 Mal_3 ... Positive\n","3 Mal_4 ... not-malayalam\n","4 Mal_5 ... Positive\n","... ... ... ...\n","1957 Mal_1958 ... Positive\n","1958 Mal_1959 ... unknown_state\n","1959 Mal_1960 ... Positive\n","1960 Mal_1961 ... Negative\n","1961 Mal_1962 ... Positive\n","\n","[1962 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"code","metadata":{"id":"MIEN8srCnBlP"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/Ensemble_SVM_LR_RF_Kanada.ipynb b/Ensemble_SVM_LR_RF_Kanada.ipynb new file mode 100644 index 0000000..a4cf08a --- /dev/null +++ b/Ensemble_SVM_LR_RF_Kanada.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ensemble_SVM_LR_RF_Kanada.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NmxNxMi5TECW","executionInfo":{"status":"ok","timestamp":1624643509523,"user_tz":-330,"elapsed":19203,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"6cefe5b4-4a7e-4965-88ff-977f2be88ceb"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"7akUl9KLBilu","executionInfo":{"status":"ok","timestamp":1624643580587,"user_tz":-330,"elapsed":726,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import pandas as pd\n","SVM = 
pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/SVM_char_1-6_gram_kanada_64.csv')\n","RF = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/RF_char_1-6_gram_Kanada_63.csv')\n","LR = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/LR_char_1-6_gram_Kanda_66.csv')"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"id":"giZvKKReCGFd","executionInfo":{"status":"ok","timestamp":1624643580587,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["predictions_test_submission=(SVM+RF+LR)/3"],"execution_count":3,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"pwxeTE5ZC6y3","executionInfo":{"status":"ok","timestamp":1624643581267,"user_tz":-330,"elapsed":18,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"db634182-7af9-4c08-d6dd-193748b4e9ff"},"source":["predictions_test_submission"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Mixed feelingsNegativePositivenot-Kannadaunknown state
00.0774950.1976420.6546230.0045610.065680
10.0238960.0088630.6600680.1832410.123932
20.0668250.0339830.3002890.3361930.262711
30.0047950.0004790.1444800.8396620.010585
40.0370830.0337710.7366470.1618070.030693
..................
7630.0894780.1983190.3388110.2024160.170976
7640.0963020.6196560.2267300.0268540.030458
7650.0285950.1588530.8038930.0010300.007630
7660.1143380.2025690.6567950.0046180.021679
7670.1526700.3122350.4444140.0306630.060018
\n","

768 rows × 5 columns

\n","
"],"text/plain":[" Mixed feelings Negative Positive not-Kannada unknown state\n","0 0.077495 0.197642 0.654623 0.004561 0.065680\n","1 0.023896 0.008863 0.660068 0.183241 0.123932\n","2 0.066825 0.033983 0.300289 0.336193 0.262711\n","3 0.004795 0.000479 0.144480 0.839662 0.010585\n","4 0.037083 0.033771 0.736647 0.161807 0.030693\n",".. ... ... ... ... ...\n","763 0.089478 0.198319 0.338811 0.202416 0.170976\n","764 0.096302 0.619656 0.226730 0.026854 0.030458\n","765 0.028595 0.158853 0.803893 0.001030 0.007630\n","766 0.114338 0.202569 0.656795 0.004618 0.021679\n","767 0.152670 0.312235 0.444414 0.030663 0.060018\n","\n","[768 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"yx6tspZ0CJsv","executionInfo":{"status":"ok","timestamp":1624643585135,"user_tz":-330,"elapsed":2,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import numpy as np\n","predictions_test_submission = np.asarray(predictions_test_submission)"],"execution_count":5,"outputs":[]},{"cell_type":"code","metadata":{"id":"yhvtEGcuCMKO","executionInfo":{"status":"ok","timestamp":1624643585594,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import numpy as np\n","predictions_test_submission1 = np.zeros_like(predictions_test_submission)\n","predictions_test_submission1[np.arange(len(predictions_test_submission)), predictions_test_submission.argmax(1)] = 1"],"execution_count":6,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UFz5rdWVDil_","executionInfo":{"status":"ok","timestamp":1624643585595,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"61c29499-5542-43ca-89d3-b854d76c7c26"},"source":["predictions_test_submission1"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 0., 1., 0.],\n"," ...,\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.]])"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"EcEP5VhyDktU","executionInfo":{"status":"ok","timestamp":1624643587508,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["predictions_test_submission1_final = np.argmax(predictions_test_submission1, axis=1)"],"execution_count":8,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"z_-UdPnAFOfo","executionInfo":{"status":"ok","timestamp":1624643587972,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"b0202367-a8ca-4ed5-9e01-cfea7b07fb7a"},"source":["predictions_test_submission1_final"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([2, 2, 3, 3, 2, 3, 2, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 2, 2, 2,\n"," 2, 2, 3, 2, 3, 2, 0, 1, 2, 3, 3, 4, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2,\n"," 2, 3, 4, 2, 2, 2, 1, 3, 2, 2, 4, 2, 2, 1, 2, 3, 2, 2, 4, 2, 2, 4,\n"," 2, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2,\n"," 2, 3, 1, 1, 2, 2, 1, 2, 1, 3, 2, 4, 3, 1, 2, 1, 4, 4, 2, 1, 1, 2,\n"," 2, 2, 2, 3, 2, 1, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 2, 1, 3, 2,\n"," 2, 2, 2, 2, 2, 1, 3, 2, 3, 1, 2, 3, 2, 2, 2, 
2, 4, 1, 2, 1, 2, 2,\n"," 2, 1, 2, 0, 4, 2, 2, 1, 1, 3, 4, 2, 4, 2, 2, 1, 1, 1, 2, 2, 2, 2,\n"," 2, 1, 4, 2, 3, 2, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 2, 4, 0,\n"," 2, 1, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 3, 4,\n"," 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 4, 1, 2, 2, 1, 1, 3, 2, 2, 2, 2,\n"," 2, 2, 2, 2, 2, 2, 1, 1, 1, 3, 1, 4, 2, 2, 2, 4, 1, 2, 3, 2, 4, 4,\n"," 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 3, 2, 2, 2, 3, 2, 1,\n"," 2, 2, 2, 2, 3, 3, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 3, 1, 2, 4, 2, 1,\n"," 1, 2, 4, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 3, 2, 1, 2, 1, 2, 2, 4, 2,\n"," 3, 2, 2, 2, 4, 3, 2, 2, 2, 2, 1, 3, 3, 2, 1, 3, 1, 2, 2, 2, 2, 2,\n"," 2, 2, 4, 2, 1, 2, 2, 3, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 3, 1, 2,\n"," 2, 2, 2, 2, 2, 2, 3, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n"," 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 4, 2, 3, 4, 3, 2,\n"," 2, 2, 2, 3, 2, 3, 2, 2, 3, 3, 2, 1, 3, 4, 2, 2, 3, 2, 1, 4, 1, 2,\n"," 2, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2, 3, 4, 2, 2, 2, 1,\n"," 2, 2, 4, 1, 3, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 0, 2, 2, 1,\n"," 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 3, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2,\n"," 1, 2, 2, 2, 2, 2, 2, 3, 2, 4, 2, 1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n"," 1, 2, 2, 2, 2, 4, 1, 2, 2, 1, 3, 2, 1, 2, 2, 2, 2, 2, 4, 3, 2, 2,\n"," 2, 4, 2, 2, 1, 1, 4, 1, 2, 3, 2, 3, 1, 1, 2, 3, 2, 2, 3, 1, 2, 2,\n"," 1, 2, 2, 2, 2, 1, 2, 2, 3, 3, 1, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 1,\n"," 2, 3, 2, 2, 3, 2, 1, 4, 2, 3, 2, 2, 3, 2, 0, 3, 2, 2, 3, 3, 2, 3,\n"," 2, 3, 4, 2, 2, 2, 2, 3, 2, 2, 2, 2, 4, 2, 2, 1, 2, 2, 3, 3, 2, 3,\n"," 3, 1, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 3, 4, 2, 2, 2, 2, 1, 2, 1,\n"," 2, 2, 2, 1, 2, 2, 3, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 3,\n"," 2, 3, 2, 3, 2, 2, 3, 1, 2, 3, 2, 2, 4, 2, 3, 4, 2, 2, 2, 2, 2, 2,\n"," 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 3, 2, 2, 1, 2, 1, 1, 2,\n"," 1, 1, 3, 2, 2, 2, 2, 2, 2, 2, 3, 1, 2, 2, 1, 4, 1, 2, 1, 3, 2, 2,\n"," 2, 2, 2, 1, 3, 1, 2, 2, 3, 4, 4, 2, 2, 2, 1, 2, 1, 2, 2, 
2])"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"id":"yj7AcwqbmTKP","executionInfo":{"status":"ok","timestamp":1624643635383,"user_tz":-330,"elapsed":426,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_train.tsv', sep = '\\t')\n","data_val = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_test_withoutlabels.tsv', sep = '\\t')"],"execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZnuYD4aqTkWk","executionInfo":{"status":"ok","timestamp":1624643638171,"user_tz":-330,"elapsed":1185,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"768009b9-243a-43f8-e6aa-c8beff8adf8c"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_val_index = le.transform(data_val['category'])\n"],"execution_count":11,"outputs":[{"output_type":"stream","text":["['Mixed feelings' 'Negative' 'Positive' 'not-Kannada' 'unknown state']\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o5T7yAcFFpfm","executionInfo":{"status":"ok","timestamp":1624643649647,"user_tz":-330,"elapsed":490,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["predictions_test_submission1_final1 = le.inverse_transform(predictions_test_submission1_final)"],"execution_count":12,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PSSBXvkTFze-","executionInfo":{"status":"ok","timestamp":1624643651281,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"0eaee799-d6e0-40db-c3f5-32d499664a11"},"source":["predictions_test_submission1_final1"],"execution_count":13,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Positive', 'Positive', 'not-Kannada', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'not-Kannada', 'Positive', 'Mixed feelings',\n"," 'Negative', 'Positive', 'not-Kannada', 'not-Kannada',\n"," 'unknown state', 'Negative', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'unknown state', 'Positive',\n"," 'Positive', 'Positive', 'Negative', 'not-Kannada', 'Positive',\n"," 'Positive', 'unknown state', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'unknown state',\n"," 'Positive', 'Positive', 'unknown state', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 
'Positive', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'not-Kannada', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Negative', 'not-Kannada',\n"," 'Positive', 'unknown state', 'not-Kannada', 'Negative', 'Positive',\n"," 'Negative', 'unknown state', 'unknown state', 'Positive',\n"," 'Negative', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'not-Kannada', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'not-Kannada',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Negative', 'Positive', 'Negative', 'not-Kannada', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'not-Kannada', 'Positive', 'not-Kannada', 'Negative',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'unknown state', 'Negative', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'Negative', 'Positive',\n"," 'Mixed feelings', 'unknown state', 'Positive', 'Positive',\n"," 'Negative', 'Negative', 'not-Kannada', 'unknown state', 'Positive',\n"," 'unknown state', 'Positive', 'Positive', 'Negative', 'Negative',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Negative', 'unknown state', 'Positive', 'not-Kannada',\n"," 'Positive', 'not-Kannada', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'not-Kannada', 'unknown state', 'Positive',\n"," 'unknown state', 'Mixed feelings', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'not-Kannada',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'Negative', 'Positive', 'not-Kannada', 'unknown state',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Negative', 
'Positive', 'Negative',\n"," 'Positive', 'unknown state', 'Negative', 'Positive', 'Positive',\n"," 'Negative', 'Negative', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Negative', 'Negative',\n"," 'Negative', 'not-Kannada', 'Negative', 'unknown state', 'Positive',\n"," 'Positive', 'Positive', 'unknown state', 'Negative', 'Positive',\n"," 'not-Kannada', 'Positive', 'unknown state', 'unknown state',\n"," 'Positive', 'Positive', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Negative', 'Negative', 'Positive',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'not-Kannada', 'not-Kannada', 'Negative', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'Negative',\n"," 'Positive', 'unknown state', 'Positive', 'Negative', 'Negative',\n"," 'Positive', 'unknown state', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'not-Kannada', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'unknown state',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'unknown state', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Negative', 'not-Kannada', 'not-Kannada', 'Positive',\n"," 'Negative', 'not-Kannada', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'unknown state', 'Positive', 'Negative', 'Positive', 'Positive',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Negative', 'not-Kannada', 'Negative', 'Positive',\n"," 
'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'not-Kannada', 'Mixed feelings', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'unknown state',\n"," 'Positive', 'not-Kannada', 'unknown state', 'not-Kannada',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'not-Kannada',\n"," 'not-Kannada', 'Positive', 'Negative', 'not-Kannada',\n"," 'unknown state', 'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'Negative', 'unknown state', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'unknown state', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'unknown state', 'Negative',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Mixed feelings',\n"," 'Positive', 'Positive', 'Negative', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'Negative', 'Positive', 'not-Kannada',\n"," 'Positive', 'Positive', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'unknown state',\n"," 'Positive', 'Negative', 'not-Kannada', 'Positive', 
'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'unknown state', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'not-Kannada', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'unknown state', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'unknown state', 'Positive', 'Positive', 'Negative', 'Negative',\n"," 'unknown state', 'Negative', 'Positive', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'Negative', 'Negative', 'Positive', 'not-Kannada',\n"," 'Positive', 'Positive', 'not-Kannada', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'not-Kannada',\n"," 'not-Kannada', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'Negative', 'Positive', 'Negative', 'Positive', 'not-Kannada',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'Negative',\n"," 'unknown state', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'not-Kannada', 'Positive', 'Mixed feelings', 'not-Kannada',\n"," 'Positive', 'Positive', 'not-Kannada', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'Positive', 'not-Kannada', 'unknown state',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'unknown state',\n"," 'Positive', 'Positive', 'Negative', 'Positive', 'Positive',\n"," 'not-Kannada', 'not-Kannada', 'Positive', 'not-Kannada',\n"," 'not-Kannada', 'Negative', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'unknown state',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 
'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Negative', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'not-Kannada', 'Negative', 'Positive', 'not-Kannada', 'Positive',\n"," 'Positive', 'unknown state', 'Positive', 'not-Kannada',\n"," 'unknown state', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Negative', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Negative', 'Positive', 'Negative',\n"," 'Negative', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Negative', 'Positive', 'Positive', 'Negative', 'unknown state',\n"," 'Negative', 'Positive', 'Negative', 'not-Kannada', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'not-Kannada', 'Negative', 'Positive', 'Positive', 'not-Kannada',\n"," 'unknown state', 'unknown state', 'Positive', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"id":"otag6te-F1It","executionInfo":{"status":"ok","timestamp":1624643669153,"user_tz":-330,"elapsed":657,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["data_test['category'] = 
predictions_test_submission1_final1"],"execution_count":14,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"5ozZTPDwGiSv","executionInfo":{"status":"ok","timestamp":1624643671972,"user_tz":-330,"elapsed":9,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"dfb7c7d5-5638-4890-a17e-cb4892e618f4"},"source":["data_test"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Kan_1ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ...Positive
1Kan_2Jai D BossPositive
2Kan_3Signature movenot-Kannada
3Kan_4Super song bronot-Kannada
4Kan_5Wow Super agi helidira sirPositive
............
763Kan_764Thu thukali trailerPositive
764Kan_765Siri gannadam galge haakbitallapa Thu yaro ni...Negative
765Kan_766ನಾವೇನಾದರು ಸ್ವಾಬಿಮಾನಿಯಾಗಿ ಬದುಕನ್ನು ಕಟ್ಟಿಕೊಳ್ಳಲು...Positive
766Kan_767ದಿಯಾ ನೀವು ಸೋಷಿಯಲ್ ಮೀಡಿಯಾ ಅಲ್ಲಿ ಕಾಣ್ತಿಲ್ಲ.nದಯವಿ...Positive
767Kan_768magaluru kade kalsi avanannu .navu avanige mad...Positive
\n","

768 rows × 3 columns

\n","
"],"text/plain":[" id text category\n","0 Kan_1 ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ... Positive\n","1 Kan_2 Jai D Boss Positive\n","2 Kan_3 Signature move not-Kannada\n","3 Kan_4 Super song bro not-Kannada\n","4 Kan_5 Wow Super agi helidira sir Positive\n",".. ... ... ...\n","763 Kan_764 Thu thukali trailer Positive\n","764 Kan_765 Siri gannadam galge haakbitallapa Thu yaro ni... Negative\n","765 Kan_766 ನಾವೇನಾದರು ಸ್ವಾಬಿಮಾನಿಯಾಗಿ ಬದುಕನ್ನು ಕಟ್ಟಿಕೊಳ್ಳಲು... Positive\n","766 Kan_767 ದಿಯಾ ನೀವು ಸೋಷಿಯಲ್ ಮೀಡಿಯಾ ಅಲ್ಲಿ ಕಾಣ್ತಿಲ್ಲ.nದಯವಿ... Positive\n","767 Kan_768 magaluru kade kalsi avanannu .navu avanige mad... Positive\n","\n","[768 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"id":"Jt6Z80uFGjsD","executionInfo":{"status":"ok","timestamp":1624643733058,"user_tz":-330,"elapsed":2,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["data_test.to_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/Ensemble_SVM_LR_RF.tsv\", sep=\"\\t\", index=False)"],"execution_count":16,"outputs":[]},{"cell_type":"code","metadata":{"id":"11fdDMTpG8QS","executionInfo":{"status":"ok","timestamp":1624643752467,"user_tz":-330,"elapsed":950,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["check_submission = pd.read_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/Ensemble_SVM_LR_RF.tsv\", sep=\"\\t\")"],"execution_count":17,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"uwyh-IqGG8SG","executionInfo":{"status":"ok","timestamp":1624643754211,"user_tz":-330,"elapsed":6,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"c567c4f7-cf85-4b53-e177-a56ab2f4b33a"},"source":["check_submission"],"execution_count":18,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Kan_1ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ...Positive
1Kan_2Jai D BossPositive
2Kan_3Signature movenot-Kannada
3Kan_4Super song bronot-Kannada
4Kan_5Wow Super agi helidira sirPositive
............
763Kan_764Thu thukali trailerPositive
764Kan_765Siri gannadam galge haakbitallapa Thu yaro ni...Negative
765Kan_766ನಾವೇನಾದರು ಸ್ವಾಬಿಮಾನಿಯಾಗಿ ಬದುಕನ್ನು ಕಟ್ಟಿಕೊಳ್ಳಲು...Positive
766Kan_767ದಿಯಾ ನೀವು ಸೋಷಿಯಲ್ ಮೀಡಿಯಾ ಅಲ್ಲಿ ಕಾಣ್ತಿಲ್ಲ.nದಯವಿ...Positive
767Kan_768magaluru kade kalsi avanannu .navu avanige mad...Positive
\n","

768 rows × 3 columns

\n","
"],"text/plain":[" id text category\n","0 Kan_1 ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ... Positive\n","1 Kan_2 Jai D Boss Positive\n","2 Kan_3 Signature move not-Kannada\n","3 Kan_4 Super song bro not-Kannada\n","4 Kan_5 Wow Super agi helidira sir Positive\n",".. ... ... ...\n","763 Kan_764 Thu thukali trailer Positive\n","764 Kan_765 Siri gannadam galge haakbitallapa Thu yaro ni... Negative\n","765 Kan_766 ನಾವೇನಾದರು ಸ್ವಾಬಿಮಾನಿಯಾಗಿ ಬದುಕನ್ನು ಕಟ್ಟಿಕೊಳ್ಳಲು... Positive\n","766 Kan_767 ದಿಯಾ ನೀವು ಸೋಷಿಯಲ್ ಮೀಡಿಯಾ ಅಲ್ಲಿ ಕಾಣ್ತಿಲ್ಲ.nದಯವಿ... Positive\n","767 Kan_768 magaluru kade kalsi avanannu .navu avanige mad... Positive\n","\n","[768 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"code","metadata":{"id":"MIEN8srCnBlP"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/Ensemble_SVM_LR_TAMIL.ipynb b/Ensemble_SVM_LR_TAMIL.ipynb new file mode 100644 index 0000000..ec846e4 --- /dev/null +++ b/Ensemble_SVM_LR_TAMIL.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ensemble_SVM_LR_TAMIL.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NmxNxMi5TECW","executionInfo":{"status":"ok","timestamp":1624727247942,"user_tz":-330,"elapsed":32028,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"8b9e58f1-38e9-4a77-d019-376842937dbd"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Mounted at 
/content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"7akUl9KLBilu","executionInfo":{"status":"ok","timestamp":1624727303639,"user_tz":-330,"elapsed":1090,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import pandas as pd\n","SVM = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/SVM_char_1-6_gram_TAMIL_60.csv')\n","LR = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/LR_char_1-6_gram_TAMIL_60.csv')"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"id":"giZvKKReCGFd","executionInfo":{"status":"ok","timestamp":1624727316880,"user_tz":-330,"elapsed":481,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["predictions_test_submission=(SVM+LR)/2"],"execution_count":3,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"pwxeTE5ZC6y3","executionInfo":{"status":"ok","timestamp":1624727317337,"user_tz":-330,"elapsed":8,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"60d3d4aa-3e83-4270-8703-549046f79c21"},"source":["predictions_test_submission"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Mixed_feelingsNegativePositivenot-Tamilunknown_state
00.0272270.0056860.9228720.0020070.042209
10.1111000.4542580.3581380.0179900.058514
20.0249650.0455580.8540310.0023350.073111
30.1157170.0816070.5685920.1354850.098599
40.0714290.0701230.0805850.6531910.124671
..................
43970.1248220.6527520.1575180.0051160.059792
43980.1150320.0857330.7218800.0087460.068609
43990.0385340.0322520.9176040.0022800.009329
44000.2734350.0768230.5613150.0045620.083865
44010.0988150.0314360.8164310.0048270.048490
\n","

4402 rows × 5 columns

\n","
"],"text/plain":[" Mixed_feelings Negative Positive not-Tamil unknown_state\n","0 0.027227 0.005686 0.922872 0.002007 0.042209\n","1 0.111100 0.454258 0.358138 0.017990 0.058514\n","2 0.024965 0.045558 0.854031 0.002335 0.073111\n","3 0.115717 0.081607 0.568592 0.135485 0.098599\n","4 0.071429 0.070123 0.080585 0.653191 0.124671\n","... ... ... ... ... ...\n","4397 0.124822 0.652752 0.157518 0.005116 0.059792\n","4398 0.115032 0.085733 0.721880 0.008746 0.068609\n","4399 0.038534 0.032252 0.917604 0.002280 0.009329\n","4400 0.273435 0.076823 0.561315 0.004562 0.083865\n","4401 0.098815 0.031436 0.816431 0.004827 0.048490\n","\n","[4402 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"yx6tspZ0CJsv","executionInfo":{"status":"ok","timestamp":1624727321084,"user_tz":-330,"elapsed":654,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import numpy as np\n","predictions_test_submission = np.asarray(predictions_test_submission)"],"execution_count":5,"outputs":[]},{"cell_type":"code","metadata":{"id":"yhvtEGcuCMKO","executionInfo":{"status":"ok","timestamp":1624727321085,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import numpy as np\n","predictions_test_submission1 = np.zeros_like(predictions_test_submission)\n","predictions_test_submission1[np.arange(len(predictions_test_submission)), predictions_test_submission.argmax(1)] = 1"],"execution_count":6,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UFz5rdWVDil_","executionInfo":{"status":"ok","timestamp":1624727321498,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"ede28170-0148-4175-9d45-8f26c9409a15"},"source":["predictions_test_submission1"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0., 0., 1., 0., 0.],\n"," [0., 1., 0., 0., 0.],\n"," [0., 0., 1., 0., 0.],\n"," ...,\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.]])"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"EcEP5VhyDktU","executionInfo":{"status":"ok","timestamp":1624727322570,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["predictions_test_submission1_final = np.argmax(predictions_test_submission1, axis=1)"],"execution_count":8,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"z_-UdPnAFOfo","executionInfo":{"status":"ok","timestamp":1624727324845,"user_tz":-330,"elapsed":6,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"f3d082d8-5071-478e-bac3-be463a89ba87"},"source":["predictions_test_submission1_final"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([2, 1, 2, ..., 2, 2, 2])"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"id":"yj7AcwqbmTKP","executionInfo":{"status":"ok","timestamp":1624727377674,"user_tz":-330,"elapsed":3182,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import pandas as pd \n","data_train = 
pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_train.tsv', sep = '\\t')\n","data_dev = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_test_withoutlabels.tsv', sep = '\\t')\n"],"execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"id":"BbWLut5TS_Hf","executionInfo":{"status":"ok","timestamp":1624727377675,"user_tz":-330,"elapsed":6,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["data_train['category'] = data_train['category'].replace('Positive ', 'Positive')"],"execution_count":11,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JDQ4yP2CS_Jo","executionInfo":{"status":"ok","timestamp":1624727377675,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"818f5be5-c653-4d8d-9f8c-646aec72c034"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_dev_index = le.transform(data_dev['category'])"],"execution_count":12,"outputs":[{"output_type":"stream","text":["['Mixed_feelings' 'Negative' 'Positive' 'not-Tamil' 'unknown_state']\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o5T7yAcFFpfm","executionInfo":{"status":"ok","timestamp":1624727385401,"user_tz":-330,"elapsed":602,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["predictions_test_submission1_final1 = le.inverse_transform(predictions_test_submission1_final)"],"execution_count":13,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PSSBXvkTFze-","executionInfo":{"status":"ok","timestamp":1624727385862,"user_tz":-330,"elapsed":8,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"b8c4e7df-4803-4f30-b795-6e949a5ded81"},"source":["predictions_test_submission1_final1"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Positive', 'Negative', 'Positive', ..., 'Positive', 'Positive',\n"," 'Positive'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"id":"otag6te-F1It","executionInfo":{"status":"ok","timestamp":1624727389243,"user_tz":-330,"elapsed":494,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["data_test['category'] = predictions_test_submission1_final1"],"execution_count":15,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"5ozZTPDwGiSv","executionInfo":{"status":"ok","timestamp":1624727391557,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"5bad1970-1449-499d-de33-1a3a9d9dbccd"},"source":["data_test"],"execution_count":16,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Tam_1வீர செங்குந்தர் சார்பாக இந்த திரைப்படம் வெற்றி...Positive
1Tam_2Teruk ah irukku .... mokke movie .. waste of timeNegative
2Tam_3manitha samuthaayam amaipil irunthu intha pada...Positive
3Tam_4JJ mam we miss uPositive
4Tam_5Subtitle me traller dekhne wale like karonot-Tamil
............
4397Tam_4398Ithukum dislike potta kammanattti koovaingalam...Negative
4398Tam_4399Suyama Sinthikiravan than super Hero Seama dia...Positive
4399Tam_4400Super thalaiva.... Nee mass dha eppavumePositive
4400Tam_4401பெண்ணை அடிமையாக்க நினைக்கும் இந்த படம் தோல்வித...Positive
4401Tam_4402Semma thalaiva alu athikama akirukum enimale e...Positive
\n","

4402 rows × 3 columns

\n","
"],"text/plain":[" id text category\n","0 Tam_1 வீர செங்குந்தர் சார்பாக இந்த திரைப்படம் வெற்றி... Positive\n","1 Tam_2 Teruk ah irukku .... mokke movie .. waste of time Negative\n","2 Tam_3 manitha samuthaayam amaipil irunthu intha pada... Positive\n","3 Tam_4 JJ mam we miss u Positive\n","4 Tam_5 Subtitle me traller dekhne wale like karo not-Tamil\n","... ... ... ...\n","4397 Tam_4398 Ithukum dislike potta kammanattti koovaingalam... Negative\n","4398 Tam_4399 Suyama Sinthikiravan than super Hero Seama dia... Positive\n","4399 Tam_4400 Super thalaiva.... Nee mass dha eppavume Positive\n","4400 Tam_4401 பெண்ணை அடிமையாக்க நினைக்கும் இந்த படம் தோல்வித... Positive\n","4401 Tam_4402 Semma thalaiva alu athikama akirukum enimale e... Positive\n","\n","[4402 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":16}]},{"cell_type":"code","metadata":{"id":"Jt6Z80uFGjsD","executionInfo":{"status":"ok","timestamp":1624727424501,"user_tz":-330,"elapsed":697,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["data_test.to_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/Ensemble_SVM_LR_TAMIL.tsv\", sep=\"\\t\", index=False)"],"execution_count":17,"outputs":[]},{"cell_type":"code","metadata":{"id":"11fdDMTpG8QS","executionInfo":{"status":"ok","timestamp":1624727435532,"user_tz":-330,"elapsed":692,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["check_submission = pd.read_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/Ensemble_SVM_LR_TAMIL.tsv\", 
sep=\"\\t\")"],"execution_count":18,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"uwyh-IqGG8SG","executionInfo":{"status":"ok","timestamp":1624727435533,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"187dc68d-0639-47cc-ed40-a20eb171e3e9"},"source":["check_submission"],"execution_count":19,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Tam_1வீர செங்குந்தர் சார்பாக இந்த திரைப்படம் வெற்றி...Positive
1Tam_2Teruk ah irukku .... mokke movie .. waste of timeNegative
2Tam_3manitha samuthaayam amaipil irunthu intha pada...Positive
3Tam_4JJ mam we miss uPositive
4Tam_5Subtitle me traller dekhne wale like karonot-Tamil
............
4397Tam_4398Ithukum dislike potta kammanattti koovaingalam...Negative
4398Tam_4399Suyama Sinthikiravan than super Hero Seama dia...Positive
4399Tam_4400Super thalaiva.... Nee mass dha eppavumePositive
4400Tam_4401பெண்ணை அடிமையாக்க நினைக்கும் இந்த படம் தோல்வித...Positive
4401Tam_4402Semma thalaiva alu athikama akirukum enimale e...Positive
\n","

4402 rows × 3 columns

\n","
"],"text/plain":[" id text category\n","0 Tam_1 வீர செங்குந்தர் சார்பாக இந்த திரைப்படம் வெற்றி... Positive\n","1 Tam_2 Teruk ah irukku .... mokke movie .. waste of time Negative\n","2 Tam_3 manitha samuthaayam amaipil irunthu intha pada... Positive\n","3 Tam_4 JJ mam we miss u Positive\n","4 Tam_5 Subtitle me traller dekhne wale like karo not-Tamil\n","... ... ... ...\n","4397 Tam_4398 Ithukum dislike potta kammanattti koovaingalam... Negative\n","4398 Tam_4399 Suyama Sinthikiravan than super Hero Seama dia... Positive\n","4399 Tam_4400 Super thalaiva.... Nee mass dha eppavume Positive\n","4400 Tam_4401 பெண்ணை அடிமையாக்க நினைக்கும் இந்த படம் தோல்வித... Positive\n","4401 Tam_4402 Semma thalaiva alu athikama akirukum enimale e... Positive\n","\n","[4402 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"code","metadata":{"id":"MIEN8srCnBlP"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/ML_Classifier_Kannada_code_mixed.ipynb b/ML_Classifier_Kannada_code_mixed.ipynb new file mode 100644 index 0000000..1174ae5 --- /dev/null +++ b/ML_Classifier_Kannada_code_mixed.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ML_Classifier_Kannada_code_mixed.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"O1S8SYrnMTNI","executionInfo":{"status":"ok","timestamp":1624642719333,"user_tz":-330,"elapsed":630,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"b28b0e18-9903-45cc-df47-3ec230c986aa"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":32,"outputs":[{"output_type":"stream","text":["Drive already mounted at 
/content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"57R5J3_1OfiA","executionInfo":{"status":"ok","timestamp":1624642720014,"user_tz":-330,"elapsed":684,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_train.tsv', sep = '\\t')\n","data_val = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_test_withoutlabels.tsv', sep = '\\t')"],"execution_count":33,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"aIWUjapcPoVJ","executionInfo":{"status":"ok","timestamp":1624642720015,"user_tz":-330,"elapsed":37,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"3fa6db7b-785b-4d7e-9ae4-bf7e2c54dd2c"},"source":["data_train.head()"],"execution_count":34,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory
0ಒಂದು ದೇಶದ ಮುಂದುವರಿಯುವುದು ಅದರ ಆರ್ಥಿಕ ಸ್ಥಿತಿಯನ್ನ...Negative
1ಕನ್ನಡದಲ್ಲಿ ಡೈಲಿ ಟೆಕ್ ಅಪ್ಡೇಟ್ಸ್ ಪಡೆಯಲು ಸಬ್ಸ್ಕ್ರ...Positive
2Super sar songnot-Kannada
3Tiktokers present situation... nನೋಡುವವರು ಯಾರು ...Negative
4Super ಸಾಂಗ್ ವೆರಿ ನೈಸ್....Positive
\n","
"],"text/plain":[" text category\n","0 ಒಂದು ದೇಶದ ಮುಂದುವರಿಯುವುದು ಅದರ ಆರ್ಥಿಕ ಸ್ಥಿತಿಯನ್ನ... Negative\n","1 ಕನ್ನಡದಲ್ಲಿ ಡೈಲಿ ಟೆಕ್ ಅಪ್ಡೇಟ್ಸ್ ಪಡೆಯಲು ಸಬ್ಸ್ಕ್ರ... Positive\n","2 Super sar song not-Kannada\n","3 Tiktokers present situation... nನೋಡುವವರು ಯಾರು ... Negative\n","4 Super ಸಾಂಗ್ ವೆರಿ ನೈಸ್.... Positive"]},"metadata":{"tags":[]},"execution_count":34}]},{"cell_type":"code","metadata":{"id":"0vk8YlBibEYt","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1624642720016,"user_tz":-330,"elapsed":34,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"c19c69f5-b0b3-4b1d-b0a7-6f8a62ef14f3"},"source":["data_val.head()"],"execution_count":35,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory
0Binduge saryagi ugithidira good go ahead we a...Mixed feelings
1yen song guru ...superPositive
2my fevorat storynot-Kannada
3Super ತೋಗರಿ ತೀಪ್ಪPositive
4ನಿಮ್ಮ ಮಾತುಗಳು ಅಕ್ಷರಶಃ ಸತ್ಯ... ನಿಮ್ಮ ಈ ಸಾಮಾನ್ಯ ...Positive
\n","
"],"text/plain":[" text category\n","0 Binduge saryagi ugithidira good go ahead we a... Mixed feelings\n","1 yen song guru ...super Positive\n","2 my fevorat story not-Kannada\n","3 Super ತೋಗರಿ ತೀಪ್ಪ Positive\n","4 ನಿಮ್ಮ ಮಾತುಗಳು ಅಕ್ಷರಶಃ ಸತ್ಯ... ನಿಮ್ಮ ಈ ಸಾಮಾನ್ಯ ... Positive"]},"metadata":{"tags":[]},"execution_count":35}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"7jeRfw0XPqe5","executionInfo":{"status":"ok","timestamp":1624642720016,"user_tz":-330,"elapsed":33,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"134a1bd1-9d1d-4a87-8f3e-dfe9b73d244e"},"source":["data_test.head()"],"execution_count":36,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtext
0Kan_1ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ...
1Kan_2Jai D Boss
2Kan_3Signature move
3Kan_4Super song bro
4Kan_5Wow Super agi helidira sir
\n","
"],"text/plain":[" id text\n","0 Kan_1 ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ...\n","1 Kan_2 Jai D Boss\n","2 Kan_3 Signature move\n","3 Kan_4 Super song bro\n","4 Kan_5 Wow Super agi helidira sir"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tMXviBypwXb0","executionInfo":{"status":"ok","timestamp":1624642720017,"user_tz":-330,"elapsed":28,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"9c5d4144-927a-4595-d5ec-ab1a0ebfead5"},"source":["data_train.shape, data_val.shape, data_test.shape"],"execution_count":37,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((6212, 2), (691, 2), (768, 2))"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Cr674jk8wyck","executionInfo":{"status":"ok","timestamp":1624642720019,"user_tz":-330,"elapsed":25,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"6ecb97df-ec48-4bc6-c0f8-5c7c969e478e"},"source":["data_train['category'].value_counts()"],"execution_count":38,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 2823\n","Negative 1188\n","not-Kannada 916\n","unknown state 711\n","Mixed feelings 574\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":38}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tREo3vWbNLQA","executionInfo":{"status":"ok","timestamp":1624642720020,"user_tz":-330,"elapsed":24,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"ae349996-5a84-440c-8c73-0f1a8f89fc44"},"source":["data_val['category'].value_counts()"],"execution_count":39,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 321\n","Negative 139\n","not-Kannada 110\n","unknown state 69\n","Mixed feelings 52\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":39}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"zX-6Saf7QPh7","executionInfo":{"status":"ok","timestamp":1624642720021,"user_tz":-330,"elapsed":22,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"e421c6bb-8c4a-4d13-dda0-44e4826e9ca6"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_val_index = le.transform(data_val['category'])\n"],"execution_count":40,"outputs":[{"output_type":"stream","text":["['Mixed feelings' 'Negative' 'Positive' 'not-Kannada' 'unknown state']\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mY8Wqps9WAMq"},"source":["For word = ngram_range (1,3)\n","For char = ngram_range (1,6)"]},{"cell_type":"code","metadata":{"id":"jqicHV-qRH4V","executionInfo":{"status":"ok","timestamp":1624642723260,"user_tz":-330,"elapsed":2669,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["from sklearn.feature_extraction.text import TfidfVectorizer\n","tfidfvec = TfidfVectorizer(ngram_range=(1,6), analyzer = 'char_wb', max_features = 50000)\n","#tfidfvec = 
TfidfVectorizer(ngram_range=(1,3), max_features = 30000)\n","tfidf_train = tfidfvec.fit_transform(data_train['text'])\n","tfidf_val = tfidfvec.transform(data_val['text'])"],"execution_count":41,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iV6FzGg_0leL","executionInfo":{"status":"ok","timestamp":1624642723260,"user_tz":-330,"elapsed":31,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"538c5bbd-d642-49ae-87ea-0acbd446c158"},"source":["tfidfvec.get_feature_names()"],"execution_count":42,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[' ',\n"," ' !',\n"," ' !!',\n"," ' !! ',\n"," ' !!!',\n"," ' !!! ',\n"," ' !!!!',\n"," ' #',\n"," ' # ',\n"," ' ##',\n"," ' #1',\n"," ' #1 ',\n"," ' #a',\n"," ' #as',\n"," ' #asn',\n"," ' #asn ',\n"," ' #av',\n"," ' #ava',\n"," ' #avan',\n"," ' #d',\n"," ' #k',\n"," ' #kg',\n"," ' #kgf',\n"," ' #r',\n"," ' #ra',\n"," ' #s',\n"," ' #ಕ',\n"," ' &',\n"," ' & ',\n"," \" '\",\n"," ' (',\n"," ' ( ',\n"," ' )',\n"," ' ) ',\n"," ' *',\n"," ' +',\n"," ' +s',\n"," ' +sh',\n"," ' +shi',\n"," ' +shiv',\n"," ' -',\n"," ' - ',\n"," ' .',\n"," ' . ',\n"," ' ..',\n"," ' .. ',\n"," ' ...',\n"," ' ... ',\n"," ' ....',\n"," ' .... 
',\n"," ' .....',\n"," ' ....n',\n"," ' ....s',\n"," ' ...a',\n"," ' ...n',\n"," ' ..?',\n"," ' ..a',\n"," ' ..av',\n"," ' ..avr',\n"," ' ..f',\n"," ' ..h',\n"," ' ..m',\n"," ' ..ma',\n"," ' ..n',\n"," ' ..ni',\n"," ' .a',\n"," ' .c',\n"," ' .i',\n"," ' .n',\n"," ' .ನ',\n"," ' /',\n"," ' / ',\n"," ' 0',\n"," ' 1',\n"," ' 1 ',\n"," ' 1.',\n"," ' 1.8',\n"," ' 1.8k',\n"," ' 1.8k ',\n"," ' 10',\n"," ' 10 ',\n"," ' 100',\n"," ' 100 ',\n"," ' 100%',\n"," ' 100% ',\n"," ' 1000',\n"," ' 1000 ',\n"," ' 10m',\n"," ' 10m ',\n"," ' 11',\n"," ' 11 ',\n"," ' 12',\n"," ' 12 ',\n"," ' 13',\n"," ' 15',\n"," ' 15 ',\n"," ' 18',\n"," ' 18 ',\n"," ' 1s',\n"," ' 1st',\n"," ' 1st ',\n"," ' 2',\n"," ' 2 ',\n"," ' 2.',\n"," ' 2.0',\n"," ' 20',\n"," ' 20 ',\n"," ' 200',\n"," ' 200 ',\n"," ' 201',\n"," ' 2019',\n"," ' 2019 ',\n"," ' 202',\n"," ' 2020',\n"," ' 2020 ',\n"," ' 24',\n"," ' 24 ',\n"," ' 25',\n"," ' 25 ',\n"," ' 27',\n"," ' 27 ',\n"," ' 28',\n"," ' 2n',\n"," ' 2nd',\n"," ' 2nd ',\n"," ' 3',\n"," ' 3 ',\n"," ' 3.',\n"," ' 30',\n"," ' 30 ',\n"," ' 300',\n"," ' 300 ',\n"," ' 32',\n"," ' 38',\n"," ' 3r',\n"," ' 3rd',\n"," ' 3rdn',\n"," ' 4',\n"," ' 4 ',\n"," ' 42',\n"," ' 420',\n"," ' 420 ',\n"," ' 5',\n"," ' 5 ',\n"," ' 50',\n"," ' 500',\n"," ' 5000',\n"," ' 5000 ',\n"," ' 6',\n"," ' 6 ',\n"," ' 6-',\n"," ' 6-5',\n"," ' 6-5=',\n"," ' 6-5=2',\n"," ' 60',\n"," ' 63',\n"," ' 7',\n"," ' 7 ',\n"," ' 8',\n"," ' 8 ',\n"," ' 80',\n"," ' 85',\n"," ' 854',\n"," ' 8546',\n"," ' 85469',\n"," ' 9',\n"," ' 9 ',\n"," ' 97',\n"," ' 974',\n"," ' 9743',\n"," ' 97434',\n"," ' 98',\n"," ' =',\n"," ' = ',\n"," ' >',\n"," ' ?',\n"," ' ? ',\n"," ' ??',\n"," ' ?? ',\n"," ' ???',\n"," ' ??? 
',\n"," ' ???n',\n"," ' @',\n"," ' @ ',\n"," ' @#',\n"," ' @#w',\n"," ' @#wi',\n"," ' @#win',\n"," ' @1',\n"," ' @10',\n"," ' @10 ',\n"," ' @a',\n"," ' @aa',\n"," ' @aad',\n"," ' @aadh',\n"," ' @ab',\n"," ' @abh',\n"," ' @abhi',\n"," ' @ai',\n"," ' @ais',\n"," ' @aisw',\n"," ' @aj',\n"," ' @aja',\n"," ' @ajay',\n"," ' @aji',\n"," ' @ajit',\n"," ' @ak',\n"," ' @akk',\n"," ' @akki',\n"," ' @aks',\n"," ' @aksh',\n"," ' @al',\n"," ' @am',\n"," ' @amb',\n"," ' @ambr',\n"," ' @an',\n"," ' @ani',\n"," ' @anil',\n"," ' @ar',\n"," ' @aru',\n"," ' @arun',\n"," ' @as',\n"," ' @ash',\n"," ' @ashw',\n"," ' @av',\n"," ' @b',\n"," ' @ba',\n"," ' @bas',\n"," ' @basa',\n"," ' @bh',\n"," ' @bha',\n"," ' @c',\n"," ' @ch',\n"," ' @cha',\n"," ' @chan',\n"," ' @che',\n"," ' @chet',\n"," ' @chn',\n"," ' @chna',\n"," ' @cr',\n"," ' @cre',\n"," ' @crea',\n"," ' @d',\n"," ' @d ',\n"," ' @da',\n"," ' @dac',\n"," ' @dacc',\n"," ' @dar',\n"," ' @dars',\n"," ' @day',\n"," ' @daya',\n"," ' @de',\n"," ' @dev',\n"," ' @devi',\n"," ' @dh',\n"," ' @dha',\n"," ' @dhan',\n"," ' @dhe',\n"," ' @dhee',\n"," ' @e',\n"," ' @en',\n"," ' @ent',\n"," ' @ente',\n"," ' @f',\n"," ' @g',\n"," ' @ga',\n"," ' @gaj',\n"," ' @gaje',\n"," ' @go',\n"," ' @gow',\n"," ' @gowt',\n"," ' @h',\n"," ' @ha',\n"," ' @hi',\n"," ' @hit',\n"," ' @i',\n"," ' @it',\n"," ' @j',\n"," ' @ja',\n"," ' @jag',\n"," ' @jaga',\n"," ' @k',\n"," ' @ka',\n"," ' @kar',\n"," ' @kart',\n"," ' @ki',\n"," ' @kic',\n"," ' @kicc',\n"," ' @kin',\n"," ' @king',\n"," ' @kir',\n"," ' @kira',\n"," ' @kr',\n"," ' @kri',\n"," ' @kris',\n"," ' @ku',\n"," ' @kum',\n"," ' @kuma',\n"," ' @kus',\n"," ' @kush',\n"," ' @l',\n"," ' @la',\n"," ' @lal',\n"," ' @m',\n"," ' @ma',\n"," ' @mad',\n"," ' @mad ',\n"," ' @mada',\n"," ' @madh',\n"," ' @mah',\n"," ' @mahe',\n"," ' @man',\n"," ' @manj',\n"," ' @mano',\n"," ' @me',\n"," ' @meg',\n"," ' @megh',\n"," ' @mo',\n"," ' @mov',\n"," ' @movi',\n"," ' @mu',\n"," ' @n',\n"," ' @na',\n"," ' @nag',\n"," ' @naga',\n"," ' 
@nan',\n"," ' @nand',\n"," ' @nav',\n"," ' @nave',\n"," ' @navy',\n"," ' @ni',\n"," ' @nik',\n"," ' @niki',\n"," ' @nit',\n"," ' @nith',\n"," ' @nk',\n"," ' @nkn',\n"," ' @nkn ',\n"," ' @p',\n"," ' @pa',\n"," ' @po',\n"," ' @poo',\n"," ' @pooj',\n"," ' @pr',\n"," ' @pra',\n"," ' @prad',\n"," ' @praj',\n"," ' @pras',\n"," ' @prav',\n"," ' @pri',\n"," ' @prit',\n"," ' @pu',\n"," ' @pun',\n"," ' @punt',\n"," ' @r',\n"," ' @ra',\n"," ' @rad',\n"," ' @radh',\n"," ' @rag',\n"," ' @ragh',\n"," ' @raj',\n"," ' @raj ',\n"," ' @raja',\n"," ' @raje',\n"," ' @re',\n"," ' @reg',\n"," ' @regi',\n"," ' @ro',\n"," ' @roh',\n"," ' @rohi',\n"," ' @roy',\n"," ' @roya',\n"," ' @s',\n"," ' @s ',\n"," ' @sa',\n"," ' @sac',\n"," ' @sach',\n"," ' @sag',\n"," ' @saga',\n"," ' @sam',\n"," ' @san',\n"," ' @sang',\n"," ' @sanj',\n"," ' @sat',\n"," ' @sath',\n"," ' @sati',\n"," ' @sh',\n"," ' @sha',\n"," ' @shar',\n"," ' @shas',\n"," ' @shi',\n"," ' @shiv',\n"," ' @shr',\n"," ' @si',\n"," ' @sid',\n"," ' @sidd',\n"," ' @su',\n"," ' @suj',\n"," ' @suja',\n"," ' @suji',\n"," ' @sum',\n"," ' @sumy',\n"," ' @sup',\n"," ' @supr',\n"," ' @sur',\n"," ' @sw',\n"," ' @swe',\n"," ' @swee',\n"," ' @sy',\n"," ' @sye',\n"," ' @syed',\n"," ' @sᴀ',\n"," ' @sᴀɴ',\n"," ' @sᴀɴᴅ',\n"," ' @t',\n"," ' @th',\n"," ' @thi',\n"," ' @tr',\n"," ' @tro',\n"," ' @trol',\n"," ' @u',\n"," ' @un',\n"," ' @uni',\n"," ' @unit',\n"," ' @us',\n"," ' @ush',\n"," ' @usha',\n"," ' @v',\n"," ' @va',\n"," ' @var',\n"," ' @vara',\n"," ' @vas',\n"," ' @vasa',\n"," ' @ve',\n"," ' @ven',\n"," ' @venk',\n"," ' @venu',\n"," ' @vi',\n"," ' @vin',\n"," ' @vina',\n"," ' @vino',\n"," ' @vis',\n"," ' @vish',\n"," ' @vy',\n"," ' @vyv',\n"," ' @vyv ',\n"," ' @w',\n"," ' @wi',\n"," ' @wil',\n"," ' @wild',\n"," ' @y',\n"," ' @ya',\n"," ' @yas',\n"," ' @yash',\n"," ' @ŕ',\n"," ' @ŕä',\n"," ' @ŕäv',\n"," ' @ŕävî',\n"," ' @ಕ',\n"," ' @ಕಿ',\n"," ' @ಕಿಚ',\n"," ' @ಕಿಚ್',\n"," ' _',\n"," ' __',\n"," ' a',\n"," ' a ',\n"," ' aa',\n"," ' aa ',\n"," ' 
aaa',\n"," ' aaa ',\n"," ' aad',\n"," ' aada',\n"," ' aadar',\n"," ' aadh',\n"," ' aadm',\n"," ' aadme',\n"," ' aadr',\n"," ' aadre',\n"," ' aadru',\n"," ' aadt',\n"," ' aadti',\n"," ' aag',\n"," ' aaga',\n"," ' aagal',\n"," ' aagb',\n"," ' aagbe',\n"," ' aage',\n"," ' aage ',\n"," ' aagi',\n"," ' aagi ',\n"," ' aagid',\n"," ' aagil',\n"," ' aagir',\n"," ' aagl',\n"," ' aagle',\n"," ' aagli',\n"," ' aago',\n"," ' aagod',\n"," ' aagt',\n"," ' aagta',\n"," ' aagth',\n"," ' aagu',\n"," ' aagut',\n"," ' aah',\n"," ' aai',\n"," ' aan',\n"," ' aap',\n"," ' aapa',\n"," ' aar',\n"," ' aas',\n"," ' aase',\n"," ' aase ',\n"," ' aat',\n"," ' aata',\n"," ' aath',\n"," ' aay',\n"," ' aayi',\n"," ' aayt',\n"," ' aayth',\n"," ' ab',\n"," ' ab ',\n"," ' abb',\n"," ' abba',\n"," ' abbb',\n"," ' abh',\n"," ' abhi',\n"," ' abhig',\n"," ' abhim',\n"," ' abhin',\n"," ' abi',\n"," ' abim',\n"," ' abima',\n"," ' abo',\n"," ' abou',\n"," ' about',\n"," ' abt',\n"," ' abt ',\n"," ' ac',\n"," ' acc',\n"," ' acch',\n"," ' accha',\n"," ' acco',\n"," ' accou',\n"," ' ach',\n"," ' act',\n"," ' act ',\n"," ' acti',\n"," ' actin',\n"," ' acto',\n"," ' actor',\n"," ' actu',\n"," ' actua',\n"," ' ad',\n"," ' ada',\n"," ' ada ',\n"," ' adaa',\n"," ' adag',\n"," ' adaga',\n"," ' adak',\n"," ' adakk',\n"," ' adal',\n"," ' adali',\n"," ' adam',\n"," ' adame',\n"," ' adan',\n"," ' adana',\n"," ' adann',\n"," ' adar',\n"," ' adar ',\n"," ' adara',\n"," ' adare',\n"," ' adb',\n"," ' adbh',\n"," ' adbhu',\n"," ' adbu',\n"," ' adbut',\n"," ' add',\n"," ' add ',\n"," ' addi',\n"," ' addic',\n"," ' addr',\n"," ' addre',\n"," ' adds',\n"," ' adds ',\n"," ' ade',\n"," ' ade ',\n"," ' adel',\n"," ' adell',\n"," ' aden',\n"," ' aden ',\n"," ' adh',\n"," ' adha',\n"," ' adhb',\n"," ' adhbh',\n"," ' adhr',\n"," ' adhre',\n"," ' adhru',\n"," ' adhu',\n"," ' adhu ',\n"," ' adhy',\n"," ' adhya',\n"," ' adi',\n"," ' adi ',\n"," ' adik',\n"," ' adike',\n"," ' adk',\n"," ' adke',\n"," ' adke ',\n"," ' adkk',\n"," ' 
adkke',\n"," ' adm',\n"," ' ado',\n"," ' ador',\n"," ' adoru',\n"," ' adr',\n"," ' adra',\n"," ' adra ',\n"," ' adral',\n"," ' adre',\n"," ' adre ',\n"," ' adru',\n"," ' adru ',\n"," ' ads',\n"," ' adt',\n"," ' adta',\n"," ' adth',\n"," ' adtha',\n"," ' adthi',\n"," ' adti',\n"," ' adu',\n"," ' adu ',\n"," ' aduk',\n"," ' aduke',\n"," ' adukk',\n"," ' adun',\n"," ' adune',\n"," ' adunn',\n"," ' ae',\n"," ' af',\n"," ' aft',\n"," ' afte',\n"," ' after',\n"," ' ag',\n"," ' ag ',\n"," ' aga',\n"," ' aga ',\n"," ' agai',\n"," ' again',\n"," ' agal',\n"," ' agall',\n"," ' agalv',\n"," ' agat',\n"," ' agati',\n"," ' agb',\n"," ' agba',\n"," ' agbe',\n"," ' agbek',\n"," ' agbh',\n"," ' agbi',\n"," ' agbit',\n"," ' agd',\n"," ' age',\n"," ' age ',\n"," ' aged',\n"," ' agi',\n"," ' agi ',\n"," ' agid',\n"," ' agida',\n"," ' agidd',\n"," ' agide',\n"," ' agidh',\n"," ' agidi',\n"," ' agidr',\n"," ' agii',\n"," ' agii ',\n"," ' agil',\n"," ' agill',\n"," ' agir',\n"," ' agirb',\n"," ' agiro',\n"," ' agiru',\n"," ' agit',\n"," ' agith',\n"," ' agl',\n"," ' agle',\n"," ' agle ',\n"," ' agli',\n"," ' agli ',\n"," ' aglil',\n"," ' agll',\n"," ' aglli',\n"," ' ago',\n"," ' ago ',\n"," ' agod',\n"," ' agode',\n"," ' agodu',\n"," ' agoi',\n"," ' agoit',\n"," ' agok',\n"," ' agoke',\n"," ' agol',\n"," ' agoll',\n"," ' agr',\n"," ' agre',\n"," ' agree',\n"," ' agt',\n"," ' agta',\n"," ' agta ',\n"," ' agtai',\n"," ' agth',\n"," ' agthi',\n"," ' agti',\n"," ' agtid',\n"," ' agtil',\n"," ' agtir',\n"," ' agu',\n"," ' agut',\n"," ' agute',\n"," ' aguth',\n"," ' agutt',\n"," ' ah',\n"," ' ah ',\n"," ' aha',\n"," ' aha ',\n"," ' ahe',\n"," ' ai',\n"," ' ais',\n"," ' ait',\n"," ' aith',\n"," ' aithu',\n"," ' aitu',\n"," ' aitu ',\n"," ' aj',\n"," ' aja',\n"," ' ajan',\n"," ' ajane',\n"," ' ajn',\n"," ' ajne',\n"," ' ajnee',\n"," ' ak',\n"," ' aka',\n"," ' akb',\n"," ' akba',\n"," ' akbar',\n"," ' akh',\n"," ' akhi',\n"," ' akhil',\n"," ' aki',\n"," ' akid',\n"," ' akk',\n"," ' akka',\n"," ' 
akka ',\n"," ' akkan',\n"," ' akki',\n"," ' akki ',\n"," ' akkn',\n"," ' akkna',\n"," ' ako',\n"," ' akt',\n"," ' akta',\n"," ' al',\n"," ' ala',\n"," ' ala ',\n"," ' ald',\n"," ' aldh',\n"," ' ale',\n"," ' ale ',\n"," ' ali',\n"," ' ali ',\n"," ' all',\n"," ' all ',\n"," ' alla',\n"," ' alla ',\n"," ' alla.',\n"," ' allap',\n"," ' alle',\n"," ' alle ',\n"," ' alli',\n"," ' alli ',\n"," ' alll',\n"," ' allla',\n"," ' allo',\n"," ' allu',\n"," ' allu ',\n"," ' alp',\n"," ' alpa',\n"," ' alpa ',\n"," ' alr',\n"," ' alre',\n"," ' alrea',\n"," ' als',\n"," ' also',\n"," ' also ',\n"," ' alt',\n"," ' alv',\n"," ' alva',\n"," ' alva ',\n"," ' alvaa',\n"," ' alw',\n"," ' alwa',\n"," ' alwa ',\n"," ' alway',\n"," ' am',\n"," ' am ',\n"," ' ama',\n"," ' amar',\n"," ' amar ',\n"," ' amaz',\n"," ' amaze',\n"," ' amazi',\n"," ' amazo',\n"," ' amb',\n"," ' ambe',\n"," ' amber',\n"," ' ame',\n"," ' amel',\n"," ' amele',\n"," ' amez',\n"," ' amm',\n"," ' amma',\n"," ' amma ',\n"," ' amman',\n"," ' ammu',\n"," ' ammun',\n"," ' amo',\n"," ' amou',\n"," ' amoun',\n"," ' an',\n"," ' an ',\n"," ' ana',\n"," ' anan',\n"," ' anand',\n"," ' anat',\n"," ' anath',\n"," ' anb',\n"," ' anbe',\n"," ' anbed',\n"," ' anc',\n"," ' anch',\n"," ' ancho',\n"," ' and',\n"," ' and ',\n"," ' anda',\n"," ' andh',\n"," ' andha',\n"," ' andhr',\n"," ' andi',\n"," ' andid',\n"," ' andk',\n"," ' andko',\n"," ' andr',\n"," ' andre',\n"," ' andru',\n"," ' ang',\n"," ' ange',\n"," ' ange ',\n"," ' ani',\n"," ' anis',\n"," ' anisu',\n"," ' anj',\n"," ' anja',\n"," ' anjal',\n"," ' ank',\n"," ' anko',\n"," ' ankon',\n"," ' ankot',\n"," ' ann',\n"," ' anna',\n"," ' anna ',\n"," ' anna.',\n"," ' annan',\n"," ' anni',\n"," ' annis',\n"," ' annn',\n"," ' annna',\n"," ' anno',\n"," ' anno ',\n"," ' annod',\n"," ' annor',\n"," ' annov',\n"," ' annu',\n"," ' annu ',\n"," ' annuv',\n"," ' ano',\n"," ' anod',\n"," ' anodu',\n"," ' ans',\n"," ' ansa',\n"," ' ansat',\n"," ' ansb',\n"," ' ansi',\n"," ' ansid',\n"," ' 
ansit',\n"," ' anso',\n"," ' ansod',\n"," ' anst',\n"," ' ansta',\n"," ' ansth',\n"," ' ansti',\n"," ' anstu',\n"," ' ansu',\n"," ' ansut',\n"," ' answ',\n"," ' answe',\n"," ' ant',\n"," ' anta',\n"," ' anta ',\n"," ' anta.',\n"," ' antad',\n"," ' antar',\n"," ' ante',\n"," ' ante ',\n"," ' anth',\n"," ' antha',\n"," ' anthi',\n"," ' anthu',\n"," ' anti',\n"," ' antid',\n"," ' antir',\n"," ' antu',\n"," ' antu ',\n"," ' anty',\n"," ' antya',\n"," ' anu',\n"," ' anu ',\n"," ' anus',\n"," ' anust',\n"," ' anusu',\n"," ' anv',\n"," ' any',\n"," ' any ',\n"," ' anyo',\n"," ' anyon',\n"," ' ap',\n"," ' app',\n"," ' app ',\n"," ' appa',\n"," ' appa ',\n"," ' appaj',\n"," ' appan',\n"," ' appp',\n"," ' appp ',\n"," ' appr',\n"," ' appre',\n"," ' apps',\n"," ' apps ',\n"," ' appu',\n"," ' appu ',\n"," ' ar',\n"," ' ar ',\n"," ' ara',\n"," ' are',\n"," ' are ',\n"," ' ari',\n"," ' arit',\n"," ' arita',\n"," ' arj',\n"," ' arju',\n"," ' arjun',\n"," ' arm',\n"," ' army',\n"," ' army ',\n"," ' aro',\n"," ' art',\n"," ' arta',\n"," ' arta ',\n"," ' arth',\n"," ' artha',\n"," ' arthi',\n"," ' arti',\n"," ' artis',\n"," ' as',\n"," ' as ',\n"," ' asa',\n"," ' asay',\n"," ' asayy',\n"," ' asc',\n"," ' asca',\n"," ' ascar',\n"," ' ase',\n"," ' ase ',\n"," ' ash',\n"," ' asht',\n"," ' asht ',\n"," ' ashte',\n"," ' ashtu',\n"," ' ashv',\n"," ' ashva',\n"," ' ashw',\n"," ' ashwa',\n"," ' asn',\n"," ' asn ',\n"," ' asn.',\n"," ' asn..',\n"," ' asnn',\n"," ' aso',\n"," ' asom',\n"," ' ast',\n"," ' ast ',\n"," ' aste',\n"," ...]"]},"metadata":{"tags":[]},"execution_count":42}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EDDXkAUI1DF1","executionInfo":{"status":"ok","timestamp":1624642723261,"user_tz":-330,"elapsed":28,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"a3ef66d5-3c7d-4084-eb55-25f5ed157019"},"source":["tfidf_train.shape"],"execution_count":43,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(6212, 50000)"]},"metadata":{"tags":[]},"execution_count":43}]},{"cell_type":"code","metadata":{"id":"szYsgWh4UFOy","executionInfo":{"status":"ok","timestamp":1624642723261,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["x_train = tfidf_train\n","x_test = tfidf_val\n","y_train = y_train_index\n","y_test = y_val_index"],"execution_count":44,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QbypHNhYSMDy","executionInfo":{"status":"ok","timestamp":1624643180218,"user_tz":-330,"elapsed":456961,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"3a0db849-f0f6-4a9a-a80e-db218d5e362b"},"source":[" from sklearn import metrics\n"," from sklearn.svm import SVC\n"," model_SVM = SVC(probability=True)\n"," model_SVM.fit(x_train, y_train)\n"," y_pred_SVM = model_SVM.predict(x_test)\n"," print(\"SVM\")\n"," print(metrics.classification_report(y_test, y_pred_SVM))\n"," \n"," from sklearn.ensemble import RandomForestClassifier\n"," rf = RandomForestClassifier()\n"," rf.fit(x_train,y_train)\n"," y_pred_rf = rf.predict(x_test)\n"," print(\"random\")\n"," print(metrics.classification_report(y_test, y_pred_rf))\n"," \n"," from sklearn.linear_model import LogisticRegression\n"," LR = LogisticRegression()\n"," LR.fit(x_train,y_train)\n"," y_pred_LR = LR.predict(x_test)\n"," print(\"Logistic Regression\")\n"," 
print(metrics.classification_report(y_test, y_pred_LR ))\n"," \n"," from sklearn.neighbors import KNeighborsClassifier\n"," neigh = KNeighborsClassifier()\n"," neigh.fit(x_train,y_train)\n"," y_pred_KNN = neigh.predict(x_test)\n"," print(\"KNN\")\n"," print(metrics.classification_report(y_test, y_pred_KNN ))\n"," \n"," from sklearn.naive_bayes import GaussianNB\n"," naive = GaussianNB()\n"," naive.fit(x_train.toarray(),y_train)\n"," y_pred_naive = naive.predict(x_test.toarray())\n"," print(\"Naive Bayes\")\n"," print(metrics.classification_report(y_test, y_pred_naive ))\n"," \n"," from sklearn.ensemble import GradientBoostingClassifier\n"," gradient = GradientBoostingClassifier()\n"," gradient.fit(x_train,y_train)\n"," y_pred_gradient = gradient.predict(x_test)\n"," print(\"Gradient Boosting\")\n"," print(metrics.classification_report(y_test, y_pred_gradient ))\n"," \n"," from sklearn.tree import DecisionTreeClassifier\n"," decision = DecisionTreeClassifier()\n"," decision.fit(x_train,y_train)\n"," y_pred_decision = decision.predict(x_test)\n"," print(\"Decision Tree\")\n"," print(metrics.classification_report(y_test, y_pred_decision ))\n","\n"," from sklearn.ensemble import AdaBoostClassifier\n"," AdaBoostclf = AdaBoostClassifier()\n"," AdaBoostclf.fit(x_train,y_train)\n"," y_pred_AdaBoostclf = AdaBoostclf.predict(x_test)\n"," print(\"AdaBoost classifier\")\n"," print(metrics.classification_report(y_test, y_pred_AdaBoostclf ))\n"," "],"execution_count":45,"outputs":[{"output_type":"stream","text":["SVM\n"," precision recall f1-score support\n","\n"," 0 1.00 0.08 0.14 52\n"," 1 0.68 0.58 0.62 139\n"," 2 0.65 0.86 0.74 321\n"," 3 0.70 0.67 0.69 110\n"," 4 0.59 0.35 0.44 69\n","\n"," accuracy 0.66 691\n"," macro avg 0.72 0.51 0.53 691\n","weighted avg 0.69 0.66 0.64 691\n","\n","random\n"," precision recall f1-score support\n","\n"," 0 0.45 0.10 0.16 52\n"," 1 0.74 0.51 0.60 139\n"," 2 0.65 0.90 0.75 321\n"," 3 0.69 0.65 0.67 110\n"," 4 0.64 0.30 0.41 69\n","\n"," 
accuracy 0.66 691\n"," macro avg 0.63 0.49 0.52 691\n","weighted avg 0.66 0.66 0.63 691\n","\n"],"name":"stdout"},{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n","STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n","\n","Increase the number of iterations (max_iter) or scale the data as shown in:\n"," https://scikit-learn.org/stable/modules/preprocessing.html\n","Please also refer to the documentation for alternative solver options:\n"," https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n"," extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"],"name":"stderr"},{"output_type":"stream","text":["Logistic Regression\n"," precision recall f1-score support\n","\n"," 0 0.60 0.06 0.11 52\n"," 1 0.69 0.63 0.65 139\n"," 2 0.69 0.87 0.77 321\n"," 3 0.72 0.74 0.73 110\n"," 4 0.60 0.36 0.45 69\n","\n"," accuracy 0.69 691\n"," macro avg 0.66 0.53 0.54 691\n","weighted avg 0.68 0.69 0.66 691\n","\n","KNN\n"," precision recall f1-score support\n","\n"," 0 0.19 0.13 0.16 52\n"," 1 0.60 0.58 0.59 139\n"," 2 0.65 0.73 0.69 321\n"," 3 0.53 0.62 0.57 110\n"," 4 0.50 0.22 0.30 69\n","\n"," accuracy 0.59 691\n"," macro avg 0.49 0.46 0.46 691\n","weighted avg 0.57 0.59 0.57 691\n","\n","Naive Bayes\n"," precision recall f1-score support\n","\n"," 0 0.22 0.15 0.18 52\n"," 1 0.42 0.63 0.50 139\n"," 2 0.64 0.57 0.60 321\n"," 3 0.63 0.65 0.64 110\n"," 4 0.34 0.22 0.27 69\n","\n"," accuracy 0.53 691\n"," macro avg 0.45 0.45 0.44 691\n","weighted avg 0.53 0.53 0.52 691\n","\n","Gradient Boosting\n"," precision recall f1-score support\n","\n"," 0 0.43 0.06 0.10 52\n"," 1 0.68 0.51 0.58 139\n"," 2 0.63 0.86 0.73 321\n"," 3 0.72 0.69 0.71 110\n"," 4 0.69 0.32 0.44 69\n","\n"," accuracy 0.65 691\n"," macro avg 0.63 0.49 0.51 691\n","weighted avg 0.64 0.65 0.62 691\n","\n","Decision Tree\n"," precision recall f1-score support\n","\n"," 0 0.16 0.19 0.18 
52\n"," 1 0.44 0.45 0.44 139\n"," 2 0.67 0.64 0.66 321\n"," 3 0.51 0.55 0.53 110\n"," 4 0.29 0.26 0.27 69\n","\n"," accuracy 0.52 691\n"," macro avg 0.42 0.42 0.42 691\n","weighted avg 0.52 0.52 0.52 691\n","\n","AdaBoost classifier\n"," precision recall f1-score support\n","\n"," 0 0.67 0.04 0.07 52\n"," 1 0.55 0.50 0.53 139\n"," 2 0.63 0.80 0.70 321\n"," 3 0.56 0.59 0.57 110\n"," 4 0.41 0.22 0.28 69\n","\n"," accuracy 0.59 691\n"," macro avg 0.56 0.43 0.43 691\n","weighted avg 0.58 0.59 0.56 691\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"dutLuW-7NitW"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"uSFmluVXNkL2"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"aKJdPx2jNkO3"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"XbQ1Hm3TNkSP"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"bQANqfdsf0i6","executionInfo":{"status":"ok","timestamp":1624643355150,"user_tz":-330,"elapsed":533,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["tfidf_test = tfidfvec.transform(data_test['text'])"],"execution_count":46,"outputs":[]},{"cell_type":"code","metadata":{"id":"RBDDmfD_URTa","executionInfo":{"status":"ok","timestamp":1624643360271,"user_tz":-330,"elapsed":3034,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_model_SVM_prob = model_SVM.predict_proba(tfidf_test)"],"execution_count":47,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7VQ3hfA7gECM","executionInfo":{"status":"ok","timestamp":1624643360272,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"9af2c69b-2f30-4acf-e507-b39c6d2e48ed"},"source":["y_pred_model_SVM_prob"],"execution_count":48,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0.05626558, 0.13327691, 0.77781715, 0.00490246, 0.0277379 ],\n"," [0.02677196, 0.00776521, 0.86121443, 0.07037198, 0.03387642],\n"," [0.05356787, 0.01402725, 0.24949638, 0.47680516, 0.20610333],\n"," ...,\n"," [0.02673411, 0.12512806, 0.84205796, 0.00177428, 0.00430561],\n"," [0.15453031, 0.15698626, 0.65826056, 0.00645513, 0.02376774],\n"," [0.20196312, 0.30472424, 0.43928437, 0.00637452, 0.04765375]])"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"code","metadata":{"id":"i_feFgnOiZM0","executionInfo":{"status":"ok","timestamp":1624643361973,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_model_SVM_prob= pd.DataFrame(y_pred_model_SVM_prob, columns = ['Mixed feelings' ,'Negative' ,'Positive' ,'not-Kannada', 'unknown state'])"],"execution_count":49,"outputs":[]},{"cell_type":"code","metadata":{"id":"PujUzf51iZOG","executionInfo":{"status":"ok","timestamp":1624643363970,"user_tz":-330,"elapsed":635,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_model_SVM_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/SVM_char_1-6_gram_kanada_64.csv',index = False)"],"execution_count":50,"outputs":[]},{"cell_type":"code","metadata":{"id":"IcKeAb1giZQa","executionInfo":{"status":"ok","timestamp":1624643366014,"user_tz":-330,"elapsed":589,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_rf_prob = rf.predict_proba(tfidf_test)"],"execution_count":51,"outputs":[]},{"cell_type":"code","metadata":{"id":"nf9Ryeo0i1I8","executionInfo":{"status":"ok","timestamp":1624643368373,"user_tz":-330,"elapsed":2,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_rf_prob= pd.DataFrame(y_pred_rf_prob, columns = ['Mixed feelings' ,'Negative' ,'Positive' ,'not-Kannada', 'unknown state'])"],"execution_count":52,"outputs":[]},{"cell_type":"code","metadata":{"id":"nTMjSz-_i1Ki","executionInfo":{"status":"ok","timestamp":1624643370188,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_rf_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/RF_char_1-6_gram_Kanada_63.csv', index = False)"],"execution_count":53,"outputs":[]},{"cell_type":"code","metadata":{"id":"k-XiNgH_i1Md","executionInfo":{"status":"ok","timestamp":1624643371958,"user_tz":-330,"elapsed":2,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_LR_prob = LR.predict_proba(tfidf_test)"],"execution_count":54,"outputs":[]},{"cell_type":"code","metadata":{"id":"pS6nkrMbi1P_","executionInfo":{"status":"ok","timestamp":1624643374240,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_LR_prob= pd.DataFrame(y_pred_LR_prob, columns = 
['Mixed feelings' ,'Negative' ,'Positive' ,'not-Kannada', 'unknown state'])"],"execution_count":55,"outputs":[]},{"cell_type":"code","metadata":{"id":"ru0vsVTsiZT3","executionInfo":{"status":"ok","timestamp":1624643375873,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_LR_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/LR_char_1-6_gram_Kanda_66.csv', index = False)"],"execution_count":56,"outputs":[]},{"cell_type":"code","metadata":{"id":"7vXANa-i7HFG"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/ML_Classifier_Malayalam_code_mixed.ipynb b/ML_Classifier_Malayalam_code_mixed.ipynb new file mode 100644 index 0000000..4e03767 --- /dev/null +++ b/ML_Classifier_Malayalam_code_mixed.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ML_Classifier_Malayalam_code_mixed.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"O1S8SYrnMTNI","executionInfo":{"status":"ok","timestamp":1624682778862,"user_tz":-330,"elapsed":21007,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"0d117409-9793-484e-8b65-1c837581ebce"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"57R5J3_1OfiA","executionInfo":{"status":"ok","timestamp":1624682781513,"user_tz":-330,"elapsed":1259,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_train.tsv', sep = '\\t')\n","data_val = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_test_withoutlabels.tsv', sep = '\\t')"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"aIWUjapcPoVJ","executionInfo":{"status":"ok","timestamp":1624682781514,"user_tz":-330,"elapsed":21,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"e8baeab0-bdab-4b39-aaff-ad3df348872a"},"source":["data_train.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory
0Sunny chechi fans evide like adichu power kani...unknown_state
1Angane july month ile ende aadyathe leave njan...Positive
2ഏട്ടന്റെ പുതിയ പടത്തിനു വേണ്ടി കാത്തിരിക്കുന്ന...unknown_state
3ഇനി ലാലേട്ടന്റെ വേട്ട തുടങ്ങാൻ പോകുന്നു..........Positive
4Trailer powli oru raksha illa . Pakshea padam ...Positive
\n","
"],"text/plain":[" text category\n","0 Sunny chechi fans evide like adichu power kani... unknown_state\n","1 Angane july month ile ende aadyathe leave njan... Positive\n","2 ഏട്ടന്റെ പുതിയ പടത്തിനു വേണ്ടി കാത്തിരിക്കുന്ന... unknown_state\n","3 ഇനി ലാലേട്ടന്റെ വേട്ട തുടങ്ങാൻ പോകുന്നു.......... Positive\n","4 Trailer powli oru raksha illa . Pakshea padam ... Positive"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"0vk8YlBibEYt","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1624682781515,"user_tz":-330,"elapsed":20,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"8c773c32-f24b-4d65-a201-9c9423c3c290"},"source":["data_val.head()"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory
0Mammooka ninghal mass aa pwoli itemPositive
1Waiting for Malayalam movie For Tamil paiyannot-malayalam
2ദളപതി ഫാൻസിന്റെ വക ഒരു ഒന്നൊന്നര വിജയാശംസകൾ...Positive
3#Trailer pwolichuuuu ഓണത്തിന് വന്നങ്ങു തകർത്തേ...Positive
4Mammoookkaaaa polichadukkiii katta waiting nv 21Positive
\n","
"],"text/plain":[" text category\n","0 Mammooka ninghal mass aa pwoli item Positive\n","1 Waiting for Malayalam movie For Tamil paiyan not-malayalam\n","2 ദളപതി ഫാൻസിന്റെ വക ഒരു ഒന്നൊന്നര വിജയാശംസകൾ... Positive\n","3 #Trailer pwolichuuuu ഓണത്തിന് വന്നങ്ങു തകർത്തേ... Positive\n","4 Mammoookkaaaa polichadukkiii katta waiting nv 21 Positive"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"7jeRfw0XPqe5","executionInfo":{"status":"ok","timestamp":1624682781515,"user_tz":-330,"elapsed":19,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"dc3bf79d-effc-4438-e672-c15972f9707e"},"source":["data_test.head()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtext
0Mal_1Teaserinu kurach samayamkoodi mathram. Cant wa...
1Mal_2അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...
2Mal_3മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?
3Mal_4Nowadays 944k views is considered as 1M views ...
4Mal_5Maass.trailer ennu paranja ithaanU makkale.......
\n","
"],"text/plain":[" id text\n","0 Mal_1 Teaserinu kurach samayamkoodi mathram. Cant wa...\n","1 Mal_2 അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...\n","2 Mal_3 മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?\n","3 Mal_4 Nowadays 944k views is considered as 1M views ...\n","4 Mal_5 Maass.trailer ennu paranja ithaanU makkale......."]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tMXviBypwXb0","executionInfo":{"status":"ok","timestamp":1624682781516,"user_tz":-330,"elapsed":19,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"dd53e503-f71f-42b2-b88d-c0ef1c27cd21"},"source":["data_train.shape, data_val.shape, data_test.shape"],"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((15888, 2), (1766, 2), (1962, 2))"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Cr674jk8wyck","executionInfo":{"status":"ok","timestamp":1624682781517,"user_tz":-330,"elapsed":19,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"3c47d663-c6e2-4fb3-fa1c-76ecd3778927"},"source":["data_train['category'].value_counts()"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 6421\n","unknown_state 5279\n","Negative 2105\n","not-malayalam 1157\n","Mixed_feelings 926\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tREo3vWbNLQA","executionInfo":{"status":"ok","timestamp":1624682781518,"user_tz":-330,"elapsed":17,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"18e38bd8-6315-43e8-d10d-89ef0bc33926"},"source":["data_val['category'].value_counts()"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 706\n","unknown_state 580\n","Negative 237\n","not-malayalam 141\n","Mixed_feelings 102\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"zX-6Saf7QPh7","executionInfo":{"status":"ok","timestamp":1624682782802,"user_tz":-330,"elapsed":12,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"dff6a3bc-45be-4822-fded-9267ffc6a2ca"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_val_index = le.transform(data_val['category'])\n"],"execution_count":9,"outputs":[{"output_type":"stream","text":["['Mixed_feelings' 'Negative' 'Positive' 'not-malayalam' 'unknown_state']\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mY8Wqps9WAMq"},"source":["N-gram ranges for the TF-IDF vectorizer:\n","\n","- Word-level features: `ngram_range=(1, 3)`\n","- Character-level features: `ngram_range=(1, 6)`"]},{"cell_type":"code","metadata":{"id":"jqicHV-qRH4V","executionInfo":{"status":"ok","timestamp":1624682789930,"user_tz":-330,"elapsed":5758,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["from sklearn.feature_extraction.text import TfidfVectorizer\n","tfidfvec = TfidfVectorizer(ngram_range=(1,6), analyzer = 'char_wb', max_features = 30000)\n","#tfidfvec = 
TfidfVectorizer(ngram_range=(1,3), max_features = 30000)\n","tfidf_train = tfidfvec.fit_transform(data_train['text'])\n","tfidf_val = tfidfvec.transform(data_val['text'])"],"execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iV6FzGg_0leL","executionInfo":{"status":"ok","timestamp":1624682789931,"user_tz":-330,"elapsed":24,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"9afbb63c-228c-4706-d245-41e5a1d2c76d"},"source":["tfidfvec.get_feature_names()"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[' ',\n"," ' !',\n"," ' ! ',\n"," ' !!',\n"," ' !! ',\n"," ' !!!',\n"," ' !!! ',\n"," ' !!!!',\n"," ' #',\n"," ' # ',\n"," ' #f',\n"," ' #i',\n"," ' #l',\n"," ' #la',\n"," ' #lal',\n"," ' #lale',\n"," ' #m',\n"," ' #ma',\n"," ' #mam',\n"," ' #mama',\n"," ' #mamm',\n"," ' #r',\n"," ' #s',\n"," ' #sh',\n"," ' #t',\n"," ' #മ',\n"," ' &',\n"," ' & ',\n"," ' (',\n"," ' ( ',\n"," ' )',\n"," ' ) ',\n"," ' *',\n"," ' +',\n"," ' + ',\n"," ' ,',\n"," ' , ',\n"," ' ,,',\n"," ' ,,,',\n"," ' -',\n"," ' - ',\n"," ' .',\n"," ' . ',\n"," ' ..',\n"," ' .. ',\n"," ' ...',\n"," ' ... ',\n"," ' ....',\n"," ' .... 
',\n"," ' .....',\n"," ' .അ',\n"," ' 0',\n"," ' 0.',\n"," ' 0:',\n"," ' 0:4',\n"," ' 0:5',\n"," ' 1',\n"," ' 1 ',\n"," ' 1.',\n"," ' 10',\n"," ' 10 ',\n"," ' 100',\n"," ' 100 ',\n"," ' 100%',\n"," ' 100c',\n"," ' 100cr',\n"," ' 11',\n"," ' 12',\n"," ' 12 ',\n"," ' 14',\n"," ' 15',\n"," ' 16',\n"," ' 17',\n"," ' 17 ',\n"," ' 18',\n"," ' 19',\n"," ' 1:',\n"," ' 1:0',\n"," ' 1:1',\n"," ' 1:2',\n"," ' 1:3',\n"," ' 1:5',\n"," ' 1m',\n"," ' 1m ',\n"," ' 1s',\n"," ' 1st',\n"," ' 1st ',\n"," ' 2',\n"," ' 2 ',\n"," ' 2.',\n"," ' 20',\n"," ' 20 ',\n"," ' 200',\n"," ' 200 ',\n"," ' 201',\n"," ' 2019',\n"," ' 2019 ',\n"," ' 202',\n"," ' 2020',\n"," ' 2020 ',\n"," ' 21',\n"," ' 21 ',\n"," ' 25',\n"," ' 28',\n"," ' 2:',\n"," ' 3',\n"," ' 3 ',\n"," ' 3.',\n"," ' 30',\n"," ' 30 ',\n"," ' 36',\n"," ' 369',\n"," ' 369 ',\n"," ' 4',\n"," ' 4 ',\n"," ' 5',\n"," ' 5 ',\n"," ' 50',\n"," ' 50 ',\n"," ' 500',\n"," ' 500 ',\n"," ' 6',\n"," ' 6 ',\n"," ' 68',\n"," ' 68 ',\n"," ' 7',\n"," ' 7 ',\n"," ' 8',\n"," ' 8 ',\n"," ' 9',\n"," ' :',\n"," ' : ',\n"," ' :)',\n"," ' :) ',\n"," ' :-',\n"," ' :- ',\n"," ' <',\n"," ' <3',\n"," ' <3 ',\n"," ' =',\n"," ' = ',\n"," ' ?',\n"," ' ? ',\n"," ' ??',\n"," ' ?? ',\n"," ' ???',\n"," ' ??? 
',\n"," ' @',\n"," ' \\\\',\n"," ' \\\\r',\n"," ' \\\\r ',\n"," ' _',\n"," ' a',\n"," ' a ',\n"," ' a1',\n"," ' a10',\n"," ' a10 ',\n"," ' aa',\n"," ' aa ',\n"," ' aaa',\n"," ' aaa ',\n"," ' aaaa',\n"," ' aaan',\n"," ' aaay',\n"," ' aad',\n"," ' aada',\n"," ' aady',\n"," ' aadya',\n"," ' aag',\n"," ' aah',\n"," ' aak',\n"," ' aaka',\n"," ' aakan',\n"," ' aakat',\n"," ' aaki',\n"," ' aakk',\n"," ' aaku',\n"," ' aakum',\n"," ' aal',\n"," ' aan',\n"," ' aan ',\n"," ' aana',\n"," ' aanal',\n"," ' aane',\n"," ' aane ',\n"," ' aanen',\n"," ' aano',\n"," ' aano ',\n"," ' aanu',\n"," ' aanu ',\n"," ' aanu.',\n"," ' aar',\n"," ' aara',\n"," ' aare',\n"," ' aaren',\n"," ' aark',\n"," ' aaro',\n"," ' aarok',\n"," ' aaru',\n"," ' aarum',\n"," ' aas',\n"," ' aash',\n"," ' aasha',\n"," ' aav',\n"," ' aava',\n"," ' aavat',\n"," ' aavu',\n"," ' aavum',\n"," ' aay',\n"," ' aaya',\n"," ' aaya ',\n"," ' aayal',\n"," ' aayi',\n"," ' aayi ',\n"," ' aayir',\n"," ' aayit',\n"," ' ab',\n"," ' abh',\n"," ' abhi',\n"," ' abhin',\n"," ' abi',\n"," ' abin',\n"," ' abina',\n"," ' abr',\n"," ' abra',\n"," ' abrah',\n"," ' abri',\n"," ' abrid',\n"," ' ac',\n"," ' ach',\n"," ' acha',\n"," ' achan',\n"," ' achay',\n"," ' act',\n"," ' acti',\n"," ' actin',\n"," ' actio',\n"," ' acto',\n"," ' actor',\n"," ' ad',\n"," ' ada',\n"," ' adaa',\n"," ' adaar',\n"," ' adar',\n"," ' add',\n"," ' addi',\n"," ' addic',\n"," ' adh',\n"," ' adhi',\n"," ' adi',\n"," ' adi ',\n"," ' adi.',\n"," ' adi..',\n"," ' adic',\n"," ' adich',\n"," ' adii',\n"," ' adik',\n"," ' adik ',\n"," ' adika',\n"," ' adikk',\n"," ' adiku',\n"," ' adip',\n"," ' adipo',\n"," ' adiy',\n"," ' adu',\n"," ' adut',\n"," ' aduth',\n"," ' ady',\n"," ' af',\n"," ' aft',\n"," ' afte',\n"," ' after',\n"," ' ag',\n"," ' aga',\n"," ' agai',\n"," ' again',\n"," ' age',\n"," ' age ',\n"," ' agr',\n"," ' agra',\n"," ' agrah',\n"," ' ah',\n"," ' aha',\n"," ' ai',\n"," ' aj',\n"," ' aja',\n"," ' ak',\n"," ' aka',\n"," ' akk',\n"," ' akka',\n"," ' 
aku',\n"," ' akum',\n"," ' akum ',\n"," ' al',\n"," ' ala',\n"," ' alay',\n"," ' alayu',\n"," ' ali',\n"," ' all',\n"," ' all ',\n"," ' alla',\n"," ' alla ',\n"," ' allat',\n"," ' alle',\n"," ' alle ',\n"," ' alw',\n"," ' alwa',\n"," ' alway',\n"," ' am',\n"," ' am ',\n"," ' ama',\n"," ' amb',\n"," ' amm',\n"," ' amma',\n"," ' an',\n"," ' an ',\n"," ' ana',\n"," ' and',\n"," ' and ',\n"," ' ane',\n"," ' anen',\n"," ' anenn',\n"," ' ang',\n"," ' anga',\n"," ' angan',\n"," ' angu',\n"," ' angu ',\n"," ' ani',\n"," ' aniy',\n"," ' aniya',\n"," ' ann',\n"," ' ann ',\n"," ' anna',\n"," ' anna ',\n"," ' annan',\n"," ' anne',\n"," ' annu',\n"," ' annu ',\n"," ' annum',\n"," ' ano',\n"," ' ano ',\n"," ' ant',\n"," ' anth',\n"," ' anu',\n"," ' anu ',\n"," ' anu.',\n"," ' any',\n"," ' ap',\n"," ' apo',\n"," ' apo ',\n"," ' app',\n"," ' appa',\n"," ' appo',\n"," ' appo ',\n"," ' ar',\n"," ' ara',\n"," ' are',\n"," ' are ',\n"," ' ari',\n"," ' arik',\n"," ' arike',\n"," ' ariy',\n"," ' ariya',\n"," ' ariyi',\n"," ' arj',\n"," ' arju',\n"," ' arjun',\n"," ' ark',\n"," ' aro',\n"," ' arok',\n"," ' arokk',\n"," ' aru',\n"," ' arum',\n"," ' arum ',\n"," ' as',\n"," ' as ',\n"," ' ash',\n"," ' asha',\n"," ' asham',\n"," ' ass',\n"," ' at',\n"," ' at ',\n"," ' ath',\n"," ' ath ',\n"," ' atha',\n"," ' athe',\n"," ' athe ',\n"," ' athi',\n"," ' athin',\n"," ' atho',\n"," ' athr',\n"," ' athra',\n"," ' athu',\n"," ' athu ',\n"," ' athuk',\n"," ' au',\n"," ' av',\n"," ' ava',\n"," ' aval',\n"," ' avan',\n"," ' avan ',\n"," ' avar',\n"," ' avas',\n"," ' avasa',\n"," ' avast',\n"," ' avat',\n"," ' avath',\n"," ' ave',\n"," ' avi',\n"," ' avid',\n"," ' avu',\n"," ' avum',\n"," ' aw',\n"," ' awa',\n"," ' awe',\n"," ' awes',\n"," ' aweso',\n"," ' ay',\n"," ' aya',\n"," ' aya ',\n"," ' ayal',\n"," ' ayi',\n"," ' ayi ',\n"," ' ayir',\n"," ' ayiri',\n"," ' ayiru',\n"," ' ayit',\n"," ' ayo',\n"," ' ayy',\n"," ' b',\n"," ' b ',\n"," ' ba',\n"," ' baa',\n"," ' baap',\n"," ' baap ',\n"," ' 
bab',\n"," ' baby',\n"," ' bac',\n"," ' back',\n"," ' back ',\n"," ' bah',\n"," ' bahu',\n"," ' bahub',\n"," ' bak',\n"," ' ban',\n"," ' bas',\n"," ' be',\n"," ' be ',\n"," ' ben',\n"," ' bes',\n"," ' best',\n"," ' best ',\n"," ' bg',\n"," ' bgm',\n"," ' bgm ',\n"," ' bh',\n"," ' bha',\n"," ' bhas',\n"," ' bhasi',\n"," ' bi',\n"," ' big',\n"," ' big ',\n"," ' bir',\n"," ' birt',\n"," ' birth',\n"," ' bl',\n"," ' blo',\n"," ' bloc',\n"," ' block',\n"," ' bo',\n"," ' bol',\n"," ' bom',\n"," ' bon',\n"," ' bor',\n"," ' bore',\n"," ' bore ',\n"," ' bos',\n"," ' boss',\n"," ' boss ',\n"," ' box',\n"," ' box ',\n"," ' br',\n"," ' bri',\n"," ' bro',\n"," ' bro ',\n"," ' brot',\n"," ' broth',\n"," ' bu',\n"," ' bud',\n"," ' bus',\n"," ' but',\n"," ' but ',\n"," ' butt',\n"," ' butto',\n"," ' by',\n"," ' by ',\n"," ' c',\n"," ' ca',\n"," ' cam',\n"," ' came',\n"," ' camer',\n"," ' can',\n"," ' car',\n"," ' cari',\n"," ' carib',\n"," ' cas',\n"," ' ce',\n"," ' ch',\n"," ' cha',\n"," ' chaa',\n"," ' chak',\n"," ' chal',\n"," ' chali',\n"," ' chan',\n"," ' chang',\n"," ' chann',\n"," ' char',\n"," ' chara',\n"," ' chari',\n"," ' chat',\n"," ' chath',\n"," ' chav',\n"," ' chay',\n"," ' che',\n"," ' chec',\n"," ' chech',\n"," ' chei',\n"," ' chek',\n"," ' chem',\n"," ' chen',\n"," ' chenn',\n"," ' cher',\n"," ' cheri',\n"," ' chet',\n"," ' chett',\n"," ' chey',\n"," ' cheya',\n"," ' cheyt',\n"," ' cheyu',\n"," ' cheyy',\n"," ' chi',\n"," ' chil',\n"," ' chila',\n"," ' chin',\n"," ' chir',\n"," ' chiri',\n"," ' cho',\n"," ' chod',\n"," ' chodi',\n"," ' chor',\n"," ' chr',\n"," ' chri',\n"," ' chris',\n"," ' chu',\n"," ' chud',\n"," ' chudu',\n"," ' chun',\n"," ' chunk',\n"," ' chy',\n"," ' ci',\n"," ' cin',\n"," ' cine',\n"," ' cinem',\n"," ' cini',\n"," ' cinim',\n"," ' cl',\n"," ' cla',\n"," ' clas',\n"," ' class',\n"," ' cli',\n"," ' clim',\n"," ' clima',\n"," ' clu',\n"," ' club',\n"," ' cm',\n"," ' cmn',\n"," ' co',\n"," ' col',\n"," ' coll',\n"," ' colle',\n"," ' com',\n"," 
' comb',\n"," ' combo',\n"," ' come',\n"," ' come ',\n"," ' comed',\n"," ' comm',\n"," ' comme',\n"," ' comp',\n"," ' compa',\n"," ' compl',\n"," ' con',\n"," ' cop',\n"," ' copy',\n"," ' copy ',\n"," ' cr',\n"," ' cr ',\n"," ' cro',\n"," ' cror',\n"," ' crore',\n"," ' cu',\n"," ' cut',\n"," ' cut ',\n"," ' d',\n"," ' d ',\n"," ' da',\n"," ' da ',\n"," ' daa',\n"," ' dai',\n"," ' dail',\n"," ' daiv',\n"," ' daiva',\n"," ' dan',\n"," ' danc',\n"," ' dance',\n"," ' dar',\n"," ' dat',\n"," ' date',\n"," ' date ',\n"," ' day',\n"," ' day ',\n"," ' days',\n"," ' days ',\n"," ' de',\n"," ' de ',\n"," ' dea',\n"," ' dec',\n"," ' deg',\n"," ' degr',\n"," ' degra',\n"," ' del',\n"," ' deli',\n"," ' deliv',\n"," ' dev',\n"," ' dh',\n"," ' dha',\n"," ' di',\n"," ' dia',\n"," ' dial',\n"," ' dialo',\n"," ' die',\n"," ' die ',\n"," ' dil',\n"," ' dile',\n"," ' dilee',\n"," ' dilo',\n"," ' dir',\n"," ' dire',\n"," ' direc',\n"," ' dis',\n"," ' disl',\n"," ' disli',\n"," ' div',\n"," ' diva',\n"," ' divas',\n"," ' do',\n"," ' don',\n"," ' dou',\n"," ' doub',\n"," ' doubl',\n"," ' doubt',\n"," ' dq',\n"," ' dq ',\n"," ' dr',\n"," ' dri',\n"," ' du',\n"," ' dub',\n"," ' dub ',\n"," ' dubb',\n"," ' e',\n"," ' e ',\n"," ' ea',\n"," ' eat',\n"," ' eath',\n"," ' eatt',\n"," ' eatta',\n"," ' ed',\n"," ' eda',\n"," ' edh',\n"," ' edi',\n"," ' edit',\n"," ' edit ',\n"," ' editi',\n"," ' edu',\n"," ' eduk',\n"," ' edukk',\n"," ' edut',\n"," ' eduth',\n"," ' ee',\n"," ' ee ',\n"," ' eee',\n"," ' eee ',\n"," ' eet',\n"," ' ef',\n"," ' eff',\n"," ' effe',\n"," ' effec',\n"," ' eg',\n"," ' ej',\n"," ' ejj',\n"," ' ejja',\n"," ' ejjat',\n"," ' ek',\n"," ' ekk',\n"," ' ekka',\n"," ' ekka ',\n"," ' el',\n"," ' ela',\n"," ' ell',\n"," ' ella',\n"," ' ella ',\n"," ' ellaa',\n"," ' ellam',\n"," ' ellar',\n"," ' ellav',\n"," ' en',\n"," ' ena',\n"," ' end',\n"," ' enda',\n"," ' endh',\n"," ' ene',\n"," ' eng',\n"," ' enga',\n"," ' engan',\n"," ' engi',\n"," ' engl',\n"," ' engli',\n"," ' eni',\n"," ' 
eni ',\n"," ' enik',\n"," ' enik ',\n"," ' enikk',\n"," ' eniku',\n"," ' enk',\n"," ' enki',\n"," ' enkil',\n"," ' enn',\n"," ' enn ',\n"," ' enna',\n"," ' enna ',\n"," ' ennaa',\n"," ' ennal',\n"," ' enne',\n"," ' enne ',\n"," ' enni',\n"," ' enno',\n"," ' ennor',\n"," ' ennu',\n"," ' ennu ',\n"," ' ennul',\n"," ' ennum',\n"," ' ent',\n"," ' enta',\n"," ' enta ',\n"," ' entam',\n"," ' ente',\n"," ' ente ',\n"," ' enter',\n"," ' enth',\n"," ' enth ',\n"," ' entha',\n"," ' enthe',\n"," ' enthi',\n"," ' entho',\n"," ' enthu',\n"," ' entr',\n"," ' enu',\n"," ' enu ',\n"," ' ep',\n"," ' epi',\n"," ' epp',\n"," ' eppo',\n"," ' er',\n"," ' era',\n"," ' erak',\n"," ' eran',\n"," ' erang',\n"," ' es',\n"," ' esh',\n"," ' esht',\n"," ' et',\n"," ' eth',\n"," ' eth ',\n"," ' etha',\n"," ' ethan',\n"," ' ethi',\n"," ' ethil',\n"," ' etho',\n"," ' ethr',\n"," ' ethra',\n"," ' ethu',\n"," ' ethu ',\n"," ' etr',\n"," ' etra',\n"," ' etra ',\n"," ' ett',\n"," ' etta',\n"," ' etta ',\n"," ' ettan',\n"," ' ettav',\n"," ' ev',\n"," ' eva',\n"," ' evd',\n"," ' evde',\n"," ' eve',\n"," ' ever',\n"," ' evi',\n"," ' evid',\n"," ' evide',\n"," ' ex',\n"," ' exp',\n"," ' expe',\n"," ' ez',\n"," ' ezh',\n"," ' ezhu',\n"," ' ezhut',\n"," ' f',\n"," ' fa',\n"," ' fac',\n"," ' face',\n"," ' face ',\n"," ' fah',\n"," ' faha',\n"," ' fahad',\n"," ' fam',\n"," ' fami',\n"," ' famil',\n"," ' fan',\n"," ' fan ',\n"," ' fan.',\n"," ' fan..',\n"," ' fana',\n"," ' fans',\n"," ' fans ',\n"," ' fansi',\n"," ' fas',\n"," ' fd',\n"," ' fdf',\n"," ' fdfs',\n"," ' fdfs ',\n"," ' fe',\n"," ' fee',\n"," ' feel',\n"," ' feel ',\n"," ' feel.',\n"," ' feeli',\n"," ' fi',\n"," ' fig',\n"," ' figh',\n"," ' fight',\n"," ' fil',\n"," ' fili',\n"," ' filim',\n"," ' film',\n"," ' film ',\n"," ' film.',\n"," ' films',\n"," ' fir',\n"," ' firs',\n"," ' first',\n"," ' fl',\n"," ' flo',\n"," ' flop',\n"," ' flop ',\n"," ' fo',\n"," ' for',\n"," ' for ',\n"," ' fr',\n"," ' fra',\n"," ' fre',\n"," ' free',\n"," ' 
fri',\n"," ' frie',\n"," ' frien',\n"," ' fro',\n"," ' from',\n"," ' from ',\n"," ' fu',\n"," ' ful',\n"," ' full',\n"," ' full ',\n"," ' g',\n"," ' ga',\n"," ' gam',\n"," ' gan',\n"," ' ge',\n"," ' gee',\n"," ' geet',\n"," ' gen',\n"," ' get',\n"," ' get ',\n"," ' gh',\n"," ' gho',\n"," ' ghos',\n"," ' gi',\n"," ' gir',\n"," ' giri',\n"," ' gl',\n"," ' go',\n"," ' god',\n"," ' goo',\n"," ' good',\n"," ' good ',\n"," ' goos',\n"," ' goose',\n"," ' gop',\n"," ' gopi',\n"," ' gopi ',\n"," ' gr',\n"," ' gra',\n"," ' gre',\n"," ' grea',\n"," ' great',\n"," ' gu',\n"," ' guy',\n"," ' guys',\n"," ' h',\n"," ' ha',\n"," ' ha ',\n"," ' hai',\n"," ' han',\n"," ' hang',\n"," ' hang ',\n"," ' hap',\n"," ' happ',\n"," ' happy',\n"," ' har',\n"," ' hard',\n"," ' hard ',\n"," ' hat',\n"," ' hate',\n"," ' hater',\n"," ' he',\n"," ' hea',\n"," ' heav',\n"," ' heavy',\n"," ' her',\n"," ' here',\n"," ' here ',\n"," ' here.',\n"," ' hero',\n"," ' hero ',\n"," ' hi',\n"," ' hig',\n"," ' high',\n"," ' hin',\n"," ' hind',\n"," ' hindi',\n"," ' his',\n"," ' hist',\n"," ' histo',\n"," ' hit',\n"," ' hit ',\n"," ' ho',\n"," ' hol',\n"," ' holl',\n"," ' holly',\n"," ' hop',\n"," ' hou',\n"," ' hour',\n"," ' hr',\n"," ' hri',\n"," ' hrid',\n"," ' hrida',\n"," ' hu',\n"," ' hy',\n"," ' i',\n"," ' i ',\n"," \" i'\",\n"," \" i'm\",\n"," \" i'm \",\n"," ' ia',\n"," ' iam',\n"," ' iam ',\n"," ' ic',\n"," ' ich',\n"," ' icha',\n"," ' ichay',\n"," ' id',\n"," ' ida',\n"," ' idak',\n"," ' idakk',\n"," ' idh',\n"," ' idh ',\n"," ' idi',\n"," ' idi ',\n"," ' idu',\n"," ' ij',\n"," ' ijj',\n"," ' ijja',\n"," ' ijjat',\n"," ' ik',\n"," ' ikk',\n"," ' ikka',\n"," ' ikka ',\n"," ' ikka.',\n"," ' ikkaa',\n"," ' ikkad',\n"," ' ikkan',\n"," ' ikkay',\n"," 
...]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EDDXkAUI1DF1","executionInfo":{"status":"ok","timestamp":1624682789931,"user_tz":-330,"elapsed":17,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"9999a616-7036-4ebf-e658-3faa21fef2d9"},"source":["tfidf_train.shape"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(15888, 30000)"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"szYsgWh4UFOy","executionInfo":{"status":"ok","timestamp":1624682789932,"user_tz":-330,"elapsed":8,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["x_train = tfidf_train\n","x_test = tfidf_val\n","y_train = y_train_index\n","y_test = y_val_index"],"execution_count":13,"outputs":[]},{"cell_type":"code","metadata":{"id":"QbypHNhYSMDy","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1624684748205,"user_tz":-330,"elapsed":1958280,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"c480092c-c0bb-4ce3-c750-71aff0662557"},"source":["# Fit each baseline classifier on the TF-IDF training features and print a\n","# per-class report on the validation split (x_test / y_test).\n","from sklearn import metrics\n","\n","# Fixed seed so the randomized estimators give repeatable scores.\n","RANDOM_STATE = 42\n","\n","from sklearn.svm import SVC\n","# probability=True is needed for the predict_proba call on model_SVM further down.\n","model_SVM = SVC(probability=True, random_state=RANDOM_STATE)\n","model_SVM.fit(x_train, y_train)\n","y_pred_SVM = model_SVM.predict(x_test)\n","print(\"SVM\")\n","print(metrics.classification_report(y_test, y_pred_SVM))\n","\n","from sklearn.ensemble import RandomForestClassifier\n","rf = RandomForestClassifier(random_state=RANDOM_STATE)\n","rf.fit(x_train, y_train)\n","y_pred_rf = rf.predict(x_test)\n","print(\"random\")\n",
"print(metrics.classification_report(y_test, y_pred_rf))\n","\n","from sklearn.linear_model import LogisticRegression\n","# The default iteration cap stopped lbfgs before convergence (ConvergenceWarning),\n","# so give the solver more iterations.\n","LR = LogisticRegression(max_iter=1000)\n","LR.fit(x_train, y_train)\n","y_pred_LR = LR.predict(x_test)\n","print(\"Logistic Regression\")\n","print(metrics.classification_report(y_test, y_pred_LR))\n","\n","from sklearn.neighbors import KNeighborsClassifier\n","neigh = KNeighborsClassifier()\n","neigh.fit(x_train, y_train)\n","y_pred_KNN = neigh.predict(x_test)\n","print(\"KNN\")\n","print(metrics.classification_report(y_test, y_pred_KNN))\n","\n","from sklearn.naive_bayes import GaussianNB\n","# GaussianNB is fitted on dense arrays, so the TF-IDF matrices are converted\n","# with .toarray(); this is memory-heavy (the training matrix is 15888 x 30000).\n","naive = GaussianNB()\n","naive.fit(x_train.toarray(), y_train)\n","y_pred_naive = naive.predict(x_test.toarray())\n","print(\"Naive Bayes\")\n","print(metrics.classification_report(y_test, y_pred_naive))\n","\n","from sklearn.ensemble import GradientBoostingClassifier\n","gradient = GradientBoostingClassifier(random_state=RANDOM_STATE)\n","gradient.fit(x_train, y_train)\n","y_pred_gradient = gradient.predict(x_test)\n","print(\"Gradient Boosting\")\n","print(metrics.classification_report(y_test, y_pred_gradient))\n","\n","from sklearn.tree import DecisionTreeClassifier\n","decision = DecisionTreeClassifier(random_state=RANDOM_STATE)\n","decision.fit(x_train, y_train)\n","y_pred_decision = decision.predict(x_test)\n","print(\"Decision Tree\")\n","print(metrics.classification_report(y_test, y_pred_decision))\n","\n","from sklearn.ensemble import AdaBoostClassifier\n","AdaBoostclf = AdaBoostClassifier(random_state=RANDOM_STATE)\n","AdaBoostclf.fit(x_train, y_train)\n","y_pred_AdaBoostclf = AdaBoostclf.predict(x_test)\n","print(\"AdaBoost classifier\")\n","print(metrics.classification_report(y_test, y_pred_AdaBoostclf))"],"execution_count":14,"outputs":[{"output_type":"stream","text":["SVM\n"," precision recall f1-score support\n","\n"," 0 0.73 0.24 0.36 102\n"," 1 0.78 0.54 0.64 237\n"," 2 0.74 0.87 0.80 706\n"," 3 0.83 0.79 0.81 141\n"," 4 0.74 0.77 0.75 580\n","\n"," accuracy 0.75 1766\n"," macro
avg 0.76 0.64 0.67 1766\n","weighted avg 0.75 0.75 0.74 1766\n","\n","random\n"," precision recall f1-score support\n","\n"," 0 0.70 0.14 0.23 102\n"," 1 0.83 0.29 0.43 237\n"," 2 0.65 0.88 0.74 706\n"," 3 0.80 0.67 0.73 141\n"," 4 0.71 0.72 0.71 580\n","\n"," accuracy 0.69 1766\n"," macro avg 0.74 0.54 0.57 1766\n","weighted avg 0.71 0.69 0.66 1766\n","\n"],"name":"stdout"},{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n","STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n","\n","Increase the number of iterations (max_iter) or scale the data as shown in:\n"," https://scikit-learn.org/stable/modules/preprocessing.html\n","Please also refer to the documentation for alternative solver options:\n"," https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n"," extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"],"name":"stderr"},{"output_type":"stream","text":["Logistic Regression\n"," precision recall f1-score support\n","\n"," 0 0.64 0.26 0.37 102\n"," 1 0.77 0.51 0.61 237\n"," 2 0.74 0.86 0.80 706\n"," 3 0.84 0.76 0.80 141\n"," 4 0.72 0.77 0.74 580\n","\n"," accuracy 0.74 1766\n"," macro avg 0.74 0.63 0.66 1766\n","weighted avg 0.74 0.74 0.73 1766\n","\n","KNN\n"," precision recall f1-score support\n","\n"," 0 0.46 0.29 0.36 102\n"," 1 0.48 0.49 0.48 237\n"," 2 0.63 0.79 0.70 706\n"," 3 0.65 0.62 0.64 141\n"," 4 0.67 0.53 0.59 580\n","\n"," accuracy 0.62 1766\n"," macro avg 0.58 0.54 0.55 1766\n","weighted avg 0.62 0.62 0.61 1766\n","\n","Naive Bayes\n"," precision recall f1-score support\n","\n"," 0 0.21 0.28 0.24 102\n"," 1 0.28 0.58 0.38 237\n"," 2 0.70 0.62 0.66 706\n"," 3 0.68 0.61 0.64 141\n"," 4 0.62 0.42 0.50 580\n","\n"," accuracy 0.53 1766\n"," macro avg 0.50 0.50 0.48 1766\n","weighted avg 0.59 0.53 0.54 1766\n","\n","Gradient Boosting\n"," precision recall f1-score support\n","\n"," 0 0.67 0.16 0.25 102\n"," 1 
0.81 0.37 0.51 237\n"," 2 0.63 0.89 0.74 706\n"," 3 0.82 0.66 0.73 141\n"," 4 0.74 0.67 0.70 580\n","\n"," accuracy 0.69 1766\n"," macro avg 0.73 0.55 0.59 1766\n","weighted avg 0.71 0.69 0.67 1766\n","\n","Decision Tree\n"," precision recall f1-score support\n","\n"," 0 0.22 0.21 0.21 102\n"," 1 0.41 0.38 0.40 237\n"," 2 0.63 0.65 0.64 706\n"," 3 0.59 0.56 0.58 141\n"," 4 0.59 0.60 0.60 580\n","\n"," accuracy 0.56 1766\n"," macro avg 0.49 0.48 0.48 1766\n","weighted avg 0.56 0.56 0.56 1766\n","\n","AdaBoost classifier\n"," precision recall f1-score support\n","\n"," 0 0.34 0.10 0.15 102\n"," 1 0.81 0.18 0.29 237\n"," 2 0.53 0.50 0.52 706\n"," 3 0.61 0.61 0.61 141\n"," 4 0.45 0.68 0.54 580\n","\n"," accuracy 0.50 1766\n"," macro avg 0.55 0.41 0.42 1766\n","weighted avg 0.54 0.50 0.48 1766\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"dutLuW-7NitW","executionInfo":{"status":"ok","timestamp":1624684748208,"user_tz":-330,"elapsed":29,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":[""],"execution_count":14,"outputs":[]},{"cell_type":"code","metadata":{"id":"uSFmluVXNkL2","executionInfo":{"status":"ok","timestamp":1624684748209,"user_tz":-330,"elapsed":21,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":[""],"execution_count":14,"outputs":[]},{"cell_type":"code","metadata":{"id":"aKJdPx2jNkO3"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"XbQ1Hm3TNkSP"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"bQANqfdsf0i6","executionInfo":{"status":"ok","timestamp":1624684807665,"user_tz":-330,"elapsed":1105,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["tfidf_test = tfidfvec.transform(data_test['text'])"],"execution_count":15,"outputs":[]},{"cell_type":"code","metadata":{"id":"RBDDmfD_URTa","executionInfo":{"status":"ok","timestamp":1624684837824,"user_tz":-330,"elapsed":29475,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_model_SVM_prob = model_SVM.predict_proba(tfidf_test)"],"execution_count":16,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7VQ3hfA7gECM","executionInfo":{"status":"ok","timestamp":1624684837826,"user_tz":-330,"elapsed":12,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"d72bbc7b-ff46-4285-a82f-5b670495cf1b"},"source":["y_pred_model_SVM_prob"],"execution_count":17,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[5.02168957e-01, 8.42647481e-02, 1.51468772e-01, 4.81105332e-02,\n"," 2.13986990e-01],\n"," [4.69749344e-02, 1.37106408e-01, 3.08146152e-01, 2.19189985e-03,\n"," 5.05580606e-01],\n"," [5.17165730e-02, 1.39418025e-01, 4.16065412e-01, 1.71490637e-02,\n"," 3.75650926e-01],\n"," ...,\n"," [9.95890693e-02, 1.62487191e-01, 4.38871624e-01, 1.20002178e-03,\n"," 2.97852093e-01],\n"," [9.88383717e-02, 4.00563448e-01, 3.18753952e-01, 6.64705865e-04,\n"," 1.81179523e-01],\n"," [2.13842768e-01, 2.47631509e-01, 4.29130849e-01, 4.49199039e-04,\n"," 1.08945674e-01]])"]},"metadata":{"tags":[]},"execution_count":17}]},{"cell_type":"code","metadata":{"id":"i_feFgnOiZM0","executionInfo":{"status":"ok","timestamp":1624684977869,"user_tz":-330,"elapsed":615,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["# predict_proba columns follow model_SVM.classes_ (the encoded labels), so map\n","# them back through the LabelEncoder instead of hard-coding the label order.\n","df_y_pred_model_SVM_prob = pd.DataFrame(y_pred_model_SVM_prob, columns=le.inverse_transform(model_SVM.classes_))"],"execution_count":18,"outputs":[]},{"cell_type":"code","metadata":{"id":"PujUzf51iZOG","executionInfo":{"status":"ok","timestamp":1624684979229,"user_tz":-330,"elapsed":696,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_model_SVM_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/SVM_char_1-6_gram_Malayalam_74.csv',index = False)"],"execution_count":19,"outputs":[]},{"cell_type":"code","metadata":{"id":"k-XiNgH_i1Md","executionInfo":{"status":"ok","timestamp":1624684981037,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_LR_prob = LR.predict_proba(tfidf_test)"],"execution_count":20,"outputs":[]},{"cell_type":"code","metadata":{"id":"pS6nkrMbi1P_","executionInfo":{"status":"ok","timestamp":1624684982931,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["# Column names come from the encoder via LR.classes_, matching the predict_proba column order.\n","df_y_pred_LR_prob = pd.DataFrame(y_pred_LR_prob, columns=le.inverse_transform(LR.classes_))"],"execution_count":21,"outputs":[]},{"cell_type":"code","metadata":{"id":"ru0vsVTsiZT3","executionInfo":{"status":"ok","timestamp":1624684984209,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_LR_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/LR_char_1-6_gram_Malayalam_73.csv', index = False)"],"execution_count":22,"outputs":[]},{"cell_type":"code","metadata":{"id":"7vXANa-i7HFG"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"OYQGvHNYnxGT"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Ig9Rc2Lrny6y"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/ML_Classifier_Tamil_Sentiment (1).ipynb b/ML_Classifier_Tamil_Sentiment (1).ipynb new file mode 100644 index 0000000..77dad5c --- /dev/null +++ b/ML_Classifier_Tamil_Sentiment (1).ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ML_Classifier_Tamil_Sentiment.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wXKoRJGIBNuM","executionInfo":{"status":"ok","timestamp":1624713991382,"user_tz":-330,"elapsed":25886,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"873c6438-eb14-48fa-b826-e400b25a394f"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"57R5J3_1OfiA","executionInfo":{"status":"ok","timestamp":1624713993052,"user_tz":-330,"elapsed":1674,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_train.tsv', sep = '\\t')\n","data_dev = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_test_withoutlabels.tsv', sep = '\\t')\n"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"SNMFlX4j7BMK","executionInfo":{"status":"ok","timestamp":1624713993052,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"5012f734-02a9-4138-d8d8-99674d323153"},"source":["data_train['category'].value_counts()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 20069\n","unknown_state 5628\n","Negative 4271\n","Mixed_feelings 4020\n","not-Tamil 1667\n","Positive 1\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"MJkeYXof7BNp","executionInfo":{"status":"ok","timestamp":1624713993501,"user_tz":-330,"elapsed":9,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["data_train['category'] = data_train['category'].replace('Positive ', 
'Positive')"],"execution_count":4,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"DsGfMROx7BPv","executionInfo":{"status":"ok","timestamp":1624713993501,"user_tz":-330,"elapsed":9,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"d9d21b2f-935d-41e8-ae3a-5a5ec32a8665"},"source":["data_train['category'].value_counts()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 20070\n","unknown_state 5628\n","Negative 4271\n","Mixed_feelings 4020\n","not-Tamil 1667\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"yt4mzstX7BSt","executionInfo":{"status":"ok","timestamp":1624713993502,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"0015582d-6c3d-4a1d-a505-86f4d132e1e9"},"source":["data_dev['category'].value_counts()"],"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 2257\n","unknown_state 611\n","Negative 480\n","Mixed_feelings 438\n","not-Tamil 176\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cuV4OjaH7BUu","executionInfo":{"status":"ok","timestamp":1624713993941,"user_tz":-330,"elapsed":444,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"73474deb-c152-40ed-8a10-290e801a0293"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = 
preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_dev_index = le.transform(data_dev['category'])"],"execution_count":7,"outputs":[{"output_type":"stream","text":["['Mixed_feelings' 'Negative' 'Positive' 'not-Tamil' 'unknown_state']\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tMXviBypwXb0","executionInfo":{"status":"ok","timestamp":1624713993941,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"6e584401-65d6-454a-ba9a-6724be297fa2"},"source":["data_train.shape, data_dev.shape, data_test.shape"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((35656, 2), (3962, 2), (4402, 2))"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"mY8Wqps9WAMq"},"source":["TF-IDF n-gram ranges:\n","\n","- word-level features: `ngram_range=(1, 3)`\n","- character-level features: `ngram_range=(1, 6)`"]},{"cell_type":"code","metadata":{"id":"jqicHV-qRH4V","executionInfo":{"status":"ok","timestamp":1624714004512,"user_tz":-330,"elapsed":10573,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["from sklearn.feature_extraction.text import TfidfVectorizer\n","tfidfvec = TfidfVectorizer(ngram_range=(1,6), analyzer = 'char_wb', max_features = 15000)\n","#tfidfvec = TfidfVectorizer(ngram_range=(1,3), max_features = 30000)\n","tfidf_train = tfidfvec.fit_transform(data_train['text'])\n","tfidf_val =
tfidfvec.transform(data_dev['text'])"],"execution_count":9,"outputs":[]},{"cell_type":"code","metadata":{"id":"iV6FzGg_0leL","executionInfo":{"status":"ok","timestamp":1624714004512,"user_tz":-330,"elapsed":8,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["#tfidfvec.get_feature_names()"],"execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EDDXkAUI1DF1","executionInfo":{"status":"ok","timestamp":1624714004513,"user_tz":-330,"elapsed":8,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"ad16a88d-c351-4aab-8b3c-c40eeac71095"},"source":["tfidf_train.shape"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(35656, 15000)"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"szYsgWh4UFOy","executionInfo":{"status":"ok","timestamp":1624714004513,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["x_train = tfidf_train\n","x_test = tfidf_val\n","y_train = y_train_index\n","y_test = y_dev_index"],"execution_count":12,"outputs":[]},{"cell_type":"code","metadata":{"id":"jaY3wC5IUFSI","executionInfo":{"status":"ok","timestamp":1624714004513,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":[""],"execution_count":12,"outputs":[]},
{"cell_type":"code","metadata":{"id":"QbypHNhYSMDy","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1624726693021,"user_tz":-330,"elapsed":12688514,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"d226681b-d734-40d4-f88b-966b0ecc94bf"},"source":["from sklearn import metrics\n","from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier\n","from sklearn.linear_model import LogisticRegression\n","from sklearn.naive_bayes import GaussianNB\n","from sklearn.neighbors import KNeighborsClassifier\n","from sklearn.svm import SVC\n","from sklearn.tree import DecisionTreeClassifier\n","\n","SEED = 42  # fixed seed so the stochastic models are repeatable across runs\n","\n","\n","def fit_and_report(title, model, dense=False):\n","    \"\"\"Fit `model` on the training split, print its dev-split report, return the dev predictions.\n","\n","    title -- heading printed above the classification report.\n","    model -- unfitted scikit-learn classifier; it is fitted in place.\n","    dense -- when True, pass the TF-IDF matrices through .toarray() first\n","             (needed for GaussianNB, which rejects sparse input).\n","    \"\"\"\n","    # Densify lazily so the train and dev arrays are never both held as locals.\n","    as_input = (lambda matrix: matrix.toarray()) if dense else (lambda matrix: matrix)\n","    model.fit(as_input(x_train), y_train)\n","    predictions = model.predict(as_input(x_test))\n","    print(title)\n","    print(metrics.classification_report(y_test, predictions))\n","    return predictions\n","\n","\n","# probability=True enables predict_proba, which the export cells call on this model.\n","model_SVM = SVC(probability=True, random_state=SEED)\n","y_pred_SVM = fit_and_report(\"SVM\", model_SVM)\n","\n","rf = RandomForestClassifier(random_state=SEED)\n","y_pred_rf = fit_and_report(\"random\", rf)\n","\n","# The default max_iter=100 stopped lbfgs before convergence (ConvergenceWarning), so allow more iterations.\n","LR = LogisticRegression(max_iter=1000)\n","y_pred_LR = fit_and_report(\"Logistic Regression\", LR)\n","\n","neigh = KNeighborsClassifier()\n","y_pred_KNN = fit_and_report(\"KNN\", neigh)\n","\n","naive = GaussianNB()\n","y_pred_naive = fit_and_report(\"Naive Bayes\", naive, dense=True)\n","\n","gradient = GradientBoostingClassifier(random_state=SEED)\n","y_pred_gradient = fit_and_report(\"Gradient Boosting\", gradient)\n","\n","decision = DecisionTreeClassifier(random_state=SEED)\n","y_pred_decision = fit_and_report(\"Decision Tree\", decision)\n","\n","AdaBoostclf = AdaBoostClassifier(random_state=SEED)\n","y_pred_AdaBoostclf = fit_and_report(\"AdaBoost classifier\", AdaBoostclf)\n"],
"execution_count":13,"outputs":[{"output_type":"stream","text":["SVM\n"," precision recall f1-score support\n","\n"," 0 0.41 0.10 0.16 438\n"," 1 0.55 0.28 0.37 480\n"," 2 0.67 0.93 0.78 2257\n"," 3 0.81 0.49 0.61 176\n"," 4 0.56 0.33 0.42 611\n","\n"," accuracy 0.65 3962\n"," macro avg 0.60 0.43 0.47 3962\n","weighted avg 0.62 0.65 0.60 3962\n","\n","random\n"," precision recall f1-score support\n","\n"," 0 0.48 0.08 0.14 438\n"," 1 0.59 0.16 0.25 480\n"," 2 0.64 0.96 0.77 2257\n"," 3 0.81 0.39 0.53 176\n"," 4 0.62 0.28 0.38 611\n","\n"," accuracy 0.64 3962\n"," macro avg 0.63 0.37 0.41 3962\n","weighted avg 0.62 0.64 0.57 3962\n","\n"],"name":"stdout"},
{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n","STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n","\n","Increase the number of iterations (max_iter) or scale the data as shown in:\n"," https://scikit-learn.org/stable/modules/preprocessing.html\n","Please also refer to the documentation for alternative solver options:\n"," https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n"," extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"],"name":"stderr"},{"output_type":"stream","text":["Logistic Regression\n"," precision recall f1-score support\n","\n"," 0 0.35 0.13 0.19 438\n"," 1 0.52 0.32 0.40 480\n"," 2 0.69 0.89 0.78 2257\n"," 3 0.70 0.52 0.60 176\n"," 4 0.47 0.36 0.41 611\n","\n"," accuracy 0.64 3962\n"," macro avg 0.55 0.45 0.47 3962\n","weighted avg 0.60 0.64 0.60 3962\n","\n","KNN\n"," precision recall f1-score support\n","\n"," 0 0.23 0.33 0.27 438\n"," 1 0.42 0.24 0.31 480\n"," 2 0.71 0.79 0.75 2257\n"," 3 0.70 0.48 0.57 176\n"," 4 0.43 0.30 0.36 611\n","\n"," accuracy 0.58 3962\n"," macro avg 0.50 0.43 0.45 3962\n","weighted avg 0.58 0.58 0.57 3962\n","\n","Naive Bayes\n"," precision recall f1-score support\n","\n"," 0 0.19 0.14 0.16 438\n"," 1 0.22 0.66 0.33 480\n"," 2 0.84 0.30 0.45 2257\n"," 3 0.16 0.78 0.27 176\n"," 4 0.34 0.28 0.31 611\n","\n"," accuracy 0.35 3962\n"," macro avg 0.35 0.43 0.30 3962\n","weighted avg 0.59 0.35 0.37 3962\n","\n","Gradient Boosting\n"," precision recall f1-score support\n","\n"," 0 0.43 0.10 0.16 438\n"," 1 0.59 0.20 0.30 480\n"," 2 0.64 0.95 0.77 2257\n"," 3 0.75 0.42 0.54 176\n"," 4 0.56 0.23 0.32 611\n","\n"," accuracy 0.63 3962\n"," macro avg 0.59 0.38 0.42 3962\n","weighted avg 0.60 0.63 0.56 3962\n","\n","Decision Tree\n"," precision recall f1-score support\n","\n"," 0 0.19 0.19 0.19 438\n"," 1 0.25 0.23 0.24 480\n"," 2 0.68 0.68 0.68 2257\n"," 3 0.41 0.39 0.40 176\n"," 4 0.32 0.35 0.33 611\n","\n"," accuracy 0.51 3962\n"," macro avg 0.37 0.37 0.37 3962\n","weighted avg 0.51 0.51 0.51 3962\n","\n","AdaBoost classifier\n"," precision recall f1-score 
support\n","\n"," 0 0.12 0.00 0.00 438\n"," 1 0.35 0.16 0.22 480\n"," 2 0.62 0.91 0.74 2257\n"," 3 0.53 0.41 0.46 176\n"," 4 0.43 0.17 0.24 611\n","\n"," accuracy 0.59 3962\n"," macro avg 0.41 0.33 0.33 3962\n","weighted avg 0.50 0.59 0.50 3962\n","\n"],"name":"stdout"}]},
{"cell_type":"code","metadata":{"id":"J9ZwiyNu7f68"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"gu1dEWtV7hGe"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"n3yfQ5gK7hIv"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"LXrTzA5b7hJu"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"lXJo2xDN7hL_"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"QPT8OERF7hNS"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"pqnwcmm07hPD"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"3w33K6ut7hQp"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"plDF26KA7hUK"},"source":[""],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"bQANqfdsf0i6","executionInfo":{"status":"ok","timestamp":1624726842779,"user_tz":-330,"elapsed":1265,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["# Vectorise the held-out test split with the vocabulary fitted on the training data.\n","tfidf_test = tfidfvec.transform(data_test['text'])"],"execution_count":14,"outputs":[]},
{"cell_type":"code","metadata":{"id":"RBDDmfD_URTa","executionInfo":{"status":"ok","timestamp":1624726967915,"user_tz":-330,"elapsed":123645,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["# Per-class probabilities for every test row; columns follow model_SVM.classes_.\n","y_pred_model_SVM_prob = model_SVM.predict_proba(tfidf_test)"],"execution_count":15,"outputs":[]},
{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7VQ3hfA7gECM","executionInfo":{"status":"ok","timestamp":1624726967919,"user_tz":-330,"elapsed":77,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"f7032a84-e789-4bb7-d2e2-ba05296fbaac"},"source":["y_pred_model_SVM_prob"],"execution_count":16,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0.03102559, 0.00748961, 0.89722316, 0.00369449, 0.06056715],\n"," [0.12136104, 0.48912205, 0.31628225, 0.01132158, 0.06191308],\n"," [0.02553129, 0.04740874, 0.84037752, 0.00242552, 0.08425693],\n"," ...,\n"," [0.06049699, 0.04710759, 0.87798557, 0.00254955, 0.01186029],\n"," [0.2697956 , 0.07287886, 0.56514433, 0.00699149, 0.08518972],\n"," [0.10483392, 0.03613657, 0.80593112, 0.0055734 , 0.04752499]])"]},"metadata":{"tags":[]},"execution_count":16}]},
{"cell_type":"code","metadata":{"id":"i_feFgnOiZM0","executionInfo":{"status":"ok","timestamp":1624726967921,"user_tz":-330,"elapsed":52,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["# Derive the column names from the encoder in the model's own class order,\n","# so the header cannot drift out of sync with the predict_proba columns.\n","df_y_pred_model_SVM_prob = pd.DataFrame(y_pred_model_SVM_prob, columns=le.inverse_transform(model_SVM.classes_))"],"execution_count":17,"outputs":[]},
{"cell_type":"code","metadata":{"id":"PujUzf51iZOG","executionInfo":{"status":"ok","timestamp":1624727147819,"user_tz":-330,"elapsed":553,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["df_y_pred_model_SVM_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/SVM_char_1-6_gram_TAMIL_60.csv',index = False)"],"execution_count":21,"outputs":[]},
{"cell_type":"code","metadata":{"id":"k-XiNgH_i1Md","executionInfo":{"status":"ok","timestamp":1624727153865,"user_tz":-330,"elapsed":683,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["# Per-class probabilities for every test row; columns follow LR.classes_.\n","y_pred_LR_prob = LR.predict_proba(tfidf_test)"],"execution_count":22,"outputs":[]},
{"cell_type":"code","metadata":{"id":"pS6nkrMbi1P_","executionInfo":{"status":"ok","timestamp":1624727165632,"user_tz":-330,"elapsed":427,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["# Same header derivation as for the SVM frame, using the logistic-regression class order.\n","df_y_pred_LR_prob = pd.DataFrame(y_pred_LR_prob, columns=le.inverse_transform(LR.classes_))"],"execution_count":24,"outputs":[]},
{"cell_type":"code","metadata":{"id":"ru0vsVTsiZT3","executionInfo":{"status":"ok","timestamp":1624727184840,"user_tz":-330,"elapsed":463,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["df_y_pred_LR_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/LR_char_1-6_gram_TAMIL_60.csv', index = False)"],"execution_count":25,"outputs":[]},
{"cell_type":"code","metadata":{"id":"7vXANa-i7HFG"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file