diff --git a/Ensemble_SVM_LR_Malayalam.ipynb b/Ensemble_SVM_LR_Malayalam.ipynb new file mode 100644 index 0000000..84860b3 --- /dev/null +++ b/Ensemble_SVM_LR_Malayalam.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ensemble_SVM_LR_Malayalam.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NmxNxMi5TECW","executionInfo":{"status":"ok","timestamp":1624685064818,"user_tz":-330,"elapsed":20322,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"73bc084c-c68a-44c8-e1da-4730b454edbd"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"7akUl9KLBilu"},"source":["import pandas as pd\n","SVM = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/SVM_char_1-6_gram_Malayalam_74.csv')\n","LR = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/LR_char_1-6_gram_Malayalam_73.csv')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"giZvKKReCGFd"},"source":["predictions_test_submission=(SVM+LR)/2"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"pwxeTE5ZC6y3","executionInfo":{"status":"ok","timestamp":1624685124940,"user_tz":-330,"elapsed":9,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"c606c89a-6a0f-47dc-bb4c-2f2f6127e003"},"source":["predictions_test_submission"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Mixed_feelingsNegativePositivenot-malayalamunknown_state
00.3823890.1032090.1869920.0808960.246514
10.0631250.1495190.3073310.0080700.471956
20.0620190.1457680.3949810.0368230.360409
30.0022110.0035880.0156160.8915080.087077
40.0055250.0065910.9577150.0063890.023780
..................
19570.0157210.2813890.3893910.0002670.313233
19580.0082930.0150680.0343170.0111540.931168
19590.1027010.1673980.4327410.0036430.293517
19600.1016950.3449800.3189640.0021000.232261
19610.1915140.2624730.4007890.0030910.142132
\n","

1962 rows × 5 columns

\n","
"],"text/plain":[" Mixed_feelings Negative Positive not-malayalam unknown_state\n","0 0.382389 0.103209 0.186992 0.080896 0.246514\n","1 0.063125 0.149519 0.307331 0.008070 0.471956\n","2 0.062019 0.145768 0.394981 0.036823 0.360409\n","3 0.002211 0.003588 0.015616 0.891508 0.087077\n","4 0.005525 0.006591 0.957715 0.006389 0.023780\n","... ... ... ... ... ...\n","1957 0.015721 0.281389 0.389391 0.000267 0.313233\n","1958 0.008293 0.015068 0.034317 0.011154 0.931168\n","1959 0.102701 0.167398 0.432741 0.003643 0.293517\n","1960 0.101695 0.344980 0.318964 0.002100 0.232261\n","1961 0.191514 0.262473 0.400789 0.003091 0.142132\n","\n","[1962 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"yx6tspZ0CJsv"},"source":["import numpy as np\n","predictions_test_submission = np.asarray(predictions_test_submission)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"yhvtEGcuCMKO"},"source":["import numpy as np\n","predictions_test_submission1 = np.zeros_like(predictions_test_submission)\n","predictions_test_submission1[np.arange(len(predictions_test_submission)), predictions_test_submission.argmax(1)] = 1"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UFz5rdWVDil_","executionInfo":{"status":"ok","timestamp":1624685131170,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"64d670ec-cb71-45a5-aa4b-4973f4ec7dc9"},"source":["predictions_test_submission1"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[1., 0., 0., 0., 0.],\n"," [0., 0., 0., 0., 1.],\n"," [0., 0., 1., 0., 0.],\n"," ...,\n"," [0., 0., 1., 0., 0.],\n"," [0., 1., 0., 0., 0.],\n"," [0., 0., 1., 0., 
0.]])"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"EcEP5VhyDktU"},"source":["predictions_test_submission1_final = np.argmax(predictions_test_submission1, axis=1)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"z_-UdPnAFOfo","executionInfo":{"status":"ok","timestamp":1624685133667,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"02518122-bb6f-4902-fb96-ee8a39d1f3e1"},"source":["predictions_test_submission1_final"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([0, 4, 2, ..., 2, 1, 2])"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"id":"yj7AcwqbmTKP"},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_train.tsv', sep = '\\t')\n","data_val = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_test_withoutlabels.tsv', sep = '\\t')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZnuYD4aqTkWk","executionInfo":{"status":"ok","timestamp":1624685178312,"user_tz":-330,"elapsed":658,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"7dd716ae-503c-42c0-8096-0691658b4f7b"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = 
preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_val_index = le.transform(data_val['category'])"],"execution_count":null,"outputs":[{"output_type":"stream","text":["['Mixed_feelings' 'Negative' 'Positive' 'not-malayalam' 'unknown_state']\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o5T7yAcFFpfm"},"source":["predictions_test_submission1_final1 = le.inverse_transform(predictions_test_submission1_final)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PSSBXvkTFze-","executionInfo":{"status":"ok","timestamp":1624685181535,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"58cfe4a7-98fc-453d-afe6-a23c63270ad4"},"source":["predictions_test_submission1_final1"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Mixed_feelings', 'unknown_state', 'Positive', ..., 'Positive',\n"," 'Negative', 'Positive'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"id":"otag6te-F1It"},"source":["data_test['category'] = predictions_test_submission1_final1"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"5ozZTPDwGiSv","executionInfo":{"status":"ok","timestamp":1624685186909,"user_tz":-330,"elapsed":6,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"4f1f70e7-af9b-4645-9a2c-ff4336200777"},"source":["data_test"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Mal_1Teaserinu kurach samayamkoodi mathram. Cant wa...Mixed_feelings
1Mal_2അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...unknown_state
2Mal_3മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?Positive
3Mal_4Nowadays 944k views is considered as 1M views ...not-malayalam
4Mal_5Maass.trailer ennu paranja ithaanU makkale.......Positive
............
1957Mal_1958Expression വച്ചു നോക്കുമ്പോൾ മൂക്കുത്തി കട്ടത്...Positive
1958Mal_1959എനിക്ക് ഒരു 100 like തരുമോunknown_state
1959Mal_1960Nannayi onnu poliyunna lakshnm unde...Thrill m...Positive
1960Mal_1961Athikam eduthu kayatti vekkenda....direction k...Negative
1961Mal_1962Ithupole mooonchiya Oru padam jeevithathil kan...Positive
\n","

1962 rows × 3 columns

\n","
"],"text/plain":[" id ... category\n","0 Mal_1 ... Mixed_feelings\n","1 Mal_2 ... unknown_state\n","2 Mal_3 ... Positive\n","3 Mal_4 ... not-malayalam\n","4 Mal_5 ... Positive\n","... ... ... ...\n","1957 Mal_1958 ... Positive\n","1958 Mal_1959 ... unknown_state\n","1959 Mal_1960 ... Positive\n","1960 Mal_1961 ... Negative\n","1961 Mal_1962 ... Positive\n","\n","[1962 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"id":"Jt6Z80uFGjsD"},"source":["data_test.to_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam//Ensemble_SVM_LR_Malalayam.tsv\", sep=\"\\t\", index=False)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"11fdDMTpG8QS"},"source":["check_submission = pd.read_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam//Ensemble_SVM_LR_Malalayam.tsv\", sep=\"\\t\")"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"uwyh-IqGG8SG","executionInfo":{"status":"ok","timestamp":1624685262383,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"867f7e4a-2308-4e01-b70c-9999e9dcd1be"},"source":["check_submission"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Mal_1Teaserinu kurach samayamkoodi mathram. Cant wa...Mixed_feelings
1Mal_2അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...unknown_state
2Mal_3മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?Positive
3Mal_4Nowadays 944k views is considered as 1M views ...not-malayalam
4Mal_5Maass.trailer ennu paranja ithaanU makkale.......Positive
............
1957Mal_1958Expression വച്ചു നോക്കുമ്പോൾ മൂക്കുത്തി കട്ടത്...Positive
1958Mal_1959എനിക്ക് ഒരു 100 like തരുമോunknown_state
1959Mal_1960Nannayi onnu poliyunna lakshnm unde...Thrill m...Positive
1960Mal_1961Athikam eduthu kayatti vekkenda....direction k...Negative
1961Mal_1962Ithupole mooonchiya Oru padam jeevithathil kan...Positive
\n","

1962 rows × 3 columns

\n","
"],"text/plain":[" id ... category\n","0 Mal_1 ... Mixed_feelings\n","1 Mal_2 ... unknown_state\n","2 Mal_3 ... Positive\n","3 Mal_4 ... not-malayalam\n","4 Mal_5 ... Positive\n","... ... ... ...\n","1957 Mal_1958 ... Positive\n","1958 Mal_1959 ... unknown_state\n","1959 Mal_1960 ... Positive\n","1960 Mal_1961 ... Negative\n","1961 Mal_1962 ... Positive\n","\n","[1962 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"code","metadata":{"id":"MIEN8srCnBlP"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/Ensemble_SVM_LR_RF_Kanada.ipynb b/Ensemble_SVM_LR_RF_Kanada.ipynb new file mode 100644 index 0000000..a4cf08a --- /dev/null +++ b/Ensemble_SVM_LR_RF_Kanada.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ensemble_SVM_LR_RF_Kanada.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NmxNxMi5TECW","executionInfo":{"status":"ok","timestamp":1624643509523,"user_tz":-330,"elapsed":19203,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"6cefe5b4-4a7e-4965-88ff-977f2be88ceb"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"7akUl9KLBilu","executionInfo":{"status":"ok","timestamp":1624643580587,"user_tz":-330,"elapsed":726,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import pandas as pd\n","SVM = 
pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/SVM_char_1-6_gram_kanada_64.csv')\n","RF = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/RF_char_1-6_gram_Kanada_63.csv')\n","LR = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/LR_char_1-6_gram_Kanda_66.csv')"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"id":"giZvKKReCGFd","executionInfo":{"status":"ok","timestamp":1624643580587,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["predictions_test_submission=(SVM+RF+LR)/3"],"execution_count":3,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"pwxeTE5ZC6y3","executionInfo":{"status":"ok","timestamp":1624643581267,"user_tz":-330,"elapsed":18,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"db634182-7af9-4c08-d6dd-193748b4e9ff"},"source":["predictions_test_submission"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Mixed feelingsNegativePositivenot-Kannadaunknown state
00.0774950.1976420.6546230.0045610.065680
10.0238960.0088630.6600680.1832410.123932
20.0668250.0339830.3002890.3361930.262711
30.0047950.0004790.1444800.8396620.010585
40.0370830.0337710.7366470.1618070.030693
..................
7630.0894780.1983190.3388110.2024160.170976
7640.0963020.6196560.2267300.0268540.030458
7650.0285950.1588530.8038930.0010300.007630
7660.1143380.2025690.6567950.0046180.021679
7670.1526700.3122350.4444140.0306630.060018
\n","

768 rows × 5 columns

\n","
"],"text/plain":[" Mixed feelings Negative Positive not-Kannada unknown state\n","0 0.077495 0.197642 0.654623 0.004561 0.065680\n","1 0.023896 0.008863 0.660068 0.183241 0.123932\n","2 0.066825 0.033983 0.300289 0.336193 0.262711\n","3 0.004795 0.000479 0.144480 0.839662 0.010585\n","4 0.037083 0.033771 0.736647 0.161807 0.030693\n",".. ... ... ... ... ...\n","763 0.089478 0.198319 0.338811 0.202416 0.170976\n","764 0.096302 0.619656 0.226730 0.026854 0.030458\n","765 0.028595 0.158853 0.803893 0.001030 0.007630\n","766 0.114338 0.202569 0.656795 0.004618 0.021679\n","767 0.152670 0.312235 0.444414 0.030663 0.060018\n","\n","[768 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"yx6tspZ0CJsv","executionInfo":{"status":"ok","timestamp":1624643585135,"user_tz":-330,"elapsed":2,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import numpy as np\n","predictions_test_submission = np.asarray(predictions_test_submission)"],"execution_count":5,"outputs":[]},{"cell_type":"code","metadata":{"id":"yhvtEGcuCMKO","executionInfo":{"status":"ok","timestamp":1624643585594,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import numpy as np\n","predictions_test_submission1 = np.zeros_like(predictions_test_submission)\n","predictions_test_submission1[np.arange(len(predictions_test_submission)), predictions_test_submission.argmax(1)] = 1"],"execution_count":6,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UFz5rdWVDil_","executionInfo":{"status":"ok","timestamp":1624643585595,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"61c29499-5542-43ca-89d3-b854d76c7c26"},"source":["predictions_test_submission1"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 0., 1., 0.],\n"," ...,\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.]])"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"EcEP5VhyDktU","executionInfo":{"status":"ok","timestamp":1624643587508,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["predictions_test_submission1_final = np.argmax(predictions_test_submission1, axis=1)"],"execution_count":8,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"z_-UdPnAFOfo","executionInfo":{"status":"ok","timestamp":1624643587972,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"b0202367-a8ca-4ed5-9e01-cfea7b07fb7a"},"source":["predictions_test_submission1_final"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([2, 2, 3, 3, 2, 3, 2, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 2, 2, 2,\n"," 2, 2, 3, 2, 3, 2, 0, 1, 2, 3, 3, 4, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2,\n"," 2, 3, 4, 2, 2, 2, 1, 3, 2, 2, 4, 2, 2, 1, 2, 3, 2, 2, 4, 2, 2, 4,\n"," 2, 1, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2,\n"," 2, 3, 1, 1, 2, 2, 1, 2, 1, 3, 2, 4, 3, 1, 2, 1, 4, 4, 2, 1, 1, 2,\n"," 2, 2, 2, 3, 2, 1, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1, 2, 1, 3, 2,\n"," 2, 2, 2, 2, 2, 1, 3, 2, 3, 1, 2, 3, 2, 2, 2, 
2, 4, 1, 2, 1, 2, 2,\n"," 2, 1, 2, 0, 4, 2, 2, 1, 1, 3, 4, 2, 4, 2, 2, 1, 1, 1, 2, 2, 2, 2,\n"," 2, 1, 4, 2, 3, 2, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 2, 4, 0,\n"," 2, 1, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 3, 4,\n"," 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 4, 1, 2, 2, 1, 1, 3, 2, 2, 2, 2,\n"," 2, 2, 2, 2, 2, 2, 1, 1, 1, 3, 1, 4, 2, 2, 2, 4, 1, 2, 3, 2, 4, 4,\n"," 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 3, 2, 2, 2, 3, 2, 1,\n"," 2, 2, 2, 2, 3, 3, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 3, 1, 2, 4, 2, 1,\n"," 1, 2, 4, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 3, 2, 1, 2, 1, 2, 2, 4, 2,\n"," 3, 2, 2, 2, 4, 3, 2, 2, 2, 2, 1, 3, 3, 2, 1, 3, 1, 2, 2, 2, 2, 2,\n"," 2, 2, 4, 2, 1, 2, 2, 3, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 3, 1, 2,\n"," 2, 2, 2, 2, 2, 2, 3, 0, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n"," 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 4, 2, 3, 4, 3, 2,\n"," 2, 2, 2, 3, 2, 3, 2, 2, 3, 3, 2, 1, 3, 4, 2, 2, 3, 2, 1, 4, 1, 2,\n"," 2, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2, 3, 4, 2, 2, 2, 1,\n"," 2, 2, 4, 1, 3, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 0, 2, 2, 1,\n"," 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 3, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2,\n"," 1, 2, 2, 2, 2, 2, 2, 3, 2, 4, 2, 1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n"," 1, 2, 2, 2, 2, 4, 1, 2, 2, 1, 3, 2, 1, 2, 2, 2, 2, 2, 4, 3, 2, 2,\n"," 2, 4, 2, 2, 1, 1, 4, 1, 2, 3, 2, 3, 1, 1, 2, 3, 2, 2, 3, 1, 2, 2,\n"," 1, 2, 2, 2, 2, 1, 2, 2, 3, 3, 1, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 1,\n"," 2, 3, 2, 2, 3, 2, 1, 4, 2, 3, 2, 2, 3, 2, 0, 3, 2, 2, 3, 3, 2, 3,\n"," 2, 3, 4, 2, 2, 2, 2, 3, 2, 2, 2, 2, 4, 2, 2, 1, 2, 2, 3, 3, 2, 3,\n"," 3, 1, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 3, 4, 2, 2, 2, 2, 1, 2, 1,\n"," 2, 2, 2, 1, 2, 2, 3, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 3,\n"," 2, 3, 2, 3, 2, 2, 3, 1, 2, 3, 2, 2, 4, 2, 3, 4, 2, 2, 2, 2, 2, 2,\n"," 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 3, 2, 2, 1, 2, 1, 1, 2,\n"," 1, 1, 3, 2, 2, 2, 2, 2, 2, 2, 3, 1, 2, 2, 1, 4, 1, 2, 1, 3, 2, 2,\n"," 2, 2, 2, 1, 3, 1, 2, 2, 3, 4, 4, 2, 2, 2, 1, 2, 1, 2, 2, 
2])"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"id":"yj7AcwqbmTKP","executionInfo":{"status":"ok","timestamp":1624643635383,"user_tz":-330,"elapsed":426,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_train.tsv', sep = '\\t')\n","data_val = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_test_withoutlabels.tsv', sep = '\\t')"],"execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZnuYD4aqTkWk","executionInfo":{"status":"ok","timestamp":1624643638171,"user_tz":-330,"elapsed":1185,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"768009b9-243a-43f8-e6aa-c8beff8adf8c"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_val_index = le.transform(data_val['category'])\n"],"execution_count":11,"outputs":[{"output_type":"stream","text":["['Mixed feelings' 'Negative' 'Positive' 'not-Kannada' 'unknown state']\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o5T7yAcFFpfm","executionInfo":{"status":"ok","timestamp":1624643649647,"user_tz":-330,"elapsed":490,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["predictions_test_submission1_final1 = le.inverse_transform(predictions_test_submission1_final)"],"execution_count":12,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PSSBXvkTFze-","executionInfo":{"status":"ok","timestamp":1624643651281,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"0eaee799-d6e0-40db-c3f5-32d499664a11"},"source":["predictions_test_submission1_final1"],"execution_count":13,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Positive', 'Positive', 'not-Kannada', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'not-Kannada', 'Positive', 'Mixed feelings',\n"," 'Negative', 'Positive', 'not-Kannada', 'not-Kannada',\n"," 'unknown state', 'Negative', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'unknown state', 'Positive',\n"," 'Positive', 'Positive', 'Negative', 'not-Kannada', 'Positive',\n"," 'Positive', 'unknown state', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'unknown state',\n"," 'Positive', 'Positive', 'unknown state', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 
'Positive', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'not-Kannada', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Negative', 'not-Kannada',\n"," 'Positive', 'unknown state', 'not-Kannada', 'Negative', 'Positive',\n"," 'Negative', 'unknown state', 'unknown state', 'Positive',\n"," 'Negative', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'not-Kannada', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'not-Kannada',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Negative', 'Positive', 'Negative', 'not-Kannada', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'not-Kannada', 'Positive', 'not-Kannada', 'Negative',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'unknown state', 'Negative', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'Negative', 'Positive',\n"," 'Mixed feelings', 'unknown state', 'Positive', 'Positive',\n"," 'Negative', 'Negative', 'not-Kannada', 'unknown state', 'Positive',\n"," 'unknown state', 'Positive', 'Positive', 'Negative', 'Negative',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Negative', 'unknown state', 'Positive', 'not-Kannada',\n"," 'Positive', 'not-Kannada', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'not-Kannada', 'unknown state', 'Positive',\n"," 'unknown state', 'Mixed feelings', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'not-Kannada',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'Negative', 'Positive', 'not-Kannada', 'unknown state',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Negative', 
'Positive', 'Negative',\n"," 'Positive', 'unknown state', 'Negative', 'Positive', 'Positive',\n"," 'Negative', 'Negative', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Negative', 'Negative',\n"," 'Negative', 'not-Kannada', 'Negative', 'unknown state', 'Positive',\n"," 'Positive', 'Positive', 'unknown state', 'Negative', 'Positive',\n"," 'not-Kannada', 'Positive', 'unknown state', 'unknown state',\n"," 'Positive', 'Positive', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Negative', 'Negative', 'Positive',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'not-Kannada', 'not-Kannada', 'Negative', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'Negative',\n"," 'Positive', 'unknown state', 'Positive', 'Negative', 'Negative',\n"," 'Positive', 'unknown state', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'not-Kannada', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'unknown state',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'unknown state', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Negative', 'not-Kannada', 'not-Kannada', 'Positive',\n"," 'Negative', 'not-Kannada', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'unknown state', 'Positive', 'Negative', 'Positive', 'Positive',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Negative', 'not-Kannada', 'Negative', 'Positive',\n"," 
'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'not-Kannada', 'Mixed feelings', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'unknown state',\n"," 'Positive', 'not-Kannada', 'unknown state', 'not-Kannada',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'not-Kannada',\n"," 'not-Kannada', 'Positive', 'Negative', 'not-Kannada',\n"," 'unknown state', 'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'Negative', 'unknown state', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'unknown state', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'unknown state', 'Negative',\n"," 'not-Kannada', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Mixed feelings',\n"," 'Positive', 'Positive', 'Negative', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'Negative', 'Positive', 'not-Kannada',\n"," 'Positive', 'Positive', 'Negative', 'Negative', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'unknown state',\n"," 'Positive', 'Negative', 'not-Kannada', 'Positive', 
'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'unknown state', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'not-Kannada', 'Positive', 'Negative',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'unknown state', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'unknown state', 'Positive', 'Positive', 'Negative', 'Negative',\n"," 'unknown state', 'Negative', 'Positive', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'Negative', 'Negative', 'Positive', 'not-Kannada',\n"," 'Positive', 'Positive', 'not-Kannada', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'not-Kannada',\n"," 'not-Kannada', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'Negative', 'Positive', 'Negative', 'Positive', 'not-Kannada',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'Negative',\n"," 'unknown state', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'not-Kannada', 'Positive', 'Mixed feelings', 'not-Kannada',\n"," 'Positive', 'Positive', 'not-Kannada', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'Positive', 'not-Kannada', 'unknown state',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'unknown state',\n"," 'Positive', 'Positive', 'Negative', 'Positive', 'Positive',\n"," 'not-Kannada', 'not-Kannada', 'Positive', 'not-Kannada',\n"," 'not-Kannada', 'Negative', 'Negative', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'not-Kannada', 'unknown state',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 
'Positive', 'Positive', 'not-Kannada', 'Positive',\n"," 'Negative', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Negative', 'not-Kannada', 'Positive',\n"," 'not-Kannada', 'Positive', 'not-Kannada', 'Positive', 'Positive',\n"," 'not-Kannada', 'Negative', 'Positive', 'not-Kannada', 'Positive',\n"," 'Positive', 'unknown state', 'Positive', 'not-Kannada',\n"," 'unknown state', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Positive',\n"," 'Negative', 'Negative', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'not-Kannada', 'Positive', 'Positive', 'Negative',\n"," 'Positive', 'Negative', 'Negative', 'Positive', 'Negative',\n"," 'Negative', 'not-Kannada', 'Positive', 'Positive', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'not-Kannada',\n"," 'Negative', 'Positive', 'Positive', 'Negative', 'unknown state',\n"," 'Negative', 'Positive', 'Negative', 'not-Kannada', 'Positive',\n"," 'Positive', 'Positive', 'Positive', 'Positive', 'Negative',\n"," 'not-Kannada', 'Negative', 'Positive', 'Positive', 'not-Kannada',\n"," 'unknown state', 'unknown state', 'Positive', 'Positive',\n"," 'Positive', 'Negative', 'Positive', 'Negative', 'Positive',\n"," 'Positive', 'Positive'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":13}]},{"cell_type":"code","metadata":{"id":"otag6te-F1It","executionInfo":{"status":"ok","timestamp":1624643669153,"user_tz":-330,"elapsed":657,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["data_test['category'] = 
predictions_test_submission1_final1"],"execution_count":14,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"5ozZTPDwGiSv","executionInfo":{"status":"ok","timestamp":1624643671972,"user_tz":-330,"elapsed":9,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"dfb7c7d5-5638-4890-a17e-cb4892e618f4"},"source":["data_test"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Kan_1ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ...Positive
1Kan_2Jai D BossPositive
2Kan_3Signature movenot-Kannada
3Kan_4Super song bronot-Kannada
4Kan_5Wow Super agi helidira sirPositive
............
763Kan_764Thu thukali trailerPositive
764Kan_765Siri gannadam galge haakbitallapa Thu yaro ni...Negative
765Kan_766ನಾವೇನಾದರು ಸ್ವಾಬಿಮಾನಿಯಾಗಿ ಬದುಕನ್ನು ಕಟ್ಟಿಕೊಳ್ಳಲು...Positive
766Kan_767ದಿಯಾ ನೀವು ಸೋಷಿಯಲ್ ಮೀಡಿಯಾ ಅಲ್ಲಿ ಕಾಣ್ತಿಲ್ಲ.nದಯವಿ...Positive
767Kan_768magaluru kade kalsi avanannu .navu avanige mad...Positive
\n","

768 rows × 3 columns

\n","
"],"text/plain":[" id text category\n","0 Kan_1 ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ... Positive\n","1 Kan_2 Jai D Boss Positive\n","2 Kan_3 Signature move not-Kannada\n","3 Kan_4 Super song bro not-Kannada\n","4 Kan_5 Wow Super agi helidira sir Positive\n",".. ... ... ...\n","763 Kan_764 Thu thukali trailer Positive\n","764 Kan_765 Siri gannadam galge haakbitallapa Thu yaro ni... Negative\n","765 Kan_766 ನಾವೇನಾದರು ಸ್ವಾಬಿಮಾನಿಯಾಗಿ ಬದುಕನ್ನು ಕಟ್ಟಿಕೊಳ್ಳಲು... Positive\n","766 Kan_767 ದಿಯಾ ನೀವು ಸೋಷಿಯಲ್ ಮೀಡಿಯಾ ಅಲ್ಲಿ ಕಾಣ್ತಿಲ್ಲ.nದಯವಿ... Positive\n","767 Kan_768 magaluru kade kalsi avanannu .navu avanige mad... Positive\n","\n","[768 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"id":"Jt6Z80uFGjsD","executionInfo":{"status":"ok","timestamp":1624643733058,"user_tz":-330,"elapsed":2,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["data_test.to_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/Ensemble_SVM_LR_RF.tsv\", sep=\"\\t\", index=False)"],"execution_count":16,"outputs":[]},{"cell_type":"code","metadata":{"id":"11fdDMTpG8QS","executionInfo":{"status":"ok","timestamp":1624643752467,"user_tz":-330,"elapsed":950,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["check_submission = pd.read_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/Ensemble_SVM_LR_RF.tsv\", sep=\"\\t\")"],"execution_count":17,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"uwyh-IqGG8SG","executionInfo":{"status":"ok","timestamp":1624643754211,"user_tz":-330,"elapsed":6,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"c567c4f7-cf85-4b53-e177-a56ab2f4b33a"},"source":["check_submission"],"execution_count":18,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Kan_1ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ...Positive
1Kan_2Jai D BossPositive
2Kan_3Signature movenot-Kannada
3Kan_4Super song bronot-Kannada
4Kan_5Wow Super agi helidira sirPositive
............
763Kan_764Thu thukali trailerPositive
764Kan_765Siri gannadam galge haakbitallapa Thu yaro ni...Negative
765Kan_766ನಾವೇನಾದರು ಸ್ವಾಬಿಮಾನಿಯಾಗಿ ಬದುಕನ್ನು ಕಟ್ಟಿಕೊಳ್ಳಲು...Positive
766Kan_767ದಿಯಾ ನೀವು ಸೋಷಿಯಲ್ ಮೀಡಿಯಾ ಅಲ್ಲಿ ಕಾಣ್ತಿಲ್ಲ.nದಯವಿ...Positive
767Kan_768magaluru kade kalsi avanannu .navu avanige mad...Positive
\n","

768 rows × 3 columns

\n","
"],"text/plain":[" id text category\n","0 Kan_1 ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ... Positive\n","1 Kan_2 Jai D Boss Positive\n","2 Kan_3 Signature move not-Kannada\n","3 Kan_4 Super song bro not-Kannada\n","4 Kan_5 Wow Super agi helidira sir Positive\n",".. ... ... ...\n","763 Kan_764 Thu thukali trailer Positive\n","764 Kan_765 Siri gannadam galge haakbitallapa Thu yaro ni... Negative\n","765 Kan_766 ನಾವೇನಾದರು ಸ್ವಾಬಿಮಾನಿಯಾಗಿ ಬದುಕನ್ನು ಕಟ್ಟಿಕೊಳ್ಳಲು... Positive\n","766 Kan_767 ದಿಯಾ ನೀವು ಸೋಷಿಯಲ್ ಮೀಡಿಯಾ ಅಲ್ಲಿ ಕಾಣ್ತಿಲ್ಲ.nದಯವಿ... Positive\n","767 Kan_768 magaluru kade kalsi avanannu .navu avanige mad... Positive\n","\n","[768 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"code","metadata":{"id":"MIEN8srCnBlP"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/Ensemble_SVM_LR_TAMIL.ipynb b/Ensemble_SVM_LR_TAMIL.ipynb new file mode 100644 index 0000000..ec846e4 --- /dev/null +++ b/Ensemble_SVM_LR_TAMIL.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Ensemble_SVM_LR_TAMIL.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NmxNxMi5TECW","executionInfo":{"status":"ok","timestamp":1624727247942,"user_tz":-330,"elapsed":32028,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"8b9e58f1-38e9-4a77-d019-376842937dbd"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Mounted at 
/content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"7akUl9KLBilu","executionInfo":{"status":"ok","timestamp":1624727303639,"user_tz":-330,"elapsed":1090,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import pandas as pd\n","SVM = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/SVM_char_1-6_gram_TAMIL_60.csv')\n","LR = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/LR_char_1-6_gram_TAMIL_60.csv')"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"id":"giZvKKReCGFd","executionInfo":{"status":"ok","timestamp":1624727316880,"user_tz":-330,"elapsed":481,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["predictions_test_submission=(SVM+LR)/2"],"execution_count":3,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"pwxeTE5ZC6y3","executionInfo":{"status":"ok","timestamp":1624727317337,"user_tz":-330,"elapsed":8,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"60d3d4aa-3e83-4270-8703-549046f79c21"},"source":["predictions_test_submission"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
Mixed_feelingsNegativePositivenot-Tamilunknown_state
00.0272270.0056860.9228720.0020070.042209
10.1111000.4542580.3581380.0179900.058514
20.0249650.0455580.8540310.0023350.073111
30.1157170.0816070.5685920.1354850.098599
40.0714290.0701230.0805850.6531910.124671
..................
43970.1248220.6527520.1575180.0051160.059792
43980.1150320.0857330.7218800.0087460.068609
43990.0385340.0322520.9176040.0022800.009329
44000.2734350.0768230.5613150.0045620.083865
44010.0988150.0314360.8164310.0048270.048490
\n","

4402 rows × 5 columns

\n","
"],"text/plain":[" Mixed_feelings Negative Positive not-Tamil unknown_state\n","0 0.027227 0.005686 0.922872 0.002007 0.042209\n","1 0.111100 0.454258 0.358138 0.017990 0.058514\n","2 0.024965 0.045558 0.854031 0.002335 0.073111\n","3 0.115717 0.081607 0.568592 0.135485 0.098599\n","4 0.071429 0.070123 0.080585 0.653191 0.124671\n","... ... ... ... ... ...\n","4397 0.124822 0.652752 0.157518 0.005116 0.059792\n","4398 0.115032 0.085733 0.721880 0.008746 0.068609\n","4399 0.038534 0.032252 0.917604 0.002280 0.009329\n","4400 0.273435 0.076823 0.561315 0.004562 0.083865\n","4401 0.098815 0.031436 0.816431 0.004827 0.048490\n","\n","[4402 rows x 5 columns]"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"id":"yx6tspZ0CJsv","executionInfo":{"status":"ok","timestamp":1624727321084,"user_tz":-330,"elapsed":654,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import numpy as np\n","predictions_test_submission = np.asarray(predictions_test_submission)"],"execution_count":5,"outputs":[]},{"cell_type":"code","metadata":{"id":"yhvtEGcuCMKO","executionInfo":{"status":"ok","timestamp":1624727321085,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import numpy as np\n","predictions_test_submission1 = np.zeros_like(predictions_test_submission)\n","predictions_test_submission1[np.arange(len(predictions_test_submission)), predictions_test_submission.argmax(1)] = 1"],"execution_count":6,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"UFz5rdWVDil_","executionInfo":{"status":"ok","timestamp":1624727321498,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"ede28170-0148-4175-9d45-8f26c9409a15"},"source":["predictions_test_submission1"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0., 0., 1., 0., 0.],\n"," [0., 1., 0., 0., 0.],\n"," [0., 0., 1., 0., 0.],\n"," ...,\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.],\n"," [0., 0., 1., 0., 0.]])"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"id":"EcEP5VhyDktU","executionInfo":{"status":"ok","timestamp":1624727322570,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["predictions_test_submission1_final = np.argmax(predictions_test_submission1, axis=1)"],"execution_count":8,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"z_-UdPnAFOfo","executionInfo":{"status":"ok","timestamp":1624727324845,"user_tz":-330,"elapsed":6,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"f3d082d8-5071-478e-bac3-be463a89ba87"},"source":["predictions_test_submission1_final"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([2, 1, 2, ..., 2, 2, 2])"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"id":"yj7AcwqbmTKP","executionInfo":{"status":"ok","timestamp":1624727377674,"user_tz":-330,"elapsed":3182,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import pandas as pd \n","data_train = 
pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_train.tsv', sep = '\\t')\n","data_dev = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_test_withoutlabels.tsv', sep = '\\t')\n"],"execution_count":10,"outputs":[]},{"cell_type":"code","metadata":{"id":"BbWLut5TS_Hf","executionInfo":{"status":"ok","timestamp":1624727377675,"user_tz":-330,"elapsed":6,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["data_train['category'] = data_train['category'].replace('Positive ', 'Positive')"],"execution_count":11,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JDQ4yP2CS_Jo","executionInfo":{"status":"ok","timestamp":1624727377675,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"818f5be5-c653-4d8d-9f8c-646aec72c034"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_dev_index = le.transform(data_dev['category'])"],"execution_count":12,"outputs":[{"output_type":"stream","text":["['Mixed_feelings' 'Negative' 'Positive' 'not-Tamil' 'unknown_state']\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"o5T7yAcFFpfm","executionInfo":{"status":"ok","timestamp":1624727385401,"user_tz":-330,"elapsed":602,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["predictions_test_submission1_final1 = le.inverse_transform(predictions_test_submission1_final)"],"execution_count":13,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"PSSBXvkTFze-","executionInfo":{"status":"ok","timestamp":1624727385862,"user_tz":-330,"elapsed":8,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"b8c4e7df-4803-4f30-b795-6e949a5ded81"},"source":["predictions_test_submission1_final1"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['Positive', 'Negative', 'Positive', ..., 'Positive', 'Positive',\n"," 'Positive'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"code","metadata":{"id":"otag6te-F1It","executionInfo":{"status":"ok","timestamp":1624727389243,"user_tz":-330,"elapsed":494,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["data_test['category'] = predictions_test_submission1_final1"],"execution_count":15,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"5ozZTPDwGiSv","executionInfo":{"status":"ok","timestamp":1624727391557,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"5bad1970-1449-499d-de33-1a3a9d9dbccd"},"source":["data_test"],"execution_count":16,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Tam_1வீர செங்குந்தர் சார்பாக இந்த திரைப்படம் வெற்றி...Positive
1Tam_2Teruk ah irukku .... mokke movie .. waste of timeNegative
2Tam_3manitha samuthaayam amaipil irunthu intha pada...Positive
3Tam_4JJ mam we miss uPositive
4Tam_5Subtitle me traller dekhne wale like karonot-Tamil
............
4397Tam_4398Ithukum dislike potta kammanattti koovaingalam...Negative
4398Tam_4399Suyama Sinthikiravan than super Hero Seama dia...Positive
4399Tam_4400Super thalaiva.... Nee mass dha eppavumePositive
4400Tam_4401பெண்ணை அடிமையாக்க நினைக்கும் இந்த படம் தோல்வித...Positive
4401Tam_4402Semma thalaiva alu athikama akirukum enimale e...Positive
\n","

4402 rows × 3 columns

\n","
"],"text/plain":[" id text category\n","0 Tam_1 வீர செங்குந்தர் சார்பாக இந்த திரைப்படம் வெற்றி... Positive\n","1 Tam_2 Teruk ah irukku .... mokke movie .. waste of time Negative\n","2 Tam_3 manitha samuthaayam amaipil irunthu intha pada... Positive\n","3 Tam_4 JJ mam we miss u Positive\n","4 Tam_5 Subtitle me traller dekhne wale like karo not-Tamil\n","... ... ... ...\n","4397 Tam_4398 Ithukum dislike potta kammanattti koovaingalam... Negative\n","4398 Tam_4399 Suyama Sinthikiravan than super Hero Seama dia... Positive\n","4399 Tam_4400 Super thalaiva.... Nee mass dha eppavume Positive\n","4400 Tam_4401 பெண்ணை அடிமையாக்க நினைக்கும் இந்த படம் தோல்வித... Positive\n","4401 Tam_4402 Semma thalaiva alu athikama akirukum enimale e... Positive\n","\n","[4402 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":16}]},{"cell_type":"code","metadata":{"id":"Jt6Z80uFGjsD","executionInfo":{"status":"ok","timestamp":1624727424501,"user_tz":-330,"elapsed":697,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["data_test.to_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/Ensemble_SVM_LR_TAMIL.tsv\", sep=\"\\t\", index=False)"],"execution_count":17,"outputs":[]},{"cell_type":"code","metadata":{"id":"11fdDMTpG8QS","executionInfo":{"status":"ok","timestamp":1624727435532,"user_tz":-330,"elapsed":692,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["check_submission = pd.read_csv(\"/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/Ensemble_SVM_LR_TAMIL.tsv\", 
sep=\"\\t\")"],"execution_count":18,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":423},"id":"uwyh-IqGG8SG","executionInfo":{"status":"ok","timestamp":1624727435533,"user_tz":-330,"elapsed":7,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"187dc68d-0639-47cc-ed40-a20eb171e3e9"},"source":["check_submission"],"execution_count":19,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtextcategory
0Tam_1வீர செங்குந்தர் சார்பாக இந்த திரைப்படம் வெற்றி...Positive
1Tam_2Teruk ah irukku .... mokke movie .. waste of timeNegative
2Tam_3manitha samuthaayam amaipil irunthu intha pada...Positive
3Tam_4JJ mam we miss uPositive
4Tam_5Subtitle me traller dekhne wale like karonot-Tamil
............
4397Tam_4398Ithukum dislike potta kammanattti koovaingalam...Negative
4398Tam_4399Suyama Sinthikiravan than super Hero Seama dia...Positive
4399Tam_4400Super thalaiva.... Nee mass dha eppavumePositive
4400Tam_4401பெண்ணை அடிமையாக்க நினைக்கும் இந்த படம் தோல்வித...Positive
4401Tam_4402Semma thalaiva alu athikama akirukum enimale e...Positive
\n","

4402 rows × 3 columns

\n","
"],"text/plain":[" id text category\n","0 Tam_1 வீர செங்குந்தர் சார்பாக இந்த திரைப்படம் வெற்றி... Positive\n","1 Tam_2 Teruk ah irukku .... mokke movie .. waste of time Negative\n","2 Tam_3 manitha samuthaayam amaipil irunthu intha pada... Positive\n","3 Tam_4 JJ mam we miss u Positive\n","4 Tam_5 Subtitle me traller dekhne wale like karo not-Tamil\n","... ... ... ...\n","4397 Tam_4398 Ithukum dislike potta kammanattti koovaingalam... Negative\n","4398 Tam_4399 Suyama Sinthikiravan than super Hero Seama dia... Positive\n","4399 Tam_4400 Super thalaiva.... Nee mass dha eppavume Positive\n","4400 Tam_4401 பெண்ணை அடிமையாக்க நினைக்கும் இந்த படம் தோல்வித... Positive\n","4401 Tam_4402 Semma thalaiva alu athikama akirukum enimale e... Positive\n","\n","[4402 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"code","metadata":{"id":"MIEN8srCnBlP"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/ML_Classifier_Kannada_code_mixed.ipynb b/ML_Classifier_Kannada_code_mixed.ipynb new file mode 100644 index 0000000..1174ae5 --- /dev/null +++ b/ML_Classifier_Kannada_code_mixed.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ML_Classifier_Kannada_code_mixed.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"O1S8SYrnMTNI","executionInfo":{"status":"ok","timestamp":1624642719333,"user_tz":-330,"elapsed":630,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"b28b0e18-9903-45cc-df47-3ec230c986aa"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":32,"outputs":[{"output_type":"stream","text":["Drive already mounted at 
/content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"57R5J3_1OfiA","executionInfo":{"status":"ok","timestamp":1624642720014,"user_tz":-330,"elapsed":684,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_train.tsv', sep = '\\t')\n","data_val = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/kannada_sentiment_full_test_withoutlabels.tsv', sep = '\\t')"],"execution_count":33,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"aIWUjapcPoVJ","executionInfo":{"status":"ok","timestamp":1624642720015,"user_tz":-330,"elapsed":37,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"3fa6db7b-785b-4d7e-9ae4-bf7e2c54dd2c"},"source":["data_train.head()"],"execution_count":34,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory
0ಒಂದು ದೇಶದ ಮುಂದುವರಿಯುವುದು ಅದರ ಆರ್ಥಿಕ ಸ್ಥಿತಿಯನ್ನ...Negative
1ಕನ್ನಡದಲ್ಲಿ ಡೈಲಿ ಟೆಕ್ ಅಪ್ಡೇಟ್ಸ್ ಪಡೆಯಲು ಸಬ್ಸ್ಕ್ರ...Positive
2Super sar songnot-Kannada
3Tiktokers present situation... nನೋಡುವವರು ಯಾರು ...Negative
4Super ಸಾಂಗ್ ವೆರಿ ನೈಸ್....Positive
\n","
"],"text/plain":[" text category\n","0 ಒಂದು ದೇಶದ ಮುಂದುವರಿಯುವುದು ಅದರ ಆರ್ಥಿಕ ಸ್ಥಿತಿಯನ್ನ... Negative\n","1 ಕನ್ನಡದಲ್ಲಿ ಡೈಲಿ ಟೆಕ್ ಅಪ್ಡೇಟ್ಸ್ ಪಡೆಯಲು ಸಬ್ಸ್ಕ್ರ... Positive\n","2 Super sar song not-Kannada\n","3 Tiktokers present situation... nನೋಡುವವರು ಯಾರು ... Negative\n","4 Super ಸಾಂಗ್ ವೆರಿ ನೈಸ್.... Positive"]},"metadata":{"tags":[]},"execution_count":34}]},{"cell_type":"code","metadata":{"id":"0vk8YlBibEYt","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1624642720016,"user_tz":-330,"elapsed":34,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"c19c69f5-b0b3-4b1d-b0a7-6f8a62ef14f3"},"source":["data_val.head()"],"execution_count":35,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory
0Binduge saryagi ugithidira good go ahead we a...Mixed feelings
1yen song guru ...superPositive
2my fevorat storynot-Kannada
3Super ತೋಗರಿ ತೀಪ್ಪPositive
4ನಿಮ್ಮ ಮಾತುಗಳು ಅಕ್ಷರಶಃ ಸತ್ಯ... ನಿಮ್ಮ ಈ ಸಾಮಾನ್ಯ ...Positive
\n","
"],"text/plain":[" text category\n","0 Binduge saryagi ugithidira good go ahead we a... Mixed feelings\n","1 yen song guru ...super Positive\n","2 my fevorat story not-Kannada\n","3 Super ತೋಗರಿ ತೀಪ್ಪ Positive\n","4 ನಿಮ್ಮ ಮಾತುಗಳು ಅಕ್ಷರಶಃ ಸತ್ಯ... ನಿಮ್ಮ ಈ ಸಾಮಾನ್ಯ ... Positive"]},"metadata":{"tags":[]},"execution_count":35}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"7jeRfw0XPqe5","executionInfo":{"status":"ok","timestamp":1624642720016,"user_tz":-330,"elapsed":33,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"134a1bd1-9d1d-4a87-8f3e-dfe9b73d244e"},"source":["data_test.head()"],"execution_count":36,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtext
0Kan_1ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ...
1Kan_2Jai D Boss
2Kan_3Signature move
3Kan_4Super song bro
4Kan_5Wow Super agi helidira sir
\n","
"],"text/plain":[" id text\n","0 Kan_1 ಈ ಹಾಡನ್ನು ಹಾಡಿದ ವಿಜಯ ಪ್ರಕಾಶ voice ಯಾರಿಗೆಲ್ಲಾ ಇ...\n","1 Kan_2 Jai D Boss\n","2 Kan_3 Signature move\n","3 Kan_4 Super song bro\n","4 Kan_5 Wow Super agi helidira sir"]},"metadata":{"tags":[]},"execution_count":36}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tMXviBypwXb0","executionInfo":{"status":"ok","timestamp":1624642720017,"user_tz":-330,"elapsed":28,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"9c5d4144-927a-4595-d5ec-ab1a0ebfead5"},"source":["data_train.shape, data_val.shape, data_test.shape"],"execution_count":37,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((6212, 2), (691, 2), (768, 2))"]},"metadata":{"tags":[]},"execution_count":37}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Cr674jk8wyck","executionInfo":{"status":"ok","timestamp":1624642720019,"user_tz":-330,"elapsed":25,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"6ecb97df-ec48-4bc6-c0f8-5c7c969e478e"},"source":["data_train['category'].value_counts()"],"execution_count":38,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 2823\n","Negative 1188\n","not-Kannada 916\n","unknown state 711\n","Mixed feelings 574\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":38}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tREo3vWbNLQA","executionInfo":{"status":"ok","timestamp":1624642720020,"user_tz":-330,"elapsed":24,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"ae349996-5a84-440c-8c73-0f1a8f89fc44"},"source":["data_val['category'].value_counts()"],"execution_count":39,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 321\n","Negative 139\n","not-Kannada 110\n","unknown state 69\n","Mixed feelings 52\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":39}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"zX-6Saf7QPh7","executionInfo":{"status":"ok","timestamp":1624642720021,"user_tz":-330,"elapsed":22,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"e421c6bb-8c4a-4d13-dda0-44e4826e9ca6"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_val_index = le.transform(data_val['category'])\n"],"execution_count":40,"outputs":[{"output_type":"stream","text":["['Mixed feelings' 'Negative' 'Positive' 'not-Kannada' 'unknown state']\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mY8Wqps9WAMq"},"source":["For word = ngram_range (1,3)\n","For char = ngram_range (1,6)"]},{"cell_type":"code","metadata":{"id":"jqicHV-qRH4V","executionInfo":{"status":"ok","timestamp":1624642723260,"user_tz":-330,"elapsed":2669,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["from sklearn.feature_extraction.text import TfidfVectorizer\n","tfidfvec = TfidfVectorizer(ngram_range=(1,6), analyzer = 'char_wb', max_features = 50000)\n","#tfidfvec = 
TfidfVectorizer(ngram_range=(1,3), max_features = 30000)\n","tfidf_train = tfidfvec.fit_transform(data_train['text'])\n","tfidf_val = tfidfvec.transform(data_val['text'])"],"execution_count":41,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iV6FzGg_0leL","executionInfo":{"status":"ok","timestamp":1624642723260,"user_tz":-330,"elapsed":31,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"538c5bbd-d642-49ae-87ea-0acbd446c158"},"source":["tfidfvec.get_feature_names()"],"execution_count":42,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[' ',\n"," ' !',\n"," ' !!',\n"," ' !! ',\n"," ' !!!',\n"," ' !!! ',\n"," ' !!!!',\n"," ' #',\n"," ' # ',\n"," ' ##',\n"," ' #1',\n"," ' #1 ',\n"," ' #a',\n"," ' #as',\n"," ' #asn',\n"," ' #asn ',\n"," ' #av',\n"," ' #ava',\n"," ' #avan',\n"," ' #d',\n"," ' #k',\n"," ' #kg',\n"," ' #kgf',\n"," ' #r',\n"," ' #ra',\n"," ' #s',\n"," ' #ಕ',\n"," ' &',\n"," ' & ',\n"," \" '\",\n"," ' (',\n"," ' ( ',\n"," ' )',\n"," ' ) ',\n"," ' *',\n"," ' +',\n"," ' +s',\n"," ' +sh',\n"," ' +shi',\n"," ' +shiv',\n"," ' -',\n"," ' - ',\n"," ' .',\n"," ' . ',\n"," ' ..',\n"," ' .. ',\n"," ' ...',\n"," ' ... ',\n"," ' ....',\n"," ' .... 
',\n"," ' .....',\n"," ' ....n',\n"," ' ....s',\n"," ' ...a',\n"," ' ...n',\n"," ' ..?',\n"," ' ..a',\n"," ' ..av',\n"," ' ..avr',\n"," ' ..f',\n"," ' ..h',\n"," ' ..m',\n"," ' ..ma',\n"," ' ..n',\n"," ' ..ni',\n"," ' .a',\n"," ' .c',\n"," ' .i',\n"," ' .n',\n"," ' .ನ',\n"," ' /',\n"," ' / ',\n"," ' 0',\n"," ' 1',\n"," ' 1 ',\n"," ' 1.',\n"," ' 1.8',\n"," ' 1.8k',\n"," ' 1.8k ',\n"," ' 10',\n"," ' 10 ',\n"," ' 100',\n"," ' 100 ',\n"," ' 100%',\n"," ' 100% ',\n"," ' 1000',\n"," ' 1000 ',\n"," ' 10m',\n"," ' 10m ',\n"," ' 11',\n"," ' 11 ',\n"," ' 12',\n"," ' 12 ',\n"," ' 13',\n"," ' 15',\n"," ' 15 ',\n"," ' 18',\n"," ' 18 ',\n"," ' 1s',\n"," ' 1st',\n"," ' 1st ',\n"," ' 2',\n"," ' 2 ',\n"," ' 2.',\n"," ' 2.0',\n"," ' 20',\n"," ' 20 ',\n"," ' 200',\n"," ' 200 ',\n"," ' 201',\n"," ' 2019',\n"," ' 2019 ',\n"," ' 202',\n"," ' 2020',\n"," ' 2020 ',\n"," ' 24',\n"," ' 24 ',\n"," ' 25',\n"," ' 25 ',\n"," ' 27',\n"," ' 27 ',\n"," ' 28',\n"," ' 2n',\n"," ' 2nd',\n"," ' 2nd ',\n"," ' 3',\n"," ' 3 ',\n"," ' 3.',\n"," ' 30',\n"," ' 30 ',\n"," ' 300',\n"," ' 300 ',\n"," ' 32',\n"," ' 38',\n"," ' 3r',\n"," ' 3rd',\n"," ' 3rdn',\n"," ' 4',\n"," ' 4 ',\n"," ' 42',\n"," ' 420',\n"," ' 420 ',\n"," ' 5',\n"," ' 5 ',\n"," ' 50',\n"," ' 500',\n"," ' 5000',\n"," ' 5000 ',\n"," ' 6',\n"," ' 6 ',\n"," ' 6-',\n"," ' 6-5',\n"," ' 6-5=',\n"," ' 6-5=2',\n"," ' 60',\n"," ' 63',\n"," ' 7',\n"," ' 7 ',\n"," ' 8',\n"," ' 8 ',\n"," ' 80',\n"," ' 85',\n"," ' 854',\n"," ' 8546',\n"," ' 85469',\n"," ' 9',\n"," ' 9 ',\n"," ' 97',\n"," ' 974',\n"," ' 9743',\n"," ' 97434',\n"," ' 98',\n"," ' =',\n"," ' = ',\n"," ' >',\n"," ' ?',\n"," ' ? ',\n"," ' ??',\n"," ' ?? ',\n"," ' ???',\n"," ' ??? 
',\n"," ' ???n',\n"," ' @',\n"," ' @ ',\n"," ' @#',\n"," ' @#w',\n"," ' @#wi',\n"," ' @#win',\n"," ' @1',\n"," ' @10',\n"," ' @10 ',\n"," ' @a',\n"," ' @aa',\n"," ' @aad',\n"," ' @aadh',\n"," ' @ab',\n"," ' @abh',\n"," ' @abhi',\n"," ' @ai',\n"," ' @ais',\n"," ' @aisw',\n"," ' @aj',\n"," ' @aja',\n"," ' @ajay',\n"," ' @aji',\n"," ' @ajit',\n"," ' @ak',\n"," ' @akk',\n"," ' @akki',\n"," ' @aks',\n"," ' @aksh',\n"," ' @al',\n"," ' @am',\n"," ' @amb',\n"," ' @ambr',\n"," ' @an',\n"," ' @ani',\n"," ' @anil',\n"," ' @ar',\n"," ' @aru',\n"," ' @arun',\n"," ' @as',\n"," ' @ash',\n"," ' @ashw',\n"," ' @av',\n"," ' @b',\n"," ' @ba',\n"," ' @bas',\n"," ' @basa',\n"," ' @bh',\n"," ' @bha',\n"," ' @c',\n"," ' @ch',\n"," ' @cha',\n"," ' @chan',\n"," ' @che',\n"," ' @chet',\n"," ' @chn',\n"," ' @chna',\n"," ' @cr',\n"," ' @cre',\n"," ' @crea',\n"," ' @d',\n"," ' @d ',\n"," ' @da',\n"," ' @dac',\n"," ' @dacc',\n"," ' @dar',\n"," ' @dars',\n"," ' @day',\n"," ' @daya',\n"," ' @de',\n"," ' @dev',\n"," ' @devi',\n"," ' @dh',\n"," ' @dha',\n"," ' @dhan',\n"," ' @dhe',\n"," ' @dhee',\n"," ' @e',\n"," ' @en',\n"," ' @ent',\n"," ' @ente',\n"," ' @f',\n"," ' @g',\n"," ' @ga',\n"," ' @gaj',\n"," ' @gaje',\n"," ' @go',\n"," ' @gow',\n"," ' @gowt',\n"," ' @h',\n"," ' @ha',\n"," ' @hi',\n"," ' @hit',\n"," ' @i',\n"," ' @it',\n"," ' @j',\n"," ' @ja',\n"," ' @jag',\n"," ' @jaga',\n"," ' @k',\n"," ' @ka',\n"," ' @kar',\n"," ' @kart',\n"," ' @ki',\n"," ' @kic',\n"," ' @kicc',\n"," ' @kin',\n"," ' @king',\n"," ' @kir',\n"," ' @kira',\n"," ' @kr',\n"," ' @kri',\n"," ' @kris',\n"," ' @ku',\n"," ' @kum',\n"," ' @kuma',\n"," ' @kus',\n"," ' @kush',\n"," ' @l',\n"," ' @la',\n"," ' @lal',\n"," ' @m',\n"," ' @ma',\n"," ' @mad',\n"," ' @mad ',\n"," ' @mada',\n"," ' @madh',\n"," ' @mah',\n"," ' @mahe',\n"," ' @man',\n"," ' @manj',\n"," ' @mano',\n"," ' @me',\n"," ' @meg',\n"," ' @megh',\n"," ' @mo',\n"," ' @mov',\n"," ' @movi',\n"," ' @mu',\n"," ' @n',\n"," ' @na',\n"," ' @nag',\n"," ' @naga',\n"," ' 
@nan',\n"," ' @nand',\n"," ' @nav',\n"," ' @nave',\n"," ' @navy',\n"," ' @ni',\n"," ' @nik',\n"," ' @niki',\n"," ' @nit',\n"," ' @nith',\n"," ' @nk',\n"," ' @nkn',\n"," ' @nkn ',\n"," ' @p',\n"," ' @pa',\n"," ' @po',\n"," ' @poo',\n"," ' @pooj',\n"," ' @pr',\n"," ' @pra',\n"," ' @prad',\n"," ' @praj',\n"," ' @pras',\n"," ' @prav',\n"," ' @pri',\n"," ' @prit',\n"," ' @pu',\n"," ' @pun',\n"," ' @punt',\n"," ' @r',\n"," ' @ra',\n"," ' @rad',\n"," ' @radh',\n"," ' @rag',\n"," ' @ragh',\n"," ' @raj',\n"," ' @raj ',\n"," ' @raja',\n"," ' @raje',\n"," ' @re',\n"," ' @reg',\n"," ' @regi',\n"," ' @ro',\n"," ' @roh',\n"," ' @rohi',\n"," ' @roy',\n"," ' @roya',\n"," ' @s',\n"," ' @s ',\n"," ' @sa',\n"," ' @sac',\n"," ' @sach',\n"," ' @sag',\n"," ' @saga',\n"," ' @sam',\n"," ' @san',\n"," ' @sang',\n"," ' @sanj',\n"," ' @sat',\n"," ' @sath',\n"," ' @sati',\n"," ' @sh',\n"," ' @sha',\n"," ' @shar',\n"," ' @shas',\n"," ' @shi',\n"," ' @shiv',\n"," ' @shr',\n"," ' @si',\n"," ' @sid',\n"," ' @sidd',\n"," ' @su',\n"," ' @suj',\n"," ' @suja',\n"," ' @suji',\n"," ' @sum',\n"," ' @sumy',\n"," ' @sup',\n"," ' @supr',\n"," ' @sur',\n"," ' @sw',\n"," ' @swe',\n"," ' @swee',\n"," ' @sy',\n"," ' @sye',\n"," ' @syed',\n"," ' @sᴀ',\n"," ' @sᴀɴ',\n"," ' @sᴀɴᴅ',\n"," ' @t',\n"," ' @th',\n"," ' @thi',\n"," ' @tr',\n"," ' @tro',\n"," ' @trol',\n"," ' @u',\n"," ' @un',\n"," ' @uni',\n"," ' @unit',\n"," ' @us',\n"," ' @ush',\n"," ' @usha',\n"," ' @v',\n"," ' @va',\n"," ' @var',\n"," ' @vara',\n"," ' @vas',\n"," ' @vasa',\n"," ' @ve',\n"," ' @ven',\n"," ' @venk',\n"," ' @venu',\n"," ' @vi',\n"," ' @vin',\n"," ' @vina',\n"," ' @vino',\n"," ' @vis',\n"," ' @vish',\n"," ' @vy',\n"," ' @vyv',\n"," ' @vyv ',\n"," ' @w',\n"," ' @wi',\n"," ' @wil',\n"," ' @wild',\n"," ' @y',\n"," ' @ya',\n"," ' @yas',\n"," ' @yash',\n"," ' @ŕ',\n"," ' @ŕä',\n"," ' @ŕäv',\n"," ' @ŕävî',\n"," ' @ಕ',\n"," ' @ಕಿ',\n"," ' @ಕಿಚ',\n"," ' @ಕಿಚ್',\n"," ' _',\n"," ' __',\n"," ' a',\n"," ' a ',\n"," ' aa',\n"," ' aa ',\n"," ' 
aaa',\n"," ' aaa ',\n"," ' aad',\n"," ' aada',\n"," ' aadar',\n"," ' aadh',\n"," ' aadm',\n"," ' aadme',\n"," ' aadr',\n"," ' aadre',\n"," ' aadru',\n"," ' aadt',\n"," ' aadti',\n"," ' aag',\n"," ' aaga',\n"," ' aagal',\n"," ' aagb',\n"," ' aagbe',\n"," ' aage',\n"," ' aage ',\n"," ' aagi',\n"," ' aagi ',\n"," ' aagid',\n"," ' aagil',\n"," ' aagir',\n"," ' aagl',\n"," ' aagle',\n"," ' aagli',\n"," ' aago',\n"," ' aagod',\n"," ' aagt',\n"," ' aagta',\n"," ' aagth',\n"," ' aagu',\n"," ' aagut',\n"," ' aah',\n"," ' aai',\n"," ' aan',\n"," ' aap',\n"," ' aapa',\n"," ' aar',\n"," ' aas',\n"," ' aase',\n"," ' aase ',\n"," ' aat',\n"," ' aata',\n"," ' aath',\n"," ' aay',\n"," ' aayi',\n"," ' aayt',\n"," ' aayth',\n"," ' ab',\n"," ' ab ',\n"," ' abb',\n"," ' abba',\n"," ' abbb',\n"," ' abh',\n"," ' abhi',\n"," ' abhig',\n"," ' abhim',\n"," ' abhin',\n"," ' abi',\n"," ' abim',\n"," ' abima',\n"," ' abo',\n"," ' abou',\n"," ' about',\n"," ' abt',\n"," ' abt ',\n"," ' ac',\n"," ' acc',\n"," ' acch',\n"," ' accha',\n"," ' acco',\n"," ' accou',\n"," ' ach',\n"," ' act',\n"," ' act ',\n"," ' acti',\n"," ' actin',\n"," ' acto',\n"," ' actor',\n"," ' actu',\n"," ' actua',\n"," ' ad',\n"," ' ada',\n"," ' ada ',\n"," ' adaa',\n"," ' adag',\n"," ' adaga',\n"," ' adak',\n"," ' adakk',\n"," ' adal',\n"," ' adali',\n"," ' adam',\n"," ' adame',\n"," ' adan',\n"," ' adana',\n"," ' adann',\n"," ' adar',\n"," ' adar ',\n"," ' adara',\n"," ' adare',\n"," ' adb',\n"," ' adbh',\n"," ' adbhu',\n"," ' adbu',\n"," ' adbut',\n"," ' add',\n"," ' add ',\n"," ' addi',\n"," ' addic',\n"," ' addr',\n"," ' addre',\n"," ' adds',\n"," ' adds ',\n"," ' ade',\n"," ' ade ',\n"," ' adel',\n"," ' adell',\n"," ' aden',\n"," ' aden ',\n"," ' adh',\n"," ' adha',\n"," ' adhb',\n"," ' adhbh',\n"," ' adhr',\n"," ' adhre',\n"," ' adhru',\n"," ' adhu',\n"," ' adhu ',\n"," ' adhy',\n"," ' adhya',\n"," ' adi',\n"," ' adi ',\n"," ' adik',\n"," ' adike',\n"," ' adk',\n"," ' adke',\n"," ' adke ',\n"," ' adkk',\n"," ' 
adkke',\n"," ' adm',\n"," ' ado',\n"," ' ador',\n"," ' adoru',\n"," ' adr',\n"," ' adra',\n"," ' adra ',\n"," ' adral',\n"," ' adre',\n"," ' adre ',\n"," ' adru',\n"," ' adru ',\n"," ' ads',\n"," ' adt',\n"," ' adta',\n"," ' adth',\n"," ' adtha',\n"," ' adthi',\n"," ' adti',\n"," ' adu',\n"," ' adu ',\n"," ' aduk',\n"," ' aduke',\n"," ' adukk',\n"," ' adun',\n"," ' adune',\n"," ' adunn',\n"," ' ae',\n"," ' af',\n"," ' aft',\n"," ' afte',\n"," ' after',\n"," ' ag',\n"," ' ag ',\n"," ' aga',\n"," ' aga ',\n"," ' agai',\n"," ' again',\n"," ' agal',\n"," ' agall',\n"," ' agalv',\n"," ' agat',\n"," ' agati',\n"," ' agb',\n"," ' agba',\n"," ' agbe',\n"," ' agbek',\n"," ' agbh',\n"," ' agbi',\n"," ' agbit',\n"," ' agd',\n"," ' age',\n"," ' age ',\n"," ' aged',\n"," ' agi',\n"," ' agi ',\n"," ' agid',\n"," ' agida',\n"," ' agidd',\n"," ' agide',\n"," ' agidh',\n"," ' agidi',\n"," ' agidr',\n"," ' agii',\n"," ' agii ',\n"," ' agil',\n"," ' agill',\n"," ' agir',\n"," ' agirb',\n"," ' agiro',\n"," ' agiru',\n"," ' agit',\n"," ' agith',\n"," ' agl',\n"," ' agle',\n"," ' agle ',\n"," ' agli',\n"," ' agli ',\n"," ' aglil',\n"," ' agll',\n"," ' aglli',\n"," ' ago',\n"," ' ago ',\n"," ' agod',\n"," ' agode',\n"," ' agodu',\n"," ' agoi',\n"," ' agoit',\n"," ' agok',\n"," ' agoke',\n"," ' agol',\n"," ' agoll',\n"," ' agr',\n"," ' agre',\n"," ' agree',\n"," ' agt',\n"," ' agta',\n"," ' agta ',\n"," ' agtai',\n"," ' agth',\n"," ' agthi',\n"," ' agti',\n"," ' agtid',\n"," ' agtil',\n"," ' agtir',\n"," ' agu',\n"," ' agut',\n"," ' agute',\n"," ' aguth',\n"," ' agutt',\n"," ' ah',\n"," ' ah ',\n"," ' aha',\n"," ' aha ',\n"," ' ahe',\n"," ' ai',\n"," ' ais',\n"," ' ait',\n"," ' aith',\n"," ' aithu',\n"," ' aitu',\n"," ' aitu ',\n"," ' aj',\n"," ' aja',\n"," ' ajan',\n"," ' ajane',\n"," ' ajn',\n"," ' ajne',\n"," ' ajnee',\n"," ' ak',\n"," ' aka',\n"," ' akb',\n"," ' akba',\n"," ' akbar',\n"," ' akh',\n"," ' akhi',\n"," ' akhil',\n"," ' aki',\n"," ' akid',\n"," ' akk',\n"," ' akka',\n"," ' 
akka ',\n"," ' akkan',\n"," ' akki',\n"," ' akki ',\n"," ' akkn',\n"," ' akkna',\n"," ' ako',\n"," ' akt',\n"," ' akta',\n"," ' al',\n"," ' ala',\n"," ' ala ',\n"," ' ald',\n"," ' aldh',\n"," ' ale',\n"," ' ale ',\n"," ' ali',\n"," ' ali ',\n"," ' all',\n"," ' all ',\n"," ' alla',\n"," ' alla ',\n"," ' alla.',\n"," ' allap',\n"," ' alle',\n"," ' alle ',\n"," ' alli',\n"," ' alli ',\n"," ' alll',\n"," ' allla',\n"," ' allo',\n"," ' allu',\n"," ' allu ',\n"," ' alp',\n"," ' alpa',\n"," ' alpa ',\n"," ' alr',\n"," ' alre',\n"," ' alrea',\n"," ' als',\n"," ' also',\n"," ' also ',\n"," ' alt',\n"," ' alv',\n"," ' alva',\n"," ' alva ',\n"," ' alvaa',\n"," ' alw',\n"," ' alwa',\n"," ' alwa ',\n"," ' alway',\n"," ' am',\n"," ' am ',\n"," ' ama',\n"," ' amar',\n"," ' amar ',\n"," ' amaz',\n"," ' amaze',\n"," ' amazi',\n"," ' amazo',\n"," ' amb',\n"," ' ambe',\n"," ' amber',\n"," ' ame',\n"," ' amel',\n"," ' amele',\n"," ' amez',\n"," ' amm',\n"," ' amma',\n"," ' amma ',\n"," ' amman',\n"," ' ammu',\n"," ' ammun',\n"," ' amo',\n"," ' amou',\n"," ' amoun',\n"," ' an',\n"," ' an ',\n"," ' ana',\n"," ' anan',\n"," ' anand',\n"," ' anat',\n"," ' anath',\n"," ' anb',\n"," ' anbe',\n"," ' anbed',\n"," ' anc',\n"," ' anch',\n"," ' ancho',\n"," ' and',\n"," ' and ',\n"," ' anda',\n"," ' andh',\n"," ' andha',\n"," ' andhr',\n"," ' andi',\n"," ' andid',\n"," ' andk',\n"," ' andko',\n"," ' andr',\n"," ' andre',\n"," ' andru',\n"," ' ang',\n"," ' ange',\n"," ' ange ',\n"," ' ani',\n"," ' anis',\n"," ' anisu',\n"," ' anj',\n"," ' anja',\n"," ' anjal',\n"," ' ank',\n"," ' anko',\n"," ' ankon',\n"," ' ankot',\n"," ' ann',\n"," ' anna',\n"," ' anna ',\n"," ' anna.',\n"," ' annan',\n"," ' anni',\n"," ' annis',\n"," ' annn',\n"," ' annna',\n"," ' anno',\n"," ' anno ',\n"," ' annod',\n"," ' annor',\n"," ' annov',\n"," ' annu',\n"," ' annu ',\n"," ' annuv',\n"," ' ano',\n"," ' anod',\n"," ' anodu',\n"," ' ans',\n"," ' ansa',\n"," ' ansat',\n"," ' ansb',\n"," ' ansi',\n"," ' ansid',\n"," ' 
ansit',\n"," ' anso',\n"," ' ansod',\n"," ' anst',\n"," ' ansta',\n"," ' ansth',\n"," ' ansti',\n"," ' anstu',\n"," ' ansu',\n"," ' ansut',\n"," ' answ',\n"," ' answe',\n"," ' ant',\n"," ' anta',\n"," ' anta ',\n"," ' anta.',\n"," ' antad',\n"," ' antar',\n"," ' ante',\n"," ' ante ',\n"," ' anth',\n"," ' antha',\n"," ' anthi',\n"," ' anthu',\n"," ' anti',\n"," ' antid',\n"," ' antir',\n"," ' antu',\n"," ' antu ',\n"," ' anty',\n"," ' antya',\n"," ' anu',\n"," ' anu ',\n"," ' anus',\n"," ' anust',\n"," ' anusu',\n"," ' anv',\n"," ' any',\n"," ' any ',\n"," ' anyo',\n"," ' anyon',\n"," ' ap',\n"," ' app',\n"," ' app ',\n"," ' appa',\n"," ' appa ',\n"," ' appaj',\n"," ' appan',\n"," ' appp',\n"," ' appp ',\n"," ' appr',\n"," ' appre',\n"," ' apps',\n"," ' apps ',\n"," ' appu',\n"," ' appu ',\n"," ' ar',\n"," ' ar ',\n"," ' ara',\n"," ' are',\n"," ' are ',\n"," ' ari',\n"," ' arit',\n"," ' arita',\n"," ' arj',\n"," ' arju',\n"," ' arjun',\n"," ' arm',\n"," ' army',\n"," ' army ',\n"," ' aro',\n"," ' art',\n"," ' arta',\n"," ' arta ',\n"," ' arth',\n"," ' artha',\n"," ' arthi',\n"," ' arti',\n"," ' artis',\n"," ' as',\n"," ' as ',\n"," ' asa',\n"," ' asay',\n"," ' asayy',\n"," ' asc',\n"," ' asca',\n"," ' ascar',\n"," ' ase',\n"," ' ase ',\n"," ' ash',\n"," ' asht',\n"," ' asht ',\n"," ' ashte',\n"," ' ashtu',\n"," ' ashv',\n"," ' ashva',\n"," ' ashw',\n"," ' ashwa',\n"," ' asn',\n"," ' asn ',\n"," ' asn.',\n"," ' asn..',\n"," ' asnn',\n"," ' aso',\n"," ' asom',\n"," ' ast',\n"," ' ast ',\n"," ' aste',\n"," ...]"]},"metadata":{"tags":[]},"execution_count":42}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EDDXkAUI1DF1","executionInfo":{"status":"ok","timestamp":1624642723261,"user_tz":-330,"elapsed":28,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"a3ef66d5-3c7d-4084-eb55-25f5ed157019"},"source":["tfidf_train.shape"],"execution_count":43,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(6212, 50000)"]},"metadata":{"tags":[]},"execution_count":43}]},{"cell_type":"code","metadata":{"id":"szYsgWh4UFOy","executionInfo":{"status":"ok","timestamp":1624642723261,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["x_train = tfidf_train\n","x_test = tfidf_val\n","y_train = y_train_index\n","y_test = y_val_index"],"execution_count":44,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"QbypHNhYSMDy","executionInfo":{"status":"ok","timestamp":1624643180218,"user_tz":-330,"elapsed":456961,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"3a0db849-f0f6-4a9a-a80e-db218d5e362b"},"source":[" from sklearn import metrics\n"," from sklearn.svm import SVC\n"," model_SVM = SVC(probability=True)\n"," model_SVM.fit(x_train, y_train)\n"," y_pred_SVM = model_SVM.predict(x_test)\n"," print(\"SVM\")\n"," print(metrics.classification_report(y_test, y_pred_SVM))\n"," \n"," from sklearn.ensemble import RandomForestClassifier\n"," rf = RandomForestClassifier()\n"," rf.fit(x_train,y_train)\n"," y_pred_rf = rf.predict(x_test)\n"," print(\"random\")\n"," print(metrics.classification_report(y_test, y_pred_rf))\n"," \n"," from sklearn.linear_model import LogisticRegression\n"," LR = LogisticRegression()\n"," LR.fit(x_train,y_train)\n"," y_pred_LR = LR.predict(x_test)\n"," print(\"Logistic Regression\")\n"," 
print(metrics.classification_report(y_test, y_pred_LR ))\n"," \n"," from sklearn.neighbors import KNeighborsClassifier\n"," neigh = KNeighborsClassifier()\n"," neigh.fit(x_train,y_train)\n"," y_pred_KNN = neigh.predict(x_test)\n"," print(\"KNN\")\n"," print(metrics.classification_report(y_test, y_pred_KNN ))\n"," \n"," from sklearn.naive_bayes import GaussianNB\n"," naive = GaussianNB()\n"," naive.fit(x_train.toarray(),y_train)\n"," y_pred_naive = naive.predict(x_test.toarray())\n"," print(\"Naive Bayes\")\n"," print(metrics.classification_report(y_test, y_pred_naive ))\n"," \n"," from sklearn.ensemble import GradientBoostingClassifier\n"," gradient = GradientBoostingClassifier()\n"," gradient.fit(x_train,y_train)\n"," y_pred_gradient = gradient.predict(x_test)\n"," print(\"Gradient Boosting\")\n"," print(metrics.classification_report(y_test, y_pred_gradient ))\n"," \n"," from sklearn.tree import DecisionTreeClassifier\n"," decision = DecisionTreeClassifier()\n"," decision.fit(x_train,y_train)\n"," y_pred_decision = decision.predict(x_test)\n"," print(\"Decision Tree\")\n"," print(metrics.classification_report(y_test, y_pred_decision ))\n","\n"," from sklearn.ensemble import AdaBoostClassifier\n"," AdaBoostclf = AdaBoostClassifier()\n"," AdaBoostclf.fit(x_train,y_train)\n"," y_pred_AdaBoostclf = AdaBoostclf.predict(x_test)\n"," print(\"AdaBoost classifier\")\n"," print(metrics.classification_report(y_test, y_pred_AdaBoostclf ))\n"," "],"execution_count":45,"outputs":[{"output_type":"stream","text":["SVM\n"," precision recall f1-score support\n","\n"," 0 1.00 0.08 0.14 52\n"," 1 0.68 0.58 0.62 139\n"," 2 0.65 0.86 0.74 321\n"," 3 0.70 0.67 0.69 110\n"," 4 0.59 0.35 0.44 69\n","\n"," accuracy 0.66 691\n"," macro avg 0.72 0.51 0.53 691\n","weighted avg 0.69 0.66 0.64 691\n","\n","random\n"," precision recall f1-score support\n","\n"," 0 0.45 0.10 0.16 52\n"," 1 0.74 0.51 0.60 139\n"," 2 0.65 0.90 0.75 321\n"," 3 0.69 0.65 0.67 110\n"," 4 0.64 0.30 0.41 69\n","\n"," 
accuracy 0.66 691\n"," macro avg 0.63 0.49 0.52 691\n","weighted avg 0.66 0.66 0.63 691\n","\n"],"name":"stdout"},{"output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n","STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n","\n","Increase the number of iterations (max_iter) or scale the data as shown in:\n"," https://scikit-learn.org/stable/modules/preprocessing.html\n","Please also refer to the documentation for alternative solver options:\n"," https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n"," extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"],"name":"stderr"},{"output_type":"stream","text":["Logistic Regression\n"," precision recall f1-score support\n","\n"," 0 0.60 0.06 0.11 52\n"," 1 0.69 0.63 0.65 139\n"," 2 0.69 0.87 0.77 321\n"," 3 0.72 0.74 0.73 110\n"," 4 0.60 0.36 0.45 69\n","\n"," accuracy 0.69 691\n"," macro avg 0.66 0.53 0.54 691\n","weighted avg 0.68 0.69 0.66 691\n","\n","KNN\n"," precision recall f1-score support\n","\n"," 0 0.19 0.13 0.16 52\n"," 1 0.60 0.58 0.59 139\n"," 2 0.65 0.73 0.69 321\n"," 3 0.53 0.62 0.57 110\n"," 4 0.50 0.22 0.30 69\n","\n"," accuracy 0.59 691\n"," macro avg 0.49 0.46 0.46 691\n","weighted avg 0.57 0.59 0.57 691\n","\n","Naive Bayes\n"," precision recall f1-score support\n","\n"," 0 0.22 0.15 0.18 52\n"," 1 0.42 0.63 0.50 139\n"," 2 0.64 0.57 0.60 321\n"," 3 0.63 0.65 0.64 110\n"," 4 0.34 0.22 0.27 69\n","\n"," accuracy 0.53 691\n"," macro avg 0.45 0.45 0.44 691\n","weighted avg 0.53 0.53 0.52 691\n","\n","Gradient Boosting\n"," precision recall f1-score support\n","\n"," 0 0.43 0.06 0.10 52\n"," 1 0.68 0.51 0.58 139\n"," 2 0.63 0.86 0.73 321\n"," 3 0.72 0.69 0.71 110\n"," 4 0.69 0.32 0.44 69\n","\n"," accuracy 0.65 691\n"," macro avg 0.63 0.49 0.51 691\n","weighted avg 0.64 0.65 0.62 691\n","\n","Decision Tree\n"," precision recall f1-score support\n","\n"," 0 0.16 0.19 0.18 
52\n"," 1 0.44 0.45 0.44 139\n"," 2 0.67 0.64 0.66 321\n"," 3 0.51 0.55 0.53 110\n"," 4 0.29 0.26 0.27 69\n","\n"," accuracy 0.52 691\n"," macro avg 0.42 0.42 0.42 691\n","weighted avg 0.52 0.52 0.52 691\n","\n","AdaBoost classifier\n"," precision recall f1-score support\n","\n"," 0 0.67 0.04 0.07 52\n"," 1 0.55 0.50 0.53 139\n"," 2 0.63 0.80 0.70 321\n"," 3 0.56 0.59 0.57 110\n"," 4 0.41 0.22 0.28 69\n","\n"," accuracy 0.59 691\n"," macro avg 0.56 0.43 0.43 691\n","weighted avg 0.58 0.59 0.56 691\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"dutLuW-7NitW"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"uSFmluVXNkL2"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"aKJdPx2jNkO3"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"XbQ1Hm3TNkSP"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"bQANqfdsf0i6","executionInfo":{"status":"ok","timestamp":1624643355150,"user_tz":-330,"elapsed":533,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["tfidf_test = tfidfvec.transform(data_test['text'])"],"execution_count":46,"outputs":[]},{"cell_type":"code","metadata":{"id":"RBDDmfD_URTa","executionInfo":{"status":"ok","timestamp":1624643360271,"user_tz":-330,"elapsed":3034,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_model_SVM_prob = model_SVM.predict_proba(tfidf_test)"],"execution_count":47,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7VQ3hfA7gECM","executionInfo":{"status":"ok","timestamp":1624643360272,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"9af2c69b-2f30-4acf-e507-b39c6d2e48ed"},"source":["y_pred_model_SVM_prob"],"execution_count":48,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[0.05626558, 0.13327691, 0.77781715, 0.00490246, 0.0277379 ],\n"," [0.02677196, 0.00776521, 0.86121443, 0.07037198, 0.03387642],\n"," [0.05356787, 0.01402725, 0.24949638, 0.47680516, 0.20610333],\n"," ...,\n"," [0.02673411, 0.12512806, 0.84205796, 0.00177428, 0.00430561],\n"," [0.15453031, 0.15698626, 0.65826056, 0.00645513, 0.02376774],\n"," [0.20196312, 0.30472424, 0.43928437, 0.00637452, 0.04765375]])"]},"metadata":{"tags":[]},"execution_count":48}]},{"cell_type":"code","metadata":{"id":"i_feFgnOiZM0","executionInfo":{"status":"ok","timestamp":1624643361973,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_model_SVM_prob= pd.DataFrame(y_pred_model_SVM_prob, columns = ['Mixed feelings' ,'Negative' ,'Positive' ,'not-Kannada', 'unknown state'])"],"execution_count":49,"outputs":[]},{"cell_type":"code","metadata":{"id":"PujUzf51iZOG","executionInfo":{"status":"ok","timestamp":1624643363970,"user_tz":-330,"elapsed":635,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_model_SVM_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/SVM_char_1-6_gram_kanada_64.csv',index = False)"],"execution_count":50,"outputs":[]},{"cell_type":"code","metadata":{"id":"IcKeAb1giZQa","executionInfo":{"status":"ok","timestamp":1624643366014,"user_tz":-330,"elapsed":589,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_rf_prob = rf.predict_proba(tfidf_test)"],"execution_count":51,"outputs":[]},{"cell_type":"code","metadata":{"id":"nf9Ryeo0i1I8","executionInfo":{"status":"ok","timestamp":1624643368373,"user_tz":-330,"elapsed":2,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_rf_prob= pd.DataFrame(y_pred_rf_prob, columns = ['Mixed feelings' ,'Negative' ,'Positive' ,'not-Kannada', 'unknown state'])"],"execution_count":52,"outputs":[]},{"cell_type":"code","metadata":{"id":"nTMjSz-_i1Ki","executionInfo":{"status":"ok","timestamp":1624643370188,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_rf_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/RF_char_1-6_gram_Kanada_63.csv', index = False)"],"execution_count":53,"outputs":[]},{"cell_type":"code","metadata":{"id":"k-XiNgH_i1Md","executionInfo":{"status":"ok","timestamp":1624643371958,"user_tz":-330,"elapsed":2,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_LR_prob = LR.predict_proba(tfidf_test)"],"execution_count":54,"outputs":[]},{"cell_type":"code","metadata":{"id":"pS6nkrMbi1P_","executionInfo":{"status":"ok","timestamp":1624643374240,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_LR_prob= pd.DataFrame(y_pred_LR_prob, columns = 
['Mixed feelings' ,'Negative' ,'Positive' ,'not-Kannada', 'unknown state'])"],"execution_count":55,"outputs":[]},{"cell_type":"code","metadata":{"id":"ru0vsVTsiZT3","executionInfo":{"status":"ok","timestamp":1624643375873,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_LR_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Kannada/LR_char_1-6_gram_Kanda_66.csv', index = False)"],"execution_count":56,"outputs":[]},{"cell_type":"code","metadata":{"id":"7vXANa-i7HFG"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/ML_Classifier_Malayalam_code_mixed.ipynb b/ML_Classifier_Malayalam_code_mixed.ipynb new file mode 100644 index 0000000..4e03767 --- /dev/null +++ b/ML_Classifier_Malayalam_code_mixed.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ML_Classifier_Malayalam_code_mixed.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"O1S8SYrnMTNI","executionInfo":{"status":"ok","timestamp":1624682778862,"user_tz":-330,"elapsed":21007,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"0d117409-9793-484e-8b65-1c837581ebce"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"57R5J3_1OfiA","executionInfo":{"status":"ok","timestamp":1624682781513,"user_tz":-330,"elapsed":1259,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_train.tsv', sep = '\\t')\n","data_val = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/Mal_sentiment_full_test_withoutlabels.tsv', sep = '\\t')"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"aIWUjapcPoVJ","executionInfo":{"status":"ok","timestamp":1624682781514,"user_tz":-330,"elapsed":21,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"e8baeab0-bdab-4b39-aaff-ad3df348872a"},"source":["data_train.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory
0Sunny chechi fans evide like adichu power kani...unknown_state
1Angane july month ile ende aadyathe leave njan...Positive
2ഏട്ടന്റെ പുതിയ പടത്തിനു വേണ്ടി കാത്തിരിക്കുന്ന...unknown_state
3ഇനി ലാലേട്ടന്റെ വേട്ട തുടങ്ങാൻ പോകുന്നു..........Positive
4Trailer powli oru raksha illa . Pakshea padam ...Positive
\n","
"],"text/plain":[" text category\n","0 Sunny chechi fans evide like adichu power kani... unknown_state\n","1 Angane july month ile ende aadyathe leave njan... Positive\n","2 ഏട്ടന്റെ പുതിയ പടത്തിനു വേണ്ടി കാത്തിരിക്കുന്ന... unknown_state\n","3 ഇനി ലാലേട്ടന്റെ വേട്ട തുടങ്ങാൻ പോകുന്നു.......... Positive\n","4 Trailer powli oru raksha illa . Pakshea padam ... Positive"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"0vk8YlBibEYt","colab":{"base_uri":"https://localhost:8080/","height":206},"executionInfo":{"status":"ok","timestamp":1624682781515,"user_tz":-330,"elapsed":20,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"8c773c32-f24b-4d65-a201-9c9423c3c290"},"source":["data_val.head()"],"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
textcategory
0Mammooka ninghal mass aa pwoli itemPositive
1Waiting for Malayalam movie For Tamil paiyannot-malayalam
2ദളപതി ഫാൻസിന്റെ വക ഒരു ഒന്നൊന്നര വിജയാശംസകൾ...Positive
3#Trailer pwolichuuuu ഓണത്തിന് വന്നങ്ങു തകർത്തേ...Positive
4Mammoookkaaaa polichadukkiii katta waiting nv 21Positive
\n","
"],"text/plain":[" text category\n","0 Mammooka ninghal mass aa pwoli item Positive\n","1 Waiting for Malayalam movie For Tamil paiyan not-malayalam\n","2 ദളപതി ഫാൻസിന്റെ വക ഒരു ഒന്നൊന്നര വിജയാശംസകൾ... Positive\n","3 #Trailer pwolichuuuu ഓണത്തിന് വന്നങ്ങു തകർത്തേ... Positive\n","4 Mammoookkaaaa polichadukkiii katta waiting nv 21 Positive"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":206},"id":"7jeRfw0XPqe5","executionInfo":{"status":"ok","timestamp":1624682781515,"user_tz":-330,"elapsed":19,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"dc3bf79d-effc-4438-e672-c15972f9707e"},"source":["data_test.head()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
idtext
0Mal_1Teaserinu kurach samayamkoodi mathram. Cant wa...
1Mal_2അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...
2Mal_3മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?
3Mal_4Nowadays 944k views is considered as 1M views ...
4Mal_5Maass.trailer ennu paranja ithaanU makkale.......
\n","
"],"text/plain":[" id text\n","0 Mal_1 Teaserinu kurach samayamkoodi mathram. Cant wa...\n","1 Mal_2 അപ്പോൾ കഥയുടെ റൂട്ട് മാറിയല്ലോ...\n","2 Mal_3 മൂത്തോൻ ട്രൈലെർ Trending List വരാത്തത് എന്താ ?\n","3 Mal_4 Nowadays 944k views is considered as 1M views ...\n","4 Mal_5 Maass.trailer ennu paranja ithaanU makkale......."]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tMXviBypwXb0","executionInfo":{"status":"ok","timestamp":1624682781516,"user_tz":-330,"elapsed":19,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"dd53e503-f71f-42b2-b88d-c0ef1c27cd21"},"source":["data_train.shape, data_val.shape, data_test.shape"],"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["((15888, 2), (1766, 2), (1962, 2))"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Cr674jk8wyck","executionInfo":{"status":"ok","timestamp":1624682781517,"user_tz":-330,"elapsed":19,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"3c47d663-c6e2-4fb3-fa1c-76ecd3778927"},"source":["data_train['category'].value_counts()"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 6421\n","unknown_state 5279\n","Negative 2105\n","not-malayalam 1157\n","Mixed_feelings 926\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":7}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"tREo3vWbNLQA","executionInfo":{"status":"ok","timestamp":1624682781518,"user_tz":-330,"elapsed":17,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"18e38bd8-6315-43e8-d10d-89ef0bc33926"},"source":["data_val['category'].value_counts()"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 706\n","unknown_state 580\n","Negative 237\n","not-malayalam 141\n","Mixed_feelings 102\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":8}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"zX-6Saf7QPh7","executionInfo":{"status":"ok","timestamp":1624682782802,"user_tz":-330,"elapsed":12,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"dff6a3bc-45be-4822-fded-9267ffc6a2ca"},"source":["from sklearn import preprocessing\n","import numpy as np\n","\n","le = preprocessing.LabelEncoder()\n","\n","le.fit(data_train['category'])\n","print(le.classes_)\n","\n","y_train_index = le.transform(data_train['category'])\n","y_val_index = le.transform(data_val['category'])\n"],"execution_count":9,"outputs":[{"output_type":"stream","text":["['Mixed_feelings' 'Negative' 'Positive' 'not-malayalam' 'unknown_state']\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"mY8Wqps9WAMq"},"source":["For word = ngram_range (1,3)\n","For char = ngram_range (1,6)"]},{"cell_type":"code","metadata":{"id":"jqicHV-qRH4V","executionInfo":{"status":"ok","timestamp":1624682789930,"user_tz":-330,"elapsed":5758,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["from sklearn.feature_extraction.text import TfidfVectorizer\n","tfidfvec = TfidfVectorizer(ngram_range=(1,6), analyzer = 'char_wb', max_features = 30000)\n","#tfidfvec = 
TfidfVectorizer(ngram_range=(1,3), max_features = 30000)\n","tfidf_train = tfidfvec.fit_transform(data_train['text'])\n","tfidf_val = tfidfvec.transform(data_val['text'])"],"execution_count":10,"outputs":[]},
{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iV6FzGg_0leL","executionInfo":{"status":"ok","timestamp":1624682789931,"user_tz":-330,"elapsed":24,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"9afbb63c-228c-4706-d245-41e5a1d2c76d"},"source":[
"# Peek at a handful of learned character n-grams instead of dumping the whole vocabulary.\n",
"# get_feature_names() was removed in scikit-learn 1.2, so prefer get_feature_names_out()\n",
"# and fall back to the old accessor only when the installed version lacks the new one.\n",
"get_names = getattr(tfidfvec, 'get_feature_names_out', None) or tfidfvec.get_feature_names\n",
"list(get_names()[:20])"
],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"EDDXkAUI1DF1","executionInfo":{"status":"ok","timestamp":1624682789931,"user_tz":-330,"elapsed":17,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"9999a616-7036-4ebf-e658-3faa21fef2d9"},"source":["tfidf_train.shape"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(15888, 30000)"]},"metadata":{"tags":[]},"execution_count":12}]},
{"cell_type":"code","metadata":{"id":"szYsgWh4UFOy","executionInfo":{"status":"ok","timestamp":1624682789932,"user_tz":-330,"elapsed":8,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["x_train = tfidf_train\n","x_test = tfidf_val\n","y_train = y_train_index\n","y_test = y_val_index"],"execution_count":13,"outputs":[]},
{"cell_type":"code","metadata":{"id":"QbypHNhYSMDy","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1624684748205,"user_tz":-330,"elapsed":1958280,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"c480092c-c0bb-4ce3-c750-71aff0662557"},"source":[
"# Fit each baseline on the training TF-IDF features (x_train / y_train) and print a\n",
"# classification report for its predictions on x_test / y_test.\n",
"from sklearn import metrics\n",
"from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.svm import SVC\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"SEED = 42  # fixed so the stochastic models produce the same report on every run\n",
"\n",
"# (report title, estimator, needs_dense_input); the titles are the ones this cell has always printed.\n",
"candidates = [\n",
"    ('SVM', SVC(probability=True, random_state=SEED), False),\n",
"    ('random', RandomForestClassifier(random_state=SEED), False),\n",
"    # The default max_iter=100 made lbfgs stop early with a ConvergenceWarning on these features.\n",
"    ('Logistic Regression', LogisticRegression(max_iter=1000), False),\n",
"    ('KNN', KNeighborsClassifier(), False),\n",
"    # GaussianNB is fitted on a dense copy of the TF-IDF matrix.\n",
"    ('Naive Bayes', GaussianNB(), True),\n",
"    ('Gradient Boosting', GradientBoostingClassifier(random_state=SEED), False),\n",
"    ('Decision Tree', DecisionTreeClassifier(random_state=SEED), False),\n",
"    ('AdaBoost classifier', AdaBoostClassifier(random_state=SEED), False),\n",
"]\n",
"\n",
"fitted = {}\n",
"for title, clf, needs_dense in candidates:\n",
"    # Densify inline so the dense copies are released as soon as fit/predict return.\n",
"    clf.fit(x_train.toarray() if needs_dense else x_train, y_train)\n",
"    y_pred = clf.predict(x_test.toarray() if needs_dense else x_test)\n",
"    print(title)\n",
"    print(metrics.classification_report(y_test, y_pred))\n",
"    fitted[title] = clf\n",
"\n",
"# Later cells call predict_proba on these two names.\n",
"model_SVM = fitted['SVM']\n",
"LR = fitted['Logistic Regression']"
],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"bQANqfdsf0i6","executionInfo":{"status":"ok","timestamp":1624684807665,"user_tz":-330,"elapsed":1105,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["tfidf_test = tfidfvec.transform(data_test['text'])"],"execution_count":15,"outputs":[]},{"cell_type":"code","metadata":{"id":"RBDDmfD_URTa","executionInfo":{"status":"ok","timestamp":1624684837824,"user_tz":-330,"elapsed":29475,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_model_SVM_prob = model_SVM.predict_proba(tfidf_test)"],"execution_count":16,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7VQ3hfA7gECM","executionInfo":{"status":"ok","timestamp":1624684837826,"user_tz":-330,"elapsed":12,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}},"outputId":"d72bbc7b-ff46-4285-a82f-5b670495cf1b"},"source":["y_pred_model_SVM_prob"],"execution_count":17,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[5.02168957e-01, 8.42647481e-02, 1.51468772e-01, 4.81105332e-02,\n"," 2.13986990e-01],\n"," [4.69749344e-02, 1.37106408e-01, 3.08146152e-01, 2.19189985e-03,\n"," 5.05580606e-01],\n"," [5.17165730e-02, 1.39418025e-01, 4.16065412e-01, 1.71490637e-02,\n"," 3.75650926e-01],\n"," ...,\n"," [9.95890693e-02, 1.62487191e-01, 4.38871624e-01, 1.20002178e-03,\n"," 2.97852093e-01],\n"," [9.88383717e-02, 4.00563448e-01, 3.18753952e-01, 6.64705865e-04,\n"," 1.81179523e-01],\n"," [2.13842768e-01, 2.47631509e-01, 4.29130849e-01, 4.49199039e-04,\n"," 1.08945674e-01]])"]},"metadata":{"tags":[]},"execution_count":17}]},{"cell_type":"code","metadata":{"id":"i_feFgnOiZM0","executionInfo":{"status":"ok","timestamp":1624684977869,"user_tz":-330,"elapsed":615,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":[
"# predict_proba columns follow model_SVM.classes_ (the encoded labels), so map them back\n",
"# through the fitted LabelEncoder instead of hard-coding the label names.\n",
"df_y_pred_model_SVM_prob = pd.DataFrame(y_pred_model_SVM_prob, columns=le.inverse_transform(model_SVM.classes_))"
],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"PujUzf51iZOG","executionInfo":{"status":"ok","timestamp":1624684979229,"user_tz":-330,"elapsed":696,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_model_SVM_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/SVM_char_1-6_gram_Malayalam_74.csv',index = False)"],"execution_count":19,"outputs":[]},
{"cell_type":"code","metadata":{"id":"k-XiNgH_i1Md","executionInfo":{"status":"ok","timestamp":1624684981037,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["y_pred_LR_prob = LR.predict_proba(tfidf_test)"],"execution_count":20,"outputs":[]},
{"cell_type":"code","metadata":{"id":"pS6nkrMbi1P_","executionInfo":{"status":"ok","timestamp":1624684982931,"user_tz":-330,"elapsed":3,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":[
"# Same mapping for the logistic-regression probabilities: column order comes from LR.classes_.\n",
"df_y_pred_LR_prob = pd.DataFrame(y_pred_LR_prob, columns=le.inverse_transform(LR.classes_))"
],"execution_count":null,"outputs":[]},
{"cell_type":"code","metadata":{"id":"ru0vsVTsiZT3","executionInfo":{"status":"ok","timestamp":1624684984209,"user_tz":-330,"elapsed":5,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhN6WFfi4jSsnU9CzZnfDwqVV_KtgtD8yEnF1qt=s64","userId":"09645300777368779770"}}},"source":["df_y_pred_LR_prob.to_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Malyalam/LR_char_1-6_gram_Malayalam_73.csv', index = False)"],"execution_count":22,"outputs":[]},{"cell_type":"code","metadata":{"id":"7vXANa-i7HFG"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"OYQGvHNYnxGT"},"source":[""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Ig9Rc2Lrny6y"},"source":[""],"execution_count":null,"outputs":[]}]} \ No newline at end of file diff --git a/ML_Classifier_Tamil_Sentiment (1).ipynb b/ML_Classifier_Tamil_Sentiment (1).ipynb new file mode 100644 index 0000000..77dad5c --- /dev/null +++ b/ML_Classifier_Tamil_Sentiment (1).ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"ML_Classifier_Tamil_Sentiment.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wXKoRJGIBNuM","executionInfo":{"status":"ok","timestamp":1624713991382,"user_tz":-330,"elapsed":25886,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"873c6438-eb14-48fa-b826-e400b25a394f"},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"57R5J3_1OfiA","executionInfo":{"status":"ok","timestamp":1624713993052,"user_tz":-330,"elapsed":1674,"user":{"displayName":"Abhinav 
Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["import pandas as pd \n","data_train = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_train.tsv', sep = '\\t')\n","data_dev = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_dev.tsv', sep = '\\t')\n","data_test = pd.read_csv('/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil/tamil_sentiment_full_test_withoutlabels.tsv', sep = '\\t')\n"],"execution_count":2,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"SNMFlX4j7BMK","executionInfo":{"status":"ok","timestamp":1624713993052,"user_tz":-330,"elapsed":4,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}},"outputId":"5012f734-02a9-4138-d8d8-99674d323153"},"source":["data_train['category'].value_counts()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Positive 20069\n","unknown_state 5628\n","Negative 4271\n","Mixed_feelings 4020\n","not-Tamil 1667\n","Positive 1\n","Name: category, dtype: int64"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"MJkeYXof7BNp","executionInfo":{"status":"ok","timestamp":1624713993501,"user_tz":-330,"elapsed":9,"user":{"displayName":"Abhinav Kumar","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gh0ZcXgvFqErEvgS7XrlCFDh4WVxpDXg_3VgQdH=s64","userId":"10203358762380431559"}}},"source":["data_train['category'] = data_train['category'].replace('Positive ', 
# %%
# Class distribution of the training split.
data_train['category'].value_counts()

# %%
# Class distribution of the development split.
data_dev['category'].value_counts()

# %%
from sklearn import preprocessing
import numpy as np

# The encoder is fitted on the training labels only; both splits are then
# encoded with that single mapping so class indices agree between them.
le = preprocessing.LabelEncoder().fit(data_train['category'])
print(le.classes_)

y_train_index, y_dev_index = (
    le.transform(split['category']) for split in (data_train, data_dev)
)

# %%
data_train.shape, data_dev.shape, data_test.shape

# %% [markdown]
# For word = ngram_range (1,3)
# For char = ngram_range (1,6)

# %%
from sklearn.feature_extraction.text import TfidfVectorizer

# Character n-grams (1 to 6, word-boundary aware), capped at 15000 features.
tfidfvec = TfidfVectorizer(
    analyzer='char_wb',
    ngram_range=(1, 6),
    max_features=15000,
)
#tfidfvec = TfidfVectorizer(ngram_range=(1,3), max_features = 30000)

# The vocabulary is learned from the training texts; the dev texts are only
# projected onto it.
tfidf_train = tfidfvec.fit_transform(data_train['text'])
tfidf_val = tfidfvec.transform(data_dev['text'])

# %%
#tfidfvec.get_feature_names()

# %%
tfidf_train.shape

# %%
# Aliases consumed by the model-training cell. Note that the "*_test" names
# hold the development split, not the held-out test split.
x_train, y_train = tfidf_train, y_train_index
x_test, y_test = tfidf_val, y_dev_index
# %%
from sklearn import metrics
from sklearn.ensemble import (
    AdaBoostClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Fixed seed so that the stochastic estimators below give the same report on
# every Restart & Run All.
SEED = 42


def fit_and_report(title, model, features_fit, labels_fit, features_eval, labels_eval):
    """Fit ``model``, predict on the evaluation features and print a report.

    Parameters
    ----------
    title : str
        Heading printed above the classification report.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
    features_fit, labels_fit
        Data passed to ``model.fit``.
    features_eval, labels_eval
        Data used for ``model.predict`` and as the reference labels of the report.

    Returns
    -------
    The predictions returned by ``model.predict(features_eval)``.
    """
    model.fit(features_fit, labels_fit)
    predictions = model.predict(features_eval)
    print(title)
    print(metrics.classification_report(labels_eval, predictions))
    return predictions


# %%
# probability=True is required because predict_proba is called on this model
# when the test-set probabilities are exported.
model_SVM = SVC(probability=True, random_state=SEED)
y_pred_SVM = fit_and_report("SVM", model_SVM, x_train, y_train, x_test, y_test)

rf = RandomForestClassifier(random_state=SEED)
y_pred_rf = fit_and_report("random", rf, x_train, y_train, x_test, y_test)

# The default iteration budget was not enough: the previous run of this cell
# logged "ConvergenceWarning: lbfgs failed to converge". Raise max_iter so the
# solver can finish instead of returning a partially optimised model.
LR = LogisticRegression(max_iter=1000)
y_pred_LR = fit_and_report("Logistic Regression", LR, x_train, y_train, x_test, y_test)

neigh = KNeighborsClassifier()
y_pred_KNN = fit_and_report("KNN", neigh, x_train, y_train, x_test, y_test)

# GaussianNB is given dense arrays here, so both matrices are converted with
# .toarray(); this materialises the full feature matrices in memory.
naive = GaussianNB()
y_pred_naive = fit_and_report(
    "Naive Bayes", naive, x_train.toarray(), y_train, x_test.toarray(), y_test
)

gradient = GradientBoostingClassifier(random_state=SEED)
y_pred_gradient = fit_and_report(
    "Gradient Boosting", gradient, x_train, y_train, x_test, y_test
)

decision = DecisionTreeClassifier(random_state=SEED)
y_pred_decision = fit_and_report(
    "Decision Tree", decision, x_train, y_train, x_test, y_test
)

AdaBoostclf = AdaBoostClassifier(random_state=SEED)
y_pred_AdaBoostclf = fit_and_report(
    "AdaBoost classifier", AdaBoostclf, x_train, y_train, x_test, y_test
)
of ITERATIONS REACHED LIMIT.\n","\n","Increase the number of iterations (max_iter) or scale the data as shown in:\n"," https://scikit-learn.org/stable/modules/preprocessing.html\n","Please also refer to the documentation for alternative solver options:\n"," https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n"," extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"],"name":"stderr"},{"output_type":"stream","text":["Logistic Regression\n"," precision recall f1-score support\n","\n"," 0 0.35 0.13 0.19 438\n"," 1 0.52 0.32 0.40 480\n"," 2 0.69 0.89 0.78 2257\n"," 3 0.70 0.52 0.60 176\n"," 4 0.47 0.36 0.41 611\n","\n"," accuracy 0.64 3962\n"," macro avg 0.55 0.45 0.47 3962\n","weighted avg 0.60 0.64 0.60 3962\n","\n","KNN\n"," precision recall f1-score support\n","\n"," 0 0.23 0.33 0.27 438\n"," 1 0.42 0.24 0.31 480\n"," 2 0.71 0.79 0.75 2257\n"," 3 0.70 0.48 0.57 176\n"," 4 0.43 0.30 0.36 611\n","\n"," accuracy 0.58 3962\n"," macro avg 0.50 0.43 0.45 3962\n","weighted avg 0.58 0.58 0.57 3962\n","\n","Naive Bayes\n"," precision recall f1-score support\n","\n"," 0 0.19 0.14 0.16 438\n"," 1 0.22 0.66 0.33 480\n"," 2 0.84 0.30 0.45 2257\n"," 3 0.16 0.78 0.27 176\n"," 4 0.34 0.28 0.31 611\n","\n"," accuracy 0.35 3962\n"," macro avg 0.35 0.43 0.30 3962\n","weighted avg 0.59 0.35 0.37 3962\n","\n","Gradient Boosting\n"," precision recall f1-score support\n","\n"," 0 0.43 0.10 0.16 438\n"," 1 0.59 0.20 0.30 480\n"," 2 0.64 0.95 0.77 2257\n"," 3 0.75 0.42 0.54 176\n"," 4 0.56 0.23 0.32 611\n","\n"," accuracy 0.63 3962\n"," macro avg 0.59 0.38 0.42 3962\n","weighted avg 0.60 0.63 0.56 3962\n","\n","Decision Tree\n"," precision recall f1-score support\n","\n"," 0 0.19 0.19 0.19 438\n"," 1 0.25 0.23 0.24 480\n"," 2 0.68 0.68 0.68 2257\n"," 3 0.41 0.39 0.40 176\n"," 4 0.32 0.35 0.33 611\n","\n"," accuracy 0.51 3962\n"," macro avg 0.37 0.37 0.37 3962\n","weighted avg 0.51 0.51 0.51 3962\n","\n","AdaBoost classifier\n"," precision recall f1-score 
# %%
# Directory on the mounted Drive that receives the per-model probability files.
OUTPUT_DIR = '/content/drive/MyDrive/HASOC_2021/Sentiment_Dravidian/Tamil'


def probabilities_to_frame(probabilities, model, label_encoder):
    """Wrap a ``predict_proba`` result in a DataFrame with named class columns.

    The column names are derived from ``model.classes_`` (the encoded labels
    the model was fitted on) mapped back through ``label_encoder``, rather than
    from a hand-written list, so the names always follow the column order of
    ``probabilities``.

    Parameters
    ----------
    probabilities : array of shape (n_samples, n_classes)
        Output of ``model.predict_proba``.
    model : fitted classifier exposing ``classes_``.
    label_encoder : fitted LabelEncoder used to produce the training labels.

    Returns
    -------
    pandas.DataFrame with one column per class.
    """
    class_names = label_encoder.inverse_transform(model.classes_)
    return pd.DataFrame(probabilities, columns=class_names)


# %%
# Project the held-out test texts onto the vocabulary learned from the
# training split (transform only, no refit).
tfidf_test = tfidfvec.transform(data_test['text'])

# %%
y_pred_model_SVM_prob = model_SVM.predict_proba(tfidf_test)

# %%
y_pred_model_SVM_prob

# %%
df_y_pred_model_SVM_prob = probabilities_to_frame(y_pred_model_SVM_prob, model_SVM, le)

# %%
df_y_pred_model_SVM_prob.to_csv(OUTPUT_DIR + '/SVM_char_1-6_gram_TAMIL_60.csv', index=False)

# %%
y_pred_LR_prob = LR.predict_proba(tfidf_test)

# %%
df_y_pred_LR_prob = probabilities_to_frame(y_pred_LR_prob, LR, le)

# %%
df_y_pred_LR_prob.to_csv(OUTPUT_DIR + '/LR_char_1-6_gram_TAMIL_60.csv', index=False)