Skip to content

Commit 9a4ef1e

Browse files
authored
Add files via upload
1 parent 3f63253 commit 9a4ef1e

File tree

1 file changed

+180
-0
lines changed

1 file changed

+180
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
# This code will automatically update cases' categories field in your Salesforce system.
2+
# The code is using the model that was built previously and it's inserting the model's classification to the Salesforce relevant field via API.
3+
4+
# To review the model development, please visit: https://github.com/aloncohen1/My-Projects/blob/master/NLP%20Project.ipynb
5+
6+
# Made by: Alon Cohen
7+
8+
9+
from simple_salesforce import Salesforce
10+
import requests
11+
import base64
12+
import json
13+
import salesforce_reporting
14+
import pandas as pd
15+
import pickle
16+
import re
17+
18+
# Open the Salesforce production API session used for the case updates below.
# NOTE(review): credentials are hard-coded placeholders — load them from
# environment variables or a secrets store before running in production.
sf_production = Salesforce(
    username="[email protected]",
    password="xxxxxxxx",
    security_token="xxxxxxxxx",
)
22+
23+
# Open a second session against the Salesforce Reports API and pull the
# report listing all uncataloged cases.
# NOTE(review): same hard-coded placeholder credentials as above — move to a
# secrets store before deploying.
reports_sf = salesforce_reporting.Connection(
    username="[email protected]",
    password="xxxxxxxx",
    security_token="xxxxxxxxx",
)
# Create a report of all the uncataloged cases in Salesforce and put its ID
# here.  (Fix: the placeholder used to read 'Ener the report ID'.)
report = reports_sf.get_report('Enter the report ID')

parser = salesforce_reporting.ReportParser(report)
31+
32+
# Extract the report of the uncataloged cases and transform it into a Pandas
# data frame.
# Fixes: snapshot parser.records() once (the original re-fetched it 5+ times
# per row), and write cells with .loc — chained indexing like
# df['col'][i] = ... raises SettingWithCopyWarning and may silently not write.
records = parser.records()
data_columns = ['Case Number', 'Case ID', 'Subject', 'Description', 'Case Comments']
cases_to_catalog = pd.DataFrame(
    columns=data_columns + ['Topic - for PC'],
    index=range(len(records)))
for row_idx, record in enumerate(records):
    # Each report record is positional: number, id, subject, description, comments.
    cases_to_catalog.loc[row_idx, data_columns] = record[:5]

# Keep the original renames: 'Subject' becomes 'Title'; index labels are
# converted to strings, matching the downstream code's expectations.
cases_to_catalog = cases_to_catalog.rename(index=str, columns={"Subject": "Title"})
44+
45+
46+
# Helper to restore the pickled scikit-learn pipelines from disk.
def load_obj(name):
    """Return the object un-pickled from ``<name>.pkl`` in the working dir.

    NOTE(review): pickle is unsafe on untrusted files — only load model
    files produced by the training notebook.
    """
    path = name + '.pkl'
    with open(path, 'rb') as handle:
        return pickle.load(handle)
50+
51+
52+
# Load the two trained models from pickle files in the working directory.
# As used below: pipeline_lr_linking decides "Data linking" vs everything
# else, and pipeline_lr_others predicts the general category.
pipeline_lr_linking = load_obj('pipeline_lr_linking')
pipeline_lr_others = load_obj('pipeline_lr_others')
54+
55+
# Map each Case ID to its current 'Topic - for PC' value, keeping the first
# occurrence per case (the report repeats the Case ID once per comment row).
# Fixes: iterate the two columns in lockstep with zip instead of positional
# [count] lookups (the index labels are strings after the rename above, so
# integer lookups relied on a deprecated positional fallback), and drop the
# original's no-op `len(cases_to_catalog_dict)` expression statement.
cases_to_catalog_dict = {}
for case_id, topic in zip(cases_to_catalog['Case ID'],
                          cases_to_catalog['Topic - for PC']):
    if case_id not in cases_to_catalog_dict:
        cases_to_catalog_dict[case_id] = topic
63+
64+
65+
# Create functions that clean the text
def my_function(raw):
    """Normalize a free-text case field for the classifiers.

    Lower-cases the text, strips punctuation and markup, replaces URLs,
    e-mails, dates and ISBN/ISSN/DOI-looking tokens with placeholder words,
    removes all remaining digits and over-long tokens, and collapses runs of
    whitespace to single spaces.
    """
    raw = raw.lower()
    # First punctuation pass.  This runs BEFORE the regexes (matching the
    # original order), so e.g. a URL has already lost ':' and '.' when the
    # http pattern fires.
    for ch in (']', '[', ')', '(', ':', '.', ','):
        raw = raw.replace(ch, '')
    # Single early double-space collapse kept: the space-delimited ISBN/ISSN
    # patterns below depend on it.
    raw = raw.replace('  ', ' ')
    raw = raw.replace('"', '')
    raw = raw.replace('\n', ' ')
    raw = raw.replace('\t', ' ')
    raw = raw.replace('?', '')
    # URLs, identifiers and contact details -> placeholder tokens.
    raw = re.sub(r"http\S+", "", raw)
    raw = re.sub(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', "", raw)
    raw = re.sub(r" \d{13} ", " isbn ", raw)
    raw = re.sub(r" \d{10} ", " isbn ", raw)
    raw = re.sub(r" \d{7}\d{1}[\dx] ", " issn ", raw)
    raw = re.sub(r" \d{4}[-]\d{3}[\dx] ", " issn ", raw)
    raw = re.sub(r" 10\.\S+ ", " doi ", raw)
    raw = re.sub(r'<.*>', "", raw)
    raw = re.sub(r'\S+@\S+', "email", raw)
    # Fix: the date substitution must run BEFORE digits are stripped —
    # the original ran it after '[0-9]+' removal, so it could never match.
    raw = re.sub(r'(\d+/\d+/\d+)', "date", raw)
    raw = re.sub(r'[0-9]+', "", raw)
    # Drop pathological tokens (> 22 chars).  NOTE(review): str.replace
    # removes the token anywhere it appears as a substring, not only as a
    # whole word — kept as in the original.
    for token in raw.split():
        if len(token) > 22:
            raw = raw.replace(token, '')
    # Second punctuation pass.  Deleting '-' and '>' individually also
    # erases the original's '->' and '--' sequences.
    for ch in ('$', '!', "'", '&', '/', '+', '_', '@', '#', '=', 'â',
               '*', '-', ';', '<', '>', 'ß'):
        raw = raw.replace(ch, '')
    raw = raw.replace('%', ' ')
    # Fix: the original computed ' '.join(raw.split()) but discarded the
    # result; assigning it collapses every whitespace run to a single space
    # and trims the ends.
    return ' '.join(raw.split())
120+
121+
122+
# Flatten each case's information (title + description + every sufficiently
# long comment) into one long cleaned string per Case ID, alongside the
# case's current category.
merged_cases_to_catalog = pd.DataFrame(columns=['Case ID', 'Mixed_Comments', 'Categorie'], )
cases_by_id = cases_to_catalog.groupby('Case ID')

for row, case_id in enumerate(cases_to_catalog_dict):
    case = cases_by_id.get_group(case_id)

    # The report repeats Title/Description on every row of a case, so the
    # last value seen is representative (mirrors the original per-row loop).
    title = None
    for value in case['Title']:
        title = value
    description = None
    for value in case['Description']:
        description = value

    blob = my_function(str(title)) + ' ' + my_function(str(description))

    # Only comments longer than four words carry signal worth keeping.
    for comment in case['Case Comments']:
        if len(str(comment).split()) > 4:
            blob += ' ' + my_function(str(comment))

    topic = None
    for value in case['Topic - for PC']:
        topic = value

    merged_cases_to_catalog.loc[row] = [case_id, blob, topic]
147+
148+
# Condition - the update will occur only if there are cases to update.
if len(merged_cases_to_catalog) > 0:

    # Predict the case topics with both trained models.
    first_prediction_for_catalog = pipeline_lr_linking.predict(merged_cases_to_catalog['Mixed_Comments'])
    second_prediction_for_catalog = pipeline_lr_others.predict(merged_cases_to_catalog['Mixed_Comments'])

    # Aggregate the source text and both predictions into one frame.
    final_prediction = pd.DataFrame(columns=['Case ID', 'Mixed_Comments', 'Linking/Not Linking', 'General Prediction',
                                             'Final Predicted Categorie'], )
    final_prediction['Case ID'] = merged_cases_to_catalog['Case ID']
    final_prediction['Mixed_Comments'] = merged_cases_to_catalog['Mixed_Comments']
    final_prediction['Linking/Not Linking'] = first_prediction_for_catalog
    final_prediction['General Prediction'] = second_prediction_for_catalog
    final_prediction['Final Predicted Categorie'] = second_prediction_for_catalog

    # If the first model says "Data linking", that wins; otherwise the second
    # model's general prediction stands.  Fix: a vectorized .loc write
    # replaces the original per-row chained-indexing assignment
    # (df['col'][i] = ...), which raises SettingWithCopyWarning and may
    # silently fail to write through.
    linking_mask = final_prediction['Linking/Not Linking'] == 'Data linking'
    final_prediction.loc[linking_mask, 'Final Predicted Categorie'] = 'Data linking'

    # Create the final data frame for the update, keyed by Case ID.
    update_file = pd.DataFrame(columns=['Topic - for PC'], index=final_prediction['Case ID'])
    update_file['Topic - for PC'] = list(final_prediction['Final Predicted Categorie'])

    # Push the predicted topics back to Salesforce via the API.
    counter = 0
    for case_id in update_file.index:
        # Fix: compare to None with `is`, not `==`.
        # Backfill the category flag field when it was never set.
        if sf_production.Case.get(case_id)['bl_New_Category__c'] is None:
            sf_production.Case.update(case_id, {'bl_New_Category__c': 'General'})
        sf_production.Case.update(case_id, {'Topic_for_PC__c': update_file['Topic - for PC'][case_id]})
        counter += 1
    print(str(counter) + ' ' + 'Cases has been updated')

0 commit comments

Comments
 (0)