Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pii_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,19 @@ def analyze_text(text: str, show_supported=False, show_details=False, score_thre
patterns=[student_id_pattern])
registry.add_recognizer(student_id_recognizer)

# Custom recognizer for American phone numbers written with the leading "1":
# "1", an optional "-" or "." separator, a three-digit area code (optionally
# wrapped in parentheses), then groups of 3 and 4 digits, each optionally
# preceded by a separator.
# The separators are spelled as the character class [-.] so that "." is a
# literal dot rather than the regex any-character wildcard, and the \b
# anchors stop the match from starting or ending inside a longer run of
# word characters (e.g. in the middle of a longer digit string).
# NOTE(review): a space between groups used to match only through the
# wildcard; add " " to the character class if that format must be detected.
american_phone_number_pattern = Pattern(
    name='american_phone_number',
    regex=r'\b1[-.]?(?:\d{3}|\(\d{3}\))[-.]?\d{3}[-.]?\d{4}\b',
    score=0.85)
american_phone_number_recognizer = PatternRecognizer(
    supported_entity='AMERICAN_PHONE_NUMBER',
    patterns=[american_phone_number_pattern])
registry.add_recognizer(american_phone_number_recognizer)

# Dewberry custom recognizer for street-address-style locations.
# The pattern expects, in order: a run of digits, three alphabetic words,
# a comma, one alphabetic word, a comma, two letters and five digits,
# e.g. "123 North Main Street, Springfield, VA 22031".
dewLocPattern = Pattern(
    name='DewLOCATION',
    regex=r'[0-9]+\s[A-Za-z]+\s[A-Za-z]+\s[A-Za-z]+,\s[A-Za-z]+,\s[A-Za-z][A-Za-z]\s\d\d\d\d\d',
    score=0.9,
)
dewLocRecognizer = PatternRecognizer(
    supported_entity='DewLocEnt',
    patterns=[dewLocPattern],
)
registry.add_recognizer(dewLocRecognizer)
# End of the Dewberry custom recognizer.

# Customize SpacyRecognizer to include some additional labels
# First remove the default SpacyRecognizer
registry.remove_recognizer("SpacyRecognizer")
Expand Down
3 changes: 2 additions & 1 deletion test_pii_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def test_base_supported_entities(self):
'US_DRIVER_LICENSE',
'AU_ACN',
'ORGANIZATION',
'STUDENT_ID']
'STUDENT_ID',
'AMERICAN_PHONE_NUMBER']
for entity in supported_entities:
self.assertIn(entity, results)

Expand Down
35 changes: 28 additions & 7 deletions test_team_superior_coders.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@ def test_aggie_pride(self):
def test_student_id_detect(self):
"""Test to show if a student ID is detected"""

#positive test case
result = analyze_text('my student ID is: 926491673')
print(result)
self.assertIn('STUDENT_ID', str(result))

#positive test case
result = analyze_text('my student ID is: 123456789')
print(result)
Expand All @@ -25,7 +20,34 @@ def test_student_id_detect(self):
print(result)
self.assertNotIn('STUDENT_ID', str(result))

def test_american_phone_number_detect(self):
    """Check AMERICAN_PHONE_NUMBER detection on matching and non-matching text."""
    entity = 'AMERICAN_PHONE_NUMBER'

    # Positive cases: dash-separated with a parenthesized area code,
    # dot-separated, and a plain digit run with no separators.
    should_match = (
        'My phone number is 1-(199)-555-9461',
        'My phone number is 1.121.555.2962',
        'My phone number is 11215552962',
    )
    for sample in should_match:
        results = analyze_text(sample)
        print(results)
        self.assertIn(entity, str(results))

    # Negative cases: the entity must not be reported for these samples.
    should_not_match = (
        'My phone number is -120-555-9461',
        'My phone number is 555-9461',
    )
    for sample in should_not_match:
        results = analyze_text(sample)
        print(results)
        self.assertNotIn(entity, str(results))

def test_passport_number_detect(self):
"""Test to show if a passport number is detected"""
Expand Down Expand Up @@ -61,5 +83,4 @@ def test_ipv4_address(self):
#negative test case
result = analyze_text('My ip address is: 123.123')
print(result)
self.assertNotIn('IP_ADDRESS', str(result))

self.assertNotIn('IP_ADDRESS', str(result))