File tree 1 file changed +23
-2
lines changed
1 file changed +23
-2
lines changed Original file line number Diff line number Diff line change @@ -430,8 +430,29 @@ def get_domain_from_subdomain(subdomain):
430
430
Returns:
431
431
str: Domain name.
432
432
"""
433
- ext = tldextract .extract (subdomain )
434
- return '.' .join (ext [1 :3 ])
433
+ # ext = tldextract.extract(subdomain)
434
+ # return '.'.join(ext[1:3])
435
+
436
+ if not validators .domain (subdomain ):
437
+ return None
438
+
439
+ # Use tldextract to parse the subdomain
440
+ extracted = tldextract .extract (subdomain )
441
+
442
+ # if tldextract recognized the tld then its the final result
443
+ if extracted .suffix :
444
+ domain = f"{ extracted .domain } .{ extracted .suffix } "
445
+ else :
446
+ # Fallback method for unknown TLDs, like .clouds or .local etc
447
+ parts = subdomain .split ('.' )
448
+ if len (parts ) >= 2 :
449
+ domain = '.' .join (parts [- 2 :])
450
+ else :
451
+ return None
452
+
453
+ # Validate the domain before returning
454
+ return domain if validators .domain (domain ) else None
455
+
435
456
436
457
437
458
def sanitize_url (http_url ):
You can’t perform that action at this time.
0 commit comments